1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2013 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2013
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #if defined SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
55 #define SLJIT_MALLOC(size, allocator_data) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr, allocator_data) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
68 /* Defines for debugging purposes. */
69
70 /* 1 - Use unoptimized capturing brackets.
71 2 - Enable capture_last_ptr (includes option 1). */
72 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
73
74 /* 1 - Always have a control head. */
75 /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
76
77 /* Allocate memory for the regex stack on the real machine stack.
78 Fast, but limited size. */
79 #define MACHINE_STACK_SIZE 32768
80
/* Growth rate for stack allocated by the OS. Should be a multiple
of the page size. */
83 #define STACK_GROWTH_RATE 8192
84
85 /* Enable to check that the allocation could destroy temporaries. */
86 #if defined SLJIT_DEBUG && SLJIT_DEBUG
87 #define DESTROY_REGISTERS 1
88 #endif
89
90 /*
Short summary of the backtracking mechanism employed by the jit code generator:
92
The code generator follows the recursive nature of the PERL compatible regular
expressions. The basic blocks of regular expressions are condition checkers
which execute different commands depending on the result of the condition check.
The relationship between the operators can be horizontal (concatenation) and
vertical (sub-expression) (See struct backtrack_common for more details).
98
99 'ab' - 'a' and 'b' regexps are concatenated
100 'a+' - 'a' is the sub-expression of the '+' operator
101
The condition checkers are boolean (true/false) checkers. Machine code is generated
for the checker itself and for the actions depending on the result of the checker.
The 'true' case is called the matching path (expected path), and the other is called
the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
branches on the matching path.
107
108 Greedy star operator (*) :
109 Matching path: match happens.
110 Backtrack path: match failed.
111 Non-greedy star operator (*?) :
112 Matching path: no need to perform a match.
113 Backtrack path: match is required.
114
The following example shows how the code is generated for a capturing bracket
with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
we have the following regular expression:
118
119 A(B|C)D
120
121 The generated code will be the following:
122
123 A matching path
124 '(' matching path (pushing arguments to the stack)
125 B matching path
126 ')' matching path (pushing arguments to the stack)
127 D matching path
128 return with successful match
129
130 D backtrack path
131 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
132 B backtrack path
133 C expected path
134 jump to D matching path
135 C backtrack path
136 A backtrack path
137
Notice that the order of the backtrack code paths is the opposite of the fast
code paths. In this way the topmost value on the stack always belongs
to the current backtrack code path. The backtrack path must check
whether there is a next alternative. If so, it needs to jump back to
the matching path eventually. Otherwise it needs to clear out its own stack
frame and continue the execution on the backtrack code paths.
144 */
145
146 /*
147 Saved stack frames:
148
Atomic blocks and asserts require reloading the values of private data
when the backtrack mechanism is performed. Because of OP_RECURSE, the data
are not necessarily known at compile time, thus we need a dynamic restore
mechanism.
153
154 The stack frames are stored in a chain list, and have the following format:
155 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
156
157 Thus we can restore the private data to a particular point in the stack.
158 */
159
/* Argument block for a jit_function call: jit_function takes a pointer to
one of these as its only parameter. Members are grouped by kind, pointers
first, as the two section comments below indicate. */
typedef struct jit_arguments {
  /* Pointers first. */
  struct sljit_stack *stack;
  const pcre_uchar *str;
  const pcre_uchar *begin;
  const pcre_uchar *end;
  int *offsets;
  pcre_uchar *uchar_ptr;
  pcre_uchar *mark_ptr;
  void *callout_data;
  /* Everything else after. */
  pcre_uint32 limit_match;
  int real_offset_count;
  int offset_count;
  /* One-byte flags. NOTE(review): presumably these mirror the PCRE_NOTBOL,
  PCRE_NOTEOL, PCRE_NOTEMPTY and PCRE_NOTEMPTY_ATSTART exec options - confirm
  against the code that fills this structure. */
  pcre_uint8 notbol;
  pcre_uint8 noteol;
  pcre_uint8 notempty;
  pcre_uint8 notempty_atstart;
} jit_arguments;
179
/* Holds function pointers, read-only data list heads and code sizes. Each of
the three arrays has JIT_NUMBER_OF_COMPILE_MODES entries, i.e. one slot per
JIT compile mode. The remaining members are single values. */
typedef struct executable_functions {
  void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
  void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES];
  sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
  /* Callback and the opaque pointer stored next to it.
  NOTE(review): presumably userdata is what gets passed to callback -
  confirm at the call site. */
  PUBL(jit_callback) callback;
  void *userdata;
  pcre_uint32 top_bracket;
  pcre_uint32 limit_match;
} executable_functions;
189
/* Node of a singly linked list of sljit jumps. */
typedef struct jump_list {
  struct sljit_jump *jump;
  struct jump_list *next;
} jump_list;
194
/* Node of a singly linked list that pairs a jump (start) with a label (quit).
NOTE(review): presumably 'start' enters an out-of-line stub and 'quit' is
where the stub resumes - confirm against the code that fills the list. */
typedef struct stub_list {
  struct sljit_jump *start;
  struct sljit_label *quit;
  struct stub_list *next;
} stub_list;
200
/* Node of a singly linked list that pairs a label with a pointer to a
machine word (update_addr).
NOTE(review): presumably the word is overwritten with the label's final
address once the code has been generated - confirm. */
typedef struct label_addr_list {
  struct sljit_label *label;
  sljit_uw *update_addr;
  struct label_addr_list *next;
} label_addr_list;
206
/* Negative marker values.
NOTE(review): presumably used in place of a (non-negative) frame size, cf.
the "Less than 0 if a frame is not needed" remarks on the framesize
members - confirm against get_framesize. */
enum frame_types {
  no_frame = -1,
  no_stack = -2
};
211
/* Tags for two kinds of control entries: a mark and a then-trap.
NOTE(review): presumably stored in the control verb chain (see
control_head_ptr in compiler_common) - confirm. */
enum control_types {
  type_mark = 0,
  type_then_trap = 1
};
216
/* Function pointer type (SLJIT_CALL calling convention) that receives a
jit_arguments block and returns an int. */
typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
218
/* The following structure is the key data type for the recursive
code generator. It is allocated by compile_matchingpath, and contains
the arguments for compile_backtrackingpath. Must be the first member
of its descendants (the *_backtrack structures in this file all start
with a backtrack_common member named 'common'). */
typedef struct backtrack_common {
  /* Concatenation stack: link to the previous record and the jumps
  collected for it. */
  struct backtrack_common *prev;
  jump_list *nextbacktracks;
  /* Internal stack (for component operators). */
  struct backtrack_common *top;
  jump_list *topbacktracks;
  /* Opcode pointer. */
  pcre_uchar *cc;
} backtrack_common;
233
/* Backtrack record for assertions; extends backtrack_common. */
typedef struct assert_backtrack {
  backtrack_common common;
  /* Jump list named for the condition-failed case. */
  jump_list *condfailed;
  /* Less than 0 if a frame is not needed. */
  int framesize;
  /* Points to our private memory word on the stack. */
  int private_data_ptr;
  /* For iterators. */
  struct sljit_label *matchingpath;
} assert_backtrack;
244
/* Backtrack record for brackets; extends backtrack_common. */
typedef struct bracket_backtrack {
  backtrack_common common;
  /* Where to continue if an alternative is successfully matched. */
  struct sljit_label *alternative_matchingpath;
  /* For rmin and rmax iterators. */
  struct sljit_label *recursive_matchingpath;
  /* For greedy ? operator. */
  struct sljit_label *zero_matchingpath;
  /* Contains the branches of a failed condition. The members share
  storage, so only the one matching the bracket's opcode is meaningful. */
  union {
    /* Both for OP_COND, OP_SCOND. */
    jump_list *condfailed;
    assert_backtrack *assert;
    /* For OP_ONCE. Less than 0 if not needed. */
    int framesize;
  } u;
  /* Points to our private memory word on the stack. */
  int private_data_ptr;
} bracket_backtrack;
264
/* Backtrack record for the bracketpos case; extends backtrack_common.
NOTE(review): presumably used for the possessive bracket opcodes
(OP_BRAPOS and relatives) - confirm. */
typedef struct bracketpos_backtrack {
  backtrack_common common;
  /* Points to our private memory word on the stack. */
  int private_data_ptr;
  /* Reverting stack is needed. */
  int framesize;
  /* Allocated stack size. */
  int stacksize;
} bracketpos_backtrack;
274
/* Backtrack record that adds one matching-path label to backtrack_common.
NOTE(review): presumably used for OP_BRAMINZERO - confirm. */
typedef struct braminzero_backtrack {
  backtrack_common common;
  struct sljit_label *matchingpath;
} braminzero_backtrack;
279
/* Backtrack record for iterators; extends backtrack_common with the label
of the next iteration. */
typedef struct iterator_backtrack {
  backtrack_common common;
  /* Next iteration. */
  struct sljit_label *matchingpath;
} iterator_backtrack;
285
/* Node of a singly linked list describing one recursion target. */
typedef struct recurse_entry {
  struct recurse_entry *next;
  /* Contains the function entry. */
  struct sljit_label *entry;
  /* Collects the calls made while the function has not been created yet. */
  jump_list *calls;
  /* Points to the starting opcode. */
  sljit_sw start;
} recurse_entry;
295
/* Backtrack record for recursion; extends backtrack_common with a flag.
NOTE(review): presumably inlined_pattern is set when the recursed pattern
was compiled in place instead of being called - confirm. */
typedef struct recurse_backtrack {
  backtrack_common common;
  BOOL inlined_pattern;
} recurse_backtrack;
300
/* Opcode value equal to OP_TABLE_LENGTH.
NOTE(review): presumably chosen because it lies one past the real opcode
values, so it cannot collide with them - confirm. */
#define OP_THEN_TRAP OP_TABLE_LENGTH

/* Backtrack record for a then-trap; extends backtrack_common. */
typedef struct then_trap_backtrack {
  backtrack_common common;
  /* If then_trap is not NULL, this structure contains the real
  then_trap for the backtracking path. */
  struct then_trap_backtrack *then_trap;
  /* Points to the starting opcode. */
  sljit_sw start;
  /* Exit point for the then opcodes of this alternative. */
  jump_list *quit;
  /* Frame size of the current alternative. */
  int framesize;
} then_trap_backtrack;
315
316 #define MAX_RANGE_SIZE 4
317
/* State shared by the code generator functions of this file; they receive
it through a 'compiler_common *common' parameter. */
typedef struct compiler_common {
  /* The sljit generic compiler. */
  struct sljit_compiler *compiler;
  /* First byte code. */
  pcre_uchar *start;
  /* Maps private data offset to each opcode. Indexed by the distance of
  the opcode from 'start' (see PRIVATE_DATA). */
  sljit_si *private_data_ptrs;
  /* Chain list of read-only data ptrs. */
  void *read_only_data_head;
  /* Tells whether the capturing bracket is optimized. Indexed by the
  capturing bracket number. */
  pcre_uint8 *optimized_cbracket;
  /* Tells whether the starting offset is a target of then. */
  pcre_uint8 *then_offsets;
  /* Current position where a THEN must jump. */
  then_trap_backtrack *then_trap;
  /* Starting offset of private data for capturing brackets. */
  int cbra_ptr;
  /* Output vector starting point. Must be divisible by 2. */
  int ovector_start;
  /* Last known position of the requested byte. */
  int req_char_ptr;
  /* Head of the last recursion. */
  int recursive_head_ptr;
  /* First inspected character for partial matching. */
  int start_used_ptr;
  /* Starting pointer for partial soft matches. */
  int hit_start;
  /* End pointer of the first line. */
  int first_line_end;
  /* Points to the marked string. */
  int mark_ptr;
  /* Recursive control verb management chain. */
  int control_head_ptr;
  /* Points to the last matched capture block index. */
  int capture_last_ptr;
  /* Points to the starting position of the current match. */
  int start_ptr;

  /* Flipped and lower case tables. */
  const pcre_uint8 *fcc;
  sljit_sw lcc;
  /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
  int mode;
  /* NOTE(review): the original comment here read "TRUE, when minlength is
  greater than 0", which contradicts the member name; check_opcode_types
  sets this to TRUE when it meets OP_SET_SOM. Confirm the intended
  condition where the member is initialized. */
  BOOL might_be_empty;
  /* \K is found in the pattern. */
  BOOL has_set_som;
  /* (*SKIP:arg) is found in the pattern. */
  BOOL has_skip_arg;
  /* (*THEN) is found in the pattern. */
  BOOL has_then;
  /* Needs to know the start position anytime. */
  BOOL needs_start_ptr;
  /* Currently in recurse or negative assert. */
  BOOL local_exit;
  /* Currently in a positive assert. */
  BOOL positive_assert;
  /* Newline control. */
  int nltype;
  pcre_uint32 nlmax;
  pcre_uint32 nlmin;
  int newline;
  int bsr_nltype;
  pcre_uint32 bsr_nlmax;
  pcre_uint32 bsr_nlmin;
  /* Dollar endonly. */
  int endonly;
  /* Tables. */
  sljit_sw ctypes;
  /* Named capturing brackets. */
  pcre_uchar *name_table;
  sljit_sw name_count;
  sljit_sw name_entry_size;

  /* Labels and jump lists. */
  struct sljit_label *partialmatchlabel;
  struct sljit_label *quit_label;
  struct sljit_label *forced_quit_label;
  struct sljit_label *accept_label;
  struct sljit_label *ff_newline_shortcut;
  stub_list *stubs;
  label_addr_list *label_addrs;
  recurse_entry *entries;
  recurse_entry *currententry;
  jump_list *partialmatch;
  jump_list *quit;
  jump_list *positive_assert_quit;
  jump_list *forced_quit;
  jump_list *accept;
  jump_list *calllimit;
  jump_list *stackalloc;
  jump_list *revertframes;
  jump_list *wordboundary;
  jump_list *anynewline;
  jump_list *hspace;
  jump_list *vspace;
  jump_list *casefulcmp;
  jump_list *caselesscmp;
  jump_list *reset_match;
  BOOL jscript_compat;
#ifdef SUPPORT_UTF
  BOOL utf;
#ifdef SUPPORT_UCP
  BOOL use_ucp;
#endif
#ifdef COMPILE_PCRE8
  jump_list *utfreadchar;
  jump_list *utfreadchar16;
  jump_list *utfreadtype8;
#endif
#endif /* SUPPORT_UTF */
#ifdef SUPPORT_UCP
  jump_list *getucd;
#endif
} compiler_common;
433
/* For byte_sequence_compare. */

/* 'length' and 'sourcereg' are always present. The remaining members exist
only when SLJIT_UNALIGNED is defined and non-zero. The unions 'c' and 'oc'
overlay the same storage as an int, a 16 bit value, or an array of code
units whose element count depends on the code unit width (4 x 8 bit,
2 x 16 bit, or 1 x 32 bit). */
typedef struct compare_context {
  int length;
  int sourcereg;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  int ucharptr;
  union {
    sljit_si asint;
    sljit_uh asushort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_ui asuchars[1];
#endif
  } c;
  union {
    sljit_si asint;
    sljit_uh asushort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_ui asuchars[1];
#endif
  } oc;
#endif
} compare_context;
467
/* Undefine sljit macros. CMP is redefined below as a shortcut. */
#undef CMP

/* Used for accessing the elements of the stack. Yields a negative byte
offset: STACK(0) is -sizeof(sljit_sw), STACK(1) is -2 * sizeof(sljit_sw),
and so on. */
#define STACK(i)      ((-(i) - 1) * (int)sizeof(sljit_sw))

/* Register assignment. Note that TMP2 is SLJIT_R2 while STACK_TOP is
SLJIT_R1. */
#define TMP1          SLJIT_R0
#define TMP2          SLJIT_R2
#define TMP3          SLJIT_R3
#define STR_PTR       SLJIT_S0
#define STR_END       SLJIT_S1
#define STACK_TOP     SLJIT_R1
#define STACK_LIMIT   SLJIT_S2
#define COUNT_MATCH   SLJIT_S3
#define ARGUMENTS     SLJIT_S4
#define RETURN_ADDR   SLJIT_R4

/* Local space layout. Each value is a byte offset of one machine word. */
/* These two locals can be used by the current opcode. */
#define LOCALS0          (0 * sizeof(sljit_sw))
#define LOCALS1          (1 * sizeof(sljit_sw))
/* Two local variables for possessive quantifiers (char1 cannot use them). */
#define POSSESSIVE0      (2 * sizeof(sljit_sw))
#define POSSESSIVE1      (3 * sizeof(sljit_sw))
/* Max limit of recursions. */
#define LIMIT_MATCH      (4 * sizeof(sljit_sw))
/* The output vector is stored on the stack, and contains pointers
to characters. The vector data is divided into two groups: the first
group contains the start / end character pointers, and the second is
the start pointers when the end of the capturing group has not yet been
reached. These macros expect a variable named 'common' in scope. */
#define OVECTOR_START    (common->ovector_start)
#define OVECTOR(i)       (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
#define OVECTOR_PRIV(i)  (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
/* Private data entry of the opcode at 'cc' (indexed by its distance from
common->start). */
#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])

/* Move operations sized to one code unit of the selected library width. */
#if defined COMPILE_PCRE8
#define MOV_UCHAR  SLJIT_MOV_UB
#define MOVU_UCHAR SLJIT_MOVU_UB
#elif defined COMPILE_PCRE16
#define MOV_UCHAR  SLJIT_MOV_UH
#define MOVU_UCHAR SLJIT_MOVU_UH
#elif defined COMPILE_PCRE32
#define MOV_UCHAR  SLJIT_MOV_UI
#define MOVU_UCHAR SLJIT_MOVU_UI
#else
#error Unsupported compiling mode
#endif

/* Shortcuts. DEFINE_COMPILER declares the local variable 'compiler' (taken
from common->compiler) that every other shortcut below refers to. */
#define DEFINE_COMPILER \
  struct sljit_compiler *compiler = common->compiler
#define OP1(op, dst, dstw, src, srcw) \
  sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
  sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
#define LABEL() \
  sljit_emit_label(compiler)
#define JUMP(type) \
  sljit_emit_jump(compiler, (type))
/* Emits a jump and binds it to an existing label. */
#define JUMPTO(type, label) \
  sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
/* Binds an existing jump to a label emitted at this point. */
#define JUMPHERE(jump) \
  sljit_set_label((jump), sljit_emit_label(compiler))
#define SET_LABEL(jump, label) \
  sljit_set_label((jump), (label))
#define CMP(type, src1, src1w, src2, src2w) \
  sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
/* Emits a compare-and-jump and binds it to an existing label. */
#define CMPTO(type, src1, src1w, src2, src2w, label) \
  sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
#define OP_FLAGS(op, dst, dstw, src, srcw, type) \
  sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
#define GET_LOCAL_BASE(dst, dstw, offset) \
  sljit_get_local_base(compiler, (dst), (dstw), (offset))

#define READ_CHAR_MAX 0x7fffffff
543
/* Given the opening opcode of an assertion or bracket, returns the address
of the first opcode after the closing ket of that group. */
static pcre_uchar *bracketend(pcre_uchar *cc)
{
SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));

/* Follow the link of the opening opcode, then the link of every OP_ALT. */
cc += GET(cc, 1);
while (*cc == OP_ALT)
  cc += GET(cc, 1);

SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
/* Step over the ket opcode and its link field. */
return cc + 1 + LINK_SIZE;
}
552
/* Returns the number of alternatives of the assertion or bracket whose
opening opcode is at cc (always at least 1). */
static int no_alternatives(pcre_uchar *cc)
{
int alternatives = 1;

SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));

/* The first alternative starts at the opening opcode; each OP_ALT reached
through the links starts one more. */
for (cc += GET(cc, 1); *cc == OP_ALT; cc += GET(cc, 1))
  alternatives++;

SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
return alternatives;
}
566
/* Number of set bits in each 4 bit value: ones_in_half_byte[v] is the
population count of v for 0 <= v <= 15. The table is read-only, so it is
declared const. */
static const int ones_in_half_byte[16] = {
  /* 0 */ 0, 1, 1, 2, /* 4 */ 1, 2, 2, 3,
  /* 8 */ 1, 2, 2, 3, /* 12 */ 2, 3, 3, 4
};
571
/* Functions which might need modification for all new supported opcodes:
573 next_opcode
574 check_opcode_types
575 set_private_data_ptrs
576 get_framesize
577 init_frame
578 get_private_data_copy_length
579 copy_private_data
580 compile_matchingpath
581 compile_backtrackingpath
582 */
583
/* Returns the address of the opcode that follows the one at cc.
Returns NULL for OP_ANYBYTE in UTF mode and for any opcode that is not
listed here. */
static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
{
pcre_uchar *following;

SLJIT_UNUSED_ARG(common);
switch(*cc)
  {
  /* The OP_lengths table gives the complete length of these opcodes. */
  case OP_SOD: case OP_SOM: case OP_SET_SOM:
  case OP_NOT_WORD_BOUNDARY: case OP_WORD_BOUNDARY:
  case OP_NOT_DIGIT: case OP_DIGIT:
  case OP_NOT_WHITESPACE: case OP_WHITESPACE:
  case OP_NOT_WORDCHAR: case OP_WORDCHAR:
  case OP_ANY: case OP_ALLANY:
  case OP_NOTPROP: case OP_PROP:
  case OP_ANYNL:
  case OP_NOT_HSPACE: case OP_HSPACE:
  case OP_NOT_VSPACE: case OP_VSPACE:
  case OP_EXTUNI:
  case OP_EODN: case OP_EOD:
  case OP_CIRC: case OP_CIRCM:
  case OP_DOLL: case OP_DOLLM:
  case OP_CRSTAR: case OP_CRMINSTAR:
  case OP_CRPLUS: case OP_CRMINPLUS:
  case OP_CRQUERY: case OP_CRMINQUERY:
  case OP_CRRANGE: case OP_CRMINRANGE:
  case OP_CRPOSSTAR: case OP_CRPOSPLUS:
  case OP_CRPOSQUERY: case OP_CRPOSRANGE:
  case OP_CLASS: case OP_NCLASS:
  case OP_REF: case OP_REFI:
  case OP_DNREF: case OP_DNREFI:
  case OP_RECURSE:
  case OP_CALLOUT:
  case OP_ALT:
  case OP_KET: case OP_KETRMAX: case OP_KETRMIN: case OP_KETRPOS:
  case OP_REVERSE:
  case OP_ASSERT: case OP_ASSERT_NOT:
  case OP_ASSERTBACK: case OP_ASSERTBACK_NOT:
  case OP_ONCE: case OP_ONCE_NC:
  case OP_BRA: case OP_BRAPOS:
  case OP_CBRA: case OP_CBRAPOS:
  case OP_COND:
  case OP_SBRA: case OP_SBRAPOS:
  case OP_SCBRA: case OP_SCBRAPOS:
  case OP_SCOND:
  case OP_CREF: case OP_DNCREF:
  case OP_RREF: case OP_DNRREF:
  case OP_DEF:
  case OP_BRAZERO: case OP_BRAMINZERO: case OP_BRAPOSZERO:
  case OP_PRUNE: case OP_SKIP: case OP_THEN: case OP_COMMIT:
  case OP_FAIL: case OP_ACCEPT: case OP_ASSERT_ACCEPT:
  case OP_CLOSE:
  case OP_SKIPZERO:
  return cc + PRIV(OP_lengths)[*cc];

  /* These opcodes end with a character, which may occupy extra code
  units in UTF mode. */
  case OP_CHAR: case OP_CHARI:
  case OP_NOT: case OP_NOTI:
  case OP_STAR: case OP_MINSTAR:
  case OP_PLUS: case OP_MINPLUS:
  case OP_QUERY: case OP_MINQUERY:
  case OP_UPTO: case OP_MINUPTO:
  case OP_EXACT:
  case OP_POSSTAR: case OP_POSPLUS:
  case OP_POSQUERY: case OP_POSUPTO:
  case OP_STARI: case OP_MINSTARI:
  case OP_PLUSI: case OP_MINPLUSI:
  case OP_QUERYI: case OP_MINQUERYI:
  case OP_UPTOI: case OP_MINUPTOI:
  case OP_EXACTI:
  case OP_POSSTARI: case OP_POSPLUSI:
  case OP_POSQUERYI: case OP_POSUPTOI:
  case OP_NOTSTAR: case OP_NOTMINSTAR:
  case OP_NOTPLUS: case OP_NOTMINPLUS:
  case OP_NOTQUERY: case OP_NOTMINQUERY:
  case OP_NOTUPTO: case OP_NOTMINUPTO:
  case OP_NOTEXACT:
  case OP_NOTPOSSTAR: case OP_NOTPOSPLUS:
  case OP_NOTPOSQUERY: case OP_NOTPOSUPTO:
  case OP_NOTSTARI: case OP_NOTMINSTARI:
  case OP_NOTPLUSI: case OP_NOTMINPLUSI:
  case OP_NOTQUERYI: case OP_NOTMINQUERYI:
  case OP_NOTUPTOI: case OP_NOTMINUPTOI:
  case OP_NOTEXACTI:
  case OP_NOTPOSSTARI: case OP_NOTPOSPLUSI:
  case OP_NOTPOSQUERYI: case OP_NOTPOSUPTOI:
  following = cc + PRIV(OP_lengths)[*cc];
#ifdef SUPPORT_UTF
  if (common->utf && HAS_EXTRALEN(following[-1]))
    following += GET_EXTRALEN(following[-1]);
#endif
  return following;

  /* Special cases. The result is one code unit short of the table length,
  i.e. it is the address of the type opcode that ends these items. */
  case OP_TYPESTAR: case OP_TYPEMINSTAR:
  case OP_TYPEPLUS: case OP_TYPEMINPLUS:
  case OP_TYPEQUERY: case OP_TYPEMINQUERY:
  case OP_TYPEUPTO: case OP_TYPEMINUPTO:
  case OP_TYPEEXACT:
  case OP_TYPEPOSSTAR: case OP_TYPEPOSPLUS:
  case OP_TYPEPOSQUERY: case OP_TYPEPOSUPTO:
  return cc + PRIV(OP_lengths)[*cc] - 1;

  case OP_ANYBYTE:
#ifdef SUPPORT_UTF
  if (common->utf)
    return NULL;
#endif
  return cc + 1;

#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
  /* The length is stored in the link field of the opcode. */
  case OP_XCLASS:
  return cc + GET(cc, 1);
#endif

  /* Opcode, a length code unit (cc[1]), then cc[1] + 1 further code
  units. */
  case OP_MARK: case OP_PRUNE_ARG: case OP_SKIP_ARG: case OP_THEN_ARG:
  return cc + 1 + 2 + cc[1];

  default:
  /* All opcodes are supported now! */
  SLJIT_ASSERT_STOP();
  return NULL;
  }
}
778
check_opcode_types(compiler_common * common,pcre_uchar * cc,pcre_uchar * ccend)779 static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
780 {
781 int count;
782 pcre_uchar *slot;
783
784 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
785 while (cc < ccend)
786 {
787 switch(*cc)
788 {
789 case OP_SET_SOM:
790 common->has_set_som = TRUE;
791 common->might_be_empty = TRUE;
792 cc += 1;
793 break;
794
795 case OP_REF:
796 case OP_REFI:
797 common->optimized_cbracket[GET2(cc, 1)] = 0;
798 cc += 1 + IMM2_SIZE;
799 break;
800
801 case OP_CBRAPOS:
802 case OP_SCBRAPOS:
803 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
804 cc += 1 + LINK_SIZE + IMM2_SIZE;
805 break;
806
807 case OP_COND:
808 case OP_SCOND:
809 /* Only AUTO_CALLOUT can insert this opcode. We do
810 not intend to support this case. */
811 if (cc[1 + LINK_SIZE] == OP_CALLOUT)
812 return FALSE;
813 cc += 1 + LINK_SIZE;
814 break;
815
816 case OP_CREF:
817 common->optimized_cbracket[GET2(cc, 1)] = 0;
818 cc += 1 + IMM2_SIZE;
819 break;
820
821 case OP_DNREF:
822 case OP_DNREFI:
823 case OP_DNCREF:
824 count = GET2(cc, 1 + IMM2_SIZE);
825 slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
826 while (count-- > 0)
827 {
828 common->optimized_cbracket[GET2(slot, 0)] = 0;
829 slot += common->name_entry_size;
830 }
831 cc += 1 + 2 * IMM2_SIZE;
832 break;
833
834 case OP_RECURSE:
835 /* Set its value only once. */
836 if (common->recursive_head_ptr == 0)
837 {
838 common->recursive_head_ptr = common->ovector_start;
839 common->ovector_start += sizeof(sljit_sw);
840 }
841 cc += 1 + LINK_SIZE;
842 break;
843
844 case OP_CALLOUT:
845 if (common->capture_last_ptr == 0)
846 {
847 common->capture_last_ptr = common->ovector_start;
848 common->ovector_start += sizeof(sljit_sw);
849 }
850 cc += 2 + 2 * LINK_SIZE;
851 break;
852
853 case OP_THEN_ARG:
854 common->has_then = TRUE;
855 common->control_head_ptr = 1;
856 /* Fall through. */
857
858 case OP_PRUNE_ARG:
859 common->needs_start_ptr = TRUE;
860 /* Fall through. */
861
862 case OP_MARK:
863 if (common->mark_ptr == 0)
864 {
865 common->mark_ptr = common->ovector_start;
866 common->ovector_start += sizeof(sljit_sw);
867 }
868 cc += 1 + 2 + cc[1];
869 break;
870
871 case OP_THEN:
872 common->has_then = TRUE;
873 common->control_head_ptr = 1;
874 /* Fall through. */
875
876 case OP_PRUNE:
877 case OP_SKIP:
878 common->needs_start_ptr = TRUE;
879 cc += 1;
880 break;
881
882 case OP_SKIP_ARG:
883 common->control_head_ptr = 1;
884 common->has_skip_arg = TRUE;
885 cc += 1 + 2 + cc[1];
886 break;
887
888 default:
889 cc = next_opcode(common, cc);
890 if (cc == NULL)
891 return FALSE;
892 break;
893 }
894 }
895 return TRUE;
896 }
897
/* Returns 0, 1 or 2 depending on the class repeat opcode at cc:
2 for greedy star / plus and for ranges whose two bounds differ,
1 for minimizing star / plus and both query forms,
0 for ranges with equal bounds and for every other opcode. */
static int get_class_iterator_size(pcre_uchar *cc)
{
pcre_uchar opcode = *cc;

if (opcode == OP_CRSTAR || opcode == OP_CRPLUS)
  return 2;

if (opcode == OP_CRMINSTAR || opcode == OP_CRMINPLUS
    || opcode == OP_CRQUERY || opcode == OP_CRMINQUERY)
  return 1;

if (opcode == OP_CRRANGE || opcode == OP_CRMINRANGE)
  return (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE)) ? 0 : 2;

return 0;
}
922
/* Checks whether the bracket starting at 'begin' is followed by identical
copies of itself (compared with memcmp) and, if so, records the run in
common->private_data_ptrs.

A record occupies three consecutive private_data_ptrs entries, starting at
the index of the code unit that follows a closing OP_KET opcode:
  [0] a distance in code units (set_private_data_ptrs adds it to cc when it
      reaches that OP_KET),
  [1] an iterator opcode: OP_EXACT, OP_UPTO or OP_MINUPTO,
  [2] a repeat count.

Returns TRUE when a record was written by this or by an earlier call,
FALSE otherwise. */
static BOOL detect_repeat(compiler_common *common, pcre_uchar *begin)
{
pcre_uchar *end = bracketend(begin);
pcre_uchar *next;
pcre_uchar *next_end;
pcre_uchar *max_end;
pcre_uchar type;
/* Size of the whole bracket in code units, closing ket included. */
sljit_sw length = end - begin;
int min, max, i;

/* Detect fixed iterations first. */
/* The bracket must be closed by a plain OP_KET. */
if (end[-(1 + LINK_SIZE)] != OP_KET)
  return FALSE;

/* Already detected repeat. */
if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
  return TRUE;

/* Count the original bracket plus every identical copy that directly
follows it. */
next = end;
min = 1;
while (1)
  {
  if (*next != *begin)
    break;
  next_end = bracketend(next);
  if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
    break;
  next = next_end;
  min++;
  }

/* Exactly two copies are never converted; note that the optional tail
below is not inspected in this case. */
if (min == 2)
  return FALSE;

/* Look for a tail of nested optional copies: each level is 'type' (OP_BRAZERO
or OP_BRAMINZERO), OP_BRA and then a copy of the bracket; 'next' moves to the
position right after that copy, i.e. inside the enclosing OP_BRA. */
max = 0;
max_end = next;
if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
  {
  type = *next;
  while (1)
    {
    if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
      break;
    next_end = bracketend(next + 2 + LINK_SIZE);
    if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
      break;
    next = next_end;
    max++;
    }

  /* The innermost level has no OP_BRA wrapper: 'type' is followed by the
  copy itself. */
  if (next[0] == type && next[1] == *begin && max >= 1)
    {
    next_end = bracketend(next + 1);
    if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
      {
      /* Each of the 'max' OP_BRA wrappers must be closed by an OP_KET right
      here. */
      for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
        if (*next_end != OP_KET)
          break;

      if (i == max)
        {
        /* Record the bounded repeat at the OP_KET that precedes max_end. */
        common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
        common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
        /* +2 the original and the last. */
        common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
        if (min == 1)
          return TRUE;
        /* The last fixed copy now belongs to the bounded repeat: take it out
        of the fixed count and move max_end back by one bracket.
        NOTE(review): this relies on the link field of the preceding OP_KET
        holding the distance back to its opening opcode - confirm. */
        min--;
        max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
        }
      }
    }
  }

/* Record the fixed repeat at the OP_KET of the first bracket. */
if (min >= 3)
  {
  common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
  common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
  common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
  return TRUE;
  }

return FALSE;
}
1007
/* Groups of case labels for single character and character type iterators,
meant to be placed inside a switch statement. In set_private_data_ptrs the
digit in the macro name equals the value assigned to 'space' for that group
(for the TYPE_..._2A and TYPE_..._2B groups only when the repeated type is
neither OP_ANYNL nor OP_EXTUNI). The A and B variants differ in the 'size'
value used there: the B groups (the UPTO forms) add IMM2_SIZE. */
#define CASE_ITERATOR_PRIVATE_DATA_1 \
    case OP_MINSTAR: \
    case OP_MINPLUS: \
    case OP_QUERY: \
    case OP_MINQUERY: \
    case OP_MINSTARI: \
    case OP_MINPLUSI: \
    case OP_QUERYI: \
    case OP_MINQUERYI: \
    case OP_NOTMINSTAR: \
    case OP_NOTMINPLUS: \
    case OP_NOTQUERY: \
    case OP_NOTMINQUERY: \
    case OP_NOTMINSTARI: \
    case OP_NOTMINPLUSI: \
    case OP_NOTQUERYI: \
    case OP_NOTMINQUERYI:

#define CASE_ITERATOR_PRIVATE_DATA_2A \
    case OP_STAR: \
    case OP_PLUS: \
    case OP_STARI: \
    case OP_PLUSI: \
    case OP_NOTSTAR: \
    case OP_NOTPLUS: \
    case OP_NOTSTARI: \
    case OP_NOTPLUSI:

#define CASE_ITERATOR_PRIVATE_DATA_2B \
    case OP_UPTO: \
    case OP_MINUPTO: \
    case OP_UPTOI: \
    case OP_MINUPTOI: \
    case OP_NOTUPTO: \
    case OP_NOTMINUPTO: \
    case OP_NOTUPTOI: \
    case OP_NOTMINUPTOI:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
    case OP_TYPEMINSTAR: \
    case OP_TYPEMINPLUS: \
    case OP_TYPEQUERY: \
    case OP_TYPEMINQUERY:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
    case OP_TYPESTAR: \
    case OP_TYPEPLUS:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
    case OP_TYPEUPTO: \
    case OP_TYPEMINUPTO:
1059
/* Scans the compiled pattern from common->start up to ccend and hands out
private data offsets. Each offset is recorded in common->private_data_ptrs[]
at the index of the opcode that owns it.

private_data_start  in:  first free offset
                    out: first offset which is still free after the scan

If the running offset grows above SLJIT_MAX_LOCAL_SIZE the scan is abandoned
and *private_data_start is NOT updated. */
static void set_private_data_ptrs(compiler_common *common, int *private_data_start, pcre_uchar *ccend)
{
  pcre_uchar *cc = common->start;
  pcre_uchar *repeat_end = NULL;
  pcre_uchar *alt;
  int next_slot = *private_data_start;
  int slots, skip, bracketlen;

  while (cc < ccend)
  {
    /* slots:      private words wanted by a character iterator
       skip:       opcode length; a negative value means that a character
                   follows, which may have extra UTF code units
       bracketlen: length of a bracket opcode header */
    slots = 0;
    skip = 0;
    bracketlen = 0;

    if (next_slot > SLJIT_MAX_LOCAL_SIZE)
      return;

    /* Brackets which detect_repeat() converts to repeats: no global based
    single character repeat is allowed inside them. */
    if ((*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND)
        && detect_repeat(common, cc)
        && cc >= repeat_end)
      repeat_end = bracketend(cc);

    switch (*cc)
    {
      case OP_KET:
        /* A non-zero entry right after the KET makes the KET own a slot,
        and the entry is also an extra distance to step over. */
        if (common->private_data_ptrs[cc + 1 - common->start] != 0)
        {
          common->private_data_ptrs[cc - common->start] = next_slot;
          next_slot += sizeof(sljit_sw);
          cc += common->private_data_ptrs[cc + 1 - common->start];
        }
        cc += 1 + LINK_SIZE;
        break;

      case OP_ASSERT:
      case OP_ASSERT_NOT:
      case OP_ASSERTBACK:
      case OP_ASSERTBACK_NOT:
      case OP_ONCE:
      case OP_ONCE_NC:
      case OP_BRAPOS:
      case OP_SBRA:
      case OP_SBRAPOS:
      case OP_SCOND:
        common->private_data_ptrs[cc - common->start] = next_slot;
        next_slot += sizeof(sljit_sw);
        bracketlen = 1 + LINK_SIZE;
        break;

      case OP_CBRAPOS:
      case OP_SCBRAPOS:
        common->private_data_ptrs[cc - common->start] = next_slot;
        next_slot += sizeof(sljit_sw);
        bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
        break;

      case OP_COND:
        /* Might be a hidden SCOND: only then a slot is needed. */
        alt = cc + GET(cc, 1);
        if (*alt == OP_KETRMAX || *alt == OP_KETRMIN)
        {
          common->private_data_ptrs[cc - common->start] = next_slot;
          next_slot += sizeof(sljit_sw);
        }
        bracketlen = 1 + LINK_SIZE;
        break;

      case OP_BRA:
        bracketlen = 1 + LINK_SIZE;
        break;

      case OP_CBRA:
      case OP_SCBRA:
        bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
        break;

      CASE_ITERATOR_PRIVATE_DATA_1
        slots = 1;
        skip = -2;
        break;

      CASE_ITERATOR_PRIVATE_DATA_2A
        slots = 2;
        skip = -2;
        break;

      CASE_ITERATOR_PRIVATE_DATA_2B
        slots = 2;
        skip = -(2 + IMM2_SIZE);
        break;

      CASE_ITERATOR_TYPE_PRIVATE_DATA_1
        slots = 1;
        skip = 1;
        break;

      CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
        if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
          slots = 2;
        skip = 1;
        break;

      CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
        if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
          slots = 2;
        skip = 1 + IMM2_SIZE;
        break;

      case OP_CLASS:
      case OP_NCLASS:
        skip = (int)(1 + 32 / sizeof(pcre_uchar));
        slots = get_class_iterator_size(cc + skip);
        break;

#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
      case OP_XCLASS:
        skip = GET(cc, 1);
        slots = get_class_iterator_size(cc + skip);
        break;
#endif

      default:
        cc = next_opcode(common, cc);
        SLJIT_ASSERT(cc != NULL);
        break;
    }

    /* A character iterator which is not inside a repeated bracket gets
    private slots instead of allocating them on the stack. */
    if (slots > 0 && cc >= repeat_end)
    {
      common->private_data_ptrs[cc - common->start] = next_slot;
      next_slot += sizeof(sljit_sw) * slots;
    }

    if (skip < 0)
    {
      cc += -skip;
#ifdef SUPPORT_UTF
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    }
    else if (skip > 0)
      cc += skip;

    if (bracketlen > 0)
    {
      if (cc >= repeat_end)
      {
        repeat_end = bracketend(cc);
        /* A bracket closed by a plain OP_KET does not restrict iterators. */
        if (repeat_end[-1 - LINK_SIZE] == OP_KET)
          repeat_end = NULL;
      }
      cc += bracketlen;
    }
  }

  *private_data_start = next_slot;
}
1224
1225 /* Returns with a frame_types (always < 0) if no need for frame. */
get_framesize(compiler_common * common,pcre_uchar * cc,pcre_uchar * ccend,BOOL recursive,BOOL * needs_control_head)1226 static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL *needs_control_head)
1227 {
1228 int length = 0;
1229 int possessive = 0;
1230 BOOL stack_restore = FALSE;
1231 BOOL setsom_found = recursive;
1232 BOOL setmark_found = recursive;
1233 /* The last capture is a local variable even for recursions. */
1234 BOOL capture_last_found = FALSE;
1235
1236 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1237 SLJIT_ASSERT(common->control_head_ptr != 0);
1238 *needs_control_head = TRUE;
1239 #else
1240 *needs_control_head = FALSE;
1241 #endif
1242
1243 if (ccend == NULL)
1244 {
1245 ccend = bracketend(cc) - (1 + LINK_SIZE);
1246 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1247 {
1248 possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1249 /* This is correct regardless of common->capture_last_ptr. */
1250 capture_last_found = TRUE;
1251 }
1252 cc = next_opcode(common, cc);
1253 }
1254
1255 SLJIT_ASSERT(cc != NULL);
1256 while (cc < ccend)
1257 switch(*cc)
1258 {
1259 case OP_SET_SOM:
1260 SLJIT_ASSERT(common->has_set_som);
1261 stack_restore = TRUE;
1262 if (!setsom_found)
1263 {
1264 length += 2;
1265 setsom_found = TRUE;
1266 }
1267 cc += 1;
1268 break;
1269
1270 case OP_MARK:
1271 case OP_PRUNE_ARG:
1272 case OP_THEN_ARG:
1273 SLJIT_ASSERT(common->mark_ptr != 0);
1274 stack_restore = TRUE;
1275 if (!setmark_found)
1276 {
1277 length += 2;
1278 setmark_found = TRUE;
1279 }
1280 if (common->control_head_ptr != 0)
1281 *needs_control_head = TRUE;
1282 cc += 1 + 2 + cc[1];
1283 break;
1284
1285 case OP_RECURSE:
1286 stack_restore = TRUE;
1287 if (common->has_set_som && !setsom_found)
1288 {
1289 length += 2;
1290 setsom_found = TRUE;
1291 }
1292 if (common->mark_ptr != 0 && !setmark_found)
1293 {
1294 length += 2;
1295 setmark_found = TRUE;
1296 }
1297 if (common->capture_last_ptr != 0 && !capture_last_found)
1298 {
1299 length += 2;
1300 capture_last_found = TRUE;
1301 }
1302 cc += 1 + LINK_SIZE;
1303 break;
1304
1305 case OP_CBRA:
1306 case OP_CBRAPOS:
1307 case OP_SCBRA:
1308 case OP_SCBRAPOS:
1309 stack_restore = TRUE;
1310 if (common->capture_last_ptr != 0 && !capture_last_found)
1311 {
1312 length += 2;
1313 capture_last_found = TRUE;
1314 }
1315 length += 3;
1316 cc += 1 + LINK_SIZE + IMM2_SIZE;
1317 break;
1318
1319 default:
1320 stack_restore = TRUE;
1321 /* Fall through. */
1322
1323 case OP_NOT_WORD_BOUNDARY:
1324 case OP_WORD_BOUNDARY:
1325 case OP_NOT_DIGIT:
1326 case OP_DIGIT:
1327 case OP_NOT_WHITESPACE:
1328 case OP_WHITESPACE:
1329 case OP_NOT_WORDCHAR:
1330 case OP_WORDCHAR:
1331 case OP_ANY:
1332 case OP_ALLANY:
1333 case OP_ANYBYTE:
1334 case OP_NOTPROP:
1335 case OP_PROP:
1336 case OP_ANYNL:
1337 case OP_NOT_HSPACE:
1338 case OP_HSPACE:
1339 case OP_NOT_VSPACE:
1340 case OP_VSPACE:
1341 case OP_EXTUNI:
1342 case OP_EODN:
1343 case OP_EOD:
1344 case OP_CIRC:
1345 case OP_CIRCM:
1346 case OP_DOLL:
1347 case OP_DOLLM:
1348 case OP_CHAR:
1349 case OP_CHARI:
1350 case OP_NOT:
1351 case OP_NOTI:
1352
1353 case OP_EXACT:
1354 case OP_POSSTAR:
1355 case OP_POSPLUS:
1356 case OP_POSQUERY:
1357 case OP_POSUPTO:
1358
1359 case OP_EXACTI:
1360 case OP_POSSTARI:
1361 case OP_POSPLUSI:
1362 case OP_POSQUERYI:
1363 case OP_POSUPTOI:
1364
1365 case OP_NOTEXACT:
1366 case OP_NOTPOSSTAR:
1367 case OP_NOTPOSPLUS:
1368 case OP_NOTPOSQUERY:
1369 case OP_NOTPOSUPTO:
1370
1371 case OP_NOTEXACTI:
1372 case OP_NOTPOSSTARI:
1373 case OP_NOTPOSPLUSI:
1374 case OP_NOTPOSQUERYI:
1375 case OP_NOTPOSUPTOI:
1376
1377 case OP_TYPEEXACT:
1378 case OP_TYPEPOSSTAR:
1379 case OP_TYPEPOSPLUS:
1380 case OP_TYPEPOSQUERY:
1381 case OP_TYPEPOSUPTO:
1382
1383 case OP_CLASS:
1384 case OP_NCLASS:
1385 case OP_XCLASS:
1386
1387 cc = next_opcode(common, cc);
1388 SLJIT_ASSERT(cc != NULL);
1389 break;
1390 }
1391
1392 /* Possessive quantifiers can use a special case. */
1393 if (SLJIT_UNLIKELY(possessive == length))
1394 return stack_restore ? no_frame : no_stack;
1395
1396 if (length > 0)
1397 return length + 1;
1398 return stack_restore ? no_frame : no_stack;
1399 }
1400
init_frame(compiler_common * common,pcre_uchar * cc,pcre_uchar * ccend,int stackpos,int stacktop,BOOL recursive)1401 static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive)
1402 {
1403 DEFINE_COMPILER;
1404 BOOL setsom_found = recursive;
1405 BOOL setmark_found = recursive;
1406 /* The last capture is a local variable even for recursions. */
1407 BOOL capture_last_found = FALSE;
1408 int offset;
1409
1410 /* >= 1 + shortest item size (2) */
1411 SLJIT_UNUSED_ARG(stacktop);
1412 SLJIT_ASSERT(stackpos >= stacktop + 2);
1413
1414 stackpos = STACK(stackpos);
1415 if (ccend == NULL)
1416 {
1417 ccend = bracketend(cc) - (1 + LINK_SIZE);
1418 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1419 cc = next_opcode(common, cc);
1420 }
1421
1422 SLJIT_ASSERT(cc != NULL);
1423 while (cc < ccend)
1424 switch(*cc)
1425 {
1426 case OP_SET_SOM:
1427 SLJIT_ASSERT(common->has_set_som);
1428 if (!setsom_found)
1429 {
1430 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
1431 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1432 stackpos += (int)sizeof(sljit_sw);
1433 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1434 stackpos += (int)sizeof(sljit_sw);
1435 setsom_found = TRUE;
1436 }
1437 cc += 1;
1438 break;
1439
1440 case OP_MARK:
1441 case OP_PRUNE_ARG:
1442 case OP_THEN_ARG:
1443 SLJIT_ASSERT(common->mark_ptr != 0);
1444 if (!setmark_found)
1445 {
1446 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
1447 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1448 stackpos += (int)sizeof(sljit_sw);
1449 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1450 stackpos += (int)sizeof(sljit_sw);
1451 setmark_found = TRUE;
1452 }
1453 cc += 1 + 2 + cc[1];
1454 break;
1455
1456 case OP_RECURSE:
1457 if (common->has_set_som && !setsom_found)
1458 {
1459 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
1460 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1461 stackpos += (int)sizeof(sljit_sw);
1462 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1463 stackpos += (int)sizeof(sljit_sw);
1464 setsom_found = TRUE;
1465 }
1466 if (common->mark_ptr != 0 && !setmark_found)
1467 {
1468 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
1469 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1470 stackpos += (int)sizeof(sljit_sw);
1471 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1472 stackpos += (int)sizeof(sljit_sw);
1473 setmark_found = TRUE;
1474 }
1475 if (common->capture_last_ptr != 0 && !capture_last_found)
1476 {
1477 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
1478 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1479 stackpos += (int)sizeof(sljit_sw);
1480 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1481 stackpos += (int)sizeof(sljit_sw);
1482 capture_last_found = TRUE;
1483 }
1484 cc += 1 + LINK_SIZE;
1485 break;
1486
1487 case OP_CBRA:
1488 case OP_CBRAPOS:
1489 case OP_SCBRA:
1490 case OP_SCBRAPOS:
1491 if (common->capture_last_ptr != 0 && !capture_last_found)
1492 {
1493 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
1494 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1495 stackpos += (int)sizeof(sljit_sw);
1496 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1497 stackpos += (int)sizeof(sljit_sw);
1498 capture_last_found = TRUE;
1499 }
1500 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1501 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1502 stackpos += (int)sizeof(sljit_sw);
1503 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
1504 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
1505 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1506 stackpos += (int)sizeof(sljit_sw);
1507 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1508 stackpos += (int)sizeof(sljit_sw);
1509
1510 cc += 1 + LINK_SIZE + IMM2_SIZE;
1511 break;
1512
1513 default:
1514 cc = next_opcode(common, cc);
1515 SLJIT_ASSERT(cc != NULL);
1516 break;
1517 }
1518
1519 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
1520 SLJIT_ASSERT(stackpos == STACK(stacktop));
1521 }
1522
get_private_data_copy_length(compiler_common * common,pcre_uchar * cc,pcre_uchar * ccend,BOOL needs_control_head)1523 static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
1524 {
1525 int private_data_length = needs_control_head ? 3 : 2;
1526 int size;
1527 pcre_uchar *alternative;
1528 /* Calculate the sum of the private machine words. */
1529 while (cc < ccend)
1530 {
1531 size = 0;
1532 switch(*cc)
1533 {
1534 case OP_KET:
1535 if (PRIVATE_DATA(cc) != 0)
1536 {
1537 private_data_length++;
1538 SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
1539 cc += PRIVATE_DATA(cc + 1);
1540 }
1541 cc += 1 + LINK_SIZE;
1542 break;
1543
1544 case OP_ASSERT:
1545 case OP_ASSERT_NOT:
1546 case OP_ASSERTBACK:
1547 case OP_ASSERTBACK_NOT:
1548 case OP_ONCE:
1549 case OP_ONCE_NC:
1550 case OP_BRAPOS:
1551 case OP_SBRA:
1552 case OP_SBRAPOS:
1553 case OP_SCOND:
1554 private_data_length++;
1555 SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
1556 cc += 1 + LINK_SIZE;
1557 break;
1558
1559 case OP_CBRA:
1560 case OP_SCBRA:
1561 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1562 private_data_length++;
1563 cc += 1 + LINK_SIZE + IMM2_SIZE;
1564 break;
1565
1566 case OP_CBRAPOS:
1567 case OP_SCBRAPOS:
1568 private_data_length += 2;
1569 cc += 1 + LINK_SIZE + IMM2_SIZE;
1570 break;
1571
1572 case OP_COND:
1573 /* Might be a hidden SCOND. */
1574 alternative = cc + GET(cc, 1);
1575 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1576 private_data_length++;
1577 cc += 1 + LINK_SIZE;
1578 break;
1579
1580 CASE_ITERATOR_PRIVATE_DATA_1
1581 if (PRIVATE_DATA(cc))
1582 private_data_length++;
1583 cc += 2;
1584 #ifdef SUPPORT_UTF
1585 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1586 #endif
1587 break;
1588
1589 CASE_ITERATOR_PRIVATE_DATA_2A
1590 if (PRIVATE_DATA(cc))
1591 private_data_length += 2;
1592 cc += 2;
1593 #ifdef SUPPORT_UTF
1594 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1595 #endif
1596 break;
1597
1598 CASE_ITERATOR_PRIVATE_DATA_2B
1599 if (PRIVATE_DATA(cc))
1600 private_data_length += 2;
1601 cc += 2 + IMM2_SIZE;
1602 #ifdef SUPPORT_UTF
1603 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1604 #endif
1605 break;
1606
1607 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1608 if (PRIVATE_DATA(cc))
1609 private_data_length++;
1610 cc += 1;
1611 break;
1612
1613 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1614 if (PRIVATE_DATA(cc))
1615 private_data_length += 2;
1616 cc += 1;
1617 break;
1618
1619 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1620 if (PRIVATE_DATA(cc))
1621 private_data_length += 2;
1622 cc += 1 + IMM2_SIZE;
1623 break;
1624
1625 case OP_CLASS:
1626 case OP_NCLASS:
1627 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1628 case OP_XCLASS:
1629 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1630 #else
1631 size = 1 + 32 / (int)sizeof(pcre_uchar);
1632 #endif
1633 if (PRIVATE_DATA(cc))
1634 private_data_length += get_class_iterator_size(cc + size);
1635 cc += size;
1636 break;
1637
1638 default:
1639 cc = next_opcode(common, cc);
1640 SLJIT_ASSERT(cc != NULL);
1641 break;
1642 }
1643 }
1644 SLJIT_ASSERT(cc == ccend);
1645 return private_data_length;
1646 }
1647
/* Emits the code which copies the private data words of the opcodes between
cc and ccend between the locals (SLJIT_SP based) and the STACK_TOP based
stack.

save != FALSE: locals are stored to the stack, starting with
  common->recursive_head_ptr and (when needs_control_head is set)
  common->control_head_ptr, followed by the words of the opcodes.
save == FALSE: the opcode words are loaded back from the stack; the head
  words at the beginning of the area are skipped.

The words travel through TMP1 and TMP2 in alternating order, so each store
is emitted one step later than the matching load. The opcode walk mirrors
the one in get_private_data_copy_length(). */
static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
  BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
{
  DEFINE_COMPILER;
  int srcw[2];
  int count, class_len;
  BOOL tmp1next = TRUE;
  BOOL tmp1empty = TRUE;
  BOOL tmp2empty = TRUE;
  pcre_uchar *alt;
  enum {
    phase_head,
    phase_scan,
    phase_done
  } phase;

  phase = save ? phase_head : phase_scan;
  stackptr = STACK(stackptr - 2);
  stacktop = STACK(stacktop - 1);

  if (!save)
  {
    /* Step over the head words, then preload up to two words. */
    stackptr += (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
    if (stackptr < stacktop)
    {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
      stackptr += sizeof(sljit_sw);
      tmp1empty = FALSE;
    }
    if (stackptr < stacktop)
    {
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
      stackptr += sizeof(sljit_sw);
      tmp2empty = FALSE;
    }
    /* The tmp1next must be TRUE in either way. */
  }

  do
  {
    /* Each round collects 0..2 local offsets into srcw[]. */
    count = 0;
    switch (phase)
    {
      case phase_head:
        SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
        count = 1;
        srcw[0] = common->recursive_head_ptr;
        if (needs_control_head)
        {
          SLJIT_ASSERT(common->control_head_ptr != 0);
          count = 2;
          srcw[1] = common->control_head_ptr;
        }
        phase = phase_scan;
        break;

      case phase_scan:
        if (cc >= ccend)
        {
          phase = phase_done;
          break;
        }

        switch (*cc)
        {
          case OP_KET:
            if (PRIVATE_DATA(cc) != 0)
            {
              count = 1;
              srcw[0] = PRIVATE_DATA(cc);
              SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
              cc += PRIVATE_DATA(cc + 1);
            }
            cc += 1 + LINK_SIZE;
            break;

          case OP_ASSERT:
          case OP_ASSERT_NOT:
          case OP_ASSERTBACK:
          case OP_ASSERTBACK_NOT:
          case OP_ONCE:
          case OP_ONCE_NC:
          case OP_BRAPOS:
          case OP_SBRA:
          case OP_SBRAPOS:
          case OP_SCOND:
            count = 1;
            srcw[0] = PRIVATE_DATA(cc);
            SLJIT_ASSERT(srcw[0] != 0);
            cc += 1 + LINK_SIZE;
            break;

          case OP_CBRA:
          case OP_SCBRA:
            if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
            {
              count = 1;
              srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
            }
            cc += 1 + LINK_SIZE + IMM2_SIZE;
            break;

          case OP_CBRAPOS:
          case OP_SCBRAPOS:
            count = 2;
            srcw[0] = PRIVATE_DATA(cc);
            srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
            SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
            cc += 1 + LINK_SIZE + IMM2_SIZE;
            break;

          case OP_COND:
            /* Might be a hidden SCOND. */
            alt = cc + GET(cc, 1);
            if (*alt == OP_KETRMAX || *alt == OP_KETRMIN)
            {
              count = 1;
              srcw[0] = PRIVATE_DATA(cc);
              SLJIT_ASSERT(srcw[0] != 0);
            }
            cc += 1 + LINK_SIZE;
            break;

          CASE_ITERATOR_PRIVATE_DATA_1
            if (PRIVATE_DATA(cc))
            {
              count = 1;
              srcw[0] = PRIVATE_DATA(cc);
            }
            cc += 2;
#ifdef SUPPORT_UTF
            if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
            break;

          CASE_ITERATOR_PRIVATE_DATA_2A
            if (PRIVATE_DATA(cc))
            {
              count = 2;
              srcw[0] = PRIVATE_DATA(cc);
              srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
            }
            cc += 2;
#ifdef SUPPORT_UTF
            if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
            break;

          CASE_ITERATOR_PRIVATE_DATA_2B
            if (PRIVATE_DATA(cc))
            {
              count = 2;
              srcw[0] = PRIVATE_DATA(cc);
              srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
            }
            cc += 2 + IMM2_SIZE;
#ifdef SUPPORT_UTF
            if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
            break;

          CASE_ITERATOR_TYPE_PRIVATE_DATA_1
            if (PRIVATE_DATA(cc))
            {
              count = 1;
              srcw[0] = PRIVATE_DATA(cc);
            }
            cc += 1;
            break;

          CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
            if (PRIVATE_DATA(cc))
            {
              count = 2;
              srcw[0] = PRIVATE_DATA(cc);
              srcw[1] = srcw[0] + sizeof(sljit_sw);
            }
            cc += 1;
            break;

          CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
            if (PRIVATE_DATA(cc))
            {
              count = 2;
              srcw[0] = PRIVATE_DATA(cc);
              srcw[1] = srcw[0] + sizeof(sljit_sw);
            }
            cc += 1 + IMM2_SIZE;
            break;

#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
          case OP_XCLASS:
#endif
          case OP_CLASS:
          case OP_NCLASS:
            class_len = 1 + 32 / (int)sizeof(pcre_uchar);
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
            if (*cc == OP_XCLASS)
              class_len = GET(cc, 1);
#endif
            if (PRIVATE_DATA(cc))
            {
              switch (get_class_iterator_size(cc + class_len))
              {
                case 1:
                  count = 1;
                  srcw[0] = PRIVATE_DATA(cc);
                  break;

                case 2:
                  count = 2;
                  srcw[0] = PRIVATE_DATA(cc);
                  srcw[1] = srcw[0] + sizeof(sljit_sw);
                  break;

                default:
                  SLJIT_ASSERT_STOP();
                  break;
              }
            }
            cc += class_len;
            break;

          default:
            cc = next_opcode(common, cc);
            SLJIT_ASSERT(cc != NULL);
            break;
        }
        break;

      case phase_done:
        SLJIT_ASSERT_STOP();
        break;
    }

    /* Transfer the collected words, highest index first. The register
    whose turn it is (TMP1 when tmp1next is set, TMP2 otherwise) is used,
    then the turn passes to the other register. */
    while (count > 0)
    {
      int reg = tmp1next ? TMP1 : TMP2;
      BOOL *reg_empty = tmp1next ? &tmp1empty : &tmp2empty;

      count--;
      if (save)
      {
        /* Flush the word loaded earlier before reusing the register. */
        if (!*reg_empty)
        {
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, reg, 0);
          stackptr += sizeof(sljit_sw);
        }
        OP1(SLJIT_MOV, reg, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
        *reg_empty = FALSE;
      }
      else
      {
        SLJIT_ASSERT(!*reg_empty);
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], reg, 0);
        /* Refill the register while stack words remain. */
        *reg_empty = stackptr >= stacktop;
        if (!*reg_empty)
        {
          OP1(SLJIT_MOV, reg, 0, SLJIT_MEM1(STACK_TOP), stackptr);
          stackptr += sizeof(sljit_sw);
        }
      }
      tmp1next = tmp1next ? FALSE : TRUE;
    }
  }
  while (phase != phase_done);

  if (save)
  {
    /* Flush what is still in the registers; the register whose turn would
    be next holds the older word, so it is written first. */
    int first = tmp1next ? TMP1 : TMP2;
    int second = tmp1next ? TMP2 : TMP1;
    BOOL first_empty = tmp1next ? tmp1empty : tmp2empty;
    BOOL second_empty = tmp1next ? tmp2empty : tmp1empty;

    if (!first_empty)
    {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, first, 0);
      stackptr += sizeof(sljit_sw);
    }
    if (!second_empty)
    {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, second, 0);
      stackptr += sizeof(sljit_sw);
    }
  }
  SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
}
1969
set_then_offsets(compiler_common * common,pcre_uchar * cc,pcre_uint8 * current_offset)1970 static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, pcre_uint8 *current_offset)
1971 {
1972 pcre_uchar *end = bracketend(cc);
1973 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
1974
1975 /* Assert captures then. */
1976 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
1977 current_offset = NULL;
1978 /* Conditional block does not. */
1979 if (*cc == OP_COND || *cc == OP_SCOND)
1980 has_alternatives = FALSE;
1981
1982 cc = next_opcode(common, cc);
1983 if (has_alternatives)
1984 current_offset = common->then_offsets + (cc - common->start);
1985
1986 while (cc < end)
1987 {
1988 if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
1989 cc = set_then_offsets(common, cc, current_offset);
1990 else
1991 {
1992 if (*cc == OP_ALT && has_alternatives)
1993 current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
1994 if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
1995 *current_offset = 1;
1996 cc = next_opcode(common, cc);
1997 }
1998 }
1999
2000 return end;
2001 }
2002
/* The case label helper macros are not needed after this point. */
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
#undef CASE_ITERATOR_PRIVATE_DATA_2B
#undef CASE_ITERATOR_PRIVATE_DATA_2A
#undef CASE_ITERATOR_PRIVATE_DATA_1
2009
is_powerof2(unsigned int value)2010 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
2011 {
2012 return (value & (value - 1)) == 0;
2013 }
2014
set_jumps(jump_list * list,struct sljit_label * label)2015 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
2016 {
2017 while (list)
2018 {
2019 /* sljit_set_label is clever enough to do nothing
2020 if either the jump or the label is NULL. */
2021 SET_LABEL(list->jump, label);
2022 list = list->next;
2023 }
2024 }
2025
add_jump(struct sljit_compiler * compiler,jump_list ** list,struct sljit_jump * jump)2026 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump)
2027 {
2028 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
2029 if (list_item)
2030 {
2031 list_item->next = *list;
2032 list_item->jump = jump;
2033 *list = list_item;
2034 }
2035 }
2036
/* Registers a stub on common->stubs: start is the jump which enters the
stub, and a label emitted here is stored as the place to continue at
afterwards. Nothing is emitted or registered when the allocation fails. */
static void add_stub(compiler_common *common, struct sljit_jump *start)
{
  DEFINE_COMPILER;
  stub_list *stub = sljit_alloc_memory(compiler, sizeof(stub_list));

  if (stub == NULL)
    return;

  stub->start = start;
  stub->quit = LABEL();
  stub->next = common->stubs;
  common->stubs = stub;
}
2050
/* Emits the body of every registered stub: the entry jump is bound here, a
fast call is added to the common->stackalloc jump list, and control jumps
back to the quit label of the stub. The stub list is emptied afterwards. */
static void flush_stubs(compiler_common *common)
{
  DEFINE_COMPILER;
  stub_list *stub;

  for (stub = common->stubs; stub != NULL; stub = stub->next)
  {
    JUMPHERE(stub->start);
    add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
    JUMPTO(SLJIT_JUMP, stub->quit);
  }
  common->stubs = NULL;
}
2065
/* Emits a label at the current position and records it, together with
update_addr, at the head of common->label_addrs. Nothing is emitted or
recorded when the allocation fails. */
static void add_label_addr(compiler_common *common, sljit_uw *update_addr)
{
  DEFINE_COMPILER;
  label_addr_list *entry = sljit_alloc_memory(compiler, sizeof(label_addr_list));

  if (entry != NULL)
  {
    entry->label = LABEL();
    entry->update_addr = update_addr;
    entry->next = common->label_addrs;
    common->label_addrs = entry;
  }
}
2079
count_match(compiler_common * common)2080 static SLJIT_INLINE void count_match(compiler_common *common)
2081 {
2082 DEFINE_COMPILER;
2083
2084 OP2(SLJIT_SUB | SLJIT_SET_E, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
2085 add_jump(compiler, &common->calllimit, JUMP(SLJIT_ZERO));
2086 }
2087
allocate_stack(compiler_common * common,int size)2088 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
2089 {
2090 /* May destroy all locals and registers except TMP2. */
2091 DEFINE_COMPILER;
2092
2093 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2094 #ifdef DESTROY_REGISTERS
2095 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
2096 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2097 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2098 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0);
2099 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
2100 #endif
2101 add_stub(common, CMP(SLJIT_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
2102 }
2103
free_stack(compiler_common * common,int size)2104 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
2105 {
2106 DEFINE_COMPILER;
2107 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2108 }
2109
/* Allocates size bytes of read only data and links the allocation into the
common->read_only_data_head chain, so free_read_only_data() can release it.

Every allocation starts with a hidden header word which stores the previous
head of the chain; the returned pointer addresses the area after the header.

Returns NULL when the compiler is already in an error state, when the
request is too large to be represented together with its header, or when
the allocation fails. The last two cases set the compiler memory error. */
static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
{
  DEFINE_COMPILER;
  sljit_uw *result;

  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return NULL;

  /* size + header must not wrap around: that would allocate a block which
  is smaller than what the caller is going to write. */
  if (SLJIT_UNLIKELY(size > ~(sljit_uw)0 - sizeof(sljit_uw)))
  {
    sljit_set_compiler_memory_error(compiler);
    return NULL;
  }

  result = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data);
  if (SLJIT_UNLIKELY(result == NULL))
  {
    sljit_set_compiler_memory_error(compiler);
    return NULL;
  }

  /* The header word holds the link to the previously allocated block. */
  *(void**)result = common->read_only_data_head;
  common->read_only_data_head = (void *)result;
  return result + 1;
}
2129
free_read_only_data(void * current,void * allocator_data)2130 static void free_read_only_data(void *current, void *allocator_data)
2131 {
2132 void *next;
2133
2134 SLJIT_UNUSED_ARG(allocator_data);
2135
2136 while (current != NULL)
2137 {
2138 next = *(void**)current;
2139 SLJIT_FREE(current, allocator_data);
2140 current = next;
2141 }
2142 }
2143
reset_ovector(compiler_common * common,int length)2144 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
2145 {
2146 DEFINE_COMPILER;
2147 struct sljit_label *loop;
2148 int i;
2149
2150 /* At this point we can freely use all temporary registers. */
2151 SLJIT_ASSERT(length > 1);
2152 /* TMP1 returns with begin - 1. */
2153 OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
2154 if (length < 8)
2155 {
2156 for (i = 1; i < length; i++)
2157 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0);
2158 }
2159 else
2160 {
2161 GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
2162 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
2163 loop = LABEL();
2164 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw), SLJIT_R0, 0);
2165 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
2166 JUMPTO(SLJIT_NOT_ZERO, loop);
2167 }
2168 }
2169
do_reset_match(compiler_common * common,int length)2170 static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
2171 {
2172 DEFINE_COMPILER;
2173 struct sljit_label *loop;
2174 int i;
2175
2176 SLJIT_ASSERT(length > 1);
2177 /* OVECTOR(1) contains the "string begin - 1" constant. */
2178 if (length > 2)
2179 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
2180 if (length < 8)
2181 {
2182 for (i = 2; i < length; i++)
2183 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), TMP1, 0);
2184 }
2185 else
2186 {
2187 GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
2188 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
2189 loop = LABEL();
2190 OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
2191 OP2(SLJIT_SUB | SLJIT_SET_E, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
2192 JUMPTO(SLJIT_NOT_ZERO, loop);
2193 }
2194
2195 OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
2196 if (common->mark_ptr != 0)
2197 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
2198 if (common->control_head_ptr != 0)
2199 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
2200 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
2201 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
2202 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
2203 }
2204
do_search_mark(sljit_sw * current,const pcre_uchar * skip_arg)2205 static sljit_sw SLJIT_CALL do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg)
2206 {
2207 while (current != NULL)
2208 {
2209 switch (current[-2])
2210 {
2211 case type_then_trap:
2212 break;
2213
2214 case type_mark:
2215 if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[-3]) == 0)
2216 return current[-4];
2217 break;
2218
2219 default:
2220 SLJIT_ASSERT_STOP();
2221 break;
2222 }
2223 current = (sljit_sw*)current[-1];
2224 }
2225 return -1;
2226 }
2227
/* Emits the code which converts the internal ovector (pointers kept in the
local frame) into integer offsets relative to jit_arguments.begin, stores them
into jit_arguments.offsets, and computes the return value in SLJIT_RETURN_REG.
topbracket is the highest capturing bracket number of the pattern; when it is
greater than 1 the return value is found by scanning the ovector backwards. */
copy_ovector(compiler_common * common,int topbracket)2228 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
2229 {
2230 DEFINE_COMPILER;
2231 struct sljit_label *loop;
2232 struct sljit_jump *early_quit;
2233
2234 /* At this point we can freely use all registers. */
/* The previous content of OVECTOR(1) is kept in S2 (it is compared against
the ovector entries in the backward scan below), then OVECTOR(1) is
overwritten with the current STR_PTR. */
2235 OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
2236 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);
2237
/* R0 = arguments, R1 = offset_count (loop counter). The mark pointer, if the
pattern uses one, is copied from the local frame to jit_arguments.mark_ptr. */
2238 OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
2239 if (common->mark_ptr != 0)
2240 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
2241 OP1(SLJIT_MOV_SI, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offset_count));
2242 if (common->mark_ptr != 0)
2243 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
/* R2 starts one int before the offsets array. NOTE(review): this presumably
compensates for SLJIT_MOVU_SI updating R2 by sizeof(int) on every store in
the loop - confirm against the sljit documentation. */
2244 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
2245 OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, begin));
2246 GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START);
2247 /* Unlikely, but possible */
2248 early_quit = CMP(SLJIT_EQUAL, SLJIT_R1, 0, SLJIT_IMM, 0);
/* Copy loop: S1 = *S0 - begin, S0 advances by one machine word, and the
difference is stored as an int. The loop runs offset_count times. */
2249 loop = LABEL();
2250 OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R0, 0);
2251 OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
2252 /* Copy the integer value to the output buffer */
2253 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
/* The pointer difference is shifted right by UCHAR_SHIFT in the 16 and 32 bit
builds before it is stored. */
2254 OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
2255 #endif
2256 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_R2), sizeof(int), SLJIT_S1, 0);
2257 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
2258 JUMPTO(SLJIT_NOT_ZERO, loop);
2259 JUMPHERE(early_quit);
2260
2261 /* Calculate the return value, which is the maximum ovector value. */
2262 if (topbracket > 1)
2263 {
/* R0 starts just after the ovector pair of topbracket and R1 at
topbracket + 1. Each iteration loads the word two machine words below R0 and
decrements R1; the loop repeats while the loaded word equals the value saved
in S2 at entry. R1 is the return value when the loop exits. */
2264 GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
2265 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
2266
2267 /* OVECTOR(0) is never equal to SLJIT_S2. */
2268 loop = LABEL();
2269 OP1(SLJIT_MOVU, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw)));
2270 OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
2271 CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
2272 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
2273 }
2274 else
/* With at most one capturing bracket the return value is the constant 1. */
2275 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
2276 }
2277
/* Emits the exit sequence of a partial match: the return register is set to
PCRE_ERROR_PARTIAL, up to three integer offsets (relative to
jit_arguments.begin) are stored into jit_arguments.offsets depending on
real_offset_count, and control is transferred to the quit label. */
return_with_partial_match(compiler_common * common,struct sljit_label * quit)2278 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
2279 {
2280 DEFINE_COMPILER;
2281 struct sljit_jump *jump;
2282
2283 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S1, str_end_must_be_saved_reg2);
2284 SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
2285 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
2286
/* When fewer than two offsets can be stored, only the return code is set. */
2287 OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
2288 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
2289 OP1(SLJIT_MOV_SI, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
2290 CMPTO(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 2, quit);
2291
2292 /* Store match begin and end. */
/* From here S0 = subject begin and R1 = the offsets array. */
2293 OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, begin));
2294 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, offsets));
2295
/* offsets[2] is written only when at least three offsets are available. Its
source is the start_ptr slot in hard partial mode, and the word following the
hit_start slot otherwise. */
2296 jump = CMP(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 3);
2297 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_S0, 0);
2298 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2299 OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
2300 #endif
2301 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), 2 * sizeof(int), SLJIT_R2, 0);
2302 JUMPHERE(jump);
2303
/* offsets[1] = STR_END - begin. The pointer for offsets[0] is loaded into R2
first because S1 (which is STR_END, see the compile time assert above) is
overwritten by the subtraction. */
2304 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
2305 OP2(SLJIT_SUB, SLJIT_S1, 0, STR_END, 0, SLJIT_S0, 0);
2306 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2307 OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
2308 #endif
2309 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), sizeof(int), SLJIT_S1, 0);
2310
/* offsets[0] = (start_used_ptr or hit_start, depending on the mode) - begin. */
2311 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S0, 0);
2312 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2313 OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
2314 #endif
2315 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R2, 0);
2316
2317 JUMPTO(SLJIT_JUMP, quit);
2318 }
2319
check_start_used_ptr(compiler_common * common)2320 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2321 {
2322 /* May destroy TMP1. */
2323 DEFINE_COMPILER;
2324 struct sljit_jump *jump;
2325
2326 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2327 {
2328 /* The value of -1 must be kept for start_used_ptr! */
2329 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
2330 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2331 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2332 jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2333 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2334 JUMPHERE(jump);
2335 }
2336 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
2337 {
2338 jump = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2339 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2340 JUMPHERE(jump);
2341 }
2342 }
2343
char_has_othercase(compiler_common * common,pcre_uchar * cc)2344 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar *cc)
2345 {
2346 /* Detects if the character has an othercase. */
2347 unsigned int c;
2348
2349 #ifdef SUPPORT_UTF
2350 if (common->utf)
2351 {
2352 GETCHAR(c, cc);
2353 if (c > 127)
2354 {
2355 #ifdef SUPPORT_UCP
2356 return c != UCD_OTHERCASE(c);
2357 #else
2358 return FALSE;
2359 #endif
2360 }
2361 #ifndef COMPILE_PCRE8
2362 return common->fcc[c] != c;
2363 #endif
2364 }
2365 else
2366 #endif
2367 c = *cc;
2368 return MAX_255(c) ? common->fcc[c] != c : FALSE;
2369 }
2370
char_othercase(compiler_common * common,unsigned int c)2371 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2372 {
2373 /* Returns with the othercase. */
2374 #ifdef SUPPORT_UTF
2375 if (common->utf && c > 127)
2376 {
2377 #ifdef SUPPORT_UCP
2378 return UCD_OTHERCASE(c);
2379 #else
2380 return c;
2381 #endif
2382 }
2383 #endif
2384 return TABLE_GET(c, common->fcc, c);
2385 }
2386
/* Examines the character at cc and its othercase. The caller must guarantee
that the two differ (see the assert). Returns 0 when they differ in more than
one bit. Otherwise the return value is (index << 8) | mask, where mask is the
differing bit moved into the low byte and index is computed by the build
specific code at the end of the function. */
char_get_othercase_bit(compiler_common * common,pcre_uchar * cc)2387 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar *cc)
2388 {
2389 /* Detects if the character and its othercase has only 1 bit difference. */
2390 unsigned int c, oc, bit;
2391 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2392 int n;
2393 #endif
2394
/* Fetch the character (c) and its othercase (oc). In UTF mode characters
above 127 use UCD_OTHERCASE, or are their own othercase without UCP. */
2395 #ifdef SUPPORT_UTF
2396 if (common->utf)
2397 {
2398 GETCHAR(c, cc);
2399 if (c <= 127)
2400 oc = common->fcc[c];
2401 else
2402 {
2403 #ifdef SUPPORT_UCP
2404 oc = UCD_OTHERCASE(c);
2405 #else
2406 oc = c;
2407 #endif
2408 }
2409 }
2410 else
2411 {
2412 c = *cc;
2413 oc = TABLE_GET(c, common->fcc, c);
2414 }
2415 #else
2416 c = *cc;
2417 oc = TABLE_GET(c, common->fcc, c);
2418 #endif
2419
2420 SLJIT_ASSERT(c != oc);
2421
2422 bit = c ^ oc;
2423 /* Optimized for English alphabet. */
2424 if (c <= 127 && bit == 0x20)
2425 return (0 << 8) | 0x20;
2426
2427 /* Since c != oc, they must have at least 1 bit difference. */
2428 if (!is_powerof2(bit))
2429 return 0;
2430
2431 #if defined COMPILE_PCRE8
2432
2433 #ifdef SUPPORT_UTF
2434 if (common->utf && c > 127)
2435 {
/* Each step of the loop drops six low bits of the difference and decrements
n, until the differing bit is within the low six bits.
NOTE(review): GET_EXTRALEN presumably yields the number of bytes following
the lead byte, which would make n the index of the byte that holds the
differing bit - confirm. */
2436 n = GET_EXTRALEN(*cc);
2437 while ((bit & 0x3f) == 0)
2438 {
2439 n--;
2440 bit >>= 6;
2441 }
2442 return (n << 8) | bit;
2443 }
2444 #endif /* SUPPORT_UTF */
2445 return (0 << 8) | bit;
2446
2447 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2448
2449 #ifdef SUPPORT_UTF
2450 if (common->utf && c > 65535)
2451 {
/* For characters above 65535: a difference at bit 10 or above is shifted
down by 10 and handled by the common return below (index 0 or 1); a lower
difference is reported with index 2 or 3. */
2452 if (bit >= (1 << 10))
2453 bit >>= 10;
2454 else
2455 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2456 }
2457 #endif /* SUPPORT_UTF */
/* Index 0 when the bit is in the low byte, index 1 (with the mask shifted
down by 8) otherwise. */
2458 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2459
2460 #endif /* COMPILE_PCRE[8|16|32] */
2461 }
2462
/* Emits the check for a partial match at the current position. Does not
modify registers. Nothing is emitted in JIT_COMPILE mode, and force must not
be set in that mode.

Without force, the action is skipped when start_used_ptr >= STR_PTR. With
force, the action is skipped only in soft partial mode and only when
start_used_ptr is -1. The action is clearing hit_start in soft partial mode,
and jumping to the partial match handler otherwise. */
static void check_partial(compiler_common *common, BOOL force)
{
DEFINE_COMPILER;
const BOOL soft_mode = (common->mode == JIT_PARTIAL_SOFT_COMPILE);
struct sljit_jump *skip = NULL;

SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);

if (common->mode == JIT_COMPILE)
  return;

if (force)
  {
  if (soft_mode)
    skip = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
  }
else
  skip = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);

if (soft_mode)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
else if (common->partialmatchlabel == NULL)
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
else
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);

if (skip != NULL)
  JUMPHERE(skip);
}
2492
/* Emits the end of subject check. Does not affect registers. Usually used in
a tight spot.

In JIT_COMPILE mode a single jump is added to end_reached, taken when
STR_PTR >= STR_END. In the partial modes the emitted code falls through while
STR_PTR < STR_END; at the end of the subject it goes to end_reached when
start_used_ptr >= STR_PTR, otherwise it either clears hit_start and goes to
end_reached (soft mode) or jumps to the partial match handler (hard mode). */
static void check_str_end(compiler_common *common, jump_list **end_reached)
{
DEFINE_COMPILER;
struct sljit_jump *not_at_end;

if (common->mode == JIT_COMPILE)
  {
  add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  return;
  }

not_at_end = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
/* This comparison is the first instruction of both partial modes. */
add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
  }
else if (common->partialmatchlabel != NULL)
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
else
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
JUMPHERE(not_at_end);
}
2522
/* Emits the end of subject check used before reading a character. A jump is
added to backtracks when the end of the subject is reached. In the partial
matching modes the emitted code additionally clears hit_start (soft mode) or
jumps to the partial match handler (hard mode) when start_used_ptr is below
STR_PTR at the end of the subject. */
static void detect_partial_match(compiler_common *common, jump_list **backtracks)
{
DEFINE_COMPILER;
struct sljit_jump *has_input;

if (common->mode != JIT_COMPILE)
  {
  /* Partial matching mode. */
  has_input = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
  if (common->mode != JIT_PARTIAL_SOFT_COMPILE)
    {
    if (common->partialmatchlabel == NULL)
      add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
    else
      JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
    }
  else
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    }
  JUMPHERE(has_input);
  return;
  }

add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
}
2551
/* Emits code which reads the character at STR_PTR into TMP1 and leaves
STR_PTR unchanged. Does not check STR_END. TMP2 may be destroyed.

max is an upper bound of the character values the caller is interested in:
in UTF mode the multi-unit decoding is omitted when max is below the first
value which needs more than one code unit (128 for UTF-8, 0xd800 for UTF-16),
so TMP1 then holds only the first code unit. */
static void peek_char(compiler_common *common, pcre_uint32 max)
{
DEFINE_COMPILER;
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
struct sljit_jump *jump;
#endif

SLJIT_UNUSED_ARG(max);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
if (common->utf)
  {
  if (max < 128) return;

  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
  /* The shared decoder expects STR_PTR to point after the lead byte and
  returns the sequence length in TMP2, which is used to restore STR_PTR. */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  JUMPHERE(jump);
  }
#endif /* SUPPORT_UTF && COMPILE_PCRE8 */

#if defined SUPPORT_UTF && defined COMPILE_PCRE16
if (common->utf)
  {
  if (max < 0xd800) return;

  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
  /* TMP2 contains the high surrogate. STR_PTR still points to it (it is not
  advanced in this function), so the low surrogate is the next code unit. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  /* ((high - 0xd800) + 0x40) << 10 equals ((high - 0xd800) << 10) + 0x10000. */
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
  JUMPHERE(jump);
  }
#endif /* SUPPORT_UTF && COMPILE_PCRE16 */
}
2594
2595 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2596
/* Tells whether the character codes below 128 are enough to determine a
match: returns TRUE when bytes 16..31 of the 32 byte class bitmap are all 0,
or all 0xff when nclass is set; FALSE otherwise. */
static BOOL is_char7_bitset(const pcre_uint8 *bitset, BOOL nclass)
{
const pcre_uint8 expected = nclass ? 0xff : 0;
int i;

for (i = 16; i < 32; i++)
  {
  if (bitset[i] != expected)
    return FALSE;
  }
return TRUE;
}
2613
/* Emits code which loads the ctypes table entry of the code unit at STR_PTR
into TMP1 and advances STR_PTR by one code unit. Only valid in UTF mode (see
the assert). TMP2 is destroyed. */
read_char7_type(compiler_common * common,BOOL full_read)2614 static void read_char7_type(compiler_common *common, BOOL full_read)
2615 {
2616 /* Reads the precise character type of a character into TMP1, if the character
2617 is less than 128. Otherwise it returns with zero. Does not check STR_END. The
2618 full_read argument tells whether characters above max are accepted or not. */
2619 DEFINE_COMPILER;
2620 struct sljit_jump *jump;
2621
2622 SLJIT_ASSERT(common->utf);
2623
2624 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2625 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2626
/* The table is indexed directly by the code unit just read. */
2627 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2628
2629 if (full_read)
2630 {
/* For code units of 0xc0 and above STR_PTR is advanced further by the
utf8_table4 entry of the code unit (the table is indexed from 0xc0).
NOTE(review): presumably this is the number of bytes following the lead
byte - confirm. */
2631 jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2632 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2633 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2634 JUMPHERE(jump);
2635 }
2636 }
2637
2638 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2639
/* Emits code which reads the character at STR_PTR into TMP1 and advances
STR_PTR. The [min, max] range lets the generator select the cheapest decoding
sequence in UTF mode. When update_str_ptr is FALSE, some code paths skip the
work needed to step over the remaining code units of a character which cannot
be in the range. TMP2 may be destroyed, and RETURN_ADDR is used as a temporary
by some of the UTF-8 paths when update_str_ptr is set. */
read_char_range(compiler_common * common,pcre_uint32 min,pcre_uint32 max,BOOL update_str_ptr)2640 static void read_char_range(compiler_common *common, pcre_uint32 min, pcre_uint32 max, BOOL update_str_ptr)
2641 {
2642 /* Reads the precise value of a character into TMP1, if the character is
2643 between min and max (c >= min && c <= max). Otherwise it returns with a value
2644 outside the range. Does not check STR_END. */
2645 DEFINE_COMPILER;
2646 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2647 struct sljit_jump *jump;
2648 #endif
2649 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2650 struct sljit_jump *jump2;
2651 #endif
2652
2653 SLJIT_UNUSED_ARG(update_str_ptr);
2654 SLJIT_UNUSED_ARG(min);
2655 SLJIT_UNUSED_ARG(max);
2656 SLJIT_ASSERT(min <= max);
2657
/* The first code unit is always read and stepped over. */
2658 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2659 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2660
2661 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2662 if (common->utf)
2663 {
2664 if (max < 128 && !update_str_ptr) return;
2665
/* Code units below 0xc0 need no further decoding. */
2666 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2667 if (min >= 0x10000)
2668 {
/* Only lead bytes 0xf0..0xf7 are decoded (three more bytes are merged into
TMP1); any other lead byte skips the decoding through jump2. When
update_str_ptr is set, the utf8_table4 entry of the lead byte is kept in
RETURN_ADDR and added to STR_PTR on both paths; otherwise STR_PTR is advanced
by three only on the decoding path. */
2669 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
2670 if (update_str_ptr)
2671 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2672 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2673 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
2674 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2675 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2676 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2677 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2678 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2679 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2680 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2681 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2682 if (!update_str_ptr)
2683 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2684 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2685 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2686 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2687 JUMPHERE(jump2);
2688 if (update_str_ptr)
2689 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2690 }
2691 else if (min >= 0x800 && max <= 0xffff)
2692 {
/* Same scheme for lead bytes 0xe0..0xef: two more bytes are merged. */
2693 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
2694 if (update_str_ptr)
2695 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2696 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2697 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
2698 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2699 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2700 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2701 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2702 if (!update_str_ptr)
2703 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2704 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2705 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2706 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2707 JUMPHERE(jump2);
2708 if (update_str_ptr)
2709 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2710 }
2711 else if (max >= 0x800)
/* General case: call the shared decoder; the utfreadchar16 list is used when
max is below 0x10000, the utfreadchar list otherwise. */
2712 add_jump(compiler, (max < 0x10000) ? &common->utfreadchar16 : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2713 else if (max < 128)
2714 {
/* Reached only with update_str_ptr set (see the early return above): the
value is not decoded, STR_PTR is just advanced by the utf8_table4 entry. */
2715 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2716 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2717 }
2718 else
2719 {
/* 128 <= max < 0x800: the lead byte and the following byte are merged as a
two byte sequence. */
2720 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2721 if (!update_str_ptr)
2722 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2723 else
2724 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2725 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2726 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2727 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2728 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2729 if (update_str_ptr)
2730 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2731 }
2732 JUMPHERE(jump);
2733 }
2734 #endif
2735
2736 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2737 if (common->utf)
2738 {
2739 if (max >= 0x10000)
2740 {
/* Full decoding: when the unit read is in 0xd800..0xdbff it is combined with
the next unit (STR_PTR already points to it) and STR_PTR steps over that
unit as well. */
2741 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2742 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2743 /* TMP2 contains the high surrogate. */
2744 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2745 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2746 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2747 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2748 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2749 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2750 JUMPHERE(jump);
2751 return;
2752 }
2753
2754 if (max < 0xd800 && !update_str_ptr) return;
2755
2756 /* Skip low surrogate if necessary. */
/* max is below 0x10000 here, so the pair is not decoded: STR_PTR is advanced
over the second unit when update_str_ptr is set, and TMP1 is replaced by
0x10000 (a value above max) when max >= 0xd800. */
2757 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2758 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2759 if (update_str_ptr)
2760 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2761 if (max >= 0xd800)
2762 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
2763 JUMPHERE(jump);
2764 }
2765 #endif
2766 }
2767
/* Emits code which reads a character into TMP1 and advances STR_PTR: a
read_char_range() call with the full range 0..READ_CHAR_MAX and with
update_str_ptr set. */
read_char(compiler_common * common)2768 static SLJIT_INLINE void read_char(compiler_common *common)
2769 {
2770 read_char_range(common, 0, READ_CHAR_MAX, TRUE);
2771 }
2772
/* Emits code which loads the ctypes table entry of the character at STR_PTR
into TMP1 (zero is produced for values above 255 on the paths which check the
value) and advances STR_PTR. TMP2 is destroyed. */
read_char8_type(compiler_common * common,BOOL update_str_ptr)2773 static void read_char8_type(compiler_common *common, BOOL update_str_ptr)
2774 {
2775 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
2776 DEFINE_COMPILER;
2777 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2778 struct sljit_jump *jump;
2779 #endif
2780 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2781 struct sljit_jump *jump2;
2782 #endif
2783
2784 SLJIT_UNUSED_ARG(update_str_ptr);
2785
2786 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2787 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2788
2789 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2790 if (common->utf)
2791 {
2792 /* This can be an extra read in some situations, but hopefully
2793 it is needed in most cases. */
2794 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2795 jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2796 if (!update_str_ptr)
2797 {
/* Inline path: the lead byte and the next byte are merged as a two byte
sequence and STR_PTR steps over one more byte only. The type is zero when
the merged value is above 255. */
2798 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2799 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2800 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2801 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2802 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2803 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2804 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2805 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
2806 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2807 JUMPHERE(jump2);
2808 }
2809 else
/* Otherwise the shared utfreadtype8 helper is called. */
2810 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
2811 JUMPHERE(jump);
2812 return;
2813 }
2814 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2815
2816 #if !defined COMPILE_PCRE8
2817 /* The ctypes array contains only 256 values. */
2818 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2819 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
2820 #endif
2821 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2822 #if !defined COMPILE_PCRE8
2823 JUMPHERE(jump);
2824 #endif
2825
2826 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2827 if (common->utf && update_str_ptr)
2828 {
2829 /* Skip low surrogate if necessary. */
/* STR_PTR steps over one more unit when the unit read is in 0xd800..0xdbff. */
2830 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2831 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2832 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2833 JUMPHERE(jump);
2834 }
2835 #endif /* SUPPORT_UTF && COMPILE_PCRE16 */
2836 }
2837
/* Emits code which moves STR_PTR back by one character. Outside UTF mode
(and in the 32 bit build) this is a single code unit; in UTF-8 and UTF-16
mode the trailing units of a multi-unit character are stepped over too. */
skip_char_back(compiler_common * common)2838 static void skip_char_back(compiler_common *common)
2839 {
2840 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
2841 DEFINE_COMPILER;
2842 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2843 #if defined COMPILE_PCRE8
2844 struct sljit_label *label;
2845
2846 if (common->utf)
2847 {
/* Step back one byte at a time while the byte just passed has the form
10xxxxxx (its top two bits are 0x80). */
2848 label = LABEL();
2849 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2850 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2851 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
2852 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
2853 return;
2854 }
2855 #elif defined COMPILE_PCRE16
2856 if (common->utf)
2857 {
2858 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2859 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2860 /* Skip low surrogate if necessary. */
/* Branch free form: TMP1 becomes 1 when the unit just passed is in
0xdc00..0xdfff and 0 otherwise; shifted left by one it is the size of one
more 16 bit unit, which is subtracted from STR_PTR. */
2861 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2862 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
2863 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
2864 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2865 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2866 return;
2867 }
2868 #endif /* COMPILE_PCRE[8|16] */
2869 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
/* Fixed width case: one code unit back. */
2870 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2871 }
2872
/* Emits the newline test for the character in TMP1. A jump is added to
backtracks which is taken when the character is a newline (jumpifmatch is
TRUE) or when it is not a newline (jumpifmatch is FALSE). nltype selects the
newline convention; NLTYPE_FIXED requires a newline below 256. TMP2 may be
destroyed. */
static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
{
DEFINE_COMPILER;
struct sljit_jump *is_cr;

if (nltype == NLTYPE_ANY)
  {
  /* The shared anynewline helper reports its result in the zero flag. */
  add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
  add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  return;
  }

if (nltype != NLTYPE_ANYCRLF)
  {
  SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
  add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
  return;
  }

/* NLTYPE_ANYCRLF: the character is a newline if it is CR or NL. */
if (!jumpifmatch)
  {
  is_cr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
  JUMPHERE(is_cr);
  return;
  }

add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
}
2904
2905 #ifdef SUPPORT_UTF
2906
2907 #if defined COMPILE_PCRE8
/* Emits the shared UTF-8 decoding helper (entered by a fast call). On entry
TMP1 holds the lead byte and STR_PTR is used as the address of the first
following byte. On return TMP1 holds the decoded value, TMP2 the sequence
length (2, 3 or 4) and STR_PTR has been advanced by length - 1. */
do_utfreadchar(compiler_common * common)2908 static void do_utfreadchar(compiler_common *common)
2909 {
2910 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2911 of the character (>= 0xc0). Return char value in TMP1, length in TMP2. */
2912 DEFINE_COMPILER;
2913 struct sljit_jump *jump;
2914
2915 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Merge the low six bits of the lead byte with the first following byte. */
2916 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2917 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2918 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2919 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2920 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2921
2922 /* Searching for the first zero. */
/* Bit 0x800 of the merged value is bit 0x20 of the lead byte; when it is
clear the sequence has two bytes. */
2923 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
2924 jump = JUMP(SLJIT_NOT_ZERO);
2925 /* Two byte sequence. */
2926 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2927 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2928 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2929
2930 JUMPHERE(jump);
/* Remove the length marker bit and merge the second following byte. */
2931 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2932 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
2933 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2934 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2935 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2936
/* Bit 0x10000 is now bit 0x10 of the lead byte; when it is clear the
sequence has three bytes. */
2937 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2938 jump = JUMP(SLJIT_NOT_ZERO);
2939 /* Three byte sequence. */
2940 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2941 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
2942 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2943
2944 /* Four byte sequence. */
2945 JUMPHERE(jump);
2946 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2947 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2948 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2949 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2950 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2951 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2952 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(4));
2953 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2954 }
2955
/* Emits the shared UTF-8 decoding helper used when only values below 0x10000
are of interest (entered by a fast call). On entry TMP1 holds the lead byte
and STR_PTR is used as the address of the first following byte. Two and three
byte sequences are decoded exactly. A four byte sequence is stepped over
completely and yields a value of at least 0x10000. TMP2 is destroyed; unlike
do_utfreadchar, no length is returned. */
do_utfreadchar16(compiler_common * common)2956 static void do_utfreadchar16(compiler_common *common)
2957 {
2958 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2959 of the character (>= 0xc0). Return value in TMP1. */
2960 DEFINE_COMPILER;
2961 struct sljit_jump *jump;
2962
2963 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Merge the low six bits of the lead byte with the first following byte. */
2964 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2965 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2966 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2967 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2968 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2969
2970 /* Searching for the first zero. */
/* Bit 0x800 of the merged value is bit 0x20 of the lead byte. */
2971 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
2972 jump = JUMP(SLJIT_NOT_ZERO);
2973 /* Two byte sequence. */
2974 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2975 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2976
2977 JUMPHERE(jump);
/* Bit 0x400 is bit 0x10 of the lead byte, which is set for four byte
sequences. TMP2 becomes 1 in that case (0 otherwise) and is added to STR_PTR,
so the total advance below is three bytes instead of two. The bit is not
cleared, so it ends up as bit 0x10000 of the result after the shift. */
2978 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
2979 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_NOT_ZERO);
2980 /* This code runs only in 8 bit mode. No need to shift the value. */
2981 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2982 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2983 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
2984 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2985 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2986 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2987 /* Three byte sequence. */
2988 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2989 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2990 }
2991
/* Emits the shared helper which reads the ctypes entry of a multi-byte UTF-8
character (entered by a fast call). On entry TMP2 holds the lead byte and
STR_PTR is used as the address of the first following byte. On return TMP1
holds the type (zero for characters above 255) and STR_PTR has been advanced
past the character. TMP2 is destroyed. */
do_utfreadtype8(compiler_common * common)2992 static void do_utfreadtype8(compiler_common *common)
2993 {
2994 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
2995 of the character (>= 0xc0). Return value in TMP1. */
2996 DEFINE_COMPILER;
2997 struct sljit_jump *jump;
2998 struct sljit_jump *compare;
2999
3000 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3001
/* Lead bytes with bit 0x20 set start sequences longer than two bytes; they
are handled at the end of the function. */
3002 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
3003 jump = JUMP(SLJIT_NOT_ZERO);
3004 /* Two byte sequence. */
3005 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3006 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3007 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
3008 /* The upper 5 bits are known at this point. */
/* An upper part above 3 means a character value of 256 or more, which has no
entry in the ctypes table. */
3009 compare = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
3010 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
3011 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3012 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
3013 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
3014 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3015
3016 JUMPHERE(compare);
3017 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3018 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3019
3020 /* We only have types for characters less than 256. */
/* Longer sequences: the type is zero and STR_PTR is advanced by the
utf8_table4 entry of the lead byte (the table is indexed from 0xc0). */
3021 JUMPHERE(jump);
3022 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3023 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3024 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3025 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3026 }
3027
3028 #endif /* COMPILE_PCRE8 */
3029
3030 #endif /* SUPPORT_UTF */
3031
3032 #ifdef SUPPORT_UCP
3033
3034 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
/* Mask and shift used to split a character value into a block number
(value >> UCD_BLOCK_SHIFT) and an offset within the block
(value & UCD_BLOCK_MASK). */
3035 #define UCD_BLOCK_MASK 127
3036 #define UCD_BLOCK_SHIFT 7
3037
/* Emits the shared helper (entered by a fast call) which performs the two
stage UCD table lookup for the character in TMP1. On return TMP1 holds the
chartype field of the record and TMP2 the value read from ucd_stage2, which
is used as the record index. */
do_getucd(compiler_common * common)3038 static void do_getucd(compiler_common *common)
3039 {
3040 /* Search the UCD record for the character comes in TMP1.
3041 Returns chartype in TMP1 and UCD offset in TMP2. */
3042 DEFINE_COMPILER;
3043
3044 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
3045
3046 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Stage 1: a byte indexed by the block number. */
3047 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
3048 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
/* Stage 2 index = stage 1 value * 128 + offset within the block. The entries
are 16 bit wide, hence the index shift of 1 in the load. */
3049 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
3050 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
3051 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
3052 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
3053 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
/* Records are 8 bytes long (see the assert), hence the index shift of 3; the
base address already includes the offset of the chartype field. */
3054 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
3055 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
3056 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3057 }
3058 #endif
3059
mainloop_entry(compiler_common * common,BOOL hascrorlf,BOOL firstline)3060 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
3061 {
3062 DEFINE_COMPILER;
3063 struct sljit_label *mainloop;
3064 struct sljit_label *newlinelabel = NULL;
3065 struct sljit_jump *start;
3066 struct sljit_jump *end = NULL;
3067 struct sljit_jump *nl = NULL;
3068 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3069 struct sljit_jump *singlechar;
3070 #endif
3071 jump_list *newline = NULL;
3072 BOOL newlinecheck = FALSE;
3073 BOOL readuchar = FALSE;
3074
3075 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
3076 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
3077 newlinecheck = TRUE;
3078
3079 if (firstline)
3080 {
3081 /* Search for the end of the first line. */
3082 SLJIT_ASSERT(common->first_line_end != 0);
3083 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
3084
3085 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3086 {
3087 mainloop = LABEL();
3088 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3089 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3090 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3091 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3092 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
3093 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
3094 JUMPHERE(end);
3095 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3096 }
3097 else
3098 {
3099 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3100 mainloop = LABEL();
3101 /* Continual stores does not cause data dependency. */
3102 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0);
3103 read_char_range(common, common->nlmin, common->nlmax, TRUE);
3104 check_newlinechar(common, common->nltype, &newline, TRUE);
3105 CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop);
3106 JUMPHERE(end);
3107 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0);
3108 set_jumps(newline, LABEL());
3109 }
3110
3111 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
3112 }
3113
3114 start = JUMP(SLJIT_JUMP);
3115
3116 if (newlinecheck)
3117 {
3118 newlinelabel = LABEL();
3119 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3120 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3121 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3122 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
3123 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3124 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3125 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3126 #endif
3127 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3128 nl = JUMP(SLJIT_JUMP);
3129 }
3130
3131 mainloop = LABEL();
3132
3133 /* Increasing the STR_PTR here requires one less jump in the most common case. */
3134 #ifdef SUPPORT_UTF
3135 if (common->utf) readuchar = TRUE;
3136 #endif
3137 if (newlinecheck) readuchar = TRUE;
3138
3139 if (readuchar)
3140 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3141
3142 if (newlinecheck)
3143 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
3144
3145 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3146 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3147 #if defined COMPILE_PCRE8
3148 if (common->utf)
3149 {
3150 singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3151 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3152 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3153 JUMPHERE(singlechar);
3154 }
3155 #elif defined COMPILE_PCRE16
3156 if (common->utf)
3157 {
3158 singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
3159 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3160 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3161 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3162 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3163 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3164 JUMPHERE(singlechar);
3165 }
3166 #endif /* COMPILE_PCRE[8|16] */
3167 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
3168 JUMPHERE(start);
3169
3170 if (newlinecheck)
3171 {
3172 JUMPHERE(end);
3173 JUMPHERE(nl);
3174 }
3175
3176 return mainloop;
3177 }
3178
3179 #define MAX_N_CHARS 16
3180 #define MAX_N_BYTES 8
3181
add_prefix_byte(pcre_uint8 byte,pcre_uint8 * bytes)3182 static SLJIT_INLINE void add_prefix_byte(pcre_uint8 byte, pcre_uint8 *bytes)
3183 {
3184 pcre_uint8 len = bytes[0];
3185 int i;
3186
3187 if (len == 255)
3188 return;
3189
3190 if (len == 0)
3191 {
3192 bytes[0] = 1;
3193 bytes[1] = byte;
3194 return;
3195 }
3196
3197 for (i = len; i > 0; i--)
3198 if (bytes[i] == byte)
3199 return;
3200
3201 if (len >= MAX_N_BYTES - 1)
3202 {
3203 bytes[0] = 255;
3204 return;
3205 }
3206
3207 len++;
3208 bytes[len] = byte;
3209 bytes[0] = len;
3210 }
3211
static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uint32 *chars, pcre_uint8 *bytes, int max_chars)
{
/* Recursive function which collects information about the code units a match
starting at cc can begin with.

Each position owns two entries of chars (a value and a mask of the bits which
may differ) and MAX_N_BYTES entries of bytes (a byte set maintained by
add_prefix_byte). At most max_chars positions are filled in.

Returns the number of positions processed by this invocation. For optional
items and alternatives the function calls itself on the other path, and the
value returned by that call becomes the new limit for this path. */
BOOL stop_here, wildcard, icase;
int units, units_saved, count, done = 0;
pcre_uint32 ch, diff;
pcre_uchar *alt, *lit, *other;
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
pcre_uchar othercase[8];
#elif defined SUPPORT_UTF && defined COMPILE_PCRE16
pcre_uchar othercase[2];
#else
pcre_uchar othercase[1];
#endif

count = 1;
for (;;)
  {
  stop_here = TRUE;
  wildcard = FALSE;
  icase = FALSE;

  switch (*cc)
    {
    case OP_CHARI:
    icase = TRUE;
    /* Fall through. */
    case OP_CHAR:
    stop_here = FALSE;
    cc++;
    break;

    /* Zero width assertions are simply stepped over. */
    case OP_SOD:
    case OP_SOM:
    case OP_SET_SOM:
    case OP_NOT_WORD_BOUNDARY:
    case OP_WORD_BOUNDARY:
    case OP_EODN:
    case OP_EOD:
    case OP_CIRC:
    case OP_CIRCM:
    case OP_DOLL:
    case OP_DOLLM:
    cc++;
    continue;

    /* A whole assertion group is stepped over. */
    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    cc = bracketend(cc);
    continue;

    case OP_PLUSI:
    case OP_MINPLUSI:
    case OP_POSPLUSI:
    icase = TRUE;
    /* Fall through. */
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_POSPLUS:
    /* Only the first occurrence is recorded; stop_here stays TRUE. */
    cc++;
    break;

    case OP_EXACTI:
    icase = TRUE;
    /* Fall through. */
    case OP_EXACT:
    count = GET2(cc, 1);
    stop_here = FALSE;
    cc += 1 + IMM2_SIZE;
    break;

    case OP_QUERYI:
    case OP_MINQUERYI:
    case OP_POSQUERYI:
    icase = TRUE;
    /* Fall through. */
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_POSQUERY:
    /* First scan the path on which the optional character is absent. */
    units = 1;
    cc++;
#ifdef SUPPORT_UTF
    if (common->utf && HAS_EXTRALEN(*cc)) units += GET_EXTRALEN(*cc);
#endif
    max_chars = scan_prefix(common, cc + units, chars, bytes, max_chars);
    if (max_chars == 0)
      return done;
    stop_here = FALSE;
    break;

    case OP_KET:
    cc += 1 + LINK_SIZE;
    continue;

    case OP_ALT:
    cc += GET(cc, 1);
    continue;

    case OP_ONCE:
    case OP_ONCE_NC:
    case OP_BRA:
    case OP_BRAPOS:
    case OP_CBRA:
    case OP_CBRAPOS:
    /* Scan every further alternative first, then continue with the first one. */
    for (alt = cc + GET(cc, 1); *alt == OP_ALT; alt += GET(alt, 1))
      {
      max_chars = scan_prefix(common, alt + 1 + LINK_SIZE, chars, bytes, max_chars);
      if (max_chars == 0)
        return done;
      }

    if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
      cc += IMM2_SIZE;
    cc += 1 + LINK_SIZE;
    continue;

    case OP_CLASS:
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
    if (common->utf && !is_char7_bitset((const pcre_uint8 *)(cc + 1), FALSE)) return done;
#endif
    wildcard = TRUE;
    cc += 1 + 32 / sizeof(pcre_uchar);
    break;

    case OP_NCLASS:
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
    if (common->utf) return done;
#endif
    wildcard = TRUE;
    cc += 1 + 32 / sizeof(pcre_uchar);
    break;

#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
    case OP_XCLASS:
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
    if (common->utf) return done;
#endif
    wildcard = TRUE;
    cc += GET(cc, 1);
    break;
#endif

    case OP_DIGIT:
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
    if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
      return done;
#endif
    wildcard = TRUE;
    cc++;
    break;

    case OP_WHITESPACE:
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
    if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_space, FALSE))
      return done;
#endif
    wildcard = TRUE;
    cc++;
    break;

    case OP_WORDCHAR:
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
    if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_word, FALSE))
      return done;
#endif
    wildcard = TRUE;
    cc++;
    break;

    case OP_NOT:
    case OP_NOTI:
    /* One extra step for these two opcodes. */
    cc++;
    /* Fall through. */
    case OP_NOT_DIGIT:
    case OP_NOT_WHITESPACE:
    case OP_NOT_WORDCHAR:
    case OP_ANY:
    case OP_ALLANY:
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
    if (common->utf) return done;
#endif
    wildcard = TRUE;
    cc++;
    break;

#ifdef SUPPORT_UCP
    case OP_NOTPROP:
    case OP_PROP:
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
    if (common->utf) return done;
#endif
    wildcard = TRUE;
    cc += 1 + 2;
    break;
#endif

    case OP_TYPEEXACT:
    /* The repeat count applies to the opcode which follows. */
    count = GET2(cc, 1);
    cc += 1 + IMM2_SIZE;
    continue;

    case OP_NOTEXACT:
    case OP_NOTEXACTI:
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
    if (common->utf) return done;
#endif
    wildcard = TRUE;
    count = GET2(cc, 1);
    cc += 1 + IMM2_SIZE + 1;
    break;

    default:
    return done;
    }

  if (wildcard)
    {
    /* Every bit of the code unit is marked as unknown. */
#if defined COMPILE_PCRE8
    diff = 0xff;
#elif defined COMPILE_PCRE16
    diff = 0xffff;
#elif defined COMPILE_PCRE32
    diff = 0xffffffff;
#else
    SLJIT_ASSERT_STOP();
#endif

    do
      {
      chars[0] = diff;
      chars[1] = diff;
      bytes[0] = 255;

      done++;
      if (--max_chars == 0)
        return done;
      chars += 2;
      bytes += MAX_N_BYTES;
      }
    while (--count > 0);

    count = 1;
    continue;
    }

  /* A literal character: cc points to its first code unit. */
  units = 1;
#ifdef SUPPORT_UTF
  if (common->utf && HAS_EXTRALEN(*cc)) units += GET_EXTRALEN(*cc);
#endif

  if (icase && !char_has_othercase(common, cc))
    icase = FALSE;

  if (icase)
    {
#ifdef SUPPORT_UTF
    if (common->utf)
      {
      /* Give up if the other case is encoded with a different length. */
      GETCHAR(ch, cc);
      if ((int)PRIV(ord2utf)(char_othercase(common, ch), othercase) != units)
        return done;
      }
    else
#endif
      {
      ch = *cc;
      othercase[0] = TABLE_GET(ch, common->fcc, ch);
      }
    }

  units_saved = units;
  lit = cc;
  do
    {
    /* Each repetition restarts from the first code unit of the literal. */
    units = units_saved;
    cc = lit;
    other = othercase;
    do
      {
      ch = *cc;
#ifdef COMPILE_PCRE32
      if (SLJIT_UNLIKELY(ch == NOTACHAR))
        return done;
#endif
      add_prefix_byte((pcre_uint8)ch, bytes);

      diff = 0;
      if (icase)
        {
        add_prefix_byte((pcre_uint8)*other, bytes);
        diff = *cc ^ *other;
        ch |= diff;
        }

#ifdef COMPILE_PCRE32
      if (chars[0] == NOTACHAR && chars[1] == 0)
#else
      if (chars[0] == NOTACHAR)
#endif
        {
        /* The position has not been set yet. */
        chars[0] = ch;
        chars[1] = diff;
        }
      else
        {
        /* Merge with the stored value: differing bits join the mask. */
        diff |= chars[0] ^ ch;
        ch |= diff;
        chars[0] = ch;
        chars[1] |= diff;
        }

      units--;
      done++;
      if (--max_chars == 0)
        return done;
      chars += 2;
      bytes += MAX_N_BYTES;
      cc++;
      other++;
      }
    while (units > 0);
    }
  while (--count != 0);

  count = 1;
  if (stop_here)
    return done;
  }
}
3543
fast_forward_first_n_chars(compiler_common * common,BOOL firstline)3544 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
3545 {
3546 DEFINE_COMPILER;
3547 struct sljit_label *start;
3548 struct sljit_jump *quit;
3549 pcre_uint32 chars[MAX_N_CHARS * 2];
3550 pcre_uint8 bytes[MAX_N_CHARS * MAX_N_BYTES];
3551 pcre_uint8 ones[MAX_N_CHARS];
3552 int offsets[3];
3553 pcre_uint32 mask;
3554 pcre_uint8 *byte_set, *byte_set_end;
3555 int i, max, from;
3556 int range_right = -1, range_len = 3 - 1;
3557 sljit_ub *update_table = NULL;
3558 BOOL in_range;
3559
3560 for (i = 0; i < MAX_N_CHARS; i++)
3561 {
3562 chars[i << 1] = NOTACHAR;
3563 chars[(i << 1) + 1] = 0;
3564 bytes[i * MAX_N_BYTES] = 0;
3565 }
3566
3567 max = scan_prefix(common, common->start, chars, bytes, MAX_N_CHARS);
3568
3569 if (max <= 1)
3570 return FALSE;
3571
3572 for (i = 0; i < max; i++)
3573 {
3574 mask = chars[(i << 1) + 1];
3575 ones[i] = ones_in_half_byte[mask & 0xf];
3576 mask >>= 4;
3577 while (mask != 0)
3578 {
3579 ones[i] += ones_in_half_byte[mask & 0xf];
3580 mask >>= 4;
3581 }
3582 }
3583
3584 in_range = FALSE;
3585 from = 0; /* Prevent compiler "uninitialized" warning */
3586 for (i = 0; i <= max; i++)
3587 {
3588 if (in_range && (i - from) > range_len && (bytes[(i - 1) * MAX_N_BYTES] <= 4))
3589 {
3590 range_len = i - from;
3591 range_right = i - 1;
3592 }
3593
3594 if (i < max && bytes[i * MAX_N_BYTES] < 255)
3595 {
3596 if (!in_range)
3597 {
3598 in_range = TRUE;
3599 from = i;
3600 }
3601 }
3602 else if (in_range)
3603 in_range = FALSE;
3604 }
3605
3606 if (range_right >= 0)
3607 {
3608 update_table = (sljit_ub *)allocate_read_only_data(common, 256);
3609 if (update_table == NULL)
3610 return TRUE;
3611 memset(update_table, IN_UCHARS(range_len), 256);
3612
3613 for (i = 0; i < range_len; i++)
3614 {
3615 byte_set = bytes + ((range_right - i) * MAX_N_BYTES);
3616 SLJIT_ASSERT(byte_set[0] > 0 && byte_set[0] < 255);
3617 byte_set_end = byte_set + byte_set[0];
3618 byte_set++;
3619 while (byte_set <= byte_set_end)
3620 {
3621 if (update_table[*byte_set] > IN_UCHARS(i))
3622 update_table[*byte_set] = IN_UCHARS(i);
3623 byte_set++;
3624 }
3625 }
3626 }
3627
3628 offsets[0] = -1;
3629 /* Scan forward. */
3630 for (i = 0; i < max; i++)
3631 if (ones[i] <= 2) {
3632 offsets[0] = i;
3633 break;
3634 }
3635
3636 if (offsets[0] < 0 && range_right < 0)
3637 return FALSE;
3638
3639 if (offsets[0] >= 0)
3640 {
3641 /* Scan backward. */
3642 offsets[1] = -1;
3643 for (i = max - 1; i > offsets[0]; i--)
3644 if (ones[i] <= 2 && i != range_right)
3645 {
3646 offsets[1] = i;
3647 break;
3648 }
3649
3650 /* This case is handled better by fast_forward_first_char. */
3651 if (offsets[1] == -1 && offsets[0] == 0 && range_right < 0)
3652 return FALSE;
3653
3654 offsets[2] = -1;
3655 /* We only search for a middle character if there is no range check. */
3656 if (offsets[1] >= 0 && range_right == -1)
3657 {
3658 /* Scan from middle. */
3659 for (i = (offsets[0] + offsets[1]) / 2 + 1; i < offsets[1]; i++)
3660 if (ones[i] <= 2)
3661 {
3662 offsets[2] = i;
3663 break;
3664 }
3665
3666 if (offsets[2] == -1)
3667 {
3668 for (i = (offsets[0] + offsets[1]) / 2; i > offsets[0]; i--)
3669 if (ones[i] <= 2)
3670 {
3671 offsets[2] = i;
3672 break;
3673 }
3674 }
3675 }
3676
3677 SLJIT_ASSERT(offsets[1] == -1 || (offsets[0] < offsets[1]));
3678 SLJIT_ASSERT(offsets[2] == -1 || (offsets[0] < offsets[2] && offsets[1] > offsets[2]));
3679
3680 chars[0] = chars[offsets[0] << 1];
3681 chars[1] = chars[(offsets[0] << 1) + 1];
3682 if (offsets[2] >= 0)
3683 {
3684 chars[2] = chars[offsets[2] << 1];
3685 chars[3] = chars[(offsets[2] << 1) + 1];
3686 }
3687 if (offsets[1] >= 0)
3688 {
3689 chars[4] = chars[offsets[1] << 1];
3690 chars[5] = chars[(offsets[1] << 1) + 1];
3691 }
3692 }
3693
3694 max -= 1;
3695 if (firstline)
3696 {
3697 SLJIT_ASSERT(common->first_line_end != 0);
3698 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3699 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3700 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3701 quit = CMP(SLJIT_LESS_EQUAL, STR_END, 0, TMP1, 0);
3702 OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
3703 JUMPHERE(quit);
3704 }
3705 else
3706 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3707
3708 #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
3709 if (range_right >= 0)
3710 OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);
3711 #endif
3712
3713 start = LABEL();
3714 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3715
3716 SLJIT_ASSERT(range_right >= 0 || offsets[0] >= 0);
3717
3718 if (range_right >= 0)
3719 {
3720 #if defined COMPILE_PCRE8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
3721 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
3722 #else
3723 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
3724 #endif
3725
3726 #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
3727 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
3728 #else
3729 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);
3730 #endif
3731 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3732 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);
3733 }
3734
3735 if (offsets[0] >= 0)
3736 {
3737 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[0]));
3738 if (offsets[1] >= 0)
3739 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[1]));
3740 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3741
3742 if (chars[1] != 0)
3743 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
3744 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
3745 if (offsets[2] >= 0)
3746 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[2] - 1));
3747
3748 if (offsets[1] >= 0)
3749 {
3750 if (chars[5] != 0)
3751 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[5]);
3752 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[4], start);
3753 }
3754
3755 if (offsets[2] >= 0)
3756 {
3757 if (chars[3] != 0)
3758 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[3]);
3759 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[2], start);
3760 }
3761 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3762 }
3763
3764 JUMPHERE(quit);
3765
3766 if (firstline)
3767 {
3768 if (range_right >= 0)
3769 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3770 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3771 if (range_right >= 0)
3772 {
3773 quit = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
3774 OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
3775 JUMPHERE(quit);
3776 }
3777 }
3778 else
3779 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3780 return TRUE;
3781 }
3782
3783 #undef MAX_N_CHARS
3784 #undef MAX_N_BYTES
3785
fast_forward_first_char(compiler_common * common,pcre_uchar first_char,BOOL caseless,BOOL firstline)3786 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
3787 {
3788 DEFINE_COMPILER;
3789 struct sljit_label *start;
3790 struct sljit_jump *quit;
3791 struct sljit_jump *found;
3792 pcre_uchar oc, bit;
3793
3794 if (firstline)
3795 {
3796 SLJIT_ASSERT(common->first_line_end != 0);
3797 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3798 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3799 }
3800
3801 start = LABEL();
3802 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3803 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3804
3805 oc = first_char;
3806 if (caseless)
3807 {
3808 oc = TABLE_GET(first_char, common->fcc, first_char);
3809 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3810 if (first_char > 127 && common->utf)
3811 oc = UCD_OTHERCASE(first_char);
3812 #endif
3813 }
3814 if (first_char == oc)
3815 found = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
3816 else
3817 {
3818 bit = first_char ^ oc;
3819 if (is_powerof2(bit))
3820 {
3821 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
3822 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
3823 }
3824 else
3825 {
3826 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
3827 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3828 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
3829 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
3830 found = JUMP(SLJIT_NOT_ZERO);
3831 }
3832 }
3833
3834 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3835 JUMPTO(SLJIT_JUMP, start);
3836 JUMPHERE(found);
3837 JUMPHERE(quit);
3838
3839 if (firstline)
3840 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3841 }
3842
fast_forward_newline(compiler_common * common,BOOL firstline)3843 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
3844 {
3845 DEFINE_COMPILER;
3846 struct sljit_label *loop;
3847 struct sljit_jump *lastchar;
3848 struct sljit_jump *firstchar;
3849 struct sljit_jump *quit;
3850 struct sljit_jump *foundcr = NULL;
3851 struct sljit_jump *notfoundnl;
3852 jump_list *newline = NULL;
3853
3854 if (firstline)
3855 {
3856 SLJIT_ASSERT(common->first_line_end != 0);
3857 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3858 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3859 }
3860
3861 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3862 {
3863 lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3864 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3865 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3866 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3867 firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3868
3869 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
3870 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
3871 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER_EQUAL);
3872 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3873 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
3874 #endif
3875 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3876
3877 loop = LABEL();
3878 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3879 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3880 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
3881 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3882 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
3883 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
3884
3885 JUMPHERE(quit);
3886 JUMPHERE(firstchar);
3887 JUMPHERE(lastchar);
3888
3889 if (firstline)
3890 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3891 return;
3892 }
3893
3894 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3895 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3896 firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3897 skip_char_back(common);
3898
3899 loop = LABEL();
3900 common->ff_newline_shortcut = loop;
3901
3902 read_char_range(common, common->nlmin, common->nlmax, TRUE);
3903 lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3904 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3905 foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3906 check_newlinechar(common, common->nltype, &newline, FALSE);
3907 set_jumps(newline, loop);
3908
3909 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3910 {
3911 quit = JUMP(SLJIT_JUMP);
3912 JUMPHERE(foundcr);
3913 notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3914 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3915 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
3916 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3917 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3918 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3919 #endif
3920 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3921 JUMPHERE(notfoundnl);
3922 JUMPHERE(quit);
3923 }
3924 JUMPHERE(lastchar);
3925 JUMPHERE(firstchar);
3926
3927 if (firstline)
3928 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3929 }
3930
3931 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
3932
fast_forward_start_bits(compiler_common * common,pcre_uint8 * start_bits,BOOL firstline)3933 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, pcre_uint8 *start_bits, BOOL firstline)
3934 {
3935 DEFINE_COMPILER;
3936 struct sljit_label *start;
3937 struct sljit_jump *quit;
3938 struct sljit_jump *found = NULL;
3939 jump_list *matches = NULL;
3940 #ifndef COMPILE_PCRE8
3941 struct sljit_jump *jump;
3942 #endif
3943
3944 if (firstline)
3945 {
3946 SLJIT_ASSERT(common->first_line_end != 0);
3947 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
3948 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3949 }
3950
3951 start = LABEL();
3952 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3953 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3954 #ifdef SUPPORT_UTF
3955 if (common->utf)
3956 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3957 #endif
3958
3959 if (!check_class_ranges(common, start_bits, (start_bits[31] & 0x80) != 0, TRUE, &matches))
3960 {
3961 #ifndef COMPILE_PCRE8
3962 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 255);
3963 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
3964 JUMPHERE(jump);
3965 #endif
3966 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3967 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3968 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
3969 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3970 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3971 found = JUMP(SLJIT_NOT_ZERO);
3972 }
3973
3974 #ifdef SUPPORT_UTF
3975 if (common->utf)
3976 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3977 #endif
3978 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3979 #ifdef SUPPORT_UTF
3980 #if defined COMPILE_PCRE8
3981 if (common->utf)
3982 {
3983 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
3984 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3985 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3986 }
3987 #elif defined COMPILE_PCRE16
3988 if (common->utf)
3989 {
3990 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
3991 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3992 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3993 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3994 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3995 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3996 }
3997 #endif /* COMPILE_PCRE[8|16] */
3998 #endif /* SUPPORT_UTF */
3999 JUMPTO(SLJIT_JUMP, start);
4000 if (found != NULL)
4001 JUMPHERE(found);
4002 if (matches != NULL)
4003 set_jumps(matches, LABEL());
4004 JUMPHERE(quit);
4005
4006 if (firstline)
4007 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
4008 }
4009
search_requested_char(compiler_common * common,pcre_uchar req_char,BOOL caseless,BOOL has_firstchar)4010 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
4011 {
4012 DEFINE_COMPILER;
4013 struct sljit_label *loop;
4014 struct sljit_jump *toolong;
4015 struct sljit_jump *alreadyfound;
4016 struct sljit_jump *found;
4017 struct sljit_jump *foundoc = NULL;
4018 struct sljit_jump *notfound;
4019 pcre_uint32 oc, bit;
4020
4021 SLJIT_ASSERT(common->req_char_ptr != 0);
4022 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
4023 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
4024 toolong = CMP(SLJIT_LESS, TMP1, 0, STR_END, 0);
4025 alreadyfound = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
4026
4027 if (has_firstchar)
4028 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4029 else
4030 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
4031
4032 loop = LABEL();
4033 notfound = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0);
4034
4035 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
4036 oc = req_char;
4037 if (caseless)
4038 {
4039 oc = TABLE_GET(req_char, common->fcc, req_char);
4040 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
4041 if (req_char > 127 && common->utf)
4042 oc = UCD_OTHERCASE(req_char);
4043 #endif
4044 }
4045 if (req_char == oc)
4046 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
4047 else
4048 {
4049 bit = req_char ^ oc;
4050 if (is_powerof2(bit))
4051 {
4052 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
4053 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
4054 }
4055 else
4056 {
4057 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
4058 foundoc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc);
4059 }
4060 }
4061 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4062 JUMPTO(SLJIT_JUMP, loop);
4063
4064 JUMPHERE(found);
4065 if (foundoc)
4066 JUMPHERE(foundoc);
4067 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);
4068 JUMPHERE(alreadyfound);
4069 JUMPHERE(toolong);
4070 return notfound;
4071 }
4072
static void do_revertframes(compiler_common *common)
{
/* Emits the fast-call subroutine which walks the entries stored from
STACK_TOP upwards and copies the saved values back to the locals.

Each entry starts with a machine word which is interpreted as follows:
  > 0  offset of a local; the next two words are stored at that offset and
       the word after it (the entry is three words long)
  < 0  negated offset of a local; the next word is stored there (the entry
       is two words long)
  = 0  end of the entries; the subroutine returns

The offsets are relative to the base which GET_LOCAL_BASE puts into TMP3. */
DEFINE_COMPILER;
struct sljit_jump *not_positive;
struct sljit_jump *negative;
struct sljit_label *next_entry;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
GET_LOCAL_BASE(TMP3, 0, 0);

/* Drop frames until we reach STACK_TOP. */
next_entry = LABEL();
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
not_positive = JUMP(SLJIT_SIG_LESS_EQUAL);

/* Positive word: restore two values. */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
JUMPTO(SLJIT_JUMP, next_entry);

/* The flags of the comparison above are still in effect here. */
JUMPHERE(not_positive);
negative = JUMP(SLJIT_SIG_LESS);
/* Zero word: end of dropping frames. */
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* Negative word: restore a single value. */
JUMPHERE(negative);
OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
JUMPTO(SLJIT_JUMP, next_entry);
}
4107
/* Emits the shared fast-call routine used for word boundary tests.

The generated code classifies two characters as word / non-word (0 or 1):
the character before STR_PTR, whose result is kept in LOCALS1 (it stays 0
when STR_PTR is not past jit_arguments.begin), and the character fetched by
peek_char(), whose result is built in TMP2 (it stays 0 when one of the jumps
collected by check_str_end() is taken - presumably at the end of the subject).
The routine finishes by XOR-ing the two results with SLJIT_SET_E, so the
caller can branch on the equality flags. The return address lives in LOCALS0
for the duration of the routine; TMP1 and TMP2 are overwritten. */
static void check_wordboundary(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *skipread;
jump_list *skipread_list = NULL;
#if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
struct sljit_jump *jump;
#endif

/* The non-UCP paths below extract the word flag with ">> 4", which is only
valid while ctype_word is bit 4. */
SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);

sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
/* Get type of the previous char, and put it to LOCALS1. */
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
/* Default: the previous character is not a word character. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, SLJIT_IMM, 0);
/* Nothing to read when STR_PTR is at (or before) the subject start. */
skipread = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
skip_char_back(common);
check_start_used_ptr(common);
read_char(common);

/* Testing char type. */
#ifdef SUPPORT_UCP
if (common->use_ucp)
  {
  /* Underscore is a word character without consulting the UCD. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
  jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
  /* NOTE(review): getucd presumably replaces TMP1 with the character's
  general category type - confirm against the getucd helper. */
  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
  /* TMP2 = (type in ucp_Ll..ucp_Lu) as an unsigned range test. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
  /* TMP2 |= (type in ucp_Nd..ucp_No); TMP1 is rebased from ucp_Ll to ucp_Nd. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
  JUMPHERE(jump);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP2, 0);
  }
else
#endif
  {
  /* Characters above 255 have no ctypes entry: skip the table lookup and
  leave LOCALS1 at 0. */
#ifndef COMPILE_PCRE8
  jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#elif defined SUPPORT_UTF
  /* Here LOCALS1 has already been zeroed. */
  jump = NULL;
  if (common->utf)
    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#endif /* COMPILE_PCRE8 */
  /* LOCALS1 = (ctypes[TMP1] >> 4) & 1, i.e. the ctype_word flag. */
  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
#ifndef COMPILE_PCRE8
  JUMPHERE(jump);
#elif defined SUPPORT_UTF
  if (jump != NULL)
    JUMPHERE(jump);
#endif /* COMPILE_PCRE8 */
  }
JUMPHERE(skipread);

/* Default for the second character: not a word character. Every jump put
on skipread_list by check_str_end() lands after the classification below. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
check_str_end(common, &skipread_list);
peek_char(common, READ_CHAR_MAX);

/* Testing char type. This is a code duplication. */
#ifdef SUPPORT_UCP
if (common->use_ucp)
  {
  /* Same classification as above, but the result stays in TMP2. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
  jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
  JUMPHERE(jump);
  }
else
#endif
  {
#ifndef COMPILE_PCRE8
  /* TMP2 may be destroyed by peek_char. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#elif defined SUPPORT_UTF
  /* Re-zero TMP2 here as well, before the optional "above 255" skip. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  jump = NULL;
  if (common->utf)
    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#endif
  /* TMP2 = (ctypes[TMP1] >> 4) & 1, i.e. the ctype_word flag. */
  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
  OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
#ifndef COMPILE_PCRE8
  JUMPHERE(jump);
#elif defined SUPPORT_UTF
  if (jump != NULL)
    JUMPHERE(jump);
#endif /* COMPILE_PCRE8 */
  }
set_jumps(skipread_list, LABEL());

/* Flags := (TMP2 ^ LOCALS1); the result is zero when both characters have
the same word / non-word classification. */
OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
}
4216
/* Tries to replace a 256 bit class bitmap test with a few compare
instructions on TMP1.

Arguments:
  common      compiler state
  bits        32 byte bitmap; bit (c & 0x7) of byte (c >> 3) describes
              character c
  nclass      the membership value assumed for characters above 255: when
              it differs from the value of character 255, an extra boundary
              is recorded at 256
  invert      FALSE: the emitted jumps are taken when the character is NOT
              in the class; TRUE: they are taken when it IS in the class
  backtracks  jump list that receives the emitted jumps

Returns TRUE when code was emitted (TMP1 may have been modified by it), and
FALSE - without emitting anything - when the bitmap has more than four
boundaries, in which case the caller has to test the bitmap itself.

The bitmap is first converted to a sorted list of boundaries: ranges[k] is
the first character whose membership differs from that of the character
before it. With four boundaries the class is [r0,r1) + [r2,r3) or its
complement, depending on the first bit. */
static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
DEFINE_COMPILER;
int ranges[MAX_RANGE_SIZE];
pcre_uint8 bit, cbit, all;
int i, byte, length = 0;

bit = bits[0] & 0x1;
/* All bits will be zero or one (since bit is zero or one). */
all = -bit;

for (i = 0; i < 256; )
  {
  byte = i >> 3;
  /* A whole byte that matches the current state contains no boundary. */
  if ((i & 0x7) == 0 && bits[byte] == all)
    i += 8;
  else
    {
    cbit = (bits[byte] >> (i & 0x7)) & 0x1;
    if (cbit != bit)
      {
      if (length >= MAX_RANGE_SIZE)
        return FALSE;
      ranges[length] = i;
      length++;
      bit = cbit;
      all = -cbit;
      }
    i++;
    }
  }

/* Record a boundary at 256 when characters above 255 behave differently
from character 255. */
if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
  {
  if (length >= MAX_RANGE_SIZE)
    return FALSE;
  ranges[length] = 256;
  length++;
  }

/* The switch below only knows how to handle up to four boundaries. */
if (length > 4)
  return FALSE;

bit = bits[0] & 0x1;
if (invert) bit ^= 0x1;

/* No character is accepted. */
if (length == 0 && bit == 0)
  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));

switch(length)
  {
  case 0:
  /* When bit != 0, all characters are accepted. */
  return TRUE;

  case 1:
  add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
  return TRUE;

  case 2:
  if (ranges[0] + 1 != ranges[1])
    {
    /* Unsigned range test: (TMP1 - r0) < (r1 - r0). */
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
    add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
    }
  else
    add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
  return TRUE;

  case 3:
  if (bit != 0)
    {
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
    if (ranges[0] + 1 != ranges[1])
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
      }
    else
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
    return TRUE;
    }

  add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
  if (ranges[1] + 1 != ranges[2])
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
    }
  else
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
  return TRUE;

  case 4:
  /* Two ranges of equal length whose start points differ in exactly one
  bit can be merged: setting that bit maps [r0,r1) onto [r2,r3), so a
  single range test is enough. This is only valid when the bit is clear
  in every character of the first range (and therefore set in every
  character of the second one). Since r0 has the bit clear and r1 < r2 =
  r0 + bit, requiring the bit to be clear in r1 as well guarantees that
  [r0,r1) does not cross into the half where the bit is set. Without this
  check a class such as [\x03-\x05\x07-\x09] would lose \x04 and \x08. */
  if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
      && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
      && (ranges[1] & (ranges[2] - ranges[0])) == 0
      && is_powerof2(ranges[2] - ranges[0]))
    {
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
    if (ranges[2] + 1 != ranges[3])
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
      add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
      }
    else
      add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
    return TRUE;
    }

  if (bit != 0)
    {
    /* i tracks how much has already been subtracted from TMP1. */
    i = 0;
    if (ranges[0] + 1 != ranges[1])
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
      i = ranges[0];
      }
    else
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));

    if (ranges[2] + 1 != ranges[3])
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
      }
    else
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
    return TRUE;
    }

  /* bit == 0: jump when the character is outside [r0,r3) or inside the
  gap [r1,r2). */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
  if (ranges[1] + 1 != ranges[2])
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
    }
  else
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
  return TRUE;

  default:
  SLJIT_ASSERT_STOP();
  return FALSE;
  }
}
4365
static void check_anynewline(compiler_common *common)
{
/* Emits the fast-call helper that tests whether TMP1 is one of 0x0a..0x0d,
0x85 or (when characters above 255 can occur) 0x2028 / 0x2029. The answer is
accumulated in TMP2 and the final OR also sets the equality flags. TMP1 is
modified by the emitted code as well. */
DEFINE_COMPILER;
#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
#ifdef COMPILE_PCRE8
/* 8 bit library: wide characters exist only in UTF mode. */
const BOOL has_wide_chars = common->utf;
#else
const BOOL has_wide_chars = TRUE;
#endif
#else
const BOOL has_wide_chars = FALSE;
#endif

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Rebase TMP1 on 0x0a: 0x0a..0x0d becomes the unsigned range 0..3. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);

/* Compare against 0x85; the result is merged by the next OP_FLAGS. */
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);

if (has_wide_chars)
  {
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
  /* Setting the lowest bit maps 0x2028 - 0x0a onto 0x2029 - 0x0a, so one
  compare covers both characters. */
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
  }

/* Merge the last compare and leave the flags set for the caller. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
4392
static void check_hspace(compiler_common *common)
{
/* Emits the fast-call helper that tests whether TMP1 is a horizontal space
character: 0x09, 0x20, 0xa0 and, when characters above 255 can occur, 0x1680,
0x180e, 0x2000..0x200a, 0x202f, 0x205f and 0x3000. The answer is accumulated
in TMP2 and the final OR also sets the equality flags. On the wide character
path TMP1 is rebased by 0x2000. */
DEFINE_COMPILER;
#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
#ifdef COMPILE_PCRE8
/* 8 bit library: wide characters exist only in UTF mode. */
const BOOL has_wide_chars = common->utf;
#else
const BOOL has_wide_chars = TRUE;
#endif
#else
const BOOL has_wide_chars = FALSE;
#endif

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Each compare is merged into TMP2 by the OP_FLAGS that follows it. */
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);

if (has_wide_chars)
  {
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);

  /* Rebase on 0x2000: 0x2000..0x200a becomes the unsigned range 0..10. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);

  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
  }

/* Merge the last compare and leave the flags set for the caller. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);

sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
4431
static void check_vspace(compiler_common *common)
{
/* Emits the fast-call helper that tests whether TMP1 is a vertical space
character: 0x0a..0x0d, 0x85 and, when characters above 255 can occur,
0x2028 / 0x2029. The answer is accumulated in TMP2 and the final OR also
sets the equality flags. TMP1 is modified by the emitted code as well. */
DEFINE_COMPILER;
#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
#ifdef COMPILE_PCRE8
/* 8 bit library: wide characters exist only in UTF mode. */
const BOOL has_wide_chars = common->utf;
#else
const BOOL has_wide_chars = TRUE;
#endif
#else
const BOOL has_wide_chars = FALSE;
#endif

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Rebase TMP1 on 0x0a: 0x0a..0x0d becomes the unsigned range 0..3. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);

/* Compare against 0x85; the result is merged by the next OP_FLAGS. */
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);

if (has_wide_chars)
  {
  OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
  /* Setting the lowest bit maps 0x2028 - 0x0a onto 0x2029 - 0x0a, so one
  compare covers both characters. */
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
  }

/* Merge the last compare and leave the flags set for the caller. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);

sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
4459
/* Scratch registers for the compare helpers below. They alias STR_END and
STACK_TOP, so each helper saves the original register contents on entry and
restores them before returning. */
#define CHAR1 STR_END
#define CHAR2 STACK_TOP
4462
static void do_casefulcmp(compiler_common *common)
{
/* Emits the fast-call helper that compares two code unit sequences exactly.
Register usage, as far as the emitted code shows: TMP1 addresses the first
sequence, STR_PTR (after being moved back by TMP2) the second one, and TMP2
is the remaining length, counted down in steps of IN_UCHARS(1). CHAR1 and
CHAR2 are borrowed as scratch registers and restored before returning. */
DEFINE_COMPILER;
struct sljit_jump *mismatch;
struct sljit_label *next_unit;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

/* Preserve the registers that CHAR1 / CHAR2 alias. */
OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, CHAR2, 0);

/* Step both pointers one unit back; the loads in the loop use an offset of
one unit. NOTE(review): MOVU_UCHAR presumably also advances the base
register - confirm against the macro definition. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

next_unit = LABEL();
OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
mismatch = CMP(SLJIT_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
JUMPTO(SLJIT_NOT_ZERO, next_unit);

/* Common exit for "all units matched" and "mismatch found". */
JUMPHERE(mismatch);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
4489
/* Register that holds the address of the lower casing table (common->lcc)
inside do_caselesscmp. It aliases STACK_LIMIT, which is saved and restored
by that helper. */
#define LCC_TABLE STACK_LIMIT
4491
static void do_caselesscmp(compiler_common *common)
{
/* Emits the fast-call helper that compares two code unit sequences after
mapping every unit through the common->lcc table. In the 16 and 32 bit
libraries units above 255 bypass the table and are compared as they are.
Register usage mirrors do_casefulcmp: TMP1 and STR_PTR address the two
sequences and TMP2 is the remaining length. LCC_TABLE, CHAR1 and CHAR2 are
borrowed as scratch registers and restored before returning. */
DEFINE_COMPILER;
struct sljit_jump *mismatch;
struct sljit_label *next_unit;
#ifndef COMPILE_PCRE8
struct sljit_jump *above_table;
#endif

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

/* Preserve the borrowed registers, then load the table address. */
OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, CHAR1, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, CHAR2, 0);
OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);

/* Step both pointers one unit back; the loads in the loop use an offset of
one unit. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

next_unit = LABEL();
OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));

/* CHAR1 = lcc[CHAR1], skipped for units that have no table entry. */
#ifndef COMPILE_PCRE8
above_table = CMP(SLJIT_GREATER, CHAR1, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
#ifndef COMPILE_PCRE8
JUMPHERE(above_table);
#endif

/* CHAR2 = lcc[CHAR2], with the same exception. */
#ifndef COMPILE_PCRE8
above_table = CMP(SLJIT_GREATER, CHAR2, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
#ifndef COMPILE_PCRE8
JUMPHERE(above_table);
#endif

mismatch = CMP(SLJIT_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
JUMPTO(SLJIT_NOT_ZERO, next_unit);

/* Common exit for "all units matched" and "mismatch found". */
JUMPHERE(mismatch);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
4534
4535 #undef LCC_TABLE
4536 #undef CHAR1
4537 #undef CHAR2
4538
4539 #if defined SUPPORT_UTF && defined SUPPORT_UCP
4540
do_utf_caselesscmp(pcre_uchar * src1,jit_arguments * args,pcre_uchar * end1)4541 static const pcre_uchar * SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
4542 {
4543 /* This function would be ineffective to do in JIT level. */
4544 pcre_uint32 c1, c2;
4545 const pcre_uchar *src2 = args->uchar_ptr;
4546 const pcre_uchar *end2 = args->end;
4547 const ucd_record *ur;
4548 const pcre_uint32 *pp;
4549
4550 while (src1 < end1)
4551 {
4552 if (src2 >= end2)
4553 return (pcre_uchar*)1;
4554 GETCHARINC(c1, src1);
4555 GETCHARINC(c2, src2);
4556 ur = GET_UCD(c2);
4557 if (c1 != c2 && c1 != c2 + ur->other_case)
4558 {
4559 pp = PRIV(ucd_caseless_sets) + ur->caseset;
4560 for (;;)
4561 {
4562 if (c1 < *pp) return NULL;
4563 if (c1 == *pp++) break;
4564 }
4565 }
4566 }
4567 return src2;
4568 }
4569
4570 #endif /* SUPPORT_UTF && SUPPORT_UCP */
4571
/* Emits the compare code for the single character starting at cc (all of its
code units when it is a multi-unit UTF character).

Arguments:
  common      compiler state
  caseless    TRUE when the character has to match caselessly
  cc          the character in the compiled pattern
  context     state shared by the calls that belong to one character
              sequence: length is the number of bytes still to be compared
              (the subject data is addressed as STR_PTR - length), sourcereg
              is the register the next load goes to (-1 before the first
              call), and c / oc / ucharptr collect code units so that
              several of them can be checked by one wider compare
  backtracks  jump list that receives the "does not match" jumps

Returns the pattern pointer advanced past the character.

Caseless matching is only done by OR-ing a single bit: when the character
has an other case, othercasechar points to the code unit that contains the
differing bit and othercasebit is that bit. */
static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
    compare_context *context, jump_list **backtracks)
{
DEFINE_COMPILER;
unsigned int othercasebit = 0;
pcre_uchar *othercasechar = NULL;
#ifdef SUPPORT_UTF
int utflength;
#endif

if (caseless && char_has_othercase(common, cc))
  {
  othercasebit = char_get_othercase_bit(common, cc);
  SLJIT_ASSERT(othercasebit);
  /* Extracting bit difference info. */
#if defined COMPILE_PCRE8
  /* Upper part: index of the affected code unit; low byte: the bit. */
  othercasechar = cc + (othercasebit >> 8);
  othercasebit &= 0xff;
#elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
  /* Note that this code only handles characters in the BMP. If there
  ever are characters outside the BMP whose othercase differs in only one
  bit from itself (there currently are none), this code will need to be
  revised for COMPILE_PCRE32. */
  othercasechar = cc + (othercasebit >> 9);
  /* Bit 0x100 set means the differing bit lives in the high byte. */
  if ((othercasebit & 0x100) != 0)
    othercasebit = (othercasebit & 0xff) << 8;
  else
    othercasebit &= 0xff;
#endif /* COMPILE_PCRE[8|16|32] */
  }

/* First call for this sequence: preload the first chunk into TMP1. Later
loads alternate between TMP2 and TMP1. */
if (context->sourcereg == -1)
  {
#if defined COMPILE_PCRE8
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  if (context->length >= 4)
    OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else if (context->length >= 2)
    OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else
#endif
    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#elif defined COMPILE_PCRE16
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  if (context->length >= 4)
    OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else
#endif
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#elif defined COMPILE_PCRE32
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#endif /* COMPILE_PCRE[8|16|32] */
  context->sourcereg = TMP2;
  }

#ifdef SUPPORT_UTF
/* In UTF mode the loop below runs once per code unit of the character. */
utflength = 1;
if (common->utf && HAS_EXTRALEN(*cc))
  utflength += GET_EXTRALEN(*cc);

do
  {
#endif

  context->length -= IN_UCHARS(1);
#if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)

  /* Unaligned read is supported. */
  /* Collect the expected value (c) and the case bit mask (oc) of this unit;
  the compare is only emitted once a full chunk has been gathered. */
  if (othercasebit != 0 && othercasechar == cc)
    {
    context->c.asuchars[context->ucharptr] = *cc | othercasebit;
    context->oc.asuchars[context->ucharptr] = othercasebit;
    }
  else
    {
    context->c.asuchars[context->ucharptr] = *cc;
    context->oc.asuchars[context->ucharptr] = 0;
    }
  context->ucharptr++;

  /* Flush when the chunk is full, when the sequence ends, or (8 bit only)
  when two units are collected and a single one remains. */
#if defined COMPILE_PCRE8
  if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
#else
  if (context->ucharptr >= 2 || context->length == 0)
#endif
    {
    /* Start loading the next chunk into the other register before the
    current one is compared. */
    if (context->length >= 4)
      OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
    else if (context->length >= 2)
      OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
#if defined COMPILE_PCRE8
    else if (context->length >= 1)
      OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
#endif /* COMPILE_PCRE8 */
    /* Switch to the register that holds the previously loaded chunk. */
    context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;

    /* The case labels are code unit counts that correspond to 4, 2 and
    1 byte wide compares. */
    switch(context->ucharptr)
      {
      case 4 / sizeof(pcre_uchar):
      if (context->oc.asint != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
      break;

      case 2 / sizeof(pcre_uchar):
      if (context->oc.asushort != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
      break;

#ifdef COMPILE_PCRE8
      case 1:
      if (context->oc.asbyte != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
      break;
#endif

      default:
      SLJIT_ASSERT_STOP();
      break;
      }
    context->ucharptr = 0;
    }

#else

  /* Unaligned read is unsupported or in 32 bit mode. */
  /* One code unit per compare: load the next unit (if any) into the other
  register, then test the unit loaded earlier. */
  if (context->length >= 1)
    OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);

  context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;

  if (othercasebit != 0 && othercasechar == cc)
    {
    OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
    }
  else
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));

#endif

  cc++;
#ifdef SUPPORT_UTF
  utflength--;
  }
while (utflength > 0);
#endif

return cc;
}
4724
4725 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4726
/* Rebases the character type held in typereg. The register holds the type
minus typeoffset; after the macro it holds the type minus (value), and
typeoffset is updated to (value). Nothing is emitted when the offset is
already right. The macro uses the typereg and typeoffset variables of the
enclosing function, evaluates its argument several times, and expands to
more than one statement, so it must not be used as the unbraced body of an
if / else / loop. */
#define SET_TYPE_OFFSET(value) \
  if ((value) != typeoffset) \
    { \
    if ((value) < typeoffset) \
      OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
    else \
      OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
    } \
  typeoffset = (value);
4736
/* Rebases the character held in TMP1. The register holds the character
minus charoffset; after the macro it holds the character minus (value), and
charoffset is updated to (value). Nothing is emitted when the offset is
already right. The macro uses the charoffset variable of the enclosing
function, evaluates its argument several times, and expands to more than one
statement, so it must not be used as the unbraced body of an if / else /
loop. */
#define SET_CHAR_OFFSET(value) \
  if ((value) != charoffset) \
    { \
    if ((value) < charoffset) \
      OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
    else \
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
    } \
  charoffset = (value);
4746
compile_xclass_matchingpath(compiler_common * common,pcre_uchar * cc,jump_list ** backtracks)4747 static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
4748 {
4749 DEFINE_COMPILER;
4750 jump_list *found = NULL;
4751 jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
4752 sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
4753 struct sljit_jump *jump = NULL;
4754 pcre_uchar *ccbegin;
4755 int compares, invertcmp, numberofcmps;
4756 #if defined SUPPORT_UTF && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
4757 BOOL utf = common->utf;
4758 #endif
4759
4760 #ifdef SUPPORT_UCP
4761 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
4762 BOOL charsaved = FALSE;
4763 int typereg = TMP1, scriptreg = TMP1;
4764 const pcre_uint32 *other_cases;
4765 sljit_uw typeoffset;
4766 #endif
4767
4768 /* Scanning the necessary info. */
4769 cc++;
4770 ccbegin = cc;
4771 compares = 0;
4772 if (cc[-1] & XCL_MAP)
4773 {
4774 min = 0;
4775 cc += 32 / sizeof(pcre_uchar);
4776 }
4777
4778 while (*cc != XCL_END)
4779 {
4780 compares++;
4781 if (*cc == XCL_SINGLE)
4782 {
4783 cc ++;
4784 GETCHARINCTEST(c, cc);
4785 if (c > max) max = c;
4786 if (c < min) min = c;
4787 #ifdef SUPPORT_UCP
4788 needschar = TRUE;
4789 #endif
4790 }
4791 else if (*cc == XCL_RANGE)
4792 {
4793 cc ++;
4794 GETCHARINCTEST(c, cc);
4795 if (c < min) min = c;
4796 GETCHARINCTEST(c, cc);
4797 if (c > max) max = c;
4798 #ifdef SUPPORT_UCP
4799 needschar = TRUE;
4800 #endif
4801 }
4802 #ifdef SUPPORT_UCP
4803 else
4804 {
4805 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
4806 cc++;
4807 if (*cc == PT_CLIST)
4808 {
4809 other_cases = PRIV(ucd_caseless_sets) + cc[1];
4810 while (*other_cases != NOTACHAR)
4811 {
4812 if (*other_cases > max) max = *other_cases;
4813 if (*other_cases < min) min = *other_cases;
4814 other_cases++;
4815 }
4816 }
4817 else
4818 {
4819 max = READ_CHAR_MAX;
4820 min = 0;
4821 }
4822
4823 switch(*cc)
4824 {
4825 case PT_ANY:
4826 break;
4827
4828 case PT_LAMP:
4829 case PT_GC:
4830 case PT_PC:
4831 case PT_ALNUM:
4832 needstype = TRUE;
4833 break;
4834
4835 case PT_SC:
4836 needsscript = TRUE;
4837 break;
4838
4839 case PT_SPACE:
4840 case PT_PXSPACE:
4841 case PT_WORD:
4842 case PT_PXGRAPH:
4843 case PT_PXPRINT:
4844 case PT_PXPUNCT:
4845 needstype = TRUE;
4846 needschar = TRUE;
4847 break;
4848
4849 case PT_CLIST:
4850 case PT_UCNC:
4851 needschar = TRUE;
4852 break;
4853
4854 default:
4855 SLJIT_ASSERT_STOP();
4856 break;
4857 }
4858 cc += 2;
4859 }
4860 #endif
4861 }
4862
4863 /* We are not necessary in utf mode even in 8 bit mode. */
4864 cc = ccbegin;
4865 detect_partial_match(common, backtracks);
4866 read_char_range(common, min, max, (cc[-1] & XCL_NOT) != 0);
4867
4868 if ((cc[-1] & XCL_HASPROP) == 0)
4869 {
4870 if ((cc[-1] & XCL_MAP) != 0)
4871 {
4872 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4873 if (!check_class_ranges(common, (const pcre_uint8 *)cc, (((const pcre_uint8 *)cc)[31] & 0x80) != 0, TRUE, &found))
4874 {
4875 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4876 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4877 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4878 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4879 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4880 add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO));
4881 }
4882
4883 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4884 JUMPHERE(jump);
4885
4886 cc += 32 / sizeof(pcre_uchar);
4887 }
4888 else
4889 {
4890 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
4891 add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, max - min));
4892 }
4893 }
4894 else if ((cc[-1] & XCL_MAP) != 0)
4895 {
4896 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4897 #ifdef SUPPORT_UCP
4898 charsaved = TRUE;
4899 #endif
4900 if (!check_class_ranges(common, (const pcre_uint8 *)cc, FALSE, TRUE, list))
4901 {
4902 #ifdef COMPILE_PCRE8
4903 SLJIT_ASSERT(common->utf);
4904 #endif
4905 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4906
4907 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4908 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4909 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4910 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4911 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4912 add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO));
4913
4914 JUMPHERE(jump);
4915 }
4916
4917 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4918 cc += 32 / sizeof(pcre_uchar);
4919 }
4920
4921 #ifdef SUPPORT_UCP
4922 /* Simple register allocation. TMP1 is preferred if possible. */
4923 if (needstype || needsscript)
4924 {
4925 if (needschar && !charsaved)
4926 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4927 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4928 if (needschar)
4929 {
4930 if (needstype)
4931 {
4932 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
4933 typereg = RETURN_ADDR;
4934 }
4935
4936 if (needsscript)
4937 scriptreg = TMP3;
4938 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4939 }
4940 else if (needstype && needsscript)
4941 scriptreg = TMP3;
4942 /* In all other cases only one of them was specified, and that can goes to TMP1. */
4943
4944 if (needsscript)
4945 {
4946 if (scriptreg == TMP1)
4947 {
4948 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4949 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
4950 }
4951 else
4952 {
4953 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
4954 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4955 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
4956 }
4957 }
4958 }
4959 #endif
4960
4961 /* Generating code. */
4962 charoffset = 0;
4963 numberofcmps = 0;
4964 #ifdef SUPPORT_UCP
4965 typeoffset = 0;
4966 #endif
4967
4968 while (*cc != XCL_END)
4969 {
4970 compares--;
4971 invertcmp = (compares == 0 && list != backtracks);
4972 jump = NULL;
4973
4974 if (*cc == XCL_SINGLE)
4975 {
4976 cc ++;
4977 GETCHARINCTEST(c, cc);
4978
4979 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4980 {
4981 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4982 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_EQUAL);
4983 numberofcmps++;
4984 }
4985 else if (numberofcmps > 0)
4986 {
4987 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4988 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4989 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
4990 numberofcmps = 0;
4991 }
4992 else
4993 {
4994 jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4995 numberofcmps = 0;
4996 }
4997 }
4998 else if (*cc == XCL_RANGE)
4999 {
5000 cc ++;
5001 GETCHARINCTEST(c, cc);
5002 SET_CHAR_OFFSET(c);
5003 GETCHARINCTEST(c, cc);
5004
5005 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
5006 {
5007 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5008 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_LESS_EQUAL);
5009 numberofcmps++;
5010 }
5011 else if (numberofcmps > 0)
5012 {
5013 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5014 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5015 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5016 numberofcmps = 0;
5017 }
5018 else
5019 {
5020 jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5021 numberofcmps = 0;
5022 }
5023 }
5024 #ifdef SUPPORT_UCP
5025 else
5026 {
5027 if (*cc == XCL_NOTPROP)
5028 invertcmp ^= 0x1;
5029 cc++;
5030 switch(*cc)
5031 {
5032 case PT_ANY:
5033 if (list != backtracks)
5034 {
5035 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
5036 continue;
5037 }
5038 else if (cc[-1] == XCL_NOTPROP)
5039 continue;
5040 jump = JUMP(SLJIT_JUMP);
5041 break;
5042
5043 case PT_LAMP:
5044 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
5045 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5046 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
5047 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5048 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
5049 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5050 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5051 break;
5052
5053 case PT_GC:
5054 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
5055 SET_TYPE_OFFSET(c);
5056 jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
5057 break;
5058
5059 case PT_PC:
5060 jump = CMP(SLJIT_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
5061 break;
5062
5063 case PT_SC:
5064 jump = CMP(SLJIT_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
5065 break;
5066
5067 case PT_SPACE:
5068 case PT_PXSPACE:
5069 SET_CHAR_OFFSET(9);
5070 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
5071 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5072
5073 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
5074 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5075
5076 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
5077 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5078
5079 SET_TYPE_OFFSET(ucp_Zl);
5080 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
5081 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5082 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5083 break;
5084
5085 case PT_WORD:
5086 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
5087 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5088 /* Fall through. */
5089
5090 case PT_ALNUM:
5091 SET_TYPE_OFFSET(ucp_Ll);
5092 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
5093 OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_LESS_EQUAL);
5094 SET_TYPE_OFFSET(ucp_Nd);
5095 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
5096 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5097 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5098 break;
5099
5100 case PT_CLIST:
5101 other_cases = PRIV(ucd_caseless_sets) + cc[1];
5102
5103 /* At least three characters are required.
5104 Otherwise this case would be handled by the normal code path. */
5105 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
5106 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
5107
5108 /* Optimizing character pairs, if their difference is power of 2. */
5109 if (is_powerof2(other_cases[1] ^ other_cases[0]))
5110 {
5111 if (charoffset == 0)
5112 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5113 else
5114 {
5115 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
5116 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5117 }
5118 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
5119 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5120 other_cases += 2;
5121 }
5122 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
5123 {
5124 if (charoffset == 0)
5125 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
5126 else
5127 {
5128 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
5129 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5130 }
5131 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
5132 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5133
5134 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
5135 OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5136
5137 other_cases += 3;
5138 }
5139 else
5140 {
5141 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
5142 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5143 }
5144
5145 while (*other_cases != NOTACHAR)
5146 {
5147 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
5148 OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5149 }
5150 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5151 break;
5152
5153 case PT_UCNC:
5154 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
5155 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5156 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
5157 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5158 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
5159 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5160
5161 SET_CHAR_OFFSET(0xa0);
5162 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
5163 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5164 SET_CHAR_OFFSET(0);
5165 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
5166 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_GREATER_EQUAL);
5167 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5168 break;
5169
5170 case PT_PXGRAPH:
5171 /* C and Z groups are the farthest two groups. */
5172 SET_TYPE_OFFSET(ucp_Ll);
5173 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
5174 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER);
5175
5176 jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
5177
5178 /* In case of ucp_Cf, we overwrite the result. */
5179 SET_CHAR_OFFSET(0x2066);
5180 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
5181 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5182
5183 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
5184 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5185
5186 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
5187 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5188
5189 JUMPHERE(jump);
5190 jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
5191 break;
5192
5193 case PT_PXPRINT:
5194 /* C and Z groups are the farthest two groups. */
5195 SET_TYPE_OFFSET(ucp_Ll);
5196 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
5197 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER);
5198
5199 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
5200 OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_NOT_EQUAL);
5201
5202 jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
5203
5204 /* In case of ucp_Cf, we overwrite the result. */
5205 SET_CHAR_OFFSET(0x2066);
5206 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
5207 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5208
5209 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
5210 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5211
5212 JUMPHERE(jump);
5213 jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
5214 break;
5215
5216 case PT_PXPUNCT:
5217 SET_TYPE_OFFSET(ucp_Sc);
5218 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
5219 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5220
5221 SET_CHAR_OFFSET(0);
5222 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xff);
5223 OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5224
5225 SET_TYPE_OFFSET(ucp_Pc);
5226 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
5227 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5228 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5229 break;
5230 }
5231 cc += 2;
5232 }
5233 #endif
5234
5235 if (jump != NULL)
5236 add_jump(compiler, compares > 0 ? list : backtracks, jump);
5237 }
5238
5239 if (found != NULL)
5240 set_jumps(found, LABEL());
5241 }
5242
5243 #undef SET_TYPE_OFFSET
5244 #undef SET_CHAR_OFFSET
5245
5246 #endif
5247
/* Generates the matching-path code for one single-item opcode.

Arguments:
  common      the JIT compiler state
  type        the opcode to generate code for
  cc          points to the data that follows the opcode
  backtracks  jump list that collects every "no match" exit emitted here

Returns:      pointer to the first code unit after the compiled item

The order in which the OP1/OP2/CMP/JUMP macros are invoked is the order of the
generated instructions, so statements in this function must not be reordered.
An opcode that has no case below reaches SLJIT_ASSERT_STOP(). */

static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
{
DEFINE_COMPILER;
int length;
unsigned int c, oc, bit;
compare_context context;
struct sljit_jump *jump[4];
jump_list *end_list;
#ifdef SUPPORT_UTF
struct sljit_label *label;
#ifdef SUPPORT_UCP
pcre_uchar propdata[5];
#endif
#endif /* SUPPORT_UTF */

switch(type)
  {
  case OP_SOD:
  /* Backtrack unless STR_PTR equals jit_arguments.begin. */
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
  return cc;

  case OP_SOM:
  /* Backtrack unless STR_PTR equals jit_arguments.str. */
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
  return cc;

  case OP_NOT_WORD_BOUNDARY:
  case OP_WORD_BOUNDARY:
  /* Fast-call the shared wordboundary routine, then branch on the flags it
  leaves behind; the sense of the branch is inverted for the negated opcode. */
  add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
  add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  return cc;

  case OP_NOT_DIGIT:
  case OP_DIGIT:
  /* Digits are usually 0-9, so it is worth to optimize them. */
  detect_partial_match(common, backtracks);
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
  if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_digit, FALSE))
    read_char7_type(common, type == OP_NOT_DIGIT);
  else
#endif
    read_char8_type(common, type == OP_NOT_DIGIT);
  /* Flip the starting bit in the negative case. */
  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
  add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO));
  return cc;

  case OP_NOT_WHITESPACE:
  case OP_WHITESPACE:
  /* Same scheme as the digit case, testing the ctype_space bit. */
  detect_partial_match(common, backtracks);
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
  if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_space, FALSE))
    read_char7_type(common, type == OP_NOT_WHITESPACE);
  else
#endif
    read_char8_type(common, type == OP_NOT_WHITESPACE);
  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
  add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO));
  return cc;

  case OP_NOT_WORDCHAR:
  case OP_WORDCHAR:
  /* Same scheme as the digit case, testing the ctype_word bit. */
  detect_partial_match(common, backtracks);
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
  if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_word, FALSE))
    read_char7_type(common, type == OP_NOT_WORDCHAR);
  else
#endif
    read_char8_type(common, type == OP_NOT_WORDCHAR);
  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
  add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO));
  return cc;

  case OP_ANY:
  detect_partial_match(common, backtracks);
  read_char_range(common, common->nlmin, common->nlmax, TRUE);
  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
    {
    /* Two-unit fixed newline: only when the character just read equals the
    high byte of common->newline is the next unit compared with the low byte. */
    jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
    end_list = NULL;
    if (common->mode != JIT_PARTIAL_HARD_COMPILE)
      add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
    else
      check_str_end(common, &end_list);

    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
    set_jumps(end_list, LABEL());
    JUMPHERE(jump[0]);
    }
  else
    check_newlinechar(common, common->nltype, backtracks, TRUE);
  return cc;

  case OP_ALLANY:
  detect_partial_match(common, backtracks);
#ifdef SUPPORT_UTF
  if (common->utf)
    {
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
#if defined COMPILE_PCRE8
    /* For a first unit >= 0xc0, STR_PTR is advanced further by the value
    looked up in utf8_table4. */
    jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
#elif defined COMPILE_PCRE16
    /* For a first unit >= 0xd800, TMP1 becomes 1 when (unit & 0xfc00) equals
    0xd800 and 0 otherwise; that flag shifted left by one is added to STR_PTR. */
    jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
#endif
    JUMPHERE(jump[0]);
#endif /* COMPILE_PCRE[8|16] */
    return cc;
    }
#endif
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  return cc;

  case OP_ANYBYTE:
  /* Always advances by exactly one code unit. */
  detect_partial_match(common, backtracks);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  return cc;

#ifdef SUPPORT_UTF
#ifdef SUPPORT_UCP
  case OP_NOTPROP:
  case OP_PROP:
  /* Build a one-item extended class in propdata and let the xclass compiler
  generate the test. The two data units of the opcode are consumed. */
  propdata[0] = XCL_HASPROP;
  propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
  propdata[2] = cc[0];
  propdata[3] = cc[1];
  propdata[4] = XCL_END;
  compile_xclass_matchingpath(common, propdata, backtracks);
  return cc + 2;
#endif
#endif

  case OP_ANYNL:
  detect_partial_match(common, backtracks);
  read_char_range(common, common->bsr_nlmin, common->bsr_nlmax, FALSE);
  jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
  /* We don't need to handle soft partial matching case. */
  end_list = NULL;
  if (common->mode != JIT_PARTIAL_HARD_COMPILE)
    add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  else
    check_str_end(common, &end_list);
  /* After a CR: step over the next unit only when it is CHAR_NL. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  jump[2] = JUMP(SLJIT_JUMP);
  /* Not a CR: the generic newline test decides. */
  JUMPHERE(jump[0]);
  check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
  set_jumps(end_list, LABEL());
  JUMPHERE(jump[1]);
  JUMPHERE(jump[2]);
  return cc;

  case OP_NOT_HSPACE:
  case OP_HSPACE:
  /* Read a character, fast-call the shared hspace routine and branch on the
  flags it leaves behind. */
  detect_partial_match(common, backtracks);
  read_char_range(common, 0x9, 0x3000, type == OP_NOT_HSPACE);
  add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
  add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  return cc;

  case OP_NOT_VSPACE:
  case OP_VSPACE:
  /* Same scheme as the hspace case, using the shared vspace routine. */
  detect_partial_match(common, backtracks);
  read_char_range(common, 0xa, 0x2029, type == OP_NOT_VSPACE);
  add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
  add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  return cc;

#ifdef SUPPORT_UCP
  case OP_EXTUNI:
  detect_partial_match(common, backtracks);
  read_char(common);
  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
  /* Optimize register allocation: use a real register. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
  OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);

  /* Loop: STACK_TOP holds the gbprop byte of the previous character and TMP3
  remembers where the next character starts. The loop continues while the
  ucp_gbtable entry for the previous value has bit (1 << next gbprop) set. */
  label = LABEL();
  jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
  read_char(common);
  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);

  OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
  OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
  OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
  OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
  JUMPTO(SLJIT_NOT_ZERO, label);

  /* The last character read is not part of the sequence: step back to its
  start, then restore STACK_TOP from the LOCALS0 slot. */
  OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
  JUMPHERE(jump[0]);
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);

  if (common->mode == JIT_PARTIAL_HARD_COMPILE)
    {
    jump[0] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
    /* Since we successfully read a char above, partial matching must occur. */
    check_partial(common, TRUE);
    JUMPHERE(jump[0]);
    }
  return cc;
#endif

  case OP_EODN:
  /* Requires rather complex checks. */
  jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
    {
    /* Two-unit fixed newline: it must be the last two units of the subject. */
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    if (common->mode == JIT_COMPILE)
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
    else
      {
      /* Partial modes: when STR_PTR + 2 units is not STR_END, backtrack if it
      is below STR_END or if the current unit is not the first newline unit;
      otherwise call check_partial() and then backtrack. */
      jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);
      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS);
      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_NOT_EQUAL);
      add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
      check_partial(common, TRUE);
      add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
      JUMPHERE(jump[1]);
      }
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
    }
  else if (common->nltype == NLTYPE_FIXED)
    {
    /* One-unit fixed newline: it must be the very last unit. */
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
    }
  else
    {
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
    /* Current unit is CR: compare STR_PTR + 2 units with STR_END. */
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
    jump[2] = JUMP(SLJIT_GREATER);
    add_jump(compiler, backtracks, JUMP(SLJIT_LESS));
    /* Equal. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    jump[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));

    /* Current unit is not CR. */
    JUMPHERE(jump[1]);
    if (common->nltype == NLTYPE_ANYCRLF)
      {
      OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0));
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
      }
    else
      {
      /* STR_PTR is parked in LOCALS1 while one character is read and passed
      to the shared anynewline routine; it is reloaded afterwards. */
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
      read_char_range(common, common->nlmin, common->nlmax, TRUE);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
      add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
      add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
      }
    JUMPHERE(jump[2]);
    JUMPHERE(jump[3]);
    }
  JUMPHERE(jump[0]);
  check_partial(common, FALSE);
  return cc;

  case OP_EOD:
  /* Backtrack while STR_PTR is below STR_END. */
  add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
  check_partial(common, FALSE);
  return cc;

  case OP_CIRC:
  /* Backtrack when STR_PTR is above jit_arguments.begin or when the notbol
  flag is non-zero. */
  OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
  return cc;

  case OP_CIRCM:
  OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
  jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0);
  /* Not above begin: only the notbol flag is tested. */
  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
  jump[0] = JUMP(SLJIT_JUMP);
  JUMPHERE(jump[1]);

  /* Above begin: backtrack at STR_END, otherwise the preceding character(s)
  must be a newline. TMP1 still holds jit_arguments.begin here. */
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
    {
    OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, TMP1, 0));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
    }
  else
    {
    skip_char_back(common);
    read_char_range(common, common->nlmin, common->nlmax, TRUE);
    check_newlinechar(common, common->nltype, backtracks, FALSE);
    }
  JUMPHERE(jump[0]);
  return cc;

  case OP_DOLL:
  /* Backtrack when the noteol flag is non-zero. */
  OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));

  /* Without common->endonly the OP_EODN code is reused; with it, the test is
  the same as for OP_EOD. */
  if (!common->endonly)
    compile_char1_matchingpath(common, OP_EODN, cc, backtracks);
  else
    {
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
    check_partial(common, FALSE);
    }
  return cc;

  case OP_DOLLM:
  jump[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
  /* At STR_END: only the noteol flag is tested. */
  OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
  check_partial(common, FALSE);
  jump[0] = JUMP(SLJIT_JUMP);
  JUMPHERE(jump[1]);

  /* Below STR_END: a newline must follow. */
  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
    {
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    if (common->mode == JIT_COMPILE)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0));
    else
      {
      jump[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
      /* STR_PTR = STR_END - IN_UCHARS(1) */
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
      check_partial(common, TRUE);
      add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
      JUMPHERE(jump[1]);
      }

    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
    }
  else
    {
    peek_char(common, common->nlmax);
    check_newlinechar(common, common->nltype, backtracks, FALSE);
    }
  JUMPHERE(jump[0]);
  return cc;

  case OP_CHAR:
  case OP_CHARI:
  length = 1;
#ifdef SUPPORT_UTF
  if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
#endif
  /* In plain JIT_COMPILE mode a character that is caseful, has no other case,
  or has a non-zero othercase bit is handed to byte_sequence_compare() after a
  single length check. */
  if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
    {
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));

    context.length = IN_UCHARS(length);
    context.sourcereg = -1;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
    context.ucharptr = 0;
#endif
    return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
    }

  /* Otherwise read the subject character and compare it as a code point. */
  detect_partial_match(common, backtracks);
#ifdef SUPPORT_UTF
  if (common->utf)
    {
    GETCHAR(c, cc);
    }
  else
#endif
    c = *cc;

  if (type == OP_CHAR || !char_has_othercase(common, cc))
    {
    read_char_range(common, c, c, FALSE);
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
    return cc + length;
    }
  oc = char_othercase(common, c);
  read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, FALSE);
  bit = c ^ oc;
  if (is_powerof2(bit))
    {
    /* The two cases differ in a single bit: force that bit on and compare
    once. */
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
    return cc + length;
    }
  jump[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c);
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
  JUMPHERE(jump[0]);
  return cc + length;

  case OP_NOT:
  case OP_NOTI:
  detect_partial_match(common, backtracks);
  length = 1;
#ifdef SUPPORT_UTF
  if (common->utf)
    {
#ifdef COMPILE_PCRE8
    c = *cc;
    if (c < 128)
      {
      /* The excluded character is a single unit: compare the first subject
      unit only, then step over the whole subject character. */
      OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
      if (type == OP_NOT || !char_has_othercase(common, cc))
        add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
      else
        {
        /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
        OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
        add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
        }
      /* Skip the variable-length character. */
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
      JUMPHERE(jump[0]);
      return cc + 1;
      }
    else
#endif /* COMPILE_PCRE8 */
      {
      GETCHARLEN(c, cc, length);
      }
    }
  else
#endif /* SUPPORT_UTF */
    c = *cc;

  /* Here every comparison backtracks on equality (the item is negated). */
  if (type == OP_NOT || !char_has_othercase(common, cc))
    {
    read_char_range(common, c, c, TRUE);
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
    }
  else
    {
    oc = char_othercase(common, c);
    read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, TRUE);
    bit = c ^ oc;
    if (is_powerof2(bit))
      {
      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
      }
    else
      {
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
      }
    }
  return cc + length;

  case OP_CLASS:
  case OP_NCLASS:
  detect_partial_match(common, backtracks);

#if defined SUPPORT_UTF && defined COMPILE_PCRE8
  bit = (common->utf && is_char7_bitset((const pcre_uint8 *)cc, type == OP_NCLASS)) ? 127 : 255;
  read_char_range(common, 0, bit, type == OP_NCLASS);
#else
  read_char_range(common, 0, 255, type == OP_NCLASS);
#endif

  /* Nothing more to emit when check_class_ranges() reports success. */
  if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, FALSE, backtracks))
    return cc + 32 / sizeof(pcre_uchar);

  /* Characters above the bitmap limit: OP_CLASS backtracks on them, OP_NCLASS
  keeps the jump in jump[0] so that they bypass the bitmap test below. */
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
  jump[0] = NULL;
  if (common->utf)
    {
    jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, bit);
    if (type == OP_CLASS)
      {
      add_jump(compiler, backtracks, jump[0]);
      jump[0] = NULL;
      }
    }
#elif !defined COMPILE_PCRE8
  jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
  if (type == OP_CLASS)
    {
    add_jump(compiler, backtracks, jump[0]);
    jump[0] = NULL;
    }
#endif /* SUPPORT_UTF && COMPILE_PCRE8 */

  /* Bitmap test: load the byte at cc[ch >> 3] and test bit (1 << (ch & 7));
  backtrack when it is clear. */
  OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
  OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
  add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));

#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
  if (jump[0] != NULL)
    JUMPHERE(jump[0]);
#endif

  return cc + 32 / sizeof(pcre_uchar);

#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
  case OP_XCLASS:
  /* The class data starts after the link field; GET(cc, 0) supplies the
  length used to step to the next item. */
  compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
  return cc + GET(cc, 0) - 1;
#endif

  case OP_REVERSE:
  length = GET(cc, 0);
  if (length == 0)
    return cc + LINK_SIZE;
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
#ifdef SUPPORT_UTF
  if (common->utf)
    {
    /* Step back one character at a time, "length" times, backtracking as soon
    as STR_PTR is not above jit_arguments.begin. */
    OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
    label = LABEL();
    add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
    skip_char_back(common);
    OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, label);
    }
  else
#endif
    {
    /* Fixed-width units: subtract once and check against begin. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0));
    }
  check_start_used_ptr(common);
  return cc + LINK_SIZE;
  }
SLJIT_ASSERT_STOP();
return cc;
}
5821
compile_charn_matchingpath(compiler_common * common,pcre_uchar * cc,pcre_uchar * ccend,jump_list ** backtracks)5822 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
5823 {
5824 /* This function consumes at least one input character. */
5825 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
5826 DEFINE_COMPILER;
5827 pcre_uchar *ccbegin = cc;
5828 compare_context context;
5829 int size;
5830
5831 context.length = 0;
5832 do
5833 {
5834 if (cc >= ccend)
5835 break;
5836
5837 if (*cc == OP_CHAR)
5838 {
5839 size = 1;
5840 #ifdef SUPPORT_UTF
5841 if (common->utf && HAS_EXTRALEN(cc[1]))
5842 size += GET_EXTRALEN(cc[1]);
5843 #endif
5844 }
5845 else if (*cc == OP_CHARI)
5846 {
5847 size = 1;
5848 #ifdef SUPPORT_UTF
5849 if (common->utf)
5850 {
5851 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5852 size = 0;
5853 else if (HAS_EXTRALEN(cc[1]))
5854 size += GET_EXTRALEN(cc[1]);
5855 }
5856 else
5857 #endif
5858 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5859 size = 0;
5860 }
5861 else
5862 size = 0;
5863
5864 cc += 1 + size;
5865 context.length += IN_UCHARS(size);
5866 }
5867 while (size > 0 && context.length <= 128);
5868
5869 cc = ccbegin;
5870 if (context.length > 0)
5871 {
5872 /* We have a fixed-length byte sequence. */
5873 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
5874 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
5875
5876 context.sourcereg = -1;
5877 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5878 context.ucharptr = 0;
5879 #endif
5880 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
5881 return cc;
5882 }
5883
5884 /* A non-fixed length character will be checked if length == 0. */
5885 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);
5886 }
5887
5888 /* Forward definitions. */
5889 static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
5890 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
5891
/* Allocates a zero-filled backtrack record of "size" bytes from the compiler's
memory and makes it the new top of the parent's backtrack chain. The expansion
uses the caller's local names "compiler", "backtrack" and "parent". If the
compiler reports an error after the allocation, the enclosing function returns
"error". Note that "size" is evaluated twice. */
#define PUSH_BACKTRACK(size, ccstart, error) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return error; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)
5904
/* Same as PUSH_BACKTRACK, for use inside functions returning void: when the
compiler reports an error after the allocation, the enclosing function simply
returns. The expansion uses the caller's local names "compiler", "backtrack"
and "parent"; "size" is evaluated twice. */
#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)
5917
/* Casts the in-scope "backtrack" pointer to a pointer to the given, more
specific backtrack record type. */
#define BACKTRACK_AS(type) ((type *)backtrack)
5919
/* Emits the code that chooses which capture pair an OP_DNREF / OP_DNREFI
item refers to, leaving the OVECTOR location of that pair in TMP2.

The name table entries are probed in order, starting at entry GET2(cc, 1);
GET2(cc, 1 + IMM2_SIZE) gives how many entries there are. The start slot of
each candidate is compared with the value loaded from OVECTOR(1): the first
candidate that differs is selected. The last candidate is selected
unconditionally when none of the earlier ones qualified; in that case, if
"backtracks" is not NULL and jscript_compat is off, a jump is added to
*backtracks when the last candidate equals the OVECTOR(1) value as well. */
static void compile_dnref_search(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
{
DEFINE_COMPILER;
int remaining = GET2(cc, 1 + IMM2_SIZE);
pcre_uchar *entry = common->name_table + GET2(cc, 1) * common->name_entry_size;
unsigned int offset;
jump_list *selected = NULL;

SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);

/* TMP1 keeps the reference value every start slot is compared against. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));

/* Every candidate except the final one leaves early when it differs. */
for (remaining--; remaining > 0; remaining--)
  {
  offset = GET2(entry, 0) << 1;
  GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
  add_jump(compiler, &selected, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
  entry += common->name_entry_size;
  }

/* The final candidate is the fallback choice. */
offset = GET2(entry, 0) << 1;
GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
if (backtracks != NULL && !common->jscript_compat)
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));

set_jumps(selected, LABEL());
}
5949
/* Emits the code that matches the text captured by a group at STR_PTR.

  common      compiler state
  cc          points to OP_REF / OP_REFI (group number in GET2(cc, 1)) or to
              OP_DNREF / OP_DNREFI; for the latter two TMP2 must already hold
              the location of the selected OVECTOR pair
  backtracks  list receiving the jumps taken when the reference fails
  withchecks  when TRUE, emit the "group is unset" check (numbered references
              only, skipped in jscript_compat mode) and the "group is empty"
              check
  emptyfail   when TRUE an empty group jumps to backtracks, otherwise an
              empty group simply skips the comparison

The comparison is caseless for OP_REFI and OP_DNREFI and caseful for OP_REF
and OP_DNREF. Previously the choice was made by testing for OP_REF / OP_REFI
only, which compared caseful OP_DNREF references caselessly and kept
OP_DNREFI out of the UTF-aware caseless path. */
static void compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
{
DEFINE_COMPILER;
BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
BOOL caseless = (*cc == OP_REFI || *cc == OP_DNREFI);
int offset = 0;
struct sljit_jump *jump = NULL;
struct sljit_jump *partial;
struct sljit_jump *nopartial;

/* TMP1 <- start of the captured text. */
if (ref)
  {
  offset = GET2(cc, 1) << 1;
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
  /* OVECTOR(1) contains the "string begin - 1" constant. */
  if (withchecks && !common->jscript_compat)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
  }
else
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);

#if defined SUPPORT_UTF && defined SUPPORT_UCP
if (common->utf && caseless)
  {
  SLJIT_ASSERT(TMP1 == SLJIT_R0 && STACK_TOP == SLJIT_R1 && TMP2 == SLJIT_R2);
  /* TMP2 <- end of the captured text. */
  if (ref)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
  else
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));

  /* Start == end: the group is empty. */
  if (withchecks)
    jump = CMP(SLJIT_EQUAL, TMP1, 0, TMP2, 0);

  /* Needed to save important temporary registers. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
  OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
  sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  /* The result is handled as: 0 -> mismatch, 1 -> ran into the end of the
  subject, anything else -> the new STR_PTR. */
  if (common->mode == JIT_COMPILE)
    add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
  else
    {
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
    nopartial = CMP(SLJIT_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
    check_partial(common, FALSE);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    JUMPHERE(nopartial);
    }
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
  }
else
#endif /* SUPPORT_UTF && SUPPORT_UCP */
  {
  /* TMP2 <- length of the captured text (end - start); flags tell if zero. */
  if (ref)
    OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
  else
    OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);

  if (withchecks)
    jump = JUMP(SLJIT_ZERO);

  /* Advance STR_PTR by the length; going past STR_END is a failure in
  normal mode and a candidate partial match otherwise. */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  partial = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
  if (common->mode == JIT_COMPILE)
    add_jump(compiler, backtracks, partial);

  add_jump(compiler, caseless ? &common->caselesscmp : &common->casefulcmp, JUMP(SLJIT_FAST_CALL));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));

  if (common->mode != JIT_COMPILE)
    {
    nopartial = JUMP(SLJIT_JUMP);
    JUMPHERE(partial);
    /* TMP2 -= STR_END - STR_PTR */
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
    /* Nothing left to compare: go straight to the partial check. */
    partial = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0);
    OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
    add_jump(compiler, caseless ? &common->caselesscmp : &common->casefulcmp, JUMP(SLJIT_FAST_CALL));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
    JUMPHERE(partial);
    check_partial(common, FALSE);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    JUMPHERE(nopartial);
    }
  }

/* Resolve the "empty group" jump, if one was emitted. */
if (jump != NULL)
  {
  if (emptyfail)
    add_jump(compiler, backtracks, jump);
  else
    JUMPHERE(jump);
  }
}
6045
/* Compiles a back reference (OP_REF / OP_REFI / OP_DNREF / OP_DNREFI) that
is followed by one of the OP_CR* repeat opcodes.

An iterator_backtrack record is pushed onto parent. The repeat opcode is
decoded into min / max (max == 0 is used as "no upper limit") and a
greedy / lazy flag, then either the greedy or the lazy matching path is
emitted. Returns the address of the first opcode after the repeat, or NULL
when pushing the backtrack record failed. */
static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
{
DEFINE_COMPILER;
BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
backtrack_common *backtrack;
pcre_uchar type;
int offset = 0;
struct sljit_label *label;
struct sljit_jump *zerolength;
struct sljit_jump *jump = NULL;
pcre_uchar *ccbegin = cc;
int min = 0, max = 0;
BOOL minimize;

PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);

/* Numbered references carry one IMM2 field; the by-name forms carry two, so
cc is advanced by the extra field to let the offsets below work for both. */
if (ref)
  offset = GET2(cc, 1) << 1;
else
  cc += IMM2_SIZE;
type = cc[1 + IMM2_SIZE];

/* The low bit of the repeat opcode selects the lazy variant. */
SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
minimize = (type & 0x1) != 0;
switch(type)
  {
  case OP_CRSTAR:
  case OP_CRMINSTAR:
  min = 0;
  max = 0;
  cc += 1 + IMM2_SIZE + 1;
  break;
  case OP_CRPLUS:
  case OP_CRMINPLUS:
  min = 1;
  max = 0;
  cc += 1 + IMM2_SIZE + 1;
  break;
  case OP_CRQUERY:
  case OP_CRMINQUERY:
  min = 0;
  max = 1;
  cc += 1 + IMM2_SIZE + 1;
  break;
  case OP_CRRANGE:
  case OP_CRMINRANGE:
  min = GET2(cc, 1 + IMM2_SIZE + 1);
  max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
  cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
  break;
  default:
  SLJIT_ASSERT_STOP();
  break;
  }

/* Greedy repeat. */
if (!minimize)
  {
  if (min == 0)
    {
    allocate_stack(common, 2);
    if (ref)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
    /* Temporary release of STR_PTR. */
    OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
    /* Handles both invalid and empty cases. Since the minimum repeat
    is zero, the invalid case is basically the same as an empty case. */
    if (ref)
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
    else
      {
      /* Select the capture pair; its location is kept in POSSESSIVE1 so it
      can be reloaded into TMP2 on every iteration. */
      compile_dnref_search(common, ccbegin, NULL);
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
      }
    /* Restore if not zero length. */
    OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
    }
  else
    {
    allocate_stack(common, 1);
    if (ref)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    if (ref)
      {
      /* With a non-zero minimum a start slot equal to OVECTOR(1) fails. */
      add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
      }
    else
      {
      compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
      }
    }

  /* POSSESSIVE0 is the iteration counter when a count has to be tracked. */
  if (min > 1 || max > 1)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0);

  /* Start of one iteration. */
  label = LABEL();
  if (!ref)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
  compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);

  if (min > 1 || max > 1)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
    /* Below the minimum: iterate again without saving a backtrack point. */
    if (min > 1)
      CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, label);
    if (max > 1)
      {
      /* Below the maximum: save STR_PTR and iterate again. */
      jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
      allocate_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      JUMPTO(SLJIT_JUMP, label);
      JUMPHERE(jump);
      }
    }

  if (max == 0)
    {
    /* Includes min > 1 case as well. */
    allocate_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    JUMPTO(SLJIT_JUMP, label);
    }

  JUMPHERE(zerolength);
  BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();

  count_match(common);
  return cc;
  }

/* Lazy repeat. Stack layout as used below: STACK(0) holds a string position
(or 0), STACK(1) the iteration counter, and for the by-name forms STACK(2)
the location of the selected capture pair. */
allocate_stack(common, ref ? 2 : 3);
if (ref)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
if (type != OP_CRMINSTAR)
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);

if (min == 0)
  {
  /* Handles both invalid and empty cases. Since the minimum repeat
  is zero, the invalid case is basically the same as an empty case. */
  if (ref)
    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
  else
    {
    compile_dnref_search(common, ccbegin, NULL);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
    }
  /* Length is non-zero, we can match real repeats. */
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  /* With a zero minimum the first pass skips the match code entirely. */
  jump = JUMP(SLJIT_JUMP);
  }
else
  {
  if (ref)
    {
    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
    }
  else
    {
    compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
    }
  }

/* The label recorded here is where one more repetition is attempted. */
BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
if (max > 0)
  add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));

if (!ref)
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

if (min > 1)
  {
  /* Keep repeating until the minimum count has been reached. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
  CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->matchingpath);
  }
else if (max > 0)
  OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);

if (jump != NULL)
  JUMPHERE(jump);
JUMPHERE(zerolength);

count_match(common);
return cc;
}
6252
compile_recurse_matchingpath(compiler_common * common,pcre_uchar * cc,backtrack_common * parent)6253 static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6254 {
6255 DEFINE_COMPILER;
6256 backtrack_common *backtrack;
6257 recurse_entry *entry = common->entries;
6258 recurse_entry *prev = NULL;
6259 sljit_sw start = GET(cc, 1);
6260 pcre_uchar *start_cc;
6261 BOOL needs_control_head;
6262
6263 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
6264
6265 /* Inlining simple patterns. */
6266 if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
6267 {
6268 start_cc = common->start + start;
6269 compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
6270 BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
6271 return cc + 1 + LINK_SIZE;
6272 }
6273
6274 while (entry != NULL)
6275 {
6276 if (entry->start == start)
6277 break;
6278 prev = entry;
6279 entry = entry->next;
6280 }
6281
6282 if (entry == NULL)
6283 {
6284 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
6285 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6286 return NULL;
6287 entry->next = NULL;
6288 entry->entry = NULL;
6289 entry->calls = NULL;
6290 entry->start = start;
6291
6292 if (prev != NULL)
6293 prev->next = entry;
6294 else
6295 common->entries = entry;
6296 }
6297
6298 if (common->has_set_som && common->mark_ptr != 0)
6299 {
6300 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
6301 allocate_stack(common, 2);
6302 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
6303 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6304 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6305 }
6306 else if (common->has_set_som || common->mark_ptr != 0)
6307 {
6308 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
6309 allocate_stack(common, 1);
6310 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6311 }
6312
6313 if (entry->entry == NULL)
6314 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
6315 else
6316 JUMPTO(SLJIT_FAST_CALL, entry->entry);
6317 /* Leave if the match is failed. */
6318 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
6319 return cc + 1 + LINK_SIZE;
6320 }
6321
do_callout(struct jit_arguments * arguments,PUBL (callout_block)* callout_block,pcre_uchar ** jit_ovector)6322 static int SLJIT_CALL do_callout(struct jit_arguments *arguments, PUBL(callout_block) *callout_block, pcre_uchar **jit_ovector)
6323 {
6324 const pcre_uchar *begin = arguments->begin;
6325 int *offset_vector = arguments->offsets;
6326 int offset_count = arguments->offset_count;
6327 int i;
6328
6329 if (PUBL(callout) == NULL)
6330 return 0;
6331
6332 callout_block->version = 2;
6333 callout_block->callout_data = arguments->callout_data;
6334
6335 /* Offsets in subject. */
6336 callout_block->subject_length = arguments->end - arguments->begin;
6337 callout_block->start_match = (pcre_uchar*)callout_block->subject - arguments->begin;
6338 callout_block->current_position = (pcre_uchar*)callout_block->offset_vector - arguments->begin;
6339 #if defined COMPILE_PCRE8
6340 callout_block->subject = (PCRE_SPTR)begin;
6341 #elif defined COMPILE_PCRE16
6342 callout_block->subject = (PCRE_SPTR16)begin;
6343 #elif defined COMPILE_PCRE32
6344 callout_block->subject = (PCRE_SPTR32)begin;
6345 #endif
6346
6347 /* Convert and copy the JIT offset vector to the offset_vector array. */
6348 callout_block->capture_top = 0;
6349 callout_block->offset_vector = offset_vector;
6350 for (i = 2; i < offset_count; i += 2)
6351 {
6352 offset_vector[i] = jit_ovector[i] - begin;
6353 offset_vector[i + 1] = jit_ovector[i + 1] - begin;
6354 if (jit_ovector[i] >= begin)
6355 callout_block->capture_top = i;
6356 }
6357
6358 callout_block->capture_top = (callout_block->capture_top >> 1) + 1;
6359 if (offset_count > 0)
6360 offset_vector[0] = -1;
6361 if (offset_count > 1)
6362 offset_vector[1] = -1;
6363 return (*PUBL(callout))(callout_block);
6364 }
6365
/* Size of the callout block, rounded up to a multiple of 8 bytes. */
#define CALLOUT_ARG_SIZE \
    (((int)sizeof(PUBL(callout_block)) + 7) & ~7)

/* Offset of the callout block field "arg" relative to a base pointer that
sits CALLOUT_ARG_SIZE bytes past the start of the block (the result is
therefore negative). compile_callout_matchingpath uses STACK_TOP as that
base. */
#define CALLOUT_ARG_OFFSET(arg) \
    (-CALLOUT_ARG_SIZE + SLJIT_OFFSETOF(PUBL(callout_block), arg))
6372
compile_callout_matchingpath(compiler_common * common,pcre_uchar * cc,backtrack_common * parent)6373 static SLJIT_INLINE pcre_uchar *compile_callout_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6374 {
6375 DEFINE_COMPILER;
6376 backtrack_common *backtrack;
6377
6378 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
6379
6380 allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
6381
6382 SLJIT_ASSERT(common->capture_last_ptr != 0);
6383 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
6384 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6385 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
6386 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
6387
6388 /* These pointer sized fields temporarly stores internal variables. */
6389 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
6390 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
6391 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);
6392
6393 if (common->mark_ptr != 0)
6394 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
6395 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));
6396 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));
6397 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
6398
6399 /* Needed to save important temporary registers. */
6400 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
6401 OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_TOP, 0, SLJIT_IMM, CALLOUT_ARG_SIZE);
6402 GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START);
6403 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
6404 OP1(SLJIT_MOV_SI, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0);
6405 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
6406 free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
6407
6408 /* Check return value. */
6409 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
6410 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_SIG_GREATER));
6411 if (common->forced_quit_label == NULL)
6412 add_jump(compiler, &common->forced_quit, JUMP(SLJIT_SIG_LESS));
6413 else
6414 JUMPTO(SLJIT_SIG_LESS, common->forced_quit_label);
6415 return cc + 2 + 2 * LINK_SIZE;
6416 }
6417
6418 #undef CALLOUT_ARG_SIZE
6419 #undef CALLOUT_ARG_OFFSET
6420
/* Compiles an assertion group: OP_ASSERT, OP_ASSERT_NOT, OP_ASSERTBACK or
OP_ASSERTBACK_NOT, optionally preceded by OP_BRAZERO / OP_BRAMINZERO.

  common       compiler state; several of its fields (local_exit,
               positive_assert, then_trap, quit_label, accept_label, quit,
               positive_assert_quit, accept) are overridden while the
               assertion body is compiled and restored before every return
  cc           points to the assertion opcode or to the preceding
               OP_BRAZERO / OP_BRAMINZERO
  backtrack    assert backtrack record; framesize and private_data_ptr are
               filled in here, matchingpath is set or used for the zero
               repeat forms
  conditional  TRUE when the assertion is used as a condition: the failure
               jumps are then collected in backtrack->condfailed instead of
               backtrack->common.topbacktracks

Every alternative of the assertion is compiled in turn, each one followed by
its own backtracking path. Returns the address after the closing item of the
group (cc + 1 + LINK_SIZE), or NULL when the compiler reports an error. */
static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
{
DEFINE_COMPILER;
int framesize;
int extrasize;
BOOL needs_control_head;
int private_data_ptr;
backtrack_common altbacktrack;
pcre_uchar *ccbegin;
pcre_uchar opcode;
pcre_uchar bra = OP_BRA;
jump_list *tmp = NULL;
jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
jump_list **found;
/* Saving previous accept variables. */
BOOL save_local_exit = common->local_exit;
BOOL save_positive_assert = common->positive_assert;
then_trap_backtrack *save_then_trap = common->then_trap;
struct sljit_label *save_quit_label = common->quit_label;
struct sljit_label *save_accept_label = common->accept_label;
jump_list *save_quit = common->quit;
jump_list *save_positive_assert_quit = common->positive_assert_quit;
jump_list *save_accept = common->accept;
struct sljit_jump *jump;
struct sljit_jump *brajump = NULL;

/* Assert captures then. */
common->then_trap = NULL;

/* Remember a leading zero-repeat opcode and step over it. */
if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
  {
  SLJIT_ASSERT(!conditional);
  bra = *cc;
  cc++;
  }
private_data_ptr = PRIVATE_DATA(cc);
SLJIT_ASSERT(private_data_ptr != 0);
framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
backtrack->framesize = framesize;
backtrack->private_data_ptr = private_data_ptr;
opcode = *cc;
SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
/* Jumps taken when an alternative matches: for positive assertions they are
collected locally in tmp, for negative ones a match means failure and they go
straight to the failure target. */
found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
ccbegin = cc;
cc += GET(cc, 1);

if (bra == OP_BRAMINZERO)
  {
  /* This is a braminzero backtrack path. */
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  }

/* Set up the stack: STR_PTR is always saved in STACK(0); the previous
control head and, when a frame is needed, the previous private data value
are saved in the extra slots. */
if (framesize < 0)
  {
  extrasize = needs_control_head ? 2 : 1;
  if (framesize == no_frame)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
  allocate_stack(common, extrasize);
  if (needs_control_head)
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  if (needs_control_head)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
    }
  }
else
  {
  extrasize = needs_control_head ? 3 : 2;
  allocate_stack(common, framesize + extrasize);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
  if (needs_control_head)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  if (needs_control_head)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
    }
  else
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
  init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize, FALSE);
  }

memset(&altbacktrack, 0, sizeof(backtrack_common));
if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
  {
  /* Negative assert is stronger than positive assert. */
  common->local_exit = TRUE;
  common->quit_label = NULL;
  common->quit = NULL;
  common->positive_assert = FALSE;
  }
else
  common->positive_assert = TRUE;
common->positive_assert_quit = NULL;

/* One pass per alternative. */
while (1)
  {
  common->accept_label = NULL;
  common->accept = NULL;
  altbacktrack.top = NULL;
  altbacktrack.topbacktracks = NULL;

  /* Later alternatives restart from the saved subject position. */
  if (*ccbegin == OP_ALT)
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  altbacktrack.cc = ccbegin;
  compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    {
    /* Restore the saved compiler state before bailing out. */
    if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
      {
      common->local_exit = save_local_exit;
      common->quit_label = save_quit_label;
      common->quit = save_quit;
      }
    common->positive_assert = save_positive_assert;
    common->then_trap = save_then_trap;
    common->accept_label = save_accept_label;
    common->positive_assert_quit = save_positive_assert_quit;
    common->accept = save_accept;
    return NULL;
    }
  common->accept_label = LABEL();
  if (common->accept != NULL)
    set_jumps(common->accept, common->accept_label);

  /* Reset stack. */
  if (framesize < 0)
    {
    if (framesize == no_frame)
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    else
      free_stack(common, extrasize);
    if (needs_control_head)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
    }
  else
    {
    if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
      {
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
      OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
      if (needs_control_head)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
      }
    else
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      if (needs_control_head)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw));
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      }
    }

  if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
    {
    /* We know that STR_PTR was stored on the top of the stack. */
    if (conditional)
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? sizeof(sljit_sw) : 0);
    else if (bra == OP_BRAZERO)
      {
      if (framesize < 0)
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
      else
        {
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + extrasize - 1) * sizeof(sljit_sw));
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
        }
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else if (framesize >= 0)
      {
      /* For OP_BRA and OP_BRAMINZERO. */
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
      }
    }
  /* This alternative matched. */
  add_jump(compiler, found, JUMP(SLJIT_JUMP));

  compile_backtrackingpath(common, altbacktrack.top);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    {
    /* Restore the saved compiler state before bailing out. */
    if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
      {
      common->local_exit = save_local_exit;
      common->quit_label = save_quit_label;
      common->quit = save_quit;
      }
    common->positive_assert = save_positive_assert;
    common->then_trap = save_then_trap;
    common->accept_label = save_accept_label;
    common->positive_assert_quit = save_positive_assert_quit;
    common->accept = save_accept;
    return NULL;
    }
  set_jumps(altbacktrack.topbacktracks, LABEL());

  if (*cc != OP_ALT)
    break;

  /* Advance to the next alternative. */
  ccbegin = cc;
  cc += GET(cc, 1);
  }

if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
  {
  SLJIT_ASSERT(common->positive_assert_quit == NULL);
  /* Makes the check less complicated below. */
  common->positive_assert_quit = common->quit;
  }

/* None of them matched. */
if (common->positive_assert_quit != NULL)
  {
  /* Landing point of the collected quit jumps: the stack is brought back to
  the state set up at the start of the assertion. */
  jump = JUMP(SLJIT_JUMP);
  set_jumps(common->positive_assert_quit, LABEL());
  SLJIT_ASSERT(framesize != no_stack);
  if (framesize < 0)
    OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
  else
    {
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
    OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
    }
  JUMPHERE(jump);
  }

/* Put back the control head saved in STACK(1). */
if (needs_control_head)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));

if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
  {
  /* Assert is failed. */
  if (conditional || bra == OP_BRAZERO)
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  if (framesize < 0)
    {
    /* The topmost item should be 0. */
    if (bra == OP_BRAZERO)
      {
      if (extrasize == 2)
        free_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else
      free_stack(common, extrasize);
    }
  else
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
    /* The topmost item should be 0. */
    if (bra == OP_BRAZERO)
      {
      free_stack(common, framesize + extrasize - 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else
      free_stack(common, framesize + extrasize);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }
  /* With OP_BRAZERO a failed assertion still continues on the matching
  path (the jump is bound to it below); otherwise it is a failure. */
  jump = JUMP(SLJIT_JUMP);
  if (bra != OP_BRAZERO)
    add_jump(compiler, target, jump);

  /* Assert is successful. */
  set_jumps(tmp, LABEL());
  if (framesize < 0)
    {
    /* We know that STR_PTR was stored on the top of the stack. */
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
    /* Keep the STR_PTR on the top of the stack. */
    if (bra == OP_BRAZERO)
      {
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
      if (extrasize == 2)
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      }
    else if (bra == OP_BRAMINZERO)
      {
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    }
  else
    {
    if (bra == OP_BRA)
      {
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
      OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 2) * sizeof(sljit_sw));
      }
    else
      {
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
      OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
      if (extrasize == 2)
        {
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
        if (bra == OP_BRAMINZERO)
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
        }
      else
        {
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
        }
      }
    }

  if (bra == OP_BRAZERO)
    {
    /* Both outcomes continue here. */
    backtrack->matchingpath = LABEL();
    SET_LABEL(jump, backtrack->matchingpath);
    }
  else if (bra == OP_BRAMINZERO)
    {
    JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
    JUMPHERE(brajump);
    if (framesize >= 0)
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
      }
    set_jumps(backtrack->common.topbacktracks, LABEL());
    }
  }
else
  {
  /* AssertNot is successful. */
  if (framesize < 0)
    {
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    if (bra != OP_BRA)
      {
      if (extrasize == 2)
        free_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else
      free_stack(common, extrasize);
    }
  else
    {
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
    /* The topmost item should be 0. */
    if (bra != OP_BRA)
      {
      free_stack(common, framesize + extrasize - 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else
      free_stack(common, framesize + extrasize);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }

  if (bra == OP_BRAZERO)
    backtrack->matchingpath = LABEL();
  else if (bra == OP_BRAMINZERO)
    {
    JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
    JUMPHERE(brajump);
    }

  /* For the zero-repeat forms the "alternative matched" jumps are bound
  here instead of being left on the backtrack list. */
  if (bra != OP_BRA)
    {
    SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
    set_jumps(backtrack->common.topbacktracks, LABEL());
    backtrack->common.topbacktracks = NULL;
    }
  }

/* Restore the compiler state saved on entry. */
if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
  {
  common->local_exit = save_local_exit;
  common->quit_label = save_quit_label;
  common->quit = save_quit;
  }
common->positive_assert = save_positive_assert;
common->then_trap = save_then_trap;
common->accept_label = save_accept_label;
common->positive_assert_quit = save_positive_assert_quit;
common->accept = save_accept;
return cc + 1 + LINK_SIZE;
}
6818
match_once_common(compiler_common * common,pcre_uchar ket,int framesize,int private_data_ptr,BOOL has_alternatives,BOOL needs_control_head)6819 static SLJIT_INLINE void match_once_common(compiler_common *common, pcre_uchar ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
6820 {
6821 DEFINE_COMPILER;
6822 int stacksize;
6823
6824 if (framesize < 0)
6825 {
6826 if (framesize == no_frame)
6827 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6828 else
6829 {
6830 stacksize = needs_control_head ? 1 : 0;
6831 if (ket != OP_KET || has_alternatives)
6832 stacksize++;
6833 free_stack(common, stacksize);
6834 }
6835
6836 if (needs_control_head)
6837 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? sizeof(sljit_sw) : 0);
6838
6839 /* TMP2 which is set here used by OP_KETRMAX below. */
6840 if (ket == OP_KETRMAX)
6841 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
6842 else if (ket == OP_KETRMIN)
6843 {
6844 /* Move the STR_PTR to the private_data_ptr. */
6845 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), 0);
6846 }
6847 }
6848 else
6849 {
6850 stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;
6851 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
6852 if (needs_control_head)
6853 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), 0);
6854
6855 if (ket == OP_KETRMAX)
6856 {
6857 /* TMP2 which is set here used by OP_KETRMAX below. */
6858 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6859 }
6860 }
6861 if (needs_control_head)
6862 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
6863 }
6864
match_capture_common(compiler_common * common,int stacksize,int offset,int private_data_ptr)6865 static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
6866 {
6867 DEFINE_COMPILER;
6868
6869 if (common->capture_last_ptr != 0)
6870 {
6871 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
6872 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
6873 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6874 stacksize++;
6875 }
6876 if (common->optimized_cbracket[offset >> 1] == 0)
6877 {
6878 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
6879 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6880 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6881 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6882 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
6883 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
6884 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
6885 stacksize += 2;
6886 }
6887 return stacksize;
6888 }
6889
6890 /*
6891 Handling bracketed expressions is probably the most complex part.
6892
6893 Stack layout naming characters:
6894 S - Push the current STR_PTR
6895 0 - Push a 0 (NULL)
6896 A - Push the current STR_PTR. Needed for restoring the STR_PTR
6897 before the next alternative. Not pushed if there are no alternatives.
6898 M - Any values pushed by the current alternative. Can be empty, or anything.
6899 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
6900 L - Push the previous local (pointed by localptr) to the stack
    () - optional values stored on the stack
   ()* - optional, can be stored multiple times
6903
6904 The following list shows the regular expression templates, their PCRE byte codes
6905 and stack layout supported by pcre-sljit.
6906
6907 (?:) OP_BRA | OP_KET A M
6908 () OP_CBRA | OP_KET C M
6909 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
6910 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
6911 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
6912 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
6913 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
6914 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
6915 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
6916 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
6917 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
6918 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
6919 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
6920 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
6921 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
6922 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
6923 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
6924 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
6925 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
6926 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
6927 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
6928 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
6929
6930
6931 Stack layout naming characters:
6932 A - Push the alternative index (starting from 0) on the stack.
        Not pushed if there are no alternatives.
6934 M - Any values pushed by the current alternative. Can be empty, or anything.
6935
6936 The next list shows the possible content of a bracket:
6937 (|) OP_*BRA | OP_ALT ... M A
6938 (?()|) OP_*COND | OP_ALT M A
6939 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
6940 (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A
6941 Or nothing, if trace is unnecessary
6942 */
6943
/* Generates the matching path of a bracketed group, optionally preceded by
OP_BRAZERO or OP_BRAMINZERO. The opcodes handled explicitly are OP_CBRA,
OP_SCBRA, OP_ONCE, OP_ONCE_NC (treated as OP_ONCE), OP_SBRA, OP_COND and
OP_SCOND; any other bracket only gets the generic alternative handling.

Only the first alternative is compiled here; the remaining OP_ALT branches
are skipped at the end of the function.

Arguments:
  common   compiler state
  cc       points to the bracket opcode, or to the OP_BRAZERO/OP_BRAMINZERO
           in front of it
  parent   backtrack descriptor of the enclosing construct; for
           OP_BRAMINZERO it is accessed as a braminzero_backtrack

Returns the code pointer just after the closing ket (plus the length of the
repeat data attached to it, if any), or NULL when the sljit compiler
reports an error.

For OP_ONCE the needs_control_head flag is temporarily encoded in the lowest
bit of u.framesize before returning. */
static pcre_uchar *compile_bracket_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
pcre_uchar opcode;
int private_data_ptr = 0;
int offset = 0;
int i, stacksize;
int repeat_ptr = 0, repeat_length = 0;
int repeat_type = 0, repeat_count = 0;
pcre_uchar *ccbegin;
pcre_uchar *matchingpath;
pcre_uchar *slot;
pcre_uchar bra = OP_BRA;
pcre_uchar ket;
assert_backtrack *assert;
BOOL has_alternatives;
BOOL needs_control_head = FALSE;
struct sljit_jump *jump;
struct sljit_jump *skip;
struct sljit_label *rmax_label = NULL;
struct sljit_jump *braminzero = NULL;

PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);

/* Remember and step over a leading OP_BRAZERO / OP_BRAMINZERO. */
if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
  {
  bra = *cc;
  cc++;
  }

opcode = *cc;
ccbegin = cc;
matchingpath = bracketend(cc) - 1 - LINK_SIZE;
ket = *matchingpath;
if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)
  {
  /* Repeat data is attached to the ket: a bounded repeat is compiled as a
  counted loop, so OP_UPTO / OP_MINUPTO are mapped to the repeating kets. */
  repeat_ptr = PRIVATE_DATA(matchingpath);
  repeat_length = PRIVATE_DATA(matchingpath + 1);
  repeat_type = PRIVATE_DATA(matchingpath + 2);
  repeat_count = PRIVATE_DATA(matchingpath + 3);
  SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);
  if (repeat_type == OP_UPTO)
    ket = OP_KETRMAX;
  if (repeat_type == OP_MINUPTO)
    ket = OP_KETRMIN;
  }

if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
  {
  /* Drop this bracket_backtrack. */
  parent->top = backtrack->prev;
  return matchingpath + 1 + LINK_SIZE + repeat_length;
  }

matchingpath = ccbegin + 1 + LINK_SIZE;
SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
cc += GET(cc, 1);

has_alternatives = *cc == OP_ALT;
if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND))
  has_alternatives = (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF || *matchingpath == OP_FAIL) ? FALSE : TRUE;

if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
  opcode = OP_SCOND;
if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
  opcode = OP_ONCE;

if (opcode == OP_CBRA || opcode == OP_SCBRA)
  {
  /* Capturing brackets have a pre-allocated space. */
  offset = GET2(ccbegin, 1 + LINK_SIZE);
  if (common->optimized_cbracket[offset] == 0)
    {
    private_data_ptr = OVECTOR_PRIV(offset);
    offset <<= 1;
    }
  else
    {
    offset <<= 1;
    private_data_ptr = OVECTOR(offset);
    }
  BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
  matchingpath += IMM2_SIZE;
  }
else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
  {
  /* Other brackets simply allocate the next entry. */
  private_data_ptr = PRIVATE_DATA(ccbegin);
  SLJIT_ASSERT(private_data_ptr != 0);
  BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
  if (opcode == OP_ONCE)
    BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
  }

/* Instructions before the first alternative. */
stacksize = 0;
if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
  stacksize++;
if (bra == OP_BRAZERO)
  stacksize++;

if (stacksize > 0)
  allocate_stack(common, stacksize);

stacksize = 0;
if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
  stacksize++;
  }

if (bra == OP_BRAZERO)
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);

if (bra == OP_BRAMINZERO)
  {
  /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  if (ket != OP_KETRMIN)
    {
    free_stack(common, 1);
    braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
    }
  else
    {
    if (opcode == OP_ONCE || opcode >= OP_SBRA)
      {
      jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
      /* Nothing stored during the first run. */
      skip = JUMP(SLJIT_JUMP);
      JUMPHERE(jump);
      /* Checking zero-length iteration. */
      if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
        {
        /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
        braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
        }
      else
        {
        /* Except when the whole stack frame must be saved. */
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
        braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw));
        }
      JUMPHERE(skip);
      }
    else
      {
      jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
      JUMPHERE(jump);
      }
    }
  }

if (repeat_type != 0)
  {
  /* Initialize the repeat counter of a bounded repeat. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, repeat_count);
  if (repeat_type == OP_EXACT)
    rmax_label = LABEL();
  }

if (ket == OP_KETRMIN)
  BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();

if (ket == OP_KETRMAX)
  {
  rmax_label = LABEL();
  if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA && repeat_type == 0)
    BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label;
  }

/* Handling capturing brackets and alternatives. */
if (opcode == OP_ONCE)
  {
  stacksize = 0;
  if (needs_control_head)
    {
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
    stacksize++;
    }

  if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
    {
    /* Neither capturing brackets nor recursions are found in the block. */
    if (ket == OP_KETRMIN)
      {
      stacksize += 2;
      if (!needs_control_head)
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      }
    else
      {
      if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
      if (ket == OP_KETRMAX || has_alternatives)
        stacksize++;
      }

    if (stacksize > 0)
      allocate_stack(common, stacksize);

    stacksize = 0;
    if (needs_control_head)
      {
      stacksize++;
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
      }

    if (ket == OP_KETRMIN)
      {
      if (needs_control_head)
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
      if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
        OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
      }
    else if (ket == OP_KETRMAX || has_alternatives)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
    }
  else
    {
    if (ket != OP_KET || has_alternatives)
      stacksize++;

    stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1;
    allocate_stack(common, stacksize);

    if (needs_control_head)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);

    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));

    stacksize = needs_control_head ? 1 : 0;
    if (ket != OP_KET || has_alternatives)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
      stacksize++;
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
      }
    else
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
      }
    init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1, FALSE);
    }
  }
else if (opcode == OP_CBRA || opcode == OP_SCBRA)
  {
  /* Saving the previous values. */
  if (common->optimized_cbracket[offset >> 1] != 0)
    {
    SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
    allocate_stack(common, 2);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
    }
  else
    {
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    allocate_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
    }
  }
else if (opcode == OP_SBRA || opcode == OP_SCOND)
  {
  /* Saving the previous value. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  allocate_stack(common, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
  }
else if (has_alternatives)
  {
  /* Pushing the starting string pointer. */
  allocate_stack(common, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  }

/* Generating code for the first alternative. */
if (opcode == OP_COND || opcode == OP_SCOND)
  {
  if (*matchingpath == OP_CREF)
    {
    /* The condition fails when the referenced OVECTOR entry equals OVECTOR(1). */
    SLJIT_ASSERT(has_alternatives);
    add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
      CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
    matchingpath += 1 + IMM2_SIZE;
    }
  else if (*matchingpath == OP_DNCREF)
    {
    SLJIT_ASSERT(has_alternatives);

    /* i is the number of name table entries to test, slot the first of them.
    STR_PTR is used as a scratch register, so it is preserved in TMP3. */
    i = GET2(matchingpath, 1 + IMM2_SIZE);
    slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
    OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
    OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
    slot += common->name_entry_size;
    i--;
    while (i-- > 0)
      {
      OP2(SLJIT_SUB, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
      OP2(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, STR_PTR, 0);
      slot += common->name_entry_size;
      }
    OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
    add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), JUMP(SLJIT_ZERO));
    matchingpath += 1 + 2 * IMM2_SIZE;
    }
  else if (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF || *matchingpath == OP_FAIL)
    {
    /* Never has other case. */
    BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
    SLJIT_ASSERT(!has_alternatives);

    /* The condition is decided at compile time; stacksize is reused as the
    result: non-zero selects the first branch, zero the "else" branch. */
    if (*matchingpath == OP_FAIL)
      stacksize = 0;
    /* This must be an else-if: OP_FAIL has to stay out of the OP_DNRREF
    decoding below, which would read the bytes that follow the opcode as a
    name count and a name table index. */
    else if (*matchingpath == OP_RREF)
      {
      stacksize = GET2(matchingpath, 1);
      if (common->currententry == NULL)
        stacksize = 0;
      else if (stacksize == RREF_ANY)
        stacksize = 1;
      else if (common->currententry->start == 0)
        stacksize = stacksize == 0;
      else
        stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);

      if (stacksize != 0)
        matchingpath += 1 + IMM2_SIZE;
      }
    else
      {
      /* OP_DNRREF: search the listed name table entries for the group that
      is currently being compiled as a recursion. */
      if (common->currententry == NULL || common->currententry->start == 0)
        stacksize = 0;
      else
        {
        stacksize = GET2(matchingpath, 1 + IMM2_SIZE);
        slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
        i = (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
        while (stacksize > 0)
          {
          if ((int)GET2(slot, 0) == i)
            break;
          slot += common->name_entry_size;
          stacksize--;
          }
        }

      if (stacksize != 0)
        matchingpath += 1 + 2 * IMM2_SIZE;
      }

    /* The stacksize == 0 is a common "else" case. */
    if (stacksize == 0)
      {
      if (*cc == OP_ALT)
        {
        matchingpath = cc + 1 + LINK_SIZE;
        cc += GET(cc, 1);
        }
      else
        matchingpath = cc;
      }
    }
  else
    {
    SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
    /* Similar code as PUSH_BACKTRACK macro. */
    assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
      return NULL;
    memset(assert, 0, sizeof(assert_backtrack));
    assert->common.cc = matchingpath;
    BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
    matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
    }
  }

compile_matchingpath(common, matchingpath, cc, backtrack);
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  return NULL;

if (opcode == OP_ONCE)
  match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);

/* First pass: count the stack slots needed after the alternative matched. */
stacksize = 0;
if (repeat_type == OP_MINUPTO)
  {
  /* We need to preserve the counter. TMP2 will be used below. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
  stacksize++;
  }
if (ket != OP_KET || bra != OP_BRA)
  stacksize++;
if (offset != 0)
  {
  if (common->capture_last_ptr != 0)
    stacksize++;
  if (common->optimized_cbracket[offset >> 1] == 0)
    stacksize += 2;
  }
if (has_alternatives && opcode != OP_ONCE)
  stacksize++;

if (stacksize > 0)
  allocate_stack(common, stacksize);

/* Second pass: fill the allocated slots in the same order. */
stacksize = 0;
if (repeat_type == OP_MINUPTO)
  {
  /* TMP2 was set above. */
  OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
  stacksize++;
  }

if (ket != OP_KET || bra != OP_BRA)
  {
  if (ket != OP_KET)
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
  else
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
  stacksize++;
  }

if (offset != 0)
  stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);

if (has_alternatives)
  {
  /* The first alternative has index 0. */
  if (opcode != OP_ONCE)
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
  if (ket != OP_KETRMAX)
    BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
  }

/* Must be after the matchingpath label. */
if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
  {
  SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
  }

if (ket == OP_KETRMAX)
  {
  if (repeat_type != 0)
    {
    if (has_alternatives)
      BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, rmax_label);
    /* Drop STR_PTR for greedy plus quantifier. */
    if (opcode != OP_ONCE)
      free_stack(common, 1);
    }
  else if (opcode == OP_ONCE || opcode >= OP_SBRA)
    {
    if (has_alternatives)
      BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
    /* Checking zero-length iteration. */
    if (opcode != OP_ONCE)
      {
      CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0, rmax_label);
      /* Drop STR_PTR for greedy plus quantifier. */
      if (bra != OP_BRAZERO)
        free_stack(common, 1);
      }
    else
      /* TMP2 must contain the starting STR_PTR. */
      CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmax_label);
    }
  else
    JUMPTO(SLJIT_JUMP, rmax_label);
  BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
  }

if (repeat_type == OP_EXACT)
  {
  count_match(common);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
  JUMPTO(SLJIT_NOT_ZERO, rmax_label);
  }
else if (repeat_type == OP_UPTO)
  {
  /* We need to preserve the counter. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
  allocate_stack(common, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
  }

if (bra == OP_BRAZERO)
  BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();

if (bra == OP_BRAMINZERO)
  {
  /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
  JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
  if (braminzero != NULL)
    {
    JUMPHERE(braminzero);
    /* We need to release the end pointer to perform the
    backtrack for the zero-length iteration. When
    framesize is < 0, OP_ONCE will do the release itself. */
    if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      }
    else if (ket == OP_KETRMIN && opcode != OP_ONCE)
      free_stack(common, 1);
    }
  /* Continue to the normal backtrack. */
  }

if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
  count_match(common);

/* Skip the other alternatives. */
while (*cc == OP_ALT)
  cc += GET(cc, 1);
cc += 1 + LINK_SIZE;

/* Temporarily encoding the needs_control_head in framesize. */
if (opcode == OP_ONCE)
  BACKTRACK_AS(bracket_backtrack)->u.framesize = (BACKTRACK_AS(bracket_backtrack)->u.framesize << 1) | (needs_control_head ? 1 : 0);
return cc + repeat_length;
}
7484
compile_bracketpos_matchingpath(compiler_common * common,pcre_uchar * cc,backtrack_common * parent)7485 static pcre_uchar *compile_bracketpos_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
7486 {
7487 DEFINE_COMPILER;
7488 backtrack_common *backtrack;
7489 pcre_uchar opcode;
7490 int private_data_ptr;
7491 int cbraprivptr = 0;
7492 BOOL needs_control_head;
7493 int framesize;
7494 int stacksize;
7495 int offset = 0;
7496 BOOL zero = FALSE;
7497 pcre_uchar *ccbegin = NULL;
7498 int stack; /* Also contains the offset of control head. */
7499 struct sljit_label *loop = NULL;
7500 struct jump_list *emptymatch = NULL;
7501
7502 PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
7503 if (*cc == OP_BRAPOSZERO)
7504 {
7505 zero = TRUE;
7506 cc++;
7507 }
7508
7509 opcode = *cc;
7510 private_data_ptr = PRIVATE_DATA(cc);
7511 SLJIT_ASSERT(private_data_ptr != 0);
7512 BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
7513 switch(opcode)
7514 {
7515 case OP_BRAPOS:
7516 case OP_SBRAPOS:
7517 ccbegin = cc + 1 + LINK_SIZE;
7518 break;
7519
7520 case OP_CBRAPOS:
7521 case OP_SCBRAPOS:
7522 offset = GET2(cc, 1 + LINK_SIZE);
7523 /* This case cannot be optimized in the same was as
7524 normal capturing brackets. */
7525 SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
7526 cbraprivptr = OVECTOR_PRIV(offset);
7527 offset <<= 1;
7528 ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
7529 break;
7530
7531 default:
7532 SLJIT_ASSERT_STOP();
7533 break;
7534 }
7535
7536 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
7537 BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
7538 if (framesize < 0)
7539 {
7540 if (offset != 0)
7541 {
7542 stacksize = 2;
7543 if (common->capture_last_ptr != 0)
7544 stacksize++;
7545 }
7546 else
7547 stacksize = 1;
7548
7549 if (needs_control_head)
7550 stacksize++;
7551 if (!zero)
7552 stacksize++;
7553
7554 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
7555 allocate_stack(common, stacksize);
7556 if (framesize == no_frame)
7557 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
7558
7559 stack = 0;
7560 if (offset != 0)
7561 {
7562 stack = 2;
7563 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
7564 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
7565 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
7566 if (common->capture_last_ptr != 0)
7567 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
7568 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
7569 if (needs_control_head)
7570 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
7571 if (common->capture_last_ptr != 0)
7572 {
7573 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
7574 stack = 3;
7575 }
7576 }
7577 else
7578 {
7579 if (needs_control_head)
7580 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
7581 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
7582 stack = 1;
7583 }
7584
7585 if (needs_control_head)
7586 stack++;
7587 if (!zero)
7588 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), SLJIT_IMM, 1);
7589 if (needs_control_head)
7590 {
7591 stack--;
7592 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
7593 }
7594 }
7595 else
7596 {
7597 stacksize = framesize + 1;
7598 if (!zero)
7599 stacksize++;
7600 if (needs_control_head)
7601 stacksize++;
7602 if (offset == 0)
7603 stacksize++;
7604 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
7605
7606 allocate_stack(common, stacksize);
7607 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7608 if (needs_control_head)
7609 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
7610 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, -STACK(stacksize - 1));
7611
7612 stack = 0;
7613 if (!zero)
7614 {
7615 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
7616 stack = 1;
7617 }
7618 if (needs_control_head)
7619 {
7620 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
7621 stack++;
7622 }
7623 if (offset == 0)
7624 {
7625 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
7626 stack++;
7627 }
7628 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
7629 init_frame(common, cc, NULL, stacksize - 1, stacksize - framesize, FALSE);
7630 stack -= 1 + (offset == 0);
7631 }
7632
7633 if (offset != 0)
7634 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
7635
7636 loop = LABEL();
7637 while (*cc != OP_KETRPOS)
7638 {
7639 backtrack->top = NULL;
7640 backtrack->topbacktracks = NULL;
7641 cc += GET(cc, 1);
7642
7643 compile_matchingpath(common, ccbegin, cc, backtrack);
7644 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
7645 return NULL;
7646
7647 if (framesize < 0)
7648 {
7649 if (framesize == no_frame)
7650 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7651
7652 if (offset != 0)
7653 {
7654 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
7655 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
7656 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
7657 if (common->capture_last_ptr != 0)
7658 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
7659 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
7660 }
7661 else
7662 {
7663 if (opcode == OP_SBRAPOS)
7664 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
7665 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
7666 }
7667
7668 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
7669 add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));
7670
7671 if (!zero)
7672 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
7673 }
7674 else
7675 {
7676 if (offset != 0)
7677 {
7678 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw));
7679 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
7680 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
7681 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
7682 if (common->capture_last_ptr != 0)
7683 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
7684 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
7685 }
7686 else
7687 {
7688 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7689 OP2(SLJIT_ADD, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
7690 if (opcode == OP_SBRAPOS)
7691 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw));
7692 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw), STR_PTR, 0);
7693 }
7694
7695 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
7696 add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));
7697
7698 if (!zero)
7699 {
7700 if (framesize < 0)
7701 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
7702 else
7703 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
7704 }
7705 }
7706
7707 if (needs_control_head)
7708 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));
7709
7710 JUMPTO(SLJIT_JUMP, loop);
7711 flush_stubs(common);
7712
7713 compile_backtrackingpath(common, backtrack->top);
7714 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
7715 return NULL;
7716 set_jumps(backtrack->topbacktracks, LABEL());
7717
7718 if (framesize < 0)
7719 {
7720 if (offset != 0)
7721 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
7722 else
7723 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
7724 }
7725 else
7726 {
7727 if (offset != 0)
7728 {
7729 /* Last alternative. */
7730 if (*cc == OP_KETRPOS)
7731 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7732 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
7733 }
7734 else
7735 {
7736 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7737 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw));
7738 }
7739 }
7740
7741 if (*cc == OP_KETRPOS)
7742 break;
7743 ccbegin = cc + 1 + LINK_SIZE;
7744 }
7745
7746 /* We don't have to restore the control head in case of a failed match. */
7747
7748 backtrack->topbacktracks = NULL;
7749 if (!zero)
7750 {
7751 if (framesize < 0)
7752 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
7753 else /* TMP2 is set to [private_data_ptr] above. */
7754 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), (stacksize - 1) * sizeof(sljit_sw), SLJIT_IMM, 0));
7755 }
7756
7757 /* None of them matched. */
7758 set_jumps(emptymatch, LABEL());
7759 count_match(common);
7760 return cc + 1 + LINK_SIZE;
7761 }
7762
get_iterator_parameters(compiler_common * common,pcre_uchar * cc,pcre_uchar * opcode,pcre_uchar * type,int * max,int * min,pcre_uchar ** end)7763 static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, int *max, int *min, pcre_uchar **end)
7764 {
7765 int class_len;
7766
7767 *opcode = *cc;
7768 if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
7769 {
7770 cc++;
7771 *type = OP_CHAR;
7772 }
7773 else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
7774 {
7775 cc++;
7776 *type = OP_CHARI;
7777 *opcode -= OP_STARI - OP_STAR;
7778 }
7779 else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
7780 {
7781 cc++;
7782 *type = OP_NOT;
7783 *opcode -= OP_NOTSTAR - OP_STAR;
7784 }
7785 else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
7786 {
7787 cc++;
7788 *type = OP_NOTI;
7789 *opcode -= OP_NOTSTARI - OP_STAR;
7790 }
7791 else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
7792 {
7793 cc++;
7794 *opcode -= OP_TYPESTAR - OP_STAR;
7795 *type = 0;
7796 }
7797 else
7798 {
7799 SLJIT_ASSERT(*opcode == OP_CLASS || *opcode == OP_NCLASS || *opcode == OP_XCLASS);
7800 *type = *opcode;
7801 cc++;
7802 class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(pcre_uchar))) : GET(cc, 0);
7803 *opcode = cc[class_len - 1];
7804 if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
7805 {
7806 *opcode -= OP_CRSTAR - OP_STAR;
7807 if (end != NULL)
7808 *end = cc + class_len;
7809 }
7810 else if (*opcode >= OP_CRPOSSTAR && *opcode <= OP_CRPOSQUERY)
7811 {
7812 *opcode -= OP_CRPOSSTAR - OP_POSSTAR;
7813 if (end != NULL)
7814 *end = cc + class_len;
7815 }
7816 else
7817 {
7818 SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE || *opcode == OP_CRPOSRANGE);
7819 *max = GET2(cc, (class_len + IMM2_SIZE));
7820 *min = GET2(cc, class_len);
7821
7822 if (*min == 0)
7823 {
7824 SLJIT_ASSERT(*max != 0);
7825 *opcode = (*opcode == OP_CRRANGE) ? OP_UPTO : (*opcode == OP_CRMINRANGE ? OP_MINUPTO : OP_POSUPTO);
7826 }
7827 if (*max == *min)
7828 *opcode = OP_EXACT;
7829
7830 if (end != NULL)
7831 *end = cc + class_len + 2 * IMM2_SIZE;
7832 }
7833 return cc;
7834 }
7835
7836 if (*opcode == OP_UPTO || *opcode == OP_MINUPTO || *opcode == OP_EXACT || *opcode == OP_POSUPTO)
7837 {
7838 *max = GET2(cc, 0);
7839 cc += IMM2_SIZE;
7840 }
7841
7842 if (*type == 0)
7843 {
7844 *type = *cc;
7845 if (end != NULL)
7846 *end = next_opcode(common, cc);
7847 cc++;
7848 return cc;
7849 }
7850
7851 if (end != NULL)
7852 {
7853 *end = cc + 1;
7854 #ifdef SUPPORT_UTF
7855 if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
7856 #endif
7857 }
7858 return cc;
7859 }
7860
/* Emits the matching path for a repeated single item (character, character
type or class).

The repeat is decoded with get_iterator_parameters(), an iterator_backtrack
record is pushed onto parent, and code is generated according to the
normalised repeat opcode. The returned pointer is the "end" value reported by
get_iterator_parameters(). NULL is the value handed to PUSH_BACKTRACK as its
error result (presumably returned when the record cannot be allocated). */

static pcre_uchar *compile_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
pcre_uchar opcode;
pcre_uchar type;
int max = -1, min = -1;
pcre_uchar *end;
jump_list *nomatch = NULL;
struct sljit_jump *jump = NULL;
struct sljit_label *label;
int private_data_ptr = PRIVATE_DATA(cc);
/* The two state words of the iterator live either in the private data area
(addressed through SLJIT_SP) or, when no private data was assigned, in the
top two slots of the backtrack stack. */
int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
int tmp_base, tmp_offset;

PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);

cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &min, &end);

/* Choose the scratch location used by the non-backtracking (exact and
possessive) repeats: TMP3 for the item types listed first, the POSSESSIVE0
memory slot for the remaining ones. */
switch(type)
  {
  case OP_NOT_DIGIT:
  case OP_DIGIT:
  case OP_NOT_WHITESPACE:
  case OP_WHITESPACE:
  case OP_NOT_WORDCHAR:
  case OP_WORDCHAR:
  case OP_ANY:
  case OP_ALLANY:
  case OP_ANYBYTE:
  case OP_ANYNL:
  case OP_NOT_HSPACE:
  case OP_HSPACE:
  case OP_NOT_VSPACE:
  case OP_VSPACE:
  case OP_CHAR:
  case OP_CHARI:
  case OP_NOT:
  case OP_NOTI:
  case OP_CLASS:
  case OP_NCLASS:
  tmp_base = TMP3;
  tmp_offset = 0;
  break;

  default:
  SLJIT_ASSERT_STOP();
  /* Fall through. */

  case OP_EXTUNI:
  case OP_XCLASS:
  case OP_NOTPROP:
  case OP_PROP:
  tmp_base = SLJIT_MEM1(SLJIT_SP);
  tmp_offset = POSSESSIVE0;
  break;
  }

switch(opcode)
  {
  /* Greedy repeats. */
  case OP_STAR:
  case OP_PLUS:
  case OP_UPTO:
  case OP_CRRANGE:
  if (type == OP_ANYNL || type == OP_EXTUNI)
    {
    /* For these two types every matched position is pushed onto the stack;
    a zero word is stored first, which the backtracking path compares
    against to detect the end of the saved positions. */
    SLJIT_ASSERT(private_data_ptr == 0);
    if (opcode == OP_STAR || opcode == OP_UPTO)
      {
      /* Zero repetitions are allowed: also save the starting position. */
      allocate_stack(common, 2);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
      }
    else
      {
      allocate_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }

    /* Bounded variants count the repetitions in POSSESSIVE0. */
    if (opcode == OP_UPTO || opcode == OP_CRRANGE)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0);

    label = LABEL();
    compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
    if (opcode == OP_UPTO || opcode == OP_CRRANGE)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
      OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
      /* Below the minimum: loop again without saving a position. */
      if (opcode == OP_CRRANGE && min > 0)
        CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, label);
      /* Maximum reached: leave the loop (jump is bound after the loop). */
      if (opcode == OP_UPTO || (opcode == OP_CRRANGE && max > 0))
        jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
      }

    /* We cannot use TMP3 because of this allocate_stack. */
    allocate_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    JUMPTO(SLJIT_JUMP, label);
    if (jump != NULL)
      JUMPHERE(jump);
    }
  else
    {
    /* Other types: only two state words are kept. offset0 holds the current
    end position; offset1 holds the start position (STAR / PLUS) or a
    counter that starts at 1 (UPTO / CRRANGE). */
    if (opcode == OP_PLUS)
      compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
    if (private_data_ptr == 0)
      allocate_stack(common, 2);
    OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
    if (opcode <= OP_PLUS)
      OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
    else
      OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1);
    label = LABEL();
    compile_char1_matchingpath(common, type, cc, &nomatch);
    OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
    if (opcode <= OP_PLUS)
      JUMPTO(SLJIT_JUMP, label);
    else if (opcode == OP_CRRANGE && max == 0)
      {
      /* max == 0 is handled as "no upper limit": count and loop. */
      OP2(SLJIT_ADD, base, offset1, base, offset1, SLJIT_IMM, 1);
      JUMPTO(SLJIT_JUMP, label);
      }
    else
      {
      OP1(SLJIT_MOV, TMP1, 0, base, offset1);
      OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
      OP1(SLJIT_MOV, base, offset1, TMP1, 0);
      CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, max + 1, label);
      }
    set_jumps(nomatch, LABEL());
    /* A range that stopped before reaching min fails. */
    if (opcode == OP_CRRANGE)
      add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS, base, offset1, SLJIT_IMM, min + 1));
    /* Continue from the last successfully matched position. */
    OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
    }
  BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
  break;

  /* Minimizing star / plus: only the current position is saved here. */
  case OP_MINSTAR:
  case OP_MINPLUS:
  if (opcode == OP_MINPLUS)
    compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
  if (private_data_ptr == 0)
    allocate_stack(common, 1);
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
  break;

  /* Minimizing bounded repeats: save the position and a counter set to 1. */
  case OP_MINUPTO:
  case OP_CRMINRANGE:
  if (private_data_ptr == 0)
    allocate_stack(common, 2);
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1);
  /* An unconditional jump is queued on topbacktracks for the range form. */
  if (opcode == OP_CRMINRANGE)
    add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
  BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
  break;

  /* Optional item: save the position; the greedy form tries the item now. */
  case OP_QUERY:
  case OP_MINQUERY:
  if (private_data_ptr == 0)
    allocate_stack(common, 1);
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  if (opcode == OP_QUERY)
    compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
  BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
  break;

  /* Fixed count: a simple countdown loop starting from max. */
  case OP_EXACT:
  OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
  label = LABEL();
  compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
  OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
  JUMPTO(SLJIT_NOT_ZERO, label);
  break;

  /* Possessive repeats: the scratch location tracks the last good
  position and nothing is saved on the stack. */
  case OP_POSSTAR:
  case OP_POSPLUS:
  case OP_POSUPTO:
  if (opcode == OP_POSPLUS)
    compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
  if (opcode == OP_POSUPTO)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, SLJIT_IMM, max);
  OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
  label = LABEL();
  compile_char1_matchingpath(common, type, cc, &nomatch);
  OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
  if (opcode != OP_POSUPTO)
    JUMPTO(SLJIT_JUMP, label);
  else
    {
    /* POSSESSIVE1 counts down the remaining repetitions. */
    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, label);
    }
  set_jumps(nomatch, LABEL());
  OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
  break;

  /* Possessive optional item: on failure restore the saved position. */
  case OP_POSQUERY:
  OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
  compile_char1_matchingpath(common, type, cc, &nomatch);
  OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
  set_jumps(nomatch, LABEL());
  OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
  break;

  case OP_CRPOSRANGE:
  /* Combination of OP_EXACT and OP_POSSTAR or OP_POSUPTO */
  /* First the mandatory part: exactly min repetitions. */
  OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, min);
  label = LABEL();
  compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
  OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
  JUMPTO(SLJIT_NOT_ZERO, label);

  /* Then the optional part: up to max - min more, or unlimited if max == 0. */
  if (max != 0)
    {
    SLJIT_ASSERT(max - min > 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, SLJIT_IMM, max - min);
    }
  OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
  label = LABEL();
  compile_char1_matchingpath(common, type, cc, &nomatch);
  OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
  if (max == 0)
    JUMPTO(SLJIT_JUMP, label);
  else
    {
    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, label);
    }
  set_jumps(nomatch, LABEL());
  OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
  break;

  default:
  SLJIT_ASSERT_STOP();
  break;
  }

count_match(common);
return end;
}
8106
compile_fail_accept_matchingpath(compiler_common * common,pcre_uchar * cc,backtrack_common * parent)8107 static SLJIT_INLINE pcre_uchar *compile_fail_accept_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
8108 {
8109 DEFINE_COMPILER;
8110 backtrack_common *backtrack;
8111
8112 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
8113
8114 if (*cc == OP_FAIL)
8115 {
8116 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
8117 return cc + 1;
8118 }
8119
8120 if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL || !common->might_be_empty)
8121 {
8122 /* No need to check notempty conditions. */
8123 if (common->accept_label == NULL)
8124 add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
8125 else
8126 JUMPTO(SLJIT_JUMP, common->accept_label);
8127 return cc + 1;
8128 }
8129
8130 if (common->accept_label == NULL)
8131 add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)));
8132 else
8133 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), common->accept_label);
8134 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
8135 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
8136 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
8137 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
8138 if (common->accept_label == NULL)
8139 add_jump(compiler, &common->accept, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
8140 else
8141 CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0, common->accept_label);
8142 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
8143 if (common->accept_label == NULL)
8144 add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
8145 else
8146 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->accept_label);
8147 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
8148 return cc + 1;
8149 }
8150
compile_close_matchingpath(compiler_common * common,pcre_uchar * cc)8151 static SLJIT_INLINE pcre_uchar *compile_close_matchingpath(compiler_common *common, pcre_uchar *cc)
8152 {
8153 DEFINE_COMPILER;
8154 int offset = GET2(cc, 1);
8155 BOOL optimized_cbracket = common->optimized_cbracket[offset] != 0;
8156
8157 /* Data will be discarded anyway... */
8158 if (common->currententry != NULL)
8159 return cc + 1 + IMM2_SIZE;
8160
8161 if (!optimized_cbracket)
8162 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR_PRIV(offset));
8163 offset <<= 1;
8164 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
8165 if (!optimized_cbracket)
8166 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
8167 return cc + 1 + IMM2_SIZE;
8168 }
8169
compile_control_verb_matchingpath(compiler_common * common,pcre_uchar * cc,backtrack_common * parent)8170 static SLJIT_INLINE pcre_uchar *compile_control_verb_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
8171 {
8172 DEFINE_COMPILER;
8173 backtrack_common *backtrack;
8174 pcre_uchar opcode = *cc;
8175 pcre_uchar *ccend = cc + 1;
8176
8177 if (opcode == OP_PRUNE_ARG || opcode == OP_SKIP_ARG || opcode == OP_THEN_ARG)
8178 ccend += 2 + cc[1];
8179
8180 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
8181
8182 if (opcode == OP_SKIP)
8183 {
8184 allocate_stack(common, 1);
8185 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
8186 return ccend;
8187 }
8188
8189 if (opcode == OP_PRUNE_ARG || opcode == OP_THEN_ARG)
8190 {
8191 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
8192 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
8193 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
8194 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
8195 }
8196
8197 return ccend;
8198 }
8199
/* One-unit pseudo byte code holding OP_THEN_TRAP. The then-trap backtrack
records created in this file point their common.cc field at this buffer
instead of at a position inside the compiled pattern. */
static pcre_uchar then_trap_opcode[1] = { OP_THEN_TRAP };
8201
compile_then_trap_matchingpath(compiler_common * common,pcre_uchar * cc,pcre_uchar * ccend,backtrack_common * parent)8202 static SLJIT_INLINE void compile_then_trap_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, backtrack_common *parent)
8203 {
8204 DEFINE_COMPILER;
8205 backtrack_common *backtrack;
8206 BOOL needs_control_head;
8207 int size;
8208
8209 PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
8210 common->then_trap = BACKTRACK_AS(then_trap_backtrack);
8211 BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
8212 BACKTRACK_AS(then_trap_backtrack)->start = (sljit_sw)(cc - common->start);
8213 BACKTRACK_AS(then_trap_backtrack)->framesize = get_framesize(common, cc, ccend, FALSE, &needs_control_head);
8214
8215 size = BACKTRACK_AS(then_trap_backtrack)->framesize;
8216 size = 3 + (size < 0 ? 0 : size);
8217
8218 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
8219 allocate_stack(common, size);
8220 if (size > 3)
8221 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0, SLJIT_IMM, (size - 3) * sizeof(sljit_sw));
8222 else
8223 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
8224 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 1), SLJIT_IMM, BACKTRACK_AS(then_trap_backtrack)->start);
8225 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 2), SLJIT_IMM, type_then_trap);
8226 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 3), TMP2, 0);
8227
8228 size = BACKTRACK_AS(then_trap_backtrack)->framesize;
8229 if (size >= 0)
8230 init_frame(common, cc, ccend, size - 1, 0, FALSE);
8231 }
8232
/* Emits the matching path for the byte code range cc .. ccend.

Each opcode is dispatched to its code generator; most generators push a
backtrack record onto parent and return the address of the next opcode. The
function returns early when a generator returns NULL or an unknown opcode is
met. If the range starts at a position flagged in common->then_offsets, the
whole range is wrapped in a "then trap": one record is pushed before the
items and another one after them. */

static void compile_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
BOOL has_then_trap = FALSE;
then_trap_backtrack *save_then_trap = NULL;

SLJIT_ASSERT(*ccend == OP_END || (*ccend >= OP_ALT && *ccend <= OP_KETRPOS));

if (common->has_then && common->then_offsets[cc - common->start] != 0)
  {
  SLJIT_ASSERT(*ccend != OP_END && common->control_head_ptr != 0);
  has_then_trap = TRUE;
  /* The enclosing trap is restored once the range has been compiled. */
  save_then_trap = common->then_trap;
  /* Tail item on backtrack. */
  compile_then_trap_matchingpath(common, cc, ccend, parent);
  }

while (cc < ccend)
  {
  switch(*cc)
    {
    /* Single, non-repeated items. Their failure jumps are appended to the
    most recent backtrack record if there is one, otherwise to the parent's
    own list. */
    case OP_SOD:
    case OP_SOM:
    case OP_NOT_WORD_BOUNDARY:
    case OP_WORD_BOUNDARY:
    case OP_NOT_DIGIT:
    case OP_DIGIT:
    case OP_NOT_WHITESPACE:
    case OP_WHITESPACE:
    case OP_NOT_WORDCHAR:
    case OP_WORDCHAR:
    case OP_ANY:
    case OP_ALLANY:
    case OP_ANYBYTE:
    case OP_NOTPROP:
    case OP_PROP:
    case OP_ANYNL:
    case OP_NOT_HSPACE:
    case OP_HSPACE:
    case OP_NOT_VSPACE:
    case OP_VSPACE:
    case OP_EXTUNI:
    case OP_EODN:
    case OP_EOD:
    case OP_CIRC:
    case OP_CIRCM:
    case OP_DOLL:
    case OP_DOLLM:
    case OP_NOT:
    case OP_NOTI:
    case OP_REVERSE:
    cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
    break;

    /* Save the old OVECTOR(0) on the stack and replace it with the current
    subject position. */
    case OP_SET_SOM:
    PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
    allocate_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
    cc++;
    break;

    /* Literal characters: the multi-character generator is used only in
    JIT_COMPILE mode. */
    case OP_CHAR:
    case OP_CHARI:
    if (common->mode == JIT_COMPILE)
      cc = compile_charn_matchingpath(common, cc, ccend, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
    else
      cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
    break;

    /* Repeated single items. */
    case OP_STAR:
    case OP_MINSTAR:
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_UPTO:
    case OP_MINUPTO:
    case OP_EXACT:
    case OP_POSSTAR:
    case OP_POSPLUS:
    case OP_POSQUERY:
    case OP_POSUPTO:
    case OP_STARI:
    case OP_MINSTARI:
    case OP_PLUSI:
    case OP_MINPLUSI:
    case OP_QUERYI:
    case OP_MINQUERYI:
    case OP_UPTOI:
    case OP_MINUPTOI:
    case OP_EXACTI:
    case OP_POSSTARI:
    case OP_POSPLUSI:
    case OP_POSQUERYI:
    case OP_POSUPTOI:
    case OP_NOTSTAR:
    case OP_NOTMINSTAR:
    case OP_NOTPLUS:
    case OP_NOTMINPLUS:
    case OP_NOTQUERY:
    case OP_NOTMINQUERY:
    case OP_NOTUPTO:
    case OP_NOTMINUPTO:
    case OP_NOTEXACT:
    case OP_NOTPOSSTAR:
    case OP_NOTPOSPLUS:
    case OP_NOTPOSQUERY:
    case OP_NOTPOSUPTO:
    case OP_NOTSTARI:
    case OP_NOTMINSTARI:
    case OP_NOTPLUSI:
    case OP_NOTMINPLUSI:
    case OP_NOTQUERYI:
    case OP_NOTMINQUERYI:
    case OP_NOTUPTOI:
    case OP_NOTMINUPTOI:
    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI:
    case OP_NOTPOSPLUSI:
    case OP_NOTPOSQUERYI:
    case OP_NOTPOSUPTOI:
    case OP_TYPESTAR:
    case OP_TYPEMINSTAR:
    case OP_TYPEPLUS:
    case OP_TYPEMINPLUS:
    case OP_TYPEQUERY:
    case OP_TYPEMINQUERY:
    case OP_TYPEUPTO:
    case OP_TYPEMINUPTO:
    case OP_TYPEEXACT:
    case OP_TYPEPOSSTAR:
    case OP_TYPEPOSPLUS:
    case OP_TYPEPOSQUERY:
    case OP_TYPEPOSUPTO:
    cc = compile_iterator_matchingpath(common, cc, parent);
    break;

    /* Classes: treated as an iterator when the code unit after the class
    data is a repeat opcode, otherwise as a single item. */
    case OP_CLASS:
    case OP_NCLASS:
    if (cc[1 + (32 / sizeof(pcre_uchar))] >= OP_CRSTAR && cc[1 + (32 / sizeof(pcre_uchar))] <= OP_CRPOSRANGE)
      cc = compile_iterator_matchingpath(common, cc, parent);
    else
      cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
    break;

#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
    case OP_XCLASS:
    if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRPOSRANGE)
      cc = compile_iterator_matchingpath(common, cc, parent);
    else
      cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
    break;
#endif

    /* Back references, with or without a following repeat opcode. */
    case OP_REF:
    case OP_REFI:
    if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRPOSRANGE)
      cc = compile_ref_iterator_matchingpath(common, cc, parent);
    else
      {
      compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
      cc += 1 + IMM2_SIZE;
      }
    break;

    case OP_DNREF:
    case OP_DNREFI:
    if (cc[1 + 2 * IMM2_SIZE] >= OP_CRSTAR && cc[1 + 2 * IMM2_SIZE] <= OP_CRPOSRANGE)
      cc = compile_ref_iterator_matchingpath(common, cc, parent);
    else
      {
      compile_dnref_search(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
      compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
      cc += 1 + 2 * IMM2_SIZE;
      }
    break;

    case OP_RECURSE:
    cc = compile_recurse_matchingpath(common, cc, parent);
    break;

    case OP_CALLOUT:
    cc = compile_callout_matchingpath(common, cc, parent);
    break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
    cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
    break;

    /* OP_BRAMINZERO: the bracket itself is skipped on the matching path.
    One stack slot is used when the bracket does not end with OP_KETRMIN,
    two slots when it does. */
    case OP_BRAMINZERO:
    PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc);
    cc = bracketend(cc + 1);
    if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN)
      {
      allocate_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      }
    else
      {
      allocate_stack(common, 2);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
      }
    BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL();
    if (cc[1] > OP_ASSERTBACK_NOT)
      count_match(common);
    break;

    case OP_ONCE:
    case OP_ONCE_NC:
    case OP_BRA:
    case OP_CBRA:
    case OP_COND:
    case OP_SBRA:
    case OP_SCBRA:
    case OP_SCOND:
    cc = compile_bracket_matchingpath(common, cc, parent);
    break;

    /* OP_BRAZERO may precede either a bracket or an assertion; the opcode
    that follows decides which generator is used. */
    case OP_BRAZERO:
    if (cc[1] > OP_ASSERTBACK_NOT)
      cc = compile_bracket_matchingpath(common, cc, parent);
    else
      {
      PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
      cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
      }
    break;

    case OP_BRAPOS:
    case OP_CBRAPOS:
    case OP_SBRAPOS:
    case OP_SCBRAPOS:
    case OP_BRAPOSZERO:
    cc = compile_bracketpos_matchingpath(common, cc, parent);
    break;

    /* OP_MARK: save the previous mark on the stack and publish the new
    name (cc + 2) in the local mark slot and in the arguments. When
    has_skip_arg is set, a five-slot type_mark entry is also linked into the
    control head chain. */
    case OP_MARK:
    PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
    SLJIT_ASSERT(common->mark_ptr != 0);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
    allocate_stack(common, common->has_skip_arg ? 5 : 1);
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0), TMP2, 0);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
    if (common->has_skip_arg)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, type_mark);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), SLJIT_IMM, (sljit_sw)(cc + 2));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
      }
    cc += 1 + 2 + cc[1];
    break;

    case OP_PRUNE:
    case OP_PRUNE_ARG:
    case OP_SKIP:
    case OP_SKIP_ARG:
    case OP_THEN:
    case OP_THEN_ARG:
    case OP_COMMIT:
    cc = compile_control_verb_matchingpath(common, cc, parent);
    break;

    case OP_FAIL:
    case OP_ACCEPT:
    case OP_ASSERT_ACCEPT:
    cc = compile_fail_accept_matchingpath(common, cc, parent);
    break;

    case OP_CLOSE:
    cc = compile_close_matchingpath(common, cc);
    break;

    /* OP_SKIPZERO: no code is generated for the bracket that follows. */
    case OP_SKIPZERO:
    cc = bracketend(cc + 1);
    break;

    default:
    SLJIT_ASSERT_STOP();
    return;
    }
  /* A NULL result from a generator stops the compilation of this range. */
  if (cc == NULL)
    return;
  }

if (has_then_trap)
  {
  /* Head item on backtrack. */
  PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
  BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
  BACKTRACK_AS(then_trap_backtrack)->then_trap = common->then_trap;
  common->then_trap = save_then_trap;
  }
SLJIT_ASSERT(cc == ccend);
}
8541
/* The helper macros of the matching-path generators are retired here. */
#undef PUSH_BACKTRACK
#undef PUSH_BACKTRACK_NOVALUE
#undef BACKTRACK_AS

/* Compiles the backtracking path of "current" and returns from the calling
(void) function if the compiler has recorded an error. Expects "common" and
"compiler" to be in scope. */
#define COMPILE_BACKTRACKINGPATH(current) \
  do \
    { \
    compile_backtrackingpath(common, (current)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    } \
  while (0)

/* Views the "current" backtrack record as a pointer to the given record type. */
#define CURRENT_AS(type) ((type *)current)
8556
/* Emits the backtracking path of a single-item iterator whose matching path
was generated by compile_iterator_matchingpath().

The repeat is decoded again from current->cc. Depending on the repeat kind
the code either gives back one item (greedy), tries to take one more item
(minimizing), or only binds the pending failure jumps (exact and possessive
repeats). When another attempt is possible, control goes back to the
matchingpath label stored in the iterator_backtrack record. */

static void compile_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
pcre_uchar *cc = current->cc;
pcre_uchar opcode;
pcre_uchar type;
int max = -1, min = -1;
struct sljit_label *label = NULL;
struct sljit_jump *jump = NULL;
jump_list *jumplist = NULL;
int private_data_ptr = PRIVATE_DATA(cc);
/* Same state word selection as on the matching path: private data slots
when assigned, otherwise the top two stack slots. */
int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);

cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &min, NULL);

switch(opcode)
  {
  /* Greedy repeats. */
  case OP_STAR:
  case OP_PLUS:
  case OP_UPTO:
  case OP_CRRANGE:
  if (type == OP_ANYNL || type == OP_EXTUNI)
    {
    /* Pop one saved position; a zero word means nothing is left to retry. */
    SLJIT_ASSERT(private_data_ptr == 0);
    set_jumps(current->topbacktracks, LABEL());
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath);
    }
  else
    {
    if (opcode == OP_UPTO)
      min = 0;
    if (opcode <= OP_PLUS)
      {
      /* STAR / PLUS: stop once the position is back at the saved start. */
      OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
      jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, base, offset1);
      }
    else
      {
      /* UPTO / CRRANGE: stop once the counter is down to min + 1,
      otherwise decrement it. */
      OP1(SLJIT_MOV, TMP1, 0, base, offset1);
      OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
      jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, SLJIT_IMM, min + 1);
      OP2(SLJIT_SUB, base, offset1, TMP1, 0, SLJIT_IMM, 1);
      }
    /* Give back one item and retry what follows the iterator. */
    skip_char_back(common);
    OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
    JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);
    /* CRRANGE failures land before the stack is released, PLUS failures
    after it. */
    if (opcode == OP_CRRANGE)
      set_jumps(current->topbacktracks, LABEL());
    JUMPHERE(jump);
    if (private_data_ptr == 0)
      free_stack(common, 2);
    if (opcode == OP_PLUS)
      set_jumps(current->topbacktracks, LABEL());
    }
  break;

  /* Minimizing star / plus: try to match one more item. */
  case OP_MINSTAR:
  case OP_MINPLUS:
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  compile_char1_matchingpath(common, type, cc, &jumplist);
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);
  set_jumps(jumplist, LABEL());
  if (private_data_ptr == 0)
    free_stack(common, 1);
  if (opcode == OP_MINPLUS)
    set_jumps(current->topbacktracks, LABEL());
  break;

  /* Minimizing bounded repeats: match one more item and bump the counter. */
  case OP_MINUPTO:
  case OP_CRMINRANGE:
  if (opcode == OP_CRMINRANGE)
    {
    /* The jump queued on the matching path lands here. */
    label = LABEL();
    set_jumps(current->topbacktracks, label);
    }
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  compile_char1_matchingpath(common, type, cc, &jumplist);

  OP1(SLJIT_MOV, TMP1, 0, base, offset1);
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP1(SLJIT_MOV, base, offset1, TMP1, 0);

  /* Keep matching items until the counter reaches min + 1. */
  if (opcode == OP_CRMINRANGE)
    CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min + 1, label);

  /* Resume the matching path unless the upper limit (if any) was passed. */
  if (opcode == OP_CRMINRANGE && max == 0)
    JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);
  else
    CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, max + 2, CURRENT_AS(iterator_backtrack)->matchingpath);

  set_jumps(jumplist, LABEL());
  if (private_data_ptr == 0)
    free_stack(common, 2);
  break;

  /* Greedy optional item: the saved position is cleared when consumed, so
  each entry point below can retry the matching path only while the slot is
  non-zero. */
  case OP_QUERY:
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath);
  jump = JUMP(SLJIT_JUMP);
  /* Entry point used when the item itself failed on the matching path. */
  set_jumps(current->topbacktracks, LABEL());
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
  JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);
  JUMPHERE(jump);
  if (private_data_ptr == 0)
    free_stack(common, 1);
  break;

  /* Minimizing optional item: try the item once, from the saved position. */
  case OP_MINQUERY:
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
  jump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  compile_char1_matchingpath(common, type, cc, &jumplist);
  JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);
  set_jumps(jumplist, LABEL());
  JUMPHERE(jump);
  if (private_data_ptr == 0)
    free_stack(common, 1);
  break;

  /* No retry is possible: only bind the failure jumps of the mandatory part. */
  case OP_EXACT:
  case OP_POSPLUS:
  case OP_CRPOSRANGE:
  set_jumps(current->topbacktracks, LABEL());
  break;

  /* Nothing was queued on topbacktracks for these on the matching path. */
  case OP_POSSTAR:
  case OP_POSQUERY:
  case OP_POSUPTO:
  break;

  default:
  SLJIT_ASSERT_STOP();
  break;
  }
}
8700
compile_ref_iterator_backtrackingpath(compiler_common * common,struct backtrack_common * current)8701 static SLJIT_INLINE void compile_ref_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
8702 {
8703 DEFINE_COMPILER;
8704 pcre_uchar *cc = current->cc;
8705 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
8706 pcre_uchar type;
8707
8708 type = cc[ref ? 1 + IMM2_SIZE : 1 + 2 * IMM2_SIZE];
8709
8710 if ((type & 0x1) == 0)
8711 {
8712 /* Maximize case. */
8713 set_jumps(current->topbacktracks, LABEL());
8714 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8715 free_stack(common, 1);
8716 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath);
8717 return;
8718 }
8719
8720 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8721 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath);
8722 set_jumps(current->topbacktracks, LABEL());
8723 free_stack(common, ref ? 2 : 3);
8724 }
8725
compile_recurse_backtrackingpath(compiler_common * common,struct backtrack_common * current)8726 static SLJIT_INLINE void compile_recurse_backtrackingpath(compiler_common *common, struct backtrack_common *current)
8727 {
8728 DEFINE_COMPILER;
8729
8730 if (CURRENT_AS(recurse_backtrack)->inlined_pattern)
8731 compile_backtrackingpath(common, current->top);
8732 set_jumps(current->topbacktracks, LABEL());
8733 if (CURRENT_AS(recurse_backtrack)->inlined_pattern)
8734 return;
8735
8736 if (common->has_set_som && common->mark_ptr != 0)
8737 {
8738 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8739 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
8740 free_stack(common, 2);
8741 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP2, 0);
8742 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
8743 }
8744 else if (common->has_set_som || common->mark_ptr != 0)
8745 {
8746 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8747 free_stack(common, 1);
8748 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr, TMP2, 0);
8749 }
8750 }
8751
/* Generates the backtracking path of an assertion (OP_ASSERT, OP_ASSERT_NOT,
OP_ASSERTBACK, OP_ASSERTBACK_NOT), optionally preceded by OP_BRAZERO.
OP_BRAMINZERO must not reach this function.

Arguments:
  common    compiler state
  current   backtrack record, accessed as assert_backtrack
*/
static void compile_assert_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
pcre_uchar *cc = current->cc;
struct sljit_jump *zero_taken = NULL;
BOOL has_brazero;
BOOL is_negative;

SLJIT_ASSERT(*cc != OP_BRAMINZERO);
has_brazero = (*cc == OP_BRAZERO);
if (has_brazero)
  {
  cc++;
  SLJIT_ASSERT(current->topbacktracks == NULL);
  /* The top stack slot is loaded into STR_PTR; it is tested against zero
  further down. */
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  }
is_negative = (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT);

/* No frame was saved: only the failed jumps need a landing point. */
if (CURRENT_AS(assert_backtrack)->framesize < 0)
  {
  set_jumps(current->topbacktracks, LABEL());

  if (has_brazero)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath);
    free_stack(common, 1);
    }
  return;
  }

/* Negative assertion under OP_BRAZERO: same sequence as above, but without
a landing point for failed jumps. */
if (has_brazero && is_negative)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath);
  free_stack(common, 1);
  return;
  }

if (has_brazero)
  {
  free_stack(common, 1);
  zero_taken = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  }

/* Positive assertions with a frame: reload STACK_TOP from the private data
slot, call the revertframes routine and restore the previous private data
value from the frame. */
if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK)
  {
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr);
  add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(assert_backtrack)->framesize * sizeof(sljit_sw));
  }

set_jumps(current->topbacktracks, LABEL());

if (!has_brazero)
  return;

/* We know there is enough place on the stack, so STACK_TOP is adjusted
directly instead of going through allocate_stack. */
OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
JUMPTO(SLJIT_JUMP, CURRENT_AS(assert_backtrack)->matchingpath);
JUMPHERE(zero_taken);
}
8818
/* Generates the backtracking path of a bracketed group. The group opcodes
tested explicitly below are OP_CBRA, OP_SCBRA, OP_SBRA, OP_ONCE, OP_ONCE_NC
(folded into OP_ONCE), OP_COND and OP_SCOND; the group may be prefixed by
OP_BRAZERO or OP_BRAMINZERO.

Arguments:
  common    compiler state
  current   backtrack record, accessed as bracket_backtrack

Side effects visible here: for OP_ONCE the record's u.framesize is shifted
right by one in place, and current->top, current->topbacktracks and
current->nextbacktracks are reset before each further alternative.

The function returns early, leaving the path unfinished, when
allocate_read_only_data() yields NULL or when sljit_get_compiler_error()
reports an error after an alternative has been compiled.
*/
compile_bracket_backtrackingpath(compiler_common * common,struct backtrack_common * current)8819 static void compile_bracket_backtrackingpath(compiler_common *common, struct backtrack_common *current)
8820 {
8821 DEFINE_COMPILER;
8822 int opcode, stacksize, alt_count, alt_max;
8823 int offset = 0;
8824 int private_data_ptr = CURRENT_AS(bracket_backtrack)->private_data_ptr;
8825 int repeat_ptr = 0, repeat_type = 0, repeat_count = 0;
8826 pcre_uchar *cc = current->cc;
8827 pcre_uchar *ccbegin;
8828 pcre_uchar *ccprev;
8829 pcre_uchar bra = OP_BRA;
8830 pcre_uchar ket;
8831 assert_backtrack *assert;
8832 sljit_uw *next_update_addr = NULL;
8833 BOOL has_alternatives;
8834 BOOL needs_control_head = FALSE;
8835 struct sljit_jump *brazero = NULL;
8836 struct sljit_jump *alt1 = NULL;
8837 struct sljit_jump *alt2 = NULL;
8838 struct sljit_jump *once = NULL;
8839 struct sljit_jump *cond = NULL;
8840 struct sljit_label *rmin_label = NULL;
8841 struct sljit_label *exact_label = NULL;
8842
/* Step over an optional OP_BRAZERO / OP_BRAMINZERO prefix, remembering it in bra. */
8843 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
8844 {
8845 bra = *cc;
8846 cc++;
8847 }
8848
8849 opcode = *cc;
/* ccbegin temporarily points at the closing opcode of the group. A plain
OP_KET whose private data is non-zero carries a repeat descriptor (pointer,
type, count); OP_UPTO and OP_MINUPTO repeats are then handled as
OP_KETRMAX and OP_KETRMIN respectively. */
8850 ccbegin = bracketend(cc) - 1 - LINK_SIZE;
8851 ket = *ccbegin;
8852 if (ket == OP_KET && PRIVATE_DATA(ccbegin) != 0)
8853 {
8854 repeat_ptr = PRIVATE_DATA(ccbegin);
8855 repeat_type = PRIVATE_DATA(ccbegin + 2);
8856 repeat_count = PRIVATE_DATA(ccbegin + 3);
8857 SLJIT_ASSERT(repeat_type != 0 && repeat_count != 0);
8858 if (repeat_type == OP_UPTO)
8859 ket = OP_KETRMAX;
8860 if (repeat_type == OP_MINUPTO)
8861 ket = OP_KETRMIN;
8862 }
/* From here ccbegin is the group opcode and cc is advanced by the link
value stored after it, so *cc == OP_ALT means more alternatives follow. */
8863 ccbegin = cc;
8864 cc += GET(cc, 1);
8865 has_alternatives = *cc == OP_ALT;
8866 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
8867 has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_backtrack)->u.condfailed != NULL;
/* For capturing groups, offset is the GET2 value doubled; it indexes
OVECTOR(offset) and OVECTOR(offset + 1) below. It stays 0 otherwise. */
8868 if (opcode == OP_CBRA || opcode == OP_SCBRA)
8869 offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1;
8870 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
8871 opcode = OP_SCOND;
8872 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
8873 opcode = OP_ONCE;
8874
8875 alt_max = has_alternatives ? no_alternatives(ccbegin) : 0;
8876
8877 /* Decoding the needs_control_head in framesize. */
8878 if (opcode == OP_ONCE)
8879 {
8880 needs_control_head = (CURRENT_AS(bracket_backtrack)->u.framesize & 0x1) != 0;
8881 CURRENT_AS(bracket_backtrack)->u.framesize >>= 1;
8882 }
8883
/* Repeat implemented through a counter: pop the saved counter value into
TMP1 and write it back to repeat_ptr (incremented by one for OP_UPTO). */
8884 if (ket != OP_KET && repeat_type != 0)
8885 {
8886 /* TMP1 is used in OP_KETRMIN below. */
8887 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8888 free_stack(common, 1);
8889 if (repeat_type == OP_UPTO)
8890 OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0, SLJIT_IMM, 1);
8891 else
8892 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
8893 }
8894
/* Entry sequence, chosen by the closing opcode and the zero prefix. The
brazero jump, rmin_label and exact_label created here are resolved or
targeted at the end of the function. */
8895 if (ket == OP_KETRMAX)
8896 {
8897 if (bra == OP_BRAZERO)
8898 {
8899 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8900 free_stack(common, 1);
8901 brazero = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
8902 }
8903 }
8904 else if (ket == OP_KETRMIN)
8905 {
8906 if (bra != OP_BRAMINZERO)
8907 {
8908 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8909 if (repeat_type != 0)
8910 {
8911 /* TMP1 was set a few lines above. */
8912 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
8913 /* Drop STR_PTR for non-greedy plus quantifier. */
8914 if (opcode != OP_ONCE)
8915 free_stack(common, 1);
8916 }
8917 else if (opcode >= OP_SBRA || opcode == OP_ONCE)
8918 {
8919 /* Checking zero-length iteration. */
8920 if (opcode != OP_ONCE || CURRENT_AS(bracket_backtrack)->u.framesize < 0)
8921 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
8922 else
8923 {
8924 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8925 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (CURRENT_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw), CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
8926 }
8927 /* Drop STR_PTR for non-greedy plus quantifier. */
8928 if (opcode != OP_ONCE)
8929 free_stack(common, 1);
8930 }
8931 else
8932 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
8933 }
8934 rmin_label = LABEL();
8935 if (repeat_type != 0)
8936 OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
8937 }
8938 else if (bra == OP_BRAZERO)
8939 {
8940 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8941 free_stack(common, 1);
8942 brazero = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
8943 }
8944 else if (repeat_type == OP_EXACT)
8945 {
8946 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
8947 exact_label = LABEL();
8948 }
8949
/* Capturing group: pop the saved values and write them back to the
ovector pair (and to capture_last_ptr when that is in use). Nothing is
popped here when capture_last_ptr is 0 and the bracket is marked in
optimized_cbracket. */
8950 if (offset != 0)
8951 {
8952 if (common->capture_last_ptr != 0)
8953 {
8954 SLJIT_ASSERT(common->optimized_cbracket[offset >> 1] == 0);
8955 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8956 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
8957 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
8958 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
8959 free_stack(common, 3);
8960 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP2, 0);
8961 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
8962 }
8963 else if (common->optimized_cbracket[offset >> 1] == 0)
8964 {
8965 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8966 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
8967 free_stack(common, 2);
8968 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
8969 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
8970 }
8971 }
8972
/* Dispatch on the value popped into TMP1 to reach the backtracking code of
the alternative that matched. OP_ONCE does not dispatch: it jumps straight
to the 'once' target placed near the end of the function. */
8973 if (SLJIT_UNLIKELY(opcode == OP_ONCE))
8974 {
8975 if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
8976 {
8977 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8978 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
8979 }
8980 once = JUMP(SLJIT_JUMP);
8981 }
8982 else if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
8983 {
8984 if (has_alternatives)
8985 {
8986 /* Always exactly one alternative. */
8987 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8988 free_stack(common, 1);
8989
8990 alt_max = 2;
/* NOTE(review): the other dispatch comparisons in this function use
SLJIT_GREATER_EQUAL; this one uses SLJIT_EQUAL. Presumably equivalent when
only two values can be stored -- confirm against the matching path. */
8991 alt1 = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw));
8992 }
8993 }
8994 else if (has_alternatives)
8995 {
8996 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8997 free_stack(common, 1);
8998
8999 if (alt_max > 4)
9000 {
9001 /* Table jump if alt_max is greater than 4. */
9002 next_update_addr = allocate_read_only_data(common, alt_max * sizeof(sljit_uw));
9003 if (SLJIT_UNLIKELY(next_update_addr == NULL))
9004 return;
9005 sljit_emit_ijump(compiler, SLJIT_JUMP, SLJIT_MEM1(TMP1), (sljit_sw)next_update_addr);
9006 add_label_addr(common, next_update_addr++);
9007 }
9008 else
9009 {
9010 if (alt_max == 4)
9011 alt2 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw));
9012 alt1 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw));
9013 }
9014 }
9015
/* Backtracking path of the first alternative. */
9016 COMPILE_BACKTRACKINGPATH(current->top);
9017 if (current->topbacktracks)
9018 set_jumps(current->topbacktracks, LABEL());
9019
9020 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
9021 {
9022 /* Conditional block always has at most one alternative. */
9023 if (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT)
9024 {
9025 SLJIT_ASSERT(has_alternatives);
9026 assert = CURRENT_AS(bracket_backtrack)->u.assert;
9027 if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK))
9028 {
9029 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
9030 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
9031 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_sw));
9032 }
9033 cond = JUMP(SLJIT_JUMP);
9034 set_jumps(CURRENT_AS(bracket_backtrack)->u.assert->condfailed, LABEL());
9035 }
9036 else if (CURRENT_AS(bracket_backtrack)->u.condfailed != NULL)
9037 {
9038 SLJIT_ASSERT(has_alternatives);
9039 cond = JUMP(SLJIT_JUMP);
9040 set_jumps(CURRENT_AS(bracket_backtrack)->u.condfailed, LABEL());
9041 }
9042 else
9043 SLJIT_ASSERT(!has_alternatives);
9044 }
9045
/* Each pass of the loop emits one further alternative: its matching path,
the stack bookkeeping performed after it matched, the jump to
alternative_matchingpath, the dispatch target for the next alternative and
finally its own backtracking path. alt_count is the value stored on the
stack for the alternative being emitted; it advances in steps of
sizeof(sljit_uw) except for OP_ONCE. */
9046 if (has_alternatives)
9047 {
9048 alt_count = sizeof(sljit_uw);
9049 do
9050 {
9051 current->top = NULL;
9052 current->topbacktracks = NULL;
9053 current->nextbacktracks = NULL;
9054 /* Conditional blocks always have an additional alternative, even if it is empty. */
9055 if (*cc == OP_ALT)
9056 {
9057 ccprev = cc + 1 + LINK_SIZE;
9058 cc += GET(cc, 1);
9059 if (opcode != OP_COND && opcode != OP_SCOND)
9060 {
9061 if (opcode != OP_ONCE)
9062 {
9063 if (private_data_ptr != 0)
9064 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9065 else
9066 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9067 }
9068 else
9069 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(needs_control_head ? 1 : 0));
9070 }
9071 compile_matchingpath(common, ccprev, cc, current);
9072 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9073 return;
9074 }
9075
9076 /* Instructions after the current alternative is successfully matched. */
9077 /* There is a similar code in compile_bracket_matchingpath. */
9078 if (opcode == OP_ONCE)
9079 match_once_common(common, ket, CURRENT_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
9080
/* First pass over the conditions: count the stack slots needed. The
second pass below fills them in the same order. */
9081 stacksize = 0;
9082 if (repeat_type == OP_MINUPTO)
9083 {
9084 /* We need to preserve the counter. TMP2 will be used below. */
9085 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
9086 stacksize++;
9087 }
9088 if (ket != OP_KET || bra != OP_BRA)
9089 stacksize++;
9090 if (offset != 0)
9091 {
9092 if (common->capture_last_ptr != 0)
9093 stacksize++;
9094 if (common->optimized_cbracket[offset >> 1] == 0)
9095 stacksize += 2;
9096 }
9097 if (opcode != OP_ONCE)
9098 stacksize++;
9099
9100 if (stacksize > 0)
9101 allocate_stack(common, stacksize);
9102
9103 stacksize = 0;
9104 if (repeat_type == OP_MINUPTO)
9105 {
9106 /* TMP2 was set above. */
9107 OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
9108 stacksize++;
9109 }
9110
9111 if (ket != OP_KET || bra != OP_BRA)
9112 {
9113 if (ket != OP_KET)
9114 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
9115 else
9116 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
9117 stacksize++;
9118 }
9119
9120 if (offset != 0)
9121 stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
9122
9123 if (opcode != OP_ONCE)
9124 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, alt_count);
9125
9126 if (offset != 0 && ket == OP_KETRMAX && common->optimized_cbracket[offset >> 1] != 0)
9127 {
9128 /* If ket is not OP_KETRMAX, this code path is executed after the jump to alternative_matchingpath. */
9129 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
9130 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
9131 }
9132
9133 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->alternative_matchingpath);
9134
/* Place the dispatch target for this alternative: either the next table
entry (more than 4 alternatives) or the pending alt1 / alt2 compare jump,
chaining a further compare when another alternative remains. */
9135 if (opcode != OP_ONCE)
9136 {
9137 if (alt_max > 4)
9138 add_label_addr(common, next_update_addr++);
9139 else
9140 {
9141 if (alt_count != 2 * sizeof(sljit_uw))
9142 {
9143 JUMPHERE(alt1);
9144 if (alt_max == 3 && alt_count == sizeof(sljit_uw))
9145 alt2 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw));
9146 }
9147 else
9148 {
9149 JUMPHERE(alt2);
9150 if (alt_max == 4)
9151 alt1 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_uw));
9152 }
9153 }
9154 alt_count += sizeof(sljit_uw);
9155 }
9156
9157 COMPILE_BACKTRACKINGPATH(current->top);
9158 if (current->topbacktracks)
9159 set_jumps(current->topbacktracks, LABEL());
9160 SLJIT_ASSERT(!current->nextbacktracks);
9161 }
9162 while (*cc == OP_ALT);
9163
9164 if (cond != NULL)
9165 {
9166 SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND);
9167 assert = CURRENT_AS(bracket_backtrack)->u.assert;
9168 if ((ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT) && assert->framesize >= 0)
9169 {
9170 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
9171 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
9172 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_sw));
9173 }
9174 JUMPHERE(cond);
9175 }
9176
9177 /* Free the STR_PTR. */
9178 if (private_data_ptr == 0)
9179 free_stack(common, 1);
9180 }
9181
/* All alternatives are exhausted: restore what the group saved on entry
(ovector pair or private data slot), or for OP_ONCE release the saved
slots and place the 'once' jump target. */
9182 if (offset != 0)
9183 {
9184 /* Using both tmp register is better for instruction scheduling. */
9185 if (common->optimized_cbracket[offset >> 1] != 0)
9186 {
9187 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9188 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
9189 free_stack(common, 2);
9190 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
9191 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
9192 }
9193 else
9194 {
9195 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9196 free_stack(common, 1);
9197 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
9198 }
9199 }
9200 else if (opcode == OP_SBRA || opcode == OP_SCOND)
9201 {
9202 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(0));
9203 free_stack(common, 1);
9204 }
9205 else if (opcode == OP_ONCE)
9206 {
9207 cc = ccbegin + GET(ccbegin, 1);
9208 stacksize = needs_control_head ? 1 : 0;
9209
9210 if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
9211 {
9212 /* Reset head and drop saved frame. */
9213 stacksize += CURRENT_AS(bracket_backtrack)->u.framesize + ((ket != OP_KET || *cc == OP_ALT) ? 2 : 1);
9214 }
9215 else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN))
9216 {
9217 /* The STR_PTR must be released. */
9218 stacksize++;
9219 }
9220 free_stack(common, stacksize);
9221
9222 JUMPHERE(once);
9223 /* Restore previous private_data_ptr */
9224 if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
9225 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracket_backtrack)->u.framesize * sizeof(sljit_sw));
9226 else if (ket == OP_KETRMIN)
9227 {
9228 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
9229 /* See the comment below. */
9230 free_stack(common, 2);
9231 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
9232 }
9233 }
9234
/* Exit sequence, mirroring the entry sequence: loop back for another
iteration (exact_label, recursive_matchingpath or rmin_label) or continue
on zero_matchingpath, and resolve the brazero jump created on entry. */
9235 if (repeat_type == OP_EXACT)
9236 {
9237 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
9238 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
9239 CMPTO(SLJIT_LESS_EQUAL, TMP1, 0, SLJIT_IMM, repeat_count, exact_label);
9240 }
9241 else if (ket == OP_KETRMAX)
9242 {
9243 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9244 if (bra != OP_BRAZERO)
9245 free_stack(common, 1);
9246
9247 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
9248 if (bra == OP_BRAZERO)
9249 {
9250 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
9251 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
9252 JUMPHERE(brazero);
9253 free_stack(common, 1);
9254 }
9255 }
9256 else if (ket == OP_KETRMIN)
9257 {
9258 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9259
9260 /* OP_ONCE removes everything in case of a backtrack, so we don't
9261 need to explicitly release the STR_PTR. The extra release would
9262 affect badly the free_stack(2) above. */
9263 if (opcode != OP_ONCE)
9264 free_stack(common, 1);
9265 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rmin_label);
9266 if (opcode == OP_ONCE)
9267 free_stack(common, bra == OP_BRAMINZERO ? 2 : 1);
9268 else if (bra == OP_BRAMINZERO)
9269 free_stack(common, 1);
9270 }
9271 else if (bra == OP_BRAZERO)
9272 {
9273 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9274 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
9275 JUMPHERE(brazero);
9276 }
9277 }
9278
compile_bracketpos_backtrackingpath(compiler_common * common,struct backtrack_common * current)9279 static SLJIT_INLINE void compile_bracketpos_backtrackingpath(compiler_common *common, struct backtrack_common *current)
9280 {
9281 DEFINE_COMPILER;
9282 int offset;
9283 struct sljit_jump *jump;
9284
9285 if (CURRENT_AS(bracketpos_backtrack)->framesize < 0)
9286 {
9287 if (*current->cc == OP_CBRAPOS || *current->cc == OP_SCBRAPOS)
9288 {
9289 offset = (GET2(current->cc, 1 + LINK_SIZE)) << 1;
9290 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9291 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
9292 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
9293 if (common->capture_last_ptr != 0)
9294 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
9295 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
9296 if (common->capture_last_ptr != 0)
9297 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
9298 }
9299 set_jumps(current->topbacktracks, LABEL());
9300 free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
9301 return;
9302 }
9303
9304 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr);
9305 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
9306
9307 if (current->topbacktracks)
9308 {
9309 jump = JUMP(SLJIT_JUMP);
9310 set_jumps(current->topbacktracks, LABEL());
9311 /* Drop the stack frame. */
9312 free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
9313 JUMPHERE(jump);
9314 }
9315 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracketpos_backtrack)->framesize * sizeof(sljit_sw));
9316 }
9317
compile_braminzero_backtrackingpath(compiler_common * common,struct backtrack_common * current)9318 static SLJIT_INLINE void compile_braminzero_backtrackingpath(compiler_common *common, struct backtrack_common *current)
9319 {
9320 assert_backtrack backtrack;
9321
9322 current->top = NULL;
9323 current->topbacktracks = NULL;
9324 current->nextbacktracks = NULL;
9325 if (current->cc[1] > OP_ASSERTBACK_NOT)
9326 {
9327 /* Manual call of compile_bracket_matchingpath and compile_bracket_backtrackingpath. */
9328 compile_bracket_matchingpath(common, current->cc, current);
9329 compile_bracket_backtrackingpath(common, current->top);
9330 }
9331 else
9332 {
9333 memset(&backtrack, 0, sizeof(backtrack));
9334 backtrack.common.cc = current->cc;
9335 backtrack.matchingpath = CURRENT_AS(braminzero_backtrack)->matchingpath;
9336 /* Manual call of compile_assert_matchingpath. */
9337 compile_assert_matchingpath(common, current->cc, &backtrack, FALSE);
9338 }
9339 SLJIT_ASSERT(!current->nextbacktracks && !current->topbacktracks);
9340 }
9341
compile_control_verb_backtrackingpath(compiler_common * common,struct backtrack_common * current)9342 static SLJIT_INLINE void compile_control_verb_backtrackingpath(compiler_common *common, struct backtrack_common *current)
9343 {
9344 DEFINE_COMPILER;
9345 pcre_uchar opcode = *current->cc;
9346 struct sljit_label *loop;
9347 struct sljit_jump *jump;
9348
9349 if (opcode == OP_THEN || opcode == OP_THEN_ARG)
9350 {
9351 if (common->then_trap != NULL)
9352 {
9353 SLJIT_ASSERT(common->control_head_ptr != 0);
9354
9355 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
9356 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, type_then_trap);
9357 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, common->then_trap->start);
9358 jump = JUMP(SLJIT_JUMP);
9359
9360 loop = LABEL();
9361 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), -(int)sizeof(sljit_sw));
9362 JUMPHERE(jump);
9363 CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), -(int)(2 * sizeof(sljit_sw)), TMP1, 0, loop);
9364 CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), -(int)(3 * sizeof(sljit_sw)), TMP2, 0, loop);
9365 add_jump(compiler, &common->then_trap->quit, JUMP(SLJIT_JUMP));
9366 return;
9367 }
9368 else if (common->positive_assert)
9369 {
9370 add_jump(compiler, &common->positive_assert_quit, JUMP(SLJIT_JUMP));
9371 return;
9372 }
9373 }
9374
9375 if (common->local_exit)
9376 {
9377 if (common->quit_label == NULL)
9378 add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
9379 else
9380 JUMPTO(SLJIT_JUMP, common->quit_label);
9381 return;
9382 }
9383
9384 if (opcode == OP_SKIP_ARG)
9385 {
9386 SLJIT_ASSERT(common->control_head_ptr != 0);
9387 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
9388 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
9389 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, (sljit_sw)(current->cc + 2));
9390 sljit_emit_ijump(compiler, SLJIT_CALL2, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_search_mark));
9391 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
9392
9393 OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
9394 add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, -1));
9395 return;
9396 }
9397
9398 if (opcode == OP_SKIP)
9399 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9400 else
9401 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_IMM, 0);
9402 add_jump(compiler, &common->reset_match, JUMP(SLJIT_JUMP));
9403 }
9404
compile_then_trap_backtrackingpath(compiler_common * common,struct backtrack_common * current)9405 static SLJIT_INLINE void compile_then_trap_backtrackingpath(compiler_common *common, struct backtrack_common *current)
9406 {
9407 DEFINE_COMPILER;
9408 struct sljit_jump *jump;
9409 int size;
9410
9411 if (CURRENT_AS(then_trap_backtrack)->then_trap)
9412 {
9413 common->then_trap = CURRENT_AS(then_trap_backtrack)->then_trap;
9414 return;
9415 }
9416
9417 size = CURRENT_AS(then_trap_backtrack)->framesize;
9418 size = 3 + (size < 0 ? 0 : size);
9419
9420 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(size - 3));
9421 free_stack(common, size);
9422 jump = JUMP(SLJIT_JUMP);
9423
9424 set_jumps(CURRENT_AS(then_trap_backtrack)->quit, LABEL());
9425 /* STACK_TOP is set by THEN. */
9426 if (CURRENT_AS(then_trap_backtrack)->framesize >= 0)
9427 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
9428 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9429 free_stack(common, 3);
9430
9431 JUMPHERE(jump);
9432 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
9433 }
9434
compile_backtrackingpath(compiler_common * common,struct backtrack_common * current)9435 static void compile_backtrackingpath(compiler_common *common, struct backtrack_common *current)
9436 {
9437 DEFINE_COMPILER;
9438 then_trap_backtrack *save_then_trap = common->then_trap;
9439
9440 while (current)
9441 {
9442 if (current->nextbacktracks != NULL)
9443 set_jumps(current->nextbacktracks, LABEL());
9444 switch(*current->cc)
9445 {
9446 case OP_SET_SOM:
9447 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9448 free_stack(common, 1);
9449 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP1, 0);
9450 break;
9451
9452 case OP_STAR:
9453 case OP_MINSTAR:
9454 case OP_PLUS:
9455 case OP_MINPLUS:
9456 case OP_QUERY:
9457 case OP_MINQUERY:
9458 case OP_UPTO:
9459 case OP_MINUPTO:
9460 case OP_EXACT:
9461 case OP_POSSTAR:
9462 case OP_POSPLUS:
9463 case OP_POSQUERY:
9464 case OP_POSUPTO:
9465 case OP_STARI:
9466 case OP_MINSTARI:
9467 case OP_PLUSI:
9468 case OP_MINPLUSI:
9469 case OP_QUERYI:
9470 case OP_MINQUERYI:
9471 case OP_UPTOI:
9472 case OP_MINUPTOI:
9473 case OP_EXACTI:
9474 case OP_POSSTARI:
9475 case OP_POSPLUSI:
9476 case OP_POSQUERYI:
9477 case OP_POSUPTOI:
9478 case OP_NOTSTAR:
9479 case OP_NOTMINSTAR:
9480 case OP_NOTPLUS:
9481 case OP_NOTMINPLUS:
9482 case OP_NOTQUERY:
9483 case OP_NOTMINQUERY:
9484 case OP_NOTUPTO:
9485 case OP_NOTMINUPTO:
9486 case OP_NOTEXACT:
9487 case OP_NOTPOSSTAR:
9488 case OP_NOTPOSPLUS:
9489 case OP_NOTPOSQUERY:
9490 case OP_NOTPOSUPTO:
9491 case OP_NOTSTARI:
9492 case OP_NOTMINSTARI:
9493 case OP_NOTPLUSI:
9494 case OP_NOTMINPLUSI:
9495 case OP_NOTQUERYI:
9496 case OP_NOTMINQUERYI:
9497 case OP_NOTUPTOI:
9498 case OP_NOTMINUPTOI:
9499 case OP_NOTEXACTI:
9500 case OP_NOTPOSSTARI:
9501 case OP_NOTPOSPLUSI:
9502 case OP_NOTPOSQUERYI:
9503 case OP_NOTPOSUPTOI:
9504 case OP_TYPESTAR:
9505 case OP_TYPEMINSTAR:
9506 case OP_TYPEPLUS:
9507 case OP_TYPEMINPLUS:
9508 case OP_TYPEQUERY:
9509 case OP_TYPEMINQUERY:
9510 case OP_TYPEUPTO:
9511 case OP_TYPEMINUPTO:
9512 case OP_TYPEEXACT:
9513 case OP_TYPEPOSSTAR:
9514 case OP_TYPEPOSPLUS:
9515 case OP_TYPEPOSQUERY:
9516 case OP_TYPEPOSUPTO:
9517 case OP_CLASS:
9518 case OP_NCLASS:
9519 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
9520 case OP_XCLASS:
9521 #endif
9522 compile_iterator_backtrackingpath(common, current);
9523 break;
9524
9525 case OP_REF:
9526 case OP_REFI:
9527 case OP_DNREF:
9528 case OP_DNREFI:
9529 compile_ref_iterator_backtrackingpath(common, current);
9530 break;
9531
9532 case OP_RECURSE:
9533 compile_recurse_backtrackingpath(common, current);
9534 break;
9535
9536 case OP_ASSERT:
9537 case OP_ASSERT_NOT:
9538 case OP_ASSERTBACK:
9539 case OP_ASSERTBACK_NOT:
9540 compile_assert_backtrackingpath(common, current);
9541 break;
9542
9543 case OP_ONCE:
9544 case OP_ONCE_NC:
9545 case OP_BRA:
9546 case OP_CBRA:
9547 case OP_COND:
9548 case OP_SBRA:
9549 case OP_SCBRA:
9550 case OP_SCOND:
9551 compile_bracket_backtrackingpath(common, current);
9552 break;
9553
9554 case OP_BRAZERO:
9555 if (current->cc[1] > OP_ASSERTBACK_NOT)
9556 compile_bracket_backtrackingpath(common, current);
9557 else
9558 compile_assert_backtrackingpath(common, current);
9559 break;
9560
9561 case OP_BRAPOS:
9562 case OP_CBRAPOS:
9563 case OP_SBRAPOS:
9564 case OP_SCBRAPOS:
9565 case OP_BRAPOSZERO:
9566 compile_bracketpos_backtrackingpath(common, current);
9567 break;
9568
9569 case OP_BRAMINZERO:
9570 compile_braminzero_backtrackingpath(common, current);
9571 break;
9572
9573 case OP_MARK:
9574 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0));
9575 if (common->has_skip_arg)
9576 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9577 free_stack(common, common->has_skip_arg ? 5 : 1);
9578 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
9579 if (common->has_skip_arg)
9580 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);
9581 break;
9582
9583 case OP_THEN:
9584 case OP_THEN_ARG:
9585 case OP_PRUNE:
9586 case OP_PRUNE_ARG:
9587 case OP_SKIP:
9588 case OP_SKIP_ARG:
9589 compile_control_verb_backtrackingpath(common, current);
9590 break;
9591
9592 case OP_COMMIT:
9593 if (!common->local_exit)
9594 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
9595 if (common->quit_label == NULL)
9596 add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
9597 else
9598 JUMPTO(SLJIT_JUMP, common->quit_label);
9599 break;
9600
9601 case OP_CALLOUT:
9602 case OP_FAIL:
9603 case OP_ACCEPT:
9604 case OP_ASSERT_ACCEPT:
9605 set_jumps(current->topbacktracks, LABEL());
9606 break;
9607
9608 case OP_THEN_TRAP:
9609 /* A virtual opcode for then traps. */
9610 compile_then_trap_backtrackingpath(common, current);
9611 break;
9612
9613 default:
9614 SLJIT_ASSERT_STOP();
9615 break;
9616 }
9617 current = current->prev;
9618 }
9619 common->then_trap = save_then_trap;
9620 }
9621
compile_recurse(compiler_common * common)9622 static SLJIT_INLINE void compile_recurse(compiler_common *common)
9623 {
9624 DEFINE_COMPILER;
9625 pcre_uchar *cc = common->start + common->currententry->start;
9626 pcre_uchar *ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE);
9627 pcre_uchar *ccend = bracketend(cc);
9628 BOOL needs_control_head;
9629 int framesize = get_framesize(common, cc, NULL, TRUE, &needs_control_head);
9630 int private_data_size = get_private_data_copy_length(common, ccbegin, ccend, needs_control_head);
9631 int alternativesize;
9632 BOOL needs_frame;
9633 backtrack_common altbacktrack;
9634 struct sljit_jump *jump;
9635
9636 /* Recurse captures then. */
9637 common->then_trap = NULL;
9638
9639 SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS);
9640 needs_frame = framesize >= 0;
9641 if (!needs_frame)
9642 framesize = 0;
9643 alternativesize = *(cc + GET(cc, 1)) == OP_ALT ? 1 : 0;
9644
9645 SLJIT_ASSERT(common->currententry->entry == NULL && common->recursive_head_ptr != 0);
9646 common->currententry->entry = LABEL();
9647 set_jumps(common->currententry->calls, common->currententry->entry);
9648
9649 sljit_emit_fast_enter(compiler, TMP2, 0);
9650 allocate_stack(common, private_data_size + framesize + alternativesize);
9651 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(private_data_size + framesize + alternativesize - 1), TMP2, 0);
9652 copy_private_data(common, ccbegin, ccend, TRUE, private_data_size + framesize + alternativesize, framesize + alternativesize, needs_control_head);
9653 if (needs_control_head)
9654 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
9655 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, STACK_TOP, 0);
9656 if (needs_frame)
9657 init_frame(common, cc, NULL, framesize + alternativesize - 1, alternativesize, TRUE);
9658
9659 if (alternativesize > 0)
9660 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9661
9662 memset(&altbacktrack, 0, sizeof(backtrack_common));
9663 common->quit_label = NULL;
9664 common->accept_label = NULL;
9665 common->quit = NULL;
9666 common->accept = NULL;
9667 altbacktrack.cc = ccbegin;
9668 cc += GET(cc, 1);
9669 while (1)
9670 {
9671 altbacktrack.top = NULL;
9672 altbacktrack.topbacktracks = NULL;
9673
9674 if (altbacktrack.cc != ccbegin)
9675 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9676
9677 compile_matchingpath(common, altbacktrack.cc, cc, &altbacktrack);
9678 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9679 return;
9680
9681 add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
9682
9683 compile_backtrackingpath(common, altbacktrack.top);
9684 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9685 return;
9686 set_jumps(altbacktrack.topbacktracks, LABEL());
9687
9688 if (*cc != OP_ALT)
9689 break;
9690
9691 altbacktrack.cc = cc + 1 + LINK_SIZE;
9692 cc += GET(cc, 1);
9693 }
9694
9695 /* None of them matched. */
9696 OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
9697 jump = JUMP(SLJIT_JUMP);
9698
9699 if (common->quit != NULL)
9700 {
9701 set_jumps(common->quit, LABEL());
9702 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
9703 if (needs_frame)
9704 {
9705 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
9706 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
9707 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
9708 }
9709 OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
9710 common->quit = NULL;
9711 add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
9712 }
9713
9714 set_jumps(common->accept, LABEL());
9715 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
9716 if (needs_frame)
9717 {
9718 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
9719 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
9720 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
9721 }
9722 OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 1);
9723
9724 JUMPHERE(jump);
9725 if (common->quit != NULL)
9726 set_jumps(common->quit, LABEL());
9727 copy_private_data(common, ccbegin, ccend, FALSE, private_data_size + framesize + alternativesize, framesize + alternativesize, needs_control_head);
9728 free_stack(common, private_data_size + framesize + alternativesize);
9729 if (needs_control_head)
9730 {
9731 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), 2 * sizeof(sljit_sw));
9732 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), sizeof(sljit_sw));
9733 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, TMP1, 0);
9734 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
9735 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);
9736 }
9737 else
9738 {
9739 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), sizeof(sljit_sw));
9740 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
9741 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, TMP2, 0);
9742 }
9743 sljit_emit_fast_return(compiler, SLJIT_MEM1(STACK_TOP), 0);
9744 }
9745
9746 #undef COMPILE_BACKTRACKINGPATH
9747 #undef CURRENT_AS
9748
9749 void
PRIV(jit_compile)9750 PRIV(jit_compile)(const REAL_PCRE *re, PUBL(extra) *extra, int mode)
9751 {
9752 struct sljit_compiler *compiler;
9753 backtrack_common rootbacktrack;
9754 compiler_common common_data;
9755 compiler_common *common = &common_data;
9756 const pcre_uint8 *tables = re->tables;
9757 pcre_study_data *study;
9758 int private_data_size;
9759 pcre_uchar *ccend;
9760 executable_functions *functions;
9761 void *executable_func;
9762 sljit_uw executable_size;
9763 sljit_uw total_length;
9764 label_addr_list *label_addr;
9765 struct sljit_label *mainloop_label = NULL;
9766 struct sljit_label *continue_match_label;
9767 struct sljit_label *empty_match_found_label = NULL;
9768 struct sljit_label *empty_match_backtrack_label = NULL;
9769 struct sljit_label *reset_match_label;
9770 struct sljit_label *quit_label;
9771 struct sljit_jump *jump;
9772 struct sljit_jump *minlength_check_failed = NULL;
9773 struct sljit_jump *reqbyte_notfound = NULL;
9774 struct sljit_jump *empty_match = NULL;
9775
9776 SLJIT_ASSERT((extra->flags & PCRE_EXTRA_STUDY_DATA) != 0);
9777 study = extra->study_data;
9778
9779 if (!tables)
9780 tables = PRIV(default_tables);
9781
9782 memset(&rootbacktrack, 0, sizeof(backtrack_common));
9783 memset(common, 0, sizeof(compiler_common));
9784 rootbacktrack.cc = (pcre_uchar *)re + re->name_table_offset + re->name_count * re->name_entry_size;
9785
9786 common->start = rootbacktrack.cc;
9787 common->read_only_data_head = NULL;
9788 common->fcc = tables + fcc_offset;
9789 common->lcc = (sljit_sw)(tables + lcc_offset);
9790 common->mode = mode;
9791 common->might_be_empty = study->minlength == 0;
9792 common->nltype = NLTYPE_FIXED;
9793 switch(re->options & PCRE_NEWLINE_BITS)
9794 {
9795 case 0:
9796 /* Compile-time default */
9797 switch(NEWLINE)
9798 {
9799 case -1: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
9800 case -2: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
9801 default: common->newline = NEWLINE; break;
9802 }
9803 break;
9804 case PCRE_NEWLINE_CR: common->newline = CHAR_CR; break;
9805 case PCRE_NEWLINE_LF: common->newline = CHAR_NL; break;
9806 case PCRE_NEWLINE_CR+
9807 PCRE_NEWLINE_LF: common->newline = (CHAR_CR << 8) | CHAR_NL; break;
9808 case PCRE_NEWLINE_ANY: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
9809 case PCRE_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
9810 default: return;
9811 }
9812 common->nlmax = READ_CHAR_MAX;
9813 common->nlmin = 0;
9814 if ((re->options & PCRE_BSR_ANYCRLF) != 0)
9815 common->bsr_nltype = NLTYPE_ANYCRLF;
9816 else if ((re->options & PCRE_BSR_UNICODE) != 0)
9817 common->bsr_nltype = NLTYPE_ANY;
9818 else
9819 {
9820 #ifdef BSR_ANYCRLF
9821 common->bsr_nltype = NLTYPE_ANYCRLF;
9822 #else
9823 common->bsr_nltype = NLTYPE_ANY;
9824 #endif
9825 }
9826 common->bsr_nlmax = READ_CHAR_MAX;
9827 common->bsr_nlmin = 0;
9828 common->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
9829 common->ctypes = (sljit_sw)(tables + ctypes_offset);
9830 common->name_table = ((pcre_uchar *)re) + re->name_table_offset;
9831 common->name_count = re->name_count;
9832 common->name_entry_size = re->name_entry_size;
9833 common->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
9834 #ifdef SUPPORT_UTF
9835 /* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */
9836 common->utf = (re->options & PCRE_UTF8) != 0;
9837 #ifdef SUPPORT_UCP
9838 common->use_ucp = (re->options & PCRE_UCP) != 0;
9839 #endif
9840 if (common->utf)
9841 {
9842 if (common->nltype == NLTYPE_ANY)
9843 common->nlmax = 0x2029;
9844 else if (common->nltype == NLTYPE_ANYCRLF)
9845 common->nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
9846 else
9847 {
9848 /* We only care about the first newline character. */
9849 common->nlmax = common->newline & 0xff;
9850 }
9851
9852 if (common->nltype == NLTYPE_FIXED)
9853 common->nlmin = common->newline & 0xff;
9854 else
9855 common->nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
9856
9857 if (common->bsr_nltype == NLTYPE_ANY)
9858 common->bsr_nlmax = 0x2029;
9859 else
9860 common->bsr_nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
9861 common->bsr_nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
9862 }
9863 #endif /* SUPPORT_UTF */
9864 ccend = bracketend(common->start);
9865
9866 /* Calculate the local space size on the stack. */
9867 common->ovector_start = LIMIT_MATCH + sizeof(sljit_sw);
9868 common->optimized_cbracket = (pcre_uint8 *)SLJIT_MALLOC(re->top_bracket + 1, compiler->allocator_data);
9869 if (!common->optimized_cbracket)
9870 return;
9871 #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 1
9872 memset(common->optimized_cbracket, 0, re->top_bracket + 1);
9873 #else
9874 memset(common->optimized_cbracket, 1, re->top_bracket + 1);
9875 #endif
9876
9877 SLJIT_ASSERT(*common->start == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET);
9878 #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 2
9879 common->capture_last_ptr = common->ovector_start;
9880 common->ovector_start += sizeof(sljit_sw);
9881 #endif
9882 if (!check_opcode_types(common, common->start, ccend))
9883 {
9884 SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
9885 return;
9886 }
9887
9888 /* Checking flags and updating ovector_start. */
9889 if (mode == JIT_COMPILE && (re->flags & PCRE_REQCHSET) != 0 && (re->options & PCRE_NO_START_OPTIMIZE) == 0)
9890 {
9891 common->req_char_ptr = common->ovector_start;
9892 common->ovector_start += sizeof(sljit_sw);
9893 }
9894 if (mode != JIT_COMPILE)
9895 {
9896 common->start_used_ptr = common->ovector_start;
9897 common->ovector_start += sizeof(sljit_sw);
9898 if (mode == JIT_PARTIAL_SOFT_COMPILE)
9899 {
9900 common->hit_start = common->ovector_start;
9901 common->ovector_start += 2 * sizeof(sljit_sw);
9902 }
9903 else
9904 {
9905 SLJIT_ASSERT(mode == JIT_PARTIAL_HARD_COMPILE);
9906 common->needs_start_ptr = TRUE;
9907 }
9908 }
9909 if ((re->options & PCRE_FIRSTLINE) != 0)
9910 {
9911 common->first_line_end = common->ovector_start;
9912 common->ovector_start += sizeof(sljit_sw);
9913 }
9914 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
9915 common->control_head_ptr = 1;
9916 #endif
9917 if (common->control_head_ptr != 0)
9918 {
9919 common->control_head_ptr = common->ovector_start;
9920 common->ovector_start += sizeof(sljit_sw);
9921 }
9922 if (common->needs_start_ptr && common->has_set_som)
9923 {
9924 /* Saving the real start pointer is necessary. */
9925 common->start_ptr = common->ovector_start;
9926 common->ovector_start += sizeof(sljit_sw);
9927 }
9928 else
9929 common->needs_start_ptr = FALSE;
9930
9931 /* Aligning ovector to even number of sljit words. */
9932 if ((common->ovector_start & sizeof(sljit_sw)) != 0)
9933 common->ovector_start += sizeof(sljit_sw);
9934
9935 if (common->start_ptr == 0)
9936 common->start_ptr = OVECTOR(0);
9937
9938 /* Capturing brackets cannot be optimized if callouts are allowed. */
9939 if (common->capture_last_ptr != 0)
9940 memset(common->optimized_cbracket, 0, re->top_bracket + 1);
9941
9942 SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0));
9943 common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw);
9944
9945 total_length = ccend - common->start;
9946 common->private_data_ptrs = (sljit_si *)SLJIT_MALLOC(total_length * (sizeof(sljit_si) + (common->has_then ? 1 : 0)), compiler->allocator_data);
9947 if (!common->private_data_ptrs)
9948 {
9949 SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
9950 return;
9951 }
9952 memset(common->private_data_ptrs, 0, total_length * sizeof(sljit_si));
9953
9954 private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw);
9955 set_private_data_ptrs(common, &private_data_size, ccend);
9956 if (private_data_size > SLJIT_MAX_LOCAL_SIZE)
9957 {
9958 SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
9959 SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
9960 return;
9961 }
9962
9963 if (common->has_then)
9964 {
9965 common->then_offsets = (pcre_uint8 *)(common->private_data_ptrs + total_length);
9966 memset(common->then_offsets, 0, total_length);
9967 set_then_offsets(common, common->start, NULL);
9968 }
9969
9970 compiler = sljit_create_compiler(NULL);
9971 if (!compiler)
9972 {
9973 SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
9974 SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
9975 return;
9976 }
9977 common->compiler = compiler;
9978
9979 /* Main pcre_jit_exec entry. */
9980 sljit_emit_enter(compiler, 0, 1, 5, 5, 0, 0, private_data_size);
9981
9982 /* Register init. */
9983 reset_ovector(common, (re->top_bracket + 1) * 2);
9984 if (common->req_char_ptr != 0)
9985 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, SLJIT_R0, 0);
9986
9987 OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_S0, 0);
9988 OP1(SLJIT_MOV, TMP1, 0, SLJIT_S0, 0);
9989 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
9990 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end));
9991 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
9992 OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, limit_match));
9993 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, base));
9994 OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, limit));
9995 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH, TMP1, 0);
9996
9997 if (mode == JIT_PARTIAL_SOFT_COMPILE)
9998 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
9999 if (common->mark_ptr != 0)
10000 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
10001 if (common->control_head_ptr != 0)
10002 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
10003
10004 /* Main part of the matching */
10005 if ((re->options & PCRE_ANCHORED) == 0)
10006 {
10007 mainloop_label = mainloop_entry(common, (re->flags & PCRE_HASCRORLF) != 0, (re->options & PCRE_FIRSTLINE) != 0);
10008 continue_match_label = LABEL();
10009 /* Forward search if possible. */
10010 if ((re->options & PCRE_NO_START_OPTIMIZE) == 0)
10011 {
10012 if (mode == JIT_COMPILE && fast_forward_first_n_chars(common, (re->options & PCRE_FIRSTLINE) != 0))
10013 ;
10014 else if ((re->flags & PCRE_FIRSTSET) != 0)
10015 fast_forward_first_char(common, (pcre_uchar)re->first_char, (re->flags & PCRE_FCH_CASELESS) != 0, (re->options & PCRE_FIRSTLINE) != 0);
10016 else if ((re->flags & PCRE_STARTLINE) != 0)
10017 fast_forward_newline(common, (re->options & PCRE_FIRSTLINE) != 0);
10018 else if (study != NULL && (study->flags & PCRE_STUDY_MAPPED) != 0)
10019 fast_forward_start_bits(common, study->start_bits, (re->options & PCRE_FIRSTLINE) != 0);
10020 }
10021 }
10022 else
10023 continue_match_label = LABEL();
10024
10025 if (mode == JIT_COMPILE && study->minlength > 0 && (re->options & PCRE_NO_START_OPTIMIZE) == 0)
10026 {
10027 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
10028 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(study->minlength));
10029 minlength_check_failed = CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0);
10030 }
10031 if (common->req_char_ptr != 0)
10032 reqbyte_notfound = search_requested_char(common, (pcre_uchar)re->req_char, (re->flags & PCRE_RCH_CASELESS) != 0, (re->flags & PCRE_FIRSTSET) != 0);
10033
10034 /* Store the current STR_PTR in OVECTOR(0). */
10035 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
10036 /* Copy the limit of allowed recursions. */
10037 OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH);
10038 if (common->capture_last_ptr != 0)
10039 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, -1);
10040
10041 if (common->needs_start_ptr)
10042 {
10043 SLJIT_ASSERT(common->start_ptr != OVECTOR(0));
10044 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_ptr, STR_PTR, 0);
10045 }
10046 else
10047 SLJIT_ASSERT(common->start_ptr == OVECTOR(0));
10048
10049 /* Copy the beginning of the string. */
10050 if (mode == JIT_PARTIAL_SOFT_COMPILE)
10051 {
10052 jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
10053 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
10054 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start + sizeof(sljit_sw), STR_PTR, 0);
10055 JUMPHERE(jump);
10056 }
10057 else if (mode == JIT_PARTIAL_HARD_COMPILE)
10058 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
10059
10060 compile_matchingpath(common, common->start, ccend, &rootbacktrack);
10061 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10062 {
10063 sljit_free_compiler(compiler);
10064 SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
10065 SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
10066 free_read_only_data(common->read_only_data_head, compiler->allocator_data);
10067 return;
10068 }
10069
10070 if (common->might_be_empty)
10071 {
10072 empty_match = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
10073 empty_match_found_label = LABEL();
10074 }
10075
10076 common->accept_label = LABEL();
10077 if (common->accept != NULL)
10078 set_jumps(common->accept, common->accept_label);
10079
10080 /* This means we have a match. Update the ovector. */
10081 copy_ovector(common, re->top_bracket + 1);
10082 common->quit_label = common->forced_quit_label = LABEL();
10083 if (common->quit != NULL)
10084 set_jumps(common->quit, common->quit_label);
10085 if (common->forced_quit != NULL)
10086 set_jumps(common->forced_quit, common->forced_quit_label);
10087 if (minlength_check_failed != NULL)
10088 SET_LABEL(minlength_check_failed, common->forced_quit_label);
10089 sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0);
10090
10091 if (mode != JIT_COMPILE)
10092 {
10093 common->partialmatchlabel = LABEL();
10094 set_jumps(common->partialmatch, common->partialmatchlabel);
10095 return_with_partial_match(common, common->quit_label);
10096 }
10097
10098 if (common->might_be_empty)
10099 empty_match_backtrack_label = LABEL();
10100 compile_backtrackingpath(common, rootbacktrack.top);
10101 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10102 {
10103 sljit_free_compiler(compiler);
10104 SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
10105 SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
10106 free_read_only_data(common->read_only_data_head, compiler->allocator_data);
10107 return;
10108 }
10109
10110 SLJIT_ASSERT(rootbacktrack.prev == NULL);
10111 reset_match_label = LABEL();
10112
10113 if (mode == JIT_PARTIAL_SOFT_COMPILE)
10114 {
10115 /* Update hit_start only in the first time. */
10116 jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
10117 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr);
10118 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
10119 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, TMP1, 0);
10120 JUMPHERE(jump);
10121 }
10122
10123 /* Check we have remaining characters. */
10124 if ((re->options & PCRE_ANCHORED) == 0 && (re->options & PCRE_FIRSTLINE) != 0)
10125 {
10126 SLJIT_ASSERT(common->first_line_end != 0);
10127 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
10128 }
10129
10130 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
10131
10132 if ((re->options & PCRE_ANCHORED) == 0)
10133 {
10134 if (common->ff_newline_shortcut != NULL)
10135 {
10136 if ((re->options & PCRE_FIRSTLINE) == 0)
10137 CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, common->ff_newline_shortcut);
10138 /* There cannot be more newlines here. */
10139 }
10140 else
10141 {
10142 if ((re->options & PCRE_FIRSTLINE) == 0)
10143 CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop_label);
10144 else
10145 CMPTO(SLJIT_LESS, STR_PTR, 0, TMP1, 0, mainloop_label);
10146 }
10147 }
10148
10149 /* No more remaining characters. */
10150 if (reqbyte_notfound != NULL)
10151 JUMPHERE(reqbyte_notfound);
10152
10153 if (mode == JIT_PARTIAL_SOFT_COMPILE)
10154 CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1, common->partialmatchlabel);
10155
10156 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
10157 JUMPTO(SLJIT_JUMP, common->quit_label);
10158
10159 flush_stubs(common);
10160
10161 if (common->might_be_empty)
10162 {
10163 JUMPHERE(empty_match);
10164 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
10165 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
10166 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_backtrack_label);
10167 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
10168 CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_found_label);
10169 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
10170 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found_label);
10171 JUMPTO(SLJIT_JUMP, empty_match_backtrack_label);
10172 }
10173
10174 common->currententry = common->entries;
10175 common->local_exit = TRUE;
10176 quit_label = common->quit_label;
10177 while (common->currententry != NULL)
10178 {
10179 /* Might add new entries. */
10180 compile_recurse(common);
10181 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10182 {
10183 sljit_free_compiler(compiler);
10184 SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
10185 SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
10186 free_read_only_data(common->read_only_data_head, compiler->allocator_data);
10187 return;
10188 }
10189 flush_stubs(common);
10190 common->currententry = common->currententry->next;
10191 }
10192 common->local_exit = FALSE;
10193 common->quit_label = quit_label;
10194
10195 /* Allocating stack, returns with PCRE_ERROR_JIT_STACKLIMIT if fails. */
10196 /* This is a (really) rare case. */
10197 set_jumps(common->stackalloc, LABEL());
10198 /* RETURN_ADDR is not a saved register. */
10199 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
10200 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP2, 0);
10201 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
10202 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
10203 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, top), STACK_TOP, 0);
10204 OP2(SLJIT_ADD, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, limit), SLJIT_IMM, STACK_GROWTH_RATE);
10205
10206 sljit_emit_ijump(compiler, SLJIT_CALL2, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_stack_resize));
10207 jump = CMP(SLJIT_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
10208 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
10209 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
10210 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, top));
10211 OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, limit));
10212 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
10213 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
10214
10215 /* Allocation failed. */
10216 JUMPHERE(jump);
10217 /* We break the return address cache here, but this is a really rare case. */
10218 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_JIT_STACKLIMIT);
10219 JUMPTO(SLJIT_JUMP, common->quit_label);
10220
10221 /* Call limit reached. */
10222 set_jumps(common->calllimit, LABEL());
10223 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_MATCHLIMIT);
10224 JUMPTO(SLJIT_JUMP, common->quit_label);
10225
10226 if (common->revertframes != NULL)
10227 {
10228 set_jumps(common->revertframes, LABEL());
10229 do_revertframes(common);
10230 }
10231 if (common->wordboundary != NULL)
10232 {
10233 set_jumps(common->wordboundary, LABEL());
10234 check_wordboundary(common);
10235 }
10236 if (common->anynewline != NULL)
10237 {
10238 set_jumps(common->anynewline, LABEL());
10239 check_anynewline(common);
10240 }
10241 if (common->hspace != NULL)
10242 {
10243 set_jumps(common->hspace, LABEL());
10244 check_hspace(common);
10245 }
10246 if (common->vspace != NULL)
10247 {
10248 set_jumps(common->vspace, LABEL());
10249 check_vspace(common);
10250 }
10251 if (common->casefulcmp != NULL)
10252 {
10253 set_jumps(common->casefulcmp, LABEL());
10254 do_casefulcmp(common);
10255 }
10256 if (common->caselesscmp != NULL)
10257 {
10258 set_jumps(common->caselesscmp, LABEL());
10259 do_caselesscmp(common);
10260 }
10261 if (common->reset_match != NULL)
10262 {
10263 set_jumps(common->reset_match, LABEL());
10264 do_reset_match(common, (re->top_bracket + 1) * 2);
10265 CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP1, 0, continue_match_label);
10266 OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
10267 JUMPTO(SLJIT_JUMP, reset_match_label);
10268 }
10269 #ifdef SUPPORT_UTF
10270 #ifdef COMPILE_PCRE8
10271 if (common->utfreadchar != NULL)
10272 {
10273 set_jumps(common->utfreadchar, LABEL());
10274 do_utfreadchar(common);
10275 }
10276 if (common->utfreadchar16 != NULL)
10277 {
10278 set_jumps(common->utfreadchar16, LABEL());
10279 do_utfreadchar16(common);
10280 }
10281 if (common->utfreadtype8 != NULL)
10282 {
10283 set_jumps(common->utfreadtype8, LABEL());
10284 do_utfreadtype8(common);
10285 }
10286 #endif /* COMPILE_PCRE8 */
10287 #endif /* SUPPORT_UTF */
10288 #ifdef SUPPORT_UCP
10289 if (common->getucd != NULL)
10290 {
10291 set_jumps(common->getucd, LABEL());
10292 do_getucd(common);
10293 }
10294 #endif
10295
10296 SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
10297 SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
10298
10299 executable_func = sljit_generate_code(compiler);
10300 executable_size = sljit_get_generated_code_size(compiler);
10301 label_addr = common->label_addrs;
10302 while (label_addr != NULL)
10303 {
10304 *label_addr->update_addr = sljit_get_label_addr(label_addr->label);
10305 label_addr = label_addr->next;
10306 }
10307 sljit_free_compiler(compiler);
10308 if (executable_func == NULL)
10309 {
10310 free_read_only_data(common->read_only_data_head, compiler->allocator_data);
10311 return;
10312 }
10313
10314 /* Reuse the function descriptor if possible. */
10315 if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 && extra->executable_jit != NULL)
10316 functions = (executable_functions *)extra->executable_jit;
10317 else
10318 {
10319 /* Note: If your memory-checker has flagged the allocation below as a
10320 * memory leak, it is probably because you either forgot to call
10321 * pcre_free_study() (or pcre16_free_study()) on the pcre_extra (or
10322 * pcre16_extra) object, or you called said function after having
10323 * cleared the PCRE_EXTRA_EXECUTABLE_JIT bit from the "flags" field
10324 * of the object. (The function will only free the JIT data if the
10325 * bit remains set, as the bit indicates that the pointer to the data
10326 * is valid.)
10327 */
10328 functions = SLJIT_MALLOC(sizeof(executable_functions), compiler->allocator_data);
10329 if (functions == NULL)
10330 {
10331 /* This case is highly unlikely since we just recently
10332 freed a lot of memory. Not impossible though. */
10333 sljit_free_code(executable_func);
10334 free_read_only_data(common->read_only_data_head, compiler->allocator_data);
10335 return;
10336 }
10337 memset(functions, 0, sizeof(executable_functions));
10338 functions->top_bracket = (re->top_bracket + 1) * 2;
10339 functions->limit_match = (re->flags & PCRE_MLSET) != 0 ? re->limit_match : 0;
10340 extra->executable_jit = functions;
10341 extra->flags |= PCRE_EXTRA_EXECUTABLE_JIT;
10342 }
10343
10344 functions->executable_funcs[mode] = executable_func;
10345 functions->read_only_data_heads[mode] = common->read_only_data_head;
10346 functions->executable_sizes[mode] = executable_size;
10347 }
10348
jit_machine_stack_exec(jit_arguments * arguments,void * executable_func)10349 static SLJIT_NOINLINE int jit_machine_stack_exec(jit_arguments *arguments, void *executable_func)
10350 {
10351 union {
10352 void *executable_func;
10353 jit_function call_executable_func;
10354 } convert_executable_func;
10355 pcre_uint8 local_space[MACHINE_STACK_SIZE];
10356 struct sljit_stack local_stack;
10357
10358 local_stack.top = (sljit_sw)&local_space;
10359 local_stack.base = local_stack.top;
10360 local_stack.limit = local_stack.base + MACHINE_STACK_SIZE;
10361 local_stack.max_limit = local_stack.limit;
10362 arguments->stack = &local_stack;
10363 convert_executable_func.executable_func = executable_func;
10364 return convert_executable_func.call_executable_func(arguments);
10365 }
10366
10367 int
PRIV(jit_exec)10368 PRIV(jit_exec)(const PUBL(extra) *extra_data, const pcre_uchar *subject,
10369 int length, int start_offset, int options, int *offsets, int offset_count)
10370 {
10371 executable_functions *functions = (executable_functions *)extra_data->executable_jit;
10372 union {
10373 void *executable_func;
10374 jit_function call_executable_func;
10375 } convert_executable_func;
10376 jit_arguments arguments;
10377 int max_offset_count;
10378 int retval;
10379 int mode = JIT_COMPILE;
10380
10381 if ((options & PCRE_PARTIAL_HARD) != 0)
10382 mode = JIT_PARTIAL_HARD_COMPILE;
10383 else if ((options & PCRE_PARTIAL_SOFT) != 0)
10384 mode = JIT_PARTIAL_SOFT_COMPILE;
10385
10386 if (functions->executable_funcs[mode] == NULL)
10387 return PCRE_ERROR_JIT_BADOPTION;
10388
10389 /* Sanity checks should be handled by pcre_exec. */
10390 arguments.str = subject + start_offset;
10391 arguments.begin = subject;
10392 arguments.end = subject + length;
10393 arguments.mark_ptr = NULL;
10394 /* JIT decreases this value less frequently than the interpreter. */
10395 arguments.limit_match = ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0) ? MATCH_LIMIT : (pcre_uint32)(extra_data->match_limit);
10396 if (functions->limit_match != 0 && functions->limit_match < arguments.limit_match)
10397 arguments.limit_match = functions->limit_match;
10398 arguments.notbol = (options & PCRE_NOTBOL) != 0;
10399 arguments.noteol = (options & PCRE_NOTEOL) != 0;
10400 arguments.notempty = (options & PCRE_NOTEMPTY) != 0;
10401 arguments.notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
10402 arguments.offsets = offsets;
10403 arguments.callout_data = (extra_data->flags & PCRE_EXTRA_CALLOUT_DATA) != 0 ? extra_data->callout_data : NULL;
10404 arguments.real_offset_count = offset_count;
10405
10406 /* pcre_exec() rounds offset_count to a multiple of 3, and then uses only 2/3 of
10407 the output vector for storing captured strings, with the remainder used as
10408 workspace. We don't need the workspace here. For compatibility, we limit the
10409 number of captured strings in the same way as pcre_exec(), so that the user
10410 gets the same result with and without JIT. */
10411
10412 if (offset_count != 2)
10413 offset_count = ((offset_count - (offset_count % 3)) * 2) / 3;
10414 max_offset_count = functions->top_bracket;
10415 if (offset_count > max_offset_count)
10416 offset_count = max_offset_count;
10417 arguments.offset_count = offset_count;
10418
10419 if (functions->callback)
10420 arguments.stack = (struct sljit_stack *)functions->callback(functions->userdata);
10421 else
10422 arguments.stack = (struct sljit_stack *)functions->userdata;
10423
10424 if (arguments.stack == NULL)
10425 retval = jit_machine_stack_exec(&arguments, functions->executable_funcs[mode]);
10426 else
10427 {
10428 convert_executable_func.executable_func = functions->executable_funcs[mode];
10429 retval = convert_executable_func.call_executable_func(&arguments);
10430 }
10431
10432 if (retval * 2 > offset_count)
10433 retval = 0;
10434 if ((extra_data->flags & PCRE_EXTRA_MARK) != 0)
10435 *(extra_data->mark) = arguments.mark_ptr;
10436
10437 return retval;
10438 }
10439
10440 #if defined COMPILE_PCRE8
10441 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_jit_exec(const pcre * argument_re,const pcre_extra * extra_data,PCRE_SPTR subject,int length,int start_offset,int options,int * offsets,int offset_count,pcre_jit_stack * stack)10442 pcre_jit_exec(const pcre *argument_re, const pcre_extra *extra_data,
10443 PCRE_SPTR subject, int length, int start_offset, int options,
10444 int *offsets, int offset_count, pcre_jit_stack *stack)
10445 #elif defined COMPILE_PCRE16
10446 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
10447 pcre16_jit_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
10448 PCRE_SPTR16 subject, int length, int start_offset, int options,
10449 int *offsets, int offset_count, pcre16_jit_stack *stack)
10450 #elif defined COMPILE_PCRE32
10451 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
10452 pcre32_jit_exec(const pcre32 *argument_re, const pcre32_extra *extra_data,
10453 PCRE_SPTR32 subject, int length, int start_offset, int options,
10454 int *offsets, int offset_count, pcre32_jit_stack *stack)
10455 #endif
10456 {
10457 pcre_uchar *subject_ptr = (pcre_uchar *)subject;
10458 executable_functions *functions = (executable_functions *)extra_data->executable_jit;
10459 union {
10460 void *executable_func;
10461 jit_function call_executable_func;
10462 } convert_executable_func;
10463 jit_arguments arguments;
10464 int max_offset_count;
10465 int retval;
10466 int mode = JIT_COMPILE;
10467
10468 SLJIT_UNUSED_ARG(argument_re);
10469
10470 /* Plausibility checks */
10471 if ((options & ~PUBLIC_JIT_EXEC_OPTIONS) != 0) return PCRE_ERROR_JIT_BADOPTION;
10472
10473 if ((options & PCRE_PARTIAL_HARD) != 0)
10474 mode = JIT_PARTIAL_HARD_COMPILE;
10475 else if ((options & PCRE_PARTIAL_SOFT) != 0)
10476 mode = JIT_PARTIAL_SOFT_COMPILE;
10477
10478 if (functions->executable_funcs[mode] == NULL)
10479 return PCRE_ERROR_JIT_BADOPTION;
10480
10481 /* Sanity checks should be handled by pcre_exec. */
10482 arguments.stack = (struct sljit_stack *)stack;
10483 arguments.str = subject_ptr + start_offset;
10484 arguments.begin = subject_ptr;
10485 arguments.end = subject_ptr + length;
10486 arguments.mark_ptr = NULL;
10487 /* JIT decreases this value less frequently than the interpreter. */
10488 arguments.limit_match = ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0) ? MATCH_LIMIT : (pcre_uint32)(extra_data->match_limit);
10489 if (functions->limit_match != 0 && functions->limit_match < arguments.limit_match)
10490 arguments.limit_match = functions->limit_match;
10491 arguments.notbol = (options & PCRE_NOTBOL) != 0;
10492 arguments.noteol = (options & PCRE_NOTEOL) != 0;
10493 arguments.notempty = (options & PCRE_NOTEMPTY) != 0;
10494 arguments.notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
10495 arguments.offsets = offsets;
10496 arguments.callout_data = (extra_data->flags & PCRE_EXTRA_CALLOUT_DATA) != 0 ? extra_data->callout_data : NULL;
10497 arguments.real_offset_count = offset_count;
10498
10499 /* pcre_exec() rounds offset_count to a multiple of 3, and then uses only 2/3 of
10500 the output vector for storing captured strings, with the remainder used as
10501 workspace. We don't need the workspace here. For compatibility, we limit the
10502 number of captured strings in the same way as pcre_exec(), so that the user
10503 gets the same result with and without JIT. */
10504
10505 if (offset_count != 2)
10506 offset_count = ((offset_count - (offset_count % 3)) * 2) / 3;
10507 max_offset_count = functions->top_bracket;
10508 if (offset_count > max_offset_count)
10509 offset_count = max_offset_count;
10510 arguments.offset_count = offset_count;
10511
10512 convert_executable_func.executable_func = functions->executable_funcs[mode];
10513 retval = convert_executable_func.call_executable_func(&arguments);
10514
10515 if (retval * 2 > offset_count)
10516 retval = 0;
10517 if ((extra_data->flags & PCRE_EXTRA_MARK) != 0)
10518 *(extra_data->mark) = arguments.mark_ptr;
10519
10520 return retval;
10521 }
10522
10523 void
PRIV(jit_free)10524 PRIV(jit_free)(void *executable_funcs)
10525 {
10526 int i;
10527 executable_functions *functions = (executable_functions *)executable_funcs;
10528 for (i = 0; i < JIT_NUMBER_OF_COMPILE_MODES; i++)
10529 {
10530 if (functions->executable_funcs[i] != NULL)
10531 sljit_free_code(functions->executable_funcs[i]);
10532 free_read_only_data(functions->read_only_data_heads[i], NULL);
10533 }
10534 SLJIT_FREE(functions, compiler->allocator_data);
10535 }
10536
10537 int
PRIV(jit_get_size)10538 PRIV(jit_get_size)(void *executable_funcs)
10539 {
10540 int i;
10541 sljit_uw size = 0;
10542 sljit_uw *executable_sizes = ((executable_functions *)executable_funcs)->executable_sizes;
10543 for (i = 0; i < JIT_NUMBER_OF_COMPILE_MODES; i++)
10544 size += executable_sizes[i];
10545 return (int)size;
10546 }
10547
10548 const char*
PRIV(jit_get_target)10549 PRIV(jit_get_target)(void)
10550 {
10551 return sljit_get_platform_name();
10552 }
10553
10554 #if defined COMPILE_PCRE8
10555 PCRE_EXP_DECL pcre_jit_stack *
pcre_jit_stack_alloc(int startsize,int maxsize)10556 pcre_jit_stack_alloc(int startsize, int maxsize)
10557 #elif defined COMPILE_PCRE16
10558 PCRE_EXP_DECL pcre16_jit_stack *
10559 pcre16_jit_stack_alloc(int startsize, int maxsize)
10560 #elif defined COMPILE_PCRE32
10561 PCRE_EXP_DECL pcre32_jit_stack *
10562 pcre32_jit_stack_alloc(int startsize, int maxsize)
10563 #endif
10564 {
10565 if (startsize < 1 || maxsize < 1)
10566 return NULL;
10567 if (startsize > maxsize)
10568 startsize = maxsize;
10569 startsize = (startsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1);
10570 maxsize = (maxsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1);
10571 return (PUBL(jit_stack)*)sljit_allocate_stack(startsize, maxsize, NULL);
10572 }
10573
10574 #if defined COMPILE_PCRE8
10575 PCRE_EXP_DECL void
pcre_jit_stack_free(pcre_jit_stack * stack)10576 pcre_jit_stack_free(pcre_jit_stack *stack)
10577 #elif defined COMPILE_PCRE16
10578 PCRE_EXP_DECL void
10579 pcre16_jit_stack_free(pcre16_jit_stack *stack)
10580 #elif defined COMPILE_PCRE32
10581 PCRE_EXP_DECL void
10582 pcre32_jit_stack_free(pcre32_jit_stack *stack)
10583 #endif
10584 {
10585 sljit_free_stack((struct sljit_stack *)stack, NULL);
10586 }
10587
10588 #if defined COMPILE_PCRE8
10589 PCRE_EXP_DECL void
pcre_assign_jit_stack(pcre_extra * extra,pcre_jit_callback callback,void * userdata)10590 pcre_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata)
10591 #elif defined COMPILE_PCRE16
10592 PCRE_EXP_DECL void
10593 pcre16_assign_jit_stack(pcre16_extra *extra, pcre16_jit_callback callback, void *userdata)
10594 #elif defined COMPILE_PCRE32
10595 PCRE_EXP_DECL void
10596 pcre32_assign_jit_stack(pcre32_extra *extra, pcre32_jit_callback callback, void *userdata)
10597 #endif
10598 {
10599 executable_functions *functions;
10600 if (extra != NULL &&
10601 (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 &&
10602 extra->executable_jit != NULL)
10603 {
10604 functions = (executable_functions *)extra->executable_jit;
10605 functions->callback = callback;
10606 functions->userdata = userdata;
10607 }
10608 }
10609
10610 #if defined COMPILE_PCRE8
10611 PCRE_EXP_DECL void
pcre_jit_free_unused_memory(void)10612 pcre_jit_free_unused_memory(void)
10613 #elif defined COMPILE_PCRE16
10614 PCRE_EXP_DECL void
10615 pcre16_jit_free_unused_memory(void)
10616 #elif defined COMPILE_PCRE32
10617 PCRE_EXP_DECL void
10618 pcre32_jit_free_unused_memory(void)
10619 #endif
10620 {
10621 sljit_free_unused_memory_exec();
10622 }
10623
10624 #else /* SUPPORT_JIT */
10625
10626 /* These are dummy functions to avoid linking errors when JIT support is not
10627 being compiled. */
10628
10629 #if defined COMPILE_PCRE8
10630 PCRE_EXP_DECL pcre_jit_stack *
pcre_jit_stack_alloc(int startsize,int maxsize)10631 pcre_jit_stack_alloc(int startsize, int maxsize)
10632 #elif defined COMPILE_PCRE16
10633 PCRE_EXP_DECL pcre16_jit_stack *
10634 pcre16_jit_stack_alloc(int startsize, int maxsize)
10635 #elif defined COMPILE_PCRE32
10636 PCRE_EXP_DECL pcre32_jit_stack *
10637 pcre32_jit_stack_alloc(int startsize, int maxsize)
10638 #endif
10639 {
10640 (void)startsize;
10641 (void)maxsize;
10642 return NULL;
10643 }
10644
10645 #if defined COMPILE_PCRE8
10646 PCRE_EXP_DECL void
pcre_jit_stack_free(pcre_jit_stack * stack)10647 pcre_jit_stack_free(pcre_jit_stack *stack)
10648 #elif defined COMPILE_PCRE16
10649 PCRE_EXP_DECL void
10650 pcre16_jit_stack_free(pcre16_jit_stack *stack)
10651 #elif defined COMPILE_PCRE32
10652 PCRE_EXP_DECL void
10653 pcre32_jit_stack_free(pcre32_jit_stack *stack)
10654 #endif
10655 {
10656 (void)stack;
10657 }
10658
10659 #if defined COMPILE_PCRE8
10660 PCRE_EXP_DECL void
pcre_assign_jit_stack(pcre_extra * extra,pcre_jit_callback callback,void * userdata)10661 pcre_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata)
10662 #elif defined COMPILE_PCRE16
10663 PCRE_EXP_DECL void
10664 pcre16_assign_jit_stack(pcre16_extra *extra, pcre16_jit_callback callback, void *userdata)
10665 #elif defined COMPILE_PCRE32
10666 PCRE_EXP_DECL void
10667 pcre32_assign_jit_stack(pcre32_extra *extra, pcre32_jit_callback callback, void *userdata)
10668 #endif
10669 {
10670 (void)extra;
10671 (void)callback;
10672 (void)userdata;
10673 }
10674
10675 #if defined COMPILE_PCRE8
10676 PCRE_EXP_DECL void
pcre_jit_free_unused_memory(void)10677 pcre_jit_free_unused_memory(void)
10678 #elif defined COMPILE_PCRE16
10679 PCRE_EXP_DECL void
10680 pcre16_jit_free_unused_memory(void)
10681 #elif defined COMPILE_PCRE32
10682 PCRE_EXP_DECL void
10683 pcre32_jit_free_unused_memory(void)
10684 #endif
10685 {
10686 }
10687
10688 #endif
10689
10690 /* End of pcre_jit_compile.c */
10691