1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2013 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2013
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #if defined SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
/* Allocation hooks for the included sljit sources: they forward to
PUBL(malloc) and PUBL(free). The allocator_data argument is accepted
but not used. */
#define SLJIT_MALLOC(size, allocator_data) (PUBL(malloc))(size)
#define SLJIT_FREE(ptr, allocator_data) (PUBL(free))(ptr)
/* sljit build switches. SLJIT_VERBOSE and SLJIT_DEBUG are both set to 0. */
#define SLJIT_CONFIG_AUTO 1
#define SLJIT_CONFIG_STATIC 1
#define SLJIT_VERBOSE 0
#define SLJIT_DEBUG 0

#include "sljit/sljitLir.c"

/* Stop the build when the included sljit sources define a non-zero
SLJIT_CONFIG_UNSUPPORTED. */
#if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
#error Unsupported architecture
#endif
67
68 /* Defines for debugging purposes. */
69
70 /* 1 - Use unoptimized capturing brackets.
71 2 - Enable capture_last_ptr (includes option 1). */
72 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
73
74 /* 1 - Always have a control head. */
75 /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
76
/* Allocate memory for the regex stack on the real machine stack.
Fast, but limited size. */
#define MACHINE_STACK_SIZE 32768

/* Growth rate for stack allocated by the OS. Should be a multiple
of the page size. */
#define STACK_GROWTH_RATE 8192

/* Enable to check that the allocation could destroy temporaries.
Only defined when SLJIT_DEBUG is defined and non-zero. */
#if defined SLJIT_DEBUG && SLJIT_DEBUG
#define DESTROY_REGISTERS 1
#endif
89
90 /*
Short summary about the backtracking mechanism employed by the jit code generator:
92
The code generator follows the recursive nature of the PERL compatible regular
expressions. The basic blocks of regular expressions are condition checkers
which execute different commands depending on the result of the condition check.
The relationship between the operators can be horizontal (concatenation) and
vertical (sub-expression) (See struct backtrack_common for more details).
98
99 'ab' - 'a' and 'b' regexps are concatenated
100 'a+' - 'a' is the sub-expression of the '+' operator
101
The condition checkers are boolean (true/false) checkers. Machine code is generated
for the checker itself and for the actions depending on the result of the checker.
The 'true' case is called the matching path (expected path), and the other is called
the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
branches on the matching path.
107
108 Greedy star operator (*) :
109 Matching path: match happens.
110 Backtrack path: match failed.
111 Non-greedy star operator (*?) :
112 Matching path: no need to perform a match.
113 Backtrack path: match is required.
114
The following example shows the code generated for a capturing bracket
with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
we have the following regular expression:
118
119 A(B|C)D
120
121 The generated code will be the following:
122
123 A matching path
124 '(' matching path (pushing arguments to the stack)
125 B matching path
126 ')' matching path (pushing arguments to the stack)
127 D matching path
128 return with successful match
129
130 D backtrack path
131 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
132 B backtrack path
133 C expected path
134 jump to D matching path
135 C backtrack path
136 A backtrack path
137
Notice that the order of the backtrack code paths is the opposite of the fast
code paths. In this way the topmost value on the stack always belongs
to the current backtrack code path. The backtrack path must check
whether there is a next alternative. If so, it needs to jump back to
the matching path eventually. Otherwise it needs to clear out its own stack
frame and continue the execution on the backtrack code paths.
144 */
145
146 /*
147 Saved stack frames:
148
Atomic blocks and asserts require reloading the values of private data
when the backtrack mechanism is performed. Because of OP_RECURSE, the data
are not necessarily known at compile time, thus we need a dynamic restore
mechanism.
153
154 The stack frames are stored in a chain list, and have the following format:
155 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
156
157 Thus we can restore the private data to a particular point in the stack.
158 */
159
/* Argument block; a jit_function receives a pointer to one of these.
Members are grouped by kind: all pointers first, then the scalar fields.
NOTE(review): code outside this chunk may depend on the member order -
confirm before reordering anything. */
typedef struct jit_arguments {
  /* Pointers first. */
  struct sljit_stack *stack;
  /* NOTE(review): str / begin / end presumably describe the subject
  string and the current position - confirm against the caller. */
  const pcre_uchar *str;
  const pcre_uchar *begin;
  const pcre_uchar *end;
  int *offsets;
  pcre_uchar *uchar_ptr;
  pcre_uchar *mark_ptr;
  void *callout_data;
  /* Everything else after. */
  sljit_u32 limit_match;
  int real_offset_count;
  int offset_count;
  /* Byte-sized option flags. */
  sljit_u8 notbol;
  sljit_u8 noteol;
  sljit_u8 notempty;
  sljit_u8 notempty_atstart;
} jit_arguments;
179
/* Bookkeeping for compiled code. Each of the three arrays has
JIT_NUMBER_OF_COMPILE_MODES entries, i.e. one slot per compile mode. */
typedef struct executable_functions {
  void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
  void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES];
  sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
  /* NOTE(review): callback / userdata are presumably a user supplied
  PUBL(jit_callback) and its opaque argument - confirm at the users. */
  PUBL(jit_callback) callback;
  void *userdata;
  sljit_u32 top_bracket;
  sljit_u32 limit_match;
} executable_functions;
189
/* Node of a singly linked list of sljit jumps. */
typedef struct jump_list {
  struct sljit_jump *jump;
  struct jump_list *next;
} jump_list;
194
/* Node of a singly linked list; each node pairs a jump (start) with a
label (quit).
NOTE(review): presumably "start" enters an out-of-line stub and "quit" is
where the stub returns to - confirm at the users. */
typedef struct stub_list {
  struct sljit_jump *start;
  struct sljit_label *quit;
  struct stub_list *next;
} stub_list;
200
/* Node of a singly linked list; each node pairs a label with a pointer to
an sljit_uw.
NOTE(review): presumably *update_addr receives the final address of the
label after code generation - confirm at the users. */
typedef struct label_addr_list {
  struct sljit_label *label;
  sljit_uw *update_addr;
  struct label_addr_list *next;
} label_addr_list;
206
/* Negative sentinel values.
NOTE(review): presumably used in place of a real frame size (several
backtrack structures document their framesize member as "less than 0 if
not needed") - confirm at the users. */
enum frame_types {
  no_frame = -1,
  no_stack = -2
};
211
/* Tags with the values 0 and 1.
NOTE(review): presumably identify the kind of entry on the control verb
chain - confirm at the users. */
enum control_types {
  type_mark = 0,
  type_then_trap = 1
};
216
/* Pointer to a function that takes a jit_arguments pointer and returns an
int, declared with the SLJIT_CALL calling convention macro. */
typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
218
/* The following structure is the key data type for the recursive
code generator. It is allocated by compile_matchingpath, and contains
the arguments for compile_backtrackingpath. Must be the first member
of its descendants. */
typedef struct backtrack_common {
  /* Concatenation stack. */
  struct backtrack_common *prev;
  jump_list *nextbacktracks;
  /* Internal stack (for component operators). */
  struct backtrack_common *top;
  jump_list *topbacktracks;
  /* Opcode pointer. */
  pcre_uchar *cc;
} backtrack_common;
233
/* Backtrack data that starts with a backtrack_common member, as that
structure requires of its descendants. */
typedef struct assert_backtrack {
  backtrack_common common;
  jump_list *condfailed;
  /* Less than 0 if a frame is not needed. */
  int framesize;
  /* Points to our private memory word on the stack. */
  int private_data_ptr;
  /* For iterators. */
  struct sljit_label *matchingpath;
} assert_backtrack;
244
/* Backtrack data that starts with a backtrack_common member, as that
structure requires of its descendants. */
typedef struct bracket_backtrack {
  backtrack_common common;
  /* Where to continue if an alternative is successfully matched. */
  struct sljit_label *alternative_matchingpath;
  /* For rmin and rmax iterators. */
  struct sljit_label *recursive_matchingpath;
  /* For greedy ? operator. */
  struct sljit_label *zero_matchingpath;
  /* Contains the branches of a failed condition. The members share
  storage; which one is valid depends on the opcode (see below). */
  union {
    /* Both for OP_COND, OP_SCOND. */
    jump_list *condfailed;
    assert_backtrack *assert;
    /* For OP_ONCE. Less than 0 if not needed. */
    int framesize;
  } u;
  /* Points to our private memory word on the stack. */
  int private_data_ptr;
} bracket_backtrack;
264
/* Backtrack data that starts with a backtrack_common member, as that
structure requires of its descendants. */
typedef struct bracketpos_backtrack {
  backtrack_common common;
  /* Points to our private memory word on the stack. */
  int private_data_ptr;
  /* Reverting stack is needed. */
  int framesize;
  /* Allocated stack size. */
  int stacksize;
} bracketpos_backtrack;
274
/* Backtrack data that starts with a backtrack_common member and adds a
single label. */
typedef struct braminzero_backtrack {
  backtrack_common common;
  struct sljit_label *matchingpath;
} braminzero_backtrack;
279
/* Backtrack data that starts with a backtrack_common member, as that
structure requires of its descendants. */
typedef struct char_iterator_backtrack {
  backtrack_common common;
  /* Next iteration. */
  struct sljit_label *matchingpath;
  /* The two members share storage: either a jump list or the charpos
  record. NOTE(review): presumably charpos.enabled tells which one is in
  use - confirm at the users. */
  union {
    jump_list *backtracks;
    struct {
      unsigned int othercasebit;
      pcre_uchar chr;
      BOOL enabled;
    } charpos;
  } u;
} char_iterator_backtrack;
293
/* Backtrack data that starts with a backtrack_common member and adds a
single label. */
typedef struct ref_iterator_backtrack {
  backtrack_common common;
  /* Next iteration. */
  struct sljit_label *matchingpath;
} ref_iterator_backtrack;
299
/* Node of a singly linked list of recursion entries. */
typedef struct recurse_entry {
  struct recurse_entry *next;
  /* Contains the function entry. */
  struct sljit_label *entry;
  /* Collects the calls until the function is created. */
  jump_list *calls;
  /* Points to the starting opcode. */
  sljit_sw start;
} recurse_entry;
309
/* Backtrack data that starts with a backtrack_common member and adds a
single flag. */
typedef struct recurse_backtrack {
  backtrack_common common;
  BOOL inlined_pattern;
} recurse_backtrack;
314
/* OP_THEN_TRAP is given the value OP_TABLE_LENGTH.
NOTE(review): presumably chosen because it lies just past the real opcode
values, making it a JIT-private pseudo opcode - confirm. */
#define OP_THEN_TRAP OP_TABLE_LENGTH

/* Backtrack data that starts with a backtrack_common member, as that
structure requires of its descendants. */
typedef struct then_trap_backtrack {
  backtrack_common common;
  /* If then_trap is not NULL, this structure contains the real
  then_trap for the backtracking path. */
  struct then_trap_backtrack *then_trap;
  /* Points to the starting opcode. */
  sljit_sw start;
  /* Exit point for the then opcodes of this alternative. */
  jump_list *quit;
  /* Frame size of the current alternative. */
  int framesize;
} then_trap_backtrack;
329
#define MAX_RANGE_SIZE 4

/* State shared by the code generator routines while one pattern is being
compiled. The sljit_s32 "..._ptr" members hold offsets; several of them
start at 0 and are assigned by check_opcode_types() the first time the
matching opcode is seen. */
typedef struct compiler_common {
  /* The sljit generic compiler. */
  struct sljit_compiler *compiler;
  /* First byte code. */
  pcre_uchar *start;
  /* Maps private data offset to each opcode. Indexed by the distance of
  the opcode from "start" (see the PRIVATE_DATA macro). */
  sljit_s32 *private_data_ptrs;
  /* Chain list of read-only data ptrs. */
  void *read_only_data_head;
  /* Tells whether the capturing bracket is optimized. An entry is set
  to 0 by check_opcode_types() for brackets that must not be optimized. */
  sljit_u8 *optimized_cbracket;
  /* Tells whether the starting offset is a target of then. */
  sljit_u8 *then_offsets;
  /* Current position where a THEN must jump. */
  then_trap_backtrack *then_trap;
  /* Starting offset of private data for capturing brackets. */
  sljit_s32 cbra_ptr;
  /* Output vector starting point. Must be divisible by 2. */
  sljit_s32 ovector_start;
  /* Points to the starting character of the current match. */
  sljit_s32 start_ptr;
  /* Last known position of the requested byte. */
  sljit_s32 req_char_ptr;
  /* Head of the last recursion. 0 until OP_RECURSE is seen. */
  sljit_s32 recursive_head_ptr;
  /* First inspected character for partial matching.
     (Needed for avoiding zero length partial matches.) */
  sljit_s32 start_used_ptr;
  /* Starting pointer for partial soft matches. */
  sljit_s32 hit_start;
  /* Pointer of the match end position. */
  sljit_s32 match_end_ptr;
  /* Points to the marked string. 0 until a mark-carrying verb is seen. */
  sljit_s32 mark_ptr;
  /* Recursive control verb management chain. check_opcode_types() only
  stores 1 here, as a "needed" flag. */
  sljit_s32 control_head_ptr;
  /* Points to the last matched capture block index. 0 until OP_CALLOUT
  is seen. */
  sljit_s32 capture_last_ptr;
  /* Fast forward skipping byte code pointer. */
  pcre_uchar *fast_forward_bc_ptr;
  /* Locals used by fast fail optimization. */
  sljit_s32 fast_fail_start_ptr;
  sljit_s32 fast_fail_end_ptr;

  /* Flipped and lower case tables. */
  const sljit_u8 *fcc;
  sljit_sw lcc;
  /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
  int mode;
  /* NOTE(review): the original comment read "TRUE, when minlength is
  greater than 0", which contradicts the member name; presumably this is
  TRUE when the pattern may match an empty string - confirm at the place
  where it is initialised. check_opcode_types() sets it to TRUE when it
  sees OP_SET_SOM. */
  BOOL might_be_empty;
  /* \K is found in the pattern. */
  BOOL has_set_som;
  /* (*SKIP:arg) is found in the pattern. */
  BOOL has_skip_arg;
  /* (*THEN) is found in the pattern. */
  BOOL has_then;
  /* (*SKIP) or (*SKIP:arg) is found in lookbehind assertion. */
  BOOL has_skip_in_assert_back;
  /* Currently in recurse or negative assert. */
  BOOL local_exit;
  /* Currently in a positive assert. */
  BOOL positive_assert;
  /* Newline control. */
  int nltype;
  sljit_u32 nlmax;
  sljit_u32 nlmin;
  int newline;
  int bsr_nltype;
  sljit_u32 bsr_nlmax;
  sljit_u32 bsr_nlmin;
  /* Dollar endonly. */
  int endonly;
  /* Tables. */
  sljit_sw ctypes;
  /* Named capturing brackets. */
  pcre_uchar *name_table;
  sljit_sw name_count;
  sljit_sw name_entry_size;

  /* Labels and jump lists. */
  struct sljit_label *partialmatchlabel;
  struct sljit_label *quit_label;
  struct sljit_label *forced_quit_label;
  struct sljit_label *accept_label;
  struct sljit_label *ff_newline_shortcut;
  stub_list *stubs;
  label_addr_list *label_addrs;
  recurse_entry *entries;
  recurse_entry *currententry;
  jump_list *partialmatch;
  jump_list *quit;
  jump_list *positive_assert_quit;
  jump_list *forced_quit;
  jump_list *accept;
  jump_list *calllimit;
  jump_list *stackalloc;
  jump_list *revertframes;
  jump_list *wordboundary;
  jump_list *anynewline;
  jump_list *hspace;
  jump_list *vspace;
  jump_list *casefulcmp;
  jump_list *caselesscmp;
  jump_list *reset_match;
  BOOL jscript_compat;
  /* The members below exist only in builds with the given features. */
#ifdef SUPPORT_UTF
  BOOL utf;
#ifdef SUPPORT_UCP
  BOOL use_ucp;
  jump_list *getucd;
#endif
#ifdef COMPILE_PCRE8
  jump_list *utfreadchar;
  jump_list *utfreadchar16;
  jump_list *utfreadtype8;
#endif
#endif /* SUPPORT_UTF */
} compiler_common;
451
/* For byte_sequence_compare. */

typedef struct compare_context {
  int length;
  int sourcereg;
  /* The remaining members exist only when SLJIT_UNALIGNED is defined and
  non-zero. */
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  int ucharptr;
  /* Both unions overlay the same storage as a 32 bit integer, a 16 bit
  integer and an array of code units; the array length depends on the
  code unit width (4 x 8 bit, 2 x 16 bit or 1 x 32 bit).
  NOTE(review): presumably "c" holds the characters and "oc" their other
  case counterparts - confirm in byte_sequence_compare. */
  union {
    sljit_s32 asint;
    sljit_u16 asushort;
#if defined COMPILE_PCRE8
    sljit_u8 asbyte;
    sljit_u8 asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_u16 asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_u32 asuchars[1];
#endif
  } c;
  union {
    sljit_s32 asint;
    sljit_u16 asushort;
#if defined COMPILE_PCRE8
    sljit_u8 asbyte;
    sljit_u8 asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_u16 asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_u32 asuchars[1];
#endif
  } oc;
#endif
} compare_context;
485
/* Undefine sljit macros. This file defines its own CMP shortcut (built on
sljit_emit_cmp) among the emitter shortcuts further down. */
#undef CMP

/* Used for accessing the elements of the stack: converts a word index
into a byte offset (i * sizeof(sljit_sw)). */
#define STACK(i) ((i) * (int)sizeof(sljit_sw))

/* Symbolic names for the sljit registers used by the generator. Note that
the numbering is not sequential: TMP2 is R2 while STACK_TOP is R1. */
#define TMP1 SLJIT_R0
#define TMP2 SLJIT_R2
#define TMP3 SLJIT_R3
#define STR_PTR SLJIT_S0
#define STR_END SLJIT_S1
#define STACK_TOP SLJIT_R1
#define STACK_LIMIT SLJIT_S2
#define COUNT_MATCH SLJIT_S3
#define ARGUMENTS SLJIT_S4
#define RETURN_ADDR SLJIT_R4
/* Local space layout. The first five machine words have fixed roles; each
macro below is the byte offset of one word. */
/* These two locals can be used by the current opcode. */
#define LOCALS0 (0 * sizeof(sljit_sw))
#define LOCALS1 (1 * sizeof(sljit_sw))
/* Two local variables for possessive quantifiers (char1 cannot use them). */
#define POSSESSIVE0 (2 * sizeof(sljit_sw))
#define POSSESSIVE1 (3 * sizeof(sljit_sw))
/* Max limit of recursions. */
#define LIMIT_MATCH (4 * sizeof(sljit_sw))
/* The output vector is stored on the stack, and contains pointers
to characters. The vector data is divided into two groups: the first
group contains the start / end character pointers, and the second is
the start pointers when the end of the capturing group has not yet reached. */
/* The four macros below expand to expressions that use a variable named
"common", so they only work where such a compiler_common pointer is in
scope. OVECTOR(i) and OVECTOR_PRIV(i) yield the byte offset of word i
relative to ovector_start and cbra_ptr respectively; PRIVATE_DATA(cc)
is the private_data_ptrs slot of the opcode at cc. */
#define OVECTOR_START (common->ovector_start)
#define OVECTOR(i) (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
#define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
520
/* Select the sljit move opcodes whose operand width (U8 / U16 / U32)
matches the library flavour being compiled. Building without any of the
COMPILE_PCRE8 / 16 / 32 macros is a hard error. */
#if defined COMPILE_PCRE8
#define MOV_UCHAR SLJIT_MOV_U8
#define MOVU_UCHAR SLJIT_MOVU_U8
#elif defined COMPILE_PCRE16
#define MOV_UCHAR SLJIT_MOV_U16
#define MOVU_UCHAR SLJIT_MOVU_U16
#elif defined COMPILE_PCRE32
#define MOV_UCHAR SLJIT_MOV_U32
#define MOVU_UCHAR SLJIT_MOVU_U32
#else
#error Unsupported compiling mode
#endif
533
/* Shortcuts. DEFINE_COMPILER declares a local "compiler" taken from
common->compiler; every other macro in this group expands to an sljit
call that uses that local, so DEFINE_COMPILER (or an equivalent
declaration) must be in scope wherever they are used. */
#define DEFINE_COMPILER \
  struct sljit_compiler *compiler = common->compiler
#define OP1(op, dst, dstw, src, srcw) \
  sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
  sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
#define LABEL() \
  sljit_emit_label(compiler)
#define JUMP(type) \
  sljit_emit_jump(compiler, (type))
/* Emit a jump and bind it to an already existing label. */
#define JUMPTO(type, label) \
  sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
/* Bind an existing jump to a label emitted at the current position. */
#define JUMPHERE(jump) \
  sljit_set_label((jump), sljit_emit_label(compiler))
#define SET_LABEL(jump, label) \
  sljit_set_label((jump), (label))
#define CMP(type, src1, src1w, src2, src2w) \
  sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
/* Emit a compare-and-jump and bind it to an already existing label. */
#define CMPTO(type, src1, src1w, src2, src2w, label) \
  sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
#define OP_FLAGS(op, dst, dstw, type) \
  sljit_emit_op_flags(compiler, (op), (dst), (dstw), (type))
#define GET_LOCAL_BASE(dst, dstw, offset) \
  sljit_get_local_base(compiler, (dst), (dstw), (offset))

/* NOTE(review): the users of the two constants below are outside this
chunk; READ_CHAR_MAX is presumably the "no upper bound" argument of the
character reading helpers - confirm. */
#define READ_CHAR_MAX 0x7fffffff

#define INVALID_UTF_CHAR 888
563
/* Returns the address of the first code unit after the closing KET of the
assertion or bracket that starts at cc. cc must point to one of the
opcodes accepted by the first assertion below. */
static pcre_uchar *bracketend(pcre_uchar *cc)
{
SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));

/* Follow the link stored after the opening opcode, then the link of
every OP_ALT that is reached. */
for (;;)
  {
  cc += GET(cc, 1);
  if (*cc != OP_ALT)
    break;
  }

SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);

/* Step over the KET opcode and its link. */
return cc + (1 + LINK_SIZE);
}
572
/* Counts the alternatives of the assertion or bracket that starts at cc.
The result is always at least 1. */
static int no_alternatives(pcre_uchar *cc)
{
int total;

SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));

/* The first alternative is always present. */
total = 1;
cc += GET(cc, 1);

/* Every OP_ALT that follows introduces one more. */
while (*cc == OP_ALT)
  {
  cc += GET(cc, 1);
  total++;
  }

SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
return total;
}
586
/* Functions which might need modification for every newly supported opcode:
588 next_opcode
589 check_opcode_types
590 set_private_data_ptrs
591 get_framesize
592 init_frame
593 get_private_data_copy_length
594 copy_private_data
595 compile_matchingpath
596 compile_backtrackingpath
597 */
598
/* Returns the address of the opcode that follows the one at cc.
Returns NULL for OP_ANYBYTE when common->utf is set (UTF builds only), and
for any opcode that is not listed (after SLJIT_UNREACHABLE()). */
static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
{
pcre_uchar *next;

SLJIT_UNUSED_ARG(common);

switch(*cc)
  {
  /* The length is stored in the opcode itself. */
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
  case OP_XCLASS:
  return cc + GET(cc, 1);
#endif

  /* Verbs with an argument: the argument length is stored in cc[1]. */
  case OP_MARK: case OP_PRUNE_ARG: case OP_SKIP_ARG: case OP_THEN_ARG:
  return cc + 1 + 2 + cc[1];

  case OP_ANYBYTE:
#ifdef SUPPORT_UTF
  if (common->utf) return NULL;
#endif
  return cc + 1;

  /* Special cases: one code unit less than the table length.
  NOTE(review): presumably so that the embedded type opcode is visited as
  an opcode of its own - confirm. */
  case OP_TYPESTAR: case OP_TYPEMINSTAR:
  case OP_TYPEPLUS: case OP_TYPEMINPLUS:
  case OP_TYPEQUERY: case OP_TYPEMINQUERY:
  case OP_TYPEUPTO: case OP_TYPEMINUPTO:
  case OP_TYPEEXACT:
  case OP_TYPEPOSSTAR: case OP_TYPEPOSPLUS:
  case OP_TYPEPOSQUERY: case OP_TYPEPOSUPTO:
  return cc + PRIV(OP_lengths)[*cc] - 1;

  /* Opcodes that end with a character: in UTF mode the character may
  occupy extra code units beyond the table length. */
  case OP_CHAR: case OP_CHARI: case OP_NOT: case OP_NOTI:
  case OP_STAR: case OP_MINSTAR: case OP_PLUS: case OP_MINPLUS:
  case OP_QUERY: case OP_MINQUERY: case OP_UPTO: case OP_MINUPTO:
  case OP_EXACT:
  case OP_POSSTAR: case OP_POSPLUS: case OP_POSQUERY: case OP_POSUPTO:
  case OP_STARI: case OP_MINSTARI: case OP_PLUSI: case OP_MINPLUSI:
  case OP_QUERYI: case OP_MINQUERYI: case OP_UPTOI: case OP_MINUPTOI:
  case OP_EXACTI:
  case OP_POSSTARI: case OP_POSPLUSI: case OP_POSQUERYI: case OP_POSUPTOI:
  case OP_NOTSTAR: case OP_NOTMINSTAR: case OP_NOTPLUS: case OP_NOTMINPLUS:
  case OP_NOTQUERY: case OP_NOTMINQUERY: case OP_NOTUPTO: case OP_NOTMINUPTO:
  case OP_NOTEXACT:
  case OP_NOTPOSSTAR: case OP_NOTPOSPLUS: case OP_NOTPOSQUERY: case OP_NOTPOSUPTO:
  case OP_NOTSTARI: case OP_NOTMINSTARI: case OP_NOTPLUSI: case OP_NOTMINPLUSI:
  case OP_NOTQUERYI: case OP_NOTMINQUERYI: case OP_NOTUPTOI: case OP_NOTMINUPTOI:
  case OP_NOTEXACTI:
  case OP_NOTPOSSTARI: case OP_NOTPOSPLUSI: case OP_NOTPOSQUERYI: case OP_NOTPOSUPTOI:
  next = cc + PRIV(OP_lengths)[*cc];
#ifdef SUPPORT_UTF
  if (common->utf && HAS_EXTRALEN(next[-1])) next += GET_EXTRALEN(next[-1]);
#endif
  return next;

  /* Opcodes whose full length comes straight from the length table. */
  case OP_SOD: case OP_SOM: case OP_SET_SOM:
  case OP_NOT_WORD_BOUNDARY: case OP_WORD_BOUNDARY:
  case OP_NOT_DIGIT: case OP_DIGIT:
  case OP_NOT_WHITESPACE: case OP_WHITESPACE:
  case OP_NOT_WORDCHAR: case OP_WORDCHAR:
  case OP_ANY: case OP_ALLANY:
  case OP_NOTPROP: case OP_PROP:
  case OP_ANYNL:
  case OP_NOT_HSPACE: case OP_HSPACE:
  case OP_NOT_VSPACE: case OP_VSPACE:
  case OP_EXTUNI:
  case OP_EODN: case OP_EOD:
  case OP_CIRC: case OP_CIRCM:
  case OP_DOLL: case OP_DOLLM:
  case OP_CRSTAR: case OP_CRMINSTAR:
  case OP_CRPLUS: case OP_CRMINPLUS:
  case OP_CRQUERY: case OP_CRMINQUERY:
  case OP_CRRANGE: case OP_CRMINRANGE:
  case OP_CRPOSSTAR: case OP_CRPOSPLUS:
  case OP_CRPOSQUERY: case OP_CRPOSRANGE:
  case OP_CLASS: case OP_NCLASS:
  case OP_REF: case OP_REFI:
  case OP_DNREF: case OP_DNREFI:
  case OP_RECURSE:
  case OP_CALLOUT:
  case OP_ALT:
  case OP_KET: case OP_KETRMAX: case OP_KETRMIN: case OP_KETRPOS:
  case OP_REVERSE:
  case OP_ASSERT: case OP_ASSERT_NOT:
  case OP_ASSERTBACK: case OP_ASSERTBACK_NOT:
  case OP_ONCE: case OP_ONCE_NC:
  case OP_BRA: case OP_BRAPOS:
  case OP_CBRA: case OP_CBRAPOS:
  case OP_COND:
  case OP_SBRA: case OP_SBRAPOS:
  case OP_SCBRA: case OP_SCBRAPOS:
  case OP_SCOND:
  case OP_CREF: case OP_DNCREF:
  case OP_RREF: case OP_DNRREF:
  case OP_DEF:
  case OP_BRAZERO: case OP_BRAMINZERO: case OP_BRAPOSZERO:
  case OP_PRUNE: case OP_SKIP: case OP_THEN: case OP_COMMIT:
  case OP_FAIL: case OP_ACCEPT: case OP_ASSERT_ACCEPT:
  case OP_CLOSE:
  case OP_SKIPZERO:
  return cc + PRIV(OP_lengths)[*cc];

  default:
  /* All opcodes are supported now! */
  SLJIT_UNREACHABLE();
  return NULL;
  }
}
793
check_opcode_types(compiler_common * common,pcre_uchar * cc,pcre_uchar * ccend)794 static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
795 {
796 int count;
797 pcre_uchar *slot;
798 pcre_uchar *assert_back_end = cc - 1;
799
800 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
801 while (cc < ccend)
802 {
803 switch(*cc)
804 {
805 case OP_SET_SOM:
806 common->has_set_som = TRUE;
807 common->might_be_empty = TRUE;
808 cc += 1;
809 break;
810
811 case OP_REF:
812 case OP_REFI:
813 common->optimized_cbracket[GET2(cc, 1)] = 0;
814 cc += 1 + IMM2_SIZE;
815 break;
816
817 case OP_CBRAPOS:
818 case OP_SCBRAPOS:
819 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
820 cc += 1 + LINK_SIZE + IMM2_SIZE;
821 break;
822
823 case OP_COND:
824 case OP_SCOND:
825 /* Only AUTO_CALLOUT can insert this opcode. We do
826 not intend to support this case. */
827 if (cc[1 + LINK_SIZE] == OP_CALLOUT)
828 return FALSE;
829 cc += 1 + LINK_SIZE;
830 break;
831
832 case OP_CREF:
833 common->optimized_cbracket[GET2(cc, 1)] = 0;
834 cc += 1 + IMM2_SIZE;
835 break;
836
837 case OP_DNREF:
838 case OP_DNREFI:
839 case OP_DNCREF:
840 count = GET2(cc, 1 + IMM2_SIZE);
841 slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
842 while (count-- > 0)
843 {
844 common->optimized_cbracket[GET2(slot, 0)] = 0;
845 slot += common->name_entry_size;
846 }
847 cc += 1 + 2 * IMM2_SIZE;
848 break;
849
850 case OP_RECURSE:
851 /* Set its value only once. */
852 if (common->recursive_head_ptr == 0)
853 {
854 common->recursive_head_ptr = common->ovector_start;
855 common->ovector_start += sizeof(sljit_sw);
856 }
857 cc += 1 + LINK_SIZE;
858 break;
859
860 case OP_CALLOUT:
861 if (common->capture_last_ptr == 0)
862 {
863 common->capture_last_ptr = common->ovector_start;
864 common->ovector_start += sizeof(sljit_sw);
865 }
866 cc += 2 + 2 * LINK_SIZE;
867 break;
868
869 case OP_ASSERTBACK:
870 slot = bracketend(cc);
871 if (slot > assert_back_end)
872 assert_back_end = slot;
873 cc += 1 + LINK_SIZE;
874 break;
875
876 case OP_THEN_ARG:
877 common->has_then = TRUE;
878 common->control_head_ptr = 1;
879 /* Fall through. */
880
881 case OP_PRUNE_ARG:
882 case OP_MARK:
883 if (common->mark_ptr == 0)
884 {
885 common->mark_ptr = common->ovector_start;
886 common->ovector_start += sizeof(sljit_sw);
887 }
888 cc += 1 + 2 + cc[1];
889 break;
890
891 case OP_THEN:
892 common->has_then = TRUE;
893 common->control_head_ptr = 1;
894 cc += 1;
895 break;
896
897 case OP_SKIP:
898 if (cc < assert_back_end)
899 common->has_skip_in_assert_back = TRUE;
900 cc += 1;
901 break;
902
903 case OP_SKIP_ARG:
904 common->control_head_ptr = 1;
905 common->has_skip_arg = TRUE;
906 if (cc < assert_back_end)
907 common->has_skip_in_assert_back = TRUE;
908 cc += 1 + 2 + cc[1];
909 break;
910
911 default:
912 cc = next_opcode(common, cc);
913 if (cc == NULL)
914 return FALSE;
915 break;
916 }
917 }
918 return TRUE;
919 }
920
is_accelerated_repeat(pcre_uchar * cc)921 static BOOL is_accelerated_repeat(pcre_uchar *cc)
922 {
923 switch(*cc)
924 {
925 case OP_TYPESTAR:
926 case OP_TYPEMINSTAR:
927 case OP_TYPEPLUS:
928 case OP_TYPEMINPLUS:
929 case OP_TYPEPOSSTAR:
930 case OP_TYPEPOSPLUS:
931 return (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI);
932
933 case OP_STAR:
934 case OP_MINSTAR:
935 case OP_PLUS:
936 case OP_MINPLUS:
937 case OP_POSSTAR:
938 case OP_POSPLUS:
939
940 case OP_STARI:
941 case OP_MINSTARI:
942 case OP_PLUSI:
943 case OP_MINPLUSI:
944 case OP_POSSTARI:
945 case OP_POSPLUSI:
946
947 case OP_NOTSTAR:
948 case OP_NOTMINSTAR:
949 case OP_NOTPLUS:
950 case OP_NOTMINPLUS:
951 case OP_NOTPOSSTAR:
952 case OP_NOTPOSPLUS:
953
954 case OP_NOTSTARI:
955 case OP_NOTMINSTARI:
956 case OP_NOTPLUSI:
957 case OP_NOTMINPLUSI:
958 case OP_NOTPOSSTARI:
959 case OP_NOTPOSPLUSI:
960 return TRUE;
961
962 case OP_CLASS:
963 case OP_NCLASS:
964 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
965 case OP_XCLASS:
966 cc += (*cc == OP_XCLASS) ? GET(cc, 1) : (int)(1 + (32 / sizeof(pcre_uchar)));
967 #else
968 cc += (1 + (32 / sizeof(pcre_uchar)));
969 #endif
970
971 switch(*cc)
972 {
973 case OP_CRSTAR:
974 case OP_CRMINSTAR:
975 case OP_CRPLUS:
976 case OP_CRMINPLUS:
977 case OP_CRPOSSTAR:
978 case OP_CRPOSPLUS:
979 return TRUE;
980 }
981 break;
982 }
983 return FALSE;
984 }
985
detect_fast_forward_skip(compiler_common * common,int * private_data_start)986 static SLJIT_INLINE BOOL detect_fast_forward_skip(compiler_common *common, int *private_data_start)
987 {
988 pcre_uchar *cc = common->start;
989 pcre_uchar *end;
990
991 /* Skip not repeated brackets. */
992 while (TRUE)
993 {
994 switch(*cc)
995 {
996 case OP_SOD:
997 case OP_SOM:
998 case OP_SET_SOM:
999 case OP_NOT_WORD_BOUNDARY:
1000 case OP_WORD_BOUNDARY:
1001 case OP_EODN:
1002 case OP_EOD:
1003 case OP_CIRC:
1004 case OP_CIRCM:
1005 case OP_DOLL:
1006 case OP_DOLLM:
1007 /* Zero width assertions. */
1008 cc++;
1009 continue;
1010 }
1011
1012 if (*cc != OP_BRA && *cc != OP_CBRA)
1013 break;
1014
1015 end = cc + GET(cc, 1);
1016 if (*end != OP_KET || PRIVATE_DATA(end) != 0)
1017 return FALSE;
1018 if (*cc == OP_CBRA)
1019 {
1020 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1021 return FALSE;
1022 cc += IMM2_SIZE;
1023 }
1024 cc += 1 + LINK_SIZE;
1025 }
1026
1027 if (is_accelerated_repeat(cc))
1028 {
1029 common->fast_forward_bc_ptr = cc;
1030 common->private_data_ptrs[(cc + 1) - common->start] = *private_data_start;
1031 *private_data_start += sizeof(sljit_sw);
1032 return TRUE;
1033 }
1034 return FALSE;
1035 }
1036
detect_fast_fail(compiler_common * common,pcre_uchar * cc,int * private_data_start,sljit_s32 depth)1037 static SLJIT_INLINE void detect_fast_fail(compiler_common *common, pcre_uchar *cc, int *private_data_start, sljit_s32 depth)
1038 {
1039 pcre_uchar *next_alt;
1040
1041 SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA);
1042
1043 if (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1044 return;
1045
1046 next_alt = bracketend(cc) - (1 + LINK_SIZE);
1047 if (*next_alt != OP_KET || PRIVATE_DATA(next_alt) != 0)
1048 return;
1049
1050 do
1051 {
1052 next_alt = cc + GET(cc, 1);
1053
1054 cc += 1 + LINK_SIZE + ((*cc == OP_CBRA) ? IMM2_SIZE : 0);
1055
1056 while (TRUE)
1057 {
1058 switch(*cc)
1059 {
1060 case OP_SOD:
1061 case OP_SOM:
1062 case OP_SET_SOM:
1063 case OP_NOT_WORD_BOUNDARY:
1064 case OP_WORD_BOUNDARY:
1065 case OP_EODN:
1066 case OP_EOD:
1067 case OP_CIRC:
1068 case OP_CIRCM:
1069 case OP_DOLL:
1070 case OP_DOLLM:
1071 /* Zero width assertions. */
1072 cc++;
1073 continue;
1074 }
1075 break;
1076 }
1077
1078 if (depth > 0 && (*cc == OP_BRA || *cc == OP_CBRA))
1079 detect_fast_fail(common, cc, private_data_start, depth - 1);
1080
1081 if (is_accelerated_repeat(cc))
1082 {
1083 common->private_data_ptrs[(cc + 1) - common->start] = *private_data_start;
1084
1085 if (common->fast_fail_start_ptr == 0)
1086 common->fast_fail_start_ptr = *private_data_start;
1087
1088 *private_data_start += sizeof(sljit_sw);
1089 common->fast_fail_end_ptr = *private_data_start;
1090
1091 if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
1092 return;
1093 }
1094
1095 cc = next_alt;
1096 }
1097 while (*cc == OP_ALT);
1098 }
1099
/* Returns a size of 0, 1 or 2 for the class repeat opcode at cc:
   OP_CRSTAR, OP_CRPLUS: 2
   OP_CRMINSTAR, OP_CRMINPLUS, OP_CRQUERY, OP_CRMINQUERY: 1
   OP_CRRANGE, OP_CRMINRANGE: when the stored maximum is 0, 2 for OP_CRRANGE
     and 1 for OP_CRMINRANGE; otherwise maximum - minimum, capped at 2
   any other opcode: 0
NOTE(review): the unit of the result (presumably machine words of private
data) is not visible in this chunk - confirm at the callers. */
static int get_class_iterator_size(pcre_uchar *cc)
{
const pcre_uchar op = *cc;
sljit_u32 lower;
sljit_u32 upper;
sljit_u32 spread;

if (op == OP_CRSTAR || op == OP_CRPLUS)
  return 2;

if (op == OP_CRMINSTAR || op == OP_CRMINPLUS || op == OP_CRQUERY || op == OP_CRMINQUERY)
  return 1;

if (op != OP_CRRANGE && op != OP_CRMINRANGE)
  return 0;

lower = GET2(cc, 1);
upper = GET2(cc, 1 + IMM2_SIZE);

if (upper == 0)
  return (op == OP_CRRANGE) ? 2 : 1;

/* Unsigned difference, limited to 2. */
spread = upper - lower;
if (spread > 2)
  spread = 2;
return spread;
}
1131
/* Checks whether the bracket starting at begin is immediately followed by
copies of itself (same length, same code units) and, if so, records the run
as a counted repeat in common->private_data_ptrs.

A record occupies three consecutive private_data_ptrs entries, the first one
being at index (bracket end - common->start - LINK_SIZE):
  [0] a code length (distance to the end of the run that is covered),
  [1] the repeat kind: OP_EXACT, OP_UPTO or OP_MINUPTO,
  [2] the repeat count.

Returns TRUE when a record was written now or was already present for this
bracket, FALSE otherwise. */
static BOOL detect_repeat(compiler_common *common, pcre_uchar *begin)
{
pcre_uchar *end = bracketend(begin);
pcre_uchar *next;
pcre_uchar *next_end;
pcre_uchar *max_end;
pcre_uchar type;
sljit_sw length = end - begin;
int min, max, i;

/* Detect fixed iterations first. */
if (end[-(1 + LINK_SIZE)] != OP_KET)
  return FALSE;

/* Already detected repeat. */
if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
  return TRUE;

/* Count the consecutive brackets that are identical to the one at begin.
min starts at 1 because the bracket at begin itself is included. */
next = end;
min = 1;
while (1)
  {
  if (*next != *begin)
    break;
  next_end = bracketend(next);
  if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
    break;
  next = next_end;
  min++;
  }

/* A bracket with exactly one identical copy is not converted. */
if (min == 2)
  return FALSE;

/* Look for optional copies: a chain of (BRAZERO | BRAMINZERO) OP_BRA <copy>
items, each one located right after the copy of the previous item, closed
by a final (BRAZERO | BRAMINZERO) <copy> and max OP_KET opcodes. */
max = 0;
max_end = next;
if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
  {
  type = *next;
  while (1)
    {
    if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
      break;
    next_end = bracketend(next + 2 + LINK_SIZE);
    if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
      break;
    next = next_end;
    max++;
    }

  if (next[0] == type && next[1] == *begin && max >= 1)
    {
    next_end = bracketend(next + 1);
    if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
      {
      /* Every OP_BRA opened by the chain must be closed right here. */
      for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
        if (*next_end != OP_KET)
          break;

      if (i == max)
        {
        /* The record is attached to the bracket that ends at max_end. */
        common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
        common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
        /* +2 the original and the last. */
        common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
        if (min == 1)
          return TRUE;
        /* The bracket ending at max_end is now counted by the record above,
        so it is removed from the fixed part: max_end is moved back by the
        length stored in the link field in front of it plus 1 + LINK_SIZE. */
        min--;
        max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
        }
      }
    }
  }

/* Three or more fixed copies: attach an OP_EXACT record to the first bracket,
covering the code between end and max_end. */
if (min >= 3)
  {
  common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
  common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
  common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
  return TRUE;
  }

return FALSE;
}
1216
/* Case label groups shared by the functions below that walk the bytecode
(set_private_data_ptrs, get_private_data_copy_length, copy_private_data).
Each macro expands to a run of "case OP_...:" labels and must be followed by
the statements of that case. The groups are removed again with #undef after
the last user. */

/* Single character iterators that are given one private data word. */
#define CASE_ITERATOR_PRIVATE_DATA_1 \
    case OP_MINSTAR: \
    case OP_MINPLUS: \
    case OP_QUERY: \
    case OP_MINQUERY: \
    case OP_MINSTARI: \
    case OP_MINPLUSI: \
    case OP_QUERYI: \
    case OP_MINQUERYI: \
    case OP_NOTMINSTAR: \
    case OP_NOTMINPLUS: \
    case OP_NOTQUERY: \
    case OP_NOTMINQUERY: \
    case OP_NOTMINSTARI: \
    case OP_NOTMINPLUSI: \
    case OP_NOTQUERYI: \
    case OP_NOTMINQUERYI:

/* Single character iterators that are given two private data words. */
#define CASE_ITERATOR_PRIVATE_DATA_2A \
    case OP_STAR: \
    case OP_PLUS: \
    case OP_STARI: \
    case OP_PLUSI: \
    case OP_NOTSTAR: \
    case OP_NOTPLUS: \
    case OP_NOTSTARI: \
    case OP_NOTPLUSI:

/* Single character iterators that are given two private data words and
whose opcode is followed by an additional IMM2_SIZE operand. */
#define CASE_ITERATOR_PRIVATE_DATA_2B \
    case OP_UPTO: \
    case OP_MINUPTO: \
    case OP_UPTOI: \
    case OP_MINUPTOI: \
    case OP_NOTUPTO: \
    case OP_NOTMINUPTO: \
    case OP_NOTUPTOI: \
    case OP_NOTMINUPTOI:

/* Character type iterators that are given one private data word. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
    case OP_TYPEMINSTAR: \
    case OP_TYPEMINPLUS: \
    case OP_TYPEQUERY: \
    case OP_TYPEMINQUERY:

/* Character type iterators that may be given two private data words. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
    case OP_TYPESTAR: \
    case OP_TYPEPLUS:

/* Character type iterators that may be given two private data words and
whose opcode is followed by an additional IMM2_SIZE operand. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
    case OP_TYPEUPTO: \
    case OP_TYPEMINUPTO:
1268
/* Walks the bytecode from common->start up to ccend and hands out private
data slots. Slots are byte offsets that grow in steps of sizeof(sljit_sw);
the first free offset is read from *private_data_start and the next free
offset is written back to it on return. The slot of an opcode is stored in
common->private_data_ptrs at the index of that opcode.

The walk stops early once the next free offset exceeds SLJIT_MAX_LOCAL_SIZE.

Local state:
  space      - number of words requested by a character / class iterator
  size       - code units to skip for non-bracket opcodes; a negative value
               means the opcode is followed by a character, whose extra
               UTF code units are skipped as well
  bracketlen - length of a bracket header; a non-zero value also triggers
               the update of end
  end        - while cc < end, iterators do not receive a private slot */
static void set_private_data_ptrs(compiler_common *common, int *private_data_start, pcre_uchar *ccend)
{
pcre_uchar *cc = common->start;
pcre_uchar *alternative;
pcre_uchar *end = NULL;
int private_data_ptr = *private_data_start;
int space, size, bracketlen;
BOOL repeat_check = TRUE;

while (cc < ccend)
  {
  space = 0;
  size = 0;
  bracketlen = 0;
  if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
    break;

  /* repeat_check is FALSE only for the opcode that directly follows a
  BRAZERO / BRAMINZERO / BRAPOSZERO. */
  if (repeat_check && (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND))
    {
    if (detect_repeat(common, cc))
      {
      /* These brackets are converted to repeats, so no global
      based single character repeat is allowed. */
      if (cc >= end)
        end = bracketend(cc);
      }
    }
  repeat_check = TRUE;

  switch(*cc)
    {
    case OP_KET:
    /* A non-zero entry after the KET is the record written by
    detect_repeat(): the KET gets a slot and the recorded code length
    is skipped. */
    if (common->private_data_ptrs[cc + 1 - common->start] != 0)
      {
      common->private_data_ptrs[cc - common->start] = private_data_ptr;
      private_data_ptr += sizeof(sljit_sw);
      cc += common->private_data_ptrs[cc + 1 - common->start];
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ONCE:
    case OP_ONCE_NC:
    case OP_BRAPOS:
    case OP_SBRA:
    case OP_SBRAPOS:
    case OP_SCOND:
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw);
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw);
    bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    /* Might be a hidden SCOND. */
    alternative = cc + GET(cc, 1);
    if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
      {
      common->private_data_ptrs[cc - common->start] = private_data_ptr;
      private_data_ptr += sizeof(sljit_sw);
      }
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_BRA:
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_SCBRA:
    bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_BRAZERO:
    case OP_BRAMINZERO:
    case OP_BRAPOSZERO:
    repeat_check = FALSE;
    size = 1;
    break;

    CASE_ITERATOR_PRIVATE_DATA_1
    space = 1;
    size = -2;
    break;

    CASE_ITERATOR_PRIVATE_DATA_2A
    space = 2;
    size = -2;
    break;

    CASE_ITERATOR_PRIVATE_DATA_2B
    space = 2;
    size = -(2 + IMM2_SIZE);
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
    space = 1;
    size = 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
    if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
      space = 2;
    size = 1;
    break;

    case OP_TYPEUPTO:
    if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
      space = 2;
    size = 1 + IMM2_SIZE;
    break;

    case OP_TYPEMINUPTO:
    space = 2;
    size = 1 + IMM2_SIZE;
    break;

    case OP_CLASS:
    case OP_NCLASS:
    /* The class length must be known before the repeat opcode that
    follows the class can be inspected; get_private_data_copy_length()
    and copy_private_data() locate the repeat opcode the same way. */
    size = 1 + 32 / (int)sizeof(pcre_uchar);
    space = get_class_iterator_size(cc + size);
    break;

#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
    case OP_XCLASS:
    /* Same as above; the length of an extended class is stored in the
    class itself. */
    size = GET(cc, 1);
    space = get_class_iterator_size(cc + size);
    break;
#endif

    default:
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }

  /* Character iterators, which are not inside a repeated bracket,
  gets a private slot instead of allocating it on the stack. */
  if (space > 0 && cc >= end)
    {
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw) * space;
    }

  if (size != 0)
    {
    if (size < 0)
      {
      cc += -size;
#ifdef SUPPORT_UTF
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      }
    else
      cc += size;
    }

  if (bracketlen > 0)
    {
    /* Entering a bracket outside of the current restricted range: the
    range becomes this bracket unless it is closed by a plain OP_KET. */
    if (cc >= end)
      {
      end = bracketend(cc);
      if (end[-1 - LINK_SIZE] == OP_KET)
        end = NULL;
      }
    cc += bracketlen;
    }
  }
*private_data_start = private_data_ptr;
}
1449
/* Computes the number of machine words of the frame needed for the bytecode
range starting at cc; the opcode walk mirrors the one in init_frame(). When
ccend is NULL the range is the inside of the bracket at cc.

Each of start-of-match, mark and last-capture state is counted once (2 words),
every capturing bracket adds 3 words. If anything was counted the function
returns length + 1. Otherwise it returns with a frame_types (always < 0) if
no need for frame: no_frame when an opcode that sets stack_restore was seen,
no_stack when not.

*needs_control_head is set to TRUE when an OP_THEN, OP_MARK, OP_PRUNE_ARG or
OP_THEN_ARG is found while common->control_head_ptr is non-zero (and always
when DEBUG_FORCE_CONTROL_HEAD is enabled); otherwise it is set to FALSE. */
static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL *needs_control_head)
{
int length = 0;
int possessive = 0;
BOOL stack_restore = FALSE;
/* With recursive set, start-of-match and mark are treated as already counted. */
BOOL setsom_found = recursive;
BOOL setmark_found = recursive;
/* The last capture is a local variable even for recursions. */
BOOL capture_last_found = FALSE;

#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
SLJIT_ASSERT(common->control_head_ptr != 0);
*needs_control_head = TRUE;
#else
*needs_control_head = FALSE;
#endif

if (ccend == NULL)
  {
  ccend = bracketend(cc) - (1 + LINK_SIZE);
  if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
    {
    /* The words of the possessive capturing bracket itself; if nothing
    else is added below, the special case at the end applies. */
    possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
    /* This is correct regardless of common->capture_last_ptr. */
    capture_last_found = TRUE;
    }
  cc = next_opcode(common, cc);
  }

SLJIT_ASSERT(cc != NULL);
while (cc < ccend)
  switch(*cc)
    {
    case OP_SET_SOM:
    SLJIT_ASSERT(common->has_set_som);
    stack_restore = TRUE;
    if (!setsom_found)
      {
      length += 2;
      setsom_found = TRUE;
      }
    cc += 1;
    break;

    case OP_MARK:
    case OP_PRUNE_ARG:
    case OP_THEN_ARG:
    SLJIT_ASSERT(common->mark_ptr != 0);
    stack_restore = TRUE;
    if (!setmark_found)
      {
      length += 2;
      setmark_found = TRUE;
      }
    if (common->control_head_ptr != 0)
      *needs_control_head = TRUE;
    /* cc[1] holds the length of the argument that follows. */
    cc += 1 + 2 + cc[1];
    break;

    case OP_RECURSE:
    /* A recursion may change any of the three states, so each one that is
    in use and not yet counted is counted here. */
    stack_restore = TRUE;
    if (common->has_set_som && !setsom_found)
      {
      length += 2;
      setsom_found = TRUE;
      }
    if (common->mark_ptr != 0 && !setmark_found)
      {
      length += 2;
      setmark_found = TRUE;
      }
    if (common->capture_last_ptr != 0 && !capture_last_found)
      {
      length += 2;
      capture_last_found = TRUE;
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_CBRAPOS:
    case OP_SCBRA:
    case OP_SCBRAPOS:
    stack_restore = TRUE;
    if (common->capture_last_ptr != 0 && !capture_last_found)
      {
      length += 2;
      capture_last_found = TRUE;
      }
    length += 3;
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_THEN:
    stack_restore = TRUE;
    if (common->control_head_ptr != 0)
      *needs_control_head = TRUE;
    cc ++;
    break;

    /* Opcodes that are not listed anywhere in this switch set stack_restore;
    the opcodes listed after the default label are only skipped. */
    default:
    stack_restore = TRUE;
    /* Fall through. */

    case OP_NOT_WORD_BOUNDARY:
    case OP_WORD_BOUNDARY:
    case OP_NOT_DIGIT:
    case OP_DIGIT:
    case OP_NOT_WHITESPACE:
    case OP_WHITESPACE:
    case OP_NOT_WORDCHAR:
    case OP_WORDCHAR:
    case OP_ANY:
    case OP_ALLANY:
    case OP_ANYBYTE:
    case OP_NOTPROP:
    case OP_PROP:
    case OP_ANYNL:
    case OP_NOT_HSPACE:
    case OP_HSPACE:
    case OP_NOT_VSPACE:
    case OP_VSPACE:
    case OP_EXTUNI:
    case OP_EODN:
    case OP_EOD:
    case OP_CIRC:
    case OP_CIRCM:
    case OP_DOLL:
    case OP_DOLLM:
    case OP_CHAR:
    case OP_CHARI:
    case OP_NOT:
    case OP_NOTI:

    case OP_EXACT:
    case OP_POSSTAR:
    case OP_POSPLUS:
    case OP_POSQUERY:
    case OP_POSUPTO:

    case OP_EXACTI:
    case OP_POSSTARI:
    case OP_POSPLUSI:
    case OP_POSQUERYI:
    case OP_POSUPTOI:

    case OP_NOTEXACT:
    case OP_NOTPOSSTAR:
    case OP_NOTPOSPLUS:
    case OP_NOTPOSQUERY:
    case OP_NOTPOSUPTO:

    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI:
    case OP_NOTPOSPLUSI:
    case OP_NOTPOSQUERYI:
    case OP_NOTPOSUPTOI:

    case OP_TYPEEXACT:
    case OP_TYPEPOSSTAR:
    case OP_TYPEPOSPLUS:
    case OP_TYPEPOSQUERY:
    case OP_TYPEPOSUPTO:

    case OP_CLASS:
    case OP_NCLASS:
    case OP_XCLASS:
    case OP_CALLOUT:

    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }

/* Possessive quantifiers can use a special case. */
if (SLJIT_UNLIKELY(possessive == length))
  return stack_restore ? no_frame : no_stack;

/* The extra word matches the terminating zero written by init_frame(). */
if (length > 0)
  return length + 1;
return stack_restore ? no_frame : no_stack;
}
1633
/* Emits the code that builds a frame on the stack addressed through
STACK_TOP, starting at word index stackpos and moving towards lower offsets.
When ccend is NULL the scanned range is the bracket at cc.

The opcodes of the range are scanned and the following is stored:
  - for the first opcode that affects start-of-match, mark or last-capture
    state: the negated local offset (-OVECTOR(0), -common->mark_ptr or
    -common->capture_last_ptr) followed by the current value of that local;
  - for every capturing bracket: OVECTOR(offset) followed by the two current
    ovector values of that capture.
A zero word closes the frame. In debug builds the final position is checked
against STACK(stacktop). */
static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive)
{
DEFINE_COMPILER;
/* With recursive set, start-of-match and mark are never stored here. */
BOOL setsom_found = recursive;
BOOL setmark_found = recursive;
/* The last capture is a local variable even for recursions. */
BOOL capture_last_found = FALSE;
int offset;

/* >= 1 + shortest item size (2) */
SLJIT_UNUSED_ARG(stacktop);
SLJIT_ASSERT(stackpos >= stacktop + 2);

/* From here on stackpos is a byte offset instead of a word index. */
stackpos = STACK(stackpos);
if (ccend == NULL)
  {
  ccend = bracketend(cc) - (1 + LINK_SIZE);
  /* A non-recursive OP_CBRAPOS / OP_SCBRAPOS is not skipped, so the bracket
  itself is processed by the capture case below. */
  if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
    cc = next_opcode(common, cc);
  }

SLJIT_ASSERT(cc != NULL);
while (cc < ccend)
  switch(*cc)
    {
    case OP_SET_SOM:
    SLJIT_ASSERT(common->has_set_som);
    if (!setsom_found)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
      stackpos -= (int)sizeof(sljit_sw);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
      stackpos -= (int)sizeof(sljit_sw);
      setsom_found = TRUE;
      }
    cc += 1;
    break;

    case OP_MARK:
    case OP_PRUNE_ARG:
    case OP_THEN_ARG:
    SLJIT_ASSERT(common->mark_ptr != 0);
    if (!setmark_found)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
      stackpos -= (int)sizeof(sljit_sw);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
      stackpos -= (int)sizeof(sljit_sw);
      setmark_found = TRUE;
      }
    /* cc[1] holds the length of the argument that follows. */
    cc += 1 + 2 + cc[1];
    break;

    case OP_RECURSE:
    /* Store every state that is in use and has not been stored yet. */
    if (common->has_set_som && !setsom_found)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
      stackpos -= (int)sizeof(sljit_sw);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
      stackpos -= (int)sizeof(sljit_sw);
      setsom_found = TRUE;
      }
    if (common->mark_ptr != 0 && !setmark_found)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
      stackpos -= (int)sizeof(sljit_sw);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
      stackpos -= (int)sizeof(sljit_sw);
      setmark_found = TRUE;
      }
    if (common->capture_last_ptr != 0 && !capture_last_found)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
      stackpos -= (int)sizeof(sljit_sw);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
      stackpos -= (int)sizeof(sljit_sw);
      capture_last_found = TRUE;
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_CBRAPOS:
    case OP_SCBRA:
    case OP_SCBRAPOS:
    if (common->capture_last_ptr != 0 && !capture_last_found)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
      stackpos -= (int)sizeof(sljit_sw);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
      stackpos -= (int)sizeof(sljit_sw);
      capture_last_found = TRUE;
      }
    /* offset is the index of the first ovector entry of this capture
    (capture number * 2). Three words are stored: the positive local
    offset and both ovector values. */
    offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
    stackpos -= (int)sizeof(sljit_sw);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
    stackpos -= (int)sizeof(sljit_sw);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
    stackpos -= (int)sizeof(sljit_sw);

    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    default:
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }

/* Terminating zero word of the frame. */
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
SLJIT_ASSERT(stackpos == STACK(stacktop));
}
1755
get_private_data_copy_length(compiler_common * common,pcre_uchar * cc,pcre_uchar * ccend,BOOL needs_control_head)1756 static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
1757 {
1758 int private_data_length = needs_control_head ? 3 : 2;
1759 int size;
1760 pcre_uchar *alternative;
1761 /* Calculate the sum of the private machine words. */
1762 while (cc < ccend)
1763 {
1764 size = 0;
1765 switch(*cc)
1766 {
1767 case OP_KET:
1768 if (PRIVATE_DATA(cc) != 0)
1769 {
1770 private_data_length++;
1771 SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
1772 cc += PRIVATE_DATA(cc + 1);
1773 }
1774 cc += 1 + LINK_SIZE;
1775 break;
1776
1777 case OP_ASSERT:
1778 case OP_ASSERT_NOT:
1779 case OP_ASSERTBACK:
1780 case OP_ASSERTBACK_NOT:
1781 case OP_ONCE:
1782 case OP_ONCE_NC:
1783 case OP_BRAPOS:
1784 case OP_SBRA:
1785 case OP_SBRAPOS:
1786 case OP_SCOND:
1787 private_data_length++;
1788 SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
1789 cc += 1 + LINK_SIZE;
1790 break;
1791
1792 case OP_CBRA:
1793 case OP_SCBRA:
1794 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1795 private_data_length++;
1796 cc += 1 + LINK_SIZE + IMM2_SIZE;
1797 break;
1798
1799 case OP_CBRAPOS:
1800 case OP_SCBRAPOS:
1801 private_data_length += 2;
1802 cc += 1 + LINK_SIZE + IMM2_SIZE;
1803 break;
1804
1805 case OP_COND:
1806 /* Might be a hidden SCOND. */
1807 alternative = cc + GET(cc, 1);
1808 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1809 private_data_length++;
1810 cc += 1 + LINK_SIZE;
1811 break;
1812
1813 CASE_ITERATOR_PRIVATE_DATA_1
1814 if (PRIVATE_DATA(cc))
1815 private_data_length++;
1816 cc += 2;
1817 #ifdef SUPPORT_UTF
1818 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1819 #endif
1820 break;
1821
1822 CASE_ITERATOR_PRIVATE_DATA_2A
1823 if (PRIVATE_DATA(cc))
1824 private_data_length += 2;
1825 cc += 2;
1826 #ifdef SUPPORT_UTF
1827 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1828 #endif
1829 break;
1830
1831 CASE_ITERATOR_PRIVATE_DATA_2B
1832 if (PRIVATE_DATA(cc))
1833 private_data_length += 2;
1834 cc += 2 + IMM2_SIZE;
1835 #ifdef SUPPORT_UTF
1836 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1837 #endif
1838 break;
1839
1840 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1841 if (PRIVATE_DATA(cc))
1842 private_data_length++;
1843 cc += 1;
1844 break;
1845
1846 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1847 if (PRIVATE_DATA(cc))
1848 private_data_length += 2;
1849 cc += 1;
1850 break;
1851
1852 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1853 if (PRIVATE_DATA(cc))
1854 private_data_length += 2;
1855 cc += 1 + IMM2_SIZE;
1856 break;
1857
1858 case OP_CLASS:
1859 case OP_NCLASS:
1860 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1861 case OP_XCLASS:
1862 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1863 #else
1864 size = 1 + 32 / (int)sizeof(pcre_uchar);
1865 #endif
1866 if (PRIVATE_DATA(cc))
1867 private_data_length += get_class_iterator_size(cc + size);
1868 cc += size;
1869 break;
1870
1871 default:
1872 cc = next_opcode(common, cc);
1873 SLJIT_ASSERT(cc != NULL);
1874 break;
1875 }
1876 }
1877 SLJIT_ASSERT(cc == ccend);
1878 return private_data_length;
1879 }
1880
/* Emits the code that transfers the private data words of the bytecode range
[cc, ccend) between the locals addressed through SLJIT_SP and the stack area
addressed through STACK_TOP.

  save != FALSE: locals are stored to the stack, starting at word index
                 stackptr; after the range, common->recursive_head_ptr (and
                 common->control_head_ptr before it in srcw[] when
                 needs_control_head is set) is stored as well.
  save == FALSE: words are loaded from the stack back into the locals; the
                 head word(s) are excluded by lowering stacktop.

Every value travels through TMP1 or TMP2. tmp1next selects the register used
for the next word, tmp1empty / tmp2empty tell whether a register currently
holds a value that still has to be written out. The opcode walk is the same
as in get_private_data_copy_length(). */
static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
  BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
{
DEFINE_COMPILER;
/* Local offsets of the (at most two) words of the current opcode. */
int srcw[2];
int count, size;
BOOL tmp1next = TRUE;
BOOL tmp1empty = TRUE;
BOOL tmp2empty = TRUE;
pcre_uchar *alternative;
enum {
  loop,
  end
} status;

status = loop;
/* Convert the word indexes to byte offsets. */
stackptr = STACK(stackptr);
stacktop = STACK(stacktop - 1);

if (!save)
  {
  /* Leave out the head word(s) and preload up to two words. */
  stacktop -= (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
  if (stackptr < stacktop)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
    stackptr += sizeof(sljit_sw);
    tmp1empty = FALSE;
    }
  if (stackptr < stacktop)
    {
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
    stackptr += sizeof(sljit_sw);
    tmp2empty = FALSE;
    }
  /* The tmp1next must be TRUE in either way. */
  }

SLJIT_ASSERT(common->recursive_head_ptr != 0);

do
  {
  /* Each iteration selects the words of one opcode into srcw[0..count-1]. */
  count = 0;
  if (cc >= ccend)
    {
    /* End of the range: nothing more to restore; when saving, the head
    word(s) are appended and the loop ends after this iteration. */
    if (!save)
      break;

    count = 1;
    srcw[0] = common->recursive_head_ptr;
    if (needs_control_head)
      {
      SLJIT_ASSERT(common->control_head_ptr != 0);
      count = 2;
      srcw[0] = common->control_head_ptr;
      srcw[1] = common->recursive_head_ptr;
      }
    status = end;
    }
  else switch(*cc)
    {
    case OP_KET:
    /* A KET with private data is followed by a skip length in the next
    private data entry; the skipped code is not scanned. */
    if (PRIVATE_DATA(cc) != 0)
      {
      count = 1;
      srcw[0] = PRIVATE_DATA(cc);
      SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
      cc += PRIVATE_DATA(cc + 1);
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ONCE:
    case OP_ONCE_NC:
    case OP_BRAPOS:
    case OP_SBRA:
    case OP_SBRAPOS:
    case OP_SCOND:
    count = 1;
    srcw[0] = PRIVATE_DATA(cc);
    SLJIT_ASSERT(srcw[0] != 0);
    cc += 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_SCBRA:
    /* Only capturing brackets not marked as optimized own a word. */
    if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
      {
      count = 1;
      srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
      }
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    count = 2;
    srcw[0] = PRIVATE_DATA(cc);
    srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
    SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    /* Might be a hidden SCOND. */
    alternative = cc + GET(cc, 1);
    if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
      {
      count = 1;
      srcw[0] = PRIVATE_DATA(cc);
      SLJIT_ASSERT(srcw[0] != 0);
      }
    cc += 1 + LINK_SIZE;
    break;

    CASE_ITERATOR_PRIVATE_DATA_1
    if (PRIVATE_DATA(cc))
      {
      count = 1;
      srcw[0] = PRIVATE_DATA(cc);
      }
    cc += 2;
#ifdef SUPPORT_UTF
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_PRIVATE_DATA_2A
    if (PRIVATE_DATA(cc))
      {
      count = 2;
      srcw[0] = PRIVATE_DATA(cc);
      srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
      }
    cc += 2;
#ifdef SUPPORT_UTF
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_PRIVATE_DATA_2B
    if (PRIVATE_DATA(cc))
      {
      count = 2;
      srcw[0] = PRIVATE_DATA(cc);
      srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
      }
    cc += 2 + IMM2_SIZE;
#ifdef SUPPORT_UTF
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
    if (PRIVATE_DATA(cc))
      {
      count = 1;
      srcw[0] = PRIVATE_DATA(cc);
      }
    cc += 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
    if (PRIVATE_DATA(cc))
      {
      count = 2;
      srcw[0] = PRIVATE_DATA(cc);
      srcw[1] = srcw[0] + sizeof(sljit_sw);
      }
    cc += 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
    if (PRIVATE_DATA(cc))
      {
      count = 2;
      srcw[0] = PRIVATE_DATA(cc);
      srcw[1] = srcw[0] + sizeof(sljit_sw);
      }
    cc += 1 + IMM2_SIZE;
    break;

    case OP_CLASS:
    case OP_NCLASS:
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
    case OP_XCLASS:
    size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
#else
    size = 1 + 32 / (int)sizeof(pcre_uchar);
#endif
    /* The repeat opcode after the class decides the number of words. */
    if (PRIVATE_DATA(cc))
      switch(get_class_iterator_size(cc + size))
        {
        case 1:
        count = 1;
        srcw[0] = PRIVATE_DATA(cc);
        break;

        case 2:
        count = 2;
        srcw[0] = PRIVATE_DATA(cc);
        srcw[1] = srcw[0] + sizeof(sljit_sw);
        break;

        default:
        SLJIT_UNREACHABLE();
        break;
        }
    cc += size;
    break;

    default:
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }

  /* Transfer the selected words, last one first. */
  while (count > 0)
    {
    count--;
    if (save)
      {
      /* Write out the value the chosen register still holds (if any), then
      load the next local into it. */
      if (tmp1next)
        {
        if (!tmp1empty)
          {
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
          stackptr += sizeof(sljit_sw);
          }
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
        tmp1empty = FALSE;
        tmp1next = FALSE;
        }
      else
        {
        if (!tmp2empty)
          {
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
          stackptr += sizeof(sljit_sw);
          }
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
        tmp2empty = FALSE;
        tmp1next = TRUE;
        }
      }
    else
      {
      /* Store the preloaded register into the local, then refill the
      register from the stack while words remain below stacktop. */
      if (tmp1next)
        {
        SLJIT_ASSERT(!tmp1empty);
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP1, 0);
        tmp1empty = stackptr >= stacktop;
        if (!tmp1empty)
          {
          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
          stackptr += sizeof(sljit_sw);
          }
        tmp1next = FALSE;
        }
      else
        {
        SLJIT_ASSERT(!tmp2empty);
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP2, 0);
        tmp2empty = stackptr >= stacktop;
        if (!tmp2empty)
          {
          OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
          stackptr += sizeof(sljit_sw);
          }
        tmp1next = TRUE;
        }
      }
    }
  }
while (status != end);

if (save)
  {
  /* Flush the values still held in the registers, oldest first: the
  register that would be used next holds the older value. */
  if (tmp1next)
    {
    if (!tmp1empty)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
      stackptr += sizeof(sljit_sw);
      }
    if (!tmp2empty)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
      stackptr += sizeof(sljit_sw);
      }
    }
  else
    {
    if (!tmp2empty)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
      stackptr += sizeof(sljit_sw);
      }
    if (!tmp1empty)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
      stackptr += sizeof(sljit_sw);
      }
    }
  }
SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
}
2191
set_then_offsets(compiler_common * common,pcre_uchar * cc,sljit_u8 * current_offset)2192 static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, sljit_u8 *current_offset)
2193 {
2194 pcre_uchar *end = bracketend(cc);
2195 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
2196
2197 /* Assert captures then. */
2198 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
2199 current_offset = NULL;
2200 /* Conditional block does not. */
2201 if (*cc == OP_COND || *cc == OP_SCOND)
2202 has_alternatives = FALSE;
2203
2204 cc = next_opcode(common, cc);
2205 if (has_alternatives)
2206 current_offset = common->then_offsets + (cc - common->start);
2207
2208 while (cc < end)
2209 {
2210 if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
2211 cc = set_then_offsets(common, cc, current_offset);
2212 else
2213 {
2214 if (*cc == OP_ALT && has_alternatives)
2215 current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
2216 if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
2217 *current_offset = 1;
2218 cc = next_opcode(common, cc);
2219 }
2220 }
2221
2222 return end;
2223 }
2224
/* The case label groups are not needed after the bytecode walking functions
above, so their names are released here. */
#undef CASE_ITERATOR_PRIVATE_DATA_1
#undef CASE_ITERATOR_PRIVATE_DATA_2A
#undef CASE_ITERATOR_PRIVATE_DATA_2B
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2231
is_powerof2(unsigned int value)2232 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
2233 {
2234 return (value & (value - 1)) == 0;
2235 }
2236
set_jumps(jump_list * list,struct sljit_label * label)2237 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
2238 {
2239 while (list)
2240 {
2241 /* sljit_set_label is clever enough to do nothing
2242 if either the jump or the label is NULL. */
2243 SET_LABEL(list->jump, label);
2244 list = list->next;
2245 }
2246 }
2247
add_jump(struct sljit_compiler * compiler,jump_list ** list,struct sljit_jump * jump)2248 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump)
2249 {
2250 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
2251 if (list_item)
2252 {
2253 list_item->next = *list;
2254 list_item->jump = jump;
2255 *list = list_item;
2256 }
2257 }
2258
/* Registers a stub on common->stubs: start is the jump that enters the stub
and a label created at the current position is recorded as the place to
continue at (quit). The stub code itself is emitted by flush_stubs().
Nothing is registered when sljit_alloc_memory() returns NULL. */
static void add_stub(compiler_common *common, struct sljit_jump *start)
{
DEFINE_COMPILER;
stub_list *stub = sljit_alloc_memory(compiler, sizeof(stub_list));

if (!stub)
  return;

stub->start = start;
stub->quit = LABEL();

/* Link in front of the pending stubs. */
stub->next = common->stubs;
common->stubs = stub;
}
2272
/* Emits the code of every stub registered by add_stub() and empties the
list. For each stub: its start jump is bound to the current position, a
fast call is emitted and added to common->stackalloc, and a jump back to the
recorded quit label follows. */
static void flush_stubs(compiler_common *common)
{
DEFINE_COMPILER;
stub_list *stub;

for (stub = common->stubs; stub; stub = stub->next)
  {
  JUMPHERE(stub->start);
  add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
  JUMPTO(SLJIT_JUMP, stub->quit);
  }

common->stubs = NULL;
}
2287
/* Creates a label at the current position and records it, together with
update_addr, at the front of common->label_addrs. Nothing is recorded (and
no label is created) when sljit_alloc_memory() returns NULL. */
static void add_label_addr(compiler_common *common, sljit_uw *update_addr)
{
DEFINE_COMPILER;
label_addr_list *entry = sljit_alloc_memory(compiler, sizeof(label_addr_list));

if (entry != NULL)
  {
  entry->label = LABEL();
  entry->update_addr = update_addr;
  entry->next = common->label_addrs;
  common->label_addrs = entry;
  }
}
2301
count_match(compiler_common * common)2302 static SLJIT_INLINE void count_match(compiler_common *common)
2303 {
2304 DEFINE_COMPILER;
2305
2306 OP2(SLJIT_SUB | SLJIT_SET_Z, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
2307 add_jump(compiler, &common->calllimit, JUMP(SLJIT_ZERO));
2308 }
2309
allocate_stack(compiler_common * common,int size)2310 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
2311 {
2312 /* May destroy all locals and registers except TMP2. */
2313 DEFINE_COMPILER;
2314
2315 SLJIT_ASSERT(size > 0);
2316 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2317 #ifdef DESTROY_REGISTERS
2318 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
2319 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2320 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2321 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0);
2322 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
2323 #endif
2324 add_stub(common, CMP(SLJIT_LESS, STACK_TOP, 0, STACK_LIMIT, 0));
2325 }
2326
free_stack(compiler_common * common,int size)2327 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
2328 {
2329 DEFINE_COMPILER;
2330
2331 SLJIT_ASSERT(size > 0);
2332 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2333 }
2334
static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
{
/* Allocates 'size' bytes preceded by one sljit_uw sized link word. The link
word chains the block into common->read_only_data_head. Returns the address
just after the link word, or NULL when the compiler already reports an error
or the allocation fails (in which case a memory error is set). */
DEFINE_COMPILER;
sljit_uw *block;

if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  return NULL;

block = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data);
if (SLJIT_UNLIKELY(block == NULL))
  {
  sljit_set_compiler_memory_error(compiler);
  return NULL;
  }

/* The first word holds the previous head of the chain. */
*(void**)block = common->read_only_data_head;
common->read_only_data_head = (void *)block;
return block + 1;
}
2354
free_read_only_data(void * current,void * allocator_data)2355 static void free_read_only_data(void *current, void *allocator_data)
2356 {
2357 void *next;
2358
2359 SLJIT_UNUSED_ARG(allocator_data);
2360
2361 while (current != NULL)
2362 {
2363 next = *(void**)current;
2364 SLJIT_FREE(current, allocator_data);
2365 current = next;
2366 }
2367 }
2368
reset_ovector(compiler_common * common,int length)2369 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
2370 {
2371 DEFINE_COMPILER;
2372 struct sljit_label *loop;
2373 int i;
2374
2375 /* At this point we can freely use all temporary registers. */
2376 SLJIT_ASSERT(length > 1);
2377 /* TMP1 returns with begin - 1. */
2378 OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
2379 if (length < 8)
2380 {
2381 for (i = 1; i < length; i++)
2382 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0);
2383 }
2384 else
2385 {
2386 GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
2387 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
2388 loop = LABEL();
2389 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw), SLJIT_R0, 0);
2390 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
2391 JUMPTO(SLJIT_NOT_ZERO, loop);
2392 }
2393 }
2394
reset_fast_fail(compiler_common * common)2395 static SLJIT_INLINE void reset_fast_fail(compiler_common *common)
2396 {
2397 DEFINE_COMPILER;
2398 sljit_s32 i;
2399
2400 SLJIT_ASSERT(common->fast_fail_start_ptr < common->fast_fail_end_ptr);
2401
2402 OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2403 for (i = common->fast_fail_start_ptr; i < common->fast_fail_end_ptr; i += sizeof(sljit_sw))
2404 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), i, TMP1, 0);
2405 }
2406
do_reset_match(compiler_common * common,int length)2407 static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
2408 {
2409 DEFINE_COMPILER;
2410 struct sljit_label *loop;
2411 int i;
2412
2413 SLJIT_ASSERT(length > 1);
2414 /* OVECTOR(1) contains the "string begin - 1" constant. */
2415 if (length > 2)
2416 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
2417 if (length < 8)
2418 {
2419 for (i = 2; i < length; i++)
2420 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), TMP1, 0);
2421 }
2422 else
2423 {
2424 GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
2425 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
2426 loop = LABEL();
2427 OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
2428 OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
2429 JUMPTO(SLJIT_NOT_ZERO, loop);
2430 }
2431
2432 OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
2433 if (common->mark_ptr != 0)
2434 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
2435 if (common->control_head_ptr != 0)
2436 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
2437 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
2438 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
2439 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
2440 }
2441
do_search_mark(sljit_sw * current,const pcre_uchar * skip_arg)2442 static sljit_sw SLJIT_CALL do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg)
2443 {
2444 while (current != NULL)
2445 {
2446 switch (current[1])
2447 {
2448 case type_then_trap:
2449 break;
2450
2451 case type_mark:
2452 if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[2]) == 0)
2453 return current[3];
2454 break;
2455
2456 default:
2457 SLJIT_UNREACHABLE();
2458 break;
2459 }
2460 SLJIT_ASSERT(current[0] == 0 || current < (sljit_sw*)current[0]);
2461 current = (sljit_sw*)current[0];
2462 }
2463 return -1;
2464 }
2465
copy_ovector(compiler_common * common,int topbracket)2466 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
2467 {
2468 DEFINE_COMPILER;
2469 struct sljit_label *loop;
2470 struct sljit_jump *early_quit;
2471
2472 /* At this point we can freely use all registers. */
2473 OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
2474 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);
2475
2476 OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
2477 if (common->mark_ptr != 0)
2478 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
2479 OP1(SLJIT_MOV_S32, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offset_count));
2480 if (common->mark_ptr != 0)
2481 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
2482 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
2483 OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, begin));
2484 GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START);
2485 /* Unlikely, but possible */
2486 early_quit = CMP(SLJIT_EQUAL, SLJIT_R1, 0, SLJIT_IMM, 0);
2487 loop = LABEL();
2488 OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R0, 0);
2489 OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
2490 /* Copy the integer value to the output buffer */
2491 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2492 OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
2493 #endif
2494 OP1(SLJIT_MOVU_S32, SLJIT_MEM1(SLJIT_R2), sizeof(int), SLJIT_S1, 0);
2495 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
2496 JUMPTO(SLJIT_NOT_ZERO, loop);
2497 JUMPHERE(early_quit);
2498
2499 /* Calculate the return value, which is the maximum ovector value. */
2500 if (topbracket > 1)
2501 {
2502 GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
2503 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
2504
2505 /* OVECTOR(0) is never equal to SLJIT_S2. */
2506 loop = LABEL();
2507 OP1(SLJIT_MOVU, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw)));
2508 OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
2509 CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
2510 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
2511 }
2512 else
2513 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
2514 }
2515
return_with_partial_match(compiler_common * common,struct sljit_label * quit)2516 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
2517 {
2518 DEFINE_COMPILER;
2519 struct sljit_jump *jump;
2520
2521 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S1, str_end_must_be_saved_reg2);
2522 SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
2523 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
2524
2525 OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
2526 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
2527 OP1(SLJIT_MOV_S32, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
2528 CMPTO(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 2, quit);
2529
2530 /* Store match begin and end. */
2531 OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, begin));
2532 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, offsets));
2533
2534 jump = CMP(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 3);
2535 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_S0, 0);
2536 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2537 OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
2538 #endif
2539 OP1(SLJIT_MOV_S32, SLJIT_MEM1(SLJIT_R1), 2 * sizeof(int), SLJIT_R2, 0);
2540 JUMPHERE(jump);
2541
2542 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
2543 OP2(SLJIT_SUB, SLJIT_S1, 0, STR_END, 0, SLJIT_S0, 0);
2544 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2545 OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
2546 #endif
2547 OP1(SLJIT_MOV_S32, SLJIT_MEM1(SLJIT_R1), sizeof(int), SLJIT_S1, 0);
2548
2549 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S0, 0);
2550 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2551 OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
2552 #endif
2553 OP1(SLJIT_MOV_S32, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R2, 0);
2554
2555 JUMPTO(SLJIT_JUMP, quit);
2556 }
2557
check_start_used_ptr(compiler_common * common)2558 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2559 {
2560 /* May destroy TMP1. */
2561 DEFINE_COMPILER;
2562 struct sljit_jump *jump;
2563
2564 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2565 {
2566 /* The value of -1 must be kept for start_used_ptr! */
2567 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
2568 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2569 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2570 jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2571 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2572 JUMPHERE(jump);
2573 }
2574 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
2575 {
2576 jump = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2577 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2578 JUMPHERE(jump);
2579 }
2580 }
2581
char_has_othercase(compiler_common * common,pcre_uchar * cc)2582 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar *cc)
2583 {
2584 /* Detects if the character has an othercase. */
2585 unsigned int c;
2586
2587 #ifdef SUPPORT_UTF
2588 if (common->utf)
2589 {
2590 GETCHAR(c, cc);
2591 if (c > 127)
2592 {
2593 #ifdef SUPPORT_UCP
2594 return c != UCD_OTHERCASE(c);
2595 #else
2596 return FALSE;
2597 #endif
2598 }
2599 #ifndef COMPILE_PCRE8
2600 return common->fcc[c] != c;
2601 #endif
2602 }
2603 else
2604 #endif
2605 c = *cc;
2606 return MAX_255(c) ? common->fcc[c] != c : FALSE;
2607 }
2608
char_othercase(compiler_common * common,unsigned int c)2609 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2610 {
2611 /* Returns with the othercase. */
2612 #ifdef SUPPORT_UTF
2613 if (common->utf && c > 127)
2614 {
2615 #ifdef SUPPORT_UCP
2616 return UCD_OTHERCASE(c);
2617 #else
2618 return c;
2619 #endif
2620 }
2621 #endif
2622 return TABLE_GET(c, common->fcc, c);
2623 }
2624
static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar *cc)
{
/* Detects if the character and its othercase has only 1 bit difference.
Returns 0 when they differ in more than one bit. Otherwise the low 8 bits of
the result hold the differing bit within one byte and the bits above hold an
index which selects that byte. */
unsigned int ch, other, diff;
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
int extra;
#endif

#ifdef SUPPORT_UTF
if (common->utf)
  {
  GETCHAR(ch, cc);
  if (ch > 127)
    {
#ifdef SUPPORT_UCP
    other = UCD_OTHERCASE(ch);
#else
    other = ch;
#endif
    }
  else
    other = common->fcc[ch];
  }
else
#endif
  {
  ch = *cc;
  other = TABLE_GET(ch, common->fcc, ch);
  }

SLJIT_ASSERT(ch != other);

diff = ch ^ other;
/* Optimized for English alphabet. */
if (ch <= 127 && diff == 0x20)
  return (0 << 8) | 0x20;

/* Since ch != other, they must have at least 1 bit difference. */
if (!is_powerof2(diff))
  return 0;

#if defined COMPILE_PCRE8

#ifdef SUPPORT_UTF
if (common->utf && ch > 127)
  {
  /* Each step of 6 bits moves one byte towards the start of the sequence. */
  for (extra = GET_EXTRALEN(*cc); (diff & 0x3f) == 0; diff >>= 6)
    extra--;
  return (extra << 8) | diff;
  }
#endif /* SUPPORT_UTF */
return (0 << 8) | diff;

#elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32

#ifdef SUPPORT_UTF
if (common->utf && ch > 65535)
  {
  if (diff < (1 << 10))
    return (diff < 256) ? ((2 << 8) | diff) : ((3 << 8) | (diff >> 8));
  diff >>= 10;
  }
#endif /* SUPPORT_UTF */
return (diff < 256) ? ((0 << 8) | diff) : ((1 << 8) | (diff >> 8));

#endif /* COMPILE_PCRE[8|16|32] */
}
2700
static void check_partial(compiler_common *common, BOOL force)
{
/* Checks whether a partial match has occurred. Does not modify registers.
Nothing is emitted in the normal mode. Without 'force' the action is skipped
when the start_used_ptr slot is not below STR_PTR; with 'force' it is only
skipped in soft mode when the slot is -1. The action clears the hit_start
slot in soft mode and jumps to the partial match handler otherwise. */
DEFINE_COMPILER;
struct sljit_jump *skip = NULL;

SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);

if (common->mode == JIT_COMPILE)
  return;

if (force)
  {
  if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
    skip = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
  }
else
  skip = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);

if (common->mode != JIT_PARTIAL_SOFT_COMPILE)
  {
  if (common->partialmatchlabel == NULL)
    add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
  else
    JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
  }
else
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);

if (skip != NULL)
  JUMPHERE(skip);
}
2730
static void check_str_end(compiler_common *common, jump_list **end_reached)
{
/* Emits the end of subject check. Jumps taken when the end is reached are
collected in 'end_reached'; in the partial matching modes the partial match
detection is emitted as well.
Does not affect registers. Usually used in a tight spot. */
DEFINE_COMPILER;
struct sljit_jump *not_at_end;

if (common->mode == JIT_COMPILE)
  {
  add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  return;
  }

not_at_end = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
/* Both partial modes start with the same start_used_ptr test. */
add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
if (common->mode != JIT_PARTIAL_SOFT_COMPILE)
  {
  if (common->partialmatchlabel == NULL)
    add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
  else
    JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
  }
else
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
  }
JUMPHERE(not_at_end);
}
2760
static void detect_partial_match(compiler_common *common, jump_list **backtracks)
{
/* Emits the end of subject check of a matcher. In the normal mode reaching
the end simply adds a jump to 'backtracks'. In the partial matching modes the
hit is recorded (soft mode) or the partial match handler is entered (hard
mode) unless the start_used_ptr slot is not below STR_PTR. */
DEFINE_COMPILER;
struct sljit_jump *not_at_end;

if (common->mode == JIT_COMPILE)
  {
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  return;
  }

/* Partial matching mode. */
not_at_end = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
if (common->mode != JIT_PARTIAL_SOFT_COMPILE)
  {
  if (common->partialmatchlabel == NULL)
    add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
  else
    JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
  }
else
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
  }
JUMPHERE(not_at_end);
}
2789
static void peek_char(compiler_common *common, sljit_u32 max)
{
/* Reads the character into TMP1, keeps STR_PTR.
Does not check STR_END. TMP2 Destroyed.

In UTF mode the multi-unit decoding is omitted when 'max' is below the first
value which needs more than one code unit (128 for UTF-8, 0xd800 for UTF-16);
in that case TMP1 simply holds the first code unit. */
DEFINE_COMPILER;
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
struct sljit_jump *jump;
#endif

SLJIT_UNUSED_ARG(max);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
if (common->utf)
  {
  if (max < 128) return;

  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
  /* The decoder is entered with STR_PTR after the first byte. The
  subtraction of TMP2 afterwards restores the original STR_PTR. */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  JUMPHERE(jump);
  }
#endif /* SUPPORT_UTF && COMPILE_PCRE8 */

#if defined SUPPORT_UTF && defined COMPILE_PCRE16
if (common->utf)
  {
  if (max < 0xd800) return;

  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
  /* TMP2 contains the high surrogate. STR_PTR still points to it (this
  function never advances STR_PTR), so the low surrogate is the next code
  unit, at offset 1. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
  JUMPHERE(jump);
  }
#endif /* SUPPORT_UTF && COMPILE_PCRE16 */
}
2832
2833 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2834
static BOOL is_char7_bitset(const sljit_u8 *bitset, BOOL nclass)
{
/* Tells whether the character codes below 128 are enough
to determine a match. This is the case when every byte in the upper half of
the 32 byte bitset is 0xff for a negated class, or 0 otherwise. */
const sljit_u8 expected = nclass ? 0xff : 0;
int index;

for (index = 16; index < 32; index++)
  {
  if (bitset[index] != expected)
    return FALSE;
  }
return TRUE;
}
2851
static void read_char7_type(compiler_common *common, BOOL full_read)
{
/* Reads the precise character type of a character into TMP1, if the character
is less than 128. Otherwise it returns with zero. Does not check STR_END. The
full_read argument tells whether characters above max are accepted or not:
when it is set, STR_PTR is also moved past the remaining bytes of a
multi-byte character. TMP2 is destroyed. */
DEFINE_COMPILER;
struct sljit_jump *single_byte;

SLJIT_ASSERT(common->utf);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);

if (!full_read)
  return;

single_byte = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
/* utf8_table4 is indexed by the first byte minus 0xc0. */
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
JUMPHERE(single_byte);
}
2875
2876 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2877
static void read_char_range(compiler_common *common, sljit_u32 min, sljit_u32 max, BOOL update_str_ptr)
{
/* Reads the precise value of a character into TMP1, if the character is
between min and max (c >= min && c <= max). Otherwise it returns with a value
outside the range. Does not check STR_END. STR_PTR is always moved past the
first code unit; the decoding which is emitted for multi-unit characters
depends on min, max and update_str_ptr. */
DEFINE_COMPILER;
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
struct sljit_jump *done;
#endif
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
struct sljit_jump *other_length;
#endif

SLJIT_UNUSED_ARG(update_str_ptr);
SLJIT_UNUSED_ARG(min);
SLJIT_UNUSED_ARG(max);
SLJIT_ASSERT(min <= max);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

#if defined SUPPORT_UTF && defined COMPILE_PCRE8
if (common->utf)
  {
  if (max < 128 && !update_str_ptr) return;

  /* First bytes below 0xc0 need no further decoding. */
  done = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
  if (min >= 0x10000)
    {
    /* Only sequences whose first byte is 0xf0..0xf7 are fully decoded. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
    if (update_str_ptr)
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    other_length = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
    if (!update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    JUMPHERE(other_length);
    if (update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
    }
  else if (min >= 0x800 && max <= 0xffff)
    {
    /* Only sequences whose first byte is 0xe0..0xef are fully decoded. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
    if (update_str_ptr)
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    other_length = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    if (!update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    JUMPHERE(other_length);
    if (update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
    }
  else if (max >= 0x800)
    {
    /* General case: call one of the shared decoders. */
    add_jump(compiler, (max < 0x10000) ? &common->utfreadchar16 : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
    }
  else if (max < 128)
    {
    /* The value is not needed, only skip the rest of the character. */
    OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
    }
  else
    {
    /* Decode as a two byte sequence. */
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    if (update_str_ptr)
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    else
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    if (update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
    }
  JUMPHERE(done);
  }
#endif

#if defined SUPPORT_UTF && defined COMPILE_PCRE16
if (common->utf)
  {
  if (max >= 0x10000)
    {
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    done = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
    /* TMP2 contains the high surrogate. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    JUMPHERE(done);
    return;
    }

  if (max < 0xd800 && !update_str_ptr) return;

  /* Skip low surrogate if necessary. */
  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  done = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
  if (update_str_ptr)
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  /* Report a value which is above max for a surrogate pair. */
  if (max >= 0xd800)
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
  JUMPHERE(done);
  }
#endif
}
3005
read_char(compiler_common * common)3006 static SLJIT_INLINE void read_char(compiler_common *common)
3007 {
3008 read_char_range(common, 0, READ_CHAR_MAX, TRUE);
3009 }
3010
static void read_char8_type(compiler_common *common, BOOL update_str_ptr)
{
/* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END.
The type is taken from the common->ctypes table; zero is returned for
characters which are above 255. TMP2 is destroyed. */
DEFINE_COMPILER;
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
struct sljit_jump *skip;
#endif
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
struct sljit_jump *above_255;
#endif

SLJIT_UNUSED_ARG(update_str_ptr);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

#if defined SUPPORT_UTF && defined COMPILE_PCRE8
if (common->utf)
  {
  /* This can be an extra read in some situations, but hopefully
  it is needed in most cases. */
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
  skip = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
  if (update_str_ptr)
    add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
  else
    {
    /* Combine the first two bytes; only values up to 255 have a type. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
    above_255 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
    JUMPHERE(above_255);
    }
  JUMPHERE(skip);
  return;
  }
#endif /* SUPPORT_UTF && COMPILE_PCRE8 */

#if !defined COMPILE_PCRE8
/* The ctypes array contains only 256 values. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
skip = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
#if !defined COMPILE_PCRE8
JUMPHERE(skip);
#endif

#if defined SUPPORT_UTF && defined COMPILE_PCRE16
if (common->utf && update_str_ptr)
  {
  /* Skip low surrogate if necessary. */
  OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
  skip = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  JUMPHERE(skip);
  }
#endif /* SUPPORT_UTF && COMPILE_PCRE16 */
}
3075
static void skip_char_back(compiler_common *common)
{
/* Goes one character back. Affects STR_PTR and TMP1. Does not check begin.
In UTF-8 mode the emitted loop steps back while the byte just passed has the
10xxxxxx form; in UTF-16 mode one more code unit is skipped when the unit
just passed is in the 0xdc00..0xdfff range. */
DEFINE_COMPILER;
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
#if defined COMPILE_PCRE8
struct sljit_label *again;

if (common->utf)
  {
  again = LABEL();
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
  CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, again);
  return;
  }
#elif defined COMPILE_PCRE16
if (common->utf)
  {
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  /* Skip low surrogate if necessary. */
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
  /* The flag value is shifted left by one before it is subtracted. */
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  return;
  }
#endif /* COMPILE_PCRE[8|16] */
#endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
}
3110
static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
{
/* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed.
A jump is added to 'backtracks' which is taken when the character is a
newline (jumpifmatch set) or when it is not a newline (jumpifmatch clear). */
DEFINE_COMPILER;
struct sljit_jump *is_cr;

if (nltype == NLTYPE_ANY)
  {
  add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
  add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  return;
  }

if (nltype != NLTYPE_ANYCRLF)
  {
  SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
  add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
  return;
  }

/* NLTYPE_ANYCRLF: both CR and NL are newlines. */
if (jumpifmatch)
  {
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
  return;
  }

is_cr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
JUMPHERE(is_cr);
}
3143
3144 #ifdef SUPPORT_UTF
3145
3146 #if defined COMPILE_PCRE8
static void do_utfreadchar(compiler_common *common)
{
/* Fast decoding a UTF-8 character. TMP1 contains the first byte
of the character (>= 0xc0). Return char value in TMP1, length in TMP2.
STR_PTR is moved past the continuation bytes which are consumed. */
DEFINE_COMPILER;
struct sljit_jump *longer;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Merge the first two bytes. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Searching for the first zero. */
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
longer = JUMP(SLJIT_NOT_ZERO);
/* Two byte sequence. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

JUMPHERE(longer);
/* Remove the marker bit which was just tested and merge the third byte. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
longer = JUMP(SLJIT_NOT_ZERO);
/* Three byte sequence. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* Four byte sequence. */
JUMPHERE(longer);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(4));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
3194
static void do_utfreadchar16(compiler_common *common)
{
/* Fast decoding a UTF-8 character. TMP1 contains the first byte
of the character (>= 0xc0). Return value in TMP1.
Unlike do_utfreadchar, no length is returned and only the second and one
further byte are merged into the value; see the notes below for sequences
whose first byte has the 0x10 bit set.
NOTE(review): the name suggests this variant is meant for callers that only
need character values below 0x10000 - confirm against the call sites. */
DEFINE_COMPILER;
struct sljit_jump *jump;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* TMP1 = ((first byte & 0x3f) << 6) | (second byte & 0x3f) */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Searching for the first zero. 0x800 is bit 0x20 of the first byte after
the shift by 6. */
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
jump = JUMP(SLJIT_NOT_ZERO);
/* Two byte sequence. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

JUMPHERE(jump);
/* 0x400 is bit 0x10 of the first byte after the shift by 6. The result of
the test is stored in TMP2 as a flag value and added to STR_PTR, so one
extra code unit is skipped when that bit is set (presumably the flag is
0 or 1 - see the sljit op_flags documentation). */
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO);
/* This code runs only in 8 bit mode. No need to shift the value. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
/* Clear the 0x800 marker bit and append the low six bits of the byte at
STR_PTR + 1. Note that the 0x400 bit is not cleared before the shift. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
/* Three byte sequence. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
3230
static void do_utfreadtype8(compiler_common *common)
{
/* Fast decoding a UTF-8 character type. TMP2 contains the first byte
of the character (>= 0xc0). Return value in TMP1.
The type is loaded from the common->ctypes table for characters below 256;
for every other character 0 is returned. STR_PTR is advanced past the
remaining bytes of the character. */
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_jump *compare;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Bit 0x20 of the first byte is set when the sequence is longer than two
bytes. */
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
jump = JUMP(SLJIT_NOT_ZERO);
/* Two byte sequence. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
/* The upper 5 bits are known at this point. A value above 3 means that
the character is (TMP2 << 6) >= 256, which has no ctypes entry. */
compare = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
/* TMP2 is the character value: use it as an index into ctypes. */
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* Two byte sequence encoding a character >= 256: the type is 0. */
JUMPHERE(compare);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* We only have types for characters less than 256. For longer sequences
the amount to skip is loaded from utf8_table4, indexed by first byte - 0xc0
(presumably the number of additional bytes - verify against the table). */
JUMPHERE(jump);
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
3266
3267 #endif /* COMPILE_PCRE8 */
3268
3269 #endif /* SUPPORT_UTF */
3270
3271 #ifdef SUPPORT_UCP
3272
3273 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
3274 #define UCD_BLOCK_MASK 127
3275 #define UCD_BLOCK_SHIFT 7
3276
static void do_getucd(compiler_common *common)
{
/* Searches the UCD record of the character that is passed in TMP1.
Returns chartype in TMP1 and UCD offset in TMP2 (the value loaded from
the ucd_stage2 table, which is used as the index into ucd_records). */
DEFINE_COMPILER;
#ifdef COMPILE_PCRE32
struct sljit_jump *jump;
#endif

#if defined SLJIT_DEBUG && SLJIT_DEBUG
/* dummy_ucd_record: check that the record selected for INVALID_UTF_CHAR
has the neutral properties the code below relies on. */
const ucd_record *record = GET_UCD(INVALID_UTF_CHAR);
SLJIT_ASSERT(record->script == ucp_Common && record->chartype == ucp_Cn && record->gbprop == ucp_gbOther);
SLJIT_ASSERT(record->caseset == 0 && record->other_case == 0);
#endif

/* The shift amounts used below (UCD_BLOCK_SHIFT and the record index
shift of 3) depend on these sizes. */
SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

#ifdef COMPILE_PCRE32
if (!common->utf)
  {
  /* In non-UTF 32 bit mode a code unit may exceed 0x10ffff; such values
  are replaced by INVALID_UTF_CHAR before the table lookup. */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10ffff + 1);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
  JUMPHERE(jump);
  }
#endif

/* Stage 1: TMP2 = ucd_stage1[chr >> UCD_BLOCK_SHIFT] (byte table). */
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
/* Stage 2 index: (stage1 value << UCD_BLOCK_SHIFT) + (chr & UCD_BLOCK_MASK). */
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
/* Load the 16 bit stage 2 entry; the last argument of SLJIT_MEM2 access
is presumably the left shift applied to the index (1: two byte entries). */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
/* Load the chartype byte of the selected record; the shift of 3 matches
the asserted sizeof(ucd_record) == 8. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
3317 #endif
3318
static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf)
{
/* Emits the head of the main matching loop and returns the label of the
code which advances STR_PTR to the next start position. The first pass
skips that advancing code: the 'start' jump emitted before it is bound to
the end of the code emitted here.

When common->match_end_ptr is non-zero, code is emitted first which searches
for the end of the first line and stores it in that stack slot.

hascrorlf takes part in deciding whether the extra newline handling
(newlinecheck) is emitted. */
DEFINE_COMPILER;
struct sljit_label *mainloop;
struct sljit_label *newlinelabel = NULL;
struct sljit_jump *start;
struct sljit_jump *end = NULL;
struct sljit_jump *end2 = NULL;
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
struct sljit_jump *singlechar;
#endif
jump_list *newline = NULL;
BOOL newlinecheck = FALSE;
BOOL readuchar = FALSE;

/* The newline check is only emitted when neither hascrorlf nor the first
line limit is active, and the newline type is "any", "anycrlf" or a two
unit fixed sequence (newline > 255). */
if (!(hascrorlf || (common->match_end_ptr != 0)) &&
    (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
  newlinecheck = TRUE;

if (common->match_end_ptr != 0)
  {
  /* Search for the end of the first line. STR_PTR is saved in TMP3 and
  restored after the search. */
  OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);

  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
    {
    /* Two unit fixed newline: compare the previous and the current unit
    with the high and low bytes of common->newline. */
    mainloop = LABEL();
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
    CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
    JUMPHERE(end);
    /* STR_PTR is one unit past the candidate position here. */
    OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    }
  else
    {
    end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
    mainloop = LABEL();
    /* Continual stores does not cause data dependency. The position is
    stored before each character is read, so when a newline is found the
    slot holds the position of that newline. */
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
    read_char_range(common, common->nlmin, common->nlmax, TRUE);
    check_newlinechar(common, common->nltype, &newline, TRUE);
    CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop);
    JUMPHERE(end);
    /* No newline was found: store the final STR_PTR. The newline jumps
    are bound after this store, so they skip it. */
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
    set_jumps(newline, LABEL());
    }

  OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
  }

/* Skip the advancing code on the first pass. */
start = JUMP(SLJIT_JUMP);

if (newlinecheck)
  {
  /* Reached from the main loop below when the current unit equals the high
  byte of common->newline: step over it and, when the next unit equals the
  low byte, step over that unit as well. */
  newlinelabel = LABEL();
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
  /* Convert the flag from code units to bytes. */
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  end2 = JUMP(SLJIT_JUMP);
  }

mainloop = LABEL();

/* Increasing the STR_PTR here requires one less jump in the most common case. */
#ifdef SUPPORT_UTF
if (common->utf) readuchar = TRUE;
#endif
if (newlinecheck) readuchar = TRUE;

/* The current unit is only loaded when the UTF or newline code needs it. */
if (readuchar)
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);

if (newlinecheck)
  CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
#if defined COMPILE_PCRE8
if (common->utf)
  {
  /* For a first byte >= 0xc0 skip the additional amount loaded from
  utf8_table4 (indexed by first byte - 0xc0). */
  singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  JUMPHERE(singlechar);
  }
#elif defined COMPILE_PCRE16
if (common->utf)
  {
  /* When (unit & 0xfc00) == 0xd800 skip one more 16 bit unit: the equal
  flag is shifted left by one to turn it into a byte offset. */
  singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  JUMPHERE(singlechar);
  }
#endif /* COMPILE_PCRE[8|16] */
#endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
/* Entry point of the first pass. */
JUMPHERE(start);

if (newlinecheck)
  {
  /* Both exits of the newline handling code continue here. */
  JUMPHERE(end);
  JUMPHERE(end2);
  }

return mainloop;
}
3436
3437 #define MAX_N_CHARS 16
3438 #define MAX_DIFF_CHARS 6
3439
add_prefix_char(pcre_uchar chr,pcre_uchar * chars)3440 static SLJIT_INLINE void add_prefix_char(pcre_uchar chr, pcre_uchar *chars)
3441 {
3442 pcre_uchar i, len;
3443
3444 len = chars[0];
3445 if (len == 255)
3446 return;
3447
3448 if (len == 0)
3449 {
3450 chars[0] = 1;
3451 chars[1] = chr;
3452 return;
3453 }
3454
3455 for (i = len; i > 0; i--)
3456 if (chars[i] == chr)
3457 return;
3458
3459 if (len >= MAX_DIFF_CHARS - 1)
3460 {
3461 chars[0] = 255;
3462 return;
3463 }
3464
3465 len++;
3466 chars[len] = chr;
3467 chars[0] = len;
3468 }
3469
static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uchar *chars, int max_chars, sljit_u32 *rec_count)
{
/* Recursive function, which scans prefix literals.

Starting at the compiled pattern position cc, collects for each of the
first (at most max_chars) code unit positions the set of code units that
can occur there. chars points to consecutive slots of MAX_DIFF_CHARS
entries, one slot per position: entry 0 of a slot is the number of
collected units (maintained by add_prefix_char) or 255 for "any unit".

rec_count is a shared work budget: it is decreased on every iteration of
the main loop, and 0 is returned as soon as it is exhausted.

Returns the number of positions which were processed (consumed). The
recursive calls for alternatives and optional items use their result as
the new max_chars limit of the caller. */
BOOL last, any, class, caseless;
int len, repeat, len_save, consumed = 0;
sljit_u32 chr; /* Any unicode character. */
sljit_u8 *bytes, *bytes_end, byte;
pcre_uchar *alternative, *cc_save, *oc;
/* Receives the other case form of a caseless character. */
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
pcre_uchar othercase[8];
#elif defined SUPPORT_UTF && defined COMPILE_PCRE16
pcre_uchar othercase[2];
#else
pcre_uchar othercase[1];
#endif

repeat = 1;
while (TRUE)
  {
  if (*rec_count == 0)
    return 0;
  (*rec_count)--;

  /* Defaults for the current opcode. 'last' means that scanning stops
  after this item; 'any' and 'class' select the generic handlers which
  follow the switch. */
  last = TRUE;
  any = FALSE;
  class = FALSE;
  caseless = FALSE;

  switch (*cc)
    {
    case OP_CHARI:
    caseless = TRUE;
    /* Fall through. */
    case OP_CHAR:
    last = FALSE;
    cc++;
    break;

    case OP_SOD:
    case OP_SOM:
    case OP_SET_SOM:
    case OP_NOT_WORD_BOUNDARY:
    case OP_WORD_BOUNDARY:
    case OP_EODN:
    case OP_EOD:
    case OP_CIRC:
    case OP_CIRCM:
    case OP_DOLL:
    case OP_DOLLM:
    /* Zero width assertions. */
    cc++;
    continue;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    /* The whole assertion is skipped. */
    cc = bracketend(cc);
    continue;

    case OP_PLUSI:
    case OP_MINPLUSI:
    case OP_POSPLUSI:
    caseless = TRUE;
    /* Fall through. */
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_POSPLUS:
    /* 'last' stays TRUE: only the first occurrence is recorded. */
    cc++;
    break;

    case OP_EXACTI:
    caseless = TRUE;
    /* Fall through. */
    case OP_EXACT:
    repeat = GET2(cc, 1);
    last = FALSE;
    cc += 1 + IMM2_SIZE;
    break;

    case OP_QUERYI:
    case OP_MINQUERYI:
    case OP_POSQUERYI:
    caseless = TRUE;
    /* Fall through. */
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_POSQUERY:
    /* Optional character: first scan the path on which the character is
    absent, then continue below with the character itself. */
    len = 1;
    cc++;
#ifdef SUPPORT_UTF
    if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
#endif
    max_chars = scan_prefix(common, cc + len, chars, max_chars, rec_count);
    if (max_chars == 0)
      return consumed;
    last = FALSE;
    break;

    case OP_KET:
    cc += 1 + LINK_SIZE;
    continue;

    case OP_ALT:
    cc += GET(cc, 1);
    continue;

    case OP_ONCE:
    case OP_ONCE_NC:
    case OP_BRA:
    case OP_BRAPOS:
    case OP_CBRA:
    case OP_CBRAPOS:
    /* Scan every further alternative recursively into the same slots, then
    continue with the first alternative in this loop. */
    alternative = cc + GET(cc, 1);
    while (*alternative == OP_ALT)
      {
      max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, max_chars, rec_count);
      if (max_chars == 0)
        return consumed;
      alternative += GET(alternative, 1);
      }

    if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
      cc += IMM2_SIZE;
    cc += 1 + LINK_SIZE;
    continue;

    case OP_CLASS:
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)(cc + 1), FALSE))
      return consumed;
#endif
    class = TRUE;
    break;

    case OP_NCLASS:
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
    if (common->utf) return consumed;
#endif
    class = TRUE;
    break;

#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
    case OP_XCLASS:
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    cc += GET(cc, 1);
    break;
#endif

    case OP_DIGIT:
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
      return consumed;
#endif
    any = TRUE;
    cc++;
    break;

    case OP_WHITESPACE:
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_space, FALSE))
      return consumed;
#endif
    any = TRUE;
    cc++;
    break;

    case OP_WORDCHAR:
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_word, FALSE))
      return consumed;
#endif
    any = TRUE;
    cc++;
    break;

    case OP_NOT:
    case OP_NOTI:
    /* Skips one unit more than the type opcodes below. */
    cc++;
    /* Fall through. */
    case OP_NOT_DIGIT:
    case OP_NOT_WHITESPACE:
    case OP_NOT_WORDCHAR:
    case OP_ANY:
    case OP_ALLANY:
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    cc++;
    break;

#ifdef SUPPORT_UTF
    case OP_NOTPROP:
    case OP_PROP:
#ifndef COMPILE_PCRE32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    cc += 1 + 2;
    break;
#endif

    case OP_TYPEEXACT:
    /* Only the repeat count is taken here; the repeated item is handled
    by the next iteration. */
    repeat = GET2(cc, 1);
    cc += 1 + IMM2_SIZE;
    continue;

    case OP_NOTEXACT:
    case OP_NOTEXACTI:
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    repeat = GET2(cc, 1);
    cc += 1 + IMM2_SIZE + 1;
    break;

    default:
    return consumed;
    }

  if (any)
    {
    /* Mark 'repeat' positions as "any unit". */
    do
      {
      chars[0] = 255;

      consumed++;
      if (--max_chars == 0)
        return consumed;
      chars += MAX_DIFF_CHARS;
      }
    while (--repeat > 0);

    repeat = 1;
    continue;
    }

  if (class)
    {
    /* bytes addresses the 32 byte bitmap which follows the opcode; cc is
    moved to the item after the bitmap (possibly a repeat opcode). */
    bytes = (sljit_u8*) (cc + 1);
    cc += 1 + 32 / sizeof(pcre_uchar);

    switch (*cc)
      {
      case OP_CRSTAR:
      case OP_CRMINSTAR:
      case OP_CRPOSSTAR:
      case OP_CRQUERY:
      case OP_CRMINQUERY:
      case OP_CRPOSQUERY:
      /* The class may match zero times: scan the path which skips it. */
      max_chars = scan_prefix(common, cc + 1, chars, max_chars, rec_count);
      if (max_chars == 0)
        return consumed;
      break;

      default:
      case OP_CRPLUS:
      case OP_CRMINPLUS:
      case OP_CRPOSPLUS:
      break;

      case OP_CRRANGE:
      case OP_CRMINRANGE:
      case OP_CRPOSRANGE:
      /* Use the first number of the range as the repeat count. */
      repeat = GET2(cc, 1);
      if (repeat <= 0)
        return consumed;
      break;
      }

    do
      {
      /* When the highest bit of the bitmap (character 255) is set, the
      position is treated as "any unit". */
      if (bytes[31] & 0x80)
        chars[0] = 255;
      else if (chars[0] != 255)
        {
        /* Add every character whose bit is set. chr is the character
        which belongs to the lowest bit of the current bitmap byte. */
        bytes_end = bytes + 32;
        chr = 0;
        do
          {
          byte = *bytes++;
          SLJIT_ASSERT((chr & 0x7) == 0);
          if (byte == 0)
            chr += 8;
          else
            {
            do
              {
              if ((byte & 0x1) != 0)
                add_prefix_char(chr, chars);
              byte >>= 1;
              chr++;
              }
            while (byte != 0);
            /* Round up to the first character of the next byte. */
            chr = (chr + 7) & ~7;
            }
          }
        while (chars[0] != 255 && bytes < bytes_end);
        /* Rewind to the start of the bitmap for the next repetition. */
        bytes = bytes_end - 32;
        }

      consumed++;
      if (--max_chars == 0)
        return consumed;
      chars += MAX_DIFF_CHARS;
      }
    while (--repeat > 0);

    switch (*cc)
      {
      case OP_CRSTAR:
      case OP_CRMINSTAR:
      case OP_CRPOSSTAR:
      return consumed;

      case OP_CRQUERY:
      case OP_CRMINQUERY:
      case OP_CRPOSQUERY:
      cc++;
      break;

      case OP_CRRANGE:
      case OP_CRMINRANGE:
      case OP_CRPOSRANGE:
      /* Scanning continues only when both numbers of the range are equal. */
      if (GET2(cc, 1) != GET2(cc, 1 + IMM2_SIZE))
        return consumed;
      cc += 1 + 2 * IMM2_SIZE;
      break;
      }

    repeat = 1;
    continue;
    }

  /* Literal character: len is its length in code units. */
  len = 1;
#ifdef SUPPORT_UTF
  if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
#endif

  if (caseless && char_has_othercase(common, cc))
    {
#ifdef SUPPORT_UTF
    if (common->utf)
      {
      /* The other case is only usable when its encoded length is the same
      as the length of the character itself. */
      GETCHAR(chr, cc);
      if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
        return consumed;
      }
    else
#endif
      {
      chr = *cc;
      othercase[0] = TABLE_GET(chr, common->fcc, chr);
      }
    }
  else
    {
    caseless = FALSE;
    othercase[0] = 0; /* Stops compiler warning - PH */
    }

  /* Record the units of the character (and of its other case) 'repeat'
  times; cc and len are restored before each further repetition. */
  len_save = len;
  cc_save = cc;
  while (TRUE)
    {
    oc = othercase;
    do
      {
      chr = *cc;
      add_prefix_char(*cc, chars);

      if (caseless)
        add_prefix_char(*oc, chars);

      len--;
      consumed++;
      if (--max_chars == 0)
        return consumed;
      chars += MAX_DIFF_CHARS;
      cc++;
      oc++;
      }
    while (len > 0);

    if (--repeat == 0)
      break;

    len = len_save;
    cc = cc_save;
    }

  repeat = 1;
  if (last)
    return consumed;
  }
}
3867
3868 #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) && !(defined SUPPORT_VALGRIND)
3869
character_to_int32(pcre_uchar chr)3870 static sljit_s32 character_to_int32(pcre_uchar chr)
3871 {
3872 sljit_s32 value = (sljit_s32)chr;
3873 #if defined COMPILE_PCRE8
3874 #define SSE2_COMPARE_TYPE_INDEX 0
3875 return (value << 24) | (value << 16) | (value << 8) | value;
3876 #elif defined COMPILE_PCRE16
3877 #define SSE2_COMPARE_TYPE_INDEX 1
3878 return (value << 16) | value;
3879 #elif defined COMPILE_PCRE32
3880 #define SSE2_COMPARE_TYPE_INDEX 2
3881 return value;
3882 #else
3883 #error "Unsupported unit width"
3884 #endif
3885 }
3886
fast_forward_first_char2_sse2(compiler_common * common,pcre_uchar char1,pcre_uchar char2)3887 static SLJIT_INLINE void fast_forward_first_char2_sse2(compiler_common *common, pcre_uchar char1, pcre_uchar char2)
3888 {
3889 DEFINE_COMPILER;
3890 struct sljit_label *start;
3891 struct sljit_jump *quit[3];
3892 struct sljit_jump *nomatch;
3893 sljit_u8 instruction[8];
3894 sljit_s32 tmp1_ind = sljit_get_register_index(TMP1);
3895 sljit_s32 tmp2_ind = sljit_get_register_index(TMP2);
3896 sljit_s32 str_ptr_ind = sljit_get_register_index(STR_PTR);
3897 BOOL load_twice = FALSE;
3898 pcre_uchar bit;
3899
3900 bit = char1 ^ char2;
3901 if (!is_powerof2(bit))
3902 bit = 0;
3903
3904 if ((char1 != char2) && bit == 0)
3905 load_twice = TRUE;
3906
3907 quit[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3908
3909 /* First part (unaligned start) */
3910
3911 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1 | bit));
3912
3913 SLJIT_ASSERT(tmp1_ind < 8 && tmp2_ind == 1);
3914
3915 /* MOVD xmm, r/m32 */
3916 instruction[0] = 0x66;
3917 instruction[1] = 0x0f;
3918 instruction[2] = 0x6e;
3919 instruction[3] = 0xc0 | (2 << 3) | tmp1_ind;
3920 sljit_emit_op_custom(compiler, instruction, 4);
3921
3922 if (char1 != char2)
3923 {
3924 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(bit != 0 ? bit : char2));
3925
3926 /* MOVD xmm, r/m32 */
3927 instruction[3] = 0xc0 | (3 << 3) | tmp1_ind;
3928 sljit_emit_op_custom(compiler, instruction, 4);
3929 }
3930
3931 /* PSHUFD xmm1, xmm2/m128, imm8 */
3932 instruction[2] = 0x70;
3933 instruction[3] = 0xc0 | (2 << 3) | 2;
3934 instruction[4] = 0;
3935 sljit_emit_op_custom(compiler, instruction, 5);
3936
3937 if (char1 != char2)
3938 {
3939 /* PSHUFD xmm1, xmm2/m128, imm8 */
3940 instruction[3] = 0xc0 | (3 << 3) | 3;
3941 instruction[4] = 0;
3942 sljit_emit_op_custom(compiler, instruction, 5);
3943 }
3944
3945 OP2(SLJIT_AND, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 0xf);
3946 OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf);
3947
3948 /* MOVDQA xmm1, xmm2/m128 */
3949 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3950
3951 if (str_ptr_ind < 8)
3952 {
3953 instruction[2] = 0x6f;
3954 instruction[3] = (0 << 3) | str_ptr_ind;
3955 sljit_emit_op_custom(compiler, instruction, 4);
3956
3957 if (load_twice)
3958 {
3959 instruction[3] = (1 << 3) | str_ptr_ind;
3960 sljit_emit_op_custom(compiler, instruction, 4);
3961 }
3962 }
3963 else
3964 {
3965 instruction[1] = 0x41;
3966 instruction[2] = 0x0f;
3967 instruction[3] = 0x6f;
3968 instruction[4] = (0 << 3) | (str_ptr_ind & 0x7);
3969 sljit_emit_op_custom(compiler, instruction, 5);
3970
3971 if (load_twice)
3972 {
3973 instruction[4] = (1 << 3) | str_ptr_ind;
3974 sljit_emit_op_custom(compiler, instruction, 5);
3975 }
3976 instruction[1] = 0x0f;
3977 }
3978
3979 #else
3980
3981 instruction[2] = 0x6f;
3982 instruction[3] = (0 << 3) | str_ptr_ind;
3983 sljit_emit_op_custom(compiler, instruction, 4);
3984
3985 if (load_twice)
3986 {
3987 instruction[3] = (1 << 3) | str_ptr_ind;
3988 sljit_emit_op_custom(compiler, instruction, 4);
3989 }
3990
3991 #endif
3992
3993 if (bit != 0)
3994 {
3995 /* POR xmm1, xmm2/m128 */
3996 instruction[2] = 0xeb;
3997 instruction[3] = 0xc0 | (0 << 3) | 3;
3998 sljit_emit_op_custom(compiler, instruction, 4);
3999 }
4000
4001 /* PCMPEQB/W/D xmm1, xmm2/m128 */
4002 instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX;
4003 instruction[3] = 0xc0 | (0 << 3) | 2;
4004 sljit_emit_op_custom(compiler, instruction, 4);
4005
4006 if (load_twice)
4007 {
4008 instruction[3] = 0xc0 | (1 << 3) | 3;
4009 sljit_emit_op_custom(compiler, instruction, 4);
4010 }
4011
4012 /* PMOVMSKB reg, xmm */
4013 instruction[2] = 0xd7;
4014 instruction[3] = 0xc0 | (tmp1_ind << 3) | 0;
4015 sljit_emit_op_custom(compiler, instruction, 4);
4016
4017 if (load_twice)
4018 {
4019 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP2, 0);
4020 instruction[3] = 0xc0 | (tmp2_ind << 3) | 1;
4021 sljit_emit_op_custom(compiler, instruction, 4);
4022
4023 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4024 OP1(SLJIT_MOV, TMP2, 0, RETURN_ADDR, 0);
4025 }
4026
4027 OP2(SLJIT_ASHR, TMP1, 0, TMP1, 0, TMP2, 0);
4028
4029 /* BSF r32, r/m32 */
4030 instruction[0] = 0x0f;
4031 instruction[1] = 0xbc;
4032 instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind;
4033 sljit_emit_op_custom(compiler, instruction, 3);
4034 sljit_set_current_flags(compiler, SLJIT_SET_Z);
4035
4036 nomatch = JUMP(SLJIT_ZERO);
4037
4038 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4039 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4040 quit[1] = JUMP(SLJIT_JUMP);
4041
4042 JUMPHERE(nomatch);
4043
4044 start = LABEL();
4045 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
4046 quit[2] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4047
4048 /* Second part (aligned) */
4049
4050 instruction[0] = 0x66;
4051 instruction[1] = 0x0f;
4052
4053 /* MOVDQA xmm1, xmm2/m128 */
4054 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4055
4056 if (str_ptr_ind < 8)
4057 {
4058 instruction[2] = 0x6f;
4059 instruction[3] = (0 << 3) | str_ptr_ind;
4060 sljit_emit_op_custom(compiler, instruction, 4);
4061
4062 if (load_twice)
4063 {
4064 instruction[3] = (1 << 3) | str_ptr_ind;
4065 sljit_emit_op_custom(compiler, instruction, 4);
4066 }
4067 }
4068 else
4069 {
4070 instruction[1] = 0x41;
4071 instruction[2] = 0x0f;
4072 instruction[3] = 0x6f;
4073 instruction[4] = (0 << 3) | (str_ptr_ind & 0x7);
4074 sljit_emit_op_custom(compiler, instruction, 5);
4075
4076 if (load_twice)
4077 {
4078 instruction[4] = (1 << 3) | str_ptr_ind;
4079 sljit_emit_op_custom(compiler, instruction, 5);
4080 }
4081 instruction[1] = 0x0f;
4082 }
4083
4084 #else
4085
4086 instruction[2] = 0x6f;
4087 instruction[3] = (0 << 3) | str_ptr_ind;
4088 sljit_emit_op_custom(compiler, instruction, 4);
4089
4090 if (load_twice)
4091 {
4092 instruction[3] = (1 << 3) | str_ptr_ind;
4093 sljit_emit_op_custom(compiler, instruction, 4);
4094 }
4095
4096 #endif
4097
4098 if (bit != 0)
4099 {
4100 /* POR xmm1, xmm2/m128 */
4101 instruction[2] = 0xeb;
4102 instruction[3] = 0xc0 | (0 << 3) | 3;
4103 sljit_emit_op_custom(compiler, instruction, 4);
4104 }
4105
4106 /* PCMPEQB/W/D xmm1, xmm2/m128 */
4107 instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX;
4108 instruction[3] = 0xc0 | (0 << 3) | 2;
4109 sljit_emit_op_custom(compiler, instruction, 4);
4110
4111 if (load_twice)
4112 {
4113 instruction[3] = 0xc0 | (1 << 3) | 3;
4114 sljit_emit_op_custom(compiler, instruction, 4);
4115 }
4116
4117 /* PMOVMSKB reg, xmm */
4118 instruction[2] = 0xd7;
4119 instruction[3] = 0xc0 | (tmp1_ind << 3) | 0;
4120 sljit_emit_op_custom(compiler, instruction, 4);
4121
4122 if (load_twice)
4123 {
4124 instruction[3] = 0xc0 | (tmp2_ind << 3) | 1;
4125 sljit_emit_op_custom(compiler, instruction, 4);
4126
4127 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4128 }
4129
4130 /* BSF r32, r/m32 */
4131 instruction[0] = 0x0f;
4132 instruction[1] = 0xbc;
4133 instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind;
4134 sljit_emit_op_custom(compiler, instruction, 3);
4135 sljit_set_current_flags(compiler, SLJIT_SET_Z);
4136
4137 JUMPTO(SLJIT_ZERO, start);
4138
4139 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4140
4141 start = LABEL();
4142 SET_LABEL(quit[0], start);
4143 SET_LABEL(quit[1], start);
4144 SET_LABEL(quit[2], start);
4145 }
4146
4147 #undef SSE2_COMPARE_TYPE_INDEX
4148
4149 #endif
4150
static void fast_forward_first_char2(compiler_common *common, pcre_uchar char1, pcre_uchar char2, sljit_s32 offset)
{
/* Emits code which advances STR_PTR to the next position where the code
unit at distance 'offset' from STR_PTR is equal to char1 or char2 (the two
may be the same character).

While searching, STR_PTR is moved forward by 'offset' units so that the
scan looks directly at the tested unit; the offset is subtracted again
before the emitted code is left. When common->match_end_ptr is set, STR_END
is temporarily replaced by a limit derived from the stored first line end
(the original value is kept in TMP3 and restored at the end).

On x86 without valgrind support, and when the CPU has SSE2, the search
itself is emitted by fast_forward_first_char2_sse2; otherwise a plain
compare loop is generated. */
DEFINE_COMPILER;
struct sljit_label *start;
struct sljit_jump *quit;
struct sljit_jump *found;
pcre_uchar mask;
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
struct sljit_label *utf_start = NULL;
struct sljit_jump *utf_quit = NULL;
#endif
BOOL has_match_end = (common->match_end_ptr != 0);

if (offset > 0)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));

if (has_match_end)
  {
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);

  /* STR_END = stored first line end + offset + 1; when that is greater
  than the original STR_END (TMP3), the conditional move presumably
  restores the original value (see the sljit_emit_cmov documentation). */
  OP2(SLJIT_ADD, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, SLJIT_IMM, IN_UCHARS(offset + 1));
  OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP3, 0);
  sljit_emit_cmov(compiler, SLJIT_GREATER, STR_END, TMP3, 0);
  }

#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
/* The search is restarted from here when a hit turns out to be unusable
(see the checks of the unit at -offset below). */
if (common->utf && offset > 0)
  utf_start = LABEL();
#endif

#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) && !(defined SUPPORT_VALGRIND)

/* SSE2 accelerated first character search. */

if (sljit_has_cpu_feature(SLJIT_HAS_SSE2))
  {
  fast_forward_first_char2_sse2(common, char1, char2);

  SLJIT_ASSERT(common->mode == JIT_COMPILE || offset == 0);
  if (common->mode == JIT_COMPILE)
    {
    /* In complete mode, we don't need to run a match when STR_PTR == STR_END. */
    SLJIT_ASSERT(common->forced_quit_label == NULL);
    OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
    add_jump(compiler, &common->forced_quit, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
    if (common->utf && offset > 0)
      {
      SLJIT_ASSERT(common->mode == JIT_COMPILE);

      /* Load the unit at the would-be match start (STR_PTR - offset). If
      it is a trailing unit of a character (8 bit: (unit & 0xc0) == 0x80,
      16 bit: (unit & 0xfc00) == 0xdc00) the search continues from the
      next unit, otherwise the increment of STR_PTR is undone. */
      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#if defined COMPILE_PCRE8
      OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
      CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, utf_start);
#elif defined COMPILE_PCRE16
      OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
      CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, utf_start);
#else
#error "Unknown code width"
#endif
      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      }
#endif

    if (offset > 0)
      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
    }
  else
    {
    /* Other modes (offset is asserted to be 0 above): when the search ran
    to or past STR_END, STR_PTR is replaced by the stored first line end
    or by STR_END. */
    OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
    if (has_match_end)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
      sljit_emit_cmov(compiler, SLJIT_GREATER_EQUAL, STR_PTR, TMP1, 0);
      }
    else
      sljit_emit_cmov(compiler, SLJIT_GREATER_EQUAL, STR_PTR, STR_END, 0);
    }

  if (has_match_end)
    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
  return;
  }

#endif

/* Generic search loop. */
quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

start = LABEL();
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);

if (char1 == char2)
  found = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1);
else
  {
  mask = char1 ^ char2;
  if (is_powerof2(mask))
    {
    /* The characters differ in a single bit: force that bit and use one
    compare. */
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
    found = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1 | mask);
    }
  else
    {
    /* Two compares: TMP2 collects the "equal" flags of both. */
    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char1);
    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char2);
    OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
    found = JUMP(SLJIT_NOT_ZERO);
    }
  }

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, start);

#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
/* The end was reached without a hit: skip the check emitted below. */
if (common->utf && offset > 0)
  utf_quit = JUMP(SLJIT_JUMP);
#endif

JUMPHERE(found);

#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
if (common->utf && offset > 0)
  {
  /* Same check of the unit at STR_PTR - offset as in the SSE2 path: a
  trailing unit of a character restarts the search from the next unit. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#if defined COMPILE_PCRE8
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
  CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, utf_start);
#elif defined COMPILE_PCRE16
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
  CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, utf_start);
#else
#error "Unknown code width"
#endif
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  JUMPHERE(utf_quit);
  }
#endif

JUMPHERE(quit);

if (has_match_end)
  {
  /* When the search ended at or after the temporary STR_END, STR_PTR is
  set to the stored first line end (plus offset, which is subtracted again
  below). The original STR_END is restored from TMP3 in both cases. */
  quit = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  if (offset > 0)
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
  JUMPHERE(quit);
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
  }

if (offset > 0)
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
}
4308
/* Emits a fast-forward loop driven by the leading characters of the pattern,
as collected by scan_prefix() (at most MAX_N_CHARS positions).

Returns FALSE when no usable prefix information was found. Returns TRUE
otherwise, including when the skip table could not be allocated
(NOTE(review): presumably allocate_read_only_data() records that failure for
the caller - confirm). */
fast_forward_first_n_chars(compiler_common * common)4309 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common)
4310 {
4311 DEFINE_COMPILER;
4312 struct sljit_label *start;
4313 struct sljit_jump *quit;
4314 struct sljit_jump *match;
4315 /* For each position i, chars[i * MAX_DIFF_CHARS] is the number of candidate
4316 characters stored in the slots that follow it; 255 represents any character. */
4317 pcre_uchar chars[MAX_N_CHARS * MAX_DIFF_CHARS];
4318 sljit_s32 offset;
4319 pcre_uchar mask;
4320 pcre_uchar *char_set, *char_set_end;
4321 int i, max, from;
4322 int range_right = -1, range_len;
4323 sljit_u8 *update_table = NULL;
4324 BOOL in_range;
4325 sljit_u32 rec_count;
4326
/* Start with an empty candidate set at every position. */
4327 for (i = 0; i < MAX_N_CHARS; i++)
4328 chars[i * MAX_DIFF_CHARS] = 0;
4329
4330 rec_count = 10000;
4331 max = scan_prefix(common, common->start, chars, MAX_N_CHARS, &rec_count);
4332
4333 if (max < 1)
4334 return FALSE;
4335
/* Look for the longest run of at least four consecutive positions whose
candidate count is below 255. range_right becomes the last position of that
run and range_len its length; range_right stays -1 if no such run exists. */
4336 in_range = FALSE;
4337 /* Prevent compiler "uninitialized" warning */
4338 from = 0;
4339 range_len = 4 /* minimum length */ - 1;
4340 for (i = 0; i <= max; i++)
4341 {
4342 if (in_range && (i - from) > range_len && (chars[(i - 1) * MAX_DIFF_CHARS] < 255))
4343 {
4344 range_len = i - from;
4345 range_right = i - 1;
4346 }
4347
4348 if (i < max && chars[i * MAX_DIFF_CHARS] < 255)
4349 {
4350 SLJIT_ASSERT(chars[i * MAX_DIFF_CHARS] > 0);
4351 if (!in_range)
4352 {
4353 in_range = TRUE;
4354 from = i;
4355 }
4356 }
4357 else
4358 in_range = FALSE;
4359 }
4360
/* Build the 256-entry skip table, indexed by the low byte of a character.
Every entry starts at IN_UCHARS(range_len) and is lowered to IN_UCHARS(i) when
that byte is a candidate i positions to the left of range_right, so an entry
of 0 means the byte can occur at range_right itself. */
4361 if (range_right >= 0)
4362 {
4363 update_table = (sljit_u8 *)allocate_read_only_data(common, 256);
4364 if (update_table == NULL)
4365 return TRUE;
4366 memset(update_table, IN_UCHARS(range_len), 256);
4367
4368 for (i = 0; i < range_len; i++)
4369 {
4370 char_set = chars + ((range_right - i) * MAX_DIFF_CHARS);
4371 SLJIT_ASSERT(char_set[0] > 0 && char_set[0] < 255);
4372 char_set_end = char_set + char_set[0];
4373 char_set++;
4374 while (char_set <= char_set_end)
4375 {
4376 if (update_table[(*char_set) & 0xff] > IN_UCHARS(i))
4377 update_table[(*char_set) & 0xff] = IN_UCHARS(i);
4378 char_set++;
4379 }
4380 }
4381 }
4382
/* Choose one position ("offset") with at most two candidates for an explicit
compare. A position with a single candidate replaces a two-candidate one; a
two-candidate position whose characters differ in exactly one bit replaces a
two-candidate position whose characters do not. */
4383 offset = -1;
4384 /* Scan forward. */
4385 for (i = 0; i < max; i++)
4386 {
4387 if (offset == -1)
4388 {
4389 if (chars[i * MAX_DIFF_CHARS] <= 2)
4390 offset = i;
4391 }
4392 else if (chars[offset * MAX_DIFF_CHARS] == 2 && chars[i * MAX_DIFF_CHARS] <= 2)
4393 {
4394 if (chars[i * MAX_DIFF_CHARS] == 1)
4395 offset = i;
4396 else
4397 {
4398 mask = chars[offset * MAX_DIFF_CHARS + 1] ^ chars[offset * MAX_DIFF_CHARS + 2];
4399 if (!is_powerof2(mask))
4400 {
4401 mask = chars[i * MAX_DIFF_CHARS + 1] ^ chars[i * MAX_DIFF_CHARS + 2];
4402 if (is_powerof2(mask))
4403 offset = i;
4404 }
4405 }
4406 }
4407 }
4408
/* Without a skip table, fall back to the one/two character search at "offset"
(or give up if no such position exists). */
4409 if (range_right < 0)
4410 {
4411 if (offset < 0)
4412 return FALSE;
4413 SLJIT_ASSERT(chars[offset * MAX_DIFF_CHARS] >= 1 && chars[offset * MAX_DIFF_CHARS] <= 2);
4414 /* Works regardless the value is 1 or 2. */
4415 mask = chars[offset * MAX_DIFF_CHARS + chars[offset * MAX_DIFF_CHARS]];
4416 fast_forward_first_char2(common, chars[offset * MAX_DIFF_CHARS + 1], mask, offset);
4417 return TRUE;
4418 }
4419
/* No separate compare is emitted for the position the table is indexed by
(presumably because the table lookup already tests it - confirm). */
4420 if (range_right == offset)
4421 offset = -1;
4422
4423 SLJIT_ASSERT(offset == -1 || (chars[offset * MAX_DIFF_CHARS] >= 1 && chars[offset * MAX_DIFF_CHARS] <= 2));
4424
/* From here on max is the prefix length minus one. STR_END is lowered by that
many characters for the duration of the loop (and additionally limited to the
value stored at match_end_ptr when that slot is in use, with the original
STR_END kept in TMP3). It is restored at the end of the function. */
4425 max -= 1;
4426 SLJIT_ASSERT(max > 0);
4427 if (common->match_end_ptr != 0)
4428 {
4429 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
4430 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
4431 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
4432 quit = CMP(SLJIT_LESS_EQUAL, STR_END, 0, TMP1, 0);
4433 OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
4434 JUMPHERE(quit);
4435 }
4436 else
4437 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
4438
4439 SLJIT_ASSERT(range_right >= 0);
4440
/* Except on x86-32, the table address is kept in RETURN_ADDR; on x86-32 it is
used as an immediate displacement in the lookup below instead. */
4441 #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
4442 OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);
4443 #endif
4444
4445 start = LABEL();
4446 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4447
/* Load one byte of the character at range_right: its first byte in 8-bit or
little-endian builds, its last byte otherwise. */
4448 #if defined COMPILE_PCRE8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
4449 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
4450 #else
4451 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
4452 #endif
4453
/* Advance STR_PTR by the table entry and retry while the entry is non-zero. */
4454 #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
4455 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
4456 #else
4457 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);
4458 #endif
4459 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4460 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);
4461
/* Explicit compare at "offset". STR_PTR is moved one character forward before
the compare, so a mismatch resumes the loop at the next character; the step is
undone further below once the position is accepted. */
4462 if (offset >= 0)
4463 {
4464 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offset));
4465 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4466
4467 if (chars[offset * MAX_DIFF_CHARS] == 1)
4468 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 1], start);
4469 else
4470 {
4471 mask = chars[offset * MAX_DIFF_CHARS + 1] ^ chars[offset * MAX_DIFF_CHARS + 2];
4472 if (is_powerof2(mask))
4473 {
/* The two candidates differ in one bit: force that bit on and compare once. */
4474 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
4475 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 1] | mask, start);
4476 }
4477 else
4478 {
4479 match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 1]);
4480 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 2], start);
4481 JUMPHERE(match);
4482 }
4483 }
4484 }
4485
/* In UTF mode, continue the loop when the unit at the candidate start
position is a UTF-8 byte of the form 10xxxxxx or a UTF-16 unit in
0xdc00..0xdfff, with STR_PTR already one character further on. */
4486 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
4487 if (common->utf && offset != 0)
4488 {
4489 if (offset < 0)
4490 {
4491 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4492 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4493 }
4494 else
4495 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4496 #if defined COMPILE_PCRE8
4497 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
4498 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, start);
4499 #elif defined COMPILE_PCRE16
4500 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4501 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, start);
4502 #else
4503 #error "Unknown code width"
4504 #endif
4505 if (offset < 0)
4506 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4507 }
4508 #endif
4509
/* Undo the one-character advance made before the explicit compare. */
4510 if (offset >= 0)
4511 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4512
4513 JUMPHERE(quit);
4514
/* Restore STR_END. When match_end_ptr is in use, STR_PTR is also limited to
the value stored there. range_right >= 0 always holds at this point (see the
early return and the assertion above). */
4515 if (common->match_end_ptr != 0)
4516 {
4517 if (range_right >= 0)
4518 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
4519 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
4520 if (range_right >= 0)
4521 {
4522 quit = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
4523 OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
4524 JUMPHERE(quit);
4525 }
4526 }
4527 else
4528 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
4529 return TRUE;
4530 }
4531
4532 #undef MAX_N_CHARS
4533 #undef MAX_DIFF_CHARS
4534
fast_forward_first_char(compiler_common * common,pcre_uchar first_char,BOOL caseless)4535 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless)
4536 {
4537 pcre_uchar oc;
4538
4539 oc = first_char;
4540 if (caseless)
4541 {
4542 oc = TABLE_GET(first_char, common->fcc, first_char);
4543 #if defined SUPPORT_UCP && !defined COMPILE_PCRE8
4544 if (first_char > 127 && common->utf)
4545 oc = UCD_OTHERCASE(first_char);
4546 #endif
4547 }
4548
4549 fast_forward_first_char2(common, first_char, oc, 0);
4550 }
4551
/* Emits code that moves STR_PTR forward until it is positioned just after a
newline, or until STR_END is reached. Nothing is skipped when STR_PTR is at or
before the "str" field of the JIT arguments. When match_end_ptr is in use, the
value stored there replaces STR_END during the search and the original STR_END
(saved in TMP3) is restored before leaving. */
fast_forward_newline(compiler_common * common)4552 static SLJIT_INLINE void fast_forward_newline(compiler_common *common)
4553 {
4554 DEFINE_COMPILER;
4555 struct sljit_label *loop;
4556 struct sljit_jump *lastchar;
4557 struct sljit_jump *firstchar;
4558 struct sljit_jump *quit;
4559 struct sljit_jump *foundcr = NULL;
4560 struct sljit_jump *notfoundnl;
4561 jump_list *newline = NULL;
4562
4563 if (common->match_end_ptr != 0)
4564 {
4565 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
4566 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
4567 }
4568
/* Fixed newline made of two characters: the first is in bits 8..15 of
common->newline and the second in bits 0..7. */
4569 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4570 {
4571 lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4572 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4573 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
4574 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4575 firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
4576
/* Step STR_PTR back by one character when at least two characters lie between
"begin" and STR_PTR. The loop below re-adds one character before testing, so
its first test is on the two characters that end exactly at the original
STR_PTR. */
4577 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
4578 OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
4579 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER_EQUAL);
4580 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4581 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
4582 #endif
4583 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4584
/* Advance one character at a time until the two characters preceding STR_PTR
are the newline pair, or STR_END is reached. */
4585 loop = LABEL();
4586 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4587 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4588 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4589 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4590 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
4591 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
4592
4593 JUMPHERE(quit);
4594 JUMPHERE(firstchar);
4595 JUMPHERE(lastchar);
4596
4597 if (common->match_end_ptr != 0)
4598 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
4599 return;
4600 }
4601
/* All other newline types. Step back one character first (skipped when
STR_PTR is at or before "str"), then read characters until one is classified
as a newline by check_newlinechar(). */
4602 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4603 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
4604 firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
4605 skip_char_back(common);
4606
4607 loop = LABEL();
/* The loop entry is published in common->ff_newline_shortcut. */
4608 common->ff_newline_shortcut = loop;
4609
4610 read_char_range(common, common->nlmin, common->nlmax, TRUE);
4611 lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4612 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
4613 foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4614 check_newlinechar(common, common->nltype, &newline, FALSE);
/* The jumps collected in "newline" are bound to the loop head, i.e. they keep
the scan going. */
4615 set_jumps(newline, loop);
4616
4617 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
4618 {
4619 quit = JUMP(SLJIT_JUMP);
4620 JUMPHERE(foundcr);
/* After a CR: if the next character is NL, step over it as well. */
4621 notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4622 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4623 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
4624 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
4625 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4626 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
4627 #endif
4628 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4629 JUMPHERE(notfoundnl);
4630 JUMPHERE(quit);
4631 }
4632 JUMPHERE(lastchar);
4633 JUMPHERE(firstchar);
4634
4635 if (common->match_end_ptr != 0)
4636 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
4637 }
4638
4639 static BOOL check_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
4640
/* Emits a loop that advances STR_PTR until the character unit at STR_PTR is
one whose bit is set in the 256-bit start_bits map, or until STR_END is
reached. When match_end_ptr is in use, the value stored there replaces
STR_END during the loop; the original STR_END is kept in RETURN_ADDR and
restored at the end. */
fast_forward_start_bits(compiler_common * common,const sljit_u8 * start_bits)4641 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, const sljit_u8 *start_bits)
4642 {
4643 DEFINE_COMPILER;
4644 struct sljit_label *start;
4645 struct sljit_jump *quit;
4646 struct sljit_jump *found = NULL;
4647 jump_list *matches = NULL;
4648 #ifndef COMPILE_PCRE8
4649 struct sljit_jump *jump;
4650 #endif
4651
4652 if (common->match_end_ptr != 0)
4653 {
4654 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
4655 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
4656 }
4657
4658 start = LABEL();
4659 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4660 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
/* In UTF mode the unit is needed again after the test, which modifies TMP1,
so a copy is kept in TMP3. */
4661 #ifdef SUPPORT_UTF
4662 if (common->utf)
4663 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4664 #endif
4665
/* First try to express the bitmap as a few range compares (invert is TRUE, so
the jumps in "matches" are taken for units inside the set). If that is not
possible, test the bit in the table directly. */
4666 if (!check_class_ranges(common, start_bits, (start_bits[31] & 0x80) != 0, TRUE, &matches))
4667 {
4668 #ifndef COMPILE_PCRE8
/* Units of 255 and above are all tested against bit 255. */
4669 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 255);
4670 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
4671 JUMPHERE(jump);
4672 #endif
/* Test start_bits[c >> 3] & (1 << (c & 7)). */
4673 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4674 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4675 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
4676 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4677 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4678 found = JUMP(SLJIT_NOT_ZERO);
4679 }
4680
/* Not a start character: step over it and try again. */
4681 #ifdef SUPPORT_UTF
4682 if (common->utf)
4683 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4684 #endif
4685 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4686 #ifdef SUPPORT_UTF
4687 #if defined COMPILE_PCRE8
4688 if (common->utf)
4689 {
/* For a byte of 0xc0 or above, add the utf8_table4 entry for that byte to
STR_PTR (presumably the number of additional bytes in the character -
confirm). */
4690 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
4691 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4692 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4693 }
4694 #elif defined COMPILE_PCRE16
4695 if (common->utf)
4696 {
/* For a unit in 0xd800..0xdbff, skip two more bytes (one more 16-bit unit). */
4697 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
4698 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4699 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4700 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
4701 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4702 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4703 }
4704 #endif /* COMPILE_PCRE[8|16] */
4705 #endif /* SUPPORT_UTF */
4706 JUMPTO(SLJIT_JUMP, start);
/* Exit point for every "found" path and for reaching STR_END. */
4707 if (found != NULL)
4708 JUMPHERE(found);
4709 if (matches != NULL)
4710 set_jumps(matches, LABEL());
4711 JUMPHERE(quit);
4712
4713 if (common->match_end_ptr != 0)
4714 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
4715 }
4716
search_requested_char(compiler_common * common,pcre_uchar req_char,BOOL caseless,BOOL has_firstchar)4717 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
4718 {
4719 DEFINE_COMPILER;
4720 struct sljit_label *loop;
4721 struct sljit_jump *toolong;
4722 struct sljit_jump *alreadyfound;
4723 struct sljit_jump *found;
4724 struct sljit_jump *foundoc = NULL;
4725 struct sljit_jump *notfound;
4726 sljit_u32 oc, bit;
4727
4728 SLJIT_ASSERT(common->req_char_ptr != 0);
4729 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
4730 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
4731 toolong = CMP(SLJIT_LESS, TMP1, 0, STR_END, 0);
4732 alreadyfound = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
4733
4734 if (has_firstchar)
4735 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4736 else
4737 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
4738
4739 loop = LABEL();
4740 notfound = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0);
4741
4742 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
4743 oc = req_char;
4744 if (caseless)
4745 {
4746 oc = TABLE_GET(req_char, common->fcc, req_char);
4747 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
4748 if (req_char > 127 && common->utf)
4749 oc = UCD_OTHERCASE(req_char);
4750 #endif
4751 }
4752 if (req_char == oc)
4753 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
4754 else
4755 {
4756 bit = req_char ^ oc;
4757 if (is_powerof2(bit))
4758 {
4759 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
4760 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
4761 }
4762 else
4763 {
4764 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
4765 foundoc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc);
4766 }
4767 }
4768 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4769 JUMPTO(SLJIT_JUMP, loop);
4770
4771 JUMPHERE(found);
4772 if (foundoc)
4773 JUMPHERE(foundoc);
4774 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);
4775 JUMPHERE(alreadyfound);
4776 JUMPHERE(toolong);
4777 return notfound;
4778 }
4779
/* Emits the fast-call routine that writes saved values back to their slots.

It reads records downwards from STACK_TOP. The word just below STACK_TOP
selects the record type:
  > 0  offset of a slot pair: two saved words are written back (3-word record)
  < 0  negated offset of a single slot: one saved word is written (2-word record)
  = 0  end marker: STACK_TOP gets back its entry value (kept in TMP3) and the
       routine returns.
Offsets are relative to the base address that GET_LOCAL_BASE puts into TMP1.
TMP1, TMP2 and TMP3 are overwritten. */
do_revertframes(compiler_common * common)4780 static void do_revertframes(compiler_common *common)
4781 {
4782 DEFINE_COMPILER;
4783 struct sljit_jump *jump;
4784 struct sljit_label *mainloop;
4785
4786 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4787 OP1(SLJIT_MOV, TMP3, 0, STACK_TOP, 0);
4788 GET_LOCAL_BASE(TMP1, 0, 0);
4789
4790 /* Drop frames until we reach STACK_TOP. */
4791 mainloop = LABEL();
4792 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), -sizeof(sljit_sw));
4793 jump = CMP(SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0);
4794
/* Positive offset: restore two consecutive words. */
4795 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
4796 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -2 * sizeof(sljit_sw));
4797 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(STACK_TOP), -3 * sizeof(sljit_sw));
4798 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
4799 JUMPTO(SLJIT_JUMP, mainloop);
4800
4801 JUMPHERE(jump);
/* TMP2 is known to be <= 0 here, so "not zero" means "negative". */
4802 jump = CMP(SLJIT_NOT_ZERO /* SIG_LESS */, TMP2, 0, SLJIT_IMM, 0);
4803 /* End of reverting values. */
4804 OP1(SLJIT_MOV, STACK_TOP, 0, TMP3, 0);
4805 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4806
4807 JUMPHERE(jump);
/* Negative offset: negate it and restore a single word. */
4808 OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
4809 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
4810 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -2 * sizeof(sljit_sw));
4811 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
4812 JUMPTO(SLJIT_JUMP, mainloop);
4813 }
4814
/* Emits the fast-call routine used for word boundary tests.

A 0/1 "is a word character" value is computed for the character before
STR_PTR (kept in LOCALS1; 0 when STR_PTR is at or before the "begin" field of
the JIT arguments) and for the character at STR_PTR (kept in TMP2; 0 when the
check_str_end() jumps are taken). The routine returns with the zero flag set
from the XOR of the two values. The return address is kept in LOCALS0. */
check_wordboundary(compiler_common * common)4815 static void check_wordboundary(compiler_common *common)
4816 {
4817 DEFINE_COMPILER;
4818 struct sljit_jump *skipread;
4819 jump_list *skipread_list = NULL;
4820 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
4821 struct sljit_jump *jump;
4822 #endif
4823
/* The shifts by 4 below rely on ctype_word being bit 4. */
4824 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
4825
4826 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4827 /* Get type of the previous char, and put it to LOCALS1. */
4828 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4829 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4830 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, SLJIT_IMM, 0);
4831 skipread = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
4832 skip_char_back(common);
4833 check_start_used_ptr(common);
4834 read_char(common);
4835
4836 /* Testing char type. */
4837 #ifdef SUPPORT_UCP
4838 if (common->use_ucp)
4839 {
/* An underscore is a word character. Otherwise the value obtained through the
getucd call must lie in ucp_Ll..ucp_Lu or in ucp_Nd..ucp_No (presumably
getucd leaves the character type in TMP1 - confirm). */
4840 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
4841 jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
4842 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4843 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
4844 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4845 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
4846 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
4847 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4848 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
4849 JUMPHERE(jump);
4850 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP2, 0);
4851 }
4852 else
4853 #endif
4854 {
/* Table based test: characters above 255 skip the lookup, so LOCALS1 keeps
its 0. For the rest, the ctype_word bit of the ctypes entry is extracted. */
4855 #ifndef COMPILE_PCRE8
4856 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4857 #elif defined SUPPORT_UTF
4858 /* Here LOCALS1 has already been zeroed. */
4859 jump = NULL;
4860 if (common->utf)
4861 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4862 #endif /* COMPILE_PCRE8 */
4863 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
4864 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
4865 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4866 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
4867 #ifndef COMPILE_PCRE8
4868 JUMPHERE(jump);
4869 #elif defined SUPPORT_UTF
4870 if (jump != NULL)
4871 JUMPHERE(jump);
4872 #endif /* COMPILE_PCRE8 */
4873 }
4874 JUMPHERE(skipread);
4875
/* Now the character at STR_PTR; the result goes to TMP2. */
4876 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4877 check_str_end(common, &skipread_list);
4878 peek_char(common, READ_CHAR_MAX);
4879
4880 /* Testing char type. This is a code duplication. */
4881 #ifdef SUPPORT_UCP
4882 if (common->use_ucp)
4883 {
4884 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
4885 jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
4886 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4887 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
4888 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4889 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
4890 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
4891 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4892 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
4893 JUMPHERE(jump);
4894 }
4895 else
4896 #endif
4897 {
4898 #ifndef COMPILE_PCRE8
4899 /* TMP2 may be destroyed by peek_char. */
4900 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4901 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4902 #elif defined SUPPORT_UTF
4903 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4904 jump = NULL;
4905 if (common->utf)
4906 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4907 #endif
4908 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
4909 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
4910 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
4911 #ifndef COMPILE_PCRE8
4912 JUMPHERE(jump);
4913 #elif defined SUPPORT_UTF
4914 if (jump != NULL)
4915 JUMPHERE(jump);
4916 #endif /* COMPILE_PCRE8 */
4917 }
4918 set_jumps(skipread_list, LABEL());
4919
/* The zero flag is set when both sides have the same word-ness. */
4920 OP2(SLJIT_XOR | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
4921 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4922 }
4923
/* Tries to replace a 256-bit class bitmap test by a handful of compares.

The bitmap is scanned from character 0 and every character value at which the
bit flips is recorded in ranges[]. If there are at most four such values,
compare instructions on TMP1 are emitted and the resulting jumps are added to
"backtracks".

  bits       the 256-bit map (32 bytes)
  nclass     taken as the bit value for characters above 255: a transition at
             256 is appended when it differs from the bit of character 255
  invert     when FALSE the jumps are taken for characters whose bit is clear,
             when TRUE for characters whose bit is set
  backtracks list receiving the emitted jumps

Returns TRUE when the test was emitted, FALSE when the bitmap needs more
transitions than supported (nothing is emitted in that case). */
check_class_ranges(compiler_common * common,const sljit_u8 * bits,BOOL nclass,BOOL invert,jump_list ** backtracks)4924 static BOOL check_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
4925 {
4926 /* May destroy TMP1. */
4927 DEFINE_COMPILER;
4928 int ranges[MAX_RANGE_SIZE];
4929 sljit_u8 bit, cbit, all;
4930 int i, byte, length = 0;
4931
4932 bit = bits[0] & 0x1;
4933 /* All bits will be zero or one (since bit is zero or one). */
4934 all = -bit;
4935
/* Collect the transition points. A whole byte equal to "all" (0x00 or 0xff,
matching the current bit) contains no transition and is skipped at once. */
4936 for (i = 0; i < 256; )
4937 {
4938 byte = i >> 3;
4939 if ((i & 0x7) == 0 && bits[byte] == all)
4940 i += 8;
4941 else
4942 {
4943 cbit = (bits[byte] >> (i & 0x7)) & 0x1;
4944 if (cbit != bit)
4945 {
4946 if (length >= MAX_RANGE_SIZE)
4947 return FALSE;
4948 ranges[length] = i;
4949 length++;
4950 bit = cbit;
4951 all = -cbit;
4952 }
4953 i++;
4954 }
4955 }
4956
/* "bit" is now the value of character 255; add a transition at 256 when
nclass asks for the opposite value above it. */
4957 if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
4958 {
4959 if (length >= MAX_RANGE_SIZE)
4960 return FALSE;
4961 ranges[length] = 256;
4962 length++;
4963 }
4964
/* length is never negative here; only the upper bound has an effect. */
4965 if (length < 0 || length > 4)
4966 return FALSE;
4967
/* From here on "bit" is the (possibly inverted) value of character 0. */
4968 bit = bits[0] & 0x1;
4969 if (invert) bit ^= 0x1;
4970
4971 /* No character is accepted. */
4972 if (length == 0 && bit == 0)
4973 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4974
4975 switch(length)
4976 {
4977 case 0:
4978 /* When bit != 0, all characters are accepted. */
4979 return TRUE;
4980
4981 case 1:
/* One transition: a single below / not-below compare. */
4982 add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4983 return TRUE;
4984
4985 case 2:
/* One interval [ranges[0], ranges[1]): an unsigned range test after
subtracting its start, or an equality test when it holds one character. */
4986 if (ranges[0] + 1 != ranges[1])
4987 {
4988 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4989 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4990 }
4991 else
4992 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4993 return TRUE;
4994
4995 case 3:
/* Three transitions: one open-ended compare plus one interval test. */
4996 if (bit != 0)
4997 {
4998 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
4999 if (ranges[0] + 1 != ranges[1])
5000 {
5001 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
5002 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
5003 }
5004 else
5005 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
5006 return TRUE;
5007 }
5008
5009 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
5010 if (ranges[1] + 1 != ranges[2])
5011 {
5012 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
5013 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
5014 }
5015 else
5016 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
5017 return TRUE;
5018
5019 case 4:
/* Two intervals of equal length whose start values differ in exactly one bit
(clear in the first interval): OR that bit into TMP1 and test the second
interval only. */
5020 if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
5021 && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
5022 && (ranges[1] & (ranges[2] - ranges[0])) == 0
5023 && is_powerof2(ranges[2] - ranges[0]))
5024 {
5025 SLJIT_ASSERT((ranges[0] & (ranges[2] - ranges[0])) == 0 && (ranges[2] & ranges[3] & (ranges[2] - ranges[0])) != 0);
5026 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
5027 if (ranges[2] + 1 != ranges[3])
5028 {
5029 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
5030 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
5031 }
5032 else
5033 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
5034 return TRUE;
5035 }
5036
/* General case with two intervals. "i" tracks how much has already been
subtracted from TMP1 so that the second test can compensate for it. */
5037 if (bit != 0)
5038 {
5039 i = 0;
5040 if (ranges[0] + 1 != ranges[1])
5041 {
5042 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
5043 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
5044 i = ranges[0];
5045 }
5046 else
5047 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
5048
5049 if (ranges[2] + 1 != ranges[3])
5050 {
5051 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
5052 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
5053 }
5054 else
5055 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
5056 return TRUE;
5057 }
5058
/* bit == 0: jump for everything outside [ranges[0], ranges[3]) first, then
for the gap [ranges[1], ranges[2]) in between. */
5059 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
5060 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
5061 if (ranges[1] + 1 != ranges[2])
5062 {
5063 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
5064 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
5065 }
5066 else
5067 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
5068 return TRUE;
5069
5070 default:
5071 SLJIT_UNREACHABLE();
5072 return FALSE;
5073 }
5074 }
5075
/* Emits the fast-call routine that tests TMP1 against the "any newline" set:
0x0a..0x0d and 0x85, plus 0x2028 and 0x2029 in 16/32 bit builds and in 8 bit
UTF mode. On return TMP2 is 1 for a newline and 0 otherwise, and the zero flag
is set from that value. */
check_anynewline(compiler_common * common)5076 static void check_anynewline(compiler_common *common)
5077 {
5078 /* Check whether TMP1 contains a newline character. TMP1 is modified, TMP2 destroyed. */
5079 DEFINE_COMPILER;
5080
5081 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
5082
/* TMP1 is rebased to 0x0a; all later constants are relative to that. */
5083 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
5084 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
5085 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
5086 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
5087 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5088 #ifdef COMPILE_PCRE8
5089 if (common->utf)
5090 {
5091 #endif
5092 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
/* Setting the lowest bit maps 0x2028 onto 0x2029, so one compare covers both. */
5093 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
5094 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
5095 #ifdef COMPILE_PCRE8
5096 }
5097 #endif
5098 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the result of the last compare and set the zero flag from TMP2. */
5099 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
5100 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
5101 }
5102
/* Emits the fast-call routine that tests TMP1 against the horizontal space
set: 0x09, 0x20 and 0xa0, plus 0x1680, 0x180e, 0x2000..0x200a, 0x202f, 0x205f
and 0x3000 in 16/32 bit builds and in 8 bit UTF mode. On return TMP2 is 1 for
a horizontal space and 0 otherwise, and the zero flag is set from that value. */
check_hspace(compiler_common * common)5103 static void check_hspace(compiler_common *common)
5104 {
5105 /* Check whether TMP1 contains a horizontal space character. TMP2 destroyed
(TMP1 is modified as well on the wide character path). */
5106 DEFINE_COMPILER;
5107
5108 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
5109
5110 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
5111 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
5112 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
5113 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5114 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
5115 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5116 #ifdef COMPILE_PCRE8
5117 if (common->utf)
5118 {
5119 #endif
5120 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5121 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
5122 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5123 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
5124 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
/* TMP1 is rebased to 0x2000; the remaining constants are relative to that. */
5125 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
5126 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
5127 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
5128 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
5129 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5130 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
5131 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5132 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
5133 #ifdef COMPILE_PCRE8
5134 }
5135 #endif
5136 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the result of the last compare and set the zero flag from TMP2. */
5137 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
5138
5139 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
5140 }
5141
/* Emits the fast-call routine that tests TMP1 against the vertical space set:
0x0a..0x0d and 0x85, plus 0x2028 and 0x2029 in 16/32 bit builds and in 8 bit
UTF mode. On return TMP2 is 1 for a vertical space and 0 otherwise, and the
zero flag is set from that value. */
check_vspace(compiler_common * common)5142 static void check_vspace(compiler_common *common)
5143 {
5144 /* Check whether TMP1 contains a vertical space character. TMP1 is modified, TMP2 destroyed. */
5145 DEFINE_COMPILER;
5146
5147 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
5148
/* TMP1 is rebased to 0x0a; all later constants are relative to that. */
5149 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
5150 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
5151 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
5152 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
5153 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5154 #ifdef COMPILE_PCRE8
5155 if (common->utf)
5156 {
5157 #endif
5158 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
/* Setting the lowest bit maps 0x2028 onto 0x2029, so one compare covers both. */
5159 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
5160 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
5161 #ifdef COMPILE_PCRE8
5162 }
5163 #endif
5164 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the result of the last compare and set the zero flag from TMP2. */
5165 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
5166
5167 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
5168 }
5169
5170 #define CHAR1 STR_END
5171 #define CHAR2 STACK_TOP
5172
/* Emits the fast-call routine for an exact (case sensitive) comparison of two
character sequences.

On entry TMP1 addresses the first sequence and TMP2 holds the length in bytes;
the second sequence starts at STR_PTR - TMP2. CHAR1 (STR_END) and CHAR2
(STACK_TOP) are borrowed as scratch registers: their values are saved in TMP3
and LOCALS0 and restored before returning.

NOTE(review): the loop relies on MOVU_UCHAR advancing its base register by the
given displacement on every load, and the caller presumably tests the flags
left by the loop exit - confirm both against the macro and the call sites. */
do_casefulcmp(compiler_common * common)5173 static void do_casefulcmp(compiler_common *common)
5174 {
5175 DEFINE_COMPILER;
5176 struct sljit_jump *jump;
5177 struct sljit_label *label;
5178
5179 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
5180 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
5181 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
5182 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, CHAR2, 0);
/* Both pointers start one character early because each load below uses a
displacement of one character. */
5183 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
5184 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5185
/* Compare character by character until a difference is found or the
remaining length in TMP2 drops to zero. */
5186 label = LABEL();
5187 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
5188 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5189 jump = CMP(SLJIT_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
5190 OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
5191 JUMPTO(SLJIT_NOT_ZERO, label);
5192
5193 JUMPHERE(jump);
/* Undo the initial one-character bias of STR_PTR and restore the borrowed
registers. */
5194 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5195 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
5196 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
5197 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
5198 }
5199
/* Register borrowed by do_caselesscmp to hold the address of the lower case
table (common->lcc). It aliases STACK_LIMIT, whose contents are saved and
restored by that helper. */
#define LCC_TABLE STACK_LIMIT
5201
static void do_caselesscmp(compiler_common *common)
{
/* Emits a fast-call helper that compares two runs of code units after
mapping each unit through the table at common->lcc. Register usage mirrors
do_casefulcmp: TMP1 addresses the first run, STR_PTR (after TMP2 has been
subtracted from it) addresses the second run, and TMP2 is the remaining length
counted down in IN_UCHARS(1) steps; TMP2 reaches zero only if no difference
was found. In builds other than 8 bit, units above 255 bypass the table and
are compared as they are. */
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_label *label;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Move STR_PTR back by the length held in TMP2. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

/* LCC_TABLE, CHAR1 and CHAR2 alias live registers: save their contents. */
OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, CHAR1, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, CHAR2, 0);
OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
/* Bias both pointers by one unit: every load in the loop uses an offset of
IN_UCHARS(1). MOVU_UCHAR is presumably a load-with-update that also advances
the base register - confirm against its definition. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

label = LABEL();
OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
#ifndef COMPILE_PCRE8
/* The table lookup below is skipped for values above 255. */
jump = CMP(SLJIT_GREATER, CHAR1, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_U8, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
#ifndef COMPILE_PCRE8
JUMPHERE(jump);
jump = CMP(SLJIT_GREATER, CHAR2, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_U8, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
#ifndef COMPILE_PCRE8
JUMPHERE(jump);
#endif
/* Leave the loop on the first mismatch, before TMP2 is decremented. */
jump = CMP(SLJIT_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
JUMPTO(SLJIT_NOT_ZERO, label);

JUMPHERE(jump);
/* Undo the one unit bias on STR_PTR and restore the borrowed registers. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
5244
5245 #undef LCC_TABLE
5246 #undef CHAR1
5247 #undef CHAR2
5248
5249 #if defined SUPPORT_UTF && defined SUPPORT_UCP
5250
do_utf_caselesscmp(pcre_uchar * src1,jit_arguments * args,pcre_uchar * end1)5251 static const pcre_uchar * SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
5252 {
5253 /* This function would be ineffective to do in JIT level. */
5254 sljit_u32 c1, c2;
5255 const pcre_uchar *src2 = args->uchar_ptr;
5256 const pcre_uchar *end2 = args->end;
5257 const ucd_record *ur;
5258 const sljit_u32 *pp;
5259
5260 while (src1 < end1)
5261 {
5262 if (src2 >= end2)
5263 return (pcre_uchar*)1;
5264 GETCHARINC(c1, src1);
5265 GETCHARINC(c2, src2);
5266 ur = GET_UCD(c2);
5267 if (c1 != c2 && c1 != c2 + ur->other_case)
5268 {
5269 pp = PRIV(ucd_caseless_sets) + ur->caseset;
5270 for (;;)
5271 {
5272 if (c1 < *pp) return NULL;
5273 if (c1 == *pp++) break;
5274 }
5275 }
5276 }
5277 return src2;
5278 }
5279
5280 #endif /* SUPPORT_UTF && SUPPORT_UCP */
5281
static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
    compare_context *context, jump_list **backtracks)
{
/* Emits the comparison code for one literal character of the pattern (one or
more code units when common->utf is set and the first unit announces extra
units).

Arguments:
  common      compiler state
  caseless    when TRUE and the character has an other case, the differing
                bit is masked in before the comparison
  cc          points to the first code unit of the character in the pattern
  context     running state shared between consecutive calls: remaining length
                (context->length), the register holding the most recently
                loaded subject data (context->sourcereg, -1 before the first
                load) and, on the unaligned path, the units collected so far
  backtracks  jump list that receives the "no match" jumps

Returns: cc advanced past the code units of the character.

The subject data is addressed relative to STR_PTR with the negative offset
-context->length. TMP1 and TMP2 are used alternately: while one register is
being compared, the next piece of data has already been loaded into the
other. */
DEFINE_COMPILER;
unsigned int othercasebit = 0;
pcre_uchar *othercasechar = NULL;
#ifdef SUPPORT_UTF
int utflength;
#endif

if (caseless && char_has_othercase(common, cc))
  {
  othercasebit = char_get_othercase_bit(common, cc);
  SLJIT_ASSERT(othercasebit);
  /* Extracting bit difference info. */
#if defined COMPILE_PCRE8
  /* The upper part selects the code unit which carries the differing bit,
  the low 8 bits are the bit itself. */
  othercasechar = cc + (othercasebit >> 8);
  othercasebit &= 0xff;
#elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
  /* Note that this code only handles characters in the BMP. If there
  ever are characters outside the BMP whose othercase differs in only one
  bit from itself (there currently are none), this code will need to be
  revised for COMPILE_PCRE32. */
  othercasechar = cc + (othercasebit >> 9);
  /* Bit 0x100 tells that the differing bit lives in the upper byte. */
  if ((othercasebit & 0x100) != 0)
    othercasebit = (othercasebit & 0xff) << 8;
  else
    othercasebit &= 0xff;
#endif /* COMPILE_PCRE[8|16|32] */
  }

if (context->sourcereg == -1)
  {
  /* First call for this sequence: load the first piece of subject data into
  TMP1, using the widest access allowed for the remaining length. */
#if defined COMPILE_PCRE8
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  if (context->length >= 4)
    OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else if (context->length >= 2)
    OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else
#endif
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#elif defined COMPILE_PCRE16
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  if (context->length >= 4)
    OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else
#endif
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#elif defined COMPILE_PCRE32
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#endif /* COMPILE_PCRE[8|16|32] */
  /* The next load goes into the other register. */
  context->sourcereg = TMP2;
  }

#ifdef SUPPORT_UTF
utflength = 1;
if (common->utf && HAS_EXTRALEN(*cc))
  utflength += GET_EXTRALEN(*cc);

do
  {
#endif

  context->length -= IN_UCHARS(1);
#if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)

  /* Unaligned read is supported. */
  /* Collect the expected unit in context->c and its case mask in
  context->oc; several units are then checked by a single comparison. */
  if (othercasebit != 0 && othercasechar == cc)
    {
    context->c.asuchars[context->ucharptr] = *cc | othercasebit;
    context->oc.asuchars[context->ucharptr] = othercasebit;
    }
  else
    {
    context->c.asuchars[context->ucharptr] = *cc;
    context->oc.asuchars[context->ucharptr] = 0;
    }
  context->ucharptr++;

  /* Flush when a full 32 bit group has been collected, when the sequence
  ends, or (8 bit only) when a 16 bit group is complete and exactly one more
  unit remains. */
#if defined COMPILE_PCRE8
  if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
#else
  if (context->ucharptr >= 2 || context->length == 0)
#endif
    {
    /* Load the data which follows the collected group into the currently
    unused register before comparing. */
    if (context->length >= 4)
      OP1(SLJIT_MOV_S32, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
    else if (context->length >= 2)
      OP1(SLJIT_MOV_U16, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
#if defined COMPILE_PCRE8
    else if (context->length >= 1)
      OP1(SLJIT_MOV_U8, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
#endif /* COMPILE_PCRE8 */
    /* Switch to the register loaded earlier: it holds the data which belongs
    to the collected group. */
    context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;

    switch(context->ucharptr)
      {
      case 4 / sizeof(pcre_uchar):
      if (context->oc.asint != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
      break;

      case 2 / sizeof(pcre_uchar):
      if (context->oc.asushort != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
      break;

#ifdef COMPILE_PCRE8
      case 1:
      if (context->oc.asbyte != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
      break;
#endif

      default:
      SLJIT_UNREACHABLE();
      break;
      }
    /* Start collecting a new group. */
    context->ucharptr = 0;
    }

#else

  /* Unaligned read is unsupported or in 32 bit mode. */
  /* One unit is compared at a time: load the following unit (if any) into
  the unused register, then compare the unit loaded earlier. */
  if (context->length >= 1)
    OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);

  context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;

  if (othercasebit != 0 && othercasechar == cc)
    {
    OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
    }
  else
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));

#endif

  cc++;
#ifdef SUPPORT_UTF
  utflength--;
  }
while (utflength > 0);
#endif

return cc;
}
5434
5435 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
5436
/* Helper macros for compile_xclass_matchingpath. The generated code keeps the
character type (in typereg) and the character (in TMP1) rebased by a constant
which is tracked at compile time in typeoffset / charoffset. Each macro emits
an ADD or SUB which moves the register from the current base to the requested
one (nothing is emitted when the base is unchanged) and then records the new
base.

Both macros expand to more than one statement and emit code at the point of
use. They must not be used as the unbraced body of an if / else, and they must
not be placed where the emitted instruction can be skipped at run time,
because the compile time offset would then disagree with the register. */
#define SET_TYPE_OFFSET(value) \
  if ((value) != typeoffset) \
    { \
    if ((value) < typeoffset) \
      OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
    else \
      OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
    } \
  typeoffset = (value);

#define SET_CHAR_OFFSET(value) \
  if ((value) != charoffset) \
    { \
    if ((value) < charoffset) \
      OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
    else \
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
    } \
  charoffset = (value);
5456
5457 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks, BOOL check_str_ptr);
5458
static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
{
/* Emits the matching code for an extended character class.

Arguments:
  common      compiler state
  cc          points to the flag unit of the class data. As read by this
                function the layout is: flags (XCL_NOT, XCL_MAP, XCL_HASPROP),
                an optional 32 byte bitmap when XCL_MAP is set, then a list of
                XCL_SINGLE / XCL_RANGE / XCL_PROP / XCL_NOTPROP items which is
                terminated by XCL_END
  backtracks  jump list that receives every "no match" exit

The work is done in three steps:
  1. the item list is scanned to find the character range which has to be
     read and, with SUPPORT_UCP, which Unicode data is required;
  2. the character is read and the bitmap / Unicode record lookups are
     emitted;
  3. one test is emitted for each item.

For a positive class a successful item jumps to the local "found" list, which
is bound to the end of the generated code. For a negated class (XCL_NOT) a
successful item jumps to backtracks instead. The test of the last item is
inverted when required, so falling through always means a match. */
DEFINE_COMPILER;
jump_list *found = NULL;
jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
struct sljit_jump *jump = NULL;
pcre_uchar *ccbegin;
int compares, invertcmp, numberofcmps;
#if defined SUPPORT_UTF && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
/* Not referenced by name below; presumably required by the GETCHARINCTEST
macro - confirm before removing. */
BOOL utf = common->utf;
#endif

#ifdef SUPPORT_UCP
BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
BOOL charsaved = FALSE;
int typereg = TMP1;
const sljit_u32 *other_cases;
sljit_uw typeoffset;
#endif

/* Scanning the necessary info. */
cc++;
ccbegin = cc;
compares = 0;
if (cc[-1] & XCL_MAP)
  {
  /* The bitmap covers the characters below 256. */
  min = 0;
  cc += 32 / sizeof(pcre_uchar);
  }

while (*cc != XCL_END)
  {
  compares++;
  if (*cc == XCL_SINGLE)
    {
    cc++;
    GETCHARINCTEST(c, cc);
    if (c > max) max = c;
    if (c < min) min = c;
#ifdef SUPPORT_UCP
    needschar = TRUE;
#endif
    }
  else if (*cc == XCL_RANGE)
    {
    cc++;
    GETCHARINCTEST(c, cc);
    if (c < min) min = c;
    GETCHARINCTEST(c, cc);
    if (c > max) max = c;
#ifdef SUPPORT_UCP
    needschar = TRUE;
#endif
    }
#ifdef SUPPORT_UCP
  else
    {
    SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
    cc++;
    if (*cc == PT_CLIST)
      {
      /* A caseless set is an explicit list of characters, so the range can
      still be narrowed. */
      other_cases = PRIV(ucd_caseless_sets) + cc[1];
      while (*other_cases != NOTACHAR)
        {
        if (*other_cases > max) max = *other_cases;
        if (*other_cases < min) min = *other_cases;
        other_cases++;
        }
      }
    else
      {
      /* Any other property may match any character. */
      max = READ_CHAR_MAX;
      min = 0;
      }

    switch(*cc)
      {
      case PT_ANY:
      /* Any either accepts everything or is ignored. */
      if (cc[-1] == XCL_PROP)
        {
        compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE);
        /* A negated class which contains "any" can never match. */
        if (list == backtracks)
          add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
        return;
        }
      break;

      case PT_LAMP:
      case PT_GC:
      case PT_PC:
      case PT_ALNUM:
      needstype = TRUE;
      break;

      case PT_SC:
      needsscript = TRUE;
      break;

      case PT_SPACE:
      case PT_PXSPACE:
      case PT_WORD:
      case PT_PXGRAPH:
      case PT_PXPRINT:
      case PT_PXPUNCT:
      needstype = TRUE;
      needschar = TRUE;
      break;

      case PT_CLIST:
      case PT_UCNC:
      needschar = TRUE;
      break;

      default:
      SLJIT_UNREACHABLE();
      break;
      }
    cc += 2;
    }
#endif
  }
SLJIT_ASSERT(compares > 0);

/* We are not necessarily in UTF mode, even in 8 bit mode. */
cc = ccbegin;
/* The code below expects the character in TMP1 after this call. */
read_char_range(common, min, max, (cc[-1] & XCL_NOT) != 0);

if ((cc[-1] & XCL_HASPROP) == 0)
  {
  if ((cc[-1] & XCL_MAP) != 0)
    {
    /* Characters above 255 skip the bitmap and are tested by the items. */
    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
    if (!check_class_ranges(common, (const sljit_u8 *)cc, (((const sljit_u8 *)cc)[31] & 0x80) != 0, TRUE, &found))
      {
      /* Generic bitmap test: byte index is char >> 3, bit is 1 << (char & 7). */
      OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
      OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
      OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
      OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
      OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
      add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO));
      }

    /* A character below 256 which is not in the bitmap cannot match. */
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    JUMPHERE(jump);

    cc += 32 / sizeof(pcre_uchar);
    }
  else
    {
    /* Quick rejection of characters outside [min, max]. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
    add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, max - min));
    }
  }
else if ((cc[-1] & XCL_MAP) != 0)
  {
  /* The bitmap test destroys TMP1, so the character is kept in RETURN_ADDR. */
  OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
#ifdef SUPPORT_UCP
  charsaved = TRUE;
#endif
  if (!check_class_ranges(common, (const sljit_u8 *)cc, FALSE, TRUE, list))
    {
#ifdef COMPILE_PCRE8
    jump = NULL;
    if (common->utf)
#endif
      jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);

    OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
    OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
    OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
    OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
    add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO));

#ifdef COMPILE_PCRE8
    if (common->utf)
#endif
      JUMPHERE(jump);
    }

  OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
  cc += 32 / sizeof(pcre_uchar);
  }

#ifdef SUPPORT_UCP
if (needstype || needsscript)
  {
  /* The record lookup destroys TMP1 as well. */
  if (needschar && !charsaved)
    OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);

#ifdef COMPILE_PCRE32
  if (!common->utf)
    {
    /* Values above 0x10ffff are replaced before the table lookup. */
    jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10ffff + 1);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
    JUMPHERE(jump);
    }
#endif

  /* Two stage table lookup; TMP2 receives the 16 bit value read from
  ucd_stage2, which is used as the record index below. */
  OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
  OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);

  /* Before anything else, we deal with scripts. */
  if (needsscript)
    {
    /* The record index is scaled by 8 (shift of 3) when addressing. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);

    ccbegin = cc;

    while (*cc != XCL_END)
      {
      if (*cc == XCL_SINGLE)
        {
        cc++;
        GETCHARINCTEST(c, cc);
        }
      else if (*cc == XCL_RANGE)
        {
        cc++;
        GETCHARINCTEST(c, cc);
        GETCHARINCTEST(c, cc);
        }
      else
        {
        SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
        cc++;
        if (*cc == PT_SC)
          {
          compares--;
          invertcmp = (compares == 0 && list != backtracks);
          if (cc[-1] == XCL_NOTPROP)
            invertcmp ^= 0x1;
          jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]);
          add_jump(compiler, compares > 0 ? list : backtracks, jump);
          }
        cc += 2;
        }
      }

    cc = ccbegin;
    }

  if (needschar)
    {
    OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
    }

  if (needstype)
    {
    if (!needschar)
      {
      /* The character itself is not needed: the type can live in TMP1. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
      OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
      }
    else
      {
      /* TMP1 holds the character, so the type is kept in RETURN_ADDR. */
      OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
      typereg = RETURN_ADDR;
      }
    }
  }
#endif

/* Generating code. */
charoffset = 0;
numberofcmps = 0;
#ifdef SUPPORT_UCP
typeoffset = 0;
#endif

while (*cc != XCL_END)
  {
  compares--;
  /* The last test of a positive class is inverted: its failure goes to
  backtracks and falling through means a match. */
  invertcmp = (compares == 0 && list != backtracks);
  jump = NULL;

  if (*cc == XCL_SINGLE)
    {
    cc++;
    GETCHARINCTEST(c, cc);

    /* Up to four consecutive character / range tests are collected in TMP2
    and decided by a single jump. */
    if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
      {
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
      numberofcmps++;
      }
    else if (numberofcmps > 0)
      {
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      numberofcmps = 0;
      }
    else
      {
      jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      numberofcmps = 0;
      }
    }
  else if (*cc == XCL_RANGE)
    {
    cc++;
    GETCHARINCTEST(c, cc);
    /* Rebase TMP1 to the start of the range, so that one unsigned
    comparison with the range length is enough. */
    SET_CHAR_OFFSET(c);
    GETCHARINCTEST(c, cc);

    if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
      {
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
      numberofcmps++;
      }
    else if (numberofcmps > 0)
      {
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      numberofcmps = 0;
      }
    else
      {
      jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      numberofcmps = 0;
      }
    }
#ifdef SUPPORT_UCP
  else
    {
    SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
    if (*cc == XCL_NOTPROP)
      invertcmp ^= 0x1;
    cc++;
    switch(*cc)
      {
      case PT_ANY:
      if (!invertcmp)
        jump = JUMP(SLJIT_JUMP);
      break;

      case PT_LAMP:
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      case PT_GC:
      c = PRIV(ucp_typerange)[(int)cc[1] * 2];
      SET_TYPE_OFFSET(c);
      jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
      break;

      case PT_PC:
      jump = CMP(SLJIT_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
      break;

      case PT_SC:
      /* Scripts were handled earlier; undo the decrement done at the top of
      the loop, because the script pass has already counted this item. */
      compares++;
      /* Do nothing. */
      break;

      case PT_SPACE:
      case PT_PXSPACE:
      SET_CHAR_OFFSET(9);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);

      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

      SET_TYPE_OFFSET(ucp_Zl);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      case PT_WORD:
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
      /* Fall through. */

      case PT_ALNUM:
      SET_TYPE_OFFSET(ucp_Ll);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
      /* PT_WORD has already stored the underscore test in TMP2. */
      OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
      SET_TYPE_OFFSET(ucp_Nd);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      case PT_CLIST:
      other_cases = PRIV(ucd_caseless_sets) + cc[1];

      /* At least three characters are required.
         Otherwise this case would be handled by the normal code path. */
      SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
      SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);

      /* Optimizing character pairs, if their difference is power of 2:
      setting the differing bit maps both characters to the larger one, so a
      single comparison tests the pair. The OR needs the real character value,
      hence the offset is added back into TMP2 when TMP1 is rebased. */
      if (is_powerof2(other_cases[1] ^ other_cases[0]))
        {
        if (charoffset == 0)
          OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
        else
          {
          OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
          OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
          }
        OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
        other_cases += 2;
        }
      else if (is_powerof2(other_cases[2] ^ other_cases[1]))
        {
        if (charoffset == 0)
          OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
        else
          {
          OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
          /* The mask must be the bit which separates the second and the third
          character, exactly as in the zero offset case above. */
          OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
          }
        OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);

        OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
        OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);

        other_cases += 3;
        }
      else
        {
        OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
        }

      /* The remaining characters are tested one by one; only the last
      OP_FLAGS sets the zero flag which is consumed by the jump. */
      while (*other_cases != NOTACHAR)
        {
        OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
        OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
        }
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      case PT_UCNC:
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

      SET_CHAR_OFFSET(0xa0);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
      SET_CHAR_OFFSET(0);
      OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_GREATER_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      case PT_PXGRAPH:
      /* C and Z groups are the farthest two groups. */
      SET_TYPE_OFFSET(ucp_Ll);
      OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER);

      /* The character must be rebased before the conditional jump: the
      instruction emitted by SET_CHAR_OFFSET has to run on both paths,
      otherwise TMP1 would disagree with charoffset for the items which
      follow whenever the jump is taken. */
      SET_CHAR_OFFSET(0x2066);

      jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);

      /* In case of ucp_Cf, we overwrite the result. */
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);

      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

      JUMPHERE(jump);
      /* TMP2 is non-zero for the excluded characters. */
      jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
      break;

      case PT_PXPRINT:
      /* C and Z groups are the farthest two groups. */
      SET_TYPE_OFFSET(ucp_Ll);
      OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER);

      /* Unlike PT_PXGRAPH, the Zs type is not excluded here. */
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
      OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_NOT_EQUAL);

      /* Rebase before the conditional jump, see PT_PXGRAPH. */
      SET_CHAR_OFFSET(0x2066);

      jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);

      /* In case of ucp_Cf, we overwrite the result. */
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);

      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

      JUMPHERE(jump);
      jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
      break;

      case PT_PXPUNCT:
      SET_TYPE_OFFSET(ucp_Sc);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);

      /* The S types count only for characters up to 0x7f. */
      SET_CHAR_OFFSET(0);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x7f);
      OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_LESS_EQUAL);

      SET_TYPE_OFFSET(ucp_Pc);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      default:
      SLJIT_UNREACHABLE();
      break;
      }
    cc += 2;
    }
#endif

  /* Every test except the last one jumps to "list" on success; the last
  (possibly inverted) test jumps to backtracks. */
  if (jump != NULL)
    add_jump(compiler, compares > 0 ? list : backtracks, jump);
  }

if (found != NULL)
  set_jumps(found, LABEL());
}
6012
6013 #undef SET_TYPE_OFFSET
6014 #undef SET_CHAR_OFFSET
6015
6016 #endif
6017
compile_simple_assertion_matchingpath(compiler_common * common,pcre_uchar type,pcre_uchar * cc,jump_list ** backtracks)6018 static pcre_uchar *compile_simple_assertion_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
6019 {
6020 DEFINE_COMPILER;
6021 int length;
6022 struct sljit_jump *jump[4];
6023 #ifdef SUPPORT_UTF
6024 struct sljit_label *label;
6025 #endif /* SUPPORT_UTF */
6026
6027 switch(type)
6028 {
6029 case OP_SOD:
6030 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6031 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6032 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
6033 return cc;
6034
6035 case OP_SOM:
6036 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6037 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6038 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
6039 return cc;
6040
6041 case OP_NOT_WORD_BOUNDARY:
6042 case OP_WORD_BOUNDARY:
6043 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
6044 sljit_set_current_flags(compiler, SLJIT_SET_Z);
6045 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_NOT_ZERO : SLJIT_ZERO));
6046 return cc;
6047
6048 case OP_EODN:
6049 /* Requires rather complex checks. */
6050 jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6051 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
6052 {
6053 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
6054 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
6055 if (common->mode == JIT_COMPILE)
6056 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
6057 else
6058 {
6059 jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);
6060 OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
6061 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
6062 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
6063 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_EQUAL);
6064 add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
6065 check_partial(common, TRUE);
6066 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6067 JUMPHERE(jump[1]);
6068 }
6069 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
6070 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
6071 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
6072 }
6073 else if (common->nltype == NLTYPE_FIXED)
6074 {
6075 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6076 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
6077 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
6078 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
6079 }
6080 else
6081 {
6082 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
6083 jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
6084 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
6085 OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
6086 jump[2] = JUMP(SLJIT_GREATER);
6087 add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL) /* LESS */);
6088 /* Equal. */
6089 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
6090 jump[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
6091 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6092
6093 JUMPHERE(jump[1]);
6094 if (common->nltype == NLTYPE_ANYCRLF)
6095 {
6096 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6097 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0));
6098 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
6099 }
6100 else
6101 {
6102 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
6103 read_char_range(common, common->nlmin, common->nlmax, TRUE);
6104 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
6105 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
6106 sljit_set_current_flags(compiler, SLJIT_SET_Z);
6107 add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
6108 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
6109 }
6110 JUMPHERE(jump[2]);
6111 JUMPHERE(jump[3]);
6112 }
6113 JUMPHERE(jump[0]);
6114 check_partial(common, FALSE);
6115 return cc;
6116
6117 case OP_EOD:
6118 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
6119 check_partial(common, FALSE);
6120 return cc;
6121
6122 case OP_DOLL:
6123 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
6124 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
6125 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6126
6127 if (!common->endonly)
6128 compile_simple_assertion_matchingpath(common, OP_EODN, cc, backtracks);
6129 else
6130 {
6131 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
6132 check_partial(common, FALSE);
6133 }
6134 return cc;
6135
6136 case OP_DOLLM:
6137 jump[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
6138 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
6139 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
6140 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6141 check_partial(common, FALSE);
6142 jump[0] = JUMP(SLJIT_JUMP);
6143 JUMPHERE(jump[1]);
6144
6145 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
6146 {
6147 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
6148 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
6149 if (common->mode == JIT_COMPILE)
6150 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0));
6151 else
6152 {
6153 jump[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
6154 /* STR_PTR = STR_END - IN_UCHARS(1) */
6155 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
6156 check_partial(common, TRUE);
6157 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6158 JUMPHERE(jump[1]);
6159 }
6160
6161 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
6162 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
6163 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
6164 }
6165 else
6166 {
6167 peek_char(common, common->nlmax);
6168 check_newlinechar(common, common->nltype, backtracks, FALSE);
6169 }
6170 JUMPHERE(jump[0]);
6171 return cc;
6172
6173 case OP_CIRC:
6174 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
6175 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
6176 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
6177 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
6178 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6179 return cc;
6180
6181 case OP_CIRCM:
6182 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
6183 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
6184 jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0);
6185 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
6186 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6187 jump[0] = JUMP(SLJIT_JUMP);
6188 JUMPHERE(jump[1]);
6189
6190 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
6191 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
6192 {
6193 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
6194 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, TMP1, 0));
6195 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
6196 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
6197 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
6198 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
6199 }
6200 else
6201 {
6202 skip_char_back(common);
6203 read_char_range(common, common->nlmin, common->nlmax, TRUE);
6204 check_newlinechar(common, common->nltype, backtracks, FALSE);
6205 }
6206 JUMPHERE(jump[0]);
6207 return cc;
6208
6209 case OP_REVERSE:
6210 length = GET(cc, 0);
6211 if (length == 0)
6212 return cc + LINK_SIZE;
6213 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6214 #ifdef SUPPORT_UTF
6215 if (common->utf)
6216 {
6217 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6218 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
6219 label = LABEL();
6220 add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
6221 skip_char_back(common);
6222 OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
6223 JUMPTO(SLJIT_NOT_ZERO, label);
6224 }
6225 else
6226 #endif
6227 {
6228 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6229 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
6230 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0));
6231 }
6232 check_start_used_ptr(common);
6233 return cc + LINK_SIZE;
6234 }
6235 SLJIT_UNREACHABLE();
6236 return cc;
6237 }
6238
/* Emits the matching-path code for one single-character opcode.

Arguments:
  common         the shared JIT compiler state
  type           the opcode to compile (OP_DIGIT, OP_ANY, OP_CHAR, OP_CLASS, ...)
  cc             points to the data that follows the opcode
  backtracks     list that receives every jump taken when the match fails
  check_str_ptr  when TRUE, detect_partial_match() is called before the
                 character is read (OP_CHAR/OP_CHARI in JIT_COMPILE mode may
                 instead compare STR_PTR + length against STR_END inline)

Returns: cc advanced past the operand data consumed by the opcode (unchanged
for opcodes without operands).

NOTE(review): the comments below assume that the read_char* helpers leave the
character (or its type bits) in TMP1 and advance STR_PTR; this matches how TMP1
and STR_PTR are used here, but confirm against the helper definitions. */
static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks, BOOL check_str_ptr)
{
DEFINE_COMPILER;
int length;
unsigned int c, oc, bit;
compare_context context;
struct sljit_jump *jump[3];
jump_list *end_list;
#ifdef SUPPORT_UTF
struct sljit_label *label;
#ifdef SUPPORT_UCP
pcre_uchar propdata[5];
#endif
#endif /* SUPPORT_UTF */

switch(type)
  {
  case OP_NOT_DIGIT:
  case OP_DIGIT:
  /* Digits are usually 0-9, so it is worth optimizing them. */
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
  if (common->utf && is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
    read_char7_type(common, type == OP_NOT_DIGIT);
  else
#endif
    read_char8_type(common, type == OP_NOT_DIGIT);
    /* Flip the starting bit in the negative case. */
  OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
  /* Fail when the ctype_digit bit disagrees with the requested polarity. */
  add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO));
  return cc;

  case OP_NOT_WHITESPACE:
  case OP_WHITESPACE:
  /* Same scheme as the digit case, testing the ctype_space bit. */
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
  if (common->utf && is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_space, FALSE))
    read_char7_type(common, type == OP_NOT_WHITESPACE);
  else
#endif
    read_char8_type(common, type == OP_NOT_WHITESPACE);
  OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
  add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO));
  return cc;

  case OP_NOT_WORDCHAR:
  case OP_WORDCHAR:
  /* Same scheme as the digit case, testing the ctype_word bit. */
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
  if (common->utf && is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_word, FALSE))
    read_char7_type(common, type == OP_NOT_WORDCHAR);
  else
#endif
    read_char8_type(common, type == OP_NOT_WORDCHAR);
  OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
  add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO));
  return cc;

  case OP_ANY:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  read_char_range(common, common->nlmin, common->nlmax, TRUE);
  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
    {
    /* Two-unit fixed newline: only when the unit just read equals the first
    newline unit is the following unit inspected. */
    jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
    end_list = NULL;
    if (common->mode != JIT_PARTIAL_HARD_COMPILE)
      add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
    else
      check_str_end(common, &end_list);

    /* Fail if the next unit completes the newline sequence. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
    set_jumps(end_list, LABEL());
    JUMPHERE(jump[0]);
    }
  else
    check_newlinechar(common, common->nltype, backtracks, TRUE);
  return cc;

  case OP_ALLANY:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
#ifdef SUPPORT_UTF
  if (common->utf)
    {
    /* Load the first code unit and step over it; the character value itself
    is not needed, only its encoded length. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
#if defined COMPILE_PCRE8
    /* Units below 0xc0 need no extra skip; otherwise the additional length
    is looked up in utf8_table4, indexed by (unit - 0xc0). */
    jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
#elif defined COMPILE_PCRE16
    /* Units below 0xd800 need no extra skip. Otherwise TMP1 becomes 1 when
    (unit & 0xfc00) == 0xd800 and 0 if not, and is shifted left by one, so
    STR_PTR advances by a further 2 or 0. */
    jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
#endif
    JUMPHERE(jump[0]);
#endif /* COMPILE_PCRE[8|16] */
    return cc;
    }
#endif
  /* Non-UTF mode: every character is exactly one code unit. */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  return cc;

  case OP_ANYBYTE:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  /* Always advances by a single code unit, regardless of UTF mode. */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  return cc;

#ifdef SUPPORT_UTF
#ifdef SUPPORT_UCP
  case OP_NOTPROP:
  case OP_PROP:
  /* Build a temporary extended-class description holding the single
  property item (its two operand units are copied from cc) and hand it to the
  XCLASS compiler. */
  propdata[0] = XCL_HASPROP;
  propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
  propdata[2] = cc[0];
  propdata[3] = cc[1];
  propdata[4] = XCL_END;
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  compile_xclass_matchingpath(common, propdata, backtracks);
  return cc + 2;
#endif
#endif

  case OP_ANYNL:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  read_char_range(common, common->bsr_nlmin, common->bsr_nlmax, FALSE);
  /* Anything other than CR is checked by check_newlinechar() below. */
  jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
  /* We don't need to handle soft partial matching case. */
  end_list = NULL;
  if (common->mode != JIT_PARTIAL_HARD_COMPILE)
    add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  else
    check_str_end(common, &end_list);
  /* After CR: if the next unit is NL it is consumed as well, otherwise the
  CR alone is accepted. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  jump[2] = JUMP(SLJIT_JUMP);
  JUMPHERE(jump[0]);
  check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
  set_jumps(end_list, LABEL());
  JUMPHERE(jump[1]);
  JUMPHERE(jump[2]);
  return cc;

  case OP_NOT_HSPACE:
  case OP_HSPACE:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  read_char_range(common, 0x9, 0x3000, type == OP_NOT_HSPACE);
  /* Call the shared hspace routine; its result is examined through the zero
  flag, which is declared to the compiler right after the call. */
  add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
  add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  return cc;

  case OP_NOT_VSPACE:
  case OP_VSPACE:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  read_char_range(common, 0xa, 0x2029, type == OP_NOT_VSPACE);
  /* Same scheme as the hspace case, using the shared vspace routine. */
  add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
  add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  return cc;

#ifdef SUPPORT_UCP
  case OP_EXTUNI:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  /* Read the first character and fetch the gbprop field of its ucd_record
  (NOTE(review): assumes the getucd routine leaves the record index in TMP2,
  as the SLJIT_MEM2(TMP1, TMP2) access suggests - confirm). */
  read_char(common);
  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
  /* Optimize register allocation: use a real register. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
  OP1(SLJIT_MOV_U8, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);

  /* Loop: STACK_TOP holds the gbprop of the previous character. Each pass
  reads the next character, remembering its start in TMP3. */
  label = LABEL();
  jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
  read_char(common);
  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);

  /* Load the 32-bit ucp_gbtable entry of the previous gbprop and test the
  bit numbered by the new gbprop; the new value becomes the previous one. */
  OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
  OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
  OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
  OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
  OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
  JUMPTO(SLJIT_NOT_ZERO, label);

  /* Bit clear: step back to the start of the character that was just read. */
  OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
  JUMPHERE(jump[0]);
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);

  if (common->mode == JIT_PARTIAL_HARD_COMPILE)
    {
    jump[0] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
    /* Since we successfully read a char above, partial matching must occur. */
    check_partial(common, TRUE);
    JUMPHERE(jump[0]);
    }
  return cc;
#endif

  case OP_CHAR:
  case OP_CHARI:
  /* length is the number of code units of the literal stored at cc. */
  length = 1;
#ifdef SUPPORT_UTF
  if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
#endif
  if (common->mode == JIT_COMPILE && check_str_ptr
      && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
    {
    /* Fast path: advance STR_PTR by the whole literal, fail if that passes
    STR_END, then let byte_sequence_compare() emit the comparison. */
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));

    context.length = IN_UCHARS(length);
    context.sourcereg = -1;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
    context.ucharptr = 0;
#endif
    return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
    }

  if (check_str_ptr)
    detect_partial_match(common, backtracks);
#ifdef SUPPORT_UTF
  if (common->utf)
    {
    GETCHAR(c, cc);
    }
  else
#endif
    c = *cc;

  if (type == OP_CHAR || !char_has_othercase(common, cc))
    {
    /* Only one value can match. */
    read_char_range(common, c, c, FALSE);
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
    return cc + length;
    }
  oc = char_othercase(common, c);
  read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, FALSE);
  bit = c ^ oc;
  if (is_powerof2(bit))
    {
    /* The two cases differ in a single bit: force that bit on and compare
    once. */
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
    return cc + length;
    }
  /* Otherwise accept either c or its other case with two comparisons. */
  jump[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c);
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
  JUMPHERE(jump[0]);
  return cc + length;

  case OP_NOT:
  case OP_NOTI:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  length = 1;
#ifdef SUPPORT_UTF
  if (common->utf)
    {
#ifdef COMPILE_PCRE8
    c = *cc;
    if (c < 128)
      {
      /* The excluded character is a single unit below 128, so comparing the
      first subject unit is sufficient. */
      OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
      if (type == OP_NOT || !char_has_othercase(common, cc))
        add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
      else
        {
        /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
        OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
        add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
        }
      /* Skip the variable-length character. */
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
      JUMPHERE(jump[0]);
      return cc + 1;
      }
    else
#endif /* COMPILE_PCRE8 */
      {
      GETCHARLEN(c, cc, length);
      }
    }
  else
#endif /* SUPPORT_UTF */
    c = *cc;

  /* General case: mirror image of OP_CHAR/OP_CHARI, failing on equality. */
  if (type == OP_NOT || !char_has_othercase(common, cc))
    {
    read_char_range(common, c, c, TRUE);
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
    }
  else
    {
    oc = char_othercase(common, c);
    read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, TRUE);
    bit = c ^ oc;
    if (is_powerof2(bit))
      {
      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
      }
    else
      {
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
      }
    }
  return cc + length;

  case OP_CLASS:
  case OP_NCLASS:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);

#if defined SUPPORT_UTF && defined COMPILE_PCRE8
  /* bit is the largest value covered by the bitmap test below: 127 when the
  bitmap qualifies as a 7-bit set in UTF mode, 255 otherwise. */
  bit = (common->utf && is_char7_bitset((const sljit_u8 *)cc, type == OP_NCLASS)) ? 127 : 255;
  read_char_range(common, 0, bit, type == OP_NCLASS);
#else
  read_char_range(common, 0, 255, type == OP_NCLASS);
#endif

  /* A TRUE result means check_class_ranges() has already emitted the whole
  test, so only the 32-byte bitmap has to be skipped. */
  if (check_class_ranges(common, (const sljit_u8 *)cc, type == OP_NCLASS, FALSE, backtracks))
    return cc + 32 / sizeof(pcre_uchar);

#if defined SUPPORT_UTF && defined COMPILE_PCRE8
  /* Characters above the bitmap limit fail for OP_CLASS; for OP_NCLASS
  jump[0] is kept and bound after the bitmap test, i.e. they match. */
  jump[0] = NULL;
  if (common->utf)
    {
    jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, bit);
    if (type == OP_CLASS)
      {
      add_jump(compiler, backtracks, jump[0]);
      jump[0] = NULL;
      }
    }
#elif !defined COMPILE_PCRE8
  jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
  if (type == OP_CLASS)
    {
    add_jump(compiler, backtracks, jump[0]);
    jump[0] = NULL;
    }
#endif /* SUPPORT_UTF && COMPILE_PCRE8 */

  /* Bitmap lookup: TMP1 = bitmap byte at index (c >> 3), TMP2 = 1 << (c & 7);
  fail when the selected bit is clear. */
  OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
  OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
  OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
  add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));

#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
  if (jump[0] != NULL)
    JUMPHERE(jump[0]);
#endif
  return cc + 32 / sizeof(pcre_uchar);

#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
  case OP_XCLASS:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  /* The class data starts after the LINK_SIZE field; the value read by
  GET(cc, 0) is used to step over the whole item. */
  compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
  return cc + GET(cc, 0) - 1;
#endif
  }
/* Every opcode handled above returns from inside the switch. */
SLJIT_UNREACHABLE();
return cc;
}
6627
compile_charn_matchingpath(compiler_common * common,pcre_uchar * cc,pcre_uchar * ccend,jump_list ** backtracks)6628 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
6629 {
6630 /* This function consumes at least one input character. */
6631 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
6632 DEFINE_COMPILER;
6633 pcre_uchar *ccbegin = cc;
6634 compare_context context;
6635 int size;
6636
6637 context.length = 0;
6638 do
6639 {
6640 if (cc >= ccend)
6641 break;
6642
6643 if (*cc == OP_CHAR)
6644 {
6645 size = 1;
6646 #ifdef SUPPORT_UTF
6647 if (common->utf && HAS_EXTRALEN(cc[1]))
6648 size += GET_EXTRALEN(cc[1]);
6649 #endif
6650 }
6651 else if (*cc == OP_CHARI)
6652 {
6653 size = 1;
6654 #ifdef SUPPORT_UTF
6655 if (common->utf)
6656 {
6657 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
6658 size = 0;
6659 else if (HAS_EXTRALEN(cc[1]))
6660 size += GET_EXTRALEN(cc[1]);
6661 }
6662 else
6663 #endif
6664 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
6665 size = 0;
6666 }
6667 else
6668 size = 0;
6669
6670 cc += 1 + size;
6671 context.length += IN_UCHARS(size);
6672 }
6673 while (size > 0 && context.length <= 128);
6674
6675 cc = ccbegin;
6676 if (context.length > 0)
6677 {
6678 /* We have a fixed-length byte sequence. */
6679 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
6680 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
6681
6682 context.sourcereg = -1;
6683 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
6684 context.ucharptr = 0;
6685 #endif
6686 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
6687 return cc;
6688 }
6689
6690 /* A non-fixed length character will be checked if length == 0. */
6691 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks, TRUE);
6692 }
6693
/* Forward definitions. */
static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);

/* Allocates a zero-filled backtrack record of `size` bytes from the compiler's
memory, stores `ccstart` in its cc field and pushes it on the parent's list
(the previous parent->top becomes its prev link). If the compiler is in an
error state after the allocation, the enclosing function returns `error`.
The macro relies on the names `compiler`, `backtrack` and `parent` being in
scope at the point of use. */
#define PUSH_BACKTRACK(size, ccstart, error) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return error; \
    memset(backtrack, 0, size); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)

/* Same as PUSH_BACKTRACK, for functions returning void: on a compiler error
the enclosing function simply returns. */
#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(backtrack, 0, size); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)

/* Views the local `backtrack` pointer as a pointer to the given record type. */
#define BACKTRACK_AS(type) ((type *)backtrack)
6725
static void compile_dnref_search(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
{
/* Emits the lookup for a duplicate-name reference (OP_DNREF / OP_DNREFI).
The name table entries belonging to the name are tried in order, and TMP2 is
left holding the address of the OVECTOR pair of the first group whose start
differs from the value stored in OVECTOR(1). When no earlier entry qualifies,
TMP2 refers to the last entry; if that one is unset as well, a jump is added
to backtracks (unless backtracks is NULL or jscript_compat is enabled). */
DEFINE_COMPILER;
int entries = GET2(cc, 1 + IMM2_SIZE);
int i;
pcre_uchar *entry = common->name_table + GET2(cc, 1) * common->name_entry_size;
unsigned int ovec_index;
jump_list *is_set = NULL;

SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);

/* TMP1 holds the OVECTOR(1) value every candidate is compared against. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));

/* All entries but the last one: leave as soon as a set group is found. */
for (i = 1; i < entries; i++, entry += common->name_entry_size)
  {
  ovec_index = GET2(entry, 0) << 1;
  GET_LOCAL_BASE(TMP2, 0, OVECTOR(ovec_index));
  add_jump(compiler, &is_set, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(ovec_index), TMP1, 0));
  }

/* The last entry is the fallback choice, whether it is set or not. */
ovec_index = GET2(entry, 0) << 1;
GET_LOCAL_BASE(TMP2, 0, OVECTOR(ovec_index));
if (backtracks != NULL && !common->jscript_compat)
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(ovec_index), TMP1, 0));

set_jumps(is_set, LABEL());
}
6755
/* Emits the code that matches the subject at STR_PTR against the text captured
by a back reference.

Arguments:
  common      the shared JIT compiler state
  cc          points to the reference opcode: OP_REF / OP_REFI (numbered group,
              the group number is read from cc) or OP_DNREF / OP_DNREFI
              (duplicate names; the address of the selected OVECTOR pair must
              already be in TMP2, see compile_dnref_search())
  backtracks  list that receives every jump taken when the match fails
  withchecks  when TRUE, an unset numbered group fails (unless jscript_compat
              is enabled) and an empty captured string is detected
  emptyfail   only meaningful with withchecks: when TRUE an empty captured
              string is treated as a failure, otherwise as an immediate match

Case sensitivity is derived from the opcode for both the numbered and the
duplicate-name forms: OP_REF and OP_DNREF compare casefully, OP_REFI and
OP_DNREFI caselessly (through do_utf_caselesscmp() in UTF mode when Unicode
property support is compiled in). */
static void compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
{
DEFINE_COMPILER;
BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
/* Must cover the duplicate-name opcode too: testing for OP_REF / OP_REFI only
would make OP_DNREF compare caselessly and would keep OP_DNREFI away from the
UTF-aware comparison. */
BOOL caseless = (*cc == OP_REFI || *cc == OP_DNREFI);
int offset = 0;
struct sljit_jump *jump = NULL;
struct sljit_jump *partial;
struct sljit_jump *nopartial;

/* TMP1 = start of the captured string. */
if (ref)
  {
  offset = GET2(cc, 1) << 1;
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
  /* OVECTOR(1) contains the "string begin - 1" constant. */
  if (withchecks && !common->jscript_compat)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
  }
else
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);

#if defined SUPPORT_UTF && defined SUPPORT_UCP
if (common->utf && caseless)
  {
  SLJIT_ASSERT(TMP1 == SLJIT_R0 && STACK_TOP == SLJIT_R1 && TMP2 == SLJIT_R2);
  /* TMP2 = end of the captured string. */
  if (ref)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
  else
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));

  /* Empty captured string. */
  if (withchecks)
    jump = CMP(SLJIT_EQUAL, TMP1, 0, TMP2, 0);

  /* Needed to save important temporary registers. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
  OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
  sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  /* A return value of 0 or 1 means failure; in the partial matching modes
  the value 1 additionally goes through check_partial(). Any other value is
  the new subject position. */
  if (common->mode == JIT_COMPILE)
    add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
  else
    {
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
    nopartial = CMP(SLJIT_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
    check_partial(common, FALSE);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    JUMPHERE(nopartial);
    }
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
  }
else
#endif /* SUPPORT_UTF && SUPPORT_UCP */
  {
  /* TMP2 = length of the captured string (end - start). */
  if (ref)
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
  else
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);

  /* Empty captured string. */
  if (withchecks)
    jump = JUMP(SLJIT_ZERO);

  /* Advance STR_PTR first; running past STR_END is a plain failure in
  JIT_COMPILE mode and the start of the partial match handling otherwise. */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  partial = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
  if (common->mode == JIT_COMPILE)
    add_jump(compiler, backtracks, partial);

  /* The comparison routine leaves zero in TMP2 when the strings are equal. */
  add_jump(compiler, caseless ? &common->caselesscmp : &common->casefulcmp, JUMP(SLJIT_FAST_CALL));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));

  if (common->mode != JIT_COMPILE)
    {
    nopartial = JUMP(SLJIT_JUMP);
    JUMPHERE(partial);
    /* TMP2 -= STR_END - STR_PTR */
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
    /* Compare only the part that fits before STR_END (nothing to compare
    when that length is zero), then report the partial match. */
    partial = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0);
    OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
    add_jump(compiler, caseless ? &common->caselesscmp : &common->casefulcmp, JUMP(SLJIT_FAST_CALL));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
    JUMPHERE(partial);
    check_partial(common, FALSE);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    JUMPHERE(nopartial);
    }
  }

/* Resolve the empty-string jump: either a failure or an immediate success. */
if (jump != NULL)
  {
  if (emptyfail)
    add_jump(compiler, backtracks, jump);
  else
    JUMPHERE(jump);
  }
}
6851
compile_ref_iterator_matchingpath(compiler_common * common,pcre_uchar * cc,backtrack_common * parent)6852 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6853 {
6854 DEFINE_COMPILER;
6855 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
6856 backtrack_common *backtrack;
6857 pcre_uchar type;
6858 int offset = 0;
6859 struct sljit_label *label;
6860 struct sljit_jump *zerolength;
6861 struct sljit_jump *jump = NULL;
6862 pcre_uchar *ccbegin = cc;
6863 int min = 0, max = 0;
6864 BOOL minimize;
6865
6866 PUSH_BACKTRACK(sizeof(ref_iterator_backtrack), cc, NULL);
6867
6868 if (ref)
6869 offset = GET2(cc, 1) << 1;
6870 else
6871 cc += IMM2_SIZE;
6872 type = cc[1 + IMM2_SIZE];
6873
6874 SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
6875 minimize = (type & 0x1) != 0;
6876 switch(type)
6877 {
6878 case OP_CRSTAR:
6879 case OP_CRMINSTAR:
6880 min = 0;
6881 max = 0;
6882 cc += 1 + IMM2_SIZE + 1;
6883 break;
6884 case OP_CRPLUS:
6885 case OP_CRMINPLUS:
6886 min = 1;
6887 max = 0;
6888 cc += 1 + IMM2_SIZE + 1;
6889 break;
6890 case OP_CRQUERY:
6891 case OP_CRMINQUERY:
6892 min = 0;
6893 max = 1;
6894 cc += 1 + IMM2_SIZE + 1;
6895 break;
6896 case OP_CRRANGE:
6897 case OP_CRMINRANGE:
6898 min = GET2(cc, 1 + IMM2_SIZE + 1);
6899 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
6900 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
6901 break;
6902 default:
6903 SLJIT_UNREACHABLE();
6904 break;
6905 }
6906
6907 if (!minimize)
6908 {
6909 if (min == 0)
6910 {
6911 allocate_stack(common, 2);
6912 if (ref)
6913 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
6914 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6915 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
6916 /* Temporary release of STR_PTR. */
6917 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6918 /* Handles both invalid and empty cases. Since the minimum repeat,
6919 is zero the invalid case is basically the same as an empty case. */
6920 if (ref)
6921 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6922 else
6923 {
6924 compile_dnref_search(common, ccbegin, NULL);
6925 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6926 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
6927 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6928 }
6929 /* Restore if not zero length. */
6930 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6931 }
6932 else
6933 {
6934 allocate_stack(common, 1);
6935 if (ref)
6936 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
6937 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6938 if (ref)
6939 {
6940 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
6941 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6942 }
6943 else
6944 {
6945 compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
6946 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6947 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
6948 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6949 }
6950 }
6951
6952 if (min > 1 || max > 1)
6953 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0);
6954
6955 label = LABEL();
6956 if (!ref)
6957 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
6958 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
6959
6960 if (min > 1 || max > 1)
6961 {
6962 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
6963 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6964 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
6965 if (min > 1)
6966 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, label);
6967 if (max > 1)
6968 {
6969 jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
6970 allocate_stack(common, 1);
6971 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6972 JUMPTO(SLJIT_JUMP, label);
6973 JUMPHERE(jump);
6974 }
6975 }
6976
6977 if (max == 0)
6978 {
6979 /* Includes min > 1 case as well. */
6980 allocate_stack(common, 1);
6981 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6982 JUMPTO(SLJIT_JUMP, label);
6983 }
6984
6985 JUMPHERE(zerolength);
6986 BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
6987
6988 count_match(common);
6989 return cc;
6990 }
6991
6992 allocate_stack(common, ref ? 2 : 3);
6993 if (ref)
6994 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
6995 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6996 if (type != OP_CRMINSTAR)
6997 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
6998
6999 if (min == 0)
7000 {
  /* Handles both invalid and empty cases. Since the minimum repeat
  is zero, the invalid case is basically the same as an empty case. */
7003 if (ref)
7004 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
7005 else
7006 {
7007 compile_dnref_search(common, ccbegin, NULL);
7008 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
7009 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
7010 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
7011 }
7012 /* Length is non-zero, we can match real repeats. */
7013 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
7014 jump = JUMP(SLJIT_JUMP);
7015 }
7016 else
7017 {
7018 if (ref)
7019 {
7020 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
7021 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
7022 }
7023 else
7024 {
7025 compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
7026 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
7027 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
7028 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
7029 }
7030 }
7031
7032 BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
7033 if (max > 0)
7034 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
7035
7036 if (!ref)
7037 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
7038 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
7039 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
7040
7041 if (min > 1)
7042 {
7043 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
7044 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
7045 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
7046 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(ref_iterator_backtrack)->matchingpath);
7047 }
7048 else if (max > 0)
7049 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
7050
7051 if (jump != NULL)
7052 JUMPHERE(jump);
7053 JUMPHERE(zerolength);
7054
7055 count_match(common);
7056 return cc;
7057 }
7058
compile_recurse_matchingpath(compiler_common * common,pcre_uchar * cc,backtrack_common * parent)7059 static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
7060 {
7061 DEFINE_COMPILER;
7062 backtrack_common *backtrack;
7063 recurse_entry *entry = common->entries;
7064 recurse_entry *prev = NULL;
7065 sljit_sw start = GET(cc, 1);
7066 pcre_uchar *start_cc;
7067 BOOL needs_control_head;
7068
7069 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
7070
7071 /* Inlining simple patterns. */
7072 if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
7073 {
7074 start_cc = common->start + start;
7075 compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
7076 BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
7077 return cc + 1 + LINK_SIZE;
7078 }
7079
7080 while (entry != NULL)
7081 {
7082 if (entry->start == start)
7083 break;
7084 prev = entry;
7085 entry = entry->next;
7086 }
7087
7088 if (entry == NULL)
7089 {
7090 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
7091 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
7092 return NULL;
7093 entry->next = NULL;
7094 entry->entry = NULL;
7095 entry->calls = NULL;
7096 entry->start = start;
7097
7098 if (prev != NULL)
7099 prev->next = entry;
7100 else
7101 common->entries = entry;
7102 }
7103
7104 if (common->has_set_som && common->mark_ptr != 0)
7105 {
7106 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
7107 allocate_stack(common, 2);
7108 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
7109 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
7110 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
7111 }
7112 else if (common->has_set_som || common->mark_ptr != 0)
7113 {
7114 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
7115 allocate_stack(common, 1);
7116 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
7117 }
7118
7119 if (entry->entry == NULL)
7120 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
7121 else
7122 JUMPTO(SLJIT_FAST_CALL, entry->entry);
7123 /* Leave if the match is failed. */
7124 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
7125 return cc + 1 + LINK_SIZE;
7126 }
7127
do_callout(struct jit_arguments * arguments,PUBL (callout_block)* callout_block,pcre_uchar ** jit_ovector)7128 static int SLJIT_CALL do_callout(struct jit_arguments *arguments, PUBL(callout_block) *callout_block, pcre_uchar **jit_ovector)
7129 {
7130 const pcre_uchar *begin = arguments->begin;
7131 int *offset_vector = arguments->offsets;
7132 int offset_count = arguments->offset_count;
7133 int i;
7134
7135 if (PUBL(callout) == NULL)
7136 return 0;
7137
7138 callout_block->version = 2;
7139 callout_block->callout_data = arguments->callout_data;
7140
7141 /* Offsets in subject. */
7142 callout_block->subject_length = arguments->end - arguments->begin;
7143 callout_block->start_match = (pcre_uchar*)callout_block->subject - arguments->begin;
7144 callout_block->current_position = (pcre_uchar*)callout_block->offset_vector - arguments->begin;
7145 #if defined COMPILE_PCRE8
7146 callout_block->subject = (PCRE_SPTR)begin;
7147 #elif defined COMPILE_PCRE16
7148 callout_block->subject = (PCRE_SPTR16)begin;
7149 #elif defined COMPILE_PCRE32
7150 callout_block->subject = (PCRE_SPTR32)begin;
7151 #endif
7152
7153 /* Convert and copy the JIT offset vector to the offset_vector array. */
7154 callout_block->capture_top = 0;
7155 callout_block->offset_vector = offset_vector;
7156 for (i = 2; i < offset_count; i += 2)
7157 {
7158 offset_vector[i] = jit_ovector[i] - begin;
7159 offset_vector[i + 1] = jit_ovector[i + 1] - begin;
7160 if (jit_ovector[i] >= begin)
7161 callout_block->capture_top = i;
7162 }
7163
7164 callout_block->capture_top = (callout_block->capture_top >> 1) + 1;
7165 if (offset_count > 0)
7166 offset_vector[0] = -1;
7167 if (offset_count > 1)
7168 offset_vector[1] = -1;
7169 return (*PUBL(callout))(callout_block);
7170 }
7171
7172 /* Aligning to 8 byte. */
7173 #define CALLOUT_ARG_SIZE \
7174 (((int)sizeof(PUBL(callout_block)) + 7) & ~7)
7175
7176 #define CALLOUT_ARG_OFFSET(arg) \
7177 SLJIT_OFFSETOF(PUBL(callout_block), arg)
7178
compile_callout_matchingpath(compiler_common * common,pcre_uchar * cc,backtrack_common * parent)7179 static SLJIT_INLINE pcre_uchar *compile_callout_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
7180 {
7181 DEFINE_COMPILER;
7182 backtrack_common *backtrack;
7183
7184 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
7185
7186 allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
7187
7188 SLJIT_ASSERT(common->capture_last_ptr != 0);
7189 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
7190 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
7191 OP1(SLJIT_MOV_S32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
7192 OP1(SLJIT_MOV_S32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
7193
7194 /* These pointer sized fields temporarly stores internal variables. */
7195 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
7196 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
7197 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);
7198
7199 if (common->mark_ptr != 0)
7200 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
7201 OP1(SLJIT_MOV_S32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));
7202 OP1(SLJIT_MOV_S32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));
7203 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
7204
7205 /* Needed to save important temporary registers. */
7206 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
7207 /* SLJIT_R0 = arguments */
7208 OP1(SLJIT_MOV, SLJIT_R1, 0, STACK_TOP, 0);
7209 GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START);
7210 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
7211 OP1(SLJIT_MOV_S32, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0);
7212 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
7213 free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
7214
7215 /* Check return value. */
7216 OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_SIG_GREATER, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
7217 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_SIG_GREATER));
7218 if (common->forced_quit_label == NULL)
7219 add_jump(compiler, &common->forced_quit, JUMP(SLJIT_NOT_EQUAL) /* SIG_LESS */);
7220 else
7221 JUMPTO(SLJIT_NOT_EQUAL /* SIG_LESS */, common->forced_quit_label);
7222 return cc + 2 + 2 * LINK_SIZE;
7223 }
7224
7225 #undef CALLOUT_ARG_SIZE
7226 #undef CALLOUT_ARG_OFFSET
7227
assert_needs_str_ptr_saving(pcre_uchar * cc)7228 static SLJIT_INLINE BOOL assert_needs_str_ptr_saving(pcre_uchar *cc)
7229 {
7230 while (TRUE)
7231 {
7232 switch (*cc)
7233 {
7234 case OP_NOT_WORD_BOUNDARY:
7235 case OP_WORD_BOUNDARY:
7236 case OP_CIRC:
7237 case OP_CIRCM:
7238 case OP_DOLL:
7239 case OP_DOLLM:
7240 case OP_CALLOUT:
7241 case OP_ALT:
7242 cc += PRIV(OP_lengths)[*cc];
7243 break;
7244
7245 case OP_KET:
7246 return FALSE;
7247
7248 default:
7249 return TRUE;
7250 }
7251 }
7252 }
7253
/* Generates the matching path of an assertion: OP_ASSERT, OP_ASSERT_NOT,
OP_ASSERTBACK or OP_ASSERTBACK_NOT, optionally preceded by OP_BRAZERO or
OP_BRAMINZERO (a zero prefix is not allowed for conditional assertions).

common       compiler state; its accept / quit / then_trap related fields are
             saved on entry and restored before every return
cc           points to the assertion opcode, or to the zero prefix before it
backtrack    the assert_backtrack of the assertion; framesize and
             private_data_ptr are filled here, and matchingpath is set for
             OP_BRAZERO (for OP_BRAMINZERO it is only read here)
conditional  when TRUE, a failed assertion jumps to backtrack->condfailed
             instead of backtrack->common.topbacktracks

Returns the address after the closing ket of the assertion, or NULL if the
sljit compiler reports an error while compiling an alternative. */
static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
{
DEFINE_COMPILER;
int framesize;
int extrasize;
BOOL needs_control_head;
int private_data_ptr;
backtrack_common altbacktrack;
pcre_uchar *ccbegin;
pcre_uchar opcode;
pcre_uchar bra = OP_BRA;
jump_list *tmp = NULL;
/* The jump list used when the assertion as a whole fails. */
jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
jump_list **found;
/* Saving previous accept variables. */
BOOL save_local_exit = common->local_exit;
BOOL save_positive_assert = common->positive_assert;
then_trap_backtrack *save_then_trap = common->then_trap;
struct sljit_label *save_quit_label = common->quit_label;
struct sljit_label *save_accept_label = common->accept_label;
jump_list *save_quit = common->quit;
jump_list *save_positive_assert_quit = common->positive_assert_quit;
jump_list *save_accept = common->accept;
struct sljit_jump *jump;
struct sljit_jump *brajump = NULL;

/* Assert captures then: no then trap is active inside the assertion. */
common->then_trap = NULL;

if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
  {
  SLJIT_ASSERT(!conditional);
  bra = *cc;
  cc++;
  }
private_data_ptr = PRIVATE_DATA(cc);
SLJIT_ASSERT(private_data_ptr != 0);
framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
backtrack->framesize = framesize;
backtrack->private_data_ptr = private_data_ptr;
opcode = *cc;
SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
/* The jumps emitted when an alternative matches are collected in found:
a local list for positive assertions, and the failure list for negative
assertions (a matching alternative fails a negative assertion). */
found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
/* ccbegin is the start of the current alternative, cc is its end. */
ccbegin = cc;
cc += GET(cc, 1);

if (bra == OP_BRAMINZERO)
  {
  /* This is a braminzero backtrack path. */
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  }

if (framesize < 0)
  {
  /* No frame is needed. One slot is used for STR_PTR unless the
  assertion has no zero prefix and assert_needs_str_ptr_saving() allows
  omitting it, and one more slot is used for the control head. */
  extrasize = 1;
  if (bra == OP_BRA && !assert_needs_str_ptr_saving(ccbegin + 1 + LINK_SIZE))
    extrasize = 0;

  if (needs_control_head)
    extrasize++;

  if (framesize == no_frame)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);

  if (extrasize > 0)
    allocate_stack(common, extrasize);

  if (needs_control_head)
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);

  if (extrasize > 0)
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

  if (needs_control_head)
    {
    SLJIT_ASSERT(extrasize == 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
    }
  }
else
  {
  /* A frame is needed. The extra slots store STR_PTR, the control head
  (only if needs_control_head is set) and the previous value of the
  private data, in this order from the top of the stack. */
  extrasize = needs_control_head ? 3 : 2;
  allocate_stack(common, framesize + extrasize);

  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
  if (needs_control_head)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

  if (needs_control_head)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
    }
  else
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);

  init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize, FALSE);
  }

memset(&altbacktrack, 0, sizeof(backtrack_common));
if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
  {
  /* Negative assert is stronger than positive assert. */
  common->local_exit = TRUE;
  common->quit_label = NULL;
  common->quit = NULL;
  common->positive_assert = FALSE;
  }
else
  common->positive_assert = TRUE;
common->positive_assert_quit = NULL;

/* One iteration for each alternative of the assertion. */
while (1)
  {
  common->accept_label = NULL;
  common->accept = NULL;
  altbacktrack.top = NULL;
  altbacktrack.topbacktracks = NULL;

  /* From the second alternative the saved STR_PTR is reloaded. */
  if (*ccbegin == OP_ALT && extrasize > 0)
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  altbacktrack.cc = ccbegin;
  compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    {
    /* Restore the saved state before leaving with an error. */
    if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
      {
      common->local_exit = save_local_exit;
      common->quit_label = save_quit_label;
      common->quit = save_quit;
      }
    common->positive_assert = save_positive_assert;
    common->then_trap = save_then_trap;
    common->accept_label = save_accept_label;
    common->positive_assert_quit = save_positive_assert_quit;
    common->accept = save_accept;
    return NULL;
    }
  /* Accept jumps collected inside the alternative arrive here. */
  common->accept_label = LABEL();
  if (common->accept != NULL)
    set_jumps(common->accept, common->accept_label);

  /* Reset stack. */
  if (framesize < 0)
    {
    if (framesize == no_frame)
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    else if (extrasize > 0)
      free_stack(common, extrasize);

    if (needs_control_head)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
    }
  else
    {
    if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
      {
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
      if (needs_control_head)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
      }
    else
      {
      /* Non-conditional negative assertion: the frame is reverted. */
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      if (needs_control_head)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 2));
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      }
    }

  if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
    {
    /* We know that STR_PTR was stored on the top of the stack. */
    if (conditional)
      {
      if (extrasize > 0)
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? STACK(-2) : STACK(-1));
      }
    else if (bra == OP_BRAZERO)
      {
      if (framesize < 0)
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));
      else
        {
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - extrasize));
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
        }
      /* A zero is left on the top of the stack. */
      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else if (framesize >= 0)
      {
      /* For OP_BRA and OP_BRAMINZERO. */
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
      }
    }
  /* The alternative matched. */
  add_jump(compiler, found, JUMP(SLJIT_JUMP));

  compile_backtrackingpath(common, altbacktrack.top);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    {
    /* Restore the saved state before leaving with an error. */
    if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
      {
      common->local_exit = save_local_exit;
      common->quit_label = save_quit_label;
      common->quit = save_quit;
      }
    common->positive_assert = save_positive_assert;
    common->then_trap = save_then_trap;
    common->accept_label = save_accept_label;
    common->positive_assert_quit = save_positive_assert_quit;
    common->accept = save_accept;
    return NULL;
    }
  /* The alternative failed: continue with the next one, if any. */
  set_jumps(altbacktrack.topbacktracks, LABEL());

  if (*cc != OP_ALT)
    break;

  ccbegin = cc;
  cc += GET(cc, 1);
  }

if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
  {
  SLJIT_ASSERT(common->positive_assert_quit == NULL);
  /* Makes the check less complicated below. */
  common->positive_assert_quit = common->quit;
  }

/* None of them matched. */
if (common->positive_assert_quit != NULL)
  {
  /* The quit jumps land here, and the stack top is recomputed for
  them. The normal path jumps over this code. */
  jump = JUMP(SLJIT_JUMP);
  set_jumps(common->positive_assert_quit, LABEL());
  SLJIT_ASSERT(framesize != no_stack);
  if (framesize < 0)
    OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
  else
    {
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
    OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
    }
  JUMPHERE(jump);
  }

if (needs_control_head)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));

if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
  {
  /* Assert is failed. */
  if ((conditional && extrasize > 0) || bra == OP_BRAZERO)
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  if (framesize < 0)
    {
    /* The topmost item should be 0. */
    if (bra == OP_BRAZERO)
      {
      if (extrasize == 2)
        free_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else if (extrasize > 0)
      free_stack(common, extrasize);
    }
  else
    {
    /* TMP1 is the previous value of the private data. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
    /* The topmost item should be 0. */
    if (bra == OP_BRAZERO)
      {
      free_stack(common, framesize + extrasize - 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else
      free_stack(common, framesize + extrasize);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }
  /* For OP_BRAZERO this jump is bound to the matching path below,
  otherwise it is added to the failure list. */
  jump = JUMP(SLJIT_JUMP);
  if (bra != OP_BRAZERO)
    add_jump(compiler, target, jump);

  /* Assert is successful. */
  set_jumps(tmp, LABEL());
  if (framesize < 0)
    {
    /* We know that STR_PTR was stored on the top of the stack. */
    if (extrasize > 0)
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));

    /* Keep the STR_PTR on the top of the stack. */
    if (bra == OP_BRAZERO)
      {
      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
      if (extrasize == 2)
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      }
    else if (bra == OP_BRAMINZERO)
      {
      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    }
  else
    {
    if (bra == OP_BRA)
      {
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 1));
      }
    else
      {
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
      if (extrasize == 2)
        {
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
        if (bra == OP_BRAMINZERO)
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
        }
      else
        {
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
        }
      }
    }

  if (bra == OP_BRAZERO)
    {
    /* Both the failed and the successful path continue here. */
    backtrack->matchingpath = LABEL();
    SET_LABEL(jump, backtrack->matchingpath);
    }
  else if (bra == OP_BRAMINZERO)
    {
    JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
    JUMPHERE(brajump);
    if (framesize >= 0)
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
      }
    set_jumps(backtrack->common.topbacktracks, LABEL());
    }
  }
else
  {
  /* AssertNot is successful. */
  if (framesize < 0)
    {
    if (extrasize > 0)
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

    /* With a zero prefix a 0 is left on the top of the stack. */
    if (bra != OP_BRA)
      {
      if (extrasize == 2)
        free_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else if (extrasize > 0)
      free_stack(common, extrasize);
    }
  else
    {
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    /* TMP1 is the previous value of the private data. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
    /* The topmost item should be 0. */
    if (bra != OP_BRA)
      {
      free_stack(common, framesize + extrasize - 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else
      free_stack(common, framesize + extrasize);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }

  if (bra == OP_BRAZERO)
    backtrack->matchingpath = LABEL();
  else if (bra == OP_BRAMINZERO)
    {
    JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
    JUMPHERE(brajump);
    }

  if (bra != OP_BRA)
    {
    /* With a zero prefix the failure jumps are resolved here, so the
    list is emptied afterwards. */
    SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
    set_jumps(backtrack->common.topbacktracks, LABEL());
    backtrack->common.topbacktracks = NULL;
    }
  }

/* Restore the variables saved on entry. */
if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
  {
  common->local_exit = save_local_exit;
  common->quit_label = save_quit_label;
  common->quit = save_quit;
  }
common->positive_assert = save_positive_assert;
common->then_trap = save_then_trap;
common->accept_label = save_accept_label;
common->positive_assert_quit = save_positive_assert_quit;
common->accept = save_accept;
return cc + 1 + LINK_SIZE;
}
7675
match_once_common(compiler_common * common,pcre_uchar ket,int framesize,int private_data_ptr,BOOL has_alternatives,BOOL needs_control_head)7676 static SLJIT_INLINE void match_once_common(compiler_common *common, pcre_uchar ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
7677 {
7678 DEFINE_COMPILER;
7679 int stacksize;
7680
7681 if (framesize < 0)
7682 {
7683 if (framesize == no_frame)
7684 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7685 else
7686 {
7687 stacksize = needs_control_head ? 1 : 0;
7688 if (ket != OP_KET || has_alternatives)
7689 stacksize++;
7690
7691 if (stacksize > 0)
7692 free_stack(common, stacksize);
7693 }
7694
7695 if (needs_control_head)
7696 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? STACK(-2) : STACK(-1));
7697
7698 /* TMP2 which is set here used by OP_KETRMAX below. */
7699 if (ket == OP_KETRMAX)
7700 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
7701 else if (ket == OP_KETRMIN)
7702 {
7703 /* Move the STR_PTR to the private_data_ptr. */
7704 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
7705 }
7706 }
7707 else
7708 {
7709 stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;
7710 OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
7711 if (needs_control_head)
7712 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
7713
7714 if (ket == OP_KETRMAX)
7715 {
7716 /* TMP2 which is set here used by OP_KETRMAX below. */
7717 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
7718 }
7719 }
7720 if (needs_control_head)
7721 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
7722 }
7723
match_capture_common(compiler_common * common,int stacksize,int offset,int private_data_ptr)7724 static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
7725 {
7726 DEFINE_COMPILER;
7727
7728 if (common->capture_last_ptr != 0)
7729 {
7730 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
7731 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
7732 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
7733 stacksize++;
7734 }
7735 if (common->optimized_cbracket[offset >> 1] == 0)
7736 {
7737 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
7738 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
7739 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
7740 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7741 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
7742 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
7743 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
7744 stacksize += 2;
7745 }
7746 return stacksize;
7747 }
7748
7749 /*
7750 Handling bracketed expressions is probably the most complex part.
7751
7752 Stack layout naming characters:
7753 S - Push the current STR_PTR
7754 0 - Push a 0 (NULL)
7755 A - Push the current STR_PTR. Needed for restoring the STR_PTR
7756 before the next alternative. Not pushed if there are no alternatives.
7757 M - Any values pushed by the current alternative. Can be empty, or anything.
7758 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
7759 L - Push the previous local (pointed by localptr) to the stack
  ()  - optional values stored on the stack
  ()* - optional, can be stored multiple times
7762
7763 The following list shows the regular expression templates, their PCRE byte codes
7764 and stack layout supported by pcre-sljit.
7765
7766 (?:) OP_BRA | OP_KET A M
7767 () OP_CBRA | OP_KET C M
7768 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
7769 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
7770 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
7771 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
7772 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
7773 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
7774 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
7775 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
7776 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
7777 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
7778 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
7779 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
7780 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
7781 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
7782 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
7783 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
7784 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
7785 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
7786 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
7787 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
7788
7789
7790 Stack layout naming characters:
  A - Push the alternative index (starting from 0) on the stack.
      Not pushed if there are no alternatives.
7793 M - Any values pushed by the current alternative. Can be empty, or anything.
7794
7795 The next list shows the possible content of a bracket:
7796 (|) OP_*BRA | OP_ALT ... M A
7797 (?()|) OP_*COND | OP_ALT M A
7798 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
7799 (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A
7800 Or nothing, if trace is unnecessary
7801 */
7802
/* Emits the matching path for a bracketed group.

The group starts at cc, optionally preceded by OP_BRAZERO or OP_BRAMINZERO.
The code below distinguishes capturing brackets (OP_CBRA / OP_SCBRA), atomic
groups (OP_ONCE, with OP_ONCE_NC folded into it), conditional groups
(OP_COND / OP_SCOND), OP_SBRA, and the remaining plain brackets. The closing
ket (OP_KET / OP_KETRMAX / OP_KETRMIN) and any repeat data attached to it
select the looping scheme.

Arguments:
  common   the compiler state
  cc       the first opcode of the group (or its BRAZERO / BRAMINZERO prefix)
  parent   the backtrack record that owns the one pushed here

Returns:   the position after the closing ket (plus the repeat data length),
           or NULL when sljit reports a compiler error. A conditional group
           whose condition is OP_DEF emits nothing: its backtrack record is
           dropped and the group is skipped.

Note: this function only emits machine code. The order of the OP1 / OP2 /
allocate_stack calls defines the layout of the backtrack stack, so it must
not be changed without updating the matching backtracking path. */

static pcre_uchar *compile_bracket_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
pcre_uchar opcode;
int private_data_ptr = 0;
int offset = 0;
int i, stacksize;
int repeat_ptr = 0, repeat_length = 0;
int repeat_type = 0, repeat_count = 0;
pcre_uchar *ccbegin;
pcre_uchar *matchingpath;
pcre_uchar *slot;
pcre_uchar bra = OP_BRA;
pcre_uchar ket;
assert_backtrack *assert;
BOOL has_alternatives;
BOOL needs_control_head = FALSE;
struct sljit_jump *jump;
struct sljit_jump *skip;
struct sljit_label *rmax_label = NULL;
struct sljit_jump *braminzero = NULL;

PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);

/* Remember a zero-repeat prefix and step over it. */
if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
  {
  bra = *cc;
  cc++;
  }

opcode = *cc;
ccbegin = cc;
matchingpath = bracketend(cc) - 1 - LINK_SIZE;
ket = *matchingpath;
if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)
  {
  /* A counted repeat is attached to the closing ket. */
  repeat_ptr = PRIVATE_DATA(matchingpath);
  repeat_length = PRIVATE_DATA(matchingpath + 1);
  repeat_type = PRIVATE_DATA(matchingpath + 2);
  repeat_count = PRIVATE_DATA(matchingpath + 3);
  SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);
  /* Bounded repeats reuse the KETRMAX / KETRMIN code paths. */
  if (repeat_type == OP_UPTO)
    ket = OP_KETRMAX;
  if (repeat_type == OP_MINUPTO)
    ket = OP_KETRMIN;
  }

if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
  {
  /* Drop this bracket_backtrack. */
  parent->top = backtrack->prev;
  return matchingpath + 1 + LINK_SIZE + repeat_length;
  }

matchingpath = ccbegin + 1 + LINK_SIZE;
SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
cc += GET(cc, 1);

has_alternatives = *cc == OP_ALT;
if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND))
  has_alternatives = (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF || *matchingpath == OP_FAIL) ? FALSE : TRUE;

/* From here on, a repeated OP_COND is handled as OP_SCOND, and OP_ONCE_NC
as OP_ONCE. */
if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
  opcode = OP_SCOND;
if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
  opcode = OP_ONCE;

if (opcode == OP_CBRA || opcode == OP_SCBRA)
  {
  /* Capturing brackets has a pre-allocated space. */
  offset = GET2(ccbegin, 1 + LINK_SIZE);
  if (common->optimized_cbracket[offset] == 0)
    {
    private_data_ptr = OVECTOR_PRIV(offset);
    offset <<= 1;
    }
  else
    {
    offset <<= 1;
    private_data_ptr = OVECTOR(offset);
    }
  BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
  matchingpath += IMM2_SIZE;
  }
else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
  {
  /* Other brackets simply allocate the next entry. */
  private_data_ptr = PRIVATE_DATA(ccbegin);
  SLJIT_ASSERT(private_data_ptr != 0);
  BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
  if (opcode == OP_ONCE)
    BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
  }

/* Instructions before the first alternative. */
stacksize = 0;
if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
  stacksize++;
if (bra == OP_BRAZERO)
  stacksize++;

if (stacksize > 0)
  allocate_stack(common, stacksize);

stacksize = 0;
if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
  stacksize++;
  }

if (bra == OP_BRAZERO)
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);

if (bra == OP_BRAMINZERO)
  {
  /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  if (ket != OP_KETRMIN)
    {
    free_stack(common, 1);
    braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
    }
  else
    {
    if (opcode == OP_ONCE || opcode >= OP_SBRA)
      {
      jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
      /* Nothing stored during the first run. */
      skip = JUMP(SLJIT_JUMP);
      JUMPHERE(jump);
      /* Checking zero-length iteration. */
      if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
        {
        /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
        braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
        }
      else
        {
        /* Except when the whole stack frame must be saved. */
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
        braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-BACKTRACK_AS(bracket_backtrack)->u.framesize - 2));
        }
      JUMPHERE(skip);
      }
    else
      {
      jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
      JUMPHERE(jump);
      }
    }
  }

if (repeat_type != 0)
  {
  /* Load the repeat counter with its starting value. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, repeat_count);
  if (repeat_type == OP_EXACT)
    rmax_label = LABEL();
  }

if (ket == OP_KETRMIN)
  BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();

if (ket == OP_KETRMAX)
  {
  rmax_label = LABEL();
  if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA && repeat_type == 0)
    BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label;
  }

/* Handling capturing brackets and alternatives. */
if (opcode == OP_ONCE)
  {
  stacksize = 0;
  if (needs_control_head)
    {
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
    stacksize++;
    }

  if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
    {
    /* Neither capturing brackets nor recursions are found in the block. */
    if (ket == OP_KETRMIN)
      {
      stacksize += 2;
      if (!needs_control_head)
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      }
    else
      {
      if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
      if (ket == OP_KETRMAX || has_alternatives)
        stacksize++;
      }

    if (stacksize > 0)
      allocate_stack(common, stacksize);

    stacksize = 0;
    if (needs_control_head)
      {
      stacksize++;
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
      }

    if (ket == OP_KETRMIN)
      {
      if (needs_control_head)
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
      if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
        OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
      }
    else if (ket == OP_KETRMAX || has_alternatives)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
    }
  else
    {
    /* A full frame is needed: reserve room for it together with the
    optional control head and starting STR_PTR slots. */
    if (ket != OP_KET || has_alternatives)
      stacksize++;

    stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1;
    allocate_stack(common, stacksize);

    if (needs_control_head)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);

    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));

    stacksize = needs_control_head ? 1 : 0;
    if (ket != OP_KET || has_alternatives)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
      stacksize++;
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
      }
    else
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
      }
    init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1, FALSE);
    }
  }
else if (opcode == OP_CBRA || opcode == OP_SCBRA)
  {
  /* Saving the previous values. */
  if (common->optimized_cbracket[offset >> 1] != 0)
    {
    SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
    allocate_stack(common, 2);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
    }
  else
    {
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    allocate_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
    }
  }
else if (opcode == OP_SBRA || opcode == OP_SCOND)
  {
  /* Saving the previous value. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  allocate_stack(common, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
  }
else if (has_alternatives)
  {
  /* Pushing the starting string pointer. */
  allocate_stack(common, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  }

/* Generating code for the first alternative. */
if (opcode == OP_COND || opcode == OP_SCOND)
  {
  if (*matchingpath == OP_CREF)
    {
    /* Condition on a numbered group: it fails when the group's ovector
    entry equals OVECTOR(1). */
    SLJIT_ASSERT(has_alternatives);
    add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
      CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
    matchingpath += 1 + IMM2_SIZE;
    }
  else if (*matchingpath == OP_DNCREF)
    {
    /* Condition on a duplicated name: the differences between each
    candidate's ovector entry and OVECTOR(1) are OR-ed together, so the
    condition fails only when every candidate equals OVECTOR(1). STR_PTR is
    used as scratch and is preserved in TMP3. */
    SLJIT_ASSERT(has_alternatives);

    i = GET2(matchingpath, 1 + IMM2_SIZE);
    slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
    OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
    slot += common->name_entry_size;
    i--;
    while (i-- > 0)
      {
      OP2(SLJIT_SUB, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
      OP2(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, STR_PTR, 0);
      slot += common->name_entry_size;
      }
    OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
    add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), JUMP(SLJIT_ZERO));
    matchingpath += 1 + 2 * IMM2_SIZE;
    }
  else if (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF || *matchingpath == OP_FAIL)
    {
    /* Never has other case. */
    BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
    SLJIT_ASSERT(!has_alternatives);

    /* These conditions are decided at compile time; stacksize is reused
    as the truth value of the condition. */
    if (*matchingpath == OP_FAIL)
      stacksize = 0;
    else if (*matchingpath == OP_RREF)
      {
      stacksize = GET2(matchingpath, 1);
      if (common->currententry == NULL)
        stacksize = 0;
      else if (stacksize == RREF_ANY)
        stacksize = 1;
      else if (common->currententry->start == 0)
        stacksize = stacksize == 0;
      else
        stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);

      if (stacksize != 0)
        matchingpath += 1 + IMM2_SIZE;
      }
    else
      {
      if (common->currententry == NULL || common->currententry->start == 0)
        stacksize = 0;
      else
        {
        /* Search the name table entries for the current entry's group
        number; stacksize ends up non-zero only when one is found. */
        stacksize = GET2(matchingpath, 1 + IMM2_SIZE);
        slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
        i = (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
        while (stacksize > 0)
          {
          if ((int)GET2(slot, 0) == i)
            break;
          slot += common->name_entry_size;
          stacksize--;
          }
        }

      if (stacksize != 0)
        matchingpath += 1 + 2 * IMM2_SIZE;
      }

    /* The stacksize == 0 is a common "else" case. */
    if (stacksize == 0)
      {
      if (*cc == OP_ALT)
        {
        matchingpath = cc + 1 + LINK_SIZE;
        cc += GET(cc, 1);
        }
      else
        matchingpath = cc;
      }
    }
  else
    {
    SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
    /* Similar code as PUSH_BACKTRACK macro. */
    assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
      return NULL;
    memset(assert, 0, sizeof(assert_backtrack));
    assert->common.cc = matchingpath;
    BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
    matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
    }
  }

compile_matchingpath(common, matchingpath, cc, backtrack);
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  return NULL;

if (opcode == OP_ONCE)
  match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);

/* Count the stack slots needed after the alternative has matched. The
stores below must follow exactly the same order as this count. */
stacksize = 0;
if (repeat_type == OP_MINUPTO)
  {
  /* We need to preserve the counter. TMP2 will be used below. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
  stacksize++;
  }
if (ket != OP_KET || bra != OP_BRA)
  stacksize++;
if (offset != 0)
  {
  if (common->capture_last_ptr != 0)
    stacksize++;
  if (common->optimized_cbracket[offset >> 1] == 0)
    stacksize += 2;
  }
if (has_alternatives && opcode != OP_ONCE)
  stacksize++;

if (stacksize > 0)
  allocate_stack(common, stacksize);

stacksize = 0;
if (repeat_type == OP_MINUPTO)
  {
  /* TMP2 was set above. */
  OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
  stacksize++;
  }

if (ket != OP_KET || bra != OP_BRA)
  {
  if (ket != OP_KET)
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
  else
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
  stacksize++;
  }

if (offset != 0)
  stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);

if (has_alternatives)
  {
  /* Index 0 identifies the first alternative. */
  if (opcode != OP_ONCE)
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
  if (ket != OP_KETRMAX)
    BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
  }

/* Must be after the matchingpath label. */
if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
  {
  SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
  }

if (ket == OP_KETRMAX)
  {
  if (repeat_type != 0)
    {
    if (has_alternatives)
      BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, rmax_label);
    /* Drop STR_PTR for greedy plus quantifier. */
    if (opcode != OP_ONCE)
      free_stack(common, 1);
    }
  else if (opcode == OP_ONCE || opcode >= OP_SBRA)
    {
    if (has_alternatives)
      BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
    /* Checking zero-length iteration. */
    if (opcode != OP_ONCE)
      {
      CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0, rmax_label);
      /* Drop STR_PTR for greedy plus quantifier. */
      if (bra != OP_BRAZERO)
        free_stack(common, 1);
      }
    else
      /* TMP2 must contain the starting STR_PTR. */
      CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmax_label);
    }
  else
    JUMPTO(SLJIT_JUMP, rmax_label);
  BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
  }

if (repeat_type == OP_EXACT)
  {
  count_match(common);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
  JUMPTO(SLJIT_NOT_ZERO, rmax_label);
  }
else if (repeat_type == OP_UPTO)
  {
  /* We need to preserve the counter. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
  allocate_stack(common, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
  }

if (bra == OP_BRAZERO)
  BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();

if (bra == OP_BRAMINZERO)
  {
  /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
  JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
  if (braminzero != NULL)
    {
    JUMPHERE(braminzero);
    /* We need to release the end pointer to perform the
    backtrack for the zero-length iteration. When
    framesize is < 0, OP_ONCE will do the release itself. */
    if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      }
    else if (ket == OP_KETRMIN && opcode != OP_ONCE)
      free_stack(common, 1);
    }
  /* Continue to the normal backtrack. */
  }

if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
  count_match(common);

/* Skip the other alternatives. */
while (*cc == OP_ALT)
  cc += GET(cc, 1);
cc += 1 + LINK_SIZE;

if (opcode == OP_ONCE)
  {
  /* We temporarily encode the needs_control_head in the lowest bit.
     Note: on the target architectures of SLJIT the ((x << 1) >> 1) returns
     the same value for small signed numbers (including negative numbers). */
  BACKTRACK_AS(bracket_backtrack)->u.framesize = (BACKTRACK_AS(bracket_backtrack)->u.framesize << 1) | (needs_control_head ? 1 : 0);
  }
return cc + repeat_length;
}
8347
/* Emits the matching path for a possessively repeated group: OP_BRAPOS,
OP_SBRAPOS, OP_CBRAPOS or OP_SCBRAPOS, optionally preceded by OP_BRAPOSZERO.

The generated code loops over the group for as long as one of its
alternatives matches. The backtracking code of each alternative is compiled
inline, directly after that alternative, so a failed alternative falls
through to the next one. For the OP_S* variants an iteration that consumed
no characters leaves the loop through the emptymatch list.

Arguments:
  common   the compiler state
  cc       the first opcode of the group (or its OP_BRAPOSZERO prefix)
  parent   the backtrack record that owns the one pushed here

Returns:   the position after the closing OP_KETRPOS, or NULL when sljit
           reports a compiler error.

Note: the order of the emitted instructions defines the stack layout that
the backtracking path relies on (the slot count is recorded in the
bracketpos_backtrack), so it must be kept in step with that code. */

static pcre_uchar *compile_bracketpos_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
pcre_uchar opcode;
int private_data_ptr;
int cbraprivptr = 0;
BOOL needs_control_head;
int framesize;
int stacksize;
int offset = 0;
BOOL zero = FALSE;
pcre_uchar *ccbegin = NULL;
int stack; /* Also contains the offset of control head. */
struct sljit_label *loop = NULL;
struct jump_list *emptymatch = NULL;

PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
if (*cc == OP_BRAPOSZERO)
  {
  zero = TRUE;
  cc++;
  }

opcode = *cc;
private_data_ptr = PRIVATE_DATA(cc);
SLJIT_ASSERT(private_data_ptr != 0);
BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
switch(opcode)
  {
  case OP_BRAPOS:
  case OP_SBRAPOS:
  ccbegin = cc + 1 + LINK_SIZE;
  break;

  case OP_CBRAPOS:
  case OP_SCBRAPOS:
  offset = GET2(cc, 1 + LINK_SIZE);
  /* This case cannot be optimized in the same way as
  normal capturing brackets. */
  SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
  cbraprivptr = OVECTOR_PRIV(offset);
  offset <<= 1;
  ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
  break;

  default:
  SLJIT_UNREACHABLE();
  break;
  }

framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
if (framesize < 0)
  {
  /* No frame: only the saved capture pair (or STR_PTR), the optional
  capture_last and control head values, and the optional "nothing matched
  yet" flag are pushed. */
  if (offset != 0)
    {
    stacksize = 2;
    if (common->capture_last_ptr != 0)
      stacksize++;
    }
  else
    stacksize = 1;

  if (needs_control_head)
    stacksize++;
  if (!zero)
    stacksize++;

  BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
  allocate_stack(common, stacksize);
  if (framesize == no_frame)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);

  stack = 0;
  if (offset != 0)
    {
    stack = 2;
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
    if (common->capture_last_ptr != 0)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
    if (needs_control_head)
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
    if (common->capture_last_ptr != 0)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
      stack = 3;
      }
    }
  else
    {
    if (needs_control_head)
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    stack = 1;
    }

  /* The flag (if any) is stored one slot above the control head; stack
  is left pointing at the control head slot. */
  if (needs_control_head)
    stack++;
  if (!zero)
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), SLJIT_IMM, 1);
  if (needs_control_head)
    {
    stack--;
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
    }
  }
else
  {
  /* A full frame is needed. The flag, control head and STR_PTR slots
  precede the saved private_data_ptr value and the frame itself. */
  stacksize = framesize + 1;
  if (!zero)
    stacksize++;
  if (needs_control_head)
    stacksize++;
  if (offset == 0)
    stacksize++;
  BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;

  allocate_stack(common, stacksize);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  if (needs_control_head)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));

  stack = 0;
  if (!zero)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
    stack = 1;
    }
  if (needs_control_head)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
    stack++;
    }
  if (offset == 0)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
    stack++;
    }
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
  init_frame(common, cc, NULL, stacksize - 1, stacksize - framesize, FALSE);
  /* Step back to the slot that holds the control head (when it is saved). */
  stack -= 1 + (offset == 0);
  }

if (offset != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);

loop = LABEL();
while (*cc != OP_KETRPOS)
  {
  /* Each alternative gets a fresh backtrack list, because its
  backtracking path is emitted right after it. */
  backtrack->top = NULL;
  backtrack->topbacktracks = NULL;
  cc += GET(cc, 1);

  compile_matchingpath(common, ccbegin, cc, backtrack);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return NULL;

  if (framesize < 0)
    {
    if (framesize == no_frame)
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);

    if (offset != 0)
      {
      /* Commit the capture: TMP1 keeps the start of this iteration. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
      if (common->capture_last_ptr != 0)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
      }
    else
      {
      if (opcode == OP_SBRAPOS)
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      }

    /* Even if the match is empty, we need to reset the control head. */
    if (needs_control_head)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));

    /* TMP1 holds the start of this iteration: leave on an empty match. */
    if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
      add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));

    /* At least one iteration matched: clear the flag. */
    if (!zero)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
    }
  else
    {
    if (offset != 0)
      {
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
      if (common->capture_last_ptr != 0)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
      }
    else
      {
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      OP2(SLJIT_SUB, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
      if (opcode == OP_SBRAPOS)
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
      OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(-framesize - 2), STR_PTR, 0);
      }

    /* Even if the match is empty, we need to reset the control head. */
    if (needs_control_head)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));

    if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
      add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));

    /* At least one iteration matched: clear the flag. In this branch
    framesize is never negative, so the flag is always in STACK(0). */
    if (!zero)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    }

  JUMPTO(SLJIT_JUMP, loop);
  flush_stubs(common);

  compile_backtrackingpath(common, backtrack->top);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return NULL;
  set_jumps(backtrack->topbacktracks, LABEL());

  /* The alternative failed: reload STR_PTR from where the successful
  path above stored it. */
  if (framesize < 0)
    {
    if (offset != 0)
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
    else
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    }
  else
    {
    if (offset != 0)
      {
      /* Last alternative. */
      if (*cc == OP_KETRPOS)
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
      }
    else
      {
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
      }
    }

  if (*cc == OP_KETRPOS)
    break;
  ccbegin = cc + 1 + LINK_SIZE;
  }

/* We don't have to restore the control head in case of a failed match. */

backtrack->topbacktracks = NULL;
if (!zero)
  {
  /* A flag that is still non-zero means no iteration matched, which is
  a failure when the group is not optional. */
  if (framesize < 0)
    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
  else /* TMP2 is set to [private_data_ptr] above. */
    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), STACK(-stacksize), SLJIT_IMM, 0));
  }

/* None of them matched. */
set_jumps(emptymatch, LABEL());
count_match(common);
return cc + 1 + LINK_SIZE;
}
8630
get_iterator_parameters(compiler_common * common,pcre_uchar * cc,pcre_uchar * opcode,pcre_uchar * type,sljit_u32 * max,sljit_u32 * exact,pcre_uchar ** end)8631 static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, sljit_u32 *max, sljit_u32 *exact, pcre_uchar **end)
8632 {
8633 int class_len;
8634
8635 *opcode = *cc;
8636 *exact = 0;
8637
8638 if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
8639 {
8640 cc++;
8641 *type = OP_CHAR;
8642 }
8643 else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
8644 {
8645 cc++;
8646 *type = OP_CHARI;
8647 *opcode -= OP_STARI - OP_STAR;
8648 }
8649 else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
8650 {
8651 cc++;
8652 *type = OP_NOT;
8653 *opcode -= OP_NOTSTAR - OP_STAR;
8654 }
8655 else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
8656 {
8657 cc++;
8658 *type = OP_NOTI;
8659 *opcode -= OP_NOTSTARI - OP_STAR;
8660 }
8661 else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
8662 {
8663 cc++;
8664 *opcode -= OP_TYPESTAR - OP_STAR;
8665 *type = OP_END;
8666 }
8667 else
8668 {
8669 SLJIT_ASSERT(*opcode == OP_CLASS || *opcode == OP_NCLASS || *opcode == OP_XCLASS);
8670 *type = *opcode;
8671 cc++;
8672 class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(pcre_uchar))) : GET(cc, 0);
8673 *opcode = cc[class_len - 1];
8674
8675 if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
8676 {
8677 *opcode -= OP_CRSTAR - OP_STAR;
8678 *end = cc + class_len;
8679
8680 if (*opcode == OP_PLUS || *opcode == OP_MINPLUS)
8681 {
8682 *exact = 1;
8683 *opcode -= OP_PLUS - OP_STAR;
8684 }
8685 }
8686 else if (*opcode >= OP_CRPOSSTAR && *opcode <= OP_CRPOSQUERY)
8687 {
8688 *opcode -= OP_CRPOSSTAR - OP_POSSTAR;
8689 *end = cc + class_len;
8690
8691 if (*opcode == OP_POSPLUS)
8692 {
8693 *exact = 1;
8694 *opcode = OP_POSSTAR;
8695 }
8696 }
8697 else
8698 {
8699 SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE || *opcode == OP_CRPOSRANGE);
8700 *max = GET2(cc, (class_len + IMM2_SIZE));
8701 *exact = GET2(cc, class_len);
8702
8703 if (*max == 0)
8704 {
8705 if (*opcode == OP_CRPOSRANGE)
8706 *opcode = OP_POSSTAR;
8707 else
8708 *opcode -= OP_CRRANGE - OP_STAR;
8709 }
8710 else
8711 {
8712 *max -= *exact;
8713 if (*max == 0)
8714 *opcode = OP_EXACT;
8715 else if (*max == 1)
8716 {
8717 if (*opcode == OP_CRPOSRANGE)
8718 *opcode = OP_POSQUERY;
8719 else
8720 *opcode -= OP_CRRANGE - OP_QUERY;
8721 }
8722 else
8723 {
8724 if (*opcode == OP_CRPOSRANGE)
8725 *opcode = OP_POSUPTO;
8726 else
8727 *opcode -= OP_CRRANGE - OP_UPTO;
8728 }
8729 }
8730 *end = cc + class_len + 2 * IMM2_SIZE;
8731 }
8732 return cc;
8733 }
8734
8735 switch(*opcode)
8736 {
8737 case OP_EXACT:
8738 *exact = GET2(cc, 0);
8739 cc += IMM2_SIZE;
8740 break;
8741
8742 case OP_PLUS:
8743 case OP_MINPLUS:
8744 *exact = 1;
8745 *opcode -= OP_PLUS - OP_STAR;
8746 break;
8747
8748 case OP_POSPLUS:
8749 *exact = 1;
8750 *opcode = OP_POSSTAR;
8751 break;
8752
8753 case OP_UPTO:
8754 case OP_MINUPTO:
8755 case OP_POSUPTO:
8756 *max = GET2(cc, 0);
8757 cc += IMM2_SIZE;
8758 break;
8759 }
8760
8761 if (*type == OP_END)
8762 {
8763 *type = *cc;
8764 *end = next_opcode(common, cc);
8765 cc++;
8766 return cc;
8767 }
8768
8769 *end = cc + 1;
8770 #ifdef SUPPORT_UTF
8771 if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
8772 #endif
8773 return cc;
8774 }
8775
/* Emits the matching path for a single-character iterator.

The iterator at cc is decoded by get_iterator_parameters() into an opcode
(OP_STAR, OP_UPTO, OP_MINSTAR, OP_MINUPTO, OP_QUERY, OP_MINQUERY, OP_EXACT or
one of the possessive OP_POS* forms), the repeated item type, and the max/exact
repeat counts. A char_iterator_backtrack record is pushed onto parent first.
The mandatory "exact" repetitions are emitted before the variable part that is
selected by opcode.

Returns the end pointer produced by get_iterator_parameters(); it is advanced
by 2 when a following OP_CHAR/OP_CHARI is consumed by the charpos path below.
NOTE(review): PUSH_BACKTRACK is given NULL as its third argument, presumably
the value returned on allocation failure - the macro is not visible here. */
compile_iterator_matchingpath(compiler_common * common,pcre_uchar * cc,backtrack_common * parent)8776 static pcre_uchar *compile_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
8777 {
8778 DEFINE_COMPILER;
8779 backtrack_common *backtrack;
8780 pcre_uchar opcode;
8781 pcre_uchar type;
8782 sljit_u32 max = 0, exact;
8783 BOOL fast_fail;
8784 sljit_s32 fast_str_ptr;
8785 BOOL charpos_enabled;
8786 pcre_uchar charpos_char;
8787 unsigned int charpos_othercasebit;
8788 pcre_uchar *end;
8789 jump_list *no_match = NULL;
8790 jump_list *no_char1_match = NULL;
8791 struct sljit_jump *jump = NULL;
8792 struct sljit_label *label;
8793 int private_data_ptr = PRIVATE_DATA(cc);
/* Iterator state lives in two machine words: on the backtrack stack when no
private data slot is assigned (private_data_ptr == 0), otherwise in the
private data area addressed relative to SLJIT_SP. */
8794 int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
8795 int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
8796 int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
8797 int tmp_base, tmp_offset;
8798
8799 PUSH_BACKTRACK(sizeof(char_iterator_backtrack), cc, NULL);
8800
/* fast_str_ptr is used below as an offset from SLJIT_SP; the value 0 disables
every fast-fail related store and compare in this function. */
8801 fast_str_ptr = PRIVATE_DATA(cc + 1);
8802 fast_fail = TRUE;
8803
8804 SLJIT_ASSERT(common->fast_forward_bc_ptr == NULL || fast_str_ptr == 0 || cc == common->fast_forward_bc_ptr);
8805
/* The iterator referenced by fast_forward_bc_ptr keeps its slot but skips the
initial fast-fail compare; otherwise the slot is ignored when
fast_fail_start_ptr is 0. */
8806 if (cc == common->fast_forward_bc_ptr)
8807 fast_fail = FALSE;
8808 else if (common->fast_fail_start_ptr == 0)
8809 fast_str_ptr = 0;
8810
8811 SLJIT_ASSERT(common->fast_forward_bc_ptr != NULL || fast_str_ptr == 0
8812 || (fast_str_ptr >= common->fast_fail_start_ptr && fast_str_ptr <= common->fast_fail_end_ptr));
8813
8814 cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);
8815
/* Location of the loop counter / saved position: register TMP3, except for
OP_EXTUNI where the POSSESSIVE0 slot (relative to SLJIT_SP) is used. */
8816 if (type != OP_EXTUNI)
8817 {
8818 tmp_base = TMP3;
8819 tmp_offset = 0;
8820 }
8821 else
8822 {
8823 tmp_base = SLJIT_MEM1(SLJIT_SP);
8824 tmp_offset = POSSESSIVE0;
8825 }
8826
/* Backtrack immediately when STR_PTR is not greater than the value stored in
the fast_str_ptr slot. */
8827 if (fast_fail && fast_str_ptr != 0)
8828 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), fast_str_ptr));
8829
8830 /* Handle fixed part first. */
8831 if (exact > 1)
8832 {
8833 SLJIT_ASSERT(fast_str_ptr == 0);
/* In JIT_COMPILE mode without UTF, STR_PTR + exact is compared against STR_END
once before the loop and the per-character call passes FALSE as its last
argument; the other variant passes TRUE.
NOTE(review): that flag presumably enables the per-character end-of-input
check - confirm against compile_char1_matchingpath. */
8834 if (common->mode == JIT_COMPILE
8835 #ifdef SUPPORT_UTF
8836 && !common->utf
8837 #endif
8838 )
8839 {
8840 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(exact));
8841 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER, TMP1, 0, STR_END, 0));
8842 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
8843 label = LABEL();
8844 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE);
8845 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
8846 JUMPTO(SLJIT_NOT_ZERO, label);
8847 }
8848 else
8849 {
8850 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
8851 label = LABEL();
8852 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);
8853 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
8854 JUMPTO(SLJIT_NOT_ZERO, label);
8855 }
8856 }
8857 else if (exact == 1)
8858 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);
8859
/* Variable part of the iterator. */
8860 switch(opcode)
8861 {
8862 case OP_STAR:
8863 case OP_UPTO:
8864 SLJIT_ASSERT(fast_str_ptr == 0 || opcode == OP_STAR);
8865
8866 if (type == OP_ANYNL || type == OP_EXTUNI)
8867 {
8868 SLJIT_ASSERT(private_data_ptr == 0);
8869 SLJIT_ASSERT(fast_str_ptr == 0);
8870
/* Two words are pushed: the starting STR_PTR and a 0 in STACK(1). After every
further successful match one more word holding STR_PTR is pushed. The
backtracking path (compile_iterator_backtrackingpath) pops one word at a time
and resumes at matchingpath until it reads 0. */
8871 allocate_stack(common, 2);
8872 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
8873 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
8874
8875 if (opcode == OP_UPTO)
8876 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, max);
8877
8878 label = LABEL();
8879 compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
8880 if (opcode == OP_UPTO)
8881 {
/* Decrement the remaining count kept in POSSESSIVE0; leave the loop without
pushing another position once it reaches zero. */
8882 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
8883 OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
8884 jump = JUMP(SLJIT_ZERO);
8885 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
8886 }
8887
8888 /* We cannot use TMP3 because of this allocate_stack. */
8889 allocate_stack(common, 1);
8890 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
8891 JUMPTO(SLJIT_JUMP, label);
8892 if (jump != NULL)
8893 JUMPHERE(jump);
8894 }
8895 else
8896 {
8897 charpos_enabled = FALSE;
8898 charpos_char = 0;
8899 charpos_othercasebit = 0;
8900
/* charpos path: taken when the repeated item is not itself a literal and the
opcode at end is OP_CHAR/OP_CHARI. Only positions where that literal is the
current character are recorded as backtrack points. */
8901 if ((type != OP_CHAR && type != OP_CHARI) && (*end == OP_CHAR || *end == OP_CHARI))
8902 {
8903 charpos_enabled = TRUE;
8904 #ifdef SUPPORT_UTF
8905 charpos_enabled = !common->utf || !HAS_EXTRALEN(end[1]);
8906 #endif
/* For a caseless literal the two cases must differ in a single bit
(char_get_othercase_bit() != 0); otherwise the path is disabled. */
8907 if (charpos_enabled && *end == OP_CHARI && char_has_othercase(common, end + 1))
8908 {
8909 charpos_othercasebit = char_get_othercase_bit(common, end + 1);
8910 if (charpos_othercasebit == 0)
8911 charpos_enabled = FALSE;
8912 }
8913
8914 if (charpos_enabled)
8915 {
8916 charpos_char = end[1];
8917 /* Consume the OP_CHAR opcode. */
8918 end += 2;
8919 #if defined COMPILE_PCRE8
8920 SLJIT_ASSERT((charpos_othercasebit >> 8) == 0);
8921 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
8922 SLJIT_ASSERT((charpos_othercasebit >> 9) == 0);
8923 if ((charpos_othercasebit & 0x100) != 0)
8924 charpos_othercasebit = (charpos_othercasebit & 0xff) << 8;
8925 #endif
/* The bit is OR-ed into the expected character here and into every loaded
character below, so a single compare covers both cases. */
8926 if (charpos_othercasebit != 0)
8927 charpos_char |= charpos_othercasebit;
8928
/* Recorded in the backtrack record; read back by
compile_iterator_backtrackingpath. */
8929 BACKTRACK_AS(char_iterator_backtrack)->u.charpos.enabled = TRUE;
8930 BACKTRACK_AS(char_iterator_backtrack)->u.charpos.chr = charpos_char;
8931 BACKTRACK_AS(char_iterator_backtrack)->u.charpos.othercasebit = charpos_othercasebit;
8932 }
8933 }
8934
8935 if (charpos_enabled)
8936 {
8937 if (opcode == OP_UPTO)
8938 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max + 1);
8939
8940 /* Search the first instance of charpos_char. */
/* The first pass jumps over the item match so the current position is tested
before anything is consumed. */
8941 jump = JUMP(SLJIT_JUMP);
8942 label = LABEL();
8943 if (opcode == OP_UPTO)
8944 {
8945 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
8946 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_ZERO));
8947 }
8948 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE);
8949 if (fast_str_ptr != 0)
8950 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
8951 JUMPHERE(jump);
8952
8953 detect_partial_match(common, &backtrack->topbacktracks);
8954 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
8955 if (charpos_othercasebit != 0)
8956 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
8957 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);
8958
/* First instance found: both state words start at this position. offset1 is
not written again in this function; offset0 is updated by the loop below. */
8959 if (private_data_ptr == 0)
8960 allocate_stack(common, 2);
8961 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
8962 OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
8963 if (opcode == OP_UPTO)
8964 {
8965 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
8966 add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
8967 }
8968
8969 /* Search the last instance of charpos_char. */
8970 label = LABEL();
8971 compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
8972 if (fast_str_ptr != 0)
8973 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
8974 detect_partial_match(common, &no_match);
8975 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
8976 if (charpos_othercasebit != 0)
8977 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
8978 if (opcode == OP_STAR)
8979 {
8980 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);
8981 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
8982 }
8983 else
8984 {
/* OP_UPTO: the counter below must be decremented on every iteration, so a
mismatch only skips the store instead of looping directly. */
8985 jump = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char);
8986 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
8987 JUMPHERE(jump);
8988 }
8989
8990 if (opcode == OP_UPTO)
8991 {
8992 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
8993 JUMPTO(SLJIT_NOT_ZERO, label);
8994 }
8995 else
8996 JUMPTO(SLJIT_JUMP, label);
8997
/* Resume one character past the last recorded instance; this is where the
OP_CHAR/OP_CHARI opcode skipped above (end += 2) would have matched. */
8998 set_jumps(no_match, LABEL());
8999 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
9000 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
9001 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9002 }
9003 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
9004 else if (common->utf)
9005 {
/* UTF variant: offset1 keeps the starting position and offset0 is rewritten
after every successful match; on failure STR_PTR is reloaded from offset0. */
9006 if (private_data_ptr == 0)
9007 allocate_stack(common, 2);
9008
9009 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9010 OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
9011
9012 if (opcode == OP_UPTO)
9013 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
9014
9015 label = LABEL();
9016 compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
9017 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9018
9019 if (opcode == OP_UPTO)
9020 {
9021 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
9022 JUMPTO(SLJIT_NOT_ZERO, label);
9023 }
9024 else
9025 JUMPTO(SLJIT_JUMP, label);
9026
9027 set_jumps(no_match, LABEL());
9028 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
9029 if (fast_str_ptr != 0)
9030 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
9031 }
9032 #endif
9033 else
9034 {
/* Fixed-width variant: only the start is saved up front. A failed item match
(no_char1_match) steps STR_PTR back by one unit; running out of input
(no_match) does not. The resulting position is stored in offset0. */
9035 if (private_data_ptr == 0)
9036 allocate_stack(common, 2);
9037
9038 OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
9039 if (opcode == OP_UPTO)
9040 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
9041
9042 label = LABEL();
9043 detect_partial_match(common, &no_match);
9044 compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
9045 if (opcode == OP_UPTO)
9046 {
9047 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
9048 JUMPTO(SLJIT_NOT_ZERO, label);
/* Count exhausted: add one unit so the subtraction that follows
(shared with the no_char1_match path) cancels out. */
9049 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
9050 }
9051 else
9052 JUMPTO(SLJIT_JUMP, label);
9053
9054 set_jumps(no_char1_match, LABEL());
9055 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
9056 set_jumps(no_match, LABEL());
9057 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9058 if (fast_str_ptr != 0)
9059 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
9060 }
9061 }
/* Label the backtracking path jumps to after it has selected a new position. */
9062 BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
9063 break;
9064
9065 case OP_MINSTAR:
/* Minimizing: nothing is matched here; the position is saved and further
items are matched by the backtracking path. */
9066 if (private_data_ptr == 0)
9067 allocate_stack(common, 1);
9068 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9069 BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
9070 if (fast_str_ptr != 0)
9071 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
9072 break;
9073
9074 case OP_MINUPTO:
/* offset1 holds max + 1; the backtracking path decrements it before each
additional match and stops when it reaches zero. */
9075 SLJIT_ASSERT(fast_str_ptr == 0);
9076 if (private_data_ptr == 0)
9077 allocate_stack(common, 2);
9078 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9079 OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, max + 1);
9080 BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
9081 break;
9082
9083 case OP_QUERY:
9084 case OP_MINQUERY:
/* The position is saved first; only the greedy form attempts the item here. */
9085 SLJIT_ASSERT(fast_str_ptr == 0);
9086 if (private_data_ptr == 0)
9087 allocate_stack(common, 1);
9088 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9089 if (opcode == OP_QUERY)
9090 compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
9091 BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
9092 break;
9093
9094 case OP_EXACT:
/* Fully handled by the fixed part above. */
9095 break;
9096
9097 case OP_POSSTAR:
/* Possessive forms save no backtrack state and set no matchingpath label. */
9098 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
9099 if (common->utf)
9100 {
9101 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
9102 label = LABEL();
9103 compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
9104 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
9105 JUMPTO(SLJIT_JUMP, label);
9106 set_jumps(no_match, LABEL());
9107 OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
9108 if (fast_str_ptr != 0)
9109 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
9110 break;
9111 }
9112 #endif
9113 label = LABEL();
9114 detect_partial_match(common, &no_match);
9115 compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
9116 JUMPTO(SLJIT_JUMP, label);
9117 set_jumps(no_char1_match, LABEL());
9118 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
9119 set_jumps(no_match, LABEL());
9120 if (fast_str_ptr != 0)
9121 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
9122 break;
9123
9124 case OP_POSUPTO:
9125 SLJIT_ASSERT(fast_str_ptr == 0);
9126 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
9127 if (common->utf)
9128 {
/* The last good position is kept in POSSESSIVE1 while tmp_base/tmp_offset
holds the remaining count. */
9129 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
9130 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
9131 label = LABEL();
9132 compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
9133 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
9134 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
9135 JUMPTO(SLJIT_NOT_ZERO, label);
9136 set_jumps(no_match, LABEL());
9137 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
9138 break;
9139 }
9140 #endif
9141 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
9142 label = LABEL();
9143 detect_partial_match(common, &no_match);
9144 compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
9145 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
9146 JUMPTO(SLJIT_NOT_ZERO, label);
/* Count exhausted: add one unit so the shared subtraction below cancels out. */
9147 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
9148 set_jumps(no_char1_match, LABEL());
9149 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
9150 set_jumps(no_match, LABEL());
9151 break;
9152
9153 case OP_POSQUERY:
/* Save STR_PTR, try the item once, overwrite the saved value on success,
then reload STR_PTR from the saved value on both paths. */
9154 SLJIT_ASSERT(fast_str_ptr == 0);
9155 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
9156 compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
9157 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
9158 set_jumps(no_match, LABEL());
9159 OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
9160 break;
9161
9162 default:
9163 SLJIT_UNREACHABLE();
9164 break;
9165 }
9166
9167 count_match(common);
9168 return end;
9169 }
9170
compile_fail_accept_matchingpath(compiler_common * common,pcre_uchar * cc,backtrack_common * parent)9171 static SLJIT_INLINE pcre_uchar *compile_fail_accept_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
9172 {
9173 DEFINE_COMPILER;
9174 backtrack_common *backtrack;
9175
9176 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
9177
9178 if (*cc == OP_FAIL)
9179 {
9180 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
9181 return cc + 1;
9182 }
9183
9184 if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL || !common->might_be_empty)
9185 {
9186 /* No need to check notempty conditions. */
9187 if (common->accept_label == NULL)
9188 add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
9189 else
9190 JUMPTO(SLJIT_JUMP, common->accept_label);
9191 return cc + 1;
9192 }
9193
9194 if (common->accept_label == NULL)
9195 add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)));
9196 else
9197 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), common->accept_label);
9198 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
9199 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
9200 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
9201 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
9202 if (common->accept_label == NULL)
9203 add_jump(compiler, &common->accept, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
9204 else
9205 CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0, common->accept_label);
9206 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
9207 if (common->accept_label == NULL)
9208 add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
9209 else
9210 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->accept_label);
9211 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
9212 return cc + 1;
9213 }
9214
compile_close_matchingpath(compiler_common * common,pcre_uchar * cc)9215 static SLJIT_INLINE pcre_uchar *compile_close_matchingpath(compiler_common *common, pcre_uchar *cc)
9216 {
9217 DEFINE_COMPILER;
9218 int offset = GET2(cc, 1);
9219 BOOL optimized_cbracket = common->optimized_cbracket[offset] != 0;
9220
9221 /* Data will be discarded anyway... */
9222 if (common->currententry != NULL)
9223 return cc + 1 + IMM2_SIZE;
9224
9225 if (!optimized_cbracket)
9226 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR_PRIV(offset));
9227 offset <<= 1;
9228 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
9229 if (!optimized_cbracket)
9230 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
9231 return cc + 1 + IMM2_SIZE;
9232 }
9233
compile_control_verb_matchingpath(compiler_common * common,pcre_uchar * cc,backtrack_common * parent)9234 static SLJIT_INLINE pcre_uchar *compile_control_verb_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
9235 {
9236 DEFINE_COMPILER;
9237 backtrack_common *backtrack;
9238 pcre_uchar opcode = *cc;
9239 pcre_uchar *ccend = cc + 1;
9240
9241 if (opcode == OP_PRUNE_ARG || opcode == OP_SKIP_ARG || opcode == OP_THEN_ARG)
9242 ccend += 2 + cc[1];
9243
9244 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
9245
9246 if (opcode == OP_SKIP)
9247 {
9248 allocate_stack(common, 1);
9249 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9250 return ccend;
9251 }
9252
9253 if (opcode == OP_PRUNE_ARG || opcode == OP_THEN_ARG)
9254 {
9255 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
9256 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
9257 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
9258 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
9259 }
9260
9261 return ccend;
9262 }
9263
/* One-element opcode buffer holding OP_THEN_TRAP. The then_trap backtrack
records created in compile_then_trap_matchingpath() and compile_matchingpath()
point their common.cc field at it. */
9264 static pcre_uchar then_trap_opcode[1] = { OP_THEN_TRAP };
9265
/* Pushes a then_trap_backtrack record for the range [cc, ccend) and emits the
code that installs it.

The record becomes common->then_trap, its common.cc is pointed at
then_trap_opcode, and it stores the offset of cc from common->start plus the
frame size reported by get_framesize(). The emitted code allocates
3 + max(framesize, 0) stack words, makes the control_head_ptr slot point at
the new entry, and fills the top three words with the start offset, the
type_then_trap tag and the previous control head. init_frame() is called when
framesize >= 0.
NOTE(review): PUSH_BACKTRACK_NOVALUE presumably returns from this function on
allocation failure - the macro is not visible here. */
compile_then_trap_matchingpath(compiler_common * common,pcre_uchar * cc,pcre_uchar * ccend,backtrack_common * parent)9266 static SLJIT_INLINE void compile_then_trap_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, backtrack_common *parent)
9267 {
9268 DEFINE_COMPILER;
9269 backtrack_common *backtrack;
9270 BOOL needs_control_head;
9271 int size;
9272
9273 PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
9274 common->then_trap = BACKTRACK_AS(then_trap_backtrack);
9275 BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
9276 BACKTRACK_AS(then_trap_backtrack)->start = (sljit_sw)(cc - common->start);
9277 BACKTRACK_AS(then_trap_backtrack)->framesize = get_framesize(common, cc, ccend, FALSE, &needs_control_head);
9278
/* Three bookkeeping words plus the frame, if there is one. */
9279 size = BACKTRACK_AS(then_trap_backtrack)->framesize;
9280 size = 3 + (size < 0 ? 0 : size);
9281
/* The old control head is loaded into TMP2 before the slot is overwritten. */
9282 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
9283 allocate_stack(common, size);
/* With a frame present the new control head is STACK_TOP advanced by
(size - 3) words; otherwise it is STACK_TOP itself. */
9284 if (size > 3)
9285 OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0, SLJIT_IMM, (size - 3) * sizeof(sljit_sw));
9286 else
9287 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
9288 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 1), SLJIT_IMM, BACKTRACK_AS(then_trap_backtrack)->start);
9289 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 2), SLJIT_IMM, type_then_trap);
9290 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 3), TMP2, 0);
9291
9292 size = BACKTRACK_AS(then_trap_backtrack)->framesize;
9293 if (size >= 0)
9294 init_frame(common, cc, ccend, size - 1, 0, FALSE);
9295 }
9296
/* Emits the matching path for the opcode sequence [cc, ccend).

Each opcode is dispatched to the matching compile_*_matchingpath helper (or
handled inline for OP_SET_SOM, OP_BRAMINZERO and OP_MARK), and cc is advanced
to the value the helper returns. The function returns early when a helper
returns NULL. ccend must point at OP_END or at an opcode in the range
OP_ALT..OP_KETRPOS.

When common->has_then is set and then_offsets has a non-zero entry for cc, a
then_trap backtrack is pushed before the sequence (tail item) and a second one
after it (head item); common->then_trap is restored to its previous value at
the end.

Helpers that take a jump list receive the nextbacktracks list of parent->top
when parent->top is not NULL, otherwise parent's own topbacktracks list.
NOTE(review): parent->top is presumably the most recently pushed backtrack
record; the expression is re-evaluated at every call site. */
compile_matchingpath(compiler_common * common,pcre_uchar * cc,pcre_uchar * ccend,backtrack_common * parent)9297 static void compile_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, backtrack_common *parent)
9298 {
9299 DEFINE_COMPILER;
9300 backtrack_common *backtrack;
9301 BOOL has_then_trap = FALSE;
9302 then_trap_backtrack *save_then_trap = NULL;
9303
9304 SLJIT_ASSERT(*ccend == OP_END || (*ccend >= OP_ALT && *ccend <= OP_KETRPOS));
9305
9306 if (common->has_then && common->then_offsets[cc - common->start] != 0)
9307 {
9308 SLJIT_ASSERT(*ccend != OP_END && common->control_head_ptr != 0);
9309 has_then_trap = TRUE;
9310 save_then_trap = common->then_trap;
9311 /* Tail item on backtrack. */
9312 compile_then_trap_matchingpath(common, cc, ccend, parent);
9313 }
9314
9315 while (cc < ccend)
9316 {
9317 switch(*cc)
9318 {
/* Simple assertions: handled by compile_simple_assertion_matchingpath. */
9319 case OP_SOD:
9320 case OP_SOM:
9321 case OP_NOT_WORD_BOUNDARY:
9322 case OP_WORD_BOUNDARY:
9323 case OP_EODN:
9324 case OP_EOD:
9325 case OP_DOLL:
9326 case OP_DOLLM:
9327 case OP_CIRC:
9328 case OP_CIRCM:
9329 case OP_REVERSE:
9330 cc = compile_simple_assertion_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
9331 break;
9332
/* Non-repeated single items: handled by compile_char1_matchingpath. */
9333 case OP_NOT_DIGIT:
9334 case OP_DIGIT:
9335 case OP_NOT_WHITESPACE:
9336 case OP_WHITESPACE:
9337 case OP_NOT_WORDCHAR:
9338 case OP_WORDCHAR:
9339 case OP_ANY:
9340 case OP_ALLANY:
9341 case OP_ANYBYTE:
9342 case OP_NOTPROP:
9343 case OP_PROP:
9344 case OP_ANYNL:
9345 case OP_NOT_HSPACE:
9346 case OP_HSPACE:
9347 case OP_NOT_VSPACE:
9348 case OP_VSPACE:
9349 case OP_EXTUNI:
9350 case OP_NOT:
9351 case OP_NOTI:
9352 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
9353 break;
9354
9355 case OP_SET_SOM:
/* The previous OVECTOR(0) value is pushed on the backtrack stack and
OVECTOR(0) is replaced by the current STR_PTR. */
9356 PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
9357 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
9358 allocate_stack(common, 1);
9359 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
9360 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
9361 cc++;
9362 break;
9363
9364 case OP_CHAR:
9365 case OP_CHARI:
/* Literal characters use compile_charn_matchingpath in JIT_COMPILE mode and
the single-item helper in every other mode. */
9366 if (common->mode == JIT_COMPILE)
9367 cc = compile_charn_matchingpath(common, cc, ccend, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
9368 else
9369 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
9370 break;
9371
/* All repeated single-item opcodes: handled by compile_iterator_matchingpath. */
9372 case OP_STAR:
9373 case OP_MINSTAR:
9374 case OP_PLUS:
9375 case OP_MINPLUS:
9376 case OP_QUERY:
9377 case OP_MINQUERY:
9378 case OP_UPTO:
9379 case OP_MINUPTO:
9380 case OP_EXACT:
9381 case OP_POSSTAR:
9382 case OP_POSPLUS:
9383 case OP_POSQUERY:
9384 case OP_POSUPTO:
9385 case OP_STARI:
9386 case OP_MINSTARI:
9387 case OP_PLUSI:
9388 case OP_MINPLUSI:
9389 case OP_QUERYI:
9390 case OP_MINQUERYI:
9391 case OP_UPTOI:
9392 case OP_MINUPTOI:
9393 case OP_EXACTI:
9394 case OP_POSSTARI:
9395 case OP_POSPLUSI:
9396 case OP_POSQUERYI:
9397 case OP_POSUPTOI:
9398 case OP_NOTSTAR:
9399 case OP_NOTMINSTAR:
9400 case OP_NOTPLUS:
9401 case OP_NOTMINPLUS:
9402 case OP_NOTQUERY:
9403 case OP_NOTMINQUERY:
9404 case OP_NOTUPTO:
9405 case OP_NOTMINUPTO:
9406 case OP_NOTEXACT:
9407 case OP_NOTPOSSTAR:
9408 case OP_NOTPOSPLUS:
9409 case OP_NOTPOSQUERY:
9410 case OP_NOTPOSUPTO:
9411 case OP_NOTSTARI:
9412 case OP_NOTMINSTARI:
9413 case OP_NOTPLUSI:
9414 case OP_NOTMINPLUSI:
9415 case OP_NOTQUERYI:
9416 case OP_NOTMINQUERYI:
9417 case OP_NOTUPTOI:
9418 case OP_NOTMINUPTOI:
9419 case OP_NOTEXACTI:
9420 case OP_NOTPOSSTARI:
9421 case OP_NOTPOSPLUSI:
9422 case OP_NOTPOSQUERYI:
9423 case OP_NOTPOSUPTOI:
9424 case OP_TYPESTAR:
9425 case OP_TYPEMINSTAR:
9426 case OP_TYPEPLUS:
9427 case OP_TYPEMINPLUS:
9428 case OP_TYPEQUERY:
9429 case OP_TYPEMINQUERY:
9430 case OP_TYPEUPTO:
9431 case OP_TYPEMINUPTO:
9432 case OP_TYPEEXACT:
9433 case OP_TYPEPOSSTAR:
9434 case OP_TYPEPOSPLUS:
9435 case OP_TYPEPOSQUERY:
9436 case OP_TYPEPOSUPTO:
9437 cc = compile_iterator_matchingpath(common, cc, parent);
9438 break;
9439
9440 case OP_CLASS:
9441 case OP_NCLASS:
/* A class followed by a repeat opcode in OP_CRSTAR..OP_CRPOSRANGE is compiled
as an iterator. The repeat opcode is looked up 32 / sizeof(pcre_uchar) code
units after the class opcode. */
9442 if (cc[1 + (32 / sizeof(pcre_uchar))] >= OP_CRSTAR && cc[1 + (32 / sizeof(pcre_uchar))] <= OP_CRPOSRANGE)
9443 cc = compile_iterator_matchingpath(common, cc, parent);
9444 else
9445 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
9446 break;
9447
9448 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
9449 case OP_XCLASS:
/* Same as above; the following opcode is located at cc + GET(cc, 1). */
9450 if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRPOSRANGE)
9451 cc = compile_iterator_matchingpath(common, cc, parent);
9452 else
9453 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
9454 break;
9455 #endif
9456
9457 case OP_REF:
9458 case OP_REFI:
/* A back reference followed by a repeat opcode goes to the reference
iterator, otherwise a single reference match is emitted. */
9459 if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRPOSRANGE)
9460 cc = compile_ref_iterator_matchingpath(common, cc, parent);
9461 else
9462 {
9463 compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
9464 cc += 1 + IMM2_SIZE;
9465 }
9466 break;
9467
9468 case OP_DNREF:
9469 case OP_DNREFI:
/* Same as OP_REF with two IMM2 operands; the non-repeated form calls
compile_dnref_search before compile_ref_matchingpath. */
9470 if (cc[1 + 2 * IMM2_SIZE] >= OP_CRSTAR && cc[1 + 2 * IMM2_SIZE] <= OP_CRPOSRANGE)
9471 cc = compile_ref_iterator_matchingpath(common, cc, parent);
9472 else
9473 {
9474 compile_dnref_search(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
9475 compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
9476 cc += 1 + 2 * IMM2_SIZE;
9477 }
9478 break;
9479
9480 case OP_RECURSE:
9481 cc = compile_recurse_matchingpath(common, cc, parent);
9482 break;
9483
9484 case OP_CALLOUT:
9485 cc = compile_callout_matchingpath(common, cc, parent);
9486 break;
9487
9488 case OP_ASSERT:
9489 case OP_ASSERT_NOT:
9490 case OP_ASSERTBACK:
9491 case OP_ASSERTBACK_NOT:
9492 PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
9493 cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
9494 break;
9495
9496 case OP_BRAMINZERO:
/* The bracket itself is skipped here (cc jumps to its end); only the saved
state is pushed. When the bracket ends with OP_KETRMIN two words are pushed
(a 0 and STR_PTR), otherwise only STR_PTR. */
9497 PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc);
9498 cc = bracketend(cc + 1);
9499 if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN)
9500 {
9501 allocate_stack(common, 1);
9502 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9503 }
9504 else
9505 {
9506 allocate_stack(common, 2);
9507 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9508 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
9509 }
9510 BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL();
9511 count_match(common);
9512 break;
9513
9514 case OP_ONCE:
9515 case OP_ONCE_NC:
9516 case OP_BRA:
9517 case OP_CBRA:
9518 case OP_COND:
9519 case OP_SBRA:
9520 case OP_SCBRA:
9521 case OP_SCOND:
9522 cc = compile_bracket_matchingpath(common, cc, parent);
9523 break;
9524
9525 case OP_BRAZERO:
/* OP_BRAZERO followed by an opcode greater than OP_ASSERTBACK_NOT is compiled
as a bracket, otherwise as an assertion. */
9526 if (cc[1] > OP_ASSERTBACK_NOT)
9527 cc = compile_bracket_matchingpath(common, cc, parent);
9528 else
9529 {
9530 PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
9531 cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
9532 }
9533 break;
9534
9535 case OP_BRAPOS:
9536 case OP_CBRAPOS:
9537 case OP_SBRAPOS:
9538 case OP_SCBRAPOS:
9539 case OP_BRAPOSZERO:
9540 cc = compile_bracketpos_matchingpath(common, cc, parent);
9541 break;
9542
9543 case OP_MARK:
/* The previous mark is pushed on the stack and the address cc + 2 becomes the
current mark, both in the mark_ptr slot and in jit_arguments. With
has_skip_arg a five-word entry is pushed instead and linked into the
control head chain with the tag type_mark, the mark address and STR_PTR. */
9544 PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
9545 SLJIT_ASSERT(common->mark_ptr != 0);
9546 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
9547 allocate_stack(common, common->has_skip_arg ? 5 : 1);
9548 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
9549 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0), TMP2, 0);
9550 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
9551 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
9552 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
9553 if (common->has_skip_arg)
9554 {
9555 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
9556 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
9557 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, type_mark);
9558 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), SLJIT_IMM, (sljit_sw)(cc + 2));
9559 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);
9560 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
9561 }
9562 cc += 1 + 2 + cc[1];
9563 break;
9564
9565 case OP_PRUNE:
9566 case OP_PRUNE_ARG:
9567 case OP_SKIP:
9568 case OP_SKIP_ARG:
9569 case OP_THEN:
9570 case OP_THEN_ARG:
9571 case OP_COMMIT:
9572 cc = compile_control_verb_matchingpath(common, cc, parent);
9573 break;
9574
9575 case OP_FAIL:
9576 case OP_ACCEPT:
9577 case OP_ASSERT_ACCEPT:
9578 cc = compile_fail_accept_matchingpath(common, cc, parent);
9579 break;
9580
9581 case OP_CLOSE:
9582 cc = compile_close_matchingpath(common, cc);
9583 break;
9584
9585 case OP_SKIPZERO:
/* Nothing is emitted; the bracket that follows is skipped entirely. */
9586 cc = bracketend(cc + 1);
9587 break;
9588
9589 default:
9590 SLJIT_UNREACHABLE();
9591 return;
9592 }
/* A NULL result from a helper aborts the compilation of this sequence. */
9593 if (cc == NULL)
9594 return;
9595 }
9596
9597 if (has_then_trap)
9598 {
9599 /* Head item on backtrack. */
9600 PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
9601 BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
9602 BACKTRACK_AS(then_trap_backtrack)->then_trap = common->then_trap;
9603 common->then_trap = save_then_trap;
9604 }
9605 SLJIT_ASSERT(cc == ccend);
9606 }
9607
/* The backtrack-record helper macros used by the matching path functions
above are retired here; the backtracking path functions below use
CURRENT_AS and COMPILE_BACKTRACKINGPATH instead. */
9608 #undef PUSH_BACKTRACK
9609 #undef PUSH_BACKTRACK_NOVALUE
9610 #undef BACKTRACK_AS
9611
/* Calls compile_backtrackingpath() for the given record and returns (without
a value) from the enclosing function when the SLJIT compiler reports an
error. Expects `common` and `compiler` to be in scope. */
9612 #define COMPILE_BACKTRACKINGPATH(current) \
9613 do \
9614 { \
9615 compile_backtrackingpath(common, (current)); \
9616 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
9617 return; \
9618 } \
9619 while (0)
9620
/* Casts the local variable `current` to a pointer to the given backtrack type. */
9621 #define CURRENT_AS(type) ((type *)current)
9622
/* Emits the backtracking path for a single-character iterator.

current is used as a char_iterator_backtrack record; its cc is decoded again
with get_iterator_parameters(). The emitted code picks the next position to
try from the saved iterator state (on the backtrack stack when no private
data slot is assigned, otherwise in the private data area) and jumps back to
the record's matchingpath label. When no alternative is left, any stack words
are released and control falls through to the point where
current->topbacktracks is bound, at the end of the emitted code.
OP_EXACT and the possessive opcodes emit nothing except that final binding. */
compile_iterator_backtrackingpath(compiler_common * common,struct backtrack_common * current)9623 static void compile_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
9624 {
9625 DEFINE_COMPILER;
9626 pcre_uchar *cc = current->cc;
9627 pcre_uchar opcode;
9628 pcre_uchar type;
9629 sljit_u32 max = 0, exact;
9630 struct sljit_label *label = NULL;
9631 struct sljit_jump *jump = NULL;
9632 jump_list *jumplist = NULL;
9633 pcre_uchar *end;
9634 int private_data_ptr = PRIVATE_DATA(cc);
9635 int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
9636 int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
9637 int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
9638
9639 cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);
9640
9641 switch(opcode)
9642 {
9643 case OP_STAR:
9644 case OP_UPTO:
9645 if (type == OP_ANYNL || type == OP_EXTUNI)
9646 {
/* One saved position is popped per backtrack; a popped value of 0 means that
no saved position is left and execution falls through. */
9647 SLJIT_ASSERT(private_data_ptr == 0);
9648 set_jumps(CURRENT_AS(char_iterator_backtrack)->u.backtracks, LABEL());
9649 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9650 free_stack(common, 1);
9651 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath);
9652 }
9653 else
9654 {
9655 if (CURRENT_AS(char_iterator_backtrack)->u.charpos.enabled)
9656 {
/* charpos variant: walk backwards from the position in offset0, bounded by
the position in offset1, and resume at matchingpath when the character just
before STR_PTR equals the recorded literal (after OR-ing in the case bit). */
9657 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
9658 OP1(SLJIT_MOV, TMP2, 0, base, offset1);
9659 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
9660
9661 jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
9662 label = LABEL();
9663 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
9664 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9665 if (CURRENT_AS(char_iterator_backtrack)->u.charpos.othercasebit != 0)
9666 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->u.charpos.othercasebit);
9667 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->u.charpos.chr, CURRENT_AS(char_iterator_backtrack)->matchingpath);
9668 skip_char_back(common);
9669 CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP2, 0, label);
9670 }
9671 else
9672 {
/* Plain variant: give back one character per backtrack until the position
in offset0 is no longer greater than the one in offset1. */
9673 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
9674 jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, base, offset1);
9675 skip_char_back(common);
9676 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9677 JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
9678 }
9679 JUMPHERE(jump);
9680 if (private_data_ptr == 0)
9681 free_stack(common, 2);
9682 }
9683 break;
9684
9685 case OP_MINSTAR:
/* Try to match one more item from the saved position; on success store the
new position and resume, on failure release the state. */
9686 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
9687 compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
9688 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9689 JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
9690 set_jumps(jumplist, LABEL());
9691 if (private_data_ptr == 0)
9692 free_stack(common, 1);
9693 break;
9694
9695 case OP_MINUPTO:
/* Same as OP_MINSTAR, but the counter in offset1 is decremented first and
the iterator gives up when it reaches zero. */
9696 OP1(SLJIT_MOV, TMP1, 0, base, offset1);
9697 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
9698 OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
9699 add_jump(compiler, &jumplist, JUMP(SLJIT_ZERO));
9700
9701 OP1(SLJIT_MOV, base, offset1, TMP1, 0);
9702 compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
9703 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9704 JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
9705
9706 set_jumps(jumplist, LABEL());
9707 if (private_data_ptr == 0)
9708 free_stack(common, 2);
9709 break;
9710
9711 case OP_QUERY:
/* The saved position is reloaded and the slot is cleared to 0, so the zero-item
alternative is taken only once: a non-zero value resumes at matchingpath, a
zero value gives up. The u.backtracks entry (item failed in the matching
path) performs the same reload-and-clear and resumes unconditionally. */
9712 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
9713 OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
9714 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath);
9715 jump = JUMP(SLJIT_JUMP);
9716 set_jumps(CURRENT_AS(char_iterator_backtrack)->u.backtracks, LABEL());
9717 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
9718 OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
9719 JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
9720 JUMPHERE(jump);
9721 if (private_data_ptr == 0)
9722 free_stack(common, 1);
9723 break;
9724
9725 case OP_MINQUERY:
/* The slot is cleared after loading so the single optional item is attempted
only once; a zero value means it has already been tried. */
9726 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
9727 OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
9728 jump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
9729 compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
9730 JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
9731 set_jumps(jumplist, LABEL());
9732 JUMPHERE(jump);
9733 if (private_data_ptr == 0)
9734 free_stack(common, 1);
9735 break;
9736
9737 case OP_EXACT:
9738 case OP_POSSTAR:
9739 case OP_POSQUERY:
9740 case OP_POSUPTO:
/* Nothing to emit for these opcodes. */
9741 break;
9742
9743 default:
9744 SLJIT_UNREACHABLE();
9745 break;
9746 }
9747
/* Failures recorded in topbacktracks by the matching path land here, after
the iterator's own state has been released. */
9748 set_jumps(current->topbacktracks, LABEL());
9749 }
9750
compile_ref_iterator_backtrackingpath(compiler_common * common,struct backtrack_common * current)9751 static SLJIT_INLINE void compile_ref_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
9752 {
9753 DEFINE_COMPILER;
9754 pcre_uchar *cc = current->cc;
9755 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
9756 pcre_uchar type;
9757
9758 type = cc[ref ? 1 + IMM2_SIZE : 1 + 2 * IMM2_SIZE];
9759
9760 if ((type & 0x1) == 0)
9761 {
9762 /* Maximize case. */
9763 set_jumps(current->topbacktracks, LABEL());
9764 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9765 free_stack(common, 1);
9766 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
9767 return;
9768 }
9769
9770 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9771 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
9772 set_jumps(current->topbacktracks, LABEL());
9773 free_stack(common, ref ? 2 : 3);
9774 }
9775
compile_recurse_backtrackingpath(compiler_common * common,struct backtrack_common * current)9776 static SLJIT_INLINE void compile_recurse_backtrackingpath(compiler_common *common, struct backtrack_common *current)
9777 {
9778 DEFINE_COMPILER;
9779
9780 if (CURRENT_AS(recurse_backtrack)->inlined_pattern)
9781 compile_backtrackingpath(common, current->top);
9782 set_jumps(current->topbacktracks, LABEL());
9783 if (CURRENT_AS(recurse_backtrack)->inlined_pattern)
9784 return;
9785
9786 if (common->has_set_som && common->mark_ptr != 0)
9787 {
9788 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9789 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
9790 free_stack(common, 2);
9791 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP2, 0);
9792 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
9793 }
9794 else if (common->has_set_som || common->mark_ptr != 0)
9795 {
9796 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9797 free_stack(common, 1);
9798 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr, TMP2, 0);
9799 }
9800 }
9801
/* Emits the backtracking path of an assertion (OP_ASSERT, OP_ASSERT_NOT,
OP_ASSERTBACK, OP_ASSERTBACK_NOT), optionally preceded by OP_BRAZERO.
OP_BRAMINZERO must not reach this function (asserted below). */
static void compile_assert_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
assert_backtrack *backtrack = CURRENT_AS(assert_backtrack);
pcre_uchar *cc = current->cc;
BOOL is_brazero;
struct sljit_jump *skipped = NULL;

SLJIT_ASSERT(*cc != OP_BRAMINZERO);
is_brazero = (*cc == OP_BRAZERO);
if (is_brazero)
  {
  /* Step over the prefix and fetch the value saved in the top stack slot. */
  cc++;
  SLJIT_ASSERT(current->topbacktracks == NULL);
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  }

if (backtrack->framesize < 0)
  {
  /* No frame was saved: only the failure jumps have to be bound. */
  set_jumps(current->topbacktracks, LABEL());

  if (is_brazero)
    {
    /* Clear the slot; a non-zero saved value resumes the matching path. */
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, backtrack->matchingpath);
    free_stack(common, 1);
    }
  return;
  }

if (is_brazero)
  {
  if (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, backtrack->matchingpath);
    free_stack(common, 1);
    return;
    }
  free_stack(common, 1);
  /* A zero saved value bypasses the frame handling emitted below. */
  skipped = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  }

if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK)
  {
  /* Reload STACK_TOP from the private data slot, call the revertframes
  routine, then rewrite the private data slot from the stack. */
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), backtrack->private_data_ptr);
  add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), backtrack->private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-backtrack->framesize - 1));
  }

/* In both cases the failure jumps are bound after the optional frame code. */
set_jumps(current->topbacktracks, LABEL());

if (is_brazero)
  {
  /* We know there is enough place on the stack. */
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
  JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
  JUMPHERE(skipped);
  }
}
9868
/* Emits the backtracking path of a bracketed group. The opcodes handled here
are OP_BRA, OP_CBRA, OP_SBRA, OP_SCBRA, OP_COND, OP_SCOND, OP_ONCE and
OP_ONCE_NC, each optionally preceded by OP_BRAZERO or OP_BRAMINZERO.

Arguments:
  common    shared compiler state
  current   backtrack record of the group, accessed through
            CURRENT_AS(bracket_backtrack)

The function returns early, leaving the path unfinished, when
allocate_read_only_data() returns NULL or when sljit_get_compiler_error()
reports an error after an alternative has been compiled.

Side effect at compile time: for OP_ONCE the stored u.framesize is shifted
right by one bit in place (its lowest bit is extracted first), and the
top / topbacktracks / nextbacktracks fields of "current" are reset for every
alternative that is compiled.

The emitted code reads values from the backtrack stack at fixed positions, so
the statement order below determines the emitted instruction order. */
static void compile_bracket_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
int opcode, stacksize, alt_count, alt_max;
int offset = 0;
int private_data_ptr = CURRENT_AS(bracket_backtrack)->private_data_ptr;
int repeat_ptr = 0, repeat_type = 0, repeat_count = 0;
pcre_uchar *cc = current->cc;
pcre_uchar *ccbegin;
pcre_uchar *ccprev;
pcre_uchar bra = OP_BRA;
pcre_uchar ket;
assert_backtrack *assert;
sljit_uw *next_update_addr = NULL;
BOOL has_alternatives;
BOOL needs_control_head = FALSE;
struct sljit_jump *brazero = NULL;
struct sljit_jump *alt1 = NULL;
struct sljit_jump *alt2 = NULL;
struct sljit_jump *once = NULL;
struct sljit_jump *cond = NULL;
struct sljit_label *rmin_label = NULL;
struct sljit_label *exact_label = NULL;

/* Remember an optional zero-repeat prefix and step over it. */
if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
  {
  bra = *cc;
  cc++;
  }

opcode = *cc;
/* Temporarily point ccbegin at the closing ket to read its opcode. */
ccbegin = bracketend(cc) - 1 - LINK_SIZE;
ket = *ccbegin;
if (ket == OP_KET && PRIVATE_DATA(ccbegin) != 0)
  {
  /* A plain OP_KET with private data carries a counted repeat: slot, type
  and count. OP_UPTO and OP_MINUPTO are handled as OP_KETRMAX and
  OP_KETRMIN from here on. */
  repeat_ptr = PRIVATE_DATA(ccbegin);
  repeat_type = PRIVATE_DATA(ccbegin + 2);
  repeat_count = PRIVATE_DATA(ccbegin + 3);
  SLJIT_ASSERT(repeat_type != 0 && repeat_count != 0);
  if (repeat_type == OP_UPTO)
    ket = OP_KETRMAX;
  if (repeat_type == OP_MINUPTO)
    ket = OP_KETRMIN;
  }
/* From here ccbegin is the opening opcode and cc the end of the first
alternative. */
ccbegin = cc;
cc += GET(cc, 1);
has_alternatives = *cc == OP_ALT;
/* For conditional groups has_alternatives is recomputed: it is set when the
condition is an assertion or when condfailed jumps were recorded. */
if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
  has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_backtrack)->u.condfailed != NULL;
/* For capturing groups offset is twice the group number read from the
pattern; it indexes the OVECTOR pair. */
if (opcode == OP_CBRA || opcode == OP_SCBRA)
  offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1;
/* Opcode normalization: a repeated OP_COND is handled as OP_SCOND, and
OP_ONCE_NC as OP_ONCE. */
if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
  opcode = OP_SCOND;
if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
  opcode = OP_ONCE;

alt_max = has_alternatives ? no_alternatives(ccbegin) : 0;

/* Decoding the needs_control_head in framesize. */
if (opcode == OP_ONCE)
  {
  needs_control_head = (CURRENT_AS(bracket_backtrack)->u.framesize & 0x1) != 0;
  CURRENT_AS(bracket_backtrack)->u.framesize >>= 1;
  }

/* Counted repeat handled as KETRMAX / KETRMIN: pop the saved counter and
store it back to repeat_ptr (plus one for OP_UPTO). */
if (ket != OP_KET && repeat_type != 0)
  {
  /* TMP1 is used in OP_KETRMIN below. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  if (repeat_type == OP_UPTO)
    OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0, SLJIT_IMM, 1);
  else
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
  }

/* Entry code that depends on the repeat kind. The brazero jump created here
is bound at the very end of the function. */
if (ket == OP_KETRMAX)
  {
  if (bra == OP_BRAZERO)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    brazero = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
    }
  }
else if (ket == OP_KETRMIN)
  {
  if (bra != OP_BRAMINZERO)
    {
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    if (repeat_type != 0)
      {
      /* TMP1 was set a few lines above. */
      CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
      /* Drop STR_PTR for non-greedy plus quantifier. */
      if (opcode != OP_ONCE)
        free_stack(common, 1);
      }
    else if (opcode >= OP_SBRA || opcode == OP_ONCE)
      {
      /* Checking zero-length iteration. */
      if (opcode != OP_ONCE || CURRENT_AS(bracket_backtrack)->u.framesize < 0)
        CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
      else
        {
        /* With a saved frame the comparison value is read relative to the
        address stored in the private data slot. */
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
        CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 2), CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
        }
      /* Drop STR_PTR for non-greedy plus quantifier. */
      if (opcode != OP_ONCE)
        free_stack(common, 1);
      }
    else
      JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
    }
  /* Target of the OP_KETRMIN loop-back emitted near the end of the function. */
  rmin_label = LABEL();
  if (repeat_type != 0)
    OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
  }
else if (bra == OP_BRAZERO)
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  brazero = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
  }
else if (repeat_type == OP_EXACT)
  {
  /* Start the counter at 1; exact_label is the target of the counting loop
  emitted near the end of the function. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
  exact_label = LABEL();
  }

/* Capturing group: pop the saved values and write them back to the OVECTOR
pair (and to capture_last_ptr when that is in use). Nothing is popped here
when the bracket is marked in optimized_cbracket and capture_last_ptr is 0. */
if (offset != 0)
  {
  if (common->capture_last_ptr != 0)
    {
    SLJIT_ASSERT(common->optimized_cbracket[offset >> 1] == 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
    free_stack(common, 3);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP2, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
    }
  else if (common->optimized_cbracket[offset >> 1] == 0)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    free_stack(common, 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
    }
  }

/* Dispatch to the backtracking code of the alternative that matched. OP_ONCE
jumps over everything up to JUMPHERE(once) below; the other groups pop a
selector into TMP1 and branch on it. */
if (SLJIT_UNLIKELY(opcode == OP_ONCE))
  {
  if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
    {
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
    }
  once = JUMP(SLJIT_JUMP);
  }
else if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
  {
  if (has_alternatives)
    {
    /* Always exactly one alternative. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);

    alt_max = 2;
    alt1 = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw));
    }
  }
else if (has_alternatives)
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);

  if (alt_max > 4)
    {
    /* Table jump if alt_max is greater than 4. */
    next_update_addr = allocate_read_only_data(common, alt_max * sizeof(sljit_uw));
    if (SLJIT_UNLIKELY(next_update_addr == NULL))
      return;
    sljit_emit_ijump(compiler, SLJIT_JUMP, SLJIT_MEM1(TMP1), (sljit_sw)next_update_addr);
    add_label_addr(common, next_update_addr++);
    }
  else
    {
    /* Up to four alternatives are selected with compare-and-branch; the
    remaining comparisons are emitted inside the alternative loop below. */
    if (alt_max == 4)
      alt2 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw));
    alt1 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw));
    }
  }

/* Backtracking code of the items compiled for the first alternative. */
COMPILE_BACKTRACKINGPATH(current->top);
if (current->topbacktracks)
  set_jumps(current->topbacktracks, LABEL());

if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
  {
  /* Conditional block always has at most one alternative. */
  if (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT)
    {
    SLJIT_ASSERT(has_alternatives);
    assert = CURRENT_AS(bracket_backtrack)->u.assert;
    if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK))
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-assert->framesize - 1));
      }
    /* cond skips the code of the other alternative; it is bound after the
    alternative loop. The condfailed jumps land right here. */
    cond = JUMP(SLJIT_JUMP);
    set_jumps(CURRENT_AS(bracket_backtrack)->u.assert->condfailed, LABEL());
    }
  else if (CURRENT_AS(bracket_backtrack)->u.condfailed != NULL)
    {
    SLJIT_ASSERT(has_alternatives);
    cond = JUMP(SLJIT_JUMP);
    set_jumps(CURRENT_AS(bracket_backtrack)->u.condfailed, LABEL());
    }
  else
    SLJIT_ASSERT(!has_alternatives);
  }

if (has_alternatives)
  {
  /* alt_count is the selector value stored for the alternative compiled in
  the current iteration; it advances in steps of sizeof(sljit_uw). */
  alt_count = sizeof(sljit_uw);
  do
    {
    current->top = NULL;
    current->topbacktracks = NULL;
    current->nextbacktracks = NULL;
    /* Conditional blocks always have an additional alternative, even if it is empty. */
    if (*cc == OP_ALT)
      {
      ccprev = cc + 1 + LINK_SIZE;
      cc += GET(cc, 1);
      if (opcode != OP_COND && opcode != OP_SCOND)
        {
        /* Reload STR_PTR before the next alternative is tried: from the
        private data slot when there is one, otherwise from the stack. */
        if (opcode != OP_ONCE)
          {
          if (private_data_ptr != 0)
            OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
          else
            OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
          }
        else
          OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(needs_control_head ? 1 : 0));
        }
      compile_matchingpath(common, ccprev, cc, current);
      if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
        return;
      }

    /* Instructions after the current alternative is successfully matched. */
    /* There is a similar code in compile_bracket_matchingpath. */
    if (opcode == OP_ONCE)
      match_once_common(common, ket, CURRENT_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);

    /* First pass: count the stack slots needed, so that a single
    allocate_stack() call covers all of them. */
    stacksize = 0;
    if (repeat_type == OP_MINUPTO)
      {
      /* We need to preserve the counter. TMP2 will be used below. */
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
      stacksize++;
      }
    if (ket != OP_KET || bra != OP_BRA)
      stacksize++;
    if (offset != 0)
      {
      if (common->capture_last_ptr != 0)
        stacksize++;
      if (common->optimized_cbracket[offset >> 1] == 0)
        stacksize += 2;
      }
    if (opcode != OP_ONCE)
      stacksize++;

    if (stacksize > 0)
      allocate_stack(common, stacksize);

    /* Second pass: fill the slots; stacksize is reused as the slot index. */
    stacksize = 0;
    if (repeat_type == OP_MINUPTO)
      {
      /* TMP2 was set above. */
      OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
      stacksize++;
      }

    if (ket != OP_KET || bra != OP_BRA)
      {
      if (ket != OP_KET)
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
      else
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
      stacksize++;
      }

    if (offset != 0)
      stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);

    /* Store the selector that the dispatch code above reads into TMP1. */
    if (opcode != OP_ONCE)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, alt_count);

    if (offset != 0 && ket == OP_KETRMAX && common->optimized_cbracket[offset >> 1] != 0)
      {
      /* If ket is not OP_KETRMAX, this code path is executed after the jump to alternative_matchingpath. */
      SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
      }

    JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->alternative_matchingpath);

    /* Bind the dispatch target of this alternative: the next table entry
    for a table jump, otherwise the pending alt1 / alt2 comparison jump.
    Further comparisons are emitted for three and four alternatives. */
    if (opcode != OP_ONCE)
      {
      if (alt_max > 4)
        add_label_addr(common, next_update_addr++);
      else
        {
        if (alt_count != 2 * sizeof(sljit_uw))
          {
          JUMPHERE(alt1);
          if (alt_max == 3 && alt_count == sizeof(sljit_uw))
            alt2 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw));
          }
        else
          {
          JUMPHERE(alt2);
          if (alt_max == 4)
            alt1 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_uw));
          }
        }
      alt_count += sizeof(sljit_uw);
      }

    /* Backtracking code of the alternative that was just compiled. */
    COMPILE_BACKTRACKINGPATH(current->top);
    if (current->topbacktracks)
      set_jumps(current->topbacktracks, LABEL());
    SLJIT_ASSERT(!current->nextbacktracks);
    }
  while (*cc == OP_ALT);

  if (cond != NULL)
    {
    SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND);
    assert = CURRENT_AS(bracket_backtrack)->u.assert;
    /* Negative assertion condition with a saved frame: revert the frame
    before the cond jump is bound. */
    if ((ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT) && assert->framesize >= 0)
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-assert->framesize - 1));
      }
    JUMPHERE(cond);
    }

  /* Free the STR_PTR. */
  if (private_data_ptr == 0)
    free_stack(common, 1);
  }

/* Every alternative has failed: undo what the group saved on entry. */
if (offset != 0)
  {
  /* Using both tmp register is better for instruction scheduling. */
  if (common->optimized_cbracket[offset >> 1] != 0)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    free_stack(common, 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
    }
  else
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }
  }
else if (opcode == OP_SBRA || opcode == OP_SCOND)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  }
else if (opcode == OP_ONCE)
  {
  /* Re-derive the end of the first alternative; cc was advanced by the
  alternative loop above. */
  cc = ccbegin + GET(ccbegin, 1);
  stacksize = needs_control_head ? 1 : 0;

  if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
    {
    /* Reset head and drop saved frame. */
    stacksize += CURRENT_AS(bracket_backtrack)->u.framesize + ((ket != OP_KET || *cc == OP_ALT) ? 2 : 1);
    }
  else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN))
    {
    /* The STR_PTR must be released. */
    stacksize++;
    }

  if (stacksize > 0)
    free_stack(common, stacksize);

  /* The jump created in the OP_ONCE dispatch branch above arrives here. */
  JUMPHERE(once);
  /* Restore previous private_data_ptr */
  if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 1));
  else if (ket == OP_KETRMIN)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    /* See the comment below. */
    free_stack(common, 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }
  }

/* Final repeat handling: either loop back for another attempt or fall
through to the code of the previous backtrack record. */
if (repeat_type == OP_EXACT)
  {
  /* Increment the counter and go back to exact_label while it does not
  exceed repeat_count. */
  OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
  CMPTO(SLJIT_LESS_EQUAL, TMP1, 0, SLJIT_IMM, repeat_count, exact_label);
  }
else if (ket == OP_KETRMAX)
  {
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  if (bra != OP_BRAZERO)
    free_stack(common, 1);

  /* A non-zero saved STR_PTR re-enters the backtracking code of the
  previous iteration. */
  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
  if (bra == OP_BRAZERO)
    {
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
    JUMPHERE(brazero);
    free_stack(common, 1);
    }
  }
else if (ket == OP_KETRMIN)
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  /* OP_ONCE removes everything in case of a backtrack, so we don't
  need to explicitly release the STR_PTR. The extra release would
  affect badly the free_stack(2) above. */
  if (opcode != OP_ONCE)
    free_stack(common, 1);
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rmin_label);
  if (opcode == OP_ONCE)
    free_stack(common, bra == OP_BRAMINZERO ? 2 : 1);
  else if (bra == OP_BRAMINZERO)
    free_stack(common, 1);
  }
else if (bra == OP_BRAZERO)
  {
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
  JUMPHERE(brazero);
  }
}
10330
compile_bracketpos_backtrackingpath(compiler_common * common,struct backtrack_common * current)10331 static SLJIT_INLINE void compile_bracketpos_backtrackingpath(compiler_common *common, struct backtrack_common *current)
10332 {
10333 DEFINE_COMPILER;
10334 int offset;
10335 struct sljit_jump *jump;
10336
10337 if (CURRENT_AS(bracketpos_backtrack)->framesize < 0)
10338 {
10339 if (*current->cc == OP_CBRAPOS || *current->cc == OP_SCBRAPOS)
10340 {
10341 offset = (GET2(current->cc, 1 + LINK_SIZE)) << 1;
10342 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10343 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
10344 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
10345 if (common->capture_last_ptr != 0)
10346 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
10347 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
10348 if (common->capture_last_ptr != 0)
10349 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
10350 }
10351 set_jumps(current->topbacktracks, LABEL());
10352 free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
10353 return;
10354 }
10355
10356 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr);
10357 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10358
10359 if (current->topbacktracks)
10360 {
10361 jump = JUMP(SLJIT_JUMP);
10362 set_jumps(current->topbacktracks, LABEL());
10363 /* Drop the stack frame. */
10364 free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
10365 JUMPHERE(jump);
10366 }
10367 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracketpos_backtrack)->framesize - 1));
10368 }
10369
compile_braminzero_backtrackingpath(compiler_common * common,struct backtrack_common * current)10370 static SLJIT_INLINE void compile_braminzero_backtrackingpath(compiler_common *common, struct backtrack_common *current)
10371 {
10372 assert_backtrack backtrack;
10373
10374 current->top = NULL;
10375 current->topbacktracks = NULL;
10376 current->nextbacktracks = NULL;
10377 if (current->cc[1] > OP_ASSERTBACK_NOT)
10378 {
10379 /* Manual call of compile_bracket_matchingpath and compile_bracket_backtrackingpath. */
10380 compile_bracket_matchingpath(common, current->cc, current);
10381 compile_bracket_backtrackingpath(common, current->top);
10382 }
10383 else
10384 {
10385 memset(&backtrack, 0, sizeof(backtrack));
10386 backtrack.common.cc = current->cc;
10387 backtrack.matchingpath = CURRENT_AS(braminzero_backtrack)->matchingpath;
10388 /* Manual call of compile_assert_matchingpath. */
10389 compile_assert_matchingpath(common, current->cc, &backtrack, FALSE);
10390 }
10391 SLJIT_ASSERT(!current->nextbacktracks && !current->topbacktracks);
10392 }
10393
compile_control_verb_backtrackingpath(compiler_common * common,struct backtrack_common * current)10394 static SLJIT_INLINE void compile_control_verb_backtrackingpath(compiler_common *common, struct backtrack_common *current)
10395 {
10396 DEFINE_COMPILER;
10397 pcre_uchar opcode = *current->cc;
10398 struct sljit_label *loop;
10399 struct sljit_jump *jump;
10400
10401 if (opcode == OP_THEN || opcode == OP_THEN_ARG)
10402 {
10403 if (common->then_trap != NULL)
10404 {
10405 SLJIT_ASSERT(common->control_head_ptr != 0);
10406
10407 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
10408 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, type_then_trap);
10409 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, common->then_trap->start);
10410 jump = JUMP(SLJIT_JUMP);
10411
10412 loop = LABEL();
10413 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10414 JUMPHERE(jump);
10415 CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0, loop);
10416 CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0, loop);
10417 add_jump(compiler, &common->then_trap->quit, JUMP(SLJIT_JUMP));
10418 return;
10419 }
10420 else if (common->positive_assert)
10421 {
10422 add_jump(compiler, &common->positive_assert_quit, JUMP(SLJIT_JUMP));
10423 return;
10424 }
10425 }
10426
10427 if (common->local_exit)
10428 {
10429 if (common->quit_label == NULL)
10430 add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
10431 else
10432 JUMPTO(SLJIT_JUMP, common->quit_label);
10433 return;
10434 }
10435
10436 if (opcode == OP_SKIP_ARG)
10437 {
10438 SLJIT_ASSERT(common->control_head_ptr != 0);
10439 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
10440 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
10441 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, (sljit_sw)(current->cc + 2));
10442 sljit_emit_ijump(compiler, SLJIT_CALL2, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_search_mark));
10443 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
10444
10445 OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
10446 add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, -1));
10447 return;
10448 }
10449
10450 if (opcode == OP_SKIP)
10451 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10452 else
10453 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_IMM, 0);
10454 add_jump(compiler, &common->reset_match, JUMP(SLJIT_JUMP));
10455 }
10456
compile_then_trap_backtrackingpath(compiler_common * common,struct backtrack_common * current)10457 static SLJIT_INLINE void compile_then_trap_backtrackingpath(compiler_common *common, struct backtrack_common *current)
10458 {
10459 DEFINE_COMPILER;
10460 struct sljit_jump *jump;
10461 int size;
10462
10463 if (CURRENT_AS(then_trap_backtrack)->then_trap)
10464 {
10465 common->then_trap = CURRENT_AS(then_trap_backtrack)->then_trap;
10466 return;
10467 }
10468
10469 size = CURRENT_AS(then_trap_backtrack)->framesize;
10470 size = 3 + (size < 0 ? 0 : size);
10471
10472 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(size - 3));
10473 free_stack(common, size);
10474 jump = JUMP(SLJIT_JUMP);
10475
10476 set_jumps(CURRENT_AS(then_trap_backtrack)->quit, LABEL());
10477 /* STACK_TOP is set by THEN. */
10478 if (CURRENT_AS(then_trap_backtrack)->framesize >= 0)
10479 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10480 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10481 free_stack(common, 3);
10482
10483 JUMPHERE(jump);
10484 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
10485 }
10486
/* Emits the backtracking code for a list of backtrack records. The list is
walked from "current" through the prev links; for each record the pending
nextbacktracks jumps are bound first and then the opcode at the record's cc
selects the emitter. common->then_trap is saved on entry and restored on
exit. */
static void compile_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
then_trap_backtrack *outer_then_trap = common->then_trap;

for (; current != NULL; current = current->prev)
  {
  if (current->nextbacktracks != NULL)
    set_jumps(current->nextbacktracks, LABEL());

  switch (*current->cc)
    {
    /* Pop the saved value and store it back to OVECTOR(0). */
    case OP_SET_SOM:
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP1, 0);
    break;

    /* Repeated single items and character classes. */
    case OP_STAR:
    case OP_MINSTAR:
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_UPTO:
    case OP_MINUPTO:
    case OP_EXACT:
    case OP_POSSTAR:
    case OP_POSPLUS:
    case OP_POSQUERY:
    case OP_POSUPTO:
    case OP_STARI:
    case OP_MINSTARI:
    case OP_PLUSI:
    case OP_MINPLUSI:
    case OP_QUERYI:
    case OP_MINQUERYI:
    case OP_UPTOI:
    case OP_MINUPTOI:
    case OP_EXACTI:
    case OP_POSSTARI:
    case OP_POSPLUSI:
    case OP_POSQUERYI:
    case OP_POSUPTOI:
    case OP_NOTSTAR:
    case OP_NOTMINSTAR:
    case OP_NOTPLUS:
    case OP_NOTMINPLUS:
    case OP_NOTQUERY:
    case OP_NOTMINQUERY:
    case OP_NOTUPTO:
    case OP_NOTMINUPTO:
    case OP_NOTEXACT:
    case OP_NOTPOSSTAR:
    case OP_NOTPOSPLUS:
    case OP_NOTPOSQUERY:
    case OP_NOTPOSUPTO:
    case OP_NOTSTARI:
    case OP_NOTMINSTARI:
    case OP_NOTPLUSI:
    case OP_NOTMINPLUSI:
    case OP_NOTQUERYI:
    case OP_NOTMINQUERYI:
    case OP_NOTUPTOI:
    case OP_NOTMINUPTOI:
    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI:
    case OP_NOTPOSPLUSI:
    case OP_NOTPOSQUERYI:
    case OP_NOTPOSUPTOI:
    case OP_TYPESTAR:
    case OP_TYPEMINSTAR:
    case OP_TYPEPLUS:
    case OP_TYPEMINPLUS:
    case OP_TYPEQUERY:
    case OP_TYPEMINQUERY:
    case OP_TYPEUPTO:
    case OP_TYPEMINUPTO:
    case OP_TYPEEXACT:
    case OP_TYPEPOSSTAR:
    case OP_TYPEPOSPLUS:
    case OP_TYPEPOSQUERY:
    case OP_TYPEPOSUPTO:
    case OP_CLASS:
    case OP_NCLASS:
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
    case OP_XCLASS:
#endif
    compile_iterator_backtrackingpath(common, current);
    break;

    /* Back references. */
    case OP_REF:
    case OP_REFI:
    case OP_DNREF:
    case OP_DNREFI:
    compile_ref_iterator_backtrackingpath(common, current);
    break;

    case OP_RECURSE:
    compile_recurse_backtrackingpath(common, current);
    break;

    /* Assertions. */
    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    compile_assert_backtrackingpath(common, current);
    break;

    /* Bracketed groups. */
    case OP_ONCE:
    case OP_ONCE_NC:
    case OP_BRA:
    case OP_CBRA:
    case OP_COND:
    case OP_SBRA:
    case OP_SCBRA:
    case OP_SCOND:
    compile_bracket_backtrackingpath(common, current);
    break;

    /* The opcode after the prefix tells an assertion from a bracket. */
    case OP_BRAZERO:
    if (current->cc[1] <= OP_ASSERTBACK_NOT)
      compile_assert_backtrackingpath(common, current);
    else
      compile_bracket_backtrackingpath(common, current);
    break;

    /* Possessive brackets. */
    case OP_BRAPOS:
    case OP_CBRAPOS:
    case OP_SBRAPOS:
    case OP_SCBRAPOS:
    case OP_BRAPOSZERO:
    compile_bracketpos_backtrackingpath(common, current);
    break;

    case OP_BRAMINZERO:
    compile_braminzero_backtrackingpath(common, current);
    break;

    /* With has_skip_arg five slots were saved: slot 4 is the mark value and
    slot 0 the control head. Otherwise only the mark value was saved. */
    case OP_MARK:
    if (common->has_skip_arg)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(4));
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      free_stack(common, 5);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);
      }
    else
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      free_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
      }
    break;

    /* Backtracking control verbs. */
    case OP_THEN:
    case OP_THEN_ARG:
    case OP_PRUNE:
    case OP_PRUNE_ARG:
    case OP_SKIP:
    case OP_SKIP_ARG:
    compile_control_verb_backtrackingpath(common, current);
    break;

    /* Leave through the quit path; the no-match return value is loaded
    unless a local exit is active. */
    case OP_COMMIT:
    if (!common->local_exit)
      OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
    if (common->quit_label != NULL)
      JUMPTO(SLJIT_JUMP, common->quit_label);
    else
      add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
    break;

    /* Nothing to undo: only the failure jumps are bound. */
    case OP_CALLOUT:
    case OP_FAIL:
    case OP_ACCEPT:
    case OP_ASSERT_ACCEPT:
    set_jumps(current->topbacktracks, LABEL());
    break;

    case OP_THEN_TRAP:
    /* A virtual opcode for then traps. */
    compile_then_trap_backtrackingpath(common, current);
    break;

    default:
    SLJIT_UNREACHABLE();
    break;
    }
  }

common->then_trap = outer_then_trap;
}
10673
compile_recurse(compiler_common * common)10674 static SLJIT_INLINE void compile_recurse(compiler_common *common)
10675 {
10676 DEFINE_COMPILER;
10677 pcre_uchar *cc = common->start + common->currententry->start;
10678 pcre_uchar *ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE);
10679 pcre_uchar *ccend = bracketend(cc) - (1 + LINK_SIZE);
10680 BOOL needs_control_head;
10681 int framesize = get_framesize(common, cc, NULL, TRUE, &needs_control_head);
10682 int private_data_size = get_private_data_copy_length(common, ccbegin, ccend, needs_control_head);
10683 int alternativesize;
10684 BOOL needs_frame;
10685 backtrack_common altbacktrack;
10686 struct sljit_jump *jump;
10687
10688 /* Recurse captures then. */
10689 common->then_trap = NULL;
10690
10691 SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS);
10692 needs_frame = framesize >= 0;
10693 if (!needs_frame)
10694 framesize = 0;
10695 alternativesize = *(cc + GET(cc, 1)) == OP_ALT ? 1 : 0;
10696
10697 SLJIT_ASSERT(common->currententry->entry == NULL && common->recursive_head_ptr != 0);
10698 common->currententry->entry = LABEL();
10699 set_jumps(common->currententry->calls, common->currententry->entry);
10700
10701 sljit_emit_fast_enter(compiler, TMP2, 0);
10702 count_match(common);
10703 allocate_stack(common, private_data_size + framesize + alternativesize);
10704 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(private_data_size + framesize + alternativesize - 1), TMP2, 0);
10705 copy_private_data(common, ccbegin, ccend, TRUE, framesize + alternativesize, private_data_size + framesize + alternativesize, needs_control_head);
10706 if (needs_control_head)
10707 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
10708 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, STACK_TOP, 0);
10709 if (needs_frame)
10710 init_frame(common, cc, NULL, framesize + alternativesize - 1, alternativesize, TRUE);
10711
10712 if (alternativesize > 0)
10713 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
10714
10715 memset(&altbacktrack, 0, sizeof(backtrack_common));
10716 common->quit_label = NULL;
10717 common->accept_label = NULL;
10718 common->quit = NULL;
10719 common->accept = NULL;
10720 altbacktrack.cc = ccbegin;
10721 cc += GET(cc, 1);
10722 while (1)
10723 {
10724 altbacktrack.top = NULL;
10725 altbacktrack.topbacktracks = NULL;
10726
10727 if (altbacktrack.cc != ccbegin)
10728 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10729
10730 compile_matchingpath(common, altbacktrack.cc, cc, &altbacktrack);
10731 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10732 return;
10733
10734 add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
10735
10736 compile_backtrackingpath(common, altbacktrack.top);
10737 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10738 return;
10739 set_jumps(altbacktrack.topbacktracks, LABEL());
10740
10741 if (*cc != OP_ALT)
10742 break;
10743
10744 altbacktrack.cc = cc + 1 + LINK_SIZE;
10745 cc += GET(cc, 1);
10746 }
10747
10748 /* None of them matched. */
10749 OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
10750 jump = JUMP(SLJIT_JUMP);
10751
10752 if (common->quit != NULL)
10753 {
10754 set_jumps(common->quit, LABEL());
10755 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
10756 if (needs_frame)
10757 {
10758 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
10759 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10760 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
10761 }
10762 OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
10763 common->quit = NULL;
10764 add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
10765 }
10766
10767 set_jumps(common->accept, LABEL());
10768 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
10769 if (needs_frame)
10770 {
10771 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
10772 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10773 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
10774 }
10775 OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 1);
10776
10777 JUMPHERE(jump);
10778 if (common->quit != NULL)
10779 set_jumps(common->quit, LABEL());
10780 copy_private_data(common, ccbegin, ccend, FALSE, framesize + alternativesize, private_data_size + framesize + alternativesize, needs_control_head);
10781 free_stack(common, private_data_size + framesize + alternativesize);
10782 if (needs_control_head)
10783 {
10784 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-3));
10785 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
10786 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, TMP1, 0);
10787 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
10788 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);
10789 }
10790 else
10791 {
10792 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
10793 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
10794 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, TMP2, 0);
10795 }
10796 sljit_emit_fast_return(compiler, SLJIT_MEM1(STACK_TOP), STACK(-1));
10797 }
10798
10799 #undef COMPILE_BACKTRACKINGPATH
10800 #undef CURRENT_AS
10801
10802 void
PRIV(jit_compile)10803 PRIV(jit_compile)(const REAL_PCRE *re, PUBL(extra) *extra, int mode)
10804 {
10805 struct sljit_compiler *compiler;
10806 backtrack_common rootbacktrack;
10807 compiler_common common_data;
10808 compiler_common *common = &common_data;
10809 const sljit_u8 *tables = re->tables;
10810 pcre_study_data *study;
10811 int private_data_size;
10812 pcre_uchar *ccend;
10813 executable_functions *functions;
10814 void *executable_func;
10815 sljit_uw executable_size;
10816 sljit_uw total_length;
10817 label_addr_list *label_addr;
10818 struct sljit_label *mainloop_label = NULL;
10819 struct sljit_label *continue_match_label;
10820 struct sljit_label *empty_match_found_label = NULL;
10821 struct sljit_label *empty_match_backtrack_label = NULL;
10822 struct sljit_label *reset_match_label;
10823 struct sljit_label *quit_label;
10824 struct sljit_jump *jump;
10825 struct sljit_jump *minlength_check_failed = NULL;
10826 struct sljit_jump *reqbyte_notfound = NULL;
10827 struct sljit_jump *empty_match = NULL;
10828
10829 SLJIT_ASSERT((extra->flags & PCRE_EXTRA_STUDY_DATA) != 0);
10830 study = extra->study_data;
10831
10832 if (!tables)
10833 tables = PRIV(default_tables);
10834
10835 memset(&rootbacktrack, 0, sizeof(backtrack_common));
10836 memset(common, 0, sizeof(compiler_common));
10837 rootbacktrack.cc = (pcre_uchar *)re + re->name_table_offset + re->name_count * re->name_entry_size;
10838
10839 common->start = rootbacktrack.cc;
10840 common->read_only_data_head = NULL;
10841 common->fcc = tables + fcc_offset;
10842 common->lcc = (sljit_sw)(tables + lcc_offset);
10843 common->mode = mode;
10844 common->might_be_empty = study->minlength == 0;
10845 common->nltype = NLTYPE_FIXED;
10846 switch(re->options & PCRE_NEWLINE_BITS)
10847 {
10848 case 0:
10849 /* Compile-time default */
10850 switch(NEWLINE)
10851 {
10852 case -1: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
10853 case -2: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
10854 default: common->newline = NEWLINE; break;
10855 }
10856 break;
10857 case PCRE_NEWLINE_CR: common->newline = CHAR_CR; break;
10858 case PCRE_NEWLINE_LF: common->newline = CHAR_NL; break;
10859 case PCRE_NEWLINE_CR+
10860 PCRE_NEWLINE_LF: common->newline = (CHAR_CR << 8) | CHAR_NL; break;
10861 case PCRE_NEWLINE_ANY: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
10862 case PCRE_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
10863 default: return;
10864 }
10865 common->nlmax = READ_CHAR_MAX;
10866 common->nlmin = 0;
10867 if ((re->options & PCRE_BSR_ANYCRLF) != 0)
10868 common->bsr_nltype = NLTYPE_ANYCRLF;
10869 else if ((re->options & PCRE_BSR_UNICODE) != 0)
10870 common->bsr_nltype = NLTYPE_ANY;
10871 else
10872 {
10873 #ifdef BSR_ANYCRLF
10874 common->bsr_nltype = NLTYPE_ANYCRLF;
10875 #else
10876 common->bsr_nltype = NLTYPE_ANY;
10877 #endif
10878 }
10879 common->bsr_nlmax = READ_CHAR_MAX;
10880 common->bsr_nlmin = 0;
10881 common->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
10882 common->ctypes = (sljit_sw)(tables + ctypes_offset);
10883 common->name_table = ((pcre_uchar *)re) + re->name_table_offset;
10884 common->name_count = re->name_count;
10885 common->name_entry_size = re->name_entry_size;
10886 common->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
10887 #ifdef SUPPORT_UTF
10888 /* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */
10889 common->utf = (re->options & PCRE_UTF8) != 0;
10890 #ifdef SUPPORT_UCP
10891 common->use_ucp = (re->options & PCRE_UCP) != 0;
10892 #endif
10893 if (common->utf)
10894 {
10895 if (common->nltype == NLTYPE_ANY)
10896 common->nlmax = 0x2029;
10897 else if (common->nltype == NLTYPE_ANYCRLF)
10898 common->nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
10899 else
10900 {
10901 /* We only care about the first newline character. */
10902 common->nlmax = common->newline & 0xff;
10903 }
10904
10905 if (common->nltype == NLTYPE_FIXED)
10906 common->nlmin = common->newline & 0xff;
10907 else
10908 common->nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
10909
10910 if (common->bsr_nltype == NLTYPE_ANY)
10911 common->bsr_nlmax = 0x2029;
10912 else
10913 common->bsr_nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
10914 common->bsr_nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
10915 }
10916 #endif /* SUPPORT_UTF */
10917 ccend = bracketend(common->start);
10918
10919 /* Calculate the local space size on the stack. */
10920 common->ovector_start = LIMIT_MATCH + sizeof(sljit_sw);
10921 common->optimized_cbracket = (sljit_u8 *)SLJIT_MALLOC(re->top_bracket + 1, compiler->allocator_data);
10922 if (!common->optimized_cbracket)
10923 return;
10924 #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 1
10925 memset(common->optimized_cbracket, 0, re->top_bracket + 1);
10926 #else
10927 memset(common->optimized_cbracket, 1, re->top_bracket + 1);
10928 #endif
10929
10930 SLJIT_ASSERT(*common->start == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET);
10931 #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 2
10932 common->capture_last_ptr = common->ovector_start;
10933 common->ovector_start += sizeof(sljit_sw);
10934 #endif
10935 if (!check_opcode_types(common, common->start, ccend))
10936 {
10937 SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
10938 return;
10939 }
10940
10941 /* Checking flags and updating ovector_start. */
10942 if (mode == JIT_COMPILE && (re->flags & PCRE_REQCHSET) != 0 && (re->options & PCRE_NO_START_OPTIMIZE) == 0)
10943 {
10944 common->req_char_ptr = common->ovector_start;
10945 common->ovector_start += sizeof(sljit_sw);
10946 }
10947 if (mode != JIT_COMPILE)
10948 {
10949 common->start_used_ptr = common->ovector_start;
10950 common->ovector_start += sizeof(sljit_sw);
10951 if (mode == JIT_PARTIAL_SOFT_COMPILE)
10952 {
10953 common->hit_start = common->ovector_start;
10954 common->ovector_start += 2 * sizeof(sljit_sw);
10955 }
10956 }
10957 if ((re->options & PCRE_FIRSTLINE) != 0)
10958 {
10959 common->match_end_ptr = common->ovector_start;
10960 common->ovector_start += sizeof(sljit_sw);
10961 }
10962 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
10963 common->control_head_ptr = 1;
10964 #endif
10965 if (common->control_head_ptr != 0)
10966 {
10967 common->control_head_ptr = common->ovector_start;
10968 common->ovector_start += sizeof(sljit_sw);
10969 }
10970 if (common->has_set_som)
10971 {
10972 /* Saving the real start pointer is necessary. */
10973 common->start_ptr = common->ovector_start;
10974 common->ovector_start += sizeof(sljit_sw);
10975 }
10976
10977 /* Aligning ovector to even number of sljit words. */
10978 if ((common->ovector_start & sizeof(sljit_sw)) != 0)
10979 common->ovector_start += sizeof(sljit_sw);
10980
10981 if (common->start_ptr == 0)
10982 common->start_ptr = OVECTOR(0);
10983
10984 /* Capturing brackets cannot be optimized if callouts are allowed. */
10985 if (common->capture_last_ptr != 0)
10986 memset(common->optimized_cbracket, 0, re->top_bracket + 1);
10987
10988 SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0));
10989 common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw);
10990
10991 total_length = ccend - common->start;
10992 common->private_data_ptrs = (sljit_s32 *)SLJIT_MALLOC(total_length * (sizeof(sljit_s32) + (common->has_then ? 1 : 0)), compiler->allocator_data);
10993 if (!common->private_data_ptrs)
10994 {
10995 SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
10996 return;
10997 }
10998 memset(common->private_data_ptrs, 0, total_length * sizeof(sljit_s32));
10999
11000 private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw);
11001 set_private_data_ptrs(common, &private_data_size, ccend);
11002 if ((re->options & PCRE_ANCHORED) == 0 && (re->options & PCRE_NO_START_OPTIMIZE) == 0)
11003 {
11004 if (!detect_fast_forward_skip(common, &private_data_size) && !common->has_skip_in_assert_back)
11005 detect_fast_fail(common, common->start, &private_data_size, 4);
11006 }
11007
11008 SLJIT_ASSERT(common->fast_fail_start_ptr <= common->fast_fail_end_ptr);
11009
11010 if (private_data_size > SLJIT_MAX_LOCAL_SIZE)
11011 {
11012 SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
11013 SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
11014 return;
11015 }
11016
11017 if (common->has_then)
11018 {
11019 common->then_offsets = (sljit_u8 *)(common->private_data_ptrs + total_length);
11020 memset(common->then_offsets, 0, total_length);
11021 set_then_offsets(common, common->start, NULL);
11022 }
11023
11024 compiler = sljit_create_compiler(NULL);
11025 if (!compiler)
11026 {
11027 SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
11028 SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
11029 return;
11030 }
11031 common->compiler = compiler;
11032
11033 /* Main pcre_jit_exec entry. */
11034 sljit_emit_enter(compiler, 0, 1, 5, 5, 0, 0, private_data_size);
11035
11036 /* Register init. */
11037 reset_ovector(common, (re->top_bracket + 1) * 2);
11038 if (common->req_char_ptr != 0)
11039 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, SLJIT_R0, 0);
11040
11041 OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_S0, 0);
11042 OP1(SLJIT_MOV, TMP1, 0, SLJIT_S0, 0);
11043 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
11044 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end));
11045 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
11046 OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, limit_match));
11047 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, base));
11048 OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, limit));
11049 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
11050 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH, TMP1, 0);
11051
11052 if (common->fast_fail_start_ptr < common->fast_fail_end_ptr)
11053 reset_fast_fail(common);
11054
11055 if (mode == JIT_PARTIAL_SOFT_COMPILE)
11056 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
11057 if (common->mark_ptr != 0)
11058 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
11059 if (common->control_head_ptr != 0)
11060 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
11061
11062 /* Main part of the matching */
11063 if ((re->options & PCRE_ANCHORED) == 0)
11064 {
11065 mainloop_label = mainloop_entry(common, (re->flags & PCRE_HASCRORLF) != 0);
11066 continue_match_label = LABEL();
11067 /* Forward search if possible. */
11068 if ((re->options & PCRE_NO_START_OPTIMIZE) == 0)
11069 {
11070 if (mode == JIT_COMPILE && fast_forward_first_n_chars(common))
11071 ;
11072 else if ((re->flags & PCRE_FIRSTSET) != 0)
11073 fast_forward_first_char(common, (pcre_uchar)re->first_char, (re->flags & PCRE_FCH_CASELESS) != 0);
11074 else if ((re->flags & PCRE_STARTLINE) != 0)
11075 fast_forward_newline(common);
11076 else if (study != NULL && (study->flags & PCRE_STUDY_MAPPED) != 0)
11077 fast_forward_start_bits(common, study->start_bits);
11078 }
11079 }
11080 else
11081 continue_match_label = LABEL();
11082
11083 if (mode == JIT_COMPILE && study->minlength > 0 && (re->options & PCRE_NO_START_OPTIMIZE) == 0)
11084 {
11085 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
11086 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(study->minlength));
11087 minlength_check_failed = CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0);
11088 }
11089 if (common->req_char_ptr != 0)
11090 reqbyte_notfound = search_requested_char(common, (pcre_uchar)re->req_char, (re->flags & PCRE_RCH_CASELESS) != 0, (re->flags & PCRE_FIRSTSET) != 0);
11091
11092 /* Store the current STR_PTR in OVECTOR(0). */
11093 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
11094 /* Copy the limit of allowed recursions. */
11095 OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH);
11096 if (common->capture_last_ptr != 0)
11097 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, -1);
11098 if (common->fast_forward_bc_ptr != NULL)
11099 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), PRIVATE_DATA(common->fast_forward_bc_ptr + 1), STR_PTR, 0);
11100
11101 if (common->start_ptr != OVECTOR(0))
11102 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_ptr, STR_PTR, 0);
11103
11104 /* Copy the beginning of the string. */
11105 if (mode == JIT_PARTIAL_SOFT_COMPILE)
11106 {
11107 jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
11108 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
11109 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start + sizeof(sljit_sw), STR_PTR, 0);
11110 JUMPHERE(jump);
11111 }
11112 else if (mode == JIT_PARTIAL_HARD_COMPILE)
11113 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
11114
11115 compile_matchingpath(common, common->start, ccend, &rootbacktrack);
11116 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
11117 {
11118 sljit_free_compiler(compiler);
11119 SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
11120 SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
11121 free_read_only_data(common->read_only_data_head, compiler->allocator_data);
11122 return;
11123 }
11124
11125 if (common->might_be_empty)
11126 {
11127 empty_match = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
11128 empty_match_found_label = LABEL();
11129 }
11130
11131 common->accept_label = LABEL();
11132 if (common->accept != NULL)
11133 set_jumps(common->accept, common->accept_label);
11134
11135 /* This means we have a match. Update the ovector. */
11136 copy_ovector(common, re->top_bracket + 1);
11137 common->quit_label = common->forced_quit_label = LABEL();
11138 if (common->quit != NULL)
11139 set_jumps(common->quit, common->quit_label);
11140 if (common->forced_quit != NULL)
11141 set_jumps(common->forced_quit, common->forced_quit_label);
11142 if (minlength_check_failed != NULL)
11143 SET_LABEL(minlength_check_failed, common->forced_quit_label);
11144 sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0);
11145
11146 if (mode != JIT_COMPILE)
11147 {
11148 common->partialmatchlabel = LABEL();
11149 set_jumps(common->partialmatch, common->partialmatchlabel);
11150 return_with_partial_match(common, common->quit_label);
11151 }
11152
11153 if (common->might_be_empty)
11154 empty_match_backtrack_label = LABEL();
11155 compile_backtrackingpath(common, rootbacktrack.top);
11156 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
11157 {
11158 sljit_free_compiler(compiler);
11159 SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
11160 SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
11161 free_read_only_data(common->read_only_data_head, compiler->allocator_data);
11162 return;
11163 }
11164
11165 SLJIT_ASSERT(rootbacktrack.prev == NULL);
11166 reset_match_label = LABEL();
11167
11168 if (mode == JIT_PARTIAL_SOFT_COMPILE)
11169 {
11170 /* Update hit_start only in the first time. */
11171 jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
11172 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr);
11173 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
11174 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, TMP1, 0);
11175 JUMPHERE(jump);
11176 }
11177
11178 /* Check we have remaining characters. */
11179 if ((re->options & PCRE_ANCHORED) == 0 && (re->options & PCRE_FIRSTLINE) != 0)
11180 {
11181 SLJIT_ASSERT(common->match_end_ptr != 0);
11182 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
11183 }
11184
11185 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP),
11186 (common->fast_forward_bc_ptr != NULL) ? (PRIVATE_DATA(common->fast_forward_bc_ptr + 1)) : common->start_ptr);
11187
11188 if ((re->options & PCRE_ANCHORED) == 0)
11189 {
11190 if (common->ff_newline_shortcut != NULL)
11191 {
11192 if ((re->options & PCRE_FIRSTLINE) == 0)
11193 CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, common->ff_newline_shortcut);
11194 /* There cannot be more newlines here. */
11195 }
11196 else
11197 CMPTO(SLJIT_LESS, STR_PTR, 0, ((re->options & PCRE_FIRSTLINE) == 0) ? STR_END : TMP1, 0, mainloop_label);
11198 }
11199
11200 /* No more remaining characters. */
11201 if (reqbyte_notfound != NULL)
11202 JUMPHERE(reqbyte_notfound);
11203
11204 if (mode == JIT_PARTIAL_SOFT_COMPILE)
11205 CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1, common->partialmatchlabel);
11206
11207 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
11208 JUMPTO(SLJIT_JUMP, common->quit_label);
11209
11210 flush_stubs(common);
11211
11212 if (common->might_be_empty)
11213 {
11214 JUMPHERE(empty_match);
11215 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
11216 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
11217 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_backtrack_label);
11218 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
11219 CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_found_label);
11220 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
11221 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found_label);
11222 JUMPTO(SLJIT_JUMP, empty_match_backtrack_label);
11223 }
11224
11225 common->fast_forward_bc_ptr = NULL;
11226 common->fast_fail_start_ptr = 0;
11227 common->fast_fail_end_ptr = 0;
11228 common->currententry = common->entries;
11229 common->local_exit = TRUE;
11230 quit_label = common->quit_label;
11231 while (common->currententry != NULL)
11232 {
11233 /* Might add new entries. */
11234 compile_recurse(common);
11235 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
11236 {
11237 sljit_free_compiler(compiler);
11238 SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
11239 SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
11240 free_read_only_data(common->read_only_data_head, compiler->allocator_data);
11241 return;
11242 }
11243 flush_stubs(common);
11244 common->currententry = common->currententry->next;
11245 }
11246 common->local_exit = FALSE;
11247 common->quit_label = quit_label;
11248
11249 /* Allocating stack, returns with PCRE_ERROR_JIT_STACKLIMIT if fails. */
11250 /* This is a (really) rare case. */
11251 set_jumps(common->stackalloc, LABEL());
11252 /* RETURN_ADDR is not a saved register. */
11253 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
11254 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP2, 0);
11255 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
11256 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
11257 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, top), STACK_TOP, 0);
11258 OP2(SLJIT_SUB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, limit), SLJIT_IMM, STACK_GROWTH_RATE);
11259
11260 sljit_emit_ijump(compiler, SLJIT_CALL2, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_stack_resize));
11261 jump = CMP(SLJIT_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
11262 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
11263 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
11264 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, top));
11265 OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, limit));
11266 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
11267 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
11268
11269 /* Allocation failed. */
11270 JUMPHERE(jump);
11271 /* We break the return address cache here, but this is a really rare case. */
11272 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_JIT_STACKLIMIT);
11273 JUMPTO(SLJIT_JUMP, common->quit_label);
11274
11275 /* Call limit reached. */
11276 set_jumps(common->calllimit, LABEL());
11277 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_MATCHLIMIT);
11278 JUMPTO(SLJIT_JUMP, common->quit_label);
11279
11280 if (common->revertframes != NULL)
11281 {
11282 set_jumps(common->revertframes, LABEL());
11283 do_revertframes(common);
11284 }
11285 if (common->wordboundary != NULL)
11286 {
11287 set_jumps(common->wordboundary, LABEL());
11288 check_wordboundary(common);
11289 }
11290 if (common->anynewline != NULL)
11291 {
11292 set_jumps(common->anynewline, LABEL());
11293 check_anynewline(common);
11294 }
11295 if (common->hspace != NULL)
11296 {
11297 set_jumps(common->hspace, LABEL());
11298 check_hspace(common);
11299 }
11300 if (common->vspace != NULL)
11301 {
11302 set_jumps(common->vspace, LABEL());
11303 check_vspace(common);
11304 }
11305 if (common->casefulcmp != NULL)
11306 {
11307 set_jumps(common->casefulcmp, LABEL());
11308 do_casefulcmp(common);
11309 }
11310 if (common->caselesscmp != NULL)
11311 {
11312 set_jumps(common->caselesscmp, LABEL());
11313 do_caselesscmp(common);
11314 }
11315 if (common->reset_match != NULL)
11316 {
11317 set_jumps(common->reset_match, LABEL());
11318 do_reset_match(common, (re->top_bracket + 1) * 2);
11319 CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP1, 0, continue_match_label);
11320 OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
11321 JUMPTO(SLJIT_JUMP, reset_match_label);
11322 }
11323 #ifdef SUPPORT_UTF
11324 #ifdef COMPILE_PCRE8
11325 if (common->utfreadchar != NULL)
11326 {
11327 set_jumps(common->utfreadchar, LABEL());
11328 do_utfreadchar(common);
11329 }
11330 if (common->utfreadchar16 != NULL)
11331 {
11332 set_jumps(common->utfreadchar16, LABEL());
11333 do_utfreadchar16(common);
11334 }
11335 if (common->utfreadtype8 != NULL)
11336 {
11337 set_jumps(common->utfreadtype8, LABEL());
11338 do_utfreadtype8(common);
11339 }
11340 #endif /* COMPILE_PCRE8 */
11341 #endif /* SUPPORT_UTF */
11342 #ifdef SUPPORT_UCP
11343 if (common->getucd != NULL)
11344 {
11345 set_jumps(common->getucd, LABEL());
11346 do_getucd(common);
11347 }
11348 #endif
11349
11350 SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
11351 SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
11352
11353 executable_func = sljit_generate_code(compiler);
11354 executable_size = sljit_get_generated_code_size(compiler);
11355 label_addr = common->label_addrs;
11356 while (label_addr != NULL)
11357 {
11358 *label_addr->update_addr = sljit_get_label_addr(label_addr->label);
11359 label_addr = label_addr->next;
11360 }
11361 sljit_free_compiler(compiler);
11362 if (executable_func == NULL)
11363 {
11364 free_read_only_data(common->read_only_data_head, compiler->allocator_data);
11365 return;
11366 }
11367
11368 /* Reuse the function descriptor if possible. */
11369 if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 && extra->executable_jit != NULL)
11370 functions = (executable_functions *)extra->executable_jit;
11371 else
11372 {
11373 /* Note: If your memory-checker has flagged the allocation below as a
11374 * memory leak, it is probably because you either forgot to call
11375 * pcre_free_study() (or pcre16_free_study()) on the pcre_extra (or
11376 * pcre16_extra) object, or you called said function after having
11377 * cleared the PCRE_EXTRA_EXECUTABLE_JIT bit from the "flags" field
11378 * of the object. (The function will only free the JIT data if the
11379 * bit remains set, as the bit indicates that the pointer to the data
11380 * is valid.)
11381 */
11382 functions = SLJIT_MALLOC(sizeof(executable_functions), compiler->allocator_data);
11383 if (functions == NULL)
11384 {
11385 /* This case is highly unlikely since we just recently
11386 freed a lot of memory. Not impossible though. */
11387 sljit_free_code(executable_func);
11388 free_read_only_data(common->read_only_data_head, compiler->allocator_data);
11389 return;
11390 }
11391 memset(functions, 0, sizeof(executable_functions));
11392 functions->top_bracket = (re->top_bracket + 1) * 2;
11393 functions->limit_match = (re->flags & PCRE_MLSET) != 0 ? re->limit_match : 0;
11394 extra->executable_jit = functions;
11395 extra->flags |= PCRE_EXTRA_EXECUTABLE_JIT;
11396 }
11397
11398 functions->executable_funcs[mode] = executable_func;
11399 functions->read_only_data_heads[mode] = common->read_only_data_head;
11400 functions->executable_sizes[mode] = executable_size;
11401 }
11402
jit_machine_stack_exec(jit_arguments * arguments,void * executable_func)11403 static SLJIT_NOINLINE int jit_machine_stack_exec(jit_arguments *arguments, void *executable_func)
11404 {
11405 union {
11406 void *executable_func;
11407 jit_function call_executable_func;
11408 } convert_executable_func;
11409 sljit_u8 local_space[MACHINE_STACK_SIZE];
11410 struct sljit_stack local_stack;
11411
11412 local_stack.max_limit = local_space;
11413 local_stack.limit = local_space;
11414 local_stack.base = local_space + MACHINE_STACK_SIZE;
11415 local_stack.top = local_space + MACHINE_STACK_SIZE;
11416 arguments->stack = &local_stack;
11417 convert_executable_func.executable_func = executable_func;
11418 return convert_executable_func.call_executable_func(arguments);
11419 }
11420
11421 int
PRIV(jit_exec)11422 PRIV(jit_exec)(const PUBL(extra) *extra_data, const pcre_uchar *subject,
11423 int length, int start_offset, int options, int *offsets, int offset_count)
11424 {
11425 executable_functions *functions = (executable_functions *)extra_data->executable_jit;
11426 union {
11427 void *executable_func;
11428 jit_function call_executable_func;
11429 } convert_executable_func;
11430 jit_arguments arguments;
11431 int max_offset_count;
11432 int retval;
11433 int mode = JIT_COMPILE;
11434
11435 if ((options & PCRE_PARTIAL_HARD) != 0)
11436 mode = JIT_PARTIAL_HARD_COMPILE;
11437 else if ((options & PCRE_PARTIAL_SOFT) != 0)
11438 mode = JIT_PARTIAL_SOFT_COMPILE;
11439
11440 if (functions->executable_funcs[mode] == NULL)
11441 return PCRE_ERROR_JIT_BADOPTION;
11442
11443 /* Sanity checks should be handled by pcre_exec. */
11444 arguments.str = subject + start_offset;
11445 arguments.begin = subject;
11446 arguments.end = subject + length;
11447 arguments.mark_ptr = NULL;
11448 /* JIT decreases this value less frequently than the interpreter. */
11449 arguments.limit_match = ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0) ? MATCH_LIMIT : (sljit_u32)(extra_data->match_limit);
11450 if (functions->limit_match != 0 && functions->limit_match < arguments.limit_match)
11451 arguments.limit_match = functions->limit_match;
11452 arguments.notbol = (options & PCRE_NOTBOL) != 0;
11453 arguments.noteol = (options & PCRE_NOTEOL) != 0;
11454 arguments.notempty = (options & PCRE_NOTEMPTY) != 0;
11455 arguments.notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
11456 arguments.offsets = offsets;
11457 arguments.callout_data = (extra_data->flags & PCRE_EXTRA_CALLOUT_DATA) != 0 ? extra_data->callout_data : NULL;
11458 arguments.real_offset_count = offset_count;
11459
11460 /* pcre_exec() rounds offset_count to a multiple of 3, and then uses only 2/3 of
11461 the output vector for storing captured strings, with the remainder used as
11462 workspace. We don't need the workspace here. For compatibility, we limit the
11463 number of captured strings in the same way as pcre_exec(), so that the user
11464 gets the same result with and without JIT. */
11465
11466 if (offset_count != 2)
11467 offset_count = ((offset_count - (offset_count % 3)) * 2) / 3;
11468 max_offset_count = functions->top_bracket;
11469 if (offset_count > max_offset_count)
11470 offset_count = max_offset_count;
11471 arguments.offset_count = offset_count;
11472
11473 if (functions->callback)
11474 arguments.stack = (struct sljit_stack *)functions->callback(functions->userdata);
11475 else
11476 arguments.stack = (struct sljit_stack *)functions->userdata;
11477
11478 if (arguments.stack == NULL)
11479 retval = jit_machine_stack_exec(&arguments, functions->executable_funcs[mode]);
11480 else
11481 {
11482 convert_executable_func.executable_func = functions->executable_funcs[mode];
11483 retval = convert_executable_func.call_executable_func(&arguments);
11484 }
11485
11486 if (retval * 2 > offset_count)
11487 retval = 0;
11488 if ((extra_data->flags & PCRE_EXTRA_MARK) != 0)
11489 *(extra_data->mark) = arguments.mark_ptr;
11490
11491 return retval;
11492 }
11493
11494 #if defined COMPILE_PCRE8
11495 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_jit_exec(const pcre * argument_re,const pcre_extra * extra_data,PCRE_SPTR subject,int length,int start_offset,int options,int * offsets,int offset_count,pcre_jit_stack * stack)11496 pcre_jit_exec(const pcre *argument_re, const pcre_extra *extra_data,
11497 PCRE_SPTR subject, int length, int start_offset, int options,
11498 int *offsets, int offset_count, pcre_jit_stack *stack)
11499 #elif defined COMPILE_PCRE16
11500 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
11501 pcre16_jit_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
11502 PCRE_SPTR16 subject, int length, int start_offset, int options,
11503 int *offsets, int offset_count, pcre16_jit_stack *stack)
11504 #elif defined COMPILE_PCRE32
11505 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
11506 pcre32_jit_exec(const pcre32 *argument_re, const pcre32_extra *extra_data,
11507 PCRE_SPTR32 subject, int length, int start_offset, int options,
11508 int *offsets, int offset_count, pcre32_jit_stack *stack)
11509 #endif
11510 {
11511 pcre_uchar *subject_ptr = (pcre_uchar *)subject;
11512 executable_functions *functions = (executable_functions *)extra_data->executable_jit;
11513 union {
11514 void *executable_func;
11515 jit_function call_executable_func;
11516 } convert_executable_func;
11517 jit_arguments arguments;
11518 int max_offset_count;
11519 int retval;
11520 int mode = JIT_COMPILE;
11521
11522 SLJIT_UNUSED_ARG(argument_re);
11523
11524 /* Plausibility checks */
11525 if ((options & ~PUBLIC_JIT_EXEC_OPTIONS) != 0) return PCRE_ERROR_JIT_BADOPTION;
11526
11527 if ((options & PCRE_PARTIAL_HARD) != 0)
11528 mode = JIT_PARTIAL_HARD_COMPILE;
11529 else if ((options & PCRE_PARTIAL_SOFT) != 0)
11530 mode = JIT_PARTIAL_SOFT_COMPILE;
11531
11532 if (functions->executable_funcs[mode] == NULL)
11533 return PCRE_ERROR_JIT_BADOPTION;
11534
11535 /* Sanity checks should be handled by pcre_exec. */
11536 arguments.stack = (struct sljit_stack *)stack;
11537 arguments.str = subject_ptr + start_offset;
11538 arguments.begin = subject_ptr;
11539 arguments.end = subject_ptr + length;
11540 arguments.mark_ptr = NULL;
11541 /* JIT decreases this value less frequently than the interpreter. */
11542 arguments.limit_match = ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0) ? MATCH_LIMIT : (sljit_u32)(extra_data->match_limit);
11543 if (functions->limit_match != 0 && functions->limit_match < arguments.limit_match)
11544 arguments.limit_match = functions->limit_match;
11545 arguments.notbol = (options & PCRE_NOTBOL) != 0;
11546 arguments.noteol = (options & PCRE_NOTEOL) != 0;
11547 arguments.notempty = (options & PCRE_NOTEMPTY) != 0;
11548 arguments.notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
11549 arguments.offsets = offsets;
11550 arguments.callout_data = (extra_data->flags & PCRE_EXTRA_CALLOUT_DATA) != 0 ? extra_data->callout_data : NULL;
11551 arguments.real_offset_count = offset_count;
11552
11553 /* pcre_exec() rounds offset_count to a multiple of 3, and then uses only 2/3 of
11554 the output vector for storing captured strings, with the remainder used as
11555 workspace. We don't need the workspace here. For compatibility, we limit the
11556 number of captured strings in the same way as pcre_exec(), so that the user
11557 gets the same result with and without JIT. */
11558
11559 if (offset_count != 2)
11560 offset_count = ((offset_count - (offset_count % 3)) * 2) / 3;
11561 max_offset_count = functions->top_bracket;
11562 if (offset_count > max_offset_count)
11563 offset_count = max_offset_count;
11564 arguments.offset_count = offset_count;
11565
11566 convert_executable_func.executable_func = functions->executable_funcs[mode];
11567 retval = convert_executable_func.call_executable_func(&arguments);
11568
11569 if (retval * 2 > offset_count)
11570 retval = 0;
11571 if ((extra_data->flags & PCRE_EXTRA_MARK) != 0)
11572 *(extra_data->mark) = arguments.mark_ptr;
11573
11574 return retval;
11575 }
11576
11577 void
PRIV(jit_free)11578 PRIV(jit_free)(void *executable_funcs)
11579 {
11580 int i;
11581 executable_functions *functions = (executable_functions *)executable_funcs;
11582 for (i = 0; i < JIT_NUMBER_OF_COMPILE_MODES; i++)
11583 {
11584 if (functions->executable_funcs[i] != NULL)
11585 sljit_free_code(functions->executable_funcs[i]);
11586 free_read_only_data(functions->read_only_data_heads[i], NULL);
11587 }
11588 SLJIT_FREE(functions, compiler->allocator_data);
11589 }
11590
11591 int
PRIV(jit_get_size)11592 PRIV(jit_get_size)(void *executable_funcs)
11593 {
11594 int i;
11595 sljit_uw size = 0;
11596 sljit_uw *executable_sizes = ((executable_functions *)executable_funcs)->executable_sizes;
11597 for (i = 0; i < JIT_NUMBER_OF_COMPILE_MODES; i++)
11598 size += executable_sizes[i];
11599 return (int)size;
11600 }
11601
11602 const char*
PRIV(jit_get_target)11603 PRIV(jit_get_target)(void)
11604 {
11605 return sljit_get_platform_name();
11606 }
11607
11608 #if defined COMPILE_PCRE8
11609 PCRE_EXP_DECL pcre_jit_stack *
pcre_jit_stack_alloc(int startsize,int maxsize)11610 pcre_jit_stack_alloc(int startsize, int maxsize)
11611 #elif defined COMPILE_PCRE16
11612 PCRE_EXP_DECL pcre16_jit_stack *
11613 pcre16_jit_stack_alloc(int startsize, int maxsize)
11614 #elif defined COMPILE_PCRE32
11615 PCRE_EXP_DECL pcre32_jit_stack *
11616 pcre32_jit_stack_alloc(int startsize, int maxsize)
11617 #endif
11618 {
11619 if (startsize < 1 || maxsize < 1)
11620 return NULL;
11621 if (startsize > maxsize)
11622 startsize = maxsize;
11623 startsize = (startsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1);
11624 maxsize = (maxsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1);
11625 return (PUBL(jit_stack)*)sljit_allocate_stack(startsize, maxsize, NULL);
11626 }
11627
11628 #if defined COMPILE_PCRE8
11629 PCRE_EXP_DECL void
pcre_jit_stack_free(pcre_jit_stack * stack)11630 pcre_jit_stack_free(pcre_jit_stack *stack)
11631 #elif defined COMPILE_PCRE16
11632 PCRE_EXP_DECL void
11633 pcre16_jit_stack_free(pcre16_jit_stack *stack)
11634 #elif defined COMPILE_PCRE32
11635 PCRE_EXP_DECL void
11636 pcre32_jit_stack_free(pcre32_jit_stack *stack)
11637 #endif
11638 {
11639 sljit_free_stack((struct sljit_stack *)stack, NULL);
11640 }
11641
11642 #if defined COMPILE_PCRE8
11643 PCRE_EXP_DECL void
pcre_assign_jit_stack(pcre_extra * extra,pcre_jit_callback callback,void * userdata)11644 pcre_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata)
11645 #elif defined COMPILE_PCRE16
11646 PCRE_EXP_DECL void
11647 pcre16_assign_jit_stack(pcre16_extra *extra, pcre16_jit_callback callback, void *userdata)
11648 #elif defined COMPILE_PCRE32
11649 PCRE_EXP_DECL void
11650 pcre32_assign_jit_stack(pcre32_extra *extra, pcre32_jit_callback callback, void *userdata)
11651 #endif
11652 {
11653 executable_functions *functions;
11654 if (extra != NULL &&
11655 (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 &&
11656 extra->executable_jit != NULL)
11657 {
11658 functions = (executable_functions *)extra->executable_jit;
11659 functions->callback = callback;
11660 functions->userdata = userdata;
11661 }
11662 }
11663
11664 #if defined COMPILE_PCRE8
11665 PCRE_EXP_DECL void
pcre_jit_free_unused_memory(void)11666 pcre_jit_free_unused_memory(void)
11667 #elif defined COMPILE_PCRE16
11668 PCRE_EXP_DECL void
11669 pcre16_jit_free_unused_memory(void)
11670 #elif defined COMPILE_PCRE32
11671 PCRE_EXP_DECL void
11672 pcre32_jit_free_unused_memory(void)
11673 #endif
11674 {
11675 sljit_free_unused_memory_exec();
11676 }
11677
11678 #else /* SUPPORT_JIT */
11679
11680 /* These are dummy functions to avoid linking errors when JIT support is not
11681 being compiled. */
11682
11683 #if defined COMPILE_PCRE8
11684 PCRE_EXP_DECL pcre_jit_stack *
pcre_jit_stack_alloc(int startsize,int maxsize)11685 pcre_jit_stack_alloc(int startsize, int maxsize)
11686 #elif defined COMPILE_PCRE16
11687 PCRE_EXP_DECL pcre16_jit_stack *
11688 pcre16_jit_stack_alloc(int startsize, int maxsize)
11689 #elif defined COMPILE_PCRE32
11690 PCRE_EXP_DECL pcre32_jit_stack *
11691 pcre32_jit_stack_alloc(int startsize, int maxsize)
11692 #endif
11693 {
11694 (void)startsize;
11695 (void)maxsize;
11696 return NULL;
11697 }
11698
11699 #if defined COMPILE_PCRE8
11700 PCRE_EXP_DECL void
pcre_jit_stack_free(pcre_jit_stack * stack)11701 pcre_jit_stack_free(pcre_jit_stack *stack)
11702 #elif defined COMPILE_PCRE16
11703 PCRE_EXP_DECL void
11704 pcre16_jit_stack_free(pcre16_jit_stack *stack)
11705 #elif defined COMPILE_PCRE32
11706 PCRE_EXP_DECL void
11707 pcre32_jit_stack_free(pcre32_jit_stack *stack)
11708 #endif
11709 {
11710 (void)stack;
11711 }
11712
11713 #if defined COMPILE_PCRE8
11714 PCRE_EXP_DECL void
pcre_assign_jit_stack(pcre_extra * extra,pcre_jit_callback callback,void * userdata)11715 pcre_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata)
11716 #elif defined COMPILE_PCRE16
11717 PCRE_EXP_DECL void
11718 pcre16_assign_jit_stack(pcre16_extra *extra, pcre16_jit_callback callback, void *userdata)
11719 #elif defined COMPILE_PCRE32
11720 PCRE_EXP_DECL void
11721 pcre32_assign_jit_stack(pcre32_extra *extra, pcre32_jit_callback callback, void *userdata)
11722 #endif
11723 {
11724 (void)extra;
11725 (void)callback;
11726 (void)userdata;
11727 }
11728
11729 #if defined COMPILE_PCRE8
11730 PCRE_EXP_DECL void
pcre_jit_free_unused_memory(void)11731 pcre_jit_free_unused_memory(void)
11732 #elif defined COMPILE_PCRE16
11733 PCRE_EXP_DECL void
11734 pcre16_jit_free_unused_memory(void)
11735 #elif defined COMPILE_PCRE32
11736 PCRE_EXP_DECL void
11737 pcre32_jit_free_unused_memory(void)
11738 #endif
11739 {
11740 }
11741
11742 #endif
11743
11744 /* End of pcre_jit_compile.c */
11745