1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2013 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2013
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #if defined SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
55 #define SLJIT_MALLOC(size, allocator_data) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr, allocator_data) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
68 /* Defines for debugging purposes. */
69
70 /* 1 - Use unoptimized capturing brackets.
71 2 - Enable capture_last_ptr (includes option 1). */
72 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
73
74 /* 1 - Always have a control head. */
75 /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
76
/* Size of the regex stack that is allocated on the real machine stack.
Fast, but limited in size. */
#define MACHINE_STACK_SIZE 32768

/* Growth rate for the stack allocated by the OS. Should be a multiple
of the page size. */
#define STACK_GROWTH_RATE 8192

/* In sljit debug builds, enable the check that an allocation could
destroy temporaries. */
#if defined SLJIT_DEBUG && SLJIT_DEBUG
#define DESTROY_REGISTERS 1
#endif
89
90 /*
Short summary about the backtracking mechanism employed by the jit code generator:
92
The code generator follows the recursive nature of the PERL compatible regular
expressions. The basic blocks of regular expressions are condition checkers
which execute different commands depending on the result of the condition check.
The relationship between the operators can be horizontal (concatenation) and
vertical (sub-expression) (See struct backtrack_common for more details).
98
99 'ab' - 'a' and 'b' regexps are concatenated
100 'a+' - 'a' is the sub-expression of the '+' operator
101
The condition checkers are boolean (true/false) checkers. Machine code is generated
for the checker itself and for the actions depending on the result of the checker.
The 'true' case is called the matching path (expected path), and the other is called
the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
branches on the matching path.
107
108 Greedy star operator (*) :
109 Matching path: match happens.
110 Backtrack path: match failed.
111 Non-greedy star operator (*?) :
112 Matching path: no need to perform a match.
113 Backtrack path: match is required.
114
The following example shows how the code is generated for a capturing bracket
with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
we have the following regular expression:
118
119 A(B|C)D
120
121 The generated code will be the following:
122
123 A matching path
124 '(' matching path (pushing arguments to the stack)
125 B matching path
126 ')' matching path (pushing arguments to the stack)
127 D matching path
128 return with successful match
129
130 D backtrack path
131 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
132 B backtrack path
133 C expected path
134 jump to D matching path
135 C backtrack path
136 A backtrack path
137
Notice that the order of the backtrack code paths is the opposite of the fast
code paths. In this way the topmost value on the stack always belongs
to the current backtrack code path. The backtrack path must check
whether there is a next alternative. If so, it needs to jump back to
the matching path eventually. Otherwise it needs to clear out its own stack
frame and continue the execution on the backtrack code paths.
144 */
145
146 /*
147 Saved stack frames:
148
Atomic blocks and asserts require reloading the values of private data
when the backtrack mechanism is performed. Because of OP_RECURSE, the data
are not necessarily known at compile time, thus we need a dynamic restore
mechanism.
153
154 The stack frames are stored in a chain list, and have the following format:
155 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
156
157 Thus we can restore the private data to a particular point in the stack.
158 */
159
160 typedef struct jit_arguments {
161 /* Pointers first. */
162 struct sljit_stack *stack;
163 const pcre_uchar *str;
164 const pcre_uchar *begin;
165 const pcre_uchar *end;
166 int *offsets;
167 pcre_uchar *uchar_ptr;
168 pcre_uchar *mark_ptr;
169 void *callout_data;
170 /* Everything else after. */
171 pcre_uint32 limit_match;
172 int real_offset_count;
173 int offset_count;
174 pcre_uint8 notbol;
175 pcre_uint8 noteol;
176 pcre_uint8 notempty;
177 pcre_uint8 notempty_atstart;
178 } jit_arguments;
179
180 typedef struct executable_functions {
181 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
182 void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES];
183 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
184 PUBL(jit_callback) callback;
185 void *userdata;
186 pcre_uint32 top_bracket;
187 pcre_uint32 limit_match;
188 } executable_functions;
189
/* Node of a singly linked list of sljit jumps. */
struct jump_list {
  struct sljit_jump *jump;
  struct jump_list *next;
};

typedef struct jump_list jump_list;
194
/* Node of a singly linked list of stubs; each node records a start
jump and a quit label. */
struct stub_list {
  struct sljit_jump *start;
  struct sljit_label *quit;
  struct stub_list *next;
};

typedef struct stub_list stub_list;
200
201 typedef struct label_addr_list {
202 struct sljit_label *label;
203 sljit_uw *update_addr;
204 struct label_addr_list *next;
205 } label_addr_list;
206
/* Negative sentinel values used in place of a frame size. */
enum frame_types {
  no_stack = -2,
  no_frame = -1
};
211
/* Kinds of control entries: a mark, or a THEN trap. */
enum control_types {
  type_mark      = 0,
  type_then_trap = 1
};
216
/* Pointer type for a function that takes a jit_arguments block and
returns an int; it uses the SLJIT_CALL calling convention. */
typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
218
219 /* The following structure is the key data type for the recursive
220 code generator. It is allocated by compile_matchingpath, and contains
221 the arguments for compile_backtrackingpath. Must be the first member
222 of its descendants. */
223 typedef struct backtrack_common {
224 /* Concatenation stack. */
225 struct backtrack_common *prev;
226 jump_list *nextbacktracks;
227 /* Internal stack (for component operators). */
228 struct backtrack_common *top;
229 jump_list *topbacktracks;
230 /* Opcode pointer. */
231 pcre_uchar *cc;
232 } backtrack_common;
233
234 typedef struct assert_backtrack {
235 backtrack_common common;
236 jump_list *condfailed;
237 /* Less than 0 if a frame is not needed. */
238 int framesize;
239 /* Points to our private memory word on the stack. */
240 int private_data_ptr;
241 /* For iterators. */
242 struct sljit_label *matchingpath;
243 } assert_backtrack;
244
245 typedef struct bracket_backtrack {
246 backtrack_common common;
247 /* Where to coninue if an alternative is successfully matched. */
248 struct sljit_label *alternative_matchingpath;
249 /* For rmin and rmax iterators. */
250 struct sljit_label *recursive_matchingpath;
251 /* For greedy ? operator. */
252 struct sljit_label *zero_matchingpath;
253 /* Contains the branches of a failed condition. */
254 union {
255 /* Both for OP_COND, OP_SCOND. */
256 jump_list *condfailed;
257 assert_backtrack *assert;
258 /* For OP_ONCE. Less than 0 if not needed. */
259 int framesize;
260 } u;
261 /* Points to our private memory word on the stack. */
262 int private_data_ptr;
263 } bracket_backtrack;
264
265 typedef struct bracketpos_backtrack {
266 backtrack_common common;
267 /* Points to our private memory word on the stack. */
268 int private_data_ptr;
269 /* Reverting stack is needed. */
270 int framesize;
271 /* Allocated stack size. */
272 int stacksize;
273 } bracketpos_backtrack;
274
275 typedef struct braminzero_backtrack {
276 backtrack_common common;
277 struct sljit_label *matchingpath;
278 } braminzero_backtrack;
279
280 typedef struct iterator_backtrack {
281 backtrack_common common;
282 /* Next iteration. */
283 struct sljit_label *matchingpath;
284 } iterator_backtrack;
285
286 typedef struct recurse_entry {
287 struct recurse_entry *next;
288 /* Contains the function entry. */
289 struct sljit_label *entry;
290 /* Collects the calls until the function is not created. */
291 jump_list *calls;
292 /* Points to the starting opcode. */
293 sljit_sw start;
294 } recurse_entry;
295
296 typedef struct recurse_backtrack {
297 backtrack_common common;
298 BOOL inlined_pattern;
299 } recurse_backtrack;
300
301 #define OP_THEN_TRAP OP_TABLE_LENGTH
302
303 typedef struct then_trap_backtrack {
304 backtrack_common common;
305 /* If then_trap is not NULL, this structure contains the real
306 then_trap for the backtracking path. */
307 struct then_trap_backtrack *then_trap;
308 /* Points to the starting opcode. */
309 sljit_sw start;
310 /* Exit point for the then opcodes of this alternative. */
311 jump_list *quit;
312 /* Frame size of the current alternative. */
313 int framesize;
314 } then_trap_backtrack;
315
316 #define MAX_RANGE_SIZE 4
317
318 typedef struct compiler_common {
319 /* The sljit ceneric compiler. */
320 struct sljit_compiler *compiler;
321 /* First byte code. */
322 pcre_uchar *start;
323 /* Maps private data offset to each opcode. */
324 sljit_si *private_data_ptrs;
325 /* Chain list of read-only data ptrs. */
326 void *read_only_data_head;
327 /* Tells whether the capturing bracket is optimized. */
328 pcre_uint8 *optimized_cbracket;
329 /* Tells whether the starting offset is a target of then. */
330 pcre_uint8 *then_offsets;
331 /* Current position where a THEN must jump. */
332 then_trap_backtrack *then_trap;
333 /* Starting offset of private data for capturing brackets. */
334 int cbra_ptr;
335 /* Output vector starting point. Must be divisible by 2. */
336 int ovector_start;
337 /* Last known position of the requested byte. */
338 int req_char_ptr;
339 /* Head of the last recursion. */
340 int recursive_head_ptr;
341 /* First inspected character for partial matching. */
342 int start_used_ptr;
343 /* Starting pointer for partial soft matches. */
344 int hit_start;
345 /* End pointer of the first line. */
346 int first_line_end;
347 /* Points to the marked string. */
348 int mark_ptr;
349 /* Recursive control verb management chain. */
350 int control_head_ptr;
351 /* Points to the last matched capture block index. */
352 int capture_last_ptr;
353 /* Points to the starting position of the current match. */
354 int start_ptr;
355
356 /* Flipped and lower case tables. */
357 const pcre_uint8 *fcc;
358 sljit_sw lcc;
359 /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
360 int mode;
361 /* TRUE, when minlength is greater than 0. */
362 BOOL might_be_empty;
363 /* \K is found in the pattern. */
364 BOOL has_set_som;
365 /* (*SKIP:arg) is found in the pattern. */
366 BOOL has_skip_arg;
367 /* (*THEN) is found in the pattern. */
368 BOOL has_then;
369 /* Needs to know the start position anytime. */
370 BOOL needs_start_ptr;
371 /* Currently in recurse or negative assert. */
372 BOOL local_exit;
373 /* Currently in a positive assert. */
374 BOOL positive_assert;
375 /* Newline control. */
376 int nltype;
377 pcre_uint32 nlmax;
378 pcre_uint32 nlmin;
379 int newline;
380 int bsr_nltype;
381 pcre_uint32 bsr_nlmax;
382 pcre_uint32 bsr_nlmin;
383 /* Dollar endonly. */
384 int endonly;
385 /* Tables. */
386 sljit_sw ctypes;
387 /* Named capturing brackets. */
388 pcre_uchar *name_table;
389 sljit_sw name_count;
390 sljit_sw name_entry_size;
391
392 /* Labels and jump lists. */
393 struct sljit_label *partialmatchlabel;
394 struct sljit_label *quit_label;
395 struct sljit_label *forced_quit_label;
396 struct sljit_label *accept_label;
397 struct sljit_label *ff_newline_shortcut;
398 stub_list *stubs;
399 label_addr_list *label_addrs;
400 recurse_entry *entries;
401 recurse_entry *currententry;
402 jump_list *partialmatch;
403 jump_list *quit;
404 jump_list *positive_assert_quit;
405 jump_list *forced_quit;
406 jump_list *accept;
407 jump_list *calllimit;
408 jump_list *stackalloc;
409 jump_list *revertframes;
410 jump_list *wordboundary;
411 jump_list *anynewline;
412 jump_list *hspace;
413 jump_list *vspace;
414 jump_list *casefulcmp;
415 jump_list *caselesscmp;
416 jump_list *reset_match;
417 BOOL jscript_compat;
418 #ifdef SUPPORT_UTF
419 BOOL utf;
420 #ifdef SUPPORT_UCP
421 BOOL use_ucp;
422 #endif
423 #ifdef COMPILE_PCRE8
424 jump_list *utfreadchar;
425 jump_list *utfreadchar16;
426 jump_list *utfreadtype8;
427 #endif
428 #endif /* SUPPORT_UTF */
429 #ifdef SUPPORT_UCP
430 jump_list *getucd;
431 #endif
432 } compiler_common;
433
/* Context used by byte_sequence_compare. The members after sourcereg
exist only when SLJIT_UNALIGNED is enabled; c and oc are two unions
with the same layout. */
struct compare_context {
  int length;
  int sourcereg;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  int ucharptr;
  union {
    sljit_si asint;
    sljit_uh asushort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_ui asuchars[1];
#endif
  } c;
  union {
    sljit_si asint;
    sljit_uh asushort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_ui asuchars[1];
#endif
  } oc;
#endif
};

typedef struct compare_context compare_context;
467
/* Drop the sljit macro of this name; this file defines its own CMP
shortcut. */
#undef CMP

/* Byte offset of the i-th stack element; element 0 sits one machine
word below the base, so the offsets are negative. */
#define STACK(i)      ((-(i) - 1) * (int)sizeof(sljit_sw))

/* Register assignment: scratch registers. */
#define TMP1          SLJIT_R0
#define STACK_TOP     SLJIT_R1
#define TMP2          SLJIT_R2
#define TMP3          SLJIT_R3
#define RETURN_ADDR   SLJIT_R4
/* Register assignment: saved registers. */
#define STR_PTR       SLJIT_S0
#define STR_END       SLJIT_S1
#define STACK_LIMIT   SLJIT_S2
#define COUNT_MATCH   SLJIT_S3
#define ARGUMENTS     SLJIT_S4

/* Layout of the local space, in machine words. */
/* Two locals that the current opcode is free to use. */
#define LOCALS0       (0 * sizeof(sljit_sw))
#define LOCALS1       (1 * sizeof(sljit_sw))
/* Two locals for possessive quantifiers (char1 cannot use them). */
#define POSSESSIVE0   (2 * sizeof(sljit_sw))
#define POSSESSIVE1   (3 * sizeof(sljit_sw))
/* Max limit of recursions. */
#define LIMIT_MATCH   (4 * sizeof(sljit_sw))
/* The output vector lives on the stack and holds character pointers.
Its data forms two groups: the first contains the start / end pointers,
the second contains the start pointers of capturing groups whose end
has not been reached yet. */
#define OVECTOR_START (common->ovector_start)
#define OVECTOR(i)    (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
#define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
/* Private data offset recorded for the opcode at cc. */
#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
502
503 #if defined COMPILE_PCRE8
504 #define MOV_UCHAR SLJIT_MOV_UB
505 #define MOVU_UCHAR SLJIT_MOVU_UB
506 #elif defined COMPILE_PCRE16
507 #define MOV_UCHAR SLJIT_MOV_UH
508 #define MOVU_UCHAR SLJIT_MOVU_UH
509 #elif defined COMPILE_PCRE32
510 #define MOV_UCHAR SLJIT_MOV_UI
511 #define MOVU_UCHAR SLJIT_MOVU_UI
512 #else
513 #error Unsupported compiling mode
514 #endif
515
/* Shortcuts for the sljit emitter calls. Except for DEFINE_COMPILER
(which needs "common") and SET_LABEL, they expect a local variable
named "compiler"; DEFINE_COMPILER declares it. */
#define DEFINE_COMPILER \
  struct sljit_compiler *compiler = common->compiler

/* Instructions. */
#define OP1(op, dst, dstw, src, srcw) \
  sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
  sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))

/* Labels and jumps. */
#define LABEL() \
  sljit_emit_label(compiler)
#define JUMP(type) \
  sljit_emit_jump(compiler, (type))
#define JUMPTO(type, label) \
  sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
#define JUMPHERE(jump) \
  sljit_set_label((jump), sljit_emit_label(compiler))
#define SET_LABEL(jump, label) \
  sljit_set_label((jump), (label))

/* Compare-and-jump. */
#define CMP(type, src1, src1w, src2, src2w) \
  sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
#define CMPTO(type, src1, src1w, src2, src2w, label) \
  sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))

/* Flags and local base. */
#define OP_FLAGS(op, dst, dstw, src, srcw, type) \
  sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
#define GET_LOCAL_BASE(dst, dstw, offset) \
  sljit_get_local_base(compiler, (dst), (dstw), (offset))

#define READ_CHAR_MAX 0x7fffffff
543
/* Returns the position right after the closing ket of the bracket (or
assertion) that starts at cc. The link stored at offset 1 is followed
from the opening opcode and then from every OP_ALT it leads to. */
static pcre_uchar *bracketend(pcre_uchar *cc)
{
  SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));

  for (;;)
  {
    cc += GET(cc, 1);
    if (*cc != OP_ALT)
      break;
  }

  SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
  /* Step over the ket opcode and its link. */
  return cc + 1 + LINK_SIZE;
}
552
/* Returns the number of alternatives of the bracket (or assertion) that
starts at cc: one for the first branch plus one for every OP_ALT
reached by following the links. */
static int no_alternatives(pcre_uchar *cc)
{
  int total = 1;

  SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));

  cc += GET(cc, 1);
  while (*cc == OP_ALT)
  {
    cc += GET(cc, 1);
    total++;
  }

  SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
  return total;
}
566
/* Number of 1 bits in every 4 bit value; the index is the value. */
static int ones_in_half_byte[16] = {
  0, /* 0000 */
  1, /* 0001 */
  1, /* 0010 */
  2, /* 0011 */
  1, /* 0100 */
  2, /* 0101 */
  2, /* 0110 */
  3, /* 0111 */
  1, /* 1000 */
  2, /* 1001 */
  2, /* 1010 */
  3, /* 1011 */
  2, /* 1100 */
  3, /* 1101 */
  3, /* 1110 */
  4  /* 1111 */
};
571
/* Functions which might need modification for all new supported opcodes:
573 next_opcode
574 check_opcode_types
575 set_private_data_ptrs
576 get_framesize
577 init_frame
578 get_private_data_copy_length
579 copy_private_data
580 compile_matchingpath
581 compile_backtrackingpath
582 */
583
/* Returns the position of the opcode that follows the one at cc.
NULL is returned for OP_ANYBYTE in UTF mode and for any opcode that is
not listed here (the latter also triggers SLJIT_ASSERT_STOP). */
static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
{
  pcre_uchar *next;

  SLJIT_UNUSED_ARG(common);

  switch (*cc)
  {
    /* The length comes straight from the OP_lengths table. */
    case OP_SOD: case OP_SOM: case OP_SET_SOM:
    case OP_NOT_WORD_BOUNDARY: case OP_WORD_BOUNDARY:
    case OP_NOT_DIGIT: case OP_DIGIT:
    case OP_NOT_WHITESPACE: case OP_WHITESPACE:
    case OP_NOT_WORDCHAR: case OP_WORDCHAR:
    case OP_ANY: case OP_ALLANY:
    case OP_NOTPROP: case OP_PROP:
    case OP_ANYNL:
    case OP_NOT_HSPACE: case OP_HSPACE:
    case OP_NOT_VSPACE: case OP_VSPACE:
    case OP_EXTUNI:
    case OP_EODN: case OP_EOD:
    case OP_CIRC: case OP_CIRCM:
    case OP_DOLL: case OP_DOLLM:
    case OP_CRSTAR: case OP_CRMINSTAR:
    case OP_CRPLUS: case OP_CRMINPLUS:
    case OP_CRQUERY: case OP_CRMINQUERY:
    case OP_CRRANGE: case OP_CRMINRANGE:
    case OP_CRPOSSTAR: case OP_CRPOSPLUS:
    case OP_CRPOSQUERY: case OP_CRPOSRANGE:
    case OP_CLASS: case OP_NCLASS:
    case OP_REF: case OP_REFI:
    case OP_DNREF: case OP_DNREFI:
    case OP_RECURSE:
    case OP_CALLOUT:
    case OP_ALT:
    case OP_KET: case OP_KETRMAX: case OP_KETRMIN: case OP_KETRPOS:
    case OP_REVERSE:
    case OP_ASSERT: case OP_ASSERT_NOT:
    case OP_ASSERTBACK: case OP_ASSERTBACK_NOT:
    case OP_ONCE: case OP_ONCE_NC:
    case OP_BRA: case OP_BRAPOS:
    case OP_CBRA: case OP_CBRAPOS:
    case OP_COND:
    case OP_SBRA: case OP_SBRAPOS:
    case OP_SCBRA: case OP_SCBRAPOS:
    case OP_SCOND:
    case OP_CREF: case OP_DNCREF:
    case OP_RREF: case OP_DNRREF:
    case OP_DEF:
    case OP_BRAZERO: case OP_BRAMINZERO: case OP_BRAPOSZERO:
    case OP_PRUNE: case OP_SKIP: case OP_THEN: case OP_COMMIT:
    case OP_FAIL:
    case OP_ACCEPT: case OP_ASSERT_ACCEPT:
    case OP_CLOSE:
    case OP_SKIPZERO:
      next = cc + PRIV(OP_lengths)[*cc];
      break;

    /* Opcodes that end with a character: in UTF mode the character may
    occupy extra code units beyond the table length. */
    case OP_CHAR: case OP_CHARI:
    case OP_NOT: case OP_NOTI:
    case OP_STAR: case OP_MINSTAR: case OP_PLUS: case OP_MINPLUS:
    case OP_QUERY: case OP_MINQUERY: case OP_UPTO: case OP_MINUPTO:
    case OP_EXACT:
    case OP_POSSTAR: case OP_POSPLUS: case OP_POSQUERY: case OP_POSUPTO:
    case OP_STARI: case OP_MINSTARI: case OP_PLUSI: case OP_MINPLUSI:
    case OP_QUERYI: case OP_MINQUERYI: case OP_UPTOI: case OP_MINUPTOI:
    case OP_EXACTI:
    case OP_POSSTARI: case OP_POSPLUSI: case OP_POSQUERYI: case OP_POSUPTOI:
    case OP_NOTSTAR: case OP_NOTMINSTAR: case OP_NOTPLUS: case OP_NOTMINPLUS:
    case OP_NOTQUERY: case OP_NOTMINQUERY: case OP_NOTUPTO: case OP_NOTMINUPTO:
    case OP_NOTEXACT:
    case OP_NOTPOSSTAR: case OP_NOTPOSPLUS: case OP_NOTPOSQUERY: case OP_NOTPOSUPTO:
    case OP_NOTSTARI: case OP_NOTMINSTARI: case OP_NOTPLUSI: case OP_NOTMINPLUSI:
    case OP_NOTQUERYI: case OP_NOTMINQUERYI: case OP_NOTUPTOI: case OP_NOTMINUPTOI:
    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI: case OP_NOTPOSPLUSI: case OP_NOTPOSQUERYI: case OP_NOTPOSUPTOI:
      next = cc + PRIV(OP_lengths)[*cc];
#ifdef SUPPORT_UTF
      if (common->utf && HAS_EXTRALEN(next[-1]))
        next += GET_EXTRALEN(next[-1]);
#endif
      break;

    /* Special cases: type repeats advance one unit less than their
    table length. */
    case OP_TYPESTAR: case OP_TYPEMINSTAR:
    case OP_TYPEPLUS: case OP_TYPEMINPLUS:
    case OP_TYPEQUERY: case OP_TYPEMINQUERY:
    case OP_TYPEUPTO: case OP_TYPEMINUPTO:
    case OP_TYPEEXACT:
    case OP_TYPEPOSSTAR: case OP_TYPEPOSPLUS:
    case OP_TYPEPOSQUERY: case OP_TYPEPOSUPTO:
      next = cc + PRIV(OP_lengths)[*cc] - 1;
      break;

    case OP_ANYBYTE:
#ifdef SUPPORT_UTF
      if (common->utf)
      {
        next = NULL;
        break;
      }
#endif
      next = cc + 1;
      break;

#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
    /* The total length is stored after the opcode. */
    case OP_XCLASS:
      next = cc + GET(cc, 1);
      break;
#endif

    /* Verbs with an argument: the unit after the opcode holds the
    length of the argument. */
    case OP_MARK:
    case OP_PRUNE_ARG:
    case OP_SKIP_ARG:
    case OP_THEN_ARG:
      next = cc + 1 + 2 + cc[1];
      break;

    default:
      /* All opcodes are supported now! */
      SLJIT_ASSERT_STOP();
      next = NULL;
      break;
  }

  return next;
}
778
check_opcode_types(compiler_common * common,pcre_uchar * cc,pcre_uchar * ccend)779 static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
780 {
781 int count;
782 pcre_uchar *slot;
783
784 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
785 while (cc < ccend)
786 {
787 switch(*cc)
788 {
789 case OP_SET_SOM:
790 common->has_set_som = TRUE;
791 common->might_be_empty = TRUE;
792 cc += 1;
793 break;
794
795 case OP_REF:
796 case OP_REFI:
797 common->optimized_cbracket[GET2(cc, 1)] = 0;
798 cc += 1 + IMM2_SIZE;
799 break;
800
801 case OP_CBRAPOS:
802 case OP_SCBRAPOS:
803 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
804 cc += 1 + LINK_SIZE + IMM2_SIZE;
805 break;
806
807 case OP_COND:
808 case OP_SCOND:
809 /* Only AUTO_CALLOUT can insert this opcode. We do
810 not intend to support this case. */
811 if (cc[1 + LINK_SIZE] == OP_CALLOUT)
812 return FALSE;
813 cc += 1 + LINK_SIZE;
814 break;
815
816 case OP_CREF:
817 common->optimized_cbracket[GET2(cc, 1)] = 0;
818 cc += 1 + IMM2_SIZE;
819 break;
820
821 case OP_DNREF:
822 case OP_DNREFI:
823 case OP_DNCREF:
824 count = GET2(cc, 1 + IMM2_SIZE);
825 slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
826 while (count-- > 0)
827 {
828 common->optimized_cbracket[GET2(slot, 0)] = 0;
829 slot += common->name_entry_size;
830 }
831 cc += 1 + 2 * IMM2_SIZE;
832 break;
833
834 case OP_RECURSE:
835 /* Set its value only once. */
836 if (common->recursive_head_ptr == 0)
837 {
838 common->recursive_head_ptr = common->ovector_start;
839 common->ovector_start += sizeof(sljit_sw);
840 }
841 cc += 1 + LINK_SIZE;
842 break;
843
844 case OP_CALLOUT:
845 if (common->capture_last_ptr == 0)
846 {
847 common->capture_last_ptr = common->ovector_start;
848 common->ovector_start += sizeof(sljit_sw);
849 }
850 cc += 2 + 2 * LINK_SIZE;
851 break;
852
853 case OP_THEN_ARG:
854 common->has_then = TRUE;
855 common->control_head_ptr = 1;
856 /* Fall through. */
857
858 case OP_PRUNE_ARG:
859 common->needs_start_ptr = TRUE;
860 /* Fall through. */
861
862 case OP_MARK:
863 if (common->mark_ptr == 0)
864 {
865 common->mark_ptr = common->ovector_start;
866 common->ovector_start += sizeof(sljit_sw);
867 }
868 cc += 1 + 2 + cc[1];
869 break;
870
871 case OP_THEN:
872 common->has_then = TRUE;
873 common->control_head_ptr = 1;
874 /* Fall through. */
875
876 case OP_PRUNE:
877 case OP_SKIP:
878 common->needs_start_ptr = TRUE;
879 cc += 1;
880 break;
881
882 case OP_SKIP_ARG:
883 common->control_head_ptr = 1;
884 common->has_skip_arg = TRUE;
885 cc += 1 + 2 + cc[1];
886 break;
887
888 default:
889 cc = next_opcode(common, cc);
890 if (cc == NULL)
891 return FALSE;
892 break;
893 }
894 }
895 return TRUE;
896 }
897
/* Returns a size (0, 1 or 2) for the class repeat opcode at cc. A range
whose two bounds are equal, and any opcode not handled here, yields 0. */
static int get_class_iterator_size(pcre_uchar *cc)
{
  const pcre_uchar op = *cc;

  if (op == OP_CRSTAR || op == OP_CRPLUS)
    return 2;

  if (op == OP_CRMINSTAR || op == OP_CRMINPLUS || op == OP_CRQUERY || op == OP_CRMINQUERY)
    return 1;

  if (op == OP_CRRANGE || op == OP_CRMINRANGE)
    return (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE)) ? 0 : 2;

  return 0;
}
922
detect_repeat(compiler_common * common,pcre_uchar * begin)923 static BOOL detect_repeat(compiler_common *common, pcre_uchar *begin)
924 {
925 pcre_uchar *end = bracketend(begin);
926 pcre_uchar *next;
927 pcre_uchar *next_end;
928 pcre_uchar *max_end;
929 pcre_uchar type;
930 sljit_sw length = end - begin;
931 int min, max, i;
932
933 /* Detect fixed iterations first. */
934 if (end[-(1 + LINK_SIZE)] != OP_KET)
935 return FALSE;
936
937 /* Already detected repeat. */
938 if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
939 return TRUE;
940
941 next = end;
942 min = 1;
943 while (1)
944 {
945 if (*next != *begin)
946 break;
947 next_end = bracketend(next);
948 if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
949 break;
950 next = next_end;
951 min++;
952 }
953
954 if (min == 2)
955 return FALSE;
956
957 max = 0;
958 max_end = next;
959 if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
960 {
961 type = *next;
962 while (1)
963 {
964 if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
965 break;
966 next_end = bracketend(next + 2 + LINK_SIZE);
967 if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
968 break;
969 next = next_end;
970 max++;
971 }
972
973 if (next[0] == type && next[1] == *begin && max >= 1)
974 {
975 next_end = bracketend(next + 1);
976 if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
977 {
978 for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
979 if (*next_end != OP_KET)
980 break;
981
982 if (i == max)
983 {
984 common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
985 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
986 /* +2 the original and the last. */
987 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
988 if (min == 1)
989 return TRUE;
990 min--;
991 max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
992 }
993 }
994 }
995 }
996
997 if (min >= 3)
998 {
999 common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
1000 common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
1001 common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
1002 return TRUE;
1003 }
1004
1005 return FALSE;
1006 }
1007
/* Groups of case labels for the single character and type repeats.
Each macro expands to a run of "case X:" labels and must be followed by
the statements that handle the whole group. */

#define CASE_ITERATOR_PRIVATE_DATA_1 \
    case OP_MINSTAR: case OP_MINPLUS: case OP_QUERY: case OP_MINQUERY: \
    case OP_MINSTARI: case OP_MINPLUSI: case OP_QUERYI: case OP_MINQUERYI: \
    case OP_NOTMINSTAR: case OP_NOTMINPLUS: case OP_NOTQUERY: case OP_NOTMINQUERY: \
    case OP_NOTMINSTARI: case OP_NOTMINPLUSI: case OP_NOTQUERYI: case OP_NOTMINQUERYI:

#define CASE_ITERATOR_PRIVATE_DATA_2A \
    case OP_STAR: case OP_PLUS: \
    case OP_STARI: case OP_PLUSI: \
    case OP_NOTSTAR: case OP_NOTPLUS: \
    case OP_NOTSTARI: case OP_NOTPLUSI:

#define CASE_ITERATOR_PRIVATE_DATA_2B \
    case OP_UPTO: case OP_MINUPTO: \
    case OP_UPTOI: case OP_MINUPTOI: \
    case OP_NOTUPTO: case OP_NOTMINUPTO: \
    case OP_NOTUPTOI: case OP_NOTMINUPTOI:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
    case OP_TYPEMINSTAR: case OP_TYPEMINPLUS: \
    case OP_TYPEQUERY: case OP_TYPEMINQUERY:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
    case OP_TYPESTAR: case OP_TYPEPLUS:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
    case OP_TYPEUPTO: case OP_TYPEMINUPTO:
1059
/* Assigns private data offsets to the opcodes of the compiled pattern.

The pattern is scanned from common->start up to ccend. Whenever an opcode
needs private storage, the current offset is recorded in
common->private_data_ptrs (indexed by the distance of the opcode from
common->start) and the offset is advanced by the requested number of
sljit_sw sized words.

Arguments:
  common              compiler state (start, private_data_ptrs are used)
  private_data_start  in: the first free private data offset
                      out: the first free offset after the scan
  ccend               end of the scanned region

The scan is abandoned as soon as the running offset is greater than
SLJIT_MAX_LOCAL_SIZE; *private_data_start is still updated with the offset
reached at that point. */
static void set_private_data_ptrs(compiler_common *common, int *private_data_start, pcre_uchar *ccend)
{
pcre_uchar *cc = common->start;
pcre_uchar *alternative;
/* While cc < end, iterators do not receive a private slot (see below). */
pcre_uchar *end = NULL;
int private_data_ptr = *private_data_start;
/* space: number of words requested by an iterator or class.
size: length to skip; a negative value means that a character follows the
opcode, which may need extra code units in UTF mode.
bracketlen: length of a bracket header to skip. */
int space, size, bracketlen;
/* Set to FALSE by OP_BRAZERO and friends, so detect_repeat is not called
for the opcode which follows them. */
BOOL repeat_check = TRUE;

while (cc < ccend)
  {
  space = 0;
  size = 0;
  bracketlen = 0;
  if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
    break;

  if (repeat_check && (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND))
    {
    if (detect_repeat(common, cc))
      {
      /* These brackets are converted to repeats, so no global
      based single character repeat is allowed. */
      if (cc >= end)
        end = bracketend(cc);
      }
    }
  repeat_check = TRUE;

  switch(*cc)
    {
    case OP_KET:
    /* A non-zero entry at cc + 1 gives this OP_KET a private word; the value
    of that entry is also added to cc. NOTE(review): the entry is presumably
    written by detect_repeat, which is not visible here - confirm. */
    if (common->private_data_ptrs[cc + 1 - common->start] != 0)
      {
      common->private_data_ptrs[cc - common->start] = private_data_ptr;
      private_data_ptr += sizeof(sljit_sw);
      cc += common->private_data_ptrs[cc + 1 - common->start];
      }
    cc += 1 + LINK_SIZE;
    break;

    /* Brackets which always get one private word. */
    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ONCE:
    case OP_ONCE_NC:
    case OP_BRAPOS:
    case OP_SBRA:
    case OP_SBRAPOS:
    case OP_SCOND:
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw);
    bracketlen = 1 + LINK_SIZE;
    break;

    /* Same as above, but the header is IMM2_SIZE longer. */
    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw);
    bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    /* Might be a hidden SCOND. */
    alternative = cc + GET(cc, 1);
    if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
      {
      common->private_data_ptrs[cc - common->start] = private_data_ptr;
      private_data_ptr += sizeof(sljit_sw);
      }
    bracketlen = 1 + LINK_SIZE;
    break;

    /* These brackets need no private word, only their header is skipped. */
    case OP_BRA:
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_SCBRA:
    bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_BRAZERO:
    case OP_BRAMINZERO:
    case OP_BRAPOSZERO:
    repeat_check = FALSE;
    size = 1;
    break;

    CASE_ITERATOR_PRIVATE_DATA_1
    space = 1;
    size = -2;
    break;

    CASE_ITERATOR_PRIVATE_DATA_2A
    space = 2;
    size = -2;
    break;

    CASE_ITERATOR_PRIVATE_DATA_2B
    space = 2;
    size = -(2 + IMM2_SIZE);
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
    space = 1;
    size = 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
    /* No private words are requested when the type is OP_ANYNL or
    OP_EXTUNI. */
    if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
      space = 2;
    size = 1;
    break;

    case OP_TYPEUPTO:
    /* The type follows the IMM2_SIZE long repeat count. */
    if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
      space = 2;
    size = 1 + IMM2_SIZE;
    break;

    case OP_TYPEMINUPTO:
    space = 2;
    size = 1 + IMM2_SIZE;
    break;

    case OP_CLASS:
    case OP_NCLASS:
    /* Opcode plus a 32 byte long bitmap; the iterator (if any) follows. */
    size += 1 + 32 / sizeof(pcre_uchar);
    space = get_class_iterator_size(cc + size);
    break;

#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
    case OP_XCLASS:
    /* The length of the whole class is stored after the opcode. */
    size = GET(cc, 1);
    space = get_class_iterator_size(cc + size);
    break;
#endif

    default:
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }

  /* Character iterators, which are not inside a repeated bracket,
  get a private slot instead of allocating it on the stack. */
  if (space > 0 && cc >= end)
    {
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw) * space;
    }

  if (size != 0)
    {
    if (size < 0)
      {
      cc += -size;
#ifdef SUPPORT_UTF
      /* Skip the remaining code units of a multi-unit character. */
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      }
    else
      cc += size;
    }

  if (bracketlen > 0)
    {
    if (cc >= end)
      {
      /* The restriction is kept for the whole bracket, unless the opcode
      which closes the bracket is a plain OP_KET. */
      end = bracketend(cc);
      if (end[-1 - LINK_SIZE] == OP_KET)
        end = NULL;
      }
    cc += bracketlen;
    }
  }
*private_data_start = private_data_ptr;
}
1240
/* Computes the number of stack words needed by the frame of a bracket.

Arguments:
  common              compiler state
  cc                  start of the scanned region (or the bracket itself
                      when ccend is NULL)
  ccend               end of the scanned region; when NULL, the inside of
                      the bracket starting at cc is scanned
  recursive           when TRUE, OP_SET_SOM and the mark opcodes do not add
                      to the length
  needs_control_head  out: set to TRUE when an OP_MARK, OP_PRUNE_ARG,
                      OP_THEN_ARG or OP_THEN is found and
                      common->control_head_ptr is non-zero (always TRUE
                      when DEBUG_FORCE_CONTROL_HEAD is enabled)

Returns with a frame_types (always < 0) if no need for frame: no_frame when
an opcode which sets stack_restore was seen, no_stack otherwise. Otherwise
returns the counted length plus one; init_frame stores a terminating zero
word after the entries. */
static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL *needs_control_head)
{
/* Number of frame words counted so far. */
int length = 0;
/* Initial length set for OP_CBRAPOS / OP_SCBRAPOS, see the end. */
int possessive = 0;
BOOL stack_restore = FALSE;
BOOL setsom_found = recursive;
BOOL setmark_found = recursive;
/* The last capture is a local variable even for recursions. */
BOOL capture_last_found = FALSE;

#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
SLJIT_ASSERT(common->control_head_ptr != 0);
*needs_control_head = TRUE;
#else
*needs_control_head = FALSE;
#endif

if (ccend == NULL)
  {
  ccend = bracketend(cc) - (1 + LINK_SIZE);
  if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
    {
    possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
    /* This is correct regardless of common->capture_last_ptr. */
    capture_last_found = TRUE;
    }
  cc = next_opcode(common, cc);
  }

SLJIT_ASSERT(cc != NULL);
while (cc < ccend)
  switch(*cc)
    {
    case OP_SET_SOM:
    SLJIT_ASSERT(common->has_set_som);
    stack_restore = TRUE;
    /* Two words, counted only once. */
    if (!setsom_found)
      {
      length += 2;
      setsom_found = TRUE;
      }
    cc += 1;
    break;

    case OP_MARK:
    case OP_PRUNE_ARG:
    case OP_THEN_ARG:
    SLJIT_ASSERT(common->mark_ptr != 0);
    stack_restore = TRUE;
    /* Two words, counted only once. */
    if (!setmark_found)
      {
      length += 2;
      setmark_found = TRUE;
      }
    if (common->control_head_ptr != 0)
      *needs_control_head = TRUE;
    /* The argument length is stored in cc[1]. */
    cc += 1 + 2 + cc[1];
    break;

    case OP_RECURSE:
    /* Everything which has not been counted yet is counted here. */
    stack_restore = TRUE;
    if (common->has_set_som && !setsom_found)
      {
      length += 2;
      setsom_found = TRUE;
      }
    if (common->mark_ptr != 0 && !setmark_found)
      {
      length += 2;
      setmark_found = TRUE;
      }
    if (common->capture_last_ptr != 0 && !capture_last_found)
      {
      length += 2;
      capture_last_found = TRUE;
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_CBRAPOS:
    case OP_SCBRA:
    case OP_SCBRAPOS:
    stack_restore = TRUE;
    if (common->capture_last_ptr != 0 && !capture_last_found)
      {
      length += 2;
      capture_last_found = TRUE;
      }
    /* Three words for every capturing bracket. */
    length += 3;
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_THEN:
    stack_restore = TRUE;
    if (common->control_head_ptr != 0)
      *needs_control_head = TRUE;
    cc ++;
    break;

    default:
    stack_restore = TRUE;
    /* Fall through. */

    /* The opcodes listed below are skipped without setting
    stack_restore. */
    case OP_NOT_WORD_BOUNDARY:
    case OP_WORD_BOUNDARY:
    case OP_NOT_DIGIT:
    case OP_DIGIT:
    case OP_NOT_WHITESPACE:
    case OP_WHITESPACE:
    case OP_NOT_WORDCHAR:
    case OP_WORDCHAR:
    case OP_ANY:
    case OP_ALLANY:
    case OP_ANYBYTE:
    case OP_NOTPROP:
    case OP_PROP:
    case OP_ANYNL:
    case OP_NOT_HSPACE:
    case OP_HSPACE:
    case OP_NOT_VSPACE:
    case OP_VSPACE:
    case OP_EXTUNI:
    case OP_EODN:
    case OP_EOD:
    case OP_CIRC:
    case OP_CIRCM:
    case OP_DOLL:
    case OP_DOLLM:
    case OP_CHAR:
    case OP_CHARI:
    case OP_NOT:
    case OP_NOTI:

    case OP_EXACT:
    case OP_POSSTAR:
    case OP_POSPLUS:
    case OP_POSQUERY:
    case OP_POSUPTO:

    case OP_EXACTI:
    case OP_POSSTARI:
    case OP_POSPLUSI:
    case OP_POSQUERYI:
    case OP_POSUPTOI:

    case OP_NOTEXACT:
    case OP_NOTPOSSTAR:
    case OP_NOTPOSPLUS:
    case OP_NOTPOSQUERY:
    case OP_NOTPOSUPTO:

    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI:
    case OP_NOTPOSPLUSI:
    case OP_NOTPOSQUERYI:
    case OP_NOTPOSUPTOI:

    case OP_TYPEEXACT:
    case OP_TYPEPOSSTAR:
    case OP_TYPEPOSPLUS:
    case OP_TYPEPOSQUERY:
    case OP_TYPEPOSUPTO:

    case OP_CLASS:
    case OP_NCLASS:
    case OP_XCLASS:

    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }

/* Possessive quantifiers can use a special case. */
/* Nothing was added to the initial OP_CBRAPOS / OP_SCBRAPOS length. */
if (SLJIT_UNLIKELY(possessive == length))
  return stack_restore ? no_frame : no_stack;

if (length > 0)
  return length + 1;
return stack_restore ? no_frame : no_stack;
}
1423
/* Emits the code which stores a frame onto the stack.

The same opcodes are processed as in get_framesize, and every entry is
written to consecutive words addressed from STACK_TOP:
  - start of match, mark and last capture entries are two words long:
    the negated offset of the local variable, then its current value
  - capturing bracket entries are three words long: OVECTOR(offset), then
    the two ovector values of the bracket
The entries are followed by a zero word.

Arguments:
  common     compiler state
  cc, ccend  region to scan; when ccend is NULL the inside of the bracket
             starting at cc is scanned
  stackpos   index of the first frame word (converted by STACK())
  stacktop   index the frame must end at; only used by the assertions
  recursive  when TRUE, OP_SET_SOM and the mark opcodes emit nothing */
static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive)
{
DEFINE_COMPILER;
BOOL setsom_found = recursive;
BOOL setmark_found = recursive;
/* The last capture is a local variable even for recursions. */
BOOL capture_last_found = FALSE;
int offset;

/* >= 1 + shortest item size (2) */
SLJIT_UNUSED_ARG(stacktop);
SLJIT_ASSERT(stackpos >= stacktop + 2);

stackpos = STACK(stackpos);
if (ccend == NULL)
  {
  ccend = bracketend(cc) - (1 + LINK_SIZE);
  /* A non-recursive OP_CBRAPOS / OP_SCBRAPOS is not skipped, so its own
  capture entry is stored by the loop below. */
  if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
    cc = next_opcode(common, cc);
  }

SLJIT_ASSERT(cc != NULL);
while (cc < ccend)
  switch(*cc)
    {
    case OP_SET_SOM:
    SLJIT_ASSERT(common->has_set_som);
    if (!setsom_found)
      {
      /* Entry: -OVECTOR(0), followed by the value of OVECTOR(0). */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
      stackpos += (int)sizeof(sljit_sw);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
      stackpos += (int)sizeof(sljit_sw);
      setsom_found = TRUE;
      }
    cc += 1;
    break;

    case OP_MARK:
    case OP_PRUNE_ARG:
    case OP_THEN_ARG:
    SLJIT_ASSERT(common->mark_ptr != 0);
    if (!setmark_found)
      {
      /* Entry: -mark_ptr, followed by the value of the mark local. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
      stackpos += (int)sizeof(sljit_sw);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
      stackpos += (int)sizeof(sljit_sw);
      setmark_found = TRUE;
      }
    cc += 1 + 2 + cc[1];
    break;

    case OP_RECURSE:
    /* Every two word entry which has not been stored yet is stored now. */
    if (common->has_set_som && !setsom_found)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
      stackpos += (int)sizeof(sljit_sw);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
      stackpos += (int)sizeof(sljit_sw);
      setsom_found = TRUE;
      }
    if (common->mark_ptr != 0 && !setmark_found)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
      stackpos += (int)sizeof(sljit_sw);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
      stackpos += (int)sizeof(sljit_sw);
      setmark_found = TRUE;
      }
    if (common->capture_last_ptr != 0 && !capture_last_found)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
      stackpos += (int)sizeof(sljit_sw);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
      stackpos += (int)sizeof(sljit_sw);
      capture_last_found = TRUE;
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_CBRAPOS:
    case OP_SCBRA:
    case OP_SCBRAPOS:
    if (common->capture_last_ptr != 0 && !capture_last_found)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
      stackpos += (int)sizeof(sljit_sw);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
      stackpos += (int)sizeof(sljit_sw);
      capture_last_found = TRUE;
      }
    /* The bracket number is doubled, since each bracket has two ovector
    items. Unlike the entries above, the first word is not negated. */
    offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
    stackpos += (int)sizeof(sljit_sw);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
    stackpos += (int)sizeof(sljit_sw);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
    stackpos += (int)sizeof(sljit_sw);

    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    default:
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }

/* The zero word which closes the frame. */
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
SLJIT_ASSERT(stackpos == STACK(stacktop));
}
1545
get_private_data_copy_length(compiler_common * common,pcre_uchar * cc,pcre_uchar * ccend,BOOL needs_control_head)1546 static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
1547 {
1548 int private_data_length = needs_control_head ? 3 : 2;
1549 int size;
1550 pcre_uchar *alternative;
1551 /* Calculate the sum of the private machine words. */
1552 while (cc < ccend)
1553 {
1554 size = 0;
1555 switch(*cc)
1556 {
1557 case OP_KET:
1558 if (PRIVATE_DATA(cc) != 0)
1559 {
1560 private_data_length++;
1561 SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
1562 cc += PRIVATE_DATA(cc + 1);
1563 }
1564 cc += 1 + LINK_SIZE;
1565 break;
1566
1567 case OP_ASSERT:
1568 case OP_ASSERT_NOT:
1569 case OP_ASSERTBACK:
1570 case OP_ASSERTBACK_NOT:
1571 case OP_ONCE:
1572 case OP_ONCE_NC:
1573 case OP_BRAPOS:
1574 case OP_SBRA:
1575 case OP_SBRAPOS:
1576 case OP_SCOND:
1577 private_data_length++;
1578 SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
1579 cc += 1 + LINK_SIZE;
1580 break;
1581
1582 case OP_CBRA:
1583 case OP_SCBRA:
1584 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1585 private_data_length++;
1586 cc += 1 + LINK_SIZE + IMM2_SIZE;
1587 break;
1588
1589 case OP_CBRAPOS:
1590 case OP_SCBRAPOS:
1591 private_data_length += 2;
1592 cc += 1 + LINK_SIZE + IMM2_SIZE;
1593 break;
1594
1595 case OP_COND:
1596 /* Might be a hidden SCOND. */
1597 alternative = cc + GET(cc, 1);
1598 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1599 private_data_length++;
1600 cc += 1 + LINK_SIZE;
1601 break;
1602
1603 CASE_ITERATOR_PRIVATE_DATA_1
1604 if (PRIVATE_DATA(cc))
1605 private_data_length++;
1606 cc += 2;
1607 #ifdef SUPPORT_UTF
1608 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1609 #endif
1610 break;
1611
1612 CASE_ITERATOR_PRIVATE_DATA_2A
1613 if (PRIVATE_DATA(cc))
1614 private_data_length += 2;
1615 cc += 2;
1616 #ifdef SUPPORT_UTF
1617 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1618 #endif
1619 break;
1620
1621 CASE_ITERATOR_PRIVATE_DATA_2B
1622 if (PRIVATE_DATA(cc))
1623 private_data_length += 2;
1624 cc += 2 + IMM2_SIZE;
1625 #ifdef SUPPORT_UTF
1626 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1627 #endif
1628 break;
1629
1630 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1631 if (PRIVATE_DATA(cc))
1632 private_data_length++;
1633 cc += 1;
1634 break;
1635
1636 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1637 if (PRIVATE_DATA(cc))
1638 private_data_length += 2;
1639 cc += 1;
1640 break;
1641
1642 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1643 if (PRIVATE_DATA(cc))
1644 private_data_length += 2;
1645 cc += 1 + IMM2_SIZE;
1646 break;
1647
1648 case OP_CLASS:
1649 case OP_NCLASS:
1650 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1651 case OP_XCLASS:
1652 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1653 #else
1654 size = 1 + 32 / (int)sizeof(pcre_uchar);
1655 #endif
1656 if (PRIVATE_DATA(cc))
1657 private_data_length += get_class_iterator_size(cc + size);
1658 cc += size;
1659 break;
1660
1661 default:
1662 cc = next_opcode(common, cc);
1663 SLJIT_ASSERT(cc != NULL);
1664 break;
1665 }
1666 }
1667 SLJIT_ASSERT(cc == ccend);
1668 return private_data_length;
1669 }
1670
/* Emits the code which copies the private data words of the opcodes between
cc and ccend to the stack (save) or back from the stack (restore).

Arguments:
  common              compiler state
  cc, ccend           region to scan; the scan must stop exactly at ccend
  save                TRUE: local variables are stored onto the stack
                      FALSE: local variables are loaded from the stack
  stackptr, stacktop  stack word indexes, converted by STACK(); the copy
                      must end exactly at the converted stacktop (asserted)
  needs_control_head  TRUE when common->control_head_ptr is part of the
                      saved data

The words travel through TMP1 and TMP2 alternately: a register is loaded
for one word and only written to its destination when the same register is
needed again (or at the very end when saving). */
static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
  BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
{
DEFINE_COMPILER;
/* Local variable offsets of the words which belong to the current opcode. */
int srcw[2];
int count, size;
/* TRUE when the next word goes through TMP1, FALSE for TMP2. */
BOOL tmp1next = TRUE;
/* TRUE while the register holds no pending word. */
BOOL tmp1empty = TRUE;
BOOL tmp2empty = TRUE;
pcre_uchar *alternative;
enum {
  start,
  loop,
  end
} status;

/* The start state (the recursive / control head words) is only processed
when saving. */
status = save ? start : loop;
stackptr = STACK(stackptr - 2);
stacktop = STACK(stacktop - 1);

if (!save)
  {
  /* The words which the start state covers when saving are not loaded
  back here. */
  stackptr += (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
  /* Preload up to two words into TMP1 and TMP2. */
  if (stackptr < stacktop)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
    stackptr += sizeof(sljit_sw);
    tmp1empty = FALSE;
    }
  if (stackptr < stacktop)
    {
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
    stackptr += sizeof(sljit_sw);
    tmp2empty = FALSE;
    }
  /* The tmp1next must be TRUE in either way. */
  }

do
  {
  /* Each iteration selects zero, one or two local variables (srcw), which
  are copied by the inner loop at the end of the iteration. */
  count = 0;
  switch(status)
    {
    case start:
    SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
    count = 1;
    srcw[0] = common->recursive_head_ptr;
    if (needs_control_head)
      {
      SLJIT_ASSERT(common->control_head_ptr != 0);
      count = 2;
      srcw[1] = common->control_head_ptr;
      }
    status = loop;
    break;

    case loop:
    if (cc >= ccend)
      {
      status = end;
      break;
      }

    /* The cases below follow get_private_data_copy_length. */
    switch(*cc)
      {
      case OP_KET:
      if (PRIVATE_DATA(cc) != 0)
        {
        count = 1;
        srcw[0] = PRIVATE_DATA(cc);
        SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
        cc += PRIVATE_DATA(cc + 1);
        }
      cc += 1 + LINK_SIZE;
      break;

      case OP_ASSERT:
      case OP_ASSERT_NOT:
      case OP_ASSERTBACK:
      case OP_ASSERTBACK_NOT:
      case OP_ONCE:
      case OP_ONCE_NC:
      case OP_BRAPOS:
      case OP_SBRA:
      case OP_SBRAPOS:
      case OP_SCOND:
      count = 1;
      srcw[0] = PRIVATE_DATA(cc);
      SLJIT_ASSERT(srcw[0] != 0);
      cc += 1 + LINK_SIZE;
      break;

      case OP_CBRA:
      case OP_SCBRA:
      /* The OVECTOR_PRIV word of the bracket is copied, unless the bracket
      is marked in optimized_cbracket. */
      if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
        {
        count = 1;
        srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
        }
      cc += 1 + LINK_SIZE + IMM2_SIZE;
      break;

      case OP_CBRAPOS:
      case OP_SCBRAPOS:
      count = 2;
      srcw[0] = PRIVATE_DATA(cc);
      srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
      SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
      cc += 1 + LINK_SIZE + IMM2_SIZE;
      break;

      case OP_COND:
      /* Might be a hidden SCOND. */
      alternative = cc + GET(cc, 1);
      if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
        {
        count = 1;
        srcw[0] = PRIVATE_DATA(cc);
        SLJIT_ASSERT(srcw[0] != 0);
        }
      cc += 1 + LINK_SIZE;
      break;

      CASE_ITERATOR_PRIVATE_DATA_1
      if (PRIVATE_DATA(cc))
        {
        count = 1;
        srcw[0] = PRIVATE_DATA(cc);
        }
      cc += 2;
#ifdef SUPPORT_UTF
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      break;

      CASE_ITERATOR_PRIVATE_DATA_2A
      if (PRIVATE_DATA(cc))
        {
        /* Two consecutive words. */
        count = 2;
        srcw[0] = PRIVATE_DATA(cc);
        srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
        }
      cc += 2;
#ifdef SUPPORT_UTF
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      break;

      CASE_ITERATOR_PRIVATE_DATA_2B
      if (PRIVATE_DATA(cc))
        {
        count = 2;
        srcw[0] = PRIVATE_DATA(cc);
        srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
        }
      cc += 2 + IMM2_SIZE;
#ifdef SUPPORT_UTF
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      break;

      CASE_ITERATOR_TYPE_PRIVATE_DATA_1
      if (PRIVATE_DATA(cc))
        {
        count = 1;
        srcw[0] = PRIVATE_DATA(cc);
        }
      cc += 1;
      break;

      CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
      if (PRIVATE_DATA(cc))
        {
        count = 2;
        srcw[0] = PRIVATE_DATA(cc);
        srcw[1] = srcw[0] + sizeof(sljit_sw);
        }
      cc += 1;
      break;

      CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
      if (PRIVATE_DATA(cc))
        {
        count = 2;
        srcw[0] = PRIVATE_DATA(cc);
        srcw[1] = srcw[0] + sizeof(sljit_sw);
        }
      cc += 1 + IMM2_SIZE;
      break;

      case OP_CLASS:
      case OP_NCLASS:
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
      case OP_XCLASS:
      size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
#else
      size = 1 + 32 / (int)sizeof(pcre_uchar);
#endif
      /* Only one or two words are expected for a class iterator. */
      if (PRIVATE_DATA(cc))
        switch(get_class_iterator_size(cc + size))
          {
          case 1:
          count = 1;
          srcw[0] = PRIVATE_DATA(cc);
          break;

          case 2:
          count = 2;
          srcw[0] = PRIVATE_DATA(cc);
          srcw[1] = srcw[0] + sizeof(sljit_sw);
          break;

          default:
          SLJIT_ASSERT_STOP();
          break;
          }
      cc += size;
      break;

      default:
      cc = next_opcode(common, cc);
      SLJIT_ASSERT(cc != NULL);
      break;
      }
    break;

    case end:
    /* The outer loop exits before this state could be processed. */
    SLJIT_ASSERT_STOP();
    break;
    }

  /* The selected words are processed from the last one to the first. */
  while (count > 0)
    {
    count--;
    if (save)
      {
      /* Saving: flush the pending word of the register (if any) to the
      stack, then load the next local variable into it. */
      if (tmp1next)
        {
        if (!tmp1empty)
          {
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
          stackptr += sizeof(sljit_sw);
          }
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
        tmp1empty = FALSE;
        tmp1next = FALSE;
        }
      else
        {
        if (!tmp2empty)
          {
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
          stackptr += sizeof(sljit_sw);
          }
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
        tmp2empty = FALSE;
        tmp1next = TRUE;
        }
      }
    else
      {
      /* Restoring: write the preloaded register to the local variable,
      then refill it from the stack if any word is left. */
      if (tmp1next)
        {
        SLJIT_ASSERT(!tmp1empty);
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP1, 0);
        tmp1empty = stackptr >= stacktop;
        if (!tmp1empty)
          {
          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
          stackptr += sizeof(sljit_sw);
          }
        tmp1next = FALSE;
        }
      else
        {
        SLJIT_ASSERT(!tmp2empty);
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP2, 0);
        tmp2empty = stackptr >= stacktop;
        if (!tmp2empty)
          {
          OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
          stackptr += sizeof(sljit_sw);
          }
        tmp1next = TRUE;
        }
      }
    }
  }
while (status != end);

if (save)
  {
  /* Flush the words still pending in the registers; the register which
  would be used next holds the older word, so it is written first. */
  if (tmp1next)
    {
    if (!tmp1empty)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
      stackptr += sizeof(sljit_sw);
      }
    if (!tmp2empty)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
      stackptr += sizeof(sljit_sw);
      }
    }
  else
    {
    if (!tmp2empty)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
      stackptr += sizeof(sljit_sw);
      }
    if (!tmp1empty)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
      stackptr += sizeof(sljit_sw);
      }
    }
  }
SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
}
1992
set_then_offsets(compiler_common * common,pcre_uchar * cc,pcre_uint8 * current_offset)1993 static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, pcre_uint8 *current_offset)
1994 {
1995 pcre_uchar *end = bracketend(cc);
1996 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
1997
1998 /* Assert captures then. */
1999 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
2000 current_offset = NULL;
2001 /* Conditional block does not. */
2002 if (*cc == OP_COND || *cc == OP_SCOND)
2003 has_alternatives = FALSE;
2004
2005 cc = next_opcode(common, cc);
2006 if (has_alternatives)
2007 current_offset = common->then_offsets + (cc - common->start);
2008
2009 while (cc < end)
2010 {
2011 if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
2012 cc = set_then_offsets(common, cc, current_offset);
2013 else
2014 {
2015 if (*cc == OP_ALT && has_alternatives)
2016 current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
2017 if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
2018 *current_offset = 1;
2019 cc = next_opcode(common, cc);
2020 }
2021 }
2022
2023 return end;
2024 }
2025
/* The opcode group macros are not available past this point. */
#undef CASE_ITERATOR_PRIVATE_DATA_1
#undef CASE_ITERATOR_PRIVATE_DATA_2A
#undef CASE_ITERATOR_PRIVATE_DATA_2B
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2032
is_powerof2(unsigned int value)2033 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
2034 {
2035 return (value & (value - 1)) == 0;
2036 }
2037
set_jumps(jump_list * list,struct sljit_label * label)2038 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
2039 {
2040 while (list)
2041 {
2042 /* sljit_set_label is clever enough to do nothing
2043 if either the jump or the label is NULL. */
2044 SET_LABEL(list->jump, label);
2045 list = list->next;
2046 }
2047 }
2048
add_jump(struct sljit_compiler * compiler,jump_list ** list,struct sljit_jump * jump)2049 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump)
2050 {
2051 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
2052 if (list_item)
2053 {
2054 list_item->next = *list;
2055 list_item->jump = jump;
2056 *list = list_item;
2057 }
2058 }
2059
/* Registers a stub in front of common->stubs. The jump which enters the stub
is start; a label emitted at the current position is recorded as the point
where the stub returns to (see flush_stubs). Nothing is registered, and no
label is emitted, when the allocation of the list item fails. */
static void add_stub(compiler_common *common, struct sljit_jump *start)
{
DEFINE_COMPILER;
stub_list *stub = sljit_alloc_memory(compiler, sizeof(stub_list));

if (stub == NULL)
  return;

stub->start = start;
stub->quit = LABEL();
stub->next = common->stubs;
common->stubs = stub;
}
2073
/* Emits the body of every stub registered by add_stub, then empties the
list. Each body starts where the start jump of the stub lands, performs a
fast call which is added to the common->stackalloc jump list, and jumps back
to the quit label of the stub. */
static void flush_stubs(compiler_common *common)
{
DEFINE_COMPILER;
stub_list *stub;

for (stub = common->stubs; stub != NULL; stub = stub->next)
  {
  JUMPHERE(stub->start);
  add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
  JUMPTO(SLJIT_JUMP, stub->quit);
  }
common->stubs = NULL;
}
2088
/* Emits a label at the current position and records it, together with
update_addr, in front of common->label_addrs. Nothing is recorded, and no
label is emitted, when the allocation of the list item fails. */
static void add_label_addr(compiler_common *common, sljit_uw *update_addr)
{
DEFINE_COMPILER;
label_addr_list *entry = sljit_alloc_memory(compiler, sizeof(label_addr_list));

if (entry != NULL)
  {
  entry->label = LABEL();
  entry->update_addr = update_addr;
  entry->next = common->label_addrs;
  common->label_addrs = entry;
  }
}
2102
/* Emits the code which decreases COUNT_MATCH by one, and jumps to the
common->calllimit jump list when the result is zero. */
static SLJIT_INLINE void count_match(compiler_common *common)
{
DEFINE_COMPILER;

OP2(SLJIT_SUB | SLJIT_SET_E, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
add_jump(compiler, &common->calllimit, JUMP(SLJIT_ZERO));
}
2110
/* Emits the code which allocates size words on the stack: STACK_TOP is
increased by size * sizeof(sljit_sw), and a stub (see add_stub) is entered
when the new STACK_TOP is greater than STACK_LIMIT. */
static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
{
/* May destroy all locals and registers except TMP2. */
DEFINE_COMPILER;

OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
#ifdef DESTROY_REGISTERS
/* Overwrite everything which may be destroyed with a fixed value.
NOTE(review): presumably a debugging aid which exposes code relying on
these values surviving the allocation - confirm. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
#endif
add_stub(common, CMP(SLJIT_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
}
2126
/* Emits the code which releases size words of the stack: STACK_TOP is
decreased by size * sizeof(sljit_sw). */
static SLJIT_INLINE void free_stack(compiler_common *common, int size)
{
DEFINE_COMPILER;
OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
}
2132
/* Allocates a block of size bytes for read-only data.

One extra sljit_uw is allocated in front of the block; it stores the
previous value of common->read_only_data_head, so the blocks form a
singly linked list which can be released by free_read_only_data.

Arguments:
  common  compiler state
  size    number of bytes requested by the caller

Returns a pointer to the usable area (right after the link word), or NULL
when the compiler is already in an error state, when size is too large to
be extended by the link word, or when the allocation fails. In the last two
cases the compiler memory error is set. */
static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
{
DEFINE_COMPILER;
sljit_uw *result;

if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  return NULL;

/* Adding the link word must not wrap around, otherwise a block smaller
than the request would be allocated. */
if (SLJIT_UNLIKELY(size > ~(sljit_uw)0 - sizeof(sljit_uw)))
  {
  sljit_set_compiler_memory_error(compiler);
  return NULL;
  }

result = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data);
if (SLJIT_UNLIKELY(result == NULL))
  {
  sljit_set_compiler_memory_error(compiler);
  return NULL;
  }

/* Link the new block in front of the list. */
*(void**)result = common->read_only_data_head;
common->read_only_data_head = (void *)result;
return result + 1;
}
2152
free_read_only_data(void * current,void * allocator_data)2153 static void free_read_only_data(void *current, void *allocator_data)
2154 {
2155 void *next;
2156
2157 SLJIT_UNUSED_ARG(allocator_data);
2158
2159 while (current != NULL)
2160 {
2161 next = *(void**)current;
2162 SLJIT_FREE(current, allocator_data);
2163 current = next;
2164 }
2165 }
2166
/* Emits the code which sets OVECTOR(1) .. OVECTOR(length - 1) to the
"begin - 1" value, where begin is read from the jit_arguments structure
addressed by SLJIT_S0. Short vectors (length < 8) are filled by individual
stores, longer ones by an emitted loop. */
static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
{
DEFINE_COMPILER;
struct sljit_label *loop;
int i;

/* At this point we can freely use all temporary registers. */
SLJIT_ASSERT(length > 1);
/* TMP1 returns with begin - 1. */
/* NOTE(review): the value is computed into SLJIT_R0; presumably TMP1 is
the same register - confirm. */
OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
if (length < 8)
  {
  for (i = 1; i < length; i++)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0);
  }
else
  {
  /* SLJIT_R1 starts at the address of OVECTOR_START, and SLJIT_R2 counts
  the length - 1 stores. NOTE(review): SLJIT_MOVU presumably advances
  SLJIT_R1 by sizeof(sljit_sw) before each store - confirm against the
  sljit documentation. */
  GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
  OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
  loop = LABEL();
  OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw), SLJIT_R0, 0);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
  JUMPTO(SLJIT_NOT_ZERO, loop);
  }
}
2192
/* Emits the code which resets the match state:
  - OVECTOR(2) .. OVECTOR(length - 1) are set to the value of OVECTOR(1)
  - the mark and control head locals (when present) are set to zero
  - STACK_TOP is reloaded from the base field of the sljit_stack referenced
    by the stack field of ARGUMENTS
  - TMP1 is loaded from the common->start_ptr local */
static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
{
DEFINE_COMPILER;
struct sljit_label *loop;
int i;

SLJIT_ASSERT(length > 1);
/* OVECTOR(1) contains the "string begin - 1" constant. */
if (length > 2)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
if (length < 8)
  {
  for (i = 2; i < length; i++)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), TMP1, 0);
  }
else
  {
  /* STACK_TOP is used as the loop counter here; it is reloaded below.
  NOTE(review): SLJIT_MOVU presumably advances TMP2 by sizeof(sljit_sw)
  before each store - confirm against the sljit documentation. */
  GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
  loop = LABEL();
  OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
  OP2(SLJIT_SUB | SLJIT_SET_E, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
  JUMPTO(SLJIT_NOT_ZERO, loop);
  }

/* STACK_TOP temporarily holds ARGUMENTS, then the stack structure, and
finally the base of the stack. */
OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
if (common->mark_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
if (common->control_head_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
}
2227
do_search_mark(sljit_sw * current,const pcre_uchar * skip_arg)2228 static sljit_sw SLJIT_CALL do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg)
2229 {
2230 while (current != NULL)
2231 {
2232 switch (current[-2])
2233 {
2234 case type_then_trap:
2235 break;
2236
2237 case type_mark:
2238 if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[-3]) == 0)
2239 return current[-4];
2240 break;
2241
2242 default:
2243 SLJIT_ASSERT_STOP();
2244 break;
2245 }
2246 SLJIT_ASSERT(current > (sljit_sw*)current[-1]);
2247 current = (sljit_sw*)current[-1];
2248 }
2249 return -1;
2250 }
2251
static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
{
/* Emits the code which converts the internal ovector slots (stored in the
local frame starting at OVECTOR_START) into integer offsets relative to
jit_arguments.begin and writes them to jit_arguments.offsets. The number of
copied items is jit_arguments.offset_count. If a mark slot is in use, its
value is copied to jit_arguments.mark_ptr as well. The return value of the
generated code is left in SLJIT_RETURN_REG. */
DEFINE_COMPILER;
struct sljit_label *loop;
struct sljit_jump *early_quit;

/* At this point we can freely use all registers. */
/* Remember the old OVECTOR(1) content in SLJIT_S2 (the second loop below
compares slots against it), then store the current STR_PTR in OVECTOR(1). */
OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);

/* SLJIT_R0: arguments, SLJIT_R1: number of items to copy. SLJIT_R2 is used
temporarily to carry the mark value into the arguments structure. */
OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
if (common->mark_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
OP1(SLJIT_MOV_SI, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offset_count));
if (common->mark_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
/* SLJIT_R2 is set one int before the output array, because the store in the
loop is the update form (SLJIT_MOVU_SI) with a sizeof(int) displacement. */
OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
/* SLJIT_R0: subject begin, SLJIT_S0: address of the first ovector slot. */
OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, begin));
GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START);
/* Unlikely, but possible */
early_quit = CMP(SLJIT_EQUAL, SLJIT_R1, 0, SLJIT_IMM, 0);
loop = LABEL();
/* Slot value minus subject begin, then step to the next slot. */
OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R0, 0);
OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
/* Copy the integer value to the output buffer */
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
/* Scale the difference down by UCHAR_SHIFT in the 16 and 32 bit libraries. */
OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_R2), sizeof(int), SLJIT_S1, 0);
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
JUMPTO(SLJIT_NOT_ZERO, loop);
JUMPHERE(early_quit);

/* Calculate the return value, which is the maximum ovector value. */
if (topbracket > 1)
  {
  /* Start just above the pair of the top bracket and walk downwards in
  steps of two slots. SLJIT_R1 counts down from topbracket + 1 and the loop
  continues while the loaded slot equals the value saved in SLJIT_S2. */
  GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);

  /* OVECTOR(0) is never equal to SLJIT_S2. */
  loop = LABEL();
  OP1(SLJIT_MOVU, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw)));
  OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
  CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
  }
else
  /* With at most one bracket the generated code always returns 1. */
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
}
2301
static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
{
/* Emits the exit path for a partial match. The generated code sets
SLJIT_RETURN_REG to PCRE_ERROR_PARTIAL, fills up to three items of
jit_arguments.offsets (depending on jit_arguments.real_offset_count) and
jumps to the quit label. STR_END is overwritten by the generated code. */
DEFINE_COMPILER;
struct sljit_jump *jump;

/* STR_END is SLJIT_S1, and SLJIT_S1 is reused as a temporary below. */
SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S1, str_end_must_be_saved_reg2);
/* The hit_start slot is expected only in soft partial mode. */
SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
  && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));

OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
OP1(SLJIT_MOV_SI, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
/* Nothing is stored when there is no room for two offsets. */
CMPTO(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 2, quit);

/* Store match begin and end. */
/* SLJIT_S0: subject begin, SLJIT_R1: output offsets array. */
OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, begin));
OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, offsets));

/* The third item is stored only if the output has room for at least three.
Its source is the start_ptr slot in hard partial mode, and the slot right
after hit_start otherwise. */
jump = CMP(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 3);
OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_S0, 0);
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), 2 * sizeof(int), SLJIT_R2, 0);
JUMPHERE(jump);

/* The second item is STR_END relative to the subject begin. The value for
the first item is loaded into SLJIT_R2 beforehand (start_used_ptr slot in
hard partial mode, hit_start slot otherwise). */
OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
OP2(SLJIT_SUB, SLJIT_S1, 0, STR_END, 0, SLJIT_S0, 0);
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), sizeof(int), SLJIT_S1, 0);

/* The first item is the loaded pointer relative to the subject begin. */
OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S0, 0);
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R2, 0);

JUMPTO(SLJIT_JUMP, quit);
}
2343
check_start_used_ptr(compiler_common * common)2344 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2345 {
2346 /* May destroy TMP1. */
2347 DEFINE_COMPILER;
2348 struct sljit_jump *jump;
2349
2350 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2351 {
2352 /* The value of -1 must be kept for start_used_ptr! */
2353 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
2354 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2355 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2356 jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2357 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2358 JUMPHERE(jump);
2359 }
2360 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
2361 {
2362 jump = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2363 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2364 JUMPHERE(jump);
2365 }
2366 }
2367
char_has_othercase(compiler_common * common,pcre_uchar * cc)2368 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar *cc)
2369 {
2370 /* Detects if the character has an othercase. */
2371 unsigned int c;
2372
2373 #ifdef SUPPORT_UTF
2374 if (common->utf)
2375 {
2376 GETCHAR(c, cc);
2377 if (c > 127)
2378 {
2379 #ifdef SUPPORT_UCP
2380 return c != UCD_OTHERCASE(c);
2381 #else
2382 return FALSE;
2383 #endif
2384 }
2385 #ifndef COMPILE_PCRE8
2386 return common->fcc[c] != c;
2387 #endif
2388 }
2389 else
2390 #endif
2391 c = *cc;
2392 return MAX_255(c) ? common->fcc[c] != c : FALSE;
2393 }
2394
char_othercase(compiler_common * common,unsigned int c)2395 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2396 {
2397 /* Returns with the othercase. */
2398 #ifdef SUPPORT_UTF
2399 if (common->utf && c > 127)
2400 {
2401 #ifdef SUPPORT_UCP
2402 return UCD_OTHERCASE(c);
2403 #else
2404 return c;
2405 #endif
2406 }
2407 #endif
2408 return TABLE_GET(c, common->fcc, c);
2409 }
2410
static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar *cc)
{
/* Checks whether the character at cc and its other case differ in exactly
one bit. Returns 0 if they do not. Otherwise the low 8 bits of the result
contain the differing bit (moved into the range of a single byte) and the
bits above them contain an index which selects where that byte is found
within the encoded character. The caller must pass a character which has
an other case. */
unsigned int chr, other, diff;
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
int unit_index;
#endif

#ifdef SUPPORT_UTF
if (!common->utf)
  {
  chr = *cc;
  other = TABLE_GET(chr, common->fcc, chr);
  }
else
  {
  GETCHAR(chr, cc);
  if (chr > 127)
    {
#ifdef SUPPORT_UCP
    other = UCD_OTHERCASE(chr);
#else
    other = chr;
#endif
    }
  else
    other = common->fcc[chr];
  }
#else
chr = *cc;
other = TABLE_GET(chr, common->fcc, chr);
#endif

SLJIT_ASSERT(chr != other);

diff = chr ^ other;
/* Fast path for the letters of the English alphabet. */
if (diff == 0x20 && chr <= 127)
  return 0x20;

/* chr != other, so diff is never zero. More than one set bit is a failure. */
if (!is_powerof2(diff))
  return 0;

#if defined COMPILE_PCRE8

#ifdef SUPPORT_UTF
if (common->utf && chr > 127)
  {
  /* Each step moves the bit down by six positions and the index back by
  one, starting from the value given by GET_EXTRALEN for the first byte. */
  for (unit_index = GET_EXTRALEN(*cc); (diff & 0x3f) == 0; unit_index--)
    diff >>= 6;
  return (unit_index << 8) | diff;
  }
#endif /* SUPPORT_UTF */
return diff;

#elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32

#ifdef SUPPORT_UTF
if (common->utf && chr > 65535)
  {
  /* A difference below bit 10 is reported with index 2 or 3. Otherwise
  the bit is moved down by ten positions and handled by the common return
  below. */
  if (diff < (1 << 10))
    return (diff < 256) ? ((2 << 8) | diff) : ((3 << 8) | (diff >> 8));
  diff >>= 10;
  }
#endif /* SUPPORT_UTF */
return (diff < 256) ? diff : ((1 << 8) | (diff >> 8));

#endif /* COMPILE_PCRE[8|16|32] */
}
2486
static void check_partial(compiler_common *common, BOOL force)
{
/* Emits the check which detects a partial match. Nothing is emitted in
normal (non-partial) mode. Unless force is set, the generated code does
nothing when the start_used_ptr slot is greater than or equal to STR_PTR.
With force set, soft partial mode still skips the action when the slot
is -1. The action itself is clearing the hit_start slot in soft partial
mode, and jumping to the partial match exit otherwise. Does not modify
registers. */
DEFINE_COMPILER;
const BOOL soft_partial = (common->mode == JIT_PARTIAL_SOFT_COMPILE);
struct sljit_jump *skip_action = NULL;

SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);

if (common->mode == JIT_COMPILE)
  return;

if (force)
  {
  if (soft_partial)
    skip_action = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
  }
else
  skip_action = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);

if (soft_partial)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
else if (common->partialmatchlabel == NULL)
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
else
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);

if (skip_action != NULL)
  JUMPHERE(skip_action);
}
2516
static void check_str_end(compiler_common *common, jump_list **end_reached)
{
/* Emits the end of subject check. In normal mode a single jump is added to
end_reached, which is taken when STR_PTR >= STR_END. In the partial modes,
when the end is reached and the start_used_ptr slot is greater than or
equal to STR_PTR, the code jumps to end_reached. Otherwise soft partial
mode clears the hit_start slot before jumping to end_reached, and hard
partial mode jumps to the partial match exit. Does not affect registers.
Usually used in a tight spot. */
DEFINE_COMPILER;
struct sljit_jump *not_at_end;

if (common->mode == JIT_COMPILE)
  {
  add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  return;
  }

not_at_end = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
/* This comparison is the same for both partial modes. */
add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
  }
else if (common->partialmatchlabel != NULL)
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
else
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
JUMPHERE(not_at_end);
}
2546
static void detect_partial_match(compiler_common *common, jump_list **backtracks)
{
/* Emits the end of subject check of a character matcher. In normal mode
the code jumps to backtracks when STR_PTR >= STR_END. In the partial modes
the same happens if the start_used_ptr slot is greater than or equal to
STR_PTR. Otherwise soft partial mode clears the hit_start slot and then
jumps to backtracks, while hard partial mode jumps to the partial match
exit. */
DEFINE_COMPILER;
struct sljit_jump *has_input;

if (common->mode == JIT_COMPILE)
  {
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  return;
  }

/* Partial matching mode. */
has_input = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
switch (common->mode)
  {
  case JIT_PARTIAL_SOFT_COMPILE:
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
  break;

  default:
  /* Use the exit label directly if it is already known. */
  if (common->partialmatchlabel == NULL)
    add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
  else
    JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
  break;
  }
JUMPHERE(has_input);
}
2575
static void peek_char(compiler_common *common, pcre_uint32 max)
{
/* Reads the character at STR_PTR into TMP1 and keeps STR_PTR unchanged.
Does not check STR_END. TMP2 is destroyed. In 8 bit UTF mode a fast call is
added to common->utfreadchar (presumably do_utfreadchar, which stores its
return address in RETURN_ADDR), so RETURN_ADDR may be destroyed as well.

The max argument is the largest character value the caller is interested
in. When it is below the first value which needs more than one code unit
(128 for UTF-8, 0xd800 for UTF-16), only the first code unit is loaded and
no decoding code is emitted. */
DEFINE_COMPILER;
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
struct sljit_jump *jump;
#endif

SLJIT_UNUSED_ARG(max);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
if (common->utf)
  {
  if (max < 128) return;

  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
  /* The helper expects STR_PTR to point after the first byte. It returns
  the length of the character in TMP2, which is used to restore STR_PTR. */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  JUMPHERE(jump);
  }
#endif /* SUPPORT_UTF && COMPILE_PCRE8 */

#if defined SUPPORT_UTF && defined COMPILE_PCRE16
if (common->utf)
  {
  if (max < 0xd800) return;

  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
  /* TMP2 contains the high surrogate. STR_PTR still points to it, because
  this function never advances STR_PTR, so the low surrogate is the next
  code unit. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  /* Adding 0x40 before the shift by 10 adds 0x10000 to the result. */
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
  JUMPHERE(jump);
  }
#endif
}
2618
2619 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2620
static BOOL is_char7_bitset(const pcre_uint8 *bitset, BOOL nclass)
{
/* Tells whether the character codes below 128 are enough to determine a
match. This is the case when bytes 16..31 of the 32 byte bitset are all
equal: all 0xff if nclass is set, all zero otherwise. */
const pcre_uint8 expected = nclass ? 0xff : 0;
int i;

for (i = 16; i < 32; i++)
  if (bitset[i] != expected)
    return FALSE;
return TRUE;
}
2637
static void read_char7_type(compiler_common *common, BOOL full_read)
{
/* Loads the first byte of the current character, steps STR_PTR over it and
reads the ctypes entry of that byte into TMP1. The result is the precise
character type only for characters below 128. Does not check STR_END.
TMP2 is destroyed. When full_read is set and the first byte is 0xc0 or
above, STR_PTR is also advanced by the utf8_table4 entry of that byte. */
DEFINE_COMPILER;
struct sljit_jump *single_byte;

SLJIT_ASSERT(common->utf);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);

if (!full_read)
  return;

single_byte = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
/* The table is biased by 0xc0, because it is indexed by the first byte. */
OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
JUMPHERE(single_byte);
}
2661
2662 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2663
static void read_char_range(compiler_common *common, pcre_uint32 min, pcre_uint32 max, BOOL update_str_ptr)
{
/* Reads the precise value of a character into TMP1, if the character is
between min and max (c >= min && c <= max). Otherwise it returns with a value
outside the range. Does not check STR_END.

The first code unit is always consumed. TMP2 is used as a temporary in the
UTF paths, and RETURN_ADDR is used either directly or by the fast called
helpers in UTF-8 mode. In the UTF paths which inspect update_str_ptr, a TRUE
value makes STR_PTR step over the whole character even if its value is not
decoded, while a FALSE value makes STR_PTR advance only over the code units
which were actually decoded. */
DEFINE_COMPILER;
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
struct sljit_jump *jump;
#endif
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
struct sljit_jump *jump2;
#endif

SLJIT_UNUSED_ARG(update_str_ptr);
SLJIT_UNUSED_ARG(min);
SLJIT_UNUSED_ARG(max);
SLJIT_ASSERT(min <= max);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

#if defined SUPPORT_UTF && defined COMPILE_PCRE8
if (common->utf)
  {
  /* The first byte is enough if only values below 128 are interesting and
  the rest of the character does not need to be skipped. */
  if (max < 128 && !update_str_ptr) return;

  /* First bytes below 0xc0 are not decoded any further. */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
  if (min >= 0x10000)
    {
    /* Only four byte sequences can be in range: they are decoded inline.
    For any other first byte the decoding is skipped by jump2, and TMP1
    keeps the second byte which is below min. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
    if (update_str_ptr)
      /* Presumably the number of additional bytes of the character. */
      OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
    if (!update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    JUMPHERE(jump2);
    if (update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
    }
  else if (min >= 0x800 && max <= 0xffff)
    {
    /* Only three byte sequences can be in range: same scheme as above,
    with first bytes 0xe0..0xef. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
    if (update_str_ptr)
      OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    if (!update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    JUMPHERE(jump2);
    if (update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
    }
  else if (max >= 0x800)
    /* General case: decoded by a shared helper. The shorter helper is
    selected when values of 0x10000 and above are not needed. */
    add_jump(compiler, (max < 0x10000) ? &common->utfreadchar16 : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
  else if (max < 128)
    {
    /* Only reachable with update_str_ptr set: the value is not needed, the
    rest of the character is skipped using the table. */
    OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
    }
  else
    {
    /* max is between 128 and 0x7ff: the first two bytes are combined as a
    two byte sequence. */
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    if (!update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    else
      OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    if (update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
    }
  JUMPHERE(jump);
  }
#endif

#if defined SUPPORT_UTF && defined COMPILE_PCRE16
if (common->utf)
  {
  if (max >= 0x10000)
    {
    /* Surrogate pairs are decoded. STR_PTR already points to the second
    code unit, which is consumed as well. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
    /* TMP2 contains the high surrogate. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    JUMPHERE(jump);
    return;
    }

  /* No surrogate handling is needed if the caller is not interested in
  such values and the second code unit does not need to be skipped. */
  if (max < 0xd800 && !update_str_ptr) return;

  /* Skip low surrogate if necessary. */
  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
  if (update_str_ptr)
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  if (max >= 0xd800)
    /* The pair is not decoded: 0x10000 is returned instead, which is
    above max at this point. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
  JUMPHERE(jump);
  }
#endif
}
2791
static SLJIT_INLINE void read_char(compiler_common *common)
{
/* Reads a character into TMP1 and steps STR_PTR over it. This is
read_char_range() with the range 0 .. READ_CHAR_MAX and update_str_ptr
set. Does not check STR_END. */
read_char_range(common, 0, READ_CHAR_MAX, TRUE);
}
2796
static void read_char8_type(compiler_common *common, BOOL update_str_ptr)
{
/* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END.
The type is taken from the ctypes table for characters below 256, and it
is zero for anything above. TMP2 is destroyed. The update_str_ptr argument
is inspected only in the UTF paths: in UTF-8 mode it selects the shared
utfreadtype8 helper instead of the inline two byte decoder, and in UTF-16
mode it requests skipping the second code unit of a surrogate pair. */
DEFINE_COMPILER;
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
struct sljit_jump *jump;
#endif
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
struct sljit_jump *jump2;
#endif

SLJIT_UNUSED_ARG(update_str_ptr);

/* TMP2: first code unit. STR_PTR is stepped over it. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

#if defined SUPPORT_UTF && defined COMPILE_PCRE8
if (common->utf)
  {
  /* This can be an extra read in some situations, but hopefully
  it is needed in most cases. */
  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
  /* The type read above is final for first bytes below 0xc0. */
  jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
  if (!update_str_ptr)
    {
    /* Combine the first two bytes as a two byte sequence. If the result
    is above 255, the type is zero. Only one more byte is consumed. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
    JUMPHERE(jump2);
    }
  else
    add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
  JUMPHERE(jump);
  return;
  }
#endif /* SUPPORT_UTF && COMPILE_PCRE8 */

#if !defined COMPILE_PCRE8
/* The ctypes array contains only 256 values. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
#if !defined COMPILE_PCRE8
JUMPHERE(jump);
#endif

#if defined SUPPORT_UTF && defined COMPILE_PCRE16
if (common->utf && update_str_ptr)
  {
  /* Skip low surrogate if necessary. */
  OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
  jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  JUMPHERE(jump);
  }
#endif /* SUPPORT_UTF && COMPILE_PCRE16 */
}
2861
static void skip_char_back(compiler_common *common)
{
/* Emits code which moves STR_PTR back by one character. STR_PTR is changed
and TMP1 is destroyed in the UTF paths. There is no check against the
beginning of the subject. */
DEFINE_COMPILER;
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
struct sljit_label *step_back;
#endif

#if defined SUPPORT_UTF && defined COMPILE_PCRE8
if (common->utf)
  {
  /* Keep stepping back while the byte which was stepped over has the
  10xxxxxx form. */
  step_back = LABEL();
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
  CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, step_back);
  return;
  }
#elif defined SUPPORT_UTF && defined COMPILE_PCRE16
if (common->utf)
  {
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  /* If the unit which was stepped over is a low surrogate, step back one
  more unit. The flag value (0 or 1) is doubled to get the byte count. */
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  return;
  }
#endif /* SUPPORT_UTF && COMPILE_PCRE[8|16] */

/* Every character is a single code unit here. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
}
2896
static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
{
/* Emits a newline test for the character in TMP1. The jumps added to
backtracks are taken when the character is a newline if jumpifmatch is
set, and when it is not a newline otherwise. TMP2 may be destroyed. */
DEFINE_COMPILER;
struct sljit_jump *is_cr;

switch (nltype)
  {
  case NLTYPE_ANY:
  /* A shared helper does the test; its result is tested through the zero flag. */
  add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
  add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  return;

  case NLTYPE_ANYCRLF:
  if (!jumpifmatch)
    {
    /* A CR skips the second test, everything except NL backtracks. */
    is_cr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
    JUMPHERE(is_cr);
    return;
    }
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
  return;

  default:
  /* Only single character fixed newlines are supported here. */
  SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
  add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
  return;
  }
}
2928
2929 #ifdef SUPPORT_UTF
2930
2931 #if defined COMPILE_PCRE8
static void do_utfreadchar(compiler_common *common)
{
/* Fast decoding a UTF-8 character. TMP1 contains the first byte
of the character (>= 0xc0). Return char value in TMP1, length in TMP2.
On entry STR_PTR points to the second byte; on return it points after
the character. The return address is kept in RETURN_ADDR. Sequences of
two, three and four bytes are handled. */
DEFINE_COMPILER;
struct sljit_jump *jump;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Combine the low six bits of the first and the second byte. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Searching for the first zero. */
/* Bit 0x800 is bit 0x20 of the first byte after the shift: it is set
when the sequence is longer than two bytes. */
OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
jump = JUMP(SLJIT_NOT_ZERO);
/* Two byte sequence. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

JUMPHERE(jump);
/* Remove the length marker bit and append the third byte. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Bit 0x10000 is bit 0x10 of the first byte after two shifts: it is set
when the sequence is longer than three bytes. */
OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
jump = JUMP(SLJIT_NOT_ZERO);
/* Three byte sequence. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* Four byte sequence. */
JUMPHERE(jump);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(4));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
2979
static void do_utfreadchar16(compiler_common *common)
{
/* Fast decoding a UTF-8 character. TMP1 contains the first byte
of the character (>= 0xc0). Return value in TMP1.
On entry STR_PTR points to the second byte. Only two and three byte
sequences are decoded precisely. For a longer sequence one more byte is
skipped, and the value computed from the first three bytes is returned.
TMP2 is destroyed and, unlike do_utfreadchar, it does not contain the
length on return. The return address is kept in RETURN_ADDR. */
DEFINE_COMPILER;
struct sljit_jump *jump;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Combine the low six bits of the first and the second byte. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Searching for the first zero. */
OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
jump = JUMP(SLJIT_NOT_ZERO);
/* Two byte sequence. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

JUMPHERE(jump);
/* Bit 0x400 is bit 0x10 of the first byte after the shift: it is set for
sequences longer than three bytes. TMP2 becomes 1 in that case and 0
otherwise, and it is added to STR_PTR. */
OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_NOT_ZERO);
/* This code runs only in 8 bit mode. No need to shift the value. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
/* Three byte sequence. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
3015
static void do_utfreadtype8(compiler_common *common)
{
/* Fast decoding a UTF-8 character type. TMP2 contains the first byte
of the character (>= 0xc0). Return value in TMP1.
On entry STR_PTR points to the second byte; on return it points after
the character. The type is read from the ctypes table for values below
256, and it is zero for everything else. TMP2 is destroyed. The return
address is kept in RETURN_ADDR. */
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_jump *compare;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Bit 0x20 of the first byte is set for sequences longer than two bytes. */
OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
jump = JUMP(SLJIT_NOT_ZERO);
/* Two byte sequence. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
/* The upper 5 bits are known at this point. */
/* If they are above 3, the character value is 256 or more. */
compare = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* Two byte sequence with a value above 255: no type. */
JUMPHERE(compare);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* We only have types for characters less than 256. */
/* Longer sequences are skipped using the utf8_table4 entry of the first
byte, without decoding them. */
JUMPHERE(jump);
OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
3051
3052 #endif /* COMPILE_PCRE8 */
3053
3054 #endif /* SUPPORT_UTF */
3055
3056 #ifdef SUPPORT_UCP
3057
/* UCD_BLOCK_SIZE must be 128 (see the assert below). */
/* Mask and shift used to split a character value into a block index and
an offset within the block. */
#define UCD_BLOCK_MASK 127
#define UCD_BLOCK_SHIFT 7

static void do_getucd(compiler_common *common)
{
/* Search the UCD record for the character comes in TMP1.
Returns chartype in TMP1 and UCD offset in TMP2.
The lookup goes through two tables: ucd_stage1 is indexed by the block
number of the character, and ucd_stage2 by the block value combined with
the offset within the block. The return address is kept in RETURN_ADDR. */
DEFINE_COMPILER;

/* The shifts below rely on these sizes. */
SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* TMP2 = ucd_stage1[TMP1 >> UCD_BLOCK_SHIFT] */
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
/* TMP1 = (TMP2 << UCD_BLOCK_SHIFT) + (TMP1 & UCD_BLOCK_MASK) */
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
/* TMP2 = ucd_stage2[TMP1]. The entries are read as 16 bit values, and the
index is scaled by a shift of 1. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
/* TMP1 = ucd_records[TMP2].chartype. The index is scaled by a shift of 3,
which matches the 8 byte record size asserted above. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
3082 #endif
3083
mainloop_entry(compiler_common * common,BOOL hascrorlf,BOOL firstline)3084 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
3085 {
3086 DEFINE_COMPILER;
3087 struct sljit_label *mainloop;
3088 struct sljit_label *newlinelabel = NULL;
3089 struct sljit_jump *start;
3090 struct sljit_jump *end = NULL;
3091 struct sljit_jump *nl = NULL;
3092 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3093 struct sljit_jump *singlechar;
3094 #endif
3095 jump_list *newline = NULL;
3096 BOOL newlinecheck = FALSE;
3097 BOOL readuchar = FALSE;
3098
3099 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
3100 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
3101 newlinecheck = TRUE;
3102
3103 if (firstline)
3104 {
3105 /* Search for the end of the first line. */
3106 SLJIT_ASSERT(common->first_line_end != 0);
3107 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
3108
3109 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3110 {
3111 mainloop = LABEL();
3112 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3113 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3114 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3115 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3116 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
3117 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
3118 JUMPHERE(end);
3119 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3120 }
3121 else
3122 {
3123 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3124 mainloop = LABEL();
3125 /* Continual stores does not cause data dependency. */
3126 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0);
3127 read_char_range(common, common->nlmin, common->nlmax, TRUE);
3128 check_newlinechar(common, common->nltype, &newline, TRUE);
3129 CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop);
3130 JUMPHERE(end);
3131 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0);
3132 set_jumps(newline, LABEL());
3133 }
3134
3135 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
3136 }
3137
3138 start = JUMP(SLJIT_JUMP);
3139
3140 if (newlinecheck)
3141 {
3142 newlinelabel = LABEL();
3143 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3144 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3145 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3146 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
3147 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3148 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3149 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3150 #endif
3151 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3152 nl = JUMP(SLJIT_JUMP);
3153 }
3154
3155 mainloop = LABEL();
3156
3157 /* Increasing the STR_PTR here requires one less jump in the most common case. */
3158 #ifdef SUPPORT_UTF
3159 if (common->utf) readuchar = TRUE;
3160 #endif
3161 if (newlinecheck) readuchar = TRUE;
3162
3163 if (readuchar)
3164 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3165
3166 if (newlinecheck)
3167 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
3168
3169 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3170 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3171 #if defined COMPILE_PCRE8
3172 if (common->utf)
3173 {
3174 singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3175 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3176 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3177 JUMPHERE(singlechar);
3178 }
3179 #elif defined COMPILE_PCRE16
3180 if (common->utf)
3181 {
3182 singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
3183 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3184 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3185 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3186 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3187 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3188 JUMPHERE(singlechar);
3189 }
3190 #endif /* COMPILE_PCRE[8|16] */
3191 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
3192 JUMPHERE(start);
3193
3194 if (newlinecheck)
3195 {
3196 JUMPHERE(end);
3197 JUMPHERE(nl);
3198 }
3199
3200 return mainloop;
3201 }
3202
3203 #define MAX_N_CHARS 16
3204 #define MAX_N_BYTES 8
3205
add_prefix_byte(pcre_uint8 byte,pcre_uint8 * bytes)3206 static SLJIT_INLINE void add_prefix_byte(pcre_uint8 byte, pcre_uint8 *bytes)
3207 {
3208 pcre_uint8 len = bytes[0];
3209 int i;
3210
3211 if (len == 255)
3212 return;
3213
3214 if (len == 0)
3215 {
3216 bytes[0] = 1;
3217 bytes[1] = byte;
3218 return;
3219 }
3220
3221 for (i = len; i > 0; i--)
3222 if (bytes[i] == byte)
3223 return;
3224
3225 if (len >= MAX_N_BYTES - 1)
3226 {
3227 bytes[0] = 255;
3228 return;
3229 }
3230
3231 len++;
3232 bytes[len] = byte;
3233 bytes[0] = len;
3234 }
3235
static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uint32 *chars, pcre_uint8 *bytes, int max_chars, pcre_uint32 *rec_count)
{
/* Recursive function, which scans prefix literals.

Starting at opcode cc, it walks the compiled pattern and describes up to
max_chars leading positions. Each position owns two entries of chars (a
character value followed by a mask of the bits which may differ) and one
MAX_N_BYTES sized slot of bytes, maintained by add_prefix_byte. A position
which may hold any character gets an all-ones value and mask, and a byte
slot of 255.

rec_count is a work budget shared by all invocations: it is decremented on
every iteration, and 0 is returned once it is exhausted. Otherwise the number
of positions consumed by this invocation is returned. The recursive calls
below use that value as the new max_chars limit. */
BOOL last, any, caseless;
int len, repeat, len_save, consumed = 0;
pcre_uint32 chr, mask;
pcre_uchar *alternative, *cc_save, *oc;
/* Holds the other case of one character, in code units. */
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
pcre_uchar othercase[8];
#elif defined SUPPORT_UTF && defined COMPILE_PCRE16
pcre_uchar othercase[2];
#else
pcre_uchar othercase[1];
#endif

repeat = 1;
while (TRUE)
  {
  if (*rec_count == 0)
    return 0;
  (*rec_count)--;

  /* last: stop after this item. any: the item matches an unknown character.
  caseless: the literal has to be merged with its other case. */
  last = TRUE;
  any = FALSE;
  caseless = FALSE;

  switch (*cc)
    {
    case OP_CHARI:
    caseless = TRUE;
    /* Fall through. */
    case OP_CHAR:
    last = FALSE;
    cc++;
    break;

    case OP_SOD:
    case OP_SOM:
    case OP_SET_SOM:
    case OP_NOT_WORD_BOUNDARY:
    case OP_WORD_BOUNDARY:
    case OP_EODN:
    case OP_EOD:
    case OP_CIRC:
    case OP_CIRCM:
    case OP_DOLL:
    case OP_DOLLM:
    /* Zero width assertions. */
    cc++;
    continue;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    /* The whole assertion group is skipped. */
    cc = bracketend(cc);
    continue;

    case OP_PLUSI:
    case OP_MINPLUSI:
    case OP_POSPLUSI:
    caseless = TRUE;
    /* Fall through. */
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_POSPLUS:
    /* One occurrence is recorded; last stays TRUE, so scanning stops here. */
    cc++;
    break;

    case OP_EXACTI:
    caseless = TRUE;
    /* Fall through. */
    case OP_EXACT:
    repeat = GET2(cc, 1);
    last = FALSE;
    cc += 1 + IMM2_SIZE;
    break;

    case OP_QUERYI:
    case OP_MINQUERYI:
    case OP_POSQUERYI:
    caseless = TRUE;
    /* Fall through. */
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_POSQUERY:
    len = 1;
    cc++;
#ifdef SUPPORT_UTF
    if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
#endif
    /* First scan the code which follows the optional character, then merge
    the character itself into the same positions below. */
    max_chars = scan_prefix(common, cc + len, chars, bytes, max_chars, rec_count);
    if (max_chars == 0)
      return consumed;
    last = FALSE;
    break;

    case OP_KET:
    cc += 1 + LINK_SIZE;
    continue;

    case OP_ALT:
    /* Jump ahead by the link value stored after the opcode. */
    cc += GET(cc, 1);
    continue;

    case OP_ONCE:
    case OP_ONCE_NC:
    case OP_BRA:
    case OP_BRAPOS:
    case OP_CBRA:
    case OP_CBRAPOS:
    /* Every further alternative is scanned recursively into the same
    positions; the first alternative is processed by this loop. */
    alternative = cc + GET(cc, 1);
    while (*alternative == OP_ALT)
      {
      max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, bytes, max_chars, rec_count);
      if (max_chars == 0)
        return consumed;
      alternative += GET(alternative, 1);
      }

    if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
      cc += IMM2_SIZE;
    cc += 1 + LINK_SIZE;
    continue;

    case OP_CLASS:
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
    if (common->utf && !is_char7_bitset((const pcre_uint8 *)(cc + 1), FALSE)) return consumed;
#endif
    any = TRUE;
    cc += 1 + 32 / sizeof(pcre_uchar);
    break;

    case OP_NCLASS:
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    cc += 1 + 32 / sizeof(pcre_uchar);
    break;

#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
    case OP_XCLASS:
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    cc += GET(cc, 1);
    break;
#endif

    case OP_DIGIT:
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
    if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
      return consumed;
#endif
    any = TRUE;
    cc++;
    break;

    case OP_WHITESPACE:
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
    if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_space, FALSE))
      return consumed;
#endif
    any = TRUE;
    cc++;
    break;

    case OP_WORDCHAR:
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
    if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_word, FALSE))
      return consumed;
#endif
    any = TRUE;
    cc++;
    break;

    case OP_NOT:
    case OP_NOTI:
    /* Skip the opcode; the common code below skips one more unit. */
    cc++;
    /* Fall through. */
    case OP_NOT_DIGIT:
    case OP_NOT_WHITESPACE:
    case OP_NOT_WORDCHAR:
    case OP_ANY:
    case OP_ALLANY:
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    cc++;
    break;

#ifdef SUPPORT_UCP
    case OP_NOTPROP:
    case OP_PROP:
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    cc += 1 + 2;
    break;
#endif

    case OP_TYPEEXACT:
    /* Only the count is taken here; the item which follows is handled by
    the next iteration with this repeat value. */
    repeat = GET2(cc, 1);
    cc += 1 + IMM2_SIZE;
    continue;

    case OP_NOTEXACT:
    case OP_NOTEXACTI:
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    repeat = GET2(cc, 1);
    cc += 1 + IMM2_SIZE + 1;
    break;

    default:
    /* Anything else ends the prefix. */
    return consumed;
    }

  if (any)
    {
    /* Mark repeat positions as "any character". */
#if defined COMPILE_PCRE8
    mask = 0xff;
#elif defined COMPILE_PCRE16
    mask = 0xffff;
#elif defined COMPILE_PCRE32
    mask = 0xffffffff;
#else
    SLJIT_ASSERT_STOP();
#endif

    do
      {
      chars[0] = mask;
      chars[1] = mask;
      bytes[0] = 255;

      consumed++;
      if (--max_chars == 0)
        return consumed;
      chars += 2;
      bytes += MAX_N_BYTES;
      }
    while (--repeat > 0);

    repeat = 1;
    continue;
    }

  /* A literal character: len is its length in code units. */
  len = 1;
#ifdef SUPPORT_UTF
  if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
#endif

  if (caseless && char_has_othercase(common, cc))
    {
#ifdef SUPPORT_UTF
    if (common->utf)
      {
      /* Give up when the other case has a different encoded length. */
      GETCHAR(chr, cc);
      if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
        return consumed;
      }
    else
#endif
      {
      chr = *cc;
      othercase[0] = TABLE_GET(chr, common->fcc, chr);
      }
    }
  else
    caseless = FALSE;

  /* Saved so that the same literal can be replayed repeat times. */
  len_save = len;
  cc_save = cc;
  while (TRUE)
    {
    oc = othercase;
    do
      {
      chr = *cc;
#ifdef COMPILE_PCRE32
      if (SLJIT_UNLIKELY(chr == NOTACHAR))
        return consumed;
#endif
      add_prefix_byte((pcre_uint8)chr, bytes);

      mask = 0;
      if (caseless)
        {
        add_prefix_byte((pcre_uint8)*oc, bytes);
        mask = *cc ^ *oc;
        chr |= mask;
        }

      /* The first branch handles a position which still holds NOTACHAR in
      chars[0] (and, for 32 bit, a zero mask): the value is stored as is.
      Otherwise the new value is merged with the stored one, and every bit
      which differs is added to the mask. */
#ifdef COMPILE_PCRE32
      if (chars[0] == NOTACHAR && chars[1] == 0)
#else
      if (chars[0] == NOTACHAR)
#endif
        {
        chars[0] = chr;
        chars[1] = mask;
        }
      else
        {
        mask |= chars[0] ^ chr;
        chr |= mask;
        chars[0] = chr;
        chars[1] |= mask;
        }

      len--;
      consumed++;
      if (--max_chars == 0)
        return consumed;
      chars += 2;
      bytes += MAX_N_BYTES;
      cc++;
      oc++;
      }
    while (len > 0);

    if (--repeat == 0)
      break;

    len = len_save;
    cc = cc_save;
    }

  repeat = 1;
  if (last)
    return consumed;
  }
}
3572
static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
{
/* Emits a fast forward loop based on the first few characters of the
pattern, as collected by scan_prefix. Up to three prefix positions with at
most two uncertain bits are compared directly, and an optional 256 entry
skip table is used for a run of positions with known byte sets.

Returns FALSE when nothing was emitted (prefix too short, or no usable
position), TRUE otherwise. TRUE is also returned when the skip table cannot
be allocated; presumably allocate_read_only_data records that failure
elsewhere (TODO confirm). */
DEFINE_COMPILER;
struct sljit_label *start;
struct sljit_jump *quit;
pcre_uint32 chars[MAX_N_CHARS * 2];
pcre_uint8 bytes[MAX_N_CHARS * MAX_N_BYTES];
pcre_uint8 ones[MAX_N_CHARS];
int offsets[3];
pcre_uint32 mask;
pcre_uint8 *byte_set, *byte_set_end;
int i, max, from;
int range_right = -1, range_len = 3 - 1;
sljit_ub *update_table = NULL;
BOOL in_range;
pcre_uint32 rec_count;

/* Every position starts as "not seen": NOTACHAR, zero mask, empty byte set. */
for (i = 0; i < MAX_N_CHARS; i++)
  {
  chars[i << 1] = NOTACHAR;
  chars[(i << 1) + 1] = 0;
  bytes[i * MAX_N_BYTES] = 0;
  }

/* Work budget of the recursive scan. */
rec_count = 10000;
max = scan_prefix(common, common->start, chars, bytes, MAX_N_CHARS, &rec_count);

if (max <= 1)
  return FALSE;

/* ones[i] is the sum of the ones_in_half_byte entries of all nibbles of the
mask of position i (presumably the number of set bits in the mask). */
for (i = 0; i < max; i++)
  {
  mask = chars[(i << 1) + 1];
  ones[i] = ones_in_half_byte[mask & 0xf];
  mask >>= 4;
  while (mask != 0)
    {
    ones[i] += ones_in_half_byte[mask & 0xf];
    mask >>= 4;
    }
  }

/* Find the longest run of consecutive positions whose byte sets have not
overflowed. A run is accepted only when it is longer than the current
range_len (initially 2) and its last position has at most 4 bytes. */
in_range = FALSE;
from = 0; /* Prevent compiler "uninitialized" warning */
for (i = 0; i <= max; i++)
  {
  if (in_range && (i - from) > range_len && (bytes[(i - 1) * MAX_N_BYTES] <= 4))
    {
    range_len = i - from;
    range_right = i - 1;
    }

  if (i < max && bytes[i * MAX_N_BYTES] < 255)
    {
    if (!in_range)
      {
      in_range = TRUE;
      from = i;
      }
    }
  else if (in_range)
    in_range = FALSE;
  }

if (range_right >= 0)
  {
  /* Build the skip table. Every entry starts as the full range length, and
  is lowered to the distance (in bytes of code units) from the right end of
  the range at which the byte may occur. */
  update_table = (sljit_ub *)allocate_read_only_data(common, 256);
  if (update_table == NULL)
    return TRUE;
  memset(update_table, IN_UCHARS(range_len), 256);

  for (i = 0; i < range_len; i++)
    {
    byte_set = bytes + ((range_right - i) * MAX_N_BYTES);
    SLJIT_ASSERT(byte_set[0] > 0 && byte_set[0] < 255);
    byte_set_end = byte_set + byte_set[0];
    byte_set++;
    while (byte_set <= byte_set_end)
      {
      if (update_table[*byte_set] > IN_UCHARS(i))
        update_table[*byte_set] = IN_UCHARS(i);
      byte_set++;
      }
    }
  }

/* offsets[0]: the first position with at most two uncertain bits. */
offsets[0] = -1;
/* Scan forward. */
for (i = 0; i < max; i++)
  if (ones[i] <= 2) {
    offsets[0] = i;
    break;
  }

if (offsets[0] < 0 && range_right < 0)
  return FALSE;

if (offsets[0] >= 0)
  {
  /* offsets[1]: the last such position after offsets[0], excluding the
  position which is checked through the skip table. */
  /* Scan backward. */
  offsets[1] = -1;
  for (i = max - 1; i > offsets[0]; i--)
    if (ones[i] <= 2 && i != range_right)
      {
      offsets[1] = i;
      break;
      }

  /* This case is handled better by fast_forward_first_char. */
  if (offsets[1] == -1 && offsets[0] == 0 && range_right < 0)
    return FALSE;

  offsets[2] = -1;
  /* We only search for a middle character if there is no range check. */
  if (offsets[1] >= 0 && range_right == -1)
    {
    /* Scan from middle. */
    for (i = (offsets[0] + offsets[1]) / 2 + 1; i < offsets[1]; i++)
      if (ones[i] <= 2)
        {
        offsets[2] = i;
        break;
        }

    if (offsets[2] == -1)
      {
      for (i = (offsets[0] + offsets[1]) / 2; i > offsets[0]; i--)
        if (ones[i] <= 2)
          {
          offsets[2] = i;
          break;
          }
      }
    }

  SLJIT_ASSERT(offsets[1] == -1 || (offsets[0] < offsets[1]));
  SLJIT_ASSERT(offsets[2] == -1 || (offsets[0] < offsets[2] && offsets[1] > offsets[2]));

  /* Move the selected value / mask pairs to the front of chars:
  [0],[1] for offsets[0], [2],[3] for offsets[2], [4],[5] for offsets[1]. */
  chars[0] = chars[offsets[0] << 1];
  chars[1] = chars[(offsets[0] << 1) + 1];
  if (offsets[2] >= 0)
    {
    chars[2] = chars[offsets[2] << 1];
    chars[3] = chars[(offsets[2] << 1) + 1];
    }
  if (offsets[1] >= 0)
    {
    chars[4] = chars[offsets[1] << 1];
    chars[5] = chars[(offsets[1] << 1) + 1];
    }
  }

/* STR_END is lowered by max - 1 code units while the loop runs, so that
reads at the prefix offsets stay below the original STR_END. With firstline
it is additionally limited by first_line_end, and TMP3 keeps the original
value. */
max -= 1;
if (firstline)
  {
  SLJIT_ASSERT(common->first_line_end != 0);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
  OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
  quit = CMP(SLJIT_LESS_EQUAL, STR_END, 0, TMP1, 0);
  OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
  JUMPHERE(quit);
  }
else
  OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));

/* Except on 32 bit x86, the table address is kept in RETURN_ADDR. */
#if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
if (range_right >= 0)
  OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);
#endif

start = LABEL();
quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

SLJIT_ASSERT(range_right >= 0 || offsets[0] >= 0);

if (range_right >= 0)
  {
  /* Load one byte of the code unit at the right end of the range (the byte
  offset differs for big endian targets with wider code units), look up the
  skip distance, advance, and retry while the distance is not zero. */
#if defined COMPILE_PCRE8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
#else
  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
#endif

#if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
#else
  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);
#endif
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);
  }

if (offsets[0] >= 0)
  {
  /* Direct comparisons. STR_PTR is incremented before the compares, so a
  mismatch restarts at the next position; the middle character is therefore
  read at offsets[2] - 1, and the increment is undone after a full match. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[0]));
  if (offsets[1] >= 0)
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[1]));
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  if (chars[1] != 0)
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
  if (offsets[2] >= 0)
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[2] - 1));

  if (offsets[1] >= 0)
    {
    if (chars[5] != 0)
      OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[5]);
    CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[4], start);
    }

  if (offsets[2] >= 0)
    {
    if (chars[3] != 0)
      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[3]);
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[2], start);
    }
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  }

JUMPHERE(quit);

/* Restore STR_END. With firstline and a skip table, STR_PTR is also limited
to first_line_end. */
if (firstline)
  {
  if (range_right >= 0)
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
  if (range_right >= 0)
    {
    quit = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
    OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
    JUMPHERE(quit);
    }
  }
else
  OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
return TRUE;
}

#undef MAX_N_CHARS
#undef MAX_N_BYTES
3816
fast_forward_first_char(compiler_common * common,pcre_uchar first_char,BOOL caseless,BOOL firstline)3817 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
3818 {
3819 DEFINE_COMPILER;
3820 struct sljit_label *start;
3821 struct sljit_jump *quit;
3822 struct sljit_jump *found;
3823 pcre_uchar oc, bit;
3824
3825 if (firstline)
3826 {
3827 SLJIT_ASSERT(common->first_line_end != 0);
3828 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3829 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3830 }
3831
3832 start = LABEL();
3833 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3834 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3835
3836 oc = first_char;
3837 if (caseless)
3838 {
3839 oc = TABLE_GET(first_char, common->fcc, first_char);
3840 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3841 if (first_char > 127 && common->utf)
3842 oc = UCD_OTHERCASE(first_char);
3843 #endif
3844 }
3845 if (first_char == oc)
3846 found = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
3847 else
3848 {
3849 bit = first_char ^ oc;
3850 if (is_powerof2(bit))
3851 {
3852 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
3853 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
3854 }
3855 else
3856 {
3857 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
3858 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3859 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
3860 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
3861 found = JUMP(SLJIT_NOT_ZERO);
3862 }
3863 }
3864
3865 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3866 JUMPTO(SLJIT_JUMP, start);
3867 JUMPHERE(found);
3868 JUMPHERE(quit);
3869
3870 if (firstline)
3871 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3872 }
3873
fast_forward_newline(compiler_common * common,BOOL firstline)3874 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
3875 {
3876 DEFINE_COMPILER;
3877 struct sljit_label *loop;
3878 struct sljit_jump *lastchar;
3879 struct sljit_jump *firstchar;
3880 struct sljit_jump *quit;
3881 struct sljit_jump *foundcr = NULL;
3882 struct sljit_jump *notfoundnl;
3883 jump_list *newline = NULL;
3884
3885 if (firstline)
3886 {
3887 SLJIT_ASSERT(common->first_line_end != 0);
3888 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3889 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3890 }
3891
3892 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3893 {
3894 lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3895 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3896 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3897 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3898 firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3899
3900 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
3901 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
3902 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER_EQUAL);
3903 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3904 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
3905 #endif
3906 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3907
3908 loop = LABEL();
3909 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3910 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3911 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
3912 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3913 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
3914 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
3915
3916 JUMPHERE(quit);
3917 JUMPHERE(firstchar);
3918 JUMPHERE(lastchar);
3919
3920 if (firstline)
3921 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3922 return;
3923 }
3924
3925 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3926 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3927 firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3928 skip_char_back(common);
3929
3930 loop = LABEL();
3931 common->ff_newline_shortcut = loop;
3932
3933 read_char_range(common, common->nlmin, common->nlmax, TRUE);
3934 lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3935 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3936 foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3937 check_newlinechar(common, common->nltype, &newline, FALSE);
3938 set_jumps(newline, loop);
3939
3940 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3941 {
3942 quit = JUMP(SLJIT_JUMP);
3943 JUMPHERE(foundcr);
3944 notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3945 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3946 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
3947 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3948 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3949 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3950 #endif
3951 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3952 JUMPHERE(notfoundnl);
3953 JUMPHERE(quit);
3954 }
3955 JUMPHERE(lastchar);
3956 JUMPHERE(firstchar);
3957
3958 if (firstline)
3959 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3960 }
3961
3962 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
3963
fast_forward_start_bits(compiler_common * common,pcre_uint8 * start_bits,BOOL firstline)3964 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, pcre_uint8 *start_bits, BOOL firstline)
3965 {
3966 DEFINE_COMPILER;
3967 struct sljit_label *start;
3968 struct sljit_jump *quit;
3969 struct sljit_jump *found = NULL;
3970 jump_list *matches = NULL;
3971 #ifndef COMPILE_PCRE8
3972 struct sljit_jump *jump;
3973 #endif
3974
3975 if (firstline)
3976 {
3977 SLJIT_ASSERT(common->first_line_end != 0);
3978 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
3979 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3980 }
3981
3982 start = LABEL();
3983 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3984 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3985 #ifdef SUPPORT_UTF
3986 if (common->utf)
3987 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3988 #endif
3989
3990 if (!check_class_ranges(common, start_bits, (start_bits[31] & 0x80) != 0, TRUE, &matches))
3991 {
3992 #ifndef COMPILE_PCRE8
3993 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 255);
3994 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
3995 JUMPHERE(jump);
3996 #endif
3997 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3998 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3999 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
4000 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4001 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4002 found = JUMP(SLJIT_NOT_ZERO);
4003 }
4004
4005 #ifdef SUPPORT_UTF
4006 if (common->utf)
4007 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4008 #endif
4009 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4010 #ifdef SUPPORT_UTF
4011 #if defined COMPILE_PCRE8
4012 if (common->utf)
4013 {
4014 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
4015 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4016 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4017 }
4018 #elif defined COMPILE_PCRE16
4019 if (common->utf)
4020 {
4021 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
4022 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4023 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4024 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
4025 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4026 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4027 }
4028 #endif /* COMPILE_PCRE[8|16] */
4029 #endif /* SUPPORT_UTF */
4030 JUMPTO(SLJIT_JUMP, start);
4031 if (found != NULL)
4032 JUMPHERE(found);
4033 if (matches != NULL)
4034 set_jumps(matches, LABEL());
4035 JUMPHERE(quit);
4036
4037 if (firstline)
4038 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
4039 }
4040
search_requested_char(compiler_common * common,pcre_uchar req_char,BOOL caseless,BOOL has_firstchar)4041 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
4042 {
4043 DEFINE_COMPILER;
4044 struct sljit_label *loop;
4045 struct sljit_jump *toolong;
4046 struct sljit_jump *alreadyfound;
4047 struct sljit_jump *found;
4048 struct sljit_jump *foundoc = NULL;
4049 struct sljit_jump *notfound;
4050 pcre_uint32 oc, bit;
4051
4052 SLJIT_ASSERT(common->req_char_ptr != 0);
4053 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
4054 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
4055 toolong = CMP(SLJIT_LESS, TMP1, 0, STR_END, 0);
4056 alreadyfound = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
4057
4058 if (has_firstchar)
4059 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4060 else
4061 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
4062
4063 loop = LABEL();
4064 notfound = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0);
4065
4066 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
4067 oc = req_char;
4068 if (caseless)
4069 {
4070 oc = TABLE_GET(req_char, common->fcc, req_char);
4071 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
4072 if (req_char > 127 && common->utf)
4073 oc = UCD_OTHERCASE(req_char);
4074 #endif
4075 }
4076 if (req_char == oc)
4077 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
4078 else
4079 {
4080 bit = req_char ^ oc;
4081 if (is_powerof2(bit))
4082 {
4083 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
4084 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
4085 }
4086 else
4087 {
4088 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
4089 foundoc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc);
4090 }
4091 }
4092 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4093 JUMPTO(SLJIT_JUMP, loop);
4094
4095 JUMPHERE(found);
4096 if (foundoc)
4097 JUMPHERE(foundoc);
4098 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);
4099 JUMPHERE(alreadyfound);
4100 JUMPHERE(toolong);
4101 return notfound;
4102 }
4103
static void do_revertframes(compiler_common *common)
{
/* Emits a fast-call helper which walks the entries starting at STACK_TOP and
writes the saved values back relative to the local base. Each entry starts
with a machine word which selects its kind:
  > 0 : offset from the local base; the next two words are copied there
        (entry size: three words)
  < 0 : negated offset from the local base; the next word is copied there
        (entry size: two words)
    0 : end of the entries, the helper returns.
STACK_TOP itself is not changed: TMP1 is used as the cursor. */
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_label *mainloop;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
/* TMP3 presumably receives the base address of the locals (the macro is
defined elsewhere); it is added to every offset below. */
GET_LOCAL_BASE(TMP3, 0, 0);

/* Drop frames until we reach STACK_TOP. */
mainloop = LABEL();
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
/* Signed comparison of the selector word with zero. */
OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
jump = JUMP(SLJIT_SIG_LESS_EQUAL);

/* Positive selector: copy two words. */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
JUMPTO(SLJIT_JUMP, mainloop);

JUMPHERE(jump);
/* No flag-setting operation is emitted in between, so this jump tests the
result of the comparison above again (negative versus zero). */
jump = JUMP(SLJIT_SIG_LESS);
/* End of dropping frames. */
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

JUMPHERE(jump);
/* Negative selector: copy one word. */
OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
JUMPTO(SLJIT_JUMP, mainloop);
}
4138
static void check_wordboundary(compiler_common *common)
{
/* Emits the shared helper for word boundary tests. The generated code
classifies the character before STR_PTR (result kept in LOCALS1) and the
next character (result kept in TMP2) as word (1) or non-word (0), then XORs
the two values with the E flag requested, so the flags on return tell whether
the classifications differ. The return address is stored in LOCALS0.
TMP1 and TMP2 are overwritten. */
DEFINE_COMPILER;
struct sljit_jump *skipread;
jump_list *skipread_list = NULL;
#if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
struct sljit_jump *jump;
#endif

/* The table based tests below extract the word bit with a shift by 4. */
SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);

sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
/* Get type of the previous char, and put it to LOCALS1. */
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
/* Default: the previous character is a non-word character. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, SLJIT_IMM, 0);
/* Nothing to read when STR_PTR is not beyond the "begin" pointer. */
skipread = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
/* NOTE(review): presumably steps back one character and re-reads it into
TMP1, leaving STR_PTR where it started - confirm against the helpers. */
skip_char_back(common);
check_start_used_ptr(common);
read_char(common);

/* Testing char type. */
#ifdef SUPPORT_UCP
if (common->use_ucp)
  {
  /* Underscore counts as a word character without a property lookup. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
  jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
  /* NOTE(review): getucd presumably leaves the character type in TMP1. */
  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
  /* Word if the type is in ucp_Ll..ucp_Lu or in ucp_Nd..ucp_No. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
  JUMPHERE(jump);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP2, 0);
  }
else
#endif
  {
  /* Characters above 255 skip the table lookup and keep the zero already
  stored in LOCALS1. */
#ifndef COMPILE_PCRE8
  jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#elif defined SUPPORT_UTF
  /* Here LOCALS1 has already been zeroed. */
  jump = NULL;
  if (common->utf)
    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#endif /* COMPILE_PCRE8 */
  /* Load the ctypes entry of the character and isolate the ctype_word bit. */
  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
#ifndef COMPILE_PCRE8
  JUMPHERE(jump);
#elif defined SUPPORT_UTF
  if (jump != NULL)
    JUMPHERE(jump);
#endif /* COMPILE_PCRE8 */
  }
JUMPHERE(skipread);

/* Now classify the next character into TMP2. The jumps collected in
skipread_list land after the classification with TMP2 still zero. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
check_str_end(common, &skipread_list);
peek_char(common, READ_CHAR_MAX);

/* Testing char type. This is a code duplication. */
#ifdef SUPPORT_UCP
if (common->use_ucp)
  {
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
  jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
  JUMPHERE(jump);
  }
else
#endif
  {
#ifndef COMPILE_PCRE8
  /* TMP2 may be destroyed by peek_char. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#elif defined SUPPORT_UTF
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  jump = NULL;
  if (common->utf)
    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#endif
  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
  OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
#ifndef COMPILE_PCRE8
  JUMPHERE(jump);
#elif defined SUPPORT_UTF
  if (jump != NULL)
    JUMPHERE(jump);
#endif /* COMPILE_PCRE8 */
  }
set_jumps(skipread_list, LABEL());

/* Compare the two classifications; only the flags are produced. */
OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
}
4247
static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
/* Tries to replace a lookup in the 256 bit class bitmap "bits" by a few
compare instructions on TMP1. The bitmap is scanned for the positions where
the bit value changes; when nclass selects it, 256 is appended as a final
boundary. If more than four boundaries are needed FALSE is returned, and this
happens before any instruction has been emitted. Otherwise the generated code
jumps to *backtracks for every character whose bitmap state (flipped when
invert is set) is zero, and TRUE is returned. TMP1 may be modified by the
generated code. */
DEFINE_COMPILER;
int ranges[MAX_RANGE_SIZE];
pcre_uint8 state, probe, solid;
int pos = 0, count = 0;
int gap, base;

state = bits[0] & 0x1;
/* A byte that consists only of "state" bits is either 0x00 or 0xff. */
solid = -state;

/* Collect the bit positions where the bitmap value flips. */
while (pos < 256)
  {
  if ((pos & 0x7) == 0 && bits[pos >> 3] == solid)
    {
    /* The whole byte continues the current run. */
    pos += 8;
    continue;
    }

  probe = (bits[pos >> 3] >> (pos & 0x7)) & 0x1;
  if (probe != state)
    {
    if (count >= MAX_RANGE_SIZE)
      return FALSE;
    ranges[count++] = pos;
    state = probe;
    solid = -probe;
    }
  pos++;
  }

/* Depending on nclass, the run that reaches 255 is closed at 256. */
if (nclass ? (state == 0) : (state == 1))
  {
  if (count >= MAX_RANGE_SIZE)
    return FALSE;
  ranges[count++] = 256;
  }

if (count < 0 || count > 4)
  return FALSE;

/* From here on "state" is the (possibly inverted) value of the first run. */
state = bits[0] & 0x1;
if (invert)
  state ^= 0x1;

switch(count)
  {
  case 0:
  /* A single run: either no character is accepted, or all of them are. */
  if (state == 0)
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
  return TRUE;

  case 1:
  add_jump(compiler, backtracks, CMP(state != 0 ? SLJIT_GREATER_EQUAL : SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
  return TRUE;

  case 2:
  if (ranges[0] + 1 == ranges[1])
    {
    /* The middle run is a single character. */
    add_jump(compiler, backtracks, CMP(state != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
    return TRUE;
    }
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
  add_jump(compiler, backtracks, CMP(state != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
  return TRUE;

  case 3:
  if (state == 0)
    {
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
    if (ranges[1] + 1 == ranges[2])
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
    else
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
      }
    return TRUE;
    }

  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
  if (ranges[0] + 1 == ranges[1])
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
  else
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
    }
  return TRUE;

  case 4:
  gap = ranges[2] - ranges[0];
  /* Two equally long runs whose starts differ in exactly one bit can be
  merged by setting that bit, leaving a single range test. */
  if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
      && (ranges[0] | gap) == ranges[2]
      && (ranges[1] & gap) == 0
      && is_powerof2(gap))
    {
    SLJIT_ASSERT((ranges[0] & gap) == 0 && (ranges[2] & ranges[3] & gap) != 0);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, gap);
    if (ranges[2] + 1 == ranges[3])
      add_jump(compiler, backtracks, CMP(state != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
    else
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
      add_jump(compiler, backtracks, CMP(state != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
      }
    return TRUE;
    }

  if (state == 0)
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
    if (ranges[1] + 1 == ranges[2])
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
    else
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
      }
    return TRUE;
    }

  /* "base" tracks how much has already been subtracted from TMP1. */
  base = 0;
  if (ranges[0] + 1 == ranges[1])
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
  else
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
    base = ranges[0];
    }

  if (ranges[2] + 1 == ranges[3])
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - base));
  else
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - base);
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
    }
  return TRUE;

  default:
  SLJIT_ASSERT_STOP();
  return FALSE;
  }
}
4398
static void check_anynewline(compiler_common *common)
{
/* Check whether TMP1 contains a newline character. TMP2 destroyed.
The generated helper accumulates the result in TMP2 (non-zero when one of
the tests matched) and requests the E flag on the final OR. TMP1 is modified
as well. Tested values: 0x0a-0x0d and 0x85, plus 0x2028 and 0x2029 when wide
characters are possible. */
DEFINE_COMPILER;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Rebase to 0x0a so that one unsigned compare covers 0x0a..0x0d. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
#ifdef COMPILE_PCRE8
if (common->utf)
  {
#endif
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
  /* Setting the lowest bit maps 0x2028 onto 0x2029 (both values are still
  rebased by 0x0a), so a single equality test covers the pair. */
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
#ifdef COMPILE_PCRE8
  }
#endif
#endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the last pending equality test and produce the final flags. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
4425
static void check_hspace(compiler_common *common)
{
/* Check whether TMP1 contains a horizontal white space character.
TMP2 destroyed: it accumulates the result (non-zero when one of the tests
matched) and the final OR requests the E flag. Tested values: 0x09, 0x20 and
0xa0, plus 0x1680, 0x180e, 0x2000-0x200A, 0x202f, 0x205f and 0x3000 when wide
characters are possible. In the wide character path TMP1 is rebased by 0x2000
and is not restored. */
DEFINE_COMPILER;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
#ifdef COMPILE_PCRE8
if (common->utf)
  {
#endif
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
  /* Rebase to 0x2000: one unsigned compare covers 0x2000..0x200A, and the
  remaining constants are expressed relative to the same base. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
#ifdef COMPILE_PCRE8
  }
#endif
#endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the last pending equality test and produce the final flags. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);

sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
4464
static void check_vspace(compiler_common *common)
{
/* Check whether TMP1 contains a vertical white space character.
TMP2 destroyed: it accumulates the result (non-zero when one of the tests
matched) and the final OR requests the E flag. TMP1 is modified as well.
Tested values: 0x0a-0x0d and 0x85, plus 0x2028 and 0x2029 when wide
characters are possible. */
DEFINE_COMPILER;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Rebase to 0x0a so that one unsigned compare covers 0x0a..0x0d. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
#ifdef COMPILE_PCRE8
if (common->utf)
  {
#endif
  /* NOTE(review): check_anynewline emits this same step without
  SLJIT_SET_E; the flags requested here look unused because the next
  flag-setting instruction replaces them - confirm before simplifying. */
  OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
  /* Setting the lowest bit maps 0x2028 onto 0x2029 (both values are still
  rebased by 0x0a), so a single equality test covers the pair. */
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
#ifdef COMPILE_PCRE8
  }
#endif
#endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the last pending equality test and produce the final flags. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);

sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
4492
/* Registers that hold the two code units being compared in do_casefulcmp
and do_caselesscmp. They alias STR_END and STACK_TOP, so both helpers save
these registers on entry and restore them before returning. */
#define CHAR1 STR_END
#define CHAR2 STACK_TOP
4495
static void do_casefulcmp(compiler_common *common)
{
/* Emits the helper that compares two code unit sequences exactly. As used
by the generated code: TMP1 addresses the first sequence, TMP2 holds the
length (it is subtracted from STR_PTR and counted down in IN_UCHARS(1) steps)
and STR_PTR is moved back by TMP2 to address the second sequence. The loop
ends either on the first differing code unit or when TMP2 reaches zero, so
TMP2 is zero on return only if every code unit matched. How the caller tests
the outcome is not visible in this function. */
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_label *label;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
/* CHAR1 and CHAR2 alias STR_END and STACK_TOP: preserve their values. */
OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, CHAR2, 0);
/* Step both pointers back by one code unit. NOTE(review): MOVU_UCHAR
presumably advances the base register by the offset before loading. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

label = LABEL();
OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
jump = CMP(SLJIT_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
/* Count down the remaining length and loop while it is non-zero. */
OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
JUMPTO(SLJIT_NOT_ZERO, label);

/* Common exit for both the mismatch and the full match case. */
JUMPHERE(jump);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
/* Restore the aliased registers. */
OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
4522
/* Register that holds the address stored in common->lcc while do_caselesscmp
runs. It aliases STACK_LIMIT, whose value that helper keeps in TMP3. */
#define LCC_TABLE STACK_LIMIT
4524
static void do_caselesscmp(compiler_common *common)
{
/* Emits the helper that compares two code unit sequences after mapping each
code unit through the table at common->lcc. Register usage mirrors
do_casefulcmp: TMP1 addresses the first sequence, TMP2 holds the length and
STR_PTR is moved back by TMP2 to address the second sequence. When
COMPILE_PCRE8 is not defined, code units above 255 are compared without the
table lookup. TMP2 is zero on return only if every code unit matched. */
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_label *label;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

/* LCC_TABLE, CHAR1 and CHAR2 alias other registers: preserve their values. */
OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, CHAR1, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, CHAR2, 0);
OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
/* Step both pointers back by one code unit. NOTE(review): MOVU_UCHAR
presumably advances the base register by the offset before loading. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

label = LABEL();
OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
#ifndef COMPILE_PCRE8
/* The table is only indexed with values up to 255. */
jump = CMP(SLJIT_GREATER, CHAR1, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
#ifndef COMPILE_PCRE8
JUMPHERE(jump);
jump = CMP(SLJIT_GREATER, CHAR2, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
#ifndef COMPILE_PCRE8
JUMPHERE(jump);
#endif
jump = CMP(SLJIT_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
/* Count down the remaining length and loop while it is non-zero. */
OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
JUMPTO(SLJIT_NOT_ZERO, label);

/* Common exit for both the mismatch and the full match case. */
JUMPHERE(jump);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
/* Restore the aliased registers. */
OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
4567
4568 #undef LCC_TABLE
4569 #undef CHAR1
4570 #undef CHAR2
4571
4572 #if defined SUPPORT_UTF && defined SUPPORT_UCP
4573
do_utf_caselesscmp(pcre_uchar * src1,jit_arguments * args,pcre_uchar * end1)4574 static const pcre_uchar * SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
4575 {
4576 /* This function would be ineffective to do in JIT level. */
4577 pcre_uint32 c1, c2;
4578 const pcre_uchar *src2 = args->uchar_ptr;
4579 const pcre_uchar *end2 = args->end;
4580 const ucd_record *ur;
4581 const pcre_uint32 *pp;
4582
4583 while (src1 < end1)
4584 {
4585 if (src2 >= end2)
4586 return (pcre_uchar*)1;
4587 GETCHARINC(c1, src1);
4588 GETCHARINC(c2, src2);
4589 ur = GET_UCD(c2);
4590 if (c1 != c2 && c1 != c2 + ur->other_case)
4591 {
4592 pp = PRIV(ucd_caseless_sets) + ur->caseset;
4593 for (;;)
4594 {
4595 if (c1 < *pp) return NULL;
4596 if (c1 == *pp++) break;
4597 }
4598 }
4599 }
4600 return src2;
4601 }
4602
4603 #endif /* SUPPORT_UTF && SUPPORT_UCP */
4604
static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
    compare_context *context, jump_list **backtracks)
{
/* Emits the comparison of one pattern character at cc (several code units
when common->utf is set and the first unit has extra length) against the
subject at STR_PTR - context->length. Jumps taken on a mismatch are added to
*backtracks. The context structure carries the state between consecutive
calls: the remaining length, the register that receives the next load and,
when unaligned reads are used, the code units collected so far. Returns cc
advanced past the processed character. */
DEFINE_COMPILER;
unsigned int othercasebit = 0;
pcre_uchar *othercasechar = NULL;
#ifdef SUPPORT_UTF
int utflength;
#endif

if (caseless && char_has_othercase(common, cc))
  {
  othercasebit = char_get_othercase_bit(common, cc);
  SLJIT_ASSERT(othercasebit);
  /* Extracting bit difference info. */
  /* The upper part of the value selects the code unit that differs between
  the two cases, the low 8 bits hold the differing bit. */
#if defined COMPILE_PCRE8
  othercasechar = cc + (othercasebit >> 8);
  othercasebit &= 0xff;
#elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
  /* Note that this code only handles characters in the BMP. If there
  ever are characters outside the BMP whose othercase differs in only one
  bit from itself (there currently are none), this code will need to be
  revised for COMPILE_PCRE32. */
  othercasechar = cc + (othercasebit >> 9);
  /* Bit 0x100 set: the differing bit belongs to the upper byte of the unit. */
  if ((othercasebit & 0x100) != 0)
    othercasebit = (othercasebit & 0xff) << 8;
  else
    othercasebit &= 0xff;
#endif /* COMPILE_PCRE[8|16|32] */
  }

/* First call for this context: emit the initial load into TMP1. The next
load will then go to TMP2. */
if (context->sourcereg == -1)
  {
#if defined COMPILE_PCRE8
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  if (context->length >= 4)
    OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else if (context->length >= 2)
    OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else
#endif
    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#elif defined COMPILE_PCRE16
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  if (context->length >= 4)
    OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else
#endif
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#elif defined COMPILE_PCRE32
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#endif /* COMPILE_PCRE[8|16|32] */
  context->sourcereg = TMP2;
  }

#ifdef SUPPORT_UTF
utflength = 1;
if (common->utf && HAS_EXTRALEN(*cc))
  utflength += GET_EXTRALEN(*cc);

do
  {
#endif

  context->length -= IN_UCHARS(1);
#if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)

  /* Unaligned read is supported. */
  /* Collect the expected code unit (c) and its case bit mask (oc); the
  compare is emitted once a full group has been gathered. */
  if (othercasebit != 0 && othercasechar == cc)
    {
    context->c.asuchars[context->ucharptr] = *cc | othercasebit;
    context->oc.asuchars[context->ucharptr] = othercasebit;
    }
  else
    {
    context->c.asuchars[context->ucharptr] = *cc;
    context->oc.asuchars[context->ucharptr] = 0;
    }
  context->ucharptr++;

#if defined COMPILE_PCRE8
  if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
#else
  if (context->ucharptr >= 2 || context->length == 0)
#endif
    {
    /* Emit the load of the following group into the other register before
    the compare of the current group. */
    if (context->length >= 4)
      OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
    else if (context->length >= 2)
      OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
#if defined COMPILE_PCRE8
    else if (context->length >= 1)
      OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
#endif /* COMPILE_PCRE8 */
    /* Switch to the register that already holds the current group. */
    context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;

    /* The case labels are expressed in code units for a 4, 2 or 1 byte group. */
    switch(context->ucharptr)
      {
      case 4 / sizeof(pcre_uchar):
      if (context->oc.asint != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
      break;

      case 2 / sizeof(pcre_uchar):
      if (context->oc.asushort != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
      break;

#ifdef COMPILE_PCRE8
      case 1:
      if (context->oc.asbyte != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
      break;
#endif

      default:
      SLJIT_ASSERT_STOP();
      break;
      }
    context->ucharptr = 0;
    }

#else

  /* Unaligned read is unsupported or in 32 bit mode. */
  /* Emit the load of the next code unit (if any) into the other register,
  then compare the unit that was loaded previously. */
  if (context->length >= 1)
    OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);

  context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;

  if (othercasebit != 0 && othercasechar == cc)
    {
    OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
    }
  else
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));

#endif

  cc++;
#ifdef SUPPORT_UTF
  utflength--;
  }
while (utflength > 0);
#endif

return cc;
}
4757
4758 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4759
/* Emits an ADD or SUB on typereg so that the value subtracted from it
changes from the current typeoffset to (value), then records the new offset
in typeoffset. Nothing is emitted when the offset is unchanged. The macro
expects local variables named typereg and typeoffset at the point of use.
It expands to an if statement followed by an assignment without a
do { } while (0) wrapper, so it must not be used as the sole body of an
unbraced if/else or loop, and (value) is evaluated more than once. */
#define SET_TYPE_OFFSET(value) \
  if ((value) != typeoffset) \
    { \
    if ((value) < typeoffset) \
      OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
    else \
      OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
    } \
  typeoffset = (value);
4769
/* Emits an ADD or SUB on TMP1 so that the value subtracted from it changes
from the current charoffset to (value), then records the new offset in
charoffset. Nothing is emitted when the offset is unchanged. The macro
expects a local variable named charoffset at the point of use. It expands
to an if statement followed by an assignment without a do { } while (0)
wrapper, so it must not be used as the sole body of an unbraced if/else or
loop, and (value) is evaluated more than once. */
#define SET_CHAR_OFFSET(value) \
  if ((value) != charoffset) \
    { \
    if ((value) < charoffset) \
      OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
    else \
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
    } \
  charoffset = (value);
4779
compile_xclass_matchingpath(compiler_common * common,pcre_uchar * cc,jump_list ** backtracks)4780 static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
4781 {
4782 DEFINE_COMPILER;
4783 jump_list *found = NULL;
4784 jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
4785 sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
4786 struct sljit_jump *jump = NULL;
4787 pcre_uchar *ccbegin;
4788 int compares, invertcmp, numberofcmps;
4789 #if defined SUPPORT_UTF && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
4790 BOOL utf = common->utf;
4791 #endif
4792
4793 #ifdef SUPPORT_UCP
4794 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
4795 BOOL charsaved = FALSE;
4796 int typereg = TMP1, scriptreg = TMP1;
4797 const pcre_uint32 *other_cases;
4798 sljit_uw typeoffset;
4799 #endif
4800
4801 /* Scanning the necessary info. */
4802 cc++;
4803 ccbegin = cc;
4804 compares = 0;
4805 if (cc[-1] & XCL_MAP)
4806 {
4807 min = 0;
4808 cc += 32 / sizeof(pcre_uchar);
4809 }
4810
4811 while (*cc != XCL_END)
4812 {
4813 compares++;
4814 if (*cc == XCL_SINGLE)
4815 {
4816 cc ++;
4817 GETCHARINCTEST(c, cc);
4818 if (c > max) max = c;
4819 if (c < min) min = c;
4820 #ifdef SUPPORT_UCP
4821 needschar = TRUE;
4822 #endif
4823 }
4824 else if (*cc == XCL_RANGE)
4825 {
4826 cc ++;
4827 GETCHARINCTEST(c, cc);
4828 if (c < min) min = c;
4829 GETCHARINCTEST(c, cc);
4830 if (c > max) max = c;
4831 #ifdef SUPPORT_UCP
4832 needschar = TRUE;
4833 #endif
4834 }
4835 #ifdef SUPPORT_UCP
4836 else
4837 {
4838 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
4839 cc++;
4840 if (*cc == PT_CLIST)
4841 {
4842 other_cases = PRIV(ucd_caseless_sets) + cc[1];
4843 while (*other_cases != NOTACHAR)
4844 {
4845 if (*other_cases > max) max = *other_cases;
4846 if (*other_cases < min) min = *other_cases;
4847 other_cases++;
4848 }
4849 }
4850 else
4851 {
4852 max = READ_CHAR_MAX;
4853 min = 0;
4854 }
4855
4856 switch(*cc)
4857 {
4858 case PT_ANY:
4859 break;
4860
4861 case PT_LAMP:
4862 case PT_GC:
4863 case PT_PC:
4864 case PT_ALNUM:
4865 needstype = TRUE;
4866 break;
4867
4868 case PT_SC:
4869 needsscript = TRUE;
4870 break;
4871
4872 case PT_SPACE:
4873 case PT_PXSPACE:
4874 case PT_WORD:
4875 case PT_PXGRAPH:
4876 case PT_PXPRINT:
4877 case PT_PXPUNCT:
4878 needstype = TRUE;
4879 needschar = TRUE;
4880 break;
4881
4882 case PT_CLIST:
4883 case PT_UCNC:
4884 needschar = TRUE;
4885 break;
4886
4887 default:
4888 SLJIT_ASSERT_STOP();
4889 break;
4890 }
4891 cc += 2;
4892 }
4893 #endif
4894 }
4895
4896 /* We are not necessary in utf mode even in 8 bit mode. */
4897 cc = ccbegin;
4898 detect_partial_match(common, backtracks);
4899 read_char_range(common, min, max, (cc[-1] & XCL_NOT) != 0);
4900
4901 if ((cc[-1] & XCL_HASPROP) == 0)
4902 {
4903 if ((cc[-1] & XCL_MAP) != 0)
4904 {
4905 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4906 if (!check_class_ranges(common, (const pcre_uint8 *)cc, (((const pcre_uint8 *)cc)[31] & 0x80) != 0, TRUE, &found))
4907 {
4908 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4909 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4910 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4911 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4912 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4913 add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO));
4914 }
4915
4916 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4917 JUMPHERE(jump);
4918
4919 cc += 32 / sizeof(pcre_uchar);
4920 }
4921 else
4922 {
4923 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
4924 add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, max - min));
4925 }
4926 }
4927 else if ((cc[-1] & XCL_MAP) != 0)
4928 {
4929 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4930 #ifdef SUPPORT_UCP
4931 charsaved = TRUE;
4932 #endif
4933 if (!check_class_ranges(common, (const pcre_uint8 *)cc, FALSE, TRUE, list))
4934 {
4935 #ifdef COMPILE_PCRE8
4936 jump = NULL;
4937 if (common->utf)
4938 #endif
4939 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4940
4941 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4942 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4943 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4944 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4945 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4946 add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO));
4947
4948 #ifdef COMPILE_PCRE8
4949 if (common->utf)
4950 #endif
4951 JUMPHERE(jump);
4952 }
4953
4954 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4955 cc += 32 / sizeof(pcre_uchar);
4956 }
4957
4958 #ifdef SUPPORT_UCP
4959 /* Simple register allocation. TMP1 is preferred if possible. */
4960 if (needstype || needsscript)
4961 {
4962 if (needschar && !charsaved)
4963 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4964 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4965 if (needschar)
4966 {
4967 if (needstype)
4968 {
4969 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
4970 typereg = RETURN_ADDR;
4971 }
4972
4973 if (needsscript)
4974 scriptreg = TMP3;
4975 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4976 }
4977 else if (needstype && needsscript)
4978 scriptreg = TMP3;
4979 /* In all other cases only one of them was specified, and that can goes to TMP1. */
4980
4981 if (needsscript)
4982 {
4983 if (scriptreg == TMP1)
4984 {
4985 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4986 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
4987 }
4988 else
4989 {
4990 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
4991 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4992 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
4993 }
4994 }
4995 }
4996 #endif
4997
4998 /* Generating code. */
4999 charoffset = 0;
5000 numberofcmps = 0;
5001 #ifdef SUPPORT_UCP
5002 typeoffset = 0;
5003 #endif
5004
5005 while (*cc != XCL_END)
5006 {
5007 compares--;
5008 invertcmp = (compares == 0 && list != backtracks);
5009 jump = NULL;
5010
5011 if (*cc == XCL_SINGLE)
5012 {
5013 cc ++;
5014 GETCHARINCTEST(c, cc);
5015
5016 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
5017 {
5018 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5019 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_EQUAL);
5020 numberofcmps++;
5021 }
5022 else if (numberofcmps > 0)
5023 {
5024 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5025 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5026 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5027 numberofcmps = 0;
5028 }
5029 else
5030 {
5031 jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5032 numberofcmps = 0;
5033 }
5034 }
5035 else if (*cc == XCL_RANGE)
5036 {
5037 cc ++;
5038 GETCHARINCTEST(c, cc);
5039 SET_CHAR_OFFSET(c);
5040 GETCHARINCTEST(c, cc);
5041
5042 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
5043 {
5044 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5045 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_LESS_EQUAL);
5046 numberofcmps++;
5047 }
5048 else if (numberofcmps > 0)
5049 {
5050 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5051 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5052 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5053 numberofcmps = 0;
5054 }
5055 else
5056 {
5057 jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5058 numberofcmps = 0;
5059 }
5060 }
5061 #ifdef SUPPORT_UCP
5062 else
5063 {
5064 if (*cc == XCL_NOTPROP)
5065 invertcmp ^= 0x1;
5066 cc++;
5067 switch(*cc)
5068 {
5069 case PT_ANY:
5070 if (list != backtracks)
5071 {
5072 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
5073 continue;
5074 }
5075 else if (cc[-1] == XCL_NOTPROP)
5076 continue;
5077 jump = JUMP(SLJIT_JUMP);
5078 break;
5079
5080 case PT_LAMP:
5081 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
5082 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5083 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
5084 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5085 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
5086 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5087 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5088 break;
5089
5090 case PT_GC:
5091 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
5092 SET_TYPE_OFFSET(c);
5093 jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
5094 break;
5095
5096 case PT_PC:
5097 jump = CMP(SLJIT_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
5098 break;
5099
5100 case PT_SC:
5101 jump = CMP(SLJIT_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
5102 break;
5103
5104 case PT_SPACE:
5105 case PT_PXSPACE:
5106 SET_CHAR_OFFSET(9);
5107 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
5108 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5109
5110 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
5111 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5112
5113 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
5114 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5115
5116 SET_TYPE_OFFSET(ucp_Zl);
5117 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
5118 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5119 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5120 break;
5121
5122 case PT_WORD:
5123 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
5124 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5125 /* Fall through. */
5126
5127 case PT_ALNUM:
5128 SET_TYPE_OFFSET(ucp_Ll);
5129 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
5130 OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_LESS_EQUAL);
5131 SET_TYPE_OFFSET(ucp_Nd);
5132 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
5133 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5134 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5135 break;
5136
5137 case PT_CLIST:
5138 other_cases = PRIV(ucd_caseless_sets) + cc[1];
5139
5140 /* At least three characters are required.
5141 Otherwise this case would be handled by the normal code path. */
5142 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
5143 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
5144
5145 /* Optimizing character pairs, if their difference is power of 2. */
5146 if (is_powerof2(other_cases[1] ^ other_cases[0]))
5147 {
5148 if (charoffset == 0)
5149 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5150 else
5151 {
5152 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
5153 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5154 }
5155 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
5156 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5157 other_cases += 2;
5158 }
5159 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
5160 {
5161 if (charoffset == 0)
5162 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
5163 else
5164 {
5165 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
5166 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5167 }
5168 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
5169 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5170
5171 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
5172 OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5173
5174 other_cases += 3;
5175 }
5176 else
5177 {
5178 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
5179 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5180 }
5181
5182 while (*other_cases != NOTACHAR)
5183 {
5184 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
5185 OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5186 }
5187 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5188 break;
5189
5190 case PT_UCNC:
5191 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
5192 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5193 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
5194 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5195 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
5196 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5197
5198 SET_CHAR_OFFSET(0xa0);
5199 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
5200 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5201 SET_CHAR_OFFSET(0);
5202 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
5203 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_GREATER_EQUAL);
5204 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5205 break;
5206
5207 case PT_PXGRAPH:
5208 /* C and Z groups are the farthest two groups. */
5209 SET_TYPE_OFFSET(ucp_Ll);
5210 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
5211 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER);
5212
5213 jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
5214
5215 /* In case of ucp_Cf, we overwrite the result. */
5216 SET_CHAR_OFFSET(0x2066);
5217 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
5218 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5219
5220 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
5221 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5222
5223 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
5224 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5225
5226 JUMPHERE(jump);
5227 jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
5228 break;
5229
5230 case PT_PXPRINT:
5231 /* C and Z groups are the farthest two groups. */
5232 SET_TYPE_OFFSET(ucp_Ll);
5233 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
5234 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER);
5235
5236 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
5237 OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_NOT_EQUAL);
5238
5239 jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
5240
5241 /* In case of ucp_Cf, we overwrite the result. */
5242 SET_CHAR_OFFSET(0x2066);
5243 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
5244 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5245
5246 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
5247 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5248
5249 JUMPHERE(jump);
5250 jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
5251 break;
5252
5253 case PT_PXPUNCT:
5254 SET_TYPE_OFFSET(ucp_Sc);
5255 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
5256 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5257
5258 SET_CHAR_OFFSET(0);
5259 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x7f);
5260 OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5261
5262 SET_TYPE_OFFSET(ucp_Pc);
5263 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
5264 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5265 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5266 break;
5267 }
5268 cc += 2;
5269 }
5270 #endif
5271
5272 if (jump != NULL)
5273 add_jump(compiler, compares > 0 ? list : backtracks, jump);
5274 }
5275
5276 if (found != NULL)
5277 set_jumps(found, LABEL());
5278 }
5279
5280 #undef SET_TYPE_OFFSET
5281 #undef SET_CHAR_OFFSET
5282
5283 #endif
5284
/* Emits the matching-path code for one single-item opcode.

Arguments:
  common      shared JIT compiler state (mode, utf flag, newline settings, and
              the jump lists of the shared helper routines)
  type        the opcode to generate code for
  cc          points just after the opcode, i.e. at its operand data (if any)
  backtracks  jump list that receives every "no match" jump emitted here

Returns: the pattern position after the operand data consumed for this opcode;
for opcodes without operand data this is cc itself.

Every case only appends instructions through the sljit wrapper macros, so the
order of the statements below is the order of the generated code. */
static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
{
DEFINE_COMPILER;
int length;
unsigned int c, oc, bit;
compare_context context;
struct sljit_jump *jump[4];
jump_list *end_list;
#ifdef SUPPORT_UTF
struct sljit_label *label;
#ifdef SUPPORT_UCP
pcre_uchar propdata[5];
#endif
#endif /* SUPPORT_UTF */

switch(type)
  {
  /* Backtrack unless STR_PTR equals the "begin" field of jit_arguments. */
  case OP_SOD:
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
  return cc;

  /* Backtrack unless STR_PTR equals the "str" field of jit_arguments. */
  case OP_SOM:
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
  return cc;

  /* The test itself is done by the shared wordboundary routine (reached by a
  fast call); only the sense of the flag test differs between the two opcodes. */
  case OP_NOT_WORD_BOUNDARY:
  case OP_WORD_BOUNDARY:
  add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
  add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  return cc;

  case OP_NOT_DIGIT:
  case OP_DIGIT:
  /* Digits are usually 0-9, so it is worth to optimize them. */
  detect_partial_match(common, backtracks);
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
  if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_digit, FALSE))
    read_char7_type(common, type == OP_NOT_DIGIT);
  else
#endif
    read_char8_type(common, type == OP_NOT_DIGIT);
    /* Flip the starting bit in the negative case. */
  /* Mask the value in TMP1 with ctype_digit; OP_DIGIT backtracks on a zero
  result, OP_NOT_DIGIT on a non-zero one. */
  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
  add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO));
  return cc;

  /* Same scheme as the digit case, using the ctype_space bit. */
  case OP_NOT_WHITESPACE:
  case OP_WHITESPACE:
  detect_partial_match(common, backtracks);
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
  if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_space, FALSE))
    read_char7_type(common, type == OP_NOT_WHITESPACE);
  else
#endif
    read_char8_type(common, type == OP_NOT_WHITESPACE);
  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
  add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO));
  return cc;

  /* Same scheme as the digit case, using the ctype_word bit. */
  case OP_NOT_WORDCHAR:
  case OP_WORDCHAR:
  detect_partial_match(common, backtracks);
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
  if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_word, FALSE))
    read_char7_type(common, type == OP_NOT_WORDCHAR);
  else
#endif
    read_char8_type(common, type == OP_NOT_WORDCHAR);
  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
  add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO));
  return cc;

  /* Backtracks when the character read is a newline. */
  case OP_ANY:
  detect_partial_match(common, backtracks);
  read_char_range(common, common->nlmin, common->nlmax, TRUE);
  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
    {
    /* Fixed newline stored as two 8-bit halves in common->newline: backtrack
    only when the character equals the high half and the following code unit
    equals the low half. Reaching the end of the subject in between skips the
    second comparison. */
    jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
    end_list = NULL;
    if (common->mode != JIT_PARTIAL_HARD_COMPILE)
      add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
    else
      check_str_end(common, &end_list);

    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
    set_jumps(end_list, LABEL());
    JUMPHERE(jump[0]);
    }
  else
    check_newlinechar(common, common->nltype, backtracks, TRUE);
  return cc;

  /* Advances STR_PTR over one character without inspecting its value. */
  case OP_ALLANY:
  detect_partial_match(common, backtracks);
#ifdef SUPPORT_UTF
  if (common->utf)
    {
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
#if defined COMPILE_PCRE8
    /* Lead units >= 0xc0: add the value looked up in utf8_table4 to STR_PTR. */
    jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
#elif defined COMPILE_PCRE16
    /* Units >= 0xd800: when (unit & 0xfc00) == 0xd800, add (1 << 1) more to
    STR_PTR; otherwise add 0. */
    jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
#endif
    JUMPHERE(jump[0]);
#endif /* COMPILE_PCRE[8|16] */
    return cc;
    }
#endif
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  return cc;

  /* Always advances by exactly one code unit. */
  case OP_ANYBYTE:
  detect_partial_match(common, backtracks);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  return cc;

#ifdef SUPPORT_UTF
#ifdef SUPPORT_UCP
  /* A property test is compiled by wrapping its two operand units in a
  temporary XCL_HASPROP ... XCL_END sequence and handing that to the xclass
  code generator. */
  case OP_NOTPROP:
  case OP_PROP:
  propdata[0] = XCL_HASPROP;
  propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
  propdata[2] = cc[0];
  propdata[3] = cc[1];
  propdata[4] = XCL_END;
  compile_xclass_matchingpath(common, propdata, backtracks);
  return cc + 2;
#endif
#endif

  /* CR optionally followed by NL is handled inline; any other character goes
  through check_newlinechar() with the bsr newline type. */
  case OP_ANYNL:
  detect_partial_match(common, backtracks);
  read_char_range(common, common->bsr_nlmin, common->bsr_nlmax, FALSE);
  jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
  /* We don't need to handle soft partial matching case. */
  end_list = NULL;
  if (common->mode != JIT_PARTIAL_HARD_COMPILE)
    add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  else
    check_str_end(common, &end_list);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  jump[2] = JUMP(SLJIT_JUMP);
  JUMPHERE(jump[0]);
  check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
  set_jumps(end_list, LABEL());
  JUMPHERE(jump[1]);
  JUMPHERE(jump[2]);
  return cc;

  /* The classification is done by the shared hspace routine (fast call). */
  case OP_NOT_HSPACE:
  case OP_HSPACE:
  detect_partial_match(common, backtracks);
  read_char_range(common, 0x9, 0x3000, type == OP_NOT_HSPACE);
  add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
  add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  return cc;

  /* The classification is done by the shared vspace routine (fast call). */
  case OP_NOT_VSPACE:
  case OP_VSPACE:
  detect_partial_match(common, backtracks);
  read_char_range(common, 0xa, 0x2029, type == OP_NOT_VSPACE);
  add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
  add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  return cc;

#ifdef SUPPORT_UCP
  /* Reads one character, then keeps reading further characters while the
  ucp_gbtable entry selected by the previous character's gbprop value has the
  bit for the next character's gbprop value set. STACK_TOP is borrowed to
  hold the previous gbprop value; it is saved to and restored from LOCALS0. */
  case OP_EXTUNI:
  detect_partial_match(common, backtracks);
  read_char(common);
  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
  /* Optimize register allocation: use a real register. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
  OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);

  label = LABEL();
  jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  /* TMP3 remembers the position before the candidate character so that it
  can be un-read when the loop ends. */
  OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
  read_char(common);
  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);

  OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
  OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
  OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
  OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
  JUMPTO(SLJIT_NOT_ZERO, label);

  OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
  JUMPHERE(jump[0]);
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);

  if (common->mode == JIT_PARTIAL_HARD_COMPILE)
    {
    jump[0] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
    /* Since we successfully read a char above, partial matching must occur. */
    check_partial(common, TRUE);
    JUMPHERE(jump[0]);
    }
  return cc;
#endif

  /* Succeeds at STR_END, or when what remains up to STR_END is exactly one
  newline; every other situation backtracks. */
  case OP_EODN:
  /* Requires rather complex checks. */
  jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
    {
    /* Two-unit fixed newline: STR_PTR + 2 must equal STR_END and both units
    must match the two halves of common->newline. */
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    if (common->mode == JIT_COMPILE)
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
    else
      {
      /* Partial modes: when STR_PTR + 2 != STR_END, backtrack directly if
      STR_PTR + 2 is below STR_END or the first unit is not the first newline
      unit; otherwise call check_partial() before backtracking. */
      jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);
      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS);
      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_NOT_EQUAL);
      add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
      check_partial(common, TRUE);
      add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
      JUMPHERE(jump[1]);
      }
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
    }
  else if (common->nltype == NLTYPE_FIXED)
    {
    /* One-unit fixed newline: it must be the last unit of the subject. */
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
    }
  else
    {
    /* Non-fixed newline types. A leading CR is examined first: it is accepted
    when it is the last unit (STR_PTR + 2 > STR_END) or when it is followed by
    NL exactly at the end. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
    jump[2] = JUMP(SLJIT_GREATER);
    add_jump(compiler, backtracks, JUMP(SLJIT_LESS));
    /* Equal. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    jump[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));

    JUMPHERE(jump[1]);
    if (common->nltype == NLTYPE_ANYCRLF)
      {
      /* Not CR: the only remaining possibility is a final NL. */
      OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0));
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
      }
    else
      {
      /* STR_PTR is saved in LOCALS1, one character is read and must end
      exactly at STR_END, the shared anynewline routine classifies it, and
      STR_PTR is restored afterwards. */
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
      read_char_range(common, common->nlmin, common->nlmax, TRUE);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
      add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
      add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
      }
    JUMPHERE(jump[2]);
    JUMPHERE(jump[3]);
    }
  JUMPHERE(jump[0]);
  check_partial(common, FALSE);
  return cc;

  /* Backtrack unless STR_PTR has reached STR_END. */
  case OP_EOD:
  add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
  check_partial(common, FALSE);
  return cc;

  /* Backtrack when STR_PTR is past "begin" or when the notbol field of
  jit_arguments is non-zero. */
  case OP_CIRC:
  OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
  return cc;

  /* At "begin" this behaves like OP_CIRC (only notbol is tested). Past
  "begin" it backtracks at STR_END and otherwise requires that a newline
  immediately precedes STR_PTR. */
  case OP_CIRCM:
  OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
  jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0);
  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
  jump[0] = JUMP(SLJIT_JUMP);
  JUMPHERE(jump[1]);

  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
    {
    /* Two-unit newline: both preceding units must exist (not before "begin",
    still held in TMP1) and match. */
    OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, TMP1, 0));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
    }
  else
    {
    /* Step back one character, re-read it and test it as a newline. */
    skip_char_back(common);
    read_char_range(common, common->nlmin, common->nlmax, TRUE);
    check_newlinechar(common, common->nltype, backtracks, FALSE);
    }
  JUMPHERE(jump[0]);
  return cc;

  /* Backtrack when noteol is set; otherwise behave like OP_EODN (generated
  by a recursive call) or, when common->endonly is set, like OP_EOD. */
  case OP_DOLL:
  OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));

  if (!common->endonly)
    compile_char1_matchingpath(common, OP_EODN, cc, backtracks);
  else
    {
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
    check_partial(common, FALSE);
    }
  return cc;

  /* At STR_END only noteol is tested; before STR_END a newline must start at
  STR_PTR (STR_PTR itself is not advanced here). */
  case OP_DOLLM:
  jump[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
  OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
  check_partial(common, FALSE);
  jump[0] = JUMP(SLJIT_JUMP);
  JUMPHERE(jump[1]);

  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
    {
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    if (common->mode == JIT_COMPILE)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0));
    else
      {
      jump[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
      /* STR_PTR = STR_END - IN_UCHARS(1) */
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
      check_partial(common, TRUE);
      add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
      JUMPHERE(jump[1]);
      }

    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
    }
  else
    {
    peek_char(common, common->nlmax);
    check_newlinechar(common, common->nltype, backtracks, FALSE);
    }
  JUMPHERE(jump[0]);
  return cc;

  /* A single literal character; cc points at its code unit(s). */
  case OP_CHAR:
  case OP_CHARI:
  length = 1;
#ifdef SUPPORT_UTF
  if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
#endif
  /* In plain JIT_COMPILE mode, a character that is caseful, has no other
  case, or has a non-zero othercase bit is compared as a fixed-length unit
  sequence by byte_sequence_compare(). */
  if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
    {
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));

    context.length = IN_UCHARS(length);
    context.sourcereg = -1;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
    context.ucharptr = 0;
#endif
    return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
    }

  /* Otherwise the subject character is read and compared as a whole. */
  detect_partial_match(common, backtracks);
#ifdef SUPPORT_UTF
  if (common->utf)
    {
    GETCHAR(c, cc);
    }
  else
#endif
    c = *cc;

  if (type == OP_CHAR || !char_has_othercase(common, cc))
    {
    read_char_range(common, c, c, FALSE);
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
    return cc + length;
    }
  oc = char_othercase(common, c);
  read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, FALSE);
  bit = c ^ oc;
  if (is_powerof2(bit))
    {
    /* The two cases differ in a single bit: force that bit on and compare
    once. */
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
    return cc + length;
    }
  jump[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c);
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
  JUMPHERE(jump[0]);
  return cc + length;

  /* A negated literal: backtrack when the subject character equals c (or, for
  OP_NOTI, its other case). */
  case OP_NOT:
  case OP_NOTI:
  detect_partial_match(common, backtracks);
  length = 1;
#ifdef SUPPORT_UTF
  if (common->utf)
    {
#ifdef COMPILE_PCRE8
    c = *cc;
    if (c < 128)
      {
      /* Only the first subject unit needs to be compared; the rest of the
      subject character is skipped afterwards. */
      OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
      if (type == OP_NOT || !char_has_othercase(common, cc))
        add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
      else
        {
        /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
        OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
        add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
        }
      /* Skip the variable-length character. */
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
      JUMPHERE(jump[0]);
      return cc + 1;
      }
    else
#endif /* COMPILE_PCRE8 */
      {
      GETCHARLEN(c, cc, length);
      }
    }
  else
#endif /* SUPPORT_UTF */
    c = *cc;

  if (type == OP_NOT || !char_has_othercase(common, cc))
    {
    read_char_range(common, c, c, TRUE);
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
    }
  else
    {
    oc = char_othercase(common, c);
    read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, TRUE);
    bit = c ^ oc;
    if (is_powerof2(bit))
      {
      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
      }
    else
      {
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
      }
    }
  return cc + length;

  /* A 32-byte bitmap class stored at cc. */
  case OP_CLASS:
  case OP_NCLASS:
  detect_partial_match(common, backtracks);

#if defined SUPPORT_UTF && defined COMPILE_PCRE8
  bit = (common->utf && is_char7_bitset((const pcre_uint8 *)cc, type == OP_NCLASS)) ? 127 : 255;
  read_char_range(common, 0, bit, type == OP_NCLASS);
#else
  read_char_range(common, 0, 255, type == OP_NCLASS);
#endif

  /* When check_class_ranges() reports success nothing more is emitted here. */
  if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, FALSE, backtracks))
    return cc + 32 / sizeof(pcre_uchar);

  /* Characters above the bitmap limit: OP_CLASS backtracks on them, while
  OP_NCLASS jumps over the bitmap test (via jump[0]) and so accepts them. */
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
  jump[0] = NULL;
  if (common->utf)
    {
    jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, bit);
    if (type == OP_CLASS)
      {
      add_jump(compiler, backtracks, jump[0]);
      jump[0] = NULL;
      }
    }
#elif !defined COMPILE_PCRE8
  jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
  if (type == OP_CLASS)
    {
    add_jump(compiler, backtracks, jump[0]);
    jump[0] = NULL;
    }
#endif /* SUPPORT_UTF && COMPILE_PCRE8 */

  /* Test bit (ch & 7) of the bitmap byte at index (ch >> 3); backtrack when
  it is clear. */
  OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
  OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
  add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));

#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
  if (jump[0] != NULL)
    JUMPHERE(jump[0]);
#endif

  return cc + 32 / sizeof(pcre_uchar);

#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
  /* Extended class: the data after the LINK_SIZE field is compiled by the
  xclass code generator; GET(cc, 0) gives the distance to the next item. */
  case OP_XCLASS:
  compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
  return cc + GET(cc, 0) - 1;
#endif

  /* Moves STR_PTR back by GET(cc, 0) characters, backtracking if that would
  go before the "begin" field of jit_arguments. */
  case OP_REVERSE:
  length = GET(cc, 0);
  if (length == 0)
    return cc + LINK_SIZE;
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
#ifdef SUPPORT_UTF
  if (common->utf)
    {
    /* UTF: step back one character at a time, counting down in TMP2. */
    OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
    label = LABEL();
    add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
    skip_char_back(common);
    OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, label);
    }
  else
#endif
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0));
    }
  check_start_used_ptr(common);
  return cc + LINK_SIZE;
  }
/* Every supported opcode returns from inside the switch. */
SLJIT_ASSERT_STOP();
return cc;
}
5858
compile_charn_matchingpath(compiler_common * common,pcre_uchar * cc,pcre_uchar * ccend,jump_list ** backtracks)5859 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
5860 {
5861 /* This function consumes at least one input character. */
5862 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
5863 DEFINE_COMPILER;
5864 pcre_uchar *ccbegin = cc;
5865 compare_context context;
5866 int size;
5867
5868 context.length = 0;
5869 do
5870 {
5871 if (cc >= ccend)
5872 break;
5873
5874 if (*cc == OP_CHAR)
5875 {
5876 size = 1;
5877 #ifdef SUPPORT_UTF
5878 if (common->utf && HAS_EXTRALEN(cc[1]))
5879 size += GET_EXTRALEN(cc[1]);
5880 #endif
5881 }
5882 else if (*cc == OP_CHARI)
5883 {
5884 size = 1;
5885 #ifdef SUPPORT_UTF
5886 if (common->utf)
5887 {
5888 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5889 size = 0;
5890 else if (HAS_EXTRALEN(cc[1]))
5891 size += GET_EXTRALEN(cc[1]);
5892 }
5893 else
5894 #endif
5895 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5896 size = 0;
5897 }
5898 else
5899 size = 0;
5900
5901 cc += 1 + size;
5902 context.length += IN_UCHARS(size);
5903 }
5904 while (size > 0 && context.length <= 128);
5905
5906 cc = ccbegin;
5907 if (context.length > 0)
5908 {
5909 /* We have a fixed-length byte sequence. */
5910 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
5911 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
5912
5913 context.sourcereg = -1;
5914 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5915 context.ucharptr = 0;
5916 #endif
5917 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
5918 return cc;
5919 }
5920
5921 /* A non-fixed length character will be checked if length == 0. */
5922 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);
5923 }
5924
/* Forward definitions. These two generators are called by functions below
(for example the recursion and assertion compilers) ahead of their own
definitions. */
static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
5928
/* Allocates a zero-filled backtrack record of 'size' bytes with
sljit_alloc_memory(), stores 'ccstart' as its pattern position and pushes it
onto the parent->top list. The variables 'compiler', 'parent' and 'backtrack'
must be in scope at the point of use. When the compiler reports an error
after the allocation, the enclosing function returns 'error'. The 'size'
argument is parenthesized at every use, so any expression can be passed. */
#define PUSH_BACKTRACK(size, ccstart, error) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return error; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)
5941
/* Same as PUSH_BACKTRACK, for use inside functions returning void: when the
compiler reports an error after the allocation, the enclosing function simply
returns. The variables 'compiler', 'parent' and 'backtrack' must be in scope.
The 'size' argument is parenthesized at every use, so any expression can be
passed. */
#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)
5954
/* Views the local 'backtrack' pointer as a pointer to a more specific
backtrack structure type. */
#define BACKTRACK_AS(type) ((type *)backtrack)
5956
static void compile_dnref_search(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
{
/* Generates code that selects one capture group among the name table
entries listed by an OP_DNREF / OP_DNREFI item. The address of the selected
OVECTOR slot goes to TMP2.

Every candidate except the last one is tested against the value stored in
OVECTOR(1); the first one that differs from it is selected. The last
candidate is selected unconditionally, but when 'backtracks' is not NULL
(and jscript_compat is off) a backtrack is taken if it is equal to that
value. */
DEFINE_COMPILER;
pcre_uchar *entry = common->name_table + GET2(cc, 1) * common->name_entry_size;
int remaining = GET2(cc, 1 + IMM2_SIZE);
unsigned int ovector_index;
jump_list *selected = NULL;

SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);

/* Reference value every candidate is compared against. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));

/* All entries but the last one. */
for (remaining--; remaining > 0; remaining--)
  {
  ovector_index = GET2(entry, 0) << 1;
  GET_LOCAL_BASE(TMP2, 0, OVECTOR(ovector_index));
  add_jump(compiler, &selected, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(ovector_index), TMP1, 0));
  entry += common->name_entry_size;
  }

/* The last entry is the fallback choice. */
ovector_index = GET2(entry, 0) << 1;
GET_LOCAL_BASE(TMP2, 0, OVECTOR(ovector_index));
if (backtracks != NULL && !common->jscript_compat)
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(ovector_index), TMP1, 0));

set_jumps(selected, LABEL());
}
5986
static void compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
{
/* Generates the code which matches the subject against the text captured by
a back reference.

cc          points to the OP_REF / OP_REFI / OP_DNREF / OP_DNREFI opcode
backtracks  list receiving the jumps taken when the match fails
withchecks  when TRUE, the unset group check (OP_REF / OP_REFI only, and
            only when jscript_compat is off) and the empty capture check
            are emitted
emptyfail   when TRUE an empty capture is a failure (its jump is added to
            backtracks), otherwise it jumps to the end of the generated code

For OP_REF / OP_REFI the capture is read from OVECTOR(offset). For the
duplicate name forms TMP2 must contain the address of the selected OVECTOR
slot on entry (see compile_dnref_search).

The comparison is caseless for both OP_REFI and OP_DNREFI, and caseful for
both OP_REF and OP_DNREF. */
DEFINE_COMPILER;
BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
BOOL caseless = (*cc == OP_REFI || *cc == OP_DNREFI);
int offset = 0;
struct sljit_jump *jump = NULL;
struct sljit_jump *partial;
struct sljit_jump *nopartial;

/* TMP1 = start of the captured text. */
if (ref)
  {
  offset = GET2(cc, 1) << 1;
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
  /* OVECTOR(1) contains the "string begin - 1" constant. */
  if (withchecks && !common->jscript_compat)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
  }
else
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);

#if defined SUPPORT_UTF && defined SUPPORT_UCP
if (common->utf && caseless)
  {
  /* The registers are passed directly as the arguments of the helper. */
  SLJIT_ASSERT(TMP1 == SLJIT_R0 && STACK_TOP == SLJIT_R1 && TMP2 == SLJIT_R2);

  /* TMP2 = end of the captured text. */
  if (ref)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
  else
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));

  if (withchecks)
    jump = CMP(SLJIT_EQUAL, TMP1, 0, TMP2, 0);

  /* Needed to save important temporary registers. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
  OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
  sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);

  /* The generated code treats a return value of 0 as no match, 1 as running
  out of subject, and anything else as the new STR_PTR. */
  if (common->mode == JIT_COMPILE)
    add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
  else
    {
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
    nopartial = CMP(SLJIT_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
    check_partial(common, FALSE);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    JUMPHERE(nopartial);
    }
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
  }
else
#endif /* SUPPORT_UTF && SUPPORT_UCP */
  {
  /* TMP2 = length of the captured text (end - start). */
  if (ref)
    OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
  else
    OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);

  if (withchecks)
    jump = JUMP(SLJIT_ZERO);

  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  partial = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
  if (common->mode == JIT_COMPILE)
    add_jump(compiler, backtracks, partial);

  add_jump(compiler, caseless ? &common->caselesscmp : &common->casefulcmp, JUMP(SLJIT_FAST_CALL));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));

  if (common->mode != JIT_COMPILE)
    {
    nopartial = JUMP(SLJIT_JUMP);
    JUMPHERE(partial);
    /* TMP2 -= STR_END - STR_PTR */
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
    partial = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0);
    OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
    add_jump(compiler, caseless ? &common->caselesscmp : &common->casefulcmp, JUMP(SLJIT_FAST_CALL));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
    JUMPHERE(partial);
    check_partial(common, FALSE);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    JUMPHERE(nopartial);
    }
  }

/* Resolve the empty capture jump, if it was emitted. */
if (jump != NULL)
  {
  if (emptyfail)
    add_jump(compiler, backtracks, jump);
  else
    JUMPHERE(jump);
  }
}
6082
static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
{
/* Generates the matching path of a back reference which is followed by a
repeat opcode (OP_CRSTAR ... OP_CRMINRANGE). An iterator_backtrack record is
pushed onto 'parent'. Returns the pattern pointer after the repeat opcode,
or NULL if the backtrack record cannot be allocated. */
DEFINE_COMPILER;
BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
backtrack_common *backtrack;
pcre_uchar type;
int offset = 0;
struct sljit_label *label;
struct sljit_jump *zerolength;
struct sljit_jump *jump = NULL;
pcre_uchar *ccbegin = cc;
int min = 0, max = 0;
BOOL minimize;

PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);

/* The non OP_REF / OP_REFI forms carry one more IMM2 sized operand. It is
skipped here, so the repeat opcode is at the same relative position. */
if (ref)
  offset = GET2(cc, 1) << 1;
else
  cc += IMM2_SIZE;
type = cc[1 + IMM2_SIZE];

/* The lowest bit of the repeat opcode selects the minimizing variant. */
SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
minimize = (type & 0x1) != 0;
/* Decode the repeat limits. A max of zero is used for the star and plus
forms. */
switch(type)
  {
  case OP_CRSTAR:
  case OP_CRMINSTAR:
  min = 0;
  max = 0;
  cc += 1 + IMM2_SIZE + 1;
  break;
  case OP_CRPLUS:
  case OP_CRMINPLUS:
  min = 1;
  max = 0;
  cc += 1 + IMM2_SIZE + 1;
  break;
  case OP_CRQUERY:
  case OP_CRMINQUERY:
  min = 0;
  max = 1;
  cc += 1 + IMM2_SIZE + 1;
  break;
  case OP_CRRANGE:
  case OP_CRMINRANGE:
  min = GET2(cc, 1 + IMM2_SIZE + 1);
  max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
  cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
  break;
  default:
  SLJIT_ASSERT_STOP();
  break;
  }

/* Maximizing (greedy) repeat. */
if (!minimize)
  {
  if (min == 0)
    {
    allocate_stack(common, 2);
    if (ref)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
    /* Temporary release of STR_PTR. */
    OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
    /* Handles both invalid and empty cases. Since the minimum repeat
    is zero, the invalid case is basically the same as an empty case. */
    if (ref)
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
    else
      {
      /* The selected OVECTOR slot address (TMP2) is kept in POSSESSIVE1. */
      compile_dnref_search(common, ccbegin, NULL);
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
      }
    /* Restore if not zero length. */
    OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
    }
  else
    {
    allocate_stack(common, 1);
    if (ref)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    if (ref)
      {
      /* At least one repeat is required: backtrack if the capture start is
      equal to OVECTOR(1). */
      add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
      }
    else
      {
      compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
      }
    }

  /* POSSESSIVE0 is the repeat counter, used only when a limit above one
  has to be checked. */
  if (min > 1 || max > 1)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0);

  /* Start of the repeat loop. */
  label = LABEL();
  if (!ref)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
  compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);

  if (min > 1 || max > 1)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
    /* Repeat without saving a position until the minimum is reached. */
    if (min > 1)
      CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, label);
    if (max > 1)
      {
      /* Leave the loop when the maximum is reached, otherwise save the
      current position and try one more repeat. */
      jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
      allocate_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      JUMPTO(SLJIT_JUMP, label);
      JUMPHERE(jump);
      }
    }

  if (max == 0)
    {
    /* Includes min > 1 case as well. */
    allocate_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    JUMPTO(SLJIT_JUMP, label);
    }

  JUMPHERE(zerolength);
  BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();

  count_match(common);
  return cc;
  }

/* Minimizing repeat. STACK(0) is written with STR_PTR below, STACK(1) is the
repeat counter, and STACK(2) (allocated only for the non OP_REF / OP_REFI
forms) keeps the selected OVECTOR slot address. */
allocate_stack(common, ref ? 2 : 3);
if (ref)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
if (type != OP_CRMINSTAR)
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);

if (min == 0)
  {
  /* Handles both invalid and empty cases. Since the minimum repeat
  is zero, the invalid case is basically the same as an empty case. */
  if (ref)
    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
  else
    {
    compile_dnref_search(common, ccbegin, NULL);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
    }
  /* Length is non-zero, we can match real repeats. */
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  /* Zero repeats are tried first: skip the code generated below. */
  jump = JUMP(SLJIT_JUMP);
  }
else
  {
  if (ref)
    {
    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
    }
  else
    {
    compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
    }
  }

/* The label recorded here is where one more repeat is attempted. */
BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
if (max > 0)
  add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));

if (!ref)
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

if (min > 1)
  {
  /* Increase the counter and repeat until the minimum is reached. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
  CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->matchingpath);
  }
else if (max > 0)
  OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);

if (jump != NULL)
  JUMPHERE(jump);
JUMPHERE(zerolength);

count_match(common);
return cc;
}
6289
compile_recurse_matchingpath(compiler_common * common,pcre_uchar * cc,backtrack_common * parent)6290 static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6291 {
6292 DEFINE_COMPILER;
6293 backtrack_common *backtrack;
6294 recurse_entry *entry = common->entries;
6295 recurse_entry *prev = NULL;
6296 sljit_sw start = GET(cc, 1);
6297 pcre_uchar *start_cc;
6298 BOOL needs_control_head;
6299
6300 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
6301
6302 /* Inlining simple patterns. */
6303 if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
6304 {
6305 start_cc = common->start + start;
6306 compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
6307 BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
6308 return cc + 1 + LINK_SIZE;
6309 }
6310
6311 while (entry != NULL)
6312 {
6313 if (entry->start == start)
6314 break;
6315 prev = entry;
6316 entry = entry->next;
6317 }
6318
6319 if (entry == NULL)
6320 {
6321 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
6322 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6323 return NULL;
6324 entry->next = NULL;
6325 entry->entry = NULL;
6326 entry->calls = NULL;
6327 entry->start = start;
6328
6329 if (prev != NULL)
6330 prev->next = entry;
6331 else
6332 common->entries = entry;
6333 }
6334
6335 if (common->has_set_som && common->mark_ptr != 0)
6336 {
6337 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
6338 allocate_stack(common, 2);
6339 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
6340 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6341 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6342 }
6343 else if (common->has_set_som || common->mark_ptr != 0)
6344 {
6345 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
6346 allocate_stack(common, 1);
6347 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6348 }
6349
6350 if (entry->entry == NULL)
6351 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
6352 else
6353 JUMPTO(SLJIT_FAST_CALL, entry->entry);
6354 /* Leave if the match is failed. */
6355 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
6356 return cc + 1 + LINK_SIZE;
6357 }
6358
do_callout(struct jit_arguments * arguments,PUBL (callout_block)* callout_block,pcre_uchar ** jit_ovector)6359 static int SLJIT_CALL do_callout(struct jit_arguments *arguments, PUBL(callout_block) *callout_block, pcre_uchar **jit_ovector)
6360 {
6361 const pcre_uchar *begin = arguments->begin;
6362 int *offset_vector = arguments->offsets;
6363 int offset_count = arguments->offset_count;
6364 int i;
6365
6366 if (PUBL(callout) == NULL)
6367 return 0;
6368
6369 callout_block->version = 2;
6370 callout_block->callout_data = arguments->callout_data;
6371
6372 /* Offsets in subject. */
6373 callout_block->subject_length = arguments->end - arguments->begin;
6374 callout_block->start_match = (pcre_uchar*)callout_block->subject - arguments->begin;
6375 callout_block->current_position = (pcre_uchar*)callout_block->offset_vector - arguments->begin;
6376 #if defined COMPILE_PCRE8
6377 callout_block->subject = (PCRE_SPTR)begin;
6378 #elif defined COMPILE_PCRE16
6379 callout_block->subject = (PCRE_SPTR16)begin;
6380 #elif defined COMPILE_PCRE32
6381 callout_block->subject = (PCRE_SPTR32)begin;
6382 #endif
6383
6384 /* Convert and copy the JIT offset vector to the offset_vector array. */
6385 callout_block->capture_top = 0;
6386 callout_block->offset_vector = offset_vector;
6387 for (i = 2; i < offset_count; i += 2)
6388 {
6389 offset_vector[i] = jit_ovector[i] - begin;
6390 offset_vector[i + 1] = jit_ovector[i + 1] - begin;
6391 if (jit_ovector[i] >= begin)
6392 callout_block->capture_top = i;
6393 }
6394
6395 callout_block->capture_top = (callout_block->capture_top >> 1) + 1;
6396 if (offset_count > 0)
6397 offset_vector[0] = -1;
6398 if (offset_count > 1)
6399 offset_vector[1] = -1;
6400 return (*PUBL(callout))(callout_block);
6401 }
6402
/* Size of the callout block, rounded up to a multiple of 8 bytes. */
#define CALLOUT_ARG_SIZE \
    (((int)sizeof(PUBL(callout_block)) + 7) & ~7)

/* Offset of a callout block field, expressed relative to the end of the
CALLOUT_ARG_SIZE sized area (hence the negative base). */
#define CALLOUT_ARG_OFFSET(arg) \
    (-CALLOUT_ARG_SIZE + SLJIT_OFFSETOF(PUBL(callout_block), arg))
6409
static SLJIT_INLINE pcre_uchar *compile_callout_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
{
/* Generates the matching path of a callout item: a callout block is built
on the stack, do_callout() is called, and its return value is evaluated.
A positive value backtracks, a negative value quits the match, and zero
continues. Returns the pattern pointer after the item, or NULL if the
backtrack record cannot be allocated. */
DEFINE_COMPILER;
backtrack_common *backtrack;

PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);

/* Reserve space for the callout block. */
allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));

SLJIT_ASSERT(common->capture_last_ptr != 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);

/* These pointer sized fields temporarily store internal variables:
offset_vector receives STR_PTR and subject receives OVECTOR(0). They are
converted by do_callout(). */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);

/* The mark field is set from the arguments (TMP1) when marks are used,
otherwise it is set to zero. */
if (common->mark_ptr != 0)
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));
OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);

/* Needed to save important temporary registers. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
/* Second argument: address of the callout block. Third argument: address
of the offset vector of the generated code. */
OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_TOP, 0, SLJIT_IMM, CALLOUT_ARG_SIZE);
GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START);
sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
/* do_callout() returns an int. */
OP1(SLJIT_MOV_SI, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0);
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));

/* Check return value. */
OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_SIG_GREATER));
if (common->forced_quit_label == NULL)
  add_jump(compiler, &common->forced_quit, JUMP(SLJIT_SIG_LESS));
else
  JUMPTO(SLJIT_SIG_LESS, common->forced_quit_label);
return cc + 2 + 2 * LINK_SIZE;
}
6454
/* The callout block layout helpers are not needed beyond this point. */
#undef CALLOUT_ARG_SIZE
#undef CALLOUT_ARG_OFFSET
6457
compile_assert_matchingpath(compiler_common * common,pcre_uchar * cc,assert_backtrack * backtrack,BOOL conditional)6458 static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
6459 {
6460 DEFINE_COMPILER;
6461 int framesize;
6462 int extrasize;
6463 BOOL needs_control_head;
6464 int private_data_ptr;
6465 backtrack_common altbacktrack;
6466 pcre_uchar *ccbegin;
6467 pcre_uchar opcode;
6468 pcre_uchar bra = OP_BRA;
6469 jump_list *tmp = NULL;
6470 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
6471 jump_list **found;
6472 /* Saving previous accept variables. */
6473 BOOL save_local_exit = common->local_exit;
6474 BOOL save_positive_assert = common->positive_assert;
6475 then_trap_backtrack *save_then_trap = common->then_trap;
6476 struct sljit_label *save_quit_label = common->quit_label;
6477 struct sljit_label *save_accept_label = common->accept_label;
6478 jump_list *save_quit = common->quit;
6479 jump_list *save_positive_assert_quit = common->positive_assert_quit;
6480 jump_list *save_accept = common->accept;
6481 struct sljit_jump *jump;
6482 struct sljit_jump *brajump = NULL;
6483
6484 /* Assert captures then. */
6485 common->then_trap = NULL;
6486
6487 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
6488 {
6489 SLJIT_ASSERT(!conditional);
6490 bra = *cc;
6491 cc++;
6492 }
6493 private_data_ptr = PRIVATE_DATA(cc);
6494 SLJIT_ASSERT(private_data_ptr != 0);
6495 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
6496 backtrack->framesize = framesize;
6497 backtrack->private_data_ptr = private_data_ptr;
6498 opcode = *cc;
6499 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
6500 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
6501 ccbegin = cc;
6502 cc += GET(cc, 1);
6503
6504 if (bra == OP_BRAMINZERO)
6505 {
6506 /* This is a braminzero backtrack path. */
6507 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6508 free_stack(common, 1);
6509 brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6510 }
6511
6512 if (framesize < 0)
6513 {
6514 extrasize = needs_control_head ? 2 : 1;
6515 if (framesize == no_frame)
6516 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
6517 allocate_stack(common, extrasize);
6518 if (needs_control_head)
6519 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
6520 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6521 if (needs_control_head)
6522 {
6523 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
6524 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6525 }
6526 }
6527 else
6528 {
6529 extrasize = needs_control_head ? 3 : 2;
6530 allocate_stack(common, framesize + extrasize);
6531 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6532 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
6533 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
6534 if (needs_control_head)
6535 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
6536 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6537 if (needs_control_head)
6538 {
6539 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
6540 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6541 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
6542 }
6543 else
6544 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6545 init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize, FALSE);
6546 }
6547
6548 memset(&altbacktrack, 0, sizeof(backtrack_common));
6549 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6550 {
6551 /* Negative assert is stronger than positive assert. */
6552 common->local_exit = TRUE;
6553 common->quit_label = NULL;
6554 common->quit = NULL;
6555 common->positive_assert = FALSE;
6556 }
6557 else
6558 common->positive_assert = TRUE;
6559 common->positive_assert_quit = NULL;
6560
6561 while (1)
6562 {
6563 common->accept_label = NULL;
6564 common->accept = NULL;
6565 altbacktrack.top = NULL;
6566 altbacktrack.topbacktracks = NULL;
6567
6568 if (*ccbegin == OP_ALT)
6569 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6570
6571 altbacktrack.cc = ccbegin;
6572 compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
6573 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6574 {
6575 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6576 {
6577 common->local_exit = save_local_exit;
6578 common->quit_label = save_quit_label;
6579 common->quit = save_quit;
6580 }
6581 common->positive_assert = save_positive_assert;
6582 common->then_trap = save_then_trap;
6583 common->accept_label = save_accept_label;
6584 common->positive_assert_quit = save_positive_assert_quit;
6585 common->accept = save_accept;
6586 return NULL;
6587 }
6588 common->accept_label = LABEL();
6589 if (common->accept != NULL)
6590 set_jumps(common->accept, common->accept_label);
6591
6592 /* Reset stack. */
6593 if (framesize < 0)
6594 {
6595 if (framesize == no_frame)
6596 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6597 else
6598 free_stack(common, extrasize);
6599 if (needs_control_head)
6600 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
6601 }
6602 else
6603 {
6604 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
6605 {
6606 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6607 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
6608 if (needs_control_head)
6609 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
6610 }
6611 else
6612 {
6613 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6614 if (needs_control_head)
6615 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw));
6616 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6617 }
6618 }
6619
6620 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6621 {
6622 /* We know that STR_PTR was stored on the top of the stack. */
6623 if (conditional)
6624 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? sizeof(sljit_sw) : 0);
6625 else if (bra == OP_BRAZERO)
6626 {
6627 if (framesize < 0)
6628 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
6629 else
6630 {
6631 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6632 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + extrasize - 1) * sizeof(sljit_sw));
6633 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
6634 }
6635 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6636 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6637 }
6638 else if (framesize >= 0)
6639 {
6640 /* For OP_BRA and OP_BRAMINZERO. */
6641 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6642 }
6643 }
6644 add_jump(compiler, found, JUMP(SLJIT_JUMP));
6645
6646 compile_backtrackingpath(common, altbacktrack.top);
6647 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6648 {
6649 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6650 {
6651 common->local_exit = save_local_exit;
6652 common->quit_label = save_quit_label;
6653 common->quit = save_quit;
6654 }
6655 common->positive_assert = save_positive_assert;
6656 common->then_trap = save_then_trap;
6657 common->accept_label = save_accept_label;
6658 common->positive_assert_quit = save_positive_assert_quit;
6659 common->accept = save_accept;
6660 return NULL;
6661 }
6662 set_jumps(altbacktrack.topbacktracks, LABEL());
6663
6664 if (*cc != OP_ALT)
6665 break;
6666
6667 ccbegin = cc;
6668 cc += GET(cc, 1);
6669 }
6670
6671 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6672 {
6673 SLJIT_ASSERT(common->positive_assert_quit == NULL);
6674 /* Makes the check less complicated below. */
6675 common->positive_assert_quit = common->quit;
6676 }
6677
6678 /* None of them matched. */
6679 if (common->positive_assert_quit != NULL)
6680 {
6681 jump = JUMP(SLJIT_JUMP);
6682 set_jumps(common->positive_assert_quit, LABEL());
6683 SLJIT_ASSERT(framesize != no_stack);
6684 if (framesize < 0)
6685 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
6686 else
6687 {
6688 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6689 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6690 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
6691 }
6692 JUMPHERE(jump);
6693 }
6694
6695 if (needs_control_head)
6696 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));
6697
6698 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
6699 {
6700 /* Assert is failed. */
6701 if (conditional || bra == OP_BRAZERO)
6702 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6703
6704 if (framesize < 0)
6705 {
6706 /* The topmost item should be 0. */
6707 if (bra == OP_BRAZERO)
6708 {
6709 if (extrasize == 2)
6710 free_stack(common, 1);
6711 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6712 }
6713 else
6714 free_stack(common, extrasize);
6715 }
6716 else
6717 {
6718 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
6719 /* The topmost item should be 0. */
6720 if (bra == OP_BRAZERO)
6721 {
6722 free_stack(common, framesize + extrasize - 1);
6723 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6724 }
6725 else
6726 free_stack(common, framesize + extrasize);
6727 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
6728 }
6729 jump = JUMP(SLJIT_JUMP);
6730 if (bra != OP_BRAZERO)
6731 add_jump(compiler, target, jump);
6732
6733 /* Assert is successful. */
6734 set_jumps(tmp, LABEL());
6735 if (framesize < 0)
6736 {
6737 /* We know that STR_PTR was stored on the top of the stack. */
6738 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
6739 /* Keep the STR_PTR on the top of the stack. */
6740 if (bra == OP_BRAZERO)
6741 {
6742 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6743 if (extrasize == 2)
6744 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6745 }
6746 else if (bra == OP_BRAMINZERO)
6747 {
6748 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6749 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6750 }
6751 }
6752 else
6753 {
6754 if (bra == OP_BRA)
6755 {
6756 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6757 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
6758 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 2) * sizeof(sljit_sw));
6759 }
6760 else
6761 {
6762 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6763 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
6764 if (extrasize == 2)
6765 {
6766 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6767 if (bra == OP_BRAMINZERO)
6768 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6769 }
6770 else
6771 {
6772 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
6773 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
6774 }
6775 }
6776 }
6777
6778 if (bra == OP_BRAZERO)
6779 {
6780 backtrack->matchingpath = LABEL();
6781 SET_LABEL(jump, backtrack->matchingpath);
6782 }
6783 else if (bra == OP_BRAMINZERO)
6784 {
6785 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
6786 JUMPHERE(brajump);
6787 if (framesize >= 0)
6788 {
6789 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6790 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6791 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6792 }
6793 set_jumps(backtrack->common.topbacktracks, LABEL());
6794 }
6795 }
6796 else
6797 {
6798 /* AssertNot is successful. */
6799 if (framesize < 0)
6800 {
6801 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6802 if (bra != OP_BRA)
6803 {
6804 if (extrasize == 2)
6805 free_stack(common, 1);
6806 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6807 }
6808 else
6809 free_stack(common, extrasize);
6810 }
6811 else
6812 {
6813 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6814 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
6815 /* The topmost item should be 0. */
6816 if (bra != OP_BRA)
6817 {
6818 free_stack(common, framesize + extrasize - 1);
6819 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6820 }
6821 else
6822 free_stack(common, framesize + extrasize);
6823 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
6824 }
6825
6826 if (bra == OP_BRAZERO)
6827 backtrack->matchingpath = LABEL();
6828 else if (bra == OP_BRAMINZERO)
6829 {
6830 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
6831 JUMPHERE(brajump);
6832 }
6833
6834 if (bra != OP_BRA)
6835 {
6836 SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
6837 set_jumps(backtrack->common.topbacktracks, LABEL());
6838 backtrack->common.topbacktracks = NULL;
6839 }
6840 }
6841
6842 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6843 {
6844 common->local_exit = save_local_exit;
6845 common->quit_label = save_quit_label;
6846 common->quit = save_quit;
6847 }
6848 common->positive_assert = save_positive_assert;
6849 common->then_trap = save_then_trap;
6850 common->accept_label = save_accept_label;
6851 common->positive_assert_quit = save_positive_assert_quit;
6852 common->accept = save_accept;
6853 return cc + 1 + LINK_SIZE;
6854 }
6855
match_once_common(compiler_common * common,pcre_uchar ket,int framesize,int private_data_ptr,BOOL has_alternatives,BOOL needs_control_head)6856 static SLJIT_INLINE void match_once_common(compiler_common *common, pcre_uchar ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
6857 {
6858 DEFINE_COMPILER;
6859 int stacksize;
6860
6861 if (framesize < 0)
6862 {
6863 if (framesize == no_frame)
6864 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6865 else
6866 {
6867 stacksize = needs_control_head ? 1 : 0;
6868 if (ket != OP_KET || has_alternatives)
6869 stacksize++;
6870 free_stack(common, stacksize);
6871 }
6872
6873 if (needs_control_head)
6874 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? sizeof(sljit_sw) : 0);
6875
6876 /* TMP2 which is set here used by OP_KETRMAX below. */
6877 if (ket == OP_KETRMAX)
6878 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
6879 else if (ket == OP_KETRMIN)
6880 {
6881 /* Move the STR_PTR to the private_data_ptr. */
6882 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), 0);
6883 }
6884 }
6885 else
6886 {
6887 stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;
6888 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
6889 if (needs_control_head)
6890 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), 0);
6891
6892 if (ket == OP_KETRMAX)
6893 {
6894 /* TMP2 which is set here used by OP_KETRMAX below. */
6895 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6896 }
6897 }
6898 if (needs_control_head)
6899 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
6900 }
6901
match_capture_common(compiler_common * common,int stacksize,int offset,int private_data_ptr)6902 static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
6903 {
6904 DEFINE_COMPILER;
6905
6906 if (common->capture_last_ptr != 0)
6907 {
6908 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
6909 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
6910 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6911 stacksize++;
6912 }
6913 if (common->optimized_cbracket[offset >> 1] == 0)
6914 {
6915 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
6916 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6917 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6918 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6919 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
6920 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
6921 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
6922 stacksize += 2;
6923 }
6924 return stacksize;
6925 }
6926
6927 /*
6928 Handling bracketed expressions is probably the most complex part.
6929
6930 Stack layout naming characters:
6931 S - Push the current STR_PTR
6932 0 - Push a 0 (NULL)
6933 A - Push the current STR_PTR. Needed for restoring the STR_PTR
6934 before the next alternative. Not pushed if there are no alternatives.
6935 M - Any values pushed by the current alternative. Can be empty, or anything.
6936 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
6937 L - Push the previous local (pointed by localptr) to the stack
    () - optional values stored on the stack
   ()* - optional, can be stored multiple times
6940
6941 The following list shows the regular expression templates, their PCRE byte codes
6942 and stack layout supported by pcre-sljit.
6943
6944 (?:) OP_BRA | OP_KET A M
6945 () OP_CBRA | OP_KET C M
6946 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
6947 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
6948 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
6949 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
6950 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
6951 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
6952 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
6953 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
6954 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
6955 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
6956 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
6957 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
6958 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
6959 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
6960 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
6961 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
6962 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
6963 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
6964 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
6965 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
6966
6967
6968 Stack layout naming characters:
6969 A - Push the alternative index (starting from 0) on the stack.
        Not pushed if there are no alternatives.
6971 M - Any values pushed by the current alternative. Can be empty, or anything.
6972
6973 The next list shows the possible content of a bracket:
6974 (|) OP_*BRA | OP_ALT ... M A
6975 (?()|) OP_*COND | OP_ALT M A
6976 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
6977 (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A
6978 Or nothing, if trace is unnecessary
6979 */
6980
/* Generates the matching path of a bracketed group. The opcodes handled here
are OP_BRA, OP_CBRA, OP_SBRA, OP_SCBRA, OP_ONCE, OP_ONCE_NC, OP_COND and
OP_SCOND, optionally preceded by OP_BRAZERO or OP_BRAMINZERO. The closing ket
(OP_KET, OP_KETRMAX, OP_KETRMIN, or an OP_KET that carries repeat private
data) selects the repetition scheme. Only one branch is compiled here (the
first alternative, or the branch chosen for a condition that is decided at
compile time); the remaining OP_ALT branches are skipped before returning.

Arguments:
  common   the compiler state
  cc       points to the first opcode of the group (the OP_BRAZERO or
           OP_BRAMINZERO prefix when there is one)
  parent   the enclosing backtrack; for OP_BRAMINZERO it is cast to
           braminzero_backtrack to obtain the jump target of the
           matching path

Returns:   the code position after the closing ket and its repeat data,
           or NULL when the sljit compiler reports an error
*/
static pcre_uchar *compile_bracket_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
pcre_uchar opcode;
int private_data_ptr = 0;
int offset = 0;
int i, stacksize;
int repeat_ptr = 0, repeat_length = 0;
int repeat_type = 0, repeat_count = 0;
pcre_uchar *ccbegin;
pcre_uchar *matchingpath;
pcre_uchar *slot;
pcre_uchar bra = OP_BRA;
pcre_uchar ket;
assert_backtrack *assert;
BOOL has_alternatives;
BOOL needs_control_head = FALSE;
struct sljit_jump *jump;
struct sljit_jump *skip;
struct sljit_label *rmax_label = NULL;
struct sljit_jump *braminzero = NULL;

PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);

/* Remember the zero-repeat prefix (if any) in bra and step over it. */
if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
  {
  bra = *cc;
  cc++;
  /* NOTE(review): redundant store, opcode is assigned again right below. */
  opcode = *cc;
  }

opcode = *cc;
ccbegin = cc;
/* matchingpath temporarily points to the closing ket of the group. */
matchingpath = bracketend(cc) - 1 - LINK_SIZE;
ket = *matchingpath;
if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)
  {
  /* The ket carries repeat information in four consecutive private data
  entries. OP_UPTO and OP_MINUPTO repeats are handled as OP_KETRMAX and
  OP_KETRMIN with a counter; other repeat types keep ket as OP_KET. */
  repeat_ptr = PRIVATE_DATA(matchingpath);
  repeat_length = PRIVATE_DATA(matchingpath + 1);
  repeat_type = PRIVATE_DATA(matchingpath + 2);
  repeat_count = PRIVATE_DATA(matchingpath + 3);
  SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);
  if (repeat_type == OP_UPTO)
    ket = OP_KETRMAX;
  if (repeat_type == OP_MINUPTO)
    ket = OP_KETRMIN;
  }

/* A condition whose test is OP_DEF generates no code at all. */
if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
  {
  /* Drop this bracket_backtrack. */
  parent->top = backtrack->prev;
  return matchingpath + 1 + LINK_SIZE + repeat_length;
  }

/* From here matchingpath points to the first item inside the bracket and
cc is advanced to the opcode that ends the first alternative. */
matchingpath = ccbegin + 1 + LINK_SIZE;
SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
cc += GET(cc, 1);

has_alternatives = *cc == OP_ALT;
/* For conditions, the three test types that are decided at compile time
never count as having alternatives; every other test type does. */
if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND))
  has_alternatives = (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF || *matchingpath == OP_FAIL) ? FALSE : TRUE;

/* Normalize the opcode: a repeated OP_COND is treated as OP_SCOND and
OP_ONCE_NC is treated as OP_ONCE. */
if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
  opcode = OP_SCOND;
if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
  opcode = OP_ONCE;

if (opcode == OP_CBRA || opcode == OP_SCBRA)
  {
  /* Capturing brackets has a pre-allocated space. */
  offset = GET2(ccbegin, 1 + LINK_SIZE);
  /* offset is the bracket number when it indexes optimized_cbracket and is
  doubled afterwards to become an ovector offset. */
  if (common->optimized_cbracket[offset] == 0)
    {
    private_data_ptr = OVECTOR_PRIV(offset);
    offset <<= 1;
    }
  else
    {
    offset <<= 1;
    private_data_ptr = OVECTOR(offset);
    }
  BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
  matchingpath += IMM2_SIZE;
  }
else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
  {
  /* Other brackets simply allocate the next entry. */
  private_data_ptr = PRIVATE_DATA(ccbegin);
  SLJIT_ASSERT(private_data_ptr != 0);
  BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
  if (opcode == OP_ONCE)
    BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
  }

/* Instructions before the first alternative. */
/* First pass: count the slots so that a single allocation is emitted. */
stacksize = 0;
if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
  stacksize++;
if (bra == OP_BRAZERO)
  stacksize++;

if (stacksize > 0)
  allocate_stack(common, stacksize);

/* Second pass: fill the slots (a 0, then the current STR_PTR). */
stacksize = 0;
if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
  stacksize++;
  }

if (bra == OP_BRAZERO)
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);

if (bra == OP_BRAMINZERO)
  {
  /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  if (ket != OP_KETRMIN)
    {
    free_stack(common, 1);
    /* The braminzero jump is resolved near the end of this function. */
    braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
    }
  else
    {
    if (opcode == OP_ONCE || opcode >= OP_SBRA)
      {
      jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
      /* Nothing stored during the first run. */
      skip = JUMP(SLJIT_JUMP);
      JUMPHERE(jump);
      /* Checking zero-length iteration. */
      if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
        {
        /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
        braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
        }
      else
        {
        /* Except when the whole stack frame must be saved. */
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
        braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw));
        }
      JUMPHERE(skip);
      }
    else
      {
      /* When the loaded value is 0, STR_PTR is taken from the next slot. */
      jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
      JUMPHERE(jump);
      }
    }
  }

if (repeat_type != 0)
  {
  /* Initialize the repeat counter; it is decremented after each iteration. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, repeat_count);
  if (repeat_type == OP_EXACT)
    rmax_label = LABEL();
  }

if (ket == OP_KETRMIN)
  BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();

if (ket == OP_KETRMAX)
  {
  /* Start of one iteration; the loop jumps emitted below target this label. */
  rmax_label = LABEL();
  if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA && repeat_type == 0)
    BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label;
  }

/* Handling capturing brackets and alternatives. */
if (opcode == OP_ONCE)
  {
  stacksize = 0;
  if (needs_control_head)
    {
    /* TMP2 keeps the control head until the stack slot is allocated. */
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
    stacksize++;
    }

  if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
    {
    /* Neither capturing brackets nor recursions are found in the block. */
    if (ket == OP_KETRMIN)
      {
      stacksize += 2;
      /* TMP2 is free only when it does not hold the control head. */
      if (!needs_control_head)
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      }
    else
      {
      if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
      if (ket == OP_KETRMAX || has_alternatives)
        stacksize++;
      }

    if (stacksize > 0)
      allocate_stack(common, stacksize);

    /* stacksize is reused as the index of the next slot to fill. */
    stacksize = 0;
    if (needs_control_head)
      {
      stacksize++;
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
      }

    if (ket == OP_KETRMIN)
      {
      /* Push the current STR_PTR followed by the old private data value. */
      if (needs_control_head)
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
      if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
        OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
      }
    else if (ket == OP_KETRMAX || has_alternatives)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
    }
  else
    {
    /* A frame is needed: reserve the optional control head and STR_PTR
    slots, the old private data value and the frame itself at once. */
    if (ket != OP_KET || has_alternatives)
      stacksize++;

    stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1;
    allocate_stack(common, stacksize);

    if (needs_control_head)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);

    /* TMP1: old private data value; TMP2: new value for the private data
    slot, computed from STACK_TOP and the allocated size. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));

    stacksize = needs_control_head ? 1 : 0;
    if (ket != OP_KET || has_alternatives)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
      stacksize++;
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
      }
    else
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
      }
    init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1, FALSE);
    }
  }
else if (opcode == OP_CBRA || opcode == OP_SCBRA)
  {
  /* Saving the previous values. */
  if (common->optimized_cbracket[offset >> 1] != 0)
    {
    /* Optimized bracket: private_data_ptr is the ovector entry itself, so
    both ovector values are pushed and the start is set directly. */
    SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
    allocate_stack(common, 2);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
    }
  else
    {
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    allocate_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
    }
  }
else if (opcode == OP_SBRA || opcode == OP_SCOND)
  {
  /* Saving the previous value. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  allocate_stack(common, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
  }
else if (has_alternatives)
  {
  /* Pushing the starting string pointer. */
  allocate_stack(common, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  }

/* Generating code for the first alternative. */
if (opcode == OP_COND || opcode == OP_SCOND)
  {
  if (*matchingpath == OP_CREF)
    {
    /* The condition fails when the referenced ovector entry equals
    OVECTOR(1). NOTE(review): OVECTOR(1) appears to act as the "unset"
    marker here - confirm. */
    SLJIT_ASSERT(has_alternatives);
    add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
      CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
    matchingpath += 1 + IMM2_SIZE;
    }
  else if (*matchingpath == OP_DNCREF)
    {
    SLJIT_ASSERT(has_alternatives);

    /* i name table entries are checked, starting at slot. The differences
    between each referenced ovector entry and OVECTOR(1) are OR-ed into
    TMP2; the condition fails when the result is zero. STR_PTR is used as
    scratch and is preserved in TMP3. */
    i = GET2(matchingpath, 1 + IMM2_SIZE);
    slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
    OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
    OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
    slot += common->name_entry_size;
    i--;
    while (i-- > 0)
      {
      OP2(SLJIT_SUB, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
      OP2(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, STR_PTR, 0);
      slot += common->name_entry_size;
      }
    OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
    add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), JUMP(SLJIT_ZERO));
    matchingpath += 1 + 2 * IMM2_SIZE;
    }
  else if (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF || *matchingpath == OP_FAIL)
    {
    /* Never has other case. */
    BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
    SLJIT_ASSERT(!has_alternatives);

    /* These tests are decided at compile time. stacksize is reused as the
    outcome: non-zero selects the first branch, zero the "else" branch. */
    if (*matchingpath == OP_FAIL)
      stacksize = 0;
    else if (*matchingpath == OP_RREF)
      {
      stacksize = GET2(matchingpath, 1);
      if (common->currententry == NULL)
        stacksize = 0;
      else if (stacksize == RREF_ANY)
        stacksize = 1;
      else if (common->currententry->start == 0)
        stacksize = stacksize == 0;
      else
        stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);

      if (stacksize != 0)
        matchingpath += 1 + IMM2_SIZE;
      }
    else
      {
      if (common->currententry == NULL || common->currententry->start == 0)
        stacksize = 0;
      else
        {
        /* Search the name table entries for the number read from the
        current entry; stacksize stays non-zero only when it is found. */
        stacksize = GET2(matchingpath, 1 + IMM2_SIZE);
        slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
        i = (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
        while (stacksize > 0)
          {
          if ((int)GET2(slot, 0) == i)
            break;
          slot += common->name_entry_size;
          stacksize--;
          }
        }

      if (stacksize != 0)
        matchingpath += 1 + 2 * IMM2_SIZE;
      }

      /* The stacksize == 0 is a common "else" case. */
      if (stacksize == 0)
        {
        /* Compile the second branch if there is one, otherwise nothing. */
        if (*cc == OP_ALT)
          {
          matchingpath = cc + 1 + LINK_SIZE;
          cc += GET(cc, 1);
          }
        else
          matchingpath = cc;
        }
    }
  else
    {
    /* The condition is an assertion; it gets its own assert_backtrack. */
    SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
    /* Similar code as PUSH_BACKTRACK macro. */
    assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
      return NULL;
    memset(assert, 0, sizeof(assert_backtrack));
    assert->common.cc = matchingpath;
    BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
    matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
    }
  }

/* Compile the content of the selected branch (from matchingpath to cc). */
compile_matchingpath(common, matchingpath, cc, backtrack);
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  return NULL;

if (opcode == OP_ONCE)
  match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);

/* Values pushed after the branch has matched. As before, the slots are
counted first so that only one allocation is emitted, then filled. */
stacksize = 0;
if (repeat_type == OP_MINUPTO)
  {
  /* We need to preserve the counter. TMP2 will be used below. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
  stacksize++;
  }
if (ket != OP_KET || bra != OP_BRA)
  stacksize++;
if (offset != 0)
  {
  if (common->capture_last_ptr != 0)
    stacksize++;
  if (common->optimized_cbracket[offset >> 1] == 0)
    stacksize += 2;
  }
if (has_alternatives && opcode != OP_ONCE)
  stacksize++;

if (stacksize > 0)
  allocate_stack(common, stacksize);

stacksize = 0;
if (repeat_type == OP_MINUPTO)
  {
  /* TMP2 was set above. */
  OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
  stacksize++;
  }

if (ket != OP_KET || bra != OP_BRA)
  {
  /* Repeated brackets push the current STR_PTR, others push a 0. */
  if (ket != OP_KET)
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
  else
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
  stacksize++;
  }

if (offset != 0)
  stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);

if (has_alternatives)
  {
  /* The pushed 0 is the index of the alternative that matched. */
  if (opcode != OP_ONCE)
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
  if (ket != OP_KETRMAX)
    BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
  }

/* Must be after the matchingpath label. */
if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
  {
  SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
  }

if (ket == OP_KETRMAX)
  {
  if (repeat_type != 0)
    {
    /* Counted greedy repeat: iterate until the counter reaches zero. */
    if (has_alternatives)
      BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, rmax_label);
    /* Drop STR_PTR for greedy plus quantifier. */
    if (opcode != OP_ONCE)
      free_stack(common, 1);
    }
  else if (opcode == OP_ONCE || opcode >= OP_SBRA)
    {
    if (has_alternatives)
      BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
    /* Checking zero-length iteration. */
    if (opcode != OP_ONCE)
      {
      CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0, rmax_label);
      /* Drop STR_PTR for greedy plus quantifier. */
      if (bra != OP_BRAZERO)
        free_stack(common, 1);
      }
    else
      /* TMP2 must contain the starting STR_PTR. */
      CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmax_label);
    }
  else
    JUMPTO(SLJIT_JUMP, rmax_label);
  BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
  }

if (repeat_type == OP_EXACT)
  {
  /* Exact repeat: loop back to rmax_label until the counter reaches zero. */
  count_match(common);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
  JUMPTO(SLJIT_NOT_ZERO, rmax_label);
  }
else if (repeat_type == OP_UPTO)
  {
  /* We need to preserve the counter. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
  allocate_stack(common, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
  }

if (bra == OP_BRAZERO)
  BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();

if (bra == OP_BRAMINZERO)
  {
  /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
  JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
  if (braminzero != NULL)
    {
    JUMPHERE(braminzero);
    /* We need to release the end pointer to perform the
    backtrack for the zero-length iteration. When
    framesize is < 0, OP_ONCE will do the release itself. */
    if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      }
    else if (ket == OP_KETRMIN && opcode != OP_ONCE)
      free_stack(common, 1);
    }
  /* Continue to the normal backtrack. */
  }

if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
  count_match(common);

/* Skip the other alternatives. */
while (*cc == OP_ALT)
  cc += GET(cc, 1);
cc += 1 + LINK_SIZE;

/* Temporarily encoding the needs_control_head in framesize. */
/* The flag is stored in the lowest bit; framesize is shifted left by one. */
if (opcode == OP_ONCE)
  BACKTRACK_AS(bracket_backtrack)->u.framesize = (BACKTRACK_AS(bracket_backtrack)->u.framesize << 1) | (needs_control_head ? 1 : 0);
return cc + repeat_length;
}
7521
compile_bracketpos_matchingpath(compiler_common * common,pcre_uchar * cc,backtrack_common * parent)7522 static pcre_uchar *compile_bracketpos_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
7523 {
7524 DEFINE_COMPILER;
7525 backtrack_common *backtrack;
7526 pcre_uchar opcode;
7527 int private_data_ptr;
7528 int cbraprivptr = 0;
7529 BOOL needs_control_head;
7530 int framesize;
7531 int stacksize;
7532 int offset = 0;
7533 BOOL zero = FALSE;
7534 pcre_uchar *ccbegin = NULL;
7535 int stack; /* Also contains the offset of control head. */
7536 struct sljit_label *loop = NULL;
7537 struct jump_list *emptymatch = NULL;
7538
7539 PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
7540 if (*cc == OP_BRAPOSZERO)
7541 {
7542 zero = TRUE;
7543 cc++;
7544 }
7545
7546 opcode = *cc;
7547 private_data_ptr = PRIVATE_DATA(cc);
7548 SLJIT_ASSERT(private_data_ptr != 0);
7549 BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
7550 switch(opcode)
7551 {
7552 case OP_BRAPOS:
7553 case OP_SBRAPOS:
7554 ccbegin = cc + 1 + LINK_SIZE;
7555 break;
7556
7557 case OP_CBRAPOS:
7558 case OP_SCBRAPOS:
7559 offset = GET2(cc, 1 + LINK_SIZE);
7560 /* This case cannot be optimized in the same was as
7561 normal capturing brackets. */
7562 SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
7563 cbraprivptr = OVECTOR_PRIV(offset);
7564 offset <<= 1;
7565 ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
7566 break;
7567
7568 default:
7569 SLJIT_ASSERT_STOP();
7570 break;
7571 }
7572
7573 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
7574 BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
7575 if (framesize < 0)
7576 {
7577 if (offset != 0)
7578 {
7579 stacksize = 2;
7580 if (common->capture_last_ptr != 0)
7581 stacksize++;
7582 }
7583 else
7584 stacksize = 1;
7585
7586 if (needs_control_head)
7587 stacksize++;
7588 if (!zero)
7589 stacksize++;
7590
7591 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
7592 allocate_stack(common, stacksize);
7593 if (framesize == no_frame)
7594 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
7595
7596 stack = 0;
7597 if (offset != 0)
7598 {
7599 stack = 2;
7600 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
7601 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
7602 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
7603 if (common->capture_last_ptr != 0)
7604 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
7605 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
7606 if (needs_control_head)
7607 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
7608 if (common->capture_last_ptr != 0)
7609 {
7610 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
7611 stack = 3;
7612 }
7613 }
7614 else
7615 {
7616 if (needs_control_head)
7617 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
7618 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
7619 stack = 1;
7620 }
7621
7622 if (needs_control_head)
7623 stack++;
7624 if (!zero)
7625 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), SLJIT_IMM, 1);
7626 if (needs_control_head)
7627 {
7628 stack--;
7629 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
7630 }
7631 }
7632 else
7633 {
7634 stacksize = framesize + 1;
7635 if (!zero)
7636 stacksize++;
7637 if (needs_control_head)
7638 stacksize++;
7639 if (offset == 0)
7640 stacksize++;
7641 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
7642
7643 allocate_stack(common, stacksize);
7644 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7645 if (needs_control_head)
7646 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
7647 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, -STACK(stacksize - 1));
7648
7649 stack = 0;
7650 if (!zero)
7651 {
7652 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
7653 stack = 1;
7654 }
7655 if (needs_control_head)
7656 {
7657 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
7658 stack++;
7659 }
7660 if (offset == 0)
7661 {
7662 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
7663 stack++;
7664 }
7665 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
7666 init_frame(common, cc, NULL, stacksize - 1, stacksize - framesize, FALSE);
7667 stack -= 1 + (offset == 0);
7668 }
7669
7670 if (offset != 0)
7671 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
7672
7673 loop = LABEL();
7674 while (*cc != OP_KETRPOS)
7675 {
7676 backtrack->top = NULL;
7677 backtrack->topbacktracks = NULL;
7678 cc += GET(cc, 1);
7679
7680 compile_matchingpath(common, ccbegin, cc, backtrack);
7681 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
7682 return NULL;
7683
7684 if (framesize < 0)
7685 {
7686 if (framesize == no_frame)
7687 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7688
7689 if (offset != 0)
7690 {
7691 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
7692 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
7693 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
7694 if (common->capture_last_ptr != 0)
7695 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
7696 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
7697 }
7698 else
7699 {
7700 if (opcode == OP_SBRAPOS)
7701 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
7702 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
7703 }
7704
7705 /* Even if the match is empty, we need to reset the control head. */
7706 if (needs_control_head)
7707 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));
7708
7709 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
7710 add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));
7711
7712 if (!zero)
7713 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
7714 }
7715 else
7716 {
7717 if (offset != 0)
7718 {
7719 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw));
7720 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
7721 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
7722 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
7723 if (common->capture_last_ptr != 0)
7724 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
7725 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
7726 }
7727 else
7728 {
7729 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7730 OP2(SLJIT_ADD, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
7731 if (opcode == OP_SBRAPOS)
7732 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw));
7733 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw), STR_PTR, 0);
7734 }
7735
7736 /* Even if the match is empty, we need to reset the control head. */
7737 if (needs_control_head)
7738 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));
7739
7740 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
7741 add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));
7742
7743 if (!zero)
7744 {
7745 if (framesize < 0)
7746 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
7747 else
7748 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
7749 }
7750 }
7751
7752 JUMPTO(SLJIT_JUMP, loop);
7753 flush_stubs(common);
7754
7755 compile_backtrackingpath(common, backtrack->top);
7756 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
7757 return NULL;
7758 set_jumps(backtrack->topbacktracks, LABEL());
7759
7760 if (framesize < 0)
7761 {
7762 if (offset != 0)
7763 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
7764 else
7765 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
7766 }
7767 else
7768 {
7769 if (offset != 0)
7770 {
7771 /* Last alternative. */
7772 if (*cc == OP_KETRPOS)
7773 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7774 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
7775 }
7776 else
7777 {
7778 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7779 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw));
7780 }
7781 }
7782
7783 if (*cc == OP_KETRPOS)
7784 break;
7785 ccbegin = cc + 1 + LINK_SIZE;
7786 }
7787
7788 /* We don't have to restore the control head in case of a failed match. */
7789
7790 backtrack->topbacktracks = NULL;
7791 if (!zero)
7792 {
7793 if (framesize < 0)
7794 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
7795 else /* TMP2 is set to [private_data_ptr] above. */
7796 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), (stacksize - 1) * sizeof(sljit_sw), SLJIT_IMM, 0));
7797 }
7798
7799 /* None of them matched. */
7800 set_jumps(emptymatch, LABEL());
7801 count_match(common);
7802 return cc + 1 + LINK_SIZE;
7803 }
7804
get_iterator_parameters(compiler_common * common,pcre_uchar * cc,pcre_uchar * opcode,pcre_uchar * type,int * max,int * min,pcre_uchar ** end)7805 static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, int *max, int *min, pcre_uchar **end)
7806 {
7807 int class_len;
7808
7809 *opcode = *cc;
7810 if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
7811 {
7812 cc++;
7813 *type = OP_CHAR;
7814 }
7815 else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
7816 {
7817 cc++;
7818 *type = OP_CHARI;
7819 *opcode -= OP_STARI - OP_STAR;
7820 }
7821 else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
7822 {
7823 cc++;
7824 *type = OP_NOT;
7825 *opcode -= OP_NOTSTAR - OP_STAR;
7826 }
7827 else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
7828 {
7829 cc++;
7830 *type = OP_NOTI;
7831 *opcode -= OP_NOTSTARI - OP_STAR;
7832 }
7833 else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
7834 {
7835 cc++;
7836 *opcode -= OP_TYPESTAR - OP_STAR;
7837 *type = 0;
7838 }
7839 else
7840 {
7841 SLJIT_ASSERT(*opcode == OP_CLASS || *opcode == OP_NCLASS || *opcode == OP_XCLASS);
7842 *type = *opcode;
7843 cc++;
7844 class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(pcre_uchar))) : GET(cc, 0);
7845 *opcode = cc[class_len - 1];
7846 if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
7847 {
7848 *opcode -= OP_CRSTAR - OP_STAR;
7849 if (end != NULL)
7850 *end = cc + class_len;
7851 }
7852 else if (*opcode >= OP_CRPOSSTAR && *opcode <= OP_CRPOSQUERY)
7853 {
7854 *opcode -= OP_CRPOSSTAR - OP_POSSTAR;
7855 if (end != NULL)
7856 *end = cc + class_len;
7857 }
7858 else
7859 {
7860 SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE || *opcode == OP_CRPOSRANGE);
7861 *max = GET2(cc, (class_len + IMM2_SIZE));
7862 *min = GET2(cc, class_len);
7863
7864 if (*min == 0)
7865 {
7866 SLJIT_ASSERT(*max != 0);
7867 *opcode = (*opcode == OP_CRRANGE) ? OP_UPTO : (*opcode == OP_CRMINRANGE ? OP_MINUPTO : OP_POSUPTO);
7868 }
7869 if (*max == *min)
7870 *opcode = OP_EXACT;
7871
7872 if (end != NULL)
7873 *end = cc + class_len + 2 * IMM2_SIZE;
7874 }
7875 return cc;
7876 }
7877
7878 if (*opcode == OP_UPTO || *opcode == OP_MINUPTO || *opcode == OP_EXACT || *opcode == OP_POSUPTO)
7879 {
7880 *max = GET2(cc, 0);
7881 cc += IMM2_SIZE;
7882 }
7883
7884 if (*type == 0)
7885 {
7886 *type = *cc;
7887 if (end != NULL)
7888 *end = next_opcode(common, cc);
7889 cc++;
7890 return cc;
7891 }
7892
7893 if (end != NULL)
7894 {
7895 *end = cc + 1;
7896 #ifdef SUPPORT_UTF
7897 if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
7898 #endif
7899 }
7900 return cc;
7901 }
7902
/* Emits the matching path for a quantified single item (character, negated
   character, character type or character class).

   The construct is decoded with get_iterator_parameters(). State that the
   backtracking path needs (saved subject positions, repeat counters) lives
   either on the backtrack stack (when PRIVATE_DATA(cc) is zero) or in the
   private data slots at private_data_ptr. Possessive and exact repeats keep
   no state for backtracking; they use a temporary (tmp_base/tmp_offset) and,
   for bounded possessive repeats, the POSSESSIVE1 slot.

   Returns the address of the opcode following the construct. NULL is passed
   to PUSH_BACKTRACK as its error value. */
static pcre_uchar *compile_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
{
  DEFINE_COMPILER;
  backtrack_common *backtrack;
  pcre_uchar opcode;
  pcre_uchar type;
  int max = -1;
  int min = -1;
  pcre_uchar *end;
  jump_list *nomatch = NULL;
  struct sljit_jump *jump = NULL;
  struct sljit_label *label;
  int private_data_ptr = PRIVATE_DATA(cc);
  int base;
  int offset0;
  int offset1;
  int tmp_base;
  int tmp_offset;
  int counted;

  /* Pick the home of the two state words used by the backtracking repeats. */
  if (private_data_ptr == 0) {
    base = SLJIT_MEM1(STACK_TOP);
    offset0 = STACK(0);
    offset1 = STACK(1);
  } else {
    base = SLJIT_MEM1(SLJIT_SP);
    offset0 = private_data_ptr;
    offset1 = private_data_ptr + (int)sizeof(sljit_sw);
  }

  PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);

  cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &min, &end);

  /* Temporary used by the exact/possessive loops: the TMP3 register for the
     simple item types, a stack-frame slot for the others (presumably because
     matching those items may clobber TMP3 - not visible from here). */
  switch (type) {
  case OP_NOT_DIGIT:
  case OP_DIGIT:
  case OP_NOT_WHITESPACE:
  case OP_WHITESPACE:
  case OP_NOT_WORDCHAR:
  case OP_WORDCHAR:
  case OP_ANY:
  case OP_ALLANY:
  case OP_ANYBYTE:
  case OP_ANYNL:
  case OP_NOT_HSPACE:
  case OP_HSPACE:
  case OP_NOT_VSPACE:
  case OP_VSPACE:
  case OP_CHAR:
  case OP_CHARI:
  case OP_NOT:
  case OP_NOTI:
  case OP_CLASS:
  case OP_NCLASS:
    tmp_base = TMP3;
    tmp_offset = 0;
    break;

  case OP_EXTUNI:
  case OP_XCLASS:
  case OP_NOTPROP:
  case OP_PROP:
    tmp_base = SLJIT_MEM1(SLJIT_SP);
    tmp_offset = POSSESSIVE0;
    break;

  default:
    /* Unexpected item type: treated like the stack-slot group. */
    SLJIT_ASSERT_STOP();
    tmp_base = SLJIT_MEM1(SLJIT_SP);
    tmp_offset = POSSESSIVE0;
    break;
  }

  switch (opcode) {
  case OP_STAR:
  case OP_PLUS:
  case OP_UPTO:
  case OP_CRRANGE:
    if (type == OP_ANYNL || type == OP_EXTUNI) {
      /* Variable-width items: every reached position is pushed on the
         backtrack stack, above a zero word. */
      counted = (opcode == OP_UPTO || opcode == OP_CRRANGE);

      SLJIT_ASSERT(private_data_ptr == 0);
      if (opcode == OP_STAR || opcode == OP_UPTO) {
        allocate_stack(common, 2);
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
      } else {
        allocate_stack(common, 1);
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }

      if (counted) {
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0);
      }

      label = LABEL();
      compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
      if (counted) {
        /* Bump the iteration counter kept in POSSESSIVE0. */
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
        if (opcode == OP_CRRANGE && min > 0) {
          CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, label);
        }
        if (opcode == OP_UPTO || (opcode == OP_CRRANGE && max > 0)) {
          jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
        }
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
      }

      /* We cannot use TMP3 because of this allocate_stack. */
      allocate_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      JUMPTO(SLJIT_JUMP, label);
      if (jump != NULL) {
        JUMPHERE(jump);
      }
    } else {
      /* Fixed-step items: word 0 holds the furthest position reached; word 1
         holds the loop start position (STAR/PLUS) or a counter starting at 1
         (UPTO/CRRANGE). */
      if (opcode == OP_PLUS) {
        compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
      }
      if (private_data_ptr == 0) {
        allocate_stack(common, 2);
      }
      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
      if (opcode <= OP_PLUS) {
        OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
      } else {
        OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1);
      }

      label = LABEL();
      compile_char1_matchingpath(common, type, cc, &nomatch);
      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
      if (opcode <= OP_PLUS) {
        JUMPTO(SLJIT_JUMP, label);
      } else if (opcode == OP_CRRANGE && max == 0) {
        /* No upper bound: just count and loop. */
        OP2(SLJIT_ADD, base, offset1, base, offset1, SLJIT_IMM, 1);
        JUMPTO(SLJIT_JUMP, label);
      } else {
        OP1(SLJIT_MOV, TMP1, 0, base, offset1);
        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
        OP1(SLJIT_MOV, base, offset1, TMP1, 0);
        CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, max + 1, label);
      }

      set_jumps(nomatch, LABEL());
      if (opcode == OP_CRRANGE) {
        /* Fewer than min iterations matched: the whole item fails. */
        add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS, base, offset1, SLJIT_IMM, min + 1));
      }
      OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
    }
    BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
    break;

  case OP_MINSTAR:
  case OP_MINPLUS:
    if (opcode == OP_MINPLUS) {
      compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
    }
    if (private_data_ptr == 0) {
      allocate_stack(common, 1);
    }
    OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
    BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
    break;

  case OP_MINUPTO:
  case OP_CRMINRANGE:
    if (private_data_ptr == 0) {
      allocate_stack(common, 2);
    }
    OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
    OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1);
    if (opcode == OP_CRMINRANGE) {
      /* Go through the backtracking path first. */
      add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
    }
    BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
    break;

  case OP_QUERY:
  case OP_MINQUERY:
    if (private_data_ptr == 0) {
      allocate_stack(common, 1);
    }
    OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
    if (opcode == OP_QUERY) {
      compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
    }
    BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
    break;

  case OP_EXACT:
    /* Count down from max in the temporary. */
    OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
    label = LABEL();
    compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
    OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, label);
    break;

  case OP_POSSTAR:
  case OP_POSPLUS:
  case OP_POSUPTO:
    if (opcode == OP_POSPLUS) {
      compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
    }
    if (opcode == OP_POSUPTO) {
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, SLJIT_IMM, max);
    }
    /* The temporary remembers the last position that matched. */
    OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
    label = LABEL();
    compile_char1_matchingpath(common, type, cc, &nomatch);
    OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
    if (opcode == OP_POSUPTO) {
      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, SLJIT_IMM, 1);
      JUMPTO(SLJIT_NOT_ZERO, label);
    } else {
      JUMPTO(SLJIT_JUMP, label);
    }
    set_jumps(nomatch, LABEL());
    OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
    break;

  case OP_POSQUERY:
    OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
    compile_char1_matchingpath(common, type, cc, &nomatch);
    OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
    set_jumps(nomatch, LABEL());
    OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
    break;

  case OP_CRPOSRANGE:
    /* Combination of OP_EXACT and OP_POSSTAR or OP_POSUPTO */
    OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, min);
    label = LABEL();
    compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
    OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, label);

    if (max != 0) {
      SLJIT_ASSERT(max - min > 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, SLJIT_IMM, max - min);
    }
    OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
    label = LABEL();
    compile_char1_matchingpath(common, type, cc, &nomatch);
    OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
    if (max != 0) {
      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, SLJIT_IMM, 1);
      JUMPTO(SLJIT_NOT_ZERO, label);
    } else {
      JUMPTO(SLJIT_JUMP, label);
    }
    set_jumps(nomatch, LABEL());
    OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
    break;

  default:
    SLJIT_ASSERT_STOP();
    break;
  }

  count_match(common);
  return end;
}
8148
compile_fail_accept_matchingpath(compiler_common * common,pcre_uchar * cc,backtrack_common * parent)8149 static SLJIT_INLINE pcre_uchar *compile_fail_accept_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
8150 {
8151 DEFINE_COMPILER;
8152 backtrack_common *backtrack;
8153
8154 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
8155
8156 if (*cc == OP_FAIL)
8157 {
8158 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
8159 return cc + 1;
8160 }
8161
8162 if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL || !common->might_be_empty)
8163 {
8164 /* No need to check notempty conditions. */
8165 if (common->accept_label == NULL)
8166 add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
8167 else
8168 JUMPTO(SLJIT_JUMP, common->accept_label);
8169 return cc + 1;
8170 }
8171
8172 if (common->accept_label == NULL)
8173 add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)));
8174 else
8175 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), common->accept_label);
8176 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
8177 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
8178 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
8179 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
8180 if (common->accept_label == NULL)
8181 add_jump(compiler, &common->accept, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
8182 else
8183 CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0, common->accept_label);
8184 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
8185 if (common->accept_label == NULL)
8186 add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
8187 else
8188 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->accept_label);
8189 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
8190 return cc + 1;
8191 }
8192
compile_close_matchingpath(compiler_common * common,pcre_uchar * cc)8193 static SLJIT_INLINE pcre_uchar *compile_close_matchingpath(compiler_common *common, pcre_uchar *cc)
8194 {
8195 DEFINE_COMPILER;
8196 int offset = GET2(cc, 1);
8197 BOOL optimized_cbracket = common->optimized_cbracket[offset] != 0;
8198
8199 /* Data will be discarded anyway... */
8200 if (common->currententry != NULL)
8201 return cc + 1 + IMM2_SIZE;
8202
8203 if (!optimized_cbracket)
8204 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR_PRIV(offset));
8205 offset <<= 1;
8206 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
8207 if (!optimized_cbracket)
8208 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
8209 return cc + 1 + IMM2_SIZE;
8210 }
8211
compile_control_verb_matchingpath(compiler_common * common,pcre_uchar * cc,backtrack_common * parent)8212 static SLJIT_INLINE pcre_uchar *compile_control_verb_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
8213 {
8214 DEFINE_COMPILER;
8215 backtrack_common *backtrack;
8216 pcre_uchar opcode = *cc;
8217 pcre_uchar *ccend = cc + 1;
8218
8219 if (opcode == OP_PRUNE_ARG || opcode == OP_SKIP_ARG || opcode == OP_THEN_ARG)
8220 ccend += 2 + cc[1];
8221
8222 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
8223
8224 if (opcode == OP_SKIP)
8225 {
8226 allocate_stack(common, 1);
8227 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
8228 return ccend;
8229 }
8230
8231 if (opcode == OP_PRUNE_ARG || opcode == OP_THEN_ARG)
8232 {
8233 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
8234 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
8235 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
8236 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
8237 }
8238
8239 return ccend;
8240 }
8241
8242 static pcre_uchar then_trap_opcode[1] = { OP_THEN_TRAP };
8243
compile_then_trap_matchingpath(compiler_common * common,pcre_uchar * cc,pcre_uchar * ccend,backtrack_common * parent)8244 static SLJIT_INLINE void compile_then_trap_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, backtrack_common *parent)
8245 {
8246 DEFINE_COMPILER;
8247 backtrack_common *backtrack;
8248 BOOL needs_control_head;
8249 int size;
8250
8251 PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
8252 common->then_trap = BACKTRACK_AS(then_trap_backtrack);
8253 BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
8254 BACKTRACK_AS(then_trap_backtrack)->start = (sljit_sw)(cc - common->start);
8255 BACKTRACK_AS(then_trap_backtrack)->framesize = get_framesize(common, cc, ccend, FALSE, &needs_control_head);
8256
8257 size = BACKTRACK_AS(then_trap_backtrack)->framesize;
8258 size = 3 + (size < 0 ? 0 : size);
8259
8260 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
8261 allocate_stack(common, size);
8262 if (size > 3)
8263 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0, SLJIT_IMM, (size - 3) * sizeof(sljit_sw));
8264 else
8265 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
8266 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 1), SLJIT_IMM, BACKTRACK_AS(then_trap_backtrack)->start);
8267 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 2), SLJIT_IMM, type_then_trap);
8268 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 3), TMP2, 0);
8269
8270 size = BACKTRACK_AS(then_trap_backtrack)->framesize;
8271 if (size >= 0)
8272 init_frame(common, cc, ccend, size - 1, 0, FALSE);
8273 }
8274
/* Emits the matching path for the opcode sequence cc..ccend (ccend must be
   OP_END or one of OP_ALT..OP_KETRPOS) by dispatching every opcode to its
   dedicated compile_*_matchingpath routine.

   Opcodes that need no backtrack record of their own report failures through
   the jump list of the most recently pushed record of parent, or through
   parent->topbacktracks when nothing has been pushed yet.

   When the pattern uses (*THEN) and common->then_offsets marks the start
   position, the sequence is bracketed by a then-trap: one record is pushed
   before the first opcode and a second one after the last.

   The function returns early if a sub-compiler returns NULL or an unknown
   opcode is met. */
static void compile_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, backtrack_common *parent)
{
  DEFINE_COMPILER;
  backtrack_common *backtrack;
  BOOL has_then_trap = FALSE;
  then_trap_backtrack *save_then_trap = NULL;
  jump_list **fail_list;
  pcre_uchar repeat_op;

  SLJIT_ASSERT(*ccend == OP_END || (*ccend >= OP_ALT && *ccend <= OP_KETRPOS));

  if (common->has_then && common->then_offsets[cc - common->start] != 0) {
    SLJIT_ASSERT(*ccend != OP_END && common->control_head_ptr != 0);
    has_then_trap = TRUE;
    save_then_trap = common->then_trap;
    /* Tail item on backtrack. */
    compile_then_trap_matchingpath(common, cc, ccend, parent);
  }

  while (cc < ccend) {
    /* Failure list for opcodes that do not push their own record. It is
       recomputed for every opcode because parent->top changes whenever a
       record is pushed. */
    fail_list = (parent->top != NULL) ? &parent->top->nextbacktracks : &parent->topbacktracks;

    switch (*cc) {
    /* Single items and simple assertions. */
    case OP_SOD:
    case OP_SOM:
    case OP_NOT_WORD_BOUNDARY:
    case OP_WORD_BOUNDARY:
    case OP_NOT_DIGIT:
    case OP_DIGIT:
    case OP_NOT_WHITESPACE:
    case OP_WHITESPACE:
    case OP_NOT_WORDCHAR:
    case OP_WORDCHAR:
    case OP_ANY:
    case OP_ALLANY:
    case OP_ANYBYTE:
    case OP_NOTPROP:
    case OP_PROP:
    case OP_ANYNL:
    case OP_NOT_HSPACE:
    case OP_HSPACE:
    case OP_NOT_VSPACE:
    case OP_VSPACE:
    case OP_EXTUNI:
    case OP_EODN:
    case OP_EOD:
    case OP_CIRC:
    case OP_CIRCM:
    case OP_DOLL:
    case OP_DOLLM:
    case OP_NOT:
    case OP_NOTI:
    case OP_REVERSE:
      cc = compile_char1_matchingpath(common, *cc, cc + 1, fail_list);
      break;

    case OP_SET_SOM:
      /* Save the old OVECTOR(0) on the stack and replace it with STR_PTR. */
      PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
      allocate_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
      cc++;
      break;

    case OP_CHAR:
    case OP_CHARI:
      if (common->mode == JIT_COMPILE) {
        cc = compile_charn_matchingpath(common, cc, ccend, fail_list);
      } else {
        cc = compile_char1_matchingpath(common, *cc, cc + 1, fail_list);
      }
      break;

    /* Quantified single items. */
    case OP_STAR:
    case OP_MINSTAR:
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_UPTO:
    case OP_MINUPTO:
    case OP_EXACT:
    case OP_POSSTAR:
    case OP_POSPLUS:
    case OP_POSQUERY:
    case OP_POSUPTO:
    case OP_STARI:
    case OP_MINSTARI:
    case OP_PLUSI:
    case OP_MINPLUSI:
    case OP_QUERYI:
    case OP_MINQUERYI:
    case OP_UPTOI:
    case OP_MINUPTOI:
    case OP_EXACTI:
    case OP_POSSTARI:
    case OP_POSPLUSI:
    case OP_POSQUERYI:
    case OP_POSUPTOI:
    case OP_NOTSTAR:
    case OP_NOTMINSTAR:
    case OP_NOTPLUS:
    case OP_NOTMINPLUS:
    case OP_NOTQUERY:
    case OP_NOTMINQUERY:
    case OP_NOTUPTO:
    case OP_NOTMINUPTO:
    case OP_NOTEXACT:
    case OP_NOTPOSSTAR:
    case OP_NOTPOSPLUS:
    case OP_NOTPOSQUERY:
    case OP_NOTPOSUPTO:
    case OP_NOTSTARI:
    case OP_NOTMINSTARI:
    case OP_NOTPLUSI:
    case OP_NOTMINPLUSI:
    case OP_NOTQUERYI:
    case OP_NOTMINQUERYI:
    case OP_NOTUPTOI:
    case OP_NOTMINUPTOI:
    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI:
    case OP_NOTPOSPLUSI:
    case OP_NOTPOSQUERYI:
    case OP_NOTPOSUPTOI:
    case OP_TYPESTAR:
    case OP_TYPEMINSTAR:
    case OP_TYPEPLUS:
    case OP_TYPEMINPLUS:
    case OP_TYPEQUERY:
    case OP_TYPEMINQUERY:
    case OP_TYPEUPTO:
    case OP_TYPEMINUPTO:
    case OP_TYPEEXACT:
    case OP_TYPEPOSSTAR:
    case OP_TYPEPOSPLUS:
    case OP_TYPEPOSQUERY:
    case OP_TYPEPOSUPTO:
      cc = compile_iterator_matchingpath(common, cc, parent);
      break;

    case OP_CLASS:
    case OP_NCLASS:
      /* A class followed by a quantifier is compiled as an iterator. */
      repeat_op = cc[1 + (32 / sizeof(pcre_uchar))];
      if (repeat_op >= OP_CRSTAR && repeat_op <= OP_CRPOSRANGE) {
        cc = compile_iterator_matchingpath(common, cc, parent);
      } else {
        cc = compile_char1_matchingpath(common, *cc, cc + 1, fail_list);
      }
      break;

#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
    case OP_XCLASS:
      repeat_op = *(cc + GET(cc, 1));
      if (repeat_op >= OP_CRSTAR && repeat_op <= OP_CRPOSRANGE) {
        cc = compile_iterator_matchingpath(common, cc, parent);
      } else {
        cc = compile_char1_matchingpath(common, *cc, cc + 1, fail_list);
      }
      break;
#endif

    case OP_REF:
    case OP_REFI:
      repeat_op = cc[1 + IMM2_SIZE];
      if (repeat_op >= OP_CRSTAR && repeat_op <= OP_CRPOSRANGE) {
        cc = compile_ref_iterator_matchingpath(common, cc, parent);
      } else {
        compile_ref_matchingpath(common, cc, fail_list, TRUE, FALSE);
        cc += 1 + IMM2_SIZE;
      }
      break;

    case OP_DNREF:
    case OP_DNREFI:
      repeat_op = cc[1 + 2 * IMM2_SIZE];
      if (repeat_op >= OP_CRSTAR && repeat_op <= OP_CRPOSRANGE) {
        cc = compile_ref_iterator_matchingpath(common, cc, parent);
      } else {
        compile_dnref_search(common, cc, fail_list);
        compile_ref_matchingpath(common, cc, fail_list, TRUE, FALSE);
        cc += 1 + 2 * IMM2_SIZE;
      }
      break;

    case OP_RECURSE:
      cc = compile_recurse_matchingpath(common, cc, parent);
      break;

    case OP_CALLOUT:
      cc = compile_callout_matchingpath(common, cc, parent);
      break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
      PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
      cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
      break;

    case OP_BRAMINZERO:
      /* The bracket is skipped on the matching path; only the state needed
         by the backtracking path is saved. */
      PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc);
      cc = bracketend(cc + 1);
      if (*(cc - 1 - LINK_SIZE) == OP_KETRMIN) {
        allocate_stack(common, 2);
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
      } else {
        allocate_stack(common, 1);
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      }
      BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL();
      count_match(common);
      break;

    case OP_ONCE:
    case OP_ONCE_NC:
    case OP_BRA:
    case OP_CBRA:
    case OP_COND:
    case OP_SBRA:
    case OP_SCBRA:
    case OP_SCOND:
      cc = compile_bracket_matchingpath(common, cc, parent);
      break;

    case OP_BRAZERO:
      /* The opcode after OP_BRAZERO decides: assertion or ordinary bracket. */
      if (cc[1] <= OP_ASSERTBACK_NOT) {
        PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
        cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
      } else {
        cc = compile_bracket_matchingpath(common, cc, parent);
      }
      break;

    case OP_BRAPOS:
    case OP_CBRAPOS:
    case OP_SBRAPOS:
    case OP_SCBRAPOS:
    case OP_BRAPOSZERO:
      cc = compile_bracketpos_matchingpath(common, cc, parent);
      break;

    case OP_MARK:
      /* Save the previous mark on the stack, then install cc + 2 as the new
         mark both in the frame and in the jit_arguments block. */
      PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
      SLJIT_ASSERT(common->mark_ptr != 0);
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
      allocate_stack(common, common->has_skip_arg ? 5 : 1);
      OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0), TMP2, 0);
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
      if (common->has_skip_arg) {
        /* Also link a type_mark entry into the control head chain. */
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, type_mark);
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), SLJIT_IMM, (sljit_sw)(cc + 2));
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
      }
      cc += 1 + 2 + cc[1];
      break;

    case OP_PRUNE:
    case OP_PRUNE_ARG:
    case OP_SKIP:
    case OP_SKIP_ARG:
    case OP_THEN:
    case OP_THEN_ARG:
    case OP_COMMIT:
      cc = compile_control_verb_matchingpath(common, cc, parent);
      break;

    case OP_FAIL:
    case OP_ACCEPT:
    case OP_ASSERT_ACCEPT:
      cc = compile_fail_accept_matchingpath(common, cc, parent);
      break;

    case OP_CLOSE:
      cc = compile_close_matchingpath(common, cc);
      break;

    case OP_SKIPZERO:
      cc = bracketend(cc + 1);
      break;

    default:
      SLJIT_ASSERT_STOP();
      return;
    }

    if (cc == NULL) {
      return;
    }
  }

  if (has_then_trap) {
    /* Head item on backtrack. */
    PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
    BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
    BACKTRACK_AS(then_trap_backtrack)->then_trap = common->then_trap;
    common->then_trap = save_then_trap;
  }
  SLJIT_ASSERT(cc == ccend);
}
8582
/* The record-pushing helpers are not used past this point. */
#undef PUSH_BACKTRACK
#undef PUSH_BACKTRACK_NOVALUE
#undef BACKTRACK_AS

/* Compiles the backtracking path of the given record and returns from the
   enclosing (void) function if the compiler reports an error. Expects
   "common" and "compiler" to be in scope. */
#define COMPILE_BACKTRACKINGPATH(current) \
  do { \
    compile_backtrackingpath(common, (current)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
  } while (0)

/* Views the in-scope "current" backtrack record as the given concrete type. */
#define CURRENT_AS(type) ((type *)current)
8597
/* Emits the backtracking path of a single character iterator.

The iterator is decoded from current->cc by get_iterator_parameters(). When
the iterator has no private data (PRIVATE_DATA(cc) == 0) its two work slots
are STACK(0) and STACK(1) relative to STACK_TOP, otherwise they are the two
consecutive words starting at private_data_ptr relative to SLJIT_SP.

Arguments:
  common      the compiler state
  current     the backtrack record; it is accessed as an iterator_backtrack
              to reach the matchingpath label

Returns:      nothing
*/

static void compile_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
pcre_uchar *cc = current->cc;
pcre_uchar opcode;
pcre_uchar type;
int max = -1, min = -1;
struct sljit_label *label = NULL;
struct sljit_jump *jump = NULL;
jump_list *jumplist = NULL;
int private_data_ptr = PRIVATE_DATA(cc);
/* Location of the two work slots: stack based or private data based. */
int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);

cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &min, NULL);

switch(opcode)
  {
  case OP_STAR:
  case OP_PLUS:
  case OP_UPTO:
  case OP_CRRANGE:
  if (type == OP_ANYNL || type == OP_EXTUNI)
    {
    /* These types only use the stack: the top slot is reloaded into STR_PTR
    and released, and a non-zero value resumes the matching path. */
    SLJIT_ASSERT(private_data_ptr == 0);
    set_jumps(current->topbacktracks, LABEL());
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath);
    }
  else
    {
    if (opcode == OP_UPTO)
      min = 0;
    if (opcode <= OP_PLUS)
      {
      /* Presumably OP_STAR and OP_PLUS: slot 0 is reloaded into STR_PTR and
      the iterator is exhausted when it is not above the value in slot 1. */
      OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
      jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, base, offset1);
      }
    else
      {
      /* Slot 1 is used as a counter here: the iterator is exhausted when it
      is not above min + 1, otherwise the counter is decreased by one. */
      OP1(SLJIT_MOV, TMP1, 0, base, offset1);
      OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
      jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, SLJIT_IMM, min + 1);
      OP2(SLJIT_SUB, base, offset1, TMP1, 0, SLJIT_IMM, 1);
      }
    /* Step back one character, store the new position and retry. */
    skip_char_back(common);
    OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
    JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);
    /* For OP_CRRANGE the topbacktracks list is bound before the stack slots
    are released, for OP_PLUS it is bound after the release. */
    if (opcode == OP_CRRANGE)
      set_jumps(current->topbacktracks, LABEL());
    JUMPHERE(jump);
    if (private_data_ptr == 0)
      free_stack(common, 2);
    if (opcode == OP_PLUS)
      set_jumps(current->topbacktracks, LABEL());
    }
  break;

  case OP_MINSTAR:
  case OP_MINPLUS:
  /* Try to match one more character from the stored position. Failures of
  the character match are collected in jumplist and land after the jump. */
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  compile_char1_matchingpath(common, type, cc, &jumplist);
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);
  set_jumps(jumplist, LABEL());
  if (private_data_ptr == 0)
    free_stack(common, 1);
  if (opcode == OP_MINPLUS)
    set_jumps(current->topbacktracks, LABEL());
  break;

  case OP_MINUPTO:
  case OP_CRMINRANGE:
  if (opcode == OP_CRMINRANGE)
    {
    /* The label is also the target of the "counter < min + 1" loop below. */
    label = LABEL();
    set_jumps(current->topbacktracks, label);
    }
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  compile_char1_matchingpath(common, type, cc, &jumplist);

  /* Store the new position in slot 0 and increase the counter in slot 1. */
  OP1(SLJIT_MOV, TMP1, 0, base, offset1);
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP1(SLJIT_MOV, base, offset1, TMP1, 0);

  if (opcode == OP_CRMINRANGE)
    CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min + 1, label);

  /* NOTE(review): max == 0 presumably means "no upper limit" for
  OP_CRMINRANGE, since the counter is not compared in that case - confirm. */
  if (opcode == OP_CRMINRANGE && max == 0)
    JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);
  else
    CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, max + 2, CURRENT_AS(iterator_backtrack)->matchingpath);

  set_jumps(jumplist, LABEL());
  if (private_data_ptr == 0)
    free_stack(common, 2);
  break;

  case OP_QUERY:
  /* Slot 0 is reloaded into STR_PTR and cleared. A non-zero value resumes
  the matching path, a zero value skips to the release of the slot. Jumps
  from topbacktracks perform the same reload / clear and always resume the
  matching path. */
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath);
  jump = JUMP(SLJIT_JUMP);
  set_jumps(current->topbacktracks, LABEL());
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
  JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);
  JUMPHERE(jump);
  if (private_data_ptr == 0)
    free_stack(common, 1);
  break;

  case OP_MINQUERY:
  /* Slot 0 is reloaded into STR_PTR and cleared. When the value was zero
  the character match is skipped, otherwise one character is matched and
  the matching path is resumed. */
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
  jump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  compile_char1_matchingpath(common, type, cc, &jumplist);
  JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);
  set_jumps(jumplist, LABEL());
  JUMPHERE(jump);
  if (private_data_ptr == 0)
    free_stack(common, 1);
  break;

  case OP_EXACT:
  case OP_POSPLUS:
  case OP_CRPOSRANGE:
  /* Nothing to restore: only bind the pending backtrack jumps. */
  set_jumps(current->topbacktracks, LABEL());
  break;

  case OP_POSSTAR:
  case OP_POSQUERY:
  case OP_POSUPTO:
  /* No code is emitted for these opcodes. */
  break;

  default:
  SLJIT_ASSERT_STOP();
  break;
  }
}
8741
compile_ref_iterator_backtrackingpath(compiler_common * common,struct backtrack_common * current)8742 static SLJIT_INLINE void compile_ref_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
8743 {
8744 DEFINE_COMPILER;
8745 pcre_uchar *cc = current->cc;
8746 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
8747 pcre_uchar type;
8748
8749 type = cc[ref ? 1 + IMM2_SIZE : 1 + 2 * IMM2_SIZE];
8750
8751 if ((type & 0x1) == 0)
8752 {
8753 /* Maximize case. */
8754 set_jumps(current->topbacktracks, LABEL());
8755 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8756 free_stack(common, 1);
8757 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath);
8758 return;
8759 }
8760
8761 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8762 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath);
8763 set_jumps(current->topbacktracks, LABEL());
8764 free_stack(common, ref ? 2 : 3);
8765 }
8766
compile_recurse_backtrackingpath(compiler_common * common,struct backtrack_common * current)8767 static SLJIT_INLINE void compile_recurse_backtrackingpath(compiler_common *common, struct backtrack_common *current)
8768 {
8769 DEFINE_COMPILER;
8770
8771 if (CURRENT_AS(recurse_backtrack)->inlined_pattern)
8772 compile_backtrackingpath(common, current->top);
8773 set_jumps(current->topbacktracks, LABEL());
8774 if (CURRENT_AS(recurse_backtrack)->inlined_pattern)
8775 return;
8776
8777 if (common->has_set_som && common->mark_ptr != 0)
8778 {
8779 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8780 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
8781 free_stack(common, 2);
8782 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP2, 0);
8783 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
8784 }
8785 else if (common->has_set_som || common->mark_ptr != 0)
8786 {
8787 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8788 free_stack(common, 1);
8789 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr, TMP2, 0);
8790 }
8791 }
8792
/* Emits the backtracking path of an assertion, which may be preceded by
OP_BRAZERO (OP_BRAMINZERO is not expected here, see the assertion below).

Arguments:
  common      the compiler state
  current     the backtrack record, accessed as an assert_backtrack

Returns:      nothing
*/

static void compile_assert_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
pcre_uchar *cc = current->cc;
pcre_uchar bra = OP_BRA;
struct sljit_jump *brajump = NULL;

SLJIT_ASSERT(*cc != OP_BRAMINZERO);
if (*cc == OP_BRAZERO)
  {
  bra = *cc;
  cc++;
  }

if (bra == OP_BRAZERO)
  {
  /* The top stack slot is loaded into STR_PTR. It is compared with zero
  below to select the action. */
  SLJIT_ASSERT(current->topbacktracks == NULL);
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  }

if (CURRENT_AS(assert_backtrack)->framesize < 0)
  {
  /* Negative framesize: no frame is reverted on this path. */
  set_jumps(current->topbacktracks, LABEL());

  if (bra == OP_BRAZERO)
    {
    /* Clear the slot, then resume the matching path when the loaded value
    was non-zero. Otherwise release the slot. */
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath);
    free_stack(common, 1);
    }
  return;
  }

if (bra == OP_BRAZERO)
  {
  if (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT)
    {
    /* Same sequence as above for negative assertions. */
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath);
    free_stack(common, 1);
    return;
    }
  /* Positive assertion: release the slot and skip the rest of the emitted
  code when the loaded value was zero. */
  free_stack(common, 1);
  brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  }

if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK)
  {
  /* Reload STACK_TOP from the private data slot, call the revertframes
  handler, then store the word found at framesize words from STACK_TOP back
  into the private data slot. */
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr);
  add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(assert_backtrack)->framesize * sizeof(sljit_sw));

  set_jumps(current->topbacktracks, LABEL());
  }
else
  set_jumps(current->topbacktracks, LABEL());

if (bra == OP_BRAZERO)
  {
  /* We know there is enough place on the stack. */
  OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
  JUMPTO(SLJIT_JUMP, CURRENT_AS(assert_backtrack)->matchingpath);
  JUMPHERE(brajump);
  }
}
8859
/* Emits the backtracking path of a bracket group, which may be preceded by
OP_BRAZERO or OP_BRAMINZERO.

The generated code is emitted in the following order:
  1. restoring the repeat counter and handling the zero / minimizing repeat
     entry (OP_KETRMAX, OP_KETRMIN, OP_BRAZERO, OP_EXACT),
  2. restoring the capture data stored on the stack,
  3. selecting the alternative which was matched (by compare chains or by a
     jump table when there are more than four alternatives),
  4. the backtracking path of the first alternative, and for each following
     alternative its matching path followed by its backtracking path,
  5. releasing the remaining stack data and handling the repeat exit.

Arguments:
  common      the compiler state
  current     the backtrack record, accessed as a bracket_backtrack

Returns:      nothing; returns early when the compiler reports an error or
              when the read-only data of the jump table cannot be allocated
*/

static void compile_bracket_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
int opcode, stacksize, alt_count, alt_max;
int offset = 0;
int private_data_ptr = CURRENT_AS(bracket_backtrack)->private_data_ptr;
int repeat_ptr = 0, repeat_type = 0, repeat_count = 0;
pcre_uchar *cc = current->cc;
pcre_uchar *ccbegin;
pcre_uchar *ccprev;
pcre_uchar bra = OP_BRA;
pcre_uchar ket;
assert_backtrack *assert;
sljit_uw *next_update_addr = NULL;
BOOL has_alternatives;
BOOL needs_control_head = FALSE;
struct sljit_jump *brazero = NULL;
struct sljit_jump *alt1 = NULL;
struct sljit_jump *alt2 = NULL;
struct sljit_jump *once = NULL;
struct sljit_jump *cond = NULL;
struct sljit_label *rmin_label = NULL;
struct sljit_label *exact_label = NULL;

if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
  {
  bra = *cc;
  cc++;
  }

opcode = *cc;
/* ccbegin temporarily points to the closing ket of the bracket. */
ccbegin = bracketend(cc) - 1 - LINK_SIZE;
ket = *ccbegin;
if (ket == OP_KET && PRIVATE_DATA(ccbegin) != 0)
  {
  /* A counted repeat is attached to the ket: its private data slots hold
  the counter location, the repeat type and the repeat count. OP_UPTO and
  OP_MINUPTO are handled as OP_KETRMAX and OP_KETRMIN respectively. */
  repeat_ptr = PRIVATE_DATA(ccbegin);
  repeat_type = PRIVATE_DATA(ccbegin + 2);
  repeat_count = PRIVATE_DATA(ccbegin + 3);
  SLJIT_ASSERT(repeat_type != 0 && repeat_count != 0);
  if (repeat_type == OP_UPTO)
    ket = OP_KETRMAX;
  if (repeat_type == OP_MINUPTO)
    ket = OP_KETRMIN;
  }
ccbegin = cc;
cc += GET(cc, 1);
has_alternatives = *cc == OP_ALT;
if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
  has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_backtrack)->u.condfailed != NULL;
if (opcode == OP_CBRA || opcode == OP_SCBRA)
  offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1;
if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
  opcode = OP_SCOND;
if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
  opcode = OP_ONCE;

alt_max = has_alternatives ? no_alternatives(ccbegin) : 0;

/* Decoding the needs_control_head in framesize. The lowest bit is the flag,
the remaining bits are the frame size. */
if (opcode == OP_ONCE)
  {
  needs_control_head = (CURRENT_AS(bracket_backtrack)->u.framesize & 0x1) != 0;
  CURRENT_AS(bracket_backtrack)->u.framesize >>= 1;
  }

if (ket != OP_KET && repeat_type != 0)
  {
  /* TMP1 is used in OP_KETRMIN below. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  if (repeat_type == OP_UPTO)
    OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0, SLJIT_IMM, 1);
  else
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
  }

if (ket == OP_KETRMAX)
  {
  if (bra == OP_BRAZERO)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    brazero = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
    }
  }
else if (ket == OP_KETRMIN)
  {
  if (bra != OP_BRAMINZERO)
    {
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    if (repeat_type != 0)
      {
      /* TMP1 was set a few lines above. */
      CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
      /* Drop STR_PTR for non-greedy plus quantifier. */
      if (opcode != OP_ONCE)
        free_stack(common, 1);
      }
    else if (opcode >= OP_SBRA || opcode == OP_ONCE)
      {
      /* Checking zero-length iteration. */
      if (opcode != OP_ONCE || CURRENT_AS(bracket_backtrack)->u.framesize < 0)
        CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
      else
        {
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
        CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (CURRENT_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw), CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
        }
      /* Drop STR_PTR for non-greedy plus quantifier. */
      if (opcode != OP_ONCE)
        free_stack(common, 1);
      }
    else
      JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
    }
  /* Target of the loop emitted at the end of this function. */
  rmin_label = LABEL();
  if (repeat_type != 0)
    OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
  }
else if (bra == OP_BRAZERO)
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  brazero = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
  }
else if (repeat_type == OP_EXACT)
  {
  /* The counter restarts from one; exact_label is the target of the loop
  emitted at the end of this function. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
  exact_label = LABEL();
  }

if (offset != 0)
  {
  /* Capturing bracket: restore the data saved on the stack. */
  if (common->capture_last_ptr != 0)
    {
    /* Three slots: capture_last, then the two ovector entries. */
    SLJIT_ASSERT(common->optimized_cbracket[offset >> 1] == 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
    free_stack(common, 3);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP2, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
    }
  else if (common->optimized_cbracket[offset >> 1] == 0)
    {
    /* Two slots: the two ovector entries. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    free_stack(common, 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
    }
  }

if (SLJIT_UNLIKELY(opcode == OP_ONCE))
  {
  if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
    {
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
    }
  /* The jump is bound after the stack release near the end of this
  function. */
  once = JUMP(SLJIT_JUMP);
  }
else if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
  {
  if (has_alternatives)
    {
    /* Always exactly one alternative. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);

    alt_max = 2;
    alt1 = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw));
    }
  }
else if (has_alternatives)
  {
  /* The popped value identifies the matched alternative. The values stored
  by the alternatives compiled below are multiples of sizeof(sljit_uw). */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);

  if (alt_max > 4)
    {
    /* Table jump if alt_max is greater than 4. */
    next_update_addr = allocate_read_only_data(common, alt_max * sizeof(sljit_uw));
    if (SLJIT_UNLIKELY(next_update_addr == NULL))
      return;
    sljit_emit_ijump(compiler, SLJIT_JUMP, SLJIT_MEM1(TMP1), (sljit_sw)next_update_addr);
    add_label_addr(common, next_update_addr++);
    }
  else
    {
    if (alt_max == 4)
      alt2 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw));
    alt1 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw));
    }
  }

/* Backtracking path of the first alternative. */
COMPILE_BACKTRACKINGPATH(current->top);
if (current->topbacktracks)
  set_jumps(current->topbacktracks, LABEL());

if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
  {
  /* Conditional block always has at most one alternative. */
  if (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT)
    {
    SLJIT_ASSERT(has_alternatives);
    assert = CURRENT_AS(bracket_backtrack)->u.assert;
    if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK))
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_sw));
      }
    cond = JUMP(SLJIT_JUMP);
    set_jumps(CURRENT_AS(bracket_backtrack)->u.assert->condfailed, LABEL());
    }
  else if (CURRENT_AS(bracket_backtrack)->u.condfailed != NULL)
    {
    SLJIT_ASSERT(has_alternatives);
    cond = JUMP(SLJIT_JUMP);
    set_jumps(CURRENT_AS(bracket_backtrack)->u.condfailed, LABEL());
    }
  else
    SLJIT_ASSERT(!has_alternatives);
  }

if (has_alternatives)
  {
  /* alt_count is the value stored on the stack by the alternative compiled
  in the current iteration. It is increased by sizeof(sljit_uw) after each
  alternative. */
  alt_count = sizeof(sljit_uw);
  do
    {
    current->top = NULL;
    current->topbacktracks = NULL;
    current->nextbacktracks = NULL;
    /* Conditional blocks always have an additional alternative, even if it is empty. */
    if (*cc == OP_ALT)
      {
      ccprev = cc + 1 + LINK_SIZE;
      cc += GET(cc, 1);
      if (opcode != OP_COND && opcode != OP_SCOND)
        {
        /* Reload the subject position saved for the bracket. */
        if (opcode != OP_ONCE)
          {
          if (private_data_ptr != 0)
            OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
          else
            OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
          }
        else
          OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(needs_control_head ? 1 : 0));
        }
      compile_matchingpath(common, ccprev, cc, current);
      if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
        return;
      }

    /* Instructions after the current alternative is successfully matched. */
    /* There is a similar code in compile_bracket_matchingpath. */
    if (opcode == OP_ONCE)
      match_once_common(common, ket, CURRENT_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);

    /* First pass: compute the number of stack slots to allocate. */
    stacksize = 0;
    if (repeat_type == OP_MINUPTO)
      {
      /* We need to preserve the counter. TMP2 will be used below. */
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
      stacksize++;
      }
    if (ket != OP_KET || bra != OP_BRA)
      stacksize++;
    if (offset != 0)
      {
      if (common->capture_last_ptr != 0)
        stacksize++;
      if (common->optimized_cbracket[offset >> 1] == 0)
        stacksize += 2;
      }
    if (opcode != OP_ONCE)
      stacksize++;

    if (stacksize > 0)
      allocate_stack(common, stacksize);

    /* Second pass: fill the slots; stacksize is the index of the next one. */
    stacksize = 0;
    if (repeat_type == OP_MINUPTO)
      {
      /* TMP2 was set above. */
      OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
      stacksize++;
      }

    if (ket != OP_KET || bra != OP_BRA)
      {
      if (ket != OP_KET)
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
      else
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
      stacksize++;
      }

    if (offset != 0)
      stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);

    if (opcode != OP_ONCE)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, alt_count);

    if (offset != 0 && ket == OP_KETRMAX && common->optimized_cbracket[offset >> 1] != 0)
      {
      /* If ket is not OP_KETRMAX, this code path is executed after the jump to alternative_matchingpath. */
      SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
      }

    JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->alternative_matchingpath);

    if (opcode != OP_ONCE)
      {
      /* Bind the entry point of the backtracking path of this alternative:
      either the next jump table entry or the pending compare jump. */
      if (alt_max > 4)
        add_label_addr(common, next_update_addr++);
      else
        {
        if (alt_count != 2 * sizeof(sljit_uw))
          {
          JUMPHERE(alt1);
          if (alt_max == 3 && alt_count == sizeof(sljit_uw))
            alt2 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw));
          }
        else
          {
          JUMPHERE(alt2);
          if (alt_max == 4)
            alt1 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_uw));
          }
        }
      alt_count += sizeof(sljit_uw);
      }

    COMPILE_BACKTRACKINGPATH(current->top);
    if (current->topbacktracks)
      set_jumps(current->topbacktracks, LABEL());
    SLJIT_ASSERT(!current->nextbacktracks);
    }
  while (*cc == OP_ALT);

  if (cond != NULL)
    {
    SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND);
    assert = CURRENT_AS(bracket_backtrack)->u.assert;
    if ((ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT) && assert->framesize >= 0)
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_sw));
      }
    JUMPHERE(cond);
    }

  /* Free the STR_PTR. */
  if (private_data_ptr == 0)
    free_stack(common, 1);
  }

if (offset != 0)
  {
  /* Using both tmp register is better for instruction scheduling. */
  if (common->optimized_cbracket[offset >> 1] != 0)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    free_stack(common, 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
    }
  else
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }
  }
else if (opcode == OP_SBRA || opcode == OP_SCOND)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  }
else if (opcode == OP_ONCE)
  {
  cc = ccbegin + GET(ccbegin, 1);
  stacksize = needs_control_head ? 1 : 0;

  if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
    {
    /* Reset head and drop saved frame. */
    stacksize += CURRENT_AS(bracket_backtrack)->u.framesize + ((ket != OP_KET || *cc == OP_ALT) ? 2 : 1);
    }
  else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN))
    {
    /* The STR_PTR must be released. */
    stacksize++;
    }
  free_stack(common, stacksize);

  JUMPHERE(once);
  /* Restore previous private_data_ptr */
  if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracket_backtrack)->u.framesize * sizeof(sljit_sw));
  else if (ket == OP_KETRMIN)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    /* See the comment below. */
    free_stack(common, 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }
  }

if (repeat_type == OP_EXACT)
  {
  /* Increase the counter and repeat from exact_label while it does not
  exceed repeat_count. */
  OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
  CMPTO(SLJIT_LESS_EQUAL, TMP1, 0, SLJIT_IMM, repeat_count, exact_label);
  }
else if (ket == OP_KETRMAX)
  {
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  if (bra != OP_BRAZERO)
    free_stack(common, 1);

  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
  if (bra == OP_BRAZERO)
    {
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
    JUMPHERE(brazero);
    free_stack(common, 1);
    }
  }
else if (ket == OP_KETRMIN)
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  /* OP_ONCE removes everything in case of a backtrack, so we don't
  need to explicitly release the STR_PTR. The extra release would
  affect badly the free_stack(2) above. */
  if (opcode != OP_ONCE)
    free_stack(common, 1);
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rmin_label);
  if (opcode == OP_ONCE)
    free_stack(common, bra == OP_BRAMINZERO ? 2 : 1);
  else if (bra == OP_BRAMINZERO)
    free_stack(common, 1);
  }
else if (bra == OP_BRAZERO)
  {
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
  JUMPHERE(brazero);
  }
}
9319
compile_bracketpos_backtrackingpath(compiler_common * common,struct backtrack_common * current)9320 static SLJIT_INLINE void compile_bracketpos_backtrackingpath(compiler_common *common, struct backtrack_common *current)
9321 {
9322 DEFINE_COMPILER;
9323 int offset;
9324 struct sljit_jump *jump;
9325
9326 if (CURRENT_AS(bracketpos_backtrack)->framesize < 0)
9327 {
9328 if (*current->cc == OP_CBRAPOS || *current->cc == OP_SCBRAPOS)
9329 {
9330 offset = (GET2(current->cc, 1 + LINK_SIZE)) << 1;
9331 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9332 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
9333 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
9334 if (common->capture_last_ptr != 0)
9335 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
9336 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
9337 if (common->capture_last_ptr != 0)
9338 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
9339 }
9340 set_jumps(current->topbacktracks, LABEL());
9341 free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
9342 return;
9343 }
9344
9345 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr);
9346 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
9347
9348 if (current->topbacktracks)
9349 {
9350 jump = JUMP(SLJIT_JUMP);
9351 set_jumps(current->topbacktracks, LABEL());
9352 /* Drop the stack frame. */
9353 free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
9354 JUMPHERE(jump);
9355 }
9356 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracketpos_backtrack)->framesize * sizeof(sljit_sw));
9357 }
9358
compile_braminzero_backtrackingpath(compiler_common * common,struct backtrack_common * current)9359 static SLJIT_INLINE void compile_braminzero_backtrackingpath(compiler_common *common, struct backtrack_common *current)
9360 {
9361 assert_backtrack backtrack;
9362
9363 current->top = NULL;
9364 current->topbacktracks = NULL;
9365 current->nextbacktracks = NULL;
9366 if (current->cc[1] > OP_ASSERTBACK_NOT)
9367 {
9368 /* Manual call of compile_bracket_matchingpath and compile_bracket_backtrackingpath. */
9369 compile_bracket_matchingpath(common, current->cc, current);
9370 compile_bracket_backtrackingpath(common, current->top);
9371 }
9372 else
9373 {
9374 memset(&backtrack, 0, sizeof(backtrack));
9375 backtrack.common.cc = current->cc;
9376 backtrack.matchingpath = CURRENT_AS(braminzero_backtrack)->matchingpath;
9377 /* Manual call of compile_assert_matchingpath. */
9378 compile_assert_matchingpath(common, current->cc, &backtrack, FALSE);
9379 }
9380 SLJIT_ASSERT(!current->nextbacktracks && !current->topbacktracks);
9381 }
9382
compile_control_verb_backtrackingpath(compiler_common * common,struct backtrack_common * current)9383 static SLJIT_INLINE void compile_control_verb_backtrackingpath(compiler_common *common, struct backtrack_common *current)
9384 {
9385 DEFINE_COMPILER;
9386 pcre_uchar opcode = *current->cc;
9387 struct sljit_label *loop;
9388 struct sljit_jump *jump;
9389
9390 if (opcode == OP_THEN || opcode == OP_THEN_ARG)
9391 {
9392 if (common->then_trap != NULL)
9393 {
9394 SLJIT_ASSERT(common->control_head_ptr != 0);
9395
9396 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
9397 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, type_then_trap);
9398 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, common->then_trap->start);
9399 jump = JUMP(SLJIT_JUMP);
9400
9401 loop = LABEL();
9402 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), -(int)sizeof(sljit_sw));
9403 JUMPHERE(jump);
9404 CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), -(int)(2 * sizeof(sljit_sw)), TMP1, 0, loop);
9405 CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), -(int)(3 * sizeof(sljit_sw)), TMP2, 0, loop);
9406 add_jump(compiler, &common->then_trap->quit, JUMP(SLJIT_JUMP));
9407 return;
9408 }
9409 else if (common->positive_assert)
9410 {
9411 add_jump(compiler, &common->positive_assert_quit, JUMP(SLJIT_JUMP));
9412 return;
9413 }
9414 }
9415
9416 if (common->local_exit)
9417 {
9418 if (common->quit_label == NULL)
9419 add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
9420 else
9421 JUMPTO(SLJIT_JUMP, common->quit_label);
9422 return;
9423 }
9424
9425 if (opcode == OP_SKIP_ARG)
9426 {
9427 SLJIT_ASSERT(common->control_head_ptr != 0);
9428 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
9429 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
9430 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, (sljit_sw)(current->cc + 2));
9431 sljit_emit_ijump(compiler, SLJIT_CALL2, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_search_mark));
9432 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
9433
9434 OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
9435 add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, -1));
9436 return;
9437 }
9438
9439 if (opcode == OP_SKIP)
9440 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9441 else
9442 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_IMM, 0);
9443 add_jump(compiler, &common->reset_match, JUMP(SLJIT_JUMP));
9444 }
9445
compile_then_trap_backtrackingpath(compiler_common * common,struct backtrack_common * current)9446 static SLJIT_INLINE void compile_then_trap_backtrackingpath(compiler_common *common, struct backtrack_common *current)
9447 {
9448 DEFINE_COMPILER;
9449 struct sljit_jump *jump;
9450 int size;
9451
9452 if (CURRENT_AS(then_trap_backtrack)->then_trap)
9453 {
9454 common->then_trap = CURRENT_AS(then_trap_backtrack)->then_trap;
9455 return;
9456 }
9457
9458 size = CURRENT_AS(then_trap_backtrack)->framesize;
9459 size = 3 + (size < 0 ? 0 : size);
9460
9461 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(size - 3));
9462 free_stack(common, size);
9463 jump = JUMP(SLJIT_JUMP);
9464
9465 set_jumps(CURRENT_AS(then_trap_backtrack)->quit, LABEL());
9466 /* STACK_TOP is set by THEN. */
9467 if (CURRENT_AS(then_trap_backtrack)->framesize >= 0)
9468 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
9469 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9470 free_stack(common, 3);
9471
9472 JUMPHERE(jump);
9473 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
9474 }
9475
/* Emits the backtracking code for a chain of backtrack records.

Starting at "current", the chain is followed through the "prev" links until
it ends. For every record, the jumps collected in its "nextbacktracks" list
are bound to the current code position and the opcode stored at *current->cc
selects the emitter that produces the backtracking path.

Arguments:
  common      shared compiler state
  current     first record of the chain to process (may be NULL)

common->then_trap is saved on entry and restored before returning. */
static void compile_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
then_trap_backtrack *outer_then_trap = common->then_trap;

for (; current != NULL; current = current->prev)
  {
  if (current->nextbacktracks != NULL)
    set_jumps(current->nextbacktracks, LABEL());

  switch(*current->cc)
    {
    case OP_SET_SOM:
    /* Pop the saved value and put it back into OVECTOR(0). */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP1, 0);
    break;

    /* Single character, negated character, character type and class
    iterators share one emitter. */
    case OP_STAR:
    case OP_MINSTAR:
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_UPTO:
    case OP_MINUPTO:
    case OP_EXACT:
    case OP_POSSTAR:
    case OP_POSPLUS:
    case OP_POSQUERY:
    case OP_POSUPTO:
    case OP_STARI:
    case OP_MINSTARI:
    case OP_PLUSI:
    case OP_MINPLUSI:
    case OP_QUERYI:
    case OP_MINQUERYI:
    case OP_UPTOI:
    case OP_MINUPTOI:
    case OP_EXACTI:
    case OP_POSSTARI:
    case OP_POSPLUSI:
    case OP_POSQUERYI:
    case OP_POSUPTOI:
    case OP_NOTSTAR:
    case OP_NOTMINSTAR:
    case OP_NOTPLUS:
    case OP_NOTMINPLUS:
    case OP_NOTQUERY:
    case OP_NOTMINQUERY:
    case OP_NOTUPTO:
    case OP_NOTMINUPTO:
    case OP_NOTEXACT:
    case OP_NOTPOSSTAR:
    case OP_NOTPOSPLUS:
    case OP_NOTPOSQUERY:
    case OP_NOTPOSUPTO:
    case OP_NOTSTARI:
    case OP_NOTMINSTARI:
    case OP_NOTPLUSI:
    case OP_NOTMINPLUSI:
    case OP_NOTQUERYI:
    case OP_NOTMINQUERYI:
    case OP_NOTUPTOI:
    case OP_NOTMINUPTOI:
    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI:
    case OP_NOTPOSPLUSI:
    case OP_NOTPOSQUERYI:
    case OP_NOTPOSUPTOI:
    case OP_TYPESTAR:
    case OP_TYPEMINSTAR:
    case OP_TYPEPLUS:
    case OP_TYPEMINPLUS:
    case OP_TYPEQUERY:
    case OP_TYPEMINQUERY:
    case OP_TYPEUPTO:
    case OP_TYPEMINUPTO:
    case OP_TYPEEXACT:
    case OP_TYPEPOSSTAR:
    case OP_TYPEPOSPLUS:
    case OP_TYPEPOSQUERY:
    case OP_TYPEPOSUPTO:
    case OP_CLASS:
    case OP_NCLASS:
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
    case OP_XCLASS:
#endif
    compile_iterator_backtrackingpath(common, current);
    break;

    case OP_REF:
    case OP_REFI:
    case OP_DNREF:
    case OP_DNREFI:
    compile_ref_iterator_backtrackingpath(common, current);
    break;

    case OP_RECURSE:
    compile_recurse_backtrackingpath(common, current);
    break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    compile_assert_backtrackingpath(common, current);
    break;

    case OP_ONCE:
    case OP_ONCE_NC:
    case OP_BRA:
    case OP_CBRA:
    case OP_COND:
    case OP_SBRA:
    case OP_SCBRA:
    case OP_SCOND:
    compile_bracket_backtrackingpath(common, current);
    break;

    case OP_BRAZERO:
    /* The opcode following OP_BRAZERO decides between the assertion
    emitter and the bracket emitter. */
    if (current->cc[1] <= OP_ASSERTBACK_NOT)
      compile_assert_backtrackingpath(common, current);
    else
      compile_bracket_backtrackingpath(common, current);
    break;

    case OP_BRAPOS:
    case OP_CBRAPOS:
    case OP_SBRAPOS:
    case OP_SCBRAPOS:
    case OP_BRAPOSZERO:
    compile_bracketpos_backtrackingpath(common, current);
    break;

    case OP_BRAMINZERO:
    compile_braminzero_backtrackingpath(common, current);
    break;

    case OP_MARK:
    if (common->has_skip_arg)
      {
      /* Five stack slots are released: slot 4 is written back to mark_ptr
      and slot 0 is written back to control_head_ptr. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(4));
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      free_stack(common, 5);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);
      }
    else
      {
      /* A single slot is released and written back to mark_ptr. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      free_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
      }
    break;

    case OP_THEN:
    case OP_THEN_ARG:
    case OP_PRUNE:
    case OP_PRUNE_ARG:
    case OP_SKIP:
    case OP_SKIP_ARG:
    compile_control_verb_backtrackingpath(common, current);
    break;

    case OP_COMMIT:
    /* The return register is loaded with PCRE_ERROR_NOMATCH only when
    local_exit is not set; control then goes to the quit point, which is
    either already known or recorded for later binding. */
    if (!common->local_exit)
      OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
    if (common->quit_label != NULL)
      JUMPTO(SLJIT_JUMP, common->quit_label);
    else
      add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
    break;

    case OP_CALLOUT:
    case OP_FAIL:
    case OP_ACCEPT:
    case OP_ASSERT_ACCEPT:
    /* Nothing to undo: only bind the pending backtrack jumps here. */
    set_jumps(current->topbacktracks, LABEL());
    break;

    case OP_THEN_TRAP:
    /* A virtual opcode for then traps. */
    compile_then_trap_backtrackingpath(common, current);
    break;

    default:
    SLJIT_ASSERT_STOP();
    break;
    }
  }

common->then_trap = outer_then_trap;
}
9662
compile_recurse(compiler_common * common)9663 static SLJIT_INLINE void compile_recurse(compiler_common *common)
9664 {
9665 DEFINE_COMPILER;
9666 pcre_uchar *cc = common->start + common->currententry->start;
9667 pcre_uchar *ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE);
9668 pcre_uchar *ccend = bracketend(cc) - (1 + LINK_SIZE);
9669 BOOL needs_control_head;
9670 int framesize = get_framesize(common, cc, NULL, TRUE, &needs_control_head);
9671 int private_data_size = get_private_data_copy_length(common, ccbegin, ccend, needs_control_head);
9672 int alternativesize;
9673 BOOL needs_frame;
9674 backtrack_common altbacktrack;
9675 struct sljit_jump *jump;
9676
9677 /* Recurse captures then. */
9678 common->then_trap = NULL;
9679
9680 SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS);
9681 needs_frame = framesize >= 0;
9682 if (!needs_frame)
9683 framesize = 0;
9684 alternativesize = *(cc + GET(cc, 1)) == OP_ALT ? 1 : 0;
9685
9686 SLJIT_ASSERT(common->currententry->entry == NULL && common->recursive_head_ptr != 0);
9687 common->currententry->entry = LABEL();
9688 set_jumps(common->currententry->calls, common->currententry->entry);
9689
9690 sljit_emit_fast_enter(compiler, TMP2, 0);
9691 allocate_stack(common, private_data_size + framesize + alternativesize);
9692 count_match(common);
9693 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(private_data_size + framesize + alternativesize - 1), TMP2, 0);
9694 copy_private_data(common, ccbegin, ccend, TRUE, private_data_size + framesize + alternativesize, framesize + alternativesize, needs_control_head);
9695 if (needs_control_head)
9696 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
9697 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, STACK_TOP, 0);
9698 if (needs_frame)
9699 init_frame(common, cc, NULL, framesize + alternativesize - 1, alternativesize, TRUE);
9700
9701 if (alternativesize > 0)
9702 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9703
9704 memset(&altbacktrack, 0, sizeof(backtrack_common));
9705 common->quit_label = NULL;
9706 common->accept_label = NULL;
9707 common->quit = NULL;
9708 common->accept = NULL;
9709 altbacktrack.cc = ccbegin;
9710 cc += GET(cc, 1);
9711 while (1)
9712 {
9713 altbacktrack.top = NULL;
9714 altbacktrack.topbacktracks = NULL;
9715
9716 if (altbacktrack.cc != ccbegin)
9717 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9718
9719 compile_matchingpath(common, altbacktrack.cc, cc, &altbacktrack);
9720 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9721 return;
9722
9723 add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
9724
9725 compile_backtrackingpath(common, altbacktrack.top);
9726 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9727 return;
9728 set_jumps(altbacktrack.topbacktracks, LABEL());
9729
9730 if (*cc != OP_ALT)
9731 break;
9732
9733 altbacktrack.cc = cc + 1 + LINK_SIZE;
9734 cc += GET(cc, 1);
9735 }
9736
9737 /* None of them matched. */
9738 OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
9739 jump = JUMP(SLJIT_JUMP);
9740
9741 if (common->quit != NULL)
9742 {
9743 set_jumps(common->quit, LABEL());
9744 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
9745 if (needs_frame)
9746 {
9747 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
9748 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
9749 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
9750 }
9751 OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
9752 common->quit = NULL;
9753 add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
9754 }
9755
9756 set_jumps(common->accept, LABEL());
9757 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
9758 if (needs_frame)
9759 {
9760 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
9761 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
9762 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
9763 }
9764 OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 1);
9765
9766 JUMPHERE(jump);
9767 if (common->quit != NULL)
9768 set_jumps(common->quit, LABEL());
9769 copy_private_data(common, ccbegin, ccend, FALSE, private_data_size + framesize + alternativesize, framesize + alternativesize, needs_control_head);
9770 free_stack(common, private_data_size + framesize + alternativesize);
9771 if (needs_control_head)
9772 {
9773 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), 2 * sizeof(sljit_sw));
9774 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), sizeof(sljit_sw));
9775 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, TMP1, 0);
9776 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
9777 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);
9778 }
9779 else
9780 {
9781 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), sizeof(sljit_sw));
9782 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
9783 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, TMP2, 0);
9784 }
9785 sljit_emit_fast_return(compiler, SLJIT_MEM1(STACK_TOP), 0);
9786 }
9787
9788 #undef COMPILE_BACKTRACKINGPATH
9789 #undef CURRENT_AS
9790
9791 void
PRIV(jit_compile)9792 PRIV(jit_compile)(const REAL_PCRE *re, PUBL(extra) *extra, int mode)
9793 {
9794 struct sljit_compiler *compiler;
9795 backtrack_common rootbacktrack;
9796 compiler_common common_data;
9797 compiler_common *common = &common_data;
9798 const pcre_uint8 *tables = re->tables;
9799 pcre_study_data *study;
9800 int private_data_size;
9801 pcre_uchar *ccend;
9802 executable_functions *functions;
9803 void *executable_func;
9804 sljit_uw executable_size;
9805 sljit_uw total_length;
9806 label_addr_list *label_addr;
9807 struct sljit_label *mainloop_label = NULL;
9808 struct sljit_label *continue_match_label;
9809 struct sljit_label *empty_match_found_label = NULL;
9810 struct sljit_label *empty_match_backtrack_label = NULL;
9811 struct sljit_label *reset_match_label;
9812 struct sljit_label *quit_label;
9813 struct sljit_jump *jump;
9814 struct sljit_jump *minlength_check_failed = NULL;
9815 struct sljit_jump *reqbyte_notfound = NULL;
9816 struct sljit_jump *empty_match = NULL;
9817
9818 SLJIT_ASSERT((extra->flags & PCRE_EXTRA_STUDY_DATA) != 0);
9819 study = extra->study_data;
9820
9821 if (!tables)
9822 tables = PRIV(default_tables);
9823
9824 memset(&rootbacktrack, 0, sizeof(backtrack_common));
9825 memset(common, 0, sizeof(compiler_common));
9826 rootbacktrack.cc = (pcre_uchar *)re + re->name_table_offset + re->name_count * re->name_entry_size;
9827
9828 common->start = rootbacktrack.cc;
9829 common->read_only_data_head = NULL;
9830 common->fcc = tables + fcc_offset;
9831 common->lcc = (sljit_sw)(tables + lcc_offset);
9832 common->mode = mode;
9833 common->might_be_empty = study->minlength == 0;
9834 common->nltype = NLTYPE_FIXED;
9835 switch(re->options & PCRE_NEWLINE_BITS)
9836 {
9837 case 0:
9838 /* Compile-time default */
9839 switch(NEWLINE)
9840 {
9841 case -1: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
9842 case -2: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
9843 default: common->newline = NEWLINE; break;
9844 }
9845 break;
9846 case PCRE_NEWLINE_CR: common->newline = CHAR_CR; break;
9847 case PCRE_NEWLINE_LF: common->newline = CHAR_NL; break;
9848 case PCRE_NEWLINE_CR+
9849 PCRE_NEWLINE_LF: common->newline = (CHAR_CR << 8) | CHAR_NL; break;
9850 case PCRE_NEWLINE_ANY: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
9851 case PCRE_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
9852 default: return;
9853 }
9854 common->nlmax = READ_CHAR_MAX;
9855 common->nlmin = 0;
9856 if ((re->options & PCRE_BSR_ANYCRLF) != 0)
9857 common->bsr_nltype = NLTYPE_ANYCRLF;
9858 else if ((re->options & PCRE_BSR_UNICODE) != 0)
9859 common->bsr_nltype = NLTYPE_ANY;
9860 else
9861 {
9862 #ifdef BSR_ANYCRLF
9863 common->bsr_nltype = NLTYPE_ANYCRLF;
9864 #else
9865 common->bsr_nltype = NLTYPE_ANY;
9866 #endif
9867 }
9868 common->bsr_nlmax = READ_CHAR_MAX;
9869 common->bsr_nlmin = 0;
9870 common->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
9871 common->ctypes = (sljit_sw)(tables + ctypes_offset);
9872 common->name_table = ((pcre_uchar *)re) + re->name_table_offset;
9873 common->name_count = re->name_count;
9874 common->name_entry_size = re->name_entry_size;
9875 common->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
9876 #ifdef SUPPORT_UTF
9877 /* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */
9878 common->utf = (re->options & PCRE_UTF8) != 0;
9879 #ifdef SUPPORT_UCP
9880 common->use_ucp = (re->options & PCRE_UCP) != 0;
9881 #endif
9882 if (common->utf)
9883 {
9884 if (common->nltype == NLTYPE_ANY)
9885 common->nlmax = 0x2029;
9886 else if (common->nltype == NLTYPE_ANYCRLF)
9887 common->nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
9888 else
9889 {
9890 /* We only care about the first newline character. */
9891 common->nlmax = common->newline & 0xff;
9892 }
9893
9894 if (common->nltype == NLTYPE_FIXED)
9895 common->nlmin = common->newline & 0xff;
9896 else
9897 common->nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
9898
9899 if (common->bsr_nltype == NLTYPE_ANY)
9900 common->bsr_nlmax = 0x2029;
9901 else
9902 common->bsr_nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
9903 common->bsr_nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
9904 }
9905 #endif /* SUPPORT_UTF */
9906 ccend = bracketend(common->start);
9907
9908 /* Calculate the local space size on the stack. */
9909 common->ovector_start = LIMIT_MATCH + sizeof(sljit_sw);
9910 common->optimized_cbracket = (pcre_uint8 *)SLJIT_MALLOC(re->top_bracket + 1, compiler->allocator_data);
9911 if (!common->optimized_cbracket)
9912 return;
9913 #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 1
9914 memset(common->optimized_cbracket, 0, re->top_bracket + 1);
9915 #else
9916 memset(common->optimized_cbracket, 1, re->top_bracket + 1);
9917 #endif
9918
9919 SLJIT_ASSERT(*common->start == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET);
9920 #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 2
9921 common->capture_last_ptr = common->ovector_start;
9922 common->ovector_start += sizeof(sljit_sw);
9923 #endif
9924 if (!check_opcode_types(common, common->start, ccend))
9925 {
9926 SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
9927 return;
9928 }
9929
9930 /* Checking flags and updating ovector_start. */
9931 if (mode == JIT_COMPILE && (re->flags & PCRE_REQCHSET) != 0 && (re->options & PCRE_NO_START_OPTIMIZE) == 0)
9932 {
9933 common->req_char_ptr = common->ovector_start;
9934 common->ovector_start += sizeof(sljit_sw);
9935 }
9936 if (mode != JIT_COMPILE)
9937 {
9938 common->start_used_ptr = common->ovector_start;
9939 common->ovector_start += sizeof(sljit_sw);
9940 if (mode == JIT_PARTIAL_SOFT_COMPILE)
9941 {
9942 common->hit_start = common->ovector_start;
9943 common->ovector_start += 2 * sizeof(sljit_sw);
9944 }
9945 else
9946 {
9947 SLJIT_ASSERT(mode == JIT_PARTIAL_HARD_COMPILE);
9948 common->needs_start_ptr = TRUE;
9949 }
9950 }
9951 if ((re->options & PCRE_FIRSTLINE) != 0)
9952 {
9953 common->first_line_end = common->ovector_start;
9954 common->ovector_start += sizeof(sljit_sw);
9955 }
9956 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
9957 common->control_head_ptr = 1;
9958 #endif
9959 if (common->control_head_ptr != 0)
9960 {
9961 common->control_head_ptr = common->ovector_start;
9962 common->ovector_start += sizeof(sljit_sw);
9963 }
9964 if (common->needs_start_ptr && common->has_set_som)
9965 {
9966 /* Saving the real start pointer is necessary. */
9967 common->start_ptr = common->ovector_start;
9968 common->ovector_start += sizeof(sljit_sw);
9969 }
9970 else
9971 common->needs_start_ptr = FALSE;
9972
9973 /* Aligning ovector to even number of sljit words. */
9974 if ((common->ovector_start & sizeof(sljit_sw)) != 0)
9975 common->ovector_start += sizeof(sljit_sw);
9976
9977 if (common->start_ptr == 0)
9978 common->start_ptr = OVECTOR(0);
9979
9980 /* Capturing brackets cannot be optimized if callouts are allowed. */
9981 if (common->capture_last_ptr != 0)
9982 memset(common->optimized_cbracket, 0, re->top_bracket + 1);
9983
9984 SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0));
9985 common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw);
9986
9987 total_length = ccend - common->start;
9988 common->private_data_ptrs = (sljit_si *)SLJIT_MALLOC(total_length * (sizeof(sljit_si) + (common->has_then ? 1 : 0)), compiler->allocator_data);
9989 if (!common->private_data_ptrs)
9990 {
9991 SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
9992 return;
9993 }
9994 memset(common->private_data_ptrs, 0, total_length * sizeof(sljit_si));
9995
9996 private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw);
9997 set_private_data_ptrs(common, &private_data_size, ccend);
9998 if (private_data_size > SLJIT_MAX_LOCAL_SIZE)
9999 {
10000 SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
10001 SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
10002 return;
10003 }
10004
10005 if (common->has_then)
10006 {
10007 common->then_offsets = (pcre_uint8 *)(common->private_data_ptrs + total_length);
10008 memset(common->then_offsets, 0, total_length);
10009 set_then_offsets(common, common->start, NULL);
10010 }
10011
10012 compiler = sljit_create_compiler(NULL);
10013 if (!compiler)
10014 {
10015 SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
10016 SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
10017 return;
10018 }
10019 common->compiler = compiler;
10020
10021 /* Main pcre_jit_exec entry. */
10022 sljit_emit_enter(compiler, 0, 1, 5, 5, 0, 0, private_data_size);
10023
10024 /* Register init. */
10025 reset_ovector(common, (re->top_bracket + 1) * 2);
10026 if (common->req_char_ptr != 0)
10027 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, SLJIT_R0, 0);
10028
10029 OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_S0, 0);
10030 OP1(SLJIT_MOV, TMP1, 0, SLJIT_S0, 0);
10031 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
10032 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end));
10033 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
10034 OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, limit_match));
10035 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, base));
10036 OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, limit));
10037 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
10038 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH, TMP1, 0);
10039
10040 if (mode == JIT_PARTIAL_SOFT_COMPILE)
10041 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
10042 if (common->mark_ptr != 0)
10043 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
10044 if (common->control_head_ptr != 0)
10045 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
10046
10047 /* Main part of the matching */
10048 if ((re->options & PCRE_ANCHORED) == 0)
10049 {
10050 mainloop_label = mainloop_entry(common, (re->flags & PCRE_HASCRORLF) != 0, (re->options & PCRE_FIRSTLINE) != 0);
10051 continue_match_label = LABEL();
10052 /* Forward search if possible. */
10053 if ((re->options & PCRE_NO_START_OPTIMIZE) == 0)
10054 {
10055 if (mode == JIT_COMPILE && fast_forward_first_n_chars(common, (re->options & PCRE_FIRSTLINE) != 0))
10056 ;
10057 else if ((re->flags & PCRE_FIRSTSET) != 0)
10058 fast_forward_first_char(common, (pcre_uchar)re->first_char, (re->flags & PCRE_FCH_CASELESS) != 0, (re->options & PCRE_FIRSTLINE) != 0);
10059 else if ((re->flags & PCRE_STARTLINE) != 0)
10060 fast_forward_newline(common, (re->options & PCRE_FIRSTLINE) != 0);
10061 else if (study != NULL && (study->flags & PCRE_STUDY_MAPPED) != 0)
10062 fast_forward_start_bits(common, study->start_bits, (re->options & PCRE_FIRSTLINE) != 0);
10063 }
10064 }
10065 else
10066 continue_match_label = LABEL();
10067
10068 if (mode == JIT_COMPILE && study->minlength > 0 && (re->options & PCRE_NO_START_OPTIMIZE) == 0)
10069 {
10070 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
10071 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(study->minlength));
10072 minlength_check_failed = CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0);
10073 }
10074 if (common->req_char_ptr != 0)
10075 reqbyte_notfound = search_requested_char(common, (pcre_uchar)re->req_char, (re->flags & PCRE_RCH_CASELESS) != 0, (re->flags & PCRE_FIRSTSET) != 0);
10076
10077 /* Store the current STR_PTR in OVECTOR(0). */
10078 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
10079 /* Copy the limit of allowed recursions. */
10080 OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH);
10081 if (common->capture_last_ptr != 0)
10082 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, -1);
10083
10084 if (common->needs_start_ptr)
10085 {
10086 SLJIT_ASSERT(common->start_ptr != OVECTOR(0));
10087 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_ptr, STR_PTR, 0);
10088 }
10089 else
10090 SLJIT_ASSERT(common->start_ptr == OVECTOR(0));
10091
10092 /* Copy the beginning of the string. */
10093 if (mode == JIT_PARTIAL_SOFT_COMPILE)
10094 {
10095 jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
10096 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
10097 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start + sizeof(sljit_sw), STR_PTR, 0);
10098 JUMPHERE(jump);
10099 }
10100 else if (mode == JIT_PARTIAL_HARD_COMPILE)
10101 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
10102
10103 compile_matchingpath(common, common->start, ccend, &rootbacktrack);
10104 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10105 {
10106 sljit_free_compiler(compiler);
10107 SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
10108 SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
10109 free_read_only_data(common->read_only_data_head, compiler->allocator_data);
10110 return;
10111 }
10112
10113 if (common->might_be_empty)
10114 {
10115 empty_match = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
10116 empty_match_found_label = LABEL();
10117 }
10118
10119 common->accept_label = LABEL();
10120 if (common->accept != NULL)
10121 set_jumps(common->accept, common->accept_label);
10122
10123 /* This means we have a match. Update the ovector. */
10124 copy_ovector(common, re->top_bracket + 1);
10125 common->quit_label = common->forced_quit_label = LABEL();
10126 if (common->quit != NULL)
10127 set_jumps(common->quit, common->quit_label);
10128 if (common->forced_quit != NULL)
10129 set_jumps(common->forced_quit, common->forced_quit_label);
10130 if (minlength_check_failed != NULL)
10131 SET_LABEL(minlength_check_failed, common->forced_quit_label);
10132 sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0);
10133
10134 if (mode != JIT_COMPILE)
10135 {
10136 common->partialmatchlabel = LABEL();
10137 set_jumps(common->partialmatch, common->partialmatchlabel);
10138 return_with_partial_match(common, common->quit_label);
10139 }
10140
10141 if (common->might_be_empty)
10142 empty_match_backtrack_label = LABEL();
10143 compile_backtrackingpath(common, rootbacktrack.top);
10144 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10145 {
10146 sljit_free_compiler(compiler);
10147 SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
10148 SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
10149 free_read_only_data(common->read_only_data_head, compiler->allocator_data);
10150 return;
10151 }
10152
10153 SLJIT_ASSERT(rootbacktrack.prev == NULL);
10154 reset_match_label = LABEL();
10155
10156 if (mode == JIT_PARTIAL_SOFT_COMPILE)
10157 {
10158 /* Update hit_start only in the first time. */
10159 jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
10160 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr);
10161 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
10162 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, TMP1, 0);
10163 JUMPHERE(jump);
10164 }
10165
10166 /* Check we have remaining characters. */
10167 if ((re->options & PCRE_ANCHORED) == 0 && (re->options & PCRE_FIRSTLINE) != 0)
10168 {
10169 SLJIT_ASSERT(common->first_line_end != 0);
10170 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
10171 }
10172
10173 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
10174
10175 if ((re->options & PCRE_ANCHORED) == 0)
10176 {
10177 if (common->ff_newline_shortcut != NULL)
10178 {
10179 if ((re->options & PCRE_FIRSTLINE) == 0)
10180 CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, common->ff_newline_shortcut);
10181 /* There cannot be more newlines here. */
10182 }
10183 else
10184 {
10185 if ((re->options & PCRE_FIRSTLINE) == 0)
10186 CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop_label);
10187 else
10188 CMPTO(SLJIT_LESS, STR_PTR, 0, TMP1, 0, mainloop_label);
10189 }
10190 }
10191
10192 /* No more remaining characters. */
10193 if (reqbyte_notfound != NULL)
10194 JUMPHERE(reqbyte_notfound);
10195
10196 if (mode == JIT_PARTIAL_SOFT_COMPILE)
10197 CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1, common->partialmatchlabel);
10198
10199 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
10200 JUMPTO(SLJIT_JUMP, common->quit_label);
10201
10202 flush_stubs(common);
10203
10204 if (common->might_be_empty)
10205 {
10206 JUMPHERE(empty_match);
10207 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
10208 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
10209 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_backtrack_label);
10210 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
10211 CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_found_label);
10212 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
10213 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found_label);
10214 JUMPTO(SLJIT_JUMP, empty_match_backtrack_label);
10215 }
10216
10217 common->currententry = common->entries;
10218 common->local_exit = TRUE;
10219 quit_label = common->quit_label;
10220 while (common->currententry != NULL)
10221 {
10222 /* Might add new entries. */
10223 compile_recurse(common);
10224 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10225 {
10226 sljit_free_compiler(compiler);
10227 SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
10228 SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
10229 free_read_only_data(common->read_only_data_head, compiler->allocator_data);
10230 return;
10231 }
10232 flush_stubs(common);
10233 common->currententry = common->currententry->next;
10234 }
10235 common->local_exit = FALSE;
10236 common->quit_label = quit_label;
10237
10238 /* Allocating stack, returns with PCRE_ERROR_JIT_STACKLIMIT if fails. */
10239 /* This is a (really) rare case. */
10240 set_jumps(common->stackalloc, LABEL());
10241 /* RETURN_ADDR is not a saved register. */
10242 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
10243 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP2, 0);
10244 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
10245 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
10246 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, top), STACK_TOP, 0);
10247 OP2(SLJIT_ADD, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, limit), SLJIT_IMM, STACK_GROWTH_RATE);
10248
10249 sljit_emit_ijump(compiler, SLJIT_CALL2, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_stack_resize));
10250 jump = CMP(SLJIT_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
10251 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
10252 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
10253 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, top));
10254 OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, limit));
10255 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
10256 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
10257
10258 /* Allocation failed. */
10259 JUMPHERE(jump);
10260 /* We break the return address cache here, but this is a really rare case. */
10261 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_JIT_STACKLIMIT);
10262 JUMPTO(SLJIT_JUMP, common->quit_label);
10263
10264 /* Call limit reached. */
10265 set_jumps(common->calllimit, LABEL());
10266 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_MATCHLIMIT);
10267 JUMPTO(SLJIT_JUMP, common->quit_label);
10268
10269 if (common->revertframes != NULL)
10270 {
10271 set_jumps(common->revertframes, LABEL());
10272 do_revertframes(common);
10273 }
10274 if (common->wordboundary != NULL)
10275 {
10276 set_jumps(common->wordboundary, LABEL());
10277 check_wordboundary(common);
10278 }
10279 if (common->anynewline != NULL)
10280 {
10281 set_jumps(common->anynewline, LABEL());
10282 check_anynewline(common);
10283 }
10284 if (common->hspace != NULL)
10285 {
10286 set_jumps(common->hspace, LABEL());
10287 check_hspace(common);
10288 }
10289 if (common->vspace != NULL)
10290 {
10291 set_jumps(common->vspace, LABEL());
10292 check_vspace(common);
10293 }
10294 if (common->casefulcmp != NULL)
10295 {
10296 set_jumps(common->casefulcmp, LABEL());
10297 do_casefulcmp(common);
10298 }
10299 if (common->caselesscmp != NULL)
10300 {
10301 set_jumps(common->caselesscmp, LABEL());
10302 do_caselesscmp(common);
10303 }
10304 if (common->reset_match != NULL)
10305 {
10306 set_jumps(common->reset_match, LABEL());
10307 do_reset_match(common, (re->top_bracket + 1) * 2);
10308 CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP1, 0, continue_match_label);
10309 OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
10310 JUMPTO(SLJIT_JUMP, reset_match_label);
10311 }
10312 #ifdef SUPPORT_UTF
10313 #ifdef COMPILE_PCRE8
10314 if (common->utfreadchar != NULL)
10315 {
10316 set_jumps(common->utfreadchar, LABEL());
10317 do_utfreadchar(common);
10318 }
10319 if (common->utfreadchar16 != NULL)
10320 {
10321 set_jumps(common->utfreadchar16, LABEL());
10322 do_utfreadchar16(common);
10323 }
10324 if (common->utfreadtype8 != NULL)
10325 {
10326 set_jumps(common->utfreadtype8, LABEL());
10327 do_utfreadtype8(common);
10328 }
10329 #endif /* COMPILE_PCRE8 */
10330 #endif /* SUPPORT_UTF */
10331 #ifdef SUPPORT_UCP
10332 if (common->getucd != NULL)
10333 {
10334 set_jumps(common->getucd, LABEL());
10335 do_getucd(common);
10336 }
10337 #endif
10338
10339 SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
10340 SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
10341
10342 executable_func = sljit_generate_code(compiler);
10343 executable_size = sljit_get_generated_code_size(compiler);
10344 label_addr = common->label_addrs;
10345 while (label_addr != NULL)
10346 {
10347 *label_addr->update_addr = sljit_get_label_addr(label_addr->label);
10348 label_addr = label_addr->next;
10349 }
10350 sljit_free_compiler(compiler);
10351 if (executable_func == NULL)
10352 {
10353 free_read_only_data(common->read_only_data_head, compiler->allocator_data);
10354 return;
10355 }
10356
10357 /* Reuse the function descriptor if possible. */
10358 if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 && extra->executable_jit != NULL)
10359 functions = (executable_functions *)extra->executable_jit;
10360 else
10361 {
10362 /* Note: If your memory-checker has flagged the allocation below as a
10363 * memory leak, it is probably because you either forgot to call
10364 * pcre_free_study() (or pcre16_free_study()) on the pcre_extra (or
10365 * pcre16_extra) object, or you called said function after having
10366 * cleared the PCRE_EXTRA_EXECUTABLE_JIT bit from the "flags" field
10367 * of the object. (The function will only free the JIT data if the
10368 * bit remains set, as the bit indicates that the pointer to the data
10369 * is valid.)
10370 */
10371 functions = SLJIT_MALLOC(sizeof(executable_functions), compiler->allocator_data);
10372 if (functions == NULL)
10373 {
10374 /* This case is highly unlikely since we just recently
10375 freed a lot of memory. Not impossible though. */
10376 sljit_free_code(executable_func);
10377 free_read_only_data(common->read_only_data_head, compiler->allocator_data);
10378 return;
10379 }
10380 memset(functions, 0, sizeof(executable_functions));
10381 functions->top_bracket = (re->top_bracket + 1) * 2;
10382 functions->limit_match = (re->flags & PCRE_MLSET) != 0 ? re->limit_match : 0;
10383 extra->executable_jit = functions;
10384 extra->flags |= PCRE_EXTRA_EXECUTABLE_JIT;
10385 }
10386
10387 functions->executable_funcs[mode] = executable_func;
10388 functions->read_only_data_heads[mode] = common->read_only_data_head;
10389 functions->executable_sizes[mode] = executable_size;
10390 }
10391
jit_machine_stack_exec(jit_arguments * arguments,void * executable_func)10392 static SLJIT_NOINLINE int jit_machine_stack_exec(jit_arguments *arguments, void *executable_func)
10393 {
10394 union {
10395 void *executable_func;
10396 jit_function call_executable_func;
10397 } convert_executable_func;
10398 pcre_uint8 local_space[MACHINE_STACK_SIZE];
10399 struct sljit_stack local_stack;
10400
10401 local_stack.top = (sljit_sw)&local_space;
10402 local_stack.base = local_stack.top;
10403 local_stack.limit = local_stack.base + MACHINE_STACK_SIZE;
10404 local_stack.max_limit = local_stack.limit;
10405 arguments->stack = &local_stack;
10406 convert_executable_func.executable_func = executable_func;
10407 return convert_executable_func.call_executable_func(arguments);
10408 }
10409
10410 int
PRIV(jit_exec)10411 PRIV(jit_exec)(const PUBL(extra) *extra_data, const pcre_uchar *subject,
10412 int length, int start_offset, int options, int *offsets, int offset_count)
10413 {
10414 executable_functions *functions = (executable_functions *)extra_data->executable_jit;
10415 union {
10416 void *executable_func;
10417 jit_function call_executable_func;
10418 } convert_executable_func;
10419 jit_arguments arguments;
10420 int max_offset_count;
10421 int retval;
10422 int mode = JIT_COMPILE;
10423
10424 if ((options & PCRE_PARTIAL_HARD) != 0)
10425 mode = JIT_PARTIAL_HARD_COMPILE;
10426 else if ((options & PCRE_PARTIAL_SOFT) != 0)
10427 mode = JIT_PARTIAL_SOFT_COMPILE;
10428
10429 if (functions->executable_funcs[mode] == NULL)
10430 return PCRE_ERROR_JIT_BADOPTION;
10431
10432 /* Sanity checks should be handled by pcre_exec. */
10433 arguments.str = subject + start_offset;
10434 arguments.begin = subject;
10435 arguments.end = subject + length;
10436 arguments.mark_ptr = NULL;
10437 /* JIT decreases this value less frequently than the interpreter. */
10438 arguments.limit_match = ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0) ? MATCH_LIMIT : (pcre_uint32)(extra_data->match_limit);
10439 if (functions->limit_match != 0 && functions->limit_match < arguments.limit_match)
10440 arguments.limit_match = functions->limit_match;
10441 arguments.notbol = (options & PCRE_NOTBOL) != 0;
10442 arguments.noteol = (options & PCRE_NOTEOL) != 0;
10443 arguments.notempty = (options & PCRE_NOTEMPTY) != 0;
10444 arguments.notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
10445 arguments.offsets = offsets;
10446 arguments.callout_data = (extra_data->flags & PCRE_EXTRA_CALLOUT_DATA) != 0 ? extra_data->callout_data : NULL;
10447 arguments.real_offset_count = offset_count;
10448
10449 /* pcre_exec() rounds offset_count to a multiple of 3, and then uses only 2/3 of
10450 the output vector for storing captured strings, with the remainder used as
10451 workspace. We don't need the workspace here. For compatibility, we limit the
10452 number of captured strings in the same way as pcre_exec(), so that the user
10453 gets the same result with and without JIT. */
10454
10455 if (offset_count != 2)
10456 offset_count = ((offset_count - (offset_count % 3)) * 2) / 3;
10457 max_offset_count = functions->top_bracket;
10458 if (offset_count > max_offset_count)
10459 offset_count = max_offset_count;
10460 arguments.offset_count = offset_count;
10461
10462 if (functions->callback)
10463 arguments.stack = (struct sljit_stack *)functions->callback(functions->userdata);
10464 else
10465 arguments.stack = (struct sljit_stack *)functions->userdata;
10466
10467 if (arguments.stack == NULL)
10468 retval = jit_machine_stack_exec(&arguments, functions->executable_funcs[mode]);
10469 else
10470 {
10471 convert_executable_func.executable_func = functions->executable_funcs[mode];
10472 retval = convert_executable_func.call_executable_func(&arguments);
10473 }
10474
10475 if (retval * 2 > offset_count)
10476 retval = 0;
10477 if ((extra_data->flags & PCRE_EXTRA_MARK) != 0)
10478 *(extra_data->mark) = arguments.mark_ptr;
10479
10480 return retval;
10481 }
10482
10483 #if defined COMPILE_PCRE8
10484 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_jit_exec(const pcre * argument_re,const pcre_extra * extra_data,PCRE_SPTR subject,int length,int start_offset,int options,int * offsets,int offset_count,pcre_jit_stack * stack)10485 pcre_jit_exec(const pcre *argument_re, const pcre_extra *extra_data,
10486 PCRE_SPTR subject, int length, int start_offset, int options,
10487 int *offsets, int offset_count, pcre_jit_stack *stack)
10488 #elif defined COMPILE_PCRE16
10489 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
10490 pcre16_jit_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
10491 PCRE_SPTR16 subject, int length, int start_offset, int options,
10492 int *offsets, int offset_count, pcre16_jit_stack *stack)
10493 #elif defined COMPILE_PCRE32
10494 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
10495 pcre32_jit_exec(const pcre32 *argument_re, const pcre32_extra *extra_data,
10496 PCRE_SPTR32 subject, int length, int start_offset, int options,
10497 int *offsets, int offset_count, pcre32_jit_stack *stack)
10498 #endif
10499 {
10500 pcre_uchar *subject_ptr = (pcre_uchar *)subject;
10501 executable_functions *functions = (executable_functions *)extra_data->executable_jit;
10502 union {
10503 void *executable_func;
10504 jit_function call_executable_func;
10505 } convert_executable_func;
10506 jit_arguments arguments;
10507 int max_offset_count;
10508 int retval;
10509 int mode = JIT_COMPILE;
10510
10511 SLJIT_UNUSED_ARG(argument_re);
10512
10513 /* Plausibility checks */
10514 if ((options & ~PUBLIC_JIT_EXEC_OPTIONS) != 0) return PCRE_ERROR_JIT_BADOPTION;
10515
10516 if ((options & PCRE_PARTIAL_HARD) != 0)
10517 mode = JIT_PARTIAL_HARD_COMPILE;
10518 else if ((options & PCRE_PARTIAL_SOFT) != 0)
10519 mode = JIT_PARTIAL_SOFT_COMPILE;
10520
10521 if (functions->executable_funcs[mode] == NULL)
10522 return PCRE_ERROR_JIT_BADOPTION;
10523
10524 /* Sanity checks should be handled by pcre_exec. */
10525 arguments.stack = (struct sljit_stack *)stack;
10526 arguments.str = subject_ptr + start_offset;
10527 arguments.begin = subject_ptr;
10528 arguments.end = subject_ptr + length;
10529 arguments.mark_ptr = NULL;
10530 /* JIT decreases this value less frequently than the interpreter. */
10531 arguments.limit_match = ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0) ? MATCH_LIMIT : (pcre_uint32)(extra_data->match_limit);
10532 if (functions->limit_match != 0 && functions->limit_match < arguments.limit_match)
10533 arguments.limit_match = functions->limit_match;
10534 arguments.notbol = (options & PCRE_NOTBOL) != 0;
10535 arguments.noteol = (options & PCRE_NOTEOL) != 0;
10536 arguments.notempty = (options & PCRE_NOTEMPTY) != 0;
10537 arguments.notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
10538 arguments.offsets = offsets;
10539 arguments.callout_data = (extra_data->flags & PCRE_EXTRA_CALLOUT_DATA) != 0 ? extra_data->callout_data : NULL;
10540 arguments.real_offset_count = offset_count;
10541
10542 /* pcre_exec() rounds offset_count to a multiple of 3, and then uses only 2/3 of
10543 the output vector for storing captured strings, with the remainder used as
10544 workspace. We don't need the workspace here. For compatibility, we limit the
10545 number of captured strings in the same way as pcre_exec(), so that the user
10546 gets the same result with and without JIT. */
10547
10548 if (offset_count != 2)
10549 offset_count = ((offset_count - (offset_count % 3)) * 2) / 3;
10550 max_offset_count = functions->top_bracket;
10551 if (offset_count > max_offset_count)
10552 offset_count = max_offset_count;
10553 arguments.offset_count = offset_count;
10554
10555 convert_executable_func.executable_func = functions->executable_funcs[mode];
10556 retval = convert_executable_func.call_executable_func(&arguments);
10557
10558 if (retval * 2 > offset_count)
10559 retval = 0;
10560 if ((extra_data->flags & PCRE_EXTRA_MARK) != 0)
10561 *(extra_data->mark) = arguments.mark_ptr;
10562
10563 return retval;
10564 }
10565
10566 void
PRIV(jit_free)10567 PRIV(jit_free)(void *executable_funcs)
10568 {
10569 int i;
10570 executable_functions *functions = (executable_functions *)executable_funcs;
10571 for (i = 0; i < JIT_NUMBER_OF_COMPILE_MODES; i++)
10572 {
10573 if (functions->executable_funcs[i] != NULL)
10574 sljit_free_code(functions->executable_funcs[i]);
10575 free_read_only_data(functions->read_only_data_heads[i], NULL);
10576 }
10577 SLJIT_FREE(functions, compiler->allocator_data);
10578 }
10579
10580 int
PRIV(jit_get_size)10581 PRIV(jit_get_size)(void *executable_funcs)
10582 {
10583 int i;
10584 sljit_uw size = 0;
10585 sljit_uw *executable_sizes = ((executable_functions *)executable_funcs)->executable_sizes;
10586 for (i = 0; i < JIT_NUMBER_OF_COMPILE_MODES; i++)
10587 size += executable_sizes[i];
10588 return (int)size;
10589 }
10590
10591 const char*
PRIV(jit_get_target)10592 PRIV(jit_get_target)(void)
10593 {
10594 return sljit_get_platform_name();
10595 }
10596
10597 #if defined COMPILE_PCRE8
10598 PCRE_EXP_DECL pcre_jit_stack *
pcre_jit_stack_alloc(int startsize,int maxsize)10599 pcre_jit_stack_alloc(int startsize, int maxsize)
10600 #elif defined COMPILE_PCRE16
10601 PCRE_EXP_DECL pcre16_jit_stack *
10602 pcre16_jit_stack_alloc(int startsize, int maxsize)
10603 #elif defined COMPILE_PCRE32
10604 PCRE_EXP_DECL pcre32_jit_stack *
10605 pcre32_jit_stack_alloc(int startsize, int maxsize)
10606 #endif
10607 {
10608 if (startsize < 1 || maxsize < 1)
10609 return NULL;
10610 if (startsize > maxsize)
10611 startsize = maxsize;
10612 startsize = (startsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1);
10613 maxsize = (maxsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1);
10614 return (PUBL(jit_stack)*)sljit_allocate_stack(startsize, maxsize, NULL);
10615 }
10616
10617 #if defined COMPILE_PCRE8
10618 PCRE_EXP_DECL void
pcre_jit_stack_free(pcre_jit_stack * stack)10619 pcre_jit_stack_free(pcre_jit_stack *stack)
10620 #elif defined COMPILE_PCRE16
10621 PCRE_EXP_DECL void
10622 pcre16_jit_stack_free(pcre16_jit_stack *stack)
10623 #elif defined COMPILE_PCRE32
10624 PCRE_EXP_DECL void
10625 pcre32_jit_stack_free(pcre32_jit_stack *stack)
10626 #endif
10627 {
10628 sljit_free_stack((struct sljit_stack *)stack, NULL);
10629 }
10630
10631 #if defined COMPILE_PCRE8
10632 PCRE_EXP_DECL void
pcre_assign_jit_stack(pcre_extra * extra,pcre_jit_callback callback,void * userdata)10633 pcre_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata)
10634 #elif defined COMPILE_PCRE16
10635 PCRE_EXP_DECL void
10636 pcre16_assign_jit_stack(pcre16_extra *extra, pcre16_jit_callback callback, void *userdata)
10637 #elif defined COMPILE_PCRE32
10638 PCRE_EXP_DECL void
10639 pcre32_assign_jit_stack(pcre32_extra *extra, pcre32_jit_callback callback, void *userdata)
10640 #endif
10641 {
10642 executable_functions *functions;
10643 if (extra != NULL &&
10644 (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 &&
10645 extra->executable_jit != NULL)
10646 {
10647 functions = (executable_functions *)extra->executable_jit;
10648 functions->callback = callback;
10649 functions->userdata = userdata;
10650 }
10651 }
10652
10653 #if defined COMPILE_PCRE8
10654 PCRE_EXP_DECL void
pcre_jit_free_unused_memory(void)10655 pcre_jit_free_unused_memory(void)
10656 #elif defined COMPILE_PCRE16
10657 PCRE_EXP_DECL void
10658 pcre16_jit_free_unused_memory(void)
10659 #elif defined COMPILE_PCRE32
10660 PCRE_EXP_DECL void
10661 pcre32_jit_free_unused_memory(void)
10662 #endif
10663 {
10664 sljit_free_unused_memory_exec();
10665 }
10666
10667 #else /* SUPPORT_JIT */
10668
10669 /* These are dummy functions to avoid linking errors when JIT support is not
10670 being compiled. */
10671
10672 #if defined COMPILE_PCRE8
10673 PCRE_EXP_DECL pcre_jit_stack *
pcre_jit_stack_alloc(int startsize,int maxsize)10674 pcre_jit_stack_alloc(int startsize, int maxsize)
10675 #elif defined COMPILE_PCRE16
10676 PCRE_EXP_DECL pcre16_jit_stack *
10677 pcre16_jit_stack_alloc(int startsize, int maxsize)
10678 #elif defined COMPILE_PCRE32
10679 PCRE_EXP_DECL pcre32_jit_stack *
10680 pcre32_jit_stack_alloc(int startsize, int maxsize)
10681 #endif
10682 {
10683 (void)startsize;
10684 (void)maxsize;
10685 return NULL;
10686 }
10687
10688 #if defined COMPILE_PCRE8
10689 PCRE_EXP_DECL void
pcre_jit_stack_free(pcre_jit_stack * stack)10690 pcre_jit_stack_free(pcre_jit_stack *stack)
10691 #elif defined COMPILE_PCRE16
10692 PCRE_EXP_DECL void
10693 pcre16_jit_stack_free(pcre16_jit_stack *stack)
10694 #elif defined COMPILE_PCRE32
10695 PCRE_EXP_DECL void
10696 pcre32_jit_stack_free(pcre32_jit_stack *stack)
10697 #endif
10698 {
10699 (void)stack;
10700 }
10701
10702 #if defined COMPILE_PCRE8
10703 PCRE_EXP_DECL void
pcre_assign_jit_stack(pcre_extra * extra,pcre_jit_callback callback,void * userdata)10704 pcre_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata)
10705 #elif defined COMPILE_PCRE16
10706 PCRE_EXP_DECL void
10707 pcre16_assign_jit_stack(pcre16_extra *extra, pcre16_jit_callback callback, void *userdata)
10708 #elif defined COMPILE_PCRE32
10709 PCRE_EXP_DECL void
10710 pcre32_assign_jit_stack(pcre32_extra *extra, pcre32_jit_callback callback, void *userdata)
10711 #endif
10712 {
10713 (void)extra;
10714 (void)callback;
10715 (void)userdata;
10716 }
10717
10718 #if defined COMPILE_PCRE8
10719 PCRE_EXP_DECL void
pcre_jit_free_unused_memory(void)10720 pcre_jit_free_unused_memory(void)
10721 #elif defined COMPILE_PCRE16
10722 PCRE_EXP_DECL void
10723 pcre16_jit_free_unused_memory(void)
10724 #elif defined COMPILE_PCRE32
10725 PCRE_EXP_DECL void
10726 pcre32_jit_free_unused_memory(void)
10727 #endif
10728 {
10729 }
10730
10731 #endif
10732
10733 /* End of pcre_jit_compile.c */
10734