1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 This module by Zoltan Herczeg
10 Original API code Copyright (c) 1997-2012 University of Cambridge
11 New API code Copyright (c) 2016-2021 University of Cambridge
12
13 -----------------------------------------------------------------------------
14 Redistribution and use in source and binary forms, with or without
15 modification, are permitted provided that the following conditions are met:
16
17 * Redistributions of source code must retain the above copyright notice,
18 this list of conditions and the following disclaimer.
19
20 * Redistributions in binary form must reproduce the above copyright
21 notice, this list of conditions and the following disclaimer in the
22 documentation and/or other materials provided with the distribution.
23
24 * Neither the name of the University of Cambridge nor the names of its
25 contributors may be used to endorse or promote products derived from
26 this software without specific prior written permission.
27
28 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
29 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
32 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 POSSIBILITY OF SUCH DAMAGE.
39 -----------------------------------------------------------------------------
40 */
41
42 #ifdef HAVE_CONFIG_H
43 #include "config.h"
44 #endif
45
46 #if defined(__has_feature)
47 #if __has_feature(memory_sanitizer)
48 #include <sanitizer/msan_interface.h>
49 #endif /* __has_feature(memory_sanitizer) */
50 #endif /* defined(__has_feature) */
51
52 #include "pcre2_internal.h"
53
54 #ifdef SUPPORT_JIT
55
56 /* All-in-one: Since we use the JIT compiler only from here,
57 we just include it. This way we don't need to touch the build
58 system files. */
59
/* Build-time switches for the sljit sources that are included below. */
#define SLJIT_CONFIG_AUTO 1
#define SLJIT_CONFIG_STATIC 1
#define SLJIT_VERBOSE 0

/* SLJIT_DEBUG follows the PCRE2_DEBUG build setting. */
#ifdef PCRE2_DEBUG
#define SLJIT_DEBUG 1
#else
#define SLJIT_DEBUG 0
#endif

/* Route the sljit allocation macros to the pcre2_jit_malloc / pcre2_jit_free
wrappers defined right after these macros. */
#define SLJIT_MALLOC(size, allocator_data) pcre2_jit_malloc(size, allocator_data)
#define SLJIT_FREE(ptr, allocator_data) pcre2_jit_free(ptr, allocator_data)
72
pcre2_jit_malloc(size_t size,void * allocator_data)73 static void * pcre2_jit_malloc(size_t size, void *allocator_data)
74 {
75 pcre2_memctl *allocator = ((pcre2_memctl*)allocator_data);
76 return allocator->malloc(size, allocator->memory_data);
77 }
78
pcre2_jit_free(void * ptr,void * allocator_data)79 static void pcre2_jit_free(void *ptr, void *allocator_data)
80 {
81 pcre2_memctl *allocator = ((pcre2_memctl*)allocator_data);
82 allocator->free(ptr, allocator->memory_data);
83 }
84
85 #include "sljit/sljitLir.c"
86
87 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
88 #error Unsupported architecture
89 #endif
90
91 /* Defines for debugging purposes. */
92
93 /* 1 - Use unoptimized capturing brackets.
94 2 - Enable capture_last_ptr (includes option 1). */
95 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
96
97 /* 1 - Always have a control head. */
98 /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
99
100 /* Allocate memory for the regex stack on the real machine stack.
101 Fast, but limited size. */
102 #define MACHINE_STACK_SIZE 32768
103
/* Growth rate for stack allocated by the OS. Should be a multiple
of the page size. */
106 #define STACK_GROWTH_RATE 8192
107
108 /* Enable to check that the allocation could destroy temporaries. */
109 #if defined SLJIT_DEBUG && SLJIT_DEBUG
110 #define DESTROY_REGISTERS 1
111 #endif
112
113 /*
Short summary about the backtracking mechanism employed by the jit code generator:
115
The code generator follows the recursive nature of the PERL compatible regular
expressions. The basic blocks of regular expressions are condition checkers
which execute different commands depending on the result of the condition check.
The relationship between the operators can be horizontal (concatenation) and
vertical (sub-expression) (See struct backtrack_common for more details).
121
122 'ab' - 'a' and 'b' regexps are concatenated
123 'a+' - 'a' is the sub-expression of the '+' operator
124
The condition checkers are boolean (true/false) checkers. Machine code is generated
for the checker itself and for the actions depending on the result of the checker.
The 'true' case is called the matching path (expected path), and the other is called
the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
branches on the matching path.
130
131 Greedy star operator (*) :
132 Matching path: match happens.
133 Backtrack path: match failed.
134 Non-greedy star operator (*?) :
135 Matching path: no need to perform a match.
136 Backtrack path: match is required.
137
The following example shows the code generated for a capturing bracket
with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
we have the following regular expression:
141
142 A(B|C)D
143
144 The generated code will be the following:
145
146 A matching path
147 '(' matching path (pushing arguments to the stack)
148 B matching path
149 ')' matching path (pushing arguments to the stack)
150 D matching path
151 return with successful match
152
153 D backtrack path
154 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
155 B backtrack path
156 C expected path
157 jump to D matching path
158 C backtrack path
159 A backtrack path
160
Notice that the order of the backtrack code paths is the opposite of the fast
code paths. In this way the topmost value on the stack always belongs
to the current backtrack code path. The backtrack path must check
whether there is a next alternative. If so, it needs to jump back to
the matching path eventually. Otherwise it needs to clear out its own stack
frame and continue the execution on the backtrack code paths.
167 */
168
169 /*
170 Saved stack frames:
171
Atomic blocks and asserts require reloading the values of private data
when the backtrack mechanism is performed. Because of OP_RECURSE, the data
are not necessarily known at compile time, thus we need a dynamic restore
mechanism.
176
177 The stack frames are stored in a chain list, and have the following format:
178 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
179
180 Thus we can restore the private data to a particular point in the stack.
181 */
182
/* Argument block for a match run; the jit_function pointer type takes a
pointer to this structure as its only parameter. */
typedef struct jit_arguments {
  /* Pointers first. */
  struct sljit_stack *stack;
  PCRE2_SPTR str;
  PCRE2_SPTR begin;
  PCRE2_SPTR end;
  pcre2_match_data *match_data;
  PCRE2_SPTR startchar_ptr;
  PCRE2_UCHAR *mark_ptr;
  /* Callout function pointer and the opaque data pointer stored with it. */
  int (*callout)(pcre2_callout_block *, void *);
  void *callout_data;
  /* Everything else after. */
  sljit_uw offset_limit;
  sljit_u32 limit_match;
  sljit_u32 oveccount;
  sljit_u32 options;
} jit_arguments;
200
/* Length of the per-mode arrays in executable_functions. */
#define JIT_NUMBER_OF_COMPILE_MODES 3

/* Bookkeeping for generated code. The three arrays are parallel and have
JIT_NUMBER_OF_COMPILE_MODES slots each (presumably one slot per JIT compile
mode - confirm against the code that fills them). */
typedef struct executable_functions {
  void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
  void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES];
  sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
  sljit_u32 top_bracket;
  sljit_u32 limit_match;
} executable_functions;
210
/* Singly linked list node holding one sljit jump. */
typedef struct jump_list {
  struct sljit_jump *jump;
  struct jump_list *next;
} jump_list;
215
/* Singly linked list node pairing a jump ('start') with a label ('quit'). */
typedef struct stub_list {
  struct sljit_jump *start;
  struct sljit_label *quit;
  struct stub_list *next;
} stub_list;
221
/* Negative sentinel values; the framesize members of the backtrack
structures are documented as negative when no frame is needed. */
enum frame_types {
  no_frame = -1,
  no_stack = -2
};
226
/* Item types of the control verb chain (see the control_head_ptr member
of compiler_common). */
enum control_types {
  type_mark = 0,
  type_then_trap = 1
};
231
/* Kinds used by the early fail optimization (see the early_fail_start_ptr
and early_fail_end_ptr members of compiler_common). */
enum early_fail_types {
  type_skip = 0,
  type_fail = 1,
  type_fail_range = 2
};
237
/* Function pointer type: takes a jit_arguments block and returns an int
(presumably the entry point type of the generated code - confirm). */
typedef int (SLJIT_FUNC *jit_function)(jit_arguments *args);
239
/* The following structure is the key data type for the recursive
code generator. It is allocated by compile_matchingpath, and contains
the arguments for compile_backtrackingpath. Must be the first member
of its descendants (every *_backtrack structure in this file starts
with a 'backtrack_common common' member). */
typedef struct backtrack_common {
  /* Backtracking path of an opcode, which falls back
     to our opcode, if it cannot resume matching. */
  struct backtrack_common *prev;
  /* Backtracks for opcodes without backtracking path.
     These opcodes are between 'prev' and the current
     opcode, and they never resume the match. */
  jump_list *simple_backtracks;
  /* Internal backtracking list for block constructs
     which contains other opcodes, such as brackets,
     asserts, conditionals, etc. */
  struct backtrack_common *top;
  /* Backtracks used internally by the opcode. For component
     opcodes, this list is also used by those opcodes without
     backtracking path which follows the 'top' backtrack. */
  jump_list *own_backtracks;
  /* Opcode pointer. */
  PCRE2_SPTR cc;
} backtrack_common;
263
/* Backtrack data for assertions. */
typedef struct assert_backtrack {
  backtrack_common common;
  /* Jumps collected for the failed-condition case. */
  jump_list *condfailed;
  /* Less than 0 if a frame is not needed. */
  int framesize;
  /* Points to our private memory word on the stack. */
  int private_data_ptr;
  /* For iterators. */
  struct sljit_label *matchingpath;
} assert_backtrack;
274
/* Backtrack data for bracket constructs. */
typedef struct bracket_backtrack {
  backtrack_common common;
  /* Where to continue if an alternative is successfully matched. */
  struct sljit_label *alternative_matchingpath;
  /* For rmin and rmax iterators. */
  struct sljit_label *recursive_matchingpath;
  /* For greedy ? operator. */
  struct sljit_label *zero_matchingpath;
  /* Contains the branches of a failed condition. Only one member of
     the union is meaningful at a time, depending on the bracket kind. */
  union {
    /* Both for OP_COND, OP_SCOND. */
    jump_list *condfailed;
    assert_backtrack *assert;
    /* For OP_ONCE. Less than 0 if not needed. */
    int framesize;
    /* For brackets with >3 alternatives. */
    struct sljit_put_label *matching_put_label;
  } u;
  /* Points to our private memory word on the stack. */
  int private_data_ptr;
} bracket_backtrack;
296
/* Backtrack data for the *POS bracket variants (name-based; the opcode
list in next_opcode contains OP_BRAPOS, OP_CBRAPOS, OP_SBRAPOS, OP_SCBRAPOS). */
typedef struct bracketpos_backtrack {
  backtrack_common common;
  /* Points to our private memory word on the stack. */
  int private_data_ptr;
  /* Reverting stack is needed. */
  int framesize;
  /* Allocated stack size. */
  int stacksize;
} bracketpos_backtrack;
306
/* Backtrack data carrying a single matching path label (name-based:
used for OP_BRAMINZERO - confirm against the code generator). */
typedef struct braminzero_backtrack {
  backtrack_common common;
  struct sljit_label *matchingpath;
} braminzero_backtrack;
311
/* Backtrack data for character iterators. */
typedef struct char_iterator_backtrack {
  backtrack_common common;
  /* Next iteration. */
  struct sljit_label *matchingpath;
  /* Either a jump list or the 'charpos' description; the two share storage. */
  union {
    jump_list *backtracks;
    struct {
      unsigned int othercasebit;
      PCRE2_UCHAR chr;
      BOOL enabled;
    } charpos;
  } u;
} char_iterator_backtrack;
325
/* Backtrack data for reference iterators (name-based - confirm). */
typedef struct ref_iterator_backtrack {
  backtrack_common common;
  /* Next iteration. */
  struct sljit_label *matchingpath;
} ref_iterator_backtrack;
331
/* One node of a linked list of recursion functions; records the labels of
a function and the calls that still wait for those labels. */
typedef struct recurse_entry {
  struct recurse_entry *next;
  /* Contains the function entry label. */
  struct sljit_label *entry_label;
  /* Contains the function backtrack label (the original comment repeated
     the entry label text; meaning taken from the member name - confirm). */
  struct sljit_label *backtrack_label;
  /* Collects the entry calls until the function is created. */
  jump_list *entry_calls;
  /* Collects the backtrack calls until the function is created. */
  jump_list *backtrack_calls;
  /* Points to the starting opcode. */
  sljit_sw start;
} recurse_entry;
345
/* Backtrack data for a recursion. */
typedef struct recurse_backtrack {
  backtrack_common common;
  /* Return to the matching path. */
  struct sljit_label *matchingpath;
  /* Recursive pattern. */
  recurse_entry *entry;
  /* Pattern is inlined. */
  BOOL inlined_pattern;
} recurse_backtrack;
355
/* Backtrack data carrying a single matching path label (name-based:
used for OP_VREVERSE - confirm against the code generator). */
typedef struct vreverse_backtrack {
  backtrack_common common;
  /* Return to the matching path. */
  struct sljit_label *matchingpath;
} vreverse_backtrack;
361
/* Extra opcode value equal to OP_TABLE_LENGTH (presumably chosen so that it
cannot collide with a real opcode - confirm). */
#define OP_THEN_TRAP OP_TABLE_LENGTH

/* Backtrack data describing where the then opcodes of an alternative exit. */
typedef struct then_trap_backtrack {
  backtrack_common common;
  /* If then_trap is not NULL, this structure contains the real
     then_trap for the backtracking path. */
  struct then_trap_backtrack *then_trap;
  /* Points to the starting opcode. */
  sljit_sw start;
  /* Exit point for the then opcodes of this alternative. */
  jump_list *quit;
  /* Frame size of the current alternative. */
  int framesize;
} then_trap_backtrack;
376
/* Limits for fast forward character data; MAX_DIFF_CHARS is the capacity of
the chars array in fast_forward_char_data. */
#define MAX_N_CHARS 12
#define MAX_DIFF_CHARS 5

/* Set of characters acceptable at one position. */
typedef struct fast_forward_char_data {
  /* Number of characters in the chars array, 255 for any character. */
  sljit_u8 count;
  /* Number of last UTF-8 characters in the chars array. */
  sljit_u8 last_count;
  /* Available characters in the current position. */
  PCRE2_UCHAR chars[MAX_DIFF_CHARS];
} fast_forward_char_data;
388
#define MAX_CLASS_RANGE_SIZE 4
#define MAX_CLASS_CHARS_SIZE 3

/* Shared state of one JIT compilation. The helper functions and the
shortcut macros of this file reach it through a pointer named 'common'
(for example DEFINE_COMPILER reads common->compiler and PRIVATE_DATA reads
common->private_data_ptrs and common->start). */
typedef struct compiler_common {
  /* The sljit generic compiler. */
  struct sljit_compiler *compiler;
  /* Compiled regular expression. */
  pcre2_real_code *re;
  /* First byte code. */
  PCRE2_SPTR start;
  /* Maps private data offset to each opcode. */
  sljit_s32 *private_data_ptrs;
  /* Chain list of read-only data ptrs. */
  void *read_only_data_head;
  /* Tells whether the capturing bracket is optimized. */
  sljit_u8 *optimized_cbracket;
  /* Tells whether the starting offset is a target of then. */
  sljit_u8 *then_offsets;
  /* Current position where a THEN must jump. */
  then_trap_backtrack *then_trap;
  /* Starting offset of private data for capturing brackets. */
  sljit_s32 cbra_ptr;
  /* Output vector starting point. Must be divisible by 2. */
  sljit_s32 ovector_start;
  /* Points to the starting character of the current match. */
  sljit_s32 start_ptr;
  /* Last known position of the requested byte. */
  sljit_s32 req_char_ptr;
  /* Head of the last recursion. */
  sljit_s32 recursive_head_ptr;
  /* First inspected character for partial matching.
     (Needed for avoiding zero length partial matches.) */
  sljit_s32 start_used_ptr;
  /* Starting pointer for partial soft matches. */
  sljit_s32 hit_start;
  /* Pointer of the match end position. */
  sljit_s32 match_end_ptr;
  /* Points to the marked string. */
  sljit_s32 mark_ptr;
  /* Head of the recursive control verb management chain.
     Each item must have a previous offset and type
     (see control_types) values. See do_search_mark. */
  sljit_s32 control_head_ptr;
  /* Points to the last matched capture block index. */
  sljit_s32 capture_last_ptr;
  /* Fast forward skipping byte code pointer. */
  PCRE2_SPTR fast_forward_bc_ptr;
  /* Locals used by fast fail optimization. */
  sljit_s32 early_fail_start_ptr;
  sljit_s32 early_fail_end_ptr;
  /* Variables used by recursive call generator. */
  sljit_s32 recurse_bitset_size;
  uint8_t *recurse_bitset;

  /* Flipped and lower case tables. */
  const sljit_u8 *fcc;
  sljit_sw lcc;
  /* Mode can be PCRE2_JIT_COMPLETE and others. */
  int mode;
  /* TRUE, when empty match is accepted for partial matching. */
  BOOL allow_empty_partial;
  /* TRUE, when minlength is greater than 0.
     NOTE(review): the member name suggests the opposite condition
     (a match that can be empty); check_opcode_types also sets it to TRUE
     for OP_SET_SOM. Confirm against the code that initializes it. */
  BOOL might_be_empty;
  /* \K is found in the pattern. */
  BOOL has_set_som;
  /* (*SKIP:arg) is found in the pattern. */
  BOOL has_skip_arg;
  /* (*THEN) is found in the pattern. */
  BOOL has_then;
  /* (*SKIP) or (*SKIP:arg) is found in lookbehind assertion. */
  BOOL has_skip_in_assert_back;
  /* Quit is redirected by recurse, negative assertion, or positive assertion in conditional block. */
  BOOL local_quit_available;
  /* Currently in a positive assertion. */
  BOOL in_positive_assertion;
  /* Newline control. */
  int nltype;
  sljit_u32 nlmax;
  sljit_u32 nlmin;
  int newline;
  int bsr_nltype;
  sljit_u32 bsr_nlmax;
  sljit_u32 bsr_nlmin;
  /* Dollar endonly. */
  int endonly;
  /* Tables. */
  sljit_sw ctypes;
  /* Named capturing brackets. */
  PCRE2_SPTR name_table;
  sljit_sw name_count;
  sljit_sw name_entry_size;

  /* Labels and jump lists. */
  struct sljit_label *partialmatchlabel;
  struct sljit_label *quit_label;
  struct sljit_label *abort_label;
  struct sljit_label *accept_label;
  struct sljit_label *ff_newline_shortcut;
  stub_list *stubs;
  recurse_entry *entries;
  recurse_entry *currententry;
  jump_list *partialmatch;
  jump_list *quit;
  jump_list *positive_assertion_quit;
  jump_list *abort;
  jump_list *failed_match;
  jump_list *accept;
  jump_list *calllimit;
  jump_list *stackalloc;
  jump_list *revertframes;
  jump_list *wordboundary;
  jump_list *ucp_wordboundary;
  jump_list *anynewline;
  jump_list *hspace;
  jump_list *vspace;
  jump_list *casefulcmp;
  jump_list *caselesscmp;
  jump_list *reset_match;
  /* Same as reset_match, but resets the STR_PTR as well. */
  jump_list *restart_match;
  BOOL unset_backref;
  BOOL alt_circumflex;
#ifdef SUPPORT_UNICODE
  BOOL utf;
  BOOL invalid_utf;
  BOOL ucp;
  /* Points to saving area for iref. */
  sljit_s32 iref_ptr;
  jump_list *getucd;
  jump_list *getucdtype;
#if PCRE2_CODE_UNIT_WIDTH == 8
  jump_list *utfreadchar;
  jump_list *utfreadtype8;
  jump_list *utfpeakcharback;
#endif
#if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
  jump_list *utfreadchar_invalid;
  jump_list *utfreadnewline_invalid;
  jump_list *utfmoveback_invalid;
  jump_list *utfpeakcharback_invalid;
#endif
#endif /* SUPPORT_UNICODE */
} compiler_common;
532
/* For byte_sequence_compare. */

/* The 'c' and 'oc' unions and 'ucharptr' exist only when SLJIT_UNALIGNED is
set. Each union overlays an integer view (asint / asushort / asbyte) on an
array of code units whose total size is four bytes for every code unit
width. */
typedef struct compare_context {
  int length;
  int sourcereg;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  int ucharptr;
  union {
    sljit_s32 asint;
    sljit_u16 asushort;
#if PCRE2_CODE_UNIT_WIDTH == 8
    sljit_u8 asbyte;
    sljit_u8 asuchars[4];
#elif PCRE2_CODE_UNIT_WIDTH == 16
    sljit_u16 asuchars[2];
#elif PCRE2_CODE_UNIT_WIDTH == 32
    sljit_u32 asuchars[1];
#endif
  } c;
  union {
    sljit_s32 asint;
    sljit_u16 asushort;
#if PCRE2_CODE_UNIT_WIDTH == 8
    sljit_u8 asbyte;
    sljit_u8 asuchars[4];
#elif PCRE2_CODE_UNIT_WIDTH == 16
    sljit_u16 asuchars[2];
#elif PCRE2_CODE_UNIT_WIDTH == 32
    sljit_u32 asuchars[1];
#endif
  } oc;
#endif
} compare_context;
566
/* Undefine sljit macros. CMP is redefined further down among the
shortcut macros of this file. */
#undef CMP

/* Used for accessing the elements of the stack: converts a slot index
into an offset of i machine words. */
#define STACK(i) ((i) * SSIZE_OF(sw))

/* Only SLJIT_R2 and SLJIT_R3 are accepted as the preferred shift register. */
#ifdef SLJIT_PREF_SHIFT_REG
#if SLJIT_PREF_SHIFT_REG == SLJIT_R2
/* Nothing. */
#elif SLJIT_PREF_SHIFT_REG == SLJIT_R3
#define SHIFT_REG_IS_R3
#else
#error "Unsupported shift register"
#endif
#endif

/* Symbolic names for the sljit registers used in this file. TMP2 and TMP3
swap their registers when SHIFT_REG_IS_R3 is defined, so that TMP2 is the
preferred shift register in that configuration. */
#define TMP1 SLJIT_R0
#ifdef SHIFT_REG_IS_R3
#define TMP2 SLJIT_R3
#define TMP3 SLJIT_R2
#else
#define TMP2 SLJIT_R2
#define TMP3 SLJIT_R3
#endif
#define STR_PTR SLJIT_R1
#define STR_END SLJIT_S0
#define STACK_TOP SLJIT_S1
#define STACK_LIMIT SLJIT_S2
#define COUNT_MATCH SLJIT_S3
#define ARGUMENTS SLJIT_S4
#define RETURN_ADDR SLJIT_R4

/* Set to 1 only for the 32 bit x86 configuration. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#define HAS_VIRTUAL_REGISTERS 1
#else
#define HAS_VIRTUAL_REGISTERS 0
#endif
604
/* Local space layout. Offsets are given in bytes, one machine word per slot. */
/* These two locals can be used by the current opcode. */
#define LOCALS0 (0 * sizeof(sljit_sw))
#define LOCALS1 (1 * sizeof(sljit_sw))
/* Two local variables for possessive quantifiers (char1 cannot use them). */
#define POSSESSIVE0 (2 * sizeof(sljit_sw))
#define POSSESSIVE1 (3 * sizeof(sljit_sw))
/* Max limit of recursions. */
#define LIMIT_MATCH (4 * sizeof(sljit_sw))
/* The output vector is stored on the stack, and contains pointers
to characters. The vector data is divided into two groups: the first
group contains the start / end character pointers, and the second is
the start pointers when the end of the capturing group has not yet been
reached. These macros require a variable named 'common' in scope. */
#define OVECTOR_START (common->ovector_start)
#define OVECTOR(i) (OVECTOR_START + (i) * SSIZE_OF(sw))
#define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * SSIZE_OF(sw))
/* Private data slot of the opcode at cc, indexed by its distance
from the first byte code. */
#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])

/* Per code unit width: the move operation for one code unit, the shift
that converts a code unit count to bytes (not defined for 8 bit), and
IN_UCHARS(x), which converts x code units to bytes. */
#if PCRE2_CODE_UNIT_WIDTH == 8
#define MOV_UCHAR SLJIT_MOV_U8
#define IN_UCHARS(x) (x)
#elif PCRE2_CODE_UNIT_WIDTH == 16
#define MOV_UCHAR SLJIT_MOV_U16
#define UCHAR_SHIFT (1)
#define IN_UCHARS(x) ((x) * 2)
#elif PCRE2_CODE_UNIT_WIDTH == 32
#define MOV_UCHAR SLJIT_MOV_U32
#define UCHAR_SHIFT (2)
#define IN_UCHARS(x) ((x) * 4)
#else
#error Unsupported compiling mode
#endif
637
/* Shortcuts. Apart from DEFINE_COMPILER, each macro forwards to the sljit
function named in its expansion and passes a variable called 'compiler',
which DEFINE_COMPILER declares from common->compiler. */
#define DEFINE_COMPILER \
  struct sljit_compiler *compiler = common->compiler
#define OP1(op, dst, dstw, src, srcw) \
  sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
  sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
#define OP2U(op, src1, src1w, src2, src2w) \
  sljit_emit_op2u(compiler, (op), (src1), (src1w), (src2), (src2w))
#define OP_SRC(op, src, srcw) \
  sljit_emit_op_src(compiler, (op), (src), (srcw))
#define LABEL() \
  sljit_emit_label(compiler)
#define JUMP(type) \
  sljit_emit_jump(compiler, (type))
/* Emits a jump and binds it to an existing label. */
#define JUMPTO(type, label) \
  sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
/* Emits a label at the current position and binds an existing jump to it. */
#define JUMPHERE(jump) \
  sljit_set_label((jump), sljit_emit_label(compiler))
#define SET_LABEL(jump, label) \
  sljit_set_label((jump), (label))
#define CMP(type, src1, src1w, src2, src2w) \
  sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
/* Emits a compare-and-jump and binds it to an existing label. */
#define CMPTO(type, src1, src1w, src2, src2w, label) \
  sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
#define OP_FLAGS(op, dst, dstw, type) \
  sljit_emit_op_flags(compiler, (op), (dst), (dstw), (type))
#define SELECT(type, dst_reg, src1, src1w, src2_reg) \
  sljit_emit_select(compiler, (type), (dst_reg), (src1), (src1w), (src2_reg))
#define GET_LOCAL_BASE(dst, dstw, offset) \
  sljit_get_local_base(compiler, (dst), (dstw), (offset))

/* Largest value used as a character read limit. */
#define READ_CHAR_MAX 0x7fffffff

/* Special character values (names suggest markers for invalid and
unassigned characters - confirm at the points of use). */
#define INVALID_UTF_CHAR -1
#define UNASSIGNED_UTF_CHAR 888
674
675 #if defined SUPPORT_UNICODE
676 #if PCRE2_CODE_UNIT_WIDTH == 8
677
/* 8 bit version: decodes one UTF-8 character starting at ptr into c and
advances ptr past it. Continuation bytes are only read below 'end'; ptr[0]
itself is read without a bounds check. invalid_action is executed when a
continuation byte is missing or out of the 0x80..0xbf range, when the lead
byte does not fit the sequence length, for three byte results below 0x800 or
in the surrogate range 0xd800..0xdfff, and for four byte results below
0x10000 or at/above 0x110000. In the two range-check failures ptr has already
been advanced; in every other failure ptr is unchanged, although c may have
been modified. */
#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
  { \
  if (ptr[0] <= 0x7f) \
    c = *ptr++; \
  else if (ptr + 1 < end && ptr[1] >= 0x80 && ptr[1] < 0xc0) \
    { \
    c = ptr[1] - 0x80; \
    \
    if (ptr[0] >= 0xc2 && ptr[0] <= 0xdf) \
      { \
      c |= (ptr[0] - 0xc0) << 6; \
      ptr += 2; \
      } \
    else if (ptr + 2 < end && ptr[2] >= 0x80 && ptr[2] < 0xc0) \
      { \
      c = c << 6 | (ptr[2] - 0x80); \
      \
      if (ptr[0] >= 0xe0 && ptr[0] <= 0xef) \
        { \
        c |= (ptr[0] - 0xe0) << 12; \
        ptr += 3; \
        \
        if (c < 0x800 || (c >= 0xd800 && c < 0xe000)) \
          { \
          invalid_action; \
          } \
        } \
      else if (ptr + 3 < end && ptr[3] >= 0x80 && ptr[3] < 0xc0) \
        { \
        c = c << 6 | (ptr[3] - 0x80); \
        \
        if (ptr[0] >= 0xf0 && ptr[0] <= 0xf4) \
          { \
          c |= (ptr[0] - 0xf0) << 18; \
          ptr += 4; \
          \
          if (c >= 0x110000 || c < 0x10000) \
            { \
            invalid_action; \
            } \
          } \
        else \
          { \
          invalid_action; \
          } \
        } \
      else \
        { \
        invalid_action; \
        } \
      } \
    else \
      { \
      invalid_action; \
      } \
    } \
  else \
    { \
    invalid_action; \
    } \
  }
739
/* 8 bit version: decodes the UTF-8 character that ends just before ptr into
c and moves ptr back to its first byte. ptr[-1] is read without a bounds
check; each further step backwards is guarded by a comparison against
'start'. invalid_action is executed under the same validity rules as the
forward macro (bad continuation or lead byte, three byte results below 0x800
or in 0xd800..0xdfff, four byte results below 0x10000 or at/above 0x110000).
In the two range-check failures ptr has already been moved back; in every
other failure ptr is unchanged, although c may have been modified. */
#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
  { \
  c = ptr[-1]; \
  if (c <= 0x7f) \
    ptr--; \
  else if (ptr - 1 > start && ptr[-1] >= 0x80 && ptr[-1] < 0xc0) \
    { \
    c -= 0x80; \
    \
    if (ptr[-2] >= 0xc2 && ptr[-2] <= 0xdf) \
      { \
      c |= (ptr[-2] - 0xc0) << 6; \
      ptr -= 2; \
      } \
    else if (ptr - 2 > start && ptr[-2] >= 0x80 && ptr[-2] < 0xc0) \
      { \
      c = c << 6 | (ptr[-2] - 0x80); \
      \
      if (ptr[-3] >= 0xe0 && ptr[-3] <= 0xef) \
        { \
        c |= (ptr[-3] - 0xe0) << 12; \
        ptr -= 3; \
        \
        if (c < 0x800 || (c >= 0xd800 && c < 0xe000)) \
          { \
          invalid_action; \
          } \
        } \
      else if (ptr - 3 > start && ptr[-3] >= 0x80 && ptr[-3] < 0xc0) \
        { \
        c = c << 6 | (ptr[-3] - 0x80); \
        \
        if (ptr[-4] >= 0xf0 && ptr[-4] <= 0xf4) \
          { \
          c |= (ptr[-4] - 0xf0) << 18; \
          ptr -= 4; \
          \
          if (c >= 0x110000 || c < 0x10000) \
            { \
            invalid_action; \
            } \
          } \
        else \
          { \
          invalid_action; \
          } \
        } \
      else \
        { \
        invalid_action; \
        } \
      } \
    else \
      { \
      invalid_action; \
      } \
    } \
  else \
    { \
    invalid_action; \
    } \
  }
802
803 #elif PCRE2_CODE_UNIT_WIDTH == 16
804
/* 16 bit version: reads one character at ptr into c and advances ptr.
A code unit outside 0xd800..0xdfff is taken as is. A unit in 0xd800..0xdbff
followed (below 'end') by a unit in 0xdc00..0xdfff is combined into a value
of 0x10000 or above and ptr advances by two. Anything else executes
invalid_action and leaves ptr and c unchanged. */
#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
  { \
  if (ptr[0] < 0xd800 || ptr[0] >= 0xe000) \
    c = *ptr++; \
  else if (ptr[0] < 0xdc00 && ptr + 1 < end && ptr[1] >= 0xdc00 && ptr[1] < 0xe000) \
    { \
    c = (((ptr[0] - 0xd800) << 10) | (ptr[1] - 0xdc00)) + 0x10000; \
    ptr += 2; \
    } \
  else \
    { \
    invalid_action; \
    } \
  }
819
/* 16 bit version: reads the character that ends just before ptr into c and
moves ptr back. A code unit outside 0xd800..0xdfff moves ptr back by one.
A unit in 0xdc00..0xdfff preceded (above 'start') by a unit in
0xd800..0xdbff is combined into a value of 0x10000 or above and ptr moves
back by two. Anything else executes invalid_action with ptr unchanged and
c holding the raw value of ptr[-1]. */
#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
  { \
  c = ptr[-1]; \
  if (c < 0xd800 || c >= 0xe000) \
    ptr--; \
  else if (c >= 0xdc00 && ptr - 1 > start && ptr[-2] >= 0xd800 && ptr[-2] < 0xdc00) \
    { \
    c = (((ptr[-2] - 0xd800) << 10) | (c - 0xdc00)) + 0x10000; \
    ptr -= 2; \
    } \
  else \
    { \
    invalid_action; \
    } \
  }
835
836
837 #elif PCRE2_CODE_UNIT_WIDTH == 32
838
/* 32 bit version: accepts ptr[0] when it is below 0xd800 or in
0xe000..0x10ffff, stores it in c and advances ptr; otherwise executes
invalid_action with ptr and c unchanged. The 'end' argument is not used. */
#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
  { \
  if (ptr[0] < 0xd800 || (ptr[0] >= 0xe000 && ptr[0] < 0x110000)) \
    c = *ptr++; \
  else \
    { \
    invalid_action; \
    } \
  }
848
/* 32 bit version: stores ptr[-1] in c, then moves ptr back by one when the
value is below 0xd800 or in 0xe000..0x10ffff; otherwise executes
invalid_action with ptr unchanged. The 'start' argument is not used. */
#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
  { \
  c = ptr[-1]; \
  if (ptr[-1] < 0xd800 || (ptr[-1] >= 0xe000 && ptr[-1] < 0x110000)) \
    ptr--; \
  else \
    { \
    invalid_action; \
    } \
  }
859
860 #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
861 #endif /* SUPPORT_UNICODE */
862
/* Returns the address just past the closing KET of the group that starts
at cc. cc must point to an assertion opcode (OP_ASSERT .. OP_ASSERTBACK_NA)
or to an opcode in the OP_ONCE .. OP_SCOND range. */
static PCRE2_SPTR bracketend(PCRE2_SPTR cc)
{
  SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND));

  /* The first link is always followed; further links only while
     they lead to another alternative. */
  cc += GET(cc, 1);
  while (*cc == OP_ALT)
    cc += GET(cc, 1);

  SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);

  /* Step over the KET opcode and its link field. */
  return cc + 1 + LINK_SIZE;
}
871
/* Counts the alternatives of the group that starts at cc (the result is
at least 1). cc must point to an assertion opcode (OP_ASSERT ..
OP_ASSERTBACK_NA) or to an opcode in the OP_ONCE .. OP_SCOND range. */
static int no_alternatives(PCRE2_SPTR cc)
{
  int alternatives = 1;

  SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND));

  /* Every OP_ALT reached through the link chain adds one alternative. */
  for (cc += GET(cc, 1); *cc == OP_ALT; cc += GET(cc, 1))
    alternatives++;

  SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
  return alternatives;
}
885
find_vreverse(PCRE2_SPTR cc)886 static BOOL find_vreverse(PCRE2_SPTR cc)
887 {
888 SLJIT_ASSERT(*cc == OP_ASSERTBACK || *cc == OP_ASSERTBACK_NOT || *cc == OP_ASSERTBACK_NA);
889
890 do
891 {
892 if (cc[1 + LINK_SIZE] == OP_VREVERSE)
893 return TRUE;
894 cc += GET(cc, 1);
895 }
896 while (*cc == OP_ALT);
897
898 return FALSE;
899 }
900
/* Functions which might need modification for each newly supported opcode:
902 next_opcode
903 check_opcode_types
904 set_private_data_ptrs
905 get_framesize
906 init_frame
907 get_recurse_data_length
908 copy_recurse_data
909 compile_matchingpath
910 compile_backtrackingpath
911 */
912
/* Returns the address of the opcode that follows the one at cc.

Arguments:
  common   compiler state; only common->utf is read, and only when
           SUPPORT_UNICODE is defined
  cc       points to the current opcode

Returns NULL for OP_ANYBYTE in UTF mode and for any opcode that is not
listed here (the latter also trips SLJIT_UNREACHABLE). */
static PCRE2_SPTR next_opcode(compiler_common *common, PCRE2_SPTR cc)
{
  SLJIT_UNUSED_ARG(common);
  switch(*cc)
  {
    /* Opcodes whose full length is given by the OP_lengths table. */
    case OP_SOD:
    case OP_SOM:
    case OP_SET_SOM:
    case OP_NOT_WORD_BOUNDARY:
    case OP_WORD_BOUNDARY:
    case OP_NOT_DIGIT:
    case OP_DIGIT:
    case OP_NOT_WHITESPACE:
    case OP_WHITESPACE:
    case OP_NOT_WORDCHAR:
    case OP_WORDCHAR:
    case OP_ANY:
    case OP_ALLANY:
    case OP_NOTPROP:
    case OP_PROP:
    case OP_ANYNL:
    case OP_NOT_HSPACE:
    case OP_HSPACE:
    case OP_NOT_VSPACE:
    case OP_VSPACE:
    case OP_EXTUNI:
    case OP_EODN:
    case OP_EOD:
    case OP_CIRC:
    case OP_CIRCM:
    case OP_DOLL:
    case OP_DOLLM:
    case OP_CRSTAR:
    case OP_CRMINSTAR:
    case OP_CRPLUS:
    case OP_CRMINPLUS:
    case OP_CRQUERY:
    case OP_CRMINQUERY:
    case OP_CRRANGE:
    case OP_CRMINRANGE:
    case OP_CRPOSSTAR:
    case OP_CRPOSPLUS:
    case OP_CRPOSQUERY:
    case OP_CRPOSRANGE:
    case OP_CLASS:
    case OP_NCLASS:
    case OP_REF:
    case OP_REFI:
    case OP_DNREF:
    case OP_DNREFI:
    case OP_RECURSE:
    case OP_CALLOUT:
    case OP_ALT:
    case OP_KET:
    case OP_KETRMAX:
    case OP_KETRMIN:
    case OP_KETRPOS:
    case OP_REVERSE:
    case OP_VREVERSE:
    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ASSERT_NA:
    case OP_ASSERTBACK_NA:
    case OP_ONCE:
    case OP_SCRIPT_RUN:
    case OP_BRA:
    case OP_BRAPOS:
    case OP_CBRA:
    case OP_CBRAPOS:
    case OP_COND:
    case OP_SBRA:
    case OP_SBRAPOS:
    case OP_SCBRA:
    case OP_SCBRAPOS:
    case OP_SCOND:
    case OP_CREF:
    case OP_DNCREF:
    case OP_RREF:
    case OP_DNRREF:
    case OP_FALSE:
    case OP_TRUE:
    case OP_BRAZERO:
    case OP_BRAMINZERO:
    case OP_BRAPOSZERO:
    case OP_PRUNE:
    case OP_SKIP:
    case OP_THEN:
    case OP_COMMIT:
    case OP_FAIL:
    case OP_ACCEPT:
    case OP_ASSERT_ACCEPT:
    case OP_CLOSE:
    case OP_SKIPZERO:
    case OP_NOT_UCP_WORD_BOUNDARY:
    case OP_UCP_WORD_BOUNDARY:
    return cc + PRIV(OP_lengths)[*cc];

    /* Opcodes that end with a character: the table length is extended in
       UTF mode when the last code unit announces extra code units. */
    case OP_CHAR:
    case OP_CHARI:
    case OP_NOT:
    case OP_NOTI:
    case OP_STAR:
    case OP_MINSTAR:
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_UPTO:
    case OP_MINUPTO:
    case OP_EXACT:
    case OP_POSSTAR:
    case OP_POSPLUS:
    case OP_POSQUERY:
    case OP_POSUPTO:
    case OP_STARI:
    case OP_MINSTARI:
    case OP_PLUSI:
    case OP_MINPLUSI:
    case OP_QUERYI:
    case OP_MINQUERYI:
    case OP_UPTOI:
    case OP_MINUPTOI:
    case OP_EXACTI:
    case OP_POSSTARI:
    case OP_POSPLUSI:
    case OP_POSQUERYI:
    case OP_POSUPTOI:
    case OP_NOTSTAR:
    case OP_NOTMINSTAR:
    case OP_NOTPLUS:
    case OP_NOTMINPLUS:
    case OP_NOTQUERY:
    case OP_NOTMINQUERY:
    case OP_NOTUPTO:
    case OP_NOTMINUPTO:
    case OP_NOTEXACT:
    case OP_NOTPOSSTAR:
    case OP_NOTPOSPLUS:
    case OP_NOTPOSQUERY:
    case OP_NOTPOSUPTO:
    case OP_NOTSTARI:
    case OP_NOTMINSTARI:
    case OP_NOTPLUSI:
    case OP_NOTMINPLUSI:
    case OP_NOTQUERYI:
    case OP_NOTMINQUERYI:
    case OP_NOTUPTOI:
    case OP_NOTMINUPTOI:
    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI:
    case OP_NOTPOSPLUSI:
    case OP_NOTPOSQUERYI:
    case OP_NOTPOSUPTOI:
    cc += PRIV(OP_lengths)[*cc];
#ifdef SUPPORT_UNICODE
    /* cc[-1] is the last code unit covered by the table length. */
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    return cc;

    /* Special cases. */
    /* Type repeats stop one code unit short of the table length
       (presumably so that the trailing type opcode is visited as the
       next opcode - confirm with the callers). */
    case OP_TYPESTAR:
    case OP_TYPEMINSTAR:
    case OP_TYPEPLUS:
    case OP_TYPEMINPLUS:
    case OP_TYPEQUERY:
    case OP_TYPEMINQUERY:
    case OP_TYPEUPTO:
    case OP_TYPEMINUPTO:
    case OP_TYPEEXACT:
    case OP_TYPEPOSSTAR:
    case OP_TYPEPOSPLUS:
    case OP_TYPEPOSQUERY:
    case OP_TYPEPOSUPTO:
    return cc + PRIV(OP_lengths)[*cc] - 1;

    /* Not supported in UTF mode. */
    case OP_ANYBYTE:
#ifdef SUPPORT_UNICODE
    if (common->utf) return NULL;
#endif
    return cc + 1;

    /* The total length is read from the opcode's own data, at
       offset 1 + 2*LINK_SIZE. */
    case OP_CALLOUT_STR:
    return cc + GET(cc, 1 + 2*LINK_SIZE);

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    /* The total length is read from the opcode's own data, at offset 1. */
    case OP_XCLASS:
    return cc + GET(cc, 1);
#endif

    /* Variable length: three fixed code units plus cc[1] further ones
       (cc[1] is presumably the length of the verb argument - confirm). */
    case OP_MARK:
    case OP_COMMIT_ARG:
    case OP_PRUNE_ARG:
    case OP_SKIP_ARG:
    case OP_THEN_ARG:
    return cc + 1 + 2 + cc[1];

    default:
    SLJIT_UNREACHABLE();
    return NULL;
  }
}
1116
check_opcode_types(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend)1117 static BOOL check_opcode_types(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend)
1118 {
1119 int count;
1120 PCRE2_SPTR slot;
1121 PCRE2_SPTR assert_back_end = cc - 1;
1122 PCRE2_SPTR assert_na_end = cc - 1;
1123
1124 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
1125 while (cc < ccend)
1126 {
1127 switch(*cc)
1128 {
1129 case OP_SET_SOM:
1130 common->has_set_som = TRUE;
1131 common->might_be_empty = TRUE;
1132 cc += 1;
1133 break;
1134
1135 case OP_REFI:
1136 #ifdef SUPPORT_UNICODE
1137 if (common->iref_ptr == 0)
1138 {
1139 common->iref_ptr = common->ovector_start;
1140 common->ovector_start += 3 * sizeof(sljit_sw);
1141 }
1142 #endif /* SUPPORT_UNICODE */
1143 /* Fall through. */
1144 case OP_REF:
1145 common->optimized_cbracket[GET2(cc, 1)] = 0;
1146 cc += 1 + IMM2_SIZE;
1147 break;
1148
1149 case OP_ASSERT_NA:
1150 case OP_ASSERTBACK_NA:
1151 slot = bracketend(cc);
1152 if (slot > assert_na_end)
1153 assert_na_end = slot;
1154 cc += 1 + LINK_SIZE;
1155 break;
1156
1157 case OP_CBRAPOS:
1158 case OP_SCBRAPOS:
1159 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
1160 cc += 1 + LINK_SIZE + IMM2_SIZE;
1161 break;
1162
1163 case OP_COND:
1164 case OP_SCOND:
1165 /* Only AUTO_CALLOUT can insert this opcode. We do
1166 not intend to support this case. */
1167 if (cc[1 + LINK_SIZE] == OP_CALLOUT || cc[1 + LINK_SIZE] == OP_CALLOUT_STR)
1168 return FALSE;
1169 cc += 1 + LINK_SIZE;
1170 break;
1171
1172 case OP_CREF:
1173 common->optimized_cbracket[GET2(cc, 1)] = 0;
1174 cc += 1 + IMM2_SIZE;
1175 break;
1176
1177 case OP_DNREF:
1178 case OP_DNREFI:
1179 case OP_DNCREF:
1180 count = GET2(cc, 1 + IMM2_SIZE);
1181 slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
1182 while (count-- > 0)
1183 {
1184 common->optimized_cbracket[GET2(slot, 0)] = 0;
1185 slot += common->name_entry_size;
1186 }
1187 cc += 1 + 2 * IMM2_SIZE;
1188 break;
1189
1190 case OP_RECURSE:
1191 /* Set its value only once. */
1192 if (common->recursive_head_ptr == 0)
1193 {
1194 common->recursive_head_ptr = common->ovector_start;
1195 common->ovector_start += sizeof(sljit_sw);
1196 }
1197 cc += 1 + LINK_SIZE;
1198 break;
1199
1200 case OP_CALLOUT:
1201 case OP_CALLOUT_STR:
1202 if (common->capture_last_ptr == 0)
1203 {
1204 common->capture_last_ptr = common->ovector_start;
1205 common->ovector_start += sizeof(sljit_sw);
1206 }
1207 cc += (*cc == OP_CALLOUT) ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2*LINK_SIZE);
1208 break;
1209
1210 case OP_ASSERTBACK:
1211 slot = bracketend(cc);
1212 if (slot > assert_back_end)
1213 assert_back_end = slot;
1214 cc += 1 + LINK_SIZE;
1215 break;
1216
1217 case OP_THEN_ARG:
1218 common->has_then = TRUE;
1219 common->control_head_ptr = 1;
1220 /* Fall through. */
1221
1222 case OP_COMMIT_ARG:
1223 case OP_PRUNE_ARG:
1224 if (cc < assert_na_end)
1225 return FALSE;
1226 /* Fall through */
1227 case OP_MARK:
1228 if (common->mark_ptr == 0)
1229 {
1230 common->mark_ptr = common->ovector_start;
1231 common->ovector_start += sizeof(sljit_sw);
1232 }
1233 cc += 1 + 2 + cc[1];
1234 break;
1235
1236 case OP_THEN:
1237 common->has_then = TRUE;
1238 common->control_head_ptr = 1;
1239 cc += 1;
1240 break;
1241
1242 case OP_SKIP:
1243 if (cc < assert_back_end)
1244 common->has_skip_in_assert_back = TRUE;
1245 if (cc < assert_na_end)
1246 return FALSE;
1247 cc += 1;
1248 break;
1249
1250 case OP_SKIP_ARG:
1251 common->control_head_ptr = 1;
1252 common->has_skip_arg = TRUE;
1253 if (cc < assert_back_end)
1254 common->has_skip_in_assert_back = TRUE;
1255 if (cc < assert_na_end)
1256 return FALSE;
1257 cc += 1 + 2 + cc[1];
1258 break;
1259
1260 case OP_PRUNE:
1261 case OP_COMMIT:
1262 case OP_ASSERT_ACCEPT:
1263 if (cc < assert_na_end)
1264 return FALSE;
1265 cc++;
1266 break;
1267
1268 default:
1269 cc = next_opcode(common, cc);
1270 if (cc == NULL)
1271 return FALSE;
1272 break;
1273 }
1274 }
1275 return TRUE;
1276 }
1277
/* Upper bound of the early fail state value: detect_early_fail() asserts
that its start argument is below this value, stops scanning once its counter
reaches it, and returns it when no further enhancement is possible. */
#define EARLY_FAIL_ENHANCE_MAX (3 + 3)

/*
Start represents the number of allowed early fail enhancements.

Its values have the following meaning:
  0 - skip is allowed for all iterators
  1 - fail is allowed for all iterators
  2 - fail is allowed for greedy iterators
  3 - only ranged early fail is allowed
 >3 - (start - 3) is the number of ranged early fails already used;
      no more are allowed once EARLY_FAIL_ENHANCE_MAX is reached

return: the updated value of start
*/
/* Scans every alternative of the bracket at cc (OP_ONCE, OP_BRA or an
optimized OP_CBRA) and marks the iterators that can use an early fail or skip
enhancement.

For each selected iterator an entry is written to common->private_data_ptrs
at the code unit following the iterator opcode. The entry is the current
*private_data_start shifted left by three bits, combined with type_skip,
type_fail or type_fail_range, and *private_data_start is advanced by the
space the enhancement consumes.

Arguments:
  common              compiler state
  cc                  start of the bracket
  private_data_start  in/out: next free private data offset
  depth               nesting level; nested brackets are only entered
                      while depth is below 4
  start               state value on entry (see the comment above)

Returns the largest state value reached by any alternative, or
EARLY_FAIL_ENHANCE_MAX when scanning had to stop. */
static int detect_early_fail(compiler_common *common, PCRE2_SPTR cc,
   int *private_data_start, sljit_s32 depth, int start)
{
PCRE2_SPTR begin = cc;
PCRE2_SPTR next_alt;
PCRE2_SPTR end;
PCRE2_SPTR accelerated_start;
int result = 0;
int count, prev_count;

SLJIT_ASSERT(*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA);
SLJIT_ASSERT(*cc != OP_CBRA || common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] != 0);
SLJIT_ASSERT(start < EARLY_FAIL_ENHANCE_MAX);

/* With more than one alternative the skip enhancement (state 0) is not used. */
next_alt = cc + GET(cc, 1);
if (*next_alt == OP_ALT && start < 1)
  start = 1;

do
  {
  /* Every alternative starts from the same state. */
  count = start;
  cc += 1 + LINK_SIZE + ((*cc == OP_CBRA) ? IMM2_SIZE : 0);

  /* Inside the switch, "continue" moves on to the next opcode without
  selecting an iterator, while "break" leaves the switch: with
  accelerated_start set an enhancement is recorded below, otherwise the
  scan of this alternative ends. Opcodes not listed also end the scan. */
  while (TRUE)
    {
    accelerated_start = NULL;

    switch(*cc)
      {
      case OP_SOD:
      case OP_SOM:
      case OP_SET_SOM:
      case OP_NOT_WORD_BOUNDARY:
      case OP_WORD_BOUNDARY:
      case OP_EODN:
      case OP_EOD:
      case OP_CIRC:
      case OP_CIRCM:
      case OP_DOLL:
      case OP_DOLLM:
      case OP_NOT_UCP_WORD_BOUNDARY:
      case OP_UCP_WORD_BOUNDARY:
      /* Zero width assertions. */
      cc++;
      continue;

      /* Single character types: state becomes at least 1. */
      case OP_NOT_DIGIT:
      case OP_DIGIT:
      case OP_NOT_WHITESPACE:
      case OP_WHITESPACE:
      case OP_NOT_WORDCHAR:
      case OP_WORDCHAR:
      case OP_ANY:
      case OP_ALLANY:
      case OP_ANYBYTE:
      case OP_NOT_HSPACE:
      case OP_HSPACE:
      case OP_NOT_VSPACE:
      case OP_VSPACE:
      if (count < 1)
        count = 1;
      cc++;
      continue;

      /* These raise the state to at least 3. */
      case OP_ANYNL:
      case OP_EXTUNI:
      if (count < 3)
        count = 3;
      cc++;
      continue;

      case OP_NOTPROP:
      case OP_PROP:
      if (count < 1)
        count = 1;
      cc += 1 + 2;
      continue;

      case OP_CHAR:
      case OP_CHARI:
      case OP_NOT:
      case OP_NOTI:
      if (count < 1)
        count = 1;
      cc += 2;
#ifdef SUPPORT_UNICODE
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      continue;

      /* In state 2 a minimizing iterator moves the state to 3. */
      case OP_TYPEMINSTAR:
      case OP_TYPEMINPLUS:
      if (count == 2)
        count = 3;
      /* Fall through */

      case OP_TYPESTAR:
      case OP_TYPEPLUS:
      case OP_TYPEPOSSTAR:
      case OP_TYPEPOSPLUS:
      /* The type or prop opcode is skipped in the next iteration. */
      cc += 1;

      /* Select the iterator unless it repeats OP_ANYNL or OP_EXTUNI. */
      if (cc[0] != OP_ANYNL && cc[0] != OP_EXTUNI)
        {
        accelerated_start = cc - 1;
        break;
        }

      if (count < 3)
        count = 3;
      continue;

      case OP_TYPEEXACT:
      if (count < 1)
        count = 1;
      cc += 1 + IMM2_SIZE;
      continue;

      case OP_TYPEUPTO:
      case OP_TYPEMINUPTO:
      case OP_TYPEPOSUPTO:
      cc += IMM2_SIZE;
      /* Fall through */

      case OP_TYPEQUERY:
      case OP_TYPEMINQUERY:
      case OP_TYPEPOSQUERY:
      /* The type or prop opcode is skipped in the next iteration. */
      if (count < 3)
        count = 3;
      cc += 1;
      continue;

      /* In state 2 a minimizing iterator moves the state to 3. */
      case OP_MINSTAR:
      case OP_MINPLUS:
      case OP_MINSTARI:
      case OP_MINPLUSI:
      case OP_NOTMINSTAR:
      case OP_NOTMINPLUS:
      case OP_NOTMINSTARI:
      case OP_NOTMINPLUSI:
      if (count == 2)
        count = 3;
      /* Fall through */

      case OP_STAR:
      case OP_PLUS:
      case OP_POSSTAR:
      case OP_POSPLUS:

      case OP_STARI:
      case OP_PLUSI:
      case OP_POSSTARI:
      case OP_POSPLUSI:

      case OP_NOTSTAR:
      case OP_NOTPLUS:
      case OP_NOTPOSSTAR:
      case OP_NOTPOSPLUS:

      case OP_NOTSTARI:
      case OP_NOTPLUSI:
      case OP_NOTPOSSTARI:
      case OP_NOTPOSPLUSI:
      /* Star and plus character iterators are always selected. */
      accelerated_start = cc;
      cc += 2;
#ifdef SUPPORT_UNICODE
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      break;

      /* NOTE(review): only OP_EXACT is handled here; OP_EXACTI, OP_NOTEXACT
      and OP_NOTEXACTI share the group below, which raises the state to 3.
      Confirm that this difference is intended. */
      case OP_EXACT:
      if (count < 1)
        count = 1;
      cc += 2 + IMM2_SIZE;
#ifdef SUPPORT_UNICODE
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      continue;

      case OP_UPTO:
      case OP_MINUPTO:
      case OP_POSUPTO:
      case OP_UPTOI:
      case OP_MINUPTOI:
      case OP_EXACTI:
      case OP_POSUPTOI:
      case OP_NOTUPTO:
      case OP_NOTMINUPTO:
      case OP_NOTEXACT:
      case OP_NOTPOSUPTO:
      case OP_NOTUPTOI:
      case OP_NOTMINUPTOI:
      case OP_NOTEXACTI:
      case OP_NOTPOSUPTOI:
      cc += IMM2_SIZE;
      /* Fall through */

      case OP_QUERY:
      case OP_MINQUERY:
      case OP_POSQUERY:
      case OP_QUERYI:
      case OP_MINQUERYI:
      case OP_POSQUERYI:
      case OP_NOTQUERY:
      case OP_NOTMINQUERY:
      case OP_NOTPOSQUERY:
      case OP_NOTQUERYI:
      case OP_NOTMINQUERYI:
      case OP_NOTPOSQUERYI:
      if (count < 3)
        count = 3;
      cc += 2;
#ifdef SUPPORT_UNICODE
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      continue;

      /* Character classes: skip the class data, then inspect the repeat
      opcode that may follow it. */
      case OP_CLASS:
      case OP_NCLASS:
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
      case OP_XCLASS:
      accelerated_start = cc;
      cc += ((*cc == OP_XCLASS) ? GET(cc, 1) : (unsigned int)(1 + (32 / sizeof(PCRE2_UCHAR))));
#else
      accelerated_start = cc;
      cc += (1 + (32 / sizeof(PCRE2_UCHAR)));
#endif

      switch (*cc)
        {
        case OP_CRMINSTAR:
        case OP_CRMINPLUS:
        if (count == 2)
          count = 3;
        /* Fall through */

        case OP_CRSTAR:
        case OP_CRPLUS:
        case OP_CRPOSSTAR:
        case OP_CRPOSPLUS:
        /* The class iterator is selected (accelerated_start is kept). */
        cc++;
        break;

        case OP_CRRANGE:
        case OP_CRMINRANGE:
        case OP_CRPOSRANGE:
        if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
          {
          /* Exact repeat. */
          cc += 1 + 2 * IMM2_SIZE;
          if (count < 1)
            count = 1;
          continue;
          }

        cc += 2 * IMM2_SIZE;
        /* Fall through */
        case OP_CRQUERY:
        case OP_CRMINQUERY:
        case OP_CRPOSQUERY:
        cc++;
        if (count < 3)
          count = 3;
        continue;

        default:
        /* No repeat. */
        if (count < 1)
          count = 1;
        continue;
        }
      break;

      case OP_BRA:
      case OP_CBRA:
      /* The nested scan starts from the state before this bracket. */
      prev_count = count;
      if (count < 1)
        count = 1;

      if (depth >= 4)
        break;

      if (count < 3 && cc[GET(cc, 1)] == OP_ALT)
        count = 3;

      /* Only brackets closed by a plain OP_KET are entered, and capturing
      brackets only when they are still marked as optimized. */
      end = bracketend(cc);
      if (end[-1 - LINK_SIZE] != OP_KET || (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0))
        break;

      prev_count = detect_early_fail(common, cc, private_data_start, depth + 1, prev_count);

      if (prev_count > count)
        count = prev_count;

      /* The nested call marked the inner bracket, so mark this one too. */
      if (PRIVATE_DATA(cc) != 0)
        common->private_data_ptrs[begin - common->start] = 1;

      if (count < EARLY_FAIL_ENHANCE_MAX)
        {
        cc = end;
        continue;
        }
      break;

      case OP_KET:
      SLJIT_ASSERT(PRIVATE_DATA(cc) == 0);
      /* The closing OP_KET of the scanned bracket ends the alternative;
      an OP_KET before it is stepped over. */
      if (cc >= next_alt)
        break;
      cc += 1 + LINK_SIZE;
      continue;
      }

    /* No iterator was selected: this alternative is finished. */
    if (accelerated_start == NULL)
      break;

    if (count == 0)
      {
      /* State 0: the iterator gets the skip enhancement and becomes the
      fast forward start. It uses one word of private data. */
      common->fast_forward_bc_ptr = accelerated_start;
      common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_skip;
      *private_data_start += sizeof(sljit_sw);
      count = 4;
      }
    else if (count < 3)
      {
      /* States 1 and 2: plain early fail, one word of private data. */
      common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_fail;

      if (common->early_fail_start_ptr == 0)
        common->early_fail_start_ptr = *private_data_start;

      *private_data_start += sizeof(sljit_sw);
      common->early_fail_end_ptr = *private_data_start;

      if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
        return EARLY_FAIL_ENHANCE_MAX;

      count = 4;
      }
    else
      {
      /* State 3 and above: ranged early fail, two words of private data. */
      common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_fail_range;

      if (common->early_fail_start_ptr == 0)
        common->early_fail_start_ptr = *private_data_start;

      *private_data_start += 2 * sizeof(sljit_sw);
      common->early_fail_end_ptr = *private_data_start;

      if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
        return EARLY_FAIL_ENHANCE_MAX;

      count++;
      }

    /* Cannot be part of a repeat. */
    common->private_data_ptrs[begin - common->start] = 1;

    if (count >= EARLY_FAIL_ENHANCE_MAX)
      break;
    }

  /* Stopping anywhere except at the end of the alternative means the scan
  was cut short, so the maximum value is reported. */
  if (*cc != OP_ALT && *cc != OP_KET)
    result = EARLY_FAIL_ENHANCE_MAX;
  else if (result < count)
    result = count;

  cc = next_alt;
  next_alt = cc + GET(cc, 1);
  }
while (*cc == OP_ALT);

return result;
}
1666
/* Inspects the repeat opcode at cc (the code unit following a character
class) and returns how many private data words its iterator uses:
2 for greedy star/plus, 1 for minimizing star/plus and for both query
forms, and for ranges the value (max - min) capped at 2. An unbounded range
(max == 0) counts as 2 when greedy and 1 when minimizing. Any other opcode
yields 0. */
static int get_class_iterator_size(PCRE2_SPTR cc)
{
sljit_u32 lower;
sljit_u32 upper;
sljit_u32 span;

switch(*cc)
  {
  case OP_CRSTAR:
  case OP_CRPLUS:
  return 2;

  case OP_CRMINSTAR:
  case OP_CRMINPLUS:
  case OP_CRQUERY:
  case OP_CRMINQUERY:
  return 1;

  case OP_CRRANGE:
  case OP_CRMINRANGE:
  lower = GET2(cc, 1);
  upper = GET2(cc, 1 + IMM2_SIZE);

  /* A zero upper limit means the repeat is unbounded. */
  if (upper == 0)
    {
    if (*cc == OP_CRRANGE)
      return 2;
    return 1;
    }

  span = upper - lower;
  return (int)((span > 2) ? 2 : span);

  default:
  return 0;
  }
}
1698
detect_repeat(compiler_common * common,PCRE2_SPTR begin)1699 static BOOL detect_repeat(compiler_common *common, PCRE2_SPTR begin)
1700 {
1701 PCRE2_SPTR end = bracketend(begin);
1702 PCRE2_SPTR next;
1703 PCRE2_SPTR next_end;
1704 PCRE2_SPTR max_end;
1705 PCRE2_UCHAR type;
1706 sljit_sw length = end - begin;
1707 sljit_s32 min, max, i;
1708
1709 /* Detect fixed iterations first. */
1710 if (end[-(1 + LINK_SIZE)] != OP_KET || PRIVATE_DATA(begin) != 0)
1711 return FALSE;
1712
1713 /* /(?:AB){4,6}/ is currently converted to /(?:AB){3}(?AB){1,3}/
1714 * Skip the check of the second part. */
1715 if (PRIVATE_DATA(end - LINK_SIZE) != 0)
1716 return TRUE;
1717
1718 next = end;
1719 min = 1;
1720 while (1)
1721 {
1722 if (*next != *begin)
1723 break;
1724 next_end = bracketend(next);
1725 if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
1726 break;
1727 next = next_end;
1728 min++;
1729 }
1730
1731 if (min == 2)
1732 return FALSE;
1733
1734 max = 0;
1735 max_end = next;
1736 if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
1737 {
1738 type = *next;
1739 while (1)
1740 {
1741 if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
1742 break;
1743 next_end = bracketend(next + 2 + LINK_SIZE);
1744 if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
1745 break;
1746 next = next_end;
1747 max++;
1748 }
1749
1750 if (next[0] == type && next[1] == *begin && max >= 1)
1751 {
1752 next_end = bracketend(next + 1);
1753 if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
1754 {
1755 for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
1756 if (*next_end != OP_KET)
1757 break;
1758
1759 if (i == max)
1760 {
1761 common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
1762 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
1763 /* +2 the original and the last. */
1764 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
1765 if (min == 1)
1766 return TRUE;
1767 min--;
1768 max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
1769 }
1770 }
1771 }
1772 }
1773
1774 if (min >= 3)
1775 {
1776 common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
1777 common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
1778 common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
1779 return TRUE;
1780 }
1781
1782 return FALSE;
1783 }
1784
/* The macros below expand to groups of case labels, so that switch
statements can handle iterators with the same private data needs together. */

/* Character iterators for which set_private_data_ptrs() reserves one
private data word. */
#define CASE_ITERATOR_PRIVATE_DATA_1 \
    case OP_MINSTAR: \
    case OP_MINPLUS: \
    case OP_QUERY: \
    case OP_MINQUERY: \
    case OP_MINSTARI: \
    case OP_MINPLUSI: \
    case OP_QUERYI: \
    case OP_MINQUERYI: \
    case OP_NOTMINSTAR: \
    case OP_NOTMINPLUS: \
    case OP_NOTQUERY: \
    case OP_NOTMINQUERY: \
    case OP_NOTMINSTARI: \
    case OP_NOTMINPLUSI: \
    case OP_NOTQUERYI: \
    case OP_NOTMINQUERYI:

/* Greedy star and plus character iterators: set_private_data_ptrs()
reserves two private data words. */
#define CASE_ITERATOR_PRIVATE_DATA_2A \
    case OP_STAR: \
    case OP_PLUS: \
    case OP_STARI: \
    case OP_PLUSI: \
    case OP_NOTSTAR: \
    case OP_NOTPLUS: \
    case OP_NOTSTARI: \
    case OP_NOTPLUSI:

/* Bounded character iterators, which carry an IMM2_SIZE count:
set_private_data_ptrs() reserves two private data words. */
#define CASE_ITERATOR_PRIVATE_DATA_2B \
    case OP_UPTO: \
    case OP_MINUPTO: \
    case OP_UPTOI: \
    case OP_MINUPTOI: \
    case OP_NOTUPTO: \
    case OP_NOTMINUPTO: \
    case OP_NOTUPTOI: \
    case OP_NOTMINUPTOI:

/* Character type iterators for which set_private_data_ptrs() reserves one
private data word. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
    case OP_TYPEMINSTAR: \
    case OP_TYPEMINPLUS: \
    case OP_TYPEQUERY: \
    case OP_TYPEMINQUERY:

/* Greedy star and plus character type iterators: set_private_data_ptrs()
reserves two private data words, unless the repeated type is OP_ANYNL or
OP_EXTUNI. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
    case OP_TYPESTAR: \
    case OP_TYPEPLUS:

/* Bounded character type iterators. NOTE(review): set_private_data_ptrs()
lists these two opcodes explicitly instead of using this macro; presumably
it is used by code further down in the file - confirm. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
    case OP_TYPEUPTO: \
    case OP_TYPEMINUPTO:
1836
/* Assigns private data offsets to the opcodes of the whole pattern, from
common->start up to ccend.

The offset given to an opcode is stored in common->private_data_ptrs at the
index of that opcode. Allocation begins at *private_data_start, and the
first unused offset is written back to *private_data_start on return.
Scanning stops early once the offset exceeds SLJIT_MAX_LOCAL_SIZE. */
static void set_private_data_ptrs(compiler_common *common, int *private_data_start, PCRE2_SPTR ccend)
{
PCRE2_SPTR cc = common->start;
PCRE2_SPTR alternative;
/* While cc < end, character iterators get no private slot of their own. */
PCRE2_SPTR end = NULL;
int private_data_ptr = *private_data_start;
/* space:      number of private data words needed by an iterator
   size:       code units to step over; a negative value means a character
               follows, which may have extra code units in UTF mode
   bracketlen: length of a bracket head, which is entered, not skipped */
int space, size, bracketlen;
BOOL repeat_check = TRUE;

while (cc < ccend)
  {
  space = 0;
  size = 0;
  bracketlen = 0;
  if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
    break;

  /* When the bracket is prefixed by a zero iteration, skip the repeat check (at this point). */
  if (repeat_check && (*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND))
    {
    if (detect_repeat(common, cc))
      {
      /* These brackets are converted to repeats, so no global
         based single character repeat is allowed. */
      if (cc >= end)
        end = bracketend(cc);
      }
    }
  repeat_check = TRUE;

  switch(*cc)
    {
    case OP_KET:
    /* A non-zero entry after the OP_KET was stored by detect_repeat(): it
    is the number of code units occupied by the copies of the bracket.
    The OP_KET gets a private slot and the copies are skipped. */
    if (common->private_data_ptrs[cc + 1 - common->start] != 0)
      {
      common->private_data_ptrs[cc - common->start] = private_data_ptr;
      private_data_ptr += sizeof(sljit_sw);
      cc += common->private_data_ptrs[cc + 1 - common->start];
      }
    cc += 1 + LINK_SIZE;
    break;

    /* Brackets which always get one private data word. */
    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ASSERT_NA:
    case OP_ONCE:
    case OP_SCRIPT_RUN:
    case OP_BRAPOS:
    case OP_SBRA:
    case OP_SBRAPOS:
    case OP_SCOND:
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw);
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_ASSERTBACK_NA:
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw);

    /* When find_vreverse() succeeds, the entry after the opcode is set
    to 1 and one more word is reserved. */
    if (find_vreverse(cc))
      {
      common->private_data_ptrs[cc + 1 - common->start] = 1;
      private_data_ptr += sizeof(sljit_sw);
      }

    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw);
    bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    /* Might be a hidden SCOND. */
    common->private_data_ptrs[cc - common->start] = 0;
    alternative = cc + GET(cc, 1);
    if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
      {
      common->private_data_ptrs[cc - common->start] = private_data_ptr;
      private_data_ptr += sizeof(sljit_sw);
      }
    bracketlen = 1 + LINK_SIZE;
    break;

    /* Plain brackets need no private data, they are only entered. */
    case OP_BRA:
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_SCBRA:
    bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_BRAZERO:
    case OP_BRAMINZERO:
    case OP_BRAPOSZERO:
    /* The bracket after a zero iteration prefix is not checked for repeats. */
    size = 1;
    repeat_check = FALSE;
    break;

    CASE_ITERATOR_PRIVATE_DATA_1
    size = -2;
    space = 1;
    break;

    CASE_ITERATOR_PRIVATE_DATA_2A
    size = -2;
    space = 2;
    break;

    CASE_ITERATOR_PRIVATE_DATA_2B
    size = -(2 + IMM2_SIZE);
    space = 2;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
    size = 1;
    space = 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
    size = 1;
    /* Iterators of OP_ANYNL and OP_EXTUNI get no private slot here. */
    if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
      space = 2;
    break;

    case OP_TYPEUPTO:
    size = 1 + IMM2_SIZE;
    if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
      space = 2;
    break;

    case OP_TYPEMINUPTO:
    size = 1 + IMM2_SIZE;
    space = 2;
    break;

    case OP_CLASS:
    case OP_NCLASS:
    /* The repeat opcode, if there is one, follows the class data. */
    size = 1 + 32 / sizeof(PCRE2_UCHAR);
    space = get_class_iterator_size(cc + size);
    break;

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
    size = GET(cc, 1);
    space = get_class_iterator_size(cc + size);
    break;
#endif

    default:
    /* space, size and bracketlen remain zero, so nothing below moves cc. */
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }

  /* Character iterators, which are not inside a repeated bracket,
     gets a private slot instead of allocating it on the stack. */
  if (space > 0 && cc >= end)
    {
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw) * space;
    }

  if (size != 0)
    {
    if (size < 0)
      {
      cc += -size;
#ifdef SUPPORT_UNICODE
      /* Step over the extra code units of a multi-unit character. */
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      }
    else
      cc += size;
    }

  if (bracketlen > 0)
    {
    /* Outside of any tracked region: a bracket which is not closed by a
    plain OP_KET starts a new region ending at its end, otherwise the
    tracking is cleared. */
    if (cc >= end)
      {
      end = bracketend(cc);
      if (end[-1 - LINK_SIZE] == OP_KET)
        end = NULL;
      }
    cc += bracketlen;
    }
  }
*private_data_start = private_data_ptr;
}
2033
2034 /* Returns with a frame_types (always < 0) if no need for frame. */
get_framesize(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,BOOL recursive,BOOL * needs_control_head)2035 static int get_framesize(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, BOOL recursive, BOOL *needs_control_head)
2036 {
2037 int length = 0;
2038 int possessive = 0;
2039 BOOL stack_restore = FALSE;
2040 BOOL setsom_found = recursive;
2041 BOOL setmark_found = recursive;
2042 /* The last capture is a local variable even for recursions. */
2043 BOOL capture_last_found = FALSE;
2044
2045 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
2046 SLJIT_ASSERT(common->control_head_ptr != 0);
2047 *needs_control_head = TRUE;
2048 #else
2049 *needs_control_head = FALSE;
2050 #endif
2051
2052 if (ccend == NULL)
2053 {
2054 ccend = bracketend(cc) - (1 + LINK_SIZE);
2055 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
2056 {
2057 possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
2058 /* This is correct regardless of common->capture_last_ptr. */
2059 capture_last_found = TRUE;
2060 }
2061 cc = next_opcode(common, cc);
2062 }
2063
2064 SLJIT_ASSERT(cc != NULL);
2065 while (cc < ccend)
2066 switch(*cc)
2067 {
2068 case OP_SET_SOM:
2069 SLJIT_ASSERT(common->has_set_som);
2070 stack_restore = TRUE;
2071 if (!setsom_found)
2072 {
2073 length += 2;
2074 setsom_found = TRUE;
2075 }
2076 cc += 1;
2077 break;
2078
2079 case OP_MARK:
2080 case OP_COMMIT_ARG:
2081 case OP_PRUNE_ARG:
2082 case OP_THEN_ARG:
2083 SLJIT_ASSERT(common->mark_ptr != 0);
2084 stack_restore = TRUE;
2085 if (!setmark_found)
2086 {
2087 length += 2;
2088 setmark_found = TRUE;
2089 }
2090 if (common->control_head_ptr != 0)
2091 *needs_control_head = TRUE;
2092 cc += 1 + 2 + cc[1];
2093 break;
2094
2095 case OP_RECURSE:
2096 stack_restore = TRUE;
2097 if (common->has_set_som && !setsom_found)
2098 {
2099 length += 2;
2100 setsom_found = TRUE;
2101 }
2102 if (common->mark_ptr != 0 && !setmark_found)
2103 {
2104 length += 2;
2105 setmark_found = TRUE;
2106 }
2107 if (common->capture_last_ptr != 0 && !capture_last_found)
2108 {
2109 length += 2;
2110 capture_last_found = TRUE;
2111 }
2112 cc += 1 + LINK_SIZE;
2113 break;
2114
2115 case OP_CBRA:
2116 case OP_CBRAPOS:
2117 case OP_SCBRA:
2118 case OP_SCBRAPOS:
2119 stack_restore = TRUE;
2120 if (common->capture_last_ptr != 0 && !capture_last_found)
2121 {
2122 length += 2;
2123 capture_last_found = TRUE;
2124 }
2125 length += 3;
2126 cc += 1 + LINK_SIZE + IMM2_SIZE;
2127 break;
2128
2129 case OP_THEN:
2130 stack_restore = TRUE;
2131 if (common->control_head_ptr != 0)
2132 *needs_control_head = TRUE;
2133 cc ++;
2134 break;
2135
2136 default:
2137 stack_restore = TRUE;
2138 /* Fall through. */
2139
2140 case OP_NOT_WORD_BOUNDARY:
2141 case OP_WORD_BOUNDARY:
2142 case OP_NOT_DIGIT:
2143 case OP_DIGIT:
2144 case OP_NOT_WHITESPACE:
2145 case OP_WHITESPACE:
2146 case OP_NOT_WORDCHAR:
2147 case OP_WORDCHAR:
2148 case OP_ANY:
2149 case OP_ALLANY:
2150 case OP_ANYBYTE:
2151 case OP_NOTPROP:
2152 case OP_PROP:
2153 case OP_ANYNL:
2154 case OP_NOT_HSPACE:
2155 case OP_HSPACE:
2156 case OP_NOT_VSPACE:
2157 case OP_VSPACE:
2158 case OP_EXTUNI:
2159 case OP_EODN:
2160 case OP_EOD:
2161 case OP_CIRC:
2162 case OP_CIRCM:
2163 case OP_DOLL:
2164 case OP_DOLLM:
2165 case OP_CHAR:
2166 case OP_CHARI:
2167 case OP_NOT:
2168 case OP_NOTI:
2169
2170 case OP_EXACT:
2171 case OP_POSSTAR:
2172 case OP_POSPLUS:
2173 case OP_POSQUERY:
2174 case OP_POSUPTO:
2175
2176 case OP_EXACTI:
2177 case OP_POSSTARI:
2178 case OP_POSPLUSI:
2179 case OP_POSQUERYI:
2180 case OP_POSUPTOI:
2181
2182 case OP_NOTEXACT:
2183 case OP_NOTPOSSTAR:
2184 case OP_NOTPOSPLUS:
2185 case OP_NOTPOSQUERY:
2186 case OP_NOTPOSUPTO:
2187
2188 case OP_NOTEXACTI:
2189 case OP_NOTPOSSTARI:
2190 case OP_NOTPOSPLUSI:
2191 case OP_NOTPOSQUERYI:
2192 case OP_NOTPOSUPTOI:
2193
2194 case OP_TYPEEXACT:
2195 case OP_TYPEPOSSTAR:
2196 case OP_TYPEPOSPLUS:
2197 case OP_TYPEPOSQUERY:
2198 case OP_TYPEPOSUPTO:
2199
2200 case OP_CLASS:
2201 case OP_NCLASS:
2202 case OP_XCLASS:
2203
2204 case OP_CALLOUT:
2205 case OP_CALLOUT_STR:
2206
2207 case OP_NOT_UCP_WORD_BOUNDARY:
2208 case OP_UCP_WORD_BOUNDARY:
2209
2210 cc = next_opcode(common, cc);
2211 SLJIT_ASSERT(cc != NULL);
2212 break;
2213 }
2214
2215 /* Possessive quantifiers can use a special case. */
2216 if (SLJIT_UNLIKELY(possessive == length))
2217 return stack_restore ? no_frame : no_stack;
2218
2219 if (length > 0)
2220 return length + 1;
2221 return stack_restore ? no_frame : no_stack;
2222 }
2223
init_frame(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,int stackpos,int stacktop)2224 static void init_frame(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, int stackpos, int stacktop)
2225 {
2226 DEFINE_COMPILER;
2227 BOOL setsom_found = FALSE;
2228 BOOL setmark_found = FALSE;
2229 /* The last capture is a local variable even for recursions. */
2230 BOOL capture_last_found = FALSE;
2231 int offset;
2232
2233 /* >= 1 + shortest item size (2) */
2234 SLJIT_UNUSED_ARG(stacktop);
2235 SLJIT_ASSERT(stackpos >= stacktop + 2);
2236
2237 stackpos = STACK(stackpos);
2238 if (ccend == NULL)
2239 {
2240 ccend = bracketend(cc) - (1 + LINK_SIZE);
2241 if (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS)
2242 cc = next_opcode(common, cc);
2243 }
2244
2245 SLJIT_ASSERT(cc != NULL);
2246 while (cc < ccend)
2247 switch(*cc)
2248 {
2249 case OP_SET_SOM:
2250 SLJIT_ASSERT(common->has_set_som);
2251 if (!setsom_found)
2252 {
2253 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
2254 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
2255 stackpos -= SSIZE_OF(sw);
2256 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2257 stackpos -= SSIZE_OF(sw);
2258 setsom_found = TRUE;
2259 }
2260 cc += 1;
2261 break;
2262
2263 case OP_MARK:
2264 case OP_COMMIT_ARG:
2265 case OP_PRUNE_ARG:
2266 case OP_THEN_ARG:
2267 SLJIT_ASSERT(common->mark_ptr != 0);
2268 if (!setmark_found)
2269 {
2270 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
2271 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
2272 stackpos -= SSIZE_OF(sw);
2273 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2274 stackpos -= SSIZE_OF(sw);
2275 setmark_found = TRUE;
2276 }
2277 cc += 1 + 2 + cc[1];
2278 break;
2279
2280 case OP_RECURSE:
2281 if (common->has_set_som && !setsom_found)
2282 {
2283 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
2284 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
2285 stackpos -= SSIZE_OF(sw);
2286 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2287 stackpos -= SSIZE_OF(sw);
2288 setsom_found = TRUE;
2289 }
2290 if (common->mark_ptr != 0 && !setmark_found)
2291 {
2292 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
2293 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
2294 stackpos -= SSIZE_OF(sw);
2295 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2296 stackpos -= SSIZE_OF(sw);
2297 setmark_found = TRUE;
2298 }
2299 if (common->capture_last_ptr != 0 && !capture_last_found)
2300 {
2301 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
2302 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
2303 stackpos -= SSIZE_OF(sw);
2304 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2305 stackpos -= SSIZE_OF(sw);
2306 capture_last_found = TRUE;
2307 }
2308 cc += 1 + LINK_SIZE;
2309 break;
2310
2311 case OP_CBRA:
2312 case OP_CBRAPOS:
2313 case OP_SCBRA:
2314 case OP_SCBRAPOS:
2315 if (common->capture_last_ptr != 0 && !capture_last_found)
2316 {
2317 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
2318 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
2319 stackpos -= SSIZE_OF(sw);
2320 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2321 stackpos -= SSIZE_OF(sw);
2322 capture_last_found = TRUE;
2323 }
2324 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
2325 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
2326 stackpos -= SSIZE_OF(sw);
2327 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
2328 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
2329 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2330 stackpos -= SSIZE_OF(sw);
2331 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
2332 stackpos -= SSIZE_OF(sw);
2333
2334 cc += 1 + LINK_SIZE + IMM2_SIZE;
2335 break;
2336
2337 default:
2338 cc = next_opcode(common, cc);
2339 SLJIT_ASSERT(cc != NULL);
2340 break;
2341 }
2342
2343 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
2344 SLJIT_ASSERT(stackpos == STACK(stacktop));
2345 }
2346
/* Number of scratch register slots the delayed memory copy helpers rotate
through. */
#define RECURSE_TMP_REG_COUNT 3

/* Bookkeeping for a pipelined memory-to-memory copy. Every slot couples one
scratch register with the destination of the value currently held in it; the
store is postponed until the slot is reused by delayed_mem_copy_move() or the
sequence is closed by delayed_mem_copy_finish(). */
typedef struct delayed_mem_copy_status {
  /* Compiler that receives the generated instructions. */
  struct sljit_compiler *compiler;
  /* Base register of the pending store of each slot, -1 when the slot has
  no pending store. */
  int store_bases[RECURSE_TMP_REG_COUNT];
  /* Offset (relative to store_bases[i]) of the pending store of each slot.
  NOTE(review): delayed_mem_copy_move() assigns an sljit_sw into this int;
  presumably the offsets always fit - confirm. */
  int store_offsets[RECURSE_TMP_REG_COUNT];
  /* Register that carries the loaded value of each slot. Filled in by the
  caller before delayed_mem_copy_init(). */
  int tmp_regs[RECURSE_TMP_REG_COUNT];
  /* Register that keeps the previous content of tmp_regs[i] while the slot
  is in use. It is only written when sljit_get_register_index() reports a
  negative index for it; otherwise it must be equal to tmp_regs[i]. */
  int saved_tmp_regs[RECURSE_TMP_REG_COUNT];
  /* Index of the slot the next delayed_mem_copy_move() call will use. */
  int next_tmp_reg;
} delayed_mem_copy_status;
2357
/* Starts a delayed copy sequence.

The caller must have filled in status->tmp_regs[] and status->saved_tmp_regs[]
beforehand (they are only checked here, never assigned). Every slot is marked
as having no pending store, the slot cursor is rewound to slot 0 and the
compiler of 'common' is recorded for the later move / finish calls. */
static void delayed_mem_copy_init(delayed_mem_copy_status *status, compiler_common *common)
{
  int i;

  for (i = 0; i < RECURSE_TMP_REG_COUNT; i++)
  {
    SLJIT_ASSERT(status->tmp_regs[i] >= 0);
    /* Either the save register has no register index, or the slot works
    directly in the save register itself. */
    SLJIT_ASSERT(sljit_get_register_index(SLJIT_GP_REGISTER, status->saved_tmp_regs[i]) < 0 || status->tmp_regs[i] == status->saved_tmp_regs[i]);

    status->store_bases[i] = -1;
  }
  status->next_tmp_reg = 0;
  status->compiler = common->compiler;
}
2372
/* Queues the copy of one machine word from [load_base + load_offset] to
[store_base + store_offset].

The load is emitted immediately into the scratch register of the current
slot, while the matching store is only remembered. Before the register is
overwritten, the store remembered by the previous use of the same slot is
emitted; on the very first use of a slot the old register content is moved to
the save register instead, but only when that save register has no register
index. Afterwards the cursor advances to the next slot (round robin). */
static void delayed_mem_copy_move(delayed_mem_copy_status *status, int load_base, sljit_sw load_offset,
  int store_base, sljit_sw store_offset)
{
  /* Presumably referenced by the OP1 macro - defined outside this view. */
  struct sljit_compiler *compiler = status->compiler;
  int next_tmp_reg = status->next_tmp_reg;
  int tmp_reg = status->tmp_regs[next_tmp_reg];

  SLJIT_ASSERT(load_base > 0 && store_base > 0);

  if (status->store_bases[next_tmp_reg] == -1)
  {
    /* Preserve virtual registers. */
    if (sljit_get_register_index(SLJIT_GP_REGISTER, status->saved_tmp_regs[next_tmp_reg]) < 0)
      OP1(SLJIT_MOV, status->saved_tmp_regs[next_tmp_reg], 0, tmp_reg, 0);
  }
  else
    /* Flush the store that was postponed by the previous use of this slot. */
    OP1(SLJIT_MOV, SLJIT_MEM1(status->store_bases[next_tmp_reg]), status->store_offsets[next_tmp_reg], tmp_reg, 0);

  OP1(SLJIT_MOV, tmp_reg, 0, SLJIT_MEM1(load_base), load_offset);
  status->store_bases[next_tmp_reg] = store_base;
  status->store_offsets[next_tmp_reg] = store_offset;

  status->next_tmp_reg = (next_tmp_reg + 1) % RECURSE_TMP_REG_COUNT;
}
2397
/* Closes a delayed copy sequence.

Visits all slots once, starting at the slot that would be used next (that is,
the one whose pending store is the oldest). For every slot that still has a
pending store the store is emitted, and the scratch register is reloaded from
its save register when that save register has no register index. */
static void delayed_mem_copy_finish(delayed_mem_copy_status *status)
{
  /* Presumably referenced by the OP1 macro - defined outside this view. */
  struct sljit_compiler *compiler = status->compiler;
  int next_tmp_reg = status->next_tmp_reg;
  int tmp_reg, saved_tmp_reg, i;

  for (i = 0; i < RECURSE_TMP_REG_COUNT; i++)
  {
    if (status->store_bases[next_tmp_reg] != -1)
    {
      tmp_reg = status->tmp_regs[next_tmp_reg];
      saved_tmp_reg = status->saved_tmp_regs[next_tmp_reg];

      OP1(SLJIT_MOV, SLJIT_MEM1(status->store_bases[next_tmp_reg]), status->store_offsets[next_tmp_reg], tmp_reg, 0);

      /* Restore virtual registers. */
      if (sljit_get_register_index(SLJIT_GP_REGISTER, saved_tmp_reg) < 0)
        OP1(SLJIT_MOV, tmp_reg, 0, saved_tmp_reg, 0);
    }

    next_tmp_reg = (next_tmp_reg + 1) % RECURSE_TMP_REG_COUNT;
  }
}
2421
/* The slot count is private to the delayed memory copy helpers above. */
#undef RECURSE_TMP_REG_COUNT
2423
recurse_check_bit(compiler_common * common,sljit_sw bit_index)2424 static BOOL recurse_check_bit(compiler_common *common, sljit_sw bit_index)
2425 {
2426 uint8_t *byte;
2427 uint8_t mask;
2428
2429 SLJIT_ASSERT((bit_index & (sizeof(sljit_sw) - 1)) == 0);
2430
2431 bit_index >>= SLJIT_WORD_SHIFT;
2432
2433 SLJIT_ASSERT((bit_index >> 3) < common->recurse_bitset_size);
2434
2435 mask = 1 << (bit_index & 0x7);
2436 byte = common->recurse_bitset + (bit_index >> 3);
2437
2438 if (*byte & mask)
2439 return FALSE;
2440
2441 *byte |= mask;
2442 return TRUE;
2443 }
2444
/* Bit flags collected by get_recurse_data_length() and consumed by
copy_recurse_data(). Each value is a distinct single bit so they can be
combined with bitwise OR. */
enum get_recurse_flags {
  /* Set for PRUNE, SKIP, COMMIT, THEN and their _ARG variants. */
  recurse_flag_quit_found = (1 << 0),
  /* Set for OP_ACCEPT and OP_ASSERT_ACCEPT. */
  recurse_flag_accept_found = (1 << 1),
  /* Set for OP_SET_SOM, and for OP_RECURSE when common->has_set_som. */
  recurse_flag_setsom_found = (1 << 2),
  /* Set for the mark-setting verbs, and for OP_RECURSE when
  common->mark_ptr is non-zero. */
  recurse_flag_setmark_found = (1 << 3),
  /* Set when the control head has to be saved as well. */
  recurse_flag_control_head_found = (1 << 4)
};
2452
/* Computes how many machine words have to be saved for the byte code range
[cc, ccend) of a recursion, and collects the get_recurse_flags bits seen in
that range.

The count starts at one word (copy_recurse_data() stores
common->recursive_head_ptr in the first slot). While scanning the opcodes
every storage location is counted only once: common->recurse_bitset is cleared
first and recurse_check_bit() reports whether a location is new. After the
scan one word is added for the control head when it was requested, and one
word each for the start-of-match and mark values when a quit-type verb was
found together with the corresponding flag.

Arguments:
  common        compiler state; recurse_bitset is overwritten
  cc            first opcode of the range
  ccend         end of the range (the scan must stop exactly here)
  result_flags  receives the collected get_recurse_flags bits

Returns:        the number of machine words
*/
static int get_recurse_data_length(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, uint32_t *result_flags)
{
  int length = 1;
  int size, offset;
  PCRE2_SPTR alternative;
  uint32_t recurse_flags = 0;

  memset(common->recurse_bitset, 0, common->recurse_bitset_size);

#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
  SLJIT_ASSERT(common->control_head_ptr != 0);
  recurse_flags |= recurse_flag_control_head_found;
#endif

  /* Calculate the sum of the private machine words. */
  while (cc < ccend)
  {
    size = 0;
    switch(*cc)
    {
      case OP_SET_SOM:
        SLJIT_ASSERT(common->has_set_som);
        recurse_flags |= recurse_flag_setsom_found;
        cc += 1;
        break;

      case OP_RECURSE:
        if (common->has_set_som)
          recurse_flags |= recurse_flag_setsom_found;
        if (common->mark_ptr != 0)
          recurse_flags |= recurse_flag_setmark_found;
        if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
          length++;
        cc += 1 + LINK_SIZE;
        break;

      case OP_KET:
        offset = PRIVATE_DATA(cc);
        if (offset != 0)
        {
          if (recurse_check_bit(common, offset))
            length++;
          SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
          /* The private data of the next position holds the extra amount
          to skip. */
          cc += PRIVATE_DATA(cc + 1);
        }
        cc += 1 + LINK_SIZE;
        break;

      case OP_ASSERT:
      case OP_ASSERT_NOT:
      case OP_ASSERTBACK:
      case OP_ASSERTBACK_NOT:
      case OP_ASSERT_NA:
      case OP_ASSERTBACK_NA:
      case OP_ONCE:
      case OP_SCRIPT_RUN:
      case OP_BRAPOS:
      case OP_SBRA:
      case OP_SBRAPOS:
      case OP_SCOND:
        SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
        if (recurse_check_bit(common, PRIVATE_DATA(cc)))
          length++;
        cc += 1 + LINK_SIZE;
        break;

      case OP_CBRA:
      case OP_SCBRA:
        offset = GET2(cc, 1 + LINK_SIZE);
        /* Both words of the capture pair are counted together. */
        if (recurse_check_bit(common, OVECTOR(offset << 1)))
        {
          SLJIT_ASSERT(recurse_check_bit(common, OVECTOR((offset << 1) + 1)));
          length += 2;
        }
        if (common->optimized_cbracket[offset] == 0 && recurse_check_bit(common, OVECTOR_PRIV(offset)))
          length++;
        if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
          length++;
        cc += 1 + LINK_SIZE + IMM2_SIZE;
        break;

      case OP_CBRAPOS:
      case OP_SCBRAPOS:
        offset = GET2(cc, 1 + LINK_SIZE);
        if (recurse_check_bit(common, OVECTOR(offset << 1)))
        {
          SLJIT_ASSERT(recurse_check_bit(common, OVECTOR((offset << 1) + 1)));
          length += 2;
        }
        if (recurse_check_bit(common, OVECTOR_PRIV(offset)))
          length++;
        if (recurse_check_bit(common, PRIVATE_DATA(cc)))
          length++;
        if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
          length++;
        cc += 1 + LINK_SIZE + IMM2_SIZE;
        break;

      case OP_COND:
        /* Might be a hidden SCOND. */
        alternative = cc + GET(cc, 1);
        if ((*alternative == OP_KETRMAX || *alternative == OP_KETRMIN) && recurse_check_bit(common, PRIVATE_DATA(cc)))
          length++;
        cc += 1 + LINK_SIZE;
        break;

      CASE_ITERATOR_PRIVATE_DATA_1
        offset = PRIVATE_DATA(cc);
        if (offset != 0 && recurse_check_bit(common, offset))
          length++;
        cc += 2;
#ifdef SUPPORT_UNICODE
        if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
        break;

      CASE_ITERATOR_PRIVATE_DATA_2A
        offset = PRIVATE_DATA(cc);
        if (offset != 0 && recurse_check_bit(common, offset))
        {
          SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw)));
          length += 2;
        }
        cc += 2;
#ifdef SUPPORT_UNICODE
        if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
        break;

      CASE_ITERATOR_PRIVATE_DATA_2B
        offset = PRIVATE_DATA(cc);
        if (offset != 0 && recurse_check_bit(common, offset))
        {
          SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw)));
          length += 2;
        }
        cc += 2 + IMM2_SIZE;
#ifdef SUPPORT_UNICODE
        if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
        break;

      CASE_ITERATOR_TYPE_PRIVATE_DATA_1
        offset = PRIVATE_DATA(cc);
        if (offset != 0 && recurse_check_bit(common, offset))
          length++;
        cc += 1;
        break;

      CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
        offset = PRIVATE_DATA(cc);
        if (offset != 0 && recurse_check_bit(common, offset))
        {
          SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw)));
          length += 2;
        }
        cc += 1;
        break;

      CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
        offset = PRIVATE_DATA(cc);
        if (offset != 0 && recurse_check_bit(common, offset))
        {
          SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw)));
          length += 2;
        }
        cc += 1 + IMM2_SIZE;
        break;

      case OP_CLASS:
      case OP_NCLASS:
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
      case OP_XCLASS:
        size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR);
#else
        size = 1 + 32 / (int)sizeof(PCRE2_UCHAR);
#endif

        /* The number of words depends on the iterator that follows the
        class (cc + size). */
        offset = PRIVATE_DATA(cc);
        if (offset != 0 && recurse_check_bit(common, offset))
          length += get_class_iterator_size(cc + size);
        cc += size;
        break;

      case OP_MARK:
      case OP_COMMIT_ARG:
      case OP_PRUNE_ARG:
      case OP_THEN_ARG:
        SLJIT_ASSERT(common->mark_ptr != 0);
        recurse_flags |= recurse_flag_setmark_found;
        if (common->control_head_ptr != 0)
          recurse_flags |= recurse_flag_control_head_found;
        if (*cc != OP_MARK)
          recurse_flags |= recurse_flag_quit_found;

        cc += 1 + 2 + cc[1];
        break;

      case OP_PRUNE:
      case OP_SKIP:
      case OP_COMMIT:
        recurse_flags |= recurse_flag_quit_found;
        cc++;
        break;

      case OP_SKIP_ARG:
        recurse_flags |= recurse_flag_quit_found;
        cc += 1 + 2 + cc[1];
        break;

      case OP_THEN:
        SLJIT_ASSERT(common->control_head_ptr != 0);
        recurse_flags |= recurse_flag_quit_found | recurse_flag_control_head_found;
        cc++;
        break;

      case OP_ACCEPT:
      case OP_ASSERT_ACCEPT:
        recurse_flags |= recurse_flag_accept_found;
        cc++;
        break;

      default:
        cc = next_opcode(common, cc);
        SLJIT_ASSERT(cc != NULL);
        break;
    }
  }
  SLJIT_ASSERT(cc == ccend);

  if (recurse_flags & recurse_flag_control_head_found)
    length++;
  /* The start-of-match and mark values are only kept when a quit-type verb
  is present in the range. */
  if (recurse_flags & recurse_flag_quit_found)
  {
    if (recurse_flags & recurse_flag_setsom_found)
      length++;
    if (recurse_flags & recurse_flag_setmark_found)
      length++;
  }

  *result_flags = recurse_flags;
  return length;
}
2696
/* Operation selector ('type' argument) of copy_recurse_data(). The values
are consecutive, starting at zero. */
enum copy_recurse_data_types {
  /* Save the local values into the stack area (private, shared and kept
  shared data). */
  recurse_copy_from_global,
  /* Load only the private data back from the stack area. */
  recurse_copy_private_to_global,
  /* Load the shared and the kept shared data back from the stack area. */
  recurse_copy_shared_to_global,
  /* Load only the kept shared data back from the stack area. */
  recurse_copy_kept_shared_to_global,
  /* Exchange the private and shared data with the stack area addressed
  through TMP2. */
  recurse_swap_global
};
2704
/* Emits the code which copies the data of a recursion between the local
variables (addressed through SLJIT_SP) and a stack area (addressed through a
base register), for the byte code range [cc, ccend).

The opcodes are scanned in the same way as in get_recurse_data_length(), so
both functions agree on the layout: slot 0 holds common->recursive_head_ptr,
then for each opcode its private words, its shared words and its kept shared
words follow. Each location is processed once (common->recurse_bitset).

Which groups are copied depends on 'type':
  recurse_copy_from_global            private, shared, kept shared: local -> stack
  recurse_copy_private_to_global      private: stack -> local
  recurse_copy_shared_to_global       shared, kept shared: stack -> local
  recurse_copy_kept_shared_to_global  kept shared: stack -> local
  recurse_swap_global                 private, shared: exchanged, base is TMP2
Groups which are not copied still advance the stack offset, so the layout is
identical for every type.

For recurse_swap_global two moves are queued per word. Since
delayed_mem_copy_move() emits loads immediately and postpones stores, the
local value is loaded before the stack value is written over it.

Arguments:
  common         compiler state; recurse_bitset is overwritten
  cc             first opcode of the range
  ccend          end of the range
  type           one of copy_recurse_data_types
  stackptr       index of the first slot (converted with STACK())
  stacktop       index one past the last slot (converted with STACK()),
                   only used for the final consistency assertion
  recurse_flags  flags returned by get_recurse_data_length()
*/
static void copy_recurse_data(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend,
  int type, int stackptr, int stacktop, uint32_t recurse_flags)
{
  delayed_mem_copy_status status;
  PCRE2_SPTR alternative;
  sljit_sw private_srcw[2];
  sljit_sw shared_srcw[3];
  sljit_sw kept_shared_srcw[2];
  int private_count, shared_count, kept_shared_count;
  int from_sp, base_reg, offset, i;

  memset(common->recurse_bitset, 0, common->recurse_bitset_size);

#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
  SLJIT_ASSERT(common->control_head_ptr != 0);
  recurse_check_bit(common, common->control_head_ptr);
#endif

  switch (type)
  {
    case recurse_copy_from_global:
      from_sp = TRUE;
      base_reg = STACK_TOP;
      break;

    case recurse_copy_private_to_global:
    case recurse_copy_shared_to_global:
    case recurse_copy_kept_shared_to_global:
      from_sp = FALSE;
      base_reg = STACK_TOP;
      break;

    default:
      SLJIT_ASSERT(type == recurse_swap_global);
      from_sp = FALSE;
      base_reg = TMP2;
      break;
  }

  stackptr = STACK(stackptr);
  stacktop = STACK(stacktop);

  /* Select the three scratch slots. TMP2 cannot be a scratch register when
  it is the base register. */
  status.tmp_regs[0] = TMP1;
  status.saved_tmp_regs[0] = TMP1;

  if (base_reg != TMP2)
  {
    status.tmp_regs[1] = TMP2;
    status.saved_tmp_regs[1] = TMP2;
  }
  else
  {
    status.saved_tmp_regs[1] = RETURN_ADDR;
    if (HAS_VIRTUAL_REGISTERS)
      status.tmp_regs[1] = STR_PTR;
    else
      status.tmp_regs[1] = RETURN_ADDR;
  }

  status.saved_tmp_regs[2] = TMP3;
  if (HAS_VIRTUAL_REGISTERS)
    status.tmp_regs[2] = STR_END;
  else
    status.tmp_regs[2] = TMP3;

  delayed_mem_copy_init(&status, common);

  /* Slot 0: the recursive head. */
  if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)
  {
    SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global);

    if (!from_sp)
      delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, common->recursive_head_ptr);

    if (from_sp || type == recurse_swap_global)
      delayed_mem_copy_move(&status, SLJIT_SP, common->recursive_head_ptr, base_reg, stackptr);
  }

  stackptr += sizeof(sljit_sw);

#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
  if (type != recurse_copy_shared_to_global)
  {
    if (!from_sp)
      delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, common->control_head_ptr);

    if (from_sp || type == recurse_swap_global)
      delayed_mem_copy_move(&status, SLJIT_SP, common->control_head_ptr, base_reg, stackptr);
  }

  stackptr += sizeof(sljit_sw);
#endif

  while (cc < ccend)
  {
    private_count = 0;
    shared_count = 0;
    kept_shared_count = 0;

    switch(*cc)
    {
      case OP_SET_SOM:
        SLJIT_ASSERT(common->has_set_som);
        if ((recurse_flags & recurse_flag_quit_found) && recurse_check_bit(common, OVECTOR(0)))
        {
          kept_shared_srcw[0] = OVECTOR(0);
          kept_shared_count = 1;
        }
        cc += 1;
        break;

      case OP_RECURSE:
        if (recurse_flags & recurse_flag_quit_found)
        {
          if (common->has_set_som && recurse_check_bit(common, OVECTOR(0)))
          {
            kept_shared_srcw[0] = OVECTOR(0);
            kept_shared_count = 1;
          }
          if (common->mark_ptr != 0 && recurse_check_bit(common, common->mark_ptr))
          {
            kept_shared_srcw[kept_shared_count] = common->mark_ptr;
            kept_shared_count++;
          }
        }
        if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
        {
          shared_srcw[0] = common->capture_last_ptr;
          shared_count = 1;
        }
        cc += 1 + LINK_SIZE;
        break;

      case OP_KET:
        private_srcw[0] = PRIVATE_DATA(cc);
        if (private_srcw[0] != 0)
        {
          if (recurse_check_bit(common, private_srcw[0]))
            private_count = 1;
          SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
          cc += PRIVATE_DATA(cc + 1);
        }
        cc += 1 + LINK_SIZE;
        break;

      case OP_ASSERT:
      case OP_ASSERT_NOT:
      case OP_ASSERTBACK:
      case OP_ASSERTBACK_NOT:
      case OP_ASSERT_NA:
      case OP_ASSERTBACK_NA:
      case OP_ONCE:
      case OP_SCRIPT_RUN:
      case OP_BRAPOS:
      case OP_SBRA:
      case OP_SBRAPOS:
      case OP_SCOND:
        private_srcw[0] = PRIVATE_DATA(cc);
        if (recurse_check_bit(common, private_srcw[0]))
          private_count = 1;
        cc += 1 + LINK_SIZE;
        break;

      case OP_CBRA:
      case OP_SCBRA:
        offset = GET2(cc, 1 + LINK_SIZE);
        shared_srcw[0] = OVECTOR(offset << 1);
        if (recurse_check_bit(common, shared_srcw[0]))
        {
          shared_srcw[1] = shared_srcw[0] + sizeof(sljit_sw);
          SLJIT_ASSERT(recurse_check_bit(common, shared_srcw[1]));
          shared_count = 2;
        }

        if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
        {
          shared_srcw[shared_count] = common->capture_last_ptr;
          shared_count++;
        }

        if (common->optimized_cbracket[offset] == 0)
        {
          private_srcw[0] = OVECTOR_PRIV(offset);
          if (recurse_check_bit(common, private_srcw[0]))
            private_count = 1;
        }

        cc += 1 + LINK_SIZE + IMM2_SIZE;
        break;

      case OP_CBRAPOS:
      case OP_SCBRAPOS:
        offset = GET2(cc, 1 + LINK_SIZE);
        shared_srcw[0] = OVECTOR(offset << 1);
        if (recurse_check_bit(common, shared_srcw[0]))
        {
          shared_srcw[1] = shared_srcw[0] + sizeof(sljit_sw);
          SLJIT_ASSERT(recurse_check_bit(common, shared_srcw[1]));
          shared_count = 2;
        }

        if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
        {
          shared_srcw[shared_count] = common->capture_last_ptr;
          shared_count++;
        }

        private_srcw[0] = PRIVATE_DATA(cc);
        if (recurse_check_bit(common, private_srcw[0]))
          private_count = 1;

        offset = OVECTOR_PRIV(offset);
        if (recurse_check_bit(common, offset))
        {
          private_srcw[private_count] = offset;
          private_count++;
        }
        cc += 1 + LINK_SIZE + IMM2_SIZE;
        break;

      case OP_COND:
        /* Might be a hidden SCOND. */
        alternative = cc + GET(cc, 1);
        if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
        {
          private_srcw[0] = PRIVATE_DATA(cc);
          if (recurse_check_bit(common, private_srcw[0]))
            private_count = 1;
        }
        cc += 1 + LINK_SIZE;
        break;

      CASE_ITERATOR_PRIVATE_DATA_1
        private_srcw[0] = PRIVATE_DATA(cc);
        if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
          private_count = 1;
        cc += 2;
#ifdef SUPPORT_UNICODE
        if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
        break;

      CASE_ITERATOR_PRIVATE_DATA_2A
        private_srcw[0] = PRIVATE_DATA(cc);
        if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
        {
          private_count = 2;
          private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
          SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
        }
        cc += 2;
#ifdef SUPPORT_UNICODE
        if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
        break;

      CASE_ITERATOR_PRIVATE_DATA_2B
        private_srcw[0] = PRIVATE_DATA(cc);
        if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
        {
          private_count = 2;
          private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
          SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
        }
        cc += 2 + IMM2_SIZE;
#ifdef SUPPORT_UNICODE
        if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
        break;

      CASE_ITERATOR_TYPE_PRIVATE_DATA_1
        private_srcw[0] = PRIVATE_DATA(cc);
        if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
          private_count = 1;
        cc += 1;
        break;

      CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
        private_srcw[0] = PRIVATE_DATA(cc);
        if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
        {
          private_count = 2;
          private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
          SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
        }
        cc += 1;
        break;

      CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
        private_srcw[0] = PRIVATE_DATA(cc);
        if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
        {
          private_count = 2;
          private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
          SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
        }
        cc += 1 + IMM2_SIZE;
        break;

      case OP_CLASS:
      case OP_NCLASS:
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
      case OP_XCLASS:
        i = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR);
#else
        i = 1 + 32 / (int)sizeof(PCRE2_UCHAR);
#endif
        /* NOTE(review): unlike get_recurse_data_length(), the one word case
        does not consult recurse_check_bit() here; presumably the private
        data of a class is never shared with another opcode - confirm. */
        if (PRIVATE_DATA(cc) != 0)
        {
          private_count = 1;
          private_srcw[0] = PRIVATE_DATA(cc);
          switch(get_class_iterator_size(cc + i))
          {
            case 1:
              break;

            case 2:
              if (recurse_check_bit(common, private_srcw[0]))
              {
                private_count = 2;
                private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
                SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
              }
              break;

            default:
              SLJIT_UNREACHABLE();
              break;
          }
        }
        cc += i;
        break;

      case OP_MARK:
      case OP_COMMIT_ARG:
      case OP_PRUNE_ARG:
      case OP_THEN_ARG:
        SLJIT_ASSERT(common->mark_ptr != 0);
        if ((recurse_flags & recurse_flag_quit_found) && recurse_check_bit(common, common->mark_ptr))
        {
          kept_shared_srcw[0] = common->mark_ptr;
          kept_shared_count = 1;
        }
        if (common->control_head_ptr != 0 && recurse_check_bit(common, common->control_head_ptr))
        {
          private_srcw[0] = common->control_head_ptr;
          private_count = 1;
        }
        cc += 1 + 2 + cc[1];
        break;

      case OP_THEN:
        SLJIT_ASSERT(common->control_head_ptr != 0);
        if (recurse_check_bit(common, common->control_head_ptr))
        {
          private_srcw[0] = common->control_head_ptr;
          private_count = 1;
        }
        cc++;
        break;

      default:
        /* Nothing to copy for this opcode: go straight to the next one. */
        cc = next_opcode(common, cc);
        SLJIT_ASSERT(cc != NULL);
        continue;
    }

    /* Private words. */
    if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)
    {
      SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global);

      for (i = 0; i < private_count; i++)
      {
        SLJIT_ASSERT(private_srcw[i] != 0);

        if (!from_sp)
          delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, private_srcw[i]);

        if (from_sp || type == recurse_swap_global)
          delayed_mem_copy_move(&status, SLJIT_SP, private_srcw[i], base_reg, stackptr);

        stackptr += sizeof(sljit_sw);
      }
    }
    else
      stackptr += sizeof(sljit_sw) * private_count;

    /* Shared words. */
    if (type != recurse_copy_private_to_global && type != recurse_copy_kept_shared_to_global)
    {
      SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_swap_global);

      for (i = 0; i < shared_count; i++)
      {
        SLJIT_ASSERT(shared_srcw[i] != 0);

        if (!from_sp)
          delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, shared_srcw[i]);

        if (from_sp || type == recurse_swap_global)
          delayed_mem_copy_move(&status, SLJIT_SP, shared_srcw[i], base_reg, stackptr);

        stackptr += sizeof(sljit_sw);
      }
    }
    else
      stackptr += sizeof(sljit_sw) * shared_count;

    /* Kept shared words. */
    if (type != recurse_copy_private_to_global && type != recurse_swap_global)
    {
      SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_copy_kept_shared_to_global);

      for (i = 0; i < kept_shared_count; i++)
      {
        SLJIT_ASSERT(kept_shared_srcw[i] != 0);

        if (!from_sp)
          delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, kept_shared_srcw[i]);

        if (from_sp || type == recurse_swap_global)
          delayed_mem_copy_move(&status, SLJIT_SP, kept_shared_srcw[i], base_reg, stackptr);

        stackptr += sizeof(sljit_sw);
      }
    }
    else
      stackptr += sizeof(sljit_sw) * kept_shared_count;
  }

  SLJIT_ASSERT(cc == ccend && stackptr == stacktop);

  delayed_mem_copy_finish(&status);
}
3137
set_then_offsets(compiler_common * common,PCRE2_SPTR cc,sljit_u8 * current_offset)3138 static SLJIT_INLINE PCRE2_SPTR set_then_offsets(compiler_common *common, PCRE2_SPTR cc, sljit_u8 *current_offset)
3139 {
3140 PCRE2_SPTR end = bracketend(cc);
3141 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
3142
3143 /* Assert captures then. */
3144 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA)
3145 current_offset = NULL;
3146 /* Conditional block does not. */
3147 if (*cc == OP_COND || *cc == OP_SCOND)
3148 has_alternatives = FALSE;
3149
3150 cc = next_opcode(common, cc);
3151
3152 if (has_alternatives)
3153 {
3154 if (*cc == OP_REVERSE)
3155 cc += 1 + IMM2_SIZE;
3156 else if (*cc == OP_VREVERSE)
3157 cc += 1 + 2 * IMM2_SIZE;
3158
3159 current_offset = common->then_offsets + (cc - common->start);
3160 }
3161
3162 while (cc < end)
3163 {
3164 if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
3165 cc = set_then_offsets(common, cc, current_offset);
3166 else
3167 {
3168 if (*cc == OP_ALT && has_alternatives)
3169 {
3170 cc += 1 + LINK_SIZE;
3171
3172 if (*cc == OP_REVERSE)
3173 cc += 1 + IMM2_SIZE;
3174 else if (*cc == OP_VREVERSE)
3175 cc += 1 + 2 * IMM2_SIZE;
3176
3177 current_offset = common->then_offsets + (cc - common->start);
3178 continue;
3179 }
3180
3181 if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
3182 *current_offset = 1;
3183 cc = next_opcode(common, cc);
3184 }
3185 }
3186
3187 return end;
3188 }
3189
/* The case label helper macros are not used beyond this point. */
#undef CASE_ITERATOR_PRIVATE_DATA_1
#undef CASE_ITERATOR_PRIVATE_DATA_2A
#undef CASE_ITERATOR_PRIVATE_DATA_2B
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
3196
is_powerof2(unsigned int value)3197 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
3198 {
3199 return (value & (value - 1)) == 0;
3200 }
3201
set_jumps(jump_list * list,struct sljit_label * label)3202 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
3203 {
3204 while (list != NULL)
3205 {
3206 /* sljit_set_label is clever enough to do nothing
3207 if either the jump or the label is NULL. */
3208 SET_LABEL(list->jump, label);
3209 list = list->next;
3210 }
3211 }
3212
add_jump(struct sljit_compiler * compiler,jump_list ** list,struct sljit_jump * jump)3213 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump)
3214 {
3215 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
3216 if (list_item)
3217 {
3218 list_item->next = *list;
3219 list_item->jump = jump;
3220 *list = list_item;
3221 }
3222 }
3223
/* Registers a stub: 'start' is the jump which enters the stub, and a label
created at the current position is recorded as the place where the stub
continues afterwards. The code of the stub is emitted later by flush_stubs().
Nothing is recorded when the list item cannot be allocated. */
static void add_stub(compiler_common *common, struct sljit_jump *start)
{
  DEFINE_COMPILER;
  stub_list *list_item = sljit_alloc_memory(compiler, sizeof(stub_list));

  if (list_item)
  {
    list_item->start = start;
    list_item->quit = LABEL();
    list_item->next = common->stubs;
    common->stubs = list_item;
  }
}
3237
/* Emits the code of every stub registered by add_stub(): the entry jump is
bound to the current position, a fast call is emitted whose target is
resolved through the common->stackalloc jump list, and finally a jump returns
to the label recorded for the stub. The stub list is emptied afterwards. */
static void flush_stubs(compiler_common *common)
{
  DEFINE_COMPILER;
  stub_list *list_item = common->stubs;

  while (list_item)
  {
    JUMPHERE(list_item->start);
    add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
    JUMPTO(SLJIT_JUMP, list_item->quit);
    list_item = list_item->next;
  }
  common->stubs = NULL;
}
3252
count_match(compiler_common * common)3253 static SLJIT_INLINE void count_match(compiler_common *common)
3254 {
3255 DEFINE_COMPILER;
3256
3257 OP2(SLJIT_SUB | SLJIT_SET_Z, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
3258 add_jump(compiler, &common->calllimit, JUMP(SLJIT_ZERO));
3259 }
3260
allocate_stack(compiler_common * common,int size)3261 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
3262 {
3263 /* May destroy all locals and registers except TMP2. */
3264 DEFINE_COMPILER;
3265
3266 SLJIT_ASSERT(size > 0);
3267 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * SSIZE_OF(sw));
3268 #ifdef DESTROY_REGISTERS
3269 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
3270 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3271 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
3272 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0);
3273 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
3274 #endif
3275 add_stub(common, CMP(SLJIT_LESS, STACK_TOP, 0, STACK_LIMIT, 0));
3276 }
3277
free_stack(compiler_common * common,int size)3278 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
3279 {
3280 DEFINE_COMPILER;
3281
3282 SLJIT_ASSERT(size > 0);
3283 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * SSIZE_OF(sw));
3284 }
3285
/* Allocates a block of 'size' bytes which is linked into the
common->read_only_data_head list.

One extra sljit_uw is allocated in front of the block; it stores the previous
list head, and the pointer just after it is returned to the caller.

Returns NULL when the compiler is already in an error state, or when the
allocation fails (in that case the compiler memory error is set as well). */
static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
{
  DEFINE_COMPILER;
  sljit_uw *result;

  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return NULL;

  result = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data);
  if (SLJIT_UNLIKELY(result == NULL))
  {
    sljit_set_compiler_memory_error(compiler);
    return NULL;
  }

  /* The first word is the link to the previously allocated block. */
  *(void**)result = common->read_only_data_head;
  common->read_only_data_head = (void *)result;
  return result + 1;
}
3305
reset_ovector(compiler_common * common,int length)3306 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
3307 {
3308 DEFINE_COMPILER;
3309 struct sljit_label *loop;
3310 sljit_s32 i;
3311
3312 /* At this point we can freely use all temporary registers. */
3313 SLJIT_ASSERT(length > 1);
3314 /* TMP1 returns with begin - 1. */
3315 OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
3316 if (length < 8)
3317 {
3318 for (i = 1; i < length; i++)
3319 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0);
3320 }
3321 else
3322 {
3323 if (sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw)) == SLJIT_SUCCESS)
3324 {
3325 GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
3326 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
3327 loop = LABEL();
3328 sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw));
3329 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
3330 JUMPTO(SLJIT_NOT_ZERO, loop);
3331 }
3332 else
3333 {
3334 GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START + sizeof(sljit_sw));
3335 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
3336 loop = LABEL();
3337 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R0, 0);
3338 OP2(SLJIT_ADD, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, sizeof(sljit_sw));
3339 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
3340 JUMPTO(SLJIT_NOT_ZERO, loop);
3341 }
3342 }
3343 }
3344
reset_early_fail(compiler_common * common)3345 static SLJIT_INLINE void reset_early_fail(compiler_common *common)
3346 {
3347 DEFINE_COMPILER;
3348 sljit_u32 size = (sljit_u32)(common->early_fail_end_ptr - common->early_fail_start_ptr);
3349 sljit_u32 uncleared_size;
3350 sljit_s32 src = SLJIT_IMM;
3351 sljit_s32 i;
3352 struct sljit_label *loop;
3353
3354 SLJIT_ASSERT(common->early_fail_start_ptr < common->early_fail_end_ptr);
3355
3356 if (size == sizeof(sljit_sw))
3357 {
3358 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->early_fail_start_ptr, SLJIT_IMM, 0);
3359 return;
3360 }
3361
3362 if (sljit_get_register_index(SLJIT_GP_REGISTER, TMP3) >= 0 && !sljit_has_cpu_feature(SLJIT_HAS_ZERO_REGISTER))
3363 {
3364 OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
3365 src = TMP3;
3366 }
3367
3368 if (size <= 6 * sizeof(sljit_sw))
3369 {
3370 for (i = common->early_fail_start_ptr; i < common->early_fail_end_ptr; i += sizeof(sljit_sw))
3371 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), i, src, 0);
3372 return;
3373 }
3374
3375 GET_LOCAL_BASE(TMP1, 0, common->early_fail_start_ptr);
3376
3377 uncleared_size = ((size / sizeof(sljit_sw)) % 3) * sizeof(sljit_sw);
3378
3379 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, size - uncleared_size);
3380
3381 loop = LABEL();
3382 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), 0, src, 0);
3383 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
3384 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -2 * SSIZE_OF(sw), src, 0);
3385 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -1 * SSIZE_OF(sw), src, 0);
3386 CMPTO(SLJIT_LESS, TMP1, 0, TMP2, 0, loop);
3387
3388 if (uncleared_size >= sizeof(sljit_sw))
3389 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), 0, src, 0);
3390
3391 if (uncleared_size >= 2 * sizeof(sljit_sw))
3392 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), sizeof(sljit_sw), src, 0);
3393 }
3394
do_reset_match(compiler_common * common,int length)3395 static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
3396 {
3397 DEFINE_COMPILER;
3398 struct sljit_label *loop;
3399 int i;
3400
3401 SLJIT_ASSERT(length > 1);
3402 /* OVECTOR(1) contains the "string begin - 1" constant. */
3403 if (length > 2)
3404 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
3405 if (length < 8)
3406 {
3407 for (i = 2; i < length; i++)
3408 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), TMP1, 0);
3409 }
3410 else
3411 {
3412 if (sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw)) == SLJIT_SUCCESS)
3413 {
3414 GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
3415 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
3416 loop = LABEL();
3417 sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
3418 OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
3419 JUMPTO(SLJIT_NOT_ZERO, loop);
3420 }
3421 else
3422 {
3423 GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + 2 * sizeof(sljit_sw));
3424 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
3425 loop = LABEL();
3426 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
3427 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(sljit_sw));
3428 OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
3429 JUMPTO(SLJIT_NOT_ZERO, loop);
3430 }
3431 }
3432
3433 if (!HAS_VIRTUAL_REGISTERS)
3434 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, stack));
3435 else
3436 OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
3437
3438 if (common->mark_ptr != 0)
3439 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
3440 if (common->control_head_ptr != 0)
3441 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
3442 if (HAS_VIRTUAL_REGISTERS)
3443 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
3444
3445 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
3446 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, end));
3447 }
3448
do_search_mark(sljit_sw * current,PCRE2_SPTR skip_arg)3449 static sljit_sw SLJIT_FUNC do_search_mark(sljit_sw *current, PCRE2_SPTR skip_arg)
3450 {
3451 while (current != NULL)
3452 {
3453 switch (current[1])
3454 {
3455 case type_then_trap:
3456 break;
3457
3458 case type_mark:
3459 if (PRIV(strcmp)(skip_arg, (PCRE2_SPTR)current[2]) == 0)
3460 return current[3];
3461 break;
3462
3463 default:
3464 SLJIT_UNREACHABLE();
3465 break;
3466 }
3467 SLJIT_ASSERT(current[0] == 0 || current < (sljit_sw*)current[0]);
3468 current = (sljit_sw*)current[0];
3469 }
3470 return 0;
3471 }
3472
copy_ovector(compiler_common * common,int topbracket)3473 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
3474 {
3475 DEFINE_COMPILER;
3476 struct sljit_label *loop;
3477 BOOL has_pre;
3478
3479 /* At this point we can freely use all registers. */
3480 OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
3481 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);
3482
3483 if (HAS_VIRTUAL_REGISTERS)
3484 {
3485 OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
3486 OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
3487 if (common->mark_ptr != 0)
3488 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
3489 OP1(SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, oveccount));
3490 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_S0, 0);
3491 if (common->mark_ptr != 0)
3492 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
3493 OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, match_data),
3494 SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE));
3495 }
3496 else
3497 {
3498 OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
3499 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, match_data));
3500 if (common->mark_ptr != 0)
3501 OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
3502 OP1(SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, oveccount));
3503 OP1(SLJIT_MOV, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_S0, 0);
3504 if (common->mark_ptr != 0)
3505 OP1(SLJIT_MOV, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R0, 0);
3506 OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE));
3507 }
3508
3509 has_pre = sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw)) == SLJIT_SUCCESS;
3510
3511 GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START - (has_pre ? sizeof(sljit_sw) : 0));
3512 OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? SLJIT_R0 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
3513
3514 loop = LABEL();
3515
3516 if (has_pre)
3517 sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw));
3518 else
3519 {
3520 OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0);
3521 OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
3522 }
3523
3524 OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, sizeof(PCRE2_SIZE));
3525 OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_R0, 0);
3526 /* Copy the integer value to the output buffer */
3527 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
3528 OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
3529 #endif
3530
3531 SLJIT_ASSERT(sizeof(PCRE2_SIZE) == 4 || sizeof(PCRE2_SIZE) == 8);
3532 OP1(((sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV), SLJIT_MEM1(SLJIT_R2), 0, SLJIT_S1, 0);
3533
3534 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
3535 JUMPTO(SLJIT_NOT_ZERO, loop);
3536
3537 /* Calculate the return value, which is the maximum ovector value. */
3538 if (topbracket > 1)
3539 {
3540 if (sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * SSIZE_OF(sw))) == SLJIT_SUCCESS)
3541 {
3542 GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
3543 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
3544
3545 /* OVECTOR(0) is never equal to SLJIT_S2. */
3546 loop = LABEL();
3547 sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * SSIZE_OF(sw)));
3548 OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
3549 CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
3550 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
3551 }
3552 else
3553 {
3554 GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + (topbracket - 1) * 2 * sizeof(sljit_sw));
3555 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
3556
3557 /* OVECTOR(0) is never equal to SLJIT_S2. */
3558 loop = LABEL();
3559 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), 0);
3560 OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 2 * SSIZE_OF(sw));
3561 OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
3562 CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
3563 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
3564 }
3565 }
3566 else
3567 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
3568 }
3569
return_with_partial_match(compiler_common * common,struct sljit_label * quit)3570 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
3571 {
3572 DEFINE_COMPILER;
3573 sljit_s32 mov_opcode;
3574 sljit_s32 arguments_reg = !HAS_VIRTUAL_REGISTERS ? ARGUMENTS : SLJIT_R1;
3575
3576 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S0, str_end_must_be_saved_reg0);
3577 SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
3578 && (common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start != 0 : common->hit_start == 0));
3579
3580 if (arguments_reg != ARGUMENTS)
3581 OP1(SLJIT_MOV, arguments_reg, 0, ARGUMENTS, 0);
3582 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP),
3583 common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start : common->start_ptr);
3584 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_PARTIAL);
3585
3586 /* Store match begin and end. */
3587 OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, begin));
3588 OP1(SLJIT_MOV, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_R2, 0);
3589 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, match_data));
3590
3591 mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;
3592
3593 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S1, 0);
3594 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
3595 OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
3596 #endif
3597 OP1(mov_opcode, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(pcre2_match_data, ovector), SLJIT_R2, 0);
3598
3599 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_S1, 0);
3600 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
3601 OP2(SLJIT_ASHR, STR_END, 0, STR_END, 0, SLJIT_IMM, UCHAR_SHIFT);
3602 #endif
3603 OP1(mov_opcode, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(pcre2_match_data, ovector) + sizeof(PCRE2_SIZE), STR_END, 0);
3604
3605 JUMPTO(SLJIT_JUMP, quit);
3606 }
3607
check_start_used_ptr(compiler_common * common)3608 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
3609 {
3610 /* May destroy TMP1. */
3611 DEFINE_COMPILER;
3612 struct sljit_jump *jump;
3613
3614 if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
3615 {
3616 /* The value of -1 must be kept for start_used_ptr! */
3617 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
3618 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
3619 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
3620 jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
3621 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
3622 JUMPHERE(jump);
3623 }
3624 else if (common->mode == PCRE2_JIT_PARTIAL_HARD)
3625 {
3626 jump = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
3627 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
3628 JUMPHERE(jump);
3629 }
3630 }
3631
char_has_othercase(compiler_common * common,PCRE2_SPTR cc)3632 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, PCRE2_SPTR cc)
3633 {
3634 /* Detects if the character has an othercase. */
3635 unsigned int c;
3636
3637 #ifdef SUPPORT_UNICODE
3638 if (common->utf || common->ucp)
3639 {
3640 if (common->utf)
3641 {
3642 GETCHAR(c, cc);
3643 }
3644 else
3645 c = *cc;
3646
3647 if (c > 127)
3648 return c != UCD_OTHERCASE(c);
3649
3650 return common->fcc[c] != c;
3651 }
3652 else
3653 #endif
3654 c = *cc;
3655 return MAX_255(c) ? common->fcc[c] != c : FALSE;
3656 }
3657
char_othercase(compiler_common * common,unsigned int c)3658 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
3659 {
3660 /* Returns with the othercase. */
3661 #ifdef SUPPORT_UNICODE
3662 if ((common->utf || common->ucp) && c > 127)
3663 return UCD_OTHERCASE(c);
3664 #endif
3665 return TABLE_GET(c, common->fcc, c);
3666 }
3667
/* Detects whether a character and its other case differ in exactly one bit.

Arguments:
  common   compiler state (utf and ucp flags, fcc table)
  cc       points to the first code unit of the character; the character
           must have an other case

Returns:   0 when more than one bit differs; otherwise (index << 8) | mask,
           where mask is the differing bit reduced to a value below 256 and
           index is a small position number (NOTE(review): it appears to be
           the offset of the byte that carries the bit within the encoded
           character - confirm against the callers) */
static unsigned int char_get_othercase_bit(compiler_common *common, PCRE2_SPTR cc)
{
unsigned int chr, other, diff;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
int index;
#endif

#ifdef SUPPORT_UNICODE
if (common->utf || common->ucp)
  {
  if (common->utf)
    {
    GETCHAR(chr, cc);
    }
  else
    chr = *cc;

  other = (chr <= 127) ? common->fcc[chr] : UCD_OTHERCASE(chr);
  }
else
  {
  chr = *cc;
  other = TABLE_GET(chr, common->fcc, chr);
  }
#else
chr = *cc;
other = TABLE_GET(chr, common->fcc, chr);
#endif

SLJIT_ASSERT(chr != other);

diff = chr ^ other;

/* Optimized for English alphabet: index 0, mask 0x20. */
if (chr <= 127 && diff == 0x20)
  return 0x20;

/* Since chr != other, they must have at least 1 bit difference. */
if (!is_powerof2(diff))
  return 0;

#if PCRE2_CODE_UNIT_WIDTH == 8

#ifdef SUPPORT_UNICODE
if (common->utf && chr > 127)
  {
  /* Start from the last byte of the sequence and move towards the first
  byte, six payload bits at a time, until the bit is found. */
  for (index = GET_EXTRALEN(*cc); (diff & 0x3f) == 0; diff >>= 6)
    index--;
  return (index << 8) | diff;
  }
#endif /* SUPPORT_UNICODE */
return (0 << 8) | diff;

#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32

#ifdef SUPPORT_UNICODE
if (common->utf && chr > 65535)
  {
  /* Bits below 2^10 are reported with index 2 or 3; higher bits are
  shifted down by ten and reported with index 0 or 1. */
  if (diff < (1u << 10))
    return (diff < 256) ? ((2 << 8) | diff) : ((3 << 8) | (diff >> 8));
  diff >>= 10;
  }
#endif /* SUPPORT_UNICODE */
return (diff < 256) ? ((0u << 8) | diff) : ((1u << 8) | (diff >> 8));

#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
}
3743
/* Emits the partial match check. Does not modify registers.

Arguments:
  common   compiler state
  force    when TRUE the test against STR_PTR is not emitted; must be
           FALSE in complete matching mode

Nothing is emitted in complete mode. Otherwise an optional guard jump is
emitted first:
  - neither force nor allow_empty_partial: skip when the start_used_ptr
    slot is >= STR_PTR;
  - else, in soft partial mode: skip when the slot holds -1.
After the guard, soft partial mode stores 0 in the hit_start slot, while
hard partial mode jumps to the partial match handler. */
static void check_partial(compiler_common *common, BOOL force)
{
DEFINE_COMPILER;
struct sljit_jump *skip = NULL;

SLJIT_ASSERT(!force || common->mode != PCRE2_JIT_COMPLETE);

if (common->mode == PCRE2_JIT_COMPLETE)
  return;

if (force || common->allow_empty_partial)
  {
  if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
    skip = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
  }
else
  skip = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);

if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
else if (common->partialmatchlabel != NULL)
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
else
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));

if (skip != NULL)
  JUMPHERE(skip);
}
3773
/* Emits an end-of-subject test. Does not affect registers. Usually used in
a tight spot.

Arguments:
  common        compiler state
  end_reached   list that collects the jumps taken at the subject end

Complete mode: a single jump to end_reached when STR_PTR >= STR_END.
Partial modes: when STR_PTR < STR_END execution falls through. At the end
of the subject, end_reached is taken if the start_used_ptr slot is >=
STR_PTR. Otherwise soft mode stores 0 in the hit_start slot and then
takes end_reached, and hard mode jumps to the partial match handler. */
static void check_str_end(compiler_common *common, jump_list **end_reached)
{
DEFINE_COMPILER;
struct sljit_jump *not_at_end;

if (common->mode == PCRE2_JIT_COMPLETE)
  {
  add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  return;
  }

not_at_end = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);

/* Both partial modes start with the same test. */
add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));

if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
  }
else if (common->partialmatchlabel != NULL)
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
else
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));

JUMPHERE(not_at_end);
}
3803
/* Emits an end-of-subject test whose failure path is a backtrack.

Arguments:
  common       compiler state
  backtracks   list that collects the backtracking jumps

Complete mode: a single backtrack jump when STR_PTR >= STR_END.
Partial modes: when STR_PTR < STR_END execution falls through. At the end
of the subject a guard is tried first:
  - allow_empty_partial not set: backtrack if the start_used_ptr slot is
    >= STR_PTR;
  - else, in soft mode: backtrack if the slot holds -1.
After the guard, soft mode stores 0 in the hit_start slot and backtracks,
while hard mode jumps to the partial match handler. */
static void detect_partial_match(compiler_common *common, jump_list **backtracks)
{
DEFINE_COMPILER;
struct sljit_jump *not_at_end;

if (common->mode == PCRE2_JIT_COMPLETE)
  {
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  return;
  }

/* Partial matching mode. */
not_at_end = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);

if (common->allow_empty_partial)
  {
  if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1));
  }
else
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));

if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
  }
else if (common->partialmatchlabel != NULL)
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
else
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));

JUMPHERE(not_at_end);
}
3836
/* Emits the partial match action that depends on the start_used_ptr slot.
No end-of-subject test is emitted here.

Soft partial mode: stores 0 in the hit_start slot unless the
start_used_ptr slot is >= STR_PTR.
Hard partial mode: jumps to the partial match handler when the
start_used_ptr slot is below STR_PTR.
Any other mode: emits nothing. */
static void process_partial_match(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *skip_store;

if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  {
  skip_store = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  JUMPHERE(skip_store);
  return;
  }

if (common->mode != PCRE2_JIT_PARTIAL_HARD)
  return;

/* Jump straight to the handler if its label already exists, otherwise
queue the jump on the partialmatch list. */
if (common->partialmatchlabel == NULL)
  add_jump(compiler, &common->partialmatch, CMP(SLJIT_LESS, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
else
  CMPTO(SLJIT_LESS, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0, common->partialmatchlabel);
}
3857
/* Emits an end-of-subject test that continues at a known label.

Arguments:
  common   compiler state
  label    target taken while STR_PTR is below STR_END

When the subject end has been reached the emitted code falls through into
the sequence produced by process_partial_match(). */
static void detect_partial_match_to(compiler_common *common, struct sljit_label *label)
{
DEFINE_COMPILER;

/* More input is available: go to the caller supplied label. */
CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, label);

/* Subject end: emit the mode dependent partial match handling. */
process_partial_match(common);
}
3865
/* Emits code that reads the character at STR_PTR into TMP1 and keeps
STR_PTR. Does not check STR_END. TMP2, dst and RETURN_ADDR are destroyed.

Arguments:
  common       compiler state
  max          largest character value the caller is interested in; when
               it is below the multi-unit threshold (128 for UTF-8,
               0xd800 for UTF-16 and for invalid UTF-32 checking) only
               the first code unit is loaded
  dst, dstw    operand used to save STR_PTR while a reader helper runs
  backtracks   when not NULL and invalid UTF checking is enabled, receives
               a jump that is taken for an invalid character; when NULL in
               32 bit mode, TMP1 is replaced by INVALID_UTF_CHAR instead */
static void peek_char(compiler_common *common, sljit_u32 max, sljit_s32 dst, sljit_sw dstw, jump_list **backtracks)
{
DEFINE_COMPILER;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *single_unit;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */

SLJIT_UNUSED_ARG(max);
SLJIT_UNUSED_ARG(dst);
SLJIT_UNUSED_ARG(dstw);
SLJIT_UNUSED_ARG(backtracks);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
if (!common->utf || max < 128)
  return;

/* Values below 0x80 are complete. Otherwise STR_PTR is saved in dst,
moved past the first byte for the reader helper, and restored afterwards. */
single_unit = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
OP1(SLJIT_MOV, dst, dstw, STR_PTR, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
if (common->invalid_utf)
  add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
else
  add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
OP1(SLJIT_MOV, STR_PTR, 0, dst, dstw);
if (common->invalid_utf && backtracks != NULL)
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
JUMPHERE(single_unit);
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (!common->utf || max < 0xd800)
  return;

/* TMP2 = distance from 0xd800; a small value marks a surrogate. */
OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);

if (!common->invalid_utf)
  {
  /* TMP2 contains the high surrogate. */
  single_unit = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
  }
else
  {
  /* Any surrogate is passed to the validating reader helper. */
  single_unit = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
  OP1(SLJIT_MOV, dst, dstw, STR_PTR, 0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
  OP1(SLJIT_MOV, STR_PTR, 0, dst, dstw);
  if (backtracks != NULL)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
  }

JUMPHERE(single_unit);
#elif PCRE2_CODE_UNIT_WIDTH == 32
if (!common->invalid_utf || max < 0xd800)
  return;

/* Invalid values are those >= 0x110000 and the surrogate range. */
OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);

if (backtracks != NULL)
  {
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
  add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
  }
else
  {
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000);
  SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
  SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */
}
3949
/* Emits code that reads the character before STR_PTR into TMP1 without
moving STR_PTR. TMP2 must contain the start of the subject buffer.
Affects TMP1, TMP2, and RETURN_ADDR.

Arguments:
  common       compiler state
  max          largest character value the caller is interested in; in
               UTF-8 and UTF-16 mode only the preceding code unit is
               loaded when max is below 128 or 0xd800 respectively
  backtracks   when not NULL and invalid UTF checking is enabled, receives
               a jump that is taken for an invalid character; may be NULL

In 32 bit mode with invalid UTF checking and a NULL backtracks list, an
invalid value in TMP1 is replaced by INVALID_UTF_CHAR. A NULL list is
therefore accepted for every code unit width, and it is never passed to
add_jump(). */
static void peek_char_back(compiler_common *common, sljit_u32 max, jump_list **backtracks)
{
DEFINE_COMPILER;

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *jump;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */

SLJIT_UNUSED_ARG(max);
SLJIT_UNUSED_ARG(backtracks);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
  if (max < 128) return;

  /* Values below 0x80 are complete; the rest is left to a helper. */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
  if (common->invalid_utf)
    {
    add_jump(compiler, &common->utfpeakcharback_invalid, JUMP(SLJIT_FAST_CALL));
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    }
  else
    add_jump(compiler, &common->utfpeakcharback, JUMP(SLJIT_FAST_CALL));
  JUMPHERE(jump);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf)
  {
  if (max < 0xd800) return;

  if (common->invalid_utf)
    {
    jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
    add_jump(compiler, &common->utfpeakcharback_invalid, JUMP(SLJIT_FAST_CALL));
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    }
  else
    {
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xdc00);
    /* TMP2 contains the low surrogate. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x10000);
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
    }
  JUMPHERE(jump);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf)
  {
  /* Invalid values are those >= 0x110000 and the surrogate range. */
  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);

  if (backtracks != NULL)
    {
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
    }
  else
    {
    /* Without a backtrack list there is nowhere to jump to, so report the
    problem through the value, as peek_char() and read_char() do. */
    OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000);
    SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
    OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
    SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
    }
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */
}
4017
/* Bits of the "options" argument of read_char(). */

/* STR_PTR must end up after the whole character, even when the value of the
character is not decoded. */
#define READ_CHAR_UPDATE_STR_PTR 0x01
/* With invalid UTF checking, use the newline specific reader helper. */
#define READ_CHAR_UTF8_NEWLINE 0x02
/* Both of the above. */
#define READ_CHAR_NEWLINE (READ_CHAR_UPDATE_STR_PTR | READ_CHAR_UTF8_NEWLINE)
/* Use the non-validating read paths even when invalid UTF checking is
enabled. */
#define READ_CHAR_VALID_UTF 0x04
4022
/* Emits code that reads the character at STR_PTR and moves STR_PTR forward.

Reads the precise value of a character into TMP1, if the character is
between min and max (c >= min && c <= max). Otherwise it returns with a
value outside the range. Does not check STR_END.

Arguments:
  common       compiler state
  min, max     range of interest, min <= max
  backtracks   when not NULL, receives the jumps taken for invalid
               characters on the invalid UTF paths
  options      READ_CHAR_* bits

The first code unit is always loaded and skipped. What follows depends on
the code unit width and on the range, which selects the cheapest decoder
able to deliver every value inside [min, max]. TMP2 is used as scratch on
the multi-unit paths, and RETURN_ADDR is used on some of them. */
static void read_char(compiler_common *common, sljit_u32 min, sljit_u32 max,
  jump_list **backtracks, sljit_u32 options)
{
DEFINE_COMPILER;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *done;
BOOL update_str_ptr;
#endif
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
struct sljit_jump *not_this_length;
#endif

SLJIT_UNUSED_ARG(min);
SLJIT_UNUSED_ARG(max);
SLJIT_UNUSED_ARG(backtracks);
SLJIT_UNUSED_ARG(options);
SLJIT_ASSERT(min <= max);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
if (!common->utf)
  return;

update_str_ptr = (options & READ_CHAR_UPDATE_STR_PTR) != 0;

/* Only single byte values matter and STR_PTR need not be exact. */
if (max < 128 && !update_str_ptr)
  return;

if (common->invalid_utf && !(options & READ_CHAR_VALID_UTF))
  {
  /* Validating path: everything from 0x80 upwards goes to a helper. */
  done = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);

  if (options & READ_CHAR_UTF8_NEWLINE)
    add_jump(compiler, &common->utfreadnewline_invalid, JUMP(SLJIT_FAST_CALL));
  else
    add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));

  if (backtracks != NULL)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
  JUMPHERE(done);
  return;
  }

/* First bytes below 0xc0 need no further work. */
done = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
if (min >= 0x10000)
  {
  /* Only four byte sequences can be in range. TMP2 = first byte - 0xf0;
  values above 7 skip the decoding. With READ_CHAR_UPDATE_STR_PTR the
  amount added to STR_PTR comes from utf8_table4 instead of a constant. */
  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
  if (update_str_ptr)
    OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  not_this_length = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
  if (!update_str_ptr)
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
  JUMPHERE(not_this_length);
  if (update_str_ptr)
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
  }
else if (min >= 0x800 && max <= 0xffff)
  {
  /* Only three byte sequences can be in range. TMP2 = first byte - 0xe0;
  values above 0xf skip the decoding. */
  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
  if (update_str_ptr)
    OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  not_this_length = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  if (!update_str_ptr)
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
  JUMPHERE(not_this_length);
  if (update_str_ptr)
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
  }
else if (max >= 0x800)
  {
  /* Mixed lengths: use the generic reader helper. */
  add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
  }
else if (max < 128)
  {
  /* The value is not needed (READ_CHAR_UPDATE_STR_PTR is set here): only
  move STR_PTR by the utf8_table4 entry of the first byte. */
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  }
else
  {
  /* max is below 0x800: decode as a two byte sequence. */
  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  if (update_str_ptr)
    OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
  else
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
  if (update_str_ptr)
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
  }
JUMPHERE(done);
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (!common->utf)
  return;

update_str_ptr = (options & READ_CHAR_UPDATE_STR_PTR) != 0;

/* Only values below the surrogates matter and STR_PTR need not be exact. */
if (max < 0xd800 && !update_str_ptr)
  return;

if (common->invalid_utf && !(options & READ_CHAR_VALID_UTF))
  {
  /* Validating path: every surrogate goes to a helper. */
  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  done = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);

  if (options & READ_CHAR_UTF8_NEWLINE)
    add_jump(compiler, &common->utfreadnewline_invalid, JUMP(SLJIT_FAST_CALL));
  else
    add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));

  if (backtracks != NULL)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
  JUMPHERE(done);
  return;
  }

if (max >= 0x10000)
  {
  /* Values above the BMP are wanted: combine the surrogate pair. */
  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  done = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800);
  /* TMP2 contains the high surrogate. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
  JUMPHERE(done);
  return;
  }

/* Skip low surrogate if necessary. After a high surrogate (TMP2 < 0x400)
STR_PTR moves on by one unit when requested, and TMP1 becomes 0x10000
when max reaches the surrogate range. */
OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);

if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && !HAS_VIRTUAL_REGISTERS)
  {
  /* Branch free form based on conditional selects. */
  if (update_str_ptr)
    OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0x400);
  if (update_str_ptr)
    SELECT(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0, STR_PTR);
  if (max >= 0xd800)
    SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, 0x10000, TMP1);
  }
else
  {
  done = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
  if (update_str_ptr)
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  if (max >= 0xd800)
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
  JUMPHERE(done);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 32
if (!common->invalid_utf)
  return;

/* Invalid values are those >= 0x110000 and the surrogate range. */
OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);

if (backtracks != NULL)
  {
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
  add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
  }
else
  {
  /* No backtrack list: flag the problem through the value in TMP1. */
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000);
  SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
  SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */
}
4216
static void skip_valid_char(compiler_common *common)
{
  /* Emits code that steps STR_PTR over one character without validating it.
  In UTF-8 and UTF-16 mode the first code unit is loaded into TMP1 (which is
  overwritten) to decide how much further STR_PTR has to move. In every other
  configuration STR_PTR is simply advanced by one code unit. */
  DEFINE_COMPILER;
#if (defined SUPPORT_UNICODE) && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
  struct sljit_jump *single_unit;

  if (!common->utf)
  {
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    return;
  }

  /* Load the first code unit and step over it. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#if PCRE2_CODE_UNIT_WIDTH == 8
  /* Bytes below 0xc0 need no further step. For the others the utf8_table4
  entry selected by the lead byte is added to STR_PTR. */
  single_unit = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
#elif PCRE2_CODE_UNIT_WIDTH == 16
  /* Units below 0xd800 need no further step. Otherwise TMP1 becomes 1 when
  the unit masked with 0xfc00 equals 0xd800 (a high surrogate) and 0 when it
  does not; shifted left by one it is the extra distance added to STR_PTR. */
  single_unit = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0xd800);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */
  JUMPHERE(single_unit);
#else
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == [8|16] */
}
4247
4248 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
4249
static BOOL is_char7_bitset(const sljit_u8 *bitset, BOOL nclass)
{
  /* Tells whether the character codes below 128 are enough to determine a
  match. This is the case when bytes 16..31 of the 32 byte bitset all hold the
  same filler value: 0xff when nclass is set, 0 otherwise. Returns TRUE when
  they do, FALSE as soon as one byte differs. */
  const sljit_u8 filler = nclass ? 0xff : 0;
  int idx;

  for (idx = 16; idx < 32; idx++)
  {
    if (bitset[idx] != filler)
      return FALSE;
  }
  return TRUE;
}
4266
static void read_char7_type(compiler_common *common, jump_list **backtracks, BOOL negated)
{
  /* Emits code that loads the ctypes entry of the next character into TMP1 and
  advances STR_PTR. The entry is only meaningful for characters below 128; for
  anything else TMP1 ends up as zero. STR_END is not checked and TMP2 is
  overwritten.

  When negated is FALSE only the first code unit is consumed. When negated is
  TRUE the remaining bytes of a multi-byte character are consumed as well: in
  invalid_utf mode through the utfreadchar_invalid helper, with a jump added to
  backtracks when it reports INVALID_UTF_CHAR, otherwise by adding the
  utf8_table4 entry of the lead byte to STR_PTR. */
  DEFINE_COMPILER;
  struct sljit_jump *is_ascii;

  SLJIT_ASSERT(common->utf);

  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  /* All values > 127 are zero in ctypes. */
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);

  if (!negated)
    return;

  is_ascii = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x80);

  if (common->invalid_utf)
  {
    /* The helper takes the lead byte in TMP1 and returns the character there. */
    OP1(SLJIT_MOV, TMP1, 0, TMP2, 0);
    add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    /* The type of a character above 127 is zero. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
  }
  else
  {
    OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  }

  JUMPHERE(is_ascii);
}
4302
4303 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */
4304
static void read_char8_type(compiler_common *common, jump_list **backtracks, BOOL negated)
{
  /* Emits code that reads one character, stores its ctypes entry in TMP1 and
  updates STR_PTR. Does not check STR_END for the first code unit. Characters
  above 255 have no ctypes entry and produce zero. TMP2 is overwritten.

  common      compiler state; utf, invalid_utf and ctypes are consulted
  backtracks  list receiving the jumps taken when an invalid or truncated
              sequence is detected (only used in invalid_utf mode)
  negated     FALSE: in UTF-8 mode only two-byte sequences are decoded, since
              no longer sequence can encode a value below 256
              TRUE: the whole character is consumed, so STR_PTR ends up after
              it even when its type is zero */
  DEFINE_COMPILER;
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
  struct sljit_jump *jump;
#endif
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
  struct sljit_jump *jump2;
#endif

  SLJIT_UNUSED_ARG(backtracks);
  SLJIT_UNUSED_ARG(negated);

  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
  if (common->utf)
  {
    /* The result of this read may be unused, but saves an "else" part. */
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
    jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x80);

    if (!negated)
    {
      if (common->invalid_utf)
        add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

      /* TMP1 receives the trailing byte, TMP2 becomes the lead byte - 0xc2. */
      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
      /* Only lead bytes 0xc2..0xdf are accepted here. */
      if (common->invalid_utf)
        add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe0 - 0xc2));

      /* If TMP1 is in 0x80-0xbf range, TMP2 is also increased by (0x2 << 6),
      which makes up for the 0xc2 subtracted above. */
      OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
      OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
      /* The trailing byte is valid only when (byte - 0x80) is below 0x40.
      The test must look at TMP1: TMP2 holds the combined value, which is
      at least 0x80 for every valid trailing byte. */
      if (common->invalid_utf)
        add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40));

      OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
      jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
      OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
      JUMPHERE(jump2);
    }
    else if (common->invalid_utf)
    {
      /* The helper takes the lead byte in TMP1, but TMP1 holds the ctypes
      entry loaded above, so the byte is copied over from TMP2 first. */
      OP1(SLJIT_MOV, TMP1, 0, TMP2, 0);
      add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
      OP1(SLJIT_MOV, TMP2, 0, TMP1, 0);
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));

      OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
      jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
      OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
      JUMPHERE(jump2);
    }
    else
      add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));

    JUMPHERE(jump);
    return;
  }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 32
  if (common->invalid_utf && negated)
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x110000));
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 32 */

#if PCRE2_CODE_UNIT_WIDTH != 8
  /* The ctypes array contains only 256 values. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
  jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
#if PCRE2_CODE_UNIT_WIDTH != 8
  JUMPHERE(jump);
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
  if (common->utf && negated)
  {
    /* Skip low surrogate if necessary. */
    if (!common->invalid_utf)
    {
      OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);

      if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && !HAS_VIRTUAL_REGISTERS)
      {
        /* Branch-free form: STR_PTR + 1 is selected for a high surrogate. */
        OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
        OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0x400);
        SELECT(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0, STR_PTR);
      }
      else
      {
        jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
        OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
        JUMPHERE(jump);
      }
      return;
    }

    /* Invalid UTF-16 is possible. Units outside 0xd800..0xdfff need no more
    work. A unit in 0xdc00..0xdfff in first position is rejected, and a unit
    in 0xd800..0xdbff must be followed, inside the subject, by a unit in
    0xdc00..0xdfff. */
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400));
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xdc00);
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400));

    JUMPHERE(jump);
    return;
  }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16 */
}
4423
static void move_back(compiler_common *common, jump_list **backtracks, BOOL must_be_valid)
{
  /* Emits code that goes one character back. Affects STR_PTR and TMP1. If
  must_be_valid is TRUE, TMP2 is not used. Otherwise TMP2 must contain the
  start of the subject buffer, and it is destroyed. Does not modify STR_PTR for
  invalid character sequences. backtracks may be NULL; when it is not, a jump
  is added to it for the case where the preceding character is invalid. */
  DEFINE_COMPILER;

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
  struct sljit_jump *single_unit;
#endif

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
  struct sljit_label *rescan;

  if (common->utf)
  {
    if (must_be_valid || !common->invalid_utf)
    {
      /* Keep stepping back while the byte just passed has the form 10xxxxxx. */
      rescan = LABEL();
      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
      CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, rescan);
      return;
    }

    /* A byte below 0x80 is complete. Anything else goes to the
    utfmoveback_invalid helper; zero in TMP1 afterwards is treated as failure. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    single_unit = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
    add_jump(compiler, &common->utfmoveback_invalid, JUMP(SLJIT_FAST_CALL));
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
    JUMPHERE(single_unit);
    return;
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
  if (common->utf)
  {
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

    if (must_be_valid || !common->invalid_utf)
    {
      /* Skip low surrogate if necessary: TMP1 becomes 1 when the unit masked
      with 0xfc00 equals 0xdc00, and is scaled to one more code unit. */
      OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0xdc00);
      OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
      OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
      return;
    }

    /* Units outside 0xd800..0xdfff are complete; the rest is handed to the
    utfmoveback_invalid helper with (unit - 0xd800) in TMP1. */
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    single_unit = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000 - 0xd800);
    add_jump(compiler, &common->utfmoveback_invalid, JUMP(SLJIT_FAST_CALL));
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
    JUMPHERE(single_unit);
    return;
  }
#elif PCRE2_CODE_UNIT_WIDTH == 32
  if (common->invalid_utf && !must_be_valid)
  {
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));

    if (backtracks != NULL)
    {
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    }
    else
    {
      /* No backtrack list: step back only when the unit is below 0x110000. */
      OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x110000);
      OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_LESS);
      OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
    }
    return;
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */

  SLJIT_UNUSED_ARG(backtracks);
  SLJIT_UNUSED_ARG(must_be_valid);

  /* Every character is exactly one code unit here. */
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
}
4510
static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
{
  /* Emits code that checks whether the character in TMP1 is a newline of the
  kind selected by nltype. A jump is added to backtracks for the "is a newline"
  outcome when jumpifmatch is TRUE, and for the "is not a newline" outcome
  when it is FALSE. TMP2 may be destroyed. */
  DEFINE_COMPILER;
  struct sljit_jump *is_cr;
  sljit_s32 cond;

  if (nltype == NLTYPE_ANY)
  {
    /* The anynewline helper reports its result through the zero flag. */
    add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
    sljit_set_current_flags(compiler, SLJIT_SET_Z);
    cond = jumpifmatch ? SLJIT_NOT_ZERO : SLJIT_ZERO;
    add_jump(compiler, backtracks, JUMP(cond));
    return;
  }

  if (nltype != NLTYPE_ANYCRLF)
  {
    SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
    cond = jumpifmatch ? SLJIT_EQUAL : SLJIT_NOT_EQUAL;
    add_jump(compiler, backtracks, CMP(cond, TMP1, 0, SLJIT_IMM, common->newline));
    return;
  }

  /* NLTYPE_ANYCRLF: only CR and NL count. */
  if (jumpifmatch)
  {
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
    return;
  }

  /* Jump to backtracks unless the character is CR or NL. */
  is_cr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
  JUMPHERE(is_cr);
}
4543
4544 #ifdef SUPPORT_UNICODE
4545
4546 #if PCRE2_CODE_UNIT_WIDTH == 8
static void do_utfreadchar(compiler_common *common)
{
  /* Emits the helper for fast decoding of a UTF-8 character; no validation is
  done. On entry TMP1 contains the first byte of the character (>= 0xc0) and
  the byte at STR_PTR is taken as the first trailing byte. On return TMP1
  holds the character value and STR_PTR has been advanced by the number of
  trailing bytes consumed. TMP2 is overwritten. */
  DEFINE_COMPILER;
  struct sljit_jump *longer;

  sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

  /* Merge the low six bits of the first trailing byte. */
  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

  /* Searching for the first zero among the marker bits of the lead byte.
  0x800 is bit 0x20 of the lead byte after the shift by six. */
  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x800);
  longer = JUMP(SLJIT_NOT_ZERO);

  /* Two byte sequence: remove the shifted 0xc0 marker. */
  OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3000);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

  JUMPHERE(longer);

  /* Merge the low six bits of the second trailing byte. */
  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

  /* 0x10000 is bit 0x10 of the lead byte after two shifts by six. */
  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x10000);
  longer = JUMP(SLJIT_NOT_ZERO);

  /* Three byte sequence: remove the shifted 0xe0 marker. */
  OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0000);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
  OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

  /* Four byte sequence: remove the shifted 0xf0 marker, then merge the low
  six bits of the third trailing byte. */
  JUMPHERE(longer);
  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
  OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xf0000);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
  OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4591
static void do_utfreadtype8(compiler_common *common)
{
  /* Emits the helper for fast decoding of a UTF-8 character type; no
  validation is done. On entry TMP2 contains the first byte of the character
  (>= 0xc0). On return TMP1 holds the ctypes entry of the character, or zero
  when the character is above 255, and STR_PTR has been moved past the
  trailing bytes. */
  DEFINE_COMPILER;
  struct sljit_jump *longer;
  struct sljit_jump *above_255;

  sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

  /* Bit 0x20 of the lead byte is clear only for two byte sequences. */
  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, 0x20);
  longer = JUMP(SLJIT_NOT_ZERO);

  /* Two byte sequence. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
  /* The upper 5 bits are known at this point. A value above 3 means that the
  character does not fit in eight bits. */
  above_255 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
  OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
  OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

  JUMPHERE(above_255);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
  OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

  /* We only have types for characters less than 256. For longer sequences the
  utf8_table4 entry of the lead byte is added to STR_PTR and zero is returned. */
  JUMPHERE(longer);
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4627
static void do_utfreadchar_invalid(compiler_common *common)
{
  /* Emits the helper for slow, validating decoding of a UTF-8 character. On
  entry TMP1 contains the first byte of the character (>= 0xc0). On return
  TMP1 holds the character value, or INVALID_UTF_CHAR when the sequence is
  malformed or cut short by STR_END. STR_PTR is undefined for invalid
  characters. TMP2 is overwritten.

  The code has a fast part, used when at least three bytes are left in the
  subject, and a slow part for the last bytes of the subject. Where the CPU
  offers a conditional move, several checks use SELECT instead of a branch;
  the matching slots of the jump array are then set to NULL. */
  DEFINE_COMPILER;
  sljit_s32 idx;
  sljit_s32 use_select = sljit_has_cpu_feature(SLJIT_HAS_CMOV);
  struct sljit_jump *longer;
  struct sljit_jump *near_end;
  struct sljit_label *three_byte_tail;
  struct sljit_label *invalid_exit;
  struct sljit_jump *bad[11];

  sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc2);

  /* Usually more than 3 characters remained in the subject buffer. */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));

  /* Not a valid start of a multi-byte sequence, no more bytes read. */
  bad[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xf5 - 0xc2);

  near_end = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);

  /* First trailing byte. */
  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
  /* If TMP2 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
  OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
  bad[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x800);
  longer = JUMP(SLJIT_NOT_ZERO);

  /* Two-byte sequence: only one of the three bytes stepped over is used. */
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
  OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

  JUMPHERE(longer);

  /* Three-byte sequence. */
  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
  OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
  if (!use_select)
    bad[2] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
  else
  {
    /* A bad trailing byte replaces TMP1 by 0x20000, which the range checks
    of the three-byte tail turn into INVALID_UTF_CHAR. */
    OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
    SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0x20000, TMP1);
    bad[2] = NULL;
  }

  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x10000);
  longer = JUMP(SLJIT_NOT_ZERO);

  three_byte_tail = LABEL();

  /* TMP1 is the character plus 0x20000 here. Values whose character part is
  in 0xd800..0xdfff are rejected first. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2d800);
  if (!use_select)
    bad[3] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
  else
  {
    OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);
    SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0xd800, TMP1);
    bad[3] = NULL;
  }
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  /* A three-byte sequence must not encode a value below 0x800. */
  if (!use_select)
    bad[4] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
  else
  {
    OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);
    SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
    bad[4] = NULL;
  }
  OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

  JUMPHERE(longer);

  /* Four-byte sequence. */
  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
  OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
  if (!use_select)
    bad[5] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
  else
  {
    /* A bad trailing byte replaces TMP1 by 0, which fails the range check
    below once 0xc10000 has been subtracted. */
    OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
    SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0, TMP1);
    bad[5] = NULL;
  }

  /* After the subtraction TMP1 is the character minus 0x10000, and has to be
  below 0x100000. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc10000);
  if (!use_select)
    bad[6] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);
  else
  {
    OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);
    SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000, TMP1);
    bad[6] = NULL;
  }

  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
  OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

  /* Fewer than three bytes are left after the lead byte. */
  JUMPHERE(near_end);
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
  bad[7] = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);

  /* Two-byte sequence. */
  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
  /* If TMP2 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
  OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
  bad[8] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x800);
  longer = JUMP(SLJIT_NOT_ZERO);

  OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

  /* Three-byte sequence. */
  JUMPHERE(longer);
  bad[9] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
  OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
  if (!use_select)
    bad[10] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
  else
  {
    OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
    SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
    bad[10] = NULL;
  }

  /* One will be subtracted from STR_PTR later. */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));

  /* Four byte sequences are not possible. */
  CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x30000, three_byte_tail);

  /* Every failed check lands here. */
  invalid_exit = LABEL();
  idx = 0;
  while (idx < 11)
  {
    sljit_set_label(bad[idx], invalid_exit);
    idx++;
  }

  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
  OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4786
static void do_utfreadnewline_invalid(compiler_common *common)
{
  /* Emits the helper for slow decoding of a UTF-8 character, specialized for
  newlines. On entry TMP1 contains the first byte of the character (>= 0xc0).
  On return TMP1 holds 0x85, 0x2028 or 0x2029 when one of these was read in
  NLTYPE_ANY mode, and INVALID_UTF_CHAR in every other case, after the
  trailing bytes (0x80-0xbf) that follow have been stepped over. TMP2 is
  overwritten. */
  DEFINE_COMPILER;
  struct sljit_label *skip_loop;
  struct sljit_label *skip_entry;
  struct sljit_label *give_up;
  struct sljit_jump *at_end;
  struct sljit_jump *lead_c2;
  struct sljit_jump *lead_e2;
  struct sljit_jump *not_trailing;
  struct sljit_jump *ran_out;

  sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

  if (common->nltype != NLTYPE_ANY)
  {
    SLJIT_ASSERT(common->nltype != NLTYPE_FIXED || common->newline < 128);

    /* All newlines are ascii, just skip intermediate octets. */
    at_end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
    skip_loop = LABEL();
    /* Use a post-increment load when the target supports it. */
    if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)) == SLJIT_SUCCESS)
      sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    else
    {
      OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    }

    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
    CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, skip_loop);
    /* Step back onto the byte that ended the loop. */
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

    JUMPHERE(at_end);

    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
    OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
    return;
  }

  /* NLTYPE_ANY: read the second byte, then branch on the lead byte. */
  at_end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  lead_c2 = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xc2);
  lead_e2 = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xe2);

  skip_entry = LABEL();
  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
  not_trailing = CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80);

  /* Skip intermediate octets. */
  skip_loop = LABEL();
  ran_out = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
  CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, skip_loop);

  /* The last byte read is not a trailing byte, so it is not consumed. */
  JUMPHERE(not_trailing);
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  give_up = LABEL();
  JUMPHERE(at_end);
  JUMPHERE(ran_out);

  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
  OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

  /* Two byte long newline: 0x85. */
  JUMPHERE(lead_c2);
  CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x85, skip_entry);

  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x85);
  OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

  /* Three byte long newlines: 0x2028 and 0x2029. */
  JUMPHERE(lead_e2);
  CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, skip_entry);
  CMPTO(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0, give_up);

  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  /* The third byte has to be a trailing byte; its low six bits are combined
  with 0x2000 to form the returned value. */
  OP2(SLJIT_SUB, TMP1, 0, TMP2, 0, SLJIT_IMM, 0x80);
  CMPTO(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40, skip_entry);

  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0x2000);
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
  OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4877
static void do_utfmoveback_invalid(compiler_common *common)
{
  /* Emits the helper that goes one multi-byte UTF-8 character back with
  validation of the lead byte positions. On entry STR_PTR has already been
  moved back by one byte, TMP1 holds the byte found there and TMP2 holds the
  start of the subject buffer. On return TMP1 is 1 and STR_PTR addresses the
  lead byte when a sequence start was found; otherwise TMP1 is 0 and STR_PTR
  is one byte after its entry value. */
  DEFINE_COMPILER;
  sljit_s32 idx;
  struct sljit_jump *next_len;
  struct sljit_jump *near_start;
  struct sljit_label *found;
  struct sljit_label *invalid_exit;
  struct sljit_jump *bad[7];

  sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

  /* Optimistically step to where the lead byte of a four-byte sequence would be. */
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
  bad[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xc0);

  /* Two-byte sequence. */
  near_start = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);

  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));

  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
  next_len = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x20);

  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
  OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

  /* Three-byte sequence. TMP1 is (byte - 0xc0); the unsigned comparison with
  -0x40 lets only bytes in 0x80..0xbf continue. */
  JUMPHERE(next_len);
  bad[1] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, -0x40);

  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));

  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
  next_len = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x10);

  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

  /* Four-byte sequence. TMP1 becomes (byte - 0x80), which must be below 0x40
  for the middle byte; the lead byte must then be in 0xf0..0xf4. */
  JUMPHERE(next_len);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0 - 0x80);
  bad[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40);

  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xf0);
  bad[3] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x05);

  found = LABEL();
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
  OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

  /* Fewer than four bytes are available before the entry position.
  Two-byte sequence. */
  JUMPHERE(near_start);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));

  bad[4] = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);

  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));

  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
  CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x20, found);

  /* Three-byte sequence. */
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  bad[5] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, -0x40);
  bad[6] = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);

  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));

  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
  CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10, found);

  /* Four-byte sequences are not possible. */

  /* Failure exit for the near-start three-byte checks: STR_PTR is three
  bytes before the position it has to be restored to. */
  invalid_exit = LABEL();
  sljit_set_label(bad[5], invalid_exit);
  sljit_set_label(bad[6], invalid_exit);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
  OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

  JUMPHERE(bad[4]);
  /* -2 + 4 = 2 */
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));

  /* Failure exit for the fast path: STR_PTR is four bytes before the position
  it has to be restored to. */
  invalid_exit = LABEL();
  idx = 0;
  while (idx < 4)
  {
    sljit_set_label(bad[idx], invalid_exit);
    idx++;
  }
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(4));
  OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4973
static void do_utfpeakcharback(compiler_common *common)
{
  /* Emits the helper that peeks at the multi-byte UTF-8 character ending just
  before STR_PTR. Does not modify STR_PTR and performs no validation. The
  character value is returned in TMP1; TMP2 is overwritten. The lead byte is
  searched at offsets -2, -3 and -4, so the byte at -1 is presumably known to
  be a trailing byte when the helper is called (not visible here). */
  DEFINE_COMPILER;
  struct sljit_jump *two_byte;
  struct sljit_jump *three_byte;

  sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

  /* Lead byte of a two-byte sequence (0xc0..0xdf) at offset -2? */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
  two_byte = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x20);

  /* Lead byte of a three-byte sequence (0xe0..0xef) at offset -3? */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
  three_byte = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10);

  /* Four-byte sequence: TMP1 is turned into (byte at -3) - 0x80 and combined
  with the payload of the lead byte at -4. */
  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-4));
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0 - 0x80);
  OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf0);
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

  /* Append the six payload bits of the byte at -2. */
  JUMPHERE(three_byte);
  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
  OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

  /* Append the six payload bits of the byte at -1. */
  JUMPHERE(two_byte);
  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
  OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

  OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5010
static void do_utfpeakcharback_invalid(compiler_common *common)
{
/* Peek a character back. Does not modify STR_PTR.

UTF-8 variant which validates the sequence. The emitted helper returns the
decoded code point in TMP1, or INVALID_UTF_CHAR when the bytes before STR_PTR
do not form an acceptable sequence. TMP2 is clobbered.

Inputs, as far as the emitted code shows: TMP1 is tested as the last byte of
the character and TMP2 is compared against STR_PTR as a lower bound.
Presumably TMP1 holds the byte at STR_PTR[-1] (already known to be >= 0x80)
and TMP2 holds the start of the readable subject - TODO confirm at the call
site.

Three copies of the decoder are emitted, selected by how many bytes are
available before STR_PTR: at least four (main path), exactly three, and
exactly two. The shorter copies jump back into the shared two_byte_entry and
three_byte_entry labels of the main path. The range checks rely on the sljit
comparisons without a signedness prefix being unsigned. */
DEFINE_COMPILER;
sljit_s32 i;
sljit_s32 has_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV);
struct sljit_jump *jump[2];
struct sljit_label *two_byte_entry;
struct sljit_label *three_byte_entry;
struct sljit_label *exit_invalid_label;
struct sljit_jump *exit_invalid[8];

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

/* A character cannot end with a lead byte (>= 0xc0). When TMP2 + 3 reaches
STR_PTR, fewer than four bytes are available and the restricted copies near
the end of this function are used instead. */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xc0);
jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);

/* Two-byte sequence. */
/* Accepted lead bytes are 0xc2-0xdf; 0xc0 and 0xc1 are excluded. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
jump[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x1e);

two_byte_entry = LABEL();
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
/* If TMP1 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */
/* That is, the 0x80 bias left in TMP1 compensates for the lead byte having
been reduced by 0xc2 instead of 0xc0. */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Not a two-byte lead: the byte at -2 must be a continuation byte
(0x80-0xbf). Its payload is merged into TMP1. */
JUMPHERE(jump[1]);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2 - 0x80);
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Three-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0);
jump[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x10);

three_byte_entry = LABEL();
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Reject the surrogate range 0xd800-0xdfff. With conditional moves, TMP1 is
replaced by -0xd800 so that the addition below yields 0, which the next check
turns into INVALID_UTF_CHAR. Without them, a jump to the invalid exit is
emitted. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
if (has_cmov)
  {
  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);
  SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, -0xd800, TMP1);
  exit_invalid[2] = NULL;
  }
else
  exit_invalid[2] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);

/* Reject values below 0x800, which do not need three bytes. */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
if (has_cmov)
  {
  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);
  SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
  exit_invalid[3] = NULL;
  }
else
  exit_invalid[3] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Not a three-byte lead: the byte at -3 must be a continuation byte. */
JUMPHERE(jump[1]);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0 - 0x80);
exit_invalid[4] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Four-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-4));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf0);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 18);
/* ADD is used instead of OR because of the SUB 0x10000 above. */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);

/* TMP1 now holds (code point - 0x10000). One unsigned range check accepts
only 0x10000-0x10ffff; values which wrapped around because of the biased
arithmetic above are also >= 0x100000 and are rejected. */
if (has_cmov)
  {
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);
  SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000, TMP1);
  exit_invalid[5] = NULL;
  }
else
  exit_invalid[5] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);

OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Fewer than four bytes are available. TMP2 is moved back by one unit: if it
still reaches STR_PTR, fewer than three bytes are available. */
JUMPHERE(jump[0]);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);

/* Two-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x1e, two_byte_entry);

OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2 - 0x80);
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
exit_invalid[6] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Three-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0);
CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x10, three_byte_entry);

/* A longer sequence does not fit into the available bytes. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Fewer than three bytes are available. If TMP2 is past STR_PTR, not even
two bytes are available, so no multi-byte sequence can be present. */
JUMPHERE(jump[0]);
exit_invalid[7] = CMP(SLJIT_GREATER, TMP2, 0, STR_PTR, 0);

/* Two-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x1e, two_byte_entry);

/* Common invalid exit (the check above falls through to it). Entries 2, 3
and 5 are NULL when conditional moves are used; sljit_set_label is expected
to ignore NULL jumps - TODO confirm. */
exit_invalid_label = LABEL();
for (i = 0; i < 8; i++)
  sljit_set_label(exit_invalid[i], exit_invalid_label);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5142
5143 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
5144
5145 #if PCRE2_CODE_UNIT_WIDTH == 16
5146
static void do_utfreadchar_invalid(compiler_common *common)
{
/* Slow decoding a UTF-16 character. TMP1 contains the first half
of the character (>= 0xd800). Return char value in TMP1. STR_PTR is
undefined for invalid characters.

On success the emitted helper advances STR_PTR by one code unit (past the low
surrogate) and returns the combined code point in TMP1. On failure it returns
INVALID_UTF_CHAR in TMP1. TMP2 is clobbered in both cases. */
DEFINE_COMPILER;
struct sljit_jump *exit_invalid[3];

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

/* TMP2 contains the high surrogate. */
/* Presumably with the 0xd800 bias already removed, since only the 0xdc00
bias of the low surrogate is subtracted below - TODO confirm at the call
site. A first unit >= 0xdc00 cannot start a pair, and a second unit must be
available before STR_END. */
exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00);
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

/* Load the second unit and consume it. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* The second unit must be a low surrogate (0xdc00-0xdfff). */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x10000);
exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x400);

/* code point = 0x10000 + (high << 10) + low */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(exit_invalid[0]);
JUMPHERE(exit_invalid[1]);
JUMPHERE(exit_invalid[2]);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5178
static void do_utfreadnewline_invalid(compiler_common *common)
{
/* Slow decoding a UTF-16 character, specialized for newlines.
TMP1 contains the first half of the character (>= 0xd800). Return
char value in TMP1.

The real code point is never computed here: the emitted helper returns the
constant 0x10000 in TMP1 (presumably sufficient because no newline character
lies outside the basic plane) and only skips the following code unit when it
is a low surrogate. INVALID_UTF_CHAR is returned when no further unit is
available or when TMP1 is >= 0xdc00. TMP2 is clobbered. */

DEFINE_COMPILER;
struct sljit_jump *exit_invalid[2];

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

/* TMP2 contains the high surrogate. */
/* NOTE(review): the incoming TMP2 value is not read by this helper; it is
overwritten by the load below. */
exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00);

/* TMP2 = 1 if the next unit is a low surrogate (0xdc00-0xdfff), otherwise 0.
STR_PTR is advanced by that many code units, without branching. */
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xdc00);
OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0x400);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(exit_invalid[0]);
JUMPHERE(exit_invalid[1]);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5210
static void do_utfmoveback_invalid(compiler_common *common)
{
/* Goes one character back.

UTF-16 slow path. The emitted helper returns 1 in TMP1 after moving STR_PTR
back by one more code unit when the unit at STR_PTR[-1] is a high surrogate.
Otherwise it returns 0 in TMP1 and moves STR_PTR forward by one code unit.

Inputs as far as the emitted code shows: TMP1 must be >= 0x400 and TMP2 must
be below STR_PTR for the success path. Presumably the caller has already
stepped STR_PTR back over one unit, TMP1 holds that unit minus 0xd800 (so
values below 0x400 denote a high surrogate, which cannot end a character),
and TMP2 holds the start of the readable subject - TODO confirm at the call
site. */
DEFINE_COMPILER;
struct sljit_jump *exit_invalid[3];

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

exit_invalid[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x400);
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);

/* The preceding unit must be a high surrogate (0xd800-0xdbff). */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x400);

/* Success: step over the high surrogate as well. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(exit_invalid[0]);
JUMPHERE(exit_invalid[1]);
JUMPHERE(exit_invalid[2]);

/* Failure: STR_PTR is moved forward by one unit (presumably undoing the
caller's step back) and 0 is returned. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5238
static void do_utfpeakcharback_invalid(compiler_common *common)
{
/* Peek a character back. Does not modify STR_PTR.

UTF-16 variant which validates the sequence. The emitted helper returns the
code point in TMP1, or INVALID_UTF_CHAR. TMP2 is clobbered.

Inputs as far as the emitted code shows: TMP1 is treated as the last code
unit of the character and TMP2 as a lower bound compared against STR_PTR.
Presumably TMP1 holds the unit at STR_PTR[-1] (already known to be >= 0xd800)
and TMP2 holds the start of the readable subject - TODO confirm at the call
site. */
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_jump *exit_invalid[3];

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

/* Units >= 0xe000 are not surrogates: TMP1 is returned unchanged. A unit
below 0xdc00 cannot be the second half of a pair, and two units must be
available before STR_PTR. */
jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
exit_invalid[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);

/* The unit at -2 must be a high surrogate (0xd800-0xdbff). The result is
0x10000 + (high << 10) + low. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);

JUMPHERE(jump);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(exit_invalid[0]);
JUMPHERE(exit_invalid[1]);
JUMPHERE(exit_invalid[2]);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5270
5271 #endif /* PCRE2_CODE_UNIT_WIDTH == 16 */
5272
5273 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
5274 #define UCD_BLOCK_MASK 127
5275 #define UCD_BLOCK_SHIFT 7
5276
static void do_getucd(compiler_common *common)
{
/* Search the UCD record for the character comes in TMP1.
Returns the 16 bit value loaded from the ucd_stage2 table in TMP2
(presumably the index of the character's record in ucd_records). No
chartype is loaded by this helper: on return TMP1 holds the index which
was used for the ucd_stage2 lookup. */
DEFINE_COMPILER;
#if PCRE2_CODE_UNIT_WIDTH == 32
struct sljit_jump *jump;
#endif

#if defined SLJIT_DEBUG && SLJIT_DEBUG
/* dummy_ucd_record */
/* Debug builds verify that the record of UNASSIGNED_UTF_CHAR carries the
neutral property values which the substitution below depends on. */
const ucd_record *record = GET_UCD(UNASSIGNED_UTF_CHAR);
SLJIT_ASSERT(record->script == ucp_Unknown && record->chartype == ucp_Cn && record->gbprop == ucp_gbOther);
SLJIT_ASSERT(record->caseset == 0 && record->other_case == 0);
#endif

/* The shift and mask constants used below are only valid for this table
layout. */
SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 12);

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

#if PCRE2_CODE_UNIT_WIDTH == 32
if (!common->utf)
  {
  /* Outside UTF mode a code unit may exceed MAX_UTF_CODE_POINT; such values
  are replaced by UNASSIGNED_UTF_CHAR before the table lookup. */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
  JUMPHERE(jump);
  }
#endif

/* Two-stage lookup:
  TMP2 = ucd_stage1[TMP1 >> UCD_BLOCK_SHIFT]              (16 bit entries)
  TMP1 = (TMP2 << UCD_BLOCK_SHIFT) + (TMP1 & UCD_BLOCK_MASK)
  TMP2 = ucd_stage2[TMP1]                                 (16 bit entries) */
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5316
static void do_getucdtype(compiler_common *common)
{
/* Search the UCD record for the character comes in TMP1.
Returns chartype in TMP1. On return TMP2 holds the value loaded from
ucd_stage2 (presumably the record index) multiplied by four, which is an
intermediate of the address computation rather than a byte offset of the
record. */
DEFINE_COMPILER;
#if PCRE2_CODE_UNIT_WIDTH == 32
struct sljit_jump *jump;
#endif

#if defined SLJIT_DEBUG && SLJIT_DEBUG
/* dummy_ucd_record */
/* Debug builds verify that the record of UNASSIGNED_UTF_CHAR carries the
neutral property values which the substitution below depends on. */
const ucd_record *record = GET_UCD(UNASSIGNED_UTF_CHAR);
SLJIT_ASSERT(record->script == ucp_Unknown && record->chartype == ucp_Cn && record->gbprop == ucp_gbOther);
SLJIT_ASSERT(record->caseset == 0 && record->other_case == 0);
#endif

/* The shift constants and the multiplication by 12 below are only valid for
this table layout. */
SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 12);

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

#if PCRE2_CODE_UNIT_WIDTH == 32
if (!common->utf)
  {
  /* Outside UTF mode a code unit may exceed MAX_UTF_CODE_POINT; such values
  are replaced by UNASSIGNED_UTF_CHAR before the table lookup. */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
  JUMPHERE(jump);
  }
#endif

/* Two-stage lookup:
  TMP2 = ucd_stage1[TMP1 >> UCD_BLOCK_SHIFT]              (16 bit entries)
  TMP1 = (TMP2 << UCD_BLOCK_SHIFT) + (TMP1 & UCD_BLOCK_MASK)
  TMP2 = ucd_stage2[TMP1]                                 (16 bit entries) */
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);

/* TMP2 is multiplied by 12. Same as (TMP2 << 2) + ((TMP2 << 2) << 1). */
/* The first term is added to the base address explicitly, the second one
comes from the scaled index of the final byte load. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 1);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5363
5364 #endif /* SUPPORT_UNICODE */
5365
static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common)
{
/* Emits the entry sequence of the main matching loop and returns the label
(mainloop) of the code which advances STR_PTR to the next character. The
emitted code is laid out as follows:

  1. Optional setup: with PCRE2_FIRSTLINE the end of the first line is
     searched, with PCRE2_USE_OFFSET_LIMIT the offset limit is validated.
     Either result is stored in the stack slot common->match_end_ptr.
  2. An unconditional jump (start) over the advance code, so the position
     in STR_PTR on entry is not skipped.
  3. Optional two-unit newline handling (newlinelabel).
  4. The mainloop label followed by the advance code.

The start jump, and the two exits of the newline handling, are bound to the
end of the emitted sequence. TMP1, TMP2 and (with PCRE2_FIRSTLINE) TMP3 are
used as scratch registers. */
DEFINE_COMPILER;
struct sljit_label *mainloop;
struct sljit_label *newlinelabel = NULL;
struct sljit_jump *start;
struct sljit_jump *end = NULL;
struct sljit_jump *end2 = NULL;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_label *loop;
struct sljit_jump *jump;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
jump_list *newline = NULL;
sljit_u32 overall_options = common->re->overall_options;
BOOL hascrorlf = (common->re->flags & PCRE2_HASCRORLF) != 0;
BOOL newlinecheck = FALSE;
BOOL readuchar = FALSE;

/* The two-unit newline is only treated as one step when the pattern has
neither the PCRE2_HASCRORLF flag nor PCRE2_FIRSTLINE set, and the newline
convention can be a two-unit sequence (common->newline > 255 encodes two
units, see the shifts by 8 below). */
if (!(hascrorlf || (overall_options & PCRE2_FIRSTLINE) != 0)
    && (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
  newlinecheck = TRUE;

SLJIT_ASSERT(common->abort_label == NULL);

if ((overall_options & PCRE2_FIRSTLINE) != 0)
  {
  /* Search for the end of the first line. */
  SLJIT_ASSERT(common->match_end_ptr != 0);
  /* STR_PTR is saved in TMP3 while the subject is scanned. */
  OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);

  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
    {
    /* Fixed two-unit newline: compare each adjacent pair of code units. The
    value stored is STR_PTR minus one unit, both when the pair is found and
    when STR_END is reached. */
    mainloop = LABEL();
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
    CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
    JUMPHERE(end);
    OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    }
  else
    {
    /* Generic newline: the position before each character is stored, so
    when check_newlinechar() leaves the loop through the newline jump list,
    the slot holds the start of the newline. When STR_END is reached without
    a newline, the final STR_PTR is stored instead. */
    end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
    mainloop = LABEL();
    /* Continual stores does not cause data dependency. */
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
    read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_NEWLINE);
    check_newlinechar(common, common->nltype, &newline, TRUE);
    CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop);
    JUMPHERE(end);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
    set_jumps(newline, LABEL());
    }

  OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
  }
else if ((overall_options & PCRE2_USE_OFFSET_LIMIT) != 0)
  {
  /* Check whether offset limit is set and valid. */
  SLJIT_ASSERT(common->match_end_ptr != 0);

  /* TMP1 = arguments->offset_limit. With virtual registers the ARGUMENTS
  value is copied into a register first instead of being used directly as a
  base. */
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, offset_limit));
    }
  else
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, offset_limit));

  /* An unset limit means that STR_END is stored as the limit. */
  OP1(SLJIT_MOV, TMP2, 0, STR_END, 0);
  end = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw) PCRE2_UNSET);
  if (HAS_VIRTUAL_REGISTERS)
    OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
  else
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));

  /* The limit is converted from code units to a byte offset. */
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif /* PCRE2_CODE_UNIT_WIDTH == [16|32] */
  if (HAS_VIRTUAL_REGISTERS)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));

  /* TMP2 = begin + limit, clamped to STR_END. */
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
  end2 = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
  OP1(SLJIT_MOV, TMP2, 0, STR_END, 0);
  JUMPHERE(end2);
  /* The return register is loaded unconditionally; it only matters when the
  abort jump below is taken because the limit is before the start position. */
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
  add_jump(compiler, &common->abort, CMP(SLJIT_LESS, TMP2, 0, STR_PTR, 0));
  JUMPHERE(end);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, TMP2, 0);
  }

/* Skip the advance code when the emitted sequence is entered from the top. */
start = JUMP(SLJIT_JUMP);

if (newlinecheck)
  {
  /* Reached from the main loop when the current unit equals the first unit
  of the newline: step over it, and step over the next unit as well when it
  equals the second unit of the newline (branchless: the comparison result,
  0 or 1, is scaled to a unit size and added to STR_PTR). */
  newlinelabel = LABEL();
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif /* PCRE2_CODE_UNIT_WIDTH == [16|32] */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  end2 = JUMP(SLJIT_JUMP);
  }

mainloop = LABEL();

/* Increasing the STR_PTR here requires one less jump in the most common case. */
/* The current unit is only loaded when something below inspects TMP1: the
UTF length computation (not used in invalid_utf mode, which reloads units in
its own loop) or the newline check. */
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf && !common->invalid_utf) readuchar = TRUE;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
if (newlinecheck) readuchar = TRUE;

if (readuchar)
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);

if (newlinecheck)
  CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->invalid_utf)
  {
  /* Skip continuation code units. */
  /* Units in the 0x80-0xbf range are consumed until another unit or STR_END
  is found; the last increment is undone when a non-continuation unit stops
  the loop. */
  loop = LABEL();
  jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
  CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x40, loop);
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  JUMPHERE(jump);
  }
else if (common->utf)
  {
  /* For lead bytes (>= 0xc0) the value found in utf8_table4 is added to
  STR_PTR (presumably the number of additional bytes of the character). */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  JUMPHERE(jump);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (common->invalid_utf)
  {
  /* Skip continuation code units. */
  /* Same loop as in the 8 bit case, but for low surrogates (0xdc00-0xdfff). */
  loop = LABEL();
  jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
  CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x400, loop);
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  JUMPHERE(jump);
  }
else if (common->utf)
  {
  /* Skip one more unit when the unit just passed was a high surrogate
  (0xd800-0xdbff). Both variants are branchless. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);

  if (sljit_has_cpu_feature(SLJIT_HAS_CMOV))
    {
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x400);
    SELECT(SLJIT_LESS, STR_PTR, TMP2, 0, STR_PTR);
    }
  else
    {
    OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x400);
    OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_LESS);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
    }
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
/* The initial entry and both exits of the newline handling continue here. */
JUMPHERE(start);

if (newlinecheck)
  {
  JUMPHERE(end);
  JUMPHERE(end2);
  }

return mainloop;
}
5556
5557
add_prefix_char(PCRE2_UCHAR chr,fast_forward_char_data * chars,BOOL last)5558 static SLJIT_INLINE void add_prefix_char(PCRE2_UCHAR chr, fast_forward_char_data *chars, BOOL last)
5559 {
5560 sljit_u32 i, count = chars->count;
5561
5562 if (count == 255)
5563 return;
5564
5565 if (count == 0)
5566 {
5567 chars->count = 1;
5568 chars->chars[0] = chr;
5569
5570 if (last)
5571 chars->last_count = 1;
5572 return;
5573 }
5574
5575 for (i = 0; i < count; i++)
5576 if (chars->chars[i] == chr)
5577 return;
5578
5579 if (count >= MAX_DIFF_CHARS)
5580 {
5581 chars->count = 255;
5582 return;
5583 }
5584
5585 chars->chars[count] = chr;
5586 chars->count = count + 1;
5587
5588 if (last)
5589 chars->last_count++;
5590 }
5591
static int scan_prefix(compiler_common *common, PCRE2_SPTR cc, fast_forward_char_data *chars, int max_chars, sljit_u32 *rec_count)
{
/* Recursive function, which scans prefix literals.

Walks the compiled pattern starting at cc and records, for each of the next
max_chars code unit positions, which code units can occur there. chars points
to the entry of the current position; a position which cannot be described by
a short list is marked with count == 255. rec_count is a shared budget which
is decremented on every iteration of the main loop (including those of the
recursive calls); scanning stops with a return value of 0 when it is used up.

Returns the number of positions processed along the path followed by this
invocation. When an optional item or an alternative is explored by a
recursive call, the value returned by that call becomes the new max_chars of
the current path, so the prefix is never longer than its shortest
continuation. */
BOOL last, any, class, caseless;
int len, repeat, len_save, consumed = 0;
sljit_u32 chr; /* Any unicode character. */
sljit_u8 *bytes, *bytes_end, byte;
PCRE2_SPTR alternative, cc_save, oc;
/* Large enough for the other case of a character in the current encoding. */
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
PCRE2_UCHAR othercase[4];
#elif defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
PCRE2_UCHAR othercase[2];
#else
PCRE2_UCHAR othercase[1];
#endif

repeat = 1;
while (TRUE)
  {
  if (*rec_count == 0)
    return 0;
  (*rec_count)--;

  /* Per item state: last means that scanning stops after this item, any
  means that the item accepts too many characters to be listed, class means
  that a 32 byte character bitmap follows the opcode. */
  last = TRUE;
  any = FALSE;
  class = FALSE;
  caseless = FALSE;

  switch (*cc)
    {
    case OP_CHARI:
    caseless = TRUE;
    /* Fall through */
    case OP_CHAR:
    last = FALSE;
    cc++;
    break;

    case OP_SOD:
    case OP_SOM:
    case OP_SET_SOM:
    case OP_NOT_WORD_BOUNDARY:
    case OP_WORD_BOUNDARY:
    case OP_EODN:
    case OP_EOD:
    case OP_CIRC:
    case OP_CIRCM:
    case OP_DOLL:
    case OP_DOLLM:
    case OP_NOT_UCP_WORD_BOUNDARY:
    case OP_UCP_WORD_BOUNDARY:
    /* Zero width assertions. */
    cc++;
    continue;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ASSERT_NA:
    case OP_ASSERTBACK_NA:
    /* The whole assertion is skipped without recording anything. */
    cc = bracketend(cc);
    continue;

    case OP_PLUSI:
    case OP_MINPLUSI:
    case OP_POSPLUSI:
    caseless = TRUE;
    /* Fall through */
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_POSPLUS:
    /* Only the first occurrence is recorded; last stays TRUE, so scanning
    stops after it. */
    cc++;
    break;

    case OP_EXACTI:
    caseless = TRUE;
    /* Fall through */
    case OP_EXACT:
    repeat = GET2(cc, 1);
    last = FALSE;
    cc += 1 + IMM2_SIZE;
    break;

    case OP_QUERYI:
    case OP_MINQUERYI:
    case OP_POSQUERYI:
    caseless = TRUE;
    /* Fall through */
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_POSQUERY:
    /* Optional character: first the path which skips the character is
    scanned recursively, then the character itself is recorded into the same
    positions below. */
    len = 1;
    cc++;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
#endif
    max_chars = scan_prefix(common, cc + len, chars, max_chars, rec_count);
    if (max_chars == 0)
      return consumed;
    last = FALSE;
    break;

    case OP_KET:
    cc += 1 + LINK_SIZE;
    continue;

    case OP_ALT:
    /* The end of the current alternative: follow the link stored in the
    opcode. */
    cc += GET(cc, 1);
    continue;

    case OP_ONCE:
    case OP_BRA:
    case OP_BRAPOS:
    case OP_CBRA:
    case OP_CBRAPOS:
    /* Every further alternative of the group is scanned recursively into
    the same positions, then the first alternative is scanned inline. */
    alternative = cc + GET(cc, 1);
    while (*alternative == OP_ALT)
      {
      max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, max_chars, rec_count);
      if (max_chars == 0)
        return consumed;
      alternative += GET(alternative, 1);
      }

    if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
      cc += IMM2_SIZE;
    cc += 1 + LINK_SIZE;
    continue;

    case OP_CLASS:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
    /* In UTF-8 mode a class is only usable when is_char7_bitset() accepts
    its bitmap (presumably: it contains 7 bit characters only). */
    if (common->utf && !is_char7_bitset((const sljit_u8 *)(cc + 1), FALSE))
      return consumed;
#endif
    class = TRUE;
    break;

    case OP_NCLASS:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    class = TRUE;
    break;

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    cc += GET(cc, 1);
    break;
#endif

    case OP_DIGIT:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
      return consumed;
#endif
    any = TRUE;
    cc++;
    break;

    case OP_WHITESPACE:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_space, FALSE))
      return consumed;
#endif
    any = TRUE;
    cc++;
    break;

    case OP_WORDCHAR:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_word, FALSE))
      return consumed;
#endif
    any = TRUE;
    cc++;
    break;

    case OP_NOT:
    case OP_NOTI:
    /* One extra increment for these opcodes; together with the increment
    below both the opcode and the code unit after it are skipped (in UTF
    modes of 8 and 16 bit builds the function has returned before that). */
    cc++;
    /* Fall through. */
    case OP_NOT_DIGIT:
    case OP_NOT_WHITESPACE:
    case OP_NOT_WORDCHAR:
    case OP_ANY:
    case OP_ALLANY:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    cc++;
    break;

#ifdef SUPPORT_UNICODE
    case OP_NOTPROP:
    case OP_PROP:
#if PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    cc += 1 + 2;
    break;
#endif

    case OP_TYPEEXACT:
    /* Only the repeat count is taken here; the type opcode which follows is
    processed by the next iteration. */
    repeat = GET2(cc, 1);
    cc += 1 + IMM2_SIZE;
    continue;

    case OP_NOTEXACT:
    case OP_NOTEXACTI:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    repeat = GET2(cc, 1);
    cc += 1 + IMM2_SIZE + 1;
    break;

    default:
    /* Any other opcode ends the prefix. */
    return consumed;
    }

  if (any)
    {
    /* Mark repeat positions as matching anything. */
    do
      {
      chars->count = 255;

      consumed++;
      if (--max_chars == 0)
        return consumed;
      chars++;
      }
    while (--repeat > 0);

    repeat = 1;
    continue;
    }

  if (class)
    {
    /* cc still points to the class opcode: the bitmap starts after it and
    cc is moved to the item which follows the 32 byte bitmap (possibly a
    repeat opcode). */
    bytes = (sljit_u8*) (cc + 1);
    cc += 1 + 32 / sizeof(PCRE2_UCHAR);

    switch (*cc)
      {
      case OP_CRSTAR:
      case OP_CRMINSTAR:
      case OP_CRPOSSTAR:
      case OP_CRQUERY:
      case OP_CRMINQUERY:
      case OP_CRPOSQUERY:
      /* The class may be absent: scan the path which skips it first. */
      max_chars = scan_prefix(common, cc + 1, chars, max_chars, rec_count);
      if (max_chars == 0)
        return consumed;
      break;

      default:
      case OP_CRPLUS:
      case OP_CRMINPLUS:
      case OP_CRPOSPLUS:
      break;

      case OP_CRRANGE:
      case OP_CRMINRANGE:
      case OP_CRPOSRANGE:
      /* The first value stored in the opcode is used as the number of
      mandatory repetitions. */
      repeat = GET2(cc, 1);
      if (repeat <= 0)
        return consumed;
      break;
      }

    do
      {
      /* A class which contains character 255 (the top bit of the bitmap) is
      not listed; the position is marked as matching anything (presumably
      because such a class may match characters above 255 as well). */
      if (bytes[31] & 0x80)
        chars->count = 255;
      else if (chars->count != 255)
        {
        /* Add every character whose bit is set. chr is always the character
        code of bit 0 of the next bitmap byte, so it is rounded up to a
        multiple of 8 when the inner loop stops early. The walk stops as soon
        as the position overflows to 255. */
        bytes_end = bytes + 32;
        chr = 0;
        do
          {
          byte = *bytes++;
          SLJIT_ASSERT((chr & 0x7) == 0);
          if (byte == 0)
            chr += 8;
          else
            {
            do
              {
              if ((byte & 0x1) != 0)
                add_prefix_char(chr, chars, TRUE);
              byte >>= 1;
              chr++;
              }
            while (byte != 0);
            chr = (chr + 7) & ~7;
            }
          }
        while (chars->count != 255 && bytes < bytes_end);
        /* Rewind to the start of the bitmap for the next repetition. */
        bytes = bytes_end - 32;
        }

      consumed++;
      if (--max_chars == 0)
        return consumed;
      chars++;
      }
    while (--repeat > 0);

    switch (*cc)
      {
      case OP_CRSTAR:
      case OP_CRMINSTAR:
      case OP_CRPOSSTAR:
      return consumed;

      case OP_CRQUERY:
      case OP_CRMINQUERY:
      case OP_CRPOSQUERY:
      cc++;
      break;

      case OP_CRRANGE:
      case OP_CRMINRANGE:
      case OP_CRPOSRANGE:
      /* Scanning can only continue after an exact repeat, where both stored
      values are the same. */
      if (GET2(cc, 1) != GET2(cc, 1 + IMM2_SIZE))
        return consumed;
      cc += 1 + 2 * IMM2_SIZE;
      break;
      }

    repeat = 1;
    continue;
    }

  /* Literal character: cc points to its first code unit, len is the number
  of code units it occupies. */
  len = 1;
#ifdef SUPPORT_UNICODE
  if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
#endif

  if (caseless && char_has_othercase(common, cc))
    {
#ifdef SUPPORT_UNICODE
    if (common->utf)
      {
      /* The other case is only usable when its encoding has the same length,
      so that the two encodings can be recorded position by position. */
      GETCHAR(chr, cc);
      if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
        return consumed;
      }
    else
#endif
      {
      chr = *cc;
#ifdef SUPPORT_UNICODE
      if (common->ucp && chr > 127)
        othercase[0] = UCD_OTHERCASE(chr);
      else
#endif
        othercase[0] = TABLE_GET(chr, common->fcc, chr);
      }
    }
  else
    {
    caseless = FALSE;
    othercase[0] = 0; /* Stops compiler warning - PH */
    }

  /* Record the code units of the character, and of its other case if there
  is one, repeat times. The last flag of add_prefix_char() is only set for
  the final code unit of the character. */
  len_save = len;
  cc_save = cc;
  while (TRUE)
    {
    oc = othercase;
    do
      {
      len--;
      consumed++;

      chr = *cc;
      add_prefix_char(*cc, chars, len == 0);

      if (caseless)
        add_prefix_char(*oc, chars, len == 0);

      if (--max_chars == 0)
        return consumed;
      chars++;
      cc++;
      oc++;
      }
    while (len > 0);

    if (--repeat == 0)
      break;

    /* Rewind to the start of the character for the next repetition. */
    len = len_save;
    cc = cc_save;
    }

  repeat = 1;
  if (last)
    return consumed;
  }
}
6002
6003 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
jumpto_if_not_utf_char_start(struct sljit_compiler * compiler,sljit_s32 reg,struct sljit_label * label)6004 static void jumpto_if_not_utf_char_start(struct sljit_compiler *compiler, sljit_s32 reg, struct sljit_label *label)
6005 {
6006 #if PCRE2_CODE_UNIT_WIDTH == 8
6007 OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xc0);
6008 CMPTO(SLJIT_EQUAL, reg, 0, SLJIT_IMM, 0x80, label);
6009 #elif PCRE2_CODE_UNIT_WIDTH == 16
6010 OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xfc00);
6011 CMPTO(SLJIT_EQUAL, reg, 0, SLJIT_IMM, 0xdc00, label);
6012 #else
6013 #error "Unknown code width"
6014 #endif
6015 }
6016 #endif
6017
6018 #include "pcre2_jit_simd_inc.h"
6019
6020 #ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD
6021
check_fast_forward_char_pair_simd(compiler_common * common,fast_forward_char_data * chars,int max)6022 static BOOL check_fast_forward_char_pair_simd(compiler_common *common, fast_forward_char_data *chars, int max)
6023 {
6024 sljit_s32 i, j, max_i = 0, max_j = 0;
6025 sljit_u32 max_pri = 0;
6026 sljit_s32 max_offset = max_fast_forward_char_pair_offset();
6027 PCRE2_UCHAR a1, a2, a_pri, b1, b2, b_pri;
6028
6029 for (i = max - 1; i >= 1; i--)
6030 {
6031 if (chars[i].last_count > 2)
6032 {
6033 a1 = chars[i].chars[0];
6034 a2 = chars[i].chars[1];
6035 a_pri = chars[i].last_count;
6036
6037 j = i - max_offset;
6038 if (j < 0)
6039 j = 0;
6040
6041 while (j < i)
6042 {
6043 b_pri = chars[j].last_count;
6044 if (b_pri > 2 && (sljit_u32)a_pri + (sljit_u32)b_pri >= max_pri)
6045 {
6046 b1 = chars[j].chars[0];
6047 b2 = chars[j].chars[1];
6048
6049 if (a1 != b1 && a1 != b2 && a2 != b1 && a2 != b2)
6050 {
6051 max_pri = a_pri + b_pri;
6052 max_i = i;
6053 max_j = j;
6054 }
6055 }
6056 j++;
6057 }
6058 }
6059 }
6060
6061 if (max_pri == 0)
6062 return FALSE;
6063
6064 fast_forward_char_pair_simd(common, max_i, chars[max_i].chars[0], chars[max_i].chars[1], max_j, chars[max_j].chars[0], chars[max_j].chars[1]);
6065 return TRUE;
6066 }
6067
6068 #endif /* JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD */
6069
/* Emits a loop that advances STR_PTR until the code unit found 'offset'
units ahead of it equals char1 or char2 (pass the same value twice to search
for a single character).

In PCRE2_JIT_COMPLETE mode reaching the end of the searched area jumps to
common->failed_match; in the other modes the loop is simply left. A non-zero
offset is only allowed in PCRE2_JIT_COMPLETE mode (asserted below). When
common->match_end_ptr is in use, STR_END is temporarily lowered (its value is
kept in TMP3) and restored before returning. */
static void fast_forward_first_char2(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
{
DEFINE_COMPILER;
struct sljit_label *scan_loop;
struct sljit_jump *char1_hit;
struct sljit_jump *at_end;
PCRE2_UCHAR diff = char1 ^ char2;
BOOL clamp_end = (common->match_end_ptr != 0);
BOOL complete = (common->mode == PCRE2_JIT_COMPLETE);

SLJIT_ASSERT(complete || offset == 0);

if (clamp_end)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);

/* From here on STR_PTR addresses the position that is actually compared. */
if (offset > 0)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));

if (clamp_end)
  {
  /* Save STR_END, then lower it to the stored match end + offset + 1
  whenever that is smaller. */
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);

  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offset + 1));
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_END, 0, TMP1, 0);
  SELECT(SLJIT_GREATER, STR_END, TMP1, 0, STR_END);
  }

#ifdef JIT_HAS_FAST_FORWARD_CHAR_SIMD

if (JIT_HAS_FAST_FORWARD_CHAR_SIMD)
  {
  fast_forward_char_simd(common, char1, char2, offset);

  /* Undo the offset adjustment and the STR_END change. */
  if (offset > 0)
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));

  if (clamp_end)
    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
  return;
  }

#endif

scan_loop = LABEL();

at_end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (complete)
  add_jump(compiler, &common->failed_match, at_end);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

if (diff == 0)
  {
  /* Single character. */
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char1, scan_loop);
  }
else if (is_powerof2(diff))
  {
  /* The two characters differ in one bit: force that bit on and
  compare once. */
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, diff);
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char1 | diff, scan_loop);
  }
else
  {
  /* Two independent comparisons. */
  char1_hit = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1);
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char2, scan_loop);
  JUMPHERE(char1_hit);
  }

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf && offset > 0)
  {
  /* The candidate start position must be the first unit of a character. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-(offset + 1)));
  jumpto_if_not_utf_char_start(compiler, TMP1, scan_loop);
  }
#endif

/* Step back over the offset and over the unit consumed by the loop. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset + 1));

if (!complete)
  JUMPHERE(at_end);

if (clamp_end)
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
}
6155
/* Emits a fast-forward search based on the character sets that may occur at
the first positions of a match, as collected by scan_prefix().

Steps performed at compile time:
  1. scan_prefix() fills chars[] for up to MAX_N_CHARS positions.
  2. last_count of every position is turned into a priority value.
  3. If available, the SIMD character-pair search is tried first.
  4. The longest run of more than three consecutive positions whose count is
     below 255 is located; for it a 256 entry skip table is built, indexed
     by the low 8 bits of a code unit.
  5. Independently, the position with the best priority (other than the
     right end of the run) is selected as a single-position check.

Returns FALSE when nothing could be emitted, TRUE otherwise. TRUE is also
returned, without emitting any code, when the skip table cannot be allocated
(NOTE(review): presumably the allocation failure is reported through another
channel - confirm in allocate_read_only_data). */
static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_label *start;
struct sljit_jump *match;
fast_forward_char_data chars[MAX_N_CHARS];
sljit_s32 offset;
PCRE2_UCHAR mask;
PCRE2_UCHAR *char_set, *char_set_end;
int i, max, from;
int range_right = -1, range_len;
sljit_u8 *update_table = NULL;
BOOL in_range;
sljit_u32 rec_count;

for (i = 0; i < MAX_N_CHARS; i++)
  {
  chars[i].count = 0;
  chars[i].last_count = 0;
  }

/* rec_count is passed by address to scan_prefix (NOTE(review): looks like a
work budget for the scan - confirm in scan_prefix). */
rec_count = 10000;
max = scan_prefix(common, common->start, chars, MAX_N_CHARS, &rec_count);

if (max < 1)
  return FALSE;

/* Convert last_count to priority. Resulting values:
     7 / 5  one character       (last_count was 1 / was not 1)
     6 / 4  two characters differing in a single bit
            (last_count was 2 / was not 2)
     3 / 2  two other characters (last_count was 2 / was not 2)
     1      more than two characters
     0      count == 255 */
for (i = 0; i < max; i++)
  {
  SLJIT_ASSERT(chars[i].count > 0 && chars[i].last_count <= chars[i].count);

  if (chars[i].count == 1)
    {
    chars[i].last_count = (chars[i].last_count == 1) ? 7 : 5;
    /* Simplifies algorithms later: both slots are always valid. */
    chars[i].chars[1] = chars[i].chars[0];
    }
  else if (chars[i].count == 2)
    {
    SLJIT_ASSERT(chars[i].chars[0] != chars[i].chars[1]);

    if (is_powerof2(chars[i].chars[0] ^ chars[i].chars[1]))
      chars[i].last_count = (chars[i].last_count == 2) ? 6 : 4;
    else
      chars[i].last_count = (chars[i].last_count == 2) ? 3 : 2;
    }
  else
    chars[i].last_count = (chars[i].count == 255) ? 0 : 1;
  }

#ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD
if (JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD && check_fast_forward_char_pair_simd(common, chars, max))
  return TRUE;
#endif

/* Find the longest run of positions with count < 255. A run is recorded only
when it is longer than range_len, which starts at 3, so the minimum accepted
length is 4. The loop goes up to i == max so that a run reaching the last
position is closed as well. */
in_range = FALSE;
/* Prevent compiler "uninitialized" warning */
from = 0;
range_len = 4 /* minimum length */ - 1;
for (i = 0; i <= max; i++)
  {
  if (in_range && (i - from) > range_len && (chars[i - 1].count < 255))
    {
    range_len = i - from;
    range_right = i - 1;
    }

  if (i < max && chars[i].count < 255)
    {
    SLJIT_ASSERT(chars[i].count > 0);
    if (!in_range)
      {
      in_range = TRUE;
      from = i;
      }
    }
  else
    in_range = FALSE;
  }

if (range_right >= 0)
  {
  /* Build the skip table. Every entry starts as the full run length (in
  bytes); for each character of the set at distance i from the right end of
  the run the entry is lowered to IN_UCHARS(i). An entry of 0 therefore
  belongs to a unit that is possible at position range_right. */
  update_table = (sljit_u8 *)allocate_read_only_data(common, 256);
  if (update_table == NULL)
    return TRUE;
  memset(update_table, IN_UCHARS(range_len), 256);

  for (i = 0; i < range_len; i++)
    {
    SLJIT_ASSERT(chars[range_right - i].count > 0 && chars[range_right - i].count < 255);

    char_set = chars[range_right - i].chars;
    char_set_end = char_set + chars[range_right - i].count;
    do
      {
      if (update_table[(*char_set) & 0xff] > IN_UCHARS(i))
        update_table[(*char_set) & 0xff] = IN_UCHARS(i);
      char_set++;
      }
    while (char_set < char_set_end);
    }
  }

/* Select the position used for the single-position check. The first
candidate must have priority >= 2; afterwards a later position replaces it
only when its priority is strictly higher. The right end of the run is never
selected. offset stays -1 when no position qualifies. */
offset = -1;
/* Scan forward. */
for (i = 0; i < max; i++)
  {
  if (range_right == i)
    continue;

  if (offset == -1)
    {
    if (chars[i].last_count >= 2)
      offset = i;
    }
  else if (chars[offset].last_count < chars[i].last_count)
    offset = i;
  }

SLJIT_ASSERT(offset == -1 || (chars[offset].count >= 1 && chars[offset].count <= 2));

if (range_right < 0)
  {
  /* No usable run: fall back to the one/two character search. */
  if (offset < 0)
    return FALSE;
  /* Works regardless the value is 1 or 2. */
  fast_forward_first_char2(common, chars[offset].chars[0], chars[offset].chars[1], offset);
  return TRUE;
  }

SLJIT_ASSERT(range_right != offset);

/* Lower STR_END by 'max' code units for the duration of the search; jump to
failed_match when the subtraction sets the LESS condition. With
match_end_ptr in use the original STR_END is kept in TMP3 and STR_END is
additionally limited to the stored match end. */
if (common->match_end_ptr != 0)
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
  OP2(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
  add_jump(compiler, &common->failed_match, JUMP(SLJIT_LESS));
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_END, 0, TMP1, 0);
  SELECT(SLJIT_GREATER, STR_END, TMP1, 0, STR_END);
  }
else
  {
  OP2(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
  add_jump(compiler, &common->failed_match, JUMP(SLJIT_LESS));
  }

SLJIT_ASSERT(range_right >= 0);

/* Without virtual registers the table address is kept in RETURN_ADDR. */
if (!HAS_VIRTUAL_REGISTERS)
  OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);

start = LABEL();
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));

/* Load one byte of the code unit at position range_right: the first byte of
the unit for 8 bit units or little endian targets, otherwise the last byte of
the unit. */
#if PCRE2_CODE_UNIT_WIDTH == 8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
#else
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
#endif

/* Replace the byte with its skip distance. */
if (!HAS_VIRTUAL_REGISTERS)
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
else
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);

/* Advance by the skip distance and repeat until the distance is zero. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);

if (offset >= 0)
  {
  /* Verify the selected position as well. STR_PTR is advanced by one unit
  here, so a failed comparison restarts the loop at the next position. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offset));
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  if (chars[offset].count == 1)
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0], start);
  else
    {
    mask = chars[offset].chars[0] ^ chars[offset].chars[1];
    if (is_powerof2(mask))
      {
      /* Single differing bit: force it on and compare once. */
      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
      CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0] | mask, start);
      }
    else
      {
      match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0]);
      CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[1], start);
      JUMPHERE(match);
      }
    }
  }

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
/* In UTF mode the candidate position must be the first unit of a character.
This is skipped when offset == 0, i.e. when the unit at the candidate
position itself was compared above. */
if (common->utf && offset != 0)
  {
  if (offset < 0)
    {
    /* No single-position check was emitted, so STR_PTR has not been
    advanced yet: read the unit and advance now, so that a restart continues
    with the next position. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    }
  else
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));

  jumpto_if_not_utf_char_start(compiler, TMP1, start);

  if (offset < 0)
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  }
#endif

/* Undo the advance made by the single-position check. */
if (offset >= 0)
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* Restore STR_END. */
if (common->match_end_ptr != 0)
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
else
  OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
return TRUE;
}
6377
fast_forward_first_char(compiler_common * common)6378 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common)
6379 {
6380 PCRE2_UCHAR first_char = (PCRE2_UCHAR)(common->re->first_codeunit);
6381 PCRE2_UCHAR oc;
6382
6383 oc = first_char;
6384 if ((common->re->flags & PCRE2_FIRSTCASELESS) != 0)
6385 {
6386 oc = TABLE_GET(first_char, common->fcc, first_char);
6387 #if defined SUPPORT_UNICODE
6388 if (first_char > 127 && (common->utf || common->ucp))
6389 oc = UCD_OTHERCASE(first_char);
6390 #endif
6391 }
6392
6393 fast_forward_first_char2(common, first_char, oc, 0);
6394 }
6395
/* Emits code that moves STR_PTR forward to a position following a newline
sequence, according to common->nltype / common->newline. Nothing is searched
when STR_PTR is not beyond the 'str' field of jit_arguments (the "firstchar"
jumps). While the search runs, STR_END is replaced by the value stored at
common->match_end_ptr when that slot is in use; the original STR_END is kept
in TMP3 and restored at the end.
NOTE(review): presumably used for start optimisations where a match may only
begin at the start of a line - confirm against the caller. */
static SLJIT_INLINE void fast_forward_newline(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_label *loop;
struct sljit_jump *lastchar = NULL;
struct sljit_jump *firstchar;
struct sljit_jump *quit = NULL;
struct sljit_jump *foundcr = NULL;
struct sljit_jump *notfoundnl;
jump_list *newline = NULL;

if (common->match_end_ptr != 0)
  {
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  }

/* Fixed newline made of two code units: high byte of common->newline is the
first unit, low byte the second one. */
if (common->nltype == NLTYPE_FIXED && common->newline > 255)
  {
#ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD
  if (JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD && common->mode == PCRE2_JIT_COMPLETE)
    {
    /* TMP2 = arguments->str, TMP1 = arguments->begin. */
    if (HAS_VIRTUAL_REGISTERS)
      {
      OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
      }
    else
      {
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
      }
    firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);

    /* Step back one unit, and one more unless that already reached
    'begin' (TMP1 becomes 1 or 0, scaled to bytes for wide units). */
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2U(SLJIT_SUB | SLJIT_SET_Z, STR_PTR, 0, TMP1, 0);
    OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_NOT_EQUAL);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);

    /* Search for the two units as a pair, then step over both of them. */
    fast_forward_char_pair_simd(common, 1, common->newline & 0xff, common->newline & 0xff, 0, (common->newline >> 8) & 0xff, (common->newline >> 8) & 0xff);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    }
  else
#endif /* JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD */
    {
    lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
    /* TMP2 = arguments->str, TMP1 = arguments->begin. */
    if (HAS_VIRTUAL_REGISTERS)
      {
      OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
      }
    else
      {
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
      }
    firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);

    /* Step back one unit when STR_PTR is at least two units past 'begin'
    (TMP2 becomes 1 or 0, scaled to bytes for wide units). */
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
    OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, STR_PTR, 0, TMP1, 0);
    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER_EQUAL);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

    /* Advance until the two units before STR_PTR are the newline pair, or
    until STR_END is reached. */
    loop = LABEL();
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
    CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);

    JUMPHERE(quit);
    JUMPHERE(lastchar);
    }

  JUMPHERE(firstchar);

  if (common->match_end_ptr != 0)
    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
  return;
  }

/* All other newline types. TMP2 = arguments->str. */
if (HAS_VIRTUAL_REGISTERS)
  {
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
  }
else
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));

/* Example: match /^/ to \r\n from offset 1. */
firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);

/* Go back one position before searching: via move_back() for NLTYPE_ANY,
by one code unit otherwise. */
if (common->nltype == NLTYPE_ANY)
  move_back(common, NULL, FALSE);
else
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* The loop head is also published in common->ff_newline_shortcut. */
loop = LABEL();
common->ff_newline_shortcut = loop;

#ifdef JIT_HAS_FAST_FORWARD_CHAR_SIMD
if (JIT_HAS_FAST_FORWARD_CHAR_SIMD && (common->nltype == NLTYPE_FIXED || common->nltype == NLTYPE_ANYCRLF))
  {
  if (common->nltype == NLTYPE_ANYCRLF)
    {
    /* Search for CR or LF, then read the unit that was found and step over
    it. 'quit' is taken when it is not CR; for CR the shared code below
    checks for a following LF. */
    fast_forward_char_simd(common, CHAR_CR, CHAR_LF, 0);
    if (common->mode != PCRE2_JIT_COMPLETE)
      lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    quit = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
    }
  else
    {
    /* Single unit fixed newline: search for it and step over it. Outside
    PCRE2_JIT_COMPLETE mode STR_PTR is limited to STR_END afterwards. */
    fast_forward_char_simd(common, common->newline, common->newline, 0);

    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    if (common->mode != PCRE2_JIT_COMPLETE)
      {
      OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);
      SELECT(SLJIT_GREATER, STR_PTR, STR_END, 0, STR_PTR);
      }
    }
  }
else
#endif /* JIT_HAS_FAST_FORWARD_CHAR_SIMD */
  {
  /* Generic loop: read a character, leave when STR_END is reached, and go
  back to the loop head through the jumps collected by check_newlinechar()
  (NOTE(review): presumably those are the "not a newline" jumps - confirm in
  check_newlinechar). A CR is handled separately for the ANY / ANYCRLF
  types. */
  read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_NEWLINE);
  lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
    foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
  check_newlinechar(common, common->nltype, &newline, FALSE);
  set_jumps(newline, loop);
  }

if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
  {
  /* quit is still NULL when the generic loop above was emitted: a newline
  other than CR skips the LF check, CR continues here. */
  if (quit == NULL)
    {
    quit = JUMP(SLJIT_JUMP);
    JUMPHERE(foundcr);
    }

  /* After CR: step over the next unit as well when it is CHAR_NL (TMP1
  becomes 1 or 0, scaled to bytes for wide units). */
  notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_NL);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  JUMPHERE(notfoundnl);
  JUMPHERE(quit);
  }

/* lastchar is only set on some of the paths above. */
if (lastchar)
  JUMPHERE(lastchar);
JUMPHERE(firstchar);

if (common->match_end_ptr != 0)
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
}
6568
6569 static BOOL optimize_class(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
6570
/* Emits a loop that advances STR_PTR until the code unit it points to is
accepted by the start bitmap of the pattern (common->re->start_bitmap, one
bit per value). On exit STR_PTR addresses the accepted unit.

In PCRE2_JIT_COMPLETE mode reaching STR_END jumps to common->failed_match; in
the other modes the loop is simply left. When common->match_end_ptr is in
use, STR_END is temporarily lowered to the stored match end + 1 unit if that
is smaller; the original value is kept in RETURN_ADDR and restored at the
end. */
static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common)
{
DEFINE_COMPILER;
const sljit_u8 *start_bits = common->re->start_bitmap;
struct sljit_label *start;
struct sljit_jump *partial_quit;
#if PCRE2_CODE_UNIT_WIDTH != 8
struct sljit_jump *found = NULL;
#endif
jump_list *matches = NULL;

if (common->match_end_ptr != 0)
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_END, 0, TMP1, 0);
  SELECT(SLJIT_GREATER, STR_END, TMP1, 0, STR_END);
  }

start = LABEL();

partial_quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (common->mode == PCRE2_JIT_COMPLETE)
  add_jump(compiler, &common->failed_match, partial_quit);

/* Read the next unit into TMP1 and step over it. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* First try the compact form produced by optimize_class(); the top bit of
the bitmap (bit 7 of byte 31) is passed as its 'nclass' argument. */
if (!optimize_class(common, start_bits, (start_bits[31] & 0x80) != 0, FALSE, &matches))
  {
  /* Generic bitmap test. Values the bitmap cannot describe are handled
  before the lookup. */
#if PCRE2_CODE_UNIT_WIDTH != 8
  /* Units >= 255 are accepted when the top bit of the bitmap is set,
  otherwise they are rejected. */
  if ((start_bits[31] & 0x80) != 0)
    found = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 255);
  else
    CMPTO(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 255, start);
#elif defined SUPPORT_UNICODE
  /* In UTF mode, when is_char7_bitset() holds for the bitmap, units above
  127 are rejected without a lookup. */
  if (common->utf && is_char7_bitset(start_bits, FALSE))
    CMPTO(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 127, start);
#endif
  /* TMP2 = bit index inside the byte, TMP1 = bitmap byte. */
  OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
  /* Test (byte & (1 << bit)); the shifted mask goes to TMP3 or TMP2
  depending on HAS_VIRTUAL_REGISTERS. */
  if (!HAS_VIRTUAL_REGISTERS)
    {
    OP2(SLJIT_SHL, TMP3, 0, SLJIT_IMM, 1, TMP2, 0);
    OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP3, 0);
    }
  else
    {
    OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
    OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0);
    }
  /* Bit clear: try the next unit. */
  JUMPTO(SLJIT_ZERO, start);
  }
else
  /* The jumps collected by optimize_class() lead back to the loop head
  (NOTE(review): presumably they are taken for units outside the set -
  confirm in optimize_class). */
  set_jumps(matches, start);

#if PCRE2_CODE_UNIT_WIDTH != 8
if (found != NULL)
  JUMPHERE(found);
#endif

/* Step back onto the accepted unit. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

if (common->mode != PCRE2_JIT_COMPLETE)
  JUMPHERE(partial_quit);

if (common->match_end_ptr != 0)
  OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
}
6642
search_requested_char(compiler_common * common,PCRE2_UCHAR req_char,BOOL caseless,BOOL has_firstchar)6643 static SLJIT_INLINE jump_list *search_requested_char(compiler_common *common, PCRE2_UCHAR req_char, BOOL caseless, BOOL has_firstchar)
6644 {
6645 DEFINE_COMPILER;
6646 struct sljit_label *loop;
6647 struct sljit_jump *toolong;
6648 struct sljit_jump *already_found;
6649 struct sljit_jump *found;
6650 struct sljit_jump *found_oc = NULL;
6651 jump_list *not_found = NULL;
6652 sljit_u32 oc, bit;
6653
6654 SLJIT_ASSERT(common->req_char_ptr != 0);
6655 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(REQ_CU_MAX) * 100);
6656 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
6657 toolong = CMP(SLJIT_LESS, TMP2, 0, STR_END, 0);
6658 already_found = CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0);
6659
6660 if (has_firstchar)
6661 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6662 else
6663 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
6664
6665 oc = req_char;
6666 if (caseless)
6667 {
6668 oc = TABLE_GET(req_char, common->fcc, req_char);
6669 #if defined SUPPORT_UNICODE
6670 if (req_char > 127 && (common->utf || common->ucp))
6671 oc = UCD_OTHERCASE(req_char);
6672 #endif
6673 }
6674
6675 #ifdef JIT_HAS_FAST_REQUESTED_CHAR_SIMD
6676 if (JIT_HAS_FAST_REQUESTED_CHAR_SIMD)
6677 {
6678 not_found = fast_requested_char_simd(common, req_char, oc);
6679 }
6680 else
6681 #endif
6682 {
6683 loop = LABEL();
6684 add_jump(compiler, ¬_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));
6685
6686 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
6687
6688 if (req_char == oc)
6689 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
6690 else
6691 {
6692 bit = req_char ^ oc;
6693 if (is_powerof2(bit))
6694 {
6695 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
6696 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
6697 }
6698 else
6699 {
6700 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
6701 found_oc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc);
6702 }
6703 }
6704 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
6705 JUMPTO(SLJIT_JUMP, loop);
6706
6707 JUMPHERE(found);
6708 if (found_oc)
6709 JUMPHERE(found_oc);
6710 }
6711
6712 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);
6713
6714 JUMPHERE(already_found);
6715 JUMPHERE(toolong);
6716 return not_found;
6717 }
6718
/* Emits the fast-called helper that restores saved values from the frame
records found on the machine stack (STACK_TOP). The return address is taken
into RETURN_ADDR on entry.

The word just below STACK_TOP selects the record type:
  > 0  offset of a two word area relative to the local base: two saved words
       are copied back and three stack words are dropped;
  = 0  end marker: the helper returns;
  < 0  negated offset of a single word: one saved word is copied back and
       two stack words are dropped.
Uses TMP1, TMP2 and (without virtual registers) TMP3. */
static void do_revertframes(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_label *mainloop;

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
/* TMP1 = local base. */
GET_LOCAL_BASE(TMP1, 0, 0);

/* Drop frames until we reach STACK_TOP. */
mainloop = LABEL();
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), -SSIZE_OF(sw));
OP2U(SLJIT_SUB | SLJIT_SET_SIG_LESS_EQUAL | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, 0);
jump = JUMP(SLJIT_SIG_LESS_EQUAL);

/* Positive record: TMP2 = address of the two word destination. */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
if (HAS_VIRTUAL_REGISTERS)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw)));
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(STACK_TOP), -(3 * SSIZE_OF(sw)));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * SSIZE_OF(sw));
  }
else
  {
  /* TMP1 is used as a scratch register here, so the local base has to be
  loaded into it again before the loop continues. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw)));
  OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(3 * SSIZE_OF(sw)));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * SSIZE_OF(sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
  GET_LOCAL_BASE(TMP1, 0, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP3, 0);
  }
JUMPTO(SLJIT_JUMP, mainloop);

JUMPHERE(jump);
/* Execution arrives here through the jump above, so the flag state of the
comparison is declared again for the compiler before the zero flag is
tested. */
sljit_set_current_flags(compiler, SLJIT_CURRENT_FLAGS_SUB | SLJIT_CURRENT_FLAGS_COMPARE | SLJIT_SET_SIG_LESS_EQUAL | SLJIT_SET_Z);
/* The value is <= 0 here, so "not zero" means negative. */
jump = JUMP(SLJIT_NOT_ZERO /* SIG_LESS */);
/* End of reverting values. */
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(jump);
/* Negative record: TMP2 = local base - TMP2. */
OP2(SLJIT_SUB, TMP2, 0, SLJIT_IMM, 0, TMP2, 0);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
if (HAS_VIRTUAL_REGISTERS)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw)));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * SSIZE_OF(sw));
  }
else
  {
  OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw)));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * SSIZE_OF(sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP3, 0);
  }
JUMPTO(SLJIT_JUMP, mainloop);
}
6774
#ifdef SUPPORT_UNICODE
/* Bit masks with one bit per ucp_* value; in this file they are tested
against (1 << type) after a call to the getucdtype helper.
  UCPCAT(bit)              mask with the single bit 'bit' set
  UCPCAT2 / UCPCAT3        union of two / three single-bit masks
  UCPCAT_RANGE(start, end) all bits from 'start' to 'end', both inclusive
  UCPCAT_L                 bits ucp_Ll .. ucp_Lu
  UCPCAT_N                 bits ucp_Nd .. ucp_No
  UCPCAT_ALL               all bits from 0 to ucp_Zs
NOTE(review): the range macros rely on the numeric order of the ucp_*
constants, which are declared outside this file - confirm there. */
#define UCPCAT(bit) (1 << (bit))
#define UCPCAT2(bit1, bit2) (UCPCAT(bit1) | UCPCAT(bit2))
#define UCPCAT3(bit1, bit2, bit3) (UCPCAT(bit1) | UCPCAT(bit2) | UCPCAT(bit3))
#define UCPCAT_RANGE(start, end) (((1 << ((end) + 1)) - 1) - ((1 << (start)) - 1))
#define UCPCAT_L UCPCAT_RANGE(ucp_Ll, ucp_Lu)
#define UCPCAT_N UCPCAT_RANGE(ucp_Nd, ucp_No)
#define UCPCAT_ALL ((1 << (ucp_Zs + 1)) - 1)
#endif
6784
/* Emits the fast-called helper that tests for a word boundary at STR_PTR.

The helper computes a "word character" flag (0 or 1) for the character
before STR_PTR (kept in TMP3; 0 when STR_PTR is at or before the 'begin'
field of jit_arguments) and for the character at STR_PTR (kept in TMP2; 0 at
the end of the subject). It returns with TMP2 = TMP2 XOR TMP3 and the zero
flag set accordingly, so a non-zero result means a boundary.

ucp  when TRUE (and Unicode support is compiled in) the flag is derived from
     the value delivered by the getucdtype helper: set when the bit for that
     value is in the Mn, Pc, L or N masks. Otherwise the ctype_word bit of
     the common->ctypes table is used, and characters above 255 are never
     word characters.

With common->invalid_utf two extra exits exist, see the end of the function.
The return address is stored in LOCALS0; peek_char() is given LOCALS1 as an
additional stack slot. */
static void check_wordboundary(compiler_common *common, BOOL ucp)
{
DEFINE_COMPILER;
struct sljit_jump *skipread;
jump_list *skipread_list = NULL;
#ifdef SUPPORT_UNICODE
struct sljit_label *valid_utf;
jump_list *invalid_utf1 = NULL;
#endif /* SUPPORT_UNICODE */
jump_list *invalid_utf2 = NULL;
#if PCRE2_CODE_UNIT_WIDTH != 8 || defined SUPPORT_UNICODE
struct sljit_jump *jump;
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 || SUPPORT_UNICODE */

SLJIT_UNUSED_ARG(ucp);
/* The table lookups below shift the ctype byte right by 4 to reach the
ctype_word bit. */
SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCALS0);
/* Get type of the previous char, and put it to TMP3. */
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
/* No previous character at the start of the subject: TMP3 stays 0. */
skipread = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);

#ifdef SUPPORT_UNICODE
if (common->invalid_utf)
  {
  peek_char_back(common, READ_CHAR_MAX, &invalid_utf1);

  if (common->mode != PCRE2_JIT_COMPLETE)
    {
    /* Partial matching: move back and call check_start_used_ptr() at that
    position, preserving the character (TMP1) and STR_PTR around it. */
    OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
    OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
    move_back(common, NULL, TRUE);
    check_start_used_ptr(common);
    OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
    OP1(SLJIT_MOV, STR_PTR, 0, TMP2, 0);
    }
  }
else
#endif /* SUPPORT_UNICODE */
  {
  if (common->mode == PCRE2_JIT_COMPLETE)
    peek_char_back(common, READ_CHAR_MAX, NULL);
  else
    {
    /* Partial matching: move back, call check_start_used_ptr(), then read
    the character forward again, which advances STR_PTR. */
    move_back(common, NULL, TRUE);
    check_start_used_ptr(common);
    read_char(common, 0, READ_CHAR_MAX, NULL, READ_CHAR_UPDATE_STR_PTR);
    }
  }

/* Testing char type. */
#ifdef SUPPORT_UNICODE
if (ucp)
  {
  /* TMP3 = ((1 << type) & word masks) != 0. */
  add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP1, 0);
  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, UCPCAT2(ucp_Mn, ucp_Pc) | UCPCAT_L | UCPCAT_N);
  OP_FLAGS(SLJIT_MOV, TMP3, 0, SLJIT_NOT_ZERO);
  }
else
#endif /* SUPPORT_UNICODE */
  {
  /* Characters above 255 have no table entry: the lookup is skipped and
  TMP3 keeps its 0. */
#if PCRE2_CODE_UNIT_WIDTH != 8
  jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#elif defined SUPPORT_UNICODE
  /* Here TMP3 has already been zeroed. */
  jump = NULL;
  if (common->utf)
    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  /* TMP3 = (ctypes[ch] >> 4) & 1, i.e. the ctype_word bit. */
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
  OP2(SLJIT_AND, TMP3, 0, TMP1, 0, SLJIT_IMM, 1);
#if PCRE2_CODE_UNIT_WIDTH != 8
  JUMPHERE(jump);
#elif defined SUPPORT_UNICODE
  if (jump != NULL)
    JUMPHERE(jump);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  }
JUMPHERE(skipread);

/* Now the character at STR_PTR: TMP2 defaults to 0, which is kept when
check_str_end() takes its jumps. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
check_str_end(common, &skipread_list);
peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCALS1, &invalid_utf2);

/* Testing char type. This is a code duplication. */
#ifdef SUPPORT_UNICODE

/* Entry point used by the invalid_utf1 handler below. */
valid_utf = LABEL();

if (ucp)
  {
  add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP1, 0);
  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, UCPCAT2(ucp_Mn, ucp_Pc) | UCPCAT_L | UCPCAT_N);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO);
  }
else
#endif /* SUPPORT_UNICODE */
  {
#if PCRE2_CODE_UNIT_WIDTH != 8
  /* TMP2 may be destroyed by peek_char. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#elif defined SUPPORT_UNICODE
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  jump = NULL;
  if (common->utf)
    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#endif
  /* TMP2 = (ctypes[ch] >> 4) & 1, i.e. the ctype_word bit. */
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
  OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
#if PCRE2_CODE_UNIT_WIDTH != 8
  JUMPHERE(jump);
#elif defined SUPPORT_UNICODE
  if (jump != NULL)
    JUMPHERE(jump);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  }
set_jumps(skipread_list, LABEL());

/* Result: TMP2 = next XOR previous, with the zero flag set from it. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP2(SLJIT_XOR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, TMP3, 0);
OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);

#ifdef SUPPORT_UNICODE
if (common->invalid_utf)
  {
  /* The previous character could not be read (invalid_utf1). TMP3 is still
  0. If the character at STR_PTR is readable, continue at valid_utf;
  otherwise return with TMP2 = -1. */
  set_jumps(invalid_utf1, LABEL());

  peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCALS1, NULL);
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR, valid_utf);

  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, -1);
  OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);

  /* The character at STR_PTR could not be read (invalid_utf2): return the
  flag of the previous character in TMP2. */
  set_jumps(invalid_utf2, LABEL());
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  OP1(SLJIT_MOV, TMP2, 0, TMP3, 0);
  OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
  }
#endif /* SUPPORT_UNICODE */
}
6933
static BOOL optimize_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
/* Tries to replace a 256 bit class bitmap by a handful of comparisons on
the character held in TMP1. The bitmap is scanned for the positions where the
bit value flips. When at most four such boundaries are found, the matching
compare instructions are emitted, their jumps are appended to backtracks, and
TRUE is returned. In every other case FALSE is returned before anything has
been emitted.

May destroy TMP1. */
DEFINE_COMPILER;
int bounds[MAX_CLASS_RANGE_SIZE];
sljit_u8 state, seen, uniform;
int pos, count, lo, hi, delta, shifted;

count = 0;
state = bits[0] & 0x1;
/* A bitmap byte equal to 'uniform' holds eight copies of the current bit,
so it cannot contain a boundary. */
uniform = (state != 0) ? 0xff : 0x00;

pos = 0;
while (pos < 256)
  {
  if ((pos & 0x7) == 0 && bits[pos >> 3] == uniform)
    {
    pos += 8;
    continue;
    }

  seen = (bits[pos >> 3] >> (pos & 0x7)) & 0x1;
  if (seen != state)
    {
    if (count >= MAX_CLASS_RANGE_SIZE)
      return FALSE;
    bounds[count++] = pos;
    state = seen;
    uniform = (seen != 0) ? 0xff : 0x00;
    }
  pos++;
  }

/* Depending on nclass, the value of the last bit adds a boundary at 256. */
if (nclass ? (state == 0) : (state == 1))
  {
  if (count >= MAX_CLASS_RANGE_SIZE)
    return FALSE;
  bounds[count++] = 256;
  }

if (count < 0 || count > 4)
  return FALSE;

/* From here on 'state' is the (optionally inverted) value of bit zero. */
state = bits[0] & 0x1;
if (invert)
  state ^= 0x1;

if (count == 0)
  {
  /* No boundary at all. When state is zero no character is accepted,
  otherwise all characters are accepted and nothing needs to be emitted. */
  if (state == 0)
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
  return TRUE;
  }

switch(count)
  {
  case 1:
  add_jump(compiler, backtracks, CMP(state == 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, bounds[0]));
  return TRUE;

  case 2:
  lo = bounds[0];
  hi = bounds[1];
  if (lo + 1 == hi)
    {
    /* The range covers a single character. */
    add_jump(compiler, backtracks, CMP(state != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, lo));
    return TRUE;
    }
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, lo);
  add_jump(compiler, backtracks, CMP(state != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, hi - lo));
  return TRUE;

  case 3:
  /* One outer limit check followed by a check of the remaining pair. */
  if (state != 0)
    {
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, bounds[2]));
    lo = bounds[0];
    hi = bounds[1];
    }
  else
    {
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, bounds[0]));
    lo = bounds[1];
    hi = bounds[2];
    }

  if (lo + 1 == hi)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, lo));
  else
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, lo);
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, hi - lo));
    }
  return TRUE;

  case 4:
  delta = bounds[2] - bounds[0];

  /* Two ranges of equal size whose start positions differ in a single bit
  are merged by setting that bit in the character. */
  if ((bounds[1] - bounds[0]) == (bounds[3] - bounds[2])
      && (bounds[0] | delta) == bounds[2]
      && (bounds[1] & delta) == 0
      && is_powerof2(delta))
    {
    SLJIT_ASSERT((bounds[0] & delta) == 0 && (bounds[2] & bounds[3] & delta) != 0);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, delta);
    if (bounds[2] + 1 == bounds[3])
      add_jump(compiler, backtracks, CMP(state != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, bounds[2]));
    else
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, bounds[2]);
      add_jump(compiler, backtracks, CMP(state != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, bounds[3] - bounds[2]));
      }
    return TRUE;
    }

  if (state == 0)
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, bounds[0]);
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, bounds[3] - bounds[0]));
    if (bounds[1] + 1 == bounds[2])
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, bounds[1] - bounds[0]));
    else
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, bounds[1] - bounds[0]);
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, bounds[2] - bounds[1]));
      }
    return TRUE;
    }

  /* 'shifted' is the amount already subtracted from TMP1. */
  shifted = 0;
  if (bounds[0] + 1 == bounds[1])
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, bounds[0]));
  else
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, bounds[0]);
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, bounds[1] - bounds[0]));
    shifted = bounds[0];
    }

  if (bounds[2] + 1 == bounds[3])
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, bounds[2] - shifted));
  else
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, bounds[2] - shifted);
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, bounds[3] - bounds[2]));
    }
  return TRUE;

  default:
  SLJIT_UNREACHABLE();
  return FALSE;
  }
}
7085
static BOOL optimize_class_chars(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
/* Tries to replace a 256 bit class bitmap by a short list of character
comparisons that are combined with conditional moves. The selected bits
(inverted first when nclass is set) are collected into a list of at most
MAX_CLASS_CHARS_SIZE entries; two characters that differ only in bit 0x20 are
merged into one entry. Returns FALSE without emitting anything when the CPU
has no conditional move, when the list overflows, or when it is empty.
Otherwise TMP2 is computed as a zero / non-zero result, a single jump testing
it is appended to backtracks, and TRUE is returned.

May destroy TMP1. */
DEFINE_COMPILER;
uint16_t entries[MAX_CLASS_CHARS_SIZE];
uint8_t mask;
sljit_s32 type;
int idx, bitpos, scan, count, chr, first;
BOOL merged, has_pairs;

if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV))
  return FALSE;

count = 0;

for (idx = 0; idx < 32; idx++)
  {
  mask = bits[idx];
  if (nclass)
    mask = (uint8_t)~mask;

  for (bitpos = 0; mask != 0; mask >>= 1, bitpos++)
    {
    if ((mask & 0x1) == 0)
      continue;

    chr = idx * 8 + bitpos;
    merged = FALSE;

    if ((chr & 0x20) != 0)
      {
      /* Look for the counterpart with bit 0x20 cleared. A merged entry gets
      bit 0x20 and the 0x100 marker set. */
      for (scan = 0; scan < count && !merged; scan++)
        if (entries[scan] == chr - 0x20)
          {
          entries[scan] |= 0x120;
          merged = TRUE;
          }
      }

    if (!merged)
      {
      if (count >= MAX_CLASS_CHARS_SIZE)
        return FALSE;

      entries[count++] = (uint16_t) chr;
      }
    }
  }

if (count == 0) return FALSE;  /* Should never occur, but stops analyzers complaining. */

/* The zero character cannot use the conditional move below, because that
copies TMP1 (which would be zero) into TMP2. Its result is taken from the
flags instead. */
first = 0;
if (entries[0] == 0)
  {
  first = 1;
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_ZERO);
  }
else
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);

/* First pass: entries that were not merged. */
has_pairs = FALSE;
for (idx = first; idx < count; idx++)
  {
  if ((entries[idx] & 0x100) != 0)
    {
    has_pairs = TRUE;
    continue;
    }

  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, entries[idx]);
  SELECT(SLJIT_ZERO, TMP2, TMP1, 0, TMP2);
  }

/* Second pass: merged entries are compared after bit 0x20 is forced. */
if (has_pairs)
  {
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x20);

  for (idx = 0; idx < count; idx++)
    {
    if ((entries[idx] & 0x100) == 0)
      continue;

    OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, entries[idx] & 0xff);
    SELECT(SLJIT_ZERO, TMP2, TMP1, 0, TMP2);
    }
  }

/* invert flips the meaning of nclass for the final test. */
type = ((nclass != 0) != (invert != 0)) ? SLJIT_NOT_EQUAL : SLJIT_EQUAL;
add_jump(compiler, backtracks, CMP(type, TMP2, 0, SLJIT_IMM, 0));
return TRUE;
}
7186
static BOOL optimize_class(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
/* Runs the class bitmap optimizers in order: range checks first, the
character list second. The second one is only tried when the first one
reports failure. Returns TRUE when either of them emitted code.

May destroy TMP1. */
BOOL handled = optimize_class_ranges(common, bits, nclass, invert, backtracks);

if (!handled)
  handled = optimize_class_chars(common, bits, nclass, invert, backtracks);

return handled;
}
7194
static void check_anynewline(compiler_common *common)
{
/* Emits a fast-call helper which checks whether TMP1 contains a newline
character. The characters tested are 0x0a-0x0d and 0x85, plus 0x2028 and
0x2029 in 16/32 bit mode or when UTF is enabled in 8 bit mode. The result is
left in TMP2 (non-zero when matched) and in the zero flag. TMP2 is destroyed,
and TMP1 is modified as well because the tests are done on a rebased value. */
DEFINE_COMPILER;

/* The return address is kept in RETURN_ADDR for the fast return below. */
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

/* Rebase to 0x0a, so 0x0a-0x0d becomes a single "less or equal" test. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
/* The flag of this 0x85 test is merged into TMP2 by the next OP_FLAGS. */
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
#endif
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  /* Setting the lowest bit lets one compare cover both 0x2028 and 0x2029. */
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
#if PCRE2_CODE_UNIT_WIDTH == 8
  }
#endif
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
/* Merge the last comparison and set the zero flag from the final TMP2. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
7221
static void check_hspace(compiler_common *common)
{
/* Emits a fast-call helper which checks whether TMP1 contains a horizontal
whitespace character. The characters tested are 0x09, 0x20 and 0xa0, plus
0x1680, 0x180e, 0x2000-0x200a, 0x202f, 0x205f and 0x3000 in 16/32 bit mode or
when UTF is enabled in 8 bit mode. The result is left in TMP2 (non-zero when
matched) and in the zero flag. TMP2 is destroyed; TMP1 is modified when the
wide character tests are emitted. */
DEFINE_COMPILER;

/* The return address is kept in RETURN_ADDR for the fast return below. */
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

/* Each comparison result is accumulated into TMP2 by the OP_FLAGS that
follows the next comparison. */
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x09);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x20);
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0xa0);
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
#endif
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x1680);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x180e);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  /* Rebase to 0x2000: the 0x2000-0x200a range becomes one "less or equal"
  test, and the remaining constants are relative to 0x2000. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
  OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
#if PCRE2_CODE_UNIT_WIDTH == 8
  }
#endif
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
/* Merge the last comparison and set the zero flag from the final TMP2. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
7260
static void check_vspace(compiler_common *common)
{
/* Emits a fast-call helper which checks whether TMP1 contains a vertical
whitespace character. The characters tested are 0x0a-0x0d and 0x85, plus
0x2028 and 0x2029 in 16/32 bit mode or when UTF is enabled in 8 bit mode. The
result is left in TMP2 (non-zero when matched) and in the zero flag. TMP2 is
destroyed, and TMP1 is modified as well because the tests are done on a
rebased value. */
DEFINE_COMPILER;

/* The return address is kept in RETURN_ADDR for the fast return below. */
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

/* Rebase to 0x0a, so 0x0a-0x0d becomes a single "less or equal" test. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
/* The flag of this 0x85 test is merged into TMP2 by the next OP_FLAGS. */
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
#endif
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  /* Setting the lowest bit lets one compare cover both 0x2028 and 0x2029. */
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
#if PCRE2_CODE_UNIT_WIDTH == 8
  }
#endif
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
/* Merge the last comparison and set the zero flag from the final TMP2. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
7288
static void do_casefulcmp(compiler_common *common)
{
/* Emits a fast-call helper that compares two code unit sequences without
case folding. On entry TMP1 addresses the first sequence, TMP2 holds the
length (it is decremented by IN_UCHARS(1) per code unit) and STR_PTR is moved
back by TMP2 to address the second sequence. The loop is left on the first
difference or when TMP2 reaches zero. The return address is kept in LOCALS0
and is reloaded into TMP1 for the fast return.

The load instructions use post- or pre-update addressing when sljit reports
that the target supports it; plain loads followed by explicit pointer
increments are the fallback. */
DEFINE_COMPILER;
struct sljit_jump *differ;
struct sljit_label *loop;
int reg_a = HAS_VIRTUAL_REGISTERS ? STR_END : TMP3;
int reg_b = HAS_VIRTUAL_REGISTERS ? STACK_TOP : RETURN_ADDR;
int mode;

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

if (reg_a == STR_END)
  {
  /* The character registers hold live values, which are parked in TMP3 and
  RETURN_ADDR while the loop runs. */
  OP1(SLJIT_MOV, TMP3, 0, reg_a, 0);
  OP1(SLJIT_MOV, RETURN_ADDR, 0, reg_b, 0);
  }

/* SLJIT_MEM_SUPP only queries whether the form is supported.
mode: 1 = post-update, 2 = pre-update, 0 = plain loads. */
if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, reg_a, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  mode = 1;
else if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, reg_a, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  mode = 2;
else
  mode = 0;

if (mode == 1)
  {
  loop = LABEL();
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, reg_a, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, reg_b, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  }
else if (mode == 2)
  {
  /* Pre-update loads need pointers that are one code unit behind. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  loop = LABEL();
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, reg_a, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, reg_b, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  }
else
  {
  loop = LABEL();
  OP1(MOV_UCHAR, reg_a, 0, SLJIT_MEM1(TMP1), 0);
  OP1(MOV_UCHAR, reg_b, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  }

/* Common loop tail: leave on a difference, otherwise count down TMP2. */
differ = CMP(SLJIT_NOT_EQUAL, reg_a, 0, reg_b, 0);
OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
JUMPTO(SLJIT_NOT_ZERO, loop);

JUMPHERE(differ);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);

if (mode == 2)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

if (reg_a == STR_END)
  {
  OP1(SLJIT_MOV, reg_a, 0, TMP3, 0);
  OP1(SLJIT_MOV, reg_b, 0, RETURN_ADDR, 0);
  }

OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
}
7368
static void do_caselesscmp(compiler_common *common)
{
/* Emits a fast-call helper that compares two code unit sequences after
mapping each code unit through the common->lcc table. On entry TMP1 addresses
the first sequence, TMP2 holds the length (it is decremented by IN_UCHARS(1)
per code unit) and STR_PTR is moved back by TMP2 to address the second
sequence. When code units are wider than 8 bits, values above 255 skip the
table lookup and are compared as they are. The loop is left on the first
difference or when TMP2 reaches zero.

The return address is kept in LOCALS0 and the original STR_END value in
LOCALS1; both are restored before the fast return. */
DEFINE_COMPILER;
struct sljit_jump *branch;
struct sljit_label *loop;
int reg_a = STR_END;
int reg_b = HAS_VIRTUAL_REGISTERS ? STACK_TOP : RETURN_ADDR;
int table_reg = HAS_VIRTUAL_REGISTERS ? STACK_LIMIT : TMP3;
int mode = 0;

/* SLJIT_MEM_SUPP only queries whether the form is supported.
mode: 1 = post-update, 2 = pre-update, 0 = plain loads. */
if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, reg_a, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  mode = 1;
else if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, reg_a, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  mode = 2;

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, reg_a, 0);

if (reg_b == STACK_TOP)
  {
  /* The working registers hold live values, which are parked in TMP3 and
  RETURN_ADDR while the loop runs. */
  OP1(SLJIT_MOV, TMP3, 0, reg_b, 0);
  OP1(SLJIT_MOV, RETURN_ADDR, 0, table_reg, 0);
  }

OP1(SLJIT_MOV, table_reg, 0, SLJIT_IMM, common->lcc);

switch(mode)
  {
  case 1:
  loop = LABEL();
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, reg_a, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, reg_b, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  break;

  case 2:
  /* Pre-update loads need pointers that are one code unit behind. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  loop = LABEL();
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, reg_a, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, reg_b, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  break;

  default:
  /* STR_PTR is advanced later, after the table lookups. */
  loop = LABEL();
  OP1(MOV_UCHAR, reg_a, 0, SLJIT_MEM1(TMP1), 0);
  OP1(MOV_UCHAR, reg_b, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  break;
  }

/* Map both code units through the table. */
#if PCRE2_CODE_UNIT_WIDTH != 8
branch = CMP(SLJIT_GREATER, reg_a, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_U8, reg_a, 0, SLJIT_MEM2(table_reg, reg_a), 0);
#if PCRE2_CODE_UNIT_WIDTH != 8
JUMPHERE(branch);
branch = CMP(SLJIT_GREATER, reg_b, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_U8, reg_b, 0, SLJIT_MEM2(table_reg, reg_b), 0);
#if PCRE2_CODE_UNIT_WIDTH != 8
JUMPHERE(branch);
#endif

if (mode == 0)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* Loop tail: leave on a difference, otherwise count down TMP2. */
branch = CMP(SLJIT_NOT_EQUAL, reg_a, 0, reg_b, 0);
OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
JUMPTO(SLJIT_NOT_ZERO, loop);

JUMPHERE(branch);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);

if (mode == 2)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

if (reg_b == STACK_TOP)
  {
  OP1(SLJIT_MOV, reg_b, 0, TMP3, 0);
  OP1(SLJIT_MOV, table_reg, 0, RETURN_ADDR, 0);
  }

OP1(SLJIT_MOV, reg_a, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
}
7466
/* Emits the comparison of one pattern character, starting at cc, against the
subject. The subject data is read relative to STR_PTR at the negative offset
context->length, which is decreased by IN_UCHARS(1) for every code unit that
is processed. In UTF mode all code units of a multi-unit character are
processed in one call.

Arguments:
  common      compiler state
  caseless    when TRUE and the character has an other case, the differing
                bit is OR-ed into the subject data before the comparison
  cc          the pattern character
  context     running state shared by consecutive calls: remaining length,
                the register that receives the next load (-1 before the first
                load), and the code units collected for a combined compare
  backtracks  receives the jumps taken when the comparison fails

Returns:      pointer to the code unit that follows the character
*/
static PCRE2_SPTR byte_sequence_compare(compiler_common *common, BOOL caseless, PCRE2_SPTR cc,
    compare_context *context, jump_list **backtracks)
{
DEFINE_COMPILER;
unsigned int othercasebit = 0;
PCRE2_SPTR othercasechar = NULL;
#ifdef SUPPORT_UNICODE
int utflength;
#endif

if (caseless && char_has_othercase(common, cc))
  {
  othercasebit = char_get_othercase_bit(common, cc);
  SLJIT_ASSERT(othercasebit);
  /* Extracting bit difference info. */
  /* The low 8 bits are kept as the bit mask; the higher bits are used as the
  index of the code unit (relative to cc) that contains the differing bit. */
#if PCRE2_CODE_UNIT_WIDTH == 8
  othercasechar = cc + (othercasebit >> 8);
  othercasebit &= 0xff;
#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  /* Note that this code only handles characters in the BMP. If there
  ever are characters outside the BMP whose othercase differs in only one
  bit from itself (there currently are none), this code will need to be
  revised for PCRE2_CODE_UNIT_WIDTH == 32. */
  othercasechar = cc + (othercasebit >> 9);
  /* Bit 0x100 moves the mask into the upper byte of the code unit. */
  if ((othercasebit & 0x100) != 0)
    othercasebit = (othercasebit & 0xff) << 8;
  else
    othercasebit &= 0xff;
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
  }

if (context->sourcereg == -1)
  {
  /* Nothing has been loaded yet: the first piece of subject data goes into
  TMP1, and the following load is directed to TMP2. */
#if PCRE2_CODE_UNIT_WIDTH == 8
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  if (context->length >= 4)
    OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else if (context->length >= 2)
    OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else
#endif
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#elif PCRE2_CODE_UNIT_WIDTH == 16
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  if (context->length >= 4)
    OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else
#endif
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#elif PCRE2_CODE_UNIT_WIDTH == 32
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
  context->sourcereg = TMP2;
  }

#ifdef SUPPORT_UNICODE
/* Number of code units that belong to this character. */
utflength = 1;
if (common->utf && HAS_EXTRALEN(*cc))
  utflength += GET_EXTRALEN(*cc);

do
  {
#endif

  context->length -= IN_UCHARS(1);
#if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)

  /* Unaligned read is supported. */
  /* Code units are collected in context->c, with their case bits in
  context->oc, until a complete 4, 2 or 1 byte group can be compared. */
  if (othercasebit != 0 && othercasechar == cc)
    {
    context->c.asuchars[context->ucharptr] = *cc | othercasebit;
    context->oc.asuchars[context->ucharptr] = othercasebit;
    }
  else
    {
    context->c.asuchars[context->ucharptr] = *cc;
    context->oc.asuchars[context->ucharptr] = 0;
    }
  context->ucharptr++;

#if PCRE2_CODE_UNIT_WIDTH == 8
  if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
#else
  if (context->ucharptr >= 2 || context->length == 0)
#endif
    {
    /* The load for the next group is emitted before the current group is
    compared; the two registers are used alternately. */
    if (context->length >= 4)
      OP1(SLJIT_MOV_S32, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
    else if (context->length >= 2)
      OP1(SLJIT_MOV_U16, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
#if PCRE2_CODE_UNIT_WIDTH == 8
    else if (context->length >= 1)
      OP1(SLJIT_MOV_U8, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
    context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;

    /* The case labels are code unit counts, which correspond to 4, 2 and 1
    bytes of collected data. */
    switch(context->ucharptr)
      {
      case 4 / sizeof(PCRE2_UCHAR):
      if (context->oc.asint != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
      break;

      case 2 / sizeof(PCRE2_UCHAR):
      if (context->oc.asushort != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
      break;

#if PCRE2_CODE_UNIT_WIDTH == 8
      case 1:
      if (context->oc.asbyte != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
      break;
#endif

      default:
      SLJIT_UNREACHABLE();
      break;
      }
    context->ucharptr = 0;
    }

#else

  /* Unaligned read is unsupported or in 32 bit mode. */
  /* One code unit is compared per iteration; the load for the next code unit
  is emitted first and the two registers are used alternately. */
  if (context->length >= 1)
    OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);

  context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;

  if (othercasebit != 0 && othercasechar == cc)
    {
    OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
    }
  else
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));

#endif

  cc++;
#ifdef SUPPORT_UNICODE
  utflength--;
  }
while (utflength > 0);
#endif

return cc;
}
7619
7620 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
7621
/* Changes the offset that has been subtracted from the character in TMP1.
The macro emits an ADD or SUB on TMP1 for the difference between the current
charoffset and the requested value (nothing is emitted when they are equal),
and then stores the requested value in charoffset. A variable called
charoffset must be visible where the macro is expanded.

NOTE: the expansion consists of two statements (an if statement and an
assignment) that are not wrapped in a block, so the macro must not be used as
the unbraced body of an if, else or loop. */
#define SET_CHAR_OFFSET(value) \
  if ((value) != charoffset) \
    { \
    if ((value) < charoffset) \
      OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
    else \
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
    } \
  charoffset = (value);

/* Forward declaration: compile_xclass_matchingpath() calls this function
before its definition appears. */
static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr);

#ifdef SUPPORT_UNICODE
/* Bits of the unicode_status variable in compile_xclass_matchingpath(). */

/* The character value is needed again later, so a copy has to be kept. */
#define XCLASS_SAVE_CHAR 0x001
/* The copy has already been stored in RETURN_ADDR. */
#define XCLASS_CHAR_SAVED 0x002
/* The class contains an item of the given kind. */
#define XCLASS_HAS_TYPE 0x004
#define XCLASS_HAS_SCRIPT 0x008
#define XCLASS_HAS_SCRIPT_EXTENSION 0x010
#define XCLASS_HAS_BOOL 0x020
#define XCLASS_HAS_BIDICL 0x040
/* Any of these items requires the UCD record of the character. */
#define XCLASS_NEEDS_UCD (XCLASS_HAS_TYPE | XCLASS_HAS_SCRIPT | XCLASS_HAS_SCRIPT_EXTENSION | XCLASS_HAS_BOOL | XCLASS_HAS_BIDICL)
/* A negated script extension item (XCL_NOTPROP with PT_SCX) is present. */
#define XCLASS_SCRIPT_EXTENSION_NOTPROP 0x080
/* NOTE(review): presumably these two record which saved value has to be
restored after the script extension checks; confirm against the code that
sets and tests them. */
#define XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR 0x100
#define XCLASS_SCRIPT_EXTENSION_RESTORE_LOCALS0 0x200
#endif /* SUPPORT_UNICODE */
7647
compile_xclass_matchingpath(compiler_common * common,PCRE2_SPTR cc,jump_list ** backtracks)7648 static void compile_xclass_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks)
7649 {
7650 DEFINE_COMPILER;
7651 jump_list *found = NULL;
7652 jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
7653 sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
7654 struct sljit_jump *jump = NULL;
7655 PCRE2_SPTR ccbegin;
7656 int compares, invertcmp, numberofcmps;
7657 #if defined SUPPORT_UNICODE && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
7658 BOOL utf = common->utf;
7659 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == [8|16] */
7660
7661 #ifdef SUPPORT_UNICODE
7662 sljit_u32 unicode_status = 0;
7663 sljit_u32 category_list = 0;
7664 sljit_u32 items;
7665 int typereg = TMP1;
7666 const sljit_u32 *other_cases;
7667 #endif /* SUPPORT_UNICODE */
7668
7669 /* Scanning the necessary info. */
7670 cc++;
7671 ccbegin = cc;
7672 compares = 0;
7673
7674 if (cc[-1] & XCL_MAP)
7675 {
7676 min = 0;
7677 cc += 32 / sizeof(PCRE2_UCHAR);
7678 }
7679
7680 while (*cc != XCL_END)
7681 {
7682 compares++;
7683
7684 if (*cc == XCL_SINGLE)
7685 {
7686 cc ++;
7687 GETCHARINCTEST(c, cc);
7688 if (c > max) max = c;
7689 if (c < min) min = c;
7690 #ifdef SUPPORT_UNICODE
7691 unicode_status |= XCLASS_SAVE_CHAR;
7692 #endif /* SUPPORT_UNICODE */
7693 }
7694 else if (*cc == XCL_RANGE)
7695 {
7696 cc ++;
7697 GETCHARINCTEST(c, cc);
7698 if (c < min) min = c;
7699 GETCHARINCTEST(c, cc);
7700 if (c > max) max = c;
7701 #ifdef SUPPORT_UNICODE
7702 unicode_status |= XCLASS_SAVE_CHAR;
7703 #endif /* SUPPORT_UNICODE */
7704 }
7705 #ifdef SUPPORT_UNICODE
7706 else
7707 {
7708 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
7709 cc++;
7710
7711 if (*cc == PT_CLIST && cc[-1] == XCL_PROP)
7712 {
7713 other_cases = PRIV(ucd_caseless_sets) + cc[1];
7714 while (*other_cases != NOTACHAR)
7715 {
7716 if (*other_cases > max) max = *other_cases;
7717 if (*other_cases < min) min = *other_cases;
7718 other_cases++;
7719 }
7720 }
7721 else
7722 {
7723 max = READ_CHAR_MAX;
7724 min = 0;
7725 }
7726
7727 items = 0;
7728
7729 switch(*cc)
7730 {
7731 case PT_ANY:
7732 /* Any either accepts everything or ignored. */
7733 if (cc[-1] == XCL_PROP)
7734 items = UCPCAT_ALL;
7735 else
7736 compares--;
7737 break;
7738
7739 case PT_LAMP:
7740 items = UCPCAT3(ucp_Lu, ucp_Ll, ucp_Lt);
7741 break;
7742
7743 case PT_GC:
7744 items = UCPCAT_RANGE(PRIV(ucp_typerange)[(int)cc[1] * 2], PRIV(ucp_typerange)[(int)cc[1] * 2 + 1]);
7745 break;
7746
7747 case PT_PC:
7748 items = UCPCAT(cc[1]);
7749 break;
7750
7751 case PT_WORD:
7752 items = UCPCAT2(ucp_Mn, ucp_Pc) | UCPCAT_L | UCPCAT_N;
7753 break;
7754
7755 case PT_ALNUM:
7756 items = UCPCAT_L | UCPCAT_N;
7757 break;
7758
7759 case PT_SCX:
7760 unicode_status |= XCLASS_HAS_SCRIPT_EXTENSION;
7761 if (cc[-1] == XCL_NOTPROP)
7762 {
7763 unicode_status |= XCLASS_SCRIPT_EXTENSION_NOTPROP;
7764 break;
7765 }
7766 compares++;
7767 /* Fall through */
7768
7769 case PT_SC:
7770 unicode_status |= XCLASS_HAS_SCRIPT;
7771 break;
7772
7773 case PT_SPACE:
7774 case PT_PXSPACE:
7775 case PT_PXGRAPH:
7776 case PT_PXPRINT:
7777 case PT_PXPUNCT:
7778 unicode_status |= XCLASS_SAVE_CHAR | XCLASS_HAS_TYPE;
7779 break;
7780
7781 case PT_CLIST:
7782 case PT_UCNC:
7783 case PT_PXXDIGIT:
7784 unicode_status |= XCLASS_SAVE_CHAR;
7785 break;
7786
7787 case PT_BOOL:
7788 unicode_status |= XCLASS_HAS_BOOL;
7789 break;
7790
7791 case PT_BIDICL:
7792 unicode_status |= XCLASS_HAS_BIDICL;
7793 break;
7794
7795 default:
7796 SLJIT_UNREACHABLE();
7797 break;
7798 }
7799
7800 if (items > 0)
7801 {
7802 if (cc[-1] == XCL_NOTPROP)
7803 items ^= UCPCAT_ALL;
7804 category_list |= items;
7805 unicode_status |= XCLASS_HAS_TYPE;
7806 compares--;
7807 }
7808
7809 cc += 2;
7810 }
7811 #endif /* SUPPORT_UNICODE */
7812 }
7813
7814 #ifdef SUPPORT_UNICODE
7815 if (category_list == UCPCAT_ALL)
7816 {
7817 /* All characters are accepted, same as dotall. */
7818 compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE);
7819 if (list == backtracks)
7820 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
7821 return;
7822 }
7823
7824 if (compares == 0 && category_list == 0)
7825 {
7826 /* No characters are accepted, same as (*F) or dotall. */
7827 compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE);
7828 if (list != backtracks)
7829 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
7830 return;
7831 }
7832 #else /* !SUPPORT_UNICODE */
7833 SLJIT_ASSERT(compares > 0);
7834 #endif /* SUPPORT_UNICODE */
7835
7836 /* We are not necessary in utf mode even in 8 bit mode. */
7837 cc = ccbegin;
7838 if ((cc[-1] & XCL_NOT) != 0)
7839 read_char(common, min, max, backtracks, READ_CHAR_UPDATE_STR_PTR);
7840 else
7841 {
7842 #ifdef SUPPORT_UNICODE
7843 read_char(common, min, max, (unicode_status & XCLASS_NEEDS_UCD) ? backtracks : NULL, 0);
7844 #else /* !SUPPORT_UNICODE */
7845 read_char(common, min, max, NULL, 0);
7846 #endif /* SUPPORT_UNICODE */
7847 }
7848
7849 if ((cc[-1] & XCL_HASPROP) == 0)
7850 {
7851 if ((cc[-1] & XCL_MAP) != 0)
7852 {
7853 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
7854 if (!optimize_class(common, (const sljit_u8 *)cc, (((const sljit_u8 *)cc)[31] & 0x80) != 0, TRUE, &found))
7855 {
7856 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
7857 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
7858 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
7859 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
7860 OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0);
7861 add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO));
7862 }
7863
7864 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
7865 JUMPHERE(jump);
7866
7867 cc += 32 / sizeof(PCRE2_UCHAR);
7868 }
7869 else
7870 {
7871 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
7872 add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, max - min));
7873 }
7874 }
7875 else if ((cc[-1] & XCL_MAP) != 0)
7876 {
7877 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
7878 #ifdef SUPPORT_UNICODE
7879 unicode_status |= XCLASS_CHAR_SAVED;
7880 #endif /* SUPPORT_UNICODE */
7881 if (!optimize_class(common, (const sljit_u8 *)cc, FALSE, TRUE, list))
7882 {
7883 #if PCRE2_CODE_UNIT_WIDTH == 8
7884 jump = NULL;
7885 if (common->utf)
7886 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
7887 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
7888
7889 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
7890 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
7891 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
7892 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
7893 OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0);
7894 add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO));
7895
7896 #if PCRE2_CODE_UNIT_WIDTH == 8
7897 if (common->utf)
7898 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
7899 JUMPHERE(jump);
7900 }
7901
7902 OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
7903 cc += 32 / sizeof(PCRE2_UCHAR);
7904 }
7905
7906 #ifdef SUPPORT_UNICODE
7907 if (unicode_status & XCLASS_NEEDS_UCD)
7908 {
7909 if ((unicode_status & (XCLASS_SAVE_CHAR | XCLASS_CHAR_SAVED)) == XCLASS_SAVE_CHAR)
7910 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
7911
7912 #if PCRE2_CODE_UNIT_WIDTH == 32
7913 if (!common->utf)
7914 {
7915 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
7916 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
7917 JUMPHERE(jump);
7918 }
7919 #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
7920
7921 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
7922 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
7923 OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
7924 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
7925 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
7926 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
7927 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
7928 OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
7929 OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 3);
7930 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
7931 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
7932
7933 ccbegin = cc;
7934
7935 if (category_list != 0)
7936 compares++;
7937
7938 if (unicode_status & XCLASS_HAS_BIDICL)
7939 {
7940 OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, scriptx_bidiclass));
7941 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BIDICLASS_SHIFT);
7942
7943 while (*cc != XCL_END)
7944 {
7945 if (*cc == XCL_SINGLE)
7946 {
7947 cc ++;
7948 GETCHARINCTEST(c, cc);
7949 }
7950 else if (*cc == XCL_RANGE)
7951 {
7952 cc ++;
7953 GETCHARINCTEST(c, cc);
7954 GETCHARINCTEST(c, cc);
7955 }
7956 else
7957 {
7958 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
7959 cc++;
7960 if (*cc == PT_BIDICL)
7961 {
7962 compares--;
7963 invertcmp = (compares == 0 && list != backtracks);
7964 if (cc[-1] == XCL_NOTPROP)
7965 invertcmp ^= 0x1;
7966 jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]);
7967 add_jump(compiler, compares > 0 ? list : backtracks, jump);
7968 }
7969 cc += 2;
7970 }
7971 }
7972
7973 cc = ccbegin;
7974 }
7975
7976 if (unicode_status & XCLASS_HAS_BOOL)
7977 {
7978 OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, bprops));
7979 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BPROPS_MASK);
7980 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 2);
7981
7982 while (*cc != XCL_END)
7983 {
7984 if (*cc == XCL_SINGLE)
7985 {
7986 cc ++;
7987 GETCHARINCTEST(c, cc);
7988 }
7989 else if (*cc == XCL_RANGE)
7990 {
7991 cc ++;
7992 GETCHARINCTEST(c, cc);
7993 GETCHARINCTEST(c, cc);
7994 }
7995 else
7996 {
7997 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
7998 cc++;
7999 if (*cc == PT_BOOL)
8000 {
8001 compares--;
8002 invertcmp = (compares == 0 && list != backtracks);
8003 if (cc[-1] == XCL_NOTPROP)
8004 invertcmp ^= 0x1;
8005
8006 OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP1), (sljit_sw)(PRIV(ucd_boolprop_sets) + (cc[1] >> 5)), SLJIT_IMM, (sljit_sw)1 << (cc[1] & 0x1f));
8007 add_jump(compiler, compares > 0 ? list : backtracks, JUMP(SLJIT_NOT_ZERO ^ invertcmp));
8008 }
8009 cc += 2;
8010 }
8011 }
8012
8013 cc = ccbegin;
8014 }
8015
8016 if (unicode_status & XCLASS_HAS_SCRIPT)
8017 {
8018 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
8019
8020 while (*cc != XCL_END)
8021 {
8022 if (*cc == XCL_SINGLE)
8023 {
8024 cc ++;
8025 GETCHARINCTEST(c, cc);
8026 }
8027 else if (*cc == XCL_RANGE)
8028 {
8029 cc ++;
8030 GETCHARINCTEST(c, cc);
8031 GETCHARINCTEST(c, cc);
8032 }
8033 else
8034 {
8035 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
8036 cc++;
8037 switch (*cc)
8038 {
8039 case PT_SCX:
8040 if (cc[-1] == XCL_NOTPROP)
8041 break;
8042 /* Fall through */
8043
8044 case PT_SC:
8045 compares--;
8046 invertcmp = (compares == 0 && list != backtracks);
8047 if (cc[-1] == XCL_NOTPROP)
8048 invertcmp ^= 0x1;
8049
8050 add_jump(compiler, compares > 0 ? list : backtracks, CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]));
8051 }
8052 cc += 2;
8053 }
8054 }
8055
8056 cc = ccbegin;
8057 }
8058
8059 if (unicode_status & XCLASS_HAS_SCRIPT_EXTENSION)
8060 {
8061 OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, scriptx_bidiclass));
8062 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_SCRIPTX_MASK);
8063 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 2);
8064
8065 if (unicode_status & XCLASS_SCRIPT_EXTENSION_NOTPROP)
8066 {
8067 if (unicode_status & XCLASS_HAS_TYPE)
8068 {
8069 if (unicode_status & XCLASS_SAVE_CHAR)
8070 {
8071 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP2, 0);
8072 unicode_status |= XCLASS_SCRIPT_EXTENSION_RESTORE_LOCALS0;
8073 }
8074 else
8075 {
8076 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP2, 0);
8077 unicode_status |= XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR;
8078 }
8079 }
8080 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
8081 }
8082
8083 while (*cc != XCL_END)
8084 {
8085 if (*cc == XCL_SINGLE)
8086 {
8087 cc ++;
8088 GETCHARINCTEST(c, cc);
8089 }
8090 else if (*cc == XCL_RANGE)
8091 {
8092 cc ++;
8093 GETCHARINCTEST(c, cc);
8094 GETCHARINCTEST(c, cc);
8095 }
8096 else
8097 {
8098 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
8099 cc++;
8100 if (*cc == PT_SCX)
8101 {
8102 compares--;
8103 invertcmp = (compares == 0 && list != backtracks);
8104
8105 jump = NULL;
8106 if (cc[-1] == XCL_NOTPROP)
8107 {
8108 jump = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, (int)cc[1]);
8109 if (invertcmp)
8110 {
8111 add_jump(compiler, backtracks, jump);
8112 jump = NULL;
8113 }
8114 invertcmp ^= 0x1;
8115 }
8116
8117 OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP1), (sljit_sw)(PRIV(ucd_script_sets) + (cc[1] >> 5)), SLJIT_IMM, (sljit_sw)1 << (cc[1] & 0x1f));
8118 add_jump(compiler, compares > 0 ? list : backtracks, JUMP(SLJIT_NOT_ZERO ^ invertcmp));
8119
8120 if (jump != NULL)
8121 JUMPHERE(jump);
8122 }
8123 cc += 2;
8124 }
8125 }
8126
8127 if (unicode_status & XCLASS_SCRIPT_EXTENSION_RESTORE_LOCALS0)
8128 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
8129 else if (unicode_status & XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR)
8130 OP1(SLJIT_MOV, TMP2, 0, RETURN_ADDR, 0);
8131 cc = ccbegin;
8132 }
8133
8134 if (unicode_status & XCLASS_SAVE_CHAR)
8135 OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
8136
8137 if (unicode_status & XCLASS_HAS_TYPE)
8138 {
8139 if (unicode_status & XCLASS_SAVE_CHAR)
8140 typereg = RETURN_ADDR;
8141
8142 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
8143 OP2(SLJIT_SHL, typereg, 0, SLJIT_IMM, 1, TMP2, 0);
8144
8145 if (category_list > 0)
8146 {
8147 compares--;
8148 invertcmp = (compares == 0 && list != backtracks);
8149 OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, category_list);
8150 add_jump(compiler, compares > 0 ? list : backtracks, JUMP(SLJIT_NOT_ZERO ^ invertcmp));
8151 }
8152 }
8153 }
8154 #endif /* SUPPORT_UNICODE */
8155
8156 /* Generating code. */
8157 charoffset = 0;
8158 numberofcmps = 0;
8159
8160 while (*cc != XCL_END)
8161 {
8162 compares--;
8163 invertcmp = (compares == 0 && list != backtracks);
8164 jump = NULL;
8165
8166 if (*cc == XCL_SINGLE)
8167 {
8168 cc ++;
8169 GETCHARINCTEST(c, cc);
8170
8171 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
8172 {
8173 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
8174 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8175 numberofcmps++;
8176 }
8177 else if (numberofcmps > 0)
8178 {
8179 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
8180 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
8181 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
8182 numberofcmps = 0;
8183 }
8184 else
8185 {
8186 jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
8187 numberofcmps = 0;
8188 }
8189 }
8190 else if (*cc == XCL_RANGE)
8191 {
8192 cc ++;
8193 GETCHARINCTEST(c, cc);
8194 SET_CHAR_OFFSET(c);
8195 GETCHARINCTEST(c, cc);
8196
8197 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
8198 {
8199 OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
8200 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
8201 numberofcmps++;
8202 }
8203 else if (numberofcmps > 0)
8204 {
8205 OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
8206 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
8207 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
8208 numberofcmps = 0;
8209 }
8210 else
8211 {
8212 jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
8213 numberofcmps = 0;
8214 }
8215 }
8216 #ifdef SUPPORT_UNICODE
8217 else
8218 {
8219 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
8220 if (*cc == XCL_NOTPROP)
8221 invertcmp ^= 0x1;
8222 cc++;
8223 switch(*cc)
8224 {
8225 case PT_ANY:
8226 case PT_LAMP:
8227 case PT_GC:
8228 case PT_PC:
8229 case PT_SC:
8230 case PT_SCX:
8231 case PT_BOOL:
8232 case PT_BIDICL:
8233 case PT_WORD:
8234 case PT_ALNUM:
8235 compares++;
8236 /* Already handled. */
8237 break;
8238
8239 case PT_SPACE:
8240 case PT_PXSPACE:
8241 SET_CHAR_OFFSET(9);
8242 OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
8243 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
8244
8245 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
8246 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8247
8248 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
8249 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8250
8251 OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Zl, ucp_Zs));
8252 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_ZERO);
8253 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
8254 break;
8255
8256 case PT_CLIST:
8257 other_cases = PRIV(ucd_caseless_sets) + cc[1];
8258
8259 /* At least three characters are required.
8260 Otherwise this case would be handled by the normal code path. */
8261 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
8262 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
8263
8264 /* Optimizing character pairs, if their difference is power of 2. */
8265 if (is_powerof2(other_cases[1] ^ other_cases[0]))
8266 {
8267 if (charoffset == 0)
8268 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
8269 else
8270 {
8271 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
8272 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
8273 }
8274 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, other_cases[1]);
8275 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
8276 other_cases += 2;
8277 }
8278 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
8279 {
8280 if (charoffset == 0)
8281 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
8282 else
8283 {
8284 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
8285 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
8286 }
8287 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, other_cases[2]);
8288 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
8289
8290 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
8291 OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
8292
8293 other_cases += 3;
8294 }
8295 else
8296 {
8297 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
8298 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
8299 }
8300
8301 while (*other_cases != NOTACHAR)
8302 {
8303 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
8304 OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
8305 }
8306 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
8307 break;
8308
8309 case PT_UCNC:
8310 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
8311 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
8312 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
8313 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8314 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
8315 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8316
8317 SET_CHAR_OFFSET(0xa0);
8318 OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
8319 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
8320 SET_CHAR_OFFSET(0);
8321 OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
8322 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_GREATER_EQUAL);
8323 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
8324 break;
8325
8326 case PT_PXGRAPH:
8327 OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Cc, ucp_Cs) | UCPCAT_RANGE(ucp_Zl, ucp_Zs));
8328 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO);
8329
8330 OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT(ucp_Cf));
8331 jump = JUMP(SLJIT_ZERO);
8332
8333 c = charoffset;
8334 /* In case of ucp_Cf, we overwrite the result. */
8335 SET_CHAR_OFFSET(0x2066);
8336 OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
8337 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
8338
8339 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
8340 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8341
8342 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
8343 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8344
8345 /* Restore charoffset. */
8346 SET_CHAR_OFFSET(c);
8347
8348 JUMPHERE(jump);
8349 jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
8350 break;
8351
8352 case PT_PXPRINT:
8353 OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Cc, ucp_Cs) | UCPCAT2(ucp_Zl, ucp_Zp));
8354 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO);
8355
8356 OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT(ucp_Cf));
8357 jump = JUMP(SLJIT_ZERO);
8358
8359 c = charoffset;
8360 /* In case of ucp_Cf, we overwrite the result. */
8361 SET_CHAR_OFFSET(0x2066);
8362 OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
8363 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
8364
8365 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
8366 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8367
8368 /* Restore charoffset. */
8369 SET_CHAR_OFFSET(c);
8370
8371 JUMPHERE(jump);
8372 jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
8373 break;
8374
8375 case PT_PXPUNCT:
8376 OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Sc, ucp_So));
8377 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO);
8378
8379 SET_CHAR_OFFSET(0);
8380 OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x7f);
8381 OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_LESS_EQUAL);
8382
8383 OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Pc, ucp_Ps));
8384 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_ZERO);
8385 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
8386 break;
8387
8388 case PT_PXXDIGIT:
8389 SET_CHAR_OFFSET(CHAR_A);
8390 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, ~0x20);
8391 OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP2, 0, SLJIT_IMM, CHAR_F - CHAR_A);
8392 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
8393
8394 SET_CHAR_OFFSET(CHAR_0);
8395 OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_9 - CHAR_0);
8396 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
8397
8398 SET_CHAR_OFFSET(0xff10);
8399 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 0xff46 - 0xff10);
8400
8401 OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xff19 - 0xff10);
8402 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
8403
8404 SET_CHAR_OFFSET(0xff21);
8405 OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xff26 - 0xff21);
8406 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
8407
8408 SET_CHAR_OFFSET(0xff41);
8409 OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xff46 - 0xff41);
8410 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
8411
8412 SET_CHAR_OFFSET(0xff10);
8413
8414 JUMPHERE(jump);
8415 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, 0);
8416 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
8417 break;
8418
8419 default:
8420 SLJIT_UNREACHABLE();
8421 break;
8422 }
8423 cc += 2;
8424 }
8425 #endif /* SUPPORT_UNICODE */
8426
8427 if (jump != NULL)
8428 add_jump(compiler, compares > 0 ? list : backtracks, jump);
8429 }
8430
8431 SLJIT_ASSERT(compares == 0);
8432 if (found != NULL)
8433 set_jumps(found, LABEL());
8434 }
8435
8436 #undef SET_TYPE_OFFSET
8437 #undef SET_CHAR_OFFSET
8438
8439 #endif
8440
/* Emits the JIT code for one simple (zero-width) assertion opcode.

Handled opcodes: OP_SOD, OP_SOM, the four word boundary opcodes, OP_EODN,
OP_EOD, OP_DOLL, OP_DOLLM, OP_CIRC and OP_CIRCM. Any other opcode falls out
of the switch and reaches SLJIT_UNREACHABLE().

Arguments:
  common      compiler state; the newline settings, the match mode and the
                helper routine lists are read from it
  type        the assertion opcode to compile
  cc          pattern pointer; it is never advanced here
  backtracks  list that collects the jumps emitted for the failing paths

Returns:      cc, unchanged
*/

static PCRE2_SPTR compile_simple_assertion_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks)
{
DEFINE_COMPILER;
struct sljit_jump *fixup[4];
jump_list **boundary_routine;
BOOL negated;
sljit_sw field_offset;
sljit_sw newline_first;
sljit_sw newline_second;
int args_reg;

switch(type)
  {
  case OP_SOD:
  case OP_SOM:
  /* Both opcodes compare STR_PTR with a pointer stored in jit_arguments:
  the "begin" field for OP_SOD and the "str" field for OP_SOM. */
  if (type == OP_SOD)
    field_offset = SLJIT_OFFSETOF(jit_arguments, begin);
  else
    field_offset = SLJIT_OFFSETOF(jit_arguments, str);

  args_reg = ARGUMENTS;
  if (HAS_VIRTUAL_REGISTERS)
    {
    /* ARGUMENTS is first copied to TMP1, which then serves as the base. */
    args_reg = TMP1;
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
    }
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(args_reg), field_offset);
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
  return cc;

  case OP_NOT_WORD_BOUNDARY:
  case OP_WORD_BOUNDARY:
  case OP_NOT_UCP_WORD_BOUNDARY:
  case OP_UCP_WORD_BOUNDARY:
  negated = (type == OP_NOT_WORD_BOUNDARY || type == OP_NOT_UCP_WORD_BOUNDARY);
  if (type == OP_NOT_WORD_BOUNDARY || type == OP_WORD_BOUNDARY)
    boundary_routine = &common->wordboundary;
  else
    boundary_routine = &common->ucp_wordboundary;

  /* The test itself is done by a shared routine entered with a fast call. */
  add_jump(compiler, boundary_routine, JUMP(SLJIT_FAST_CALL));
#ifdef SUPPORT_UNICODE
  if (common->invalid_utf)
    {
    /* In this mode the outcome is taken from the value left in TMP2. */
    add_jump(compiler, backtracks, CMP(negated ? SLJIT_NOT_EQUAL : SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0));
    return cc;
    }
#endif /* SUPPORT_UNICODE */
  /* Otherwise the outcome is taken from the zero flag. */
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
  add_jump(compiler, backtracks, JUMP(negated ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  return cc;

  case OP_EODN:
  /* When STR_PTR has reached STR_END all newline checks are skipped. */
  fixup[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
    {
    /* Fixed newline made of two code units. */
    newline_first = (sljit_sw)((common->newline >> 8) & 0xff);
    newline_second = (sljit_sw)(common->newline & 0xff);

    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    if (common->mode != PCRE2_JIT_COMPLETE)
      {
      fixup[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);
      OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, STR_END, 0);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, newline_first);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_EQUAL);
      add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
      check_partial(common, TRUE);
      add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
      JUMPHERE(fixup[1]);
      }
    else
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));

    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, newline_first));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, newline_second));
    }
  else if (common->nltype == NLTYPE_FIXED)
    {
    /* Fixed newline made of a single code unit. */
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
    }
  else
    {
    /* Remaining newline types: a leading CR is examined first. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    fixup[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP2U(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_GREATER, TMP2, 0, STR_END, 0);
    fixup[2] = JUMP(SLJIT_GREATER);
    add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL) /* LESS */);
    /* Equal: exactly two code units are left, the second must be NL. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    fixup[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));

    /* The first code unit was not CR. */
    JUMPHERE(fixup[1]);
    if (common->nltype == NLTYPE_ANYCRLF)
      {
      OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0));
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
      }
    else
      {
      /* STR_PTR is kept in TMP3 while one character is read, and is
      restored once the newline test has passed. */
      OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
      read_char(common, common->nlmin, common->nlmax, backtracks, READ_CHAR_UPDATE_STR_PTR);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
      add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
      sljit_set_current_flags(compiler, SLJIT_SET_Z);
      add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
      OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
      }
    JUMPHERE(fixup[2]);
    JUMPHERE(fixup[3]);
    }
  JUMPHERE(fixup[0]);
  if (common->mode != PCRE2_JIT_COMPLETE)
    check_partial(common, TRUE);
  return cc;

  case OP_EOD:
  add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
  if (common->mode != PCRE2_JIT_COMPLETE)
    check_partial(common, TRUE);
  return cc;

  case OP_DOLL:
  /* Fail when the PCRE2_NOTEOL bit is set in the match options. */
  args_reg = ARGUMENTS;
  if (HAS_VIRTUAL_REGISTERS)
    {
    args_reg = TMP2;
    OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
    }
  OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));

  if (common->endonly)
    {
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
    check_partial(common, FALSE);
    return cc;
    }

  /* Without endonly the rest is the same code as OP_EODN. */
  compile_simple_assertion_matchingpath(common, OP_EODN, cc, backtracks);
  return cc;

  case OP_DOLLM:
  fixup[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);

  /* STR_PTR is not below STR_END: fail when PCRE2_NOTEOL is set. */
  args_reg = ARGUMENTS;
  if (HAS_VIRTUAL_REGISTERS)
    {
    args_reg = TMP2;
    OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
    }
  OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
  check_partial(common, FALSE);
  fixup[0] = JUMP(SLJIT_JUMP);

  /* STR_PTR is below STR_END: the following data is tested for a newline. */
  JUMPHERE(fixup[1]);
  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
    {
    newline_first = (sljit_sw)((common->newline >> 8) & 0xff);
    newline_second = (sljit_sw)(common->newline & 0xff);

    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    if (common->mode != PCRE2_JIT_COMPLETE)
      {
      fixup[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
      /* STR_PTR = STR_END - IN_UCHARS(1) */
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, newline_first));
      check_partial(common, TRUE);
      add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
      JUMPHERE(fixup[1]);
      }
    else
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0));

    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, newline_first));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, newline_second));
    }
  else
    {
    peek_char(common, common->nlmax, TMP3, 0, NULL);
    check_newlinechar(common, common->nltype, backtracks, FALSE);
    }
  JUMPHERE(fixup[0]);
  return cc;

  case OP_CIRC:
  /* Fail when STR_PTR is above the "begin" pointer, or when the
  PCRE2_NOTBOL bit is set in the match options. */
  args_reg = ARGUMENTS;
  if (HAS_VIRTUAL_REGISTERS)
    {
    args_reg = TMP2;
    OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
    }
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, begin));
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
  OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
  return cc;

  case OP_CIRCM:
  /* TMP2 might be used by peek_char_back, so TMP1 is the base register
  here and TMP2 receives the "begin" pointer. */
  args_reg = ARGUMENTS;
  if (HAS_VIRTUAL_REGISTERS)
    {
    args_reg = TMP1;
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
    }
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, begin));
  fixup[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP2, 0);

  /* STR_PTR is not above "begin": only PCRE2_NOTBOL can cause a failure. */
  OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
  fixup[0] = JUMP(SLJIT_JUMP);

  /* STR_PTR is above "begin": the preceding data is tested for a newline. */
  JUMPHERE(fixup[1]);
  if (!common->alt_circumflex)
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
    {
    newline_first = (sljit_sw)((common->newline >> 8) & 0xff);
    newline_second = (sljit_sw)(common->newline & 0xff);

    /* Two code units must be available between "begin" and STR_PTR. */
    OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, TMP2, 0));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, newline_first));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, newline_second));
    }
  else
    {
    peek_char_back(common, common->nlmax, backtracks);
    check_newlinechar(common, common->nltype, backtracks, FALSE);
    }
  JUMPHERE(fixup[0]);
  return cc;

  default:
  /* Not a simple assertion opcode. */
  break;
  }
SLJIT_UNREACHABLE();
return cc;
}
8683
8684 #ifdef SUPPORT_UNICODE
8685
8686 #if PCRE2_CODE_UNIT_WIDTH != 32
8687
do_extuni_utf(jit_arguments * args,PCRE2_SPTR cc)8688 static PCRE2_SPTR SLJIT_FUNC do_extuni_utf(jit_arguments *args, PCRE2_SPTR cc)
8689 {
8690 PCRE2_SPTR start_subject = args->begin;
8691 PCRE2_SPTR end_subject = args->end;
8692 int lgb, rgb, ricount;
8693 PCRE2_SPTR prevcc, endcc, bptr;
8694 BOOL first = TRUE;
8695 uint32_t c;
8696
8697 prevcc = cc;
8698 endcc = NULL;
8699 do
8700 {
8701 GETCHARINC(c, cc);
8702 rgb = UCD_GRAPHBREAK(c);
8703
8704 if (first)
8705 {
8706 lgb = rgb;
8707 endcc = cc;
8708 first = FALSE;
8709 continue;
8710 }
8711
8712 if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0)
8713 break;
8714
8715 /* Not breaking between Regional Indicators is allowed only if there
8716 are an even number of preceding RIs. */
8717
8718 if (lgb == ucp_gbRegional_Indicator && rgb == ucp_gbRegional_Indicator)
8719 {
8720 ricount = 0;
8721 bptr = prevcc;
8722
8723 /* bptr is pointing to the left-hand character */
8724 while (bptr > start_subject)
8725 {
8726 bptr--;
8727 BACKCHAR(bptr);
8728 GETCHAR(c, bptr);
8729
8730 if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator)
8731 break;
8732
8733 ricount++;
8734 }
8735
8736 if ((ricount & 1) != 0) break; /* Grapheme break required */
8737 }
8738
8739 /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
8740 allows any number of them before a following Extended_Pictographic. */
8741
8742 if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
8743 lgb != ucp_gbExtended_Pictographic)
8744 lgb = rgb;
8745
8746 prevcc = endcc;
8747 endcc = cc;
8748 }
8749 while (cc < end_subject);
8750
8751 return endcc;
8752 }
8753
8754 #endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
8755
do_extuni_utf_invalid(jit_arguments * args,PCRE2_SPTR cc)8756 static PCRE2_SPTR SLJIT_FUNC do_extuni_utf_invalid(jit_arguments *args, PCRE2_SPTR cc)
8757 {
8758 PCRE2_SPTR start_subject = args->begin;
8759 PCRE2_SPTR end_subject = args->end;
8760 int lgb, rgb, ricount;
8761 PCRE2_SPTR prevcc, endcc, bptr;
8762 BOOL first = TRUE;
8763 uint32_t c;
8764
8765 prevcc = cc;
8766 endcc = NULL;
8767 do
8768 {
8769 GETCHARINC_INVALID(c, cc, end_subject, break);
8770 rgb = UCD_GRAPHBREAK(c);
8771
8772 if (first)
8773 {
8774 lgb = rgb;
8775 endcc = cc;
8776 first = FALSE;
8777 continue;
8778 }
8779
8780 if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0)
8781 break;
8782
8783 /* Not breaking between Regional Indicators is allowed only if there
8784 are an even number of preceding RIs. */
8785
8786 if (lgb == ucp_gbRegional_Indicator && rgb == ucp_gbRegional_Indicator)
8787 {
8788 ricount = 0;
8789 bptr = prevcc;
8790
8791 /* bptr is pointing to the left-hand character */
8792 while (bptr > start_subject)
8793 {
8794 GETCHARBACK_INVALID(c, bptr, start_subject, break);
8795
8796 if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator)
8797 break;
8798
8799 ricount++;
8800 }
8801
8802 if ((ricount & 1) != 0)
8803 break; /* Grapheme break required */
8804 }
8805
8806 /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
8807 allows any number of them before a following Extended_Pictographic. */
8808
8809 if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
8810 lgb != ucp_gbExtended_Pictographic)
8811 lgb = rgb;
8812
8813 prevcc = endcc;
8814 endcc = cc;
8815 }
8816 while (cc < end_subject);
8817
8818 return endcc;
8819 }
8820
do_extuni_no_utf(jit_arguments * args,PCRE2_SPTR cc)8821 static PCRE2_SPTR SLJIT_FUNC do_extuni_no_utf(jit_arguments *args, PCRE2_SPTR cc)
8822 {
8823 PCRE2_SPTR start_subject = args->begin;
8824 PCRE2_SPTR end_subject = args->end;
8825 int lgb, rgb, ricount;
8826 PCRE2_SPTR bptr;
8827 uint32_t c;
8828
8829 /* Patch by PH */
8830 /* GETCHARINC(c, cc); */
8831 c = *cc++;
8832
8833 #if PCRE2_CODE_UNIT_WIDTH == 32
8834 if (c >= 0x110000)
8835 return cc;
8836 #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
8837 lgb = UCD_GRAPHBREAK(c);
8838
8839 while (cc < end_subject)
8840 {
8841 c = *cc;
8842 #if PCRE2_CODE_UNIT_WIDTH == 32
8843 if (c >= 0x110000)
8844 break;
8845 #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
8846 rgb = UCD_GRAPHBREAK(c);
8847
8848 if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0)
8849 break;
8850
8851 /* Not breaking between Regional Indicators is allowed only if there
8852 are an even number of preceding RIs. */
8853
8854 if (lgb == ucp_gbRegional_Indicator && rgb == ucp_gbRegional_Indicator)
8855 {
8856 ricount = 0;
8857 bptr = cc - 1;
8858
8859 /* bptr is pointing to the left-hand character */
8860 while (bptr > start_subject)
8861 {
8862 bptr--;
8863 c = *bptr;
8864 #if PCRE2_CODE_UNIT_WIDTH == 32
8865 if (c >= 0x110000)
8866 break;
8867 #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
8868
8869 if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator) break;
8870
8871 ricount++;
8872 }
8873
8874 if ((ricount & 1) != 0)
8875 break; /* Grapheme break required */
8876 }
8877
8878 /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
8879 allows any number of them before a following Extended_Pictographic. */
8880
8881 if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
8882 lgb != ucp_gbExtended_Pictographic)
8883 lgb = rgb;
8884
8885 cc++;
8886 }
8887
8888 return cc;
8889 }
8890
8891 #endif /* SUPPORT_UNICODE */
8892
/* Emits the matching path of one single-character item of the compiled
pattern: character types, newline and space escapes, properties, \X, literal
and negated characters, and character classes.

Arguments:
  common         the JIT compiler state
  type           the opcode of the item
  cc             points to the data that follows the opcode
  backtracks     list that collects the jumps taken when the item fails
  check_str_ptr  when TRUE, detect_partial_match() (or, for literal
                 characters in complete mode, a STR_PTR / STR_END
                 comparison) is emitted for the item

Returns: pointer to the pattern code that follows the item. */

static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr)
{
DEFINE_COMPILER;
compare_context cmp_context;
jump_list *skip_list;
struct sljit_jump *jumps[3];
unsigned int chr, other_chr, mask;
int char_len;
#ifdef SUPPORT_UNICODE
PCRE2_UCHAR prop_class[5];
#endif /* SUPPORT_UNICODE */

switch(type)
  {
  case OP_NOT_DIGIT:
  case OP_DIGIT:
  /* Digits are usually 0-9, so they are worth optimizing. */
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
  if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_digit, FALSE))
    read_char7_type(common, backtracks, type == OP_NOT_DIGIT);
  else
#endif
    read_char8_type(common, backtracks, type == OP_NOT_DIGIT);
  /* Test the ctype_digit bit of TMP1; which outcome fails depends on
  whether the opcode is the negated one. */
  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ctype_digit);
  add_jump(compiler, backtracks, JUMP(type == OP_NOT_DIGIT ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  return cc;

  case OP_NOT_WHITESPACE:
  case OP_WHITESPACE:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
  if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_space, FALSE))
    read_char7_type(common, backtracks, type == OP_NOT_WHITESPACE);
  else
#endif
    read_char8_type(common, backtracks, type == OP_NOT_WHITESPACE);
  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ctype_space);
  add_jump(compiler, backtracks, JUMP(type == OP_NOT_WHITESPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  return cc;

  case OP_NOT_WORDCHAR:
  case OP_WORDCHAR:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
  if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_word, FALSE))
    read_char7_type(common, backtracks, type == OP_NOT_WORDCHAR);
  else
#endif
    read_char8_type(common, backtracks, type == OP_NOT_WORDCHAR);
  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ctype_word);
  add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORDCHAR ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  return cc;

  case OP_ANY:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  read_char(common, common->nlmin, common->nlmax, backtracks, READ_CHAR_UPDATE_STR_PTR);
  if (common->nltype != NLTYPE_FIXED || common->newline <= 255)
    {
    check_newlinechar(common, common->nltype, backtracks, TRUE);
    return cc;
    }

  /* Fixed newline value above 255: TMP1 is compared with
  (newline >> 8) & 0xff, and only when they are equal is the code unit at
  STR_PTR compared with newline & 0xff. */
  jumps[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
  skip_list = NULL;
  if (common->mode == PCRE2_JIT_PARTIAL_HARD)
    check_str_end(common, &skip_list);
  else
    add_jump(compiler, &skip_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
  set_jumps(skip_list, LABEL());
  JUMPHERE(jumps[0]);
  return cc;

  case OP_ALLANY:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
#ifdef SUPPORT_UNICODE
  if (common->utf && common->invalid_utf)
    {
    read_char(common, 0, READ_CHAR_MAX, backtracks, READ_CHAR_UPDATE_STR_PTR);
    return cc;
    }
#endif /* SUPPORT_UNICODE */

  skip_valid_char(common);
  return cc;

  case OP_ANYBYTE:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  /* Exactly one code unit is consumed. */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  return cc;

#ifdef SUPPORT_UNICODE
  case OP_NOTPROP:
  case OP_PROP:
  /* Build a temporary extended-class description holding the single
  property and hand it to the XCLASS compiler. */
  prop_class[0] = XCL_HASPROP;
  prop_class[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
  prop_class[2] = cc[0];
  prop_class[3] = cc[1];
  prop_class[4] = XCL_END;
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  compile_xclass_matchingpath(common, prop_class, backtracks);
  return cc + 2;
#endif

  case OP_ANYNL:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  read_char(common, common->bsr_nlmin, common->bsr_nlmax, NULL, 0);
  jumps[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
  /* The soft partial matching case needs no special handling here. */
  skip_list = NULL;
  if (common->mode == PCRE2_JIT_PARTIAL_HARD)
    check_str_end(common, &skip_list);
  else
    add_jump(compiler, &skip_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  /* After a CR, a directly following NL is consumed as well. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  jumps[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  jumps[2] = JUMP(SLJIT_JUMP);
  JUMPHERE(jumps[0]);
  check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
  set_jumps(skip_list, LABEL());
  JUMPHERE(jumps[1]);
  JUMPHERE(jumps[2]);
  return cc;

  case OP_NOT_HSPACE:
  case OP_HSPACE:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);

  read_char(common, 0x9, 0x3000,
    type == OP_NOT_HSPACE ? backtracks : NULL,
    type == OP_NOT_HSPACE ? READ_CHAR_UPDATE_STR_PTR : 0);

  add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
  add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  return cc;

  case OP_NOT_VSPACE:
  case OP_VSPACE:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);

  read_char(common, 0xa, 0x2029,
    type == OP_NOT_VSPACE ? backtracks : NULL,
    type == OP_NOT_VSPACE ? READ_CHAR_UPDATE_STR_PTR : 0);

  add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
  add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  return cc;

#ifdef SUPPORT_UNICODE
  case OP_EXTUNI:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);

  /* The helper is called with (ARGUMENTS, STR_PTR); STR_PTR is already in
  the second argument register, as asserted below. */
  SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
  OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);

#if PCRE2_CODE_UNIT_WIDTH != 32
  sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM,
    common->utf ? (common->invalid_utf ? SLJIT_FUNC_ADDR(do_extuni_utf_invalid) : SLJIT_FUNC_ADDR(do_extuni_utf)) : SLJIT_FUNC_ADDR(do_extuni_no_utf));
#else
  sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM,
    common->invalid_utf ? SLJIT_FUNC_ADDR(do_extuni_utf_invalid) : SLJIT_FUNC_ADDR(do_extuni_no_utf));
#endif
  /* With invalid_utf set, a zero return value is treated as a failure. */
  if (common->invalid_utf)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));

  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);

  if (common->mode == PCRE2_JIT_PARTIAL_HARD)
    {
    jumps[0] = CMP(SLJIT_LESS, SLJIT_RETURN_REG, 0, STR_END, 0);
    /* A character was successfully read above, so a partial match must
    occur when the end of the subject has been reached. */
    check_partial(common, TRUE);
    JUMPHERE(jumps[0]);
    }
  return cc;
#endif

  case OP_CHAR:
  case OP_CHARI:
  char_len = 1;
#ifdef SUPPORT_UNICODE
  if (common->utf && HAS_EXTRALEN(*cc))
    char_len += GET_EXTRALEN(*cc);
#endif

  if (check_str_ptr && common->mode != PCRE2_JIT_COMPLETE)
    detect_partial_match(common, backtracks);

  if (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0)
    {
    /* The character is matched by byte_sequence_compare(). */
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(char_len));
    if (char_len > 1 || (check_str_ptr && common->mode == PCRE2_JIT_COMPLETE))
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));

    cmp_context.length = IN_UCHARS(char_len);
    cmp_context.sourcereg = -1;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
    cmp_context.ucharptr = 0;
#endif
    return byte_sequence_compare(common, type == OP_CHARI, cc, &cmp_context, backtracks);
    }

  /* Caseless character with an other case and no usable othercase bit:
  the subject character is read and compared with both cases. */
#ifdef SUPPORT_UNICODE
  if (common->utf)
    {
    GETCHAR(chr, cc);
    }
  else
#endif
    chr = *cc;

  SLJIT_ASSERT(type == OP_CHARI && char_has_othercase(common, cc));

  if (check_str_ptr && common->mode == PCRE2_JIT_COMPLETE)
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

  other_chr = char_othercase(common, chr);
  read_char(common, chr < other_chr ? chr : other_chr, chr > other_chr ? chr : other_chr, NULL, 0);

  SLJIT_ASSERT(!is_powerof2(chr ^ other_chr));

  if (sljit_has_cpu_feature(SLJIT_HAS_CMOV))
    {
    /* TMP1 is replaced by chr when it equals other_chr, so that a single
    comparison with chr covers both cases. */
    OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, other_chr);
    SELECT(SLJIT_EQUAL, TMP1, SLJIT_IMM, chr, TMP1);
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chr));
    }
  else
    {
    jumps[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, chr);
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, other_chr));
    JUMPHERE(jumps[0]);
    }
  return cc + char_len;

  case OP_NOT:
  case OP_NOTI:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);

  char_len = 1;
#ifdef SUPPORT_UNICODE
  if (common->utf)
    {
#if PCRE2_CODE_UNIT_WIDTH == 8
    chr = *cc;
    if (chr < 128 && !common->invalid_utf)
      {
      OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
      if (type == OP_NOT || !char_has_othercase(common, cc))
        add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, chr));
      else
        {
        /* The UTF-8 code page is fixed, so chr is known to be in the
        [a-z] or [A-Z] range and the two cases differ only in bit 0x20. */
        OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
        add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, chr | 0x20));
        }
      /* Skip the variable-length character: one code unit, plus the
      utf8_table4 entry when the first byte is 0xc0 or above. */
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      jumps[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
      JUMPHERE(jumps[0]);
      return cc + 1;
      }
    else
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
      {
      GETCHARLEN(chr, cc, char_len);
      }
    }
  else
#endif /* SUPPORT_UNICODE */
    chr = *cc;

  if (type == OP_NOT || !char_has_othercase(common, cc))
    {
    read_char(common, chr, chr, backtracks, READ_CHAR_UPDATE_STR_PTR);
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, chr));
    return cc + char_len;
    }

  other_chr = char_othercase(common, chr);
  read_char(common, chr < other_chr ? chr : other_chr, chr > other_chr ? chr : other_chr, backtracks, READ_CHAR_UPDATE_STR_PTR);
  mask = chr ^ other_chr;
  if (is_powerof2(mask))
    {
    /* The cases differ in one bit: set it and compare once. */
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, chr | mask));
    }
  else
    {
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, chr));
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, other_chr));
    }
  return cc + char_len;

  case OP_CLASS:
  case OP_NCLASS:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);

  /* mask is the largest character value that is looked up in the bitmap. */
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
  mask = (common->utf && is_char7_bitset((const sljit_u8 *)cc, type == OP_NCLASS)) ? 127 : 255;
#else
  mask = 255;
#endif
  if (type == OP_NCLASS)
    read_char(common, 0, mask, backtracks, READ_CHAR_UPDATE_STR_PTR);
  else
    read_char(common, 0, mask, NULL, 0);

  if (optimize_class(common, (const sljit_u8 *)cc, type == OP_NCLASS, FALSE, backtracks))
    return cc + 32 / sizeof(PCRE2_UCHAR);

  /* A character above the bitmap range fails for OP_CLASS and skips the
  bitmap test for OP_NCLASS. */
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
  jumps[0] = NULL;
  if (common->utf)
    {
    jumps[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, mask);
    if (type == OP_CLASS)
      {
      add_jump(compiler, backtracks, jumps[0]);
      jumps[0] = NULL;
      }
    }
#elif PCRE2_CODE_UNIT_WIDTH != 8
  jumps[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
  if (type == OP_CLASS)
    {
    add_jump(compiler, backtracks, jumps[0]);
    jumps[0] = NULL;
    }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */

  /* Bitmap test: byte index is TMP1 >> 3, bit index is TMP1 & 0x7. */
  OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
  OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0);
  add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
  if (jumps[0] != NULL)
    JUMPHERE(jumps[0]);
#endif
  return cc + 32 / sizeof(PCRE2_UCHAR);

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  case OP_XCLASS:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
  return cc + GET(cc, 0) - 1;
#endif
  }

SLJIT_UNREACHABLE();
return cc;
}
9276
compile_charn_matchingpath(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,jump_list ** backtracks)9277 static SLJIT_INLINE PCRE2_SPTR compile_charn_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, jump_list **backtracks)
9278 {
9279 /* This function consumes at least one input character. */
9280 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
9281 DEFINE_COMPILER;
9282 PCRE2_SPTR ccbegin = cc;
9283 compare_context context;
9284 int size;
9285
9286 context.length = 0;
9287 do
9288 {
9289 if (cc >= ccend)
9290 break;
9291
9292 if (*cc == OP_CHAR)
9293 {
9294 size = 1;
9295 #ifdef SUPPORT_UNICODE
9296 if (common->utf && HAS_EXTRALEN(cc[1]))
9297 size += GET_EXTRALEN(cc[1]);
9298 #endif
9299 }
9300 else if (*cc == OP_CHARI)
9301 {
9302 size = 1;
9303 #ifdef SUPPORT_UNICODE
9304 if (common->utf)
9305 {
9306 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
9307 size = 0;
9308 else if (HAS_EXTRALEN(cc[1]))
9309 size += GET_EXTRALEN(cc[1]);
9310 }
9311 else
9312 #endif
9313 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
9314 size = 0;
9315 }
9316 else
9317 size = 0;
9318
9319 cc += 1 + size;
9320 context.length += IN_UCHARS(size);
9321 }
9322 while (size > 0 && context.length <= 128);
9323
9324 cc = ccbegin;
9325 if (context.length > 0)
9326 {
9327 /* We have a fixed-length byte sequence. */
9328 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
9329 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
9330
9331 context.sourcereg = -1;
9332 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
9333 context.ucharptr = 0;
9334 #endif
9335 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
9336 return cc;
9337 }
9338
9339 /* A non-fixed length character will be checked if length == 0. */
9340 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks, TRUE);
9341 }
9342
9343 /* Forward definitions. */
9344 static void compile_matchingpath(compiler_common *, PCRE2_SPTR, PCRE2_SPTR, backtrack_common *);
9345 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
9346
9347 #define PUSH_BACKTRACK(size, ccstart, error) \
9348 do \
9349 { \
9350 backtrack = sljit_alloc_memory(compiler, (size)); \
9351 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
9352 return error; \
9353 memset(backtrack, 0, size); \
9354 backtrack->prev = parent->top; \
9355 backtrack->cc = (ccstart); \
9356 parent->top = backtrack; \
9357 } \
9358 while (0)
9359
9360 #define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
9361 do \
9362 { \
9363 backtrack = sljit_alloc_memory(compiler, (size)); \
9364 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
9365 return; \
9366 memset(backtrack, 0, size); \
9367 backtrack->prev = parent->top; \
9368 backtrack->cc = (ccstart); \
9369 parent->top = backtrack; \
9370 } \
9371 while (0)
9372
9373 #define BACKTRACK_AS(type) ((type *)backtrack)
9374
/* Emits code that selects one capture group among the groups sharing a
duplicated name (OP_DNREF / OP_DNREFI). The entries of the name table are
tested in order; the first one whose OVECTOR start value differs from
OVECTOR(1) is chosen, otherwise the last entry is used.

Arguments:
  common      the JIT compiler state
  cc          points to the OP_DNREF or OP_DNREFI opcode
  backtracks  when not NULL (and common->unset_backref is not set), receives
              a jump taken if the last entry's start equals OVECTOR(1)

On exit of the generated code TMP2 holds the address of the selected OVECTOR
entry, and TMP1 holds the value of OVECTOR(1). */

static void compile_dnref_search(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks)
{
DEFINE_COMPILER;
PCRE2_SPTR entry = common->name_table + GET2(cc, 1) * common->name_entry_size;
jump_list *selected = NULL;
unsigned int ovec_index;
int remaining;

SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));

/* All entries except the last one leave the search as soon as their
start value differs from OVECTOR(1). */
for (remaining = (int)GET2(cc, 1 + IMM2_SIZE) - 1; remaining > 0; remaining--)
  {
  ovec_index = GET2(entry, 0) << 1;
  GET_LOCAL_BASE(TMP2, 0, OVECTOR(ovec_index));
  add_jump(compiler, &selected, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(ovec_index), TMP1, 0));
  entry += common->name_entry_size;
  }

/* The last entry is selected unconditionally; optionally it is checked. */
ovec_index = GET2(entry, 0) << 1;
GET_LOCAL_BASE(TMP2, 0, OVECTOR(ovec_index));
if (backtracks != NULL && !common->unset_backref)
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(ovec_index), TMP1, 0));

set_jumps(selected, LABEL());
}
9404
/* Emits the matching path of one back reference.

Arguments:
  common      the JIT compiler state
  cc          points to the opcode: OP_REF / OP_REFI (group number stored in
              the pattern) or OP_DNREF / OP_DNREFI (duplicated name)
  backtracks  list that collects the jumps taken when matching fails
  withchecks  when TRUE, code is emitted for the unset group check (unless
              common->unset_backref is set) and for the empty reference
  emptyfail   when TRUE, an empty reference jumps to backtracks; otherwise
              the generated code simply continues

For OP_DNREF / OP_DNREFI the generated code expects TMP2 to hold the address
of the selected OVECTOR entry on entry.

The caseful compare routine is selected for both OP_REF and OP_DNREF, and the
caseless one for OP_REFI and OP_DNREFI. */

static void compile_ref_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
{
DEFINE_COMPILER;
BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
BOOL caseful = (*cc == OP_REF || *cc == OP_DNREF);
int offset = 0;
struct sljit_jump *jump = NULL;
struct sljit_jump *partial;
struct sljit_jump *nopartial;
#if defined SUPPORT_UNICODE
struct sljit_label *loop;
struct sljit_label *caseless_loop;
jump_list *no_match = NULL;
int source_reg = COUNT_MATCH;
int source_end_reg = ARGUMENTS;
int char1_reg = STACK_LIMIT;
#endif /* SUPPORT_UNICODE */

/* Load the start of the referenced substring into TMP1. */
if (ref)
  {
  offset = GET2(cc, 1) << 1;
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
  /* OVECTOR(1) contains the "string begin - 1" constant. */
  if (withchecks && !common->unset_backref)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
  }
else
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);

#if defined SUPPORT_UNICODE
/* NOTE(review): only OP_REFI takes this character-by-character UTF path;
OP_DNREFI falls through to the caselesscmp routine below. Extending the
condition would also need common->iref_ptr to be reserved for OP_DNREFI,
which is not visible here - confirm before changing. */
if (common->utf && *cc == OP_REFI)
  {
  SLJIT_ASSERT(common->iref_ptr != 0);

  /* TMP2 receives the end of the referenced substring. */
  if (ref)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
  else
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));

  if (withchecks && emptyfail)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, TMP2, 0));

  /* Three registers are borrowed for the loop; their values are saved in
  the three words starting at iref_ptr and restored on every exit. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr, source_reg, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw), source_end_reg, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2, char1_reg, 0);

  OP1(SLJIT_MOV, source_reg, 0, TMP1, 0);
  OP1(SLJIT_MOV, source_end_reg, 0, TMP2, 0);

  loop = LABEL();
  jump = CMP(SLJIT_GREATER_EQUAL, source_reg, 0, source_end_reg, 0);
  partial = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

  /* Read original character. It must be a valid UTF character. STR_PTR is
  temporarily pointed at the referenced substring for the read. */
  OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
  OP1(SLJIT_MOV, STR_PTR, 0, source_reg, 0);

  read_char(common, 0, READ_CHAR_MAX, NULL, READ_CHAR_UPDATE_STR_PTR | READ_CHAR_VALID_UTF);

  OP1(SLJIT_MOV, source_reg, 0, STR_PTR, 0);
  OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
  OP1(SLJIT_MOV, char1_reg, 0, TMP1, 0);

  /* Read second character. */
  read_char(common, 0, READ_CHAR_MAX, &no_match, READ_CHAR_UPDATE_STR_PTR);

  CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop);

  OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);

  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));

  /* TMP2 = ucd_records + TMP2 * 12 (computed as TMP2 * 4 + TMP2 * 8). */
  OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2);
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);

  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records));

  /* Compare the other case of the subject character (character plus the
  other_case field of its record) with the referenced character. */
  OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(ucd_record, other_case));
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(ucd_record, caseset));
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP3, 0);
  CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop);

  /* A zero caseset means no match; otherwise scan the members of
  ucd_caseless_sets starting at index caseset. */
  add_jump(compiler, &no_match, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_caseless_sets));

  caseless_loop = LABEL();
  OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP2), 0);
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(uint32_t));
  OP2U(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_LESS, TMP1, 0, char1_reg, 0);
  JUMPTO(SLJIT_EQUAL, loop);
  JUMPTO(SLJIT_LESS, caseless_loop);

  /* Mismatch exit. In complete mode running out of subject is a mismatch
  as well. */
  set_jumps(no_match, LABEL());
  if (common->mode == PCRE2_JIT_COMPLETE)
    JUMPHERE(partial);

  OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
  OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
  OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);
  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));

  if (common->mode != PCRE2_JIT_COMPLETE)
    {
    /* Subject end reached in a partial matching mode. */
    JUMPHERE(partial);
    OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
    OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
    OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);

    check_partial(common, FALSE);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    }

  /* Success exit: the whole referenced substring has been consumed. */
  JUMPHERE(jump);
  OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
  OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
  OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);
  return;
  }
else
#endif /* SUPPORT_UNICODE */
  {
  /* TMP2 = length of the referenced substring (end - start). */
  if (ref)
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
  else
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);

  if (withchecks)
    jump = JUMP(SLJIT_ZERO);

  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  partial = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
  if (common->mode == PCRE2_JIT_COMPLETE)
    add_jump(compiler, backtracks, partial);

  /* OP_DNREF is caseful exactly like OP_REF, so both select casefulcmp. */
  add_jump(compiler, caseful ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));

  if (common->mode != PCRE2_JIT_COMPLETE)
    {
    nopartial = JUMP(SLJIT_JUMP);
    JUMPHERE(partial);
    /* TMP2 -= STR_END - STR_PTR */
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
    partial = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0);
    OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
    add_jump(compiler, caseful ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
    JUMPHERE(partial);
    check_partial(common, FALSE);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    JUMPHERE(nopartial);
    }
  }

/* The empty reference either fails or lands here, as requested. */
if (jump != NULL)
  {
  if (emptyfail)
    add_jump(compiler, backtracks, jump);
  else
    JUMPHERE(jump);
  }
}
9569
compile_ref_iterator_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)9570 static SLJIT_INLINE PCRE2_SPTR compile_ref_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
9571 {
9572 DEFINE_COMPILER;
9573 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
9574 backtrack_common *backtrack;
9575 PCRE2_UCHAR type;
9576 int offset = 0;
9577 struct sljit_label *label;
9578 struct sljit_jump *zerolength;
9579 struct sljit_jump *jump = NULL;
9580 PCRE2_SPTR ccbegin = cc;
9581 int min = 0, max = 0;
9582 BOOL minimize;
9583
9584 PUSH_BACKTRACK(sizeof(ref_iterator_backtrack), cc, NULL);
9585
9586 if (ref)
9587 offset = GET2(cc, 1) << 1;
9588 else
9589 cc += IMM2_SIZE;
9590 type = cc[1 + IMM2_SIZE];
9591
9592 SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
9593 minimize = (type & 0x1) != 0;
9594 switch(type)
9595 {
9596 case OP_CRSTAR:
9597 case OP_CRMINSTAR:
9598 min = 0;
9599 max = 0;
9600 cc += 1 + IMM2_SIZE + 1;
9601 break;
9602 case OP_CRPLUS:
9603 case OP_CRMINPLUS:
9604 min = 1;
9605 max = 0;
9606 cc += 1 + IMM2_SIZE + 1;
9607 break;
9608 case OP_CRQUERY:
9609 case OP_CRMINQUERY:
9610 min = 0;
9611 max = 1;
9612 cc += 1 + IMM2_SIZE + 1;
9613 break;
9614 case OP_CRRANGE:
9615 case OP_CRMINRANGE:
9616 min = GET2(cc, 1 + IMM2_SIZE + 1);
9617 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
9618 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
9619 break;
9620 default:
9621 SLJIT_UNREACHABLE();
9622 break;
9623 }
9624
9625 if (!minimize)
9626 {
9627 if (min == 0)
9628 {
9629 allocate_stack(common, 2);
9630 if (ref)
9631 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
9632 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9633 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
9634 /* Temporary release of STR_PTR. */
9635 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
9636 /* Handles both invalid and empty cases. Since the minimum repeat,
9637 is zero the invalid case is basically the same as an empty case. */
9638 if (ref)
9639 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9640 else
9641 {
9642 compile_dnref_search(common, ccbegin, NULL);
9643 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9644 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
9645 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9646 }
9647 /* Restore if not zero length. */
9648 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
9649 }
9650 else
9651 {
9652 allocate_stack(common, 1);
9653 if (ref)
9654 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
9655 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9656
9657 if (ref)
9658 {
9659 if (!common->unset_backref)
9660 add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
9661 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9662 }
9663 else
9664 {
9665 compile_dnref_search(common, ccbegin, &backtrack->own_backtracks);
9666 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9667 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
9668 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9669 }
9670 }
9671
9672 if (min > 1 || max > 1)
9673 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0);
9674
9675 label = LABEL();
9676 if (!ref)
9677 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
9678 compile_ref_matchingpath(common, ccbegin, &backtrack->own_backtracks, FALSE, FALSE);
9679
9680 if (min > 1 || max > 1)
9681 {
9682 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
9683 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
9684 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
9685 if (min > 1)
9686 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, label);
9687 if (max > 1)
9688 {
9689 jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
9690 allocate_stack(common, 1);
9691 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9692 JUMPTO(SLJIT_JUMP, label);
9693 JUMPHERE(jump);
9694 }
9695 }
9696
9697 if (max == 0)
9698 {
9699 /* Includes min > 1 case as well. */
9700 allocate_stack(common, 1);
9701 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9702 JUMPTO(SLJIT_JUMP, label);
9703 }
9704
9705 JUMPHERE(zerolength);
9706 BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
9707
9708 count_match(common);
9709 return cc;
9710 }
9711
9712 allocate_stack(common, ref ? 2 : 3);
9713 if (ref)
9714 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
9715 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9716 if (type != OP_CRMINSTAR)
9717 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
9718
9719 if (min == 0)
9720 {
9721 /* Handles both invalid and empty cases. Since the minimum repeat,
9722 is zero the invalid case is basically the same as an empty case. */
9723 if (ref)
9724 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9725 else
9726 {
9727 compile_dnref_search(common, ccbegin, NULL);
9728 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9729 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
9730 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9731 }
9732 /* Length is non-zero, we can match real repeats. */
9733 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9734 jump = JUMP(SLJIT_JUMP);
9735 }
9736 else
9737 {
9738 if (ref)
9739 {
9740 if (!common->unset_backref)
9741 add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
9742 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9743 }
9744 else
9745 {
9746 compile_dnref_search(common, ccbegin, &backtrack->own_backtracks);
9747 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9748 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
9749 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9750 }
9751 }
9752
9753 BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
9754 if (max > 0)
9755 add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
9756
9757 if (!ref)
9758 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
9759 compile_ref_matchingpath(common, ccbegin, &backtrack->own_backtracks, TRUE, TRUE);
9760 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9761
9762 if (min > 1)
9763 {
9764 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
9765 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
9766 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
9767 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(ref_iterator_backtrack)->matchingpath);
9768 }
9769 else if (max > 0)
9770 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
9771
9772 if (jump != NULL)
9773 JUMPHERE(jump);
9774 JUMPHERE(zerolength);
9775
9776 count_match(common);
9777 return cc;
9778 }
9779
compile_recurse_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)9780 static SLJIT_INLINE PCRE2_SPTR compile_recurse_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
9781 {
9782 DEFINE_COMPILER;
9783 backtrack_common *backtrack;
9784 recurse_entry *entry = common->entries;
9785 recurse_entry *prev = NULL;
9786 sljit_sw start = GET(cc, 1);
9787 PCRE2_SPTR start_cc;
9788 BOOL needs_control_head;
9789
9790 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
9791
9792 /* Inlining simple patterns. */
9793 if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
9794 {
9795 start_cc = common->start + start;
9796 compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
9797 BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
9798 return cc + 1 + LINK_SIZE;
9799 }
9800
9801 while (entry != NULL)
9802 {
9803 if (entry->start == start)
9804 break;
9805 prev = entry;
9806 entry = entry->next;
9807 }
9808
9809 if (entry == NULL)
9810 {
9811 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
9812 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9813 return NULL;
9814 entry->next = NULL;
9815 entry->entry_label = NULL;
9816 entry->backtrack_label = NULL;
9817 entry->entry_calls = NULL;
9818 entry->backtrack_calls = NULL;
9819 entry->start = start;
9820
9821 if (prev != NULL)
9822 prev->next = entry;
9823 else
9824 common->entries = entry;
9825 }
9826
9827 BACKTRACK_AS(recurse_backtrack)->entry = entry;
9828
9829 if (entry->entry_label == NULL)
9830 add_jump(compiler, &entry->entry_calls, JUMP(SLJIT_FAST_CALL));
9831 else
9832 JUMPTO(SLJIT_FAST_CALL, entry->entry_label);
9833 /* Leave if the match is failed. */
9834 add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
9835 BACKTRACK_AS(recurse_backtrack)->matchingpath = LABEL();
9836 return cc + 1 + LINK_SIZE;
9837 }
9838
do_callout_jit(struct jit_arguments * arguments,pcre2_callout_block * callout_block,PCRE2_SPTR * jit_ovector)9839 static sljit_s32 SLJIT_FUNC SLJIT_FUNC_ATTRIBUTE do_callout_jit(struct jit_arguments *arguments, pcre2_callout_block *callout_block, PCRE2_SPTR *jit_ovector)
9840 {
9841 PCRE2_SPTR begin;
9842 PCRE2_SIZE *ovector;
9843 sljit_u32 oveccount, capture_top;
9844
9845 if (arguments->callout == NULL)
9846 return 0;
9847
9848 SLJIT_COMPILE_ASSERT(sizeof (PCRE2_SIZE) <= sizeof (sljit_sw), pcre2_size_must_be_lower_than_sljit_sw_size);
9849
9850 begin = arguments->begin;
9851 ovector = (PCRE2_SIZE*)(callout_block + 1);
9852 oveccount = callout_block->capture_top;
9853
9854 SLJIT_ASSERT(oveccount >= 1);
9855
9856 callout_block->version = 2;
9857 callout_block->callout_flags = 0;
9858
9859 /* Offsets in subject. */
9860 callout_block->subject_length = arguments->end - arguments->begin;
9861 callout_block->start_match = jit_ovector[0] - begin;
9862 callout_block->current_position = (PCRE2_SPTR)callout_block->offset_vector - begin;
9863 callout_block->subject = begin;
9864
9865 /* Convert and copy the JIT offset vector to the ovector array. */
9866 callout_block->capture_top = 1;
9867 callout_block->offset_vector = ovector;
9868
9869 ovector[0] = PCRE2_UNSET;
9870 ovector[1] = PCRE2_UNSET;
9871 ovector += 2;
9872 jit_ovector += 2;
9873 capture_top = 1;
9874
9875 /* Convert pointers to sizes. */
9876 while (--oveccount != 0)
9877 {
9878 capture_top++;
9879
9880 ovector[0] = (PCRE2_SIZE)(jit_ovector[0] - begin);
9881 ovector[1] = (PCRE2_SIZE)(jit_ovector[1] - begin);
9882
9883 if (ovector[0] != PCRE2_UNSET)
9884 callout_block->capture_top = capture_top;
9885
9886 ovector += 2;
9887 jit_ovector += 2;
9888 }
9889
9890 return (arguments->callout)(callout_block, arguments->callout_data);
9891 }
9892
9893 #define CALLOUT_ARG_OFFSET(arg) \
9894 SLJIT_OFFSETOF(pcre2_callout_block, arg)
9895
compile_callout_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)9896 static SLJIT_INLINE PCRE2_SPTR compile_callout_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
9897 {
9898 DEFINE_COMPILER;
9899 backtrack_common *backtrack;
9900 sljit_s32 mov_opcode;
9901 unsigned int callout_length = (*cc == OP_CALLOUT)
9902 ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2 * LINK_SIZE);
9903 sljit_sw value1;
9904 sljit_sw value2;
9905 sljit_sw value3;
9906 sljit_uw callout_arg_size = (common->re->top_bracket + 1) * 2 * SSIZE_OF(sw);
9907
9908 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
9909
9910 callout_arg_size = (sizeof(pcre2_callout_block) + callout_arg_size + sizeof(sljit_sw) - 1) / sizeof(sljit_sw);
9911
9912 allocate_stack(common, callout_arg_size);
9913
9914 SLJIT_ASSERT(common->capture_last_ptr != 0);
9915 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
9916 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
9917 value1 = (*cc == OP_CALLOUT) ? cc[1 + 2 * LINK_SIZE] : 0;
9918 OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, value1);
9919 OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
9920 OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_top), SLJIT_IMM, common->re->top_bracket + 1);
9921
9922 /* These pointer sized fields temporarly stores internal variables. */
9923 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
9924
9925 if (common->mark_ptr != 0)
9926 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
9927 mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;
9928 OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 1));
9929 OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 1 + LINK_SIZE));
9930
9931 if (*cc == OP_CALLOUT)
9932 {
9933 value1 = 0;
9934 value2 = 0;
9935 value3 = 0;
9936 }
9937 else
9938 {
9939 value1 = (sljit_sw) (cc + (1 + 4*LINK_SIZE) + 1);
9940 value2 = (callout_length - (1 + 4*LINK_SIZE + 2));
9941 value3 = (sljit_sw) (GET(cc, 1 + 3*LINK_SIZE));
9942 }
9943
9944 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string), SLJIT_IMM, value1);
9945 OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_length), SLJIT_IMM, value2);
9946 OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_offset), SLJIT_IMM, value3);
9947 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
9948
9949 SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
9950
9951 /* Needed to save important temporary registers. */
9952 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STR_PTR, 0);
9953 /* SLJIT_R0 = arguments */
9954 OP1(SLJIT_MOV, SLJIT_R1, 0, STACK_TOP, 0);
9955 GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START);
9956 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS3(32, W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_callout_jit));
9957 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
9958 free_stack(common, callout_arg_size);
9959
9960 /* Check return value. */
9961 OP2U(SLJIT_SUB32 | SLJIT_SET_Z | SLJIT_SET_SIG_GREATER, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
9962 add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_SIG_GREATER));
9963 if (common->abort_label == NULL)
9964 add_jump(compiler, &common->abort, JUMP(SLJIT_NOT_EQUAL) /* SIG_LESS */);
9965 else
9966 JUMPTO(SLJIT_NOT_EQUAL /* SIG_LESS */, common->abort_label);
9967 return cc + callout_length;
9968 }
9969
9970 #undef CALLOUT_ARG_SIZE
9971 #undef CALLOUT_ARG_OFFSET
9972
/* Compiles OP_REVERSE / OP_VREVERSE: moves STR_PTR backwards in the subject.

OP_REVERSE steps back a fixed number of characters and its failure jumps are
added to parent->own_backtracks. OP_VREVERSE has a minimum and a maximum
length: a vreverse_backtrack is pushed to collect the failure jumps, the
position reached after the minimum step is stored in STACK(3), and the
position reached after stepping back further (at most up to the maximum
length, stopping at the start of the subject) is stored in STACK(2).

parent->top must be NULL on entry. Returns the address of the opcode which
follows the reverse opcode. */
static PCRE2_SPTR compile_reverse_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack = NULL;
jump_list **failed;
unsigned int min_len, max_len;
#ifdef SUPPORT_UNICODE
struct sljit_jump *at_begin;
struct sljit_label *loop;
#endif

SLJIT_ASSERT(parent->top == NULL);

if (*cc != OP_REVERSE)
  {
  SLJIT_ASSERT(*cc == OP_VREVERSE);
  PUSH_BACKTRACK(sizeof(vreverse_backtrack), cc, NULL);

  failed = &backtrack->own_backtracks;
  min_len = GET2(cc, 1);
  max_len = GET2(cc, 1 + IMM2_SIZE);
  cc += 1 + 2 * IMM2_SIZE;

  SLJIT_ASSERT(min_len < max_len);
  }
else
  {
  failed = &parent->own_backtracks;
  min_len = GET2(cc, 1);
  max_len = min_len;
  cc += 1 + IMM2_SIZE;

  SLJIT_ASSERT(min_len > 0);
  }

/* TMP2 = start of the subject. */
if (HAS_VIRTUAL_REGISTERS)
  {
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
  }
else
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));

#ifdef SUPPORT_UNICODE
if (common->utf)
  {
  /* Characters have variable width: step back one character at a time,
  using TMP3 as the loop counter. */
  if (min_len > 0)
    {
    OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, min_len);
    loop = LABEL();
    add_jump(compiler, failed, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0));
    move_back(common, failed, FALSE);
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, loop);
    }

  if (min_len < max_len)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);

    /* Reaching the start of the subject only ends this loop. */
    OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, max_len - min_len);
    loop = LABEL();
    at_begin = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
    move_back(common, failed, FALSE);
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, loop);

    JUMPHERE(at_begin);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), STR_PTR, 0);
    }
  }
else
#endif
  {
  /* Fixed width code units: plain pointer arithmetic is enough. */
  if (min_len > 0)
    {
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(min_len));
    add_jump(compiler, failed, CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0));
    }

  if (min_len < max_len)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);

    /* TMP2 is selected when STR_PTR is below the start of the subject. */
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(max_len - min_len));
    OP2U(SLJIT_SUB | SLJIT_SET_LESS, STR_PTR, 0, TMP2, 0);
    SELECT(SLJIT_LESS, STR_PTR, TMP2, 0, STR_PTR);

    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), STR_PTR, 0);
    }
  }

check_start_used_ptr(common);

/* Only OP_VREVERSE has a backtrack, see above. */
if (min_len < max_len)
  BACKTRACK_AS(vreverse_backtrack)->matchingpath = LABEL();

return cc;
}
10072
assert_needs_str_ptr_saving(PCRE2_SPTR cc)10073 static SLJIT_INLINE BOOL assert_needs_str_ptr_saving(PCRE2_SPTR cc)
10074 {
10075 while (TRUE)
10076 {
10077 switch (*cc)
10078 {
10079 case OP_CALLOUT_STR:
10080 cc += GET(cc, 1 + 2*LINK_SIZE);
10081 break;
10082
10083 case OP_NOT_WORD_BOUNDARY:
10084 case OP_WORD_BOUNDARY:
10085 case OP_CIRC:
10086 case OP_CIRCM:
10087 case OP_DOLL:
10088 case OP_DOLLM:
10089 case OP_CALLOUT:
10090 case OP_ALT:
10091 case OP_NOT_UCP_WORD_BOUNDARY:
10092 case OP_UCP_WORD_BOUNDARY:
10093 cc += PRIV(OP_lengths)[*cc];
10094 break;
10095
10096 case OP_KET:
10097 return FALSE;
10098
10099 default:
10100 return TRUE;
10101 }
10102 }
10103 }
10104
/* Compiles an assertion group (opcode in the OP_ASSERT .. OP_ASSERTBACK_NOT
range), which may be preceded by OP_BRAZERO or OP_BRAMINZERO.

common       the compiler state; the quit / accept / then_trap related members
             are saved on entry and restored before every return
cc           the first opcode of the assertion (or the preceding bra opcode)
backtrack    the assert backtrack; framesize, private_data_ptr and (for
             OP_BRAZERO / OP_BRAMINZERO) the matchingpath label are set here
conditional  TRUE when the assertion is the condition of a conditional group;
             the failure jumps are collected in backtrack->condfailed instead
             of backtrack->common.own_backtracks

Every alternative is compiled together with its own backtracking path, using
a local backtrack_common (altbacktrack). For the OP_ASSERT / OP_ASSERTBACK
opcodes the jumps of the matching alternatives are collected in a local list
and resolved at the "Assert is successful" code, for the other opcodes they
are added to the failure list.

Returns the address after the closing ket of the assertion, or NULL when the
sljit compiler reports an error. */
static PCRE2_SPTR compile_assert_matchingpath(compiler_common *common, PCRE2_SPTR cc, assert_backtrack *backtrack, BOOL conditional)
{
DEFINE_COMPILER;
int framesize;
int extrasize;
BOOL local_quit_available = FALSE;
BOOL needs_control_head;
/* Number of extra stack slots (0 or 3), used in stack offset arithmetic.
This is a count and not a truth value, so its type is int. */
int end_block_size = 0;
BOOL has_vreverse;
int private_data_ptr;
backtrack_common altbacktrack;
PCRE2_SPTR ccbegin;
PCRE2_UCHAR opcode;
PCRE2_UCHAR bra = OP_BRA;
jump_list *tmp = NULL;
jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.own_backtracks;
jump_list **found;
/* Saving previous accept variables. */
BOOL save_local_quit_available = common->local_quit_available;
BOOL save_in_positive_assertion = common->in_positive_assertion;
then_trap_backtrack *save_then_trap = common->then_trap;
struct sljit_label *save_quit_label = common->quit_label;
struct sljit_label *save_accept_label = common->accept_label;
jump_list *save_quit = common->quit;
jump_list *save_positive_assertion_quit = common->positive_assertion_quit;
jump_list *save_accept = common->accept;
struct sljit_jump *jump;
struct sljit_jump *brajump = NULL;

/* Assert captures then. */
common->then_trap = NULL;

if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
  {
  SLJIT_ASSERT(!conditional);
  bra = *cc;
  cc++;
  }

private_data_ptr = PRIVATE_DATA(cc);
SLJIT_ASSERT(private_data_ptr != 0);
framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
backtrack->framesize = framesize;
backtrack->private_data_ptr = private_data_ptr;
opcode = *cc;
SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
/* ccbegin is the start and cc is the end of the current alternative. */
ccbegin = cc;
cc += GET(cc, 1);

if (bra == OP_BRAMINZERO)
  {
  /* This is a braminzero backtrack path. */
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  }

/* Three more stack slots are reserved when find_vreverse() reports a
variable length reverse in a lookbehind. STACK(1) receives the saved STR_END
below. */
if ((opcode == OP_ASSERTBACK || opcode == OP_ASSERTBACK_NOT) && find_vreverse(ccbegin))
  end_block_size = 3;

if (framesize < 0)
  {
  extrasize = 1;
  if (bra == OP_BRA && !assert_needs_str_ptr_saving(ccbegin + 1 + LINK_SIZE))
    extrasize = 0;

  extrasize += end_block_size;

  if (needs_control_head)
    extrasize++;

  if (framesize == no_frame)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);

  if (extrasize > 0)
    allocate_stack(common, extrasize);

  if (needs_control_head)
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);

  if (extrasize > 0)
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

  if (needs_control_head)
    {
    SLJIT_ASSERT(extrasize == end_block_size + 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 1), TMP1, 0);
    }
  }
else
  {
  extrasize = (needs_control_head ? 3 : 2) + end_block_size;

  OP1(SLJIT_MOV, TMP2, 0, STACK_TOP, 0);
  allocate_stack(common, framesize + extrasize);

  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
  if (needs_control_head)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

  if (needs_control_head)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 2), TMP1, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 1), TMP2, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
    }
  else
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 1), TMP1, 0);

  init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize);
  }

if (end_block_size > 0)
  {
  /* Save STR_END and make the current position the end of the subject. */
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_END, 0);
  OP1(SLJIT_MOV, STR_END, 0, STR_PTR, 0);
  }

memset(&altbacktrack, 0, sizeof(backtrack_common));
if (conditional || (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT))
  {
  /* Control verbs cannot escape from these asserts. */
  local_quit_available = TRUE;
  common->local_quit_available = TRUE;
  common->quit_label = NULL;
  common->quit = NULL;
  }

common->in_positive_assertion = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK);
common->positive_assertion_quit = NULL;

/* One iteration for each alternative of the assertion. */
while (1)
  {
  common->accept_label = NULL;
  common->accept = NULL;
  altbacktrack.top = NULL;
  altbacktrack.own_backtracks = NULL;

  if (*ccbegin == OP_ALT && extrasize > 0)
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  altbacktrack.cc = ccbegin;
  ccbegin += 1 + LINK_SIZE;

  has_vreverse = (*ccbegin == OP_VREVERSE);
  if (*ccbegin == OP_REVERSE || has_vreverse)
    ccbegin = compile_reverse_matchingpath(common, ccbegin, &altbacktrack);

  compile_matchingpath(common, ccbegin, cc, &altbacktrack);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    {
    /* Restore the saved compiler state before leaving. */
    if (local_quit_available)
      {
      common->local_quit_available = save_local_quit_available;
      common->quit_label = save_quit_label;
      common->quit = save_quit;
      }
    common->in_positive_assertion = save_in_positive_assertion;
    common->then_trap = save_then_trap;
    common->accept_label = save_accept_label;
    common->positive_assertion_quit = save_positive_assertion_quit;
    common->accept = save_accept;
    return NULL;
    }

  if (has_vreverse)
    {
    /* Backtrack when the alternative ends before STR_END. */
    SLJIT_ASSERT(altbacktrack.top != NULL);
    add_jump(compiler, &altbacktrack.top->simple_backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
    }

  common->accept_label = LABEL();
  if (common->accept != NULL)
    set_jumps(common->accept, common->accept_label);

  /* Reset stack. */
  if (framesize < 0)
    {
    if (framesize == no_frame)
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    else if (extrasize > 0)
      free_stack(common, extrasize);

    if (end_block_size > 0)
      OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 1));

    if (needs_control_head)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
    }
  else
    {
    if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
      {
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));

      if (end_block_size > 0)
        OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 2));

      if (needs_control_head)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
      }
    else
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);

      if (end_block_size > 0)
        OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - extrasize + 1));

      if (needs_control_head)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 2));
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
      }
    }

  if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
    {
    /* We know that STR_PTR was stored on the top of the stack. */
    if (conditional)
      {
      if (extrasize > 0)
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-end_block_size - (needs_control_head ? 2 : 1)));
      }
    else if (bra == OP_BRAZERO)
      {
      if (framesize < 0)
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));
      else
        {
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - extrasize));
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
        }
      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else if (framesize >= 0)
      {
      /* For OP_BRA and OP_BRAMINZERO. */
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
      }
    }
  /* The alternative matched. */
  add_jump(compiler, found, JUMP(SLJIT_JUMP));

  compile_backtrackingpath(common, altbacktrack.top);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    {
    /* Restore the saved compiler state before leaving. */
    if (local_quit_available)
      {
      common->local_quit_available = save_local_quit_available;
      common->quit_label = save_quit_label;
      common->quit = save_quit;
      }
    common->in_positive_assertion = save_in_positive_assertion;
    common->then_trap = save_then_trap;
    common->accept_label = save_accept_label;
    common->positive_assertion_quit = save_positive_assertion_quit;
    common->accept = save_accept;
    return NULL;
    }
  set_jumps(altbacktrack.own_backtracks, LABEL());

  if (*cc != OP_ALT)
    break;

  ccbegin = cc;
  cc += GET(cc, 1);
  }

if (local_quit_available)
  {
  SLJIT_ASSERT(common->positive_assertion_quit == NULL);
  /* Makes the check less complicated below. */
  common->positive_assertion_quit = common->quit;
  }

/* None of them matched. */
if (common->positive_assertion_quit != NULL)
  {
  jump = JUMP(SLJIT_JUMP);
  set_jumps(common->positive_assertion_quit, LABEL());
  SLJIT_ASSERT(framesize != no_stack);
  if (framesize < 0)
    OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
  else
    {
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
    OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (extrasize + 1) * sizeof(sljit_sw));
    }
  JUMPHERE(jump);
  }

if (end_block_size > 0)
  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(STACK_TOP), STACK(1));

if (needs_control_head)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 1));

if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
  {
  /* Assert is failed. */
  if ((conditional && extrasize > 0) || bra == OP_BRAZERO)
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  if (framesize < 0)
    {
    /* The topmost item should be 0. */
    if (bra == OP_BRAZERO)
      {
      if (extrasize >= 2)
        free_stack(common, extrasize - 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else if (extrasize > 0)
      free_stack(common, extrasize);
    }
  else
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
    /* The topmost item should be 0. */
    if (bra == OP_BRAZERO)
      {
      free_stack(common, framesize + extrasize - 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else
      free_stack(common, framesize + extrasize);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }
  /* With OP_BRAZERO this jump is bound to the matching path label below,
  otherwise it is added to the failure list. */
  jump = JUMP(SLJIT_JUMP);
  if (bra != OP_BRAZERO)
    add_jump(compiler, target, jump);

  /* Assert is successful. */
  set_jumps(tmp, LABEL());
  if (framesize < 0)
    {
    /* We know that STR_PTR was stored on the top of the stack. */
    if (extrasize > 0)
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));

    /* Keep the STR_PTR on the top of the stack. */
    if (bra == OP_BRAZERO)
      {
      /* This allocation is always successful. */
      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
      if (extrasize >= 2)
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      }
    else if (bra == OP_BRAMINZERO)
      {
      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    }
  else
    {
    if (bra == OP_BRA)
      {
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 1));
      }
    else
      {
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + end_block_size + 2) * sizeof(sljit_sw));

      if (extrasize == 2 + end_block_size)
        {
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
        if (bra == OP_BRAMINZERO)
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
        }
      else
        {
        SLJIT_ASSERT(extrasize == 3 + end_block_size);
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
        }
      }
    }

  if (bra == OP_BRAZERO)
    {
    backtrack->matchingpath = LABEL();
    SET_LABEL(jump, backtrack->matchingpath);
    }
  else if (bra == OP_BRAMINZERO)
    {
    JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
    JUMPHERE(brajump);
    if (framesize >= 0)
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
      }
    set_jumps(backtrack->common.own_backtracks, LABEL());
    }
  }
else
  {
  /* AssertNot is successful. */
  if (framesize < 0)
    {
    if (extrasize > 0)
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

    if (bra != OP_BRA)
      {
      if (extrasize >= 2)
        free_stack(common, extrasize - 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else if (extrasize > 0)
      free_stack(common, extrasize);
    }
  else
    {
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
    /* The topmost item should be 0. */
    if (bra != OP_BRA)
      {
      free_stack(common, framesize + extrasize - 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else
      free_stack(common, framesize + extrasize);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }

  if (bra == OP_BRAZERO)
    backtrack->matchingpath = LABEL();
  else if (bra == OP_BRAMINZERO)
    {
    JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
    JUMPHERE(brajump);
    }

  if (bra != OP_BRA)
    {
    SLJIT_ASSERT(found == &backtrack->common.own_backtracks);
    set_jumps(backtrack->common.own_backtracks, LABEL());
    backtrack->common.own_backtracks = NULL;
    }
  }

/* Restore the compiler state saved on entry. */
if (local_quit_available)
  {
  common->local_quit_available = save_local_quit_available;
  common->quit_label = save_quit_label;
  common->quit = save_quit;
  }
common->in_positive_assertion = save_in_positive_assertion;
common->then_trap = save_then_trap;
common->accept_label = save_accept_label;
common->positive_assertion_quit = save_positive_assertion_quit;
common->accept = save_accept;
return cc + 1 + LINK_SIZE;
}
10575
match_once_common(compiler_common * common,PCRE2_UCHAR ket,int framesize,int private_data_ptr,BOOL has_alternatives,BOOL needs_control_head)10576 static SLJIT_INLINE void match_once_common(compiler_common *common, PCRE2_UCHAR ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
10577 {
10578 DEFINE_COMPILER;
10579 int stacksize;
10580
10581 if (framesize < 0)
10582 {
10583 if (framesize == no_frame)
10584 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10585 else
10586 {
10587 stacksize = needs_control_head ? 1 : 0;
10588 if (ket != OP_KET || has_alternatives)
10589 stacksize++;
10590
10591 if (stacksize > 0)
10592 free_stack(common, stacksize);
10593 }
10594
10595 if (needs_control_head)
10596 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? STACK(-2) : STACK(-1));
10597
10598 /* TMP2 which is set here used by OP_KETRMAX below. */
10599 if (ket == OP_KETRMAX)
10600 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
10601 else if (ket == OP_KETRMIN)
10602 {
10603 /* Move the STR_PTR to the private_data_ptr. */
10604 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
10605 }
10606 }
10607 else
10608 {
10609 stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;
10610 OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
10611 if (needs_control_head)
10612 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
10613
10614 if (ket == OP_KETRMAX)
10615 {
10616 /* TMP2 which is set here used by OP_KETRMAX below. */
10617 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10618 }
10619 }
10620 if (needs_control_head)
10621 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
10622 }
10623
match_capture_common(compiler_common * common,int stacksize,int offset,int private_data_ptr)10624 static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
10625 {
10626 DEFINE_COMPILER;
10627
10628 if (common->capture_last_ptr != 0)
10629 {
10630 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
10631 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
10632 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
10633 stacksize++;
10634 }
10635 if (common->optimized_cbracket[offset >> 1] == 0)
10636 {
10637 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
10638 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
10639 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
10640 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10641 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
10642 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
10643 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
10644 stacksize += 2;
10645 }
10646 return stacksize;
10647 }
10648
do_script_run(PCRE2_SPTR ptr,PCRE2_SPTR endptr)10649 static PCRE2_SPTR SLJIT_FUNC do_script_run(PCRE2_SPTR ptr, PCRE2_SPTR endptr)
10650 {
10651 if (PRIV(script_run)(ptr, endptr, FALSE))
10652 return endptr;
10653 return NULL;
10654 }
10655
#ifdef SUPPORT_UNICODE

/* Same as do_script_run(), but the script run check is called with the UTF
flag set. Returns endptr when the check succeeds, NULL otherwise. */
static PCRE2_SPTR SLJIT_FUNC do_script_run_utf(PCRE2_SPTR ptr, PCRE2_SPTR endptr)
{
return PRIV(script_run)(ptr, endptr, TRUE) ? endptr : NULL;
}

#endif /* SUPPORT_UNICODE */
10666
/* Emits a call to the script run helper. The value stored at
private_data_ptr is passed as the first argument and STR_PTR as the second
one. The returned pointer becomes the new STR_PTR; when it is NULL, a jump is
added to the simple_backtracks list of parent->top, or to
parent->own_backtracks when parent->top is NULL. */
static void match_script_run_common(compiler_common *common, int private_data_ptr, backtrack_common *parent)
{
DEFINE_COMPILER;
sljit_sw helper_addr = SLJIT_FUNC_ADDR(do_script_run);
jump_list **failed = (parent->top == NULL) ? &parent->own_backtracks : &parent->top->simple_backtracks;

#ifdef SUPPORT_UNICODE
if (common->utf)
  helper_addr = SLJIT_FUNC_ADDR(do_script_run_utf);
#endif

/* The argument registers of the call are TMP1 and STR_PTR. */
SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, helper_addr);

OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
add_jump(compiler, failed, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
}
10684
10685 /*
10686 Handling bracketed expressions is probably the most complex part.
10687
10688 Stack layout naming characters:
10689 S - Push the current STR_PTR
10690 0 - Push a 0 (NULL)
10691 A - Push the current STR_PTR. Needed for restoring the STR_PTR
10692 before the next alternative. Not pushed if there are no alternatives.
10693 M - Any values pushed by the current alternative. Can be empty, or anything.
10694 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
10695 L - Push the previous local (pointed by localptr) to the stack
10696 () - optional values stored on the stack
10697 ()* - optional, can be stored multiple times
10698
10699 The following list shows the regular expression templates, their PCRE byte codes
10700 and stack layout supported by pcre-sljit.
10701
10702 (?:) OP_BRA | OP_KET A M
10703 () OP_CBRA | OP_KET C M
10704 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
10705 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
10706 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
10707 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
10708 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
10709 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
10710 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
10711 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
10712 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
10713 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
10714 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
10715 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
10716 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
10717 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
10718 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
10719 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
10720 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
10721 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
10722 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
10723 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
10724
10725
10726 Stack layout naming characters:
10727 A - Push the alternative index (starting from 0) on the stack.
10728 Not pushed if there are no alternatives.
10729 M - Any values pushed by the current alternative. Can be empty, or anything.
10730
10731 The next list shows the possible content of a bracket:
10732 (|) OP_*BRA | OP_ALT ... M A
10733 (?()|) OP_*COND | OP_ALT M A
10734 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
10735 Or nothing, if trace is unnecessary
10736 */
10737
/* Generates the matching path of a bracketed expression: OP_BRA, OP_CBRA,
OP_ONCE, OP_COND, OP_SCRIPT_RUN, the non-atomic assertions and their
"S" variants, optionally preceded by OP_BRAZERO / OP_BRAMINZERO and closed
by OP_KET, OP_KETRMAX or OP_KETRMIN. The stack layouts produced here are
listed in the comment that precedes this function.

Arguments:
  common  the JIT compiler state
  cc      points to the bracket opcode, or to the OP_BRAZERO /
          OP_BRAMINZERO that precedes it
  parent  the enclosing backtrack record (passed on to PUSH_BACKTRACK and
          read as a braminzero_backtrack in the OP_BRAMINZERO case)

Returns a pointer to the byte code that follows the closing ket (plus
repeat_length when the ket carries repeat data), or NULL when the
compiler has recorded an error.

Only the first alternative is compiled here. The remaining alternatives are
only skipped and counted. The order of the emitted instructions is
significant, and TMP2 is used to carry values between neighbouring sections,
so statements must not be reordered. */

static PCRE2_SPTR compile_bracket_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
PCRE2_UCHAR opcode;
int private_data_ptr = 0;
int offset = 0;
int i, stacksize;
int repeat_ptr = 0, repeat_length = 0;
int repeat_type = 0, repeat_count = 0;
PCRE2_SPTR ccbegin;
PCRE2_SPTR matchingpath;
PCRE2_SPTR slot;
PCRE2_UCHAR bra = OP_BRA;
PCRE2_UCHAR ket;
assert_backtrack *assert;
BOOL has_alternatives;
BOOL needs_control_head = FALSE;
BOOL has_vreverse = FALSE;
struct sljit_jump *jump;
struct sljit_jump *skip;
struct sljit_label *rmax_label = NULL;
struct sljit_jump *braminzero = NULL;

PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);

/* Remember and step over an optional zero-repeat prefix. */
if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
  {
  bra = *cc;
  cc++;
  }

opcode = *cc;
ccbegin = cc;
matchingpath = bracketend(cc) - 1 - LINK_SIZE;
ket = *matchingpath;
/* A plain OP_KET with private data describes a counted repeat. The bounded
forms are handled like OP_KETRMAX / OP_KETRMIN from here on. */
if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)
  {
  repeat_ptr = PRIVATE_DATA(matchingpath);
  repeat_length = PRIVATE_DATA(matchingpath + 1);
  repeat_type = PRIVATE_DATA(matchingpath + 2);
  repeat_count = PRIVATE_DATA(matchingpath + 3);
  SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);
  if (repeat_type == OP_UPTO)
    ket = OP_KETRMAX;
  if (repeat_type == OP_MINUPTO)
    ket = OP_KETRMIN;
  }

matchingpath = ccbegin + 1 + LINK_SIZE;
SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
/* cc now points to the end of the first alternative (OP_ALT or the ket). */
cc += GET(cc, 1);

has_alternatives = *cc == OP_ALT;
if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND))
  {
  SLJIT_COMPILE_ASSERT(OP_DNRREF == OP_RREF + 1 && OP_FALSE == OP_RREF + 2 && OP_TRUE == OP_RREF + 3,
    compile_time_checks_must_be_grouped_together);
  has_alternatives = ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL) ? FALSE : TRUE;
  }

if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
  opcode = OP_SCOND;

if (opcode == OP_CBRA || opcode == OP_SCBRA)
  {
  /* Capturing brackets have a pre-allocated space. */
  offset = GET2(ccbegin, 1 + LINK_SIZE);
  if (common->optimized_cbracket[offset] == 0)
    {
    private_data_ptr = OVECTOR_PRIV(offset);
    offset <<= 1;
    }
  else
    {
    offset <<= 1;
    private_data_ptr = OVECTOR(offset);
    }
  BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
  matchingpath += IMM2_SIZE;
  }
else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_ONCE || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
  {
  /* Other brackets simply allocate the next entry. */
  private_data_ptr = PRIVATE_DATA(ccbegin);
  SLJIT_ASSERT(private_data_ptr != 0);
  BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
  if (opcode == OP_ONCE)
    BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
  }

/* Instructions before the first alternative. */
stacksize = 0;
if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
  stacksize++;
if (bra == OP_BRAZERO)
  stacksize++;

if (stacksize > 0)
  allocate_stack(common, stacksize);

stacksize = 0;
if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
  stacksize++;
  }

if (bra == OP_BRAZERO)
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);

if (bra == OP_BRAMINZERO)
  {
  /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  if (ket != OP_KETRMIN)
    {
    free_stack(common, 1);
    braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
    }
  else if (opcode == OP_ONCE || opcode >= OP_SBRA)
    {
    jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    /* Nothing stored during the first run. */
    skip = JUMP(SLJIT_JUMP);
    JUMPHERE(jump);
    /* Checking zero-length iteration. */
    if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
      {
      /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
      braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      }
    else
      {
      /* Except when the whole stack frame must be saved. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-BACKTRACK_AS(bracket_backtrack)->u.framesize - 2));
      }
    JUMPHERE(skip);
    }
  else
    {
    jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    JUMPHERE(jump);
    }
  }

/* Counted repeats keep their remaining count in the repeat_ptr local. */
if (repeat_type != 0)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, repeat_count);
  if (repeat_type == OP_EXACT)
    rmax_label = LABEL();
  }

if (ket == OP_KETRMIN)
  BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();

if (ket == OP_KETRMAX)
  {
  rmax_label = LABEL();
  if (has_alternatives && opcode >= OP_BRA && opcode < OP_SBRA && repeat_type == 0)
    BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label;
  }

/* Handling capturing brackets and alternatives. */
if (opcode == OP_ONCE)
  {
  stacksize = 0;
  if (needs_control_head)
    {
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
    stacksize++;
    }

  if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
    {
    /* Neither capturing brackets nor recursions are found in the block. */
    if (ket == OP_KETRMIN)
      {
      stacksize += 2;
      if (!needs_control_head)
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      }
    else
      {
      if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
      if (ket == OP_KETRMAX || has_alternatives)
        stacksize++;
      }

    if (stacksize > 0)
      allocate_stack(common, stacksize);

    stacksize = 0;
    if (needs_control_head)
      {
      stacksize++;
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
      }

    if (ket == OP_KETRMIN)
      {
      if (needs_control_head)
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
      if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
        OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
      }
    else if (ket == OP_KETRMAX || has_alternatives)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
    }
  else
    {
    /* A frame is required: reserve room for it together with the saved values. */
    if (ket != OP_KET || has_alternatives)
      stacksize++;

    stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1;
    allocate_stack(common, stacksize);

    if (needs_control_head)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);

    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));

    stacksize = needs_control_head ? 1 : 0;
    if (ket != OP_KET || has_alternatives)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
      stacksize++;
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
      }
    else
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
      }
    init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1);
    }
  }
else if (opcode == OP_CBRA || opcode == OP_SCBRA)
  {
  /* Saving the previous values. */
  if (common->optimized_cbracket[offset >> 1] != 0)
    {
    SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
    allocate_stack(common, 2);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
    }
  else
    {
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    allocate_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
    }
  }
else if (opcode == OP_ASSERTBACK_NA && PRIVATE_DATA(ccbegin + 1))
  {
  /* Both private slots are saved, and STR_END is temporarily set to the
  current STR_PTR while the assertion body is matched. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  allocate_stack(common, 4);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), STR_END, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
  OP1(SLJIT_MOV, STR_END, 0, STR_PTR, 0);

  has_vreverse = (*matchingpath == OP_VREVERSE);
  if (*matchingpath == OP_REVERSE || has_vreverse)
    matchingpath = compile_reverse_matchingpath(common, matchingpath, backtrack);
  }
else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
  {
  /* Saving the previous value. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  allocate_stack(common, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);

  if (*matchingpath == OP_REVERSE)
    matchingpath = compile_reverse_matchingpath(common, matchingpath, backtrack);
  }
else if (has_alternatives)
  {
  /* Pushing the starting string pointer. */
  allocate_stack(common, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  }

/* Generating code for the first alternative. */
if (opcode == OP_COND || opcode == OP_SCOND)
  {
  if (*matchingpath == OP_CREF)
    {
    SLJIT_ASSERT(has_alternatives);
    add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
      CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
    matchingpath += 1 + IMM2_SIZE;
    }
  else if (*matchingpath == OP_DNCREF)
    {
    SLJIT_ASSERT(has_alternatives);

    /* The condition fails only when every listed group still holds the
    OVECTOR(1) value. STR_PTR is borrowed as a scratch register and is
    restored from TMP3 afterwards. */
    i = GET2(matchingpath, 1 + IMM2_SIZE);
    slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
    OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
    slot += common->name_entry_size;
    i--;
    while (i-- > 0)
      {
      OP2(SLJIT_SUB, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
      OP2(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, STR_PTR, 0);
      slot += common->name_entry_size;
      }
    OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
    add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), JUMP(SLJIT_ZERO));
    matchingpath += 1 + 2 * IMM2_SIZE;
    }
  else if ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL)
    {
    /* Never has other case. */
    BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
    SLJIT_ASSERT(!has_alternatives);

    /* These conditions are decided at compile time. stacksize is reused as
    the result: non-zero selects the first branch, zero the "else" branch. */
    if (*matchingpath == OP_TRUE)
      {
      stacksize = 1;
      matchingpath++;
      }
    else if (*matchingpath == OP_FALSE || *matchingpath == OP_FAIL)
      stacksize = 0;
    else if (*matchingpath == OP_RREF)
      {
      stacksize = GET2(matchingpath, 1);
      if (common->currententry == NULL)
        stacksize = 0;
      else if (stacksize == RREF_ANY)
        stacksize = 1;
      else if (common->currententry->start == 0)
        stacksize = stacksize == 0;
      else
        stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);

      if (stacksize != 0)
        matchingpath += 1 + IMM2_SIZE;
      }
    else
      {
      if (common->currententry == NULL || common->currententry->start == 0)
        stacksize = 0;
      else
        {
        stacksize = GET2(matchingpath, 1 + IMM2_SIZE);
        slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
        i = (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
        while (stacksize > 0)
          {
          if ((int)GET2(slot, 0) == i)
            break;
          slot += common->name_entry_size;
          stacksize--;
          }
        }

      if (stacksize != 0)
        matchingpath += 1 + 2 * IMM2_SIZE;
      }

    /* The stacksize == 0 is a common "else" case. */
    if (stacksize == 0)
      {
      if (*cc == OP_ALT)
        {
        matchingpath = cc + 1 + LINK_SIZE;
        cc += GET(cc, 1);
        }
      else
        matchingpath = cc;
      }
    }
  else
    {
    SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
    /* Similar code as PUSH_BACKTRACK macro. */
    assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
      return NULL;
    memset(assert, 0, sizeof(assert_backtrack));
    assert->common.cc = matchingpath;
    BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
    matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
    }
  }

compile_matchingpath(common, matchingpath, cc, backtrack);
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  return NULL;

/* Opcode specific work after the body of the first alternative has matched. */
switch (opcode)
  {
  case OP_ASSERTBACK_NA:
    if (has_vreverse)
      {
      SLJIT_ASSERT(backtrack->top != NULL && PRIVATE_DATA(ccbegin + 1));
      add_jump(compiler, &backtrack->top->simple_backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
      }

    if (PRIVATE_DATA(ccbegin + 1))
      OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
    break;
  case OP_ASSERT_NA:
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    break;
  case OP_ONCE:
    match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
    break;
  case OP_SCRIPT_RUN:
    match_script_run_common(common, private_data_ptr, backtrack);
    break;
  }

/* First pass: count the slots needed after the alternative. The second
pass below fills them in the same order. */
stacksize = 0;
if (repeat_type == OP_MINUPTO)
  {
  /* We need to preserve the counter. TMP2 will be used below. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
  stacksize++;
  }
if (ket != OP_KET || bra != OP_BRA)
  stacksize++;
if (offset != 0)
  {
  if (common->capture_last_ptr != 0)
    stacksize++;
  if (common->optimized_cbracket[offset >> 1] == 0)
    stacksize += 2;
  }
if (has_alternatives && opcode != OP_ONCE)
  stacksize++;

if (stacksize > 0)
  allocate_stack(common, stacksize);

stacksize = 0;
if (repeat_type == OP_MINUPTO)
  {
  /* TMP2 was set above. */
  OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
  stacksize++;
  }

if (ket != OP_KET || bra != OP_BRA)
  {
  if (ket != OP_KET)
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
  else
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
  stacksize++;
  }

if (offset != 0)
  stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);

/* Skip and count the other alternatives. */
i = 1;
while (*cc == OP_ALT)
  {
  cc += GET(cc, 1);
  i++;
  }

if (has_alternatives)
  {
  if (opcode != OP_ONCE)
    {
    if (i <= 3)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
    else
      BACKTRACK_AS(bracket_backtrack)->u.matching_put_label = sljit_emit_put_label(compiler, SLJIT_MEM1(STACK_TOP), STACK(stacksize));
    }
  if (ket != OP_KETRMAX)
    BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
  }

/* Must be after the matchingpath label. */
if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
  {
  SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
  }

if (ket == OP_KETRMAX)
  {
  if (repeat_type != 0)
    {
    if (has_alternatives)
      BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, rmax_label);
    /* Drop STR_PTR for greedy plus quantifier. */
    if (opcode != OP_ONCE)
      free_stack(common, 1);
    }
  else if (opcode < OP_BRA || opcode >= OP_SBRA)
    {
    if (has_alternatives)
      BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();

    /* Checking zero-length iteration. */
    if (opcode != OP_ONCE)
      {
      /* This case includes opcodes such as OP_SCRIPT_RUN. */
      CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0, rmax_label);
      /* Drop STR_PTR for greedy plus quantifier. */
      if (bra != OP_BRAZERO)
        free_stack(common, 1);
      }
    else
      /* TMP2 must contain the starting STR_PTR. */
      CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmax_label);
    }
  else
    JUMPTO(SLJIT_JUMP, rmax_label);
  BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
  }

if (repeat_type == OP_EXACT)
  {
  count_match(common);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
  JUMPTO(SLJIT_NOT_ZERO, rmax_label);
  }
else if (repeat_type == OP_UPTO)
  {
  /* We need to preserve the counter. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
  allocate_stack(common, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
  }

if (bra == OP_BRAZERO)
  BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();

if (bra == OP_BRAMINZERO)
  {
  /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
  JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
  if (braminzero != NULL)
    {
    JUMPHERE(braminzero);
    /* We need to release the end pointer to perform the
    backtrack for the zero-length iteration. When
    framesize is < 0, OP_ONCE will do the release itself. */
    if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (BACKTRACK_AS(bracket_backtrack)->u.framesize - 1) * sizeof(sljit_sw));
      }
    else if (ket == OP_KETRMIN && opcode != OP_ONCE)
      free_stack(common, 1);
    }
  /* Continue to the normal backtrack. */
  }

if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
  count_match(common);

/* Step over the closing ket. */
cc += 1 + LINK_SIZE;

if (opcode == OP_ONCE)
  {
  /* We temporarily encode the needs_control_head in the lowest bit.
     Note: on the target architectures of SLJIT the ((x << 1) >> 1) returns
     the same value for small signed numbers (including negative numbers). */
  BACKTRACK_AS(bracket_backtrack)->u.framesize = (int)((unsigned)BACKTRACK_AS(bracket_backtrack)->u.framesize << 1) | (needs_control_head ? 1 : 0);
  }
return cc + repeat_length;
}
11331
compile_bracketpos_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)11332 static PCRE2_SPTR compile_bracketpos_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
11333 {
11334 DEFINE_COMPILER;
11335 backtrack_common *backtrack;
11336 PCRE2_UCHAR opcode;
11337 int private_data_ptr;
11338 int cbraprivptr = 0;
11339 BOOL needs_control_head;
11340 int framesize;
11341 int stacksize;
11342 int offset = 0;
11343 BOOL zero = FALSE;
11344 PCRE2_SPTR ccbegin = NULL;
11345 int stack; /* Also contains the offset of control head. */
11346 struct sljit_label *loop = NULL;
11347 struct jump_list *emptymatch = NULL;
11348
11349 PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
11350 if (*cc == OP_BRAPOSZERO)
11351 {
11352 zero = TRUE;
11353 cc++;
11354 }
11355
11356 opcode = *cc;
11357 private_data_ptr = PRIVATE_DATA(cc);
11358 SLJIT_ASSERT(private_data_ptr != 0);
11359 BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
11360 switch(opcode)
11361 {
11362 case OP_BRAPOS:
11363 case OP_SBRAPOS:
11364 ccbegin = cc + 1 + LINK_SIZE;
11365 break;
11366
11367 case OP_CBRAPOS:
11368 case OP_SCBRAPOS:
11369 offset = GET2(cc, 1 + LINK_SIZE);
11370 /* This case cannot be optimized in the same way as
11371 normal capturing brackets. */
11372 SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
11373 cbraprivptr = OVECTOR_PRIV(offset);
11374 offset <<= 1;
11375 ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
11376 break;
11377
11378 default:
11379 SLJIT_UNREACHABLE();
11380 break;
11381 }
11382
11383 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
11384 BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
11385 if (framesize < 0)
11386 {
11387 if (offset != 0)
11388 {
11389 stacksize = 2;
11390 if (common->capture_last_ptr != 0)
11391 stacksize++;
11392 }
11393 else
11394 stacksize = 1;
11395
11396 if (needs_control_head)
11397 stacksize++;
11398 if (!zero)
11399 stacksize++;
11400
11401 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
11402 allocate_stack(common, stacksize);
11403 if (framesize == no_frame)
11404 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
11405
11406 stack = 0;
11407 if (offset != 0)
11408 {
11409 stack = 2;
11410 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
11411 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
11412 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
11413 if (common->capture_last_ptr != 0)
11414 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
11415 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
11416 if (needs_control_head)
11417 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
11418 if (common->capture_last_ptr != 0)
11419 {
11420 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
11421 stack = 3;
11422 }
11423 }
11424 else
11425 {
11426 if (needs_control_head)
11427 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
11428 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11429 stack = 1;
11430 }
11431
11432 if (needs_control_head)
11433 stack++;
11434 if (!zero)
11435 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), SLJIT_IMM, 1);
11436 if (needs_control_head)
11437 {
11438 stack--;
11439 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
11440 }
11441 }
11442 else
11443 {
11444 stacksize = framesize + 1;
11445 if (!zero)
11446 stacksize++;
11447 if (needs_control_head)
11448 stacksize++;
11449 if (offset == 0)
11450 stacksize++;
11451 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
11452
11453 allocate_stack(common, stacksize);
11454 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11455 if (needs_control_head)
11456 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
11457 OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
11458
11459 stack = 0;
11460 if (!zero)
11461 {
11462 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
11463 stack = 1;
11464 }
11465 if (needs_control_head)
11466 {
11467 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
11468 stack++;
11469 }
11470 if (offset == 0)
11471 {
11472 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
11473 stack++;
11474 }
11475 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
11476 init_frame(common, cc, NULL, stacksize - 1, stacksize - framesize);
11477 stack -= 1 + (offset == 0);
11478 }
11479
11480 if (offset != 0)
11481 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
11482
11483 loop = LABEL();
11484 while (*cc != OP_KETRPOS)
11485 {
11486 backtrack->top = NULL;
11487 backtrack->own_backtracks = NULL;
11488 cc += GET(cc, 1);
11489
11490 compile_matchingpath(common, ccbegin, cc, backtrack);
11491 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
11492 return NULL;
11493
11494 if (framesize < 0)
11495 {
11496 if (framesize == no_frame)
11497 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11498
11499 if (offset != 0)
11500 {
11501 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
11502 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
11503 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
11504 if (common->capture_last_ptr != 0)
11505 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
11506 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
11507 }
11508 else
11509 {
11510 if (opcode == OP_SBRAPOS)
11511 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
11512 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11513 }
11514
11515 /* Even if the match is empty, we need to reset the control head. */
11516 if (needs_control_head)
11517 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));
11518
11519 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
11520 add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));
11521
11522 if (!zero)
11523 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
11524 }
11525 else
11526 {
11527 if (offset != 0)
11528 {
11529 OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw));
11530 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
11531 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
11532 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
11533 if (common->capture_last_ptr != 0)
11534 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
11535 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
11536 }
11537 else
11538 {
11539 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11540 OP2(SLJIT_SUB, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
11541 if (opcode == OP_SBRAPOS)
11542 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
11543 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(-framesize - 2), STR_PTR, 0);
11544 }
11545
11546 /* Even if the match is empty, we need to reset the control head. */
11547 if (needs_control_head)
11548 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));
11549
11550 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
11551 add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));
11552
11553 if (!zero)
11554 {
11555 if (framesize < 0)
11556 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
11557 else
11558 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
11559 }
11560 }
11561
11562 JUMPTO(SLJIT_JUMP, loop);
11563 flush_stubs(common);
11564
11565 compile_backtrackingpath(common, backtrack->top);
11566 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
11567 return NULL;
11568 set_jumps(backtrack->own_backtracks, LABEL());
11569
11570 if (framesize < 0)
11571 {
11572 if (offset != 0)
11573 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
11574 else
11575 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
11576 }
11577 else
11578 {
11579 if (offset != 0)
11580 {
11581 /* Last alternative. */
11582 if (*cc == OP_KETRPOS)
11583 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11584 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
11585 }
11586 else
11587 {
11588 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11589 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
11590 }
11591 }
11592
11593 if (*cc == OP_KETRPOS)
11594 break;
11595 ccbegin = cc + 1 + LINK_SIZE;
11596 }
11597
11598 /* We don't have to restore the control head in case of a failed match. */
11599
11600 backtrack->own_backtracks = NULL;
11601 if (!zero)
11602 {
11603 if (framesize < 0)
11604 add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
11605 else /* TMP2 is set to [private_data_ptr] above. */
11606 add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), STACK(-stacksize), SLJIT_IMM, 0));
11607 }
11608
11609 /* None of them matched. */
11610 set_jumps(emptymatch, LABEL());
11611 count_match(common);
11612 return cc + 1 + LINK_SIZE;
11613 }
11614
get_iterator_parameters(compiler_common * common,PCRE2_SPTR cc,PCRE2_UCHAR * opcode,PCRE2_UCHAR * type,sljit_u32 * max,sljit_u32 * exact,PCRE2_SPTR * end)11615 static SLJIT_INLINE PCRE2_SPTR get_iterator_parameters(compiler_common *common, PCRE2_SPTR cc, PCRE2_UCHAR *opcode, PCRE2_UCHAR *type, sljit_u32 *max, sljit_u32 *exact, PCRE2_SPTR *end)
11616 {
11617 int class_len;
11618
11619 *opcode = *cc;
11620 *exact = 0;
11621
11622 if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
11623 {
11624 cc++;
11625 *type = OP_CHAR;
11626 }
11627 else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
11628 {
11629 cc++;
11630 *type = OP_CHARI;
11631 *opcode -= OP_STARI - OP_STAR;
11632 }
11633 else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
11634 {
11635 cc++;
11636 *type = OP_NOT;
11637 *opcode -= OP_NOTSTAR - OP_STAR;
11638 }
11639 else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
11640 {
11641 cc++;
11642 *type = OP_NOTI;
11643 *opcode -= OP_NOTSTARI - OP_STAR;
11644 }
11645 else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
11646 {
11647 cc++;
11648 *opcode -= OP_TYPESTAR - OP_STAR;
11649 *type = OP_END;
11650 }
11651 else
11652 {
11653 SLJIT_ASSERT(*opcode == OP_CLASS || *opcode == OP_NCLASS || *opcode == OP_XCLASS);
11654 *type = *opcode;
11655 cc++;
11656 class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(PCRE2_UCHAR))) : GET(cc, 0);
11657 *opcode = cc[class_len - 1];
11658
11659 if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
11660 {
11661 *opcode -= OP_CRSTAR - OP_STAR;
11662 *end = cc + class_len;
11663
11664 if (*opcode == OP_PLUS || *opcode == OP_MINPLUS)
11665 {
11666 *exact = 1;
11667 *opcode -= OP_PLUS - OP_STAR;
11668 }
11669 }
11670 else if (*opcode >= OP_CRPOSSTAR && *opcode <= OP_CRPOSQUERY)
11671 {
11672 *opcode -= OP_CRPOSSTAR - OP_POSSTAR;
11673 *end = cc + class_len;
11674
11675 if (*opcode == OP_POSPLUS)
11676 {
11677 *exact = 1;
11678 *opcode = OP_POSSTAR;
11679 }
11680 }
11681 else
11682 {
11683 SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE || *opcode == OP_CRPOSRANGE);
11684 *max = GET2(cc, (class_len + IMM2_SIZE));
11685 *exact = GET2(cc, class_len);
11686
11687 if (*max == 0)
11688 {
11689 if (*opcode == OP_CRPOSRANGE)
11690 *opcode = OP_POSSTAR;
11691 else
11692 *opcode -= OP_CRRANGE - OP_STAR;
11693 }
11694 else
11695 {
11696 *max -= *exact;
11697 if (*max == 0)
11698 *opcode = OP_EXACT;
11699 else if (*max == 1)
11700 {
11701 if (*opcode == OP_CRPOSRANGE)
11702 *opcode = OP_POSQUERY;
11703 else
11704 *opcode -= OP_CRRANGE - OP_QUERY;
11705 }
11706 else
11707 {
11708 if (*opcode == OP_CRPOSRANGE)
11709 *opcode = OP_POSUPTO;
11710 else
11711 *opcode -= OP_CRRANGE - OP_UPTO;
11712 }
11713 }
11714 *end = cc + class_len + 2 * IMM2_SIZE;
11715 }
11716 return cc;
11717 }
11718
11719 switch(*opcode)
11720 {
11721 case OP_EXACT:
11722 *exact = GET2(cc, 0);
11723 cc += IMM2_SIZE;
11724 break;
11725
11726 case OP_PLUS:
11727 case OP_MINPLUS:
11728 *exact = 1;
11729 *opcode -= OP_PLUS - OP_STAR;
11730 break;
11731
11732 case OP_POSPLUS:
11733 *exact = 1;
11734 *opcode = OP_POSSTAR;
11735 break;
11736
11737 case OP_UPTO:
11738 case OP_MINUPTO:
11739 case OP_POSUPTO:
11740 *max = GET2(cc, 0);
11741 cc += IMM2_SIZE;
11742 break;
11743 }
11744
11745 if (*type == OP_END)
11746 {
11747 *type = *cc;
11748 *end = next_opcode(common, cc);
11749 cc++;
11750 return cc;
11751 }
11752
11753 *end = cc + 1;
11754 #ifdef SUPPORT_UNICODE
11755 if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
11756 #endif
11757 return cc;
11758 }
11759
/* Emits the matching path for a single repeated item (a character, negated
character, character type or class with a quantifier) located at cc.

Arguments:
  common   compiler state
  cc       points at the repeat (or class) opcode
  parent   backtrack node that receives the new char_iterator_backtrack

The repeat is decoded by get_iterator_parameters(). The mandatory part
(exact) is emitted first, then the optional part according to the normalized
opcode.

Returns:   the address of the code following the item. When the "charpos"
           optimization below applies, the OP_CHAR / OP_CHARI that follows
           the item is consumed as well and the returned address lies behind
           it. PUSH_BACKTRACK is given NULL as its error value, so NULL is
           presumably returned when the backtrack cannot be allocated
           (confirm against the macro definition).
*/
static PCRE2_SPTR compile_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
PCRE2_UCHAR opcode;
PCRE2_UCHAR type;
sljit_u32 max = 0, exact;
sljit_s32 early_fail_ptr = PRIVATE_DATA(cc + 1);
sljit_s32 early_fail_type;
BOOL charpos_enabled;
PCRE2_UCHAR charpos_char;
unsigned int charpos_othercasebit;
PCRE2_SPTR end;
jump_list *no_match = NULL;
jump_list *no_char1_match = NULL;
struct sljit_jump *jump = NULL;
struct sljit_label *label;
int private_data_ptr = PRIVATE_DATA(cc);
/* The two words of iterator state live on the backtrack stack (STACK(0) and
STACK(1)) when the item has no private data, otherwise in two consecutive
private data words addressed through SLJIT_SP. */
int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + SSIZE_OF(sw);
int tmp_base, tmp_offset;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
BOOL use_tmp;
#endif

PUSH_BACKTRACK(sizeof(char_iterator_backtrack), cc, NULL);

/* The private data word of cc + 1 packs the early fail type into its low
three bits and the early fail location into the remaining bits. */
early_fail_type = (early_fail_ptr & 0x7);
early_fail_ptr >>= 3;

/* During recursion, these optimizations are disabled. */
if (common->early_fail_start_ptr == 0 && common->fast_forward_bc_ptr == NULL)
  {
  early_fail_ptr = 0;
  early_fail_type = type_skip;
  }

SLJIT_ASSERT(common->fast_forward_bc_ptr != NULL || early_fail_ptr == 0
  || (early_fail_ptr >= common->early_fail_start_ptr && early_fail_ptr <= common->early_fail_end_ptr));

/* Backtrack at once when STR_PTR is not beyond the recorded position. */
if (early_fail_type == type_fail)
  add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr));

cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);

/* Scratch location for counters and saved positions: TMP3, except for
OP_EXTUNI where the POSSESSIVE0 slot is used instead. */
if (type != OP_EXTUNI)
  {
  tmp_base = TMP3;
  tmp_offset = 0;
  }
else
  {
  tmp_base = SLJIT_MEM1(SLJIT_SP);
  tmp_offset = POSSESSIVE0;
  }

/* Handle fixed part first. */
if (exact > 1)
  {
  SLJIT_ASSERT(early_fail_ptr == 0);

  if (common->mode == PCRE2_JIT_COMPLETE
#ifdef SUPPORT_UNICODE
      && !common->utf
#endif
      && type != OP_ANYNL && type != OP_EXTUNI)
    {
    /* The remaining subject length is checked once here, and the single
    item matcher is invoked with FALSE as its last argument (presumably the
    flag that enables its own end-of-subject check; confirm against
    compile_char1_matchingpath). */
    OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(exact));
    add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_GREATER, TMP1, 0, STR_END, 0));
    OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
    label = LABEL();
    compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, FALSE);
    OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, label);
    }
  else
    {
    /* Same countdown loop, but the last argument is TRUE for every item. */
    OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
    label = LABEL();
    compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, TRUE);
    OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, label);
    }
  }
else if (exact == 1)
  compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, TRUE);

if (early_fail_type == type_fail_range)
  {
  /* Range end first, followed by range start. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + SSIZE_OF(sw));
  /* TMP1 = end - start, TMP2 = STR_PTR - start; backtrack when the second
  value is not greater than the first. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, TMP2, 0);
  OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, TMP2, 0);
  add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_LESS_EQUAL, TMP2, 0, TMP1, 0));

  /* Both words of the range are reset to the current position. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + SSIZE_OF(sw), STR_PTR, 0);
  }

switch(opcode)
  {
  case OP_STAR:
  case OP_UPTO:
  SLJIT_ASSERT(early_fail_ptr == 0 || opcode == OP_STAR);

  if (type == OP_ANYNL || type == OP_EXTUNI)
    {
    SLJIT_ASSERT(private_data_ptr == 0);
    SLJIT_ASSERT(early_fail_ptr == 0);

    /* The start position and a zero word are pushed first; afterwards the
    position reached after every matched item is pushed as well. */
    allocate_stack(common, 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);

    if (opcode == OP_UPTO)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, max);

    label = LABEL();
    compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
    if (opcode == OP_UPTO)
      {
      /* Count down in POSSESSIVE0 and leave the loop when zero is reached. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
      OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
      jump = JUMP(SLJIT_ZERO);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
      }

    /* We cannot use TMP3 because of allocate_stack. */
    allocate_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    JUMPTO(SLJIT_JUMP, label);
    if (jump != NULL)
      JUMPHERE(jump);
    BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
    break;
    }
#ifdef SUPPORT_UNICODE
  else if (type == OP_ALLANY && !common->invalid_utf)
#else
  else if (type == OP_ALLANY)
#endif
    {
    if (opcode == OP_STAR)
      {
      if (private_data_ptr == 0)
        allocate_stack(common, 2);

      /* Slot 0 receives STR_END, slot 1 the starting position; the match
      then continues from the end of the subject. */
      OP1(SLJIT_MOV, base, offset0, STR_END, 0);
      OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);

      OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
      process_partial_match(common);

      if (early_fail_ptr != 0)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_END, 0);
      BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
      break;
      }
#ifdef SUPPORT_UNICODE
    else if (!common->utf)
#else
    else
#endif
      {
      if (private_data_ptr == 0)
        allocate_stack(common, 2);

      /* Advance by max code units in one step. */
      OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(max));

      if (common->mode == PCRE2_JIT_COMPLETE)
        {
        /* Presumably replaces STR_PTR with STR_END when it went past the
        end (confirm against the SELECT macro). */
        OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);
        SELECT(SLJIT_GREATER, STR_PTR, STR_END, 0, STR_PTR);
        }
      else
        {
        /* The partial match code is skipped when STR_PTR <= STR_END. */
        jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, STR_END, 0);
        process_partial_match(common);
        JUMPHERE(jump);
        }

      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);

      if (early_fail_ptr != 0)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
      BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
      break;
      }
    }

  charpos_enabled = FALSE;
  charpos_char = 0;
  charpos_othercasebit = 0;

  /* "charpos": when the repeated item is not a literal character and it is
  directly followed by OP_CHAR / OP_CHARI, the following character is
  searched for while the repeat is being matched. */
  if ((type != OP_CHAR && type != OP_CHARI) && (*end == OP_CHAR || *end == OP_CHARI))
    {
#ifdef SUPPORT_UNICODE
    charpos_enabled = !common->utf || !HAS_EXTRALEN(end[1]);
#else
    charpos_enabled = TRUE;
#endif
    /* Disabled when the character has another case but
    char_get_othercase_bit() returns 0 for it. */
    if (charpos_enabled && *end == OP_CHARI && char_has_othercase(common, end + 1))
      {
      charpos_othercasebit = char_get_othercase_bit(common, end + 1);
      if (charpos_othercasebit == 0)
        charpos_enabled = FALSE;
      }

    if (charpos_enabled)
      {
      charpos_char = end[1];
      /* Consume the OP_CHAR opcode. */
      end += 2;
#if PCRE2_CODE_UNIT_WIDTH == 8
      SLJIT_ASSERT((charpos_othercasebit >> 8) == 0);
#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
      SLJIT_ASSERT((charpos_othercasebit >> 9) == 0);
      if ((charpos_othercasebit & 0x100) != 0)
        charpos_othercasebit = (charpos_othercasebit & 0xff) << 8;
#endif
      /* The case bit is also set in the reference character, because it is
      ORed into every subject character before the comparison below. */
      if (charpos_othercasebit != 0)
        charpos_char |= charpos_othercasebit;

      BACKTRACK_AS(char_iterator_backtrack)->u.charpos.enabled = TRUE;
      BACKTRACK_AS(char_iterator_backtrack)->u.charpos.chr = charpos_char;
      BACKTRACK_AS(char_iterator_backtrack)->u.charpos.othercasebit = charpos_othercasebit;
      }
    }

  if (charpos_enabled)
    {
    if (opcode == OP_UPTO)
      OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max + 1);

    /* Search the first instance of charpos_char. */
    /* The loop is entered at the comparison, so the current subject
    character is tested before any repeated item is matched. */
    jump = JUMP(SLJIT_JUMP);
    label = LABEL();
    if (opcode == OP_UPTO)
      {
      OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
      add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_ZERO));
      }
    compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, FALSE);
    if (early_fail_ptr != 0)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
    JUMPHERE(jump);

    detect_partial_match(common, &backtrack->own_backtracks);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    if (charpos_othercasebit != 0)
      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);

    /* Both state words start at the first occurrence found. */
    if (private_data_ptr == 0)
      allocate_stack(common, 2);
    OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
    OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);

    if (opcode == OP_UPTO)
      {
      OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
      add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
      }

    /* Search the last instance of charpos_char. */
    label = LABEL();
    compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
    if (early_fail_ptr != 0)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
    detect_partial_match(common, &no_match);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    if (charpos_othercasebit != 0)
      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);

    if (opcode == OP_STAR)
      {
      /* Slot 0 is updated each time another occurrence is seen. */
      CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);
      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
      JUMPTO(SLJIT_JUMP, label);
      }
    else
      {
      jump = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char);
      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
      JUMPHERE(jump);
      OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
      JUMPTO(SLJIT_NOT_ZERO, label);
      }

    /* Continue one code unit after the last recorded occurrence, that is,
    behind the character whose opcode was consumed above. */
    set_jumps(no_match, LABEL());
    OP2(SLJIT_ADD, STR_PTR, 0, base, offset0, SLJIT_IMM, IN_UCHARS(1));
    OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
    }
  else
    {
    if (private_data_ptr == 0)
      allocate_stack(common, 2);

    /* Slot 1 keeps the starting position of the optional part. */
    OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    use_tmp = (!HAS_VIRTUAL_REGISTERS && opcode == OP_STAR);
    SLJIT_ASSERT(!use_tmp || tmp_base == TMP3);

    /* In UTF mode the position after the last complete item is tracked,
    either in TMP3 or directly in slot 0. */
    if (common->utf)
      OP1(SLJIT_MOV, use_tmp ? TMP3 : base, use_tmp ? 0 : offset0, STR_PTR, 0);
#endif
    if (opcode == OP_UPTO)
      OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);

    detect_partial_match(common, &no_match);
    label = LABEL();
    compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf)
      OP1(SLJIT_MOV, use_tmp ? TMP3 : base, use_tmp ? 0 : offset0, STR_PTR, 0);
#endif

    if (opcode == OP_UPTO)
      {
      OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
      add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
      }

    detect_partial_match_to(common, label);
    /* This increment is undone by the decrement in the non-UTF branch
    below; jumps on no_char1_match land between the two, so only they end up
    one code unit back. */
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

    set_jumps(no_char1_match, LABEL());
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf)
      {
      /* Every exit restores the tracked position. */
      set_jumps(no_match, LABEL());
      if (use_tmp)
        {
        OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
        OP1(SLJIT_MOV, base, offset0, TMP3, 0);
        }
      else
        OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
      }
    else
#endif
      {
      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      set_jumps(no_match, LABEL());
      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
      }

    if (early_fail_ptr != 0)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
    }

  BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
  break;

  case OP_MINSTAR:
  /* Only the current position is saved; no item is matched on this path. */
  if (private_data_ptr == 0)
    allocate_stack(common, 1);
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
  if (early_fail_ptr != 0)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
  break;

  case OP_MINUPTO:
  /* Saves the current position in slot 0 and max + 1 in slot 1. */
  SLJIT_ASSERT(early_fail_ptr == 0);
  if (private_data_ptr == 0)
    allocate_stack(common, 2);
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, max + 1);
  BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
  break;

  case OP_QUERY:
  case OP_MINQUERY:
  /* The position is saved in both cases; only OP_QUERY tries to match the
  item here. */
  SLJIT_ASSERT(early_fail_ptr == 0);
  if (private_data_ptr == 0)
    allocate_stack(common, 1);
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  if (opcode == OP_QUERY)
    compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
  BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
  break;

  case OP_EXACT:
  /* Nothing left to do: the fixed part was emitted above. */
  break;

  case OP_POSSTAR:
#if defined SUPPORT_UNICODE
  if (type == OP_ALLANY && !common->invalid_utf)
#else
  if (type == OP_ALLANY)
#endif
    {
    /* Move straight to the end of the subject. */
    OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
    process_partial_match(common);
    if (early_fail_ptr != 0)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_END, 0);
    break;
    }

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
  if (type == OP_EXTUNI || common->utf)
    {
    /* The scratch location holds the position after the last complete
    item; STR_PTR is restored from it when the loop ends. */
    OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
    detect_partial_match(common, &no_match);
    label = LABEL();
    compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
    OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
    detect_partial_match_to(common, label);

    set_jumps(no_match, LABEL());
    OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
    if (early_fail_ptr != 0)
      {
      if (!HAS_VIRTUAL_REGISTERS && tmp_base == TMP3)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, TMP3, 0);
      else
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
      }
    break;
    }
#endif

  /* Same increment / decrement arrangement as in the OP_STAR case above:
  only jumps on no_char1_match end up one code unit back. */
  detect_partial_match(common, &no_match);
  label = LABEL();
  compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
  detect_partial_match_to(common, label);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  set_jumps(no_char1_match, LABEL());
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  set_jumps(no_match, LABEL());
  if (early_fail_ptr != 0)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
  break;

  case OP_POSUPTO:
  SLJIT_ASSERT(early_fail_ptr == 0);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
  if (common->utf)
    {
    /* POSSESSIVE1 holds the position after the last complete item, the
    scratch location holds the remaining count. */
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
    OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);

    detect_partial_match(common, &no_match);
    label = LABEL();
    compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
    OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
    add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
    detect_partial_match_to(common, label);

    set_jumps(no_match, LABEL());
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
    break;
    }
#endif

  if (type == OP_ALLANY)
    {
    /* Advance by max code units in one step, as in the OP_UPTO case. */
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(max));

    if (common->mode == PCRE2_JIT_COMPLETE)
      {
      OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);
      SELECT(SLJIT_GREATER, STR_PTR, STR_END, 0, STR_PTR);
      }
    else
      {
      jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, STR_END, 0);
      process_partial_match(common);
      JUMPHERE(jump);
      }
    break;
    }

  /* Counted loop; the scratch location holds the remaining count. */
  OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);

  detect_partial_match(common, &no_match);
  label = LABEL();
  compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
  OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
  add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
  detect_partial_match_to(common, label);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  set_jumps(no_char1_match, LABEL());
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  set_jumps(no_match, LABEL());
  break;

  case OP_POSQUERY:
  /* The scratch location holds the position to continue from: the start
  position if the item fails, the position after the item otherwise. */
  SLJIT_ASSERT(early_fail_ptr == 0);
  OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
  compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
  OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
  set_jumps(no_match, LABEL());
  OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
  break;

  default:
  SLJIT_UNREACHABLE();
  break;
  }

count_match(common);
return end;
}
12271
compile_fail_accept_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)12272 static SLJIT_INLINE PCRE2_SPTR compile_fail_accept_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
12273 {
12274 DEFINE_COMPILER;
12275 backtrack_common *backtrack;
12276
12277 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
12278
12279 if (*cc == OP_FAIL)
12280 {
12281 add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_JUMP));
12282 return cc + 1;
12283 }
12284
12285 if (*cc == OP_ACCEPT && common->currententry == NULL && (common->re->overall_options & PCRE2_ENDANCHORED) != 0)
12286 add_jump(compiler, &common->restart_match, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
12287
12288 if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL || !common->might_be_empty)
12289 {
12290 /* No need to check notempty conditions. */
12291 if (common->accept_label == NULL)
12292 add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
12293 else
12294 JUMPTO(SLJIT_JUMP, common->accept_label);
12295 return cc + 1;
12296 }
12297
12298 if (common->accept_label == NULL)
12299 add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)));
12300 else
12301 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), common->accept_label);
12302
12303 if (HAS_VIRTUAL_REGISTERS)
12304 {
12305 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
12306 OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options));
12307 }
12308 else
12309 OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options));
12310
12311 OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);
12312 add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_NOT_ZERO));
12313 OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);
12314 if (common->accept_label == NULL)
12315 add_jump(compiler, &common->accept, JUMP(SLJIT_ZERO));
12316 else
12317 JUMPTO(SLJIT_ZERO, common->accept_label);
12318
12319 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
12320 if (common->accept_label == NULL)
12321 add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
12322 else
12323 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->accept_label);
12324 add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_JUMP));
12325 return cc + 1;
12326 }
12327
compile_close_matchingpath(compiler_common * common,PCRE2_SPTR cc)12328 static SLJIT_INLINE PCRE2_SPTR compile_close_matchingpath(compiler_common *common, PCRE2_SPTR cc)
12329 {
12330 DEFINE_COMPILER;
12331 int offset = GET2(cc, 1);
12332 BOOL optimized_cbracket = common->optimized_cbracket[offset] != 0;
12333
12334 /* Data will be discarded anyway... */
12335 if (common->currententry != NULL)
12336 return cc + 1 + IMM2_SIZE;
12337
12338 if (!optimized_cbracket)
12339 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR_PRIV(offset));
12340 offset <<= 1;
12341 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
12342 if (!optimized_cbracket)
12343 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
12344 return cc + 1 + IMM2_SIZE;
12345 }
12346
compile_control_verb_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)12347 static SLJIT_INLINE PCRE2_SPTR compile_control_verb_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
12348 {
12349 DEFINE_COMPILER;
12350 backtrack_common *backtrack;
12351 PCRE2_UCHAR opcode = *cc;
12352 PCRE2_SPTR ccend = cc + 1;
12353
12354 if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG ||
12355 opcode == OP_SKIP_ARG || opcode == OP_THEN_ARG)
12356 ccend += 2 + cc[1];
12357
12358 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
12359
12360 if (opcode == OP_SKIP)
12361 {
12362 allocate_stack(common, 1);
12363 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
12364 return ccend;
12365 }
12366
12367 if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG || opcode == OP_THEN_ARG)
12368 {
12369 if (HAS_VIRTUAL_REGISTERS)
12370 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
12371 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
12372 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
12373 OP1(SLJIT_MOV, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
12374 }
12375
12376 return ccend;
12377 }
12378
/* One-unit opcode buffer that contains OP_THEN_TRAP.
compile_then_trap_matchingpath() stores its address in the cc field of the
then-trap backtrack it creates. */
static PCRE2_UCHAR then_trap_opcode[1] = { OP_THEN_TRAP };
12380
compile_then_trap_matchingpath(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,backtrack_common * parent)12381 static SLJIT_INLINE void compile_then_trap_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent)
12382 {
12383 DEFINE_COMPILER;
12384 backtrack_common *backtrack;
12385 BOOL needs_control_head;
12386 int size;
12387
12388 PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
12389 common->then_trap = BACKTRACK_AS(then_trap_backtrack);
12390 BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
12391 BACKTRACK_AS(then_trap_backtrack)->start = (sljit_sw)(cc - common->start);
12392 BACKTRACK_AS(then_trap_backtrack)->framesize = get_framesize(common, cc, ccend, FALSE, &needs_control_head);
12393
12394 size = BACKTRACK_AS(then_trap_backtrack)->framesize;
12395 size = 3 + (size < 0 ? 0 : size);
12396
12397 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
12398 allocate_stack(common, size);
12399 if (size > 3)
12400 OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0, SLJIT_IMM, (size - 3) * sizeof(sljit_sw));
12401 else
12402 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
12403 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 1), SLJIT_IMM, BACKTRACK_AS(then_trap_backtrack)->start);
12404 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 2), SLJIT_IMM, type_then_trap);
12405 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 3), TMP2, 0);
12406
12407 size = BACKTRACK_AS(then_trap_backtrack)->framesize;
12408 if (size >= 0)
12409 init_frame(common, cc, ccend, size - 1, 0);
12410 }
12411
compile_matchingpath(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,backtrack_common * parent)12412 static void compile_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent)
12413 {
12414 DEFINE_COMPILER;
12415 backtrack_common *backtrack;
12416 BOOL has_then_trap = FALSE;
12417 then_trap_backtrack *save_then_trap = NULL;
12418
12419 SLJIT_ASSERT(*ccend == OP_END || (*ccend >= OP_ALT && *ccend <= OP_KETRPOS));
12420
12421 if (common->has_then && common->then_offsets[cc - common->start] != 0)
12422 {
12423 SLJIT_ASSERT(*ccend != OP_END && common->control_head_ptr != 0);
12424 has_then_trap = TRUE;
12425 save_then_trap = common->then_trap;
12426 /* Tail item on backtrack. */
12427 compile_then_trap_matchingpath(common, cc, ccend, parent);
12428 }
12429
12430 while (cc < ccend)
12431 {
12432 switch(*cc)
12433 {
12434 case OP_SOD:
12435 case OP_SOM:
12436 case OP_NOT_WORD_BOUNDARY:
12437 case OP_WORD_BOUNDARY:
12438 case OP_EODN:
12439 case OP_EOD:
12440 case OP_DOLL:
12441 case OP_DOLLM:
12442 case OP_CIRC:
12443 case OP_CIRCM:
12444 case OP_NOT_UCP_WORD_BOUNDARY:
12445 case OP_UCP_WORD_BOUNDARY:
12446 cc = compile_simple_assertion_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks);
12447 break;
12448
12449 case OP_NOT_DIGIT:
12450 case OP_DIGIT:
12451 case OP_NOT_WHITESPACE:
12452 case OP_WHITESPACE:
12453 case OP_NOT_WORDCHAR:
12454 case OP_WORDCHAR:
12455 case OP_ANY:
12456 case OP_ALLANY:
12457 case OP_ANYBYTE:
12458 case OP_NOTPROP:
12459 case OP_PROP:
12460 case OP_ANYNL:
12461 case OP_NOT_HSPACE:
12462 case OP_HSPACE:
12463 case OP_NOT_VSPACE:
12464 case OP_VSPACE:
12465 case OP_EXTUNI:
12466 case OP_NOT:
12467 case OP_NOTI:
12468 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE);
12469 break;
12470
12471 case OP_SET_SOM:
12472 PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
12473 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
12474 allocate_stack(common, 1);
12475 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
12476 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
12477 cc++;
12478 break;
12479
12480 case OP_CHAR:
12481 case OP_CHARI:
12482 if (common->mode == PCRE2_JIT_COMPLETE)
12483 cc = compile_charn_matchingpath(common, cc, ccend, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks);
12484 else
12485 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE);
12486 break;
12487
12488 case OP_STAR:
12489 case OP_MINSTAR:
12490 case OP_PLUS:
12491 case OP_MINPLUS:
12492 case OP_QUERY:
12493 case OP_MINQUERY:
12494 case OP_UPTO:
12495 case OP_MINUPTO:
12496 case OP_EXACT:
12497 case OP_POSSTAR:
12498 case OP_POSPLUS:
12499 case OP_POSQUERY:
12500 case OP_POSUPTO:
12501 case OP_STARI:
12502 case OP_MINSTARI:
12503 case OP_PLUSI:
12504 case OP_MINPLUSI:
12505 case OP_QUERYI:
12506 case OP_MINQUERYI:
12507 case OP_UPTOI:
12508 case OP_MINUPTOI:
12509 case OP_EXACTI:
12510 case OP_POSSTARI:
12511 case OP_POSPLUSI:
12512 case OP_POSQUERYI:
12513 case OP_POSUPTOI:
12514 case OP_NOTSTAR:
12515 case OP_NOTMINSTAR:
12516 case OP_NOTPLUS:
12517 case OP_NOTMINPLUS:
12518 case OP_NOTQUERY:
12519 case OP_NOTMINQUERY:
12520 case OP_NOTUPTO:
12521 case OP_NOTMINUPTO:
12522 case OP_NOTEXACT:
12523 case OP_NOTPOSSTAR:
12524 case OP_NOTPOSPLUS:
12525 case OP_NOTPOSQUERY:
12526 case OP_NOTPOSUPTO:
12527 case OP_NOTSTARI:
12528 case OP_NOTMINSTARI:
12529 case OP_NOTPLUSI:
12530 case OP_NOTMINPLUSI:
12531 case OP_NOTQUERYI:
12532 case OP_NOTMINQUERYI:
12533 case OP_NOTUPTOI:
12534 case OP_NOTMINUPTOI:
12535 case OP_NOTEXACTI:
12536 case OP_NOTPOSSTARI:
12537 case OP_NOTPOSPLUSI:
12538 case OP_NOTPOSQUERYI:
12539 case OP_NOTPOSUPTOI:
12540 case OP_TYPESTAR:
12541 case OP_TYPEMINSTAR:
12542 case OP_TYPEPLUS:
12543 case OP_TYPEMINPLUS:
12544 case OP_TYPEQUERY:
12545 case OP_TYPEMINQUERY:
12546 case OP_TYPEUPTO:
12547 case OP_TYPEMINUPTO:
12548 case OP_TYPEEXACT:
12549 case OP_TYPEPOSSTAR:
12550 case OP_TYPEPOSPLUS:
12551 case OP_TYPEPOSQUERY:
12552 case OP_TYPEPOSUPTO:
12553 cc = compile_iterator_matchingpath(common, cc, parent);
12554 break;
12555
12556 case OP_CLASS:
12557 case OP_NCLASS:
12558 if (cc[1 + (32 / sizeof(PCRE2_UCHAR))] >= OP_CRSTAR && cc[1 + (32 / sizeof(PCRE2_UCHAR))] <= OP_CRPOSRANGE)
12559 cc = compile_iterator_matchingpath(common, cc, parent);
12560 else
12561 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE);
12562 break;
12563
12564 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
12565 case OP_XCLASS:
12566 if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRPOSRANGE)
12567 cc = compile_iterator_matchingpath(common, cc, parent);
12568 else
12569 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE);
12570 break;
12571 #endif
12572
12573 case OP_REF:
12574 case OP_REFI:
12575 if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRPOSRANGE)
12576 cc = compile_ref_iterator_matchingpath(common, cc, parent);
12577 else
12578 {
12579 compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE, FALSE);
12580 cc += 1 + IMM2_SIZE;
12581 }
12582 break;
12583
12584 case OP_DNREF:
12585 case OP_DNREFI:
12586 if (cc[1 + 2 * IMM2_SIZE] >= OP_CRSTAR && cc[1 + 2 * IMM2_SIZE] <= OP_CRPOSRANGE)
12587 cc = compile_ref_iterator_matchingpath(common, cc, parent);
12588 else
12589 {
12590 compile_dnref_search(common, cc, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks);
12591 compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE, FALSE);
12592 cc += 1 + 2 * IMM2_SIZE;
12593 }
12594 break;
12595
12596 case OP_RECURSE:
12597 cc = compile_recurse_matchingpath(common, cc, parent);
12598 break;
12599
12600 case OP_CALLOUT:
12601 case OP_CALLOUT_STR:
12602 cc = compile_callout_matchingpath(common, cc, parent);
12603 break;
12604
12605 case OP_ASSERT:
12606 case OP_ASSERT_NOT:
12607 case OP_ASSERTBACK:
12608 case OP_ASSERTBACK_NOT:
12609 PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
12610 cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
12611 break;
12612
12613 case OP_BRAMINZERO:
12614 PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc);
12615 cc = bracketend(cc + 1);
12616 if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN)
12617 {
12618 allocate_stack(common, 1);
12619 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
12620 }
12621 else
12622 {
12623 allocate_stack(common, 2);
12624 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
12625 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
12626 }
12627 BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL();
12628 count_match(common);
12629 break;
12630
12631 case OP_ASSERT_NA:
12632 case OP_ASSERTBACK_NA:
12633 case OP_ONCE:
12634 case OP_SCRIPT_RUN:
12635 case OP_BRA:
12636 case OP_CBRA:
12637 case OP_COND:
12638 case OP_SBRA:
12639 case OP_SCBRA:
12640 case OP_SCOND:
12641 cc = compile_bracket_matchingpath(common, cc, parent);
12642 break;
12643
12644 case OP_BRAZERO:
12645 if (cc[1] > OP_ASSERTBACK_NOT)
12646 cc = compile_bracket_matchingpath(common, cc, parent);
12647 else
12648 {
12649 PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
12650 cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
12651 }
12652 break;
12653
12654 case OP_BRAPOS:
12655 case OP_CBRAPOS:
12656 case OP_SBRAPOS:
12657 case OP_SCBRAPOS:
12658 case OP_BRAPOSZERO:
12659 cc = compile_bracketpos_matchingpath(common, cc, parent);
12660 break;
12661
12662 case OP_MARK:
12663 PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
12664 SLJIT_ASSERT(common->mark_ptr != 0);
12665 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
12666 allocate_stack(common, common->has_skip_arg ? 5 : 1);
12667 if (HAS_VIRTUAL_REGISTERS)
12668 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
12669 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0), TMP2, 0);
12670 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
12671 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
12672 OP1(SLJIT_MOV, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
12673 if (common->has_skip_arg)
12674 {
12675 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
12676 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
12677 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, type_mark);
12678 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), SLJIT_IMM, (sljit_sw)(cc + 2));
12679 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);
12680 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
12681 }
12682 cc += 1 + 2 + cc[1];
12683 break;
12684
12685 case OP_PRUNE:
12686 case OP_PRUNE_ARG:
12687 case OP_SKIP:
12688 case OP_SKIP_ARG:
12689 case OP_THEN:
12690 case OP_THEN_ARG:
12691 case OP_COMMIT:
12692 case OP_COMMIT_ARG:
12693 cc = compile_control_verb_matchingpath(common, cc, parent);
12694 break;
12695
12696 case OP_FAIL:
12697 case OP_ACCEPT:
12698 case OP_ASSERT_ACCEPT:
12699 cc = compile_fail_accept_matchingpath(common, cc, parent);
12700 break;
12701
12702 case OP_CLOSE:
12703 cc = compile_close_matchingpath(common, cc);
12704 break;
12705
12706 case OP_SKIPZERO:
12707 cc = bracketend(cc + 1);
12708 break;
12709
12710 default:
12711 SLJIT_UNREACHABLE();
12712 return;
12713 }
12714 if (cc == NULL)
12715 return;
12716 }
12717
12718 if (has_then_trap)
12719 {
12720 /* Head item on backtrack. */
12721 PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
12722 BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
12723 BACKTRACK_AS(then_trap_backtrack)->then_trap = common->then_trap;
12724 common->then_trap = save_then_trap;
12725 }
12726 SLJIT_ASSERT(cc == ccend);
12727 }
12728
/* The helpers used while generating matching paths end here. */
#undef PUSH_BACKTRACK
#undef PUSH_BACKTRACK_NOVALUE
#undef BACKTRACK_AS

/* Compiles the backtracking path starting at the backtrack record 'bt' and
leaves the calling function with a plain 'return' when the sljit compiler
reports an error. It expects 'common' and 'compiler' to be visible at the
point of use. */
#define COMPILE_BACKTRACKINGPATH(bt) \
  do \
    { \
    compile_backtrackingpath(common, (bt)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    } \
  while (0)

/* Views the 'current' backtrack record of the calling function as 'type'. */
#define CURRENT_AS(type) ((type *)current)
12743
/* Generates the backtracking path of a single character iterator.

Arguments:
  common    compiler state
  current   backtrack record of the iterator; accessed as a
            char_iterator_backtrack

The iterator keeps up to two values either in private data (when
PRIVATE_DATA() of the opcode is non-zero) or in the two topmost stack slots.
Stack slots are released at the end of each case only in the latter
situation. The opcode, character type and remaining parameters are decoded
by get_iterator_parameters(). */
static void compile_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
char_iterator_backtrack *iter = CURRENT_AS(char_iterator_backtrack);
PCRE2_SPTR cc = current->cc;
PCRE2_SPTR end;
PCRE2_UCHAR opcode;
PCRE2_UCHAR type;
sljit_u32 max = 0, exact;
struct sljit_label *retry = NULL;
struct sljit_jump *skip = NULL;
jump_list *fail_jumps = NULL;
int private_data_ptr = PRIVATE_DATA(cc);
BOOL on_stack = (private_data_ptr == 0);
int base = on_stack ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
int slot0 = on_stack ? STACK(0) : private_data_ptr;
int slot1 = on_stack ? STACK(1) : private_data_ptr + SSIZE_OF(sw);

cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);

switch(opcode)
  {
  case OP_STAR:
  case OP_UPTO:
  if (type == OP_ANYNL || type == OP_EXTUNI)
    {
    /* One stack slot is popped per retry; a non-zero value is a string
    position to continue from. */
    SLJIT_ASSERT(private_data_ptr == 0);
    set_jumps(iter->u.backtracks, LABEL());
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, iter->matchingpath);
    break;
    }

  /* slot0 holds the current position, slot1 the limit it is compared to. */
  OP1(SLJIT_MOV, STR_PTR, 0, base, slot0);
  if (iter->u.charpos.enabled)
    {
    /* Step back until the character before STR_PTR equals charpos.chr
    (after OR-ing othercasebit when it is set) or the limit is reached. */
    OP1(SLJIT_MOV, TMP2, 0, base, slot1);
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

    skip = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
    retry = LABEL();
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
    OP1(SLJIT_MOV, base, slot0, STR_PTR, 0);
    if (iter->u.charpos.othercasebit != 0)
      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, iter->u.charpos.othercasebit);
    CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, iter->u.charpos.chr, iter->matchingpath);
    move_back(common, NULL, TRUE);
    CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP2, 0, retry);
    }
  else
    {
    /* A single move_back() step, then the matching path is resumed. */
    skip = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, base, slot1);
    move_back(common, NULL, TRUE);
    OP1(SLJIT_MOV, base, slot0, STR_PTR, 0);
    JUMPTO(SLJIT_JUMP, iter->matchingpath);
    }
  JUMPHERE(skip);
  if (on_stack)
    free_stack(common, 2);
  break;

  case OP_MINSTAR:
  /* Match one more character from the saved position and resume. */
  OP1(SLJIT_MOV, STR_PTR, 0, base, slot0);
  compile_char1_matchingpath(common, type, cc, &fail_jumps, TRUE);
  OP1(SLJIT_MOV, base, slot0, STR_PTR, 0);
  JUMPTO(SLJIT_JUMP, iter->matchingpath);
  set_jumps(fail_jumps, LABEL());
  if (on_stack)
    free_stack(common, 1);
  break;

  case OP_MINUPTO:
  /* slot1 is a counter: it is decremented first, and reaching zero is
  treated like a failed character match. */
  OP1(SLJIT_MOV, TMP1, 0, base, slot1);
  OP1(SLJIT_MOV, STR_PTR, 0, base, slot0);
  OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  add_jump(compiler, &fail_jumps, JUMP(SLJIT_ZERO));

  OP1(SLJIT_MOV, base, slot1, TMP1, 0);
  compile_char1_matchingpath(common, type, cc, &fail_jumps, TRUE);
  OP1(SLJIT_MOV, base, slot0, STR_PTR, 0);
  JUMPTO(SLJIT_JUMP, iter->matchingpath);

  set_jumps(fail_jumps, LABEL());
  if (on_stack)
    free_stack(common, 2);
  break;

  case OP_QUERY:
  /* slot0 is read and cleared; a non-zero value is a position from which
  the matching path is resumed. */
  OP1(SLJIT_MOV, STR_PTR, 0, base, slot0);
  OP1(SLJIT_MOV, base, slot0, SLJIT_IMM, 0);
  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, iter->matchingpath);
  skip = JUMP(SLJIT_JUMP);
  /* Entry point for the jumps collected in u.backtracks. */
  set_jumps(iter->u.backtracks, LABEL());
  OP1(SLJIT_MOV, STR_PTR, 0, base, slot0);
  OP1(SLJIT_MOV, base, slot0, SLJIT_IMM, 0);
  JUMPTO(SLJIT_JUMP, iter->matchingpath);
  JUMPHERE(skip);
  if (on_stack)
    free_stack(common, 1);
  break;

  case OP_MINQUERY:
  /* slot0 is read and cleared; a zero value means there is nothing left
  to try. */
  OP1(SLJIT_MOV, STR_PTR, 0, base, slot0);
  OP1(SLJIT_MOV, base, slot0, SLJIT_IMM, 0);
  skip = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  compile_char1_matchingpath(common, type, cc, &fail_jumps, TRUE);
  JUMPTO(SLJIT_JUMP, iter->matchingpath);
  set_jumps(fail_jumps, LABEL());
  JUMPHERE(skip);
  if (on_stack)
    free_stack(common, 1);
  break;

  /* These opcodes emit nothing on the backtracking path. */
  case OP_EXACT:
  case OP_POSSTAR:
  case OP_POSQUERY:
  case OP_POSUPTO:
  break;

  default:
  SLJIT_UNREACHABLE();
  break;
  }

set_jumps(current->own_backtracks, LABEL());
}
12871
compile_ref_iterator_backtrackingpath(compiler_common * common,struct backtrack_common * current)12872 static SLJIT_INLINE void compile_ref_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12873 {
12874 DEFINE_COMPILER;
12875 PCRE2_SPTR cc = current->cc;
12876 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
12877 PCRE2_UCHAR type;
12878
12879 type = cc[ref ? 1 + IMM2_SIZE : 1 + 2 * IMM2_SIZE];
12880
12881 if ((type & 0x1) == 0)
12882 {
12883 /* Maximize case. */
12884 set_jumps(current->own_backtracks, LABEL());
12885 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12886 free_stack(common, 1);
12887 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
12888 return;
12889 }
12890
12891 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12892 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
12893 set_jumps(current->own_backtracks, LABEL());
12894 free_stack(common, ref ? 2 : 3);
12895 }
12896
compile_recurse_backtrackingpath(compiler_common * common,struct backtrack_common * current)12897 static SLJIT_INLINE void compile_recurse_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12898 {
12899 DEFINE_COMPILER;
12900 recurse_entry *entry;
12901
12902 if (!CURRENT_AS(recurse_backtrack)->inlined_pattern)
12903 {
12904 entry = CURRENT_AS(recurse_backtrack)->entry;
12905 if (entry->backtrack_label == NULL)
12906 add_jump(compiler, &entry->backtrack_calls, JUMP(SLJIT_FAST_CALL));
12907 else
12908 JUMPTO(SLJIT_FAST_CALL, entry->backtrack_label);
12909 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(recurse_backtrack)->matchingpath);
12910 }
12911 else
12912 compile_backtrackingpath(common, current->top);
12913
12914 set_jumps(current->own_backtracks, LABEL());
12915 }
12916
/* Generates the backtracking path of an assertion, which may be preceded
by OP_BRAZERO (OP_BRAMINZERO is not expected here).

Arguments:
  common    compiler state
  current   backtrack record; accessed as an assert_backtrack

With a leading OP_BRAZERO the top stack slot is loaded into STR_PTR; a
non-zero value makes the generated code store zero in the slot and resume
the matching path. */
static void compile_assert_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
assert_backtrack *ab = CURRENT_AS(assert_backtrack);
PCRE2_SPTR cc = current->cc;
BOOL optional;
struct sljit_jump *skipped = NULL;

SLJIT_ASSERT(*cc != OP_BRAMINZERO);
optional = (*cc == OP_BRAZERO);
if (optional)
  {
  /* Step over OP_BRAZERO to the assertion opcode itself. */
  cc++;
  SLJIT_ASSERT(current->own_backtracks == NULL);
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  }

if (ab->framesize < 0)
  {
  /* No frame to revert. */
  set_jumps(current->own_backtracks, LABEL());

  if (optional)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, ab->matchingpath);
    free_stack(common, 1);
    }
  return;
  }

if (optional)
  {
  if (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, ab->matchingpath);
    free_stack(common, 1);
    return;
    }
  free_stack(common, 1);
  /* A zero value skips the code emitted below. */
  skipped = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  }

if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK)
  {
  /* Revert the frame of the positive assertion: STACK_TOP is reloaded
  from private data, the revertframes routine is called, and the value
  found at STACK(-2) is written back to private data. */
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), ab->private_data_ptr);
  add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
  OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (ab->framesize - 1) * sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), ab->private_data_ptr, TMP1, 0);
  }
set_jumps(current->own_backtracks, LABEL());

if (optional)
  {
  /* We know there is enough place on the stack. */
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
  JUMPTO(SLJIT_JUMP, ab->matchingpath);
  JUMPHERE(skipped);
  }
}
12985
compile_bracket_backtrackingpath(compiler_common * common,struct backtrack_common * current)12986 static void compile_bracket_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12987 {
12988 DEFINE_COMPILER;
12989 int opcode, stacksize, alt_count, alt_max;
12990 int offset = 0;
12991 int private_data_ptr = CURRENT_AS(bracket_backtrack)->private_data_ptr;
12992 int repeat_ptr = 0, repeat_type = 0, repeat_count = 0;
12993 PCRE2_SPTR cc = current->cc;
12994 PCRE2_SPTR ccbegin;
12995 PCRE2_SPTR ccprev;
12996 PCRE2_UCHAR bra = OP_BRA;
12997 PCRE2_UCHAR ket;
12998 assert_backtrack *assert;
12999 BOOL has_alternatives;
13000 BOOL needs_control_head = FALSE;
13001 BOOL has_vreverse;
13002 struct sljit_jump *brazero = NULL;
13003 struct sljit_jump *next_alt = NULL;
13004 struct sljit_jump *once = NULL;
13005 struct sljit_jump *cond = NULL;
13006 struct sljit_label *rmin_label = NULL;
13007 struct sljit_label *exact_label = NULL;
13008 struct sljit_put_label *put_label = NULL;
13009
13010 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
13011 {
13012 bra = *cc;
13013 cc++;
13014 }
13015
13016 opcode = *cc;
13017 ccbegin = bracketend(cc) - 1 - LINK_SIZE;
13018 ket = *ccbegin;
13019 if (ket == OP_KET && PRIVATE_DATA(ccbegin) != 0)
13020 {
13021 repeat_ptr = PRIVATE_DATA(ccbegin);
13022 repeat_type = PRIVATE_DATA(ccbegin + 2);
13023 repeat_count = PRIVATE_DATA(ccbegin + 3);
13024 SLJIT_ASSERT(repeat_type != 0 && repeat_count != 0);
13025 if (repeat_type == OP_UPTO)
13026 ket = OP_KETRMAX;
13027 if (repeat_type == OP_MINUPTO)
13028 ket = OP_KETRMIN;
13029 }
13030 ccbegin = cc;
13031 cc += GET(cc, 1);
13032 has_alternatives = *cc == OP_ALT;
13033 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
13034 has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_backtrack)->u.condfailed != NULL;
13035 if (opcode == OP_CBRA || opcode == OP_SCBRA)
13036 offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1;
13037 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
13038 opcode = OP_SCOND;
13039
13040 alt_max = has_alternatives ? no_alternatives(ccbegin) : 0;
13041
13042 /* Decoding the needs_control_head in framesize. */
13043 if (opcode == OP_ONCE)
13044 {
13045 needs_control_head = (CURRENT_AS(bracket_backtrack)->u.framesize & 0x1) != 0;
13046 CURRENT_AS(bracket_backtrack)->u.framesize >>= 1;
13047 }
13048
13049 if (ket != OP_KET && repeat_type != 0)
13050 {
13051 /* TMP1 is used in OP_KETRMIN below. */
13052 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13053 free_stack(common, 1);
13054 if (repeat_type == OP_UPTO)
13055 OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0, SLJIT_IMM, 1);
13056 else
13057 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
13058 }
13059
13060 if (ket == OP_KETRMAX)
13061 {
13062 if (bra == OP_BRAZERO)
13063 {
13064 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13065 free_stack(common, 1);
13066 brazero = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
13067 }
13068 }
13069 else if (ket == OP_KETRMIN)
13070 {
13071 if (bra != OP_BRAMINZERO)
13072 {
13073 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13074 if (repeat_type != 0)
13075 {
13076 /* TMP1 was set a few lines above. */
13077 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
13078 /* Drop STR_PTR for non-greedy plus quantifier. */
13079 if (opcode != OP_ONCE)
13080 free_stack(common, 1);
13081 }
13082 else if (opcode >= OP_SBRA || opcode == OP_ONCE)
13083 {
13084 /* Checking zero-length iteration. */
13085 if (opcode != OP_ONCE || CURRENT_AS(bracket_backtrack)->u.framesize < 0)
13086 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
13087 else
13088 {
13089 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
13090 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 2), CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
13091 }
13092 /* Drop STR_PTR for non-greedy plus quantifier. */
13093 if (opcode != OP_ONCE)
13094 free_stack(common, 1);
13095 }
13096 else
13097 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
13098 }
13099 rmin_label = LABEL();
13100 if (repeat_type != 0)
13101 OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
13102 }
13103 else if (bra == OP_BRAZERO)
13104 {
13105 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13106 free_stack(common, 1);
13107 brazero = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
13108 }
13109 else if (repeat_type == OP_EXACT)
13110 {
13111 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
13112 exact_label = LABEL();
13113 }
13114
13115 if (offset != 0)
13116 {
13117 if (common->capture_last_ptr != 0)
13118 {
13119 SLJIT_ASSERT(common->optimized_cbracket[offset >> 1] == 0);
13120 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13121 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13122 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
13123 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
13124 free_stack(common, 3);
13125 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP2, 0);
13126 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
13127 }
13128 else if (common->optimized_cbracket[offset >> 1] == 0)
13129 {
13130 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13131 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13132 free_stack(common, 2);
13133 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
13134 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
13135 }
13136 }
13137
13138 if (SLJIT_UNLIKELY(opcode == OP_ONCE))
13139 {
13140 if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
13141 {
13142 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
13143 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
13144 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(bracket_backtrack)->u.framesize - 1) * sizeof(sljit_sw));
13145 }
13146 once = JUMP(SLJIT_JUMP);
13147 }
13148 else if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
13149 {
13150 if (has_alternatives)
13151 {
13152 /* Always exactly one alternative. */
13153 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13154 free_stack(common, 1);
13155
13156 alt_max = 2;
13157 next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
13158 }
13159 }
13160 else if (has_alternatives)
13161 {
13162 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13163 free_stack(common, 1);
13164
13165 if (alt_max > 3)
13166 {
13167 sljit_emit_ijump(compiler, SLJIT_JUMP, TMP1, 0);
13168
13169 SLJIT_ASSERT(CURRENT_AS(bracket_backtrack)->u.matching_put_label);
13170 sljit_set_put_label(CURRENT_AS(bracket_backtrack)->u.matching_put_label, LABEL());
13171 sljit_emit_op0(compiler, SLJIT_ENDBR);
13172 }
13173 else
13174 next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
13175 }
13176
13177 COMPILE_BACKTRACKINGPATH(current->top);
13178 if (current->own_backtracks)
13179 set_jumps(current->own_backtracks, LABEL());
13180
13181 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
13182 {
13183 /* Conditional block always has at most one alternative. */
13184 if (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT)
13185 {
13186 SLJIT_ASSERT(has_alternatives);
13187 assert = CURRENT_AS(bracket_backtrack)->u.assert;
13188 if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK))
13189 {
13190 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
13191 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
13192 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
13193 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert->framesize - 1) * sizeof(sljit_sw));
13194 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, TMP1, 0);
13195 }
13196 cond = JUMP(SLJIT_JUMP);
13197 set_jumps(CURRENT_AS(bracket_backtrack)->u.assert->condfailed, LABEL());
13198 }
13199 else if (CURRENT_AS(bracket_backtrack)->u.condfailed != NULL)
13200 {
13201 SLJIT_ASSERT(has_alternatives);
13202 cond = JUMP(SLJIT_JUMP);
13203 set_jumps(CURRENT_AS(bracket_backtrack)->u.condfailed, LABEL());
13204 }
13205 else
13206 SLJIT_ASSERT(!has_alternatives);
13207 }
13208
13209 if (has_alternatives)
13210 {
13211 alt_count = 1;
13212 do
13213 {
13214 current->top = NULL;
13215 current->own_backtracks = NULL;
13216 current->simple_backtracks = NULL;
13217 /* Conditional blocks always have an additional alternative, even if it is empty. */
13218 if (*cc == OP_ALT)
13219 {
13220 ccprev = cc + 1 + LINK_SIZE;
13221 cc += GET(cc, 1);
13222
13223 has_vreverse = FALSE;
13224 if (opcode == OP_ASSERTBACK || opcode == OP_ASSERTBACK_NA)
13225 {
13226 SLJIT_ASSERT(private_data_ptr != 0);
13227 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
13228
13229 has_vreverse = (*ccprev == OP_VREVERSE);
13230 if (*ccprev == OP_REVERSE || has_vreverse)
13231 ccprev = compile_reverse_matchingpath(common, ccprev, current);
13232 }
13233 else if (opcode != OP_COND && opcode != OP_SCOND)
13234 {
13235 if (opcode != OP_ONCE)
13236 {
13237 if (private_data_ptr != 0)
13238 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
13239 else
13240 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13241 }
13242 else
13243 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(needs_control_head ? 1 : 0));
13244 }
13245
13246 compile_matchingpath(common, ccprev, cc, current);
13247 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
13248 return;
13249
13250 switch (opcode)
13251 {
13252 case OP_ASSERTBACK_NA:
13253 if (has_vreverse)
13254 {
13255 SLJIT_ASSERT(current->top != NULL && PRIVATE_DATA(ccbegin + 1));
13256 add_jump(compiler, ¤t->top->simple_backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
13257 }
13258
13259 if (PRIVATE_DATA(ccbegin + 1))
13260 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
13261 break;
13262 case OP_ASSERT_NA:
13263 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
13264 break;
13265 case OP_SCRIPT_RUN:
13266 match_script_run_common(common, private_data_ptr, current);
13267 break;
13268 }
13269 }
13270
13271 /* Instructions after the current alternative is successfully matched. */
13272 /* There is a similar code in compile_bracket_matchingpath. */
13273 if (opcode == OP_ONCE)
13274 match_once_common(common, ket, CURRENT_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
13275
13276 stacksize = 0;
13277 if (repeat_type == OP_MINUPTO)
13278 {
13279 /* We need to preserve the counter. TMP2 will be used below. */
13280 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
13281 stacksize++;
13282 }
13283 if (ket != OP_KET || bra != OP_BRA)
13284 stacksize++;
13285 if (offset != 0)
13286 {
13287 if (common->capture_last_ptr != 0)
13288 stacksize++;
13289 if (common->optimized_cbracket[offset >> 1] == 0)
13290 stacksize += 2;
13291 }
13292 if (opcode != OP_ONCE)
13293 stacksize++;
13294
13295 if (stacksize > 0)
13296 allocate_stack(common, stacksize);
13297
13298 stacksize = 0;
13299 if (repeat_type == OP_MINUPTO)
13300 {
13301 /* TMP2 was set above. */
13302 OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
13303 stacksize++;
13304 }
13305
13306 if (ket != OP_KET || bra != OP_BRA)
13307 {
13308 if (ket != OP_KET)
13309 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
13310 else
13311 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
13312 stacksize++;
13313 }
13314
13315 if (offset != 0)
13316 stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
13317
13318 if (opcode != OP_ONCE)
13319 {
13320 if (alt_max <= 3)
13321 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, alt_count);
13322 else
13323 put_label = sljit_emit_put_label(compiler, SLJIT_MEM1(STACK_TOP), STACK(stacksize));
13324 }
13325
13326 if (offset != 0 && ket == OP_KETRMAX && common->optimized_cbracket[offset >> 1] != 0)
13327 {
13328 /* If ket is not OP_KETRMAX, this code path is executed after the jump to alternative_matchingpath. */
13329 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
13330 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
13331 }
13332
13333 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->alternative_matchingpath);
13334
13335 if (opcode != OP_ONCE)
13336 {
13337 if (alt_max <= 3)
13338 {
13339 JUMPHERE(next_alt);
13340 alt_count++;
13341 if (alt_count < alt_max)
13342 {
13343 SLJIT_ASSERT(alt_count == 2 && alt_max == 3);
13344 next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 1);
13345 }
13346 }
13347 else
13348 {
13349 sljit_set_put_label(put_label, LABEL());
13350 sljit_emit_op0(compiler, SLJIT_ENDBR);
13351 }
13352 }
13353
13354 COMPILE_BACKTRACKINGPATH(current->top);
13355 if (current->own_backtracks)
13356 set_jumps(current->own_backtracks, LABEL());
13357 SLJIT_ASSERT(!current->simple_backtracks);
13358 }
13359 while (*cc == OP_ALT);
13360
13361 if (cond != NULL)
13362 {
13363 SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND);
13364 assert = CURRENT_AS(bracket_backtrack)->u.assert;
13365 if ((ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT) && assert->framesize >= 0)
13366 {
13367 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
13368 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
13369 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
13370 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert->framesize - 1) * sizeof(sljit_sw));
13371 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, TMP1, 0);
13372 }
13373 JUMPHERE(cond);
13374 }
13375
13376 /* Free the STR_PTR. */
13377 if (private_data_ptr == 0)
13378 free_stack(common, 1);
13379 }
13380
13381 if (offset != 0)
13382 {
13383 /* Using both tmp register is better for instruction scheduling. */
13384 if (common->optimized_cbracket[offset >> 1] != 0)
13385 {
13386 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13387 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13388 free_stack(common, 2);
13389 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
13390 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
13391 }
13392 else
13393 {
13394 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13395 free_stack(common, 1);
13396 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
13397 }
13398 }
13399 else if (opcode == OP_ASSERTBACK_NA && PRIVATE_DATA(ccbegin + 1))
13400 {
13401 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13402 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13403 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
13404 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
13405 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), TMP2, 0);
13406 free_stack(common, 4);
13407 }
13408 else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
13409 {
13410 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(0));
13411 free_stack(common, 1);
13412 }
13413 else if (opcode == OP_ONCE)
13414 {
13415 cc = ccbegin + GET(ccbegin, 1);
13416 stacksize = needs_control_head ? 1 : 0;
13417
13418 if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
13419 {
13420 /* Reset head and drop saved frame. */
13421 stacksize += CURRENT_AS(bracket_backtrack)->u.framesize + ((ket != OP_KET || *cc == OP_ALT) ? 2 : 1);
13422 }
13423 else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN))
13424 {
13425 /* The STR_PTR must be released. */
13426 stacksize++;
13427 }
13428
13429 if (stacksize > 0)
13430 free_stack(common, stacksize);
13431
13432 JUMPHERE(once);
13433 /* Restore previous private_data_ptr */
13434 if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
13435 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 1));
13436 else if (ket == OP_KETRMIN)
13437 {
13438 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13439 /* See the comment below. */
13440 free_stack(common, 2);
13441 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
13442 }
13443 }
13444
13445 if (repeat_type == OP_EXACT)
13446 {
13447 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
13448 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
13449 CMPTO(SLJIT_LESS_EQUAL, TMP1, 0, SLJIT_IMM, repeat_count, exact_label);
13450 }
13451 else if (ket == OP_KETRMAX)
13452 {
13453 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13454 if (bra != OP_BRAZERO)
13455 free_stack(common, 1);
13456
13457 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
13458 if (bra == OP_BRAZERO)
13459 {
13460 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13461 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
13462 JUMPHERE(brazero);
13463 free_stack(common, 1);
13464 }
13465 }
13466 else if (ket == OP_KETRMIN)
13467 {
13468 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13469
13470 /* OP_ONCE removes everything in case of a backtrack, so we don't
13471 need to explicitly release the STR_PTR. The extra release would
13472 affect badly the free_stack(2) above. */
13473 if (opcode != OP_ONCE)
13474 free_stack(common, 1);
13475 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rmin_label);
13476 if (opcode == OP_ONCE)
13477 free_stack(common, bra == OP_BRAMINZERO ? 2 : 1);
13478 else if (bra == OP_BRAMINZERO)
13479 free_stack(common, 1);
13480 }
13481 else if (bra == OP_BRAZERO)
13482 {
13483 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13484 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
13485 JUMPHERE(brazero);
13486 }
13487 }
13488
compile_bracketpos_backtrackingpath(compiler_common * common,struct backtrack_common * current)13489 static SLJIT_INLINE void compile_bracketpos_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13490 {
13491 DEFINE_COMPILER;
13492 int offset;
13493 struct sljit_jump *jump;
13494 PCRE2_SPTR cc;
13495
13496 /* No retry on backtrack, just drop everything. */
13497 if (CURRENT_AS(bracketpos_backtrack)->framesize < 0)
13498 {
13499 cc = current->cc;
13500
13501 if (*cc == OP_BRAPOSZERO)
13502 cc++;
13503
13504 if (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS)
13505 {
13506 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
13507 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13508 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13509 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
13510 if (common->capture_last_ptr != 0)
13511 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
13512 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
13513 if (common->capture_last_ptr != 0)
13514 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
13515 }
13516 set_jumps(current->own_backtracks, LABEL());
13517 free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
13518 return;
13519 }
13520
13521 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr);
13522 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
13523 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(bracketpos_backtrack)->framesize - 1) * sizeof(sljit_sw));
13524
13525 if (current->own_backtracks)
13526 {
13527 jump = JUMP(SLJIT_JUMP);
13528 set_jumps(current->own_backtracks, LABEL());
13529 /* Drop the stack frame. */
13530 free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
13531 JUMPHERE(jump);
13532 }
13533 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracketpos_backtrack)->framesize - 1));
13534 }
13535
compile_braminzero_backtrackingpath(compiler_common * common,struct backtrack_common * current)13536 static SLJIT_INLINE void compile_braminzero_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13537 {
13538 assert_backtrack backtrack;
13539
13540 current->top = NULL;
13541 current->own_backtracks = NULL;
13542 current->simple_backtracks = NULL;
13543 if (current->cc[1] > OP_ASSERTBACK_NOT)
13544 {
13545 /* Manual call of compile_bracket_matchingpath and compile_bracket_backtrackingpath. */
13546 compile_bracket_matchingpath(common, current->cc, current);
13547 compile_bracket_backtrackingpath(common, current->top);
13548 }
13549 else
13550 {
13551 memset(&backtrack, 0, sizeof(backtrack));
13552 backtrack.common.cc = current->cc;
13553 backtrack.matchingpath = CURRENT_AS(braminzero_backtrack)->matchingpath;
13554 /* Manual call of compile_assert_matchingpath. */
13555 compile_assert_matchingpath(common, current->cc, &backtrack, FALSE);
13556 }
13557 SLJIT_ASSERT(!current->simple_backtracks && !current->own_backtracks);
13558 }
13559
compile_control_verb_backtrackingpath(compiler_common * common,struct backtrack_common * current)13560 static SLJIT_INLINE void compile_control_verb_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13561 {
13562 DEFINE_COMPILER;
13563 PCRE2_UCHAR opcode = *current->cc;
13564 struct sljit_label *loop;
13565 struct sljit_jump *jump;
13566
13567 if (opcode == OP_THEN || opcode == OP_THEN_ARG)
13568 {
13569 if (common->then_trap != NULL)
13570 {
13571 SLJIT_ASSERT(common->control_head_ptr != 0);
13572
13573 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
13574 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, type_then_trap);
13575 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, common->then_trap->start);
13576 jump = JUMP(SLJIT_JUMP);
13577
13578 loop = LABEL();
13579 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13580 JUMPHERE(jump);
13581 CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0, loop);
13582 CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0, loop);
13583 add_jump(compiler, &common->then_trap->quit, JUMP(SLJIT_JUMP));
13584 return;
13585 }
13586 else if (!common->local_quit_available && common->in_positive_assertion)
13587 {
13588 add_jump(compiler, &common->positive_assertion_quit, JUMP(SLJIT_JUMP));
13589 return;
13590 }
13591 }
13592
13593 if (common->local_quit_available)
13594 {
13595 /* Abort match with a fail. */
13596 if (common->quit_label == NULL)
13597 add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
13598 else
13599 JUMPTO(SLJIT_JUMP, common->quit_label);
13600 return;
13601 }
13602
13603 if (opcode == OP_SKIP_ARG)
13604 {
13605 SLJIT_ASSERT(common->control_head_ptr != 0 && TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
13606 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
13607 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)(current->cc + 2));
13608 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_search_mark));
13609
13610 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_R0, 0);
13611 add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, SLJIT_R0, 0, SLJIT_IMM, 0));
13612 return;
13613 }
13614
13615 if (opcode == OP_SKIP)
13616 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13617 else
13618 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_IMM, 0);
13619 add_jump(compiler, &common->reset_match, JUMP(SLJIT_JUMP));
13620 }
13621
compile_vreverse_backtrackingpath(compiler_common * common,struct backtrack_common * current)13622 static SLJIT_INLINE void compile_vreverse_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13623 {
13624 DEFINE_COMPILER;
13625 struct sljit_jump *jump;
13626 struct sljit_label *label;
13627
13628 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
13629 jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(3));
13630 skip_valid_char(common);
13631 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), STR_PTR, 0);
13632 JUMPTO(SLJIT_JUMP, CURRENT_AS(vreverse_backtrack)->matchingpath);
13633
13634 label = LABEL();
13635 sljit_set_label(jump, label);
13636 set_jumps(current->own_backtracks, label);
13637 }
13638
compile_then_trap_backtrackingpath(compiler_common * common,struct backtrack_common * current)13639 static SLJIT_INLINE void compile_then_trap_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13640 {
13641 DEFINE_COMPILER;
13642 struct sljit_jump *jump;
13643 int size;
13644
13645 if (CURRENT_AS(then_trap_backtrack)->then_trap)
13646 {
13647 common->then_trap = CURRENT_AS(then_trap_backtrack)->then_trap;
13648 return;
13649 }
13650
13651 size = CURRENT_AS(then_trap_backtrack)->framesize;
13652 size = 3 + (size < 0 ? 0 : size);
13653
13654 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(size - 3));
13655 free_stack(common, size);
13656 jump = JUMP(SLJIT_JUMP);
13657
13658 set_jumps(CURRENT_AS(then_trap_backtrack)->quit, LABEL());
13659 /* STACK_TOP is set by THEN. */
13660 if (CURRENT_AS(then_trap_backtrack)->framesize >= 0)
13661 {
13662 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
13663 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(then_trap_backtrack)->framesize - 1) * sizeof(sljit_sw));
13664 }
13665 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13666 free_stack(common, 3);
13667
13668 JUMPHERE(jump);
13669 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
13670 }
13671
/* Emits the backtracking code for a chain of backtrack records. The chain is
followed through the prev links, and each record is dispatched on the opcode
its cc member points to.

Arguments:
  common      the compiler state
  current     the first record of the chain (may be NULL)

The active then trap of the compiler state is saved on entry and restored
before returning. */

static void compile_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
then_trap_backtrack *save_then_trap = common->then_trap;

for (; current != NULL; current = current->prev)
  {
  /* Simple backtracks land right before the code emitted for the record. */
  if (current->simple_backtracks != NULL)
    set_jumps(current->simple_backtracks, LABEL());

  switch(*current->cc)
    {
    case OP_SET_SOM:
    /* Pop the saved value into OVECTOR(0). */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP1, 0);
    break;

    /* Single character, negated character, character type and class
    iterators. */
    case OP_STAR: case OP_MINSTAR: case OP_PLUS: case OP_MINPLUS:
    case OP_QUERY: case OP_MINQUERY: case OP_UPTO: case OP_MINUPTO:
    case OP_EXACT:
    case OP_POSSTAR: case OP_POSPLUS: case OP_POSQUERY: case OP_POSUPTO:
    case OP_STARI: case OP_MINSTARI: case OP_PLUSI: case OP_MINPLUSI:
    case OP_QUERYI: case OP_MINQUERYI: case OP_UPTOI: case OP_MINUPTOI:
    case OP_EXACTI:
    case OP_POSSTARI: case OP_POSPLUSI: case OP_POSQUERYI: case OP_POSUPTOI:
    case OP_NOTSTAR: case OP_NOTMINSTAR: case OP_NOTPLUS: case OP_NOTMINPLUS:
    case OP_NOTQUERY: case OP_NOTMINQUERY: case OP_NOTUPTO: case OP_NOTMINUPTO:
    case OP_NOTEXACT:
    case OP_NOTPOSSTAR: case OP_NOTPOSPLUS: case OP_NOTPOSQUERY: case OP_NOTPOSUPTO:
    case OP_NOTSTARI: case OP_NOTMINSTARI: case OP_NOTPLUSI: case OP_NOTMINPLUSI:
    case OP_NOTQUERYI: case OP_NOTMINQUERYI: case OP_NOTUPTOI: case OP_NOTMINUPTOI:
    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI: case OP_NOTPOSPLUSI: case OP_NOTPOSQUERYI: case OP_NOTPOSUPTOI:
    case OP_TYPESTAR: case OP_TYPEMINSTAR: case OP_TYPEPLUS: case OP_TYPEMINPLUS:
    case OP_TYPEQUERY: case OP_TYPEMINQUERY: case OP_TYPEUPTO: case OP_TYPEMINUPTO:
    case OP_TYPEEXACT:
    case OP_TYPEPOSSTAR: case OP_TYPEPOSPLUS: case OP_TYPEPOSQUERY: case OP_TYPEPOSUPTO:
    case OP_CLASS: case OP_NCLASS:
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
#endif
    compile_iterator_backtrackingpath(common, current);
    break;

    case OP_REF: case OP_REFI: case OP_DNREF: case OP_DNREFI:
    compile_ref_iterator_backtrackingpath(common, current);
    break;

    case OP_RECURSE:
    compile_recurse_backtrackingpath(common, current);
    break;

    case OP_ASSERT: case OP_ASSERT_NOT:
    case OP_ASSERTBACK: case OP_ASSERTBACK_NOT:
    compile_assert_backtrackingpath(common, current);
    break;

    case OP_ASSERT_NA: case OP_ASSERTBACK_NA:
    case OP_ONCE: case OP_SCRIPT_RUN:
    case OP_BRA: case OP_CBRA: case OP_COND:
    case OP_SBRA: case OP_SCBRA: case OP_SCOND:
    compile_bracket_backtrackingpath(common, current);
    break;

    case OP_BRAZERO:
    /* The opcode after OP_BRAZERO selects between the bracket and the
    assertion generator. */
    if (current->cc[1] > OP_ASSERTBACK_NOT)
      compile_bracket_backtrackingpath(common, current);
    else
      compile_assert_backtrackingpath(common, current);
    break;

    case OP_BRAPOS: case OP_CBRAPOS:
    case OP_SBRAPOS: case OP_SCBRAPOS:
    case OP_BRAPOSZERO:
    compile_bracketpos_backtrackingpath(common, current);
    break;

    case OP_BRAMINZERO:
    compile_braminzero_backtrackingpath(common, current);
    break;

    case OP_MARK:
    if (common->has_skip_arg)
      {
      /* Five words were saved: the mark value is in slot 4 and the control
      head is in slot 0. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(4));
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      free_stack(common, 5);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);
      }
    else
      {
      /* Only the mark value was saved. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      free_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
      }
    break;

    case OP_THEN: case OP_THEN_ARG:
    case OP_PRUNE: case OP_PRUNE_ARG:
    case OP_SKIP: case OP_SKIP_ARG:
    compile_control_verb_backtrackingpath(common, current);
    break;

    case OP_COMMIT: case OP_COMMIT_ARG:
    /* The return register is loaded with PCRE2_ERROR_NOMATCH only when no
    local quit is available; the jump to the quit target is always emitted. */
    if (!common->local_quit_available)
      OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
    if (common->quit_label == NULL)
      add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
    else
      JUMPTO(SLJIT_JUMP, common->quit_label);
    break;

    case OP_CALLOUT: case OP_CALLOUT_STR:
    case OP_FAIL:
    case OP_ACCEPT: case OP_ASSERT_ACCEPT:
    /* Nothing to undo, only the pending jumps are bound. */
    set_jumps(current->own_backtracks, LABEL());
    break;

    case OP_VREVERSE:
    compile_vreverse_backtrackingpath(common, current);
    break;

    case OP_THEN_TRAP:
    /* A virtual opcode for then traps. */
    compile_then_trap_backtrackingpath(common, current);
    break;

    default:
    SLJIT_UNREACHABLE();
    break;
    }
  }

common->then_trap = save_then_trap;
}
13866
compile_recurse(compiler_common * common)13867 static SLJIT_INLINE void compile_recurse(compiler_common *common)
13868 {
13869 DEFINE_COMPILER;
13870 PCRE2_SPTR cc = common->start + common->currententry->start;
13871 PCRE2_SPTR ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE);
13872 PCRE2_SPTR ccend = bracketend(cc) - (1 + LINK_SIZE);
13873 uint32_t recurse_flags = 0;
13874 int private_data_size = get_recurse_data_length(common, ccbegin, ccend, &recurse_flags);
13875 int alt_count, alt_max, local_size;
13876 backtrack_common altbacktrack;
13877 jump_list *match = NULL;
13878 struct sljit_jump *next_alt = NULL;
13879 struct sljit_jump *accept_exit = NULL;
13880 struct sljit_label *quit;
13881 struct sljit_put_label *put_label = NULL;
13882
13883 /* Recurse captures then. */
13884 common->then_trap = NULL;
13885
13886 SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS);
13887
13888 alt_max = no_alternatives(cc);
13889 alt_count = 0;
13890
13891 /* Matching path. */
13892 SLJIT_ASSERT(common->currententry->entry_label == NULL && common->recursive_head_ptr != 0);
13893 common->currententry->entry_label = LABEL();
13894 set_jumps(common->currententry->entry_calls, common->currententry->entry_label);
13895
13896 sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, TMP2, 0);
13897 count_match(common);
13898
13899 local_size = (alt_max > 1) ? 2 : 1;
13900
13901 /* (Reversed) stack layout:
13902 [private data][return address][optional: str ptr] ... [optional: alternative index][recursive_head_ptr] */
13903
13904 allocate_stack(common, private_data_size + local_size);
13905 /* Save return address. */
13906 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP2, 0);
13907
13908 copy_recurse_data(common, ccbegin, ccend, recurse_copy_from_global, local_size, private_data_size + local_size, recurse_flags);
13909
13910 /* This variable is saved and restored all time when we enter or exit from a recursive context. */
13911 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, STACK_TOP, 0);
13912
13913 if (recurse_flags & recurse_flag_control_head_found)
13914 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
13915
13916 if (alt_max > 1)
13917 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
13918
13919 memset(&altbacktrack, 0, sizeof(backtrack_common));
13920 common->quit_label = NULL;
13921 common->accept_label = NULL;
13922 common->quit = NULL;
13923 common->accept = NULL;
13924 altbacktrack.cc = ccbegin;
13925 cc += GET(cc, 1);
13926 while (1)
13927 {
13928 altbacktrack.top = NULL;
13929 altbacktrack.own_backtracks = NULL;
13930
13931 if (altbacktrack.cc != ccbegin)
13932 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13933
13934 compile_matchingpath(common, altbacktrack.cc, cc, &altbacktrack);
13935 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
13936 return;
13937
13938 allocate_stack(common, (alt_max > 1 || (recurse_flags & recurse_flag_accept_found)) ? 2 : 1);
13939 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
13940
13941 if (alt_max > 1 || (recurse_flags & recurse_flag_accept_found))
13942 {
13943 if (alt_max > 3)
13944 put_label = sljit_emit_put_label(compiler, SLJIT_MEM1(STACK_TOP), STACK(1));
13945 else
13946 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, alt_count);
13947 }
13948
13949 add_jump(compiler, &match, JUMP(SLJIT_JUMP));
13950
13951 if (alt_count == 0)
13952 {
13953 /* Backtracking path entry. */
13954 SLJIT_ASSERT(common->currententry->backtrack_label == NULL);
13955 common->currententry->backtrack_label = LABEL();
13956 set_jumps(common->currententry->backtrack_calls, common->currententry->backtrack_label);
13957
13958 sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, TMP1, 0);
13959
13960 if (recurse_flags & recurse_flag_accept_found)
13961 accept_exit = CMP(SLJIT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, -1);
13962
13963 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13964 /* Save return address. */
13965 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(local_size - 1), TMP1, 0);
13966
13967 copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, recurse_flags);
13968
13969 if (alt_max > 1)
13970 {
13971 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13972 free_stack(common, 2);
13973
13974 if (alt_max > 3)
13975 {
13976 sljit_emit_ijump(compiler, SLJIT_JUMP, TMP1, 0);
13977 sljit_set_put_label(put_label, LABEL());
13978 sljit_emit_op0(compiler, SLJIT_ENDBR);
13979 }
13980 else
13981 next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
13982 }
13983 else
13984 free_stack(common, (recurse_flags & recurse_flag_accept_found) ? 2 : 1);
13985 }
13986 else if (alt_max > 3)
13987 {
13988 sljit_set_put_label(put_label, LABEL());
13989 sljit_emit_op0(compiler, SLJIT_ENDBR);
13990 }
13991 else
13992 {
13993 JUMPHERE(next_alt);
13994 if (alt_count + 1 < alt_max)
13995 {
13996 SLJIT_ASSERT(alt_count == 1 && alt_max == 3);
13997 next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 1);
13998 }
13999 }
14000
14001 alt_count++;
14002
14003 compile_backtrackingpath(common, altbacktrack.top);
14004 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
14005 return;
14006 set_jumps(altbacktrack.own_backtracks, LABEL());
14007
14008 if (*cc != OP_ALT)
14009 break;
14010
14011 altbacktrack.cc = cc + 1 + LINK_SIZE;
14012 cc += GET(cc, 1);
14013 }
14014
14015 /* No alternative is matched. */
14016
14017 quit = LABEL();
14018
14019 copy_recurse_data(common, ccbegin, ccend, recurse_copy_private_to_global, local_size, private_data_size + local_size, recurse_flags);
14020
14021 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1));
14022 free_stack(common, private_data_size + local_size);
14023 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
14024 OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);
14025
14026 if (common->quit != NULL)
14027 {
14028 SLJIT_ASSERT(recurse_flags & recurse_flag_quit_found);
14029
14030 set_jumps(common->quit, LABEL());
14031 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
14032 copy_recurse_data(common, ccbegin, ccend, recurse_copy_shared_to_global, local_size, private_data_size + local_size, recurse_flags);
14033 JUMPTO(SLJIT_JUMP, quit);
14034 }
14035
14036 if (recurse_flags & recurse_flag_accept_found)
14037 {
14038 JUMPHERE(accept_exit);
14039 free_stack(common, 2);
14040
14041 /* Save return address. */
14042 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP1, 0);
14043
14044 copy_recurse_data(common, ccbegin, ccend, recurse_copy_kept_shared_to_global, local_size, private_data_size + local_size, recurse_flags);
14045
14046 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1));
14047 free_stack(common, private_data_size + local_size);
14048 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
14049 OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);
14050 }
14051
14052 if (common->accept != NULL)
14053 {
14054 SLJIT_ASSERT(recurse_flags & recurse_flag_accept_found);
14055
14056 set_jumps(common->accept, LABEL());
14057
14058 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
14059 OP1(SLJIT_MOV, TMP2, 0, STACK_TOP, 0);
14060
14061 allocate_stack(common, 2);
14062 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, -1);
14063 }
14064
14065 set_jumps(match, LABEL());
14066
14067 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
14068
14069 copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, recurse_flags);
14070
14071 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), STACK(local_size - 1));
14072 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
14073 OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);
14074 }
14075
14076 #undef COMPILE_BACKTRACKINGPATH
14077 #undef CURRENT_AS
14078
14079 #define PUBLIC_JIT_COMPILE_CONFIGURATION_OPTIONS \
14080 (PCRE2_JIT_INVALID_UTF)
14081
jit_compile(pcre2_code * code,sljit_u32 mode)14082 static int jit_compile(pcre2_code *code, sljit_u32 mode)
14083 {
14084 pcre2_real_code *re = (pcre2_real_code *)code;
14085 struct sljit_compiler *compiler;
14086 backtrack_common rootbacktrack;
14087 compiler_common common_data;
14088 compiler_common *common = &common_data;
14089 const sljit_u8 *tables = re->tables;
14090 void *allocator_data = &re->memctl;
14091 int private_data_size;
14092 PCRE2_SPTR ccend;
14093 executable_functions *functions;
14094 void *executable_func;
14095 sljit_uw executable_size;
14096 sljit_uw total_length;
14097 struct sljit_label *mainloop_label = NULL;
14098 struct sljit_label *continue_match_label;
14099 struct sljit_label *empty_match_found_label = NULL;
14100 struct sljit_label *empty_match_backtrack_label = NULL;
14101 struct sljit_label *reset_match_label;
14102 struct sljit_label *quit_label;
14103 struct sljit_jump *jump;
14104 struct sljit_jump *minlength_check_failed = NULL;
14105 struct sljit_jump *empty_match = NULL;
14106 struct sljit_jump *end_anchor_failed = NULL;
14107 jump_list *reqcu_not_found = NULL;
14108
14109 SLJIT_ASSERT(tables);
14110
14111 #if HAS_VIRTUAL_REGISTERS == 1
14112 SLJIT_ASSERT(sljit_get_register_index(SLJIT_GP_REGISTER, TMP3) < 0 && sljit_get_register_index(SLJIT_GP_REGISTER, ARGUMENTS) < 0 && sljit_get_register_index(SLJIT_GP_REGISTER, RETURN_ADDR) < 0);
14113 #elif HAS_VIRTUAL_REGISTERS == 0
14114 SLJIT_ASSERT(sljit_get_register_index(SLJIT_GP_REGISTER, TMP3) >= 0 && sljit_get_register_index(SLJIT_GP_REGISTER, ARGUMENTS) >= 0 && sljit_get_register_index(SLJIT_GP_REGISTER, RETURN_ADDR) >= 0);
14115 #else
14116 #error "Invalid value for HAS_VIRTUAL_REGISTERS"
14117 #endif
14118
14119 memset(&rootbacktrack, 0, sizeof(backtrack_common));
14120 memset(common, 0, sizeof(compiler_common));
14121 common->re = re;
14122 common->name_table = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code));
14123 rootbacktrack.cc = common->name_table + re->name_count * re->name_entry_size;
14124
14125 #ifdef SUPPORT_UNICODE
14126 common->invalid_utf = (mode & PCRE2_JIT_INVALID_UTF) != 0;
14127 #endif /* SUPPORT_UNICODE */
14128 mode &= ~PUBLIC_JIT_COMPILE_CONFIGURATION_OPTIONS;
14129
14130 common->start = rootbacktrack.cc;
14131 common->read_only_data_head = NULL;
14132 common->fcc = tables + fcc_offset;
14133 common->lcc = (sljit_sw)(tables + lcc_offset);
14134 common->mode = mode;
14135 common->might_be_empty = (re->minlength == 0) || (re->flags & PCRE2_MATCH_EMPTY);
14136 common->allow_empty_partial = (re->max_lookbehind > 0) || (re->flags & PCRE2_MATCH_EMPTY);
14137 common->nltype = NLTYPE_FIXED;
14138 switch(re->newline_convention)
14139 {
14140 case PCRE2_NEWLINE_CR: common->newline = CHAR_CR; break;
14141 case PCRE2_NEWLINE_LF: common->newline = CHAR_NL; break;
14142 case PCRE2_NEWLINE_CRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; break;
14143 case PCRE2_NEWLINE_ANY: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
14144 case PCRE2_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
14145 case PCRE2_NEWLINE_NUL: common->newline = CHAR_NUL; break;
14146 default: return PCRE2_ERROR_INTERNAL;
14147 }
14148 common->nlmax = READ_CHAR_MAX;
14149 common->nlmin = 0;
14150 if (re->bsr_convention == PCRE2_BSR_UNICODE)
14151 common->bsr_nltype = NLTYPE_ANY;
14152 else if (re->bsr_convention == PCRE2_BSR_ANYCRLF)
14153 common->bsr_nltype = NLTYPE_ANYCRLF;
14154 else
14155 {
14156 #ifdef BSR_ANYCRLF
14157 common->bsr_nltype = NLTYPE_ANYCRLF;
14158 #else
14159 common->bsr_nltype = NLTYPE_ANY;
14160 #endif
14161 }
14162 common->bsr_nlmax = READ_CHAR_MAX;
14163 common->bsr_nlmin = 0;
14164 common->endonly = (re->overall_options & PCRE2_DOLLAR_ENDONLY) != 0;
14165 common->ctypes = (sljit_sw)(tables + ctypes_offset);
14166 common->name_count = re->name_count;
14167 common->name_entry_size = re->name_entry_size;
14168 common->unset_backref = (re->overall_options & PCRE2_MATCH_UNSET_BACKREF) != 0;
14169 common->alt_circumflex = (re->overall_options & PCRE2_ALT_CIRCUMFLEX) != 0;
14170 #ifdef SUPPORT_UNICODE
14171 /* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */
14172 common->utf = (re->overall_options & PCRE2_UTF) != 0;
14173 common->ucp = (re->overall_options & PCRE2_UCP) != 0;
14174 if (common->utf)
14175 {
14176 if (common->nltype == NLTYPE_ANY)
14177 common->nlmax = 0x2029;
14178 else if (common->nltype == NLTYPE_ANYCRLF)
14179 common->nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
14180 else
14181 {
14182 /* We only care about the first newline character. */
14183 common->nlmax = common->newline & 0xff;
14184 }
14185
14186 if (common->nltype == NLTYPE_FIXED)
14187 common->nlmin = common->newline & 0xff;
14188 else
14189 common->nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
14190
14191 if (common->bsr_nltype == NLTYPE_ANY)
14192 common->bsr_nlmax = 0x2029;
14193 else
14194 common->bsr_nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
14195 common->bsr_nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
14196 }
14197 else
14198 common->invalid_utf = FALSE;
14199 #endif /* SUPPORT_UNICODE */
14200 ccend = bracketend(common->start);
14201
14202 /* Calculate the local space size on the stack. */
14203 common->ovector_start = LIMIT_MATCH + sizeof(sljit_sw);
14204 common->optimized_cbracket = (sljit_u8 *)SLJIT_MALLOC(re->top_bracket + 1, allocator_data);
14205 if (!common->optimized_cbracket)
14206 return PCRE2_ERROR_NOMEMORY;
14207 #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 1
14208 memset(common->optimized_cbracket, 0, re->top_bracket + 1);
14209 #else
14210 memset(common->optimized_cbracket, 1, re->top_bracket + 1);
14211 #endif
14212
14213 SLJIT_ASSERT(*common->start == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET);
14214 #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 2
14215 common->capture_last_ptr = common->ovector_start;
14216 common->ovector_start += sizeof(sljit_sw);
14217 #endif
14218 if (!check_opcode_types(common, common->start, ccend))
14219 {
14220 SLJIT_FREE(common->optimized_cbracket, allocator_data);
14221 return PCRE2_ERROR_NOMEMORY;
14222 }
14223
14224 /* Checking flags and updating ovector_start. */
14225 if (mode == PCRE2_JIT_COMPLETE && (re->flags & PCRE2_LASTSET) != 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
14226 {
14227 common->req_char_ptr = common->ovector_start;
14228 common->ovector_start += sizeof(sljit_sw);
14229 }
14230 if (mode != PCRE2_JIT_COMPLETE)
14231 {
14232 common->start_used_ptr = common->ovector_start;
14233 common->ovector_start += sizeof(sljit_sw);
14234 if (mode == PCRE2_JIT_PARTIAL_SOFT)
14235 {
14236 common->hit_start = common->ovector_start;
14237 common->ovector_start += sizeof(sljit_sw);
14238 }
14239 }
14240 if ((re->overall_options & (PCRE2_FIRSTLINE | PCRE2_USE_OFFSET_LIMIT)) != 0)
14241 {
14242 common->match_end_ptr = common->ovector_start;
14243 common->ovector_start += sizeof(sljit_sw);
14244 }
14245 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
14246 common->control_head_ptr = 1;
14247 #endif
14248 if (common->control_head_ptr != 0)
14249 {
14250 common->control_head_ptr = common->ovector_start;
14251 common->ovector_start += sizeof(sljit_sw);
14252 }
14253 if (common->has_set_som)
14254 {
14255 /* Saving the real start pointer is necessary. */
14256 common->start_ptr = common->ovector_start;
14257 common->ovector_start += sizeof(sljit_sw);
14258 }
14259
14260 /* Aligning ovector to even number of sljit words. */
14261 if ((common->ovector_start & sizeof(sljit_sw)) != 0)
14262 common->ovector_start += sizeof(sljit_sw);
14263
14264 if (common->start_ptr == 0)
14265 common->start_ptr = OVECTOR(0);
14266
14267 /* Capturing brackets cannot be optimized if callouts are allowed. */
14268 if (common->capture_last_ptr != 0)
14269 memset(common->optimized_cbracket, 0, re->top_bracket + 1);
14270
14271 SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0));
14272 common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw);
14273
14274 total_length = ccend - common->start;
14275 common->private_data_ptrs = (sljit_s32*)SLJIT_MALLOC(total_length * (sizeof(sljit_s32) + (common->has_then ? 1 : 0)), allocator_data);
14276 if (!common->private_data_ptrs)
14277 {
14278 SLJIT_FREE(common->optimized_cbracket, allocator_data);
14279 return PCRE2_ERROR_NOMEMORY;
14280 }
14281 memset(common->private_data_ptrs, 0, total_length * sizeof(sljit_s32));
14282
14283 private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw);
14284
14285 if ((re->overall_options & PCRE2_ANCHORED) == 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 && !common->has_skip_in_assert_back)
14286 detect_early_fail(common, common->start, &private_data_size, 0, 0);
14287
14288 set_private_data_ptrs(common, &private_data_size, ccend);
14289
14290 SLJIT_ASSERT(common->early_fail_start_ptr <= common->early_fail_end_ptr);
14291
14292 if (private_data_size > 65536)
14293 {
14294 SLJIT_FREE(common->private_data_ptrs, allocator_data);
14295 SLJIT_FREE(common->optimized_cbracket, allocator_data);
14296 return PCRE2_ERROR_NOMEMORY;
14297 }
14298
14299 if (common->has_then)
14300 {
14301 common->then_offsets = (sljit_u8 *)(common->private_data_ptrs + total_length);
14302 memset(common->then_offsets, 0, total_length);
14303 set_then_offsets(common, common->start, NULL);
14304 }
14305
14306 compiler = sljit_create_compiler(allocator_data, NULL);
14307 if (!compiler)
14308 {
14309 SLJIT_FREE(common->optimized_cbracket, allocator_data);
14310 SLJIT_FREE(common->private_data_ptrs, allocator_data);
14311 return PCRE2_ERROR_NOMEMORY;
14312 }
14313 common->compiler = compiler;
14314
14315 /* Main pcre2_jit_exec entry. */
14316 SLJIT_ASSERT((private_data_size & (sizeof(sljit_sw) - 1)) == 0);
14317 sljit_emit_enter(compiler, 0, SLJIT_ARGS1(W, W), 5, 5, SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS, 0, private_data_size);
14318
14319 /* Register init. */
14320 reset_ovector(common, (re->top_bracket + 1) * 2);
14321 if (common->req_char_ptr != 0)
14322 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, SLJIT_R0, 0);
14323
14324 OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_S0, 0);
14325 OP1(SLJIT_MOV, TMP1, 0, SLJIT_S0, 0);
14326 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
14327 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end));
14328 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
14329 OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, limit_match));
14330 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, end));
14331 OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, start));
14332 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
14333 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH, TMP1, 0);
14334
14335 if (common->early_fail_start_ptr < common->early_fail_end_ptr)
14336 reset_early_fail(common);
14337
14338 if (mode == PCRE2_JIT_PARTIAL_SOFT)
14339 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
14340 if (common->mark_ptr != 0)
14341 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
14342 if (common->control_head_ptr != 0)
14343 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
14344
14345 /* Main part of the matching */
14346 if ((re->overall_options & PCRE2_ANCHORED) == 0)
14347 {
14348 mainloop_label = mainloop_entry(common);
14349 continue_match_label = LABEL();
14350 /* Forward search if possible. */
14351 if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
14352 {
14353 if (mode == PCRE2_JIT_COMPLETE && fast_forward_first_n_chars(common))
14354 ;
14355 else if ((re->flags & PCRE2_FIRSTSET) != 0)
14356 fast_forward_first_char(common);
14357 else if ((re->flags & PCRE2_STARTLINE) != 0)
14358 fast_forward_newline(common);
14359 else if ((re->flags & PCRE2_FIRSTMAPSET) != 0)
14360 fast_forward_start_bits(common);
14361 }
14362 }
14363 else
14364 continue_match_label = LABEL();
14365
14366 if (mode == PCRE2_JIT_COMPLETE && re->minlength > 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
14367 {
14368 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
14369 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(re->minlength));
14370 minlength_check_failed = CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0);
14371 }
14372 if (common->req_char_ptr != 0)
14373 reqcu_not_found = search_requested_char(common, (PCRE2_UCHAR)(re->last_codeunit), (re->flags & PCRE2_LASTCASELESS) != 0, (re->flags & PCRE2_FIRSTSET) != 0);
14374
14375 /* Store the current STR_PTR in OVECTOR(0). */
14376 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
14377 /* Copy the limit of allowed recursions. */
14378 OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH);
14379 if (common->capture_last_ptr != 0)
14380 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, 0);
14381 if (common->fast_forward_bc_ptr != NULL)
14382 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), PRIVATE_DATA(common->fast_forward_bc_ptr + 1) >> 3, STR_PTR, 0);
14383
14384 if (common->start_ptr != OVECTOR(0))
14385 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_ptr, STR_PTR, 0);
14386
14387 /* Copy the beginning of the string. */
14388 if (mode == PCRE2_JIT_PARTIAL_SOFT)
14389 {
14390 jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
14391 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
14392 JUMPHERE(jump);
14393 }
14394 else if (mode == PCRE2_JIT_PARTIAL_HARD)
14395 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
14396
14397 compile_matchingpath(common, common->start, ccend, &rootbacktrack);
14398 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
14399 {
14400 sljit_free_compiler(compiler);
14401 SLJIT_FREE(common->optimized_cbracket, allocator_data);
14402 SLJIT_FREE(common->private_data_ptrs, allocator_data);
14403 PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14404 return PCRE2_ERROR_NOMEMORY;
14405 }
14406
14407 if ((re->overall_options & PCRE2_ENDANCHORED) != 0)
14408 end_anchor_failed = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0);
14409
14410 if (common->might_be_empty)
14411 {
14412 empty_match = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
14413 empty_match_found_label = LABEL();
14414 }
14415
14416 common->accept_label = LABEL();
14417 if (common->accept != NULL)
14418 set_jumps(common->accept, common->accept_label);
14419
14420 /* This means we have a match. Update the ovector. */
14421 copy_ovector(common, re->top_bracket + 1);
14422 common->quit_label = common->abort_label = LABEL();
14423 if (common->quit != NULL)
14424 set_jumps(common->quit, common->quit_label);
14425 if (common->abort != NULL)
14426 set_jumps(common->abort, common->abort_label);
14427 if (minlength_check_failed != NULL)
14428 SET_LABEL(minlength_check_failed, common->abort_label);
14429
14430 sljit_emit_op0(compiler, SLJIT_SKIP_FRAMES_BEFORE_RETURN);
14431 sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0);
14432
14433 if (common->failed_match != NULL)
14434 {
14435 SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);
14436 set_jumps(common->failed_match, LABEL());
14437 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
14438 JUMPTO(SLJIT_JUMP, common->abort_label);
14439 }
14440
14441 if ((re->overall_options & PCRE2_ENDANCHORED) != 0)
14442 JUMPHERE(end_anchor_failed);
14443
14444 if (mode != PCRE2_JIT_COMPLETE)
14445 {
14446 common->partialmatchlabel = LABEL();
14447 set_jumps(common->partialmatch, common->partialmatchlabel);
14448 return_with_partial_match(common, common->quit_label);
14449 }
14450
14451 if (common->might_be_empty)
14452 empty_match_backtrack_label = LABEL();
14453 compile_backtrackingpath(common, rootbacktrack.top);
14454 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
14455 {
14456 sljit_free_compiler(compiler);
14457 SLJIT_FREE(common->optimized_cbracket, allocator_data);
14458 SLJIT_FREE(common->private_data_ptrs, allocator_data);
14459 PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14460 return PCRE2_ERROR_NOMEMORY;
14461 }
14462
14463 SLJIT_ASSERT(rootbacktrack.prev == NULL);
14464 reset_match_label = LABEL();
14465
14466 if (mode == PCRE2_JIT_PARTIAL_SOFT)
14467 {
14468 /* Update hit_start only in the first time. */
14469 jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
14470 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
14471 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
14472 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, TMP1, 0);
14473 JUMPHERE(jump);
14474 }
14475
14476 /* Check we have remaining characters. */
14477 if ((re->overall_options & PCRE2_ANCHORED) == 0 && common->match_end_ptr != 0)
14478 {
14479 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
14480 }
14481
14482 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP),
14483 (common->fast_forward_bc_ptr != NULL) ? (PRIVATE_DATA(common->fast_forward_bc_ptr + 1) >> 3) : common->start_ptr);
14484
14485 if ((re->overall_options & PCRE2_ANCHORED) == 0)
14486 {
14487 if (common->ff_newline_shortcut != NULL)
14488 {
14489 /* There cannot be more newlines if PCRE2_FIRSTLINE is set. */
14490 if ((re->overall_options & PCRE2_FIRSTLINE) == 0)
14491 {
14492 if (common->match_end_ptr != 0)
14493 {
14494 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
14495 OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
14496 CMPTO(SLJIT_LESS, STR_PTR, 0, TMP1, 0, common->ff_newline_shortcut);
14497 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
14498 }
14499 else
14500 CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, common->ff_newline_shortcut);
14501 }
14502 }
14503 else
14504 CMPTO(SLJIT_LESS, STR_PTR, 0, (common->match_end_ptr == 0) ? STR_END : TMP1, 0, mainloop_label);
14505 }
14506
14507 /* No more remaining characters. */
14508 if (reqcu_not_found != NULL)
14509 set_jumps(reqcu_not_found, LABEL());
14510
14511 if (mode == PCRE2_JIT_PARTIAL_SOFT)
14512 CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1, common->partialmatchlabel);
14513
14514 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
14515 JUMPTO(SLJIT_JUMP, common->quit_label);
14516
14517 flush_stubs(common);
14518
14519 if (common->might_be_empty)
14520 {
14521 JUMPHERE(empty_match);
14522 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
14523 OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options));
14524 OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);
14525 JUMPTO(SLJIT_NOT_ZERO, empty_match_backtrack_label);
14526 OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);
14527 JUMPTO(SLJIT_ZERO, empty_match_found_label);
14528 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
14529 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found_label);
14530 JUMPTO(SLJIT_JUMP, empty_match_backtrack_label);
14531 }
14532
14533 common->fast_forward_bc_ptr = NULL;
14534 common->early_fail_start_ptr = 0;
14535 common->early_fail_end_ptr = 0;
14536 common->currententry = common->entries;
14537 common->local_quit_available = TRUE;
14538 quit_label = common->quit_label;
14539 if (common->currententry != NULL)
14540 {
14541 /* A free bit for each private data. */
14542 common->recurse_bitset_size = ((private_data_size / SSIZE_OF(sw)) + 7) >> 3;
14543 SLJIT_ASSERT(common->recurse_bitset_size > 0);
14544 common->recurse_bitset = (sljit_u8*)SLJIT_MALLOC(common->recurse_bitset_size, allocator_data);;
14545
14546 if (common->recurse_bitset != NULL)
14547 {
14548 do
14549 {
14550 /* Might add new entries. */
14551 compile_recurse(common);
14552 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
14553 break;
14554 flush_stubs(common);
14555 common->currententry = common->currententry->next;
14556 }
14557 while (common->currententry != NULL);
14558
14559 SLJIT_FREE(common->recurse_bitset, allocator_data);
14560 }
14561
14562 if (common->currententry != NULL)
14563 {
14564 /* The common->recurse_bitset has been freed. */
14565 SLJIT_ASSERT(sljit_get_compiler_error(compiler) || common->recurse_bitset == NULL);
14566
14567 sljit_free_compiler(compiler);
14568 SLJIT_FREE(common->optimized_cbracket, allocator_data);
14569 SLJIT_FREE(common->private_data_ptrs, allocator_data);
14570 PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14571 return PCRE2_ERROR_NOMEMORY;
14572 }
14573 }
14574 common->local_quit_available = FALSE;
14575 common->quit_label = quit_label;
14576
14577 /* Allocating stack, returns with PCRE_ERROR_JIT_STACKLIMIT if fails. */
14578 /* This is a (really) rare case. */
14579 set_jumps(common->stackalloc, LABEL());
14580 /* RETURN_ADDR is not a saved register. */
14581 sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCALS0);
14582
14583 SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
14584
14585 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
14586 OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
14587 OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_LIMIT, 0, SLJIT_IMM, STACK_GROWTH_RATE);
14588 OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, stack));
14589 OP1(SLJIT_MOV, STACK_LIMIT, 0, TMP2, 0);
14590
14591 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(sljit_stack_resize));
14592
14593 jump = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
14594 OP1(SLJIT_MOV, TMP2, 0, STACK_LIMIT, 0);
14595 OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_RETURN_REG, 0);
14596 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
14597 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
14598 OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
14599
14600 /* Allocation failed. */
14601 JUMPHERE(jump);
14602 /* We break the return address cache here, but this is a really rare case. */
14603 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_JIT_STACKLIMIT);
14604 JUMPTO(SLJIT_JUMP, common->quit_label);
14605
14606 /* Call limit reached. */
14607 set_jumps(common->calllimit, LABEL());
14608 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_MATCHLIMIT);
14609 JUMPTO(SLJIT_JUMP, common->quit_label);
14610
14611 if (common->revertframes != NULL)
14612 {
14613 set_jumps(common->revertframes, LABEL());
14614 do_revertframes(common);
14615 }
14616 if (common->wordboundary != NULL)
14617 {
14618 set_jumps(common->wordboundary, LABEL());
14619 check_wordboundary(common, FALSE);
14620 }
14621 if (common->ucp_wordboundary != NULL)
14622 {
14623 set_jumps(common->ucp_wordboundary, LABEL());
14624 check_wordboundary(common, TRUE);
14625 }
14626 if (common->anynewline != NULL)
14627 {
14628 set_jumps(common->anynewline, LABEL());
14629 check_anynewline(common);
14630 }
14631 if (common->hspace != NULL)
14632 {
14633 set_jumps(common->hspace, LABEL());
14634 check_hspace(common);
14635 }
14636 if (common->vspace != NULL)
14637 {
14638 set_jumps(common->vspace, LABEL());
14639 check_vspace(common);
14640 }
14641 if (common->casefulcmp != NULL)
14642 {
14643 set_jumps(common->casefulcmp, LABEL());
14644 do_casefulcmp(common);
14645 }
14646 if (common->caselesscmp != NULL)
14647 {
14648 set_jumps(common->caselesscmp, LABEL());
14649 do_caselesscmp(common);
14650 }
14651 if (common->reset_match != NULL || common->restart_match != NULL)
14652 {
14653 if (common->restart_match != NULL)
14654 {
14655 set_jumps(common->restart_match, LABEL());
14656 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
14657 }
14658
14659 set_jumps(common->reset_match, LABEL());
14660 do_reset_match(common, (re->top_bracket + 1) * 2);
14661 /* The value of restart_match is in TMP1. */
14662 CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP1, 0, continue_match_label);
14663 OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
14664 JUMPTO(SLJIT_JUMP, reset_match_label);
14665 }
14666 #ifdef SUPPORT_UNICODE
14667 #if PCRE2_CODE_UNIT_WIDTH == 8
14668 if (common->utfreadchar != NULL)
14669 {
14670 set_jumps(common->utfreadchar, LABEL());
14671 do_utfreadchar(common);
14672 }
14673 if (common->utfreadtype8 != NULL)
14674 {
14675 set_jumps(common->utfreadtype8, LABEL());
14676 do_utfreadtype8(common);
14677 }
14678 if (common->utfpeakcharback != NULL)
14679 {
14680 set_jumps(common->utfpeakcharback, LABEL());
14681 do_utfpeakcharback(common);
14682 }
14683 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
14684 #if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
14685 if (common->utfreadchar_invalid != NULL)
14686 {
14687 set_jumps(common->utfreadchar_invalid, LABEL());
14688 do_utfreadchar_invalid(common);
14689 }
14690 if (common->utfreadnewline_invalid != NULL)
14691 {
14692 set_jumps(common->utfreadnewline_invalid, LABEL());
14693 do_utfreadnewline_invalid(common);
14694 }
14695 if (common->utfmoveback_invalid)
14696 {
14697 set_jumps(common->utfmoveback_invalid, LABEL());
14698 do_utfmoveback_invalid(common);
14699 }
14700 if (common->utfpeakcharback_invalid)
14701 {
14702 set_jumps(common->utfpeakcharback_invalid, LABEL());
14703 do_utfpeakcharback_invalid(common);
14704 }
14705 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16 */
14706 if (common->getucd != NULL)
14707 {
14708 set_jumps(common->getucd, LABEL());
14709 do_getucd(common);
14710 }
14711 if (common->getucdtype != NULL)
14712 {
14713 set_jumps(common->getucdtype, LABEL());
14714 do_getucdtype(common);
14715 }
14716 #endif /* SUPPORT_UNICODE */
14717
14718 SLJIT_FREE(common->optimized_cbracket, allocator_data);
14719 SLJIT_FREE(common->private_data_ptrs, allocator_data);
14720
14721 executable_func = sljit_generate_code(compiler);
14722 executable_size = sljit_get_generated_code_size(compiler);
14723 sljit_free_compiler(compiler);
14724
14725 if (executable_func == NULL)
14726 {
14727 PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14728 return PCRE2_ERROR_NOMEMORY;
14729 }
14730
14731 /* Reuse the function descriptor if possible. */
14732 if (re->executable_jit != NULL)
14733 functions = (executable_functions *)re->executable_jit;
14734 else
14735 {
14736 functions = SLJIT_MALLOC(sizeof(executable_functions), allocator_data);
14737 if (functions == NULL)
14738 {
14739 /* This case is highly unlikely since we just recently
14740 freed a lot of memory. Not impossible though. */
14741 sljit_free_code(executable_func, NULL);
14742 PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14743 return PCRE2_ERROR_NOMEMORY;
14744 }
14745 memset(functions, 0, sizeof(executable_functions));
14746 functions->top_bracket = re->top_bracket + 1;
14747 functions->limit_match = re->limit_match;
14748 re->executable_jit = functions;
14749 }
14750
14751 /* Turn mode into an index. */
14752 if (mode == PCRE2_JIT_COMPLETE)
14753 mode = 0;
14754 else
14755 mode = (mode == PCRE2_JIT_PARTIAL_SOFT) ? 1 : 2;
14756
14757 SLJIT_ASSERT(mode < JIT_NUMBER_OF_COMPILE_MODES);
14758 functions->executable_funcs[mode] = executable_func;
14759 functions->read_only_data_heads[mode] = common->read_only_data_head;
14760 functions->executable_sizes[mode] = executable_size;
14761 return 0;
14762 }
14763
14764 #endif
14765
14766 /*************************************************
14767 * JIT compile a Regular Expression *
14768 *************************************************/
14769
/* This function uses JIT to convert a previously-compiled pattern into machine
code.
14772
14773 Arguments:
14774 code a compiled pattern
14775 options JIT option bits
14776
14777 Returns: 0: success or (*NOJIT) was used
14778 <0: an error code
14779 */
14780
/* The complete set of option bits that pcre2_jit_compile() accepts. If any
bit outside this mask is set in its "options" argument, that function returns
PCRE2_ERROR_JIT_BADOPTION. */

#define PUBLIC_JIT_COMPILE_OPTIONS \
  (PCRE2_JIT_COMPLETE|PCRE2_JIT_PARTIAL_SOFT|PCRE2_JIT_PARTIAL_HARD|PCRE2_JIT_INVALID_UTF)
14783
14784 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_jit_compile(pcre2_code * code,uint32_t options)14785 pcre2_jit_compile(pcre2_code *code, uint32_t options)
14786 {
14787 pcre2_real_code *re = (pcre2_real_code *)code;
14788 #ifdef SUPPORT_JIT
14789 executable_functions *functions;
14790 static int executable_allocator_is_working = -1;
14791 #endif
14792
14793 if (code == NULL)
14794 return PCRE2_ERROR_NULL;
14795
14796 if ((options & ~PUBLIC_JIT_COMPILE_OPTIONS) != 0)
14797 return PCRE2_ERROR_JIT_BADOPTION;
14798
14799 /* Support for invalid UTF was first introduced in JIT, with the option
14800 PCRE2_JIT_INVALID_UTF. Later, support was added to the interpreter, and the
14801 compile-time option PCRE2_MATCH_INVALID_UTF was created. This is now the
14802 preferred feature, with the earlier option deprecated. However, for backward
14803 compatibility, if the earlier option is set, it forces the new option so that
14804 if JIT matching falls back to the interpreter, there is still support for
14805 invalid UTF. However, if this function has already been successfully called
14806 without PCRE2_JIT_INVALID_UTF and without PCRE2_MATCH_INVALID_UTF (meaning that
14807 non-invalid-supporting JIT code was compiled), give an error.
14808
14809 If in the future support for PCRE2_JIT_INVALID_UTF is withdrawn, the following
14810 actions are needed:
14811
14812 1. Remove the definition from pcre2.h.in and from the list in
14813 PUBLIC_JIT_COMPILE_OPTIONS above.
14814
14815 2. Replace PCRE2_JIT_INVALID_UTF with a local flag in this module.
14816
14817 3. Replace PCRE2_JIT_INVALID_UTF in pcre2_jit_test.c.
14818
14819 4. Delete the following short block of code. The setting of "re" and
14820 "functions" can be moved into the JIT-only block below, but if that is
14821 done, (void)re and (void)functions will be needed in the non-JIT case, to
14822 avoid compiler warnings.
14823 */
14824
14825 #ifdef SUPPORT_JIT
14826 functions = (executable_functions *)re->executable_jit;
14827 #endif
14828
14829 if ((options & PCRE2_JIT_INVALID_UTF) != 0)
14830 {
14831 if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) == 0)
14832 {
14833 #ifdef SUPPORT_JIT
14834 if (functions != NULL) return PCRE2_ERROR_JIT_BADOPTION;
14835 #endif
14836 re->overall_options |= PCRE2_MATCH_INVALID_UTF;
14837 }
14838 }
14839
14840 /* The above tests are run with and without JIT support. This means that
14841 PCRE2_JIT_INVALID_UTF propagates back into the regex options (ensuring
14842 interpreter support) even in the absence of JIT. But now, if there is no JIT
14843 support, give an error return. */
14844
14845 #ifndef SUPPORT_JIT
14846 return PCRE2_ERROR_JIT_BADOPTION;
14847 #else /* SUPPORT_JIT */
14848
14849 /* There is JIT support. Do the necessary. */
14850
14851 if ((re->flags & PCRE2_NOJIT) != 0) return 0;
14852
14853 if (executable_allocator_is_working == -1)
14854 {
14855 /* Checks whether the executable allocator is working. This check
14856 might run multiple times in multi-threaded environments, but the
14857 result should not be affected by it. */
14858 void *ptr = SLJIT_MALLOC_EXEC(32, NULL);
14859 if (ptr != NULL)
14860 {
14861 SLJIT_FREE_EXEC(((sljit_u8*)(ptr)) + SLJIT_EXEC_OFFSET(ptr), NULL);
14862 executable_allocator_is_working = 1;
14863 }
14864 else executable_allocator_is_working = 0;
14865 }
14866
14867 if (!executable_allocator_is_working)
14868 return PCRE2_ERROR_NOMEMORY;
14869
14870 if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) != 0)
14871 options |= PCRE2_JIT_INVALID_UTF;
14872
14873 if ((options & PCRE2_JIT_COMPLETE) != 0 && (functions == NULL
14874 || functions->executable_funcs[0] == NULL)) {
14875 uint32_t excluded_options = (PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_PARTIAL_HARD);
14876 int result = jit_compile(code, options & ~excluded_options);
14877 if (result != 0)
14878 return result;
14879 }
14880
14881 if ((options & PCRE2_JIT_PARTIAL_SOFT) != 0 && (functions == NULL
14882 || functions->executable_funcs[1] == NULL)) {
14883 uint32_t excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_HARD);
14884 int result = jit_compile(code, options & ~excluded_options);
14885 if (result != 0)
14886 return result;
14887 }
14888
14889 if ((options & PCRE2_JIT_PARTIAL_HARD) != 0 && (functions == NULL
14890 || functions->executable_funcs[2] == NULL)) {
14891 uint32_t excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT);
14892 int result = jit_compile(code, options & ~excluded_options);
14893 if (result != 0)
14894 return result;
14895 }
14896
14897 return 0;
14898
14899 #endif /* SUPPORT_JIT */
14900 }
14901
14902 /* JIT compiler uses an all-in-one approach. This improves security,
14903 since the code generator functions are not exported. */
14904
14905 #define INCLUDED_FROM_PCRE2_JIT_COMPILE
14906
14907 #include "pcre2_jit_match.c"
14908 #include "pcre2_jit_misc.c"
14909
14910 /* End of pcre2_jit_compile.c */
14911