1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 This module by Zoltan Herczeg
10 Original API code Copyright (c) 1997-2012 University of Cambridge
11 New API code Copyright (c) 2016-2024 University of Cambridge
12
13 -----------------------------------------------------------------------------
14 Redistribution and use in source and binary forms, with or without
15 modification, are permitted provided that the following conditions are met:
16
17 * Redistributions of source code must retain the above copyright notice,
18 this list of conditions and the following disclaimer.
19
20 * Redistributions in binary form must reproduce the above copyright
21 notice, this list of conditions and the following disclaimer in the
22 documentation and/or other materials provided with the distribution.
23
24 * Neither the name of the University of Cambridge nor the names of its
25 contributors may be used to endorse or promote products derived from
26 this software without specific prior written permission.
27
28 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
29 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
32 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 POSSIBILITY OF SUCH DAMAGE.
39 -----------------------------------------------------------------------------
40 */
41
42 #ifdef HAVE_CONFIG_H
43 #include "config.h"
44 #endif
45
46 #if defined(__has_feature)
47 #if __has_feature(memory_sanitizer)
48 #include <sanitizer/msan_interface.h>
49 #endif /* __has_feature(memory_sanitizer) */
50 #endif /* defined(__has_feature) */
51
52 #include "pcre2_internal.h"
53
54 #ifdef SUPPORT_JIT
55
56 /* All-in-one: Since we use the JIT compiler only from here,
57 we just include it. This way we don't need to touch the build
58 system files. */
59
60 #define SLJIT_CONFIG_AUTO 1
61 #define SLJIT_CONFIG_STATIC 1
62 #define SLJIT_VERBOSE 0
63
64 #ifdef PCRE2_DEBUG
65 #define SLJIT_DEBUG 1
66 #else
67 #define SLJIT_DEBUG 0
68 #endif
69
70 #define SLJIT_MALLOC(size, allocator_data) pcre2_jit_malloc(size, allocator_data)
71 #define SLJIT_FREE(ptr, allocator_data) pcre2_jit_free(ptr, allocator_data)
72
pcre2_jit_malloc(size_t size,void * allocator_data)73 static void * pcre2_jit_malloc(size_t size, void *allocator_data)
74 {
75 pcre2_memctl *allocator = ((pcre2_memctl*)allocator_data);
76 return allocator->malloc(size, allocator->memory_data);
77 }
78
pcre2_jit_free(void * ptr,void * allocator_data)79 static void pcre2_jit_free(void *ptr, void *allocator_data)
80 {
81 pcre2_memctl *allocator = ((pcre2_memctl*)allocator_data);
82 allocator->free(ptr, allocator->memory_data);
83 }
84
85 #include "sljit/sljitLir.c"
86
87 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
88 #error Unsupported architecture
89 #endif
90
91 /* Defines for debugging purposes. */
92
93 /* 1 - Use unoptimized capturing brackets.
94 2 - Enable capture_last_ptr (includes option 1). */
95 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
96
97 /* 1 - Always have a control head. */
98 /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
99
100 /* Allocate memory for the regex stack on the real machine stack.
101 Fast, but limited size. */
102 #define MACHINE_STACK_SIZE 32768
103
104 /* Growth rate for stack allocated by the OS. Should be a multiple
105 of the page size. */
106 #define STACK_GROWTH_RATE 8192
107
108 /* Enable to check that the allocation could destroy temporaries. */
109 #if defined SLJIT_DEBUG && SLJIT_DEBUG
110 #define DESTROY_REGISTERS 1
111 #endif
112
113 /*
114 Short summary about the backtracking mechanism employed by the jit code generator:
115
116 The code generator follows the recursive nature of the PERL compatible regular
117 expressions. The basic blocks of regular expressions are condition checkers
118 which execute different commands depending on the result of the condition check.
119 The relationship between the operators can be horizontal (concatenation) and
120 vertical (sub-expression) (See struct backtrack_common for more details).
121
122 'ab' - 'a' and 'b' regexps are concatenated
123 'a+' - 'a' is the sub-expression of the '+' operator
124
125 The condition checkers are boolean (true/false) checkers. Machine code is generated
126 for the checker itself and for the actions depending on the result of the checker.
127 The 'true' case is called as the matching path (expected path), and the other is called as
128 the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
129 branches on the matching path.
130
131 Greedy star operator (*) :
132 Matching path: match happens.
133 Backtrack path: match failed.
134 Non-greedy star operator (*?) :
135 Matching path: no need to perform a match.
136 Backtrack path: match is required.
137
138 The following example shows how the code generated for a capturing bracket
139 with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
140 we have the following regular expression:
141
142 A(B|C)D
143
144 The generated code will be the following:
145
146 A matching path
147 '(' matching path (pushing arguments to the stack)
148 B matching path
149 ')' matching path (pushing arguments to the stack)
150 D matching path
151 return with successful match
152
153 D backtrack path
154 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
155 B backtrack path
156 C expected path
157 jump to D matching path
158 C backtrack path
159 A backtrack path
160
161 Notice that the order of the backtrack code paths is the opposite of the fast
162 code paths. In this way the topmost value on the stack always belongs
163 to the current backtrack code path. The backtrack path must check
164 whether there is a next alternative. If so, it needs to jump back to
165 the matching path eventually. Otherwise it needs to clear out its own stack
166 frame and continue the execution on the backtrack code paths.
167 */
168
169 /*
170 Saved stack frames:
171
172 Atomic blocks and asserts require reloading the values of private data
173 when the backtrack mechanism is performed. Because of OP_RECURSE, the data
174 are not necessarily known at compile time, thus we need a dynamic restore
175 mechanism.
176
177 The stack frames are stored in a chain list, and have the following format:
178 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
179
180 Thus we can restore the private data to a particular point in the stack.
181 */
182
183 typedef struct jit_arguments {
184 /* Pointers first. */
185 struct sljit_stack *stack;
186 PCRE2_SPTR str;
187 PCRE2_SPTR begin;
188 PCRE2_SPTR end;
189 pcre2_match_data *match_data;
190 PCRE2_SPTR startchar_ptr;
191 PCRE2_UCHAR *mark_ptr;
192 int (*callout)(pcre2_callout_block *, void *);
193 void *callout_data;
194 /* Everything else after. */
195 sljit_uw offset_limit;
196 sljit_u32 limit_match;
197 sljit_u32 oveccount;
198 sljit_u32 options;
199 } jit_arguments;
200
201 #define JIT_NUMBER_OF_COMPILE_MODES 3
202
203 typedef struct executable_functions {
204 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
205 void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES];
206 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
207 sljit_u32 top_bracket;
208 sljit_u32 limit_match;
209 } executable_functions;
210
/* Node of a singly linked list of sljit jumps. Each node holds one jump and
a link to the next node. */
typedef struct jump_list jump_list;

struct jump_list {
  /* The jump stored in this node. */
  struct sljit_jump *jump;
  /* Following node of the list. */
  jump_list *next;
};
215
/* Node of a singly linked list of stubs. Each node pairs a jump (start) with
a label (quit) and links to the next node. */
typedef struct stub_list stub_list;

struct stub_list {
  /* Jump belonging to this stub. */
  struct sljit_jump *start;
  /* Label belonging to this stub. */
  struct sljit_label *quit;
  /* Following node of the list. */
  stub_list *next;
};
221
/* Negative sentinel values. NOTE(review): presumably used in place of a
real (non-negative) frame size - confirm against the users of framesize. */
enum frame_types {
  no_stack = -2,
  no_frame = -1
};
226
/* Type tags for control entries: a mark entry or a then-trap entry. */
enum control_types {
  type_mark,
  type_then_trap
};
231
/* Kinds of early fail handling: skip, fail, and fail with a range. */
enum early_fail_types {
  type_skip,
  type_fail,
  type_fail_range
};
237
238 typedef int (SLJIT_FUNC *jit_function)(jit_arguments *args);
239
240 /* The following structure is the key data type for the recursive
241 code generator. It is allocated by compile_matchingpath, and contains
242 the arguments for compile_backtrackingpath. Must be the first member
243 of its descendants. */
244 typedef struct backtrack_common {
245 /* Backtracking path of an opcode, which falls back
246 to our opcode, if it cannot resume matching. */
247 struct backtrack_common *prev;
248 /* Backtracks for opcodes without backtracking path.
249 These opcodes are between 'prev' and the current
250 opcode, and they never resume the match. */
251 jump_list *simple_backtracks;
252 /* Internal backtracking list for block constructs
253 which contains other opcodes, such as brackets,
254 asserts, conditionals, etc. */
255 struct backtrack_common *top;
256 /* Backtracks used internally by the opcode. For component
257 opcodes, this list is also used by those opcodes without
258 backtracking path which follows the 'top' backtrack. */
259 jump_list *own_backtracks;
260 /* Opcode pointer. */
261 PCRE2_SPTR cc;
262 } backtrack_common;
263
264 typedef struct assert_backtrack {
265 backtrack_common common;
266 jump_list *condfailed;
267 /* Less than 0 if a frame is not needed. */
268 int framesize;
269 /* Points to our private memory word on the stack. */
270 int private_data_ptr;
271 /* For iterators. */
272 struct sljit_label *matchingpath;
273 } assert_backtrack;
274
275 typedef struct bracket_backtrack {
276 backtrack_common common;
277 /* Where to continue if an alternative is successfully matched. */
278 struct sljit_label *alternative_matchingpath;
279 /* For rmin and rmax iterators. */
280 struct sljit_label *recursive_matchingpath;
281 /* For greedy ? operator. */
282 struct sljit_label *zero_matchingpath;
283 /* Contains the branches of a failed condition. */
284 union {
285 /* Both for OP_COND, OP_SCOND. */
286 jump_list *condfailed;
287 assert_backtrack *assert;
288 /* For OP_ONCE. Less than 0 if not needed. */
289 int framesize;
290 /* For brackets with >3 alternatives. */
291 struct sljit_jump *matching_mov_addr;
292 } u;
293 /* Points to our private memory word on the stack. */
294 int private_data_ptr;
295 } bracket_backtrack;
296
297 typedef struct bracketpos_backtrack {
298 backtrack_common common;
299 /* Points to our private memory word on the stack. */
300 int private_data_ptr;
301 /* Reverting stack is needed. */
302 int framesize;
303 /* Allocated stack size. */
304 int stacksize;
305 } bracketpos_backtrack;
306
307 typedef struct braminzero_backtrack {
308 backtrack_common common;
309 struct sljit_label *matchingpath;
310 } braminzero_backtrack;
311
312 typedef struct char_iterator_backtrack {
313 backtrack_common common;
314 /* Next iteration. */
315 struct sljit_label *matchingpath;
316 union {
317 jump_list *backtracks;
318 struct {
319 unsigned int othercasebit;
320 PCRE2_UCHAR chr;
321 BOOL enabled;
322 } charpos;
323 } u;
324 } char_iterator_backtrack;
325
326 typedef struct ref_iterator_backtrack {
327 backtrack_common common;
328 /* Next iteration. */
329 struct sljit_label *matchingpath;
330 } ref_iterator_backtrack;
331
332 typedef struct recurse_entry {
333 struct recurse_entry *next;
334 /* Contains the function entry label. */
335 struct sljit_label *entry_label;
336 /* Contains the backtrack entry label. */
337 struct sljit_label *backtrack_label;
338 /* Collects the entry calls until the function is not created. */
339 jump_list *entry_calls;
340 /* Collects the backtrack calls until the function is not created. */
341 jump_list *backtrack_calls;
342 /* Points to the starting opcode. */
343 sljit_sw start;
344 } recurse_entry;
345
346 typedef struct recurse_backtrack {
347 backtrack_common common;
348 /* Return to the matching path. */
349 struct sljit_label *matchingpath;
350 /* Recursive pattern. */
351 recurse_entry *entry;
352 /* Pattern is inlined. */
353 BOOL inlined_pattern;
354 } recurse_backtrack;
355
356 typedef struct vreverse_backtrack {
357 backtrack_common common;
358 /* Return to the matching path. */
359 struct sljit_label *matchingpath;
360 } vreverse_backtrack;
361
362 #define OP_THEN_TRAP OP_TABLE_LENGTH
363
364 typedef struct then_trap_backtrack {
365 backtrack_common common;
366 /* If then_trap is not NULL, this structure contains the real
367 then_trap for the backtracking path. */
368 struct then_trap_backtrack *then_trap;
369 /* Points to the starting opcode. */
370 sljit_sw start;
371 /* Exit point for the then opcodes of this alternative. */
372 jump_list *quit;
373 /* Frame size of the current alternative. */
374 int framesize;
375 } then_trap_backtrack;
376
377 #define MAX_N_CHARS 12
378 #define MAX_DIFF_CHARS 5
379
380 typedef struct fast_forward_char_data {
381 /* Number of characters in the chars array, 255 for any character. */
382 sljit_u8 count;
383 /* Number of last UTF-8 characters in the chars array. */
384 sljit_u8 last_count;
385 /* Available characters in the current position. */
386 PCRE2_UCHAR chars[MAX_DIFF_CHARS];
387 } fast_forward_char_data;
388
389 #define MAX_CLASS_RANGE_SIZE 4
390 #define MAX_CLASS_CHARS_SIZE 3
391
392 typedef struct compiler_common {
393 /* The sljit generic compiler. */
394 struct sljit_compiler *compiler;
395 /* Compiled regular expression. */
396 pcre2_real_code *re;
397 /* First byte code. */
398 PCRE2_SPTR start;
399 /* Maps private data offset to each opcode. */
400 sljit_s32 *private_data_ptrs;
401 /* Chain list of read-only data ptrs. */
402 void *read_only_data_head;
403 /* Tells whether the capturing bracket is optimized. */
404 sljit_u8 *optimized_cbracket;
405 /* Tells whether the starting offset is a target of then. */
406 sljit_u8 *then_offsets;
407 /* Current position where a THEN must jump. */
408 then_trap_backtrack *then_trap;
409 /* Starting offset of private data for capturing brackets. */
410 sljit_s32 cbra_ptr;
411 /* Output vector starting point. Must be divisible by 2. */
412 sljit_s32 ovector_start;
413 /* Points to the starting character of the current match. */
414 sljit_s32 start_ptr;
415 /* Last known position of the requested byte. */
416 sljit_s32 req_char_ptr;
417 /* Head of the last recursion. */
418 sljit_s32 recursive_head_ptr;
419 /* First inspected character for partial matching.
420 (Needed for avoiding zero length partial matches.) */
421 sljit_s32 start_used_ptr;
422 /* Starting pointer for partial soft matches. */
423 sljit_s32 hit_start;
424 /* Pointer of the match end position. */
425 sljit_s32 match_end_ptr;
426 /* Points to the marked string. */
427 sljit_s32 mark_ptr;
428 /* Head of the recursive control verb management chain.
429 Each item must have a previous offset and type
430 (see control_types) values. See do_search_mark. */
431 sljit_s32 control_head_ptr;
432 /* Points to the last matched capture block index. */
433 sljit_s32 capture_last_ptr;
434 /* Fast forward skipping byte code pointer. */
435 PCRE2_SPTR fast_forward_bc_ptr;
436 /* Locals used by fast fail optimization. */
437 sljit_s32 early_fail_start_ptr;
438 sljit_s32 early_fail_end_ptr;
439 /* Variables used by recursive call generator. */
440 sljit_s32 recurse_bitset_size;
441 uint8_t *recurse_bitset;
442
443 /* Flipped and lower case tables. */
444 const sljit_u8 *fcc;
445 sljit_sw lcc;
446 /* Mode can be PCRE2_JIT_COMPLETE and others. */
447 int mode;
448 /* TRUE, when empty match is accepted for partial matching. */
449 BOOL allow_empty_partial;
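450 /* TRUE, when the match might be empty. NOTE(review): the previous text said
 "minlength is greater than 0", which contradicts the field name; confirm
 against the place where this field is initialized. */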
451 BOOL might_be_empty;
452 /* \K is found in the pattern. */
453 BOOL has_set_som;
454 /* (*SKIP:arg) is found in the pattern. */
455 BOOL has_skip_arg;
456 /* (*THEN) is found in the pattern. */
457 BOOL has_then;
458 /* (*SKIP) or (*SKIP:arg) is found in lookbehind assertion. */
459 BOOL has_skip_in_assert_back;
460 /* Quit is redirected by recurse, negative assertion, or positive assertion in conditional block. */
461 BOOL local_quit_available;
462 /* Currently in a positive assertion. */
463 BOOL in_positive_assertion;
464 /* Newline control. */
465 int nltype;
466 sljit_u32 nlmax;
467 sljit_u32 nlmin;
468 int newline;
469 int bsr_nltype;
470 sljit_u32 bsr_nlmax;
471 sljit_u32 bsr_nlmin;
472 /* Dollar endonly. */
473 int endonly;
474 /* Tables. */
475 sljit_sw ctypes;
476 /* Named capturing brackets. */
477 PCRE2_SPTR name_table;
478 sljit_sw name_count;
479 sljit_sw name_entry_size;
480
481 /* Labels and jump lists. */
482 struct sljit_label *partialmatchlabel;
483 struct sljit_label *quit_label;
484 struct sljit_label *abort_label;
485 struct sljit_label *accept_label;
486 struct sljit_label *ff_newline_shortcut;
487 stub_list *stubs;
488 recurse_entry *entries;
489 recurse_entry *currententry;
490 jump_list *partialmatch;
491 jump_list *quit;
492 jump_list *positive_assertion_quit;
493 jump_list *abort;
494 jump_list *failed_match;
495 jump_list *accept;
496 jump_list *calllimit;
497 jump_list *stackalloc;
498 jump_list *revertframes;
499 jump_list *wordboundary;
500 jump_list *ucp_wordboundary;
501 jump_list *anynewline;
502 jump_list *hspace;
503 jump_list *vspace;
504 jump_list *casefulcmp;
505 jump_list *caselesscmp;
506 jump_list *reset_match;
507 /* Same as reset_match, but resets the STR_PTR as well. */
508 jump_list *restart_match;
509 BOOL unset_backref;
510 BOOL alt_circumflex;
511 #ifdef SUPPORT_UNICODE
512 BOOL utf;
513 BOOL invalid_utf;
514 BOOL ucp;
515 /* Points to saving area for iref. */
516 sljit_s32 iref_ptr;
517 jump_list *getucd;
518 jump_list *getucdtype;
519 #if PCRE2_CODE_UNIT_WIDTH == 8
520 jump_list *utfreadchar;
521 jump_list *utfreadtype8;
522 jump_list *utfpeakcharback;
523 #endif
524 #if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
525 jump_list *utfreadchar_invalid;
526 jump_list *utfreadnewline_invalid;
527 jump_list *utfmoveback_invalid;
528 jump_list *utfpeakcharback_invalid;
529 #endif
530 #endif /* SUPPORT_UNICODE */
531 } compiler_common;
532
533 /* For byte_sequence_compare. */
534
535 typedef struct compare_context {
536 int length;
537 int sourcereg;
538 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
539 int ucharptr;
540 union {
541 sljit_s32 asint;
542 sljit_u16 asushort;
543 #if PCRE2_CODE_UNIT_WIDTH == 8
544 sljit_u8 asbyte;
545 sljit_u8 asuchars[4];
546 #elif PCRE2_CODE_UNIT_WIDTH == 16
547 sljit_u16 asuchars[2];
548 #elif PCRE2_CODE_UNIT_WIDTH == 32
549 sljit_u32 asuchars[1];
550 #endif
551 } c;
552 union {
553 sljit_s32 asint;
554 sljit_u16 asushort;
555 #if PCRE2_CODE_UNIT_WIDTH == 8
556 sljit_u8 asbyte;
557 sljit_u8 asuchars[4];
558 #elif PCRE2_CODE_UNIT_WIDTH == 16
559 sljit_u16 asuchars[2];
560 #elif PCRE2_CODE_UNIT_WIDTH == 32
561 sljit_u32 asuchars[1];
562 #endif
563 } oc;
564 #endif
565 } compare_context;
566
567 /* Undefine sljit macros. */
568 #undef CMP
569
570 /* Used for accessing the elements of the stack. */
571 #define STACK(i) ((i) * SSIZE_OF(sw))
572
573 #ifdef SLJIT_PREF_SHIFT_REG
574 #if SLJIT_PREF_SHIFT_REG == SLJIT_R2
575 /* Nothing. */
576 #elif SLJIT_PREF_SHIFT_REG == SLJIT_R3
577 #define SHIFT_REG_IS_R3
578 #else
579 #error "Unsupported shift register"
580 #endif
581 #endif
582
583 #define TMP1 SLJIT_R0
584 #ifdef SHIFT_REG_IS_R3
585 #define TMP2 SLJIT_R3
586 #define TMP3 SLJIT_R2
587 #else
588 #define TMP2 SLJIT_R2
589 #define TMP3 SLJIT_R3
590 #endif
591 #define STR_PTR SLJIT_R1
592 #define STR_END SLJIT_S0
593 #define STACK_TOP SLJIT_S1
594 #define STACK_LIMIT SLJIT_S2
595 #define COUNT_MATCH SLJIT_S3
596 #define ARGUMENTS SLJIT_S4
597 #define RETURN_ADDR SLJIT_R4
598
599 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
600 #define HAS_VIRTUAL_REGISTERS 1
601 #else
602 #define HAS_VIRTUAL_REGISTERS 0
603 #endif
604
605 /* Local space layout. */
606 /* These two locals can be used by the current opcode. */
607 #define LOCALS0 (0 * sizeof(sljit_sw))
608 #define LOCALS1 (1 * sizeof(sljit_sw))
609 /* Two local variables for possessive quantifiers (char1 cannot use them). */
610 #define POSSESSIVE0 (2 * sizeof(sljit_sw))
611 #define POSSESSIVE1 (3 * sizeof(sljit_sw))
612 /* Max limit of recursions. */
613 #define LIMIT_MATCH (4 * sizeof(sljit_sw))
614 /* The output vector is stored on the stack, and contains pointers
615 to characters. The vector data is divided into two groups: the first
616 group contains the start / end character pointers, and the second is
617 the start pointers when the end of the capturing group has not yet reached. */
618 #define OVECTOR_START (common->ovector_start)
619 #define OVECTOR(i) (OVECTOR_START + (i) * SSIZE_OF(sw))
620 #define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * SSIZE_OF(sw))
621 #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
622
623 #if PCRE2_CODE_UNIT_WIDTH == 8
624 #define MOV_UCHAR SLJIT_MOV_U8
625 #define IN_UCHARS(x) (x)
626 #elif PCRE2_CODE_UNIT_WIDTH == 16
627 #define MOV_UCHAR SLJIT_MOV_U16
628 #define UCHAR_SHIFT (1)
629 #define IN_UCHARS(x) ((x) * 2)
630 #elif PCRE2_CODE_UNIT_WIDTH == 32
631 #define MOV_UCHAR SLJIT_MOV_U32
632 #define UCHAR_SHIFT (2)
633 #define IN_UCHARS(x) ((x) * 4)
634 #else
635 #error Unsupported compiling mode
636 #endif
637
638 /* Shortcuts. */
639 #define DEFINE_COMPILER \
640 struct sljit_compiler *compiler = common->compiler
641 #define OP1(op, dst, dstw, src, srcw) \
642 sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
643 #define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
644 sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
645 #define OP2U(op, src1, src1w, src2, src2w) \
646 sljit_emit_op2u(compiler, (op), (src1), (src1w), (src2), (src2w))
647 #define OP_SRC(op, src, srcw) \
648 sljit_emit_op_src(compiler, (op), (src), (srcw))
649 #define LABEL() \
650 sljit_emit_label(compiler)
651 #define JUMP(type) \
652 sljit_emit_jump(compiler, (type))
653 #define JUMPTO(type, label) \
654 sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
655 #define JUMPHERE(jump) \
656 sljit_set_label((jump), sljit_emit_label(compiler))
657 #define SET_LABEL(jump, label) \
658 sljit_set_label((jump), (label))
659 #define CMP(type, src1, src1w, src2, src2w) \
660 sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
661 #define CMPTO(type, src1, src1w, src2, src2w, label) \
662 sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
663 #define OP_FLAGS(op, dst, dstw, type) \
664 sljit_emit_op_flags(compiler, (op), (dst), (dstw), (type))
665 #define SELECT(type, dst_reg, src1, src1w, src2_reg) \
666 sljit_emit_select(compiler, (type), (dst_reg), (src1), (src1w), (src2_reg))
667 #define GET_LOCAL_BASE(dst, dstw, offset) \
668 sljit_get_local_base(compiler, (dst), (dstw), (offset))
669
670 #define READ_CHAR_MAX 0x7fffffff
671
672 #define INVALID_UTF_CHAR -1
673 #define UNASSIGNED_UTF_CHAR 888
674
675 #if defined SUPPORT_UNICODE
676 #if PCRE2_CODE_UNIT_WIDTH == 8
677
/* Reads one UTF-8 character starting at ptr without assuming that the
subject is valid UTF-8. On success the code point is stored in c and ptr is
advanced past the character. invalid_action is executed when the sequence is
truncated by end, has a bad lead or continuation byte, is an overlong 3-byte
form, encodes a surrogate, or is a 4-byte form outside 0x10000..0x10ffff.

ptr[0] is read unconditionally, so the caller must guarantee ptr < end. Code
units at or after end are never read beyond that. When the range check of a
3- or 4-byte sequence fails, ptr has already been advanced. The body is a plain
block, so a break or continue passed as invalid_action acts on the caller's
enclosing loop. */

#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
  { \
  if (ptr[0] < 0x80) \
    c = *ptr++; \
  else if (ptr + 1 < end && ptr[1] >= 0x80 && ptr[1] <= 0xbf) \
    { \
    c = ptr[1] - 0x80; \
    \
    if (ptr[0] >= 0xc2 && ptr[0] <= 0xdf) \
      { \
      /* Two byte sequence. */ \
      c |= (ptr[0] - 0xc0) << 6; \
      ptr += 2; \
      } \
    else if (ptr + 2 < end && ptr[2] >= 0x80 && ptr[2] <= 0xbf) \
      { \
      c = (c << 6) | (ptr[2] - 0x80); \
      \
      if (ptr[0] >= 0xe0 && ptr[0] <= 0xef) \
        { \
        /* Three byte sequence. */ \
        c |= (ptr[0] - 0xe0) << 12; \
        ptr += 3; \
        \
        if (c < 0x800 || (c >= 0xd800 && c < 0xe000)) \
          { \
          invalid_action; \
          } \
        } \
      else if (ptr + 3 < end && ptr[3] >= 0x80 && ptr[3] <= 0xbf) \
        { \
        c = (c << 6) | (ptr[3] - 0x80); \
        \
        if (ptr[0] >= 0xf0 && ptr[0] <= 0xf4) \
          { \
          /* Four byte sequence. */ \
          c |= (ptr[0] - 0xf0) << 18; \
          ptr += 4; \
          \
          if (c >= 0x110000 || c < 0x10000) \
            { \
            invalid_action; \
            } \
          } \
        else \
          { \
          invalid_action; \
          } \
        } \
      else \
        { \
        invalid_action; \
        } \
      } \
    else \
      { \
      invalid_action; \
      } \
    } \
  else \
    { \
    invalid_action; \
    } \
  }
739
/* Reads the UTF-8 character that ends just before ptr without assuming that
the subject is valid UTF-8. On success the code point is stored in c and ptr is
moved back to the first code unit of the character. invalid_action is executed
when the sequence would start before start, has a bad lead or continuation
byte, is an overlong 3-byte form, encodes a surrogate, or is a 4-byte form
outside 0x10000..0x10ffff.

ptr[-1] is read unconditionally, so the caller must guarantee ptr > start.
Code units before start are never read. When the range check of a 3- or 4-byte
sequence fails, ptr has already been moved back. The body is a plain block,
so a break or continue passed as invalid_action acts on the caller's enclosing
loop.

Because the bytes are visited from last to first, c starts with the lowest six
bits and every earlier byte supplies the next higher group; each one is
therefore OR-ed in at its own shift (6, 12, 18) and c itself is never
shifted. */

#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
  { \
  c = ptr[-1]; \
  if (c <= 0x7f) \
    ptr--; \
  else if (ptr - 1 > start && ptr[-1] >= 0x80 && ptr[-1] < 0xc0) \
    { \
    c -= 0x80; \
    \
    if (ptr[-2] >= 0xc2 && ptr[-2] <= 0xdf) \
      { \
      /* Two byte sequence. */ \
      c |= (ptr[-2] - 0xc0) << 6; \
      ptr -= 2; \
      } \
    else if (ptr - 2 > start && ptr[-2] >= 0x80 && ptr[-2] < 0xc0) \
      { \
      /* Continuation byte holding bits 6..11. */ \
      c |= (ptr[-2] - 0x80) << 6; \
      \
      if (ptr[-3] >= 0xe0 && ptr[-3] <= 0xef) \
        { \
        /* Three byte sequence. */ \
        c |= (ptr[-3] - 0xe0) << 12; \
        ptr -= 3; \
        \
        if (c < 0x800 || (c >= 0xd800 && c < 0xe000)) \
          { \
          invalid_action; \
          } \
        } \
      else if (ptr - 3 > start && ptr[-3] >= 0x80 && ptr[-3] < 0xc0) \
        { \
        /* Continuation byte holding bits 12..17. */ \
        c |= (ptr[-3] - 0x80) << 12; \
        \
        if (ptr[-4] >= 0xf0 && ptr[-4] <= 0xf4) \
          { \
          /* Four byte sequence. */ \
          c |= (ptr[-4] - 0xf0) << 18; \
          ptr -= 4; \
          \
          if (c >= 0x110000 || c < 0x10000) \
            { \
            invalid_action; \
            } \
          } \
        else \
          { \
          invalid_action; \
          } \
        } \
      else \
        { \
        invalid_action; \
        } \
      } \
    else \
      { \
      invalid_action; \
      } \
    } \
  else \
    { \
    invalid_action; \
    } \
  }
802
803 #elif PCRE2_CODE_UNIT_WIDTH == 16
804
/* Reads one UTF-16 character starting at ptr without assuming that the
subject is valid UTF-16. A code unit outside the surrogate range is returned
as is; a high surrogate followed (before end) by a low surrogate is combined
into a supplementary code point. In both cases ptr is advanced past the
character. Anything else executes invalid_action and leaves ptr untouched.
ptr[0] is read unconditionally, so the caller must guarantee ptr < end. */

#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
  { \
  if (!(ptr[0] >= 0xd800 && ptr[0] < 0xe000)) \
    c = *ptr++; \
  else if (ptr[0] < 0xdc00 && ptr + 1 < end && ptr[1] >= 0xdc00 && ptr[1] < 0xe000) \
    { \
    /* Valid surrogate pair. */ \
    c = 0x10000 + (((ptr[0] - 0xd800) << 10) | (ptr[1] - 0xdc00)); \
    ptr += 2; \
    } \
  else \
    { \
    invalid_action; \
    } \
  }
819
/* Reads the UTF-16 character that ends just before ptr without assuming that
the subject is valid UTF-16. c always receives ptr[-1] first. A code unit
outside the surrogate range moves ptr back by one; a low surrogate preceded
(not before start) by a high surrogate is combined into a supplementary code
point and moves ptr back by two. Anything else executes invalid_action and
leaves ptr untouched. ptr[-1] is read unconditionally, so the caller must
guarantee ptr > start. */

#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
  { \
  c = ptr[-1]; \
  if (!(c >= 0xd800 && c < 0xe000)) \
    ptr--; \
  else if (c >= 0xdc00 && ptr - 1 > start && ptr[-2] >= 0xd800 && ptr[-2] < 0xdc00) \
    { \
    /* Valid surrogate pair. */ \
    c = 0x10000 + (((ptr[-2] - 0xd800) << 10) | (c - 0xdc00)); \
    ptr -= 2; \
    } \
  else \
    { \
    invalid_action; \
    } \
  }
835
836
837 #elif PCRE2_CODE_UNIT_WIDTH == 32
838
/* Reads one UTF-32 character at ptr without assuming that the subject is
valid UTF-32. A surrogate value or a value of 0x110000 and above executes
invalid_action and leaves c and ptr untouched; any other value is stored in c
and ptr is advanced by one. The end argument is not used. */

#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
  { \
  if ((ptr[0] >= 0xd800 && ptr[0] < 0xe000) || ptr[0] >= 0x110000) \
    { \
    invalid_action; \
    } \
  else \
    c = *ptr++; \
  }
848
/* Reads the UTF-32 character just before ptr without assuming that the
subject is valid UTF-32. c always receives ptr[-1]. A surrogate value or a
value of 0x110000 and above executes invalid_action and leaves ptr untouched;
otherwise ptr is moved back by one. The start argument is not used. */

#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
  { \
  c = ptr[-1]; \
  if ((ptr[-1] >= 0xd800 && ptr[-1] < 0xe000) || ptr[-1] >= 0x110000) \
    { \
    invalid_action; \
    } \
  else \
    ptr--; \
  }
859
860 #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
861 #endif /* SUPPORT_UNICODE */
862
/* Returns the address of the first opcode after the bracket that starts at
cc. The link stored at offset 1 is followed from the opening opcode through
every OP_ALT until the closing KET opcode is reached, and the KET itself
(1 + LINK_SIZE code units) is then stepped over. cc must point to an
assertion opcode or to an opcode in the OP_ONCE..OP_SCOND range. */

static PCRE2_SPTR bracketend(PCRE2_SPTR cc)
{
SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND));

/* Skip the first alternative, then all the following ones. */
cc += GET(cc, 1);
while (*cc == OP_ALT)
  cc += GET(cc, 1);

SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
return cc + 1 + LINK_SIZE;
}
871
/* Returns the number of alternatives of the bracket that starts at cc. The
link stored at offset 1 is followed from the opening opcode through every
OP_ALT, counting one alternative per step, until the closing KET opcode is
reached. cc must point to an assertion opcode or to an opcode in the
OP_ONCE..OP_SCOND range. */

static int no_alternatives(PCRE2_SPTR cc)
{
int alternatives = 1;

SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND));

/* The first alternative is always present. */
cc += GET(cc, 1);
while (*cc == OP_ALT)
  {
  alternatives++;
  cc += GET(cc, 1);
  }

SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
return alternatives;
}
885
find_vreverse(PCRE2_SPTR cc)886 static BOOL find_vreverse(PCRE2_SPTR cc)
887 {
888 SLJIT_ASSERT(*cc == OP_ASSERTBACK || *cc == OP_ASSERTBACK_NOT || *cc == OP_ASSERTBACK_NA);
889
890 do
891 {
892 if (cc[1 + LINK_SIZE] == OP_VREVERSE)
893 return TRUE;
894 cc += GET(cc, 1);
895 }
896 while (*cc == OP_ALT);
897
898 return FALSE;
899 }
900
901 /* Functions which might need modification for each newly supported opcode:
902 next_opcode
903 check_opcode_types
904 set_private_data_ptrs
905 get_framesize
906 init_frame
907 get_recurse_data_length
908 copy_recurse_data
909 compile_matchingpath
910 compile_backtrackingpath
911 */
912
/* Returns the address of the opcode that follows the one at cc, or NULL when
it cannot be determined: OP_ANYBYTE in UTF mode, or an opcode that is not
listed here (the latter also triggers SLJIT_UNREACHABLE).

The opcodes fall into these groups:
  - opcodes whose length is taken directly from the OP_lengths table;
  - opcodes that carry a character: the table length, plus the extra code
    units of the character in UTF mode;
  - OP_TYPE... repeats: the table length minus one;
  - opcodes which store their own length or the length of their argument. */

static PCRE2_SPTR next_opcode(compiler_common *common, PCRE2_SPTR cc)
{
PCRE2_SPTR next;

SLJIT_UNUSED_ARG(common);

switch(*cc)
  {
  /* Length comes from the table. */
  case OP_SOD:
  case OP_SOM:
  case OP_SET_SOM:
  case OP_NOT_WORD_BOUNDARY:
  case OP_WORD_BOUNDARY:
  case OP_NOT_DIGIT:
  case OP_DIGIT:
  case OP_NOT_WHITESPACE:
  case OP_WHITESPACE:
  case OP_NOT_WORDCHAR:
  case OP_WORDCHAR:
  case OP_ANY:
  case OP_ALLANY:
  case OP_NOTPROP:
  case OP_PROP:
  case OP_ANYNL:
  case OP_NOT_HSPACE:
  case OP_HSPACE:
  case OP_NOT_VSPACE:
  case OP_VSPACE:
  case OP_EXTUNI:
  case OP_EODN:
  case OP_EOD:
  case OP_CIRC:
  case OP_CIRCM:
  case OP_DOLL:
  case OP_DOLLM:
  case OP_CRSTAR:
  case OP_CRMINSTAR:
  case OP_CRPLUS:
  case OP_CRMINPLUS:
  case OP_CRQUERY:
  case OP_CRMINQUERY:
  case OP_CRRANGE:
  case OP_CRMINRANGE:
  case OP_CRPOSSTAR:
  case OP_CRPOSPLUS:
  case OP_CRPOSQUERY:
  case OP_CRPOSRANGE:
  case OP_CLASS:
  case OP_NCLASS:
  case OP_REF:
  case OP_REFI:
  case OP_DNREF:
  case OP_DNREFI:
  case OP_RECURSE:
  case OP_CALLOUT:
  case OP_ALT:
  case OP_KET:
  case OP_KETRMAX:
  case OP_KETRMIN:
  case OP_KETRPOS:
  case OP_REVERSE:
  case OP_VREVERSE:
  case OP_ASSERT:
  case OP_ASSERT_NOT:
  case OP_ASSERTBACK:
  case OP_ASSERTBACK_NOT:
  case OP_ASSERT_NA:
  case OP_ASSERTBACK_NA:
  case OP_ONCE:
  case OP_SCRIPT_RUN:
  case OP_BRA:
  case OP_BRAPOS:
  case OP_CBRA:
  case OP_CBRAPOS:
  case OP_COND:
  case OP_SBRA:
  case OP_SBRAPOS:
  case OP_SCBRA:
  case OP_SCBRAPOS:
  case OP_SCOND:
  case OP_CREF:
  case OP_DNCREF:
  case OP_RREF:
  case OP_DNRREF:
  case OP_FALSE:
  case OP_TRUE:
  case OP_BRAZERO:
  case OP_BRAMINZERO:
  case OP_BRAPOSZERO:
  case OP_PRUNE:
  case OP_SKIP:
  case OP_THEN:
  case OP_COMMIT:
  case OP_FAIL:
  case OP_ACCEPT:
  case OP_ASSERT_ACCEPT:
  case OP_CLOSE:
  case OP_SKIPZERO:
  case OP_NOT_UCP_WORD_BOUNDARY:
  case OP_UCP_WORD_BOUNDARY:
  next = cc + PRIV(OP_lengths)[*cc];
  break;

  /* Opcodes followed by a character. */
  case OP_CHAR:
  case OP_CHARI:
  case OP_NOT:
  case OP_NOTI:
  case OP_STAR:
  case OP_MINSTAR:
  case OP_PLUS:
  case OP_MINPLUS:
  case OP_QUERY:
  case OP_MINQUERY:
  case OP_UPTO:
  case OP_MINUPTO:
  case OP_EXACT:
  case OP_POSSTAR:
  case OP_POSPLUS:
  case OP_POSQUERY:
  case OP_POSUPTO:
  case OP_STARI:
  case OP_MINSTARI:
  case OP_PLUSI:
  case OP_MINPLUSI:
  case OP_QUERYI:
  case OP_MINQUERYI:
  case OP_UPTOI:
  case OP_MINUPTOI:
  case OP_EXACTI:
  case OP_POSSTARI:
  case OP_POSPLUSI:
  case OP_POSQUERYI:
  case OP_POSUPTOI:
  case OP_NOTSTAR:
  case OP_NOTMINSTAR:
  case OP_NOTPLUS:
  case OP_NOTMINPLUS:
  case OP_NOTQUERY:
  case OP_NOTMINQUERY:
  case OP_NOTUPTO:
  case OP_NOTMINUPTO:
  case OP_NOTEXACT:
  case OP_NOTPOSSTAR:
  case OP_NOTPOSPLUS:
  case OP_NOTPOSQUERY:
  case OP_NOTPOSUPTO:
  case OP_NOTSTARI:
  case OP_NOTMINSTARI:
  case OP_NOTPLUSI:
  case OP_NOTMINPLUSI:
  case OP_NOTQUERYI:
  case OP_NOTMINQUERYI:
  case OP_NOTUPTOI:
  case OP_NOTMINUPTOI:
  case OP_NOTEXACTI:
  case OP_NOTPOSSTARI:
  case OP_NOTPOSPLUSI:
  case OP_NOTPOSQUERYI:
  case OP_NOTPOSUPTOI:
  next = cc + PRIV(OP_lengths)[*cc];
#ifdef SUPPORT_UNICODE
  /* The table length covers one code unit of the character only; the
  remaining ones are derived from that code unit. */
  if (common->utf && HAS_EXTRALEN(next[-1]))
    next += GET_EXTRALEN(next[-1]);
#endif
  break;

  /* Special cases. */
  case OP_TYPESTAR:
  case OP_TYPEMINSTAR:
  case OP_TYPEPLUS:
  case OP_TYPEMINPLUS:
  case OP_TYPEQUERY:
  case OP_TYPEMINQUERY:
  case OP_TYPEUPTO:
  case OP_TYPEMINUPTO:
  case OP_TYPEEXACT:
  case OP_TYPEPOSSTAR:
  case OP_TYPEPOSPLUS:
  case OP_TYPEPOSQUERY:
  case OP_TYPEPOSUPTO:
  next = cc + PRIV(OP_lengths)[*cc] - 1;
  break;

  case OP_ANYBYTE:
  next = cc + 1;
#ifdef SUPPORT_UNICODE
  if (common->utf)
    next = NULL;
#endif
  break;

  /* The total length is stored after the two link-sized fields. */
  case OP_CALLOUT_STR:
  next = cc + GET(cc, 1 + 2*LINK_SIZE);
  break;

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
  /* The total length is stored right after the opcode. */
  case OP_XCLASS:
  next = cc + GET(cc, 1);
  break;
#endif

  /* cc[1] is added on top of three fixed code units. */
  case OP_MARK:
  case OP_COMMIT_ARG:
  case OP_PRUNE_ARG:
  case OP_SKIP_ARG:
  case OP_THEN_ARG:
  next = cc + 1 + 2 + cc[1];
  break;

  default:
  SLJIT_UNREACHABLE();
  next = NULL;
  break;
  }

return next;
}
1116
check_opcode_types(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend)1117 static BOOL check_opcode_types(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend)
1118 {
1119 int count;
1120 PCRE2_SPTR slot;
1121 PCRE2_SPTR assert_back_end = cc - 1;
1122 PCRE2_SPTR assert_na_end = cc - 1;
1123
1124 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
1125 while (cc < ccend)
1126 {
1127 switch(*cc)
1128 {
1129 case OP_SET_SOM:
1130 common->has_set_som = TRUE;
1131 common->might_be_empty = TRUE;
1132 cc += 1;
1133 break;
1134
1135 case OP_REFI:
1136 #ifdef SUPPORT_UNICODE
1137 if (common->iref_ptr == 0)
1138 {
1139 common->iref_ptr = common->ovector_start;
1140 common->ovector_start += 3 * sizeof(sljit_sw);
1141 }
1142 #endif /* SUPPORT_UNICODE */
1143 /* Fall through. */
1144 case OP_REF:
1145 common->optimized_cbracket[GET2(cc, 1)] = 0;
1146 cc += 1 + IMM2_SIZE;
1147 break;
1148
1149 case OP_ASSERT_NA:
1150 case OP_ASSERTBACK_NA:
1151 slot = bracketend(cc);
1152 if (slot > assert_na_end)
1153 assert_na_end = slot;
1154 cc += 1 + LINK_SIZE;
1155 break;
1156
1157 case OP_CBRAPOS:
1158 case OP_SCBRAPOS:
1159 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
1160 cc += 1 + LINK_SIZE + IMM2_SIZE;
1161 break;
1162
1163 case OP_COND:
1164 case OP_SCOND:
1165 /* Only AUTO_CALLOUT can insert this opcode. We do
1166 not intend to support this case. */
1167 if (cc[1 + LINK_SIZE] == OP_CALLOUT || cc[1 + LINK_SIZE] == OP_CALLOUT_STR)
1168 return FALSE;
1169 cc += 1 + LINK_SIZE;
1170 break;
1171
1172 case OP_CREF:
1173 common->optimized_cbracket[GET2(cc, 1)] = 0;
1174 cc += 1 + IMM2_SIZE;
1175 break;
1176
1177 case OP_DNREF:
1178 case OP_DNREFI:
1179 case OP_DNCREF:
1180 count = GET2(cc, 1 + IMM2_SIZE);
1181 slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
1182 while (count-- > 0)
1183 {
1184 common->optimized_cbracket[GET2(slot, 0)] = 0;
1185 slot += common->name_entry_size;
1186 }
1187 cc += 1 + 2 * IMM2_SIZE;
1188 break;
1189
1190 case OP_RECURSE:
1191 /* Set its value only once. */
1192 if (common->recursive_head_ptr == 0)
1193 {
1194 common->recursive_head_ptr = common->ovector_start;
1195 common->ovector_start += sizeof(sljit_sw);
1196 }
1197 cc += 1 + LINK_SIZE;
1198 break;
1199
1200 case OP_CALLOUT:
1201 case OP_CALLOUT_STR:
1202 if (common->capture_last_ptr == 0)
1203 {
1204 common->capture_last_ptr = common->ovector_start;
1205 common->ovector_start += sizeof(sljit_sw);
1206 }
1207 cc += (*cc == OP_CALLOUT) ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2*LINK_SIZE);
1208 break;
1209
1210 case OP_ASSERTBACK:
1211 slot = bracketend(cc);
1212 if (slot > assert_back_end)
1213 assert_back_end = slot;
1214 cc += 1 + LINK_SIZE;
1215 break;
1216
1217 case OP_THEN_ARG:
1218 common->has_then = TRUE;
1219 common->control_head_ptr = 1;
1220 /* Fall through. */
1221
1222 case OP_COMMIT_ARG:
1223 case OP_PRUNE_ARG:
1224 if (cc < assert_na_end)
1225 return FALSE;
1226 /* Fall through */
1227 case OP_MARK:
1228 if (common->mark_ptr == 0)
1229 {
1230 common->mark_ptr = common->ovector_start;
1231 common->ovector_start += sizeof(sljit_sw);
1232 }
1233 cc += 1 + 2 + cc[1];
1234 break;
1235
1236 case OP_THEN:
1237 common->has_then = TRUE;
1238 common->control_head_ptr = 1;
1239 cc += 1;
1240 break;
1241
1242 case OP_SKIP:
1243 if (cc < assert_back_end)
1244 common->has_skip_in_assert_back = TRUE;
1245 if (cc < assert_na_end)
1246 return FALSE;
1247 cc += 1;
1248 break;
1249
1250 case OP_SKIP_ARG:
1251 common->control_head_ptr = 1;
1252 common->has_skip_arg = TRUE;
1253 if (cc < assert_back_end)
1254 common->has_skip_in_assert_back = TRUE;
1255 if (cc < assert_na_end)
1256 return FALSE;
1257 cc += 1 + 2 + cc[1];
1258 break;
1259
1260 case OP_PRUNE:
1261 case OP_COMMIT:
1262 case OP_ASSERT_ACCEPT:
1263 if (cc < assert_na_end)
1264 return FALSE;
1265 cc++;
1266 break;
1267
1268 default:
1269 cc = next_opcode(common, cc);
1270 if (cc == NULL)
1271 return FALSE;
1272 break;
1273 }
1274 }
1275 return TRUE;
1276 }
1277
/* Upper limit of the early fail state: detect_early_fail stops scanning an
alternative as soon as its counter reaches this value. */
#define EARLY_FAIL_ENHANCE_MAX (3 + 3)

/*
Scans the alternatives of the bracket at cc and marks the iterators which
can record an early fail (or skip) position, by storing a private data
offset combined with type_skip / type_fail / type_fail_range in
common->private_data_ptrs. The private data space is taken from
*private_data_start, which is advanced accordingly.

The start argument represents the current early fail state:
    0 - skip is allowed for all iterators
    1 - fail is allowed for all iterators
    2 - fail is allowed for greedy iterators
 >= 3 - only ranged early fail is allowed, and at most
        (EARLY_FAIL_ENHANCE_MAX - start) more of them

depth is the bracket nesting level of this call; nested brackets are not
scanned once depth reaches 4.

return: the updated value of start (the maximum over all alternatives, or
EARLY_FAIL_ENHANCE_MAX when scanning had to stop inside an alternative)
*/
static int detect_early_fail(compiler_common *common, PCRE2_SPTR cc,
   int *private_data_start, sljit_s32 depth, int start)
{
PCRE2_SPTR begin = cc;
PCRE2_SPTR next_alt;
PCRE2_SPTR end;
/* The iterator opcode found in the current step, or NULL. */
PCRE2_SPTR accelerated_start;
int result = 0;
int count, prev_count;

SLJIT_ASSERT(*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA);
SLJIT_ASSERT(*cc != OP_CBRA || common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] != 0);
SLJIT_ASSERT(start < EARLY_FAIL_ENHANCE_MAX);

/* Skip is never used when the bracket has more than one alternative. */
next_alt = cc + GET(cc, 1);
if (*next_alt == OP_ALT && start < 1)
  start = 1;

/* One iteration per alternative. */
do
  {
  count = start;
  cc += 1 + LINK_SIZE + ((*cc == OP_CBRA) ? IMM2_SIZE : 0);

  /* Inside the switch: "continue" moves on to the next opcode without
  assigning anything, "break" leaves the switch so that the
  accelerated_start check below decides what happens. */
  while (TRUE)
    {
    accelerated_start = NULL;

    switch(*cc)
      {
      case OP_SOD:
      case OP_SOM:
      case OP_SET_SOM:
      case OP_NOT_WORD_BOUNDARY:
      case OP_WORD_BOUNDARY:
      case OP_EODN:
      case OP_EOD:
      case OP_CIRC:
      case OP_CIRCM:
      case OP_DOLL:
      case OP_DOLLM:
      case OP_NOT_UCP_WORD_BOUNDARY:
      case OP_UCP_WORD_BOUNDARY:
      /* Zero width assertions. */
      cc++;
      continue;

      case OP_NOT_DIGIT:
      case OP_DIGIT:
      case OP_NOT_WHITESPACE:
      case OP_WHITESPACE:
      case OP_NOT_WORDCHAR:
      case OP_WORDCHAR:
      case OP_ANY:
      case OP_ALLANY:
      case OP_ANYBYTE:
      case OP_NOT_HSPACE:
      case OP_HSPACE:
      case OP_NOT_VSPACE:
      case OP_VSPACE:
      /* Single character types: skip is no longer allowed. */
      if (count < 1)
        count = 1;
      cc++;
      continue;

      case OP_ANYNL:
      case OP_EXTUNI:
      /* After these only ranged early fail is allowed. */
      if (count < 3)
        count = 3;
      cc++;
      continue;

      case OP_NOTPROP:
      case OP_PROP:
      if (count < 1)
        count = 1;
      cc += 1 + 2;
      continue;

      case OP_CHAR:
      case OP_CHARI:
      case OP_NOT:
      case OP_NOTI:
      if (count < 1)
        count = 1;
      cc += 2;
#ifdef SUPPORT_UNICODE
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      continue;

      case OP_TYPEMINSTAR:
      case OP_TYPEMINPLUS:
      /* State 2 permits fail only for greedy iterators. */
      if (count == 2)
        count = 3;
      /* Fall through */

      case OP_TYPESTAR:
      case OP_TYPEPLUS:
      case OP_TYPEPOSSTAR:
      case OP_TYPEPOSPLUS:
      /* The type or prop opcode is skipped in the next iteration. */
      cc += 1;

      if (cc[0] != OP_ANYNL && cc[0] != OP_EXTUNI)
        {
        accelerated_start = cc - 1;
        break;
        }

      if (count < 3)
        count = 3;
      continue;

      case OP_TYPEEXACT:
      if (count < 1)
        count = 1;
      cc += 1 + IMM2_SIZE;
      continue;

      case OP_TYPEUPTO:
      case OP_TYPEMINUPTO:
      case OP_TYPEPOSUPTO:
      cc += IMM2_SIZE;
      /* Fall through */

      case OP_TYPEQUERY:
      case OP_TYPEMINQUERY:
      case OP_TYPEPOSQUERY:
      /* The type or prop opcode is skipped in the next iteration. */
      if (count < 3)
        count = 3;
      cc += 1;
      continue;

      case OP_MINSTAR:
      case OP_MINPLUS:
      case OP_MINSTARI:
      case OP_MINPLUSI:
      case OP_NOTMINSTAR:
      case OP_NOTMINPLUS:
      case OP_NOTMINSTARI:
      case OP_NOTMINPLUSI:
      /* State 2 permits fail only for greedy iterators. */
      if (count == 2)
        count = 3;
      /* Fall through */

      case OP_STAR:
      case OP_PLUS:
      case OP_POSSTAR:
      case OP_POSPLUS:

      case OP_STARI:
      case OP_PLUSI:
      case OP_POSSTARI:
      case OP_POSPLUSI:

      case OP_NOTSTAR:
      case OP_NOTPLUS:
      case OP_NOTPOSSTAR:
      case OP_NOTPOSPLUS:

      case OP_NOTSTARI:
      case OP_NOTPLUSI:
      case OP_NOTPOSSTARI:
      case OP_NOTPOSPLUSI:
      accelerated_start = cc;
      cc += 2;
#ifdef SUPPORT_UNICODE
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      break;

      case OP_EXACT:
      if (count < 1)
        count = 1;
      cc += 2 + IMM2_SIZE;
#ifdef SUPPORT_UNICODE
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      continue;

      case OP_UPTO:
      case OP_MINUPTO:
      case OP_POSUPTO:
      case OP_UPTOI:
      case OP_MINUPTOI:
      case OP_EXACTI:
      case OP_POSUPTOI:
      case OP_NOTUPTO:
      case OP_NOTMINUPTO:
      case OP_NOTEXACT:
      case OP_NOTPOSUPTO:
      case OP_NOTUPTOI:
      case OP_NOTMINUPTOI:
      case OP_NOTEXACTI:
      case OP_NOTPOSUPTOI:
      /* Skip the repeat count here, the rest is shared with the
      query opcodes below. */
      cc += IMM2_SIZE;
      /* Fall through */

      case OP_QUERY:
      case OP_MINQUERY:
      case OP_POSQUERY:
      case OP_QUERYI:
      case OP_MINQUERYI:
      case OP_POSQUERYI:
      case OP_NOTQUERY:
      case OP_NOTMINQUERY:
      case OP_NOTPOSQUERY:
      case OP_NOTQUERYI:
      case OP_NOTMINQUERYI:
      case OP_NOTPOSQUERYI:
      if (count < 3)
        count = 3;
      cc += 2;
#ifdef SUPPORT_UNICODE
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      continue;

      case OP_CLASS:
      case OP_NCLASS:
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
      case OP_XCLASS:
      accelerated_start = cc;
      cc += ((*cc == OP_XCLASS) ? GET(cc, 1) : (unsigned int)(1 + (32 / sizeof(PCRE2_UCHAR))));
#else
      accelerated_start = cc;
      cc += (1 + (32 / sizeof(PCRE2_UCHAR)));
#endif

      /* cc now points to the opcode after the class, which may be
      a repeat. A "continue" in this inner switch continues the
      enclosing while loop, so accelerated_start is discarded. */
      switch (*cc)
        {
        case OP_CRMINSTAR:
        case OP_CRMINPLUS:
        if (count == 2)
          count = 3;
        /* Fall through */

        case OP_CRSTAR:
        case OP_CRPLUS:
        case OP_CRPOSSTAR:
        case OP_CRPOSPLUS:
        cc++;
        break;

        case OP_CRRANGE:
        case OP_CRMINRANGE:
        case OP_CRPOSRANGE:
        if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
          {
          /* Exact repeat. */
          cc += 1 + 2 * IMM2_SIZE;
          if (count < 1)
            count = 1;
          continue;
          }

        cc += 2 * IMM2_SIZE;
        /* Fall through */
        case OP_CRQUERY:
        case OP_CRMINQUERY:
        case OP_CRPOSQUERY:
        cc++;
        if (count < 3)
          count = 3;
        continue;

        default:
        /* No repeat. */
        if (count < 1)
          count = 1;
        continue;
        }
      break;

      case OP_BRA:
      case OP_CBRA:
      /* The nested call starts from the state before this bracket. */
      prev_count = count;
      if (count < 1)
        count = 1;

      /* Nesting limit: stop scanning this alternative. */
      if (depth >= 4)
        break;

      if (count < 3 && cc[GET(cc, 1)] == OP_ALT)
        count = 3;

      /* Only brackets closed by a plain OP_KET are scanned, and a
      capturing bracket only if its optimized_cbracket entry is set. */
      end = bracketend(cc);
      if (end[-1 - LINK_SIZE] != OP_KET || (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0))
        break;

      prev_count = detect_early_fail(common, cc, private_data_start, depth + 1, prev_count);

      if (prev_count > count)
        count = prev_count;

      /* The nested call marked the inner bracket, so mark this one too. */
      if (PRIVATE_DATA(cc) != 0)
        common->private_data_ptrs[begin - common->start] = 1;

      if (count < EARLY_FAIL_ENHANCE_MAX)
        {
        cc = end;
        continue;
        }
      break;

      case OP_KET:
      SLJIT_ASSERT(PRIVATE_DATA(cc) == 0);
      /* The OP_KET of the bracket being scanned ends the alternative,
      any earlier one belongs to a nested bracket and is skipped. */
      if (cc >= next_alt)
        break;
      cc += 1 + LINK_SIZE;
      continue;
      }

    /* No iterator was found: the scan of this alternative is over. */
    if (accelerated_start == NULL)
      break;

    /* The value stored for the iterator is the private data offset
    shifted left by 3, combined with the kind of enhancement. */
    if (count == 0)
      {
      common->fast_forward_bc_ptr = accelerated_start;
      common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_skip;
      *private_data_start += sizeof(sljit_sw);
      count = 4;
      }
    else if (count < 3)
      {
      common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_fail;

      if (common->early_fail_start_ptr == 0)
        common->early_fail_start_ptr = *private_data_start;

      *private_data_start += sizeof(sljit_sw);
      common->early_fail_end_ptr = *private_data_start;

      if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
        return EARLY_FAIL_ENHANCE_MAX;

      count = 4;
      }
    else
      {
      common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_fail_range;

      if (common->early_fail_start_ptr == 0)
        common->early_fail_start_ptr = *private_data_start;

      /* A ranged early fail takes two words. */
      *private_data_start += 2 * sizeof(sljit_sw);
      common->early_fail_end_ptr = *private_data_start;

      if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
        return EARLY_FAIL_ENHANCE_MAX;

      count++;
      }

    /* Cannot be part of a repeat. */
    common->private_data_ptrs[begin - common->start] = 1;

    if (count >= EARLY_FAIL_ENHANCE_MAX)
      break;
    }

  /* The scan stopped before the end of the alternative. */
  if (*cc != OP_ALT && *cc != OP_KET)
    result = EARLY_FAIL_ENHANCE_MAX;
  else if (result < count)
    result = count;

  cc = next_alt;
  next_alt = cc + GET(cc, 1);
  }
while (*cc == OP_ALT);

return result;
}
1666
/* Returns the number of private data words (0, 1 or 2) which
set_private_data_ptrs reserves for a character class followed by the
repeat opcode at cc. Returns 0 when cc is not one of the handled
repeat opcodes. */
static int get_class_iterator_size(PCRE2_SPTR cc)
{
sljit_u32 lower;
sljit_u32 upper;
sljit_u32 span;

if (*cc == OP_CRSTAR || *cc == OP_CRPLUS)
  return 2;

if (*cc == OP_CRMINSTAR || *cc == OP_CRMINPLUS
    || *cc == OP_CRQUERY || *cc == OP_CRMINQUERY)
  return 1;

if (*cc != OP_CRRANGE && *cc != OP_CRMINRANGE)
  return 0;

lower = GET2(cc, 1);
upper = GET2(cc, 1 + IMM2_SIZE);

/* An upper limit of zero gets the same size as the star / plus forms. */
if (upper == 0)
  return (*cc == OP_CRRANGE) ? 2 : 1;

/* Otherwise the size is the width of the range, capped at two. */
span = upper - lower;
if (span > 2)
  span = 2;
return span;
}
1698
detect_repeat(compiler_common * common,PCRE2_SPTR begin)1699 static BOOL detect_repeat(compiler_common *common, PCRE2_SPTR begin)
1700 {
1701 PCRE2_SPTR end = bracketend(begin);
1702 PCRE2_SPTR next;
1703 PCRE2_SPTR next_end;
1704 PCRE2_SPTR max_end;
1705 PCRE2_UCHAR type;
1706 sljit_sw length = end - begin;
1707 sljit_s32 min, max, i;
1708
1709 /* Detect fixed iterations first. */
1710 if (end[-(1 + LINK_SIZE)] != OP_KET || PRIVATE_DATA(begin) != 0)
1711 return FALSE;
1712
1713 /* /(?:AB){4,6}/ is currently converted to /(?:AB){3}(?AB){1,3}/
1714 * Skip the check of the second part. */
1715 if (PRIVATE_DATA(end - LINK_SIZE) != 0)
1716 return TRUE;
1717
1718 next = end;
1719 min = 1;
1720 while (1)
1721 {
1722 if (*next != *begin)
1723 break;
1724 next_end = bracketend(next);
1725 if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
1726 break;
1727 next = next_end;
1728 min++;
1729 }
1730
1731 if (min == 2)
1732 return FALSE;
1733
1734 max = 0;
1735 max_end = next;
1736 if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
1737 {
1738 type = *next;
1739 while (1)
1740 {
1741 if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
1742 break;
1743 next_end = bracketend(next + 2 + LINK_SIZE);
1744 if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
1745 break;
1746 next = next_end;
1747 max++;
1748 }
1749
1750 if (next[0] == type && next[1] == *begin && max >= 1)
1751 {
1752 next_end = bracketend(next + 1);
1753 if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
1754 {
1755 for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
1756 if (*next_end != OP_KET)
1757 break;
1758
1759 if (i == max)
1760 {
1761 common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
1762 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
1763 /* +2 the original and the last. */
1764 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
1765 if (min == 1)
1766 return TRUE;
1767 min--;
1768 max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
1769 }
1770 }
1771 }
1772 }
1773
1774 if (min >= 3)
1775 {
1776 common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
1777 common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
1778 common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
1779 return TRUE;
1780 }
1781
1782 return FALSE;
1783 }
1784
/* Groups of case labels for iterator opcodes. They are used by the switch
in set_private_data_ptrs, where the numeric suffix matches the number of
private data words requested for the opcodes of the group. */

/* Single character iterators: one private data word. */
#define CASE_ITERATOR_PRIVATE_DATA_1 \
    case OP_MINSTAR: \
    case OP_MINPLUS: \
    case OP_QUERY: \
    case OP_MINQUERY: \
    case OP_MINSTARI: \
    case OP_MINPLUSI: \
    case OP_QUERYI: \
    case OP_MINQUERYI: \
    case OP_NOTMINSTAR: \
    case OP_NOTMINPLUS: \
    case OP_NOTQUERY: \
    case OP_NOTMINQUERY: \
    case OP_NOTMINSTARI: \
    case OP_NOTMINPLUSI: \
    case OP_NOTQUERYI: \
    case OP_NOTMINQUERYI:

/* Single character iterators: two private data words. */
#define CASE_ITERATOR_PRIVATE_DATA_2A \
    case OP_STAR: \
    case OP_PLUS: \
    case OP_STARI: \
    case OP_PLUSI: \
    case OP_NOTSTAR: \
    case OP_NOTPLUS: \
    case OP_NOTSTARI: \
    case OP_NOTPLUSI:

/* Single character iterators with a repeat count operand (IMM2_SIZE
longer than the 2A group): two private data words. */
#define CASE_ITERATOR_PRIVATE_DATA_2B \
    case OP_UPTO: \
    case OP_MINUPTO: \
    case OP_UPTOI: \
    case OP_MINUPTOI: \
    case OP_NOTUPTO: \
    case OP_NOTMINUPTO: \
    case OP_NOTUPTOI: \
    case OP_NOTMINUPTOI:

/* Character type iterators: one private data word. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
    case OP_TYPEMINSTAR: \
    case OP_TYPEMINPLUS: \
    case OP_TYPEQUERY: \
    case OP_TYPEMINQUERY:

/* Character type iterators: two private data words, except that
set_private_data_ptrs requests none when the type is OP_ANYNL or
OP_EXTUNI. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
    case OP_TYPESTAR: \
    case OP_TYPEPLUS:

/* Character type iterators with a repeat count operand. Note that
set_private_data_ptrs handles these two opcodes with separate case
labels instead of this macro. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
    case OP_TYPEUPTO: \
    case OP_TYPEMINUPTO:
1836
/* Walks the compiled pattern from common->start to ccend and assigns
private data offsets to the opcodes which need them. The offsets are
stored in common->private_data_ptrs, indexed by the position of the
opcode relative to common->start.

private_data_start: on entry the first free private data offset, on
  return the first free offset after all assignments.

The walk stops early when the offset exceeds SLJIT_MAX_LOCAL_SIZE. */
static void set_private_data_ptrs(compiler_common *common, int *private_data_start, PCRE2_SPTR ccend)
{
PCRE2_SPTR cc = common->start;
PCRE2_SPTR alternative;
/* Character iterators located before this position get no private slot. */
PCRE2_SPTR end = NULL;
int private_data_ptr = *private_data_start;
/* space: number of private data words for a character iterator.
size: length of the current opcode, a negative value means that a
  character follows, which might be longer than one code unit in UTF mode.
bracketlen: length of the opening opcode of a bracket. */
int space, size, bracketlen;
BOOL repeat_check = TRUE;

while (cc < ccend)
  {
  space = 0;
  size = 0;
  bracketlen = 0;
  if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
    break;

  /* When the bracket is prefixed by a zero iteration, skip the repeat check (at this point). */
  if (repeat_check && (*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND))
    {
    if (detect_repeat(common, cc))
      {
      /* These brackets are converted to repeats, so no global
      based single character repeat is allowed. */
      if (cc >= end)
        end = bracketend(cc);
      }
    }
  repeat_check = TRUE;

  switch(*cc)
    {
    case OP_KET:
    /* A non-zero value after the OP_KET was stored by detect_repeat:
    it is the length of the repeated copies, which are skipped. */
    if (common->private_data_ptrs[cc + 1 - common->start] != 0)
      {
      common->private_data_ptrs[cc - common->start] = private_data_ptr;
      private_data_ptr += sizeof(sljit_sw);
      cc += common->private_data_ptrs[cc + 1 - common->start];
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ASSERT_NA:
    case OP_ONCE:
    case OP_SCRIPT_RUN:
    case OP_BRAPOS:
    case OP_SBRA:
    case OP_SBRAPOS:
    case OP_SCOND:
    /* These brackets always get one private data word. */
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw);
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_ASSERTBACK_NA:
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw);

    /* One more word is reserved when find_vreverse() reports TRUE,
    and the word after the opcode is set to 1 as a marker. */
    if (find_vreverse(cc))
      {
      common->private_data_ptrs[cc + 1 - common->start] = 1;
      private_data_ptr += sizeof(sljit_sw);
      }

    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw);
    bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    /* Might be a hidden SCOND. */
    common->private_data_ptrs[cc - common->start] = 0;
    alternative = cc + GET(cc, 1);
    if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
      {
      common->private_data_ptrs[cc - common->start] = private_data_ptr;
      private_data_ptr += sizeof(sljit_sw);
      }
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_BRA:
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_SCBRA:
    bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_BRAZERO:
    case OP_BRAMINZERO:
    case OP_BRAPOSZERO:
    /* The bracket which follows is not checked by detect_repeat. */
    size = 1;
    repeat_check = FALSE;
    break;

    CASE_ITERATOR_PRIVATE_DATA_1
    size = -2;
    space = 1;
    break;

    CASE_ITERATOR_PRIVATE_DATA_2A
    size = -2;
    space = 2;
    break;

    CASE_ITERATOR_PRIVATE_DATA_2B
    size = -(2 + IMM2_SIZE);
    space = 2;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
    size = 1;
    space = 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
    size = 1;
    if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
      space = 2;
    break;

    case OP_TYPEUPTO:
    size = 1 + IMM2_SIZE;
    if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
      space = 2;
    break;

    case OP_TYPEMINUPTO:
    size = 1 + IMM2_SIZE;
    space = 2;
    break;

    case OP_CLASS:
    case OP_NCLASS:
    /* The space depends on the repeat opcode after the class. */
    size = 1 + 32 / sizeof(PCRE2_UCHAR);
    space = get_class_iterator_size(cc + size);
    break;

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
    size = GET(cc, 1);
    space = get_class_iterator_size(cc + size);
    break;
#endif

    default:
    /* Everything else is simply stepped over. */
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }

  /* Character iterators, which are not inside a repeated bracket,
     gets a private slot instead of allocating it on the stack. */
  if (space > 0 && cc >= end)
    {
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw) * space;
    }

  if (size != 0)
    {
    if (size < 0)
      {
      /* The opcode ends with a character. */
      cc += -size;
#ifdef SUPPORT_UNICODE
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      }
    else
      cc += size;
    }

  if (bracketlen > 0)
    {
    /* Update end only when cc is not inside the range covered by it.
    A bracket closed by a plain OP_KET sets no limit. */
    if (cc >= end)
      {
      end = bracketend(cc);
      if (end[-1 - LINK_SIZE] == OP_KET)
        end = NULL;
      }
    cc += bracketlen;
    }
  }
*private_data_start = private_data_ptr;
}
2033
/* Computes the size of the frame which init_frame creates for the opcodes
in the range [cc, ccend).

When ccend is NULL, cc must point to a bracket and the range is the content
of this bracket. recursive tells that the frame is computed for a recursion,
in which case the start of match and mark values are not counted.

*needs_control_head is set to TRUE when a (*THEN) or an argumented control
verb or mark is found while common->control_head_ptr is non-zero (or always
when DEBUG_FORCE_CONTROL_HEAD is enabled), to FALSE otherwise.

Returns the number of words needed by the frame, or with a frame_types
value (always < 0) if no need for frame: no_frame or no_stack. */
static int get_framesize(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, BOOL recursive, BOOL *needs_control_head)
{
int length = 0;
int possessive = 0;
BOOL stack_restore = FALSE;
BOOL setsom_found = recursive;
BOOL setmark_found = recursive;
/* The last capture is a local variable even for recursions. */
BOOL capture_last_found = FALSE;

#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
SLJIT_ASSERT(common->control_head_ptr != 0);
*needs_control_head = TRUE;
#else
*needs_control_head = FALSE;
#endif

if (ccend == NULL)
  {
  ccend = bracketend(cc) - (1 + LINK_SIZE);
  if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
    {
    /* The words needed for the bracket itself. If nothing else is
    added to length, no frame is needed (see the end of the function). */
    possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
    /* This is correct regardless of common->capture_last_ptr. */
    capture_last_found = TRUE;
    }
  cc = next_opcode(common, cc);
  }

SLJIT_ASSERT(cc != NULL);
while (cc < ccend)
  switch(*cc)
    {
    case OP_SET_SOM:
    SLJIT_ASSERT(common->has_set_som);
    stack_restore = TRUE;
    /* Two words, counted only once. */
    if (!setsom_found)
      {
      length += 2;
      setsom_found = TRUE;
      }
    cc += 1;
    break;

    case OP_MARK:
    case OP_COMMIT_ARG:
    case OP_PRUNE_ARG:
    case OP_THEN_ARG:
    SLJIT_ASSERT(common->mark_ptr != 0);
    stack_restore = TRUE;
    /* Two words, counted only once. */
    if (!setmark_found)
      {
      length += 2;
      setmark_found = TRUE;
      }
    if (common->control_head_ptr != 0)
      *needs_control_head = TRUE;
    cc += 1 + 2 + cc[1];
    break;

    case OP_RECURSE:
    /* A recursion might change any of the three values, so all
    of those which are in use are counted. */
    stack_restore = TRUE;
    if (common->has_set_som && !setsom_found)
      {
      length += 2;
      setsom_found = TRUE;
      }
    if (common->mark_ptr != 0 && !setmark_found)
      {
      length += 2;
      setmark_found = TRUE;
      }
    if (common->capture_last_ptr != 0 && !capture_last_found)
      {
      length += 2;
      capture_last_found = TRUE;
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_CBRAPOS:
    case OP_SCBRA:
    case OP_SCBRAPOS:
    stack_restore = TRUE;
    if (common->capture_last_ptr != 0 && !capture_last_found)
      {
      length += 2;
      capture_last_found = TRUE;
      }
    /* Three words for each capturing bracket. */
    length += 3;
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_THEN:
    stack_restore = TRUE;
    if (common->control_head_ptr != 0)
      *needs_control_head = TRUE;
    cc ++;
    break;

    default:
    /* Any opcode which is not listed below sets stack_restore. */
    stack_restore = TRUE;
    /* Fall through. */

    /* The opcodes below are stepped over without setting stack_restore. */
    case OP_NOT_WORD_BOUNDARY:
    case OP_WORD_BOUNDARY:
    case OP_NOT_DIGIT:
    case OP_DIGIT:
    case OP_NOT_WHITESPACE:
    case OP_WHITESPACE:
    case OP_NOT_WORDCHAR:
    case OP_WORDCHAR:
    case OP_ANY:
    case OP_ALLANY:
    case OP_ANYBYTE:
    case OP_NOTPROP:
    case OP_PROP:
    case OP_ANYNL:
    case OP_NOT_HSPACE:
    case OP_HSPACE:
    case OP_NOT_VSPACE:
    case OP_VSPACE:
    case OP_EXTUNI:
    case OP_EODN:
    case OP_EOD:
    case OP_CIRC:
    case OP_CIRCM:
    case OP_DOLL:
    case OP_DOLLM:
    case OP_CHAR:
    case OP_CHARI:
    case OP_NOT:
    case OP_NOTI:

    case OP_EXACT:
    case OP_POSSTAR:
    case OP_POSPLUS:
    case OP_POSQUERY:
    case OP_POSUPTO:

    case OP_EXACTI:
    case OP_POSSTARI:
    case OP_POSPLUSI:
    case OP_POSQUERYI:
    case OP_POSUPTOI:

    case OP_NOTEXACT:
    case OP_NOTPOSSTAR:
    case OP_NOTPOSPLUS:
    case OP_NOTPOSQUERY:
    case OP_NOTPOSUPTO:

    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI:
    case OP_NOTPOSPLUSI:
    case OP_NOTPOSQUERYI:
    case OP_NOTPOSUPTOI:

    case OP_TYPEEXACT:
    case OP_TYPEPOSSTAR:
    case OP_TYPEPOSPLUS:
    case OP_TYPEPOSQUERY:
    case OP_TYPEPOSUPTO:

    case OP_CLASS:
    case OP_NCLASS:
    case OP_XCLASS:

    case OP_CALLOUT:
    case OP_CALLOUT_STR:

    case OP_NOT_UCP_WORD_BOUNDARY:
    case OP_UCP_WORD_BOUNDARY:

    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }

/* Possessive quantifiers can use a special case. */
if (SLJIT_UNLIKELY(possessive == length))
  return stack_restore ? no_frame : no_stack;

/* One more word is added to a non-empty frame (init_frame stores a
zero there after the items). */
if (length > 0)
  return length + 1;
return stack_restore ? no_frame : no_stack;
}
2223
/* Emits the code which saves a frame on the backtracking stack for the
opcodes in the range [cc, ccend). When ccend is NULL, cc must point to a
bracket and the range is the content of this bracket.

The frame is written from STACK(stackpos) towards STACK(stacktop), one word
at a time. Each item is an identifier word followed by the saved value(s):
  - start of match:   -OVECTOR(0), then its value
  - mark:             -common->mark_ptr, then its value
  - last capture:     -common->capture_last_ptr, then its value
  - capture bracket:  OVECTOR(offset), then the two ovector values
The frame is closed by a zero word. The set of items must match the size
computed by get_framesize (checked by the last assertion). */
static void init_frame(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, int stackpos, int stacktop)
{
DEFINE_COMPILER;
BOOL setsom_found = FALSE;
BOOL setmark_found = FALSE;
/* The last capture is a local variable even for recursions. */
BOOL capture_last_found = FALSE;
int offset;

/* >= 1 + shortest item size (2) */
SLJIT_UNUSED_ARG(stacktop);
SLJIT_ASSERT(stackpos >= stacktop + 2);

/* From here stackpos is a memory offset instead of an index. */
stackpos = STACK(stackpos);
if (ccend == NULL)
  {
  ccend = bracketend(cc) - (1 + LINK_SIZE);
  /* The opening opcode is skipped, except for OP_CBRAPOS / OP_SCBRAPOS,
  which are processed by the capture bracket case below. */
  if (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS)
    cc = next_opcode(common, cc);
  }

SLJIT_ASSERT(cc != NULL);
while (cc < ccend)
  switch(*cc)
    {
    case OP_SET_SOM:
    SLJIT_ASSERT(common->has_set_som);
    /* Saved only once. */
    if (!setsom_found)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
      stackpos -= SSIZE_OF(sw);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
      stackpos -= SSIZE_OF(sw);
      setsom_found = TRUE;
      }
    cc += 1;
    break;

    case OP_MARK:
    case OP_COMMIT_ARG:
    case OP_PRUNE_ARG:
    case OP_THEN_ARG:
    SLJIT_ASSERT(common->mark_ptr != 0);
    /* Saved only once. */
    if (!setmark_found)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
      stackpos -= SSIZE_OF(sw);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
      stackpos -= SSIZE_OF(sw);
      setmark_found = TRUE;
      }
    cc += 1 + 2 + cc[1];
    break;

    case OP_RECURSE:
    /* Saves every value which is in use and has not been saved yet. */
    if (common->has_set_som && !setsom_found)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
      stackpos -= SSIZE_OF(sw);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
      stackpos -= SSIZE_OF(sw);
      setsom_found = TRUE;
      }
    if (common->mark_ptr != 0 && !setmark_found)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
      stackpos -= SSIZE_OF(sw);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
      stackpos -= SSIZE_OF(sw);
      setmark_found = TRUE;
      }
    if (common->capture_last_ptr != 0 && !capture_last_found)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
      stackpos -= SSIZE_OF(sw);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
      stackpos -= SSIZE_OF(sw);
      capture_last_found = TRUE;
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_CBRAPOS:
    case OP_SCBRA:
    case OP_SCBRAPOS:
    if (common->capture_last_ptr != 0 && !capture_last_found)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
      stackpos -= SSIZE_OF(sw);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
      stackpos -= SSIZE_OF(sw);
      capture_last_found = TRUE;
      }
    /* Three words: the ovector position and the two ovector values
    of the capture group. */
    offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
    stackpos -= SSIZE_OF(sw);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
    stackpos -= SSIZE_OF(sw);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
    stackpos -= SSIZE_OF(sw);

    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    default:
    /* Nothing to save for other opcodes. */
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }

/* The frame is closed by a zero word. */
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
SLJIT_ASSERT(stackpos == STACK(stacktop));
}
2346
2347 #define RECURSE_TMP_REG_COUNT 3
2348
/* State of a small ring of temporary registers used by the
delayed_mem_copy_* helpers. Each slot loads a word into its register and
remembers where that word still has to be stored; the store is emitted only
when the slot is reused or when the copy is finished. */
typedef struct delayed_mem_copy_status {
  /* Compiler that receives the emitted instructions. */
  struct sljit_compiler *compiler;
  /* Base register of the pending store of each slot; -1 means that the
  slot has no pending store. */
  int store_bases[RECURSE_TMP_REG_COUNT];
  /* Offset (relative to store_bases[i]) of the pending store. */
  int store_offsets[RECURSE_TMP_REG_COUNT];
  /* Register that carries the in-flight word of each slot. */
  int tmp_regs[RECURSE_TMP_REG_COUNT];
  /* Register that receives the previous content of tmp_regs[i] while the
  slot is in use. It is only written / read back when
  sljit_get_register_index() reports a negative index for it (a "virtual"
  register); otherwise it must be equal to tmp_regs[i]. */
  int saved_tmp_regs[RECURSE_TMP_REG_COUNT];
  /* Index of the slot used by the next move. */
  int next_tmp_reg;
} delayed_mem_copy_status;
2357
/* Prepares a delayed copy sequence.

The caller must fill status->tmp_regs[] and status->saved_tmp_regs[] before
calling this function. Every slot is marked as having no pending store, the
ring position is rewound to the first slot, and the compiler of 'common' is
recorded for the later emit calls. */
static void delayed_mem_copy_init(delayed_mem_copy_status *status, compiler_common *common)
{
int slot = 0;

status->compiler = common->compiler;
status->next_tmp_reg = 0;

while (slot < RECURSE_TMP_REG_COUNT)
  {
  /* The register selection is the responsibility of the caller. */
  SLJIT_ASSERT(status->tmp_regs[slot] >= 0);
  /* A save register with a valid (non-negative) register index must be
  the temporary register itself. */
  SLJIT_ASSERT(sljit_get_register_index(SLJIT_GP_REGISTER, status->saved_tmp_regs[slot]) < 0
    || status->tmp_regs[slot] == status->saved_tmp_regs[slot]);

  /* -1: nothing is waiting to be stored from this slot. */
  status->store_bases[slot] = -1;
  slot++;
  }
}
2372
/* Queues the copy of one word from [load_base + load_offset] to
[store_base + store_offset].

The load is emitted immediately into the temporary register of the current
ring slot; the matching store is only recorded and is emitted when the slot
is reused (by a later call) or by delayed_mem_copy_finish(). Before a slot is
used for the first time, the current value of its temporary register is moved
to the save register when the save register is a virtual one. */
static void delayed_mem_copy_move(delayed_mem_copy_status *status, int load_base, sljit_sw load_offset,
  int store_base, sljit_sw store_offset)
{
struct sljit_compiler *compiler = status->compiler;
const int slot = status->next_tmp_reg;
const int reg = status->tmp_regs[slot];
const int pending_base = status->store_bases[slot];

SLJIT_ASSERT(load_base > 0 && store_base > 0);

if (pending_base != -1)
  {
  /* The slot still holds an unsaved word: write it out first. */
  OP1(SLJIT_MOV, SLJIT_MEM1(pending_base), status->store_offsets[slot], reg, 0);
  }
else if (sljit_get_register_index(SLJIT_GP_REGISTER, status->saved_tmp_regs[slot]) < 0)
  {
  /* Preserve virtual registers. */
  OP1(SLJIT_MOV, status->saved_tmp_regs[slot], 0, reg, 0);
  }

/* Start the new copy and remember where its result has to go. */
OP1(SLJIT_MOV, reg, 0, SLJIT_MEM1(load_base), load_offset);
status->store_bases[slot] = store_base;
status->store_offsets[slot] = store_offset;

/* Advance to the next slot of the ring. */
status->next_tmp_reg = (slot + 1) % RECURSE_TMP_REG_COUNT;
}
2397
/* Completes a delayed copy sequence.

Visits every ring slot once, starting with the slot that would be used next
(so the pending stores are emitted in the order their loads were issued).
For each slot with a pending store the store is emitted, and the temporary
register is reloaded from its save register when the latter is virtual. */
static void delayed_mem_copy_finish(delayed_mem_copy_status *status)
{
struct sljit_compiler *compiler = status->compiler;
int slot = status->next_tmp_reg;
int remaining;

for (remaining = RECURSE_TMP_REG_COUNT; remaining > 0; remaining--)
  {
  if (status->store_bases[slot] != -1)
    {
    const int reg = status->tmp_regs[slot];
    const int parked = status->saved_tmp_regs[slot];

    OP1(SLJIT_MOV, SLJIT_MEM1(status->store_bases[slot]), status->store_offsets[slot], reg, 0);

    /* Restore virtual registers. */
    if (sljit_get_register_index(SLJIT_GP_REGISTER, parked) < 0)
      OP1(SLJIT_MOV, reg, 0, parked, 0);
    }

  slot = (slot + 1) % RECURSE_TMP_REG_COUNT;
  }
}
2421
2422 #undef RECURSE_TMP_REG_COUNT
2423
recurse_check_bit(compiler_common * common,sljit_sw bit_index)2424 static BOOL recurse_check_bit(compiler_common *common, sljit_sw bit_index)
2425 {
2426 uint8_t *byte;
2427 uint8_t mask;
2428
2429 SLJIT_ASSERT((bit_index & (sizeof(sljit_sw) - 1)) == 0);
2430
2431 bit_index >>= SLJIT_WORD_SHIFT;
2432
2433 SLJIT_ASSERT((bit_index >> 3) < common->recurse_bitset_size);
2434
2435 mask = 1 << (bit_index & 0x7);
2436 byte = common->recurse_bitset + (bit_index >> 3);
2437
2438 if (*byte & mask)
2439 return FALSE;
2440
2441 *byte |= mask;
2442 return TRUE;
2443 }
2444
/* Bit flags collected by get_recurse_data_length() while it scans a
recursion body; the values can be or-ed together. */
enum get_recurse_flags {
  /* A PRUNE / SKIP / COMMIT / THEN style verb was seen. */
  recurse_flag_quit_found = 0x01,
  /* OP_ACCEPT or OP_ASSERT_ACCEPT was seen. */
  recurse_flag_accept_found = 0x02,
  /* OP_SET_SOM was seen, or OP_RECURSE while has_set_som is set. */
  recurse_flag_setsom_found = 0x04,
  /* A mark-setting verb was seen, or OP_RECURSE while mark_ptr is in use. */
  recurse_flag_setmark_found = 0x08,
  /* The control head slot has to be saved as well. */
  recurse_flag_control_head_found = 0x10
};
2452
/* Computes how many machine words are needed to save the state of the
bytecode range [cc, ccend) for a recursion.

The range is scanned opcode by opcode. Every private data slot, capture
(ovector) slot and special pointer that the opcodes refer to is counted
once; common->recurse_bitset (cleared on entry) records which slots have
already been counted. The traversal and the counting must stay in sync with
copy_recurse_data(), which emits the copies for the same slots.

Arguments:
  common        compiler state; recurse_bitset is overwritten
  cc            first opcode of the range
  ccend         end of the range (exclusive)
  result_flags  receives the or-ed get_recurse_flags bits that were found

Returns the number of words. */
static int get_recurse_data_length(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, uint32_t *result_flags)
{
/* One word is always needed: copy_recurse_data() uses the first slot for
recursive_head_ptr. */
int length = 1;
int size, offset;
PCRE2_SPTR alternative;
uint32_t recurse_flags = 0;

memset(common->recurse_bitset, 0, common->recurse_bitset_size);

#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
SLJIT_ASSERT(common->control_head_ptr != 0);
recurse_flags |= recurse_flag_control_head_found;
#endif

/* Calculate the sum of the private machine words. */
while (cc < ccend)
  {
  size = 0;
  switch(*cc)
    {
    case OP_SET_SOM:
    SLJIT_ASSERT(common->has_set_som);
    recurse_flags |= recurse_flag_setsom_found;
    cc += 1;
    break;

    case OP_RECURSE:
    /* A nested recursion may change the start-of-match, the mark and the
    last capture values, so they are treated as if they were set here. */
    if (common->has_set_som)
      recurse_flags |= recurse_flag_setsom_found;
    if (common->mark_ptr != 0)
      recurse_flags |= recurse_flag_setmark_found;
    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
      length++;
    cc += 1 + LINK_SIZE;
    break;

    case OP_KET:
    offset = PRIVATE_DATA(cc);
    if (offset != 0)
      {
      if (recurse_check_bit(common, offset))
        length++;
      /* NOTE(review): PRIVATE_DATA(cc + 1) is used as an extra skip
      distance here; its exact meaning is not visible in this function. */
      SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
      cc += PRIVATE_DATA(cc + 1);
      }
    cc += 1 + LINK_SIZE;
    break;

    /* Brackets that always own exactly one private data word. */
    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ASSERT_NA:
    case OP_ASSERTBACK_NA:
    case OP_ONCE:
    case OP_SCRIPT_RUN:
    case OP_BRAPOS:
    case OP_SBRA:
    case OP_SBRAPOS:
    case OP_SCOND:
    SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
    if (recurse_check_bit(common, PRIVATE_DATA(cc)))
      length++;
    cc += 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_SCBRA:
    /* Two ovector words per capture, plus the private ovector word (unless
    the bracket is marked in optimized_cbracket) and the last-capture slot. */
    offset = GET2(cc, 1 + LINK_SIZE);
    if (recurse_check_bit(common, OVECTOR(offset << 1)))
      {
      /* NOTE(review): this recurse_check_bit() call has a side effect and
      is evaluated only if SLJIT_ASSERT is active; presumably harmless
      because the pair is always keyed on its first word - confirm. */
      SLJIT_ASSERT(recurse_check_bit(common, OVECTOR((offset << 1) + 1)));
      length += 2;
      }
    if (common->optimized_cbracket[offset] == 0 && recurse_check_bit(common, OVECTOR_PRIV(offset)))
      length++;
    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
      length++;
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    /* Same as above, but the private ovector word is always counted and
    the bracket has its own private data word as well. */
    offset = GET2(cc, 1 + LINK_SIZE);
    if (recurse_check_bit(common, OVECTOR(offset << 1)))
      {
      SLJIT_ASSERT(recurse_check_bit(common, OVECTOR((offset << 1) + 1)));
      length += 2;
      }
    if (recurse_check_bit(common, OVECTOR_PRIV(offset)))
      length++;
    if (recurse_check_bit(common, PRIVATE_DATA(cc)))
      length++;
    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
      length++;
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    /* Might be a hidden SCOND. */
    alternative = cc + GET(cc, 1);
    if ((*alternative == OP_KETRMAX || *alternative == OP_KETRMIN) && recurse_check_bit(common, PRIVATE_DATA(cc)))
      length++;
    cc += 1 + LINK_SIZE;
    break;

    /* Character iterators: one or two private words; the opcode is followed
    by a character (and by a repeat count in the 2B group). */
    CASE_ITERATOR_PRIVATE_DATA_1
    offset = PRIVATE_DATA(cc);
    if (offset != 0 && recurse_check_bit(common, offset))
      length++;
    cc += 2;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_PRIVATE_DATA_2A
    offset = PRIVATE_DATA(cc);
    if (offset != 0 && recurse_check_bit(common, offset))
      {
      SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw)));
      length += 2;
      }
    cc += 2;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_PRIVATE_DATA_2B
    offset = PRIVATE_DATA(cc);
    if (offset != 0 && recurse_check_bit(common, offset))
      {
      SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw)));
      length += 2;
      }
    cc += 2 + IMM2_SIZE;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    /* Type iterators: same slot usage, but only the opcode itself (and the
    repeat count in the 2B group) is skipped here. */
    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
    offset = PRIVATE_DATA(cc);
    if (offset != 0 && recurse_check_bit(common, offset))
      length++;
    cc += 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
    offset = PRIVATE_DATA(cc);
    if (offset != 0 && recurse_check_bit(common, offset))
      {
      SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw)));
      length += 2;
      }
    cc += 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
    offset = PRIVATE_DATA(cc);
    if (offset != 0 && recurse_check_bit(common, offset))
      {
      SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw)));
      length += 2;
      }
    cc += 1 + IMM2_SIZE;
    break;

    case OP_CLASS:
    case OP_NCLASS:
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
    size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR);
#else
    size = 1 + 32 / (int)sizeof(PCRE2_UCHAR);
#endif

    /* The number of words depends on the iterator that follows the class. */
    offset = PRIVATE_DATA(cc);
    if (offset != 0 && recurse_check_bit(common, offset))
      length += get_class_iterator_size(cc + size);
    cc += size;
    break;

    case OP_MARK:
    case OP_COMMIT_ARG:
    case OP_PRUNE_ARG:
    case OP_THEN_ARG:
    SLJIT_ASSERT(common->mark_ptr != 0);
    recurse_flags |= recurse_flag_setmark_found;
    if (common->control_head_ptr != 0)
      recurse_flags |= recurse_flag_control_head_found;
    /* Every verb of this group except OP_MARK also counts as a quit. */
    if (*cc != OP_MARK)
      recurse_flags |= recurse_flag_quit_found;

    /* Opcode, two code units of header and the argument (cc[1] units). */
    cc += 1 + 2 + cc[1];
    break;

    case OP_PRUNE:
    case OP_SKIP:
    case OP_COMMIT:
    recurse_flags |= recurse_flag_quit_found;
    cc++;
    break;

    case OP_SKIP_ARG:
    recurse_flags |= recurse_flag_quit_found;
    cc += 1 + 2 + cc[1];
    break;

    case OP_THEN:
    SLJIT_ASSERT(common->control_head_ptr != 0);
    recurse_flags |= recurse_flag_quit_found | recurse_flag_control_head_found;
    cc++;
    break;

    case OP_ACCEPT:
    case OP_ASSERT_ACCEPT:
    recurse_flags |= recurse_flag_accept_found;
    cc++;
    break;

    default:
    /* Opcodes without any saved state are simply skipped. */
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }
  }
SLJIT_ASSERT(cc == ccend);

/* Slots that depend on the collected flags rather than on a single opcode:
the control head, and - only when a quit verb is present - the start of
match and the mark value. */
if (recurse_flags & recurse_flag_control_head_found)
  length++;
if (recurse_flags & recurse_flag_quit_found)
  {
  if (recurse_flags & recurse_flag_setsom_found)
    length++;
  if (recurse_flags & recurse_flag_setmark_found)
    length++;
  }

*result_flags = recurse_flags;
return length;
}
2696
/* Operation selector of copy_recurse_data(). The saved words belong to three
groups: private, shared and kept shared. */
enum copy_recurse_data_types {
  /* Save every group from the local frame to the stack block. */
  recurse_copy_from_global = 0,
  /* Load only the private words from the stack block. */
  recurse_copy_private_to_global = 1,
  /* Load the shared and the kept shared words from the stack block. */
  recurse_copy_shared_to_global = 2,
  /* Load only the kept shared words from the stack block. */
  recurse_copy_kept_shared_to_global = 3,
  /* Exchange the private and shared words between the local frame and the
  block addressed by TMP2; kept shared words are not touched. */
  recurse_swap_global = 4
};
2704
/* Emits the code that copies the recursion state of the bytecode range
[cc, ccend) between the local frame (SLJIT_SP relative slots) and a block of
words addressed by a base register.

The range is traversed exactly like in get_recurse_data_length(), so the
n-th counted word always lands in the n-th slot of the block. Each opcode
contributes up to three groups of words, always in this order:
  private      - copied for from_global, private_to_global and swap
  shared       - copied for from_global, shared_to_global and swap
  kept shared  - copied for from_global, shared_to_global and
                 kept_shared_to_global
Groups that are not selected by 'type' are skipped, but their slots are still
stepped over.

Arguments:
  common         compiler state; recurse_bitset is overwritten
  cc, ccend      bytecode range of the recursion
  type           one of copy_recurse_data_types
  stackptr       index of the first slot of the block (converted by STACK())
  stacktop       index just after the last slot (converted by STACK())
  recurse_flags  flags returned by get_recurse_data_length() for this range

All copies are issued through the delayed_mem_copy_* helpers, which defer each
store until its temporary register is needed again. */
static void copy_recurse_data(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend,
  int type, int stackptr, int stacktop, uint32_t recurse_flags)
{
delayed_mem_copy_status status;
PCRE2_SPTR alternative;
/* Local frame offsets collected for the current opcode, one array per group. */
sljit_sw private_srcw[2];
sljit_sw shared_srcw[3];
sljit_sw kept_shared_srcw[2];
int private_count, shared_count, kept_shared_count;
int from_sp, base_reg, offset, i;

memset(common->recurse_bitset, 0, common->recurse_bitset_size);

#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
SLJIT_ASSERT(common->control_head_ptr != 0);
/* The control head is copied unconditionally below; mark it as handled. */
recurse_check_bit(common, common->control_head_ptr);
#endif

/* from_sp: the local frame is the source. base_reg: register that addresses
the block. */
switch (type)
  {
  case recurse_copy_from_global:
  from_sp = TRUE;
  base_reg = STACK_TOP;
  break;

  case recurse_copy_private_to_global:
  case recurse_copy_shared_to_global:
  case recurse_copy_kept_shared_to_global:
  from_sp = FALSE;
  base_reg = STACK_TOP;
  break;

  default:
  SLJIT_ASSERT(type == recurse_swap_global);
  from_sp = FALSE;
  base_reg = TMP2;
  break;
  }

stackptr = STACK(stackptr);
stacktop = STACK(stacktop);

/* Select the three temporary registers of the copy ring. */
status.tmp_regs[0] = TMP1;
status.saved_tmp_regs[0] = TMP1;

if (base_reg != TMP2)
  {
  status.tmp_regs[1] = TMP2;
  status.saved_tmp_regs[1] = TMP2;
  }
else
  {
  /* TMP2 is the base register, so another register is needed. */
  status.saved_tmp_regs[1] = RETURN_ADDR;
  if (HAS_VIRTUAL_REGISTERS)
    status.tmp_regs[1] = STR_PTR;
  else
    status.tmp_regs[1] = RETURN_ADDR;
  }

status.saved_tmp_regs[2] = TMP3;
if (HAS_VIRTUAL_REGISTERS)
  status.tmp_regs[2] = STR_END;
else
  status.tmp_regs[2] = TMP3;

delayed_mem_copy_init(&status, common);

/* The first slot belongs to recursive_head_ptr and is handled like a
private word. */
if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)
  {
  SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global);

  if (!from_sp)
    delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, common->recursive_head_ptr);

  /* For a swap both directions are queued; the stores are deferred, so
  both loads are issued before either value is overwritten. */
  if (from_sp || type == recurse_swap_global)
    delayed_mem_copy_move(&status, SLJIT_SP, common->recursive_head_ptr, base_reg, stackptr);
  }

stackptr += sizeof(sljit_sw);

#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
/* NOTE(review): unlike the private groups elsewhere in this function, this
condition does not exclude recurse_copy_kept_shared_to_global - confirm that
this debug-only difference is intended. */
if (type != recurse_copy_shared_to_global)
  {
  if (!from_sp)
    delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, common->control_head_ptr);

  if (from_sp || type == recurse_swap_global)
    delayed_mem_copy_move(&status, SLJIT_SP, common->control_head_ptr, base_reg, stackptr);
  }

stackptr += sizeof(sljit_sw);
#endif

while (cc < ccend)
  {
  private_count = 0;
  shared_count = 0;
  kept_shared_count = 0;

  /* First collect the local frame offsets that belong to this opcode. */
  switch(*cc)
    {
    case OP_SET_SOM:
    SLJIT_ASSERT(common->has_set_som);
    /* The start of match is saved only when a quit verb is present. */
    if ((recurse_flags & recurse_flag_quit_found) && recurse_check_bit(common, OVECTOR(0)))
      {
      kept_shared_srcw[0] = OVECTOR(0);
      kept_shared_count = 1;
      }
    cc += 1;
    break;

    case OP_RECURSE:
    if (recurse_flags & recurse_flag_quit_found)
      {
      if (common->has_set_som && recurse_check_bit(common, OVECTOR(0)))
        {
        kept_shared_srcw[0] = OVECTOR(0);
        kept_shared_count = 1;
        }
      if (common->mark_ptr != 0 && recurse_check_bit(common, common->mark_ptr))
        {
        kept_shared_srcw[kept_shared_count] = common->mark_ptr;
        kept_shared_count++;
        }
      }
    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
      {
      shared_srcw[0] = common->capture_last_ptr;
      shared_count = 1;
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_KET:
    private_srcw[0] = PRIVATE_DATA(cc);
    if (private_srcw[0] != 0)
      {
      if (recurse_check_bit(common, private_srcw[0]))
        private_count = 1;
      /* NOTE(review): PRIVATE_DATA(cc + 1) is used as an extra skip
      distance; its exact meaning is not visible in this function. */
      SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
      cc += PRIVATE_DATA(cc + 1);
      }
    cc += 1 + LINK_SIZE;
    break;

    /* Brackets with exactly one private data word. */
    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ASSERT_NA:
    case OP_ASSERTBACK_NA:
    case OP_ONCE:
    case OP_SCRIPT_RUN:
    case OP_BRAPOS:
    case OP_SBRA:
    case OP_SBRAPOS:
    case OP_SCOND:
    private_srcw[0] = PRIVATE_DATA(cc);
    if (recurse_check_bit(common, private_srcw[0]))
      private_count = 1;
    cc += 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_SCBRA:
    /* Shared: the two ovector words and the last-capture slot.
    Private: the private ovector word (if the bracket is not optimized). */
    offset = GET2(cc, 1 + LINK_SIZE);
    shared_srcw[0] = OVECTOR(offset << 1);
    if (recurse_check_bit(common, shared_srcw[0]))
      {
      shared_srcw[1] = shared_srcw[0] + sizeof(sljit_sw);
      /* NOTE(review): side effect inside SLJIT_ASSERT, same pattern as in
      get_recurse_data_length(). */
      SLJIT_ASSERT(recurse_check_bit(common, shared_srcw[1]));
      shared_count = 2;
      }

    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
      {
      shared_srcw[shared_count] = common->capture_last_ptr;
      shared_count++;
      }

    if (common->optimized_cbracket[offset] == 0)
      {
      private_srcw[0] = OVECTOR_PRIV(offset);
      if (recurse_check_bit(common, private_srcw[0]))
        private_count = 1;
      }

    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    /* Like OP_CBRA, but with two private words: the private data of the
    bracket and the private ovector word. */
    offset = GET2(cc, 1 + LINK_SIZE);
    shared_srcw[0] = OVECTOR(offset << 1);
    if (recurse_check_bit(common, shared_srcw[0]))
      {
      shared_srcw[1] = shared_srcw[0] + sizeof(sljit_sw);
      SLJIT_ASSERT(recurse_check_bit(common, shared_srcw[1]));
      shared_count = 2;
      }

    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
      {
      shared_srcw[shared_count] = common->capture_last_ptr;
      shared_count++;
      }

    private_srcw[0] = PRIVATE_DATA(cc);
    if (recurse_check_bit(common, private_srcw[0]))
      private_count = 1;

    offset = OVECTOR_PRIV(offset);
    if (recurse_check_bit(common, offset))
      {
      private_srcw[private_count] = offset;
      private_count++;
      }
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    /* Might be a hidden SCOND. */
    alternative = cc + GET(cc, 1);
    if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
      {
      private_srcw[0] = PRIVATE_DATA(cc);
      if (recurse_check_bit(common, private_srcw[0]))
        private_count = 1;
      }
    cc += 1 + LINK_SIZE;
    break;

    /* Character iterators with one or two private words. */
    CASE_ITERATOR_PRIVATE_DATA_1
    private_srcw[0] = PRIVATE_DATA(cc);
    if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
      private_count = 1;
    cc += 2;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_PRIVATE_DATA_2A
    private_srcw[0] = PRIVATE_DATA(cc);
    if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
      {
      private_count = 2;
      private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
      SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
      }
    cc += 2;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_PRIVATE_DATA_2B
    private_srcw[0] = PRIVATE_DATA(cc);
    if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
      {
      private_count = 2;
      private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
      SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
      }
    cc += 2 + IMM2_SIZE;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    /* Type iterators with one or two private words. */
    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
    private_srcw[0] = PRIVATE_DATA(cc);
    if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
      private_count = 1;
    cc += 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
    private_srcw[0] = PRIVATE_DATA(cc);
    if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
      {
      private_count = 2;
      private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
      SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
      }
    cc += 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
    private_srcw[0] = PRIVATE_DATA(cc);
    if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
      {
      private_count = 2;
      private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
      SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
      }
    cc += 1 + IMM2_SIZE;
    break;

    case OP_CLASS:
    case OP_NCLASS:
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
    i = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR);
#else
    i = 1 + 32 / (int)sizeof(PCRE2_UCHAR);
#endif
    /* NOTE(review): get_recurse_data_length() counts the class words only
    when recurse_check_bit() succeeds. Here the one-word case does not
    consult the bitset, and the two-word case keeps private_count at 1 when
    the bit is already set. Presumably every class iterator owns a unique
    private slot, so the two functions never disagree - confirm. */
    if (PRIVATE_DATA(cc) != 0)
      {
      private_count = 1;
      private_srcw[0] = PRIVATE_DATA(cc);
      switch(get_class_iterator_size(cc + i))
        {
        case 1:
        break;

        case 2:
        if (recurse_check_bit(common, private_srcw[0]))
          {
          private_count = 2;
          private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
          SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
          }
        break;

        default:
        SLJIT_UNREACHABLE();
        break;
        }
      }
    cc += i;
    break;

    case OP_MARK:
    case OP_COMMIT_ARG:
    case OP_PRUNE_ARG:
    case OP_THEN_ARG:
    SLJIT_ASSERT(common->mark_ptr != 0);
    /* The mark is saved only when a quit verb is present. */
    if ((recurse_flags & recurse_flag_quit_found) && recurse_check_bit(common, common->mark_ptr))
      {
      kept_shared_srcw[0] = common->mark_ptr;
      kept_shared_count = 1;
      }
    if (common->control_head_ptr != 0 && recurse_check_bit(common, common->control_head_ptr))
      {
      private_srcw[0] = common->control_head_ptr;
      private_count = 1;
      }
    cc += 1 + 2 + cc[1];
    break;

    case OP_THEN:
    SLJIT_ASSERT(common->control_head_ptr != 0);
    if (recurse_check_bit(common, common->control_head_ptr))
      {
      private_srcw[0] = common->control_head_ptr;
      private_count = 1;
      }
    cc++;
    break;

    default:
    /* Nothing to copy for this opcode: go directly to the next one. */
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    continue;
    }

  /* Then emit the copies, group by group. A group that is not selected by
  'type' still advances stackptr, so the slot layout never changes. */
  if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)
    {
    SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global);

    for (i = 0; i < private_count; i++)
      {
      SLJIT_ASSERT(private_srcw[i] != 0);

      if (!from_sp)
        delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, private_srcw[i]);

      if (from_sp || type == recurse_swap_global)
        delayed_mem_copy_move(&status, SLJIT_SP, private_srcw[i], base_reg, stackptr);

      stackptr += sizeof(sljit_sw);
      }
    }
  else
    stackptr += sizeof(sljit_sw) * private_count;

  if (type != recurse_copy_private_to_global && type != recurse_copy_kept_shared_to_global)
    {
    SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_swap_global);

    for (i = 0; i < shared_count; i++)
      {
      SLJIT_ASSERT(shared_srcw[i] != 0);

      if (!from_sp)
        delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, shared_srcw[i]);

      if (from_sp || type == recurse_swap_global)
        delayed_mem_copy_move(&status, SLJIT_SP, shared_srcw[i], base_reg, stackptr);

      stackptr += sizeof(sljit_sw);
      }
    }
  else
    stackptr += sizeof(sljit_sw) * shared_count;

  if (type != recurse_copy_private_to_global && type != recurse_swap_global)
    {
    SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_copy_kept_shared_to_global);

    for (i = 0; i < kept_shared_count; i++)
      {
      SLJIT_ASSERT(kept_shared_srcw[i] != 0);

      if (!from_sp)
        delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, kept_shared_srcw[i]);

      /* The swap test can never be true here (swap is excluded above). */
      if (from_sp || type == recurse_swap_global)
        delayed_mem_copy_move(&status, SLJIT_SP, kept_shared_srcw[i], base_reg, stackptr);

      stackptr += sizeof(sljit_sw);
      }
    }
  else
    stackptr += sizeof(sljit_sw) * kept_shared_count;
  }

/* Every slot of the block must have been visited exactly once. */
SLJIT_ASSERT(cc == ccend && stackptr == stacktop);

/* Emit the stores that are still pending. */
delayed_mem_copy_finish(&status);
}
3137
set_then_offsets(compiler_common * common,PCRE2_SPTR cc,sljit_u8 * current_offset)3138 static SLJIT_INLINE PCRE2_SPTR set_then_offsets(compiler_common *common, PCRE2_SPTR cc, sljit_u8 *current_offset)
3139 {
3140 PCRE2_SPTR end = bracketend(cc);
3141 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
3142
3143 /* Assert captures then. */
3144 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA)
3145 current_offset = NULL;
3146 /* Conditional block does not. */
3147 if (*cc == OP_COND || *cc == OP_SCOND)
3148 has_alternatives = FALSE;
3149
3150 cc = next_opcode(common, cc);
3151
3152 if (has_alternatives)
3153 {
3154 if (*cc == OP_REVERSE)
3155 cc += 1 + IMM2_SIZE;
3156 else if (*cc == OP_VREVERSE)
3157 cc += 1 + 2 * IMM2_SIZE;
3158
3159 current_offset = common->then_offsets + (cc - common->start);
3160 }
3161
3162 while (cc < end)
3163 {
3164 if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
3165 cc = set_then_offsets(common, cc, current_offset);
3166 else
3167 {
3168 if (*cc == OP_ALT && has_alternatives)
3169 {
3170 cc += 1 + LINK_SIZE;
3171
3172 if (*cc == OP_REVERSE)
3173 cc += 1 + IMM2_SIZE;
3174 else if (*cc == OP_VREVERSE)
3175 cc += 1 + 2 * IMM2_SIZE;
3176
3177 current_offset = common->then_offsets + (cc - common->start);
3178 continue;
3179 }
3180
3181 if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
3182 *current_offset = 1;
3183 cc = next_opcode(common, cc);
3184 }
3185 }
3186
3187 return end;
3188 }
3189
3190 #undef CASE_ITERATOR_PRIVATE_DATA_1
3191 #undef CASE_ITERATOR_PRIVATE_DATA_2A
3192 #undef CASE_ITERATOR_PRIVATE_DATA_2B
3193 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
3194 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
3195 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
3196
is_powerof2(unsigned int value)3197 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
3198 {
3199 return (value & (value - 1)) == 0;
3200 }
3201
set_jumps(jump_list * list,struct sljit_label * label)3202 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
3203 {
3204 while (list != NULL)
3205 {
3206 /* sljit_set_label is clever enough to do nothing
3207 if either the jump or the label is NULL. */
3208 SET_LABEL(list->jump, label);
3209 list = list->next;
3210 }
3211 }
3212
add_jump(struct sljit_compiler * compiler,jump_list ** list,struct sljit_jump * jump)3213 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump)
3214 {
3215 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
3216 if (list_item)
3217 {
3218 list_item->next = *list;
3219 list_item->jump = jump;
3220 *list = list_item;
3221 }
3222 }
3223
/* Registers an out-of-line stub: 'start' is the jump that enters the stub,
and a label emitted at the current position is recorded as the place where
the stub continues afterwards. The stub code itself is generated later by
flush_stubs(). Nothing is recorded (and no label is emitted) when the list
node cannot be allocated. */
static void add_stub(compiler_common *common, struct sljit_jump *start)
{
DEFINE_COMPILER;
stub_list *stub = sljit_alloc_memory(compiler, sizeof(stub_list));

if (stub == NULL)
  return;

stub->start = start;
stub->quit = LABEL();
stub->next = common->stubs;
common->stubs = stub;
}
3237
/* Generates the code of every stub registered by add_stub() and empties the
stub list. Each stub is entered through its 'start' jump, performs a fast
call that is collected in common->stackalloc, and then jumps back to its
'quit' label. */
static void flush_stubs(compiler_common *common)
{
DEFINE_COMPILER;
stub_list *stub;

for (stub = common->stubs; stub != NULL; stub = stub->next)
  {
  JUMPHERE(stub->start);
  add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
  JUMPTO(SLJIT_JUMP, stub->quit);
  }

common->stubs = NULL;
}
3252
count_match(compiler_common * common)3253 static SLJIT_INLINE void count_match(compiler_common *common)
3254 {
3255 DEFINE_COMPILER;
3256
3257 OP2(SLJIT_SUB | SLJIT_SET_Z, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
3258 add_jump(compiler, &common->calllimit, JUMP(SLJIT_ZERO));
3259 }
3260
allocate_stack(compiler_common * common,int size)3261 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
3262 {
3263 /* May destroy all locals and registers except TMP2. */
3264 DEFINE_COMPILER;
3265
3266 SLJIT_ASSERT(size > 0);
3267 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * SSIZE_OF(sw));
3268 #ifdef DESTROY_REGISTERS
3269 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
3270 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3271 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
3272 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0);
3273 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
3274 #endif
3275 add_stub(common, CMP(SLJIT_LESS, STACK_TOP, 0, STACK_LIMIT, 0));
3276 }
3277
free_stack(compiler_common * common,int size)3278 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
3279 {
3280 DEFINE_COMPILER;
3281
3282 SLJIT_ASSERT(size > 0);
3283 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * SSIZE_OF(sw));
3284 }
3285
/* Allocates a block of 'size' bytes and links it into the
common->read_only_data_head list.

One extra sljit_uw is allocated in front of the returned area; it stores the
previous head of the list. Returns a pointer to the area after that link
word, or NULL when the compiler is already in an error state or when the
allocation fails (the latter also sets the compiler memory error). */
static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
{
DEFINE_COMPILER;
sljit_uw *chunk;
void **link;

if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  return NULL;

chunk = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data);
if (SLJIT_UNLIKELY(chunk == NULL))
  {
  sljit_set_compiler_memory_error(compiler);
  return NULL;
  }

/* The first word of the chunk is the "next" pointer of the list. */
link = (void **)chunk;
*link = common->read_only_data_head;
common->read_only_data_head = (void *)chunk;

return chunk + 1;
}
3305
reset_ovector(compiler_common * common,int length)3306 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
3307 {
3308 DEFINE_COMPILER;
3309 struct sljit_label *loop;
3310 sljit_s32 i;
3311
3312 /* At this point we can freely use all temporary registers. */
3313 SLJIT_ASSERT(length > 1);
3314 /* TMP1 returns with begin - 1. */
3315 OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
3316 if (length < 8)
3317 {
3318 for (i = 1; i < length; i++)
3319 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0);
3320 }
3321 else
3322 {
3323 if (sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw)) == SLJIT_SUCCESS)
3324 {
3325 GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
3326 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
3327 loop = LABEL();
3328 sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw));
3329 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
3330 JUMPTO(SLJIT_NOT_ZERO, loop);
3331 }
3332 else
3333 {
3334 GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START + sizeof(sljit_sw));
3335 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
3336 loop = LABEL();
3337 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R0, 0);
3338 OP2(SLJIT_ADD, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, sizeof(sljit_sw));
3339 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
3340 JUMPTO(SLJIT_NOT_ZERO, loop);
3341 }
3342 }
3343 }
3344
reset_early_fail(compiler_common * common)3345 static SLJIT_INLINE void reset_early_fail(compiler_common *common)
3346 {
3347 DEFINE_COMPILER;
3348 sljit_u32 size = (sljit_u32)(common->early_fail_end_ptr - common->early_fail_start_ptr);
3349 sljit_u32 uncleared_size;
3350 sljit_s32 src = SLJIT_IMM;
3351 sljit_s32 i;
3352 struct sljit_label *loop;
3353
3354 SLJIT_ASSERT(common->early_fail_start_ptr < common->early_fail_end_ptr);
3355
3356 if (size == sizeof(sljit_sw))
3357 {
3358 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->early_fail_start_ptr, SLJIT_IMM, 0);
3359 return;
3360 }
3361
3362 if (sljit_get_register_index(SLJIT_GP_REGISTER, TMP3) >= 0 && !sljit_has_cpu_feature(SLJIT_HAS_ZERO_REGISTER))
3363 {
3364 OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
3365 src = TMP3;
3366 }
3367
3368 if (size <= 6 * sizeof(sljit_sw))
3369 {
3370 for (i = common->early_fail_start_ptr; i < common->early_fail_end_ptr; i += sizeof(sljit_sw))
3371 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), i, src, 0);
3372 return;
3373 }
3374
3375 GET_LOCAL_BASE(TMP1, 0, common->early_fail_start_ptr);
3376
3377 uncleared_size = ((size / sizeof(sljit_sw)) % 3) * sizeof(sljit_sw);
3378
3379 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, size - uncleared_size);
3380
3381 loop = LABEL();
3382 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), 0, src, 0);
3383 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
3384 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -2 * SSIZE_OF(sw), src, 0);
3385 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -1 * SSIZE_OF(sw), src, 0);
3386 CMPTO(SLJIT_LESS, TMP1, 0, TMP2, 0, loop);
3387
3388 if (uncleared_size >= sizeof(sljit_sw))
3389 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), 0, src, 0);
3390
3391 if (uncleared_size >= 2 * sizeof(sljit_sw))
3392 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), sizeof(sljit_sw), src, 0);
3393 }
3394
do_reset_match(compiler_common * common,int length)3395 static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
3396 {
3397 DEFINE_COMPILER;
3398 struct sljit_label *loop;
3399 int i;
3400
3401 SLJIT_ASSERT(length > 1);
3402 /* OVECTOR(1) contains the "string begin - 1" constant. */
3403 if (length > 2)
3404 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
3405 if (length < 8)
3406 {
3407 for (i = 2; i < length; i++)
3408 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), TMP1, 0);
3409 }
3410 else
3411 {
3412 if (sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw)) == SLJIT_SUCCESS)
3413 {
3414 GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
3415 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
3416 loop = LABEL();
3417 sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
3418 OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
3419 JUMPTO(SLJIT_NOT_ZERO, loop);
3420 }
3421 else
3422 {
3423 GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + 2 * sizeof(sljit_sw));
3424 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
3425 loop = LABEL();
3426 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
3427 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(sljit_sw));
3428 OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
3429 JUMPTO(SLJIT_NOT_ZERO, loop);
3430 }
3431 }
3432
3433 if (!HAS_VIRTUAL_REGISTERS)
3434 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, stack));
3435 else
3436 OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
3437
3438 if (common->mark_ptr != 0)
3439 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
3440 if (common->control_head_ptr != 0)
3441 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
3442 if (HAS_VIRTUAL_REGISTERS)
3443 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
3444
3445 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
3446 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, end));
3447 }
3448
do_search_mark(sljit_sw * current,PCRE2_SPTR skip_arg)3449 static sljit_sw SLJIT_FUNC do_search_mark(sljit_sw *current, PCRE2_SPTR skip_arg)
3450 {
3451 while (current != NULL)
3452 {
3453 switch (current[1])
3454 {
3455 case type_then_trap:
3456 break;
3457
3458 case type_mark:
3459 if (PRIV(strcmp)(skip_arg, (PCRE2_SPTR)current[2]) == 0)
3460 return current[3];
3461 break;
3462
3463 default:
3464 SLJIT_UNREACHABLE();
3465 break;
3466 }
3467 SLJIT_ASSERT(current[0] == 0 || current < (sljit_sw*)current[0]);
3468 current = (sljit_sw*)current[0];
3469 }
3470 return 0;
3471 }
3472
copy_ovector(compiler_common * common,int topbracket)3473 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
3474 {
3475 DEFINE_COMPILER;
3476 struct sljit_label *loop;
3477 BOOL has_pre;
3478
3479 /* At this point we can freely use all registers. */
3480 OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
3481 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);
3482
3483 if (HAS_VIRTUAL_REGISTERS)
3484 {
3485 OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
3486 OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
3487 if (common->mark_ptr != 0)
3488 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
3489 OP1(SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, oveccount));
3490 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_S0, 0);
3491 if (common->mark_ptr != 0)
3492 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
3493 OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, match_data),
3494 SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE));
3495 }
3496 else
3497 {
3498 OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
3499 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, match_data));
3500 if (common->mark_ptr != 0)
3501 OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
3502 OP1(SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, oveccount));
3503 OP1(SLJIT_MOV, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_S0, 0);
3504 if (common->mark_ptr != 0)
3505 OP1(SLJIT_MOV, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R0, 0);
3506 OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE));
3507 }
3508
3509 has_pre = sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw)) == SLJIT_SUCCESS;
3510
3511 GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START - (has_pre ? sizeof(sljit_sw) : 0));
3512 OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? SLJIT_R0 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
3513
3514 loop = LABEL();
3515
3516 if (has_pre)
3517 sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw));
3518 else
3519 {
3520 OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0);
3521 OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
3522 }
3523
3524 OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, sizeof(PCRE2_SIZE));
3525 OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_R0, 0);
3526 /* Copy the integer value to the output buffer */
3527 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
3528 OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
3529 #endif
3530
3531 SLJIT_ASSERT(sizeof(PCRE2_SIZE) == 4 || sizeof(PCRE2_SIZE) == 8);
3532 OP1(((sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV), SLJIT_MEM1(SLJIT_R2), 0, SLJIT_S1, 0);
3533
3534 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
3535 JUMPTO(SLJIT_NOT_ZERO, loop);
3536
3537 /* Calculate the return value, which is the maximum ovector value. */
3538 if (topbracket > 1)
3539 {
3540 if (sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * SSIZE_OF(sw))) == SLJIT_SUCCESS)
3541 {
3542 GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
3543 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
3544
3545 /* OVECTOR(0) is never equal to SLJIT_S2. */
3546 loop = LABEL();
3547 sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * SSIZE_OF(sw)));
3548 OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
3549 CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
3550 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
3551 }
3552 else
3553 {
3554 GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + (topbracket - 1) * 2 * sizeof(sljit_sw));
3555 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
3556
3557 /* OVECTOR(0) is never equal to SLJIT_S2. */
3558 loop = LABEL();
3559 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), 0);
3560 OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 2 * SSIZE_OF(sw));
3561 OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
3562 CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
3563 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
3564 }
3565 }
3566 else
3567 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
3568 }
3569
return_with_partial_match(compiler_common * common,struct sljit_label * quit)3570 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
3571 {
3572 DEFINE_COMPILER;
3573 sljit_s32 mov_opcode;
3574 sljit_s32 arguments_reg = !HAS_VIRTUAL_REGISTERS ? ARGUMENTS : SLJIT_R1;
3575
3576 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S0, str_end_must_be_saved_reg0);
3577 SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
3578 && (common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start != 0 : common->hit_start == 0));
3579
3580 if (arguments_reg != ARGUMENTS)
3581 OP1(SLJIT_MOV, arguments_reg, 0, ARGUMENTS, 0);
3582 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP),
3583 common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start : common->start_ptr);
3584 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_PARTIAL);
3585
3586 /* Store match begin and end. */
3587 OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, begin));
3588 OP1(SLJIT_MOV, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_R2, 0);
3589 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, match_data));
3590
3591 mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;
3592
3593 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S1, 0);
3594 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
3595 OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
3596 #endif
3597 OP1(mov_opcode, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(pcre2_match_data, ovector), SLJIT_R2, 0);
3598
3599 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_S1, 0);
3600 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
3601 OP2(SLJIT_ASHR, STR_END, 0, STR_END, 0, SLJIT_IMM, UCHAR_SHIFT);
3602 #endif
3603 OP1(mov_opcode, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(pcre2_match_data, ovector) + sizeof(PCRE2_SIZE), STR_END, 0);
3604
3605 JUMPTO(SLJIT_JUMP, quit);
3606 }
3607
check_start_used_ptr(compiler_common * common)3608 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
3609 {
3610 /* May destroy TMP1. */
3611 DEFINE_COMPILER;
3612 struct sljit_jump *jump;
3613
3614 if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
3615 {
3616 /* The value of -1 must be kept for start_used_ptr! */
3617 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
3618 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
3619 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
3620 jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
3621 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
3622 JUMPHERE(jump);
3623 }
3624 else if (common->mode == PCRE2_JIT_PARTIAL_HARD)
3625 {
3626 jump = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
3627 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
3628 JUMPHERE(jump);
3629 }
3630 }
3631
char_has_othercase(compiler_common * common,PCRE2_SPTR cc)3632 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, PCRE2_SPTR cc)
3633 {
3634 /* Detects if the character has an othercase. */
3635 unsigned int c;
3636
3637 #ifdef SUPPORT_UNICODE
3638 if (common->utf || common->ucp)
3639 {
3640 if (common->utf)
3641 {
3642 GETCHAR(c, cc);
3643 }
3644 else
3645 c = *cc;
3646
3647 if (c > 127)
3648 return c != UCD_OTHERCASE(c);
3649
3650 return common->fcc[c] != c;
3651 }
3652 else
3653 #endif
3654 c = *cc;
3655 return MAX_255(c) ? common->fcc[c] != c : FALSE;
3656 }
3657
char_othercase(compiler_common * common,unsigned int c)3658 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
3659 {
3660 /* Returns with the othercase. */
3661 #ifdef SUPPORT_UNICODE
3662 if ((common->utf || common->ucp) && c > 127)
3663 return UCD_OTHERCASE(c);
3664 #endif
3665 return TABLE_GET(c, common->fcc, c);
3666 }
3667
/* Detects whether the character at cc and its other case differ in exactly
   one bit.

   Returns 0 when they differ in more than one bit. Otherwise the result is
   (index << 8) | mask, where mask is the differing bit reduced to fit the low
   bits and index says which part of the encoded character carries it. In
   8-bit UTF mode index starts from GET_EXTRALEN() of the first code unit and
   drops by one for every 6 bits the mask is shifted down.
   NOTE(review): for 16/32-bit units index looks like a byte offset inside
   the character (0..3) - confirm against the callers.

   The caller must only pass characters that do have an other case (this is
   asserted). */
static unsigned int char_get_othercase_bit(compiler_common *common, PCRE2_SPTR cc)
{
  unsigned int ch, other, diff;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
  int unit_index;
#endif

#ifdef SUPPORT_UNICODE
  if (common->utf || common->ucp)
  {
    if (common->utf)
    {
      GETCHAR(ch, cc);
    }
    else
    {
      ch = *cc;
    }

    other = (ch <= 127) ? common->fcc[ch] : UCD_OTHERCASE(ch);
  }
  else
  {
    ch = *cc;
    other = TABLE_GET(ch, common->fcc, ch);
  }
#else
  ch = *cc;
  other = TABLE_GET(ch, common->fcc, ch);
#endif

  SLJIT_ASSERT(ch != other);

  diff = ch ^ other;

  /* Optimized for English alphabet. */
  if (ch <= 127 && diff == 0x20)
    return (0 << 8) | 0x20;

  /* Since ch != other, they must have at least 1 bit difference; more than
     one differing bit cannot be expressed. */
  if (!is_powerof2(diff))
    return 0;

#if PCRE2_CODE_UNIT_WIDTH == 8

#ifdef SUPPORT_UNICODE
  if (common->utf && ch > 127)
  {
    /* Step back one code unit for every 6 payload bits skipped. */
    unit_index = GET_EXTRALEN(*cc);
    for (; (diff & 0x3f) == 0; diff >>= 6)
      unit_index--;
    return (unit_index << 8) | diff;
  }
#endif /* SUPPORT_UNICODE */
  return (0 << 8) | diff;

#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32

#ifdef SUPPORT_UNICODE
  if (common->utf && ch > 65535)
  {
    if (diff < (1u << 10))
      return (diff < 256) ? ((2 << 8) | diff) : ((3 << 8) | (diff >> 8));

    diff >>= 10;
  }
#endif /* SUPPORT_UNICODE */
  return (diff < 256) ? ((0u << 8) | diff) : ((1u << 8) | (diff >> 8));

#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
}
3743
/* Checks whether a partial match has occurred. Does not modify registers.

   Nothing is emitted in complete mode. In soft partial mode the hit_start
   slot is set to 0; in hard partial mode control is transferred to the
   partial match exit (the label when it already exists, otherwise a jump
   queued on common->partialmatch). A guard comparison may be emitted first
   to skip that action:
   - when neither "force" nor allow_empty_partial is set, it is skipped if
     start_used_ptr >= STR_PTR;
   - otherwise, in soft mode only, it is skipped if start_used_ptr == -1. */
static void check_partial(compiler_common *common, BOOL force)
{
  DEFINE_COMPILER;
  struct sljit_jump *skip = NULL;
  BOOL soft_mode;

  SLJIT_ASSERT(!force || common->mode != PCRE2_JIT_COMPLETE);

  if (common->mode == PCRE2_JIT_COMPLETE)
    return;

  soft_mode = (common->mode == PCRE2_JIT_PARTIAL_SOFT);

  if (!(force || common->allow_empty_partial))
  {
    skip = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
  }
  else if (soft_mode)
  {
    skip = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
  }

  if (soft_mode)
  {
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  }
  else if (common->partialmatchlabel != NULL)
  {
    JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
  }
  else
  {
    add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
  }

  if (skip != NULL)
  {
    JUMPHERE(skip);
  }
}
3773
/* Emits an end-of-subject test. Does not affect registers. Usually used in a
   tight spot.

   Complete mode: a jump is added to end_reached when STR_PTR >= STR_END.
   Partial modes: when STR_PTR < STR_END execution simply continues.
   Otherwise a jump is added to end_reached if start_used_ptr >= STR_PTR; if
   not, soft mode sets the hit_start slot to 0 and then also goes to
   end_reached, while hard mode goes to the partial match exit. */
static void check_str_end(compiler_common *common, jump_list **end_reached)
{
  DEFINE_COMPILER;
  struct sljit_jump *not_at_end;

  if (common->mode == PCRE2_JIT_COMPLETE)
  {
    add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
    return;
  }

  not_at_end = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);

  /* Common to both partial modes. */
  add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));

  if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  {
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
    add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
  }
  else if (common->partialmatchlabel != NULL)
  {
    JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
  }
  else
  {
    add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
  }

  JUMPHERE(not_at_end);
}
3803
/* Emits an end-of-subject test whose failure path is "backtracks".

   Complete mode: a jump is added to backtracks when STR_PTR >= STR_END.
   Partial modes: when STR_PTR < STR_END execution continues. Otherwise a
   guard may send execution to backtracks first (start_used_ptr >= STR_PTR
   when allow_empty_partial is off; start_used_ptr == -1 in soft mode when it
   is on). Past the guard, soft mode sets the hit_start slot to 0 and
   backtracks, hard mode goes to the partial match exit. */
static void detect_partial_match(compiler_common *common, jump_list **backtracks)
{
  DEFINE_COMPILER;
  struct sljit_jump *not_at_end;
  BOOL soft_mode;

  if (common->mode == PCRE2_JIT_COMPLETE)
  {
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
    return;
  }

  /* Partial matching mode. */
  soft_mode = (common->mode == PCRE2_JIT_PARTIAL_SOFT);
  not_at_end = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);

  if (!common->allow_empty_partial)
  {
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
  }
  else if (soft_mode)
  {
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1));
  }

  if (soft_mode)
  {
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
  }
  else if (common->partialmatchlabel != NULL)
  {
    JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
  }
  else
  {
    add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
  }

  JUMPHERE(not_at_end);
}
3836
/* Emits the partial match bookkeeping without an end-of-subject test.

   Soft mode: the hit_start slot is set to 0 unless start_used_ptr >= STR_PTR.
   Hard mode: control goes to the partial match exit when
   start_used_ptr < STR_PTR (directly to the label when it already exists,
   otherwise through a jump queued on common->partialmatch).
   Nothing is emitted in any other mode. */
static void process_partial_match(compiler_common *common)
{
  DEFINE_COMPILER;
  struct sljit_jump *skip_hit;

  if (common->mode == PCRE2_JIT_PARTIAL_HARD)
  {
    if (common->partialmatchlabel == NULL)
    {
      add_jump(compiler, &common->partialmatch, CMP(SLJIT_LESS, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
    }
    else
    {
      CMPTO(SLJIT_LESS, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0, common->partialmatchlabel);
    }
    return;
  }

  if (common->mode != PCRE2_JIT_PARTIAL_SOFT)
    return;

  skip_hit = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  JUMPHERE(skip_hit);
}
3857
/* Emits "if (STR_PTR < STR_END) goto label", followed by the code produced by
   process_partial_match() for the case where that jump is not taken. */
static void detect_partial_match_to(compiler_common *common, struct sljit_label *label)
{
  DEFINE_COMPILER;

  /* More input available: continue at the caller supplied label. */
  CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, label);

  /* Fell through: STR_PTR is not below STR_END. */
  process_partial_match(common);
}
3865
/* Reads the character at STR_PTR into TMP1 and keeps STR_PTR. Does not check
   STR_END. TMP2, dst, RETURN_ADDR Destroyed.

   max        - when below the first value that needs multi-unit decoding
                (128 for UTF-8, 0xd800 for UTF-16 and for the 32-bit validity
                check) only the plain load is emitted.
   dst, dstw  - operand used to save STR_PTR around the call to the UTF read
                helper.
   backtracks - optional list; when present and invalid UTF handling is
                active, a jump is added to it for invalid input. */
static void peek_char(compiler_common *common, sljit_u32 max, sljit_s32 dst, sljit_sw dstw, jump_list **backtracks)
{
  DEFINE_COMPILER;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
  struct sljit_jump *single_unit;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */

  SLJIT_UNUSED_ARG(max);
  SLJIT_UNUSED_ARG(dst);
  SLJIT_UNUSED_ARG(dstw);
  SLJIT_UNUSED_ARG(backtracks);

  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
  if (!common->utf || max < 128)
    return;

  /* Values below 0x80 are complete after the load above. */
  single_unit = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);

  /* Save STR_PTR in dst, step over the first unit, call the read helper and
     then restore STR_PTR. */
  OP1(SLJIT_MOV, dst, dstw, STR_PTR, 0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  add_jump(compiler, common->invalid_utf ? &common->utfreadchar_invalid : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
  OP1(SLJIT_MOV, STR_PTR, 0, dst, dstw);
  if (backtracks && common->invalid_utf)
  {
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
  }
  JUMPHERE(single_unit);
#elif PCRE2_CODE_UNIT_WIDTH == 16
  if (!common->utf || max < 0xd800)
    return;

  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);

  if (!common->invalid_utf)
  {
    /* TMP2 contains the high surrogate. Combine it with the following unit. */
    single_unit = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
  }
  else
  {
    /* Anything in the surrogate range goes through the validating helper,
       with STR_PTR saved in dst around the call. */
    single_unit = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
    OP1(SLJIT_MOV, dst, dstw, STR_PTR, 0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
    OP1(SLJIT_MOV, STR_PTR, 0, dst, dstw);
    if (backtracks != NULL)
    {
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    }
  }

  JUMPHERE(single_unit);
#elif PCRE2_CODE_UNIT_WIDTH == 32
  if (!common->invalid_utf || max < 0xd800)
    return;

  /* TMP2 = TMP1 - 0xd800, used for the surrogate range test. */
  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);

  if (backtracks != NULL)
  {
    /* Values >= 0x110000 and surrogates go to the backtrack list. */
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
    return;
  }

  /* No list supplied: replace such values by INVALID_UTF_CHAR instead. */
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000);
  SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
  SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */
}
3949
/* Reads one character back without moving STR_PTR. TMP2 must contain the
   start of the subject buffer. Affects TMP1, TMP2, and RETURN_ADDR.

   max        - when below the first value that needs multi-unit decoding
                (128 for UTF-8, 0xd800 for UTF-16) only the plain load of the
                preceding code unit is emitted.
   backtracks - list that receives a jump for invalid input when invalid UTF
                handling is active. It is optional in the 8 and 16-bit
                branches.
   NOTE(review): the 32-bit branch passes backtracks to add_jump() without
   the NULL guard used by the other branches - presumably callers always
   supply a list when invalid_utf is set; confirm. */
static void peek_char_back(compiler_common *common, sljit_u32 max, jump_list **backtracks)
{
  DEFINE_COMPILER;

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
  struct sljit_jump *single_unit;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */

  SLJIT_UNUSED_ARG(max);
  SLJIT_UNUSED_ARG(backtracks);

  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
  if (!common->utf || max < 128)
    return;

  /* Values below 0x80 are complete after the load above. */
  single_unit = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
  if (!common->invalid_utf)
  {
    add_jump(compiler, &common->utfpeakcharback, JUMP(SLJIT_FAST_CALL));
  }
  else
  {
    add_jump(compiler, &common->utfpeakcharback_invalid, JUMP(SLJIT_FAST_CALL));
    if (backtracks != NULL)
    {
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    }
  }
  JUMPHERE(single_unit);
#elif PCRE2_CODE_UNIT_WIDTH == 16
  if (!common->utf || max < 0xd800)
    return;

  if (!common->invalid_utf)
  {
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
    single_unit = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xdc00);
    /* TMP2 contains the low surrogate. Fetch the unit before it and combine
       the two. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x10000);
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
  }
  else
  {
    single_unit = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
    add_jump(compiler, &common->utfpeakcharback_invalid, JUMP(SLJIT_FAST_CALL));
    if (backtracks != NULL)
    {
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    }
  }
  JUMPHERE(single_unit);
#elif PCRE2_CODE_UNIT_WIDTH == 32
  if (!common->invalid_utf)
    return;

  /* Values >= 0x110000 and surrogates go to the backtrack list. */
  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
  add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */
}
4017
/* Option bits for the "options" argument of read_char(). */

/* STR_PTR must end up after the whole character, even when the value itself
   is not decoded in full. */
#define READ_CHAR_UPDATE_STR_PTR 0x01
/* With invalid UTF handling active, use the newline variant of the read
   helper (utfreadnewline_invalid). */
#define READ_CHAR_UTF8_NEWLINE 0x02
/* Both of the above. */
#define READ_CHAR_NEWLINE (READ_CHAR_UPDATE_STR_PTR | READ_CHAR_UTF8_NEWLINE)
/* Bypass the invalid UTF path even when invalid_utf is set. */
#define READ_CHAR_VALID_UTF 0x04
4022
/* Reads the precise value of a character into TMP1, if the character is
   between min and max (c >= min && c <= max). Otherwise it returns with a
   value outside the range. Does not check STR_END.

   STR_PTR is always advanced past the first code unit. How much further it
   moves, and how much of the value is decoded, depends on min/max and on the
   READ_CHAR_* bits in "options". When invalid UTF handling is active (and
   READ_CHAR_VALID_UTF is not given) decoding is delegated to the validating
   helpers and, if "backtracks" is supplied, a jump is added to it for
   INVALID_UTF_CHAR. */
static void read_char(compiler_common *common, sljit_u32 min, sljit_u32 max,
  jump_list **backtracks, sljit_u32 options)
{
  DEFINE_COMPILER;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
  struct sljit_jump *done;
  BOOL advance_full;
#endif
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
  struct sljit_jump *short_seq;
#endif

  SLJIT_UNUSED_ARG(min);
  SLJIT_UNUSED_ARG(max);
  SLJIT_UNUSED_ARG(backtracks);
  SLJIT_UNUSED_ARG(options);
  SLJIT_ASSERT(min <= max);

  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
  if (!common->utf)
    return;

  advance_full = (options & READ_CHAR_UPDATE_STR_PTR) != 0;

  /* Only single byte values are of interest and STR_PTR need not be exact. */
  if (max < 128 && !advance_full)
    return;

  if (common->invalid_utf && !(options & READ_CHAR_VALID_UTF))
  {
    done = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);

    add_jump(compiler, (options & READ_CHAR_UTF8_NEWLINE) ? &common->utfreadnewline_invalid : &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));

    if (backtracks != NULL)
    {
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    }
    JUMPHERE(done);
    return;
  }

  /* First bytes below 0xc0 need no further decoding. */
  done = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);

  if (min >= 0x10000)
  {
    /* Only four byte sequences can produce a value in range: decode those
       inline and skip the decoding for any other first byte. When the exact
       STR_PTR is required, the number of bytes to skip is looked up in
       utf8_table4 and kept in RETURN_ADDR. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
    if (advance_full)
    {
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    }
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    short_seq = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
    if (!advance_full)
    {
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
    }
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    JUMPHERE(short_seq);
    if (advance_full)
    {
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
    }
  }
  else if (min >= 0x800 && max <= 0xffff)
  {
    /* Same scheme for three byte sequences. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
    if (advance_full)
    {
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    }
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    short_seq = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    if (!advance_full)
    {
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    }
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    JUMPHERE(short_seq);
    if (advance_full)
    {
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
    }
  }
  else if (max >= 0x800)
  {
    /* Mixed lengths: use the generic read helper. */
    add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
  }
  else if (max < 128)
  {
    /* The value itself is not needed, only STR_PTR has to be moved by the
       utf8_table4 entry of the first byte. */
    OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  }
  else
  {
    /* Values up to 0x7ff: decode as a two byte sequence. */
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    if (advance_full)
    {
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    }
    else
    {
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    }
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    if (advance_full)
    {
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
    }
  }
  JUMPHERE(done);
#elif PCRE2_CODE_UNIT_WIDTH == 16
  if (!common->utf)
    return;

  advance_full = (options & READ_CHAR_UPDATE_STR_PTR) != 0;

  /* Only values below the surrogates are of interest and STR_PTR need not be
     exact. */
  if (max < 0xd800 && !advance_full)
    return;

  if (common->invalid_utf && !(options & READ_CHAR_VALID_UTF))
  {
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    done = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);

    add_jump(compiler, (options & READ_CHAR_UTF8_NEWLINE) ? &common->utfreadnewline_invalid : &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));

    if (backtracks != NULL)
    {
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    }
    JUMPHERE(done);
    return;
  }

  if (max >= 0x10000)
  {
    /* Full decoding of surrogate pairs. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    done = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800);
    /* TMP2 contains the high surrogate. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
    JUMPHERE(done);
    return;
  }

  /* Skip low surrogate if necessary. The value of a pair is not decoded; when
     max reaches the surrogate range TMP1 is set to 0x10000 for it instead. */
  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);

  if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && !HAS_VIRTUAL_REGISTERS)
  {
    /* Branch free form. The candidate STR_PTR is computed before the flags
       are set by the comparison. */
    if (advance_full)
    {
      OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    }
    OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0x400);
    if (advance_full)
    {
      SELECT(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0, STR_PTR);
    }
    if (max >= 0xd800)
    {
      SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, 0x10000, TMP1);
    }
  }
  else
  {
    done = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
    if (advance_full)
    {
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    }
    if (max >= 0xd800)
    {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
    }
    JUMPHERE(done);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 32
  if (!common->invalid_utf)
    return;

  /* TMP2 = TMP1 - 0xd800, used for the surrogate range test. */
  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);

  if (backtracks != NULL)
  {
    /* Values >= 0x110000 and surrogates go to the backtrack list. */
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
    return;
  }

  /* No list supplied: replace such values by INVALID_UTF_CHAR instead. */
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000);
  SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
  SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */
}
4216
static void skip_valid_char(compiler_common *common)
{
/* Emits code that steps STR_PTR over one character without decoding it.
No STR_END or validity checks are emitted. In UTF-8/UTF-16 mode TMP1 is
overwritten. */
DEFINE_COMPILER;
#if (defined SUPPORT_UNICODE) && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
struct sljit_jump *single_unit;

if (common->utf)
  {
  /* Fetch the first code unit and step over it. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#if PCRE2_CODE_UNIT_WIDTH == 8
  /* Units below 0xc0 need no further skipping. For the others an extra
  length is loaded from utf8_table4 (indexed by unit - 0xc0) and added. */
  single_unit = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
#elif PCRE2_CODE_UNIT_WIDTH == 16
  /* Units below 0xd800 need no further skipping. Otherwise TMP1 is set to 1
  when (unit & 0xfc00) == 0xd800 and to 0 if not, then shifted left by one
  and added to STR_PTR. */
  single_unit = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0xd800);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */
  JUMPHERE(single_unit);
  return;
  }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == [8|16] */

/* Non-UTF mode: every character is exactly one code unit. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
}
4247
4248 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
4249
static BOOL is_char7_bitset(const sljit_u8 *bitset, BOOL nclass)
{
/* Tells whether the character codes below 128 are enough to determine a
match: returns TRUE when bytes 16..31 of the bitset all hold the same fill
value (0xff when nclass is set, 0 otherwise), FALSE as soon as one differs. */
const sljit_u8 expected = nclass ? 0xff : 0;
int i;

for (i = 16; i < 32; i++)
  {
  if (bitset[i] != expected)
    return FALSE;
  }

return TRUE;
}
4266
static void read_char7_type(compiler_common *common, jump_list **backtracks, BOOL negated)
{
/* Emits code that reads the first code unit at STR_PTR into TMP2, advances
STR_PTR by one unit and loads the ctypes entry for that unit into TMP1. Does
not check STR_END. Only valid in UTF mode (asserted).

When negated is FALSE nothing more is emitted. When negated is TRUE and the
unit is >= 0x80, the rest of the character is consumed as well:
  - invalid-UTF mode: the character is decoded through utfreadchar_invalid,
    a jump to backtracks is added for INVALID_UTF_CHAR, and TMP1 is set to 0;
  - otherwise: STR_PTR is advanced by the utf8_table4 entry for the unit. */
DEFINE_COMPILER;
struct sljit_jump *is_ascii;

SLJIT_ASSERT(common->utf);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* All values > 127 are zero in ctypes. */
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);

if (!negated)
  return;

is_ascii = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x80);

if (common->invalid_utf)
  {
  /* The decoder expects the first byte in TMP1. */
  OP1(SLJIT_MOV, TMP1, 0, TMP2, 0);
  add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
  }
else
  {
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  }

JUMPHERE(is_ascii);
}
4302
4303 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */
4304
static void read_char8_type(compiler_common *common, jump_list **backtracks, BOOL negated)
{
/* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END
for the first code unit. TMP2 is overwritten.

Arguments:
  common      the compiler state
  backtracks  list that receives the jumps emitted for malformed input;
              only used by the invalid-UTF (and UTF-16/32 negated) paths
  negated     TRUE when the caller tests a negated type; the whole character
              is then consumed, because a character without a ctypes entry
              still has to be stepped over

Only characters below 256 have an entry in ctypes; for all others TMP1 is
set to zero. */
DEFINE_COMPILER;
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
struct sljit_jump *jump;
#endif
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
struct sljit_jump *jump2;
#endif

SLJIT_UNUSED_ARG(backtracks);
SLJIT_UNUSED_ARG(negated);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
  /* The result of this read may be unused, but saves an "else" part. */
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
  jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x80);

  if (!negated)
    {
    /* Only two-byte sequences can encode a character below 256, so exactly
    one more byte is read here. */
    if (common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

    /* TMP2 = lead byte - 0xc2; a valid two-byte lead is in 0xc2..0xdf. */
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
    if (common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe0 - 0xc2));

    /* TMP2 = ((lead - 0xc2) << 6) + second byte. When the second byte is in
    0x80..0xbf this equals ((lead - 0xc0) << 6) + (second - 0x80), which is
    the decoded character. TMP1 = second byte - 0x80. */
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);

    /* The continuation byte has to be in 0x80..0xbf, so the range check is
    made on TMP1 (second byte - 0x80). Checking the combined value in TMP2
    would reject every well-formed two-byte sequence. */
    if (common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40));

    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
    JUMPHERE(jump2);
    }
  else if (common->invalid_utf)
    {
    /* NOTE(review): at this point TMP1 holds the ctypes value loaded above
    and the first byte is in TMP2, while read_char7_type copies the first
    byte into TMP1 before entering utfreadchar_invalid. Confirm whether a
    move of TMP2 into TMP1 is missing here. */
    add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
    OP1(SLJIT_MOV, TMP2, 0, TMP1, 0);
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));

    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
    JUMPHERE(jump2);
    }
  else
    add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));

  JUMPHERE(jump);
  return;
  }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf && negated)
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x110000));
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 32 */

#if PCRE2_CODE_UNIT_WIDTH != 8
/* The ctypes array contains only 256 values. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
#if PCRE2_CODE_UNIT_WIDTH != 8
JUMPHERE(jump);
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf && negated)
  {
  /* Skip low surrogate if necessary. */
  if (!common->invalid_utf)
    {
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);

    if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && !HAS_VIRTUAL_REGISTERS)
      {
      /* Branch-free form: STR_PTR + 1 unit is selected when the unit just
      read is in 0xd800..0xdbff. */
      OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0x400);
      SELECT(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0, STR_PTR);
      }
    else
      {
      jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      JUMPHERE(jump);
      }
    return;
    }

  /* Invalid-UTF mode: units outside 0xd800..0xdfff are complete characters.
  A unit in 0xdc00..0xdfff at this position, a missing second unit, or a
  second unit outside 0xdc00..0xdfff jumps to backtracks. */
  OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
  jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400));
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xdc00);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400));

  JUMPHERE(jump);
  return;
  }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16 */
}
4423
static void move_back(compiler_common *common, jump_list **backtracks, BOOL must_be_valid)
{
/* Emits code that moves STR_PTR one character backwards. TMP1 is overwritten.
When must_be_valid is TRUE, TMP2 is not used. Otherwise TMP2 has to hold the
start of the subject buffer on entry, and it is destroyed. STR_PTR is not
modified for invalid character sequences. When backtracks is not NULL, the
invalid-UTF paths add a jump to it that is taken for an invalid sequence. */
DEFINE_COMPILER;

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *single_unit;
#endif

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
struct sljit_label *step_back;

if (common->utf)
  {
  if (common->invalid_utf && !must_be_valid)
    {
    /* Bytes below 0x80 are complete characters; everything else is handed
    to the utfmoveback_invalid helper, which leaves zero in TMP1 for an
    invalid sequence. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    single_unit = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
    add_jump(compiler, &common->utfmoveback_invalid, JUMP(SLJIT_FAST_CALL));
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
    JUMPHERE(single_unit);
    return;
    }

  /* Valid input: keep stepping back while the byte has the form 10xxxxxx. */
  step_back = LABEL();
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
  CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, step_back);
  return;
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf)
  {
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  if (common->invalid_utf && !must_be_valid)
    {
    /* Units outside 0xd800..0xdfff are complete characters; the others are
    handed to the utfmoveback_invalid helper. */
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    single_unit = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000 - 0xd800);
    add_jump(compiler, &common->utfmoveback_invalid, JUMP(SLJIT_FAST_CALL));
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
    JUMPHERE(single_unit);
    return;
    }

  /* Skip low surrogate if necessary: TMP1 becomes 1 when the unit is in
  0xdc00..0xdfff, and one more unit is subtracted in that case. */
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0xdc00);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  return;
  }
#elif PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf && !must_be_valid)
  {
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  if (backtracks != NULL)
    {
    /* Values of 0x110000 and above jump to backtracks before STR_PTR is
    changed. */
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    return;
    }

  /* No backtrack list: TMP1 becomes 1 when the value is below 0x110000 and
  0 otherwise, so STR_PTR only moves for accepted values. */
  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x110000);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_LESS);
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  return;
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */

SLJIT_UNUSED_ARG(backtracks);
SLJIT_UNUSED_ARG(must_be_valid);

/* Non-UTF mode: a character is a single code unit. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
}
4510
static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
{
/* The character to test comes in TMP1. Emits a newline test for the given
nltype and adds a jump to backtracks that is taken when the character is a
newline (jumpifmatch is TRUE) or when it is not (jumpifmatch is FALSE).
TMP2 may be destroyed. */
DEFINE_COMPILER;
struct sljit_jump *is_cr;

if (nltype == NLTYPE_ANY)
  {
  /* The shared anynewline helper reports its result through the zero flag. */
  add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
  if (jumpifmatch)
    add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
  else
    add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
  return;
  }

if (nltype == NLTYPE_ANYCRLF)
  {
  if (jumpifmatch)
    {
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
    return;
    }

  /* Jump to backtracks only when the character is neither CR nor NL. */
  is_cr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
  JUMPHERE(is_cr);
  return;
  }

/* Fixed, single code unit newline. */
SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
if (jumpifmatch)
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
else
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
}
4543
4544 #ifdef SUPPORT_UNICODE
4545
4546 #if PCRE2_CODE_UNIT_WIDTH == 8
static void do_utfreadchar(compiler_common *common)
{
/* Fast decoding of a UTF-8 character, emitted as a fast-call helper. On entry
TMP1 contains the first byte of the character (>= 0xc0) and STR_PTR points to
the byte after it. The character value is returned in TMP1 and STR_PTR is
moved past the character. TMP2 is overwritten. No validation is done. */
DEFINE_COMPILER;
struct sljit_jump *longer_than_two;
struct sljit_jump *longer_than_three;

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

/* Merge the low six bits of the second byte. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Searching for the first zero: 0x800 is bit 0x20 of the first byte after
the shift. */
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x800);
longer_than_two = JUMP(SLJIT_NOT_ZERO);

/* Two byte sequence: clear the shifted 0xc0 marker bits. */
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3000);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Merge the low six bits of the third byte. */
JUMPHERE(longer_than_two);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* 0x10000 is bit 0x10 of the first byte after two shifts. */
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x10000);
longer_than_three = JUMP(SLJIT_NOT_ZERO);

/* Three byte sequence: clear the shifted 0xe0 marker bits. */
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0000);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Four byte sequence: clear the shifted 0xf0 marker bits, then merge the low
six bits of the fourth byte. */
JUMPHERE(longer_than_three);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xf0000);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4591
static void do_utfreadtype8(compiler_common *common)
{
/* Fast decoding of a UTF-8 character type, emitted as a fast-call helper. On
entry TMP2 contains the first byte of the character (>= 0xc0) and STR_PTR
points to the byte after it. The ctypes value is returned in TMP1 (zero for
characters that have no ctypes entry) and STR_PTR is moved past the
character. No validation is done. */
DEFINE_COMPILER;
struct sljit_jump *longer_than_two;
struct sljit_jump *above_255;

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

/* Bit 0x20 of the first byte is set for sequences longer than two bytes. */
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, 0x20);
longer_than_two = JUMP(SLJIT_NOT_ZERO);

/* Two byte sequence. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
/* The upper 5 bits are known at this point. Values above 3 give a character
of 256 or more, which has no ctypes entry. */
above_255 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(above_255);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* We only have types for characters less than 256, so longer sequences are
stepped over using the utf8_table4 entry for the first byte. */
JUMPHERE(longer_than_two);
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4627
static void do_utfreadchar_invalid(compiler_common *common)
{
/* Slow decoding of a UTF-8 character with validation, emitted as a fast-call
helper. On entry TMP1 contains the first byte of the character (>= 0xc0) and
STR_PTR points to the byte after it. The character value, or INVALID_UTF_CHAR
for a malformed sequence, is returned in TMP1. STR_PTR is undefined for
invalid characters. TMP2 is overwritten.

When conditional moves are available some checks replace TMP1 with a value
that a later check is certain to reject, instead of branching; the matching
slots of the invalid-jump array are then left NULL. */
DEFINE_COMPILER;
sljit_s32 idx;
sljit_s32 cmov_available = sljit_has_cpu_feature(SLJIT_HAS_CMOV);
struct sljit_jump *longer;
struct sljit_jump *near_end;
struct sljit_label *three_byte_tail;
struct sljit_label *invalid_label;
struct sljit_jump *invalid[11];

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc2);

/* Usually more than 3 characters remained in the subject buffer. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));

/* Not a valid start of a multi-byte sequence, no more bytes read. */
invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xf5 - 0xc2);

near_end = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);

/* Second byte. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
/* If TMP2 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x800);
longer = JUMP(SLJIT_NOT_ZERO);

/* Two-byte sequence: give back the two bytes that were not consumed. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(longer);

/* Three-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
if (cmov_available)
  {
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
  SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0x20000, TMP1);
  invalid[2] = NULL;
  }
else
  invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x10000);
longer = JUMP(SLJIT_NOT_ZERO);

/* Shared by this path and by the three-byte path used close to the end of
the buffer. */
three_byte_tail = LABEL();

/* After the subtraction, values below 0x800 are rejected (TMP1 is then the
character value minus 0xd800). */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2d800);
if (cmov_available)
  {
  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);
  SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0xd800, TMP1);
  invalid[3] = NULL;
  }
else
  invalid[3] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* Character values below 0x800 are rejected for a three-byte sequence. */
if (cmov_available)
  {
  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);
  SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
  invalid[4] = NULL;
  }
else
  invalid[4] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(longer);

/* Four-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
if (cmov_available)
  {
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
  SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0, TMP1);
  invalid[5] = NULL;
  }
else
  invalid[5] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

/* After the subtraction TMP1 is the character value minus 0x10000; results
of 0x100000 and above are rejected. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc10000);
if (cmov_available)
  {
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);
  SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000, TMP1);
  invalid[6] = NULL;
  }
else
  invalid[6] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);

OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Fewer than three bytes follow the first byte: every read is now preceded
by its own STR_END check. */
JUMPHERE(near_end);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
invalid[7] = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);

/* Two-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
/* If TMP2 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
invalid[8] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x800);
longer = JUMP(SLJIT_NOT_ZERO);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Three-byte sequence. */
JUMPHERE(longer);
invalid[9] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
if (cmov_available)
  {
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
  SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
  invalid[10] = NULL;
  }
else
  invalid[10] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

/* One will be subtracted from STR_PTR later, in the shared three-byte tail. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));

/* Four byte sequences are not possible. */
CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x30000, three_byte_tail);

/* Common exit for every rejected sequence. */
invalid_label = LABEL();
for (idx = 0; idx < 11; idx++)
  sljit_set_label(invalid[idx], invalid_label);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4786
static void do_utfreadnewline_invalid(compiler_common *common)
{
/* Slow decoding of a UTF-8 character, specialized for newlines and emitted
as a fast-call helper. On entry TMP1 contains the first byte of the character
(>= 0xc0). TMP1 returns 0x85, 0x2028 or 0x2029 when one of those characters
is recognised (NLTYPE_ANY only), and INVALID_UTF_CHAR in every other case.
TMP2 is overwritten. */
DEFINE_COMPILER;
struct sljit_label *skip_loop;
struct sljit_label *skip_entry;
struct sljit_label *not_newline_exit;
struct sljit_jump *at_end;
struct sljit_jump *lead_is_c2;
struct sljit_jump *lead_is_e2;
struct sljit_jump *not_trailing;
struct sljit_jump *ran_out;

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

if (common->nltype != NLTYPE_ANY)
  {
  SLJIT_ASSERT(common->nltype != NLTYPE_FIXED || common->newline < 128);

  /* All newlines are ascii, just skip intermediate octets. */
  /* NOTE(review): STR_END is checked once before this loop but not between
  the reads inside it, unlike the NLTYPE_ANY loop below - confirm that this
  is intended. */
  at_end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  skip_loop = LABEL();
  /* The first call carries SLJIT_MEM_SUPP (presumably a support probe); the
  real post-update load is emitted only when it reports success. */
  if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)) == SLJIT_SUCCESS)
    sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  else
    {
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    }

  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
  CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, skip_loop);
  /* The last byte read was not of the form 10xxxxxx: step back onto it. */
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  JUMPHERE(at_end);

  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
  OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
  return;
  }

/* NLTYPE_ANY: read the second byte, then dispatch on the first one. */
at_end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

lead_is_c2 = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xc2);
lead_is_e2 = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xe2);

/* Not a newline: TMP2 holds the byte read last. */
skip_entry = LABEL();
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
not_trailing = CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80);

/* Skip intermediate octets. */
skip_loop = LABEL();
ran_out = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, skip_loop);

/* The last byte read was not of the form 10xxxxxx: step back onto it. */
JUMPHERE(not_trailing);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

not_newline_exit = LABEL();
JUMPHERE(at_end);
JUMPHERE(ran_out);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Two byte long newline: 0x85. */
JUMPHERE(lead_is_c2);
CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x85, skip_entry);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x85);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Three byte long newlines: 0x2028 and 0x2029. */
JUMPHERE(lead_is_e2);
CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, skip_entry);
CMPTO(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0, not_newline_exit);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

OP2(SLJIT_SUB, TMP1, 0, TMP2, 0, SLJIT_IMM, 0x80);
CMPTO(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40, skip_entry);

/* The result is 0x2000 combined with the low six bits of the third byte. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0x2000);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4877
static void do_utfmoveback_invalid(compiler_common *common)
{
/* Goes one character back with validation, emitted as a fast-call helper.
The code compares TMP1 against 0xc0 before anything is read and compares
STR_PTR against TMP2 as the lower bound for reads. On return TMP1 is 1 and
STR_PTR points to the first byte of the character when a well-formed lead
byte was found. Otherwise TMP1 is 0 and STR_PTR is one byte above its value
on entry. */
DEFINE_COMPILER;
sljit_s32 idx;
struct sljit_jump *longer;
struct sljit_jump *near_start;
struct sljit_label *found_label;
struct sljit_label *invalid_label;
struct sljit_jump *invalid[7];

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

/* Assume that three more bytes are available below STR_PTR. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xc0);

/* Two-byte sequence. */
near_start = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
longer = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x20);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Three-byte sequence. The byte just examined has to be in 0x80-0xbf, which
is -0x40..-1 after the subtraction above. */
JUMPHERE(longer);
invalid[1] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, -0x40);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
longer = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x10);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Four-byte sequence. TMP1 is rebased to (byte - 0x80) so that the same
0x80-0xbf range check can be applied. */
JUMPHERE(longer);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0 - 0x80);
invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xf0);
invalid[3] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x05);

found_label = LABEL();
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Close to the lower bound: every read below is preceded by its own bound
check. Two-byte sequence. */
JUMPHERE(near_start);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));

invalid[4] = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x20, found_label);

/* Three-byte sequence. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
invalid[5] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, -0x40);
invalid[6] = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10, found_label);

/* Four-byte sequences are not possible. */

/* Invalid exit for the paths where STR_PTR is three bytes below its value at
the time of the first STR_PTR adjustment above. */
invalid_label = LABEL();
sljit_set_label(invalid[5], invalid_label);
sljit_set_label(invalid[6], invalid_label);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(invalid[4]);
/* -2 + 4 = 2 */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));

/* Invalid exit for the paths where STR_PTR is four bytes below its restore
value. */
invalid_label = LABEL();
for (idx = 0; idx < 4; idx++)
  sljit_set_label(invalid[idx], invalid_label);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(4));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4973
static void do_utfpeakcharback(compiler_common *common)
{
/* Peeks at the multi-byte character that ends just before STR_PTR, emitted
as a fast-call helper. STR_PTR is not modified. The decoded value is returned
in TMP1 and TMP2 is overwritten. No validation is done: the bytes at offsets
-2 and -3 are only tested to find out where the lead byte is. */
DEFINE_COMPILER;
struct sljit_jump *lead_at_minus2;
struct sljit_jump *lead_at_minus3;

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

/* A byte in 0xc0-0xdf at offset -2 starts a two-byte sequence. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
lead_at_minus2 = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x20);

/* A byte in 0xe0-0xef at offset -3 starts a three-byte sequence. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
lead_at_minus3 = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10);

/* Otherwise four bytes: TMP1 is rebased to (byte at -3) - 0x80 and combined
with ((byte at -4) - 0xf0) << 6. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-4));
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0 - 0x80);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf0);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Append the six payload bits of the byte at offset -2. */
JUMPHERE(lead_at_minus3);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Append the six payload bits of the byte at offset -1. */
JUMPHERE(lead_at_minus2);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5010
static void do_utfpeakcharback_invalid(compiler_common *common)
{
/* Peek a character back. Does not modify STR_PTR.

Emits a fast-call routine which decodes, with validation, the UTF-8 sequence
ending just before STR_PTR. The result is returned in TMP1; every failure
path returns INVALID_UTF_CHAR in TMP1. TMP2 is clobbered.

On entry TMP1 is rejected if it is >= 0xc0, and the arithmetic at
two_byte_entry relies on it being a raw continuation byte (0x80..0xbf).
NOTE(review): presumably TMP1 holds the already loaded byte at offset -1 and
TMP2 the lowest address which may be read - confirm against the caller.

Three copies of the decoder are emitted, selected by how many code units lie
between the entry value of TMP2 and STR_PTR: at least four, exactly three,
or exactly two. The shorter copies jump into the shared two_byte_entry and
three_byte_entry tails of the first copy. */
DEFINE_COMPILER;
sljit_s32 i;
sljit_s32 has_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV);
struct sljit_jump *jump[2];
struct sljit_label *two_byte_entry;
struct sljit_label *three_byte_entry;
struct sljit_label *exit_invalid_label;
struct sljit_jump *exit_invalid[8];

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

/* jump[0] is taken when fewer than four code units are available. */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xc0);
jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);

/* Two-byte sequence. The lead must be in 0xc2..0xdf (0xc0 and 0xc1 are
excluded by subtracting 0xc2). */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
jump[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x1e);

two_byte_entry = LABEL();
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
/* If TMP1 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Byte at -2 is not a two-byte lead: it must be a continuation byte.
TMP2 becomes (byte[-2] - 0x80) and TMP1 is reduced to its payload bits. */
JUMPHERE(jump[1]);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2 - 0x80);
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Three-byte sequence. The lead must be in 0xe0..0xef. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0);
jump[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x10);

three_byte_entry = LABEL();
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Reject 0xd800..0xdfff. With conditional move support TMP1 is replaced by
-0xd800, which becomes 0 after the ADD below and is then caught by the
following "less than 0x800" test; otherwise a jump is recorded. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
if (has_cmov)
  {
  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);
  SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, -0xd800, TMP1);
  exit_invalid[2] = NULL;
  }
else
  exit_invalid[2] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);

/* Reject values below 0x800: they do not need three bytes. */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
if (has_cmov)
  {
  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);
  SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
  exit_invalid[3] = NULL;
  }
else
  exit_invalid[3] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Byte at -3 is not a three-byte lead: it must be a continuation byte. */
JUMPHERE(jump[1]);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0 - 0x80);
exit_invalid[4] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Four-byte sequence. The value is biased by -0x10000 so a single unsigned
compare against 0x100000 rejects everything outside 0x10000..0x10ffff. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-4));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf0);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 18);
/* ADD is used instead of OR because of the SUB 0x10000 above. */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);

if (has_cmov)
  {
  /* The selected constant becomes INVALID_UTF_CHAR after the ADD below. */
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);
  SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000, TMP1);
  exit_invalid[5] = NULL;
  }
else
  exit_invalid[5] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);

OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Fewer than four code units are available. TMP2 is lowered by one unit;
if it is still not below STR_PTR, fewer than three are available. */
JUMPHERE(jump[0]);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);

/* Two-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x1e, two_byte_entry);

OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2 - 0x80);
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
exit_invalid[6] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Three-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0);
CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x10, three_byte_entry);

/* A four-byte sequence cannot fit in three code units. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Fewer than three code units are available; fewer than two is invalid. */
JUMPHERE(jump[0]);
exit_invalid[7] = CMP(SLJIT_GREATER, TMP2, 0, STR_PTR, 0);

/* Two-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x1e, two_byte_entry);

/* Common failure exit. Entries set to NULL above (conditional move
variants) are passed to sljit_set_label as they are. */
exit_invalid_label = LABEL();
for (i = 0; i < 8; i++)
  sljit_set_label(exit_invalid[i], exit_invalid_label);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5142
5143 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
5144
5145 #if PCRE2_CODE_UNIT_WIDTH == 16
5146
static void do_utfreadchar_invalid(compiler_common *common)
{
/* Slow decoding a UTF-16 character. TMP1 contains the first half
of the character (>= 0xd800). Return char value in TMP1. STR_PTR is
undefined for invalid characters.

Emits a fast-call routine. On success STR_PTR has been advanced by one code
unit (past the second half) and TMP1 holds the combined value; on any
failure TMP1 holds INVALID_UTF_CHAR. TMP2 is modified. */
DEFINE_COMPILER;
struct sljit_jump *exit_invalid[3];

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

/* TMP2 contains the high surrogate.
NOTE(review): the arithmetic below (shift by 10, then add 0x10000) only
yields the code point if TMP2 holds the first unit minus 0xd800 - confirm
against the caller. */
/* A first unit >= 0xdc00 is not a high surrogate; a missing second unit
is also invalid. */
exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00);
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* The second unit must be in 0xdc00..0xdfff. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x10000);
exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x400);

OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Common failure exit. */
JUMPHERE(exit_invalid[0]);
JUMPHERE(exit_invalid[1]);
JUMPHERE(exit_invalid[2]);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5178
static void do_utfreadnewline_invalid(compiler_common *common)
{
/* Slow decoding a UTF-16 character, specialized for newlines.
TMP1 contains the first half of the character (>= 0xd800). Return
char value in TMP1.

The emitted routine never computes the real code point. When TMP1 is below
0xdc00 and another code unit is available, TMP1 is set to the constant
0x10000 and STR_PTR is advanced by one code unit only if the next unit is in
0xdc00..0xdfff. Otherwise TMP1 is set to INVALID_UTF_CHAR and STR_PTR is
left unchanged. */

DEFINE_COMPILER;
struct sljit_jump *exit_invalid[2];

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

/* The entry value of TMP2 is not read: TMP2 is loaded with the next code
unit below. */
exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00);

/* TMP2 becomes 1 if the next unit is a low surrogate, 0 otherwise, and is
then scaled to a byte offset which is added to STR_PTR. */
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xdc00);
OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0x400);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Common failure exit. */
JUMPHERE(exit_invalid[0]);
JUMPHERE(exit_invalid[1]);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5210
static void do_utfmoveback_invalid(compiler_common *common)
{
/* Goes one character back.

Emits a fast-call routine with two outcomes:
  success: STR_PTR is decreased by one code unit and TMP1 is set to 1;
  failure: STR_PTR is increased by one code unit and TMP1 is set to 0.
Failure is reported when TMP1 is below 0x400 on entry, when TMP2 is not
below STR_PTR, or when the code unit before STR_PTR is outside
0xd800..0xdbff.
NOTE(review): presumably the caller has already moved STR_PTR back over the
last code unit, passes that unit minus 0xd800 in TMP1 and the lower bound of
the readable area in TMP2 - confirm against the caller. */
DEFINE_COMPILER;
struct sljit_jump *exit_invalid[3];

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

exit_invalid[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x400);
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);

/* The preceding code unit must be a high surrogate (0xd800..0xdbff). */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x400);

OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Common failure exit. */
JUMPHERE(exit_invalid[0]);
JUMPHERE(exit_invalid[1]);
JUMPHERE(exit_invalid[2]);

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5238
static void do_utfpeakcharback_invalid(compiler_common *common)
{
/* Peek a character back. Does not modify STR_PTR.

Emits a fast-call routine for UTF-16. The result is returned in TMP1:
  - TMP1 >= 0xe000 on entry is returned unchanged;
  - TMP1 in 0xdc00..0xdfff is combined with the code unit at offset -2,
    which must be in 0xd800..0xdbff, into a value of 0x10000 or above;
  - everything else that reaches this routine yields INVALID_UTF_CHAR.
TMP2 is clobbered on every path except the immediate return.
NOTE(review): presumably TMP1 holds the code unit at offset -1 (already known
to be >= 0xd800) and TMP2 the lower bound of the readable area - confirm
against the caller. */
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_jump *exit_invalid[3];

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000);
/* Two code units must be available before STR_PTR. */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
exit_invalid[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);

/* TMP1 becomes 0x10000 + (low - 0xdc00); TMP2 becomes (high - 0xd800),
which must be below 0x400 and supplies the upper ten bits. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);

JUMPHERE(jump);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Common failure exit. */
JUMPHERE(exit_invalid[0]);
JUMPHERE(exit_invalid[1]);
JUMPHERE(exit_invalid[2]);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5270
5271 #endif /* PCRE2_CODE_UNIT_WIDTH == 16 */
5272
/* UCD_BLOCK_SIZE must be 128 (see the assert below). UCD_BLOCK_MASK is
128 - 1 and UCD_BLOCK_SHIFT is log2(128); both are used by the emitted
two-stage table lookup to split a character into block number and offset. */
#define UCD_BLOCK_MASK 127
#define UCD_BLOCK_SHIFT 7
5276
static void do_getucd(compiler_common *common)
{
/* Search the UCD record for the character comes in TMP1.
Returns the UCD record index (the 16 bit value read from ucd_stage2) in
TMP2. TMP1 is clobbered: on return it holds the index which was used to
address ucd_stage2, not the character and not a character type. */
DEFINE_COMPILER;
#if PCRE2_CODE_UNIT_WIDTH == 32
struct sljit_jump *jump;
#endif

#if defined SLJIT_DEBUG && SLJIT_DEBUG
/* dummy_ucd_record */
const ucd_record *record = GET_UCD(UNASSIGNED_UTF_CHAR);
SLJIT_ASSERT(record->script == ucp_Unknown && record->chartype == ucp_Cn && record->gbprop == ucp_gbOther);
SLJIT_ASSERT(record->caseset == 0 && record->other_case == 0);
#endif

SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 12);

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

#if PCRE2_CODE_UNIT_WIDTH == 32
if (!common->utf)
  {
  /* Values above MAX_UTF_CODE_POINT are replaced by UNASSIGNED_UTF_CHAR
  before the table lookup. */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
  JUMPHERE(jump);
  }
#endif

/* TMP2 = ucd_stage1[TMP1 >> UCD_BLOCK_SHIFT] (16 bit entries, hence the
shift by 1 to form a byte offset). */
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
/* TMP1 = (TMP1 & UCD_BLOCK_MASK) + (TMP2 << UCD_BLOCK_SHIFT) */
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
/* TMP2 = ucd_stage2[TMP1] (16 bit entries, index scaled by 2). */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5316
static void do_getucdtype(compiler_common *common)
{
/* Search the UCD record for the character comes in TMP1.
Returns chartype in TMP1. TMP2 is clobbered: on return it holds the UCD
record index multiplied by 4 (an intermediate of the address computation),
not the plain index. */
DEFINE_COMPILER;
#if PCRE2_CODE_UNIT_WIDTH == 32
struct sljit_jump *jump;
#endif

#if defined SLJIT_DEBUG && SLJIT_DEBUG
/* dummy_ucd_record */
const ucd_record *record = GET_UCD(UNASSIGNED_UTF_CHAR);
SLJIT_ASSERT(record->script == ucp_Unknown && record->chartype == ucp_Cn && record->gbprop == ucp_gbOther);
SLJIT_ASSERT(record->caseset == 0 && record->other_case == 0);
#endif

SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 12);

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

#if PCRE2_CODE_UNIT_WIDTH == 32
if (!common->utf)
  {
  /* Values above MAX_UTF_CODE_POINT are replaced by UNASSIGNED_UTF_CHAR
  before the table lookup. */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
  JUMPHERE(jump);
  }
#endif

/* TMP2 = ucd_stage1[TMP1 >> UCD_BLOCK_SHIFT] (16 bit entries, hence the
shift by 1 to form a byte offset). */
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
/* TMP1 = (TMP1 & UCD_BLOCK_MASK) + (TMP2 << UCD_BLOCK_SHIFT) */
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
/* TMP2 = ucd_stage2[TMP1] (16 bit entries, index scaled by 2). */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);

/* TMP2 is multiplied by 12. Same as (TMP2 << 2) + ((TMP2 << 2) << 1).
The first term is added to the base address explicitly, the second one is
supplied by the scaled index of the final load. 12 is sizeof(ucd_record),
as asserted above. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 1);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5363
5364 #endif /* SUPPORT_UNICODE */
5365
static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common)
{
/* Emits the code which runs before the first match attempt and the code
which advances STR_PTR to the next start position.

The emitted layout is:
  1. optional setup which stores a limit into the match_end_ptr stack slot
     (PCRE2_FIRSTLINE: end of the first line; PCRE2_USE_OFFSET_LIMIT: the
     offset limit, or STR_END);
  2. an unconditional jump ("start") over the advance code, so that the
     first attempt happens at the unmodified STR_PTR;
  3. the advance code, beginning at the returned label.

Returns the label of the advance code. */
DEFINE_COMPILER;
struct sljit_label *mainloop;
struct sljit_label *newlinelabel = NULL;
struct sljit_jump *start;
struct sljit_jump *end = NULL;
struct sljit_jump *end2 = NULL;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_label *loop;
struct sljit_jump *jump;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
jump_list *newline = NULL;
sljit_u32 overall_options = common->re->overall_options;
BOOL hascrorlf = (common->re->flags & PCRE2_HASCRORLF) != 0;
BOOL newlinecheck = FALSE;
BOOL readuchar = FALSE;

/* The advance code treats a two code unit newline as a single step only
when the pattern has no explicit CR or LF, PCRE2_FIRSTLINE is not set, and
the newline type can be a two unit sequence. */
if (!(hascrorlf || (overall_options & PCRE2_FIRSTLINE) != 0)
    && (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
  newlinecheck = TRUE;

SLJIT_ASSERT(common->abort_label == NULL);

if ((overall_options & PCRE2_FIRSTLINE) != 0)
  {
  /* Search for the end of the first line. */
  SLJIT_ASSERT(common->match_end_ptr != 0);
  /* STR_PTR is used for the scan; its value is kept in TMP3. */
  OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);

  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
    {
    /* Fixed two code unit newline: look at the pair (STR_PTR[-1],
    STR_PTR[0]) and store STR_PTR - 1 when the loop ends. */
    mainloop = LABEL();
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
    CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
    JUMPHERE(end);
    OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    }
  else
    {
    end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
    mainloop = LABEL();
    /* Continual stores does not cause data dependency. */
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
    read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_NEWLINE);
    check_newlinechar(common, common->nltype, &newline, TRUE);
    CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop);
    JUMPHERE(end);
    /* Reached only when the subject ends without a newline; the jumps
    collected in "newline" skip this store and keep the value stored
    before the newline was read. */
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
    set_jumps(newline, LABEL());
    }

  OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
  }
else if ((overall_options & PCRE2_USE_OFFSET_LIMIT) != 0)
  {
  /* Check whether offset limit is set and valid. */
  SLJIT_ASSERT(common->match_end_ptr != 0);

  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, offset_limit));
    }
  else
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, offset_limit));

  /* An unset limit stores STR_END (the value placed in TMP2 here). */
  OP1(SLJIT_MOV, TMP2, 0, STR_END, 0);
  end = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw) PCRE2_UNSET);
  if (HAS_VIRTUAL_REGISTERS)
    OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
  else
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));

#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  /* Convert the limit from code units to a byte offset. */
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif /* PCRE2_CODE_UNIT_WIDTH == [16|32] */
  if (HAS_VIRTUAL_REGISTERS)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));

  /* TMP2 = begin + limit, clamped to STR_END. */
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
  end2 = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
  OP1(SLJIT_MOV, TMP2, 0, STR_END, 0);
  JUMPHERE(end2);
  /* A limit below the starting position aborts with PCRE2_ERROR_NOMATCH. */
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
  add_jump(compiler, &common->abort, CMP(SLJIT_LESS, TMP2, 0, STR_PTR, 0));
  JUMPHERE(end);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, TMP2, 0);
  }

/* Skip the advance code on the first pass. */
start = JUMP(SLJIT_JUMP);

if (newlinecheck)
  {
  /* Entered from the advance code when the current code unit equals the
  high byte of common->newline: step over it, and step over the following
  unit as well if that equals the low byte of common->newline. */
  newlinelabel = LABEL();
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif /* PCRE2_CODE_UNIT_WIDTH == [16|32] */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  end2 = JUMP(SLJIT_JUMP);
  }

mainloop = LABEL();

/* Increasing the STR_PTR here requires one less jump in the most common case. */
/* The current code unit is loaded into TMP1 only when something below
reads it: the valid UTF length computation or the newline check. */
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf && !common->invalid_utf) readuchar = TRUE;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
if (newlinecheck) readuchar = TRUE;

if (readuchar)
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);

if (newlinecheck)
  CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->invalid_utf)
  {
  /* Skip continuation code units. */
  /* The loop stops at STR_END or at the first byte outside 0x80..0xbf;
  in the latter case the extra increment is undone. */
  loop = LABEL();
  jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
  CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x40, loop);
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  JUMPHERE(jump);
  }
else if (common->utf)
  {
  /* For a lead byte (>= 0xc0) add the value found in utf8_table4,
  indexed by (lead - 0xc0), to STR_PTR. */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  JUMPHERE(jump);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (common->invalid_utf)
  {
  /* Skip continuation code units. */
  /* Here these are the code units in 0xdc00..0xdfff. */
  loop = LABEL();
  jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
  CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x400, loop);
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  JUMPHERE(jump);
  }
else if (common->utf)
  {
  /* If the code unit just passed is in 0xd800..0xdbff, step over one
  more code unit. Both variants below add the same amount. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);

  if (sljit_has_cpu_feature(SLJIT_HAS_CMOV))
    {
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x400);
    SELECT(SLJIT_LESS, STR_PTR, TMP2, 0, STR_PTR);
    }
  else
    {
    OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x400);
    OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_LESS);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
    }
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
/* The first pass and both exits of the newline code join here. */
JUMPHERE(start);

if (newlinecheck)
  {
  JUMPHERE(end);
  JUMPHERE(end2);
  }

return mainloop;
}
5556
5557
add_prefix_char(PCRE2_UCHAR chr,fast_forward_char_data * chars,BOOL last)5558 static SLJIT_INLINE void add_prefix_char(PCRE2_UCHAR chr, fast_forward_char_data *chars, BOOL last)
5559 {
5560 sljit_u32 i, count = chars->count;
5561
5562 if (count == 255)
5563 return;
5564
5565 if (count == 0)
5566 {
5567 chars->count = 1;
5568 chars->chars[0] = chr;
5569
5570 if (last)
5571 chars->last_count = 1;
5572 return;
5573 }
5574
5575 for (i = 0; i < count; i++)
5576 if (chars->chars[i] == chr)
5577 return;
5578
5579 if (count >= MAX_DIFF_CHARS)
5580 {
5581 chars->count = 255;
5582 return;
5583 }
5584
5585 chars->chars[count] = chr;
5586 chars->count = count + 1;
5587
5588 if (last)
5589 chars->last_count++;
5590 }
5591
static int scan_prefix(compiler_common *common, PCRE2_SPTR cc, fast_forward_char_data *chars, int max_chars, sljit_u32 *rec_count)
{
/* Recursive function, which scans prefix literals.

Walks the compiled pattern starting at cc and records, for each consecutive
code unit position of a possible match, which code units may occur there.

  common     compiler state (utf, ucp, ctypes, fcc are read here)
  cc         current position in the compiled pattern
  chars      array of per-position sets; advanced by one entry for every
             consumed code unit
  max_chars  number of positions which may still be filled in
  rec_count  shared budget; decreased once per loop iteration in every
             recursion level

Returns the number of positions filled in by this invocation, or 0 when the
budget in *rec_count is exhausted. Note that the result of a recursive call
is assigned to max_chars, so after scanning an alternative path the current
path is limited to the number of positions that path produced. */
BOOL last, any, class, caseless;
int len, repeat, len_save, consumed = 0;
sljit_u32 chr; /* Any unicode character. */
sljit_u8 *bytes, *bytes_end, byte;
PCRE2_SPTR alternative, cc_save, oc;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
PCRE2_UCHAR othercase[4];
#elif defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
PCRE2_UCHAR othercase[2];
#else
PCRE2_UCHAR othercase[1];
#endif

repeat = 1;
while (TRUE)
  {
  if (*rec_count == 0)
    return 0;
  (*rec_count)--;

  /* Per item flags. "last" means that scanning stops after this item;
  "any" and "class" select one of the two special paths after the switch;
  otherwise the item is a literal character at cc. */
  last = TRUE;
  any = FALSE;
  class = FALSE;
  caseless = FALSE;

  switch (*cc)
    {
    case OP_CHARI:
    caseless = TRUE;
    /* Fall through */
    case OP_CHAR:
    last = FALSE;
    cc++;
    break;

    case OP_SOD:
    case OP_SOM:
    case OP_SET_SOM:
    case OP_NOT_WORD_BOUNDARY:
    case OP_WORD_BOUNDARY:
    case OP_EODN:
    case OP_EOD:
    case OP_CIRC:
    case OP_CIRCM:
    case OP_DOLL:
    case OP_DOLLM:
    case OP_NOT_UCP_WORD_BOUNDARY:
    case OP_UCP_WORD_BOUNDARY:
    /* Zero width assertions. */
    cc++;
    continue;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ASSERT_NA:
    case OP_ASSERTBACK_NA:
    /* The whole assertion is skipped. */
    cc = bracketend(cc);
    continue;

    case OP_PLUSI:
    case OP_MINPLUSI:
    case OP_POSPLUSI:
    caseless = TRUE;
    /* Fall through */
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_POSPLUS:
    /* One occurrence is recorded, then scanning stops ("last" is TRUE). */
    cc++;
    break;

    case OP_EXACTI:
    caseless = TRUE;
    /* Fall through */
    case OP_EXACT:
    repeat = GET2(cc, 1);
    last = FALSE;
    cc += 1 + IMM2_SIZE;
    break;

    case OP_QUERYI:
    case OP_MINQUERYI:
    case OP_POSQUERYI:
    caseless = TRUE;
    /* Fall through */
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_POSQUERY:
    /* Optional character: first scan what follows it (the character is
    absent), then record the character itself at the same positions. */
    len = 1;
    cc++;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
#endif
    max_chars = scan_prefix(common, cc + len, chars, max_chars, rec_count);
    if (max_chars == 0)
      return consumed;
    last = FALSE;
    break;

    case OP_KET:
    cc += 1 + LINK_SIZE;
    continue;

    case OP_ALT:
    /* Follow the link stored in the OP_ALT item. */
    cc += GET(cc, 1);
    continue;

    case OP_ONCE:
    case OP_BRA:
    case OP_BRAPOS:
    case OP_CBRA:
    case OP_CBRAPOS:
    /* Every alternative after the first one is scanned recursively into
    the same positions; the first alternative is then scanned by this
    loop. */
    alternative = cc + GET(cc, 1);
    while (*alternative == OP_ALT)
      {
      max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, max_chars, rec_count);
      if (max_chars == 0)
        return consumed;
      alternative += GET(alternative, 1);
      }

    if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
      cc += IMM2_SIZE;
    cc += 1 + LINK_SIZE;
    continue;

    case OP_CLASS:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
    /* NOTE(review): presumably is_char7_bitset tells whether the bitmap
    only contains characters below 128 - confirm. */
    if (common->utf && !is_char7_bitset((const sljit_u8 *)(cc + 1), FALSE))
      return consumed;
#endif
    class = TRUE;
    break;

    case OP_NCLASS:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    class = TRUE;
    break;

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    cc += GET(cc, 1);
    break;
#endif

    case OP_DIGIT:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
      return consumed;
#endif
    any = TRUE;
    cc++;
    break;

    case OP_WHITESPACE:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_space, FALSE))
      return consumed;
#endif
    any = TRUE;
    cc++;
    break;

    case OP_WORDCHAR:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_word, FALSE))
      return consumed;
#endif
    any = TRUE;
    cc++;
    break;

    case OP_NOT:
    case OP_NOTI:
    /* Extra increment for these two opcodes; together with the one below
    cc is advanced by two code units. */
    cc++;
    /* Fall through. */
    case OP_NOT_DIGIT:
    case OP_NOT_WHITESPACE:
    case OP_NOT_WORDCHAR:
    case OP_ANY:
    case OP_ALLANY:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    cc++;
    break;

#ifdef SUPPORT_UNICODE
    case OP_NOTPROP:
    case OP_PROP:
#if PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    cc += 1 + 2;
    break;
#endif

    case OP_TYPEEXACT:
    /* Only sets the repeat count; the repeated type item is processed by
    the next iteration. */
    repeat = GET2(cc, 1);
    cc += 1 + IMM2_SIZE;
    continue;

    case OP_NOTEXACT:
    case OP_NOTEXACTI:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    repeat = GET2(cc, 1);
    cc += 1 + IMM2_SIZE + 1;
    break;

    default:
    return consumed;
    }

  if (any)
    {
    /* The position(s) cannot be enumerated: mark "repeat" consecutive
    positions with the 255 sentinel. */
    do
      {
      chars->count = 255;

      consumed++;
      if (--max_chars == 0)
        return consumed;
      chars++;
      }
    while (--repeat > 0);

    repeat = 1;
    continue;
    }

  if (class)
    {
    /* The 32 byte bitmap follows the opcode; cc is moved to the item
    after the bitmap, which may be a repeat opcode. */
    bytes = (sljit_u8*) (cc + 1);
    cc += 1 + 32 / sizeof(PCRE2_UCHAR);

    switch (*cc)
      {
      case OP_CRSTAR:
      case OP_CRMINSTAR:
      case OP_CRPOSSTAR:
      case OP_CRQUERY:
      case OP_CRMINQUERY:
      case OP_CRPOSQUERY:
      /* The class may match zero times: scan what follows the repeat
      opcode into the same positions first. */
      max_chars = scan_prefix(common, cc + 1, chars, max_chars, rec_count);
      if (max_chars == 0)
        return consumed;
      break;

      default:
      case OP_CRPLUS:
      case OP_CRMINPLUS:
      case OP_CRPOSPLUS:
      break;

      case OP_CRRANGE:
      case OP_CRMINRANGE:
      case OP_CRPOSRANGE:
      /* Record the minimum repeat count; a zero minimum stops the scan. */
      repeat = GET2(cc, 1);
      if (repeat <= 0)
        return consumed;
      break;
      }

    do
      {
      /* If character 255 (bit 7 of the last bitmap byte) is a member, the
      position is not enumerated. */
      if (bytes[31] & 0x80)
        chars->count = 255;
      else if (chars->count != 255)
        {
        /* Add every member of the bitmap. chr is the character value
        of bit 0 of the byte being processed; empty bytes are skipped
        eight characters at a time. The loop ends early once the set
        switches to the 255 sentinel. */
        bytes_end = bytes + 32;
        chr = 0;
        do
          {
          byte = *bytes++;
          SLJIT_ASSERT((chr & 0x7) == 0);
          if (byte == 0)
            chr += 8;
          else
            {
            do
              {
              if ((byte & 0x1) != 0)
                add_prefix_char(chr, chars, TRUE);
              byte >>= 1;
              chr++;
              }
            while (byte != 0);
            /* Round up to the first character of the next byte. */
            chr = (chr + 7) & (sljit_u32)(~7);
            }
          }
        while (chars->count != 255 && bytes < bytes_end);
        /* Rewind to the start of the bitmap for the next repetition. */
        bytes = bytes_end - 32;
        }

      consumed++;
      if (--max_chars == 0)
        return consumed;
      chars++;
      }
    while (--repeat > 0);

    switch (*cc)
      {
      case OP_CRSTAR:
      case OP_CRMINSTAR:
      case OP_CRPOSSTAR:
      return consumed;

      case OP_CRQUERY:
      case OP_CRMINQUERY:
      case OP_CRPOSQUERY:
      cc++;
      break;

      case OP_CRRANGE:
      case OP_CRMINRANGE:
      case OP_CRPOSRANGE:
      /* Continue only when the repeat count is exact (minimum equals
      maximum). */
      if (GET2(cc, 1) != GET2(cc, 1 + IMM2_SIZE))
        return consumed;
      cc += 1 + 2 * IMM2_SIZE;
      break;
      }

    repeat = 1;
    continue;
    }

  /* Literal character at cc; len is its length in code units. */
  len = 1;
#ifdef SUPPORT_UNICODE
  if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
#endif

  if (caseless && char_has_othercase(common, cc))
    {
#ifdef SUPPORT_UNICODE
    if (common->utf)
      {
      /* The other case is only usable when it is encoded in the same
      number of code units as the character itself. */
      GETCHAR(chr, cc);
      if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
        return consumed;
      }
    else
#endif
      {
      chr = *cc;
#ifdef SUPPORT_UNICODE
      if (common->ucp && chr > 127)
        {
        /* If the other case does not fit into a code unit, the
        character itself is used instead. */
        chr = UCD_OTHERCASE(chr);
        othercase[0] = (chr == (PCRE2_UCHAR)chr) ? chr : *cc;
        }
      else
#endif
        othercase[0] = TABLE_GET(chr, common->fcc, chr);
      }
    }
  else
    {
    caseless = FALSE;
    othercase[0] = 0; /* Stops compiler warning - PH */
    }

  /* Record the code units of the character (and of its other case) at
  consecutive positions, "repeat" times. The "last" argument passed to
  add_prefix_char is TRUE only for the final code unit of the character. */
  len_save = len;
  cc_save = cc;
  while (TRUE)
    {
    oc = othercase;
    do
      {
      len--;
      consumed++;

      /* NOTE(review): chr does not appear to be read again before it is
      reassigned, so this looks like a dead store - confirm. */
      chr = *cc;
      add_prefix_char(*cc, chars, len == 0);

      if (caseless)
        add_prefix_char(*oc, chars, len == 0);

      if (--max_chars == 0)
        return consumed;
      chars++;
      cc++;
      oc++;
      }
    while (len > 0);

    if (--repeat == 0)
      break;

    len = len_save;
    cc = cc_save;
    }

  repeat = 1;
  if (last)
    return consumed;
  }
}
6005
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
/* Emits code that masks the code unit held in reg (reg is overwritten with
the masked value) and jumps to label when the result equals the pattern chosen
below: 0x80 under mask 0xc0 for 8-bit code units, 0xdc00 under mask 0xfc00 for
16-bit code units. Otherwise execution falls through. */
static void jumpto_if_not_utf_char_start(struct sljit_compiler *compiler, sljit_s32 reg, struct sljit_label *label)
{
#if PCRE2_CODE_UNIT_WIDTH == 8
const sljit_sw select_mask = 0xc0;
const sljit_sw non_start = 0x80;
#elif PCRE2_CODE_UNIT_WIDTH == 16
const sljit_sw select_mask = 0xfc00;
const sljit_sw non_start = 0xdc00;
#else
#error "Unknown code width"
#endif

OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, select_mask);
CMPTO(SLJIT_EQUAL, reg, 0, SLJIT_IMM, non_start, label);
}
#endif
6020
6021 #include "pcre2_jit_simd_inc.h"
6022
#ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD

/* Chooses a pair of prefix positions for the SIMD character-pair search and
emits that search.

A position is a candidate when its priority (last_count) is above 2. A pair
(hi, lo) with lo < hi is usable when lo lies at most
max_fast_forward_char_pair_offset() positions before hi and none of the two
characters stored for hi equals one of the two stored for lo. The usable pair
with the largest summed priority is selected; a later examined pair with an
equal sum replaces the earlier one.

Returns TRUE after calling fast_forward_char_pair_simd() for the selected
pair, or FALSE (emitting nothing) when no usable pair exists. */
static BOOL check_fast_forward_char_pair_simd(compiler_common *common, fast_forward_char_data *chars, int max)
{
sljit_s32 hi, lo;
sljit_s32 best_hi = 0, best_lo = 0;
sljit_u32 best_pri = 0;
sljit_s32 window = max_fast_forward_char_pair_offset();
PCRE2_UCHAR hi_c0, hi_c1, hi_pri, lo_c0, lo_c1, lo_pri;

for (hi = max - 1; hi >= 1; hi--)
  {
  if (chars[hi].last_count <= 2)
    continue;

  hi_c0 = chars[hi].chars[0];
  hi_c1 = chars[hi].chars[1];
  hi_pri = chars[hi].last_count;

  /* Only partners inside the supported distance are examined. */
  lo = hi - window;
  if (lo < 0)
    lo = 0;

  for (; lo < hi; lo++)
    {
    lo_pri = chars[lo].last_count;
    if (lo_pri <= 2 || (sljit_u32)hi_pri + (sljit_u32)lo_pri < best_pri)
      continue;

    lo_c0 = chars[lo].chars[0];
    lo_c1 = chars[lo].chars[1];

    /* The two positions must not share any character. */
    if (hi_c0 == lo_c0 || hi_c0 == lo_c1 || hi_c1 == lo_c0 || hi_c1 == lo_c1)
      continue;

    best_pri = hi_pri + lo_pri;
    best_hi = hi;
    best_lo = lo;
    }
  }

if (best_pri == 0)
  return FALSE;

fast_forward_char_pair_simd(common, best_hi, chars[best_hi].chars[0], chars[best_hi].chars[1], best_lo, chars[best_lo].chars[0], chars[best_lo].chars[1]);
return TRUE;
}

#endif /* JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD */
6072
/* Emits a forward scan that stops at the first position whose code unit
`offset` units further on equals char1 or char2 (pass the same value twice to
search for a single character).

The generated code adds `offset` to STR_PTR, scans, and subtracts it again, so
on success STR_PTR is the position `offset` units before the matching unit.
When common->match_end_ptr is set, STR_END is saved in TMP3, replaced for the
duration of the scan by a limit derived from the stored match end plus
offset + 1 units (selected only when STR_END lies beyond it), and restored
afterwards. In complete mode running into the limit jumps to
common->failed_match; in the other modes the scan simply stops there. A
non-zero offset is only allowed in complete mode. TMP1 is used as scratch. */
static void fast_forward_first_char2(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
{
DEFINE_COMPILER;
struct sljit_label *retry;
struct sljit_jump *is_char1;
struct sljit_jump *end_reached;
PCRE2_UCHAR diff = char1 ^ char2;
BOOL has_match_end = (common->match_end_ptr != 0);
BOOL complete = (common->mode == PCRE2_JIT_COMPLETE);

SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE || offset == 0);

if (has_match_end)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);

if (offset > 0)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));

if (has_match_end)
  {
  /* Keep the real end in TMP3 while STR_END acts as the scan limit. */
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);

  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offset + 1));
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_END, 0, TMP1, 0);
  SELECT(SLJIT_GREATER, STR_END, TMP1, 0, STR_END);
  }

#ifdef JIT_HAS_FAST_FORWARD_CHAR_SIMD

if (JIT_HAS_FAST_FORWARD_CHAR_SIMD)
  {
  fast_forward_char_simd(common, char1, char2, offset);

  if (offset > 0)
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));

  if (has_match_end)
    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
  return;
  }

#endif

/* Scalar loop: read one unit, advance, and retry until it matches. */
retry = LABEL();

end_reached = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (complete)
  add_jump(compiler, &common->failed_match, end_reached);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

if (char1 == char2)
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char1, retry);
else if (is_powerof2(diff))
  {
  /* The characters differ in one bit: force it on and compare once. */
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, diff);
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char1 | diff, retry);
  }
else
  {
  is_char1 = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1);
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char2, retry);
  JUMPHERE(is_char1);
  }

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf && offset > 0)
  {
  /* Re-check the unit at the would-be start position, offset + 1 units back. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-(offset + 1)));
  jumpto_if_not_utf_char_start(compiler, TMP1, retry);
  }
#endif

OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset + 1));

if (!complete)
  JUMPHERE(end_reached);

if (has_match_end)
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
}
6158
fast_forward_first_n_chars(compiler_common * common)6159 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common)
6160 {
6161 DEFINE_COMPILER;
6162 struct sljit_label *start;
6163 struct sljit_jump *match;
6164 fast_forward_char_data chars[MAX_N_CHARS];
6165 sljit_s32 offset;
6166 PCRE2_UCHAR mask;
6167 PCRE2_UCHAR *char_set, *char_set_end;
6168 int i, max, from;
6169 int range_right = -1, range_len;
6170 sljit_u8 *update_table = NULL;
6171 BOOL in_range;
6172 sljit_u32 rec_count;
6173
6174 for (i = 0; i < MAX_N_CHARS; i++)
6175 {
6176 chars[i].count = 0;
6177 chars[i].last_count = 0;
6178 }
6179
6180 rec_count = 10000;
6181 max = scan_prefix(common, common->start, chars, MAX_N_CHARS, &rec_count);
6182
6183 if (max < 1)
6184 return FALSE;
6185
6186 /* Convert last_count to priority. */
6187 for (i = 0; i < max; i++)
6188 {
6189 SLJIT_ASSERT(chars[i].last_count <= chars[i].count);
6190
6191 switch (chars[i].count)
6192 {
6193 case 0:
6194 chars[i].count = 255;
6195 chars[i].last_count = 0;
6196 break;
6197
6198 case 1:
6199 chars[i].last_count = (chars[i].last_count == 1) ? 7 : 5;
6200 /* Simplifies algorithms later. */
6201 chars[i].chars[1] = chars[i].chars[0];
6202 break;
6203
6204 case 2:
6205 SLJIT_ASSERT(chars[i].chars[0] != chars[i].chars[1]);
6206
6207 if (is_powerof2(chars[i].chars[0] ^ chars[i].chars[1]))
6208 chars[i].last_count = (chars[i].last_count == 2) ? 6 : 4;
6209 else
6210 chars[i].last_count = (chars[i].last_count == 2) ? 3 : 2;
6211 break;
6212
6213 default:
6214 chars[i].last_count = (chars[i].count == 255) ? 0 : 1;
6215 break;
6216 }
6217 }
6218
6219 #ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD
6220 if (JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD && check_fast_forward_char_pair_simd(common, chars, max))
6221 return TRUE;
6222 #endif
6223
6224 in_range = FALSE;
6225 /* Prevent compiler "uninitialized" warning */
6226 from = 0;
6227 range_len = 4 /* minimum length */ - 1;
6228 for (i = 0; i <= max; i++)
6229 {
6230 if (in_range && (i - from) > range_len && (chars[i - 1].count < 255))
6231 {
6232 range_len = i - from;
6233 range_right = i - 1;
6234 }
6235
6236 if (i < max && chars[i].count < 255)
6237 {
6238 SLJIT_ASSERT(chars[i].count > 0);
6239 if (!in_range)
6240 {
6241 in_range = TRUE;
6242 from = i;
6243 }
6244 }
6245 else
6246 in_range = FALSE;
6247 }
6248
6249 if (range_right >= 0)
6250 {
6251 update_table = (sljit_u8 *)allocate_read_only_data(common, 256);
6252 if (update_table == NULL)
6253 return TRUE;
6254 memset(update_table, IN_UCHARS(range_len), 256);
6255
6256 for (i = 0; i < range_len; i++)
6257 {
6258 SLJIT_ASSERT(chars[range_right - i].count > 0 && chars[range_right - i].count < 255);
6259
6260 char_set = chars[range_right - i].chars;
6261 char_set_end = char_set + chars[range_right - i].count;
6262 do
6263 {
6264 if (update_table[(*char_set) & 0xff] > IN_UCHARS(i))
6265 update_table[(*char_set) & 0xff] = IN_UCHARS(i);
6266 char_set++;
6267 }
6268 while (char_set < char_set_end);
6269 }
6270 }
6271
6272 offset = -1;
6273 /* Scan forward. */
6274 for (i = 0; i < max; i++)
6275 {
6276 if (range_right == i)
6277 continue;
6278
6279 if (offset == -1)
6280 {
6281 if (chars[i].last_count >= 2)
6282 offset = i;
6283 }
6284 else if (chars[offset].last_count < chars[i].last_count)
6285 offset = i;
6286 }
6287
6288 SLJIT_ASSERT(offset == -1 || (chars[offset].count >= 1 && chars[offset].count <= 2));
6289
6290 if (range_right < 0)
6291 {
6292 if (offset < 0)
6293 return FALSE;
6294 /* Works regardless the value is 1 or 2. */
6295 fast_forward_first_char2(common, chars[offset].chars[0], chars[offset].chars[1], offset);
6296 return TRUE;
6297 }
6298
6299 SLJIT_ASSERT(range_right != offset);
6300
6301 if (common->match_end_ptr != 0)
6302 {
6303 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
6304 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
6305 OP2(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
6306 add_jump(compiler, &common->failed_match, JUMP(SLJIT_LESS));
6307 OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_END, 0, TMP1, 0);
6308 SELECT(SLJIT_GREATER, STR_END, TMP1, 0, STR_END);
6309 }
6310 else
6311 {
6312 OP2(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
6313 add_jump(compiler, &common->failed_match, JUMP(SLJIT_LESS));
6314 }
6315
6316 SLJIT_ASSERT(range_right >= 0);
6317
6318 if (!HAS_VIRTUAL_REGISTERS)
6319 OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);
6320
6321 start = LABEL();
6322 add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
6323
6324 #if PCRE2_CODE_UNIT_WIDTH == 8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
6325 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
6326 #else
6327 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
6328 #endif
6329
6330 if (!HAS_VIRTUAL_REGISTERS)
6331 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
6332 else
6333 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);
6334
6335 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6336 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);
6337
6338 if (offset >= 0)
6339 {
6340 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offset));
6341 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6342
6343 if (chars[offset].count == 1)
6344 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0], start);
6345 else
6346 {
6347 mask = chars[offset].chars[0] ^ chars[offset].chars[1];
6348 if (is_powerof2(mask))
6349 {
6350 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
6351 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0] | mask, start);
6352 }
6353 else
6354 {
6355 match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0]);
6356 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[1], start);
6357 JUMPHERE(match);
6358 }
6359 }
6360 }
6361
6362 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
6363 if (common->utf && offset != 0)
6364 {
6365 if (offset < 0)
6366 {
6367 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6368 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6369 }
6370 else
6371 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
6372
6373 jumpto_if_not_utf_char_start(compiler, TMP1, start);
6374
6375 if (offset < 0)
6376 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6377 }
6378 #endif
6379
6380 if (offset >= 0)
6381 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6382
6383 if (common->match_end_ptr != 0)
6384 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
6385 else
6386 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
6387 return TRUE;
6388 }
6389
fast_forward_first_char(compiler_common * common)6390 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common)
6391 {
6392 PCRE2_UCHAR first_char = (PCRE2_UCHAR)(common->re->first_codeunit);
6393 PCRE2_UCHAR oc;
6394
6395 oc = first_char;
6396 if ((common->re->flags & PCRE2_FIRSTCASELESS) != 0)
6397 {
6398 oc = TABLE_GET(first_char, common->fcc, first_char);
6399 #if defined SUPPORT_UNICODE
6400 if (first_char > 127 && (common->utf || common->ucp))
6401 oc = UCD_OTHERCASE(first_char);
6402 #endif
6403 }
6404
6405 fast_forward_first_char2(common, first_char, oc, 0);
6406 }
6407
fast_forward_newline(compiler_common * common)6408 static SLJIT_INLINE void fast_forward_newline(compiler_common *common)
6409 {
6410 DEFINE_COMPILER;
6411 struct sljit_label *loop;
6412 struct sljit_jump *lastchar = NULL;
6413 struct sljit_jump *firstchar;
6414 struct sljit_jump *quit = NULL;
6415 struct sljit_jump *foundcr = NULL;
6416 struct sljit_jump *notfoundnl;
6417 jump_list *newline = NULL;
6418
6419 if (common->match_end_ptr != 0)
6420 {
6421 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
6422 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
6423 }
6424
6425 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
6426 {
6427 #ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD
6428 if (JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD && common->mode == PCRE2_JIT_COMPLETE)
6429 {
6430 if (HAS_VIRTUAL_REGISTERS)
6431 {
6432 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6433 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6434 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6435 }
6436 else
6437 {
6438 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
6439 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
6440 }
6441 firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
6442
6443 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6444 OP2U(SLJIT_SUB | SLJIT_SET_Z, STR_PTR, 0, TMP1, 0);
6445 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_NOT_EQUAL);
6446 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
6447 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
6448 #endif
6449 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6450
6451 fast_forward_char_pair_simd(common, 1, common->newline & 0xff, common->newline & 0xff, 0, (common->newline >> 8) & 0xff, (common->newline >> 8) & 0xff);
6452 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
6453 }
6454 else
6455 #endif /* JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD */
6456 {
6457 lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6458 if (HAS_VIRTUAL_REGISTERS)
6459 {
6460 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6461 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6462 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6463 }
6464 else
6465 {
6466 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
6467 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
6468 }
6469 firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
6470
6471 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
6472 OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, STR_PTR, 0, TMP1, 0);
6473 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER_EQUAL);
6474 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
6475 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
6476 #endif
6477 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
6478
6479 loop = LABEL();
6480 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6481 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6482 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
6483 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
6484 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
6485 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
6486
6487 JUMPHERE(quit);
6488 JUMPHERE(lastchar);
6489 }
6490
6491 JUMPHERE(firstchar);
6492
6493 if (common->match_end_ptr != 0)
6494 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
6495 return;
6496 }
6497
6498 if (HAS_VIRTUAL_REGISTERS)
6499 {
6500 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6501 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6502 }
6503 else
6504 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
6505
6506 /* Example: match /^/ to \r\n from offset 1. */
6507 firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
6508
6509 if (common->nltype == NLTYPE_ANY)
6510 move_back(common, NULL, FALSE);
6511 else
6512 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6513
6514 loop = LABEL();
6515 common->ff_newline_shortcut = loop;
6516
6517 #ifdef JIT_HAS_FAST_FORWARD_CHAR_SIMD
6518 if (JIT_HAS_FAST_FORWARD_CHAR_SIMD && (common->nltype == NLTYPE_FIXED || common->nltype == NLTYPE_ANYCRLF))
6519 {
6520 if (common->nltype == NLTYPE_ANYCRLF)
6521 {
6522 fast_forward_char_simd(common, CHAR_CR, CHAR_LF, 0);
6523 if (common->mode != PCRE2_JIT_COMPLETE)
6524 lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6525
6526 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6527 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6528 quit = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
6529 }
6530 else
6531 {
6532 fast_forward_char_simd(common, common->newline, common->newline, 0);
6533
6534 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6535 if (common->mode != PCRE2_JIT_COMPLETE)
6536 {
6537 OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);
6538 SELECT(SLJIT_GREATER, STR_PTR, STR_END, 0, STR_PTR);
6539 }
6540 }
6541 }
6542 else
6543 #endif /* JIT_HAS_FAST_FORWARD_CHAR_SIMD */
6544 {
6545 read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_NEWLINE);
6546 lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6547 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
6548 foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
6549 check_newlinechar(common, common->nltype, &newline, FALSE);
6550 set_jumps(newline, loop);
6551 }
6552
6553 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
6554 {
6555 if (quit == NULL)
6556 {
6557 quit = JUMP(SLJIT_JUMP);
6558 JUMPHERE(foundcr);
6559 }
6560
6561 notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6562 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6563 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_NL);
6564 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
6565 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
6566 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
6567 #endif
6568 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6569 JUMPHERE(notfoundnl);
6570 JUMPHERE(quit);
6571 }
6572
6573 if (lastchar)
6574 JUMPHERE(lastchar);
6575 JUMPHERE(firstchar);
6576
6577 if (common->match_end_ptr != 0)
6578 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
6579 }
6580
6581 static BOOL optimize_class(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
6582
fast_forward_start_bits(compiler_common * common)6583 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common)
6584 {
6585 DEFINE_COMPILER;
6586 const sljit_u8 *start_bits = common->re->start_bitmap;
6587 struct sljit_label *start;
6588 struct sljit_jump *partial_quit;
6589 #if PCRE2_CODE_UNIT_WIDTH != 8
6590 struct sljit_jump *found = NULL;
6591 #endif
6592 jump_list *matches = NULL;
6593
6594 if (common->match_end_ptr != 0)
6595 {
6596 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
6597 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
6598 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
6599 OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_END, 0, TMP1, 0);
6600 SELECT(SLJIT_GREATER, STR_END, TMP1, 0, STR_END);
6601 }
6602
6603 start = LABEL();
6604
6605 partial_quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6606 if (common->mode == PCRE2_JIT_COMPLETE)
6607 add_jump(compiler, &common->failed_match, partial_quit);
6608
6609 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6610 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6611
6612 if (!optimize_class(common, start_bits, (start_bits[31] & 0x80) != 0, FALSE, &matches))
6613 {
6614 #if PCRE2_CODE_UNIT_WIDTH != 8
6615 if ((start_bits[31] & 0x80) != 0)
6616 found = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 255);
6617 else
6618 CMPTO(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 255, start);
6619 #elif defined SUPPORT_UNICODE
6620 if (common->utf && is_char7_bitset(start_bits, FALSE))
6621 CMPTO(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 127, start);
6622 #endif
6623 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
6624 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
6625 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
6626 if (!HAS_VIRTUAL_REGISTERS)
6627 {
6628 OP2(SLJIT_SHL, TMP3, 0, SLJIT_IMM, 1, TMP2, 0);
6629 OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP3, 0);
6630 }
6631 else
6632 {
6633 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
6634 OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0);
6635 }
6636 JUMPTO(SLJIT_ZERO, start);
6637 }
6638 else
6639 set_jumps(matches, start);
6640
6641 #if PCRE2_CODE_UNIT_WIDTH != 8
6642 if (found != NULL)
6643 JUMPHERE(found);
6644 #endif
6645
6646 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6647
6648 if (common->mode != PCRE2_JIT_COMPLETE)
6649 JUMPHERE(partial_quit);
6650
6651 if (common->match_end_ptr != 0)
6652 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
6653 }
6654
search_requested_char(compiler_common * common,PCRE2_UCHAR req_char,BOOL caseless,BOOL has_firstchar)6655 static SLJIT_INLINE jump_list *search_requested_char(compiler_common *common, PCRE2_UCHAR req_char, BOOL caseless, BOOL has_firstchar)
6656 {
6657 DEFINE_COMPILER;
6658 struct sljit_label *loop;
6659 struct sljit_jump *toolong;
6660 struct sljit_jump *already_found;
6661 struct sljit_jump *found;
6662 struct sljit_jump *found_oc = NULL;
6663 jump_list *not_found = NULL;
6664 sljit_u32 oc, bit;
6665
6666 SLJIT_ASSERT(common->req_char_ptr != 0);
6667 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(REQ_CU_MAX) * 100);
6668 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
6669 toolong = CMP(SLJIT_LESS, TMP2, 0, STR_END, 0);
6670 already_found = CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0);
6671
6672 if (has_firstchar)
6673 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6674 else
6675 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
6676
6677 oc = req_char;
6678 if (caseless)
6679 {
6680 oc = TABLE_GET(req_char, common->fcc, req_char);
6681 #if defined SUPPORT_UNICODE
6682 if (req_char > 127 && (common->utf || common->ucp))
6683 oc = UCD_OTHERCASE(req_char);
6684 #endif
6685 }
6686
6687 #ifdef JIT_HAS_FAST_REQUESTED_CHAR_SIMD
6688 if (JIT_HAS_FAST_REQUESTED_CHAR_SIMD)
6689 {
6690 not_found = fast_requested_char_simd(common, req_char, oc);
6691 }
6692 else
6693 #endif
6694 {
6695 loop = LABEL();
6696 add_jump(compiler, ¬_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));
6697
6698 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
6699
6700 if (req_char == oc)
6701 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
6702 else
6703 {
6704 bit = req_char ^ oc;
6705 if (is_powerof2(bit))
6706 {
6707 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
6708 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
6709 }
6710 else
6711 {
6712 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
6713 found_oc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc);
6714 }
6715 }
6716 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
6717 JUMPTO(SLJIT_JUMP, loop);
6718
6719 JUMPHERE(found);
6720 if (found_oc)
6721 JUMPHERE(found_oc);
6722 }
6723
6724 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);
6725
6726 JUMPHERE(already_found);
6727 JUMPHERE(toolong);
6728 return not_found;
6729 }
6730
/* Emits the fast-called helper that pops revert entries from the top of the
backtracking stack and writes the saved values back to local variables.

The word just below STACK_TOP is read as a tag:
  tag > 0   the next two stack words are stored at local base + tag and at
            the following word; three stack words are released
  tag < 0   the next stack word is stored at local base - tag; two stack
            words are released
  tag == 0  the helper returns through RETURN_ADDR, leaving STACK_TOP as is

TMP1 holds the local base between entries; TMP2 and TMP3 are scratch. */
static void do_revertframes(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *not_positive;
struct sljit_jump *negative;
struct sljit_label *next_entry;

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
GET_LOCAL_BASE(TMP1, 0, 0);

/* Process entries until a zero tag is found. */
next_entry = LABEL();
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), -SSIZE_OF(sw));
OP2U(SLJIT_SUB | SLJIT_SET_SIG_LESS_EQUAL | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, 0);
not_positive = JUMP(SLJIT_SIG_LESS_EQUAL);

/* Positive tag: restore a pair of words. */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
if (HAS_VIRTUAL_REGISTERS)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw)));
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(STACK_TOP), -(3 * SSIZE_OF(sw)));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * SSIZE_OF(sw));
  }
else
  {
  /* TMP1 is borrowed for the first word, so the local base is reloaded
  before the next iteration. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw)));
  OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(3 * SSIZE_OF(sw)));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * SSIZE_OF(sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
  GET_LOCAL_BASE(TMP1, 0, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP3, 0);
  }
JUMPTO(SLJIT_JUMP, next_entry);

JUMPHERE(not_positive);
/* Declare that the flags of the comparison above are still current here. */
sljit_set_current_flags(compiler, SLJIT_CURRENT_FLAGS_SUB | SLJIT_CURRENT_FLAGS_COMPARE | SLJIT_SET_SIG_LESS_EQUAL | SLJIT_SET_Z);
negative = JUMP(SLJIT_NOT_ZERO /* SIG_LESS */);
/* Zero tag: nothing more to revert. */
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Negative tag: restore a single word at the negated offset. */
JUMPHERE(negative);
OP2(SLJIT_SUB, TMP2, 0, SLJIT_IMM, 0, TMP2, 0);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
if (HAS_VIRTUAL_REGISTERS)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw)));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * SSIZE_OF(sw));
  }
else
  {
  OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw)));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * SSIZE_OF(sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP3, 0);
  }
JUMPTO(SLJIT_JUMP, next_entry);
}
6786
#ifdef SUPPORT_UNICODE
/* Bit masks indexed by Unicode general category number (ucp_xx values). */

/* Mask with only the bit of one category set. */
#define UCPCAT(bit) (1 << (bit))
/* Masks combining two or three categories. */
#define UCPCAT2(bit1, bit2) (UCPCAT(bit1) | UCPCAT(bit2))
#define UCPCAT3(bit1, bit2, bit3) (UCPCAT(bit1) | UCPCAT(bit2) | UCPCAT(bit3))
/* Mask with every bit from start to end (both inclusive) set. */
#define UCPCAT_RANGE(start, end) (UCPCAT((end) + 1) - UCPCAT(start))
/* Categories ucp_Ll..ucp_Lu and ucp_Nd..ucp_No. */
#define UCPCAT_L UCPCAT_RANGE(ucp_Ll, ucp_Lu)
#define UCPCAT_N UCPCAT_RANGE(ucp_Nd, ucp_No)
/* Every bit from 0 up to and including ucp_Zs. */
#define UCPCAT_ALL (UCPCAT(ucp_Zs + 1) - 1)
#endif
6796
/* Emits the fast-called helper behind word boundary tests.

The helper classifies the character before STR_PTR (result kept in TMP3, 0
when STR_PTR is not beyond the `begin` field of the JIT arguments) and the
character at STR_PTR (result in TMP2, 0 when check_str_end() takes its jump).
With ucp set, a character counts as a word character when its Unicode type is
a letter, a number, ucp_Mn or ucp_Pc; otherwise the ctype_word bit of the
common->ctypes table is used and characters above 255 never count.

On return TMP2 holds the exclusive or of the two results, with the zero flag
set accordingly. In invalid-UTF mode there are two further exits: TMP2 = -1
when the previous character is invalid and the character at STR_PTR reads as
INVALID_UTF_CHAR, and TMP2 = TMP3 when reading the character at STR_PTR takes
the invalid-character jump. The return address is parked in LOCALS0 and
LOCALS1 is handed to peek_char(). */
static void check_wordboundary(compiler_common *common, BOOL ucp)
{
DEFINE_COMPILER;
struct sljit_jump *no_prev_char;
jump_list *no_next_char = NULL;
#ifdef SUPPORT_UNICODE
struct sljit_label *valid_utf;
jump_list *invalid_utf1 = NULL;
#endif /* SUPPORT_UNICODE */
jump_list *invalid_utf2 = NULL;
#if PCRE2_CODE_UNIT_WIDTH != 8 || defined SUPPORT_UNICODE
struct sljit_jump *above_255;
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 || SUPPORT_UNICODE */
BOOL complete = (common->mode == PCRE2_JIT_COMPLETE);

SLJIT_UNUSED_ARG(ucp);
SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCALS0);
/* Classify the previous character; the result goes to TMP3. */
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
no_prev_char = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);

#ifdef SUPPORT_UNICODE
if (common->invalid_utf)
  {
  peek_char_back(common, READ_CHAR_MAX, &invalid_utf1);

  if (!complete)
    {
    /* Run check_start_used_ptr() at the previous position while keeping
    the character (TMP1) and STR_PTR intact. */
    OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
    OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
    move_back(common, NULL, TRUE);
    check_start_used_ptr(common);
    OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
    OP1(SLJIT_MOV, STR_PTR, 0, TMP2, 0);
    }
  }
else
#endif /* SUPPORT_UNICODE */
  {
  if (!complete)
    {
    move_back(common, NULL, TRUE);
    check_start_used_ptr(common);
    read_char(common, 0, READ_CHAR_MAX, NULL, READ_CHAR_UPDATE_STR_PTR);
    }
  else
    peek_char_back(common, READ_CHAR_MAX, NULL);
  }

/* Word test for the previous character. */
#ifdef SUPPORT_UNICODE
if (ucp)
  {
  add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL));
  /* TMP2 = 1 << type, tested against the word categories. */
  OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP1, 0);
  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, UCPCAT2(ucp_Mn, ucp_Pc) | UCPCAT_L | UCPCAT_N);
  OP_FLAGS(SLJIT_MOV, TMP3, 0, SLJIT_NOT_ZERO);
  }
else
#endif /* SUPPORT_UNICODE */
  {
#if PCRE2_CODE_UNIT_WIDTH != 8
  above_255 = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#elif defined SUPPORT_UNICODE
  /* TMP3 is already zero here, so skipping the lookup yields "not word". */
  above_255 = NULL;
  if (common->utf)
    above_255 = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
  OP2(SLJIT_AND, TMP3, 0, TMP1, 0, SLJIT_IMM, 1);
#if PCRE2_CODE_UNIT_WIDTH != 8
  JUMPHERE(above_255);
#elif defined SUPPORT_UNICODE
  if (above_255 != NULL)
    JUMPHERE(above_255);
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */
  }
JUMPHERE(no_prev_char);

/* Classify the character at STR_PTR; the result goes to TMP2. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
check_str_end(common, &no_next_char);
peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCALS1, &invalid_utf2);

/* Same word test as above, with TMP2 as the destination. */
#ifdef SUPPORT_UNICODE

valid_utf = LABEL();

if (ucp)
  {
  add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP1, 0);
  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, UCPCAT2(ucp_Mn, ucp_Pc) | UCPCAT_L | UCPCAT_N);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO);
  }
else
#endif /* SUPPORT_UNICODE */
  {
#if PCRE2_CODE_UNIT_WIDTH != 8
  /* TMP2 may be destroyed by peek_char. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  above_255 = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#elif defined SUPPORT_UNICODE
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  above_255 = NULL;
  if (common->utf)
    above_255 = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#endif
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
  OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
#if PCRE2_CODE_UNIT_WIDTH != 8
  JUMPHERE(above_255);
#elif defined SUPPORT_UNICODE
  if (above_255 != NULL)
    JUMPHERE(above_255);
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */
  }
set_jumps(no_next_char, LABEL());

/* Combine both results: non-zero TMP2 means the two sides differ. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP2(SLJIT_XOR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, TMP3, 0);
OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);

#ifdef SUPPORT_UNICODE
if (common->invalid_utf)
  {
  /* The previous character was invalid. */
  set_jumps(invalid_utf1, LABEL());

  peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCALS1, NULL);
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR, valid_utf);

  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, -1);
  OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);

  /* The character at STR_PTR was invalid: only the previous one counts. */
  set_jumps(invalid_utf2, LABEL());
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  OP1(SLJIT_MOV, TMP2, 0, TMP3, 0);
  OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
  }
#endif /* SUPPORT_UNICODE */
}
6945
optimize_class_ranges(compiler_common * common,const sljit_u8 * bits,BOOL nclass,BOOL invert,jump_list ** backtracks)6946 static BOOL optimize_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
6947 {
6948 /* May destroy TMP1. */
6949 DEFINE_COMPILER;
6950 int ranges[MAX_CLASS_RANGE_SIZE];
6951 sljit_u8 bit, cbit, all;
6952 int i, byte, length = 0;
6953
6954 bit = bits[0] & 0x1;
6955 /* All bits will be zero or one (since bit is zero or one). */
6956 all = (sljit_u8)-bit;
6957
6958 for (i = 0; i < 256; )
6959 {
6960 byte = i >> 3;
6961 if ((i & 0x7) == 0 && bits[byte] == all)
6962 i += 8;
6963 else
6964 {
6965 cbit = (bits[byte] >> (i & 0x7)) & 0x1;
6966 if (cbit != bit)
6967 {
6968 if (length >= MAX_CLASS_RANGE_SIZE)
6969 return FALSE;
6970 ranges[length] = i;
6971 length++;
6972 bit = cbit;
6973 all = (sljit_u8)-cbit; /* sign extend bit into byte */
6974 }
6975 i++;
6976 }
6977 }
6978
6979 if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
6980 {
6981 if (length >= MAX_CLASS_RANGE_SIZE)
6982 return FALSE;
6983 ranges[length] = 256;
6984 length++;
6985 }
6986
6987 if (length < 0 || length > 4)
6988 return FALSE;
6989
6990 bit = bits[0] & 0x1;
6991 if (invert) bit ^= 0x1;
6992
6993 /* No character is accepted. */
6994 if (length == 0 && bit == 0)
6995 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6996
6997 switch(length)
6998 {
6999 case 0:
7000 /* When bit != 0, all characters are accepted. */
7001 return TRUE;
7002
7003 case 1:
7004 add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
7005 return TRUE;
7006
7007 case 2:
7008 if (ranges[0] + 1 != ranges[1])
7009 {
7010 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
7011 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
7012 }
7013 else
7014 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
7015 return TRUE;
7016
7017 case 3:
7018 if (bit != 0)
7019 {
7020 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
7021 if (ranges[0] + 1 != ranges[1])
7022 {
7023 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
7024 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
7025 }
7026 else
7027 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
7028 return TRUE;
7029 }
7030
7031 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
7032 if (ranges[1] + 1 != ranges[2])
7033 {
7034 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
7035 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
7036 }
7037 else
7038 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
7039 return TRUE;
7040
7041 case 4:
7042 if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
7043 && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
7044 && (ranges[1] & (ranges[2] - ranges[0])) == 0
7045 && is_powerof2(ranges[2] - ranges[0]))
7046 {
7047 SLJIT_ASSERT((ranges[0] & (ranges[2] - ranges[0])) == 0 && (ranges[2] & ranges[3] & (ranges[2] - ranges[0])) != 0);
7048 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
7049 if (ranges[2] + 1 != ranges[3])
7050 {
7051 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
7052 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
7053 }
7054 else
7055 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
7056 return TRUE;
7057 }
7058
7059 if (bit != 0)
7060 {
7061 i = 0;
7062 if (ranges[0] + 1 != ranges[1])
7063 {
7064 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
7065 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
7066 i = ranges[0];
7067 }
7068 else
7069 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
7070
7071 if (ranges[2] + 1 != ranges[3])
7072 {
7073 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
7074 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
7075 }
7076 else
7077 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
7078 return TRUE;
7079 }
7080
7081 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
7082 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
7083 if (ranges[1] + 1 != ranges[2])
7084 {
7085 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
7086 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
7087 }
7088 else
7089 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
7090 return TRUE;
7091
7092 default:
7093 SLJIT_UNREACHABLE();
7094 return FALSE;
7095 }
7096 }
7097
static BOOL optimize_class_chars(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
/* Collects the members of the class (the set bits, or the clear bits when
nclass is set) into a short list, and tests TMP1 against each of them with
conditional moves. Two members which differ only in the 0x20 bit share one
entry. Returns FALSE without emitting anything when conditional moves are
not available or the list would be too long.
May destroy TMP1. */
DEFINE_COMPILER;
uint16_t char_list[MAX_CLASS_CHARS_SIZE];
sljit_s32 type;
int count = 0;
int folded = 0;
int c, idx, start;
BOOL merged, negated;

if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV))
  return FALSE;

for (c = 0; c < 256; c++)
  {
  /* A character is a member when its bit differs from nclass. */
  if ((((bits[c >> 3] >> (c & 0x7)) & 0x1) != 0) == (nclass != 0))
    continue;

  merged = FALSE;

  if ((c & 0x20) != 0)
    {
    /* Try to attach the character to its pair without the 0x20 bit. */
    for (idx = 0; idx < count; idx++)
      {
      if (char_list[idx] != c - 0x20)
        continue;

      /* 0x100 marks the entry as a pair, 0x20 turns it into this character. */
      char_list[idx] |= 0x120;
      merged = TRUE;
      break;
      }
    }

  if (merged)
    continue;

  if (count >= MAX_CLASS_CHARS_SIZE)
    return FALSE;

  char_list[count++] = (uint16_t) c;
  }

if (count == 0) return FALSE; /* Should never occur, but stops analyzers complaining. */

/* TMP2 becomes non-zero when TMP1 matches any of the collected entries. */
start = 0;

if (char_list[0] == 0)
  {
  start = 1;
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_ZERO);
  }
else
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);

/* Entries without a pair are compared first. */
for (idx = start; idx < count; idx++)
  {
  if ((char_list[idx] & 0x100) != 0)
    {
    folded++;
    continue;
    }

  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, char_list[idx]);
  SELECT(SLJIT_ZERO, TMP2, TMP1, 0, TMP2);
  }

if (folded > 0)
  {
  /* Pairs are compared after the 0x20 bit is forced in TMP1. */
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x20);

  for (idx = 0; idx < count; idx++)
    {
    if ((char_list[idx] & 0x100) == 0)
      continue;

    OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, char_list[idx] & 0xff);
    SELECT(SLJIT_ZERO, TMP2, TMP1, 0, TMP2);
    }
  }

negated = invert ? !nclass : nclass;
type = negated ? SLJIT_NOT_EQUAL : SLJIT_EQUAL;
add_jump(compiler, backtracks, CMP(type, TMP2, 0, SLJIT_IMM, 0));
return TRUE;
}
7198
static BOOL optimize_class(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
/* Tries the range based code generator first, and falls back to the
character list based one only when the former gives up.
May destroy TMP1. */
if (!optimize_class_ranges(common, bits, nclass, invert, backtracks))
  return optimize_class_chars(common, bits, nclass, invert, backtracks);

return TRUE;
}
7206
static void check_anynewline(compiler_common *common)
{
/* Emits a fast-call helper which checks whether TMP1 contains a newline
character: 0x0a-0x0d, 0x85 and, when the conditional block below is compiled
in and enabled, 0x2028 or 0x2029. The result is accumulated in TMP2, and the
final OP_FLAGS also requests the zero flag (SLJIT_SET_Z) for it.
TMP2 destroyed. TMP1 is modified as well (biased by 0x0a, and possibly ORed
with 0x1). The return address is kept in RETURN_ADDR. */
DEFINE_COMPILER;

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

/* After the bias 0x0a-0x0d become 0-3, so one comparison covers all four.
NOTE(review): this relies on SLJIT_LESS_EQUAL being an unsigned comparison,
so that values below 0x0a wrap around and fail - confirm in sljit. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
/* The flag of this comparison (0x85) is consumed by the next OP_FLAGS,
which is either inside or after the conditional block. */
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
#if PCRE2_CODE_UNIT_WIDTH == 8
/* In 8 bit mode the wide characters are only checked in UTF mode. */
if (common->utf)
  {
#endif
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  /* The biased 0x2028 and 0x2029 differ only in the lowest bit,
  so setting that bit allows testing both with one comparison. */
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
#if PCRE2_CODE_UNIT_WIDTH == 8
  }
#endif
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
/* Merge the result of the last comparison. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
7233
static void check_hspace(compiler_common *common)
{
/* Emits a fast-call helper which checks whether TMP1 contains a horizontal
space character: 0x09, 0x20, 0xa0 and, when the conditional block below is
compiled in and enabled, 0x1680, 0x180e, 0x2000-0x200a, 0x202f, 0x205f or
0x3000. The result is accumulated in TMP2, and the final OP_FLAGS also
requests the zero flag (SLJIT_SET_Z) for it.
TMP2 destroyed. TMP1 is biased by 0x2000 when the conditional block is
emitted. The return address is kept in RETURN_ADDR. */
DEFINE_COMPILER;

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

/* Each comparison only sets the flags, which are merged into TMP2
by the OP_FLAGS following it. */
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x09);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x20);
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0xa0);
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
#if PCRE2_CODE_UNIT_WIDTH == 8
/* In 8 bit mode the wide characters are only checked in UTF mode. */
if (common->utf)
  {
#endif
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x1680);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x180e);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  /* From here TMP1 holds the character minus 0x2000, which allows
  testing the 0x2000-0x200a range with a single comparison. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
  OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
#if PCRE2_CODE_UNIT_WIDTH == 8
  }
#endif
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
/* Merge the result of the last comparison. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
7272
static void check_vspace(compiler_common *common)
{
/* Emits a fast-call helper which checks whether TMP1 contains a vertical
space character: 0x0a-0x0d, 0x85 and, when the conditional block below is
compiled in and enabled, 0x2028 or 0x2029. The result is accumulated in TMP2,
and the final OP_FLAGS also requests the zero flag (SLJIT_SET_Z) for it.
TMP2 destroyed. TMP1 is modified as well (biased by 0x0a, and possibly ORed
with 0x1). The return address is kept in RETURN_ADDR. */
DEFINE_COMPILER;

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

/* After the bias 0x0a-0x0d become 0-3, so one comparison covers all four.
NOTE(review): this relies on SLJIT_LESS_EQUAL being an unsigned comparison,
so that values below 0x0a wrap around and fail - confirm in sljit. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
/* The flag of this comparison (0x85) is consumed by the next OP_FLAGS,
which is either inside or after the conditional block. */
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
#if PCRE2_CODE_UNIT_WIDTH == 8
/* In 8 bit mode the wide characters are only checked in UTF mode. */
if (common->utf)
  {
#endif
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  /* The biased 0x2028 and 0x2029 differ only in the lowest bit,
  so setting that bit allows testing both with one comparison. */
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
#if PCRE2_CODE_UNIT_WIDTH == 8
  }
#endif
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
/* Merge the result of the last comparison. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
7300
static void do_casefulcmp(compiler_common *common)
{
/* Emits a fast-call routine which compares two code unit sequences. The
first one is read through TMP1, the second one starts TMP2 bytes before
STR_PTR, and TMP2 is decreased by IN_UCHARS(1) after each matching code
unit. The loop is left on the first difference, so TMP2 only becomes zero
when the whole sequence matches. The return address is kept in LOCALS0
and it is loaded into TMP1 before returning. */
DEFINE_COMPILER;
struct sljit_jump *mismatch;
struct sljit_label *loop;
int char1_reg = HAS_VIRTUAL_REGISTERS ? STR_END : TMP3;
int char2_reg = HAS_VIRTUAL_REGISTERS ? STACK_TOP : RETURN_ADDR;
/* Non-zero when the character registers hold live values
which must be preserved in TMP3 and RETURN_ADDR. */
int borrowed = (char1_reg == STR_END);
/* 0: plain loads, 1: post-update loads, 2: pre-update loads. */
int mem_mode;

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

if (borrowed)
  {
  OP1(SLJIT_MOV, TMP3, 0, char1_reg, 0);
  OP1(SLJIT_MOV, RETURN_ADDR, 0, char2_reg, 0);
  }

/* Select the best supported addressing form. */
if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  mem_mode = 1;
else if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  mem_mode = 2;
else
  mem_mode = 0;

switch(mem_mode)
  {
  case 1:
  loop = LABEL();
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  break;

  case 2:
  /* Both pointers are moved back by one code unit
  since they are increased before each load. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  loop = LABEL();
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  break;

  default:
  loop = LABEL();
  OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0);
  OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  break;
  }

/* The end of the loop is the same for all addressing forms. */
mismatch = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
JUMPTO(SLJIT_NOT_ZERO, loop);

JUMPHERE(mismatch);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);

if (mem_mode == 2)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

if (borrowed)
  {
  OP1(SLJIT_MOV, char1_reg, 0, TMP3, 0);
  OP1(SLJIT_MOV, char2_reg, 0, RETURN_ADDR, 0);
  }

OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
}
7380
static void do_caselesscmp(compiler_common *common)
{
/* Emits a fast-call routine which compares two code unit sequences after
each code unit is translated by the common->lcc table. The first sequence
is read through TMP1, the second one starts TMP2 bytes before STR_PTR, and
TMP2 is decreased by IN_UCHARS(1) after each matching code unit. The loop is
left on the first difference, so TMP2 only becomes zero when the whole
sequence matches. The return address is kept in LOCALS0 (it is loaded into
TMP1 before returning), and the original value of STR_END is kept in
LOCALS1. */
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_label *label;
int char1_reg = STR_END;
int char2_reg;
int lcc_table;
/* 0: plain loads, 1: post-update loads, 2: pre-update loads. */
int opt_type = 0;

if (HAS_VIRTUAL_REGISTERS)
  {
  char2_reg = STACK_TOP;
  lcc_table = STACK_LIMIT;
  }
else
  {
  char2_reg = RETURN_ADDR;
  lcc_table = TMP3;
  }

/* Select the best supported addressing form
(SLJIT_MEM_SUPP is used for probing). */
if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  opt_type = 1;
else if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  opt_type = 2;

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

/* char1_reg is STR_END, its value is restored before returning. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, char1_reg, 0);

if (char2_reg == STACK_TOP)
  {
  /* STACK_TOP and STACK_LIMIT are preserved in TMP3 and RETURN_ADDR. */
  OP1(SLJIT_MOV, TMP3, 0, char2_reg, 0);
  OP1(SLJIT_MOV, RETURN_ADDR, 0, lcc_table, 0);
  }

OP1(SLJIT_MOV, lcc_table, 0, SLJIT_IMM, common->lcc);

if (opt_type == 1)
  {
  label = LABEL();
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  }
else if (opt_type == 2)
  {
  /* Both pointers are moved back by one code unit
  since they are increased before each load. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  label = LABEL();
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  }
else
  {
  /* STR_PTR is increased later, after the table lookups. */
  label = LABEL();
  OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0);
  OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  }

/* Translate both code units by the lcc table. When code units are wider
than 8 bits, values above 255 skip the lookup and are compared unchanged. */
#if PCRE2_CODE_UNIT_WIDTH != 8
jump = CMP(SLJIT_GREATER, char1_reg, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_U8, char1_reg, 0, SLJIT_MEM2(lcc_table, char1_reg), 0);
#if PCRE2_CODE_UNIT_WIDTH != 8
JUMPHERE(jump);
jump = CMP(SLJIT_GREATER, char2_reg, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_U8, char2_reg, 0, SLJIT_MEM2(lcc_table, char2_reg), 0);
#if PCRE2_CODE_UNIT_WIDTH != 8
JUMPHERE(jump);
#endif

if (opt_type == 0)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* Leave the loop on the first difference, or when TMP2 reaches zero. */
jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
JUMPTO(SLJIT_NOT_ZERO, label);

JUMPHERE(jump);
/* Reload the return address. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);

/* Compensate the decrease of STR_PTR done before the loop. */
if (opt_type == 2)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

if (char2_reg == STACK_TOP)
  {
  OP1(SLJIT_MOV, char2_reg, 0, TMP3, 0);
  OP1(SLJIT_MOV, lcc_table, 0, RETURN_ADDR, 0);
  }

OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
}
7478
/* Emits the comparison of the subject against the pattern character at cc.
In UTF mode all code units of a multi unit character are processed. The
progress is tracked in context: context->length is the distance of the next
unprocessed code unit from STR_PTR (it is used as a negative offset and is
decreased by IN_UCHARS(1) for each code unit), and context->sourcereg is the
register which receives the next load (-1 before the first load). Jumps
taken on mismatch are added to backtracks. When caseless is set and the
character has an other case, the differing bit is ORed into the subject
data before the comparison. Returns cc advanced past the processed code
units. Uses TMP1 and TMP2. */
static PCRE2_SPTR byte_sequence_compare(compiler_common *common, BOOL caseless, PCRE2_SPTR cc,
    compare_context *context, jump_list **backtracks)
{
DEFINE_COMPILER;
unsigned int othercasebit = 0;
PCRE2_SPTR othercasechar = NULL;
#ifdef SUPPORT_UNICODE
int utflength;
#endif

if (caseless && char_has_othercase(common, cc))
  {
  othercasebit = char_get_othercase_bit(common, cc);
  SLJIT_ASSERT(othercasebit);
  /* Extracting bit difference info. The upper bits of the value select
  the code unit which contains the differing bit, the low 8 bits are the
  bit mask itself. */
#if PCRE2_CODE_UNIT_WIDTH == 8
  othercasechar = cc + (othercasebit >> 8);
  othercasebit &= 0xff;
#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  /* Note that this code only handles characters in the BMP. If there
  ever are characters outside the BMP whose othercase differs in only one
  bit from itself (there currently are none), this code will need to be
  revised for PCRE2_CODE_UNIT_WIDTH == 32. */
  othercasechar = cc + (othercasebit >> 9);
  /* Bit 0x100 tells that the mask belongs to the second byte. */
  if ((othercasebit & 0x100) != 0)
    othercasebit = (othercasebit & 0xff) << 8;
  else
    othercasebit &= 0xff;
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
  }

if (context->sourcereg == -1)
  {
  /* First call for this sequence: load the first chunk into TMP1 using the
  widest load allowed by the remaining length. Later loads go into TMP2. */
#if PCRE2_CODE_UNIT_WIDTH == 8
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  if (context->length >= 4)
    OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else if (context->length >= 2)
    OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else
#endif
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#elif PCRE2_CODE_UNIT_WIDTH == 16
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  if (context->length >= 4)
    OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else
#endif
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#elif PCRE2_CODE_UNIT_WIDTH == 32
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
  context->sourcereg = TMP2;
  }

#ifdef SUPPORT_UNICODE
/* Number of code units of the character. */
utflength = 1;
if (common->utf && HAS_EXTRALEN(*cc))
  utflength += GET_EXTRALEN(*cc);

do
  {
#endif

  context->length -= IN_UCHARS(1);
#if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)

  /* Unaligned read is supported. The expected code units are collected in
  context->c, and the case bits to be ignored in context->oc, until they
  fill the chunk which was loaded. */
  if (othercasebit != 0 && othercasechar == cc)
    {
    context->c.asuchars[context->ucharptr] = *cc | othercasebit;
    context->oc.asuchars[context->ucharptr] = othercasebit;
    }
  else
    {
    context->c.asuchars[context->ucharptr] = *cc;
    context->oc.asuchars[context->ucharptr] = 0;
    }
  context->ucharptr++;

  /* The chunk is complete when 4 bytes are collected, or nothing is left.
  In 8 bit mode it is also complete when two bytes are collected and a
  single byte remains, since that byte is loaded on its own. */
#if PCRE2_CODE_UNIT_WIDTH == 8
  if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
#else
  if (context->ucharptr >= 2 || context->length == 0)
#endif
    {
    /* Start loading the next chunk into the free register
    before the current one is compared. */
    if (context->length >= 4)
      OP1(SLJIT_MOV_S32, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
    else if (context->length >= 2)
      OP1(SLJIT_MOV_U16, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
#if PCRE2_CODE_UNIT_WIDTH == 8
    else if (context->length >= 1)
      OP1(SLJIT_MOV_U8, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
    /* Switch to the register which holds the previously loaded chunk. */
    context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;

    /* The number of collected code units selects the width of the compare. */
    switch(context->ucharptr)
      {
      case 4 / sizeof(PCRE2_UCHAR):
      if (context->oc.asint != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
      break;

      case 2 / sizeof(PCRE2_UCHAR):
      if (context->oc.asushort != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
      break;

#if PCRE2_CODE_UNIT_WIDTH == 8
      case 1:
      if (context->oc.asbyte != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
      break;
#endif

      default:
      SLJIT_UNREACHABLE();
      break;
      }
    context->ucharptr = 0;
    }

#else

  /* Unaligned read is unsupported or in 32 bit mode. The next code unit is
  loaded into the free register, then the previously loaded one is
  compared. */
  if (context->length >= 1)
    OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);

  context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;

  if (othercasebit != 0 && othercasechar == cc)
    {
    OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
    }
  else
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));

#endif

  cc++;
#ifdef SUPPORT_UNICODE
  utflength--;
  }
while (utflength > 0);
#endif

return cc;
}
7631
7632 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
7633
/* Adds (charoffset - (value)) to TMP1, then stores (value) in charoffset.
So if TMP1 held a character value minus charoffset before, it holds the
character value minus (value) afterwards. The macro reads and writes the
charoffset variable of the enclosing function, and OP2 is expected to use
its compiler variable. It expands to more than one statement and evaluates
(value) several times, so it must be used as a standalone statement with a
side effect free argument. */
#define SET_CHAR_OFFSET(value) \
  if ((value) != charoffset) \
    { \
    if ((value) < charoffset) \
      OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
    else \
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
    } \
  charoffset = (value);
7643
/* Forward declaration: compile_xclass_matchingpath calls this function
before its definition is seen. */
static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr);

#ifdef SUPPORT_UNICODE
/* Bits of the unicode_status variable of compile_xclass_matchingpath. */

/* The class has an item (single character, range, or certain property
types) which is recorded as needing the character value; when the value has
not been saved yet, TMP1 is copied to RETURN_ADDR before the UCD lookup. */
#define XCLASS_SAVE_CHAR 0x001
/* The character has already been copied to RETURN_ADDR. */
#define XCLASS_CHAR_SAVED 0x002
/* The class has a general category based item (collected in category_list),
or one of PT_SPACE, PT_PXSPACE, PT_PXGRAPH, PT_PXPRINT, PT_PXPUNCT. */
#define XCLASS_HAS_TYPE 0x004
/* The class has a PT_SC item, or a PT_SCX item which is not negated. */
#define XCLASS_HAS_SCRIPT 0x008
/* The class has a PT_SCX item. */
#define XCLASS_HAS_SCRIPT_EXTENSION 0x010
/* The class has a PT_BOOL item. */
#define XCLASS_HAS_BOOL 0x020
/* The class has a PT_BIDICL item. */
#define XCLASS_HAS_BIDICL 0x040
/* Any of these bits makes the generated code look up the UCD record. */
#define XCLASS_NEEDS_UCD (XCLASS_HAS_TYPE | XCLASS_HAS_SCRIPT | XCLASS_HAS_SCRIPT_EXTENSION | XCLASS_HAS_BOOL | XCLASS_HAS_BIDICL)
/* The class has a negated (XCL_NOTPROP) PT_SCX item. */
#define XCLASS_SCRIPT_EXTENSION_NOTPROP 0x080
/* NOTE(review): the names of the two bits below suggest that RETURN_ADDR or
LOCALS0 must be restored after the script extension check - confirm at the
places where they are used. */
#define XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR 0x100
#define XCLASS_SCRIPT_EXTENSION_RESTORE_LOCALS0 0x200
#endif /* SUPPORT_UNICODE */
7659
compile_xclass_matchingpath(compiler_common * common,PCRE2_SPTR cc,jump_list ** backtracks)7660 static void compile_xclass_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks)
7661 {
7662 DEFINE_COMPILER;
7663 jump_list *found = NULL;
7664 jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
7665 sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
7666 struct sljit_jump *jump = NULL;
7667 PCRE2_SPTR ccbegin;
7668 int compares, invertcmp, numberofcmps;
7669 #if defined SUPPORT_UNICODE && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
7670 BOOL utf = common->utf;
7671 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == [8|16] */
7672
7673 #ifdef SUPPORT_UNICODE
7674 sljit_u32 unicode_status = 0;
7675 sljit_u32 category_list = 0;
7676 sljit_u32 items;
7677 int typereg = TMP1;
7678 const sljit_u32 *other_cases;
7679 #endif /* SUPPORT_UNICODE */
7680
7681 /* Scanning the necessary info. */
7682 cc++;
7683 ccbegin = cc;
7684 compares = 0;
7685
7686 if (cc[-1] & XCL_MAP)
7687 {
7688 min = 0;
7689 cc += 32 / sizeof(PCRE2_UCHAR);
7690 }
7691
7692 while (*cc != XCL_END)
7693 {
7694 compares++;
7695
7696 if (*cc == XCL_SINGLE)
7697 {
7698 cc ++;
7699 GETCHARINCTEST(c, cc);
7700 if (c > max) max = c;
7701 if (c < min) min = c;
7702 #ifdef SUPPORT_UNICODE
7703 unicode_status |= XCLASS_SAVE_CHAR;
7704 #endif /* SUPPORT_UNICODE */
7705 }
7706 else if (*cc == XCL_RANGE)
7707 {
7708 cc ++;
7709 GETCHARINCTEST(c, cc);
7710 if (c < min) min = c;
7711 GETCHARINCTEST(c, cc);
7712 if (c > max) max = c;
7713 #ifdef SUPPORT_UNICODE
7714 unicode_status |= XCLASS_SAVE_CHAR;
7715 #endif /* SUPPORT_UNICODE */
7716 }
7717 #ifdef SUPPORT_UNICODE
7718 else
7719 {
7720 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
7721 cc++;
7722
7723 if (*cc == PT_CLIST && cc[-1] == XCL_PROP)
7724 {
7725 other_cases = PRIV(ucd_caseless_sets) + cc[1];
7726 while (*other_cases != NOTACHAR)
7727 {
7728 if (*other_cases > max) max = *other_cases;
7729 if (*other_cases < min) min = *other_cases;
7730 other_cases++;
7731 }
7732 }
7733 else
7734 {
7735 max = READ_CHAR_MAX;
7736 min = 0;
7737 }
7738
7739 items = 0;
7740
7741 switch(*cc)
7742 {
7743 case PT_ANY:
7744 /* Any either accepts everything or ignored. */
7745 if (cc[-1] == XCL_PROP)
7746 items = UCPCAT_ALL;
7747 else
7748 compares--;
7749 break;
7750
7751 case PT_LAMP:
7752 items = UCPCAT3(ucp_Lu, ucp_Ll, ucp_Lt);
7753 break;
7754
7755 case PT_GC:
7756 items = UCPCAT_RANGE(PRIV(ucp_typerange)[(int)cc[1] * 2], PRIV(ucp_typerange)[(int)cc[1] * 2 + 1]);
7757 break;
7758
7759 case PT_PC:
7760 items = UCPCAT(cc[1]);
7761 break;
7762
7763 case PT_WORD:
7764 items = UCPCAT2(ucp_Mn, ucp_Pc) | UCPCAT_L | UCPCAT_N;
7765 break;
7766
7767 case PT_ALNUM:
7768 items = UCPCAT_L | UCPCAT_N;
7769 break;
7770
7771 case PT_SCX:
7772 unicode_status |= XCLASS_HAS_SCRIPT_EXTENSION;
7773 if (cc[-1] == XCL_NOTPROP)
7774 {
7775 unicode_status |= XCLASS_SCRIPT_EXTENSION_NOTPROP;
7776 break;
7777 }
7778 compares++;
7779 /* Fall through */
7780
7781 case PT_SC:
7782 unicode_status |= XCLASS_HAS_SCRIPT;
7783 break;
7784
7785 case PT_SPACE:
7786 case PT_PXSPACE:
7787 case PT_PXGRAPH:
7788 case PT_PXPRINT:
7789 case PT_PXPUNCT:
7790 unicode_status |= XCLASS_SAVE_CHAR | XCLASS_HAS_TYPE;
7791 break;
7792
7793 case PT_CLIST:
7794 case PT_UCNC:
7795 case PT_PXXDIGIT:
7796 unicode_status |= XCLASS_SAVE_CHAR;
7797 break;
7798
7799 case PT_BOOL:
7800 unicode_status |= XCLASS_HAS_BOOL;
7801 break;
7802
7803 case PT_BIDICL:
7804 unicode_status |= XCLASS_HAS_BIDICL;
7805 break;
7806
7807 default:
7808 SLJIT_UNREACHABLE();
7809 break;
7810 }
7811
7812 if (items > 0)
7813 {
7814 if (cc[-1] == XCL_NOTPROP)
7815 items ^= UCPCAT_ALL;
7816 category_list |= items;
7817 unicode_status |= XCLASS_HAS_TYPE;
7818 compares--;
7819 }
7820
7821 cc += 2;
7822 }
7823 #endif /* SUPPORT_UNICODE */
7824 }
7825
7826 #ifdef SUPPORT_UNICODE
7827 if (category_list == UCPCAT_ALL)
7828 {
7829 /* All characters are accepted, same as dotall. */
7830 compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE);
7831 if (list == backtracks)
7832 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
7833 return;
7834 }
7835
7836 if (compares == 0 && category_list == 0)
7837 {
7838 /* No characters are accepted, same as (*F) or dotall. */
7839 compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE);
7840 if (list != backtracks)
7841 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
7842 return;
7843 }
7844 #else /* !SUPPORT_UNICODE */
7845 SLJIT_ASSERT(compares > 0);
7846 #endif /* SUPPORT_UNICODE */
7847
7848 /* We are not necessary in utf mode even in 8 bit mode. */
7849 cc = ccbegin;
7850 if ((cc[-1] & XCL_NOT) != 0)
7851 read_char(common, min, max, backtracks, READ_CHAR_UPDATE_STR_PTR);
7852 else
7853 {
7854 #ifdef SUPPORT_UNICODE
7855 read_char(common, min, max, (unicode_status & XCLASS_NEEDS_UCD) ? backtracks : NULL, 0);
7856 #else /* !SUPPORT_UNICODE */
7857 read_char(common, min, max, NULL, 0);
7858 #endif /* SUPPORT_UNICODE */
7859 }
7860
7861 if ((cc[-1] & XCL_HASPROP) == 0)
7862 {
7863 if ((cc[-1] & XCL_MAP) != 0)
7864 {
7865 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
7866 if (!optimize_class(common, (const sljit_u8 *)cc, (((const sljit_u8 *)cc)[31] & 0x80) != 0, TRUE, &found))
7867 {
7868 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
7869 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
7870 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
7871 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
7872 OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0);
7873 add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO));
7874 }
7875
7876 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
7877 JUMPHERE(jump);
7878
7879 cc += 32 / sizeof(PCRE2_UCHAR);
7880 }
7881 else
7882 {
7883 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
7884 add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, max - min));
7885 }
7886 }
7887 else if ((cc[-1] & XCL_MAP) != 0)
7888 {
7889 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
7890 #ifdef SUPPORT_UNICODE
7891 unicode_status |= XCLASS_CHAR_SAVED;
7892 #endif /* SUPPORT_UNICODE */
7893 if (!optimize_class(common, (const sljit_u8 *)cc, FALSE, TRUE, list))
7894 {
7895 #if PCRE2_CODE_UNIT_WIDTH == 8
7896 jump = NULL;
7897 if (common->utf)
7898 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
7899 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
7900
7901 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
7902 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
7903 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
7904 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
7905 OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0);
7906 add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO));
7907
7908 #if PCRE2_CODE_UNIT_WIDTH == 8
7909 if (common->utf)
7910 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
7911 JUMPHERE(jump);
7912 }
7913
7914 OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
7915 cc += 32 / sizeof(PCRE2_UCHAR);
7916 }
7917
7918 #ifdef SUPPORT_UNICODE
7919 if (unicode_status & XCLASS_NEEDS_UCD)
7920 {
7921 if ((unicode_status & (XCLASS_SAVE_CHAR | XCLASS_CHAR_SAVED)) == XCLASS_SAVE_CHAR)
7922 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
7923
7924 #if PCRE2_CODE_UNIT_WIDTH == 32
7925 if (!common->utf)
7926 {
7927 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
7928 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
7929 JUMPHERE(jump);
7930 }
7931 #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
7932
7933 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
7934 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
7935 OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
7936 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
7937 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
7938 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
7939 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
7940 OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
7941 OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 3);
7942 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
7943 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
7944
7945 ccbegin = cc;
7946
7947 if (category_list != 0)
7948 compares++;
7949
7950 if (unicode_status & XCLASS_HAS_BIDICL)
7951 {
7952 OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, scriptx_bidiclass));
7953 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BIDICLASS_SHIFT);
7954
7955 while (*cc != XCL_END)
7956 {
7957 if (*cc == XCL_SINGLE)
7958 {
7959 cc ++;
7960 GETCHARINCTEST(c, cc);
7961 }
7962 else if (*cc == XCL_RANGE)
7963 {
7964 cc ++;
7965 GETCHARINCTEST(c, cc);
7966 GETCHARINCTEST(c, cc);
7967 }
7968 else
7969 {
7970 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
7971 cc++;
7972 if (*cc == PT_BIDICL)
7973 {
7974 compares--;
7975 invertcmp = (compares == 0 && list != backtracks);
7976 if (cc[-1] == XCL_NOTPROP)
7977 invertcmp ^= 0x1;
7978 jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]);
7979 add_jump(compiler, compares > 0 ? list : backtracks, jump);
7980 }
7981 cc += 2;
7982 }
7983 }
7984
7985 cc = ccbegin;
7986 }
7987
7988 if (unicode_status & XCLASS_HAS_BOOL)
7989 {
7990 OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, bprops));
7991 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BPROPS_MASK);
7992 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 2);
7993
7994 while (*cc != XCL_END)
7995 {
7996 if (*cc == XCL_SINGLE)
7997 {
7998 cc ++;
7999 GETCHARINCTEST(c, cc);
8000 }
8001 else if (*cc == XCL_RANGE)
8002 {
8003 cc ++;
8004 GETCHARINCTEST(c, cc);
8005 GETCHARINCTEST(c, cc);
8006 }
8007 else
8008 {
8009 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
8010 cc++;
8011 if (*cc == PT_BOOL)
8012 {
8013 compares--;
8014 invertcmp = (compares == 0 && list != backtracks);
8015 if (cc[-1] == XCL_NOTPROP)
8016 invertcmp ^= 0x1;
8017
8018 OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP1), (sljit_sw)(PRIV(ucd_boolprop_sets) + (cc[1] >> 5)), SLJIT_IMM, (sljit_sw)(1u << (cc[1] & 0x1f)));
8019 add_jump(compiler, compares > 0 ? list : backtracks, JUMP(SLJIT_NOT_ZERO ^ invertcmp));
8020 }
8021 cc += 2;
8022 }
8023 }
8024
8025 cc = ccbegin;
8026 }
8027
8028 if (unicode_status & XCLASS_HAS_SCRIPT)
8029 {
8030 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
8031
8032 while (*cc != XCL_END)
8033 {
8034 if (*cc == XCL_SINGLE)
8035 {
8036 cc ++;
8037 GETCHARINCTEST(c, cc);
8038 }
8039 else if (*cc == XCL_RANGE)
8040 {
8041 cc ++;
8042 GETCHARINCTEST(c, cc);
8043 GETCHARINCTEST(c, cc);
8044 }
8045 else
8046 {
8047 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
8048 cc++;
8049 switch (*cc)
8050 {
8051 case PT_SCX:
8052 if (cc[-1] == XCL_NOTPROP)
8053 break;
8054 /* Fall through */
8055
8056 case PT_SC:
8057 compares--;
8058 invertcmp = (compares == 0 && list != backtracks);
8059 if (cc[-1] == XCL_NOTPROP)
8060 invertcmp ^= 0x1;
8061
8062 add_jump(compiler, compares > 0 ? list : backtracks, CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]));
8063 }
8064 cc += 2;
8065 }
8066 }
8067
8068 cc = ccbegin;
8069 }
8070
8071 if (unicode_status & XCLASS_HAS_SCRIPT_EXTENSION)
8072 {
8073 OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, scriptx_bidiclass));
8074 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_SCRIPTX_MASK);
8075 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 2);
8076
8077 if (unicode_status & XCLASS_SCRIPT_EXTENSION_NOTPROP)
8078 {
8079 if (unicode_status & XCLASS_HAS_TYPE)
8080 {
8081 if (unicode_status & XCLASS_SAVE_CHAR)
8082 {
8083 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP2, 0);
8084 unicode_status |= XCLASS_SCRIPT_EXTENSION_RESTORE_LOCALS0;
8085 }
8086 else
8087 {
8088 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP2, 0);
8089 unicode_status |= XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR;
8090 }
8091 }
8092 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
8093 }
8094
8095 while (*cc != XCL_END)
8096 {
8097 if (*cc == XCL_SINGLE)
8098 {
8099 cc ++;
8100 GETCHARINCTEST(c, cc);
8101 }
8102 else if (*cc == XCL_RANGE)
8103 {
8104 cc ++;
8105 GETCHARINCTEST(c, cc);
8106 GETCHARINCTEST(c, cc);
8107 }
8108 else
8109 {
8110 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
8111 cc++;
8112 if (*cc == PT_SCX)
8113 {
8114 compares--;
8115 invertcmp = (compares == 0 && list != backtracks);
8116
8117 jump = NULL;
8118 if (cc[-1] == XCL_NOTPROP)
8119 {
8120 jump = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, (int)cc[1]);
8121 if (invertcmp)
8122 {
8123 add_jump(compiler, backtracks, jump);
8124 jump = NULL;
8125 }
8126 invertcmp ^= 0x1;
8127 }
8128
8129 OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP1), (sljit_sw)(PRIV(ucd_script_sets) + (cc[1] >> 5)), SLJIT_IMM, (sljit_sw)(1u << (cc[1] & 0x1f)));
8130 add_jump(compiler, compares > 0 ? list : backtracks, JUMP(SLJIT_NOT_ZERO ^ invertcmp));
8131
8132 if (jump != NULL)
8133 JUMPHERE(jump);
8134 }
8135 cc += 2;
8136 }
8137 }
8138
8139 if (unicode_status & XCLASS_SCRIPT_EXTENSION_RESTORE_LOCALS0)
8140 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
8141 else if (unicode_status & XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR)
8142 OP1(SLJIT_MOV, TMP2, 0, RETURN_ADDR, 0);
8143 cc = ccbegin;
8144 }
8145
8146 if (unicode_status & XCLASS_SAVE_CHAR)
8147 OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
8148
8149 if (unicode_status & XCLASS_HAS_TYPE)
8150 {
8151 if (unicode_status & XCLASS_SAVE_CHAR)
8152 typereg = RETURN_ADDR;
8153
8154 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
8155 OP2(SLJIT_SHL, typereg, 0, SLJIT_IMM, 1, TMP2, 0);
8156
8157 if (category_list > 0)
8158 {
8159 compares--;
8160 invertcmp = (compares == 0 && list != backtracks);
8161 OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, category_list);
8162 add_jump(compiler, compares > 0 ? list : backtracks, JUMP(SLJIT_NOT_ZERO ^ invertcmp));
8163 }
8164 }
8165 }
8166 #endif /* SUPPORT_UNICODE */
8167
8168 /* Generating code. */
8169 charoffset = 0;
8170 numberofcmps = 0;
8171
8172 while (*cc != XCL_END)
8173 {
8174 compares--;
8175 invertcmp = (compares == 0 && list != backtracks);
8176 jump = NULL;
8177
8178 if (*cc == XCL_SINGLE)
8179 {
8180 cc ++;
8181 GETCHARINCTEST(c, cc);
8182
8183 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
8184 {
8185 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
8186 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8187 numberofcmps++;
8188 }
8189 else if (numberofcmps > 0)
8190 {
8191 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
8192 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
8193 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
8194 numberofcmps = 0;
8195 }
8196 else
8197 {
8198 jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
8199 numberofcmps = 0;
8200 }
8201 }
8202 else if (*cc == XCL_RANGE)
8203 {
8204 cc ++;
8205 GETCHARINCTEST(c, cc);
8206 SET_CHAR_OFFSET(c);
8207 GETCHARINCTEST(c, cc);
8208
8209 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
8210 {
8211 OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
8212 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
8213 numberofcmps++;
8214 }
8215 else if (numberofcmps > 0)
8216 {
8217 OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
8218 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
8219 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
8220 numberofcmps = 0;
8221 }
8222 else
8223 {
8224 jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
8225 numberofcmps = 0;
8226 }
8227 }
8228 #ifdef SUPPORT_UNICODE
8229 else
8230 {
8231 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
8232 if (*cc == XCL_NOTPROP)
8233 invertcmp ^= 0x1;
8234 cc++;
8235 switch(*cc)
8236 {
8237 case PT_ANY:
8238 case PT_LAMP:
8239 case PT_GC:
8240 case PT_PC:
8241 case PT_SC:
8242 case PT_SCX:
8243 case PT_BOOL:
8244 case PT_BIDICL:
8245 case PT_WORD:
8246 case PT_ALNUM:
8247 compares++;
8248 /* Already handled. */
8249 break;
8250
8251 case PT_SPACE:
8252 case PT_PXSPACE:
8253 SET_CHAR_OFFSET(9);
8254 OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
8255 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
8256
8257 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
8258 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8259
8260 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
8261 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8262
8263 OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Zl, ucp_Zs));
8264 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_ZERO);
8265 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
8266 break;
8267
8268 case PT_CLIST:
8269 other_cases = PRIV(ucd_caseless_sets) + cc[1];
8270
8271 /* At least three characters are required.
8272 Otherwise this case would be handled by the normal code path. */
8273 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
8274 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
8275
8276 /* Optimizing character pairs, if their difference is power of 2. */
8277 if (is_powerof2(other_cases[1] ^ other_cases[0]))
8278 {
8279 if (charoffset == 0)
8280 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
8281 else
8282 {
8283 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
8284 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
8285 }
8286 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, other_cases[1]);
8287 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
8288 other_cases += 2;
8289 }
8290 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
8291 {
8292 if (charoffset == 0)
8293 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
8294 else
8295 {
8296 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
8297 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
8298 }
8299 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, other_cases[2]);
8300 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
8301
8302 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
8303 OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
8304
8305 other_cases += 3;
8306 }
8307 else
8308 {
8309 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
8310 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
8311 }
8312
8313 while (*other_cases != NOTACHAR)
8314 {
8315 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
8316 OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
8317 }
8318 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
8319 break;
8320
8321 case PT_UCNC:
8322 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
8323 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
8324 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
8325 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8326 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
8327 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8328
8329 SET_CHAR_OFFSET(0xa0);
8330 OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
8331 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
8332 SET_CHAR_OFFSET(0);
8333 OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
8334 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_GREATER_EQUAL);
8335 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
8336 break;
8337
8338 case PT_PXGRAPH:
8339 OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Cc, ucp_Cs) | UCPCAT_RANGE(ucp_Zl, ucp_Zs));
8340 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO);
8341
8342 OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT(ucp_Cf));
8343 jump = JUMP(SLJIT_ZERO);
8344
8345 c = charoffset;
8346 /* In case of ucp_Cf, we overwrite the result. */
8347 SET_CHAR_OFFSET(0x2066);
8348 OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
8349 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
8350
8351 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
8352 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8353
8354 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
8355 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8356
8357 /* Restore charoffset. */
8358 SET_CHAR_OFFSET(c);
8359
8360 JUMPHERE(jump);
8361 jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
8362 break;
8363
8364 case PT_PXPRINT:
8365 OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Cc, ucp_Cs) | UCPCAT2(ucp_Zl, ucp_Zp));
8366 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO);
8367
8368 OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT(ucp_Cf));
8369 jump = JUMP(SLJIT_ZERO);
8370
8371 c = charoffset;
8372 /* In case of ucp_Cf, we overwrite the result. */
8373 SET_CHAR_OFFSET(0x2066);
8374 OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
8375 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
8376
8377 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
8378 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8379
8380 /* Restore charoffset. */
8381 SET_CHAR_OFFSET(c);
8382
8383 JUMPHERE(jump);
8384 jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
8385 break;
8386
8387 case PT_PXPUNCT:
8388 OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Sc, ucp_So));
8389 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO);
8390
8391 SET_CHAR_OFFSET(0);
8392 OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x7f);
8393 OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_LESS_EQUAL);
8394
8395 OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Pc, ucp_Ps));
8396 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_ZERO);
8397 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
8398 break;
8399
8400 case PT_PXXDIGIT:
8401 SET_CHAR_OFFSET(CHAR_A);
8402 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, ~0x20);
8403 OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP2, 0, SLJIT_IMM, CHAR_F - CHAR_A);
8404 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
8405
8406 SET_CHAR_OFFSET(CHAR_0);
8407 OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_9 - CHAR_0);
8408 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
8409
8410 SET_CHAR_OFFSET(0xff10);
8411 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 0xff46 - 0xff10);
8412
8413 OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xff19 - 0xff10);
8414 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
8415
8416 SET_CHAR_OFFSET(0xff21);
8417 OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xff26 - 0xff21);
8418 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
8419
8420 SET_CHAR_OFFSET(0xff41);
8421 OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xff46 - 0xff41);
8422 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
8423
8424 SET_CHAR_OFFSET(0xff10);
8425
8426 JUMPHERE(jump);
8427 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, 0);
8428 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
8429 break;
8430
8431 default:
8432 SLJIT_UNREACHABLE();
8433 break;
8434 }
8435 cc += 2;
8436 }
8437 #endif /* SUPPORT_UNICODE */
8438
8439 if (jump != NULL)
8440 add_jump(compiler, compares > 0 ? list : backtracks, jump);
8441 }
8442
8443 SLJIT_ASSERT(compares == 0);
8444 if (found != NULL)
8445 set_jumps(found, LABEL());
8446 }
8447
8448 #undef SET_TYPE_OFFSET
8449 #undef SET_CHAR_OFFSET
8450
8451 #endif
8452
/* Emits the JIT code for one simple assertion opcode. Every generated check
adds its failure jump(s) to the backtracks list; the instructions are emitted
in the order in which they appear below.

Arguments:
  common      the JIT compiler state
  type        the assertion opcode (one of the OP_xxx cases handled below)
  cc          current position in the compiled pattern
  backtracks  list that collects the jumps taken when the assertion fails

Returns: cc, unchanged */

static PCRE2_SPTR compile_simple_assertion_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks)
{
DEFINE_COMPILER;
struct sljit_jump *done;
struct sljit_jump *branch;
struct sljit_jump *cr_is_last;
struct sljit_jump *crlf_at_end;
/* Register through which the jit_arguments structure is addressed. It is
ARGUMENTS itself unless HAS_VIRTUAL_REGISTERS is set, in which case ARGUMENTS
is first copied into a temporary register. */
int args_reg = ARGUMENTS;
/* A fixed newline whose value is above 255 is compared as two code units. */
BOOL two_unit_newline = (common->nltype == NLTYPE_FIXED && common->newline > 255);
sljit_sw newline_first = (sljit_sw)((common->newline >> 8) & 0xff);
sljit_sw newline_second = (sljit_sw)(common->newline & 0xff);
BOOL negated_boundary = (type == OP_NOT_WORD_BOUNDARY || type == OP_NOT_UCP_WORD_BOUNDARY);
BOOL plain_boundary = (type == OP_NOT_WORD_BOUNDARY || type == OP_WORD_BOUNDARY);

switch(type)
  {
  case OP_SOD:
  case OP_SOM:
  /* Backtrack unless STR_PTR equals the "begin" (OP_SOD) or "str" (OP_SOM)
  member of jit_arguments. */
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
    args_reg = TMP1;
    }
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(args_reg),
    (type == OP_SOD) ? SLJIT_OFFSETOF(jit_arguments, begin) : SLJIT_OFFSETOF(jit_arguments, str));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
  return cc;

  case OP_NOT_WORD_BOUNDARY:
  case OP_WORD_BOUNDARY:
  case OP_NOT_UCP_WORD_BOUNDARY:
  case OP_UCP_WORD_BOUNDARY:
  /* Fast call to the shared word boundary routine, then test its result. */
  add_jump(compiler, plain_boundary ? &common->wordboundary : &common->ucp_wordboundary, JUMP(SLJIT_FAST_CALL));
#ifdef SUPPORT_UNICODE
  if (common->invalid_utf)
    {
    /* Here the result is compared as a value in TMP2 instead of being
    tested through the zero flag. */
    add_jump(compiler, backtracks, CMP(negated_boundary ? SLJIT_NOT_EQUAL : SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0));
    return cc;
    }
#endif /* SUPPORT_UNICODE */
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
  add_jump(compiler, backtracks, JUMP(negated_boundary ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  return cc;

  case OP_EODN:
  /* Requires rather complex checks. The first jump is taken when STR_PTR
  has already reached STR_END. */
  done = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  if (two_unit_newline)
    {
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    if (common->mode != PCRE2_JIT_COMPLETE)
      {
      branch = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);
      /* TMP2 becomes non-zero when STR_PTR + 2 is below STR_END or when the
      current code unit differs from the first newline unit. */
      OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, STR_END, 0);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, newline_first);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_EQUAL);
      add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
      check_partial(common, TRUE);
      add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
      JUMPHERE(branch);
      }
    else
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));

    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, newline_first));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, newline_second));
    }
  else if (common->nltype == NLTYPE_FIXED)
    {
    /* Exactly one code unit must remain and it must be the newline. */
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
    }
  else
    {
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    branch = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);

    /* The current code unit is CR. */
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP2U(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_GREATER, TMP2, 0, STR_END, 0);
    cr_is_last = JUMP(SLJIT_GREATER);
    add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL) /* LESS */);
    /* Equal: STR_PTR + 2 is STR_END, so the next unit must be NL. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    crlf_at_end = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));

    /* The current code unit is not CR. */
    JUMPHERE(branch);
    if (common->nltype != NLTYPE_ANYCRLF)
      {
      /* STR_PTR is kept in TMP3 while a character is read, and is put
      back once the checks have passed. */
      OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
      read_char(common, common->nlmin, common->nlmax, backtracks, READ_CHAR_UPDATE_STR_PTR);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
      add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
      sljit_set_current_flags(compiler, SLJIT_SET_Z);
      add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
      OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
      }
    else
      {
      OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0));
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
      }
    JUMPHERE(cr_is_last);
    JUMPHERE(crlf_at_end);
    }
  JUMPHERE(done);
  if (common->mode != PCRE2_JIT_COMPLETE)
    check_partial(common, TRUE);
  return cc;

  case OP_EOD:
  /* Backtrack while STR_PTR is still below STR_END. */
  add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
  if (common->mode != PCRE2_JIT_COMPLETE)
    check_partial(common, TRUE);
  return cc;

  case OP_DOLL:
  /* Backtrack when PCRE2_NOTEOL is set in the options member. */
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
    args_reg = TMP2;
    }
  OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));

  if (common->endonly)
    {
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
    check_partial(common, FALSE);
    return cc;
    }

  /* Otherwise the remaining checks are those of OP_EODN. */
  compile_simple_assertion_matchingpath(common, OP_EODN, cc, backtracks);
  return cc;

  case OP_DOLLM:
  /* Taken when STR_PTR is below STR_END; the code that follows handles
  the STR_PTR >= STR_END situation. */
  branch = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
    args_reg = TMP2;
    }
  OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
  check_partial(common, FALSE);
  done = JUMP(SLJIT_JUMP);
  JUMPHERE(branch);

  if (!two_unit_newline)
    {
    peek_char(common, common->nlmax, TMP3, 0, NULL);
    check_newlinechar(common, common->nltype, backtracks, FALSE);
    }
  else
    {
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    if (common->mode != PCRE2_JIT_COMPLETE)
      {
      branch = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
      /* STR_PTR = STR_END - IN_UCHARS(1) */
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, newline_first));
      check_partial(common, TRUE);
      add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
      JUMPHERE(branch);
      }
    else
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0));

    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, newline_first));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, newline_second));
    }
  JUMPHERE(done);
  return cc;

  case OP_CIRC:
  /* Backtrack when STR_PTR is above the "begin" member, or when
  PCRE2_NOTBOL is set in the options member. */
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
    args_reg = TMP2;
    }
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, begin));
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
  OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
  return cc;

  case OP_CIRCM:
  /* TMP2 might be used by peek_char_back, hence TMP1 holds the copy of
  ARGUMENTS here and TMP2 receives the "begin" member. */
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
    args_reg = TMP1;
    }
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, begin));
  branch = CMP(SLJIT_GREATER, STR_PTR, 0, TMP2, 0);
  OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
  done = JUMP(SLJIT_JUMP);

  /* STR_PTR is above "begin": the preceding data must be a newline. */
  JUMPHERE(branch);

  if (!common->alt_circumflex)
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

  if (!two_unit_newline)
    {
    peek_char_back(common, common->nlmax, backtracks);
    check_newlinechar(common, common->nltype, backtracks, FALSE);
    }
  else
    {
    /* Both newline units must lie at or after "begin" (still in TMP2). */
    OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, TMP2, 0));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, newline_first));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, newline_second));
    }
  JUMPHERE(done);
  return cc;
  }

/* Any other opcode is not expected here. */
SLJIT_UNREACHABLE();
return cc;
}
8695
8696 #ifdef SUPPORT_UNICODE
8697
8698 #if PCRE2_CODE_UNIT_WIDTH != 32
8699
8700 /* The code in this function copies the logic of the interpreter function that
8701 is defined in the pcre2_extuni.c source. If that code is updated, this
8702 function, and those below it, must be kept in step (note by PH, June 2024). */
8703
do_extuni_utf(jit_arguments * args,PCRE2_SPTR cc)8704 static PCRE2_SPTR SLJIT_FUNC do_extuni_utf(jit_arguments *args, PCRE2_SPTR cc)
8705 {
8706 PCRE2_SPTR start_subject = args->begin;
8707 PCRE2_SPTR end_subject = args->end;
8708 int lgb, rgb, ricount;
8709 PCRE2_SPTR prevcc, endcc, bptr;
8710 BOOL first = TRUE;
8711 BOOL was_ep_ZWJ = FALSE;
8712 uint32_t c;
8713
8714 prevcc = cc;
8715 endcc = NULL;
8716 do
8717 {
8718 GETCHARINC(c, cc);
8719 rgb = UCD_GRAPHBREAK(c);
8720
8721 if (first)
8722 {
8723 lgb = rgb;
8724 endcc = cc;
8725 first = FALSE;
8726 continue;
8727 }
8728
8729 if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0)
8730 break;
8731
8732 /* ZWJ followed by Extended Pictographic is allowed only if the ZWJ was
8733 preceded by Extended Pictographic. */
8734
8735 if (lgb == ucp_gbZWJ && rgb == ucp_gbExtended_Pictographic && !was_ep_ZWJ)
8736 break;
8737
8738 /* Not breaking between Regional Indicators is allowed only if there
8739 are an even number of preceding RIs. */
8740
8741 if (lgb == ucp_gbRegional_Indicator && rgb == ucp_gbRegional_Indicator)
8742 {
8743 ricount = 0;
8744 bptr = prevcc;
8745
8746 /* bptr is pointing to the left-hand character */
8747 while (bptr > start_subject)
8748 {
8749 bptr--;
8750 BACKCHAR(bptr);
8751 GETCHAR(c, bptr);
8752
8753 if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator)
8754 break;
8755
8756 ricount++;
8757 }
8758
8759 if ((ricount & 1) != 0) break; /* Grapheme break required */
8760 }
8761
8762 /* Set a flag when ZWJ follows Extended Pictographic (with optional Extend in
8763 between; see next statement). */
8764
8765 was_ep_ZWJ = (lgb == ucp_gbExtended_Pictographic && rgb == ucp_gbZWJ);
8766
8767 /* If Extend follows Extended_Pictographic, do not update lgb; this allows
8768 any number of them before a following ZWJ. */
8769
8770 if (rgb != ucp_gbExtend || lgb != ucp_gbExtended_Pictographic)
8771 lgb = rgb;
8772
8773 prevcc = endcc;
8774 endcc = cc;
8775 }
8776 while (cc < end_subject);
8777
8778 return endcc;
8779 }
8780
8781 #endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
8782
8783 /* The code in this function copies the logic of the interpreter function that
8784 is defined in the pcre2_extuni.c source. If that code is updated, this
8785 function, and the one below it, must be kept in step (note by PH, June 2024). */
8786
do_extuni_utf_invalid(jit_arguments * args,PCRE2_SPTR cc)8787 static PCRE2_SPTR SLJIT_FUNC do_extuni_utf_invalid(jit_arguments *args, PCRE2_SPTR cc)
8788 {
8789 PCRE2_SPTR start_subject = args->begin;
8790 PCRE2_SPTR end_subject = args->end;
8791 int lgb, rgb, ricount;
8792 PCRE2_SPTR prevcc, endcc, bptr;
8793 BOOL first = TRUE;
8794 BOOL was_ep_ZWJ = FALSE;
8795 uint32_t c;
8796
8797 prevcc = cc;
8798 endcc = NULL;
8799 do
8800 {
8801 GETCHARINC_INVALID(c, cc, end_subject, break);
8802 rgb = UCD_GRAPHBREAK(c);
8803
8804 if (first)
8805 {
8806 lgb = rgb;
8807 endcc = cc;
8808 first = FALSE;
8809 continue;
8810 }
8811
8812 if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0)
8813 break;
8814
8815 /* ZWJ followed by Extended Pictographic is allowed only if the ZWJ was
8816 preceded by Extended Pictographic. */
8817
8818 if (lgb == ucp_gbZWJ && rgb == ucp_gbExtended_Pictographic && !was_ep_ZWJ)
8819 break;
8820
8821 /* Not breaking between Regional Indicators is allowed only if there
8822 are an even number of preceding RIs. */
8823
8824 if (lgb == ucp_gbRegional_Indicator && rgb == ucp_gbRegional_Indicator)
8825 {
8826 ricount = 0;
8827 bptr = prevcc;
8828
8829 /* bptr is pointing to the left-hand character */
8830 while (bptr > start_subject)
8831 {
8832 GETCHARBACK_INVALID(c, bptr, start_subject, break);
8833
8834 if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator)
8835 break;
8836
8837 ricount++;
8838 }
8839
8840 if ((ricount & 1) != 0)
8841 break; /* Grapheme break required */
8842 }
8843
8844 /* Set a flag when ZWJ follows Extended Pictographic (with optional Extend in
8845 between; see next statement). */
8846
8847 was_ep_ZWJ = (lgb == ucp_gbExtended_Pictographic && rgb == ucp_gbZWJ);
8848
8849 /* If Extend follows Extended_Pictographic, do not update lgb; this allows
8850 any number of them before a following ZWJ. */
8851
8852 if (rgb != ucp_gbExtend || lgb != ucp_gbExtended_Pictographic)
8853 lgb = rgb;
8854
8855 prevcc = endcc;
8856 endcc = cc;
8857 }
8858 while (cc < end_subject);
8859
8860 return endcc;
8861 }
8862
8863 /* The code in this function copies the logic of the interpreter function that
8864 is defined in the pcre2_extuni.c source. If that code is updated, this
8865 function must be kept in step (note by PH, June 2024). */
8866
do_extuni_no_utf(jit_arguments * args,PCRE2_SPTR cc)8867 static PCRE2_SPTR SLJIT_FUNC do_extuni_no_utf(jit_arguments *args, PCRE2_SPTR cc)
8868 {
8869 PCRE2_SPTR start_subject = args->begin;
8870 PCRE2_SPTR end_subject = args->end;
8871 int lgb, rgb, ricount;
8872 PCRE2_SPTR bptr;
8873 uint32_t c;
8874 BOOL was_ep_ZWJ = FALSE;
8875
8876 /* Patch by PH */
8877 /* GETCHARINC(c, cc); */
8878 c = *cc++;
8879
8880 #if PCRE2_CODE_UNIT_WIDTH == 32
8881 if (c >= 0x110000)
8882 return cc;
8883 #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
8884 lgb = UCD_GRAPHBREAK(c);
8885
8886 while (cc < end_subject)
8887 {
8888 c = *cc;
8889 #if PCRE2_CODE_UNIT_WIDTH == 32
8890 if (c >= 0x110000)
8891 break;
8892 #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
8893 rgb = UCD_GRAPHBREAK(c);
8894
8895 if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0)
8896 break;
8897
8898 /* ZWJ followed by Extended Pictographic is allowed only if the ZWJ was
8899 preceded by Extended Pictographic. */
8900
8901 if (lgb == ucp_gbZWJ && rgb == ucp_gbExtended_Pictographic && !was_ep_ZWJ)
8902 break;
8903
8904 /* Not breaking between Regional Indicators is allowed only if there
8905 are an even number of preceding RIs. */
8906
8907 if (lgb == ucp_gbRegional_Indicator && rgb == ucp_gbRegional_Indicator)
8908 {
8909 ricount = 0;
8910 bptr = cc - 1;
8911
8912 /* bptr is pointing to the left-hand character */
8913 while (bptr > start_subject)
8914 {
8915 bptr--;
8916 c = *bptr;
8917 #if PCRE2_CODE_UNIT_WIDTH == 32
8918 if (c >= 0x110000)
8919 break;
8920 #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
8921
8922 if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator) break;
8923
8924 ricount++;
8925 }
8926
8927 if ((ricount & 1) != 0)
8928 break; /* Grapheme break required */
8929 }
8930
8931 /* Set a flag when ZWJ follows Extended Pictographic (with optional Extend in
8932 between; see next statement). */
8933
8934 was_ep_ZWJ = (lgb == ucp_gbExtended_Pictographic && rgb == ucp_gbZWJ);
8935
8936 /* If Extend follows Extended_Pictographic, do not update lgb; this allows
8937 any number of them before a following ZWJ. */
8938
8939 if (rgb != ucp_gbExtend || lgb != ucp_gbExtended_Pictographic)
8940 lgb = rgb;
8941
8942 cc++;
8943 }
8944
8945 return cc;
8946 }
8947
8948 #endif /* SUPPORT_UNICODE */
8949
/* Emits the matching path for one single-character item.

Arguments:
  common          compiler state
  type            opcode of the item
  cc              byte code position just after the opcode (its arguments)
  backtracks      list that receives the jumps taken when the item fails
  check_str_ptr   when TRUE, detect_partial_match() is called before the
                  character is read (for OP_CHAR / OP_CHARI this happens only
                  outside PCRE2_JIT_COMPLETE mode; in complete mode an explicit
                  STR_PTR / STR_END comparison is emitted instead)

Returns: byte code position after the arguments consumed by the item. */

static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr)
{
DEFINE_COMPILER;
int length;
unsigned int c, oc, bit;
compare_context context;
struct sljit_jump *jump[3];
jump_list *end_list;
#ifdef SUPPORT_UNICODE
PCRE2_UCHAR propdata[5];
#endif /* SUPPORT_UNICODE */

switch(type)
  {
  /* \d and \D: test the ctype_digit bit of the value left in TMP1. */
  case OP_NOT_DIGIT:
  case OP_DIGIT:
  /* Digits are usually 0-9, so it is worth to optimize them. */
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
  if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_digit, FALSE))
    read_char7_type(common, backtracks, type == OP_NOT_DIGIT);
  else
#endif
    read_char8_type(common, backtracks, type == OP_NOT_DIGIT);
    /* Flip the starting bit in the negative case. */
  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ctype_digit);
  add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO));
  return cc;

  /* \s and \S: same scheme with the ctype_space bit. */
  case OP_NOT_WHITESPACE:
  case OP_WHITESPACE:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
  if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_space, FALSE))
    read_char7_type(common, backtracks, type == OP_NOT_WHITESPACE);
  else
#endif
    read_char8_type(common, backtracks, type == OP_NOT_WHITESPACE);
  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ctype_space);
  add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO));
  return cc;

  /* \w and \W: same scheme with the ctype_word bit. */
  case OP_NOT_WORDCHAR:
  case OP_WORDCHAR:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
  if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_word, FALSE))
    read_char7_type(common, backtracks, type == OP_NOT_WORDCHAR);
  else
#endif
    read_char8_type(common, backtracks, type == OP_NOT_WORDCHAR);
  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ctype_word);
  add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO));
  return cc;

  /* Any character except a newline. */
  case OP_ANY:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  read_char(common, common->nlmin, common->nlmax, backtracks, READ_CHAR_UPDATE_STR_PTR);
  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
    {
    /* Fixed newline with a value above 255: compare its high byte with the
    character just read and, if it matches, its low byte with the next code
    unit. The item fails only when both match. */
    jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
    end_list = NULL;
    if (common->mode != PCRE2_JIT_PARTIAL_HARD)
      add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
    else
      check_str_end(common, &end_list);

    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
    set_jumps(end_list, LABEL());
    JUMPHERE(jump[0]);
    }
  else
    check_newlinechar(common, common->nltype, backtracks, TRUE);
  return cc;

  /* Any character at all. */
  case OP_ALLANY:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
#ifdef SUPPORT_UNICODE
  if (common->utf && common->invalid_utf)
    {
    /* With invalid UTF allowed, the character is read through read_char()
    so that a failed read goes to backtracks. */
    read_char(common, 0, READ_CHAR_MAX, backtracks, READ_CHAR_UPDATE_STR_PTR);
    return cc;
    }
#endif /* SUPPORT_UNICODE */

  skip_valid_char(common);
  return cc;

  /* Exactly one code unit. */
  case OP_ANYBYTE:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  return cc;

#ifdef SUPPORT_UNICODE
  /* \p and \P: build a five unit extended class in propdata from the two
  argument units and compile it with compile_xclass_matchingpath(). */
  case OP_NOTPROP:
  case OP_PROP:
  propdata[0] = XCL_HASPROP;
  propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
  propdata[2] = cc[0];
  propdata[3] = cc[1];
  propdata[4] = XCL_END;
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  compile_xclass_matchingpath(common, propdata, backtracks);
  return cc + 2;
#endif

  /* \R: CR optionally followed by NL, or another newline character. */
  case OP_ANYNL:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  read_char(common, common->bsr_nlmin, common->bsr_nlmax, NULL, 0);
  jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
  /* We don't need to handle soft partial matching case. */
  end_list = NULL;
  if (common->mode != PCRE2_JIT_PARTIAL_HARD)
    add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  else
    check_str_end(common, &end_list);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
  /* CR followed by NL: consume the NL as well. */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  jump[2] = JUMP(SLJIT_JUMP);
  JUMPHERE(jump[0]);
  check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
  set_jumps(end_list, LABEL());
  JUMPHERE(jump[1]);
  JUMPHERE(jump[2]);
  return cc;

  /* \h and \H: the shared hspace routine is entered with a fast call and the
  zero flag is tested afterwards. */
  case OP_NOT_HSPACE:
  case OP_HSPACE:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);

  if (type == OP_NOT_HSPACE)
    read_char(common, 0x9, 0x3000, backtracks, READ_CHAR_UPDATE_STR_PTR);
  else
    read_char(common, 0x9, 0x3000, NULL, 0);

  add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
  add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  return cc;

  /* \v and \V: same scheme with the shared vspace routine. */
  case OP_NOT_VSPACE:
  case OP_VSPACE:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);

  if (type == OP_NOT_VSPACE)
    read_char(common, 0xa, 0x2029, backtracks, READ_CHAR_UPDATE_STR_PTR);
  else
    read_char(common, 0xa, 0x2029, NULL, 0);

  add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
  add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  return cc;

#ifdef SUPPORT_UNICODE
  /* \X: call one of the do_extuni_* C helpers. The jit arguments are passed
  in R0 and the subject pointer is already in R1 (see the assertion). */
  case OP_EXTUNI:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);

  SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
  OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);

#if PCRE2_CODE_UNIT_WIDTH != 32
  sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM,
    common->utf ? (common->invalid_utf ? SLJIT_FUNC_ADDR(do_extuni_utf_invalid) : SLJIT_FUNC_ADDR(do_extuni_utf)) : SLJIT_FUNC_ADDR(do_extuni_no_utf));
  /* In invalid UTF mode a zero return value is treated as a failure. */
  if (common->invalid_utf)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
#else
  sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM,
    common->invalid_utf ? SLJIT_FUNC_ADDR(do_extuni_utf_invalid) : SLJIT_FUNC_ADDR(do_extuni_no_utf));
  if (common->invalid_utf)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
#endif

  /* The helper returns the new subject position. */
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);

  if (common->mode == PCRE2_JIT_PARTIAL_HARD)
    {
    jump[0] = CMP(SLJIT_LESS, SLJIT_RETURN_REG, 0, STR_END, 0);
    /* Since we successfully read a char above, partial matching must occur. */
    check_partial(common, TRUE);
    JUMPHERE(jump[0]);
    }
  return cc;
#endif

  /* A literal character, caseful or caseless. */
  case OP_CHAR:
  case OP_CHARI:
  length = 1;
#ifdef SUPPORT_UNICODE
  if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
#endif

  if (check_str_ptr && common->mode != PCRE2_JIT_COMPLETE)
    detect_partial_match(common, backtracks);

  /* Caseful, no other case, or a non-zero char_get_othercase_bit() result:
  the character is handled by byte_sequence_compare(). */
  if (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0)
    {
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
    if (length > 1 || (check_str_ptr && common->mode == PCRE2_JIT_COMPLETE))
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));

    context.length = IN_UCHARS(length);
    context.sourcereg = -1;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
    context.ucharptr = 0;
#endif
    return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
    }

  /* Otherwise the subject character is read and compared with both cases. */
#ifdef SUPPORT_UNICODE
  if (common->utf)
    {
    GETCHAR(c, cc);
    }
  else
#endif
    c = *cc;

  SLJIT_ASSERT(type == OP_CHARI && char_has_othercase(common, cc));

  if (check_str_ptr && common->mode == PCRE2_JIT_COMPLETE)
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

  oc = char_othercase(common, c);
  read_char(common, c < oc ? c : oc, c > oc ? c : oc, NULL, 0);

  SLJIT_ASSERT(!is_powerof2(c ^ oc));

  if (sljit_has_cpu_feature(SLJIT_HAS_CMOV))
    {
    /* Branch-free form: when TMP1 equals oc it is replaced by c, so a single
    comparison with c covers both cases. */
    OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, oc);
    SELECT(SLJIT_EQUAL, TMP1, SLJIT_IMM, c, TMP1);
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
    }
  else
    {
    jump[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c);
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
    JUMPHERE(jump[0]);
    }
  return cc + length;

  /* A negated literal character, caseful or caseless. */
  case OP_NOT:
  case OP_NOTI:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);

  length = 1;
#ifdef SUPPORT_UNICODE
  if (common->utf)
    {
#if PCRE2_CODE_UNIT_WIDTH == 8
    c = *cc;
    if (c < 128 && !common->invalid_utf)
      {
      /* The excluded character is below 128, so only the first subject
      code unit needs to be compared. */
      OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
      if (type == OP_NOT || !char_has_othercase(common, cc))
        add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
      else
        {
        /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
        OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
        add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
        }
      /* Skip the variable-length character. */
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
      /* For a first unit of 0xc0 or above, utf8_table4 supplies the amount
      that is added to STR_PTR. */
      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
      JUMPHERE(jump[0]);
      return cc + 1;
      }
    else
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
      {
      GETCHARLEN(c, cc, length);
      }
    }
  else
#endif /* SUPPORT_UNICODE */
    c = *cc;

  if (type == OP_NOT || !char_has_othercase(common, cc))
    {
    read_char(common, c, c, backtracks, READ_CHAR_UPDATE_STR_PTR);
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
    }
  else
    {
    oc = char_othercase(common, c);
    read_char(common, c < oc ? c : oc, c > oc ? c : oc, backtracks, READ_CHAR_UPDATE_STR_PTR);
    bit = c ^ oc;
    if (is_powerof2(bit))
      {
      /* The two cases differ in a single bit: force that bit and compare
      once. */
      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
      }
    else
      {
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
      }
    }
  return cc + length;

  /* A character class stored as a 32 byte bitmap at cc. */
  case OP_CLASS:
  case OP_NCLASS:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
  bit = (common->utf && is_char7_bitset((const sljit_u8 *)cc, type == OP_NCLASS)) ? 127 : 255;
  if (type == OP_NCLASS)
    read_char(common, 0, bit, backtracks, READ_CHAR_UPDATE_STR_PTR);
  else
    read_char(common, 0, bit, NULL, 0);
#else
  if (type == OP_NCLASS)
    read_char(common, 0, 255, backtracks, READ_CHAR_UPDATE_STR_PTR);
  else
    read_char(common, 0, 255, NULL, 0);
#endif

  /* Nothing more to emit when optimize_class() handled the class. */
  if (optimize_class(common, (const sljit_u8 *)cc, type == OP_NCLASS, FALSE, backtracks))
    return cc + 32 / sizeof(PCRE2_UCHAR);

  /* Characters above the bitmap range: OP_CLASS fails for them, OP_NCLASS
  accepts them by jumping over the bitmap test. */
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
  jump[0] = NULL;
  if (common->utf)
    {
    jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, bit);
    if (type == OP_CLASS)
      {
      add_jump(compiler, backtracks, jump[0]);
      jump[0] = NULL;
      }
    }
#elif PCRE2_CODE_UNIT_WIDTH != 8
  jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
  if (type == OP_CLASS)
    {
    add_jump(compiler, backtracks, jump[0]);
    jump[0] = NULL;
    }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */

  /* Bitmap test: byte index is TMP1 >> 3, bit mask is 1 << (TMP1 & 7). */
  OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
  OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0);
  add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
  if (jump[0] != NULL)
    JUMPHERE(jump[0]);
#endif
  return cc + 32 / sizeof(PCRE2_UCHAR);

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  /* An extended class: the class data starts after the LINK_SIZE field and
  the total item length is read with GET(cc, 0). */
  case OP_XCLASS:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
  return cc + GET(cc, 0) - 1;
#endif
  }
SLJIT_UNREACHABLE();
return cc;
}
9333
compile_charn_matchingpath(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,jump_list ** backtracks)9334 static SLJIT_INLINE PCRE2_SPTR compile_charn_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, jump_list **backtracks)
9335 {
9336 /* This function consumes at least one input character. */
9337 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
9338 DEFINE_COMPILER;
9339 PCRE2_SPTR ccbegin = cc;
9340 compare_context context;
9341 int size;
9342
9343 context.length = 0;
9344 do
9345 {
9346 if (cc >= ccend)
9347 break;
9348
9349 if (*cc == OP_CHAR)
9350 {
9351 size = 1;
9352 #ifdef SUPPORT_UNICODE
9353 if (common->utf && HAS_EXTRALEN(cc[1]))
9354 size += GET_EXTRALEN(cc[1]);
9355 #endif
9356 }
9357 else if (*cc == OP_CHARI)
9358 {
9359 size = 1;
9360 #ifdef SUPPORT_UNICODE
9361 if (common->utf)
9362 {
9363 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
9364 size = 0;
9365 else if (HAS_EXTRALEN(cc[1]))
9366 size += GET_EXTRALEN(cc[1]);
9367 }
9368 else
9369 #endif
9370 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
9371 size = 0;
9372 }
9373 else
9374 size = 0;
9375
9376 cc += 1 + size;
9377 context.length += IN_UCHARS(size);
9378 }
9379 while (size > 0 && context.length <= 128);
9380
9381 cc = ccbegin;
9382 if (context.length > 0)
9383 {
9384 /* We have a fixed-length byte sequence. */
9385 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
9386 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
9387
9388 context.sourcereg = -1;
9389 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
9390 context.ucharptr = 0;
9391 #endif
9392 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
9393 return cc;
9394 }
9395
9396 /* A non-fixed length character will be checked if length == 0. */
9397 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks, TRUE);
9398 }
9399
9400 /* Forward definitions. */
9401 static void compile_matchingpath(compiler_common *, PCRE2_SPTR, PCRE2_SPTR, backtrack_common *);
9402 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
9403
/* Allocates a new backtrack record of (size) bytes with sljit_alloc_memory()
and pushes it onto the parent's list: the record is zero filled, its prev
field is set to the current parent->top, its cc field is set to (ccstart), and
parent->top is updated to point to it. When the compiler reports an error after
the allocation, the ENCLOSING function returns (error) immediately.

The macro relies on three names being in scope where it is expanded:
compiler, backtrack (assigned by the macro) and parent. */

#define PUSH_BACKTRACK(size, ccstart, error) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return error; \
    memset(backtrack, 0, size); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)

/* Same as PUSH_BACKTRACK, for use inside functions that return void: on a
compiler error the enclosing function returns without a value. */

#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(backtrack, 0, size); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)

/* Views the local variable backtrack as a pointer to the given record type. */

#define BACKTRACK_AS(type) ((type *)backtrack)
9431
/* Emits the code that selects a group for a reference by duplicated name
(OP_DNREF / OP_DNREFI). The name table entries belonging to the name are
tested in order and the search stops at the first group whose OVECTOR value
differs from OVECTOR(1). The address of the OVECTOR slot of the selected group
(or of the last candidate when none was selected) is left in TMP2.

Arguments:
  common       compiler state
  cc           byte code position of the OP_DNREF / OP_DNREFI item
  backtracks   when not NULL and common->unset_backref is not set, receives
               a jump taken when the last candidate's OVECTOR value is also
               equal to OVECTOR(1)

Returns: nothing */

static void compile_dnref_search(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks)
{
DEFINE_COMPILER;
jump_list *group_selected = NULL;
PCRE2_SPTR entry = common->name_table + GET2(cc, 1) * common->name_entry_size;
int remaining = GET2(cc, 1 + IMM2_SIZE);
unsigned int ovector_index;

SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);

/* TMP1 holds the OVECTOR(1) value every candidate is compared against. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));

/* Every candidate except the last one: leave the search as soon as its
OVECTOR value differs from TMP1. */
for (remaining--; remaining > 0; remaining--)
  {
  ovector_index = GET2(entry, 0) << 1;
  GET_LOCAL_BASE(TMP2, 0, OVECTOR(ovector_index));
  add_jump(compiler, &group_selected, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(ovector_index), TMP1, 0));
  entry += common->name_entry_size;
  }

/* The last candidate is the fallback; only here may the search fail. */
ovector_index = GET2(entry, 0) << 1;
GET_LOCAL_BASE(TMP2, 0, OVECTOR(ovector_index));
if (backtracks != NULL && !common->unset_backref)
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(ovector_index), TMP1, 0));

set_jumps(group_selected, LABEL());
}
9461
/* Emits the code that matches a back reference against the subject at STR_PTR.

For OP_REF / OP_REFI the group number is read from the byte code. For
OP_DNREF / OP_DNREFI the caller must have left the address of the selected
group's OVECTOR slot in TMP2 (compile_dnref_search() does this).

Arguments:
  common       compiler state
  cc           byte code position of the reference item (its opcode)
  backtracks   list that receives the jumps taken when the match fails
  withchecks   when TRUE, code is emitted for the unset group check (numbered
               references only, and only when common->unset_backref is not
               set) and for detecting an empty referenced string
  emptyfail    when TRUE (together with withchecks), an empty referenced
               string is treated as a failure instead of a successful match

Returns: nothing

Only OP_REFI and OP_DNREFI are compared caselessly. OP_REF and OP_DNREF both
use the caseful comparison routine. */

static void compile_ref_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
{
DEFINE_COMPILER;
BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
int offset = 0;
struct sljit_jump *jump = NULL;
struct sljit_jump *partial;
struct sljit_jump *nopartial;
#if defined SUPPORT_UNICODE
struct sljit_label *loop;
struct sljit_label *caseless_loop;
jump_list *no_match = NULL;
int source_reg = COUNT_MATCH;
int source_end_reg = ARGUMENTS;
int char1_reg = STACK_LIMIT;
#endif /* SUPPORT_UNICODE */

/* TMP1 <- start of the referenced string. */
if (ref)
  {
  offset = GET2(cc, 1) << 1;
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
  /* OVECTOR(1) contains the "string begin - 1" constant. */
  if (withchecks && !common->unset_backref)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
  }
else
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);

#if defined SUPPORT_UNICODE
/* NOTE(review): this character-by-character path is selected for OP_REFI
only, so its "not ref" branch below is never taken and OP_DNREFI in UTF mode
goes through the code unit based comparison further down. Extending the
condition would need common->iref_ptr to be allocated for OP_DNREFI as well,
which cannot be confirmed from this function. */
if (common->utf && *cc == OP_REFI)
  {
  SLJIT_ASSERT(common->iref_ptr != 0);

  /* TMP2 <- end of the referenced string. */
  if (ref)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
  else
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));

  if (withchecks && emptyfail)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, TMP2, 0));

  /* Three registers are borrowed for the loop; their values are saved in the
  iref_ptr slots and restored on every way out. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr, source_reg, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw), source_end_reg, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2, char1_reg, 0);

  OP1(SLJIT_MOV, source_reg, 0, TMP1, 0);
  OP1(SLJIT_MOV, source_end_reg, 0, TMP2, 0);

  loop = LABEL();
  /* The whole referenced string has been consumed: success. */
  jump = CMP(SLJIT_GREATER_EQUAL, source_reg, 0, source_end_reg, 0);
  partial = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

  /* Read original character. It must be a valid UTF character. */
  OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
  OP1(SLJIT_MOV, STR_PTR, 0, source_reg, 0);

  read_char(common, 0, READ_CHAR_MAX, NULL, READ_CHAR_UPDATE_STR_PTR | READ_CHAR_VALID_UTF);

  OP1(SLJIT_MOV, source_reg, 0, STR_PTR, 0);
  OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
  OP1(SLJIT_MOV, char1_reg, 0, TMP1, 0);

  /* Read second character. */
  read_char(common, 0, READ_CHAR_MAX, &no_match, READ_CHAR_UPDATE_STR_PTR);

  CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop);

  OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);

  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));

  /* TMP2 <- address of ucd_records + TMP2 * 12 (computed as * 4 plus * 8;
  presumably TMP2 is the record index left by getucd - confirm there). */
  OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2);
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);

  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records));

  /* Subject character plus its other_case delta equals the reference
  character: continue with the next pair. */
  OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(ucd_record, other_case));
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(ucd_record, caseset));
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP3, 0);
  CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop);

  /* No caseless set: mismatch. Otherwise scan the set in ucd_caseless_sets
  while its entries are smaller than the reference character. */
  add_jump(compiler, &no_match, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_caseless_sets));

  caseless_loop = LABEL();
  OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP2), 0);
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(uint32_t));
  OP2U(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_LESS, TMP1, 0, char1_reg, 0);
  JUMPTO(SLJIT_EQUAL, loop);
  JUMPTO(SLJIT_LESS, caseless_loop);

  /* Mismatch exit (in complete mode, running out of subject ends here too). */
  set_jumps(no_match, LABEL());
  if (common->mode == PCRE2_JIT_COMPLETE)
    JUMPHERE(partial);

  OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
  OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
  OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);
  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));

  if (common->mode != PCRE2_JIT_COMPLETE)
    {
    /* Partial matching exit: the subject ended inside the reference. */
    JUMPHERE(partial);
    OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
    OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
    OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);

    check_partial(common, FALSE);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    }

  /* Success exit. */
  JUMPHERE(jump);
  OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
  OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
  OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);
  return;
  }
else
#endif /* SUPPORT_UNICODE */
  {
  /* OP_REF and OP_DNREF are caseful; OP_REFI and OP_DNREFI are caseless.
  Testing for OP_REF alone would send OP_DNREF to the caseless routine. */
  jump_list **compare_list = (*cc == OP_REF || *cc == OP_DNREF) ? &common->casefulcmp : &common->caselesscmp;

  /* TMP2 <- length of the referenced string (end minus start). */
  if (ref)
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
  else
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);

  /* Empty referenced string; resolved at the end of the function. */
  if (withchecks)
    jump = JUMP(SLJIT_ZERO);

  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  partial = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
  if (common->mode == PCRE2_JIT_COMPLETE)
    add_jump(compiler, backtracks, partial);

  add_jump(compiler, compare_list, JUMP(SLJIT_FAST_CALL));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));

  if (common->mode != PCRE2_JIT_COMPLETE)
    {
    nopartial = JUMP(SLJIT_JUMP);
    JUMPHERE(partial);
    /* The reference is longer than the remaining subject: compare only the
    part that is available, then report a partial match. */
    /* TMP2 -= STR_END - STR_PTR */
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
    partial = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0);
    OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
    add_jump(compiler, compare_list, JUMP(SLJIT_FAST_CALL));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
    JUMPHERE(partial);
    check_partial(common, FALSE);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    JUMPHERE(nopartial);
    }
  }

/* Empty referenced string: either a failure or an immediate success. */
if (jump != NULL)
  {
  if (emptyfail)
    add_jump(compiler, backtracks, jump);
  else
    JUMPHERE(jump);
  }
}
9626
compile_ref_iterator_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)9627 static SLJIT_INLINE PCRE2_SPTR compile_ref_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
9628 {
9629 DEFINE_COMPILER;
9630 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
9631 backtrack_common *backtrack;
9632 PCRE2_UCHAR type;
9633 int offset = 0;
9634 struct sljit_label *label;
9635 struct sljit_jump *zerolength;
9636 struct sljit_jump *jump = NULL;
9637 PCRE2_SPTR ccbegin = cc;
9638 int min = 0, max = 0;
9639 BOOL minimize;
9640
9641 PUSH_BACKTRACK(sizeof(ref_iterator_backtrack), cc, NULL);
9642
9643 if (ref)
9644 offset = GET2(cc, 1) << 1;
9645 else
9646 cc += IMM2_SIZE;
9647 type = cc[1 + IMM2_SIZE];
9648
9649 SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
9650 minimize = (type & 0x1) != 0;
9651 switch(type)
9652 {
9653 case OP_CRSTAR:
9654 case OP_CRMINSTAR:
9655 min = 0;
9656 max = 0;
9657 cc += 1 + IMM2_SIZE + 1;
9658 break;
9659 case OP_CRPLUS:
9660 case OP_CRMINPLUS:
9661 min = 1;
9662 max = 0;
9663 cc += 1 + IMM2_SIZE + 1;
9664 break;
9665 case OP_CRQUERY:
9666 case OP_CRMINQUERY:
9667 min = 0;
9668 max = 1;
9669 cc += 1 + IMM2_SIZE + 1;
9670 break;
9671 case OP_CRRANGE:
9672 case OP_CRMINRANGE:
9673 min = GET2(cc, 1 + IMM2_SIZE + 1);
9674 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
9675 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
9676 break;
9677 default:
9678 SLJIT_UNREACHABLE();
9679 break;
9680 }
9681
9682 if (!minimize)
9683 {
9684 if (min == 0)
9685 {
9686 allocate_stack(common, 2);
9687 if (ref)
9688 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
9689 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9690 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
9691 /* Temporary release of STR_PTR. */
9692 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
9693 /* Handles both invalid and empty cases. Since the minimum repeat,
9694 is zero the invalid case is basically the same as an empty case. */
9695 if (ref)
9696 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9697 else
9698 {
9699 compile_dnref_search(common, ccbegin, NULL);
9700 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9701 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
9702 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9703 }
9704 /* Restore if not zero length. */
9705 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
9706 }
9707 else
9708 {
9709 allocate_stack(common, 1);
9710 if (ref)
9711 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
9712 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9713
9714 if (ref)
9715 {
9716 if (!common->unset_backref)
9717 add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
9718 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9719 }
9720 else
9721 {
9722 compile_dnref_search(common, ccbegin, &backtrack->own_backtracks);
9723 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9724 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
9725 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9726 }
9727 }
9728
9729 if (min > 1 || max > 1)
9730 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0);
9731
9732 label = LABEL();
9733 if (!ref)
9734 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
9735 compile_ref_matchingpath(common, ccbegin, &backtrack->own_backtracks, FALSE, FALSE);
9736
9737 if (min > 1 || max > 1)
9738 {
9739 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
9740 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
9741 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
9742 if (min > 1)
9743 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, label);
9744 if (max > 1)
9745 {
9746 jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
9747 allocate_stack(common, 1);
9748 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9749 JUMPTO(SLJIT_JUMP, label);
9750 JUMPHERE(jump);
9751 }
9752 }
9753
9754 if (max == 0)
9755 {
9756 /* Includes min > 1 case as well. */
9757 allocate_stack(common, 1);
9758 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9759 JUMPTO(SLJIT_JUMP, label);
9760 }
9761
9762 JUMPHERE(zerolength);
9763 BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
9764
9765 count_match(common);
9766 return cc;
9767 }
9768
9769 allocate_stack(common, ref ? 2 : 3);
9770 if (ref)
9771 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
9772 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9773 if (type != OP_CRMINSTAR)
9774 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
9775
9776 if (min == 0)
9777 {
9778 /* Handles both invalid and empty cases. Since the minimum repeat,
9779 is zero the invalid case is basically the same as an empty case. */
9780 if (ref)
9781 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9782 else
9783 {
9784 compile_dnref_search(common, ccbegin, NULL);
9785 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9786 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
9787 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9788 }
9789 /* Length is non-zero, we can match real repeats. */
9790 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9791 jump = JUMP(SLJIT_JUMP);
9792 }
9793 else
9794 {
9795 if (ref)
9796 {
9797 if (!common->unset_backref)
9798 add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
9799 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9800 }
9801 else
9802 {
9803 compile_dnref_search(common, ccbegin, &backtrack->own_backtracks);
9804 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9805 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
9806 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9807 }
9808 }
9809
9810 BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
9811 if (max > 0)
9812 add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
9813
9814 if (!ref)
9815 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
9816 compile_ref_matchingpath(common, ccbegin, &backtrack->own_backtracks, TRUE, TRUE);
9817 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9818
9819 if (min > 1)
9820 {
9821 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
9822 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
9823 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
9824 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(ref_iterator_backtrack)->matchingpath);
9825 }
9826 else if (max > 0)
9827 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
9828
9829 if (jump != NULL)
9830 JUMPHERE(jump);
9831 JUMPHERE(zerolength);
9832
9833 count_match(common);
9834 return cc;
9835 }
9836
compile_recurse_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)9837 static SLJIT_INLINE PCRE2_SPTR compile_recurse_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
9838 {
9839 DEFINE_COMPILER;
9840 backtrack_common *backtrack;
9841 recurse_entry *entry = common->entries;
9842 recurse_entry *prev = NULL;
9843 sljit_sw start = GET(cc, 1);
9844 PCRE2_SPTR start_cc;
9845 BOOL needs_control_head;
9846
9847 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
9848
9849 /* Inlining simple patterns. */
9850 if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
9851 {
9852 start_cc = common->start + start;
9853 compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
9854 BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
9855 return cc + 1 + LINK_SIZE;
9856 }
9857
9858 while (entry != NULL)
9859 {
9860 if (entry->start == start)
9861 break;
9862 prev = entry;
9863 entry = entry->next;
9864 }
9865
9866 if (entry == NULL)
9867 {
9868 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
9869 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9870 return NULL;
9871 entry->next = NULL;
9872 entry->entry_label = NULL;
9873 entry->backtrack_label = NULL;
9874 entry->entry_calls = NULL;
9875 entry->backtrack_calls = NULL;
9876 entry->start = start;
9877
9878 if (prev != NULL)
9879 prev->next = entry;
9880 else
9881 common->entries = entry;
9882 }
9883
9884 BACKTRACK_AS(recurse_backtrack)->entry = entry;
9885
9886 if (entry->entry_label == NULL)
9887 add_jump(compiler, &entry->entry_calls, JUMP(SLJIT_FAST_CALL));
9888 else
9889 JUMPTO(SLJIT_FAST_CALL, entry->entry_label);
9890 /* Leave if the match is failed. */
9891 add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
9892 BACKTRACK_AS(recurse_backtrack)->matchingpath = LABEL();
9893 return cc + 1 + LINK_SIZE;
9894 }
9895
do_callout_jit(struct jit_arguments * arguments,pcre2_callout_block * callout_block,PCRE2_SPTR * jit_ovector)9896 static sljit_s32 SLJIT_FUNC do_callout_jit(struct jit_arguments *arguments, pcre2_callout_block *callout_block, PCRE2_SPTR *jit_ovector)
9897 {
9898 PCRE2_SPTR begin;
9899 PCRE2_SIZE *ovector;
9900 sljit_u32 oveccount, capture_top;
9901
9902 if (arguments->callout == NULL)
9903 return 0;
9904
9905 SLJIT_COMPILE_ASSERT(sizeof (PCRE2_SIZE) <= sizeof (sljit_sw), pcre2_size_must_be_lower_than_sljit_sw_size);
9906
9907 begin = arguments->begin;
9908 ovector = (PCRE2_SIZE*)(callout_block + 1);
9909 oveccount = callout_block->capture_top;
9910
9911 SLJIT_ASSERT(oveccount >= 1);
9912
9913 callout_block->version = 2;
9914 callout_block->callout_flags = 0;
9915
9916 /* Offsets in subject. */
9917 callout_block->subject_length = arguments->end - arguments->begin;
9918 callout_block->start_match = jit_ovector[0] - begin;
9919 callout_block->current_position = (PCRE2_SPTR)callout_block->offset_vector - begin;
9920 callout_block->subject = begin;
9921
9922 /* Convert and copy the JIT offset vector to the ovector array. */
9923 callout_block->capture_top = 1;
9924 callout_block->offset_vector = ovector;
9925
9926 ovector[0] = PCRE2_UNSET;
9927 ovector[1] = PCRE2_UNSET;
9928 ovector += 2;
9929 jit_ovector += 2;
9930 capture_top = 1;
9931
9932 /* Convert pointers to sizes. */
9933 while (--oveccount != 0)
9934 {
9935 capture_top++;
9936
9937 ovector[0] = (PCRE2_SIZE)(jit_ovector[0] - begin);
9938 ovector[1] = (PCRE2_SIZE)(jit_ovector[1] - begin);
9939
9940 if (ovector[0] != PCRE2_UNSET)
9941 callout_block->capture_top = capture_top;
9942
9943 ovector += 2;
9944 jit_ovector += 2;
9945 }
9946
9947 return (arguments->callout)(callout_block, arguments->callout_data);
9948 }
9949
/* Offset of the member "arg" inside pcre2_callout_block, as computed by
SLJIT_OFFSETOF. Used to address callout block fields in emitted code. */
#define CALLOUT_ARG_OFFSET(arg) \
    SLJIT_OFFSETOF(pcre2_callout_block, arg)
9952
compile_callout_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)9953 static SLJIT_INLINE PCRE2_SPTR compile_callout_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
9954 {
9955 DEFINE_COMPILER;
9956 backtrack_common *backtrack;
9957 sljit_s32 mov_opcode;
9958 unsigned int callout_length = (*cc == OP_CALLOUT)
9959 ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2 * LINK_SIZE);
9960 sljit_sw value1;
9961 sljit_sw value2;
9962 sljit_sw value3;
9963 sljit_uw callout_arg_size = (common->re->top_bracket + 1) * 2 * SSIZE_OF(sw);
9964
9965 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
9966
9967 callout_arg_size = (sizeof(pcre2_callout_block) + callout_arg_size + sizeof(sljit_sw) - 1) / sizeof(sljit_sw);
9968
9969 allocate_stack(common, callout_arg_size);
9970
9971 SLJIT_ASSERT(common->capture_last_ptr != 0);
9972 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
9973 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
9974 value1 = (*cc == OP_CALLOUT) ? cc[1 + 2 * LINK_SIZE] : 0;
9975 OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, value1);
9976 OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
9977 OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_top), SLJIT_IMM, common->re->top_bracket + 1);
9978
9979 /* These pointer sized fields temporarly stores internal variables. */
9980 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
9981
9982 if (common->mark_ptr != 0)
9983 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
9984 mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;
9985 OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 1));
9986 OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 1 + LINK_SIZE));
9987
9988 if (*cc == OP_CALLOUT)
9989 {
9990 value1 = 0;
9991 value2 = 0;
9992 value3 = 0;
9993 }
9994 else
9995 {
9996 value1 = (sljit_sw) (cc + (1 + 4*LINK_SIZE) + 1);
9997 value2 = (callout_length - (1 + 4*LINK_SIZE + 2));
9998 value3 = (sljit_sw) (GET(cc, 1 + 3*LINK_SIZE));
9999 }
10000
10001 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string), SLJIT_IMM, value1);
10002 OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_length), SLJIT_IMM, value2);
10003 OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_offset), SLJIT_IMM, value3);
10004 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
10005
10006 SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
10007
10008 /* Needed to save important temporary registers. */
10009 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STR_PTR, 0);
10010 /* SLJIT_R0 = arguments */
10011 OP1(SLJIT_MOV, SLJIT_R1, 0, STACK_TOP, 0);
10012 GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START);
10013 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS3(32, W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_callout_jit));
10014 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
10015 free_stack(common, callout_arg_size);
10016
10017 /* Check return value. */
10018 OP2U(SLJIT_SUB32 | SLJIT_SET_Z | SLJIT_SET_SIG_GREATER, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
10019 add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_SIG_GREATER));
10020 if (common->abort_label == NULL)
10021 add_jump(compiler, &common->abort, JUMP(SLJIT_NOT_EQUAL) /* SIG_LESS */);
10022 else
10023 JUMPTO(SLJIT_NOT_EQUAL /* SIG_LESS */, common->abort_label);
10024 return cc + callout_length;
10025 }
10026
/* The callout helper macros are not needed past this point.
NOTE(review): no definition of CALLOUT_ARG_SIZE is visible near the callout
code; confirm whether this #undef is a leftover. */
#undef CALLOUT_ARG_SIZE
#undef CALLOUT_ARG_OFFSET
10029
/* Emits the code which moves STR_PTR backwards for OP_REVERSE (fixed
distance) and OP_VREVERSE (distance between a minimum and a maximum).

For OP_REVERSE the failure jumps are added to the parent's own_backtracks.
For OP_VREVERSE a vreverse_backtrack is pushed and collects them; the position
after the mandatory part is stored in STACK(3), the furthest position reached
in STACK(2), and the backtrack's matchingpath label is set.

Returns the address after the item, or NULL when allocating the backtrack
fails. */
static PCRE2_SPTR compile_reverse_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack = NULL;
jump_list **fail_list;
unsigned int min_back, max_back;
BOOL has_range;
#ifdef SUPPORT_UNICODE
struct sljit_jump *at_begin;
struct sljit_label *step;
#endif

SLJIT_ASSERT(parent->top == NULL);

if (*cc != OP_REVERSE)
  {
  SLJIT_ASSERT(*cc == OP_VREVERSE);
  PUSH_BACKTRACK(sizeof(vreverse_backtrack), cc, NULL);

  fail_list = &backtrack->own_backtracks;
  min_back = GET2(cc, 1);
  max_back = GET2(cc, 1 + IMM2_SIZE);
  cc += 1 + 2 * IMM2_SIZE;

  SLJIT_ASSERT(min_back < max_back);
  }
else
  {
  fail_list = &parent->own_backtracks;
  min_back = GET2(cc, 1);
  max_back = min_back;
  cc += 1 + IMM2_SIZE;

  SLJIT_ASSERT(min_back > 0);
  }

has_range = (min_back < max_back);

/* TMP2 = start of the subject. */
if (HAS_VIRTUAL_REGISTERS)
  {
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
  }
else
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));

#ifdef SUPPORT_UNICODE
if (common->utf)
  {
  /* Characters have variable width, so step back one at a time using TMP3
  as the counter. */
  if (min_back > 0)
    {
    OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, min_back);
    step = LABEL();
    /* The mandatory part fails when the subject start is reached. */
    add_jump(compiler, fail_list, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0));
    move_back(common, fail_list, FALSE);
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, step);
    }

  if (has_range)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);

    OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, max_back - min_back);
    step = LABEL();
    /* The optional part simply stops at the subject start. */
    at_begin = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
    move_back(common, fail_list, FALSE);
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, step);

    JUMPHERE(at_begin);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), STR_PTR, 0);
    }
  }
else
#endif
  {
  /* Fixed width code units: a single subtraction is enough. */
  if (min_back > 0)
    {
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(min_back));
    add_jump(compiler, fail_list, CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0));
    }

  if (has_range)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);

    /* Step back by the optional amount, clamped to the subject start. */
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(max_back - min_back));
    OP2U(SLJIT_SUB | SLJIT_SET_LESS, STR_PTR, 0, TMP2, 0);
    SELECT(SLJIT_LESS, STR_PTR, TMP2, 0, STR_PTR);

    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), STR_PTR, 0);
    }
  }

check_start_used_ptr(common);

if (has_range)
  BACKTRACK_AS(vreverse_backtrack)->matchingpath = LABEL();

return cc;
}
10129
assert_needs_str_ptr_saving(PCRE2_SPTR cc)10130 static SLJIT_INLINE BOOL assert_needs_str_ptr_saving(PCRE2_SPTR cc)
10131 {
10132 while (TRUE)
10133 {
10134 switch (*cc)
10135 {
10136 case OP_CALLOUT_STR:
10137 cc += GET(cc, 1 + 2*LINK_SIZE);
10138 break;
10139
10140 case OP_NOT_WORD_BOUNDARY:
10141 case OP_WORD_BOUNDARY:
10142 case OP_CIRC:
10143 case OP_CIRCM:
10144 case OP_DOLL:
10145 case OP_DOLLM:
10146 case OP_CALLOUT:
10147 case OP_ALT:
10148 case OP_NOT_UCP_WORD_BOUNDARY:
10149 case OP_UCP_WORD_BOUNDARY:
10150 cc += PRIV(OP_lengths)[*cc];
10151 break;
10152
10153 case OP_KET:
10154 return FALSE;
10155
10156 default:
10157 return TRUE;
10158 }
10159 }
10160 }
10161
/* Emits the code for an assertion whose opcode is in the range
OP_ASSERT .. OP_ASSERTBACK_NOT, optionally preceded by OP_BRAZERO or
OP_BRAMINZERO.

common       the compiler state; its accept / quit / then_trap related fields
             are saved on entry and restored on every exit path
cc           points to the assertion (or to the preceding zero repeat opcode)
backtrack    receives framesize, private_data_ptr and, for the zero repeat
             forms, the matchingpath label
conditional  when set, failure jumps are collected in backtrack->condfailed
             instead of backtrack->common.own_backtracks; the zero repeat
             prefixes are not allowed in this mode

Each alternative is compiled together with its backtracking path. For positive
assertions the success jumps are gathered in a local list and bound after the
failure handling; for negative assertions a matching alternative jumps to the
failure target.

Returns the address after the closing bracket of the assertion, or NULL when
the sljit compiler reports an error. */
static PCRE2_SPTR compile_assert_matchingpath(compiler_common *common, PCRE2_SPTR cc, assert_backtrack *backtrack, BOOL conditional)
{
DEFINE_COMPILER;
int framesize;
int extrasize;
BOOL local_quit_available = FALSE;
BOOL needs_control_head;
/* Number of additional stack slots (0 or 3) reserved when a lookbehind
contains a variable length reverse. It is a count used in stack offset
arithmetic, so it is declared as int rather than BOOL. */
int end_block_size = 0;
BOOL has_vreverse;
int private_data_ptr;
backtrack_common altbacktrack;
PCRE2_SPTR ccbegin;
PCRE2_UCHAR opcode;
PCRE2_UCHAR bra = OP_BRA;
jump_list *tmp = NULL;
jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.own_backtracks;
jump_list **found;
/* Saving previous accept variables. */
BOOL save_local_quit_available = common->local_quit_available;
BOOL save_in_positive_assertion = common->in_positive_assertion;
then_trap_backtrack *save_then_trap = common->then_trap;
struct sljit_label *save_quit_label = common->quit_label;
struct sljit_label *save_accept_label = common->accept_label;
jump_list *save_quit = common->quit;
jump_list *save_positive_assertion_quit = common->positive_assertion_quit;
jump_list *save_accept = common->accept;
struct sljit_jump *jump;
struct sljit_jump *brajump = NULL;

/* Assert captures then. */
common->then_trap = NULL;

if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
  {
  SLJIT_ASSERT(!conditional);
  bra = *cc;
  cc++;
  }

private_data_ptr = PRIVATE_DATA(cc);
SLJIT_ASSERT(private_data_ptr != 0);
framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
backtrack->framesize = framesize;
backtrack->private_data_ptr = private_data_ptr;
opcode = *cc;
SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
/* Positive assertions collect their success jumps locally (tmp); for
negative ones a matching alternative is a failure of the assertion. */
found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
ccbegin = cc;
cc += GET(cc, 1);

if (bra == OP_BRAMINZERO)
  {
  /* This is a braminzero backtrack path. */
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  }

if ((opcode == OP_ASSERTBACK || opcode == OP_ASSERTBACK_NOT) && find_vreverse(ccbegin))
  end_block_size = 3;

if (framesize < 0)
  {
  /* No frame is needed. Slot 0 holds STR_PTR unless nothing in the
  assertion requires it to be saved. */
  extrasize = 1;
  if (bra == OP_BRA && !assert_needs_str_ptr_saving(ccbegin + 1 + LINK_SIZE))
    extrasize = 0;

  extrasize += end_block_size;

  if (needs_control_head)
    extrasize++;

  if (framesize == no_frame)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);

  if (extrasize > 0)
    allocate_stack(common, extrasize);

  if (needs_control_head)
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);

  if (extrasize > 0)
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

  if (needs_control_head)
    {
    SLJIT_ASSERT(extrasize == end_block_size + 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 1), TMP1, 0);
    }
  }
else
  {
  /* A frame is needed. The extra slots hold STR_PTR, the optional end
  block, the optional control head and the previous private data value. */
  extrasize = (needs_control_head ? 3 : 2) + end_block_size;

  OP1(SLJIT_MOV, TMP2, 0, STACK_TOP, 0);
  allocate_stack(common, framesize + extrasize);

  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
  if (needs_control_head)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

  if (needs_control_head)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 2), TMP1, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 1), TMP2, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
    }
  else
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 1), TMP1, 0);

  init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize);
  }

if (end_block_size > 0)
  {
  /* Save STR_END in slot 1 and make the current position the new end of
  the subject while the lookbehind is being matched. */
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_END, 0);
  OP1(SLJIT_MOV, STR_END, 0, STR_PTR, 0);
  }

memset(&altbacktrack, 0, sizeof(backtrack_common));
if (conditional || (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT))
  {
  /* Control verbs cannot escape from these asserts. */
  local_quit_available = TRUE;
  common->local_quit_available = TRUE;
  common->quit_label = NULL;
  common->quit = NULL;
  }

common->in_positive_assertion = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK);
common->positive_assertion_quit = NULL;

/* One iteration per alternative of the assertion. */
while (1)
  {
  common->accept_label = NULL;
  common->accept = NULL;
  altbacktrack.top = NULL;
  altbacktrack.own_backtracks = NULL;

  /* Later alternatives restart from the saved subject position. */
  if (*ccbegin == OP_ALT && extrasize > 0)
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  altbacktrack.cc = ccbegin;
  ccbegin += 1 + LINK_SIZE;

  has_vreverse = (*ccbegin == OP_VREVERSE);
  if (*ccbegin == OP_REVERSE || has_vreverse)
    ccbegin = compile_reverse_matchingpath(common, ccbegin, &altbacktrack);

  compile_matchingpath(common, ccbegin, cc, &altbacktrack);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    {
    /* Restore the saved state before reporting the error. */
    if (local_quit_available)
      {
      common->local_quit_available = save_local_quit_available;
      common->quit_label = save_quit_label;
      common->quit = save_quit;
      }
    common->in_positive_assertion = save_in_positive_assertion;
    common->then_trap = save_then_trap;
    common->accept_label = save_accept_label;
    common->positive_assertion_quit = save_positive_assertion_quit;
    common->accept = save_accept;
    return NULL;
    }

  if (has_vreverse)
    {
    /* Backtrack when the alternative stopped before the temporary end. */
    SLJIT_ASSERT(altbacktrack.top != NULL);
    add_jump(compiler, &altbacktrack.top->simple_backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
    }

  common->accept_label = LABEL();
  if (common->accept != NULL)
    set_jumps(common->accept, common->accept_label);

  /* Reset stack. */
  if (framesize < 0)
    {
    if (framesize == no_frame)
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    else if (extrasize > 0)
      free_stack(common, extrasize);

    if (end_block_size > 0)
      OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 1));

    if (needs_control_head)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
    }
  else
    {
    if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
      {
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));

      if (end_block_size > 0)
        OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 2));

      if (needs_control_head)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
      }
    else
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);

      if (end_block_size > 0)
        OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - extrasize + 1));

      if (needs_control_head)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 2));
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
      }
    }

  if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
    {
    /* We know that STR_PTR was stored on the top of the stack. */
    if (conditional)
      {
      if (extrasize > 0)
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-end_block_size - (needs_control_head ? 2 : 1)));
      }
    else if (bra == OP_BRAZERO)
      {
      if (framesize < 0)
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));
      else
        {
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - extrasize));
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
        }
      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else if (framesize >= 0)
      {
      /* For OP_BRA and OP_BRAMINZERO. */
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
      }
    }
  /* The alternative matched: leave through the "found" list. */
  add_jump(compiler, found, JUMP(SLJIT_JUMP));

  compile_backtrackingpath(common, altbacktrack.top);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    {
    /* Restore the saved state before reporting the error. */
    if (local_quit_available)
      {
      common->local_quit_available = save_local_quit_available;
      common->quit_label = save_quit_label;
      common->quit = save_quit;
      }
    common->in_positive_assertion = save_in_positive_assertion;
    common->then_trap = save_then_trap;
    common->accept_label = save_accept_label;
    common->positive_assertion_quit = save_positive_assertion_quit;
    common->accept = save_accept;
    return NULL;
    }
  set_jumps(altbacktrack.own_backtracks, LABEL());

  if (*cc != OP_ALT)
    break;

  ccbegin = cc;
  cc += GET(cc, 1);
  }

if (local_quit_available)
  {
  SLJIT_ASSERT(common->positive_assertion_quit == NULL);
  /* Makes the check less complicated below. */
  common->positive_assertion_quit = common->quit;
  }

/* None of them matched. */
if (common->positive_assertion_quit != NULL)
  {
  jump = JUMP(SLJIT_JUMP);
  set_jumps(common->positive_assertion_quit, LABEL());
  SLJIT_ASSERT(framesize != no_stack);
  if (framesize < 0)
    OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
  else
    {
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
    OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (extrasize + 1) * sizeof(sljit_sw));
    }
  JUMPHERE(jump);
  }

if (end_block_size > 0)
  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(STACK_TOP), STACK(1));

if (needs_control_head)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 1));

if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
  {
  /* Assert is failed. */
  if ((conditional && extrasize > 0) || bra == OP_BRAZERO)
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  if (framesize < 0)
    {
    /* The topmost item should be 0. */
    if (bra == OP_BRAZERO)
      {
      if (extrasize >= 2)
        free_stack(common, extrasize - 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else if (extrasize > 0)
      free_stack(common, extrasize);
    }
  else
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
    /* The topmost item should be 0. */
    if (bra == OP_BRAZERO)
      {
      free_stack(common, framesize + extrasize - 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else
      free_stack(common, framesize + extrasize);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }
  /* With OP_BRAZERO the failure continues on the matching path (the jump is
  bound below); otherwise it goes to the failure target list. */
  jump = JUMP(SLJIT_JUMP);
  if (bra != OP_BRAZERO)
    add_jump(compiler, target, jump);

  /* Assert is successful. */
  set_jumps(tmp, LABEL());
  if (framesize < 0)
    {
    /* We know that STR_PTR was stored on the top of the stack. */
    if (extrasize > 0)
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));

    /* Keep the STR_PTR on the top of the stack. */
    if (bra == OP_BRAZERO)
      {
      /* This allocation is always successful. */
      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
      if (extrasize >= 2)
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      }
    else if (bra == OP_BRAMINZERO)
      {
      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    }
  else
    {
    if (bra == OP_BRA)
      {
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 1));
      }
    else
      {
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + end_block_size + 2) * sizeof(sljit_sw));

      if (extrasize == 2 + end_block_size)
        {
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
        if (bra == OP_BRAMINZERO)
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
        }
      else
        {
        SLJIT_ASSERT(extrasize == 3 + end_block_size);
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
        }
      }
    }

  if (bra == OP_BRAZERO)
    {
    backtrack->matchingpath = LABEL();
    SET_LABEL(jump, backtrack->matchingpath);
    }
  else if (bra == OP_BRAMINZERO)
    {
    JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
    JUMPHERE(brajump);
    if (framesize >= 0)
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
      }
    set_jumps(backtrack->common.own_backtracks, LABEL());
    }
  }
else
  {
  /* AssertNot is successful. */
  if (framesize < 0)
    {
    if (extrasize > 0)
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

    if (bra != OP_BRA)
      {
      if (extrasize >= 2)
        free_stack(common, extrasize - 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else if (extrasize > 0)
      free_stack(common, extrasize);
    }
  else
    {
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
    /* The topmost item should be 0. */
    if (bra != OP_BRA)
      {
      free_stack(common, framesize + extrasize - 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else
      free_stack(common, framesize + extrasize);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }

  if (bra == OP_BRAZERO)
    backtrack->matchingpath = LABEL();
  else if (bra == OP_BRAMINZERO)
    {
    JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
    JUMPHERE(brajump);
    }

  if (bra != OP_BRA)
    {
    /* With a zero repeat prefix the collected jumps are bound here, so the
    list is cleared afterwards. */
    SLJIT_ASSERT(found == &backtrack->common.own_backtracks);
    set_jumps(backtrack->common.own_backtracks, LABEL());
    backtrack->common.own_backtracks = NULL;
    }
  }

/* Restore the state saved on entry. */
if (local_quit_available)
  {
  common->local_quit_available = save_local_quit_available;
  common->quit_label = save_quit_label;
  common->quit = save_quit;
  }
common->in_positive_assertion = save_in_positive_assertion;
common->then_trap = save_then_trap;
common->accept_label = save_accept_label;
common->positive_assertion_quit = save_positive_assertion_quit;
common->accept = save_accept;
return cc + 1 + LINK_SIZE;
}
10632
match_once_common(compiler_common * common,PCRE2_UCHAR ket,int framesize,int private_data_ptr,BOOL has_alternatives,BOOL needs_control_head)10633 static SLJIT_INLINE void match_once_common(compiler_common *common, PCRE2_UCHAR ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
10634 {
10635 DEFINE_COMPILER;
10636 int stacksize;
10637
10638 if (framesize < 0)
10639 {
10640 if (framesize == no_frame)
10641 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10642 else
10643 {
10644 stacksize = needs_control_head ? 1 : 0;
10645 if (ket != OP_KET || has_alternatives)
10646 stacksize++;
10647
10648 if (stacksize > 0)
10649 free_stack(common, stacksize);
10650 }
10651
10652 if (needs_control_head)
10653 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? STACK(-2) : STACK(-1));
10654
10655 /* TMP2 which is set here used by OP_KETRMAX below. */
10656 if (ket == OP_KETRMAX)
10657 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
10658 else if (ket == OP_KETRMIN)
10659 {
10660 /* Move the STR_PTR to the private_data_ptr. */
10661 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
10662 }
10663 }
10664 else
10665 {
10666 stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;
10667 OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
10668 if (needs_control_head)
10669 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
10670
10671 if (ket == OP_KETRMAX)
10672 {
10673 /* TMP2 which is set here used by OP_KETRMAX below. */
10674 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10675 }
10676 }
10677 if (needs_control_head)
10678 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
10679 }
10680
match_capture_common(compiler_common * common,int stacksize,int offset,int private_data_ptr)10681 static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
10682 {
10683 DEFINE_COMPILER;
10684
10685 if (common->capture_last_ptr != 0)
10686 {
10687 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
10688 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
10689 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
10690 stacksize++;
10691 }
10692 if (common->optimized_cbracket[offset >> 1] == 0)
10693 {
10694 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
10695 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
10696 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
10697 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10698 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
10699 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
10700 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
10701 stacksize += 2;
10702 }
10703 return stacksize;
10704 }
10705
do_script_run(PCRE2_SPTR ptr,PCRE2_SPTR endptr)10706 static PCRE2_SPTR SLJIT_FUNC do_script_run(PCRE2_SPTR ptr, PCRE2_SPTR endptr)
10707 {
10708 if (PRIV(script_run)(ptr, endptr, FALSE))
10709 return endptr;
10710 return NULL;
10711 }
10712
10713 #ifdef SUPPORT_UNICODE
10714
do_script_run_utf(PCRE2_SPTR ptr,PCRE2_SPTR endptr)10715 static PCRE2_SPTR SLJIT_FUNC do_script_run_utf(PCRE2_SPTR ptr, PCRE2_SPTR endptr)
10716 {
10717 if (PRIV(script_run)(ptr, endptr, TRUE))
10718 return endptr;
10719 return NULL;
10720 }
10721
10722 #endif /* SUPPORT_UNICODE */
10723
/* Emits the call that validates a script run once the body of the group has
matched. The value stored at private_data_ptr is loaded into TMP1 and passed,
together with STR_PTR, to do_script_run or do_script_run_utf. The result is
copied into STR_PTR; a zero (NULL) result jumps to a backtrack list.

Arguments:
  common            shared JIT compiler state
  private_data_ptr  offset of the private data slot of the group
  parent            backtrack structure whose jump list receives the failure */

static void match_script_run_common(compiler_common *common, int private_data_ptr, backtrack_common *parent)
{
DEFINE_COMPILER;
struct jump_list **failed;

/* The failure goes to the topmost nested backtrack when there is one,
otherwise to the list owned by parent. */
if (parent->top != NULL)
  failed = &parent->top->simple_backtracks;
else
  failed = &parent->own_backtracks;

/* The call below passes its arguments in R0 and R1. */
SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);

#ifdef SUPPORT_UNICODE
if (common->utf)
  sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_script_run_utf));
else
#endif
  sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_script_run));

OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
add_jump(compiler, failed, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
}
10741
10742 /*
10743 Handling bracketed expressions is probably the most complex part.
10744
10745 Stack layout naming characters:
10746 S - Push the current STR_PTR
10747 0 - Push a 0 (NULL)
10748 A - Push the current STR_PTR. Needed for restoring the STR_PTR
10749 before the next alternative. Not pushed if there are no alternatives.
10750 M - Any values pushed by the current alternative. Can be empty, or anything.
10751 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
10752 L - Push the previous local (pointed by localptr) to the stack
  () - optional values stored on the stack
  ()* - optional, can be stored multiple times
10755
10756 The following list shows the regular expression templates, their PCRE byte codes
10757 and stack layout supported by pcre-sljit.
10758
10759 (?:) OP_BRA | OP_KET A M
10760 () OP_CBRA | OP_KET C M
10761 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
10762 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
10763 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
10764 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
10765 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
10766 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
10767 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
10768 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
10769 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
10770 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
10771 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
10772 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
10773 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
10774 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
10775 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
10776 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
10777 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
10778 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
10779 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
10780 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
10781
10782
10783 Stack layout naming characters:
10784 A - Push the alternative index (starting from 0) on the stack.
      Not pushed if there are no alternatives.
10786 M - Any values pushed by the current alternative. Can be empty, or anything.
10787
10788 The next list shows the possible content of a bracket:
10789 (|) OP_*BRA | OP_ALT ... M A
10790 (?()|) OP_*COND | OP_ALT M A
10791 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
10792 Or nothing, if trace is unnecessary
10793 */
10794
/* Generates the matching path of a bracketed group. The group opcode may be
preceded by OP_BRAZERO or OP_BRAMINZERO. The group kinds tested in this
function are OP_BRA/OP_SBRA, OP_CBRA/OP_SCBRA, OP_ONCE, OP_COND/OP_SCOND,
OP_ASSERT_NA, OP_ASSERTBACK_NA and OP_SCRIPT_RUN. The stack layouts are listed
in the comment preceding this function.

Arguments:
  common  shared JIT compiler state
  cc      points to the group opcode, or to the OP_BRAZERO / OP_BRAMINZERO
          in front of it
  parent  backtrack structure of the enclosing construct; for OP_BRAMINZERO
          it is accessed as a braminzero_backtrack

Returns:  pointer past the closing bracket opcode plus repeat_length, or NULL
          when sljit reports a compiler error */

static PCRE2_SPTR compile_bracket_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
PCRE2_UCHAR opcode;
int private_data_ptr = 0;
int offset = 0;
int i, stacksize;
int repeat_ptr = 0, repeat_length = 0;
int repeat_type = 0, repeat_count = 0;
PCRE2_SPTR ccbegin;
PCRE2_SPTR matchingpath;
PCRE2_SPTR slot;
PCRE2_UCHAR bra = OP_BRA;
PCRE2_UCHAR ket;
assert_backtrack *assert;
BOOL has_alternatives;
BOOL needs_control_head = FALSE;
BOOL has_vreverse = FALSE;
struct sljit_jump *jump;
struct sljit_jump *skip;
struct sljit_label *rmax_label = NULL;
struct sljit_jump *braminzero = NULL;

PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);

/* A leading OP_BRAZERO / OP_BRAMINZERO is remembered in bra and skipped. */
if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
  {
  bra = *cc;
  cc++;
  }

opcode = *cc;
ccbegin = cc;
matchingpath = bracketend(cc) - 1 - LINK_SIZE;
ket = *matchingpath;
if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)
  {
  /* Repeat information is read from the private data of the closing OP_KET.
  OP_UPTO and OP_MINUPTO repeats are handled as OP_KETRMAX / OP_KETRMIN. */
  repeat_ptr = PRIVATE_DATA(matchingpath);
  repeat_length = PRIVATE_DATA(matchingpath + 1);
  repeat_type = PRIVATE_DATA(matchingpath + 2);
  repeat_count = PRIVATE_DATA(matchingpath + 3);
  SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);
  if (repeat_type == OP_UPTO)
    ket = OP_KETRMAX;
  if (repeat_type == OP_MINUPTO)
    ket = OP_KETRMIN;
  }

matchingpath = ccbegin + 1 + LINK_SIZE;
SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
cc += GET(cc, 1);

has_alternatives = *cc == OP_ALT;
if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND))
  {
  SLJIT_COMPILE_ASSERT(OP_DNRREF == OP_RREF + 1 && OP_FALSE == OP_RREF + 2 && OP_TRUE == OP_RREF + 3,
    compile_time_checks_must_be_grouped_together);
  /* Conditions which are decided at compile time never need both branches. */
  has_alternatives = ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL) ? FALSE : TRUE;
  }

/* A repeated OP_COND is treated as OP_SCOND. */
if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
  opcode = OP_SCOND;

if (opcode == OP_CBRA || opcode == OP_SCBRA)
  {
  /* Capturing brackets has a pre-allocated space. */
  offset = GET2(ccbegin, 1 + LINK_SIZE);
  if (common->optimized_cbracket[offset] == 0)
    {
    private_data_ptr = OVECTOR_PRIV(offset);
    offset <<= 1;
    }
  else
    {
    offset <<= 1;
    private_data_ptr = OVECTOR(offset);
    }
  BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
  matchingpath += IMM2_SIZE;
  }
else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_ONCE || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
  {
  /* Other brackets simply allocate the next entry. */
  private_data_ptr = PRIVATE_DATA(ccbegin);
  SLJIT_ASSERT(private_data_ptr != 0);
  BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
  if (opcode == OP_ONCE)
    BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
  }

/* Instructions before the first alternative. */
stacksize = 0;
if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
  stacksize++;
if (bra == OP_BRAZERO)
  stacksize++;

if (stacksize > 0)
  allocate_stack(common, stacksize);

stacksize = 0;
if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
  stacksize++;
  }

if (bra == OP_BRAZERO)
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);

if (bra == OP_BRAMINZERO)
  {
  /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  if (ket != OP_KETRMIN)
    {
    free_stack(common, 1);
    braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
    }
  else if (opcode == OP_ONCE || opcode >= OP_SBRA)
    {
    jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    /* Nothing stored during the first run. */
    skip = JUMP(SLJIT_JUMP);
    JUMPHERE(jump);
    /* Checking zero-length iteration. */
    if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
      {
      /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
      braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      }
    else
      {
      /* Except when the whole stack frame must be saved. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-BACKTRACK_AS(bracket_backtrack)->u.framesize - 2));
      }
    JUMPHERE(skip);
    }
  else
    {
    jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    JUMPHERE(jump);
    }
  }

/* Initialize the repeat counter when the group has a bounded repeat. */
if (repeat_type != 0)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, repeat_count);
  if (repeat_type == OP_EXACT)
    rmax_label = LABEL();
  }

if (ket == OP_KETRMIN)
  BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();

if (ket == OP_KETRMAX)
  {
  rmax_label = LABEL();
  if (has_alternatives && opcode >= OP_BRA && opcode < OP_SBRA && repeat_type == 0)
    BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label;
  }

/* Handling capturing brackets and alternatives. */
if (opcode == OP_ONCE)
  {
  stacksize = 0;
  if (needs_control_head)
    {
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
    stacksize++;
    }

  if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
    {
    /* Neither capturing brackets nor recursions are found in the block. */
    if (ket == OP_KETRMIN)
      {
      stacksize += 2;
      if (!needs_control_head)
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      }
    else
      {
      if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
      if (ket == OP_KETRMAX || has_alternatives)
        stacksize++;
      }

    if (stacksize > 0)
      allocate_stack(common, stacksize);

    stacksize = 0;
    if (needs_control_head)
      {
      stacksize++;
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
      }

    if (ket == OP_KETRMIN)
      {
      if (needs_control_head)
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
      if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
        OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
      }
    else if (ket == OP_KETRMAX || has_alternatives)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
    }
  else
    {
    /* A frame is required: reserve room for it together with the other
    saved values, then initialize it with init_frame. */
    if (ket != OP_KET || has_alternatives)
      stacksize++;

    stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1;
    allocate_stack(common, stacksize);

    if (needs_control_head)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);

    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));

    stacksize = needs_control_head ? 1 : 0;
    if (ket != OP_KET || has_alternatives)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
      stacksize++;
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
      }
    else
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
      }
    init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1);
    }
  }
else if (opcode == OP_CBRA || opcode == OP_SCBRA)
  {
  /* Saving the previous values. */
  if (common->optimized_cbracket[offset >> 1] != 0)
    {
    SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
    allocate_stack(common, 2);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
    }
  else
    {
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    allocate_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
    }
  }
else if (opcode == OP_ASSERTBACK_NA && PRIVATE_DATA(ccbegin + 1))
  {
  /* Both private data words are saved, and STR_END is temporarily set to the
  current position; it is restored after the body has been compiled. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  allocate_stack(common, 4);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), STR_END, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
  OP1(SLJIT_MOV, STR_END, 0, STR_PTR, 0);

  has_vreverse = (*matchingpath == OP_VREVERSE);
  if (*matchingpath == OP_REVERSE || has_vreverse)
    matchingpath = compile_reverse_matchingpath(common, matchingpath, backtrack);
  }
else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
  {
  /* Saving the previous value. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  allocate_stack(common, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);

  if (*matchingpath == OP_REVERSE)
    matchingpath = compile_reverse_matchingpath(common, matchingpath, backtrack);
  }
else if (has_alternatives)
  {
  /* Pushing the starting string pointer. */
  allocate_stack(common, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  }

/* Generating code for the first alternative. */
if (opcode == OP_COND || opcode == OP_SCOND)
  {
  if (*matchingpath == OP_CREF)
    {
    /* The condition fails when the referenced ovector entry equals OVECTOR(1). */
    SLJIT_ASSERT(has_alternatives);
    add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
      CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
    matchingpath += 1 + IMM2_SIZE;
    }
  else if (*matchingpath == OP_DNCREF)
    {
    SLJIT_ASSERT(has_alternatives);

    /* Every group listed in the name table entries is compared with
    OVECTOR(1); the differences are OR-ed together, so the condition fails
    only if all of them are equal to it. STR_PTR is used as a scratch
    register and is restored from TMP3 afterwards. */
    i = GET2(matchingpath, 1 + IMM2_SIZE);
    slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
    OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
    slot += common->name_entry_size;
    i--;
    while (i-- > 0)
      {
      OP2(SLJIT_SUB, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
      OP2(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, STR_PTR, 0);
      slot += common->name_entry_size;
      }
    OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
    add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), JUMP(SLJIT_ZERO));
    matchingpath += 1 + 2 * IMM2_SIZE;
    }
  else if ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL)
    {
    /* Never has other case. */
    BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
    SLJIT_ASSERT(!has_alternatives);

    /* In this branch stacksize is reused as the compile-time result of the
    condition: non-zero selects the first branch, zero the "else" case. */
    if (*matchingpath == OP_TRUE)
      {
      stacksize = 1;
      matchingpath++;
      }
    else if (*matchingpath == OP_FALSE || *matchingpath == OP_FAIL)
      stacksize = 0;
    else if (*matchingpath == OP_RREF)
      {
      stacksize = GET2(matchingpath, 1);
      if (common->currententry == NULL)
        stacksize = 0;
      else if (stacksize == RREF_ANY)
        stacksize = 1;
      else if (common->currententry->start == 0)
        stacksize = stacksize == 0;
      else
        stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);

      if (stacksize != 0)
        matchingpath += 1 + IMM2_SIZE;
      }
    else
      {
      if (common->currententry == NULL || common->currententry->start == 0)
        stacksize = 0;
      else
        {
        /* Search the name table entries for the group number read from
        the current entry. */
        stacksize = GET2(matchingpath, 1 + IMM2_SIZE);
        slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
        i = (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
        while (stacksize > 0)
          {
          if ((int)GET2(slot, 0) == i)
            break;
          slot += common->name_entry_size;
          stacksize--;
          }
        }

      if (stacksize != 0)
        matchingpath += 1 + 2 * IMM2_SIZE;
      }

    /* The stacksize == 0 is a common "else" case. */
    if (stacksize == 0)
      {
      if (*cc == OP_ALT)
        {
        matchingpath = cc + 1 + LINK_SIZE;
        cc += GET(cc, 1);
        }
      else
        matchingpath = cc;
      }
    }
  else
    {
    SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
    /* Similar code as PUSH_BACKTRACK macro. */
    assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
      return NULL;
    memset(assert, 0, sizeof(assert_backtrack));
    assert->common.cc = matchingpath;
    BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
    matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
    }
  }

compile_matchingpath(common, matchingpath, cc, backtrack);
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  return NULL;

/* Group specific code executed after the first alternative has matched. */
switch (opcode)
  {
  case OP_ASSERTBACK_NA:
    if (has_vreverse)
      {
      SLJIT_ASSERT(backtrack->top != NULL && PRIVATE_DATA(ccbegin + 1));
      add_jump(compiler, &backtrack->top->simple_backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
      }

    if (PRIVATE_DATA(ccbegin + 1))
      OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
    break;
  case OP_ASSERT_NA:
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    break;
  case OP_ONCE:
    match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
    break;
  case OP_SCRIPT_RUN:
    match_script_run_common(common, private_data_ptr, backtrack);
    break;
  default:
    /* The other group kinds need nothing here. */
    break;
  }

/* Count the stack slots needed after the alternative, then fill them in
the same order. */
stacksize = 0;
if (repeat_type == OP_MINUPTO)
  {
  /* We need to preserve the counter. TMP2 will be used below. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
  stacksize++;
  }
if (ket != OP_KET || bra != OP_BRA)
  stacksize++;
if (offset != 0)
  {
  if (common->capture_last_ptr != 0)
    stacksize++;
  if (common->optimized_cbracket[offset >> 1] == 0)
    stacksize += 2;
  }
if (has_alternatives && opcode != OP_ONCE)
  stacksize++;

if (stacksize > 0)
  allocate_stack(common, stacksize);

stacksize = 0;
if (repeat_type == OP_MINUPTO)
  {
  /* TMP2 was set above. */
  OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
  stacksize++;
  }

if (ket != OP_KET || bra != OP_BRA)
  {
  if (ket != OP_KET)
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
  else
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
  stacksize++;
  }

if (offset != 0)
  stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);

/* Skip and count the other alternatives. */
i = 1;
while (*cc == OP_ALT)
  {
  cc += GET(cc, 1);
  i++;
  }

if (has_alternatives)
  {
  if (opcode != OP_ONCE)
    {
    /* With at most three alternatives the constant 0 is stored; otherwise
    an address is stored via sljit_emit_mov_addr and kept in the backtrack
    structure. */
    if (i <= 3)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
    else
      BACKTRACK_AS(bracket_backtrack)->u.matching_mov_addr = sljit_emit_mov_addr(compiler, SLJIT_MEM1(STACK_TOP), STACK(stacksize));
    }
  if (ket != OP_KETRMAX)
    BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
  }

/* Must be after the matchingpath label. */
if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
  {
  SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
  }

if (ket == OP_KETRMAX)
  {
  if (repeat_type != 0)
    {
    if (has_alternatives)
      BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, rmax_label);
    /* Drop STR_PTR for greedy plus quantifier. */
    if (opcode != OP_ONCE)
      free_stack(common, 1);
    }
  else if (opcode < OP_BRA || opcode >= OP_SBRA)
    {
    if (has_alternatives)
      BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();

    /* Checking zero-length iteration. */
    if (opcode != OP_ONCE)
      {
      /* This case includes opcodes such as OP_SCRIPT_RUN. */
      CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0, rmax_label);
      /* Drop STR_PTR for greedy plus quantifier. */
      if (bra != OP_BRAZERO)
        free_stack(common, 1);
      }
    else
      /* TMP2 must contain the starting STR_PTR. */
      CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmax_label);
    }
  else
    JUMPTO(SLJIT_JUMP, rmax_label);
  BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
  }

if (repeat_type == OP_EXACT)
  {
  count_match(common);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
  JUMPTO(SLJIT_NOT_ZERO, rmax_label);
  }
else if (repeat_type == OP_UPTO)
  {
  /* We need to preserve the counter. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
  allocate_stack(common, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
  }

if (bra == OP_BRAZERO)
  BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();

if (bra == OP_BRAMINZERO)
  {
  /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
  JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
  if (braminzero != NULL)
    {
    JUMPHERE(braminzero);
    /* We need to release the end pointer to perform the
    backtrack for the zero-length iteration. When
    framesize is < 0, OP_ONCE will do the release itself. */
    if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (BACKTRACK_AS(bracket_backtrack)->u.framesize - 1) * sizeof(sljit_sw));
      }
    else if (ket == OP_KETRMIN && opcode != OP_ONCE)
      free_stack(common, 1);
    }
  /* Continue to the normal backtrack. */
  }

if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO || (has_alternatives && repeat_type != OP_EXACT))
  count_match(common);

cc += 1 + LINK_SIZE;

if (opcode == OP_ONCE)
  {
  int data;
  int framesize = BACKTRACK_AS(bracket_backtrack)->u.framesize;

  SLJIT_ASSERT(SHRT_MIN <= framesize && framesize < SHRT_MAX/2);
  /* We temporarily encode the needs_control_head in the lowest bit.
  The real value should be short enough for this operation to work
  without triggering Undefined Behaviour. */
  data = (int)((short)((unsigned short)framesize << 1) | (needs_control_head ? 1 : 0));
  BACKTRACK_AS(bracket_backtrack)->u.framesize = data;
  }
return cc + repeat_length;
}
11393
compile_bracketpos_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)11394 static PCRE2_SPTR compile_bracketpos_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
11395 {
11396 DEFINE_COMPILER;
11397 backtrack_common *backtrack;
11398 PCRE2_UCHAR opcode;
11399 int private_data_ptr;
11400 int cbraprivptr = 0;
11401 BOOL needs_control_head;
11402 int framesize;
11403 int stacksize;
11404 int offset = 0;
11405 BOOL zero = FALSE;
11406 PCRE2_SPTR ccbegin = NULL;
11407 int stack; /* Also contains the offset of control head. */
11408 struct sljit_label *loop = NULL;
11409 struct jump_list *emptymatch = NULL;
11410
11411 PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
11412 if (*cc == OP_BRAPOSZERO)
11413 {
11414 zero = TRUE;
11415 cc++;
11416 }
11417
11418 opcode = *cc;
11419 private_data_ptr = PRIVATE_DATA(cc);
11420 SLJIT_ASSERT(private_data_ptr != 0);
11421 BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
11422 switch(opcode)
11423 {
11424 case OP_BRAPOS:
11425 case OP_SBRAPOS:
11426 ccbegin = cc + 1 + LINK_SIZE;
11427 break;
11428
11429 case OP_CBRAPOS:
11430 case OP_SCBRAPOS:
11431 offset = GET2(cc, 1 + LINK_SIZE);
11432 /* This case cannot be optimized in the same way as
11433 normal capturing brackets. */
11434 SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
11435 cbraprivptr = OVECTOR_PRIV(offset);
11436 offset <<= 1;
11437 ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
11438 break;
11439
11440 default:
11441 SLJIT_UNREACHABLE();
11442 break;
11443 }
11444
11445 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
11446 BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
11447 if (framesize < 0)
11448 {
11449 if (offset != 0)
11450 {
11451 stacksize = 2;
11452 if (common->capture_last_ptr != 0)
11453 stacksize++;
11454 }
11455 else
11456 stacksize = 1;
11457
11458 if (needs_control_head)
11459 stacksize++;
11460 if (!zero)
11461 stacksize++;
11462
11463 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
11464 allocate_stack(common, stacksize);
11465 if (framesize == no_frame)
11466 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
11467
11468 stack = 0;
11469 if (offset != 0)
11470 {
11471 stack = 2;
11472 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
11473 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
11474 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
11475 if (common->capture_last_ptr != 0)
11476 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
11477 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
11478 if (needs_control_head)
11479 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
11480 if (common->capture_last_ptr != 0)
11481 {
11482 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
11483 stack = 3;
11484 }
11485 }
11486 else
11487 {
11488 if (needs_control_head)
11489 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
11490 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11491 stack = 1;
11492 }
11493
11494 if (needs_control_head)
11495 stack++;
11496 if (!zero)
11497 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), SLJIT_IMM, 1);
11498 if (needs_control_head)
11499 {
11500 stack--;
11501 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
11502 }
11503 }
11504 else
11505 {
11506 stacksize = framesize + 1;
11507 if (!zero)
11508 stacksize++;
11509 if (needs_control_head)
11510 stacksize++;
11511 if (offset == 0)
11512 stacksize++;
11513 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
11514
11515 allocate_stack(common, stacksize);
11516 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11517 if (needs_control_head)
11518 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
11519 OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
11520
11521 stack = 0;
11522 if (!zero)
11523 {
11524 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
11525 stack = 1;
11526 }
11527 if (needs_control_head)
11528 {
11529 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
11530 stack++;
11531 }
11532 if (offset == 0)
11533 {
11534 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
11535 stack++;
11536 }
11537 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
11538 init_frame(common, cc, NULL, stacksize - 1, stacksize - framesize);
11539 stack -= 1 + (offset == 0);
11540 }
11541
11542 if (offset != 0)
11543 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
11544
11545 loop = LABEL();
11546 while (*cc != OP_KETRPOS)
11547 {
11548 backtrack->top = NULL;
11549 backtrack->own_backtracks = NULL;
11550 cc += GET(cc, 1);
11551
11552 compile_matchingpath(common, ccbegin, cc, backtrack);
11553 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
11554 return NULL;
11555
11556 if (framesize < 0)
11557 {
11558 if (framesize == no_frame)
11559 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11560
11561 if (offset != 0)
11562 {
11563 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
11564 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
11565 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
11566 if (common->capture_last_ptr != 0)
11567 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
11568 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
11569 }
11570 else
11571 {
11572 if (opcode == OP_SBRAPOS)
11573 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
11574 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11575 }
11576
11577 /* Even if the match is empty, we need to reset the control head. */
11578 if (needs_control_head)
11579 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));
11580
11581 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
11582 add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));
11583
11584 if (!zero)
11585 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
11586 }
11587 else
11588 {
11589 if (offset != 0)
11590 {
11591 OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw));
11592 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
11593 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
11594 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
11595 if (common->capture_last_ptr != 0)
11596 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
11597 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
11598 }
11599 else
11600 {
11601 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11602 OP2(SLJIT_SUB, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
11603 if (opcode == OP_SBRAPOS)
11604 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
11605 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(-framesize - 2), STR_PTR, 0);
11606 }
11607
11608 /* Even if the match is empty, we need to reset the control head. */
11609 if (needs_control_head)
11610 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));
11611
11612 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
11613 add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));
11614
11615 if (!zero)
11616 {
11617 if (framesize < 0)
11618 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
11619 else
11620 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
11621 }
11622 }
11623
11624 JUMPTO(SLJIT_JUMP, loop);
11625 flush_stubs(common);
11626
11627 compile_backtrackingpath(common, backtrack->top);
11628 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
11629 return NULL;
11630 set_jumps(backtrack->own_backtracks, LABEL());
11631
11632 if (framesize < 0)
11633 {
11634 if (offset != 0)
11635 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
11636 else
11637 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
11638 }
11639 else
11640 {
11641 if (offset != 0)
11642 {
11643 /* Last alternative. */
11644 if (*cc == OP_KETRPOS)
11645 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11646 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
11647 }
11648 else
11649 {
11650 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11651 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
11652 }
11653 }
11654
11655 if (*cc == OP_KETRPOS)
11656 break;
11657 ccbegin = cc + 1 + LINK_SIZE;
11658 }
11659
11660 /* We don't have to restore the control head in case of a failed match. */
11661
11662 backtrack->own_backtracks = NULL;
11663 if (!zero)
11664 {
11665 if (framesize < 0)
11666 add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
11667 else /* TMP2 is set to [private_data_ptr] above. */
11668 add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), STACK(-stacksize), SLJIT_IMM, 0));
11669 }
11670
11671 /* None of them matched. */
11672 set_jumps(emptymatch, LABEL());
11673 count_match(common);
11674 return cc + 1 + LINK_SIZE;
11675 }
11676
get_iterator_parameters(compiler_common * common,PCRE2_SPTR cc,PCRE2_UCHAR * opcode,PCRE2_UCHAR * type,sljit_u32 * max,sljit_u32 * exact,PCRE2_SPTR * end)11677 static SLJIT_INLINE PCRE2_SPTR get_iterator_parameters(compiler_common *common, PCRE2_SPTR cc, PCRE2_UCHAR *opcode, PCRE2_UCHAR *type, sljit_u32 *max, sljit_u32 *exact, PCRE2_SPTR *end)
11678 {
11679 int class_len;
11680
11681 *opcode = *cc;
11682 *exact = 0;
11683
11684 if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
11685 {
11686 cc++;
11687 *type = OP_CHAR;
11688 }
11689 else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
11690 {
11691 cc++;
11692 *type = OP_CHARI;
11693 *opcode -= OP_STARI - OP_STAR;
11694 }
11695 else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
11696 {
11697 cc++;
11698 *type = OP_NOT;
11699 *opcode -= OP_NOTSTAR - OP_STAR;
11700 }
11701 else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
11702 {
11703 cc++;
11704 *type = OP_NOTI;
11705 *opcode -= OP_NOTSTARI - OP_STAR;
11706 }
11707 else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
11708 {
11709 cc++;
11710 *opcode -= OP_TYPESTAR - OP_STAR;
11711 *type = OP_END;
11712 }
11713 else
11714 {
11715 SLJIT_ASSERT(*opcode == OP_CLASS || *opcode == OP_NCLASS || *opcode == OP_XCLASS);
11716 *type = *opcode;
11717 cc++;
11718 class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(PCRE2_UCHAR))) : GET(cc, 0);
11719 *opcode = cc[class_len - 1];
11720
11721 if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
11722 {
11723 *opcode -= OP_CRSTAR - OP_STAR;
11724 *end = cc + class_len;
11725
11726 if (*opcode == OP_PLUS || *opcode == OP_MINPLUS)
11727 {
11728 *exact = 1;
11729 *opcode -= OP_PLUS - OP_STAR;
11730 }
11731 }
11732 else if (*opcode >= OP_CRPOSSTAR && *opcode <= OP_CRPOSQUERY)
11733 {
11734 *opcode -= OP_CRPOSSTAR - OP_POSSTAR;
11735 *end = cc + class_len;
11736
11737 if (*opcode == OP_POSPLUS)
11738 {
11739 *exact = 1;
11740 *opcode = OP_POSSTAR;
11741 }
11742 }
11743 else
11744 {
11745 SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE || *opcode == OP_CRPOSRANGE);
11746 *max = GET2(cc, (class_len + IMM2_SIZE));
11747 *exact = GET2(cc, class_len);
11748
11749 if (*max == 0)
11750 {
11751 if (*opcode == OP_CRPOSRANGE)
11752 *opcode = OP_POSSTAR;
11753 else
11754 *opcode -= OP_CRRANGE - OP_STAR;
11755 }
11756 else
11757 {
11758 *max -= *exact;
11759 if (*max == 0)
11760 *opcode = OP_EXACT;
11761 else if (*max == 1)
11762 {
11763 if (*opcode == OP_CRPOSRANGE)
11764 *opcode = OP_POSQUERY;
11765 else
11766 *opcode -= OP_CRRANGE - OP_QUERY;
11767 }
11768 else
11769 {
11770 if (*opcode == OP_CRPOSRANGE)
11771 *opcode = OP_POSUPTO;
11772 else
11773 *opcode -= OP_CRRANGE - OP_UPTO;
11774 }
11775 }
11776 *end = cc + class_len + 2 * IMM2_SIZE;
11777 }
11778 return cc;
11779 }
11780
11781 switch(*opcode)
11782 {
11783 case OP_EXACT:
11784 *exact = GET2(cc, 0);
11785 cc += IMM2_SIZE;
11786 break;
11787
11788 case OP_PLUS:
11789 case OP_MINPLUS:
11790 *exact = 1;
11791 *opcode -= OP_PLUS - OP_STAR;
11792 break;
11793
11794 case OP_POSPLUS:
11795 *exact = 1;
11796 *opcode = OP_POSSTAR;
11797 break;
11798
11799 case OP_UPTO:
11800 case OP_MINUPTO:
11801 case OP_POSUPTO:
11802 *max = GET2(cc, 0);
11803 cc += IMM2_SIZE;
11804 break;
11805 }
11806
11807 if (*type == OP_END)
11808 {
11809 *type = *cc;
11810 *end = next_opcode(common, cc);
11811 cc++;
11812 return cc;
11813 }
11814
11815 *end = cc + 1;
11816 #ifdef SUPPORT_UNICODE
11817 if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
11818 #endif
11819 return cc;
11820 }
11821
/* Emits the matching path for a repeated single item (character, negated
character, character type or character class). The repeat is first normalized
by get_iterator_parameters() into an opcode, an item type, a mandatory count
(exact), an optional count (max) and the address of the following code.

Arguments:
  common   the JIT compiler state
  cc       points to the repeat opcode (or to the class opcode)
  parent   the backtrack record the new backtrack entry is attached to

Returns:   the address of the code that follows the item. When a following
           OP_CHAR / OP_CHARI is folded into the scan (the charpos case
           below), the returned address is advanced past that character too.

NOTE(review): PUSH_BACKTRACK is given NULL as its last argument, presumably
the value returned when allocating the backtrack record fails - confirm
against the macro definition. */
compile_iterator_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)11822 static PCRE2_SPTR compile_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
11823 {
11824 DEFINE_COMPILER;
11825 backtrack_common *backtrack;
11826 PCRE2_UCHAR opcode;
11827 PCRE2_UCHAR type;
11828 sljit_u32 max = 0, exact;
11829 sljit_s32 early_fail_ptr = PRIVATE_DATA(cc + 1);
11830 sljit_s32 early_fail_type;
11831 BOOL charpos_enabled;
11832 PCRE2_UCHAR charpos_char;
11833 unsigned int charpos_othercasebit;
11834 PCRE2_SPTR end;
11835 jump_list *no_match = NULL;
11836 jump_list *no_char1_match = NULL;
11837 struct sljit_jump *jump = NULL;
11838 struct sljit_label *label;
11839 int private_data_ptr = PRIVATE_DATA(cc);
/* The two saved words live either in private data slots addressed from
SLJIT_SP, or, when no private data was assigned (private_data_ptr == 0), in
stack slots addressed from STACK_TOP. */
11840 int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
11841 int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
11842 int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + SSIZE_OF(sw);
11843 int tmp_base, tmp_offset;
11844 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11845 BOOL use_tmp;
11846 #endif
11847
11848 PUSH_BACKTRACK(sizeof(char_iterator_backtrack), cc, NULL);
11849
/* The private data word of cc + 1 packs the early fail kind into its low
three bits; the remaining bits hold the early fail slot. */
11850 early_fail_type = (early_fail_ptr & 0x7);
11851 early_fail_ptr >>= 3;
11852
11853 /* During recursion, these optimizations are disabled. */
11854 if (common->early_fail_start_ptr == 0 && common->fast_forward_bc_ptr == NULL)
11855 {
11856 early_fail_ptr = 0;
11857 early_fail_type = type_skip;
11858 }
11859
11860 SLJIT_ASSERT(common->fast_forward_bc_ptr != NULL || early_fail_ptr == 0
11861 || (early_fail_ptr >= common->early_fail_start_ptr && early_fail_ptr <= common->early_fail_end_ptr));
11862
/* Backtrack at once when the current position is not beyond the position
recorded in the early fail slot. */
11863 if (early_fail_type == type_fail)
11864 add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr));
11865
11866 cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);
11867
/* Scratch storage for counters and saved positions: the TMP3 register, except
for OP_EXTUNI, which uses the POSSESSIVE0 slot addressed from SLJIT_SP. */
11868 if (type != OP_EXTUNI)
11869 {
11870 tmp_base = TMP3;
11871 tmp_offset = 0;
11872 }
11873 else
11874 {
11875 tmp_base = SLJIT_MEM1(SLJIT_SP);
11876 tmp_offset = POSSESSIVE0;
11877 }
11878
11879 /* Handle fixed part first. */
11880 if (exact > 1)
11881 {
11882 SLJIT_ASSERT(early_fail_ptr == 0);
11883
/* In complete mode without UTF (and for items other than OP_ANYNL and
OP_EXTUNI) the subject length is checked once for all 'exact' code units
before the loop. NOTE(review): the final FALSE / TRUE argument of
compile_char1_matchingpath() presumably selects whether that helper emits its
own end-of-subject check - confirm. */
11884 if (common->mode == PCRE2_JIT_COMPLETE
11885 #ifdef SUPPORT_UNICODE
11886 && !common->utf
11887 #endif
11888 && type != OP_ANYNL && type != OP_EXTUNI)
11889 {
11890 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(exact));
11891 add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_GREATER, TMP1, 0, STR_END, 0));
11892 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
11893 label = LABEL();
11894 compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, FALSE);
11895 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11896 JUMPTO(SLJIT_NOT_ZERO, label);
11897 }
11898 else
11899 {
11900 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
11901 label = LABEL();
11902 compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, TRUE);
11903 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11904 JUMPTO(SLJIT_NOT_ZERO, label);
11905 }
11906 }
11907 else if (exact == 1)
11908 compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, TRUE);
11909
11910 if (early_fail_type == type_fail_range)
11911 {
11912 /* Range end first, followed by range start. */
11913 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr);
11914 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + SSIZE_OF(sw));
11915 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, TMP2, 0);
11916 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, TMP2, 0);
11917 add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_LESS_EQUAL, TMP2, 0, TMP1, 0));
11918
11919 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11920 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + SSIZE_OF(sw), STR_PTR, 0);
11921 }
11922
/* Emit the optional part of the repeat. */
11923 switch(opcode)
11924 {
11925 case OP_STAR:
11926 case OP_UPTO:
11927 SLJIT_ASSERT(early_fail_ptr == 0 || opcode == OP_STAR);
11928
11929 if (type == OP_ANYNL || type == OP_EXTUNI)
11930 {
11931 SLJIT_ASSERT(private_data_ptr == 0);
11932 SLJIT_ASSERT(early_fail_ptr == 0);
11933
/* The start position and a zero word are pushed first; every further matched
item pushes its own end position on the stack. For OP_UPTO the remaining
count is kept in POSSESSIVE0. */
11934 allocate_stack(common, 2);
11935 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11936 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
11937
11938 if (opcode == OP_UPTO)
11939 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, max);
11940
11941 label = LABEL();
11942 compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
11943 if (opcode == OP_UPTO)
11944 {
11945 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
11946 OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
11947 jump = JUMP(SLJIT_ZERO);
11948 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
11949 }
11950
11951 /* We cannot use TMP3 because of allocate_stack. */
11952 allocate_stack(common, 1);
11953 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11954 JUMPTO(SLJIT_JUMP, label);
11955 if (jump != NULL)
11956 JUMPHERE(jump);
11957 BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11958 break;
11959 }
11960 #ifdef SUPPORT_UNICODE
11961 else if (type == OP_ALLANY && !common->invalid_utf)
11962 #else
11963 else if (type == OP_ALLANY)
11964 #endif
11965 {
11966 if (opcode == OP_STAR)
11967 {
/* No per-character loop is needed: STR_PTR is moved directly to STR_END.
offset0 receives the end of the subject and offset1 the starting position. */
11968 if (private_data_ptr == 0)
11969 allocate_stack(common, 2);
11970
11971 OP1(SLJIT_MOV, base, offset0, STR_END, 0);
11972 OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
11973
11974 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
11975 process_partial_match(common);
11976
11977 if (early_fail_ptr != 0)
11978 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_END, 0);
11979 BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11980 break;
11981 }
11982 #ifdef SUPPORT_UNICODE
11983 else if (!common->utf)
11984 #else
11985 else
11986 #endif
11987 {
/* OP_UPTO without UTF: advance by max code units in one step. In complete
mode the result is clamped to STR_END; otherwise process_partial_match() is
emitted for the case where the new position is beyond STR_END. */
11988 if (private_data_ptr == 0)
11989 allocate_stack(common, 2);
11990
11991 OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
11992 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(max));
11993
11994 if (common->mode == PCRE2_JIT_COMPLETE)
11995 {
11996 OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);
11997 SELECT(SLJIT_GREATER, STR_PTR, STR_END, 0, STR_PTR);
11998 }
11999 else
12000 {
12001 jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, STR_END, 0);
12002 process_partial_match(common);
12003 JUMPHERE(jump);
12004 }
12005
12006 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12007
12008 if (early_fail_ptr != 0)
12009 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
12010 BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
12011 break;
12012 }
12013 }
12014
12015 charpos_enabled = FALSE;
12016 charpos_char = 0;
12017 charpos_othercasebit = 0;
12018
/* When the repeated item is not a literal character but the code that follows
it is OP_CHAR / OP_CHARI, that following character is folded into the scan.
This is skipped in UTF mode for characters that have extra code units, and for
caseless characters whose case difference is not a single bit. */
12019 if ((type != OP_CHAR && type != OP_CHARI) && (*end == OP_CHAR || *end == OP_CHARI))
12020 {
12021 #ifdef SUPPORT_UNICODE
12022 charpos_enabled = !common->utf || !HAS_EXTRALEN(end[1]);
12023 #else
12024 charpos_enabled = TRUE;
12025 #endif
12026 if (charpos_enabled && *end == OP_CHARI && char_has_othercase(common, end + 1))
12027 {
12028 charpos_othercasebit = char_get_othercase_bit(common, end + 1);
12029 if (charpos_othercasebit == 0)
12030 charpos_enabled = FALSE;
12031 }
12032
12033 if (charpos_enabled)
12034 {
12035 charpos_char = end[1];
12036 /* Consume the OP_CHAR opcode. */
12037 end += 2;
12038 #if PCRE2_CODE_UNIT_WIDTH == 8
12039 SLJIT_ASSERT((charpos_othercasebit >> 8) == 0);
12040 #elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
12041 SLJIT_ASSERT((charpos_othercasebit >> 9) == 0);
12042 if ((charpos_othercasebit & 0x100) != 0)
12043 charpos_othercasebit = (charpos_othercasebit & 0xff) << 8;
12044 #endif
12045 if (charpos_othercasebit != 0)
12046 charpos_char |= charpos_othercasebit;
12047
12048 BACKTRACK_AS(char_iterator_backtrack)->u.charpos.enabled = TRUE;
12049 BACKTRACK_AS(char_iterator_backtrack)->u.charpos.chr = charpos_char;
12050 BACKTRACK_AS(char_iterator_backtrack)->u.charpos.othercasebit = charpos_othercasebit;
12051 }
12052 }
12053
12054 if (charpos_enabled)
12055 {
12056 if (opcode == OP_UPTO)
12057 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max + 1);
12058
12059 /* Search the first instance of charpos_char. */
12060 jump = JUMP(SLJIT_JUMP);
12061 label = LABEL();
12062 if (opcode == OP_UPTO)
12063 {
12064 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
12065 add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_ZERO));
12066 }
12067 compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, FALSE);
12068 if (early_fail_ptr != 0)
12069 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
12070 JUMPHERE(jump);
12071
12072 detect_partial_match(common, &backtrack->own_backtracks);
12073 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
12074 if (charpos_othercasebit != 0)
12075 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
12076 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);
12077
/* First instance found: both saved words start out as its position. */
12078 if (private_data_ptr == 0)
12079 allocate_stack(common, 2);
12080 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12081 OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
12082
12083 if (opcode == OP_UPTO)
12084 {
12085 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
12086 add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
12087 }
12088
12089 /* Search the last instance of charpos_char. */
12090 label = LABEL();
12091 compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
12092 if (early_fail_ptr != 0)
12093 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
12094 detect_partial_match(common, &no_match);
12095 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
12096 if (charpos_othercasebit != 0)
12097 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
12098
12099 if (opcode == OP_STAR)
12100 {
12101 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);
12102 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12103 JUMPTO(SLJIT_JUMP, label);
12104 }
12105 else
12106 {
12107 jump = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char);
12108 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12109 JUMPHERE(jump);
12110 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
12111 JUMPTO(SLJIT_NOT_ZERO, label);
12112 }
12113
/* Continue one code unit after the last recorded instance, and record that
position in offset0. */
12114 set_jumps(no_match, LABEL());
12115 OP2(SLJIT_ADD, STR_PTR, 0, base, offset0, SLJIT_IMM, IN_UCHARS(1));
12116 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12117 }
12118 else
12119 {
/* Generic greedy loop: offset1 receives the starting position and offset0 the
position reached after the last successfully matched item. */
12120 if (private_data_ptr == 0)
12121 allocate_stack(common, 2);
12122
12123 OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
12124 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
12125 use_tmp = (!HAS_VIRTUAL_REGISTERS && opcode == OP_STAR);
12126 SLJIT_ASSERT(!use_tmp || tmp_base == TMP3);
12127
12128 if (common->utf)
12129 OP1(SLJIT_MOV, use_tmp ? TMP3 : base, use_tmp ? 0 : offset0, STR_PTR, 0);
12130 #endif
12131 if (opcode == OP_UPTO)
12132 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
12133
12134 detect_partial_match(common, &no_match);
12135 label = LABEL();
12136 compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
12137 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
12138 if (common->utf)
12139 OP1(SLJIT_MOV, use_tmp ? TMP3 : base, use_tmp ? 0 : offset0, STR_PTR, 0);
12140 #endif
12141
12142 if (opcode == OP_UPTO)
12143 {
12144 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
12145 add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
12146 }
12147
12148 detect_partial_match_to(common, label);
12149 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
12150
/* In UTF mode the position saved after the last successful item is restored.
Otherwise STR_PTR is stepped back by one code unit on the no_char1_match and
fall-through paths, while no_match jumps land after that adjustment. */
12151 set_jumps(no_char1_match, LABEL());
12152 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
12153 if (common->utf)
12154 {
12155 set_jumps(no_match, LABEL());
12156 if (use_tmp)
12157 {
12158 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
12159 OP1(SLJIT_MOV, base, offset0, TMP3, 0);
12160 }
12161 else
12162 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12163 }
12164 else
12165 #endif
12166 {
12167 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
12168 set_jumps(no_match, LABEL());
12169 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12170 }
12171
12172 if (early_fail_ptr != 0)
12173 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
12174 }
12175
12176 BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
12177 break;
12178
/* The lazy forms match no item here: only the current position (and for
OP_MINUPTO the value max + 1) is saved. NOTE(review): the saved words are
presumably consumed by the backtracking path of this item - confirm. */
12179 case OP_MINSTAR:
12180 if (private_data_ptr == 0)
12181 allocate_stack(common, 1);
12182 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12183 BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
12184 if (early_fail_ptr != 0)
12185 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
12186 break;
12187
12188 case OP_MINUPTO:
12189 SLJIT_ASSERT(early_fail_ptr == 0);
12190 if (private_data_ptr == 0)
12191 allocate_stack(common, 2);
12192 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12193 OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, max + 1);
12194 BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
12195 break;
12196
/* Both query forms save the current position; only the greedy form tries to
match the item on the matching path. */
12197 case OP_QUERY:
12198 case OP_MINQUERY:
12199 SLJIT_ASSERT(early_fail_ptr == 0);
12200 if (private_data_ptr == 0)
12201 allocate_stack(common, 1);
12202 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12203 if (opcode == OP_QUERY)
12204 compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
12205 BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
12206 break;
12207
/* Nothing more to emit: the fixed part was handled before the switch. */
12208 case OP_EXACT:
12209 break;
12210
/* The possessive forms below store nothing at base/offset0/offset1 and do not
set the matchingpath label. */
12211 case OP_POSSTAR:
12212 #if defined SUPPORT_UNICODE
12213 if (type == OP_ALLANY && !common->invalid_utf)
12214 #else
12215 if (type == OP_ALLANY)
12216 #endif
12217 {
12218 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
12219 process_partial_match(common);
12220 if (early_fail_ptr != 0)
12221 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_END, 0);
12222 break;
12223 }
12224
12225 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
12226 if (type == OP_EXTUNI || common->utf)
12227 {
/* The position after the last successful item is kept in the scratch storage
and restored once an item fails to match. */
12228 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
12229 detect_partial_match(common, &no_match);
12230 label = LABEL();
12231 compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
12232 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
12233 detect_partial_match_to(common, label);
12234
12235 set_jumps(no_match, LABEL());
12236 OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
12237 if (early_fail_ptr != 0)
12238 {
12239 if (!HAS_VIRTUAL_REGISTERS && tmp_base == TMP3)
12240 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, TMP3, 0);
12241 else
12242 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
12243 }
12244 break;
12245 }
12246 #endif
12247
12248 detect_partial_match(common, &no_match);
12249 label = LABEL();
12250 compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
12251 detect_partial_match_to(common, label);
12252 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
12253
12254 set_jumps(no_char1_match, LABEL());
12255 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
12256 set_jumps(no_match, LABEL());
12257 if (early_fail_ptr != 0)
12258 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
12259 break;
12260
12261 case OP_POSUPTO:
12262 SLJIT_ASSERT(early_fail_ptr == 0);
12263 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
12264 if (common->utf)
12265 {
/* UTF mode: POSSESSIVE1 holds the position after the last successful item,
and the scratch storage holds the remaining count. */
12266 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
12267 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
12268
12269 detect_partial_match(common, &no_match);
12270 label = LABEL();
12271 compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
12272 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
12273 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
12274 add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
12275 detect_partial_match_to(common, label);
12276
12277 set_jumps(no_match, LABEL());
12278 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
12279 break;
12280 }
12281 #endif
12282
12283 if (type == OP_ALLANY)
12284 {
/* Advance by max code units in one step; see the OP_UPTO / OP_ALLANY case
above for the same clamp / partial match handling. */
12285 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(max));
12286
12287 if (common->mode == PCRE2_JIT_COMPLETE)
12288 {
12289 OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);
12290 SELECT(SLJIT_GREATER, STR_PTR, STR_END, 0, STR_PTR);
12291 }
12292 else
12293 {
12294 jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, STR_END, 0);
12295 process_partial_match(common);
12296 JUMPHERE(jump);
12297 }
12298 break;
12299 }
12300
12301 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
12302
12303 detect_partial_match(common, &no_match);
12304 label = LABEL();
12305 compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
12306 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
12307 add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
12308 detect_partial_match_to(common, label);
12309 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
12310
12311 set_jumps(no_char1_match, LABEL());
12312 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
12313 set_jumps(no_match, LABEL());
12314 break;
12315
12316 case OP_POSQUERY:
12317 SLJIT_ASSERT(early_fail_ptr == 0);
/* Try the item once; on failure STR_PTR is restored from the scratch storage,
on success the scratch storage already holds the new position. */
12318 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
12319 compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
12320 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
12321 set_jumps(no_match, LABEL());
12322 OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
12323 break;
12324
12325 default:
12326 SLJIT_UNREACHABLE();
12327 break;
12328 }
12329
12330 count_match(common);
12331 return end;
12332 }
12333
compile_fail_accept_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)12334 static SLJIT_INLINE PCRE2_SPTR compile_fail_accept_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
12335 {
12336 DEFINE_COMPILER;
12337 backtrack_common *backtrack;
12338
12339 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
12340
12341 if (*cc == OP_FAIL)
12342 {
12343 add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_JUMP));
12344 return cc + 1;
12345 }
12346
12347 if (*cc == OP_ACCEPT && common->currententry == NULL && (common->re->overall_options & PCRE2_ENDANCHORED) != 0)
12348 add_jump(compiler, &common->restart_match, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
12349
12350 if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL || !common->might_be_empty)
12351 {
12352 /* No need to check notempty conditions. */
12353 if (common->accept_label == NULL)
12354 add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
12355 else
12356 JUMPTO(SLJIT_JUMP, common->accept_label);
12357 return cc + 1;
12358 }
12359
12360 if (common->accept_label == NULL)
12361 add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)));
12362 else
12363 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), common->accept_label);
12364
12365 if (HAS_VIRTUAL_REGISTERS)
12366 {
12367 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
12368 OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options));
12369 }
12370 else
12371 OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options));
12372
12373 OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);
12374 add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_NOT_ZERO));
12375 OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);
12376 if (common->accept_label == NULL)
12377 add_jump(compiler, &common->accept, JUMP(SLJIT_ZERO));
12378 else
12379 JUMPTO(SLJIT_ZERO, common->accept_label);
12380
12381 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
12382 if (common->accept_label == NULL)
12383 add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
12384 else
12385 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->accept_label);
12386 add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_JUMP));
12387 return cc + 1;
12388 }
12389
compile_close_matchingpath(compiler_common * common,PCRE2_SPTR cc)12390 static SLJIT_INLINE PCRE2_SPTR compile_close_matchingpath(compiler_common *common, PCRE2_SPTR cc)
12391 {
12392 DEFINE_COMPILER;
12393 int offset = GET2(cc, 1);
12394 BOOL optimized_cbracket = common->optimized_cbracket[offset] != 0;
12395
12396 /* Data will be discarded anyway... */
12397 if (common->currententry != NULL)
12398 return cc + 1 + IMM2_SIZE;
12399
12400 if (!optimized_cbracket)
12401 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR_PRIV(offset));
12402 offset <<= 1;
12403 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
12404 if (!optimized_cbracket)
12405 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
12406 return cc + 1 + IMM2_SIZE;
12407 }
12408
compile_control_verb_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)12409 static SLJIT_INLINE PCRE2_SPTR compile_control_verb_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
12410 {
12411 DEFINE_COMPILER;
12412 backtrack_common *backtrack;
12413 PCRE2_UCHAR opcode = *cc;
12414 PCRE2_SPTR ccend = cc + 1;
12415
12416 if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG ||
12417 opcode == OP_SKIP_ARG || opcode == OP_THEN_ARG)
12418 ccend += 2 + cc[1];
12419
12420 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
12421
12422 if (opcode == OP_SKIP)
12423 {
12424 allocate_stack(common, 1);
12425 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
12426 return ccend;
12427 }
12428
12429 if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG || opcode == OP_THEN_ARG)
12430 {
12431 if (HAS_VIRTUAL_REGISTERS)
12432 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
12433 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
12434 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
12435 OP1(SLJIT_MOV, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
12436 }
12437
12438 return ccend;
12439 }
12440
12441 static PCRE2_UCHAR then_trap_opcode[1] = { OP_THEN_TRAP };
12442
compile_then_trap_matchingpath(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,backtrack_common * parent)12443 static SLJIT_INLINE void compile_then_trap_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent)
12444 {
12445 DEFINE_COMPILER;
12446 backtrack_common *backtrack;
12447 BOOL needs_control_head;
12448 int size;
12449
12450 PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
12451 common->then_trap = BACKTRACK_AS(then_trap_backtrack);
12452 BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
12453 BACKTRACK_AS(then_trap_backtrack)->start = (sljit_sw)(cc - common->start);
12454 BACKTRACK_AS(then_trap_backtrack)->framesize = get_framesize(common, cc, ccend, FALSE, &needs_control_head);
12455
12456 size = BACKTRACK_AS(then_trap_backtrack)->framesize;
12457 size = 3 + (size < 0 ? 0 : size);
12458
12459 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
12460 allocate_stack(common, size);
12461 if (size > 3)
12462 OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0, SLJIT_IMM, (size - 3) * sizeof(sljit_sw));
12463 else
12464 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
12465 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 1), SLJIT_IMM, BACKTRACK_AS(then_trap_backtrack)->start);
12466 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 2), SLJIT_IMM, type_then_trap);
12467 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 3), TMP2, 0);
12468
12469 size = BACKTRACK_AS(then_trap_backtrack)->framesize;
12470 if (size >= 0)
12471 init_frame(common, cc, ccend, size - 1, 0);
12472 }
12473
/* Emits the matching path for the opcode range cc..ccend.

Each opcode is dispatched to its specialised compile_*_matchingpath() helper,
which returns the address of the next opcode to process. Compilation of the
range stops early when a helper returns NULL. ccend must point at OP_END or
at an opcode in the OP_ALT..OP_KETRPOS range (asserted below).

When the pattern has (*THEN) verbs and then_offsets[] is non-zero for the
start of this range, a then-trap is installed first (the "tail" backtrack
item) and a second then_trap_backtrack (the "head" item) is pushed after the
range has been compiled; common->then_trap is restored to its previous value
at that point.

Helpers that only need a list of failure jumps receive the simple_backtracks
list of the most recently pushed backtrack (parent->top) when there is one,
otherwise the own_backtracks list of the parent itself. */
static void compile_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
then_trap_backtrack *outer_then_trap = NULL;
jump_list **fail_list;
PCRE2_UCHAR next_op;
sljit_sw mark_name;
BOOL trap_installed = FALSE;

SLJIT_ASSERT(*ccend == OP_END || (*ccend >= OP_ALT && *ccend <= OP_KETRPOS));

if (common->has_then && common->then_offsets[cc - common->start] != 0)
  {
  SLJIT_ASSERT(*ccend != OP_END && common->control_head_ptr != 0);
  outer_then_trap = common->then_trap;
  trap_installed = TRUE;
  /* Tail item on backtrack. */
  compile_then_trap_matchingpath(common, cc, ccend, parent);
  }

while (cc < ccend)
  {
  switch(*cc)
    {
    /* Zero-width position tests. */
    case OP_SOD: case OP_SOM:
    case OP_NOT_WORD_BOUNDARY: case OP_WORD_BOUNDARY:
    case OP_EODN: case OP_EOD:
    case OP_DOLL: case OP_DOLLM:
    case OP_CIRC: case OP_CIRCM:
    case OP_NOT_UCP_WORD_BOUNDARY: case OP_UCP_WORD_BOUNDARY:
    fail_list = (parent->top == NULL) ? &parent->own_backtracks : &parent->top->simple_backtracks;
    cc = compile_simple_assertion_matchingpath(common, *cc, cc + 1, fail_list);
    break;

    /* Items handled by the single character matcher. */
    case OP_NOT_DIGIT: case OP_DIGIT:
    case OP_NOT_WHITESPACE: case OP_WHITESPACE:
    case OP_NOT_WORDCHAR: case OP_WORDCHAR:
    case OP_ANY: case OP_ALLANY: case OP_ANYBYTE:
    case OP_NOTPROP: case OP_PROP:
    case OP_ANYNL:
    case OP_NOT_HSPACE: case OP_HSPACE:
    case OP_NOT_VSPACE: case OP_VSPACE:
    case OP_EXTUNI:
    case OP_NOT: case OP_NOTI:
    fail_list = (parent->top == NULL) ? &parent->own_backtracks : &parent->top->simple_backtracks;
    cc = compile_char1_matchingpath(common, *cc, cc + 1, fail_list, TRUE);
    break;

    case OP_SET_SOM:
    PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
    /* The previous OVECTOR(0) value goes to the stack, STR_PTR replaces it. */
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
    allocate_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
    cc++;
    break;

    case OP_CHAR:
    case OP_CHARI:
    fail_list = (parent->top == NULL) ? &parent->own_backtracks : &parent->top->simple_backtracks;
    /* The multi character matcher is only used in PCRE2_JIT_COMPLETE mode. */
    if (common->mode != PCRE2_JIT_COMPLETE)
      cc = compile_char1_matchingpath(common, *cc, cc + 1, fail_list, TRUE);
    else
      cc = compile_charn_matchingpath(common, cc, ccend, fail_list);
    break;

    /* Repeated single characters. */
    case OP_STAR: case OP_MINSTAR: case OP_PLUS: case OP_MINPLUS:
    case OP_QUERY: case OP_MINQUERY: case OP_UPTO: case OP_MINUPTO:
    case OP_EXACT:
    case OP_POSSTAR: case OP_POSPLUS: case OP_POSQUERY: case OP_POSUPTO:

    case OP_STARI: case OP_MINSTARI: case OP_PLUSI: case OP_MINPLUSI:
    case OP_QUERYI: case OP_MINQUERYI: case OP_UPTOI: case OP_MINUPTOI:
    case OP_EXACTI:
    case OP_POSSTARI: case OP_POSPLUSI: case OP_POSQUERYI: case OP_POSUPTOI:

    case OP_NOTSTAR: case OP_NOTMINSTAR: case OP_NOTPLUS: case OP_NOTMINPLUS:
    case OP_NOTQUERY: case OP_NOTMINQUERY: case OP_NOTUPTO: case OP_NOTMINUPTO:
    case OP_NOTEXACT:
    case OP_NOTPOSSTAR: case OP_NOTPOSPLUS: case OP_NOTPOSQUERY: case OP_NOTPOSUPTO:

    case OP_NOTSTARI: case OP_NOTMINSTARI: case OP_NOTPLUSI: case OP_NOTMINPLUSI:
    case OP_NOTQUERYI: case OP_NOTMINQUERYI: case OP_NOTUPTOI: case OP_NOTMINUPTOI:
    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI: case OP_NOTPOSPLUSI: case OP_NOTPOSQUERYI: case OP_NOTPOSUPTOI:

    case OP_TYPESTAR: case OP_TYPEMINSTAR: case OP_TYPEPLUS: case OP_TYPEMINPLUS:
    case OP_TYPEQUERY: case OP_TYPEMINQUERY: case OP_TYPEUPTO: case OP_TYPEMINUPTO:
    case OP_TYPEEXACT:
    case OP_TYPEPOSSTAR: case OP_TYPEPOSPLUS: case OP_TYPEPOSQUERY: case OP_TYPEPOSUPTO:
    cc = compile_iterator_matchingpath(common, cc, parent);
    break;

    case OP_CLASS:
    case OP_NCLASS:
    /* The opcode located after the 32 byte class bitmap decides whether
    the class is repeated. */
    next_op = cc[1 + (32 / sizeof(PCRE2_UCHAR))];
    if (next_op >= OP_CRSTAR && next_op <= OP_CRPOSRANGE)
      cc = compile_iterator_matchingpath(common, cc, parent);
    else
      {
      fail_list = (parent->top == NULL) ? &parent->own_backtracks : &parent->top->simple_backtracks;
      cc = compile_char1_matchingpath(common, *cc, cc + 1, fail_list, TRUE);
      }
    break;

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
    case OP_XCLASS:
    /* GET(cc, 1) is the offset of the opcode which follows the class. */
    next_op = *(cc + GET(cc, 1));
    if (next_op >= OP_CRSTAR && next_op <= OP_CRPOSRANGE)
      cc = compile_iterator_matchingpath(common, cc, parent);
    else
      {
      fail_list = (parent->top == NULL) ? &parent->own_backtracks : &parent->top->simple_backtracks;
      cc = compile_char1_matchingpath(common, *cc, cc + 1, fail_list, TRUE);
      }
    break;
#endif

    case OP_REF:
    case OP_REFI:
    next_op = cc[1 + IMM2_SIZE];
    if (next_op >= OP_CRSTAR && next_op <= OP_CRPOSRANGE)
      cc = compile_ref_iterator_matchingpath(common, cc, parent);
    else
      {
      fail_list = (parent->top == NULL) ? &parent->own_backtracks : &parent->top->simple_backtracks;
      compile_ref_matchingpath(common, cc, fail_list, TRUE, FALSE);
      cc += 1 + IMM2_SIZE;
      }
    break;

    case OP_DNREF:
    case OP_DNREFI:
    next_op = cc[1 + 2 * IMM2_SIZE];
    if (next_op >= OP_CRSTAR && next_op <= OP_CRPOSRANGE)
      cc = compile_ref_iterator_matchingpath(common, cc, parent);
    else
      {
      fail_list = (parent->top == NULL) ? &parent->own_backtracks : &parent->top->simple_backtracks;
      compile_dnref_search(common, cc, fail_list);
      compile_ref_matchingpath(common, cc, fail_list, TRUE, FALSE);
      cc += 1 + 2 * IMM2_SIZE;
      }
    break;

    case OP_RECURSE:
    cc = compile_recurse_matchingpath(common, cc, parent);
    break;

    case OP_CALLOUT:
    case OP_CALLOUT_STR:
    cc = compile_callout_matchingpath(common, cc, parent);
    break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
    cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
    break;

    case OP_BRAMINZERO:
    PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc);
    /* The bracket itself is skipped here. */
    cc = bracketend(cc + 1);
    if (*(cc - 1 - LINK_SIZE) == OP_KETRMIN)
      {
      allocate_stack(common, 2);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
      }
    else
      {
      allocate_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      }
    BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL();
    count_match(common);
    break;

    case OP_ASSERT_NA:
    case OP_ASSERTBACK_NA:
    case OP_ONCE:
    case OP_SCRIPT_RUN:
    case OP_BRA:
    case OP_CBRA:
    case OP_COND:
    case OP_SBRA:
    case OP_SCBRA:
    case OP_SCOND:
    cc = compile_bracket_matchingpath(common, cc, parent);
    break;

    case OP_BRAZERO:
    /* OP_BRAZERO followed by an opcode up to OP_ASSERTBACK_NOT is compiled
    as an assertion, anything above as a bracket. */
    if (cc[1] <= OP_ASSERTBACK_NOT)
      {
      PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
      cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
      }
    else
      cc = compile_bracket_matchingpath(common, cc, parent);
    break;

    case OP_BRAPOS:
    case OP_CBRAPOS:
    case OP_SBRAPOS:
    case OP_SCBRAPOS:
    case OP_BRAPOSZERO:
    cc = compile_bracketpos_matchingpath(common, cc, parent);
    break;

    case OP_MARK:
    PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
    SLJIT_ASSERT(common->mark_ptr != 0);
    /* The immediate stored as the new mark is the address cc + 2. */
    mark_name = (sljit_sw)(cc + 2);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
    allocate_stack(common, common->has_skip_arg ? 5 : 1);
    if (HAS_VIRTUAL_REGISTERS)
      OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
    /* Save the previous mark, then publish the new one both in the local
    mark slot and in jit_arguments. */
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0), TMP2, 0);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, mark_name);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
    if (common->has_skip_arg)
      {
      /* Link a type_mark record into the control head chain. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, type_mark);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), SLJIT_IMM, mark_name);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
      }
    cc += 1 + 2 + cc[1];
    break;

    case OP_PRUNE:
    case OP_PRUNE_ARG:
    case OP_SKIP:
    case OP_SKIP_ARG:
    case OP_THEN:
    case OP_THEN_ARG:
    case OP_COMMIT:
    case OP_COMMIT_ARG:
    cc = compile_control_verb_matchingpath(common, cc, parent);
    break;

    case OP_FAIL:
    case OP_ACCEPT:
    case OP_ASSERT_ACCEPT:
    cc = compile_fail_accept_matchingpath(common, cc, parent);
    break;

    case OP_CLOSE:
    cc = compile_close_matchingpath(common, cc);
    break;

    case OP_SKIPZERO:
    /* No code is emitted, the bracket is skipped. */
    cc = bracketend(cc + 1);
    break;

    default:
    SLJIT_UNREACHABLE();
    return;
    }

  /* A helper returned NULL: give up compiling this range. */
  if (cc == NULL)
    return;
  }

if (trap_installed)
  {
  /* Head item on backtrack. */
  PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
  BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
  BACKTRACK_AS(then_trap_backtrack)->then_trap = common->then_trap;
  common->then_trap = outer_then_trap;
  }
SLJIT_ASSERT(cc == ccend);
}
12790
/* The backtrack construction helpers are undefined from this point on. */
#undef BACKTRACK_AS
#undef PUSH_BACKTRACK_NOVALUE
#undef PUSH_BACKTRACK

/* Compiles the backtracking path of the given backtrack item and returns
from the enclosing (void) function when the sljit compiler reports an error.
Expects 'common' and 'compiler' to be in scope at the point of use. */
#define COMPILE_BACKTRACKINGPATH(item) \
  do { \
    compile_backtrackingpath(common, (item)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
  } while (0)

/* Views the local variable 'current' as a pointer to the given backtrack
structure type. */
#define CURRENT_AS(type) ((type *)current)
12805
/* Emits the backtracking path of a single character iterator.

The iterator described by current->cc is decoded with
get_iterator_parameters() and the code is selected by the resulting opcode.
The iterator state is kept in two machine words: in the two topmost stack
slots when the iterator has no private data, otherwise in its private data
area relative to SLJIT_SP. Stack slots are released only in the former case.
Every variant ends by binding current->own_backtracks to the code that
follows. */
static void compile_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
char_iterator_backtrack *iter = CURRENT_AS(char_iterator_backtrack);
PCRE2_SPTR cc = current->cc;
PCRE2_SPTR end;
PCRE2_UCHAR opcode;
PCRE2_UCHAR type;
sljit_u32 max = 0, exact;
struct sljit_label *retry = NULL;
struct sljit_jump *skip = NULL;
jump_list *failed = NULL;
int private_data_ptr = PRIVATE_DATA(cc);
BOOL on_stack = (private_data_ptr == 0);
int base, offset0, offset1;

if (on_stack)
  {
  base = SLJIT_MEM1(STACK_TOP);
  offset0 = STACK(0);
  offset1 = STACK(1);
  }
else
  {
  base = SLJIT_MEM1(SLJIT_SP);
  offset0 = private_data_ptr;
  offset1 = private_data_ptr + SSIZE_OF(sw);
  }

cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);

switch(opcode)
  {
  case OP_STAR:
  case OP_UPTO:
  if (type == OP_ANYNL || type == OP_EXTUNI)
    {
    /* Saved positions are popped one by one; a zero value ends the list. */
    SLJIT_ASSERT(private_data_ptr == 0);
    set_jumps(iter->u.backtracks, LABEL());
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, iter->matchingpath);
    }
  else
    {
    if (iter->u.charpos.enabled)
      {
      /* Step back until the code unit before STR_PTR equals the recorded
      character or the lower limit stored in the second word is reached. */
      OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
      OP1(SLJIT_MOV, TMP2, 0, base, offset1);
      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

      skip = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
      retry = LABEL();
      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
      if (iter->u.charpos.othercasebit != 0)
        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, iter->u.charpos.othercasebit);
      CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, iter->u.charpos.chr, iter->matchingpath);
      move_back(common, NULL, TRUE);
      CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP2, 0, retry);
      }
    else
      {
      /* Give back one item and retry, unless the lower limit is reached. */
      OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
      skip = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, base, offset1);
      move_back(common, NULL, TRUE);
      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
      JUMPTO(SLJIT_JUMP, iter->matchingpath);
      }
    JUMPHERE(skip);
    if (on_stack)
      free_stack(common, 2);
    }
  break;

  case OP_MINSTAR:
  /* Try to consume one more item from the saved position. */
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  compile_char1_matchingpath(common, type, cc, &failed, TRUE);
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  JUMPTO(SLJIT_JUMP, iter->matchingpath);
  set_jumps(failed, LABEL());
  if (on_stack)
    free_stack(common, 1);
  break;

  case OP_MINUPTO:
  /* The second word is a counter: it is decremented first, and the
  iterator fails when it reaches zero. */
  OP1(SLJIT_MOV, TMP1, 0, base, offset1);
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  add_jump(compiler, &failed, JUMP(SLJIT_ZERO));

  OP1(SLJIT_MOV, base, offset1, TMP1, 0);
  compile_char1_matchingpath(common, type, cc, &failed, TRUE);
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  JUMPTO(SLJIT_JUMP, iter->matchingpath);

  set_jumps(failed, LABEL());
  if (on_stack)
    free_stack(common, 2);
  break;

  case OP_QUERY:
  /* A non-zero saved position is used once: the slot is cleared before
  the matching path is resumed. */
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, iter->matchingpath);
  skip = JUMP(SLJIT_JUMP);
  set_jumps(iter->u.backtracks, LABEL());
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
  JUMPTO(SLJIT_JUMP, iter->matchingpath);
  JUMPHERE(skip);
  if (on_stack)
    free_stack(common, 1);
  break;

  case OP_MINQUERY:
  /* A zero saved position means the optional item has already been tried. */
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
  skip = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  compile_char1_matchingpath(common, type, cc, &failed, TRUE);
  JUMPTO(SLJIT_JUMP, iter->matchingpath);
  set_jumps(failed, LABEL());
  JUMPHERE(skip);
  if (on_stack)
    free_stack(common, 1);
  break;

  /* No backtracking code is emitted for these variants. */
  case OP_EXACT:
  case OP_POSSTAR:
  case OP_POSQUERY:
  case OP_POSUPTO:
  break;

  default:
  SLJIT_UNREACHABLE();
  break;
  }

set_jumps(current->own_backtracks, LABEL());
}
12933
compile_ref_iterator_backtrackingpath(compiler_common * common,struct backtrack_common * current)12934 static SLJIT_INLINE void compile_ref_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12935 {
12936 DEFINE_COMPILER;
12937 PCRE2_SPTR cc = current->cc;
12938 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
12939 PCRE2_UCHAR type;
12940
12941 type = cc[ref ? 1 + IMM2_SIZE : 1 + 2 * IMM2_SIZE];
12942
12943 if ((type & 0x1) == 0)
12944 {
12945 /* Maximize case. */
12946 set_jumps(current->own_backtracks, LABEL());
12947 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12948 free_stack(common, 1);
12949 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
12950 return;
12951 }
12952
12953 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12954 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
12955 set_jumps(current->own_backtracks, LABEL());
12956 free_stack(common, ref ? 2 : 3);
12957 }
12958
compile_recurse_backtrackingpath(compiler_common * common,struct backtrack_common * current)12959 static SLJIT_INLINE void compile_recurse_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12960 {
12961 DEFINE_COMPILER;
12962 recurse_entry *entry;
12963
12964 if (!CURRENT_AS(recurse_backtrack)->inlined_pattern)
12965 {
12966 entry = CURRENT_AS(recurse_backtrack)->entry;
12967 if (entry->backtrack_label == NULL)
12968 add_jump(compiler, &entry->backtrack_calls, JUMP(SLJIT_FAST_CALL));
12969 else
12970 JUMPTO(SLJIT_FAST_CALL, entry->backtrack_label);
12971 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(recurse_backtrack)->matchingpath);
12972 }
12973 else
12974 compile_backtrackingpath(common, current->top);
12975
12976 set_jumps(current->own_backtracks, LABEL());
12977 }
12978
/* Emits the backtracking path of an assertion, optionally preceded by
OP_BRAZERO (OP_BRAMINZERO is never expected here).

With OP_BRAZERO the top stack slot holds a value which is loaded into STR_PTR
first: when it is non-zero the slot is cleared and the matching path of the
assertion is resumed. For assertions with a frame (framesize >= 0) the
positive forms (OP_ASSERT, OP_ASSERTBACK) additionally unwind their frame
through the common revertframes routine and restore the private data slot
from the frame before backtracking continues. */
static void compile_assert_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
assert_backtrack *assertion = CURRENT_AS(assert_backtrack);
PCRE2_SPTR cc = current->cc;
struct sljit_jump *skip_retry = NULL;
BOOL is_brazero;

SLJIT_ASSERT(*cc != OP_BRAMINZERO);
is_brazero = (*cc == OP_BRAZERO);
if (is_brazero)
  {
  /* Step over OP_BRAZERO so that cc points at the assertion opcode. */
  cc++;
  SLJIT_ASSERT(current->own_backtracks == NULL);
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  }

if (assertion->framesize < 0)
  {
  /* No frame to unwind. */
  set_jumps(current->own_backtracks, LABEL());

  if (is_brazero)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, assertion->matchingpath);
    free_stack(common, 1);
    }
  return;
  }

if (is_brazero)
  {
  if (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, assertion->matchingpath);
    free_stack(common, 1);
    return;
    }
  free_stack(common, 1);
  skip_retry = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  }

if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK)
  {
  /* Unwind the frame and restore the previous private data value. */
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assertion->private_data_ptr);
  add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
  OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assertion->framesize - 1) * sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assertion->private_data_ptr, TMP1, 0);
  }
set_jumps(current->own_backtracks, LABEL());

if (is_brazero)
  {
  /* We know there is enough place on the stack. */
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
  JUMPTO(SLJIT_JUMP, assertion->matchingpath);
  JUMPHERE(skip_retry);
  }
}
13047
compile_bracket_backtrackingpath(compiler_common * common,struct backtrack_common * current)13048 static void compile_bracket_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13049 {
13050 DEFINE_COMPILER;
13051 int opcode, stacksize, alt_count, alt_max;
13052 int offset = 0;
13053 int private_data_ptr = CURRENT_AS(bracket_backtrack)->private_data_ptr;
13054 int repeat_ptr = 0, repeat_type = 0, repeat_count = 0;
13055 PCRE2_SPTR cc = current->cc;
13056 PCRE2_SPTR ccbegin;
13057 PCRE2_SPTR ccprev;
13058 PCRE2_UCHAR bra = OP_BRA;
13059 PCRE2_UCHAR ket;
13060 assert_backtrack *assert;
13061 BOOL has_alternatives;
13062 BOOL needs_control_head = FALSE;
13063 BOOL has_vreverse;
13064 struct sljit_jump *brazero = NULL;
13065 struct sljit_jump *next_alt = NULL;
13066 struct sljit_jump *once = NULL;
13067 struct sljit_jump *cond = NULL;
13068 struct sljit_label *rmin_label = NULL;
13069 struct sljit_label *exact_label = NULL;
13070 struct sljit_jump *mov_addr = NULL;
13071
13072 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
13073 {
13074 bra = *cc;
13075 cc++;
13076 }
13077
13078 opcode = *cc;
13079 ccbegin = bracketend(cc) - 1 - LINK_SIZE;
13080 ket = *ccbegin;
13081 if (ket == OP_KET && PRIVATE_DATA(ccbegin) != 0)
13082 {
13083 repeat_ptr = PRIVATE_DATA(ccbegin);
13084 repeat_type = PRIVATE_DATA(ccbegin + 2);
13085 repeat_count = PRIVATE_DATA(ccbegin + 3);
13086 SLJIT_ASSERT(repeat_type != 0 && repeat_count != 0);
13087 if (repeat_type == OP_UPTO)
13088 ket = OP_KETRMAX;
13089 if (repeat_type == OP_MINUPTO)
13090 ket = OP_KETRMIN;
13091 }
13092 ccbegin = cc;
13093 cc += GET(cc, 1);
13094 has_alternatives = *cc == OP_ALT;
13095 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
13096 has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_backtrack)->u.condfailed != NULL;
13097 if (opcode == OP_CBRA || opcode == OP_SCBRA)
13098 offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1;
13099 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
13100 opcode = OP_SCOND;
13101
13102 alt_max = has_alternatives ? no_alternatives(ccbegin) : 0;
13103
13104 /* Decoding the needs_control_head in framesize. */
13105 if (opcode == OP_ONCE)
13106 {
13107 needs_control_head = (CURRENT_AS(bracket_backtrack)->u.framesize & 0x1) != 0;
13108 CURRENT_AS(bracket_backtrack)->u.framesize >>= 1;
13109 }
13110
13111 if (ket != OP_KET && repeat_type != 0)
13112 {
13113 /* TMP1 is used in OP_KETRMIN below. */
13114 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13115 free_stack(common, 1);
13116 if (repeat_type == OP_UPTO)
13117 OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0, SLJIT_IMM, 1);
13118 else
13119 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
13120 }
13121
13122 if (ket == OP_KETRMAX)
13123 {
13124 if (bra == OP_BRAZERO)
13125 {
13126 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13127 free_stack(common, 1);
13128 brazero = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
13129 }
13130 }
13131 else if (ket == OP_KETRMIN)
13132 {
13133 if (bra != OP_BRAMINZERO)
13134 {
13135 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13136 if (repeat_type != 0)
13137 {
13138 /* TMP1 was set a few lines above. */
13139 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
13140 /* Drop STR_PTR for non-greedy plus quantifier. */
13141 if (opcode != OP_ONCE)
13142 free_stack(common, 1);
13143 }
13144 else if (opcode >= OP_SBRA || opcode == OP_ONCE)
13145 {
13146 /* Checking zero-length iteration. */
13147 if (opcode != OP_ONCE || CURRENT_AS(bracket_backtrack)->u.framesize < 0)
13148 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
13149 else
13150 {
13151 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
13152 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 2), CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
13153 }
13154 /* Drop STR_PTR for non-greedy plus quantifier. */
13155 if (opcode != OP_ONCE)
13156 free_stack(common, 1);
13157 }
13158 else
13159 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
13160 }
13161 rmin_label = LABEL();
13162 if (repeat_type != 0)
13163 OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
13164 }
13165 else if (bra == OP_BRAZERO)
13166 {
13167 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13168 free_stack(common, 1);
13169 brazero = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
13170 }
13171 else if (repeat_type == OP_EXACT)
13172 {
13173 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
13174 exact_label = LABEL();
13175 }
13176
13177 if (offset != 0)
13178 {
13179 if (common->capture_last_ptr != 0)
13180 {
13181 SLJIT_ASSERT(common->optimized_cbracket[offset >> 1] == 0);
13182 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13183 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13184 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
13185 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
13186 free_stack(common, 3);
13187 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP2, 0);
13188 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
13189 }
13190 else if (common->optimized_cbracket[offset >> 1] == 0)
13191 {
13192 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13193 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13194 free_stack(common, 2);
13195 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
13196 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
13197 }
13198 }
13199
13200 if (SLJIT_UNLIKELY(opcode == OP_ONCE))
13201 {
13202 if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
13203 {
13204 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
13205 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
13206 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(bracket_backtrack)->u.framesize - 1) * sizeof(sljit_sw));
13207 }
13208 once = JUMP(SLJIT_JUMP);
13209 }
13210 else if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
13211 {
13212 if (has_alternatives)
13213 {
13214 /* Always exactly one alternative. */
13215 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13216 free_stack(common, 1);
13217
13218 alt_max = 2;
13219 next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
13220 }
13221 }
13222 else if (has_alternatives)
13223 {
13224 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13225 free_stack(common, 1);
13226
13227 if (alt_max > 3)
13228 {
13229 sljit_emit_ijump(compiler, SLJIT_JUMP, TMP1, 0);
13230
13231 SLJIT_ASSERT(CURRENT_AS(bracket_backtrack)->u.matching_mov_addr);
13232 sljit_set_label(CURRENT_AS(bracket_backtrack)->u.matching_mov_addr, LABEL());
13233 sljit_emit_op0(compiler, SLJIT_ENDBR);
13234 }
13235 else
13236 next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
13237 }
13238
13239 COMPILE_BACKTRACKINGPATH(current->top);
13240 if (current->own_backtracks)
13241 set_jumps(current->own_backtracks, LABEL());
13242
13243 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
13244 {
13245 /* Conditional block always has at most one alternative. */
13246 if (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT)
13247 {
13248 SLJIT_ASSERT(has_alternatives);
13249 assert = CURRENT_AS(bracket_backtrack)->u.assert;
13250 if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK))
13251 {
13252 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
13253 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
13254 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
13255 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert->framesize - 1) * sizeof(sljit_sw));
13256 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, TMP1, 0);
13257 }
13258 cond = JUMP(SLJIT_JUMP);
13259 set_jumps(CURRENT_AS(bracket_backtrack)->u.assert->condfailed, LABEL());
13260 }
13261 else if (CURRENT_AS(bracket_backtrack)->u.condfailed != NULL)
13262 {
13263 SLJIT_ASSERT(has_alternatives);
13264 cond = JUMP(SLJIT_JUMP);
13265 set_jumps(CURRENT_AS(bracket_backtrack)->u.condfailed, LABEL());
13266 }
13267 else
13268 SLJIT_ASSERT(!has_alternatives);
13269 }
13270
13271 if (has_alternatives)
13272 {
13273 alt_count = 1;
13274 do
13275 {
13276 current->top = NULL;
13277 current->own_backtracks = NULL;
13278 current->simple_backtracks = NULL;
13279 /* Conditional blocks always have an additional alternative, even if it is empty. */
13280 if (*cc == OP_ALT)
13281 {
13282 ccprev = cc + 1 + LINK_SIZE;
13283 cc += GET(cc, 1);
13284
13285 has_vreverse = FALSE;
13286 if (opcode == OP_ASSERTBACK || opcode == OP_ASSERTBACK_NA)
13287 {
13288 SLJIT_ASSERT(private_data_ptr != 0);
13289 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
13290
13291 has_vreverse = (*ccprev == OP_VREVERSE);
13292 if (*ccprev == OP_REVERSE || has_vreverse)
13293 ccprev = compile_reverse_matchingpath(common, ccprev, current);
13294 }
13295 else if (opcode != OP_COND && opcode != OP_SCOND)
13296 {
13297 if (opcode != OP_ONCE)
13298 {
13299 if (private_data_ptr != 0)
13300 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
13301 else
13302 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13303 }
13304 else
13305 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(needs_control_head ? 1 : 0));
13306 }
13307
13308 compile_matchingpath(common, ccprev, cc, current);
13309 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
13310 return;
13311
13312 switch (opcode)
13313 {
13314 case OP_ASSERTBACK_NA:
13315 if (has_vreverse)
13316 {
13317 SLJIT_ASSERT(current->top != NULL && PRIVATE_DATA(ccbegin + 1));
13318 add_jump(compiler, ¤t->top->simple_backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
13319 }
13320
13321 if (PRIVATE_DATA(ccbegin + 1))
13322 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
13323 break;
13324 case OP_ASSERT_NA:
13325 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
13326 break;
13327 case OP_SCRIPT_RUN:
13328 match_script_run_common(common, private_data_ptr, current);
13329 break;
13330 }
13331 }
13332
13333 /* Instructions after the current alternative is successfully matched. */
13334 /* There is a similar code in compile_bracket_matchingpath. */
13335 if (opcode == OP_ONCE)
13336 match_once_common(common, ket, CURRENT_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
13337
13338 stacksize = 0;
13339 if (repeat_type == OP_MINUPTO)
13340 {
13341 /* We need to preserve the counter. TMP2 will be used below. */
13342 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
13343 stacksize++;
13344 }
13345 if (ket != OP_KET || bra != OP_BRA)
13346 stacksize++;
13347 if (offset != 0)
13348 {
13349 if (common->capture_last_ptr != 0)
13350 stacksize++;
13351 if (common->optimized_cbracket[offset >> 1] == 0)
13352 stacksize += 2;
13353 }
13354 if (opcode != OP_ONCE)
13355 stacksize++;
13356
13357 if (stacksize > 0)
13358 allocate_stack(common, stacksize);
13359
13360 stacksize = 0;
13361 if (repeat_type == OP_MINUPTO)
13362 {
13363 /* TMP2 was set above. */
13364 OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
13365 stacksize++;
13366 }
13367
13368 if (ket != OP_KET || bra != OP_BRA)
13369 {
13370 if (ket != OP_KET)
13371 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
13372 else
13373 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
13374 stacksize++;
13375 }
13376
13377 if (offset != 0)
13378 stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
13379
13380 if (opcode != OP_ONCE)
13381 {
13382 if (alt_max <= 3)
13383 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, alt_count);
13384 else
13385 mov_addr = sljit_emit_mov_addr(compiler, SLJIT_MEM1(STACK_TOP), STACK(stacksize));
13386 }
13387
13388 if (offset != 0 && ket == OP_KETRMAX && common->optimized_cbracket[offset >> 1] != 0)
13389 {
13390 /* If ket is not OP_KETRMAX, this code path is executed after the jump to alternative_matchingpath. */
13391 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
13392 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
13393 }
13394
13395 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->alternative_matchingpath);
13396
13397 if (opcode != OP_ONCE)
13398 {
13399 if (alt_max <= 3)
13400 {
13401 JUMPHERE(next_alt);
13402 alt_count++;
13403 if (alt_count < alt_max)
13404 {
13405 SLJIT_ASSERT(alt_count == 2 && alt_max == 3);
13406 next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 1);
13407 }
13408 }
13409 else
13410 {
13411 sljit_set_label(mov_addr, LABEL());
13412 sljit_emit_op0(compiler, SLJIT_ENDBR);
13413 }
13414 }
13415
13416 COMPILE_BACKTRACKINGPATH(current->top);
13417 if (current->own_backtracks)
13418 set_jumps(current->own_backtracks, LABEL());
13419 SLJIT_ASSERT(!current->simple_backtracks);
13420 }
13421 while (*cc == OP_ALT);
13422
13423 if (cond != NULL)
13424 {
13425 SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND);
13426 assert = CURRENT_AS(bracket_backtrack)->u.assert;
13427 if ((ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT) && assert->framesize >= 0)
13428 {
13429 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
13430 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
13431 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
13432 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert->framesize - 1) * sizeof(sljit_sw));
13433 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, TMP1, 0);
13434 }
13435 JUMPHERE(cond);
13436 }
13437
13438 /* Free the STR_PTR. */
13439 if (private_data_ptr == 0)
13440 free_stack(common, 1);
13441 }
13442
13443 if (offset != 0)
13444 {
13445 /* Using both tmp register is better for instruction scheduling. */
13446 if (common->optimized_cbracket[offset >> 1] != 0)
13447 {
13448 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13449 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13450 free_stack(common, 2);
13451 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
13452 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
13453 }
13454 else
13455 {
13456 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13457 free_stack(common, 1);
13458 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
13459 }
13460 }
13461 else if (opcode == OP_ASSERTBACK_NA && PRIVATE_DATA(ccbegin + 1))
13462 {
13463 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13464 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13465 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
13466 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
13467 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), TMP2, 0);
13468 free_stack(common, 4);
13469 }
13470 else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
13471 {
13472 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(0));
13473 free_stack(common, 1);
13474 }
13475 else if (opcode == OP_ONCE)
13476 {
13477 cc = ccbegin + GET(ccbegin, 1);
13478 stacksize = needs_control_head ? 1 : 0;
13479
13480 if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
13481 {
13482 /* Reset head and drop saved frame. */
13483 stacksize += CURRENT_AS(bracket_backtrack)->u.framesize + ((ket != OP_KET || *cc == OP_ALT) ? 2 : 1);
13484 }
13485 else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN))
13486 {
13487 /* The STR_PTR must be released. */
13488 stacksize++;
13489 }
13490
13491 if (stacksize > 0)
13492 free_stack(common, stacksize);
13493
13494 JUMPHERE(once);
13495 /* Restore previous private_data_ptr */
13496 if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
13497 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 1));
13498 else if (ket == OP_KETRMIN)
13499 {
13500 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13501 /* See the comment below. */
13502 free_stack(common, 2);
13503 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
13504 }
13505 }
13506
13507 if (repeat_type == OP_EXACT)
13508 {
13509 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
13510 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
13511 CMPTO(SLJIT_LESS_EQUAL, TMP1, 0, SLJIT_IMM, repeat_count, exact_label);
13512 }
13513 else if (ket == OP_KETRMAX)
13514 {
13515 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13516 if (bra != OP_BRAZERO)
13517 free_stack(common, 1);
13518
13519 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
13520 if (bra == OP_BRAZERO)
13521 {
13522 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13523 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
13524 JUMPHERE(brazero);
13525 free_stack(common, 1);
13526 }
13527 }
13528 else if (ket == OP_KETRMIN)
13529 {
13530 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13531
13532 /* OP_ONCE removes everything in case of a backtrack, so we don't
13533 need to explicitly release the STR_PTR. The extra release would
13534 affect badly the free_stack(2) above. */
13535 if (opcode != OP_ONCE)
13536 free_stack(common, 1);
13537 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rmin_label);
13538 if (opcode == OP_ONCE)
13539 free_stack(common, bra == OP_BRAMINZERO ? 2 : 1);
13540 else if (bra == OP_BRAMINZERO)
13541 free_stack(common, 1);
13542 }
13543 else if (bra == OP_BRAZERO)
13544 {
13545 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13546 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
13547 JUMPHERE(brazero);
13548 }
13549 }
13550
compile_bracketpos_backtrackingpath(compiler_common * common,struct backtrack_common * current)13551 static SLJIT_INLINE void compile_bracketpos_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13552 {
13553 DEFINE_COMPILER;
13554 int offset;
13555 struct sljit_jump *jump;
13556 PCRE2_SPTR cc;
13557
13558 /* No retry on backtrack, just drop everything. */
13559 if (CURRENT_AS(bracketpos_backtrack)->framesize < 0)
13560 {
13561 cc = current->cc;
13562
13563 if (*cc == OP_BRAPOSZERO)
13564 cc++;
13565
13566 if (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS)
13567 {
13568 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
13569 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13570 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13571 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
13572 if (common->capture_last_ptr != 0)
13573 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
13574 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
13575 if (common->capture_last_ptr != 0)
13576 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
13577 }
13578 set_jumps(current->own_backtracks, LABEL());
13579 free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
13580 return;
13581 }
13582
13583 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr);
13584 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
13585 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(bracketpos_backtrack)->framesize - 1) * sizeof(sljit_sw));
13586
13587 if (current->own_backtracks)
13588 {
13589 jump = JUMP(SLJIT_JUMP);
13590 set_jumps(current->own_backtracks, LABEL());
13591 /* Drop the stack frame. */
13592 free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
13593 JUMPHERE(jump);
13594 }
13595 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracketpos_backtrack)->framesize - 1));
13596 }
13597
compile_braminzero_backtrackingpath(compiler_common * common,struct backtrack_common * current)13598 static SLJIT_INLINE void compile_braminzero_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13599 {
13600 assert_backtrack backtrack;
13601
13602 current->top = NULL;
13603 current->own_backtracks = NULL;
13604 current->simple_backtracks = NULL;
13605 if (current->cc[1] > OP_ASSERTBACK_NOT)
13606 {
13607 /* Manual call of compile_bracket_matchingpath and compile_bracket_backtrackingpath. */
13608 compile_bracket_matchingpath(common, current->cc, current);
13609 compile_bracket_backtrackingpath(common, current->top);
13610 }
13611 else
13612 {
13613 memset(&backtrack, 0, sizeof(backtrack));
13614 backtrack.common.cc = current->cc;
13615 backtrack.matchingpath = CURRENT_AS(braminzero_backtrack)->matchingpath;
13616 /* Manual call of compile_assert_matchingpath. */
13617 compile_assert_matchingpath(common, current->cc, &backtrack, FALSE);
13618 }
13619 SLJIT_ASSERT(!current->simple_backtracks && !current->own_backtracks);
13620 }
13621
compile_control_verb_backtrackingpath(compiler_common * common,struct backtrack_common * current)13622 static SLJIT_INLINE void compile_control_verb_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13623 {
13624 DEFINE_COMPILER;
13625 PCRE2_UCHAR opcode = *current->cc;
13626 struct sljit_label *loop;
13627 struct sljit_jump *jump;
13628
13629 if (opcode == OP_THEN || opcode == OP_THEN_ARG)
13630 {
13631 if (common->then_trap != NULL)
13632 {
13633 SLJIT_ASSERT(common->control_head_ptr != 0);
13634
13635 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
13636 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, type_then_trap);
13637 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, common->then_trap->start);
13638 jump = JUMP(SLJIT_JUMP);
13639
13640 loop = LABEL();
13641 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13642 JUMPHERE(jump);
13643 CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0, loop);
13644 CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0, loop);
13645 add_jump(compiler, &common->then_trap->quit, JUMP(SLJIT_JUMP));
13646 return;
13647 }
13648 else if (!common->local_quit_available && common->in_positive_assertion)
13649 {
13650 add_jump(compiler, &common->positive_assertion_quit, JUMP(SLJIT_JUMP));
13651 return;
13652 }
13653 }
13654
13655 if (common->local_quit_available)
13656 {
13657 /* Abort match with a fail. */
13658 if (common->quit_label == NULL)
13659 add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
13660 else
13661 JUMPTO(SLJIT_JUMP, common->quit_label);
13662 return;
13663 }
13664
13665 if (opcode == OP_SKIP_ARG)
13666 {
13667 SLJIT_ASSERT(common->control_head_ptr != 0 && TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
13668 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
13669 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)(current->cc + 2));
13670 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_search_mark));
13671
13672 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_R0, 0);
13673 add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, SLJIT_R0, 0, SLJIT_IMM, 0));
13674 return;
13675 }
13676
13677 if (opcode == OP_SKIP)
13678 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13679 else
13680 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_IMM, 0);
13681 add_jump(compiler, &common->reset_match, JUMP(SLJIT_JUMP));
13682 }
13683
compile_vreverse_backtrackingpath(compiler_common * common,struct backtrack_common * current)13684 static SLJIT_INLINE void compile_vreverse_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13685 {
13686 DEFINE_COMPILER;
13687 struct sljit_jump *jump;
13688 struct sljit_label *label;
13689
13690 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
13691 jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(3));
13692 skip_valid_char(common);
13693 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), STR_PTR, 0);
13694 JUMPTO(SLJIT_JUMP, CURRENT_AS(vreverse_backtrack)->matchingpath);
13695
13696 label = LABEL();
13697 sljit_set_label(jump, label);
13698 set_jumps(current->own_backtracks, label);
13699 }
13700
compile_then_trap_backtrackingpath(compiler_common * common,struct backtrack_common * current)13701 static SLJIT_INLINE void compile_then_trap_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13702 {
13703 DEFINE_COMPILER;
13704 struct sljit_jump *jump;
13705 int size;
13706
13707 if (CURRENT_AS(then_trap_backtrack)->then_trap)
13708 {
13709 common->then_trap = CURRENT_AS(then_trap_backtrack)->then_trap;
13710 return;
13711 }
13712
13713 size = CURRENT_AS(then_trap_backtrack)->framesize;
13714 size = 3 + (size < 0 ? 0 : size);
13715
13716 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(size - 3));
13717 free_stack(common, size);
13718 jump = JUMP(SLJIT_JUMP);
13719
13720 set_jumps(CURRENT_AS(then_trap_backtrack)->quit, LABEL());
13721 /* STACK_TOP is set by THEN. */
13722 if (CURRENT_AS(then_trap_backtrack)->framesize >= 0)
13723 {
13724 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
13725 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(then_trap_backtrack)->framesize - 1) * sizeof(sljit_sw));
13726 }
13727 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13728 free_stack(common, 3);
13729
13730 JUMPHERE(jump);
13731 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
13732 }
13733
/* Generates the backtracking paths of a chain of backtrack records. The
chain is followed through the prev links, starting at current. For every
record the jumps collected in simple_backtracks are bound to the current
position first, then the generator that belongs to the opcode at the
record's cc is invoked.

common->then_trap is saved on entry and restored on exit, because the
OP_THEN_TRAP generator may overwrite it.

Arguments:
  common      the compiler state
  current     the first record to process (may be NULL)
*/
static void compile_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
then_trap_backtrack *outer_then_trap = common->then_trap;

for (; current != NULL; current = current->prev)
  {
  if (current->simple_backtracks != NULL)
    set_jumps(current->simple_backtracks, LABEL());

  switch(*current->cc)
    {
    case OP_SET_SOM:
    /* Put the saved OVECTOR(0) value back. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP1, 0);
    break;

    /* Plain repeat opcodes. */
    case OP_STAR: case OP_MINSTAR: case OP_PLUS: case OP_MINPLUS:
    case OP_QUERY: case OP_MINQUERY: case OP_UPTO: case OP_MINUPTO:
    case OP_EXACT:
    case OP_POSSTAR: case OP_POSPLUS: case OP_POSQUERY: case OP_POSUPTO:
    /* Their OP_...I variants. */
    case OP_STARI: case OP_MINSTARI: case OP_PLUSI: case OP_MINPLUSI:
    case OP_QUERYI: case OP_MINQUERYI: case OP_UPTOI: case OP_MINUPTOI:
    case OP_EXACTI:
    case OP_POSSTARI: case OP_POSPLUSI: case OP_POSQUERYI: case OP_POSUPTOI:
    /* The OP_NOT... variants. */
    case OP_NOTSTAR: case OP_NOTMINSTAR: case OP_NOTPLUS: case OP_NOTMINPLUS:
    case OP_NOTQUERY: case OP_NOTMINQUERY: case OP_NOTUPTO: case OP_NOTMINUPTO:
    case OP_NOTEXACT:
    case OP_NOTPOSSTAR: case OP_NOTPOSPLUS: case OP_NOTPOSQUERY: case OP_NOTPOSUPTO:
    /* The OP_NOT...I variants. */
    case OP_NOTSTARI: case OP_NOTMINSTARI: case OP_NOTPLUSI: case OP_NOTMINPLUSI:
    case OP_NOTQUERYI: case OP_NOTMINQUERYI: case OP_NOTUPTOI: case OP_NOTMINUPTOI:
    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI: case OP_NOTPOSPLUSI: case OP_NOTPOSQUERYI: case OP_NOTPOSUPTOI:
    /* The OP_TYPE... variants. */
    case OP_TYPESTAR: case OP_TYPEMINSTAR: case OP_TYPEPLUS: case OP_TYPEMINPLUS:
    case OP_TYPEQUERY: case OP_TYPEMINQUERY: case OP_TYPEUPTO: case OP_TYPEMINUPTO:
    case OP_TYPEEXACT:
    case OP_TYPEPOSSTAR: case OP_TYPEPOSPLUS: case OP_TYPEPOSQUERY: case OP_TYPEPOSUPTO:
    /* Character classes. */
    case OP_CLASS: case OP_NCLASS:
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
#endif
    compile_iterator_backtrackingpath(common, current);
    break;

    case OP_REF: case OP_REFI: case OP_DNREF: case OP_DNREFI:
    compile_ref_iterator_backtrackingpath(common, current);
    break;

    case OP_RECURSE:
    compile_recurse_backtrackingpath(common, current);
    break;

    case OP_ASSERT: case OP_ASSERT_NOT:
    case OP_ASSERTBACK: case OP_ASSERTBACK_NOT:
    compile_assert_backtrackingpath(common, current);
    break;

    case OP_ASSERT_NA: case OP_ASSERTBACK_NA:
    case OP_ONCE: case OP_SCRIPT_RUN:
    case OP_BRA: case OP_CBRA: case OP_COND:
    case OP_SBRA: case OP_SCBRA: case OP_SCOND:
    compile_bracket_backtrackingpath(common, current);
    break;

    case OP_BRAZERO:
    /* The opcode after OP_BRAZERO selects the generator. */
    if (current->cc[1] <= OP_ASSERTBACK_NOT)
      compile_assert_backtrackingpath(common, current);
    else
      compile_bracket_backtrackingpath(common, current);
    break;

    case OP_BRAPOS: case OP_CBRAPOS:
    case OP_SBRAPOS: case OP_SCBRAPOS:
    case OP_BRAPOSZERO:
    compile_bracketpos_backtrackingpath(common, current);
    break;

    case OP_BRAMINZERO:
    compile_braminzero_backtrackingpath(common, current);
    break;

    case OP_MARK:
    /* Put the saved mark pointer back. When has_skip_arg is set, five slots
    are released instead of one and the control head is put back as well. */
    if (common->has_skip_arg)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(4));
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      free_stack(common, 5);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);
      }
    else
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      free_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
      }
    break;

    case OP_THEN: case OP_THEN_ARG:
    case OP_PRUNE: case OP_PRUNE_ARG:
    case OP_SKIP: case OP_SKIP_ARG:
    compile_control_verb_backtrackingpath(common, current);
    break;

    case OP_COMMIT: case OP_COMMIT_ARG:
    /* Leave through the quit path; the return register is set to
    PCRE2_ERROR_NOMATCH first unless a local quit is available. */
    if (!common->local_quit_available)
      OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
    if (common->quit_label != NULL)
      JUMPTO(SLJIT_JUMP, common->quit_label);
    else
      add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
    break;

    case OP_CALLOUT: case OP_CALLOUT_STR:
    case OP_FAIL:
    case OP_ACCEPT: case OP_ASSERT_ACCEPT:
    /* Only the collected jumps need a target. */
    set_jumps(current->own_backtracks, LABEL());
    break;

    case OP_VREVERSE:
    compile_vreverse_backtrackingpath(common, current);
    break;

    case OP_THEN_TRAP:
    /* A virtual opcode for then traps. */
    compile_then_trap_backtrackingpath(common, current);
    break;

    default:
    SLJIT_UNREACHABLE();
    break;
    }
  }

common->then_trap = outer_then_trap;
}
13928
/* Generates the code of one recursion entry (common->currententry). Two
entry points are produced: entry_label, which starts the matching path, and
backtrack_label, which re-enters the group on a backtrack. Both are entered
with SLJIT_FAST_ENTER and left with SLJIT_FAST_RETURN. On return TMP1 is 1
when an alternative matched, and 0 on the other exits.

Argument:
  common      the compiler state; common->currententry selects the group

If the sljit compiler reports an error while an alternative is compiled, the
function returns early without generating the exit paths.
*/
static SLJIT_INLINE void compile_recurse(compiler_common *common)
{
DEFINE_COMPILER;
/* cc: the opening bracket of the group. */
PCRE2_SPTR cc = common->start + common->currententry->start;
/* ccbegin: past the opcode and the link, and past an IMM2_SIZE operand for every opcode except OP_BRA. */
PCRE2_SPTR ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE);
/* ccend: 1 + LINK_SIZE code units before bracketend(cc) (presumably the closing OP_KET). */
PCRE2_SPTR ccend = bracketend(cc) - (1 + LINK_SIZE);
uint32_t recurse_flags = 0;
int private_data_size = get_recurse_data_length(common, ccbegin, ccend, &recurse_flags);
int alt_count, alt_max, local_size;
backtrack_common altbacktrack;
jump_list *match = NULL;
struct sljit_jump *next_alt = NULL;
struct sljit_jump *accept_exit = NULL;
struct sljit_label *quit;
struct sljit_jump *mov_addr = NULL;

/* Recurse captures then. */
common->then_trap = NULL;

SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS);

alt_max = no_alternatives(cc);
alt_count = 0;

/* Matching path. */
SLJIT_ASSERT(common->currententry->entry_label == NULL && common->recursive_head_ptr != 0);
common->currententry->entry_label = LABEL();
/* Calls emitted before this point are bound to the entry label now. */
set_jumps(common->currententry->entry_calls, common->currententry->entry_label);

/* The return address of the fast call arrives in TMP2. */
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, TMP2, 0);
count_match(common);

/* Two local slots when there is more than one alternative (the extra one
keeps STR_PTR), one slot otherwise. */
local_size = (alt_max > 1) ? 2 : 1;

/* (Reversed) stack layout:
   [private data][return address][optional: str ptr] ... [optional: alternative index][recursive_head_ptr] */

allocate_stack(common, private_data_size + local_size);
/* Save return address. */
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP2, 0);

copy_recurse_data(common, ccbegin, ccend, recurse_copy_from_global, local_size, private_data_size + local_size, recurse_flags);

/* This variable is saved and restored all time when we enter or exit from a recursive context. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, STACK_TOP, 0);

if (recurse_flags & recurse_flag_control_head_found)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);

/* STR_PTR is kept in STACK(0) so that later alternatives can reload it. */
if (alt_max > 1)
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

/* The quit and accept targets are reset; jumps collected on common->quit and
common->accept while the body is compiled are resolved further below. */
memset(&altbacktrack, 0, sizeof(backtrack_common));
common->quit_label = NULL;
common->accept_label = NULL;
common->quit = NULL;
common->accept = NULL;
altbacktrack.cc = ccbegin;
/* Advance cc by the link value; it is passed to compile_matchingpath below
together with the start of the alternative. */
cc += GET(cc, 1);
while (1)
  {
  altbacktrack.top = NULL;
  altbacktrack.own_backtracks = NULL;

  /* Every alternative but the first reloads the saved STR_PTR. */
  if (altbacktrack.cc != ccbegin)
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  compile_matchingpath(common, altbacktrack.cc, cc, &altbacktrack);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return;

  /* The alternative matched. STACK(0) receives the recursive head (stored
  from TMP2 after the match label below); STACK(1), when allocated, records
  which alternative matched. */
  allocate_stack(common, (alt_max > 1 || (recurse_flags & recurse_flag_accept_found)) ? 2 : 1);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);

  if (alt_max > 1 || (recurse_flags & recurse_flag_accept_found))
    {
    /* With more than three alternatives an address is stored (its label is
    set later through mov_addr) and the backtrack entry jumps to it
    indirectly; otherwise the alternative index is stored and compared. */
    if (alt_max > 3)
      mov_addr = sljit_emit_mov_addr(compiler, SLJIT_MEM1(STACK_TOP), STACK(1));
    else
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, alt_count);
    }

  add_jump(compiler, &match, JUMP(SLJIT_JUMP));

  if (alt_count == 0)
    {
    /* Backtracking path entry. */
    SLJIT_ASSERT(common->currententry->backtrack_label == NULL);
    common->currententry->backtrack_label = LABEL();
    set_jumps(common->currententry->backtrack_calls, common->currententry->backtrack_label);

    /* The return address of the fast call arrives in TMP1 here. */
    sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, TMP1, 0);

    /* -1 in STACK(1) is the marker written by the accept path below. */
    if (recurse_flags & recurse_flag_accept_found)
      accept_exit = CMP(SLJIT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, -1);

    /* TMP2 = the recursive head stored in STACK(0) by the match exit. */
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    /* Save return address. */
    OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(local_size - 1), TMP1, 0);

    copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, recurse_flags);

    if (alt_max > 1)
      {
      /* TMP1 = the recorded alternative index or address. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
      free_stack(common, 2);

      if (alt_max > 3)
        {
        sljit_emit_ijump(compiler, SLJIT_JUMP, TMP1, 0);
        /* The address stored by the first alternative points here. */
        sljit_set_label(mov_addr, LABEL());
        sljit_emit_op0(compiler, SLJIT_ENDBR);
        }
      else
        next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
      }
    else
      free_stack(common, (recurse_flags & recurse_flag_accept_found) ? 2 : 1);
    }
  else if (alt_max > 3)
    {
    /* The address stored by this alternative points here. */
    sljit_set_label(mov_addr, LABEL());
    sljit_emit_op0(compiler, SLJIT_ENDBR);
    }
  else
    {
    /* Index dispatch: the previous comparison lands here. Only the middle
    one of three alternatives needs a further comparison. */
    JUMPHERE(next_alt);
    if (alt_count + 1 < alt_max)
      {
      SLJIT_ASSERT(alt_count == 1 && alt_max == 3);
      next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 1);
      }
    }

  alt_count++;

  /* The backtracking path of this alternative follows its dispatch point. */
  compile_backtrackingpath(common, altbacktrack.top);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return;
  set_jumps(altbacktrack.own_backtracks, LABEL());

  if (*cc != OP_ALT)
    break;

  /* Move on to the next alternative. */
  altbacktrack.cc = cc + 1 + LINK_SIZE;
  cc += GET(cc, 1);
  }

/* No alternative is matched. */

quit = LABEL();

copy_recurse_data(common, ccbegin, ccend, recurse_copy_private_to_global, local_size, private_data_size + local_size, recurse_flags);

/* Reload the return address, release the whole area and return with TMP1 = 0. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1));
free_stack(common, private_data_size + local_size);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);

if (common->quit != NULL)
  {
  SLJIT_ASSERT(recurse_flags & recurse_flag_quit_found);

  /* Jumps collected on common->quit: reload STACK_TOP from the recursive
  head, copy the shared data and leave through the no-match exit above. */
  set_jumps(common->quit, LABEL());
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
  copy_recurse_data(common, ccbegin, ccend, recurse_copy_shared_to_global, local_size, private_data_size + local_size, recurse_flags);
  JUMPTO(SLJIT_JUMP, quit);
  }

if (recurse_flags & recurse_flag_accept_found)
  {
  /* Backtrack entry taken when the -1 marker was found in STACK(1): release
  the two slots allocated by the accept path and return with TMP1 = 0. */
  JUMPHERE(accept_exit);
  free_stack(common, 2);

  /* Save return address. */
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP1, 0);

  copy_recurse_data(common, ccbegin, ccend, recurse_copy_kept_shared_to_global, local_size, private_data_size + local_size, recurse_flags);

  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1));
  free_stack(common, private_data_size + local_size);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
  OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);
  }

if (common->accept != NULL)
  {
  SLJIT_ASSERT(recurse_flags & recurse_flag_accept_found);

  /* Jumps collected on common->accept: reload STACK_TOP from the recursive
  head (also kept in TMP2), allocate two slots, write the -1 marker into
  STACK(1) and fall through to the match exit. */
  set_jumps(common->accept, LABEL());

  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
  OP1(SLJIT_MOV, TMP2, 0, STACK_TOP, 0);

  allocate_stack(common, 2);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, -1);
  }

/* Match exit. TMP2 holds the recursive head on every path that arrives here. */
set_jumps(match, LABEL());

OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);

copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, recurse_flags);

/* Fetch the return address from the slot relative to the recursive head and
return with TMP1 = 1. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), STACK(local_size - 1));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);
}
14137
/* These helper macros are used by the backtracking path generators above
and are not needed beyond this point. */
#undef COMPILE_BACKTRACKINGPATH
#undef CURRENT_AS

/* Bits of the jit_compile() mode argument that configure the compilation.
jit_compile() tests PCRE2_JIT_INVALID_UTF and then clears these bits from
mode before storing it. */
#define PUBLIC_JIT_COMPILE_CONFIGURATION_OPTIONS \
  (PCRE2_JIT_INVALID_UTF)
14143
jit_compile(pcre2_code * code,sljit_u32 mode)14144 static int jit_compile(pcre2_code *code, sljit_u32 mode)
14145 {
14146 pcre2_real_code *re = (pcre2_real_code *)code;
14147 struct sljit_compiler *compiler;
14148 backtrack_common rootbacktrack;
14149 compiler_common common_data;
14150 compiler_common *common = &common_data;
14151 const sljit_u8 *tables = re->tables;
14152 void *allocator_data = &re->memctl;
14153 int private_data_size;
14154 PCRE2_SPTR ccend;
14155 executable_functions *functions;
14156 void *executable_func;
14157 sljit_uw executable_size;
14158 sljit_uw total_length;
14159 struct sljit_label *mainloop_label = NULL;
14160 struct sljit_label *continue_match_label;
14161 struct sljit_label *empty_match_found_label = NULL;
14162 struct sljit_label *empty_match_backtrack_label = NULL;
14163 struct sljit_label *reset_match_label;
14164 struct sljit_label *quit_label;
14165 struct sljit_jump *jump;
14166 struct sljit_jump *minlength_check_failed = NULL;
14167 struct sljit_jump *empty_match = NULL;
14168 struct sljit_jump *end_anchor_failed = NULL;
14169 jump_list *reqcu_not_found = NULL;
14170
14171 SLJIT_ASSERT(tables);
14172
14173 #if HAS_VIRTUAL_REGISTERS == 1
14174 SLJIT_ASSERT(sljit_get_register_index(SLJIT_GP_REGISTER, TMP3) < 0 && sljit_get_register_index(SLJIT_GP_REGISTER, ARGUMENTS) < 0 && sljit_get_register_index(SLJIT_GP_REGISTER, RETURN_ADDR) < 0);
14175 #elif HAS_VIRTUAL_REGISTERS == 0
14176 SLJIT_ASSERT(sljit_get_register_index(SLJIT_GP_REGISTER, TMP3) >= 0 && sljit_get_register_index(SLJIT_GP_REGISTER, ARGUMENTS) >= 0 && sljit_get_register_index(SLJIT_GP_REGISTER, RETURN_ADDR) >= 0);
14177 #else
14178 #error "Invalid value for HAS_VIRTUAL_REGISTERS"
14179 #endif
14180
14181 memset(&rootbacktrack, 0, sizeof(backtrack_common));
14182 memset(common, 0, sizeof(compiler_common));
14183 common->re = re;
14184 common->name_table = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code));
14185 rootbacktrack.cc = common->name_table + re->name_count * re->name_entry_size;
14186
14187 #ifdef SUPPORT_UNICODE
14188 common->invalid_utf = (mode & PCRE2_JIT_INVALID_UTF) != 0;
14189 #endif /* SUPPORT_UNICODE */
14190 mode &= ~PUBLIC_JIT_COMPILE_CONFIGURATION_OPTIONS;
14191
14192 common->start = rootbacktrack.cc;
14193 common->read_only_data_head = NULL;
14194 common->fcc = tables + fcc_offset;
14195 common->lcc = (sljit_sw)(tables + lcc_offset);
14196 common->mode = mode;
14197 common->might_be_empty = (re->minlength == 0) || (re->flags & PCRE2_MATCH_EMPTY);
14198 common->allow_empty_partial = (re->max_lookbehind > 0) || (re->flags & PCRE2_MATCH_EMPTY);
14199 common->nltype = NLTYPE_FIXED;
14200 switch(re->newline_convention)
14201 {
14202 case PCRE2_NEWLINE_CR: common->newline = CHAR_CR; break;
14203 case PCRE2_NEWLINE_LF: common->newline = CHAR_NL; break;
14204 case PCRE2_NEWLINE_CRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; break;
14205 case PCRE2_NEWLINE_ANY: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
14206 case PCRE2_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
14207 case PCRE2_NEWLINE_NUL: common->newline = CHAR_NUL; break;
14208 default: return PCRE2_ERROR_INTERNAL;
14209 }
14210 common->nlmax = READ_CHAR_MAX;
14211 common->nlmin = 0;
14212 if (re->bsr_convention == PCRE2_BSR_UNICODE)
14213 common->bsr_nltype = NLTYPE_ANY;
14214 else if (re->bsr_convention == PCRE2_BSR_ANYCRLF)
14215 common->bsr_nltype = NLTYPE_ANYCRLF;
14216 else
14217 {
14218 #ifdef BSR_ANYCRLF
14219 common->bsr_nltype = NLTYPE_ANYCRLF;
14220 #else
14221 common->bsr_nltype = NLTYPE_ANY;
14222 #endif
14223 }
14224 common->bsr_nlmax = READ_CHAR_MAX;
14225 common->bsr_nlmin = 0;
14226 common->endonly = (re->overall_options & PCRE2_DOLLAR_ENDONLY) != 0;
14227 common->ctypes = (sljit_sw)(tables + ctypes_offset);
14228 common->name_count = re->name_count;
14229 common->name_entry_size = re->name_entry_size;
14230 common->unset_backref = (re->overall_options & PCRE2_MATCH_UNSET_BACKREF) != 0;
14231 common->alt_circumflex = (re->overall_options & PCRE2_ALT_CIRCUMFLEX) != 0;
14232 #ifdef SUPPORT_UNICODE
14233 /* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */
14234 common->utf = (re->overall_options & PCRE2_UTF) != 0;
14235 common->ucp = (re->overall_options & PCRE2_UCP) != 0;
14236 if (common->utf)
14237 {
14238 if (common->nltype == NLTYPE_ANY)
14239 common->nlmax = 0x2029;
14240 else if (common->nltype == NLTYPE_ANYCRLF)
14241 common->nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
14242 else
14243 {
14244 /* We only care about the first newline character. */
14245 common->nlmax = common->newline & 0xff;
14246 }
14247
14248 if (common->nltype == NLTYPE_FIXED)
14249 common->nlmin = common->newline & 0xff;
14250 else
14251 common->nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
14252
14253 if (common->bsr_nltype == NLTYPE_ANY)
14254 common->bsr_nlmax = 0x2029;
14255 else
14256 common->bsr_nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
14257 common->bsr_nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
14258 }
14259 else
14260 common->invalid_utf = FALSE;
14261 #endif /* SUPPORT_UNICODE */
14262 ccend = bracketend(common->start);
14263
14264 /* Calculate the local space size on the stack. */
14265 common->ovector_start = LIMIT_MATCH + sizeof(sljit_sw);
14266 common->optimized_cbracket = (sljit_u8 *)SLJIT_MALLOC(re->top_bracket + 1, allocator_data);
14267 if (!common->optimized_cbracket)
14268 return PCRE2_ERROR_NOMEMORY;
14269 #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 1
14270 memset(common->optimized_cbracket, 0, re->top_bracket + 1);
14271 #else
14272 memset(common->optimized_cbracket, 1, re->top_bracket + 1);
14273 #endif
14274
14275 SLJIT_ASSERT(*common->start == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET);
14276 #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 2
14277 common->capture_last_ptr = common->ovector_start;
14278 common->ovector_start += sizeof(sljit_sw);
14279 #endif
14280 if (!check_opcode_types(common, common->start, ccend))
14281 {
14282 SLJIT_FREE(common->optimized_cbracket, allocator_data);
14283 return PCRE2_ERROR_NOMEMORY;
14284 }
14285
14286 /* Checking flags and updating ovector_start. */
14287 if (mode == PCRE2_JIT_COMPLETE && (re->flags & PCRE2_LASTSET) != 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
14288 {
14289 common->req_char_ptr = common->ovector_start;
14290 common->ovector_start += sizeof(sljit_sw);
14291 }
14292 if (mode != PCRE2_JIT_COMPLETE)
14293 {
14294 common->start_used_ptr = common->ovector_start;
14295 common->ovector_start += sizeof(sljit_sw);
14296 if (mode == PCRE2_JIT_PARTIAL_SOFT)
14297 {
14298 common->hit_start = common->ovector_start;
14299 common->ovector_start += sizeof(sljit_sw);
14300 }
14301 }
14302 if ((re->overall_options & (PCRE2_FIRSTLINE | PCRE2_USE_OFFSET_LIMIT)) != 0)
14303 {
14304 common->match_end_ptr = common->ovector_start;
14305 common->ovector_start += sizeof(sljit_sw);
14306 }
14307 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
14308 common->control_head_ptr = 1;
14309 #endif
14310 if (common->control_head_ptr != 0)
14311 {
14312 common->control_head_ptr = common->ovector_start;
14313 common->ovector_start += sizeof(sljit_sw);
14314 }
14315 if (common->has_set_som)
14316 {
14317 /* Saving the real start pointer is necessary. */
14318 common->start_ptr = common->ovector_start;
14319 common->ovector_start += sizeof(sljit_sw);
14320 }
14321
14322 /* Aligning ovector to even number of sljit words. */
14323 if ((common->ovector_start & sizeof(sljit_sw)) != 0)
14324 common->ovector_start += sizeof(sljit_sw);
14325
14326 if (common->start_ptr == 0)
14327 common->start_ptr = OVECTOR(0);
14328
14329 /* Capturing brackets cannot be optimized if callouts are allowed. */
14330 if (common->capture_last_ptr != 0)
14331 memset(common->optimized_cbracket, 0, re->top_bracket + 1);
14332
14333 SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0));
14334 common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw);
14335
14336 total_length = ccend - common->start;
14337 common->private_data_ptrs = (sljit_s32*)SLJIT_MALLOC(total_length * (sizeof(sljit_s32) + (common->has_then ? 1 : 0)), allocator_data);
14338 if (!common->private_data_ptrs)
14339 {
14340 SLJIT_FREE(common->optimized_cbracket, allocator_data);
14341 return PCRE2_ERROR_NOMEMORY;
14342 }
14343 memset(common->private_data_ptrs, 0, total_length * sizeof(sljit_s32));
14344
14345 private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw);
14346
14347 if ((re->overall_options & PCRE2_ANCHORED) == 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 && !common->has_skip_in_assert_back)
14348 detect_early_fail(common, common->start, &private_data_size, 0, 0);
14349
14350 set_private_data_ptrs(common, &private_data_size, ccend);
14351
14352 SLJIT_ASSERT(common->early_fail_start_ptr <= common->early_fail_end_ptr);
14353
14354 if (private_data_size > 65536)
14355 {
14356 SLJIT_FREE(common->private_data_ptrs, allocator_data);
14357 SLJIT_FREE(common->optimized_cbracket, allocator_data);
14358 return PCRE2_ERROR_NOMEMORY;
14359 }
14360
14361 if (common->has_then)
14362 {
14363 common->then_offsets = (sljit_u8 *)(common->private_data_ptrs + total_length);
14364 memset(common->then_offsets, 0, total_length);
14365 set_then_offsets(common, common->start, NULL);
14366 }
14367
14368 compiler = sljit_create_compiler(allocator_data);
14369 if (!compiler)
14370 {
14371 SLJIT_FREE(common->optimized_cbracket, allocator_data);
14372 SLJIT_FREE(common->private_data_ptrs, allocator_data);
14373 return PCRE2_ERROR_NOMEMORY;
14374 }
14375 common->compiler = compiler;
14376
14377 /* Main pcre2_jit_exec entry. */
14378 SLJIT_ASSERT((private_data_size & (sizeof(sljit_sw) - 1)) == 0);
14379 sljit_emit_enter(compiler, 0, SLJIT_ARGS1(W, W), 5, 5, SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS, 0, private_data_size);
14380
14381 /* Register init. */
14382 reset_ovector(common, (re->top_bracket + 1) * 2);
14383 if (common->req_char_ptr != 0)
14384 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, SLJIT_R0, 0);
14385
14386 OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_S0, 0);
14387 OP1(SLJIT_MOV, TMP1, 0, SLJIT_S0, 0);
14388 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
14389 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end));
14390 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
14391 OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, limit_match));
14392 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, end));
14393 OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, start));
14394 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
14395 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH, TMP1, 0);
14396
14397 if (common->early_fail_start_ptr < common->early_fail_end_ptr)
14398 reset_early_fail(common);
14399
14400 if (mode == PCRE2_JIT_PARTIAL_SOFT)
14401 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
14402 if (common->mark_ptr != 0)
14403 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
14404 if (common->control_head_ptr != 0)
14405 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
14406
14407 /* Main part of the matching */
14408 if ((re->overall_options & PCRE2_ANCHORED) == 0)
14409 {
14410 mainloop_label = mainloop_entry(common);
14411 continue_match_label = LABEL();
14412 /* Forward search if possible. */
14413 if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
14414 {
14415 if (mode == PCRE2_JIT_COMPLETE && fast_forward_first_n_chars(common))
14416 ;
14417 else if ((re->flags & PCRE2_FIRSTSET) != 0)
14418 fast_forward_first_char(common);
14419 else if ((re->flags & PCRE2_STARTLINE) != 0)
14420 fast_forward_newline(common);
14421 else if ((re->flags & PCRE2_FIRSTMAPSET) != 0)
14422 fast_forward_start_bits(common);
14423 }
14424 }
14425 else
14426 continue_match_label = LABEL();
14427
14428 if (mode == PCRE2_JIT_COMPLETE && re->minlength > 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
14429 {
14430 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
14431 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(re->minlength));
14432 minlength_check_failed = CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0);
14433 }
14434 if (common->req_char_ptr != 0)
14435 reqcu_not_found = search_requested_char(common, (PCRE2_UCHAR)(re->last_codeunit), (re->flags & PCRE2_LASTCASELESS) != 0, (re->flags & PCRE2_FIRSTSET) != 0);
14436
14437 /* Store the current STR_PTR in OVECTOR(0). */
14438 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
14439 /* Copy the limit of allowed recursions. */
14440 OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH);
14441 if (common->capture_last_ptr != 0)
14442 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, 0);
14443 if (common->fast_forward_bc_ptr != NULL)
14444 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), PRIVATE_DATA(common->fast_forward_bc_ptr + 1) >> 3, STR_PTR, 0);
14445
14446 if (common->start_ptr != OVECTOR(0))
14447 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_ptr, STR_PTR, 0);
14448
14449 /* Copy the beginning of the string. */
14450 if (mode == PCRE2_JIT_PARTIAL_SOFT)
14451 {
14452 jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
14453 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
14454 JUMPHERE(jump);
14455 }
14456 else if (mode == PCRE2_JIT_PARTIAL_HARD)
14457 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
14458
14459 compile_matchingpath(common, common->start, ccend, &rootbacktrack);
14460 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
14461 {
14462 sljit_free_compiler(compiler);
14463 SLJIT_FREE(common->optimized_cbracket, allocator_data);
14464 SLJIT_FREE(common->private_data_ptrs, allocator_data);
14465 PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14466 return PCRE2_ERROR_NOMEMORY;
14467 }
14468
14469 if ((re->overall_options & PCRE2_ENDANCHORED) != 0)
14470 end_anchor_failed = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0);
14471
14472 if (common->might_be_empty)
14473 {
14474 empty_match = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
14475 empty_match_found_label = LABEL();
14476 }
14477
14478 common->accept_label = LABEL();
14479 if (common->accept != NULL)
14480 set_jumps(common->accept, common->accept_label);
14481
14482 /* This means we have a match. Update the ovector. */
14483 copy_ovector(common, re->top_bracket + 1);
14484 common->quit_label = common->abort_label = LABEL();
14485 if (common->quit != NULL)
14486 set_jumps(common->quit, common->quit_label);
14487 if (common->abort != NULL)
14488 set_jumps(common->abort, common->abort_label);
14489 if (minlength_check_failed != NULL)
14490 SET_LABEL(minlength_check_failed, common->abort_label);
14491
14492 sljit_emit_op0(compiler, SLJIT_SKIP_FRAMES_BEFORE_RETURN);
14493 sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0);
14494
14495 if (common->failed_match != NULL)
14496 {
14497 SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);
14498 set_jumps(common->failed_match, LABEL());
14499 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
14500 JUMPTO(SLJIT_JUMP, common->abort_label);
14501 }
14502
14503 if ((re->overall_options & PCRE2_ENDANCHORED) != 0)
14504 JUMPHERE(end_anchor_failed);
14505
14506 if (mode != PCRE2_JIT_COMPLETE)
14507 {
14508 common->partialmatchlabel = LABEL();
14509 set_jumps(common->partialmatch, common->partialmatchlabel);
14510 return_with_partial_match(common, common->quit_label);
14511 }
14512
14513 if (common->might_be_empty)
14514 empty_match_backtrack_label = LABEL();
14515 compile_backtrackingpath(common, rootbacktrack.top);
14516 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
14517 {
14518 sljit_free_compiler(compiler);
14519 SLJIT_FREE(common->optimized_cbracket, allocator_data);
14520 SLJIT_FREE(common->private_data_ptrs, allocator_data);
14521 PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14522 return PCRE2_ERROR_NOMEMORY;
14523 }
14524
14525 SLJIT_ASSERT(rootbacktrack.prev == NULL);
14526 reset_match_label = LABEL();
14527
14528 if (mode == PCRE2_JIT_PARTIAL_SOFT)
14529 {
14530 /* Update hit_start only in the first time. */
14531 jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
14532 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
14533 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
14534 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, TMP1, 0);
14535 JUMPHERE(jump);
14536 }
14537
14538 /* Check we have remaining characters. */
14539 if ((re->overall_options & PCRE2_ANCHORED) == 0 && common->match_end_ptr != 0)
14540 {
14541 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
14542 }
14543
14544 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP),
14545 (common->fast_forward_bc_ptr != NULL) ? (PRIVATE_DATA(common->fast_forward_bc_ptr + 1) >> 3) : common->start_ptr);
14546
14547 if ((re->overall_options & PCRE2_ANCHORED) == 0)
14548 {
14549 if (common->ff_newline_shortcut != NULL)
14550 {
14551 /* There cannot be more newlines if PCRE2_FIRSTLINE is set. */
14552 if ((re->overall_options & PCRE2_FIRSTLINE) == 0)
14553 {
14554 if (common->match_end_ptr != 0)
14555 {
14556 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
14557 OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
14558 CMPTO(SLJIT_LESS, STR_PTR, 0, TMP1, 0, common->ff_newline_shortcut);
14559 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
14560 }
14561 else
14562 CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, common->ff_newline_shortcut);
14563 }
14564 }
14565 else
14566 CMPTO(SLJIT_LESS, STR_PTR, 0, (common->match_end_ptr == 0) ? STR_END : TMP1, 0, mainloop_label);
14567 }
14568
14569 /* No more remaining characters. */
14570 if (reqcu_not_found != NULL)
14571 set_jumps(reqcu_not_found, LABEL());
14572
14573 if (mode == PCRE2_JIT_PARTIAL_SOFT)
14574 CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1, common->partialmatchlabel);
14575
14576 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
14577 JUMPTO(SLJIT_JUMP, common->quit_label);
14578
14579 flush_stubs(common);
14580
14581 if (common->might_be_empty)
14582 {
14583 JUMPHERE(empty_match);
14584 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
14585 OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options));
14586 OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);
14587 JUMPTO(SLJIT_NOT_ZERO, empty_match_backtrack_label);
14588 OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);
14589 JUMPTO(SLJIT_ZERO, empty_match_found_label);
14590 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
14591 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found_label);
14592 JUMPTO(SLJIT_JUMP, empty_match_backtrack_label);
14593 }
14594
14595 common->fast_forward_bc_ptr = NULL;
14596 common->early_fail_start_ptr = 0;
14597 common->early_fail_end_ptr = 0;
14598 common->currententry = common->entries;
14599 common->local_quit_available = TRUE;
14600 quit_label = common->quit_label;
14601 if (common->currententry != NULL)
14602 {
14603 /* A free bit for each private data. */
14604 common->recurse_bitset_size = ((private_data_size / SSIZE_OF(sw)) + 7) >> 3;
14605 SLJIT_ASSERT(common->recurse_bitset_size > 0);
14606 common->recurse_bitset = (sljit_u8*)SLJIT_MALLOC(common->recurse_bitset_size, allocator_data);;
14607
14608 if (common->recurse_bitset != NULL)
14609 {
14610 do
14611 {
14612 /* Might add new entries. */
14613 compile_recurse(common);
14614 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
14615 break;
14616 flush_stubs(common);
14617 common->currententry = common->currententry->next;
14618 }
14619 while (common->currententry != NULL);
14620
14621 SLJIT_FREE(common->recurse_bitset, allocator_data);
14622 }
14623
14624 if (common->currententry != NULL)
14625 {
14626 /* The common->recurse_bitset has been freed. */
14627 SLJIT_ASSERT(sljit_get_compiler_error(compiler) || common->recurse_bitset == NULL);
14628
14629 sljit_free_compiler(compiler);
14630 SLJIT_FREE(common->optimized_cbracket, allocator_data);
14631 SLJIT_FREE(common->private_data_ptrs, allocator_data);
14632 PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14633 return PCRE2_ERROR_NOMEMORY;
14634 }
14635 }
14636 common->local_quit_available = FALSE;
14637 common->quit_label = quit_label;
14638
14639 /* Allocating stack, returns with PCRE_ERROR_JIT_STACKLIMIT if fails. */
14640 /* This is a (really) rare case. */
14641 set_jumps(common->stackalloc, LABEL());
14642 /* RETURN_ADDR is not a saved register. */
14643 sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCALS0);
14644
14645 SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
14646
14647 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
14648 OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
14649 OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_LIMIT, 0, SLJIT_IMM, STACK_GROWTH_RATE);
14650 OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, stack));
14651 OP1(SLJIT_MOV, STACK_LIMIT, 0, TMP2, 0);
14652
14653 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(sljit_stack_resize));
14654
14655 jump = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
14656 OP1(SLJIT_MOV, TMP2, 0, STACK_LIMIT, 0);
14657 OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_RETURN_REG, 0);
14658 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
14659 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
14660 OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
14661
14662 /* Allocation failed. */
14663 JUMPHERE(jump);
14664 /* We break the return address cache here, but this is a really rare case. */
14665 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_JIT_STACKLIMIT);
14666 JUMPTO(SLJIT_JUMP, common->quit_label);
14667
14668 /* Call limit reached. */
14669 set_jumps(common->calllimit, LABEL());
14670 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_MATCHLIMIT);
14671 JUMPTO(SLJIT_JUMP, common->quit_label);
14672
14673 if (common->revertframes != NULL)
14674 {
14675 set_jumps(common->revertframes, LABEL());
14676 do_revertframes(common);
14677 }
14678 if (common->wordboundary != NULL)
14679 {
14680 set_jumps(common->wordboundary, LABEL());
14681 check_wordboundary(common, FALSE);
14682 }
14683 if (common->ucp_wordboundary != NULL)
14684 {
14685 set_jumps(common->ucp_wordboundary, LABEL());
14686 check_wordboundary(common, TRUE);
14687 }
14688 if (common->anynewline != NULL)
14689 {
14690 set_jumps(common->anynewline, LABEL());
14691 check_anynewline(common);
14692 }
14693 if (common->hspace != NULL)
14694 {
14695 set_jumps(common->hspace, LABEL());
14696 check_hspace(common);
14697 }
14698 if (common->vspace != NULL)
14699 {
14700 set_jumps(common->vspace, LABEL());
14701 check_vspace(common);
14702 }
14703 if (common->casefulcmp != NULL)
14704 {
14705 set_jumps(common->casefulcmp, LABEL());
14706 do_casefulcmp(common);
14707 }
14708 if (common->caselesscmp != NULL)
14709 {
14710 set_jumps(common->caselesscmp, LABEL());
14711 do_caselesscmp(common);
14712 }
14713 if (common->reset_match != NULL || common->restart_match != NULL)
14714 {
14715 if (common->restart_match != NULL)
14716 {
14717 set_jumps(common->restart_match, LABEL());
14718 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
14719 }
14720
14721 set_jumps(common->reset_match, LABEL());
14722 do_reset_match(common, (re->top_bracket + 1) * 2);
14723 /* The value of restart_match is in TMP1. */
14724 CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP1, 0, continue_match_label);
14725 OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
14726 JUMPTO(SLJIT_JUMP, reset_match_label);
14727 }
14728 #ifdef SUPPORT_UNICODE
14729 #if PCRE2_CODE_UNIT_WIDTH == 8
14730 if (common->utfreadchar != NULL)
14731 {
14732 set_jumps(common->utfreadchar, LABEL());
14733 do_utfreadchar(common);
14734 }
14735 if (common->utfreadtype8 != NULL)
14736 {
14737 set_jumps(common->utfreadtype8, LABEL());
14738 do_utfreadtype8(common);
14739 }
14740 if (common->utfpeakcharback != NULL)
14741 {
14742 set_jumps(common->utfpeakcharback, LABEL());
14743 do_utfpeakcharback(common);
14744 }
14745 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
14746 #if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
14747 if (common->utfreadchar_invalid != NULL)
14748 {
14749 set_jumps(common->utfreadchar_invalid, LABEL());
14750 do_utfreadchar_invalid(common);
14751 }
14752 if (common->utfreadnewline_invalid != NULL)
14753 {
14754 set_jumps(common->utfreadnewline_invalid, LABEL());
14755 do_utfreadnewline_invalid(common);
14756 }
14757 if (common->utfmoveback_invalid)
14758 {
14759 set_jumps(common->utfmoveback_invalid, LABEL());
14760 do_utfmoveback_invalid(common);
14761 }
14762 if (common->utfpeakcharback_invalid)
14763 {
14764 set_jumps(common->utfpeakcharback_invalid, LABEL());
14765 do_utfpeakcharback_invalid(common);
14766 }
14767 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16 */
14768 if (common->getucd != NULL)
14769 {
14770 set_jumps(common->getucd, LABEL());
14771 do_getucd(common);
14772 }
14773 if (common->getucdtype != NULL)
14774 {
14775 set_jumps(common->getucdtype, LABEL());
14776 do_getucdtype(common);
14777 }
14778 #endif /* SUPPORT_UNICODE */
14779
14780 SLJIT_FREE(common->optimized_cbracket, allocator_data);
14781 SLJIT_FREE(common->private_data_ptrs, allocator_data);
14782
14783 executable_func = sljit_generate_code(compiler, 0, NULL);
14784 executable_size = sljit_get_generated_code_size(compiler);
14785 sljit_free_compiler(compiler);
14786
14787 if (executable_func == NULL)
14788 {
14789 PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14790 return PCRE2_ERROR_NOMEMORY;
14791 }
14792
14793 /* Reuse the function descriptor if possible. */
14794 if (re->executable_jit != NULL)
14795 functions = (executable_functions *)re->executable_jit;
14796 else
14797 {
14798 functions = SLJIT_MALLOC(sizeof(executable_functions), allocator_data);
14799 if (functions == NULL)
14800 {
14801 /* This case is highly unlikely since we just recently
14802 freed a lot of memory. Not impossible though. */
14803 sljit_free_code(executable_func, NULL);
14804 PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14805 return PCRE2_ERROR_NOMEMORY;
14806 }
14807 memset(functions, 0, sizeof(executable_functions));
14808 functions->top_bracket = re->top_bracket + 1;
14809 functions->limit_match = re->limit_match;
14810 re->executable_jit = functions;
14811 }
14812
14813 /* Turn mode into an index. */
14814 if (mode == PCRE2_JIT_COMPLETE)
14815 mode = 0;
14816 else
14817 mode = (mode == PCRE2_JIT_PARTIAL_SOFT) ? 1 : 2;
14818
14819 SLJIT_ASSERT(mode < JIT_NUMBER_OF_COMPILE_MODES);
14820 functions->executable_funcs[mode] = executable_func;
14821 functions->read_only_data_heads[mode] = common->read_only_data_head;
14822 functions->executable_sizes[mode] = executable_size;
14823 return 0;
14824 }
14825
14826 #endif
14827
14828 /*************************************************
14829 * JIT compile a Regular Expression *
14830 *************************************************/
14831
/* This function uses JIT to convert a previously-compiled pattern into machine
code.
14834
14835 Arguments:
14836 code a compiled pattern
14837 options JIT option bits
14838
14839 Returns: 0: success or (*NOJIT) was used
14840 <0: an error code
14841 */
14842
/* Every option bit that pcre2_jit_compile() accepts. A call that sets any bit
outside this mask is rejected with PCRE2_ERROR_JIT_BADOPTION. */

#define PUBLIC_JIT_COMPILE_OPTIONS \
  (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT | \
   PCRE2_JIT_PARTIAL_HARD | PCRE2_JIT_INVALID_UTF)
14845
14846 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_jit_compile(pcre2_code * code,uint32_t options)14847 pcre2_jit_compile(pcre2_code *code, uint32_t options)
14848 {
14849 pcre2_real_code *re = (pcre2_real_code *)code;
14850 #ifdef SUPPORT_JIT
14851 executable_functions *functions;
14852 static int executable_allocator_is_working = -1;
14853 #endif
14854
14855 if (code == NULL)
14856 return PCRE2_ERROR_NULL;
14857
14858 if ((options & ~PUBLIC_JIT_COMPILE_OPTIONS) != 0)
14859 return PCRE2_ERROR_JIT_BADOPTION;
14860
14861 /* Support for invalid UTF was first introduced in JIT, with the option
14862 PCRE2_JIT_INVALID_UTF. Later, support was added to the interpreter, and the
14863 compile-time option PCRE2_MATCH_INVALID_UTF was created. This is now the
14864 preferred feature, with the earlier option deprecated. However, for backward
14865 compatibility, if the earlier option is set, it forces the new option so that
14866 if JIT matching falls back to the interpreter, there is still support for
14867 invalid UTF. However, if this function has already been successfully called
14868 without PCRE2_JIT_INVALID_UTF and without PCRE2_MATCH_INVALID_UTF (meaning that
14869 non-invalid-supporting JIT code was compiled), give an error.
14870
14871 If in the future support for PCRE2_JIT_INVALID_UTF is withdrawn, the following
14872 actions are needed:
14873
14874 1. Remove the definition from pcre2.h.in and from the list in
14875 PUBLIC_JIT_COMPILE_OPTIONS above.
14876
14877 2. Replace PCRE2_JIT_INVALID_UTF with a local flag in this module.
14878
14879 3. Replace PCRE2_JIT_INVALID_UTF in pcre2_jit_test.c.
14880
14881 4. Delete the following short block of code. The setting of "re" and
14882 "functions" can be moved into the JIT-only block below, but if that is
14883 done, (void)re and (void)functions will be needed in the non-JIT case, to
14884 avoid compiler warnings.
14885 */
14886
14887 #ifdef SUPPORT_JIT
14888 functions = (executable_functions *)re->executable_jit;
14889 #endif
14890
14891 if ((options & PCRE2_JIT_INVALID_UTF) != 0)
14892 {
14893 if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) == 0)
14894 {
14895 #ifdef SUPPORT_JIT
14896 if (functions != NULL) return PCRE2_ERROR_JIT_BADOPTION;
14897 #endif
14898 re->overall_options |= PCRE2_MATCH_INVALID_UTF;
14899 }
14900 }
14901
14902 /* The above tests are run with and without JIT support. This means that
14903 PCRE2_JIT_INVALID_UTF propagates back into the regex options (ensuring
14904 interpreter support) even in the absence of JIT. But now, if there is no JIT
14905 support, give an error return. */
14906
14907 #ifndef SUPPORT_JIT
14908 return PCRE2_ERROR_JIT_BADOPTION;
14909 #else /* SUPPORT_JIT */
14910
14911 /* There is JIT support. Do the necessary. */
14912
14913 if ((re->flags & PCRE2_NOJIT) != 0) return 0;
14914
14915 if (executable_allocator_is_working == -1)
14916 {
14917 /* Checks whether the executable allocator is working. This check
14918 might run multiple times in multi-threaded environments, but the
14919 result should not be affected by it. */
14920 void *ptr = SLJIT_MALLOC_EXEC(32, NULL);
14921 if (ptr != NULL)
14922 {
14923 SLJIT_FREE_EXEC(((sljit_u8*)(ptr)) + SLJIT_EXEC_OFFSET(ptr), NULL);
14924 executable_allocator_is_working = 1;
14925 }
14926 else executable_allocator_is_working = 0;
14927 }
14928
14929 if (!executable_allocator_is_working)
14930 return PCRE2_ERROR_NOMEMORY;
14931
14932 if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) != 0)
14933 options |= PCRE2_JIT_INVALID_UTF;
14934
14935 if ((options & PCRE2_JIT_COMPLETE) != 0 && (functions == NULL
14936 || functions->executable_funcs[0] == NULL)) {
14937 uint32_t excluded_options = (PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_PARTIAL_HARD);
14938 int result = jit_compile(code, options & ~excluded_options);
14939 if (result != 0)
14940 return result;
14941 }
14942
14943 if ((options & PCRE2_JIT_PARTIAL_SOFT) != 0 && (functions == NULL
14944 || functions->executable_funcs[1] == NULL)) {
14945 uint32_t excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_HARD);
14946 int result = jit_compile(code, options & ~excluded_options);
14947 if (result != 0)
14948 return result;
14949 }
14950
14951 if ((options & PCRE2_JIT_PARTIAL_HARD) != 0 && (functions == NULL
14952 || functions->executable_funcs[2] == NULL)) {
14953 uint32_t excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT);
14954 int result = jit_compile(code, options & ~excluded_options);
14955 if (result != 0)
14956 return result;
14957 }
14958
14959 return 0;
14960
14961 #endif /* SUPPORT_JIT */
14962 }
14963
14964 /* JIT compiler uses an all-in-one approach. This improves security,
14965 since the code generator functions are not exported. */
14966
14967 #define INCLUDED_FROM_PCRE2_JIT_COMPILE
14968
14969 #include "pcre2_jit_match.c"
14970 #include "pcre2_jit_misc.c"
14971
14972 /* End of pcre2_jit_compile.c */
14973