1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Original API code Copyright (c) 1997-2012 University of Cambridge
10 New API code Copyright (c) 2016-2018 University of Cambridge
11
12 -----------------------------------------------------------------------------
13 Redistribution and use in source and binary forms, with or without
14 modification, are permitted provided that the following conditions are met:
15
16 * Redistributions of source code must retain the above copyright notice,
17 this list of conditions and the following disclaimer.
18
19 * Redistributions in binary form must reproduce the above copyright
20 notice, this list of conditions and the following disclaimer in the
21 documentation and/or other materials provided with the distribution.
22
23 * Neither the name of the University of Cambridge nor the names of its
24 contributors may be used to endorse or promote products derived from
25 this software without specific prior written permission.
26
27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 POSSIBILITY OF SUCH DAMAGE.
38 -----------------------------------------------------------------------------
39 */
40
41 #ifdef HAVE_CONFIG_H
42 #include "config.h"
43 #endif
44
45 #include "pcre2_internal.h"
46
47 #ifdef SUPPORT_JIT
48
49 /* All-in-one: Since we use the JIT compiler only from here,
50 we just include it. This way we don't need to touch the build
51 system files. */
52
53 #define SLJIT_CONFIG_AUTO 1
54 #define SLJIT_CONFIG_STATIC 1
55 #define SLJIT_VERBOSE 0
56
57 #ifdef PCRE2_DEBUG
58 #define SLJIT_DEBUG 1
59 #else
60 #define SLJIT_DEBUG 0
61 #endif
62
63 #define SLJIT_MALLOC(size, allocator_data) pcre2_jit_malloc(size, allocator_data)
64 #define SLJIT_FREE(ptr, allocator_data) pcre2_jit_free(ptr, allocator_data)
65
pcre2_jit_malloc(size_t size,void * allocator_data)66 static void * pcre2_jit_malloc(size_t size, void *allocator_data)
67 {
68 pcre2_memctl *allocator = ((pcre2_memctl*)allocator_data);
69 return allocator->malloc(size, allocator->memory_data);
70 }
71
pcre2_jit_free(void * ptr,void * allocator_data)72 static void pcre2_jit_free(void *ptr, void *allocator_data)
73 {
74 pcre2_memctl *allocator = ((pcre2_memctl*)allocator_data);
75 allocator->free(ptr, allocator->memory_data);
76 }
77
78 #include "sljit/sljitLir.c"
79
80 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
81 #error Unsupported architecture
82 #endif
83
/* Defines for debugging purposes. Both switches are disabled by default;
remove the surrounding comment markers to enable one. */

/* 1 - Use unoptimized capturing brackets.
   2 - Enable capture_last_ptr (includes option 1). */
/* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */

/* 1 - Always have a control head. */
/* #define DEBUG_FORCE_CONTROL_HEAD 1 */

/* Allocate memory for the regex stack on the real machine stack.
Fast, but limited size. */
#define MACHINE_STACK_SIZE 32768

/* Growth rate for the stack allocated by the OS. Should be a multiple
of the page size. */
#define STACK_GROWTH_RATE 8192

/* Enable to check that the allocation could destroy temporaries.
Only defined when SLJIT_DEBUG is defined and non-zero. */
#if defined(SLJIT_DEBUG) && SLJIT_DEBUG
#define DESTROY_REGISTERS 1
#endif
105
/*
Short summary of the backtracking mechanism employed by the JIT code generator:

The code generator follows the recursive nature of the Perl-compatible regular
expressions. The basic blocks of regular expressions are condition checkers
which execute different commands depending on the result of the condition check.
The relationship between the operators can be horizontal (concatenation) and
vertical (sub-expression) (See struct backtrack_common for more details).

  'ab' - 'a' and 'b' regexps are concatenated
  'a+' - 'a' is the sub-expression of the '+' operator

The condition checkers are boolean (true/false) checkers. Machine code is generated
for the checker itself and for the actions depending on the result of the checker.
The 'true' case is called the matching path (expected path), and the other is called
the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
branches on the matching path.

 Greedy star operator (*) :
   Matching path: match happens.
   Backtrack path: match failed.
 Non-greedy star operator (*?) :
   Matching path: no need to perform a match.
   Backtrack path: match is required.

The following example shows how the code is generated for a capturing bracket
with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
we have the following regular expression:

   A(B|C)D

The generated code will be the following:

 A matching path
 '(' matching path (pushing arguments to the stack)
 B matching path
 ')' matching path (pushing arguments to the stack)
 D matching path
 return with successful match

 D backtrack path
 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
 B backtrack path
 C expected path
 jump to D matching path
 C backtrack path
 A backtrack path

 Notice that the order of the backtrack code paths is the opposite of the fast
 code paths. In this way the topmost value on the stack always belongs
 to the current backtrack code path. The backtrack path must check
 whether there is a next alternative. If so, it needs to jump back to
 the matching path eventually. Otherwise it needs to clear out its own stack
 frame and continue the execution on the backtrack code paths.
*/
161
/*
Saved stack frames:

Atomic blocks and asserts require reloading the values of private data
when the backtrack mechanism is performed. Because of OP_RECURSE, the data
are not necessarily known at compile time, thus we need a dynamic restore
mechanism.

The stack frames are stored in a chain list, and have the following format:
([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]

Thus we can restore the private data to a particular point in the stack.
*/
175
176 typedef struct jit_arguments {
177 /* Pointers first. */
178 struct sljit_stack *stack;
179 PCRE2_SPTR str;
180 PCRE2_SPTR begin;
181 PCRE2_SPTR end;
182 pcre2_match_data *match_data;
183 PCRE2_SPTR startchar_ptr;
184 PCRE2_UCHAR *mark_ptr;
185 int (*callout)(pcre2_callout_block *, void *);
186 void *callout_data;
187 /* Everything else after. */
188 sljit_uw offset_limit;
189 sljit_u32 limit_match;
190 sljit_u32 oveccount;
191 sljit_u32 options;
192 } jit_arguments;
193
194 #define JIT_NUMBER_OF_COMPILE_MODES 3
195
196 typedef struct executable_functions {
197 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
198 void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES];
199 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
200 sljit_u32 top_bracket;
201 sljit_u32 limit_match;
202 } executable_functions;
203
/* Node of a singly linked list of sljit jumps. */
typedef struct jump_list jump_list;
struct jump_list {
  struct sljit_jump *jump;
  jump_list *next;
};
208
/* Node of a singly linked list pairing a jump (start) with a label (quit). */
typedef struct stub_list stub_list;
struct stub_list {
  struct sljit_jump *start;
  struct sljit_label *quit;
  stub_list *next;
};
214
215 typedef struct label_addr_list {
216 struct sljit_label *label;
217 sljit_uw *update_addr;
218 struct label_addr_list *next;
219 } label_addr_list;
220
/* Negative marker values.
NOTE(review): presumably stored in the framesize members of the backtrack
structures, whose comments say "less than 0" means no frame is needed -
confirm at the use sites. */
enum frame_types { no_frame = -1, no_stack = -2 };
225
/* Kinds of control entries: a mark or a then-trap. */
enum control_types { type_mark = 0, type_then_trap = 1 };
230
231 typedef int (SLJIT_FUNC *jit_function)(jit_arguments *args);
232
233 /* The following structure is the key data type for the recursive
234 code generator. It is allocated by compile_matchingpath, and contains
235 the arguments for compile_backtrackingpath. Must be the first member
236 of its descendants. */
237 typedef struct backtrack_common {
238 /* Concatenation stack. */
239 struct backtrack_common *prev;
240 jump_list *nextbacktracks;
241 /* Internal stack (for component operators). */
242 struct backtrack_common *top;
243 jump_list *topbacktracks;
244 /* Opcode pointer. */
245 PCRE2_SPTR cc;
246 } backtrack_common;
247
248 typedef struct assert_backtrack {
249 backtrack_common common;
250 jump_list *condfailed;
251 /* Less than 0 if a frame is not needed. */
252 int framesize;
253 /* Points to our private memory word on the stack. */
254 int private_data_ptr;
255 /* For iterators. */
256 struct sljit_label *matchingpath;
257 } assert_backtrack;
258
259 typedef struct bracket_backtrack {
260 backtrack_common common;
261 /* Where to coninue if an alternative is successfully matched. */
262 struct sljit_label *alternative_matchingpath;
263 /* For rmin and rmax iterators. */
264 struct sljit_label *recursive_matchingpath;
265 /* For greedy ? operator. */
266 struct sljit_label *zero_matchingpath;
267 /* Contains the branches of a failed condition. */
268 union {
269 /* Both for OP_COND, OP_SCOND. */
270 jump_list *condfailed;
271 assert_backtrack *assert;
272 /* For OP_ONCE. Less than 0 if not needed. */
273 int framesize;
274 } u;
275 /* Points to our private memory word on the stack. */
276 int private_data_ptr;
277 } bracket_backtrack;
278
279 typedef struct bracketpos_backtrack {
280 backtrack_common common;
281 /* Points to our private memory word on the stack. */
282 int private_data_ptr;
283 /* Reverting stack is needed. */
284 int framesize;
285 /* Allocated stack size. */
286 int stacksize;
287 } bracketpos_backtrack;
288
289 typedef struct braminzero_backtrack {
290 backtrack_common common;
291 struct sljit_label *matchingpath;
292 } braminzero_backtrack;
293
294 typedef struct char_iterator_backtrack {
295 backtrack_common common;
296 /* Next iteration. */
297 struct sljit_label *matchingpath;
298 union {
299 jump_list *backtracks;
300 struct {
301 unsigned int othercasebit;
302 PCRE2_UCHAR chr;
303 BOOL enabled;
304 } charpos;
305 } u;
306 } char_iterator_backtrack;
307
308 typedef struct ref_iterator_backtrack {
309 backtrack_common common;
310 /* Next iteration. */
311 struct sljit_label *matchingpath;
312 } ref_iterator_backtrack;
313
314 typedef struct recurse_entry {
315 struct recurse_entry *next;
316 /* Contains the function entry label. */
317 struct sljit_label *entry_label;
318 /* Contains the function entry label. */
319 struct sljit_label *backtrack_label;
320 /* Collects the entry calls until the function is not created. */
321 jump_list *entry_calls;
322 /* Collects the backtrack calls until the function is not created. */
323 jump_list *backtrack_calls;
324 /* Points to the starting opcode. */
325 sljit_sw start;
326 } recurse_entry;
327
328 typedef struct recurse_backtrack {
329 backtrack_common common;
330 /* Return to the matching path. */
331 struct sljit_label *matchingpath;
332 /* Recursive pattern. */
333 recurse_entry *entry;
334 /* Pattern is inlined. */
335 BOOL inlined_pattern;
336 } recurse_backtrack;
337
338 #define OP_THEN_TRAP OP_TABLE_LENGTH
339
340 typedef struct then_trap_backtrack {
341 backtrack_common common;
342 /* If then_trap is not NULL, this structure contains the real
343 then_trap for the backtracking path. */
344 struct then_trap_backtrack *then_trap;
345 /* Points to the starting opcode. */
346 sljit_sw start;
347 /* Exit point for the then opcodes of this alternative. */
348 jump_list *quit;
349 /* Frame size of the current alternative. */
350 int framesize;
351 } then_trap_backtrack;
352
353 #define MAX_N_CHARS 12
354 #define MAX_DIFF_CHARS 5
355
356 typedef struct fast_forward_char_data {
357 /* Number of characters in the chars array, 255 for any character. */
358 sljit_u8 count;
359 /* Number of last UTF-8 characters in the chars array. */
360 sljit_u8 last_count;
361 /* Available characters in the current position. */
362 PCRE2_UCHAR chars[MAX_DIFF_CHARS];
363 } fast_forward_char_data;
364
365 #define MAX_CLASS_RANGE_SIZE 4
366 #define MAX_CLASS_CHARS_SIZE 3
367
368 typedef struct compiler_common {
369 /* The sljit ceneric compiler. */
370 struct sljit_compiler *compiler;
371 /* Compiled regular expression. */
372 pcre2_real_code *re;
373 /* First byte code. */
374 PCRE2_SPTR start;
375 /* Maps private data offset to each opcode. */
376 sljit_s32 *private_data_ptrs;
377 /* Chain list of read-only data ptrs. */
378 void *read_only_data_head;
379 /* Tells whether the capturing bracket is optimized. */
380 sljit_u8 *optimized_cbracket;
381 /* Tells whether the starting offset is a target of then. */
382 sljit_u8 *then_offsets;
383 /* Current position where a THEN must jump. */
384 then_trap_backtrack *then_trap;
385 /* Starting offset of private data for capturing brackets. */
386 sljit_s32 cbra_ptr;
387 /* Output vector starting point. Must be divisible by 2. */
388 sljit_s32 ovector_start;
389 /* Points to the starting character of the current match. */
390 sljit_s32 start_ptr;
391 /* Last known position of the requested byte. */
392 sljit_s32 req_char_ptr;
393 /* Head of the last recursion. */
394 sljit_s32 recursive_head_ptr;
395 /* First inspected character for partial matching.
396 (Needed for avoiding zero length partial matches.) */
397 sljit_s32 start_used_ptr;
398 /* Starting pointer for partial soft matches. */
399 sljit_s32 hit_start;
400 /* Pointer of the match end position. */
401 sljit_s32 match_end_ptr;
402 /* Points to the marked string. */
403 sljit_s32 mark_ptr;
404 /* Recursive control verb management chain. */
405 sljit_s32 control_head_ptr;
406 /* Points to the last matched capture block index. */
407 sljit_s32 capture_last_ptr;
408 /* Fast forward skipping byte code pointer. */
409 PCRE2_SPTR fast_forward_bc_ptr;
410 /* Locals used by fast fail optimization. */
411 sljit_s32 fast_fail_start_ptr;
412 sljit_s32 fast_fail_end_ptr;
413
414 /* Flipped and lower case tables. */
415 const sljit_u8 *fcc;
416 sljit_sw lcc;
417 /* Mode can be PCRE2_JIT_COMPLETE and others. */
418 int mode;
419 /* TRUE, when minlength is greater than 0. */
420 BOOL might_be_empty;
421 /* \K is found in the pattern. */
422 BOOL has_set_som;
423 /* (*SKIP:arg) is found in the pattern. */
424 BOOL has_skip_arg;
425 /* (*THEN) is found in the pattern. */
426 BOOL has_then;
427 /* (*SKIP) or (*SKIP:arg) is found in lookbehind assertion. */
428 BOOL has_skip_in_assert_back;
429 /* Quit is redirected by recurse, negative assertion, or positive assertion in conditional block. */
430 BOOL local_quit_available;
431 /* Currently in a positive assertion. */
432 BOOL in_positive_assertion;
433 /* Newline control. */
434 int nltype;
435 sljit_u32 nlmax;
436 sljit_u32 nlmin;
437 int newline;
438 int bsr_nltype;
439 sljit_u32 bsr_nlmax;
440 sljit_u32 bsr_nlmin;
441 /* Dollar endonly. */
442 int endonly;
443 /* Tables. */
444 sljit_sw ctypes;
445 /* Named capturing brackets. */
446 PCRE2_SPTR name_table;
447 sljit_sw name_count;
448 sljit_sw name_entry_size;
449
450 /* Labels and jump lists. */
451 struct sljit_label *partialmatchlabel;
452 struct sljit_label *quit_label;
453 struct sljit_label *abort_label;
454 struct sljit_label *accept_label;
455 struct sljit_label *ff_newline_shortcut;
456 stub_list *stubs;
457 label_addr_list *label_addrs;
458 recurse_entry *entries;
459 recurse_entry *currententry;
460 jump_list *partialmatch;
461 jump_list *quit;
462 jump_list *positive_assertion_quit;
463 jump_list *abort;
464 jump_list *failed_match;
465 jump_list *accept;
466 jump_list *calllimit;
467 jump_list *stackalloc;
468 jump_list *revertframes;
469 jump_list *wordboundary;
470 jump_list *anynewline;
471 jump_list *hspace;
472 jump_list *vspace;
473 jump_list *casefulcmp;
474 jump_list *caselesscmp;
475 jump_list *reset_match;
476 BOOL unset_backref;
477 BOOL alt_circumflex;
478 #ifdef SUPPORT_UNICODE
479 BOOL utf;
480 BOOL use_ucp;
481 jump_list *getucd;
482 #if PCRE2_CODE_UNIT_WIDTH == 8
483 jump_list *utfreadchar;
484 jump_list *utfreadchar16;
485 jump_list *utfreadtype8;
486 #endif
487 #endif /* SUPPORT_UNICODE */
488 } compiler_common;
489
/* Context for byte_sequence_compare. The ucharptr member and the two
unions exist only when SLJIT_UNALIGNED is defined and non-zero; c and oc
have the same layout, which lets the same storage be viewed as one
sljit_s32, one sljit_u16, or an array of code units. */
typedef struct compare_context {
  int length;
  int sourcereg;
#if defined(SLJIT_UNALIGNED) && SLJIT_UNALIGNED
  int ucharptr;
  union {
    sljit_s32 asint;
    sljit_u16 asushort;
#if PCRE2_CODE_UNIT_WIDTH == 8
    sljit_u8 asbyte;
    sljit_u8 asuchars[4];
#elif PCRE2_CODE_UNIT_WIDTH == 16
    sljit_u16 asuchars[2];
#elif PCRE2_CODE_UNIT_WIDTH == 32
    sljit_u32 asuchars[1];
#endif
  } c;
  union {
    sljit_s32 asint;
    sljit_u16 asushort;
#if PCRE2_CODE_UNIT_WIDTH == 8
    sljit_u8 asbyte;
    sljit_u8 asuchars[4];
#elif PCRE2_CODE_UNIT_WIDTH == 16
    sljit_u16 asuchars[2];
#elif PCRE2_CODE_UNIT_WIDTH == 32
    sljit_u32 asuchars[1];
#endif
  } oc;
#endif
} compare_context;
523
/* Drop sljit's CMP so that this file can define its own later. */
#undef CMP

/* Byte offset of the i-th stack element (i machine words). */
#define STACK(i) ((i) * (int)sizeof(sljit_sw))

/* When sljit states a preferred shift register, only SLJIT_R2 and
SLJIT_R3 are accepted; SLJIT_R3 is recorded as SHIFT_REG_IS_R3. */
#if defined(SLJIT_PREF_SHIFT_REG)
#if SLJIT_PREF_SHIFT_REG == SLJIT_R2
/* Nothing. */
#elif SLJIT_PREF_SHIFT_REG == SLJIT_R3
#define SHIFT_REG_IS_R3
#else
#error "Unsupported shift register"
#endif
#endif
539
/* Symbolic names for the sljit registers used by the generator. */

/* Scratch registers. TMP2 and TMP3 swap their registers when
SHIFT_REG_IS_R3 is defined. */
#define TMP1 SLJIT_R0
#define STR_PTR SLJIT_R1
#ifdef SHIFT_REG_IS_R3
#define TMP2 SLJIT_R3
#define TMP3 SLJIT_R2
#else
#define TMP2 SLJIT_R2
#define TMP3 SLJIT_R3
#endif
#define RETURN_ADDR SLJIT_R4

/* Saved registers. */
#define STR_END SLJIT_S0
#define STACK_TOP SLJIT_S1
#define STACK_LIMIT SLJIT_S2
#define COUNT_MATCH SLJIT_S3
#define ARGUMENTS SLJIT_S4
555
/* Local space layout. Each offset below is a whole number of machine words. */

/* These two locals can be used by the current opcode. */
#define LOCALS0 (0 * sizeof(sljit_sw))
#define LOCALS1 (1 * sizeof(sljit_sw))
/* Two local variables for possessive quantifiers (char1 cannot use them). */
#define POSSESSIVE0 (2 * sizeof(sljit_sw))
#define POSSESSIVE1 (3 * sizeof(sljit_sw))
/* Max limit of recursions. */
#define LIMIT_MATCH (4 * sizeof(sljit_sw))

/* The output vector is stored on the stack, and contains pointers
to characters. The vector data is divided into two groups: the first
group contains the start / end character pointers, and the second is
the start pointers when the end of the capturing group has not yet
been reached. */
#define OVECTOR_START (common->ovector_start)
#define OVECTOR(i) (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
#define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))

/* Private data offset recorded for the opcode at cc. Expects a variable
named common in scope, as do the OVECTOR macros above. */
#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
573
574 #if PCRE2_CODE_UNIT_WIDTH == 8
575 #define MOV_UCHAR SLJIT_MOV_U8
576 #define IN_UCHARS(x) (x)
577 #elif PCRE2_CODE_UNIT_WIDTH == 16
578 #define MOV_UCHAR SLJIT_MOV_U16
579 #define UCHAR_SHIFT (1)
580 #define IN_UCHARS(x) ((x) * 2)
581 #elif PCRE2_CODE_UNIT_WIDTH == 32
582 #define MOV_UCHAR SLJIT_MOV_U32
583 #define UCHAR_SHIFT (2)
584 #define IN_UCHARS(x) ((x) * 4)
585 #else
586 #error Unsupported compiling mode
587 #endif
588
/* Shortcuts for the sljit emitter calls. Most of them reference a local
variable named compiler; DEFINE_COMPILER declares it from common->compiler. */
#define DEFINE_COMPILER struct sljit_compiler *compiler = common->compiler

/* Operations. */
#define OP1(op, dst, dstw, src, srcw) sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
#define OP_FLAGS(op, dst, dstw, type) sljit_emit_op_flags(compiler, (op), (dst), (dstw), (type))
#define CMOV(type, dst_reg, src, srcw) sljit_emit_cmov(compiler, (type), (dst_reg), (src), (srcw))
#define GET_LOCAL_BASE(dst, dstw, offset) sljit_get_local_base(compiler, (dst), (dstw), (offset))

/* Labels and jumps. The *TO variants bind the new jump to an existing
label; JUMPHERE binds an existing jump to a label emitted right here. */
#define LABEL() sljit_emit_label(compiler)
#define JUMP(type) sljit_emit_jump(compiler, (type))
#define JUMPTO(type, label) sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
#define JUMPHERE(jump) sljit_set_label((jump), sljit_emit_label(compiler))
#define SET_LABEL(jump, label) sljit_set_label((jump), (label))
#define CMP(type, src1, src1w, src2, src2w) sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
#define CMPTO(type, src1, src1w, src2, src2w, label) sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))

#define READ_CHAR_MAX 0x7fffffff

#define INVALID_UTF_CHAR 888
620
/* Returns the address of the first opcode after the bracket that starts
at cc. The branch links are followed until an opcode other than OP_ALT is
reached; that opcode (asserted to be in OP_KET..OP_KETRPOS) and its link
field are then stepped over. */
static PCRE2_SPTR bracketend(PCRE2_SPTR cc)
{
  SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));

  /* Hop over the first branch, then over every further OP_ALT branch. */
  for (;;)
  {
    cc += GET(cc, 1);
    if (*cc != OP_ALT)
      break;
  }

  SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
  return cc + 1 + LINK_SIZE;
}
629
/* Returns the number of branches of the bracket that starts at cc: one
for the first branch plus one for every OP_ALT reached by following the
branch links. */
static int no_alternatives(PCRE2_SPTR cc)
{
  int branches;

  SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));

  /* The first branch always exists. */
  cc += GET(cc, 1);
  for (branches = 1; *cc == OP_ALT; branches++)
    cc += GET(cc, 1);

  SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
  return branches;
}
643
/* Functions which might need modification for every newly supported opcode:
 next_opcode
 check_opcode_types
 set_private_data_ptrs
 get_framesize
 init_frame
 get_recurse_data_length
 copy_recurse_data
 compile_matchingpath
 compile_backtrackingpath
*/
655
/* Returns the address of the opcode that follows the one at cc.

Arguments:
  common   compiler state; only its utf flag is read, and only in builds
           with SUPPORT_UNICODE
  cc       address of the current opcode

Returns NULL for OP_ANYBYTE when common->utf is set, and for an opcode that
is not listed below (after SLJIT_UNREACHABLE()). */
static PCRE2_SPTR next_opcode(compiler_common *common, PCRE2_SPTR cc)
{
  PCRE2_SPTR next;

  SLJIT_UNUSED_ARG(common);

  switch(*cc)
  {
    /* The length comes straight from the OP_lengths table. */
    case OP_SOD: case OP_SOM: case OP_SET_SOM:
    case OP_NOT_WORD_BOUNDARY: case OP_WORD_BOUNDARY:
    case OP_NOT_DIGIT: case OP_DIGIT:
    case OP_NOT_WHITESPACE: case OP_WHITESPACE:
    case OP_NOT_WORDCHAR: case OP_WORDCHAR:
    case OP_ANY: case OP_ALLANY:
    case OP_NOTPROP: case OP_PROP:
    case OP_ANYNL:
    case OP_NOT_HSPACE: case OP_HSPACE:
    case OP_NOT_VSPACE: case OP_VSPACE:
    case OP_EXTUNI:
    case OP_EODN: case OP_EOD:
    case OP_CIRC: case OP_CIRCM:
    case OP_DOLL: case OP_DOLLM:
    case OP_CRSTAR: case OP_CRMINSTAR:
    case OP_CRPLUS: case OP_CRMINPLUS:
    case OP_CRQUERY: case OP_CRMINQUERY:
    case OP_CRRANGE: case OP_CRMINRANGE:
    case OP_CRPOSSTAR: case OP_CRPOSPLUS:
    case OP_CRPOSQUERY: case OP_CRPOSRANGE:
    case OP_CLASS: case OP_NCLASS:
    case OP_REF: case OP_REFI:
    case OP_DNREF: case OP_DNREFI:
    case OP_RECURSE:
    case OP_CALLOUT:
    case OP_ALT:
    case OP_KET: case OP_KETRMAX: case OP_KETRMIN: case OP_KETRPOS:
    case OP_REVERSE:
    case OP_ASSERT: case OP_ASSERT_NOT:
    case OP_ASSERTBACK: case OP_ASSERTBACK_NOT:
    case OP_ONCE:
    case OP_BRA: case OP_BRAPOS:
    case OP_CBRA: case OP_CBRAPOS:
    case OP_COND:
    case OP_SBRA: case OP_SBRAPOS:
    case OP_SCBRA: case OP_SCBRAPOS:
    case OP_SCOND:
    case OP_CREF: case OP_DNCREF:
    case OP_RREF: case OP_DNRREF:
    case OP_FALSE: case OP_TRUE:
    case OP_BRAZERO: case OP_BRAMINZERO: case OP_BRAPOSZERO:
    case OP_PRUNE: case OP_SKIP: case OP_THEN: case OP_COMMIT:
    case OP_FAIL: case OP_ACCEPT: case OP_ASSERT_ACCEPT:
    case OP_CLOSE:
    case OP_SKIPZERO:
      next = cc + PRIV(OP_lengths)[*cc];
      break;

    /* Table length, plus the extra code units of the trailing character
    when it is multi-unit in UTF mode. */
    case OP_CHAR: case OP_CHARI:
    case OP_NOT: case OP_NOTI:
    case OP_STAR: case OP_MINSTAR:
    case OP_PLUS: case OP_MINPLUS:
    case OP_QUERY: case OP_MINQUERY:
    case OP_UPTO: case OP_MINUPTO:
    case OP_EXACT:
    case OP_POSSTAR: case OP_POSPLUS: case OP_POSQUERY: case OP_POSUPTO:
    case OP_STARI: case OP_MINSTARI:
    case OP_PLUSI: case OP_MINPLUSI:
    case OP_QUERYI: case OP_MINQUERYI:
    case OP_UPTOI: case OP_MINUPTOI:
    case OP_EXACTI:
    case OP_POSSTARI: case OP_POSPLUSI: case OP_POSQUERYI: case OP_POSUPTOI:
    case OP_NOTSTAR: case OP_NOTMINSTAR:
    case OP_NOTPLUS: case OP_NOTMINPLUS:
    case OP_NOTQUERY: case OP_NOTMINQUERY:
    case OP_NOTUPTO: case OP_NOTMINUPTO:
    case OP_NOTEXACT:
    case OP_NOTPOSSTAR: case OP_NOTPOSPLUS: case OP_NOTPOSQUERY: case OP_NOTPOSUPTO:
    case OP_NOTSTARI: case OP_NOTMINSTARI:
    case OP_NOTPLUSI: case OP_NOTMINPLUSI:
    case OP_NOTQUERYI: case OP_NOTMINQUERYI:
    case OP_NOTUPTOI: case OP_NOTMINUPTOI:
    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI: case OP_NOTPOSPLUSI: case OP_NOTPOSQUERYI: case OP_NOTPOSUPTOI:
      next = cc + PRIV(OP_lengths)[*cc];
#ifdef SUPPORT_UNICODE
      if (common->utf && HAS_EXTRALEN(next[-1]))
        next += GET_EXTRALEN(next[-1]);
#endif
      break;

    /* Special cases: one code unit less than the table length. */
    case OP_TYPESTAR: case OP_TYPEMINSTAR:
    case OP_TYPEPLUS: case OP_TYPEMINPLUS:
    case OP_TYPEQUERY: case OP_TYPEMINQUERY:
    case OP_TYPEUPTO: case OP_TYPEMINUPTO:
    case OP_TYPEEXACT:
    case OP_TYPEPOSSTAR: case OP_TYPEPOSPLUS:
    case OP_TYPEPOSQUERY: case OP_TYPEPOSUPTO:
      next = cc + PRIV(OP_lengths)[*cc] - 1;
      break;

    /* Rejected (NULL) in UTF mode, otherwise a single code unit. */
    case OP_ANYBYTE:
      next = cc + 1;
#ifdef SUPPORT_UNICODE
      if (common->utf)
        next = NULL;
#endif
      break;

    /* The total length is stored inside the opcode. */
    case OP_CALLOUT_STR:
      next = cc + GET(cc, 1 + 2*LINK_SIZE);
      break;

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
      next = cc + GET(cc, 1);
      break;
#endif

    /* Three fixed code units plus the count stored in cc[1]. */
    case OP_MARK:
    case OP_COMMIT_ARG:
    case OP_PRUNE_ARG:
    case OP_SKIP_ARG:
    case OP_THEN_ARG:
      next = cc + 1 + 2 + cc[1];
      break;

    default:
      /* All opcodes are supported now! */
      SLJIT_UNREACHABLE();
      next = NULL;
      break;
  }

  return next;
}
854
check_opcode_types(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend)855 static BOOL check_opcode_types(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend)
856 {
857 int count;
858 PCRE2_SPTR slot;
859 PCRE2_SPTR assert_back_end = cc - 1;
860
861 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
862 while (cc < ccend)
863 {
864 switch(*cc)
865 {
866 case OP_SET_SOM:
867 common->has_set_som = TRUE;
868 common->might_be_empty = TRUE;
869 cc += 1;
870 break;
871
872 case OP_REF:
873 case OP_REFI:
874 common->optimized_cbracket[GET2(cc, 1)] = 0;
875 cc += 1 + IMM2_SIZE;
876 break;
877
878 case OP_CBRAPOS:
879 case OP_SCBRAPOS:
880 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
881 cc += 1 + LINK_SIZE + IMM2_SIZE;
882 break;
883
884 case OP_COND:
885 case OP_SCOND:
886 /* Only AUTO_CALLOUT can insert this opcode. We do
887 not intend to support this case. */
888 if (cc[1 + LINK_SIZE] == OP_CALLOUT || cc[1 + LINK_SIZE] == OP_CALLOUT_STR)
889 return FALSE;
890 cc += 1 + LINK_SIZE;
891 break;
892
893 case OP_CREF:
894 common->optimized_cbracket[GET2(cc, 1)] = 0;
895 cc += 1 + IMM2_SIZE;
896 break;
897
898 case OP_DNREF:
899 case OP_DNREFI:
900 case OP_DNCREF:
901 count = GET2(cc, 1 + IMM2_SIZE);
902 slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
903 while (count-- > 0)
904 {
905 common->optimized_cbracket[GET2(slot, 0)] = 0;
906 slot += common->name_entry_size;
907 }
908 cc += 1 + 2 * IMM2_SIZE;
909 break;
910
911 case OP_RECURSE:
912 /* Set its value only once. */
913 if (common->recursive_head_ptr == 0)
914 {
915 common->recursive_head_ptr = common->ovector_start;
916 common->ovector_start += sizeof(sljit_sw);
917 }
918 cc += 1 + LINK_SIZE;
919 break;
920
921 case OP_CALLOUT:
922 case OP_CALLOUT_STR:
923 if (common->capture_last_ptr == 0)
924 {
925 common->capture_last_ptr = common->ovector_start;
926 common->ovector_start += sizeof(sljit_sw);
927 }
928 cc += (*cc == OP_CALLOUT) ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2*LINK_SIZE);
929 break;
930
931 case OP_ASSERTBACK:
932 slot = bracketend(cc);
933 if (slot > assert_back_end)
934 assert_back_end = slot;
935 cc += 1 + LINK_SIZE;
936 break;
937
938 case OP_THEN_ARG:
939 common->has_then = TRUE;
940 common->control_head_ptr = 1;
941 /* Fall through. */
942
943 case OP_COMMIT_ARG:
944 case OP_PRUNE_ARG:
945 case OP_MARK:
946 if (common->mark_ptr == 0)
947 {
948 common->mark_ptr = common->ovector_start;
949 common->ovector_start += sizeof(sljit_sw);
950 }
951 cc += 1 + 2 + cc[1];
952 break;
953
954 case OP_THEN:
955 common->has_then = TRUE;
956 common->control_head_ptr = 1;
957 cc += 1;
958 break;
959
960 case OP_SKIP:
961 if (cc < assert_back_end)
962 common->has_skip_in_assert_back = TRUE;
963 cc += 1;
964 break;
965
966 case OP_SKIP_ARG:
967 common->control_head_ptr = 1;
968 common->has_skip_arg = TRUE;
969 if (cc < assert_back_end)
970 common->has_skip_in_assert_back = TRUE;
971 cc += 1 + 2 + cc[1];
972 break;
973
974 default:
975 cc = next_opcode(common, cc);
976 if (cc == NULL)
977 return FALSE;
978 break;
979 }
980 }
981 return TRUE;
982 }
983
is_accelerated_repeat(PCRE2_SPTR cc)984 static BOOL is_accelerated_repeat(PCRE2_SPTR cc)
985 {
986 switch(*cc)
987 {
988 case OP_TYPESTAR:
989 case OP_TYPEMINSTAR:
990 case OP_TYPEPLUS:
991 case OP_TYPEMINPLUS:
992 case OP_TYPEPOSSTAR:
993 case OP_TYPEPOSPLUS:
994 return (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI);
995
996 case OP_STAR:
997 case OP_MINSTAR:
998 case OP_PLUS:
999 case OP_MINPLUS:
1000 case OP_POSSTAR:
1001 case OP_POSPLUS:
1002
1003 case OP_STARI:
1004 case OP_MINSTARI:
1005 case OP_PLUSI:
1006 case OP_MINPLUSI:
1007 case OP_POSSTARI:
1008 case OP_POSPLUSI:
1009
1010 case OP_NOTSTAR:
1011 case OP_NOTMINSTAR:
1012 case OP_NOTPLUS:
1013 case OP_NOTMINPLUS:
1014 case OP_NOTPOSSTAR:
1015 case OP_NOTPOSPLUS:
1016
1017 case OP_NOTSTARI:
1018 case OP_NOTMINSTARI:
1019 case OP_NOTPLUSI:
1020 case OP_NOTMINPLUSI:
1021 case OP_NOTPOSSTARI:
1022 case OP_NOTPOSPLUSI:
1023 return TRUE;
1024
1025 case OP_CLASS:
1026 case OP_NCLASS:
1027 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
1028 case OP_XCLASS:
1029 cc += (*cc == OP_XCLASS) ? GET(cc, 1) : (int)(1 + (32 / sizeof(PCRE2_UCHAR)));
1030 #else
1031 cc += (1 + (32 / sizeof(PCRE2_UCHAR)));
1032 #endif
1033
1034 switch(*cc)
1035 {
1036 case OP_CRSTAR:
1037 case OP_CRMINSTAR:
1038 case OP_CRPLUS:
1039 case OP_CRMINPLUS:
1040 case OP_CRPOSSTAR:
1041 case OP_CRPOSPLUS:
1042 return TRUE;
1043 }
1044 break;
1045 }
1046 return FALSE;
1047 }
1048
detect_fast_forward_skip(compiler_common * common,int * private_data_start)1049 static SLJIT_INLINE BOOL detect_fast_forward_skip(compiler_common *common, int *private_data_start)
1050 {
1051 PCRE2_SPTR cc = common->start;
1052 PCRE2_SPTR end;
1053
1054 /* Skip not repeated brackets. */
1055 while (TRUE)
1056 {
1057 switch(*cc)
1058 {
1059 case OP_SOD:
1060 case OP_SOM:
1061 case OP_SET_SOM:
1062 case OP_NOT_WORD_BOUNDARY:
1063 case OP_WORD_BOUNDARY:
1064 case OP_EODN:
1065 case OP_EOD:
1066 case OP_CIRC:
1067 case OP_CIRCM:
1068 case OP_DOLL:
1069 case OP_DOLLM:
1070 /* Zero width assertions. */
1071 cc++;
1072 continue;
1073 }
1074
1075 if (*cc != OP_BRA && *cc != OP_CBRA)
1076 break;
1077
1078 end = cc + GET(cc, 1);
1079 if (*end != OP_KET || PRIVATE_DATA(end) != 0)
1080 return FALSE;
1081 if (*cc == OP_CBRA)
1082 {
1083 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1084 return FALSE;
1085 cc += IMM2_SIZE;
1086 }
1087 cc += 1 + LINK_SIZE;
1088 }
1089
1090 if (is_accelerated_repeat(cc))
1091 {
1092 common->fast_forward_bc_ptr = cc;
1093 common->private_data_ptrs[(cc + 1) - common->start] = *private_data_start;
1094 *private_data_start += sizeof(sljit_sw);
1095 return TRUE;
1096 }
1097 return FALSE;
1098 }
1099
detect_fast_fail(compiler_common * common,PCRE2_SPTR cc,int * private_data_start,sljit_s32 depth)1100 static SLJIT_INLINE void detect_fast_fail(compiler_common *common, PCRE2_SPTR cc, int *private_data_start, sljit_s32 depth)
1101 {
1102 PCRE2_SPTR next_alt;
1103
1104 SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA);
1105
1106 if (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1107 return;
1108
1109 next_alt = bracketend(cc) - (1 + LINK_SIZE);
1110 if (*next_alt != OP_KET || PRIVATE_DATA(next_alt) != 0)
1111 return;
1112
1113 do
1114 {
1115 next_alt = cc + GET(cc, 1);
1116
1117 cc += 1 + LINK_SIZE + ((*cc == OP_CBRA) ? IMM2_SIZE : 0);
1118
1119 while (TRUE)
1120 {
1121 switch(*cc)
1122 {
1123 case OP_SOD:
1124 case OP_SOM:
1125 case OP_SET_SOM:
1126 case OP_NOT_WORD_BOUNDARY:
1127 case OP_WORD_BOUNDARY:
1128 case OP_EODN:
1129 case OP_EOD:
1130 case OP_CIRC:
1131 case OP_CIRCM:
1132 case OP_DOLL:
1133 case OP_DOLLM:
1134 /* Zero width assertions. */
1135 cc++;
1136 continue;
1137 }
1138 break;
1139 }
1140
1141 if (depth > 0 && (*cc == OP_BRA || *cc == OP_CBRA))
1142 detect_fast_fail(common, cc, private_data_start, depth - 1);
1143
1144 if (is_accelerated_repeat(cc))
1145 {
1146 common->private_data_ptrs[(cc + 1) - common->start] = *private_data_start;
1147
1148 if (common->fast_fail_start_ptr == 0)
1149 common->fast_fail_start_ptr = *private_data_start;
1150
1151 *private_data_start += sizeof(sljit_sw);
1152 common->fast_fail_end_ptr = *private_data_start;
1153
1154 if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
1155 return;
1156 }
1157
1158 cc = next_alt;
1159 }
1160 while (*cc == OP_ALT);
1161 }
1162
/* Returns the number of private data words (0, 1 or 2) used for the class
repeat opcode at cc. The callers pass the address just after the class data.

  OP_CRSTAR, OP_CRPLUS                          -> 2
  OP_CRMINSTAR, OP_CRMINPLUS, OP_CR(MIN)QUERY   -> 1
  OP_CRRANGE, OP_CRMINRANGE                     -> see below
  anything else                                 -> 0

For ranges, an upper limit of 0 is handled like the star/plus forms
(presumably it encodes "no upper limit" - confirm against the compiler);
otherwise the result is (max - min) capped at 2. */
static int get_class_iterator_size(PCRE2_SPTR cc)
{
  sljit_u32 lower;
  sljit_u32 upper;
  sljit_u32 span;

  if (*cc == OP_CRSTAR || *cc == OP_CRPLUS)
    return 2;

  if (*cc == OP_CRMINSTAR || *cc == OP_CRMINPLUS
      || *cc == OP_CRQUERY || *cc == OP_CRMINQUERY)
    return 1;

  if (*cc != OP_CRRANGE && *cc != OP_CRMINRANGE)
    return 0;

  lower = GET2(cc, 1);
  upper = GET2(cc, 1 + IMM2_SIZE);
  if (upper == 0)
    return (*cc == OP_CRRANGE) ? 2 : 1;

  span = upper - lower;
  return (int)((span > 2) ? 2 : span);
}
1194
detect_repeat(compiler_common * common,PCRE2_SPTR begin)1195 static BOOL detect_repeat(compiler_common *common, PCRE2_SPTR begin)
1196 {
1197 PCRE2_SPTR end = bracketend(begin);
1198 PCRE2_SPTR next;
1199 PCRE2_SPTR next_end;
1200 PCRE2_SPTR max_end;
1201 PCRE2_UCHAR type;
1202 sljit_sw length = end - begin;
1203 sljit_s32 min, max, i;
1204
1205 /* Detect fixed iterations first. */
1206 if (end[-(1 + LINK_SIZE)] != OP_KET)
1207 return FALSE;
1208
1209 /* Already detected repeat. */
1210 if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
1211 return TRUE;
1212
1213 next = end;
1214 min = 1;
1215 while (1)
1216 {
1217 if (*next != *begin)
1218 break;
1219 next_end = bracketend(next);
1220 if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
1221 break;
1222 next = next_end;
1223 min++;
1224 }
1225
1226 if (min == 2)
1227 return FALSE;
1228
1229 max = 0;
1230 max_end = next;
1231 if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
1232 {
1233 type = *next;
1234 while (1)
1235 {
1236 if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
1237 break;
1238 next_end = bracketend(next + 2 + LINK_SIZE);
1239 if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
1240 break;
1241 next = next_end;
1242 max++;
1243 }
1244
1245 if (next[0] == type && next[1] == *begin && max >= 1)
1246 {
1247 next_end = bracketend(next + 1);
1248 if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
1249 {
1250 for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
1251 if (*next_end != OP_KET)
1252 break;
1253
1254 if (i == max)
1255 {
1256 common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
1257 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
1258 /* +2 the original and the last. */
1259 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
1260 if (min == 1)
1261 return TRUE;
1262 min--;
1263 max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
1264 }
1265 }
1266 }
1267 }
1268
1269 if (min >= 3)
1270 {
1271 common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
1272 common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
1273 common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
1274 return TRUE;
1275 }
1276
1277 return FALSE;
1278 }
1279
/* Case label lists shared by the switch statements which walk the compiled
pattern. Each macro expands to labels only; the statements that follow the
macro in the switch form the body. */

/* Single character iterators for which set_private_data_ptrs() reserves one
private data word. */
#define CASE_ITERATOR_PRIVATE_DATA_1 \
    case OP_MINSTAR: \
    case OP_MINPLUS: \
    case OP_QUERY: \
    case OP_MINQUERY: \
    case OP_MINSTARI: \
    case OP_MINPLUSI: \
    case OP_QUERYI: \
    case OP_MINQUERYI: \
    case OP_NOTMINSTAR: \
    case OP_NOTMINPLUS: \
    case OP_NOTQUERY: \
    case OP_NOTMINQUERY: \
    case OP_NOTMINSTARI: \
    case OP_NOTMINPLUSI: \
    case OP_NOTQUERYI: \
    case OP_NOTMINQUERYI:

/* Single character iterators for which set_private_data_ptrs() reserves two
private data words; the opcode is followed directly by the character. */
#define CASE_ITERATOR_PRIVATE_DATA_2A \
    case OP_STAR: \
    case OP_PLUS: \
    case OP_STARI: \
    case OP_PLUSI: \
    case OP_NOTSTAR: \
    case OP_NOTPLUS: \
    case OP_NOTSTARI: \
    case OP_NOTPLUSI:

/* Same as 2A (two words), but the callers skip an extra IMM2_SIZE code units
before the character. */
#define CASE_ITERATOR_PRIVATE_DATA_2B \
    case OP_UPTO: \
    case OP_MINUPTO: \
    case OP_UPTOI: \
    case OP_MINUPTOI: \
    case OP_NOTUPTO: \
    case OP_NOTMINUPTO: \
    case OP_NOTUPTOI: \
    case OP_NOTMINUPTOI:

/* Character type iterators for which set_private_data_ptrs() reserves one
private data word. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
    case OP_TYPEMINSTAR: \
    case OP_TYPEMINPLUS: \
    case OP_TYPEQUERY: \
    case OP_TYPEMINQUERY:

/* Character type iterators which get two private data words, unless the type
that follows is OP_ANYNL or OP_EXTUNI (see set_private_data_ptrs()). */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
    case OP_TYPESTAR: \
    case OP_TYPEPLUS:

/* Character type iterators whose opcode is followed by an IMM2_SIZE wide
value; get_recurse_data_length() counts two words for them when a private
slot is assigned. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
    case OP_TYPEUPTO: \
    case OP_TYPEMINUPTO:
1331
/* Walks the compiled pattern from common->start up to ccend and assigns
offsets (in bytes, starting at *private_data_start) to the items which keep
their state in the private data area. The offsets are stored in
common->private_data_ptrs, indexed by the position of the opcode.

On return *private_data_start holds the first unused offset. The walk stops
early when the offset exceeds SLJIT_MAX_LOCAL_SIZE.

Arguments:
  common               compiler state (start, private_data_ptrs, utf)
  private_data_start   in: first free offset, out: first free offset after
                       the assignments
  ccend                end of the byte code to process */
static void set_private_data_ptrs(compiler_common *common, int *private_data_start, PCRE2_SPTR ccend)
{
  PCRE2_SPTR cc = common->start;
  PCRE2_SPTR alternative;
  /* While cc < end, the current position is inside a repeated bracket and
  character iterators must not get a private slot. */
  PCRE2_SPTR end = NULL;
  int private_data_ptr = *private_data_start;
  /* space: number of words to reserve for a character iterator.
  size: length of the item; a negative value marks an item which ends with a
  character that may have extra code units in UTF mode.
  bracketlen: length of a bracket header. */
  int space, size, bracketlen;
  BOOL repeat_check = TRUE;

  while (cc < ccend)
  {
    space = 0;
    size = 0;
    bracketlen = 0;
    if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
      break;

    if (repeat_check && (*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND))
    {
      if (detect_repeat(common, cc))
      {
        /* These brackets are converted to repeats, so no global
        based single character repeat is allowed. */
        if (cc >= end)
          end = bracketend(cc);
      }
    }
    /* The check is suppressed only for the bracket which directly follows
    an OP_BRAZERO / OP_BRAMINZERO / OP_BRAPOSZERO. */
    repeat_check = TRUE;

    switch(*cc)
    {
      case OP_KET:
        /* A non-zero entry after the opcode was written by detect_repeat():
        the OP_KET gets a slot and the recorded distance is skipped. */
        if (common->private_data_ptrs[cc + 1 - common->start] != 0)
        {
          common->private_data_ptrs[cc - common->start] = private_data_ptr;
          private_data_ptr += sizeof(sljit_sw);
          cc += common->private_data_ptrs[cc + 1 - common->start];
        }
        cc += 1 + LINK_SIZE;
        break;

      case OP_ASSERT:
      case OP_ASSERT_NOT:
      case OP_ASSERTBACK:
      case OP_ASSERTBACK_NOT:
      case OP_ONCE:
      case OP_BRAPOS:
      case OP_SBRA:
      case OP_SBRAPOS:
      case OP_SCOND:
        common->private_data_ptrs[cc - common->start] = private_data_ptr;
        private_data_ptr += sizeof(sljit_sw);
        bracketlen = 1 + LINK_SIZE;
        break;

      case OP_CBRAPOS:
      case OP_SCBRAPOS:
        common->private_data_ptrs[cc - common->start] = private_data_ptr;
        private_data_ptr += sizeof(sljit_sw);
        bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
        break;

      case OP_COND:
        /* Might be a hidden SCOND. */
        alternative = cc + GET(cc, 1);
        if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
        {
          common->private_data_ptrs[cc - common->start] = private_data_ptr;
          private_data_ptr += sizeof(sljit_sw);
        }
        bracketlen = 1 + LINK_SIZE;
        break;

      case OP_BRA:
        bracketlen = 1 + LINK_SIZE;
        break;

      case OP_CBRA:
      case OP_SCBRA:
        bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
        break;

      case OP_BRAZERO:
      case OP_BRAMINZERO:
      case OP_BRAPOSZERO:
        repeat_check = FALSE;
        size = 1;
        break;

      CASE_ITERATOR_PRIVATE_DATA_1
        space = 1;
        size = -2;
        break;

      CASE_ITERATOR_PRIVATE_DATA_2A
        space = 2;
        size = -2;
        break;

      CASE_ITERATOR_PRIVATE_DATA_2B
        space = 2;
        size = -(2 + IMM2_SIZE);
        break;

      CASE_ITERATOR_TYPE_PRIVATE_DATA_1
        space = 1;
        size = 1;
        break;

      CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
        if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
          space = 2;
        size = 1;
        break;

      case OP_TYPEUPTO:
        if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
          space = 2;
        size = 1 + IMM2_SIZE;
        break;

      case OP_TYPEMINUPTO:
        space = 2;
        size = 1 + IMM2_SIZE;
        break;

      case OP_CLASS:
      case OP_NCLASS:
        /* The repeat opcode (if any) follows the class data, so the length
        of the class must be known before the repeat can be inspected.
        Looking at cc + 0 would examine the class opcode itself and always
        yield 0 words. */
        size = 1 + 32 / sizeof(PCRE2_UCHAR);
        space = get_class_iterator_size(cc + size);
        break;

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
      case OP_XCLASS:
        /* Same as above; the length of an extended class is stored after
        its opcode. */
        size = GET(cc, 1);
        space = get_class_iterator_size(cc + size);
        break;
#endif

      default:
        cc = next_opcode(common, cc);
        SLJIT_ASSERT(cc != NULL);
        break;
    }

    /* Character iterators, which are not inside a repeated bracket,
    gets a private slot instead of allocating it on the stack. */
    if (space > 0 && cc >= end)
    {
      common->private_data_ptrs[cc - common->start] = private_data_ptr;
      private_data_ptr += sizeof(sljit_sw) * space;
    }

    if (size != 0)
    {
      if (size < 0)
      {
        cc += -size;
#ifdef SUPPORT_UNICODE
        /* Skip the extra code units of the character just passed. */
        if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      }
      else
        cc += size;
    }

    if (bracketlen > 0)
    {
      if (cc >= end)
      {
        /* A bracket closed by a plain OP_KET is not repeated, so private
        slots stay allowed inside it. */
        end = bracketend(cc);
        if (end[-1 - LINK_SIZE] == OP_KET)
          end = NULL;
      }
      cc += bracketlen;
    }
  }
  *private_data_start = private_data_ptr;
}
1511
/* Computes the number of stack words of the frame which init_frame() creates
for the byte code range [cc, ccend). When ccend is NULL, the range is the
content of the bracket at cc.

Returns with a frame_types (always < 0) if no need for frame: no_frame when
some item in the range requires the stack to be restored, no_stack otherwise.
When a frame is needed, the result is the summed size of its records plus one
word for the terminator.

*needs_control_head is set to TRUE when an OP_THEN, or a mark / (*VERB) with
argument, is found while common->control_head_ptr is in use (and always when
DEBUG_FORCE_CONTROL_HEAD is enabled); otherwise it is set to FALSE.

When recursive is TRUE, start-of-match and mark records are never counted. */
static int get_framesize(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, BOOL recursive, BOOL *needs_control_head)
{
  int length = 0;
  /* Words already accounted for by a possessive capturing bracket at cc. */
  int possessive = 0;
  BOOL stack_restore = FALSE;
  BOOL setsom_found = recursive;
  BOOL setmark_found = recursive;
  /* The last capture is a local variable even for recursions. */
  BOOL capture_last_found = FALSE;

#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
  SLJIT_ASSERT(common->control_head_ptr != 0);
  *needs_control_head = TRUE;
#else
  *needs_control_head = FALSE;
#endif

  if (ccend == NULL)
  {
    ccend = bracketend(cc) - (1 + LINK_SIZE);
    if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
    {
      /* 3 words for the capture itself, 2 more for the last capture. */
      possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
      /* This is correct regardless of common->capture_last_ptr. */
      capture_last_found = TRUE;
    }
    cc = next_opcode(common, cc);
  }

  SLJIT_ASSERT(cc != NULL);
  while (cc < ccend)
    switch(*cc)
    {
      case OP_SET_SOM:
        SLJIT_ASSERT(common->has_set_som);
        stack_restore = TRUE;
        /* The start-of-match record (2 words) is counted only once. */
        if (!setsom_found)
        {
          length += 2;
          setsom_found = TRUE;
        }
        cc += 1;
        break;

      case OP_MARK:
      case OP_COMMIT_ARG:
      case OP_PRUNE_ARG:
      case OP_THEN_ARG:
        SLJIT_ASSERT(common->mark_ptr != 0);
        stack_restore = TRUE;
        /* The mark record (2 words) is counted only once. */
        if (!setmark_found)
        {
          length += 2;
          setmark_found = TRUE;
        }
        if (common->control_head_ptr != 0)
          *needs_control_head = TRUE;
        /* Opcode, two more code units and the argument whose length is in cc[1]. */
        cc += 1 + 2 + cc[1];
        break;

      case OP_RECURSE:
        /* A recursion may change any of the three values, so each one in
        use is counted if it has not been counted yet. */
        stack_restore = TRUE;
        if (common->has_set_som && !setsom_found)
        {
          length += 2;
          setsom_found = TRUE;
        }
        if (common->mark_ptr != 0 && !setmark_found)
        {
          length += 2;
          setmark_found = TRUE;
        }
        if (common->capture_last_ptr != 0 && !capture_last_found)
        {
          length += 2;
          capture_last_found = TRUE;
        }
        cc += 1 + LINK_SIZE;
        break;

      case OP_CBRA:
      case OP_CBRAPOS:
      case OP_SCBRA:
      case OP_SCBRAPOS:
        stack_restore = TRUE;
        if (common->capture_last_ptr != 0 && !capture_last_found)
        {
          length += 2;
          capture_last_found = TRUE;
        }
        /* Every capturing bracket has its own 3 word record. */
        length += 3;
        cc += 1 + LINK_SIZE + IMM2_SIZE;
        break;

      case OP_THEN:
        stack_restore = TRUE;
        if (common->control_head_ptr != 0)
          *needs_control_head = TRUE;
        cc ++;
        break;

      default:
        /* Any opcode which is not listed below requires stack restore. */
        stack_restore = TRUE;
        /* Fall through. */

      /* The opcodes below are skipped without setting stack_restore. */
      case OP_NOT_WORD_BOUNDARY:
      case OP_WORD_BOUNDARY:
      case OP_NOT_DIGIT:
      case OP_DIGIT:
      case OP_NOT_WHITESPACE:
      case OP_WHITESPACE:
      case OP_NOT_WORDCHAR:
      case OP_WORDCHAR:
      case OP_ANY:
      case OP_ALLANY:
      case OP_ANYBYTE:
      case OP_NOTPROP:
      case OP_PROP:
      case OP_ANYNL:
      case OP_NOT_HSPACE:
      case OP_HSPACE:
      case OP_NOT_VSPACE:
      case OP_VSPACE:
      case OP_EXTUNI:
      case OP_EODN:
      case OP_EOD:
      case OP_CIRC:
      case OP_CIRCM:
      case OP_DOLL:
      case OP_DOLLM:
      case OP_CHAR:
      case OP_CHARI:
      case OP_NOT:
      case OP_NOTI:

      case OP_EXACT:
      case OP_POSSTAR:
      case OP_POSPLUS:
      case OP_POSQUERY:
      case OP_POSUPTO:

      case OP_EXACTI:
      case OP_POSSTARI:
      case OP_POSPLUSI:
      case OP_POSQUERYI:
      case OP_POSUPTOI:

      case OP_NOTEXACT:
      case OP_NOTPOSSTAR:
      case OP_NOTPOSPLUS:
      case OP_NOTPOSQUERY:
      case OP_NOTPOSUPTO:

      case OP_NOTEXACTI:
      case OP_NOTPOSSTARI:
      case OP_NOTPOSPLUSI:
      case OP_NOTPOSQUERYI:
      case OP_NOTPOSUPTOI:

      case OP_TYPEEXACT:
      case OP_TYPEPOSSTAR:
      case OP_TYPEPOSPLUS:
      case OP_TYPEPOSQUERY:
      case OP_TYPEPOSUPTO:

      case OP_CLASS:
      case OP_NCLASS:
      case OP_XCLASS:

      case OP_CALLOUT:
      case OP_CALLOUT_STR:

        cc = next_opcode(common, cc);
        SLJIT_ASSERT(cc != NULL);
        break;
    }

  /* Possessive quantifiers can use a special case. */
  if (SLJIT_UNLIKELY(possessive == length))
    return stack_restore ? no_frame : no_stack;

  /* One more word for the terminator stored by init_frame(). */
  if (length > 0)
    return length + 1;
  return stack_restore ? no_frame : no_stack;
}
1698
/* Emits the code which builds a frame on the stack addressed by STACK_TOP
for the byte code range [cc, ccend). When ccend is NULL, the range is the
content of the bracket at cc.

The frame starts at stack slot stackpos and grows toward stacktop. It is a
list of records followed by a zero word:
  - start of match, mark and last capture: the negated offset of the
    variable, then its current value (2 words);
  - capturing bracket: the offset of its first ovector entry, then the
    current values of both entries (3 words).

The opcodes handled here and the size of their records must match the
computation in get_framesize(). */
static void init_frame(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, int stackpos, int stacktop)
{
  DEFINE_COMPILER;
  /* Each of these variables is saved at most once per frame. */
  BOOL setsom_found = FALSE;
  BOOL setmark_found = FALSE;
  /* The last capture is a local variable even for recursions. */
  BOOL capture_last_found = FALSE;
  int offset;

  /* >= 1 + shortest item size (2) */
  SLJIT_UNUSED_ARG(stacktop);
  SLJIT_ASSERT(stackpos >= stacktop + 2);

  /* From here on stackpos is the value of STACK() for the slot, and it is
  decreased by one word after each store. */
  stackpos = STACK(stackpos);
  if (ccend == NULL)
  {
    ccend = bracketend(cc) - (1 + LINK_SIZE);
    /* A possessive capturing bracket is not skipped: it is processed by the
    capturing bracket case below. */
    if (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS)
      cc = next_opcode(common, cc);
  }

  SLJIT_ASSERT(cc != NULL);
  while (cc < ccend)
    switch(*cc)
    {
      case OP_SET_SOM:
        SLJIT_ASSERT(common->has_set_som);
        if (!setsom_found)
        {
          /* Record: -OVECTOR(0), current value of OVECTOR(0). */
          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
          stackpos -= (int)sizeof(sljit_sw);
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
          stackpos -= (int)sizeof(sljit_sw);
          setsom_found = TRUE;
        }
        cc += 1;
        break;

      case OP_MARK:
      case OP_COMMIT_ARG:
      case OP_PRUNE_ARG:
      case OP_THEN_ARG:
        SLJIT_ASSERT(common->mark_ptr != 0);
        if (!setmark_found)
        {
          /* Record: -mark_ptr, current value of the mark. */
          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
          stackpos -= (int)sizeof(sljit_sw);
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
          stackpos -= (int)sizeof(sljit_sw);
          setmark_found = TRUE;
        }
        cc += 1 + 2 + cc[1];
        break;

      case OP_RECURSE:
        /* Saves every variable in use which has not been saved yet. */
        if (common->has_set_som && !setsom_found)
        {
          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
          stackpos -= (int)sizeof(sljit_sw);
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
          stackpos -= (int)sizeof(sljit_sw);
          setsom_found = TRUE;
        }
        if (common->mark_ptr != 0 && !setmark_found)
        {
          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
          stackpos -= (int)sizeof(sljit_sw);
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
          stackpos -= (int)sizeof(sljit_sw);
          setmark_found = TRUE;
        }
        if (common->capture_last_ptr != 0 && !capture_last_found)
        {
          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
          stackpos -= (int)sizeof(sljit_sw);
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
          stackpos -= (int)sizeof(sljit_sw);
          capture_last_found = TRUE;
        }
        cc += 1 + LINK_SIZE;
        break;

      case OP_CBRA:
      case OP_CBRAPOS:
      case OP_SCBRA:
      case OP_SCBRAPOS:
        if (common->capture_last_ptr != 0 && !capture_last_found)
        {
          /* Record: -capture_last_ptr, current value of the last capture. */
          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
          stackpos -= (int)sizeof(sljit_sw);
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
          stackpos -= (int)sizeof(sljit_sw);
          capture_last_found = TRUE;
        }
        /* Index of the first of the two ovector entries of this bracket. */
        offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
        /* Record: OVECTOR(offset) (not negated), then both current values. */
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
        stackpos -= (int)sizeof(sljit_sw);
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
        stackpos -= (int)sizeof(sljit_sw);
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
        stackpos -= (int)sizeof(sljit_sw);

        cc += 1 + LINK_SIZE + IMM2_SIZE;
        break;

      default:
        cc = next_opcode(common, cc);
        SLJIT_ASSERT(cc != NULL);
        break;
    }

  /* A zero word closes the frame; it must land exactly on slot stacktop. */
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
  SLJIT_ASSERT(stackpos == STACK(stacktop));
}
1821
/* Number of temporary registers used in rotation by the delayed memory copy
helpers below. */
#define RECURSE_TMP_REG_COUNT 3

/* State of a sequence of memory to memory moves. Each move loads its value
into the next temporary register, and the value is stored only when that
register is needed again (or by delayed_mem_copy_finish()). The caller fills
tmp_regs and saved_tmp_regs before calling delayed_mem_copy_init(). */
typedef struct delayed_mem_copy_status {
  /* Compiler the instructions are emitted to. */
  struct sljit_compiler *compiler;
  /* Base register of the pending store of each slot, -1 when none. */
  int store_bases[RECURSE_TMP_REG_COUNT];
  /* Offset of the pending store of each slot. */
  int store_offsets[RECURSE_TMP_REG_COUNT];
  /* Register which carries the value of each slot. */
  int tmp_regs[RECURSE_TMP_REG_COUNT];
  /* When sljit_get_register_index() is negative for this register, the
  original value of tmp_regs[i] is kept here during the copy; otherwise it
  must be the same register as tmp_regs[i]. */
  int saved_tmp_regs[RECURSE_TMP_REG_COUNT];
  /* Slot used by the next move. */
  int next_tmp_reg;
} delayed_mem_copy_status;
1832
/* Prepares status for a new sequence of delayed moves: no slot has a pending
store, the rotation starts at slot 0 and the instructions go to
common->compiler. The tmp_regs and saved_tmp_regs arrays must have been
filled by the caller. */
static void delayed_mem_copy_init(delayed_mem_copy_status *status, compiler_common *common)
{
  int slot = 0;

  status->compiler = common->compiler;
  status->next_tmp_reg = 0;

  while (slot < RECURSE_TMP_REG_COUNT)
  {
    SLJIT_ASSERT(status->tmp_regs[slot] >= 0);
    /* A distinct save register is allowed only when it has no register index. */
    SLJIT_ASSERT(sljit_get_register_index(status->saved_tmp_regs[slot]) < 0 || status->tmp_regs[slot] == status->saved_tmp_regs[slot]);

    /* -1 marks a slot without pending store. */
    status->store_bases[slot] = -1;
    slot++;
  }
}
1847
/* Queues the copy of one word from [load_base + load_offset] to
[store_base + store_offset]. The load is emitted immediately into the next
temporary register of the rotation; the store is delayed until the register
is reused or delayed_mem_copy_finish() is called. */
static void delayed_mem_copy_move(delayed_mem_copy_status *status, int load_base, sljit_sw load_offset,
  int store_base, sljit_sw store_offset)
{
  struct sljit_compiler *compiler = status->compiler;
  const int slot = status->next_tmp_reg;
  const int reg = status->tmp_regs[slot];
  const int pending_base = status->store_bases[slot];

  SLJIT_ASSERT(load_base > 0 && store_base > 0);

  if (pending_base != -1)
  {
    /* The register still carries an earlier value: store it first. */
    OP1(SLJIT_MOV, SLJIT_MEM1(pending_base), status->store_offsets[slot], reg, 0);
  }
  else if (sljit_get_register_index(status->saved_tmp_regs[slot]) < 0)
  {
    /* First use of this slot. Preserve virtual registers. */
    OP1(SLJIT_MOV, status->saved_tmp_regs[slot], 0, reg, 0);
  }

  OP1(SLJIT_MOV, reg, 0, SLJIT_MEM1(load_base), load_offset);

  status->store_bases[slot] = store_base;
  status->store_offsets[slot] = store_offset;
  status->next_tmp_reg = (slot + 1) % RECURSE_TMP_REG_COUNT;
}
1872
/* Emits the stores which are still pending, visiting the slots in rotation
order starting at status->next_tmp_reg, and restores the temporary registers
whose original value was moved to a save register. */
static void delayed_mem_copy_finish(delayed_mem_copy_status *status)
{
  struct sljit_compiler *compiler = status->compiler;
  int slot = status->next_tmp_reg;
  int remaining;

  for (remaining = RECURSE_TMP_REG_COUNT; remaining > 0; remaining--)
  {
    if (status->store_bases[slot] != -1)
    {
      OP1(SLJIT_MOV, SLJIT_MEM1(status->store_bases[slot]), status->store_offsets[slot], status->tmp_regs[slot], 0);

      /* Restore virtual registers. */
      if (sljit_get_register_index(status->saved_tmp_regs[slot]) < 0)
        OP1(SLJIT_MOV, status->tmp_regs[slot], 0, status->saved_tmp_regs[slot], 0);
    }

    slot = (slot + 1) % RECURSE_TMP_REG_COUNT;
  }
}
1896
1897 #undef RECURSE_TMP_REG_COUNT
1898
/* Computes the number of machine words which copy_recurse_data() transfers
for the byte code range [cc, ccend), and collects some properties of the
range.

Arguments:
  common               compiler state
  cc, ccend            the byte code range; the walk must end exactly at ccend
  needs_control_head   set to TRUE if the control head has to be saved
  has_quit             set to TRUE if a (*PRUNE), (*SKIP), (*COMMIT) or
                       (*THEN) opcode (with or without argument) was found
  has_accept           set to TRUE if OP_ACCEPT or OP_ASSERT_ACCEPT was found

Returns: the number of words */
static int get_recurse_data_length(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend,
  BOOL *needs_control_head, BOOL *has_quit, BOOL *has_accept)
{
  /* Starts at one: copy_recurse_data() always transfers
  common->recursive_head_ptr first. */
  int words = 1;
  int class_len;
  /* Non-zero when the current item is a single character iterator: the
  distance from the opcode to the end of the first code unit of the character. */
  int char_iter_len;
  PCRE2_SPTR first_link;
  BOOL saw_quit = FALSE;
  BOOL saw_accept = FALSE;
  BOOL saw_set_som = FALSE;
  BOOL saw_mark = FALSE;
  BOOL saw_capture_last = FALSE;
  BOOL saw_control_head = FALSE;

#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
  SLJIT_ASSERT(common->control_head_ptr != 0);
  saw_control_head = TRUE;
#endif

  /* Calculate the sum of the private machine words. */
  while (cc < ccend)
  {
    char_iter_len = 0;

    switch (*cc)
    {
      case OP_SET_SOM:
        SLJIT_ASSERT(common->has_set_som);
        saw_set_som = TRUE;
        cc += 1;
        break;

      case OP_RECURSE:
        /* A nested recursion may touch every variable which is in use. */
        if (common->has_set_som)
          saw_set_som = TRUE;
        if (common->mark_ptr != 0)
          saw_mark = TRUE;
        if (common->capture_last_ptr != 0)
          saw_capture_last = TRUE;
        cc += 1 + LINK_SIZE;
        break;

      case OP_KET:
        /* A slot on OP_KET belongs to a detected repeat; the entry after it
        holds the distance to skip. */
        if (PRIVATE_DATA(cc) != 0)
        {
          words++;
          SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
          cc += PRIVATE_DATA(cc + 1);
        }
        cc += 1 + LINK_SIZE;
        break;

      case OP_ASSERT:
      case OP_ASSERT_NOT:
      case OP_ASSERTBACK:
      case OP_ASSERTBACK_NOT:
      case OP_ONCE:
      case OP_BRAPOS:
      case OP_SBRA:
      case OP_SBRAPOS:
      case OP_SCOND:
        words++;
        SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
        cc += 1 + LINK_SIZE;
        break;

      case OP_CBRA:
      case OP_SCBRA:
        /* Two ovector entries, plus one word when the bracket is not optimized. */
        words += 2;
        if (common->capture_last_ptr != 0)
          saw_capture_last = TRUE;
        if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
          words++;
        cc += 1 + LINK_SIZE + IMM2_SIZE;
        break;

      case OP_CBRAPOS:
      case OP_SCBRAPOS:
        /* Two ovector entries and two private words. */
        words += 2 + 2;
        if (common->capture_last_ptr != 0)
          saw_capture_last = TRUE;
        cc += 1 + LINK_SIZE + IMM2_SIZE;
        break;

      case OP_COND:
        /* Might be a hidden SCOND. */
        first_link = cc + GET(cc, 1);
        if (*first_link == OP_KETRMAX || *first_link == OP_KETRMIN)
          words++;
        cc += 1 + LINK_SIZE;
        break;

      CASE_ITERATOR_PRIVATE_DATA_1
        if (PRIVATE_DATA(cc) != 0)
          words++;
        char_iter_len = 2;
        break;

      CASE_ITERATOR_PRIVATE_DATA_2A
        if (PRIVATE_DATA(cc) != 0)
          words += 2;
        char_iter_len = 2;
        break;

      CASE_ITERATOR_PRIVATE_DATA_2B
        if (PRIVATE_DATA(cc) != 0)
          words += 2;
        char_iter_len = 2 + IMM2_SIZE;
        break;

      /* For the type iterators only the opcode (and the count, if any) is
      skipped here; the type which follows is visited by the next round. */
      CASE_ITERATOR_TYPE_PRIVATE_DATA_1
        if (PRIVATE_DATA(cc) != 0)
          words++;
        cc += 1;
        break;

      CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
        if (PRIVATE_DATA(cc) != 0)
          words += 2;
        cc += 1;
        break;

      CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
        if (PRIVATE_DATA(cc) != 0)
          words += 2;
        cc += 1 + IMM2_SIZE;
        break;

      case OP_CLASS:
      case OP_NCLASS:
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
      case OP_XCLASS:
#endif
        class_len = 1 + 32 / (int)sizeof(PCRE2_UCHAR);
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
        /* The length of an extended class is stored after its opcode. */
        if (*cc == OP_XCLASS)
          class_len = GET(cc, 1);
#endif
        /* The repeat opcode, if there is one, follows the class. */
        if (PRIVATE_DATA(cc) != 0)
          words += get_class_iterator_size(cc + class_len);
        cc += class_len;
        break;

      case OP_MARK:
      case OP_COMMIT_ARG:
      case OP_PRUNE_ARG:
      case OP_THEN_ARG:
        SLJIT_ASSERT(common->mark_ptr != 0);
        saw_mark = TRUE;
        if (common->control_head_ptr != 0)
          saw_control_head = TRUE;
        /* OP_MARK is the only one of these which is not a quit verb. */
        if (*cc != OP_MARK)
          saw_quit = TRUE;

        cc += 1 + 2 + cc[1];
        break;

      case OP_PRUNE:
      case OP_SKIP:
      case OP_COMMIT:
        saw_quit = TRUE;
        cc++;
        break;

      case OP_SKIP_ARG:
        saw_quit = TRUE;
        cc += 1 + 2 + cc[1];
        break;

      case OP_THEN:
        SLJIT_ASSERT(common->control_head_ptr != 0);
        saw_quit = TRUE;
        saw_control_head = TRUE;
        cc++;
        break;

      case OP_ACCEPT:
      case OP_ASSERT_ACCEPT:
        saw_accept = TRUE;
        cc++;
        break;

      default:
        cc = next_opcode(common, cc);
        SLJIT_ASSERT(cc != NULL);
        break;
    }

    if (char_iter_len != 0)
    {
      cc += char_iter_len;
#ifdef SUPPORT_UNICODE
      /* Skip the extra code units of the character just passed. */
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    }
  }
  SLJIT_ASSERT(cc == ccend);

  if (saw_control_head)
    words++;
  if (saw_capture_last)
    words++;
  /* Start of match and mark get a word only when a quit verb is present. */
  if (saw_quit)
  {
    if (saw_set_som)
      words++;
    if (saw_mark)
      words++;
  }

  *needs_control_head = saw_control_head;
  *has_quit = saw_quit;
  *has_accept = saw_accept;
  return words;
}
2113
/* Operation selectors passed as the 'type' argument of copy_recurse_data().
The names suggest which group of values (private, shared, kept shared) is
moved and in which direction - NOTE(review): confirm the exact meaning of
each group against the full body of copy_recurse_data(). */
enum copy_recurse_data_types {
  /* Values are read through SLJIT_SP and written through STACK_TOP. */
  recurse_copy_from_global,
  /* The next three read through STACK_TOP and write through SLJIT_SP. */
  recurse_copy_private_to_global,
  recurse_copy_shared_to_global,
  recurse_copy_kept_shared_to_global,
  /* Uses TMP2 as the base register; for the recursive head, moves are
  queued in both directions. */
  recurse_swap_global
};
2121
copy_recurse_data(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,int type,int stackptr,int stacktop,BOOL has_quit)2122 static void copy_recurse_data(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend,
2123 int type, int stackptr, int stacktop, BOOL has_quit)
2124 {
2125 delayed_mem_copy_status status;
2126 PCRE2_SPTR alternative;
2127 sljit_sw private_srcw[2];
2128 sljit_sw shared_srcw[3];
2129 sljit_sw kept_shared_srcw[2];
2130 int private_count, shared_count, kept_shared_count;
2131 int from_sp, base_reg, offset, i;
2132 BOOL setsom_found = FALSE;
2133 BOOL setmark_found = FALSE;
2134 BOOL capture_last_found = FALSE;
2135 BOOL control_head_found = FALSE;
2136
2137 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
2138 SLJIT_ASSERT(common->control_head_ptr != 0);
2139 control_head_found = TRUE;
2140 #endif
2141
2142 switch (type)
2143 {
2144 case recurse_copy_from_global:
2145 from_sp = TRUE;
2146 base_reg = STACK_TOP;
2147 break;
2148
2149 case recurse_copy_private_to_global:
2150 case recurse_copy_shared_to_global:
2151 case recurse_copy_kept_shared_to_global:
2152 from_sp = FALSE;
2153 base_reg = STACK_TOP;
2154 break;
2155
2156 default:
2157 SLJIT_ASSERT(type == recurse_swap_global);
2158 from_sp = FALSE;
2159 base_reg = TMP2;
2160 break;
2161 }
2162
2163 stackptr = STACK(stackptr);
2164 stacktop = STACK(stacktop);
2165
2166 status.tmp_regs[0] = TMP1;
2167 status.saved_tmp_regs[0] = TMP1;
2168
2169 if (base_reg != TMP2)
2170 {
2171 status.tmp_regs[1] = TMP2;
2172 status.saved_tmp_regs[1] = TMP2;
2173 }
2174 else
2175 {
2176 status.saved_tmp_regs[1] = RETURN_ADDR;
2177 if (sljit_get_register_index (RETURN_ADDR) == -1)
2178 status.tmp_regs[1] = STR_PTR;
2179 else
2180 status.tmp_regs[1] = RETURN_ADDR;
2181 }
2182
2183 status.saved_tmp_regs[2] = TMP3;
2184 if (sljit_get_register_index (TMP3) == -1)
2185 status.tmp_regs[2] = STR_END;
2186 else
2187 status.tmp_regs[2] = TMP3;
2188
2189 delayed_mem_copy_init(&status, common);
2190
2191 if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)
2192 {
2193 SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global);
2194
2195 if (!from_sp)
2196 delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, common->recursive_head_ptr);
2197
2198 if (from_sp || type == recurse_swap_global)
2199 delayed_mem_copy_move(&status, SLJIT_SP, common->recursive_head_ptr, base_reg, stackptr);
2200 }
2201
2202 stackptr += sizeof(sljit_sw);
2203
2204 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
2205 if (type != recurse_copy_shared_to_global)
2206 {
2207 if (!from_sp)
2208 delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, common->control_head_ptr);
2209
2210 if (from_sp || type == recurse_swap_global)
2211 delayed_mem_copy_move(&status, SLJIT_SP, common->control_head_ptr, base_reg, stackptr);
2212 }
2213
2214 stackptr += sizeof(sljit_sw);
2215 #endif
2216
2217 while (cc < ccend)
2218 {
2219 private_count = 0;
2220 shared_count = 0;
2221 kept_shared_count = 0;
2222
2223 switch(*cc)
2224 {
2225 case OP_SET_SOM:
2226 SLJIT_ASSERT(common->has_set_som);
2227 if (has_quit && !setsom_found)
2228 {
2229 kept_shared_srcw[0] = OVECTOR(0);
2230 kept_shared_count = 1;
2231 setsom_found = TRUE;
2232 }
2233 cc += 1;
2234 break;
2235
2236 case OP_RECURSE:
2237 if (has_quit)
2238 {
2239 if (common->has_set_som && !setsom_found)
2240 {
2241 kept_shared_srcw[0] = OVECTOR(0);
2242 kept_shared_count = 1;
2243 setsom_found = TRUE;
2244 }
2245 if (common->mark_ptr != 0 && !setmark_found)
2246 {
2247 kept_shared_srcw[kept_shared_count] = common->mark_ptr;
2248 kept_shared_count++;
2249 setmark_found = TRUE;
2250 }
2251 }
2252 if (common->capture_last_ptr != 0 && !capture_last_found)
2253 {
2254 shared_srcw[0] = common->capture_last_ptr;
2255 shared_count = 1;
2256 capture_last_found = TRUE;
2257 }
2258 cc += 1 + LINK_SIZE;
2259 break;
2260
2261 case OP_KET:
2262 if (PRIVATE_DATA(cc) != 0)
2263 {
2264 private_count = 1;
2265 private_srcw[0] = PRIVATE_DATA(cc);
2266 SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
2267 cc += PRIVATE_DATA(cc + 1);
2268 }
2269 cc += 1 + LINK_SIZE;
2270 break;
2271
2272 case OP_ASSERT:
2273 case OP_ASSERT_NOT:
2274 case OP_ASSERTBACK:
2275 case OP_ASSERTBACK_NOT:
2276 case OP_ONCE:
2277 case OP_BRAPOS:
2278 case OP_SBRA:
2279 case OP_SBRAPOS:
2280 case OP_SCOND:
2281 private_count = 1;
2282 private_srcw[0] = PRIVATE_DATA(cc);
2283 cc += 1 + LINK_SIZE;
2284 break;
2285
2286 case OP_CBRA:
2287 case OP_SCBRA:
2288 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
2289 shared_srcw[0] = OVECTOR(offset);
2290 shared_srcw[1] = OVECTOR(offset + 1);
2291 shared_count = 2;
2292
2293 if (common->capture_last_ptr != 0 && !capture_last_found)
2294 {
2295 shared_srcw[2] = common->capture_last_ptr;
2296 shared_count = 3;
2297 capture_last_found = TRUE;
2298 }
2299
2300 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
2301 {
2302 private_count = 1;
2303 private_srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
2304 }
2305 cc += 1 + LINK_SIZE + IMM2_SIZE;
2306 break;
2307
2308 case OP_CBRAPOS:
2309 case OP_SCBRAPOS:
2310 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
2311 shared_srcw[0] = OVECTOR(offset);
2312 shared_srcw[1] = OVECTOR(offset + 1);
2313 shared_count = 2;
2314
2315 if (common->capture_last_ptr != 0 && !capture_last_found)
2316 {
2317 shared_srcw[2] = common->capture_last_ptr;
2318 shared_count = 3;
2319 capture_last_found = TRUE;
2320 }
2321
2322 private_count = 2;
2323 private_srcw[0] = PRIVATE_DATA(cc);
2324 private_srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
2325 cc += 1 + LINK_SIZE + IMM2_SIZE;
2326 break;
2327
2328 case OP_COND:
2329 /* Might be a hidden SCOND. */
2330 alternative = cc + GET(cc, 1);
2331 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
2332 {
2333 private_count = 1;
2334 private_srcw[0] = PRIVATE_DATA(cc);
2335 }
2336 cc += 1 + LINK_SIZE;
2337 break;
2338
2339 CASE_ITERATOR_PRIVATE_DATA_1
2340 if (PRIVATE_DATA(cc))
2341 {
2342 private_count = 1;
2343 private_srcw[0] = PRIVATE_DATA(cc);
2344 }
2345 cc += 2;
2346 #ifdef SUPPORT_UNICODE
2347 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2348 #endif
2349 break;
2350
2351 CASE_ITERATOR_PRIVATE_DATA_2A
2352 if (PRIVATE_DATA(cc))
2353 {
2354 private_count = 2;
2355 private_srcw[0] = PRIVATE_DATA(cc);
2356 private_srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
2357 }
2358 cc += 2;
2359 #ifdef SUPPORT_UNICODE
2360 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2361 #endif
2362 break;
2363
2364 CASE_ITERATOR_PRIVATE_DATA_2B
2365 if (PRIVATE_DATA(cc))
2366 {
2367 private_count = 2;
2368 private_srcw[0] = PRIVATE_DATA(cc);
2369 private_srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
2370 }
2371 cc += 2 + IMM2_SIZE;
2372 #ifdef SUPPORT_UNICODE
2373 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2374 #endif
2375 break;
2376
2377 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
2378 if (PRIVATE_DATA(cc))
2379 {
2380 private_count = 1;
2381 private_srcw[0] = PRIVATE_DATA(cc);
2382 }
2383 cc += 1;
2384 break;
2385
2386 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
2387 if (PRIVATE_DATA(cc))
2388 {
2389 private_count = 2;
2390 private_srcw[0] = PRIVATE_DATA(cc);
2391 private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
2392 }
2393 cc += 1;
2394 break;
2395
2396 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2397 if (PRIVATE_DATA(cc))
2398 {
2399 private_count = 2;
2400 private_srcw[0] = PRIVATE_DATA(cc);
2401 private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
2402 }
2403 cc += 1 + IMM2_SIZE;
2404 break;
2405
2406 case OP_CLASS:
2407 case OP_NCLASS:
2408 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
2409 case OP_XCLASS:
2410 i = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR);
2411 #else
2412 i = 1 + 32 / (int)sizeof(PCRE2_UCHAR);
2413 #endif
2414 if (PRIVATE_DATA(cc) != 0)
2415 switch(get_class_iterator_size(cc + i))
2416 {
2417 case 1:
2418 private_count = 1;
2419 private_srcw[0] = PRIVATE_DATA(cc);
2420 break;
2421
2422 case 2:
2423 private_count = 2;
2424 private_srcw[0] = PRIVATE_DATA(cc);
2425 private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
2426 break;
2427
2428 default:
2429 SLJIT_UNREACHABLE();
2430 break;
2431 }
2432 cc += i;
2433 break;
2434
2435 case OP_MARK:
2436 case OP_COMMIT_ARG:
2437 case OP_PRUNE_ARG:
2438 case OP_THEN_ARG:
2439 SLJIT_ASSERT(common->mark_ptr != 0);
2440 if (has_quit && !setmark_found)
2441 {
2442 kept_shared_srcw[0] = common->mark_ptr;
2443 kept_shared_count = 1;
2444 setmark_found = TRUE;
2445 }
2446 if (common->control_head_ptr != 0 && !control_head_found)
2447 {
2448 shared_srcw[0] = common->control_head_ptr;
2449 shared_count = 1;
2450 control_head_found = TRUE;
2451 }
2452 cc += 1 + 2 + cc[1];
2453 break;
2454
2455 case OP_THEN:
2456 SLJIT_ASSERT(common->control_head_ptr != 0);
2457 if (!control_head_found)
2458 {
2459 shared_srcw[0] = common->control_head_ptr;
2460 shared_count = 1;
2461 control_head_found = TRUE;
2462 }
2463 cc++;
2464 break;
2465
2466 default:
2467 cc = next_opcode(common, cc);
2468 SLJIT_ASSERT(cc != NULL);
2469 break;
2470 }
2471
2472 if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)
2473 {
2474 SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global);
2475
2476 for (i = 0; i < private_count; i++)
2477 {
2478 SLJIT_ASSERT(private_srcw[i] != 0);
2479
2480 if (!from_sp)
2481 delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, private_srcw[i]);
2482
2483 if (from_sp || type == recurse_swap_global)
2484 delayed_mem_copy_move(&status, SLJIT_SP, private_srcw[i], base_reg, stackptr);
2485
2486 stackptr += sizeof(sljit_sw);
2487 }
2488 }
2489 else
2490 stackptr += sizeof(sljit_sw) * private_count;
2491
2492 if (type != recurse_copy_private_to_global && type != recurse_copy_kept_shared_to_global)
2493 {
2494 SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_swap_global);
2495
2496 for (i = 0; i < shared_count; i++)
2497 {
2498 SLJIT_ASSERT(shared_srcw[i] != 0);
2499
2500 if (!from_sp)
2501 delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, shared_srcw[i]);
2502
2503 if (from_sp || type == recurse_swap_global)
2504 delayed_mem_copy_move(&status, SLJIT_SP, shared_srcw[i], base_reg, stackptr);
2505
2506 stackptr += sizeof(sljit_sw);
2507 }
2508 }
2509 else
2510 stackptr += sizeof(sljit_sw) * shared_count;
2511
2512 if (type != recurse_copy_private_to_global && type != recurse_swap_global)
2513 {
2514 SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_copy_kept_shared_to_global);
2515
2516 for (i = 0; i < kept_shared_count; i++)
2517 {
2518 SLJIT_ASSERT(kept_shared_srcw[i] != 0);
2519
2520 if (!from_sp)
2521 delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, kept_shared_srcw[i]);
2522
2523 if (from_sp || type == recurse_swap_global)
2524 delayed_mem_copy_move(&status, SLJIT_SP, kept_shared_srcw[i], base_reg, stackptr);
2525
2526 stackptr += sizeof(sljit_sw);
2527 }
2528 }
2529 else
2530 stackptr += sizeof(sljit_sw) * kept_shared_count;
2531 }
2532
2533 SLJIT_ASSERT(cc == ccend && stackptr == stacktop);
2534
2535 delayed_mem_copy_finish(&status);
2536 }
2537
set_then_offsets(compiler_common * common,PCRE2_SPTR cc,sljit_u8 * current_offset)2538 static SLJIT_INLINE PCRE2_SPTR set_then_offsets(compiler_common *common, PCRE2_SPTR cc, sljit_u8 *current_offset)
2539 {
2540 PCRE2_SPTR end = bracketend(cc);
2541 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
2542
2543 /* Assert captures then. */
2544 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
2545 current_offset = NULL;
2546 /* Conditional block does not. */
2547 if (*cc == OP_COND || *cc == OP_SCOND)
2548 has_alternatives = FALSE;
2549
2550 cc = next_opcode(common, cc);
2551 if (has_alternatives)
2552 current_offset = common->then_offsets + (cc - common->start);
2553
2554 while (cc < end)
2555 {
2556 if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
2557 cc = set_then_offsets(common, cc, current_offset);
2558 else
2559 {
2560 if (*cc == OP_ALT && has_alternatives)
2561 current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
2562 if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
2563 *current_offset = 1;
2564 cc = next_opcode(common, cc);
2565 }
2566 }
2567
2568 return end;
2569 }
2570
2571 #undef CASE_ITERATOR_PRIVATE_DATA_1
2572 #undef CASE_ITERATOR_PRIVATE_DATA_2A
2573 #undef CASE_ITERATOR_PRIVATE_DATA_2B
2574 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
2575 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
2576 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2577
is_powerof2(unsigned int value)2578 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
2579 {
2580 return (value & (value - 1)) == 0;
2581 }
2582
set_jumps(jump_list * list,struct sljit_label * label)2583 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
2584 {
2585 while (list)
2586 {
2587 /* sljit_set_label is clever enough to do nothing
2588 if either the jump or the label is NULL. */
2589 SET_LABEL(list->jump, label);
2590 list = list->next;
2591 }
2592 }
2593
add_jump(struct sljit_compiler * compiler,jump_list ** list,struct sljit_jump * jump)2594 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump)
2595 {
2596 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
2597 if (list_item)
2598 {
2599 list_item->next = *list;
2600 list_item->jump = jump;
2601 *list = list_item;
2602 }
2603 }
2604
static void add_stub(compiler_common *common, struct sljit_jump *start)
{
/* Registers a stub: remembers the jump that enters it and a label emitted at
the current position, which flush_stubs() later jumps back to. Nothing is
recorded (and no label is emitted) if the allocation fails. */
DEFINE_COMPILER;
stub_list *stub = sljit_alloc_memory(compiler, sizeof(stub_list));

if (stub == NULL)
  return;

stub->start = start;
stub->quit = LABEL();
stub->next = common->stubs;
common->stubs = stub;
}
2618
static void flush_stubs(compiler_common *common)
{
/* Emits the body of every pending stub: the entry jump lands here, a fast
call is queued on the stackalloc list, and control then jumps back to the
label recorded when the stub was added. The pending list is emptied. */
DEFINE_COMPILER;
stub_list *stub;

for (stub = common->stubs; stub != NULL; stub = stub->next)
  {
  JUMPHERE(stub->start);
  add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
  JUMPTO(SLJIT_JUMP, stub->quit);
  }

common->stubs = NULL;
}
2633
static void add_label_addr(compiler_common *common, sljit_uw *update_addr)
{
/* Emits a label at the current position and records it together with
update_addr at the head of common->label_addrs. Does nothing when the
allocation fails. */
DEFINE_COMPILER;
label_addr_list *entry = sljit_alloc_memory(compiler, sizeof(label_addr_list));

if (entry != NULL)
  {
  entry->label = LABEL();
  entry->update_addr = update_addr;
  entry->next = common->label_addrs;
  common->label_addrs = entry;
  }
}
2647
count_match(compiler_common * common)2648 static SLJIT_INLINE void count_match(compiler_common *common)
2649 {
2650 DEFINE_COMPILER;
2651
2652 OP2(SLJIT_SUB | SLJIT_SET_Z, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
2653 add_jump(compiler, &common->calllimit, JUMP(SLJIT_ZERO));
2654 }
2655
allocate_stack(compiler_common * common,int size)2656 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
2657 {
2658 /* May destroy all locals and registers except TMP2. */
2659 DEFINE_COMPILER;
2660
2661 SLJIT_ASSERT(size > 0);
2662 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2663 #ifdef DESTROY_REGISTERS
2664 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
2665 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2666 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2667 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0);
2668 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
2669 #endif
2670 add_stub(common, CMP(SLJIT_LESS, STACK_TOP, 0, STACK_LIMIT, 0));
2671 }
2672
free_stack(compiler_common * common,int size)2673 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
2674 {
2675 DEFINE_COMPILER;
2676
2677 SLJIT_ASSERT(size > 0);
2678 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2679 }
2680
static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
{
/* Allocates size bytes preceded by one hidden sljit_uw slot. The hidden slot
stores the previous common->read_only_data_head, so all chunks form a linked
list. Returns the address just after the hidden slot, or NULL when the
compiler is already in an error state or the allocation fails (in the latter
case the compiler memory error is set). */
DEFINE_COMPILER;
sljit_uw *chunk;

if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  return NULL;

chunk = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data);

if (SLJIT_UNLIKELY(chunk == NULL))
  {
  sljit_set_compiler_memory_error(compiler);
  return NULL;
  }

/* Link the new chunk in front of the existing ones. */
*(void**)chunk = common->read_only_data_head;
common->read_only_data_head = (void *)chunk;

return chunk + 1;
}
2700
/* Emits code that stores "begin - 1 code unit" (begin is read from the
jit_arguments structure addressed by SLJIT_S0) into OVECTOR(1) through
OVECTOR(length - 1). Short vectors are written with one store per slot;
longer ones with a counted loop. */
static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
{
DEFINE_COMPILER;
struct sljit_label *loop;
sljit_s32 i;

/* At this point we can freely use all temporary registers. */
SLJIT_ASSERT(length > 1);
/* SLJIT_R0 receives begin - 1. (The historical comment calls this register
TMP1; presumably TMP1 is an alias of SLJIT_R0 - confirm.) */
OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
if (length < 8)
  {
  /* Unrolled: one store per slot, starting at slot 1. */
  for (i = 1; i < length; i++)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0);
  }
else
  {
  /* The result of the SLJIT_MEM_SUPP call selects the loop form; presumably
  it only queries whether a pre-update store is available - confirm. */
  if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw)) == SLJIT_SUCCESS)
    {
    /* Base starts one word before the first slot written, because the
    address is advanced by sizeof(sljit_sw) as part of each store. */
    GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
    OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
    loop = LABEL();
    sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw));
    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, loop);
    }
  else
    {
    /* Fallback: plain store followed by an explicit pointer increment;
    SLJIT_R2 counts the length - 1 remaining slots down to zero. */
    GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START + sizeof(sljit_sw));
    OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
    loop = LABEL();
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R0, 0);
    OP2(SLJIT_ADD, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, sizeof(sljit_sw));
    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, loop);
    }
  }
}
2739
reset_fast_fail(compiler_common * common)2740 static SLJIT_INLINE void reset_fast_fail(compiler_common *common)
2741 {
2742 DEFINE_COMPILER;
2743 sljit_s32 i;
2744
2745 SLJIT_ASSERT(common->fast_fail_start_ptr < common->fast_fail_end_ptr);
2746
2747 OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2748 for (i = common->fast_fail_start_ptr; i < common->fast_fail_end_ptr; i += sizeof(sljit_sw))
2749 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), i, TMP1, 0);
2750 }
2751
/* Emits code that re-initialises the match state: OVECTOR(2) through
OVECTOR(length - 1) are overwritten with the value currently held in
OVECTOR(1), the mark and control-head locals (when present) are cleared,
TMP1 is loaded from the start_ptr local and STACK_TOP is reloaded from the
"end" field of the sljit_stack referenced by the jit arguments. */
static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
{
DEFINE_COMPILER;
struct sljit_label *loop;
int i;

SLJIT_ASSERT(length > 1);
/* OVECTOR(1) contains the "string begin - 1" constant. */
if (length > 2)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
if (length < 8)
  {
  /* Unrolled: one store per slot, starting at slot 2. */
  for (i = 2; i < length; i++)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), TMP1, 0);
  }
else
  {
  /* The result of the SLJIT_MEM_SUPP call selects the loop form; presumably
  it only queries whether a pre-update store is available - confirm. */
  if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw)) == SLJIT_SUCCESS)
    {
    /* STACK_TOP is used as the loop counter here; it is reloaded below. */
    GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
    loop = LABEL();
    sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
    OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, loop);
    }
  else
    {
    /* Fallback: plain store followed by an explicit pointer increment. */
    GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + 2 * sizeof(sljit_sw));
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
    loop = LABEL();
    OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(sljit_sw));
    OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, loop);
    }
  }

/* STACK_TOP is rebuilt in three steps (arguments -> stack -> end); the
unrelated stores and the TMP1 load are interleaved between those steps. */
OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
if (common->mark_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
if (common->control_head_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, end));
}
2799
do_search_mark(sljit_sw * current,PCRE2_SPTR skip_arg)2800 static sljit_sw SLJIT_FUNC do_search_mark(sljit_sw *current, PCRE2_SPTR skip_arg)
2801 {
2802 while (current != NULL)
2803 {
2804 switch (current[1])
2805 {
2806 case type_then_trap:
2807 break;
2808
2809 case type_mark:
2810 if (PRIV(strcmp)(skip_arg, (PCRE2_SPTR)current[2]) == 0)
2811 return current[3];
2812 break;
2813
2814 default:
2815 SLJIT_UNREACHABLE();
2816 break;
2817 }
2818 SLJIT_ASSERT(current[0] == 0 || current < (sljit_sw*)current[0]);
2819 current = (sljit_sw*)current[0];
2820 }
2821 return 0;
2822 }
2823
/* Emits the code that publishes a successful match: the match end (STR_PTR)
is stored into OVECTOR(1), the start_ptr local and (when present) the mark
are written into the jit arguments, and oveccount local ovector slots are
converted from pointers to code-unit offsets relative to "begin" and written
into the ovector of the match data. Finally the return value is computed in
SLJIT_RETURN_REG. */
static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
{
DEFINE_COMPILER;
struct sljit_label *loop;
BOOL has_pre;

/* At this point we can freely use all registers. */
/* SLJIT_S2 keeps the previous content of OVECTOR(1); the loops at the end
compare slots against it. */
OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);

OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
if (common->mark_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
/* SLJIT_R1 is the loop counter of the copy loop below. */
OP1(SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, oveccount));
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_S0, 0);
if (common->mark_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
/* SLJIT_R2 points one PCRE2_SIZE before the output ovector because the loop
advances it before each store. */
OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, match_data),
  SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE));

/* The result of the SLJIT_MEM_SUPP call selects the load form used in the
loop; presumably it only queries for pre-update load support - confirm. */
has_pre = sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw)) == SLJIT_SUCCESS;

/* With a pre-update load the source base starts one word early. */
GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START - (has_pre ? sizeof(sljit_sw) : 0));
/* SLJIT_R0 now holds "begin"; the arguments pointer it held is dropped. */
OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, begin));

loop = LABEL();

if (has_pre)
  sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw));
else
  {
  OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0);
  OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
  }

OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, sizeof(PCRE2_SIZE));
/* Convert the pointer into a byte distance from begin. */
OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_R0, 0);
/* Copy the integer value to the output buffer */
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
/* Scale the byte distance down to code units. */
OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif

SLJIT_ASSERT(sizeof(PCRE2_SIZE) == 4 || sizeof(PCRE2_SIZE) == 8);
OP1(((sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV), SLJIT_MEM1(SLJIT_R2), 0, SLJIT_S1, 0);

OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
JUMPTO(SLJIT_NOT_ZERO, loop);

/* Calculate the return value, which is the maximum ovector value. */
if (topbracket > 1)
  {
  /* Both variants scan every second slot downwards, starting at slot
  (topbracket - 1) * 2, decrementing SLJIT_R1 per step, and stop at the first
  slot that differs from SLJIT_S2. */
  if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw))) == SLJIT_SUCCESS)
    {
    GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
    OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);

    /* OVECTOR(0) is never equal to SLJIT_S2. */
    loop = LABEL();
    sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw)));
    OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
    CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
    OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
    }
  else
    {
    GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + (topbracket - 1) * 2 * sizeof(sljit_sw));
    OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);

    /* OVECTOR(0) is never equal to SLJIT_S2. */
    loop = LABEL();
    OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), 0);
    OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 2 * (sljit_sw)sizeof(sljit_sw));
    OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
    CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
    OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
    }
  }
else
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
}
2905
/* Emits the code that reports a partial match: the return register is set
to PCRE2_ERROR_PARTIAL, the partial match start is stored as startchar_ptr,
the first two output ovector entries receive the start and STR_END as
code-unit offsets from "begin", and control jumps to the quit label. */
static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
{
DEFINE_COMPILER;
sljit_s32 mov_opcode;

SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S0, str_end_must_be_saved_reg0);
SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
  && (common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start != 0 : common->hit_start == 0));

OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
/* The start comes from the hit_start local in soft mode and from the
start_ptr local otherwise. */
OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP),
  common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start : common->start_ptr);
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_PARTIAL);

/* Store match begin and end. */
OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, begin));
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_R2, 0);
/* From here on SLJIT_R1 addresses the match data, not the arguments. */
OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, match_data));

/* The store width follows the size of PCRE2_SIZE. */
mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;

/* ovector[0]: distance of the start from begin, scaled to code units. */
OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S1, 0);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
OP1(mov_opcode, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(pcre2_match_data, ovector), SLJIT_R2, 0);

/* ovector[1]: distance of STR_END from begin. STR_END itself is overwritten
by this computation. */
OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_S1, 0);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
OP2(SLJIT_ASHR, STR_END, 0, STR_END, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
OP1(mov_opcode, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(pcre2_match_data, ovector) + sizeof(PCRE2_SIZE), STR_END, 0);

JUMPTO(SLJIT_JUMP, quit);
}
2941
check_start_used_ptr(compiler_common * common)2942 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2943 {
2944 /* May destroy TMP1. */
2945 DEFINE_COMPILER;
2946 struct sljit_jump *jump;
2947
2948 if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
2949 {
2950 /* The value of -1 must be kept for start_used_ptr! */
2951 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
2952 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2953 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2954 jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2955 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2956 JUMPHERE(jump);
2957 }
2958 else if (common->mode == PCRE2_JIT_PARTIAL_HARD)
2959 {
2960 jump = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2961 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2962 JUMPHERE(jump);
2963 }
2964 }
2965
char_has_othercase(compiler_common * common,PCRE2_SPTR cc)2966 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, PCRE2_SPTR cc)
2967 {
2968 /* Detects if the character has an othercase. */
2969 unsigned int c;
2970
2971 #ifdef SUPPORT_UNICODE
2972 if (common->utf)
2973 {
2974 GETCHAR(c, cc);
2975 if (c > 127)
2976 {
2977 return c != UCD_OTHERCASE(c);
2978 }
2979 #if PCRE2_CODE_UNIT_WIDTH != 8
2980 return common->fcc[c] != c;
2981 #endif
2982 }
2983 else
2984 #endif
2985 c = *cc;
2986 return MAX_255(c) ? common->fcc[c] != c : FALSE;
2987 }
2988
char_othercase(compiler_common * common,unsigned int c)2989 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2990 {
2991 /* Returns with the othercase. */
2992 #ifdef SUPPORT_UNICODE
2993 if (common->utf && c > 127)
2994 {
2995 return UCD_OTHERCASE(c);
2996 }
2997 #endif
2998 return TABLE_GET(c, common->fcc, c);
2999 }
3000
/* Returns a non-zero descriptor when the character at cc and its other case
differ in exactly one bit, and 0 otherwise. The low 8 bits of the descriptor
hold the differing bit as a mask; the value stored above bit 8 is a small
position index (it appears to select the byte of the encoded character that
contains the bit - confirm against the callers). The character must have a
different other case (asserted). */
static unsigned int char_get_othercase_bit(compiler_common *common, PCRE2_SPTR cc)
{
/* Detects if the character and its othercase has only 1 bit difference. */
unsigned int c, oc, bit;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
int n;
#endif

#ifdef SUPPORT_UNICODE
if (common->utf)
  {
  GETCHAR(c, cc);
  if (c <= 127)
    oc = common->fcc[c];
  else
    {
    oc = UCD_OTHERCASE(c);
    }
  }
else
  {
  c = *cc;
  oc = TABLE_GET(c, common->fcc, c);
  }
#else
c = *cc;
oc = TABLE_GET(c, common->fcc, c);
#endif

SLJIT_ASSERT(c != oc);

bit = c ^ oc;
/* Optimized for English alphabet. */
if (c <= 127 && bit == 0x20)
  return (0 << 8) | 0x20;

/* Since c != oc, they must have at least 1 bit difference. */
if (!is_powerof2(bit))
  return 0;

#if PCRE2_CODE_UNIT_WIDTH == 8

#ifdef SUPPORT_UNICODE
if (common->utf && c > 127)
  {
  /* Start from the number of extra code units of the sequence and step one
  unit back for every 6 bits the differing bit has to be shifted down. */
  n = GET_EXTRALEN(*cc);
  while ((bit & 0x3f) == 0)
    {
    n--;
    bit >>= 6;
    }
  return (n << 8) | bit;
  }
#endif /* SUPPORT_UNICODE */
return (0 << 8) | bit;

#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32

#ifdef SUPPORT_UNICODE
if (common->utf && c > 65535)
  {
  /* A difference in bit 10 or above is shifted down by 10 and then encoded
  with index 0 or 1 below; a lower difference gets index 2 or 3. */
  if (bit >= (1 << 10))
    bit >>= 10;
  else
    return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
  }
#endif /* SUPPORT_UNICODE */
return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));

#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
}
3072
static void check_partial(compiler_common *common, BOOL force)
{
/* Emits the partial match check for the current position; nothing is
emitted in complete mode. Without force the action is skipped when
start_used_ptr >= STR_PTR; with force it is skipped only in soft mode when
start_used_ptr is -1. The action is clearing hit_start (soft mode) or
jumping to the partial match exit (hard mode). Does not modify registers. */
DEFINE_COMPILER;
struct sljit_jump *skip = NULL;
const BOOL soft_mode = (common->mode == PCRE2_JIT_PARTIAL_SOFT);

SLJIT_ASSERT(!force || common->mode != PCRE2_JIT_COMPLETE);

if (common->mode == PCRE2_JIT_COMPLETE)
  return;

if (force)
  {
  if (soft_mode)
    skip = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
  }
else
  skip = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);

if (soft_mode)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
else if (common->partialmatchlabel != NULL)
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
else
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));

if (skip != NULL)
  JUMPHERE(skip);
}
3102
static void check_str_end(compiler_common *common, jump_list **end_reached)
{
/* Emits an end-of-subject test. Jumps taken when the end is reached are
collected on end_reached; in hard partial mode the path may instead leave
through the partial match exit. Does not affect registers. Usually used in
a tight spot. */
DEFINE_COMPILER;
struct sljit_jump *in_bounds;

if (common->mode == PCRE2_JIT_COMPLETE)
  {
  add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  return;
  }

/* Partial matching modes: skip everything while input remains. */
in_bounds = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));

if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
  }
else if (common->partialmatchlabel != NULL)
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
else
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));

JUMPHERE(in_bounds);
}
3132
static void detect_partial_match(compiler_common *common, jump_list **backtracks)
{
/* Emits an end-of-subject test whose failure jumps are collected on
backtracks. In the partial matching modes reaching the end additionally
clears hit_start (soft mode) or leaves through the partial match exit
(hard mode), unless start_used_ptr >= STR_PTR. */
DEFINE_COMPILER;
struct sljit_jump *has_input;

if (common->mode != PCRE2_JIT_COMPLETE)
  {
  /* Partial matching mode. */
  has_input = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));

  if (common->mode != PCRE2_JIT_PARTIAL_SOFT)
    {
    if (common->partialmatchlabel == NULL)
      add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
    else
      JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
    }
  else
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    }

  JUMPHERE(has_input);
  return;
  }

add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
}
3161
static void peek_char(compiler_common *common, sljit_u32 max)
{
/* Reads the character into TMP1, keeps STR_PTR.
Does not check STR_END. TMP2 Destroyed.

The max argument is an upper bound promised by the caller: when it is below
the first value that needs multi-unit decoding (128 for UTF-8, 0xd800 for
UTF-16) only the single code unit load is emitted. */
DEFINE_COMPILER;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *jump;
#endif

SLJIT_UNUSED_ARG(max);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
  if (max < 128) return;

  /* Code units below 0xc0 are used as they are. Otherwise STR_PTR is
  temporarily advanced for the utfreadchar helper and then moved back by the
  amount found in TMP2 (presumably the number of code units the helper
  consumed - confirm), so that STR_PTR is unchanged on exit. */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  JUMPHERE(jump);
  }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf)
  {
  if (max < 0xd800) return;

  /* Anything outside 0xd800..0xdbff is a complete character already. */
  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
  /* TMP2 contains the high surrogate. */
  /* STR_PTR still addresses the high surrogate (it is not advanced in this
  function), so the low surrogate is the NEXT code unit. Reading offset 0
  here would load the high surrogate a second time. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  /* (high - 0xd800 + 0x40) << 10 equals ((high - 0xd800) << 10) + 0x10000. */
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
  JUMPHERE(jump);
  }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16 */
}
3204
3205 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
3206
static BOOL is_char7_bitset(const sljit_u8 *bitset, BOOL nclass)
{
/* Returns TRUE when the upper half of a 32-byte class bitmap (bytes 16..31,
i.e. the bits for character codes 128..255) is uniform: all zero bits for a
normal class, all one bits when nclass is set. In that case the character
codes below 128 are enough to determine a match. */
const sljit_u8 expected = nclass ? 0xff : 0;
int idx;

for (idx = 16; idx < 32; idx++)
  {
  if (bitset[idx] != expected)
    return FALSE;
  }

return TRUE;
}
3223
static void read_char7_type(compiler_common *common, BOOL full_read)
{
/* Reads the precise character type of a character into TMP1, if the character
is less than 128. Otherwise it returns with zero. Does not check STR_END. The
full_read argument tells whether characters above max are accepted or not.

Only valid in UTF mode (asserted below). TMP2 is overwritten. STR_PTR is
always moved past the first byte; when full_read is set it is also moved past
the remaining bytes of a multi-byte sequence. */
DEFINE_COMPILER;
struct sljit_jump *jump;

SLJIT_ASSERT(common->utf);

/* TMP2 = first byte, then step over it. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* TMP1 = ctypes[first byte]. */
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);

if (full_read)
  {
  /* Bytes below 0xc0 do not start a multi-byte sequence. For lead bytes the
  value from utf8_table4[byte - 0xc0] is added to STR_PTR (presumably the
  number of additional bytes of the sequence - confirm against the table). */
  jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  JUMPHERE(jump);
  }
}
3247
3248 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */
3249
static void read_char_range(compiler_common *common, sljit_u32 min, sljit_u32 max, BOOL update_str_ptr)
{
/* Reads the precise value of a character into TMP1, if the character is
between min and max (c >= min && c <= max). Otherwise it returns with a value
outside the range. Does not check STR_END.

STR_PTR is always moved past the first code unit. When update_str_ptr is set,
STR_PTR is moved past the whole character even if its value is not decoded;
otherwise STR_PTR is only guaranteed to be correct for characters inside the
range. TMP2 may be overwritten, and RETURN_ADDR is used as a scratch register
in some of the UTF-8 paths. */
DEFINE_COMPILER;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *jump;
#endif
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
struct sljit_jump *jump2;
#endif

SLJIT_UNUSED_ARG(update_str_ptr);
SLJIT_UNUSED_ARG(min);
SLJIT_UNUSED_ARG(max);
SLJIT_ASSERT(min <= max);

/* TMP1 = first code unit, then step over it. From here on offset 0 from
STR_PTR addresses the second code unit of the character. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
  /* Only single byte characters are interesting and STR_PTR does not need to
  be exact: the first byte is enough. */
  if (max < 128 && !update_str_ptr) return;

  /* Bytes below 0xc0 do not start a multi-byte sequence. */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
  if (min >= 0x10000)
    {
    /* Only four byte sequences can be in range: decode them inline. The
    decoding is skipped (jump2) unless the lead byte is 0xf0..0xf7. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
    /* Remember the utf8_table4 entry of the lead byte before TMP1 is reused;
    it is added to STR_PTR at the end. */
    if (update_str_ptr)
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
    /* Without update_str_ptr the pointer is only advanced on this (in range)
    path, by the three remaining bytes. */
    if (!update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    JUMPHERE(jump2);
    if (update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
    }
  else if (min >= 0x800 && max <= 0xffff)
    {
    /* Only three byte sequences can be in range: decode them inline. The
    decoding is skipped (jump2) unless the lead byte is 0xe0..0xef. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
    if (update_str_ptr)
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    if (!update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    JUMPHERE(jump2);
    if (update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
    }
  else if (max >= 0x800)
    /* Sequences of different lengths can be in range: call the shared
    decoder (the variant limited to values below 0x10000 when possible). */
    add_jump(compiler, (max < 0x10000) ? &common->utfreadchar16 : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
  else if (max < 128)
    {
    /* update_str_ptr must be set here (see the early return above): the
    value is not needed, only skip the rest of the sequence. */
    OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
    }
  else
    {
    /* 128 <= max < 0x800: decode as a two byte sequence inline. */
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    if (!update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    else
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    if (update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
    }
  JUMPHERE(jump);
  }
#endif

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf)
  {
  if (max >= 0x10000)
    {
    /* Characters encoded as surrogate pairs can be in range: decode them.
    The unsigned compare skips the decoding unless TMP1 is a high surrogate
    (0xd800..0xdbff). */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
    /* TMP2 contains the high surrogate. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    /* ((high - 0xd800) + 0x40) << 10 == ((high - 0xd800) << 10) + 0x10000 */
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    JUMPHERE(jump);
    return;
    }

  /* No surrogate can be in range and STR_PTR does not need to be exact. */
  if (max < 0xd800 && !update_str_ptr) return;

  /* Skip low surrogate if necessary. */
  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
  if (update_str_ptr)
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  /* A high surrogate on its own would compare as a value within
  0xd800..0xdbff; replace it with 0x10000, which is above max here. */
  if (max >= 0xd800)
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
  JUMPHERE(jump);
  }
#endif
}
3377
static SLJIT_INLINE void read_char(compiler_common *common)
{
/* Reads a character with no range restriction: the full range from 0 to
READ_CHAR_MAX is requested from read_char_range, with update_str_ptr set. */
read_char_range(common, 0, READ_CHAR_MAX, TRUE);
}
3382
static void read_char8_type(compiler_common *common, BOOL update_str_ptr)
{
/* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END.

The type comes from the ctypes table, which only covers character values
below 256; for larger values TMP1 is set to zero. TMP2 is overwritten. When
update_str_ptr is not set, STR_PTR is only guaranteed to be moved past
characters which have a type (see the UTF branches below). */
DEFINE_COMPILER;
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
struct sljit_jump *jump;
#endif
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
struct sljit_jump *jump2;
#endif

SLJIT_UNUSED_ARG(update_str_ptr);

/* TMP2 = first code unit, then step over it. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
  /* This can be an extra read in some situations, but hopefully
  it is needed in most cases. */
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
  /* Bytes below 0xc0 do not start a multi-byte sequence: done. */
  jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
  if (!update_str_ptr)
    {
    /* Decode the first two bytes as a two byte sequence. A result above 255
    has no ctypes entry, so the type is zero. Only one more byte is skipped
    here, whatever the real length of the sequence is. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
    JUMPHERE(jump2);
    }
  else
    /* The shared helper also moves STR_PTR past the whole character. */
    add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
  JUMPHERE(jump);
  return;
  }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */

#if PCRE2_CODE_UNIT_WIDTH != 8
/* The ctypes array contains only 256 values. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
#if PCRE2_CODE_UNIT_WIDTH != 8
JUMPHERE(jump);
#endif

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf && update_str_ptr)
  {
  /* Skip low surrogate if necessary. */
  /* Unsigned range check: one more code unit is skipped only when TMP2 is a
  high surrogate (0xd800..0xdbff). */
  OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
  jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  JUMPHERE(jump);
  }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16 */
}
3447
static void skip_char_back(compiler_common *common)
{
/* Goes one character back. Affects STR_PTR and TMP1. Does not check begin.

In UTF-8 and UTF-16 mode a whole multi-unit character is stepped over;
otherwise STR_PTR is simply decreased by one code unit. */
DEFINE_COMPILER;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
#if PCRE2_CODE_UNIT_WIDTH == 8
struct sljit_label *label;

if (common->utf)
  {
  /* Step back one byte at a time for as long as the byte just stepped over
  is a continuation byte (bit pattern 10xxxxxx). */
  label = LABEL();
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
  CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
  return;
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf)
  {
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  /* Skip low surrogate if necessary. */
  /* Branch-free: TMP1 becomes 1 when the unit just stepped over is a low
  surrogate (0xdc00..0xdfff) and 0 otherwise; shifted left by one it is the
  byte size of one more code unit to step back. */
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  return;
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
/* Fixed width characters: one code unit back. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
}
3482
static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
{
/* Emits a newline test for the character in TMP1. TMP2 may be destroyed.

A jump is added to the backtracks list which is taken when the character is a
newline (jumpifmatch is TRUE) or when it is not a newline (jumpifmatch is
FALSE). The kind of test depends on nltype:
  NLTYPE_ANY      the shared anynewline helper is called
  NLTYPE_ANYCRLF  the character is compared to CR and NL
  NLTYPE_FIXED    the character is compared to common->newline, which must
                  be a single code unit value */
DEFINE_COMPILER;
struct sljit_jump *is_cr;

if (nltype == NLTYPE_ANY)
  {
  add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
  /* The result of the helper is tested through the zero flag. */
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
  if (jumpifmatch)
    add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
  else
    add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
  return;
  }

if (nltype != NLTYPE_ANYCRLF)
  {
  SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
  if (jumpifmatch)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
  else
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
  return;
  }

/* NLTYPE_ANYCRLF from here. */
if (!jumpifmatch)
  {
  /* CR is accepted right away; anything else must be NL. */
  is_cr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
  JUMPHERE(is_cr);
  return;
  }

add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
}
3515
3516 #ifdef SUPPORT_UNICODE
3517
3518 #if PCRE2_CODE_UNIT_WIDTH == 8
static void do_utfreadchar(compiler_common *common)
{
/* Fast decoding a UTF-8 character. TMP1 contains the first byte
of the character (>= 0xc0). Return char value in TMP1, length in TMP2.

This is the body of a shared helper entered by a fast call. On entry STR_PTR
points to the second byte of the character; on return it points after the
character. RETURN_ADDR holds the return address. */
DEFINE_COMPILER;
struct sljit_jump *jump;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Combine the low six bits of the first and the second byte. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Searching for the first zero. */
/* Bit 0x800 of TMP1 is bit 5 of the first byte: it is clear for two byte
sequences (110xxxxx) and set for longer ones. */
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
jump = JUMP(SLJIT_NOT_ZERO);
/* Two byte sequence. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

JUMPHERE(jump);
/* Remove the length marker bit and append the third byte. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Bit 0x10000 of TMP1 is now bit 4 of the first byte: it is clear for three
byte sequences (1110xxxx) and set for four byte ones. */
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
jump = JUMP(SLJIT_NOT_ZERO);
/* Three byte sequence. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* Four byte sequence. */
JUMPHERE(jump);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(4));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
3566
static void do_utfreadchar16(compiler_common *common)
{
/* Fast decoding a UTF-8 character. TMP1 contains the first byte
of the character (>= 0xc0). Return value in TMP1.

This variant only decodes two and three byte sequences precisely (values
below 0x10000). For a four byte sequence the returned value is not the real
character, but it has bit 0x10000 set, so it is above every value the
precise decoding can produce. On entry STR_PTR points to the second byte of
the character; on return it points after the character in all three cases.
TMP2 is destroyed and, unlike do_utfreadchar, holds no length on return. */
DEFINE_COMPILER;
struct sljit_jump *jump;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Combine the low six bits of the first and the second byte. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Searching for the first zero. */
/* Bit 0x800 of TMP1 is bit 5 of the first byte: clear for two byte
sequences, set for longer ones. */
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
jump = JUMP(SLJIT_NOT_ZERO);
/* Two byte sequence. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

JUMPHERE(jump);
/* Bit 0x400 of TMP1 is bit 4 of the first byte: set for four byte
sequences. TMP2 becomes 1 in that case (0 otherwise) and is added to STR_PTR
as the extra byte to skip. The bit is not cleared below, which is what
pushes the result to 0x10000 or above after the next shift. */
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO);
/* This code runs only in 8 bit mode. No need to shift the value. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
/* Three byte sequence. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
3602
static void do_utfreadtype8(compiler_common *common)
{
/* Fast decoding a UTF-8 character type. TMP2 contains the first byte
of the character (>= 0xc0). Return value in TMP1.

The returned type is the ctypes entry of the character when its value is
below 256 and zero otherwise. On entry STR_PTR points to the second byte of
the character; STR_PTR is moved forward past the remaining bytes. TMP2 is
destroyed. */
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_jump *compare;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Bit 5 of the first byte is set for sequences longer than two bytes. */
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
jump = JUMP(SLJIT_NOT_ZERO);
/* Two byte sequence. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
/* The upper 5 bits are known at this point. */
/* If they are greater than 3, the character value is 256 or more. */
compare = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* Two byte sequence with a value of 256 or more: no type. */
JUMPHERE(compare);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* We only have types for characters less than 256. */
/* Longer sequence: the utf8_table4 entry of the first byte is added to
STR_PTR and zero is returned as the type. */
JUMPHERE(jump);
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
3638
3639 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
3640
/* Constants used by do_getucd to split a character value into a block number
(value >> UCD_BLOCK_SHIFT) and an offset inside the block
(value & UCD_BLOCK_MASK). They are hard-coded for a block size of 128, so
UCD_BLOCK_SIZE must be 128 (see the assert in do_getucd below). */
#define UCD_BLOCK_MASK 127
#define UCD_BLOCK_SHIFT 7
3644
static void do_getucd(compiler_common *common)
{
/* Search the UCD record for the character comes in TMP1.
Returns chartype in TMP1 and UCD offset in TMP2.

This is the body of a shared helper entered by a fast call; RETURN_ADDR holds
the return address. The lookup goes through the two stage tables ucd_stage1
and ucd_stage2, and the value left in TMP2 is the index of the record in
ucd_records. */
DEFINE_COMPILER;
#if PCRE2_CODE_UNIT_WIDTH == 32
struct sljit_jump *jump;
#endif

#if defined SLJIT_DEBUG && SLJIT_DEBUG
/* dummy_ucd_record */
/* Checks the properties of the record which is used for INVALID_UTF_CHAR. */
const ucd_record *record = GET_UCD(INVALID_UTF_CHAR);
SLJIT_ASSERT(record->script == ucp_Common && record->chartype == ucp_Cn && record->gbprop == ucp_gbOther);
SLJIT_ASSERT(record->caseset == 0 && record->other_case == 0);
#endif

/* The shift and mask constants, and the scale of the record index at the
end of this function, depend on these sizes. */
SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

#if PCRE2_CODE_UNIT_WIDTH == 32
if (!common->utf)
  {
  /* Without UTF checking a 32 bit code unit can be above the largest code
  point: such values are replaced by INVALID_UTF_CHAR before the lookup. */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
  JUMPHERE(jump);
  }
#endif

/* TMP2 = ucd_stage1[c >> UCD_BLOCK_SHIFT]; the shift by one turns the index
into a byte offset, as the value is loaded as a 16 bit item. */
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
/* TMP1 = (c & UCD_BLOCK_MASK) + (TMP2 << UCD_BLOCK_SHIFT), the index into
ucd_stage2. */
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
/* TMP2 = ucd_stage2[TMP1], again a 16 bit item (index scaled by 1 << 1). */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
/* TMP1 = ucd_records[TMP2].chartype; the index is scaled by 1 << 3, which
is sizeof(ucd_record) as asserted above. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
3686
3687 #endif /* SUPPORT_UNICODE */
3688
static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common)
{
/* Emits the entry code of the matching loop and returns the label of the
code which moves STR_PTR forward by one character (presumably the target used
by the caller to try the next starting position - confirm against the
caller).

The emitted code has three parts:
  1. An optional prologue which stores a limit into the match_end_ptr stack
     slot: the end of the first line when PCRE2_FIRSTLINE is set, otherwise
     the offset limit when PCRE2_USE_OFFSET_LIMIT is set.
  2. An unconditional jump (start) over the advancing code, so the first
     attempt is made at the original STR_PTR.
  3. The advancing code itself (the returned label), which optionally treats
     a two character newline as a single step (newlinecheck).
TMP1, TMP2 and (with PCRE2_FIRSTLINE) TMP3 are overwritten. */
DEFINE_COMPILER;
struct sljit_label *mainloop;
struct sljit_label *newlinelabel = NULL;
struct sljit_jump *start;
struct sljit_jump *end = NULL;
struct sljit_jump *end2 = NULL;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *singlechar;
#endif
jump_list *newline = NULL;
sljit_u32 overall_options = common->re->overall_options;
BOOL hascrorlf = (common->re->flags & PCRE2_HASCRORLF) != 0;
BOOL newlinecheck = FALSE;
BOOL readuchar = FALSE;

/* The two character newline handling is only emitted when the pattern has
no explicit CR or LF reference, PCRE2_FIRSTLINE is not set, and the newline
convention can match more than one code unit. */
if (!(hascrorlf || (overall_options & PCRE2_FIRSTLINE) != 0)
    && (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
  newlinecheck = TRUE;

SLJIT_ASSERT(common->abort_label == NULL);

if ((overall_options & PCRE2_FIRSTLINE) != 0)
  {
  /* Search for the end of the first line. */
  SLJIT_ASSERT(common->match_end_ptr != 0);
  /* STR_PTR is used for the scan; TMP3 keeps its original value. */
  OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);

  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
    {
    /* Fixed two character newline: look at the code unit before and at
    STR_PTR until they match the high and low byte of common->newline, or the
    end of the subject is reached. */
    mainloop = LABEL();
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
    CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
    JUMPHERE(end);
    /* STR_PTR is one code unit ahead at this point on both paths. */
    OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    }
  else
    {
    end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
    mainloop = LABEL();
    /* Continual stores does not cause data dependency. */
    /* The position before each character is stored, so when a newline is
    found (jump to the newline list) the slot holds the start of the
    newline. */
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
    read_char_range(common, common->nlmin, common->nlmax, TRUE);
    check_newlinechar(common, common->nltype, &newline, TRUE);
    CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop);
    JUMPHERE(end);
    /* No newline was found: the stored position is the final STR_PTR. */
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
    set_jumps(newline, LABEL());
    }

  OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
  }
else if ((overall_options & PCRE2_USE_OFFSET_LIMIT) != 0)
  {
  /* Check whether offset limit is set and valid. */
  SLJIT_ASSERT(common->match_end_ptr != 0);

  /* TMP2 = STR_END is stored when the offset limit is PCRE2_UNSET. */
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, offset_limit));
  OP1(SLJIT_MOV, TMP2, 0, STR_END, 0);
  end = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw) PCRE2_UNSET);
  OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
  /* Convert the limit from code units to a byte offset. */
#if PCRE2_CODE_UNIT_WIDTH == 16
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
#elif PCRE2_CODE_UNIT_WIDTH == 32
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 2);
#endif
  /* TMP2 = begin + limit, clamped to STR_END. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
  end2 = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
  OP1(SLJIT_MOV, TMP2, 0, STR_END, 0);
  JUMPHERE(end2);
  /* A limit before the current position takes the abort path with
  PCRE2_ERROR_NOMATCH loaded into the return register. */
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
  add_jump(compiler, &common->abort, CMP(SLJIT_LESS, TMP2, 0, STR_PTR, 0));
  JUMPHERE(end);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, TMP2, 0);
  }

/* Skip the advancing code on first entry. */
start = JUMP(SLJIT_JUMP);

if (newlinecheck)
  {
  /* Reached from the main loop when the current code unit equals the high
  byte of common->newline. Steps over it, and steps over one more code unit
  when the following one equals the low byte of common->newline. The flag
  value (0 or 1) is scaled to the code unit size before it is added. */
  newlinelabel = LABEL();
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  end2 = JUMP(SLJIT_JUMP);
  }

mainloop = LABEL();

/* Increasing the STR_PTR here requires one less jump in the most common case. */
/* The current code unit is only loaded when something below inspects it. */
#ifdef SUPPORT_UNICODE
if (common->utf) readuchar = TRUE;
#endif
if (newlinecheck) readuchar = TRUE;

if (readuchar)
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);

if (newlinecheck)
  CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
  /* For a lead byte the utf8_table4 entry is added to STR_PTR to step over
  the rest of the sequence. */
  singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  JUMPHERE(singlechar);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf)
  {
  /* One more code unit (two bytes) is skipped after a high surrogate. */
  singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  JUMPHERE(singlechar);
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
/* All paths join here: the first entry and both exits of the newline code. */
JUMPHERE(start);

if (newlinecheck)
  {
  JUMPHERE(end);
  JUMPHERE(end2);
  }

return mainloop;
}
3836
3837
add_prefix_char(PCRE2_UCHAR chr,fast_forward_char_data * chars,BOOL last)3838 static SLJIT_INLINE void add_prefix_char(PCRE2_UCHAR chr, fast_forward_char_data *chars, BOOL last)
3839 {
3840 sljit_u32 i, count = chars->count;
3841
3842 if (count == 255)
3843 return;
3844
3845 if (count == 0)
3846 {
3847 chars->count = 1;
3848 chars->chars[0] = chr;
3849
3850 if (last)
3851 chars->last_count = 1;
3852 return;
3853 }
3854
3855 for (i = 0; i < count; i++)
3856 if (chars->chars[i] == chr)
3857 return;
3858
3859 if (count >= MAX_DIFF_CHARS)
3860 {
3861 chars->count = 255;
3862 return;
3863 }
3864
3865 chars->chars[count] = chr;
3866 chars->count = count + 1;
3867
3868 if (last)
3869 chars->last_count++;
3870 }
3871
scan_prefix(compiler_common * common,PCRE2_SPTR cc,fast_forward_char_data * chars,int max_chars,sljit_u32 * rec_count)3872 static int scan_prefix(compiler_common *common, PCRE2_SPTR cc, fast_forward_char_data *chars, int max_chars, sljit_u32 *rec_count)
3873 {
3874 /* Recursive function, which scans prefix literals. */
3875 BOOL last, any, class, caseless;
3876 int len, repeat, len_save, consumed = 0;
3877 sljit_u32 chr; /* Any unicode character. */
3878 sljit_u8 *bytes, *bytes_end, byte;
3879 PCRE2_SPTR alternative, cc_save, oc;
3880 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
3881 PCRE2_UCHAR othercase[4];
3882 #elif defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
3883 PCRE2_UCHAR othercase[2];
3884 #else
3885 PCRE2_UCHAR othercase[1];
3886 #endif
3887
3888 repeat = 1;
3889 while (TRUE)
3890 {
3891 if (*rec_count == 0)
3892 return 0;
3893 (*rec_count)--;
3894
3895 last = TRUE;
3896 any = FALSE;
3897 class = FALSE;
3898 caseless = FALSE;
3899
3900 switch (*cc)
3901 {
3902 case OP_CHARI:
3903 caseless = TRUE;
3904 /* Fall through */
3905 case OP_CHAR:
3906 last = FALSE;
3907 cc++;
3908 break;
3909
3910 case OP_SOD:
3911 case OP_SOM:
3912 case OP_SET_SOM:
3913 case OP_NOT_WORD_BOUNDARY:
3914 case OP_WORD_BOUNDARY:
3915 case OP_EODN:
3916 case OP_EOD:
3917 case OP_CIRC:
3918 case OP_CIRCM:
3919 case OP_DOLL:
3920 case OP_DOLLM:
3921 /* Zero width assertions. */
3922 cc++;
3923 continue;
3924
3925 case OP_ASSERT:
3926 case OP_ASSERT_NOT:
3927 case OP_ASSERTBACK:
3928 case OP_ASSERTBACK_NOT:
3929 cc = bracketend(cc);
3930 continue;
3931
3932 case OP_PLUSI:
3933 case OP_MINPLUSI:
3934 case OP_POSPLUSI:
3935 caseless = TRUE;
3936 /* Fall through */
3937 case OP_PLUS:
3938 case OP_MINPLUS:
3939 case OP_POSPLUS:
3940 cc++;
3941 break;
3942
3943 case OP_EXACTI:
3944 caseless = TRUE;
3945 /* Fall through */
3946 case OP_EXACT:
3947 repeat = GET2(cc, 1);
3948 last = FALSE;
3949 cc += 1 + IMM2_SIZE;
3950 break;
3951
3952 case OP_QUERYI:
3953 case OP_MINQUERYI:
3954 case OP_POSQUERYI:
3955 caseless = TRUE;
3956 /* Fall through */
3957 case OP_QUERY:
3958 case OP_MINQUERY:
3959 case OP_POSQUERY:
3960 len = 1;
3961 cc++;
3962 #ifdef SUPPORT_UNICODE
3963 if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3964 #endif
3965 max_chars = scan_prefix(common, cc + len, chars, max_chars, rec_count);
3966 if (max_chars == 0)
3967 return consumed;
3968 last = FALSE;
3969 break;
3970
3971 case OP_KET:
3972 cc += 1 + LINK_SIZE;
3973 continue;
3974
3975 case OP_ALT:
3976 cc += GET(cc, 1);
3977 continue;
3978
3979 case OP_ONCE:
3980 case OP_BRA:
3981 case OP_BRAPOS:
3982 case OP_CBRA:
3983 case OP_CBRAPOS:
3984 alternative = cc + GET(cc, 1);
3985 while (*alternative == OP_ALT)
3986 {
3987 max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, max_chars, rec_count);
3988 if (max_chars == 0)
3989 return consumed;
3990 alternative += GET(alternative, 1);
3991 }
3992
3993 if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
3994 cc += IMM2_SIZE;
3995 cc += 1 + LINK_SIZE;
3996 continue;
3997
3998 case OP_CLASS:
3999 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
4000 if (common->utf && !is_char7_bitset((const sljit_u8 *)(cc + 1), FALSE))
4001 return consumed;
4002 #endif
4003 class = TRUE;
4004 break;
4005
4006 case OP_NCLASS:
4007 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
4008 if (common->utf) return consumed;
4009 #endif
4010 class = TRUE;
4011 break;
4012
4013 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
4014 case OP_XCLASS:
4015 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
4016 if (common->utf) return consumed;
4017 #endif
4018 any = TRUE;
4019 cc += GET(cc, 1);
4020 break;
4021 #endif
4022
4023 case OP_DIGIT:
4024 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
4025 if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
4026 return consumed;
4027 #endif
4028 any = TRUE;
4029 cc++;
4030 break;
4031
4032 case OP_WHITESPACE:
4033 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
4034 if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_space, FALSE))
4035 return consumed;
4036 #endif
4037 any = TRUE;
4038 cc++;
4039 break;
4040
4041 case OP_WORDCHAR:
4042 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
4043 if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_word, FALSE))
4044 return consumed;
4045 #endif
4046 any = TRUE;
4047 cc++;
4048 break;
4049
4050 case OP_NOT:
4051 case OP_NOTI:
4052 cc++;
4053 /* Fall through. */
4054 case OP_NOT_DIGIT:
4055 case OP_NOT_WHITESPACE:
4056 case OP_NOT_WORDCHAR:
4057 case OP_ANY:
4058 case OP_ALLANY:
4059 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
4060 if (common->utf) return consumed;
4061 #endif
4062 any = TRUE;
4063 cc++;
4064 break;
4065
4066 #ifdef SUPPORT_UNICODE
4067 case OP_NOTPROP:
4068 case OP_PROP:
4069 #if PCRE2_CODE_UNIT_WIDTH != 32
4070 if (common->utf) return consumed;
4071 #endif
4072 any = TRUE;
4073 cc += 1 + 2;
4074 break;
4075 #endif
4076
4077 case OP_TYPEEXACT:
4078 repeat = GET2(cc, 1);
4079 cc += 1 + IMM2_SIZE;
4080 continue;
4081
4082 case OP_NOTEXACT:
4083 case OP_NOTEXACTI:
4084 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
4085 if (common->utf) return consumed;
4086 #endif
4087 any = TRUE;
4088 repeat = GET2(cc, 1);
4089 cc += 1 + IMM2_SIZE + 1;
4090 break;
4091
4092 default:
4093 return consumed;
4094 }
4095
4096 if (any)
4097 {
4098 do
4099 {
4100 chars->count = 255;
4101
4102 consumed++;
4103 if (--max_chars == 0)
4104 return consumed;
4105 chars++;
4106 }
4107 while (--repeat > 0);
4108
4109 repeat = 1;
4110 continue;
4111 }
4112
4113 if (class)
4114 {
4115 bytes = (sljit_u8*) (cc + 1);
4116 cc += 1 + 32 / sizeof(PCRE2_UCHAR);
4117
4118 switch (*cc)
4119 {
4120 case OP_CRSTAR:
4121 case OP_CRMINSTAR:
4122 case OP_CRPOSSTAR:
4123 case OP_CRQUERY:
4124 case OP_CRMINQUERY:
4125 case OP_CRPOSQUERY:
4126 max_chars = scan_prefix(common, cc + 1, chars, max_chars, rec_count);
4127 if (max_chars == 0)
4128 return consumed;
4129 break;
4130
4131 default:
4132 case OP_CRPLUS:
4133 case OP_CRMINPLUS:
4134 case OP_CRPOSPLUS:
4135 break;
4136
4137 case OP_CRRANGE:
4138 case OP_CRMINRANGE:
4139 case OP_CRPOSRANGE:
4140 repeat = GET2(cc, 1);
4141 if (repeat <= 0)
4142 return consumed;
4143 break;
4144 }
4145
4146 do
4147 {
4148 if (bytes[31] & 0x80)
4149 chars->count = 255;
4150 else if (chars->count != 255)
4151 {
4152 bytes_end = bytes + 32;
4153 chr = 0;
4154 do
4155 {
4156 byte = *bytes++;
4157 SLJIT_ASSERT((chr & 0x7) == 0);
4158 if (byte == 0)
4159 chr += 8;
4160 else
4161 {
4162 do
4163 {
4164 if ((byte & 0x1) != 0)
4165 add_prefix_char(chr, chars, TRUE);
4166 byte >>= 1;
4167 chr++;
4168 }
4169 while (byte != 0);
4170 chr = (chr + 7) & ~7;
4171 }
4172 }
4173 while (chars->count != 255 && bytes < bytes_end);
4174 bytes = bytes_end - 32;
4175 }
4176
4177 consumed++;
4178 if (--max_chars == 0)
4179 return consumed;
4180 chars++;
4181 }
4182 while (--repeat > 0);
4183
4184 switch (*cc)
4185 {
4186 case OP_CRSTAR:
4187 case OP_CRMINSTAR:
4188 case OP_CRPOSSTAR:
4189 return consumed;
4190
4191 case OP_CRQUERY:
4192 case OP_CRMINQUERY:
4193 case OP_CRPOSQUERY:
4194 cc++;
4195 break;
4196
4197 case OP_CRRANGE:
4198 case OP_CRMINRANGE:
4199 case OP_CRPOSRANGE:
4200 if (GET2(cc, 1) != GET2(cc, 1 + IMM2_SIZE))
4201 return consumed;
4202 cc += 1 + 2 * IMM2_SIZE;
4203 break;
4204 }
4205
4206 repeat = 1;
4207 continue;
4208 }
4209
4210 len = 1;
4211 #ifdef SUPPORT_UNICODE
4212 if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
4213 #endif
4214
4215 if (caseless && char_has_othercase(common, cc))
4216 {
4217 #ifdef SUPPORT_UNICODE
4218 if (common->utf)
4219 {
4220 GETCHAR(chr, cc);
4221 if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
4222 return consumed;
4223 }
4224 else
4225 #endif
4226 {
4227 chr = *cc;
4228 othercase[0] = TABLE_GET(chr, common->fcc, chr);
4229 }
4230 }
4231 else
4232 {
4233 caseless = FALSE;
4234 othercase[0] = 0; /* Stops compiler warning - PH */
4235 }
4236
4237 len_save = len;
4238 cc_save = cc;
4239 while (TRUE)
4240 {
4241 oc = othercase;
4242 do
4243 {
4244 len--;
4245 consumed++;
4246
4247 chr = *cc;
4248 add_prefix_char(*cc, chars, len == 0);
4249
4250 if (caseless)
4251 add_prefix_char(*oc, chars, len == 0);
4252
4253 if (--max_chars == 0)
4254 return consumed;
4255 chars++;
4256 cc++;
4257 oc++;
4258 }
4259 while (len > 0);
4260
4261 if (--repeat == 0)
4262 break;
4263
4264 len = len_save;
4265 cc = cc_save;
4266 }
4267
4268 repeat = 1;
4269 if (last)
4270 return consumed;
4271 }
4272 }
4273
4274 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
jumpto_if_not_utf_char_start(struct sljit_compiler * compiler,sljit_s32 reg,struct sljit_label * label)4275 static void jumpto_if_not_utf_char_start(struct sljit_compiler *compiler, sljit_s32 reg, struct sljit_label *label)
4276 {
4277 #if PCRE2_CODE_UNIT_WIDTH == 8
4278 OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xc0);
4279 CMPTO(SLJIT_EQUAL, reg, 0, SLJIT_IMM, 0x80, label);
4280 #elif PCRE2_CODE_UNIT_WIDTH == 16
4281 OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xfc00);
4282 CMPTO(SLJIT_EQUAL, reg, 0, SLJIT_IMM, 0xdc00, label);
4283 #else
4284 #error "Unknown code width"
4285 #endif
4286 }
4287 #endif
4288
4289 #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) && !(defined SUPPORT_VALGRIND)
4290
4291 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
jump_if_utf_char_start(struct sljit_compiler * compiler,sljit_s32 reg)4292 static struct sljit_jump *jump_if_utf_char_start(struct sljit_compiler *compiler, sljit_s32 reg)
4293 {
4294 #if PCRE2_CODE_UNIT_WIDTH == 8
4295 OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xc0);
4296 return CMP(SLJIT_NOT_EQUAL, reg, 0, SLJIT_IMM, 0x80);
4297 #elif PCRE2_CODE_UNIT_WIDTH == 16
4298 OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xfc00);
4299 return CMP(SLJIT_NOT_EQUAL, reg, 0, SLJIT_IMM, 0xdc00);
4300 #else
4301 #error "Unknown code width"
4302 #endif
4303 }
4304 #endif
4305
character_to_int32(PCRE2_UCHAR chr)4306 static sljit_s32 character_to_int32(PCRE2_UCHAR chr)
4307 {
4308 sljit_s32 value = (sljit_s32)chr;
4309 #if PCRE2_CODE_UNIT_WIDTH == 8
4310 #define SSE2_COMPARE_TYPE_INDEX 0
4311 return (value << 24) | (value << 16) | (value << 8) | value;
4312 #elif PCRE2_CODE_UNIT_WIDTH == 16
4313 #define SSE2_COMPARE_TYPE_INDEX 1
4314 return (value << 16) | value;
4315 #elif PCRE2_CODE_UNIT_WIDTH == 32
4316 #define SSE2_COMPARE_TYPE_INDEX 2
4317 return value;
4318 #else
4319 #error "Unsupported unit width"
4320 #endif
4321 }
4322
/* Emits a MOVDQA instruction which loads 16 bytes from the address held in a
general purpose register into an XMM register.

Arguments:
  compiler         the sljit compiler
  dst_xmm_reg      number of the destination XMM register (must be < 8)
  src_general_reg  machine register index of the register holding the address
                   (as returned by sljit_get_register_index() in the callers)

On x86-64 a register index of 8 or above needs an extra prefix byte, so the
instruction is one byte longer in that case. */
static void load_from_mem_sse2(struct sljit_compiler *compiler, sljit_s32 dst_xmm_reg, sljit_s32 src_general_reg)
{
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
sljit_u8 instruction[5];
#else
sljit_u8 instruction[4];
#endif

SLJIT_ASSERT(dst_xmm_reg < 8);

/* MOVDQA xmm1, xmm2/m128 */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
if (src_general_reg < 8)
  {
  instruction[0] = 0x66;
  instruction[1] = 0x0f;
  instruction[2] = 0x6f;
  instruction[3] = (dst_xmm_reg << 3) | src_general_reg;
  sljit_emit_op_custom(compiler, instruction, 4);
  }
else
  {
  /* The 0x41 byte is the REX prefix which extends the base register field;
  only the low three bits of the register index go into the last byte. */
  instruction[0] = 0x66;
  instruction[1] = 0x41;
  instruction[2] = 0x0f;
  instruction[3] = 0x6f;
  instruction[4] = (dst_xmm_reg << 3) | (src_general_reg & 0x7);
  /* All five bytes must be emitted, otherwise the last byte, which selects
  the registers, is lost and the following code is decoded incorrectly. */
  sljit_emit_op_custom(compiler, instruction, 5);
  }
#else
instruction[0] = 0x66;
instruction[1] = 0x0f;
instruction[2] = 0x6f;
instruction[3] = (dst_xmm_reg << 3) | src_general_reg;
sljit_emit_op_custom(compiler, instruction, 4);
#endif
}
4360
/* Emits the SSE2 instructions which compare every code unit of the XMM
register 'dst_ind' with one or two characters. The comparison result replaces
the content of 'dst_ind'.

Arguments:
  compiler   the sljit compiler
  char1      first character
  char2      second character (may be the same as char1)
  bit        non-zero when the "OR then compare once" form is requested
  dst_ind    XMM register holding the data, receives the result
  cmp1_ind   XMM register holding the first comparison value
  cmp2_ind   XMM register holding the second comparison value
  tmp_ind    scratch XMM register, only used by the two compare form

The callers in this file load cmp1 with (char1 | bit) and cmp2 with bit when
bit is non-zero, and with char1 / char2 otherwise. */
static void fast_forward_char_pair_sse2_compare(struct sljit_compiler *compiler, PCRE2_UCHAR char1, PCRE2_UCHAR char2,
  sljit_u32 bit, sljit_s32 dst_ind, sljit_s32 cmp1_ind, sljit_s32 cmp2_ind, sljit_s32 tmp_ind)
{
sljit_u8 opcode[4];

/* Every instruction emitted here starts with these two bytes. */
opcode[0] = 0x66;
opcode[1] = 0x0f;

if (char1 != char2 && bit == 0)
  {
  /* Two independent compares whose results are merged. */

  /* MOVDQA tmp, dst */
  opcode[2] = 0x6f;
  opcode[3] = 0xc0 | (tmp_ind << 3) | dst_ind;
  sljit_emit_op_custom(compiler, opcode, 4);

  /* PCMPEQB/W/D dst, cmp1 */
  opcode[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX;
  opcode[3] = 0xc0 | (dst_ind << 3) | cmp1_ind;
  sljit_emit_op_custom(compiler, opcode, 4);

  /* PCMPEQB/W/D tmp, cmp2 (same opcode byte as above) */
  opcode[3] = 0xc0 | (tmp_ind << 3) | cmp2_ind;
  sljit_emit_op_custom(compiler, opcode, 4);

  /* POR dst, tmp */
  opcode[2] = 0xeb;
  opcode[3] = 0xc0 | (dst_ind << 3) | tmp_ind;
  sljit_emit_op_custom(compiler, opcode, 4);
  return;
  }

/* A single compare is enough from here on. */

if (bit != 0)
  {
  /* POR dst, cmp2 */
  opcode[2] = 0xeb;
  opcode[3] = 0xc0 | (dst_ind << 3) | cmp2_ind;
  sljit_emit_op_custom(compiler, opcode, 4);
  }

/* PCMPEQB/W/D dst, cmp1 */
opcode[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX;
opcode[3] = 0xc0 | (dst_ind << 3) | cmp1_ind;
sljit_emit_op_custom(compiler, opcode, 4);
}
4414
/* Emits an SSE2 based search which moves STR_PTR forward to the first code
unit that is equal to char1 or char2, inspecting 16 bytes at a time.

Arguments:
  common   the compiler common data
  char1    first accepted character
  char2    second accepted character (may be the same as char1)
  offset   only used in UTF mode of the 8 and 16 bit libraries: when it is
           positive, the code unit 'offset' units before the found position
           must be the start of a character, otherwise the search restarts
           from the next code unit

In PCRE2_JIT_COMPLETE mode the generated code jumps to the failed_match list
when STR_PTR reaches STR_END. In the other modes the end-of-subject jumps land
after the search and STR_PTR is limited to STR_END. TMP1 and TMP2 are used as
scratch registers by the generated code. */
static void fast_forward_first_char2_sse2(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
{
DEFINE_COMPILER;
struct sljit_label *start;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_label *restart;
#endif
struct sljit_jump *quit;
struct sljit_jump *partial_quit[2];
sljit_u8 instruction[8];
sljit_s32 tmp1_ind = sljit_get_register_index(TMP1);
sljit_s32 str_ptr_ind = sljit_get_register_index(STR_PTR);
/* Numbers of the XMM registers used by the generated code. */
sljit_s32 data_ind = 0;
sljit_s32 tmp_ind = 1;
sljit_s32 cmp1_ind = 2;
sljit_s32 cmp2_ind = 3;
sljit_u32 bit = 0;

/* The offset argument is only read by the UTF specific code at the end. */
SLJIT_UNUSED_ARG(offset);

/* When is_powerof2() accepts the xor of the two characters, 'bit' is kept
non-zero: the data is then ORed with 'bit' and compared with (char1 | bit)
only. Otherwise 'bit' is zero and two compares are emitted. */
if (char1 != char2)
  {
  bit = char1 ^ char2;
  if (!is_powerof2(bit))
    bit = 0;
  }

partial_quit[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (common->mode == PCRE2_JIT_COMPLETE)
  add_jump(compiler, &common->failed_match, partial_quit[0]);

/* First part (unaligned start) */

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1 | bit));

/* The instructions below encode TMP1 without a REX prefix. */
SLJIT_ASSERT(tmp1_ind < 8);

/* MOVD xmm, r/m32 */
instruction[0] = 0x66;
instruction[1] = 0x0f;
instruction[2] = 0x6e;
instruction[3] = 0xc0 | (cmp1_ind << 3) | tmp1_ind;
sljit_emit_op_custom(compiler, instruction, 4);

if (char1 != char2)
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(bit != 0 ? bit : char2));

  /* MOVD xmm, r/m32 */
  instruction[3] = 0xc0 | (cmp2_ind << 3) | tmp1_ind;
  sljit_emit_op_custom(compiler, instruction, 4);
  }

/* TMP2 keeps the unaligned start position. */
OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);

/* PSHUFD xmm1, xmm2/m128, imm8 */
/* instruction[0] = 0x66; */
/* instruction[1] = 0x0f; */
/* The literal 2 below is the register number stored in cmp1_ind; the zero
immediate selects the lowest 32 bit element for every element of the result. */
instruction[2] = 0x70;
instruction[3] = 0xc0 | (cmp1_ind << 3) | 2;
instruction[4] = 0;
sljit_emit_op_custom(compiler, instruction, 5);

if (char1 != char2)
  {
  /* PSHUFD xmm1, xmm2/m128, imm8 */
  /* The literal 3 below is the register number stored in cmp2_ind. */
  instruction[3] = 0xc0 | (cmp2_ind << 3) | 3;
  sljit_emit_op_custom(compiler, instruction, 5);
  }

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
/* The UTF check at the end jumps back here with TMP2 set to the new start. */
restart = LABEL();
#endif
/* STR_PTR is rounded down to a 16 byte boundary, TMP2 becomes the distance
of the real start position from that boundary. */
OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf);

load_from_mem_sse2(compiler, data_ind, str_ptr_ind);
fast_forward_char_pair_sse2_compare(compiler, char1, char2, bit, data_ind, cmp1_ind, cmp2_ind, tmp_ind);

/* PMOVMSKB reg, xmm */
/* instruction[0] = 0x66; */
/* instruction[1] = 0x0f; */
instruction[2] = 0xd7;
instruction[3] = 0xc0 | (tmp1_ind << 3) | 0;
sljit_emit_op_custom(compiler, instruction, 4);

/* STR_PTR is moved back to the real start position, and the result bits
which belong to bytes before it are shifted out. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0);

/* BSF r32, r/m32 */
instruction[0] = 0x0f;
instruction[1] = 0xbc;
instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind;
sljit_emit_op_custom(compiler, instruction, 3);
/* Tells sljit that the custom instruction has set the zero flag. */
sljit_set_current_flags(compiler, SLJIT_SET_Z);

/* Taken when at least one result bit was set: TMP1 is the byte distance of
the first match from STR_PTR. */
quit = JUMP(SLJIT_NOT_ZERO);

/* No match in the first block: go back to the aligned address. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

start = LABEL();
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);

partial_quit[1] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (common->mode == PCRE2_JIT_COMPLETE)
  add_jump(compiler, &common->failed_match, partial_quit[1]);

/* Second part (aligned) */

/* The literal 0 is the register number stored in data_ind. */
load_from_mem_sse2(compiler, 0, str_ptr_ind);
fast_forward_char_pair_sse2_compare(compiler, char1, char2, bit, data_ind, cmp1_ind, cmp2_ind, tmp_ind);

/* PMOVMSKB reg, xmm */
instruction[0] = 0x66;
instruction[1] = 0x0f;
instruction[2] = 0xd7;
instruction[3] = 0xc0 | (tmp1_ind << 3) | 0;
sljit_emit_op_custom(compiler, instruction, 4);

/* BSF r32, r/m32 */
instruction[0] = 0x0f;
instruction[1] = 0xbc;
instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind;
sljit_emit_op_custom(compiler, instruction, 3);
sljit_set_current_flags(compiler, SLJIT_SET_Z);

/* No result bit was set: continue with the next 16 bytes. */
JUMPTO(SLJIT_ZERO, start);

JUMPHERE(quit);
/* Add the byte distance of the first match to STR_PTR. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);

if (common->mode != PCRE2_JIT_COMPLETE)
  {
  /* Both end-of-subject jumps arrive here, and STR_PTR is set to STR_END
  when it is greater than STR_END. */
  JUMPHERE(partial_quit[0]);
  JUMPHERE(partial_quit[1]);
  OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
  CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0);
  }
else
  add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf && offset > 0)
  {
  SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);

  /* Inspect the code unit 'offset' units before the found position. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));

  quit = jump_if_utf_char_start(compiler, TMP1);

  /* Not a character start: search again from the next code unit. */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
  JUMPTO(SLJIT_JUMP, restart);

  JUMPHERE(quit);
  }
#endif
}
4574
4575 #ifndef _WIN64
4576
max_fast_forward_char_pair_sse2_offset(void)4577 static SLJIT_INLINE sljit_u32 max_fast_forward_char_pair_sse2_offset(void)
4578 {
4579 #if PCRE2_CODE_UNIT_WIDTH == 8
4580 return 15;
4581 #elif PCRE2_CODE_UNIT_WIDTH == 16
4582 return 7;
4583 #elif PCRE2_CODE_UNIT_WIDTH == 32
4584 return 3;
4585 #else
4586 #error "Unsupported unit width"
4587 #endif
4588 }
4589
/* Emits an SSE2 based search for a pair of characters at two different
offsets from the start of a match.

Arguments:
  common   the compiler common data
  offs1    offset (in code units) of the first character, must be > offs2
  char1a   first accepted character at offs1
  char1b   second accepted character at offs1 (may be the same as char1a)
  offs2    offset (in code units) of the second character
  char2a   first accepted character at offs2
  char2b   second accepted character at offs2 (may be the same as char2a)

The generated code adds offs1 to STR_PTR, searches forward for a position
where the code unit at STR_PTR is char1a/char1b and the code unit
(offs1 - offs2) units earlier is char2a/char2b, then subtracts offs1 again.
It jumps to the failed_match list when STR_END is reached. Only the
PCRE2_JIT_COMPLETE mode is supported (asserted below). TMP1, TMP2 and, when
match_end_ptr is set, TMP3 are used by the generated code. */
static void fast_forward_char_pair_sse2(compiler_common *common, sljit_s32 offs1,
  PCRE2_UCHAR char1a, PCRE2_UCHAR char1b, sljit_s32 offs2, PCRE2_UCHAR char2a, PCRE2_UCHAR char2b)
{
DEFINE_COMPILER;
sljit_u32 bit1 = 0;
sljit_u32 bit2 = 0;
/* Byte distance between the two searched positions. */
sljit_u32 diff = IN_UCHARS(offs1 - offs2);
sljit_s32 tmp1_ind = sljit_get_register_index(TMP1);
sljit_s32 tmp2_ind = sljit_get_register_index(TMP2);
sljit_s32 str_ptr_ind = sljit_get_register_index(STR_PTR);
/* Numbers of the XMM registers used by the generated code. */
sljit_s32 data1_ind = 0;
sljit_s32 data2_ind = 1;
sljit_s32 tmp_ind = 2;
sljit_s32 cmp1a_ind = 3;
sljit_s32 cmp1b_ind = 4;
sljit_s32 cmp2a_ind = 5;
sljit_s32 cmp2b_ind = 6;
struct sljit_label *start;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_label *restart;
#endif
struct sljit_jump *jump[2];

sljit_u8 instruction[8];

SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE && offs1 > offs2);
SLJIT_ASSERT(diff <= IN_UCHARS(max_fast_forward_char_pair_sse2_offset()));
SLJIT_ASSERT(tmp1_ind < 8 && tmp2_ind == 1);

/* Initialize. */
if (common->match_end_ptr != 0)
  {
  /* STR_END is saved in TMP3 and lowered to the value stored at
  match_end_ptr plus (offs1 + 1) code units when that is less. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offs1 + 1));

  OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, STR_END, 0);
  CMOV(SLJIT_LESS, STR_END, TMP1, 0);
  }

/* From here STR_PTR points to the position of the first character. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1));
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

/* MOVD xmm, r/m32 */
instruction[0] = 0x66;
instruction[1] = 0x0f;
instruction[2] = 0x6e;

/* Comparison values of the first character: TMP1 (and TMP2 when the two
characters differ). When is_powerof2() accepts their xor, the values are
(char1a | bit1) and bit1, otherwise the two characters themselves. */
if (char1a == char1b)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a));
else
  {
  bit1 = char1a ^ char1b;
  if (is_powerof2(bit1))
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a | bit1));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(bit1));
    }
  else
    {
    bit1 = 0;
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(char1b));
    }
  }

instruction[3] = 0xc0 | (cmp1a_ind << 3) | tmp1_ind;
sljit_emit_op_custom(compiler, instruction, 4);

if (char1a != char1b)
  {
  instruction[3] = 0xc0 | (cmp1b_ind << 3) | tmp2_ind;
  sljit_emit_op_custom(compiler, instruction, 4);
  }

/* Comparison values of the second character, computed the same way. */
if (char2a == char2b)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a));
else
  {
  bit2 = char2a ^ char2b;
  if (is_powerof2(bit2))
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a | bit2));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(bit2));
    }
  else
    {
    bit2 = 0;
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(char2b));
    }
  }

instruction[3] = 0xc0 | (cmp2a_ind << 3) | tmp1_ind;
sljit_emit_op_custom(compiler, instruction, 4);

if (char2a != char2b)
  {
  instruction[3] = 0xc0 | (cmp2b_ind << 3) | tmp2_ind;
  sljit_emit_op_custom(compiler, instruction, 4);
  }

/* PSHUFD xmm1, xmm2/m128, imm8 */
/* instruction[0] = 0x66; */
/* instruction[1] = 0x0f; */
/* Each comparison register is shuffled onto itself with a zero immediate,
which selects its lowest 32 bit element for every element of the result. */
instruction[2] = 0x70;
instruction[4] = 0;

instruction[3] = 0xc0 | (cmp1a_ind << 3) | cmp1a_ind;
sljit_emit_op_custom(compiler, instruction, 5);

if (char1a != char1b)
  {
  instruction[3] = 0xc0 | (cmp1b_ind << 3) | cmp1b_ind;
  sljit_emit_op_custom(compiler, instruction, 5);
  }

instruction[3] = 0xc0 | (cmp2a_ind << 3) | cmp2a_ind;
sljit_emit_op_custom(compiler, instruction, 5);

if (char2a != char2b)
  {
  instruction[3] = 0xc0 | (cmp2b_ind << 3) | cmp2b_ind;
  sljit_emit_op_custom(compiler, instruction, 5);
  }

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
/* The UTF check at the end jumps back here to search again. */
restart = LABEL();
#endif

/* TMP1 is the address of the second character, TMP2 keeps the unaligned
STR_PTR. Both STR_PTR and TMP1 are rounded down to a 16 byte boundary. */
OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1 - offs2));
OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf);
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, ~0xf);

load_from_mem_sse2(compiler, data1_ind, str_ptr_ind);

/* Taken when both positions are in the same 16 byte block, in which case
the second load is skipped. */
jump[0] = CMP(SLJIT_EQUAL, STR_PTR, 0, TMP1, 0);

load_from_mem_sse2(compiler, data2_ind, tmp1_ind);

/* The next four instructions build data2 from the two loaded blocks: the
earlier block shifted right by (16 - diff) bytes, merged with a copy of the
later block shifted left by diff bytes. */

/* MOVDQA xmm1, xmm2/m128 */
/* instruction[0] = 0x66; */
/* instruction[1] = 0x0f; */
instruction[2] = 0x6f;
instruction[3] = 0xc0 | (tmp_ind << 3) | data1_ind;
sljit_emit_op_custom(compiler, instruction, 4);

/* PSLLDQ xmm1, xmm2/m128, imm8 */
/* instruction[0] = 0x66; */
/* instruction[1] = 0x0f; */
instruction[2] = 0x73;
instruction[3] = 0xc0 | (7 << 3) | tmp_ind;
instruction[4] = diff;
sljit_emit_op_custom(compiler, instruction, 5);

/* PSRLDQ xmm1, xmm2/m128, imm8 */
/* instruction[0] = 0x66; */
/* instruction[1] = 0x0f; */
/* instruction[2] = 0x73; */
instruction[3] = 0xc0 | (3 << 3) | data2_ind;
instruction[4] = 16 - diff;
sljit_emit_op_custom(compiler, instruction, 5);

/* POR xmm1, xmm2/m128 */
/* instruction[0] = 0x66; */
/* instruction[1] = 0x0f; */
instruction[2] = 0xeb;
instruction[3] = 0xc0 | (data2_ind << 3) | tmp_ind;
sljit_emit_op_custom(compiler, instruction, 4);

jump[1] = JUMP(SLJIT_JUMP);

JUMPHERE(jump[0]);

/* Same block case: data2 is a copy of data1 shifted left by diff bytes. */

/* MOVDQA xmm1, xmm2/m128 */
/* instruction[0] = 0x66; */
/* instruction[1] = 0x0f; */
instruction[2] = 0x6f;
instruction[3] = 0xc0 | (data2_ind << 3) | data1_ind;
sljit_emit_op_custom(compiler, instruction, 4);

/* PSLLDQ xmm1, xmm2/m128, imm8 */
/* instruction[0] = 0x66; */
/* instruction[1] = 0x0f; */
instruction[2] = 0x73;
instruction[3] = 0xc0 | (7 << 3) | data2_ind;
instruction[4] = diff;
sljit_emit_op_custom(compiler, instruction, 5);

JUMPHERE(jump[1]);

/* TMP2 becomes the distance of the real STR_PTR from the aligned address. */
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf);

fast_forward_char_pair_sse2_compare(compiler, char2a, char2b, bit2, data2_ind, cmp2a_ind, cmp2b_ind, tmp_ind);
fast_forward_char_pair_sse2_compare(compiler, char1a, char1b, bit1, data1_ind, cmp1a_ind, cmp1b_ind, tmp_ind);

/* Only positions where both comparisons succeeded are kept. */

/* PAND xmm1, xmm2/m128 */
/* instruction[0] = 0x66; */
/* instruction[1] = 0x0f; */
instruction[2] = 0xdb;
instruction[3] = 0xc0 | (data1_ind << 3) | data2_ind;
sljit_emit_op_custom(compiler, instruction, 4);

/* PMOVMSKB reg, xmm */
/* instruction[0] = 0x66; */
/* instruction[1] = 0x0f; */
instruction[2] = 0xd7;
instruction[3] = 0xc0 | (tmp1_ind << 3) | 0;
sljit_emit_op_custom(compiler, instruction, 4);

/* Ignore matches before the first STR_PTR. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0);

/* BSF r32, r/m32 */
instruction[0] = 0x0f;
instruction[1] = 0xbc;
instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind;
sljit_emit_op_custom(compiler, instruction, 3);
/* Tells sljit that the custom instruction has set the zero flag. */
sljit_set_current_flags(compiler, SLJIT_SET_Z);

/* Taken when a result bit was set: TMP1 is the byte distance of the first
match from STR_PTR. */
jump[0] = JUMP(SLJIT_NOT_ZERO);

/* No match in the first block: go back to the aligned address. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

/* Main loop. */
/* The prefix bytes are restored because BSF has overwritten them. */
instruction[0] = 0x66;
instruction[1] = 0x0f;

start = LABEL();

/* data2 receives the current block before STR_PTR is advanced, data1
receives the next block. */
load_from_mem_sse2(compiler, data2_ind, str_ptr_ind);

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

load_from_mem_sse2(compiler, data1_ind, str_ptr_ind);

/* PSRLDQ xmm1, xmm2/m128, imm8 */
/* instruction[0] = 0x66; */
/* instruction[1] = 0x0f; */
instruction[2] = 0x73;
instruction[3] = 0xc0 | (3 << 3) | data2_ind;
instruction[4] = 16 - diff;
sljit_emit_op_custom(compiler, instruction, 5);

/* MOVDQA xmm1, xmm2/m128 */
/* instruction[0] = 0x66; */
/* instruction[1] = 0x0f; */
instruction[2] = 0x6f;
instruction[3] = 0xc0 | (tmp_ind << 3) | data1_ind;
sljit_emit_op_custom(compiler, instruction, 4);

/* PSLLDQ xmm1, xmm2/m128, imm8 */
/* instruction[0] = 0x66; */
/* instruction[1] = 0x0f; */
instruction[2] = 0x73;
instruction[3] = 0xc0 | (7 << 3) | tmp_ind;
instruction[4] = diff;
sljit_emit_op_custom(compiler, instruction, 5);

/* POR xmm1, xmm2/m128 */
/* instruction[0] = 0x66; */
/* instruction[1] = 0x0f; */
instruction[2] = 0xeb;
instruction[3] = 0xc0 | (data2_ind << 3) | tmp_ind;
sljit_emit_op_custom(compiler, instruction, 4);

fast_forward_char_pair_sse2_compare(compiler, char1a, char1b, bit1, data1_ind, cmp1a_ind, cmp1b_ind, tmp_ind);
fast_forward_char_pair_sse2_compare(compiler, char2a, char2b, bit2, data2_ind, cmp2a_ind, cmp2b_ind, tmp_ind);

/* PAND xmm1, xmm2/m128 */
/* instruction[0] = 0x66; */
/* instruction[1] = 0x0f; */
instruction[2] = 0xdb;
instruction[3] = 0xc0 | (data1_ind << 3) | data2_ind;
sljit_emit_op_custom(compiler, instruction, 4);

/* PMOVMSKB reg, xmm */
/* instruction[0] = 0x66; */
/* instruction[1] = 0x0f; */
instruction[2] = 0xd7;
instruction[3] = 0xc0 | (tmp1_ind << 3) | 0;
sljit_emit_op_custom(compiler, instruction, 4);

/* BSF r32, r/m32 */
instruction[0] = 0x0f;
instruction[1] = 0xbc;
instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind;
sljit_emit_op_custom(compiler, instruction, 3);
sljit_set_current_flags(compiler, SLJIT_SET_Z);

/* No result bit was set: continue with the next 16 bytes. */
JUMPTO(SLJIT_ZERO, start);

JUMPHERE(jump[0]);

/* Add the byte distance of the first match to STR_PTR. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);

add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

/* STR_END is reloaded from match_end_ptr here; the value saved in TMP3 is
restored at the very end. */
if (common->match_end_ptr != 0)
  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf)
  {
  /* Inspect the code unit at the start of the candidate match. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offs1));

  jump[0] = jump_if_utf_char_start(compiler, TMP1);

  /* Not a character start: search again from the next code unit, or fail
  when the end is reached. */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, restart);

  add_jump(compiler, &common->failed_match, JUMP(SLJIT_JUMP));

  JUMPHERE(jump[0]);
  }
#endif

/* Undo the initial adjustment, so STR_PTR points to the start of the match. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1));

if (common->match_end_ptr != 0)
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
}
4915
check_fast_forward_char_pair_sse2(compiler_common * common,fast_forward_char_data * chars,int max)4916 static BOOL check_fast_forward_char_pair_sse2(compiler_common *common, fast_forward_char_data *chars, int max)
4917 {
4918 sljit_s32 i, j, priority, count;
4919 sljit_u32 priorities;
4920 PCRE2_UCHAR a1, a2, b1, b2;
4921
4922 priorities = 0;
4923
4924 count = 0;
4925 for (i = 0; i < max; i++)
4926 {
4927 if (chars[i].last_count > 2)
4928 {
4929 SLJIT_ASSERT(chars[i].last_count <= 7);
4930
4931 priorities |= (1 << chars[i].last_count);
4932 count++;
4933 }
4934 }
4935
4936 if (count < 2)
4937 return FALSE;
4938
4939 for (priority = 7; priority > 2; priority--)
4940 {
4941 if ((priorities & (1 << priority)) == 0)
4942 continue;
4943
4944 for (i = max - 1; i >= 1; i--)
4945 if (chars[i].last_count >= priority)
4946 {
4947 SLJIT_ASSERT(chars[i].count <= 2 && chars[i].count >= 1);
4948
4949 a1 = chars[i].chars[0];
4950 a2 = chars[i].chars[1];
4951
4952 j = i - max_fast_forward_char_pair_sse2_offset();
4953 if (j < 0)
4954 j = 0;
4955
4956 while (j < i)
4957 {
4958 if (chars[j].last_count >= priority)
4959 {
4960 b1 = chars[j].chars[0];
4961 b2 = chars[j].chars[1];
4962
4963 if (a1 != b1 && a1 != b2 && a2 != b1 && a2 != b2)
4964 {
4965 fast_forward_char_pair_sse2(common, i, a1, a2, j, b1, b2);
4966 return TRUE;
4967 }
4968 }
4969 j++;
4970 }
4971 }
4972 }
4973
4974 return FALSE;
4975 }
4976
4977 #endif
4978
4979 #undef SSE2_COMPARE_TYPE_INDEX
4980
4981 #endif
4982
/* Emits code which moves STR_PTR forward until the code unit 'offset' units
after it is equal to char1 or char2.

Arguments:
  common   the compiler common data
  char1    first accepted character
  char2    second accepted character (may be the same as char1)
  offset   distance (in code units) of the checked character from the start
           of the match; must be zero unless the mode is PCRE2_JIT_COMPLETE

When match_end_ptr is set, STR_END is temporarily limited (its value is kept
in TMP3 and restored before leaving). On x86 without valgrind support the
SSE2 variant is emitted when the CPU supports SSE2; otherwise a simple loop
which checks one code unit per iteration is generated. In
PCRE2_JIT_COMPLETE mode reaching STR_END jumps to the failed_match list. */
static void fast_forward_first_char2(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
{
DEFINE_COMPILER;
struct sljit_label *scan_loop;
struct sljit_jump *first_matched;
struct sljit_jump *end_reached;
PCRE2_UCHAR diff_bits;
BOOL limited = (common->match_end_ptr != 0);

SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE || offset == 0);

if (limited)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);

/* The search runs on the position of the checked character. */
if (offset > 0)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));

if (limited)
  {
  /* Save STR_END, then lower it to the limit when the limit is smaller. */
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);

  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offset + 1));
  OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP1, 0);
  CMOV(SLJIT_GREATER, STR_END, TMP1, 0);
  }

#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) && !(defined SUPPORT_VALGRIND)

/* Prefer the SSE2 based search when it is available. */

if (sljit_has_cpu_feature(SLJIT_HAS_SSE2))
  {
  fast_forward_first_char2_sse2(common, char1, char2, offset);

  if (offset > 0)
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));

  if (limited)
    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
  return;
  }

#endif

/* Generic loop: one code unit is checked per iteration. */
scan_loop = LABEL();

end_reached = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (common->mode == PCRE2_JIT_COMPLETE)
  add_jump(compiler, &common->failed_match, end_reached);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

diff_bits = char1 ^ char2;
if (diff_bits == 0)
  {
  /* Single character. */
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char1, scan_loop);
  }
else if (is_powerof2(diff_bits))
  {
  /* One compare is enough after the differing bit is forced to one. */
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, diff_bits);
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char1 | diff_bits, scan_loop);
  }
else
  {
  /* Two separate compares. */
  first_matched = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1);
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char2, scan_loop);
  JUMPHERE(first_matched);
  }

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf && offset > 0)
  {
  /* The match must begin on the first code unit of a character. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-(offset + 1)));
  jumpto_if_not_utf_char_start(compiler, TMP1, scan_loop);
  }
#endif

/* Step back to the start of the match: STR_PTR has already been moved past
the checked character. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset + 1));

if (common->mode != PCRE2_JIT_COMPLETE)
  JUMPHERE(end_reached);

if (limited)
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
}
5070
fast_forward_first_n_chars(compiler_common * common)5071 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common)
5072 {
5073 DEFINE_COMPILER;
5074 struct sljit_label *start;
5075 struct sljit_jump *match;
5076 fast_forward_char_data chars[MAX_N_CHARS];
5077 sljit_s32 offset;
5078 PCRE2_UCHAR mask;
5079 PCRE2_UCHAR *char_set, *char_set_end;
5080 int i, max, from;
5081 int range_right = -1, range_len;
5082 sljit_u8 *update_table = NULL;
5083 BOOL in_range;
5084 sljit_u32 rec_count;
5085
5086 for (i = 0; i < MAX_N_CHARS; i++)
5087 {
5088 chars[i].count = 0;
5089 chars[i].last_count = 0;
5090 }
5091
5092 rec_count = 10000;
5093 max = scan_prefix(common, common->start, chars, MAX_N_CHARS, &rec_count);
5094
5095 if (max < 1)
5096 return FALSE;
5097
5098 /* Convert last_count to priority. */
5099 for (i = 0; i < max; i++)
5100 {
5101 SLJIT_ASSERT(chars[i].count > 0 && chars[i].last_count <= chars[i].count);
5102
5103 if (chars[i].count == 1)
5104 {
5105 chars[i].last_count = (chars[i].last_count == 1) ? 7 : 5;
5106 /* Simplifies algorithms later. */
5107 chars[i].chars[1] = chars[i].chars[0];
5108 }
5109 else if (chars[i].count == 2)
5110 {
5111 SLJIT_ASSERT(chars[i].chars[0] != chars[i].chars[1]);
5112
5113 if (is_powerof2(chars[i].chars[0] ^ chars[i].chars[1]))
5114 chars[i].last_count = (chars[i].last_count == 2) ? 6 : 4;
5115 else
5116 chars[i].last_count = (chars[i].last_count == 2) ? 3 : 2;
5117 }
5118 else
5119 chars[i].last_count = (chars[i].count == 255) ? 0 : 1;
5120 }
5121
5122 #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) && !(defined SUPPORT_VALGRIND) && !(defined _WIN64)
5123 if (check_fast_forward_char_pair_sse2(common, chars, max))
5124 return TRUE;
5125 #endif
5126
5127 in_range = FALSE;
5128 /* Prevent compiler "uninitialized" warning */
5129 from = 0;
5130 range_len = 4 /* minimum length */ - 1;
5131 for (i = 0; i <= max; i++)
5132 {
5133 if (in_range && (i - from) > range_len && (chars[i - 1].count < 255))
5134 {
5135 range_len = i - from;
5136 range_right = i - 1;
5137 }
5138
5139 if (i < max && chars[i].count < 255)
5140 {
5141 SLJIT_ASSERT(chars[i].count > 0);
5142 if (!in_range)
5143 {
5144 in_range = TRUE;
5145 from = i;
5146 }
5147 }
5148 else
5149 in_range = FALSE;
5150 }
5151
5152 if (range_right >= 0)
5153 {
5154 update_table = (sljit_u8 *)allocate_read_only_data(common, 256);
5155 if (update_table == NULL)
5156 return TRUE;
5157 memset(update_table, IN_UCHARS(range_len), 256);
5158
5159 for (i = 0; i < range_len; i++)
5160 {
5161 SLJIT_ASSERT(chars[range_right - i].count > 0 && chars[range_right - i].count < 255);
5162
5163 char_set = chars[range_right - i].chars;
5164 char_set_end = char_set + chars[range_right - i].count;
5165 do
5166 {
5167 if (update_table[(*char_set) & 0xff] > IN_UCHARS(i))
5168 update_table[(*char_set) & 0xff] = IN_UCHARS(i);
5169 char_set++;
5170 }
5171 while (char_set < char_set_end);
5172 }
5173 }
5174
5175 offset = -1;
5176 /* Scan forward. */
5177 for (i = 0; i < max; i++)
5178 {
5179 if (range_right == i)
5180 continue;
5181
5182 if (offset == -1)
5183 {
5184 if (chars[i].last_count >= 2)
5185 offset = i;
5186 }
5187 else if (chars[offset].last_count < chars[i].last_count)
5188 offset = i;
5189 }
5190
5191 SLJIT_ASSERT(offset == -1 || (chars[offset].count >= 1 && chars[offset].count <= 2));
5192
5193 if (range_right < 0)
5194 {
5195 if (offset < 0)
5196 return FALSE;
5197 /* Works regardless the value is 1 or 2. */
5198 fast_forward_first_char2(common, chars[offset].chars[0], chars[offset].chars[1], offset);
5199 return TRUE;
5200 }
5201
5202 SLJIT_ASSERT(range_right != offset);
5203
5204 if (common->match_end_ptr != 0)
5205 {
5206 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
5207 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
5208 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
5209 OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP1, 0);
5210 CMOV(SLJIT_GREATER, STR_END, TMP1, 0);
5211 }
5212 else
5213 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
5214
5215 SLJIT_ASSERT(range_right >= 0);
5216
5217 #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
5218 OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);
5219 #endif
5220
5221 start = LABEL();
5222 add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
5223
5224 #if PCRE2_CODE_UNIT_WIDTH == 8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
5225 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
5226 #else
5227 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
5228 #endif
5229
5230 #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
5231 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
5232 #else
5233 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);
5234 #endif
5235 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5236 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);
5237
5238 if (offset >= 0)
5239 {
5240 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offset));
5241 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5242
5243 if (chars[offset].count == 1)
5244 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0], start);
5245 else
5246 {
5247 mask = chars[offset].chars[0] ^ chars[offset].chars[1];
5248 if (is_powerof2(mask))
5249 {
5250 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
5251 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0] | mask, start);
5252 }
5253 else
5254 {
5255 match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0]);
5256 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[1], start);
5257 JUMPHERE(match);
5258 }
5259 }
5260 }
5261
5262 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5263 if (common->utf && offset != 0)
5264 {
5265 if (offset < 0)
5266 {
5267 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5268 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5269 }
5270 else
5271 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
5272
5273 jumpto_if_not_utf_char_start(compiler, TMP1, start);
5274
5275 if (offset < 0)
5276 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5277 }
5278 #endif
5279
5280 if (offset >= 0)
5281 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5282
5283 if (common->match_end_ptr != 0)
5284 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
5285 else
5286 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
5287 return TRUE;
5288 }
5289
fast_forward_first_char(compiler_common * common)5290 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common)
5291 {
5292 PCRE2_UCHAR first_char = (PCRE2_UCHAR)(common->re->first_codeunit);
5293 PCRE2_UCHAR oc;
5294
5295 oc = first_char;
5296 if ((common->re->flags & PCRE2_FIRSTCASELESS) != 0)
5297 {
5298 oc = TABLE_GET(first_char, common->fcc, first_char);
5299 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
5300 if (first_char > 127 && common->utf)
5301 oc = UCD_OTHERCASE(first_char);
5302 #endif
5303 }
5304
5305 fast_forward_first_char2(common, first_char, oc, 0);
5306 }
5307
/* Emits code that moves STR_PTR forward until it stands just after a newline
sequence, or until the end limit is reached.

Nothing is skipped when STR_PTR is at or before jit_arguments->str. When
common->match_end_ptr is in use, STR_END is replaced by the value stored in
that stack slot for the duration of the search; the original STR_END is kept
in TMP3 and restored before the emitted code finishes. TMP1 and TMP2 are used
as scratch registers by the emitted code. */
static SLJIT_INLINE void fast_forward_newline(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_label *loop;
struct sljit_jump *lastchar;
struct sljit_jump *firstchar;
struct sljit_jump *quit;
struct sljit_jump *foundcr = NULL;
struct sljit_jump *notfoundnl;
jump_list *newline = NULL;

if (common->match_end_ptr != 0)
  {
  /* Save the real end in TMP3 and search only up to the stored limit. */
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  }

/* Fixed newline made of two code units (high byte, low byte of
common->newline). */
if (common->nltype == NLTYPE_FIXED && common->newline > 255)
  {
  lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
  firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);

  /* Step STR_PTR back by one code unit when at least two code units lie
  between jit_arguments->begin and STR_PTR: TMP2 becomes 0 or 1 from the
  comparison and is scaled to the code unit size before the subtraction. */
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
  OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER_EQUAL);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

  /* Advance one code unit at a time until the two code units immediately
  before STR_PTR are the newline pair, or the end limit is reached. */
  loop = LABEL();
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
  CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);

  JUMPHERE(quit);
  JUMPHERE(firstchar);
  JUMPHERE(lastchar);

  if (common->match_end_ptr != 0)
    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
  return;
  }

/* All other newline types. */
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
/* Presumably steps STR_PTR back by one character so that the character
before the current position is examined first - see skip_char_back(). */
skip_char_back(common);

loop = LABEL();
/* The loop entry is also published in common for use outside this function. */
common->ff_newline_shortcut = loop;

read_char_range(common, common->nlmin, common->nlmax, TRUE);
lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
  foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
/* The jumps collected in the newline list are sent back to the loop head;
with the last argument FALSE these are presumably the "not a newline"
exits of check_newlinechar(). */
check_newlinechar(common, common->nltype, &newline, FALSE);
set_jumps(newline, loop);

if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
  {
  quit = JUMP(SLJIT_JUMP);
  JUMPHERE(foundcr);
  /* A CR was read: if the next code unit is NL, step over it as well.
  TMP1 becomes 0 or 1 and is scaled to the code unit size. */
  notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  JUMPHERE(notfoundnl);
  JUMPHERE(quit);
  }
JUMPHERE(lastchar);
JUMPHERE(firstchar);

if (common->match_end_ptr != 0)
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
}
5394
/* Forward declaration: optimize_class() is defined further down in this file
but is needed by fast_forward_start_bits(). */
static BOOL optimize_class(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);

/* Emits code that scans forward, one code unit at a time, until a code unit
is found whose bit is set in common->re->start_bitmap.

On a hit STR_PTR is left pointing at the matching code unit. When the end
limit is reached the emitted code jumps to common->failed_match in
PCRE2_JIT_COMPLETE mode; in the other modes it falls out of the loop with
STR_PTR at the limit. When common->match_end_ptr is in use, STR_END is
temporarily lowered to the stored value plus one code unit (if that is
smaller) and the original STR_END is kept in RETURN_ADDR. */
static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common)
{
DEFINE_COMPILER;
const sljit_u8 *start_bits = common->re->start_bitmap;
struct sljit_label *start;
struct sljit_jump *partial_quit;
#if PCRE2_CODE_UNIT_WIDTH != 8
struct sljit_jump *found = NULL;
#endif
jump_list *matches = NULL;

if (common->match_end_ptr != 0)
  {
  /* STR_END = min(STR_END, stored limit + 1 code unit); the original
  STR_END is saved in RETURN_ADDR. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP1, 0);
  CMOV(SLJIT_GREATER, STR_END, TMP1, 0);
  }

start = LABEL();

partial_quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (common->mode == PCRE2_JIT_COMPLETE)
  add_jump(compiler, &common->failed_match, partial_quit);

/* Read the current code unit into TMP1 and step past it. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* First try to test the bitmap with a few compares; bit 255 of the bitmap
is passed as the nclass argument. If that is not possible, fall back to an
explicit bitmap lookup. */
if (!optimize_class(common, start_bits, (start_bits[31] & 0x80) != 0, FALSE, &matches))
  {
#if PCRE2_CODE_UNIT_WIDTH != 8
  /* Code units of 255 and above are not looked up in the table: they are
  accepted when bit 255 is set and skipped otherwise. */
  if ((start_bits[31] & 0x80) != 0)
    found = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 255);
  else
    CMPTO(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 255, start);
#elif defined SUPPORT_UNICODE
  if (common->utf && is_char7_bitset(start_bits, FALSE))
    CMPTO(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 127, start);
#endif
  /* TMP2 = bit index within the byte, TMP1 = bitmap byte for the code unit. */
  OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
  /* Build the mask (1 << bit index) in TMP3 when it is a real register,
  otherwise in place in TMP2, and test it against the bitmap byte. */
  if (sljit_get_register_index(TMP3) >= 0)
    {
    OP2(SLJIT_SHL, TMP3, 0, SLJIT_IMM, 1, TMP2, 0);
    OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP3, 0);
    }
  else
    {
    OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
    OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
    }
  /* Bit not set: try the next code unit. */
  JUMPTO(SLJIT_ZERO, start);
  }
else
  set_jumps(matches, start);

#if PCRE2_CODE_UNIT_WIDTH != 8
if (found != NULL)
  JUMPHERE(found);
#endif

/* Undo the increment so that STR_PTR points at the code unit just found. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* Outside complete mode the end-of-input exit lands here, after the
decrement above. */
if (common->mode != PCRE2_JIT_COMPLETE)
  JUMPHERE(partial_quit);

if (common->match_end_ptr != 0)
  OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
}
5468
/* Emits code that searches the subject for a required code unit.

Arguments:
  common         compiler state; common->req_char_ptr must be a valid stack slot
  req_char       the required code unit
  caseless       when TRUE the other case of req_char is accepted as well
  has_firstchar  when TRUE the search starts one code unit after STR_PTR

The emitted code skips the search when STR_PTR + REQ_CU_MAX is below STR_END,
or when STR_PTR is below the position previously stored in the req_char_ptr
slot. Otherwise it scans from the start position; on a hit the position of
the hit is stored in the req_char_ptr slot.

Returns:         the jump taken when the end of the subject is reached without
                 a hit; the caller has to bind it to a target. */
static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, PCRE2_UCHAR req_char, BOOL caseless, BOOL has_firstchar)
{
DEFINE_COMPILER;
struct sljit_label *loop;
struct sljit_jump *toolong;
struct sljit_jump *alreadyfound;
struct sljit_jump *found;
struct sljit_jump *foundoc = NULL;
struct sljit_jump *notfound;
sljit_u32 oc, bit;

SLJIT_ASSERT(common->req_char_ptr != 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
/* NOTE(review): REQ_CU_MAX is added as a raw offset here, while the other
offsets in this function are scaled with IN_UCHARS() - confirm this is
intended for 16-bit and 32-bit code units. */
OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_CU_MAX);
toolong = CMP(SLJIT_LESS, TMP1, 0, STR_END, 0);
alreadyfound = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);

/* TMP1 is the scan position. */
if (has_firstchar)
  OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
else
  OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);

loop = LABEL();
notfound = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
/* Work out the other case at compile time. */
oc = req_char;
if (caseless)
  {
  oc = TABLE_GET(req_char, common->fcc, req_char);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
  if (req_char > 127 && common->utf)
    oc = UCD_OTHERCASE(req_char);
#endif
  }
if (req_char == oc)
  found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
else
  {
  bit = req_char ^ oc;
  if (is_powerof2(bit))
    {
    /* The two cases differ in a single bit: force that bit on and use one
    comparison for both. */
    OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
    found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
    }
  else
    {
    found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
    foundoc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc);
    }
  }
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
JUMPTO(SLJIT_JUMP, loop);

JUMPHERE(found);
if (foundoc)
  JUMPHERE(foundoc);
/* Remember where the required code unit was seen. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);
JUMPHERE(alreadyfound);
JUMPHERE(toolong);
return notfound;
}
5531
/* Emits a fast-call helper that pops saved-value records from the top of the
stack addressed by STACK_TOP and writes the saved values back into the local
area.

The word just below STACK_TOP selects the record type:
  > 0   offset of a local slot pair; the next two words on the stack are
        copied to that slot and the following one, and three words are popped
  < 0   negated offset of a single local slot; the next word on the stack is
        copied to it, and two words are popped
  == 0  end marker; the helper returns through RETURN_ADDR without popping

TMP1 holds the local base address and TMP2 the record word / target address;
TMP3 is used as an extra scratch register when it maps to a real register. */
static void do_revertframes(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_label *mainloop;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
GET_LOCAL_BASE(TMP1, 0, 0);

/* Drop frames until we reach STACK_TOP. */
mainloop = LABEL();
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), -sizeof(sljit_sw));
jump = CMP(SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0);

/* Positive record: TMP2 becomes the address of the first target slot. */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
if (sljit_get_register_index (TMP3) < 0)
  {
  /* TMP3 is not a real register: copy memory to memory. */
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw)));
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(STACK_TOP), -(3 * sizeof(sljit_sw)));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
  }
else
  {
  /* TMP1 is borrowed as a scratch register for the first value, so the
  local base has to be reloaded into it before the loop is re-entered. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw)));
  OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(3 * sizeof(sljit_sw)));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
  GET_LOCAL_BASE(TMP1, 0, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP3, 0);
  }
JUMPTO(SLJIT_JUMP, mainloop);

JUMPHERE(jump);
/* Only values <= 0 arrive here, so "not zero" means "negative". */
jump = CMP(SLJIT_NOT_ZERO /* SIG_LESS */, TMP2, 0, SLJIT_IMM, 0);
/* End of reverting values. */
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

JUMPHERE(jump);
/* Negative record: negate to get the offset, then restore a single slot. */
OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
if (sljit_get_register_index (TMP3) < 0)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw)));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
  }
else
  {
  OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw)));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP3, 0);
  }
JUMPTO(SLJIT_JUMP, mainloop);
}
5585
/* Emits a fast-call helper that tests whether STR_PTR is at a word boundary.

The helper computes a 0/1 "is a word character" value for the character
before STR_PTR (kept in LOCALS1; 0 when STR_PTR is at or before
jit_arguments->begin) and for the character at STR_PTR (kept in TMP2; 0 when
the end-of-subject check jumps out). The two values are XORed with
SLJIT_SET_Z as the last operation before returning, so the caller receives
the answer in the zero flag. The return address is kept in LOCALS0. */
static void check_wordboundary(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *skipread;
jump_list *skipread_list = NULL;
#if PCRE2_CODE_UNIT_WIDTH != 8 || defined SUPPORT_UNICODE
struct sljit_jump *jump;
#endif

/* The table lookups below shift by 4 to isolate the ctype_word bit. */
SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);

sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
/* Get type of the previous char, and put it to LOCALS1. */
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, SLJIT_IMM, 0);
/* No previous character: LOCALS1 keeps the 0 stored above. */
skipread = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
skip_char_back(common);
check_start_used_ptr(common);
read_char(common);

/* Testing char type. */
#ifdef SUPPORT_UNICODE
if (common->use_ucp)
  {
  /* TMP2 = 1 for an underscore; otherwise the getucd helper is called
  (presumably leaving the Unicode character type in TMP1) and TMP2 becomes 1
  when the type lies in ucp_Ll..ucp_Lu or ucp_Nd..ucp_No. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
  jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
  JUMPHERE(jump);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP2, 0);
  }
else
#endif
  {
  /* Table-driven test: characters above 255 skip the lookup and keep the
  0 already stored in LOCALS1. */
#if PCRE2_CODE_UNIT_WIDTH != 8
  jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#elif defined SUPPORT_UNICODE
  /* Here LOCALS1 has already been zeroed. */
  jump = NULL;
  if (common->utf)
    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
#if PCRE2_CODE_UNIT_WIDTH != 8
  JUMPHERE(jump);
#elif defined SUPPORT_UNICODE
  if (jump != NULL)
    JUMPHERE(jump);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  }
JUMPHERE(skipread);

/* Now the character at STR_PTR; TMP2 stays 0 when the end-of-subject check
collected in skipread_list fires. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
check_str_end(common, &skipread_list);
peek_char(common, READ_CHAR_MAX);

/* Testing char type. This is a code duplication. */
#ifdef SUPPORT_UNICODE
if (common->use_ucp)
  {
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
  jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
  JUMPHERE(jump);
  }
else
#endif
  {
#if PCRE2_CODE_UNIT_WIDTH != 8
  /* TMP2 may be destroyed by peek_char. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#elif defined SUPPORT_UNICODE
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  jump = NULL;
  if (common->utf)
    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#endif
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
  OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
#if PCRE2_CODE_UNIT_WIDTH != 8
  JUMPHERE(jump);
#elif defined SUPPORT_UNICODE
  if (jump != NULL)
    JUMPHERE(jump);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  }
set_jumps(skipread_list, LABEL());

/* Zero flag set: both sides have the same word-ness, so no boundary. */
OP2(SLJIT_XOR | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
}
5694
/* Tries to test the code unit in TMP1 against a 256-bit class bitmap with a
handful of compare instructions instead of a table lookup.

The bitmap is first broken into runs of equal bits; edges[] receives every
position where the bit value changes. nclass decides whether the values from
256 upwards form a further run (an edge at 256). If at most four edges are
found, compares are emitted whose jumps are appended to the backtracks list;
invert swaps the meaning of set and clear bits.

Returns:  TRUE if code was emitted (or nothing was needed),
          FALSE if the bitmap has too many runs for this method */
static BOOL optimize_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
/* May destroy TMP1. */
DEFINE_COMPILER;
int edges[MAX_CLASS_RANGE_SIZE];
sljit_u8 state, sample, uniform, inside;
int pos, byte_index, delta, base, count = 0;

/* state is the bit value of the current run; uniform is a whole bitmap
byte filled with that value, which lets eight equal bits be skipped at once. */
state = bits[0] & 0x1;
uniform = state ? 0xff : 0x00;

pos = 0;
while (pos < 256)
  {
  byte_index = pos >> 3;

  if ((pos & 0x7) == 0 && bits[byte_index] == uniform)
    {
    pos += 8;
    continue;
    }

  sample = (bits[byte_index] >> (pos & 0x7)) & 0x1;
  if (sample != state)
    {
    if (count >= MAX_CLASS_RANGE_SIZE)
      return FALSE;
    edges[count++] = pos;
    state = sample;
    uniform = state ? 0xff : 0x00;
    }
  pos++;
  }

/* The values above 255 differ from the final run: add an edge at 256. */
if ((state == 0) ? (nclass != 0) : (nclass == 0))
  {
  if (count >= MAX_CLASS_RANGE_SIZE)
    return FALSE;
  edges[count++] = 256;
  }

if (count < 0 || count > 4)
  return FALSE;

/* inside describes the first run (the one starting at zero). */
inside = bits[0] & 0x1;
if (invert)
  inside ^= 0x1;

switch (count)
  {
  case 0:
  /* One run only: either no character is accepted, or all of them are. */
  if (inside == 0)
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
  return TRUE;

  case 1:
  add_jump(compiler, backtracks, CMP(inside == 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, edges[0]));
  return TRUE;

  case 2:
  /* A middle run of length one needs an equality test only. */
  if (edges[0] + 1 == edges[1])
    add_jump(compiler, backtracks, CMP(inside != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, edges[0]));
  else
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, edges[0]);
    add_jump(compiler, backtracks, CMP(inside != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, edges[1] - edges[0]));
    }
  return TRUE;

  case 3:
  if (inside != 0)
    {
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, edges[2]));
    if (edges[0] + 1 != edges[1])
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, edges[0]);
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, edges[1] - edges[0]));
      }
    else
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, edges[0]));
    return TRUE;
    }

  add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, edges[0]));
  if (edges[1] + 1 != edges[2])
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, edges[1]);
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, edges[2] - edges[1]));
    }
  else
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, edges[1]));
  return TRUE;

  case 4:
  /* Two runs of equal length whose start positions differ in exactly one
  bit can be folded into one test by forcing that bit on. */
  delta = edges[2] - edges[0];
  if ((edges[1] - edges[0]) == (edges[3] - edges[2])
      && (edges[0] | delta) == edges[2]
      && (edges[1] & delta) == 0
      && is_powerof2(delta))
    {
    SLJIT_ASSERT((edges[0] & (edges[2] - edges[0])) == 0 && (edges[2] & edges[3] & (edges[2] - edges[0])) != 0);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, edges[2] - edges[0]);
    if (edges[2] + 1 != edges[3])
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, edges[2]);
      add_jump(compiler, backtracks, CMP(inside != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, edges[3] - edges[2]));
      }
    else
      add_jump(compiler, backtracks, CMP(inside != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, edges[2]));
    return TRUE;
    }

  if (inside != 0)
    {
    /* base is the amount already subtracted from TMP1. */
    base = 0;
    if (edges[0] + 1 != edges[1])
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, edges[0]);
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, edges[1] - edges[0]));
      base = edges[0];
      }
    else
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, edges[0]));

    if (edges[2] + 1 != edges[3])
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, edges[2] - base);
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, edges[3] - edges[2]));
      }
    else
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, edges[2] - base));
    return TRUE;
    }

  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, edges[0]);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, edges[3] - edges[0]));
  if (edges[1] + 1 != edges[2])
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, edges[1] - edges[0]);
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, edges[2] - edges[1]));
    }
  else
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, edges[1] - edges[0]));
  return TRUE;

  default:
  SLJIT_UNREACHABLE();
  return FALSE;
  }
}
5846
/* Tries to test the code unit in TMP1 against a class bitmap that selects
only a few individual characters, using compare + conditional move pairs.

The selected characters (the set bits, or the clear bits when nclass is set)
are collected into a short list. Two characters that differ only in bit 0x20
are merged into one entry, marked by adding 0x100 to it, and are later tested
with a single compare after OR-ing 0x20 into TMP1. TMP2 ends up non-zero when
one of the listed characters was seen, and a single jump on TMP2 is appended
to the backtracks list.

Returns:  TRUE if code was emitted,
          FALSE if the CPU has no conditional move, the list would overflow,
          or no character is selected */
static BOOL optimize_class_chars(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
/* May destroy TMP1. */
DEFINE_COMPILER;
uint16_t list[MAX_CLASS_CHARS_SIZE];
uint8_t mask;
sljit_s32 type;
int byte_index, bit_index, slot, ch;
int count = 0;
BOOL merged, has_pairs, jump_on_hit;

if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV))
  return FALSE;

/* Collect the selected characters in increasing order. */
for (byte_index = 0; byte_index < 32; byte_index++)
  {
  mask = bits[byte_index];
  if (nclass)
    mask = (uint8_t)~mask;

  for (bit_index = 0; bit_index < 8; bit_index++)
    {
    if (((mask >> bit_index) & 0x1) == 0)
      continue;

    ch = byte_index * 8 + bit_index;
    merged = FALSE;

    /* A character with bit 0x20 set may pair up with an earlier entry that
    is the same character without that bit. */
    if ((ch & 0x20) != 0)
      {
      for (slot = 0; slot < count && !merged; slot++)
        if (list[slot] == ch - 0x20)
          {
          list[slot] |= 0x120;
          merged = TRUE;
          }
      }

    if (merged)
      continue;

    if (count >= MAX_CLASS_CHARS_SIZE)
      return FALSE;

    list[count++] = (uint16_t) ch;
    }
  }

if (count == 0) return FALSE; /* Should never occur, but stops analyzers complaining. */

/* Initialise TMP2: a leading unpaired zero character is tested directly,
because a conditional move of TMP1 could not signal it. */
slot = 0;
has_pairs = FALSE;

if (list[0] == 0)
  {
  slot = 1;
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_ZERO);
  }
else
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);

/* Unpaired entries first; paired entries are only noted here. */
for (; slot < count; slot++)
  {
  if ((list[slot] & 0x100) != 0)
    {
    has_pairs = TRUE;
    continue;
    }

  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, list[slot]);
  CMOV(SLJIT_ZERO, TMP2, TMP1, 0);
  }

/* Paired entries: force bit 0x20 on, then one compare covers both forms. */
if (has_pairs)
  {
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x20);

  for (slot = 0; slot < count; slot++)
    {
    if ((list[slot] & 0x100) == 0)
      continue;

    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, list[slot] & 0xff);
    CMOV(SLJIT_ZERO, TMP2, TMP1, 0);
    }
  }

/* invert flips which outcome is sent to the backtracks list. */
jump_on_hit = invert ? !nclass : (nclass != 0);

type = jump_on_hit ? SLJIT_NOT_EQUAL : SLJIT_EQUAL;
add_jump(compiler, backtracks, CMP(type, TMP2, 0, SLJIT_IMM, 0));
return TRUE;
}
5947
/* Emits an optimized test of the code unit in TMP1 against a class bitmap.
The range based method is tried first; only if it declines is the
character-list method tried.

Returns:  TRUE if either method emitted the test, FALSE otherwise */
static BOOL optimize_class(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
/* May destroy TMP1. */
if (!optimize_class_ranges(common, bits, nclass, invert, backtracks))
  return optimize_class_chars(common, bits, nclass, invert, backtracks);

return TRUE;
}
5955
/* Emits a fast-call helper that tests the character in TMP1 against the
"any newline" set: 0x0a-0x0d and 0x85, plus 0x2028 and 0x2029 for 16-bit and
32-bit code units, or in 8-bit mode when common->utf is set.

The combined 0/1 result is left in TMP2, and the final OP_FLAGS also updates
the zero flag from it. TMP1 is modified by the emitted code. */
static void check_anynewline(compiler_common *common)
{
/* Check whether TMP1 contains a newline character. TMP2 destroyed. */
DEFINE_COMPILER;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Rebase TMP1 to 0x0a so that 0x0a-0x0d becomes a single range check. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
#endif
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  /* Setting the lowest bit lets one compare cover both 0x2028 and 0x2029. */
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
#if PCRE2_CODE_UNIT_WIDTH == 8
  }
#endif
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
/* Fold in the result of the last compare and set the zero flag. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
5982
/* Emits a fast-call helper that tests the character in TMP1 against the
horizontal whitespace set: 0x09, 0x20 and 0xa0, plus 0x1680, 0x180e,
0x2000-0x200a, 0x202f, 0x205f and 0x3000 for 16-bit and 32-bit code units, or
in 8-bit mode when common->utf is set.

The combined 0/1 result is left in TMP2, and the final OP_FLAGS also updates
the zero flag from it. TMP1 is modified on the wide-character path. */
static void check_hspace(compiler_common *common)
{
/* Check whether TMP1 contains a horizontal whitespace character. TMP2 destroyed. */
DEFINE_COMPILER;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
#endif
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  /* Rebase TMP1 to 0x2000: the remaining compares use offsets from there. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
#if PCRE2_CODE_UNIT_WIDTH == 8
  }
#endif
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
/* Fold in the result of the last compare and set the zero flag. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);

sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
6021
/* Emits a fast-call helper that tests the character in TMP1 against the
vertical whitespace set: 0x0a-0x0d and 0x85, plus 0x2028 and 0x2029 for
16-bit and 32-bit code units, or in 8-bit mode when common->utf is set.

The combined 0/1 result is left in TMP2, and the final OP_FLAGS also updates
the zero flag from it. TMP1 is modified by the emitted code. */
static void check_vspace(compiler_common *common)
{
/* Check whether TMP1 contains a vertical whitespace character. TMP2 destroyed. */
DEFINE_COMPILER;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Rebase TMP1 to 0x0a so that 0x0a-0x0d becomes a single range check. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
#endif
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  /* Setting the lowest bit lets one compare cover both 0x2028 and 0x2029. */
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
#if PCRE2_CODE_UNIT_WIDTH == 8
  }
#endif
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
/* Fold in the result of the last compare and set the zero flag. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);

sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
6049
/* Emits a fast-call subroutine that compares two code unit sequences exactly
(case sensitive).

Register usage visible in the emitted code: TMP1 points to the first sequence,
TMP2 holds the remaining length in bytes, and STR_PTR is first moved back by
TMP2 and then walks the second sequence. The loop ends on the first differing
code unit or when TMP2 reaches zero. The return address is parked in the
LOCALS0 stack slot and reloaded into TMP1 for the fast return.

When TMP3 is not a real machine register, STR_END and STACK_TOP are borrowed
as scratch registers; their values are kept in TMP3 / RETURN_ADDR meanwhile
and restored before returning. */
static void do_casefulcmp(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *mismatch;
struct sljit_label *loop;
int char1_reg = TMP3;
int char2_reg = RETURN_ADDR;
/* 0: plain loads and explicit pointer updates, 1: post-update loads,
2: pre-update loads. */
int load_mode = 0;
BOOL borrowed;

if (sljit_get_register_index(TMP3) < 0)
  {
  char1_reg = STR_END;
  char2_reg = STACK_TOP;
  }
borrowed = (char1_reg == STR_END);

sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

if (borrowed)
  {
  /* Save the borrowed registers. */
  OP1(SLJIT_MOV, TMP3, 0, char1_reg, 0);
  OP1(SLJIT_MOV, RETURN_ADDR, 0, char2_reg, 0);
  }

/* Ask the backend which addressing form is available (SLJIT_MEM_SUPP is only
a capability query). */
if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  load_mode = 1;
else if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  load_mode = 2;

if (load_mode == 2)
  {
  /* Pre-update loads advance the pointer first, so start one unit earlier. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  }

loop = LABEL();

switch (load_mode)
  {
  case 1:
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  break;

  case 2:
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  break;

  default:
  OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0);
  OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  break;
  }

/* Leave on the first difference, otherwise loop until the length is used up. */
mismatch = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
JUMPTO(SLJIT_NOT_ZERO, loop);

JUMPHERE(mismatch);
/* Reload the return address saved by the fast enter. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);

if (load_mode == 2)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

if (borrowed)
  {
  /* Restore the borrowed registers. */
  OP1(SLJIT_MOV, char1_reg, 0, TMP3, 0);
  OP1(SLJIT_MOV, char2_reg, 0, RETURN_ADDR, 0);
  }

sljit_emit_fast_return(compiler, TMP1, 0);
}
6129
/* Emits a fast-call subroutine that compares two code unit sequences after
mapping every code unit below 256 through the common->lcc table (code units
above 255 are compared unchanged when the code unit width is not 8).

Register usage visible in the emitted code: TMP1 points to the first sequence,
TMP2 holds the remaining length in bytes, and STR_PTR is first moved back by
TMP2 and then walks the second sequence. STR_END is always borrowed for the
first character and is preserved in the LOCALS1 stack slot. When TMP3 is not a
real machine register, STACK_TOP and STACK_LIMIT are borrowed as well and kept
in TMP3 / RETURN_ADDR until the end. The return address lives in LOCALS0. */
static void do_caselesscmp(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *branch;
struct sljit_label *loop;
int char1_reg = STR_END;
int char2_reg = RETURN_ADDR;
int lcc_table = TMP3;
/* 0: plain loads, 1: post-update loads, 2: pre-update loads. */
int load_mode = 0;
BOOL borrowed;

if (sljit_get_register_index(TMP3) < 0)
  {
  char2_reg = STACK_TOP;
  lcc_table = STACK_LIMIT;
  }
borrowed = (char2_reg == STACK_TOP);

/* Capability queries only (SLJIT_MEM_SUPP). */
if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  load_mode = 1;
else if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  load_mode = 2;

sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

/* STR_END doubles as the first character register: save it. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, char1_reg, 0);

if (borrowed)
  {
  OP1(SLJIT_MOV, TMP3, 0, char2_reg, 0);
  OP1(SLJIT_MOV, RETURN_ADDR, 0, lcc_table, 0);
  }

OP1(SLJIT_MOV, lcc_table, 0, SLJIT_IMM, common->lcc);

if (load_mode == 2)
  {
  /* Pre-update loads advance the pointer first, so start one unit earlier. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  }

loop = LABEL();

switch (load_mode)
  {
  case 1:
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  break;

  case 2:
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  break;

  default:
  /* STR_PTR is advanced later, after the table lookups. */
  OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0);
  OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  break;
  }

/* Map both characters through the lower case table; wide code units that do
not fit the 256 entry table skip the lookup. */
#if PCRE2_CODE_UNIT_WIDTH != 8
branch = CMP(SLJIT_GREATER, char1_reg, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_U8, char1_reg, 0, SLJIT_MEM2(lcc_table, char1_reg), 0);
#if PCRE2_CODE_UNIT_WIDTH != 8
JUMPHERE(branch);
branch = CMP(SLJIT_GREATER, char2_reg, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_U8, char2_reg, 0, SLJIT_MEM2(lcc_table, char2_reg), 0);
#if PCRE2_CODE_UNIT_WIDTH != 8
JUMPHERE(branch);
#endif

if (load_mode == 0)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* Leave on the first difference, otherwise loop until the length is used up. */
branch = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
JUMPTO(SLJIT_NOT_ZERO, loop);

JUMPHERE(branch);
/* Reload the return address saved by the fast enter. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);

if (load_mode == 2)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

if (borrowed)
  {
  OP1(SLJIT_MOV, char2_reg, 0, TMP3, 0);
  OP1(SLJIT_MOV, lcc_table, 0, RETURN_ADDR, 0);
  }

OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
sljit_emit_fast_return(compiler, TMP1, 0);
}
6227
6228 #if defined SUPPORT_UNICODE
6229
do_utf_caselesscmp(PCRE2_SPTR src1,PCRE2_SPTR src2,PCRE2_SPTR end1,PCRE2_SPTR end2)6230 static PCRE2_SPTR SLJIT_FUNC do_utf_caselesscmp(PCRE2_SPTR src1, PCRE2_SPTR src2, PCRE2_SPTR end1, PCRE2_SPTR end2)
6231 {
6232 /* This function would be ineffective to do in JIT level. */
6233 sljit_u32 c1, c2;
6234 const ucd_record *ur;
6235 const sljit_u32 *pp;
6236
6237 while (src1 < end1)
6238 {
6239 if (src2 >= end2)
6240 return (PCRE2_SPTR)1;
6241 GETCHARINC(c1, src1);
6242 GETCHARINC(c2, src2);
6243 ur = GET_UCD(c2);
6244 if (c1 != c2 && c1 != c2 + ur->other_case)
6245 {
6246 pp = PRIV(ucd_caseless_sets) + ur->caseset;
6247 for (;;)
6248 {
6249 if (c1 < *pp) return NULL;
6250 if (c1 == *pp++) break;
6251 }
6252 }
6253 }
6254 return src2;
6255 }
6256
6257 #endif /* SUPPORT_UNICODE */
6258
/* Emits the code that compares one pattern character (all of its code units
in UTF mode) starting at cc against the subject, and adds a jump to backtracks
for every comparison that may fail.

Arguments:
  common      compiler state
  caseless    TRUE when the character must be matched caselessly
  cc          points to the first code unit of the pattern character
  context     running state shared between consecutive calls: the remaining
              length in bytes (the subject is addressed as STR_PTR - length),
              the register that receives the next load (sourcereg) and, when
              unaligned reads are used, the code units collected so far
  backtracks  list that receives the "no match" jumps

Returns: cc advanced past the code units that were processed. */
static PCRE2_SPTR byte_sequence_compare(compiler_common *common, BOOL caseless, PCRE2_SPTR cc,
    compare_context *context, jump_list **backtracks)
{
DEFINE_COMPILER;
unsigned int othercasebit = 0;
PCRE2_SPTR othercasechar = NULL;
#ifdef SUPPORT_UNICODE
int utflength;
#endif

if (caseless && char_has_othercase(common, cc))
  {
  /* The two cases differ in a single bit: remember which code unit carries
  it (othercasechar) and the bit itself (othercasebit). */
  othercasebit = char_get_othercase_bit(common, cc);
  SLJIT_ASSERT(othercasebit);
  /* Extracting bit difference info. */
#if PCRE2_CODE_UNIT_WIDTH == 8
  /* Upper part: code unit offset, low 8 bits: the bit mask. */
  othercasechar = cc + (othercasebit >> 8);
  othercasebit &= 0xff;
#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  /* Note that this code only handles characters in the BMP. If there
  ever are characters outside the BMP whose othercase differs in only one
  bit from itself (there currently are none), this code will need to be
  revised for PCRE2_CODE_UNIT_WIDTH == 32. */
  othercasechar = cc + (othercasebit >> 9);
  /* Bit 0x100 tells that the mask applies to the upper byte. */
  if ((othercasebit & 0x100) != 0)
    othercasebit = (othercasebit & 0xff) << 8;
  else
    othercasebit &= 0xff;
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
  }

if (context->sourcereg == -1)
  {
  /* No load has been issued yet (presumably the first call for this
  sequence): read the first chunk into TMP1 and direct the next load to
  TMP2. */
#if PCRE2_CODE_UNIT_WIDTH == 8
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  if (context->length >= 4)
    OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else if (context->length >= 2)
    OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else
#endif
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#elif PCRE2_CODE_UNIT_WIDTH == 16
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  if (context->length >= 4)
    OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else
#endif
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#elif PCRE2_CODE_UNIT_WIDTH == 32
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
  context->sourcereg = TMP2;
  }

#ifdef SUPPORT_UNICODE
/* In UTF mode a character may consist of several code units; all of them
are processed by the loop below. */
utflength = 1;
if (common->utf && HAS_EXTRALEN(*cc))
  utflength += GET_EXTRALEN(*cc);

do
  {
#endif

  context->length -= IN_UCHARS(1);
#if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)

  /* Unaligned read is supported. */
  /* Collect the expected code unit (c) and its case mask (oc); several code
  units are compared at once when a chunk is complete. */
  if (othercasebit != 0 && othercasechar == cc)
    {
    context->c.asuchars[context->ucharptr] = *cc | othercasebit;
    context->oc.asuchars[context->ucharptr] = othercasebit;
    }
  else
    {
    context->c.asuchars[context->ucharptr] = *cc;
    context->oc.asuchars[context->ucharptr] = 0;
    }
  context->ucharptr++;

#if PCRE2_CODE_UNIT_WIDTH == 8
  if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
#else
  if (context->ucharptr >= 2 || context->length == 0)
#endif
    {
    /* Issue the load of the following chunk (if any) into sourcereg before
    the current chunk is compared. */
    if (context->length >= 4)
      OP1(SLJIT_MOV_S32, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
    else if (context->length >= 2)
      OP1(SLJIT_MOV_U16, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
#if PCRE2_CODE_UNIT_WIDTH == 8
    else if (context->length >= 1)
      OP1(SLJIT_MOV_U8, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
    /* Switch to the other register, which holds the chunk loaded earlier. */
    context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;

    /* Compare according to the number of collected code units; the case
    mask is ORed into the subject data before the comparison. */
    switch(context->ucharptr)
      {
      case 4 / sizeof(PCRE2_UCHAR):
      if (context->oc.asint != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
      break;

      case 2 / sizeof(PCRE2_UCHAR):
      if (context->oc.asushort != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
      break;

#if PCRE2_CODE_UNIT_WIDTH == 8
      case 1:
      if (context->oc.asbyte != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
      break;
#endif

      default:
      SLJIT_UNREACHABLE();
      break;
      }
    context->ucharptr = 0;
    }

#else

  /* Unaligned read is unsupported or in 32 bit mode. */
  /* One code unit per comparison: load the next one (if any) into
  sourcereg, then compare the one loaded earlier in the other register. */
  if (context->length >= 1)
    OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);

  context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;

  if (othercasebit != 0 && othercasechar == cc)
    {
    OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
    }
  else
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));

#endif

  cc++;
#ifdef SUPPORT_UNICODE
  utflength--;
  }
while (utflength > 0);
#endif

return cc;
}
6411
6412 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
6413
6414 #define SET_TYPE_OFFSET(value) \
6415 if ((value) != typeoffset) \
6416 { \
6417 if ((value) < typeoffset) \
6418 OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
6419 else \
6420 OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
6421 } \
6422 typeoffset = (value);
6423
6424 #define SET_CHAR_OFFSET(value) \
6425 if ((value) != charoffset) \
6426 { \
6427 if ((value) < charoffset) \
6428 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
6429 else \
6430 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
6431 } \
6432 charoffset = (value);
6433
/* Forward declaration: compile_xclass_matchingpath() calls this function
(with OP_ALLANY, for an XCL_PROP / PT_ANY item). */
static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr);
6435
/* Emits the matching code of an extended character class (XCLASS).

Arguments:
  common      compiler state
  cc          points to the flag code unit of the class data (XCL_NOT,
              XCL_MAP, XCL_HASPROP); an optional 32 byte bitmap and the
              XCL_SINGLE / XCL_RANGE / XCL_PROP / XCL_NOTPROP items follow
              until XCL_END
  backtracks  list that receives the "no match" jumps

The function works in several steps:
  1. The items are scanned to find the smallest and largest character that
     matters (min / max) and which Unicode data is needed (character type,
     script, the character itself).
  2. The character is read into TMP1 by read_char_range().
  3. If a bitmap is present, code that checks it is generated.
  4. If needed, the Unicode record index is looked up, and script items are
     handled first.
  5. Code is generated for the remaining items one by one.

Jumps taken on a successful item test go to "list": this is the local "found"
list for a normal class and the backtrack list for a negated class. The test
of the last item is inverted for a normal class, so a failure jumps to the
backtrack list and a success simply falls through. */
static void compile_xclass_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks)
{
DEFINE_COMPILER;
jump_list *found = NULL;
jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
struct sljit_jump *jump = NULL;
PCRE2_SPTR ccbegin;
int compares, invertcmp, numberofcmps;
#if defined SUPPORT_UNICODE && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
BOOL utf = common->utf;
#endif

#ifdef SUPPORT_UNICODE
BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
BOOL charsaved = FALSE;
int typereg = TMP1;
const sljit_u32 *other_cases;
sljit_uw typeoffset;
#endif

/* Scanning the necessary info. */
cc++;
ccbegin = cc;
compares = 0;

if (cc[-1] & XCL_MAP)
  {
  /* The bitmap covers the characters starting from 0. */
  min = 0;
  cc += 32 / sizeof(PCRE2_UCHAR);
  }

while (*cc != XCL_END)
  {
  compares++;
  if (*cc == XCL_SINGLE)
    {
    cc++;
    GETCHARINCTEST(c, cc);
    if (c > max) max = c;
    if (c < min) min = c;
#ifdef SUPPORT_UNICODE
    needschar = TRUE;
#endif
    }
  else if (*cc == XCL_RANGE)
    {
    cc++;
    GETCHARINCTEST(c, cc);
    if (c < min) min = c;
    GETCHARINCTEST(c, cc);
    if (c > max) max = c;
#ifdef SUPPORT_UNICODE
    needschar = TRUE;
#endif
    }
#ifdef SUPPORT_UNICODE
  else
    {
    SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
    cc++;
    if (*cc == PT_CLIST)
      {
      /* Only the listed characters matter. */
      other_cases = PRIV(ucd_caseless_sets) + cc[1];
      while (*other_cases != NOTACHAR)
        {
        if (*other_cases > max) max = *other_cases;
        if (*other_cases < min) min = *other_cases;
        other_cases++;
        }
      }
    else
      {
      /* Any other property may apply to every character. */
      max = READ_CHAR_MAX;
      min = 0;
      }

    switch(*cc)
      {
      case PT_ANY:
      /* PT_ANY either accepts everything or is ignored. */
      if (cc[-1] == XCL_PROP)
        {
        compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE);
        if (list == backtracks)
          add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
        return;
        }
      break;

      case PT_LAMP:
      case PT_GC:
      case PT_PC:
      case PT_ALNUM:
      needstype = TRUE;
      break;

      case PT_SC:
      needsscript = TRUE;
      break;

      case PT_SPACE:
      case PT_PXSPACE:
      case PT_WORD:
      case PT_PXGRAPH:
      case PT_PXPRINT:
      case PT_PXPUNCT:
      needstype = TRUE;
      needschar = TRUE;
      break;

      case PT_CLIST:
      case PT_UCNC:
      needschar = TRUE;
      break;

      default:
      SLJIT_UNREACHABLE();
      break;
      }
    cc += 2;
    }
#endif
  }
SLJIT_ASSERT(compares > 0);

/* UTF mode is not necessarily enabled, even in 8 bit mode. */
cc = ccbegin;
read_char_range(common, min, max, (cc[-1] & XCL_NOT) != 0);

if ((cc[-1] & XCL_HASPROP) == 0)
  {
  if ((cc[-1] & XCL_MAP) != 0)
    {
    /* Characters above 255 skip the bitmap and continue with the items. */
    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
    if (!optimize_class(common, (const sljit_u8 *)cc, (((const sljit_u8 *)cc)[31] & 0x80) != 0, TRUE, &found))
      {
      /* Generic bitmap test: TMP2 = bit index, TMP1 = bitmap byte. */
      OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
      OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
      OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
      OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
      OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
      add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO));
      }

    /* A character below 256 that is not in the bitmap is a failure. */
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    JUMPHERE(jump);

    cc += 32 / sizeof(PCRE2_UCHAR);
    }
  else
    {
    /* Quick rejection of characters outside of [min, max]. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
    add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, max - min));
    }
  }
else if ((cc[-1] & XCL_MAP) != 0)
  {
  /* The bitmap test destroys TMP1, so the character is kept in RETURN_ADDR. */
  OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
#ifdef SUPPORT_UNICODE
  charsaved = TRUE;
#endif
  if (!optimize_class(common, (const sljit_u8 *)cc, FALSE, TRUE, list))
    {
#if PCRE2_CODE_UNIT_WIDTH == 8
    jump = NULL;
    if (common->utf)
#endif
      jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);

    OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
    OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
    OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
    OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
    add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO));

#if PCRE2_CODE_UNIT_WIDTH == 8
    if (common->utf)
#endif
      JUMPHERE(jump);
    }

  OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
  cc += 32 / sizeof(PCRE2_UCHAR);
  }

#ifdef SUPPORT_UNICODE
if (needstype || needsscript)
  {
  /* The lookup below overwrites TMP1: save the character if it is needed. */
  if (needschar && !charsaved)
    OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);

#if PCRE2_CODE_UNIT_WIDTH == 32
  if (!common->utf)
    {
    /* Values above the Unicode range are replaced before the table lookup. */
    jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
    JUMPHERE(jump);
    }
#endif

  /* Two stage table lookup; TMP2 receives the index of the ucd record. */
  OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
  OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
  OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);

  /* Before anything else, we deal with scripts. */
  if (needsscript)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);

    ccbegin = cc;

    while (*cc != XCL_END)
      {
      if (*cc == XCL_SINGLE)
        {
        cc++;
        GETCHARINCTEST(c, cc);
        }
      else if (*cc == XCL_RANGE)
        {
        cc++;
        GETCHARINCTEST(c, cc);
        GETCHARINCTEST(c, cc);
        }
      else
        {
        SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
        cc++;
        if (*cc == PT_SC)
          {
          compares--;
          invertcmp = (compares == 0 && list != backtracks);
          if (cc[-1] == XCL_NOTPROP)
            invertcmp ^= 0x1;
          jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]);
          add_jump(compiler, compares > 0 ? list : backtracks, jump);
          }
        cc += 2;
        }
      }

    cc = ccbegin;
    }

  if (needschar)
    {
    OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
    }

  if (needstype)
    {
    if (!needschar)
      {
      /* The character is not needed anymore: the type is kept in TMP1. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
      OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
      }
    else
      {
      /* TMP1 holds the character: the type is kept in RETURN_ADDR. */
      OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
      typereg = RETURN_ADDR;
      }
    }
  }
#endif

/* Generating code. */
/* charoffset / typeoffset record the constant that has already been
subtracted from TMP1 / typereg in the generated code. numberofcmps counts the
single / range tests whose results are currently accumulated in TMP2. */
charoffset = 0;
numberofcmps = 0;
#ifdef SUPPORT_UNICODE
typeoffset = 0;
#endif

while (*cc != XCL_END)
  {
  compares--;
  invertcmp = (compares == 0 && list != backtracks);
  jump = NULL;

  if (*cc == XCL_SINGLE)
    {
    cc++;
    GETCHARINCTEST(c, cc);

    if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
      {
      /* Accumulate the result; the branch is emitted by a later item. */
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
      numberofcmps++;
      }
    else if (numberofcmps > 0)
      {
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      numberofcmps = 0;
      }
    else
      {
      jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      numberofcmps = 0;
      }
    }
  else if (*cc == XCL_RANGE)
    {
    cc++;
    GETCHARINCTEST(c, cc);
    /* Rebase TMP1 to the start of the range: one "<=" test is enough. */
    SET_CHAR_OFFSET(c);
    GETCHARINCTEST(c, cc);

    if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
      {
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
      numberofcmps++;
      }
    else if (numberofcmps > 0)
      {
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      numberofcmps = 0;
      }
    else
      {
      jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      numberofcmps = 0;
      }
    }
#ifdef SUPPORT_UNICODE
  else
    {
    SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
    if (*cc == XCL_NOTPROP)
      invertcmp ^= 0x1;
    cc++;
    switch(*cc)
      {
      case PT_ANY:
      if (!invertcmp)
        jump = JUMP(SLJIT_JUMP);
      break;

      case PT_LAMP:
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      case PT_GC:
      c = PRIV(ucp_typerange)[(int)cc[1] * 2];
      SET_TYPE_OFFSET(c);
      jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
      break;

      case PT_PC:
      jump = CMP(SLJIT_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
      break;

      case PT_SC:
      /* Script items were handled (and counted) by the script pass above,
      so the decrement at the top of this loop is undone. */
      compares++;
      /* Do nothing. */
      break;

      case PT_SPACE:
      case PT_PXSPACE:
      SET_CHAR_OFFSET(9);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);

      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

      SET_TYPE_OFFSET(ucp_Zl);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      case PT_WORD:
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
      /* Fall through. */

      case PT_ALNUM:
      SET_TYPE_OFFSET(ucp_Ll);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
      /* For PT_WORD the underscore result is already in TMP2. */
      OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
      SET_TYPE_OFFSET(ucp_Nd);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      case PT_CLIST:
      other_cases = PRIV(ucd_caseless_sets) + cc[1];

      /* At least three characters are required.
         Otherwise this case would be handled by the normal code path. */
      SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
      SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);

      /* Optimizing character pairs, if their difference is power of 2. */
      if (is_powerof2(other_cases[1] ^ other_cases[0]))
        {
        /* Setting the differing bit maps both characters to other_cases[1].
        TMP1 holds "character - charoffset", so the original character is
        restored first when charoffset is not zero. */
        if (charoffset == 0)
          OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
        else
          {
          OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
          OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
          }
        OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
        other_cases += 2;
        }
      else if (is_powerof2(other_cases[2] ^ other_cases[1]))
        {
        /* Same trick for the second and third characters. The mask must be
        the bit that differs between other_cases[1] and other_cases[2] in
        both branches, since the result is compared to other_cases[2]. */
        if (charoffset == 0)
          OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
        else
          {
          OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
          OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
          }
        OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);

        OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
        OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);

        other_cases += 3;
        }
      else
        {
        OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
        }

      /* The remaining characters are tested one by one; the zero flag is
      requested only by the last test. */
      while (*other_cases != NOTACHAR)
        {
        OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
        OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
        }
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      case PT_UCNC:
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

      SET_CHAR_OFFSET(0xa0);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
      SET_CHAR_OFFSET(0);
      OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_GREATER_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      case PT_PXGRAPH:
      /* C and Z groups are the farthest two groups. */
      SET_TYPE_OFFSET(ucp_Ll);
      OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER);

      jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);

      /* In case of ucp_Cf, we overwrite the result. */
      SET_CHAR_OFFSET(0x2066);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);

      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

      JUMPHERE(jump);
      /* TMP2 is non-zero for the characters that are not "graph". */
      jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
      break;

      case PT_PXPRINT:
      /* C and Z groups are the farthest two groups. */
      SET_TYPE_OFFSET(ucp_Ll);
      OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER);

      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
      OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_NOT_EQUAL);

      jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);

      /* In case of ucp_Cf, we overwrite the result. */
      SET_CHAR_OFFSET(0x2066);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);

      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

      JUMPHERE(jump);
      /* TMP2 is non-zero for the characters that are not "print". */
      jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
      break;

      case PT_PXPUNCT:
      SET_TYPE_OFFSET(ucp_Sc);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);

      /* Symbols only count as punctuation below 128. */
      SET_CHAR_OFFSET(0);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x7f);
      OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_LESS_EQUAL);

      SET_TYPE_OFFSET(ucp_Pc);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      default:
      SLJIT_UNREACHABLE();
      break;
      }
    cc += 2;
    }
#endif

  /* The jump of the last item always goes to the backtrack list (its
  condition was inverted above when the class is not negated). */
  if (jump != NULL)
    add_jump(compiler, compares > 0 ? list : backtracks, jump);
  }

if (found != NULL)
  set_jumps(found, LABEL());
}
6991
6992 #undef SET_TYPE_OFFSET
6993 #undef SET_CHAR_OFFSET
6994
6995 #endif
6996
/* Generates the matching path of a position test that consumes no input,
plus OP_REVERSE, which steps STR_PTR backwards by a fixed number of
characters. Every failing condition is emitted as a jump that is appended to
the backtracks list.

Arguments:
  common      JIT compiler state (sljit compiler, newline settings, mode, ...)
  type        opcode to compile; must be one of the cases handled below
  cc          position in the compiled pattern; it is only read for
              OP_REVERSE, where GET(cc, 0) yields the step-back length
  backtracks  list receiving the jumps taken when the test fails

Returns:      cc + LINK_SIZE for OP_REVERSE, otherwise cc unchanged
*/
static PCRE2_SPTR compile_simple_assertion_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks)
{
DEFINE_COMPILER;
int length;
struct sljit_jump *jump[4];
#ifdef SUPPORT_UNICODE
struct sljit_label *label;
#endif /* SUPPORT_UNICODE */

switch(type)
  {
  case OP_SOD:
  /* Fail unless STR_PTR equals the 'begin' field of the JIT arguments. */
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
  return cc;

  case OP_SOM:
  /* Fail unless STR_PTR equals the 'str' field of the JIT arguments. */
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
  return cc;

  case OP_NOT_WORD_BOUNDARY:
  case OP_WORD_BOUNDARY:
  /* Call the shared wordboundary routine, then branch on the zero flag it
  leaves behind: OP_WORD_BOUNDARY fails on zero, OP_NOT_WORD_BOUNDARY fails on
  non-zero. */
  add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
  add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  return cc;

  case OP_EODN:
  /* Requires rather complex checks. */
  /* jump[0]: STR_PTR is already at (or past) STR_END, which is a success. */
  jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
    {
    /* Fixed two-unit newline: TMP2 = STR_PTR + 2 units, TMP1 = first unit. */
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    if (common->mode == PCRE2_JIT_COMPLETE)
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
    else
      {
      /* jump[1]: exactly two units remain, continue with the full compare. */
      jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);
      /* TMP2 = (more than two units remain) | (first unit != first newline
      unit); any set bit is a failure. */
      OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_EQUAL);
      add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
      /* Only one unit remains and it equals the first newline unit: run the
      partial match check, then fail this path. */
      check_partial(common, TRUE);
      add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
      JUMPHERE(jump[1]);
      }
    /* Both remaining units must equal the two newline units. */
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
    }
  else if (common->nltype == NLTYPE_FIXED)
    {
    /* Fixed one-unit newline: it must be the last unit of the subject. */
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
    }
  else
    {
    /* Non-fixed newline types. jump[1] is taken when the current unit is not
    CR. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
    /* jump[2]: the CR is the last unit of the subject, which is a success. */
    jump[2] = JUMP(SLJIT_GREATER);
    add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL) /* LESS */);
    /* Equal. */
    /* Exactly two units remain: succeed (jump[3]) only if the second is NL. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    jump[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));

    JUMPHERE(jump[1]);
    if (common->nltype == NLTYPE_ANYCRLF)
      {
      /* Not CR: the unit must be NL and must be the last one. */
      OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0));
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
      }
    else
      {
      /* Save STR_PTR in LOCALS1, read one character, require that it ends
      exactly at STR_END and that the anynewline routine accepts it, then
      restore STR_PTR. */
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
      read_char_range(common, common->nlmin, common->nlmax, TRUE);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
      add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
      sljit_set_current_flags(compiler, SLJIT_SET_Z);
      add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
      }
    JUMPHERE(jump[2]);
    JUMPHERE(jump[3]);
    }
  JUMPHERE(jump[0]);
  check_partial(common, FALSE);
  return cc;

  case OP_EOD:
  /* Fail while any subject unit remains. */
  add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
  check_partial(common, FALSE);
  return cc;

  case OP_DOLL:
  /* Fail immediately when the PCRE2_NOTEOL option bit is set. */
  OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
  OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO32));

  /* Without 'endonly' the OP_EODN code is reused; with it, the test is the
  same as OP_EOD. */
  if (!common->endonly)
    compile_simple_assertion_matchingpath(common, OP_EODN, cc, backtracks);
  else
    {
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
    check_partial(common, FALSE);
    }
  return cc;

  case OP_DOLLM:
  /* jump[1]: not at the end of the subject, so a newline must follow. */
  jump[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
  /* At the end of the subject: succeed unless PCRE2_NOTEOL is set. */
  OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
  OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO32));
  check_partial(common, FALSE);
  jump[0] = JUMP(SLJIT_JUMP);
  JUMPHERE(jump[1]);

  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
    {
    /* Fixed two-unit newline: TMP2 = STR_PTR + 2 units, TMP1 = first unit. */
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    if (common->mode == PCRE2_JIT_COMPLETE)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0));
    else
      {
      /* jump[1]: at least two units remain, continue with the full compare. */
      jump[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
      /* STR_PTR = STR_END - IN_UCHARS(1) */
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
      check_partial(common, TRUE);
      add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
      JUMPHERE(jump[1]);
      }

    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
    }
  else
    {
    /* Other newline types: test the next character without advancing past
    it (peek_char), using the generic newline check. */
    peek_char(common, common->nlmax);
    check_newlinechar(common, common->nltype, backtracks, FALSE);
    }
  JUMPHERE(jump[0]);
  return cc;

  case OP_CIRC:
  /* Fail when STR_PTR is beyond args->begin or PCRE2_NOTBOL is set. */
  OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
  OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO32));
  return cc;

  case OP_CIRCM:
  OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
  /* jump[1]: beyond args->begin, so a newline must precede STR_PTR. */
  jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0);
  /* At args->begin: succeed unless PCRE2_NOTBOL is set. */
  OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO32));
  jump[0] = JUMP(SLJIT_JUMP);
  JUMPHERE(jump[1]);

  /* Unless alt_circumflex is set, the position at the very end of the
  subject does not match. */
  if (!common->alt_circumflex)
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
    {
    /* Fixed two-unit newline: both preceding units must exist (not before
    args->begin, still held in TMP1) and match the newline. */
    OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, TMP1, 0));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
    }
  else
    {
    /* Step back one character, read it again and test it as a newline. */
    skip_char_back(common);
    read_char_range(common, common->nlmin, common->nlmax, TRUE);
    check_newlinechar(common, common->nltype, backtracks, FALSE);
    }
  JUMPHERE(jump[0]);
  return cc;

  case OP_REVERSE:
  /* The step-back length is stored in the pattern; zero needs no code. */
  length = GET(cc, 0);
  if (length == 0)
    return cc + LINK_SIZE;
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
#ifdef SUPPORT_UNICODE
  if (common->utf)
    {
    /* UTF: step back one character at a time, 'length' times (counter in
    TMP2), failing as soon as STR_PTR is not beyond args->begin (TMP3). */
    OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
    label = LABEL();
    add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
    skip_char_back(common);
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, label);
    }
  else
#endif
    {
    /* Non-UTF: subtract 'length' code units in one step and fail when the
    result lies before args->begin. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0));
    }
  check_start_used_ptr(common);
  return cc + LINK_SIZE;
  }
/* Every supported opcode returns from inside the switch. */
SLJIT_UNREACHABLE();
return cc;
}
7219
7220 #ifdef SUPPORT_UNICODE
7221
7222 #if PCRE2_CODE_UNIT_WIDTH != 32
7223
do_extuni_utf(jit_arguments * args,PCRE2_SPTR cc)7224 static PCRE2_SPTR SLJIT_FUNC do_extuni_utf(jit_arguments *args, PCRE2_SPTR cc)
7225 {
7226 PCRE2_SPTR start_subject = args->begin;
7227 PCRE2_SPTR end_subject = args->end;
7228 int lgb, rgb, len, ricount;
7229 PCRE2_SPTR prevcc, bptr;
7230 uint32_t c;
7231
7232 prevcc = cc;
7233 GETCHARINC(c, cc);
7234 lgb = UCD_GRAPHBREAK(c);
7235
7236 while (cc < end_subject)
7237 {
7238 len = 1;
7239 GETCHARLEN(c, cc, len);
7240 rgb = UCD_GRAPHBREAK(c);
7241
7242 if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
7243
7244 /* Not breaking between Regional Indicators is allowed only if there
7245 are an even number of preceding RIs. */
7246
7247 if (lgb == ucp_gbRegionalIndicator && rgb == ucp_gbRegionalIndicator)
7248 {
7249 ricount = 0;
7250 bptr = prevcc;
7251
7252 /* bptr is pointing to the left-hand character */
7253 while (bptr > start_subject)
7254 {
7255 bptr--;
7256 BACKCHAR(bptr);
7257 GETCHAR(c, bptr);
7258
7259 if (UCD_GRAPHBREAK(c) != ucp_gbRegionalIndicator) break;
7260
7261 ricount++;
7262 }
7263
7264 if ((ricount & 1) != 0) break; /* Grapheme break required */
7265 }
7266
7267 /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
7268 allows any number of them before a following Extended_Pictographic. */
7269
7270 if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
7271 lgb != ucp_gbExtended_Pictographic)
7272 lgb = rgb;
7273
7274 prevcc = cc;
7275 cc += len;
7276 }
7277
7278 return cc;
7279 }
7280
7281 #endif
7282
do_extuni_no_utf(jit_arguments * args,PCRE2_SPTR cc)7283 static PCRE2_SPTR SLJIT_FUNC do_extuni_no_utf(jit_arguments *args, PCRE2_SPTR cc)
7284 {
7285 PCRE2_SPTR start_subject = args->begin;
7286 PCRE2_SPTR end_subject = args->end;
7287 int lgb, rgb, ricount;
7288 PCRE2_SPTR bptr;
7289 uint32_t c;
7290
7291 c = *cc++;
7292 lgb = UCD_GRAPHBREAK(c);
7293
7294 while (cc < end_subject)
7295 {
7296 c = *cc;
7297 rgb = UCD_GRAPHBREAK(c);
7298
7299 if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
7300
7301 /* Not breaking between Regional Indicators is allowed only if there
7302 are an even number of preceding RIs. */
7303
7304 if (lgb == ucp_gbRegionalIndicator && rgb == ucp_gbRegionalIndicator)
7305 {
7306 ricount = 0;
7307 bptr = cc - 1;
7308
7309 /* bptr is pointing to the left-hand character */
7310 while (bptr > start_subject)
7311 {
7312 bptr--;
7313 c = *bptr;
7314
7315 if (UCD_GRAPHBREAK(c) != ucp_gbRegionalIndicator) break;
7316
7317 ricount++;
7318 }
7319
7320 if ((ricount & 1) != 0) break; /* Grapheme break required */
7321 }
7322
7323 /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
7324 allows any number of them before a following Extended_Pictographic. */
7325
7326 if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
7327 lgb != ucp_gbExtended_Pictographic)
7328 lgb = rgb;
7329
7330 cc++;
7331 }
7332
7333 return cc;
7334 }
7335
7336 #endif
7337
/* Generates the matching path of one pattern item that matches a single
character (or, for OP_ANYNL and OP_EXTUNI, a single sequence). Every failing
condition is emitted as a jump appended to the backtracks list.

Arguments:
  common         JIT compiler state
  type           opcode of the item; must be one of the cases handled below
  cc             position of the item's operands in the compiled pattern
  backtracks     list receiving the jumps taken when the item does not match
  check_str_ptr  when TRUE, detect_partial_match() is emitted before the
                 subject is read (the OP_CHAR/OP_CHARI fast path performs its
                 own STR_END comparison instead)

Returns:         the pattern position after the operands that were consumed
*/
static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr)
{
DEFINE_COMPILER;
int length;
unsigned int c, oc, bit;
compare_context context;
struct sljit_jump *jump[3];
jump_list *end_list;
#ifdef SUPPORT_UNICODE
PCRE2_UCHAR propdata[5];
#endif /* SUPPORT_UNICODE */

switch(type)
  {
  case OP_NOT_DIGIT:
  case OP_DIGIT:
  /* Digits are usually 0-9, so it is worth to optimize them. */
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
  /* The char7 reader is chosen in UTF mode when is_char7_bitset() accepts
  the digit class bitmap. */
  if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_digit, FALSE))
    read_char7_type(common, type == OP_NOT_DIGIT);
  else
#endif
    read_char8_type(common, type == OP_NOT_DIGIT);
    /* Flip the starting bit in the negative case. */
  /* TMP1 holds the ctype flags: OP_DIGIT fails when the digit bit is clear,
  OP_NOT_DIGIT fails when it is set. */
  OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
  add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO));
  return cc;

  case OP_NOT_WHITESPACE:
  case OP_WHITESPACE:
  /* Same scheme as the digit case, using the space class and ctype bit. */
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
  if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_space, FALSE))
    read_char7_type(common, type == OP_NOT_WHITESPACE);
  else
#endif
    read_char8_type(common, type == OP_NOT_WHITESPACE);
  OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
  add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO));
  return cc;

  case OP_NOT_WORDCHAR:
  case OP_WORDCHAR:
  /* Same scheme as the digit case, using the word class and ctype bit. */
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
  if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_word, FALSE))
    read_char7_type(common, type == OP_NOT_WORDCHAR);
  else
#endif
    read_char8_type(common, type == OP_NOT_WORDCHAR);
  OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
  add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO));
  return cc;

  case OP_ANY:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  read_char_range(common, common->nlmin, common->nlmax, TRUE);
  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
    {
    /* Fixed two-unit newline. jump[0]: the character just read is not the
    first newline unit, so it matches. */
    jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
    /* end_list collects the jumps taken when no further unit is available;
    they land on the success label below. */
    end_list = NULL;
    if (common->mode != PCRE2_JIT_PARTIAL_HARD)
      add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
    else
      check_str_end(common, &end_list);

    /* Fail only when the next unit completes the newline sequence. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
    set_jumps(end_list, LABEL());
    JUMPHERE(jump[0]);
    }
  else
    check_newlinechar(common, common->nltype, backtracks, TRUE);
  return cc;

  case OP_ALLANY:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
  if (common->utf)
    {
    /* Load the first code unit and step over it; the trailing units of a
    multi-unit character are skipped below. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
#if PCRE2_CODE_UNIT_WIDTH == 8
    /* Units below 0xc0 need no extra skip; otherwise the amount is looked
    up in utf8_table4, indexed by (unit - 0xc0). */
    jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
#elif PCRE2_CODE_UNIT_WIDTH == 16
    /* Units below 0xd800 need no extra skip; otherwise TMP1 becomes 1 when
    (unit & 0xfc00) == 0xd800 and 0 when not, and is shifted left by one
    before being added to STR_PTR. */
    jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
#endif
    JUMPHERE(jump[0]);
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */
    return cc;
    }
#endif
  /* Non-UTF (or 32-bit units): a character is exactly one code unit. */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  return cc;

  case OP_ANYBYTE:
  /* Always advances by exactly one code unit. */
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  return cc;

#ifdef SUPPORT_UNICODE
  case OP_NOTPROP:
  case OP_PROP:
  /* Build a temporary extended-class description holding a single property
  item (the two operands at cc) and compile it as an XCLASS. */
  propdata[0] = XCL_HASPROP;
  propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
  propdata[2] = cc[0];
  propdata[3] = cc[1];
  propdata[4] = XCL_END;
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  compile_xclass_matchingpath(common, propdata, backtracks);
  return cc + 2;
#endif

  case OP_ANYNL:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  read_char_range(common, common->bsr_nlmin, common->bsr_nlmax, FALSE);
  /* jump[0]: not CR, so use the generic newline check below. */
  jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
  /* We don't need to handle soft partial matching case. */
  end_list = NULL;
  if (common->mode != PCRE2_JIT_PARTIAL_HARD)
    add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  else
    check_str_end(common, &end_list);
  /* After a CR, a directly following NL is consumed as well. jump[1] is
  taken when the next unit is not NL (the CR alone matches). */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  jump[2] = JUMP(SLJIT_JUMP);
  JUMPHERE(jump[0]);
  check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
  set_jumps(end_list, LABEL());
  JUMPHERE(jump[1]);
  JUMPHERE(jump[2]);
  return cc;

  case OP_NOT_HSPACE:
  case OP_HSPACE:
  /* Read a character, call the shared hspace routine and branch on the zero
  flag it leaves behind. */
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  read_char_range(common, 0x9, 0x3000, type == OP_NOT_HSPACE);
  add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
  add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  return cc;

  case OP_NOT_VSPACE:
  case OP_VSPACE:
  /* Same scheme as the hspace case, using the shared vspace routine. */
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  read_char_range(common, 0xa, 0x2029, type == OP_NOT_VSPACE);
  add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
  add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  return cc;

#ifdef SUPPORT_UNICODE
  case OP_EXTUNI:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);

  /* The helper is called with the JIT arguments in R0 and STR_PTR (already
  R1, as asserted) as its two arguments; its result is the new STR_PTR. */
  SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
  OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);

#if PCRE2_CODE_UNIT_WIDTH != 32
  sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM,
    common->utf ? SLJIT_FUNC_OFFSET(do_extuni_utf) : SLJIT_FUNC_OFFSET(do_extuni_no_utf));
#else
  sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_extuni_no_utf));
#endif

  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);

  if (common->mode == PCRE2_JIT_PARTIAL_HARD)
    {
    /* jump[0] skips the partial check when the sequence ended before
    STR_END. */
    jump[0] = CMP(SLJIT_LESS, SLJIT_RETURN_REG, 0, STR_END, 0);
    /* Since we successfully read a char above, partial matching must occur. */
    check_partial(common, TRUE);
    JUMPHERE(jump[0]);
    }
  return cc;
#endif

  case OP_CHAR:
  case OP_CHARI:
  /* length is the number of code units of the pattern character. */
  length = 1;
#ifdef SUPPORT_UNICODE
  if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
#endif
  /* Fast path (complete mode with the end check requested): advance STR_PTR
  first, fail if that overruns STR_END, then compare the code units directly.
  It is taken for OP_CHAR, for characters without another case, and when
  char_get_othercase_bit() returns non-zero. */
  if (common->mode == PCRE2_JIT_COMPLETE && check_str_ptr
      && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
    {
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));

    context.length = IN_UCHARS(length);
    context.sourcereg = -1;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
    context.ucharptr = 0;
#endif
    return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
    }

  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  /* General path: decode the pattern character into c. */
#ifdef SUPPORT_UNICODE
  if (common->utf)
    {
    GETCHAR(c, cc);
    }
  else
#endif
    c = *cc;

  if (type == OP_CHAR || !char_has_othercase(common, cc))
    {
    /* Only one value is acceptable. */
    read_char_range(common, c, c, FALSE);
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
    return cc + length;
    }
  oc = char_othercase(common, c);
  read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, FALSE);
  bit = c ^ oc;
  if (is_powerof2(bit))
    {
    /* The two cases differ in a single bit: force that bit on in the
    subject character and compare once. */
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
    return cc + length;
    }
  /* Otherwise compare against both values in turn. */
  jump[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c);
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
  JUMPHERE(jump[0]);
  return cc + length;

  case OP_NOT:
  case OP_NOTI:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);

  length = 1;
#ifdef SUPPORT_UNICODE
  if (common->utf)
    {
#if PCRE2_CODE_UNIT_WIDTH == 8
    c = *cc;
    if (c < 128)
      {
      /* The pattern character is a single unit below 128: test the first
      subject unit directly, then skip the rest of the subject character. */
      OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
      if (type == OP_NOT || !char_has_othercase(common, cc))
        add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
      else
        {
        /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
        OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
        add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
        }
      /* Skip the variable-length character. */
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
      JUMPHERE(jump[0]);
      return cc + 1;
      }
    else
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
      {
      /* Decode the pattern character and record its length in code units. */
      GETCHARLEN(c, cc, length);
      }
    }
  else
#endif /* SUPPORT_UNICODE */
    c = *cc;

  /* Mirror image of the OP_CHAR general path: the item fails when the
  subject character equals c (or its other case for OP_NOTI). */
  if (type == OP_NOT || !char_has_othercase(common, cc))
    {
    read_char_range(common, c, c, TRUE);
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
    }
  else
    {
    oc = char_othercase(common, c);
    read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, TRUE);
    bit = c ^ oc;
    if (is_powerof2(bit))
      {
      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
      }
    else
      {
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
      }
    }
  return cc + length;

  case OP_CLASS:
  case OP_NCLASS:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
  /* 'bit' is reused here as the highest character value handled through the
  bitmap: 127 when UTF is on and is_char7_bitset() accepts the map, else 255. */
  bit = (common->utf && is_char7_bitset((const sljit_u8 *)cc, type == OP_NCLASS)) ? 127 : 255;
  read_char_range(common, 0, bit, type == OP_NCLASS);
#else
  read_char_range(common, 0, 255, type == OP_NCLASS);
#endif

  /* If optimize_class() emitted the whole test, only the 32-byte bitmap has
  to be skipped in the pattern. */
  if (optimize_class(common, (const sljit_u8 *)cc, type == OP_NCLASS, FALSE, backtracks))
    return cc + 32 / sizeof(PCRE2_UCHAR);

  /* Characters above the bitmap range: OP_CLASS fails on them; for OP_NCLASS
  jump[0] skips the bitmap test and lands on the success path. */
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
  jump[0] = NULL;
  if (common->utf)
    {
    jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, bit);
    if (type == OP_CLASS)
      {
      add_jump(compiler, backtracks, jump[0]);
      jump[0] = NULL;
      }
    }
#elif PCRE2_CODE_UNIT_WIDTH != 8
  jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
  if (type == OP_CLASS)
    {
    add_jump(compiler, backtracks, jump[0]);
    jump[0] = NULL;
    }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */

  /* Bitmap lookup: byte index = char >> 3, mask = 1 << (char & 7); fail when
  the selected bit is clear. */
  OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
  OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
  OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
  add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
  if (jump[0] != NULL)
    JUMPHERE(jump[0]);
#endif
  return cc + 32 / sizeof(PCRE2_UCHAR);

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  case OP_XCLASS:
  /* The class data starts after a LINK_SIZE field; GET(cc, 0) is used to
  step over the whole item. */
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
  return cc + GET(cc, 0) - 1;
#endif
  }
/* Every supported opcode returns from inside the switch. */
SLJIT_UNREACHABLE();
return cc;
}
7709
compile_charn_matchingpath(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,jump_list ** backtracks)7710 static SLJIT_INLINE PCRE2_SPTR compile_charn_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, jump_list **backtracks)
7711 {
7712 /* This function consumes at least one input character. */
7713 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
7714 DEFINE_COMPILER;
7715 PCRE2_SPTR ccbegin = cc;
7716 compare_context context;
7717 int size;
7718
7719 context.length = 0;
7720 do
7721 {
7722 if (cc >= ccend)
7723 break;
7724
7725 if (*cc == OP_CHAR)
7726 {
7727 size = 1;
7728 #ifdef SUPPORT_UNICODE
7729 if (common->utf && HAS_EXTRALEN(cc[1]))
7730 size += GET_EXTRALEN(cc[1]);
7731 #endif
7732 }
7733 else if (*cc == OP_CHARI)
7734 {
7735 size = 1;
7736 #ifdef SUPPORT_UNICODE
7737 if (common->utf)
7738 {
7739 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
7740 size = 0;
7741 else if (HAS_EXTRALEN(cc[1]))
7742 size += GET_EXTRALEN(cc[1]);
7743 }
7744 else
7745 #endif
7746 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
7747 size = 0;
7748 }
7749 else
7750 size = 0;
7751
7752 cc += 1 + size;
7753 context.length += IN_UCHARS(size);
7754 }
7755 while (size > 0 && context.length <= 128);
7756
7757 cc = ccbegin;
7758 if (context.length > 0)
7759 {
7760 /* We have a fixed-length byte sequence. */
7761 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
7762 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
7763
7764 context.sourcereg = -1;
7765 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
7766 context.ucharptr = 0;
7767 #endif
7768 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
7769 return cc;
7770 }
7771
7772 /* A non-fixed length character will be checked if length == 0. */
7773 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks, TRUE);
7774 }
7775
7776 /* Forward definitions. */
7777 static void compile_matchingpath(compiler_common *, PCRE2_SPTR, PCRE2_SPTR, backtrack_common *);
7778 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
7779
/* Allocates a zero-filled backtrack record of 'size' bytes with
sljit_alloc_memory(), stores 'ccstart' in its cc field and pushes it on top of
the parent's list (parent->top). If the compiler reports an error after the
allocation, the enclosing function returns 'error'. The macro expects the
variables 'compiler', 'backtrack' and 'parent' to be in scope at the point of
use. */
#define PUSH_BACKTRACK(size, ccstart, error) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return error; \
    memset(backtrack, 0, size); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)
7792
/* Variant of PUSH_BACKTRACK for functions returning void: it allocates a
zero-filled backtrack record of 'size' bytes, stores 'ccstart' in its cc field
and pushes it on parent->top. If the compiler reports an error after the
allocation, the enclosing function returns without a value. The macro expects
the variables 'compiler', 'backtrack' and 'parent' to be in scope at the point
of use. */
#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(backtrack, 0, size); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)
7805
/* Views the variable 'backtrack' (which must be in scope) as a pointer to the
given record type. */
#define BACKTRACK_AS(type) ((type *)backtrack)
7807
/* Emits the code that selects one capture group for an OP_DNREF / OP_DNREFI
item. The name table entries belonging to the item are visited in order; the
first one whose OVECTOR start slot differs from the value held in OVECTOR(1)
is selected, and the last entry is selected when none of the earlier ones is.
The OVECTOR offset goes to TMP2 (as the address of the selected pair).

Arguments:
  common      JIT compiler state
  cc          position of the OP_DNREF / OP_DNREFI opcode
  backtracks  when not NULL, and unless common->unset_backref is set, receives
              a jump taken when the last entry also equals OVECTOR(1)
*/
static void compile_dnref_search(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks)
{
DEFINE_COMPILER;
jump_list *matched = NULL;
PCRE2_SPTR entry = common->name_table + GET2(cc, 1) * common->name_entry_size;
int entries = GET2(cc, 1 + IMM2_SIZE);
int i;
unsigned int pair;

SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);

/* TMP1 = the value every candidate start slot is compared against. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));

/* All entries except the last: leave the search as soon as one differs. */
for (i = 1; i < entries; i++, entry += common->name_entry_size)
  {
  pair = GET2(entry, 0) << 1;
  GET_LOCAL_BASE(TMP2, 0, OVECTOR(pair));
  add_jump(compiler, &matched, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(pair), TMP1, 0));
  }

/* The last entry is selected unconditionally; optionally fail when it is
equal to OVECTOR(1) as well. */
pair = GET2(entry, 0) << 1;
GET_LOCAL_BASE(TMP2, 0, OVECTOR(pair));
if (backtracks != NULL && !common->unset_backref)
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(pair), TMP1, 0));

set_jumps(matched, LABEL());
}
7837
/* Emits the code that matches the subject at STR_PTR against the text
captured by a group (a back reference).

For OP_REF / OP_REFI the group number is read from the pattern and the
start/end pair is taken from OVECTOR. For any other opcode TMP2 must already
hold the address of the pair: it is read at offsets 0 and sizeof(sljit_sw).

Arguments:
  common      JIT compiler state
  cc          position of the reference opcode in the compiled pattern
  backtracks  list receiving the jumps taken when the match fails
  withchecks  when TRUE, emit the unset-group test (OP_REF / OP_REFI only,
              and only if common->unset_backref is not set) and the jump that
              detects an empty captured string
  emptyfail   decides what the empty-string jump does: TRUE adds it to
              backtracks, FALSE makes it land after the emitted code
*/
static void compile_ref_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
{
DEFINE_COMPILER;
BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
int offset = 0;
struct sljit_jump *jump = NULL;
struct sljit_jump *partial;
struct sljit_jump *nopartial;

/* TMP1 = start of the captured string. */
if (ref)
  {
  offset = GET2(cc, 1) << 1;
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
  /* OVECTOR(1) contains the "string begin - 1" constant. */
  if (withchecks && !common->unset_backref)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
  }
else
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);

#if defined SUPPORT_UNICODE
if (common->utf && *cc == OP_REFI)
  {
  /* UTF caseless comparison is done by the helper do_utf_caselesscmp(),
  called with R0 = start of the captured string (TMP1), R1 = STR_PTR,
  R2 = end of the captured string and R3 = STR_END. */
  SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
  if (ref)
    OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
  else
    OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));

  /* Empty captured string: start == end. */
  if (withchecks)
    jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_R2, 0);
  /* No free saved registers so save data on stack. */
  /* NOTE(review): nothing is stored on the stack here; the comment above
  looks stale - confirm. */

  OP1(SLJIT_MOV, SLJIT_R3, 0, STR_END, 0);
  sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW) | SLJIT_ARG4(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
  /* The helper's result becomes the new STR_PTR; the values 0 and 1 are
  treated as status codes below rather than as addresses. */
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);

  if (common->mode == PCRE2_JIT_COMPLETE)
    add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
  else
    {
    OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_LESS, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);

    /* Result 0: plain failure. */
    add_jump(compiler, backtracks, JUMP(SLJIT_LESS));

    /* Result 1: move STR_PTR to STR_END, run the partial match check and
    then fail. Any other result continues after 'nopartial'. */
    nopartial = JUMP(SLJIT_NOT_EQUAL);
    OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
    check_partial(common, FALSE);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    JUMPHERE(nopartial);
    }
  }
else
#endif /* SUPPORT_UNICODE */
  {
  /* TMP2 = end - start of the captured string; the zero flag reports an
  empty string. */
  if (ref)
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
  else
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);

  if (withchecks)
    jump = JUMP(SLJIT_ZERO);

  /* Advance STR_PTR by the whole length first. 'partial' is taken when that
  overruns STR_END; in complete mode this is simply a failure. */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  partial = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
  if (common->mode == PCRE2_JIT_COMPLETE)
    add_jump(compiler, backtracks, partial);

  /* Call the shared compare routine; the match fails unless TMP2 is zero
  when it returns. */
  add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));

  if (common->mode != PCRE2_JIT_COMPLETE)
    {
    /* Full comparison succeeded: skip the partial handling. */
    nopartial = JUMP(SLJIT_JUMP);
    JUMPHERE(partial);
    /* TMP2 -= STR_END - STR_PTR */
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
    /* The subject ends before the captured string does: compare only what
    is left (nothing to compare when TMP2 is zero), run the partial match
    check and then fail. */
    partial = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0);
    OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
    add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
    JUMPHERE(partial);
    check_partial(common, FALSE);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    JUMPHERE(nopartial);
    }
  }

/* Resolve the empty-string jump (only emitted when withchecks is TRUE). */
if (jump != NULL)
  {
  if (emptyfail)
    add_jump(compiler, backtracks, jump);
  else
    JUMPHERE(jump);
  }
}
7935
/* Emits the matching path for a repeated back reference, i.e. a reference
opcode followed by one of the OP_CR* repeat opcodes.

Arguments:
  common   the compiler state
  cc       points to the reference opcode
  parent   the enclosing backtrack record (not referenced directly in this
           function body; presumably consumed by PUSH_BACKTRACK)

Returns:   pointer to the first code unit after the repeat opcode and its
           operands

In this function max == 0 stands for "no upper limit". For opcodes other
than OP_REF / OP_REFI the pointer pair is located by compile_dnref_search
and its address (TMP2) is kept in a private slot between iterations. */
static SLJIT_INLINE PCRE2_SPTR compile_ref_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
DEFINE_COMPILER;
BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
/* Never assigned explicitly below; presumably set by PUSH_BACKTRACK. */
backtrack_common *backtrack;
PCRE2_UCHAR type;
int offset = 0;
struct sljit_label *label;
struct sljit_jump *zerolength;
struct sljit_jump *jump = NULL;
/* The reference opcode itself; passed on to compile_ref_matchingpath. */
PCRE2_SPTR ccbegin = cc;
int min = 0, max = 0;
BOOL minimize;

PUSH_BACKTRACK(sizeof(ref_iterator_backtrack), cc, NULL);

if (ref)
  offset = GET2(cc, 1) << 1;
else
  /* The other opcodes carry one more IMM2_SIZE operand before the repeat. */
  cc += IMM2_SIZE;
type = cc[1 + IMM2_SIZE];

/* The low bit of the repeat opcode selects the minimizing variant; this only
works if OP_CRSTAR is even. */
SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
minimize = (type & 0x1) != 0;
switch(type)
  {
  case OP_CRSTAR:
  case OP_CRMINSTAR:
  min = 0;
  max = 0;
  cc += 1 + IMM2_SIZE + 1;
  break;
  case OP_CRPLUS:
  case OP_CRMINPLUS:
  min = 1;
  max = 0;
  cc += 1 + IMM2_SIZE + 1;
  break;
  case OP_CRQUERY:
  case OP_CRMINQUERY:
  min = 0;
  max = 1;
  cc += 1 + IMM2_SIZE + 1;
  break;
  case OP_CRRANGE:
  case OP_CRMINRANGE:
  /* Explicit limits are stored as two IMM2 operands after the opcode. */
  min = GET2(cc, 1 + IMM2_SIZE + 1);
  max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
  cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
  break;
  default:
  SLJIT_UNREACHABLE();
  break;
  }

/* Greedy repeat. */
if (!minimize)
  {
  if (min == 0)
    {
    allocate_stack(common, 2);
    if (ref)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
    /* Temporary release of STR_PTR. */
    OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
    /* Handles both invalid and empty cases. Since the minimum repeat
    is zero, the invalid case is basically the same as an empty case. */
    if (ref)
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
    else
      {
      compile_dnref_search(common, ccbegin, NULL);
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
      /* Keep the address of the pointer pair for the loop below. */
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
      }
    /* Restore if not zero length. */
    OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
    }
  else
    {
    allocate_stack(common, 1);
    if (ref)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    if (ref)
      {
      /* With a non-zero minimum an unset group is a failure. */
      add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
      }
    else
      {
      compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
      }
    }

  /* POSSESSIVE0 is used as the iteration counter when limits must be
  checked. */
  if (min > 1 || max > 1)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0);

  /* Start of the repeat loop. */
  label = LABEL();
  if (!ref)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
  compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);

  if (min > 1 || max > 1)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
    /* Below the minimum: loop again without saving a position. */
    if (min > 1)
      CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, label);
    if (max > 1)
      {
      /* Maximum reached: leave the loop; otherwise push the current
      position and try one more iteration. */
      jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
      allocate_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      JUMPTO(SLJIT_JUMP, label);
      JUMPHERE(jump);
      }
    }

  if (max == 0)
    {
    /* Includes min > 1 case as well. */
    allocate_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    JUMPTO(SLJIT_JUMP, label);
    }

  JUMPHERE(zerolength);
  BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();

  count_match(common);
  return cc;
  }

/* Minimizing repeat. Slot 0 is first cleared and later receives STR_PTR,
slot 1 is the iteration counter, and slot 2 (only for opcodes other than
OP_REF / OP_REFI) keeps the address of the pointer pair. */
allocate_stack(common, ref ? 2 : 3);
if (ref)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
if (type != OP_CRMINSTAR)
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);

if (min == 0)
  {
  /* Handles both invalid and empty cases. Since the minimum repeat
  is zero, the invalid case is basically the same as an empty case. */
  if (ref)
    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
  else
    {
    compile_dnref_search(common, ccbegin, NULL);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
    }
  /* Length is non-zero, we can match real repeats. */
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  /* Zero iterations are tried first: skip the code that matches one. */
  jump = JUMP(SLJIT_JUMP);
  }
else
  {
  if (ref)
    {
    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
    }
  else
    {
    compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
    }
  }

/* The label stored here is where one more iteration is attempted. */
BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
if (max > 0)
  add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));

if (!ref)
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

if (min > 1)
  {
  /* Keep iterating until the minimum count is reached. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
  CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(ref_iterator_backtrack)->matchingpath);
  }
else if (max > 0)
  OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);

if (jump != NULL)
  JUMPHERE(jump);
JUMPHERE(zerolength);

count_match(common);
return cc;
}
8142
compile_recurse_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)8143 static SLJIT_INLINE PCRE2_SPTR compile_recurse_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
8144 {
8145 DEFINE_COMPILER;
8146 backtrack_common *backtrack;
8147 recurse_entry *entry = common->entries;
8148 recurse_entry *prev = NULL;
8149 sljit_sw start = GET(cc, 1);
8150 PCRE2_SPTR start_cc;
8151 BOOL needs_control_head;
8152
8153 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
8154
8155 /* Inlining simple patterns. */
8156 if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
8157 {
8158 start_cc = common->start + start;
8159 compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
8160 BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
8161 return cc + 1 + LINK_SIZE;
8162 }
8163
8164 while (entry != NULL)
8165 {
8166 if (entry->start == start)
8167 break;
8168 prev = entry;
8169 entry = entry->next;
8170 }
8171
8172 if (entry == NULL)
8173 {
8174 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
8175 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
8176 return NULL;
8177 entry->next = NULL;
8178 entry->entry_label = NULL;
8179 entry->backtrack_label = NULL;
8180 entry->entry_calls = NULL;
8181 entry->backtrack_calls = NULL;
8182 entry->start = start;
8183
8184 if (prev != NULL)
8185 prev->next = entry;
8186 else
8187 common->entries = entry;
8188 }
8189
8190 BACKTRACK_AS(recurse_backtrack)->entry = entry;
8191
8192 if (entry->entry_label == NULL)
8193 add_jump(compiler, &entry->entry_calls, JUMP(SLJIT_FAST_CALL));
8194 else
8195 JUMPTO(SLJIT_FAST_CALL, entry->entry_label);
8196 /* Leave if the match is failed. */
8197 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
8198 BACKTRACK_AS(recurse_backtrack)->matchingpath = LABEL();
8199 return cc + 1 + LINK_SIZE;
8200 }
8201
do_callout(struct jit_arguments * arguments,pcre2_callout_block * callout_block,PCRE2_SPTR * jit_ovector)8202 static sljit_s32 SLJIT_FUNC do_callout(struct jit_arguments *arguments, pcre2_callout_block *callout_block, PCRE2_SPTR *jit_ovector)
8203 {
8204 PCRE2_SPTR begin;
8205 PCRE2_SIZE *ovector;
8206 sljit_u32 oveccount, capture_top;
8207
8208 if (arguments->callout == NULL)
8209 return 0;
8210
8211 SLJIT_COMPILE_ASSERT(sizeof (PCRE2_SIZE) <= sizeof (sljit_sw), pcre2_size_must_be_lower_than_sljit_sw_size);
8212
8213 begin = arguments->begin;
8214 ovector = (PCRE2_SIZE*)(callout_block + 1);
8215 oveccount = callout_block->capture_top;
8216
8217 SLJIT_ASSERT(oveccount >= 1);
8218
8219 callout_block->version = 2;
8220 callout_block->callout_flags = 0;
8221
8222 /* Offsets in subject. */
8223 callout_block->subject_length = arguments->end - arguments->begin;
8224 callout_block->start_match = jit_ovector[0] - begin;
8225 callout_block->current_position = (PCRE2_SPTR)callout_block->offset_vector - begin;
8226 callout_block->subject = begin;
8227
8228 /* Convert and copy the JIT offset vector to the ovector array. */
8229 callout_block->capture_top = 1;
8230 callout_block->offset_vector = ovector;
8231
8232 ovector[0] = PCRE2_UNSET;
8233 ovector[1] = PCRE2_UNSET;
8234 ovector += 2;
8235 jit_ovector += 2;
8236 capture_top = 1;
8237
8238 /* Convert pointers to sizes. */
8239 while (--oveccount != 0)
8240 {
8241 capture_top++;
8242
8243 ovector[0] = (PCRE2_SIZE)(jit_ovector[0] - begin);
8244 ovector[1] = (PCRE2_SIZE)(jit_ovector[1] - begin);
8245
8246 if (ovector[0] != PCRE2_UNSET)
8247 callout_block->capture_top = capture_top;
8248
8249 ovector += 2;
8250 jit_ovector += 2;
8251 }
8252
8253 return (arguments->callout)(callout_block, arguments->callout_data);
8254 }
8255
/* Offset of the field "arg" inside pcre2_callout_block, as computed by
SLJIT_OFFSETOF. Used as the displacement when JIT code stores into a callout
block addressed through STACK_TOP. */
#define CALLOUT_ARG_OFFSET(arg) \
    SLJIT_OFFSETOF(pcre2_callout_block, arg)
8258
/* Emits the code for a callout item (OP_CALLOUT, or the variant with a
string argument).

Arguments:
  common   the compiler state
  cc       points to the callout opcode
  parent   the enclosing backtrack record (used by PUSH_BACKTRACK)

Returns:   pointer to the code following the callout item

The emitted code builds a pcre2_callout_block, followed by room for the
converted offset vector, on the backtrack stack, calls do_callout and then
acts on its return value: a positive value backtracks, any other non-zero
value aborts the match, zero continues. */
static SLJIT_INLINE PCRE2_SPTR compile_callout_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
sljit_s32 mov_opcode;
/* OP_CALLOUT has a fixed length; the other form stores its own length. */
unsigned int callout_length = (*cc == OP_CALLOUT)
    ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2 * LINK_SIZE);
sljit_sw value1;
sljit_sw value2;
sljit_sw value3;
/* Bytes needed for the converted offset vector: two words per pair. */
sljit_uw callout_arg_size = (common->re->top_bracket + 1) * 2 * sizeof(sljit_sw);

PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);

/* Total size (block + vector) rounded up to whole stack words. */
callout_arg_size = (sizeof(pcre2_callout_block) + callout_arg_size + sizeof(sljit_sw) - 1) / sizeof(sljit_sw);

allocate_stack(common, callout_arg_size);

SLJIT_ASSERT(common->capture_last_ptr != 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
/* The callout number is taken from the opcode for OP_CALLOUT and is zero
for the string form. */
value1 = (*cc == OP_CALLOUT) ? cc[1 + 2 * LINK_SIZE] : 0;
OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, value1);
OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
/* capture_top temporarily carries the number of pairs; do_callout reads it
as its loop count and then overwrites it. */
OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_top), SLJIT_IMM, common->re->top_bracket + 1);

/* This pointer sized field temporarily stores an internal variable: the
current subject pointer, which do_callout turns into current_position. */
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);

if (common->mark_ptr != 0)
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
/* PCRE2_SIZE fields are stored with a move that matches their width. */
mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;
OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 1));
OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 1 + LINK_SIZE));

if (*cc == OP_CALLOUT)
  {
  /* Numbered callout: no string argument. */
  value1 = 0;
  value2 = 0;
  value3 = 0;
  }
else
  {
  /* String callout: address, length and offset of the string, all taken
  from the compiled pattern. */
  value1 = (sljit_sw) (cc + (1 + 4*LINK_SIZE) + 1);
  value2 = (callout_length - (1 + 4*LINK_SIZE + 2));
  value3 = (sljit_sw) (GET(cc, 1 + 3*LINK_SIZE));
  }

OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string), SLJIT_IMM, value1);
OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_length), SLJIT_IMM, value2);
OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_offset), SLJIT_IMM, value3);
/* The mark field gets the value loaded into TMP2 above, or zero when the
pattern has no mark pointer. */
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);

SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);

/* Needed to save important temporary registers. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STR_PTR, 0);
/* SLJIT_R0 = arguments */
/* SLJIT_R1 = address of the callout block, SLJIT_R2 = address of the JIT
offset vector. */
OP1(SLJIT_MOV, SLJIT_R1, 0, STACK_TOP, 0);
GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START);
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(S32) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
free_stack(common, callout_arg_size);

/* Check return value. */
OP2(SLJIT_SUB32 | SLJIT_SET_Z | SLJIT_SET_SIG_GREATER, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
/* Positive: the callout asks for a backtrack. */
add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_SIG_GREATER32));
/* Positive values are already handled, so "not equal" here can only mean
negative: abort the match. */
if (common->abort_label == NULL)
  add_jump(compiler, &common->abort, JUMP(SLJIT_NOT_EQUAL32) /* SIG_LESS */);
else
  JUMPTO(SLJIT_NOT_EQUAL32 /* SIG_LESS */, common->abort_label);
return cc + callout_length;
}
8332
/* NOTE(review): no #define of CALLOUT_ARG_SIZE is visible in this part of
the file; if there is none elsewhere, this #undef is a harmless no-op. */
#undef CALLOUT_ARG_SIZE
/* End of the scope of the callout block offset helper macro. */
#undef CALLOUT_ARG_OFFSET
8335
assert_needs_str_ptr_saving(PCRE2_SPTR cc)8336 static SLJIT_INLINE BOOL assert_needs_str_ptr_saving(PCRE2_SPTR cc)
8337 {
8338 while (TRUE)
8339 {
8340 switch (*cc)
8341 {
8342 case OP_CALLOUT_STR:
8343 cc += GET(cc, 1 + 2*LINK_SIZE);
8344 break;
8345
8346 case OP_NOT_WORD_BOUNDARY:
8347 case OP_WORD_BOUNDARY:
8348 case OP_CIRC:
8349 case OP_CIRCM:
8350 case OP_DOLL:
8351 case OP_DOLLM:
8352 case OP_CALLOUT:
8353 case OP_ALT:
8354 cc += PRIV(OP_lengths)[*cc];
8355 break;
8356
8357 case OP_KET:
8358 return FALSE;
8359
8360 default:
8361 return TRUE;
8362 }
8363 }
8364 }
8365
/* Emits the code for an assertion group: an opcode in the range OP_ASSERT to
OP_ASSERTBACK_NOT, optionally preceded by OP_BRAZERO or OP_BRAMINZERO.

Arguments:
  common       the compiler state
  cc           points to the assertion opcode, or to the OP_BRAZERO /
               OP_BRAMINZERO in front of it
  backtrack    the assert backtrack record; its framesize, private_data_ptr
               and (for the zero-repeat forms) matchingpath fields are used
  conditional  TRUE when the assertion is the condition of a conditional
               group; failures are then collected in backtrack->condfailed
               instead of backtrack->common.topbacktracks

Returns:       pointer to the code after the closing item of the assertion,
               or NULL when the compiler reports an error

Several fields of "common" (quit / accept labels and lists, then_trap, the
positive assertion state) are saved on entry and restored on every exit
path, including the two error returns. */
static PCRE2_SPTR compile_assert_matchingpath(compiler_common *common, PCRE2_SPTR cc, assert_backtrack *backtrack, BOOL conditional)
{
DEFINE_COMPILER;
int framesize;
/* Number of stack words used on top of the frame itself. */
int extrasize;
BOOL local_quit_available = FALSE;
BOOL needs_control_head;
int private_data_ptr;
backtrack_common altbacktrack;
PCRE2_SPTR ccbegin;
PCRE2_UCHAR opcode;
PCRE2_UCHAR bra = OP_BRA;
jump_list *tmp = NULL;
/* Where a failed assertion jumps to. */
jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
/* Where a matching alternative jumps to. */
jump_list **found;
/* Saving previous accept variables. */
BOOL save_local_quit_available = common->local_quit_available;
BOOL save_in_positive_assertion = common->in_positive_assertion;
then_trap_backtrack *save_then_trap = common->then_trap;
struct sljit_label *save_quit_label = common->quit_label;
struct sljit_label *save_accept_label = common->accept_label;
jump_list *save_quit = common->quit;
jump_list *save_positive_assertion_quit = common->positive_assertion_quit;
jump_list *save_accept = common->accept;
struct sljit_jump *jump;
struct sljit_jump *brajump = NULL;

/* Assert captures then. */
common->then_trap = NULL;

if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
  {
  SLJIT_ASSERT(!conditional);
  bra = *cc;
  cc++;
  }
private_data_ptr = PRIVATE_DATA(cc);
SLJIT_ASSERT(private_data_ptr != 0);
framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
backtrack->framesize = framesize;
backtrack->private_data_ptr = private_data_ptr;
opcode = *cc;
SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
/* For a positive assertion a matching alternative means success (collected
in tmp); for a negative one it means the assertion failed. */
found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
ccbegin = cc;
/* cc now points to the end of the first alternative. */
cc += GET(cc, 1);

if (bra == OP_BRAMINZERO)
  {
  /* This is a braminzero backtrack path. */
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  }

if (framesize < 0)
  {
  /* No frame is needed: at most STR_PTR and the control head are saved. */
  extrasize = 1;
  if (bra == OP_BRA && !assert_needs_str_ptr_saving(ccbegin + 1 + LINK_SIZE))
    extrasize = 0;

  if (needs_control_head)
    extrasize++;

  if (framesize == no_frame)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);

  if (extrasize > 0)
    allocate_stack(common, extrasize);

  if (needs_control_head)
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);

  if (extrasize > 0)
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

  if (needs_control_head)
    {
    SLJIT_ASSERT(extrasize == 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
    }
  }
else
  {
  /* A frame is needed. Stack layout from the top: STR_PTR, then the control
  head if needed, then the previous private data value, then the frame. */
  extrasize = needs_control_head ? 3 : 2;
  allocate_stack(common, framesize + extrasize);

  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
  if (needs_control_head)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

  if (needs_control_head)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
    }
  else
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);

  init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize);
  }

memset(&altbacktrack, 0, sizeof(backtrack_common));
if (conditional || (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT))
  {
  /* Control verbs cannot escape from these asserts. */
  local_quit_available = TRUE;
  common->local_quit_available = TRUE;
  common->quit_label = NULL;
  common->quit = NULL;
  }

common->in_positive_assertion = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK);
common->positive_assertion_quit = NULL;

/* One iteration per alternative of the assertion. */
while (1)
  {
  common->accept_label = NULL;
  common->accept = NULL;
  altbacktrack.top = NULL;
  altbacktrack.topbacktracks = NULL;

  /* Later alternatives restart from the saved subject position. */
  if (*ccbegin == OP_ALT && extrasize > 0)
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  altbacktrack.cc = ccbegin;
  compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    {
    /* Error exit: restore the saved compiler state. */
    if (local_quit_available)
      {
      common->local_quit_available = save_local_quit_available;
      common->quit_label = save_quit_label;
      common->quit = save_quit;
      }
    common->in_positive_assertion = save_in_positive_assertion;
    common->then_trap = save_then_trap;
    common->accept_label = save_accept_label;
    common->positive_assertion_quit = save_positive_assertion_quit;
    common->accept = save_accept;
    return NULL;
    }
  common->accept_label = LABEL();
  if (common->accept != NULL)
    set_jumps(common->accept, common->accept_label);

  /* Reset stack. */
  if (framesize < 0)
    {
    if (framesize == no_frame)
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    else if (extrasize > 0)
      free_stack(common, extrasize);

    if (needs_control_head)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
    }
  else
    {
    if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
      {
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
      if (needs_control_head)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
      }
    else
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      if (needs_control_head)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 2));
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
      }
    }

  if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
    {
    /* We know that STR_PTR was stored on the top of the stack. */
    if (conditional)
      {
      if (extrasize > 0)
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? STACK(-2) : STACK(-1));
      }
    else if (bra == OP_BRAZERO)
      {
      if (framesize < 0)
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));
      else
        {
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - extrasize));
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
        }
      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else if (framesize >= 0)
      {
      /* For OP_BRA and OP_BRAMINZERO. */
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
      }
    }
  /* This alternative matched. */
  add_jump(compiler, found, JUMP(SLJIT_JUMP));

  compile_backtrackingpath(common, altbacktrack.top);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    {
    /* Error exit: restore the saved compiler state. */
    if (local_quit_available)
      {
      common->local_quit_available = save_local_quit_available;
      common->quit_label = save_quit_label;
      common->quit = save_quit;
      }
    common->in_positive_assertion = save_in_positive_assertion;
    common->then_trap = save_then_trap;
    common->accept_label = save_accept_label;
    common->positive_assertion_quit = save_positive_assertion_quit;
    common->accept = save_accept;
    return NULL;
    }
  set_jumps(altbacktrack.topbacktracks, LABEL());

  if (*cc != OP_ALT)
    break;

  /* Step to the next alternative. */
  ccbegin = cc;
  cc += GET(cc, 1);
  }

if (local_quit_available)
  {
  SLJIT_ASSERT(common->positive_assertion_quit == NULL);
  /* Makes the check less complicated below. */
  common->positive_assertion_quit = common->quit;
  }

/* None of them matched. */
if (common->positive_assertion_quit != NULL)
  {
  /* Landing code for the quit jumps collected inside the assertion; the
  normal path jumps over it. */
  jump = JUMP(SLJIT_JUMP);
  set_jumps(common->positive_assertion_quit, LABEL());
  SLJIT_ASSERT(framesize != no_stack);
  if (framesize < 0)
    OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
  else
    {
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
    OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (extrasize + 1) * sizeof(sljit_sw));
    }
  JUMPHERE(jump);
  }

if (needs_control_head)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));

if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
  {
  /* Assert is failed. */
  if ((conditional && extrasize > 0) || bra == OP_BRAZERO)
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  if (framesize < 0)
    {
    /* The topmost item should be 0. */
    if (bra == OP_BRAZERO)
      {
      if (extrasize == 2)
        free_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else if (extrasize > 0)
      free_stack(common, extrasize);
    }
  else
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
    /* The topmost item should be 0. */
    if (bra == OP_BRAZERO)
      {
      free_stack(common, framesize + extrasize - 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else
      free_stack(common, framesize + extrasize);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }
  /* With OP_BRAZERO a failed assertion still continues on the matching
  path (the jump is bound to it below); otherwise it is a failure. */
  jump = JUMP(SLJIT_JUMP);
  if (bra != OP_BRAZERO)
    add_jump(compiler, target, jump);

  /* Assert is successful. */
  set_jumps(tmp, LABEL());
  if (framesize < 0)
    {
    /* We know that STR_PTR was stored on the top of the stack. */
    if (extrasize > 0)
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));

    /* Keep the STR_PTR on the top of the stack. */
    if (bra == OP_BRAZERO)
      {
      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
      if (extrasize == 2)
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      }
    else if (bra == OP_BRAMINZERO)
      {
      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    }
  else
    {
    if (bra == OP_BRA)
      {
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 1));
      }
    else
      {
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
      if (extrasize == 2)
        {
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
        if (bra == OP_BRAMINZERO)
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
        }
      else
        {
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
        }
      }
    }

  if (bra == OP_BRAZERO)
    {
    backtrack->matchingpath = LABEL();
    SET_LABEL(jump, backtrack->matchingpath);
    }
  else if (bra == OP_BRAMINZERO)
    {
    JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
    JUMPHERE(brajump);
    if (framesize >= 0)
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
      }
    set_jumps(backtrack->common.topbacktracks, LABEL());
    }
  }
else
  {
  /* AssertNot is successful. */
  if (framesize < 0)
    {
    if (extrasize > 0)
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

    if (bra != OP_BRA)
      {
      if (extrasize == 2)
        free_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else if (extrasize > 0)
      free_stack(common, extrasize);
    }
  else
    {
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
    /* The topmost item should be 0. */
    if (bra != OP_BRA)
      {
      free_stack(common, framesize + extrasize - 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else
      free_stack(common, framesize + extrasize);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }

  if (bra == OP_BRAZERO)
    backtrack->matchingpath = LABEL();
  else if (bra == OP_BRAMINZERO)
    {
    JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
    JUMPHERE(brajump);
    }

  if (bra != OP_BRA)
    {
    /* For the zero-repeat forms the jumps collected so far are bound here
    and the list is cleared. */
    SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
    set_jumps(backtrack->common.topbacktracks, LABEL());
    backtrack->common.topbacktracks = NULL;
    }
  }

/* Normal exit: restore the saved compiler state. */
if (local_quit_available)
  {
  common->local_quit_available = save_local_quit_available;
  common->quit_label = save_quit_label;
  common->quit = save_quit;
  }
common->in_positive_assertion = save_in_positive_assertion;
common->then_trap = save_then_trap;
common->accept_label = save_accept_label;
common->positive_assertion_quit = save_positive_assertion_quit;
common->accept = save_accept;
return cc + 1 + LINK_SIZE;
}
8791
match_once_common(compiler_common * common,PCRE2_UCHAR ket,int framesize,int private_data_ptr,BOOL has_alternatives,BOOL needs_control_head)8792 static SLJIT_INLINE void match_once_common(compiler_common *common, PCRE2_UCHAR ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
8793 {
8794 DEFINE_COMPILER;
8795 int stacksize;
8796
8797 if (framesize < 0)
8798 {
8799 if (framesize == no_frame)
8800 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8801 else
8802 {
8803 stacksize = needs_control_head ? 1 : 0;
8804 if (ket != OP_KET || has_alternatives)
8805 stacksize++;
8806
8807 if (stacksize > 0)
8808 free_stack(common, stacksize);
8809 }
8810
8811 if (needs_control_head)
8812 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? STACK(-2) : STACK(-1));
8813
8814 /* TMP2 which is set here used by OP_KETRMAX below. */
8815 if (ket == OP_KETRMAX)
8816 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
8817 else if (ket == OP_KETRMIN)
8818 {
8819 /* Move the STR_PTR to the private_data_ptr. */
8820 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
8821 }
8822 }
8823 else
8824 {
8825 stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;
8826 OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
8827 if (needs_control_head)
8828 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
8829
8830 if (ket == OP_KETRMAX)
8831 {
8832 /* TMP2 which is set here used by OP_KETRMAX below. */
8833 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8834 }
8835 }
8836 if (needs_control_head)
8837 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
8838 }
8839
match_capture_common(compiler_common * common,int stacksize,int offset,int private_data_ptr)8840 static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
8841 {
8842 DEFINE_COMPILER;
8843
8844 if (common->capture_last_ptr != 0)
8845 {
8846 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
8847 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
8848 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
8849 stacksize++;
8850 }
8851 if (common->optimized_cbracket[offset >> 1] == 0)
8852 {
8853 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
8854 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
8855 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
8856 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8857 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
8858 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
8859 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
8860 stacksize += 2;
8861 }
8862 return stacksize;
8863 }
8864
8865 /*
8866 Handling bracketed expressions is probably the most complex part.
8867
  Stack layout naming characters:
    S - Push the current STR_PTR
    0 - Push a 0 (NULL)
    A - Push the current STR_PTR. Needed for restoring the STR_PTR
        before the next alternative. Not pushed if there are no alternatives.
    M - Any values pushed by the current alternative. Can be empty, or anything.
    C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
    L - Push the previous local (pointed to by localptr) to the stack
   () - optional values stored on the stack
  ()* - optional, can be stored multiple times
8878
8879 The following list shows the regular expression templates, their PCRE byte codes
8880 and stack layout supported by pcre-sljit.
8881
8882 (?:) OP_BRA | OP_KET A M
8883 () OP_CBRA | OP_KET C M
8884 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
8885 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
8886 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
8887 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
8888 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
8889 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
8890 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
8891 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
8892 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
8893 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
8894 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
8895 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
8896 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
8897 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
8898 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
8899 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
8900 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
8901 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
8902 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
8903 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
8904
8905
  Stack layout naming characters:
    A - Push the alternative index (starting from 0) on the stack.
        Not pushed if there are no alternatives.
    M - Any values pushed by the current alternative. Can be empty, or anything.
8910
8911 The next list shows the possible content of a bracket:
8912 (|) OP_*BRA | OP_ALT ... M A
8913 (?()|) OP_*COND | OP_ALT M A
8914 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
8915 Or nothing, if trace is unnecessary
8916 */
8917
/* Emits the matching path of a bracketed group and of its first alternative.

Handled opcodes (after an optional OP_BRAZERO / OP_BRAMINZERO prefix) are
the ones tested below: OP_BRA, OP_CBRA, OP_SBRA, OP_SCBRA, OP_ONCE, OP_COND
and OP_SCOND. The closing opcode is OP_KET, OP_KETRMAX or OP_KETRMIN. An
OP_KET whose private data is non-zero describes a counted repeat (OP_EXACT,
OP_UPTO or OP_MINUPTO); the latter two are compiled as OP_KETRMAX /
OP_KETRMIN loops driven by the counter stored at repeat_ptr.

Arguments:
  common   compiler state
  cc       points to the bracket opcode, or to the OP_BRAZERO /
           OP_BRAMINZERO that precedes it
  parent   parent backtrack descriptor; for OP_BRAMINZERO it is accessed as
           a braminzero_backtrack to reach its matchingpath label. It is
           presumably also consumed by PUSH_BACKTRACK (macro not visible
           here - confirm).

Returns the byte code position after the closing KET (plus repeat_length),
or NULL when sljit reports a compiler error. */
static PCRE2_SPTR compile_bracket_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
PCRE2_UCHAR opcode;
int private_data_ptr = 0;
int offset = 0;
int i, stacksize;
int repeat_ptr = 0, repeat_length = 0;
int repeat_type = 0, repeat_count = 0;
PCRE2_SPTR ccbegin;
PCRE2_SPTR matchingpath;
PCRE2_SPTR slot;
PCRE2_UCHAR bra = OP_BRA;
PCRE2_UCHAR ket;
assert_backtrack *assert;
BOOL has_alternatives;
BOOL needs_control_head = FALSE;
struct sljit_jump *jump;
struct sljit_jump *skip;
struct sljit_label *rmax_label = NULL;
struct sljit_jump *braminzero = NULL;

PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);

/* Remember the optional zero-repeat prefix and step over it. */
if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
  {
  bra = *cc;
  cc++;
  }

opcode = *cc;
ccbegin = cc;
/* Temporarily point matchingpath at the closing KET to inspect it. */
matchingpath = bracketend(cc) - 1 - LINK_SIZE;
ket = *matchingpath;
if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)
  {
  /* Counted repeat: the parameters are stored in the private data of the
  KET and of the positions following it. */
  repeat_ptr = PRIVATE_DATA(matchingpath);
  repeat_length = PRIVATE_DATA(matchingpath + 1);
  repeat_type = PRIVATE_DATA(matchingpath + 2);
  repeat_count = PRIVATE_DATA(matchingpath + 3);
  SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);
  if (repeat_type == OP_UPTO)
    ket = OP_KETRMAX;
  if (repeat_type == OP_MINUPTO)
    ket = OP_KETRMIN;
  }

matchingpath = ccbegin + 1 + LINK_SIZE;
SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
/* cc now points to the end of the first alternative (OP_ALT or a KET). */
cc += GET(cc, 1);

has_alternatives = *cc == OP_ALT;
if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND))
  {
  SLJIT_COMPILE_ASSERT(OP_DNRREF == OP_RREF + 1 && OP_FALSE == OP_RREF + 2 && OP_TRUE == OP_RREF + 3,
    compile_time_checks_must_be_grouped_together);
  /* Conditions in the OP_RREF..OP_TRUE range and OP_FAIL are decided at
  compile time below, so only one branch is ever compiled for them. */
  has_alternatives = ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL) ? FALSE : TRUE;
  }

if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
  opcode = OP_SCOND;

if (opcode == OP_CBRA || opcode == OP_SCBRA)
  {
  /* Capturing brackets has a pre-allocated space. */
  offset = GET2(ccbegin, 1 + LINK_SIZE);
  if (common->optimized_cbracket[offset] == 0)
    {
    private_data_ptr = OVECTOR_PRIV(offset);
    offset <<= 1;
    }
  else
    {
    offset <<= 1;
    private_data_ptr = OVECTOR(offset);
    }
  BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
  matchingpath += IMM2_SIZE;
  }
else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
  {
  /* Other brackets simply allocate the next entry. */
  private_data_ptr = PRIVATE_DATA(ccbegin);
  SLJIT_ASSERT(private_data_ptr != 0);
  BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
  if (opcode == OP_ONCE)
    BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
  }

/* Instructions before the first alternative. */
stacksize = 0;
if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
  stacksize++;
if (bra == OP_BRAZERO)
  stacksize++;

if (stacksize > 0)
  allocate_stack(common, stacksize);

stacksize = 0;
if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
  {
  /* The 0 pushed here is the first item of the loop layouts. */
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
  stacksize++;
  }

if (bra == OP_BRAZERO)
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);

if (bra == OP_BRAMINZERO)
  {
  /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  if (ket != OP_KETRMIN)
    {
    free_stack(common, 1);
    braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
    }
  else
    {
    if (opcode == OP_ONCE || opcode >= OP_SBRA)
      {
      jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
      /* Nothing stored during the first run. */
      skip = JUMP(SLJIT_JUMP);
      JUMPHERE(jump);
      /* Checking zero-length iteration. */
      if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
        {
        /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
        braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
        }
      else
        {
        /* Except when the whole stack frame must be saved. */
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
        braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-BACKTRACK_AS(bracket_backtrack)->u.framesize - 2));
        }
      JUMPHERE(skip);
      }
    else
      {
      jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
      JUMPHERE(jump);
      }
    }
  }

if (repeat_type != 0)
  {
  /* Initialize the iteration counter of a counted repeat. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, repeat_count);
  if (repeat_type == OP_EXACT)
    rmax_label = LABEL();
  }

if (ket == OP_KETRMIN)
  BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();

if (ket == OP_KETRMAX)
  {
  rmax_label = LABEL();
  if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA && repeat_type == 0)
    BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label;
  }

/* Handling capturing brackets and alternatives. */
if (opcode == OP_ONCE)
  {
  stacksize = 0;
  if (needs_control_head)
    {
    /* TMP2 keeps the control head until stack space is allocated. */
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
    stacksize++;
    }

  if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
    {
    /* Neither capturing brackets nor recursions are found in the block. */
    if (ket == OP_KETRMIN)
      {
      stacksize += 2;
      if (!needs_control_head)
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      }
    else
      {
      if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
      if (ket == OP_KETRMAX || has_alternatives)
        stacksize++;
      }

    if (stacksize > 0)
      allocate_stack(common, stacksize);

    stacksize = 0;
    if (needs_control_head)
      {
      stacksize++;
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
      }

    if (ket == OP_KETRMIN)
      {
      /* TMP2 is free again once the control head has been stored, so the
      private data value can be loaded into it now. */
      if (needs_control_head)
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
      if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
        OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
      }
    else if (ket == OP_KETRMAX || has_alternatives)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
    }
  else
    {
    /* A frame must be stored: reserve space for the frame, the previous
    private data value and the optional items counted in stacksize. */
    if (ket != OP_KET || has_alternatives)
      stacksize++;

    stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1;
    allocate_stack(common, stacksize);

    if (needs_control_head)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);

    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));

    stacksize = needs_control_head ? 1 : 0;
    if (ket != OP_KET || has_alternatives)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
      stacksize++;
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
      }
    else
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
      }
    init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1);
    }
  }
else if (opcode == OP_CBRA || opcode == OP_SCBRA)
  {
  /* Saving the previous values. */
  if (common->optimized_cbracket[offset >> 1] != 0)
    {
    SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
    allocate_stack(common, 2);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
    }
  else
    {
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    allocate_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
    }
  }
else if (opcode == OP_SBRA || opcode == OP_SCOND)
  {
  /* Saving the previous value. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  allocate_stack(common, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
  }
else if (has_alternatives)
  {
  /* Pushing the starting string pointer. */
  allocate_stack(common, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  }

/* Generating code for the first alternative. */
if (opcode == OP_COND || opcode == OP_SCOND)
  {
  if (*matchingpath == OP_CREF)
    {
    /* The condition fails when the start value of the referenced group
    equals OVECTOR(1). */
    SLJIT_ASSERT(has_alternatives);
    add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
      CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
    matchingpath += 1 + IMM2_SIZE;
    }
  else if (*matchingpath == OP_DNCREF)
    {
    SLJIT_ASSERT(has_alternatives);

    /* i is the number of name table entries to test, slot is the first
    one. The differences from OVECTOR(1) are OR-ed together, so the result
    is zero (condition failed) only if every entry equals OVECTOR(1).
    STR_PTR is used as a scratch register and is preserved in TMP3. */
    i = GET2(matchingpath, 1 + IMM2_SIZE);
    slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
    OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
    slot += common->name_entry_size;
    i--;
    while (i-- > 0)
      {
      OP2(SLJIT_SUB, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
      OP2(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, STR_PTR, 0);
      slot += common->name_entry_size;
      }
    OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
    add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), JUMP(SLJIT_ZERO));
    matchingpath += 1 + 2 * IMM2_SIZE;
    }
  else if ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL)
    {
    /* Never has other case. */
    BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
    SLJIT_ASSERT(!has_alternatives);

    /* In this branch stacksize is reused as the compile-time result of
    the condition: non-zero selects the first branch, zero the "else". */
    if (*matchingpath == OP_TRUE)
      {
      stacksize = 1;
      matchingpath++;
      }
    else if (*matchingpath == OP_FALSE || *matchingpath == OP_FAIL)
      stacksize = 0;
    else if (*matchingpath == OP_RREF)
      {
      stacksize = GET2(matchingpath, 1);
      if (common->currententry == NULL)
        stacksize = 0;
      else if (stacksize == RREF_ANY)
        stacksize = 1;
      else if (common->currententry->start == 0)
        stacksize = stacksize == 0;
      else
        stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);

      if (stacksize != 0)
        matchingpath += 1 + IMM2_SIZE;
      }
    else
      {
      /* Remaining opcode of the range (OP_DNRREF, see the compile-time
      assert above): look for the current entry's group number among the
      name table entries. */
      if (common->currententry == NULL || common->currententry->start == 0)
        stacksize = 0;
      else
        {
        stacksize = GET2(matchingpath, 1 + IMM2_SIZE);
        slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
        i = (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
        while (stacksize > 0)
          {
          if ((int)GET2(slot, 0) == i)
            break;
          slot += common->name_entry_size;
          stacksize--;
          }
        }

      if (stacksize != 0)
        matchingpath += 1 + 2 * IMM2_SIZE;
      }

    /* The stacksize == 0 is a common "else" case. */
    if (stacksize == 0)
      {
      if (*cc == OP_ALT)
        {
        matchingpath = cc + 1 + LINK_SIZE;
        cc += GET(cc, 1);
        }
      else
        matchingpath = cc;
      }
    }
  else
    {
    SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
    /* Similar code as PUSH_BACKTRACK macro. */
    assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
      return NULL;
    memset(assert, 0, sizeof(assert_backtrack));
    assert->common.cc = matchingpath;
    BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
    matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
    }
  }

/* Compile the body of the selected alternative. */
compile_matchingpath(common, matchingpath, cc, backtrack);
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  return NULL;

if (opcode == OP_ONCE)
  match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);

/* First pass: count the stack slots needed after the alternative. */
stacksize = 0;
if (repeat_type == OP_MINUPTO)
  {
  /* We need to preserve the counter. TMP2 will be used below. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
  stacksize++;
  }
if (ket != OP_KET || bra != OP_BRA)
  stacksize++;
if (offset != 0)
  {
  if (common->capture_last_ptr != 0)
    stacksize++;
  if (common->optimized_cbracket[offset >> 1] == 0)
    stacksize += 2;
  }
if (has_alternatives && opcode != OP_ONCE)
  stacksize++;

if (stacksize > 0)
  allocate_stack(common, stacksize);

/* Second pass: fill the slots in the same order as they were counted. */
stacksize = 0;
if (repeat_type == OP_MINUPTO)
  {
  /* TMP2 was set above. */
  OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
  stacksize++;
  }

if (ket != OP_KET || bra != OP_BRA)
  {
  if (ket != OP_KET)
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
  else
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
  stacksize++;
  }

if (offset != 0)
  stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);

if (has_alternatives)
  {
  /* Index 0 marks the first alternative. */
  if (opcode != OP_ONCE)
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
  if (ket != OP_KETRMAX)
    BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
  }

/* Must be after the matchingpath label. */
if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
  {
  SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
  }

if (ket == OP_KETRMAX)
  {
  if (repeat_type != 0)
    {
    /* Counted greedy repeat: loop until the counter reaches zero. */
    if (has_alternatives)
      BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, rmax_label);
    /* Drop STR_PTR for greedy plus quantifier. */
    if (opcode != OP_ONCE)
      free_stack(common, 1);
    }
  else if (opcode == OP_ONCE || opcode >= OP_SBRA)
    {
    if (has_alternatives)
      BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
    /* Checking zero-length iteration. */
    if (opcode != OP_ONCE)
      {
      CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0, rmax_label);
      /* Drop STR_PTR for greedy plus quantifier. */
      if (bra != OP_BRAZERO)
        free_stack(common, 1);
      }
    else
      /* TMP2 must contain the starting STR_PTR. */
      CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmax_label);
    }
  else
    JUMPTO(SLJIT_JUMP, rmax_label);
  BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
  }

if (repeat_type == OP_EXACT)
  {
  count_match(common);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
  JUMPTO(SLJIT_NOT_ZERO, rmax_label);
  }
else if (repeat_type == OP_UPTO)
  {
  /* We need to preserve the counter. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
  allocate_stack(common, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
  }

if (bra == OP_BRAZERO)
  BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();

if (bra == OP_BRAMINZERO)
  {
  /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
  JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
  if (braminzero != NULL)
    {
    JUMPHERE(braminzero);
    /* We need to release the end pointer to perform the
    backtrack for the zero-length iteration. When
    framesize is < 0, OP_ONCE will do the release itself. */
    if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (BACKTRACK_AS(bracket_backtrack)->u.framesize - 1) * sizeof(sljit_sw));
      }
    else if (ket == OP_KETRMIN && opcode != OP_ONCE)
      free_stack(common, 1);
    }
  /* Continue to the normal backtrack. */
  }

if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
  count_match(common);

/* Skip the other alternatives. */
while (*cc == OP_ALT)
  cc += GET(cc, 1);
cc += 1 + LINK_SIZE;

if (opcode == OP_ONCE)
  {
  /* We temporarily encode the needs_control_head in the lowest bit.
     Note: on the target architectures of SLJIT the ((x << 1) >> 1) returns
     the same value for small signed numbers (including negative numbers). */
  BACKTRACK_AS(bracket_backtrack)->u.framesize = (BACKTRACK_AS(bracket_backtrack)->u.framesize << 1) | (needs_control_head ? 1 : 0);
  }
return cc + repeat_length;
}
9463
/* Emits the matching path of a bracket whose opcode is OP_BRAPOS,
OP_SBRAPOS, OP_CBRAPOS or OP_SCBRAPOS (optionally preceded by
OP_BRAPOSZERO) and which is closed by OP_KETRPOS. The matching path and
the backtracking path of every alternative are both generated here, inside
the loop over the alternatives.

Arguments:
  common   compiler state
  cc       points to the bracket opcode, or to the OP_BRAPOSZERO that
           precedes it
  parent   parent backtrack descriptor; not referenced directly in this
           function, presumably consumed by PUSH_BACKTRACK (macro not
           visible here - confirm)

Returns the byte code position after OP_KETRPOS, or NULL when sljit
reports a compiler error. */
static PCRE2_SPTR compile_bracketpos_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
PCRE2_UCHAR opcode;
int private_data_ptr;
int cbraprivptr = 0;
BOOL needs_control_head;
int framesize;
int stacksize;
int offset = 0;
BOOL zero = FALSE;
PCRE2_SPTR ccbegin = NULL;
int stack; /* Also contains the offset of control head. */
struct sljit_label *loop = NULL;
struct jump_list *emptymatch = NULL;

PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
if (*cc == OP_BRAPOSZERO)
  {
  zero = TRUE;
  cc++;
  }

opcode = *cc;
private_data_ptr = PRIVATE_DATA(cc);
SLJIT_ASSERT(private_data_ptr != 0);
BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
switch(opcode)
  {
  case OP_BRAPOS:
  case OP_SBRAPOS:
  ccbegin = cc + 1 + LINK_SIZE;
  break;

  case OP_CBRAPOS:
  case OP_SCBRAPOS:
  offset = GET2(cc, 1 + LINK_SIZE);
  /* This case cannot be optimized in the same way as
  normal capturing brackets. */
  SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
  cbraprivptr = OVECTOR_PRIV(offset);
  offset <<= 1;
  ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
  break;

  default:
  SLJIT_UNREACHABLE();
  break;
  }

framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
if (framesize < 0)
  {
  /* No frame is stored. Count the slots: the old ovector pair (and the
  optional capture_last value) or the STR_PTR, then the optional control
  head and, unless the group may match zero times, a "matched" flag. */
  if (offset != 0)
    {
    stacksize = 2;
    if (common->capture_last_ptr != 0)
      stacksize++;
    }
  else
    stacksize = 1;

  if (needs_control_head)
    stacksize++;
  if (!zero)
    stacksize++;

  BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
  allocate_stack(common, stacksize);
  if (framesize == no_frame)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);

  stack = 0;
  if (offset != 0)
    {
    stack = 2;
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
    if (common->capture_last_ptr != 0)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
    if (needs_control_head)
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
    if (common->capture_last_ptr != 0)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
      stack = 3;
      }
    }
  else
    {
    if (needs_control_head)
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    stack = 1;
    }

  /* The flag goes after the control head slot, so stack is advanced
  before storing the flag and stepped back to the control head slot. */
  if (needs_control_head)
    stack++;
  if (!zero)
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), SLJIT_IMM, 1);
  if (needs_control_head)
    {
    stack--;
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
    }
  }
else
  {
  /* A frame is stored. Slot order from the top: optional flag, optional
  control head, STR_PTR (non-capturing only), previous private data value,
  then the frame. */
  stacksize = framesize + 1;
  if (!zero)
    stacksize++;
  if (needs_control_head)
    stacksize++;
  if (offset == 0)
    stacksize++;
  BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;

  allocate_stack(common, stacksize);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  if (needs_control_head)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));

  stack = 0;
  if (!zero)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
    stack = 1;
    }
  if (needs_control_head)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
    stack++;
    }
  if (offset == 0)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
    stack++;
    }
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
  init_frame(common, cc, NULL, stacksize - 1, stacksize - framesize);
  /* Step stack back over the private data and STR_PTR slots; it is used
  below as the offset of the control head slot. */
  stack -= 1 + (offset == 0);
  }

if (offset != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);

loop = LABEL();
while (*cc != OP_KETRPOS)
  {
  backtrack->top = NULL;
  backtrack->topbacktracks = NULL;
  cc += GET(cc, 1);

  compile_matchingpath(common, ccbegin, cc, backtrack);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return NULL;

  if (framesize < 0)
    {
    if (framesize == no_frame)
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);

    if (offset != 0)
      {
      /* Record the capture [previous position, STR_PTR] and make STR_PTR
      the start of the next iteration. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
      if (common->capture_last_ptr != 0)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
      }
    else
      {
      /* TMP1 keeps the previous position for the empty match check. */
      if (opcode == OP_SBRAPOS)
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      }

    /* Even if the match is empty, we need to reset the control head. */
    if (needs_control_head)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));

    if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
      add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));

    /* Clear the flag: at least one iteration has matched. */
    if (!zero)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
    }
  else
    {
    if (offset != 0)
      {
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
      if (common->capture_last_ptr != 0)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
      }
    else
      {
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      OP2(SLJIT_SUB, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
      if (opcode == OP_SBRAPOS)
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
      OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(-framesize - 2), STR_PTR, 0);
      }

    /* Even if the match is empty, we need to reset the control head. */
    if (needs_control_head)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));

    if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
      add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));

    /* Clear the flag: at least one iteration has matched. When a frame
    is stored (framesize >= 0 here) the flag is the topmost slot. */
    if (!zero)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    }

  JUMPTO(SLJIT_JUMP, loop);
  flush_stubs(common);

  /* The backtracking path of this alternative is emitted right here. */
  compile_backtrackingpath(common, backtrack->top);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return NULL;
  set_jumps(backtrack->topbacktracks, LABEL());

  /* Restore STR_PTR to the position saved by the last successful
  iteration (or by the entry code). */
  if (framesize < 0)
    {
    if (offset != 0)
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
    else
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    }
  else
    {
    if (offset != 0)
      {
      /* Last alternative. */
      if (*cc == OP_KETRPOS)
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
      }
    else
      {
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
      }
    }

  if (*cc == OP_KETRPOS)
    break;
  ccbegin = cc + 1 + LINK_SIZE;
  }

/* We don't have to restore the control head in case of a failed match. */

backtrack->topbacktracks = NULL;
if (!zero)
  {
  /* A flag that is still non-zero means that no iteration matched. */
  if (framesize < 0)
    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
  else /* TMP2 is set to [private_data_ptr] above. */
    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), STACK(-stacksize), SLJIT_IMM, 0));
  }

/* None of them matched. */
set_jumps(emptymatch, LABEL());
count_match(common);
return cc + 1 + LINK_SIZE;
}
9746
get_iterator_parameters(compiler_common * common,PCRE2_SPTR cc,PCRE2_UCHAR * opcode,PCRE2_UCHAR * type,sljit_u32 * max,sljit_u32 * exact,PCRE2_SPTR * end)9747 static SLJIT_INLINE PCRE2_SPTR get_iterator_parameters(compiler_common *common, PCRE2_SPTR cc, PCRE2_UCHAR *opcode, PCRE2_UCHAR *type, sljit_u32 *max, sljit_u32 *exact, PCRE2_SPTR *end)
9748 {
9749 int class_len;
9750
9751 *opcode = *cc;
9752 *exact = 0;
9753
9754 if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
9755 {
9756 cc++;
9757 *type = OP_CHAR;
9758 }
9759 else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
9760 {
9761 cc++;
9762 *type = OP_CHARI;
9763 *opcode -= OP_STARI - OP_STAR;
9764 }
9765 else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
9766 {
9767 cc++;
9768 *type = OP_NOT;
9769 *opcode -= OP_NOTSTAR - OP_STAR;
9770 }
9771 else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
9772 {
9773 cc++;
9774 *type = OP_NOTI;
9775 *opcode -= OP_NOTSTARI - OP_STAR;
9776 }
9777 else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
9778 {
9779 cc++;
9780 *opcode -= OP_TYPESTAR - OP_STAR;
9781 *type = OP_END;
9782 }
9783 else
9784 {
9785 SLJIT_ASSERT(*opcode == OP_CLASS || *opcode == OP_NCLASS || *opcode == OP_XCLASS);
9786 *type = *opcode;
9787 cc++;
9788 class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(PCRE2_UCHAR))) : GET(cc, 0);
9789 *opcode = cc[class_len - 1];
9790
9791 if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
9792 {
9793 *opcode -= OP_CRSTAR - OP_STAR;
9794 *end = cc + class_len;
9795
9796 if (*opcode == OP_PLUS || *opcode == OP_MINPLUS)
9797 {
9798 *exact = 1;
9799 *opcode -= OP_PLUS - OP_STAR;
9800 }
9801 }
9802 else if (*opcode >= OP_CRPOSSTAR && *opcode <= OP_CRPOSQUERY)
9803 {
9804 *opcode -= OP_CRPOSSTAR - OP_POSSTAR;
9805 *end = cc + class_len;
9806
9807 if (*opcode == OP_POSPLUS)
9808 {
9809 *exact = 1;
9810 *opcode = OP_POSSTAR;
9811 }
9812 }
9813 else
9814 {
9815 SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE || *opcode == OP_CRPOSRANGE);
9816 *max = GET2(cc, (class_len + IMM2_SIZE));
9817 *exact = GET2(cc, class_len);
9818
9819 if (*max == 0)
9820 {
9821 if (*opcode == OP_CRPOSRANGE)
9822 *opcode = OP_POSSTAR;
9823 else
9824 *opcode -= OP_CRRANGE - OP_STAR;
9825 }
9826 else
9827 {
9828 *max -= *exact;
9829 if (*max == 0)
9830 *opcode = OP_EXACT;
9831 else if (*max == 1)
9832 {
9833 if (*opcode == OP_CRPOSRANGE)
9834 *opcode = OP_POSQUERY;
9835 else
9836 *opcode -= OP_CRRANGE - OP_QUERY;
9837 }
9838 else
9839 {
9840 if (*opcode == OP_CRPOSRANGE)
9841 *opcode = OP_POSUPTO;
9842 else
9843 *opcode -= OP_CRRANGE - OP_UPTO;
9844 }
9845 }
9846 *end = cc + class_len + 2 * IMM2_SIZE;
9847 }
9848 return cc;
9849 }
9850
9851 switch(*opcode)
9852 {
9853 case OP_EXACT:
9854 *exact = GET2(cc, 0);
9855 cc += IMM2_SIZE;
9856 break;
9857
9858 case OP_PLUS:
9859 case OP_MINPLUS:
9860 *exact = 1;
9861 *opcode -= OP_PLUS - OP_STAR;
9862 break;
9863
9864 case OP_POSPLUS:
9865 *exact = 1;
9866 *opcode = OP_POSSTAR;
9867 break;
9868
9869 case OP_UPTO:
9870 case OP_MINUPTO:
9871 case OP_POSUPTO:
9872 *max = GET2(cc, 0);
9873 cc += IMM2_SIZE;
9874 break;
9875 }
9876
9877 if (*type == OP_END)
9878 {
9879 *type = *cc;
9880 *end = next_opcode(common, cc);
9881 cc++;
9882 return cc;
9883 }
9884
9885 *end = cc + 1;
9886 #ifdef SUPPORT_UNICODE
9887 if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
9888 #endif
9889 return cc;
9890 }
9891
/* Emits the matching path for a repeated single item: a character, negated
character, character type or character class followed by a quantifier.

The repeat is first decoded by get_iterator_parameters(). The mandatory part
("exact" iterations) is emitted first, then the optional part selected by the
normalized opcode.

Arguments:
  common   the JIT compiler state
  cc       points to the repeat opcode (or to the class opcode)
  parent   backtrack record used by PUSH_BACKTRACK for the new entry

Returns:   the address of the code following the item. When the single
           character after a greedy repeat is folded into the repeat
           (the "charpos" case below), that OP_CHAR is skipped as well.
           NULL is passed to PUSH_BACKTRACK, presumably as its failure
           return value.
*/

static PCRE2_SPTR compile_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
PCRE2_UCHAR opcode;
PCRE2_UCHAR type;
sljit_u32 max = 0, exact;
BOOL fast_fail;
sljit_s32 fast_str_ptr;
BOOL charpos_enabled;
PCRE2_UCHAR charpos_char;
unsigned int charpos_othercasebit;
PCRE2_SPTR end;
jump_list *no_match = NULL;
jump_list *no_char1_match = NULL;
struct sljit_jump *jump = NULL;
struct sljit_label *label;
int private_data_ptr = PRIVATE_DATA(cc);
/* The iterator state lives in private data when a slot was assigned,
otherwise in two words on the backtrack stack. */
int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
int tmp_base, tmp_offset;

PUSH_BACKTRACK(sizeof(char_iterator_backtrack), cc, NULL);

fast_str_ptr = PRIVATE_DATA(cc + 1);
fast_fail = TRUE;

SLJIT_ASSERT(common->fast_forward_bc_ptr == NULL || fast_str_ptr == 0 || cc == common->fast_forward_bc_ptr);

if (cc == common->fast_forward_bc_ptr)
  fast_fail = FALSE;
else if (common->fast_fail_start_ptr == 0)
  fast_str_ptr = 0;

SLJIT_ASSERT(common->fast_forward_bc_ptr != NULL || fast_str_ptr == 0
  || (fast_str_ptr >= common->fast_fail_start_ptr && fast_str_ptr <= common->fast_fail_end_ptr));

cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);

/* Location of the loop counter / saved position: the TMP3 register, except
for OP_EXTUNI where the POSSESSIVE0 stack slot is used instead. */
if (type != OP_EXTUNI)
  {
  tmp_base = TMP3;
  tmp_offset = 0;
  }
else
  {
  tmp_base = SLJIT_MEM1(SLJIT_SP);
  tmp_offset = POSSESSIVE0;
  }

if (fast_fail && fast_str_ptr != 0)
  add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), fast_str_ptr));

/* Handle fixed part first. */
if (exact > 1)
  {
  SLJIT_ASSERT(fast_str_ptr == 0);
  /* The first form checks the subject length once ("exact" code units) and
  then matches every item without a per-iteration end check. That is only
  valid when each item consumes exactly one code unit. OP_ANYNL (CR LF) and
  OP_EXTUNI (a whole cluster) may consume more, so a later iteration could
  read past STR_END; they must use the checked loop below. */
  if (common->mode == PCRE2_JIT_COMPLETE
#ifdef SUPPORT_UNICODE
    && !common->utf
#endif
    && type != OP_ANYNL && type != OP_EXTUNI)
    {
    OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(exact));
    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER, TMP1, 0, STR_END, 0));
    OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
    label = LABEL();
    compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE);
    OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, label);
    }
  else
    {
    OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
    label = LABEL();
    compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);
    OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, label);
    }
  }
else if (exact == 1)
  compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);

/* Optional part. */
switch(opcode)
  {
  case OP_STAR:
  case OP_UPTO:
  SLJIT_ASSERT(fast_str_ptr == 0 || opcode == OP_STAR);

  if (type == OP_ANYNL || type == OP_EXTUNI)
    {
    SLJIT_ASSERT(private_data_ptr == 0);
    SLJIT_ASSERT(fast_str_ptr == 0);

    /* Every reached position is pushed on the stack; a zero word is stored
    in the second slot below them. */
    allocate_stack(common, 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);

    if (opcode == OP_UPTO)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, max);

    label = LABEL();
    compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
    if (opcode == OP_UPTO)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
      OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
      jump = JUMP(SLJIT_ZERO);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
      }

    /* We cannot use TMP3 because of this allocate_stack. */
    allocate_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    JUMPTO(SLJIT_JUMP, label);
    if (jump != NULL)
      JUMPHERE(jump);
    }
  else
    {
    charpos_enabled = FALSE;
    charpos_char = 0;
    charpos_othercasebit = 0;

    /* If the repeat is directly followed by a single character, that
    character can be searched for while iterating (charpos mode). */
    if ((type != OP_CHAR && type != OP_CHARI) && (*end == OP_CHAR || *end == OP_CHARI))
      {
      charpos_enabled = TRUE;
#ifdef SUPPORT_UNICODE
      charpos_enabled = !common->utf || !HAS_EXTRALEN(end[1]);
#endif
      if (charpos_enabled && *end == OP_CHARI && char_has_othercase(common, end + 1))
        {
        charpos_othercasebit = char_get_othercase_bit(common, end + 1);
        if (charpos_othercasebit == 0)
          charpos_enabled = FALSE;
        }

      if (charpos_enabled)
        {
        charpos_char = end[1];
        /* Consume the OP_CHAR opcode. */
        end += 2;
#if PCRE2_CODE_UNIT_WIDTH == 8
        SLJIT_ASSERT((charpos_othercasebit >> 8) == 0);
#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
        SLJIT_ASSERT((charpos_othercasebit >> 9) == 0);
        if ((charpos_othercasebit & 0x100) != 0)
          charpos_othercasebit = (charpos_othercasebit & 0xff) << 8;
#endif
        if (charpos_othercasebit != 0)
          charpos_char |= charpos_othercasebit;

        BACKTRACK_AS(char_iterator_backtrack)->u.charpos.enabled = TRUE;
        BACKTRACK_AS(char_iterator_backtrack)->u.charpos.chr = charpos_char;
        BACKTRACK_AS(char_iterator_backtrack)->u.charpos.othercasebit = charpos_othercasebit;
        }
      }

    if (charpos_enabled)
      {
      if (opcode == OP_UPTO)
        OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max + 1);

      /* Search the first instance of charpos_char. */
      jump = JUMP(SLJIT_JUMP);
      label = LABEL();
      if (opcode == OP_UPTO)
        {
        OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
        add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_ZERO));
        }
      compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE);
      if (fast_str_ptr != 0)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
      JUMPHERE(jump);

      detect_partial_match(common, &backtrack->topbacktracks);
      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
      if (charpos_othercasebit != 0)
        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
      CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);

      if (private_data_ptr == 0)
        allocate_stack(common, 2);
      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
      OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
      if (opcode == OP_UPTO)
        {
        OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
        add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
        }

      /* Search the last instance of charpos_char. */
      label = LABEL();
      compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
      if (fast_str_ptr != 0)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
      detect_partial_match(common, &no_match);
      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
      if (charpos_othercasebit != 0)
        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
      if (opcode == OP_STAR)
        {
        CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);
        OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
        }
      else
        {
        jump = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char);
        OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
        JUMPHERE(jump);
        }

      if (opcode == OP_UPTO)
        {
        OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
        JUMPTO(SLJIT_NOT_ZERO, label);
        }
      else
        JUMPTO(SLJIT_JUMP, label);

      /* Continue just after the last recorded charpos_char. */
      set_jumps(no_match, LABEL());
      OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
      }
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    else if (common->utf)
      {
      if (private_data_ptr == 0)
        allocate_stack(common, 2);

      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
      OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);

      if (opcode == OP_UPTO)
        OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);

      /* The position after each successful item is saved, and restored
      when an item fails to match. */
      label = LABEL();
      compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);

      if (opcode == OP_UPTO)
        {
        OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
        JUMPTO(SLJIT_NOT_ZERO, label);
        }
      else
        JUMPTO(SLJIT_JUMP, label);

      set_jumps(no_match, LABEL());
      OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
      if (fast_str_ptr != 0)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
      }
#endif
    else
      {
      if (private_data_ptr == 0)
        allocate_stack(common, 2);

      OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
      if (opcode == OP_UPTO)
        OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);

      label = LABEL();
      detect_partial_match(common, &no_match);
      compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
      if (opcode == OP_UPTO)
        {
        OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
        JUMPTO(SLJIT_NOT_ZERO, label);
        /* Compensates the subtraction on the shared exit path below. */
        OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
        }
      else
        JUMPTO(SLJIT_JUMP, label);

      /* A failed item leaves STR_PTR one code unit too far: step back. */
      set_jumps(no_char1_match, LABEL());
      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      set_jumps(no_match, LABEL());
      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
      if (fast_str_ptr != 0)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
      }
    }
  BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
  break;

  case OP_MINSTAR:
  if (private_data_ptr == 0)
    allocate_stack(common, 1);
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
  if (fast_str_ptr != 0)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
  break;

  case OP_MINUPTO:
  SLJIT_ASSERT(fast_str_ptr == 0);
  if (private_data_ptr == 0)
    allocate_stack(common, 2);
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, max + 1);
  BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
  break;

  case OP_QUERY:
  case OP_MINQUERY:
  SLJIT_ASSERT(fast_str_ptr == 0);
  if (private_data_ptr == 0)
    allocate_stack(common, 1);
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  if (opcode == OP_QUERY)
    compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
  BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
  break;

  case OP_EXACT:
  /* Nothing optional: the fixed part above is the whole item. */
  break;

  case OP_POSSTAR:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
  if (common->utf)
    {
    OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
    label = LABEL();
    compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
    OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
    JUMPTO(SLJIT_JUMP, label);
    set_jumps(no_match, LABEL());
    OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
    if (fast_str_ptr != 0)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
    break;
    }
#endif
  label = LABEL();
  detect_partial_match(common, &no_match);
  compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
  JUMPTO(SLJIT_JUMP, label);
  set_jumps(no_char1_match, LABEL());
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  set_jumps(no_match, LABEL());
  if (fast_str_ptr != 0)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
  break;

  case OP_POSUPTO:
  SLJIT_ASSERT(fast_str_ptr == 0);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
  if (common->utf)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
    OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
    label = LABEL();
    compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
    OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, label);
    set_jumps(no_match, LABEL());
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
    break;
    }
#endif
  OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
  label = LABEL();
  detect_partial_match(common, &no_match);
  compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
  OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
  JUMPTO(SLJIT_NOT_ZERO, label);
  /* Compensates the subtraction on the shared exit path below. */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  set_jumps(no_char1_match, LABEL());
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  set_jumps(no_match, LABEL());
  break;

  case OP_POSQUERY:
  SLJIT_ASSERT(fast_str_ptr == 0);
  OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
  compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
  OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
  set_jumps(no_match, LABEL());
  OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
  break;

  default:
  SLJIT_UNREACHABLE();
  break;
  }

count_match(common);
return end;
}
10286
compile_fail_accept_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)10287 static SLJIT_INLINE PCRE2_SPTR compile_fail_accept_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
10288 {
10289 DEFINE_COMPILER;
10290 backtrack_common *backtrack;
10291
10292 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
10293
10294 if (*cc == OP_FAIL)
10295 {
10296 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
10297 return cc + 1;
10298 }
10299
10300 if (*cc == OP_ACCEPT && common->currententry == NULL && (common->re->overall_options & PCRE2_ENDANCHORED) != 0)
10301 add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
10302
10303 if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL || !common->might_be_empty)
10304 {
10305 /* No need to check notempty conditions. */
10306 if (common->accept_label == NULL)
10307 add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
10308 else
10309 JUMPTO(SLJIT_JUMP, common->accept_label);
10310 return cc + 1;
10311 }
10312
10313 if (common->accept_label == NULL)
10314 add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)));
10315 else
10316 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), common->accept_label);
10317 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
10318 OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options));
10319 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);
10320 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_NOT_ZERO));
10321 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);
10322 if (common->accept_label == NULL)
10323 add_jump(compiler, &common->accept, JUMP(SLJIT_ZERO));
10324 else
10325 JUMPTO(SLJIT_ZERO, common->accept_label);
10326 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
10327 if (common->accept_label == NULL)
10328 add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
10329 else
10330 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->accept_label);
10331 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
10332 return cc + 1;
10333 }
10334
compile_close_matchingpath(compiler_common * common,PCRE2_SPTR cc)10335 static SLJIT_INLINE PCRE2_SPTR compile_close_matchingpath(compiler_common *common, PCRE2_SPTR cc)
10336 {
10337 DEFINE_COMPILER;
10338 int offset = GET2(cc, 1);
10339 BOOL optimized_cbracket = common->optimized_cbracket[offset] != 0;
10340
10341 /* Data will be discarded anyway... */
10342 if (common->currententry != NULL)
10343 return cc + 1 + IMM2_SIZE;
10344
10345 if (!optimized_cbracket)
10346 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR_PRIV(offset));
10347 offset <<= 1;
10348 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
10349 if (!optimized_cbracket)
10350 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
10351 return cc + 1 + IMM2_SIZE;
10352 }
10353
compile_control_verb_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)10354 static SLJIT_INLINE PCRE2_SPTR compile_control_verb_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
10355 {
10356 DEFINE_COMPILER;
10357 backtrack_common *backtrack;
10358 PCRE2_UCHAR opcode = *cc;
10359 PCRE2_SPTR ccend = cc + 1;
10360
10361 if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG ||
10362 opcode == OP_SKIP_ARG || opcode == OP_THEN_ARG)
10363 ccend += 2 + cc[1];
10364
10365 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
10366
10367 if (opcode == OP_SKIP)
10368 {
10369 allocate_stack(common, 1);
10370 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
10371 return ccend;
10372 }
10373
10374 if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG || opcode == OP_THEN_ARG)
10375 {
10376 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
10377 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
10378 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
10379 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
10380 }
10381
10382 return ccend;
10383 }
10384
10385 static PCRE2_UCHAR then_trap_opcode[1] = { OP_THEN_TRAP };
10386
compile_then_trap_matchingpath(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,backtrack_common * parent)10387 static SLJIT_INLINE void compile_then_trap_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent)
10388 {
10389 DEFINE_COMPILER;
10390 backtrack_common *backtrack;
10391 BOOL needs_control_head;
10392 int size;
10393
10394 PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
10395 common->then_trap = BACKTRACK_AS(then_trap_backtrack);
10396 BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
10397 BACKTRACK_AS(then_trap_backtrack)->start = (sljit_sw)(cc - common->start);
10398 BACKTRACK_AS(then_trap_backtrack)->framesize = get_framesize(common, cc, ccend, FALSE, &needs_control_head);
10399
10400 size = BACKTRACK_AS(then_trap_backtrack)->framesize;
10401 size = 3 + (size < 0 ? 0 : size);
10402
10403 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
10404 allocate_stack(common, size);
10405 if (size > 3)
10406 OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0, SLJIT_IMM, (size - 3) * sizeof(sljit_sw));
10407 else
10408 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
10409 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 1), SLJIT_IMM, BACKTRACK_AS(then_trap_backtrack)->start);
10410 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 2), SLJIT_IMM, type_then_trap);
10411 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 3), TMP2, 0);
10412
10413 size = BACKTRACK_AS(then_trap_backtrack)->framesize;
10414 if (size >= 0)
10415 init_frame(common, cc, ccend, size - 1, 0);
10416 }
10417
compile_matchingpath(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,backtrack_common * parent)10418 static void compile_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent)
10419 {
10420 DEFINE_COMPILER;
10421 backtrack_common *backtrack;
10422 BOOL has_then_trap = FALSE;
10423 then_trap_backtrack *save_then_trap = NULL;
10424
10425 SLJIT_ASSERT(*ccend == OP_END || (*ccend >= OP_ALT && *ccend <= OP_KETRPOS));
10426
10427 if (common->has_then && common->then_offsets[cc - common->start] != 0)
10428 {
10429 SLJIT_ASSERT(*ccend != OP_END && common->control_head_ptr != 0);
10430 has_then_trap = TRUE;
10431 save_then_trap = common->then_trap;
10432 /* Tail item on backtrack. */
10433 compile_then_trap_matchingpath(common, cc, ccend, parent);
10434 }
10435
10436 while (cc < ccend)
10437 {
10438 switch(*cc)
10439 {
10440 case OP_SOD:
10441 case OP_SOM:
10442 case OP_NOT_WORD_BOUNDARY:
10443 case OP_WORD_BOUNDARY:
10444 case OP_EODN:
10445 case OP_EOD:
10446 case OP_DOLL:
10447 case OP_DOLLM:
10448 case OP_CIRC:
10449 case OP_CIRCM:
10450 case OP_REVERSE:
10451 cc = compile_simple_assertion_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
10452 break;
10453
10454 case OP_NOT_DIGIT:
10455 case OP_DIGIT:
10456 case OP_NOT_WHITESPACE:
10457 case OP_WHITESPACE:
10458 case OP_NOT_WORDCHAR:
10459 case OP_WORDCHAR:
10460 case OP_ANY:
10461 case OP_ALLANY:
10462 case OP_ANYBYTE:
10463 case OP_NOTPROP:
10464 case OP_PROP:
10465 case OP_ANYNL:
10466 case OP_NOT_HSPACE:
10467 case OP_HSPACE:
10468 case OP_NOT_VSPACE:
10469 case OP_VSPACE:
10470 case OP_EXTUNI:
10471 case OP_NOT:
10472 case OP_NOTI:
10473 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
10474 break;
10475
10476 case OP_SET_SOM:
10477 PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
10478 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
10479 allocate_stack(common, 1);
10480 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
10481 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10482 cc++;
10483 break;
10484
10485 case OP_CHAR:
10486 case OP_CHARI:
10487 if (common->mode == PCRE2_JIT_COMPLETE)
10488 cc = compile_charn_matchingpath(common, cc, ccend, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
10489 else
10490 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
10491 break;
10492
10493 case OP_STAR:
10494 case OP_MINSTAR:
10495 case OP_PLUS:
10496 case OP_MINPLUS:
10497 case OP_QUERY:
10498 case OP_MINQUERY:
10499 case OP_UPTO:
10500 case OP_MINUPTO:
10501 case OP_EXACT:
10502 case OP_POSSTAR:
10503 case OP_POSPLUS:
10504 case OP_POSQUERY:
10505 case OP_POSUPTO:
10506 case OP_STARI:
10507 case OP_MINSTARI:
10508 case OP_PLUSI:
10509 case OP_MINPLUSI:
10510 case OP_QUERYI:
10511 case OP_MINQUERYI:
10512 case OP_UPTOI:
10513 case OP_MINUPTOI:
10514 case OP_EXACTI:
10515 case OP_POSSTARI:
10516 case OP_POSPLUSI:
10517 case OP_POSQUERYI:
10518 case OP_POSUPTOI:
10519 case OP_NOTSTAR:
10520 case OP_NOTMINSTAR:
10521 case OP_NOTPLUS:
10522 case OP_NOTMINPLUS:
10523 case OP_NOTQUERY:
10524 case OP_NOTMINQUERY:
10525 case OP_NOTUPTO:
10526 case OP_NOTMINUPTO:
10527 case OP_NOTEXACT:
10528 case OP_NOTPOSSTAR:
10529 case OP_NOTPOSPLUS:
10530 case OP_NOTPOSQUERY:
10531 case OP_NOTPOSUPTO:
10532 case OP_NOTSTARI:
10533 case OP_NOTMINSTARI:
10534 case OP_NOTPLUSI:
10535 case OP_NOTMINPLUSI:
10536 case OP_NOTQUERYI:
10537 case OP_NOTMINQUERYI:
10538 case OP_NOTUPTOI:
10539 case OP_NOTMINUPTOI:
10540 case OP_NOTEXACTI:
10541 case OP_NOTPOSSTARI:
10542 case OP_NOTPOSPLUSI:
10543 case OP_NOTPOSQUERYI:
10544 case OP_NOTPOSUPTOI:
10545 case OP_TYPESTAR:
10546 case OP_TYPEMINSTAR:
10547 case OP_TYPEPLUS:
10548 case OP_TYPEMINPLUS:
10549 case OP_TYPEQUERY:
10550 case OP_TYPEMINQUERY:
10551 case OP_TYPEUPTO:
10552 case OP_TYPEMINUPTO:
10553 case OP_TYPEEXACT:
10554 case OP_TYPEPOSSTAR:
10555 case OP_TYPEPOSPLUS:
10556 case OP_TYPEPOSQUERY:
10557 case OP_TYPEPOSUPTO:
10558 cc = compile_iterator_matchingpath(common, cc, parent);
10559 break;
10560
10561 case OP_CLASS:
10562 case OP_NCLASS:
10563 if (cc[1 + (32 / sizeof(PCRE2_UCHAR))] >= OP_CRSTAR && cc[1 + (32 / sizeof(PCRE2_UCHAR))] <= OP_CRPOSRANGE)
10564 cc = compile_iterator_matchingpath(common, cc, parent);
10565 else
10566 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
10567 break;
10568
10569 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
10570 case OP_XCLASS:
10571 if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRPOSRANGE)
10572 cc = compile_iterator_matchingpath(common, cc, parent);
10573 else
10574 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
10575 break;
10576 #endif
10577
10578 case OP_REF:
10579 case OP_REFI:
10580 if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRPOSRANGE)
10581 cc = compile_ref_iterator_matchingpath(common, cc, parent);
10582 else
10583 {
10584 compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
10585 cc += 1 + IMM2_SIZE;
10586 }
10587 break;
10588
10589 case OP_DNREF:
10590 case OP_DNREFI:
10591 if (cc[1 + 2 * IMM2_SIZE] >= OP_CRSTAR && cc[1 + 2 * IMM2_SIZE] <= OP_CRPOSRANGE)
10592 cc = compile_ref_iterator_matchingpath(common, cc, parent);
10593 else
10594 {
10595 compile_dnref_search(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
10596 compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
10597 cc += 1 + 2 * IMM2_SIZE;
10598 }
10599 break;
10600
10601 case OP_RECURSE:
10602 cc = compile_recurse_matchingpath(common, cc, parent);
10603 break;
10604
10605 case OP_CALLOUT:
10606 case OP_CALLOUT_STR:
10607 cc = compile_callout_matchingpath(common, cc, parent);
10608 break;
10609
10610 case OP_ASSERT:
10611 case OP_ASSERT_NOT:
10612 case OP_ASSERTBACK:
10613 case OP_ASSERTBACK_NOT:
10614 PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
10615 cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
10616 break;
10617
10618 case OP_BRAMINZERO:
10619 PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc);
10620 cc = bracketend(cc + 1);
10621 if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN)
10622 {
10623 allocate_stack(common, 1);
10624 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
10625 }
10626 else
10627 {
10628 allocate_stack(common, 2);
10629 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
10630 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
10631 }
10632 BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL();
10633 count_match(common);
10634 break;
10635
10636 case OP_ONCE:
10637 case OP_BRA:
10638 case OP_CBRA:
10639 case OP_COND:
10640 case OP_SBRA:
10641 case OP_SCBRA:
10642 case OP_SCOND:
10643 cc = compile_bracket_matchingpath(common, cc, parent);
10644 break;
10645
10646 case OP_BRAZERO:
10647 if (cc[1] > OP_ASSERTBACK_NOT)
10648 cc = compile_bracket_matchingpath(common, cc, parent);
10649 else
10650 {
10651 PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
10652 cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
10653 }
10654 break;
10655
10656 case OP_BRAPOS:
10657 case OP_CBRAPOS:
10658 case OP_SBRAPOS:
10659 case OP_SCBRAPOS:
10660 case OP_BRAPOSZERO:
10661 cc = compile_bracketpos_matchingpath(common, cc, parent);
10662 break;
10663
10664 case OP_MARK:
10665 PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
10666 SLJIT_ASSERT(common->mark_ptr != 0);
10667 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
10668 allocate_stack(common, common->has_skip_arg ? 5 : 1);
10669 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
10670 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0), TMP2, 0);
10671 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
10672 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
10673 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
10674 if (common->has_skip_arg)
10675 {
10676 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
10677 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
10678 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, type_mark);
10679 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), SLJIT_IMM, (sljit_sw)(cc + 2));
10680 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);
10681 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
10682 }
10683 cc += 1 + 2 + cc[1];
10684 break;
10685
10686 case OP_PRUNE:
10687 case OP_PRUNE_ARG:
10688 case OP_SKIP:
10689 case OP_SKIP_ARG:
10690 case OP_THEN:
10691 case OP_THEN_ARG:
10692 case OP_COMMIT:
10693 case OP_COMMIT_ARG:
10694 cc = compile_control_verb_matchingpath(common, cc, parent);
10695 break;
10696
10697 case OP_FAIL:
10698 case OP_ACCEPT:
10699 case OP_ASSERT_ACCEPT:
10700 cc = compile_fail_accept_matchingpath(common, cc, parent);
10701 break;
10702
10703 case OP_CLOSE:
10704 cc = compile_close_matchingpath(common, cc);
10705 break;
10706
10707 case OP_SKIPZERO:
10708 cc = bracketend(cc + 1);
10709 break;
10710
10711 default:
10712 SLJIT_UNREACHABLE();
10713 return;
10714 }
10715 if (cc == NULL)
10716 return;
10717 }
10718
10719 if (has_then_trap)
10720 {
10721 /* Head item on backtrack. */
10722 PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
10723 BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
10724 BACKTRACK_AS(then_trap_backtrack)->then_trap = common->then_trap;
10725 common->then_trap = save_then_trap;
10726 }
10727 SLJIT_ASSERT(cc == ccend);
10728 }
10729
/* The helper macros of the matching path generator end their scope here. */
#undef PUSH_BACKTRACK
#undef PUSH_BACKTRACK_NOVALUE
#undef BACKTRACK_AS

/* Generates the backtracking path for the given backtrack list and leaves
the enclosing (void) function when the sljit compiler reports an error.
Local variables called "common" and "compiler" must be in scope. */
#define COMPILE_BACKTRACKINGPATH(backtrack_list) \
  do \
    { \
    compile_backtrackingpath(common, (backtrack_list)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    } \
  while (0)

/* Reinterprets the local "current" backtrack pointer as a concrete type. */
#define CURRENT_AS(type) ((type *)current)
10744
/* Generates the backtracking path of a single character iterator. The
iterator state is kept on the backtrack stack when no private data slot
belongs to the opcode, otherwise in the private data area.

Arguments:
  common     the compiler state
  current    the backtrack record of the iterator (char_iterator_backtrack)
*/
static void compile_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
char_iterator_backtrack *iterator = CURRENT_AS(char_iterator_backtrack);
PCRE2_SPTR cc = current->cc;
PCRE2_UCHAR opcode;
PCRE2_UCHAR type;
sljit_u32 max = 0, exact;
PCRE2_SPTR end;
struct sljit_label *retry_label = NULL;
struct sljit_jump *jump = NULL;
jump_list *no_match = NULL;
int private_data_ptr = PRIVATE_DATA(cc);
BOOL on_stack = (private_data_ptr == 0);
int base, offset0, offset1;

/* Select where the two state words of the iterator are stored. */
if (on_stack)
  {
  base = SLJIT_MEM1(STACK_TOP);
  offset0 = STACK(0);
  offset1 = STACK(1);
  }
else
  {
  base = SLJIT_MEM1(SLJIT_SP);
  offset0 = private_data_ptr;
  offset1 = private_data_ptr + (int)sizeof(sljit_sw);
  }

cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);

switch(opcode)
  {
  case OP_STAR:
  case OP_UPTO:
  if (type == OP_ANYNL || type == OP_EXTUNI)
    {
    /* Every earlier position was pushed onto the stack; pop the next one
    and retry unless the zero terminator is reached. */
    SLJIT_ASSERT(private_data_ptr == 0);
    set_jumps(iterator->u.backtracks, LABEL());
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, iterator->matchingpath);
    break;
    }

  if (iterator->u.charpos.enabled)
    {
    /* Step back until the character before STR_PTR equals the recorded
    character, or the lower limit (second state word) is reached. */
    OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
    OP1(SLJIT_MOV, TMP2, 0, base, offset1);
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

    jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
    retry_label = LABEL();
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
    OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
    if (iterator->u.charpos.othercasebit != 0)
      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, iterator->u.charpos.othercasebit);
    CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, iterator->u.charpos.chr, iterator->matchingpath);
    skip_char_back(common);
    CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP2, 0, retry_label);
    }
  else
    {
    /* Give back one character if the lower limit allows it. */
    OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
    jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, base, offset1);
    skip_char_back(common);
    OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
    JUMPTO(SLJIT_JUMP, iterator->matchingpath);
    }
  JUMPHERE(jump);
  if (on_stack)
    free_stack(common, 2);
  break;

  case OP_MINSTAR:
  /* Match one more character, then continue with the matching path. */
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  JUMPTO(SLJIT_JUMP, iterator->matchingpath);
  set_jumps(no_match, LABEL());
  if (on_stack)
    free_stack(common, 1);
  break;

  case OP_MINUPTO:
  /* Same as above, but the second state word is a counter which is
  decreased first; the iterator fails when it becomes zero. */
  OP1(SLJIT_MOV, TMP1, 0, base, offset1);
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));

  OP1(SLJIT_MOV, base, offset1, TMP1, 0);
  compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  JUMPTO(SLJIT_JUMP, iterator->matchingpath);

  set_jumps(no_match, LABEL());
  if (on_stack)
    free_stack(common, 2);
  break;

  case OP_QUERY:
  /* The saved word is cleared after it is read, so a zero value means
  that nothing is left to try. */
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, iterator->matchingpath);
  jump = JUMP(SLJIT_JUMP);
  set_jumps(iterator->u.backtracks, LABEL());
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
  JUMPTO(SLJIT_JUMP, iterator->matchingpath);
  JUMPHERE(jump);
  if (on_stack)
    free_stack(common, 1);
  break;

  case OP_MINQUERY:
  /* Try to match the optional character unless the saved word is zero. */
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
  jump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
  JUMPTO(SLJIT_JUMP, iterator->matchingpath);
  set_jumps(no_match, LABEL());
  JUMPHERE(jump);
  if (on_stack)
    free_stack(common, 1);
  break;

  case OP_EXACT:
  case OP_POSSTAR:
  case OP_POSQUERY:
  case OP_POSUPTO:
  /* No code is generated here for these iterators. */
  break;

  default:
  SLJIT_UNREACHABLE();
  break;
  }

set_jumps(current->topbacktracks, LABEL());
}
10872
compile_ref_iterator_backtrackingpath(compiler_common * common,struct backtrack_common * current)10873 static SLJIT_INLINE void compile_ref_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
10874 {
10875 DEFINE_COMPILER;
10876 PCRE2_SPTR cc = current->cc;
10877 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
10878 PCRE2_UCHAR type;
10879
10880 type = cc[ref ? 1 + IMM2_SIZE : 1 + 2 * IMM2_SIZE];
10881
10882 if ((type & 0x1) == 0)
10883 {
10884 /* Maximize case. */
10885 set_jumps(current->topbacktracks, LABEL());
10886 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10887 free_stack(common, 1);
10888 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
10889 return;
10890 }
10891
10892 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10893 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
10894 set_jumps(current->topbacktracks, LABEL());
10895 free_stack(common, ref ? 2 : 3);
10896 }
10897
compile_recurse_backtrackingpath(compiler_common * common,struct backtrack_common * current)10898 static SLJIT_INLINE void compile_recurse_backtrackingpath(compiler_common *common, struct backtrack_common *current)
10899 {
10900 DEFINE_COMPILER;
10901 recurse_entry *entry;
10902
10903 if (!CURRENT_AS(recurse_backtrack)->inlined_pattern)
10904 {
10905 entry = CURRENT_AS(recurse_backtrack)->entry;
10906 if (entry->backtrack_label == NULL)
10907 add_jump(compiler, &entry->backtrack_calls, JUMP(SLJIT_FAST_CALL));
10908 else
10909 JUMPTO(SLJIT_FAST_CALL, entry->backtrack_label);
10910 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(recurse_backtrack)->matchingpath);
10911 }
10912 else
10913 compile_backtrackingpath(common, current->top);
10914
10915 set_jumps(current->topbacktracks, LABEL());
10916 }
10917
/* Generates the backtracking path of an assertion, which may be preceded
by OP_BRAZERO (OP_BRAMINZERO is never passed to this function).

Arguments:
  common     the compiler state
  current    the backtrack record of the assertion (assert_backtrack)
*/
static void compile_assert_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
assert_backtrack *assertion = CURRENT_AS(assert_backtrack);
PCRE2_SPTR cc = current->cc;
BOOL is_brazero = FALSE;
BOOL is_negative;
struct sljit_jump *brajump = NULL;

SLJIT_ASSERT(*cc != OP_BRAMINZERO);
if (*cc == OP_BRAZERO)
  {
  is_brazero = TRUE;
  cc++;

  /* Load the word saved on the top of the stack. */
  SLJIT_ASSERT(current->topbacktracks == NULL);
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  }

is_negative = (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT);

if (assertion->framesize < 0)
  {
  /* No frame was created by the assertion. */
  set_jumps(current->topbacktracks, LABEL());

  if (is_brazero)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, assertion->matchingpath);
    free_stack(common, 1);
    }
  return;
  }

if (is_brazero)
  {
  if (is_negative)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, assertion->matchingpath);
    free_stack(common, 1);
    return;
    }
  free_stack(common, 1);
  brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  }

if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK)
  {
  /* Revert the frame of the assertion and restore the previous value of
  its private data slot. */
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assertion->private_data_ptr);
  add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
  OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assertion->framesize - 1) * sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assertion->private_data_ptr, TMP1, 0);
  }

/* Failures recorded for this assertion arrive here in both cases. */
set_jumps(current->topbacktracks, LABEL());

if (is_brazero)
  {
  /* We know there is enough space on the stack. */
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
  JUMPTO(SLJIT_JUMP, assertion->matchingpath);
  JUMPHERE(brajump);
  }
}
10986
/* Generates the backtracking path of a bracket (OP_BRA, OP_CBRA, OP_ONCE,
OP_COND and their OP_S* forms), optionally preceded by OP_BRAZERO or
OP_BRAMINZERO. The code is emitted in this order: the repeat prologue,
restoring the capture data, selecting the alternative to be continued, the
backtracking path of the last compiled alternative, then the matching and
backtracking paths of each remaining alternative, and finally the epilogue
which releases the saved data and handles the repeat.

Arguments:
  common     the compiler state
  current    the backtrack record of the bracket (bracket_backtrack)
*/
static void compile_bracket_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
bracket_backtrack *bracket = CURRENT_AS(bracket_backtrack);
int opcode, stacksize, alt_count, alt_max;
int offset = 0;
int private_data_ptr = bracket->private_data_ptr;
int repeat_ptr = 0, repeat_type = 0, repeat_count = 0;
PCRE2_SPTR cc = current->cc;
PCRE2_SPTR ccbegin;
PCRE2_SPTR ccprev;
PCRE2_SPTR ket_ptr;
PCRE2_UCHAR bra = OP_BRA;
PCRE2_UCHAR ket;
assert_backtrack *cond_assert;
sljit_uw *next_update_addr = NULL;
BOOL has_alternatives;
BOOL is_cond;
BOOL needs_control_head = FALSE;
struct sljit_jump *brazero = NULL;
struct sljit_jump *alt1 = NULL;
struct sljit_jump *alt2 = NULL;
struct sljit_jump *once = NULL;
struct sljit_jump *cond = NULL;
struct sljit_label *rmin_label = NULL;
struct sljit_label *exact_label = NULL;

if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
  {
  bra = *cc;
  cc++;
  }

opcode = *cc;
/* OP_COND may be replaced by OP_SCOND below, so this flag stays valid. */
is_cond = (opcode == OP_COND || opcode == OP_SCOND);

/* A plain OP_KET with private data represents a counted repeat. */
ket_ptr = bracketend(cc) - 1 - LINK_SIZE;
ket = *ket_ptr;
if (ket == OP_KET && PRIVATE_DATA(ket_ptr) != 0)
  {
  repeat_ptr = PRIVATE_DATA(ket_ptr);
  repeat_type = PRIVATE_DATA(ket_ptr + 2);
  repeat_count = PRIVATE_DATA(ket_ptr + 3);
  SLJIT_ASSERT(repeat_type != 0 && repeat_count != 0);
  if (repeat_type == OP_UPTO)
    ket = OP_KETRMAX;
  else if (repeat_type == OP_MINUPTO)
    ket = OP_KETRMIN;
  }

ccbegin = cc;
cc += GET(cc, 1);
has_alternatives = *cc == OP_ALT;
if (is_cond)
  has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || bracket->u.condfailed != NULL;
if (opcode == OP_CBRA || opcode == OP_SCBRA)
  offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1;
if (opcode == OP_COND && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
  opcode = OP_SCOND;

alt_max = has_alternatives ? no_alternatives(ccbegin) : 0;

/* The lowest bit of framesize holds the needs_control_head flag. */
if (opcode == OP_ONCE)
  {
  needs_control_head = (bracket->u.framesize & 0x1) != 0;
  bracket->u.framesize >>= 1;
  }

if (ket != OP_KET && repeat_type != 0)
  {
  /* Pop the saved counter. TMP1 is also used by OP_KETRMIN below. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  if (repeat_type == OP_UPTO)
    OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0, SLJIT_IMM, 1);
  else
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
  }

if (ket == OP_KETRMAX)
  {
  if (bra == OP_BRAZERO)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    brazero = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
    }
  }
else if (ket == OP_KETRMIN)
  {
  if (bra != OP_BRAMINZERO)
    {
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    if (repeat_type != 0)
      {
      /* TMP1 holds the counter loaded a few lines above. */
      CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, bracket->recursive_matchingpath);
      /* Drop STR_PTR for non-greedy plus quantifier. */
      if (opcode != OP_ONCE)
        free_stack(common, 1);
      }
    else if (opcode >= OP_SBRA || opcode == OP_ONCE)
      {
      /* Checking zero-length iteration. */
      if (opcode != OP_ONCE || bracket->u.framesize < 0)
        CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, bracket->recursive_matchingpath);
      else
        {
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
        CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-bracket->u.framesize - 2), bracket->recursive_matchingpath);
        }
      /* Drop STR_PTR for non-greedy plus quantifier. */
      if (opcode != OP_ONCE)
        free_stack(common, 1);
      }
    else
      JUMPTO(SLJIT_JUMP, bracket->recursive_matchingpath);
    }
  rmin_label = LABEL();
  if (repeat_type != 0)
    OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
  }
else if (bra == OP_BRAZERO)
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  brazero = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
  }
else if (repeat_type == OP_EXACT)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
  exact_label = LABEL();
  }

/* Restore the capture data which was saved on the stack. */
if (offset != 0)
  {
  if (common->capture_last_ptr != 0)
    {
    SLJIT_ASSERT(common->optimized_cbracket[offset >> 1] == 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
    free_stack(common, 3);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP2, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
    }
  else if (common->optimized_cbracket[offset >> 1] == 0)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    free_stack(common, 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
    }
  }

/* Select the alternative whose backtracking path must be continued. */
if (opcode == OP_ONCE)
  {
  if (bracket->u.framesize >= 0)
    {
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
    OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (bracket->u.framesize - 1) * sizeof(sljit_sw));
    }
  once = JUMP(SLJIT_JUMP);
  }
else if (is_cond)
  {
  if (has_alternatives)
    {
    /* Always exactly one alternative. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);

    alt_max = 2;
    alt1 = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw));
    }
  }
else if (has_alternatives)
  {
  /* TMP1 receives the saved value which identifies the alternative. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);

  if (alt_max > 4)
    {
    /* Table jump if alt_max is greater than 4. */
    next_update_addr = allocate_read_only_data(common, alt_max * sizeof(sljit_uw));
    if (SLJIT_UNLIKELY(next_update_addr == NULL))
      return;
    sljit_emit_ijump(compiler, SLJIT_JUMP, SLJIT_MEM1(TMP1), (sljit_sw)next_update_addr);
    add_label_addr(common, next_update_addr++);
    }
  else
    {
    if (alt_max == 4)
      alt2 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw));
    alt1 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw));
    }
  }

COMPILE_BACKTRACKINGPATH(current->top);
if (current->topbacktracks)
  set_jumps(current->topbacktracks, LABEL());

if (is_cond)
  {
  /* Conditional block always has at most one alternative. */
  if (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT)
    {
    SLJIT_ASSERT(has_alternatives);
    cond_assert = bracket->u.assert;
    if (cond_assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK))
      {
      /* Revert the frame of the positive assertion. */
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), cond_assert->private_data_ptr);
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (cond_assert->framesize - 1) * sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cond_assert->private_data_ptr, TMP1, 0);
      }
    cond = JUMP(SLJIT_JUMP);
    set_jumps(bracket->u.assert->condfailed, LABEL());
    }
  else if (bracket->u.condfailed != NULL)
    {
    SLJIT_ASSERT(has_alternatives);
    cond = JUMP(SLJIT_JUMP);
    set_jumps(bracket->u.condfailed, LABEL());
    }
  else
    SLJIT_ASSERT(!has_alternatives);
  }

if (has_alternatives)
  {
  alt_count = sizeof(sljit_uw);
  do
    {
    current->top = NULL;
    current->topbacktracks = NULL;
    current->nextbacktracks = NULL;
    /* Conditional blocks always have an additional alternative, even if it is empty. */
    if (*cc == OP_ALT)
      {
      ccprev = cc + 1 + LINK_SIZE;
      cc += GET(cc, 1);
      if (!is_cond)
        {
        /* Reload the subject position before the next alternative. */
        if (opcode == OP_ONCE)
          OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(needs_control_head ? 1 : 0));
        else if (private_data_ptr != 0)
          OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
        else
          OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
        }
      compile_matchingpath(common, ccprev, cc, current);
      if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
        return;
      }

    /* Instructions after the current alternative is successfully matched. */
    /* There is a similar code in compile_bracket_matchingpath. */
    if (opcode == OP_ONCE)
      match_once_common(common, ket, bracket->u.framesize, private_data_ptr, has_alternatives, needs_control_head);

    /* First pass: count the stack words which are needed. */
    stacksize = 0;
    if (repeat_type == OP_MINUPTO)
      {
      /* We need to preserve the counter. TMP2 will be used below. */
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
      stacksize++;
      }
    if (ket != OP_KET || bra != OP_BRA)
      stacksize++;
    if (offset != 0)
      {
      if (common->capture_last_ptr != 0)
        stacksize++;
      if (common->optimized_cbracket[offset >> 1] == 0)
        stacksize += 2;
      }
    if (opcode != OP_ONCE)
      stacksize++;

    if (stacksize > 0)
      allocate_stack(common, stacksize);

    /* Second pass: fill the allocated words. */
    stacksize = 0;
    if (repeat_type == OP_MINUPTO)
      {
      /* TMP2 was set above. */
      OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
      stacksize++;
      }

    if (ket != OP_KET || bra != OP_BRA)
      {
      if (ket != OP_KET)
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
      else
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
      stacksize++;
      }

    if (offset != 0)
      stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);

    if (opcode != OP_ONCE)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, alt_count);

    if (offset != 0 && ket == OP_KETRMAX && common->optimized_cbracket[offset >> 1] != 0)
      {
      /* If ket is not OP_KETRMAX, this code path is executed after the jump to alternative_matchingpath. */
      SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
      }

    JUMPTO(SLJIT_JUMP, bracket->alternative_matchingpath);

    /* Entry point of the backtracking path of this alternative. */
    if (opcode != OP_ONCE)
      {
      if (alt_max > 4)
        add_label_addr(common, next_update_addr++);
      else
        {
        if (alt_count != 2 * sizeof(sljit_uw))
          {
          JUMPHERE(alt1);
          if (alt_max == 3 && alt_count == sizeof(sljit_uw))
            alt2 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw));
          }
        else
          {
          JUMPHERE(alt2);
          if (alt_max == 4)
            alt1 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_uw));
          }
        }
      alt_count += sizeof(sljit_uw);
      }

    COMPILE_BACKTRACKINGPATH(current->top);
    if (current->topbacktracks)
      set_jumps(current->topbacktracks, LABEL());
    SLJIT_ASSERT(!current->nextbacktracks);
    }
  while (*cc == OP_ALT);

  if (cond != NULL)
    {
    SLJIT_ASSERT(is_cond);
    cond_assert = bracket->u.assert;
    /* The opcode test comes first: cond_assert is only dereferenced when
    the condition is a negative assertion. */
    if ((ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT) && cond_assert->framesize >= 0)
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), cond_assert->private_data_ptr);
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (cond_assert->framesize - 1) * sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cond_assert->private_data_ptr, TMP1, 0);
      }
    JUMPHERE(cond);
    }

  /* Free the STR_PTR. */
  if (private_data_ptr == 0)
    free_stack(common, 1);
  }

/* Epilogue: restore the data which was saved when the bracket was entered. */
if (offset != 0)
  {
  /* Using both tmp register is better for instruction scheduling. */
  if (common->optimized_cbracket[offset >> 1] != 0)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    free_stack(common, 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
    }
  else
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }
  }
else if (opcode == OP_SBRA || opcode == OP_SCOND)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  }
else if (opcode == OP_ONCE)
  {
  cc = ccbegin + GET(ccbegin, 1);
  stacksize = needs_control_head ? 1 : 0;

  if (bracket->u.framesize >= 0)
    {
    /* Reset head and drop saved frame. */
    stacksize += bracket->u.framesize + ((ket != OP_KET || *cc == OP_ALT) ? 2 : 1);
    }
  else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN))
    {
    /* The STR_PTR must be released. */
    stacksize++;
    }

  if (stacksize > 0)
    free_stack(common, stacksize);

  JUMPHERE(once);
  /* Restore previous private_data_ptr */
  if (bracket->u.framesize >= 0)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-bracket->u.framesize - 1));
  else if (ket == OP_KETRMIN)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    /* See the comment in the OP_KETRMIN epilogue below. */
    free_stack(common, 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }
  }

/* Handle the repeat of the bracket. */
if (repeat_type == OP_EXACT)
  {
  OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
  CMPTO(SLJIT_LESS_EQUAL, TMP1, 0, SLJIT_IMM, repeat_count, exact_label);
  }
else if (ket == OP_KETRMAX)
  {
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  if (bra != OP_BRAZERO)
    free_stack(common, 1);

  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, bracket->recursive_matchingpath);
  if (bra == OP_BRAZERO)
    {
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    JUMPTO(SLJIT_JUMP, bracket->zero_matchingpath);
    JUMPHERE(brazero);
    free_stack(common, 1);
    }
  }
else if (ket == OP_KETRMIN)
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  /* OP_ONCE removes everything in case of a backtrack, so we don't
  need to explicitly release the STR_PTR. The extra release would
  badly affect the free_stack(2) above. */
  if (opcode != OP_ONCE)
    free_stack(common, 1);
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rmin_label);
  if (opcode == OP_ONCE)
    free_stack(common, bra == OP_BRAMINZERO ? 2 : 1);
  else if (bra == OP_BRAMINZERO)
    free_stack(common, 1);
  }
else if (bra == OP_BRAZERO)
  {
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  JUMPTO(SLJIT_JUMP, bracket->zero_matchingpath);
  JUMPHERE(brazero);
  }
}
11451
compile_bracketpos_backtrackingpath(compiler_common * common,struct backtrack_common * current)11452 static SLJIT_INLINE void compile_bracketpos_backtrackingpath(compiler_common *common, struct backtrack_common *current)
11453 {
11454 DEFINE_COMPILER;
11455 int offset;
11456 struct sljit_jump *jump;
11457
11458 if (CURRENT_AS(bracketpos_backtrack)->framesize < 0)
11459 {
11460 if (*current->cc == OP_CBRAPOS || *current->cc == OP_SCBRAPOS)
11461 {
11462 offset = (GET2(current->cc, 1 + LINK_SIZE)) << 1;
11463 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
11464 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
11465 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
11466 if (common->capture_last_ptr != 0)
11467 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
11468 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
11469 if (common->capture_last_ptr != 0)
11470 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
11471 }
11472 set_jumps(current->topbacktracks, LABEL());
11473 free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
11474 return;
11475 }
11476
11477 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr);
11478 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
11479 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(bracketpos_backtrack)->framesize - 1) * sizeof(sljit_sw));
11480
11481 if (current->topbacktracks)
11482 {
11483 jump = JUMP(SLJIT_JUMP);
11484 set_jumps(current->topbacktracks, LABEL());
11485 /* Drop the stack frame. */
11486 free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
11487 JUMPHERE(jump);
11488 }
11489 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracketpos_backtrack)->framesize - 1));
11490 }
11491
compile_braminzero_backtrackingpath(compiler_common * common,struct backtrack_common * current)11492 static SLJIT_INLINE void compile_braminzero_backtrackingpath(compiler_common *common, struct backtrack_common *current)
11493 {
11494 assert_backtrack backtrack;
11495
11496 current->top = NULL;
11497 current->topbacktracks = NULL;
11498 current->nextbacktracks = NULL;
11499 if (current->cc[1] > OP_ASSERTBACK_NOT)
11500 {
11501 /* Manual call of compile_bracket_matchingpath and compile_bracket_backtrackingpath. */
11502 compile_bracket_matchingpath(common, current->cc, current);
11503 compile_bracket_backtrackingpath(common, current->top);
11504 }
11505 else
11506 {
11507 memset(&backtrack, 0, sizeof(backtrack));
11508 backtrack.common.cc = current->cc;
11509 backtrack.matchingpath = CURRENT_AS(braminzero_backtrack)->matchingpath;
11510 /* Manual call of compile_assert_matchingpath. */
11511 compile_assert_matchingpath(common, current->cc, &backtrack, FALSE);
11512 }
11513 SLJIT_ASSERT(!current->nextbacktracks && !current->topbacktracks);
11514 }
11515
compile_control_verb_backtrackingpath(compiler_common * common,struct backtrack_common * current)11516 static SLJIT_INLINE void compile_control_verb_backtrackingpath(compiler_common *common, struct backtrack_common *current)
11517 {
11518 DEFINE_COMPILER;
11519 PCRE2_UCHAR opcode = *current->cc;
11520 struct sljit_label *loop;
11521 struct sljit_jump *jump;
11522
11523 if (opcode == OP_THEN || opcode == OP_THEN_ARG)
11524 {
11525 if (common->then_trap != NULL)
11526 {
11527 SLJIT_ASSERT(common->control_head_ptr != 0);
11528
11529 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
11530 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, type_then_trap);
11531 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, common->then_trap->start);
11532 jump = JUMP(SLJIT_JUMP);
11533
11534 loop = LABEL();
11535 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
11536 JUMPHERE(jump);
11537 CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0, loop);
11538 CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0, loop);
11539 add_jump(compiler, &common->then_trap->quit, JUMP(SLJIT_JUMP));
11540 return;
11541 }
11542 else if (!common->local_quit_available && common->in_positive_assertion)
11543 {
11544 add_jump(compiler, &common->positive_assertion_quit, JUMP(SLJIT_JUMP));
11545 return;
11546 }
11547 }
11548
11549 if (common->local_quit_available)
11550 {
11551 /* Abort match with a fail. */
11552 if (common->quit_label == NULL)
11553 add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
11554 else
11555 JUMPTO(SLJIT_JUMP, common->quit_label);
11556 return;
11557 }
11558
11559 if (opcode == OP_SKIP_ARG)
11560 {
11561 SLJIT_ASSERT(common->control_head_ptr != 0 && TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
11562 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
11563 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)(current->cc + 2));
11564 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_search_mark));
11565
11566 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_R0, 0);
11567 add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, SLJIT_R0, 0, SLJIT_IMM, 0));
11568 return;
11569 }
11570
11571 if (opcode == OP_SKIP)
11572 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
11573 else
11574 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_IMM, 0);
11575 add_jump(compiler, &common->reset_match, JUMP(SLJIT_JUMP));
11576 }
11577
compile_then_trap_backtrackingpath(compiler_common * common,struct backtrack_common * current)11578 static SLJIT_INLINE void compile_then_trap_backtrackingpath(compiler_common *common, struct backtrack_common *current)
11579 {
11580 DEFINE_COMPILER;
11581 struct sljit_jump *jump;
11582 int size;
11583
11584 if (CURRENT_AS(then_trap_backtrack)->then_trap)
11585 {
11586 common->then_trap = CURRENT_AS(then_trap_backtrack)->then_trap;
11587 return;
11588 }
11589
11590 size = CURRENT_AS(then_trap_backtrack)->framesize;
11591 size = 3 + (size < 0 ? 0 : size);
11592
11593 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(size - 3));
11594 free_stack(common, size);
11595 jump = JUMP(SLJIT_JUMP);
11596
11597 set_jumps(CURRENT_AS(then_trap_backtrack)->quit, LABEL());
11598 /* STACK_TOP is set by THEN. */
11599 if (CURRENT_AS(then_trap_backtrack)->framesize >= 0)
11600 {
11601 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
11602 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(then_trap_backtrack)->framesize - 1) * sizeof(sljit_sw));
11603 }
11604 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
11605 free_stack(common, 3);
11606
11607 JUMPHERE(jump);
11608 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
11609 }
11610
/* Generates the backtracking paths for a chain of backtrack records.

Arguments:
  common    the JIT compiler state
  current   the first record of the chain; the chain is followed through
            the prev links until NULL

For every record the pending nextbacktracks jumps are bound to a fresh label,
then the generator selected by the opcode at current->cc is invoked. The
value of common->then_trap seen on entry is restored before returning,
because the OP_THEN_TRAP generator may replace it while the chain is walked.
*/

static void compile_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
then_trap_backtrack *outer_then_trap = common->then_trap;

for (; current != NULL; current = current->prev)
  {
  if (current->nextbacktracks != NULL)
    set_jumps(current->nextbacktracks, LABEL());

  switch(*current->cc)
    {
    case OP_SET_SOM:
    /* Restore the saved start of match. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP1, 0);
    break;

    /* Single character iterators. */
    case OP_STAR: case OP_MINSTAR: case OP_PLUS: case OP_MINPLUS:
    case OP_QUERY: case OP_MINQUERY: case OP_UPTO: case OP_MINUPTO:
    case OP_EXACT:
    case OP_POSSTAR: case OP_POSPLUS: case OP_POSQUERY: case OP_POSUPTO:
    /* Caseless single character iterators. */
    case OP_STARI: case OP_MINSTARI: case OP_PLUSI: case OP_MINPLUSI:
    case OP_QUERYI: case OP_MINQUERYI: case OP_UPTOI: case OP_MINUPTOI:
    case OP_EXACTI:
    case OP_POSSTARI: case OP_POSPLUSI: case OP_POSQUERYI: case OP_POSUPTOI:
    /* Negated single character iterators. */
    case OP_NOTSTAR: case OP_NOTMINSTAR: case OP_NOTPLUS: case OP_NOTMINPLUS:
    case OP_NOTQUERY: case OP_NOTMINQUERY: case OP_NOTUPTO: case OP_NOTMINUPTO:
    case OP_NOTEXACT:
    case OP_NOTPOSSTAR: case OP_NOTPOSPLUS: case OP_NOTPOSQUERY: case OP_NOTPOSUPTO:
    /* Caseless negated single character iterators. */
    case OP_NOTSTARI: case OP_NOTMINSTARI: case OP_NOTPLUSI: case OP_NOTMINPLUSI:
    case OP_NOTQUERYI: case OP_NOTMINQUERYI: case OP_NOTUPTOI: case OP_NOTMINUPTOI:
    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI: case OP_NOTPOSPLUSI: case OP_NOTPOSQUERYI: case OP_NOTPOSUPTOI:
    /* Character type iterators. */
    case OP_TYPESTAR: case OP_TYPEMINSTAR: case OP_TYPEPLUS: case OP_TYPEMINPLUS:
    case OP_TYPEQUERY: case OP_TYPEMINQUERY: case OP_TYPEUPTO: case OP_TYPEMINUPTO:
    case OP_TYPEEXACT:
    case OP_TYPEPOSSTAR: case OP_TYPEPOSPLUS: case OP_TYPEPOSQUERY: case OP_TYPEPOSUPTO:
    /* Character classes. */
    case OP_CLASS:
    case OP_NCLASS:
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
#endif
    compile_iterator_backtrackingpath(common, current);
    break;

    case OP_REF: case OP_REFI: case OP_DNREF: case OP_DNREFI:
    compile_ref_iterator_backtrackingpath(common, current);
    break;

    case OP_RECURSE:
    compile_recurse_backtrackingpath(common, current);
    break;

    case OP_ASSERT: case OP_ASSERT_NOT:
    case OP_ASSERTBACK: case OP_ASSERTBACK_NOT:
    compile_assert_backtrackingpath(common, current);
    break;

    case OP_ONCE:
    case OP_BRA: case OP_CBRA: case OP_COND:
    case OP_SBRA: case OP_SCBRA: case OP_SCOND:
    compile_bracket_backtrackingpath(common, current);
    break;

    case OP_BRAZERO:
    /* The opcode after BRAZERO tells an assertion from a bracket. */
    if (current->cc[1] <= OP_ASSERTBACK_NOT)
      compile_assert_backtrackingpath(common, current);
    else
      compile_bracket_backtrackingpath(common, current);
    break;

    case OP_BRAPOS: case OP_CBRAPOS: case OP_SBRAPOS: case OP_SCBRAPOS:
    case OP_BRAPOSZERO:
    compile_bracketpos_backtrackingpath(common, current);
    break;

    case OP_BRAMINZERO:
    compile_braminzero_backtrackingpath(common, current);
    break;

    case OP_MARK:
    if (common->has_skip_arg)
      {
      /* Five slots were used: the mark is in slot 4, and slot 0 holds the
      value that goes back into the control head. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(4));
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      free_stack(common, 5);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);
      }
    else
      {
      /* Only the mark was saved. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      free_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
      }
    break;

    case OP_THEN: case OP_THEN_ARG:
    case OP_PRUNE: case OP_PRUNE_ARG:
    case OP_SKIP: case OP_SKIP_ARG:
    compile_control_verb_backtrackingpath(common, current);
    break;

    case OP_COMMIT:
    case OP_COMMIT_ARG:
    /* Without local quit the return value is set to "no match" here. */
    if (!common->local_quit_available)
      OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
    if (common->quit_label != NULL)
      JUMPTO(SLJIT_JUMP, common->quit_label);
    else
      add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
    break;

    case OP_CALLOUT: case OP_CALLOUT_STR:
    case OP_FAIL:
    case OP_ACCEPT: case OP_ASSERT_ACCEPT:
    /* Nothing to undo, only the pending jumps need a target. */
    set_jumps(current->topbacktracks, LABEL());
    break;

    case OP_THEN_TRAP:
    /* A virtual opcode for then traps. */
    compile_then_trap_backtrackingpath(common, current);
    break;

    default:
    SLJIT_UNREACHABLE();
    break;
    }
  }

common->then_trap = outer_then_trap;
}
11798
compile_recurse(compiler_common * common)11799 static SLJIT_INLINE void compile_recurse(compiler_common *common)
11800 {
11801 DEFINE_COMPILER;
11802 PCRE2_SPTR cc = common->start + common->currententry->start;
11803 PCRE2_SPTR ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE);
11804 PCRE2_SPTR ccend = bracketend(cc) - (1 + LINK_SIZE);
11805 BOOL needs_control_head;
11806 BOOL has_quit;
11807 BOOL has_accept;
11808 int private_data_size = get_recurse_data_length(common, ccbegin, ccend, &needs_control_head, &has_quit, &has_accept);
11809 int alt_count, alt_max, local_size;
11810 backtrack_common altbacktrack;
11811 jump_list *match = NULL;
11812 sljit_uw *next_update_addr = NULL;
11813 struct sljit_jump *alt1 = NULL;
11814 struct sljit_jump *alt2 = NULL;
11815 struct sljit_jump *accept_exit = NULL;
11816 struct sljit_label *quit;
11817
11818 /* Recurse captures then. */
11819 common->then_trap = NULL;
11820
11821 SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS);
11822
11823 alt_max = no_alternatives(cc);
11824 alt_count = 0;
11825
11826 /* Matching path. */
11827 SLJIT_ASSERT(common->currententry->entry_label == NULL && common->recursive_head_ptr != 0);
11828 common->currententry->entry_label = LABEL();
11829 set_jumps(common->currententry->entry_calls, common->currententry->entry_label);
11830
11831 sljit_emit_fast_enter(compiler, TMP2, 0);
11832 count_match(common);
11833
11834 local_size = (alt_max > 1) ? 2 : 1;
11835
11836 /* (Reversed) stack layout:
11837 [private data][return address][optional: str ptr] ... [optional: alternative index][recursive_head_ptr] */
11838
11839 allocate_stack(common, private_data_size + local_size);
11840 /* Save return address. */
11841 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP2, 0);
11842
11843 copy_recurse_data(common, ccbegin, ccend, recurse_copy_from_global, local_size, private_data_size + local_size, has_quit);
11844
11845 /* This variable is saved and restored all time when we enter or exit from a recursive context. */
11846 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, STACK_TOP, 0);
11847
11848 if (needs_control_head)
11849 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
11850
11851 if (alt_max > 1)
11852 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11853
11854 memset(&altbacktrack, 0, sizeof(backtrack_common));
11855 common->quit_label = NULL;
11856 common->accept_label = NULL;
11857 common->quit = NULL;
11858 common->accept = NULL;
11859 altbacktrack.cc = ccbegin;
11860 cc += GET(cc, 1);
11861 while (1)
11862 {
11863 altbacktrack.top = NULL;
11864 altbacktrack.topbacktracks = NULL;
11865
11866 if (altbacktrack.cc != ccbegin)
11867 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
11868
11869 compile_matchingpath(common, altbacktrack.cc, cc, &altbacktrack);
11870 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
11871 return;
11872
11873 allocate_stack(common, (alt_max > 1 || has_accept) ? 2 : 1);
11874 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
11875
11876 if (alt_max > 1 || has_accept)
11877 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, alt_count);
11878
11879 add_jump(compiler, &match, JUMP(SLJIT_JUMP));
11880
11881 if (alt_count == 0)
11882 {
11883 /* Backtracking path entry. */
11884 SLJIT_ASSERT(common->currententry->backtrack_label == NULL);
11885 common->currententry->backtrack_label = LABEL();
11886 set_jumps(common->currententry->backtrack_calls, common->currententry->backtrack_label);
11887
11888 sljit_emit_fast_enter(compiler, TMP1, 0);
11889
11890 if (has_accept)
11891 accept_exit = CMP(SLJIT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, alt_max * sizeof (sljit_sw));
11892
11893 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
11894 /* Save return address. */
11895 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(local_size - 1), TMP1, 0);
11896
11897 copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, has_quit);
11898
11899 if (alt_max > 1)
11900 {
11901 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
11902 free_stack(common, 2);
11903
11904 if (alt_max > 4)
11905 {
11906 /* Table jump if alt_max is greater than 4. */
11907 next_update_addr = allocate_read_only_data(common, alt_max * sizeof(sljit_uw));
11908 if (SLJIT_UNLIKELY(next_update_addr == NULL))
11909 return;
11910 sljit_emit_ijump(compiler, SLJIT_JUMP, SLJIT_MEM1(TMP1), (sljit_sw)next_update_addr);
11911 add_label_addr(common, next_update_addr++);
11912 }
11913 else
11914 {
11915 if (alt_max == 4)
11916 alt2 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw));
11917 alt1 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw));
11918 }
11919 }
11920 else
11921 free_stack(common, has_accept ? 2 : 1);
11922 }
11923 else if (alt_max > 4)
11924 add_label_addr(common, next_update_addr++);
11925 else
11926 {
11927 if (alt_count != 2 * sizeof(sljit_uw))
11928 {
11929 JUMPHERE(alt1);
11930 if (alt_max == 3 && alt_count == sizeof(sljit_uw))
11931 alt2 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw));
11932 }
11933 else
11934 {
11935 JUMPHERE(alt2);
11936 if (alt_max == 4)
11937 alt1 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_uw));
11938 }
11939 }
11940
11941 alt_count += sizeof(sljit_uw);
11942
11943 compile_backtrackingpath(common, altbacktrack.top);
11944 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
11945 return;
11946 set_jumps(altbacktrack.topbacktracks, LABEL());
11947
11948 if (*cc != OP_ALT)
11949 break;
11950
11951 altbacktrack.cc = cc + 1 + LINK_SIZE;
11952 cc += GET(cc, 1);
11953 }
11954
11955 /* No alternative is matched. */
11956
11957 quit = LABEL();
11958
11959 copy_recurse_data(common, ccbegin, ccend, recurse_copy_private_to_global, local_size, private_data_size + local_size, has_quit);
11960
11961 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1));
11962 free_stack(common, private_data_size + local_size);
11963 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
11964 sljit_emit_fast_return(compiler, TMP2, 0);
11965
11966 if (common->quit != NULL)
11967 {
11968 SLJIT_ASSERT(has_quit);
11969
11970 set_jumps(common->quit, LABEL());
11971 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
11972 copy_recurse_data(common, ccbegin, ccend, recurse_copy_shared_to_global, local_size, private_data_size + local_size, has_quit);
11973 JUMPTO(SLJIT_JUMP, quit);
11974 }
11975
11976 if (has_accept)
11977 {
11978 JUMPHERE(accept_exit);
11979 free_stack(common, 2);
11980
11981 /* Save return address. */
11982 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP1, 0);
11983
11984 copy_recurse_data(common, ccbegin, ccend, recurse_copy_kept_shared_to_global, local_size, private_data_size + local_size, has_quit);
11985
11986 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1));
11987 free_stack(common, private_data_size + local_size);
11988 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
11989 sljit_emit_fast_return(compiler, TMP2, 0);
11990 }
11991
11992 if (common->accept != NULL)
11993 {
11994 SLJIT_ASSERT(has_accept);
11995
11996 set_jumps(common->accept, LABEL());
11997
11998 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
11999 OP1(SLJIT_MOV, TMP2, 0, STACK_TOP, 0);
12000
12001 allocate_stack(common, 2);
12002 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, alt_count);
12003 }
12004
12005 set_jumps(match, LABEL());
12006
12007 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
12008
12009 copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, has_quit);
12010
12011 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), STACK(local_size - 1));
12012 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
12013 sljit_emit_fast_return(compiler, TMP2, 0);
12014 }
12015
12016 #undef COMPILE_BACKTRACKINGPATH
12017 #undef CURRENT_AS
12018
jit_compile(pcre2_code * code,sljit_u32 mode)12019 static int jit_compile(pcre2_code *code, sljit_u32 mode)
12020 {
12021 pcre2_real_code *re = (pcre2_real_code *)code;
12022 struct sljit_compiler *compiler;
12023 backtrack_common rootbacktrack;
12024 compiler_common common_data;
12025 compiler_common *common = &common_data;
12026 const sljit_u8 *tables = re->tables;
12027 void *allocator_data = &re->memctl;
12028 int private_data_size;
12029 PCRE2_SPTR ccend;
12030 executable_functions *functions;
12031 void *executable_func;
12032 sljit_uw executable_size;
12033 sljit_uw total_length;
12034 label_addr_list *label_addr;
12035 struct sljit_label *mainloop_label = NULL;
12036 struct sljit_label *continue_match_label;
12037 struct sljit_label *empty_match_found_label = NULL;
12038 struct sljit_label *empty_match_backtrack_label = NULL;
12039 struct sljit_label *reset_match_label;
12040 struct sljit_label *quit_label;
12041 struct sljit_jump *jump;
12042 struct sljit_jump *minlength_check_failed = NULL;
12043 struct sljit_jump *reqbyte_notfound = NULL;
12044 struct sljit_jump *empty_match = NULL;
12045 struct sljit_jump *end_anchor_failed = NULL;
12046
12047 SLJIT_ASSERT(tables);
12048
12049 memset(&rootbacktrack, 0, sizeof(backtrack_common));
12050 memset(common, 0, sizeof(compiler_common));
12051 common->re = re;
12052 common->name_table = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code));
12053 rootbacktrack.cc = common->name_table + re->name_count * re->name_entry_size;
12054
12055 common->start = rootbacktrack.cc;
12056 common->read_only_data_head = NULL;
12057 common->fcc = tables + fcc_offset;
12058 common->lcc = (sljit_sw)(tables + lcc_offset);
12059 common->mode = mode;
12060 common->might_be_empty = re->minlength == 0;
12061 common->nltype = NLTYPE_FIXED;
12062 switch(re->newline_convention)
12063 {
12064 case PCRE2_NEWLINE_CR: common->newline = CHAR_CR; break;
12065 case PCRE2_NEWLINE_LF: common->newline = CHAR_NL; break;
12066 case PCRE2_NEWLINE_CRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; break;
12067 case PCRE2_NEWLINE_ANY: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
12068 case PCRE2_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
12069 default: return PCRE2_ERROR_INTERNAL;
12070 }
12071 common->nlmax = READ_CHAR_MAX;
12072 common->nlmin = 0;
12073 if (re->bsr_convention == PCRE2_BSR_UNICODE)
12074 common->bsr_nltype = NLTYPE_ANY;
12075 else if (re->bsr_convention == PCRE2_BSR_ANYCRLF)
12076 common->bsr_nltype = NLTYPE_ANYCRLF;
12077 else
12078 {
12079 #ifdef BSR_ANYCRLF
12080 common->bsr_nltype = NLTYPE_ANYCRLF;
12081 #else
12082 common->bsr_nltype = NLTYPE_ANY;
12083 #endif
12084 }
12085 common->bsr_nlmax = READ_CHAR_MAX;
12086 common->bsr_nlmin = 0;
12087 common->endonly = (re->overall_options & PCRE2_DOLLAR_ENDONLY) != 0;
12088 common->ctypes = (sljit_sw)(tables + ctypes_offset);
12089 common->name_count = re->name_count;
12090 common->name_entry_size = re->name_entry_size;
12091 common->unset_backref = (re->overall_options & PCRE2_MATCH_UNSET_BACKREF) != 0;
12092 common->alt_circumflex = (re->overall_options & PCRE2_ALT_CIRCUMFLEX) != 0;
12093 #ifdef SUPPORT_UNICODE
12094 /* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */
12095 common->utf = (re->overall_options & PCRE2_UTF) != 0;
12096 common->use_ucp = (re->overall_options & PCRE2_UCP) != 0;
12097 if (common->utf)
12098 {
12099 if (common->nltype == NLTYPE_ANY)
12100 common->nlmax = 0x2029;
12101 else if (common->nltype == NLTYPE_ANYCRLF)
12102 common->nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
12103 else
12104 {
12105 /* We only care about the first newline character. */
12106 common->nlmax = common->newline & 0xff;
12107 }
12108
12109 if (common->nltype == NLTYPE_FIXED)
12110 common->nlmin = common->newline & 0xff;
12111 else
12112 common->nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
12113
12114 if (common->bsr_nltype == NLTYPE_ANY)
12115 common->bsr_nlmax = 0x2029;
12116 else
12117 common->bsr_nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
12118 common->bsr_nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
12119 }
12120 #endif /* SUPPORT_UNICODE */
12121 ccend = bracketend(common->start);
12122
12123 /* Calculate the local space size on the stack. */
12124 common->ovector_start = LIMIT_MATCH + sizeof(sljit_sw);
12125 common->optimized_cbracket = (sljit_u8 *)SLJIT_MALLOC(re->top_bracket + 1, allocator_data);
12126 if (!common->optimized_cbracket)
12127 return PCRE2_ERROR_NOMEMORY;
12128 #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 1
12129 memset(common->optimized_cbracket, 0, re->top_bracket + 1);
12130 #else
12131 memset(common->optimized_cbracket, 1, re->top_bracket + 1);
12132 #endif
12133
12134 SLJIT_ASSERT(*common->start == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET);
12135 #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 2
12136 common->capture_last_ptr = common->ovector_start;
12137 common->ovector_start += sizeof(sljit_sw);
12138 #endif
12139 if (!check_opcode_types(common, common->start, ccend))
12140 {
12141 SLJIT_FREE(common->optimized_cbracket, allocator_data);
12142 return PCRE2_ERROR_NOMEMORY;
12143 }
12144
12145 /* Checking flags and updating ovector_start. */
12146 if (mode == PCRE2_JIT_COMPLETE && (re->flags & PCRE2_LASTSET) != 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
12147 {
12148 common->req_char_ptr = common->ovector_start;
12149 common->ovector_start += sizeof(sljit_sw);
12150 }
12151 if (mode != PCRE2_JIT_COMPLETE)
12152 {
12153 common->start_used_ptr = common->ovector_start;
12154 common->ovector_start += sizeof(sljit_sw);
12155 if (mode == PCRE2_JIT_PARTIAL_SOFT)
12156 {
12157 common->hit_start = common->ovector_start;
12158 common->ovector_start += sizeof(sljit_sw);
12159 }
12160 }
12161 if ((re->overall_options & (PCRE2_FIRSTLINE | PCRE2_USE_OFFSET_LIMIT)) != 0)
12162 {
12163 common->match_end_ptr = common->ovector_start;
12164 common->ovector_start += sizeof(sljit_sw);
12165 }
12166 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
12167 common->control_head_ptr = 1;
12168 #endif
12169 if (common->control_head_ptr != 0)
12170 {
12171 common->control_head_ptr = common->ovector_start;
12172 common->ovector_start += sizeof(sljit_sw);
12173 }
12174 if (common->has_set_som)
12175 {
12176 /* Saving the real start pointer is necessary. */
12177 common->start_ptr = common->ovector_start;
12178 common->ovector_start += sizeof(sljit_sw);
12179 }
12180
12181 /* Aligning ovector to even number of sljit words. */
12182 if ((common->ovector_start & sizeof(sljit_sw)) != 0)
12183 common->ovector_start += sizeof(sljit_sw);
12184
12185 if (common->start_ptr == 0)
12186 common->start_ptr = OVECTOR(0);
12187
12188 /* Capturing brackets cannot be optimized if callouts are allowed. */
12189 if (common->capture_last_ptr != 0)
12190 memset(common->optimized_cbracket, 0, re->top_bracket + 1);
12191
12192 SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0));
12193 common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw);
12194
12195 total_length = ccend - common->start;
12196 common->private_data_ptrs = (sljit_s32 *)SLJIT_MALLOC(total_length * (sizeof(sljit_s32) + (common->has_then ? 1 : 0)), allocator_data);
12197 if (!common->private_data_ptrs)
12198 {
12199 SLJIT_FREE(common->optimized_cbracket, allocator_data);
12200 return PCRE2_ERROR_NOMEMORY;
12201 }
12202 memset(common->private_data_ptrs, 0, total_length * sizeof(sljit_s32));
12203
12204 private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw);
12205 set_private_data_ptrs(common, &private_data_size, ccend);
12206 if ((re->overall_options & PCRE2_ANCHORED) == 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
12207 {
12208 if (!detect_fast_forward_skip(common, &private_data_size) && !common->has_skip_in_assert_back)
12209 detect_fast_fail(common, common->start, &private_data_size, 4);
12210 }
12211
12212 SLJIT_ASSERT(common->fast_fail_start_ptr <= common->fast_fail_end_ptr);
12213
12214 if (private_data_size > SLJIT_MAX_LOCAL_SIZE)
12215 {
12216 SLJIT_FREE(common->private_data_ptrs, allocator_data);
12217 SLJIT_FREE(common->optimized_cbracket, allocator_data);
12218 return PCRE2_ERROR_NOMEMORY;
12219 }
12220
12221 if (common->has_then)
12222 {
12223 common->then_offsets = (sljit_u8 *)(common->private_data_ptrs + total_length);
12224 memset(common->then_offsets, 0, total_length);
12225 set_then_offsets(common, common->start, NULL);
12226 }
12227
12228 compiler = sljit_create_compiler(allocator_data);
12229 if (!compiler)
12230 {
12231 SLJIT_FREE(common->optimized_cbracket, allocator_data);
12232 SLJIT_FREE(common->private_data_ptrs, allocator_data);
12233 return PCRE2_ERROR_NOMEMORY;
12234 }
12235 common->compiler = compiler;
12236
12237 /* Main pcre_jit_exec entry. */
12238 sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW), 5, 5, 0, 0, private_data_size);
12239
12240 /* Register init. */
12241 reset_ovector(common, (re->top_bracket + 1) * 2);
12242 if (common->req_char_ptr != 0)
12243 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, SLJIT_R0, 0);
12244
12245 OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_S0, 0);
12246 OP1(SLJIT_MOV, TMP1, 0, SLJIT_S0, 0);
12247 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
12248 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end));
12249 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
12250 OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, limit_match));
12251 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, end));
12252 OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, start));
12253 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
12254 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH, TMP1, 0);
12255
12256 if (common->fast_fail_start_ptr < common->fast_fail_end_ptr)
12257 reset_fast_fail(common);
12258
12259 if (mode == PCRE2_JIT_PARTIAL_SOFT)
12260 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
12261 if (common->mark_ptr != 0)
12262 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
12263 if (common->control_head_ptr != 0)
12264 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
12265
12266 /* Main part of the matching */
12267 if ((re->overall_options & PCRE2_ANCHORED) == 0)
12268 {
12269 mainloop_label = mainloop_entry(common);
12270 continue_match_label = LABEL();
12271 /* Forward search if possible. */
12272 if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
12273 {
12274 if (mode == PCRE2_JIT_COMPLETE && fast_forward_first_n_chars(common))
12275 ;
12276 else if ((re->flags & PCRE2_FIRSTSET) != 0)
12277 fast_forward_first_char(common);
12278 else if ((re->flags & PCRE2_STARTLINE) != 0)
12279 fast_forward_newline(common);
12280 else if ((re->flags & PCRE2_FIRSTMAPSET) != 0)
12281 fast_forward_start_bits(common);
12282 }
12283 }
12284 else
12285 continue_match_label = LABEL();
12286
12287 if (mode == PCRE2_JIT_COMPLETE && re->minlength > 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
12288 {
12289 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
12290 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(re->minlength));
12291 minlength_check_failed = CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0);
12292 }
12293 if (common->req_char_ptr != 0)
12294 reqbyte_notfound = search_requested_char(common, (PCRE2_UCHAR)(re->last_codeunit), (re->flags & PCRE2_LASTCASELESS) != 0, (re->flags & PCRE2_FIRSTSET) != 0);
12295
12296 /* Store the current STR_PTR in OVECTOR(0). */
12297 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
12298 /* Copy the limit of allowed recursions. */
12299 OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH);
12300 if (common->capture_last_ptr != 0)
12301 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, 0);
12302 if (common->fast_forward_bc_ptr != NULL)
12303 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), PRIVATE_DATA(common->fast_forward_bc_ptr + 1), STR_PTR, 0);
12304
12305 if (common->start_ptr != OVECTOR(0))
12306 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_ptr, STR_PTR, 0);
12307
12308 /* Copy the beginning of the string. */
12309 if (mode == PCRE2_JIT_PARTIAL_SOFT)
12310 {
12311 jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
12312 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
12313 JUMPHERE(jump);
12314 }
12315 else if (mode == PCRE2_JIT_PARTIAL_HARD)
12316 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
12317
12318 compile_matchingpath(common, common->start, ccend, &rootbacktrack);
12319 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
12320 {
12321 sljit_free_compiler(compiler);
12322 SLJIT_FREE(common->optimized_cbracket, allocator_data);
12323 SLJIT_FREE(common->private_data_ptrs, allocator_data);
12324 PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
12325 return PCRE2_ERROR_NOMEMORY;
12326 }
12327
12328 if ((re->overall_options & PCRE2_ENDANCHORED) != 0)
12329 end_anchor_failed = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0);
12330
12331 if (common->might_be_empty)
12332 {
12333 empty_match = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
12334 empty_match_found_label = LABEL();
12335 }
12336
12337 common->accept_label = LABEL();
12338 if (common->accept != NULL)
12339 set_jumps(common->accept, common->accept_label);
12340
12341 /* This means we have a match. Update the ovector. */
12342 copy_ovector(common, re->top_bracket + 1);
12343 common->quit_label = common->abort_label = LABEL();
12344 if (common->quit != NULL)
12345 set_jumps(common->quit, common->quit_label);
12346 if (common->abort != NULL)
12347 set_jumps(common->abort, common->abort_label);
12348 if (minlength_check_failed != NULL)
12349 SET_LABEL(minlength_check_failed, common->abort_label);
12350 sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0);
12351
12352 if (common->failed_match != NULL)
12353 {
12354 SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);
12355 set_jumps(common->failed_match, LABEL());
12356 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
12357 JUMPTO(SLJIT_JUMP, common->abort_label);
12358 }
12359
12360 if ((re->overall_options & PCRE2_ENDANCHORED) != 0)
12361 JUMPHERE(end_anchor_failed);
12362
12363 if (mode != PCRE2_JIT_COMPLETE)
12364 {
12365 common->partialmatchlabel = LABEL();
12366 set_jumps(common->partialmatch, common->partialmatchlabel);
12367 return_with_partial_match(common, common->quit_label);
12368 }
12369
12370 if (common->might_be_empty)
12371 empty_match_backtrack_label = LABEL();
12372 compile_backtrackingpath(common, rootbacktrack.top);
12373 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
12374 {
12375 sljit_free_compiler(compiler);
12376 SLJIT_FREE(common->optimized_cbracket, allocator_data);
12377 SLJIT_FREE(common->private_data_ptrs, allocator_data);
12378 PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
12379 return PCRE2_ERROR_NOMEMORY;
12380 }
12381
12382 SLJIT_ASSERT(rootbacktrack.prev == NULL);
12383 reset_match_label = LABEL();
12384
12385 if (mode == PCRE2_JIT_PARTIAL_SOFT)
12386 {
12387 /* Update hit_start only in the first time. */
12388 jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
12389 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
12390 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
12391 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, TMP1, 0);
12392 JUMPHERE(jump);
12393 }
12394
12395 /* Check we have remaining characters. */
12396 if ((re->overall_options & PCRE2_ANCHORED) == 0 && common->match_end_ptr != 0)
12397 {
12398 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
12399 }
12400
12401 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP),
12402 (common->fast_forward_bc_ptr != NULL) ? (PRIVATE_DATA(common->fast_forward_bc_ptr + 1)) : common->start_ptr);
12403
12404 if ((re->overall_options & PCRE2_ANCHORED) == 0)
12405 {
12406 if (common->ff_newline_shortcut != NULL)
12407 {
12408 /* There cannot be more newlines if PCRE2_FIRSTLINE is set. */
12409 if ((re->overall_options & PCRE2_FIRSTLINE) == 0)
12410 {
12411 if (common->match_end_ptr != 0)
12412 {
12413 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
12414 OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
12415 CMPTO(SLJIT_LESS, STR_PTR, 0, TMP1, 0, common->ff_newline_shortcut);
12416 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
12417 }
12418 else
12419 CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, common->ff_newline_shortcut);
12420 }
12421 }
12422 else
12423 CMPTO(SLJIT_LESS, STR_PTR, 0, (common->match_end_ptr == 0) ? STR_END : TMP1, 0, mainloop_label);
12424 }
12425
12426 /* No more remaining characters. */
12427 if (reqbyte_notfound != NULL)
12428 JUMPHERE(reqbyte_notfound);
12429
12430 if (mode == PCRE2_JIT_PARTIAL_SOFT)
12431 CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1, common->partialmatchlabel);
12432
12433 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
12434 JUMPTO(SLJIT_JUMP, common->quit_label);
12435
12436 flush_stubs(common);
12437
12438 if (common->might_be_empty)
12439 {
12440 JUMPHERE(empty_match);
12441 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
12442 OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options));
12443 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);
12444 JUMPTO(SLJIT_NOT_ZERO, empty_match_backtrack_label);
12445 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);
12446 JUMPTO(SLJIT_ZERO, empty_match_found_label);
12447 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
12448 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found_label);
12449 JUMPTO(SLJIT_JUMP, empty_match_backtrack_label);
12450 }
12451
12452 common->fast_forward_bc_ptr = NULL;
12453 common->fast_fail_start_ptr = 0;
12454 common->fast_fail_end_ptr = 0;
12455 common->currententry = common->entries;
12456 common->local_quit_available = TRUE;
12457 quit_label = common->quit_label;
12458 while (common->currententry != NULL)
12459 {
12460 /* Might add new entries. */
12461 compile_recurse(common);
12462 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
12463 {
12464 sljit_free_compiler(compiler);
12465 SLJIT_FREE(common->optimized_cbracket, allocator_data);
12466 SLJIT_FREE(common->private_data_ptrs, allocator_data);
12467 PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
12468 return PCRE2_ERROR_NOMEMORY;
12469 }
12470 flush_stubs(common);
12471 common->currententry = common->currententry->next;
12472 }
12473 common->local_quit_available = FALSE;
12474 common->quit_label = quit_label;
12475
12476 /* Allocating stack, returns with PCRE_ERROR_JIT_STACKLIMIT if fails. */
12477 /* This is a (really) rare case. */
12478 set_jumps(common->stackalloc, LABEL());
12479 /* RETURN_ADDR is not a saved register. */
12480 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
12481
12482 SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
12483
12484 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
12485 OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
12486 OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_LIMIT, 0, SLJIT_IMM, STACK_GROWTH_RATE);
12487 OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, stack));
12488 OP1(SLJIT_MOV, STACK_LIMIT, 0, TMP2, 0);
12489
12490 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_stack_resize));
12491
12492 jump = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
12493 OP1(SLJIT_MOV, TMP2, 0, STACK_LIMIT, 0);
12494 OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_RETURN_REG, 0);
12495 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
12496 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
12497 sljit_emit_fast_return(compiler, TMP1, 0);
12498
12499 /* Allocation failed. */
12500 JUMPHERE(jump);
12501 /* We break the return address cache here, but this is a really rare case. */
12502 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_JIT_STACKLIMIT);
12503 JUMPTO(SLJIT_JUMP, common->quit_label);
12504
12505 /* Call limit reached. */
12506 set_jumps(common->calllimit, LABEL());
12507 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_MATCHLIMIT);
12508 JUMPTO(SLJIT_JUMP, common->quit_label);
12509
12510 if (common->revertframes != NULL)
12511 {
12512 set_jumps(common->revertframes, LABEL());
12513 do_revertframes(common);
12514 }
12515 if (common->wordboundary != NULL)
12516 {
12517 set_jumps(common->wordboundary, LABEL());
12518 check_wordboundary(common);
12519 }
12520 if (common->anynewline != NULL)
12521 {
12522 set_jumps(common->anynewline, LABEL());
12523 check_anynewline(common);
12524 }
12525 if (common->hspace != NULL)
12526 {
12527 set_jumps(common->hspace, LABEL());
12528 check_hspace(common);
12529 }
12530 if (common->vspace != NULL)
12531 {
12532 set_jumps(common->vspace, LABEL());
12533 check_vspace(common);
12534 }
12535 if (common->casefulcmp != NULL)
12536 {
12537 set_jumps(common->casefulcmp, LABEL());
12538 do_casefulcmp(common);
12539 }
12540 if (common->caselesscmp != NULL)
12541 {
12542 set_jumps(common->caselesscmp, LABEL());
12543 do_caselesscmp(common);
12544 }
12545 if (common->reset_match != NULL)
12546 {
12547 set_jumps(common->reset_match, LABEL());
12548 do_reset_match(common, (re->top_bracket + 1) * 2);
12549 CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP1, 0, continue_match_label);
12550 OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
12551 JUMPTO(SLJIT_JUMP, reset_match_label);
12552 }
12553 #ifdef SUPPORT_UNICODE
12554 #if PCRE2_CODE_UNIT_WIDTH == 8
12555 if (common->utfreadchar != NULL)
12556 {
12557 set_jumps(common->utfreadchar, LABEL());
12558 do_utfreadchar(common);
12559 }
12560 if (common->utfreadchar16 != NULL)
12561 {
12562 set_jumps(common->utfreadchar16, LABEL());
12563 do_utfreadchar16(common);
12564 }
12565 if (common->utfreadtype8 != NULL)
12566 {
12567 set_jumps(common->utfreadtype8, LABEL());
12568 do_utfreadtype8(common);
12569 }
12570 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
12571 if (common->getucd != NULL)
12572 {
12573 set_jumps(common->getucd, LABEL());
12574 do_getucd(common);
12575 }
12576 #endif /* SUPPORT_UNICODE */
12577
12578 SLJIT_FREE(common->optimized_cbracket, allocator_data);
12579 SLJIT_FREE(common->private_data_ptrs, allocator_data);
12580
12581 executable_func = sljit_generate_code(compiler);
12582 executable_size = sljit_get_generated_code_size(compiler);
12583 label_addr = common->label_addrs;
12584 while (label_addr != NULL)
12585 {
12586 *label_addr->update_addr = sljit_get_label_addr(label_addr->label);
12587 label_addr = label_addr->next;
12588 }
12589 sljit_free_compiler(compiler);
12590 if (executable_func == NULL)
12591 {
12592 PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
12593 return PCRE2_ERROR_NOMEMORY;
12594 }
12595
12596 /* Reuse the function descriptor if possible. */
12597 if (re->executable_jit != NULL)
12598 functions = (executable_functions *)re->executable_jit;
12599 else
12600 {
12601 functions = SLJIT_MALLOC(sizeof(executable_functions), allocator_data);
12602 if (functions == NULL)
12603 {
12604 /* This case is highly unlikely since we just recently
12605 freed a lot of memory. Not impossible though. */
12606 sljit_free_code(executable_func);
12607 PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
12608 return PCRE2_ERROR_NOMEMORY;
12609 }
12610 memset(functions, 0, sizeof(executable_functions));
12611 functions->top_bracket = re->top_bracket + 1;
12612 functions->limit_match = re->limit_match;
12613 re->executable_jit = functions;
12614 }
12615
12616 /* Turn mode into an index. */
12617 if (mode == PCRE2_JIT_COMPLETE)
12618 mode = 0;
12619 else
12620 mode = (mode == PCRE2_JIT_PARTIAL_SOFT) ? 1 : 2;
12621
12622 SLJIT_ASSERT(mode < JIT_NUMBER_OF_COMPILE_MODES);
12623 functions->executable_funcs[mode] = executable_func;
12624 functions->read_only_data_heads[mode] = common->read_only_data_head;
12625 functions->executable_sizes[mode] = executable_size;
12626 return 0;
12627 }
12628
12629 #endif
12630
12631 /*************************************************
12632 * JIT compile a Regular Expression *
12633 *************************************************/
12634
/* This function uses JIT to convert a previously-compiled pattern into machine
code.
12637
12638 Arguments:
12639 code a compiled pattern
12640 options JIT option bits
12641
12642 Returns: 0: success or (*NOJIT) was used
12643 <0: an error code
12644 */
12645
12646 #define PUBLIC_JIT_COMPILE_OPTIONS \
12647 (PCRE2_JIT_COMPLETE|PCRE2_JIT_PARTIAL_SOFT|PCRE2_JIT_PARTIAL_HARD)
12648
12649 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_jit_compile(pcre2_code * code,uint32_t options)12650 pcre2_jit_compile(pcre2_code *code, uint32_t options)
12651 {
12652 #ifndef SUPPORT_JIT
12653
12654 (void)code;
12655 (void)options;
12656 return PCRE2_ERROR_JIT_BADOPTION;
12657
12658 #else /* SUPPORT_JIT */
12659
12660 pcre2_real_code *re = (pcre2_real_code *)code;
12661 executable_functions *functions;
12662 int result;
12663
12664 if (code == NULL)
12665 return PCRE2_ERROR_NULL;
12666
12667 if ((options & ~PUBLIC_JIT_COMPILE_OPTIONS) != 0)
12668 return PCRE2_ERROR_JIT_BADOPTION;
12669
12670 if ((re->flags & PCRE2_NOJIT) != 0) return 0;
12671
12672 functions = (executable_functions *)re->executable_jit;
12673
12674 if ((options & PCRE2_JIT_COMPLETE) != 0 && (functions == NULL
12675 || functions->executable_funcs[0] == NULL)) {
12676 result = jit_compile(code, PCRE2_JIT_COMPLETE);
12677 if (result != 0)
12678 return result;
12679 }
12680
12681 if ((options & PCRE2_JIT_PARTIAL_SOFT) != 0 && (functions == NULL
12682 || functions->executable_funcs[1] == NULL)) {
12683 result = jit_compile(code, PCRE2_JIT_PARTIAL_SOFT);
12684 if (result != 0)
12685 return result;
12686 }
12687
12688 if ((options & PCRE2_JIT_PARTIAL_HARD) != 0 && (functions == NULL
12689 || functions->executable_funcs[2] == NULL)) {
12690 result = jit_compile(code, PCRE2_JIT_PARTIAL_HARD);
12691 if (result != 0)
12692 return result;
12693 }
12694
12695 return 0;
12696
12697 #endif /* SUPPORT_JIT */
12698 }
12699
12700 /* JIT compiler uses an all-in-one approach. This improves security,
12701 since the code generator functions are not exported. */
12702
12703 #define INCLUDED_FROM_PCRE2_JIT_COMPILE
12704
12705 #include "pcre2_jit_match.c"
12706 #include "pcre2_jit_misc.c"
12707
12708 /* End of pcre2_jit_compile.c */
12709