xref: /php-src/ext/pcre/pcre2lib/pcre2_match.c (revision ae5beff6)
1 /*************************************************
2 *      Perl-Compatible Regular Expressions       *
3 *************************************************/
4 
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7 
8                        Written by Philip Hazel
9      Original API code Copyright (c) 1997-2012 University of Cambridge
10           New API code Copyright (c) 2015-2024 University of Cambridge
11 
12 -----------------------------------------------------------------------------
13 Redistribution and use in source and binary forms, with or without
14 modification, are permitted provided that the following conditions are met:
15 
16     * Redistributions of source code must retain the above copyright notice,
17       this list of conditions and the following disclaimer.
18 
19     * Redistributions in binary form must reproduce the above copyright
20       notice, this list of conditions and the following disclaimer in the
21       documentation and/or other materials provided with the distribution.
22 
23     * Neither the name of the University of Cambridge nor the names of its
24       contributors may be used to endorse or promote products derived from
25       this software without specific prior written permission.
26 
27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 POSSIBILITY OF SUCH DAMAGE.
38 -----------------------------------------------------------------------------
39 */
40 
41 
42 #ifdef HAVE_CONFIG_H
43 #include "config.h"
44 #endif
45 
46 #include "pcre2_internal.h"
47 
48 /* These defines enable debugging code */
49 
50 /* #define DEBUG_FRAMES_DISPLAY */
51 /* #define DEBUG_SHOW_OPS */
52 /* #define DEBUG_SHOW_RMATCH */
53 
54 #ifdef DEBUG_FRAMES_DISPLAY
55 #include <stdarg.h>
56 #endif
57 
58 #ifdef DEBUG_SHOW_OPS
59 static const char *OP_names[] = { OP_NAME_LIST };
60 #endif
61 
62 /* These defines identify the name of the block containing "static"
63 information, and fields within it. */
64 
65 #define NLBLOCK mb              /* Block containing newline information */
66 #define PSSTART start_subject   /* Field containing processed string start */
67 #define PSEND   end_subject     /* Field containing processed string end */
68 
/* RECURSE_UNSET is the "not inside a pattern recursion" marker: match() stores
it in the current_recurse field of the first frame and compares against it when
handling OP_CLOSE and OP_ACCEPT. */
69 #define RECURSE_UNSET 0xffffffffu  /* Bigger than max group number */
70 
71 /* Masks for identifying the public options that are permitted at match time. */
72 
73 #define PUBLIC_MATCH_OPTIONS \
74   (PCRE2_ANCHORED|PCRE2_ENDANCHORED|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY| \
75    PCRE2_NOTEMPTY_ATSTART|PCRE2_NO_UTF_CHECK|PCRE2_PARTIAL_HARD| \
76    PCRE2_PARTIAL_SOFT|PCRE2_NO_JIT|PCRE2_COPY_MATCHED_SUBJECT| \
77    PCRE2_DISABLE_RECURSELOOP_CHECK)
78 
/* The same list without PCRE2_ANCHORED, PCRE2_ENDANCHORED, PCRE2_NO_JIT and
PCRE2_DISABLE_RECURSELOOP_CHECK. NOTE(review): presumably the options that the
JIT matching path accepts; the place where this mask is tested is outside this
section - confirm. */
79 #define PUBLIC_JIT_MATCH_OPTIONS \
80    (PCRE2_NO_UTF_CHECK|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY|\
81     PCRE2_NOTEMPTY_ATSTART|PCRE2_PARTIAL_SOFT|PCRE2_PARTIAL_HARD|\
82     PCRE2_COPY_MATCHED_SUBJECT)
83 
84 /* Non-error returns from and within the match() function. Error returns are
85 externally defined PCRE2_ERROR_xxx codes, which are all negative. */
86 
87 #define MATCH_MATCH        1
88 #define MATCH_NOMATCH      0
89 
90 /* Special internal returns used in the match() function. Make them
91 sufficiently negative to avoid the external error codes. */
92 
93 #define MATCH_ACCEPT       (-999)
94 #define MATCH_KETRPOS      (-998)
95 /* The next 5 must be kept together and in sequence so that a test that checks
96 for any one of them can use a range. MATCH_BACKTRACK_MIN and
MATCH_BACKTRACK_MAX name the two ends of that range, which is -997 to -993
inclusive; if a value is added or reordered, both aliases must still bracket
the whole group. */
97 #define MATCH_COMMIT       (-997)
98 #define MATCH_PRUNE        (-996)
99 #define MATCH_SKIP         (-995)
100 #define MATCH_SKIP_ARG     (-994)
101 #define MATCH_THEN         (-993)
102 #define MATCH_BACKTRACK_MAX MATCH_THEN
103 #define MATCH_BACKTRACK_MIN MATCH_COMMIT
104 
105 /* Group frame type values. Zero means the frame is not a group frame. The
106 lower 16 bits are used for data (e.g. the capture number). Group frames are
107 used for most groups so that information about the start is easily available at
108 the end without having to scan back through intermediate frames (backtrack
109 points). The identity values in the upper 16 bits are consecutive numbers
(1, 2, 3, 4), not independent bit flags: GF_CONDASSERT has the same bits set as
GF_CAPTURE|GF_NOCAPTURE. A frame's kind must therefore be tested with
GF_IDMASK(type) == GF_xxx, never with a bitwise AND against a single GF_xxx
value. */
110 
111 #define GF_CAPTURE     0x00010000u
112 #define GF_NOCAPTURE   0x00020000u
113 #define GF_CONDASSERT  0x00030000u
114 #define GF_RECURSE     0x00040000u
115 
116 /* Masks for the identity and data parts of the group frame type. GF_IDMASK
keeps the upper 16 bits (which GF_xxx kind); GF_DATAMASK keeps the lower 16
bits. match() recognises the frame of a particular capture group by comparing
the whole value against (GF_CAPTURE | number). */
117 
118 #define GF_IDMASK(a)   ((a) & 0xffff0000u)
119 #define GF_DATAMASK(a) ((a) & 0x0000ffffu)
120 
121 /* Repetition types. With default enumeration these are 0, 1 and 2. They are
the values stored in the rep_typ[] table, whose comments pair REPTYPE_MAX with
the plain quantifiers (* + ?), REPTYPE_MIN with the ?-suffixed forms, and
REPTYPE_POS with the OP_CRPOSxxx opcodes. */
122 
123 enum { REPTYPE_MIN, REPTYPE_MAX, REPTYPE_POS };
124 
125 /* Min and max values for the common repeats; a maximum of UINT32_MAX =>
126 infinity. Both tables have 11 entries, listed in the order
* *? + +? ? ?? RANGE MINRANGE POSSTAR POSPLUS POSQUERY. The two RANGE slots hold
zero in both tables and exist only to keep the positions aligned.
NOTE(review): presumably the tables are indexed by a repeat opcode minus the
first repeat opcode; the lookups are outside this section - confirm. */
127 
128 static const uint32_t rep_min[] = {
129   0, 0,       /* * and *? */
130   1, 1,       /* + and +? */
131   0, 0,       /* ? and ?? */
132   0, 0,       /* dummy placefillers for OP_CR[MIN]RANGE */
133   0, 1, 0 };  /* OP_CRPOS{STAR, PLUS, QUERY} */
134 
135 static const uint32_t rep_max[] = {
136   UINT32_MAX, UINT32_MAX,      /* * and *? */
137   UINT32_MAX, UINT32_MAX,      /* + and +? */
138   1, 1,                        /* ? and ?? */
139   0, 0,                        /* dummy placefillers for OP_CR[MIN]RANGE */
140   UINT32_MAX, UINT32_MAX, 1 }; /* OP_CRPOS{STAR, PLUS, QUERY} */
141 
142 /* Repetition types - must include OP_CRPOSRANGE (not needed above). This
table therefore has 12 entries, one more than rep_min[] and rep_max[]; the
first 11 are in the same order as those tables. */
143 
144 static const uint32_t rep_typ[] = {
145   REPTYPE_MAX, REPTYPE_MIN,    /* * and *? */
146   REPTYPE_MAX, REPTYPE_MIN,    /* + and +? */
147   REPTYPE_MAX, REPTYPE_MIN,    /* ? and ?? */
148   REPTYPE_MAX, REPTYPE_MIN,    /* OP_CRRANGE and OP_CRMINRANGE */
149   REPTYPE_POS, REPTYPE_POS,    /* OP_CRPOSSTAR, OP_CRPOSPLUS */
150   REPTYPE_POS, REPTYPE_POS };  /* OP_CRPOSQUERY, OP_CRPOSRANGE */
151 
152 /* Numbers for RMATCH calls at backtracking points. When these lists are
153 changed, the code at RETURN_SWITCH below must be updated in sync. Each
identifier is passed as the second argument of the RMATCH macro, which stores
it in the current frame's return_id and also pastes it into a label name
(L_RM1, L_RM2, ...). Because a label can be defined only once in a function, a
given RMn can be used by exactly one RMATCH call. The RM100 series exists only
when SUPPORT_WIDE_CHARS is defined and the RM200 series only when
SUPPORT_UNICODE is defined. */
154 
155 enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
156        RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
157        RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
158        RM31,  RM32, RM33, RM34, RM35, RM36, RM37 };
159 
160 #ifdef SUPPORT_WIDE_CHARS
161 enum { RM100=100, RM101 };
162 #endif
163 
164 #ifdef SUPPORT_UNICODE
165 enum { RM200=200, RM201, RM202, RM203, RM204, RM205, RM206, RM207,
166        RM208,     RM209, RM210, RM211, RM212, RM213, RM214, RM215,
167        RM216,     RM217, RM218, RM219, RM220, RM221, RM222, RM223,
168        RM224,     RM225 };
169 #endif
170 
171 /* Define short names for general fields in the current backtrack frame, which
172 is always pointed to by the F variable. Occasional references to fields in
173 other frames are written out explicitly. There are also some fields in the
174 current frame whose names start with "temp" that are used for short-term,
175 localised backtracking memory. These are #defined with Lxxx names at the point
176 of use and undefined afterwards.

Every macro below expands to F->field, so it can be used only where a frame
pointer that is literally named F is in scope. This is why do_callout() and
match_ref() call their frame parameter F. The macros are lvalues: assigning to
one writes the field of the current frame. */
177 
178 #define Fback_frame        F->back_frame
179 #define Fcapture_last      F->capture_last
180 #define Fcurrent_recurse   F->current_recurse
181 #define Fecode             F->ecode
182 #define Feptr              F->eptr
183 #define Fgroup_frame_type  F->group_frame_type
184 #define Flast_group_offset F->last_group_offset
185 #define Flength            F->length
186 #define Fmark              F->mark
187 #define Frdepth            F->rdepth
188 #define Fstart_match       F->start_match
189 #define Foffset_top        F->offset_top
190 #define Foccu              F->occu
191 #define Fop                F->op
192 #define Fovector           F->ovector
193 #define Freturn_id         F->return_id
194 
195 
196 #ifdef DEBUG_FRAMES_DISPLAY
197 /*************************************************
198 *      Display current frames and contents       *
199 *************************************************/
200 
201 /* This debugging function displays the current set of frames and their
202 contents. It is not called automatically from anywhere, the intention being
203 that calls can be inserted where necessary when debugging frame-related
204 problems.
205 
206 Arguments:
207   f           the file to write to
208   F           the current top frame
209   P           a previous frame of interest
210   frame_size  the frame size
211   mb          points to the match block
212   match_data  points to the match data block
213   s           identification text
214 
215 Returns:    nothing
216 */
217 
218 static void
display_frames(FILE * f,heapframe * F,heapframe * P,PCRE2_SIZE frame_size,match_block * mb,pcre2_match_data * match_data,const char * s,...)219 display_frames(FILE *f, heapframe *F, heapframe *P, PCRE2_SIZE frame_size,
220   match_block *mb, pcre2_match_data *match_data, const char *s, ...)
221 {
222 uint32_t i;
223 heapframe *Q;
224 va_list ap;
225 va_start(ap, s);
226 
227 fprintf(f, "FRAMES ");
228 vfprintf(f, s, ap);
229 va_end(ap);
230 
231 if (P != NULL) fprintf(f, " P=%lu",
232   ((char *)P - (char *)(match_data->heapframes))/frame_size);
233 fprintf(f, "\n");
234 
235 for (i = 0, Q = match_data->heapframes;
236      Q <= F;
237      i++, Q = (heapframe *)((char *)Q + frame_size))
238   {
239   fprintf(f, "Frame %d type=%x subj=%lu code=%d back=%lu id=%d",
240     i, Q->group_frame_type, Q->eptr - mb->start_subject, *(Q->ecode),
241     Q->back_frame, Q->return_id);
242 
243   if (Q->last_group_offset == PCRE2_UNSET)
244     fprintf(f, " lgoffset=unset\n");
245   else
246     fprintf(f, " lgoffset=%lu\n",  Q->last_group_offset/frame_size);
247   }
248 }
249 
250 #endif
251 
252 
253 
254 /*************************************************
255 *                Process a callout               *
256 *************************************************/
257 
258 /* This function is called for all callouts, whether "standalone" or at the
259 start of a conditional group. Feptr will be pointing to either OP_CALLOUT or
260 OP_CALLOUT_STR. A callout block is allocated in pcre2_match() and initialized
261 with fixed values.
262 
263 Arguments:
264   F          points to the current backtracking frame
265   mb         points to the match block
266   lengthptr  where to return the length of the callout item
267 
268 Returns:     the return from the callout
269              or 0 if no callout function exists
270 */
271 
272 static int
do_callout(heapframe * F,match_block * mb,PCRE2_SIZE * lengthptr)273 do_callout(heapframe *F, match_block *mb, PCRE2_SIZE *lengthptr)
274 {
275 int rc;
276 PCRE2_SIZE save0, save1;
277 PCRE2_SIZE *callout_ovector;
278 pcre2_callout_block *cb;
279 
280 *lengthptr = (*Fecode == OP_CALLOUT)?
281   PRIV(OP_lengths)[OP_CALLOUT] : GET(Fecode, 1 + 2*LINK_SIZE);
282 
283 if (mb->callout == NULL) return 0;   /* No callout function provided */
284 
285 /* The original matching code (pre 10.30) worked directly with the ovector
286 passed by the user, and this was passed to callouts. Now that the working
287 ovector is in the backtracking frame, it no longer needs to reserve space for
288 the overall match offsets (which would waste space in the frame). For backward
289 compatibility, however, we pass capture_top and offset_vector to the callout as
290 if for the extended ovector, and we ensure that the first two slots are unset
291 by preserving and restoring their current contents. Picky compilers complain if
292 references such as Fovector[-2] are use directly, so we set up a separate
293 pointer. */
294 
295 callout_ovector = (PCRE2_SIZE *)(Fovector) - 2;
296 
297 /* The cb->version, cb->subject, cb->subject_length, and cb->start_match fields
298 are set externally. The first 3 never change; the last is updated for each
299 bumpalong. */
300 
301 cb = mb->cb;
302 cb->capture_top      = (uint32_t)Foffset_top/2 + 1;
303 cb->capture_last     = Fcapture_last;
304 cb->offset_vector    = callout_ovector;
305 cb->mark             = mb->nomatch_mark;
306 cb->current_position = (PCRE2_SIZE)(Feptr - mb->start_subject);
307 cb->pattern_position = GET(Fecode, 1);
308 cb->next_item_length = GET(Fecode, 1 + LINK_SIZE);
309 
310 if (*Fecode == OP_CALLOUT)  /* Numerical callout */
311   {
312   cb->callout_number = Fecode[1 + 2*LINK_SIZE];
313   cb->callout_string_offset = 0;
314   cb->callout_string = NULL;
315   cb->callout_string_length = 0;
316   }
317 else  /* String callout */
318   {
319   cb->callout_number = 0;
320   cb->callout_string_offset = GET(Fecode, 1 + 3*LINK_SIZE);
321   cb->callout_string = Fecode + (1 + 4*LINK_SIZE) + 1;
322   cb->callout_string_length =
323     *lengthptr - (1 + 4*LINK_SIZE) - 2;
324   }
325 
326 save0 = callout_ovector[0];
327 save1 = callout_ovector[1];
328 callout_ovector[0] = callout_ovector[1] = PCRE2_UNSET;
329 rc = mb->callout(cb, mb->callout_data);
330 callout_ovector[0] = save0;
331 callout_ovector[1] = save1;
332 cb->callout_flags = 0;
333 return rc;
334 }
335 
336 
337 
338 /*************************************************
339 *          Match a back-reference                *
340 *************************************************/
341 
342 /* This function is called only when it is known that the offset lies within
343 the offsets that have so far been used in the match. Note that in caseless
344 UTF-8 mode, the number of subject bytes matched may be different to the number
345 of reference bytes. (In theory this could also happen in UTF-16 mode, but it
346 seems unlikely.)
347 
348 Arguments:
349   offset      index into the offset vector
350   caseless    TRUE if caseless
351   F           the current backtracking frame pointer
352   mb          points to match block
353   lengthptr   pointer for returning the length matched
354 
355 Returns:      = 0 sucessful match; number of code units matched is set
356               < 0 no match
357               > 0 partial match
358 */
359 
360 static int
match_ref(PCRE2_SIZE offset,BOOL caseless,heapframe * F,match_block * mb,PCRE2_SIZE * lengthptr)361 match_ref(PCRE2_SIZE offset, BOOL caseless, heapframe *F, match_block *mb,
362   PCRE2_SIZE *lengthptr)
363 {
364 PCRE2_SPTR p;
365 PCRE2_SIZE length;
366 PCRE2_SPTR eptr;
367 PCRE2_SPTR eptr_start;
368 
369 /* Deal with an unset group. The default is no match, but there is an option to
370 match an empty string. */
371 
372 if (offset >= Foffset_top || Fovector[offset] == PCRE2_UNSET)
373   {
374   if ((mb->poptions & PCRE2_MATCH_UNSET_BACKREF) != 0)
375     {
376     *lengthptr = 0;
377     return 0;      /* Match */
378     }
379   else return -1;  /* No match */
380   }
381 
382 /* Separate the caseless and UTF cases for speed. */
383 
384 eptr = eptr_start = Feptr;
385 p = mb->start_subject + Fovector[offset];
386 length = Fovector[offset+1] - Fovector[offset];
387 
388 if (caseless)
389   {
390 #if defined SUPPORT_UNICODE
391   BOOL utf = (mb->poptions & PCRE2_UTF) != 0;
392 
393   if (utf || (mb->poptions & PCRE2_UCP) != 0)
394     {
395     PCRE2_SPTR endptr = p + length;
396 
397     /* Match characters up to the end of the reference. NOTE: the number of
398     code units matched may differ, because in UTF-8 there are some characters
399     whose upper and lower case codes have different numbers of bytes. For
400     example, U+023A (2 bytes in UTF-8) is the upper case version of U+2C65 (3
401     bytes in UTF-8); a sequence of 3 of the former uses 6 bytes, as does a
402     sequence of two of the latter. It is important, therefore, to check the
403     length along the reference, not along the subject (earlier code did this
404     wrong). UCP without uses Unicode properties but without UTF encoding. */
405 
406     while (p < endptr)
407       {
408       uint32_t c, d;
409       const ucd_record *ur;
410       if (eptr >= mb->end_subject) return 1;   /* Partial match */
411 
412       if (utf)
413         {
414         GETCHARINC(c, eptr);
415         GETCHARINC(d, p);
416         }
417       else
418         {
419         c = *eptr++;
420         d = *p++;
421         }
422 
423       ur = GET_UCD(d);
424       if (c != d && c != (uint32_t)((int)d + ur->other_case))
425         {
426         const uint32_t *pp = PRIV(ucd_caseless_sets) + ur->caseset;
427         for (;;)
428           {
429           if (c < *pp) return -1;  /* No match */
430           if (c == *pp++) break;
431           }
432         }
433       }
434     }
435   else
436 #endif
437 
438   /* Not in UTF or UCP mode */
439     {
440     for (; length > 0; length--)
441       {
442       uint32_t cc, cp;
443       if (eptr >= mb->end_subject) return 1;   /* Partial match */
444       cc = UCHAR21TEST(eptr);
445       cp = UCHAR21TEST(p);
446       if (TABLE_GET(cp, mb->lcc, cp) != TABLE_GET(cc, mb->lcc, cc))
447         return -1;  /* No match */
448       p++;
449       eptr++;
450       }
451     }
452   }
453 
454 /* In the caseful case, we can just compare the code units, whether or not we
455 are in UTF and/or UCP mode. When partial matching, we have to do this unit by
456 unit. */
457 
458 else
459   {
460   if (mb->partial != 0)
461     {
462     for (; length > 0; length--)
463       {
464       if (eptr >= mb->end_subject) return 1;   /* Partial match */
465       if (UCHAR21INCTEST(p) != UCHAR21INCTEST(eptr)) return -1;  /* No match */
466       }
467     }
468 
469   /* Not partial matching */
470 
471   else
472     {
473     if ((PCRE2_SIZE)(mb->end_subject - eptr) < length) return 1; /* Partial */
474     if (memcmp(p, eptr, CU2BYTES(length)) != 0) return -1;  /* No match */
475     eptr += length;
476     }
477   }
478 
479 *lengthptr = eptr - eptr_start;
480 return 0;  /* Match */
481 }
482 
483 
484 
485 /******************************************************************************
486 *******************************************************************************
487                    "Recursion" in the match() function
488 
489 The original match() function was highly recursive, but this proved to be the
490 source of a number of problems over the years, mostly because of the relatively
491 small system stacks that are commonly found. As new features were added to
492 patterns, various kludges were invented to reduce the amount of stack used,
493 making the code hard to understand in places.
494 
495 A version did exist that used individual frames on the heap instead of calling
496 match() recursively, but this ran substantially slower. The current version is
497 a refactoring that uses a vector of frames to remember backtracking points.
498 This runs no slower, and possibly even a bit faster than the original recursive
499 implementation.
500 
501 At first, an initial vector of size START_FRAMES_SIZE (enough for maybe 50
502 frames) was allocated on the system stack. If this was not big enough, the heap
503 was used for a larger vector. However, it turns out that there are environments
504 where taking as little as 20KiB from the system stack is an embarrassment.
505 After another refactoring, the heap is used exclusively, but a pointer to the
506 frames vector and its size are cached in the match_data block, so that there is
507 no new memory allocation if the same match_data block is used for multiple
508 matches (unless the frames vector has to be extended).
509 *******************************************************************************
510 ******************************************************************************/
511 
512 
513 
514 
515 /*************************************************
516 *       Macros for the match() function          *
517 *************************************************/
518 
519 /* These macros pack up tests that are used for partial matching several times
520 in the code. The second one is used when we already know we are past the end of
521 the subject. We set the "hit end" flag if the pointer is at the end of the
522 subject and either (a) the pointer is past the earliest inspected character
523 (i.e. something has been matched, even if not part of the actual matched
524 string), or (b) the pattern contains a lookbehind. These are the conditions for
525 which adding more characters may allow the current match to continue.
526 
527 For hard partial matching, we immediately return a partial match. Otherwise,
528 carrying on means that a complete match on the current subject will be sought.
529 A partial match is returned only if no complete match can be found.

Usage notes: both macros need F (through Feptr) and mb to be in scope, and
SCHECK_PARTIAL contains a return statement, so they can be used only inside a
function whose return type can carry PCRE2_ERROR_PARTIAL. Hard partial matching
is the case mb->partial > 1. Each macro expands to a bare if statement that is
not wrapped in do { } while (0); it should be used only as a complete statement
of its own, never as the unbraced body of an if that has an else. */
530 
531 #define CHECK_PARTIAL()\
532   if (Feptr >= mb->end_subject) \
533     { \
534     SCHECK_PARTIAL(); \
535     }
536 
537 #define SCHECK_PARTIAL()\
538   if (mb->partial != 0 && \
539       (Feptr > mb->start_used_ptr || mb->allowemptypartial)) \
540     { \
541     mb->hitend = TRUE; \
542     if (mb->partial > 1) return PCRE2_ERROR_PARTIAL; \
543     }
544 
545 
546 /* These macros are used to implement backtracking. They simulate a recursive
547 call to the match() function by means of a local vector of frames which
548 remember the backtracking points.

RMATCH(ra,rb) records ra as the code pointer at which the new frame starts and
rb as the return id of the current frame, then jumps to MATCH_RECURSE. It also
defines the label L_rb directly after the jump; rb is pasted into the label
name, so each rb value can appear in only one RMATCH call. RRETURN(ra) stores ra
in rrc and jumps to RETURN_SWITCH. NOTE(review): RETURN_SWITCH is outside this
section; presumably it uses the saved return id to continue at the matching
L_rb label - confirm. */
549 
550 #define RMATCH(ra,rb)\
551   {\
552   start_ecode = ra;\
553   Freturn_id = rb;\
554   goto MATCH_RECURSE;\
555   L_##rb:;\
556   }
557 
558 #define RRETURN(ra)\
559   {\
560   rrc = ra;\
561   goto RETURN_SWITCH;\
562   }
563 
564 
565 
566 /*************************************************
567 *         Match from current position            *
568 *************************************************/
569 
570 /* This function is called to run one match attempt at a single starting point
571 in the subject.
572 
573 Performance note: It might be tempting to extract commonly used fields from the
574 mb structure (e.g. end_subject) into individual variables to improve
575 performance. Tests using gcc on a SPARC disproved this; in the first case, it
576 made performance worse.
577 
578 Arguments:
579    start_eptr   starting character in subject
580    start_ecode  starting position in compiled code
581    top_bracket  number of capturing parentheses in the pattern
582    frame_size   size of each backtracking frame
583    match_data   pointer to the match_data block
584    mb           pointer to "static" variables block
585 
586 Returns:        MATCH_MATCH if matched            )  these values are >= 0
587                 MATCH_NOMATCH if failed to match  )
588                 negative MATCH_xxx value for PRUNE, SKIP, etc
589                 negative PCRE2_ERROR_xxx value if aborted by an error condition
590                 (e.g. stopped by repeated call or depth limit)
591 */
592 
593 static int
match(PCRE2_SPTR start_eptr,PCRE2_SPTR start_ecode,uint16_t top_bracket,PCRE2_SIZE frame_size,pcre2_match_data * match_data,match_block * mb)594 match(PCRE2_SPTR start_eptr, PCRE2_SPTR start_ecode, uint16_t top_bracket,
595   PCRE2_SIZE frame_size, pcre2_match_data *match_data, match_block *mb)
596 {
597 /* Frame-handling variables */
598 
599 heapframe *F;           /* Current frame pointer */
600 heapframe *N = NULL;    /* Temporary frame pointers */
601 heapframe *P = NULL;
602 
603 heapframe *frames_top;  /* End of frames vector */
604 heapframe *assert_accept_frame = NULL;  /* For passing back a frame with captures */
605 PCRE2_SIZE frame_copy_size;   /* Amount to copy when creating a new frame */
606 
607 /* Local variables that do not need to be preserved over calls to RMATCH(). */
608 
609 PCRE2_SPTR branch_end = NULL;
610 PCRE2_SPTR branch_start;
611 PCRE2_SPTR bracode;     /* Temp pointer to start of group */
612 PCRE2_SIZE offset;      /* Used for group offsets */
613 PCRE2_SIZE length;      /* Used for various length calculations */
614 
615 int rrc;                /* Return from functions & backtracking "recursions" */
616 #ifdef SUPPORT_UNICODE
617 int proptype;           /* Type of character property */
618 #endif
619 
620 uint32_t i;             /* Used for local loops */
621 uint32_t fc;            /* Character values */
622 uint32_t number;        /* Used for group and other numbers */
623 uint32_t reptype = 0;   /* Type of repetition (0 to avoid compiler warning) */
624 uint32_t group_frame_type;  /* Specifies type for new group frames */
625 
626 BOOL condition;         /* Used in conditional groups */
627 BOOL cur_is_word;       /* Used in "word" tests */
628 BOOL prev_is_word;      /* Used in "word" tests */
629 
630 /* UTF and UCP flags */
631 
632 #ifdef SUPPORT_UNICODE
633 BOOL utf = (mb->poptions & PCRE2_UTF) != 0;
634 BOOL ucp = (mb->poptions & PCRE2_UCP) != 0;
635 #else
636 BOOL utf = FALSE;  /* Required for convenience even when no Unicode support */
637 #endif
638 
639 /* This is the length of the last part of a backtracking frame that must be
640 copied when a new frame is created. */
641 
642 frame_copy_size = frame_size - offsetof(heapframe, eptr);
643 
644 /* Set up the first frame and the end of the frames vector. */
645 
646 F = match_data->heapframes;
647 frames_top = (heapframe *)((char *)F + match_data->heapframes_size);
648 
649 Frdepth = 0;                        /* "Recursion" depth */
650 Fcapture_last = 0;                  /* Number of most recent capture */
651 Fcurrent_recurse = RECURSE_UNSET;   /* Not pattern recursing. */
652 Fstart_match = Feptr = start_eptr;  /* Current data pointer and start match */
653 Fmark = NULL;                       /* Most recent mark */
654 Foffset_top = 0;                    /* End of captures within the frame */
655 Flast_group_offset = PCRE2_UNSET;   /* Saved frame of most recent group */
656 group_frame_type = 0;               /* Not a start of group frame */
657 goto NEW_FRAME;                     /* Start processing with this frame */
658 
659 /* Come back here when we want to create a new frame for remembering a
660 backtracking point. */
661 
662 MATCH_RECURSE:
663 
664 /* Set up a new backtracking frame. If the vector is full, get a new one,
665 doubling the size, but constrained by the heap limit (which is in KiB). */
666 
667 N = (heapframe *)((char *)F + frame_size);
668 if ((heapframe *)((char *)N + frame_size) >= frames_top)
669   {
670   heapframe *new;
671   PCRE2_SIZE newsize;
672   PCRE2_SIZE usedsize = (char *)N - (char *)(match_data->heapframes);
673 
674   if (match_data->heapframes_size >= PCRE2_SIZE_MAX / 2)
675     {
676     if (match_data->heapframes_size == PCRE2_SIZE_MAX - 1)
677       return PCRE2_ERROR_NOMEMORY;
678     newsize = PCRE2_SIZE_MAX - 1;
679     }
680   else
681     newsize = match_data->heapframes_size * 2;
682 
683   if (newsize / 1024 >= mb->heap_limit)
684     {
685     PCRE2_SIZE old_size = match_data->heapframes_size / 1024;
686     if (mb->heap_limit <= old_size)
687       return PCRE2_ERROR_HEAPLIMIT;
688     else
689       {
690       PCRE2_SIZE max_delta = 1024 * (mb->heap_limit - old_size);
691       int over_bytes = match_data->heapframes_size % 1024;
692       if (over_bytes) max_delta -= (1024 - over_bytes);
693       newsize = match_data->heapframes_size + max_delta;
694       }
695     }
696 
697   /* With a heap limit set, the permitted additional size may not be enough for
698   another frame, so do a final check. */
699 
700   if (newsize - usedsize < frame_size) return PCRE2_ERROR_HEAPLIMIT;
701   new = match_data->memctl.malloc(newsize, match_data->memctl.memory_data);
702   if (new == NULL) return PCRE2_ERROR_NOMEMORY;
703   memcpy(new, match_data->heapframes, usedsize);
704 
705   N = (heapframe *)((char *)new + usedsize);
706   F = (heapframe *)((char *)N - frame_size);
707 
708   match_data->memctl.free(match_data->heapframes, match_data->memctl.memory_data);
709   match_data->heapframes = new;
710   match_data->heapframes_size = newsize;
711   frames_top = (heapframe *)((char *)new + newsize);
712   }
713 
714 #ifdef DEBUG_SHOW_RMATCH
715 fprintf(stderr, "++ RMATCH %d frame=%d", Freturn_id, Frdepth + 1);
716 if (group_frame_type != 0)
717   {
718   fprintf(stderr, " type=%x ", group_frame_type);
719   switch (GF_IDMASK(group_frame_type))
720     {
721     case GF_CAPTURE:
722     fprintf(stderr, "capture=%d", GF_DATAMASK(group_frame_type));
723     break;
724 
725     case GF_NOCAPTURE:
726     fprintf(stderr, "nocapture op=%d", GF_DATAMASK(group_frame_type));
727     break;
728 
729     case GF_CONDASSERT:
730     fprintf(stderr, "condassert op=%d", GF_DATAMASK(group_frame_type));
731     break;
732 
733     case GF_RECURSE:
734     fprintf(stderr, "recurse=%d", GF_DATAMASK(group_frame_type));
735     break;
736 
737     default:
738     fprintf(stderr, "*** unknown ***");
739     break;
740     }
741   }
742 fprintf(stderr, "\n");
743 #endif
744 
745 /* Copy those fields that must be copied into the new frame, increase the
746 "recursion" depth (i.e. the new frame's index) and then make the new frame
747 current. */
748 
749 memcpy((char *)N + offsetof(heapframe, eptr),
750        (char *)F + offsetof(heapframe, eptr),
751        frame_copy_size);
752 
753 N->rdepth = Frdepth + 1;
754 F = N;
755 
756 /* Carry on processing with a new frame. */
757 
758 NEW_FRAME:
759 Fgroup_frame_type = group_frame_type;
760 Fecode = start_ecode;      /* Starting code pointer */
761 Fback_frame = frame_size;  /* Default is go back one frame */
762 
763 /* If this is a special type of group frame, remember its offset for quick
764 access at the end of the group. If this is a recursion, set a new current
765 recursion value. */
766 
767 if (group_frame_type != 0)
768   {
769   Flast_group_offset = (char *)F - (char *)match_data->heapframes;
770   if (GF_IDMASK(group_frame_type) == GF_RECURSE)
771     Fcurrent_recurse = GF_DATAMASK(group_frame_type);
772   group_frame_type = 0;
773   }
774 
775 
776 /* ========================================================================= */
777 /* This is the main processing loop. First check that we haven't recorded too
778 many backtracks (search tree is too large), or that we haven't exceeded the
779 recursive depth limit (used too many backtracking frames). If not, process the
780 opcodes. */
781 
782 if (mb->match_call_count++ >= mb->match_limit) return PCRE2_ERROR_MATCHLIMIT;
783 if (Frdepth >= mb->match_limit_depth) return PCRE2_ERROR_DEPTHLIMIT;
784 
785 #ifdef DEBUG_SHOW_OPS
786 fprintf(stderr, "\n++ New frame: type=0x%x subject offset %ld\n",
787   GF_IDMASK(Fgroup_frame_type), Feptr - mb->start_subject);
788 #endif
789 
790 for (;;)
791   {
792 #ifdef DEBUG_SHOW_OPS
793 fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode,
794   OP_names[*Fecode]);
795 #endif
796 
797   Fop = (uint8_t)(*Fecode);  /* Cast needed for 16-bit and 32-bit modes */
798   switch(Fop)
799     {
800     /* ===================================================================== */
801     /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes, to close
802     any currently open capturing brackets. Unlike reaching the end of a group,
803     where we know the starting frame is at the top of the chained frames, in
804     this case we have to search back for the relevant frame in case other types
805     of group that use chained frames have intervened. Multiple OP_CLOSEs always
806     come innermost first, which matches the chain order. We can ignore this in
807     a recursion, because captures are not passed out of recursions. */
808 
809     case OP_CLOSE:
810     if (Fcurrent_recurse == RECURSE_UNSET)
811       {
812       number = GET2(Fecode, 1);
813       offset = Flast_group_offset;
814       for(;;)
815         {
816         if (offset == PCRE2_UNSET) return PCRE2_ERROR_INTERNAL;
817         N = (heapframe *)((char *)match_data->heapframes + offset);
818         P = (heapframe *)((char *)N - frame_size);
819         if (N->group_frame_type == (GF_CAPTURE | number)) break;
820         offset = P->last_group_offset;
821         }
822       offset = (number << 1) - 2;
823       Fcapture_last = number;
824       Fovector[offset] = P->eptr - mb->start_subject;
825       Fovector[offset+1] = Feptr - mb->start_subject;
826       if (offset >= Foffset_top) Foffset_top = offset + 2;
827       }
828     Fecode += PRIV(OP_lengths)[*Fecode];
829     break;
830 
831 
832     /* ===================================================================== */
833     /* Real or forced end of the pattern, assertion, or recursion. In an
834     assertion ACCEPT, update the last used pointer and remember the current
835     frame so that the captures and mark can be fished out of it. */
836 
837     case OP_ASSERT_ACCEPT:
838     if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
839     assert_accept_frame = F;
840     RRETURN(MATCH_ACCEPT);
841 
842     /* For ACCEPT within a recursion, we have to find the most recent
843     recursion. If not in a recursion, fall through to code that is common with
844     OP_END. */
845 
846     case OP_ACCEPT:
847     if (Fcurrent_recurse != RECURSE_UNSET)
848       {
849 #ifdef DEBUG_SHOW_OPS
850       fprintf(stderr, "++ Accept within recursion\n");
851 #endif
852       offset = Flast_group_offset;
853       for(;;)
854         {
855         if (offset == PCRE2_UNSET) return PCRE2_ERROR_INTERNAL;
856         N = (heapframe *)((char *)match_data->heapframes + offset);
857         P = (heapframe *)((char *)N - frame_size);
858         if (GF_IDMASK(N->group_frame_type) == GF_RECURSE) break;
859         offset = P->last_group_offset;
860         }
861 
862       /* N is now the frame of the recursion; the previous frame is at the
863       OP_RECURSE position. Go back there, copying the current subject position
864       and mark, and the start_match position (\K might have changed it), and
865       then move on past the OP_RECURSE. */
866 
867       P->eptr = Feptr;
868       P->mark = Fmark;
869       P->start_match = Fstart_match;
870       F = P;
871       Fecode += 1 + LINK_SIZE;
872       continue;
873       }
874     /* Fall through */
875 
876     /* OP_END itself can never be reached within a recursion because that is
877     picked up when the OP_KET that always precedes OP_END is reached. */
878 
879     case OP_END:
880 
881     /* Fail for an empty string match if either PCRE2_NOTEMPTY is set, or if
882     PCRE2_NOTEMPTY_ATSTART is set and we have matched at the start of the
883     subject. In both cases, backtracking will then try other alternatives, if
884     any. */
885 
886     if (Feptr == Fstart_match &&
887          ((mb->moptions & PCRE2_NOTEMPTY) != 0 ||
888            ((mb->moptions & PCRE2_NOTEMPTY_ATSTART) != 0 &&
889              Fstart_match == mb->start_subject + mb->start_offset)))
890       {
891 #ifdef DEBUG_SHOW_OPS
892       fprintf(stderr, "++ Backtrack because empty string\n");
893 #endif
894       RRETURN(MATCH_NOMATCH);
895       }
896 
897     /* Fail if PCRE2_ENDANCHORED is set and the end of the match is not
898     the end of the subject. After (*ACCEPT) we fail the entire match (at this
899     position) but backtrack if we've reached the end of the pattern. This
900     applies whether or not we are in a recursion. */
901 
902     if (Feptr < mb->end_subject &&
903         ((mb->moptions | mb->poptions) & PCRE2_ENDANCHORED) != 0)
904       {
905       if (Fop == OP_END)
906         {
907 #ifdef DEBUG_SHOW_OPS
908         fprintf(stderr, "++ Backtrack because not at end (endanchored set)\n");
909 #endif
910         RRETURN(MATCH_NOMATCH);
911         }
912 
913 #ifdef DEBUG_SHOW_OPS
914       fprintf(stderr, "++ Failed ACCEPT not at end (endanchnored set)\n");
915 #endif
916       return MATCH_NOMATCH;   /* (*ACCEPT) */
917       }
918 
919     /* We have a successful match of the whole pattern. Record the result and
920     then do a direct return from the function. If there is space in the offset
921     vector, set any pairs that follow the highest-numbered captured string but
922     are less than the number of capturing groups in the pattern to PCRE2_UNSET.
923     It is documented that this happens. "Gaps" are set to PCRE2_UNSET
924     dynamically. It is only those at the end that need setting here. */
925 
926     mb->end_match_ptr = Feptr;           /* Record where we ended */
927     mb->end_offset_top = Foffset_top;    /* and how many extracts were taken */
928     mb->mark = Fmark;                    /* and the last success mark */
929     if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
930 
931     match_data->ovector[0] = Fstart_match - mb->start_subject;
932     match_data->ovector[1] = Feptr - mb->start_subject;
933 
934     /* Set i to the smaller of the sizes of the external and frame ovectors. */
935 
936     i = 2 * ((top_bracket + 1 > match_data->oveccount)?
937       match_data->oveccount : top_bracket + 1);
938     memcpy(match_data->ovector + 2, Fovector, (i - 2) * sizeof(PCRE2_SIZE));
939     while (--i >= Foffset_top + 2) match_data->ovector[i] = PCRE2_UNSET;
940     return MATCH_MATCH;  /* Note: NOT RRETURN */
941 
942 
943     /*===================================================================== */
944     /* Match any single character type except newline; have to take care with
945     CRLF newlines and partial matching. */
946 
947     case OP_ANY:
948     if (IS_NEWLINE(Feptr)) RRETURN(MATCH_NOMATCH);
949     if (mb->partial != 0 &&
950         Feptr == mb->end_subject - 1 &&
951         NLBLOCK->nltype == NLTYPE_FIXED &&
952         NLBLOCK->nllen == 2 &&
953         UCHAR21TEST(Feptr) == NLBLOCK->nl[0])
954       {
955       mb->hitend = TRUE;
956       if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
957       }
958     /* Fall through */
959 
960     /* Match any single character whatsoever. */
961 
962     case OP_ALLANY:
963     if (Feptr >= mb->end_subject)  /* DO NOT merge the Feptr++ here; it must */
964       {                            /* not be updated before SCHECK_PARTIAL. */
965       SCHECK_PARTIAL();
966       RRETURN(MATCH_NOMATCH);
967       }
968     Feptr++;
969 #ifdef SUPPORT_UNICODE
970     if (utf) ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
971 #endif
972     Fecode++;
973     break;
974 
975 
976     /* ===================================================================== */
977     /* Match a single code unit, even in UTF mode. This opcode really does
978     match any code unit, even newline. (It really should be called ANYCODEUNIT,
979     of course - the byte name is from pre-16 bit days.) */
980 
981     case OP_ANYBYTE:
982     if (Feptr >= mb->end_subject)   /* DO NOT merge the Feptr++ here; it must */
983       {                             /* not be updated before SCHECK_PARTIAL. */
984       SCHECK_PARTIAL();
985       RRETURN(MATCH_NOMATCH);
986       }
987     Feptr++;
988     Fecode++;
989     break;
990 
991 
992     /* ===================================================================== */
993     /* Match a single character, casefully */
994 
995     case OP_CHAR:
996 #ifdef SUPPORT_UNICODE
997     if (utf)
998       {
999       Flength = 1;
1000       Fecode++;
1001       GETCHARLEN(fc, Fecode, Flength);
1002       if (Flength > (PCRE2_SIZE)(mb->end_subject - Feptr))
1003         {
1004         CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
1005         RRETURN(MATCH_NOMATCH);
1006         }
1007       for (; Flength > 0; Flength--)
1008         {
1009         if (*Fecode++ != UCHAR21INC(Feptr)) RRETURN(MATCH_NOMATCH);
1010         }
1011       }
1012     else
1013 #endif
1014 
1015     /* Not UTF mode */
1016       {
1017       if (mb->end_subject - Feptr < 1)
1018         {
1019         SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
1020         RRETURN(MATCH_NOMATCH);
1021         }
1022       if (Fecode[1] != *Feptr++) RRETURN(MATCH_NOMATCH);
1023       Fecode += 2;
1024       }
1025     break;
1026 
1027 
1028     /* ===================================================================== */
1029     /* Match a single character, caselessly. If we are at the end of the
1030     subject, give up immediately. We get here only when the pattern character
1031     has at most one other case. Characters with more than two cases are coded
1032     as OP_PROP with the pseudo-property PT_CLIST. */
1033 
1034     case OP_CHARI:
1035     if (Feptr >= mb->end_subject)
1036       {
1037       SCHECK_PARTIAL();
1038       RRETURN(MATCH_NOMATCH);
1039       }
1040 
1041 #ifdef SUPPORT_UNICODE
1042     if (utf)
1043       {
1044       Flength = 1;
1045       Fecode++;
1046       GETCHARLEN(fc, Fecode, Flength);
1047 
1048       /* If the pattern character's value is < 128, we know that its other case
1049       (if any) is also < 128 (and therefore only one code unit long in all
1050       code-unit widths), so we can use the fast lookup table. We checked above
1051       that there is at least one character left in the subject. */
1052 
1053       if (fc < 128)
1054         {
1055         uint32_t cc = UCHAR21(Feptr);
1056         if (mb->lcc[fc] != TABLE_GET(cc, mb->lcc, cc)) RRETURN(MATCH_NOMATCH);
1057         Fecode++;
1058         Feptr++;
1059         }
1060 
1061       /* Otherwise we must pick up the subject character and use Unicode
1062       property support to test its other case. Note that we cannot use the
1063       value of "Flength" to check for sufficient bytes left, because the other
1064       case of the character may have more or fewer code units. */
1065 
1066       else
1067         {
1068         uint32_t dc;
1069         GETCHARINC(dc, Feptr);
1070         Fecode += Flength;
1071         if (dc != fc && dc != UCD_OTHERCASE(fc)) RRETURN(MATCH_NOMATCH);
1072         }
1073       }
1074 
1075     /* If UCP is set without UTF we must do the same as above, but with one
1076     character per code unit. */
1077 
1078     else if (ucp)
1079       {
1080       uint32_t cc = UCHAR21(Feptr);
1081       fc = Fecode[1];
1082       if (fc < 128)
1083         {
1084         if (mb->lcc[fc] != TABLE_GET(cc, mb->lcc, cc)) RRETURN(MATCH_NOMATCH);
1085         }
1086       else
1087         {
1088         if (cc != fc && cc != UCD_OTHERCASE(fc)) RRETURN(MATCH_NOMATCH);
1089         }
1090       Feptr++;
1091       Fecode += 2;
1092       }
1093 
1094     else
1095 #endif   /* SUPPORT_UNICODE */
1096 
1097     /* Not UTF or UCP mode; use the table for characters < 256. */
1098       {
1099       if (TABLE_GET(Fecode[1], mb->lcc, Fecode[1])
1100           != TABLE_GET(*Feptr, mb->lcc, *Feptr)) RRETURN(MATCH_NOMATCH);
1101       Feptr++;
1102       Fecode += 2;
1103       }
1104     break;
1105 
1106 
1107     /* ===================================================================== */
1108     /* Match not a single character. */
1109 
1110     case OP_NOT:
1111     case OP_NOTI:
1112     if (Feptr >= mb->end_subject)
1113       {
1114       SCHECK_PARTIAL();
1115       RRETURN(MATCH_NOMATCH);
1116       }
1117 
1118 #ifdef SUPPORT_UNICODE
1119     if (utf)
1120       {
1121       uint32_t ch;
1122       Fecode++;
1123       GETCHARINC(ch, Fecode);
1124       GETCHARINC(fc, Feptr);
1125       if (ch == fc)
1126         {
1127         RRETURN(MATCH_NOMATCH);  /* Caseful match */
1128         }
1129       else if (Fop == OP_NOTI)   /* If caseless */
1130         {
1131         if (ch > 127)
1132           ch = UCD_OTHERCASE(ch);
1133         else
1134           ch = (mb->fcc)[ch];
1135         if (ch == fc) RRETURN(MATCH_NOMATCH);
1136         }
1137       }
1138 
1139     /* UCP without UTF is as above, but with one character per code unit. */
1140 
1141     else if (ucp)
1142       {
1143       uint32_t ch;
1144       fc = UCHAR21INC(Feptr);
1145       ch = Fecode[1];
1146       Fecode += 2;
1147 
1148       if (ch == fc)
1149         {
1150         RRETURN(MATCH_NOMATCH);  /* Caseful match */
1151         }
1152       else if (Fop == OP_NOTI)   /* If caseless */
1153         {
1154         if (ch > 127)
1155           ch = UCD_OTHERCASE(ch);
1156         else
1157           ch = (mb->fcc)[ch];
1158         if (ch == fc) RRETURN(MATCH_NOMATCH);
1159         }
1160       }
1161 
1162     else
1163 #endif  /* SUPPORT_UNICODE */
1164 
1165     /* Neither UTF nor UCP is set */
1166 
1167       {
1168       uint32_t ch = Fecode[1];
1169       fc = UCHAR21INC(Feptr);
1170       if (ch == fc || (Fop == OP_NOTI && TABLE_GET(ch, mb->fcc, ch) == fc))
1171         RRETURN(MATCH_NOMATCH);
1172       Fecode += 2;
1173       }
1174     break;
1175 
1176 
1177     /* ===================================================================== */
1178     /* Match a single character repeatedly. */
1179 
1180 #define Loclength    F->temp_size
1181 #define Lstart_eptr  F->temp_sptr[0]
1182 #define Lcharptr     F->temp_sptr[1]
1183 #define Lmin         F->temp_32[0]
1184 #define Lmax         F->temp_32[1]
1185 #define Lc           F->temp_32[2]
1186 #define Loc          F->temp_32[3]
1187 
1188     case OP_EXACT:
1189     case OP_EXACTI:
1190     Lmin = Lmax = GET2(Fecode, 1);
1191     Fecode += 1 + IMM2_SIZE;
1192     goto REPEATCHAR;
1193 
1194     case OP_POSUPTO:
1195     case OP_POSUPTOI:
1196     reptype = REPTYPE_POS;
1197     Lmin = 0;
1198     Lmax = GET2(Fecode, 1);
1199     Fecode += 1 + IMM2_SIZE;
1200     goto REPEATCHAR;
1201 
1202     case OP_UPTO:
1203     case OP_UPTOI:
1204     reptype = REPTYPE_MAX;
1205     Lmin = 0;
1206     Lmax = GET2(Fecode, 1);
1207     Fecode += 1 + IMM2_SIZE;
1208     goto REPEATCHAR;
1209 
1210     case OP_MINUPTO:
1211     case OP_MINUPTOI:
1212     reptype = REPTYPE_MIN;
1213     Lmin = 0;
1214     Lmax = GET2(Fecode, 1);
1215     Fecode += 1 + IMM2_SIZE;
1216     goto REPEATCHAR;
1217 
1218     case OP_POSSTAR:
1219     case OP_POSSTARI:
1220     reptype = REPTYPE_POS;
1221     Lmin = 0;
1222     Lmax = UINT32_MAX;
1223     Fecode++;
1224     goto REPEATCHAR;
1225 
1226     case OP_POSPLUS:
1227     case OP_POSPLUSI:
1228     reptype = REPTYPE_POS;
1229     Lmin = 1;
1230     Lmax = UINT32_MAX;
1231     Fecode++;
1232     goto REPEATCHAR;
1233 
1234     case OP_POSQUERY:
1235     case OP_POSQUERYI:
1236     reptype = REPTYPE_POS;
1237     Lmin = 0;
1238     Lmax = 1;
1239     Fecode++;
1240     goto REPEATCHAR;
1241 
1242     case OP_STAR:
1243     case OP_STARI:
1244     case OP_MINSTAR:
1245     case OP_MINSTARI:
1246     case OP_PLUS:
1247     case OP_PLUSI:
1248     case OP_MINPLUS:
1249     case OP_MINPLUSI:
1250     case OP_QUERY:
1251     case OP_QUERYI:
1252     case OP_MINQUERY:
1253     case OP_MINQUERYI:
1254     fc = *Fecode++ - ((Fop < OP_STARI)? OP_STAR : OP_STARI);
1255     Lmin = rep_min[fc];
1256     Lmax = rep_max[fc];
1257     reptype = rep_typ[fc];
1258 
1259     /* Common code for all repeated single-character matches. We first check
1260     for the minimum number of characters. If the minimum equals the maximum, we
1261     are done. Otherwise, if minimizing, check the rest of the pattern for a
1262     match; if there isn't one, advance up to the maximum, one character at a
1263     time.
1264 
1265     If maximizing, advance up to the maximum number of matching characters,
1266     until Feptr is past the end of the maximum run. If possessive, we are
1267     then done (no backing up). Otherwise, match at this position; anything
1268     other than no match is immediately returned. For nomatch, back up one
1269     character, unless we are matching \R and the last thing matched was
1270     \r\n, in which case, back up two code units until we reach the first
1271     optional character position.
1272 
1273     The various UTF/non-UTF and caseful/caseless cases are handled separately,
1274     for speed. */
1275 
1276     REPEATCHAR:
1277 #ifdef SUPPORT_UNICODE
1278     if (utf)
1279       {
1280       Flength = 1;
1281       Lcharptr = Fecode;
1282       GETCHARLEN(fc, Fecode, Flength);
1283       Fecode += Flength;
1284 
1285       /* Handle multi-code-unit character matching, caseful and caseless. */
1286 
1287       if (Flength > 1)
1288         {
1289         uint32_t othercase;
1290 
1291         if (Fop >= OP_STARI &&     /* Caseless */
1292             (othercase = UCD_OTHERCASE(fc)) != fc)
1293           Loclength = PRIV(ord2utf)(othercase, Foccu);
1294         else Loclength = 0;
1295 
1296         for (i = 1; i <= Lmin; i++)
1297           {
1298           if (Feptr <= mb->end_subject - Flength &&
1299             memcmp(Feptr, Lcharptr, CU2BYTES(Flength)) == 0) Feptr += Flength;
1300           else if (Loclength > 0 &&
1301                    Feptr <= mb->end_subject - Loclength &&
1302                    memcmp(Feptr, Foccu, CU2BYTES(Loclength)) == 0)
1303             Feptr += Loclength;
1304           else
1305             {
1306             CHECK_PARTIAL();
1307             RRETURN(MATCH_NOMATCH);
1308             }
1309           }
1310 
1311         if (Lmin == Lmax) continue;
1312 
1313         if (reptype == REPTYPE_MIN)
1314           {
1315           for (;;)
1316             {
1317             RMATCH(Fecode, RM202);
1318             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1319             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1320             if (Feptr <= mb->end_subject - Flength &&
1321               memcmp(Feptr, Lcharptr, CU2BYTES(Flength)) == 0) Feptr += Flength;
1322             else if (Loclength > 0 &&
1323                      Feptr <= mb->end_subject - Loclength &&
1324                      memcmp(Feptr, Foccu, CU2BYTES(Loclength)) == 0)
1325               Feptr += Loclength;
1326             else
1327               {
1328               CHECK_PARTIAL();
1329               RRETURN(MATCH_NOMATCH);
1330               }
1331             }
1332           /* Control never gets here */
1333           }
1334 
1335         else  /* Maximize */
1336           {
1337           Lstart_eptr = Feptr;
1338           for (i = Lmin; i < Lmax; i++)
1339             {
1340             if (Feptr <= mb->end_subject - Flength &&
1341                 memcmp(Feptr, Lcharptr, CU2BYTES(Flength)) == 0)
1342               Feptr += Flength;
1343             else if (Loclength > 0 &&
1344                      Feptr <= mb->end_subject - Loclength &&
1345                      memcmp(Feptr, Foccu, CU2BYTES(Loclength)) == 0)
1346               Feptr += Loclength;
1347             else
1348               {
1349               CHECK_PARTIAL();
1350               break;
1351               }
1352             }
1353 
1354           /* After \C in UTF mode, Lstart_eptr might be in the middle of a
1355           Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
1356           go too far. */
1357 
1358           if (reptype != REPTYPE_POS) for(;;)
1359             {
1360             if (Feptr <= Lstart_eptr) break;
1361             RMATCH(Fecode, RM203);
1362             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1363             Feptr--;
1364             BACKCHAR(Feptr);
1365             }
1366           }
1367         break;   /* End of repeated wide character handling */
1368         }
1369 
1370       /* Length of UTF character is 1. Put it into the preserved variable and
1371       fall through to the non-UTF code. */
1372 
1373       Lc = fc;
1374       }
1375     else
1376 #endif  /* SUPPORT_UNICODE */
1377 
1378     /* When not in UTF mode, load a single-code-unit character. Then proceed as
1379     above, using Unicode casing if either UTF or UCP is set. */
1380 
1381     Lc = *Fecode++;
1382 
1383     /* Caseless comparison */
1384 
1385     if (Fop >= OP_STARI)
1386       {
1387 #if PCRE2_CODE_UNIT_WIDTH == 8
1388 #ifdef SUPPORT_UNICODE
1389       if (ucp && !utf && Lc > 127) Loc = UCD_OTHERCASE(Lc);
1390       else
1391 #endif  /* SUPPORT_UNICODE */
1392       /* Lc will be < 128 in UTF-8 mode. */
1393       Loc = mb->fcc[Lc];
1394 #else /* 16-bit & 32-bit */
1395 #ifdef SUPPORT_UNICODE
1396       if ((utf || ucp) && Lc > 127) Loc = UCD_OTHERCASE(Lc);
1397       else
1398 #endif  /* SUPPORT_UNICODE */
1399       Loc = TABLE_GET(Lc, mb->fcc, Lc);
1400 #endif  /* PCRE2_CODE_UNIT_WIDTH == 8 */
1401 
1402       for (i = 1; i <= Lmin; i++)
1403         {
1404         uint32_t cc;                 /* Faster than PCRE2_UCHAR */
1405         if (Feptr >= mb->end_subject)
1406           {
1407           SCHECK_PARTIAL();
1408           RRETURN(MATCH_NOMATCH);
1409           }
1410         cc = UCHAR21TEST(Feptr);
1411         if (Lc != cc && Loc != cc) RRETURN(MATCH_NOMATCH);
1412         Feptr++;
1413         }
1414       if (Lmin == Lmax) continue;
1415 
1416       if (reptype == REPTYPE_MIN)
1417         {
1418         for (;;)
1419           {
1420           uint32_t cc;               /* Faster than PCRE2_UCHAR */
1421           RMATCH(Fecode, RM25);
1422           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1423           if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1424           if (Feptr >= mb->end_subject)
1425             {
1426             SCHECK_PARTIAL();
1427             RRETURN(MATCH_NOMATCH);
1428             }
1429           cc = UCHAR21TEST(Feptr);
1430           if (Lc != cc && Loc != cc) RRETURN(MATCH_NOMATCH);
1431           Feptr++;
1432           }
1433         /* Control never gets here */
1434         }
1435 
1436       else  /* Maximize */
1437         {
1438         Lstart_eptr = Feptr;
1439         for (i = Lmin; i < Lmax; i++)
1440           {
1441           uint32_t cc;               /* Faster than PCRE2_UCHAR */
1442           if (Feptr >= mb->end_subject)
1443             {
1444             SCHECK_PARTIAL();
1445             break;
1446             }
1447           cc = UCHAR21TEST(Feptr);
1448           if (Lc != cc && Loc != cc) break;
1449           Feptr++;
1450           }
1451         if (reptype != REPTYPE_POS) for (;;)
1452           {
1453           if (Feptr == Lstart_eptr) break;
1454           RMATCH(Fecode, RM26);
1455           Feptr--;
1456           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1457           }
1458         }
1459       }
1460 
1461     /* Caseful comparisons (includes all multi-byte characters) */
1462 
1463     else
1464       {
1465       for (i = 1; i <= Lmin; i++)
1466         {
1467         if (Feptr >= mb->end_subject)
1468           {
1469           SCHECK_PARTIAL();
1470           RRETURN(MATCH_NOMATCH);
1471           }
1472         if (Lc != UCHAR21INCTEST(Feptr)) RRETURN(MATCH_NOMATCH);
1473         }
1474 
1475       if (Lmin == Lmax) continue;
1476 
1477       if (reptype == REPTYPE_MIN)
1478         {
1479         for (;;)
1480           {
1481           RMATCH(Fecode, RM27);
1482           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1483           if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1484           if (Feptr >= mb->end_subject)
1485             {
1486             SCHECK_PARTIAL();
1487             RRETURN(MATCH_NOMATCH);
1488             }
1489           if (Lc != UCHAR21INCTEST(Feptr)) RRETURN(MATCH_NOMATCH);
1490           }
1491         /* Control never gets here */
1492         }
1493       else  /* Maximize */
1494         {
1495         Lstart_eptr = Feptr;
1496         for (i = Lmin; i < Lmax; i++)
1497           {
1498           if (Feptr >= mb->end_subject)
1499             {
1500             SCHECK_PARTIAL();
1501             break;
1502             }
1503 
1504           if (Lc != UCHAR21TEST(Feptr)) break;
1505           Feptr++;
1506           }
1507 
1508         if (reptype != REPTYPE_POS) for (;;)
1509           {
1510           if (Feptr <= Lstart_eptr) break;
1511           RMATCH(Fecode, RM28);
1512           Feptr--;
1513           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1514           }
1515         }
1516       }
1517     break;
1518 
1519 #undef Loclength
1520 #undef Lstart_eptr
1521 #undef Lcharptr
1522 #undef Lmin
1523 #undef Lmax
1524 #undef Lc
1525 #undef Loc
1526 
1527 
1528     /* ===================================================================== */
1529     /* Match a negated single one-byte character repeatedly. This is almost a
1530     repeat of the code for a repeated single character, but I haven't found a
1531     nice way of commoning these up that doesn't require a test of the
1532     positive/negative option for each character match. Maybe that wouldn't add
1533     very much to the time taken, but character matching *is* what this is all
1534     about... */
1535 
1536 #define Lstart_eptr  F->temp_sptr[0]
1537 #define Lmin         F->temp_32[0]
1538 #define Lmax         F->temp_32[1]
1539 #define Lc           F->temp_32[2]
1540 #define Loc          F->temp_32[3]
1541 
1542     case OP_NOTEXACT:
1543     case OP_NOTEXACTI:
1544     Lmin = Lmax = GET2(Fecode, 1);
1545     Fecode += 1 + IMM2_SIZE;
1546     goto REPEATNOTCHAR;
1547 
1548     case OP_NOTUPTO:
1549     case OP_NOTUPTOI:
1550     Lmin = 0;
1551     Lmax = GET2(Fecode, 1);
1552     reptype = REPTYPE_MAX;
1553     Fecode += 1 + IMM2_SIZE;
1554     goto REPEATNOTCHAR;
1555 
1556     case OP_NOTMINUPTO:
1557     case OP_NOTMINUPTOI:
1558     Lmin = 0;
1559     Lmax = GET2(Fecode, 1);
1560     reptype = REPTYPE_MIN;
1561     Fecode += 1 + IMM2_SIZE;
1562     goto REPEATNOTCHAR;
1563 
1564     case OP_NOTPOSSTAR:
1565     case OP_NOTPOSSTARI:
1566     reptype = REPTYPE_POS;
1567     Lmin = 0;
1568     Lmax = UINT32_MAX;
1569     Fecode++;
1570     goto REPEATNOTCHAR;
1571 
1572     case OP_NOTPOSPLUS:
1573     case OP_NOTPOSPLUSI:
1574     reptype = REPTYPE_POS;
1575     Lmin = 1;
1576     Lmax = UINT32_MAX;
1577     Fecode++;
1578     goto REPEATNOTCHAR;
1579 
1580     case OP_NOTPOSQUERY:
1581     case OP_NOTPOSQUERYI:
1582     reptype = REPTYPE_POS;
1583     Lmin = 0;
1584     Lmax = 1;
1585     Fecode++;
1586     goto REPEATNOTCHAR;
1587 
1588     case OP_NOTPOSUPTO:
1589     case OP_NOTPOSUPTOI:
1590     reptype = REPTYPE_POS;
1591     Lmin = 0;
1592     Lmax = GET2(Fecode, 1);
1593     Fecode += 1 + IMM2_SIZE;
1594     goto REPEATNOTCHAR;
1595 
1596     case OP_NOTSTAR:
1597     case OP_NOTSTARI:
1598     case OP_NOTMINSTAR:
1599     case OP_NOTMINSTARI:
1600     case OP_NOTPLUS:
1601     case OP_NOTPLUSI:
1602     case OP_NOTMINPLUS:
1603     case OP_NOTMINPLUSI:
1604     case OP_NOTQUERY:
1605     case OP_NOTQUERYI:
1606     case OP_NOTMINQUERY:
1607     case OP_NOTMINQUERYI:
1608     fc = *Fecode++ - ((Fop >= OP_NOTSTARI)? OP_NOTSTARI: OP_NOTSTAR);
1609     Lmin = rep_min[fc];
1610     Lmax = rep_max[fc];
1611     reptype = rep_typ[fc];
1612 
1613     /* Common code for all repeated single-character non-matches. */
1614 
1615     REPEATNOTCHAR:
1616     GETCHARINCTEST(Lc, Fecode);
1617 
1618     /* The code is duplicated for the caseless and caseful cases, for speed,
1619     since matching characters is likely to be quite common. First, ensure the
1620     minimum number of matches are present. If Lmin = Lmax, we are done.
1621     Otherwise, if minimizing, keep trying the rest of the expression and
1622     advancing one matching character if failing, up to the maximum.
1623     Alternatively, if maximizing, find the maximum number of characters and
1624     work backwards. */
1625 
1626     if (Fop >= OP_NOTSTARI)     /* Caseless */
1627       {
1628 #ifdef SUPPORT_UNICODE
1629       if ((utf || ucp) && Lc > 127)
1630         Loc = UCD_OTHERCASE(Lc);
1631       else
1632 #endif /* SUPPORT_UNICODE */
1633 
1634       Loc = TABLE_GET(Lc, mb->fcc, Lc);  /* Other case from table */
1635 
1636 #ifdef SUPPORT_UNICODE
1637       if (utf)
1638         {
1639         uint32_t d;
1640         for (i = 1; i <= Lmin; i++)
1641           {
1642           if (Feptr >= mb->end_subject)
1643             {
1644             SCHECK_PARTIAL();
1645             RRETURN(MATCH_NOMATCH);
1646             }
1647           GETCHARINC(d, Feptr);
1648           if (Lc == d || Loc == d) RRETURN(MATCH_NOMATCH);
1649           }
1650         }
1651       else
1652 #endif  /* SUPPORT_UNICODE */
1653 
1654       /* Not UTF mode */
1655         {
1656         for (i = 1; i <= Lmin; i++)
1657           {
1658           if (Feptr >= mb->end_subject)
1659             {
1660             SCHECK_PARTIAL();
1661             RRETURN(MATCH_NOMATCH);
1662             }
1663           if (Lc == *Feptr || Loc == *Feptr) RRETURN(MATCH_NOMATCH);
1664           Feptr++;
1665           }
1666         }
1667 
1668       if (Lmin == Lmax) continue;  /* Finished for exact count */
1669 
1670       if (reptype == REPTYPE_MIN)
1671         {
1672 #ifdef SUPPORT_UNICODE
1673         if (utf)
1674           {
1675           uint32_t d;
1676           for (;;)
1677             {
1678             RMATCH(Fecode, RM204);
1679             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1680             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1681             if (Feptr >= mb->end_subject)
1682               {
1683               SCHECK_PARTIAL();
1684               RRETURN(MATCH_NOMATCH);
1685               }
1686             GETCHARINC(d, Feptr);
1687             if (Lc == d || Loc == d) RRETURN(MATCH_NOMATCH);
1688             }
1689           }
1690         else
1691 #endif  /*SUPPORT_UNICODE */
1692 
1693         /* Not UTF mode */
1694           {
1695           for (;;)
1696             {
1697             RMATCH(Fecode, RM29);
1698             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1699             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1700             if (Feptr >= mb->end_subject)
1701               {
1702               SCHECK_PARTIAL();
1703               RRETURN(MATCH_NOMATCH);
1704               }
1705             if (Lc == *Feptr || Loc == *Feptr) RRETURN(MATCH_NOMATCH);
1706             Feptr++;
1707             }
1708           }
1709         /* Control never gets here */
1710         }
1711 
1712       /* Maximize case */
1713 
1714       else
1715         {
1716         Lstart_eptr = Feptr;
1717 
1718 #ifdef SUPPORT_UNICODE
1719         if (utf)
1720           {
1721           uint32_t d;
1722           for (i = Lmin; i < Lmax; i++)
1723             {
1724             int len = 1;
1725             if (Feptr >= mb->end_subject)
1726               {
1727               SCHECK_PARTIAL();
1728               break;
1729               }
1730             GETCHARLEN(d, Feptr, len);
1731             if (Lc == d || Loc == d) break;
1732             Feptr += len;
1733             }
1734 
1735           /* After \C in UTF mode, Lstart_eptr might be in the middle of a
1736           Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
1737           go too far. */
1738 
1739           if (reptype != REPTYPE_POS) for(;;)
1740             {
1741             if (Feptr <= Lstart_eptr) break;
1742             RMATCH(Fecode, RM205);
1743             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1744             Feptr--;
1745             BACKCHAR(Feptr);
1746             }
1747           }
1748         else
1749 #endif  /* SUPPORT_UNICODE */
1750 
1751         /* Not UTF mode */
1752           {
1753           for (i = Lmin; i < Lmax; i++)
1754             {
1755             if (Feptr >= mb->end_subject)
1756               {
1757               SCHECK_PARTIAL();
1758               break;
1759               }
1760             if (Lc == *Feptr || Loc == *Feptr) break;
1761             Feptr++;
1762             }
1763           if (reptype != REPTYPE_POS) for (;;)
1764             {
1765             if (Feptr == Lstart_eptr) break;
1766             RMATCH(Fecode, RM30);
1767             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1768             Feptr--;
1769             }
1770           }
1771         }
1772       }
1773 
1774     /* Caseful comparisons */
1775 
1776     else
1777       {
1778 #ifdef SUPPORT_UNICODE
1779       if (utf)
1780         {
1781         uint32_t d;
1782         for (i = 1; i <= Lmin; i++)
1783           {
1784           if (Feptr >= mb->end_subject)
1785             {
1786             SCHECK_PARTIAL();
1787             RRETURN(MATCH_NOMATCH);
1788             }
1789           GETCHARINC(d, Feptr);
1790           if (Lc == d) RRETURN(MATCH_NOMATCH);
1791           }
1792         }
1793       else
1794 #endif
1795       /* Not UTF mode */
1796         {
1797         for (i = 1; i <= Lmin; i++)
1798           {
1799           if (Feptr >= mb->end_subject)
1800             {
1801             SCHECK_PARTIAL();
1802             RRETURN(MATCH_NOMATCH);
1803             }
1804           if (Lc == *Feptr++) RRETURN(MATCH_NOMATCH);
1805           }
1806         }
1807 
1808       if (Lmin == Lmax) continue;
1809 
1810       if (reptype == REPTYPE_MIN)
1811         {
1812 #ifdef SUPPORT_UNICODE
1813         if (utf)
1814           {
1815           uint32_t d;
1816           for (;;)
1817             {
1818             RMATCH(Fecode, RM206);
1819             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1820             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1821             if (Feptr >= mb->end_subject)
1822               {
1823               SCHECK_PARTIAL();
1824               RRETURN(MATCH_NOMATCH);
1825               }
1826             GETCHARINC(d, Feptr);
1827             if (Lc == d) RRETURN(MATCH_NOMATCH);
1828             }
1829           }
1830         else
1831 #endif
1832         /* Not UTF mode */
1833           {
1834           for (;;)
1835             {
1836             RMATCH(Fecode, RM31);
1837             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1838             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1839             if (Feptr >= mb->end_subject)
1840               {
1841               SCHECK_PARTIAL();
1842               RRETURN(MATCH_NOMATCH);
1843               }
1844             if (Lc == *Feptr++) RRETURN(MATCH_NOMATCH);
1845             }
1846           }
1847         /* Control never gets here */
1848         }
1849 
1850       /* Maximize case */
1851 
1852       else
1853         {
1854         Lstart_eptr = Feptr;
1855 
1856 #ifdef SUPPORT_UNICODE
1857         if (utf)
1858           {
1859           uint32_t d;
1860           for (i = Lmin; i < Lmax; i++)
1861             {
1862             int len = 1;
1863             if (Feptr >= mb->end_subject)
1864               {
1865               SCHECK_PARTIAL();
1866               break;
1867               }
1868             GETCHARLEN(d, Feptr, len);
1869             if (Lc == d) break;
1870             Feptr += len;
1871             }
1872 
1873           /* After \C in UTF mode, Lstart_eptr might be in the middle of a
1874           Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
1875           go too far. */
1876 
1877           if (reptype != REPTYPE_POS) for(;;)
1878             {
1879             if (Feptr <= Lstart_eptr) break;
1880             RMATCH(Fecode, RM207);
1881             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1882             Feptr--;
1883             BACKCHAR(Feptr);
1884             }
1885           }
1886         else
1887 #endif
1888         /* Not UTF mode */
1889           {
1890           for (i = Lmin; i < Lmax; i++)
1891             {
1892             if (Feptr >= mb->end_subject)
1893               {
1894               SCHECK_PARTIAL();
1895               break;
1896               }
1897             if (Lc == *Feptr) break;
1898             Feptr++;
1899             }
1900           if (reptype != REPTYPE_POS) for (;;)
1901             {
1902             if (Feptr == Lstart_eptr) break;
1903             RMATCH(Fecode, RM32);
1904             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1905             Feptr--;
1906             }
1907           }
1908         }
1909       }
1910     break;
1911 
1912 #undef Lstart_eptr
1913 #undef Lmin
1914 #undef Lmax
1915 #undef Lc
1916 #undef Loc
1917 
1918 
1919     /* ===================================================================== */
1920     /* Match a bit-mapped character class, possibly repeatedly. These opcodes
1921     are used when all the characters in the class have values in the range
1922     0-255, and either the matching is caseful, or the characters are in the
1923     range 0-127 when UTF processing is enabled. The only difference between
1924     OP_CLASS and OP_NCLASS occurs when a data character outside the range is
1925     encountered. */
1926 
1927 #define Lmin               F->temp_32[0]
1928 #define Lmax               F->temp_32[1]
1929 #define Lstart_eptr        F->temp_sptr[0]
1930 #define Lbyte_map_address  F->temp_sptr[1]
1931 #define Lbyte_map          ((unsigned char *)Lbyte_map_address)
1932 
1933     case OP_NCLASS:
1934     case OP_CLASS:
1935       {
1936       Lbyte_map_address = Fecode + 1;           /* Save for matching */
1937       Fecode += 1 + (32 / sizeof(PCRE2_UCHAR)); /* Advance past the item */
1938 
1939       /* Look past the end of the item to see if there is repeat information
1940       following. Then obey similar code to character type repeats. */
1941 
1942       switch (*Fecode)
1943         {
1944         case OP_CRSTAR:
1945         case OP_CRMINSTAR:
1946         case OP_CRPLUS:
1947         case OP_CRMINPLUS:
1948         case OP_CRQUERY:
1949         case OP_CRMINQUERY:
1950         case OP_CRPOSSTAR:
1951         case OP_CRPOSPLUS:
1952         case OP_CRPOSQUERY:
1953         fc = *Fecode++ - OP_CRSTAR;
1954         Lmin = rep_min[fc];
1955         Lmax = rep_max[fc];
1956         reptype = rep_typ[fc];
1957         break;
1958 
1959         case OP_CRRANGE:
1960         case OP_CRMINRANGE:
1961         case OP_CRPOSRANGE:
1962         Lmin = GET2(Fecode, 1);
1963         Lmax = GET2(Fecode, 1 + IMM2_SIZE);
1964         if (Lmax == 0) Lmax = UINT32_MAX;       /* Max 0 => infinity */
1965         reptype = rep_typ[*Fecode - OP_CRSTAR];
1966         Fecode += 1 + 2 * IMM2_SIZE;
1967         break;
1968 
1969         default:               /* No repeat follows */
1970         Lmin = Lmax = 1;
1971         break;
1972         }
1973 
1974       /* First, ensure the minimum number of matches are present. */
1975 
1976 #ifdef SUPPORT_UNICODE
1977       if (utf)
1978         {
1979         for (i = 1; i <= Lmin; i++)
1980           {
1981           if (Feptr >= mb->end_subject)
1982             {
1983             SCHECK_PARTIAL();
1984             RRETURN(MATCH_NOMATCH);
1985             }
1986           GETCHARINC(fc, Feptr);
1987           if (fc > 255)
1988             {
1989             if (Fop == OP_CLASS) RRETURN(MATCH_NOMATCH);
1990             }
1991           else
1992             if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
1993           }
1994         }
1995       else
1996 #endif
1997       /* Not UTF mode */
1998         {
1999         for (i = 1; i <= Lmin; i++)
2000           {
2001           if (Feptr >= mb->end_subject)
2002             {
2003             SCHECK_PARTIAL();
2004             RRETURN(MATCH_NOMATCH);
2005             }
2006           fc = *Feptr++;
2007 #if PCRE2_CODE_UNIT_WIDTH != 8
2008           if (fc > 255)
2009             {
2010             if (Fop == OP_CLASS) RRETURN(MATCH_NOMATCH);
2011             }
2012           else
2013 #endif
2014           if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
2015           }
2016         }
2017 
2018       /* If Lmax == Lmin we are done. Continue with main loop. */
2019 
2020       if (Lmin == Lmax) continue;
2021 
2022       /* If minimizing, keep testing the rest of the expression and advancing
2023       the pointer while it matches the class. */
2024 
2025       if (reptype == REPTYPE_MIN)
2026         {
2027 #ifdef SUPPORT_UNICODE
2028         if (utf)
2029           {
2030           for (;;)
2031             {
2032             RMATCH(Fecode, RM200);
2033             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2034             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
2035             if (Feptr >= mb->end_subject)
2036               {
2037               SCHECK_PARTIAL();
2038               RRETURN(MATCH_NOMATCH);
2039               }
2040             GETCHARINC(fc, Feptr);
2041             if (fc > 255)
2042               {
2043               if (Fop == OP_CLASS) RRETURN(MATCH_NOMATCH);
2044               }
2045             else
2046               if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
2047             }
2048           }
2049         else
2050 #endif
2051         /* Not UTF mode */
2052           {
2053           for (;;)
2054             {
2055             RMATCH(Fecode, RM23);
2056             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2057             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
2058             if (Feptr >= mb->end_subject)
2059               {
2060               SCHECK_PARTIAL();
2061               RRETURN(MATCH_NOMATCH);
2062               }
2063             fc = *Feptr++;
2064 #if PCRE2_CODE_UNIT_WIDTH != 8
2065             if (fc > 255)
2066               {
2067               if (Fop == OP_CLASS) RRETURN(MATCH_NOMATCH);
2068               }
2069             else
2070 #endif
2071             if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
2072             }
2073           }
2074         /* Control never gets here */
2075         }
2076 
2077       /* If maximizing, find the longest possible run, then work backwards. */
2078 
2079       else
2080         {
2081         Lstart_eptr = Feptr;
2082 
2083 #ifdef SUPPORT_UNICODE
2084         if (utf)
2085           {
2086           for (i = Lmin; i < Lmax; i++)
2087             {
2088             int len = 1;
2089             if (Feptr >= mb->end_subject)
2090               {
2091               SCHECK_PARTIAL();
2092               break;
2093               }
2094             GETCHARLEN(fc, Feptr, len);
2095             if (fc > 255)
2096               {
2097               if (Fop == OP_CLASS) break;
2098               }
2099             else
2100               if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) break;
2101             Feptr += len;
2102             }
2103 
2104           if (reptype == REPTYPE_POS) continue;    /* No backtracking */
2105 
2106           /* After \C in UTF mode, Lstart_eptr might be in the middle of a
2107           Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
2108           go too far. */
2109 
2110           for (;;)
2111             {
2112             RMATCH(Fecode, RM201);
2113             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2114             if (Feptr-- <= Lstart_eptr) break;  /* Tried at original position */
2115             BACKCHAR(Feptr);
2116             }
2117           }
2118         else
2119 #endif
2120           /* Not UTF mode */
2121           {
2122           for (i = Lmin; i < Lmax; i++)
2123             {
2124             if (Feptr >= mb->end_subject)
2125               {
2126               SCHECK_PARTIAL();
2127               break;
2128               }
2129             fc = *Feptr;
2130 #if PCRE2_CODE_UNIT_WIDTH != 8
2131             if (fc > 255)
2132               {
2133               if (Fop == OP_CLASS) break;
2134               }
2135             else
2136 #endif
2137             if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) break;
2138             Feptr++;
2139             }
2140 
2141           if (reptype == REPTYPE_POS) continue;    /* No backtracking */
2142 
2143           while (Feptr >= Lstart_eptr)
2144             {
2145             RMATCH(Fecode, RM24);
2146             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2147             Feptr--;
2148             }
2149           }
2150 
2151         RRETURN(MATCH_NOMATCH);
2152         }
2153       }
2154     /* Control never gets here */
2155 
2156 #undef Lbyte_map_address
2157 #undef Lbyte_map
2158 #undef Lstart_eptr
2159 #undef Lmin
2160 #undef Lmax
2161 
2162 
2163     /* ===================================================================== */
2164     /* Match an extended character class. In the 8-bit library, this opcode is
2165     encountered only when UTF-8 mode is supported. In the 16-bit and
2166     32-bit libraries, codepoints greater than 255 may be encountered even when
2167     UTF is not supported. */
2168 
2169 #define Lstart_eptr  F->temp_sptr[0]
2170 #define Lxclass_data F->temp_sptr[1]
2171 #define Lmin         F->temp_32[0]
2172 #define Lmax         F->temp_32[1]
2173 
2174 #ifdef SUPPORT_WIDE_CHARS
2175     case OP_XCLASS:
2176       {
2177       Lxclass_data = Fecode + 1 + LINK_SIZE;  /* Save for matching */
2178       Fecode += GET(Fecode, 1);               /* Advance past the item */
2179 
2180       switch (*Fecode)
2181         {
2182         case OP_CRSTAR:
2183         case OP_CRMINSTAR:
2184         case OP_CRPLUS:
2185         case OP_CRMINPLUS:
2186         case OP_CRQUERY:
2187         case OP_CRMINQUERY:
2188         case OP_CRPOSSTAR:
2189         case OP_CRPOSPLUS:
2190         case OP_CRPOSQUERY:
2191         fc = *Fecode++ - OP_CRSTAR;
2192         Lmin = rep_min[fc];
2193         Lmax = rep_max[fc];
2194         reptype = rep_typ[fc];
2195         break;
2196 
2197         case OP_CRRANGE:
2198         case OP_CRMINRANGE:
2199         case OP_CRPOSRANGE:
2200         Lmin = GET2(Fecode, 1);
2201         Lmax = GET2(Fecode, 1 + IMM2_SIZE);
2202         if (Lmax == 0) Lmax = UINT32_MAX;  /* Max 0 => infinity */
2203         reptype = rep_typ[*Fecode - OP_CRSTAR];
2204         Fecode += 1 + 2 * IMM2_SIZE;
2205         break;
2206 
2207         default:               /* No repeat follows */
2208         Lmin = Lmax = 1;
2209         break;
2210         }
2211 
2212       /* First, ensure the minimum number of matches are present. */
2213 
2214       for (i = 1; i <= Lmin; i++)
2215         {
2216         if (Feptr >= mb->end_subject)
2217           {
2218           SCHECK_PARTIAL();
2219           RRETURN(MATCH_NOMATCH);
2220           }
2221         GETCHARINCTEST(fc, Feptr);
2222         if (!PRIV(xclass)(fc, Lxclass_data, utf)) RRETURN(MATCH_NOMATCH);
2223         }
2224 
2225       /* If Lmax == Lmin we can just continue with the main loop. */
2226 
2227       if (Lmin == Lmax) continue;
2228 
2229       /* If minimizing, keep testing the rest of the expression and advancing
2230       the pointer while it matches the class. */
2231 
2232       if (reptype == REPTYPE_MIN)
2233         {
2234         for (;;)
2235           {
2236           RMATCH(Fecode, RM100);
2237           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2238           if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
2239           if (Feptr >= mb->end_subject)
2240             {
2241             SCHECK_PARTIAL();
2242             RRETURN(MATCH_NOMATCH);
2243             }
2244           GETCHARINCTEST(fc, Feptr);
2245           if (!PRIV(xclass)(fc, Lxclass_data, utf)) RRETURN(MATCH_NOMATCH);
2246           }
2247         /* Control never gets here */
2248         }
2249 
2250       /* If maximizing, find the longest possible run, then work backwards. */
2251 
2252       else
2253         {
2254         Lstart_eptr = Feptr;
2255         for (i = Lmin; i < Lmax; i++)
2256           {
2257           int len = 1;
2258           if (Feptr >= mb->end_subject)
2259             {
2260             SCHECK_PARTIAL();
2261             break;
2262             }
2263 #ifdef SUPPORT_UNICODE
2264           GETCHARLENTEST(fc, Feptr, len);
2265 #else
2266           fc = *Feptr;
2267 #endif
2268           if (!PRIV(xclass)(fc, Lxclass_data, utf)) break;
2269           Feptr += len;
2270           }
2271 
2272         if (reptype == REPTYPE_POS) continue;    /* No backtracking */
2273 
2274         /* After \C in UTF mode, Lstart_eptr might be in the middle of a
2275         Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
2276         go too far. */
2277 
2278         for(;;)
2279           {
2280           RMATCH(Fecode, RM101);
2281           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2282           if (Feptr-- <= Lstart_eptr) break;  /* Tried at original position */
2283 #ifdef SUPPORT_UNICODE
2284           if (utf) BACKCHAR(Feptr);
2285 #endif
2286           }
2287         RRETURN(MATCH_NOMATCH);
2288         }
2289 
2290       /* Control never gets here */
2291       }
2292 #endif  /* SUPPORT_WIDE_CHARS: end of XCLASS */
2293 
2294 #undef Lstart_eptr
2295 #undef Lxclass_data
2296 #undef Lmin
2297 #undef Lmax
2298 
2299 
2300     /* ===================================================================== */
2301     /* Match various character types when PCRE2_UCP is not set. These opcodes
2302     are not generated when PCRE2_UCP is set - instead appropriate property
2303     tests are compiled. */
2304 
2305     case OP_NOT_DIGIT:
2306     if (Feptr >= mb->end_subject)
2307       {
2308       SCHECK_PARTIAL();
2309       RRETURN(MATCH_NOMATCH);
2310       }
2311     GETCHARINCTEST(fc, Feptr);
2312     if (CHMAX_255(fc) && (mb->ctypes[fc] & ctype_digit) != 0)
2313       RRETURN(MATCH_NOMATCH);
2314     Fecode++;
2315     break;
2316 
2317     case OP_DIGIT:
2318     if (Feptr >= mb->end_subject)
2319       {
2320       SCHECK_PARTIAL();
2321       RRETURN(MATCH_NOMATCH);
2322       }
2323     GETCHARINCTEST(fc, Feptr);
2324     if (!CHMAX_255(fc) || (mb->ctypes[fc] & ctype_digit) == 0)
2325       RRETURN(MATCH_NOMATCH);
2326     Fecode++;
2327     break;
2328 
2329     case OP_NOT_WHITESPACE:
2330     if (Feptr >= mb->end_subject)
2331       {
2332       SCHECK_PARTIAL();
2333       RRETURN(MATCH_NOMATCH);
2334       }
2335     GETCHARINCTEST(fc, Feptr);
2336     if (CHMAX_255(fc) && (mb->ctypes[fc] & ctype_space) != 0)
2337       RRETURN(MATCH_NOMATCH);
2338     Fecode++;
2339     break;
2340 
2341     case OP_WHITESPACE:
2342     if (Feptr >= mb->end_subject)
2343       {
2344       SCHECK_PARTIAL();
2345       RRETURN(MATCH_NOMATCH);
2346       }
2347     GETCHARINCTEST(fc, Feptr);
2348     if (!CHMAX_255(fc) || (mb->ctypes[fc] & ctype_space) == 0)
2349       RRETURN(MATCH_NOMATCH);
2350     Fecode++;
2351     break;
2352 
2353     case OP_NOT_WORDCHAR:
2354     if (Feptr >= mb->end_subject)
2355       {
2356       SCHECK_PARTIAL();
2357       RRETURN(MATCH_NOMATCH);
2358       }
2359     GETCHARINCTEST(fc, Feptr);
2360     if (CHMAX_255(fc) && (mb->ctypes[fc] & ctype_word) != 0)
2361       RRETURN(MATCH_NOMATCH);
2362     Fecode++;
2363     break;
2364 
2365     case OP_WORDCHAR:
2366     if (Feptr >= mb->end_subject)
2367       {
2368       SCHECK_PARTIAL();
2369       RRETURN(MATCH_NOMATCH);
2370       }
2371     GETCHARINCTEST(fc, Feptr);
2372     if (!CHMAX_255(fc) || (mb->ctypes[fc] & ctype_word) == 0)
2373       RRETURN(MATCH_NOMATCH);
2374     Fecode++;
2375     break;
2376 
2377     case OP_ANYNL:
2378     if (Feptr >= mb->end_subject)
2379       {
2380       SCHECK_PARTIAL();
2381       RRETURN(MATCH_NOMATCH);
2382       }
2383     GETCHARINCTEST(fc, Feptr);
2384     switch(fc)
2385       {
2386       default: RRETURN(MATCH_NOMATCH);
2387 
2388       case CHAR_CR:
2389       if (Feptr >= mb->end_subject)
2390         {
2391         SCHECK_PARTIAL();
2392         }
2393       else if (UCHAR21TEST(Feptr) == CHAR_LF) Feptr++;
2394       break;
2395 
2396       case CHAR_LF:
2397       break;
2398 
2399       case CHAR_VT:
2400       case CHAR_FF:
2401       case CHAR_NEL:
2402 #ifndef EBCDIC
2403       case 0x2028:
2404       case 0x2029:
2405 #endif  /* Not EBCDIC */
2406       if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) RRETURN(MATCH_NOMATCH);
2407       break;
2408       }
2409     Fecode++;
2410     break;
2411 
2412     case OP_NOT_HSPACE:
2413     if (Feptr >= mb->end_subject)
2414       {
2415       SCHECK_PARTIAL();
2416       RRETURN(MATCH_NOMATCH);
2417       }
2418     GETCHARINCTEST(fc, Feptr);
2419     switch(fc)
2420       {
2421       HSPACE_CASES: RRETURN(MATCH_NOMATCH);  /* Byte and multibyte cases */
2422       default: break;
2423       }
2424     Fecode++;
2425     break;
2426 
2427     case OP_HSPACE:
2428     if (Feptr >= mb->end_subject)
2429       {
2430       SCHECK_PARTIAL();
2431       RRETURN(MATCH_NOMATCH);
2432       }
2433     GETCHARINCTEST(fc, Feptr);
2434     switch(fc)
2435       {
2436       HSPACE_CASES: break;  /* Byte and multibyte cases */
2437       default: RRETURN(MATCH_NOMATCH);
2438       }
2439     Fecode++;
2440     break;
2441 
2442     case OP_NOT_VSPACE:
2443     if (Feptr >= mb->end_subject)
2444       {
2445       SCHECK_PARTIAL();
2446       RRETURN(MATCH_NOMATCH);
2447       }
2448     GETCHARINCTEST(fc, Feptr);
2449     switch(fc)
2450       {
2451       VSPACE_CASES: RRETURN(MATCH_NOMATCH);
2452       default: break;
2453       }
2454     Fecode++;
2455     break;
2456 
2457     case OP_VSPACE:
2458     if (Feptr >= mb->end_subject)
2459       {
2460       SCHECK_PARTIAL();
2461       RRETURN(MATCH_NOMATCH);
2462       }
2463     GETCHARINCTEST(fc, Feptr);
2464     switch(fc)
2465       {
2466       VSPACE_CASES: break;
2467       default: RRETURN(MATCH_NOMATCH);
2468       }
2469     Fecode++;
2470     break;
2471 
2472 
2473 #ifdef SUPPORT_UNICODE
2474 
2475     /* ===================================================================== */
2476     /* Check the next character by Unicode property. We will get here only
2477     if the support is in the binary; otherwise a compile-time error occurs. */
2478 
2479     case OP_PROP:
2480     case OP_NOTPROP:
2481     if (Feptr >= mb->end_subject)
2482       {
2483       SCHECK_PARTIAL();
2484       RRETURN(MATCH_NOMATCH);
2485       }
2486     GETCHARINCTEST(fc, Feptr);
2487       {
2488       const uint32_t *cp;
2489       uint32_t chartype;
2490       const ucd_record *prop = GET_UCD(fc);
2491       BOOL notmatch = Fop == OP_NOTPROP;
2492 
2493       switch(Fecode[1])
2494         {
2495         case PT_ANY:
2496         if (notmatch) RRETURN(MATCH_NOMATCH);
2497         break;
2498 
2499         case PT_LAMP:
2500         chartype = prop->chartype;
2501         if ((chartype == ucp_Lu ||
2502              chartype == ucp_Ll ||
2503              chartype == ucp_Lt) == notmatch)
2504           RRETURN(MATCH_NOMATCH);
2505         break;
2506 
2507         case PT_GC:
2508         if ((Fecode[2] == PRIV(ucp_gentype)[prop->chartype]) == notmatch)
2509           RRETURN(MATCH_NOMATCH);
2510         break;
2511 
2512         case PT_PC:
2513         if ((Fecode[2] == prop->chartype) == notmatch)
2514           RRETURN(MATCH_NOMATCH);
2515         break;
2516 
2517         case PT_SC:
2518         if ((Fecode[2] == prop->script) == notmatch)
2519           RRETURN(MATCH_NOMATCH);
2520         break;
2521 
2522         case PT_SCX:
2523           {
2524           BOOL ok = (Fecode[2] == prop->script ||
2525                      MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Fecode[2]) != 0);
2526           if (ok == notmatch) RRETURN(MATCH_NOMATCH);
2527           }
2528         break;
2529 
2530         /* These are specials */
2531 
2532         case PT_ALNUM:
2533         chartype = prop->chartype;
2534         if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
2535              PRIV(ucp_gentype)[chartype] == ucp_N) == notmatch)
2536           RRETURN(MATCH_NOMATCH);
2537         break;
2538 
2539         /* Perl space used to exclude VT, but from Perl 5.18 it is included,
2540         which means that Perl space and POSIX space are now identical. PCRE
2541         was changed at release 8.34. */
2542 
2543         case PT_SPACE:    /* Perl space */
2544         case PT_PXSPACE:  /* POSIX space */
2545         switch(fc)
2546           {
2547           HSPACE_CASES:
2548           VSPACE_CASES:
2549           if (notmatch) RRETURN(MATCH_NOMATCH);
2550           break;
2551 
2552           default:
2553           if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z) == notmatch)
2554             RRETURN(MATCH_NOMATCH);
2555           break;
2556           }
2557         break;
2558 
2559         case PT_WORD:
2560         chartype = prop->chartype;
2561         if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
2562              PRIV(ucp_gentype)[chartype] == ucp_N ||
2563              chartype == ucp_Mn ||
2564              chartype == ucp_Pc) == notmatch)
2565           RRETURN(MATCH_NOMATCH);
2566         break;
2567 
2568         case PT_CLIST:
2569 #if PCRE2_CODE_UNIT_WIDTH == 32
2570             if (fc > MAX_UTF_CODE_POINT)
2571               {
2572               if (notmatch) break;;
2573               RRETURN(MATCH_NOMATCH);
2574               }
2575 #endif
2576         cp = PRIV(ucd_caseless_sets) + Fecode[2];
2577         for (;;)
2578           {
2579           if (fc < *cp)
2580             { if (notmatch) break; else { RRETURN(MATCH_NOMATCH); } }
2581           if (fc == *cp++)
2582             { if (notmatch) { RRETURN(MATCH_NOMATCH); } else break; }
2583           }
2584         break;
2585 
2586         case PT_UCNC:
2587         if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT ||
2588              fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) ||
2589              fc >= 0xe000) == notmatch)
2590           RRETURN(MATCH_NOMATCH);
2591         break;
2592 
2593         case PT_BIDICL:
2594         if ((UCD_BIDICLASS_PROP(prop) == Fecode[2]) == notmatch)
2595           RRETURN(MATCH_NOMATCH);
2596         break;
2597 
2598         case PT_BOOL:
2599           {
2600           BOOL ok = MAPBIT(PRIV(ucd_boolprop_sets) +
2601             UCD_BPROPS_PROP(prop), Fecode[2]) != 0;
2602           if (ok == notmatch) RRETURN(MATCH_NOMATCH);
2603           }
2604         break;
2605 
2606         /* This should never occur */
2607 
2608         default:
2609         return PCRE2_ERROR_INTERNAL;
2610         }
2611 
2612       Fecode += 3;
2613       }
2614     break;
2615 
2616 
2617     /* ===================================================================== */
2618     /* Match an extended Unicode sequence. We will get here only if the support
2619     is in the binary; otherwise a compile-time error occurs. */
2620 
2621     case OP_EXTUNI:
2622     if (Feptr >= mb->end_subject)
2623       {
2624       SCHECK_PARTIAL();
2625       RRETURN(MATCH_NOMATCH);
2626       }
2627     else
2628       {
2629       GETCHARINCTEST(fc, Feptr);
2630       Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject, mb->end_subject, utf,
2631         NULL);
2632       }
2633     CHECK_PARTIAL();
2634     Fecode++;
2635     break;
2636 
2637 #endif  /* SUPPORT_UNICODE */
2638 
2639 
2640     /* ===================================================================== */
2641     /* Match a single character type repeatedly. Note that the property type
2642     does not need to be in a stack frame as it is not used within an RMATCH()
2643     loop. */
2644 
2645 #define Lstart_eptr  F->temp_sptr[0]
2646 #define Lmin         F->temp_32[0]
2647 #define Lmax         F->temp_32[1]
2648 #define Lctype       F->temp_32[2]
2649 #define Lpropvalue   F->temp_32[3]
2650 
2651     case OP_TYPEEXACT:
2652     Lmin = Lmax = GET2(Fecode, 1);
2653     Fecode += 1 + IMM2_SIZE;
2654     goto REPEATTYPE;
2655 
2656     case OP_TYPEUPTO:
2657     case OP_TYPEMINUPTO:
2658     Lmin = 0;
2659     Lmax = GET2(Fecode, 1);
2660     reptype = (*Fecode == OP_TYPEMINUPTO)? REPTYPE_MIN : REPTYPE_MAX;
2661     Fecode += 1 + IMM2_SIZE;
2662     goto REPEATTYPE;
2663 
2664     case OP_TYPEPOSSTAR:
2665     reptype = REPTYPE_POS;
2666     Lmin = 0;
2667     Lmax = UINT32_MAX;
2668     Fecode++;
2669     goto REPEATTYPE;
2670 
2671     case OP_TYPEPOSPLUS:
2672     reptype = REPTYPE_POS;
2673     Lmin = 1;
2674     Lmax = UINT32_MAX;
2675     Fecode++;
2676     goto REPEATTYPE;
2677 
2678     case OP_TYPEPOSQUERY:
2679     reptype = REPTYPE_POS;
2680     Lmin = 0;
2681     Lmax = 1;
2682     Fecode++;
2683     goto REPEATTYPE;
2684 
2685     case OP_TYPEPOSUPTO:
2686     reptype = REPTYPE_POS;
2687     Lmin = 0;
2688     Lmax = GET2(Fecode, 1);
2689     Fecode += 1 + IMM2_SIZE;
2690     goto REPEATTYPE;
2691 
2692     case OP_TYPESTAR:
2693     case OP_TYPEMINSTAR:
2694     case OP_TYPEPLUS:
2695     case OP_TYPEMINPLUS:
2696     case OP_TYPEQUERY:
2697     case OP_TYPEMINQUERY:
2698     fc = *Fecode++ - OP_TYPESTAR;
2699     Lmin = rep_min[fc];
2700     Lmax = rep_max[fc];
2701     reptype = rep_typ[fc];
2702 
2703     /* Common code for all repeated character type matches. */
2704 
2705     REPEATTYPE:
2706     Lctype = *Fecode++;      /* Code for the character type */
2707 
2708 #ifdef SUPPORT_UNICODE
2709     if (Lctype == OP_PROP || Lctype == OP_NOTPROP)
2710       {
2711       proptype = *Fecode++;
2712       Lpropvalue = *Fecode++;
2713       }
2714     else proptype = -1;
2715 #endif
2716 
2717     /* First, ensure the minimum number of matches are present. Use inline
2718     code for maximizing the speed, and do the type test once at the start
2719     (i.e. keep it out of the loops). As there are no calls to RMATCH in the
2720     loops, we can use an ordinary variable for "notmatch". The code for UTF
2721     mode is separated out for tidiness, except for Unicode property tests. */
2722 
2723     if (Lmin > 0)
2724       {
2725 #ifdef SUPPORT_UNICODE
2726       if (proptype >= 0)  /* Property tests in all modes */
2727         {
2728         BOOL notmatch = Lctype == OP_NOTPROP;
2729         switch(proptype)
2730           {
2731           case PT_ANY:
2732           if (notmatch) RRETURN(MATCH_NOMATCH);
2733           for (i = 1; i <= Lmin; i++)
2734             {
2735             if (Feptr >= mb->end_subject)
2736               {
2737               SCHECK_PARTIAL();
2738               RRETURN(MATCH_NOMATCH);
2739               }
2740             GETCHARINCTEST(fc, Feptr);
2741             }
2742           break;
2743 
2744           case PT_LAMP:
2745           for (i = 1; i <= Lmin; i++)
2746             {
2747             int chartype;
2748             if (Feptr >= mb->end_subject)
2749               {
2750               SCHECK_PARTIAL();
2751               RRETURN(MATCH_NOMATCH);
2752               }
2753             GETCHARINCTEST(fc, Feptr);
2754             chartype = UCD_CHARTYPE(fc);
2755             if ((chartype == ucp_Lu ||
2756                  chartype == ucp_Ll ||
2757                  chartype == ucp_Lt) == notmatch)
2758               RRETURN(MATCH_NOMATCH);
2759             }
2760           break;
2761 
2762           case PT_GC:
2763           for (i = 1; i <= Lmin; i++)
2764             {
2765             if (Feptr >= mb->end_subject)
2766               {
2767               SCHECK_PARTIAL();
2768               RRETURN(MATCH_NOMATCH);
2769               }
2770             GETCHARINCTEST(fc, Feptr);
2771             if ((UCD_CATEGORY(fc) == Lpropvalue) == notmatch)
2772               RRETURN(MATCH_NOMATCH);
2773             }
2774           break;
2775 
2776           case PT_PC:
2777           for (i = 1; i <= Lmin; i++)
2778             {
2779             if (Feptr >= mb->end_subject)
2780               {
2781               SCHECK_PARTIAL();
2782               RRETURN(MATCH_NOMATCH);
2783               }
2784             GETCHARINCTEST(fc, Feptr);
2785             if ((UCD_CHARTYPE(fc) == Lpropvalue) == notmatch)
2786               RRETURN(MATCH_NOMATCH);
2787             }
2788           break;
2789 
2790           case PT_SC:
2791           for (i = 1; i <= Lmin; i++)
2792             {
2793             if (Feptr >= mb->end_subject)
2794               {
2795               SCHECK_PARTIAL();
2796               RRETURN(MATCH_NOMATCH);
2797               }
2798             GETCHARINCTEST(fc, Feptr);
2799             if ((UCD_SCRIPT(fc) == Lpropvalue) == notmatch)
2800               RRETURN(MATCH_NOMATCH);
2801             }
2802           break;
2803 
2804           case PT_SCX:
2805           for (i = 1; i <= Lmin; i++)
2806             {
2807             BOOL ok;
2808             const ucd_record *prop;
2809             if (Feptr >= mb->end_subject)
2810               {
2811               SCHECK_PARTIAL();
2812               RRETURN(MATCH_NOMATCH);
2813               }
2814             GETCHARINCTEST(fc, Feptr);
2815             prop = GET_UCD(fc);
2816             ok = (prop->script == Lpropvalue ||
2817                   MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Lpropvalue) != 0);
2818             if (ok == notmatch)
2819               RRETURN(MATCH_NOMATCH);
2820             }
2821           break;
2822 
2823           case PT_ALNUM:
2824           for (i = 1; i <= Lmin; i++)
2825             {
2826             int category;
2827             if (Feptr >= mb->end_subject)
2828               {
2829               SCHECK_PARTIAL();
2830               RRETURN(MATCH_NOMATCH);
2831               }
2832             GETCHARINCTEST(fc, Feptr);
2833             category = UCD_CATEGORY(fc);
2834             if ((category == ucp_L || category == ucp_N) == notmatch)
2835               RRETURN(MATCH_NOMATCH);
2836             }
2837           break;
2838 
2839           /* Perl space used to exclude VT, but from Perl 5.18 it is included,
2840           which means that Perl space and POSIX space are now identical. PCRE
2841           was changed at release 8.34. */
2842 
2843           case PT_SPACE:    /* Perl space */
2844           case PT_PXSPACE:  /* POSIX space */
2845           for (i = 1; i <= Lmin; i++)
2846             {
2847             if (Feptr >= mb->end_subject)
2848               {
2849               SCHECK_PARTIAL();
2850               RRETURN(MATCH_NOMATCH);
2851               }
2852             GETCHARINCTEST(fc, Feptr);
2853             switch(fc)
2854               {
2855               HSPACE_CASES:
2856               VSPACE_CASES:
2857               if (notmatch) RRETURN(MATCH_NOMATCH);
2858               break;
2859 
2860               default:
2861               if ((UCD_CATEGORY(fc) == ucp_Z) == notmatch)
2862                 RRETURN(MATCH_NOMATCH);
2863               break;
2864               }
2865             }
2866           break;
2867 
2868           case PT_WORD:
2869           for (i = 1; i <= Lmin; i++)
2870             {
2871             int chartype, category;
2872             if (Feptr >= mb->end_subject)
2873               {
2874               SCHECK_PARTIAL();
2875               RRETURN(MATCH_NOMATCH);
2876               }
2877             GETCHARINCTEST(fc, Feptr);
2878             chartype = UCD_CHARTYPE(fc);
2879             category = PRIV(ucp_gentype)[chartype];
2880             if ((category == ucp_L || category == ucp_N ||
2881                  chartype == ucp_Mn || chartype == ucp_Pc) == notmatch)
2882               RRETURN(MATCH_NOMATCH);
2883             }
2884           break;
2885 
2886           case PT_CLIST:
2887           for (i = 1; i <= Lmin; i++)
2888             {
2889             const uint32_t *cp;
2890             if (Feptr >= mb->end_subject)
2891               {
2892               SCHECK_PARTIAL();
2893               RRETURN(MATCH_NOMATCH);
2894               }
2895             GETCHARINCTEST(fc, Feptr);
2896 #if PCRE2_CODE_UNIT_WIDTH == 32
2897             if (fc > MAX_UTF_CODE_POINT)
2898               {
2899               if (notmatch) continue;
2900               RRETURN(MATCH_NOMATCH);
2901               }
2902 #endif
2903             cp = PRIV(ucd_caseless_sets) + Lpropvalue;
2904             for (;;)
2905               {
2906               if (fc < *cp)
2907                 {
2908                 if (notmatch) break;
2909                 RRETURN(MATCH_NOMATCH);
2910                 }
2911               if (fc == *cp++)
2912                 {
2913                 if (notmatch) RRETURN(MATCH_NOMATCH);
2914                 break;
2915                 }
2916               }
2917             }
2918           break;
2919 
2920           case PT_UCNC:
2921           for (i = 1; i <= Lmin; i++)
2922             {
2923             if (Feptr >= mb->end_subject)
2924               {
2925               SCHECK_PARTIAL();
2926               RRETURN(MATCH_NOMATCH);
2927               }
2928             GETCHARINCTEST(fc, Feptr);
2929             if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT ||
2930                  fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) ||
2931                  fc >= 0xe000) == notmatch)
2932               RRETURN(MATCH_NOMATCH);
2933             }
2934           break;
2935 
2936           case PT_BIDICL:
2937           for (i = 1; i <= Lmin; i++)
2938             {
2939             if (Feptr >= mb->end_subject)
2940               {
2941               SCHECK_PARTIAL();
2942               RRETURN(MATCH_NOMATCH);
2943               }
2944             GETCHARINCTEST(fc, Feptr);
2945             if ((UCD_BIDICLASS(fc) == Lpropvalue) == notmatch)
2946               RRETURN(MATCH_NOMATCH);
2947             }
2948           break;
2949 
2950           case PT_BOOL:
2951           for (i = 1; i <= Lmin; i++)
2952             {
2953             BOOL ok;
2954             const ucd_record *prop;
2955             if (Feptr >= mb->end_subject)
2956               {
2957               SCHECK_PARTIAL();
2958               RRETURN(MATCH_NOMATCH);
2959               }
2960             GETCHARINCTEST(fc, Feptr);
2961             prop = GET_UCD(fc);
2962             ok = MAPBIT(PRIV(ucd_boolprop_sets) +
2963               UCD_BPROPS_PROP(prop), Lpropvalue) != 0;
2964             if (ok == notmatch)
2965               RRETURN(MATCH_NOMATCH);
2966             }
2967           break;
2968 
2969           /* This should not occur */
2970 
2971           default:
2972           return PCRE2_ERROR_INTERNAL;
2973           }
2974         }
2975 
2976       /* Match extended Unicode sequences. We will get here only if the
2977       support is in the binary; otherwise a compile-time error occurs. */
2978 
2979       else if (Lctype == OP_EXTUNI)
2980         {
2981         for (i = 1; i <= Lmin; i++)
2982           {
2983           if (Feptr >= mb->end_subject)
2984             {
2985             SCHECK_PARTIAL();
2986             RRETURN(MATCH_NOMATCH);
2987             }
2988           else
2989             {
2990             GETCHARINCTEST(fc, Feptr);
2991             Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject,
2992               mb->end_subject, utf, NULL);
2993             }
2994           CHECK_PARTIAL();
2995           }
2996         }
2997       else
2998 #endif     /* SUPPORT_UNICODE */
2999 
3000 /* Handle all other cases in UTF mode */
3001 
3002 #ifdef SUPPORT_UNICODE
3003       if (utf) switch(Lctype)
3004         {
3005         case OP_ANY:
3006         for (i = 1; i <= Lmin; i++)
3007           {
3008           if (Feptr >= mb->end_subject)
3009             {
3010             SCHECK_PARTIAL();
3011             RRETURN(MATCH_NOMATCH);
3012             }
3013           if (IS_NEWLINE(Feptr)) RRETURN(MATCH_NOMATCH);
3014           if (mb->partial != 0 &&
3015               Feptr + 1 >= mb->end_subject &&
3016               NLBLOCK->nltype == NLTYPE_FIXED &&
3017               NLBLOCK->nllen == 2 &&
3018               UCHAR21(Feptr) == NLBLOCK->nl[0])
3019             {
3020             mb->hitend = TRUE;
3021             if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
3022             }
3023           Feptr++;
3024           ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
3025           }
3026         break;
3027 
3028         case OP_ALLANY:
3029         for (i = 1; i <= Lmin; i++)
3030           {
3031           if (Feptr >= mb->end_subject)
3032             {
3033             SCHECK_PARTIAL();
3034             RRETURN(MATCH_NOMATCH);
3035             }
3036           Feptr++;
3037           ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
3038           }
3039         break;
3040 
3041         case OP_ANYBYTE:
3042         if (Feptr > mb->end_subject - Lmin) RRETURN(MATCH_NOMATCH);
3043         Feptr += Lmin;
3044         break;
3045 
3046         case OP_ANYNL:
3047         for (i = 1; i <= Lmin; i++)
3048           {
3049           if (Feptr >= mb->end_subject)
3050             {
3051             SCHECK_PARTIAL();
3052             RRETURN(MATCH_NOMATCH);
3053             }
3054           GETCHARINC(fc, Feptr);
3055           switch(fc)
3056             {
3057             default: RRETURN(MATCH_NOMATCH);
3058 
3059             case CHAR_CR:
3060             if (Feptr < mb->end_subject && UCHAR21(Feptr) == CHAR_LF) Feptr++;
3061             break;
3062 
3063             case CHAR_LF:
3064             break;
3065 
3066             case CHAR_VT:
3067             case CHAR_FF:
3068             case CHAR_NEL:
3069 #ifndef EBCDIC
3070             case 0x2028:
3071             case 0x2029:
3072 #endif  /* Not EBCDIC */
3073             if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) RRETURN(MATCH_NOMATCH);
3074             break;
3075             }
3076           }
3077         break;
3078 
3079         case OP_NOT_HSPACE:
3080         for (i = 1; i <= Lmin; i++)
3081           {
3082           if (Feptr >= mb->end_subject)
3083             {
3084             SCHECK_PARTIAL();
3085             RRETURN(MATCH_NOMATCH);
3086             }
3087           GETCHARINC(fc, Feptr);
3088           switch(fc)
3089             {
3090             HSPACE_CASES: RRETURN(MATCH_NOMATCH);
3091             default: break;
3092             }
3093           }
3094         break;
3095 
3096         case OP_HSPACE:
3097         for (i = 1; i <= Lmin; i++)
3098           {
3099           if (Feptr >= mb->end_subject)
3100             {
3101             SCHECK_PARTIAL();
3102             RRETURN(MATCH_NOMATCH);
3103             }
3104           GETCHARINC(fc, Feptr);
3105           switch(fc)
3106             {
3107             HSPACE_CASES: break;
3108             default: RRETURN(MATCH_NOMATCH);
3109             }
3110           }
3111         break;
3112 
3113         case OP_NOT_VSPACE:
3114         for (i = 1; i <= Lmin; i++)
3115           {
3116           if (Feptr >= mb->end_subject)
3117             {
3118             SCHECK_PARTIAL();
3119             RRETURN(MATCH_NOMATCH);
3120             }
3121           GETCHARINC(fc, Feptr);
3122           switch(fc)
3123             {
3124             VSPACE_CASES: RRETURN(MATCH_NOMATCH);
3125             default: break;
3126             }
3127           }
3128         break;
3129 
3130         case OP_VSPACE:
3131         for (i = 1; i <= Lmin; i++)
3132           {
3133           if (Feptr >= mb->end_subject)
3134             {
3135             SCHECK_PARTIAL();
3136             RRETURN(MATCH_NOMATCH);
3137             }
3138           GETCHARINC(fc, Feptr);
3139           switch(fc)
3140             {
3141             VSPACE_CASES: break;
3142             default: RRETURN(MATCH_NOMATCH);
3143             }
3144           }
3145         break;
3146 
3147         case OP_NOT_DIGIT:
3148         for (i = 1; i <= Lmin; i++)
3149           {
3150           if (Feptr >= mb->end_subject)
3151             {
3152             SCHECK_PARTIAL();
3153             RRETURN(MATCH_NOMATCH);
3154             }
3155           GETCHARINC(fc, Feptr);
3156           if (fc < 128 && (mb->ctypes[fc] & ctype_digit) != 0)
3157             RRETURN(MATCH_NOMATCH);
3158           }
3159         break;
3160 
3161         case OP_DIGIT:
3162         for (i = 1; i <= Lmin; i++)
3163           {
3164           uint32_t cc;
3165           if (Feptr >= mb->end_subject)
3166             {
3167             SCHECK_PARTIAL();
3168             RRETURN(MATCH_NOMATCH);
3169             }
3170           cc = UCHAR21(Feptr);
3171           if (cc >= 128 || (mb->ctypes[cc] & ctype_digit) == 0)
3172             RRETURN(MATCH_NOMATCH);
3173           Feptr++;
3174           /* No need to skip more code units - we know it has only one. */
3175           }
3176         break;
3177 
3178         case OP_NOT_WHITESPACE:
3179         for (i = 1; i <= Lmin; i++)
3180           {
3181           uint32_t cc;
3182           if (Feptr >= mb->end_subject)
3183             {
3184             SCHECK_PARTIAL();
3185             RRETURN(MATCH_NOMATCH);
3186             }
3187           cc = UCHAR21(Feptr);
3188           if (cc < 128 && (mb->ctypes[cc] & ctype_space) != 0)
3189             RRETURN(MATCH_NOMATCH);
3190           Feptr++;
3191           ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
3192           }
3193         break;
3194 
3195         case OP_WHITESPACE:
3196         for (i = 1; i <= Lmin; i++)
3197           {
3198           uint32_t cc;
3199           if (Feptr >= mb->end_subject)
3200             {
3201             SCHECK_PARTIAL();
3202             RRETURN(MATCH_NOMATCH);
3203             }
3204           cc = UCHAR21(Feptr);
3205           if (cc >= 128 || (mb->ctypes[cc] & ctype_space) == 0)
3206             RRETURN(MATCH_NOMATCH);
3207           Feptr++;
3208           /* No need to skip more code units - we know it has only one. */
3209           }
3210         break;
3211 
3212         case OP_NOT_WORDCHAR:
3213         for (i = 1; i <= Lmin; i++)
3214           {
3215           uint32_t cc;
3216           if (Feptr >= mb->end_subject)
3217             {
3218             SCHECK_PARTIAL();
3219             RRETURN(MATCH_NOMATCH);
3220             }
3221           cc = UCHAR21(Feptr);
3222           if (cc < 128 && (mb->ctypes[cc] & ctype_word) != 0)
3223             RRETURN(MATCH_NOMATCH);
3224           Feptr++;
3225           ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
3226           }
3227         break;
3228 
3229         case OP_WORDCHAR:
3230         for (i = 1; i <= Lmin; i++)
3231           {
3232           uint32_t cc;
3233           if (Feptr >= mb->end_subject)
3234             {
3235             SCHECK_PARTIAL();
3236             RRETURN(MATCH_NOMATCH);
3237             }
3238           cc = UCHAR21(Feptr);
3239           if (cc >= 128 || (mb->ctypes[cc] & ctype_word) == 0)
3240             RRETURN(MATCH_NOMATCH);
3241           Feptr++;
3242           /* No need to skip more code units - we know it has only one. */
3243           }
3244         break;
3245 
3246         default:
3247         return PCRE2_ERROR_INTERNAL;
3248         }  /* End switch(Lctype) */
3249 
3250       else
3251 #endif     /* SUPPORT_UNICODE */
3252 
3253       /* Code for the non-UTF case for minimum matching of operators other
3254       than OP_PROP and OP_NOTPROP. */
3255 
3256       switch(Lctype)
3257         {
3258         case OP_ANY:
3259         for (i = 1; i <= Lmin; i++)
3260           {
3261           if (Feptr >= mb->end_subject)
3262             {
3263             SCHECK_PARTIAL();
3264             RRETURN(MATCH_NOMATCH);
3265             }
3266           if (IS_NEWLINE(Feptr)) RRETURN(MATCH_NOMATCH);
3267           if (mb->partial != 0 &&
3268               Feptr + 1 >= mb->end_subject &&
3269               NLBLOCK->nltype == NLTYPE_FIXED &&
3270               NLBLOCK->nllen == 2 &&
3271               *Feptr == NLBLOCK->nl[0])
3272             {
3273             mb->hitend = TRUE;
3274             if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
3275             }
3276           Feptr++;
3277           }
3278         break;
3279 
3280         case OP_ALLANY:
3281         if (Feptr > mb->end_subject - Lmin)
3282           {
3283           SCHECK_PARTIAL();
3284           RRETURN(MATCH_NOMATCH);
3285           }
3286         Feptr += Lmin;
3287         break;
3288 
3289         /* This OP_ANYBYTE case will never be reached because \C gets turned
3290         into OP_ALLANY in non-UTF mode. Cut out the code so that coverage
3291         reports don't complain about its never being used. */
3292 
3293 /*        case OP_ANYBYTE:
3294 *        if (Feptr > mb->end_subject - Lmin)
3295 *          {
3296 *          SCHECK_PARTIAL();
3297 *          RRETURN(MATCH_NOMATCH);
3298 *          }
3299 *        Feptr += Lmin;
3300 *        break;
3301 */
3302         case OP_ANYNL:
3303         for (i = 1; i <= Lmin; i++)
3304           {
3305           if (Feptr >= mb->end_subject)
3306             {
3307             SCHECK_PARTIAL();
3308             RRETURN(MATCH_NOMATCH);
3309             }
3310           switch(*Feptr++)
3311             {
3312             default: RRETURN(MATCH_NOMATCH);
3313 
3314             case CHAR_CR:
3315             if (Feptr < mb->end_subject && *Feptr == CHAR_LF) Feptr++;
3316             break;
3317 
3318             case CHAR_LF:
3319             break;
3320 
3321             case CHAR_VT:
3322             case CHAR_FF:
3323             case CHAR_NEL:
3324 #if PCRE2_CODE_UNIT_WIDTH != 8
3325             case 0x2028:
3326             case 0x2029:
3327 #endif
3328             if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) RRETURN(MATCH_NOMATCH);
3329             break;
3330             }
3331           }
3332         break;
3333 
3334         case OP_NOT_HSPACE:
3335         for (i = 1; i <= Lmin; i++)
3336           {
3337           if (Feptr >= mb->end_subject)
3338             {
3339             SCHECK_PARTIAL();
3340             RRETURN(MATCH_NOMATCH);
3341             }
3342           switch(*Feptr++)
3343             {
3344             default: break;
3345             HSPACE_BYTE_CASES:
3346 #if PCRE2_CODE_UNIT_WIDTH != 8
3347             HSPACE_MULTIBYTE_CASES:
3348 #endif
3349             RRETURN(MATCH_NOMATCH);
3350             }
3351           }
3352         break;
3353 
3354         case OP_HSPACE:
3355         for (i = 1; i <= Lmin; i++)
3356           {
3357           if (Feptr >= mb->end_subject)
3358             {
3359             SCHECK_PARTIAL();
3360             RRETURN(MATCH_NOMATCH);
3361             }
3362           switch(*Feptr++)
3363             {
3364             default: RRETURN(MATCH_NOMATCH);
3365             HSPACE_BYTE_CASES:
3366 #if PCRE2_CODE_UNIT_WIDTH != 8
3367             HSPACE_MULTIBYTE_CASES:
3368 #endif
3369             break;
3370             }
3371           }
3372         break;
3373 
3374         case OP_NOT_VSPACE:
3375         for (i = 1; i <= Lmin; i++)
3376           {
3377           if (Feptr >= mb->end_subject)
3378             {
3379             SCHECK_PARTIAL();
3380             RRETURN(MATCH_NOMATCH);
3381             }
3382           switch(*Feptr++)
3383             {
3384             VSPACE_BYTE_CASES:
3385 #if PCRE2_CODE_UNIT_WIDTH != 8
3386             VSPACE_MULTIBYTE_CASES:
3387 #endif
3388             RRETURN(MATCH_NOMATCH);
3389             default: break;
3390             }
3391           }
3392         break;
3393 
3394         case OP_VSPACE:
3395         for (i = 1; i <= Lmin; i++)
3396           {
3397           if (Feptr >= mb->end_subject)
3398             {
3399             SCHECK_PARTIAL();
3400             RRETURN(MATCH_NOMATCH);
3401             }
3402           switch(*Feptr++)
3403             {
3404             default: RRETURN(MATCH_NOMATCH);
3405             VSPACE_BYTE_CASES:
3406 #if PCRE2_CODE_UNIT_WIDTH != 8
3407             VSPACE_MULTIBYTE_CASES:
3408 #endif
3409             break;
3410             }
3411           }
3412         break;
3413 
3414         case OP_NOT_DIGIT:
3415         for (i = 1; i <= Lmin; i++)
3416           {
3417           if (Feptr >= mb->end_subject)
3418             {
3419             SCHECK_PARTIAL();
3420             RRETURN(MATCH_NOMATCH);
3421             }
3422           if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_digit) != 0)
3423             RRETURN(MATCH_NOMATCH);
3424           Feptr++;
3425           }
3426         break;
3427 
3428         case OP_DIGIT:
3429         for (i = 1; i <= Lmin; i++)
3430           {
3431           if (Feptr >= mb->end_subject)
3432             {
3433             SCHECK_PARTIAL();
3434             RRETURN(MATCH_NOMATCH);
3435             }
3436           if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_digit) == 0)
3437             RRETURN(MATCH_NOMATCH);
3438           Feptr++;
3439           }
3440         break;
3441 
3442         case OP_NOT_WHITESPACE:
3443         for (i = 1; i <= Lmin; i++)
3444           {
3445           if (Feptr >= mb->end_subject)
3446             {
3447             SCHECK_PARTIAL();
3448             RRETURN(MATCH_NOMATCH);
3449             }
3450           if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_space) != 0)
3451             RRETURN(MATCH_NOMATCH);
3452           Feptr++;
3453           }
3454         break;
3455 
3456         case OP_WHITESPACE:
3457         for (i = 1; i <= Lmin; i++)
3458           {
3459           if (Feptr >= mb->end_subject)
3460             {
3461             SCHECK_PARTIAL();
3462             RRETURN(MATCH_NOMATCH);
3463             }
3464           if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_space) == 0)
3465             RRETURN(MATCH_NOMATCH);
3466           Feptr++;
3467           }
3468         break;
3469 
3470         case OP_NOT_WORDCHAR:
3471         for (i = 1; i <= Lmin; i++)
3472           {
3473           if (Feptr >= mb->end_subject)
3474             {
3475             SCHECK_PARTIAL();
3476             RRETURN(MATCH_NOMATCH);
3477             }
3478           if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_word) != 0)
3479             RRETURN(MATCH_NOMATCH);
3480           Feptr++;
3481           }
3482         break;
3483 
3484         case OP_WORDCHAR:
3485         for (i = 1; i <= Lmin; i++)
3486           {
3487           if (Feptr >= mb->end_subject)
3488             {
3489             SCHECK_PARTIAL();
3490             RRETURN(MATCH_NOMATCH);
3491             }
3492           if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_word) == 0)
3493             RRETURN(MATCH_NOMATCH);
3494           Feptr++;
3495           }
3496         break;
3497 
3498         default:
3499         return PCRE2_ERROR_INTERNAL;
3500         }
3501       }
3502 
3503     /* If Lmin = Lmax we are done. Continue with the main loop. */
3504 
3505     if (Lmin == Lmax) continue;
3506 
3507     /* If minimizing, we have to test the rest of the pattern before each
3508     subsequent match. This means we cannot use a local "notmatch" variable as
3509     in the other cases. As all 4 temporary 32-bit values in the frame are
3510     already in use, just test the type each time. */
3511 
3512     if (reptype == REPTYPE_MIN)
3513       {
3514 #ifdef SUPPORT_UNICODE
3515       if (proptype >= 0)
3516         {
3517         switch(proptype)
3518           {
3519           case PT_ANY:
3520           for (;;)
3521             {
3522             RMATCH(Fecode, RM208);
3523             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3524             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3525             if (Feptr >= mb->end_subject)
3526               {
3527               SCHECK_PARTIAL();
3528               RRETURN(MATCH_NOMATCH);
3529               }
3530             GETCHARINCTEST(fc, Feptr);
3531             if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
3532             }
3533           /* Control never gets here */
3534 
3535           case PT_LAMP:
3536           for (;;)
3537             {
3538             int chartype;
3539             RMATCH(Fecode, RM209);
3540             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3541             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3542             if (Feptr >= mb->end_subject)
3543               {
3544               SCHECK_PARTIAL();
3545               RRETURN(MATCH_NOMATCH);
3546               }
3547             GETCHARINCTEST(fc, Feptr);
3548             chartype = UCD_CHARTYPE(fc);
3549             if ((chartype == ucp_Lu ||
3550                  chartype == ucp_Ll ||
3551                  chartype == ucp_Lt) == (Lctype == OP_NOTPROP))
3552               RRETURN(MATCH_NOMATCH);
3553             }
3554           /* Control never gets here */
3555 
3556           case PT_GC:
3557           for (;;)
3558             {
3559             RMATCH(Fecode, RM210);
3560             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3561             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3562             if (Feptr >= mb->end_subject)
3563               {
3564               SCHECK_PARTIAL();
3565               RRETURN(MATCH_NOMATCH);
3566               }
3567             GETCHARINCTEST(fc, Feptr);
3568             if ((UCD_CATEGORY(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
3569               RRETURN(MATCH_NOMATCH);
3570             }
3571           /* Control never gets here */
3572 
3573           case PT_PC:
3574           for (;;)
3575             {
3576             RMATCH(Fecode, RM211);
3577             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3578             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3579             if (Feptr >= mb->end_subject)
3580               {
3581               SCHECK_PARTIAL();
3582               RRETURN(MATCH_NOMATCH);
3583               }
3584             GETCHARINCTEST(fc, Feptr);
3585             if ((UCD_CHARTYPE(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
3586               RRETURN(MATCH_NOMATCH);
3587             }
3588           /* Control never gets here */
3589 
3590           case PT_SC:
3591           for (;;)
3592             {
3593             RMATCH(Fecode, RM212);
3594             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3595             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3596             if (Feptr >= mb->end_subject)
3597               {
3598               SCHECK_PARTIAL();
3599               RRETURN(MATCH_NOMATCH);
3600               }
3601             GETCHARINCTEST(fc, Feptr);
3602             if ((UCD_SCRIPT(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
3603               RRETURN(MATCH_NOMATCH);
3604             }
3605           /* Control never gets here */
3606 
3607           case PT_SCX:
3608           for (;;)
3609             {
3610             BOOL ok;
3611             const ucd_record *prop;
3612             RMATCH(Fecode, RM225);
3613             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3614             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3615             if (Feptr >= mb->end_subject)
3616               {
3617               SCHECK_PARTIAL();
3618               RRETURN(MATCH_NOMATCH);
3619               }
3620             GETCHARINCTEST(fc, Feptr);
3621             prop = GET_UCD(fc);
3622             ok = (prop->script == Lpropvalue
3623                   || MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Lpropvalue) != 0);
3624             if (ok == (Lctype == OP_NOTPROP))
3625               RRETURN(MATCH_NOMATCH);
3626             }
3627           /* Control never gets here */
3628 
3629           case PT_ALNUM:
3630           for (;;)
3631             {
3632             int category;
3633             RMATCH(Fecode, RM213);
3634             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3635             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3636             if (Feptr >= mb->end_subject)
3637               {
3638               SCHECK_PARTIAL();
3639               RRETURN(MATCH_NOMATCH);
3640               }
3641             GETCHARINCTEST(fc, Feptr);
3642             category = UCD_CATEGORY(fc);
3643             if ((category == ucp_L || category == ucp_N) == (Lctype == OP_NOTPROP))
3644               RRETURN(MATCH_NOMATCH);
3645             }
3646           /* Control never gets here */
3647 
3648           /* Perl space used to exclude VT, but from Perl 5.18 it is included,
3649           which means that Perl space and POSIX space are now identical. PCRE
3650           was changed at release 8.34. */
3651 
3652           case PT_SPACE:    /* Perl space */
3653           case PT_PXSPACE:  /* POSIX space */
3654           for (;;)
3655             {
3656             RMATCH(Fecode, RM214);
3657             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3658             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3659             if (Feptr >= mb->end_subject)
3660               {
3661               SCHECK_PARTIAL();
3662               RRETURN(MATCH_NOMATCH);
3663               }
3664             GETCHARINCTEST(fc, Feptr);
3665             switch(fc)
3666               {
3667               HSPACE_CASES:
3668               VSPACE_CASES:
3669               if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
3670               break;
3671 
3672               default:
3673               if ((UCD_CATEGORY(fc) == ucp_Z) == (Lctype == OP_NOTPROP))
3674                 RRETURN(MATCH_NOMATCH);
3675               break;
3676               }
3677             }
3678           /* Control never gets here */
3679 
3680           case PT_WORD:
3681           for (;;)
3682             {
3683             int chartype, category;
3684             RMATCH(Fecode, RM215);
3685             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3686             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3687             if (Feptr >= mb->end_subject)
3688               {
3689               SCHECK_PARTIAL();
3690               RRETURN(MATCH_NOMATCH);
3691               }
3692             GETCHARINCTEST(fc, Feptr);
3693             chartype = UCD_CHARTYPE(fc);
3694             category = PRIV(ucp_gentype)[chartype];
3695             if ((category == ucp_L ||
3696                  category == ucp_N ||
3697                  chartype == ucp_Mn ||
3698                  chartype == ucp_Pc) == (Lctype == OP_NOTPROP))
3699               RRETURN(MATCH_NOMATCH);
3700             }
3701           /* Control never gets here */
3702 
3703           case PT_CLIST:
3704           for (;;)
3705             {
3706             const uint32_t *cp;
3707             RMATCH(Fecode, RM216);
3708             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3709             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3710             if (Feptr >= mb->end_subject)
3711               {
3712               SCHECK_PARTIAL();
3713               RRETURN(MATCH_NOMATCH);
3714               }
3715             GETCHARINCTEST(fc, Feptr);
3716 #if PCRE2_CODE_UNIT_WIDTH == 32
3717             if (fc > MAX_UTF_CODE_POINT)
3718               {
3719               if (Lctype == OP_NOTPROP) continue;
3720               RRETURN(MATCH_NOMATCH);
3721               }
3722 #endif
3723             cp = PRIV(ucd_caseless_sets) + Lpropvalue;
3724             for (;;)
3725               {
3726               if (fc < *cp)
3727                 {
3728                 if (Lctype == OP_NOTPROP) break;
3729                 RRETURN(MATCH_NOMATCH);
3730                 }
3731               if (fc == *cp++)
3732                 {
3733                 if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
3734                 break;
3735                 }
3736               }
3737             }
3738           /* Control never gets here */
3739 
3740           case PT_UCNC:
3741           for (;;)
3742             {
3743             RMATCH(Fecode, RM217);
3744             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3745             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3746             if (Feptr >= mb->end_subject)
3747               {
3748               SCHECK_PARTIAL();
3749               RRETURN(MATCH_NOMATCH);
3750               }
3751             GETCHARINCTEST(fc, Feptr);
3752             if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT ||
3753                  fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) ||
3754                  fc >= 0xe000) == (Lctype == OP_NOTPROP))
3755               RRETURN(MATCH_NOMATCH);
3756             }
3757           /* Control never gets here */
3758 
3759           case PT_BIDICL:
3760           for (;;)
3761             {
3762             RMATCH(Fecode, RM224);
3763             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3764             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3765             if (Feptr >= mb->end_subject)
3766               {
3767               SCHECK_PARTIAL();
3768               RRETURN(MATCH_NOMATCH);
3769               }
3770             GETCHARINCTEST(fc, Feptr);
3771             if ((UCD_BIDICLASS(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
3772               RRETURN(MATCH_NOMATCH);
3773             }
3774           /* Control never gets here */
3775 
3776           case PT_BOOL:
3777           for (;;)
3778             {
3779             BOOL ok;
3780             const ucd_record *prop;
3781             RMATCH(Fecode, RM223);
3782             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3783             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3784             if (Feptr >= mb->end_subject)
3785               {
3786               SCHECK_PARTIAL();
3787               RRETURN(MATCH_NOMATCH);
3788               }
3789             GETCHARINCTEST(fc, Feptr);
3790             prop = GET_UCD(fc);
3791             ok = MAPBIT(PRIV(ucd_boolprop_sets) +
3792               UCD_BPROPS_PROP(prop), Lpropvalue) != 0;
3793             if (ok == (Lctype == OP_NOTPROP))
3794               RRETURN(MATCH_NOMATCH);
3795             }
3796           /* Control never gets here */
3797 
3798           /* This should never occur */
3799           default:
3800           return PCRE2_ERROR_INTERNAL;
3801           }
3802         }
3803 
3804       /* Match extended Unicode sequences. We will get here only if the
3805       support is in the binary; otherwise a compile-time error occurs. */
3806 
3807       else if (Lctype == OP_EXTUNI)
3808         {
3809         for (;;)
3810           {
3811           RMATCH(Fecode, RM218);
3812           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3813           if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3814           if (Feptr >= mb->end_subject)
3815             {
3816             SCHECK_PARTIAL();
3817             RRETURN(MATCH_NOMATCH);
3818             }
3819           else
3820             {
3821             GETCHARINCTEST(fc, Feptr);
3822             Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject, mb->end_subject,
3823               utf, NULL);
3824             }
3825           CHECK_PARTIAL();
3826           }
3827         }
3828       else
3829 #endif     /* SUPPORT_UNICODE */
3830 
3831       /* UTF mode for non-property testing character types. */
3832 
3833 #ifdef SUPPORT_UNICODE
3834       if (utf)
3835         {
3836         for (;;)
3837           {
3838           RMATCH(Fecode, RM219);
3839           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3840           if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3841           if (Feptr >= mb->end_subject)
3842             {
3843             SCHECK_PARTIAL();
3844             RRETURN(MATCH_NOMATCH);
3845             }
3846           if (Lctype == OP_ANY && IS_NEWLINE(Feptr)) RRETURN(MATCH_NOMATCH);
3847           GETCHARINC(fc, Feptr);
3848           switch(Lctype)
3849             {
3850             case OP_ANY:               /* This is the non-NL case */
3851             if (mb->partial != 0 &&    /* Take care with CRLF partial */
3852                 Feptr >= mb->end_subject &&
3853                 NLBLOCK->nltype == NLTYPE_FIXED &&
3854                 NLBLOCK->nllen == 2 &&
3855                 fc == NLBLOCK->nl[0])
3856               {
3857               mb->hitend = TRUE;
3858               if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
3859               }
3860             break;
3861 
3862             case OP_ALLANY:
3863             case OP_ANYBYTE:
3864             break;
3865 
3866             case OP_ANYNL:
3867             switch(fc)
3868               {
3869               default: RRETURN(MATCH_NOMATCH);
3870 
3871               case CHAR_CR:
3872               if (Feptr < mb->end_subject && UCHAR21(Feptr) == CHAR_LF) Feptr++;
3873               break;
3874 
3875               case CHAR_LF:
3876               break;
3877 
3878               case CHAR_VT:
3879               case CHAR_FF:
3880               case CHAR_NEL:
3881 #ifndef EBCDIC
3882               case 0x2028:
3883               case 0x2029:
3884 #endif  /* Not EBCDIC */
3885               if (mb->bsr_convention == PCRE2_BSR_ANYCRLF)
3886                 RRETURN(MATCH_NOMATCH);
3887               break;
3888               }
3889             break;
3890 
3891             case OP_NOT_HSPACE:
3892             switch(fc)
3893               {
3894               HSPACE_CASES: RRETURN(MATCH_NOMATCH);
3895               default: break;
3896               }
3897             break;
3898 
3899             case OP_HSPACE:
3900             switch(fc)
3901               {
3902               HSPACE_CASES: break;
3903               default: RRETURN(MATCH_NOMATCH);
3904               }
3905             break;
3906 
3907             case OP_NOT_VSPACE:
3908             switch(fc)
3909               {
3910               VSPACE_CASES: RRETURN(MATCH_NOMATCH);
3911               default: break;
3912               }
3913             break;
3914 
3915             case OP_VSPACE:
3916             switch(fc)
3917               {
3918               VSPACE_CASES: break;
3919               default: RRETURN(MATCH_NOMATCH);
3920               }
3921             break;
3922 
3923             case OP_NOT_DIGIT:
3924             if (fc < 256 && (mb->ctypes[fc] & ctype_digit) != 0)
3925               RRETURN(MATCH_NOMATCH);
3926             break;
3927 
3928             case OP_DIGIT:
3929             if (fc >= 256 || (mb->ctypes[fc] & ctype_digit) == 0)
3930               RRETURN(MATCH_NOMATCH);
3931             break;
3932 
3933             case OP_NOT_WHITESPACE:
3934             if (fc < 256 && (mb->ctypes[fc] & ctype_space) != 0)
3935               RRETURN(MATCH_NOMATCH);
3936             break;
3937 
3938             case OP_WHITESPACE:
3939             if (fc >= 256 || (mb->ctypes[fc] & ctype_space) == 0)
3940               RRETURN(MATCH_NOMATCH);
3941             break;
3942 
3943             case OP_NOT_WORDCHAR:
3944             if (fc < 256 && (mb->ctypes[fc] & ctype_word) != 0)
3945               RRETURN(MATCH_NOMATCH);
3946             break;
3947 
3948             case OP_WORDCHAR:
3949             if (fc >= 256 || (mb->ctypes[fc] & ctype_word) == 0)
3950               RRETURN(MATCH_NOMATCH);
3951             break;
3952 
3953             default:
3954             return PCRE2_ERROR_INTERNAL;
3955             }
3956           }
3957         }
3958       else
3959 #endif  /* SUPPORT_UNICODE */
3960 
3961       /* Not UTF mode */
3962         {
3963         for (;;)
3964           {
3965           RMATCH(Fecode, RM33);
3966           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3967           if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3968           if (Feptr >= mb->end_subject)
3969             {
3970             SCHECK_PARTIAL();
3971             RRETURN(MATCH_NOMATCH);
3972             }
3973           if (Lctype == OP_ANY && IS_NEWLINE(Feptr))
3974             RRETURN(MATCH_NOMATCH);
3975           fc = *Feptr++;
3976           switch(Lctype)
3977             {
3978             case OP_ANY:               /* This is the non-NL case */
3979             if (mb->partial != 0 &&    /* Take care with CRLF partial */
3980                 Feptr >= mb->end_subject &&
3981                 NLBLOCK->nltype == NLTYPE_FIXED &&
3982                 NLBLOCK->nllen == 2 &&
3983                 fc == NLBLOCK->nl[0])
3984               {
3985               mb->hitend = TRUE;
3986               if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
3987               }
3988             break;
3989 
3990             case OP_ALLANY:
3991             case OP_ANYBYTE:
3992             break;
3993 
3994             case OP_ANYNL:
3995             switch(fc)
3996               {
3997               default: RRETURN(MATCH_NOMATCH);
3998 
3999               case CHAR_CR:
4000               if (Feptr < mb->end_subject && *Feptr == CHAR_LF) Feptr++;
4001               break;
4002 
4003               case CHAR_LF:
4004               break;
4005 
4006               case CHAR_VT:
4007               case CHAR_FF:
4008               case CHAR_NEL:
4009 #if PCRE2_CODE_UNIT_WIDTH != 8
4010               case 0x2028:
4011               case 0x2029:
4012 #endif
4013               if (mb->bsr_convention == PCRE2_BSR_ANYCRLF)
4014                 RRETURN(MATCH_NOMATCH);
4015               break;
4016               }
4017             break;
4018 
4019             case OP_NOT_HSPACE:
4020             switch(fc)
4021               {
4022               default: break;
4023               HSPACE_BYTE_CASES:
4024 #if PCRE2_CODE_UNIT_WIDTH != 8
4025               HSPACE_MULTIBYTE_CASES:
4026 #endif
4027               RRETURN(MATCH_NOMATCH);
4028               }
4029             break;
4030 
4031             case OP_HSPACE:
4032             switch(fc)
4033               {
4034               default: RRETURN(MATCH_NOMATCH);
4035               HSPACE_BYTE_CASES:
4036 #if PCRE2_CODE_UNIT_WIDTH != 8
4037               HSPACE_MULTIBYTE_CASES:
4038 #endif
4039               break;
4040               }
4041             break;
4042 
4043             case OP_NOT_VSPACE:
4044             switch(fc)
4045               {
4046               default: break;
4047               VSPACE_BYTE_CASES:
4048 #if PCRE2_CODE_UNIT_WIDTH != 8
4049               VSPACE_MULTIBYTE_CASES:
4050 #endif
4051               RRETURN(MATCH_NOMATCH);
4052               }
4053             break;
4054 
4055             case OP_VSPACE:
4056             switch(fc)
4057               {
4058               default: RRETURN(MATCH_NOMATCH);
4059               VSPACE_BYTE_CASES:
4060 #if PCRE2_CODE_UNIT_WIDTH != 8
4061               VSPACE_MULTIBYTE_CASES:
4062 #endif
4063               break;
4064               }
4065             break;
4066 
4067             case OP_NOT_DIGIT:
4068             if (MAX_255(fc) && (mb->ctypes[fc] & ctype_digit) != 0)
4069               RRETURN(MATCH_NOMATCH);
4070             break;
4071 
4072             case OP_DIGIT:
4073             if (!MAX_255(fc) || (mb->ctypes[fc] & ctype_digit) == 0)
4074               RRETURN(MATCH_NOMATCH);
4075             break;
4076 
4077             case OP_NOT_WHITESPACE:
4078             if (MAX_255(fc) && (mb->ctypes[fc] & ctype_space) != 0)
4079               RRETURN(MATCH_NOMATCH);
4080             break;
4081 
4082             case OP_WHITESPACE:
4083             if (!MAX_255(fc) || (mb->ctypes[fc] & ctype_space) == 0)
4084               RRETURN(MATCH_NOMATCH);
4085             break;
4086 
4087             case OP_NOT_WORDCHAR:
4088             if (MAX_255(fc) && (mb->ctypes[fc] & ctype_word) != 0)
4089               RRETURN(MATCH_NOMATCH);
4090             break;
4091 
4092             case OP_WORDCHAR:
4093             if (!MAX_255(fc) || (mb->ctypes[fc] & ctype_word) == 0)
4094               RRETURN(MATCH_NOMATCH);
4095             break;
4096 
4097             default:
4098             return PCRE2_ERROR_INTERNAL;
4099             }
4100           }
4101         }
4102       /* Control never gets here */
4103       }
4104 
4105     /* If maximizing, it is worth using inline code for speed, doing the type
4106     test once at the start (i.e. keep it out of the loops). Once again,
4107     "notmatch" can be an ordinary local variable because the loops do not call
4108     RMATCH. */
4109 
4110     else
4111       {
4112       Lstart_eptr = Feptr;  /* Remember where we started */
4113 
4114 #ifdef SUPPORT_UNICODE
4115       if (proptype >= 0)
4116         {
4117         BOOL notmatch = Lctype == OP_NOTPROP;
4118         switch(proptype)
4119           {
4120           case PT_ANY:
4121           for (i = Lmin; i < Lmax; i++)
4122             {
4123             int len = 1;
4124             if (Feptr >= mb->end_subject)
4125               {
4126               SCHECK_PARTIAL();
4127               break;
4128               }
4129             GETCHARLENTEST(fc, Feptr, len);
4130             if (notmatch) break;
4131             Feptr+= len;
4132             }
4133           break;
4134 
4135           case PT_LAMP:
4136           for (i = Lmin; i < Lmax; i++)
4137             {
4138             int chartype;
4139             int len = 1;
4140             if (Feptr >= mb->end_subject)
4141               {
4142               SCHECK_PARTIAL();
4143               break;
4144               }
4145             GETCHARLENTEST(fc, Feptr, len);
4146             chartype = UCD_CHARTYPE(fc);
4147             if ((chartype == ucp_Lu ||
4148                  chartype == ucp_Ll ||
4149                  chartype == ucp_Lt) == notmatch)
4150               break;
4151             Feptr+= len;
4152             }
4153           break;
4154 
4155           case PT_GC:
4156           for (i = Lmin; i < Lmax; i++)
4157             {
4158             int len = 1;
4159             if (Feptr >= mb->end_subject)
4160               {
4161               SCHECK_PARTIAL();
4162               break;
4163               }
4164             GETCHARLENTEST(fc, Feptr, len);
4165             if ((UCD_CATEGORY(fc) == Lpropvalue) == notmatch) break;
4166             Feptr+= len;
4167             }
4168           break;
4169 
4170           case PT_PC:
4171           for (i = Lmin; i < Lmax; i++)
4172             {
4173             int len = 1;
4174             if (Feptr >= mb->end_subject)
4175               {
4176               SCHECK_PARTIAL();
4177               break;
4178               }
4179             GETCHARLENTEST(fc, Feptr, len);
4180             if ((UCD_CHARTYPE(fc) == Lpropvalue) == notmatch) break;
4181             Feptr+= len;
4182             }
4183           break;
4184 
4185           case PT_SC:
4186           for (i = Lmin; i < Lmax; i++)
4187             {
4188             int len = 1;
4189             if (Feptr >= mb->end_subject)
4190               {
4191               SCHECK_PARTIAL();
4192               break;
4193               }
4194             GETCHARLENTEST(fc, Feptr, len);
4195             if ((UCD_SCRIPT(fc) == Lpropvalue) == notmatch) break;
4196             Feptr+= len;
4197             }
4198           break;
4199 
4200           case PT_SCX:
4201           for (i = Lmin; i < Lmax; i++)
4202             {
4203             BOOL ok;
4204             const ucd_record *prop;
4205             int len = 1;
4206             if (Feptr >= mb->end_subject)
4207               {
4208               SCHECK_PARTIAL();
4209               break;
4210               }
4211             GETCHARLENTEST(fc, Feptr, len);
4212             prop = GET_UCD(fc);
4213             ok = (prop->script == Lpropvalue ||
4214                   MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Lpropvalue) != 0);
4215             if (ok == notmatch) break;
4216             Feptr+= len;
4217             }
4218           break;
4219 
4220           case PT_ALNUM:
4221           for (i = Lmin; i < Lmax; i++)
4222             {
4223             int category;
4224             int len = 1;
4225             if (Feptr >= mb->end_subject)
4226               {
4227               SCHECK_PARTIAL();
4228               break;
4229               }
4230             GETCHARLENTEST(fc, Feptr, len);
4231             category = UCD_CATEGORY(fc);
4232             if ((category == ucp_L || category == ucp_N) == notmatch)
4233               break;
4234             Feptr+= len;
4235             }
4236           break;
4237 
4238           /* Perl space used to exclude VT, but from Perl 5.18 it is included,
4239           which means that Perl space and POSIX space are now identical. PCRE
4240           was changed at release 8.34. */
4241 
4242           case PT_SPACE:    /* Perl space */
4243           case PT_PXSPACE:  /* POSIX space */
4244           for (i = Lmin; i < Lmax; i++)
4245             {
4246             int len = 1;
4247             if (Feptr >= mb->end_subject)
4248               {
4249               SCHECK_PARTIAL();
4250               break;
4251               }
4252             GETCHARLENTEST(fc, Feptr, len);
4253             switch(fc)
4254               {
4255               HSPACE_CASES:
4256               VSPACE_CASES:
4257               if (notmatch) goto ENDLOOP99;  /* Break the loop */
4258               break;
4259 
4260               default:
4261               if ((UCD_CATEGORY(fc) == ucp_Z) == notmatch)
4262                 goto ENDLOOP99;   /* Break the loop */
4263               break;
4264               }
4265             Feptr+= len;
4266             }
4267           ENDLOOP99:
4268           break;
4269 
4270           case PT_WORD:
4271           for (i = Lmin; i < Lmax; i++)
4272             {
4273             int chartype, category;
4274             int len = 1;
4275             if (Feptr >= mb->end_subject)
4276               {
4277               SCHECK_PARTIAL();
4278               break;
4279               }
4280             GETCHARLENTEST(fc, Feptr, len);
4281             chartype = UCD_CHARTYPE(fc);
4282             category = PRIV(ucp_gentype)[chartype];
4283             if ((category == ucp_L ||
4284                  category == ucp_N ||
4285                  chartype == ucp_Mn ||
4286                  chartype == ucp_Pc) == notmatch)
4287               break;
4288             Feptr+= len;
4289             }
4290           break;
4291 
4292           case PT_CLIST:
4293           for (i = Lmin; i < Lmax; i++)
4294             {
4295             const uint32_t *cp;
4296             int len = 1;
4297             if (Feptr >= mb->end_subject)
4298               {
4299               SCHECK_PARTIAL();
4300               break;
4301               }
4302             GETCHARLENTEST(fc, Feptr, len);
4303 #if PCRE2_CODE_UNIT_WIDTH == 32
4304             if (fc > MAX_UTF_CODE_POINT)
4305               {
4306               if (!notmatch) goto GOT_MAX;
4307               }
4308             else
4309 #endif
4310               {
4311               cp = PRIV(ucd_caseless_sets) + Lpropvalue;
4312               for (;;)
4313                 {
4314                 if (fc < *cp)
4315                   { if (notmatch) break; else goto GOT_MAX; }
4316                 if (fc == *cp++)
4317                   { if (notmatch) goto GOT_MAX; else break; }
4318                 }
4319               }
4320 
4321             Feptr += len;
4322             }
4323           GOT_MAX:
4324           break;
4325 
4326           case PT_UCNC:
4327           for (i = Lmin; i < Lmax; i++)
4328             {
4329             int len = 1;
4330             if (Feptr >= mb->end_subject)
4331               {
4332               SCHECK_PARTIAL();
4333               break;
4334               }
4335             GETCHARLENTEST(fc, Feptr, len);
4336             if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT ||
4337                  fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) ||
4338                  fc >= 0xe000) == notmatch)
4339               break;
4340             Feptr += len;
4341             }
4342           break;
4343 
4344           case PT_BIDICL:
4345           for (i = Lmin; i < Lmax; i++)
4346             {
4347             int len = 1;
4348             if (Feptr >= mb->end_subject)
4349               {
4350               SCHECK_PARTIAL();
4351               break;
4352               }
4353             GETCHARLENTEST(fc, Feptr, len);
4354             if ((UCD_BIDICLASS(fc) == Lpropvalue) == notmatch) break;
4355             Feptr+= len;
4356             }
4357           break;
4358 
4359           case PT_BOOL:
4360           for (i = Lmin; i < Lmax; i++)
4361             {
4362             BOOL ok;
4363             const ucd_record *prop;
4364             int len = 1;
4365             if (Feptr >= mb->end_subject)
4366               {
4367               SCHECK_PARTIAL();
4368               break;
4369               }
4370             GETCHARLENTEST(fc, Feptr, len);
4371             prop = GET_UCD(fc);
4372             ok = MAPBIT(PRIV(ucd_boolprop_sets) +
4373               UCD_BPROPS_PROP(prop), Lpropvalue) != 0;
4374             if (ok == notmatch) break;
4375             Feptr+= len;
4376             }
4377           break;
4378 
4379           default:
4380           return PCRE2_ERROR_INTERNAL;
4381           }
4382 
4383         /* Feptr is now past the end of the maximum run */
4384 
4385         if (reptype == REPTYPE_POS) continue;    /* No backtracking */
4386 
4387         /* After \C in UTF mode, Lstart_eptr might be in the middle of a
4388         Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
4389         go too far. */
4390 
4391         for(;;)
4392           {
4393           if (Feptr <= Lstart_eptr) break;
4394           RMATCH(Fecode, RM222);
4395           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4396           Feptr--;
4397           if (utf) BACKCHAR(Feptr);
4398           }
4399         }
4400 
4401       /* Match extended Unicode grapheme clusters. We will get here only if the
4402       support is in the binary; otherwise a compile-time error occurs. */
4403 
4404       else if (Lctype == OP_EXTUNI)
4405         {
4406         for (i = Lmin; i < Lmax; i++)
4407           {
4408           if (Feptr >= mb->end_subject)
4409             {
4410             SCHECK_PARTIAL();
4411             break;
4412             }
4413           else
4414             {
4415             GETCHARINCTEST(fc, Feptr);
4416             Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject, mb->end_subject,
4417               utf, NULL);
4418             }
4419           CHECK_PARTIAL();
4420           }
4421 
4422         /* Feptr is now past the end of the maximum run */
4423 
4424         if (reptype == REPTYPE_POS) continue;    /* No backtracking */
4425 
4426         /* We use <= Lstart_eptr rather than == Lstart_eptr to detect the start
4427         of the run while backtracking because the use of \C in UTF mode can
4428         cause BACKCHAR to move back past Lstart_eptr. This is just palliative;
4429         the use of \C in UTF mode is fraught with danger. */
4430 
4431         for(;;)
4432           {
4433           int lgb, rgb;
4434           PCRE2_SPTR fptr;
4435 
4436           if (Feptr <= Lstart_eptr) break;   /* At start of char run */
4437           RMATCH(Fecode, RM220);
4438           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4439 
4440           /* Backtracking over an extended grapheme cluster involves inspecting
4441           the previous two characters (if present) to see if a break is
4442           permitted between them. */
4443 
4444           Feptr--;
4445           if (!utf) fc = *Feptr; else
4446             {
4447             BACKCHAR(Feptr);
4448             GETCHAR(fc, Feptr);
4449             }
4450           rgb = UCD_GRAPHBREAK(fc);
4451 
4452           for (;;)
4453             {
4454             if (Feptr <= Lstart_eptr) break;   /* At start of char run */
4455             fptr = Feptr - 1;
4456             if (!utf) fc = *fptr; else
4457               {
4458               BACKCHAR(fptr);
4459               GETCHAR(fc, fptr);
4460               }
4461             lgb = UCD_GRAPHBREAK(fc);
4462             if ((PRIV(ucp_gbtable)[lgb] & (1u << rgb)) == 0) break;
4463             Feptr = fptr;
4464             rgb = lgb;
4465             }
4466           }
4467         }
4468 
4469       else
4470 #endif   /* SUPPORT_UNICODE */
4471 
4472 #ifdef SUPPORT_UNICODE
4473       if (utf)
4474         {
4475         switch(Lctype)
4476           {
4477           case OP_ANY:
4478           for (i = Lmin; i < Lmax; i++)
4479             {
4480             if (Feptr >= mb->end_subject)
4481               {
4482               SCHECK_PARTIAL();
4483               break;
4484               }
4485             if (IS_NEWLINE(Feptr)) break;
4486             if (mb->partial != 0 &&    /* Take care with CRLF partial */
4487                 Feptr + 1 >= mb->end_subject &&
4488                 NLBLOCK->nltype == NLTYPE_FIXED &&
4489                 NLBLOCK->nllen == 2 &&
4490                 UCHAR21(Feptr) == NLBLOCK->nl[0])
4491               {
4492               mb->hitend = TRUE;
4493               if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
4494               }
4495             Feptr++;
4496             ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
4497             }
4498           break;
4499 
4500           case OP_ALLANY:
4501           if (Lmax < UINT32_MAX)
4502             {
4503             for (i = Lmin; i < Lmax; i++)
4504               {
4505               if (Feptr >= mb->end_subject)
4506                 {
4507                 SCHECK_PARTIAL();
4508                 break;
4509                 }
4510               Feptr++;
4511               ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
4512               }
4513             }
4514           else
4515             {
4516             Feptr = mb->end_subject;   /* Unlimited UTF-8 repeat */
4517             SCHECK_PARTIAL();
4518             }
4519           break;
4520 
4521           /* The "byte" (i.e. "code unit") case is the same as non-UTF */
4522 
4523           case OP_ANYBYTE:
4524           fc = Lmax - Lmin;
4525           if (fc > (uint32_t)(mb->end_subject - Feptr))
4526             {
4527             Feptr = mb->end_subject;
4528             SCHECK_PARTIAL();
4529             }
4530           else Feptr += fc;
4531           break;
4532 
4533           case OP_ANYNL:
4534           for (i = Lmin; i < Lmax; i++)
4535             {
4536             int len = 1;
4537             if (Feptr >= mb->end_subject)
4538               {
4539               SCHECK_PARTIAL();
4540               break;
4541               }
4542             GETCHARLEN(fc, Feptr, len);
4543             if (fc == CHAR_CR)
4544               {
4545               if (++Feptr >= mb->end_subject) break;
4546               if (UCHAR21(Feptr) == CHAR_LF) Feptr++;
4547               }
4548             else
4549               {
4550               if (fc != CHAR_LF &&
4551                   (mb->bsr_convention == PCRE2_BSR_ANYCRLF ||
4552                    (fc != CHAR_VT && fc != CHAR_FF && fc != CHAR_NEL
4553 #ifndef EBCDIC
4554                     && fc != 0x2028 && fc != 0x2029
4555 #endif  /* Not EBCDIC */
4556                     )))
4557                 break;
4558               Feptr += len;
4559               }
4560             }
4561           break;
4562 
4563           case OP_NOT_HSPACE:
4564           case OP_HSPACE:
4565           for (i = Lmin; i < Lmax; i++)
4566             {
4567             BOOL gotspace;
4568             int len = 1;
4569             if (Feptr >= mb->end_subject)
4570               {
4571               SCHECK_PARTIAL();
4572               break;
4573               }
4574             GETCHARLEN(fc, Feptr, len);
4575             switch(fc)
4576               {
4577               HSPACE_CASES: gotspace = TRUE; break;
4578               default: gotspace = FALSE; break;
4579               }
4580             if (gotspace == (Lctype == OP_NOT_HSPACE)) break;
4581             Feptr += len;
4582             }
4583           break;
4584 
4585           case OP_NOT_VSPACE:
4586           case OP_VSPACE:
4587           for (i = Lmin; i < Lmax; i++)
4588             {
4589             BOOL gotspace;
4590             int len = 1;
4591             if (Feptr >= mb->end_subject)
4592               {
4593               SCHECK_PARTIAL();
4594               break;
4595               }
4596             GETCHARLEN(fc, Feptr, len);
4597             switch(fc)
4598               {
4599               VSPACE_CASES: gotspace = TRUE; break;
4600               default: gotspace = FALSE; break;
4601               }
4602             if (gotspace == (Lctype == OP_NOT_VSPACE)) break;
4603             Feptr += len;
4604             }
4605           break;
4606 
4607           case OP_NOT_DIGIT:
4608           for (i = Lmin; i < Lmax; i++)
4609             {
4610             int len = 1;
4611             if (Feptr >= mb->end_subject)
4612               {
4613               SCHECK_PARTIAL();
4614               break;
4615               }
4616             GETCHARLEN(fc, Feptr, len);
4617             if (fc < 256 && (mb->ctypes[fc] & ctype_digit) != 0) break;
4618             Feptr+= len;
4619             }
4620           break;
4621 
4622           case OP_DIGIT:
4623           for (i = Lmin; i < Lmax; i++)
4624             {
4625             int len = 1;
4626             if (Feptr >= mb->end_subject)
4627               {
4628               SCHECK_PARTIAL();
4629               break;
4630               }
4631             GETCHARLEN(fc, Feptr, len);
4632             if (fc >= 256 ||(mb->ctypes[fc] & ctype_digit) == 0) break;
4633             Feptr+= len;
4634             }
4635           break;
4636 
4637           case OP_NOT_WHITESPACE:
4638           for (i = Lmin; i < Lmax; i++)
4639             {
4640             int len = 1;
4641             if (Feptr >= mb->end_subject)
4642               {
4643               SCHECK_PARTIAL();
4644               break;
4645               }
4646             GETCHARLEN(fc, Feptr, len);
4647             if (fc < 256 && (mb->ctypes[fc] & ctype_space) != 0) break;
4648             Feptr+= len;
4649             }
4650           break;
4651 
4652           case OP_WHITESPACE:
4653           for (i = Lmin; i < Lmax; i++)
4654             {
4655             int len = 1;
4656             if (Feptr >= mb->end_subject)
4657               {
4658               SCHECK_PARTIAL();
4659               break;
4660               }
4661             GETCHARLEN(fc, Feptr, len);
4662             if (fc >= 256 ||(mb->ctypes[fc] & ctype_space) == 0) break;
4663             Feptr+= len;
4664             }
4665           break;
4666 
4667           case OP_NOT_WORDCHAR:
4668           for (i = Lmin; i < Lmax; i++)
4669             {
4670             int len = 1;
4671             if (Feptr >= mb->end_subject)
4672               {
4673               SCHECK_PARTIAL();
4674               break;
4675               }
4676             GETCHARLEN(fc, Feptr, len);
4677             if (fc < 256 && (mb->ctypes[fc] & ctype_word) != 0) break;
4678             Feptr+= len;
4679             }
4680           break;
4681 
4682           case OP_WORDCHAR:
4683           for (i = Lmin; i < Lmax; i++)
4684             {
4685             int len = 1;
4686             if (Feptr >= mb->end_subject)
4687               {
4688               SCHECK_PARTIAL();
4689               break;
4690               }
4691             GETCHARLEN(fc, Feptr, len);
4692             if (fc >= 256 || (mb->ctypes[fc] & ctype_word) == 0) break;
4693             Feptr+= len;
4694             }
4695           break;
4696 
4697           default:
4698           return PCRE2_ERROR_INTERNAL;
4699           }
4700 
4701         if (reptype == REPTYPE_POS) continue;    /* No backtracking */
4702 
4703         /* After \C in UTF mode, Lstart_eptr might be in the middle of a
4704         Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't go
4705         too far. */
4706 
4707         for(;;)
4708           {
4709           if (Feptr <= Lstart_eptr) break;
4710           RMATCH(Fecode, RM221);
4711           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4712           Feptr--;
4713           BACKCHAR(Feptr);
4714           if (Lctype == OP_ANYNL && Feptr > Lstart_eptr &&
4715               UCHAR21(Feptr) == CHAR_NL && UCHAR21(Feptr - 1) == CHAR_CR)
4716             Feptr--;
4717           }
4718         }
4719       else
4720 #endif  /* SUPPORT_UNICODE */
4721 
4722       /* Not UTF mode */
4723         {
4724         switch(Lctype)
4725           {
4726           case OP_ANY:
4727           for (i = Lmin; i < Lmax; i++)
4728             {
4729             if (Feptr >= mb->end_subject)
4730               {
4731               SCHECK_PARTIAL();
4732               break;
4733               }
4734             if (IS_NEWLINE(Feptr)) break;
4735             if (mb->partial != 0 &&    /* Take care with CRLF partial */
4736                 Feptr + 1 >= mb->end_subject &&
4737                 NLBLOCK->nltype == NLTYPE_FIXED &&
4738                 NLBLOCK->nllen == 2 &&
4739                 *Feptr == NLBLOCK->nl[0])
4740               {
4741               mb->hitend = TRUE;
4742               if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
4743               }
4744             Feptr++;
4745             }
4746           break;
4747 
4748           case OP_ALLANY:
4749           case OP_ANYBYTE:
4750           fc = Lmax - Lmin;
4751           if (fc > (uint32_t)(mb->end_subject - Feptr))
4752             {
4753             Feptr = mb->end_subject;
4754             SCHECK_PARTIAL();
4755             }
4756           else Feptr += fc;
4757           break;
4758 
4759           case OP_ANYNL:
4760           for (i = Lmin; i < Lmax; i++)
4761             {
4762             if (Feptr >= mb->end_subject)
4763               {
4764               SCHECK_PARTIAL();
4765               break;
4766               }
4767             fc = *Feptr;
4768             if (fc == CHAR_CR)
4769               {
4770               if (++Feptr >= mb->end_subject) break;
4771               if (*Feptr == CHAR_LF) Feptr++;
4772               }
4773             else
4774               {
4775               if (fc != CHAR_LF && (mb->bsr_convention == PCRE2_BSR_ANYCRLF ||
4776                  (fc != CHAR_VT && fc != CHAR_FF && fc != CHAR_NEL
4777 #if PCRE2_CODE_UNIT_WIDTH != 8
4778                  && fc != 0x2028 && fc != 0x2029
4779 #endif
4780                  ))) break;
4781               Feptr++;
4782               }
4783             }
4784           break;
4785 
4786           case OP_NOT_HSPACE:
4787           for (i = Lmin; i < Lmax; i++)
4788             {
4789             if (Feptr >= mb->end_subject)
4790               {
4791               SCHECK_PARTIAL();
4792               break;
4793               }
4794             switch(*Feptr)
4795               {
4796               default: Feptr++; break;
4797               HSPACE_BYTE_CASES:
4798 #if PCRE2_CODE_UNIT_WIDTH != 8
4799               HSPACE_MULTIBYTE_CASES:
4800 #endif
4801               goto ENDLOOP00;
4802               }
4803             }
4804           ENDLOOP00:
4805           break;
4806 
4807           case OP_HSPACE:
4808           for (i = Lmin; i < Lmax; i++)
4809             {
4810             if (Feptr >= mb->end_subject)
4811               {
4812               SCHECK_PARTIAL();
4813               break;
4814               }
4815             switch(*Feptr)
4816               {
4817               default: goto ENDLOOP01;
4818               HSPACE_BYTE_CASES:
4819 #if PCRE2_CODE_UNIT_WIDTH != 8
4820               HSPACE_MULTIBYTE_CASES:
4821 #endif
4822               Feptr++; break;
4823               }
4824             }
4825           ENDLOOP01:
4826           break;
4827 
4828           case OP_NOT_VSPACE:
4829           for (i = Lmin; i < Lmax; i++)
4830             {
4831             if (Feptr >= mb->end_subject)
4832               {
4833               SCHECK_PARTIAL();
4834               break;
4835               }
4836             switch(*Feptr)
4837               {
4838               default: Feptr++; break;
4839               VSPACE_BYTE_CASES:
4840 #if PCRE2_CODE_UNIT_WIDTH != 8
4841               VSPACE_MULTIBYTE_CASES:
4842 #endif
4843               goto ENDLOOP02;
4844               }
4845             }
4846           ENDLOOP02:
4847           break;
4848 
4849           case OP_VSPACE:
4850           for (i = Lmin; i < Lmax; i++)
4851             {
4852             if (Feptr >= mb->end_subject)
4853               {
4854               SCHECK_PARTIAL();
4855               break;
4856               }
4857             switch(*Feptr)
4858               {
4859               default: goto ENDLOOP03;
4860               VSPACE_BYTE_CASES:
4861 #if PCRE2_CODE_UNIT_WIDTH != 8
4862               VSPACE_MULTIBYTE_CASES:
4863 #endif
4864               Feptr++; break;
4865               }
4866             }
4867           ENDLOOP03:
4868           break;
4869 
4870           case OP_NOT_DIGIT:
4871           for (i = Lmin; i < Lmax; i++)
4872             {
4873             if (Feptr >= mb->end_subject)
4874               {
4875               SCHECK_PARTIAL();
4876               break;
4877               }
4878             if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_digit) != 0)
4879               break;
4880             Feptr++;
4881             }
4882           break;
4883 
4884           case OP_DIGIT:
4885           for (i = Lmin; i < Lmax; i++)
4886             {
4887             if (Feptr >= mb->end_subject)
4888               {
4889               SCHECK_PARTIAL();
4890               break;
4891               }
4892             if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_digit) == 0)
4893               break;
4894             Feptr++;
4895             }
4896           break;
4897 
4898           case OP_NOT_WHITESPACE:
4899           for (i = Lmin; i < Lmax; i++)
4900             {
4901             if (Feptr >= mb->end_subject)
4902               {
4903               SCHECK_PARTIAL();
4904               break;
4905               }
4906             if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_space) != 0)
4907               break;
4908             Feptr++;
4909             }
4910           break;
4911 
4912           case OP_WHITESPACE:
4913           for (i = Lmin; i < Lmax; i++)
4914             {
4915             if (Feptr >= mb->end_subject)
4916               {
4917               SCHECK_PARTIAL();
4918               break;
4919               }
4920             if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_space) == 0)
4921               break;
4922             Feptr++;
4923             }
4924           break;
4925 
4926           case OP_NOT_WORDCHAR:
4927           for (i = Lmin; i < Lmax; i++)
4928             {
4929             if (Feptr >= mb->end_subject)
4930               {
4931               SCHECK_PARTIAL();
4932               break;
4933               }
4934             if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_word) != 0)
4935               break;
4936             Feptr++;
4937             }
4938           break;
4939 
4940           case OP_WORDCHAR:
4941           for (i = Lmin; i < Lmax; i++)
4942             {
4943             if (Feptr >= mb->end_subject)
4944               {
4945               SCHECK_PARTIAL();
4946               break;
4947               }
4948             if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_word) == 0)
4949               break;
4950             Feptr++;
4951             }
4952           break;
4953 
4954           default:
4955           return PCRE2_ERROR_INTERNAL;
4956           }
4957 
4958         if (reptype == REPTYPE_POS) continue;    /* No backtracking */
4959 
4960         for (;;)
4961           {
4962           if (Feptr == Lstart_eptr) break;
4963           RMATCH(Fecode, RM34);
4964           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4965           Feptr--;
4966           if (Lctype == OP_ANYNL && Feptr > Lstart_eptr && *Feptr == CHAR_LF &&
4967               Feptr[-1] == CHAR_CR) Feptr--;
4968           }
4969         }
4970       }
4971     break;  /* End of repeat character type processing */
4972 
4973 #undef Lstart_eptr
4974 #undef Lmin
4975 #undef Lmax
4976 #undef Lctype
4977 #undef Lpropvalue
4978 
4979 
4980     /* ===================================================================== */
4981     /* Match a back reference, possibly repeatedly. Look past the end of the
4982     item to see if there is repeat information following. The OP_REF and
4983     OP_REFI opcodes are used for a reference to a numbered group or to a
4984     non-duplicated named group. For a duplicated named group, OP_DNREF and
4985     OP_DNREFI are used. In this case we must scan the list of groups to which
4986     the name refers, and use the first one that is set. */
4987 
4988 #define Lmin      F->temp_32[0]
4989 #define Lmax      F->temp_32[1]
4990 #define Lcaseless F->temp_32[2]
4991 #define Lstart    F->temp_sptr[0]
4992 #define Loffset   F->temp_size
4993 
4994     case OP_DNREF:
4995     case OP_DNREFI:
4996     Lcaseless = (Fop == OP_DNREFI);
4997       {
4998       int count = GET2(Fecode, 1+IMM2_SIZE);
4999       PCRE2_SPTR slot = mb->name_table + GET2(Fecode, 1) * mb->name_entry_size;
5000       Fecode += 1 + 2*IMM2_SIZE;
5001 
5002       while (count-- > 0)
5003         {
5004         Loffset = (GET2(slot, 0) << 1) - 2;
5005         if (Loffset < Foffset_top && Fovector[Loffset] != PCRE2_UNSET) break;
5006         slot += mb->name_entry_size;
5007         }
5008       }
5009     goto REF_REPEAT;
5010 
5011     case OP_REF:
5012     case OP_REFI:
5013     Lcaseless = (Fop == OP_REFI);
5014     Loffset = (GET2(Fecode, 1) << 1) - 2;
5015     Fecode += 1 + IMM2_SIZE;
5016 
5017     /* Set up for repetition, or handle the non-repeated case. The maximum and
5018     minimum must be in the heap frame, but as they are short-term values, we
5019     use temporary fields. */
5020 
5021     REF_REPEAT:
5022     switch (*Fecode)
5023       {
5024       case OP_CRSTAR:
5025       case OP_CRMINSTAR:
5026       case OP_CRPLUS:
5027       case OP_CRMINPLUS:
5028       case OP_CRQUERY:
5029       case OP_CRMINQUERY:
5030       fc = *Fecode++ - OP_CRSTAR;
5031       Lmin = rep_min[fc];
5032       Lmax = rep_max[fc];
5033       reptype = rep_typ[fc];
5034       break;
5035 
5036       case OP_CRRANGE:
5037       case OP_CRMINRANGE:
5038       Lmin = GET2(Fecode, 1);
5039       Lmax = GET2(Fecode, 1 + IMM2_SIZE);
5040       reptype = rep_typ[*Fecode - OP_CRSTAR];
5041       if (Lmax == 0) Lmax = UINT32_MAX;  /* Max 0 => infinity */
5042       Fecode += 1 + 2 * IMM2_SIZE;
5043       break;
5044 
5045       default:                  /* No repeat follows */
5046         {
5047         rrc = match_ref(Loffset, Lcaseless, F, mb, &length);
5048         if (rrc != 0)
5049           {
5050           if (rrc > 0) Feptr = mb->end_subject;   /* Partial match */
5051           CHECK_PARTIAL();
5052           RRETURN(MATCH_NOMATCH);
5053           }
5054         }
5055       Feptr += length;
5056       continue;              /* With the main loop */
5057       }
5058 
5059     /* Handle repeated back references. If a set group has length zero, just
5060     continue with the main loop, because it matches however many times. For an
5061     unset reference, if the minimum is zero, we can also just continue. We can
5062     also continue if PCRE2_MATCH_UNSET_BACKREF is set, because this makes an
5063     unset group behave as a zero-length group. For any other unset case,
5064     carrying on will result in NOMATCH. */
5065 
5066     if (Loffset < Foffset_top && Fovector[Loffset] != PCRE2_UNSET)
5067       {
5068       if (Fovector[Loffset] == Fovector[Loffset + 1]) continue;
5069       }
5070     else  /* Group is not set */
5071       {
5072       if (Lmin == 0 || (mb->poptions & PCRE2_MATCH_UNSET_BACKREF) != 0)
5073         continue;
5074       }
5075 
5076     /* First, ensure the minimum number of matches are present. */
5077 
5078     for (i = 1; i <= Lmin; i++)
5079       {
5080       PCRE2_SIZE slength;
5081       rrc = match_ref(Loffset, Lcaseless, F, mb, &slength);
5082       if (rrc != 0)
5083         {
5084         if (rrc > 0) Feptr = mb->end_subject;   /* Partial match */
5085         CHECK_PARTIAL();
5086         RRETURN(MATCH_NOMATCH);
5087         }
5088       Feptr += slength;
5089       }
5090 
5091     /* If min = max, we are done. They are not both allowed to be zero. */
5092 
5093     if (Lmin == Lmax) continue;
5094 
5095     /* If minimizing, keep trying and advancing the pointer. */
5096 
5097     if (reptype == REPTYPE_MIN)
5098       {
5099       for (;;)
5100         {
5101         PCRE2_SIZE slength;
5102         RMATCH(Fecode, RM20);
5103         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5104         if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
5105         rrc = match_ref(Loffset, Lcaseless, F, mb, &slength);
5106         if (rrc != 0)
5107           {
5108           if (rrc > 0) Feptr = mb->end_subject;   /* Partial match */
5109           CHECK_PARTIAL();
5110           RRETURN(MATCH_NOMATCH);
5111           }
5112         Feptr += slength;
5113         }
5114       /* Control never gets here */
5115       }
5116 
5117     /* If maximizing, find the longest string and work backwards, as long as
5118     the matched lengths for each iteration are the same. */
5119 
5120     else
5121       {
5122       BOOL samelengths = TRUE;
5123       Lstart = Feptr;     /* Starting position */
5124       Flength = Fovector[Loffset+1] - Fovector[Loffset];
5125 
5126       for (i = Lmin; i < Lmax; i++)
5127         {
5128         PCRE2_SIZE slength;
5129         rrc = match_ref(Loffset, Lcaseless, F, mb, &slength);
5130         if (rrc != 0)
5131           {
5132           /* Can't use CHECK_PARTIAL because we don't want to update Feptr in
5133           the soft partial matching case. */
5134 
5135           if (rrc > 0 && mb->partial != 0 &&
5136               mb->end_subject > mb->start_used_ptr)
5137             {
5138             mb->hitend = TRUE;
5139             if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
5140             }
5141           break;
5142           }
5143 
5144         if (slength != Flength) samelengths = FALSE;
5145         Feptr += slength;
5146         }
5147 
5148       /* If the length matched for each repetition is the same as the length of
5149       the captured group, we can easily work backwards. This is the normal
5150       case. However, in caseless UTF-8 mode there are pairs of case-equivalent
5151       characters whose lengths (in terms of code units) differ. As this is
5152       very rare, we handle it by re-matching fewer and fewer times. */
5153 
5154       if (samelengths)
5155         {
5156         while (Feptr >= Lstart)
5157           {
5158           RMATCH(Fecode, RM21);
5159           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5160           Feptr -= Flength;
5161           }
5162         }
5163 
5164       /* The rare case of non-matching lengths. Re-scan the repetition for each
5165       iteration. We know that match_ref() will succeed every time. */
5166 
5167       else
5168         {
5169         Lmax = i;
5170         for (;;)
5171           {
5172           RMATCH(Fecode, RM22);
5173           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5174           if (Feptr == Lstart) break; /* Failed after minimal repetition */
5175           Feptr = Lstart;
5176           Lmax--;
5177           for (i = Lmin; i < Lmax; i++)
5178             {
5179             PCRE2_SIZE slength;
5180             (void)match_ref(Loffset, Lcaseless, F, mb, &slength);
5181             Feptr += slength;
5182             }
5183           }
5184         }
5185 
5186       RRETURN(MATCH_NOMATCH);
5187       }
5188     /* Control never gets here */
5189 
5190 #undef Lcaseless
5191 #undef Lmin
5192 #undef Lmax
5193 #undef Lstart
5194 #undef Loffset
5195 
5196 
5197 
5198 /* ========================================================================= */
5199 /*           Opcodes for the start of various parenthesized items            */
5200 /* ========================================================================= */
5201 
5202     /* In all cases, if the result of RMATCH() is MATCH_THEN, check whether the
5203     (*THEN) is within the current branch by comparing the address of OP_THEN
5204     that is passed back with the end of the branch. If (*THEN) is within the
5205     current branch, and the branch is one of two or more alternatives (it
5206     either starts or ends with OP_ALT), we have reached the limit of THEN's
5207     action, so convert the return code to NOMATCH, which will cause normal
5208     backtracking to happen from now on. Otherwise, THEN is passed back to an
5209     outer alternative. This implements Perl's treatment of parenthesized
5210     groups, where a group not containing | does not affect the current
5211     alternative, that is, (X) is NOT the same as (X|(*F)). */
5212 
5213 
5214     /* ===================================================================== */
5215     /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a non-possessive
5216     bracket group, indicating that it may occur zero times. It may repeat
5217     infinitely, or not at all - i.e. it could be ()* or ()? or even (){0} in
5218     the pattern. Brackets with fixed upper repeat limits are compiled as a
5219     number of copies, with the optional ones preceded by BRAZERO or BRAMINZERO.
5220     Possessive groups with possible zero repeats are preceded by BRAPOSZERO. */
5221 
5222 #define Lnext_ecode F->temp_sptr[0]
5223 
5224     case OP_BRAZERO:
5225     Lnext_ecode = Fecode + 1;
5226     RMATCH(Lnext_ecode, RM9);
5227     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5228     do Lnext_ecode += GET(Lnext_ecode, 1); while (*Lnext_ecode == OP_ALT);
5229     Fecode = Lnext_ecode + 1 + LINK_SIZE;
5230     break;
5231 
5232     case OP_BRAMINZERO:
5233     Lnext_ecode = Fecode + 1;
5234     do Lnext_ecode += GET(Lnext_ecode, 1); while (*Lnext_ecode == OP_ALT);
5235     RMATCH(Lnext_ecode + 1 + LINK_SIZE, RM10);
5236     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5237     Fecode++;
5238     break;
5239 
5240 #undef Lnext_ecode
5241 
5242     case OP_SKIPZERO:
5243     Fecode++;
5244     do Fecode += GET(Fecode,1); while (*Fecode == OP_ALT);
5245     Fecode += 1 + LINK_SIZE;
5246     break;
5247 
5248 
5249     /* ===================================================================== */
5250     /* Handle possessive brackets with an unlimited repeat. The end of these
5251     brackets will always be OP_KETRPOS, which returns MATCH_KETRPOS without
5252     going further in the pattern. */
5253 
5254 #define Lframe_type    F->temp_32[0]
5255 #define Lmatched_once  F->temp_32[1]
5256 #define Lzero_allowed  F->temp_32[2]
5257 #define Lstart_eptr    F->temp_sptr[0]
5258 #define Lstart_group   F->temp_sptr[1]
5259 
5260     case OP_BRAPOSZERO:
5261     Lzero_allowed = TRUE;                /* Zero repeat is allowed */
5262     Fecode += 1;
5263     if (*Fecode == OP_CBRAPOS || *Fecode == OP_SCBRAPOS)
5264       goto POSSESSIVE_CAPTURE;
5265     goto POSSESSIVE_NON_CAPTURE;
5266 
5267     case OP_BRAPOS:
5268     case OP_SBRAPOS:
5269     Lzero_allowed = FALSE;               /* Zero repeat not allowed */
5270 
5271     POSSESSIVE_NON_CAPTURE:
5272     Lframe_type = GF_NOCAPTURE;          /* Remembered frame type */
5273     goto POSSESSIVE_GROUP;
5274 
5275     case OP_CBRAPOS:
5276     case OP_SCBRAPOS:
5277     Lzero_allowed = FALSE;               /* Zero repeat not allowed */
5278 
5279     POSSESSIVE_CAPTURE:
5280     number = GET2(Fecode, 1+LINK_SIZE);
5281     Lframe_type = GF_CAPTURE | number;   /* Remembered frame type */
5282 
5283     POSSESSIVE_GROUP:
5284     Lmatched_once = FALSE;               /* Never matched */
5285     Lstart_group = Fecode;               /* Start of this group */
5286 
5287     for (;;)
5288       {
5289       Lstart_eptr = Feptr;               /* Position at group start */
5290       group_frame_type = Lframe_type;
5291       RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM8);
5292       if (rrc == MATCH_KETRPOS)
5293         {
5294         Lmatched_once = TRUE;            /* Matched at least once */
5295         if (Feptr == Lstart_eptr)        /* Empty match; skip to end */
5296           {
5297           do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT);
5298           break;
5299           }
5300 
5301         Fecode = Lstart_group;
5302         continue;
5303         }
5304 
5305       /* See comment above about handling THEN. */
5306 
5307       if (rrc == MATCH_THEN)
5308         {
5309         PCRE2_SPTR next_ecode = Fecode + GET(Fecode,1);
5310         if (mb->verb_ecode_ptr < next_ecode &&
5311             (*Fecode == OP_ALT || *next_ecode == OP_ALT))
5312           rrc = MATCH_NOMATCH;
5313         }
5314 
5315       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5316       Fecode += GET(Fecode, 1);
5317       if (*Fecode != OP_ALT) break;
5318       }
5319 
5320     /* Success if matched something or zero repeat allowed */
5321 
5322     if (Lmatched_once || Lzero_allowed)
5323       {
5324       Fecode += 1 + LINK_SIZE;
5325       break;
5326       }
5327 
5328     RRETURN(MATCH_NOMATCH);
5329 
5330 #undef Lmatched_once
5331 #undef Lzero_allowed
5332 #undef Lframe_type
5333 #undef Lstart_eptr
5334 #undef Lstart_group
5335 
5336 
5337     /* ===================================================================== */
5338     /* Handle non-capturing brackets that cannot match an empty string. When we
5339     get to the final alternative within the brackets, as long as there are no
5340     THEN's in the pattern, we can optimize by not recording a new backtracking
5341     point. (Ideally we should test for a THEN within this group, but we don't
5342     have that information.) Don't do this if we are at the very top level,
5343     however, because that would make handling assertions and once-only brackets
5344     messier when there is nothing to go back to. */
5345 
5346 #define Lframe_type F->temp_32[0]     /* Set for all that use GROUPLOOP */
5347 #define Lnext_branch F->temp_sptr[0]  /* Used only in OP_BRA handling */
5348 
5349     case OP_BRA:
5350     if (mb->hasthen || Frdepth == 0)
5351       {
5352       Lframe_type = 0;
5353       goto GROUPLOOP;
5354       }
5355 
5356     for (;;)
5357       {
5358       Lnext_branch = Fecode + GET(Fecode, 1);
5359       if (*Lnext_branch != OP_ALT) break;
5360 
5361       /* This is never the final branch. We do not need to test for MATCH_THEN
5362       here because this code is not used when there is a THEN in the pattern. */
5363 
5364       RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM1);
5365       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5366       Fecode = Lnext_branch;
5367       }
5368 
5369     /* Hit the start of the final branch. Continue at this level. */
5370 
5371     Fecode += PRIV(OP_lengths)[*Fecode];
5372     break;
5373 
5374 #undef Lnext_branch
5375 
5376 
5377     /* ===================================================================== */
5378     /* Handle a capturing bracket, other than those that are possessive with an
5379     unlimited repeat. */
5380 
5381     case OP_CBRA:
5382     case OP_SCBRA:
5383     Lframe_type = GF_CAPTURE | GET2(Fecode, 1+LINK_SIZE);
5384     goto GROUPLOOP;
5385 
5386 
5387     /* ===================================================================== */
5388     /* Atomic groups and non-capturing brackets that can match an empty string
5389     must record a backtracking point and also set up a chained frame. */
5390 
5391     case OP_ONCE:
5392     case OP_SCRIPT_RUN:
5393     case OP_SBRA:
5394     Lframe_type = GF_NOCAPTURE | Fop;
5395 
5396     GROUPLOOP:
5397     for (;;)
5398       {
5399       group_frame_type = Lframe_type;
5400       RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM2);
5401       if (rrc == MATCH_THEN)
5402         {
5403         PCRE2_SPTR next_ecode = Fecode + GET(Fecode,1);
5404         if (mb->verb_ecode_ptr < next_ecode &&
5405             (*Fecode == OP_ALT || *next_ecode == OP_ALT))
5406           rrc = MATCH_NOMATCH;
5407         }
5408       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5409       Fecode += GET(Fecode, 1);
5410       if (*Fecode != OP_ALT) RRETURN(MATCH_NOMATCH);
5411       }
5412     /* Control never reaches here. */
5413 
5414 #undef Lframe_type
5415 
5416 
5417     /* ===================================================================== */
5418     /* Pattern recursion either matches the current regex, or some
5419     subexpression. The offset data is the offset to the starting bracket from
5420     the start of the whole pattern. This is so that it works from duplicated
5421     subpatterns. For a whole-pattern recursion, we have to infer the number
5422     zero. */
5423 
5424 #define Lframe_type F->temp_32[0]
5425 #define Lstart_branch F->temp_sptr[0]
5426 
5427     case OP_RECURSE:
5428     bracode = mb->start_code + GET(Fecode, 1);
5429     number = (bracode == mb->start_code)? 0 : GET2(bracode, 1 + LINK_SIZE);
5430 
5431     /* If we are already in a pattern recursion, check for repeating the same
5432     one without changing the subject pointer or the last referenced character
5433     in the subject. This should catch convoluted mutual recursions; some
5434     simple cases are caught at compile time. However, there are rare cases when
5435     this check needs to be turned off. In this case, actual recursion loops
5436     will be caught by the match or heap limits. */
5437 
5438     if (Fcurrent_recurse != RECURSE_UNSET)
5439       {
5440       offset = Flast_group_offset;
5441       while (offset != PCRE2_UNSET)
5442         {
5443         N = (heapframe *)((char *)match_data->heapframes + offset);
5444         P = (heapframe *)((char *)N - frame_size);
5445         if (N->group_frame_type == (GF_RECURSE | number))
5446           {
5447           if (Feptr == P->eptr && mb->last_used_ptr == P->recurse_last_used &&
5448                (mb->moptions & PCRE2_DISABLE_RECURSELOOP_CHECK) == 0)
5449             return PCRE2_ERROR_RECURSELOOP;
5450           break;
5451           }
5452         offset = P->last_group_offset;
5453         }
5454       }
5455 
5456     /* Remember the current last referenced character and then run the
5457     recursion branch by branch. */
5458 
5459     F->recurse_last_used = mb->last_used_ptr;
5460     Lstart_branch = bracode;
5461     Lframe_type = GF_RECURSE | number;
5462 
5463     for (;;)
5464       {
5465       PCRE2_SPTR next_ecode;
5466 
5467       group_frame_type = Lframe_type;
5468       RMATCH(Lstart_branch + PRIV(OP_lengths)[*Lstart_branch], RM11);
5469       next_ecode = Lstart_branch + GET(Lstart_branch,1);
5470 
5471       /* Handle backtracking verbs, which are defined in a range that can
5472       easily be tested for. PCRE does not allow THEN, SKIP, PRUNE or COMMIT to
5473       escape beyond a recursion; they cause a NOMATCH for the entire recursion.
5474 
5475       When one of these verbs triggers, the current recursion group number is
5476       recorded. If it matches the recursion we are processing, the verb
5477       happened within the recursion and we must deal with it. Otherwise it must
5478       have happened after the recursion completed, and so has to be passed
5479       back. See comment above about handling THEN. */
5480 
5481       if (rrc >= MATCH_BACKTRACK_MIN && rrc <= MATCH_BACKTRACK_MAX &&
5482           mb->verb_current_recurse == (Lframe_type ^ GF_RECURSE))
5483         {
5484         if (rrc == MATCH_THEN && mb->verb_ecode_ptr < next_ecode &&
5485             (*Lstart_branch == OP_ALT || *next_ecode == OP_ALT))
5486           rrc = MATCH_NOMATCH;
5487         else RRETURN(MATCH_NOMATCH);
5488         }
5489 
5490       /* Note that carrying on after (*ACCEPT) in a recursion is handled in the
5491       OP_ACCEPT code. Nothing needs to be done here. */
5492 
5493       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5494       Lstart_branch = next_ecode;
5495       if (*Lstart_branch != OP_ALT) RRETURN(MATCH_NOMATCH);
5496       }
5497     /* Control never reaches here. */
5498 
5499 #undef Lframe_type
5500 #undef Lstart_branch
5501 
5502 
5503     /* ===================================================================== */
5504     /* Positive assertions are like other groups except that PCRE doesn't allow
5505     the effect of (*THEN) to escape beyond an assertion; it is therefore
5506     treated as NOMATCH. (*ACCEPT) is treated as a successful assertion, with
5507     its captures and mark retained. Any other return is an error. */
5508 
5509 #define Lframe_type  F->temp_32[0]
5510 
5511     case OP_ASSERT:
5512     case OP_ASSERTBACK:
5513     case OP_ASSERT_NA:
5514     case OP_ASSERTBACK_NA:
5515     Lframe_type = GF_NOCAPTURE | Fop;
5516     for (;;)
5517       {
5518       group_frame_type = Lframe_type;
5519       RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM3);
5520       if (rrc == MATCH_ACCEPT)
5521         {
5522         memcpy(Fovector,
5523               (char *)assert_accept_frame + offsetof(heapframe, ovector),
5524               assert_accept_frame->offset_top * sizeof(PCRE2_SIZE));
5525         Foffset_top = assert_accept_frame->offset_top;
5526         Fmark = assert_accept_frame->mark;
5527         break;
5528         }
5529       if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
5530       Fecode += GET(Fecode, 1);
5531       if (*Fecode != OP_ALT) RRETURN(MATCH_NOMATCH);
5532       }
5533 
5534     do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT);
5535     Fecode += 1 + LINK_SIZE;
5536     break;
5537 
5538 #undef Lframe_type
5539 
5540 
5541     /* ===================================================================== */
5542     /* Handle negative assertions. Loop for each non-matching branch as for
5543     positive assertions. */
5544 
5545 #define Lframe_type  F->temp_32[0]
5546 
5547     case OP_ASSERT_NOT:
5548     case OP_ASSERTBACK_NOT:
5549     Lframe_type  = GF_NOCAPTURE | Fop;
5550 
5551     for (;;)
5552       {
5553       group_frame_type = Lframe_type;
5554       RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM4);
5555       switch(rrc)
5556         {
5557         case MATCH_ACCEPT:   /* Assertion matched, therefore it fails. */
5558         case MATCH_MATCH:
5559         RRETURN (MATCH_NOMATCH);
5560 
5561         case MATCH_NOMATCH:  /* Branch failed, try next if present. */
5562         case MATCH_THEN:
5563         Fecode += GET(Fecode, 1);
5564         if (*Fecode != OP_ALT) goto ASSERT_NOT_FAILED;
5565         break;
5566 
5567         case MATCH_COMMIT:   /* Assertion forced to fail, therefore continue. */
5568         case MATCH_SKIP:
5569         case MATCH_PRUNE:
5570         do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT);
5571         goto ASSERT_NOT_FAILED;
5572 
5573         default:             /* Pass back any other return */
5574         RRETURN(rrc);
5575         }
5576       }
5577 
5578     /* None of the branches have matched or there was a backtrack to (*COMMIT),
5579     (*SKIP), (*PRUNE), or (*THEN) in the last branch. This is success for a
5580     negative assertion, so carry on. */
5581 
5582     ASSERT_NOT_FAILED:
5583     Fecode += 1 + LINK_SIZE;
5584     break;
5585 
5586 #undef Lframe_type
5587 
5588 
5589     /* ===================================================================== */
5590     /* The callout item calls an external function, if one is provided, passing
5591     details of the match so far. This is mainly for debugging, though the
5592     function is able to force a failure. */
5593 
5594     case OP_CALLOUT:
5595     case OP_CALLOUT_STR:
5596     rrc = do_callout(F, mb, &length);
5597     if (rrc > 0) RRETURN(MATCH_NOMATCH);
5598     if (rrc < 0) RRETURN(rrc);
5599     Fecode += length;
5600     break;
5601 
5602 
5603     /* ===================================================================== */
5604     /* Conditional group: compilation checked that there are no more than two
5605     branches. If the condition is false, skipping the first branch takes us
5606     past the end of the item if there is only one branch, but that's exactly
5607     what we want. */
5608 
5609     case OP_COND:
5610     case OP_SCOND:
5611 
5612     /* The variable Flength will be added to Fecode when the condition is
5613     false, to get to the second branch. Setting it to the offset to the ALT or
5614     KET, then incrementing Fecode achieves this effect. However, if the second
5615     branch is non-existent, we must point to the KET so that the end of the
5616     group is correctly processed. We now have Fecode pointing to the condition
5617     or callout. */
5618 
5619     Flength = GET(Fecode, 1);    /* Offset to the second branch */
5620     if (Fecode[Flength] != OP_ALT) Flength -= 1 + LINK_SIZE;
5621     Fecode += 1 + LINK_SIZE;     /* From this opcode */
5622 
5623     /* Because of the way auto-callout works during compile, a callout item is
5624     inserted between OP_COND and an assertion condition. Such a callout can
5625     also be inserted manually. */
5626 
5627     if (*Fecode == OP_CALLOUT || *Fecode == OP_CALLOUT_STR)
5628       {
5629       rrc = do_callout(F, mb, &length);
5630       if (rrc > 0) RRETURN(MATCH_NOMATCH);
5631       if (rrc < 0) RRETURN(rrc);
5632 
5633       /* Advance Fecode past the callout, so it now points to the condition. We
5634       must adjust Flength so that the value of Fecode+Flength is unchanged. */
5635 
5636       Fecode += length;
5637       Flength -= length;
5638       }
5639 
5640     /* Test the various possible conditions */
5641 
5642     condition = FALSE;
5643     switch(*Fecode)
5644       {
5645       case OP_RREF:                  /* Group recursion test */
5646       if (Fcurrent_recurse != RECURSE_UNSET)
5647         {
5648         number = GET2(Fecode, 1);
5649         condition = (number == RREF_ANY || number == Fcurrent_recurse);
5650         }
5651       break;
5652 
5653       case OP_DNRREF:       /* Duplicate named group recursion test */
5654       if (Fcurrent_recurse != RECURSE_UNSET)
5655         {
5656         int count = GET2(Fecode, 1 + IMM2_SIZE);
5657         PCRE2_SPTR slot = mb->name_table + GET2(Fecode, 1) * mb->name_entry_size;
5658         while (count-- > 0)
5659           {
5660           number = GET2(slot, 0);
5661           condition = number == Fcurrent_recurse;
5662           if (condition) break;
5663           slot += mb->name_entry_size;
5664           }
5665         }
5666       break;
5667 
5668       case OP_CREF:                         /* Numbered group used test */
5669       offset = (GET2(Fecode, 1) << 1) - 2;  /* Doubled ref number */
5670       condition = offset < Foffset_top && Fovector[offset] != PCRE2_UNSET;
5671       break;
5672 
5673       case OP_DNCREF:      /* Duplicate named group used test */
5674         {
5675         int count = GET2(Fecode, 1 + IMM2_SIZE);
5676         PCRE2_SPTR slot = mb->name_table + GET2(Fecode, 1) * mb->name_entry_size;
5677         while (count-- > 0)
5678           {
5679           offset = (GET2(slot, 0) << 1) - 2;
5680           condition = offset < Foffset_top && Fovector[offset] != PCRE2_UNSET;
5681           if (condition) break;
5682           slot += mb->name_entry_size;
5683           }
5684         }
5685       break;
5686 
5687       case OP_FALSE:
5688       case OP_FAIL:   /* The assertion (?!) becomes OP_FAIL */
5689       break;
5690 
5691       case OP_TRUE:
5692       condition = TRUE;
5693       break;
5694 
5695       /* The condition is an assertion. Run code similar to the assertion code
5696       above. */
5697 
5698 #define Lpositive      F->temp_32[0]
5699 #define Lstart_branch  F->temp_sptr[0]
5700 
5701       default:
5702       Lpositive = (*Fecode == OP_ASSERT || *Fecode == OP_ASSERTBACK);
5703       Lstart_branch = Fecode;
5704 
5705       for (;;)
5706         {
5707         group_frame_type = GF_CONDASSERT | *Fecode;
5708         RMATCH(Lstart_branch + PRIV(OP_lengths)[*Lstart_branch], RM5);
5709 
5710         switch(rrc)
5711           {
5712           case MATCH_ACCEPT:  /* Save captures */
5713           memcpy(Fovector,
5714                 (char *)assert_accept_frame + offsetof(heapframe, ovector),
5715                 assert_accept_frame->offset_top * sizeof(PCRE2_SIZE));
5716           Foffset_top = assert_accept_frame->offset_top;
5717 
5718           /* Fall through */
5719           /* In the case of a match, the captures have already been put into
5720           the current frame. */
5721 
5722           case MATCH_MATCH:
5723           condition = Lpositive;   /* TRUE for positive assertion */
5724           break;
5725 
5726           /* PCRE doesn't allow the effect of (*THEN) to escape beyond an
5727           assertion; it is therefore always treated as NOMATCH. */
5728 
5729           case MATCH_NOMATCH:
5730           case MATCH_THEN:
5731           Lstart_branch += GET(Lstart_branch, 1);
5732           if (*Lstart_branch == OP_ALT) continue;  /* Try next branch */
5733           condition = !Lpositive;  /* TRUE for negative assertion */
5734           break;
5735 
5736           /* These force no match without checking other branches. */
5737 
5738           case MATCH_COMMIT:
5739           case MATCH_SKIP:
5740           case MATCH_PRUNE:
5741           condition = !Lpositive;
5742           break;
5743 
5744           default:
5745           RRETURN(rrc);
5746           }
5747         break;  /* Out of the branch loop */
5748         }
5749 
5750       /* If the condition is true, find the end of the assertion so that
5751       advancing past it gets us to the start of the first branch. */
5752 
5753       if (condition)
5754         {
5755         do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT);
5756         }
5757       break;  /* End of assertion condition */
5758       }
5759 
5760 #undef Lpositive
5761 #undef Lstart_branch
5762 
5763     /* Choose branch according to the condition. */
5764 
5765     Fecode += condition? PRIV(OP_lengths)[*Fecode] : Flength;
5766 
5767     /* If the opcode is OP_SCOND it means we are at a repeated conditional
5768     group that might match an empty string. We must therefore descend a level
5769     so that the start is remembered for checking. For OP_COND we can just
5770     continue at this level. */
5771 
5772     if (Fop == OP_SCOND)
5773       {
5774       group_frame_type  = GF_NOCAPTURE | Fop;
5775       RMATCH(Fecode, RM35);
5776       RRETURN(rrc);
5777       }
5778     break;
5779 
5780 
5781 
5782 /* ========================================================================= */
5783 /*                  End of start of parenthesis opcodes                      */
5784 /* ========================================================================= */
5785 
5786 
5787     /* ===================================================================== */
5788     /* Move the subject pointer back by one fixed amount. This occurs at the
5789     start of each branch that has a fixed length in a lookbehind assertion. If
5790     we are too close to the start to move back, fail. When working with UTF-8
5791     we move back a number of characters, not bytes. */
5792 
5793     case OP_REVERSE:
5794     number = GET2(Fecode, 1);
5795 #ifdef SUPPORT_UNICODE
5796     if (utf)
5797       {
5798       while (number-- > 0)
5799         {
5800         if (Feptr <= mb->check_subject) RRETURN(MATCH_NOMATCH);
5801         Feptr--;
5802         BACKCHAR(Feptr);
5803         }
5804       }
5805     else
5806 #endif
5807 
5808     /* No UTF support, or not in UTF mode: count is code unit count */
5809 
5810       {
5811       if ((ptrdiff_t)number > Feptr - mb->start_subject) RRETURN(MATCH_NOMATCH);
5812       Feptr -= number;
5813       }
5814 
5815     /* Save the earliest consulted character, then skip to next opcode */
5816 
5817     if (Feptr < mb->start_used_ptr) mb->start_used_ptr = Feptr;
5818     Fecode += 1 + IMM2_SIZE;
5819     break;
5820 
5821 
5822     /* ===================================================================== */
5823     /* Move the subject pointer back by a variable amount. This occurs at the
5824     start of each branch of a lookbehind assertion when the branch has a
5825     variable, but limited, length. A loop is needed to try matching the branch
5826     after moving back different numbers of characters. If we are too close to
5827     the start to move back even the minimum amount, fail. When working with
5828     UTF-8 we move back a number of characters, not bytes. */
5829 
5830 #define Lmin F->temp_32[0]
5831 #define Lmax F->temp_32[1]
5832 #define Leptr F->temp_sptr[0]
5833 
5834     case OP_VREVERSE:
5835     Lmin = GET2(Fecode, 1);
5836     Lmax = GET2(Fecode, 1 + IMM2_SIZE);
5837     Leptr = Feptr;
5838 
5839     /* Move back by the maximum branch length and then work forwards. This
5840     ensures that items such as \d{3,5} get the maximum length, which is
5841     relevant for captures, and makes for Perl compatibility. */
5842 
5843 #ifdef SUPPORT_UNICODE
5844     if (utf)
5845       {
5846       for (i = 0; i < Lmax; i++)
5847         {
5848         if (Feptr == mb->start_subject)
5849           {
5850           if (i < Lmin) RRETURN(MATCH_NOMATCH);
5851           Lmax = i;
5852           break;
5853           }
5854         Feptr--;
5855         BACKCHAR(Feptr);
5856         }
5857       }
5858     else
5859 #endif
5860 
5861     /* No UTF support or not in UTF mode */
5862 
5863       {
5864       ptrdiff_t diff = Feptr - mb->start_subject;
5865       uint32_t available = (diff > 65535)? 65535 : ((diff > 0)? diff : 0);
5866       if (Lmin > available) RRETURN(MATCH_NOMATCH);
5867       if (Lmax > available) Lmax = available;
5868       Feptr -= Lmax;
5869       }
5870 
5871     /* Now try matching, moving forward one character on failure, until we
5872     reach the minimum back length. */
5873 
5874     for (;;)
5875       {
5876       RMATCH(Fecode + 1 + 2 * IMM2_SIZE, RM37);
5877       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5878       if (Lmax-- <= Lmin) RRETURN(MATCH_NOMATCH);
5879       Feptr++;
5880 #ifdef SUPPORT_UNICODE
5881       if (utf) { FORWARDCHARTEST(Feptr, mb->end_subject); }
5882 #endif
5883       }
5884     /* Control never reaches here */
5885 
5886 #undef Lmin
5887 #undef Lmax
5888 #undef Leptr
5889 
5890     /* ===================================================================== */
5891     /* An alternation is the end of a branch; scan along to find the end of the
5892     bracketed group. */
5893 
5894     case OP_ALT:
5895     branch_end = Fecode;
5896     do Fecode += GET(Fecode,1); while (*Fecode == OP_ALT);
5897     break;
5898 
5899 
5900     /* ===================================================================== */
5901     /* The end of a parenthesized group. For all but OP_BRA and OP_COND, the
5902     starting frame was added to the chained frames in order to remember the
5903     starting subject position for the group. (Not true for OP_BRA when it's a
5904     whole pattern recursion, but that is handled separately below.) */
5905 
5906     case OP_KET:
5907     case OP_KETRMIN:
5908     case OP_KETRMAX:
5909     case OP_KETRPOS:
5910 
5911     bracode = Fecode - GET(Fecode, 1);
5912 
5913     if (branch_end == NULL) branch_end = Fecode;
5914     branch_start = bracode;
5915     while (branch_start + GET(branch_start, 1) != branch_end)
5916       branch_start += GET(branch_start, 1);
5917     branch_end = NULL;
5918 
5919     /* Point N to the frame at the start of the most recent group, and P to its
5920     predecessor. Remember the subject pointer at the start of the group. */
5921 
5922     if (*bracode != OP_BRA && *bracode != OP_COND)
5923       {
5924       N = (heapframe *)((char *)match_data->heapframes + Flast_group_offset);
5925       P = (heapframe *)((char *)N - frame_size);
5926       Flast_group_offset = P->last_group_offset;
5927 
5928 #ifdef DEBUG_SHOW_RMATCH
5929       fprintf(stderr, "++ KET for frame=%d type=%x prev char offset=%lu\n",
5930         N->rdepth, N->group_frame_type,
5931         (char *)P->eptr - (char *)mb->start_subject);
5932 #endif
5933 
5934       /* If we are at the end of an assertion that is a condition, return a
5935       match, discarding any intermediate backtracking points. Copy back the
5936       mark setting and the captures into the frame before N so that they are
5937       set on return. Doing this for all assertions, both positive and negative,
5938       seems to match what Perl does. */
5939 
5940       if (GF_IDMASK(N->group_frame_type) == GF_CONDASSERT)
5941         {
5942         memcpy((char *)P + offsetof(heapframe, ovector), Fovector,
5943           Foffset_top * sizeof(PCRE2_SIZE));
5944         P->offset_top = Foffset_top;
5945         P->mark = Fmark;
5946         Fback_frame = (char *)F - (char *)P;
5947         RRETURN(MATCH_MATCH);
5948         }
5949       }
5950     else P = NULL;   /* Indicates starting frame not recorded */
5951 
5952     /* The group was not a conditional assertion. */
5953 
5954     switch (*bracode)
5955       {
5956       /* Whole pattern recursion is handled as a recursion into group 0, but
5957       the entire pattern is wrapped in OP_BRA/OP_KET rather than a capturing
5958       group - a design mistake: it should perhaps have been capture group 0.
5959       Anyway, that means the end of such recursion must be handled here. It is
5960       detected by checking for an immediately following OP_END when we are
5961       recursing in group 0. If this is not the end of a whole-pattern
5962       recursion, there is nothing to be done. */
5963 
5964       case OP_BRA:
5965       if (Fcurrent_recurse != 0 || Fecode[1+LINK_SIZE] != OP_END) break;
5966 
5967       /* It is the end of whole-pattern recursion. */
5968 
5969       offset = Flast_group_offset;
5970       if (offset == PCRE2_UNSET) return PCRE2_ERROR_INTERNAL;
5971       N = (heapframe *)((char *)match_data->heapframes + offset);
5972       P = (heapframe *)((char *)N - frame_size);
5973       Flast_group_offset = P->last_group_offset;
5974 
5975       /* Reinstate the previous set of captures and then carry on after the
5976       recursion call. */
5977 
5978       memcpy((char *)F + offsetof(heapframe, ovector), P->ovector,
5979         Foffset_top * sizeof(PCRE2_SIZE));
5980       Foffset_top = P->offset_top;
5981       Fcapture_last = P->capture_last;
5982       Fcurrent_recurse = P->current_recurse;
5983       Fecode = P->ecode + 1 + LINK_SIZE;
5984       continue;  /* With next opcode */
5985 
5986       case OP_COND:     /* No need to do anything for these */
5987       case OP_SCOND:
5988       break;
5989 
5990       /* Non-atomic positive assertions are like OP_BRA, except that the
5991       subject pointer must be put back to where it was at the start of the
5992       assertion. For a variable lookbehind, check its end point. */
5993 
5994       case OP_ASSERTBACK_NA:
5995       if (branch_start[1 + LINK_SIZE] == OP_VREVERSE && Feptr != P->eptr)
5996         RRETURN(MATCH_NOMATCH);
5997       /* Fall through */
5998 
5999       case OP_ASSERT_NA:
6000       if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
6001       Feptr = P->eptr;
6002       break;
6003 
6004       /* Atomic positive assertions are like OP_ONCE, except that in addition
6005       the subject pointer must be put back to where it was at the start of the
6006       assertion. For a variable lookbehind, check its end point. */
6007 
6008       case OP_ASSERTBACK:
6009       if (branch_start[1 + LINK_SIZE] == OP_VREVERSE && Feptr != P->eptr)
6010         RRETURN(MATCH_NOMATCH);
6011       /* Fall through */
6012 
6013       case OP_ASSERT:
6014       if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
6015       Feptr = P->eptr;
6016       /* Fall through */
6017 
6018       /* For an atomic group, discard internal backtracking points. We must
6019       also ensure that any remaining branches within the top-level of the group
6020       are not tried. Do this by adjusting the code pointer within the backtrack
6021       frame so that it points to the final branch. */
6022 
6023       case OP_ONCE:
6024       Fback_frame = ((char *)F - (char *)P);
6025       for (;;)
6026         {
6027         uint32_t y = GET(P->ecode,1);
6028         if ((P->ecode)[y] != OP_ALT) break;
6029         P->ecode += y;
6030         }
6031       break;
6032 
6033       /* A matching negative assertion returns MATCH, which is turned into
6034       NOMATCH at the assertion level. For a variable lookbehind, check its end
6035       point. */
6036 
6037       case OP_ASSERTBACK_NOT:
6038       if (branch_start[1 + LINK_SIZE] == OP_VREVERSE && Feptr != P->eptr)
6039         RRETURN(MATCH_NOMATCH);
6040       /* Fall through */
6041 
6042       case OP_ASSERT_NOT:
6043       RRETURN(MATCH_MATCH);
6044 
6045       /* At the end of a script run, apply the script-checking rules. This code
6046       will never be exercised if Unicode support is not compiled, because in
6047       that environment script runs cause an error at compile time. */
6048 
6049       case OP_SCRIPT_RUN:
6050       if (!PRIV(script_run)(P->eptr, Feptr, utf)) RRETURN(MATCH_NOMATCH);
6051       break;
6052 
6053       /* Whole-pattern recursion is coded as a recurse into group 0, and is
6054       handled with OP_BRA above. Other recursion is handled here. */
6055 
6056       case OP_CBRA:
6057       case OP_CBRAPOS:
6058       case OP_SCBRA:
6059       case OP_SCBRAPOS:
6060       number = GET2(bracode, 1+LINK_SIZE);
6061 
6062       /* Handle a recursively called group. We reinstate the previous set of
6063       captures and then carry on after the recursion call. */
6064 
6065       if (Fcurrent_recurse == number)
6066         {
6067         P = (heapframe *)((char *)N - frame_size);
6068         memcpy((char *)F + offsetof(heapframe, ovector), P->ovector,
6069           Foffset_top * sizeof(PCRE2_SIZE));
6070         Foffset_top = P->offset_top;
6071         Fcapture_last = P->capture_last;
6072         Fcurrent_recurse = P->current_recurse;
6073         Fecode = P->ecode + 1 + LINK_SIZE;
6074         continue;  /* With next opcode */
6075         }
6076 
6077       /* Deal with actual capturing. */
6078 
6079       offset = (number << 1) - 2;
6080       Fcapture_last = number;
6081       Fovector[offset] = P->eptr - mb->start_subject;
6082       Fovector[offset+1] = Feptr - mb->start_subject;
6083       if (offset >= Foffset_top) Foffset_top = offset + 2;
6084       break;
6085       }  /* End actions relating to the starting opcode */
6086 
6087     /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
6088     and return the MATCH_KETRPOS. This makes it possible to do the repeats one
6089     at a time from the outer level. This must precede the empty string test -
6090     in this case that test is done at the outer level. */
6091 
6092     if (*Fecode == OP_KETRPOS)
6093       {
6094       memcpy((char *)P + offsetof(heapframe, eptr),
6095              (char *)F + offsetof(heapframe, eptr),
6096              frame_copy_size);
6097       RRETURN(MATCH_KETRPOS);
6098       }
6099 
6100     /* Handle the different kinds of closing brackets. A non-repeating ket
6101     needs no special action, just continuing at this level. This also happens
6102     for the repeating kets if the group matched no characters, in order to
6103     forcibly break infinite loops. Otherwise, the repeating kets try the rest
6104     of the pattern or restart from the preceding bracket, in the appropriate
6105     order. */
6106 
6107     if (Fop != OP_KET && (P == NULL || Feptr != P->eptr))
6108       {
6109       if (Fop == OP_KETRMIN)
6110         {
6111         RMATCH(Fecode + 1 + LINK_SIZE, RM6);
6112         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6113         Fecode -= GET(Fecode, 1);
6114         break;   /* End of ket processing */
6115         }
6116 
6117       /* Repeat the maximum number of times (KETRMAX) */
6118 
6119       RMATCH(bracode, RM7);
6120       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6121       }
6122 
6123     /* Carry on at this level for a non-repeating ket, or after matching an
6124     empty string, or after repeating for a maximum number of times. */
6125 
6126     Fecode += 1 + LINK_SIZE;
6127     break;
6128 
6129 
6130     /* ===================================================================== */
6131     /* Start and end of line assertions, not multiline mode. */
6132 
6133     case OP_CIRC:   /* Start of line, unless PCRE2_NOTBOL is set. */
6134     if (Feptr != mb->start_subject || (mb->moptions & PCRE2_NOTBOL) != 0)
6135       RRETURN(MATCH_NOMATCH);
6136     Fecode++;
6137     break;
6138 
6139     case OP_SOD:    /* Unconditional start of subject */
6140     if (Feptr != mb->start_subject) RRETURN(MATCH_NOMATCH);
6141     Fecode++;
6142     break;
6143 
6144     /* When PCRE2_NOTEOL is unset, assert before the subject end, or a
6145     terminating newline unless PCRE2_DOLLAR_ENDONLY is set. */
6146 
6147     case OP_DOLL:
6148     if ((mb->moptions & PCRE2_NOTEOL) != 0) RRETURN(MATCH_NOMATCH);
6149     if ((mb->poptions & PCRE2_DOLLAR_ENDONLY) == 0) goto ASSERT_NL_OR_EOS;
6150 
6151     /* Fall through */
6152     /* Unconditional end of subject assertion (\z). */
6153 
6154     case OP_EOD:
6155     if (Feptr < mb->true_end_subject) RRETURN(MATCH_NOMATCH);
6156     if (mb->partial != 0)
6157       {
6158       mb->hitend = TRUE;
6159       if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
6160       }
6161     Fecode++;
6162     break;
6163 
6164     /* End of subject or ending \n assertion (\Z) */
6165 
6166     case OP_EODN:
6167     ASSERT_NL_OR_EOS:
6168     if (Feptr < mb->end_subject &&
6169         (!IS_NEWLINE(Feptr) || Feptr != mb->end_subject - mb->nllen))
6170       {
6171       if (mb->partial != 0 &&
6172           Feptr + 1 >= mb->end_subject &&
6173           NLBLOCK->nltype == NLTYPE_FIXED &&
6174           NLBLOCK->nllen == 2 &&
6175           UCHAR21TEST(Feptr) == NLBLOCK->nl[0])
6176         {
6177         mb->hitend = TRUE;
6178         if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
6179         }
6180       RRETURN(MATCH_NOMATCH);
6181       }
6182 
6183     /* Either at end of string or \n before end. */
6184 
6185     if (mb->partial != 0)
6186       {
6187       mb->hitend = TRUE;
6188       if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
6189       }
6190     Fecode++;
6191     break;
6192 
6193 
6194     /* ===================================================================== */
6195     /* Start and end of line assertions, multiline mode. */
6196 
6197     /* Start of subject unless notbol, or after any newline except for one at
6198     the very end, unless PCRE2_ALT_CIRCUMFLEX is set. */
6199 
6200     case OP_CIRCM:
6201     if ((mb->moptions & PCRE2_NOTBOL) != 0 && Feptr == mb->start_subject)
6202       RRETURN(MATCH_NOMATCH);
6203     if (Feptr != mb->start_subject &&
6204         ((Feptr == mb->end_subject &&
6205            (mb->poptions & PCRE2_ALT_CIRCUMFLEX) == 0) ||
6206          !WAS_NEWLINE(Feptr)))
6207       RRETURN(MATCH_NOMATCH);
6208     Fecode++;
6209     break;
6210 
6211     /* Assert before any newline, or before end of subject unless noteol is
6212     set. */
6213 
6214     case OP_DOLLM:
6215     if (Feptr < mb->end_subject)
6216       {
6217       if (!IS_NEWLINE(Feptr))
6218         {
6219         if (mb->partial != 0 &&
6220             Feptr + 1 >= mb->end_subject &&
6221             NLBLOCK->nltype == NLTYPE_FIXED &&
6222             NLBLOCK->nllen == 2 &&
6223             UCHAR21TEST(Feptr) == NLBLOCK->nl[0])
6224           {
6225           mb->hitend = TRUE;
6226           if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
6227           }
6228         RRETURN(MATCH_NOMATCH);
6229         }
6230       }
6231     else
6232       {
6233       if ((mb->moptions & PCRE2_NOTEOL) != 0) RRETURN(MATCH_NOMATCH);
6234       SCHECK_PARTIAL();
6235       }
6236     Fecode++;
6237     break;
6238 
6239 
6240     /* ===================================================================== */
6241     /* Start of match assertion */
6242 
6243     case OP_SOM:
6244     if (Feptr != mb->start_subject + mb->start_offset) RRETURN(MATCH_NOMATCH);
6245     Fecode++;
6246     break;
6247 
6248 
6249     /* ===================================================================== */
6250     /* Reset the start of match point */
6251 
6252     case OP_SET_SOM:
6253     Fstart_match = Feptr;
6254     Fecode++;
6255     break;
6256 
6257 
6258     /* ===================================================================== */
6259     /* Word boundary assertions. Find out if the previous and current
6260     characters are "word" characters. It takes a bit more work in UTF mode.
6261     Characters > 255 are assumed to be "non-word" characters when PCRE2_UCP is
6262     not set. When it is set, use Unicode properties if available, even when not
6263     in UTF mode. Remember the earliest and latest consulted characters. */
6264 
6265     case OP_NOT_WORD_BOUNDARY:
6266     case OP_WORD_BOUNDARY:
6267     case OP_NOT_UCP_WORD_BOUNDARY:
6268     case OP_UCP_WORD_BOUNDARY:
6269     if (Feptr == mb->check_subject) prev_is_word = FALSE; else
6270       {
6271       PCRE2_SPTR lastptr = Feptr - 1;
6272 #ifdef SUPPORT_UNICODE
6273       if (utf)
6274         {
6275         BACKCHAR(lastptr);
6276         GETCHAR(fc, lastptr);
6277         }
6278       else
6279 #endif  /* SUPPORT_UNICODE */
6280       fc = *lastptr;
6281       if (lastptr < mb->start_used_ptr) mb->start_used_ptr = lastptr;
6282 #ifdef SUPPORT_UNICODE
6283       if (Fop == OP_UCP_WORD_BOUNDARY || Fop == OP_NOT_UCP_WORD_BOUNDARY)
6284         {
6285         int chartype = UCD_CHARTYPE(fc);
6286         int category = PRIV(ucp_gentype)[chartype];
6287         prev_is_word = (category == ucp_L || category == ucp_N ||
6288           chartype == ucp_Mn || chartype == ucp_Pc);
6289         }
6290       else
6291 #endif  /* SUPPORT_UNICODE */
6292       prev_is_word = CHMAX_255(fc) && (mb->ctypes[fc] & ctype_word) != 0;
6293       }
6294 
6295     /* Get status of next character */
6296 
6297     if (Feptr >= mb->end_subject)
6298       {
6299       SCHECK_PARTIAL();
6300       cur_is_word = FALSE;
6301       }
6302     else
6303       {
6304       PCRE2_SPTR nextptr = Feptr + 1;
6305 #ifdef SUPPORT_UNICODE
6306       if (utf)
6307         {
6308         FORWARDCHARTEST(nextptr, mb->end_subject);
6309         GETCHAR(fc, Feptr);
6310         }
6311       else
6312 #endif  /* SUPPORT_UNICODE */
6313       fc = *Feptr;
6314       if (nextptr > mb->last_used_ptr) mb->last_used_ptr = nextptr;
6315 #ifdef SUPPORT_UNICODE
6316       if (Fop == OP_UCP_WORD_BOUNDARY || Fop == OP_NOT_UCP_WORD_BOUNDARY)
6317         {
6318         int chartype = UCD_CHARTYPE(fc);
6319         int category = PRIV(ucp_gentype)[chartype];
6320         cur_is_word = (category == ucp_L || category == ucp_N ||
6321           chartype == ucp_Mn || chartype == ucp_Pc);
6322         }
6323       else
6324 #endif  /* SUPPORT_UNICODE */
6325       cur_is_word = CHMAX_255(fc) && (mb->ctypes[fc] & ctype_word) != 0;
6326       }
6327 
6328     /* Now see if the situation is what we want */
6329 
6330     if ((*Fecode++ == OP_WORD_BOUNDARY || Fop == OP_UCP_WORD_BOUNDARY)?
6331          cur_is_word == prev_is_word : cur_is_word != prev_is_word)
6332       RRETURN(MATCH_NOMATCH);
6333     break;
6334 
6335 
6336     /* ===================================================================== */
6337     /* Backtracking (*VERB)s, with and without arguments. Note that if the
6338     pattern is successfully matched, we do not come back from RMATCH. */
6339 
6340     case OP_MARK:
6341     Fmark = mb->nomatch_mark = Fecode + 2;
6342     RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM12);
6343 
6344     /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
6345     argument, and we must check whether that argument matches this MARK's
6346     argument. It is passed back in mb->verb_skip_ptr. If it does match, we
6347     return MATCH_SKIP with mb->verb_skip_ptr now pointing to the subject
6348     position that corresponds to this mark. Otherwise, pass back the return
6349     code unaltered. */
6350 
6351     if (rrc == MATCH_SKIP_ARG &&
6352              PRIV(strcmp)(Fecode + 2, mb->verb_skip_ptr) == 0)
6353       {
6354       mb->verb_skip_ptr = Feptr;   /* Pass back current position */
6355       RRETURN(MATCH_SKIP);
6356       }
6357     RRETURN(rrc);
6358 
6359     case OP_FAIL:
6360     RRETURN(MATCH_NOMATCH);
6361 
6362     /* Record the current recursing group number in mb->verb_current_recurse
6363     when a backtracking return such as MATCH_COMMIT is given. This enables the
6364     recurse processing to catch verbs from within the recursion. */
6365 
6366     case OP_COMMIT:
6367     RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM13);
6368     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6369     mb->verb_current_recurse = Fcurrent_recurse;
6370     RRETURN(MATCH_COMMIT);
6371 
6372     case OP_COMMIT_ARG:
6373     Fmark = mb->nomatch_mark = Fecode + 2;
6374     RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM36);
6375     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6376     mb->verb_current_recurse = Fcurrent_recurse;
6377     RRETURN(MATCH_COMMIT);
6378 
6379     case OP_PRUNE:
6380     RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM14);
6381     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6382     mb->verb_current_recurse = Fcurrent_recurse;
6383     RRETURN(MATCH_PRUNE);
6384 
6385     case OP_PRUNE_ARG:
6386     Fmark = mb->nomatch_mark = Fecode + 2;
6387     RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM15);
6388     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6389     mb->verb_current_recurse = Fcurrent_recurse;
6390     RRETURN(MATCH_PRUNE);
6391 
6392     case OP_SKIP:
6393     RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM16);
6394     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6395     mb->verb_skip_ptr = Feptr;   /* Pass back current position */
6396     mb->verb_current_recurse = Fcurrent_recurse;
6397     RRETURN(MATCH_SKIP);
6398 
6399     /* Note that, for Perl compatibility, SKIP with an argument does NOT set
6400     nomatch_mark. When a pattern match ends with a SKIP_ARG for which there was
6401     not a matching mark, we have to re-run the match, ignoring the SKIP_ARG
6402     that failed and any that precede it (either they also failed, or were not
6403     triggered). To do this, we maintain a count of executed SKIP_ARGs. If a
6404     SKIP_ARG gets to top level, the match is re-run with mb->ignore_skip_arg
6405     set to the count of the one that failed. */
6406 
6407     case OP_SKIP_ARG:
6408     mb->skip_arg_count++;
6409     if (mb->skip_arg_count <= mb->ignore_skip_arg)
6410       {
6411       Fecode += PRIV(OP_lengths)[*Fecode] + Fecode[1];
6412       break;
6413       }
6414     RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM17);
6415     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6416 
6417     /* Pass back the current skip name and return the special MATCH_SKIP_ARG
6418     return code. This will either be caught by a matching MARK, or get to the
6419     top, where it causes a rematch with mb->ignore_skip_arg set to the value of
6420     mb->skip_arg_count. */
6421 
6422     mb->verb_skip_ptr = Fecode + 2;
6423     mb->verb_current_recurse = Fcurrent_recurse;
6424     RRETURN(MATCH_SKIP_ARG);
6425 
6426     /* For THEN (and THEN_ARG) we pass back the address of the opcode, so that
6427     the branch in which it occurs can be determined. */
6428 
6429     case OP_THEN:
6430     RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM18);
6431     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6432     mb->verb_ecode_ptr = Fecode;
6433     mb->verb_current_recurse = Fcurrent_recurse;
6434     RRETURN(MATCH_THEN);
6435 
6436     case OP_THEN_ARG:
6437     Fmark = mb->nomatch_mark = Fecode + 2;
6438     RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM19);
6439     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6440     mb->verb_ecode_ptr = Fecode;
6441     mb->verb_current_recurse = Fcurrent_recurse;
6442     RRETURN(MATCH_THEN);
6443 
6444 
6445     /* ===================================================================== */
6446     /* There's been some horrible disaster. Arrival here can only mean there is
6447     something seriously wrong in the code above or the OP_xxx definitions. */
6448 
6449     default:
6450     return PCRE2_ERROR_INTERNAL;
6451     }
6452 
6453   /* Do not insert any code in here without much thought; it is assumed
6454   that "continue" in the code above comes out to here to repeat the main
6455   loop. */
6456 
6457   }  /* End of main loop */
6458 /* Control never reaches here */
6459 
6460 
6461 /* ========================================================================= */
6462 /* The RRETURN() macro jumps here. The number that is saved in Freturn_id
6463 indicates which label we actually want to return to. The value in Frdepth is
6464 the index number of the frame in the vector. The return value has been placed
6465 in rrc. */
6466 
6467 #define LBL(val) case val: goto L_RM##val;
6468 
6469 RETURN_SWITCH:
6470 if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
6471 if (Frdepth == 0) return rrc;                     /* Exit from the top level */
6472 F = (heapframe *)((char *)F - Fback_frame);       /* Backtrack */
6473 mb->cb->callout_flags |= PCRE2_CALLOUT_BACKTRACK; /* Note for callouts */
6474 
6475 #ifdef DEBUG_SHOW_RMATCH
6476 fprintf(stderr, "++ RETURN %d to RM%d\n", rrc, Freturn_id);
6477 #endif
6478 
6479 switch (Freturn_id)
6480   {
6481   LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
6482   LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(16)
6483   LBL(17) LBL(18) LBL(19) LBL(20) LBL(21) LBL(22) LBL(23) LBL(24)
6484   LBL(25) LBL(26) LBL(27) LBL(28) LBL(29) LBL(30) LBL(31) LBL(32)
6485   LBL(33) LBL(34) LBL(35) LBL(36) LBL(37)
6486 
6487 #ifdef SUPPORT_WIDE_CHARS
6488   LBL(100) LBL(101)
6489 #endif
6490 
6491 #ifdef SUPPORT_UNICODE
6492   LBL(200) LBL(201) LBL(202) LBL(203) LBL(204) LBL(205) LBL(206)
6493   LBL(207) LBL(208) LBL(209) LBL(210) LBL(211) LBL(212) LBL(213)
6494   LBL(214) LBL(215) LBL(216) LBL(217) LBL(218) LBL(219) LBL(220)
6495   LBL(221) LBL(222) LBL(223) LBL(224) LBL(225)
6496 #endif
6497 
6498   default:
6499   return PCRE2_ERROR_INTERNAL;
6500   }
6501 #undef LBL
6502 }
6503 
6504 
6505 /*************************************************
6506 *           Match a Regular Expression           *
6507 *************************************************/
6508 
6509 /* This function applies a compiled pattern to a subject string and picks out
6510 portions of the string if it matches. Two elements in the vector are set for
6511 each substring: the offsets to the start and end of the substring.
6512 
6513 Arguments:
6514   code            points to the compiled expression
6515   subject         points to the subject string
6516   length          length of subject string (may contain binary zeros)
6517   start_offset    where to start in the subject string
6518   options         option bits
6519   match_data      points to a match_data block
6520   mcontext        points to a PCRE2 match context
6521 
6522 Returns:          > 0 => success; value is the number of ovector pairs filled
6523                   = 0 => success, but ovector is not big enough
6524                   = -1 => failed to match (PCRE2_ERROR_NOMATCH)
6525                   = -2 => partial match (PCRE2_ERROR_PARTIAL)
6526                   < -2 => some kind of unexpected problem
6527 */
6528 
6529 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
6530 pcre2_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
6531   PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
6532   pcre2_match_context *mcontext)
6533 {
6534 int rc;
6535 int was_zero_terminated = 0;
6536 const uint8_t *start_bits = NULL;
6537 const pcre2_real_code *re = (const pcre2_real_code *)code;
6538 
6539 BOOL anchored;
6540 BOOL firstline;
6541 BOOL has_first_cu = FALSE;
6542 BOOL has_req_cu = FALSE;
6543 BOOL startline;
6544 
6545 #if PCRE2_CODE_UNIT_WIDTH == 8
6546 PCRE2_SPTR memchr_found_first_cu;
6547 PCRE2_SPTR memchr_found_first_cu2;
6548 #endif
6549 
6550 PCRE2_UCHAR first_cu = 0;
6551 PCRE2_UCHAR first_cu2 = 0;
6552 PCRE2_UCHAR req_cu = 0;
6553 PCRE2_UCHAR req_cu2 = 0;
6554 
6555 PCRE2_SPTR bumpalong_limit;
6556 PCRE2_SPTR end_subject;
6557 PCRE2_SPTR true_end_subject;
6558 PCRE2_SPTR start_match;
6559 PCRE2_SPTR req_cu_ptr;
6560 PCRE2_SPTR start_partial;
6561 PCRE2_SPTR match_partial;
6562 
6563 #ifdef SUPPORT_JIT
6564 BOOL use_jit;
6565 #endif
6566 
6567 /* This flag is needed even when Unicode is not supported for convenience
6568 (it is used by the IS_NEWLINE macro). */
6569 
6570 BOOL utf = FALSE;
6571 
6572 #ifdef SUPPORT_UNICODE
6573 BOOL ucp = FALSE;
6574 BOOL allow_invalid;
6575 uint32_t fragment_options = 0;
6576 #ifdef SUPPORT_JIT
6577 BOOL jit_checked_utf = FALSE;
6578 #endif
6579 #endif  /* SUPPORT_UNICODE */
6580 
6581 PCRE2_SIZE frame_size;
6582 PCRE2_SIZE heapframes_size;
6583 
6584 /* We need to have mb as a pointer to a match block, because the IS_NEWLINE
6585 macro is used below, and it expects NLBLOCK to be defined as a pointer. */
6586 
6587 pcre2_callout_block cb;
6588 match_block actual_match_block;
6589 match_block *mb = &actual_match_block;
6590 
6591 /* Recognize NULL, length 0 as an empty string. */
6592 
6593 if (subject == NULL && length == 0) subject = (PCRE2_SPTR)"";
6594 
6595 /* Plausibility checks */
6596 
6597 if ((options & ~PUBLIC_MATCH_OPTIONS) != 0) return PCRE2_ERROR_BADOPTION;
6598 if (code == NULL || subject == NULL || match_data == NULL)
6599   return PCRE2_ERROR_NULL;
6600 
6601 start_match = subject + start_offset;
6602 req_cu_ptr = start_match - 1;
6603 if (length == PCRE2_ZERO_TERMINATED)
6604   {
6605   length = PRIV(strlen)(subject);
6606   was_zero_terminated = 1;
6607   }
6608 true_end_subject = end_subject = subject + length;
6609 
6610 if (start_offset > length) return PCRE2_ERROR_BADOFFSET;
6611 
6612 /* Check that the first field in the block is the magic number. */
6613 
6614 if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
6615 
6616 /* Check the code unit width. */
6617 
6618 if ((re->flags & PCRE2_MODE_MASK) != PCRE2_CODE_UNIT_WIDTH/8)
6619   return PCRE2_ERROR_BADMODE;
6620 
6621 /* PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART are match-time flags in the
6622 options variable for this function. Users of PCRE2 who are not calling the
6623 function directly would like to have a way of setting these flags, in the same
6624 way that they can set pcre2_compile() flags like PCRE2_NO_AUTOPOSSESS with
6625 constructions like (*NO_AUTOPOSSESS). To enable this, (*NOTEMPTY) and
6626 (*NOTEMPTY_ATSTART) set bits in the pattern's "flags" field which we now
6627 transfer to the options for this function. The bits are guaranteed to be
6628 adjacent, but do not have the same values. This bit of Boolean trickery assumes
6629 that the match-time bits are not more significant than the flag bits. If by
6630 accident this is not the case, a compile-time division by zero error will
6631 occur. */
6632 
6633 #define FF (PCRE2_NOTEMPTY_SET|PCRE2_NE_ATST_SET)
6634 #define OO (PCRE2_NOTEMPTY|PCRE2_NOTEMPTY_ATSTART)
6635 options |= (re->flags & FF) / ((FF & (~FF+1)) / (OO & (~OO+1)));
6636 #undef FF
6637 #undef OO
6638 
6639 /* If the pattern was successfully studied with JIT support, we will run the
6640 JIT executable instead of the rest of this function. Most options must be set
6641 at compile time for the JIT code to be usable. */
6642 
6643 #ifdef SUPPORT_JIT
6644 use_jit = (re->executable_jit != NULL &&
6645           (options & ~PUBLIC_JIT_MATCH_OPTIONS) == 0);
6646 #endif
6647 
6648 /* Initialize UTF/UCP parameters. */
6649 
6650 #ifdef SUPPORT_UNICODE
6651 utf = (re->overall_options & PCRE2_UTF) != 0;
6652 allow_invalid = (re->overall_options & PCRE2_MATCH_INVALID_UTF) != 0;
6653 ucp = (re->overall_options & PCRE2_UCP) != 0;
6654 #endif  /* SUPPORT_UNICODE */
6655 
6656 /* Convert the partial matching flags into an integer. */
6657 
6658 mb->partial = ((options & PCRE2_PARTIAL_HARD) != 0)? 2 :
6659               ((options & PCRE2_PARTIAL_SOFT) != 0)? 1 : 0;
6660 
6661 /* Partial matching and PCRE2_ENDANCHORED are currently not allowed at the same
6662 time. */
6663 
6664 if (mb->partial != 0 &&
6665    ((re->overall_options | options) & PCRE2_ENDANCHORED) != 0)
6666   return PCRE2_ERROR_BADOPTION;
6667 
6668 /* It is an error to set an offset limit without setting the flag at compile
6669 time. */
6670 
6671 if (mcontext != NULL && mcontext->offset_limit != PCRE2_UNSET &&
6672      (re->overall_options & PCRE2_USE_OFFSET_LIMIT) == 0)
6673   return PCRE2_ERROR_BADOFFSETLIMIT;
6674 
6675 /* If the match data block was previously used with PCRE2_COPY_MATCHED_SUBJECT,
6676 free the memory that was obtained. Set the field to NULL for no match cases. */
6677 
6678 if ((match_data->flags & PCRE2_MD_COPIED_SUBJECT) != 0)
6679   {
6680   match_data->memctl.free((void *)match_data->subject,
6681     match_data->memctl.memory_data);
6682   match_data->flags &= ~PCRE2_MD_COPIED_SUBJECT;
6683   }
6684 match_data->subject = NULL;
6685 
6686 /* Zero the error offset in case the first code unit is invalid UTF. */
6687 
6688 match_data->startchar = 0;
6689 
6690 
6691 /* ============================= JIT matching ============================== */
6692 
6693 /* Prepare for JIT matching. Check a UTF string for validity unless no check is
6694 requested or invalid UTF can be handled. We check only the portion of the
6695 subject that might be inspected during matching - from the offset minus the
6696 maximum lookbehind to the given length. This saves time when a small part of a
6697 large subject is being matched by the use of a starting offset. Note that the
6698 maximum lookbehind is a number of characters, not code units. */
6699 
6700 #ifdef SUPPORT_JIT
6701 if (use_jit)
6702   {
6703 #ifdef SUPPORT_UNICODE
6704   if (utf && (options & PCRE2_NO_UTF_CHECK) == 0 && !allow_invalid)
6705     {
6706 #if PCRE2_CODE_UNIT_WIDTH != 32
6707     unsigned int i;
6708 #endif
6709 
6710     /* For 8-bit and 16-bit UTF, check that the first code unit is a valid
6711     character start. */
6712 
6713 #if PCRE2_CODE_UNIT_WIDTH != 32
6714     if (start_match < end_subject && NOT_FIRSTCU(*start_match))
6715       {
6716       if (start_offset > 0) return PCRE2_ERROR_BADUTFOFFSET;
6717 #if PCRE2_CODE_UNIT_WIDTH == 8
6718       return PCRE2_ERROR_UTF8_ERR20;  /* Isolated 0x80 byte */
6719 #else
6720       return PCRE2_ERROR_UTF16_ERR3;  /* Isolated low surrogate */
6721 #endif
6722       }
6723 #endif  /* WIDTH != 32 */
6724 
6725     /* Move back by the maximum lookbehind, just in case it happens at the very
6726     start of matching. */
6727 
6728 #if PCRE2_CODE_UNIT_WIDTH != 32
6729     for (i = re->max_lookbehind; i > 0 && start_match > subject; i--)
6730       {
6731       start_match--;
6732       while (start_match > subject &&
6733 #if PCRE2_CODE_UNIT_WIDTH == 8
6734       (*start_match & 0xc0) == 0x80)
6735 #else  /* 16-bit */
6736       (*start_match & 0xfc00) == 0xdc00)
6737 #endif
6738         start_match--;
6739       }
6740 #else  /* PCRE2_CODE_UNIT_WIDTH != 32 */
6741 
6742     /* In the 32-bit library, one code unit equals one character. However,
6743     we cannot just subtract the lookbehind and then compare pointers, because
6744     a very large lookbehind could create an invalid pointer. */
6745 
6746     if (start_offset >= re->max_lookbehind)
6747       start_match -= re->max_lookbehind;
6748     else
6749       start_match = subject;
6750 #endif  /* PCRE2_CODE_UNIT_WIDTH != 32 */
6751 
6752     /* Validate the relevant portion of the subject. Adjust the offset of an
6753     invalid code point to be an absolute offset in the whole string. */
6754 
6755     match_data->rc = PRIV(valid_utf)(start_match,
6756       length - (start_match - subject), &(match_data->startchar));
6757     if (match_data->rc != 0)
6758       {
6759       match_data->startchar += start_match - subject;
6760       return match_data->rc;
6761       }
6762     jit_checked_utf = TRUE;
6763     }
6764 #endif  /* SUPPORT_UNICODE */
6765 
6766   /* If JIT returns BADOPTION, which means that the selected complete or
6767   partial matching mode was not compiled, fall through to the interpreter. */
6768 
6769   rc = pcre2_jit_match(code, subject, length, start_offset, options,
6770     match_data, mcontext);
6771   if (rc != PCRE2_ERROR_JIT_BADOPTION)
6772     {
6773     match_data->subject_length = length;
6774     if (rc >= 0 && (options & PCRE2_COPY_MATCHED_SUBJECT) != 0)
6775       {
6776       length = CU2BYTES(length + was_zero_terminated);
6777       match_data->subject = match_data->memctl.malloc(length,
6778         match_data->memctl.memory_data);
6779       if (match_data->subject == NULL) return PCRE2_ERROR_NOMEMORY;
6780       memcpy((void *)match_data->subject, subject, length);
6781       match_data->flags |= PCRE2_MD_COPIED_SUBJECT;
6782       }
6783     return rc;
6784     }
6785   }
6786 #endif  /* SUPPORT_JIT */
6787 
6788 /* ========================= End of JIT matching ========================== */
6789 
6790 
6791 /* Proceed with non-JIT matching. The default is to allow lookbehinds to the
6792 start of the subject. A UTF check when there is a non-zero offset may change
6793 this. */
6794 
6795 mb->check_subject = subject;
6796 
6797 /* If a UTF subject string was not checked for validity in the JIT code above,
6798 check it here, and handle support for invalid UTF strings. The check above
6799 happens only when invalid UTF is not supported and PCRE2_NO_UTF_CHECK is unset.
6800 If we get here in those circumstances, it means the subject string is valid,
6801 but for some reason JIT matching was not successful. There is no need to check
6802 the subject again.
6803 
6804 We check only the portion of the subject that might be inspected during
6805 matching - from the offset minus the maximum lookbehind to the given length.
6806 This saves time when a small part of a large subject is being matched by the
6807 use of a starting offset. Note that the maximum lookbehind is a number of
6808 characters, not code units.
6809 
6810 Note also that support for invalid UTF forces a check, overriding the setting
6811 of PCRE2_NO_UTF_CHECK. */
6812 
6813 #ifdef SUPPORT_UNICODE
6814 if (utf &&
6815 #ifdef SUPPORT_JIT
6816     !jit_checked_utf &&
6817 #endif
6818     ((options & PCRE2_NO_UTF_CHECK) == 0 || allow_invalid))
6819   {
6820 #if PCRE2_CODE_UNIT_WIDTH != 32
6821   BOOL skipped_bad_start = FALSE;
6822 #endif
6823 
6824   /* For 8-bit and 16-bit UTF, check that the first code unit is a valid
6825   character start. If we are handling invalid UTF, just skip over such code
6826   units. Otherwise, give an appropriate error. */
6827 
6828 #if PCRE2_CODE_UNIT_WIDTH != 32
6829   if (allow_invalid)
6830     {
6831     while (start_match < end_subject && NOT_FIRSTCU(*start_match))
6832       {
6833       start_match++;
6834       skipped_bad_start = TRUE;
6835       }
6836     }
6837   else if (start_match < end_subject && NOT_FIRSTCU(*start_match))
6838     {
6839     if (start_offset > 0) return PCRE2_ERROR_BADUTFOFFSET;
6840 #if PCRE2_CODE_UNIT_WIDTH == 8
6841     return PCRE2_ERROR_UTF8_ERR20;  /* Isolated 0x80 byte */
6842 #else
6843     return PCRE2_ERROR_UTF16_ERR3;  /* Isolated low surrogate */
6844 #endif
6845     }
6846 #endif  /* WIDTH != 32 */
6847 
6848   /* The mb->check_subject field points to the start of UTF checking;
6849   lookbehinds can go back no further than this. */
6850 
6851   mb->check_subject = start_match;
6852 
6853   /* Move back by the maximum lookbehind, just in case it happens at the very
6854   start of matching, but don't do this if we skipped bad 8-bit or 16-bit code
6855   units above. */
6856 
6857 #if PCRE2_CODE_UNIT_WIDTH != 32
6858   if (!skipped_bad_start)
6859     {
6860     unsigned int i;
6861     for (i = re->max_lookbehind; i > 0 && mb->check_subject > subject; i--)
6862       {
6863       mb->check_subject--;
6864       while (mb->check_subject > subject &&
6865 #if PCRE2_CODE_UNIT_WIDTH == 8
6866       (*mb->check_subject & 0xc0) == 0x80)
6867 #else  /* 16-bit */
6868       (*mb->check_subject & 0xfc00) == 0xdc00)
6869 #endif
6870         mb->check_subject--;
6871       }
6872     }
6873 #else  /* PCRE2_CODE_UNIT_WIDTH != 32 */
6874 
6875   /* In the 32-bit library, one code unit equals one character. However,
6876   we cannot just subtract the lookbehind and then compare pointers, because
6877   a very large lookbehind could create an invalid pointer. */
6878 
6879   if (start_offset >= re->max_lookbehind)
6880     mb->check_subject -= re->max_lookbehind;
6881   else
6882     mb->check_subject = subject;
6883 #endif  /* PCRE2_CODE_UNIT_WIDTH != 32 */
6884 
6885   /* Validate the relevant portion of the subject. There's a loop in case we
6886   encounter bad UTF in the characters preceding start_match which we are
6887   scanning because of a lookbehind. */
6888 
6889   for (;;)
6890     {
6891     match_data->rc = PRIV(valid_utf)(mb->check_subject,
6892       length - (mb->check_subject - subject), &(match_data->startchar));
6893 
6894     if (match_data->rc == 0) break;   /* Valid UTF string */
6895 
6896     /* Invalid UTF string. Adjust the offset to be an absolute offset in the
6897     whole string. If we are handling invalid UTF strings, set end_subject to
6898     stop before the bad code unit, and set the options to "not end of line".
6899     Otherwise return the error. */
6900 
6901     match_data->startchar += mb->check_subject - subject;
6902     if (!allow_invalid || match_data->rc > 0) return match_data->rc;
6903     end_subject = subject + match_data->startchar;
6904 
6905     /* If the end precedes start_match, it means there is invalid UTF in the
6906     extra code units we reversed over because of a lookbehind. Advance past the
6907     first bad code unit, and then skip invalid character starting code units in
6908     8-bit and 16-bit modes, and try again with the original end point. */
6909 
6910     if (end_subject < start_match)
6911       {
6912       mb->check_subject = end_subject + 1;
6913 #if PCRE2_CODE_UNIT_WIDTH != 32
6914       while (mb->check_subject < start_match && NOT_FIRSTCU(*mb->check_subject))
6915         mb->check_subject++;
6916 #endif
6917       end_subject = true_end_subject;
6918       }
6919 
6920     /* Otherwise, set the not end of line option, and do the match. */
6921 
6922     else
6923       {
6924       fragment_options = PCRE2_NOTEOL;
6925       break;
6926       }
6927     }
6928   }
6929 #endif  /* SUPPORT_UNICODE */
6930 
6931 /* A NULL match context means "use a default context", but we take the memory
6932 control functions from the pattern. */
6933 
6934 if (mcontext == NULL)
6935   {
6936   mcontext = (pcre2_match_context *)(&PRIV(default_match_context));
6937   mb->memctl = re->memctl;
6938   }
6939 else mb->memctl = mcontext->memctl;
6940 
6941 anchored = ((re->overall_options | options) & PCRE2_ANCHORED) != 0;
6942 firstline = !anchored && (re->overall_options & PCRE2_FIRSTLINE) != 0;
6943 startline = (re->flags & PCRE2_STARTLINE) != 0;
6944 bumpalong_limit = (mcontext->offset_limit == PCRE2_UNSET)?
6945   true_end_subject : subject + mcontext->offset_limit;
6946 
6947 /* Initialize and set up the fixed fields in the callout block, with a pointer
6948 in the match block. */
6949 
6950 mb->cb = &cb;
6951 cb.version = 2;
6952 cb.subject = subject;
6953 cb.subject_length = (PCRE2_SIZE)(end_subject - subject);
6954 cb.callout_flags = 0;
6955 
6956 /* Fill in the remaining fields in the match block, except for moptions, which
6957 gets set later. */
6958 
6959 mb->callout = mcontext->callout;
6960 mb->callout_data = mcontext->callout_data;
6961 
6962 mb->start_subject = subject;
6963 mb->start_offset = start_offset;
6964 mb->end_subject = end_subject;
6965 mb->true_end_subject = true_end_subject;
6966 mb->hasthen = (re->flags & PCRE2_HASTHEN) != 0;
6967 mb->allowemptypartial = (re->max_lookbehind > 0) ||
6968     (re->flags & PCRE2_MATCH_EMPTY) != 0;
6969 mb->poptions = re->overall_options;          /* Pattern options */
6970 mb->ignore_skip_arg = 0;
6971 mb->mark = mb->nomatch_mark = NULL;          /* In case never set */
6972 
6973 /* The name table is needed for finding all the numbers associated with a
6974 given name, for condition testing. The code follows the name table. */
6975 
6976 mb->name_table = (PCRE2_UCHAR *)((uint8_t *)re + sizeof(pcre2_real_code));
6977 mb->name_count = re->name_count;
6978 mb->name_entry_size = re->name_entry_size;
6979 mb->start_code = mb->name_table + re->name_count * re->name_entry_size;
6980 
6981 /* Process the \R and newline settings. */
6982 
6983 mb->bsr_convention = re->bsr_convention;
6984 mb->nltype = NLTYPE_FIXED;
6985 switch(re->newline_convention)
6986   {
6987   case PCRE2_NEWLINE_CR:
6988   mb->nllen = 1;
6989   mb->nl[0] = CHAR_CR;
6990   break;
6991 
6992   case PCRE2_NEWLINE_LF:
6993   mb->nllen = 1;
6994   mb->nl[0] = CHAR_NL;
6995   break;
6996 
6997   case PCRE2_NEWLINE_NUL:
6998   mb->nllen = 1;
6999   mb->nl[0] = CHAR_NUL;
7000   break;
7001 
7002   case PCRE2_NEWLINE_CRLF:
7003   mb->nllen = 2;
7004   mb->nl[0] = CHAR_CR;
7005   mb->nl[1] = CHAR_NL;
7006   break;
7007 
7008   case PCRE2_NEWLINE_ANY:
7009   mb->nltype = NLTYPE_ANY;
7010   break;
7011 
7012   case PCRE2_NEWLINE_ANYCRLF:
7013   mb->nltype = NLTYPE_ANYCRLF;
7014   break;
7015 
7016   default: return PCRE2_ERROR_INTERNAL;
7017   }
7018 
7019 /* The backtracking frames have fixed data at the front, and a PCRE2_SIZE
7020 vector at the end, whose size depends on the number of capturing parentheses in
7021 the pattern. It is not used at all if there are no capturing parentheses.
7022 
7023   frame_size                   is the total size of each frame
7024   match_data->heapframes       is the pointer to the frames vector
7025   match_data->heapframes_size  is the allocated size of the vector
7026 
7027 We must pad the frame_size for alignment to ensure subsequent frames are as
7028 aligned as heapframe. Whilst ovector is word-aligned due to being a PCRE2_SIZE
7029 array, that does not guarantee it is suitably aligned for pointers, as some
7030 architectures have pointers that are larger than a size_t. */
7031 
7032 frame_size = (offsetof(heapframe, ovector) +
7033   re->top_bracket * 2 * sizeof(PCRE2_SIZE) + HEAPFRAME_ALIGNMENT - 1) &
7034   ~(HEAPFRAME_ALIGNMENT - 1);
7035 
7036 /* Limits set in the pattern override the match context only if they are
7037 smaller. */
7038 
7039 mb->heap_limit = ((mcontext->heap_limit < re->limit_heap)?
7040   mcontext->heap_limit : re->limit_heap);
7041 
7042 mb->match_limit = (mcontext->match_limit < re->limit_match)?
7043   mcontext->match_limit : re->limit_match;
7044 
7045 mb->match_limit_depth = (mcontext->depth_limit < re->limit_depth)?
7046   mcontext->depth_limit : re->limit_depth;
7047 
7048 /* If a pattern has very many capturing parentheses, the frame size may be very
7049 large. Set the initial frame vector size to ensure that there are at least 10
7050 available frames, but enforce a minimum of START_FRAMES_SIZE. If this is
7051 greater than the heap limit, get as large a vector as possible. */
7052 
7053 heapframes_size = frame_size * 10;
7054 if (heapframes_size < START_FRAMES_SIZE) heapframes_size = START_FRAMES_SIZE;
7055 if (heapframes_size / 1024 > mb->heap_limit)
7056   {
7057   PCRE2_SIZE max_size = 1024 * mb->heap_limit;
7058   if (max_size < frame_size) return PCRE2_ERROR_HEAPLIMIT;
7059   heapframes_size = max_size;
7060   }
7061 
7062 /* If an existing frame vector in the match_data block is large enough, we can
7063 use it. Otherwise, free any pre-existing vector and get a new one. */
7064 
7065 if (match_data->heapframes_size < heapframes_size)
7066   {
7067   match_data->memctl.free(match_data->heapframes,
7068     match_data->memctl.memory_data);
7069   match_data->heapframes = match_data->memctl.malloc(heapframes_size,
7070     match_data->memctl.memory_data);
7071   if (match_data->heapframes == NULL)
7072     {
7073     match_data->heapframes_size = 0;
7074     return PCRE2_ERROR_NOMEMORY;
7075     }
7076   match_data->heapframes_size = heapframes_size;
7077   }
7078 
7079 /* Write to the ovector within the first frame to mark every capture unset and
7080 to avoid uninitialized memory read errors when it is copied to a new frame. */
7081 
7082 memset((char *)(match_data->heapframes) + offsetof(heapframe, ovector), 0xff,
7083   frame_size - offsetof(heapframe, ovector));
7084 
7085 /* Pointers to the individual character tables */
7086 
7087 mb->lcc = re->tables + lcc_offset;
7088 mb->fcc = re->tables + fcc_offset;
7089 mb->ctypes = re->tables + ctypes_offset;
7090 
7091 /* Set up the first code unit to match, if available. If there's no first code
7092 unit there may be a bitmap of possible first characters. */
7093 
7094 if ((re->flags & PCRE2_FIRSTSET) != 0)
7095   {
7096   has_first_cu = TRUE;
7097   first_cu = first_cu2 = (PCRE2_UCHAR)(re->first_codeunit);
7098   if ((re->flags & PCRE2_FIRSTCASELESS) != 0)
7099     {
7100     first_cu2 = TABLE_GET(first_cu, mb->fcc, first_cu);
7101 #ifdef SUPPORT_UNICODE
7102 #if PCRE2_CODE_UNIT_WIDTH == 8
7103     if (first_cu > 127 && ucp && !utf) first_cu2 = UCD_OTHERCASE(first_cu);
7104 #else
7105     if (first_cu > 127 && (utf || ucp)) first_cu2 = UCD_OTHERCASE(first_cu);
7106 #endif
7107 #endif  /* SUPPORT_UNICODE */
7108     }
7109   }
7110 else
7111   if (!startline && (re->flags & PCRE2_FIRSTMAPSET) != 0)
7112     start_bits = re->start_bitmap;
7113 
7114 /* There may also be a "last known required character" set. */
7115 
7116 if ((re->flags & PCRE2_LASTSET) != 0)
7117   {
7118   has_req_cu = TRUE;
7119   req_cu = req_cu2 = (PCRE2_UCHAR)(re->last_codeunit);
7120   if ((re->flags & PCRE2_LASTCASELESS) != 0)
7121     {
7122     req_cu2 = TABLE_GET(req_cu, mb->fcc, req_cu);
7123 #ifdef SUPPORT_UNICODE
7124 #if PCRE2_CODE_UNIT_WIDTH == 8
7125     if (req_cu > 127 && ucp && !utf) req_cu2 = UCD_OTHERCASE(req_cu);
7126 #else
7127     if (req_cu > 127 && (utf || ucp)) req_cu2 = UCD_OTHERCASE(req_cu);
7128 #endif
7129 #endif  /* SUPPORT_UNICODE */
7130     }
7131   }
7132 
7133 
7134 /* ==========================================================================*/
7135 
7136 /* Loop for handling unanchored repeated matching attempts; for anchored regexs
7137 the loop runs just once. */
7138 
7139 #ifdef SUPPORT_UNICODE
7140 FRAGMENT_RESTART:
7141 #endif
7142 
7143 start_partial = match_partial = NULL;
7144 mb->hitend = FALSE;
7145 
7146 #if PCRE2_CODE_UNIT_WIDTH == 8
7147 memchr_found_first_cu = NULL;
7148 memchr_found_first_cu2 = NULL;
7149 #endif
7150 
7151 for(;;)
7152   {
7153   PCRE2_SPTR new_start_match;
7154 
7155   /* ----------------- Start of match optimizations ---------------- */
7156 
7157   /* There are some optimizations that avoid running the match if a known
7158   starting point is not found, or if a known later code unit is not present.
7159   However, there is an option (settable at compile time) that disables these,
7160   for testing and for ensuring that all callouts do actually occur. */
7161 
7162   if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
7163     {
7164     /* If firstline is TRUE, the start of the match is constrained to the first
7165     line of a multiline string. That is, the match must be before or at the
7166     first newline following the start of matching. Temporarily adjust
7167     end_subject so that we stop the scans for a first code unit at a newline.
7168     If the match fails at the newline, later code breaks the loop. */
7169 
7170     if (firstline)
7171       {
7172       PCRE2_SPTR t = start_match;
7173 #ifdef SUPPORT_UNICODE
7174       if (utf)
7175         {
7176         while (t < end_subject && !IS_NEWLINE(t))
7177           {
7178           t++;
7179           ACROSSCHAR(t < end_subject, t, t++);
7180           }
7181         }
7182       else
7183 #endif
7184       while (t < end_subject && !IS_NEWLINE(t)) t++;
7185       end_subject = t;
7186       }
7187 
7188     /* Anchored: check the first code unit if one is recorded. This may seem
7189     pointless but it can help in detecting a no match case without scanning for
7190     the required code unit. */
7191 
7192     if (anchored)
7193       {
7194       if (has_first_cu || start_bits != NULL)
7195         {
7196         BOOL ok = start_match < end_subject;
7197         if (ok)
7198           {
7199           PCRE2_UCHAR c = UCHAR21TEST(start_match);
7200           ok = has_first_cu && (c == first_cu || c == first_cu2);
7201           if (!ok && start_bits != NULL)
7202             {
7203 #if PCRE2_CODE_UNIT_WIDTH != 8
7204             if (c > 255) c = 255;
7205 #endif
7206             ok = (start_bits[c/8] & (1u << (c&7))) != 0;
7207             }
7208           }
7209         if (!ok)
7210           {
7211           rc = MATCH_NOMATCH;
7212           break;
7213           }
7214         }
7215       }
7216 
7217     /* Not anchored. Advance to a unique first code unit if there is one. */
7218 
7219     else
7220       {
7221       if (has_first_cu)
7222         {
7223         if (first_cu != first_cu2)  /* Caseless */
7224           {
7225           /* In 16-bit and 32-bit modes we have to do our own search, so can
7226           look for both cases at once. */
7227 
7228 #if PCRE2_CODE_UNIT_WIDTH != 8
7229           PCRE2_UCHAR smc;
7230           while (start_match < end_subject &&
7231                 (smc = UCHAR21TEST(start_match)) != first_cu &&
7232                  smc != first_cu2)
7233             start_match++;
7234 #else
7235           /* In 8-bit mode, the use of memchr() gives a big speed up, even
7236           though we have to call it twice in order to find the earliest
7237           occurrence of the code unit in either of its cases. Caching is used
7238           to remember the positions of previously found code units. This can
7239           make a huge difference when the strings are very long and only one
7240           case is actually present. */
7241 
7242           PCRE2_SPTR pp1 = NULL;
7243           PCRE2_SPTR pp2 = NULL;
7244           PCRE2_SIZE searchlength = end_subject - start_match;
7245 
7246           /* If we haven't got a previously found position for first_cu, or if
7247           the current starting position is later, we need to do a search. If
7248           the code unit is not found, set it to the end. */
7249 
7250           if (memchr_found_first_cu == NULL ||
7251               start_match > memchr_found_first_cu)
7252             {
7253             pp1 = memchr(start_match, first_cu, searchlength);
7254             memchr_found_first_cu = (pp1 == NULL)? end_subject : pp1;
7255             }
7256 
7257           /* If the start is before a previously found position, use the
7258           previous position, or NULL if a previous search failed. */
7259 
7260           else pp1 = (memchr_found_first_cu == end_subject)? NULL :
7261             memchr_found_first_cu;
7262 
7263           /* Do the same thing for the other case. */
7264 
7265           if (memchr_found_first_cu2 == NULL ||
7266               start_match > memchr_found_first_cu2)
7267             {
7268             pp2 = memchr(start_match, first_cu2, searchlength);
7269             memchr_found_first_cu2 = (pp2 == NULL)? end_subject : pp2;
7270             }
7271 
7272           else pp2 = (memchr_found_first_cu2 == end_subject)? NULL :
7273             memchr_found_first_cu2;
7274 
7275           /* Set the start to the end of the subject if neither case was found.
7276           Otherwise, use the earlier found point. */
7277 
7278           if (pp1 == NULL)
7279             start_match = (pp2 == NULL)? end_subject : pp2;
7280           else
7281             start_match = (pp2 == NULL || pp1 < pp2)? pp1 : pp2;
7282 
7283 #endif  /* 8-bit handling */
7284           }
7285 
7286         /* The caseful case is much simpler. */
7287 
7288         else
7289           {
7290 #if PCRE2_CODE_UNIT_WIDTH != 8
7291           while (start_match < end_subject && UCHAR21TEST(start_match) !=
7292                  first_cu)
7293             start_match++;
7294 #else
7295           start_match = memchr(start_match, first_cu, end_subject - start_match);
7296           if (start_match == NULL) start_match = end_subject;
7297 #endif
7298           }
7299 
7300         /* If we can't find the required first code unit, having reached the
7301         true end of the subject, break the bumpalong loop, to force a match
7302         failure, except when doing partial matching, when we let the next cycle
7303         run at the end of the subject. To see why, consider the pattern
7304         /(?<=abc)def/, which partially matches "abc", even though the string
7305         does not contain the starting character "d". If we have not reached the
7306         true end of the subject (PCRE2_FIRSTLINE caused end_subject to be
7307         temporarily modified) we also let the cycle run, because the matching
7308         string is legitimately allowed to start with the first code unit of a
7309         newline. */
7310 
7311         if (mb->partial == 0 && start_match >= mb->end_subject)
7312           {
7313           rc = MATCH_NOMATCH;
7314           break;
7315           }
7316         }
7317 
7318       /* If there's no first code unit, advance to just after a linebreak for a
7319       multiline match if required. */
7320 
7321       else if (startline)
7322         {
7323         if (start_match > mb->start_subject + start_offset)
7324           {
7325 #ifdef SUPPORT_UNICODE
7326           if (utf)
7327             {
7328             while (start_match < end_subject && !WAS_NEWLINE(start_match))
7329               {
7330               start_match++;
7331               ACROSSCHAR(start_match < end_subject, start_match, start_match++);
7332               }
7333             }
7334           else
7335 #endif
7336           while (start_match < end_subject && !WAS_NEWLINE(start_match))
7337             start_match++;
7338 
7339           /* If we have just passed a CR and the newline option is ANY or
7340           ANYCRLF, and we are now at a LF, advance the match position by one
7341           more code unit. */
7342 
7343           if (start_match[-1] == CHAR_CR &&
7344                (mb->nltype == NLTYPE_ANY || mb->nltype == NLTYPE_ANYCRLF) &&
7345                start_match < end_subject &&
7346                UCHAR21TEST(start_match) == CHAR_NL)
7347             start_match++;
7348           }
7349         }
7350 
7351       /* If there's no first code unit or a requirement for a multiline line
7352       start, advance to a non-unique first code unit if any have been
7353       identified. The bitmap contains only 256 bits. When code units are 16 or
7354       32 bits wide, all code units greater than 254 set the 255 bit. */
7355 
7356       else if (start_bits != NULL)
7357         {
7358         while (start_match < end_subject)
7359           {
7360           uint32_t c = UCHAR21TEST(start_match);
7361 #if PCRE2_CODE_UNIT_WIDTH != 8
7362           if (c > 255) c = 255;
7363 #endif
7364           if ((start_bits[c/8] & (1u << (c&7))) != 0) break;
7365           start_match++;
7366           }
7367 
7368         /* See comment above in first_cu checking about the next few lines. */
7369 
7370         if (mb->partial == 0 && start_match >= mb->end_subject)
7371           {
7372           rc = MATCH_NOMATCH;
7373           break;
7374           }
7375         }
7376       }   /* End first code unit handling */
7377 
7378     /* Restore fudged end_subject */
7379 
7380     end_subject = mb->end_subject;
7381 
7382     /* The following two optimizations must be disabled for partial matching. */
7383 
7384     if (mb->partial == 0)
7385       {
7386       PCRE2_SPTR p;
7387 
7388       /* The minimum matching length is a lower bound; no string of that length
7389       may actually match the pattern. Although the value is, strictly, in
7390       characters, we treat it as code units to avoid spending too much time in
7391       this optimization. */
7392 
7393       if (end_subject - start_match < re->minlength)
7394         {
7395         rc = MATCH_NOMATCH;
7396         break;
7397         }
7398 
7399       /* If req_cu is set, we know that that code unit must appear in the
7400       subject for the (non-partial) match to succeed. If the first code unit is
7401       set, req_cu must be later in the subject; otherwise the test starts at
7402       the match point. This optimization can save a huge amount of backtracking
7403       in patterns with nested unlimited repeats that aren't going to match.
7404       Writing separate code for caseful/caseless versions makes it go faster,
7405       as does using an autoincrement and backing off on a match. As in the case
7406       of the first code unit, using memchr() in the 8-bit library gives a big
7407       speed up. Unlike the first_cu check above, we do not need to call
7408       memchr() twice in the caseless case because we only need to check for the
7409       presence of the character in either case, not find the first occurrence.
7410 
7411       The search can be skipped if the code unit was found later than the
7412       current starting point in a previous iteration of the bumpalong loop.
7413 
7414       HOWEVER: when the subject string is very, very long, searching to its end
7415       can take a long time, and give bad performance on quite ordinary
7416       anchored patterns. This showed up when somebody was matching something
7417       like /^\d+C/ on a 32-megabyte string... so we don't do this when the
7418       string is sufficiently long, but it's worth searching a lot more for
7419       unanchored patterns. */
7420 
7421       p = start_match + (has_first_cu? 1:0);
7422       if (has_req_cu && p > req_cu_ptr)
7423         {
7424         PCRE2_SIZE check_length = end_subject - start_match;
7425 
7426         if (check_length < REQ_CU_MAX ||
7427               (!anchored && check_length < REQ_CU_MAX * 1000))
7428           {
7429           if (req_cu != req_cu2)  /* Caseless */
7430             {
7431 #if PCRE2_CODE_UNIT_WIDTH != 8
7432             while (p < end_subject)
7433               {
7434               uint32_t pp = UCHAR21INCTEST(p);
7435               if (pp == req_cu || pp == req_cu2) { p--; break; }
7436               }
7437 #else  /* 8-bit code units */
7438             PCRE2_SPTR pp = p;
7439             p = memchr(pp, req_cu, end_subject - pp);
7440             if (p == NULL)
7441               {
7442               p = memchr(pp, req_cu2, end_subject - pp);
7443               if (p == NULL) p = end_subject;
7444               }
7445 #endif /* PCRE2_CODE_UNIT_WIDTH != 8 */
7446             }
7447 
7448           /* The caseful case */
7449 
7450           else
7451             {
7452 #if PCRE2_CODE_UNIT_WIDTH != 8
7453             while (p < end_subject)
7454               {
7455               if (UCHAR21INCTEST(p) == req_cu) { p--; break; }
7456               }
7457 
7458 #else  /* 8-bit code units */
7459             p = memchr(p, req_cu, end_subject - p);
7460             if (p == NULL) p = end_subject;
7461 #endif
7462             }
7463 
7464           /* If we can't find the required code unit, break the bumpalong loop,
7465           forcing a match failure. */
7466 
7467           if (p >= end_subject)
7468             {
7469             rc = MATCH_NOMATCH;
7470             break;
7471             }
7472 
7473           /* If we have found the required code unit, save the point where we
7474           found it, so that we don't search again next time round the bumpalong
7475           loop if the start hasn't yet passed this code unit. */
7476 
7477           req_cu_ptr = p;
7478           }
7479         }
7480       }
7481     }
7482 
7483   /* ------------ End of start of match optimizations ------------ */
7484 
7485   /* Give no match if we have passed the bumpalong limit. */
7486 
7487   if (start_match > bumpalong_limit)
7488     {
7489     rc = MATCH_NOMATCH;
7490     break;
7491     }
7492 
7493   /* OK, we can now run the match. If "hitend" is set afterwards, remember the
7494   first starting point for which a partial match was found. */
7495 
7496   cb.start_match = (PCRE2_SIZE)(start_match - subject);
7497   cb.callout_flags |= PCRE2_CALLOUT_STARTMATCH;
7498 
7499   mb->start_used_ptr = start_match;
7500   mb->last_used_ptr = start_match;
7501 #ifdef SUPPORT_UNICODE
7502   mb->moptions = options | fragment_options;
7503 #else
7504   mb->moptions = options;
7505 #endif
7506   mb->match_call_count = 0;
7507   mb->end_offset_top = 0;
7508   mb->skip_arg_count = 0;
7509 
7510 #ifdef DEBUG_SHOW_OPS
7511   fprintf(stderr, "++ Calling match()\n");
7512 #endif
7513 
7514   rc = match(start_match, mb->start_code, re->top_bracket, frame_size,
7515     match_data, mb);
7516 
7517 #ifdef DEBUG_SHOW_OPS
7518   fprintf(stderr, "++ match() returned %d\n\n", rc);
7519 #endif
7520 
7521   if (mb->hitend && start_partial == NULL)
7522     {
7523     start_partial = mb->start_used_ptr;
7524     match_partial = start_match;
7525     }
7526 
7527   switch(rc)
7528     {
7529     /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched
7530     the SKIP's arg was not found. In this circumstance, Perl ignores the SKIP
7531     entirely. The only way we can do that is to re-do the match at the same
7532     point, with a flag to force SKIP with an argument to be ignored. Just
7533     treating this case as NOMATCH does not work because it does not check other
7534     alternatives in patterns such as A(*SKIP:A)B|AC when the subject is AC. */
7535 
7536     case MATCH_SKIP_ARG:
7537     new_start_match = start_match;
7538     mb->ignore_skip_arg = mb->skip_arg_count;
7539     break;
7540 
7541     /* SKIP passes back the next starting point explicitly, but if it is no
7542     greater than the match we have just done, treat it as NOMATCH. */
7543 
7544     case MATCH_SKIP:
7545     if (mb->verb_skip_ptr > start_match)
7546       {
7547       new_start_match = mb->verb_skip_ptr;
7548       break;
7549       }
7550     /* Fall through */
7551 
7552     /* NOMATCH and PRUNE advance by one character. THEN at this level acts
7553     exactly like PRUNE. Unset ignore SKIP-with-argument. */
7554 
7555     case MATCH_NOMATCH:
7556     case MATCH_PRUNE:
7557     case MATCH_THEN:
7558     mb->ignore_skip_arg = 0;
7559     new_start_match = start_match + 1;
7560 #ifdef SUPPORT_UNICODE
7561     if (utf)
7562       ACROSSCHAR(new_start_match < end_subject, new_start_match,
7563         new_start_match++);
7564 #endif
7565     break;
7566 
7567     /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
7568 
7569     case MATCH_COMMIT:
7570     rc = MATCH_NOMATCH;
7571     goto ENDLOOP;
7572 
7573     /* Any other return is either a match, or some kind of error. */
7574 
7575     default:
7576     goto ENDLOOP;
7577     }
7578 
7579   /* Control reaches here for the various types of "no match at this point"
7580   result. Reset the code to MATCH_NOMATCH for subsequent checking. */
7581 
7582   rc = MATCH_NOMATCH;
7583 
7584   /* If PCRE2_FIRSTLINE is set, the match must happen before or at the first
7585   newline in the subject (though it may continue over the newline). Therefore,
7586   if we have just failed to match, starting at a newline, do not continue. */
7587 
7588   if (firstline && IS_NEWLINE(start_match)) break;
7589 
7590   /* Advance to new matching position */
7591 
7592   start_match = new_start_match;
7593 
7594   /* Break the loop if the pattern is anchored or if we have passed the end of
7595   the subject. */
7596 
7597   if (anchored || start_match > end_subject) break;
7598 
7599   /* If we have just passed a CR and we are now at a LF, and the pattern does
7600   not contain any explicit matches for \r or \n, and the newline option is CRLF
7601   or ANY or ANYCRLF, advance the match position by one more code unit. In
7602   normal matching start_match will always be greater than the first position at
7603   this stage, but a failed *SKIP can cause a return at the same point, which is
7604   why the first test exists. */
7605 
7606   if (start_match > subject + start_offset &&
7607       start_match[-1] == CHAR_CR &&
7608       start_match < end_subject &&
7609       *start_match == CHAR_NL &&
7610       (re->flags & PCRE2_HASCRORLF) == 0 &&
7611         (mb->nltype == NLTYPE_ANY ||
7612          mb->nltype == NLTYPE_ANYCRLF ||
7613          mb->nllen == 2))
7614     start_match++;
7615 
7616   mb->mark = NULL;   /* Reset for start of next match attempt */
7617   }                  /* End of for(;;) "bumpalong" loop */
7618 
7619 /* ==========================================================================*/
7620 
7621 /* When we reach here, one of the following stopping conditions is true:
7622 
7623 (1) The match succeeded, either completely, or partially;
7624 
7625 (2) The pattern is anchored or the match was failed after (*COMMIT);
7626 
7627 (3) We are past the end of the subject or the bumpalong limit;
7628 
7629 (4) PCRE2_FIRSTLINE is set and we have failed to match at a newline, because
7630     this option requests that a match occur at or before the first newline in
7631     the subject.
7632 
7633 (5) Some kind of error occurred.
7634 
7635 */
7636 
7637 ENDLOOP:
7638 
7639 /* If end_subject != true_end_subject, it means we are handling invalid UTF,
7640 and have just processed a non-terminal fragment. If this resulted in no match
7641 or a partial match we must carry on to the next fragment (a partial match is
7642 returned to the caller only at the very end of the subject). A loop is used to
7643 avoid trying to match against empty fragments; if the pattern can match an
7644 empty string it would have done so already. */
7645 
7646 #ifdef SUPPORT_UNICODE
7647 if (utf && end_subject != true_end_subject &&
7648     (rc == MATCH_NOMATCH || rc == PCRE2_ERROR_PARTIAL))
7649   {
7650   for (;;)
7651     {
7652     /* Advance past the first bad code unit, and then skip invalid character
7653     starting code units in 8-bit and 16-bit modes. */
7654 
7655     start_match = end_subject + 1;
7656 
7657 #if PCRE2_CODE_UNIT_WIDTH != 32
7658     while (start_match < true_end_subject && NOT_FIRSTCU(*start_match))
7659       start_match++;
7660 #endif
7661 
7662     /* If we have hit the end of the subject, there isn't another non-empty
7663     fragment, so give up. */
7664 
7665     if (start_match >= true_end_subject)
7666       {
7667       rc = MATCH_NOMATCH;  /* In case it was partial */
7668       match_partial = NULL;
7669       break;
7670       }
7671 
7672     /* Check the rest of the subject */
7673 
7674     mb->check_subject = start_match;
7675     rc = PRIV(valid_utf)(start_match, length - (start_match - subject),
7676       &(match_data->startchar));
7677 
7678     /* The rest of the subject is valid UTF. */
7679 
7680     if (rc == 0)
7681       {
7682       mb->end_subject = end_subject = true_end_subject;
7683       fragment_options = PCRE2_NOTBOL;
7684       goto FRAGMENT_RESTART;
7685       }
7686 
7687     /* A subsequent UTF error has been found; if the next fragment is
7688     non-empty, set up to process it. Otherwise, let the loop advance. */
7689 
7690     else if (rc < 0)
7691       {
7692       mb->end_subject = end_subject = start_match + match_data->startchar;
7693       if (end_subject > start_match)
7694         {
7695         fragment_options = PCRE2_NOTBOL|PCRE2_NOTEOL;
7696         goto FRAGMENT_RESTART;
7697         }
7698       }
7699     }
7700   }
7701 #endif  /* SUPPORT_UNICODE */
7702 
7703 /* Fill in fields that are always returned in the match data. */
7704 
7705 match_data->code = re;
7706 match_data->mark = mb->mark;
7707 match_data->matchedby = PCRE2_MATCHEDBY_INTERPRETER;
7708 
7709 /* Handle a fully successful match. Set the return code to the number of
7710 captured strings, or 0 if there were too many to fit into the ovector, and then
7711 set the remaining returned values before returning. Make a copy of the subject
7712 string if requested. */
7713 
7714 if (rc == MATCH_MATCH)
7715   {
7716   match_data->rc = ((int)mb->end_offset_top >= 2 * match_data->oveccount)?
7717     0 : (int)mb->end_offset_top/2 + 1;
7718   match_data->subject_length = length;
7719   match_data->startchar = start_match - subject;
7720   match_data->leftchar = mb->start_used_ptr - subject;
7721   match_data->rightchar = ((mb->last_used_ptr > mb->end_match_ptr)?
7722     mb->last_used_ptr : mb->end_match_ptr) - subject;
7723   if ((options & PCRE2_COPY_MATCHED_SUBJECT) != 0)
7724     {
7725     length = CU2BYTES(length + was_zero_terminated);
7726     match_data->subject = match_data->memctl.malloc(length,
7727       match_data->memctl.memory_data);
7728     if (match_data->subject == NULL) return PCRE2_ERROR_NOMEMORY;
7729     memcpy((void *)match_data->subject, subject, length);
7730     match_data->flags |= PCRE2_MD_COPIED_SUBJECT;
7731     }
7732   else match_data->subject = subject;
7733 
7734   return match_data->rc;
7735   }
7736 
7737 /* Control gets here if there has been a partial match, an error, or if the
7738 overall match attempt has failed at all permitted starting positions. Any mark
7739 data is in the nomatch_mark field. */
7740 
7741 match_data->mark = mb->nomatch_mark;
7742 
7743 /* For anything other than nomatch or partial match, just return the code. */
7744 
7745 if (rc != MATCH_NOMATCH && rc != PCRE2_ERROR_PARTIAL) match_data->rc = rc;
7746 
7747 /* Handle a partial match. If a "soft" partial match was requested, searching
7748 for a complete match will have continued, and the value of rc at this point
7749 will be MATCH_NOMATCH. For a "hard" partial match, it will already be
7750 PCRE2_ERROR_PARTIAL. */
7751 
7752 else if (match_partial != NULL)
7753   {
7754   match_data->subject = subject;
7755   match_data->subject_length = length;
7756   match_data->ovector[0] = match_partial - subject;
7757   match_data->ovector[1] = end_subject - subject;
7758   match_data->startchar = match_partial - subject;
7759   match_data->leftchar = start_partial - subject;
7760   match_data->rightchar = end_subject - subject;
7761   match_data->rc = PCRE2_ERROR_PARTIAL;
7762   }
7763 
7764 /* Else this is the classic nomatch case. */
7765 
7766 else match_data->rc = PCRE2_ERROR_NOMATCH;
7767 
7768 return match_data->rc;
7769 }
7770 
7771 /* These #undefs are here to enable unity builds with CMake. */
7772 
7773 #undef NLBLOCK /* Block containing newline information */
7774 #undef PSSTART /* Field containing processed string start */
7775 #undef PSEND   /* Field containing processed string end */
7776 
7777 /* End of pcre2_match.c */
7778