xref: /PHP-8.2/ext/pcre/pcre2lib/pcre2_match.c (revision 1ea8a10c)
1 /*************************************************
2 *      Perl-Compatible Regular Expressions       *
3 *************************************************/
4 
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7 
8                        Written by Philip Hazel
9      Original API code Copyright (c) 1997-2012 University of Cambridge
10           New API code Copyright (c) 2015-2022 University of Cambridge
11 
12 -----------------------------------------------------------------------------
13 Redistribution and use in source and binary forms, with or without
14 modification, are permitted provided that the following conditions are met:
15 
16     * Redistributions of source code must retain the above copyright notice,
17       this list of conditions and the following disclaimer.
18 
19     * Redistributions in binary form must reproduce the above copyright
20       notice, this list of conditions and the following disclaimer in the
21       documentation and/or other materials provided with the distribution.
22 
23     * Neither the name of the University of Cambridge nor the names of its
24       contributors may be used to endorse or promote products derived from
25       this software without specific prior written permission.
26 
27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 POSSIBILITY OF SUCH DAMAGE.
38 -----------------------------------------------------------------------------
39 */
40 
41 
42 #ifdef HAVE_CONFIG_H
43 #include "config.h"
44 #endif
45 
46 /* These defines enable debugging code */
47 
48 /* #define DEBUG_FRAMES_DISPLAY */
49 /* #define DEBUG_SHOW_OPS */
50 /* #define DEBUG_SHOW_RMATCH */
51 
52 #ifdef DEBUG_FRAMES_DISPLAY
53 #include <stdarg.h>
54 #endif
55 
56 /* These defines identify the name of the block containing "static"
57 information, and fields within it. */
58 
59 #define NLBLOCK mb              /* Block containing newline information */
60 #define PSSTART start_subject   /* Field containing processed string start */
61 #define PSEND   end_subject     /* Field containing processed string end */
62 
63 #include "pcre2_internal.h"
64 
65 #define RECURSE_UNSET 0xffffffffu  /* Bigger than max group number */
66 
67 /* Masks for identifying the public options that are permitted at match time. */
68 
69 #define PUBLIC_MATCH_OPTIONS \
70   (PCRE2_ANCHORED|PCRE2_ENDANCHORED|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY| \
71    PCRE2_NOTEMPTY_ATSTART|PCRE2_NO_UTF_CHECK|PCRE2_PARTIAL_HARD| \
72    PCRE2_PARTIAL_SOFT|PCRE2_NO_JIT|PCRE2_COPY_MATCHED_SUBJECT)
73 
74 #define PUBLIC_JIT_MATCH_OPTIONS \
75    (PCRE2_NO_UTF_CHECK|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY|\
76     PCRE2_NOTEMPTY_ATSTART|PCRE2_PARTIAL_SOFT|PCRE2_PARTIAL_HARD|\
77     PCRE2_COPY_MATCHED_SUBJECT)
78 
79 /* Non-error returns from and within the match() function. Error returns are
80 externally defined PCRE2_ERROR_xxx codes, which are all negative. */
81 
82 #define MATCH_MATCH        1
83 #define MATCH_NOMATCH      0
84 
85 /* Special internal returns used in the match() function. Make them
86 sufficiently negative to avoid the external error codes. */
87 
88 #define MATCH_ACCEPT       (-999)
89 #define MATCH_KETRPOS      (-998)
90 /* The next 5 must be kept together and in sequence so that a test that checks
91 for any one of them can use a range. */
92 #define MATCH_COMMIT       (-997)
93 #define MATCH_PRUNE        (-996)
94 #define MATCH_SKIP         (-995)
95 #define MATCH_SKIP_ARG     (-994)
96 #define MATCH_THEN         (-993)
97 #define MATCH_BACKTRACK_MAX MATCH_THEN
98 #define MATCH_BACKTRACK_MIN MATCH_COMMIT
99 
100 /* Group frame type values. Zero means the frame is not a group frame. The
101 lower 16 bits are used for data (e.g. the capture number). Group frames are
102 used for most groups so that information about the start is easily available at
103 the end without having to scan back through intermediate frames (backtrack
104 points). */
105 
106 #define GF_CAPTURE     0x00010000u
107 #define GF_NOCAPTURE   0x00020000u
108 #define GF_CONDASSERT  0x00030000u
109 #define GF_RECURSE     0x00040000u
110 
111 /* Masks for the identity and data parts of the group frame type. */
112 
113 #define GF_IDMASK(a)   ((a) & 0xffff0000u)
114 #define GF_DATAMASK(a) ((a) & 0x0000ffffu)
115 
116 /* Repetition types */
117 
118 enum { REPTYPE_MIN, REPTYPE_MAX, REPTYPE_POS };
119 
120 /* Min and max values for the common repeats; a maximum of UINT32_MAX =>
121 infinity. */
122 
123 static const uint32_t rep_min[] = {
124   0, 0,       /* * and *? */
125   1, 1,       /* + and +? */
126   0, 0,       /* ? and ?? */
127   0, 0,       /* dummy placefillers for OP_CR[MIN]RANGE */
128   0, 1, 0 };  /* OP_CRPOS{STAR, PLUS, QUERY} */
129 
130 static const uint32_t rep_max[] = {
131   UINT32_MAX, UINT32_MAX,      /* * and *? */
132   UINT32_MAX, UINT32_MAX,      /* + and +? */
133   1, 1,                        /* ? and ?? */
134   0, 0,                        /* dummy placefillers for OP_CR[MIN]RANGE */
135   UINT32_MAX, UINT32_MAX, 1 }; /* OP_CRPOS{STAR, PLUS, QUERY} */
136 
137 /* Repetition types - must include OP_CRPOSRANGE (not needed above) */
138 
139 static const uint32_t rep_typ[] = {
140   REPTYPE_MAX, REPTYPE_MIN,    /* * and *? */
141   REPTYPE_MAX, REPTYPE_MIN,    /* + and +? */
142   REPTYPE_MAX, REPTYPE_MIN,    /* ? and ?? */
143   REPTYPE_MAX, REPTYPE_MIN,    /* OP_CRRANGE and OP_CRMINRANGE */
144   REPTYPE_POS, REPTYPE_POS,    /* OP_CRPOSSTAR, OP_CRPOSPLUS */
145   REPTYPE_POS, REPTYPE_POS };  /* OP_CRPOSQUERY, OP_CRPOSRANGE */
146 
147 /* Numbers for RMATCH calls at backtracking points. When these lists are
148 changed, the code at RETURN_SWITCH below must be updated in sync.  */
149 
150 enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
151        RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
152        RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
153        RM31,  RM32, RM33, RM34, RM35, RM36 };
154 
155 #ifdef SUPPORT_WIDE_CHARS
156 enum { RM100=100, RM101 };
157 #endif
158 
159 #ifdef SUPPORT_UNICODE
160 enum { RM200=200, RM201, RM202, RM203, RM204, RM205, RM206, RM207,
161        RM208,     RM209, RM210, RM211, RM212, RM213, RM214, RM215,
162        RM216,     RM217, RM218, RM219, RM220, RM221, RM222, RM223,
163        RM224,     RM225 };
164 #endif
165 
166 /* Define short names for general fields in the current backtrack frame, which
167 is always pointed to by the F variable. Occasional references to fields in
168 other frames are written out explicitly. There are also some fields in the
169 current frame whose names start with "temp" that are used for short-term,
170 localised backtracking memory. These are #defined with Lxxx names at the point
171 of use and undefined afterwards. */
172 
173 #define Fback_frame        F->back_frame
174 #define Fcapture_last      F->capture_last
175 #define Fcurrent_recurse   F->current_recurse
176 #define Fecode             F->ecode
177 #define Feptr              F->eptr
178 #define Fgroup_frame_type  F->group_frame_type
179 #define Flast_group_offset F->last_group_offset
180 #define Flength            F->length
181 #define Fmark              F->mark
182 #define Frdepth            F->rdepth
183 #define Fstart_match       F->start_match
184 #define Foffset_top        F->offset_top
185 #define Foccu              F->occu
186 #define Fop                F->op
187 #define Fovector           F->ovector
188 #define Freturn_id         F->return_id
189 
190 
191 #ifdef DEBUG_FRAMES_DISPLAY
192 /*************************************************
193 *      Display current frames and contents       *
194 *************************************************/
195 
196 /* This debugging function displays the current set of frames and their
197 contents. It is not called automatically from anywhere, the intention being
198 that calls can be inserted where necessary when debugging frame-related
199 problems.
200 
201 Arguments:
202   f           the file to write to
203   F           the current top frame
204   P           a previous frame of interest
205   frame_size  the frame size
206   mb          points to the match block
207   s           identification text
208 
209 Returns:    nothing
210 */
211 
212 static void
display_frames(FILE * f,heapframe * F,heapframe * P,PCRE2_SIZE frame_size,match_block * mb,const char * s,...)213 display_frames(FILE *f, heapframe *F, heapframe *P, PCRE2_SIZE frame_size,
214   match_block *mb, const char *s, ...)
215 {
216 uint32_t i;
217 heapframe *Q;
218 va_list ap;
/* Heading line: the fixed "FRAMES " tag followed by the caller's printf-style
identification text (s plus its variable arguments). */
219 va_start(ap, s);
220 
221 fprintf(f, "FRAMES ");
222 vfprintf(f, s, ap);
223 va_end(ap);
224 
/* P is shown as a frame index: its byte distance from the start of the frame
vector divided by the frame size.
NOTE(review): the %lu conversions in this function are handed PCRE2_SIZE and
pointer-difference values, and %d is handed the uint32_t counter; this presumably
relies on those types matching unsigned long / int on the build target - confirm
before using this on other ABIs. */
225 if (P != NULL) fprintf(f, " P=%lu",
226   ((char *)P - (char *)(mb->match_frames))/frame_size);
227 fprintf(f, "\n");
228 
/* Walk the frame vector from its first frame up to and including F. Frames are
frame_size bytes apart, so the pointer is advanced via char arithmetic rather
than by heapframe pointer increments. */
229 for (i = 0, Q = mb->match_frames;
230      Q <= F;
231      i++, Q = (heapframe *)((char *)Q + frame_size))
232   {
  /* One line per frame: index, group frame type, subject offset of eptr, the
  code unit at ecode, back_frame, and return_id. */
233   fprintf(f, "Frame %d type=%x subj=%lu code=%d back=%lu id=%d",
234     i, Q->group_frame_type, Q->eptr - mb->start_subject, *(Q->ecode),
235     Q->back_frame, Q->return_id);
236 
  /* last_group_offset is a byte offset into the frame vector; it is printed as
  a frame index, or as "unset" when it holds PCRE2_UNSET. */
237   if (Q->last_group_offset == PCRE2_UNSET)
238     fprintf(f, " lgoffset=unset\n");
239   else
240     fprintf(f, " lgoffset=%lu\n",  Q->last_group_offset/frame_size);
241   }
242 }
243 
244 #endif
245 
246 
247 
248 /*************************************************
249 *                Process a callout               *
250 *************************************************/
251 
252 /* This function is called for all callouts, whether "standalone" or at the
253 start of a conditional group. Fecode will be pointing to either OP_CALLOUT or
254 OP_CALLOUT_STR. A callout block is allocated in pcre2_match() and initialized
255 with fixed values.
256 
257 Arguments:
258   F          points to the current backtracking frame
259   mb         points to the match block
260   lengthptr  where to return the length of the callout item
261 
262 Returns:     the return from the callout
263              or 0 if no callout function exists
264 */
265 
266 static int
do_callout(heapframe * F,match_block * mb,PCRE2_SIZE * lengthptr)267 do_callout(heapframe *F, match_block *mb, PCRE2_SIZE *lengthptr)
268 {
269 int rc;
270 PCRE2_SIZE save0, save1;
271 PCRE2_SIZE *callout_ovector;
272 pcre2_callout_block *cb;
273 
/* The length of the callout item is always returned, even when no callout
function is set. A numerical callout has a fixed length taken from the opcode
length table; for a string callout the length is read from the compiled code at
offset 1 + 2*LINK_SIZE. */
274 *lengthptr = (*Fecode == OP_CALLOUT)?
275   PRIV(OP_lengths)[OP_CALLOUT] : GET(Fecode, 1 + 2*LINK_SIZE);
276 
277 if (mb->callout == NULL) return 0;   /* No callout function provided */
278 
279 /* The original matching code (pre 10.30) worked directly with the ovector
280 passed by the user, and this was passed to callouts. Now that the working
281 ovector is in the backtracking frame, it no longer needs to reserve space for
282 the overall match offsets (which would waste space in the frame). For backward
283 compatibility, however, we pass capture_top and offset_vector to the callout as
284 if for the extended ovector, and we ensure that the first two slots are unset
285 by preserving and restoring their current contents. Picky compilers complain if
286 references such as Fovector[-2] are used directly, so we set up a separate
287 pointer. */
288 
289 callout_ovector = (PCRE2_SIZE *)(Fovector) - 2;
290 
291 /* The cb->version, cb->subject, cb->subject_length, and cb->start_match fields
292 are set externally. The first 3 never change; the last is updated for each
293 bumpalong. */
294 
295 cb = mb->cb;
296 cb->capture_top      = (uint32_t)Foffset_top/2 + 1;
297 cb->capture_last     = Fcapture_last;
298 cb->offset_vector    = callout_ovector;
299 cb->mark             = mb->nomatch_mark;
300 cb->current_position = (PCRE2_SIZE)(Feptr - mb->start_subject);
301 cb->pattern_position = GET(Fecode, 1);
302 cb->next_item_length = GET(Fecode, 1 + LINK_SIZE);
303 
304 if (*Fecode == OP_CALLOUT)  /* Numerical callout */
305   {
306   cb->callout_number = Fecode[1 + 2*LINK_SIZE];
307   cb->callout_string_offset = 0;
308   cb->callout_string = NULL;
309   cb->callout_string_length = 0;
310   }
311 else  /* String callout */
312   {
  /* The string starts one code unit beyond the (1 + 4*LINK_SIZE) fixed part of
  the item, and its length excludes that fixed part plus two more code units -
  presumably the opening and closing delimiters; confirm against the code that
  compiles OP_CALLOUT_STR. */
313   cb->callout_number = 0;
314   cb->callout_string_offset = GET(Fecode, 1 + 3*LINK_SIZE);
315   cb->callout_string = Fecode + (1 + 4*LINK_SIZE) + 1;
316   cb->callout_string_length =
317     *lengthptr - (1 + 4*LINK_SIZE) - 2;
318   }
319 
/* Blank out the two slots in front of the frame's ovector only for the duration
of the call, then put the saved values back so the frame is left unchanged. */
320 save0 = callout_ovector[0];
321 save1 = callout_ovector[1];
322 callout_ovector[0] = callout_ovector[1] = PCRE2_UNSET;
323 rc = mb->callout(cb, mb->callout_data);
324 callout_ovector[0] = save0;
325 callout_ovector[1] = save1;
/* The flags are cleared once the callout has returned; this function never sets
them itself. */
326 cb->callout_flags = 0;
327 return rc;
328 }
329 
330 
331 
332 /*************************************************
333 *          Match a back-reference                *
334 *************************************************/
335 
336 /* This function is called only when it is known that the offset lies within
337 the offsets that have so far been used in the match. Note that in caseless
338 UTF-8 mode, the number of subject bytes matched may be different to the number
339 of reference bytes. (In theory this could also happen in UTF-16 mode, but it
340 seems unlikely.)
341 
342 Arguments:
343   offset      index into the offset vector
344   caseless    TRUE if caseless
345   F           the current backtracking frame pointer
346   mb          points to match block
347   lengthptr   pointer for returning the length matched
348 
349 Returns:      = 0 successful match; number of code units matched is set
350               < 0 no match
351               > 0 partial match
352 */
353 
354 static int
match_ref(PCRE2_SIZE offset,BOOL caseless,heapframe * F,match_block * mb,PCRE2_SIZE * lengthptr)355 match_ref(PCRE2_SIZE offset, BOOL caseless, heapframe *F, match_block *mb,
356   PCRE2_SIZE *lengthptr)
357 {
358 PCRE2_SPTR p;
359 PCRE2_SIZE length;
360 PCRE2_SPTR eptr;
361 PCRE2_SPTR eptr_start;
362 
363 /* Deal with an unset group. The default is no match, but there is an option to
364 match an empty string. */
365 
366 if (offset >= Foffset_top || Fovector[offset] == PCRE2_UNSET)
367   {
368   if ((mb->poptions & PCRE2_MATCH_UNSET_BACKREF) != 0)
369     {
370     *lengthptr = 0;
371     return 0;      /* Match */
372     }
373   else return -1;  /* No match */
374   }
375 
376 /* Separate the caseless and UTF cases for speed. */
377 
/* eptr walks the subject from the current position (eptr_start remembers where
it began), p walks the previously captured text, and length is the size of that
captured text in code units. */
378 eptr = eptr_start = Feptr;
379 p = mb->start_subject + Fovector[offset];
380 length = Fovector[offset+1] - Fovector[offset];
381 
382 if (caseless)
383   {
384 #if defined SUPPORT_UNICODE
385   BOOL utf = (mb->poptions & PCRE2_UTF) != 0;
386 
387   if (utf || (mb->poptions & PCRE2_UCP) != 0)
388     {
389     PCRE2_SPTR endptr = p + length;
390 
391     /* Match characters up to the end of the reference. NOTE: the number of
392     code units matched may differ, because in UTF-8 there are some characters
393     whose upper and lower case codes have different numbers of bytes. For
394     example, U+023A (2 bytes in UTF-8) is the upper case version of U+2C65 (3
395     bytes in UTF-8); a sequence of 3 of the former uses 6 bytes, as does a
396     sequence of two of the latter. It is important, therefore, to check the
397     length along the reference, not along the subject (earlier code did this
398     wrong). UCP without UTF uses Unicode properties on single code units, with
    no UTF decoding. */
399 
400     while (p < endptr)
401       {
402       uint32_t c, d;
403       const ucd_record *ur;
404       if (eptr >= mb->end_subject) return 1;   /* Partial match */
405 
406       if (utf)
407         {
408         GETCHARINC(c, eptr);
409         GETCHARINC(d, p);
410         }
411       else
412         {
413         c = *eptr++;
414         d = *p++;
415         }
416 
      /* c (subject) must equal d (reference), or d's single other case, or be
      a member of d's caseless set. The set scan gives up as soon as c is below
      the current entry - presumably the sets are in ascending order and end
      with a value above every character; confirm against the UCD tables. */
417       ur = GET_UCD(d);
418       if (c != d && c != (uint32_t)((int)d + ur->other_case))
419         {
420         const uint32_t *pp = PRIV(ucd_caseless_sets) + ur->caseset;
421         for (;;)
422           {
423           if (c < *pp) return -1;  /* No match */
424           if (c == *pp++) break;
425           }
426         }
427       }
428     }
429   else
430 #endif
431 
432   /* Not in UTF or UCP mode */
433     {
434     for (; length > 0; length--)
435       {
436       uint32_t cc, cp;
437       if (eptr >= mb->end_subject) return 1;   /* Partial match */
      /* Compare via the lower-casing table so that both sides are folded the
      same way. */
438       cc = UCHAR21TEST(eptr);
439       cp = UCHAR21TEST(p);
440       if (TABLE_GET(cp, mb->lcc, cp) != TABLE_GET(cc, mb->lcc, cc))
441         return -1;  /* No match */
442       p++;
443       eptr++;
444       }
445     }
446   }
447 
448 /* In the caseful case, we can just compare the code units, whether or not we
449 are in UTF and/or UCP mode. When partial matching, we have to do this unit by
450 unit. */
451 
452 else
453   {
454   if (mb->partial != 0)
455     {
456     for (; length > 0; length--)
457       {
458       if (eptr >= mb->end_subject) return 1;   /* Partial match */
459       if (UCHAR21INCTEST(p) != UCHAR21INCTEST(eptr)) return -1;  /* No match */
460       }
461     }
462 
463   /* Not partial matching */
464 
465   else
466     {
    /* If fewer code units remain in the subject than the reference needs, the
    positive "partial" value is returned without comparing anything; otherwise
    the whole reference is compared in one memcmp() call. */
467     if ((PCRE2_SIZE)(mb->end_subject - eptr) < length) return 1; /* Partial */
468     if (memcmp(p, eptr, CU2BYTES(length)) != 0) return -1;  /* No match */
469     eptr += length;
470     }
471   }
472 
/* Report how many subject code units were consumed. This is measured along the
subject, so it can differ from the reference length in the caseless Unicode
case described above. */
473 *lengthptr = eptr - eptr_start;
474 return 0;  /* Match */
475 }
476 
477 
478 
479 /******************************************************************************
480 *******************************************************************************
481                    "Recursion" in the match() function
482 
483 The original match() function was highly recursive, but this proved to be the
484 source of a number of problems over the years, mostly because of the relatively
485 small system stacks that are commonly found. As new features were added to
486 patterns, various kludges were invented to reduce the amount of stack used,
487 making the code hard to understand in places.
488 
489 A version did exist that used individual frames on the heap instead of calling
490 match() recursively, but this ran substantially slower. The current version is
491 a refactoring that uses a vector of frames to remember backtracking points.
492 This runs no slower, and possibly even a bit faster than the original recursive
493 implementation. An initial vector of size START_FRAMES_SIZE (enough for maybe
494 50 frames) is allocated on the system stack. If this is not big enough, the
495 heap is used for a larger vector.
496 
497 *******************************************************************************
498 ******************************************************************************/
499 
500 
501 
502 
503 /*************************************************
504 *       Macros for the match() function          *
505 *************************************************/
506 
507 /* These macros pack up tests that are used for partial matching several times
508 in the code. The second one is used when we already know we are past the end of
509 the subject. We set the "hit end" flag if the pointer is at the end of the
510 subject and either (a) the pointer is past the earliest inspected character
511 (i.e. something has been matched, even if not part of the actual matched
512 string), or (b) the pattern contains a lookbehind. These are the conditions for
513 which adding more characters may allow the current match to continue.
514 
515 For hard partial matching, we immediately return a partial match. Otherwise,
516 carrying on means that a complete match on the current subject will be sought.
517 A partial match is returned only if no complete match can be found. */
518 
519 #define CHECK_PARTIAL()\
520   if (Feptr >= mb->end_subject) \
521     { \
522     SCHECK_PARTIAL(); \
523     }
524 
525 #define SCHECK_PARTIAL()\
526   if (mb->partial != 0 && \
527       (Feptr > mb->start_used_ptr || mb->allowemptypartial)) \
528     { \
529     mb->hitend = TRUE; \
530     if (mb->partial > 1) return PCRE2_ERROR_PARTIAL; \
531     }
532 
533 
534 /* These macros are used to implement backtracking. They simulate a recursive
535 call to the match() function by means of a local vector of frames which
536 remember the backtracking points. */
537 
538 #define RMATCH(ra,rb)\
539   {\
540   start_ecode = ra;\
541   Freturn_id = rb;\
542   goto MATCH_RECURSE;\
543   L_##rb:;\
544   }
545 
546 #define RRETURN(ra)\
547   {\
548   rrc = ra;\
549   goto RETURN_SWITCH;\
550   }
551 
552 
553 
554 /*************************************************
555 *         Match from current position            *
556 *************************************************/
557 
558 /* This function is called to run one match attempt at a single starting point
559 in the subject.
560 
561 Performance note: It might be tempting to extract commonly used fields from the
562 mb structure (e.g. end_subject) into individual variables to improve
563 performance. Tests using gcc on a SPARC disproved this; in the first case, it
564 made performance worse.
565 
566 Arguments:
567    start_eptr   starting character in subject
568    start_ecode  starting position in compiled code
569    ovector      pointer to the final output vector
570    oveccount    number of pairs in ovector
571    top_bracket  number of capturing parentheses in the pattern
572    frame_size   size of each backtracking frame
573    mb           pointer to "static" variables block
574 
575 Returns:        MATCH_MATCH if matched            )  these values are >= 0
576                 MATCH_NOMATCH if failed to match  )
577                 negative MATCH_xxx value for PRUNE, SKIP, etc
578                 negative PCRE2_ERROR_xxx value if aborted by an error condition
579                 (e.g. stopped by repeated call or depth limit)
580 */
581 
582 static int
match(PCRE2_SPTR start_eptr,PCRE2_SPTR start_ecode,PCRE2_SIZE * ovector,uint16_t oveccount,uint16_t top_bracket,PCRE2_SIZE frame_size,match_block * mb)583 match(PCRE2_SPTR start_eptr, PCRE2_SPTR start_ecode, PCRE2_SIZE *ovector,
584   uint16_t oveccount, uint16_t top_bracket, PCRE2_SIZE frame_size,
585   match_block *mb)
586 {
587 /* Frame-handling variables */
588 
589 heapframe *F;           /* Current frame pointer */
590 heapframe *N = NULL;    /* Temporary frame pointers */
591 heapframe *P = NULL;
592 heapframe *assert_accept_frame = NULL;  /* For passing back a frame with captures */
593 PCRE2_SIZE frame_copy_size;     /* Amount to copy when creating a new frame */
594 
595 /* Local variables that do not need to be preserved over calls to RMATCH(). */
596 
597 PCRE2_SPTR bracode;     /* Temp pointer to start of group */
598 PCRE2_SIZE offset;      /* Used for group offsets */
599 PCRE2_SIZE length;      /* Used for various length calculations */
600 
601 int rrc;                /* Return from functions & backtracking "recursions" */
602 #ifdef SUPPORT_UNICODE
603 int proptype;           /* Type of character property */
604 #endif
605 
606 uint32_t i;             /* Used for local loops */
607 uint32_t fc;            /* Character values */
608 uint32_t number;        /* Used for group and other numbers */
609 uint32_t reptype = 0;   /* Type of repetition (0 to avoid compiler warning) */
610 uint32_t group_frame_type;  /* Specifies type for new group frames */
611 
612 BOOL condition;         /* Used in conditional groups */
613 BOOL cur_is_word;       /* Used in "word" tests */
614 BOOL prev_is_word;      /* Used in "word" tests */
615 
616 /* UTF and UCP flags */
617 
618 #ifdef SUPPORT_UNICODE
619 BOOL utf = (mb->poptions & PCRE2_UTF) != 0;
620 BOOL ucp = (mb->poptions & PCRE2_UCP) != 0;
621 #else
622 BOOL utf = FALSE;  /* Required for convenience even when no Unicode support */
623 #endif
624 
625 /* This is the length of the last part of a backtracking frame that must be
626 copied when a new frame is created. */
627 
628 frame_copy_size = frame_size - offsetof(heapframe, eptr);
629 
630 /* Set up the first current frame at the start of the vector, and initialize
631 fields that are not reset for new frames. */
632 
633 F = mb->match_frames;
634 Frdepth = 0;                        /* "Recursion" depth */
635 Fcapture_last = 0;                  /* Number of most recent capture */
636 Fcurrent_recurse = RECURSE_UNSET;   /* Not pattern recursing. */
637 Fstart_match = Feptr = start_eptr;  /* Current data pointer and start match */
638 Fmark = NULL;                       /* Most recent mark */
639 Foffset_top = 0;                    /* End of captures within the frame */
640 Flast_group_offset = PCRE2_UNSET;   /* Saved frame of most recent group */
641 group_frame_type = 0;               /* Not a start of group frame */
642 goto NEW_FRAME;                     /* Start processing with this frame */
643 
644 /* Come back here when we want to create a new frame for remembering a
645 backtracking point. */
646 
647 MATCH_RECURSE:
648 
649 /* Set up a new backtracking frame. If the vector is full, get a new one
650 on the heap, doubling the size, but constrained by the heap limit. */
651 
652 N = (heapframe *)((char *)F + frame_size);
653 if (N >= mb->match_frames_top)
654   {
655   PCRE2_SIZE newsize = mb->frame_vector_size * 2;
656   heapframe *new;
657 
658   if ((newsize / 1024) > mb->heap_limit)
659     {
660     PCRE2_SIZE maxsize = ((mb->heap_limit * 1024)/frame_size) * frame_size;
661     if (mb->frame_vector_size >= maxsize) return PCRE2_ERROR_HEAPLIMIT;
662     newsize = maxsize;
663     }
664 
665   new = mb->memctl.malloc(newsize, mb->memctl.memory_data);
666   if (new == NULL) return PCRE2_ERROR_NOMEMORY;
667   memcpy(new, mb->match_frames, mb->frame_vector_size);
668 
669   F = (heapframe *)((char *)new + ((char *)F - (char *)mb->match_frames));
670   N = (heapframe *)((char *)F + frame_size);
671 
672   if (mb->match_frames != mb->stack_frames)
673     mb->memctl.free(mb->match_frames, mb->memctl.memory_data);
674   mb->match_frames = new;
675   mb->match_frames_top = (heapframe *)((char *)mb->match_frames + newsize);
676   mb->frame_vector_size = newsize;
677   }
678 
679 #ifdef DEBUG_SHOW_RMATCH
680 fprintf(stderr, "++ RMATCH %2d frame=%d", Freturn_id, Frdepth + 1);
681 if (group_frame_type != 0)
682   {
683   fprintf(stderr, " type=%x ", group_frame_type);
684   switch (GF_IDMASK(group_frame_type))
685     {
686     case GF_CAPTURE:
687     fprintf(stderr, "capture=%d", GF_DATAMASK(group_frame_type));
688     break;
689 
690     case GF_NOCAPTURE:
691     fprintf(stderr, "nocapture op=%d", GF_DATAMASK(group_frame_type));
692     break;
693 
694     case GF_CONDASSERT:
695     fprintf(stderr, "condassert op=%d", GF_DATAMASK(group_frame_type));
696     break;
697 
698     case GF_RECURSE:
699     fprintf(stderr, "recurse=%d", GF_DATAMASK(group_frame_type));
700     break;
701 
702     default:
703     fprintf(stderr, "*** unknown ***");
704     break;
705     }
706   }
707 fprintf(stderr, "\n");
708 #endif
709 
710 /* Copy those fields that must be copied into the new frame, increase the
711 "recursion" depth (i.e. the new frame's index) and then make the new frame
712 current. */
713 
714 memcpy((char *)N + offsetof(heapframe, eptr),
715        (char *)F + offsetof(heapframe, eptr),
716        frame_copy_size);
717 
718 N->rdepth = Frdepth + 1;
719 F = N;
720 
721 /* Carry on processing with a new frame. */
722 
723 NEW_FRAME:
724 Fgroup_frame_type = group_frame_type;
725 Fecode = start_ecode;      /* Starting code pointer */
726 Fback_frame = frame_size;  /* Default is go back one frame */
727 
728 /* If this is a special type of group frame, remember its offset for quick
729 access at the end of the group. If this is a recursion, set a new current
730 recursion value. */
731 
732 if (group_frame_type != 0)
733   {
734   Flast_group_offset = (char *)F - (char *)mb->match_frames;
735   if (GF_IDMASK(group_frame_type) == GF_RECURSE)
736     Fcurrent_recurse = GF_DATAMASK(group_frame_type);
737   group_frame_type = 0;
738   }
739 
740 
741 /* ========================================================================= */
742 /* This is the main processing loop. First check that we haven't recorded too
743 many backtracks (search tree is too large), or that we haven't exceeded the
744 recursive depth limit (used too many backtracking frames). If not, process the
745 opcodes. */
746 
747 if (mb->match_call_count++ >= mb->match_limit) return PCRE2_ERROR_MATCHLIMIT;
748 if (Frdepth >= mb->match_limit_depth) return PCRE2_ERROR_DEPTHLIMIT;
749 
750 for (;;)
751   {
752 #ifdef DEBUG_SHOW_OPS
753 fprintf(stderr, "++ op=%d\n", *Fecode);
754 #endif
755 
756   Fop = (uint8_t)(*Fecode);  /* Cast needed for 16-bit and 32-bit modes */
757   switch(Fop)
758     {
759     /* ===================================================================== */
760     /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes, to close
761     any currently open capturing brackets. Unlike reaching the end of a group,
762     where we know the starting frame is at the top of the chained frames, in
763     this case we have to search back for the relevant frame in case other types
764     of group that use chained frames have intervened. Multiple OP_CLOSEs always
765     come innermost first, which matches the chain order. We can ignore this in
766     a recursion, because captures are not passed out of recursions. */
767 
768     case OP_CLOSE:
769     if (Fcurrent_recurse == RECURSE_UNSET)
770       {
771       number = GET2(Fecode, 1);
772       offset = Flast_group_offset;
773       for(;;)
774         {
775         if (offset == PCRE2_UNSET) return PCRE2_ERROR_INTERNAL;
776         N = (heapframe *)((char *)mb->match_frames + offset);
777         P = (heapframe *)((char *)N - frame_size);
778         if (N->group_frame_type == (GF_CAPTURE | number)) break;
779         offset = P->last_group_offset;
780         }
781       offset = (number << 1) - 2;
782       Fcapture_last = number;
783       Fovector[offset] = P->eptr - mb->start_subject;
784       Fovector[offset+1] = Feptr - mb->start_subject;
785       if (offset >= Foffset_top) Foffset_top = offset + 2;
786       }
787     Fecode += PRIV(OP_lengths)[*Fecode];
788     break;
789 
790 
791     /* ===================================================================== */
792     /* Real or forced end of the pattern, assertion, or recursion. In an
793     assertion ACCEPT, update the last used pointer and remember the current
794     frame so that the captures and mark can be fished out of it. */
795 
796     case OP_ASSERT_ACCEPT:
797     if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
798     assert_accept_frame = F;
799     RRETURN(MATCH_ACCEPT);
800 
801     /* If recursing, we have to find the most recent recursion. */
802 
803     case OP_ACCEPT:
804     case OP_END:
805 
806     /* Handle end of a recursion. */
807 
808     if (Fcurrent_recurse != RECURSE_UNSET)
809       {
810       offset = Flast_group_offset;
811       for(;;)
812         {
813         if (offset == PCRE2_UNSET) return PCRE2_ERROR_INTERNAL;
814         N = (heapframe *)((char *)mb->match_frames + offset);
815         P = (heapframe *)((char *)N - frame_size);
816         if (GF_IDMASK(N->group_frame_type) == GF_RECURSE) break;
817         offset = P->last_group_offset;
818         }
819 
820       /* N is now the frame of the recursion; the previous frame is at the
821       OP_RECURSE position. Go back there, copying the current subject position
822       and mark, and the start_match position (\K might have changed it), and
823       then move on past the OP_RECURSE. */
824 
825       P->eptr = Feptr;
826       P->mark = Fmark;
827       P->start_match = Fstart_match;
828       F = P;
829       Fecode += 1 + LINK_SIZE;
830       continue;
831       }
832 
833     /* Not a recursion. Fail for an empty string match if either PCRE2_NOTEMPTY
834     is set, or if PCRE2_NOTEMPTY_ATSTART is set and we have matched at the
835     start of the subject. In both cases, backtracking will then try other
836     alternatives, if any. */
837 
838     if (Feptr == Fstart_match &&
839          ((mb->moptions & PCRE2_NOTEMPTY) != 0 ||
840            ((mb->moptions & PCRE2_NOTEMPTY_ATSTART) != 0 &&
841              Fstart_match == mb->start_subject + mb->start_offset)))
842       RRETURN(MATCH_NOMATCH);
843 
844     /* Also fail if PCRE2_ENDANCHORED is set and the end of the match is not
845     the end of the subject. After (*ACCEPT) we fail the entire match (at this
846     position) but backtrack on reaching the end of the pattern. */
847 
848     if (Feptr < mb->end_subject &&
849         ((mb->moptions | mb->poptions) & PCRE2_ENDANCHORED) != 0)
850       {
851       if (Fop == OP_END) RRETURN(MATCH_NOMATCH);
852       return MATCH_NOMATCH;
853       }
854 
855     /* We have a successful match of the whole pattern. Record the result and
856     then do a direct return from the function. If there is space in the offset
857     vector, set any pairs that follow the highest-numbered captured string but
858     are less than the number of capturing groups in the pattern to PCRE2_UNSET.
859     It is documented that this happens. "Gaps" are set to PCRE2_UNSET
860     dynamically. It is only those at the end that need setting here. */
861 
862     mb->end_match_ptr = Feptr;           /* Record where we ended */
863     mb->end_offset_top = Foffset_top;    /* and how many extracts were taken */
864     mb->mark = Fmark;                    /* and the last success mark */
865     if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
866 
867     ovector[0] = Fstart_match - mb->start_subject;
868     ovector[1] = Feptr - mb->start_subject;
869 
870     /* Set i to the smaller of the sizes of the external and frame ovectors. */
871 
872     i = 2 * ((top_bracket + 1 > oveccount)? oveccount : top_bracket + 1);
873     memcpy(ovector + 2, Fovector, (i - 2) * sizeof(PCRE2_SIZE));
874     while (--i >= Foffset_top + 2) ovector[i] = PCRE2_UNSET;
875     return MATCH_MATCH;  /* Note: NOT RRETURN */
876 
877 
878     /*===================================================================== */
879     /* Match any single character type except newline; have to take care with
880     CRLF newlines and partial matching. */
881 
882     case OP_ANY:
883     if (IS_NEWLINE(Feptr)) RRETURN(MATCH_NOMATCH);
884     if (mb->partial != 0 &&
885         Feptr == mb->end_subject - 1 &&
886         NLBLOCK->nltype == NLTYPE_FIXED &&
887         NLBLOCK->nllen == 2 &&
888         UCHAR21TEST(Feptr) == NLBLOCK->nl[0])
889       {
890       mb->hitend = TRUE;
891       if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
892       }
893     /* Fall through */
894 
895     /* Match any single character whatsoever. */
896 
897     case OP_ALLANY:
898     if (Feptr >= mb->end_subject)  /* DO NOT merge the Feptr++ here; it must */
899       {                            /* not be updated before SCHECK_PARTIAL. */
900       SCHECK_PARTIAL();
901       RRETURN(MATCH_NOMATCH);
902       }
903     Feptr++;
904 #ifdef SUPPORT_UNICODE
905     if (utf) ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
906 #endif
907     Fecode++;
908     break;
909 
910 
911     /* ===================================================================== */
912     /* Match a single code unit, even in UTF mode. This opcode really does
913     match any code unit, even newline. (It really should be called ANYCODEUNIT,
914     of course - the byte name is from pre-16 bit days.) */
915 
916     case OP_ANYBYTE:
917     if (Feptr >= mb->end_subject)   /* DO NOT merge the Feptr++ here; it must */
918       {                             /* not be updated before SCHECK_PARTIAL. */
919       SCHECK_PARTIAL();
920       RRETURN(MATCH_NOMATCH);
921       }
922     Feptr++;
923     Fecode++;
924     break;
925 
926 
927     /* ===================================================================== */
928     /* Match a single character, casefully */
929 
930     case OP_CHAR:
931 #ifdef SUPPORT_UNICODE
932     if (utf)
933       {
934       Flength = 1;
935       Fecode++;
936       GETCHARLEN(fc, Fecode, Flength);
937       if (Flength > (PCRE2_SIZE)(mb->end_subject - Feptr))
938         {
939         CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
940         RRETURN(MATCH_NOMATCH);
941         }
942       for (; Flength > 0; Flength--)
943         {
944         if (*Fecode++ != UCHAR21INC(Feptr)) RRETURN(MATCH_NOMATCH);
945         }
946       }
947     else
948 #endif
949 
950     /* Not UTF mode */
951       {
952       if (mb->end_subject - Feptr < 1)
953         {
954         SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
955         RRETURN(MATCH_NOMATCH);
956         }
957       if (Fecode[1] != *Feptr++) RRETURN(MATCH_NOMATCH);
958       Fecode += 2;
959       }
960     break;
961 
962 
963     /* ===================================================================== */
964     /* Match a single character, caselessly. If we are at the end of the
965     subject, give up immediately. We get here only when the pattern character
966     has at most one other case. Characters with more than two cases are coded
967     as OP_PROP with the pseudo-property PT_CLIST. */
968 
969     case OP_CHARI:
970     if (Feptr >= mb->end_subject)
971       {
972       SCHECK_PARTIAL();
973       RRETURN(MATCH_NOMATCH);
974       }
975 
976 #ifdef SUPPORT_UNICODE
977     if (utf)
978       {
979       Flength = 1;
980       Fecode++;
981       GETCHARLEN(fc, Fecode, Flength);
982 
983       /* If the pattern character's value is < 128, we know that its other case
984       (if any) is also < 128 (and therefore only one code unit long in all
985       code-unit widths), so we can use the fast lookup table. We checked above
986       that there is at least one character left in the subject. */
987 
988       if (fc < 128)
989         {
990         uint32_t cc = UCHAR21(Feptr);
991         if (mb->lcc[fc] != TABLE_GET(cc, mb->lcc, cc)) RRETURN(MATCH_NOMATCH);
992         Fecode++;
993         Feptr++;
994         }
995 
996       /* Otherwise we must pick up the subject character and use Unicode
997       property support to test its other case. Note that we cannot use the
998       value of "Flength" to check for sufficient bytes left, because the other
999       case of the character may have more or fewer code units. */
1000 
1001       else
1002         {
1003         uint32_t dc;
1004         GETCHARINC(dc, Feptr);
1005         Fecode += Flength;
1006         if (dc != fc && dc != UCD_OTHERCASE(fc)) RRETURN(MATCH_NOMATCH);
1007         }
1008       }
1009 
1010     /* If UCP is set without UTF we must do the same as above, but with one
1011     character per code unit. */
1012 
1013     else if (ucp)
1014       {
1015       uint32_t cc = UCHAR21(Feptr);
1016       fc = Fecode[1];
1017       if (fc < 128)
1018         {
1019         if (mb->lcc[fc] != TABLE_GET(cc, mb->lcc, cc)) RRETURN(MATCH_NOMATCH);
1020         }
1021       else
1022         {
1023         if (cc != fc && cc != UCD_OTHERCASE(fc)) RRETURN(MATCH_NOMATCH);
1024         }
1025       Feptr++;
1026       Fecode += 2;
1027       }
1028 
1029     else
1030 #endif   /* SUPPORT_UNICODE */
1031 
1032     /* Not UTF or UCP mode; use the table for characters < 256. */
1033       {
1034       if (TABLE_GET(Fecode[1], mb->lcc, Fecode[1])
1035           != TABLE_GET(*Feptr, mb->lcc, *Feptr)) RRETURN(MATCH_NOMATCH);
1036       Feptr++;
1037       Fecode += 2;
1038       }
1039     break;
1040 
1041 
1042     /* ===================================================================== */
1043     /* Match one character that is not the given character. */
1044 
1045     case OP_NOT:
1046     case OP_NOTI:
1047     if (Feptr >= mb->end_subject)
1048       {
1049       SCHECK_PARTIAL();
1050       RRETURN(MATCH_NOMATCH);
1051       }
1052 
1053 #ifdef SUPPORT_UNICODE
1054     if (utf)
1055       {
1056       uint32_t ch;
1057       Fecode++;
1058       GETCHARINC(ch, Fecode);
1059       GETCHARINC(fc, Feptr);
1060       if (ch == fc)
1061         {
1062         RRETURN(MATCH_NOMATCH);  /* Caseful match */
1063         }
1064       else if (Fop == OP_NOTI)   /* If caseless */
1065         {
1066         if (ch > 127)
1067           ch = UCD_OTHERCASE(ch);
1068         else
1069           ch = (mb->fcc)[ch];
1070         if (ch == fc) RRETURN(MATCH_NOMATCH);
1071         }
1072       }
1073 
1074     /* UCP without UTF is as above, but with one character per code unit. */
1075 
1076     else if (ucp)
1077       {
1078       uint32_t ch;
1079       fc = UCHAR21INC(Feptr);
1080       ch = Fecode[1];
1081       Fecode += 2;
1082 
1083       if (ch == fc)
1084         {
1085         RRETURN(MATCH_NOMATCH);  /* Caseful match */
1086         }
1087       else if (Fop == OP_NOTI)   /* If caseless */
1088         {
1089         if (ch > 127)
1090           ch = UCD_OTHERCASE(ch);
1091         else
1092           ch = (mb->fcc)[ch];
1093         if (ch == fc) RRETURN(MATCH_NOMATCH);
1094         }
1095       }
1096 
1097     else
1098 #endif  /* SUPPORT_UNICODE */
1099 
1100     /* Neither UTF nor UCP is set */
1101 
1102       {
1103       uint32_t ch = Fecode[1];
1104       fc = UCHAR21INC(Feptr);
1105       if (ch == fc || (Fop == OP_NOTI && TABLE_GET(ch, mb->fcc, ch) == fc))
1106         RRETURN(MATCH_NOMATCH);
1107       Fecode += 2;
1108       }
1109     break;
1110 
1111 
1112     /* ===================================================================== */
1113     /* Match a single character repeatedly. */
1114 
1115 #define Loclength    F->temp_size
1116 #define Lstart_eptr  F->temp_sptr[0]
1117 #define Lcharptr     F->temp_sptr[1]
1118 #define Lmin         F->temp_32[0]
1119 #define Lmax         F->temp_32[1]
1120 #define Lc           F->temp_32[2]
1121 #define Loc          F->temp_32[3]
1122 
1123     case OP_EXACT:
1124     case OP_EXACTI:
1125     Lmin = Lmax = GET2(Fecode, 1);
1126     Fecode += 1 + IMM2_SIZE;
1127     goto REPEATCHAR;
1128 
1129     case OP_POSUPTO:
1130     case OP_POSUPTOI:
1131     reptype = REPTYPE_POS;
1132     Lmin = 0;
1133     Lmax = GET2(Fecode, 1);
1134     Fecode += 1 + IMM2_SIZE;
1135     goto REPEATCHAR;
1136 
1137     case OP_UPTO:
1138     case OP_UPTOI:
1139     reptype = REPTYPE_MAX;
1140     Lmin = 0;
1141     Lmax = GET2(Fecode, 1);
1142     Fecode += 1 + IMM2_SIZE;
1143     goto REPEATCHAR;
1144 
1145     case OP_MINUPTO:
1146     case OP_MINUPTOI:
1147     reptype = REPTYPE_MIN;
1148     Lmin = 0;
1149     Lmax = GET2(Fecode, 1);
1150     Fecode += 1 + IMM2_SIZE;
1151     goto REPEATCHAR;
1152 
1153     case OP_POSSTAR:
1154     case OP_POSSTARI:
1155     reptype = REPTYPE_POS;
1156     Lmin = 0;
1157     Lmax = UINT32_MAX;
1158     Fecode++;
1159     goto REPEATCHAR;
1160 
1161     case OP_POSPLUS:
1162     case OP_POSPLUSI:
1163     reptype = REPTYPE_POS;
1164     Lmin = 1;
1165     Lmax = UINT32_MAX;
1166     Fecode++;
1167     goto REPEATCHAR;
1168 
1169     case OP_POSQUERY:
1170     case OP_POSQUERYI:
1171     reptype = REPTYPE_POS;
1172     Lmin = 0;
1173     Lmax = 1;
1174     Fecode++;
1175     goto REPEATCHAR;
1176 
1177     case OP_STAR:
1178     case OP_STARI:
1179     case OP_MINSTAR:
1180     case OP_MINSTARI:
1181     case OP_PLUS:
1182     case OP_PLUSI:
1183     case OP_MINPLUS:
1184     case OP_MINPLUSI:
1185     case OP_QUERY:
1186     case OP_QUERYI:
1187     case OP_MINQUERY:
1188     case OP_MINQUERYI:
1189     fc = *Fecode++ - ((Fop < OP_STARI)? OP_STAR : OP_STARI);
1190     Lmin = rep_min[fc];
1191     Lmax = rep_max[fc];
1192     reptype = rep_typ[fc];
1193 
1194     /* Common code for all repeated single-character matches. We first check
1195     for the minimum number of characters. If the minimum equals the maximum, we
1196     are done. Otherwise, if minimizing, check the rest of the pattern for a
1197     match; if there isn't one, advance up to the maximum, one character at a
1198     time.
1199 
1200     If maximizing, advance up to the maximum number of matching characters,
1201     until Feptr is past the end of the maximum run. If possessive, we are
1202     then done (no backing up). Otherwise, match at this position; anything
1203     other than no match is immediately returned. For nomatch, back up one
1204     character, unless we are matching \R and the last thing matched was
1205     \r\n, in which case, back up two code units until we reach the first
1206     optional character position.
1207 
1208     The various UTF/non-UTF and caseful/caseless cases are handled separately,
1209     for speed. */
1210 
1211     REPEATCHAR:
1212 #ifdef SUPPORT_UNICODE
1213     if (utf)
1214       {
1215       Flength = 1;
1216       Lcharptr = Fecode;
1217       GETCHARLEN(fc, Fecode, Flength);
1218       Fecode += Flength;
1219 
1220       /* Handle multi-code-unit character matching, caseful and caseless. */
1221 
1222       if (Flength > 1)
1223         {
1224         uint32_t othercase;
1225 
1226         if (Fop >= OP_STARI &&     /* Caseless */
1227             (othercase = UCD_OTHERCASE(fc)) != fc)
1228           Loclength = PRIV(ord2utf)(othercase, Foccu);
1229         else Loclength = 0;
1230 
1231         for (i = 1; i <= Lmin; i++)
1232           {
1233           if (Feptr <= mb->end_subject - Flength &&
1234             memcmp(Feptr, Lcharptr, CU2BYTES(Flength)) == 0) Feptr += Flength;
1235           else if (Loclength > 0 &&
1236                    Feptr <= mb->end_subject - Loclength &&
1237                    memcmp(Feptr, Foccu, CU2BYTES(Loclength)) == 0)
1238             Feptr += Loclength;
1239           else
1240             {
1241             CHECK_PARTIAL();
1242             RRETURN(MATCH_NOMATCH);
1243             }
1244           }
1245 
1246         if (Lmin == Lmax) continue;
1247 
1248         if (reptype == REPTYPE_MIN)
1249           {
1250           for (;;)
1251             {
1252             RMATCH(Fecode, RM202);
1253             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1254             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1255             if (Feptr <= mb->end_subject - Flength &&
1256               memcmp(Feptr, Lcharptr, CU2BYTES(Flength)) == 0) Feptr += Flength;
1257             else if (Loclength > 0 &&
1258                      Feptr <= mb->end_subject - Loclength &&
1259                      memcmp(Feptr, Foccu, CU2BYTES(Loclength)) == 0)
1260               Feptr += Loclength;
1261             else
1262               {
1263               CHECK_PARTIAL();
1264               RRETURN(MATCH_NOMATCH);
1265               }
1266             }
1267           /* Control never gets here */
1268           }
1269 
1270         else  /* Maximize */
1271           {
1272           Lstart_eptr = Feptr;
1273           for (i = Lmin; i < Lmax; i++)
1274             {
1275             if (Feptr <= mb->end_subject - Flength &&
1276                 memcmp(Feptr, Lcharptr, CU2BYTES(Flength)) == 0)
1277               Feptr += Flength;
1278             else if (Loclength > 0 &&
1279                      Feptr <= mb->end_subject - Loclength &&
1280                      memcmp(Feptr, Foccu, CU2BYTES(Loclength)) == 0)
1281               Feptr += Loclength;
1282             else
1283               {
1284               CHECK_PARTIAL();
1285               break;
1286               }
1287             }
1288 
1289           /* After \C in UTF mode, Lstart_eptr might be in the middle of a
1290           Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
1291           go too far. */
1292 
1293           if (reptype != REPTYPE_POS) for(;;)
1294             {
1295             if (Feptr <= Lstart_eptr) break;
1296             RMATCH(Fecode, RM203);
1297             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1298             Feptr--;
1299             BACKCHAR(Feptr);
1300             }
1301           }
1302         break;   /* End of repeated wide character handling */
1303         }
1304 
1305       /* Length of UTF character is 1. Put it into the preserved variable and
1306       fall through to the non-UTF code. */
1307 
1308       Lc = fc;
1309       }
1310     else
1311 #endif  /* SUPPORT_UNICODE */
1312 
1313     /* When not in UTF mode, load a single-code-unit character. Then proceed as
1314     above, using Unicode casing if either UTF or UCP is set. */
1315 
1316     Lc = *Fecode++;
1317 
1318     /* Caseless comparison */
1319 
1320     if (Fop >= OP_STARI)
1321       {
1322 #if PCRE2_CODE_UNIT_WIDTH == 8
1323 #ifdef SUPPORT_UNICODE
1324       if (ucp && !utf && Lc > 127) Loc = UCD_OTHERCASE(Lc);
1325       else
1326 #endif  /* SUPPORT_UNICODE */
1327       /* Lc will be < 128 in UTF-8 mode. */
1328       Loc = mb->fcc[Lc];
1329 #else /* 16-bit & 32-bit */
1330 #ifdef SUPPORT_UNICODE
1331       if ((utf || ucp) && Lc > 127) Loc = UCD_OTHERCASE(Lc);
1332       else
1333 #endif  /* SUPPORT_UNICODE */
1334       Loc = TABLE_GET(Lc, mb->fcc, Lc);
1335 #endif  /* PCRE2_CODE_UNIT_WIDTH == 8 */
1336 
1337       for (i = 1; i <= Lmin; i++)
1338         {
1339         uint32_t cc;                 /* Faster than PCRE2_UCHAR */
1340         if (Feptr >= mb->end_subject)
1341           {
1342           SCHECK_PARTIAL();
1343           RRETURN(MATCH_NOMATCH);
1344           }
1345         cc = UCHAR21TEST(Feptr);
1346         if (Lc != cc && Loc != cc) RRETURN(MATCH_NOMATCH);
1347         Feptr++;
1348         }
1349       if (Lmin == Lmax) continue;
1350 
1351       if (reptype == REPTYPE_MIN)
1352         {
1353         for (;;)
1354           {
1355           uint32_t cc;               /* Faster than PCRE2_UCHAR */
1356           RMATCH(Fecode, RM25);
1357           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1358           if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1359           if (Feptr >= mb->end_subject)
1360             {
1361             SCHECK_PARTIAL();
1362             RRETURN(MATCH_NOMATCH);
1363             }
1364           cc = UCHAR21TEST(Feptr);
1365           if (Lc != cc && Loc != cc) RRETURN(MATCH_NOMATCH);
1366           Feptr++;
1367           }
1368         /* Control never gets here */
1369         }
1370 
1371       else  /* Maximize */
1372         {
1373         Lstart_eptr = Feptr;
1374         for (i = Lmin; i < Lmax; i++)
1375           {
1376           uint32_t cc;               /* Faster than PCRE2_UCHAR */
1377           if (Feptr >= mb->end_subject)
1378             {
1379             SCHECK_PARTIAL();
1380             break;
1381             }
1382           cc = UCHAR21TEST(Feptr);
1383           if (Lc != cc && Loc != cc) break;
1384           Feptr++;
1385           }
1386         if (reptype != REPTYPE_POS) for (;;)
1387           {
1388           if (Feptr == Lstart_eptr) break;
1389           RMATCH(Fecode, RM26);
1390           Feptr--;
1391           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1392           }
1393         }
1394       }
1395 
1396     /* Caseful comparisons (includes all multi-byte characters) */
1397 
1398     else
1399       {
1400       for (i = 1; i <= Lmin; i++)
1401         {
1402         if (Feptr >= mb->end_subject)
1403           {
1404           SCHECK_PARTIAL();
1405           RRETURN(MATCH_NOMATCH);
1406           }
1407         if (Lc != UCHAR21INCTEST(Feptr)) RRETURN(MATCH_NOMATCH);
1408         }
1409 
1410       if (Lmin == Lmax) continue;
1411 
1412       if (reptype == REPTYPE_MIN)
1413         {
1414         for (;;)
1415           {
1416           RMATCH(Fecode, RM27);
1417           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1418           if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1419           if (Feptr >= mb->end_subject)
1420             {
1421             SCHECK_PARTIAL();
1422             RRETURN(MATCH_NOMATCH);
1423             }
1424           if (Lc != UCHAR21INCTEST(Feptr)) RRETURN(MATCH_NOMATCH);
1425           }
1426         /* Control never gets here */
1427         }
1428       else  /* Maximize */
1429         {
1430         Lstart_eptr = Feptr;
1431         for (i = Lmin; i < Lmax; i++)
1432           {
1433           if (Feptr >= mb->end_subject)
1434             {
1435             SCHECK_PARTIAL();
1436             break;
1437             }
1438 
1439           if (Lc != UCHAR21TEST(Feptr)) break;
1440           Feptr++;
1441           }
1442 
1443         if (reptype != REPTYPE_POS) for (;;)
1444           {
1445           if (Feptr <= Lstart_eptr) break;
1446           RMATCH(Fecode, RM28);
1447           Feptr--;
1448           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1449           }
1450         }
1451       }
1452     break;
1453 
1454 #undef Loclength
1455 #undef Lstart_eptr
1456 #undef Lcharptr
1457 #undef Lmin
1458 #undef Lmax
1459 #undef Lc
1460 #undef Loc
1461 
1462 
1463     /* ===================================================================== */
1464     /* Match a negated single character repeatedly. This is almost a
1465     repeat of the code for a repeated single character, but I haven't found a
1466     nice way of commoning these up that doesn't require a test of the
1467     positive/negative option for each character match. Maybe that wouldn't add
1468     very much to the time taken, but character matching *is* what this is all
1469     about... */
1470 
1471 #define Lstart_eptr  F->temp_sptr[0]
1472 #define Lmin         F->temp_32[0]
1473 #define Lmax         F->temp_32[1]
1474 #define Lc           F->temp_32[2]
1475 #define Loc          F->temp_32[3]
1476 
1477     case OP_NOTEXACT:
1478     case OP_NOTEXACTI:
1479     Lmin = Lmax = GET2(Fecode, 1);
1480     Fecode += 1 + IMM2_SIZE;
1481     goto REPEATNOTCHAR;
1482 
1483     case OP_NOTUPTO:
1484     case OP_NOTUPTOI:
1485     Lmin = 0;
1486     Lmax = GET2(Fecode, 1);
1487     reptype = REPTYPE_MAX;
1488     Fecode += 1 + IMM2_SIZE;
1489     goto REPEATNOTCHAR;
1490 
1491     case OP_NOTMINUPTO:
1492     case OP_NOTMINUPTOI:
1493     Lmin = 0;
1494     Lmax = GET2(Fecode, 1);
1495     reptype = REPTYPE_MIN;
1496     Fecode += 1 + IMM2_SIZE;
1497     goto REPEATNOTCHAR;
1498 
1499     case OP_NOTPOSSTAR:
1500     case OP_NOTPOSSTARI:
1501     reptype = REPTYPE_POS;
1502     Lmin = 0;
1503     Lmax = UINT32_MAX;
1504     Fecode++;
1505     goto REPEATNOTCHAR;
1506 
1507     case OP_NOTPOSPLUS:
1508     case OP_NOTPOSPLUSI:
1509     reptype = REPTYPE_POS;
1510     Lmin = 1;
1511     Lmax = UINT32_MAX;
1512     Fecode++;
1513     goto REPEATNOTCHAR;
1514 
1515     case OP_NOTPOSQUERY:
1516     case OP_NOTPOSQUERYI:
1517     reptype = REPTYPE_POS;
1518     Lmin = 0;
1519     Lmax = 1;
1520     Fecode++;
1521     goto REPEATNOTCHAR;
1522 
1523     case OP_NOTPOSUPTO:
1524     case OP_NOTPOSUPTOI:
1525     reptype = REPTYPE_POS;
1526     Lmin = 0;
1527     Lmax = GET2(Fecode, 1);
1528     Fecode += 1 + IMM2_SIZE;
1529     goto REPEATNOTCHAR;
1530 
1531     case OP_NOTSTAR:
1532     case OP_NOTSTARI:
1533     case OP_NOTMINSTAR:
1534     case OP_NOTMINSTARI:
1535     case OP_NOTPLUS:
1536     case OP_NOTPLUSI:
1537     case OP_NOTMINPLUS:
1538     case OP_NOTMINPLUSI:
1539     case OP_NOTQUERY:
1540     case OP_NOTQUERYI:
1541     case OP_NOTMINQUERY:
1542     case OP_NOTMINQUERYI:
1543     fc = *Fecode++ - ((Fop >= OP_NOTSTARI)? OP_NOTSTARI: OP_NOTSTAR);
1544     Lmin = rep_min[fc];
1545     Lmax = rep_max[fc];
1546     reptype = rep_typ[fc];
1547 
1548     /* Common code for all repeated single-character non-matches. */
1549 
1550     REPEATNOTCHAR:
1551     GETCHARINCTEST(Lc, Fecode);
1552 
1553     /* The code is duplicated for the caseless and caseful cases, for speed,
1554     since matching characters is likely to be quite common. First, ensure the
1555     minimum number of matches are present. If Lmin = Lmax, we are done.
1556     Otherwise, if minimizing, keep trying the rest of the expression and
1557     advancing one matching character if failing, up to the maximum.
1558     Alternatively, if maximizing, find the maximum number of characters and
1559     work backwards. */
1560 
1561     if (Fop >= OP_NOTSTARI)     /* Caseless */
1562       {
1563 #ifdef SUPPORT_UNICODE
1564       if ((utf || ucp) && Lc > 127)
1565         Loc = UCD_OTHERCASE(Lc);
1566       else
1567 #endif /* SUPPORT_UNICODE */
1568 
1569       Loc = TABLE_GET(Lc, mb->fcc, Lc);  /* Other case from table */
1570 
1571 #ifdef SUPPORT_UNICODE
1572       if (utf)
1573         {
1574         uint32_t d;
1575         for (i = 1; i <= Lmin; i++)
1576           {
1577           if (Feptr >= mb->end_subject)
1578             {
1579             SCHECK_PARTIAL();
1580             RRETURN(MATCH_NOMATCH);
1581             }
1582           GETCHARINC(d, Feptr);
1583           if (Lc == d || Loc == d) RRETURN(MATCH_NOMATCH);
1584           }
1585         }
1586       else
1587 #endif  /* SUPPORT_UNICODE */
1588 
1589       /* Not UTF mode */
1590         {
1591         for (i = 1; i <= Lmin; i++)
1592           {
1593           if (Feptr >= mb->end_subject)
1594             {
1595             SCHECK_PARTIAL();
1596             RRETURN(MATCH_NOMATCH);
1597             }
1598           if (Lc == *Feptr || Loc == *Feptr) RRETURN(MATCH_NOMATCH);
1599           Feptr++;
1600           }
1601         }
1602 
1603       if (Lmin == Lmax) continue;  /* Finished for exact count */
1604 
1605       if (reptype == REPTYPE_MIN)
1606         {
1607 #ifdef SUPPORT_UNICODE
1608         if (utf)
1609           {
1610           uint32_t d;
1611           for (;;)
1612             {
1613             RMATCH(Fecode, RM204);
1614             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1615             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1616             if (Feptr >= mb->end_subject)
1617               {
1618               SCHECK_PARTIAL();
1619               RRETURN(MATCH_NOMATCH);
1620               }
1621             GETCHARINC(d, Feptr);
1622             if (Lc == d || Loc == d) RRETURN(MATCH_NOMATCH);
1623             }
1624           }
1625         else
1626 #endif  /*SUPPORT_UNICODE */
1627 
1628         /* Not UTF mode */
1629           {
1630           for (;;)
1631             {
1632             RMATCH(Fecode, RM29);
1633             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1634             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1635             if (Feptr >= mb->end_subject)
1636               {
1637               SCHECK_PARTIAL();
1638               RRETURN(MATCH_NOMATCH);
1639               }
1640             if (Lc == *Feptr || Loc == *Feptr) RRETURN(MATCH_NOMATCH);
1641             Feptr++;
1642             }
1643           }
1644         /* Control never gets here */
1645         }
1646 
1647       /* Maximize case */
1648 
1649       else
1650         {
1651         Lstart_eptr = Feptr;
1652 
1653 #ifdef SUPPORT_UNICODE
1654         if (utf)
1655           {
1656           uint32_t d;
1657           for (i = Lmin; i < Lmax; i++)
1658             {
1659             int len = 1;
1660             if (Feptr >= mb->end_subject)
1661               {
1662               SCHECK_PARTIAL();
1663               break;
1664               }
1665             GETCHARLEN(d, Feptr, len);
1666             if (Lc == d || Loc == d) break;
1667             Feptr += len;
1668             }
1669 
1670           /* After \C in UTF mode, Lstart_eptr might be in the middle of a
1671           Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
1672           go too far. */
1673 
1674           if (reptype != REPTYPE_POS) for(;;)
1675             {
1676             if (Feptr <= Lstart_eptr) break;
1677             RMATCH(Fecode, RM205);
1678             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1679             Feptr--;
1680             BACKCHAR(Feptr);
1681             }
1682           }
1683         else
1684 #endif  /* SUPPORT_UNICODE */
1685 
1686         /* Not UTF mode */
1687           {
1688           for (i = Lmin; i < Lmax; i++)
1689             {
1690             if (Feptr >= mb->end_subject)
1691               {
1692               SCHECK_PARTIAL();
1693               break;
1694               }
1695             if (Lc == *Feptr || Loc == *Feptr) break;
1696             Feptr++;
1697             }
1698           if (reptype != REPTYPE_POS) for (;;)
1699             {
1700             if (Feptr == Lstart_eptr) break;
1701             RMATCH(Fecode, RM30);
1702             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1703             Feptr--;
1704             }
1705           }
1706         }
1707       }
1708 
1709     /* Caseful comparisons */
1710 
1711     else
1712       {
1713 #ifdef SUPPORT_UNICODE
1714       if (utf)
1715         {
1716         uint32_t d;
1717         for (i = 1; i <= Lmin; i++)
1718           {
1719           if (Feptr >= mb->end_subject)
1720             {
1721             SCHECK_PARTIAL();
1722             RRETURN(MATCH_NOMATCH);
1723             }
1724           GETCHARINC(d, Feptr);
1725           if (Lc == d) RRETURN(MATCH_NOMATCH);
1726           }
1727         }
1728       else
1729 #endif
1730       /* Not UTF mode */
1731         {
1732         for (i = 1; i <= Lmin; i++)
1733           {
1734           if (Feptr >= mb->end_subject)
1735             {
1736             SCHECK_PARTIAL();
1737             RRETURN(MATCH_NOMATCH);
1738             }
1739           if (Lc == *Feptr++) RRETURN(MATCH_NOMATCH);
1740           }
1741         }
1742 
1743       if (Lmin == Lmax) continue;
1744 
1745       if (reptype == REPTYPE_MIN)
1746         {
1747 #ifdef SUPPORT_UNICODE
1748         if (utf)
1749           {
1750           uint32_t d;
1751           for (;;)
1752             {
1753             RMATCH(Fecode, RM206);
1754             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1755             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1756             if (Feptr >= mb->end_subject)
1757               {
1758               SCHECK_PARTIAL();
1759               RRETURN(MATCH_NOMATCH);
1760               }
1761             GETCHARINC(d, Feptr);
1762             if (Lc == d) RRETURN(MATCH_NOMATCH);
1763             }
1764           }
1765         else
1766 #endif
1767         /* Not UTF mode */
1768           {
1769           for (;;)
1770             {
1771             RMATCH(Fecode, RM31);
1772             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1773             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1774             if (Feptr >= mb->end_subject)
1775               {
1776               SCHECK_PARTIAL();
1777               RRETURN(MATCH_NOMATCH);
1778               }
1779             if (Lc == *Feptr++) RRETURN(MATCH_NOMATCH);
1780             }
1781           }
1782         /* Control never gets here */
1783         }
1784 
1785       /* Maximize case */
1786 
1787       else
1788         {
1789         Lstart_eptr = Feptr;
1790 
1791 #ifdef SUPPORT_UNICODE
1792         if (utf)
1793           {
1794           uint32_t d;
1795           for (i = Lmin; i < Lmax; i++)
1796             {
1797             int len = 1;
1798             if (Feptr >= mb->end_subject)
1799               {
1800               SCHECK_PARTIAL();
1801               break;
1802               }
1803             GETCHARLEN(d, Feptr, len);
1804             if (Lc == d) break;
1805             Feptr += len;
1806             }
1807 
1808           /* After \C in UTF mode, Lstart_eptr might be in the middle of a
1809           Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
1810           go too far. */
1811 
1812           if (reptype != REPTYPE_POS) for(;;)
1813             {
1814             if (Feptr <= Lstart_eptr) break;
1815             RMATCH(Fecode, RM207);
1816             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1817             Feptr--;
1818             BACKCHAR(Feptr);
1819             }
1820           }
1821         else
1822 #endif
1823         /* Not UTF mode */
1824           {
1825           for (i = Lmin; i < Lmax; i++)
1826             {
1827             if (Feptr >= mb->end_subject)
1828               {
1829               SCHECK_PARTIAL();
1830               break;
1831               }
1832             if (Lc == *Feptr) break;
1833             Feptr++;
1834             }
1835           if (reptype != REPTYPE_POS) for (;;)
1836             {
1837             if (Feptr == Lstart_eptr) break;
1838             RMATCH(Fecode, RM32);
1839             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1840             Feptr--;
1841             }
1842           }
1843         }
1844       }
1845     break;
1846 
1847 #undef Lstart_eptr
1848 #undef Lmin
1849 #undef Lmax
1850 #undef Lc
1851 #undef Loc
1852 
1853 
1854     /* ===================================================================== */
1855     /* Match a bit-mapped character class, possibly repeatedly. These opcodes
1856     are used when all the characters in the class have values in the range
1857     0-255, and either the matching is caseful, or the characters are in the
1858     range 0-127 when UTF processing is enabled. The only difference between
1859     OP_CLASS and OP_NCLASS occurs when a data character outside the range is
1860     encountered. */
1861 
1862 #define Lmin               F->temp_32[0]
1863 #define Lmax               F->temp_32[1]
1864 #define Lstart_eptr        F->temp_sptr[0]
1865 #define Lbyte_map_address  F->temp_sptr[1]
1866 #define Lbyte_map          ((unsigned char *)Lbyte_map_address)
1867 
1868     case OP_NCLASS:
1869     case OP_CLASS:
1870       {
1871       Lbyte_map_address = Fecode + 1;           /* Save for matching */
1872       Fecode += 1 + (32 / sizeof(PCRE2_UCHAR)); /* Advance past the item */
1873 
1874       /* Look past the end of the item to see if there is repeat information
1875       following. Then obey similar code to character type repeats. */
1876 
1877       switch (*Fecode)
1878         {
1879         case OP_CRSTAR:
1880         case OP_CRMINSTAR:
1881         case OP_CRPLUS:
1882         case OP_CRMINPLUS:
1883         case OP_CRQUERY:
1884         case OP_CRMINQUERY:
1885         case OP_CRPOSSTAR:
1886         case OP_CRPOSPLUS:
1887         case OP_CRPOSQUERY:
1888         fc = *Fecode++ - OP_CRSTAR;
1889         Lmin = rep_min[fc];
1890         Lmax = rep_max[fc];
1891         reptype = rep_typ[fc];
1892         break;
1893 
1894         case OP_CRRANGE:
1895         case OP_CRMINRANGE:
1896         case OP_CRPOSRANGE:
1897         Lmin = GET2(Fecode, 1);
1898         Lmax = GET2(Fecode, 1 + IMM2_SIZE);
1899         if (Lmax == 0) Lmax = UINT32_MAX;       /* Max 0 => infinity */
1900         reptype = rep_typ[*Fecode - OP_CRSTAR];
1901         Fecode += 1 + 2 * IMM2_SIZE;
1902         break;
1903 
1904         default:               /* No repeat follows */
1905         Lmin = Lmax = 1;
1906         break;
1907         }
1908 
1909       /* First, ensure the minimum number of matches are present. */
1910 
1911 #ifdef SUPPORT_UNICODE
1912       if (utf)
1913         {
1914         for (i = 1; i <= Lmin; i++)
1915           {
1916           if (Feptr >= mb->end_subject)
1917             {
1918             SCHECK_PARTIAL();
1919             RRETURN(MATCH_NOMATCH);
1920             }
1921           GETCHARINC(fc, Feptr);
1922           if (fc > 255)
1923             {
1924             if (Fop == OP_CLASS) RRETURN(MATCH_NOMATCH);
1925             }
1926           else
1927             if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
1928           }
1929         }
1930       else
1931 #endif
1932       /* Not UTF mode */
1933         {
1934         for (i = 1; i <= Lmin; i++)
1935           {
1936           if (Feptr >= mb->end_subject)
1937             {
1938             SCHECK_PARTIAL();
1939             RRETURN(MATCH_NOMATCH);
1940             }
1941           fc = *Feptr++;
1942 #if PCRE2_CODE_UNIT_WIDTH != 8
1943           if (fc > 255)
1944             {
1945             if (Fop == OP_CLASS) RRETURN(MATCH_NOMATCH);
1946             }
1947           else
1948 #endif
1949           if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
1950           }
1951         }
1952 
1953       /* If Lmax == Lmin we are done. Continue with main loop. */
1954 
1955       if (Lmin == Lmax) continue;
1956 
1957       /* If minimizing, keep testing the rest of the expression and advancing
1958       the pointer while it matches the class. */
1959 
1960       if (reptype == REPTYPE_MIN)
1961         {
1962 #ifdef SUPPORT_UNICODE
1963         if (utf)
1964           {
1965           for (;;)
1966             {
1967             RMATCH(Fecode, RM200);
1968             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1969             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1970             if (Feptr >= mb->end_subject)
1971               {
1972               SCHECK_PARTIAL();
1973               RRETURN(MATCH_NOMATCH);
1974               }
1975             GETCHARINC(fc, Feptr);
1976             if (fc > 255)
1977               {
1978               if (Fop == OP_CLASS) RRETURN(MATCH_NOMATCH);
1979               }
1980             else
1981               if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
1982             }
1983           }
1984         else
1985 #endif
1986         /* Not UTF mode */
1987           {
1988           for (;;)
1989             {
1990             RMATCH(Fecode, RM23);
1991             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1992             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1993             if (Feptr >= mb->end_subject)
1994               {
1995               SCHECK_PARTIAL();
1996               RRETURN(MATCH_NOMATCH);
1997               }
1998             fc = *Feptr++;
1999 #if PCRE2_CODE_UNIT_WIDTH != 8
2000             if (fc > 255)
2001               {
2002               if (Fop == OP_CLASS) RRETURN(MATCH_NOMATCH);
2003               }
2004             else
2005 #endif
2006             if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
2007             }
2008           }
2009         /* Control never gets here */
2010         }
2011 
2012       /* If maximizing, find the longest possible run, then work backwards. */
2013 
2014       else
2015         {
2016         Lstart_eptr = Feptr;
2017 
2018 #ifdef SUPPORT_UNICODE
2019         if (utf)
2020           {
2021           for (i = Lmin; i < Lmax; i++)
2022             {
2023             int len = 1;
2024             if (Feptr >= mb->end_subject)
2025               {
2026               SCHECK_PARTIAL();
2027               break;
2028               }
2029             GETCHARLEN(fc, Feptr, len);
2030             if (fc > 255)
2031               {
2032               if (Fop == OP_CLASS) break;
2033               }
2034             else
2035               if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) break;
2036             Feptr += len;
2037             }
2038 
2039           if (reptype == REPTYPE_POS) continue;    /* No backtracking */
2040 
2041           /* After \C in UTF mode, Lstart_eptr might be in the middle of a
2042           Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
2043           go too far. */
2044 
2045           for (;;)
2046             {
2047             RMATCH(Fecode, RM201);
2048             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2049             if (Feptr-- <= Lstart_eptr) break;  /* Tried at original position */
2050             BACKCHAR(Feptr);
2051             }
2052           }
2053         else
2054 #endif
2055           /* Not UTF mode */
2056           {
2057           for (i = Lmin; i < Lmax; i++)
2058             {
2059             if (Feptr >= mb->end_subject)
2060               {
2061               SCHECK_PARTIAL();
2062               break;
2063               }
2064             fc = *Feptr;
2065 #if PCRE2_CODE_UNIT_WIDTH != 8
2066             if (fc > 255)
2067               {
2068               if (Fop == OP_CLASS) break;
2069               }
2070             else
2071 #endif
2072             if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) break;
2073             Feptr++;
2074             }
2075 
2076           if (reptype == REPTYPE_POS) continue;    /* No backtracking */
2077 
2078           while (Feptr >= Lstart_eptr)
2079             {
2080             RMATCH(Fecode, RM24);
2081             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2082             Feptr--;
2083             }
2084           }
2085 
2086         RRETURN(MATCH_NOMATCH);
2087         }
2088       }
2089     /* Control never gets here */
2090 
2091 #undef Lbyte_map_address
2092 #undef Lbyte_map
2093 #undef Lstart_eptr
2094 #undef Lmin
2095 #undef Lmax
2096 
2097 
2098     /* ===================================================================== */
2099     /* Match an extended character class. In the 8-bit library, this opcode is
2100     encountered only when UTF-8 mode is supported. In the 16-bit and
2101     32-bit libraries, codepoints greater than 255 may be encountered even when
2102     UTF is not supported. */
2103 
2104 #define Lstart_eptr  F->temp_sptr[0]
2105 #define Lxclass_data F->temp_sptr[1]
2106 #define Lmin         F->temp_32[0]
2107 #define Lmax         F->temp_32[1]
2108 
2109 #ifdef SUPPORT_WIDE_CHARS
2110     case OP_XCLASS:
2111       {
2112       Lxclass_data = Fecode + 1 + LINK_SIZE;  /* Save for matching */
2113       Fecode += GET(Fecode, 1);               /* Advance past the item */
2114 
2115       switch (*Fecode)
2116         {
2117         case OP_CRSTAR:
2118         case OP_CRMINSTAR:
2119         case OP_CRPLUS:
2120         case OP_CRMINPLUS:
2121         case OP_CRQUERY:
2122         case OP_CRMINQUERY:
2123         case OP_CRPOSSTAR:
2124         case OP_CRPOSPLUS:
2125         case OP_CRPOSQUERY:
2126         fc = *Fecode++ - OP_CRSTAR;
2127         Lmin = rep_min[fc];
2128         Lmax = rep_max[fc];
2129         reptype = rep_typ[fc];
2130         break;
2131 
2132         case OP_CRRANGE:
2133         case OP_CRMINRANGE:
2134         case OP_CRPOSRANGE:
2135         Lmin = GET2(Fecode, 1);
2136         Lmax = GET2(Fecode, 1 + IMM2_SIZE);
2137         if (Lmax == 0) Lmax = UINT32_MAX;  /* Max 0 => infinity */
2138         reptype = rep_typ[*Fecode - OP_CRSTAR];
2139         Fecode += 1 + 2 * IMM2_SIZE;
2140         break;
2141 
2142         default:               /* No repeat follows */
2143         Lmin = Lmax = 1;
2144         break;
2145         }
2146 
2147       /* First, ensure the minimum number of matches are present. */
2148 
2149       for (i = 1; i <= Lmin; i++)
2150         {
2151         if (Feptr >= mb->end_subject)
2152           {
2153           SCHECK_PARTIAL();
2154           RRETURN(MATCH_NOMATCH);
2155           }
2156         GETCHARINCTEST(fc, Feptr);
2157         if (!PRIV(xclass)(fc, Lxclass_data, utf)) RRETURN(MATCH_NOMATCH);
2158         }
2159 
2160       /* If Lmax == Lmin we can just continue with the main loop. */
2161 
2162       if (Lmin == Lmax) continue;
2163 
2164       /* If minimizing, keep testing the rest of the expression and advancing
2165       the pointer while it matches the class. */
2166 
2167       if (reptype == REPTYPE_MIN)
2168         {
2169         for (;;)
2170           {
2171           RMATCH(Fecode, RM100);
2172           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2173           if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
2174           if (Feptr >= mb->end_subject)
2175             {
2176             SCHECK_PARTIAL();
2177             RRETURN(MATCH_NOMATCH);
2178             }
2179           GETCHARINCTEST(fc, Feptr);
2180           if (!PRIV(xclass)(fc, Lxclass_data, utf)) RRETURN(MATCH_NOMATCH);
2181           }
2182         /* Control never gets here */
2183         }
2184 
2185       /* If maximizing, find the longest possible run, then work backwards. */
2186 
2187       else
2188         {
2189         Lstart_eptr = Feptr;
2190         for (i = Lmin; i < Lmax; i++)
2191           {
2192           int len = 1;
2193           if (Feptr >= mb->end_subject)
2194             {
2195             SCHECK_PARTIAL();
2196             break;
2197             }
2198 #ifdef SUPPORT_UNICODE
2199           GETCHARLENTEST(fc, Feptr, len);
2200 #else
2201           fc = *Feptr;
2202 #endif
2203           if (!PRIV(xclass)(fc, Lxclass_data, utf)) break;
2204           Feptr += len;
2205           }
2206 
2207         if (reptype == REPTYPE_POS) continue;    /* No backtracking */
2208 
2209         /* After \C in UTF mode, Lstart_eptr might be in the middle of a
2210         Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
2211         go too far. */
2212 
2213         for(;;)
2214           {
2215           RMATCH(Fecode, RM101);
2216           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2217           if (Feptr-- <= Lstart_eptr) break;  /* Tried at original position */
2218 #ifdef SUPPORT_UNICODE
2219           if (utf) BACKCHAR(Feptr);
2220 #endif
2221           }
2222         RRETURN(MATCH_NOMATCH);
2223         }
2224 
2225       /* Control never gets here */
2226       }
2227 #endif  /* SUPPORT_WIDE_CHARS: end of XCLASS */
2228 
2229 #undef Lstart_eptr
2230 #undef Lxclass_data
2231 #undef Lmin
2232 #undef Lmax
2233 
2234 
2235     /* ===================================================================== */
2236     /* Match various character types when PCRE2_UCP is not set. These opcodes
2237     are not generated when PCRE2_UCP is set - instead appropriate property
2238     tests are compiled. */
2239 
2240     case OP_NOT_DIGIT:
2241     if (Feptr >= mb->end_subject)
2242       {
2243       SCHECK_PARTIAL();
2244       RRETURN(MATCH_NOMATCH);
2245       }
2246     GETCHARINCTEST(fc, Feptr);
2247     if (CHMAX_255(fc) && (mb->ctypes[fc] & ctype_digit) != 0)
2248       RRETURN(MATCH_NOMATCH);
2249     Fecode++;
2250     break;
2251 
2252     case OP_DIGIT:
2253     if (Feptr >= mb->end_subject)
2254       {
2255       SCHECK_PARTIAL();
2256       RRETURN(MATCH_NOMATCH);
2257       }
2258     GETCHARINCTEST(fc, Feptr);
2259     if (!CHMAX_255(fc) || (mb->ctypes[fc] & ctype_digit) == 0)
2260       RRETURN(MATCH_NOMATCH);
2261     Fecode++;
2262     break;
2263 
2264     case OP_NOT_WHITESPACE:
2265     if (Feptr >= mb->end_subject)
2266       {
2267       SCHECK_PARTIAL();
2268       RRETURN(MATCH_NOMATCH);
2269       }
2270     GETCHARINCTEST(fc, Feptr);
2271     if (CHMAX_255(fc) && (mb->ctypes[fc] & ctype_space) != 0)
2272       RRETURN(MATCH_NOMATCH);
2273     Fecode++;
2274     break;
2275 
2276     case OP_WHITESPACE:
2277     if (Feptr >= mb->end_subject)
2278       {
2279       SCHECK_PARTIAL();
2280       RRETURN(MATCH_NOMATCH);
2281       }
2282     GETCHARINCTEST(fc, Feptr);
2283     if (!CHMAX_255(fc) || (mb->ctypes[fc] & ctype_space) == 0)
2284       RRETURN(MATCH_NOMATCH);
2285     Fecode++;
2286     break;
2287 
2288     case OP_NOT_WORDCHAR:
2289     if (Feptr >= mb->end_subject)
2290       {
2291       SCHECK_PARTIAL();
2292       RRETURN(MATCH_NOMATCH);
2293       }
2294     GETCHARINCTEST(fc, Feptr);
2295     if (CHMAX_255(fc) && (mb->ctypes[fc] & ctype_word) != 0)
2296       RRETURN(MATCH_NOMATCH);
2297     Fecode++;
2298     break;
2299 
2300     case OP_WORDCHAR:
2301     if (Feptr >= mb->end_subject)
2302       {
2303       SCHECK_PARTIAL();
2304       RRETURN(MATCH_NOMATCH);
2305       }
2306     GETCHARINCTEST(fc, Feptr);
2307     if (!CHMAX_255(fc) || (mb->ctypes[fc] & ctype_word) == 0)
2308       RRETURN(MATCH_NOMATCH);
2309     Fecode++;
2310     break;
2311 
2312     case OP_ANYNL:
2313     if (Feptr >= mb->end_subject)
2314       {
2315       SCHECK_PARTIAL();
2316       RRETURN(MATCH_NOMATCH);
2317       }
2318     GETCHARINCTEST(fc, Feptr);
2319     switch(fc)
2320       {
2321       default: RRETURN(MATCH_NOMATCH);
2322 
2323       case CHAR_CR:
2324       if (Feptr >= mb->end_subject)
2325         {
2326         SCHECK_PARTIAL();
2327         }
2328       else if (UCHAR21TEST(Feptr) == CHAR_LF) Feptr++;
2329       break;
2330 
2331       case CHAR_LF:
2332       break;
2333 
2334       case CHAR_VT:
2335       case CHAR_FF:
2336       case CHAR_NEL:
2337 #ifndef EBCDIC
2338       case 0x2028:
2339       case 0x2029:
2340 #endif  /* Not EBCDIC */
2341       if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) RRETURN(MATCH_NOMATCH);
2342       break;
2343       }
2344     Fecode++;
2345     break;
2346 
2347     case OP_NOT_HSPACE:
2348     if (Feptr >= mb->end_subject)
2349       {
2350       SCHECK_PARTIAL();
2351       RRETURN(MATCH_NOMATCH);
2352       }
2353     GETCHARINCTEST(fc, Feptr);
2354     switch(fc)
2355       {
2356       HSPACE_CASES: RRETURN(MATCH_NOMATCH);  /* Byte and multibyte cases */
2357       default: break;
2358       }
2359     Fecode++;
2360     break;
2361 
2362     case OP_HSPACE:
2363     if (Feptr >= mb->end_subject)
2364       {
2365       SCHECK_PARTIAL();
2366       RRETURN(MATCH_NOMATCH);
2367       }
2368     GETCHARINCTEST(fc, Feptr);
2369     switch(fc)
2370       {
2371       HSPACE_CASES: break;  /* Byte and multibyte cases */
2372       default: RRETURN(MATCH_NOMATCH);
2373       }
2374     Fecode++;
2375     break;
2376 
2377     case OP_NOT_VSPACE:
2378     if (Feptr >= mb->end_subject)
2379       {
2380       SCHECK_PARTIAL();
2381       RRETURN(MATCH_NOMATCH);
2382       }
2383     GETCHARINCTEST(fc, Feptr);
2384     switch(fc)
2385       {
2386       VSPACE_CASES: RRETURN(MATCH_NOMATCH);
2387       default: break;
2388       }
2389     Fecode++;
2390     break;
2391 
2392     case OP_VSPACE:
2393     if (Feptr >= mb->end_subject)
2394       {
2395       SCHECK_PARTIAL();
2396       RRETURN(MATCH_NOMATCH);
2397       }
2398     GETCHARINCTEST(fc, Feptr);
2399     switch(fc)
2400       {
2401       VSPACE_CASES: break;
2402       default: RRETURN(MATCH_NOMATCH);
2403       }
2404     Fecode++;
2405     break;
2406 
2407 
2408 #ifdef SUPPORT_UNICODE
2409 
2410     /* ===================================================================== */
2411     /* Check the next character by Unicode property. We will get here only
2412     if the support is in the binary; otherwise a compile-time error occurs. */
2413 
2414     case OP_PROP:
2415     case OP_NOTPROP:
2416     if (Feptr >= mb->end_subject)
2417       {
2418       SCHECK_PARTIAL();
2419       RRETURN(MATCH_NOMATCH);
2420       }
2421     GETCHARINCTEST(fc, Feptr);
2422       {
2423       const uint32_t *cp;
2424       const ucd_record *prop = GET_UCD(fc);
2425       BOOL notmatch = Fop == OP_NOTPROP;
2426 
2427       switch(Fecode[1])
2428         {
2429         case PT_ANY:
2430         if (notmatch) RRETURN(MATCH_NOMATCH);
2431         break;
2432 
2433         case PT_LAMP:
2434         if ((prop->chartype == ucp_Lu ||
2435              prop->chartype == ucp_Ll ||
2436              prop->chartype == ucp_Lt) == notmatch)
2437           RRETURN(MATCH_NOMATCH);
2438         break;
2439 
2440         case PT_GC:
2441         if ((Fecode[2] == PRIV(ucp_gentype)[prop->chartype]) == notmatch)
2442           RRETURN(MATCH_NOMATCH);
2443         break;
2444 
2445         case PT_PC:
2446         if ((Fecode[2] == prop->chartype) == notmatch)
2447           RRETURN(MATCH_NOMATCH);
2448         break;
2449 
2450         case PT_SC:
2451         if ((Fecode[2] == prop->script) == notmatch)
2452           RRETURN(MATCH_NOMATCH);
2453         break;
2454 
2455         case PT_SCX:
2456           {
2457           BOOL ok = (Fecode[2] == prop->script ||
2458                      MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Fecode[2]) != 0);
2459           if (ok == notmatch) RRETURN(MATCH_NOMATCH);
2460           }
2461         break;
2462 
2463         /* These are specials */
2464 
2465         case PT_ALNUM:
2466         if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2467              PRIV(ucp_gentype)[prop->chartype] == ucp_N) == notmatch)
2468           RRETURN(MATCH_NOMATCH);
2469         break;
2470 
2471         /* Perl space used to exclude VT, but from Perl 5.18 it is included,
2472         which means that Perl space and POSIX space are now identical. PCRE
2473         was changed at release 8.34. */
2474 
2475         case PT_SPACE:    /* Perl space */
2476         case PT_PXSPACE:  /* POSIX space */
2477         switch(fc)
2478           {
2479           HSPACE_CASES:
2480           VSPACE_CASES:
2481           if (notmatch) RRETURN(MATCH_NOMATCH);
2482           break;
2483 
2484           default:
2485           if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z) == notmatch)
2486             RRETURN(MATCH_NOMATCH);
2487           break;
2488           }
2489         break;
2490 
2491         case PT_WORD:
2492         if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2493              PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
2494              fc == CHAR_UNDERSCORE) == notmatch)
2495           RRETURN(MATCH_NOMATCH);
2496         break;
2497 
2498         case PT_CLIST:
2499         cp = PRIV(ucd_caseless_sets) + Fecode[2];
2500         for (;;)
2501           {
2502           if (fc < *cp)
2503             { if (notmatch) break; else { RRETURN(MATCH_NOMATCH); } }
2504           if (fc == *cp++)
2505             { if (notmatch) { RRETURN(MATCH_NOMATCH); } else break; }
2506           }
2507         break;
2508 
2509         case PT_UCNC:
2510         if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT ||
2511              fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) ||
2512              fc >= 0xe000) == notmatch)
2513           RRETURN(MATCH_NOMATCH);
2514         break;
2515 
2516         case PT_BIDICL:
2517         if ((UCD_BIDICLASS_PROP(prop) == Fecode[2]) == notmatch)
2518           RRETURN(MATCH_NOMATCH);
2519         break;
2520 
2521         case PT_BOOL:
2522           {
2523           BOOL ok = MAPBIT(PRIV(ucd_boolprop_sets) +
2524             UCD_BPROPS_PROP(prop), Fecode[2]) != 0;
2525           if (ok == notmatch) RRETURN(MATCH_NOMATCH);
2526           }
2527         break;
2528 
2529         /* This should never occur */
2530 
2531         default:
2532         return PCRE2_ERROR_INTERNAL;
2533         }
2534 
2535       Fecode += 3;
2536       }
2537     break;
2538 
2539 
2540     /* ===================================================================== */
2541     /* Match an extended Unicode sequence. We will get here only if the support
2542     is in the binary; otherwise a compile-time error occurs. */
2543 
2544     case OP_EXTUNI:
2545     if (Feptr >= mb->end_subject)
2546       {
2547       SCHECK_PARTIAL();
2548       RRETURN(MATCH_NOMATCH);
2549       }
2550     else
2551       {
2552       GETCHARINCTEST(fc, Feptr);
2553       Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject, mb->end_subject, utf,
2554         NULL);
2555       }
2556     CHECK_PARTIAL();
2557     Fecode++;
2558     break;
2559 
2560 #endif  /* SUPPORT_UNICODE */
2561 
2562 
2563     /* ===================================================================== */
2564     /* Match a single character type repeatedly. Note that the property type
2565     does not need to be in a stack frame as it is not used within an RMATCH()
2566     loop. */
2567 
2568 #define Lstart_eptr  F->temp_sptr[0]
2569 #define Lmin         F->temp_32[0]
2570 #define Lmax         F->temp_32[1]
2571 #define Lctype       F->temp_32[2]
2572 #define Lpropvalue   F->temp_32[3]
2573 
2574     case OP_TYPEEXACT:
2575     Lmin = Lmax = GET2(Fecode, 1);
2576     Fecode += 1 + IMM2_SIZE;
2577     goto REPEATTYPE;
2578 
2579     case OP_TYPEUPTO:
2580     case OP_TYPEMINUPTO:
2581     Lmin = 0;
2582     Lmax = GET2(Fecode, 1);
2583     reptype = (*Fecode == OP_TYPEMINUPTO)? REPTYPE_MIN : REPTYPE_MAX;
2584     Fecode += 1 + IMM2_SIZE;
2585     goto REPEATTYPE;
2586 
2587     case OP_TYPEPOSSTAR:
2588     reptype = REPTYPE_POS;
2589     Lmin = 0;
2590     Lmax = UINT32_MAX;
2591     Fecode++;
2592     goto REPEATTYPE;
2593 
2594     case OP_TYPEPOSPLUS:
2595     reptype = REPTYPE_POS;
2596     Lmin = 1;
2597     Lmax = UINT32_MAX;
2598     Fecode++;
2599     goto REPEATTYPE;
2600 
2601     case OP_TYPEPOSQUERY:
2602     reptype = REPTYPE_POS;
2603     Lmin = 0;
2604     Lmax = 1;
2605     Fecode++;
2606     goto REPEATTYPE;
2607 
2608     case OP_TYPEPOSUPTO:
2609     reptype = REPTYPE_POS;
2610     Lmin = 0;
2611     Lmax = GET2(Fecode, 1);
2612     Fecode += 1 + IMM2_SIZE;
2613     goto REPEATTYPE;
2614 
2615     case OP_TYPESTAR:
2616     case OP_TYPEMINSTAR:
2617     case OP_TYPEPLUS:
2618     case OP_TYPEMINPLUS:
2619     case OP_TYPEQUERY:
2620     case OP_TYPEMINQUERY:
2621     fc = *Fecode++ - OP_TYPESTAR;
2622     Lmin = rep_min[fc];
2623     Lmax = rep_max[fc];
2624     reptype = rep_typ[fc];
2625 
2626     /* Common code for all repeated character type matches. */
2627 
2628     REPEATTYPE:
2629     Lctype = *Fecode++;      /* Code for the character type */
2630 
2631 #ifdef SUPPORT_UNICODE
2632     if (Lctype == OP_PROP || Lctype == OP_NOTPROP)
2633       {
2634       proptype = *Fecode++;
2635       Lpropvalue = *Fecode++;
2636       }
2637     else proptype = -1;
2638 #endif
2639 
2640     /* First, ensure the minimum number of matches are present. Use inline
2641     code for maximizing the speed, and do the type test once at the start
2642     (i.e. keep it out of the loops). As there are no calls to RMATCH in the
2643     loops, we can use an ordinary variable for "notmatch". The code for UTF
2644     mode is separated out for tidiness, except for Unicode property tests. */
2645 
2646     if (Lmin > 0)
2647       {
2648 #ifdef SUPPORT_UNICODE
2649       if (proptype >= 0)  /* Property tests in all modes */
2650         {
2651         BOOL notmatch = Lctype == OP_NOTPROP;
2652         switch(proptype)
2653           {
2654           case PT_ANY:
2655           if (notmatch) RRETURN(MATCH_NOMATCH);
2656           for (i = 1; i <= Lmin; i++)
2657             {
2658             if (Feptr >= mb->end_subject)
2659               {
2660               SCHECK_PARTIAL();
2661               RRETURN(MATCH_NOMATCH);
2662               }
2663             GETCHARINCTEST(fc, Feptr);
2664             }
2665           break;
2666 
2667           case PT_LAMP:
2668           for (i = 1; i <= Lmin; i++)
2669             {
2670             int chartype;
2671             if (Feptr >= mb->end_subject)
2672               {
2673               SCHECK_PARTIAL();
2674               RRETURN(MATCH_NOMATCH);
2675               }
2676             GETCHARINCTEST(fc, Feptr);
2677             chartype = UCD_CHARTYPE(fc);
2678             if ((chartype == ucp_Lu ||
2679                  chartype == ucp_Ll ||
2680                  chartype == ucp_Lt) == notmatch)
2681               RRETURN(MATCH_NOMATCH);
2682             }
2683           break;
2684 
2685           case PT_GC:
2686           for (i = 1; i <= Lmin; i++)
2687             {
2688             if (Feptr >= mb->end_subject)
2689               {
2690               SCHECK_PARTIAL();
2691               RRETURN(MATCH_NOMATCH);
2692               }
2693             GETCHARINCTEST(fc, Feptr);
2694             if ((UCD_CATEGORY(fc) == Lpropvalue) == notmatch)
2695               RRETURN(MATCH_NOMATCH);
2696             }
2697           break;
2698 
2699           case PT_PC:
2700           for (i = 1; i <= Lmin; i++)
2701             {
2702             if (Feptr >= mb->end_subject)
2703               {
2704               SCHECK_PARTIAL();
2705               RRETURN(MATCH_NOMATCH);
2706               }
2707             GETCHARINCTEST(fc, Feptr);
2708             if ((UCD_CHARTYPE(fc) == Lpropvalue) == notmatch)
2709               RRETURN(MATCH_NOMATCH);
2710             }
2711           break;
2712 
2713           case PT_SC:
2714           for (i = 1; i <= Lmin; i++)
2715             {
2716             if (Feptr >= mb->end_subject)
2717               {
2718               SCHECK_PARTIAL();
2719               RRETURN(MATCH_NOMATCH);
2720               }
2721             GETCHARINCTEST(fc, Feptr);
2722             if ((UCD_SCRIPT(fc) == Lpropvalue) == notmatch)
2723               RRETURN(MATCH_NOMATCH);
2724             }
2725           break;
2726 
2727           case PT_SCX:
2728           for (i = 1; i <= Lmin; i++)
2729             {
2730             BOOL ok;
2731             const ucd_record *prop;
2732             if (Feptr >= mb->end_subject)
2733               {
2734               SCHECK_PARTIAL();
2735               RRETURN(MATCH_NOMATCH);
2736               }
2737             GETCHARINCTEST(fc, Feptr);
2738             prop = GET_UCD(fc);
2739             ok = (prop->script == Lpropvalue ||
2740                   MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Lpropvalue) != 0);
2741             if (ok == notmatch)
2742               RRETURN(MATCH_NOMATCH);
2743             }
2744           break;
2745 
2746           case PT_ALNUM:
2747           for (i = 1; i <= Lmin; i++)
2748             {
2749             int category;
2750             if (Feptr >= mb->end_subject)
2751               {
2752               SCHECK_PARTIAL();
2753               RRETURN(MATCH_NOMATCH);
2754               }
2755             GETCHARINCTEST(fc, Feptr);
2756             category = UCD_CATEGORY(fc);
2757             if ((category == ucp_L || category == ucp_N) == notmatch)
2758               RRETURN(MATCH_NOMATCH);
2759             }
2760           break;
2761 
2762           /* Perl space used to exclude VT, but from Perl 5.18 it is included,
2763           which means that Perl space and POSIX space are now identical. PCRE
2764           was changed at release 8.34. */
2765 
2766           case PT_SPACE:    /* Perl space */
2767           case PT_PXSPACE:  /* POSIX space */
2768           for (i = 1; i <= Lmin; i++)
2769             {
2770             if (Feptr >= mb->end_subject)
2771               {
2772               SCHECK_PARTIAL();
2773               RRETURN(MATCH_NOMATCH);
2774               }
2775             GETCHARINCTEST(fc, Feptr);
2776             switch(fc)
2777               {
2778               HSPACE_CASES:
2779               VSPACE_CASES:
2780               if (notmatch) RRETURN(MATCH_NOMATCH);
2781               break;
2782 
2783               default:
2784               if ((UCD_CATEGORY(fc) == ucp_Z) == notmatch)
2785                 RRETURN(MATCH_NOMATCH);
2786               break;
2787               }
2788             }
2789           break;
2790 
2791           case PT_WORD:
2792           for (i = 1; i <= Lmin; i++)
2793             {
2794             int category;
2795             if (Feptr >= mb->end_subject)
2796               {
2797               SCHECK_PARTIAL();
2798               RRETURN(MATCH_NOMATCH);
2799               }
2800             GETCHARINCTEST(fc, Feptr);
2801             category = UCD_CATEGORY(fc);
2802             if ((category == ucp_L || category == ucp_N ||
2803                 fc == CHAR_UNDERSCORE) == notmatch)
2804               RRETURN(MATCH_NOMATCH);
2805             }
2806           break;
2807 
2808           case PT_CLIST:
2809           for (i = 1; i <= Lmin; i++)
2810             {
2811             const uint32_t *cp;
2812             if (Feptr >= mb->end_subject)
2813               {
2814               SCHECK_PARTIAL();
2815               RRETURN(MATCH_NOMATCH);
2816               }
2817             GETCHARINCTEST(fc, Feptr);
2818             cp = PRIV(ucd_caseless_sets) + Lpropvalue;
2819             for (;;)
2820               {
2821               if (fc < *cp)
2822                 {
2823                 if (notmatch) break;
2824                 RRETURN(MATCH_NOMATCH);
2825                 }
2826               if (fc == *cp++)
2827                 {
2828                 if (notmatch) RRETURN(MATCH_NOMATCH);
2829                 break;
2830                 }
2831               }
2832             }
2833           break;
2834 
2835           case PT_UCNC:
2836           for (i = 1; i <= Lmin; i++)
2837             {
2838             if (Feptr >= mb->end_subject)
2839               {
2840               SCHECK_PARTIAL();
2841               RRETURN(MATCH_NOMATCH);
2842               }
2843             GETCHARINCTEST(fc, Feptr);
2844             if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT ||
2845                  fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) ||
2846                  fc >= 0xe000) == notmatch)
2847               RRETURN(MATCH_NOMATCH);
2848             }
2849           break;
2850 
2851           case PT_BIDICL:
2852           for (i = 1; i <= Lmin; i++)
2853             {
2854             if (Feptr >= mb->end_subject)
2855               {
2856               SCHECK_PARTIAL();
2857               RRETURN(MATCH_NOMATCH);
2858               }
2859             GETCHARINCTEST(fc, Feptr);
2860             if ((UCD_BIDICLASS(fc) == Lpropvalue) == notmatch)
2861               RRETURN(MATCH_NOMATCH);
2862             }
2863           break;
2864 
2865           case PT_BOOL:
2866           for (i = 1; i <= Lmin; i++)
2867             {
2868             BOOL ok;
2869             const ucd_record *prop;
2870             if (Feptr >= mb->end_subject)
2871               {
2872               SCHECK_PARTIAL();
2873               RRETURN(MATCH_NOMATCH);
2874               }
2875             GETCHARINCTEST(fc, Feptr);
2876             prop = GET_UCD(fc);
2877             ok = MAPBIT(PRIV(ucd_boolprop_sets) +
2878               UCD_BPROPS_PROP(prop), Lpropvalue) != 0;
2879             if (ok == notmatch)
2880               RRETURN(MATCH_NOMATCH);
2881             }
2882           break;
2883 
2884           /* This should not occur */
2885 
2886           default:
2887           return PCRE2_ERROR_INTERNAL;
2888           }
2889         }
2890 
2891       /* Match extended Unicode sequences. We will get here only if the
2892       support is in the binary; otherwise a compile-time error occurs. */
2893 
2894       else if (Lctype == OP_EXTUNI)
2895         {
2896         for (i = 1; i <= Lmin; i++)
2897           {
2898           if (Feptr >= mb->end_subject)
2899             {
2900             SCHECK_PARTIAL();
2901             RRETURN(MATCH_NOMATCH);
2902             }
2903           else
2904             {
2905             GETCHARINCTEST(fc, Feptr);
2906             Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject,
2907               mb->end_subject, utf, NULL);
2908             }
2909           CHECK_PARTIAL();
2910           }
2911         }
2912       else
2913 #endif     /* SUPPORT_UNICODE */
2914 
2915 /* Handle all other cases in UTF mode */
2916 
2917 #ifdef SUPPORT_UNICODE
2918       if (utf) switch(Lctype)
2919         {
2920         case OP_ANY:
2921         for (i = 1; i <= Lmin; i++)
2922           {
2923           if (Feptr >= mb->end_subject)
2924             {
2925             SCHECK_PARTIAL();
2926             RRETURN(MATCH_NOMATCH);
2927             }
2928           if (IS_NEWLINE(Feptr)) RRETURN(MATCH_NOMATCH);
2929           if (mb->partial != 0 &&
2930               Feptr + 1 >= mb->end_subject &&
2931               NLBLOCK->nltype == NLTYPE_FIXED &&
2932               NLBLOCK->nllen == 2 &&
2933               UCHAR21(Feptr) == NLBLOCK->nl[0])
2934             {
2935             mb->hitend = TRUE;
2936             if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
2937             }
2938           Feptr++;
2939           ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
2940           }
2941         break;
2942 
2943         case OP_ALLANY:
2944         for (i = 1; i <= Lmin; i++)
2945           {
2946           if (Feptr >= mb->end_subject)
2947             {
2948             SCHECK_PARTIAL();
2949             RRETURN(MATCH_NOMATCH);
2950             }
2951           Feptr++;
2952           ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
2953           }
2954         break;
2955 
2956         case OP_ANYBYTE:
2957         if (Feptr > mb->end_subject - Lmin) RRETURN(MATCH_NOMATCH);
2958         Feptr += Lmin;
2959         break;
2960 
2961         case OP_ANYNL:
2962         for (i = 1; i <= Lmin; i++)
2963           {
2964           if (Feptr >= mb->end_subject)
2965             {
2966             SCHECK_PARTIAL();
2967             RRETURN(MATCH_NOMATCH);
2968             }
2969           GETCHARINC(fc, Feptr);
2970           switch(fc)
2971             {
2972             default: RRETURN(MATCH_NOMATCH);
2973 
2974             case CHAR_CR:
2975             if (Feptr < mb->end_subject && UCHAR21(Feptr) == CHAR_LF) Feptr++;
2976             break;
2977 
2978             case CHAR_LF:
2979             break;
2980 
2981             case CHAR_VT:
2982             case CHAR_FF:
2983             case CHAR_NEL:
2984 #ifndef EBCDIC
2985             case 0x2028:
2986             case 0x2029:
2987 #endif  /* Not EBCDIC */
2988             if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) RRETURN(MATCH_NOMATCH);
2989             break;
2990             }
2991           }
2992         break;
2993 
2994         case OP_NOT_HSPACE:
2995         for (i = 1; i <= Lmin; i++)
2996           {
2997           if (Feptr >= mb->end_subject)
2998             {
2999             SCHECK_PARTIAL();
3000             RRETURN(MATCH_NOMATCH);
3001             }
3002           GETCHARINC(fc, Feptr);
3003           switch(fc)
3004             {
3005             HSPACE_CASES: RRETURN(MATCH_NOMATCH);
3006             default: break;
3007             }
3008           }
3009         break;
3010 
3011         case OP_HSPACE:
3012         for (i = 1; i <= Lmin; i++)
3013           {
3014           if (Feptr >= mb->end_subject)
3015             {
3016             SCHECK_PARTIAL();
3017             RRETURN(MATCH_NOMATCH);
3018             }
3019           GETCHARINC(fc, Feptr);
3020           switch(fc)
3021             {
3022             HSPACE_CASES: break;
3023             default: RRETURN(MATCH_NOMATCH);
3024             }
3025           }
3026         break;
3027 
3028         case OP_NOT_VSPACE:
3029         for (i = 1; i <= Lmin; i++)
3030           {
3031           if (Feptr >= mb->end_subject)
3032             {
3033             SCHECK_PARTIAL();
3034             RRETURN(MATCH_NOMATCH);
3035             }
3036           GETCHARINC(fc, Feptr);
3037           switch(fc)
3038             {
3039             VSPACE_CASES: RRETURN(MATCH_NOMATCH);
3040             default: break;
3041             }
3042           }
3043         break;
3044 
3045         case OP_VSPACE:
3046         for (i = 1; i <= Lmin; i++)
3047           {
3048           if (Feptr >= mb->end_subject)
3049             {
3050             SCHECK_PARTIAL();
3051             RRETURN(MATCH_NOMATCH);
3052             }
3053           GETCHARINC(fc, Feptr);
3054           switch(fc)
3055             {
3056             VSPACE_CASES: break;
3057             default: RRETURN(MATCH_NOMATCH);
3058             }
3059           }
3060         break;
3061 
3062         case OP_NOT_DIGIT:
3063         for (i = 1; i <= Lmin; i++)
3064           {
3065           if (Feptr >= mb->end_subject)
3066             {
3067             SCHECK_PARTIAL();
3068             RRETURN(MATCH_NOMATCH);
3069             }
3070           GETCHARINC(fc, Feptr);
3071           if (fc < 128 && (mb->ctypes[fc] & ctype_digit) != 0)
3072             RRETURN(MATCH_NOMATCH);
3073           }
3074         break;
3075 
3076         case OP_DIGIT:
3077         for (i = 1; i <= Lmin; i++)
3078           {
3079           uint32_t cc;
3080           if (Feptr >= mb->end_subject)
3081             {
3082             SCHECK_PARTIAL();
3083             RRETURN(MATCH_NOMATCH);
3084             }
3085           cc = UCHAR21(Feptr);
3086           if (cc >= 128 || (mb->ctypes[cc] & ctype_digit) == 0)
3087             RRETURN(MATCH_NOMATCH);
3088           Feptr++;
3089           /* No need to skip more code units - we know it has only one. */
3090           }
3091         break;
3092 
3093         case OP_NOT_WHITESPACE:
3094         for (i = 1; i <= Lmin; i++)
3095           {
3096           uint32_t cc;
3097           if (Feptr >= mb->end_subject)
3098             {
3099             SCHECK_PARTIAL();
3100             RRETURN(MATCH_NOMATCH);
3101             }
3102           cc = UCHAR21(Feptr);
3103           if (cc < 128 && (mb->ctypes[cc] & ctype_space) != 0)
3104             RRETURN(MATCH_NOMATCH);
3105           Feptr++;
3106           ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
3107           }
3108         break;
3109 
3110         case OP_WHITESPACE:
3111         for (i = 1; i <= Lmin; i++)
3112           {
3113           uint32_t cc;
3114           if (Feptr >= mb->end_subject)
3115             {
3116             SCHECK_PARTIAL();
3117             RRETURN(MATCH_NOMATCH);
3118             }
3119           cc = UCHAR21(Feptr);
3120           if (cc >= 128 || (mb->ctypes[cc] & ctype_space) == 0)
3121             RRETURN(MATCH_NOMATCH);
3122           Feptr++;
3123           /* No need to skip more code units - we know it has only one. */
3124           }
3125         break;
3126 
3127         case OP_NOT_WORDCHAR:
3128         for (i = 1; i <= Lmin; i++)
3129           {
3130           uint32_t cc;
3131           if (Feptr >= mb->end_subject)
3132             {
3133             SCHECK_PARTIAL();
3134             RRETURN(MATCH_NOMATCH);
3135             }
3136           cc = UCHAR21(Feptr);
3137           if (cc < 128 && (mb->ctypes[cc] & ctype_word) != 0)
3138             RRETURN(MATCH_NOMATCH);
3139           Feptr++;
3140           ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
3141           }
3142         break;
3143 
3144         case OP_WORDCHAR:
3145         for (i = 1; i <= Lmin; i++)
3146           {
3147           uint32_t cc;
3148           if (Feptr >= mb->end_subject)
3149             {
3150             SCHECK_PARTIAL();
3151             RRETURN(MATCH_NOMATCH);
3152             }
3153           cc = UCHAR21(Feptr);
3154           if (cc >= 128 || (mb->ctypes[cc] & ctype_word) == 0)
3155             RRETURN(MATCH_NOMATCH);
3156           Feptr++;
3157           /* No need to skip more code units - we know it has only one. */
3158           }
3159         break;
3160 
3161         default:
3162         return PCRE2_ERROR_INTERNAL;
3163         }  /* End switch(Lctype) */
3164 
3165       else
3166 #endif     /* SUPPORT_UNICODE */
3167 
3168       /* Code for the non-UTF case for minimum matching of operators other
3169       than OP_PROP and OP_NOTPROP. */
3170 
3171       switch(Lctype)
3172         {
3173         case OP_ANY:
3174         for (i = 1; i <= Lmin; i++)
3175           {
3176           if (Feptr >= mb->end_subject)
3177             {
3178             SCHECK_PARTIAL();
3179             RRETURN(MATCH_NOMATCH);
3180             }
3181           if (IS_NEWLINE(Feptr)) RRETURN(MATCH_NOMATCH);
3182           if (mb->partial != 0 &&
3183               Feptr + 1 >= mb->end_subject &&
3184               NLBLOCK->nltype == NLTYPE_FIXED &&
3185               NLBLOCK->nllen == 2 &&
3186               *Feptr == NLBLOCK->nl[0])
3187             {
3188             mb->hitend = TRUE;
3189             if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
3190             }
3191           Feptr++;
3192           }
3193         break;
3194 
3195         case OP_ALLANY:
3196         if (Feptr > mb->end_subject - Lmin)
3197           {
3198           SCHECK_PARTIAL();
3199           RRETURN(MATCH_NOMATCH);
3200           }
3201         Feptr += Lmin;
3202         break;
3203 
3204         /* This OP_ANYBYTE case will never be reached because \C gets turned
3205         into OP_ALLANY in non-UTF mode. Cut out the code so that coverage
3206         reports don't complain about its never being used. */
3207 
3208 /*        case OP_ANYBYTE:
3209 *        if (Feptr > mb->end_subject - Lmin)
3210 *          {
3211 *          SCHECK_PARTIAL();
3212 *          RRETURN(MATCH_NOMATCH);
3213 *          }
3214 *        Feptr += Lmin;
3215 *        break;
3216 */
3217         case OP_ANYNL:
3218         for (i = 1; i <= Lmin; i++)
3219           {
3220           if (Feptr >= mb->end_subject)
3221             {
3222             SCHECK_PARTIAL();
3223             RRETURN(MATCH_NOMATCH);
3224             }
3225           switch(*Feptr++)
3226             {
3227             default: RRETURN(MATCH_NOMATCH);
3228 
3229             case CHAR_CR:
3230             if (Feptr < mb->end_subject && *Feptr == CHAR_LF) Feptr++;
3231             break;
3232 
3233             case CHAR_LF:
3234             break;
3235 
3236             case CHAR_VT:
3237             case CHAR_FF:
3238             case CHAR_NEL:
3239 #if PCRE2_CODE_UNIT_WIDTH != 8
3240             case 0x2028:
3241             case 0x2029:
3242 #endif
3243             if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) RRETURN(MATCH_NOMATCH);
3244             break;
3245             }
3246           }
3247         break;
3248 
3249         case OP_NOT_HSPACE:
3250         for (i = 1; i <= Lmin; i++)
3251           {
3252           if (Feptr >= mb->end_subject)
3253             {
3254             SCHECK_PARTIAL();
3255             RRETURN(MATCH_NOMATCH);
3256             }
3257           switch(*Feptr++)
3258             {
3259             default: break;
3260             HSPACE_BYTE_CASES:
3261 #if PCRE2_CODE_UNIT_WIDTH != 8
3262             HSPACE_MULTIBYTE_CASES:
3263 #endif
3264             RRETURN(MATCH_NOMATCH);
3265             }
3266           }
3267         break;
3268 
3269         case OP_HSPACE:
3270         for (i = 1; i <= Lmin; i++)
3271           {
3272           if (Feptr >= mb->end_subject)
3273             {
3274             SCHECK_PARTIAL();
3275             RRETURN(MATCH_NOMATCH);
3276             }
3277           switch(*Feptr++)
3278             {
3279             default: RRETURN(MATCH_NOMATCH);
3280             HSPACE_BYTE_CASES:
3281 #if PCRE2_CODE_UNIT_WIDTH != 8
3282             HSPACE_MULTIBYTE_CASES:
3283 #endif
3284             break;
3285             }
3286           }
3287         break;
3288 
3289         case OP_NOT_VSPACE:
3290         for (i = 1; i <= Lmin; i++)
3291           {
3292           if (Feptr >= mb->end_subject)
3293             {
3294             SCHECK_PARTIAL();
3295             RRETURN(MATCH_NOMATCH);
3296             }
3297           switch(*Feptr++)
3298             {
3299             VSPACE_BYTE_CASES:
3300 #if PCRE2_CODE_UNIT_WIDTH != 8
3301             VSPACE_MULTIBYTE_CASES:
3302 #endif
3303             RRETURN(MATCH_NOMATCH);
3304             default: break;
3305             }
3306           }
3307         break;
3308 
3309         case OP_VSPACE:
3310         for (i = 1; i <= Lmin; i++)
3311           {
3312           if (Feptr >= mb->end_subject)
3313             {
3314             SCHECK_PARTIAL();
3315             RRETURN(MATCH_NOMATCH);
3316             }
3317           switch(*Feptr++)
3318             {
3319             default: RRETURN(MATCH_NOMATCH);
3320             VSPACE_BYTE_CASES:
3321 #if PCRE2_CODE_UNIT_WIDTH != 8
3322             VSPACE_MULTIBYTE_CASES:
3323 #endif
3324             break;
3325             }
3326           }
3327         break;
3328 
3329         case OP_NOT_DIGIT:
3330         for (i = 1; i <= Lmin; i++)
3331           {
3332           if (Feptr >= mb->end_subject)
3333             {
3334             SCHECK_PARTIAL();
3335             RRETURN(MATCH_NOMATCH);
3336             }
3337           if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_digit) != 0)
3338             RRETURN(MATCH_NOMATCH);
3339           Feptr++;
3340           }
3341         break;
3342 
3343         case OP_DIGIT:
3344         for (i = 1; i <= Lmin; i++)
3345           {
3346           if (Feptr >= mb->end_subject)
3347             {
3348             SCHECK_PARTIAL();
3349             RRETURN(MATCH_NOMATCH);
3350             }
3351           if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_digit) == 0)
3352             RRETURN(MATCH_NOMATCH);
3353           Feptr++;
3354           }
3355         break;
3356 
3357         case OP_NOT_WHITESPACE:
3358         for (i = 1; i <= Lmin; i++)
3359           {
3360           if (Feptr >= mb->end_subject)
3361             {
3362             SCHECK_PARTIAL();
3363             RRETURN(MATCH_NOMATCH);
3364             }
3365           if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_space) != 0)
3366             RRETURN(MATCH_NOMATCH);
3367           Feptr++;
3368           }
3369         break;
3370 
3371         case OP_WHITESPACE:
3372         for (i = 1; i <= Lmin; i++)
3373           {
3374           if (Feptr >= mb->end_subject)
3375             {
3376             SCHECK_PARTIAL();
3377             RRETURN(MATCH_NOMATCH);
3378             }
3379           if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_space) == 0)
3380             RRETURN(MATCH_NOMATCH);
3381           Feptr++;
3382           }
3383         break;
3384 
3385         case OP_NOT_WORDCHAR:
3386         for (i = 1; i <= Lmin; i++)
3387           {
3388           if (Feptr >= mb->end_subject)
3389             {
3390             SCHECK_PARTIAL();
3391             RRETURN(MATCH_NOMATCH);
3392             }
3393           if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_word) != 0)
3394             RRETURN(MATCH_NOMATCH);
3395           Feptr++;
3396           }
3397         break;
3398 
3399         case OP_WORDCHAR:
3400         for (i = 1; i <= Lmin; i++)
3401           {
3402           if (Feptr >= mb->end_subject)
3403             {
3404             SCHECK_PARTIAL();
3405             RRETURN(MATCH_NOMATCH);
3406             }
3407           if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_word) == 0)
3408             RRETURN(MATCH_NOMATCH);
3409           Feptr++;
3410           }
3411         break;
3412 
3413         default:
3414         return PCRE2_ERROR_INTERNAL;
3415         }
3416       }
3417 
3418     /* If Lmin = Lmax we are done. Continue with the main loop. */
3419 
3420     if (Lmin == Lmax) continue;
3421 
3422     /* If minimizing, we have to test the rest of the pattern before each
3423     subsequent match. This means we cannot use a local "notmatch" variable as
3424     in the other cases. As all 4 temporary 32-bit values in the frame are
3425     already in use, just test the type each time. */
3426 
3427     if (reptype == REPTYPE_MIN)
3428       {
3429 #ifdef SUPPORT_UNICODE
3430       if (proptype >= 0)
3431         {
3432         switch(proptype)
3433           {
3434           case PT_ANY:
3435           for (;;)
3436             {
3437             RMATCH(Fecode, RM208);
3438             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3439             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3440             if (Feptr >= mb->end_subject)
3441               {
3442               SCHECK_PARTIAL();
3443               RRETURN(MATCH_NOMATCH);
3444               }
3445             GETCHARINCTEST(fc, Feptr);
3446             if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
3447             }
3448           /* Control never gets here */
3449 
3450           case PT_LAMP:
3451           for (;;)
3452             {
3453             int chartype;
3454             RMATCH(Fecode, RM209);
3455             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3456             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3457             if (Feptr >= mb->end_subject)
3458               {
3459               SCHECK_PARTIAL();
3460               RRETURN(MATCH_NOMATCH);
3461               }
3462             GETCHARINCTEST(fc, Feptr);
3463             chartype = UCD_CHARTYPE(fc);
3464             if ((chartype == ucp_Lu ||
3465                  chartype == ucp_Ll ||
3466                  chartype == ucp_Lt) == (Lctype == OP_NOTPROP))
3467               RRETURN(MATCH_NOMATCH);
3468             }
3469           /* Control never gets here */
3470 
3471           case PT_GC:
3472           for (;;)
3473             {
3474             RMATCH(Fecode, RM210);
3475             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3476             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3477             if (Feptr >= mb->end_subject)
3478               {
3479               SCHECK_PARTIAL();
3480               RRETURN(MATCH_NOMATCH);
3481               }
3482             GETCHARINCTEST(fc, Feptr);
3483             if ((UCD_CATEGORY(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
3484               RRETURN(MATCH_NOMATCH);
3485             }
3486           /* Control never gets here */
3487 
3488           case PT_PC:
3489           for (;;)
3490             {
3491             RMATCH(Fecode, RM211);
3492             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3493             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3494             if (Feptr >= mb->end_subject)
3495               {
3496               SCHECK_PARTIAL();
3497               RRETURN(MATCH_NOMATCH);
3498               }
3499             GETCHARINCTEST(fc, Feptr);
3500             if ((UCD_CHARTYPE(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
3501               RRETURN(MATCH_NOMATCH);
3502             }
3503           /* Control never gets here */
3504 
3505           case PT_SC:
3506           for (;;)
3507             {
3508             RMATCH(Fecode, RM212);
3509             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3510             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3511             if (Feptr >= mb->end_subject)
3512               {
3513               SCHECK_PARTIAL();
3514               RRETURN(MATCH_NOMATCH);
3515               }
3516             GETCHARINCTEST(fc, Feptr);
3517             if ((UCD_SCRIPT(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
3518               RRETURN(MATCH_NOMATCH);
3519             }
3520           /* Control never gets here */
3521 
3522           case PT_SCX:
3523           for (;;)
3524             {
3525             BOOL ok;
3526             const ucd_record *prop;
3527             RMATCH(Fecode, RM225);
3528             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3529             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3530             if (Feptr >= mb->end_subject)
3531               {
3532               SCHECK_PARTIAL();
3533               RRETURN(MATCH_NOMATCH);
3534               }
3535             GETCHARINCTEST(fc, Feptr);
3536             prop = GET_UCD(fc);
3537             ok = (prop->script == Lpropvalue
3538                   || MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Lpropvalue) != 0);
3539             if (ok == (Lctype == OP_NOTPROP))
3540               RRETURN(MATCH_NOMATCH);
3541             }
3542           /* Control never gets here */
3543 
3544           case PT_ALNUM:
3545           for (;;)
3546             {
3547             int category;
3548             RMATCH(Fecode, RM213);
3549             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3550             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3551             if (Feptr >= mb->end_subject)
3552               {
3553               SCHECK_PARTIAL();
3554               RRETURN(MATCH_NOMATCH);
3555               }
3556             GETCHARINCTEST(fc, Feptr);
3557             category = UCD_CATEGORY(fc);
3558             if ((category == ucp_L || category == ucp_N) == (Lctype == OP_NOTPROP))
3559               RRETURN(MATCH_NOMATCH);
3560             }
3561           /* Control never gets here */
3562 
3563           /* Perl space used to exclude VT, but from Perl 5.18 it is included,
3564           which means that Perl space and POSIX space are now identical. PCRE
3565           was changed at release 8.34. */
3566 
3567           case PT_SPACE:    /* Perl space */
3568           case PT_PXSPACE:  /* POSIX space */
3569           for (;;)
3570             {
3571             RMATCH(Fecode, RM214);
3572             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3573             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3574             if (Feptr >= mb->end_subject)
3575               {
3576               SCHECK_PARTIAL();
3577               RRETURN(MATCH_NOMATCH);
3578               }
3579             GETCHARINCTEST(fc, Feptr);
3580             switch(fc)
3581               {
3582               HSPACE_CASES:
3583               VSPACE_CASES:
3584               if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
3585               break;
3586 
3587               default:
3588               if ((UCD_CATEGORY(fc) == ucp_Z) == (Lctype == OP_NOTPROP))
3589                 RRETURN(MATCH_NOMATCH);
3590               break;
3591               }
3592             }
3593           /* Control never gets here */
3594 
3595           case PT_WORD:
3596           for (;;)
3597             {
3598             int category;
3599             RMATCH(Fecode, RM215);
3600             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3601             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3602             if (Feptr >= mb->end_subject)
3603               {
3604               SCHECK_PARTIAL();
3605               RRETURN(MATCH_NOMATCH);
3606               }
3607             GETCHARINCTEST(fc, Feptr);
3608             category = UCD_CATEGORY(fc);
3609             if ((category == ucp_L ||
3610                  category == ucp_N ||
3611                  fc == CHAR_UNDERSCORE) == (Lctype == OP_NOTPROP))
3612               RRETURN(MATCH_NOMATCH);
3613             }
3614           /* Control never gets here */
3615 
3616           case PT_CLIST:
3617           for (;;)
3618             {
3619             const uint32_t *cp;
3620             RMATCH(Fecode, RM216);
3621             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3622             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3623             if (Feptr >= mb->end_subject)
3624               {
3625               SCHECK_PARTIAL();
3626               RRETURN(MATCH_NOMATCH);
3627               }
3628             GETCHARINCTEST(fc, Feptr);
3629             cp = PRIV(ucd_caseless_sets) + Lpropvalue;
3630             for (;;)
3631               {
3632               if (fc < *cp)
3633                 {
3634                 if (Lctype == OP_NOTPROP) break;
3635                 RRETURN(MATCH_NOMATCH);
3636                 }
3637               if (fc == *cp++)
3638                 {
3639                 if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
3640                 break;
3641                 }
3642               }
3643             }
3644           /* Control never gets here */
3645 
3646           case PT_UCNC:
3647           for (;;)
3648             {
3649             RMATCH(Fecode, RM217);
3650             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3651             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3652             if (Feptr >= mb->end_subject)
3653               {
3654               SCHECK_PARTIAL();
3655               RRETURN(MATCH_NOMATCH);
3656               }
3657             GETCHARINCTEST(fc, Feptr);
3658             if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT ||
3659                  fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) ||
3660                  fc >= 0xe000) == (Lctype == OP_NOTPROP))
3661               RRETURN(MATCH_NOMATCH);
3662             }
3663           /* Control never gets here */
3664 
3665           case PT_BIDICL:
3666           for (;;)
3667             {
3668             RMATCH(Fecode, RM224);
3669             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3670             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3671             if (Feptr >= mb->end_subject)
3672               {
3673               SCHECK_PARTIAL();
3674               RRETURN(MATCH_NOMATCH);
3675               }
3676             GETCHARINCTEST(fc, Feptr);
3677             if ((UCD_BIDICLASS(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
3678               RRETURN(MATCH_NOMATCH);
3679             }
3680           /* Control never gets here */
3681 
3682           case PT_BOOL:
3683           for (;;)
3684             {
3685             BOOL ok;
3686             const ucd_record *prop;
3687             RMATCH(Fecode, RM223);
3688             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3689             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3690             if (Feptr >= mb->end_subject)
3691               {
3692               SCHECK_PARTIAL();
3693               RRETURN(MATCH_NOMATCH);
3694               }
3695             GETCHARINCTEST(fc, Feptr);
3696             prop = GET_UCD(fc);
3697             ok = MAPBIT(PRIV(ucd_boolprop_sets) +
3698               UCD_BPROPS_PROP(prop), Lpropvalue) != 0;
3699             if (ok == (Lctype == OP_NOTPROP))
3700               RRETURN(MATCH_NOMATCH);
3701             }
3702           /* Control never gets here */
3703 
3704           /* This should never occur */
3705           default:
3706           return PCRE2_ERROR_INTERNAL;
3707           }
3708         }
3709 
3710       /* Match extended Unicode sequences. We will get here only if the
3711       support is in the binary; otherwise a compile-time error occurs. */
3712 
3713       else if (Lctype == OP_EXTUNI)
3714         {
3715         for (;;)
3716           {
3717           RMATCH(Fecode, RM218);
3718           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3719           if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3720           if (Feptr >= mb->end_subject)
3721             {
3722             SCHECK_PARTIAL();
3723             RRETURN(MATCH_NOMATCH);
3724             }
3725           else
3726             {
3727             GETCHARINCTEST(fc, Feptr);
3728             Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject, mb->end_subject,
3729               utf, NULL);
3730             }
3731           CHECK_PARTIAL();
3732           }
3733         }
3734       else
3735 #endif     /* SUPPORT_UNICODE */
3736 
3737       /* UTF mode for non-property testing character types. */
3738 
3739 #ifdef SUPPORT_UNICODE
3740       if (utf)
3741         {
3742         for (;;)
3743           {
3744           RMATCH(Fecode, RM219);
3745           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3746           if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3747           if (Feptr >= mb->end_subject)
3748             {
3749             SCHECK_PARTIAL();
3750             RRETURN(MATCH_NOMATCH);
3751             }
3752           if (Lctype == OP_ANY && IS_NEWLINE(Feptr)) RRETURN(MATCH_NOMATCH);
3753           GETCHARINC(fc, Feptr);
3754           switch(Lctype)
3755             {
3756             case OP_ANY:               /* This is the non-NL case */
3757             if (mb->partial != 0 &&    /* Take care with CRLF partial */
3758                 Feptr >= mb->end_subject &&
3759                 NLBLOCK->nltype == NLTYPE_FIXED &&
3760                 NLBLOCK->nllen == 2 &&
3761                 fc == NLBLOCK->nl[0])
3762               {
3763               mb->hitend = TRUE;
3764               if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
3765               }
3766             break;
3767 
3768             case OP_ALLANY:
3769             case OP_ANYBYTE:
3770             break;
3771 
3772             case OP_ANYNL:
3773             switch(fc)
3774               {
3775               default: RRETURN(MATCH_NOMATCH);
3776 
3777               case CHAR_CR:
3778               if (Feptr < mb->end_subject && UCHAR21(Feptr) == CHAR_LF) Feptr++;
3779               break;
3780 
3781               case CHAR_LF:
3782               break;
3783 
3784               case CHAR_VT:
3785               case CHAR_FF:
3786               case CHAR_NEL:
3787 #ifndef EBCDIC
3788               case 0x2028:
3789               case 0x2029:
3790 #endif  /* Not EBCDIC */
3791               if (mb->bsr_convention == PCRE2_BSR_ANYCRLF)
3792                 RRETURN(MATCH_NOMATCH);
3793               break;
3794               }
3795             break;
3796 
3797             case OP_NOT_HSPACE:
3798             switch(fc)
3799               {
3800               HSPACE_CASES: RRETURN(MATCH_NOMATCH);
3801               default: break;
3802               }
3803             break;
3804 
3805             case OP_HSPACE:
3806             switch(fc)
3807               {
3808               HSPACE_CASES: break;
3809               default: RRETURN(MATCH_NOMATCH);
3810               }
3811             break;
3812 
3813             case OP_NOT_VSPACE:
3814             switch(fc)
3815               {
3816               VSPACE_CASES: RRETURN(MATCH_NOMATCH);
3817               default: break;
3818               }
3819             break;
3820 
3821             case OP_VSPACE:
3822             switch(fc)
3823               {
3824               VSPACE_CASES: break;
3825               default: RRETURN(MATCH_NOMATCH);
3826               }
3827             break;
3828 
3829             case OP_NOT_DIGIT:
3830             if (fc < 256 && (mb->ctypes[fc] & ctype_digit) != 0)
3831               RRETURN(MATCH_NOMATCH);
3832             break;
3833 
3834             case OP_DIGIT:
3835             if (fc >= 256 || (mb->ctypes[fc] & ctype_digit) == 0)
3836               RRETURN(MATCH_NOMATCH);
3837             break;
3838 
3839             case OP_NOT_WHITESPACE:
3840             if (fc < 256 && (mb->ctypes[fc] & ctype_space) != 0)
3841               RRETURN(MATCH_NOMATCH);
3842             break;
3843 
3844             case OP_WHITESPACE:
3845             if (fc >= 256 || (mb->ctypes[fc] & ctype_space) == 0)
3846               RRETURN(MATCH_NOMATCH);
3847             break;
3848 
3849             case OP_NOT_WORDCHAR:
3850             if (fc < 256 && (mb->ctypes[fc] & ctype_word) != 0)
3851               RRETURN(MATCH_NOMATCH);
3852             break;
3853 
3854             case OP_WORDCHAR:
3855             if (fc >= 256 || (mb->ctypes[fc] & ctype_word) == 0)
3856               RRETURN(MATCH_NOMATCH);
3857             break;
3858 
3859             default:
3860             return PCRE2_ERROR_INTERNAL;
3861             }
3862           }
3863         }
3864       else
3865 #endif  /* SUPPORT_UNICODE */
3866 
3867       /* Not UTF mode */
3868         {
3869         for (;;)
3870           {
3871           RMATCH(Fecode, RM33);
3872           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3873           if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3874           if (Feptr >= mb->end_subject)
3875             {
3876             SCHECK_PARTIAL();
3877             RRETURN(MATCH_NOMATCH);
3878             }
3879           if (Lctype == OP_ANY && IS_NEWLINE(Feptr))
3880             RRETURN(MATCH_NOMATCH);
3881           fc = *Feptr++;
3882           switch(Lctype)
3883             {
3884             case OP_ANY:               /* This is the non-NL case */
3885             if (mb->partial != 0 &&    /* Take care with CRLF partial */
3886                 Feptr >= mb->end_subject &&
3887                 NLBLOCK->nltype == NLTYPE_FIXED &&
3888                 NLBLOCK->nllen == 2 &&
3889                 fc == NLBLOCK->nl[0])
3890               {
3891               mb->hitend = TRUE;
3892               if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
3893               }
3894             break;
3895 
3896             case OP_ALLANY:
3897             case OP_ANYBYTE:
3898             break;
3899 
3900             case OP_ANYNL:
3901             switch(fc)
3902               {
3903               default: RRETURN(MATCH_NOMATCH);
3904 
3905               case CHAR_CR:
3906               if (Feptr < mb->end_subject && *Feptr == CHAR_LF) Feptr++;
3907               break;
3908 
3909               case CHAR_LF:
3910               break;
3911 
3912               case CHAR_VT:
3913               case CHAR_FF:
3914               case CHAR_NEL:
3915 #if PCRE2_CODE_UNIT_WIDTH != 8
3916               case 0x2028:
3917               case 0x2029:
3918 #endif
3919               if (mb->bsr_convention == PCRE2_BSR_ANYCRLF)
3920                 RRETURN(MATCH_NOMATCH);
3921               break;
3922               }
3923             break;
3924 
3925             case OP_NOT_HSPACE:
3926             switch(fc)
3927               {
3928               default: break;
3929               HSPACE_BYTE_CASES:
3930 #if PCRE2_CODE_UNIT_WIDTH != 8
3931               HSPACE_MULTIBYTE_CASES:
3932 #endif
3933               RRETURN(MATCH_NOMATCH);
3934               }
3935             break;
3936 
3937             case OP_HSPACE:
3938             switch(fc)
3939               {
3940               default: RRETURN(MATCH_NOMATCH);
3941               HSPACE_BYTE_CASES:
3942 #if PCRE2_CODE_UNIT_WIDTH != 8
3943               HSPACE_MULTIBYTE_CASES:
3944 #endif
3945               break;
3946               }
3947             break;
3948 
3949             case OP_NOT_VSPACE:
3950             switch(fc)
3951               {
3952               default: break;
3953               VSPACE_BYTE_CASES:
3954 #if PCRE2_CODE_UNIT_WIDTH != 8
3955               VSPACE_MULTIBYTE_CASES:
3956 #endif
3957               RRETURN(MATCH_NOMATCH);
3958               }
3959             break;
3960 
3961             case OP_VSPACE:
3962             switch(fc)
3963               {
3964               default: RRETURN(MATCH_NOMATCH);
3965               VSPACE_BYTE_CASES:
3966 #if PCRE2_CODE_UNIT_WIDTH != 8
3967               VSPACE_MULTIBYTE_CASES:
3968 #endif
3969               break;
3970               }
3971             break;
3972 
3973             case OP_NOT_DIGIT:
3974             if (MAX_255(fc) && (mb->ctypes[fc] & ctype_digit) != 0)
3975               RRETURN(MATCH_NOMATCH);
3976             break;
3977 
3978             case OP_DIGIT:
3979             if (!MAX_255(fc) || (mb->ctypes[fc] & ctype_digit) == 0)
3980               RRETURN(MATCH_NOMATCH);
3981             break;
3982 
3983             case OP_NOT_WHITESPACE:
3984             if (MAX_255(fc) && (mb->ctypes[fc] & ctype_space) != 0)
3985               RRETURN(MATCH_NOMATCH);
3986             break;
3987 
3988             case OP_WHITESPACE:
3989             if (!MAX_255(fc) || (mb->ctypes[fc] & ctype_space) == 0)
3990               RRETURN(MATCH_NOMATCH);
3991             break;
3992 
3993             case OP_NOT_WORDCHAR:
3994             if (MAX_255(fc) && (mb->ctypes[fc] & ctype_word) != 0)
3995               RRETURN(MATCH_NOMATCH);
3996             break;
3997 
3998             case OP_WORDCHAR:
3999             if (!MAX_255(fc) || (mb->ctypes[fc] & ctype_word) == 0)
4000               RRETURN(MATCH_NOMATCH);
4001             break;
4002 
4003             default:
4004             return PCRE2_ERROR_INTERNAL;
4005             }
4006           }
4007         }
4008       /* Control never gets here */
4009       }
4010 
4011     /* If maximizing, it is worth using inline code for speed, doing the type
4012     test once at the start (i.e. keep it out of the loops). Once again,
4013     "notmatch" can be an ordinary local variable because the loops do not call
4014     RMATCH. */
4015 
4016     else
4017       {
4018       Lstart_eptr = Feptr;  /* Remember where we started */
4019 
4020 #ifdef SUPPORT_UNICODE
4021       if (proptype >= 0)
4022         {
4023         BOOL notmatch = Lctype == OP_NOTPROP;
4024         switch(proptype)
4025           {
4026           case PT_ANY:
4027           for (i = Lmin; i < Lmax; i++)
4028             {
4029             int len = 1;
4030             if (Feptr >= mb->end_subject)
4031               {
4032               SCHECK_PARTIAL();
4033               break;
4034               }
4035             GETCHARLENTEST(fc, Feptr, len);
4036             if (notmatch) break;
4037             Feptr+= len;
4038             }
4039           break;
4040 
4041           case PT_LAMP:
4042           for (i = Lmin; i < Lmax; i++)
4043             {
4044             int chartype;
4045             int len = 1;
4046             if (Feptr >= mb->end_subject)
4047               {
4048               SCHECK_PARTIAL();
4049               break;
4050               }
4051             GETCHARLENTEST(fc, Feptr, len);
4052             chartype = UCD_CHARTYPE(fc);
4053             if ((chartype == ucp_Lu ||
4054                  chartype == ucp_Ll ||
4055                  chartype == ucp_Lt) == notmatch)
4056               break;
4057             Feptr+= len;
4058             }
4059           break;
4060 
4061           case PT_GC:
4062           for (i = Lmin; i < Lmax; i++)
4063             {
4064             int len = 1;
4065             if (Feptr >= mb->end_subject)
4066               {
4067               SCHECK_PARTIAL();
4068               break;
4069               }
4070             GETCHARLENTEST(fc, Feptr, len);
4071             if ((UCD_CATEGORY(fc) == Lpropvalue) == notmatch) break;
4072             Feptr+= len;
4073             }
4074           break;
4075 
4076           case PT_PC:
4077           for (i = Lmin; i < Lmax; i++)
4078             {
4079             int len = 1;
4080             if (Feptr >= mb->end_subject)
4081               {
4082               SCHECK_PARTIAL();
4083               break;
4084               }
4085             GETCHARLENTEST(fc, Feptr, len);
4086             if ((UCD_CHARTYPE(fc) == Lpropvalue) == notmatch) break;
4087             Feptr+= len;
4088             }
4089           break;
4090 
4091           case PT_SC:
4092           for (i = Lmin; i < Lmax; i++)
4093             {
4094             int len = 1;
4095             if (Feptr >= mb->end_subject)
4096               {
4097               SCHECK_PARTIAL();
4098               break;
4099               }
4100             GETCHARLENTEST(fc, Feptr, len);
4101             if ((UCD_SCRIPT(fc) == Lpropvalue) == notmatch) break;
4102             Feptr+= len;
4103             }
4104           break;
4105 
4106           case PT_SCX:
4107           for (i = Lmin; i < Lmax; i++)
4108             {
4109             BOOL ok;
4110             const ucd_record *prop;
4111             int len = 1;
4112             if (Feptr >= mb->end_subject)
4113               {
4114               SCHECK_PARTIAL();
4115               break;
4116               }
4117             GETCHARLENTEST(fc, Feptr, len);
4118             prop = GET_UCD(fc);
4119             ok = (prop->script == Lpropvalue ||
4120                   MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Lpropvalue) != 0);
4121             if (ok == notmatch) break;
4122             Feptr+= len;
4123             }
4124           break;
4125 
4126           case PT_ALNUM:
4127           for (i = Lmin; i < Lmax; i++)
4128             {
4129             int category;
4130             int len = 1;
4131             if (Feptr >= mb->end_subject)
4132               {
4133               SCHECK_PARTIAL();
4134               break;
4135               }
4136             GETCHARLENTEST(fc, Feptr, len);
4137             category = UCD_CATEGORY(fc);
4138             if ((category == ucp_L || category == ucp_N) == notmatch)
4139               break;
4140             Feptr+= len;
4141             }
4142           break;
4143 
4144           /* Perl space used to exclude VT, but from Perl 5.18 it is included,
4145           which means that Perl space and POSIX space are now identical. PCRE
4146           was changed at release 8.34. */
4147 
4148           case PT_SPACE:    /* Perl space */
4149           case PT_PXSPACE:  /* POSIX space */
4150           for (i = Lmin; i < Lmax; i++)
4151             {
4152             int len = 1;
4153             if (Feptr >= mb->end_subject)
4154               {
4155               SCHECK_PARTIAL();
4156               break;
4157               }
4158             GETCHARLENTEST(fc, Feptr, len);
4159             switch(fc)
4160               {
4161               HSPACE_CASES:
4162               VSPACE_CASES:
4163               if (notmatch) goto ENDLOOP99;  /* Break the loop */
4164               break;
4165 
4166               default:
4167               if ((UCD_CATEGORY(fc) == ucp_Z) == notmatch)
4168                 goto ENDLOOP99;   /* Break the loop */
4169               break;
4170               }
4171             Feptr+= len;
4172             }
4173           ENDLOOP99:
4174           break;
4175 
4176           case PT_WORD:
4177           for (i = Lmin; i < Lmax; i++)
4178             {
4179             int category;
4180             int len = 1;
4181             if (Feptr >= mb->end_subject)
4182               {
4183               SCHECK_PARTIAL();
4184               break;
4185               }
4186             GETCHARLENTEST(fc, Feptr, len);
4187             category = UCD_CATEGORY(fc);
4188             if ((category == ucp_L || category == ucp_N ||
4189                  fc == CHAR_UNDERSCORE) == notmatch)
4190               break;
4191             Feptr+= len;
4192             }
4193           break;
4194 
4195           case PT_CLIST:
4196           for (i = Lmin; i < Lmax; i++)
4197             {
4198             const uint32_t *cp;
4199             int len = 1;
4200             if (Feptr >= mb->end_subject)
4201               {
4202               SCHECK_PARTIAL();
4203               break;
4204               }
4205             GETCHARLENTEST(fc, Feptr, len);
4206             cp = PRIV(ucd_caseless_sets) + Lpropvalue;
4207             for (;;)
4208               {
4209               if (fc < *cp)
4210                 { if (notmatch) break; else goto GOT_MAX; }
4211               if (fc == *cp++)
4212                 { if (notmatch) goto GOT_MAX; else break; }
4213               }
4214             Feptr += len;
4215             }
4216           GOT_MAX:
4217           break;
4218 
4219           case PT_UCNC:
4220           for (i = Lmin; i < Lmax; i++)
4221             {
4222             int len = 1;
4223             if (Feptr >= mb->end_subject)
4224               {
4225               SCHECK_PARTIAL();
4226               break;
4227               }
4228             GETCHARLENTEST(fc, Feptr, len);
4229             if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT ||
4230                  fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) ||
4231                  fc >= 0xe000) == notmatch)
4232               break;
4233             Feptr += len;
4234             }
4235           break;
4236 
4237           case PT_BIDICL:
4238           for (i = Lmin; i < Lmax; i++)
4239             {
4240             int len = 1;
4241             if (Feptr >= mb->end_subject)
4242               {
4243               SCHECK_PARTIAL();
4244               break;
4245               }
4246             GETCHARLENTEST(fc, Feptr, len);
4247             if ((UCD_BIDICLASS(fc) == Lpropvalue) == notmatch) break;
4248             Feptr+= len;
4249             }
4250           break;
4251 
4252           case PT_BOOL:
4253           for (i = Lmin; i < Lmax; i++)
4254             {
4255             BOOL ok;
4256             const ucd_record *prop;
4257             int len = 1;
4258             if (Feptr >= mb->end_subject)
4259               {
4260               SCHECK_PARTIAL();
4261               break;
4262               }
4263             GETCHARLENTEST(fc, Feptr, len);
4264             prop = GET_UCD(fc);
4265             ok = MAPBIT(PRIV(ucd_boolprop_sets) +
4266               UCD_BPROPS_PROP(prop), Lpropvalue) != 0;
4267             if (ok == notmatch) break;
4268             Feptr+= len;
4269             }
4270           break;
4271 
4272           default:
4273           return PCRE2_ERROR_INTERNAL;
4274           }
4275 
4276         /* Feptr is now past the end of the maximum run */
4277 
4278         if (reptype == REPTYPE_POS) continue;    /* No backtracking */
4279 
4280         /* After \C in UTF mode, Lstart_eptr might be in the middle of a
4281         Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
4282         go too far. */
4283 
4284         for(;;)
4285           {
4286           if (Feptr <= Lstart_eptr) break;
4287           RMATCH(Fecode, RM222);
4288           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4289           Feptr--;
4290           if (utf) BACKCHAR(Feptr);
4291           }
4292         }
4293 
4294       /* Match extended Unicode grapheme clusters. We will get here only if the
4295       support is in the binary; otherwise a compile-time error occurs. */
4296 
4297       else if (Lctype == OP_EXTUNI)
4298         {
4299         for (i = Lmin; i < Lmax; i++)
4300           {
4301           if (Feptr >= mb->end_subject)
4302             {
4303             SCHECK_PARTIAL();
4304             break;
4305             }
4306           else
4307             {
4308             GETCHARINCTEST(fc, Feptr);
4309             Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject, mb->end_subject,
4310               utf, NULL);
4311             }
4312           CHECK_PARTIAL();
4313           }
4314 
4315         /* Feptr is now past the end of the maximum run */
4316 
4317         if (reptype == REPTYPE_POS) continue;    /* No backtracking */
4318 
4319         /* We use <= Lstart_eptr rather than == Lstart_eptr to detect the start
4320         of the run while backtracking because the use of \C in UTF mode can
4321         cause BACKCHAR to move back past Lstart_eptr. This is just palliative;
4322         the use of \C in UTF mode is fraught with danger. */
4323 
4324         for(;;)
4325           {
4326           int lgb, rgb;
4327           PCRE2_SPTR fptr;
4328 
4329           if (Feptr <= Lstart_eptr) break;   /* At start of char run */
4330           RMATCH(Fecode, RM220);
4331           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4332 
4333           /* Backtracking over an extended grapheme cluster involves inspecting
4334           the previous two characters (if present) to see if a break is
4335           permitted between them. */
4336 
4337           Feptr--;
4338           if (!utf) fc = *Feptr; else
4339             {
4340             BACKCHAR(Feptr);
4341             GETCHAR(fc, Feptr);
4342             }
4343           rgb = UCD_GRAPHBREAK(fc);
4344 
4345           for (;;)
4346             {
4347             if (Feptr <= Lstart_eptr) break;   /* At start of char run */
4348             fptr = Feptr - 1;
4349             if (!utf) fc = *fptr; else
4350               {
4351               BACKCHAR(fptr);
4352               GETCHAR(fc, fptr);
4353               }
4354             lgb = UCD_GRAPHBREAK(fc);
4355             if ((PRIV(ucp_gbtable)[lgb] & (1u << rgb)) == 0) break;
4356             Feptr = fptr;
4357             rgb = lgb;
4358             }
4359           }
4360         }
4361 
4362       else
4363 #endif   /* SUPPORT_UNICODE */
4364 
4365 #ifdef SUPPORT_UNICODE
4366       if (utf)
4367         {
4368         switch(Lctype)
4369           {
4370           case OP_ANY:
4371           for (i = Lmin; i < Lmax; i++)
4372             {
4373             if (Feptr >= mb->end_subject)
4374               {
4375               SCHECK_PARTIAL();
4376               break;
4377               }
4378             if (IS_NEWLINE(Feptr)) break;
4379             if (mb->partial != 0 &&    /* Take care with CRLF partial */
4380                 Feptr + 1 >= mb->end_subject &&
4381                 NLBLOCK->nltype == NLTYPE_FIXED &&
4382                 NLBLOCK->nllen == 2 &&
4383                 UCHAR21(Feptr) == NLBLOCK->nl[0])
4384               {
4385               mb->hitend = TRUE;
4386               if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
4387               }
4388             Feptr++;
4389             ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
4390             }
4391           break;
4392 
4393           case OP_ALLANY:
4394           if (Lmax < UINT32_MAX)
4395             {
4396             for (i = Lmin; i < Lmax; i++)
4397               {
4398               if (Feptr >= mb->end_subject)
4399                 {
4400                 SCHECK_PARTIAL();
4401                 break;
4402                 }
4403               Feptr++;
4404               ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
4405               }
4406             }
4407           else
4408             {
4409             Feptr = mb->end_subject;   /* Unlimited UTF-8 repeat */
4410             SCHECK_PARTIAL();
4411             }
4412           break;
4413 
4414           /* The "byte" (i.e. "code unit") case is the same as non-UTF */
4415 
4416           case OP_ANYBYTE:
4417           fc = Lmax - Lmin;
4418           if (fc > (uint32_t)(mb->end_subject - Feptr))
4419             {
4420             Feptr = mb->end_subject;
4421             SCHECK_PARTIAL();
4422             }
4423           else Feptr += fc;
4424           break;
4425 
4426           case OP_ANYNL:
4427           for (i = Lmin; i < Lmax; i++)
4428             {
4429             int len = 1;
4430             if (Feptr >= mb->end_subject)
4431               {
4432               SCHECK_PARTIAL();
4433               break;
4434               }
4435             GETCHARLEN(fc, Feptr, len);
4436             if (fc == CHAR_CR)
4437               {
4438               if (++Feptr >= mb->end_subject) break;
4439               if (UCHAR21(Feptr) == CHAR_LF) Feptr++;
4440               }
4441             else
4442               {
4443               if (fc != CHAR_LF &&
4444                   (mb->bsr_convention == PCRE2_BSR_ANYCRLF ||
4445                    (fc != CHAR_VT && fc != CHAR_FF && fc != CHAR_NEL
4446 #ifndef EBCDIC
4447                     && fc != 0x2028 && fc != 0x2029
4448 #endif  /* Not EBCDIC */
4449                     )))
4450                 break;
4451               Feptr += len;
4452               }
4453             }
4454           break;
4455 
4456           case OP_NOT_HSPACE:
4457           case OP_HSPACE:
4458           for (i = Lmin; i < Lmax; i++)
4459             {
4460             BOOL gotspace;
4461             int len = 1;
4462             if (Feptr >= mb->end_subject)
4463               {
4464               SCHECK_PARTIAL();
4465               break;
4466               }
4467             GETCHARLEN(fc, Feptr, len);
4468             switch(fc)
4469               {
4470               HSPACE_CASES: gotspace = TRUE; break;
4471               default: gotspace = FALSE; break;
4472               }
4473             if (gotspace == (Lctype == OP_NOT_HSPACE)) break;
4474             Feptr += len;
4475             }
4476           break;
4477 
4478           case OP_NOT_VSPACE:
4479           case OP_VSPACE:
4480           for (i = Lmin; i < Lmax; i++)
4481             {
4482             BOOL gotspace;
4483             int len = 1;
4484             if (Feptr >= mb->end_subject)
4485               {
4486               SCHECK_PARTIAL();
4487               break;
4488               }
4489             GETCHARLEN(fc, Feptr, len);
4490             switch(fc)
4491               {
4492               VSPACE_CASES: gotspace = TRUE; break;
4493               default: gotspace = FALSE; break;
4494               }
4495             if (gotspace == (Lctype == OP_NOT_VSPACE)) break;
4496             Feptr += len;
4497             }
4498           break;
4499 
4500           case OP_NOT_DIGIT:
4501           for (i = Lmin; i < Lmax; i++)
4502             {
4503             int len = 1;
4504             if (Feptr >= mb->end_subject)
4505               {
4506               SCHECK_PARTIAL();
4507               break;
4508               }
4509             GETCHARLEN(fc, Feptr, len);
4510             if (fc < 256 && (mb->ctypes[fc] & ctype_digit) != 0) break;
4511             Feptr+= len;
4512             }
4513           break;
4514 
4515           case OP_DIGIT:
4516           for (i = Lmin; i < Lmax; i++)
4517             {
4518             int len = 1;
4519             if (Feptr >= mb->end_subject)
4520               {
4521               SCHECK_PARTIAL();
4522               break;
4523               }
4524             GETCHARLEN(fc, Feptr, len);
4525             if (fc >= 256 ||(mb->ctypes[fc] & ctype_digit) == 0) break;
4526             Feptr+= len;
4527             }
4528           break;
4529 
4530           case OP_NOT_WHITESPACE:
4531           for (i = Lmin; i < Lmax; i++)
4532             {
4533             int len = 1;
4534             if (Feptr >= mb->end_subject)
4535               {
4536               SCHECK_PARTIAL();
4537               break;
4538               }
4539             GETCHARLEN(fc, Feptr, len);
4540             if (fc < 256 && (mb->ctypes[fc] & ctype_space) != 0) break;
4541             Feptr+= len;
4542             }
4543           break;
4544 
4545           case OP_WHITESPACE:
4546           for (i = Lmin; i < Lmax; i++)
4547             {
4548             int len = 1;
4549             if (Feptr >= mb->end_subject)
4550               {
4551               SCHECK_PARTIAL();
4552               break;
4553               }
4554             GETCHARLEN(fc, Feptr, len);
4555             if (fc >= 256 ||(mb->ctypes[fc] & ctype_space) == 0) break;
4556             Feptr+= len;
4557             }
4558           break;
4559 
4560           case OP_NOT_WORDCHAR:
4561           for (i = Lmin; i < Lmax; i++)
4562             {
4563             int len = 1;
4564             if (Feptr >= mb->end_subject)
4565               {
4566               SCHECK_PARTIAL();
4567               break;
4568               }
4569             GETCHARLEN(fc, Feptr, len);
4570             if (fc < 256 && (mb->ctypes[fc] & ctype_word) != 0) break;
4571             Feptr+= len;
4572             }
4573           break;
4574 
4575           case OP_WORDCHAR:
4576           for (i = Lmin; i < Lmax; i++)
4577             {
4578             int len = 1;
4579             if (Feptr >= mb->end_subject)
4580               {
4581               SCHECK_PARTIAL();
4582               break;
4583               }
4584             GETCHARLEN(fc, Feptr, len);
4585             if (fc >= 256 || (mb->ctypes[fc] & ctype_word) == 0) break;
4586             Feptr+= len;
4587             }
4588           break;
4589 
4590           default:
4591           return PCRE2_ERROR_INTERNAL;
4592           }
4593 
4594         if (reptype == REPTYPE_POS) continue;    /* No backtracking */
4595 
4596         /* After \C in UTF mode, Lstart_eptr might be in the middle of a
4597         Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't go
4598         too far. */
4599 
4600         for(;;)
4601           {
4602           if (Feptr <= Lstart_eptr) break;
4603           RMATCH(Fecode, RM221);
4604           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4605           Feptr--;
4606           BACKCHAR(Feptr);
4607           if (Lctype == OP_ANYNL && Feptr > Lstart_eptr &&
4608               UCHAR21(Feptr) == CHAR_NL && UCHAR21(Feptr - 1) == CHAR_CR)
4609             Feptr--;
4610           }
4611         }
4612       else
4613 #endif  /* SUPPORT_UNICODE */
4614 
4615       /* Not UTF mode */
4616         {
4617         switch(Lctype)
4618           {
4619           case OP_ANY:
4620           for (i = Lmin; i < Lmax; i++)
4621             {
4622             if (Feptr >= mb->end_subject)
4623               {
4624               SCHECK_PARTIAL();
4625               break;
4626               }
4627             if (IS_NEWLINE(Feptr)) break;
4628             if (mb->partial != 0 &&    /* Take care with CRLF partial */
4629                 Feptr + 1 >= mb->end_subject &&
4630                 NLBLOCK->nltype == NLTYPE_FIXED &&
4631                 NLBLOCK->nllen == 2 &&
4632                 *Feptr == NLBLOCK->nl[0])
4633               {
4634               mb->hitend = TRUE;
4635               if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
4636               }
4637             Feptr++;
4638             }
4639           break;
4640 
4641           case OP_ALLANY:
4642           case OP_ANYBYTE:
4643           fc = Lmax - Lmin;
4644           if (fc > (uint32_t)(mb->end_subject - Feptr))
4645             {
4646             Feptr = mb->end_subject;
4647             SCHECK_PARTIAL();
4648             }
4649           else Feptr += fc;
4650           break;
4651 
4652           case OP_ANYNL:
4653           for (i = Lmin; i < Lmax; i++)
4654             {
4655             if (Feptr >= mb->end_subject)
4656               {
4657               SCHECK_PARTIAL();
4658               break;
4659               }
4660             fc = *Feptr;
4661             if (fc == CHAR_CR)
4662               {
4663               if (++Feptr >= mb->end_subject) break;
4664               if (*Feptr == CHAR_LF) Feptr++;
4665               }
4666             else
4667               {
4668               if (fc != CHAR_LF && (mb->bsr_convention == PCRE2_BSR_ANYCRLF ||
4669                  (fc != CHAR_VT && fc != CHAR_FF && fc != CHAR_NEL
4670 #if PCRE2_CODE_UNIT_WIDTH != 8
4671                  && fc != 0x2028 && fc != 0x2029
4672 #endif
4673                  ))) break;
4674               Feptr++;
4675               }
4676             }
4677           break;
4678 
4679           case OP_NOT_HSPACE:
4680           for (i = Lmin; i < Lmax; i++)
4681             {
4682             if (Feptr >= mb->end_subject)
4683               {
4684               SCHECK_PARTIAL();
4685               break;
4686               }
4687             switch(*Feptr)
4688               {
4689               default: Feptr++; break;
4690               HSPACE_BYTE_CASES:
4691 #if PCRE2_CODE_UNIT_WIDTH != 8
4692               HSPACE_MULTIBYTE_CASES:
4693 #endif
4694               goto ENDLOOP00;
4695               }
4696             }
4697           ENDLOOP00:
4698           break;
4699 
4700           case OP_HSPACE:
4701           for (i = Lmin; i < Lmax; i++)
4702             {
4703             if (Feptr >= mb->end_subject)
4704               {
4705               SCHECK_PARTIAL();
4706               break;
4707               }
4708             switch(*Feptr)
4709               {
4710               default: goto ENDLOOP01;
4711               HSPACE_BYTE_CASES:
4712 #if PCRE2_CODE_UNIT_WIDTH != 8
4713               HSPACE_MULTIBYTE_CASES:
4714 #endif
4715               Feptr++; break;
4716               }
4717             }
4718           ENDLOOP01:
4719           break;
4720 
4721           case OP_NOT_VSPACE:
4722           for (i = Lmin; i < Lmax; i++)
4723             {
4724             if (Feptr >= mb->end_subject)
4725               {
4726               SCHECK_PARTIAL();
4727               break;
4728               }
4729             switch(*Feptr)
4730               {
4731               default: Feptr++; break;
4732               VSPACE_BYTE_CASES:
4733 #if PCRE2_CODE_UNIT_WIDTH != 8
4734               VSPACE_MULTIBYTE_CASES:
4735 #endif
4736               goto ENDLOOP02;
4737               }
4738             }
4739           ENDLOOP02:
4740           break;
4741 
4742           case OP_VSPACE:
4743           for (i = Lmin; i < Lmax; i++)
4744             {
4745             if (Feptr >= mb->end_subject)
4746               {
4747               SCHECK_PARTIAL();
4748               break;
4749               }
4750             switch(*Feptr)
4751               {
4752               default: goto ENDLOOP03;
4753               VSPACE_BYTE_CASES:
4754 #if PCRE2_CODE_UNIT_WIDTH != 8
4755               VSPACE_MULTIBYTE_CASES:
4756 #endif
4757               Feptr++; break;
4758               }
4759             }
4760           ENDLOOP03:
4761           break;
4762 
4763           case OP_NOT_DIGIT:
4764           for (i = Lmin; i < Lmax; i++)
4765             {
4766             if (Feptr >= mb->end_subject)
4767               {
4768               SCHECK_PARTIAL();
4769               break;
4770               }
4771             if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_digit) != 0)
4772               break;
4773             Feptr++;
4774             }
4775           break;
4776 
4777           case OP_DIGIT:
4778           for (i = Lmin; i < Lmax; i++)
4779             {
4780             if (Feptr >= mb->end_subject)
4781               {
4782               SCHECK_PARTIAL();
4783               break;
4784               }
4785             if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_digit) == 0)
4786               break;
4787             Feptr++;
4788             }
4789           break;
4790 
4791           case OP_NOT_WHITESPACE:
4792           for (i = Lmin; i < Lmax; i++)
4793             {
4794             if (Feptr >= mb->end_subject)
4795               {
4796               SCHECK_PARTIAL();
4797               break;
4798               }
4799             if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_space) != 0)
4800               break;
4801             Feptr++;
4802             }
4803           break;
4804 
4805           case OP_WHITESPACE:
4806           for (i = Lmin; i < Lmax; i++)
4807             {
4808             if (Feptr >= mb->end_subject)
4809               {
4810               SCHECK_PARTIAL();
4811               break;
4812               }
4813             if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_space) == 0)
4814               break;
4815             Feptr++;
4816             }
4817           break;
4818 
4819           case OP_NOT_WORDCHAR:
4820           for (i = Lmin; i < Lmax; i++)
4821             {
4822             if (Feptr >= mb->end_subject)
4823               {
4824               SCHECK_PARTIAL();
4825               break;
4826               }
4827             if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_word) != 0)
4828               break;
4829             Feptr++;
4830             }
4831           break;
4832 
4833           case OP_WORDCHAR:
4834           for (i = Lmin; i < Lmax; i++)
4835             {
4836             if (Feptr >= mb->end_subject)
4837               {
4838               SCHECK_PARTIAL();
4839               break;
4840               }
4841             if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_word) == 0)
4842               break;
4843             Feptr++;
4844             }
4845           break;
4846 
4847           default:
4848           return PCRE2_ERROR_INTERNAL;
4849           }
4850 
4851         if (reptype == REPTYPE_POS) continue;    /* No backtracking */
4852 
4853         for (;;)
4854           {
4855           if (Feptr == Lstart_eptr) break;
4856           RMATCH(Fecode, RM34);
4857           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4858           Feptr--;
4859           if (Lctype == OP_ANYNL && Feptr > Lstart_eptr && *Feptr == CHAR_LF &&
4860               Feptr[-1] == CHAR_CR) Feptr--;
4861           }
4862         }
4863       }
4864     break;  /* End of repeat character type processing */
4865 
4866 #undef Lstart_eptr
4867 #undef Lmin
4868 #undef Lmax
4869 #undef Lctype
4870 #undef Lpropvalue
4871 
4872 
4873     /* ===================================================================== */
4874     /* Match a back reference, possibly repeatedly. Look past the end of the
4875     item to see if there is repeat information following. The OP_REF and
4876     OP_REFI opcodes are used for a reference to a numbered group or to a
4877     non-duplicated named group. For a duplicated named group, OP_DNREF and
4878     OP_DNREFI are used. In this case we must scan the list of groups to which
4879     the name refers, and use the first one that is set. */
4880 
4881 #define Lmin      F->temp_32[0]
4882 #define Lmax      F->temp_32[1]
4883 #define Lcaseless F->temp_32[2]
4884 #define Lstart    F->temp_sptr[0]
4885 #define Loffset   F->temp_size
4886 
4887     case OP_DNREF:
4888     case OP_DNREFI:
4889     Lcaseless = (Fop == OP_DNREFI);
4890       {
4891       int count = GET2(Fecode, 1+IMM2_SIZE);
4892       PCRE2_SPTR slot = mb->name_table + GET2(Fecode, 1) * mb->name_entry_size;
4893       Fecode += 1 + 2*IMM2_SIZE;
4894 
4895       while (count-- > 0)
4896         {
4897         Loffset = (GET2(slot, 0) << 1) - 2;
4898         if (Loffset < Foffset_top && Fovector[Loffset] != PCRE2_UNSET) break;
4899         slot += mb->name_entry_size;
4900         }
4901       }
4902     goto REF_REPEAT;
4903 
4904     case OP_REF:
4905     case OP_REFI:
4906     Lcaseless = (Fop == OP_REFI);
4907     Loffset = (GET2(Fecode, 1) << 1) - 2;
4908     Fecode += 1 + IMM2_SIZE;
4909 
4910     /* Set up for repetition, or handle the non-repeated case. The maximum and
4911     minimum must be in the heap frame, but as they are short-term values, we
4912     use temporary fields. */
4913 
4914     REF_REPEAT:
4915     switch (*Fecode)
4916       {
4917       case OP_CRSTAR:
4918       case OP_CRMINSTAR:
4919       case OP_CRPLUS:
4920       case OP_CRMINPLUS:
4921       case OP_CRQUERY:
4922       case OP_CRMINQUERY:
4923       fc = *Fecode++ - OP_CRSTAR;
4924       Lmin = rep_min[fc];
4925       Lmax = rep_max[fc];
4926       reptype = rep_typ[fc];
4927       break;
4928 
4929       case OP_CRRANGE:
4930       case OP_CRMINRANGE:
4931       Lmin = GET2(Fecode, 1);
4932       Lmax = GET2(Fecode, 1 + IMM2_SIZE);
4933       reptype = rep_typ[*Fecode - OP_CRSTAR];
4934       if (Lmax == 0) Lmax = UINT32_MAX;  /* Max 0 => infinity */
4935       Fecode += 1 + 2 * IMM2_SIZE;
4936       break;
4937 
4938       default:                  /* No repeat follows */
4939         {
4940         rrc = match_ref(Loffset, Lcaseless, F, mb, &length);
4941         if (rrc != 0)
4942           {
4943           if (rrc > 0) Feptr = mb->end_subject;   /* Partial match */
4944           CHECK_PARTIAL();
4945           RRETURN(MATCH_NOMATCH);
4946           }
4947         }
4948       Feptr += length;
4949       continue;              /* With the main loop */
4950       }
4951 
4952     /* Handle repeated back references. If a set group has length zero, just
4953     continue with the main loop, because it matches however many times. For an
4954     unset reference, if the minimum is zero, we can also just continue. We can
4955     also continue if PCRE2_MATCH_UNSET_BACKREF is set, because this makes an
4956     unset group behave as a zero-length group. In any other unset case,
4957     carrying on will result in NOMATCH. */
4958 
4959     if (Loffset < Foffset_top && Fovector[Loffset] != PCRE2_UNSET)
4960       {
4961       if (Fovector[Loffset] == Fovector[Loffset + 1]) continue;
4962       }
4963     else  /* Group is not set */
4964       {
4965       if (Lmin == 0 || (mb->poptions & PCRE2_MATCH_UNSET_BACKREF) != 0)
4966         continue;
4967       }
4968 
4969     /* First, ensure the minimum number of matches are present. */
4970 
4971     for (i = 1; i <= Lmin; i++)
4972       {
4973       PCRE2_SIZE slength;
4974       rrc = match_ref(Loffset, Lcaseless, F, mb, &slength);
4975       if (rrc != 0)
4976         {
4977         if (rrc > 0) Feptr = mb->end_subject;   /* Partial match */
4978         CHECK_PARTIAL();
4979         RRETURN(MATCH_NOMATCH);
4980         }
4981       Feptr += slength;
4982       }
4983 
4984     /* If min = max, we are done. They are not both allowed to be zero. */
4985 
4986     if (Lmin == Lmax) continue;
4987 
4988     /* If minimizing, keep trying and advancing the pointer. */
4989 
4990     if (reptype == REPTYPE_MIN)
4991       {
4992       for (;;)
4993         {
4994         PCRE2_SIZE slength;
4995         RMATCH(Fecode, RM20);
4996         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4997         if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
4998         rrc = match_ref(Loffset, Lcaseless, F, mb, &slength);
4999         if (rrc != 0)
5000           {
5001           if (rrc > 0) Feptr = mb->end_subject;   /* Partial match */
5002           CHECK_PARTIAL();
5003           RRETURN(MATCH_NOMATCH);
5004           }
5005         Feptr += slength;
5006         }
5007       /* Control never gets here */
5008       }
5009 
5010     /* If maximizing, find the longest string and work backwards, as long as
5011     the matched lengths for each iteration are the same. */
5012 
5013     else
5014       {
5015       BOOL samelengths = TRUE;
5016       Lstart = Feptr;     /* Starting position */
5017       Flength = Fovector[Loffset+1] - Fovector[Loffset];
5018 
5019       for (i = Lmin; i < Lmax; i++)
5020         {
5021         PCRE2_SIZE slength;
5022         rrc = match_ref(Loffset, Lcaseless, F, mb, &slength);
5023         if (rrc != 0)
5024           {
5025           /* Can't use CHECK_PARTIAL because we don't want to update Feptr in
5026           the soft partial matching case. */
5027 
5028           if (rrc > 0 && mb->partial != 0 &&
5029               mb->end_subject > mb->start_used_ptr)
5030             {
5031             mb->hitend = TRUE;
5032             if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
5033             }
5034           break;
5035           }
5036 
5037         if (slength != Flength) samelengths = FALSE;
5038         Feptr += slength;
5039         }
5040 
5041       /* If the length matched for each repetition is the same as the length of
5042       the captured group, we can easily work backwards. This is the normal
5043       case. In caseless UTF-8 mode, however, there are pairs of case-equivalent
5044       characters whose lengths (in terms of code units) differ. Because this is
5045       very rare, we handle it by re-matching fewer and fewer times. */
5046 
5047       if (samelengths)
5048         {
5049         while (Feptr >= Lstart)
5050           {
5051           RMATCH(Fecode, RM21);
5052           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5053           Feptr -= Flength;
5054           }
5055         }
5056 
5057       /* The rare case of non-matching lengths. Re-scan the repetition for each
5058       iteration. We know that match_ref() will succeed every time. */
5059 
5060       else
5061         {
5062         Lmax = i;
5063         for (;;)
5064           {
5065           RMATCH(Fecode, RM22);
5066           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5067           if (Feptr == Lstart) break; /* Failed after minimal repetition */
5068           Feptr = Lstart;
5069           Lmax--;
5070           for (i = Lmin; i < Lmax; i++)
5071             {
5072             PCRE2_SIZE slength;
5073             (void)match_ref(Loffset, Lcaseless, F, mb, &slength);
5074             Feptr += slength;
5075             }
5076           }
5077         }
5078 
5079       RRETURN(MATCH_NOMATCH);
5080       }
5081     /* Control never gets here */
5082 
5083 #undef Lcaseless
5084 #undef Lmin
5085 #undef Lmax
5086 #undef Lstart
5087 #undef Loffset
5088 
5089 
5090 
5091 /* ========================================================================= */
5092 /*           Opcodes for the start of various parenthesized items            */
5093 /* ========================================================================= */
5094 
5095     /* In all cases, if the result of RMATCH() is MATCH_THEN, check whether the
5096     (*THEN) is within the current branch by comparing the address of OP_THEN
5097     that is passed back with the end of the branch. If (*THEN) is within the
5098     current branch, and the branch is one of two or more alternatives (it
5099     either starts or ends with OP_ALT), we have reached the limit of THEN's
5100     action, so convert the return code to NOMATCH, which will cause normal
5101     backtracking to happen from now on. Otherwise, THEN is passed back to an
5102     outer alternative. This implements Perl's treatment of parenthesized
5103     groups, where a group not containing | does not affect the current
5104     alternative, that is, (X) is NOT the same as (X|(*F)). */
5105 
5106 
5107     /* ===================================================================== */
5108     /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a non-possessive
5109     bracket group, indicating that it may occur zero times. It may repeat
5110     infinitely, or not at all - i.e. it could be ()* or ()? or even (){0} in
5111     the pattern. Brackets with fixed upper repeat limits are compiled as a
5112     number of copies, with the optional ones preceded by BRAZERO or BRAMINZERO.
5113     Possessive groups with possible zero repeats are preceded by BRAPOSZERO. */
5114 
5115 #define Lnext_ecode F->temp_sptr[0]
5116 
5117     case OP_BRAZERO:
5118     Lnext_ecode = Fecode + 1;
5119     RMATCH(Lnext_ecode, RM9);
5120     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5121     do Lnext_ecode += GET(Lnext_ecode, 1); while (*Lnext_ecode == OP_ALT);
5122     Fecode = Lnext_ecode + 1 + LINK_SIZE;
5123     break;
5124 
5125     case OP_BRAMINZERO:
5126     Lnext_ecode = Fecode + 1;
5127     do Lnext_ecode += GET(Lnext_ecode, 1); while (*Lnext_ecode == OP_ALT);
5128     RMATCH(Lnext_ecode + 1 + LINK_SIZE, RM10);
5129     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5130     Fecode++;
5131     break;
5132 
5133 #undef Lnext_ecode
5134 
5135     case OP_SKIPZERO:
5136     Fecode++;
5137     do Fecode += GET(Fecode,1); while (*Fecode == OP_ALT);
5138     Fecode += 1 + LINK_SIZE;
5139     break;
5140 
5141 
5142     /* ===================================================================== */
5143     /* Handle possessive brackets with an unlimited repeat. The end of these
5144     brackets will always be OP_KETRPOS, which returns MATCH_KETRPOS without
5145     going further in the pattern. */
5146 
5147 #define Lframe_type    F->temp_32[0]
5148 #define Lmatched_once  F->temp_32[1]
5149 #define Lzero_allowed  F->temp_32[2]
5150 #define Lstart_eptr    F->temp_sptr[0]
5151 #define Lstart_group   F->temp_sptr[1]
5152 
5153     case OP_BRAPOSZERO:
5154     Lzero_allowed = TRUE;                /* Zero repeat is allowed */
5155     Fecode += 1;
5156     if (*Fecode == OP_CBRAPOS || *Fecode == OP_SCBRAPOS)
5157       goto POSSESSIVE_CAPTURE;
5158     goto POSSESSIVE_NON_CAPTURE;
5159 
5160     case OP_BRAPOS:
5161     case OP_SBRAPOS:
5162     Lzero_allowed = FALSE;               /* Zero repeat not allowed */
5163 
5164     POSSESSIVE_NON_CAPTURE:
5165     Lframe_type = GF_NOCAPTURE;          /* Remembered frame type */
5166     goto POSSESSIVE_GROUP;
5167 
5168     case OP_CBRAPOS:
5169     case OP_SCBRAPOS:
5170     Lzero_allowed = FALSE;               /* Zero repeat not allowed */
5171 
5172     POSSESSIVE_CAPTURE:
5173     number = GET2(Fecode, 1+LINK_SIZE);
5174     Lframe_type = GF_CAPTURE | number;   /* Remembered frame type */
5175 
5176     POSSESSIVE_GROUP:
5177     Lmatched_once = FALSE;               /* Never matched */
5178     Lstart_group = Fecode;               /* Start of this group */
5179 
5180     for (;;)
5181       {
5182       Lstart_eptr = Feptr;               /* Position at group start */
5183       group_frame_type = Lframe_type;
5184       RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM8);
5185       if (rrc == MATCH_KETRPOS)
5186         {
5187         Lmatched_once = TRUE;            /* Matched at least once */
5188         if (Feptr == Lstart_eptr)        /* Empty match; skip to end */
5189           {
5190           do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT);
5191           break;
5192           }
5193 
5194         Fecode = Lstart_group;
5195         continue;
5196         }
5197 
5198       /* See comment above about handling THEN. */
5199 
5200       if (rrc == MATCH_THEN)
5201         {
5202         PCRE2_SPTR next_ecode = Fecode + GET(Fecode,1);
5203         if (mb->verb_ecode_ptr < next_ecode &&
5204             (*Fecode == OP_ALT || *next_ecode == OP_ALT))
5205           rrc = MATCH_NOMATCH;
5206         }
5207 
5208       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5209       Fecode += GET(Fecode, 1);
5210       if (*Fecode != OP_ALT) break;
5211       }
5212 
5213     /* Success if matched something or zero repeat allowed */
5214 
5215     if (Lmatched_once || Lzero_allowed)
5216       {
5217       Fecode += 1 + LINK_SIZE;
5218       break;
5219       }
5220 
5221     RRETURN(MATCH_NOMATCH);
5222 
5223 #undef Lmatched_once
5224 #undef Lzero_allowed
5225 #undef Lframe_type
5226 #undef Lstart_eptr
5227 #undef Lstart_group
5228 
5229 
5230     /* ===================================================================== */
5231     /* Handle non-capturing brackets that cannot match an empty string. When we
5232     get to the final alternative within the brackets, as long as there are no
5233     THEN's in the pattern, we can optimize by not recording a new backtracking
5234     point. (Ideally we should test for a THEN within this group, but we don't
5235     have that information.) Don't do this if we are at the very top level,
5236     however, because that would make handling assertions and once-only brackets
5237     messier when there is nothing to go back to. */
5238 
5239 #define Lframe_type F->temp_32[0]     /* Set for all that use GROUPLOOP */
5240 #define Lnext_branch F->temp_sptr[0]  /* Used only in OP_BRA handling */
5241 
5242     case OP_BRA:
5243     if (mb->hasthen || Frdepth == 0)
5244       {
5245       Lframe_type = 0;
5246       goto GROUPLOOP;
5247       }
5248 
5249     for (;;)
5250       {
5251       Lnext_branch = Fecode + GET(Fecode, 1);
5252       if (*Lnext_branch != OP_ALT) break;
5253 
5254       /* This is never the final branch. We do not need to test for MATCH_THEN
5255       here because this code is not used when there is a THEN in the pattern. */
5256 
5257       RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM1);
5258       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5259       Fecode = Lnext_branch;
5260       }
5261 
5262     /* Hit the start of the final branch. Continue at this level. */
5263 
5264     Fecode += PRIV(OP_lengths)[*Fecode];
5265     break;
5266 
5267 #undef Lnext_branch
5268 
5269 
5270     /* ===================================================================== */
5271     /* Handle a capturing bracket, other than those that are possessive with an
5272     unlimited repeat. */
5273 
5274     case OP_CBRA:
5275     case OP_SCBRA:
5276     Lframe_type = GF_CAPTURE | GET2(Fecode, 1+LINK_SIZE);
5277     goto GROUPLOOP;
5278 
5279 
5280     /* ===================================================================== */
5281     /* Atomic groups and non-capturing brackets that can match an empty string
5282     must record a backtracking point and also set up a chained frame. */
5283 
5284     case OP_ONCE:
5285     case OP_SCRIPT_RUN:
5286     case OP_SBRA:
5287     Lframe_type = GF_NOCAPTURE | Fop;
5288 
5289     GROUPLOOP:
5290     for (;;)
5291       {
5292       group_frame_type = Lframe_type;
5293       RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM2);
5294       if (rrc == MATCH_THEN)
5295         {
5296         PCRE2_SPTR next_ecode = Fecode + GET(Fecode,1);
5297         if (mb->verb_ecode_ptr < next_ecode &&
5298             (*Fecode == OP_ALT || *next_ecode == OP_ALT))
5299           rrc = MATCH_NOMATCH;
5300         }
5301       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5302       Fecode += GET(Fecode, 1);
5303       if (*Fecode != OP_ALT) RRETURN(MATCH_NOMATCH);
5304       }
5305     /* Control never reaches here. */
5306 
5307 #undef Lframe_type
5308 
5309 
5310     /* ===================================================================== */
5311     /* Recursion either matches the current regex, or some subexpression. The
5312     offset data is the offset to the starting bracket from the start of the
5313     whole pattern. (This is so that it works from duplicated subpatterns.) */
5314 
5315 #define Lframe_type F->temp_32[0]
5316 #define Lstart_branch F->temp_sptr[0]
5317 
5318     case OP_RECURSE:
5319     bracode = mb->start_code + GET(Fecode, 1);
5320     number = (bracode == mb->start_code)? 0 : GET2(bracode, 1 + LINK_SIZE);
5321 
5322     /* If we are already in a recursion, check for repeating the same one
5323     without advancing the subject pointer. This should catch convoluted mutual
5324     recursions. (Some simple cases are caught at compile time.) */
5325 
5326     if (Fcurrent_recurse != RECURSE_UNSET)
5327       {
5328       offset = Flast_group_offset;
5329       while (offset != PCRE2_UNSET)
5330         {
5331         N = (heapframe *)((char *)mb->match_frames + offset);
5332         P = (heapframe *)((char *)N - frame_size);
5333         if (N->group_frame_type == (GF_RECURSE | number))
5334           {
5335           if (Feptr == P->eptr) return PCRE2_ERROR_RECURSELOOP;
5336           break;
5337           }
5338         offset = P->last_group_offset;
5339         }
5340       }
5341 
5342     /* Now run the recursion, branch by branch. */
5343 
5344     Lstart_branch = bracode;
5345     Lframe_type = GF_RECURSE | number;
5346 
5347     for (;;)
5348       {
5349       PCRE2_SPTR next_ecode;
5350 
5351       group_frame_type = Lframe_type;
5352       RMATCH(Lstart_branch + PRIV(OP_lengths)[*Lstart_branch], RM11);
5353       next_ecode = Lstart_branch + GET(Lstart_branch,1);
5354 
5355       /* Handle backtracking verbs, which are defined in a range that can
5356       easily be tested for. PCRE does not allow THEN, SKIP, PRUNE or COMMIT to
5357       escape beyond a recursion; they cause a NOMATCH for the entire recursion.
5358 
5359       When one of these verbs triggers, the current recursion group number is
5360       recorded. If it matches the recursion we are processing, the verb
5361       happened within the recursion and we must deal with it. Otherwise it must
5362       have happened after the recursion completed, and so has to be passed
5363       back. See comment above about handling THEN. */
5364 
5365       if (rrc >= MATCH_BACKTRACK_MIN && rrc <= MATCH_BACKTRACK_MAX &&
5366           mb->verb_current_recurse == (Lframe_type ^ GF_RECURSE))
5367         {
5368         if (rrc == MATCH_THEN && mb->verb_ecode_ptr < next_ecode &&
5369             (*Lstart_branch == OP_ALT || *next_ecode == OP_ALT))
5370           rrc = MATCH_NOMATCH;
5371         else RRETURN(MATCH_NOMATCH);
5372         }
5373 
5374       /* Note that carrying on after (*ACCEPT) in a recursion is handled in the
5375       OP_ACCEPT code. Nothing needs to be done here. */
5376 
5377       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5378       Lstart_branch = next_ecode;
5379       if (*Lstart_branch != OP_ALT) RRETURN(MATCH_NOMATCH);
5380       }
5381     /* Control never reaches here. */
5382 
5383 #undef Lframe_type
5384 #undef Lstart_branch
5385 
5386 
5387     /* ===================================================================== */
5388     /* Positive assertions are like other groups except that PCRE doesn't allow
5389     the effect of (*THEN) to escape beyond an assertion; it is therefore
5390     treated as NOMATCH. (*ACCEPT) is treated as a successful assertion, with
5391     its captures and mark retained. Any other return is an error. */
5392 
5393 #define Lframe_type  F->temp_32[0]
5394 
5395     case OP_ASSERT:
5396     case OP_ASSERTBACK:
5397     case OP_ASSERT_NA:
5398     case OP_ASSERTBACK_NA:
5399     Lframe_type = GF_NOCAPTURE | Fop;
5400     for (;;)
5401       {
5402       group_frame_type = Lframe_type;
5403       RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM3);
5404       if (rrc == MATCH_ACCEPT)
5405         {
5406         memcpy(Fovector,
5407               (char *)assert_accept_frame + offsetof(heapframe, ovector),
5408               assert_accept_frame->offset_top * sizeof(PCRE2_SIZE));
5409         Foffset_top = assert_accept_frame->offset_top;
5410         Fmark = assert_accept_frame->mark;
5411         break;
5412         }
5413       if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
5414       Fecode += GET(Fecode, 1);
5415       if (*Fecode != OP_ALT) RRETURN(MATCH_NOMATCH);
5416       }
5417 
5418     do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT);
5419     Fecode += 1 + LINK_SIZE;
5420     break;
5421 
5422 #undef Lframe_type
5423 
5424 
5425     /* ===================================================================== */
5426     /* Handle negative assertions. Loop for each non-matching branch as for
5427     positive assertions. */
5428 
5429 #define Lframe_type  F->temp_32[0]
5430 
5431     case OP_ASSERT_NOT:
5432     case OP_ASSERTBACK_NOT:
5433     Lframe_type  = GF_NOCAPTURE | Fop;
5434 
5435     for (;;)
5436       {
5437       group_frame_type = Lframe_type;
5438       RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM4);
5439       switch(rrc)
5440         {
5441         case MATCH_ACCEPT:   /* Assertion matched, therefore it fails. */
5442         case MATCH_MATCH:
5443         RRETURN (MATCH_NOMATCH);
5444 
5445         case MATCH_NOMATCH:  /* Branch failed, try next if present. */
5446         case MATCH_THEN:
5447         Fecode += GET(Fecode, 1);
5448         if (*Fecode != OP_ALT) goto ASSERT_NOT_FAILED;
5449         break;
5450 
5451         case MATCH_COMMIT:   /* Assertion forced to fail, therefore continue. */
5452         case MATCH_SKIP:
5453         case MATCH_PRUNE:
5454         do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT);
5455         goto ASSERT_NOT_FAILED;
5456 
5457         default:             /* Pass back any other return */
5458         RRETURN(rrc);
5459         }
5460       }
5461 
5462     /* None of the branches have matched or there was a backtrack to (*COMMIT),
5463     (*SKIP), (*PRUNE), or (*THEN) in the last branch. This is success for a
5464     negative assertion, so carry on. */
5465 
5466     ASSERT_NOT_FAILED:
5467     Fecode += 1 + LINK_SIZE;
5468     break;
5469 
5470 #undef Lframe_type
5471 
5472 
5473     /* ===================================================================== */
5474     /* The callout item calls an external function, if one is provided, passing
5475     details of the match so far. This is mainly for debugging, though the
5476     function is able to force a failure. */
5477 
5478     case OP_CALLOUT:
5479     case OP_CALLOUT_STR:
5480     rrc = do_callout(F, mb, &length);
5481     if (rrc > 0) RRETURN(MATCH_NOMATCH);
5482     if (rrc < 0) RRETURN(rrc);
5483     Fecode += length;
5484     break;
5485 
5486 
5487     /* ===================================================================== */
5488     /* Conditional group: compilation checked that there are no more than two
5489     branches. If the condition is false, skipping the first branch takes us
5490     past the end of the item if there is only one branch, but that's exactly
5491     what we want. */
5492 
5493     case OP_COND:
5494     case OP_SCOND:
5495 
5496     /* The variable Flength will be added to Fecode when the condition is
5497     false, to get to the second branch. Setting it to the offset to the ALT or
5498     KET, then incrementing Fecode achieves this effect. However, if the second
5499     branch is non-existent, we must point to the KET so that the end of the
5500     group is correctly processed. We now have Fecode pointing to the condition
5501     or callout. */
5502 
5503     Flength = GET(Fecode, 1);    /* Offset to the second branch */
5504     if (Fecode[Flength] != OP_ALT) Flength -= 1 + LINK_SIZE;
5505     Fecode += 1 + LINK_SIZE;     /* From this opcode */
5506 
5507     /* Because of the way auto-callout works during compile, a callout item is
5508     inserted between OP_COND and an assertion condition. Such a callout can
5509     also be inserted manually. */
5510 
5511     if (*Fecode == OP_CALLOUT || *Fecode == OP_CALLOUT_STR)
5512       {
5513       rrc = do_callout(F, mb, &length);
5514       if (rrc > 0) RRETURN(MATCH_NOMATCH);
5515       if (rrc < 0) RRETURN(rrc);
5516 
5517       /* Advance Fecode past the callout, so it now points to the condition. We
5518       must adjust Flength so that the value of Fecode+Flength is unchanged. */
5519 
5520       Fecode += length;
5521       Flength -= length;
5522       }
5523 
5524     /* Test the various possible conditions */
5525 
5526     condition = FALSE;
5527     switch(*Fecode)
5528       {
5529       case OP_RREF:                  /* Group recursion test */
5530       if (Fcurrent_recurse != RECURSE_UNSET)
5531         {
5532         number = GET2(Fecode, 1);
5533         condition = (number == RREF_ANY || number == Fcurrent_recurse);
5534         }
5535       break;
5536 
5537       case OP_DNRREF:       /* Duplicate named group recursion test */
5538       if (Fcurrent_recurse != RECURSE_UNSET)
5539         {
5540         int count = GET2(Fecode, 1 + IMM2_SIZE);
5541         PCRE2_SPTR slot = mb->name_table + GET2(Fecode, 1) * mb->name_entry_size;
5542         while (count-- > 0)
5543           {
5544           number = GET2(slot, 0);
5545           condition = number == Fcurrent_recurse;
5546           if (condition) break;
5547           slot += mb->name_entry_size;
5548           }
5549         }
5550       break;
5551 
5552       case OP_CREF:                         /* Numbered group used test */
5553       offset = (GET2(Fecode, 1) << 1) - 2;  /* Doubled ref number */
5554       condition = offset < Foffset_top && Fovector[offset] != PCRE2_UNSET;
5555       break;
5556 
5557       case OP_DNCREF:      /* Duplicate named group used test */
5558         {
5559         int count = GET2(Fecode, 1 + IMM2_SIZE);
5560         PCRE2_SPTR slot = mb->name_table + GET2(Fecode, 1) * mb->name_entry_size;
5561         while (count-- > 0)
5562           {
5563           offset = (GET2(slot, 0) << 1) - 2;
5564           condition = offset < Foffset_top && Fovector[offset] != PCRE2_UNSET;
5565           if (condition) break;
5566           slot += mb->name_entry_size;
5567           }
5568         }
5569       break;
5570 
5571       case OP_FALSE:
5572       case OP_FAIL:   /* The assertion (?!) becomes OP_FAIL */
5573       break;
5574 
5575       case OP_TRUE:
5576       condition = TRUE;
5577       break;
5578 
5579       /* The condition is an assertion. Run code similar to the assertion code
5580       above. */
5581 
5582 #define Lpositive      F->temp_32[0]
5583 #define Lstart_branch  F->temp_sptr[0]
5584 
5585       default:
5586       Lpositive = (*Fecode == OP_ASSERT || *Fecode == OP_ASSERTBACK);
5587       Lstart_branch = Fecode;
5588 
5589       for (;;)
5590         {
5591         group_frame_type = GF_CONDASSERT | *Fecode;
5592         RMATCH(Lstart_branch + PRIV(OP_lengths)[*Lstart_branch], RM5);
5593 
5594         switch(rrc)
5595           {
5596           case MATCH_ACCEPT:  /* Save captures */
5597           memcpy(Fovector,
5598                 (char *)assert_accept_frame + offsetof(heapframe, ovector),
5599                 assert_accept_frame->offset_top * sizeof(PCRE2_SIZE));
5600           Foffset_top = assert_accept_frame->offset_top;
5601 
5602           /* Fall through */
5603           /* In the case of a match, the captures have already been put into
5604           the current frame. */
5605 
5606           case MATCH_MATCH:
5607           condition = Lpositive;   /* TRUE for positive assertion */
5608           break;
5609 
5610           /* PCRE doesn't allow the effect of (*THEN) to escape beyond an
5611           assertion; it is therefore always treated as NOMATCH. */
5612 
5613           case MATCH_NOMATCH:
5614           case MATCH_THEN:
5615           Lstart_branch += GET(Lstart_branch, 1);
5616           if (*Lstart_branch == OP_ALT) continue;  /* Try next branch */
5617           condition = !Lpositive;  /* TRUE for negative assertion */
5618           break;
5619 
5620           /* These force no match without checking other branches. */
5621 
5622           case MATCH_COMMIT:
5623           case MATCH_SKIP:
5624           case MATCH_PRUNE:
5625           condition = !Lpositive;
5626           break;
5627 
5628           default:
5629           RRETURN(rrc);
5630           }
5631         break;  /* Out of the branch loop */
5632         }
5633 
5634       /* If the condition is true, find the end of the assertion so that
5635       advancing past it gets us to the start of the first branch. */
5636 
5637       if (condition)
5638         {
5639         do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT);
5640         }
5641       break;  /* End of assertion condition */
5642       }
5643 
5644 #undef Lpositive
5645 #undef Lstart_branch
5646 
5647     /* Choose branch according to the condition. */
5648 
5649     Fecode += condition? PRIV(OP_lengths)[*Fecode] : Flength;
5650 
5651     /* If the opcode is OP_SCOND it means we are at a repeated conditional
5652     group that might match an empty string. We must therefore descend a level
5653     so that the start is remembered for checking. For OP_COND we can just
5654     continue at this level. */
5655 
5656     if (Fop == OP_SCOND)
5657       {
5658       group_frame_type  = GF_NOCAPTURE | Fop;
5659       RMATCH(Fecode, RM35);
5660       RRETURN(rrc);
5661       }
5662     break;
5663 
5664 
5665 
5666 /* ========================================================================= */
5667 /*                  End of start of parenthesis opcodes                      */
5668 /* ========================================================================= */
5669 
5670 
5671     /* ===================================================================== */
5672     /* Move the subject pointer back. This occurs only at the start of each
5673     branch of a lookbehind assertion. If we are too close to the start to move
5674     back, fail. When working with UTF-8 we move back a number of characters,
5675     not bytes. */
5676 
5677     case OP_REVERSE:
5678     number = GET(Fecode, 1);
5679 #ifdef SUPPORT_UNICODE
5680     if (utf)
5681       {
5682       while (number-- > 0)
5683         {
5684         if (Feptr <= mb->check_subject) RRETURN(MATCH_NOMATCH);
5685         Feptr--;
5686         BACKCHAR(Feptr);
5687         }
5688       }
5689     else
5690 #endif
5691 
5692     /* No UTF-8 support, or not in UTF-8 mode: count is code unit count */
5693 
5694       {
5695       if ((ptrdiff_t)number > Feptr - mb->start_subject) RRETURN(MATCH_NOMATCH);
5696       Feptr -= number;
5697       }
5698 
5699     /* Save the earliest consulted character, then skip to next opcode */
5700 
5701     if (Feptr < mb->start_used_ptr) mb->start_used_ptr = Feptr;
5702     Fecode += 1 + LINK_SIZE;
5703     break;
5704 
5705 
5706     /* ===================================================================== */
5707     /* An alternation is the end of a branch; scan along to find the end of the
5708     bracketed group. */
5709 
5710     case OP_ALT:
5711     do Fecode += GET(Fecode,1); while (*Fecode == OP_ALT);
5712     break;
5713 
5714 
5715     /* ===================================================================== */
5716     /* The end of a parenthesized group. For all but OP_BRA and OP_COND, the
5717     starting frame was added to the chained frames in order to remember the
5718     starting subject position for the group. */
5719 
5720     case OP_KET:
5721     case OP_KETRMIN:
5722     case OP_KETRMAX:
5723     case OP_KETRPOS:
5724 
5725     bracode = Fecode - GET(Fecode, 1);
5726 
5727     /* Point N to the frame at the start of the most recent group.
5728     Remember the subject pointer at the start of the group. */
5729 
5730     if (*bracode != OP_BRA && *bracode != OP_COND)
5731       {
5732       N = (heapframe *)((char *)mb->match_frames + Flast_group_offset);
5733       P = (heapframe *)((char *)N - frame_size);
5734       Flast_group_offset = P->last_group_offset;
5735 
5736 #ifdef DEBUG_SHOW_RMATCH
5737       fprintf(stderr, "++ KET for frame=%d type=%x prev char offset=%lu\n",
5738         N->rdepth, N->group_frame_type,
5739         (char *)P->eptr - (char *)mb->start_subject);
5740 #endif
5741 
5742       /* If we are at the end of an assertion that is a condition, return a
5743       match, discarding any intermediate backtracking points. Copy back the
5744       mark setting and the captures into the frame before N so that they are
5745       set on return. Doing this for all assertions, both positive and negative,
5746       seems to match what Perl does. */
5747 
5748       if (GF_IDMASK(N->group_frame_type) == GF_CONDASSERT)
5749         {
5750         memcpy((char *)P + offsetof(heapframe, ovector), Fovector,
5751           Foffset_top * sizeof(PCRE2_SIZE));
5752         P->offset_top = Foffset_top;
5753         P->mark = Fmark;
5754         Fback_frame = (char *)F - (char *)P;
5755         RRETURN(MATCH_MATCH);
5756         }
5757       }
5758     else P = NULL;   /* Indicates starting frame not recorded */
5759 
5760     /* The group was not a conditional assertion. */
5761 
5762     switch (*bracode)
5763       {
5764       case OP_BRA:    /* No need to do anything for these */
5765       case OP_COND:
5766       case OP_SCOND:
5767       break;
5768 
5769       /* Non-atomic positive assertions are like OP_BRA, except that the
5770       subject pointer must be put back to where it was at the start of the
5771       assertion. */
5772 
5773       case OP_ASSERT_NA:
5774       case OP_ASSERTBACK_NA:
5775       if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
5776       Feptr = P->eptr;
5777       break;
5778 
5779       /* Atomic positive assertions are like OP_ONCE, except that in addition
5780       the subject pointer must be put back to where it was at the start of the
5781       assertion. */
5782 
5783       case OP_ASSERT:
5784       case OP_ASSERTBACK:
5785       if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
5786       Feptr = P->eptr;
5787       /* Fall through */
5788 
5789       /* For an atomic group, discard internal backtracking points. We must
5790       also ensure that any remaining branches within the top-level of the group
5791       are not tried. Do this by adjusting the code pointer within the backtrack
5792       frame so that it points to the final branch. */
5793 
5794       case OP_ONCE:
5795       Fback_frame = ((char *)F - (char *)P);
5796       for (;;)
5797         {
5798         uint32_t y = GET(P->ecode,1);
5799         if ((P->ecode)[y] != OP_ALT) break;
5800         P->ecode += y;
5801         }
5802       break;
5803 
5804       /* A matching negative assertion returns MATCH, which is turned into
5805       NOMATCH at the assertion level. */
5806 
5807       case OP_ASSERT_NOT:
5808       case OP_ASSERTBACK_NOT:
5809       RRETURN(MATCH_MATCH);
5810 
5811       /* At the end of a script run, apply the script-checking rules. This code
5812       will never be exercised if Unicode support is not compiled, because in
5813       that environment script runs cause an error at compile time. */
5814 
5815       case OP_SCRIPT_RUN:
5816       if (!PRIV(script_run)(P->eptr, Feptr, utf)) RRETURN(MATCH_NOMATCH);
5817       break;
5818 
5819       /* Whole-pattern recursion is coded as a recurse into group 0, so it
5820       won't be picked up here. Instead, we catch it when the OP_END is reached.
5821       Other recursion is handled here. */
5822 
5823       case OP_CBRA:
5824       case OP_CBRAPOS:
5825       case OP_SCBRA:
5826       case OP_SCBRAPOS:
5827       number = GET2(bracode, 1+LINK_SIZE);
5828 
5829       /* Handle a recursively called group. We reinstate the previous set of
5830       captures and then carry on after the recursion call. */
5831 
5832       if (Fcurrent_recurse == number)
5833         {
5834         P = (heapframe *)((char *)N - frame_size);
5835         memcpy((char *)F + offsetof(heapframe, ovector), P->ovector,
5836           Foffset_top * sizeof(PCRE2_SIZE));
5837         Foffset_top = P->offset_top;
5838         Fcapture_last = P->capture_last;
5839         Fcurrent_recurse = P->current_recurse;
5840         Fecode = P->ecode + 1 + LINK_SIZE;
5841         continue;  /* With next opcode */
5842         }
5843 
5844       /* Deal with actual capturing. */
5845 
5846       offset = (number << 1) - 2;
5847       Fcapture_last = number;
5848       Fovector[offset] = P->eptr - mb->start_subject;
5849       Fovector[offset+1] = Feptr - mb->start_subject;
5850       if (offset >= Foffset_top) Foffset_top = offset + 2;
5851       break;
5852       }  /* End actions relating to the starting opcode */
5853 
5854     /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
5855     and return the MATCH_KETRPOS. This makes it possible to do the repeats one
5856     at a time from the outer level. This must precede the empty string test -
5857     in this case that test is done at the outer level. */
5858 
5859     if (*Fecode == OP_KETRPOS)
5860       {
5861       memcpy((char *)P + offsetof(heapframe, eptr),
5862              (char *)F + offsetof(heapframe, eptr),
5863              frame_copy_size);
5864       RRETURN(MATCH_KETRPOS);
5865       }
5866 
5867     /* Handle the different kinds of closing brackets. A non-repeating ket
5868     needs no special action, just continuing at this level. This also happens
5869     for the repeating kets if the group matched no characters, in order to
5870     forcibly break infinite loops. Otherwise, the repeating kets try the rest
5871     of the pattern or restart from the preceding bracket, in the appropriate
5872     order. */
5873 
5874     if (Fop != OP_KET && (P == NULL || Feptr != P->eptr))
5875       {
5876       if (Fop == OP_KETRMIN)
5877         {
5878         RMATCH(Fecode + 1 + LINK_SIZE, RM6);
5879         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5880         Fecode -= GET(Fecode, 1);
5881         break;   /* End of ket processing */
5882         }
5883 
5884       /* Repeat the maximum number of times (KETRMAX) */
5885 
5886       RMATCH(bracode, RM7);
5887       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5888       }
5889 
5890     /* Carry on at this level for a non-repeating ket, or after matching an
5891     empty string, or after repeating for a maximum number of times. */
5892 
5893     Fecode += 1 + LINK_SIZE;
5894     break;
5895 
5896 
5897     /* ===================================================================== */
5898     /* Start and end of line assertions, not multiline mode. */
5899 
5900     case OP_CIRC:   /* Start of line, unless PCRE2_NOTBOL is set. */
5901     if (Feptr != mb->start_subject || (mb->moptions & PCRE2_NOTBOL) != 0)
5902       RRETURN(MATCH_NOMATCH);
5903     Fecode++;
5904     break;
5905 
5906     case OP_SOD:    /* Unconditional start of subject */
5907     if (Feptr != mb->start_subject) RRETURN(MATCH_NOMATCH);
5908     Fecode++;
5909     break;
5910 
5911     /* When PCRE2_NOTEOL is unset, assert before the subject end, or a
5912     terminating newline unless PCRE2_DOLLAR_ENDONLY is set. */
5913 
5914     case OP_DOLL:
5915     if ((mb->moptions & PCRE2_NOTEOL) != 0) RRETURN(MATCH_NOMATCH);
5916     if ((mb->poptions & PCRE2_DOLLAR_ENDONLY) == 0) goto ASSERT_NL_OR_EOS;
5917 
5918     /* Fall through */
5919     /* Unconditional end of subject assertion (\z) */
5920 
5921     case OP_EOD:
5922     if (Feptr < mb->end_subject) RRETURN(MATCH_NOMATCH);
5923     if (mb->partial != 0)
5924       {
5925       mb->hitend = TRUE;
5926       if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
5927       }
5928     Fecode++;
5929     break;
5930 
5931     /* End of subject or ending \n assertion (\Z) */
5932 
5933     case OP_EODN:
5934     ASSERT_NL_OR_EOS:
5935     if (Feptr < mb->end_subject &&
5936         (!IS_NEWLINE(Feptr) || Feptr != mb->end_subject - mb->nllen))
5937       {
5938       if (mb->partial != 0 &&
5939           Feptr + 1 >= mb->end_subject &&
5940           NLBLOCK->nltype == NLTYPE_FIXED &&
5941           NLBLOCK->nllen == 2 &&
5942           UCHAR21TEST(Feptr) == NLBLOCK->nl[0])
5943         {
5944         mb->hitend = TRUE;
5945         if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
5946         }
5947       RRETURN(MATCH_NOMATCH);
5948       }
5949 
5950     /* Either at end of string or \n before end. */
5951 
5952     if (mb->partial != 0)
5953       {
5954       mb->hitend = TRUE;
5955       if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
5956       }
5957     Fecode++;
5958     break;
5959 
5960 
5961     /* ===================================================================== */
5962     /* Start and end of line assertions, multiline mode. */
5963 
5964     /* Start of subject unless notbol, or after any newline except for one at
5965     the very end, unless PCRE2_ALT_CIRCUMFLEX is set. */
5966 
5967     case OP_CIRCM:
5968     if ((mb->moptions & PCRE2_NOTBOL) != 0 && Feptr == mb->start_subject)
5969       RRETURN(MATCH_NOMATCH);
5970     if (Feptr != mb->start_subject &&
5971         ((Feptr == mb->end_subject &&
5972            (mb->poptions & PCRE2_ALT_CIRCUMFLEX) == 0) ||
5973          !WAS_NEWLINE(Feptr)))
5974       RRETURN(MATCH_NOMATCH);
5975     Fecode++;
5976     break;
5977 
5978     /* Assert before any newline, or before end of subject unless noteol is
5979     set. */
5980 
5981     case OP_DOLLM:
5982     if (Feptr < mb->end_subject)
5983       {
5984       if (!IS_NEWLINE(Feptr))
5985         {
5986         if (mb->partial != 0 &&
5987             Feptr + 1 >= mb->end_subject &&
5988             NLBLOCK->nltype == NLTYPE_FIXED &&
5989             NLBLOCK->nllen == 2 &&
5990             UCHAR21TEST(Feptr) == NLBLOCK->nl[0])
5991           {
5992           mb->hitend = TRUE;
5993           if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
5994           }
5995         RRETURN(MATCH_NOMATCH);
5996         }
5997       }
5998     else
5999       {
6000       if ((mb->moptions & PCRE2_NOTEOL) != 0) RRETURN(MATCH_NOMATCH);
6001       SCHECK_PARTIAL();
6002       }
6003     Fecode++;
6004     break;
6005 
6006 
6007     /* ===================================================================== */
6008     /* Start of match assertion */
6009 
6010     case OP_SOM:
6011     if (Feptr != mb->start_subject + mb->start_offset) RRETURN(MATCH_NOMATCH);
6012     Fecode++;
6013     break;
6014 
6015 
6016     /* ===================================================================== */
6017     /* Reset the start of match point */
6018 
6019     case OP_SET_SOM:
6020     Fstart_match = Feptr;
6021     Fecode++;
6022     break;
6023 
6024 
6025     /* ===================================================================== */
6026     /* Word boundary assertions. Find out if the previous and current
6027     characters are "word" characters. It takes a bit more work in UTF mode.
6028     Characters > 255 are assumed to be "non-word" characters when PCRE2_UCP is
6029     not set. When it is set, use Unicode properties if available, even when not
6030     in UTF mode. Remember the earliest and latest consulted characters. */
6031 
6032     case OP_NOT_WORD_BOUNDARY:
6033     case OP_WORD_BOUNDARY:
6034     if (Feptr == mb->check_subject) prev_is_word = FALSE; else
6035       {
6036       PCRE2_SPTR lastptr = Feptr - 1;
6037 #ifdef SUPPORT_UNICODE
6038       if (utf)
6039         {
6040         BACKCHAR(lastptr);
6041         GETCHAR(fc, lastptr);
6042         }
6043       else
6044 #endif  /* SUPPORT_UNICODE */
6045       fc = *lastptr;
6046       if (lastptr < mb->start_used_ptr) mb->start_used_ptr = lastptr;
6047 #ifdef SUPPORT_UNICODE
6048       if ((mb->poptions & PCRE2_UCP) != 0)
6049         {
6050         if (fc == '_') prev_is_word = TRUE; else
6051           {
6052           int cat = UCD_CATEGORY(fc);
6053           prev_is_word = (cat == ucp_L || cat == ucp_N);
6054           }
6055         }
6056       else
6057 #endif  /* SUPPORT_UNICODE */
6058       prev_is_word = CHMAX_255(fc) && (mb->ctypes[fc] & ctype_word) != 0;
6059       }
6060 
6061     /* Get status of next character */
6062 
6063     if (Feptr >= mb->end_subject)
6064       {
6065       SCHECK_PARTIAL();
6066       cur_is_word = FALSE;
6067       }
6068     else
6069       {
6070       PCRE2_SPTR nextptr = Feptr + 1;
6071 #ifdef SUPPORT_UNICODE
6072       if (utf)
6073         {
6074         FORWARDCHARTEST(nextptr, mb->end_subject);
6075         GETCHAR(fc, Feptr);
6076         }
6077       else
6078 #endif  /* SUPPORT_UNICODE */
6079       fc = *Feptr;
6080       if (nextptr > mb->last_used_ptr) mb->last_used_ptr = nextptr;
6081 #ifdef SUPPORT_UNICODE
6082       if ((mb->poptions & PCRE2_UCP) != 0)
6083         {
6084         if (fc == '_') cur_is_word = TRUE; else
6085           {
6086           int cat = UCD_CATEGORY(fc);
6087           cur_is_word = (cat == ucp_L || cat == ucp_N);
6088           }
6089         }
6090       else
6091 #endif  /* SUPPORT_UNICODE */
6092       cur_is_word = CHMAX_255(fc) && (mb->ctypes[fc] & ctype_word) != 0;
6093       }
6094 
6095     /* Now see if the situation is what we want */
6096 
6097     if ((*Fecode++ == OP_WORD_BOUNDARY)?
6098          cur_is_word == prev_is_word : cur_is_word != prev_is_word)
6099       RRETURN(MATCH_NOMATCH);
6100     break;
6101 
6102 
6103     /* ===================================================================== */
6104     /* Backtracking (*VERB)s, with and without arguments. Note that if the
6105     pattern is successfully matched, we do not come back from RMATCH. */
6106 
6107     case OP_MARK:
6108     Fmark = mb->nomatch_mark = Fecode + 2;
6109     RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM12);
6110 
6111     /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
6112     argument, and we must check whether that argument matches this MARK's
6113     argument. It is passed back in mb->verb_skip_ptr. If it does match, we
6114     return MATCH_SKIP with mb->verb_skip_ptr now pointing to the subject
6115     position that corresponds to this mark. Otherwise, pass back the return
6116     code unaltered. */
6117 
6118     if (rrc == MATCH_SKIP_ARG &&
6119              PRIV(strcmp)(Fecode + 2, mb->verb_skip_ptr) == 0)
6120       {
6121       mb->verb_skip_ptr = Feptr;   /* Pass back current position */
6122       RRETURN(MATCH_SKIP);
6123       }
6124     RRETURN(rrc);
6125 
6126     case OP_FAIL:
6127     RRETURN(MATCH_NOMATCH);
6128 
6129     /* Record the current recursing group number in mb->verb_current_recurse
6130     when a backtracking return such as MATCH_COMMIT is given. This enables the
6131     recurse processing to catch verbs from within the recursion. */
6132 
6133     case OP_COMMIT:
6134     RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM13);
6135     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6136     mb->verb_current_recurse = Fcurrent_recurse;
6137     RRETURN(MATCH_COMMIT);
6138 
6139     case OP_COMMIT_ARG:
6140     Fmark = mb->nomatch_mark = Fecode + 2;
6141     RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM36);
6142     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6143     mb->verb_current_recurse = Fcurrent_recurse;
6144     RRETURN(MATCH_COMMIT);
6145 
6146     case OP_PRUNE:
6147     RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM14);
6148     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6149     mb->verb_current_recurse = Fcurrent_recurse;
6150     RRETURN(MATCH_PRUNE);
6151 
6152     case OP_PRUNE_ARG:
6153     Fmark = mb->nomatch_mark = Fecode + 2;
6154     RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM15);
6155     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6156     mb->verb_current_recurse = Fcurrent_recurse;
6157     RRETURN(MATCH_PRUNE);
6158 
6159     case OP_SKIP:
6160     RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM16);
6161     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6162     mb->verb_skip_ptr = Feptr;   /* Pass back current position */
6163     mb->verb_current_recurse = Fcurrent_recurse;
6164     RRETURN(MATCH_SKIP);
6165 
6166     /* Note that, for Perl compatibility, SKIP with an argument does NOT set
6167     nomatch_mark. When a pattern match ends with a SKIP_ARG for which there was
6168     not a matching mark, we have to re-run the match, ignoring the SKIP_ARG
6169     that failed and any that precede it (either they also failed, or were not
6170     triggered). To do this, we maintain a count of executed SKIP_ARGs. If a
6171     SKIP_ARG gets to top level, the match is re-run with mb->ignore_skip_arg
6172     set to the count of the one that failed. */
6173 
6174     case OP_SKIP_ARG:
6175     mb->skip_arg_count++;
6176     if (mb->skip_arg_count <= mb->ignore_skip_arg)
6177       {
6178       Fecode += PRIV(OP_lengths)[*Fecode] + Fecode[1];
6179       break;
6180       }
6181     RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM17);
6182     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6183 
6184     /* Pass back the current skip name and return the special MATCH_SKIP_ARG
6185     return code. This will either be caught by a matching MARK, or get to the
6186     top, where it causes a rematch with mb->ignore_skip_arg set to the value of
6187     mb->skip_arg_count. */
6188 
6189     mb->verb_skip_ptr = Fecode + 2;
6190     mb->verb_current_recurse = Fcurrent_recurse;
6191     RRETURN(MATCH_SKIP_ARG);
6192 
6193     /* For THEN (and THEN_ARG) we pass back the address of the opcode, so that
6194     the branch in which it occurs can be determined. */
6195 
6196     case OP_THEN:
6197     RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM18);
6198     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6199     mb->verb_ecode_ptr = Fecode;
6200     mb->verb_current_recurse = Fcurrent_recurse;
6201     RRETURN(MATCH_THEN);
6202 
6203     case OP_THEN_ARG:
6204     Fmark = mb->nomatch_mark = Fecode + 2;
6205     RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM19);
6206     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6207     mb->verb_ecode_ptr = Fecode;
6208     mb->verb_current_recurse = Fcurrent_recurse;
6209     RRETURN(MATCH_THEN);
6210 
6211 
6212     /* ===================================================================== */
6213     /* There's been some horrible disaster. Arrival here can only mean there is
6214     something seriously wrong in the code above or the OP_xxx definitions. */
6215 
6216     default:
6217     return PCRE2_ERROR_INTERNAL;
6218     }
6219 
6220   /* Do not insert any code in here without much thought; it is assumed
6221   that "continue" in the code above comes out to here to repeat the main
6222   loop. */
6223 
6224   }  /* End of main loop */
6225 /* Control never reaches here */
6226 
6227 
6228 /* ========================================================================= */
6229 /* The RRETURN() macro jumps here. The number that is saved in Freturn_id
6230 indicates which label we actually want to return to. The value in Frdepth is
6231 the index number of the frame in the vector. The return value has been placed
6232 in rrc. */
6233 
6234 #define LBL(val) case val: goto L_RM##val;
6235 
6236 RETURN_SWITCH:
6237 if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
6238 if (Frdepth == 0) return rrc;                     /* Exit from the top level */
6239 F = (heapframe *)((char *)F - Fback_frame);       /* Backtrack */
6240 mb->cb->callout_flags |= PCRE2_CALLOUT_BACKTRACK; /* Note for callouts */
6241 
6242 #ifdef DEBUG_SHOW_RMATCH
6243 fprintf(stderr, "++ RETURN %d to %d\n", rrc, Freturn_id);
6244 #endif
6245 
6246 switch (Freturn_id)
6247   {
6248   LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
6249   LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(16)
6250   LBL(17) LBL(18) LBL(19) LBL(20) LBL(21) LBL(22) LBL(23) LBL(24)
6251   LBL(25) LBL(26) LBL(27) LBL(28) LBL(29) LBL(30) LBL(31) LBL(32)
6252   LBL(33) LBL(34) LBL(35) LBL(36)
6253 
6254 #ifdef SUPPORT_WIDE_CHARS
6255   LBL(100) LBL(101)
6256 #endif
6257 
6258 #ifdef SUPPORT_UNICODE
6259   LBL(200) LBL(201) LBL(202) LBL(203) LBL(204) LBL(205) LBL(206)
6260   LBL(207) LBL(208) LBL(209) LBL(210) LBL(211) LBL(212) LBL(213)
6261   LBL(214) LBL(215) LBL(216) LBL(217) LBL(218) LBL(219) LBL(220)
6262   LBL(221) LBL(222) LBL(223) LBL(224) LBL(225)
6263 #endif
6264 
6265   default:
6266   return PCRE2_ERROR_INTERNAL;
6267   }
6268 #undef LBL
6269 }
6270 
6271 
6272 /*************************************************
6273 *           Match a Regular Expression           *
6274 *************************************************/
6275 
6276 /* This function applies a compiled pattern to a subject string and picks out
6277 portions of the string if it matches. Two elements in the vector are set for
6278 each substring: the offsets to the start and end of the substring.
6279 
6280 Arguments:
6281   code            points to the compiled expression
6282   subject         points to the subject string
6283   length          length of subject string (may contain binary zeros)
6284   start_offset    where to start in the subject string
6285   options         option bits
6286   match_data      points to a match_data block
6287   mcontext        points to a PCRE2 match context
6288 
6289 Returns:          > 0 => success; value is the number of ovector pairs filled
6290                   = 0 => success, but ovector is not big enough
6291                   = -1 => failed to match (PCRE2_ERROR_NOMATCH)
6292                   = -2 => partial match (PCRE2_ERROR_PARTIAL)
6293                   < -2 => some kind of unexpected problem
6294 */
6295 
6296 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_match(const pcre2_code * code,PCRE2_SPTR subject,PCRE2_SIZE length,PCRE2_SIZE start_offset,uint32_t options,pcre2_match_data * match_data,pcre2_match_context * mcontext)6297 pcre2_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
6298   PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
6299   pcre2_match_context *mcontext)
6300 {
6301 int rc;
6302 int was_zero_terminated = 0;
6303 const uint8_t *start_bits = NULL;
6304 const pcre2_real_code *re = (const pcre2_real_code *)code;
6305 
6306 BOOL anchored;
6307 BOOL firstline;
6308 BOOL has_first_cu = FALSE;
6309 BOOL has_req_cu = FALSE;
6310 BOOL startline;
6311 
6312 #if PCRE2_CODE_UNIT_WIDTH == 8
6313 PCRE2_SPTR memchr_found_first_cu;
6314 PCRE2_SPTR memchr_found_first_cu2;
6315 #endif
6316 
6317 PCRE2_UCHAR first_cu = 0;
6318 PCRE2_UCHAR first_cu2 = 0;
6319 PCRE2_UCHAR req_cu = 0;
6320 PCRE2_UCHAR req_cu2 = 0;
6321 
6322 PCRE2_SPTR bumpalong_limit;
6323 PCRE2_SPTR end_subject;
6324 PCRE2_SPTR true_end_subject;
6325 PCRE2_SPTR start_match;
6326 PCRE2_SPTR req_cu_ptr;
6327 PCRE2_SPTR start_partial;
6328 PCRE2_SPTR match_partial;
6329 
6330 #ifdef SUPPORT_JIT
6331 BOOL use_jit;
6332 #endif
6333 
6334 /* This flag is needed even when Unicode is not supported for convenience
6335 (it is used by the IS_NEWLINE macro). */
6336 
6337 BOOL utf = FALSE;
6338 
6339 #ifdef SUPPORT_UNICODE
6340 BOOL ucp = FALSE;
6341 BOOL allow_invalid;
6342 uint32_t fragment_options = 0;
6343 #ifdef SUPPORT_JIT
6344 BOOL jit_checked_utf = FALSE;
6345 #endif
6346 #endif  /* SUPPORT_UNICODE */
6347 
6348 PCRE2_SIZE frame_size;
6349 
6350 /* We need to have mb as a pointer to a match block, because the IS_NEWLINE
6351 macro is used below, and it expects NLBLOCK to be defined as a pointer. */
6352 
6353 pcre2_callout_block cb;
6354 match_block actual_match_block;
6355 match_block *mb = &actual_match_block;
6356 
6357 /* Allocate an initial vector of backtracking frames on the stack. If this
6358 proves to be too small, it is replaced by a larger one on the heap. To get a
6359 vector of the size required that is aligned for pointers, allocate it as a
6360 vector of pointers. */
6361 
6362 PCRE2_SPTR stack_frames_vector[START_FRAMES_SIZE/sizeof(PCRE2_SPTR)]
6363     PCRE2_KEEP_UNINITIALIZED;
6364 mb->stack_frames = (heapframe *)stack_frames_vector;
6365 
6366 /* Recognize NULL, length 0 as an empty string. */
6367 
6368 if (subject == NULL && length == 0) subject = (PCRE2_SPTR)"";
6369 
6370 /* Plausibility checks */
6371 
6372 if ((options & ~PUBLIC_MATCH_OPTIONS) != 0) return PCRE2_ERROR_BADOPTION;
6373 if (code == NULL || subject == NULL || match_data == NULL)
6374   return PCRE2_ERROR_NULL;
6375 
6376 start_match = subject + start_offset;
6377 req_cu_ptr = start_match - 1;
6378 if (length == PCRE2_ZERO_TERMINATED)
6379   {
6380   length = PRIV(strlen)(subject);
6381   was_zero_terminated = 1;
6382   }
6383 true_end_subject = end_subject = subject + length;
6384 
6385 if (start_offset > length) return PCRE2_ERROR_BADOFFSET;
6386 
6387 /* Check that the first field in the block is the magic number. */
6388 
6389 if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
6390 
6391 /* Check the code unit width. */
6392 
6393 if ((re->flags & PCRE2_MODE_MASK) != PCRE2_CODE_UNIT_WIDTH/8)
6394   return PCRE2_ERROR_BADMODE;
6395 
6396 /* PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART are match-time flags in the
6397 options variable for this function. Users of PCRE2 who are not calling the
6398 function directly would like to have a way of setting these flags, in the same
6399 way that they can set pcre2_compile() flags like PCRE2_NO_AUTOPOSSESS with
6400 constructions like (*NO_AUTOPOSSESS). To enable this, (*NOTEMPTY) and
6401 (*NOTEMPTY_ATSTART) set bits in the pattern's "flags" field, which we now
6402 transfer to the options for this function. The bits are guaranteed to be
6403 adjacent, but do not have the same values. This bit of Boolean trickery assumes
6404 that the match-time bits are not more significant than the flag bits. If by
6405 accident this is not the case, a compile-time division by zero error will
6406 occur. */
6407 
6408 #define FF (PCRE2_NOTEMPTY_SET|PCRE2_NE_ATST_SET)
6409 #define OO (PCRE2_NOTEMPTY|PCRE2_NOTEMPTY_ATSTART)
6410 options |= (re->flags & FF) / ((FF & (~FF+1)) / (OO & (~OO+1)));
6411 #undef FF
6412 #undef OO
6413 
6414 /* If the pattern was successfully studied with JIT support, we will run the
6415 JIT executable instead of the rest of this function. Most options must be set
6416 at compile time for the JIT code to be usable. */
6417 
6418 #ifdef SUPPORT_JIT
6419 use_jit = (re->executable_jit != NULL &&
6420           (options & ~PUBLIC_JIT_MATCH_OPTIONS) == 0);
6421 #endif
6422 
6423 /* Initialize UTF/UCP parameters. */
6424 
6425 #ifdef SUPPORT_UNICODE
6426 utf = (re->overall_options & PCRE2_UTF) != 0;
6427 allow_invalid = (re->overall_options & PCRE2_MATCH_INVALID_UTF) != 0;
6428 ucp = (re->overall_options & PCRE2_UCP) != 0;
6429 #endif  /* SUPPORT_UNICODE */
6430 
6431 /* Convert the partial matching flags into an integer. */
6432 
6433 mb->partial = ((options & PCRE2_PARTIAL_HARD) != 0)? 2 :
6434               ((options & PCRE2_PARTIAL_SOFT) != 0)? 1 : 0;
6435 
6436 /* Partial matching and PCRE2_ENDANCHORED are currently not allowed at the same
6437 time. */
6438 
6439 if (mb->partial != 0 &&
6440    ((re->overall_options | options) & PCRE2_ENDANCHORED) != 0)
6441   return PCRE2_ERROR_BADOPTION;
6442 
6443 /* It is an error to set an offset limit without setting the flag at compile
6444 time. */
6445 
6446 if (mcontext != NULL && mcontext->offset_limit != PCRE2_UNSET &&
6447      (re->overall_options & PCRE2_USE_OFFSET_LIMIT) == 0)
6448   return PCRE2_ERROR_BADOFFSETLIMIT;
6449 
6450 /* If the match data block was previously used with PCRE2_COPY_MATCHED_SUBJECT,
6451 free the memory that was obtained. Set the field to NULL for no match cases. */
6452 
6453 if ((match_data->flags & PCRE2_MD_COPIED_SUBJECT) != 0)
6454   {
6455   match_data->memctl.free((void *)match_data->subject,
6456     match_data->memctl.memory_data);
6457   match_data->flags &= ~PCRE2_MD_COPIED_SUBJECT;
6458   }
6459 match_data->subject = NULL;
6460 
6461 /* Zero the error offset in case the first code unit is invalid UTF. */
6462 
6463 match_data->startchar = 0;
6464 
6465 
6466 /* ============================= JIT matching ============================== */
6467 
6468 /* Prepare for JIT matching. Check a UTF string for validity unless no check is
6469 requested or invalid UTF can be handled. We check only the portion of the
6470 subject that might be inspected during matching - from the offset minus the
6471 maximum lookbehind to the given length. This saves time when a small part of a
6472 large subject is being matched by the use of a starting offset. Note that the
6473 maximum lookbehind is a number of characters, not code units. */
6474 
6475 #ifdef SUPPORT_JIT
6476 if (use_jit)
6477   {
6478 #ifdef SUPPORT_UNICODE
6479   if (utf && (options & PCRE2_NO_UTF_CHECK) == 0 && !allow_invalid)
6480     {
6481 #if PCRE2_CODE_UNIT_WIDTH != 32
6482     unsigned int i;
6483 #endif
6484 
6485     /* For 8-bit and 16-bit UTF, check that the first code unit is a valid
6486     character start. */
6487 
6488 #if PCRE2_CODE_UNIT_WIDTH != 32
6489     if (start_match < end_subject && NOT_FIRSTCU(*start_match))
6490       {
6491       if (start_offset > 0) return PCRE2_ERROR_BADUTFOFFSET;
6492 #if PCRE2_CODE_UNIT_WIDTH == 8
6493       return PCRE2_ERROR_UTF8_ERR20;  /* Isolated 0x80 byte */
6494 #else
6495       return PCRE2_ERROR_UTF16_ERR3;  /* Isolated low surrogate */
6496 #endif
6497       }
6498 #endif  /* WIDTH != 32 */
6499 
6500     /* Move back by the maximum lookbehind, just in case it happens at the very
6501     start of matching. */
6502 
6503 #if PCRE2_CODE_UNIT_WIDTH != 32
6504     for (i = re->max_lookbehind; i > 0 && start_match > subject; i--)
6505       {
6506       start_match--;
6507       while (start_match > subject &&
6508 #if PCRE2_CODE_UNIT_WIDTH == 8
6509       (*start_match & 0xc0) == 0x80)
6510 #else  /* 16-bit */
6511       (*start_match & 0xfc00) == 0xdc00)
6512 #endif
6513         start_match--;
6514       }
6515 #else  /* PCRE2_CODE_UNIT_WIDTH != 32 */
6516 
6517     /* In the 32-bit library, one code unit equals one character. However,
6518     we cannot just subtract the lookbehind and then compare pointers, because
6519     a very large lookbehind could create an invalid pointer. */
6520 
6521     if (start_offset >= re->max_lookbehind)
6522       start_match -= re->max_lookbehind;
6523     else
6524       start_match = subject;
6525 #endif  /* PCRE2_CODE_UNIT_WIDTH != 32 */
6526 
6527     /* Validate the relevant portion of the subject. Adjust the offset of an
6528     invalid code point to be an absolute offset in the whole string. */
6529 
6530     match_data->rc = PRIV(valid_utf)(start_match,
6531       length - (start_match - subject), &(match_data->startchar));
6532     if (match_data->rc != 0)
6533       {
6534       match_data->startchar += start_match - subject;
6535       return match_data->rc;
6536       }
6537     jit_checked_utf = TRUE;
6538     }
6539 #endif  /* SUPPORT_UNICODE */
6540 
6541   /* If JIT returns BADOPTION, which means that the selected complete or
6542   partial matching mode was not compiled, fall through to the interpreter. */
6543 
6544   rc = pcre2_jit_match(code, subject, length, start_offset, options,
6545     match_data, mcontext);
6546   if (rc != PCRE2_ERROR_JIT_BADOPTION)
6547     {
6548     if (rc >= 0 && (options & PCRE2_COPY_MATCHED_SUBJECT) != 0)
6549       {
6550       length = CU2BYTES(length + was_zero_terminated);
6551       match_data->subject = match_data->memctl.malloc(length,
6552         match_data->memctl.memory_data);
6553       if (match_data->subject == NULL) return PCRE2_ERROR_NOMEMORY;
6554       memcpy((void *)match_data->subject, subject, length);
6555       match_data->flags |= PCRE2_MD_COPIED_SUBJECT;
6556       }
6557     return rc;
6558     }
6559   }
6560 #endif  /* SUPPORT_JIT */
6561 
6562 /* ========================= End of JIT matching ========================== */
6563 
6564 
6565 /* Proceed with non-JIT matching. The default is to allow lookbehinds to the
6566 start of the subject. A UTF check when there is a non-zero offset may change
6567 this. */
6568 
6569 mb->check_subject = subject;
6570 
6571 /* If a UTF subject string was not checked for validity in the JIT code above,
6572 check it here, and handle support for invalid UTF strings. The check above
6573 happens only when invalid UTF is not supported and PCRE2_NO_UTF_CHECK is unset.
6574 If we get here in those circumstances, it means the subject string is valid,
6575 but for some reason JIT matching was not successful. There is no need to check
6576 the subject again.
6577 
6578 We check only the portion of the subject that might be inspected during
6579 matching - from the offset minus the maximum lookbehind to the given length.
6580 This saves time when a small part of a large subject is being matched by the
6581 use of a starting offset. Note that the maximum lookbehind is a number of
6582 characters, not code units.
6583 
6584 Note also that support for invalid UTF forces a check, overriding the setting
6585 of PCRE2_NO_UTF_CHECK. */
6586 
6587 #ifdef SUPPORT_UNICODE
6588 if (utf &&
6589 #ifdef SUPPORT_JIT
6590     !jit_checked_utf &&
6591 #endif
6592     ((options & PCRE2_NO_UTF_CHECK) == 0 || allow_invalid))
6593   {
6594 #if PCRE2_CODE_UNIT_WIDTH != 32
6595   BOOL skipped_bad_start = FALSE;
6596 #endif
6597 
6598   /* For 8-bit and 16-bit UTF, check that the first code unit is a valid
6599   character start. If we are handling invalid UTF, just skip over such code
6600   units. Otherwise, give an appropriate error. */
6601 
6602 #if PCRE2_CODE_UNIT_WIDTH != 32
6603   if (allow_invalid)
6604     {
6605     while (start_match < end_subject && NOT_FIRSTCU(*start_match))
6606       {
6607       start_match++;
6608       skipped_bad_start = TRUE;
6609       }
6610     }
6611   else if (start_match < end_subject && NOT_FIRSTCU(*start_match))
6612     {
6613     if (start_offset > 0) return PCRE2_ERROR_BADUTFOFFSET;
6614 #if PCRE2_CODE_UNIT_WIDTH == 8
6615     return PCRE2_ERROR_UTF8_ERR20;  /* Isolated 0x80 byte */
6616 #else
6617     return PCRE2_ERROR_UTF16_ERR3;  /* Isolated low surrogate */
6618 #endif
6619     }
6620 #endif  /* WIDTH != 32 */
6621 
6622   /* The mb->check_subject field points to the start of UTF checking;
6623   lookbehinds can go back no further than this. */
6624 
6625   mb->check_subject = start_match;
6626 
6627   /* Move back by the maximum lookbehind, just in case it happens at the very
6628   start of matching, but don't do this if we skipped bad 8-bit or 16-bit code
6629   units above. */
6630 
6631 #if PCRE2_CODE_UNIT_WIDTH != 32
6632   if (!skipped_bad_start)
6633     {
6634     unsigned int i;
6635     for (i = re->max_lookbehind; i > 0 && mb->check_subject > subject; i--)
6636       {
6637       mb->check_subject--;
6638       while (mb->check_subject > subject &&
6639 #if PCRE2_CODE_UNIT_WIDTH == 8
6640       (*mb->check_subject & 0xc0) == 0x80)
6641 #else  /* 16-bit */
6642       (*mb->check_subject & 0xfc00) == 0xdc00)
6643 #endif
6644         mb->check_subject--;
6645       }
6646     }
6647 #else  /* PCRE2_CODE_UNIT_WIDTH != 32 */
6648 
6649   /* In the 32-bit library, one code unit equals one character. However,
6650   we cannot just subtract the lookbehind and then compare pointers, because
6651   a very large lookbehind could create an invalid pointer. */
6652 
6653   if (start_offset >= re->max_lookbehind)
6654     mb->check_subject -= re->max_lookbehind;
6655   else
6656     mb->check_subject = subject;
6657 #endif  /* PCRE2_CODE_UNIT_WIDTH != 32 */
6658 
6659   /* Validate the relevant portion of the subject. There's a loop in case we
6660   encounter bad UTF in the characters preceding start_match which we are
6661   scanning because of a lookbehind. */
6662 
6663   for (;;)
6664     {
6665     match_data->rc = PRIV(valid_utf)(mb->check_subject,
6666       length - (mb->check_subject - subject), &(match_data->startchar));
6667 
6668     if (match_data->rc == 0) break;   /* Valid UTF string */
6669 
6670     /* Invalid UTF string. Adjust the offset to be an absolute offset in the
6671     whole string. If we are handling invalid UTF strings, set end_subject to
6672     stop before the bad code unit, and set the options to "not end of line".
6673     Otherwise return the error. */
6674 
6675     match_data->startchar += mb->check_subject - subject;
6676     if (!allow_invalid || match_data->rc > 0) return match_data->rc;
6677     end_subject = subject + match_data->startchar;
6678 
6679     /* If the end precedes start_match, it means there is invalid UTF in the
6680     extra code units we reversed over because of a lookbehind. Advance past the
6681     first bad code unit, and then skip invalid character starting code units in
6682     8-bit and 16-bit modes, and try again with the original end point. */
6683 
6684     if (end_subject < start_match)
6685       {
6686       mb->check_subject = end_subject + 1;
6687 #if PCRE2_CODE_UNIT_WIDTH != 32
6688       while (mb->check_subject < start_match && NOT_FIRSTCU(*mb->check_subject))
6689         mb->check_subject++;
6690 #endif
6691       end_subject = true_end_subject;
6692       }
6693 
6694     /* Otherwise, set the not end of line option, and do the match. */
6695 
6696     else
6697       {
6698       fragment_options = PCRE2_NOTEOL;
6699       break;
6700       }
6701     }
6702   }
6703 #endif  /* SUPPORT_UNICODE */
6704 
6705 /* A NULL match context means "use a default context", but we take the memory
6706 control functions from the pattern. */
6707 
6708 if (mcontext == NULL)
6709   {
6710   mcontext = (pcre2_match_context *)(&PRIV(default_match_context));
6711   mb->memctl = re->memctl;
6712   }
6713 else mb->memctl = mcontext->memctl;
6714 
6715 anchored = ((re->overall_options | options) & PCRE2_ANCHORED) != 0;
6716 firstline = (re->overall_options & PCRE2_FIRSTLINE) != 0;
6717 startline = (re->flags & PCRE2_STARTLINE) != 0;
6718 bumpalong_limit = (mcontext->offset_limit == PCRE2_UNSET)?
6719   true_end_subject : subject + mcontext->offset_limit;
6720 
6721 /* Initialize and set up the fixed fields in the callout block, with a pointer
6722 in the match block. */
6723 
6724 mb->cb = &cb;
6725 cb.version = 2;
6726 cb.subject = subject;
6727 cb.subject_length = (PCRE2_SIZE)(end_subject - subject);
6728 cb.callout_flags = 0;
6729 
6730 /* Fill in the remaining fields in the match block, except for moptions, which
6731 gets set later. */
6732 
6733 mb->callout = mcontext->callout;
6734 mb->callout_data = mcontext->callout_data;
6735 
6736 mb->start_subject = subject;
6737 mb->start_offset = start_offset;
6738 mb->end_subject = end_subject;
6739 mb->hasthen = (re->flags & PCRE2_HASTHEN) != 0;
6740 mb->allowemptypartial = (re->max_lookbehind > 0) ||
6741     (re->flags & PCRE2_MATCH_EMPTY) != 0;
6742 mb->poptions = re->overall_options;          /* Pattern options */
6743 mb->ignore_skip_arg = 0;
6744 mb->mark = mb->nomatch_mark = NULL;          /* In case never set */
6745 
6746 /* The name table is needed for finding all the numbers associated with a
6747 given name, for condition testing. The code follows the name table. */
6748 
6749 mb->name_table = (PCRE2_UCHAR *)((uint8_t *)re + sizeof(pcre2_real_code));
6750 mb->name_count = re->name_count;
6751 mb->name_entry_size = re->name_entry_size;
6752 mb->start_code = mb->name_table + re->name_count * re->name_entry_size;
6753 
6754 /* Process the \R and newline settings. */
6755 
6756 mb->bsr_convention = re->bsr_convention;
6757 mb->nltype = NLTYPE_FIXED;
6758 switch(re->newline_convention)
6759   {
6760   case PCRE2_NEWLINE_CR:
6761   mb->nllen = 1;
6762   mb->nl[0] = CHAR_CR;
6763   break;
6764 
6765   case PCRE2_NEWLINE_LF:
6766   mb->nllen = 1;
6767   mb->nl[0] = CHAR_NL;
6768   break;
6769 
6770   case PCRE2_NEWLINE_NUL:
6771   mb->nllen = 1;
6772   mb->nl[0] = CHAR_NUL;
6773   break;
6774 
6775   case PCRE2_NEWLINE_CRLF:
6776   mb->nllen = 2;
6777   mb->nl[0] = CHAR_CR;
6778   mb->nl[1] = CHAR_NL;
6779   break;
6780 
6781   case PCRE2_NEWLINE_ANY:
6782   mb->nltype = NLTYPE_ANY;
6783   break;
6784 
6785   case PCRE2_NEWLINE_ANYCRLF:
6786   mb->nltype = NLTYPE_ANYCRLF;
6787   break;
6788 
6789   default: return PCRE2_ERROR_INTERNAL;
6790   }
6791 
6792 /* The backtracking frames have fixed data at the front, and a PCRE2_SIZE
6793 vector at the end, whose size depends on the number of capturing parentheses in
6794 the pattern. It is not used at all if there are no capturing parentheses.
6795 
6796   frame_size             is the total size of each frame
6797   mb->frame_vector_size  is the total usable size of the vector (rounded down
6798                            to a whole number of frames)
6799 
6800 The last of these is changed within the match() function if the frame vector
6801 has to be expanded. We therefore put it into the match block so that it is
6802 correct when calling match() more than once for non-anchored patterns.
6803 
6804 We must also pad frame_size for alignment to ensure subsequent frames are as
6805 aligned as heapframe. Whilst ovector is word-aligned due to being a PCRE2_SIZE
6806 array, that does not guarantee it is suitably aligned for pointers, as some
6807 architectures have pointers that are larger than a size_t. */
6808 
6809 frame_size = (offsetof(heapframe, ovector) +
6810   re->top_bracket * 2 * sizeof(PCRE2_SIZE) + HEAPFRAME_ALIGNMENT - 1) &
6811   ~(HEAPFRAME_ALIGNMENT - 1);
6812 
6813 /* Limits set in the pattern override the match context only if they are
6814 smaller. */
6815 
6816 mb->heap_limit = (mcontext->heap_limit < re->limit_heap)?
6817   mcontext->heap_limit : re->limit_heap;
6818 
6819 mb->match_limit = (mcontext->match_limit < re->limit_match)?
6820   mcontext->match_limit : re->limit_match;
6821 
6822 mb->match_limit_depth = (mcontext->depth_limit < re->limit_depth)?
6823   mcontext->depth_limit : re->limit_depth;
6824 
6825 /* If a pattern has very many capturing parentheses, the frame size may be very
6826 large. Ensure that there are at least 10 available frames by getting an initial
6827 vector on the heap if necessary, except when the heap limit prevents this. Get
6828 fewer if possible. (The heap limit is in kibibytes.) */
6829 
6830 if (frame_size <= START_FRAMES_SIZE/10)
6831   {
6832   mb->match_frames = mb->stack_frames;   /* Initial frame vector on the stack */
6833   mb->frame_vector_size = ((START_FRAMES_SIZE/frame_size) * frame_size);
6834   }
6835 else
6836   {
6837   mb->frame_vector_size = frame_size * 10;
6838   if ((mb->frame_vector_size / 1024) > mb->heap_limit)
6839     {
6840     if (frame_size > mb->heap_limit * 1024) return PCRE2_ERROR_HEAPLIMIT;
6841     mb->frame_vector_size = ((mb->heap_limit * 1024)/frame_size) * frame_size;
6842     }
6843   mb->match_frames = mb->memctl.malloc(mb->frame_vector_size,
6844     mb->memctl.memory_data);
6845   if (mb->match_frames == NULL) return PCRE2_ERROR_NOMEMORY;
6846   }
6847 
6848 mb->match_frames_top =
6849   (heapframe *)((char *)mb->match_frames + mb->frame_vector_size);
6850 
6851 /* Write to the ovector within the first frame to mark every capture unset and
6852 to avoid uninitialized memory read errors when it is copied to a new frame. */
6853 
6854 memset((char *)(mb->match_frames) + offsetof(heapframe, ovector), 0xff,
6855   frame_size - offsetof(heapframe, ovector));
6856 
6857 /* Pointers to the individual character tables */
6858 
6859 mb->lcc = re->tables + lcc_offset;
6860 mb->fcc = re->tables + fcc_offset;
6861 mb->ctypes = re->tables + ctypes_offset;
6862 
6863 /* Set up the first code unit to match, if available. If there's no first code
6864 unit there may be a bitmap of possible first characters. */
6865 
6866 if ((re->flags & PCRE2_FIRSTSET) != 0)
6867   {
6868   has_first_cu = TRUE;
6869   first_cu = first_cu2 = (PCRE2_UCHAR)(re->first_codeunit);
6870   if ((re->flags & PCRE2_FIRSTCASELESS) != 0)
6871     {
6872     first_cu2 = TABLE_GET(first_cu, mb->fcc, first_cu);
6873 #ifdef SUPPORT_UNICODE
6874 #if PCRE2_CODE_UNIT_WIDTH == 8
6875     if (first_cu > 127 && ucp && !utf) first_cu2 = UCD_OTHERCASE(first_cu);
6876 #else
6877     if (first_cu > 127 && (utf || ucp)) first_cu2 = UCD_OTHERCASE(first_cu);
6878 #endif
6879 #endif  /* SUPPORT_UNICODE */
6880     }
6881   }
6882 else
6883   if (!startline && (re->flags & PCRE2_FIRSTMAPSET) != 0)
6884     start_bits = re->start_bitmap;
6885 
6886 /* There may also be a "last known required character" set. */
6887 
6888 if ((re->flags & PCRE2_LASTSET) != 0)
6889   {
6890   has_req_cu = TRUE;
6891   req_cu = req_cu2 = (PCRE2_UCHAR)(re->last_codeunit);
6892   if ((re->flags & PCRE2_LASTCASELESS) != 0)
6893     {
6894     req_cu2 = TABLE_GET(req_cu, mb->fcc, req_cu);
6895 #ifdef SUPPORT_UNICODE
6896 #if PCRE2_CODE_UNIT_WIDTH == 8
6897     if (req_cu > 127 && ucp && !utf) req_cu2 = UCD_OTHERCASE(req_cu);
6898 #else
6899     if (req_cu > 127 && (utf || ucp)) req_cu2 = UCD_OTHERCASE(req_cu);
6900 #endif
6901 #endif  /* SUPPORT_UNICODE */
6902     }
6903   }
6904 
6905 
6906 /* ==========================================================================*/
6907 
6908 /* Loop for handling unanchored repeated matching attempts; for anchored regexes
6909 the loop runs just once. */
6910 
6911 #ifdef SUPPORT_UNICODE
6912 FRAGMENT_RESTART:
6913 #endif
6914 
6915 start_partial = match_partial = NULL;
6916 mb->hitend = FALSE;
6917 
6918 #if PCRE2_CODE_UNIT_WIDTH == 8
6919 memchr_found_first_cu = NULL;
6920 memchr_found_first_cu2 = NULL;
6921 #endif
6922 
6923 for(;;)
6924   {
6925   PCRE2_SPTR new_start_match;
6926 
6927   /* ----------------- Start of match optimizations ---------------- */
6928 
6929   /* There are some optimizations that avoid running the match if a known
6930   starting point is not found, or if a known later code unit is not present.
6931   However, there is an option (settable at compile time) that disables these,
6932   for testing and for ensuring that all callouts do actually occur. */
6933 
6934   if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
6935     {
6936     /* If firstline is TRUE, the start of the match is constrained to the first
6937     line of a multiline string. That is, the match must be before or at the
6938     first newline following the start of matching. Temporarily adjust
6939     end_subject so that we stop the scans for a first code unit at a newline.
6940     If the match fails at the newline, later code breaks the loop. */
6941 
6942     if (firstline)
6943       {
6944       PCRE2_SPTR t = start_match;
6945 #ifdef SUPPORT_UNICODE
6946       if (utf)
6947         {
6948         while (t < end_subject && !IS_NEWLINE(t))
6949           {
6950           t++;
6951           ACROSSCHAR(t < end_subject, t, t++);
6952           }
6953         }
6954       else
6955 #endif
6956       while (t < end_subject && !IS_NEWLINE(t)) t++;
6957       end_subject = t;
6958       }
6959 
6960     /* Anchored: check the first code unit if one is recorded. This may seem
6961     pointless but it can help in detecting a no match case without scanning for
6962     the required code unit. */
6963 
6964     if (anchored)
6965       {
6966       if (has_first_cu || start_bits != NULL)
6967         {
6968         BOOL ok = start_match < end_subject;
6969         if (ok)
6970           {
6971           PCRE2_UCHAR c = UCHAR21TEST(start_match);
6972           ok = has_first_cu && (c == first_cu || c == first_cu2);
6973           if (!ok && start_bits != NULL)
6974             {
6975 #if PCRE2_CODE_UNIT_WIDTH != 8
6976             if (c > 255) c = 255;
6977 #endif
6978             ok = (start_bits[c/8] & (1u << (c&7))) != 0;
6979             }
6980           }
6981         if (!ok)
6982           {
6983           rc = MATCH_NOMATCH;
6984           break;
6985           }
6986         }
6987       }
6988 
6989     /* Not anchored. Advance to a unique first code unit if there is one. */
6990 
6991     else
6992       {
6993       if (has_first_cu)
6994         {
6995         if (first_cu != first_cu2)  /* Caseless */
6996           {
6997           /* In 16-bit and 32-bit modes we have to do our own search, so can
6998           look for both cases at once. */
6999 
7000 #if PCRE2_CODE_UNIT_WIDTH != 8
7001           PCRE2_UCHAR smc;
7002           while (start_match < end_subject &&
7003                 (smc = UCHAR21TEST(start_match)) != first_cu &&
7004                  smc != first_cu2)
7005             start_match++;
7006 #else
7007           /* In 8-bit mode, the use of memchr() gives a big speed up, even
7008           though we have to call it twice in order to find the earliest
7009           occurrence of the code unit in either of its cases. Caching is used
7010           to remember the positions of previously found code units. This can
7011           make a huge difference when the strings are very long and only one
7012           case is actually present. */
7013 
7014           PCRE2_SPTR pp1 = NULL;
7015           PCRE2_SPTR pp2 = NULL;
7016           PCRE2_SIZE searchlength = end_subject - start_match;
7017 
7018           /* If we haven't got a previously found position for first_cu, or if
7019           the current starting position is later, we need to do a search. If
7020           the code unit is not found, set it to the end. */
7021 
7022           if (memchr_found_first_cu == NULL ||
7023               start_match > memchr_found_first_cu)
7024             {
7025             pp1 = memchr(start_match, first_cu, searchlength);
7026             memchr_found_first_cu = (pp1 == NULL)? end_subject : pp1;
7027             }
7028 
7029           /* If the start is before a previously found position, use the
7030           previous position, or NULL if a previous search failed. */
7031 
7032           else pp1 = (memchr_found_first_cu == end_subject)? NULL :
7033             memchr_found_first_cu;
7034 
7035           /* Do the same thing for the other case. */
7036 
7037           if (memchr_found_first_cu2 == NULL ||
7038               start_match > memchr_found_first_cu2)
7039             {
7040             pp2 = memchr(start_match, first_cu2, searchlength);
7041             memchr_found_first_cu2 = (pp2 == NULL)? end_subject : pp2;
7042             }
7043 
7044           else pp2 = (memchr_found_first_cu2 == end_subject)? NULL :
7045             memchr_found_first_cu2;
7046 
7047           /* Set the start to the end of the subject if neither case was found.
7048           Otherwise, use the earlier found point. */
7049 
7050           if (pp1 == NULL)
7051             start_match = (pp2 == NULL)? end_subject : pp2;
7052           else
7053             start_match = (pp2 == NULL || pp1 < pp2)? pp1 : pp2;
7054 
7055 #endif  /* 8-bit handling */
7056           }
7057 
7058         /* The caseful case is much simpler. */
7059 
7060         else
7061           {
7062 #if PCRE2_CODE_UNIT_WIDTH != 8
7063           while (start_match < end_subject && UCHAR21TEST(start_match) !=
7064                  first_cu)
7065             start_match++;
7066 #else
7067           start_match = memchr(start_match, first_cu, end_subject - start_match);
7068           if (start_match == NULL) start_match = end_subject;
7069 #endif
7070           }
7071 
7072         /* If we can't find the required first code unit, having reached the
7073         true end of the subject, break the bumpalong loop, to force a match
7074         failure, except when doing partial matching, when we let the next cycle
7075         run at the end of the subject. To see why, consider the pattern
7076         /(?<=abc)def/, which partially matches "abc", even though the string
7077         does not contain the starting character "d". If we have not reached the
7078         true end of the subject (PCRE2_FIRSTLINE caused end_subject to be
7079         temporarily modified) we also let the cycle run, because the matching
7080         string is legitimately allowed to start with the first code unit of a
7081         newline. */
7082 
7083         if (mb->partial == 0 && start_match >= mb->end_subject)
7084           {
7085           rc = MATCH_NOMATCH;
7086           break;
7087           }
7088         }
7089 
7090       /* If there's no first code unit, advance to just after a linebreak for a
7091       multiline match if required. */
7092 
7093       else if (startline)
7094         {
7095         if (start_match > mb->start_subject + start_offset)
7096           {
7097 #ifdef SUPPORT_UNICODE
7098           if (utf)
7099             {
7100             while (start_match < end_subject && !WAS_NEWLINE(start_match))
7101               {
7102               start_match++;
7103               ACROSSCHAR(start_match < end_subject, start_match, start_match++);
7104               }
7105             }
7106           else
7107 #endif
7108           while (start_match < end_subject && !WAS_NEWLINE(start_match))
7109             start_match++;
7110 
7111           /* If we have just passed a CR and the newline option is ANY or
7112           ANYCRLF, and we are now at a LF, advance the match position by one
7113           more code unit. */
7114 
7115           if (start_match[-1] == CHAR_CR &&
7116                (mb->nltype == NLTYPE_ANY || mb->nltype == NLTYPE_ANYCRLF) &&
7117                start_match < end_subject &&
7118                UCHAR21TEST(start_match) == CHAR_NL)
7119             start_match++;
7120           }
7121         }
7122 
7123       /* If there's no first code unit or a requirement for a multiline line
7124       start, advance to a non-unique first code unit if any have been
7125       identified. The bitmap contains only 256 bits. When code units are 16 or
7126       32 bits wide, all code units greater than 254 set the 255 bit. */
7127 
7128       else if (start_bits != NULL)
7129         {
7130         while (start_match < end_subject)
7131           {
7132           uint32_t c = UCHAR21TEST(start_match);
7133 #if PCRE2_CODE_UNIT_WIDTH != 8
7134           if (c > 255) c = 255;
7135 #endif
7136           if ((start_bits[c/8] & (1u << (c&7))) != 0) break;
7137           start_match++;
7138           }
7139 
7140         /* See comment above in first_cu checking about the next few lines. */
7141 
7142         if (mb->partial == 0 && start_match >= mb->end_subject)
7143           {
7144           rc = MATCH_NOMATCH;
7145           break;
7146           }
7147         }
7148       }   /* End first code unit handling */
7149 
7150     /* Restore fudged end_subject */
7151 
7152     end_subject = mb->end_subject;
7153 
7154     /* The following two optimizations must be disabled for partial matching. */
7155 
7156     if (mb->partial == 0)
7157       {
7158       PCRE2_SPTR p;
7159 
7160       /* The minimum matching length is a lower bound; no string of that length
7161       may actually match the pattern. Although the value is, strictly, in
7162       characters, we treat it as code units to avoid spending too much time in
7163       this optimization. */
7164 
7165       if (end_subject - start_match < re->minlength)
7166         {
7167         rc = MATCH_NOMATCH;
7168         break;
7169         }
7170 
7171       /* If req_cu is set, we know that that code unit must appear in the
7172       subject for the (non-partial) match to succeed. If the first code unit is
7173       set, req_cu must be later in the subject; otherwise the test starts at
7174       the match point. This optimization can save a huge amount of backtracking
7175       in patterns with nested unlimited repeats that aren't going to match.
7176       Writing separate code for caseful/caseless versions makes it go faster,
7177       as does using an autoincrement and backing off on a match. As in the case
7178       of the first code unit, using memchr() in the 8-bit library gives a big
7179       speed up. Unlike the first_cu check above, we do not need to call
7180       memchr() twice in the caseless case because we only need to check for the
7181       presence of the character in either case, not find the first occurrence.
7182 
7183       The search can be skipped if the code unit was found later than the
7184       current starting point in a previous iteration of the bumpalong loop.
7185 
7186       HOWEVER: when the subject string is very, very long, searching to its end
7187       can take a long time, and give bad performance on quite ordinary
7188       anchored patterns. This showed up when somebody was matching something
7189       like /^\d+C/ on a 32-megabyte string... so we don't do this when the
7190       string is sufficiently long, but it's worth searching a lot more for
7191       unanchored patterns. */
7192 
7193       p = start_match + (has_first_cu? 1:0);
7194       if (has_req_cu && p > req_cu_ptr)
7195         {
7196         PCRE2_SIZE check_length = end_subject - start_match;
7197 
7198         if (check_length < REQ_CU_MAX ||
7199               (!anchored && check_length < REQ_CU_MAX * 1000))
7200           {
7201           if (req_cu != req_cu2)  /* Caseless */
7202             {
7203 #if PCRE2_CODE_UNIT_WIDTH != 8
7204             while (p < end_subject)
7205               {
7206               uint32_t pp = UCHAR21INCTEST(p);
7207               if (pp == req_cu || pp == req_cu2) { p--; break; }
7208               }
7209 #else  /* 8-bit code units */
7210             PCRE2_SPTR pp = p;
7211             p = memchr(pp, req_cu, end_subject - pp);
7212             if (p == NULL)
7213               {
7214               p = memchr(pp, req_cu2, end_subject - pp);
7215               if (p == NULL) p = end_subject;
7216               }
7217 #endif /* PCRE2_CODE_UNIT_WIDTH != 8 */
7218             }
7219 
7220           /* The caseful case */
7221 
7222           else
7223             {
7224 #if PCRE2_CODE_UNIT_WIDTH != 8
7225             while (p < end_subject)
7226               {
7227               if (UCHAR21INCTEST(p) == req_cu) { p--; break; }
7228               }
7229 
7230 #else  /* 8-bit code units */
7231             p = memchr(p, req_cu, end_subject - p);
7232             if (p == NULL) p = end_subject;
7233 #endif
7234             }
7235 
7236           /* If we can't find the required code unit, break the bumpalong loop,
7237           forcing a match failure. */
7238 
7239           if (p >= end_subject)
7240             {
7241             rc = MATCH_NOMATCH;
7242             break;
7243             }
7244 
7245           /* If we have found the required code unit, save the point where we
7246           found it, so that we don't search again next time round the bumpalong
7247           loop if the start hasn't yet passed this code unit. */
7248 
7249           req_cu_ptr = p;
7250           }
7251         }
7252       }
7253     }
7254 
7255   /* ------------ End of start of match optimizations ------------ */
7256 
7257   /* Give no match if we have passed the bumpalong limit. */
7258 
7259   if (start_match > bumpalong_limit)
7260     {
7261     rc = MATCH_NOMATCH;
7262     break;
7263     }
7264 
7265   /* OK, we can now run the match. If "hitend" is set afterwards, remember the
7266   first starting point for which a partial match was found. */
7267 
7268   cb.start_match = (PCRE2_SIZE)(start_match - subject);
7269   cb.callout_flags |= PCRE2_CALLOUT_STARTMATCH;
7270 
7271   mb->start_used_ptr = start_match;
7272   mb->last_used_ptr = start_match;
7273 #ifdef SUPPORT_UNICODE
7274   mb->moptions = options | fragment_options;
7275 #else
7276   mb->moptions = options;
7277 #endif
7278   mb->match_call_count = 0;
7279   mb->end_offset_top = 0;
7280   mb->skip_arg_count = 0;
7281 
7282   rc = match(start_match, mb->start_code, match_data->ovector,
7283     match_data->oveccount, re->top_bracket, frame_size, mb);
7284 
7285   if (mb->hitend && start_partial == NULL)
7286     {
7287     start_partial = mb->start_used_ptr;
7288     match_partial = start_match;
7289     }
7290 
7291   switch(rc)
7292     {
7293     /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched
7294     the SKIP's arg was not found. In this circumstance, Perl ignores the SKIP
7295     entirely. The only way we can do that is to re-do the match at the same
7296     point, with a flag to force SKIP with an argument to be ignored. Just
7297     treating this case as NOMATCH does not work because it does not check other
7298     alternatives in patterns such as A(*SKIP:A)B|AC when the subject is AC. */
7299 
7300     case MATCH_SKIP_ARG:
7301     new_start_match = start_match;
7302     mb->ignore_skip_arg = mb->skip_arg_count;
7303     break;
7304 
7305     /* SKIP passes back the next starting point explicitly, but if it is no
7306     greater than the match we have just done, treat it as NOMATCH. */
7307 
7308     case MATCH_SKIP:
7309     if (mb->verb_skip_ptr > start_match)
7310       {
7311       new_start_match = mb->verb_skip_ptr;
7312       break;
7313       }
7314     /* Fall through */
7315 
7316     /* NOMATCH and PRUNE advance by one character. THEN at this level acts
7317     exactly like PRUNE. Unset ignore SKIP-with-argument. */
7318 
7319     case MATCH_NOMATCH:
7320     case MATCH_PRUNE:
7321     case MATCH_THEN:
7322     mb->ignore_skip_arg = 0;
7323     new_start_match = start_match + 1;
7324 #ifdef SUPPORT_UNICODE
7325     if (utf)
7326       ACROSSCHAR(new_start_match < end_subject, new_start_match,
7327         new_start_match++);
7328 #endif
7329     break;
7330 
7331     /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
7332 
7333     case MATCH_COMMIT:
7334     rc = MATCH_NOMATCH;
7335     goto ENDLOOP;
7336 
7337     /* Any other return is either a match, or some kind of error. */
7338 
7339     default:
7340     goto ENDLOOP;
7341     }
7342 
7343   /* Control reaches here for the various types of "no match at this point"
7344   result. Reset the code to MATCH_NOMATCH for subsequent checking. */
7345 
7346   rc = MATCH_NOMATCH;
7347 
7348   /* If PCRE2_FIRSTLINE is set, the match must happen before or at the first
7349   newline in the subject (though it may continue over the newline). Therefore,
7350   if we have just failed to match, starting at a newline, do not continue. */
7351 
7352   if (firstline && IS_NEWLINE(start_match)) break;
7353 
7354   /* Advance to new matching position */
7355 
7356   start_match = new_start_match;
7357 
7358   /* Break the loop if the pattern is anchored or if we have passed the end of
7359   the subject. */
7360 
7361   if (anchored || start_match > end_subject) break;
7362 
7363   /* If we have just passed a CR and we are now at a LF, and the pattern does
7364   not contain any explicit matches for \r or \n, and the newline option is CRLF
7365   or ANY or ANYCRLF, advance the match position by one more code unit. In
7366   normal matching start_match will always be greater than the first position at
7367   this stage, but a failed *SKIP can cause a return at the same point, which is
7368   why the first test exists. */
7369 
7370   if (start_match > subject + start_offset &&
7371       start_match[-1] == CHAR_CR &&
7372       start_match < end_subject &&
7373       *start_match == CHAR_NL &&
7374       (re->flags & PCRE2_HASCRORLF) == 0 &&
7375         (mb->nltype == NLTYPE_ANY ||
7376          mb->nltype == NLTYPE_ANYCRLF ||
7377          mb->nllen == 2))
7378     start_match++;
7379 
7380   mb->mark = NULL;   /* Reset for start of next match attempt */
7381   }                  /* End of for(;;) "bumpalong" loop */
7382 
7383 /* ==========================================================================*/
7384 
7385 /* When we reach here, one of the following stopping conditions is true:
7386 
7387 (1) The match succeeded, either completely, or partially;
7388 
7389 (2) The pattern is anchored or the match was failed after (*COMMIT);
7390 
7391 (3) We are past the end of the subject or the bumpalong limit;
7392 
7393 (4) PCRE2_FIRSTLINE is set and we have failed to match at a newline, because
7394     this option requests that a match occur at or before the first newline in
7395     the subject.
7396 
7397 (5) Some kind of error occurred.
7398 
7399 */
7400 
7401 ENDLOOP:
7402 
7403 /* If end_subject != true_end_subject, it means we are handling invalid UTF,
7404 and have just processed a non-terminal fragment. If this resulted in no match
7405 or a partial match we must carry on to the next fragment (a partial match is
7406 returned to the caller only at the very end of the subject). A loop is used to
7407 avoid trying to match against empty fragments; if the pattern can match an
7408 empty string it would have done so already. */
7409 
7410 #ifdef SUPPORT_UNICODE
7411 if (utf && end_subject != true_end_subject &&
7412     (rc == MATCH_NOMATCH || rc == PCRE2_ERROR_PARTIAL))
7413   {
7414   for (;;)
7415     {
7416     /* Advance past the first bad code unit, and then skip invalid character
7417     starting code units in 8-bit and 16-bit modes. */
7418 
7419     start_match = end_subject + 1;
7420 
7421 #if PCRE2_CODE_UNIT_WIDTH != 32
7422     while (start_match < true_end_subject && NOT_FIRSTCU(*start_match))
7423       start_match++;
7424 #endif
7425 
7426     /* If we have hit the end of the subject, there isn't another non-empty
7427     fragment, so give up. */
7428 
7429     if (start_match >= true_end_subject)
7430       {
7431       rc = MATCH_NOMATCH;  /* In case it was partial */
7432       break;
7433       }
7434 
7435     /* Check the rest of the subject */
7436 
7437     mb->check_subject = start_match;
7438     rc = PRIV(valid_utf)(start_match, length - (start_match - subject),
7439       &(match_data->startchar));
7440 
7441     /* The rest of the subject is valid UTF. */
7442 
7443     if (rc == 0)
7444       {
7445       mb->end_subject = end_subject = true_end_subject;
7446       fragment_options = PCRE2_NOTBOL;
7447       goto FRAGMENT_RESTART;
7448       }
7449 
7450     /* A subsequent UTF error has been found; if the next fragment is
7451     non-empty, set up to process it. Otherwise, let the loop advance. */
7452 
7453     else if (rc < 0)
7454       {
7455       mb->end_subject = end_subject = start_match + match_data->startchar;
7456       if (end_subject > start_match)
7457         {
7458         fragment_options = PCRE2_NOTBOL|PCRE2_NOTEOL;
7459         goto FRAGMENT_RESTART;
7460         }
7461       }
7462     }
7463   }
7464 #endif  /* SUPPORT_UNICODE */
7465 
7466 /* Release an enlarged frame vector that is on the heap. */
7467 
7468 if (mb->match_frames != mb->stack_frames)
7469   mb->memctl.free(mb->match_frames, mb->memctl.memory_data);
7470 
7471 /* Fill in fields that are always returned in the match data. */
7472 
7473 match_data->code = re;
7474 match_data->mark = mb->mark;
7475 match_data->matchedby = PCRE2_MATCHEDBY_INTERPRETER;
7476 
7477 /* Handle a fully successful match. Set the return code to the number of
7478 captured strings, or 0 if there were too many to fit into the ovector, and then
7479 set the remaining returned values before returning. Make a copy of the subject
7480 string if requested. */
7481 
7482 if (rc == MATCH_MATCH)
7483   {
7484   match_data->rc = ((int)mb->end_offset_top >= 2 * match_data->oveccount)?
7485     0 : (int)mb->end_offset_top/2 + 1;
7486   match_data->startchar = start_match - subject;
7487   match_data->leftchar = mb->start_used_ptr - subject;
7488   match_data->rightchar = ((mb->last_used_ptr > mb->end_match_ptr)?
7489     mb->last_used_ptr : mb->end_match_ptr) - subject;
7490   if ((options & PCRE2_COPY_MATCHED_SUBJECT) != 0)
7491     {
7492     length = CU2BYTES(length + was_zero_terminated);
7493     match_data->subject = match_data->memctl.malloc(length,
7494       match_data->memctl.memory_data);
7495     if (match_data->subject == NULL) return PCRE2_ERROR_NOMEMORY;
7496     memcpy((void *)match_data->subject, subject, length);
7497     match_data->flags |= PCRE2_MD_COPIED_SUBJECT;
7498     }
7499   else match_data->subject = subject;
7500   return match_data->rc;
7501   }
7502 
7503 /* Control gets here if there has been a partial match, an error, or if the
7504 overall match attempt has failed at all permitted starting positions. Any mark
7505 data is in the nomatch_mark field. */
7506 
7507 match_data->mark = mb->nomatch_mark;
7508 
7509 /* For anything other than nomatch or partial match, just return the code. */
7510 
7511 if (rc != MATCH_NOMATCH && rc != PCRE2_ERROR_PARTIAL) match_data->rc = rc;
7512 
7513 /* Handle a partial match. If a "soft" partial match was requested, searching
7514 for a complete match will have continued, and the value of rc at this point
7515 will be MATCH_NOMATCH. For a "hard" partial match, it will already be
7516 PCRE2_ERROR_PARTIAL. */
7517 
7518 else if (match_partial != NULL)
7519   {
7520   match_data->subject = subject;
7521   match_data->ovector[0] = match_partial - subject;
7522   match_data->ovector[1] = end_subject - subject;
7523   match_data->startchar = match_partial - subject;
7524   match_data->leftchar = start_partial - subject;
7525   match_data->rightchar = end_subject - subject;
7526   match_data->rc = PCRE2_ERROR_PARTIAL;
7527   }
7528 
7529 /* Else this is the classic nomatch case. */
7530 
7531 else match_data->rc = PCRE2_ERROR_NOMATCH;
7532 
7533 return match_data->rc;
7534 }
7535 
7536 /* End of pcre2_match.c */
7537