xref: /PHP-8.2/ext/pcre/pcre2lib/pcre2_match.c (revision c4e8f652)
1 /*************************************************
2 *      Perl-Compatible Regular Expressions       *
3 *************************************************/
4 
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7 
8                        Written by Philip Hazel
9      Original API code Copyright (c) 1997-2012 University of Cambridge
10           New API code Copyright (c) 2015-2022 University of Cambridge
11 
12 -----------------------------------------------------------------------------
13 Redistribution and use in source and binary forms, with or without
14 modification, are permitted provided that the following conditions are met:
15 
16     * Redistributions of source code must retain the above copyright notice,
17       this list of conditions and the following disclaimer.
18 
19     * Redistributions in binary form must reproduce the above copyright
20       notice, this list of conditions and the following disclaimer in the
21       documentation and/or other materials provided with the distribution.
22 
23     * Neither the name of the University of Cambridge nor the names of its
24       contributors may be used to endorse or promote products derived from
25       this software without specific prior written permission.
26 
27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 POSSIBILITY OF SUCH DAMAGE.
38 -----------------------------------------------------------------------------
39 */
40 
41 
42 #ifdef HAVE_CONFIG_H
43 #include "config.h"
44 #endif
45 
46 /* These defines enable debugging code */
47 
48 /* #define DEBUG_FRAMES_DISPLAY */
49 /* #define DEBUG_SHOW_OPS */
50 /* #define DEBUG_SHOW_RMATCH */
51 
52 #ifdef DEBUG_FRAMES_DISPLAY
53 #include <stdarg.h>
54 #endif
55 
56 /* These defines identify the name of the block containing "static"
57 information, and fields within it. */
58 
/* NOTE(review): these three names are defined before pcre2_internal.h is
included just below, presumably because macros in that header expand to
references built from them -- confirm before moving or renaming. Throughout
this file "mb" is the name used for the match_block pointer. */
59 #define NLBLOCK mb              /* Block containing newline information */
60 #define PSSTART start_subject   /* Field containing processed string start */
61 #define PSEND   end_subject     /* Field containing processed string end */
62 
63 #include "pcre2_internal.h"
64 
/* Sentinel meaning "not inside a pattern recursion". match() stores it in the
current_recurse field of the first frame and compares against it when handling
OP_CLOSE and OP_ACCEPT/OP_END. */
65 #define RECURSE_UNSET 0xffffffffu  /* Bigger than max group number */
66 
67 /* Masks for identifying the public options that are permitted at match time. */
68 
/* The complete set of public option bits accepted at match time. */
69 #define PUBLIC_MATCH_OPTIONS \
70   (PCRE2_ANCHORED|PCRE2_ENDANCHORED|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY| \
71    PCRE2_NOTEMPTY_ATSTART|PCRE2_NO_UTF_CHECK|PCRE2_PARTIAL_HARD| \
72    PCRE2_PARTIAL_SOFT|PCRE2_NO_JIT|PCRE2_COPY_MATCHED_SUBJECT)
73 
/* The same list without PCRE2_ANCHORED, PCRE2_ENDANCHORED and PCRE2_NO_JIT.
NOTE(review): presumably the options with which a match may be handed to JIT
code; the place where this mask is tested is not in this part of the file. */
74 #define PUBLIC_JIT_MATCH_OPTIONS \
75    (PCRE2_NO_UTF_CHECK|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY|\
76     PCRE2_NOTEMPTY_ATSTART|PCRE2_PARTIAL_SOFT|PCRE2_PARTIAL_HARD|\
77     PCRE2_COPY_MATCHED_SUBJECT)
78 
79 /* Non-error returns from and within the match() function. Error returns are
80 externally defined PCRE2_ERROR_xxx codes, which are all negative. */
81 
82 #define MATCH_MATCH        1    /* Matched */
83 #define MATCH_NOMATCH      0    /* Failed to match */
84 
85 /* Special internal returns used in the match() function. They are made
sufficiently negative to stay clear of the external PCRE2_ERROR_xxx codes. */
87 
88 #define MATCH_ACCEPT       (-999)  /* Passed to RRETURN at OP_ASSERT_ACCEPT */
89 #define MATCH_KETRPOS      (-998)
90 /* The next 5 must be kept together and in sequence so that a test that checks
for any one of them can use the range MATCH_BACKTRACK_MIN..MATCH_BACKTRACK_MAX
(that is, -997 to -993 inclusive). */
92 #define MATCH_COMMIT       (-997)
93 #define MATCH_PRUNE        (-996)
94 #define MATCH_SKIP         (-995)
95 #define MATCH_SKIP_ARG     (-994)
96 #define MATCH_THEN         (-993)
97 #define MATCH_BACKTRACK_MAX MATCH_THEN    /* Upper end of the range */
98 #define MATCH_BACKTRACK_MIN MATCH_COMMIT  /* Lower end of the range */
99 
100 /* Group frame type values. Zero means the frame is not a group frame. The
101 lower 16 bits are used for data (e.g. the capture number). Group frames are
102 used for most groups so that information about the start is easily available at
103 the end without having to scan back through intermediate frames (backtrack
104 points). */
105 
/* The identity lives in the upper 16 bits; the lower 16 bits carry data. */
106 #define GF_CAPTURE     0x00010000u  /* Data is the capture group number */
107 #define GF_NOCAPTURE   0x00020000u  /* Data is shown as "op" by the debug trace */
108 #define GF_CONDASSERT  0x00030000u  /* Data is shown as "op" by the debug trace */
109 #define GF_RECURSE     0x00040000u  /* Data becomes the frame's current_recurse */
110 
111 /* Masks for the identity and data parts of the group frame type. */
112 
113 #define GF_IDMASK(a)   ((a) & 0xffff0000u)  /* Keep only the GF_xxx identity */
114 #define GF_DATAMASK(a) ((a) & 0x0000ffffu)  /* Keep only the 16-bit data */
115 
116 /* Repetition types */
117 
enum {
  REPTYPE_MIN,   /* Minimizing repeat, e.g. *? */
  REPTYPE_MAX,   /* Maximizing repeat, e.g. * */
  REPTYPE_POS    /* Possessive repeat (the OP_CRPOSxxx items) */
};

/* Minimum and maximum counts for the common repeats. The three tables in this
group share one index order. A maximum of UINT32_MAX means "no upper limit".
The two range entries are only placeholders in rep_min and rep_max. */

static const uint32_t rep_min[] = {
  0,            /* star */
  0,            /* minimizing star */
  1,            /* plus */
  1,            /* minimizing plus */
  0,            /* query */
  0,            /* minimizing query */
  0,            /* placeholder for OP_CRRANGE */
  0,            /* placeholder for OP_CRMINRANGE */
  0,            /* OP_CRPOSSTAR */
  1,            /* OP_CRPOSPLUS */
  0             /* OP_CRPOSQUERY */
};

static const uint32_t rep_max[] = {
  UINT32_MAX,   /* star */
  UINT32_MAX,   /* minimizing star */
  UINT32_MAX,   /* plus */
  UINT32_MAX,   /* minimizing plus */
  1,            /* query */
  1,            /* minimizing query */
  0,            /* placeholder for OP_CRRANGE */
  0,            /* placeholder for OP_CRMINRANGE */
  UINT32_MAX,   /* OP_CRPOSSTAR */
  UINT32_MAX,   /* OP_CRPOSPLUS */
  1             /* OP_CRPOSQUERY */
};

/* Repetition type for each repeat item. Unlike the two tables above, this one
has a final entry for OP_CRPOSRANGE. */

static const uint32_t rep_typ[] = {
  REPTYPE_MAX,  /* star */
  REPTYPE_MIN,  /* minimizing star */
  REPTYPE_MAX,  /* plus */
  REPTYPE_MIN,  /* minimizing plus */
  REPTYPE_MAX,  /* query */
  REPTYPE_MIN,  /* minimizing query */
  REPTYPE_MAX,  /* OP_CRRANGE */
  REPTYPE_MIN,  /* OP_CRMINRANGE */
  REPTYPE_POS,  /* OP_CRPOSSTAR */
  REPTYPE_POS,  /* OP_CRPOSPLUS */
  REPTYPE_POS,  /* OP_CRPOSQUERY */
  REPTYPE_POS   /* OP_CRPOSRANGE */
};
146 
147 /* Numbers for RMATCH calls at backtracking points. When these lists are
148 changed, the code at RETURN_SWITCH below must be updated in sync.  */
149 
/* Each identifier is used as the second argument of RMATCH(), which stores it
in the frame's return_id and pastes it into a label name (L_RMnn). */

enum {
  RM1 = 1,
  RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,
  RM9,  RM10, RM11, RM12, RM13, RM14, RM15,
  RM16, RM17, RM18, RM19, RM20, RM21, RM22,
  RM23, RM24, RM25, RM26, RM27, RM28, RM29,
  RM30, RM31, RM32, RM33, RM34, RM35, RM36
};

#ifdef SUPPORT_WIDE_CHARS
enum {
  RM100 = 100,
  RM101
};
#endif

#ifdef SUPPORT_UNICODE
enum {
  RM200 = 200,
  RM201, RM202, RM203, RM204, RM205,
  RM206, RM207, RM208, RM209, RM210,
  RM211, RM212, RM213, RM214, RM215,
  RM216, RM217, RM218, RM219, RM220,
  RM221, RM222, RM223, RM224, RM225
};
#endif
165 
166 /* Define short names for general fields in the current backtrack frame, which
167 is always pointed to by the F variable. Occasional references to fields in
168 other frames are written out explicitly. There are also some fields in the
169 current frame whose names start with "temp" that are used for short-term,
170 localised backtracking memory. These are #defined with Lxxx names at the point
171 of use and undefined afterwards. */
172 
/* Each of these expands to a field access through a pointer named F, so they
can only be used where such a variable or parameter is in scope (match(),
do_callout() and match_ref() all have one). Because they are plain field
accesses they can be assigned to as well as read. */
173 #define Fback_frame        F->back_frame
174 #define Fcapture_last      F->capture_last
175 #define Fcurrent_recurse   F->current_recurse
176 #define Fecode             F->ecode
177 #define Feptr              F->eptr
178 #define Fgroup_frame_type  F->group_frame_type
179 #define Flast_group_offset F->last_group_offset
180 #define Flength            F->length
181 #define Fmark              F->mark
182 #define Frdepth            F->rdepth
183 #define Fstart_match       F->start_match
184 #define Foffset_top        F->offset_top
185 #define Foccu              F->occu
186 #define Fop                F->op
187 #define Fovector           F->ovector
188 #define Freturn_id         F->return_id
189 
190 
191 #ifdef DEBUG_FRAMES_DISPLAY
192 /*************************************************
193 *      Display current frames and contents       *
194 *************************************************/
195 
196 /* This debugging function displays the current set of frames and their
197 contents. It is not called automatically from anywhere, the intention being
198 that calls can be inserted where necessary when debugging frame-related
199 problems.
200 
201 Arguments:
202   f           the file to write to
203   F           the current top frame
204   P           a previous frame of interest
205   frame_size  the frame size
206   mb          points to the match block
207   match_data  points to the match data block
208   s           identification text
209 
210 Returns:    nothing
211 */
212 
213 static void
display_frames(FILE * f,heapframe * F,heapframe * P,PCRE2_SIZE frame_size,match_block * mb,pcre2_match_data * match_data,const char * s,...)214 display_frames(FILE *f, heapframe *F, heapframe *P, PCRE2_SIZE frame_size,
215   match_block *mb, pcre2_match_data *match_data, const char *s, ...)
216 {
217 uint32_t i;
218 heapframe *Q;
219 va_list ap;
220 va_start(ap, s);
221 
222 fprintf(f, "FRAMES ");
223 vfprintf(f, s, ap);
224 va_end(ap);
225 
226 if (P != NULL) fprintf(f, " P=%lu",
227   ((char *)P - (char *)(match_data->heapframes))/frame_size);
228 fprintf(f, "\n");
229 
230 for (i = 0, Q = match_data->heapframes;
231      Q <= F;
232      i++, Q = (heapframe *)((char *)Q + frame_size))
233   {
234   fprintf(f, "Frame %d type=%x subj=%lu code=%d back=%lu id=%d",
235     i, Q->group_frame_type, Q->eptr - mb->start_subject, *(Q->ecode),
236     Q->back_frame, Q->return_id);
237 
238   if (Q->last_group_offset == PCRE2_UNSET)
239     fprintf(f, " lgoffset=unset\n");
240   else
241     fprintf(f, " lgoffset=%lu\n",  Q->last_group_offset/frame_size);
242   }
243 }
244 
245 #endif
246 
247 
248 
249 /*************************************************
250 *                Process a callout               *
251 *************************************************/
252 
253 /* This function is called for all callouts, whether "standalone" or at the
254 start of a conditional group. Fecode will be pointing to either OP_CALLOUT or
255 OP_CALLOUT_STR. A callout block is allocated in pcre2_match() and initialized
256 with fixed values.
257 
258 Arguments:
259   F          points to the current backtracking frame
260   mb         points to the match block
261   lengthptr  where to return the length of the callout item
262 
263 Returns:     the return from the callout
264              or 0 if no callout function exists
265 */
266 
267 static int
do_callout(heapframe * F,match_block * mb,PCRE2_SIZE * lengthptr)268 do_callout(heapframe *F, match_block *mb, PCRE2_SIZE *lengthptr)
269 {
270 int rc;
271 PCRE2_SIZE save0, save1;
272 PCRE2_SIZE *callout_ovector;
273 pcre2_callout_block *cb;
274 
275 *lengthptr = (*Fecode == OP_CALLOUT)?
276   PRIV(OP_lengths)[OP_CALLOUT] : GET(Fecode, 1 + 2*LINK_SIZE);
277 
278 if (mb->callout == NULL) return 0;   /* No callout function provided */
279 
280 /* The original matching code (pre 10.30) worked directly with the ovector
281 passed by the user, and this was passed to callouts. Now that the working
282 ovector is in the backtracking frame, it no longer needs to reserve space for
283 the overall match offsets (which would waste space in the frame). For backward
284 compatibility, however, we pass capture_top and offset_vector to the callout as
285 if for the extended ovector, and we ensure that the first two slots are unset
286 by preserving and restoring their current contents. Picky compilers complain if
287 references such as Fovector[-2] are use directly, so we set up a separate
288 pointer. */
289 
290 callout_ovector = (PCRE2_SIZE *)(Fovector) - 2;
291 
292 /* The cb->version, cb->subject, cb->subject_length, and cb->start_match fields
293 are set externally. The first 3 never change; the last is updated for each
294 bumpalong. */
295 
296 cb = mb->cb;
297 cb->capture_top      = (uint32_t)Foffset_top/2 + 1;
298 cb->capture_last     = Fcapture_last;
299 cb->offset_vector    = callout_ovector;
300 cb->mark             = mb->nomatch_mark;
301 cb->current_position = (PCRE2_SIZE)(Feptr - mb->start_subject);
302 cb->pattern_position = GET(Fecode, 1);
303 cb->next_item_length = GET(Fecode, 1 + LINK_SIZE);
304 
305 if (*Fecode == OP_CALLOUT)  /* Numerical callout */
306   {
307   cb->callout_number = Fecode[1 + 2*LINK_SIZE];
308   cb->callout_string_offset = 0;
309   cb->callout_string = NULL;
310   cb->callout_string_length = 0;
311   }
312 else  /* String callout */
313   {
314   cb->callout_number = 0;
315   cb->callout_string_offset = GET(Fecode, 1 + 3*LINK_SIZE);
316   cb->callout_string = Fecode + (1 + 4*LINK_SIZE) + 1;
317   cb->callout_string_length =
318     *lengthptr - (1 + 4*LINK_SIZE) - 2;
319   }
320 
321 save0 = callout_ovector[0];
322 save1 = callout_ovector[1];
323 callout_ovector[0] = callout_ovector[1] = PCRE2_UNSET;
324 rc = mb->callout(cb, mb->callout_data);
325 callout_ovector[0] = save0;
326 callout_ovector[1] = save1;
327 cb->callout_flags = 0;
328 return rc;
329 }
330 
331 
332 
333 /*************************************************
334 *          Match a back-reference                *
335 *************************************************/
336 
337 /* This function is called only when it is known that the offset lies within
338 the offsets that have so far been used in the match. Note that in caseless
339 UTF-8 mode, the number of subject bytes matched may be different to the number
340 of reference bytes. (In theory this could also happen in UTF-16 mode, but it
341 seems unlikely.)
342 
343 Arguments:
344   offset      index into the offset vector
345   caseless    TRUE if caseless
346   F           the current backtracking frame pointer
347   mb          points to match block
348   lengthptr   pointer for returning the length matched
349 
350 Returns:      = 0 successful match; number of code units matched is set
351               < 0 no match
352               > 0 partial match
353 */
354 
355 static int
match_ref(PCRE2_SIZE offset,BOOL caseless,heapframe * F,match_block * mb,PCRE2_SIZE * lengthptr)356 match_ref(PCRE2_SIZE offset, BOOL caseless, heapframe *F, match_block *mb,
357   PCRE2_SIZE *lengthptr)
358 {
359 PCRE2_SPTR p;
360 PCRE2_SIZE length;
361 PCRE2_SPTR eptr;
362 PCRE2_SPTR eptr_start;
363 
364 /* Deal with an unset group. The default is no match, but there is an option to
365 match an empty string. */
366 
367 if (offset >= Foffset_top || Fovector[offset] == PCRE2_UNSET)
368   {
369   if ((mb->poptions & PCRE2_MATCH_UNSET_BACKREF) != 0)
370     {
371     *lengthptr = 0;
372     return 0;      /* Match */
373     }
374   else return -1;  /* No match */
375   }
376 
377 /* Separate the caseless and UTF cases for speed. */
378 
379 eptr = eptr_start = Feptr;
380 p = mb->start_subject + Fovector[offset];
381 length = Fovector[offset+1] - Fovector[offset];
382 
383 if (caseless)
384   {
385 #if defined SUPPORT_UNICODE
386   BOOL utf = (mb->poptions & PCRE2_UTF) != 0;
387 
388   if (utf || (mb->poptions & PCRE2_UCP) != 0)
389     {
390     PCRE2_SPTR endptr = p + length;
391 
392     /* Match characters up to the end of the reference. NOTE: the number of
393     code units matched may differ, because in UTF-8 there are some characters
394     whose upper and lower case codes have different numbers of bytes. For
395     example, U+023A (2 bytes in UTF-8) is the upper case version of U+2C65 (3
396     bytes in UTF-8); a sequence of 3 of the former uses 6 bytes, as does a
397     sequence of two of the latter. It is important, therefore, to check the
398     length along the reference, not along the subject (earlier code did this
399     wrong). UCP without uses Unicode properties but without UTF encoding. */
400 
401     while (p < endptr)
402       {
403       uint32_t c, d;
404       const ucd_record *ur;
405       if (eptr >= mb->end_subject) return 1;   /* Partial match */
406 
407       if (utf)
408         {
409         GETCHARINC(c, eptr);
410         GETCHARINC(d, p);
411         }
412       else
413         {
414         c = *eptr++;
415         d = *p++;
416         }
417 
418       ur = GET_UCD(d);
419       if (c != d && c != (uint32_t)((int)d + ur->other_case))
420         {
421         const uint32_t *pp = PRIV(ucd_caseless_sets) + ur->caseset;
422         for (;;)
423           {
424           if (c < *pp) return -1;  /* No match */
425           if (c == *pp++) break;
426           }
427         }
428       }
429     }
430   else
431 #endif
432 
433   /* Not in UTF or UCP mode */
434     {
435     for (; length > 0; length--)
436       {
437       uint32_t cc, cp;
438       if (eptr >= mb->end_subject) return 1;   /* Partial match */
439       cc = UCHAR21TEST(eptr);
440       cp = UCHAR21TEST(p);
441       if (TABLE_GET(cp, mb->lcc, cp) != TABLE_GET(cc, mb->lcc, cc))
442         return -1;  /* No match */
443       p++;
444       eptr++;
445       }
446     }
447   }
448 
449 /* In the caseful case, we can just compare the code units, whether or not we
450 are in UTF and/or UCP mode. When partial matching, we have to do this unit by
451 unit. */
452 
453 else
454   {
455   if (mb->partial != 0)
456     {
457     for (; length > 0; length--)
458       {
459       if (eptr >= mb->end_subject) return 1;   /* Partial match */
460       if (UCHAR21INCTEST(p) != UCHAR21INCTEST(eptr)) return -1;  /* No match */
461       }
462     }
463 
464   /* Not partial matching */
465 
466   else
467     {
468     if ((PCRE2_SIZE)(mb->end_subject - eptr) < length) return 1; /* Partial */
469     if (memcmp(p, eptr, CU2BYTES(length)) != 0) return -1;  /* No match */
470     eptr += length;
471     }
472   }
473 
474 *lengthptr = eptr - eptr_start;
475 return 0;  /* Match */
476 }
477 
478 
479 
480 /******************************************************************************
481 *******************************************************************************
482                    "Recursion" in the match() function
483 
484 The original match() function was highly recursive, but this proved to be the
485 source of a number of problems over the years, mostly because of the relatively
486 small system stacks that are commonly found. As new features were added to
487 patterns, various kludges were invented to reduce the amount of stack used,
488 making the code hard to understand in places.
489 
490 A version did exist that used individual frames on the heap instead of calling
491 match() recursively, but this ran substantially slower. The current version is
492 a refactoring that uses a vector of frames to remember backtracking points.
493 This runs no slower, and possibly even a bit faster than the original recursive
494 implementation.
495 
496 At first, an initial vector of size START_FRAMES_SIZE (enough for maybe 50
497 frames) was allocated on the system stack. If this was not big enough, the heap
498 was used for a larger vector. However, it turns out that there are environments
499 where taking as little as 20KiB from the system stack is an embarrassment.
500 After another refactoring, the heap is used exclusively, but a pointer to the
501 frames vector and its size are cached in the match_data block, so that there is
502 no new memory allocation if the same match_data block is used for multiple
503 matches (unless the frames vector has to be extended).
504 *******************************************************************************
505 ******************************************************************************/
506 
507 
508 
509 
510 /*************************************************
511 *       Macros for the match() function          *
512 *************************************************/
513 
514 /* These macros pack up tests that are used for partial matching several times
515 in the code. The second one is used when we already know we are past the end of
516 the subject. We set the "hit end" flag if the pointer is at the end of the
517 subject and either (a) the pointer is past the earliest inspected character
518 (i.e. something has been matched, even if not part of the actual matched
519 string), or (b) the pattern contains a lookbehind. These are the conditions for
520 which adding more characters may allow the current match to continue.
521 
522 For hard partial matching, we immediately return a partial match. Otherwise,
523 carrying on means that a complete match on the current subject will be sought.
524 A partial match is returned only if no complete match can be found. */
525 
/* CHECK_PARTIAL() applies the partial-match test only when the current
subject pointer has reached the end of the subject. It expands to a bare "if"
statement (not wrapped in do/while), and through SCHECK_PARTIAL() it may
execute "return", so it needs F and mb in scope inside a function that returns
an int. */
526 #define CHECK_PARTIAL()\
527   if (Feptr >= mb->end_subject) \
528     { \
529     SCHECK_PARTIAL(); \
530     }
531 
/* SCHECK_PARTIAL() is the test itself, for use when the caller already knows
the end of the subject has been reached. If partial matching is enabled and
either the pointer is past start_used_ptr or allowemptypartial is set, it sets
mb->hitend and, when mb->partial is greater than 1 (hard partial matching),
returns PCRE2_ERROR_PARTIAL from the enclosing function. */
532 #define SCHECK_PARTIAL()\
533   if (mb->partial != 0 && \
534       (Feptr > mb->start_used_ptr || mb->allowemptypartial)) \
535     { \
536     mb->hitend = TRUE; \
537     if (mb->partial > 1) return PCRE2_ERROR_PARTIAL; \
538     }
539 
540 
541 /* These macros are used to implement backtracking. They simulate a recursive
542 call to the match() function by means of a local vector of frames which
543 remember the backtracking points. */
544 
/* RMATCH(ra,rb): record ra as the code position at which the new frame starts,
store rb in the current frame's return_id, and jump to MATCH_RECURSE. It also
defines the label L_<rb> immediately after the jump. Because the label name is
built by token pasting, rb must be a plain identifier such as RM1, and each
identifier can appear in only one RMATCH() within a function.
NOTE(review): presumably the code at RETURN_SWITCH uses return_id to jump back
to L_<rb>; that code is not in this part of the file. */
545 #define RMATCH(ra,rb)\
546   {\
547   start_ecode = ra;\
548   Freturn_id = rb;\
549   goto MATCH_RECURSE;\
550   L_##rb:;\
551   }
552 
/* RRETURN(ra): put ra in rrc and jump to RETURN_SWITCH. */
553 #define RRETURN(ra)\
554   {\
555   rrc = ra;\
556   goto RETURN_SWITCH;\
557   }
558 
559 
560 
561 /*************************************************
562 *         Match from current position            *
563 *************************************************/
564 
565 /* This function is called to run one match attempt at a single starting point
566 in the subject.
567 
568 Performance note: It might be tempting to extract commonly used fields from the
569 mb structure (e.g. end_subject) into individual variables to improve
570 performance. Tests using gcc on a SPARC disproved this; in the first case, it
571 made performance worse.
572 
573 Arguments:
574    start_eptr   starting character in subject
575    start_ecode  starting position in compiled code
576    top_bracket  number of capturing parentheses in the pattern
577    frame_size   size of each backtracking frame
578    match_data   pointer to the match_data block
579    mb           pointer to "static" variables block
580 
581 Returns:        MATCH_MATCH if matched            )  these values are >= 0
582                 MATCH_NOMATCH if failed to match  )
583                 negative MATCH_xxx value for PRUNE, SKIP, etc
584                 negative PCRE2_ERROR_xxx value if aborted by an error condition
585                 (e.g. stopped by repeated call or depth limit)
586 */
587 
588 static int
match(PCRE2_SPTR start_eptr,PCRE2_SPTR start_ecode,uint16_t top_bracket,PCRE2_SIZE frame_size,pcre2_match_data * match_data,match_block * mb)589 match(PCRE2_SPTR start_eptr, PCRE2_SPTR start_ecode, uint16_t top_bracket,
590   PCRE2_SIZE frame_size, pcre2_match_data *match_data, match_block *mb)
591 {
592 /* Frame-handling variables */
593 
594 heapframe *F;           /* Current frame pointer */
595 heapframe *N = NULL;    /* Temporary frame pointers */
596 heapframe *P = NULL;
597 
598 heapframe *frames_top;  /* End of frames vector */
599 heapframe *assert_accept_frame = NULL;  /* For passing back a frame with captures */
600 PCRE2_SIZE heapframes_size;   /* Usable size of frames vector */
601 PCRE2_SIZE frame_copy_size;   /* Amount to copy when creating a new frame */
602 
603 /* Local variables that do not need to be preserved over calls to RRMATCH(). */
604 
605 PCRE2_SPTR bracode;     /* Temp pointer to start of group */
606 PCRE2_SIZE offset;      /* Used for group offsets */
607 PCRE2_SIZE length;      /* Used for various length calculations */
608 
609 int rrc;                /* Return from functions & backtracking "recursions" */
610 #ifdef SUPPORT_UNICODE
611 int proptype;           /* Type of character property */
612 #endif
613 
614 uint32_t i;             /* Used for local loops */
615 uint32_t fc;            /* Character values */
616 uint32_t number;        /* Used for group and other numbers */
617 uint32_t reptype = 0;   /* Type of repetition (0 to avoid compiler warning) */
618 uint32_t group_frame_type;  /* Specifies type for new group frames */
619 
620 BOOL condition;         /* Used in conditional groups */
621 BOOL cur_is_word;       /* Used in "word" tests */
622 BOOL prev_is_word;      /* Used in "word" tests */
623 
624 /* UTF and UCP flags */
625 
626 #ifdef SUPPORT_UNICODE
627 BOOL utf = (mb->poptions & PCRE2_UTF) != 0;
628 BOOL ucp = (mb->poptions & PCRE2_UCP) != 0;
629 #else
630 BOOL utf = FALSE;  /* Required for convenience even when no Unicode support */
631 #endif
632 
633 /* This is the length of the last part of a backtracking frame that must be
634 copied when a new frame is created. */
635 
636 frame_copy_size = frame_size - offsetof(heapframe, eptr);
637 
638 /* Set up the first frame and the end of the frames vector. We set the local
639 heapframes_size to the usuable amount of the vector, that is, a whole number of
640 frames. */
641 
642 F = match_data->heapframes;
643 heapframes_size = (match_data->heapframes_size / frame_size) * frame_size;
644 frames_top = (heapframe *)((char *)F + heapframes_size);
645 
646 Frdepth = 0;                        /* "Recursion" depth */
647 Fcapture_last = 0;                  /* Number of most recent capture */
648 Fcurrent_recurse = RECURSE_UNSET;   /* Not pattern recursing. */
649 Fstart_match = Feptr = start_eptr;  /* Current data pointer and start match */
650 Fmark = NULL;                       /* Most recent mark */
651 Foffset_top = 0;                    /* End of captures within the frame */
652 Flast_group_offset = PCRE2_UNSET;   /* Saved frame of most recent group */
653 group_frame_type = 0;               /* Not a start of group frame */
654 goto NEW_FRAME;                     /* Start processing with this frame */
655 
656 /* Come back here when we want to create a new frame for remembering a
657 backtracking point. */
658 
659 MATCH_RECURSE:
660 
661 /* Set up a new backtracking frame. If the vector is full, get a new one,
662 doubling the size, but constrained by the heap limit (which is in KiB). */
663 
664 N = (heapframe *)((char *)F + frame_size);
665 if (N >= frames_top)
666   {
667   heapframe *new;
668   PCRE2_SIZE newsize = match_data->heapframes_size * 2;
669 
670   if (newsize > mb->heap_limit)
671     {
672     PCRE2_SIZE maxsize = (mb->heap_limit/frame_size) * frame_size;
673     if (match_data->heapframes_size >= maxsize) return PCRE2_ERROR_HEAPLIMIT;
674     newsize = maxsize;
675     }
676 
677   new = match_data->memctl.malloc(newsize, match_data->memctl.memory_data);
678   if (new == NULL) return PCRE2_ERROR_NOMEMORY;
679   memcpy(new, match_data->heapframes, heapframes_size);
680 
681   F = (heapframe *)((char *)new + ((char *)F - (char *)match_data->heapframes));
682   N = (heapframe *)((char *)F + frame_size);
683 
684   match_data->memctl.free(match_data->heapframes, match_data->memctl.memory_data);
685   match_data->heapframes = new;
686   match_data->heapframes_size = newsize;
687 
688   heapframes_size = (newsize / frame_size) * frame_size;
689   frames_top = (heapframe *)((char *)new + heapframes_size);
690   }
691 
692 #ifdef DEBUG_SHOW_RMATCH
693 fprintf(stderr, "++ RMATCH %2d frame=%d", Freturn_id, Frdepth + 1);
694 if (group_frame_type != 0)
695   {
696   fprintf(stderr, " type=%x ", group_frame_type);
697   switch (GF_IDMASK(group_frame_type))
698     {
699     case GF_CAPTURE:
700     fprintf(stderr, "capture=%d", GF_DATAMASK(group_frame_type));
701     break;
702 
703     case GF_NOCAPTURE:
704     fprintf(stderr, "nocapture op=%d", GF_DATAMASK(group_frame_type));
705     break;
706 
707     case GF_CONDASSERT:
708     fprintf(stderr, "condassert op=%d", GF_DATAMASK(group_frame_type));
709     break;
710 
711     case GF_RECURSE:
712     fprintf(stderr, "recurse=%d", GF_DATAMASK(group_frame_type));
713     break;
714 
715     default:
716     fprintf(stderr, "*** unknown ***");
717     break;
718     }
719   }
720 fprintf(stderr, "\n");
721 #endif
722 
723 /* Copy those fields that must be copied into the new frame, increase the
724 "recursion" depth (i.e. the new frame's index) and then make the new frame
725 current. */
726 
727 memcpy((char *)N + offsetof(heapframe, eptr),
728        (char *)F + offsetof(heapframe, eptr),
729        frame_copy_size);
730 
731 N->rdepth = Frdepth + 1;
732 F = N;
733 
734 /* Carry on processing with a new frame. */
735 
736 NEW_FRAME:
737 Fgroup_frame_type = group_frame_type;
738 Fecode = start_ecode;      /* Starting code pointer */
739 Fback_frame = frame_size;  /* Default is go back one frame */
740 
741 /* If this is a special type of group frame, remember its offset for quick
742 access at the end of the group. If this is a recursion, set a new current
743 recursion value. */
744 
745 if (group_frame_type != 0)
746   {
747   Flast_group_offset = (char *)F - (char *)match_data->heapframes;
748   if (GF_IDMASK(group_frame_type) == GF_RECURSE)
749     Fcurrent_recurse = GF_DATAMASK(group_frame_type);
750   group_frame_type = 0;
751   }
752 
753 
754 /* ========================================================================= */
755 /* This is the main processing loop. First check that we haven't recorded too
756 many backtracks (search tree is too large), or that we haven't exceeded the
757 recursive depth limit (used too many backtracking frames). If not, process the
758 opcodes. */
759 
760 if (mb->match_call_count++ >= mb->match_limit) return PCRE2_ERROR_MATCHLIMIT;
761 if (Frdepth >= mb->match_limit_depth) return PCRE2_ERROR_DEPTHLIMIT;
762 
763 for (;;)
764   {
765 #ifdef DEBUG_SHOW_OPS
766 fprintf(stderr, "++ op=%d\n", *Fecode);
767 #endif
768 
769   Fop = (uint8_t)(*Fecode);  /* Cast needed for 16-bit and 32-bit modes */
770   switch(Fop)
771     {
772     /* ===================================================================== */
773     /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes, to close
774     any currently open capturing brackets. Unlike reaching the end of a group,
775     where we know the starting frame is at the top of the chained frames, in
776     this case we have to search back for the relevant frame in case other types
777     of group that use chained frames have intervened. Multiple OP_CLOSEs always
778     come innermost first, which matches the chain order. We can ignore this in
779     a recursion, because captures are not passed out of recursions. */
780 
781     case OP_CLOSE:
782     if (Fcurrent_recurse == RECURSE_UNSET)
783       {
784       number = GET2(Fecode, 1);
785       offset = Flast_group_offset;
786       for(;;)
787         {
788         if (offset == PCRE2_UNSET) return PCRE2_ERROR_INTERNAL;
789         N = (heapframe *)((char *)match_data->heapframes + offset);
790         P = (heapframe *)((char *)N - frame_size);
791         if (N->group_frame_type == (GF_CAPTURE | number)) break;
792         offset = P->last_group_offset;
793         }
794       offset = (number << 1) - 2;
795       Fcapture_last = number;
796       Fovector[offset] = P->eptr - mb->start_subject;
797       Fovector[offset+1] = Feptr - mb->start_subject;
798       if (offset >= Foffset_top) Foffset_top = offset + 2;
799       }
800     Fecode += PRIV(OP_lengths)[*Fecode];
801     break;
802 
803 
804     /* ===================================================================== */
805     /* Real or forced end of the pattern, assertion, or recursion. In an
806     assertion ACCEPT, update the last used pointer and remember the current
807     frame so that the captures and mark can be fished out of it. The case then
    leaves through RRETURN with MATCH_ACCEPT. */
808 
809     case OP_ASSERT_ACCEPT:
810     if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;   /* Only ever moved forwards here */
811     assert_accept_frame = F;                 /* Frame to read captures and mark from later */
812     RRETURN(MATCH_ACCEPT);
813 
814     /* If recursing, we have to find the most recent recursion. */
815 
816     case OP_ACCEPT:
817     case OP_END:
818 
819     /* Handle end of a recursion. The chain of group frames is searched, from
    Flast_group_offset backwards, for the first frame whose type (after
    GF_IDMASK) is GF_RECURSE. Running out of chain is reported as
    PCRE2_ERROR_INTERNAL by a direct return. */
820 
821     if (Fcurrent_recurse != RECURSE_UNSET)
822       {
823       offset = Flast_group_offset;
824       for(;;)
825         {
826         if (offset == PCRE2_UNSET) return PCRE2_ERROR_INTERNAL;   /* Chain exhausted */
827         N = (heapframe *)((char *)match_data->heapframes + offset);
828         P = (heapframe *)((char *)N - frame_size);   /* Frame stored one frame_size before N */
829         if (GF_IDMASK(N->group_frame_type) == GF_RECURSE) break;
830         offset = P->last_group_offset;               /* Next link in the chain */
831         }
832 
833       /* N is now the frame of the recursion; the previous frame is at the
834       OP_RECURSE position. Go back there, copying the current subject position
835       and mark, and the start_match position (\K might have changed it), and
836       then move on past the OP_RECURSE. */
837 
838       P->eptr = Feptr;
839       P->mark = Fmark;
840       P->start_match = Fstart_match;
841       F = P;                       /* Continue in the frame that precedes the recursion frame */
842       Fecode += 1 + LINK_SIZE;     /* NOTE(review): presumably Fecode is a field of F, so this advances P's code pointer -- confirm */
843       continue;                    /* Back to the top of the opcode loop */
844       }
845 
846     /* Not a recursion. Fail for an empty string match if either PCRE2_NOTEMPTY
847     is set, or if PCRE2_NOTEMPTY_ATSTART is set and we have matched at the
848     start of the subject. In both cases, backtracking will then try other
849     alternatives, if any. */
850 
851     if (Feptr == Fstart_match &&
852          ((mb->moptions & PCRE2_NOTEMPTY) != 0 ||
853            ((mb->moptions & PCRE2_NOTEMPTY_ATSTART) != 0 &&
854              Fstart_match == mb->start_subject + mb->start_offset)))
855       RRETURN(MATCH_NOMATCH);
856 
857     /* Also fail if PCRE2_ENDANCHORED is set and the end of the match is not
858     the end of the subject. After (*ACCEPT) we fail the entire match (at this
859     position) but backtrack on reaching the end of the pattern. The option is
    honoured whether it was given in the match options or the pattern options. */
860 
861     if (Feptr < mb->end_subject &&
862         ((mb->moptions | mb->poptions) & PCRE2_ENDANCHORED) != 0)
863       {
864       if (Fop == OP_END) RRETURN(MATCH_NOMATCH);   /* End of pattern: backtrack */
865       return MATCH_NOMATCH;                        /* (*ACCEPT): direct return, no backtracking */
866       }
867 
868     /* We have a successful match of the whole pattern. Record the result and
869     then do a direct return from the function. If there is space in the offset
870     vector, set any pairs that follow the highest-numbered captured string but
871     are less than the number of capturing groups in the pattern to PCRE2_UNSET.
872     It is documented that this happens. "Gaps" are set to PCRE2_UNSET
873     dynamically. It is only those at the end that need setting here. */
874 
875     mb->end_match_ptr = Feptr;           /* Record where we ended */
876     mb->end_offset_top = Foffset_top;    /* and how many extracts were taken */
877     mb->mark = Fmark;                    /* and the last success mark */
878     if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
879 
    /* Pair 0 of the external ovector is the whole match, as offsets from the
    start of the subject. */

880     match_data->ovector[0] = Fstart_match - mb->start_subject;
881     match_data->ovector[1] = Feptr - mb->start_subject;
882 
883     /* Set i to the smaller of the sizes of the external and frame ovectors.
    Here i counts individual slots (two per pair) and includes pair 0, which is
    why the copy below transfers i - 2 entries to ovector + 2. Because of that
    shift, index Foffset_top in Fovector corresponds to Foffset_top + 2 in the
    external ovector; the loop sets every copied slot from there upwards to
    PCRE2_UNSET. */
884 
885     i = 2 * ((top_bracket + 1 > match_data->oveccount)?
886       match_data->oveccount : top_bracket + 1);
887     memcpy(match_data->ovector + 2, Fovector, (i - 2) * sizeof(PCRE2_SIZE));
888     while (--i >= Foffset_top + 2) match_data->ovector[i] = PCRE2_UNSET;
889     return MATCH_MATCH;  /* Note: NOT RRETURN */
890 
891 
892     /*===================================================================== */
893     /* Match any single character type except newline; have to take care with
894     CRLF newlines and partial matching. When partial matching is enabled
    (mb->partial != 0), the current position is the last code unit of the
    subject, the newline convention is a fixed two-unit sequence, and this code
    unit equals the first unit of that sequence, mb->hitend is set; if
    mb->partial is greater than 1, PCRE2_ERROR_PARTIAL is returned directly.
    Otherwise control falls through to OP_ALLANY to consume the character. */
895 
896     case OP_ANY:
897     if (IS_NEWLINE(Feptr)) RRETURN(MATCH_NOMATCH);
898     if (mb->partial != 0 &&
899         Feptr == mb->end_subject - 1 &&
900         NLBLOCK->nltype == NLTYPE_FIXED &&
901         NLBLOCK->nllen == 2 &&
902         UCHAR21TEST(Feptr) == NLBLOCK->nl[0])
903       {
904       mb->hitend = TRUE;
905       if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;   /* Direct return, not RRETURN */
906       }
907     /* Fall through */
908 
909     /* Match any single character whatsoever. Fails (after SCHECK_PARTIAL) when
    the subject is exhausted; otherwise advances Feptr by one code unit and, in
    UTF mode, applies ACROSSCHAR bounded by mb->end_subject. */
910 
911     case OP_ALLANY:
912     if (Feptr >= mb->end_subject)  /* DO NOT merge the Feptr++ here; it must */
913       {                            /* not be updated before SCHECK_PARTIAL. */
914       SCHECK_PARTIAL();
915       RRETURN(MATCH_NOMATCH);
916       }
917     Feptr++;
918 #ifdef SUPPORT_UNICODE
919     if (utf) ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);   /* NOTE(review): presumably skips the remaining code units of a multi-unit character -- confirm against the macro */
920 #endif
921     Fecode++;                      /* The opcode has no operand */
922     break;
923 
924 
925     /* ===================================================================== */
926     /* Match a single code unit, even in UTF mode. This opcode really does
927     match any code unit, even newline. (It really should be called ANYCODEUNIT,
928     of course - the byte name is from pre-16 bit days.) The only failure is
    running out of subject, which is checked with SCHECK_PARTIAL before the
    no-match return. */
929 
930     case OP_ANYBYTE:
931     if (Feptr >= mb->end_subject)   /* DO NOT merge the Feptr++ here; it must */
932       {                             /* not be updated before SCHECK_PARTIAL. */
933       SCHECK_PARTIAL();
934       RRETURN(MATCH_NOMATCH);
935       }
936     Feptr++;                        /* Exactly one code unit, with no UTF adjustment */
937     Fecode++;                       /* The opcode has no operand */
938     break;
939 
940 
941     /* ===================================================================== */
942     /* Match a single character, casefully. In UTF mode the pattern character
    may occupy several code units: its length is obtained into Flength, the
    subject must have at least that many code units left, and the code units
    are then compared one by one. Outside UTF mode exactly one code unit
    (Fecode[1]) is compared. */
943 
944     case OP_CHAR:
945 #ifdef SUPPORT_UNICODE
946     if (utf)
947       {
948       Flength = 1;
949       Fecode++;                          /* Step from the opcode to the character */
950       GETCHARLEN(fc, Fecode, Flength);   /* NOTE(review): presumably decodes into fc and sets Flength to the code-unit count -- confirm */
951       if (Flength > (PCRE2_SIZE)(mb->end_subject - Feptr))   /* Not enough subject left */
952         {
953         CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
954         RRETURN(MATCH_NOMATCH);
955         }
956       for (; Flength > 0; Flength--)     /* Compare code unit by code unit, advancing both pointers */
957         {
958         if (*Fecode++ != UCHAR21INC(Feptr)) RRETURN(MATCH_NOMATCH);
959         }
960       }
961     else
962 #endif
963 
964     /* Not UTF mode */
965       {
966       if (mb->end_subject - Feptr < 1)
967         {
968         SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
969         RRETURN(MATCH_NOMATCH);
970         }
971       if (Fecode[1] != *Feptr++) RRETURN(MATCH_NOMATCH);
972       Fecode += 2;                   /* Opcode plus one code unit */
973       }
974     break;
975 
976 
977     /* ===================================================================== */
978     /* Match a single character, caselessly. If we are at the end of the
979     subject, give up immediately. We get here only when the pattern character
980     has at most one other case. Characters with more than two cases are coded
981     as OP_PROP with the pseudo-property PT_CLIST.

    There are three paths: UTF mode, UCP without UTF, and neither. In each,
    pattern characters below 128 (or, in the last path, within the table range)
    are compared through the mb->lcc table, and larger ones through
    UCD_OTHERCASE. */
982 
983     case OP_CHARI:
984     if (Feptr >= mb->end_subject)
985       {
986       SCHECK_PARTIAL();
987       RRETURN(MATCH_NOMATCH);
988       }
989 
990 #ifdef SUPPORT_UNICODE
991     if (utf)
992       {
993       Flength = 1;
994       Fecode++;                          /* Step from the opcode to the character */
995       GETCHARLEN(fc, Fecode, Flength);   /* NOTE(review): presumably decodes into fc and sets Flength to the code-unit count -- confirm */
996 
997       /* If the pattern character's value is < 128, we know that its other case
998       (if any) is also < 128 (and therefore only one code unit long in all
999       code-unit widths), so we can use the fast lookup table. We checked above
1000       that there is at least one character left in the subject. */
1001 
1002       if (fc < 128)
1003         {
1004         uint32_t cc = UCHAR21(Feptr);
1005         if (mb->lcc[fc] != TABLE_GET(cc, mb->lcc, cc)) RRETURN(MATCH_NOMATCH);
1006         Fecode++;                        /* One code unit in the pattern */
1007         Feptr++;                         /* and one in the subject */
1008         }
1009 
1010       /* Otherwise we must pick up the subject character and use Unicode
1011       property support to test its other case. Note that we cannot use the
1012       value of "Flength" to check for sufficient bytes left, because the other
1013       case of the character may have more or fewer code units. */
1014 
1015       else
1016         {
1017         uint32_t dc;
1018         GETCHARINC(dc, Feptr);
1019         Fecode += Flength;               /* Pattern pointer is advanced before the comparison */
1020         if (dc != fc && dc != UCD_OTHERCASE(fc)) RRETURN(MATCH_NOMATCH);
1021         }
1022       }
1023 
1024     /* If UCP is set without UTF we must do the same as above, but with one
1025     character per code unit. */
1026 
1027     else if (ucp)
1028       {
1029       uint32_t cc = UCHAR21(Feptr);
1030       fc = Fecode[1];
1031       if (fc < 128)
1032         {
1033         if (mb->lcc[fc] != TABLE_GET(cc, mb->lcc, cc)) RRETURN(MATCH_NOMATCH);
1034         }
1035       else
1036         {
1037         if (cc != fc && cc != UCD_OTHERCASE(fc)) RRETURN(MATCH_NOMATCH);
1038         }
1039       Feptr++;
1040       Fecode += 2;                       /* Opcode plus one code unit */
1041       }
1042 
1043     else
1044 #endif   /* SUPPORT_UNICODE */
1045 
1046     /* Not UTF or UCP mode; use the table for characters < 256. */
1047       {
1048       if (TABLE_GET(Fecode[1], mb->lcc, Fecode[1])
1049           != TABLE_GET(*Feptr, mb->lcc, *Feptr)) RRETURN(MATCH_NOMATCH);
1050       Feptr++;
1051       Fecode += 2;                       /* Opcode plus one code unit */
1052       }
1053     break;
1054 
1055 
1056     /* ===================================================================== */
1057     /* Match not a single character. OP_NOT is the caseful form and OP_NOTI
    the caseless one; both share this code and are told apart by Fop. In every
    path, ch is the pattern character and fc the subject character. The match
    fails if they are equal, and for OP_NOTI also if the other case of ch
    equals fc. */
1058 
1059     case OP_NOT:
1060     case OP_NOTI:
1061     if (Feptr >= mb->end_subject)
1062       {
1063       SCHECK_PARTIAL();
1064       RRETURN(MATCH_NOMATCH);
1065       }
1066 
1067 #ifdef SUPPORT_UNICODE
1068     if (utf)
1069       {
1070       uint32_t ch;
1071       Fecode++;                  /* Step from the opcode to the character */
1072       GETCHARINC(ch, Fecode);    /* Pattern character */
1073       GETCHARINC(fc, Feptr);     /* Subject character */
1074       if (ch == fc)
1075         {
1076         RRETURN(MATCH_NOMATCH);  /* Caseful match */
1077         }
1078       else if (Fop == OP_NOTI)   /* If caseless */
1079         {
1080         if (ch > 127)
1081           ch = UCD_OTHERCASE(ch);
1082         else
1083           ch = (mb->fcc)[ch];    /* Table lookup for values up to 127 */
1084         if (ch == fc) RRETURN(MATCH_NOMATCH);
1085         }
1086       }
1087 
1088     /* UCP without UTF is as above, but with one character per code unit. */
1089 
1090     else if (ucp)
1091       {
1092       uint32_t ch;
1093       fc = UCHAR21INC(Feptr);    /* Subject character */
1094       ch = Fecode[1];            /* Pattern character */
1095       Fecode += 2;               /* Opcode plus one code unit */
1096 
1097       if (ch == fc)
1098         {
1099         RRETURN(MATCH_NOMATCH);  /* Caseful match */
1100         }
1101       else if (Fop == OP_NOTI)   /* If caseless */
1102         {
1103         if (ch > 127)
1104           ch = UCD_OTHERCASE(ch);
1105         else
1106           ch = (mb->fcc)[ch];    /* Table lookup for values up to 127 */
1107         if (ch == fc) RRETURN(MATCH_NOMATCH);
1108         }
1109       }
1110 
1111     else
1112 #endif  /* SUPPORT_UNICODE */
1113 
1114     /* Neither UTF nor UCP is set */
1115 
1116       {
1117       uint32_t ch = Fecode[1];   /* Pattern character */
1118       fc = UCHAR21INC(Feptr);    /* Subject character */
1119       if (ch == fc || (Fop == OP_NOTI && TABLE_GET(ch, mb->fcc, ch) == fc))
1120         RRETURN(MATCH_NOMATCH);
1121       Fecode += 2;               /* Opcode plus one code unit */
1122       }
1123     break;
1124 
1125 
1126     /* ===================================================================== */
1127     /* Match a single character repeatedly. The case labels below only set up
    the repeat bounds (Lmin, Lmax) and the repeat type (reptype) and move Fecode
    to the character; the matching itself is done by the common code at
    REPEATCHAR. The L-prefixed names are local aliases for fields of the current
    frame F and are undefined again at the end of this section. */
1128 
1129 #define Loclength    F->temp_size      /* Code units of the other-case form held in Foccu; 0 if none */
1130 #define Lstart_eptr  F->temp_sptr[0]   /* Subject position before the optional repeats; backtracking stops here */
1131 #define Lcharptr     F->temp_sptr[1]   /* Start of the pattern character (multi-unit UTF path) */
1132 #define Lmin         F->temp_32[0]     /* Minimum count; also incremented as a counter when minimizing */
1133 #define Lmax         F->temp_32[1]     /* Maximum count */
1134 #define Lc           F->temp_32[2]     /* The character, when it is a single code unit */
1135 #define Loc          F->temp_32[3]     /* Other case of Lc (caseless path) */
1136 
    /* Exact count. reptype is not set here; every path in the common code
    leaves through its "Lmin == Lmax" test before reptype is read. */

1137     case OP_EXACT:
1138     case OP_EXACTI:
1139     Lmin = Lmax = GET2(Fecode, 1);
1140     Fecode += 1 + IMM2_SIZE;
1141     goto REPEATCHAR;
1142 
    /* Possessive, from zero up to a count read from the code. */

1143     case OP_POSUPTO:
1144     case OP_POSUPTOI:
1145     reptype = REPTYPE_POS;
1146     Lmin = 0;
1147     Lmax = GET2(Fecode, 1);
1148     Fecode += 1 + IMM2_SIZE;
1149     goto REPEATCHAR;
1150 
    /* Maximizing, from zero up to a count read from the code. */

1151     case OP_UPTO:
1152     case OP_UPTOI:
1153     reptype = REPTYPE_MAX;
1154     Lmin = 0;
1155     Lmax = GET2(Fecode, 1);
1156     Fecode += 1 + IMM2_SIZE;
1157     goto REPEATCHAR;
1158 
    /* Minimizing, from zero up to a count read from the code. */

1159     case OP_MINUPTO:
1160     case OP_MINUPTOI:
1161     reptype = REPTYPE_MIN;
1162     Lmin = 0;
1163     Lmax = GET2(Fecode, 1);
1164     Fecode += 1 + IMM2_SIZE;
1165     goto REPEATCHAR;
1166 
    /* Possessive star, plus and query: the bounds are fixed, with UINT32_MAX
    used as the maximum for the unbounded forms. */

1167     case OP_POSSTAR:
1168     case OP_POSSTARI:
1169     reptype = REPTYPE_POS;
1170     Lmin = 0;
1171     Lmax = UINT32_MAX;
1172     Fecode++;
1173     goto REPEATCHAR;
1174 
1175     case OP_POSPLUS:
1176     case OP_POSPLUSI:
1177     reptype = REPTYPE_POS;
1178     Lmin = 1;
1179     Lmax = UINT32_MAX;
1180     Fecode++;
1181     goto REPEATCHAR;
1182 
1183     case OP_POSQUERY:
1184     case OP_POSQUERYI:
1185     reptype = REPTYPE_POS;
1186     Lmin = 0;
1187     Lmax = 1;
1188     Fecode++;
1189     goto REPEATCHAR;
1190 
    /* The remaining star, plus and query forms take their bounds and type from
    the rep_min, rep_max and rep_typ tables. The index is the opcode's distance
    from OP_STAR (caseful) or OP_STARI (caseless). These cases run straight on
    into REPEATCHAR without a goto. */

1191     case OP_STAR:
1192     case OP_STARI:
1193     case OP_MINSTAR:
1194     case OP_MINSTARI:
1195     case OP_PLUS:
1196     case OP_PLUSI:
1197     case OP_MINPLUS:
1198     case OP_MINPLUSI:
1199     case OP_QUERY:
1200     case OP_QUERYI:
1201     case OP_MINQUERY:
1202     case OP_MINQUERYI:
1203     fc = *Fecode++ - ((Fop < OP_STARI)? OP_STAR : OP_STARI);
1204     Lmin = rep_min[fc];
1205     Lmax = rep_max[fc];
1206     reptype = rep_typ[fc];
1207 
1208     /* Common code for all repeated single-character matches. We first check
1209     for the minimum number of characters. If the minimum equals the maximum, we
1210     are done. Otherwise, if minimizing, check the rest of the pattern for a
1211     match; if there isn't one, advance up to the maximum, one character at a
1212     time.
1213 
1214     If maximizing, advance up to the maximum number of matching characters,
1215     until Feptr is past the end of the maximum run. If possessive, we are
1216     then done (no backing up). Otherwise, match at this position; anything
1217     other than no match is immediately returned. For nomatch, back up one
1218     character, unless we are matching \R and the last thing matched was
1219     \r\n, in which case, back up two code units until we reach the first
1220     optional character position.
1221 
1222     The various UTF/non-UTF and caseful/caseless cases are handled separately,
1223     for speed. */
1224 
1225     REPEATCHAR:
1226 #ifdef SUPPORT_UNICODE
1227     if (utf)
1228       {
1229       Flength = 1;
1230       Lcharptr = Fecode;                 /* Remember where the character's code units start */
1231       GETCHARLEN(fc, Fecode, Flength);   /* NOTE(review): presumably decodes into fc and sets Flength to the code-unit count -- confirm */
1232       Fecode += Flength;                 /* Fecode now points past the character */
1233 
1234       /* Handle multi-code-unit character matching, caseful and caseless. */
1235 
1236       if (Flength > 1)
1237         {
1238         uint32_t othercase;
1239 
        /* For a caseless opcode whose character has a different other case,
        encode that other case into Foccu and keep its length in Loclength;
        otherwise Loclength is 0 and only the character itself can match. */

1240         if (Fop >= OP_STARI &&     /* Caseless */
1241             (othercase = UCD_OTHERCASE(fc)) != fc)
1242           Loclength = PRIV(ord2utf)(othercase, Foccu);
1243         else Loclength = 0;
1244 
        /* Mandatory repeats. Each one must match either the character or its
        other-case encoding, with enough subject left for whichever is tried. */

1245         for (i = 1; i <= Lmin; i++)
1246           {
1247           if (Feptr <= mb->end_subject - Flength &&
1248             memcmp(Feptr, Lcharptr, CU2BYTES(Flength)) == 0) Feptr += Flength;
1249           else if (Loclength > 0 &&
1250                    Feptr <= mb->end_subject - Loclength &&
1251                    memcmp(Feptr, Foccu, CU2BYTES(Loclength)) == 0)
1252             Feptr += Loclength;
1253           else
1254             {
1255             CHECK_PARTIAL();
1256             RRETURN(MATCH_NOMATCH);
1257             }
1258           }
1259 
1260         if (Lmin == Lmax) continue;   /* Exact count done: back to the top of the opcode loop */
1261 
1262         if (reptype == REPTYPE_MIN)
1263           {
1264           for (;;)
1265             {
1266             RMATCH(Fecode, RM202);    /* Try the rest of the pattern first */
1267             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1268             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);   /* Maximum reached */
1269             if (Feptr <= mb->end_subject - Flength &&
1270               memcmp(Feptr, Lcharptr, CU2BYTES(Flength)) == 0) Feptr += Flength;
1271             else if (Loclength > 0 &&
1272                      Feptr <= mb->end_subject - Loclength &&
1273                      memcmp(Feptr, Foccu, CU2BYTES(Loclength)) == 0)
1274               Feptr += Loclength;
1275             else
1276               {
1277               CHECK_PARTIAL();
1278               RRETURN(MATCH_NOMATCH);
1279               }
1280             }
1281           /* Control never gets here */
1282           }
1283 
1284         else  /* Maximize */
1285           {
1286           Lstart_eptr = Feptr;        /* Position after the mandatory repeats */
1287           for (i = Lmin; i < Lmax; i++)
1288             {
1289             if (Feptr <= mb->end_subject - Flength &&
1290                 memcmp(Feptr, Lcharptr, CU2BYTES(Flength)) == 0)
1291               Feptr += Flength;
1292             else if (Loclength > 0 &&
1293                      Feptr <= mb->end_subject - Loclength &&
1294                      memcmp(Feptr, Foccu, CU2BYTES(Loclength)) == 0)
1295               Feptr += Loclength;
1296             else
1297               {
1298               CHECK_PARTIAL();
1299               break;
1300               }
1301             }
1302 
1303           /* After \C in UTF mode, Lstart_eptr might be in the middle of a
1304           Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
1305           go too far. */
1306 
1307           if (reptype != REPTYPE_POS) for(;;)
1308             {
1309             if (Feptr <= Lstart_eptr) break;
1310             RMATCH(Fecode, RM203);
1311             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1312             Feptr--;
1313             BACKCHAR(Feptr);          /* NOTE(review): presumably moves Feptr back to the start of the character -- confirm */
1314             }
1315           }
1316         break;   /* End of repeated wide character handling */
1317         }
1318 
1319       /* Length of UTF character is 1. Put it into the preserved variable and
1320       fall through to the non-UTF code. */
1321 
1322       Lc = fc;
1323       }
1324     else
1325 #endif  /* SUPPORT_UNICODE */
1326 
1327     /* When not in UTF mode, load a single-code-unit character. Then proceed as
1328     above, using Unicode casing if either UTF or UCP is set. When
    SUPPORT_UNICODE is defined, the assignment below is the body of the "else"
    above, so it is skipped in UTF mode, where Lc and Fecode were already set. */
1329 
1330     Lc = *Fecode++;
1331 
1332     /* Caseless comparison */
1333 
1334     if (Fop >= OP_STARI)
1335       {
1336 #if PCRE2_CODE_UNIT_WIDTH == 8
1337 #ifdef SUPPORT_UNICODE
1338       if (ucp && !utf && Lc > 127) Loc = UCD_OTHERCASE(Lc);
1339       else
1340 #endif  /* SUPPORT_UNICODE */
1341       /* Lc will be < 128 in UTF-8 mode. */
1342       Loc = mb->fcc[Lc];
1343 #else /* 16-bit & 32-bit */
1344 #ifdef SUPPORT_UNICODE
1345       if ((utf || ucp) && Lc > 127) Loc = UCD_OTHERCASE(Lc);
1346       else
1347 #endif  /* SUPPORT_UNICODE */
1348       Loc = TABLE_GET(Lc, mb->fcc, Lc);
1349 #endif  /* PCRE2_CODE_UNIT_WIDTH == 8 */
1350 
      /* Mandatory repeats: each subject code unit must equal Lc or Loc. */

1351       for (i = 1; i <= Lmin; i++)
1352         {
1353         uint32_t cc;                 /* Faster than PCRE2_UCHAR */
1354         if (Feptr >= mb->end_subject)
1355           {
1356           SCHECK_PARTIAL();
1357           RRETURN(MATCH_NOMATCH);
1358           }
1359         cc = UCHAR21TEST(Feptr);
1360         if (Lc != cc && Loc != cc) RRETURN(MATCH_NOMATCH);
1361         Feptr++;
1362         }
1363       if (Lmin == Lmax) continue;    /* Exact count done: back to the top of the opcode loop */
1364 
1365       if (reptype == REPTYPE_MIN)
1366         {
1367         for (;;)
1368           {
1369           uint32_t cc;               /* Faster than PCRE2_UCHAR */
1370           RMATCH(Fecode, RM25);      /* Try the rest of the pattern first */
1371           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1372           if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);   /* Maximum reached */
1373           if (Feptr >= mb->end_subject)
1374             {
1375             SCHECK_PARTIAL();
1376             RRETURN(MATCH_NOMATCH);
1377             }
1378           cc = UCHAR21TEST(Feptr);
1379           if (Lc != cc && Loc != cc) RRETURN(MATCH_NOMATCH);
1380           Feptr++;
1381           }
1382         /* Control never gets here */
1383         }
1384 
1385       else  /* Maximize */
1386         {
1387         Lstart_eptr = Feptr;         /* Position after the mandatory repeats */
1388         for (i = Lmin; i < Lmax; i++)
1389           {
1390           uint32_t cc;               /* Faster than PCRE2_UCHAR */
1391           if (Feptr >= mb->end_subject)
1392             {
1393             SCHECK_PARTIAL();
1394             break;
1395             }
1396           cc = UCHAR21TEST(Feptr);
1397           if (Lc != cc && Loc != cc) break;
1398           Feptr++;
1399           }
        /* Unless possessive, try the rest of the pattern at each position,
        giving back one code unit per failure. Here Feptr is decremented before
        rrc is tested, so it has already been moved back when a result other
        than no-match is passed to RRETURN. */
1400         if (reptype != REPTYPE_POS) for (;;)
1401           {
1402           if (Feptr == Lstart_eptr) break;
1403           RMATCH(Fecode, RM26);
1404           Feptr--;
1405           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1406           }
1407         }
1408       }
1409 
1410     /* Caseful comparisons (includes all multi-byte characters) */
1411 
1412     else
1413       {
1414       for (i = 1; i <= Lmin; i++)
1415         {
1416         if (Feptr >= mb->end_subject)
1417           {
1418           SCHECK_PARTIAL();
1419           RRETURN(MATCH_NOMATCH);
1420           }
1421         if (Lc != UCHAR21INCTEST(Feptr)) RRETURN(MATCH_NOMATCH);
1422         }
1423 
1424       if (Lmin == Lmax) continue;    /* Exact count done: back to the top of the opcode loop */
1425 
1426       if (reptype == REPTYPE_MIN)
1427         {
1428         for (;;)
1429           {
1430           RMATCH(Fecode, RM27);      /* Try the rest of the pattern first */
1431           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1432           if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);   /* Maximum reached */
1433           if (Feptr >= mb->end_subject)
1434             {
1435             SCHECK_PARTIAL();
1436             RRETURN(MATCH_NOMATCH);
1437             }
1438           if (Lc != UCHAR21INCTEST(Feptr)) RRETURN(MATCH_NOMATCH);
1439           }
1440         /* Control never gets here */
1441         }
1442       else  /* Maximize */
1443         {
1444         Lstart_eptr = Feptr;         /* Position after the mandatory repeats */
1445         for (i = Lmin; i < Lmax; i++)
1446           {
1447           if (Feptr >= mb->end_subject)
1448             {
1449             SCHECK_PARTIAL();
1450             break;
1451             }
1452 
1453           if (Lc != UCHAR21TEST(Feptr)) break;
1454           Feptr++;
1455           }
1456 
        /* Unless possessive, try the rest of the pattern at each position,
        giving back one code unit per failure. As in the caseless loop, Feptr
        is decremented before rrc is tested. */
1457         if (reptype != REPTYPE_POS) for (;;)
1458           {
1459           if (Feptr <= Lstart_eptr) break;
1460           RMATCH(Fecode, RM28);
1461           Feptr--;
1462           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1463           }
1464         }
1465       }
1466     break;
1467 
1468 #undef Loclength
1469 #undef Lstart_eptr
1470 #undef Lcharptr
1471 #undef Lmin
1472 #undef Lmax
1473 #undef Lc
1474 #undef Loc
1475 
1476 
1477     /* ===================================================================== */
1478     /* Match a negated single one-byte character repeatedly. This is almost a
1479     repeat of the code for a repeated single character, but I haven't found a
1480     nice way of commoning these up that doesn't require a test of the
1481     positive/negative option for each character match. Maybe that wouldn't add
1482     very much to the time taken, but character matching *is* what this is all
1483     about... */
1484 
1485 #define Lstart_eptr  F->temp_sptr[0]
1486 #define Lmin         F->temp_32[0]
1487 #define Lmax         F->temp_32[1]
1488 #define Lc           F->temp_32[2]
1489 #define Loc          F->temp_32[3]
1490 
1491     case OP_NOTEXACT:
1492     case OP_NOTEXACTI:
1493     Lmin = Lmax = GET2(Fecode, 1);
1494     Fecode += 1 + IMM2_SIZE;
1495     goto REPEATNOTCHAR;
1496 
1497     case OP_NOTUPTO:
1498     case OP_NOTUPTOI:
1499     Lmin = 0;
1500     Lmax = GET2(Fecode, 1);
1501     reptype = REPTYPE_MAX;
1502     Fecode += 1 + IMM2_SIZE;
1503     goto REPEATNOTCHAR;
1504 
1505     case OP_NOTMINUPTO:
1506     case OP_NOTMINUPTOI:
1507     Lmin = 0;
1508     Lmax = GET2(Fecode, 1);
1509     reptype = REPTYPE_MIN;
1510     Fecode += 1 + IMM2_SIZE;
1511     goto REPEATNOTCHAR;
1512 
1513     case OP_NOTPOSSTAR:
1514     case OP_NOTPOSSTARI:
1515     reptype = REPTYPE_POS;
1516     Lmin = 0;
1517     Lmax = UINT32_MAX;
1518     Fecode++;
1519     goto REPEATNOTCHAR;
1520 
1521     case OP_NOTPOSPLUS:
1522     case OP_NOTPOSPLUSI:
1523     reptype = REPTYPE_POS;
1524     Lmin = 1;
1525     Lmax = UINT32_MAX;
1526     Fecode++;
1527     goto REPEATNOTCHAR;
1528 
1529     case OP_NOTPOSQUERY:
1530     case OP_NOTPOSQUERYI:
1531     reptype = REPTYPE_POS;
1532     Lmin = 0;
1533     Lmax = 1;
1534     Fecode++;
1535     goto REPEATNOTCHAR;
1536 
1537     case OP_NOTPOSUPTO:
1538     case OP_NOTPOSUPTOI:
1539     reptype = REPTYPE_POS;
1540     Lmin = 0;
1541     Lmax = GET2(Fecode, 1);
1542     Fecode += 1 + IMM2_SIZE;
1543     goto REPEATNOTCHAR;
1544 
1545     case OP_NOTSTAR:
1546     case OP_NOTSTARI:
1547     case OP_NOTMINSTAR:
1548     case OP_NOTMINSTARI:
1549     case OP_NOTPLUS:
1550     case OP_NOTPLUSI:
1551     case OP_NOTMINPLUS:
1552     case OP_NOTMINPLUSI:
1553     case OP_NOTQUERY:
1554     case OP_NOTQUERYI:
1555     case OP_NOTMINQUERY:
1556     case OP_NOTMINQUERYI:
1557     fc = *Fecode++ - ((Fop >= OP_NOTSTARI)? OP_NOTSTARI: OP_NOTSTAR);
1558     Lmin = rep_min[fc];
1559     Lmax = rep_max[fc];
1560     reptype = rep_typ[fc];
1561 
1562     /* Common code for all repeated single-character non-matches. */
1563 
1564     REPEATNOTCHAR:
1565     GETCHARINCTEST(Lc, Fecode);
1566 
1567     /* The code is duplicated for the caseless and caseful cases, for speed,
1568     since matching characters is likely to be quite common. First, ensure the
1569     minimum number of matches are present. If Lmin = Lmax, we are done.
1570     Otherwise, if minimizing, keep trying the rest of the expression and
1571     advancing one matching character if failing, up to the maximum.
1572     Alternatively, if maximizing, find the maximum number of characters and
1573     work backwards. */
1574 
1575     if (Fop >= OP_NOTSTARI)     /* Caseless */
1576       {
1577 #ifdef SUPPORT_UNICODE
1578       if ((utf || ucp) && Lc > 127)
1579         Loc = UCD_OTHERCASE(Lc);
1580       else
1581 #endif /* SUPPORT_UNICODE */
1582 
1583       Loc = TABLE_GET(Lc, mb->fcc, Lc);  /* Other case from table */
1584 
1585 #ifdef SUPPORT_UNICODE
1586       if (utf)
1587         {
1588         uint32_t d;
1589         for (i = 1; i <= Lmin; i++)
1590           {
1591           if (Feptr >= mb->end_subject)
1592             {
1593             SCHECK_PARTIAL();
1594             RRETURN(MATCH_NOMATCH);
1595             }
1596           GETCHARINC(d, Feptr);
1597           if (Lc == d || Loc == d) RRETURN(MATCH_NOMATCH);
1598           }
1599         }
1600       else
1601 #endif  /* SUPPORT_UNICODE */
1602 
1603       /* Not UTF mode */
1604         {
1605         for (i = 1; i <= Lmin; i++)
1606           {
1607           if (Feptr >= mb->end_subject)
1608             {
1609             SCHECK_PARTIAL();
1610             RRETURN(MATCH_NOMATCH);
1611             }
1612           if (Lc == *Feptr || Loc == *Feptr) RRETURN(MATCH_NOMATCH);
1613           Feptr++;
1614           }
1615         }
1616 
1617       if (Lmin == Lmax) continue;  /* Finished for exact count */
1618 
1619       if (reptype == REPTYPE_MIN)
1620         {
1621 #ifdef SUPPORT_UNICODE
1622         if (utf)
1623           {
1624           uint32_t d;
1625           for (;;)
1626             {
1627             RMATCH(Fecode, RM204);
1628             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1629             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1630             if (Feptr >= mb->end_subject)
1631               {
1632               SCHECK_PARTIAL();
1633               RRETURN(MATCH_NOMATCH);
1634               }
1635             GETCHARINC(d, Feptr);
1636             if (Lc == d || Loc == d) RRETURN(MATCH_NOMATCH);
1637             }
1638           }
1639         else
1640 #endif  /*SUPPORT_UNICODE */
1641 
1642         /* Not UTF mode */
1643           {
1644           for (;;)
1645             {
1646             RMATCH(Fecode, RM29);
1647             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1648             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1649             if (Feptr >= mb->end_subject)
1650               {
1651               SCHECK_PARTIAL();
1652               RRETURN(MATCH_NOMATCH);
1653               }
1654             if (Lc == *Feptr || Loc == *Feptr) RRETURN(MATCH_NOMATCH);
1655             Feptr++;
1656             }
1657           }
1658         /* Control never gets here */
1659         }
1660 
1661       /* Maximize case */
1662 
1663       else
1664         {
1665         Lstart_eptr = Feptr;
1666 
1667 #ifdef SUPPORT_UNICODE
1668         if (utf)
1669           {
1670           uint32_t d;
1671           for (i = Lmin; i < Lmax; i++)
1672             {
1673             int len = 1;
1674             if (Feptr >= mb->end_subject)
1675               {
1676               SCHECK_PARTIAL();
1677               break;
1678               }
1679             GETCHARLEN(d, Feptr, len);
1680             if (Lc == d || Loc == d) break;
1681             Feptr += len;
1682             }
1683 
1684           /* After \C in UTF mode, Lstart_eptr might be in the middle of a
1685           Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
1686           go too far. */
1687 
1688           if (reptype != REPTYPE_POS) for(;;)
1689             {
1690             if (Feptr <= Lstart_eptr) break;
1691             RMATCH(Fecode, RM205);
1692             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1693             Feptr--;
1694             BACKCHAR(Feptr);
1695             }
1696           }
1697         else
1698 #endif  /* SUPPORT_UNICODE */
1699 
1700         /* Not UTF mode */
1701           {
1702           for (i = Lmin; i < Lmax; i++)
1703             {
1704             if (Feptr >= mb->end_subject)
1705               {
1706               SCHECK_PARTIAL();
1707               break;
1708               }
1709             if (Lc == *Feptr || Loc == *Feptr) break;
1710             Feptr++;
1711             }
1712           if (reptype != REPTYPE_POS) for (;;)
1713             {
1714             if (Feptr == Lstart_eptr) break;
1715             RMATCH(Fecode, RM30);
1716             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1717             Feptr--;
1718             }
1719           }
1720         }
1721       }
1722 
1723     /* Caseful comparisons */
1724 
1725     else
1726       {
1727 #ifdef SUPPORT_UNICODE
1728       if (utf)
1729         {
1730         uint32_t d;
1731         for (i = 1; i <= Lmin; i++)
1732           {
1733           if (Feptr >= mb->end_subject)
1734             {
1735             SCHECK_PARTIAL();
1736             RRETURN(MATCH_NOMATCH);
1737             }
1738           GETCHARINC(d, Feptr);
1739           if (Lc == d) RRETURN(MATCH_NOMATCH);
1740           }
1741         }
1742       else
1743 #endif
1744       /* Not UTF mode */
1745         {
1746         for (i = 1; i <= Lmin; i++)
1747           {
1748           if (Feptr >= mb->end_subject)
1749             {
1750             SCHECK_PARTIAL();
1751             RRETURN(MATCH_NOMATCH);
1752             }
1753           if (Lc == *Feptr++) RRETURN(MATCH_NOMATCH);
1754           }
1755         }
1756 
1757       if (Lmin == Lmax) continue;
1758 
1759       if (reptype == REPTYPE_MIN)
1760         {
1761 #ifdef SUPPORT_UNICODE
1762         if (utf)
1763           {
1764           uint32_t d;
1765           for (;;)
1766             {
1767             RMATCH(Fecode, RM206);
1768             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1769             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1770             if (Feptr >= mb->end_subject)
1771               {
1772               SCHECK_PARTIAL();
1773               RRETURN(MATCH_NOMATCH);
1774               }
1775             GETCHARINC(d, Feptr);
1776             if (Lc == d) RRETURN(MATCH_NOMATCH);
1777             }
1778           }
1779         else
1780 #endif
1781         /* Not UTF mode */
1782           {
1783           for (;;)
1784             {
1785             RMATCH(Fecode, RM31);
1786             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1787             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1788             if (Feptr >= mb->end_subject)
1789               {
1790               SCHECK_PARTIAL();
1791               RRETURN(MATCH_NOMATCH);
1792               }
1793             if (Lc == *Feptr++) RRETURN(MATCH_NOMATCH);
1794             }
1795           }
1796         /* Control never gets here */
1797         }
1798 
1799       /* Maximize case */
1800 
1801       else
1802         {
1803         Lstart_eptr = Feptr;
1804 
1805 #ifdef SUPPORT_UNICODE
1806         if (utf)
1807           {
1808           uint32_t d;
1809           for (i = Lmin; i < Lmax; i++)
1810             {
1811             int len = 1;
1812             if (Feptr >= mb->end_subject)
1813               {
1814               SCHECK_PARTIAL();
1815               break;
1816               }
1817             GETCHARLEN(d, Feptr, len);
1818             if (Lc == d) break;
1819             Feptr += len;
1820             }
1821 
1822           /* After \C in UTF mode, Lstart_eptr might be in the middle of a
1823           Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
1824           go too far. */
1825 
1826           if (reptype != REPTYPE_POS) for(;;)
1827             {
1828             if (Feptr <= Lstart_eptr) break;
1829             RMATCH(Fecode, RM207);
1830             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1831             Feptr--;
1832             BACKCHAR(Feptr);
1833             }
1834           }
1835         else
1836 #endif
1837         /* Not UTF mode */
1838           {
1839           for (i = Lmin; i < Lmax; i++)
1840             {
1841             if (Feptr >= mb->end_subject)
1842               {
1843               SCHECK_PARTIAL();
1844               break;
1845               }
1846             if (Lc == *Feptr) break;
1847             Feptr++;
1848             }
1849           if (reptype != REPTYPE_POS) for (;;)
1850             {
1851             if (Feptr == Lstart_eptr) break;
1852             RMATCH(Fecode, RM32);
1853             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1854             Feptr--;
1855             }
1856           }
1857         }
1858       }
1859     break;
1860 
1861 #undef Lstart_eptr
1862 #undef Lmin
1863 #undef Lmax
1864 #undef Lc
1865 #undef Loc
1866 
1867 
1868     /* ===================================================================== */
1869     /* Match a bit-mapped character class, possibly repeatedly. These opcodes
1870     are used when all the characters in the class have values in the range
1871     0-255, and either the matching is caseful, or the characters are in the
1872     range 0-127 when UTF processing is enabled. The only difference between
1873     OP_CLASS and OP_NCLASS occurs when a data character outside the range is
1874     encountered. */
1875 
1876 #define Lmin               F->temp_32[0]
1877 #define Lmax               F->temp_32[1]
1878 #define Lstart_eptr        F->temp_sptr[0]
1879 #define Lbyte_map_address  F->temp_sptr[1]
1880 #define Lbyte_map          ((unsigned char *)Lbyte_map_address)
1881 
1882     case OP_NCLASS:
1883     case OP_CLASS:
1884       {
1885       Lbyte_map_address = Fecode + 1;           /* Save for matching */
1886       Fecode += 1 + (32 / sizeof(PCRE2_UCHAR)); /* Advance past the item */
1887 
1888       /* Look past the end of the item to see if there is repeat information
1889       following. Then obey similar code to character type repeats. */
1890 
1891       switch (*Fecode)
1892         {
1893         case OP_CRSTAR:
1894         case OP_CRMINSTAR:
1895         case OP_CRPLUS:
1896         case OP_CRMINPLUS:
1897         case OP_CRQUERY:
1898         case OP_CRMINQUERY:
1899         case OP_CRPOSSTAR:
1900         case OP_CRPOSPLUS:
1901         case OP_CRPOSQUERY:
1902         fc = *Fecode++ - OP_CRSTAR;
1903         Lmin = rep_min[fc];
1904         Lmax = rep_max[fc];
1905         reptype = rep_typ[fc];
1906         break;
1907 
1908         case OP_CRRANGE:
1909         case OP_CRMINRANGE:
1910         case OP_CRPOSRANGE:
1911         Lmin = GET2(Fecode, 1);
1912         Lmax = GET2(Fecode, 1 + IMM2_SIZE);
1913         if (Lmax == 0) Lmax = UINT32_MAX;       /* Max 0 => infinity */
1914         reptype = rep_typ[*Fecode - OP_CRSTAR];
1915         Fecode += 1 + 2 * IMM2_SIZE;
1916         break;
1917 
1918         default:               /* No repeat follows */
1919         Lmin = Lmax = 1;
1920         break;
1921         }
1922 
1923       /* First, ensure the minimum number of matches are present. */
1924 
1925 #ifdef SUPPORT_UNICODE
1926       if (utf)
1927         {
1928         for (i = 1; i <= Lmin; i++)
1929           {
1930           if (Feptr >= mb->end_subject)
1931             {
1932             SCHECK_PARTIAL();
1933             RRETURN(MATCH_NOMATCH);
1934             }
1935           GETCHARINC(fc, Feptr);
1936           if (fc > 255)
1937             {
1938             if (Fop == OP_CLASS) RRETURN(MATCH_NOMATCH);
1939             }
1940           else
1941             if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
1942           }
1943         }
1944       else
1945 #endif
1946       /* Not UTF mode */
1947         {
1948         for (i = 1; i <= Lmin; i++)
1949           {
1950           if (Feptr >= mb->end_subject)
1951             {
1952             SCHECK_PARTIAL();
1953             RRETURN(MATCH_NOMATCH);
1954             }
1955           fc = *Feptr++;
1956 #if PCRE2_CODE_UNIT_WIDTH != 8
1957           if (fc > 255)
1958             {
1959             if (Fop == OP_CLASS) RRETURN(MATCH_NOMATCH);
1960             }
1961           else
1962 #endif
1963           if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
1964           }
1965         }
1966 
1967       /* If Lmax == Lmin we are done. Continue with main loop. */
1968 
1969       if (Lmin == Lmax) continue;
1970 
1971       /* If minimizing, keep testing the rest of the expression and advancing
1972       the pointer while it matches the class. */
1973 
1974       if (reptype == REPTYPE_MIN)
1975         {
1976 #ifdef SUPPORT_UNICODE
1977         if (utf)
1978           {
1979           for (;;)
1980             {
1981             RMATCH(Fecode, RM200);
1982             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1983             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1984             if (Feptr >= mb->end_subject)
1985               {
1986               SCHECK_PARTIAL();
1987               RRETURN(MATCH_NOMATCH);
1988               }
1989             GETCHARINC(fc, Feptr);
1990             if (fc > 255)
1991               {
1992               if (Fop == OP_CLASS) RRETURN(MATCH_NOMATCH);
1993               }
1994             else
1995               if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
1996             }
1997           }
1998         else
1999 #endif
2000         /* Not UTF mode */
2001           {
2002           for (;;)
2003             {
2004             RMATCH(Fecode, RM23);
2005             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2006             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
2007             if (Feptr >= mb->end_subject)
2008               {
2009               SCHECK_PARTIAL();
2010               RRETURN(MATCH_NOMATCH);
2011               }
2012             fc = *Feptr++;
2013 #if PCRE2_CODE_UNIT_WIDTH != 8
2014             if (fc > 255)
2015               {
2016               if (Fop == OP_CLASS) RRETURN(MATCH_NOMATCH);
2017               }
2018             else
2019 #endif
2020             if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
2021             }
2022           }
2023         /* Control never gets here */
2024         }
2025 
2026       /* If maximizing, find the longest possible run, then work backwards. */
2027 
2028       else
2029         {
2030         Lstart_eptr = Feptr;
2031 
2032 #ifdef SUPPORT_UNICODE
2033         if (utf)
2034           {
2035           for (i = Lmin; i < Lmax; i++)
2036             {
2037             int len = 1;
2038             if (Feptr >= mb->end_subject)
2039               {
2040               SCHECK_PARTIAL();
2041               break;
2042               }
2043             GETCHARLEN(fc, Feptr, len);
2044             if (fc > 255)
2045               {
2046               if (Fop == OP_CLASS) break;
2047               }
2048             else
2049               if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) break;
2050             Feptr += len;
2051             }
2052 
2053           if (reptype == REPTYPE_POS) continue;    /* No backtracking */
2054 
2055           /* After \C in UTF mode, Lstart_eptr might be in the middle of a
2056           Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
2057           go too far. */
2058 
2059           for (;;)
2060             {
2061             RMATCH(Fecode, RM201);
2062             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2063             if (Feptr-- <= Lstart_eptr) break;  /* Tried at original position */
2064             BACKCHAR(Feptr);
2065             }
2066           }
2067         else
2068 #endif
2069           /* Not UTF mode */
2070           {
2071           for (i = Lmin; i < Lmax; i++)
2072             {
2073             if (Feptr >= mb->end_subject)
2074               {
2075               SCHECK_PARTIAL();
2076               break;
2077               }
2078             fc = *Feptr;
2079 #if PCRE2_CODE_UNIT_WIDTH != 8
2080             if (fc > 255)
2081               {
2082               if (Fop == OP_CLASS) break;
2083               }
2084             else
2085 #endif
2086             if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) break;
2087             Feptr++;
2088             }
2089 
2090           if (reptype == REPTYPE_POS) continue;    /* No backtracking */
2091 
2092           while (Feptr >= Lstart_eptr)
2093             {
2094             RMATCH(Fecode, RM24);
2095             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2096             Feptr--;
2097             }
2098           }
2099 
2100         RRETURN(MATCH_NOMATCH);
2101         }
2102       }
2103     /* Control never gets here */
2104 
2105 #undef Lbyte_map_address
2106 #undef Lbyte_map
2107 #undef Lstart_eptr
2108 #undef Lmin
2109 #undef Lmax
2110 
2111 
2112     /* ===================================================================== */
2113     /* Match an extended character class. In the 8-bit library, this opcode is
2114     encountered only when UTF-8 mode is supported. In the 16-bit and
2115     32-bit libraries, codepoints greater than 255 may be encountered even when
2116     UTF is not supported. */
2117 
2118 #define Lstart_eptr  F->temp_sptr[0]
2119 #define Lxclass_data F->temp_sptr[1]
2120 #define Lmin         F->temp_32[0]
2121 #define Lmax         F->temp_32[1]
2122 
2123 #ifdef SUPPORT_WIDE_CHARS
2124     case OP_XCLASS:
2125       {
2126       Lxclass_data = Fecode + 1 + LINK_SIZE;  /* Save for matching */
2127       Fecode += GET(Fecode, 1);               /* Advance past the item */
2128 
2129       switch (*Fecode)
2130         {
2131         case OP_CRSTAR:
2132         case OP_CRMINSTAR:
2133         case OP_CRPLUS:
2134         case OP_CRMINPLUS:
2135         case OP_CRQUERY:
2136         case OP_CRMINQUERY:
2137         case OP_CRPOSSTAR:
2138         case OP_CRPOSPLUS:
2139         case OP_CRPOSQUERY:
2140         fc = *Fecode++ - OP_CRSTAR;
2141         Lmin = rep_min[fc];
2142         Lmax = rep_max[fc];
2143         reptype = rep_typ[fc];
2144         break;
2145 
2146         case OP_CRRANGE:
2147         case OP_CRMINRANGE:
2148         case OP_CRPOSRANGE:
2149         Lmin = GET2(Fecode, 1);
2150         Lmax = GET2(Fecode, 1 + IMM2_SIZE);
2151         if (Lmax == 0) Lmax = UINT32_MAX;  /* Max 0 => infinity */
2152         reptype = rep_typ[*Fecode - OP_CRSTAR];
2153         Fecode += 1 + 2 * IMM2_SIZE;
2154         break;
2155 
2156         default:               /* No repeat follows */
2157         Lmin = Lmax = 1;
2158         break;
2159         }
2160 
2161       /* First, ensure the minimum number of matches are present. */
2162 
2163       for (i = 1; i <= Lmin; i++)
2164         {
2165         if (Feptr >= mb->end_subject)
2166           {
2167           SCHECK_PARTIAL();
2168           RRETURN(MATCH_NOMATCH);
2169           }
2170         GETCHARINCTEST(fc, Feptr);
2171         if (!PRIV(xclass)(fc, Lxclass_data, utf)) RRETURN(MATCH_NOMATCH);
2172         }
2173 
2174       /* If Lmax == Lmin we can just continue with the main loop. */
2175 
2176       if (Lmin == Lmax) continue;
2177 
2178       /* If minimizing, keep testing the rest of the expression and advancing
2179       the pointer while it matches the class. */
2180 
2181       if (reptype == REPTYPE_MIN)
2182         {
2183         for (;;)
2184           {
2185           RMATCH(Fecode, RM100);
2186           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2187           if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
2188           if (Feptr >= mb->end_subject)
2189             {
2190             SCHECK_PARTIAL();
2191             RRETURN(MATCH_NOMATCH);
2192             }
2193           GETCHARINCTEST(fc, Feptr);
2194           if (!PRIV(xclass)(fc, Lxclass_data, utf)) RRETURN(MATCH_NOMATCH);
2195           }
2196         /* Control never gets here */
2197         }
2198 
2199       /* If maximizing, find the longest possible run, then work backwards. */
2200 
2201       else
2202         {
2203         Lstart_eptr = Feptr;
2204         for (i = Lmin; i < Lmax; i++)
2205           {
2206           int len = 1;
2207           if (Feptr >= mb->end_subject)
2208             {
2209             SCHECK_PARTIAL();
2210             break;
2211             }
2212 #ifdef SUPPORT_UNICODE
2213           GETCHARLENTEST(fc, Feptr, len);
2214 #else
2215           fc = *Feptr;
2216 #endif
2217           if (!PRIV(xclass)(fc, Lxclass_data, utf)) break;
2218           Feptr += len;
2219           }
2220 
2221         if (reptype == REPTYPE_POS) continue;    /* No backtracking */
2222 
2223         /* After \C in UTF mode, Lstart_eptr might be in the middle of a
2224         Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
2225         go too far. */
2226 
2227         for(;;)
2228           {
2229           RMATCH(Fecode, RM101);
2230           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2231           if (Feptr-- <= Lstart_eptr) break;  /* Tried at original position */
2232 #ifdef SUPPORT_UNICODE
2233           if (utf) BACKCHAR(Feptr);
2234 #endif
2235           }
2236         RRETURN(MATCH_NOMATCH);
2237         }
2238 
2239       /* Control never gets here */
2240       }
2241 #endif  /* SUPPORT_WIDE_CHARS: end of XCLASS */
2242 
2243 #undef Lstart_eptr
2244 #undef Lxclass_data
2245 #undef Lmin
2246 #undef Lmax
2247 
2248 
2249     /* ===================================================================== */
2250     /* Match various character types when PCRE2_UCP is not set. These opcodes
2251     are not generated when PCRE2_UCP is set - instead appropriate property
2252     tests are compiled. */
2253 
2254     case OP_NOT_DIGIT:
2255     if (Feptr >= mb->end_subject)
2256       {
2257       SCHECK_PARTIAL();
2258       RRETURN(MATCH_NOMATCH);
2259       }
2260     GETCHARINCTEST(fc, Feptr);
2261     if (CHMAX_255(fc) && (mb->ctypes[fc] & ctype_digit) != 0)
2262       RRETURN(MATCH_NOMATCH);
2263     Fecode++;
2264     break;
2265 
2266     case OP_DIGIT:
2267     if (Feptr >= mb->end_subject)
2268       {
2269       SCHECK_PARTIAL();
2270       RRETURN(MATCH_NOMATCH);
2271       }
2272     GETCHARINCTEST(fc, Feptr);
2273     if (!CHMAX_255(fc) || (mb->ctypes[fc] & ctype_digit) == 0)
2274       RRETURN(MATCH_NOMATCH);
2275     Fecode++;
2276     break;
2277 
2278     case OP_NOT_WHITESPACE:
2279     if (Feptr >= mb->end_subject)
2280       {
2281       SCHECK_PARTIAL();
2282       RRETURN(MATCH_NOMATCH);
2283       }
2284     GETCHARINCTEST(fc, Feptr);
2285     if (CHMAX_255(fc) && (mb->ctypes[fc] & ctype_space) != 0)
2286       RRETURN(MATCH_NOMATCH);
2287     Fecode++;
2288     break;
2289 
2290     case OP_WHITESPACE:
2291     if (Feptr >= mb->end_subject)
2292       {
2293       SCHECK_PARTIAL();
2294       RRETURN(MATCH_NOMATCH);
2295       }
2296     GETCHARINCTEST(fc, Feptr);
2297     if (!CHMAX_255(fc) || (mb->ctypes[fc] & ctype_space) == 0)
2298       RRETURN(MATCH_NOMATCH);
2299     Fecode++;
2300     break;
2301 
2302     case OP_NOT_WORDCHAR:
2303     if (Feptr >= mb->end_subject)
2304       {
2305       SCHECK_PARTIAL();
2306       RRETURN(MATCH_NOMATCH);
2307       }
2308     GETCHARINCTEST(fc, Feptr);
2309     if (CHMAX_255(fc) && (mb->ctypes[fc] & ctype_word) != 0)
2310       RRETURN(MATCH_NOMATCH);
2311     Fecode++;
2312     break;
2313 
2314     case OP_WORDCHAR:
2315     if (Feptr >= mb->end_subject)
2316       {
2317       SCHECK_PARTIAL();
2318       RRETURN(MATCH_NOMATCH);
2319       }
2320     GETCHARINCTEST(fc, Feptr);
2321     if (!CHMAX_255(fc) || (mb->ctypes[fc] & ctype_word) == 0)
2322       RRETURN(MATCH_NOMATCH);
2323     Fecode++;
2324     break;
2325 
2326     case OP_ANYNL:
2327     if (Feptr >= mb->end_subject)
2328       {
2329       SCHECK_PARTIAL();
2330       RRETURN(MATCH_NOMATCH);
2331       }
2332     GETCHARINCTEST(fc, Feptr);
2333     switch(fc)
2334       {
2335       default: RRETURN(MATCH_NOMATCH);
2336 
2337       case CHAR_CR:
2338       if (Feptr >= mb->end_subject)
2339         {
2340         SCHECK_PARTIAL();
2341         }
2342       else if (UCHAR21TEST(Feptr) == CHAR_LF) Feptr++;
2343       break;
2344 
2345       case CHAR_LF:
2346       break;
2347 
2348       case CHAR_VT:
2349       case CHAR_FF:
2350       case CHAR_NEL:
2351 #ifndef EBCDIC
2352       case 0x2028:
2353       case 0x2029:
2354 #endif  /* Not EBCDIC */
2355       if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) RRETURN(MATCH_NOMATCH);
2356       break;
2357       }
2358     Fecode++;
2359     break;
2360 
2361     case OP_NOT_HSPACE:
2362     if (Feptr >= mb->end_subject)
2363       {
2364       SCHECK_PARTIAL();
2365       RRETURN(MATCH_NOMATCH);
2366       }
2367     GETCHARINCTEST(fc, Feptr);
2368     switch(fc)
2369       {
2370       HSPACE_CASES: RRETURN(MATCH_NOMATCH);  /* Byte and multibyte cases */
2371       default: break;
2372       }
2373     Fecode++;
2374     break;
2375 
2376     case OP_HSPACE:
2377     if (Feptr >= mb->end_subject)
2378       {
2379       SCHECK_PARTIAL();
2380       RRETURN(MATCH_NOMATCH);
2381       }
2382     GETCHARINCTEST(fc, Feptr);
2383     switch(fc)
2384       {
2385       HSPACE_CASES: break;  /* Byte and multibyte cases */
2386       default: RRETURN(MATCH_NOMATCH);
2387       }
2388     Fecode++;
2389     break;
2390 
2391     case OP_NOT_VSPACE:
2392     if (Feptr >= mb->end_subject)
2393       {
2394       SCHECK_PARTIAL();
2395       RRETURN(MATCH_NOMATCH);
2396       }
2397     GETCHARINCTEST(fc, Feptr);
2398     switch(fc)
2399       {
2400       VSPACE_CASES: RRETURN(MATCH_NOMATCH);
2401       default: break;
2402       }
2403     Fecode++;
2404     break;
2405 
2406     case OP_VSPACE:
2407     if (Feptr >= mb->end_subject)
2408       {
2409       SCHECK_PARTIAL();
2410       RRETURN(MATCH_NOMATCH);
2411       }
2412     GETCHARINCTEST(fc, Feptr);
2413     switch(fc)
2414       {
2415       VSPACE_CASES: break;
2416       default: RRETURN(MATCH_NOMATCH);
2417       }
2418     Fecode++;
2419     break;
2420 
2421 
2422 #ifdef SUPPORT_UNICODE
2423 
2424     /* ===================================================================== */
2425     /* Check the next character by Unicode property. We will get here only
2426     if the support is in the binary; otherwise a compile-time error occurs. */
2427 
2428     case OP_PROP:
2429     case OP_NOTPROP:
2430     if (Feptr >= mb->end_subject)
2431       {
2432       SCHECK_PARTIAL();
2433       RRETURN(MATCH_NOMATCH);
2434       }
2435     GETCHARINCTEST(fc, Feptr);
2436       {
2437       const uint32_t *cp;
2438       const ucd_record *prop = GET_UCD(fc);
2439       BOOL notmatch = Fop == OP_NOTPROP;
2440 
2441       switch(Fecode[1])
2442         {
2443         case PT_ANY:
2444         if (notmatch) RRETURN(MATCH_NOMATCH);
2445         break;
2446 
2447         case PT_LAMP:
2448         if ((prop->chartype == ucp_Lu ||
2449              prop->chartype == ucp_Ll ||
2450              prop->chartype == ucp_Lt) == notmatch)
2451           RRETURN(MATCH_NOMATCH);
2452         break;
2453 
2454         case PT_GC:
2455         if ((Fecode[2] == PRIV(ucp_gentype)[prop->chartype]) == notmatch)
2456           RRETURN(MATCH_NOMATCH);
2457         break;
2458 
2459         case PT_PC:
2460         if ((Fecode[2] == prop->chartype) == notmatch)
2461           RRETURN(MATCH_NOMATCH);
2462         break;
2463 
2464         case PT_SC:
2465         if ((Fecode[2] == prop->script) == notmatch)
2466           RRETURN(MATCH_NOMATCH);
2467         break;
2468 
2469         case PT_SCX:
2470           {
2471           BOOL ok = (Fecode[2] == prop->script ||
2472                      MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Fecode[2]) != 0);
2473           if (ok == notmatch) RRETURN(MATCH_NOMATCH);
2474           }
2475         break;
2476 
2477         /* These are specials */
2478 
2479         case PT_ALNUM:
2480         if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2481              PRIV(ucp_gentype)[prop->chartype] == ucp_N) == notmatch)
2482           RRETURN(MATCH_NOMATCH);
2483         break;
2484 
2485         /* Perl space used to exclude VT, but from Perl 5.18 it is included,
2486         which means that Perl space and POSIX space are now identical. PCRE
2487         was changed at release 8.34. */
2488 
2489         case PT_SPACE:    /* Perl space */
2490         case PT_PXSPACE:  /* POSIX space */
2491         switch(fc)
2492           {
2493           HSPACE_CASES:
2494           VSPACE_CASES:
2495           if (notmatch) RRETURN(MATCH_NOMATCH);
2496           break;
2497 
2498           default:
2499           if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z) == notmatch)
2500             RRETURN(MATCH_NOMATCH);
2501           break;
2502           }
2503         break;
2504 
2505         case PT_WORD:
2506         if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2507              PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
2508              fc == CHAR_UNDERSCORE) == notmatch)
2509           RRETURN(MATCH_NOMATCH);
2510         break;
2511 
2512         case PT_CLIST:
2513         cp = PRIV(ucd_caseless_sets) + Fecode[2];
2514         for (;;)
2515           {
2516           if (fc < *cp)
2517             { if (notmatch) break; else { RRETURN(MATCH_NOMATCH); } }
2518           if (fc == *cp++)
2519             { if (notmatch) { RRETURN(MATCH_NOMATCH); } else break; }
2520           }
2521         break;
2522 
2523         case PT_UCNC:
2524         if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT ||
2525              fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) ||
2526              fc >= 0xe000) == notmatch)
2527           RRETURN(MATCH_NOMATCH);
2528         break;
2529 
2530         case PT_BIDICL:
2531         if ((UCD_BIDICLASS_PROP(prop) == Fecode[2]) == notmatch)
2532           RRETURN(MATCH_NOMATCH);
2533         break;
2534 
2535         case PT_BOOL:
2536           {
2537           BOOL ok = MAPBIT(PRIV(ucd_boolprop_sets) +
2538             UCD_BPROPS_PROP(prop), Fecode[2]) != 0;
2539           if (ok == notmatch) RRETURN(MATCH_NOMATCH);
2540           }
2541         break;
2542 
2543         /* This should never occur */
2544 
2545         default:
2546         return PCRE2_ERROR_INTERNAL;
2547         }
2548 
2549       Fecode += 3;
2550       }
2551     break;
2552 
2553 
2554     /* ===================================================================== */
2555     /* Match an extended Unicode sequence. We will get here only if the support
2556     is in the binary; otherwise a compile-time error occurs. */
2557 
2558     case OP_EXTUNI:
2559     if (Feptr >= mb->end_subject)
2560       {
2561       SCHECK_PARTIAL();
2562       RRETURN(MATCH_NOMATCH);
2563       }
2564     else
2565       {
2566       GETCHARINCTEST(fc, Feptr);
2567       Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject, mb->end_subject, utf,
2568         NULL);
2569       }
2570     CHECK_PARTIAL();
2571     Fecode++;
2572     break;
2573 
2574 #endif  /* SUPPORT_UNICODE */
2575 
2576 
2577     /* ===================================================================== */
2578     /* Match a single character type repeatedly. Note that the property type
2579     does not need to be in a stack frame as it is not used within an RMATCH()
2580     loop. */
2581 
2582 #define Lstart_eptr  F->temp_sptr[0]
2583 #define Lmin         F->temp_32[0]
2584 #define Lmax         F->temp_32[1]
2585 #define Lctype       F->temp_32[2]
2586 #define Lpropvalue   F->temp_32[3]
2587 
2588     case OP_TYPEEXACT:
2589     Lmin = Lmax = GET2(Fecode, 1);
2590     Fecode += 1 + IMM2_SIZE;
2591     goto REPEATTYPE;
2592 
2593     case OP_TYPEUPTO:
2594     case OP_TYPEMINUPTO:
2595     Lmin = 0;
2596     Lmax = GET2(Fecode, 1);
2597     reptype = (*Fecode == OP_TYPEMINUPTO)? REPTYPE_MIN : REPTYPE_MAX;
2598     Fecode += 1 + IMM2_SIZE;
2599     goto REPEATTYPE;
2600 
2601     case OP_TYPEPOSSTAR:
2602     reptype = REPTYPE_POS;
2603     Lmin = 0;
2604     Lmax = UINT32_MAX;
2605     Fecode++;
2606     goto REPEATTYPE;
2607 
2608     case OP_TYPEPOSPLUS:
2609     reptype = REPTYPE_POS;
2610     Lmin = 1;
2611     Lmax = UINT32_MAX;
2612     Fecode++;
2613     goto REPEATTYPE;
2614 
2615     case OP_TYPEPOSQUERY:
2616     reptype = REPTYPE_POS;
2617     Lmin = 0;
2618     Lmax = 1;
2619     Fecode++;
2620     goto REPEATTYPE;
2621 
2622     case OP_TYPEPOSUPTO:
2623     reptype = REPTYPE_POS;
2624     Lmin = 0;
2625     Lmax = GET2(Fecode, 1);
2626     Fecode += 1 + IMM2_SIZE;
2627     goto REPEATTYPE;
2628 
2629     case OP_TYPESTAR:
2630     case OP_TYPEMINSTAR:
2631     case OP_TYPEPLUS:
2632     case OP_TYPEMINPLUS:
2633     case OP_TYPEQUERY:
2634     case OP_TYPEMINQUERY:
2635     fc = *Fecode++ - OP_TYPESTAR;
2636     Lmin = rep_min[fc];
2637     Lmax = rep_max[fc];
2638     reptype = rep_typ[fc];
2639 
2640     /* Common code for all repeated character type matches. */
2641 
2642     REPEATTYPE:
2643     Lctype = *Fecode++;      /* Code for the character type */
2644 
2645 #ifdef SUPPORT_UNICODE
2646     if (Lctype == OP_PROP || Lctype == OP_NOTPROP)
2647       {
2648       proptype = *Fecode++;
2649       Lpropvalue = *Fecode++;
2650       }
2651     else proptype = -1;
2652 #endif
2653 
2654     /* First, ensure the minimum number of matches are present. Use inline
2655     code for maximizing the speed, and do the type test once at the start
2656     (i.e. keep it out of the loops). As there are no calls to RMATCH in the
2657     loops, we can use an ordinary variable for "notmatch". The code for UTF
2658     mode is separated out for tidiness, except for Unicode property tests. */
2659 
2660     if (Lmin > 0)
2661       {
2662 #ifdef SUPPORT_UNICODE
2663       if (proptype >= 0)  /* Property tests in all modes */
2664         {
2665         BOOL notmatch = Lctype == OP_NOTPROP;
2666         switch(proptype)
2667           {
2668           case PT_ANY:
2669           if (notmatch) RRETURN(MATCH_NOMATCH);
2670           for (i = 1; i <= Lmin; i++)
2671             {
2672             if (Feptr >= mb->end_subject)
2673               {
2674               SCHECK_PARTIAL();
2675               RRETURN(MATCH_NOMATCH);
2676               }
2677             GETCHARINCTEST(fc, Feptr);
2678             }
2679           break;
2680 
2681           case PT_LAMP:
2682           for (i = 1; i <= Lmin; i++)
2683             {
2684             int chartype;
2685             if (Feptr >= mb->end_subject)
2686               {
2687               SCHECK_PARTIAL();
2688               RRETURN(MATCH_NOMATCH);
2689               }
2690             GETCHARINCTEST(fc, Feptr);
2691             chartype = UCD_CHARTYPE(fc);
2692             if ((chartype == ucp_Lu ||
2693                  chartype == ucp_Ll ||
2694                  chartype == ucp_Lt) == notmatch)
2695               RRETURN(MATCH_NOMATCH);
2696             }
2697           break;
2698 
2699           case PT_GC:
2700           for (i = 1; i <= Lmin; i++)
2701             {
2702             if (Feptr >= mb->end_subject)
2703               {
2704               SCHECK_PARTIAL();
2705               RRETURN(MATCH_NOMATCH);
2706               }
2707             GETCHARINCTEST(fc, Feptr);
2708             if ((UCD_CATEGORY(fc) == Lpropvalue) == notmatch)
2709               RRETURN(MATCH_NOMATCH);
2710             }
2711           break;
2712 
2713           case PT_PC:
2714           for (i = 1; i <= Lmin; i++)
2715             {
2716             if (Feptr >= mb->end_subject)
2717               {
2718               SCHECK_PARTIAL();
2719               RRETURN(MATCH_NOMATCH);
2720               }
2721             GETCHARINCTEST(fc, Feptr);
2722             if ((UCD_CHARTYPE(fc) == Lpropvalue) == notmatch)
2723               RRETURN(MATCH_NOMATCH);
2724             }
2725           break;
2726 
2727           case PT_SC:
2728           for (i = 1; i <= Lmin; i++)
2729             {
2730             if (Feptr >= mb->end_subject)
2731               {
2732               SCHECK_PARTIAL();
2733               RRETURN(MATCH_NOMATCH);
2734               }
2735             GETCHARINCTEST(fc, Feptr);
2736             if ((UCD_SCRIPT(fc) == Lpropvalue) == notmatch)
2737               RRETURN(MATCH_NOMATCH);
2738             }
2739           break;
2740 
2741           case PT_SCX:
2742           for (i = 1; i <= Lmin; i++)
2743             {
2744             BOOL ok;
2745             const ucd_record *prop;
2746             if (Feptr >= mb->end_subject)
2747               {
2748               SCHECK_PARTIAL();
2749               RRETURN(MATCH_NOMATCH);
2750               }
2751             GETCHARINCTEST(fc, Feptr);
2752             prop = GET_UCD(fc);
2753             ok = (prop->script == Lpropvalue ||
2754                   MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Lpropvalue) != 0);
2755             if (ok == notmatch)
2756               RRETURN(MATCH_NOMATCH);
2757             }
2758           break;
2759 
2760           case PT_ALNUM:
2761           for (i = 1; i <= Lmin; i++)
2762             {
2763             int category;
2764             if (Feptr >= mb->end_subject)
2765               {
2766               SCHECK_PARTIAL();
2767               RRETURN(MATCH_NOMATCH);
2768               }
2769             GETCHARINCTEST(fc, Feptr);
2770             category = UCD_CATEGORY(fc);
2771             if ((category == ucp_L || category == ucp_N) == notmatch)
2772               RRETURN(MATCH_NOMATCH);
2773             }
2774           break;
2775 
2776           /* Perl space used to exclude VT, but from Perl 5.18 it is included,
2777           which means that Perl space and POSIX space are now identical. PCRE
2778           was changed at release 8.34. */
2779 
2780           case PT_SPACE:    /* Perl space */
2781           case PT_PXSPACE:  /* POSIX space */
2782           for (i = 1; i <= Lmin; i++)
2783             {
2784             if (Feptr >= mb->end_subject)
2785               {
2786               SCHECK_PARTIAL();
2787               RRETURN(MATCH_NOMATCH);
2788               }
2789             GETCHARINCTEST(fc, Feptr);
2790             switch(fc)
2791               {
2792               HSPACE_CASES:
2793               VSPACE_CASES:
2794               if (notmatch) RRETURN(MATCH_NOMATCH);
2795               break;
2796 
2797               default:
2798               if ((UCD_CATEGORY(fc) == ucp_Z) == notmatch)
2799                 RRETURN(MATCH_NOMATCH);
2800               break;
2801               }
2802             }
2803           break;
2804 
2805           case PT_WORD:
2806           for (i = 1; i <= Lmin; i++)
2807             {
2808             int category;
2809             if (Feptr >= mb->end_subject)
2810               {
2811               SCHECK_PARTIAL();
2812               RRETURN(MATCH_NOMATCH);
2813               }
2814             GETCHARINCTEST(fc, Feptr);
2815             category = UCD_CATEGORY(fc);
2816             if ((category == ucp_L || category == ucp_N ||
2817                 fc == CHAR_UNDERSCORE) == notmatch)
2818               RRETURN(MATCH_NOMATCH);
2819             }
2820           break;
2821 
2822           case PT_CLIST:
2823           for (i = 1; i <= Lmin; i++)
2824             {
2825             const uint32_t *cp;
2826             if (Feptr >= mb->end_subject)
2827               {
2828               SCHECK_PARTIAL();
2829               RRETURN(MATCH_NOMATCH);
2830               }
2831             GETCHARINCTEST(fc, Feptr);
2832             cp = PRIV(ucd_caseless_sets) + Lpropvalue;
2833             for (;;)
2834               {
2835               if (fc < *cp)
2836                 {
2837                 if (notmatch) break;
2838                 RRETURN(MATCH_NOMATCH);
2839                 }
2840               if (fc == *cp++)
2841                 {
2842                 if (notmatch) RRETURN(MATCH_NOMATCH);
2843                 break;
2844                 }
2845               }
2846             }
2847           break;
2848 
2849           case PT_UCNC:
2850           for (i = 1; i <= Lmin; i++)
2851             {
2852             if (Feptr >= mb->end_subject)
2853               {
2854               SCHECK_PARTIAL();
2855               RRETURN(MATCH_NOMATCH);
2856               }
2857             GETCHARINCTEST(fc, Feptr);
2858             if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT ||
2859                  fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) ||
2860                  fc >= 0xe000) == notmatch)
2861               RRETURN(MATCH_NOMATCH);
2862             }
2863           break;
2864 
2865           case PT_BIDICL:
2866           for (i = 1; i <= Lmin; i++)
2867             {
2868             if (Feptr >= mb->end_subject)
2869               {
2870               SCHECK_PARTIAL();
2871               RRETURN(MATCH_NOMATCH);
2872               }
2873             GETCHARINCTEST(fc, Feptr);
2874             if ((UCD_BIDICLASS(fc) == Lpropvalue) == notmatch)
2875               RRETURN(MATCH_NOMATCH);
2876             }
2877           break;
2878 
2879           case PT_BOOL:
2880           for (i = 1; i <= Lmin; i++)
2881             {
2882             BOOL ok;
2883             const ucd_record *prop;
2884             if (Feptr >= mb->end_subject)
2885               {
2886               SCHECK_PARTIAL();
2887               RRETURN(MATCH_NOMATCH);
2888               }
2889             GETCHARINCTEST(fc, Feptr);
2890             prop = GET_UCD(fc);
2891             ok = MAPBIT(PRIV(ucd_boolprop_sets) +
2892               UCD_BPROPS_PROP(prop), Lpropvalue) != 0;
2893             if (ok == notmatch)
2894               RRETURN(MATCH_NOMATCH);
2895             }
2896           break;
2897 
2898           /* This should not occur */
2899 
2900           default:
2901           return PCRE2_ERROR_INTERNAL;
2902           }
2903         }
2904 
2905       /* Match extended Unicode sequences. We will get here only if the
2906       support is in the binary; otherwise a compile-time error occurs. */
2907 
2908       else if (Lctype == OP_EXTUNI)
2909         {
2910         for (i = 1; i <= Lmin; i++)
2911           {
2912           if (Feptr >= mb->end_subject)
2913             {
2914             SCHECK_PARTIAL();
2915             RRETURN(MATCH_NOMATCH);
2916             }
2917           else
2918             {
2919             GETCHARINCTEST(fc, Feptr);
2920             Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject,
2921               mb->end_subject, utf, NULL);
2922             }
2923           CHECK_PARTIAL();
2924           }
2925         }
2926       else
2927 #endif     /* SUPPORT_UNICODE */
2928 
2929 /* Handle all other cases in UTF mode */
2930 
2931 #ifdef SUPPORT_UNICODE
2932       if (utf) switch(Lctype)
2933         {
2934         case OP_ANY:
2935         for (i = 1; i <= Lmin; i++)
2936           {
2937           if (Feptr >= mb->end_subject)
2938             {
2939             SCHECK_PARTIAL();
2940             RRETURN(MATCH_NOMATCH);
2941             }
2942           if (IS_NEWLINE(Feptr)) RRETURN(MATCH_NOMATCH);
2943           if (mb->partial != 0 &&
2944               Feptr + 1 >= mb->end_subject &&
2945               NLBLOCK->nltype == NLTYPE_FIXED &&
2946               NLBLOCK->nllen == 2 &&
2947               UCHAR21(Feptr) == NLBLOCK->nl[0])
2948             {
2949             mb->hitend = TRUE;
2950             if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
2951             }
2952           Feptr++;
2953           ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
2954           }
2955         break;
2956 
2957         case OP_ALLANY:
2958         for (i = 1; i <= Lmin; i++)
2959           {
2960           if (Feptr >= mb->end_subject)
2961             {
2962             SCHECK_PARTIAL();
2963             RRETURN(MATCH_NOMATCH);
2964             }
2965           Feptr++;
2966           ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
2967           }
2968         break;
2969 
2970         case OP_ANYBYTE:
2971         if (Feptr > mb->end_subject - Lmin) RRETURN(MATCH_NOMATCH);
2972         Feptr += Lmin;
2973         break;
2974 
2975         case OP_ANYNL:
2976         for (i = 1; i <= Lmin; i++)
2977           {
2978           if (Feptr >= mb->end_subject)
2979             {
2980             SCHECK_PARTIAL();
2981             RRETURN(MATCH_NOMATCH);
2982             }
2983           GETCHARINC(fc, Feptr);
2984           switch(fc)
2985             {
2986             default: RRETURN(MATCH_NOMATCH);
2987 
2988             case CHAR_CR:
2989             if (Feptr < mb->end_subject && UCHAR21(Feptr) == CHAR_LF) Feptr++;
2990             break;
2991 
2992             case CHAR_LF:
2993             break;
2994 
2995             case CHAR_VT:
2996             case CHAR_FF:
2997             case CHAR_NEL:
2998 #ifndef EBCDIC
2999             case 0x2028:
3000             case 0x2029:
3001 #endif  /* Not EBCDIC */
3002             if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) RRETURN(MATCH_NOMATCH);
3003             break;
3004             }
3005           }
3006         break;
3007 
3008         case OP_NOT_HSPACE:
3009         for (i = 1; i <= Lmin; i++)
3010           {
3011           if (Feptr >= mb->end_subject)
3012             {
3013             SCHECK_PARTIAL();
3014             RRETURN(MATCH_NOMATCH);
3015             }
3016           GETCHARINC(fc, Feptr);
3017           switch(fc)
3018             {
3019             HSPACE_CASES: RRETURN(MATCH_NOMATCH);
3020             default: break;
3021             }
3022           }
3023         break;
3024 
3025         case OP_HSPACE:
3026         for (i = 1; i <= Lmin; i++)
3027           {
3028           if (Feptr >= mb->end_subject)
3029             {
3030             SCHECK_PARTIAL();
3031             RRETURN(MATCH_NOMATCH);
3032             }
3033           GETCHARINC(fc, Feptr);
3034           switch(fc)
3035             {
3036             HSPACE_CASES: break;
3037             default: RRETURN(MATCH_NOMATCH);
3038             }
3039           }
3040         break;
3041 
3042         case OP_NOT_VSPACE:
3043         for (i = 1; i <= Lmin; i++)
3044           {
3045           if (Feptr >= mb->end_subject)
3046             {
3047             SCHECK_PARTIAL();
3048             RRETURN(MATCH_NOMATCH);
3049             }
3050           GETCHARINC(fc, Feptr);
3051           switch(fc)
3052             {
3053             VSPACE_CASES: RRETURN(MATCH_NOMATCH);
3054             default: break;
3055             }
3056           }
3057         break;
3058 
3059         case OP_VSPACE:
3060         for (i = 1; i <= Lmin; i++)
3061           {
3062           if (Feptr >= mb->end_subject)
3063             {
3064             SCHECK_PARTIAL();
3065             RRETURN(MATCH_NOMATCH);
3066             }
3067           GETCHARINC(fc, Feptr);
3068           switch(fc)
3069             {
3070             VSPACE_CASES: break;
3071             default: RRETURN(MATCH_NOMATCH);
3072             }
3073           }
3074         break;
3075 
3076         case OP_NOT_DIGIT:
3077         for (i = 1; i <= Lmin; i++)
3078           {
3079           if (Feptr >= mb->end_subject)
3080             {
3081             SCHECK_PARTIAL();
3082             RRETURN(MATCH_NOMATCH);
3083             }
3084           GETCHARINC(fc, Feptr);
3085           if (fc < 128 && (mb->ctypes[fc] & ctype_digit) != 0)
3086             RRETURN(MATCH_NOMATCH);
3087           }
3088         break;
3089 
3090         case OP_DIGIT:
3091         for (i = 1; i <= Lmin; i++)
3092           {
3093           uint32_t cc;
3094           if (Feptr >= mb->end_subject)
3095             {
3096             SCHECK_PARTIAL();
3097             RRETURN(MATCH_NOMATCH);
3098             }
3099           cc = UCHAR21(Feptr);
3100           if (cc >= 128 || (mb->ctypes[cc] & ctype_digit) == 0)
3101             RRETURN(MATCH_NOMATCH);
3102           Feptr++;
3103           /* No need to skip more code units - we know it has only one. */
3104           }
3105         break;
3106 
3107         case OP_NOT_WHITESPACE:
3108         for (i = 1; i <= Lmin; i++)
3109           {
3110           uint32_t cc;
3111           if (Feptr >= mb->end_subject)
3112             {
3113             SCHECK_PARTIAL();
3114             RRETURN(MATCH_NOMATCH);
3115             }
3116           cc = UCHAR21(Feptr);
3117           if (cc < 128 && (mb->ctypes[cc] & ctype_space) != 0)
3118             RRETURN(MATCH_NOMATCH);
3119           Feptr++;
3120           ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
3121           }
3122         break;
3123 
3124         case OP_WHITESPACE:
3125         for (i = 1; i <= Lmin; i++)
3126           {
3127           uint32_t cc;
3128           if (Feptr >= mb->end_subject)
3129             {
3130             SCHECK_PARTIAL();
3131             RRETURN(MATCH_NOMATCH);
3132             }
3133           cc = UCHAR21(Feptr);
3134           if (cc >= 128 || (mb->ctypes[cc] & ctype_space) == 0)
3135             RRETURN(MATCH_NOMATCH);
3136           Feptr++;
3137           /* No need to skip more code units - we know it has only one. */
3138           }
3139         break;
3140 
3141         case OP_NOT_WORDCHAR:
3142         for (i = 1; i <= Lmin; i++)
3143           {
3144           uint32_t cc;
3145           if (Feptr >= mb->end_subject)
3146             {
3147             SCHECK_PARTIAL();
3148             RRETURN(MATCH_NOMATCH);
3149             }
3150           cc = UCHAR21(Feptr);
3151           if (cc < 128 && (mb->ctypes[cc] & ctype_word) != 0)
3152             RRETURN(MATCH_NOMATCH);
3153           Feptr++;
3154           ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
3155           }
3156         break;
3157 
3158         case OP_WORDCHAR:
3159         for (i = 1; i <= Lmin; i++)
3160           {
3161           uint32_t cc;
3162           if (Feptr >= mb->end_subject)
3163             {
3164             SCHECK_PARTIAL();
3165             RRETURN(MATCH_NOMATCH);
3166             }
3167           cc = UCHAR21(Feptr);
3168           if (cc >= 128 || (mb->ctypes[cc] & ctype_word) == 0)
3169             RRETURN(MATCH_NOMATCH);
3170           Feptr++;
3171           /* No need to skip more code units - we know it has only one. */
3172           }
3173         break;
3174 
3175         default:
3176         return PCRE2_ERROR_INTERNAL;
3177         }  /* End switch(Lctype) */
3178 
3179       else
3180 #endif     /* SUPPORT_UNICODE */
3181 
3182       /* Code for the non-UTF case for minimum matching of operators other
3183       than OP_PROP and OP_NOTPROP. */
3184 
3185       switch(Lctype)
3186         {
3187         case OP_ANY:
3188         for (i = 1; i <= Lmin; i++)
3189           {
3190           if (Feptr >= mb->end_subject)
3191             {
3192             SCHECK_PARTIAL();
3193             RRETURN(MATCH_NOMATCH);
3194             }
3195           if (IS_NEWLINE(Feptr)) RRETURN(MATCH_NOMATCH);
3196           if (mb->partial != 0 &&
3197               Feptr + 1 >= mb->end_subject &&
3198               NLBLOCK->nltype == NLTYPE_FIXED &&
3199               NLBLOCK->nllen == 2 &&
3200               *Feptr == NLBLOCK->nl[0])
3201             {
3202             mb->hitend = TRUE;
3203             if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
3204             }
3205           Feptr++;
3206           }
3207         break;
3208 
3209         case OP_ALLANY:
3210         if (Feptr > mb->end_subject - Lmin)
3211           {
3212           SCHECK_PARTIAL();
3213           RRETURN(MATCH_NOMATCH);
3214           }
3215         Feptr += Lmin;
3216         break;
3217 
3218         /* This OP_ANYBYTE case will never be reached because \C gets turned
3219         into OP_ALLANY in non-UTF mode. Cut out the code so that coverage
3220         reports don't complain about its never being used. */
3221 
3222 /*        case OP_ANYBYTE:
3223 *        if (Feptr > mb->end_subject - Lmin)
3224 *          {
3225 *          SCHECK_PARTIAL();
3226 *          RRETURN(MATCH_NOMATCH);
3227 *          }
3228 *        Feptr += Lmin;
3229 *        break;
3230 */
3231         case OP_ANYNL:
3232         for (i = 1; i <= Lmin; i++)
3233           {
3234           if (Feptr >= mb->end_subject)
3235             {
3236             SCHECK_PARTIAL();
3237             RRETURN(MATCH_NOMATCH);
3238             }
3239           switch(*Feptr++)
3240             {
3241             default: RRETURN(MATCH_NOMATCH);
3242 
3243             case CHAR_CR:
3244             if (Feptr < mb->end_subject && *Feptr == CHAR_LF) Feptr++;
3245             break;
3246 
3247             case CHAR_LF:
3248             break;
3249 
3250             case CHAR_VT:
3251             case CHAR_FF:
3252             case CHAR_NEL:
3253 #if PCRE2_CODE_UNIT_WIDTH != 8
3254             case 0x2028:
3255             case 0x2029:
3256 #endif
3257             if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) RRETURN(MATCH_NOMATCH);
3258             break;
3259             }
3260           }
3261         break;
3262 
3263         case OP_NOT_HSPACE:
3264         for (i = 1; i <= Lmin; i++)
3265           {
3266           if (Feptr >= mb->end_subject)
3267             {
3268             SCHECK_PARTIAL();
3269             RRETURN(MATCH_NOMATCH);
3270             }
3271           switch(*Feptr++)
3272             {
3273             default: break;
3274             HSPACE_BYTE_CASES:
3275 #if PCRE2_CODE_UNIT_WIDTH != 8
3276             HSPACE_MULTIBYTE_CASES:
3277 #endif
3278             RRETURN(MATCH_NOMATCH);
3279             }
3280           }
3281         break;
3282 
3283         case OP_HSPACE:
3284         for (i = 1; i <= Lmin; i++)
3285           {
3286           if (Feptr >= mb->end_subject)
3287             {
3288             SCHECK_PARTIAL();
3289             RRETURN(MATCH_NOMATCH);
3290             }
3291           switch(*Feptr++)
3292             {
3293             default: RRETURN(MATCH_NOMATCH);
3294             HSPACE_BYTE_CASES:
3295 #if PCRE2_CODE_UNIT_WIDTH != 8
3296             HSPACE_MULTIBYTE_CASES:
3297 #endif
3298             break;
3299             }
3300           }
3301         break;
3302 
3303         case OP_NOT_VSPACE:
3304         for (i = 1; i <= Lmin; i++)
3305           {
3306           if (Feptr >= mb->end_subject)
3307             {
3308             SCHECK_PARTIAL();
3309             RRETURN(MATCH_NOMATCH);
3310             }
3311           switch(*Feptr++)
3312             {
3313             VSPACE_BYTE_CASES:
3314 #if PCRE2_CODE_UNIT_WIDTH != 8
3315             VSPACE_MULTIBYTE_CASES:
3316 #endif
3317             RRETURN(MATCH_NOMATCH);
3318             default: break;
3319             }
3320           }
3321         break;
3322 
3323         case OP_VSPACE:
3324         for (i = 1; i <= Lmin; i++)
3325           {
3326           if (Feptr >= mb->end_subject)
3327             {
3328             SCHECK_PARTIAL();
3329             RRETURN(MATCH_NOMATCH);
3330             }
3331           switch(*Feptr++)
3332             {
3333             default: RRETURN(MATCH_NOMATCH);
3334             VSPACE_BYTE_CASES:
3335 #if PCRE2_CODE_UNIT_WIDTH != 8
3336             VSPACE_MULTIBYTE_CASES:
3337 #endif
3338             break;
3339             }
3340           }
3341         break;
3342 
3343         case OP_NOT_DIGIT:
3344         for (i = 1; i <= Lmin; i++)
3345           {
3346           if (Feptr >= mb->end_subject)
3347             {
3348             SCHECK_PARTIAL();
3349             RRETURN(MATCH_NOMATCH);
3350             }
3351           if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_digit) != 0)
3352             RRETURN(MATCH_NOMATCH);
3353           Feptr++;
3354           }
3355         break;
3356 
3357         case OP_DIGIT:
3358         for (i = 1; i <= Lmin; i++)
3359           {
3360           if (Feptr >= mb->end_subject)
3361             {
3362             SCHECK_PARTIAL();
3363             RRETURN(MATCH_NOMATCH);
3364             }
3365           if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_digit) == 0)
3366             RRETURN(MATCH_NOMATCH);
3367           Feptr++;
3368           }
3369         break;
3370 
3371         case OP_NOT_WHITESPACE:
3372         for (i = 1; i <= Lmin; i++)
3373           {
3374           if (Feptr >= mb->end_subject)
3375             {
3376             SCHECK_PARTIAL();
3377             RRETURN(MATCH_NOMATCH);
3378             }
3379           if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_space) != 0)
3380             RRETURN(MATCH_NOMATCH);
3381           Feptr++;
3382           }
3383         break;
3384 
3385         case OP_WHITESPACE:
3386         for (i = 1; i <= Lmin; i++)
3387           {
3388           if (Feptr >= mb->end_subject)
3389             {
3390             SCHECK_PARTIAL();
3391             RRETURN(MATCH_NOMATCH);
3392             }
3393           if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_space) == 0)
3394             RRETURN(MATCH_NOMATCH);
3395           Feptr++;
3396           }
3397         break;
3398 
3399         case OP_NOT_WORDCHAR:
3400         for (i = 1; i <= Lmin; i++)
3401           {
3402           if (Feptr >= mb->end_subject)
3403             {
3404             SCHECK_PARTIAL();
3405             RRETURN(MATCH_NOMATCH);
3406             }
3407           if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_word) != 0)
3408             RRETURN(MATCH_NOMATCH);
3409           Feptr++;
3410           }
3411         break;
3412 
3413         case OP_WORDCHAR:
3414         for (i = 1; i <= Lmin; i++)
3415           {
3416           if (Feptr >= mb->end_subject)
3417             {
3418             SCHECK_PARTIAL();
3419             RRETURN(MATCH_NOMATCH);
3420             }
3421           if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_word) == 0)
3422             RRETURN(MATCH_NOMATCH);
3423           Feptr++;
3424           }
3425         break;
3426 
3427         default:
3428         return PCRE2_ERROR_INTERNAL;
3429         }
3430       }
3431 
3432     /* If Lmin = Lmax we are done. Continue with the main loop. */
3433 
3434     if (Lmin == Lmax) continue;
3435 
3436     /* If minimizing, we have to test the rest of the pattern before each
3437     subsequent match. This means we cannot use a local "notmatch" variable as
3438     in the other cases. As all 4 temporary 32-bit values in the frame are
3439     already in use, just test the type each time. */
3440 
3441     if (reptype == REPTYPE_MIN)
3442       {
3443 #ifdef SUPPORT_UNICODE
3444       if (proptype >= 0)
3445         {
3446         switch(proptype)
3447           {
3448           case PT_ANY:
3449           for (;;)
3450             {
3451             RMATCH(Fecode, RM208);
3452             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3453             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3454             if (Feptr >= mb->end_subject)
3455               {
3456               SCHECK_PARTIAL();
3457               RRETURN(MATCH_NOMATCH);
3458               }
3459             GETCHARINCTEST(fc, Feptr);
3460             if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
3461             }
3462           /* Control never gets here */
3463 
3464           case PT_LAMP:
3465           for (;;)
3466             {
3467             int chartype;
3468             RMATCH(Fecode, RM209);
3469             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3470             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3471             if (Feptr >= mb->end_subject)
3472               {
3473               SCHECK_PARTIAL();
3474               RRETURN(MATCH_NOMATCH);
3475               }
3476             GETCHARINCTEST(fc, Feptr);
3477             chartype = UCD_CHARTYPE(fc);
3478             if ((chartype == ucp_Lu ||
3479                  chartype == ucp_Ll ||
3480                  chartype == ucp_Lt) == (Lctype == OP_NOTPROP))
3481               RRETURN(MATCH_NOMATCH);
3482             }
3483           /* Control never gets here */
3484 
3485           case PT_GC:
3486           for (;;)
3487             {
3488             RMATCH(Fecode, RM210);
3489             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3490             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3491             if (Feptr >= mb->end_subject)
3492               {
3493               SCHECK_PARTIAL();
3494               RRETURN(MATCH_NOMATCH);
3495               }
3496             GETCHARINCTEST(fc, Feptr);
3497             if ((UCD_CATEGORY(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
3498               RRETURN(MATCH_NOMATCH);
3499             }
3500           /* Control never gets here */
3501 
3502           case PT_PC:
3503           for (;;)
3504             {
3505             RMATCH(Fecode, RM211);
3506             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3507             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3508             if (Feptr >= mb->end_subject)
3509               {
3510               SCHECK_PARTIAL();
3511               RRETURN(MATCH_NOMATCH);
3512               }
3513             GETCHARINCTEST(fc, Feptr);
3514             if ((UCD_CHARTYPE(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
3515               RRETURN(MATCH_NOMATCH);
3516             }
3517           /* Control never gets here */
3518 
3519           case PT_SC:
3520           for (;;)
3521             {
3522             RMATCH(Fecode, RM212);
3523             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3524             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3525             if (Feptr >= mb->end_subject)
3526               {
3527               SCHECK_PARTIAL();
3528               RRETURN(MATCH_NOMATCH);
3529               }
3530             GETCHARINCTEST(fc, Feptr);
3531             if ((UCD_SCRIPT(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
3532               RRETURN(MATCH_NOMATCH);
3533             }
3534           /* Control never gets here */
3535 
3536           case PT_SCX:
3537           for (;;)
3538             {
3539             BOOL ok;
3540             const ucd_record *prop;
3541             RMATCH(Fecode, RM225);
3542             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3543             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3544             if (Feptr >= mb->end_subject)
3545               {
3546               SCHECK_PARTIAL();
3547               RRETURN(MATCH_NOMATCH);
3548               }
3549             GETCHARINCTEST(fc, Feptr);
3550             prop = GET_UCD(fc);
3551             ok = (prop->script == Lpropvalue
3552                   || MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Lpropvalue) != 0);
3553             if (ok == (Lctype == OP_NOTPROP))
3554               RRETURN(MATCH_NOMATCH);
3555             }
3556           /* Control never gets here */
3557 
3558           case PT_ALNUM:
3559           for (;;)
3560             {
3561             int category;
3562             RMATCH(Fecode, RM213);
3563             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3564             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3565             if (Feptr >= mb->end_subject)
3566               {
3567               SCHECK_PARTIAL();
3568               RRETURN(MATCH_NOMATCH);
3569               }
3570             GETCHARINCTEST(fc, Feptr);
3571             category = UCD_CATEGORY(fc);
3572             if ((category == ucp_L || category == ucp_N) == (Lctype == OP_NOTPROP))
3573               RRETURN(MATCH_NOMATCH);
3574             }
3575           /* Control never gets here */
3576 
3577           /* Perl space used to exclude VT, but from Perl 5.18 it is included,
3578           which means that Perl space and POSIX space are now identical. PCRE
3579           was changed at release 8.34. */
3580 
3581           case PT_SPACE:    /* Perl space */
3582           case PT_PXSPACE:  /* POSIX space */
3583           for (;;)
3584             {
3585             RMATCH(Fecode, RM214);
3586             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3587             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3588             if (Feptr >= mb->end_subject)
3589               {
3590               SCHECK_PARTIAL();
3591               RRETURN(MATCH_NOMATCH);
3592               }
3593             GETCHARINCTEST(fc, Feptr);
3594             switch(fc)
3595               {
3596               HSPACE_CASES:
3597               VSPACE_CASES:
3598               if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
3599               break;
3600 
3601               default:
3602               if ((UCD_CATEGORY(fc) == ucp_Z) == (Lctype == OP_NOTPROP))
3603                 RRETURN(MATCH_NOMATCH);
3604               break;
3605               }
3606             }
3607           /* Control never gets here */
3608 
3609           case PT_WORD:
3610           for (;;)
3611             {
3612             int category;
3613             RMATCH(Fecode, RM215);
3614             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3615             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3616             if (Feptr >= mb->end_subject)
3617               {
3618               SCHECK_PARTIAL();
3619               RRETURN(MATCH_NOMATCH);
3620               }
3621             GETCHARINCTEST(fc, Feptr);
3622             category = UCD_CATEGORY(fc);
3623             if ((category == ucp_L ||
3624                  category == ucp_N ||
3625                  fc == CHAR_UNDERSCORE) == (Lctype == OP_NOTPROP))
3626               RRETURN(MATCH_NOMATCH);
3627             }
3628           /* Control never gets here */
3629 
3630           case PT_CLIST:
3631           for (;;)
3632             {
3633             const uint32_t *cp;
3634             RMATCH(Fecode, RM216);
3635             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3636             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3637             if (Feptr >= mb->end_subject)
3638               {
3639               SCHECK_PARTIAL();
3640               RRETURN(MATCH_NOMATCH);
3641               }
3642             GETCHARINCTEST(fc, Feptr);
3643             cp = PRIV(ucd_caseless_sets) + Lpropvalue;
3644             for (;;)
3645               {
3646               if (fc < *cp)
3647                 {
3648                 if (Lctype == OP_NOTPROP) break;
3649                 RRETURN(MATCH_NOMATCH);
3650                 }
3651               if (fc == *cp++)
3652                 {
3653                 if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
3654                 break;
3655                 }
3656               }
3657             }
3658           /* Control never gets here */
3659 
3660           case PT_UCNC:
3661           for (;;)
3662             {
3663             RMATCH(Fecode, RM217);
3664             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3665             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3666             if (Feptr >= mb->end_subject)
3667               {
3668               SCHECK_PARTIAL();
3669               RRETURN(MATCH_NOMATCH);
3670               }
3671             GETCHARINCTEST(fc, Feptr);
3672             if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT ||
3673                  fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) ||
3674                  fc >= 0xe000) == (Lctype == OP_NOTPROP))
3675               RRETURN(MATCH_NOMATCH);
3676             }
3677           /* Control never gets here */
3678 
3679           case PT_BIDICL:
3680           for (;;)
3681             {
3682             RMATCH(Fecode, RM224);
3683             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3684             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3685             if (Feptr >= mb->end_subject)
3686               {
3687               SCHECK_PARTIAL();
3688               RRETURN(MATCH_NOMATCH);
3689               }
3690             GETCHARINCTEST(fc, Feptr);
3691             if ((UCD_BIDICLASS(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
3692               RRETURN(MATCH_NOMATCH);
3693             }
3694           /* Control never gets here */
3695 
3696           case PT_BOOL:
3697           for (;;)
3698             {
3699             BOOL ok;
3700             const ucd_record *prop;
3701             RMATCH(Fecode, RM223);
3702             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3703             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3704             if (Feptr >= mb->end_subject)
3705               {
3706               SCHECK_PARTIAL();
3707               RRETURN(MATCH_NOMATCH);
3708               }
3709             GETCHARINCTEST(fc, Feptr);
3710             prop = GET_UCD(fc);
3711             ok = MAPBIT(PRIV(ucd_boolprop_sets) +
3712               UCD_BPROPS_PROP(prop), Lpropvalue) != 0;
3713             if (ok == (Lctype == OP_NOTPROP))
3714               RRETURN(MATCH_NOMATCH);
3715             }
3716           /* Control never gets here */
3717 
3718           /* This should never occur */
3719           default:
3720           return PCRE2_ERROR_INTERNAL;
3721           }
3722         }
3723 
3724       /* Match extended Unicode sequences. We will get here only if the
3725       support is in the binary; otherwise a compile-time error occurs. */
3726 
3727       else if (Lctype == OP_EXTUNI)
3728         {
3729         for (;;)
3730           {
3731           RMATCH(Fecode, RM218);
3732           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3733           if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3734           if (Feptr >= mb->end_subject)
3735             {
3736             SCHECK_PARTIAL();
3737             RRETURN(MATCH_NOMATCH);
3738             }
3739           else
3740             {
3741             GETCHARINCTEST(fc, Feptr);
3742             Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject, mb->end_subject,
3743               utf, NULL);
3744             }
3745           CHECK_PARTIAL();
3746           }
3747         }
3748       else
3749 #endif     /* SUPPORT_UNICODE */
3750 
3751       /* UTF mode for non-property testing character types. */
3752 
3753 #ifdef SUPPORT_UNICODE
3754       if (utf)
3755         {
3756         for (;;)
3757           {
3758           RMATCH(Fecode, RM219);
3759           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3760           if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3761           if (Feptr >= mb->end_subject)
3762             {
3763             SCHECK_PARTIAL();
3764             RRETURN(MATCH_NOMATCH);
3765             }
3766           if (Lctype == OP_ANY && IS_NEWLINE(Feptr)) RRETURN(MATCH_NOMATCH);
3767           GETCHARINC(fc, Feptr);
3768           switch(Lctype)
3769             {
3770             case OP_ANY:               /* This is the non-NL case */
3771             if (mb->partial != 0 &&    /* Take care with CRLF partial */
3772                 Feptr >= mb->end_subject &&
3773                 NLBLOCK->nltype == NLTYPE_FIXED &&
3774                 NLBLOCK->nllen == 2 &&
3775                 fc == NLBLOCK->nl[0])
3776               {
3777               mb->hitend = TRUE;
3778               if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
3779               }
3780             break;
3781 
3782             case OP_ALLANY:
3783             case OP_ANYBYTE:
3784             break;
3785 
3786             case OP_ANYNL:
3787             switch(fc)
3788               {
3789               default: RRETURN(MATCH_NOMATCH);
3790 
3791               case CHAR_CR:
3792               if (Feptr < mb->end_subject && UCHAR21(Feptr) == CHAR_LF) Feptr++;
3793               break;
3794 
3795               case CHAR_LF:
3796               break;
3797 
3798               case CHAR_VT:
3799               case CHAR_FF:
3800               case CHAR_NEL:
3801 #ifndef EBCDIC
3802               case 0x2028:
3803               case 0x2029:
3804 #endif  /* Not EBCDIC */
3805               if (mb->bsr_convention == PCRE2_BSR_ANYCRLF)
3806                 RRETURN(MATCH_NOMATCH);
3807               break;
3808               }
3809             break;
3810 
3811             case OP_NOT_HSPACE:
3812             switch(fc)
3813               {
3814               HSPACE_CASES: RRETURN(MATCH_NOMATCH);
3815               default: break;
3816               }
3817             break;
3818 
3819             case OP_HSPACE:
3820             switch(fc)
3821               {
3822               HSPACE_CASES: break;
3823               default: RRETURN(MATCH_NOMATCH);
3824               }
3825             break;
3826 
3827             case OP_NOT_VSPACE:
3828             switch(fc)
3829               {
3830               VSPACE_CASES: RRETURN(MATCH_NOMATCH);
3831               default: break;
3832               }
3833             break;
3834 
3835             case OP_VSPACE:
3836             switch(fc)
3837               {
3838               VSPACE_CASES: break;
3839               default: RRETURN(MATCH_NOMATCH);
3840               }
3841             break;
3842 
3843             case OP_NOT_DIGIT:
3844             if (fc < 256 && (mb->ctypes[fc] & ctype_digit) != 0)
3845               RRETURN(MATCH_NOMATCH);
3846             break;
3847 
3848             case OP_DIGIT:
3849             if (fc >= 256 || (mb->ctypes[fc] & ctype_digit) == 0)
3850               RRETURN(MATCH_NOMATCH);
3851             break;
3852 
3853             case OP_NOT_WHITESPACE:
3854             if (fc < 256 && (mb->ctypes[fc] & ctype_space) != 0)
3855               RRETURN(MATCH_NOMATCH);
3856             break;
3857 
3858             case OP_WHITESPACE:
3859             if (fc >= 256 || (mb->ctypes[fc] & ctype_space) == 0)
3860               RRETURN(MATCH_NOMATCH);
3861             break;
3862 
3863             case OP_NOT_WORDCHAR:
3864             if (fc < 256 && (mb->ctypes[fc] & ctype_word) != 0)
3865               RRETURN(MATCH_NOMATCH);
3866             break;
3867 
3868             case OP_WORDCHAR:
3869             if (fc >= 256 || (mb->ctypes[fc] & ctype_word) == 0)
3870               RRETURN(MATCH_NOMATCH);
3871             break;
3872 
3873             default:
3874             return PCRE2_ERROR_INTERNAL;
3875             }
3876           }
3877         }
3878       else
3879 #endif  /* SUPPORT_UNICODE */
3880 
3881       /* Not UTF mode */
3882         {
3883         for (;;)
3884           {
3885           RMATCH(Fecode, RM33);
3886           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3887           if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3888           if (Feptr >= mb->end_subject)
3889             {
3890             SCHECK_PARTIAL();
3891             RRETURN(MATCH_NOMATCH);
3892             }
3893           if (Lctype == OP_ANY && IS_NEWLINE(Feptr))
3894             RRETURN(MATCH_NOMATCH);
3895           fc = *Feptr++;
3896           switch(Lctype)
3897             {
3898             case OP_ANY:               /* This is the non-NL case */
3899             if (mb->partial != 0 &&    /* Take care with CRLF partial */
3900                 Feptr >= mb->end_subject &&
3901                 NLBLOCK->nltype == NLTYPE_FIXED &&
3902                 NLBLOCK->nllen == 2 &&
3903                 fc == NLBLOCK->nl[0])
3904               {
3905               mb->hitend = TRUE;
3906               if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
3907               }
3908             break;
3909 
3910             case OP_ALLANY:
3911             case OP_ANYBYTE:
3912             break;
3913 
3914             case OP_ANYNL:
3915             switch(fc)
3916               {
3917               default: RRETURN(MATCH_NOMATCH);
3918 
3919               case CHAR_CR:
3920               if (Feptr < mb->end_subject && *Feptr == CHAR_LF) Feptr++;
3921               break;
3922 
3923               case CHAR_LF:
3924               break;
3925 
3926               case CHAR_VT:
3927               case CHAR_FF:
3928               case CHAR_NEL:
3929 #if PCRE2_CODE_UNIT_WIDTH != 8
3930               case 0x2028:
3931               case 0x2029:
3932 #endif
3933               if (mb->bsr_convention == PCRE2_BSR_ANYCRLF)
3934                 RRETURN(MATCH_NOMATCH);
3935               break;
3936               }
3937             break;
3938 
3939             case OP_NOT_HSPACE:
3940             switch(fc)
3941               {
3942               default: break;
3943               HSPACE_BYTE_CASES:
3944 #if PCRE2_CODE_UNIT_WIDTH != 8
3945               HSPACE_MULTIBYTE_CASES:
3946 #endif
3947               RRETURN(MATCH_NOMATCH);
3948               }
3949             break;
3950 
3951             case OP_HSPACE:
3952             switch(fc)
3953               {
3954               default: RRETURN(MATCH_NOMATCH);
3955               HSPACE_BYTE_CASES:
3956 #if PCRE2_CODE_UNIT_WIDTH != 8
3957               HSPACE_MULTIBYTE_CASES:
3958 #endif
3959               break;
3960               }
3961             break;
3962 
3963             case OP_NOT_VSPACE:
3964             switch(fc)
3965               {
3966               default: break;
3967               VSPACE_BYTE_CASES:
3968 #if PCRE2_CODE_UNIT_WIDTH != 8
3969               VSPACE_MULTIBYTE_CASES:
3970 #endif
3971               RRETURN(MATCH_NOMATCH);
3972               }
3973             break;
3974 
3975             case OP_VSPACE:
3976             switch(fc)
3977               {
3978               default: RRETURN(MATCH_NOMATCH);
3979               VSPACE_BYTE_CASES:
3980 #if PCRE2_CODE_UNIT_WIDTH != 8
3981               VSPACE_MULTIBYTE_CASES:
3982 #endif
3983               break;
3984               }
3985             break;
3986 
3987             case OP_NOT_DIGIT:
3988             if (MAX_255(fc) && (mb->ctypes[fc] & ctype_digit) != 0)
3989               RRETURN(MATCH_NOMATCH);
3990             break;
3991 
3992             case OP_DIGIT:
3993             if (!MAX_255(fc) || (mb->ctypes[fc] & ctype_digit) == 0)
3994               RRETURN(MATCH_NOMATCH);
3995             break;
3996 
3997             case OP_NOT_WHITESPACE:
3998             if (MAX_255(fc) && (mb->ctypes[fc] & ctype_space) != 0)
3999               RRETURN(MATCH_NOMATCH);
4000             break;
4001 
4002             case OP_WHITESPACE:
4003             if (!MAX_255(fc) || (mb->ctypes[fc] & ctype_space) == 0)
4004               RRETURN(MATCH_NOMATCH);
4005             break;
4006 
4007             case OP_NOT_WORDCHAR:
4008             if (MAX_255(fc) && (mb->ctypes[fc] & ctype_word) != 0)
4009               RRETURN(MATCH_NOMATCH);
4010             break;
4011 
4012             case OP_WORDCHAR:
4013             if (!MAX_255(fc) || (mb->ctypes[fc] & ctype_word) == 0)
4014               RRETURN(MATCH_NOMATCH);
4015             break;
4016 
4017             default:
4018             return PCRE2_ERROR_INTERNAL;
4019             }
4020           }
4021         }
4022       /* Control never gets here */
4023       }
4024 
4025     /* If maximizing, it is worth using inline code for speed, doing the type
4026     test once at the start (i.e. keep it out of the loops). Once again,
4027     "notmatch" can be an ordinary local variable because the loops do not call
4028     RMATCH. */
4029 
4030     else
4031       {
4032       Lstart_eptr = Feptr;  /* Remember where we started */
4033 
4034 #ifdef SUPPORT_UNICODE
4035       if (proptype >= 0)
4036         {
4037         BOOL notmatch = Lctype == OP_NOTPROP;
4038         switch(proptype)
4039           {
4040           case PT_ANY:
4041           for (i = Lmin; i < Lmax; i++)
4042             {
4043             int len = 1;
4044             if (Feptr >= mb->end_subject)
4045               {
4046               SCHECK_PARTIAL();
4047               break;
4048               }
4049             GETCHARLENTEST(fc, Feptr, len);
4050             if (notmatch) break;
4051             Feptr+= len;
4052             }
4053           break;
4054 
4055           case PT_LAMP:
4056           for (i = Lmin; i < Lmax; i++)
4057             {
4058             int chartype;
4059             int len = 1;
4060             if (Feptr >= mb->end_subject)
4061               {
4062               SCHECK_PARTIAL();
4063               break;
4064               }
4065             GETCHARLENTEST(fc, Feptr, len);
4066             chartype = UCD_CHARTYPE(fc);
4067             if ((chartype == ucp_Lu ||
4068                  chartype == ucp_Ll ||
4069                  chartype == ucp_Lt) == notmatch)
4070               break;
4071             Feptr+= len;
4072             }
4073           break;
4074 
4075           case PT_GC:
4076           for (i = Lmin; i < Lmax; i++)
4077             {
4078             int len = 1;
4079             if (Feptr >= mb->end_subject)
4080               {
4081               SCHECK_PARTIAL();
4082               break;
4083               }
4084             GETCHARLENTEST(fc, Feptr, len);
4085             if ((UCD_CATEGORY(fc) == Lpropvalue) == notmatch) break;
4086             Feptr+= len;
4087             }
4088           break;
4089 
4090           case PT_PC:
4091           for (i = Lmin; i < Lmax; i++)
4092             {
4093             int len = 1;
4094             if (Feptr >= mb->end_subject)
4095               {
4096               SCHECK_PARTIAL();
4097               break;
4098               }
4099             GETCHARLENTEST(fc, Feptr, len);
4100             if ((UCD_CHARTYPE(fc) == Lpropvalue) == notmatch) break;
4101             Feptr+= len;
4102             }
4103           break;
4104 
4105           case PT_SC:
4106           for (i = Lmin; i < Lmax; i++)
4107             {
4108             int len = 1;
4109             if (Feptr >= mb->end_subject)
4110               {
4111               SCHECK_PARTIAL();
4112               break;
4113               }
4114             GETCHARLENTEST(fc, Feptr, len);
4115             if ((UCD_SCRIPT(fc) == Lpropvalue) == notmatch) break;
4116             Feptr+= len;
4117             }
4118           break;
4119 
4120           case PT_SCX:
4121           for (i = Lmin; i < Lmax; i++)
4122             {
4123             BOOL ok;
4124             const ucd_record *prop;
4125             int len = 1;
4126             if (Feptr >= mb->end_subject)
4127               {
4128               SCHECK_PARTIAL();
4129               break;
4130               }
4131             GETCHARLENTEST(fc, Feptr, len);
4132             prop = GET_UCD(fc);
4133             ok = (prop->script == Lpropvalue ||
4134                   MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Lpropvalue) != 0);
4135             if (ok == notmatch) break;
4136             Feptr+= len;
4137             }
4138           break;
4139 
4140           case PT_ALNUM:
4141           for (i = Lmin; i < Lmax; i++)
4142             {
4143             int category;
4144             int len = 1;
4145             if (Feptr >= mb->end_subject)
4146               {
4147               SCHECK_PARTIAL();
4148               break;
4149               }
4150             GETCHARLENTEST(fc, Feptr, len);
4151             category = UCD_CATEGORY(fc);
4152             if ((category == ucp_L || category == ucp_N) == notmatch)
4153               break;
4154             Feptr+= len;
4155             }
4156           break;
4157 
4158           /* Perl space used to exclude VT, but from Perl 5.18 it is included,
4159           which means that Perl space and POSIX space are now identical. PCRE
4160           was changed at release 8.34. */
4161 
4162           case PT_SPACE:    /* Perl space */
4163           case PT_PXSPACE:  /* POSIX space */
4164           for (i = Lmin; i < Lmax; i++)
4165             {
4166             int len = 1;
4167             if (Feptr >= mb->end_subject)
4168               {
4169               SCHECK_PARTIAL();
4170               break;
4171               }
4172             GETCHARLENTEST(fc, Feptr, len);
4173             switch(fc)
4174               {
4175               HSPACE_CASES:
4176               VSPACE_CASES:
4177               if (notmatch) goto ENDLOOP99;  /* Break the loop */
4178               break;
4179 
4180               default:
4181               if ((UCD_CATEGORY(fc) == ucp_Z) == notmatch)
4182                 goto ENDLOOP99;   /* Break the loop */
4183               break;
4184               }
4185             Feptr+= len;
4186             }
4187           ENDLOOP99:
4188           break;
4189 
4190           case PT_WORD:
4191           for (i = Lmin; i < Lmax; i++)
4192             {
4193             int category;
4194             int len = 1;
4195             if (Feptr >= mb->end_subject)
4196               {
4197               SCHECK_PARTIAL();
4198               break;
4199               }
4200             GETCHARLENTEST(fc, Feptr, len);
4201             category = UCD_CATEGORY(fc);
4202             if ((category == ucp_L || category == ucp_N ||
4203                  fc == CHAR_UNDERSCORE) == notmatch)
4204               break;
4205             Feptr+= len;
4206             }
4207           break;
4208 
4209           case PT_CLIST:
4210           for (i = Lmin; i < Lmax; i++)
4211             {
4212             const uint32_t *cp;
4213             int len = 1;
4214             if (Feptr >= mb->end_subject)
4215               {
4216               SCHECK_PARTIAL();
4217               break;
4218               }
4219             GETCHARLENTEST(fc, Feptr, len);
4220             cp = PRIV(ucd_caseless_sets) + Lpropvalue;
4221             for (;;)
4222               {
4223               if (fc < *cp)
4224                 { if (notmatch) break; else goto GOT_MAX; }
4225               if (fc == *cp++)
4226                 { if (notmatch) goto GOT_MAX; else break; }
4227               }
4228             Feptr += len;
4229             }
4230           GOT_MAX:
4231           break;
4232 
4233           case PT_UCNC:
4234           for (i = Lmin; i < Lmax; i++)
4235             {
4236             int len = 1;
4237             if (Feptr >= mb->end_subject)
4238               {
4239               SCHECK_PARTIAL();
4240               break;
4241               }
4242             GETCHARLENTEST(fc, Feptr, len);
4243             if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT ||
4244                  fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) ||
4245                  fc >= 0xe000) == notmatch)
4246               break;
4247             Feptr += len;
4248             }
4249           break;
4250 
4251           case PT_BIDICL:
4252           for (i = Lmin; i < Lmax; i++)
4253             {
4254             int len = 1;
4255             if (Feptr >= mb->end_subject)
4256               {
4257               SCHECK_PARTIAL();
4258               break;
4259               }
4260             GETCHARLENTEST(fc, Feptr, len);
4261             if ((UCD_BIDICLASS(fc) == Lpropvalue) == notmatch) break;
4262             Feptr+= len;
4263             }
4264           break;
4265 
4266           case PT_BOOL:
4267           for (i = Lmin; i < Lmax; i++)
4268             {
4269             BOOL ok;
4270             const ucd_record *prop;
4271             int len = 1;
4272             if (Feptr >= mb->end_subject)
4273               {
4274               SCHECK_PARTIAL();
4275               break;
4276               }
4277             GETCHARLENTEST(fc, Feptr, len);
4278             prop = GET_UCD(fc);
4279             ok = MAPBIT(PRIV(ucd_boolprop_sets) +
4280               UCD_BPROPS_PROP(prop), Lpropvalue) != 0;
4281             if (ok == notmatch) break;
4282             Feptr+= len;
4283             }
4284           break;
4285 
4286           default:
4287           return PCRE2_ERROR_INTERNAL;
4288           }
4289 
4290         /* Feptr is now past the end of the maximum run */
4291 
4292         if (reptype == REPTYPE_POS) continue;    /* No backtracking */
4293 
4294         /* After \C in UTF mode, Lstart_eptr might be in the middle of a
4295         Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
4296         go too far. */
4297 
4298         for(;;)
4299           {
4300           if (Feptr <= Lstart_eptr) break;
4301           RMATCH(Fecode, RM222);
4302           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4303           Feptr--;
4304           if (utf) BACKCHAR(Feptr);
4305           }
4306         }
4307 
4308       /* Match extended Unicode grapheme clusters. We will get here only if the
4309       support is in the binary; otherwise a compile-time error occurs. */
4310 
4311       else if (Lctype == OP_EXTUNI)
4312         {
4313         for (i = Lmin; i < Lmax; i++)
4314           {
4315           if (Feptr >= mb->end_subject)
4316             {
4317             SCHECK_PARTIAL();
4318             break;
4319             }
4320           else
4321             {
4322             GETCHARINCTEST(fc, Feptr);
4323             Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject, mb->end_subject,
4324               utf, NULL);
4325             }
4326           CHECK_PARTIAL();
4327           }
4328 
4329         /* Feptr is now past the end of the maximum run */
4330 
4331         if (reptype == REPTYPE_POS) continue;    /* No backtracking */
4332 
4333         /* We use <= Lstart_eptr rather than == Lstart_eptr to detect the start
4334         of the run while backtracking because the use of \C in UTF mode can
4335         cause BACKCHAR to move back past Lstart_eptr. This is just palliative;
4336         the use of \C in UTF mode is fraught with danger. */
4337 
4338         for(;;)
4339           {
4340           int lgb, rgb;
4341           PCRE2_SPTR fptr;
4342 
4343           if (Feptr <= Lstart_eptr) break;   /* At start of char run */
4344           RMATCH(Fecode, RM220);
4345           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4346 
4347           /* Backtracking over an extended grapheme cluster involves inspecting
4348           the previous two characters (if present) to see if a break is
4349           permitted between them. */
4350 
4351           Feptr--;
4352           if (!utf) fc = *Feptr; else
4353             {
4354             BACKCHAR(Feptr);
4355             GETCHAR(fc, Feptr);
4356             }
4357           rgb = UCD_GRAPHBREAK(fc);
4358 
4359           for (;;)
4360             {
4361             if (Feptr <= Lstart_eptr) break;   /* At start of char run */
4362             fptr = Feptr - 1;
4363             if (!utf) fc = *fptr; else
4364               {
4365               BACKCHAR(fptr);
4366               GETCHAR(fc, fptr);
4367               }
4368             lgb = UCD_GRAPHBREAK(fc);
4369             if ((PRIV(ucp_gbtable)[lgb] & (1u << rgb)) == 0) break;
4370             Feptr = fptr;
4371             rgb = lgb;
4372             }
4373           }
4374         }
4375 
4376       else
4377 #endif   /* SUPPORT_UNICODE */
4378 
4379 #ifdef SUPPORT_UNICODE
4380       if (utf)
4381         {
4382         switch(Lctype)
4383           {
4384           case OP_ANY:
4385           for (i = Lmin; i < Lmax; i++)
4386             {
4387             if (Feptr >= mb->end_subject)
4388               {
4389               SCHECK_PARTIAL();
4390               break;
4391               }
4392             if (IS_NEWLINE(Feptr)) break;
4393             if (mb->partial != 0 &&    /* Take care with CRLF partial */
4394                 Feptr + 1 >= mb->end_subject &&
4395                 NLBLOCK->nltype == NLTYPE_FIXED &&
4396                 NLBLOCK->nllen == 2 &&
4397                 UCHAR21(Feptr) == NLBLOCK->nl[0])
4398               {
4399               mb->hitend = TRUE;
4400               if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
4401               }
4402             Feptr++;
4403             ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
4404             }
4405           break;
4406 
4407           case OP_ALLANY:
4408           if (Lmax < UINT32_MAX)
4409             {
4410             for (i = Lmin; i < Lmax; i++)
4411               {
4412               if (Feptr >= mb->end_subject)
4413                 {
4414                 SCHECK_PARTIAL();
4415                 break;
4416                 }
4417               Feptr++;
4418               ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
4419               }
4420             }
4421           else
4422             {
4423             Feptr = mb->end_subject;   /* Unlimited UTF-8 repeat */
4424             SCHECK_PARTIAL();
4425             }
4426           break;
4427 
4428           /* The "byte" (i.e. "code unit") case is the same as non-UTF */
4429 
4430           case OP_ANYBYTE:
4431           fc = Lmax - Lmin;
4432           if (fc > (uint32_t)(mb->end_subject - Feptr))
4433             {
4434             Feptr = mb->end_subject;
4435             SCHECK_PARTIAL();
4436             }
4437           else Feptr += fc;
4438           break;
4439 
4440           case OP_ANYNL:
4441           for (i = Lmin; i < Lmax; i++)
4442             {
4443             int len = 1;
4444             if (Feptr >= mb->end_subject)
4445               {
4446               SCHECK_PARTIAL();
4447               break;
4448               }
4449             GETCHARLEN(fc, Feptr, len);
4450             if (fc == CHAR_CR)
4451               {
4452               if (++Feptr >= mb->end_subject) break;
4453               if (UCHAR21(Feptr) == CHAR_LF) Feptr++;
4454               }
4455             else
4456               {
4457               if (fc != CHAR_LF &&
4458                   (mb->bsr_convention == PCRE2_BSR_ANYCRLF ||
4459                    (fc != CHAR_VT && fc != CHAR_FF && fc != CHAR_NEL
4460 #ifndef EBCDIC
4461                     && fc != 0x2028 && fc != 0x2029
4462 #endif  /* Not EBCDIC */
4463                     )))
4464                 break;
4465               Feptr += len;
4466               }
4467             }
4468           break;
4469 
4470           case OP_NOT_HSPACE:
4471           case OP_HSPACE:
4472           for (i = Lmin; i < Lmax; i++)
4473             {
4474             BOOL gotspace;
4475             int len = 1;
4476             if (Feptr >= mb->end_subject)
4477               {
4478               SCHECK_PARTIAL();
4479               break;
4480               }
4481             GETCHARLEN(fc, Feptr, len);
4482             switch(fc)
4483               {
4484               HSPACE_CASES: gotspace = TRUE; break;
4485               default: gotspace = FALSE; break;
4486               }
4487             if (gotspace == (Lctype == OP_NOT_HSPACE)) break;
4488             Feptr += len;
4489             }
4490           break;
4491 
4492           case OP_NOT_VSPACE:
4493           case OP_VSPACE:
4494           for (i = Lmin; i < Lmax; i++)
4495             {
4496             BOOL gotspace;
4497             int len = 1;
4498             if (Feptr >= mb->end_subject)
4499               {
4500               SCHECK_PARTIAL();
4501               break;
4502               }
4503             GETCHARLEN(fc, Feptr, len);
4504             switch(fc)
4505               {
4506               VSPACE_CASES: gotspace = TRUE; break;
4507               default: gotspace = FALSE; break;
4508               }
4509             if (gotspace == (Lctype == OP_NOT_VSPACE)) break;
4510             Feptr += len;
4511             }
4512           break;
4513 
4514           case OP_NOT_DIGIT:
4515           for (i = Lmin; i < Lmax; i++)
4516             {
4517             int len = 1;
4518             if (Feptr >= mb->end_subject)
4519               {
4520               SCHECK_PARTIAL();
4521               break;
4522               }
4523             GETCHARLEN(fc, Feptr, len);
4524             if (fc < 256 && (mb->ctypes[fc] & ctype_digit) != 0) break;
4525             Feptr+= len;
4526             }
4527           break;
4528 
4529           case OP_DIGIT:
4530           for (i = Lmin; i < Lmax; i++)
4531             {
4532             int len = 1;
4533             if (Feptr >= mb->end_subject)
4534               {
4535               SCHECK_PARTIAL();
4536               break;
4537               }
4538             GETCHARLEN(fc, Feptr, len);
4539             if (fc >= 256 ||(mb->ctypes[fc] & ctype_digit) == 0) break;
4540             Feptr+= len;
4541             }
4542           break;
4543 
4544           case OP_NOT_WHITESPACE:
4545           for (i = Lmin; i < Lmax; i++)
4546             {
4547             int len = 1;
4548             if (Feptr >= mb->end_subject)
4549               {
4550               SCHECK_PARTIAL();
4551               break;
4552               }
4553             GETCHARLEN(fc, Feptr, len);
4554             if (fc < 256 && (mb->ctypes[fc] & ctype_space) != 0) break;
4555             Feptr+= len;
4556             }
4557           break;
4558 
4559           case OP_WHITESPACE:
4560           for (i = Lmin; i < Lmax; i++)
4561             {
4562             int len = 1;
4563             if (Feptr >= mb->end_subject)
4564               {
4565               SCHECK_PARTIAL();
4566               break;
4567               }
4568             GETCHARLEN(fc, Feptr, len);
4569             if (fc >= 256 ||(mb->ctypes[fc] & ctype_space) == 0) break;
4570             Feptr+= len;
4571             }
4572           break;
4573 
4574           case OP_NOT_WORDCHAR:
4575           for (i = Lmin; i < Lmax; i++)
4576             {
4577             int len = 1;
4578             if (Feptr >= mb->end_subject)
4579               {
4580               SCHECK_PARTIAL();
4581               break;
4582               }
4583             GETCHARLEN(fc, Feptr, len);
4584             if (fc < 256 && (mb->ctypes[fc] & ctype_word) != 0) break;
4585             Feptr+= len;
4586             }
4587           break;
4588 
4589           case OP_WORDCHAR:
4590           for (i = Lmin; i < Lmax; i++)
4591             {
4592             int len = 1;
4593             if (Feptr >= mb->end_subject)
4594               {
4595               SCHECK_PARTIAL();
4596               break;
4597               }
4598             GETCHARLEN(fc, Feptr, len);
4599             if (fc >= 256 || (mb->ctypes[fc] & ctype_word) == 0) break;
4600             Feptr+= len;
4601             }
4602           break;
4603 
4604           default:
4605           return PCRE2_ERROR_INTERNAL;
4606           }
4607 
4608         if (reptype == REPTYPE_POS) continue;    /* No backtracking */
4609 
4610         /* After \C in UTF mode, Lstart_eptr might be in the middle of a
4611         Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't go
4612         too far. */
4613 
4614         for(;;)
4615           {
4616           if (Feptr <= Lstart_eptr) break;
4617           RMATCH(Fecode, RM221);
4618           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4619           Feptr--;
4620           BACKCHAR(Feptr);
4621           if (Lctype == OP_ANYNL && Feptr > Lstart_eptr &&
4622               UCHAR21(Feptr) == CHAR_NL && UCHAR21(Feptr - 1) == CHAR_CR)
4623             Feptr--;
4624           }
4625         }
4626       else
4627 #endif  /* SUPPORT_UNICODE */
4628 
4629       /* Not UTF mode */
4630         {
4631         switch(Lctype)
4632           {
4633           case OP_ANY:
4634           for (i = Lmin; i < Lmax; i++)
4635             {
4636             if (Feptr >= mb->end_subject)
4637               {
4638               SCHECK_PARTIAL();
4639               break;
4640               }
4641             if (IS_NEWLINE(Feptr)) break;
4642             if (mb->partial != 0 &&    /* Take care with CRLF partial */
4643                 Feptr + 1 >= mb->end_subject &&
4644                 NLBLOCK->nltype == NLTYPE_FIXED &&
4645                 NLBLOCK->nllen == 2 &&
4646                 *Feptr == NLBLOCK->nl[0])
4647               {
4648               mb->hitend = TRUE;
4649               if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
4650               }
4651             Feptr++;
4652             }
4653           break;
4654 
4655           case OP_ALLANY:
4656           case OP_ANYBYTE:
4657           fc = Lmax - Lmin;
4658           if (fc > (uint32_t)(mb->end_subject - Feptr))
4659             {
4660             Feptr = mb->end_subject;
4661             SCHECK_PARTIAL();
4662             }
4663           else Feptr += fc;
4664           break;
4665 
4666           case OP_ANYNL:
4667           for (i = Lmin; i < Lmax; i++)
4668             {
4669             if (Feptr >= mb->end_subject)
4670               {
4671               SCHECK_PARTIAL();
4672               break;
4673               }
4674             fc = *Feptr;
4675             if (fc == CHAR_CR)
4676               {
4677               if (++Feptr >= mb->end_subject) break;
4678               if (*Feptr == CHAR_LF) Feptr++;
4679               }
4680             else
4681               {
4682               if (fc != CHAR_LF && (mb->bsr_convention == PCRE2_BSR_ANYCRLF ||
4683                  (fc != CHAR_VT && fc != CHAR_FF && fc != CHAR_NEL
4684 #if PCRE2_CODE_UNIT_WIDTH != 8
4685                  && fc != 0x2028 && fc != 0x2029
4686 #endif
4687                  ))) break;
4688               Feptr++;
4689               }
4690             }
4691           break;
4692 
4693           case OP_NOT_HSPACE:
4694           for (i = Lmin; i < Lmax; i++)
4695             {
4696             if (Feptr >= mb->end_subject)
4697               {
4698               SCHECK_PARTIAL();
4699               break;
4700               }
4701             switch(*Feptr)
4702               {
4703               default: Feptr++; break;
4704               HSPACE_BYTE_CASES:
4705 #if PCRE2_CODE_UNIT_WIDTH != 8
4706               HSPACE_MULTIBYTE_CASES:
4707 #endif
4708               goto ENDLOOP00;
4709               }
4710             }
4711           ENDLOOP00:
4712           break;
4713 
4714           case OP_HSPACE:
4715           for (i = Lmin; i < Lmax; i++)
4716             {
4717             if (Feptr >= mb->end_subject)
4718               {
4719               SCHECK_PARTIAL();
4720               break;
4721               }
4722             switch(*Feptr)
4723               {
4724               default: goto ENDLOOP01;
4725               HSPACE_BYTE_CASES:
4726 #if PCRE2_CODE_UNIT_WIDTH != 8
4727               HSPACE_MULTIBYTE_CASES:
4728 #endif
4729               Feptr++; break;
4730               }
4731             }
4732           ENDLOOP01:
4733           break;
4734 
4735           case OP_NOT_VSPACE:
4736           for (i = Lmin; i < Lmax; i++)
4737             {
4738             if (Feptr >= mb->end_subject)
4739               {
4740               SCHECK_PARTIAL();
4741               break;
4742               }
4743             switch(*Feptr)
4744               {
4745               default: Feptr++; break;
4746               VSPACE_BYTE_CASES:
4747 #if PCRE2_CODE_UNIT_WIDTH != 8
4748               VSPACE_MULTIBYTE_CASES:
4749 #endif
4750               goto ENDLOOP02;
4751               }
4752             }
4753           ENDLOOP02:
4754           break;
4755 
4756           case OP_VSPACE:
4757           for (i = Lmin; i < Lmax; i++)
4758             {
4759             if (Feptr >= mb->end_subject)
4760               {
4761               SCHECK_PARTIAL();
4762               break;
4763               }
4764             switch(*Feptr)
4765               {
4766               default: goto ENDLOOP03;
4767               VSPACE_BYTE_CASES:
4768 #if PCRE2_CODE_UNIT_WIDTH != 8
4769               VSPACE_MULTIBYTE_CASES:
4770 #endif
4771               Feptr++; break;
4772               }
4773             }
4774           ENDLOOP03:
4775           break;
4776 
4777           case OP_NOT_DIGIT:
4778           for (i = Lmin; i < Lmax; i++)
4779             {
4780             if (Feptr >= mb->end_subject)
4781               {
4782               SCHECK_PARTIAL();
4783               break;
4784               }
4785             if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_digit) != 0)
4786               break;
4787             Feptr++;
4788             }
4789           break;
4790 
4791           case OP_DIGIT:
4792           for (i = Lmin; i < Lmax; i++)
4793             {
4794             if (Feptr >= mb->end_subject)
4795               {
4796               SCHECK_PARTIAL();
4797               break;
4798               }
4799             if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_digit) == 0)
4800               break;
4801             Feptr++;
4802             }
4803           break;
4804 
4805           case OP_NOT_WHITESPACE:
4806           for (i = Lmin; i < Lmax; i++)
4807             {
4808             if (Feptr >= mb->end_subject)
4809               {
4810               SCHECK_PARTIAL();
4811               break;
4812               }
4813             if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_space) != 0)
4814               break;
4815             Feptr++;
4816             }
4817           break;
4818 
4819           case OP_WHITESPACE:
4820           for (i = Lmin; i < Lmax; i++)
4821             {
4822             if (Feptr >= mb->end_subject)
4823               {
4824               SCHECK_PARTIAL();
4825               break;
4826               }
4827             if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_space) == 0)
4828               break;
4829             Feptr++;
4830             }
4831           break;
4832 
4833           case OP_NOT_WORDCHAR:
4834           for (i = Lmin; i < Lmax; i++)
4835             {
4836             if (Feptr >= mb->end_subject)
4837               {
4838               SCHECK_PARTIAL();
4839               break;
4840               }
4841             if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_word) != 0)
4842               break;
4843             Feptr++;
4844             }
4845           break;
4846 
4847           case OP_WORDCHAR:
4848           for (i = Lmin; i < Lmax; i++)
4849             {
4850             if (Feptr >= mb->end_subject)
4851               {
4852               SCHECK_PARTIAL();
4853               break;
4854               }
4855             if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_word) == 0)
4856               break;
4857             Feptr++;
4858             }
4859           break;
4860 
4861           default:
4862           return PCRE2_ERROR_INTERNAL;
4863           }
4864 
4865         if (reptype == REPTYPE_POS) continue;    /* No backtracking */
4866 
4867         for (;;)
4868           {
4869           if (Feptr == Lstart_eptr) break;
4870           RMATCH(Fecode, RM34);
4871           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4872           Feptr--;
4873           if (Lctype == OP_ANYNL && Feptr > Lstart_eptr && *Feptr == CHAR_LF &&
4874               Feptr[-1] == CHAR_CR) Feptr--;
4875           }
4876         }
4877       }
4878     break;  /* End of repeat character type processing */
4879 
4880 #undef Lstart_eptr
4881 #undef Lmin
4882 #undef Lmax
4883 #undef Lctype
4884 #undef Lpropvalue
4885 
4886 
4887     /* ===================================================================== */
4888     /* Match a back reference, possibly repeatedly. Look past the end of the
4889     item to see if there is repeat information following. The OP_REF and
4890     OP_REFI opcodes are used for a reference to a numbered group or to a
4891     non-duplicated named group. For a duplicated named group, OP_DNREF and
4892     OP_DNREFI are used. In this case we must scan the list of groups to which
4893     the name refers, and use the first one that is set. */
4894 
4895 #define Lmin      F->temp_32[0]
4896 #define Lmax      F->temp_32[1]
4897 #define Lcaseless F->temp_32[2]
4898 #define Lstart    F->temp_sptr[0]
4899 #define Loffset   F->temp_size
4900 
4901     case OP_DNREF:
4902     case OP_DNREFI:
4903     Lcaseless = (Fop == OP_DNREFI);
4904       {
4905       int count = GET2(Fecode, 1+IMM2_SIZE);
4906       PCRE2_SPTR slot = mb->name_table + GET2(Fecode, 1) * mb->name_entry_size;
4907       Fecode += 1 + 2*IMM2_SIZE;
4908 
4909       while (count-- > 0)
4910         {
4911         Loffset = (GET2(slot, 0) << 1) - 2;
4912         if (Loffset < Foffset_top && Fovector[Loffset] != PCRE2_UNSET) break;
4913         slot += mb->name_entry_size;
4914         }
4915       }
4916     goto REF_REPEAT;
4917 
4918     case OP_REF:
4919     case OP_REFI:
4920     Lcaseless = (Fop == OP_REFI);
4921     Loffset = (GET2(Fecode, 1) << 1) - 2;
4922     Fecode += 1 + IMM2_SIZE;
4923 
4924     /* Set up for repetition, or handle the non-repeated case. The maximum and
4925     minimum must be in the heap frame, but as they are short-term values, we
4926     use temporary fields. */
4927 
4928     REF_REPEAT:
4929     switch (*Fecode)
4930       {
4931       case OP_CRSTAR:
4932       case OP_CRMINSTAR:
4933       case OP_CRPLUS:
4934       case OP_CRMINPLUS:
4935       case OP_CRQUERY:
4936       case OP_CRMINQUERY:
4937       fc = *Fecode++ - OP_CRSTAR;
4938       Lmin = rep_min[fc];
4939       Lmax = rep_max[fc];
4940       reptype = rep_typ[fc];
4941       break;
4942 
4943       case OP_CRRANGE:
4944       case OP_CRMINRANGE:
4945       Lmin = GET2(Fecode, 1);
4946       Lmax = GET2(Fecode, 1 + IMM2_SIZE);
4947       reptype = rep_typ[*Fecode - OP_CRSTAR];
4948       if (Lmax == 0) Lmax = UINT32_MAX;  /* Max 0 => infinity */
4949       Fecode += 1 + 2 * IMM2_SIZE;
4950       break;
4951 
4952       default:                  /* No repeat follows */
4953         {
4954         rrc = match_ref(Loffset, Lcaseless, F, mb, &length);
4955         if (rrc != 0)
4956           {
4957           if (rrc > 0) Feptr = mb->end_subject;   /* Partial match */
4958           CHECK_PARTIAL();
4959           RRETURN(MATCH_NOMATCH);
4960           }
4961         }
4962       Feptr += length;
4963       continue;              /* With the main loop */
4964       }
4965 
4966     /* Handle repeated back references. If a set group has length zero, just
4967     continue with the main loop, because it matches however many times. For an
4968     unset reference, if the minimum is zero, we can also just continue. We can
4969     also continue if PCRE2_MATCH_UNSET_BACKREF is set, because this makes an
4970     unset group behave as a zero-length group. For any other unset cases,
4971     carrying on will result in NOMATCH. */
4972 
4973     if (Loffset < Foffset_top && Fovector[Loffset] != PCRE2_UNSET)
4974       {
4975       if (Fovector[Loffset] == Fovector[Loffset + 1]) continue;
4976       }
4977     else  /* Group is not set */
4978       {
4979       if (Lmin == 0 || (mb->poptions & PCRE2_MATCH_UNSET_BACKREF) != 0)
4980         continue;
4981       }
4982 
4983     /* First, ensure the minimum number of matches are present. */
4984 
4985     for (i = 1; i <= Lmin; i++)
4986       {
4987       PCRE2_SIZE slength;
4988       rrc = match_ref(Loffset, Lcaseless, F, mb, &slength);
4989       if (rrc != 0)
4990         {
4991         if (rrc > 0) Feptr = mb->end_subject;   /* Partial match */
4992         CHECK_PARTIAL();
4993         RRETURN(MATCH_NOMATCH);
4994         }
4995       Feptr += slength;
4996       }
4997 
4998     /* If min = max, we are done. They are not both allowed to be zero. */
4999 
5000     if (Lmin == Lmax) continue;
5001 
5002     /* If minimizing, keep trying and advancing the pointer. */
5003 
5004     if (reptype == REPTYPE_MIN)
5005       {
5006       for (;;)
5007         {
5008         PCRE2_SIZE slength;
5009         RMATCH(Fecode, RM20);
5010         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5011         if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
5012         rrc = match_ref(Loffset, Lcaseless, F, mb, &slength);
5013         if (rrc != 0)
5014           {
5015           if (rrc > 0) Feptr = mb->end_subject;   /* Partial match */
5016           CHECK_PARTIAL();
5017           RRETURN(MATCH_NOMATCH);
5018           }
5019         Feptr += slength;
5020         }
5021       /* Control never gets here */
5022       }
5023 
5024     /* If maximizing, find the longest string and work backwards, as long as
5025     the matched lengths for each iteration are the same. */
5026 
5027     else
5028       {
5029       BOOL samelengths = TRUE;
5030       Lstart = Feptr;     /* Starting position */
5031       Flength = Fovector[Loffset+1] - Fovector[Loffset];
5032 
5033       for (i = Lmin; i < Lmax; i++)
5034         {
5035         PCRE2_SIZE slength;
5036         rrc = match_ref(Loffset, Lcaseless, F, mb, &slength);
5037         if (rrc != 0)
5038           {
5039           /* Can't use CHECK_PARTIAL because we don't want to update Feptr in
5040           the soft partial matching case. */
5041 
5042           if (rrc > 0 && mb->partial != 0 &&
5043               mb->end_subject > mb->start_used_ptr)
5044             {
5045             mb->hitend = TRUE;
5046             if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
5047             }
5048           break;
5049           }
5050 
5051         if (slength != Flength) samelengths = FALSE;
5052         Feptr += slength;
5053         }
5054 
5055       /* If the length matched for each repetition is the same as the length of
5056       the captured group, we can easily work backwards. This is the normal
5057       case. In caseless UTF-8 mode, however, there are pairs of case-equivalent
5058       characters whose lengths (in terms of code units) differ. Because this
5059       is very rare, we handle it by re-matching fewer and fewer times. */
5060 
5061       if (samelengths)
5062         {
5063         while (Feptr >= Lstart)
5064           {
5065           RMATCH(Fecode, RM21);
5066           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5067           Feptr -= Flength;
5068           }
5069         }
5070 
5071       /* The rare case of non-matching lengths. Re-scan the repetition for each
5072       iteration. We know that match_ref() will succeed every time. */
5073 
5074       else
5075         {
5076         Lmax = i;
5077         for (;;)
5078           {
5079           RMATCH(Fecode, RM22);
5080           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5081           if (Feptr == Lstart) break; /* Failed after minimal repetition */
5082           Feptr = Lstart;
5083           Lmax--;
5084           for (i = Lmin; i < Lmax; i++)
5085             {
5086             PCRE2_SIZE slength;
5087             (void)match_ref(Loffset, Lcaseless, F, mb, &slength);
5088             Feptr += slength;
5089             }
5090           }
5091         }
5092 
5093       RRETURN(MATCH_NOMATCH);
5094       }
5095     /* Control never gets here */
5096 
5097 #undef Lcaseless
5098 #undef Lmin
5099 #undef Lmax
5100 #undef Lstart
5101 #undef Loffset
5102 
5103 
5104 
5105 /* ========================================================================= */
5106 /*           Opcodes for the start of various parenthesized items            */
5107 /* ========================================================================= */
5108 
5109     /* In all cases, if the result of RMATCH() is MATCH_THEN, check whether the
5110     (*THEN) is within the current branch by comparing the address of OP_THEN
5111     that is passed back with the end of the branch. If (*THEN) is within the
5112     current branch, and the branch is one of two or more alternatives (it
5113     either starts or ends with OP_ALT), we have reached the limit of THEN's
5114     action, so convert the return code to NOMATCH, which will cause normal
5115     backtracking to happen from now on. Otherwise, THEN is passed back to an
5116     outer alternative. This implements Perl's treatment of parenthesized
5117     groups, where a group not containing | does not affect the current
5118     alternative, that is, (X) is NOT the same as (X|(*F)). */
5119 
5120 
5121     /* ===================================================================== */
5122     /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a non-possessive
5123     bracket group, indicating that it may occur zero times. It may repeat
5124     infinitely, or not at all - i.e. it could be ()* or ()? or even (){0} in
5125     the pattern. Brackets with fixed upper repeat limits are compiled as a
5126     number of copies, with the optional ones preceded by BRAZERO or BRAMINZERO.
5127     Possessive groups with possible zero repeats are preceded by BRAPOSZERO. */
5128 
5129 #define Lnext_ecode F->temp_sptr[0]
5130 
5131     case OP_BRAZERO:
5132     Lnext_ecode = Fecode + 1;
5133     RMATCH(Lnext_ecode, RM9);
5134     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5135     do Lnext_ecode += GET(Lnext_ecode, 1); while (*Lnext_ecode == OP_ALT);
5136     Fecode = Lnext_ecode + 1 + LINK_SIZE;
5137     break;
5138 
5139     case OP_BRAMINZERO:
5140     Lnext_ecode = Fecode + 1;
5141     do Lnext_ecode += GET(Lnext_ecode, 1); while (*Lnext_ecode == OP_ALT);
5142     RMATCH(Lnext_ecode + 1 + LINK_SIZE, RM10);
5143     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5144     Fecode++;
5145     break;
5146 
5147 #undef Lnext_ecode
5148 
5149     case OP_SKIPZERO:
5150     Fecode++;
5151     do Fecode += GET(Fecode,1); while (*Fecode == OP_ALT);
5152     Fecode += 1 + LINK_SIZE;
5153     break;
5154 
5155 
5156     /* ===================================================================== */
5157     /* Handle possessive brackets with an unlimited repeat. The end of these
5158     brackets will always be OP_KETRPOS, which returns MATCH_KETRPOS without
5159     going further in the pattern. */
5160 
5161 #define Lframe_type    F->temp_32[0]
5162 #define Lmatched_once  F->temp_32[1]
5163 #define Lzero_allowed  F->temp_32[2]
5164 #define Lstart_eptr    F->temp_sptr[0]
5165 #define Lstart_group   F->temp_sptr[1]
5166 
5167     case OP_BRAPOSZERO:
5168     Lzero_allowed = TRUE;                /* Zero repeat is allowed */
5169     Fecode += 1;
5170     if (*Fecode == OP_CBRAPOS || *Fecode == OP_SCBRAPOS)
5171       goto POSSESSIVE_CAPTURE;
5172     goto POSSESSIVE_NON_CAPTURE;
5173 
5174     case OP_BRAPOS:
5175     case OP_SBRAPOS:
5176     Lzero_allowed = FALSE;               /* Zero repeat not allowed */
5177 
5178     POSSESSIVE_NON_CAPTURE:
5179     Lframe_type = GF_NOCAPTURE;          /* Remembered frame type */
5180     goto POSSESSIVE_GROUP;
5181 
5182     case OP_CBRAPOS:
5183     case OP_SCBRAPOS:
5184     Lzero_allowed = FALSE;               /* Zero repeat not allowed */
5185 
5186     POSSESSIVE_CAPTURE:
5187     number = GET2(Fecode, 1+LINK_SIZE);
5188     Lframe_type = GF_CAPTURE | number;   /* Remembered frame type */
5189 
5190     POSSESSIVE_GROUP:
5191     Lmatched_once = FALSE;               /* Never matched */
5192     Lstart_group = Fecode;               /* Start of this group */
5193 
5194     for (;;)
5195       {
5196       Lstart_eptr = Feptr;               /* Position at group start */
5197       group_frame_type = Lframe_type;
5198       RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM8);
5199       if (rrc == MATCH_KETRPOS)
5200         {
5201         Lmatched_once = TRUE;            /* Matched at least once */
5202         if (Feptr == Lstart_eptr)        /* Empty match; skip to end */
5203           {
5204           do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT);
5205           break;
5206           }
5207 
5208         Fecode = Lstart_group;
5209         continue;
5210         }
5211 
5212       /* See comment above about handling THEN. */
5213 
5214       if (rrc == MATCH_THEN)
5215         {
5216         PCRE2_SPTR next_ecode = Fecode + GET(Fecode,1);
5217         if (mb->verb_ecode_ptr < next_ecode &&
5218             (*Fecode == OP_ALT || *next_ecode == OP_ALT))
5219           rrc = MATCH_NOMATCH;
5220         }
5221 
5222       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5223       Fecode += GET(Fecode, 1);
5224       if (*Fecode != OP_ALT) break;
5225       }
5226 
5227     /* Success if matched something or zero repeat allowed */
5228 
5229     if (Lmatched_once || Lzero_allowed)
5230       {
5231       Fecode += 1 + LINK_SIZE;
5232       break;
5233       }
5234 
5235     RRETURN(MATCH_NOMATCH);
5236 
5237 #undef Lmatched_once
5238 #undef Lzero_allowed
5239 #undef Lframe_type
5240 #undef Lstart_eptr
5241 #undef Lstart_group
5242 
5243 
5244     /* ===================================================================== */
5245     /* Handle non-capturing brackets that cannot match an empty string. When we
5246     get to the final alternative within the brackets, as long as there are no
5247     THEN's in the pattern, we can optimize by not recording a new backtracking
5248     point. (Ideally we should test for a THEN within this group, but we don't
5249     have that information.) Don't do this if we are at the very top level,
5250     however, because that would make handling assertions and once-only brackets
5251     messier when there is nothing to go back to. */
5252 
5253 #define Lframe_type F->temp_32[0]     /* Set for all that use GROUPLOOP */
5254 #define Lnext_branch F->temp_sptr[0]  /* Used only in OP_BRA handling */
5255 
5256     case OP_BRA:
5257     if (mb->hasthen || Frdepth == 0)
5258       {
5259       Lframe_type = 0;
5260       goto GROUPLOOP;
5261       }
5262 
5263     for (;;)
5264       {
5265       Lnext_branch = Fecode + GET(Fecode, 1);
5266       if (*Lnext_branch != OP_ALT) break;
5267 
5268       /* This is never the final branch. We do not need to test for MATCH_THEN
5269       here because this code is not used when there is a THEN in the pattern. */
5270 
5271       RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM1);
5272       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5273       Fecode = Lnext_branch;
5274       }
5275 
5276     /* Hit the start of the final branch. Continue at this level. */
5277 
5278     Fecode += PRIV(OP_lengths)[*Fecode];
5279     break;
5280 
5281 #undef Lnext_branch
5282 
5283 
5284     /* ===================================================================== */
5285     /* Handle a capturing bracket, other than those that are possessive with an
5286     unlimited repeat. */
5287 
5288     case OP_CBRA:
5289     case OP_SCBRA:
5290     Lframe_type = GF_CAPTURE | GET2(Fecode, 1+LINK_SIZE);
5291     goto GROUPLOOP;
5292 
5293 
5294     /* ===================================================================== */
5295     /* Atomic groups and non-capturing brackets that can match an empty string
5296     must record a backtracking point and also set up a chained frame. */
5297 
5298     case OP_ONCE:
5299     case OP_SCRIPT_RUN:
5300     case OP_SBRA:
5301     Lframe_type = GF_NOCAPTURE | Fop;
5302 
5303     GROUPLOOP:
5304     for (;;)
5305       {
5306       group_frame_type = Lframe_type;
5307       RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM2);
5308       if (rrc == MATCH_THEN)
5309         {
5310         PCRE2_SPTR next_ecode = Fecode + GET(Fecode,1);
5311         if (mb->verb_ecode_ptr < next_ecode &&
5312             (*Fecode == OP_ALT || *next_ecode == OP_ALT))
5313           rrc = MATCH_NOMATCH;
5314         }
5315       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5316       Fecode += GET(Fecode, 1);
5317       if (*Fecode != OP_ALT) RRETURN(MATCH_NOMATCH);
5318       }
5319     /* Control never reaches here. */
5320 
5321 #undef Lframe_type
5322 
5323 
5324     /* ===================================================================== */
5325     /* Recursion either matches the current regex, or some subexpression. The
5326     offset data is the offset to the starting bracket from the start of the
5327     whole pattern. (This is so that it works from duplicated subpatterns.) */
5328 
5329 #define Lframe_type F->temp_32[0]
5330 #define Lstart_branch F->temp_sptr[0]
5331 
5332     case OP_RECURSE:
5333     bracode = mb->start_code + GET(Fecode, 1);
5334     number = (bracode == mb->start_code)? 0 : GET2(bracode, 1 + LINK_SIZE);
5335 
5336     /* If we are already in a recursion, check for repeating the same one
5337     without advancing the subject pointer. This should catch convoluted mutual
5338     recursions. (Some simple cases are caught at compile time.) */
5339 
5340     if (Fcurrent_recurse != RECURSE_UNSET)
5341       {
5342       offset = Flast_group_offset;
5343       while (offset != PCRE2_UNSET)
5344         {
5345         N = (heapframe *)((char *)match_data->heapframes + offset);
5346         P = (heapframe *)((char *)N - frame_size);
5347         if (N->group_frame_type == (GF_RECURSE | number))
5348           {
5349           if (Feptr == P->eptr) return PCRE2_ERROR_RECURSELOOP;
5350           break;
5351           }
5352         offset = P->last_group_offset;
5353         }
5354       }
5355 
5356     /* Now run the recursion, branch by branch. */
5357 
5358     Lstart_branch = bracode;
5359     Lframe_type = GF_RECURSE | number;
5360 
5361     for (;;)
5362       {
5363       PCRE2_SPTR next_ecode;
5364 
5365       group_frame_type = Lframe_type;
5366       RMATCH(Lstart_branch + PRIV(OP_lengths)[*Lstart_branch], RM11);
5367       next_ecode = Lstart_branch + GET(Lstart_branch,1);
5368 
5369       /* Handle backtracking verbs, which are defined in a range that can
5370       easily be tested for. PCRE does not allow THEN, SKIP, PRUNE or COMMIT to
5371       escape beyond a recursion; they cause a NOMATCH for the entire recursion.
5372 
5373       When one of these verbs triggers, the current recursion group number is
5374       recorded. If it matches the recursion we are processing, the verb
5375       happened within the recursion and we must deal with it. Otherwise it must
5376       have happened after the recursion completed, and so has to be passed
5377       back. See comment above about handling THEN. */
5378 
5379       if (rrc >= MATCH_BACKTRACK_MIN && rrc <= MATCH_BACKTRACK_MAX &&
5380           mb->verb_current_recurse == (Lframe_type ^ GF_RECURSE))
5381         {
5382         if (rrc == MATCH_THEN && mb->verb_ecode_ptr < next_ecode &&
5383             (*Lstart_branch == OP_ALT || *next_ecode == OP_ALT))
5384           rrc = MATCH_NOMATCH;
5385         else RRETURN(MATCH_NOMATCH);
5386         }
5387 
5388       /* Note that carrying on after (*ACCEPT) in a recursion is handled in the
5389       OP_ACCEPT code. Nothing needs to be done here. */
5390 
5391       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5392       Lstart_branch = next_ecode;
5393       if (*Lstart_branch != OP_ALT) RRETURN(MATCH_NOMATCH);
5394       }
5395     /* Control never reaches here. */
5396 
5397 #undef Lframe_type
5398 #undef Lstart_branch
5399 
5400 
5401     /* ===================================================================== */
5402     /* Positive assertions are like other groups except that PCRE doesn't allow
5403     the effect of (*THEN) to escape beyond an assertion; it is therefore
5404     treated as NOMATCH. (*ACCEPT) is treated as a successful assertion, with
5405     its captures and mark retained. Any other return is an error. */
5406 
5407 #define Lframe_type  F->temp_32[0]
5408 
5409     case OP_ASSERT:
5410     case OP_ASSERTBACK:
5411     case OP_ASSERT_NA:
5412     case OP_ASSERTBACK_NA:
5413     Lframe_type = GF_NOCAPTURE | Fop;
5414     for (;;)
5415       {
5416       group_frame_type = Lframe_type;
5417       RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM3);
5418       if (rrc == MATCH_ACCEPT)
5419         {
5420         memcpy(Fovector,
5421               (char *)assert_accept_frame + offsetof(heapframe, ovector),
5422               assert_accept_frame->offset_top * sizeof(PCRE2_SIZE));
5423         Foffset_top = assert_accept_frame->offset_top;
5424         Fmark = assert_accept_frame->mark;
5425         break;
5426         }
5427       if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
5428       Fecode += GET(Fecode, 1);
5429       if (*Fecode != OP_ALT) RRETURN(MATCH_NOMATCH);
5430       }
5431 
5432     do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT);
5433     Fecode += 1 + LINK_SIZE;
5434     break;
5435 
5436 #undef Lframe_type
5437 
5438 
5439     /* ===================================================================== */
5440     /* Handle negative assertions. Loop for each non-matching branch as for
5441     positive assertions. */
5442 
5443 #define Lframe_type  F->temp_32[0]
5444 
5445     case OP_ASSERT_NOT:
5446     case OP_ASSERTBACK_NOT:
5447     Lframe_type  = GF_NOCAPTURE | Fop;
5448 
5449     for (;;)
5450       {
5451       group_frame_type = Lframe_type;
5452       RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM4);
5453       switch(rrc)
5454         {
5455         case MATCH_ACCEPT:   /* Assertion matched, therefore it fails. */
5456         case MATCH_MATCH:
5457         RRETURN (MATCH_NOMATCH);
5458 
5459         case MATCH_NOMATCH:  /* Branch failed, try next if present. */
5460         case MATCH_THEN:
5461         Fecode += GET(Fecode, 1);
5462         if (*Fecode != OP_ALT) goto ASSERT_NOT_FAILED;
5463         break;
5464 
5465         case MATCH_COMMIT:   /* Assertion forced to fail, therefore continue. */
5466         case MATCH_SKIP:
5467         case MATCH_PRUNE:
5468         do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT);
5469         goto ASSERT_NOT_FAILED;
5470 
5471         default:             /* Pass back any other return */
5472         RRETURN(rrc);
5473         }
5474       }
5475 
5476     /* None of the branches have matched or there was a backtrack to (*COMMIT),
5477     (*SKIP), (*PRUNE), or (*THEN) in the last branch. This is success for a
5478     negative assertion, so carry on. */
5479 
5480     ASSERT_NOT_FAILED:
5481     Fecode += 1 + LINK_SIZE;
5482     break;
5483 
5484 #undef Lframe_type
5485 
5486 
5487     /* ===================================================================== */
5488     /* The callout item calls an external function, if one is provided, passing
5489     details of the match so far. This is mainly for debugging, though the
5490     function is able to force a failure. */
5491 
5492     case OP_CALLOUT:
5493     case OP_CALLOUT_STR:
5494     rrc = do_callout(F, mb, &length);
5495     if (rrc > 0) RRETURN(MATCH_NOMATCH);
5496     if (rrc < 0) RRETURN(rrc);
5497     Fecode += length;
5498     break;
5499 
5500 
5501     /* ===================================================================== */
5502     /* Conditional group: compilation checked that there are no more than two
5503     branches. If the condition is false, skipping the first branch takes us
5504     past the end of the item if there is only one branch, but that's exactly
5505     what we want. */
5506 
5507     case OP_COND:
5508     case OP_SCOND:
5509 
5510     /* The variable Flength will be added to Fecode when the condition is
5511     false, to get to the second branch. Setting it to the offset to the ALT or
5512     KET, then incrementing Fecode achieves this effect. However, if the second
5513     branch is non-existent, we must point to the KET so that the end of the
5514     group is correctly processed. We now have Fecode pointing to the condition
5515     or callout. */
5516 
5517     Flength = GET(Fecode, 1);    /* Offset to the second branch */
5518     if (Fecode[Flength] != OP_ALT) Flength -= 1 + LINK_SIZE;
5519     Fecode += 1 + LINK_SIZE;     /* From this opcode */
5520 
5521     /* Because of the way auto-callout works during compile, a callout item is
5522     inserted between OP_COND and an assertion condition. Such a callout can
5523     also be inserted manually. */
5524 
5525     if (*Fecode == OP_CALLOUT || *Fecode == OP_CALLOUT_STR)
5526       {
5527       rrc = do_callout(F, mb, &length);
5528       if (rrc > 0) RRETURN(MATCH_NOMATCH);
5529       if (rrc < 0) RRETURN(rrc);
5530 
5531       /* Advance Fecode past the callout, so it now points to the condition. We
5532       must adjust Flength so that the value of Fecode+Flength is unchanged. */
5533 
5534       Fecode += length;
5535       Flength -= length;
5536       }
5537 
5538     /* Test the various possible conditions */
5539 
5540     condition = FALSE;
5541     switch(*Fecode)
5542       {
5543       case OP_RREF:                  /* Group recursion test */
5544       if (Fcurrent_recurse != RECURSE_UNSET)
5545         {
5546         number = GET2(Fecode, 1);
5547         condition = (number == RREF_ANY || number == Fcurrent_recurse);
5548         }
5549       break;
5550 
5551       case OP_DNRREF:       /* Duplicate named group recursion test */
5552       if (Fcurrent_recurse != RECURSE_UNSET)
5553         {
5554         int count = GET2(Fecode, 1 + IMM2_SIZE);
5555         PCRE2_SPTR slot = mb->name_table + GET2(Fecode, 1) * mb->name_entry_size;
5556         while (count-- > 0)
5557           {
5558           number = GET2(slot, 0);
5559           condition = number == Fcurrent_recurse;
5560           if (condition) break;
5561           slot += mb->name_entry_size;
5562           }
5563         }
5564       break;
5565 
5566       case OP_CREF:                         /* Numbered group used test */
5567       offset = (GET2(Fecode, 1) << 1) - 2;  /* Doubled ref number */
5568       condition = offset < Foffset_top && Fovector[offset] != PCRE2_UNSET;
5569       break;
5570 
5571       case OP_DNCREF:      /* Duplicate named group used test */
5572         {
5573         int count = GET2(Fecode, 1 + IMM2_SIZE);
5574         PCRE2_SPTR slot = mb->name_table + GET2(Fecode, 1) * mb->name_entry_size;
5575         while (count-- > 0)
5576           {
5577           offset = (GET2(slot, 0) << 1) - 2;
5578           condition = offset < Foffset_top && Fovector[offset] != PCRE2_UNSET;
5579           if (condition) break;
5580           slot += mb->name_entry_size;
5581           }
5582         }
5583       break;
5584 
5585       case OP_FALSE:
5586       case OP_FAIL:   /* The assertion (?!) becomes OP_FAIL */
5587       break;
5588 
5589       case OP_TRUE:
5590       condition = TRUE;
5591       break;
5592 
5593       /* The condition is an assertion. Run code similar to the assertion code
5594       above. */
5595 
5596 #define Lpositive      F->temp_32[0]
5597 #define Lstart_branch  F->temp_sptr[0]
5598 
5599       default:
5600       Lpositive = (*Fecode == OP_ASSERT || *Fecode == OP_ASSERTBACK);
5601       Lstart_branch = Fecode;
5602 
5603       for (;;)
5604         {
5605         group_frame_type = GF_CONDASSERT | *Fecode;
5606         RMATCH(Lstart_branch + PRIV(OP_lengths)[*Lstart_branch], RM5);
5607 
5608         switch(rrc)
5609           {
5610           case MATCH_ACCEPT:  /* Save captures */
5611           memcpy(Fovector,
5612                 (char *)assert_accept_frame + offsetof(heapframe, ovector),
5613                 assert_accept_frame->offset_top * sizeof(PCRE2_SIZE));
5614           Foffset_top = assert_accept_frame->offset_top;
5615 
5616           /* Fall through */
5617           /* In the case of a match, the captures have already been put into
5618           the current frame. */
5619 
5620           case MATCH_MATCH:
5621           condition = Lpositive;   /* TRUE for positive assertion */
5622           break;
5623 
5624           /* PCRE doesn't allow the effect of (*THEN) to escape beyond an
5625           assertion; it is therefore always treated as NOMATCH. */
5626 
5627           case MATCH_NOMATCH:
5628           case MATCH_THEN:
5629           Lstart_branch += GET(Lstart_branch, 1);
5630           if (*Lstart_branch == OP_ALT) continue;  /* Try next branch */
5631           condition = !Lpositive;  /* TRUE for negative assertion */
5632           break;
5633 
5634           /* These force no match without checking other branches. */
5635 
5636           case MATCH_COMMIT:
5637           case MATCH_SKIP:
5638           case MATCH_PRUNE:
5639           condition = !Lpositive;
5640           break;
5641 
5642           default:
5643           RRETURN(rrc);
5644           }
5645         break;  /* Out of the branch loop */
5646         }
5647 
5648       /* If the condition is true, find the end of the assertion so that
5649       advancing past it gets us to the start of the first branch. */
5650 
5651       if (condition)
5652         {
5653         do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT);
5654         }
5655       break;  /* End of assertion condition */
5656       }
5657 
5658 #undef Lpositive
5659 #undef Lstart_branch
5660 
5661     /* Choose branch according to the condition. */
5662 
5663     Fecode += condition? PRIV(OP_lengths)[*Fecode] : Flength;
5664 
5665     /* If the opcode is OP_SCOND it means we are at a repeated conditional
5666     group that might match an empty string. We must therefore descend a level
5667     so that the start is remembered for checking. For OP_COND we can just
5668     continue at this level. */
5669 
5670     if (Fop == OP_SCOND)
5671       {
5672       group_frame_type  = GF_NOCAPTURE | Fop;
5673       RMATCH(Fecode, RM35);
5674       RRETURN(rrc);
5675       }
5676     break;
5677 
5678 
5679 
5680 /* ========================================================================= */
5681 /*                  End of start of parenthesis opcodes                      */
5682 /* ========================================================================= */
5683 
5684 
5685     /* ===================================================================== */
5686     /* Move the subject pointer back. This occurs only at the start of each
5687     branch of a lookbehind assertion. If we are too close to the start to move
5688     back, fail. When working with UTF-8 we move back a number of characters,
5689     not bytes. */
5690 
5691     case OP_REVERSE:
5692     number = GET(Fecode, 1);
5693 #ifdef SUPPORT_UNICODE
5694     if (utf)
5695       {
5696       while (number-- > 0)
5697         {
5698         if (Feptr <= mb->check_subject) RRETURN(MATCH_NOMATCH);
5699         Feptr--;
5700         BACKCHAR(Feptr);
5701         }
5702       }
5703     else
5704 #endif
5705 
5706     /* No UTF-8 support, or not in UTF-8 mode: count is code unit count */
5707 
5708       {
5709       if ((ptrdiff_t)number > Feptr - mb->start_subject) RRETURN(MATCH_NOMATCH);
5710       Feptr -= number;
5711       }
5712 
5713     /* Save the earliest consulted character, then skip to next opcode */
5714 
5715     if (Feptr < mb->start_used_ptr) mb->start_used_ptr = Feptr;
5716     Fecode += 1 + LINK_SIZE;
5717     break;
5718 
5719 
5720     /* ===================================================================== */
5721     /* An alternation is the end of a branch; scan along to find the end of the
5722     bracketed group. */
5723 
5724     case OP_ALT:
5725     do Fecode += GET(Fecode,1); while (*Fecode == OP_ALT);
5726     break;
5727 
5728 
5729     /* ===================================================================== */
5730     /* The end of a parenthesized group. For all but OP_BRA and OP_COND, the
5731     starting frame was added to the chained frames in order to remember the
5732     starting subject position for the group. */
5733 
5734     case OP_KET:
5735     case OP_KETRMIN:
5736     case OP_KETRMAX:
5737     case OP_KETRPOS:
5738 
5739     bracode = Fecode - GET(Fecode, 1);
5740 
5741     /* Point N to the frame at the start of the most recent group.
5742     Remember the subject pointer at the start of the group. */
5743 
5744     if (*bracode != OP_BRA && *bracode != OP_COND)
5745       {
5746       N = (heapframe *)((char *)match_data->heapframes + Flast_group_offset);
5747       P = (heapframe *)((char *)N - frame_size);
5748       Flast_group_offset = P->last_group_offset;
5749 
5750 #ifdef DEBUG_SHOW_RMATCH
5751       fprintf(stderr, "++ KET for frame=%d type=%x prev char offset=%lu\n",
5752         N->rdepth, N->group_frame_type,
5753         (char *)P->eptr - (char *)mb->start_subject);
5754 #endif
5755 
5756       /* If we are at the end of an assertion that is a condition, return a
5757       match, discarding any intermediate backtracking points. Copy back the
5758       mark setting and the captures into the frame before N so that they are
5759       set on return. Doing this for all assertions, both positive and negative,
5760       seems to match what Perl does. */
5761 
5762       if (GF_IDMASK(N->group_frame_type) == GF_CONDASSERT)
5763         {
5764         memcpy((char *)P + offsetof(heapframe, ovector), Fovector,
5765           Foffset_top * sizeof(PCRE2_SIZE));
5766         P->offset_top = Foffset_top;
5767         P->mark = Fmark;
5768         Fback_frame = (char *)F - (char *)P;
5769         RRETURN(MATCH_MATCH);
5770         }
5771       }
5772     else P = NULL;   /* Indicates starting frame not recorded */
5773 
5774     /* The group was not a conditional assertion. */
5775 
5776     switch (*bracode)
5777       {
5778       case OP_BRA:    /* No need to do anything for these */
5779       case OP_COND:
5780       case OP_SCOND:
5781       break;
5782 
5783       /* Non-atomic positive assertions are like OP_BRA, except that the
5784       subject pointer must be put back to where it was at the start of the
5785       assertion. */
5786 
5787       case OP_ASSERT_NA:
5788       case OP_ASSERTBACK_NA:
5789       if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
5790       Feptr = P->eptr;
5791       break;
5792 
5793       /* Atomic positive assertions are like OP_ONCE, except that in addition
5794       the subject pointer must be put back to where it was at the start of the
5795       assertion. */
5796 
5797       case OP_ASSERT:
5798       case OP_ASSERTBACK:
5799       if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
5800       Feptr = P->eptr;
5801       /* Fall through */
5802 
5803       /* For an atomic group, discard internal backtracking points. We must
5804       also ensure that any remaining branches within the top-level of the group
5805       are not tried. Do this by adjusting the code pointer within the backtrack
5806       frame so that it points to the final branch. */
5807 
5808       case OP_ONCE:
5809       Fback_frame = ((char *)F - (char *)P);
5810       for (;;)
5811         {
5812         uint32_t y = GET(P->ecode,1);
5813         if ((P->ecode)[y] != OP_ALT) break;
5814         P->ecode += y;
5815         }
5816       break;
5817 
5818       /* A matching negative assertion returns MATCH, which is turned into
5819       NOMATCH at the assertion level. */
5820 
5821       case OP_ASSERT_NOT:
5822       case OP_ASSERTBACK_NOT:
5823       RRETURN(MATCH_MATCH);
5824 
5825       /* At the end of a script run, apply the script-checking rules. This code
5826       will never be exercised if Unicode support is not compiled, because in
5827       that environment script runs cause an error at compile time. */
5828 
5829       case OP_SCRIPT_RUN:
5830       if (!PRIV(script_run)(P->eptr, Feptr, utf)) RRETURN(MATCH_NOMATCH);
5831       break;
5832 
5833       /* Whole-pattern recursion is coded as a recurse into group 0, so it
5834       won't be picked up here. Instead, we catch it when the OP_END is reached.
5835       Other recursion is handled here. */
5836 
5837       case OP_CBRA:
5838       case OP_CBRAPOS:
5839       case OP_SCBRA:
5840       case OP_SCBRAPOS:
5841       number = GET2(bracode, 1+LINK_SIZE);
5842 
5843       /* Handle a recursively called group. We reinstate the previous set of
5844       captures and then carry on after the recursion call. */
5845 
5846       if (Fcurrent_recurse == number)
5847         {
5848         P = (heapframe *)((char *)N - frame_size);
5849         memcpy((char *)F + offsetof(heapframe, ovector), P->ovector,
5850           Foffset_top * sizeof(PCRE2_SIZE));
5851         Foffset_top = P->offset_top;
5852         Fcapture_last = P->capture_last;
5853         Fcurrent_recurse = P->current_recurse;
5854         Fecode = P->ecode + 1 + LINK_SIZE;
5855         continue;  /* With next opcode */
5856         }
5857 
5858       /* Deal with actual capturing. */
5859 
5860       offset = (number << 1) - 2;
5861       Fcapture_last = number;
5862       Fovector[offset] = P->eptr - mb->start_subject;
5863       Fovector[offset+1] = Feptr - mb->start_subject;
5864       if (offset >= Foffset_top) Foffset_top = offset + 2;
5865       break;
5866       }  /* End actions relating to the starting opcode */
5867 
5868     /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
5869     and return the MATCH_KETRPOS. This makes it possible to do the repeats one
5870     at a time from the outer level. This must precede the empty string test -
5871     in this case that test is done at the outer level. */
5872 
5873     if (*Fecode == OP_KETRPOS)
5874       {
5875       memcpy((char *)P + offsetof(heapframe, eptr),
5876              (char *)F + offsetof(heapframe, eptr),
5877              frame_copy_size);
5878       RRETURN(MATCH_KETRPOS);
5879       }
5880 
5881     /* Handle the different kinds of closing brackets. A non-repeating ket
5882     needs no special action, just continuing at this level. This also happens
5883     for the repeating kets if the group matched no characters, in order to
5884     forcibly break infinite loops. Otherwise, the repeating kets try the rest
5885     of the pattern or restart from the preceding bracket, in the appropriate
5886     order. */
5887 
5888     if (Fop != OP_KET && (P == NULL || Feptr != P->eptr))
5889       {
5890       if (Fop == OP_KETRMIN)
5891         {
5892         RMATCH(Fecode + 1 + LINK_SIZE, RM6);
5893         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5894         Fecode -= GET(Fecode, 1);
5895         break;   /* End of ket processing */
5896         }
5897 
5898       /* Repeat the maximum number of times (KETRMAX) */
5899 
5900       RMATCH(bracode, RM7);
5901       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5902       }
5903 
5904     /* Carry on at this level for a non-repeating ket, or after matching an
5905     empty string, or after repeating for a maximum number of times. */
5906 
5907     Fecode += 1 + LINK_SIZE;
5908     break;
5909 
5910 
5911     /* ===================================================================== */
5912     /* Start and end of line assertions, not multiline mode. */
5913 
5914     case OP_CIRC:   /* Start of line, unless PCRE2_NOTBOL is set. */
5915     if (Feptr != mb->start_subject || (mb->moptions & PCRE2_NOTBOL) != 0)
5916       RRETURN(MATCH_NOMATCH);
5917     Fecode++;
5918     break;
5919 
5920     case OP_SOD:    /* Unconditional start of subject */
5921     if (Feptr != mb->start_subject) RRETURN(MATCH_NOMATCH);
5922     Fecode++;
5923     break;
5924 
5925     /* When PCRE2_NOTEOL is unset, assert before the subject end, or a
5926     terminating newline unless PCRE2_DOLLAR_ENDONLY is set. */
5927 
5928     case OP_DOLL:
5929     if ((mb->moptions & PCRE2_NOTEOL) != 0) RRETURN(MATCH_NOMATCH);
5930     if ((mb->poptions & PCRE2_DOLLAR_ENDONLY) == 0) goto ASSERT_NL_OR_EOS;
5931 
5932     /* Fall through */
5933     /* Unconditional end of subject assertion (\z) */
5934 
5935     case OP_EOD:
5936     if (Feptr < mb->end_subject) RRETURN(MATCH_NOMATCH);
5937     if (mb->partial != 0)
5938       {
5939       mb->hitend = TRUE;
5940       if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
5941       }
5942     Fecode++;
5943     break;
5944 
5945     /* End of subject or ending \n assertion (\Z) */
5946 
5947     case OP_EODN:
5948     ASSERT_NL_OR_EOS:
5949     if (Feptr < mb->end_subject &&
5950         (!IS_NEWLINE(Feptr) || Feptr != mb->end_subject - mb->nllen))
5951       {
5952       if (mb->partial != 0 &&
5953           Feptr + 1 >= mb->end_subject &&
5954           NLBLOCK->nltype == NLTYPE_FIXED &&
5955           NLBLOCK->nllen == 2 &&
5956           UCHAR21TEST(Feptr) == NLBLOCK->nl[0])
5957         {
5958         mb->hitend = TRUE;
5959         if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
5960         }
5961       RRETURN(MATCH_NOMATCH);
5962       }
5963 
5964     /* Either at end of string or \n before end. */
5965 
5966     if (mb->partial != 0)
5967       {
5968       mb->hitend = TRUE;
5969       if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
5970       }
5971     Fecode++;
5972     break;
5973 
5974 
5975     /* ===================================================================== */
5976     /* Start and end of line assertions, multiline mode. */
5977 
5978     /* Start of subject unless notbol, or after any newline except for one at
5979     the very end, unless PCRE2_ALT_CIRCUMFLEX is set. */
5980 
5981     case OP_CIRCM:
5982     if ((mb->moptions & PCRE2_NOTBOL) != 0 && Feptr == mb->start_subject)
5983       RRETURN(MATCH_NOMATCH);
5984     if (Feptr != mb->start_subject &&
5985         ((Feptr == mb->end_subject &&
5986            (mb->poptions & PCRE2_ALT_CIRCUMFLEX) == 0) ||
5987          !WAS_NEWLINE(Feptr)))
5988       RRETURN(MATCH_NOMATCH);
5989     Fecode++;
5990     break;
5991 
5992     /* Assert before any newline, or before end of subject unless noteol is
5993     set. */
5994 
5995     case OP_DOLLM:
5996     if (Feptr < mb->end_subject)
5997       {
5998       if (!IS_NEWLINE(Feptr))
5999         {
6000         if (mb->partial != 0 &&
6001             Feptr + 1 >= mb->end_subject &&
6002             NLBLOCK->nltype == NLTYPE_FIXED &&
6003             NLBLOCK->nllen == 2 &&
6004             UCHAR21TEST(Feptr) == NLBLOCK->nl[0])
6005           {
6006           mb->hitend = TRUE;
6007           if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
6008           }
6009         RRETURN(MATCH_NOMATCH);
6010         }
6011       }
6012     else
6013       {
6014       if ((mb->moptions & PCRE2_NOTEOL) != 0) RRETURN(MATCH_NOMATCH);
6015       SCHECK_PARTIAL();
6016       }
6017     Fecode++;
6018     break;
6019 
6020 
6021     /* ===================================================================== */
6022     /* Start of match assertion */
6023 
6024     case OP_SOM:
6025     if (Feptr != mb->start_subject + mb->start_offset) RRETURN(MATCH_NOMATCH);
6026     Fecode++;
6027     break;
6028 
6029 
6030     /* ===================================================================== */
6031     /* Reset the start of match point */
6032 
6033     case OP_SET_SOM:
6034     Fstart_match = Feptr;
6035     Fecode++;
6036     break;
6037 
6038 
6039     /* ===================================================================== */
6040     /* Word boundary assertions. Find out if the previous and current
6041     characters are "word" characters. It takes a bit more work in UTF mode.
6042     Characters > 255 are assumed to be "non-word" characters when PCRE2_UCP is
6043     not set. When it is set, use Unicode properties if available, even when not
6044     in UTF mode. Remember the earliest and latest consulted characters. */
6045 
6046     case OP_NOT_WORD_BOUNDARY:
6047     case OP_WORD_BOUNDARY:
6048     if (Feptr == mb->check_subject) prev_is_word = FALSE; else
6049       {
6050       PCRE2_SPTR lastptr = Feptr - 1;
6051 #ifdef SUPPORT_UNICODE
6052       if (utf)
6053         {
6054         BACKCHAR(lastptr);
6055         GETCHAR(fc, lastptr);
6056         }
6057       else
6058 #endif  /* SUPPORT_UNICODE */
6059       fc = *lastptr;
6060       if (lastptr < mb->start_used_ptr) mb->start_used_ptr = lastptr;
6061 #ifdef SUPPORT_UNICODE
6062       if ((mb->poptions & PCRE2_UCP) != 0)
6063         {
6064         if (fc == '_') prev_is_word = TRUE; else
6065           {
6066           int cat = UCD_CATEGORY(fc);
6067           prev_is_word = (cat == ucp_L || cat == ucp_N);
6068           }
6069         }
6070       else
6071 #endif  /* SUPPORT_UNICODE */
6072       prev_is_word = CHMAX_255(fc) && (mb->ctypes[fc] & ctype_word) != 0;
6073       }
6074 
6075     /* Get status of next character */
6076 
6077     if (Feptr >= mb->end_subject)
6078       {
6079       SCHECK_PARTIAL();
6080       cur_is_word = FALSE;
6081       }
6082     else
6083       {
6084       PCRE2_SPTR nextptr = Feptr + 1;
6085 #ifdef SUPPORT_UNICODE
6086       if (utf)
6087         {
6088         FORWARDCHARTEST(nextptr, mb->end_subject);
6089         GETCHAR(fc, Feptr);
6090         }
6091       else
6092 #endif  /* SUPPORT_UNICODE */
6093       fc = *Feptr;
6094       if (nextptr > mb->last_used_ptr) mb->last_used_ptr = nextptr;
6095 #ifdef SUPPORT_UNICODE
6096       if ((mb->poptions & PCRE2_UCP) != 0)
6097         {
6098         if (fc == '_') cur_is_word = TRUE; else
6099           {
6100           int cat = UCD_CATEGORY(fc);
6101           cur_is_word = (cat == ucp_L || cat == ucp_N);
6102           }
6103         }
6104       else
6105 #endif  /* SUPPORT_UNICODE */
6106       cur_is_word = CHMAX_255(fc) && (mb->ctypes[fc] & ctype_word) != 0;
6107       }
6108 
6109     /* Now see if the situation is what we want */
6110 
6111     if ((*Fecode++ == OP_WORD_BOUNDARY)?
6112          cur_is_word == prev_is_word : cur_is_word != prev_is_word)
6113       RRETURN(MATCH_NOMATCH);
6114     break;
6115 
6116 
6117     /* ===================================================================== */
6118     /* Backtracking (*VERB)s, with and without arguments. Note that if the
6119     pattern is successfully matched, we do not come back from RMATCH. */
6120 
6121     case OP_MARK:
6122     Fmark = mb->nomatch_mark = Fecode + 2;
6123     RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM12);
6124 
6125     /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
6126     argument, and we must check whether that argument matches this MARK's
6127     argument. It is passed back in mb->verb_skip_ptr. If it does match, we
6128     return MATCH_SKIP with mb->verb_skip_ptr now pointing to the subject
6129     position that corresponds to this mark. Otherwise, pass back the return
6130     code unaltered. */
6131 
6132     if (rrc == MATCH_SKIP_ARG &&
6133              PRIV(strcmp)(Fecode + 2, mb->verb_skip_ptr) == 0)
6134       {
6135       mb->verb_skip_ptr = Feptr;   /* Pass back current position */
6136       RRETURN(MATCH_SKIP);
6137       }
6138     RRETURN(rrc);
6139 
6140     case OP_FAIL:
6141     RRETURN(MATCH_NOMATCH);
6142 
6143     /* Record the current recursing group number in mb->verb_current_recurse
6144     when a backtracking return such as MATCH_COMMIT is given. This enables the
6145     recurse processing to catch verbs from within the recursion. */
6146 
6147     case OP_COMMIT:
6148     RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM13);
6149     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6150     mb->verb_current_recurse = Fcurrent_recurse;
6151     RRETURN(MATCH_COMMIT);
6152 
6153     case OP_COMMIT_ARG:
6154     Fmark = mb->nomatch_mark = Fecode + 2;
6155     RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM36);
6156     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6157     mb->verb_current_recurse = Fcurrent_recurse;
6158     RRETURN(MATCH_COMMIT);
6159 
6160     case OP_PRUNE:
6161     RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM14);
6162     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6163     mb->verb_current_recurse = Fcurrent_recurse;
6164     RRETURN(MATCH_PRUNE);
6165 
6166     case OP_PRUNE_ARG:
6167     Fmark = mb->nomatch_mark = Fecode + 2;
6168     RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM15);
6169     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6170     mb->verb_current_recurse = Fcurrent_recurse;
6171     RRETURN(MATCH_PRUNE);
6172 
6173     case OP_SKIP:
6174     RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM16);
6175     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6176     mb->verb_skip_ptr = Feptr;   /* Pass back current position */
6177     mb->verb_current_recurse = Fcurrent_recurse;
6178     RRETURN(MATCH_SKIP);
6179 
6180     /* Note that, for Perl compatibility, SKIP with an argument does NOT set
6181     nomatch_mark. When a pattern match ends with a SKIP_ARG for which there was
6182     not a matching mark, we have to re-run the match, ignoring the SKIP_ARG
6183     that failed and any that precede it (either they also failed, or were not
6184     triggered). To do this, we maintain a count of executed SKIP_ARGs. If a
6185     SKIP_ARG gets to top level, the match is re-run with mb->ignore_skip_arg
6186     set to the count of the one that failed. */
6187 
6188     case OP_SKIP_ARG:
6189     mb->skip_arg_count++;
6190     if (mb->skip_arg_count <= mb->ignore_skip_arg)
6191       {
6192       Fecode += PRIV(OP_lengths)[*Fecode] + Fecode[1];
6193       break;
6194       }
6195     RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM17);
6196     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6197 
6198     /* Pass back the current skip name and return the special MATCH_SKIP_ARG
6199     return code. This will either be caught by a matching MARK, or get to the
6200     top, where it causes a rematch with mb->ignore_skip_arg set to the value of
6201     mb->skip_arg_count. */
6202 
6203     mb->verb_skip_ptr = Fecode + 2;
6204     mb->verb_current_recurse = Fcurrent_recurse;
6205     RRETURN(MATCH_SKIP_ARG);
6206 
6207     /* For THEN (and THEN_ARG) we pass back the address of the opcode, so that
6208     the branch in which it occurs can be determined. */
6209 
6210     case OP_THEN:
6211     RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM18);
6212     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6213     mb->verb_ecode_ptr = Fecode;
6214     mb->verb_current_recurse = Fcurrent_recurse;
6215     RRETURN(MATCH_THEN);
6216 
6217     case OP_THEN_ARG:
6218     Fmark = mb->nomatch_mark = Fecode + 2;
6219     RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM19);
6220     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6221     mb->verb_ecode_ptr = Fecode;
6222     mb->verb_current_recurse = Fcurrent_recurse;
6223     RRETURN(MATCH_THEN);
6224 
6225 
6226     /* ===================================================================== */
6227     /* There's been some horrible disaster. Arrival here can only mean there is
6228     something seriously wrong in the code above or the OP_xxx definitions. */
6229 
6230     default:
6231     return PCRE2_ERROR_INTERNAL;
6232     }
6233 
6234   /* Do not insert any code in here without much thought; it is assumed
6235   that "continue" in the code above comes out to here to repeat the main
6236   loop. */
6237 
6238   }  /* End of main loop */
6239 /* Control never reaches here */
6240 
6241 
6242 /* ========================================================================= */
6243 /* The RRETURN() macro jumps here. The number that is saved in Freturn_id
6244 indicates which label we actually want to return to. The value in Frdepth is
6245 the index number of the frame in the vector. The return value has been placed
6246 in rrc. */
6247 
6248 #define LBL(val) case val: goto L_RM##val;
6249 
6250 RETURN_SWITCH:
6251 if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
6252 if (Frdepth == 0) return rrc;                     /* Exit from the top level */
6253 F = (heapframe *)((char *)F - Fback_frame);       /* Backtrack */
6254 mb->cb->callout_flags |= PCRE2_CALLOUT_BACKTRACK; /* Note for callouts */
6255 
6256 #ifdef DEBUG_SHOW_RMATCH
6257 fprintf(stderr, "++ RETURN %d to %d\n", rrc, Freturn_id);
6258 #endif
6259 
6260 switch (Freturn_id)
6261   {
6262   LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
6263   LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(16)
6264   LBL(17) LBL(18) LBL(19) LBL(20) LBL(21) LBL(22) LBL(23) LBL(24)
6265   LBL(25) LBL(26) LBL(27) LBL(28) LBL(29) LBL(30) LBL(31) LBL(32)
6266   LBL(33) LBL(34) LBL(35) LBL(36)
6267 
6268 #ifdef SUPPORT_WIDE_CHARS
6269   LBL(100) LBL(101)
6270 #endif
6271 
6272 #ifdef SUPPORT_UNICODE
6273   LBL(200) LBL(201) LBL(202) LBL(203) LBL(204) LBL(205) LBL(206)
6274   LBL(207) LBL(208) LBL(209) LBL(210) LBL(211) LBL(212) LBL(213)
6275   LBL(214) LBL(215) LBL(216) LBL(217) LBL(218) LBL(219) LBL(220)
6276   LBL(221) LBL(222) LBL(223) LBL(224) LBL(225)
6277 #endif
6278 
6279   default:
6280   return PCRE2_ERROR_INTERNAL;
6281   }
6282 #undef LBL
6283 }
6284 
6285 
6286 /*************************************************
6287 *           Match a Regular Expression           *
6288 *************************************************/
6289 
6290 /* This function applies a compiled pattern to a subject string and picks out
6291 portions of the string if it matches. Two elements in the vector are set for
6292 each substring: the offsets to the start and end of the substring.
6293 
6294 Arguments:
6295   code            points to the compiled expression
6296   subject         points to the subject string
6297   length          length of subject string (may contain binary zeros)
6298   start_offset    where to start in the subject string
6299   options         option bits
6300   match_data      points to a match_data block
6301   mcontext        points to a PCRE2 context
6302 
6303 Returns:          > 0 => success; value is the number of ovector pairs filled
6304                   = 0 => success, but ovector is not big enough
6305                   = -1 => failed to match (PCRE2_ERROR_NOMATCH)
6306                   = -2 => partial match (PCRE2_ERROR_PARTIAL)
6307                   < -2 => some kind of unexpected problem
6308 */
6309 
6310 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_match(const pcre2_code * code,PCRE2_SPTR subject,PCRE2_SIZE length,PCRE2_SIZE start_offset,uint32_t options,pcre2_match_data * match_data,pcre2_match_context * mcontext)6311 pcre2_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
6312   PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
6313   pcre2_match_context *mcontext)
6314 {
6315 int rc;
6316 int was_zero_terminated = 0;
6317 const uint8_t *start_bits = NULL;
6318 const pcre2_real_code *re = (const pcre2_real_code *)code;
6319 
6320 BOOL anchored;
6321 BOOL firstline;
6322 BOOL has_first_cu = FALSE;
6323 BOOL has_req_cu = FALSE;
6324 BOOL startline;
6325 
6326 #if PCRE2_CODE_UNIT_WIDTH == 8
6327 PCRE2_SPTR memchr_found_first_cu;
6328 PCRE2_SPTR memchr_found_first_cu2;
6329 #endif
6330 
6331 PCRE2_UCHAR first_cu = 0;
6332 PCRE2_UCHAR first_cu2 = 0;
6333 PCRE2_UCHAR req_cu = 0;
6334 PCRE2_UCHAR req_cu2 = 0;
6335 
6336 PCRE2_SPTR bumpalong_limit;
6337 PCRE2_SPTR end_subject;
6338 PCRE2_SPTR true_end_subject;
6339 PCRE2_SPTR start_match;
6340 PCRE2_SPTR req_cu_ptr;
6341 PCRE2_SPTR start_partial;
6342 PCRE2_SPTR match_partial;
6343 
6344 #ifdef SUPPORT_JIT
6345 BOOL use_jit;
6346 #endif
6347 
6348 /* This flag is needed even when Unicode is not supported for convenience
6349 (it is used by the IS_NEWLINE macro). */
6350 
6351 BOOL utf = FALSE;
6352 
6353 #ifdef SUPPORT_UNICODE
6354 BOOL ucp = FALSE;
6355 BOOL allow_invalid;
6356 uint32_t fragment_options = 0;
6357 #ifdef SUPPORT_JIT
6358 BOOL jit_checked_utf = FALSE;
6359 #endif
6360 #endif  /* SUPPORT_UNICODE */
6361 
6362 PCRE2_SIZE frame_size;
6363 PCRE2_SIZE heapframes_size;
6364 
6365 /* We need to have mb as a pointer to a match block, because the IS_NEWLINE
6366 macro is used below, and it expects NLBLOCK to be defined as a pointer. */
6367 
6368 pcre2_callout_block cb;
6369 match_block actual_match_block;
6370 match_block *mb = &actual_match_block;
6371 
6372 /* Recognize NULL, length 0 as an empty string. */
6373 
6374 if (subject == NULL && length == 0) subject = (PCRE2_SPTR)"";
6375 
6376 /* Plausibility checks */
6377 
6378 if ((options & ~PUBLIC_MATCH_OPTIONS) != 0) return PCRE2_ERROR_BADOPTION;
6379 if (code == NULL || subject == NULL || match_data == NULL)
6380   return PCRE2_ERROR_NULL;
6381 
6382 start_match = subject + start_offset;
6383 req_cu_ptr = start_match - 1;
6384 if (length == PCRE2_ZERO_TERMINATED)
6385   {
6386   length = PRIV(strlen)(subject);
6387   was_zero_terminated = 1;
6388   }
6389 true_end_subject = end_subject = subject + length;
6390 
6391 if (start_offset > length) return PCRE2_ERROR_BADOFFSET;
6392 
6393 /* Check that the first field in the block is the magic number. */
6394 
6395 if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
6396 
6397 /* Check the code unit width. */
6398 
6399 if ((re->flags & PCRE2_MODE_MASK) != PCRE2_CODE_UNIT_WIDTH/8)
6400   return PCRE2_ERROR_BADMODE;
6401 
6402 /* PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART are match-time flags in the
6403 options variable for this function. Users of PCRE2 who are not calling the
6404 function directly would like to have a way of setting these flags, in the same
6405 way that they can set pcre2_compile() flags like PCRE2_NO_AUTOPOSSESS with
6406 constructions like (*NO_AUTOPOSSESS). To enable this, (*NOTEMPTY) and
6407 (*NOTEMPTY_ATSTART) set bits in the pattern's "flags" field, which we now
6408 transfer to the options for this function. The bits are guaranteed to be
6409 adjacent, but do not have the same values. This bit of Boolean trickery assumes
6410 that the match-time bits are not more significant than the flag bits. If by
6411 accident this is not the case, a compile-time division by zero error will
6412 occur. */
6413 
6414 #define FF (PCRE2_NOTEMPTY_SET|PCRE2_NE_ATST_SET)
6415 #define OO (PCRE2_NOTEMPTY|PCRE2_NOTEMPTY_ATSTART)
6416 options |= (re->flags & FF) / ((FF & (~FF+1)) / (OO & (~OO+1)));
6417 #undef FF
6418 #undef OO
6419 
6420 /* If the pattern was successfully studied with JIT support, we will run the
6421 JIT executable instead of the rest of this function. Most options must be set
6422 at compile time for the JIT code to be usable. */
6423 
6424 #ifdef SUPPORT_JIT
6425 use_jit = (re->executable_jit != NULL &&
6426           (options & ~PUBLIC_JIT_MATCH_OPTIONS) == 0);
6427 #endif
6428 
6429 /* Initialize UTF/UCP parameters. */
6430 
6431 #ifdef SUPPORT_UNICODE
6432 utf = (re->overall_options & PCRE2_UTF) != 0;
6433 allow_invalid = (re->overall_options & PCRE2_MATCH_INVALID_UTF) != 0;
6434 ucp = (re->overall_options & PCRE2_UCP) != 0;
6435 #endif  /* SUPPORT_UNICODE */
6436 
6437 /* Convert the partial matching flags into an integer. */
6438 
6439 mb->partial = ((options & PCRE2_PARTIAL_HARD) != 0)? 2 :
6440               ((options & PCRE2_PARTIAL_SOFT) != 0)? 1 : 0;
6441 
6442 /* Partial matching and PCRE2_ENDANCHORED are currently not allowed at the same
6443 time. */
6444 
6445 if (mb->partial != 0 &&
6446    ((re->overall_options | options) & PCRE2_ENDANCHORED) != 0)
6447   return PCRE2_ERROR_BADOPTION;
6448 
6449 /* It is an error to set an offset limit without setting the flag at compile
6450 time. */
6451 
6452 if (mcontext != NULL && mcontext->offset_limit != PCRE2_UNSET &&
6453      (re->overall_options & PCRE2_USE_OFFSET_LIMIT) == 0)
6454   return PCRE2_ERROR_BADOFFSETLIMIT;
6455 
6456 /* If the match data block was previously used with PCRE2_COPY_MATCHED_SUBJECT,
6457 free the memory that was obtained. Set the field to NULL for no match cases. */
6458 
6459 if ((match_data->flags & PCRE2_MD_COPIED_SUBJECT) != 0)
6460   {
6461   match_data->memctl.free((void *)match_data->subject,
6462     match_data->memctl.memory_data);
6463   match_data->flags &= ~PCRE2_MD_COPIED_SUBJECT;
6464   }
6465 match_data->subject = NULL;
6466 
6467 /* Zero the error offset in case the first code unit is invalid UTF. */
6468 
6469 match_data->startchar = 0;
6470 
6471 
6472 /* ============================= JIT matching ============================== */
6473 
6474 /* Prepare for JIT matching. Check a UTF string for validity unless no check is
6475 requested or invalid UTF can be handled. We check only the portion of the
6476 subject that might be inspected during matching - from the offset minus the
6477 maximum lookbehind to the given length. This saves time when a small part of a
6478 large subject is being matched by the use of a starting offset. Note that the
6479 maximum lookbehind is a number of characters, not code units. */
6480 
6481 #ifdef SUPPORT_JIT
6482 if (use_jit)
6483   {
6484 #ifdef SUPPORT_UNICODE
6485   if (utf && (options & PCRE2_NO_UTF_CHECK) == 0 && !allow_invalid)
6486     {
6487 #if PCRE2_CODE_UNIT_WIDTH != 32
6488     unsigned int i;
6489 #endif
6490 
6491     /* For 8-bit and 16-bit UTF, check that the first code unit is a valid
6492     character start. */
6493 
6494 #if PCRE2_CODE_UNIT_WIDTH != 32
6495     if (start_match < end_subject && NOT_FIRSTCU(*start_match))
6496       {
6497       if (start_offset > 0) return PCRE2_ERROR_BADUTFOFFSET;
6498 #if PCRE2_CODE_UNIT_WIDTH == 8
6499       return PCRE2_ERROR_UTF8_ERR20;  /* Isolated 0x80 byte */
6500 #else
6501       return PCRE2_ERROR_UTF16_ERR3;  /* Isolated low surrogate */
6502 #endif
6503       }
6504 #endif  /* WIDTH != 32 */
6505 
6506     /* Move back by the maximum lookbehind, just in case it happens at the very
6507     start of matching. */
6508 
6509 #if PCRE2_CODE_UNIT_WIDTH != 32
6510     for (i = re->max_lookbehind; i > 0 && start_match > subject; i--)
6511       {
6512       start_match--;
6513       while (start_match > subject &&
6514 #if PCRE2_CODE_UNIT_WIDTH == 8
6515       (*start_match & 0xc0) == 0x80)
6516 #else  /* 16-bit */
6517       (*start_match & 0xfc00) == 0xdc00)
6518 #endif
6519         start_match--;
6520       }
6521 #else  /* PCRE2_CODE_UNIT_WIDTH != 32 */
6522 
6523     /* In the 32-bit library, one code unit equals one character. However,
6524     we cannot just subtract the lookbehind and then compare pointers, because
6525     a very large lookbehind could create an invalid pointer. */
6526 
6527     if (start_offset >= re->max_lookbehind)
6528       start_match -= re->max_lookbehind;
6529     else
6530       start_match = subject;
6531 #endif  /* PCRE2_CODE_UNIT_WIDTH != 32 */
6532 
6533     /* Validate the relevant portion of the subject. Adjust the offset of an
6534     invalid code point to be an absolute offset in the whole string. */
6535 
6536     match_data->rc = PRIV(valid_utf)(start_match,
6537       length - (start_match - subject), &(match_data->startchar));
6538     if (match_data->rc != 0)
6539       {
6540       match_data->startchar += start_match - subject;
6541       return match_data->rc;
6542       }
6543     jit_checked_utf = TRUE;
6544     }
6545 #endif  /* SUPPORT_UNICODE */
6546 
6547   /* If JIT returns BADOPTION, which means that the selected complete or
6548   partial matching mode was not compiled, fall through to the interpreter. */
6549 
6550   rc = pcre2_jit_match(code, subject, length, start_offset, options,
6551     match_data, mcontext);
6552   if (rc != PCRE2_ERROR_JIT_BADOPTION)
6553     {
6554     if (rc >= 0 && (options & PCRE2_COPY_MATCHED_SUBJECT) != 0)
6555       {
6556       length = CU2BYTES(length + was_zero_terminated);
6557       match_data->subject = match_data->memctl.malloc(length,
6558         match_data->memctl.memory_data);
6559       if (match_data->subject == NULL) return PCRE2_ERROR_NOMEMORY;
6560       memcpy((void *)match_data->subject, subject, length);
6561       match_data->flags |= PCRE2_MD_COPIED_SUBJECT;
6562       }
6563     return rc;
6564     }
6565   }
6566 #endif  /* SUPPORT_JIT */
6567 
6568 /* ========================= End of JIT matching ========================== */
6569 
6570 
6571 /* Proceed with non-JIT matching. The default is to allow lookbehinds to the
6572 start of the subject. A UTF check when there is a non-zero offset may change
6573 this. */
6574 
6575 mb->check_subject = subject;
6576 
6577 /* If a UTF subject string was not checked for validity in the JIT code above,
6578 check it here, and handle support for invalid UTF strings. The check above
6579 happens only when invalid UTF is not supported and PCRE2_NO_UTF_CHECK is unset.
6580 If we get here in those circumstances, it means the subject string is valid,
6581 but for some reason JIT matching was not successful. There is no need to check
6582 the subject again.
6583 
6584 We check only the portion of the subject that might be inspected during
6585 matching - from the offset minus the maximum lookbehind to the given length.
6586 This saves time when a small part of a large subject is being matched by the
6587 use of a starting offset. Note that the maximum lookbehind is a number of
6588 characters, not code units.
6589 
6590 Note also that support for invalid UTF forces a check, overriding the setting
6591 of PCRE2_NO_UTF_CHECK. */
6592 
6593 #ifdef SUPPORT_UNICODE
6594 if (utf &&
6595 #ifdef SUPPORT_JIT
6596     !jit_checked_utf &&
6597 #endif
6598     ((options & PCRE2_NO_UTF_CHECK) == 0 || allow_invalid))
6599   {
6600 #if PCRE2_CODE_UNIT_WIDTH != 32
6601   BOOL skipped_bad_start = FALSE;
6602 #endif
6603 
6604   /* For 8-bit and 16-bit UTF, check that the first code unit is a valid
6605   character start. If we are handling invalid UTF, just skip over such code
6606   units. Otherwise, give an appropriate error. */
6607 
6608 #if PCRE2_CODE_UNIT_WIDTH != 32
6609   if (allow_invalid)
6610     {
6611     while (start_match < end_subject && NOT_FIRSTCU(*start_match))
6612       {
6613       start_match++;
6614       skipped_bad_start = TRUE;
6615       }
6616     }
6617   else if (start_match < end_subject && NOT_FIRSTCU(*start_match))
6618     {
6619     if (start_offset > 0) return PCRE2_ERROR_BADUTFOFFSET;
6620 #if PCRE2_CODE_UNIT_WIDTH == 8
6621     return PCRE2_ERROR_UTF8_ERR20;  /* Isolated 0x80 byte */
6622 #else
6623     return PCRE2_ERROR_UTF16_ERR3;  /* Isolated low surrogate */
6624 #endif
6625     }
6626 #endif  /* WIDTH != 32 */
6627 
6628   /* The mb->check_subject field points to the start of UTF checking;
6629   lookbehinds can go back no further than this. */
6630 
6631   mb->check_subject = start_match;
6632 
6633   /* Move back by the maximum lookbehind, just in case it happens at the very
6634   start of matching, but don't do this if we skipped bad 8-bit or 16-bit code
6635   units above. */
6636 
6637 #if PCRE2_CODE_UNIT_WIDTH != 32
6638   if (!skipped_bad_start)
6639     {
6640     unsigned int i;
6641     for (i = re->max_lookbehind; i > 0 && mb->check_subject > subject; i--)
6642       {
6643       mb->check_subject--;
6644       while (mb->check_subject > subject &&
6645 #if PCRE2_CODE_UNIT_WIDTH == 8
6646       (*mb->check_subject & 0xc0) == 0x80)
6647 #else  /* 16-bit */
6648       (*mb->check_subject & 0xfc00) == 0xdc00)
6649 #endif
6650         mb->check_subject--;
6651       }
6652     }
6653 #else  /* PCRE2_CODE_UNIT_WIDTH != 32 */
6654 
6655   /* In the 32-bit library, one code unit equals one character. However,
6656   we cannot just subtract the lookbehind and then compare pointers, because
6657   a very large lookbehind could create an invalid pointer. */
6658 
6659   if (start_offset >= re->max_lookbehind)
6660     mb->check_subject -= re->max_lookbehind;
6661   else
6662     mb->check_subject = subject;
6663 #endif  /* PCRE2_CODE_UNIT_WIDTH != 32 */
6664 
6665   /* Validate the relevant portion of the subject. There's a loop in case we
6666   encounter bad UTF in the characters preceding start_match which we are
6667   scanning because of a lookbehind. */
6668 
6669   for (;;)
6670     {
6671     match_data->rc = PRIV(valid_utf)(mb->check_subject,
6672       length - (mb->check_subject - subject), &(match_data->startchar));
6673 
6674     if (match_data->rc == 0) break;   /* Valid UTF string */
6675 
6676     /* Invalid UTF string. Adjust the offset to be an absolute offset in the
6677     whole string. If we are handling invalid UTF strings, set end_subject to
6678     stop before the bad code unit, and set the options to "not end of line".
6679     Otherwise return the error. */
6680 
6681     match_data->startchar += mb->check_subject - subject;
6682     if (!allow_invalid || match_data->rc > 0) return match_data->rc;
6683     end_subject = subject + match_data->startchar;
6684 
6685     /* If the end precedes start_match, it means there is invalid UTF in the
6686     extra code units we reversed over because of a lookbehind. Advance past the
6687     first bad code unit, and then skip invalid character starting code units in
6688     8-bit and 16-bit modes, and try again with the original end point. */
6689 
6690     if (end_subject < start_match)
6691       {
6692       mb->check_subject = end_subject + 1;
6693 #if PCRE2_CODE_UNIT_WIDTH != 32
6694       while (mb->check_subject < start_match && NOT_FIRSTCU(*mb->check_subject))
6695         mb->check_subject++;
6696 #endif
6697       end_subject = true_end_subject;
6698       }
6699 
6700     /* Otherwise, set the not end of line option, and do the match. */
6701 
6702     else
6703       {
6704       fragment_options = PCRE2_NOTEOL;
6705       break;
6706       }
6707     }
6708   }
6709 #endif  /* SUPPORT_UNICODE */
6710 
6711 /* A NULL match context means "use a default context", but we take the memory
6712 control functions from the pattern. */
6713 
6714 if (mcontext == NULL)
6715   {
6716   mcontext = (pcre2_match_context *)(&PRIV(default_match_context));
6717   mb->memctl = re->memctl;
6718   }
6719 else mb->memctl = mcontext->memctl;
6720 
6721 anchored = ((re->overall_options | options) & PCRE2_ANCHORED) != 0;
6722 firstline = (re->overall_options & PCRE2_FIRSTLINE) != 0;
6723 startline = (re->flags & PCRE2_STARTLINE) != 0;
6724 bumpalong_limit = (mcontext->offset_limit == PCRE2_UNSET)?
6725   true_end_subject : subject + mcontext->offset_limit;
6726 
6727 /* Initialize and set up the fixed fields in the callout block, with a pointer
6728 in the match block. */
6729 
6730 mb->cb = &cb;
6731 cb.version = 2;
6732 cb.subject = subject;
6733 cb.subject_length = (PCRE2_SIZE)(end_subject - subject);
6734 cb.callout_flags = 0;
6735 
6736 /* Fill in the remaining fields in the match block, except for moptions, which
6737 gets set later. */
6738 
6739 mb->callout = mcontext->callout;
6740 mb->callout_data = mcontext->callout_data;
6741 
6742 mb->start_subject = subject;
6743 mb->start_offset = start_offset;
6744 mb->end_subject = end_subject;
6745 mb->hasthen = (re->flags & PCRE2_HASTHEN) != 0;
6746 mb->allowemptypartial = (re->max_lookbehind > 0) ||
6747     (re->flags & PCRE2_MATCH_EMPTY) != 0;
6748 mb->poptions = re->overall_options;          /* Pattern options */
6749 mb->ignore_skip_arg = 0;
6750 mb->mark = mb->nomatch_mark = NULL;          /* In case never set */
6751 
6752 /* The name table is needed for finding all the numbers associated with a
6753 given name, for condition testing. The code follows the name table. */
6754 
6755 mb->name_table = (PCRE2_UCHAR *)((uint8_t *)re + sizeof(pcre2_real_code));
6756 mb->name_count = re->name_count;
6757 mb->name_entry_size = re->name_entry_size;
6758 mb->start_code = mb->name_table + re->name_count * re->name_entry_size;
6759 
6760 /* Process the \R and newline settings. */
6761 
6762 mb->bsr_convention = re->bsr_convention;
6763 mb->nltype = NLTYPE_FIXED;
6764 switch(re->newline_convention)
6765   {
6766   case PCRE2_NEWLINE_CR:
6767   mb->nllen = 1;
6768   mb->nl[0] = CHAR_CR;
6769   break;
6770 
6771   case PCRE2_NEWLINE_LF:
6772   mb->nllen = 1;
6773   mb->nl[0] = CHAR_NL;
6774   break;
6775 
6776   case PCRE2_NEWLINE_NUL:
6777   mb->nllen = 1;
6778   mb->nl[0] = CHAR_NUL;
6779   break;
6780 
6781   case PCRE2_NEWLINE_CRLF:
6782   mb->nllen = 2;
6783   mb->nl[0] = CHAR_CR;
6784   mb->nl[1] = CHAR_NL;
6785   break;
6786 
6787   case PCRE2_NEWLINE_ANY:
6788   mb->nltype = NLTYPE_ANY;
6789   break;
6790 
6791   case PCRE2_NEWLINE_ANYCRLF:
6792   mb->nltype = NLTYPE_ANYCRLF;
6793   break;
6794 
6795   default: return PCRE2_ERROR_INTERNAL;
6796   }
6797 
6798 /* The backtracking frames have fixed data at the front, and a PCRE2_SIZE
6799 vector at the end, whose size depends on the number of capturing parentheses in
6800 the pattern. It is not used at all if there are no capturing parentheses.
6801 
6802   frame_size                   is the total size of each frame
6803   match_data->heapframes       is the pointer to the frames vector
6804   match_data->heapframes_size  is the total size of the vector
6805 
6806 We must pad the frame_size for alignment to ensure subsequent frames are as
6807 aligned as heapframe. Whilst ovector is word-aligned due to being a PCRE2_SIZE
6808 array, that does not guarantee it is suitably aligned for pointers, as some
6809 architectures have pointers that are larger than a size_t. */
6810 
6811 frame_size = (offsetof(heapframe, ovector) +
6812   re->top_bracket * 2 * sizeof(PCRE2_SIZE) + HEAPFRAME_ALIGNMENT - 1) &
6813   ~(HEAPFRAME_ALIGNMENT - 1);
6814 
6815 /* Limits set in the pattern override the match context only if they are
6816 smaller. */
6817 
6818 mb->heap_limit = ((mcontext->heap_limit < re->limit_heap)?
6819   mcontext->heap_limit : re->limit_heap) * 1024;
6820 
6821 mb->match_limit = (mcontext->match_limit < re->limit_match)?
6822   mcontext->match_limit : re->limit_match;
6823 
6824 mb->match_limit_depth = (mcontext->depth_limit < re->limit_depth)?
6825   mcontext->depth_limit : re->limit_depth;
6826 
6827 /* If a pattern has very many capturing parentheses, the frame size may be very
6828 large. Set the initial frame vector size to ensure that there are at least 10
6829 available frames, but enforce a minimum of START_FRAMES_SIZE. If this is
6830 greater than the heap limit, get as large a vector as possible. Always round
6831 the size to a multiple of the frame size. */
6832 
6833 heapframes_size = frame_size * 10;
6834 if (heapframes_size < START_FRAMES_SIZE) heapframes_size = START_FRAMES_SIZE;
6835 if (heapframes_size > mb->heap_limit)
6836   {
6837   if (frame_size > mb->heap_limit ) return PCRE2_ERROR_HEAPLIMIT;
6838   heapframes_size = mb->heap_limit;
6839   }
6840 
6841 /* If an existing frame vector in the match_data block is large enough, we can
6842 use it. Otherwise, free any pre-existing vector and get a new one. */
6843 
6844 if (match_data->heapframes_size < heapframes_size)
6845   {
6846   match_data->memctl.free(match_data->heapframes,
6847     match_data->memctl.memory_data);
6848   match_data->heapframes = match_data->memctl.malloc(heapframes_size,
6849     match_data->memctl.memory_data);
6850   if (match_data->heapframes == NULL)
6851     {
6852     match_data->heapframes_size = 0;
6853     return PCRE2_ERROR_NOMEMORY;
6854     }
6855   match_data->heapframes_size = heapframes_size;
6856   }
6857 
6858 /* Write to the ovector within the first frame to mark every capture unset and
6859 to avoid uninitialized memory read errors when it is copied to a new frame. */
6860 
6861 memset((char *)(match_data->heapframes) + offsetof(heapframe, ovector), 0xff,
6862   frame_size - offsetof(heapframe, ovector));
6863 
6864 /* Pointers to the individual character tables */
6865 
6866 mb->lcc = re->tables + lcc_offset;
6867 mb->fcc = re->tables + fcc_offset;
6868 mb->ctypes = re->tables + ctypes_offset;
6869 
6870 /* Set up the first code unit to match, if available. If there's no first code
6871 unit there may be a bitmap of possible first characters. */
6872 
6873 if ((re->flags & PCRE2_FIRSTSET) != 0)
6874   {
6875   has_first_cu = TRUE;
6876   first_cu = first_cu2 = (PCRE2_UCHAR)(re->first_codeunit);
6877   if ((re->flags & PCRE2_FIRSTCASELESS) != 0)
6878     {
6879     first_cu2 = TABLE_GET(first_cu, mb->fcc, first_cu);
6880 #ifdef SUPPORT_UNICODE
6881 #if PCRE2_CODE_UNIT_WIDTH == 8
6882     if (first_cu > 127 && ucp && !utf) first_cu2 = UCD_OTHERCASE(first_cu);
6883 #else
6884     if (first_cu > 127 && (utf || ucp)) first_cu2 = UCD_OTHERCASE(first_cu);
6885 #endif
6886 #endif  /* SUPPORT_UNICODE */
6887     }
6888   }
6889 else
6890   if (!startline && (re->flags & PCRE2_FIRSTMAPSET) != 0)
6891     start_bits = re->start_bitmap;
6892 
6893 /* There may also be a "last known required character" set. */
6894 
6895 if ((re->flags & PCRE2_LASTSET) != 0)
6896   {
6897   has_req_cu = TRUE;
6898   req_cu = req_cu2 = (PCRE2_UCHAR)(re->last_codeunit);
6899   if ((re->flags & PCRE2_LASTCASELESS) != 0)
6900     {
6901     req_cu2 = TABLE_GET(req_cu, mb->fcc, req_cu);
6902 #ifdef SUPPORT_UNICODE
6903 #if PCRE2_CODE_UNIT_WIDTH == 8
6904     if (req_cu > 127 && ucp && !utf) req_cu2 = UCD_OTHERCASE(req_cu);
6905 #else
6906     if (req_cu > 127 && (utf || ucp)) req_cu2 = UCD_OTHERCASE(req_cu);
6907 #endif
6908 #endif  /* SUPPORT_UNICODE */
6909     }
6910   }
6911 
6912 
6913 /* ==========================================================================*/
6914 
6915 /* Loop for handling unanchored repeated matching attempts; for anchored regexs
6916 the loop runs just once. */
6917 
6918 #ifdef SUPPORT_UNICODE
6919 FRAGMENT_RESTART:
6920 #endif
6921 
6922 start_partial = match_partial = NULL;
6923 mb->hitend = FALSE;
6924 
6925 #if PCRE2_CODE_UNIT_WIDTH == 8
6926 memchr_found_first_cu = NULL;
6927 memchr_found_first_cu2 = NULL;
6928 #endif
6929 
6930 for(;;)
6931   {
6932   PCRE2_SPTR new_start_match;
6933 
6934   /* ----------------- Start of match optimizations ---------------- */
6935 
6936   /* There are some optimizations that avoid running the match if a known
6937   starting point is not found, or if a known later code unit is not present.
6938   However, there is an option (settable at compile time) that disables these,
6939   for testing and for ensuring that all callouts do actually occur. */
6940 
6941   if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
6942     {
6943     /* If firstline is TRUE, the start of the match is constrained to the first
6944     line of a multiline string. That is, the match must be before or at the
6945     first newline following the start of matching. Temporarily adjust
6946     end_subject so that we stop the scans for a first code unit at a newline.
6947     If the match fails at the newline, later code breaks the loop. */
6948 
6949     if (firstline)
6950       {
6951       PCRE2_SPTR t = start_match;
6952 #ifdef SUPPORT_UNICODE
6953       if (utf)
6954         {
6955         while (t < end_subject && !IS_NEWLINE(t))
6956           {
6957           t++;
6958           ACROSSCHAR(t < end_subject, t, t++);
6959           }
6960         }
6961       else
6962 #endif
6963       while (t < end_subject && !IS_NEWLINE(t)) t++;
6964       end_subject = t;
6965       }
6966 
6967     /* Anchored: check the first code unit if one is recorded. This may seem
6968     pointless but it can help in detecting a no match case without scanning for
6969     the required code unit. */
6970 
6971     if (anchored)
6972       {
6973       if (has_first_cu || start_bits != NULL)
6974         {
6975         BOOL ok = start_match < end_subject;
6976         if (ok)
6977           {
6978           PCRE2_UCHAR c = UCHAR21TEST(start_match);
6979           ok = has_first_cu && (c == first_cu || c == first_cu2);
6980           if (!ok && start_bits != NULL)
6981             {
6982 #if PCRE2_CODE_UNIT_WIDTH != 8
6983             if (c > 255) c = 255;
6984 #endif
6985             ok = (start_bits[c/8] & (1u << (c&7))) != 0;
6986             }
6987           }
6988         if (!ok)
6989           {
6990           rc = MATCH_NOMATCH;
6991           break;
6992           }
6993         }
6994       }
6995 
6996     /* Not anchored. Advance to a unique first code unit if there is one. */
6997 
6998     else
6999       {
7000       if (has_first_cu)
7001         {
7002         if (first_cu != first_cu2)  /* Caseless */
7003           {
7004           /* In 16-bit and 32-bit modes we have to do our own search, so can
7005           look for both cases at once. */
7006 
7007 #if PCRE2_CODE_UNIT_WIDTH != 8
7008           PCRE2_UCHAR smc;
7009           while (start_match < end_subject &&
7010                 (smc = UCHAR21TEST(start_match)) != first_cu &&
7011                  smc != first_cu2)
7012             start_match++;
7013 #else
7014           /* In 8-bit mode, the use of memchr() gives a big speed up, even
7015           though we have to call it twice in order to find the earliest
7016           occurrence of the code unit in either of its cases. Caching is used
7017           to remember the positions of previously found code units. This can
7018           make a huge difference when the strings are very long and only one
7019           case is actually present. */
7020 
7021           PCRE2_SPTR pp1 = NULL;
7022           PCRE2_SPTR pp2 = NULL;
7023           PCRE2_SIZE searchlength = end_subject - start_match;
7024 
7025           /* If we haven't got a previously found position for first_cu, or if
7026           the current starting position is later, we need to do a search. If
7027           the code unit is not found, set it to the end. */
7028 
7029           if (memchr_found_first_cu == NULL ||
7030               start_match > memchr_found_first_cu)
7031             {
7032             pp1 = memchr(start_match, first_cu, searchlength);
7033             memchr_found_first_cu = (pp1 == NULL)? end_subject : pp1;
7034             }
7035 
7036           /* If the start is before a previously found position, use the
7037           previous position, or NULL if a previous search failed. */
7038 
7039           else pp1 = (memchr_found_first_cu == end_subject)? NULL :
7040             memchr_found_first_cu;
7041 
7042           /* Do the same thing for the other case. */
7043 
7044           if (memchr_found_first_cu2 == NULL ||
7045               start_match > memchr_found_first_cu2)
7046             {
7047             pp2 = memchr(start_match, first_cu2, searchlength);
7048             memchr_found_first_cu2 = (pp2 == NULL)? end_subject : pp2;
7049             }
7050 
7051           else pp2 = (memchr_found_first_cu2 == end_subject)? NULL :
7052             memchr_found_first_cu2;
7053 
7054           /* Set the start to the end of the subject if neither case was found.
7055           Otherwise, use the earlier found point. */
7056 
7057           if (pp1 == NULL)
7058             start_match = (pp2 == NULL)? end_subject : pp2;
7059           else
7060             start_match = (pp2 == NULL || pp1 < pp2)? pp1 : pp2;
7061 
7062 #endif  /* 8-bit handling */
7063           }
7064 
7065         /* The caseful case is much simpler. */
7066 
7067         else
7068           {
7069 #if PCRE2_CODE_UNIT_WIDTH != 8
7070           while (start_match < end_subject && UCHAR21TEST(start_match) !=
7071                  first_cu)
7072             start_match++;
7073 #else
7074           start_match = memchr(start_match, first_cu, end_subject - start_match);
7075           if (start_match == NULL) start_match = end_subject;
7076 #endif
7077           }
7078 
7079         /* If we can't find the required first code unit, having reached the
7080         true end of the subject, break the bumpalong loop, to force a match
7081         failure, except when doing partial matching, when we let the next cycle
7082         run at the end of the subject. To see why, consider the pattern
7083         /(?<=abc)def/, which partially matches "abc", even though the string
7084         does not contain the starting character "d". If we have not reached the
7085         true end of the subject (PCRE2_FIRSTLINE caused end_subject to be
7086         temporarily modified) we also let the cycle run, because the matching
7087         string is legitimately allowed to start with the first code unit of a
7088         newline. */
7089 
7090         if (mb->partial == 0 && start_match >= mb->end_subject)
7091           {
7092           rc = MATCH_NOMATCH;
7093           break;
7094           }
7095         }
7096 
7097       /* If there's no first code unit, advance to just after a linebreak for a
7098       multiline match if required. */
7099 
7100       else if (startline)
7101         {
7102         if (start_match > mb->start_subject + start_offset)
7103           {
7104 #ifdef SUPPORT_UNICODE
7105           if (utf)
7106             {
7107             while (start_match < end_subject && !WAS_NEWLINE(start_match))
7108               {
7109               start_match++;
7110               ACROSSCHAR(start_match < end_subject, start_match, start_match++);
7111               }
7112             }
7113           else
7114 #endif
7115           while (start_match < end_subject && !WAS_NEWLINE(start_match))
7116             start_match++;
7117 
7118           /* If we have just passed a CR and the newline option is ANY or
7119           ANYCRLF, and we are now at a LF, advance the match position by one
7120           more code unit. */
7121 
7122           if (start_match[-1] == CHAR_CR &&
7123                (mb->nltype == NLTYPE_ANY || mb->nltype == NLTYPE_ANYCRLF) &&
7124                start_match < end_subject &&
7125                UCHAR21TEST(start_match) == CHAR_NL)
7126             start_match++;
7127           }
7128         }
7129 
7130       /* If there's no first code unit or a requirement for a multiline line
7131       start, advance to a non-unique first code unit if any have been
7132       identified. The bitmap contains only 256 bits. When code units are 16 or
7133       32 bits wide, all code units greater than 254 set the 255 bit. */
7134 
7135       else if (start_bits != NULL)
7136         {
7137         while (start_match < end_subject)
7138           {
7139           uint32_t c = UCHAR21TEST(start_match);
7140 #if PCRE2_CODE_UNIT_WIDTH != 8
7141           if (c > 255) c = 255;
7142 #endif
7143           if ((start_bits[c/8] & (1u << (c&7))) != 0) break;
7144           start_match++;
7145           }
7146 
7147         /* See comment above in first_cu checking about the next few lines. */
7148 
7149         if (mb->partial == 0 && start_match >= mb->end_subject)
7150           {
7151           rc = MATCH_NOMATCH;
7152           break;
7153           }
7154         }
7155       }   /* End first code unit handling */
7156 
7157     /* Restore fudged end_subject */
7158 
7159     end_subject = mb->end_subject;
7160 
7161     /* The following two optimizations must be disabled for partial matching. */
7162 
7163     if (mb->partial == 0)
7164       {
7165       PCRE2_SPTR p;
7166 
7167       /* The minimum matching length is a lower bound; no string of that length
7168       may actually match the pattern. Although the value is, strictly, in
7169       characters, we treat it as code units to avoid spending too much time in
7170       this optimization. */
7171 
7172       if (end_subject - start_match < re->minlength)
7173         {
7174         rc = MATCH_NOMATCH;
7175         break;
7176         }
7177 
7178       /* If req_cu is set, we know that that code unit must appear in the
7179       subject for the (non-partial) match to succeed. If the first code unit is
7180       set, req_cu must be later in the subject; otherwise the test starts at
7181       the match point. This optimization can save a huge amount of backtracking
7182       in patterns with nested unlimited repeats that aren't going to match.
7183       Writing separate code for caseful/caseless versions makes it go faster,
7184       as does using an autoincrement and backing off on a match. As in the case
7185       of the first code unit, using memchr() in the 8-bit library gives a big
7186       speed up. Unlike the first_cu check above, we do not need to call
7187       memchr() twice in the caseless case because we only need to check for the
7188       presence of the character in either case, not find the first occurrence.
7189 
7190       The search can be skipped if the code unit was found later than the
7191       current starting point in a previous iteration of the bumpalong loop.
7192 
7193       HOWEVER: when the subject string is very, very long, searching to its end
7194       can take a long time, and give bad performance on quite ordinary
7195       anchored patterns. This showed up when somebody was matching something
7196       like /^\d+C/ on a 32-megabyte string... so we don't do this when the
7197       string is sufficiently long, but it's worth searching a lot more for
7198       unanchored patterns. */
7199 
7200       p = start_match + (has_first_cu? 1:0);
7201       if (has_req_cu && p > req_cu_ptr)
7202         {
7203         PCRE2_SIZE check_length = end_subject - start_match;
7204 
7205         if (check_length < REQ_CU_MAX ||
7206               (!anchored && check_length < REQ_CU_MAX * 1000))
7207           {
7208           if (req_cu != req_cu2)  /* Caseless */
7209             {
7210 #if PCRE2_CODE_UNIT_WIDTH != 8
7211             while (p < end_subject)
7212               {
7213               uint32_t pp = UCHAR21INCTEST(p);
7214               if (pp == req_cu || pp == req_cu2) { p--; break; }
7215               }
7216 #else  /* 8-bit code units */
7217             PCRE2_SPTR pp = p;
7218             p = memchr(pp, req_cu, end_subject - pp);
7219             if (p == NULL)
7220               {
7221               p = memchr(pp, req_cu2, end_subject - pp);
7222               if (p == NULL) p = end_subject;
7223               }
7224 #endif /* PCRE2_CODE_UNIT_WIDTH != 8 */
7225             }
7226 
7227           /* The caseful case */
7228 
7229           else
7230             {
7231 #if PCRE2_CODE_UNIT_WIDTH != 8
7232             while (p < end_subject)
7233               {
7234               if (UCHAR21INCTEST(p) == req_cu) { p--; break; }
7235               }
7236 
7237 #else  /* 8-bit code units */
7238             p = memchr(p, req_cu, end_subject - p);
7239             if (p == NULL) p = end_subject;
7240 #endif
7241             }
7242 
7243           /* If we can't find the required code unit, break the bumpalong loop,
7244           forcing a match failure. */
7245 
7246           if (p >= end_subject)
7247             {
7248             rc = MATCH_NOMATCH;
7249             break;
7250             }
7251 
7252           /* If we have found the required code unit, save the point where we
7253           found it, so that we don't search again next time round the bumpalong
7254           loop if the start hasn't yet passed this code unit. */
7255 
7256           req_cu_ptr = p;
7257           }
7258         }
7259       }
7260     }
7261 
7262   /* ------------ End of start of match optimizations ------------ */
7263 
7264   /* Give no match if we have passed the bumpalong limit. */
7265 
7266   if (start_match > bumpalong_limit)
7267     {
7268     rc = MATCH_NOMATCH;
7269     break;
7270     }
7271 
7272   /* OK, we can now run the match. If "hitend" is set afterwards, remember the
7273   first starting point for which a partial match was found. */
7274 
7275   cb.start_match = (PCRE2_SIZE)(start_match - subject);
7276   cb.callout_flags |= PCRE2_CALLOUT_STARTMATCH;
7277 
7278   mb->start_used_ptr = start_match;
7279   mb->last_used_ptr = start_match;
7280 #ifdef SUPPORT_UNICODE
7281   mb->moptions = options | fragment_options;
7282 #else
7283   mb->moptions = options;
7284 #endif
7285   mb->match_call_count = 0;
7286   mb->end_offset_top = 0;
7287   mb->skip_arg_count = 0;
7288 
7289   rc = match(start_match, mb->start_code, re->top_bracket, frame_size,
7290     match_data, mb);
7291 
7292   if (mb->hitend && start_partial == NULL)
7293     {
7294     start_partial = mb->start_used_ptr;
7295     match_partial = start_match;
7296     }
7297 
7298   switch(rc)
7299     {
7300     /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched
7301     the SKIP's arg was not found. In this circumstance, Perl ignores the SKIP
7302     entirely. The only way we can do that is to re-do the match at the same
7303     point, with a flag to force SKIP with an argument to be ignored. Just
7304     treating this case as NOMATCH does not work because it does not check other
7305     alternatives in patterns such as A(*SKIP:A)B|AC when the subject is AC. */
7306 
7307     case MATCH_SKIP_ARG:
7308     new_start_match = start_match;
7309     mb->ignore_skip_arg = mb->skip_arg_count;
7310     break;
7311 
7312     /* SKIP passes back the next starting point explicitly, but if it is no
7313     greater than the match we have just done, treat it as NOMATCH. */
7314 
7315     case MATCH_SKIP:
7316     if (mb->verb_skip_ptr > start_match)
7317       {
7318       new_start_match = mb->verb_skip_ptr;
7319       break;
7320       }
7321     /* Fall through */
7322 
7323     /* NOMATCH and PRUNE advance by one character. THEN at this level acts
7324     exactly like PRUNE. Unset ignore SKIP-with-argument. */
7325 
7326     case MATCH_NOMATCH:
7327     case MATCH_PRUNE:
7328     case MATCH_THEN:
7329     mb->ignore_skip_arg = 0;
7330     new_start_match = start_match + 1;
7331 #ifdef SUPPORT_UNICODE
7332     if (utf)
7333       ACROSSCHAR(new_start_match < end_subject, new_start_match,
7334         new_start_match++);
7335 #endif
7336     break;
7337 
7338     /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
7339 
7340     case MATCH_COMMIT:
7341     rc = MATCH_NOMATCH;
7342     goto ENDLOOP;
7343 
7344     /* Any other return is either a match, or some kind of error. */
7345 
7346     default:
7347     goto ENDLOOP;
7348     }
7349 
7350   /* Control reaches here for the various types of "no match at this point"
7351   result. Reset the code to MATCH_NOMATCH for subsequent checking. */
7352 
7353   rc = MATCH_NOMATCH;
7354 
7355   /* If PCRE2_FIRSTLINE is set, the match must happen before or at the first
7356   newline in the subject (though it may continue over the newline). Therefore,
7357   if we have just failed to match, starting at a newline, do not continue. */
7358 
7359   if (firstline && IS_NEWLINE(start_match)) break;
7360 
7361   /* Advance to new matching position */
7362 
7363   start_match = new_start_match;
7364 
7365   /* Break the loop if the pattern is anchored or if we have passed the end of
7366   the subject. */
7367 
7368   if (anchored || start_match > end_subject) break;
7369 
7370   /* If we have just passed a CR and we are now at a LF, and the pattern does
7371   not contain any explicit matches for \r or \n, and the newline option is CRLF
7372   or ANY or ANYCRLF, advance the match position by one more code unit. In
7373   normal matching start_match will always be greater than the first position at
7374   this stage, but a failed *SKIP can cause a return at the same point, which is
7375   why the first test exists. */
7376 
7377   if (start_match > subject + start_offset &&
7378       start_match[-1] == CHAR_CR &&
7379       start_match < end_subject &&
7380       *start_match == CHAR_NL &&
7381       (re->flags & PCRE2_HASCRORLF) == 0 &&
7382         (mb->nltype == NLTYPE_ANY ||
7383          mb->nltype == NLTYPE_ANYCRLF ||
7384          mb->nllen == 2))
7385     start_match++;
7386 
7387   mb->mark = NULL;   /* Reset for start of next match attempt */
7388   }                  /* End of for(;;) "bumpalong" loop */
7389 
7390 /* ==========================================================================*/
7391 
7392 /* When we reach here, one of the following stopping conditions is true:
7393 
7394 (1) The match succeeded, either completely, or partially;
7395 
7396 (2) The pattern is anchored or the match was failed after (*COMMIT);
7397 
7398 (3) We are past the end of the subject or the bumpalong limit;
7399 
7400 (4) PCRE2_FIRSTLINE is set and we have failed to match at a newline, because
7401     this option requests that a match occur at or before the first newline in
7402     the subject.
7403 
7404 (5) Some kind of error occurred.
7405 
7406 */
7407 
7408 ENDLOOP:
7409 
7410 /* If end_subject != true_end_subject, it means we are handling invalid UTF,
7411 and have just processed a non-terminal fragment. If this resulted in no match
7412 or a partial match we must carry on to the next fragment (a partial match is
7413 returned to the caller only at the very end of the subject). A loop is used to
7414 avoid trying to match against empty fragments; if the pattern can match an
7415 empty string it would have done so already. */
7416 
7417 #ifdef SUPPORT_UNICODE
7418 if (utf && end_subject != true_end_subject &&
7419     (rc == MATCH_NOMATCH || rc == PCRE2_ERROR_PARTIAL))
7420   {
7421   for (;;)
7422     {
7423     /* Advance past the first bad code unit, and then skip invalid character
7424     starting code units in 8-bit and 16-bit modes. */
7425 
7426     start_match = end_subject + 1;
7427 
7428 #if PCRE2_CODE_UNIT_WIDTH != 32
7429     while (start_match < true_end_subject && NOT_FIRSTCU(*start_match))
7430       start_match++;
7431 #endif
7432 
7433     /* If we have hit the end of the subject, there isn't another non-empty
7434     fragment, so give up. */
7435 
7436     if (start_match >= true_end_subject)
7437       {
7438       rc = MATCH_NOMATCH;  /* In case it was partial */
7439       break;
7440       }
7441 
7442     /* Check the rest of the subject */
7443 
7444     mb->check_subject = start_match;
7445     rc = PRIV(valid_utf)(start_match, length - (start_match - subject),
7446       &(match_data->startchar));
7447 
7448     /* The rest of the subject is valid UTF. */
7449 
7450     if (rc == 0)
7451       {
7452       mb->end_subject = end_subject = true_end_subject;
7453       fragment_options = PCRE2_NOTBOL;
7454       goto FRAGMENT_RESTART;
7455       }
7456 
7457     /* A subsequent UTF error has been found; if the next fragment is
7458     non-empty, set up to process it. Otherwise, let the loop advance. */
7459 
7460     else if (rc < 0)
7461       {
7462       mb->end_subject = end_subject = start_match + match_data->startchar;
7463       if (end_subject > start_match)
7464         {
7465         fragment_options = PCRE2_NOTBOL|PCRE2_NOTEOL;
7466         goto FRAGMENT_RESTART;
7467         }
7468       }
7469     }
7470   }
7471 #endif  /* SUPPORT_UNICODE */
7472 
7473 /* Fill in fields that are always returned in the match data. */
7474 
7475 match_data->code = re;
7476 match_data->mark = mb->mark;
7477 match_data->matchedby = PCRE2_MATCHEDBY_INTERPRETER;
7478 
7479 /* Handle a fully successful match. Set the return code to the number of
7480 captured strings, or 0 if there were too many to fit into the ovector, and then
7481 set the remaining returned values before returning. Make a copy of the subject
7482 string if requested. */
7483 
7484 if (rc == MATCH_MATCH)
7485   {
7486   match_data->rc = ((int)mb->end_offset_top >= 2 * match_data->oveccount)?
7487     0 : (int)mb->end_offset_top/2 + 1;
7488   match_data->startchar = start_match - subject;
7489   match_data->leftchar = mb->start_used_ptr - subject;
7490   match_data->rightchar = ((mb->last_used_ptr > mb->end_match_ptr)?
7491     mb->last_used_ptr : mb->end_match_ptr) - subject;
7492   if ((options & PCRE2_COPY_MATCHED_SUBJECT) != 0)
7493     {
7494     length = CU2BYTES(length + was_zero_terminated);
7495     match_data->subject = match_data->memctl.malloc(length,
7496       match_data->memctl.memory_data);
7497     if (match_data->subject == NULL) return PCRE2_ERROR_NOMEMORY;
7498     memcpy((void *)match_data->subject, subject, length);
7499     match_data->flags |= PCRE2_MD_COPIED_SUBJECT;
7500     }
7501   else match_data->subject = subject;
7502   return match_data->rc;
7503   }
7504 
7505 /* Control gets here if there has been a partial match, an error, or if the
7506 overall match attempt has failed at all permitted starting positions. Any mark
7507 data is in the nomatch_mark field. */
7508 
7509 match_data->mark = mb->nomatch_mark;
7510 
7511 /* For anything other than nomatch or partial match, just return the code. */
7512 
7513 if (rc != MATCH_NOMATCH && rc != PCRE2_ERROR_PARTIAL) match_data->rc = rc;
7514 
7515 /* Handle a partial match. If a "soft" partial match was requested, searching
7516 for a complete match will have continued, and the value of rc at this point
7517 will be MATCH_NOMATCH. For a "hard" partial match, it will already be
7518 PCRE2_ERROR_PARTIAL. */
7519 
7520 else if (match_partial != NULL)
7521   {
7522   match_data->subject = subject;
7523   match_data->ovector[0] = match_partial - subject;
7524   match_data->ovector[1] = end_subject - subject;
7525   match_data->startchar = match_partial - subject;
7526   match_data->leftchar = start_partial - subject;
7527   match_data->rightchar = end_subject - subject;
7528   match_data->rc = PCRE2_ERROR_PARTIAL;
7529   }
7530 
7531 /* Else this is the classic nomatch case. */
7532 
7533 else match_data->rc = PCRE2_ERROR_NOMATCH;
7534 
7535 return match_data->rc;
7536 }
7537 
7538 /* These #undefs are here to enable unity builds with CMake, where several source files are compiled as one translation unit and a macro left defined at the end of one file would still be visible in the next. */
7539 
7540 #undef NLBLOCK /* Block containing newline information */
7541 #undef PSSTART /* Field containing processed string start */
7542 #undef PSEND   /* Field containing processed string end */
7543 
7544 /* End of pcre2_match.c */
7545