xref: /PHP-8.1/ext/pcre/pcre2lib/pcre2_match.c (revision 83a505e8)
1 /*************************************************
2 *      Perl-Compatible Regular Expressions       *
3 *************************************************/
4 
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7 
8                        Written by Philip Hazel
9      Original API code Copyright (c) 1997-2012 University of Cambridge
10           New API code Copyright (c) 2015-2021 University of Cambridge
11 
12 -----------------------------------------------------------------------------
13 Redistribution and use in source and binary forms, with or without
14 modification, are permitted provided that the following conditions are met:
15 
16     * Redistributions of source code must retain the above copyright notice,
17       this list of conditions and the following disclaimer.
18 
19     * Redistributions in binary form must reproduce the above copyright
20       notice, this list of conditions and the following disclaimer in the
21       documentation and/or other materials provided with the distribution.
22 
23     * Neither the name of the University of Cambridge nor the names of its
24       contributors may be used to endorse or promote products derived from
25       this software without specific prior written permission.
26 
27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 POSSIBILITY OF SUCH DAMAGE.
38 -----------------------------------------------------------------------------
39 */
40 
41 
42 #ifdef HAVE_CONFIG_H
43 #include "config.h"
44 #endif
45 
46 /* These defines enable debugging code */
47 
48 /* #define DEBUG_FRAMES_DISPLAY */
49 /* #define DEBUG_SHOW_OPS */
50 /* #define DEBUG_SHOW_RMATCH */
51 
/* display_frames() takes a variable argument list, so <stdarg.h> is needed
whenever the frame display code is compiled in. The guard must use the same
macro name (DEBUG_FRAMES_DISPLAY) as the switch above and as the guard around
display_frames() itself. */

#ifdef DEBUG_FRAMES_DISPLAY
#include <stdarg.h>
#endif
55 
/* These defines identify the name of the block containing "static"
information, and fields within it. They are set up before pcre2_internal.h is
included. NOTE(review): presumably macros in that header expand NLBLOCK,
PSSTART and PSEND to reach the newline data and subject bounds - confirm
against pcre2_internal.h. */

#define NLBLOCK mb              /* Block containing newline information */
#define PSSTART start_subject   /* Field containing processed string start */
#define PSEND   end_subject     /* Field containing processed string end */
62 
63 #include "pcre2_internal.h"
64 
/* Value held in a frame's current_recurse field while no pattern recursion is
in progress. */

#define RECURSE_UNSET 0xffffffffu  /* Bigger than max group number */

/* Masks for identifying the public options that are permitted at match time. */

#define PUBLIC_MATCH_OPTIONS \
  (PCRE2_ANCHORED|PCRE2_ENDANCHORED|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY| \
   PCRE2_NOTEMPTY_ATSTART|PCRE2_NO_UTF_CHECK|PCRE2_PARTIAL_HARD| \
   PCRE2_PARTIAL_SOFT|PCRE2_NO_JIT|PCRE2_COPY_MATCHED_SUBJECT)

/* The JIT mask holds the same options as PUBLIC_MATCH_OPTIONS except for
PCRE2_ANCHORED, PCRE2_ENDANCHORED and PCRE2_NO_JIT. */

#define PUBLIC_JIT_MATCH_OPTIONS \
   (PCRE2_NO_UTF_CHECK|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY|\
    PCRE2_NOTEMPTY_ATSTART|PCRE2_PARTIAL_SOFT|PCRE2_PARTIAL_HARD|\
    PCRE2_COPY_MATCHED_SUBJECT)
78 
/* Non-error returns from and within the match() function. Error returns are
externally defined PCRE2_ERROR_xxx codes, which are all negative. */

#define MATCH_MATCH        1
#define MATCH_NOMATCH      0

/* Special internal returns used in the match() function. Make them
sufficiently negative to avoid the external error codes. */

#define MATCH_ACCEPT       (-999)
#define MATCH_KETRPOS      (-998)
/* The next 5 must be kept together and in sequence so that a test that checks
for any one of them can use a range. MATCH_BACKTRACK_MIN and
MATCH_BACKTRACK_MAX name the two ends of that range (MATCH_COMMIT and
MATCH_THEN respectively), so they must be updated if the list changes. */
#define MATCH_COMMIT       (-997)
#define MATCH_PRUNE        (-996)
#define MATCH_SKIP         (-995)
#define MATCH_SKIP_ARG     (-994)
#define MATCH_THEN         (-993)
#define MATCH_BACKTRACK_MAX MATCH_THEN
#define MATCH_BACKTRACK_MIN MATCH_COMMIT
99 
/* Group frame type values. Zero means the frame is not a group frame. The
lower 16 bits are used for data (e.g. the capture number). Group frames are
used for most groups so that information about the start is easily available at
the end without having to scan back through intermediate frames (backtrack
points). The type itself lives in the upper 16 bits, so a complete value is
formed by OR-ing one of these constants with the 16-bit data. */

#define GF_CAPTURE     0x00010000u
#define GF_NOCAPTURE   0x00020000u
#define GF_CONDASSERT  0x00030000u
#define GF_RECURSE     0x00040000u

/* Masks for the identity and data parts of the group frame type. GF_IDMASK()
keeps only the upper 16 bits (compare the result with a GF_xxx constant);
GF_DATAMASK() keeps only the lower 16 bits. */

#define GF_IDMASK(a)   ((a) & 0xffff0000u)
#define GF_DATAMASK(a) ((a) & 0x0000ffffu)
115 
/* The three kinds of repetition: minimizing (lazy), maximizing (greedy), and
possessive. */

enum {
  REPTYPE_MIN,
  REPTYPE_MAX,
  REPTYPE_POS
};

/* Minimum repeat counts for the common repeats. All three tables below list
the repeat operators in the same order; the two slots for OP_CRRANGE and
OP_CRMINRANGE are placeholders here. */

static const uint32_t rep_min[] = {
  0,    /* *  */
  0,    /* *? */
  1,    /* +  */
  1,    /* +? */
  0,    /* ?  */
  0,    /* ?? */
  0,    /* dummy placefiller for OP_CRRANGE */
  0,    /* dummy placefiller for OP_CRMINRANGE */
  0,    /* OP_CRPOSSTAR */
  1,    /* OP_CRPOSPLUS */
  0     /* OP_CRPOSQUERY */
};

/* Maximum repeat counts for the common repeats; a maximum of UINT32_MAX means
infinity. */

static const uint32_t rep_max[] = {
  UINT32_MAX,    /* *  */
  UINT32_MAX,    /* *? */
  UINT32_MAX,    /* +  */
  UINT32_MAX,    /* +? */
  1,             /* ?  */
  1,             /* ?? */
  0,             /* dummy placefiller for OP_CRRANGE */
  0,             /* dummy placefiller for OP_CRMINRANGE */
  UINT32_MAX,    /* OP_CRPOSSTAR */
  UINT32_MAX,    /* OP_CRPOSPLUS */
  1              /* OP_CRPOSQUERY */
};

/* Repetition type for each repeat operator. Unlike the two tables above, this
one has real entries for the range operators and one extra final entry for
OP_CRPOSRANGE. */

static const uint32_t rep_typ[] = {
  REPTYPE_MAX,   /* *  */
  REPTYPE_MIN,   /* *? */
  REPTYPE_MAX,   /* +  */
  REPTYPE_MIN,   /* +? */
  REPTYPE_MAX,   /* ?  */
  REPTYPE_MIN,   /* ?? */
  REPTYPE_MAX,   /* OP_CRRANGE */
  REPTYPE_MIN,   /* OP_CRMINRANGE */
  REPTYPE_POS,   /* OP_CRPOSSTAR */
  REPTYPE_POS,   /* OP_CRPOSPLUS */
  REPTYPE_POS,   /* OP_CRPOSQUERY */
  REPTYPE_POS    /* OP_CRPOSRANGE */
};
146 
/* Numbers for RMATCH calls at backtracking points. When these lists are
changed, the code at RETURN_SWITCH below must be updated in sync. The RMATCH
macro stores the number it is given in the current frame's return_id field and
plants a label named L_<number> as the place to come back to. RM1 to RM36 are
always defined; the RM1xx values exist only when SUPPORT_WIDE_CHARS is defined
and the RM2xx values only when SUPPORT_UNICODE is defined. */

enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
       RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
       RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
       RM31,  RM32, RM33, RM34, RM35, RM36 };

#ifdef SUPPORT_WIDE_CHARS
enum { RM100=100, RM101 };
#endif

#ifdef SUPPORT_UNICODE
enum { RM200=200, RM201, RM202, RM203, RM204, RM205, RM206, RM207,
       RM208,     RM209, RM210, RM211, RM212, RM213, RM214, RM215,
       RM216,     RM217, RM218, RM219, RM220, RM221, RM222 };
#endif
164 
/* Define short names for general fields in the current backtrack frame, which
is always pointed to by the F variable. Occasional references to fields in
other frames are written out explicitly. There are also some fields in the
current frame whose names start with "temp" that are used for short-term,
localised backtracking memory. These are #defined with Lxxx names at the point
of use and undefined afterwards.

Each name below expands to F->field, so it can be used only where a frame
pointer called F is in scope, and it always refers to whichever frame F points
to at that moment. */

#define Fback_frame        F->back_frame
#define Fcapture_last      F->capture_last
#define Fcurrent_recurse   F->current_recurse
#define Fecode             F->ecode
#define Feptr              F->eptr
#define Fgroup_frame_type  F->group_frame_type
#define Flast_group_offset F->last_group_offset
#define Flength            F->length
#define Fmark              F->mark
#define Frdepth            F->rdepth
#define Fstart_match       F->start_match
#define Foffset_top        F->offset_top
#define Foccu              F->occu
#define Fop                F->op
#define Fovector           F->ovector
#define Freturn_id         F->return_id
188 
189 
190 #ifdef DEBUG_FRAMES_DISPLAY
191 /*************************************************
192 *      Display current frames and contents       *
193 *************************************************/
194 
195 /* This debugging function displays the current set of frames and their
196 contents. It is not called automatically from anywhere, the intention being
197 that calls can be inserted where necessary when debugging frame-related
198 problems.
199 
200 Arguments:
201   f           the file to write to
202   F           the current top frame
203   P           a previous frame of interest
204   frame_size  the frame size
205   mb          points to the match block
206   s           identification text
207 
208 Returns:    nothing
209 */
210 
211 static void
display_frames(FILE * f,heapframe * F,heapframe * P,PCRE2_SIZE frame_size,match_block * mb,const char * s,...)212 display_frames(FILE *f, heapframe *F, heapframe *P, PCRE2_SIZE frame_size,
213   match_block *mb, const char *s, ...)
214 {
215 uint32_t i;
216 heapframe *Q;
217 va_list ap;
218 va_start(ap, s);
219 
220 fprintf(f, "FRAMES ");
221 vfprintf(f, s, ap);
222 va_end(ap);
223 
224 if (P != NULL) fprintf(f, " P=%lu",
225   ((char *)P - (char *)(mb->match_frames))/frame_size);
226 fprintf(f, "\n");
227 
228 for (i = 0, Q = mb->match_frames;
229      Q <= F;
230      i++, Q = (heapframe *)((char *)Q + frame_size))
231   {
232   fprintf(f, "Frame %d type=%x subj=%lu code=%d back=%lu id=%d",
233     i, Q->group_frame_type, Q->eptr - mb->start_subject, *(Q->ecode),
234     Q->back_frame, Q->return_id);
235 
236   if (Q->last_group_offset == PCRE2_UNSET)
237     fprintf(f, " lgoffset=unset\n");
238   else
239     fprintf(f, " lgoffset=%lu\n",  Q->last_group_offset/frame_size);
240   }
241 }
242 
243 #endif
244 
245 
246 
247 /*************************************************
248 *                Process a callout               *
249 *************************************************/
250 
251 /* This function is called for all callouts, whether "standalone" or at the
252 start of a conditional group. Feptr will be pointing to either OP_CALLOUT or
253 OP_CALLOUT_STR. A callout block is allocated in pcre2_match() and initialized
254 with fixed values.
255 
256 Arguments:
257   F          points to the current backtracking frame
258   mb         points to the match block
259   lengthptr  where to return the length of the callout item
260 
261 Returns:     the return from the callout
262              or 0 if no callout function exists
263 */
264 
265 static int
do_callout(heapframe * F,match_block * mb,PCRE2_SIZE * lengthptr)266 do_callout(heapframe *F, match_block *mb, PCRE2_SIZE *lengthptr)
267 {
268 int rc;
269 PCRE2_SIZE save0, save1;
270 PCRE2_SIZE *callout_ovector;
271 pcre2_callout_block *cb;
272 
273 *lengthptr = (*Fecode == OP_CALLOUT)?
274   PRIV(OP_lengths)[OP_CALLOUT] : GET(Fecode, 1 + 2*LINK_SIZE);
275 
276 if (mb->callout == NULL) return 0;   /* No callout function provided */
277 
278 /* The original matching code (pre 10.30) worked directly with the ovector
279 passed by the user, and this was passed to callouts. Now that the working
280 ovector is in the backtracking frame, it no longer needs to reserve space for
281 the overall match offsets (which would waste space in the frame). For backward
282 compatibility, however, we pass capture_top and offset_vector to the callout as
283 if for the extended ovector, and we ensure that the first two slots are unset
284 by preserving and restoring their current contents. Picky compilers complain if
285 references such as Fovector[-2] are use directly, so we set up a separate
286 pointer. */
287 
288 callout_ovector = (PCRE2_SIZE *)(Fovector) - 2;
289 
290 /* The cb->version, cb->subject, cb->subject_length, and cb->start_match fields
291 are set externally. The first 3 never change; the last is updated for each
292 bumpalong. */
293 
294 cb = mb->cb;
295 cb->capture_top      = (uint32_t)Foffset_top/2 + 1;
296 cb->capture_last     = Fcapture_last;
297 cb->offset_vector    = callout_ovector;
298 cb->mark             = mb->nomatch_mark;
299 cb->current_position = (PCRE2_SIZE)(Feptr - mb->start_subject);
300 cb->pattern_position = GET(Fecode, 1);
301 cb->next_item_length = GET(Fecode, 1 + LINK_SIZE);
302 
303 if (*Fecode == OP_CALLOUT)  /* Numerical callout */
304   {
305   cb->callout_number = Fecode[1 + 2*LINK_SIZE];
306   cb->callout_string_offset = 0;
307   cb->callout_string = NULL;
308   cb->callout_string_length = 0;
309   }
310 else  /* String callout */
311   {
312   cb->callout_number = 0;
313   cb->callout_string_offset = GET(Fecode, 1 + 3*LINK_SIZE);
314   cb->callout_string = Fecode + (1 + 4*LINK_SIZE) + 1;
315   cb->callout_string_length =
316     *lengthptr - (1 + 4*LINK_SIZE) - 2;
317   }
318 
319 save0 = callout_ovector[0];
320 save1 = callout_ovector[1];
321 callout_ovector[0] = callout_ovector[1] = PCRE2_UNSET;
322 rc = mb->callout(cb, mb->callout_data);
323 callout_ovector[0] = save0;
324 callout_ovector[1] = save1;
325 cb->callout_flags = 0;
326 return rc;
327 }
328 
329 
330 
331 /*************************************************
332 *          Match a back-reference                *
333 *************************************************/
334 
335 /* This function is called only when it is known that the offset lies within
336 the offsets that have so far been used in the match. Note that in caseless
337 UTF-8 mode, the number of subject bytes matched may be different to the number
338 of reference bytes. (In theory this could also happen in UTF-16 mode, but it
339 seems unlikely.)
340 
341 Arguments:
342   offset      index into the offset vector
343   caseless    TRUE if caseless
344   F           the current backtracking frame pointer
345   mb          points to match block
346   lengthptr   pointer for returning the length matched
347 
348 Returns:      = 0 sucessful match; number of code units matched is set
349               < 0 no match
350               > 0 partial match
351 */
352 
353 static int
match_ref(PCRE2_SIZE offset,BOOL caseless,heapframe * F,match_block * mb,PCRE2_SIZE * lengthptr)354 match_ref(PCRE2_SIZE offset, BOOL caseless, heapframe *F, match_block *mb,
355   PCRE2_SIZE *lengthptr)
356 {
357 PCRE2_SPTR p;
358 PCRE2_SIZE length;
359 PCRE2_SPTR eptr;
360 PCRE2_SPTR eptr_start;
361 
362 /* Deal with an unset group. The default is no match, but there is an option to
363 match an empty string. */
364 
365 if (offset >= Foffset_top || Fovector[offset] == PCRE2_UNSET)
366   {
367   if ((mb->poptions & PCRE2_MATCH_UNSET_BACKREF) != 0)
368     {
369     *lengthptr = 0;
370     return 0;      /* Match */
371     }
372   else return -1;  /* No match */
373   }
374 
375 /* Separate the caseless and UTF cases for speed. */
376 
377 eptr = eptr_start = Feptr;
378 p = mb->start_subject + Fovector[offset];
379 length = Fovector[offset+1] - Fovector[offset];
380 
381 if (caseless)
382   {
383 #if defined SUPPORT_UNICODE
384   BOOL utf = (mb->poptions & PCRE2_UTF) != 0;
385 
386   if (utf || (mb->poptions & PCRE2_UCP) != 0)
387     {
388     PCRE2_SPTR endptr = p + length;
389 
390     /* Match characters up to the end of the reference. NOTE: the number of
391     code units matched may differ, because in UTF-8 there are some characters
392     whose upper and lower case codes have different numbers of bytes. For
393     example, U+023A (2 bytes in UTF-8) is the upper case version of U+2C65 (3
394     bytes in UTF-8); a sequence of 3 of the former uses 6 bytes, as does a
395     sequence of two of the latter. It is important, therefore, to check the
396     length along the reference, not along the subject (earlier code did this
397     wrong). UCP without uses Unicode properties but without UTF encoding. */
398 
399     while (p < endptr)
400       {
401       uint32_t c, d;
402       const ucd_record *ur;
403       if (eptr >= mb->end_subject) return 1;   /* Partial match */
404 
405       if (utf)
406         {
407         GETCHARINC(c, eptr);
408         GETCHARINC(d, p);
409         }
410       else
411         {
412         c = *eptr++;
413         d = *p++;
414         }
415 
416       ur = GET_UCD(d);
417       if (c != d && c != (uint32_t)((int)d + ur->other_case))
418         {
419         const uint32_t *pp = PRIV(ucd_caseless_sets) + ur->caseset;
420         for (;;)
421           {
422           if (c < *pp) return -1;  /* No match */
423           if (c == *pp++) break;
424           }
425         }
426       }
427     }
428   else
429 #endif
430 
431   /* Not in UTF or UCP mode */
432     {
433     for (; length > 0; length--)
434       {
435       uint32_t cc, cp;
436       if (eptr >= mb->end_subject) return 1;   /* Partial match */
437       cc = UCHAR21TEST(eptr);
438       cp = UCHAR21TEST(p);
439       if (TABLE_GET(cp, mb->lcc, cp) != TABLE_GET(cc, mb->lcc, cc))
440         return -1;  /* No match */
441       p++;
442       eptr++;
443       }
444     }
445   }
446 
447 /* In the caseful case, we can just compare the code units, whether or not we
448 are in UTF and/or UCP mode. When partial matching, we have to do this unit by
449 unit. */
450 
451 else
452   {
453   if (mb->partial != 0)
454     {
455     for (; length > 0; length--)
456       {
457       if (eptr >= mb->end_subject) return 1;   /* Partial match */
458       if (UCHAR21INCTEST(p) != UCHAR21INCTEST(eptr)) return -1;  /* No match */
459       }
460     }
461 
462   /* Not partial matching */
463 
464   else
465     {
466     if ((PCRE2_SIZE)(mb->end_subject - eptr) < length) return 1; /* Partial */
467     if (memcmp(p, eptr, CU2BYTES(length)) != 0) return -1;  /* No match */
468     eptr += length;
469     }
470   }
471 
472 *lengthptr = eptr - eptr_start;
473 return 0;  /* Match */
474 }
475 
476 
477 
478 /******************************************************************************
479 *******************************************************************************
480                    "Recursion" in the match() function
481 
482 The original match() function was highly recursive, but this proved to be the
483 source of a number of problems over the years, mostly because of the relatively
484 small system stacks that are commonly found. As new features were added to
485 patterns, various kludges were invented to reduce the amount of stack used,
486 making the code hard to understand in places.
487 
488 A version did exist that used individual frames on the heap instead of calling
489 match() recursively, but this ran substantially slower. The current version is
490 a refactoring that uses a vector of frames to remember backtracking points.
491 This runs no slower, and possibly even a bit faster than the original recursive
492 implementation. An initial vector of size START_FRAMES_SIZE (enough for maybe
493 50 frames) is allocated on the system stack. If this is not big enough, the
494 heap is used for a larger vector.
495 
496 *******************************************************************************
497 ******************************************************************************/
498 
499 
500 
501 
502 /*************************************************
503 *       Macros for the match() function          *
504 *************************************************/
505 
/* These macros pack up tests that are used for partial matching several times
in the code. The second one is used when we already know we are past the end of
the subject. We set the "hit end" flag if the pointer is at the end of the
subject and either (a) the pointer is past the earliest inspected character
(i.e. something has been matched, even if not part of the actual matched
string), or (b) the pattern contains a lookbehind. These are the conditions for
which adding more characters may allow the current match to continue.

For hard partial matching, we immediately return a partial match. Otherwise,
carrying on means that a complete match on the current subject will be sought.
A partial match is returned only if no complete match can be found.

Both macros expand to a bare "if" statement that refers to Feptr (and hence F)
and mb, and that may execute a return of PCRE2_ERROR_PARTIAL from the enclosing
function. They are not wrapped in do { } while (0), so take care when using
them as the body of an unbraced if/else. */

#define CHECK_PARTIAL()\
  if (Feptr >= mb->end_subject) \
    { \
    SCHECK_PARTIAL(); \
    }

#define SCHECK_PARTIAL()\
  if (mb->partial != 0 && \
      (Feptr > mb->start_used_ptr || mb->allowemptypartial)) \
    { \
    mb->hitend = TRUE; \
    if (mb->partial > 1) return PCRE2_ERROR_PARTIAL; \
    }
531 
532 
/* These macros are used to implement backtracking. They simulate a recursive
call to the match() function by means of a local vector of frames which
remember the backtracking points.

RMATCH(ra,rb) stands for the "call": it sets start_ecode to ra, records rb in
the current frame's return_id field, and jumps to MATCH_RECURSE. It also plants
the label L_<rb> immediately after the jump, marking where processing resumes.
Because of that label, each rb value can be used in only one RMATCH within a
function.

RRETURN(ra) stands for the "return": it stores ra in rrc and jumps to
RETURN_SWITCH. */

#define RMATCH(ra,rb)\
  {\
  start_ecode = ra;\
  Freturn_id = rb;\
  goto MATCH_RECURSE;\
  L_##rb:;\
  }

#define RRETURN(ra)\
  {\
  rrc = ra;\
  goto RETURN_SWITCH;\
  }
550 
551 
552 
553 /*************************************************
554 *         Match from current position            *
555 *************************************************/
556 
557 /* This function is called to run one match attempt at a single starting point
558 in the subject.
559 
560 Performance note: It might be tempting to extract commonly used fields from the
561 mb structure (e.g. end_subject) into individual variables to improve
562 performance. Tests using gcc on a SPARC disproved this; in the first case, it
563 made performance worse.
564 
565 Arguments:
566    start_eptr   starting character in subject
567    start_ecode  starting position in compiled code
568    ovector      pointer to the final output vector
569    oveccount    number of pairs in ovector
570    top_bracket  number of capturing parentheses in the pattern
571    frame_size   size of each backtracking frame
572    mb           pointer to "static" variables block
573 
574 Returns:        MATCH_MATCH if matched            )  these values are >= 0
575                 MATCH_NOMATCH if failed to match  )
576                 negative MATCH_xxx value for PRUNE, SKIP, etc
577                 negative PCRE2_ERROR_xxx value if aborted by an error condition
578                 (e.g. stopped by repeated call or depth limit)
579 */
580 
581 static int
match(PCRE2_SPTR start_eptr,PCRE2_SPTR start_ecode,PCRE2_SIZE * ovector,uint16_t oveccount,uint16_t top_bracket,PCRE2_SIZE frame_size,match_block * mb)582 match(PCRE2_SPTR start_eptr, PCRE2_SPTR start_ecode, PCRE2_SIZE *ovector,
583   uint16_t oveccount, uint16_t top_bracket, PCRE2_SIZE frame_size,
584   match_block *mb)
585 {
586 /* Frame-handling variables */
587 
588 heapframe *F;           /* Current frame pointer */
589 heapframe *N = NULL;    /* Temporary frame pointers */
590 heapframe *P = NULL;
591 heapframe *assert_accept_frame = NULL;  /* For passing back a frame with captures */
592 PCRE2_SIZE frame_copy_size;     /* Amount to copy when creating a new frame */
593 
/* Local variables that do not need to be preserved over calls to RMATCH(). */
595 
596 PCRE2_SPTR bracode;     /* Temp pointer to start of group */
597 PCRE2_SIZE offset;      /* Used for group offsets */
598 PCRE2_SIZE length;      /* Used for various length calculations */
599 
600 int rrc;                /* Return from functions & backtracking "recursions" */
601 #ifdef SUPPORT_UNICODE
602 int proptype;           /* Type of character property */
603 #endif
604 
605 uint32_t i;             /* Used for local loops */
606 uint32_t fc;            /* Character values */
607 uint32_t number;        /* Used for group and other numbers */
608 uint32_t reptype = 0;   /* Type of repetition (0 to avoid compiler warning) */
609 uint32_t group_frame_type;  /* Specifies type for new group frames */
610 
611 BOOL condition;         /* Used in conditional groups */
612 BOOL cur_is_word;       /* Used in "word" tests */
613 BOOL prev_is_word;      /* Used in "word" tests */
614 
615 /* UTF and UCP flags */
616 
617 #ifdef SUPPORT_UNICODE
618 BOOL utf = (mb->poptions & PCRE2_UTF) != 0;
619 BOOL ucp = (mb->poptions & PCRE2_UCP) != 0;
620 #else
621 BOOL utf = FALSE;  /* Required for convenience even when no Unicode support */
622 #endif
623 
624 /* This is the length of the last part of a backtracking frame that must be
625 copied when a new frame is created. */
626 
627 frame_copy_size = frame_size - offsetof(heapframe, eptr);
628 
629 /* Set up the first current frame at the start of the vector, and initialize
630 fields that are not reset for new frames. */
631 
632 F = mb->match_frames;
633 Frdepth = 0;                        /* "Recursion" depth */
634 Fcapture_last = 0;                  /* Number of most recent capture */
635 Fcurrent_recurse = RECURSE_UNSET;   /* Not pattern recursing. */
636 Fstart_match = Feptr = start_eptr;  /* Current data pointer and start match */
637 Fmark = NULL;                       /* Most recent mark */
638 Foffset_top = 0;                    /* End of captures within the frame */
639 Flast_group_offset = PCRE2_UNSET;   /* Saved frame of most recent group */
640 group_frame_type = 0;               /* Not a start of group frame */
641 goto NEW_FRAME;                     /* Start processing with this frame */
642 
643 /* Come back here when we want to create a new frame for remembering a
644 backtracking point. */
645 
646 MATCH_RECURSE:
647 
648 /* Set up a new backtracking frame. If the vector is full, get a new one
649 on the heap, doubling the size, but constrained by the heap limit. */
650 
651 N = (heapframe *)((char *)F + frame_size);
652 if (N >= mb->match_frames_top)
653   {
654   PCRE2_SIZE newsize = mb->frame_vector_size * 2;
655   heapframe *new;
656 
657   if ((newsize / 1024) > mb->heap_limit)
658     {
659     PCRE2_SIZE maxsize = ((mb->heap_limit * 1024)/frame_size) * frame_size;
660     if (mb->frame_vector_size >= maxsize) return PCRE2_ERROR_HEAPLIMIT;
661     newsize = maxsize;
662     }
663 
664   new = mb->memctl.malloc(newsize, mb->memctl.memory_data);
665   if (new == NULL) return PCRE2_ERROR_NOMEMORY;
666   memcpy(new, mb->match_frames, mb->frame_vector_size);
667 
668   F = (heapframe *)((char *)new + ((char *)F - (char *)mb->match_frames));
669   N = (heapframe *)((char *)F + frame_size);
670 
671   if (mb->match_frames != mb->stack_frames)
672     mb->memctl.free(mb->match_frames, mb->memctl.memory_data);
673   mb->match_frames = new;
674   mb->match_frames_top = (heapframe *)((char *)mb->match_frames + newsize);
675   mb->frame_vector_size = newsize;
676   }
677 
678 #ifdef DEBUG_SHOW_RMATCH
679 fprintf(stderr, "++ RMATCH %2d frame=%d", Freturn_id, Frdepth + 1);
680 if (group_frame_type != 0)
681   {
682   fprintf(stderr, " type=%x ", group_frame_type);
683   switch (GF_IDMASK(group_frame_type))
684     {
685     case GF_CAPTURE:
686     fprintf(stderr, "capture=%d", GF_DATAMASK(group_frame_type));
687     break;
688 
689     case GF_NOCAPTURE:
690     fprintf(stderr, "nocapture op=%d", GF_DATAMASK(group_frame_type));
691     break;
692 
693     case GF_CONDASSERT:
694     fprintf(stderr, "condassert op=%d", GF_DATAMASK(group_frame_type));
695     break;
696 
697     case GF_RECURSE:
698     fprintf(stderr, "recurse=%d", GF_DATAMASK(group_frame_type));
699     break;
700 
701     default:
702     fprintf(stderr, "*** unknown ***");
703     break;
704     }
705   }
706 fprintf(stderr, "\n");
707 #endif
708 
709 /* Copy those fields that must be copied into the new frame, increase the
710 "recursion" depth (i.e. the new frame's index) and then make the new frame
711 current. */
712 
713 memcpy((char *)N + offsetof(heapframe, eptr),
714        (char *)F + offsetof(heapframe, eptr),
715        frame_copy_size);
716 
717 N->rdepth = Frdepth + 1;
718 F = N;
719 
720 /* Carry on processing with a new frame. */
721 
722 NEW_FRAME:
723 Fgroup_frame_type = group_frame_type;
724 Fecode = start_ecode;      /* Starting code pointer */
725 Fback_frame = frame_size;  /* Default is go back one frame */
726 
727 /* If this is a special type of group frame, remember its offset for quick
728 access at the end of the group. If this is a recursion, set a new current
729 recursion value. */
730 
731 if (group_frame_type != 0)
732   {
733   Flast_group_offset = (char *)F - (char *)mb->match_frames;
734   if (GF_IDMASK(group_frame_type) == GF_RECURSE)
735     Fcurrent_recurse = GF_DATAMASK(group_frame_type);
736   group_frame_type = 0;
737   }
738 
739 
740 /* ========================================================================= */
741 /* This is the main processing loop. First check that we haven't recorded too
742 many backtracks (search tree is too large), or that we haven't exceeded the
743 recursive depth limit (used too many backtracking frames). If not, process the
744 opcodes. */
745 
746 if (mb->match_call_count++ >= mb->match_limit) return PCRE2_ERROR_MATCHLIMIT;
747 if (Frdepth >= mb->match_limit_depth) return PCRE2_ERROR_DEPTHLIMIT;
748 
749 for (;;)
750   {
751 #ifdef DEBUG_SHOW_OPS
752 fprintf(stderr, "++ op=%d\n", *Fecode);
753 #endif
754 
755   Fop = (uint8_t)(*Fecode);  /* Cast needed for 16-bit and 32-bit modes */
756   switch(Fop)
757     {
758     /* ===================================================================== */
759     /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes, to close
760     any currently open capturing brackets. Unlike reaching the end of a group,
761     where we know the starting frame is at the top of the chained frames, in
762     this case we have to search back for the relevant frame in case other types
763     of group that use chained frames have intervened. Multiple OP_CLOSEs always
764     come innermost first, which matches the chain order. We can ignore this in
765     a recursion, because captures are not passed out of recursions. */
766 
767     case OP_CLOSE:
768     if (Fcurrent_recurse == RECURSE_UNSET)
769       {
770       number = GET2(Fecode, 1);
771       offset = Flast_group_offset;
772       for(;;)
773         {
774         if (offset == PCRE2_UNSET) return PCRE2_ERROR_INTERNAL;
775         N = (heapframe *)((char *)mb->match_frames + offset);
776         P = (heapframe *)((char *)N - frame_size);
777         if (N->group_frame_type == (GF_CAPTURE | number)) break;
778         offset = P->last_group_offset;
779         }
780       offset = (number << 1) - 2;
781       Fcapture_last = number;
782       Fovector[offset] = P->eptr - mb->start_subject;
783       Fovector[offset+1] = Feptr - mb->start_subject;
784       if (offset >= Foffset_top) Foffset_top = offset + 2;
785       }
786     Fecode += PRIV(OP_lengths)[*Fecode];
787     break;
788 
789 
790     /* ===================================================================== */
791     /* Real or forced end of the pattern, assertion, or recursion. In an
792     assertion ACCEPT, update the last used pointer and remember the current
793     frame so that the captures and mark can be fished out of it. */
794 
795     case OP_ASSERT_ACCEPT:
796     if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
797     assert_accept_frame = F;
798     RRETURN(MATCH_ACCEPT);
799 
800     /* If recursing, we have to find the most recent recursion. */
801 
802     case OP_ACCEPT:
803     case OP_END:
804 
805     /* Handle end of a recursion. */
806 
807     if (Fcurrent_recurse != RECURSE_UNSET)
808       {
809       offset = Flast_group_offset;
810       for(;;)
811         {
812         if (offset == PCRE2_UNSET) return PCRE2_ERROR_INTERNAL;
813         N = (heapframe *)((char *)mb->match_frames + offset);
814         P = (heapframe *)((char *)N - frame_size);
815         if (GF_IDMASK(N->group_frame_type) == GF_RECURSE) break;
816         offset = P->last_group_offset;
817         }
818 
819       /* N is now the frame of the recursion; the previous frame is at the
820       OP_RECURSE position. Go back there, copying the current subject position
821       and mark, and the start_match position (\K might have changed it), and
822       then move on past the OP_RECURSE. */
823 
824       P->eptr = Feptr;
825       P->mark = Fmark;
826       P->start_match = Fstart_match;
827       F = P;
828       Fecode += 1 + LINK_SIZE;
829       continue;
830       }
831 
832     /* Not a recursion. Fail for an empty string match if either PCRE2_NOTEMPTY
833     is set, or if PCRE2_NOTEMPTY_ATSTART is set and we have matched at the
834     start of the subject. In both cases, backtracking will then try other
835     alternatives, if any. */
836 
837     if (Feptr == Fstart_match &&
838          ((mb->moptions & PCRE2_NOTEMPTY) != 0 ||
839            ((mb->moptions & PCRE2_NOTEMPTY_ATSTART) != 0 &&
840              Fstart_match == mb->start_subject + mb->start_offset)))
841       RRETURN(MATCH_NOMATCH);
842 
843     /* Also fail if PCRE2_ENDANCHORED is set and the end of the match is not
844     the end of the subject. After (*ACCEPT) we fail the entire match (at this
845     position) but backtrack on reaching the end of the pattern. */
846 
847     if (Feptr < mb->end_subject &&
848         ((mb->moptions | mb->poptions) & PCRE2_ENDANCHORED) != 0)
849       {
850       if (Fop == OP_END) RRETURN(MATCH_NOMATCH);
851       return MATCH_NOMATCH;
852       }
853 
854     /* We have a successful match of the whole pattern. Record the result and
855     then do a direct return from the function. If there is space in the offset
856     vector, set any pairs that follow the highest-numbered captured string but
857     are less than the number of capturing groups in the pattern to PCRE2_UNSET.
858     It is documented that this happens. "Gaps" are set to PCRE2_UNSET
859     dynamically. It is only those at the end that need setting here. */
860 
861     mb->end_match_ptr = Feptr;           /* Record where we ended */
862     mb->end_offset_top = Foffset_top;    /* and how many extracts were taken */
863     mb->mark = Fmark;                    /* and the last success mark */
864     if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
865 
866     ovector[0] = Fstart_match - mb->start_subject;
867     ovector[1] = Feptr - mb->start_subject;
868 
869     /* Set i to the smaller of the sizes of the external and frame ovectors. */
870 
871     i = 2 * ((top_bracket + 1 > oveccount)? oveccount : top_bracket + 1);
872     memcpy(ovector + 2, Fovector, (i - 2) * sizeof(PCRE2_SIZE));
873     while (--i >= Foffset_top + 2) ovector[i] = PCRE2_UNSET;
874     return MATCH_MATCH;  /* Note: NOT RRETURN */
875 
876 
877     /*===================================================================== */
878     /* Match any single character type except newline; have to take care with
879     CRLF newlines and partial matching. */
880 
881     case OP_ANY:
882     if (IS_NEWLINE(Feptr)) RRETURN(MATCH_NOMATCH);
883     if (mb->partial != 0 &&
884         Feptr == mb->end_subject - 1 &&
885         NLBLOCK->nltype == NLTYPE_FIXED &&
886         NLBLOCK->nllen == 2 &&
887         UCHAR21TEST(Feptr) == NLBLOCK->nl[0])
888       {
889       mb->hitend = TRUE;
890       if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
891       }
892     /* Fall through */
893 
894     /* Match any single character whatsoever. */
895 
896     case OP_ALLANY:
897     if (Feptr >= mb->end_subject)  /* DO NOT merge the Feptr++ here; it must */
898       {                            /* not be updated before SCHECK_PARTIAL. */
899       SCHECK_PARTIAL();
900       RRETURN(MATCH_NOMATCH);
901       }
902     Feptr++;
903 #ifdef SUPPORT_UNICODE
904     if (utf) ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
905 #endif
906     Fecode++;
907     break;
908 
909 
910     /* ===================================================================== */
911     /* Match a single code unit, even in UTF mode. This opcode really does
912     match any code unit, even newline. (It really should be called ANYCODEUNIT,
913     of course - the byte name is from pre-16 bit days.) */
914 
915     case OP_ANYBYTE:
916     if (Feptr >= mb->end_subject)   /* DO NOT merge the Feptr++ here; it must */
917       {                             /* not be updated before SCHECK_PARTIAL. */
918       SCHECK_PARTIAL();
919       RRETURN(MATCH_NOMATCH);
920       }
921     Feptr++;
922     Fecode++;
923     break;
924 
925 
926     /* ===================================================================== */
927     /* Match a single character, casefully */
928 
929     case OP_CHAR:
930 #ifdef SUPPORT_UNICODE
931     if (utf)
932       {
933       Flength = 1;
934       Fecode++;
935       GETCHARLEN(fc, Fecode, Flength);
936       if (Flength > (PCRE2_SIZE)(mb->end_subject - Feptr))
937         {
938         CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
939         RRETURN(MATCH_NOMATCH);
940         }
941       for (; Flength > 0; Flength--)
942         {
943         if (*Fecode++ != UCHAR21INC(Feptr)) RRETURN(MATCH_NOMATCH);
944         }
945       }
946     else
947 #endif
948 
949     /* Not UTF mode */
950       {
951       if (mb->end_subject - Feptr < 1)
952         {
953         SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
954         RRETURN(MATCH_NOMATCH);
955         }
956       if (Fecode[1] != *Feptr++) RRETURN(MATCH_NOMATCH);
957       Fecode += 2;
958       }
959     break;
960 
961 
962     /* ===================================================================== */
963     /* Match a single character, caselessly. If we are at the end of the
964     subject, give up immediately. We get here only when the pattern character
965     has at most one other case. Characters with more than two cases are coded
966     as OP_PROP with the pseudo-property PT_CLIST. */
967 
968     case OP_CHARI:
969     if (Feptr >= mb->end_subject)
970       {
971       SCHECK_PARTIAL();
972       RRETURN(MATCH_NOMATCH);
973       }
974 
975 #ifdef SUPPORT_UNICODE
976     if (utf)
977       {
978       Flength = 1;
979       Fecode++;
980       GETCHARLEN(fc, Fecode, Flength);
981 
982       /* If the pattern character's value is < 128, we know that its other case
983       (if any) is also < 128 (and therefore only one code unit long in all
984       code-unit widths), so we can use the fast lookup table. We checked above
985       that there is at least one character left in the subject. */
986 
987       if (fc < 128)
988         {
989         uint32_t cc = UCHAR21(Feptr);
990         if (mb->lcc[fc] != TABLE_GET(cc, mb->lcc, cc)) RRETURN(MATCH_NOMATCH);
991         Fecode++;
992         Feptr++;
993         }
994 
995       /* Otherwise we must pick up the subject character and use Unicode
996       property support to test its other case. Note that we cannot use the
997       value of "Flength" to check for sufficient bytes left, because the other
998       case of the character may have more or fewer code units. */
999 
1000       else
1001         {
1002         uint32_t dc;
1003         GETCHARINC(dc, Feptr);
1004         Fecode += Flength;
1005         if (dc != fc && dc != UCD_OTHERCASE(fc)) RRETURN(MATCH_NOMATCH);
1006         }
1007       }
1008 
1009     /* If UCP is set without UTF we must do the same as above, but with one
1010     character per code unit. */
1011 
1012     else if (ucp)
1013       {
1014       uint32_t cc = UCHAR21(Feptr);
1015       fc = Fecode[1];
1016       if (fc < 128)
1017         {
1018         if (mb->lcc[fc] != TABLE_GET(cc, mb->lcc, cc)) RRETURN(MATCH_NOMATCH);
1019         }
1020       else
1021         {
1022         if (cc != fc && cc != UCD_OTHERCASE(fc)) RRETURN(MATCH_NOMATCH);
1023         }
1024       Feptr++;
1025       Fecode += 2;
1026       }
1027 
1028     else
1029 #endif   /* SUPPORT_UNICODE */
1030 
1031     /* Not UTF or UCP mode; use the table for characters < 256. */
1032       {
1033       if (TABLE_GET(Fecode[1], mb->lcc, Fecode[1])
1034           != TABLE_GET(*Feptr, mb->lcc, *Feptr)) RRETURN(MATCH_NOMATCH);
1035       Feptr++;
1036       Fecode += 2;
1037       }
1038     break;
1039 
1040 
1041     /* ===================================================================== */
1042     /* Match a single character that is not the given character. */
1043 
1044     case OP_NOT:
1045     case OP_NOTI:
1046     if (Feptr >= mb->end_subject)
1047       {
1048       SCHECK_PARTIAL();
1049       RRETURN(MATCH_NOMATCH);
1050       }
1051 
1052 #ifdef SUPPORT_UNICODE
1053     if (utf)
1054       {
1055       uint32_t ch;
1056       Fecode++;
1057       GETCHARINC(ch, Fecode);
1058       GETCHARINC(fc, Feptr);
1059       if (ch == fc)
1060         {
1061         RRETURN(MATCH_NOMATCH);  /* Caseful match */
1062         }
1063       else if (Fop == OP_NOTI)   /* If caseless */
1064         {
1065         if (ch > 127)
1066           ch = UCD_OTHERCASE(ch);
1067         else
1068           ch = (mb->fcc)[ch];
1069         if (ch == fc) RRETURN(MATCH_NOMATCH);
1070         }
1071       }
1072 
1073     /* UCP without UTF is as above, but with one character per code unit. */
1074 
1075     else if (ucp)
1076       {
1077       uint32_t ch;
1078       fc = UCHAR21INC(Feptr);
1079       ch = Fecode[1];
1080       Fecode += 2;
1081 
1082       if (ch == fc)
1083         {
1084         RRETURN(MATCH_NOMATCH);  /* Caseful match */
1085         }
1086       else if (Fop == OP_NOTI)   /* If caseless */
1087         {
1088         if (ch > 127)
1089           ch = UCD_OTHERCASE(ch);
1090         else
1091           ch = (mb->fcc)[ch];
1092         if (ch == fc) RRETURN(MATCH_NOMATCH);
1093         }
1094       }
1095 
1096     else
1097 #endif  /* SUPPORT_UNICODE */
1098 
1099     /* Neither UTF nor UCP is set */
1100 
1101       {
1102       uint32_t ch = Fecode[1];
1103       fc = UCHAR21INC(Feptr);
1104       if (ch == fc || (Fop == OP_NOTI && TABLE_GET(ch, mb->fcc, ch) == fc))
1105         RRETURN(MATCH_NOMATCH);
1106       Fecode += 2;
1107       }
1108     break;
1109 
1110 
1111     /* ===================================================================== */
1112     /* Match a single character repeatedly. */
1113 
1114 #define Loclength    F->temp_size
1115 #define Lstart_eptr  F->temp_sptr[0]
1116 #define Lcharptr     F->temp_sptr[1]
1117 #define Lmin         F->temp_32[0]
1118 #define Lmax         F->temp_32[1]
1119 #define Lc           F->temp_32[2]
1120 #define Loc          F->temp_32[3]
1121 
1122     case OP_EXACT:
1123     case OP_EXACTI:
1124     Lmin = Lmax = GET2(Fecode, 1);
1125     Fecode += 1 + IMM2_SIZE;
1126     goto REPEATCHAR;
1127 
1128     case OP_POSUPTO:
1129     case OP_POSUPTOI:
1130     reptype = REPTYPE_POS;
1131     Lmin = 0;
1132     Lmax = GET2(Fecode, 1);
1133     Fecode += 1 + IMM2_SIZE;
1134     goto REPEATCHAR;
1135 
1136     case OP_UPTO:
1137     case OP_UPTOI:
1138     reptype = REPTYPE_MAX;
1139     Lmin = 0;
1140     Lmax = GET2(Fecode, 1);
1141     Fecode += 1 + IMM2_SIZE;
1142     goto REPEATCHAR;
1143 
1144     case OP_MINUPTO:
1145     case OP_MINUPTOI:
1146     reptype = REPTYPE_MIN;
1147     Lmin = 0;
1148     Lmax = GET2(Fecode, 1);
1149     Fecode += 1 + IMM2_SIZE;
1150     goto REPEATCHAR;
1151 
1152     case OP_POSSTAR:
1153     case OP_POSSTARI:
1154     reptype = REPTYPE_POS;
1155     Lmin = 0;
1156     Lmax = UINT32_MAX;
1157     Fecode++;
1158     goto REPEATCHAR;
1159 
1160     case OP_POSPLUS:
1161     case OP_POSPLUSI:
1162     reptype = REPTYPE_POS;
1163     Lmin = 1;
1164     Lmax = UINT32_MAX;
1165     Fecode++;
1166     goto REPEATCHAR;
1167 
1168     case OP_POSQUERY:
1169     case OP_POSQUERYI:
1170     reptype = REPTYPE_POS;
1171     Lmin = 0;
1172     Lmax = 1;
1173     Fecode++;
1174     goto REPEATCHAR;
1175 
1176     case OP_STAR:
1177     case OP_STARI:
1178     case OP_MINSTAR:
1179     case OP_MINSTARI:
1180     case OP_PLUS:
1181     case OP_PLUSI:
1182     case OP_MINPLUS:
1183     case OP_MINPLUSI:
1184     case OP_QUERY:
1185     case OP_QUERYI:
1186     case OP_MINQUERY:
1187     case OP_MINQUERYI:
1188     fc = *Fecode++ - ((Fop < OP_STARI)? OP_STAR : OP_STARI);
1189     Lmin = rep_min[fc];
1190     Lmax = rep_max[fc];
1191     reptype = rep_typ[fc];
1192 
1193     /* Common code for all repeated single-character matches. We first check
1194     for the minimum number of characters. If the minimum equals the maximum, we
1195     are done. Otherwise, if minimizing, check the rest of the pattern for a
1196     match; if there isn't one, advance up to the maximum, one character at a
1197     time.
1198 
1199     If maximizing, advance up to the maximum number of matching characters,
1200     until Feptr is past the end of the maximum run. If possessive, we are
1201     then done (no backing up). Otherwise, match at this position; anything
1202     other than no match is immediately returned. For nomatch, back up one
1203     character (or two code units, if we are matching \R and the last thing
1204     matched was \r\n) and try again, repeating until we reach the first
1205     optional character position.
1206 
1207     The various UTF/non-UTF and caseful/caseless cases are handled separately,
1208     for speed. */
1209 
1210     REPEATCHAR:
1211 #ifdef SUPPORT_UNICODE
1212     if (utf)
1213       {
1214       Flength = 1;
1215       Lcharptr = Fecode;
1216       GETCHARLEN(fc, Fecode, Flength);
1217       Fecode += Flength;
1218 
1219       /* Handle multi-code-unit character matching, caseful and caseless. */
1220 
1221       if (Flength > 1)
1222         {
1223         uint32_t othercase;
1224 
1225         if (Fop >= OP_STARI &&     /* Caseless */
1226             (othercase = UCD_OTHERCASE(fc)) != fc)
1227           Loclength = PRIV(ord2utf)(othercase, Foccu);
1228         else Loclength = 0;
1229 
1230         for (i = 1; i <= Lmin; i++)
1231           {
1232           if (Feptr <= mb->end_subject - Flength &&
1233             memcmp(Feptr, Lcharptr, CU2BYTES(Flength)) == 0) Feptr += Flength;
1234           else if (Loclength > 0 &&
1235                    Feptr <= mb->end_subject - Loclength &&
1236                    memcmp(Feptr, Foccu, CU2BYTES(Loclength)) == 0)
1237             Feptr += Loclength;
1238           else
1239             {
1240             CHECK_PARTIAL();
1241             RRETURN(MATCH_NOMATCH);
1242             }
1243           }
1244 
1245         if (Lmin == Lmax) continue;
1246 
1247         if (reptype == REPTYPE_MIN)
1248           {
1249           for (;;)
1250             {
1251             RMATCH(Fecode, RM202);
1252             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1253             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1254             if (Feptr <= mb->end_subject - Flength &&
1255               memcmp(Feptr, Lcharptr, CU2BYTES(Flength)) == 0) Feptr += Flength;
1256             else if (Loclength > 0 &&
1257                      Feptr <= mb->end_subject - Loclength &&
1258                      memcmp(Feptr, Foccu, CU2BYTES(Loclength)) == 0)
1259               Feptr += Loclength;
1260             else
1261               {
1262               CHECK_PARTIAL();
1263               RRETURN(MATCH_NOMATCH);
1264               }
1265             }
1266           /* Control never gets here */
1267           }
1268 
1269         else  /* Maximize */
1270           {
1271           Lstart_eptr = Feptr;
1272           for (i = Lmin; i < Lmax; i++)
1273             {
1274             if (Feptr <= mb->end_subject - Flength &&
1275                 memcmp(Feptr, Lcharptr, CU2BYTES(Flength)) == 0)
1276               Feptr += Flength;
1277             else if (Loclength > 0 &&
1278                      Feptr <= mb->end_subject - Loclength &&
1279                      memcmp(Feptr, Foccu, CU2BYTES(Loclength)) == 0)
1280               Feptr += Loclength;
1281             else
1282               {
1283               CHECK_PARTIAL();
1284               break;
1285               }
1286             }
1287 
1288           /* After \C in UTF mode, Lstart_eptr might be in the middle of a
1289           Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
1290           go too far. */
1291 
1292           if (reptype != REPTYPE_POS) for(;;)
1293             {
1294             if (Feptr <= Lstart_eptr) break;
1295             RMATCH(Fecode, RM203);
1296             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1297             Feptr--;
1298             BACKCHAR(Feptr);
1299             }
1300           }
1301         break;   /* End of repeated wide character handling */
1302         }
1303 
1304       /* Length of UTF character is 1. Put it into the preserved variable and
1305       fall through to the non-UTF code. */
1306 
1307       Lc = fc;
1308       }
1309     else
1310 #endif  /* SUPPORT_UNICODE */
1311 
1312     /* When not in UTF mode, load a single-code-unit character. Then proceed as
1313     above, using Unicode casing if either UTF or UCP is set. */
1314 
1315     Lc = *Fecode++;
1316 
1317     /* Caseless comparison */
1318 
1319     if (Fop >= OP_STARI)
1320       {
1321 #if PCRE2_CODE_UNIT_WIDTH == 8
1322 #ifdef SUPPORT_UNICODE
1323       if (ucp && !utf && Lc > 127) Loc = UCD_OTHERCASE(Lc);
1324       else
1325 #endif  /* SUPPORT_UNICODE */
1326       /* Lc will be < 128 in UTF-8 mode. */
1327       Loc = mb->fcc[Lc];
1328 #else /* 16-bit & 32-bit */
1329 #ifdef SUPPORT_UNICODE
1330       if ((utf || ucp) && Lc > 127) Loc = UCD_OTHERCASE(Lc);
1331       else
1332 #endif  /* SUPPORT_UNICODE */
1333       Loc = TABLE_GET(Lc, mb->fcc, Lc);
1334 #endif  /* PCRE2_CODE_UNIT_WIDTH == 8 */
1335 
1336       for (i = 1; i <= Lmin; i++)
1337         {
1338         uint32_t cc;                 /* Faster than PCRE2_UCHAR */
1339         if (Feptr >= mb->end_subject)
1340           {
1341           SCHECK_PARTIAL();
1342           RRETURN(MATCH_NOMATCH);
1343           }
1344         cc = UCHAR21TEST(Feptr);
1345         if (Lc != cc && Loc != cc) RRETURN(MATCH_NOMATCH);
1346         Feptr++;
1347         }
1348       if (Lmin == Lmax) continue;
1349 
1350       if (reptype == REPTYPE_MIN)
1351         {
1352         for (;;)
1353           {
1354           uint32_t cc;               /* Faster than PCRE2_UCHAR */
1355           RMATCH(Fecode, RM25);
1356           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1357           if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1358           if (Feptr >= mb->end_subject)
1359             {
1360             SCHECK_PARTIAL();
1361             RRETURN(MATCH_NOMATCH);
1362             }
1363           cc = UCHAR21TEST(Feptr);
1364           if (Lc != cc && Loc != cc) RRETURN(MATCH_NOMATCH);
1365           Feptr++;
1366           }
1367         /* Control never gets here */
1368         }
1369 
1370       else  /* Maximize */
1371         {
1372         Lstart_eptr = Feptr;
1373         for (i = Lmin; i < Lmax; i++)
1374           {
1375           uint32_t cc;               /* Faster than PCRE2_UCHAR */
1376           if (Feptr >= mb->end_subject)
1377             {
1378             SCHECK_PARTIAL();
1379             break;
1380             }
1381           cc = UCHAR21TEST(Feptr);
1382           if (Lc != cc && Loc != cc) break;
1383           Feptr++;
1384           }
1385         if (reptype != REPTYPE_POS) for (;;)
1386           {
1387           if (Feptr == Lstart_eptr) break;
1388           RMATCH(Fecode, RM26);
1389           Feptr--;
1390           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1391           }
1392         }
1393       }
1394 
1395     /* Caseful comparisons (includes all multi-byte characters) */
1396 
1397     else
1398       {
1399       for (i = 1; i <= Lmin; i++)
1400         {
1401         if (Feptr >= mb->end_subject)
1402           {
1403           SCHECK_PARTIAL();
1404           RRETURN(MATCH_NOMATCH);
1405           }
1406         if (Lc != UCHAR21INCTEST(Feptr)) RRETURN(MATCH_NOMATCH);
1407         }
1408 
1409       if (Lmin == Lmax) continue;
1410 
1411       if (reptype == REPTYPE_MIN)
1412         {
1413         for (;;)
1414           {
1415           RMATCH(Fecode, RM27);
1416           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1417           if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1418           if (Feptr >= mb->end_subject)
1419             {
1420             SCHECK_PARTIAL();
1421             RRETURN(MATCH_NOMATCH);
1422             }
1423           if (Lc != UCHAR21INCTEST(Feptr)) RRETURN(MATCH_NOMATCH);
1424           }
1425         /* Control never gets here */
1426         }
1427       else  /* Maximize */
1428         {
1429         Lstart_eptr = Feptr;
1430         for (i = Lmin; i < Lmax; i++)
1431           {
1432           if (Feptr >= mb->end_subject)
1433             {
1434             SCHECK_PARTIAL();
1435             break;
1436             }
1437 
1438           if (Lc != UCHAR21TEST(Feptr)) break;
1439           Feptr++;
1440           }
1441 
1442         if (reptype != REPTYPE_POS) for (;;)
1443           {
1444           if (Feptr <= Lstart_eptr) break;
1445           RMATCH(Fecode, RM28);
1446           Feptr--;
1447           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1448           }
1449         }
1450       }
1451     break;
1452 
1453 #undef Loclength
1454 #undef Lstart_eptr
1455 #undef Lcharptr
1456 #undef Lmin
1457 #undef Lmax
1458 #undef Lc
1459 #undef Loc
1460 
1461 
1462     /* ===================================================================== */
1463     /* Match a negated single character repeatedly. This is almost a
1464     repeat of the code for a repeated single character, but I haven't found a
1465     nice way of commoning these up that doesn't require a test of the
1466     positive/negative option for each character match. Maybe that wouldn't add
1467     very much to the time taken, but character matching *is* what this is all
1468     about... */
1469 
1470 #define Lstart_eptr  F->temp_sptr[0]
1471 #define Lmin         F->temp_32[0]
1472 #define Lmax         F->temp_32[1]
1473 #define Lc           F->temp_32[2]
1474 #define Loc          F->temp_32[3]
1475 
1476     case OP_NOTEXACT:
1477     case OP_NOTEXACTI:
1478     Lmin = Lmax = GET2(Fecode, 1);
1479     Fecode += 1 + IMM2_SIZE;
1480     goto REPEATNOTCHAR;
1481 
1482     case OP_NOTUPTO:
1483     case OP_NOTUPTOI:
1484     Lmin = 0;
1485     Lmax = GET2(Fecode, 1);
1486     reptype = REPTYPE_MAX;
1487     Fecode += 1 + IMM2_SIZE;
1488     goto REPEATNOTCHAR;
1489 
1490     case OP_NOTMINUPTO:
1491     case OP_NOTMINUPTOI:
1492     Lmin = 0;
1493     Lmax = GET2(Fecode, 1);
1494     reptype = REPTYPE_MIN;
1495     Fecode += 1 + IMM2_SIZE;
1496     goto REPEATNOTCHAR;
1497 
1498     case OP_NOTPOSSTAR:
1499     case OP_NOTPOSSTARI:
1500     reptype = REPTYPE_POS;
1501     Lmin = 0;
1502     Lmax = UINT32_MAX;
1503     Fecode++;
1504     goto REPEATNOTCHAR;
1505 
1506     case OP_NOTPOSPLUS:
1507     case OP_NOTPOSPLUSI:
1508     reptype = REPTYPE_POS;
1509     Lmin = 1;
1510     Lmax = UINT32_MAX;
1511     Fecode++;
1512     goto REPEATNOTCHAR;
1513 
1514     case OP_NOTPOSQUERY:
1515     case OP_NOTPOSQUERYI:
1516     reptype = REPTYPE_POS;
1517     Lmin = 0;
1518     Lmax = 1;
1519     Fecode++;
1520     goto REPEATNOTCHAR;
1521 
1522     case OP_NOTPOSUPTO:
1523     case OP_NOTPOSUPTOI:
1524     reptype = REPTYPE_POS;
1525     Lmin = 0;
1526     Lmax = GET2(Fecode, 1);
1527     Fecode += 1 + IMM2_SIZE;
1528     goto REPEATNOTCHAR;
1529 
1530     case OP_NOTSTAR:
1531     case OP_NOTSTARI:
1532     case OP_NOTMINSTAR:
1533     case OP_NOTMINSTARI:
1534     case OP_NOTPLUS:
1535     case OP_NOTPLUSI:
1536     case OP_NOTMINPLUS:
1537     case OP_NOTMINPLUSI:
1538     case OP_NOTQUERY:
1539     case OP_NOTQUERYI:
1540     case OP_NOTMINQUERY:
1541     case OP_NOTMINQUERYI:
1542     fc = *Fecode++ - ((Fop >= OP_NOTSTARI)? OP_NOTSTARI: OP_NOTSTAR);
1543     Lmin = rep_min[fc];
1544     Lmax = rep_max[fc];
1545     reptype = rep_typ[fc];
1546 
1547     /* Common code for all repeated single-character non-matches. */
1548 
1549     REPEATNOTCHAR:
1550     GETCHARINCTEST(Lc, Fecode);
1551 
1552     /* The code is duplicated for the caseless and caseful cases, for speed,
1553     since matching characters is likely to be quite common. First, ensure the
1554     minimum number of matches is present. If Lmin = Lmax, we are done.
1555     Otherwise, if minimizing, keep trying the rest of the expression and
1556     advancing one matching character if failing, up to the maximum.
1557     Alternatively, if maximizing, find the maximum number of characters and
1558     work backwards. */
1559 
1560     if (Fop >= OP_NOTSTARI)     /* Caseless */
1561       {
1562 #ifdef SUPPORT_UNICODE
1563       if ((utf || ucp) && Lc > 127)
1564         Loc = UCD_OTHERCASE(Lc);
1565       else
1566 #endif /* SUPPORT_UNICODE */
1567 
1568       Loc = TABLE_GET(Lc, mb->fcc, Lc);  /* Other case from table */
1569 
1570 #ifdef SUPPORT_UNICODE
1571       if (utf)
1572         {
1573         uint32_t d;
1574         for (i = 1; i <= Lmin; i++)
1575           {
1576           if (Feptr >= mb->end_subject)
1577             {
1578             SCHECK_PARTIAL();
1579             RRETURN(MATCH_NOMATCH);
1580             }
1581           GETCHARINC(d, Feptr);
1582           if (Lc == d || Loc == d) RRETURN(MATCH_NOMATCH);
1583           }
1584         }
1585       else
1586 #endif  /* SUPPORT_UNICODE */
1587 
1588       /* Not UTF mode */
1589         {
1590         for (i = 1; i <= Lmin; i++)
1591           {
1592           if (Feptr >= mb->end_subject)
1593             {
1594             SCHECK_PARTIAL();
1595             RRETURN(MATCH_NOMATCH);
1596             }
1597           if (Lc == *Feptr || Loc == *Feptr) RRETURN(MATCH_NOMATCH);
1598           Feptr++;
1599           }
1600         }
1601 
1602       if (Lmin == Lmax) continue;  /* Finished for exact count */
1603 
1604       if (reptype == REPTYPE_MIN)
1605         {
1606 #ifdef SUPPORT_UNICODE
1607         if (utf)
1608           {
1609           uint32_t d;
1610           for (;;)
1611             {
1612             RMATCH(Fecode, RM204);
1613             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1614             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1615             if (Feptr >= mb->end_subject)
1616               {
1617               SCHECK_PARTIAL();
1618               RRETURN(MATCH_NOMATCH);
1619               }
1620             GETCHARINC(d, Feptr);
1621             if (Lc == d || Loc == d) RRETURN(MATCH_NOMATCH);
1622             }
1623           }
1624         else
1625 #endif  /*SUPPORT_UNICODE */
1626 
1627         /* Not UTF mode */
1628           {
1629           for (;;)
1630             {
1631             RMATCH(Fecode, RM29);
1632             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1633             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1634             if (Feptr >= mb->end_subject)
1635               {
1636               SCHECK_PARTIAL();
1637               RRETURN(MATCH_NOMATCH);
1638               }
1639             if (Lc == *Feptr || Loc == *Feptr) RRETURN(MATCH_NOMATCH);
1640             Feptr++;
1641             }
1642           }
1643         /* Control never gets here */
1644         }
1645 
1646       /* Maximize case */
1647 
1648       else
1649         {
1650         Lstart_eptr = Feptr;
1651 
1652 #ifdef SUPPORT_UNICODE
1653         if (utf)
1654           {
1655           uint32_t d;
1656           for (i = Lmin; i < Lmax; i++)
1657             {
1658             int len = 1;
1659             if (Feptr >= mb->end_subject)
1660               {
1661               SCHECK_PARTIAL();
1662               break;
1663               }
1664             GETCHARLEN(d, Feptr, len);
1665             if (Lc == d || Loc == d) break;
1666             Feptr += len;
1667             }
1668 
1669           /* After \C in UTF mode, Lstart_eptr might be in the middle of a
1670           Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
1671           go too far. */
1672 
1673           if (reptype != REPTYPE_POS) for(;;)
1674             {
1675             if (Feptr <= Lstart_eptr) break;
1676             RMATCH(Fecode, RM205);
1677             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1678             Feptr--;
1679             BACKCHAR(Feptr);
1680             }
1681           }
1682         else
1683 #endif  /* SUPPORT_UNICODE */
1684 
1685         /* Not UTF mode */
1686           {
1687           for (i = Lmin; i < Lmax; i++)
1688             {
1689             if (Feptr >= mb->end_subject)
1690               {
1691               SCHECK_PARTIAL();
1692               break;
1693               }
1694             if (Lc == *Feptr || Loc == *Feptr) break;
1695             Feptr++;
1696             }
1697           if (reptype != REPTYPE_POS) for (;;)
1698             {
1699             if (Feptr == Lstart_eptr) break;
1700             RMATCH(Fecode, RM30);
1701             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1702             Feptr--;
1703             }
1704           }
1705         }
1706       }
1707 
1708     /* Caseful comparisons */
1709 
1710     else
1711       {
1712 #ifdef SUPPORT_UNICODE
1713       if (utf)
1714         {
1715         uint32_t d;
1716         for (i = 1; i <= Lmin; i++)
1717           {
1718           if (Feptr >= mb->end_subject)
1719             {
1720             SCHECK_PARTIAL();
1721             RRETURN(MATCH_NOMATCH);
1722             }
1723           GETCHARINC(d, Feptr);
1724           if (Lc == d) RRETURN(MATCH_NOMATCH);
1725           }
1726         }
1727       else
1728 #endif
1729       /* Not UTF mode */
1730         {
1731         for (i = 1; i <= Lmin; i++)
1732           {
1733           if (Feptr >= mb->end_subject)
1734             {
1735             SCHECK_PARTIAL();
1736             RRETURN(MATCH_NOMATCH);
1737             }
1738           if (Lc == *Feptr++) RRETURN(MATCH_NOMATCH);
1739           }
1740         }
1741 
1742       if (Lmin == Lmax) continue;
1743 
1744       if (reptype == REPTYPE_MIN)
1745         {
1746 #ifdef SUPPORT_UNICODE
1747         if (utf)
1748           {
1749           uint32_t d;
1750           for (;;)
1751             {
1752             RMATCH(Fecode, RM206);
1753             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1754             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1755             if (Feptr >= mb->end_subject)
1756               {
1757               SCHECK_PARTIAL();
1758               RRETURN(MATCH_NOMATCH);
1759               }
1760             GETCHARINC(d, Feptr);
1761             if (Lc == d) RRETURN(MATCH_NOMATCH);
1762             }
1763           }
1764         else
1765 #endif
1766         /* Not UTF mode */
1767           {
1768           for (;;)
1769             {
1770             RMATCH(Fecode, RM31);
1771             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1772             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1773             if (Feptr >= mb->end_subject)
1774               {
1775               SCHECK_PARTIAL();
1776               RRETURN(MATCH_NOMATCH);
1777               }
1778             if (Lc == *Feptr++) RRETURN(MATCH_NOMATCH);
1779             }
1780           }
1781         /* Control never gets here */
1782         }
1783 
1784       /* Maximize case */
1785 
1786       else
1787         {
1788         Lstart_eptr = Feptr;
1789 
1790 #ifdef SUPPORT_UNICODE
1791         if (utf)
1792           {
1793           uint32_t d;
1794           for (i = Lmin; i < Lmax; i++)
1795             {
1796             int len = 1;
1797             if (Feptr >= mb->end_subject)
1798               {
1799               SCHECK_PARTIAL();
1800               break;
1801               }
1802             GETCHARLEN(d, Feptr, len);
1803             if (Lc == d) break;
1804             Feptr += len;
1805             }
1806 
1807           /* After \C in UTF mode, Lstart_eptr might be in the middle of a
1808           Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
1809           go too far. */
1810 
1811           if (reptype != REPTYPE_POS) for(;;)
1812             {
1813             if (Feptr <= Lstart_eptr) break;
1814             RMATCH(Fecode, RM207);
1815             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1816             Feptr--;
1817             BACKCHAR(Feptr);
1818             }
1819           }
1820         else
1821 #endif
1822         /* Not UTF mode */
1823           {
1824           for (i = Lmin; i < Lmax; i++)
1825             {
1826             if (Feptr >= mb->end_subject)
1827               {
1828               SCHECK_PARTIAL();
1829               break;
1830               }
1831             if (Lc == *Feptr) break;
1832             Feptr++;
1833             }
1834           if (reptype != REPTYPE_POS) for (;;)
1835             {
1836             if (Feptr == Lstart_eptr) break;
1837             RMATCH(Fecode, RM32);
1838             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1839             Feptr--;
1840             }
1841           }
1842         }
1843       }
1844     break;
1845 
1846 #undef Lstart_eptr
1847 #undef Lmin
1848 #undef Lmax
1849 #undef Lc
1850 #undef Loc
1851 
1852 
1853     /* ===================================================================== */
1854     /* Match a bit-mapped character class, possibly repeatedly. These opcodes
1855     are used when all the characters in the class have values in the range
1856     0-255, and either the matching is caseful, or the characters are in the
1857     range 0-127 when UTF processing is enabled. The only difference between
1858     OP_CLASS and OP_NCLASS occurs when a data character outside the range is
1859     encountered. */
1860 
1861 #define Lmin               F->temp_32[0]
1862 #define Lmax               F->temp_32[1]
1863 #define Lstart_eptr        F->temp_sptr[0]
1864 #define Lbyte_map_address  F->temp_sptr[1]
1865 #define Lbyte_map          ((unsigned char *)Lbyte_map_address)
1866 
1867     case OP_NCLASS:
1868     case OP_CLASS:
1869       {
1870       Lbyte_map_address = Fecode + 1;           /* Save for matching */
1871       Fecode += 1 + (32 / sizeof(PCRE2_UCHAR)); /* Advance past the item */
1872 
1873       /* Look past the end of the item to see if there is repeat information
1874       following. Then obey similar code to character type repeats. */
1875 
1876       switch (*Fecode)
1877         {
1878         case OP_CRSTAR:
1879         case OP_CRMINSTAR:
1880         case OP_CRPLUS:
1881         case OP_CRMINPLUS:
1882         case OP_CRQUERY:
1883         case OP_CRMINQUERY:
1884         case OP_CRPOSSTAR:
1885         case OP_CRPOSPLUS:
1886         case OP_CRPOSQUERY:
1887         fc = *Fecode++ - OP_CRSTAR;
1888         Lmin = rep_min[fc];
1889         Lmax = rep_max[fc];
1890         reptype = rep_typ[fc];
1891         break;
1892 
1893         case OP_CRRANGE:
1894         case OP_CRMINRANGE:
1895         case OP_CRPOSRANGE:
1896         Lmin = GET2(Fecode, 1);
1897         Lmax = GET2(Fecode, 1 + IMM2_SIZE);
1898         if (Lmax == 0) Lmax = UINT32_MAX;       /* Max 0 => infinity */
1899         reptype = rep_typ[*Fecode - OP_CRSTAR];
1900         Fecode += 1 + 2 * IMM2_SIZE;
1901         break;
1902 
1903         default:               /* No repeat follows */
1904         Lmin = Lmax = 1;
1905         break;
1906         }
1907 
1908       /* First, ensure the minimum number of matches are present. */
1909 
1910 #ifdef SUPPORT_UNICODE
1911       if (utf)
1912         {
1913         for (i = 1; i <= Lmin; i++)
1914           {
1915           if (Feptr >= mb->end_subject)
1916             {
1917             SCHECK_PARTIAL();
1918             RRETURN(MATCH_NOMATCH);
1919             }
1920           GETCHARINC(fc, Feptr);
1921           if (fc > 255)
1922             {
1923             if (Fop == OP_CLASS) RRETURN(MATCH_NOMATCH);
1924             }
1925           else
1926             if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
1927           }
1928         }
1929       else
1930 #endif
1931       /* Not UTF mode */
1932         {
1933         for (i = 1; i <= Lmin; i++)
1934           {
1935           if (Feptr >= mb->end_subject)
1936             {
1937             SCHECK_PARTIAL();
1938             RRETURN(MATCH_NOMATCH);
1939             }
1940           fc = *Feptr++;
1941 #if PCRE2_CODE_UNIT_WIDTH != 8
1942           if (fc > 255)
1943             {
1944             if (Fop == OP_CLASS) RRETURN(MATCH_NOMATCH);
1945             }
1946           else
1947 #endif
1948           if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
1949           }
1950         }
1951 
1952       /* If Lmax == Lmin we are done. Continue with main loop. */
1953 
1954       if (Lmin == Lmax) continue;
1955 
1956       /* If minimizing, keep testing the rest of the expression and advancing
1957       the pointer while it matches the class. */
1958 
1959       if (reptype == REPTYPE_MIN)
1960         {
1961 #ifdef SUPPORT_UNICODE
1962         if (utf)
1963           {
1964           for (;;)
1965             {
1966             RMATCH(Fecode, RM200);
1967             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1968             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1969             if (Feptr >= mb->end_subject)
1970               {
1971               SCHECK_PARTIAL();
1972               RRETURN(MATCH_NOMATCH);
1973               }
1974             GETCHARINC(fc, Feptr);
1975             if (fc > 255)
1976               {
1977               if (Fop == OP_CLASS) RRETURN(MATCH_NOMATCH);
1978               }
1979             else
1980               if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
1981             }
1982           }
1983         else
1984 #endif
1985         /* Not UTF mode */
1986           {
1987           for (;;)
1988             {
1989             RMATCH(Fecode, RM23);
1990             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1991             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1992             if (Feptr >= mb->end_subject)
1993               {
1994               SCHECK_PARTIAL();
1995               RRETURN(MATCH_NOMATCH);
1996               }
1997             fc = *Feptr++;
1998 #if PCRE2_CODE_UNIT_WIDTH != 8
1999             if (fc > 255)
2000               {
2001               if (Fop == OP_CLASS) RRETURN(MATCH_NOMATCH);
2002               }
2003             else
2004 #endif
2005             if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
2006             }
2007           }
2008         /* Control never gets here */
2009         }
2010 
2011       /* If maximizing, find the longest possible run, then work backwards. */
2012 
2013       else
2014         {
2015         Lstart_eptr = Feptr;
2016 
2017 #ifdef SUPPORT_UNICODE
2018         if (utf)
2019           {
2020           for (i = Lmin; i < Lmax; i++)
2021             {
2022             int len = 1;
2023             if (Feptr >= mb->end_subject)
2024               {
2025               SCHECK_PARTIAL();
2026               break;
2027               }
2028             GETCHARLEN(fc, Feptr, len);
2029             if (fc > 255)
2030               {
2031               if (Fop == OP_CLASS) break;
2032               }
2033             else
2034               if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) break;
2035             Feptr += len;
2036             }
2037 
2038           if (reptype == REPTYPE_POS) continue;    /* No backtracking */
2039 
2040           /* After \C in UTF mode, Lstart_eptr might be in the middle of a
2041           Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
2042           go too far. */
2043 
2044           for (;;)
2045             {
2046             RMATCH(Fecode, RM201);
2047             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2048             if (Feptr-- <= Lstart_eptr) break;  /* Tried at original position */
2049             BACKCHAR(Feptr);
2050             }
2051           }
2052         else
2053 #endif
2054           /* Not UTF mode */
2055           {
2056           for (i = Lmin; i < Lmax; i++)
2057             {
2058             if (Feptr >= mb->end_subject)
2059               {
2060               SCHECK_PARTIAL();
2061               break;
2062               }
2063             fc = *Feptr;
2064 #if PCRE2_CODE_UNIT_WIDTH != 8
2065             if (fc > 255)
2066               {
2067               if (Fop == OP_CLASS) break;
2068               }
2069             else
2070 #endif
2071             if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) break;
2072             Feptr++;
2073             }
2074 
2075           if (reptype == REPTYPE_POS) continue;    /* No backtracking */
2076 
2077           while (Feptr >= Lstart_eptr)
2078             {
2079             RMATCH(Fecode, RM24);
2080             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2081             Feptr--;
2082             }
2083           }
2084 
2085         RRETURN(MATCH_NOMATCH);
2086         }
2087       }
2088     /* Control never gets here */
2089 
2090 #undef Lbyte_map_address
2091 #undef Lbyte_map
2092 #undef Lstart_eptr
2093 #undef Lmin
2094 #undef Lmax
2095 
2096 
2097     /* ===================================================================== */
2098     /* Match an extended character class. In the 8-bit library, this opcode is
2099     encountered only when UTF-8 mode is supported. In the 16-bit and
2100     32-bit libraries, codepoints greater than 255 may be encountered even when
2101     UTF is not supported. */
2102 
2103 #define Lstart_eptr  F->temp_sptr[0]
2104 #define Lxclass_data F->temp_sptr[1]
2105 #define Lmin         F->temp_32[0]
2106 #define Lmax         F->temp_32[1]
2107 
2108 #ifdef SUPPORT_WIDE_CHARS
2109     case OP_XCLASS:
2110       {
2111       Lxclass_data = Fecode + 1 + LINK_SIZE;  /* Save for matching */
2112       Fecode += GET(Fecode, 1);               /* Advance past the item */
2113 
2114       switch (*Fecode)
2115         {
2116         case OP_CRSTAR:
2117         case OP_CRMINSTAR:
2118         case OP_CRPLUS:
2119         case OP_CRMINPLUS:
2120         case OP_CRQUERY:
2121         case OP_CRMINQUERY:
2122         case OP_CRPOSSTAR:
2123         case OP_CRPOSPLUS:
2124         case OP_CRPOSQUERY:
2125         fc = *Fecode++ - OP_CRSTAR;
2126         Lmin = rep_min[fc];
2127         Lmax = rep_max[fc];
2128         reptype = rep_typ[fc];
2129         break;
2130 
2131         case OP_CRRANGE:
2132         case OP_CRMINRANGE:
2133         case OP_CRPOSRANGE:
2134         Lmin = GET2(Fecode, 1);
2135         Lmax = GET2(Fecode, 1 + IMM2_SIZE);
2136         if (Lmax == 0) Lmax = UINT32_MAX;  /* Max 0 => infinity */
2137         reptype = rep_typ[*Fecode - OP_CRSTAR];
2138         Fecode += 1 + 2 * IMM2_SIZE;
2139         break;
2140 
2141         default:               /* No repeat follows */
2142         Lmin = Lmax = 1;
2143         break;
2144         }
2145 
2146       /* First, ensure the minimum number of matches are present. */
2147 
2148       for (i = 1; i <= Lmin; i++)
2149         {
2150         if (Feptr >= mb->end_subject)
2151           {
2152           SCHECK_PARTIAL();
2153           RRETURN(MATCH_NOMATCH);
2154           }
2155         GETCHARINCTEST(fc, Feptr);
2156         if (!PRIV(xclass)(fc, Lxclass_data, utf)) RRETURN(MATCH_NOMATCH);
2157         }
2158 
2159       /* If Lmax == Lmin we can just continue with the main loop. */
2160 
2161       if (Lmin == Lmax) continue;
2162 
2163       /* If minimizing, keep testing the rest of the expression and advancing
2164       the pointer while it matches the class. */
2165 
2166       if (reptype == REPTYPE_MIN)
2167         {
2168         for (;;)
2169           {
2170           RMATCH(Fecode, RM100);
2171           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2172           if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
2173           if (Feptr >= mb->end_subject)
2174             {
2175             SCHECK_PARTIAL();
2176             RRETURN(MATCH_NOMATCH);
2177             }
2178           GETCHARINCTEST(fc, Feptr);
2179           if (!PRIV(xclass)(fc, Lxclass_data, utf)) RRETURN(MATCH_NOMATCH);
2180           }
2181         /* Control never gets here */
2182         }
2183 
2184       /* If maximizing, find the longest possible run, then work backwards. */
2185 
2186       else
2187         {
2188         Lstart_eptr = Feptr;
2189         for (i = Lmin; i < Lmax; i++)
2190           {
2191           int len = 1;
2192           if (Feptr >= mb->end_subject)
2193             {
2194             SCHECK_PARTIAL();
2195             break;
2196             }
2197 #ifdef SUPPORT_UNICODE
2198           GETCHARLENTEST(fc, Feptr, len);
2199 #else
2200           fc = *Feptr;
2201 #endif
2202           if (!PRIV(xclass)(fc, Lxclass_data, utf)) break;
2203           Feptr += len;
2204           }
2205 
2206         if (reptype == REPTYPE_POS) continue;    /* No backtracking */
2207 
2208         /* After \C in UTF mode, Lstart_eptr might be in the middle of a
2209         Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
2210         go too far. */
2211 
2212         for(;;)
2213           {
2214           RMATCH(Fecode, RM101);
2215           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2216           if (Feptr-- <= Lstart_eptr) break;  /* Tried at original position */
2217 #ifdef SUPPORT_UNICODE
2218           if (utf) BACKCHAR(Feptr);
2219 #endif
2220           }
2221         RRETURN(MATCH_NOMATCH);
2222         }
2223 
2224       /* Control never gets here */
2225       }
2226 #endif  /* SUPPORT_WIDE_CHARS: end of XCLASS */
2227 
2228 #undef Lstart_eptr
2229 #undef Lxclass_data
2230 #undef Lmin
2231 #undef Lmax
2232 
2233 
2234     /* ===================================================================== */
2235     /* Match various character types when PCRE2_UCP is not set. These opcodes
2236     are not generated when PCRE2_UCP is set - instead appropriate property
2237     tests are compiled. */
2238 
2239     case OP_NOT_DIGIT:
2240     if (Feptr >= mb->end_subject)
2241       {
2242       SCHECK_PARTIAL();
2243       RRETURN(MATCH_NOMATCH);
2244       }
2245     GETCHARINCTEST(fc, Feptr);
2246     if (CHMAX_255(fc) && (mb->ctypes[fc] & ctype_digit) != 0)
2247       RRETURN(MATCH_NOMATCH);
2248     Fecode++;
2249     break;
2250 
2251     case OP_DIGIT:
2252     if (Feptr >= mb->end_subject)
2253       {
2254       SCHECK_PARTIAL();
2255       RRETURN(MATCH_NOMATCH);
2256       }
2257     GETCHARINCTEST(fc, Feptr);
2258     if (!CHMAX_255(fc) || (mb->ctypes[fc] & ctype_digit) == 0)
2259       RRETURN(MATCH_NOMATCH);
2260     Fecode++;
2261     break;
2262 
2263     case OP_NOT_WHITESPACE:
2264     if (Feptr >= mb->end_subject)
2265       {
2266       SCHECK_PARTIAL();
2267       RRETURN(MATCH_NOMATCH);
2268       }
2269     GETCHARINCTEST(fc, Feptr);
2270     if (CHMAX_255(fc) && (mb->ctypes[fc] & ctype_space) != 0)
2271       RRETURN(MATCH_NOMATCH);
2272     Fecode++;
2273     break;
2274 
2275     case OP_WHITESPACE:
2276     if (Feptr >= mb->end_subject)
2277       {
2278       SCHECK_PARTIAL();
2279       RRETURN(MATCH_NOMATCH);
2280       }
2281     GETCHARINCTEST(fc, Feptr);
2282     if (!CHMAX_255(fc) || (mb->ctypes[fc] & ctype_space) == 0)
2283       RRETURN(MATCH_NOMATCH);
2284     Fecode++;
2285     break;
2286 
2287     case OP_NOT_WORDCHAR:
2288     if (Feptr >= mb->end_subject)
2289       {
2290       SCHECK_PARTIAL();
2291       RRETURN(MATCH_NOMATCH);
2292       }
2293     GETCHARINCTEST(fc, Feptr);
2294     if (CHMAX_255(fc) && (mb->ctypes[fc] & ctype_word) != 0)
2295       RRETURN(MATCH_NOMATCH);
2296     Fecode++;
2297     break;
2298 
2299     case OP_WORDCHAR:
2300     if (Feptr >= mb->end_subject)
2301       {
2302       SCHECK_PARTIAL();
2303       RRETURN(MATCH_NOMATCH);
2304       }
2305     GETCHARINCTEST(fc, Feptr);
2306     if (!CHMAX_255(fc) || (mb->ctypes[fc] & ctype_word) == 0)
2307       RRETURN(MATCH_NOMATCH);
2308     Fecode++;
2309     break;
2310 
2311     case OP_ANYNL:
2312     if (Feptr >= mb->end_subject)
2313       {
2314       SCHECK_PARTIAL();
2315       RRETURN(MATCH_NOMATCH);
2316       }
2317     GETCHARINCTEST(fc, Feptr);
2318     switch(fc)
2319       {
2320       default: RRETURN(MATCH_NOMATCH);
2321 
2322       case CHAR_CR:
2323       if (Feptr >= mb->end_subject)
2324         {
2325         SCHECK_PARTIAL();
2326         }
2327       else if (UCHAR21TEST(Feptr) == CHAR_LF) Feptr++;
2328       break;
2329 
2330       case CHAR_LF:
2331       break;
2332 
2333       case CHAR_VT:
2334       case CHAR_FF:
2335       case CHAR_NEL:
2336 #ifndef EBCDIC
2337       case 0x2028:
2338       case 0x2029:
2339 #endif  /* Not EBCDIC */
2340       if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) RRETURN(MATCH_NOMATCH);
2341       break;
2342       }
2343     Fecode++;
2344     break;
2345 
2346     case OP_NOT_HSPACE:
2347     if (Feptr >= mb->end_subject)
2348       {
2349       SCHECK_PARTIAL();
2350       RRETURN(MATCH_NOMATCH);
2351       }
2352     GETCHARINCTEST(fc, Feptr);
2353     switch(fc)
2354       {
2355       HSPACE_CASES: RRETURN(MATCH_NOMATCH);  /* Byte and multibyte cases */
2356       default: break;
2357       }
2358     Fecode++;
2359     break;
2360 
2361     case OP_HSPACE:
2362     if (Feptr >= mb->end_subject)
2363       {
2364       SCHECK_PARTIAL();
2365       RRETURN(MATCH_NOMATCH);
2366       }
2367     GETCHARINCTEST(fc, Feptr);
2368     switch(fc)
2369       {
2370       HSPACE_CASES: break;  /* Byte and multibyte cases */
2371       default: RRETURN(MATCH_NOMATCH);
2372       }
2373     Fecode++;
2374     break;
2375 
2376     case OP_NOT_VSPACE:
2377     if (Feptr >= mb->end_subject)
2378       {
2379       SCHECK_PARTIAL();
2380       RRETURN(MATCH_NOMATCH);
2381       }
2382     GETCHARINCTEST(fc, Feptr);
2383     switch(fc)
2384       {
2385       VSPACE_CASES: RRETURN(MATCH_NOMATCH);
2386       default: break;
2387       }
2388     Fecode++;
2389     break;
2390 
2391     case OP_VSPACE:
2392     if (Feptr >= mb->end_subject)
2393       {
2394       SCHECK_PARTIAL();
2395       RRETURN(MATCH_NOMATCH);
2396       }
2397     GETCHARINCTEST(fc, Feptr);
2398     switch(fc)
2399       {
2400       VSPACE_CASES: break;
2401       default: RRETURN(MATCH_NOMATCH);
2402       }
2403     Fecode++;
2404     break;
2405 
2406 
2407 #ifdef SUPPORT_UNICODE
2408 
2409     /* ===================================================================== */
2410     /* Check the next character by Unicode property. We will get here only
2411     if the support is in the binary; otherwise a compile-time error occurs. */
2412 
2413     case OP_PROP:
2414     case OP_NOTPROP:
2415     if (Feptr >= mb->end_subject)
2416       {
2417       SCHECK_PARTIAL();
2418       RRETURN(MATCH_NOMATCH);
2419       }
2420     GETCHARINCTEST(fc, Feptr);
2421       {
2422       const uint32_t *cp;
2423       const ucd_record *prop = GET_UCD(fc);
2424 
2425       switch(Fecode[1])
2426         {
2427         case PT_ANY:
2428         if (Fop == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
2429         break;
2430 
2431         case PT_LAMP:
2432         if ((prop->chartype == ucp_Lu ||
2433              prop->chartype == ucp_Ll ||
2434              prop->chartype == ucp_Lt) == (Fop == OP_NOTPROP))
2435           RRETURN(MATCH_NOMATCH);
2436         break;
2437 
2438         case PT_GC:
2439         if ((Fecode[2] != PRIV(ucp_gentype)[prop->chartype]) == (Fop == OP_PROP))
2440           RRETURN(MATCH_NOMATCH);
2441         break;
2442 
2443         case PT_PC:
2444         if ((Fecode[2] != prop->chartype) == (Fop == OP_PROP))
2445           RRETURN(MATCH_NOMATCH);
2446         break;
2447 
2448         case PT_SC:
2449         if ((Fecode[2] != prop->script) == (Fop == OP_PROP))
2450           RRETURN(MATCH_NOMATCH);
2451         break;
2452 
2453         /* These are specials */
2454 
2455         case PT_ALNUM:
2456         if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2457              PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (Fop == OP_NOTPROP))
2458           RRETURN(MATCH_NOMATCH);
2459         break;
2460 
2461         /* Perl space used to exclude VT, but from Perl 5.18 it is included,
2462         which means that Perl space and POSIX space are now identical. PCRE
2463         was changed at release 8.34. */
2464 
2465         case PT_SPACE:    /* Perl space */
2466         case PT_PXSPACE:  /* POSIX space */
2467         switch(fc)
2468           {
2469           HSPACE_CASES:
2470           VSPACE_CASES:
2471           if (Fop == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
2472           break;
2473 
2474           default:
2475           if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z) ==
2476             (Fop == OP_NOTPROP)) RRETURN(MATCH_NOMATCH);
2477           break;
2478           }
2479         break;
2480 
2481         case PT_WORD:
2482         if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2483              PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
2484              fc == CHAR_UNDERSCORE) == (Fop == OP_NOTPROP))
2485           RRETURN(MATCH_NOMATCH);
2486         break;
2487 
2488         case PT_CLIST:
2489         cp = PRIV(ucd_caseless_sets) + Fecode[2];
2490         for (;;)
2491           {
2492           if (fc < *cp)
2493             { if (Fop == OP_PROP) { RRETURN(MATCH_NOMATCH); } else break; }
2494           if (fc == *cp++)
2495             { if (Fop == OP_PROP) break; else { RRETURN(MATCH_NOMATCH); } }
2496           }
2497         break;
2498 
2499         case PT_UCNC:
2500         if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT ||
2501              fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) ||
2502              fc >= 0xe000) == (Fop == OP_NOTPROP))
2503           RRETURN(MATCH_NOMATCH);
2504         break;
2505 
2506         /* This should never occur */
2507 
2508         default:
2509         return PCRE2_ERROR_INTERNAL;
2510         }
2511 
2512       Fecode += 3;
2513       }
2514     break;
2515 
2516 
2517     /* ===================================================================== */
2518     /* Match an extended Unicode sequence. We will get here only if the support
2519     is in the binary; otherwise a compile-time error occurs. */
2520 
2521     case OP_EXTUNI:
2522     if (Feptr >= mb->end_subject)
2523       {
2524       SCHECK_PARTIAL();
2525       RRETURN(MATCH_NOMATCH);
2526       }
2527     else
2528       {
2529       GETCHARINCTEST(fc, Feptr);
2530       Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject, mb->end_subject, utf,
2531         NULL);
2532       }
2533     CHECK_PARTIAL();
2534     Fecode++;
2535     break;
2536 
2537 #endif  /* SUPPORT_UNICODE */
2538 
2539 
2540     /* ===================================================================== */
2541     /* Match a single character type repeatedly. Note that the property type
2542     does not need to be in a stack frame as it is not used within an RMATCH()
2543     loop. */
2544 
2545 #define Lstart_eptr  F->temp_sptr[0]
2546 #define Lmin         F->temp_32[0]
2547 #define Lmax         F->temp_32[1]
2548 #define Lctype       F->temp_32[2]
2549 #define Lpropvalue   F->temp_32[3]
2550 
2551     case OP_TYPEEXACT:
2552     Lmin = Lmax = GET2(Fecode, 1);
2553     Fecode += 1 + IMM2_SIZE;
2554     goto REPEATTYPE;
2555 
2556     case OP_TYPEUPTO:
2557     case OP_TYPEMINUPTO:
2558     Lmin = 0;
2559     Lmax = GET2(Fecode, 1);
2560     reptype = (*Fecode == OP_TYPEMINUPTO)? REPTYPE_MIN : REPTYPE_MAX;
2561     Fecode += 1 + IMM2_SIZE;
2562     goto REPEATTYPE;
2563 
2564     case OP_TYPEPOSSTAR:
2565     reptype = REPTYPE_POS;
2566     Lmin = 0;
2567     Lmax = UINT32_MAX;
2568     Fecode++;
2569     goto REPEATTYPE;
2570 
2571     case OP_TYPEPOSPLUS:
2572     reptype = REPTYPE_POS;
2573     Lmin = 1;
2574     Lmax = UINT32_MAX;
2575     Fecode++;
2576     goto REPEATTYPE;
2577 
2578     case OP_TYPEPOSQUERY:
2579     reptype = REPTYPE_POS;
2580     Lmin = 0;
2581     Lmax = 1;
2582     Fecode++;
2583     goto REPEATTYPE;
2584 
2585     case OP_TYPEPOSUPTO:
2586     reptype = REPTYPE_POS;
2587     Lmin = 0;
2588     Lmax = GET2(Fecode, 1);
2589     Fecode += 1 + IMM2_SIZE;
2590     goto REPEATTYPE;
2591 
2592     case OP_TYPESTAR:
2593     case OP_TYPEMINSTAR:
2594     case OP_TYPEPLUS:
2595     case OP_TYPEMINPLUS:
2596     case OP_TYPEQUERY:
2597     case OP_TYPEMINQUERY:
2598     fc = *Fecode++ - OP_TYPESTAR;
2599     Lmin = rep_min[fc];
2600     Lmax = rep_max[fc];
2601     reptype = rep_typ[fc];
2602 
2603     /* Common code for all repeated character type matches. */
2604 
2605     REPEATTYPE:
2606     Lctype = *Fecode++;      /* Code for the character type */
2607 
2608 #ifdef SUPPORT_UNICODE
2609     if (Lctype == OP_PROP || Lctype == OP_NOTPROP)
2610       {
2611       proptype = *Fecode++;
2612       Lpropvalue = *Fecode++;
2613       }
2614     else proptype = -1;
2615 #endif
2616 
2617     /* First, ensure the minimum number of matches are present. Use inline
2618     code for maximizing the speed, and do the type test once at the start
2619     (i.e. keep it out of the loop). The code for UTF mode is separated out for
2620     tidiness, except for Unicode property tests. */
2621 
2622     if (Lmin > 0)
2623       {
2624 #ifdef SUPPORT_UNICODE
2625       if (proptype >= 0)  /* Property tests in all modes */
2626         {
2627         switch(proptype)
2628           {
2629           case PT_ANY:
2630           if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
2631           for (i = 1; i <= Lmin; i++)
2632             {
2633             if (Feptr >= mb->end_subject)
2634               {
2635               SCHECK_PARTIAL();
2636               RRETURN(MATCH_NOMATCH);
2637               }
2638             GETCHARINCTEST(fc, Feptr);
2639             }
2640           break;
2641 
2642           case PT_LAMP:
2643           for (i = 1; i <= Lmin; i++)
2644             {
2645             int chartype;
2646             if (Feptr >= mb->end_subject)
2647               {
2648               SCHECK_PARTIAL();
2649               RRETURN(MATCH_NOMATCH);
2650               }
2651             GETCHARINCTEST(fc, Feptr);
2652             chartype = UCD_CHARTYPE(fc);
2653             if ((chartype == ucp_Lu ||
2654                  chartype == ucp_Ll ||
2655                  chartype == ucp_Lt) == (Lctype == OP_NOTPROP))
2656               RRETURN(MATCH_NOMATCH);
2657             }
2658           break;
2659 
2660           case PT_GC:
2661           for (i = 1; i <= Lmin; i++)
2662             {
2663             if (Feptr >= mb->end_subject)
2664               {
2665               SCHECK_PARTIAL();
2666               RRETURN(MATCH_NOMATCH);
2667               }
2668             GETCHARINCTEST(fc, Feptr);
2669             if ((UCD_CATEGORY(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
2670               RRETURN(MATCH_NOMATCH);
2671             }
2672           break;
2673 
2674           case PT_PC:
2675           for (i = 1; i <= Lmin; i++)
2676             {
2677             if (Feptr >= mb->end_subject)
2678               {
2679               SCHECK_PARTIAL();
2680               RRETURN(MATCH_NOMATCH);
2681               }
2682             GETCHARINCTEST(fc, Feptr);
2683             if ((UCD_CHARTYPE(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
2684               RRETURN(MATCH_NOMATCH);
2685             }
2686           break;
2687 
2688           case PT_SC:
2689           for (i = 1; i <= Lmin; i++)
2690             {
2691             if (Feptr >= mb->end_subject)
2692               {
2693               SCHECK_PARTIAL();
2694               RRETURN(MATCH_NOMATCH);
2695               }
2696             GETCHARINCTEST(fc, Feptr);
2697             if ((UCD_SCRIPT(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
2698               RRETURN(MATCH_NOMATCH);
2699             }
2700           break;
2701 
2702           case PT_ALNUM:
2703           for (i = 1; i <= Lmin; i++)
2704             {
2705             int category;
2706             if (Feptr >= mb->end_subject)
2707               {
2708               SCHECK_PARTIAL();
2709               RRETURN(MATCH_NOMATCH);
2710               }
2711             GETCHARINCTEST(fc, Feptr);
2712             category = UCD_CATEGORY(fc);
2713             if ((category == ucp_L || category == ucp_N) == (Lctype == OP_NOTPROP))
2714               RRETURN(MATCH_NOMATCH);
2715             }
2716           break;
2717 
2718           /* Perl space used to exclude VT, but from Perl 5.18 it is included,
2719           which means that Perl space and POSIX space are now identical. PCRE
2720           was changed at release 8.34. */
2721 
2722           case PT_SPACE:    /* Perl space */
2723           case PT_PXSPACE:  /* POSIX space */
2724           for (i = 1; i <= Lmin; i++)
2725             {
2726             if (Feptr >= mb->end_subject)
2727               {
2728               SCHECK_PARTIAL();
2729               RRETURN(MATCH_NOMATCH);
2730               }
2731             GETCHARINCTEST(fc, Feptr);
2732             switch(fc)
2733               {
2734               HSPACE_CASES:
2735               VSPACE_CASES:
2736               if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
2737               break;
2738 
2739               default:
2740               if ((UCD_CATEGORY(fc) == ucp_Z) == (Lctype == OP_NOTPROP))
2741                 RRETURN(MATCH_NOMATCH);
2742               break;
2743               }
2744             }
2745           break;
2746 
2747           case PT_WORD:
2748           for (i = 1; i <= Lmin; i++)
2749             {
2750             int category;
2751             if (Feptr >= mb->end_subject)
2752               {
2753               SCHECK_PARTIAL();
2754               RRETURN(MATCH_NOMATCH);
2755               }
2756             GETCHARINCTEST(fc, Feptr);
2757             category = UCD_CATEGORY(fc);
2758             if ((category == ucp_L || category == ucp_N ||
2759                 fc == CHAR_UNDERSCORE) == (Lctype == OP_NOTPROP))
2760               RRETURN(MATCH_NOMATCH);
2761             }
2762           break;
2763 
2764           case PT_CLIST:
2765           for (i = 1; i <= Lmin; i++)
2766             {
2767             const uint32_t *cp;
2768             if (Feptr >= mb->end_subject)
2769               {
2770               SCHECK_PARTIAL();
2771               RRETURN(MATCH_NOMATCH);
2772               }
2773             GETCHARINCTEST(fc, Feptr);
2774             cp = PRIV(ucd_caseless_sets) + Lpropvalue;
2775             for (;;)
2776               {
2777               if (fc < *cp)
2778                 {
2779                 if (Lctype == OP_NOTPROP) break;
2780                 RRETURN(MATCH_NOMATCH);
2781                 }
2782               if (fc == *cp++)
2783                 {
2784                 if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
2785                 break;
2786                 }
2787               }
2788             }
2789           break;
2790 
2791           case PT_UCNC:
2792           for (i = 1; i <= Lmin; i++)
2793             {
2794             if (Feptr >= mb->end_subject)
2795               {
2796               SCHECK_PARTIAL();
2797               RRETURN(MATCH_NOMATCH);
2798               }
2799             GETCHARINCTEST(fc, Feptr);
2800             if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT ||
2801                  fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) ||
2802                  fc >= 0xe000) == (Lctype == OP_NOTPROP))
2803               RRETURN(MATCH_NOMATCH);
2804             }
2805           break;
2806 
2807           /* This should not occur */
2808 
2809           default:
2810           return PCRE2_ERROR_INTERNAL;
2811           }
2812         }
2813 
2814       /* Match extended Unicode sequences. We will get here only if the
2815       support is in the binary; otherwise a compile-time error occurs. */
2816 
2817       else if (Lctype == OP_EXTUNI)
2818         {
2819         for (i = 1; i <= Lmin; i++)
2820           {
2821           if (Feptr >= mb->end_subject)
2822             {
2823             SCHECK_PARTIAL();
2824             RRETURN(MATCH_NOMATCH);
2825             }
2826           else
2827             {
2828             GETCHARINCTEST(fc, Feptr);
2829             Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject,
2830               mb->end_subject, utf, NULL);
2831             }
2832           CHECK_PARTIAL();
2833           }
2834         }
2835       else
2836 #endif     /* SUPPORT_UNICODE */
2837 
2838 /* Handle all other cases in UTF mode */
2839 
2840 #ifdef SUPPORT_UNICODE
2841       if (utf) switch(Lctype)
2842         {
2843         case OP_ANY:
2844         for (i = 1; i <= Lmin; i++)
2845           {
2846           if (Feptr >= mb->end_subject)
2847             {
2848             SCHECK_PARTIAL();
2849             RRETURN(MATCH_NOMATCH);
2850             }
2851           if (IS_NEWLINE(Feptr)) RRETURN(MATCH_NOMATCH);
2852           if (mb->partial != 0 &&
2853               Feptr + 1 >= mb->end_subject &&
2854               NLBLOCK->nltype == NLTYPE_FIXED &&
2855               NLBLOCK->nllen == 2 &&
2856               UCHAR21(Feptr) == NLBLOCK->nl[0])
2857             {
2858             mb->hitend = TRUE;
2859             if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
2860             }
2861           Feptr++;
2862           ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
2863           }
2864         break;
2865 
2866         case OP_ALLANY:
2867         for (i = 1; i <= Lmin; i++)
2868           {
2869           if (Feptr >= mb->end_subject)
2870             {
2871             SCHECK_PARTIAL();
2872             RRETURN(MATCH_NOMATCH);
2873             }
2874           Feptr++;
2875           ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
2876           }
2877         break;
2878 
2879         case OP_ANYBYTE:
2880         if (Feptr > mb->end_subject - Lmin) RRETURN(MATCH_NOMATCH);
2881         Feptr += Lmin;
2882         break;
2883 
2884         case OP_ANYNL:
2885         for (i = 1; i <= Lmin; i++)
2886           {
2887           if (Feptr >= mb->end_subject)
2888             {
2889             SCHECK_PARTIAL();
2890             RRETURN(MATCH_NOMATCH);
2891             }
2892           GETCHARINC(fc, Feptr);
2893           switch(fc)
2894             {
2895             default: RRETURN(MATCH_NOMATCH);
2896 
2897             case CHAR_CR:
2898             if (Feptr < mb->end_subject && UCHAR21(Feptr) == CHAR_LF) Feptr++;
2899             break;
2900 
2901             case CHAR_LF:
2902             break;
2903 
2904             case CHAR_VT:
2905             case CHAR_FF:
2906             case CHAR_NEL:
2907 #ifndef EBCDIC
2908             case 0x2028:
2909             case 0x2029:
2910 #endif  /* Not EBCDIC */
2911             if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) RRETURN(MATCH_NOMATCH);
2912             break;
2913             }
2914           }
2915         break;
2916 
2917         case OP_NOT_HSPACE:
2918         for (i = 1; i <= Lmin; i++)
2919           {
2920           if (Feptr >= mb->end_subject)
2921             {
2922             SCHECK_PARTIAL();
2923             RRETURN(MATCH_NOMATCH);
2924             }
2925           GETCHARINC(fc, Feptr);
2926           switch(fc)
2927             {
2928             HSPACE_CASES: RRETURN(MATCH_NOMATCH);
2929             default: break;
2930             }
2931           }
2932         break;
2933 
2934         case OP_HSPACE:
2935         for (i = 1; i <= Lmin; i++)
2936           {
2937           if (Feptr >= mb->end_subject)
2938             {
2939             SCHECK_PARTIAL();
2940             RRETURN(MATCH_NOMATCH);
2941             }
2942           GETCHARINC(fc, Feptr);
2943           switch(fc)
2944             {
2945             HSPACE_CASES: break;
2946             default: RRETURN(MATCH_NOMATCH);
2947             }
2948           }
2949         break;
2950 
2951         case OP_NOT_VSPACE:
2952         for (i = 1; i <= Lmin; i++)
2953           {
2954           if (Feptr >= mb->end_subject)
2955             {
2956             SCHECK_PARTIAL();
2957             RRETURN(MATCH_NOMATCH);
2958             }
2959           GETCHARINC(fc, Feptr);
2960           switch(fc)
2961             {
2962             VSPACE_CASES: RRETURN(MATCH_NOMATCH);
2963             default: break;
2964             }
2965           }
2966         break;
2967 
2968         case OP_VSPACE:
2969         for (i = 1; i <= Lmin; i++)
2970           {
2971           if (Feptr >= mb->end_subject)
2972             {
2973             SCHECK_PARTIAL();
2974             RRETURN(MATCH_NOMATCH);
2975             }
2976           GETCHARINC(fc, Feptr);
2977           switch(fc)
2978             {
2979             VSPACE_CASES: break;
2980             default: RRETURN(MATCH_NOMATCH);
2981             }
2982           }
2983         break;
2984 
2985         case OP_NOT_DIGIT:
2986         for (i = 1; i <= Lmin; i++)
2987           {
2988           if (Feptr >= mb->end_subject)
2989             {
2990             SCHECK_PARTIAL();
2991             RRETURN(MATCH_NOMATCH);
2992             }
2993           GETCHARINC(fc, Feptr);
2994           if (fc < 128 && (mb->ctypes[fc] & ctype_digit) != 0)
2995             RRETURN(MATCH_NOMATCH);
2996           }
2997         break;
2998 
2999         case OP_DIGIT:
3000         for (i = 1; i <= Lmin; i++)
3001           {
3002           uint32_t cc;
3003           if (Feptr >= mb->end_subject)
3004             {
3005             SCHECK_PARTIAL();
3006             RRETURN(MATCH_NOMATCH);
3007             }
3008           cc = UCHAR21(Feptr);
3009           if (cc >= 128 || (mb->ctypes[cc] & ctype_digit) == 0)
3010             RRETURN(MATCH_NOMATCH);
3011           Feptr++;
3012           /* No need to skip more code units - we know it has only one. */
3013           }
3014         break;
3015 
3016         case OP_NOT_WHITESPACE:
3017         for (i = 1; i <= Lmin; i++)
3018           {
3019           uint32_t cc;
3020           if (Feptr >= mb->end_subject)
3021             {
3022             SCHECK_PARTIAL();
3023             RRETURN(MATCH_NOMATCH);
3024             }
3025           cc = UCHAR21(Feptr);
3026           if (cc < 128 && (mb->ctypes[cc] & ctype_space) != 0)
3027             RRETURN(MATCH_NOMATCH);
3028           Feptr++;
3029           ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
3030           }
3031         break;
3032 
3033         case OP_WHITESPACE:
3034         for (i = 1; i <= Lmin; i++)
3035           {
3036           uint32_t cc;
3037           if (Feptr >= mb->end_subject)
3038             {
3039             SCHECK_PARTIAL();
3040             RRETURN(MATCH_NOMATCH);
3041             }
3042           cc = UCHAR21(Feptr);
3043           if (cc >= 128 || (mb->ctypes[cc] & ctype_space) == 0)
3044             RRETURN(MATCH_NOMATCH);
3045           Feptr++;
3046           /* No need to skip more code units - we know it has only one. */
3047           }
3048         break;
3049 
3050         case OP_NOT_WORDCHAR:
3051         for (i = 1; i <= Lmin; i++)
3052           {
3053           uint32_t cc;
3054           if (Feptr >= mb->end_subject)
3055             {
3056             SCHECK_PARTIAL();
3057             RRETURN(MATCH_NOMATCH);
3058             }
3059           cc = UCHAR21(Feptr);
3060           if (cc < 128 && (mb->ctypes[cc] & ctype_word) != 0)
3061             RRETURN(MATCH_NOMATCH);
3062           Feptr++;
3063           ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
3064           }
3065         break;
3066 
3067         case OP_WORDCHAR:
3068         for (i = 1; i <= Lmin; i++)
3069           {
3070           uint32_t cc;
3071           if (Feptr >= mb->end_subject)
3072             {
3073             SCHECK_PARTIAL();
3074             RRETURN(MATCH_NOMATCH);
3075             }
3076           cc = UCHAR21(Feptr);
3077           if (cc >= 128 || (mb->ctypes[cc] & ctype_word) == 0)
3078             RRETURN(MATCH_NOMATCH);
3079           Feptr++;
3080           /* No need to skip more code units - we know it has only one. */
3081           }
3082         break;
3083 
3084         default:
3085         return PCRE2_ERROR_INTERNAL;
3086         }  /* End switch(Lctype) */
3087 
3088       else
3089 #endif     /* SUPPORT_UNICODE */
3090 
3091       /* Code for the non-UTF case for minimum matching of operators other
3092       than OP_PROP and OP_NOTPROP. */
3093 
3094       switch(Lctype)
3095         {
3096         case OP_ANY:
3097         for (i = 1; i <= Lmin; i++)
3098           {
3099           if (Feptr >= mb->end_subject)
3100             {
3101             SCHECK_PARTIAL();
3102             RRETURN(MATCH_NOMATCH);
3103             }
3104           if (IS_NEWLINE(Feptr)) RRETURN(MATCH_NOMATCH);
3105           if (mb->partial != 0 &&
3106               Feptr + 1 >= mb->end_subject &&
3107               NLBLOCK->nltype == NLTYPE_FIXED &&
3108               NLBLOCK->nllen == 2 &&
3109               *Feptr == NLBLOCK->nl[0])
3110             {
3111             mb->hitend = TRUE;
3112             if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
3113             }
3114           Feptr++;
3115           }
3116         break;
3117 
3118         case OP_ALLANY:
3119         if (Feptr > mb->end_subject - Lmin)
3120           {
3121           SCHECK_PARTIAL();
3122           RRETURN(MATCH_NOMATCH);
3123           }
3124         Feptr += Lmin;
3125         break;
3126 
3127         /* This OP_ANYBYTE case will never be reached because \C gets turned
3128         into OP_ALLANY in non-UTF mode. Cut out the code so that coverage
3129         reports don't complain about its never being used. */
3130 
3131 /*        case OP_ANYBYTE:
3132 *        if (Feptr > mb->end_subject - Lmin)
3133 *          {
3134 *          SCHECK_PARTIAL();
3135 *          RRETURN(MATCH_NOMATCH);
3136 *          }
3137 *        Feptr += Lmin;
3138 *        break;
3139 */
3140         case OP_ANYNL:
3141         for (i = 1; i <= Lmin; i++)
3142           {
3143           if (Feptr >= mb->end_subject)
3144             {
3145             SCHECK_PARTIAL();
3146             RRETURN(MATCH_NOMATCH);
3147             }
3148           switch(*Feptr++)
3149             {
3150             default: RRETURN(MATCH_NOMATCH);
3151 
3152             case CHAR_CR:
3153             if (Feptr < mb->end_subject && *Feptr == CHAR_LF) Feptr++;
3154             break;
3155 
3156             case CHAR_LF:
3157             break;
3158 
3159             case CHAR_VT:
3160             case CHAR_FF:
3161             case CHAR_NEL:
3162 #if PCRE2_CODE_UNIT_WIDTH != 8
3163             case 0x2028:
3164             case 0x2029:
3165 #endif
3166             if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) RRETURN(MATCH_NOMATCH);
3167             break;
3168             }
3169           }
3170         break;
3171 
3172         case OP_NOT_HSPACE:
3173         for (i = 1; i <= Lmin; i++)
3174           {
3175           if (Feptr >= mb->end_subject)
3176             {
3177             SCHECK_PARTIAL();
3178             RRETURN(MATCH_NOMATCH);
3179             }
3180           switch(*Feptr++)
3181             {
3182             default: break;
3183             HSPACE_BYTE_CASES:
3184 #if PCRE2_CODE_UNIT_WIDTH != 8
3185             HSPACE_MULTIBYTE_CASES:
3186 #endif
3187             RRETURN(MATCH_NOMATCH);
3188             }
3189           }
3190         break;
3191 
3192         case OP_HSPACE:
3193         for (i = 1; i <= Lmin; i++)
3194           {
3195           if (Feptr >= mb->end_subject)
3196             {
3197             SCHECK_PARTIAL();
3198             RRETURN(MATCH_NOMATCH);
3199             }
3200           switch(*Feptr++)
3201             {
3202             default: RRETURN(MATCH_NOMATCH);
3203             HSPACE_BYTE_CASES:
3204 #if PCRE2_CODE_UNIT_WIDTH != 8
3205             HSPACE_MULTIBYTE_CASES:
3206 #endif
3207             break;
3208             }
3209           }
3210         break;
3211 
3212         case OP_NOT_VSPACE:
3213         for (i = 1; i <= Lmin; i++)
3214           {
3215           if (Feptr >= mb->end_subject)
3216             {
3217             SCHECK_PARTIAL();
3218             RRETURN(MATCH_NOMATCH);
3219             }
3220           switch(*Feptr++)
3221             {
3222             VSPACE_BYTE_CASES:
3223 #if PCRE2_CODE_UNIT_WIDTH != 8
3224             VSPACE_MULTIBYTE_CASES:
3225 #endif
3226             RRETURN(MATCH_NOMATCH);
3227             default: break;
3228             }
3229           }
3230         break;
3231 
3232         case OP_VSPACE:
3233         for (i = 1; i <= Lmin; i++)
3234           {
3235           if (Feptr >= mb->end_subject)
3236             {
3237             SCHECK_PARTIAL();
3238             RRETURN(MATCH_NOMATCH);
3239             }
3240           switch(*Feptr++)
3241             {
3242             default: RRETURN(MATCH_NOMATCH);
3243             VSPACE_BYTE_CASES:
3244 #if PCRE2_CODE_UNIT_WIDTH != 8
3245             VSPACE_MULTIBYTE_CASES:
3246 #endif
3247             break;
3248             }
3249           }
3250         break;
3251 
3252         case OP_NOT_DIGIT:
3253         for (i = 1; i <= Lmin; i++)
3254           {
3255           if (Feptr >= mb->end_subject)
3256             {
3257             SCHECK_PARTIAL();
3258             RRETURN(MATCH_NOMATCH);
3259             }
3260           if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_digit) != 0)
3261             RRETURN(MATCH_NOMATCH);
3262           Feptr++;
3263           }
3264         break;
3265 
3266         case OP_DIGIT:
3267         for (i = 1; i <= Lmin; i++)
3268           {
3269           if (Feptr >= mb->end_subject)
3270             {
3271             SCHECK_PARTIAL();
3272             RRETURN(MATCH_NOMATCH);
3273             }
3274           if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_digit) == 0)
3275             RRETURN(MATCH_NOMATCH);
3276           Feptr++;
3277           }
3278         break;
3279 
3280         case OP_NOT_WHITESPACE:
3281         for (i = 1; i <= Lmin; i++)
3282           {
3283           if (Feptr >= mb->end_subject)
3284             {
3285             SCHECK_PARTIAL();
3286             RRETURN(MATCH_NOMATCH);
3287             }
3288           if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_space) != 0)
3289             RRETURN(MATCH_NOMATCH);
3290           Feptr++;
3291           }
3292         break;
3293 
3294         case OP_WHITESPACE:
3295         for (i = 1; i <= Lmin; i++)
3296           {
3297           if (Feptr >= mb->end_subject)
3298             {
3299             SCHECK_PARTIAL();
3300             RRETURN(MATCH_NOMATCH);
3301             }
3302           if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_space) == 0)
3303             RRETURN(MATCH_NOMATCH);
3304           Feptr++;
3305           }
3306         break;
3307 
3308         case OP_NOT_WORDCHAR:
3309         for (i = 1; i <= Lmin; i++)
3310           {
3311           if (Feptr >= mb->end_subject)
3312             {
3313             SCHECK_PARTIAL();
3314             RRETURN(MATCH_NOMATCH);
3315             }
3316           if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_word) != 0)
3317             RRETURN(MATCH_NOMATCH);
3318           Feptr++;
3319           }
3320         break;
3321 
3322         case OP_WORDCHAR:
3323         for (i = 1; i <= Lmin; i++)
3324           {
3325           if (Feptr >= mb->end_subject)
3326             {
3327             SCHECK_PARTIAL();
3328             RRETURN(MATCH_NOMATCH);
3329             }
3330           if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_word) == 0)
3331             RRETURN(MATCH_NOMATCH);
3332           Feptr++;
3333           }
3334         break;
3335 
3336         default:
3337         return PCRE2_ERROR_INTERNAL;
3338         }
3339       }
3340 
3341     /* If Lmin = Lmax we are done. Continue with the main loop. */
3342 
3343     if (Lmin == Lmax) continue;
3344 
3345     /* If minimizing, we have to test the rest of the pattern before each
3346     subsequent match. */
3347 
3348     if (reptype == REPTYPE_MIN)
3349       {
3350 #ifdef SUPPORT_UNICODE
3351       if (proptype >= 0)
3352         {
3353         switch(proptype)
3354           {
3355           case PT_ANY:
3356           for (;;)
3357             {
3358             RMATCH(Fecode, RM208);
3359             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3360             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3361             if (Feptr >= mb->end_subject)
3362               {
3363               SCHECK_PARTIAL();
3364               RRETURN(MATCH_NOMATCH);
3365               }
3366             GETCHARINCTEST(fc, Feptr);
3367             if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
3368             }
3369           /* Control never gets here */
3370 
3371           case PT_LAMP:
3372           for (;;)
3373             {
3374             int chartype;
3375             RMATCH(Fecode, RM209);
3376             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3377             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3378             if (Feptr >= mb->end_subject)
3379               {
3380               SCHECK_PARTIAL();
3381               RRETURN(MATCH_NOMATCH);
3382               }
3383             GETCHARINCTEST(fc, Feptr);
3384             chartype = UCD_CHARTYPE(fc);
3385             if ((chartype == ucp_Lu ||
3386                  chartype == ucp_Ll ||
3387                  chartype == ucp_Lt) == (Lctype == OP_NOTPROP))
3388               RRETURN(MATCH_NOMATCH);
3389             }
3390           /* Control never gets here */
3391 
3392           case PT_GC:
3393           for (;;)
3394             {
3395             RMATCH(Fecode, RM210);
3396             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3397             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3398             if (Feptr >= mb->end_subject)
3399               {
3400               SCHECK_PARTIAL();
3401               RRETURN(MATCH_NOMATCH);
3402               }
3403             GETCHARINCTEST(fc, Feptr);
3404             if ((UCD_CATEGORY(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
3405               RRETURN(MATCH_NOMATCH);
3406             }
3407           /* Control never gets here */
3408 
3409           case PT_PC:
3410           for (;;)
3411             {
3412             RMATCH(Fecode, RM211);
3413             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3414             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3415             if (Feptr >= mb->end_subject)
3416               {
3417               SCHECK_PARTIAL();
3418               RRETURN(MATCH_NOMATCH);
3419               }
3420             GETCHARINCTEST(fc, Feptr);
3421             if ((UCD_CHARTYPE(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
3422               RRETURN(MATCH_NOMATCH);
3423             }
3424           /* Control never gets here */
3425 
3426           case PT_SC:
3427           for (;;)
3428             {
3429             RMATCH(Fecode, RM212);
3430             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3431             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3432             if (Feptr >= mb->end_subject)
3433               {
3434               SCHECK_PARTIAL();
3435               RRETURN(MATCH_NOMATCH);
3436               }
3437             GETCHARINCTEST(fc, Feptr);
3438             if ((UCD_SCRIPT(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
3439               RRETURN(MATCH_NOMATCH);
3440             }
3441           /* Control never gets here */
3442 
3443           case PT_ALNUM:
3444           for (;;)
3445             {
3446             int category;
3447             RMATCH(Fecode, RM213);
3448             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3449             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3450             if (Feptr >= mb->end_subject)
3451               {
3452               SCHECK_PARTIAL();
3453               RRETURN(MATCH_NOMATCH);
3454               }
3455             GETCHARINCTEST(fc, Feptr);
3456             category = UCD_CATEGORY(fc);
3457             if ((category == ucp_L || category == ucp_N) ==
3458                 (Lctype == OP_NOTPROP))
3459               RRETURN(MATCH_NOMATCH);
3460             }
3461           /* Control never gets here */
3462 
3463           /* Perl space used to exclude VT, but from Perl 5.18 it is included,
3464           which means that Perl space and POSIX space are now identical. PCRE
3465           was changed at release 8.34. */
3466 
3467           case PT_SPACE:    /* Perl space */
3468           case PT_PXSPACE:  /* POSIX space */
3469           for (;;)
3470             {
3471             RMATCH(Fecode, RM214);
3472             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3473             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3474             if (Feptr >= mb->end_subject)
3475               {
3476               SCHECK_PARTIAL();
3477               RRETURN(MATCH_NOMATCH);
3478               }
3479             GETCHARINCTEST(fc, Feptr);
3480             switch(fc)
3481               {
3482               HSPACE_CASES:
3483               VSPACE_CASES:
3484               if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
3485               break;
3486 
3487               default:
3488               if ((UCD_CATEGORY(fc) == ucp_Z) == (Lctype == OP_NOTPROP))
3489                 RRETURN(MATCH_NOMATCH);
3490               break;
3491               }
3492             }
3493           /* Control never gets here */
3494 
3495           case PT_WORD:
3496           for (;;)
3497             {
3498             int category;
3499             RMATCH(Fecode, RM215);
3500             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3501             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3502             if (Feptr >= mb->end_subject)
3503               {
3504               SCHECK_PARTIAL();
3505               RRETURN(MATCH_NOMATCH);
3506               }
3507             GETCHARINCTEST(fc, Feptr);
3508             category = UCD_CATEGORY(fc);
3509             if ((category == ucp_L ||
3510                  category == ucp_N ||
3511                  fc == CHAR_UNDERSCORE) == (Lctype == OP_NOTPROP))
3512               RRETURN(MATCH_NOMATCH);
3513             }
3514           /* Control never gets here */
3515 
3516           case PT_CLIST:
3517           for (;;)
3518             {
3519             const uint32_t *cp;
3520             RMATCH(Fecode, RM216);
3521             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3522             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3523             if (Feptr >= mb->end_subject)
3524               {
3525               SCHECK_PARTIAL();
3526               RRETURN(MATCH_NOMATCH);
3527               }
3528             GETCHARINCTEST(fc, Feptr);
3529             cp = PRIV(ucd_caseless_sets) + Lpropvalue;
3530             for (;;)
3531               {
3532               if (fc < *cp)
3533                 {
3534                 if (Lctype == OP_NOTPROP) break;
3535                 RRETURN(MATCH_NOMATCH);
3536                 }
3537               if (fc == *cp++)
3538                 {
3539                 if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
3540                 break;
3541                 }
3542               }
3543             }
3544           /* Control never gets here */
3545 
3546           case PT_UCNC:
3547           for (;;)
3548             {
3549             RMATCH(Fecode, RM217);
3550             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3551             if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3552             if (Feptr >= mb->end_subject)
3553               {
3554               SCHECK_PARTIAL();
3555               RRETURN(MATCH_NOMATCH);
3556               }
3557             GETCHARINCTEST(fc, Feptr);
3558             if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT ||
3559                  fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) ||
3560                  fc >= 0xe000) == (Lctype == OP_NOTPROP))
3561               RRETURN(MATCH_NOMATCH);
3562             }
3563           /* Control never gets here */
3564 
3565           /* This should never occur */
3566           default:
3567           return PCRE2_ERROR_INTERNAL;
3568           }
3569         }
3570 
3571       /* Match extended Unicode sequences. We will get here only if the
3572       support is in the binary; otherwise a compile-time error occurs. */
3573 
3574       else if (Lctype == OP_EXTUNI)
3575         {
3576         for (;;)
3577           {
3578           RMATCH(Fecode, RM218);
3579           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3580           if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3581           if (Feptr >= mb->end_subject)
3582             {
3583             SCHECK_PARTIAL();
3584             RRETURN(MATCH_NOMATCH);
3585             }
3586           else
3587             {
3588             GETCHARINCTEST(fc, Feptr);
3589             Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject, mb->end_subject,
3590               utf, NULL);
3591             }
3592           CHECK_PARTIAL();
3593           }
3594         }
3595       else
3596 #endif     /* SUPPORT_UNICODE */
3597 
3598       /* UTF mode for non-property testing character types. */
3599 
3600 #ifdef SUPPORT_UNICODE
3601       if (utf)
3602         {
3603         for (;;)
3604           {
3605           RMATCH(Fecode, RM219);
3606           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3607           if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3608           if (Feptr >= mb->end_subject)
3609             {
3610             SCHECK_PARTIAL();
3611             RRETURN(MATCH_NOMATCH);
3612             }
3613           if (Lctype == OP_ANY && IS_NEWLINE(Feptr)) RRETURN(MATCH_NOMATCH);
3614           GETCHARINC(fc, Feptr);
3615           switch(Lctype)
3616             {
3617             case OP_ANY:               /* This is the non-NL case */
3618             if (mb->partial != 0 &&    /* Take care with CRLF partial */
3619                 Feptr >= mb->end_subject &&
3620                 NLBLOCK->nltype == NLTYPE_FIXED &&
3621                 NLBLOCK->nllen == 2 &&
3622                 fc == NLBLOCK->nl[0])
3623               {
3624               mb->hitend = TRUE;
3625               if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
3626               }
3627             break;
3628 
3629             case OP_ALLANY:
3630             case OP_ANYBYTE:
3631             break;
3632 
3633             case OP_ANYNL:
3634             switch(fc)
3635               {
3636               default: RRETURN(MATCH_NOMATCH);
3637 
3638               case CHAR_CR:
3639               if (Feptr < mb->end_subject && UCHAR21(Feptr) == CHAR_LF) Feptr++;
3640               break;
3641 
3642               case CHAR_LF:
3643               break;
3644 
3645               case CHAR_VT:
3646               case CHAR_FF:
3647               case CHAR_NEL:
3648 #ifndef EBCDIC
3649               case 0x2028:
3650               case 0x2029:
3651 #endif  /* Not EBCDIC */
3652               if (mb->bsr_convention == PCRE2_BSR_ANYCRLF)
3653                 RRETURN(MATCH_NOMATCH);
3654               break;
3655               }
3656             break;
3657 
3658             case OP_NOT_HSPACE:
3659             switch(fc)
3660               {
3661               HSPACE_CASES: RRETURN(MATCH_NOMATCH);
3662               default: break;
3663               }
3664             break;
3665 
3666             case OP_HSPACE:
3667             switch(fc)
3668               {
3669               HSPACE_CASES: break;
3670               default: RRETURN(MATCH_NOMATCH);
3671               }
3672             break;
3673 
3674             case OP_NOT_VSPACE:
3675             switch(fc)
3676               {
3677               VSPACE_CASES: RRETURN(MATCH_NOMATCH);
3678               default: break;
3679               }
3680             break;
3681 
3682             case OP_VSPACE:
3683             switch(fc)
3684               {
3685               VSPACE_CASES: break;
3686               default: RRETURN(MATCH_NOMATCH);
3687               }
3688             break;
3689 
3690             case OP_NOT_DIGIT:
3691             if (fc < 256 && (mb->ctypes[fc] & ctype_digit) != 0)
3692               RRETURN(MATCH_NOMATCH);
3693             break;
3694 
3695             case OP_DIGIT:
3696             if (fc >= 256 || (mb->ctypes[fc] & ctype_digit) == 0)
3697               RRETURN(MATCH_NOMATCH);
3698             break;
3699 
3700             case OP_NOT_WHITESPACE:
3701             if (fc < 256 && (mb->ctypes[fc] & ctype_space) != 0)
3702               RRETURN(MATCH_NOMATCH);
3703             break;
3704 
3705             case OP_WHITESPACE:
3706             if (fc >= 256 || (mb->ctypes[fc] & ctype_space) == 0)
3707               RRETURN(MATCH_NOMATCH);
3708             break;
3709 
3710             case OP_NOT_WORDCHAR:
3711             if (fc < 256 && (mb->ctypes[fc] & ctype_word) != 0)
3712               RRETURN(MATCH_NOMATCH);
3713             break;
3714 
3715             case OP_WORDCHAR:
3716             if (fc >= 256 || (mb->ctypes[fc] & ctype_word) == 0)
3717               RRETURN(MATCH_NOMATCH);
3718             break;
3719 
3720             default:
3721             return PCRE2_ERROR_INTERNAL;
3722             }
3723           }
3724         }
3725       else
3726 #endif  /* SUPPORT_UNICODE */
3727 
3728       /* Not UTF mode */
3729         {
3730         for (;;)
3731           {
3732           RMATCH(Fecode, RM33);
3733           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3734           if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3735           if (Feptr >= mb->end_subject)
3736             {
3737             SCHECK_PARTIAL();
3738             RRETURN(MATCH_NOMATCH);
3739             }
3740           if (Lctype == OP_ANY && IS_NEWLINE(Feptr))
3741             RRETURN(MATCH_NOMATCH);
3742           fc = *Feptr++;
3743           switch(Lctype)
3744             {
3745             case OP_ANY:               /* This is the non-NL case */
3746             if (mb->partial != 0 &&    /* Take care with CRLF partial */
3747                 Feptr >= mb->end_subject &&
3748                 NLBLOCK->nltype == NLTYPE_FIXED &&
3749                 NLBLOCK->nllen == 2 &&
3750                 fc == NLBLOCK->nl[0])
3751               {
3752               mb->hitend = TRUE;
3753               if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
3754               }
3755             break;
3756 
3757             case OP_ALLANY:
3758             case OP_ANYBYTE:
3759             break;
3760 
3761             case OP_ANYNL:
3762             switch(fc)
3763               {
3764               default: RRETURN(MATCH_NOMATCH);
3765 
3766               case CHAR_CR:
3767               if (Feptr < mb->end_subject && *Feptr == CHAR_LF) Feptr++;
3768               break;
3769 
3770               case CHAR_LF:
3771               break;
3772 
3773               case CHAR_VT:
3774               case CHAR_FF:
3775               case CHAR_NEL:
3776 #if PCRE2_CODE_UNIT_WIDTH != 8
3777               case 0x2028:
3778               case 0x2029:
3779 #endif
3780               if (mb->bsr_convention == PCRE2_BSR_ANYCRLF)
3781                 RRETURN(MATCH_NOMATCH);
3782               break;
3783               }
3784             break;
3785 
3786             case OP_NOT_HSPACE:
3787             switch(fc)
3788               {
3789               default: break;
3790               HSPACE_BYTE_CASES:
3791 #if PCRE2_CODE_UNIT_WIDTH != 8
3792               HSPACE_MULTIBYTE_CASES:
3793 #endif
3794               RRETURN(MATCH_NOMATCH);
3795               }
3796             break;
3797 
3798             case OP_HSPACE:
3799             switch(fc)
3800               {
3801               default: RRETURN(MATCH_NOMATCH);
3802               HSPACE_BYTE_CASES:
3803 #if PCRE2_CODE_UNIT_WIDTH != 8
3804               HSPACE_MULTIBYTE_CASES:
3805 #endif
3806               break;
3807               }
3808             break;
3809 
3810             case OP_NOT_VSPACE:
3811             switch(fc)
3812               {
3813               default: break;
3814               VSPACE_BYTE_CASES:
3815 #if PCRE2_CODE_UNIT_WIDTH != 8
3816               VSPACE_MULTIBYTE_CASES:
3817 #endif
3818               RRETURN(MATCH_NOMATCH);
3819               }
3820             break;
3821 
3822             case OP_VSPACE:
3823             switch(fc)
3824               {
3825               default: RRETURN(MATCH_NOMATCH);
3826               VSPACE_BYTE_CASES:
3827 #if PCRE2_CODE_UNIT_WIDTH != 8
3828               VSPACE_MULTIBYTE_CASES:
3829 #endif
3830               break;
3831               }
3832             break;
3833 
3834             case OP_NOT_DIGIT:
3835             if (MAX_255(fc) && (mb->ctypes[fc] & ctype_digit) != 0)
3836               RRETURN(MATCH_NOMATCH);
3837             break;
3838 
3839             case OP_DIGIT:
3840             if (!MAX_255(fc) || (mb->ctypes[fc] & ctype_digit) == 0)
3841               RRETURN(MATCH_NOMATCH);
3842             break;
3843 
3844             case OP_NOT_WHITESPACE:
3845             if (MAX_255(fc) && (mb->ctypes[fc] & ctype_space) != 0)
3846               RRETURN(MATCH_NOMATCH);
3847             break;
3848 
3849             case OP_WHITESPACE:
3850             if (!MAX_255(fc) || (mb->ctypes[fc] & ctype_space) == 0)
3851               RRETURN(MATCH_NOMATCH);
3852             break;
3853 
3854             case OP_NOT_WORDCHAR:
3855             if (MAX_255(fc) && (mb->ctypes[fc] & ctype_word) != 0)
3856               RRETURN(MATCH_NOMATCH);
3857             break;
3858 
3859             case OP_WORDCHAR:
3860             if (!MAX_255(fc) || (mb->ctypes[fc] & ctype_word) == 0)
3861               RRETURN(MATCH_NOMATCH);
3862             break;
3863 
3864             default:
3865             return PCRE2_ERROR_INTERNAL;
3866             }
3867           }
3868         }
3869       /* Control never gets here */
3870       }
3871 
3872     /* If maximizing, it is worth using inline code for speed, doing the type
3873     test once at the start (i.e. keep it out of the loop). */
3874 
3875     else
3876       {
3877       Lstart_eptr = Feptr;  /* Remember where we started */
3878 
3879 #ifdef SUPPORT_UNICODE
3880       if (proptype >= 0)
3881         {
3882         switch(proptype)
3883           {
3884           case PT_ANY:
3885           for (i = Lmin; i < Lmax; i++)
3886             {
3887             int len = 1;
3888             if (Feptr >= mb->end_subject)
3889               {
3890               SCHECK_PARTIAL();
3891               break;
3892               }
3893             GETCHARLENTEST(fc, Feptr, len);
3894             if (Lctype == OP_NOTPROP) break;
3895             Feptr+= len;
3896             }
3897           break;
3898 
3899           case PT_LAMP:
3900           for (i = Lmin; i < Lmax; i++)
3901             {
3902             int chartype;
3903             int len = 1;
3904             if (Feptr >= mb->end_subject)
3905               {
3906               SCHECK_PARTIAL();
3907               break;
3908               }
3909             GETCHARLENTEST(fc, Feptr, len);
3910             chartype = UCD_CHARTYPE(fc);
3911             if ((chartype == ucp_Lu ||
3912                  chartype == ucp_Ll ||
3913                  chartype == ucp_Lt) == (Lctype == OP_NOTPROP))
3914               break;
3915             Feptr+= len;
3916             }
3917           break;
3918 
3919           case PT_GC:
3920           for (i = Lmin; i < Lmax; i++)
3921             {
3922             int len = 1;
3923             if (Feptr >= mb->end_subject)
3924               {
3925               SCHECK_PARTIAL();
3926               break;
3927               }
3928             GETCHARLENTEST(fc, Feptr, len);
3929             if ((UCD_CATEGORY(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
3930               break;
3931             Feptr+= len;
3932             }
3933           break;
3934 
3935           case PT_PC:
3936           for (i = Lmin; i < Lmax; i++)
3937             {
3938             int len = 1;
3939             if (Feptr >= mb->end_subject)
3940               {
3941               SCHECK_PARTIAL();
3942               break;
3943               }
3944             GETCHARLENTEST(fc, Feptr, len);
3945             if ((UCD_CHARTYPE(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
3946               break;
3947             Feptr+= len;
3948             }
3949           break;
3950 
3951           case PT_SC:
3952           for (i = Lmin; i < Lmax; i++)
3953             {
3954             int len = 1;
3955             if (Feptr >= mb->end_subject)
3956               {
3957               SCHECK_PARTIAL();
3958               break;
3959               }
3960             GETCHARLENTEST(fc, Feptr, len);
3961             if ((UCD_SCRIPT(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
3962               break;
3963             Feptr+= len;
3964             }
3965           break;
3966 
3967           case PT_ALNUM:
3968           for (i = Lmin; i < Lmax; i++)
3969             {
3970             int category;
3971             int len = 1;
3972             if (Feptr >= mb->end_subject)
3973               {
3974               SCHECK_PARTIAL();
3975               break;
3976               }
3977             GETCHARLENTEST(fc, Feptr, len);
3978             category = UCD_CATEGORY(fc);
3979             if ((category == ucp_L || category == ucp_N) ==
3980                 (Lctype == OP_NOTPROP))
3981               break;
3982             Feptr+= len;
3983             }
3984           break;
3985 
3986           /* Perl space used to exclude VT, but from Perl 5.18 it is included,
3987           which means that Perl space and POSIX space are now identical. PCRE
3988           was changed at release 8.34. */
3989 
3990           case PT_SPACE:    /* Perl space */
3991           case PT_PXSPACE:  /* POSIX space */
3992           for (i = Lmin; i < Lmax; i++)
3993             {
3994             int len = 1;
3995             if (Feptr >= mb->end_subject)
3996               {
3997               SCHECK_PARTIAL();
3998               break;
3999               }
4000             GETCHARLENTEST(fc, Feptr, len);
4001             switch(fc)
4002               {
4003               HSPACE_CASES:
4004               VSPACE_CASES:
4005               if (Lctype == OP_NOTPROP) goto ENDLOOP99;  /* Break the loop */
4006               break;
4007 
4008               default:
4009               if ((UCD_CATEGORY(fc) == ucp_Z) == (Lctype == OP_NOTPROP))
4010                 goto ENDLOOP99;   /* Break the loop */
4011               break;
4012               }
4013             Feptr+= len;
4014             }
4015           ENDLOOP99:
4016           break;
4017 
4018           case PT_WORD:
4019           for (i = Lmin; i < Lmax; i++)
4020             {
4021             int category;
4022             int len = 1;
4023             if (Feptr >= mb->end_subject)
4024               {
4025               SCHECK_PARTIAL();
4026               break;
4027               }
4028             GETCHARLENTEST(fc, Feptr, len);
4029             category = UCD_CATEGORY(fc);
4030             if ((category == ucp_L || category == ucp_N ||
4031                  fc == CHAR_UNDERSCORE) == (Lctype == OP_NOTPROP))
4032               break;
4033             Feptr+= len;
4034             }
4035           break;
4036 
4037           case PT_CLIST:
4038           for (i = Lmin; i < Lmax; i++)
4039             {
4040             const uint32_t *cp;
4041             int len = 1;
4042             if (Feptr >= mb->end_subject)
4043               {
4044               SCHECK_PARTIAL();
4045               break;
4046               }
4047             GETCHARLENTEST(fc, Feptr, len);
4048             cp = PRIV(ucd_caseless_sets) + Lpropvalue;
4049             for (;;)
4050               {
4051               if (fc < *cp)
4052                 { if (Lctype == OP_NOTPROP) break; else goto GOT_MAX; }
4053               if (fc == *cp++)
4054                 { if (Lctype == OP_NOTPROP) goto GOT_MAX; else break; }
4055               }
4056             Feptr += len;
4057             }
4058           GOT_MAX:
4059           break;
4060 
4061           case PT_UCNC:
4062           for (i = Lmin; i < Lmax; i++)
4063             {
4064             int len = 1;
4065             if (Feptr >= mb->end_subject)
4066               {
4067               SCHECK_PARTIAL();
4068               break;
4069               }
4070             GETCHARLENTEST(fc, Feptr, len);
4071             if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT ||
4072                  fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) ||
4073                  fc >= 0xe000) == (Lctype == OP_NOTPROP))
4074               break;
4075             Feptr += len;
4076             }
4077           break;
4078 
4079           default:
4080           return PCRE2_ERROR_INTERNAL;
4081           }
4082 
4083         /* Feptr is now past the end of the maximum run */
4084 
4085         if (reptype == REPTYPE_POS) continue;    /* No backtracking */
4086 
4087         /* After \C in UTF mode, Lstart_eptr might be in the middle of a
4088         Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
4089         go too far. */
4090 
4091         for(;;)
4092           {
4093           if (Feptr <= Lstart_eptr) break;
4094           RMATCH(Fecode, RM222);
4095           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4096           Feptr--;
4097           if (utf) BACKCHAR(Feptr);
4098           }
4099         }
4100 
4101       /* Match extended Unicode grapheme clusters. We will get here only if the
4102       support is in the binary; otherwise a compile-time error occurs. */
4103 
4104       else if (Lctype == OP_EXTUNI)
4105         {
4106         for (i = Lmin; i < Lmax; i++)
4107           {
4108           if (Feptr >= mb->end_subject)
4109             {
4110             SCHECK_PARTIAL();
4111             break;
4112             }
4113           else
4114             {
4115             GETCHARINCTEST(fc, Feptr);
4116             Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject, mb->end_subject,
4117               utf, NULL);
4118             }
4119           CHECK_PARTIAL();
4120           }
4121 
4122         /* Feptr is now past the end of the maximum run */
4123 
4124         if (reptype == REPTYPE_POS) continue;    /* No backtracking */
4125 
4126         /* We use <= Lstart_eptr rather than == Lstart_eptr to detect the start
4127         of the run while backtracking because the use of \C in UTF mode can
4128         cause BACKCHAR to move back past Lstart_eptr. This is just palliative;
4129         the use of \C in UTF mode is fraught with danger. */
4130 
4131         for(;;)
4132           {
4133           int lgb, rgb;
4134           PCRE2_SPTR fptr;
4135 
4136           if (Feptr <= Lstart_eptr) break;   /* At start of char run */
4137           RMATCH(Fecode, RM220);
4138           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4139 
4140           /* Backtracking over an extended grapheme cluster involves inspecting
4141           the previous two characters (if present) to see if a break is
4142           permitted between them. */
4143 
4144           Feptr--;
4145           if (!utf) fc = *Feptr; else
4146             {
4147             BACKCHAR(Feptr);
4148             GETCHAR(fc, Feptr);
4149             }
4150           rgb = UCD_GRAPHBREAK(fc);
4151 
4152           for (;;)
4153             {
4154             if (Feptr <= Lstart_eptr) break;   /* At start of char run */
4155             fptr = Feptr - 1;
4156             if (!utf) fc = *fptr; else
4157               {
4158               BACKCHAR(fptr);
4159               GETCHAR(fc, fptr);
4160               }
4161             lgb = UCD_GRAPHBREAK(fc);
4162             if ((PRIV(ucp_gbtable)[lgb] & (1u << rgb)) == 0) break;
4163             Feptr = fptr;
4164             rgb = lgb;
4165             }
4166           }
4167         }
4168 
4169       else
4170 #endif   /* SUPPORT_UNICODE */
4171 
4172 #ifdef SUPPORT_UNICODE
4173       if (utf)
4174         {
4175         switch(Lctype)
4176           {
4177           case OP_ANY:
4178           for (i = Lmin; i < Lmax; i++)
4179             {
4180             if (Feptr >= mb->end_subject)
4181               {
4182               SCHECK_PARTIAL();
4183               break;
4184               }
4185             if (IS_NEWLINE(Feptr)) break;
4186             if (mb->partial != 0 &&    /* Take care with CRLF partial */
4187                 Feptr + 1 >= mb->end_subject &&
4188                 NLBLOCK->nltype == NLTYPE_FIXED &&
4189                 NLBLOCK->nllen == 2 &&
4190                 UCHAR21(Feptr) == NLBLOCK->nl[0])
4191               {
4192               mb->hitend = TRUE;
4193               if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
4194               }
4195             Feptr++;
4196             ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
4197             }
4198           break;
4199 
4200           case OP_ALLANY:
4201           if (Lmax < UINT32_MAX)
4202             {
4203             for (i = Lmin; i < Lmax; i++)
4204               {
4205               if (Feptr >= mb->end_subject)
4206                 {
4207                 SCHECK_PARTIAL();
4208                 break;
4209                 }
4210               Feptr++;
4211               ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
4212               }
4213             }
4214           else
4215             {
4216             Feptr = mb->end_subject;   /* Unlimited UTF-8 repeat */
4217             SCHECK_PARTIAL();
4218             }
4219           break;
4220 
4221           /* The "byte" (i.e. "code unit") case is the same as non-UTF */
4222 
4223           case OP_ANYBYTE:
4224           fc = Lmax - Lmin;
4225           if (fc > (uint32_t)(mb->end_subject - Feptr))
4226             {
4227             Feptr = mb->end_subject;
4228             SCHECK_PARTIAL();
4229             }
4230           else Feptr += fc;
4231           break;
4232 
4233           case OP_ANYNL:
4234           for (i = Lmin; i < Lmax; i++)
4235             {
4236             int len = 1;
4237             if (Feptr >= mb->end_subject)
4238               {
4239               SCHECK_PARTIAL();
4240               break;
4241               }
4242             GETCHARLEN(fc, Feptr, len);
4243             if (fc == CHAR_CR)
4244               {
4245               if (++Feptr >= mb->end_subject) break;
4246               if (UCHAR21(Feptr) == CHAR_LF) Feptr++;
4247               }
4248             else
4249               {
4250               if (fc != CHAR_LF &&
4251                   (mb->bsr_convention == PCRE2_BSR_ANYCRLF ||
4252                    (fc != CHAR_VT && fc != CHAR_FF && fc != CHAR_NEL
4253 #ifndef EBCDIC
4254                     && fc != 0x2028 && fc != 0x2029
4255 #endif  /* Not EBCDIC */
4256                     )))
4257                 break;
4258               Feptr += len;
4259               }
4260             }
4261           break;
4262 
4263           case OP_NOT_HSPACE:
4264           case OP_HSPACE:
4265           for (i = Lmin; i < Lmax; i++)
4266             {
4267             BOOL gotspace;
4268             int len = 1;
4269             if (Feptr >= mb->end_subject)
4270               {
4271               SCHECK_PARTIAL();
4272               break;
4273               }
4274             GETCHARLEN(fc, Feptr, len);
4275             switch(fc)
4276               {
4277               HSPACE_CASES: gotspace = TRUE; break;
4278               default: gotspace = FALSE; break;
4279               }
4280             if (gotspace == (Lctype == OP_NOT_HSPACE)) break;
4281             Feptr += len;
4282             }
4283           break;
4284 
4285           case OP_NOT_VSPACE:
4286           case OP_VSPACE:
4287           for (i = Lmin; i < Lmax; i++)
4288             {
4289             BOOL gotspace;
4290             int len = 1;
4291             if (Feptr >= mb->end_subject)
4292               {
4293               SCHECK_PARTIAL();
4294               break;
4295               }
4296             GETCHARLEN(fc, Feptr, len);
4297             switch(fc)
4298               {
4299               VSPACE_CASES: gotspace = TRUE; break;
4300               default: gotspace = FALSE; break;
4301               }
4302             if (gotspace == (Lctype == OP_NOT_VSPACE)) break;
4303             Feptr += len;
4304             }
4305           break;
4306 
4307           case OP_NOT_DIGIT:
4308           for (i = Lmin; i < Lmax; i++)
4309             {
4310             int len = 1;
4311             if (Feptr >= mb->end_subject)
4312               {
4313               SCHECK_PARTIAL();
4314               break;
4315               }
4316             GETCHARLEN(fc, Feptr, len);
4317             if (fc < 256 && (mb->ctypes[fc] & ctype_digit) != 0) break;
4318             Feptr+= len;
4319             }
4320           break;
4321 
4322           case OP_DIGIT:
4323           for (i = Lmin; i < Lmax; i++)
4324             {
4325             int len = 1;
4326             if (Feptr >= mb->end_subject)
4327               {
4328               SCHECK_PARTIAL();
4329               break;
4330               }
4331             GETCHARLEN(fc, Feptr, len);
4332             if (fc >= 256 ||(mb->ctypes[fc] & ctype_digit) == 0) break;
4333             Feptr+= len;
4334             }
4335           break;
4336 
4337           case OP_NOT_WHITESPACE:
4338           for (i = Lmin; i < Lmax; i++)
4339             {
4340             int len = 1;
4341             if (Feptr >= mb->end_subject)
4342               {
4343               SCHECK_PARTIAL();
4344               break;
4345               }
4346             GETCHARLEN(fc, Feptr, len);
4347             if (fc < 256 && (mb->ctypes[fc] & ctype_space) != 0) break;
4348             Feptr+= len;
4349             }
4350           break;
4351 
4352           case OP_WHITESPACE:
4353           for (i = Lmin; i < Lmax; i++)
4354             {
4355             int len = 1;
4356             if (Feptr >= mb->end_subject)
4357               {
4358               SCHECK_PARTIAL();
4359               break;
4360               }
4361             GETCHARLEN(fc, Feptr, len);
4362             if (fc >= 256 ||(mb->ctypes[fc] & ctype_space) == 0) break;
4363             Feptr+= len;
4364             }
4365           break;
4366 
4367           case OP_NOT_WORDCHAR:
4368           for (i = Lmin; i < Lmax; i++)
4369             {
4370             int len = 1;
4371             if (Feptr >= mb->end_subject)
4372               {
4373               SCHECK_PARTIAL();
4374               break;
4375               }
4376             GETCHARLEN(fc, Feptr, len);
4377             if (fc < 256 && (mb->ctypes[fc] & ctype_word) != 0) break;
4378             Feptr+= len;
4379             }
4380           break;
4381 
4382           case OP_WORDCHAR:
4383           for (i = Lmin; i < Lmax; i++)
4384             {
4385             int len = 1;
4386             if (Feptr >= mb->end_subject)
4387               {
4388               SCHECK_PARTIAL();
4389               break;
4390               }
4391             GETCHARLEN(fc, Feptr, len);
4392             if (fc >= 256 || (mb->ctypes[fc] & ctype_word) == 0) break;
4393             Feptr+= len;
4394             }
4395           break;
4396 
4397           default:
4398           return PCRE2_ERROR_INTERNAL;
4399           }
4400 
4401         if (reptype == REPTYPE_POS) continue;    /* No backtracking */
4402 
4403         /* After \C in UTF mode, Lstart_eptr might be in the middle of a
4404         Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't go
4405         too far. */
4406 
4407         for(;;)
4408           {
4409           if (Feptr <= Lstart_eptr) break;
4410           RMATCH(Fecode, RM221);
4411           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4412           Feptr--;
4413           BACKCHAR(Feptr);
4414           if (Lctype == OP_ANYNL && Feptr > Lstart_eptr &&
4415               UCHAR21(Feptr) == CHAR_NL && UCHAR21(Feptr - 1) == CHAR_CR)
4416             Feptr--;
4417           }
4418         }
4419       else
4420 #endif  /* SUPPORT_UNICODE */
4421 
4422       /* Not UTF mode */
4423         {
4424         switch(Lctype)
4425           {
4426           case OP_ANY:
4427           for (i = Lmin; i < Lmax; i++)
4428             {
4429             if (Feptr >= mb->end_subject)
4430               {
4431               SCHECK_PARTIAL();
4432               break;
4433               }
4434             if (IS_NEWLINE(Feptr)) break;
4435             if (mb->partial != 0 &&    /* Take care with CRLF partial */
4436                 Feptr + 1 >= mb->end_subject &&
4437                 NLBLOCK->nltype == NLTYPE_FIXED &&
4438                 NLBLOCK->nllen == 2 &&
4439                 *Feptr == NLBLOCK->nl[0])
4440               {
4441               mb->hitend = TRUE;
4442               if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
4443               }
4444             Feptr++;
4445             }
4446           break;
4447 
4448           case OP_ALLANY:
4449           case OP_ANYBYTE:
4450           fc = Lmax - Lmin;
4451           if (fc > (uint32_t)(mb->end_subject - Feptr))
4452             {
4453             Feptr = mb->end_subject;
4454             SCHECK_PARTIAL();
4455             }
4456           else Feptr += fc;
4457           break;
4458 
4459           case OP_ANYNL:
4460           for (i = Lmin; i < Lmax; i++)
4461             {
4462             if (Feptr >= mb->end_subject)
4463               {
4464               SCHECK_PARTIAL();
4465               break;
4466               }
4467             fc = *Feptr;
4468             if (fc == CHAR_CR)
4469               {
4470               if (++Feptr >= mb->end_subject) break;
4471               if (*Feptr == CHAR_LF) Feptr++;
4472               }
4473             else
4474               {
4475               if (fc != CHAR_LF && (mb->bsr_convention == PCRE2_BSR_ANYCRLF ||
4476                  (fc != CHAR_VT && fc != CHAR_FF && fc != CHAR_NEL
4477 #if PCRE2_CODE_UNIT_WIDTH != 8
4478                  && fc != 0x2028 && fc != 0x2029
4479 #endif
4480                  ))) break;
4481               Feptr++;
4482               }
4483             }
4484           break;
4485 
4486           case OP_NOT_HSPACE:
4487           for (i = Lmin; i < Lmax; i++)
4488             {
4489             if (Feptr >= mb->end_subject)
4490               {
4491               SCHECK_PARTIAL();
4492               break;
4493               }
4494             switch(*Feptr)
4495               {
4496               default: Feptr++; break;
4497               HSPACE_BYTE_CASES:
4498 #if PCRE2_CODE_UNIT_WIDTH != 8
4499               HSPACE_MULTIBYTE_CASES:
4500 #endif
4501               goto ENDLOOP00;
4502               }
4503             }
4504           ENDLOOP00:
4505           break;
4506 
4507           case OP_HSPACE:
4508           for (i = Lmin; i < Lmax; i++)
4509             {
4510             if (Feptr >= mb->end_subject)
4511               {
4512               SCHECK_PARTIAL();
4513               break;
4514               }
4515             switch(*Feptr)
4516               {
4517               default: goto ENDLOOP01;
4518               HSPACE_BYTE_CASES:
4519 #if PCRE2_CODE_UNIT_WIDTH != 8
4520               HSPACE_MULTIBYTE_CASES:
4521 #endif
4522               Feptr++; break;
4523               }
4524             }
4525           ENDLOOP01:
4526           break;
4527 
4528           case OP_NOT_VSPACE:
4529           for (i = Lmin; i < Lmax; i++)
4530             {
4531             if (Feptr >= mb->end_subject)
4532               {
4533               SCHECK_PARTIAL();
4534               break;
4535               }
4536             switch(*Feptr)
4537               {
4538               default: Feptr++; break;
4539               VSPACE_BYTE_CASES:
4540 #if PCRE2_CODE_UNIT_WIDTH != 8
4541               VSPACE_MULTIBYTE_CASES:
4542 #endif
4543               goto ENDLOOP02;
4544               }
4545             }
4546           ENDLOOP02:
4547           break;
4548 
4549           case OP_VSPACE:
4550           for (i = Lmin; i < Lmax; i++)
4551             {
4552             if (Feptr >= mb->end_subject)
4553               {
4554               SCHECK_PARTIAL();
4555               break;
4556               }
4557             switch(*Feptr)
4558               {
4559               default: goto ENDLOOP03;
4560               VSPACE_BYTE_CASES:
4561 #if PCRE2_CODE_UNIT_WIDTH != 8
4562               VSPACE_MULTIBYTE_CASES:
4563 #endif
4564               Feptr++; break;
4565               }
4566             }
4567           ENDLOOP03:
4568           break;
4569 
4570           case OP_NOT_DIGIT:
4571           for (i = Lmin; i < Lmax; i++)
4572             {
4573             if (Feptr >= mb->end_subject)
4574               {
4575               SCHECK_PARTIAL();
4576               break;
4577               }
4578             if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_digit) != 0)
4579               break;
4580             Feptr++;
4581             }
4582           break;
4583 
4584           case OP_DIGIT:
4585           for (i = Lmin; i < Lmax; i++)
4586             {
4587             if (Feptr >= mb->end_subject)
4588               {
4589               SCHECK_PARTIAL();
4590               break;
4591               }
4592             if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_digit) == 0)
4593               break;
4594             Feptr++;
4595             }
4596           break;
4597 
4598           case OP_NOT_WHITESPACE:
4599           for (i = Lmin; i < Lmax; i++)
4600             {
4601             if (Feptr >= mb->end_subject)
4602               {
4603               SCHECK_PARTIAL();
4604               break;
4605               }
4606             if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_space) != 0)
4607               break;
4608             Feptr++;
4609             }
4610           break;
4611 
4612           case OP_WHITESPACE:
4613           for (i = Lmin; i < Lmax; i++)
4614             {
4615             if (Feptr >= mb->end_subject)
4616               {
4617               SCHECK_PARTIAL();
4618               break;
4619               }
4620             if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_space) == 0)
4621               break;
4622             Feptr++;
4623             }
4624           break;
4625 
4626           case OP_NOT_WORDCHAR:
4627           for (i = Lmin; i < Lmax; i++)
4628             {
4629             if (Feptr >= mb->end_subject)
4630               {
4631               SCHECK_PARTIAL();
4632               break;
4633               }
4634             if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_word) != 0)
4635               break;
4636             Feptr++;
4637             }
4638           break;
4639 
4640           case OP_WORDCHAR:
4641           for (i = Lmin; i < Lmax; i++)
4642             {
4643             if (Feptr >= mb->end_subject)
4644               {
4645               SCHECK_PARTIAL();
4646               break;
4647               }
4648             if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_word) == 0)
4649               break;
4650             Feptr++;
4651             }
4652           break;
4653 
4654           default:
4655           return PCRE2_ERROR_INTERNAL;
4656           }
4657 
4658         if (reptype == REPTYPE_POS) continue;    /* No backtracking */
4659 
4660         for (;;)
4661           {
4662           if (Feptr == Lstart_eptr) break;
4663           RMATCH(Fecode, RM34);
4664           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4665           Feptr--;
4666           if (Lctype == OP_ANYNL && Feptr > Lstart_eptr && *Feptr == CHAR_LF &&
4667               Feptr[-1] == CHAR_CR) Feptr--;
4668           }
4669         }
4670       }
4671     break;  /* End of repeat character type processing */
4672 
4673 #undef Lstart_eptr
4674 #undef Lmin
4675 #undef Lmax
4676 #undef Lctype
4677 #undef Lpropvalue
4678 
4679 
4680     /* ===================================================================== */
4681     /* Match a back reference, possibly repeatedly. Look past the end of the
4682     item to see if there is repeat information following. The OP_REF and
4683     OP_REFI opcodes are used for a reference to a numbered group or to a
4684     non-duplicated named group. For a duplicated named group, OP_DNREF and
4685     OP_DNREFI are used. In this case we must scan the list of groups to which
4686     the name refers, and use the first one that is set. */
4687 
4688 #define Lmin      F->temp_32[0]
4689 #define Lmax      F->temp_32[1]
4690 #define Lcaseless F->temp_32[2]
4691 #define Lstart    F->temp_sptr[0]
4692 #define Loffset   F->temp_size
4693 
4694     case OP_DNREF:
4695     case OP_DNREFI:
4696     Lcaseless = (Fop == OP_DNREFI);
4697       {
4698       int count = GET2(Fecode, 1+IMM2_SIZE);
4699       PCRE2_SPTR slot = mb->name_table + GET2(Fecode, 1) * mb->name_entry_size;
4700       Fecode += 1 + 2*IMM2_SIZE;
4701 
4702       while (count-- > 0)
4703         {
4704         Loffset = (GET2(slot, 0) << 1) - 2;
4705         if (Loffset < Foffset_top && Fovector[Loffset] != PCRE2_UNSET) break;
4706         slot += mb->name_entry_size;
4707         }
4708       }
4709     goto REF_REPEAT;
4710 
4711     case OP_REF:
4712     case OP_REFI:
4713     Lcaseless = (Fop == OP_REFI);
4714     Loffset = (GET2(Fecode, 1) << 1) - 2;
4715     Fecode += 1 + IMM2_SIZE;
4716 
4717     /* Set up for repetition, or handle the non-repeated case. The maximum and
4718     minimum must be in the heap frame, but as they are short-term values, we
4719     use temporary fields. */
4720 
4721     REF_REPEAT:
4722     switch (*Fecode)
4723       {
4724       case OP_CRSTAR:
4725       case OP_CRMINSTAR:
4726       case OP_CRPLUS:
4727       case OP_CRMINPLUS:
4728       case OP_CRQUERY:
4729       case OP_CRMINQUERY:
4730       fc = *Fecode++ - OP_CRSTAR;
4731       Lmin = rep_min[fc];
4732       Lmax = rep_max[fc];
4733       reptype = rep_typ[fc];
4734       break;
4735 
4736       case OP_CRRANGE:
4737       case OP_CRMINRANGE:
4738       Lmin = GET2(Fecode, 1);
4739       Lmax = GET2(Fecode, 1 + IMM2_SIZE);
4740       reptype = rep_typ[*Fecode - OP_CRSTAR];
4741       if (Lmax == 0) Lmax = UINT32_MAX;  /* Max 0 => infinity */
4742       Fecode += 1 + 2 * IMM2_SIZE;
4743       break;
4744 
4745       default:                  /* No repeat follows */
4746         {
4747         rrc = match_ref(Loffset, Lcaseless, F, mb, &length);
4748         if (rrc != 0)
4749           {
4750           if (rrc > 0) Feptr = mb->end_subject;   /* Partial match */
4751           CHECK_PARTIAL();
4752           RRETURN(MATCH_NOMATCH);
4753           }
4754         }
4755       Feptr += length;
4756       continue;              /* With the main loop */
4757       }
4758 
4759     /* Handle repeated back references. If a set group has length zero, just
4760     continue with the main loop, because it matches however many times. For an
4761     unset reference, if the minimum is zero, we can also just continue. We can
4762     also continue if PCRE2_MATCH_UNSET_BACKREF is set, because this makes an
4763     unset group behave as a zero-length group. For any other unset case,
4764     carrying on will result in NOMATCH. */
4765 
4766     if (Loffset < Foffset_top && Fovector[Loffset] != PCRE2_UNSET)
4767       {
4768       if (Fovector[Loffset] == Fovector[Loffset + 1]) continue;
4769       }
4770     else  /* Group is not set */
4771       {
4772       if (Lmin == 0 || (mb->poptions & PCRE2_MATCH_UNSET_BACKREF) != 0)
4773         continue;
4774       }
4775 
4776     /* First, ensure the minimum number of matches are present. */
4777 
4778     for (i = 1; i <= Lmin; i++)
4779       {
4780       PCRE2_SIZE slength;
4781       rrc = match_ref(Loffset, Lcaseless, F, mb, &slength);
4782       if (rrc != 0)
4783         {
4784         if (rrc > 0) Feptr = mb->end_subject;   /* Partial match */
4785         CHECK_PARTIAL();
4786         RRETURN(MATCH_NOMATCH);
4787         }
4788       Feptr += slength;
4789       }
4790 
4791     /* If min = max, we are done. They are not both allowed to be zero. */
4792 
4793     if (Lmin == Lmax) continue;
4794 
4795     /* If minimizing, keep trying and advancing the pointer. */
4796 
4797     if (reptype == REPTYPE_MIN)
4798       {
4799       for (;;)
4800         {
4801         PCRE2_SIZE slength;
4802         RMATCH(Fecode, RM20);
4803         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4804         if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
4805         rrc = match_ref(Loffset, Lcaseless, F, mb, &slength);
4806         if (rrc != 0)
4807           {
4808           if (rrc > 0) Feptr = mb->end_subject;   /* Partial match */
4809           CHECK_PARTIAL();
4810           RRETURN(MATCH_NOMATCH);
4811           }
4812         Feptr += slength;
4813         }
4814       /* Control never gets here */
4815       }
4816 
4817     /* If maximizing, find the longest string and work backwards, as long as
4818     the matched lengths for each iteration are the same. */
4819 
4820     else
4821       {
4822       BOOL samelengths = TRUE;
4823       Lstart = Feptr;     /* Starting position */
4824       Flength = Fovector[Loffset+1] - Fovector[Loffset];
4825 
4826       for (i = Lmin; i < Lmax; i++)
4827         {
4828         PCRE2_SIZE slength;
4829         rrc = match_ref(Loffset, Lcaseless, F, mb, &slength);
4830         if (rrc != 0)
4831           {
4832           /* Can't use CHECK_PARTIAL because we don't want to update Feptr in
4833           the soft partial matching case. */
4834 
4835           if (rrc > 0 && mb->partial != 0 &&
4836               mb->end_subject > mb->start_used_ptr)
4837             {
4838             mb->hitend = TRUE;
4839             if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
4840             }
4841           break;
4842           }
4843 
4844         if (slength != Flength) samelengths = FALSE;
4845         Feptr += slength;
4846         }
4847 
4848       /* If the length matched for each repetition is the same as the length of
4849       the captured group, we can easily work backwards. This is the normal
4850       case. However, in caseless UTF-8 mode there are pairs of case-equivalent
4851       characters whose lengths (in terms of code units) differ. Because this
4852       is very rare, we handle it by re-matching fewer and fewer times. */
4853 
4854       if (samelengths)
4855         {
4856         while (Feptr >= Lstart)
4857           {
4858           RMATCH(Fecode, RM21);
4859           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4860           Feptr -= Flength;
4861           }
4862         }
4863 
4864       /* The rare case of non-matching lengths. Re-scan the repetition for each
4865       iteration. We know that match_ref() will succeed every time. */
4866 
4867       else
4868         {
4869         Lmax = i;
4870         for (;;)
4871           {
4872           RMATCH(Fecode, RM22);
4873           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4874           if (Feptr == Lstart) break; /* Failed after minimal repetition */
4875           Feptr = Lstart;
4876           Lmax--;
4877           for (i = Lmin; i < Lmax; i++)
4878             {
4879             PCRE2_SIZE slength;
4880             (void)match_ref(Loffset, Lcaseless, F, mb, &slength);
4881             Feptr += slength;
4882             }
4883           }
4884         }
4885 
4886       RRETURN(MATCH_NOMATCH);
4887       }
4888     /* Control never gets here */
4889 
4890 #undef Lcaseless
4891 #undef Lmin
4892 #undef Lmax
4893 #undef Lstart
4894 #undef Loffset
4895 
4896 
4897 
4898 /* ========================================================================= */
4899 /*           Opcodes for the start of various parenthesized items            */
4900 /* ========================================================================= */
4901 
4902     /* In all cases, if the result of RMATCH() is MATCH_THEN, check whether the
4903     (*THEN) is within the current branch by comparing the address of OP_THEN
4904     that is passed back with the end of the branch. If (*THEN) is within the
4905     current branch, and the branch is one of two or more alternatives (it
4906     either starts or ends with OP_ALT), we have reached the limit of THEN's
4907     action, so convert the return code to NOMATCH, which will cause normal
4908     backtracking to happen from now on. Otherwise, THEN is passed back to an
4909     outer alternative. This implements Perl's treatment of parenthesized
4910     groups, where a group not containing | does not affect the current
4911     alternative, that is, (X) is NOT the same as (X|(*F)). */
4912 
4913 
4914     /* ===================================================================== */
4915     /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a non-possessive
4916     bracket group, indicating that it may occur zero times. It may repeat
4917     infinitely, or not at all - i.e. it could be ()* or ()? or even (){0} in
4918     the pattern. Brackets with fixed upper repeat limits are compiled as a
4919     number of copies, with the optional ones preceded by BRAZERO or BRAMINZERO.
4920     Possessive groups with possible zero repeats are preceded by BRAPOSZERO. */
4921 
4922 #define Lnext_ecode F->temp_sptr[0]
4923 
4924     case OP_BRAZERO:
4925     Lnext_ecode = Fecode + 1;
4926     RMATCH(Lnext_ecode, RM9);
4927     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4928     do Lnext_ecode += GET(Lnext_ecode, 1); while (*Lnext_ecode == OP_ALT);
4929     Fecode = Lnext_ecode + 1 + LINK_SIZE;
4930     break;
4931 
4932     case OP_BRAMINZERO:
4933     Lnext_ecode = Fecode + 1;
4934     do Lnext_ecode += GET(Lnext_ecode, 1); while (*Lnext_ecode == OP_ALT);
4935     RMATCH(Lnext_ecode + 1 + LINK_SIZE, RM10);
4936     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4937     Fecode++;
4938     break;
4939 
4940 #undef Lnext_ecode
4941 
4942     case OP_SKIPZERO:
4943     Fecode++;
4944     do Fecode += GET(Fecode,1); while (*Fecode == OP_ALT);
4945     Fecode += 1 + LINK_SIZE;
4946     break;
4947 
4948 
4949     /* ===================================================================== */
4950     /* Handle possessive brackets with an unlimited repeat. The end of these
4951     brackets will always be OP_KETRPOS, which returns MATCH_KETRPOS without
4952     going further in the pattern. */
4953 
4954 #define Lframe_type    F->temp_32[0]
4955 #define Lmatched_once  F->temp_32[1]
4956 #define Lzero_allowed  F->temp_32[2]
4957 #define Lstart_eptr    F->temp_sptr[0]
4958 #define Lstart_group   F->temp_sptr[1]
4959 
4960     case OP_BRAPOSZERO:
4961     Lzero_allowed = TRUE;                /* Zero repeat is allowed */
4962     Fecode += 1;
4963     if (*Fecode == OP_CBRAPOS || *Fecode == OP_SCBRAPOS)
4964       goto POSSESSIVE_CAPTURE;
4965     goto POSSESSIVE_NON_CAPTURE;
4966 
4967     case OP_BRAPOS:
4968     case OP_SBRAPOS:
4969     Lzero_allowed = FALSE;               /* Zero repeat not allowed */
4970 
4971     POSSESSIVE_NON_CAPTURE:
4972     Lframe_type = GF_NOCAPTURE;          /* Remembered frame type */
4973     goto POSSESSIVE_GROUP;
4974 
4975     case OP_CBRAPOS:
4976     case OP_SCBRAPOS:
4977     Lzero_allowed = FALSE;               /* Zero repeat not allowed */
4978 
4979     POSSESSIVE_CAPTURE:
4980     number = GET2(Fecode, 1+LINK_SIZE);
4981     Lframe_type = GF_CAPTURE | number;   /* Remembered frame type */
4982 
4983     POSSESSIVE_GROUP:
4984     Lmatched_once = FALSE;               /* Never matched */
4985     Lstart_group = Fecode;               /* Start of this group */
4986 
4987     for (;;)
4988       {
4989       Lstart_eptr = Feptr;               /* Position at group start */
4990       group_frame_type = Lframe_type;
4991       RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM8);
4992       if (rrc == MATCH_KETRPOS)
4993         {
4994         Lmatched_once = TRUE;            /* Matched at least once */
4995         if (Feptr == Lstart_eptr)        /* Empty match; skip to end */
4996           {
4997           do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT);
4998           break;
4999           }
5000 
5001         Fecode = Lstart_group;
5002         continue;
5003         }
5004 
5005       /* See comment above about handling THEN. */
5006 
5007       if (rrc == MATCH_THEN)
5008         {
5009         PCRE2_SPTR next_ecode = Fecode + GET(Fecode,1);
5010         if (mb->verb_ecode_ptr < next_ecode &&
5011             (*Fecode == OP_ALT || *next_ecode == OP_ALT))
5012           rrc = MATCH_NOMATCH;
5013         }
5014 
5015       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5016       Fecode += GET(Fecode, 1);
5017       if (*Fecode != OP_ALT) break;
5018       }
5019 
5020     /* Success if matched something or zero repeat allowed */
5021 
5022     if (Lmatched_once || Lzero_allowed)
5023       {
5024       Fecode += 1 + LINK_SIZE;
5025       break;
5026       }
5027 
5028     RRETURN(MATCH_NOMATCH);
5029 
5030 #undef Lmatched_once
5031 #undef Lzero_allowed
5032 #undef Lframe_type
5033 #undef Lstart_eptr
5034 #undef Lstart_group
5035 
5036 
5037     /* ===================================================================== */
5038     /* Handle non-capturing brackets that cannot match an empty string. When we
5039     get to the final alternative within the brackets, as long as there are no
5040     THEN's in the pattern, we can optimize by not recording a new backtracking
5041     point. (Ideally we should test for a THEN within this group, but we don't
5042     have that information.) Don't do this if we are at the very top level,
5043     however, because that would make handling assertions and once-only brackets
5044     messier when there is nothing to go back to. */
5045 
5046 #define Lframe_type F->temp_32[0]     /* Set for all that use GROUPLOOP */
5047 #define Lnext_branch F->temp_sptr[0]  /* Used only in OP_BRA handling */
5048 
5049     case OP_BRA:
5050     if (mb->hasthen || Frdepth == 0)
5051       {
5052       Lframe_type = 0;
5053       goto GROUPLOOP;
5054       }
5055 
5056     for (;;)
5057       {
5058       Lnext_branch = Fecode + GET(Fecode, 1);
5059       if (*Lnext_branch != OP_ALT) break;
5060 
5061       /* This is never the final branch. We do not need to test for MATCH_THEN
5062       here because this code is not used when there is a THEN in the pattern. */
5063 
5064       RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM1);
5065       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5066       Fecode = Lnext_branch;
5067       }
5068 
5069     /* Hit the start of the final branch. Continue at this level. */
5070 
5071     Fecode += PRIV(OP_lengths)[*Fecode];
5072     break;
5073 
5074 #undef Lnext_branch
5075 
5076 
5077     /* ===================================================================== */
5078     /* Handle a capturing bracket, other than those that are possessive with an
5079     unlimited repeat. */
5080 
5081     case OP_CBRA:
5082     case OP_SCBRA:
5083     Lframe_type = GF_CAPTURE | GET2(Fecode, 1+LINK_SIZE);
5084     goto GROUPLOOP;
5085 
5086 
5087     /* ===================================================================== */
5088     /* Atomic groups and non-capturing brackets that can match an empty string
5089     must record a backtracking point and also set up a chained frame. */
5090 
5091     case OP_ONCE:
5092     case OP_SCRIPT_RUN:
5093     case OP_SBRA:
5094     Lframe_type = GF_NOCAPTURE | Fop;
5095 
5096     GROUPLOOP:
5097     for (;;)
5098       {
5099       group_frame_type = Lframe_type;
5100       RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM2);
5101       if (rrc == MATCH_THEN)
5102         {
5103         PCRE2_SPTR next_ecode = Fecode + GET(Fecode,1);
5104         if (mb->verb_ecode_ptr < next_ecode &&
5105             (*Fecode == OP_ALT || *next_ecode == OP_ALT))
5106           rrc = MATCH_NOMATCH;
5107         }
5108       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5109       Fecode += GET(Fecode, 1);
5110       if (*Fecode != OP_ALT) RRETURN(MATCH_NOMATCH);
5111       }
5112     /* Control never reaches here. */
5113 
5114 #undef Lframe_type
5115 
5116 
5117     /* ===================================================================== */
5118     /* Recursion either matches the current regex, or some subexpression. The
5119     offset data is the offset to the starting bracket from the start of the
5120     whole pattern. (This is so that it works from duplicated subpatterns.) */
5121 
5122 #define Lframe_type F->temp_32[0]
5123 #define Lstart_branch F->temp_sptr[0]
5124 
5125     case OP_RECURSE:
5126     bracode = mb->start_code + GET(Fecode, 1);
5127     number = (bracode == mb->start_code)? 0 : GET2(bracode, 1 + LINK_SIZE);
5128 
5129     /* If we are already in a recursion, check for repeating the same one
5130     without advancing the subject pointer. This should catch convoluted mutual
5131     recursions. (Some simple cases are caught at compile time.) */
5132 
5133     if (Fcurrent_recurse != RECURSE_UNSET)
5134       {
5135       offset = Flast_group_offset;
5136       while (offset != PCRE2_UNSET)
5137         {
5138         N = (heapframe *)((char *)mb->match_frames + offset);
5139         P = (heapframe *)((char *)N - frame_size);
5140         if (N->group_frame_type == (GF_RECURSE | number))
5141           {
5142           if (Feptr == P->eptr) return PCRE2_ERROR_RECURSELOOP;
5143           break;
5144           }
5145         offset = P->last_group_offset;
5146         }
5147       }
5148 
5149     /* Now run the recursion, branch by branch. */
5150 
5151     Lstart_branch = bracode;
5152     Lframe_type = GF_RECURSE | number;
5153 
5154     for (;;)
5155       {
5156       PCRE2_SPTR next_ecode;
5157 
5158       group_frame_type = Lframe_type;
5159       RMATCH(Lstart_branch + PRIV(OP_lengths)[*Lstart_branch], RM11);
5160       next_ecode = Lstart_branch + GET(Lstart_branch,1);
5161 
5162       /* Handle backtracking verbs, which are defined in a range that can
5163       easily be tested for. PCRE does not allow THEN, SKIP, PRUNE or COMMIT to
5164       escape beyond a recursion; they cause a NOMATCH for the entire recursion.
5165 
5166       When one of these verbs triggers, the current recursion group number is
5167       recorded. If it matches the recursion we are processing, the verb
5168       happened within the recursion and we must deal with it. Otherwise it must
5169       have happened after the recursion completed, and so has to be passed
5170       back. See comment above about handling THEN. */
5171 
5172       if (rrc >= MATCH_BACKTRACK_MIN && rrc <= MATCH_BACKTRACK_MAX &&
5173           mb->verb_current_recurse == (Lframe_type ^ GF_RECURSE))
5174         {
5175         if (rrc == MATCH_THEN && mb->verb_ecode_ptr < next_ecode &&
5176             (*Lstart_branch == OP_ALT || *next_ecode == OP_ALT))
5177           rrc = MATCH_NOMATCH;
5178         else RRETURN(MATCH_NOMATCH);
5179         }
5180 
5181       /* Note that carrying on after (*ACCEPT) in a recursion is handled in the
5182       OP_ACCEPT code. Nothing needs to be done here. */
5183 
5184       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5185       Lstart_branch = next_ecode;
5186       if (*Lstart_branch != OP_ALT) RRETURN(MATCH_NOMATCH);
5187       }
5188     /* Control never reaches here. */
5189 
5190 #undef Lframe_type
5191 #undef Lstart_branch
5192 
5193 
5194     /* ===================================================================== */
5195     /* Positive assertions are like other groups except that PCRE doesn't allow
5196     the effect of (*THEN) to escape beyond an assertion; it is therefore
5197     treated as NOMATCH. (*ACCEPT) is treated as successful assertion, with its
5198     captures and mark retained. Any other return is an error. */
5199 
5200 #define Lframe_type  F->temp_32[0]
5201 
5202     case OP_ASSERT:
5203     case OP_ASSERTBACK:
5204     case OP_ASSERT_NA:
5205     case OP_ASSERTBACK_NA:
5206     Lframe_type = GF_NOCAPTURE | Fop;
5207     for (;;)
5208       {
5209       group_frame_type = Lframe_type;
5210       RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM3);
5211       if (rrc == MATCH_ACCEPT)
5212         {
5213         memcpy(Fovector,
5214               (char *)assert_accept_frame + offsetof(heapframe, ovector),
5215               assert_accept_frame->offset_top * sizeof(PCRE2_SIZE));
5216         Foffset_top = assert_accept_frame->offset_top;
5217         Fmark = assert_accept_frame->mark;
5218         break;
5219         }
5220       if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
5221       Fecode += GET(Fecode, 1);
5222       if (*Fecode != OP_ALT) RRETURN(MATCH_NOMATCH);
5223       }
5224 
5225     do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT);
5226     Fecode += 1 + LINK_SIZE;
5227     break;
5228 
5229 #undef Lframe_type
5230 
5231 
5232     /* ===================================================================== */
5233     /* Handle negative assertions. Loop for each non-matching branch as for
5234     positive assertions. */
5235 
5236 #define Lframe_type  F->temp_32[0]
5237 
5238     case OP_ASSERT_NOT:
5239     case OP_ASSERTBACK_NOT:
5240     Lframe_type  = GF_NOCAPTURE | Fop;
5241 
5242     for (;;)
5243       {
5244       group_frame_type = Lframe_type;
5245       RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM4);
5246       switch(rrc)
5247         {
5248         case MATCH_ACCEPT:   /* Assertion matched, therefore it fails. */
5249         case MATCH_MATCH:
5250         RRETURN (MATCH_NOMATCH);
5251 
5252         case MATCH_NOMATCH:  /* Branch failed, try next if present. */
5253         case MATCH_THEN:
5254         Fecode += GET(Fecode, 1);
5255         if (*Fecode != OP_ALT) goto ASSERT_NOT_FAILED;
5256         break;
5257 
5258         case MATCH_COMMIT:   /* Assertion forced to fail, therefore continue. */
5259         case MATCH_SKIP:
5260         case MATCH_PRUNE:
5261         do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT);
5262         goto ASSERT_NOT_FAILED;
5263 
5264         default:             /* Pass back any other return */
5265         RRETURN(rrc);
5266         }
5267       }
5268 
5269     /* None of the branches have matched or there was a backtrack to (*COMMIT),
5270     (*SKIP), (*PRUNE), or (*THEN) in the last branch. This is success for a
5271     negative assertion, so carry on. */
5272 
5273     ASSERT_NOT_FAILED:
5274     Fecode += 1 + LINK_SIZE;
5275     break;
5276 
5277 #undef Lframe_type
5278 
5279 
5280     /* ===================================================================== */
5281     /* The callout item calls an external function, if one is provided, passing
5282     details of the match so far. This is mainly for debugging, though the
5283     function is able to force a failure. */
5284 
5285     case OP_CALLOUT:
5286     case OP_CALLOUT_STR:
5287     rrc = do_callout(F, mb, &length);
5288     if (rrc > 0) RRETURN(MATCH_NOMATCH);
5289     if (rrc < 0) RRETURN(rrc);
5290     Fecode += length;
5291     break;
5292 
5293 
5294     /* ===================================================================== */
5295     /* Conditional group: compilation checked that there are no more than two
5296     branches. If the condition is false, skipping the first branch takes us
5297     past the end of the item if there is only one branch, but that's exactly
5298     what we want. */
5299 
5300     case OP_COND:
5301     case OP_SCOND:
5302 
5303     /* The variable Flength will be added to Fecode when the condition is
5304     false, to get to the second branch. Setting it to the offset to the ALT or
5305     KET, then incrementing Fecode achieves this effect. However, if the second
5306     branch is non-existent, we must point to the KET so that the end of the
5307     group is correctly processed. We now have Fecode pointing to the condition
5308     or callout. */
5309 
5310     Flength = GET(Fecode, 1);    /* Offset to the second branch */
5311     if (Fecode[Flength] != OP_ALT) Flength -= 1 + LINK_SIZE;
5312     Fecode += 1 + LINK_SIZE;     /* From this opcode */
5313 
5314     /* Because of the way auto-callout works during compile, a callout item is
5315     inserted between OP_COND and an assertion condition. Such a callout can
5316     also be inserted manually. */
5317 
5318     if (*Fecode == OP_CALLOUT || *Fecode == OP_CALLOUT_STR)
5319       {
5320       rrc = do_callout(F, mb, &length);
5321       if (rrc > 0) RRETURN(MATCH_NOMATCH);
5322       if (rrc < 0) RRETURN(rrc);
5323 
5324       /* Advance Fecode past the callout, so it now points to the condition. We
5325       must adjust Flength so that the value of Fecode+Flength is unchanged. */
5326 
5327       Fecode += length;
5328       Flength -= length;
5329       }
5330 
5331     /* Test the various possible conditions */
5332 
5333     condition = FALSE;
5334     switch(*Fecode)
5335       {
5336       case OP_RREF:                  /* Group recursion test */
5337       if (Fcurrent_recurse != RECURSE_UNSET)
5338         {
5339         number = GET2(Fecode, 1);
5340         condition = (number == RREF_ANY || number == Fcurrent_recurse);
5341         }
5342       break;
5343 
5344       case OP_DNRREF:       /* Duplicate named group recursion test */
5345       if (Fcurrent_recurse != RECURSE_UNSET)
5346         {
5347         int count = GET2(Fecode, 1 + IMM2_SIZE);
5348         PCRE2_SPTR slot = mb->name_table + GET2(Fecode, 1) * mb->name_entry_size;
5349         while (count-- > 0)
5350           {
5351           number = GET2(slot, 0);
5352           condition = number == Fcurrent_recurse;
5353           if (condition) break;
5354           slot += mb->name_entry_size;
5355           }
5356         }
5357       break;
5358 
5359       case OP_CREF:                         /* Numbered group used test */
5360       offset = (GET2(Fecode, 1) << 1) - 2;  /* Doubled ref number */
5361       condition = offset < Foffset_top && Fovector[offset] != PCRE2_UNSET;
5362       break;
5363 
5364       case OP_DNCREF:      /* Duplicate named group used test */
5365         {
5366         int count = GET2(Fecode, 1 + IMM2_SIZE);
5367         PCRE2_SPTR slot = mb->name_table + GET2(Fecode, 1) * mb->name_entry_size;
5368         while (count-- > 0)
5369           {
5370           offset = (GET2(slot, 0) << 1) - 2;
5371           condition = offset < Foffset_top && Fovector[offset] != PCRE2_UNSET;
5372           if (condition) break;
5373           slot += mb->name_entry_size;
5374           }
5375         }
5376       break;
5377 
5378       case OP_FALSE:
5379       case OP_FAIL:   /* The assertion (?!) becomes OP_FAIL */
5380       break;
5381 
5382       case OP_TRUE:
5383       condition = TRUE;
5384       break;
5385 
5386       /* The condition is an assertion. Run code similar to the assertion code
5387       above. */
5388 
5389 #define Lpositive      F->temp_32[0]
5390 #define Lstart_branch  F->temp_sptr[0]
5391 
5392       default:
5393       Lpositive = (*Fecode == OP_ASSERT || *Fecode == OP_ASSERTBACK);
5394       Lstart_branch = Fecode;
5395 
5396       for (;;)
5397         {
5398         group_frame_type = GF_CONDASSERT | *Fecode;
5399         RMATCH(Lstart_branch + PRIV(OP_lengths)[*Lstart_branch], RM5);
5400 
5401         switch(rrc)
5402           {
5403           case MATCH_ACCEPT:  /* Save captures */
5404           memcpy(Fovector,
5405                 (char *)assert_accept_frame + offsetof(heapframe, ovector),
5406                 assert_accept_frame->offset_top * sizeof(PCRE2_SIZE));
5407           Foffset_top = assert_accept_frame->offset_top;
5408 
5409           /* Fall through */
5410           /* In the case of a match, the captures have already been put into
5411           the current frame. */
5412 
5413           case MATCH_MATCH:
5414           condition = Lpositive;   /* TRUE for positive assertion */
5415           break;
5416 
5417           /* PCRE doesn't allow the effect of (*THEN) to escape beyond an
5418           assertion; it is therefore always treated as NOMATCH. */
5419 
5420           case MATCH_NOMATCH:
5421           case MATCH_THEN:
5422           Lstart_branch += GET(Lstart_branch, 1);
5423           if (*Lstart_branch == OP_ALT) continue;  /* Try next branch */
5424           condition = !Lpositive;  /* TRUE for negative assertion */
5425           break;
5426 
5427           /* These force no match without checking other branches. */
5428 
5429           case MATCH_COMMIT:
5430           case MATCH_SKIP:
5431           case MATCH_PRUNE:
5432           condition = !Lpositive;
5433           break;
5434 
5435           default:
5436           RRETURN(rrc);
5437           }
5438         break;  /* Out of the branch loop */
5439         }
5440 
5441       /* If the condition is true, find the end of the assertion so that
5442       advancing past it gets us to the start of the first branch. */
5443 
5444       if (condition)
5445         {
5446         do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT);
5447         }
5448       break;  /* End of assertion condition */
5449       }
5450 
5451 #undef Lpositive
5452 #undef Lstart_branch
5453 
5454     /* Choose branch according to the condition. */
5455 
5456     Fecode += condition? PRIV(OP_lengths)[*Fecode] : Flength;
5457 
5458     /* If the opcode is OP_SCOND it means we are at a repeated conditional
5459     group that might match an empty string. We must therefore descend a level
5460     so that the start is remembered for checking. For OP_COND we can just
5461     continue at this level. */
5462 
5463     if (Fop == OP_SCOND)
5464       {
5465       group_frame_type  = GF_NOCAPTURE | Fop;
5466       RMATCH(Fecode, RM35);
5467       RRETURN(rrc);
5468       }
5469     break;
5470 
5471 
5472 
5473 /* ========================================================================= */
5474 /*                  End of start of parenthesis opcodes                      */
5475 /* ========================================================================= */
5476 
5477 
5478     /* ===================================================================== */
5479     /* Move the subject pointer back. This occurs only at the start of each
5480     branch of a lookbehind assertion. If we are too close to the start to move
5481     back, fail. When working with UTF-8 we move back a number of characters,
5482     not bytes. */
5483 
5484     case OP_REVERSE:
5485     number = GET(Fecode, 1);
5486 #ifdef SUPPORT_UNICODE
5487     if (utf)
5488       {
5489       while (number-- > 0)
5490         {
5491         if (Feptr <= mb->check_subject) RRETURN(MATCH_NOMATCH);
5492         Feptr--;
5493         BACKCHAR(Feptr);
5494         }
5495       }
5496     else
5497 #endif
5498 
5499     /* No UTF-8 support, or not in UTF-8 mode: count is code unit count */
5500 
5501       {
5502       if ((ptrdiff_t)number > Feptr - mb->start_subject) RRETURN(MATCH_NOMATCH);
5503       Feptr -= number;
5504       }
5505 
5506     /* Save the earliest consulted character, then skip to next opcode */
5507 
5508     if (Feptr < mb->start_used_ptr) mb->start_used_ptr = Feptr;
5509     Fecode += 1 + LINK_SIZE;
5510     break;
5511 
5512 
5513     /* ===================================================================== */
5514     /* An alternation is the end of a branch; scan along to find the end of the
5515     bracketed group. */
5516 
5517     case OP_ALT:
5518     do Fecode += GET(Fecode,1); while (*Fecode == OP_ALT);
5519     break;
5520 
5521 
5522     /* ===================================================================== */
5523     /* The end of a parenthesized group. For all but OP_BRA and OP_COND, the
5524     starting frame was added to the chained frames in order to remember the
5525     starting subject position for the group. */
5526 
5527     case OP_KET:
5528     case OP_KETRMIN:
5529     case OP_KETRMAX:
5530     case OP_KETRPOS:
5531 
5532     bracode = Fecode - GET(Fecode, 1);
5533 
5534     /* Point N to the frame at the start of the most recent group.
5535     Remember the subject pointer at the start of the group. */
5536 
5537     if (*bracode != OP_BRA && *bracode != OP_COND)
5538       {
5539       N = (heapframe *)((char *)mb->match_frames + Flast_group_offset);
5540       P = (heapframe *)((char *)N - frame_size);
5541       Flast_group_offset = P->last_group_offset;
5542 
5543 #ifdef DEBUG_SHOW_RMATCH
5544       fprintf(stderr, "++ KET for frame=%d type=%x prev char offset=%lu\n",
5545         N->rdepth, N->group_frame_type,
5546         (char *)P->eptr - (char *)mb->start_subject);
5547 #endif
5548 
5549       /* If we are at the end of an assertion that is a condition, return a
5550       match, discarding any intermediate backtracking points. Copy back the
5551       mark setting and the captures into the frame before N so that they are
5552       set on return. Doing this for all assertions, both positive and negative,
5553       seems to match what Perl does. */
5554 
5555       if (GF_IDMASK(N->group_frame_type) == GF_CONDASSERT)
5556         {
5557         memcpy((char *)P + offsetof(heapframe, ovector), Fovector,
5558           Foffset_top * sizeof(PCRE2_SIZE));
5559         P->offset_top = Foffset_top;
5560         P->mark = Fmark;
5561         Fback_frame = (char *)F - (char *)P;
5562         RRETURN(MATCH_MATCH);
5563         }
5564       }
5565     else P = NULL;   /* Indicates starting frame not recorded */
5566 
5567     /* The group was not a conditional assertion. */
5568 
5569     switch (*bracode)
5570       {
5571       case OP_BRA:    /* No need to do anything for these */
5572       case OP_COND:
5573       case OP_SCOND:
5574       break;
5575 
5576       /* Non-atomic positive assertions are like OP_BRA, except that the
5577       subject pointer must be put back to where it was at the start of the
5578       assertion. */
5579 
5580       case OP_ASSERT_NA:
5581       case OP_ASSERTBACK_NA:
5582       if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
5583       Feptr = P->eptr;
5584       break;
5585 
5586       /* Atomic positive assertions are like OP_ONCE, except that in addition
5587       the subject pointer must be put back to where it was at the start of the
5588       assertion. */
5589 
5590       case OP_ASSERT:
5591       case OP_ASSERTBACK:
5592       if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
5593       Feptr = P->eptr;
5594       /* Fall through */
5595 
5596       /* For an atomic group, discard internal backtracking points. We must
5597       also ensure that any remaining branches within the top-level of the group
5598       are not tried. Do this by adjusting the code pointer within the backtrack
5599       frame so that it points to the final branch. */
5600 
5601       case OP_ONCE:
5602       Fback_frame = ((char *)F - (char *)P);
5603       for (;;)
5604         {
5605         uint32_t y = GET(P->ecode,1);
5606         if ((P->ecode)[y] != OP_ALT) break;
5607         P->ecode += y;
5608         }
5609       break;
5610 
5611       /* A matching negative assertion returns MATCH, which is turned into
5612       NOMATCH at the assertion level. */
5613 
5614       case OP_ASSERT_NOT:
5615       case OP_ASSERTBACK_NOT:
5616       RRETURN(MATCH_MATCH);
5617 
5618       /* At the end of a script run, apply the script-checking rules. This code
5619       will never be exercised if Unicode support is not compiled, because in
5620       that environment script runs cause an error at compile time. */
5621 
5622       case OP_SCRIPT_RUN:
5623       if (!PRIV(script_run)(P->eptr, Feptr, utf)) RRETURN(MATCH_NOMATCH);
5624       break;
5625 
5626       /* Whole-pattern recursion is coded as a recurse into group 0, so it
5627       won't be picked up here. Instead, we catch it when the OP_END is reached.
5628       Other recursion is handled here. */
5629 
5630       case OP_CBRA:
5631       case OP_CBRAPOS:
5632       case OP_SCBRA:
5633       case OP_SCBRAPOS:
5634       number = GET2(bracode, 1+LINK_SIZE);
5635 
5636       /* Handle a recursively called group. We reinstate the previous set of
5637       captures and then carry on after the recursion call. */
5638 
5639       if (Fcurrent_recurse == number)
5640         {
5641         P = (heapframe *)((char *)N - frame_size);
5642         memcpy((char *)F + offsetof(heapframe, ovector), P->ovector,
5643           Foffset_top * sizeof(PCRE2_SIZE));
5644         Foffset_top = P->offset_top;
5645         Fcapture_last = P->capture_last;
5646         Fcurrent_recurse = P->current_recurse;
5647         Fecode = P->ecode + 1 + LINK_SIZE;
5648         continue;  /* With next opcode */
5649         }
5650 
5651       /* Deal with actual capturing. */
5652 
5653       offset = (number << 1) - 2;
5654       Fcapture_last = number;
5655       Fovector[offset] = P->eptr - mb->start_subject;
5656       Fovector[offset+1] = Feptr - mb->start_subject;
5657       if (offset >= Foffset_top) Foffset_top = offset + 2;
5658       break;
5659       }  /* End actions relating to the starting opcode */
5660 
5661     /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
5662     and return the MATCH_KETRPOS. This makes it possible to do the repeats one
5663     at a time from the outer level. This must precede the empty string test -
5664     in this case that test is done at the outer level. */
5665 
5666     if (*Fecode == OP_KETRPOS)
5667       {
5668       memcpy((char *)P + offsetof(heapframe, eptr),
5669              (char *)F + offsetof(heapframe, eptr),
5670              frame_copy_size);
5671       RRETURN(MATCH_KETRPOS);
5672       }
5673 
5674     /* Handle the different kinds of closing brackets. A non-repeating ket
5675     needs no special action, just continuing at this level. This also happens
5676     for the repeating kets if the group matched no characters, in order to
5677     forcibly break infinite loops. Otherwise, the repeating kets try the rest
5678     of the pattern or restart from the preceding bracket, in the appropriate
5679     order. */
5680 
5681     if (Fop != OP_KET && (P == NULL || Feptr != P->eptr))
5682       {
5683       if (Fop == OP_KETRMIN)
5684         {
5685         RMATCH(Fecode + 1 + LINK_SIZE, RM6);
5686         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5687         Fecode -= GET(Fecode, 1);
5688         break;   /* End of ket processing */
5689         }
5690 
5691       /* Repeat the maximum number of times (KETRMAX) */
5692 
5693       RMATCH(bracode, RM7);
5694       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5695       }
5696 
5697     /* Carry on at this level for a non-repeating ket, or after matching an
5698     empty string, or after repeating for a maximum number of times. */
5699 
5700     Fecode += 1 + LINK_SIZE;
5701     break;
5702 
5703 
5704     /* ===================================================================== */
5705     /* Start and end of line assertions, not multiline mode. */
5706 
5707     case OP_CIRC:   /* Start of line, unless PCRE2_NOTBOL is set. */
5708     if (Feptr != mb->start_subject || (mb->moptions & PCRE2_NOTBOL) != 0)
5709       RRETURN(MATCH_NOMATCH);
5710     Fecode++;
5711     break;
5712 
5713     case OP_SOD:    /* Unconditional start of subject */
5714     if (Feptr != mb->start_subject) RRETURN(MATCH_NOMATCH);
5715     Fecode++;
5716     break;
5717 
5718     /* When PCRE2_NOTEOL is unset, assert before the subject end, or a
5719     terminating newline unless PCRE2_DOLLAR_ENDONLY is set. */
5720 
5721     case OP_DOLL:
5722     if ((mb->moptions & PCRE2_NOTEOL) != 0) RRETURN(MATCH_NOMATCH);
5723     if ((mb->poptions & PCRE2_DOLLAR_ENDONLY) == 0) goto ASSERT_NL_OR_EOS;
5724 
5725     /* Fall through */
5726     /* Unconditional end of subject assertion (\z) */
5727 
5728     case OP_EOD:
5729     if (Feptr < mb->end_subject) RRETURN(MATCH_NOMATCH);
5730     if (mb->partial != 0)
5731       {
5732       mb->hitend = TRUE;
5733       if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
5734       }
5735     Fecode++;
5736     break;
5737 
5738     /* End of subject or ending \n assertion (\Z) */
5739 
5740     case OP_EODN:
5741     ASSERT_NL_OR_EOS:
5742     if (Feptr < mb->end_subject &&
5743         (!IS_NEWLINE(Feptr) || Feptr != mb->end_subject - mb->nllen))
5744       {
5745       if (mb->partial != 0 &&
5746           Feptr + 1 >= mb->end_subject &&
5747           NLBLOCK->nltype == NLTYPE_FIXED &&
5748           NLBLOCK->nllen == 2 &&
5749           UCHAR21TEST(Feptr) == NLBLOCK->nl[0])
5750         {
5751         mb->hitend = TRUE;
5752         if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
5753         }
5754       RRETURN(MATCH_NOMATCH);
5755       }
5756 
5757     /* Either at end of string or \n before end. */
5758 
5759     if (mb->partial != 0)
5760       {
5761       mb->hitend = TRUE;
5762       if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
5763       }
5764     Fecode++;
5765     break;
5766 
5767 
5768     /* ===================================================================== */
5769     /* Start and end of line assertions, multiline mode. */
5770 
5771     /* Start of subject unless notbol, or after any newline except for one at
5772     the very end, unless PCRE2_ALT_CIRCUMFLEX is set. */
5773 
5774     case OP_CIRCM:
5775     if ((mb->moptions & PCRE2_NOTBOL) != 0 && Feptr == mb->start_subject)
5776       RRETURN(MATCH_NOMATCH);
5777     if (Feptr != mb->start_subject &&
5778         ((Feptr == mb->end_subject &&
5779            (mb->poptions & PCRE2_ALT_CIRCUMFLEX) == 0) ||
5780          !WAS_NEWLINE(Feptr)))
5781       RRETURN(MATCH_NOMATCH);
5782     Fecode++;
5783     break;
5784 
5785     /* Assert before any newline, or before end of subject unless noteol is
5786     set. */
5787 
5788     case OP_DOLLM:
5789     if (Feptr < mb->end_subject)
5790       {
5791       if (!IS_NEWLINE(Feptr))
5792         {
5793         if (mb->partial != 0 &&
5794             Feptr + 1 >= mb->end_subject &&
5795             NLBLOCK->nltype == NLTYPE_FIXED &&
5796             NLBLOCK->nllen == 2 &&
5797             UCHAR21TEST(Feptr) == NLBLOCK->nl[0])
5798           {
5799           mb->hitend = TRUE;
5800           if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
5801           }
5802         RRETURN(MATCH_NOMATCH);
5803         }
5804       }
5805     else
5806       {
5807       if ((mb->moptions & PCRE2_NOTEOL) != 0) RRETURN(MATCH_NOMATCH);
5808       SCHECK_PARTIAL();
5809       }
5810     Fecode++;
5811     break;
5812 
5813 
5814     /* ===================================================================== */
5815     /* Start of match assertion */
5816 
5817     case OP_SOM:
5818     if (Feptr != mb->start_subject + mb->start_offset) RRETURN(MATCH_NOMATCH);
5819     Fecode++;
5820     break;
5821 
5822 
5823     /* ===================================================================== */
5824     /* Reset the start of match point */
5825 
5826     case OP_SET_SOM:
5827     Fstart_match = Feptr;
5828     Fecode++;
5829     break;
5830 
5831 
5832     /* ===================================================================== */
5833     /* Word boundary assertions. Find out if the previous and current
5834     characters are "word" characters. It takes a bit more work in UTF mode.
5835     Characters > 255 are assumed to be "non-word" characters when PCRE2_UCP is
5836     not set. When it is set, use Unicode properties if available, even when not
5837     in UTF mode. Remember the earliest and latest consulted characters. */
5838 
5839     case OP_NOT_WORD_BOUNDARY:
5840     case OP_WORD_BOUNDARY:
5841     if (Feptr == mb->check_subject) prev_is_word = FALSE; else
5842       {
5843       PCRE2_SPTR lastptr = Feptr - 1;
5844 #ifdef SUPPORT_UNICODE
5845       if (utf)
5846         {
5847         BACKCHAR(lastptr);
5848         GETCHAR(fc, lastptr);
5849         }
5850       else
5851 #endif  /* SUPPORT_UNICODE */
5852       fc = *lastptr;
5853       if (lastptr < mb->start_used_ptr) mb->start_used_ptr = lastptr;
5854 #ifdef SUPPORT_UNICODE
5855       if ((mb->poptions & PCRE2_UCP) != 0)
5856         {
5857         if (fc == '_') prev_is_word = TRUE; else
5858           {
5859           int cat = UCD_CATEGORY(fc);
5860           prev_is_word = (cat == ucp_L || cat == ucp_N);
5861           }
5862         }
5863       else
5864 #endif  /* SUPPORT_UNICODE */
5865       prev_is_word = CHMAX_255(fc) && (mb->ctypes[fc] & ctype_word) != 0;
5866       }
5867 
5868     /* Get status of next character */
5869 
5870     if (Feptr >= mb->end_subject)
5871       {
5872       SCHECK_PARTIAL();
5873       cur_is_word = FALSE;
5874       }
5875     else
5876       {
5877       PCRE2_SPTR nextptr = Feptr + 1;
5878 #ifdef SUPPORT_UNICODE
5879       if (utf)
5880         {
5881         FORWARDCHARTEST(nextptr, mb->end_subject);
5882         GETCHAR(fc, Feptr);
5883         }
5884       else
5885 #endif  /* SUPPORT_UNICODE */
5886       fc = *Feptr;
5887       if (nextptr > mb->last_used_ptr) mb->last_used_ptr = nextptr;
5888 #ifdef SUPPORT_UNICODE
5889       if ((mb->poptions & PCRE2_UCP) != 0)
5890         {
5891         if (fc == '_') cur_is_word = TRUE; else
5892           {
5893           int cat = UCD_CATEGORY(fc);
5894           cur_is_word = (cat == ucp_L || cat == ucp_N);
5895           }
5896         }
5897       else
5898 #endif  /* SUPPORT_UNICODE */
5899       cur_is_word = CHMAX_255(fc) && (mb->ctypes[fc] & ctype_word) != 0;
5900       }
5901 
5902     /* Now see if the situation is what we want */
5903 
5904     if ((*Fecode++ == OP_WORD_BOUNDARY)?
5905          cur_is_word == prev_is_word : cur_is_word != prev_is_word)
5906       RRETURN(MATCH_NOMATCH);
5907     break;
5908 
5909 
5910     /* ===================================================================== */
5911     /* Backtracking (*VERB)s, with and without arguments. Note that if the
5912     pattern is successfully matched, we do not come back from RMATCH. */
5913 
5914     case OP_MARK:
5915     Fmark = mb->nomatch_mark = Fecode + 2;
5916     RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM12);
5917 
5918     /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
5919     argument, and we must check whether that argument matches this MARK's
5920     argument. It is passed back in mb->verb_skip_ptr. If it does match, we
5921     return MATCH_SKIP with mb->verb_skip_ptr now pointing to the subject
5922     position that corresponds to this mark. Otherwise, pass back the return
5923     code unaltered. */
5924 
5925     if (rrc == MATCH_SKIP_ARG &&
5926              PRIV(strcmp)(Fecode + 2, mb->verb_skip_ptr) == 0)
5927       {
5928       mb->verb_skip_ptr = Feptr;   /* Pass back current position */
5929       RRETURN(MATCH_SKIP);
5930       }
5931     RRETURN(rrc);
5932 
5933     case OP_FAIL:
5934     RRETURN(MATCH_NOMATCH);
5935 
5936     /* Record the current recursing group number in mb->verb_current_recurse
5937     when a backtracking return such as MATCH_COMMIT is given. This enables the
5938     recurse processing to catch verbs from within the recursion. */
5939 
5940     case OP_COMMIT:
5941     RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM13);
5942     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5943     mb->verb_current_recurse = Fcurrent_recurse;
5944     RRETURN(MATCH_COMMIT);
5945 
5946     case OP_COMMIT_ARG:
5947     Fmark = mb->nomatch_mark = Fecode + 2;
5948     RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM36);
5949     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5950     mb->verb_current_recurse = Fcurrent_recurse;
5951     RRETURN(MATCH_COMMIT);
5952 
5953     case OP_PRUNE:
5954     RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM14);
5955     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5956     mb->verb_current_recurse = Fcurrent_recurse;
5957     RRETURN(MATCH_PRUNE);
5958 
5959     case OP_PRUNE_ARG:
5960     Fmark = mb->nomatch_mark = Fecode + 2;
5961     RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM15);
5962     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5963     mb->verb_current_recurse = Fcurrent_recurse;
5964     RRETURN(MATCH_PRUNE);
5965 
5966     case OP_SKIP:
5967     RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM16);
5968     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5969     mb->verb_skip_ptr = Feptr;   /* Pass back current position */
5970     mb->verb_current_recurse = Fcurrent_recurse;
5971     RRETURN(MATCH_SKIP);
5972 
5973     /* Note that, for Perl compatibility, SKIP with an argument does NOT set
5974     nomatch_mark. When a pattern match ends with a SKIP_ARG for which there was
5975     not a matching mark, we have to re-run the match, ignoring the SKIP_ARG
5976     that failed and any that precede it (either they also failed, or were not
5977     triggered). To do this, we maintain a count of executed SKIP_ARGs. If a
5978     SKIP_ARG gets to top level, the match is re-run with mb->ignore_skip_arg
5979     set to the count of the one that failed. */
5980 
5981     case OP_SKIP_ARG:
5982     mb->skip_arg_count++;
5983     if (mb->skip_arg_count <= mb->ignore_skip_arg)
5984       {
5985       Fecode += PRIV(OP_lengths)[*Fecode] + Fecode[1];
5986       break;
5987       }
5988     RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM17);
5989     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5990 
5991     /* Pass back the current skip name and return the special MATCH_SKIP_ARG
5992     return code. This will either be caught by a matching MARK, or get to the
5993     top, where it causes a rematch with mb->ignore_skip_arg set to the value of
5994     mb->skip_arg_count. */
5995 
5996     mb->verb_skip_ptr = Fecode + 2;
5997     mb->verb_current_recurse = Fcurrent_recurse;
5998     RRETURN(MATCH_SKIP_ARG);
5999 
6000     /* For THEN (and THEN_ARG) we pass back the address of the opcode, so that
6001     the branch in which it occurs can be determined. */
6002 
6003     case OP_THEN:
6004     RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM18);
6005     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6006     mb->verb_ecode_ptr = Fecode;
6007     mb->verb_current_recurse = Fcurrent_recurse;
6008     RRETURN(MATCH_THEN);
6009 
6010     case OP_THEN_ARG:
6011     Fmark = mb->nomatch_mark = Fecode + 2;
6012     RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM19);
6013     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6014     mb->verb_ecode_ptr = Fecode;
6015     mb->verb_current_recurse = Fcurrent_recurse;
6016     RRETURN(MATCH_THEN);
6017 
6018 
6019     /* ===================================================================== */
6020     /* There's been some horrible disaster. Arrival here can only mean there is
6021     something seriously wrong in the code above or the OP_xxx definitions. */
6022 
6023     default:
6024     return PCRE2_ERROR_INTERNAL;
6025     }
6026 
6027   /* Do not insert any code in here without much thought; it is assumed
6028   that "continue" in the code above comes out to here to repeat the main
6029   loop. */
6030 
6031   }  /* End of main loop */
6032 /* Control never reaches here */
6033 
6034 
6035 /* ========================================================================= */
6036 /* The RRETURN() macro jumps here. The number that is saved in Freturn_id
6037 indicates which label we actually want to return to. The value in Frdepth is
6038 the index number of the frame in the vector. The return value has been placed
6039 in rrc. */
6040 
6041 #define LBL(val) case val: goto L_RM##val;
6042 
6043 RETURN_SWITCH:
6044 if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
6045 if (Frdepth == 0) return rrc;                     /* Exit from the top level */
6046 F = (heapframe *)((char *)F - Fback_frame);       /* Backtrack */
6047 mb->cb->callout_flags |= PCRE2_CALLOUT_BACKTRACK; /* Note for callouts */
6048 
6049 #ifdef DEBUG_SHOW_RMATCH
6050 fprintf(stderr, "++ RETURN %d to %d\n", rrc, Freturn_id);
6051 #endif
6052 
6053 switch (Freturn_id)
6054   {
6055   LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
6056   LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(16)
6057   LBL(17) LBL(18) LBL(19) LBL(20) LBL(21) LBL(22) LBL(23) LBL(24)
6058   LBL(25) LBL(26) LBL(27) LBL(28) LBL(29) LBL(30) LBL(31) LBL(32)
6059   LBL(33) LBL(34) LBL(35) LBL(36)
6060 
6061 #ifdef SUPPORT_WIDE_CHARS
6062   LBL(100) LBL(101)
6063 #endif
6064 
6065 #ifdef SUPPORT_UNICODE
6066   LBL(200) LBL(201) LBL(202) LBL(203) LBL(204) LBL(205) LBL(206)
6067   LBL(207) LBL(208) LBL(209) LBL(210) LBL(211) LBL(212) LBL(213)
6068   LBL(214) LBL(215) LBL(216) LBL(217) LBL(218) LBL(219) LBL(220)
6069   LBL(221) LBL(222)
6070 #endif
6071 
6072   default:
6073   return PCRE2_ERROR_INTERNAL;
6074   }
6075 #undef LBL
6076 }
6077 
6078 
6079 /*************************************************
6080 *           Match a Regular Expression           *
6081 *************************************************/
6082 
6083 /* This function applies a compiled pattern to a subject string and picks out
6084 portions of the string if it matches. Two elements in the vector are set for
6085 each substring: the offsets to the start and end of the substring.
6086 
6087 Arguments:
6088   code            points to the compiled expression
6089   subject         points to the subject string
6090   length          length of subject string (may contain binary zeros)
6091   start_offset    where to start in the subject string
6092   options         option bits
6093   match_data      points to a match_data block
6094   mcontext        points to a PCRE2 context
6095 
6096 Returns:          > 0 => success; value is the number of ovector pairs filled
6097                   = 0 => success, but ovector is not big enough
6098                   = -1 => failed to match (PCRE2_ERROR_NOMATCH)
6099                   = -2 => partial match (PCRE2_ERROR_PARTIAL)
6100                   < -2 => some kind of unexpected problem
6101 */
6102 
6103 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_match(const pcre2_code * code,PCRE2_SPTR subject,PCRE2_SIZE length,PCRE2_SIZE start_offset,uint32_t options,pcre2_match_data * match_data,pcre2_match_context * mcontext)6104 pcre2_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
6105   PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
6106   pcre2_match_context *mcontext)
6107 {
6108 int rc;
6109 int was_zero_terminated = 0;
6110 const uint8_t *start_bits = NULL;
6111 const pcre2_real_code *re = (const pcre2_real_code *)code;
6112 
6113 BOOL anchored;
6114 BOOL firstline;
6115 BOOL has_first_cu = FALSE;
6116 BOOL has_req_cu = FALSE;
6117 BOOL startline;
6118 
6119 #if PCRE2_CODE_UNIT_WIDTH == 8
6120 PCRE2_SPTR memchr_found_first_cu;
6121 PCRE2_SPTR memchr_found_first_cu2;
6122 #endif
6123 
6124 PCRE2_UCHAR first_cu = 0;
6125 PCRE2_UCHAR first_cu2 = 0;
6126 PCRE2_UCHAR req_cu = 0;
6127 PCRE2_UCHAR req_cu2 = 0;
6128 
6129 PCRE2_SPTR bumpalong_limit;
6130 PCRE2_SPTR end_subject;
6131 PCRE2_SPTR true_end_subject;
6132 PCRE2_SPTR start_match = subject + start_offset;
6133 PCRE2_SPTR req_cu_ptr = start_match - 1;
6134 PCRE2_SPTR start_partial;
6135 PCRE2_SPTR match_partial;
6136 
6137 #ifdef SUPPORT_JIT
6138 BOOL use_jit;
6139 #endif
6140 
6141 /* This flag is needed even when Unicode is not supported for convenience
6142 (it is used by the IS_NEWLINE macro). */
6143 
6144 BOOL utf = FALSE;
6145 
6146 #ifdef SUPPORT_UNICODE
6147 BOOL ucp = FALSE;
6148 BOOL allow_invalid;
6149 uint32_t fragment_options = 0;
6150 #ifdef SUPPORT_JIT
6151 BOOL jit_checked_utf = FALSE;
6152 #endif
6153 #endif  /* SUPPORT_UNICODE */
6154 
6155 PCRE2_SIZE frame_size;
6156 
6157 /* We need to have mb as a pointer to a match block, because the IS_NEWLINE
6158 macro is used below, and it expects NLBLOCK to be defined as a pointer. */
6159 
6160 pcre2_callout_block cb;
6161 match_block actual_match_block;
6162 match_block *mb = &actual_match_block;
6163 
6164 /* Allocate an initial vector of backtracking frames on the stack. If this
6165 proves to be too small, it is replaced by a larger one on the heap. To get a
6166 vector of the size required that is aligned for pointers, allocate it as a
6167 vector of pointers. */
6168 
6169 PCRE2_SPTR stack_frames_vector[START_FRAMES_SIZE/sizeof(PCRE2_SPTR)]
6170     PCRE2_KEEP_UNINITIALIZED;
6171 mb->stack_frames = (heapframe *)stack_frames_vector;
6172 
6173 /* A length equal to PCRE2_ZERO_TERMINATED implies a zero-terminated
6174 subject string. */
6175 
6176 if (length == PCRE2_ZERO_TERMINATED)
6177   {
6178   length = PRIV(strlen)(subject);
6179   was_zero_terminated = 1;
6180   }
6181 true_end_subject = end_subject = subject + length;
6182 
6183 /* Plausibility checks */
6184 
6185 if ((options & ~PUBLIC_MATCH_OPTIONS) != 0) return PCRE2_ERROR_BADOPTION;
6186 if (code == NULL || subject == NULL || match_data == NULL)
6187   return PCRE2_ERROR_NULL;
6188 if (start_offset > length) return PCRE2_ERROR_BADOFFSET;
6189 
6190 /* Check that the first field in the block is the magic number. */
6191 
6192 if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
6193 
6194 /* Check the code unit width. */
6195 
6196 if ((re->flags & PCRE2_MODE_MASK) != PCRE2_CODE_UNIT_WIDTH/8)
6197   return PCRE2_ERROR_BADMODE;
6198 
6199 /* PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART are match-time flags in the
6200 options variable for this function. Users of PCRE2 who are not calling the
6201 function directly would like to have a way of setting these flags, in the same
6202 way that they can set pcre2_compile() flags like PCRE2_NO_AUTOPOSSESS with
6203 constructions like (*NO_AUTOPOSSESS). To enable this, (*NOTEMPTY) and
6204 (*NOTEMPTY_ATSTART) set bits in the pattern's "flags" field which we now
6205 transfer to the options for this function. The bits are guaranteed to be
6206 adjacent, but do not have the same values. This bit of Boolean trickery assumes
6207 that the match-time bits are not more significant than the flag bits. If by
6208 accident this is not the case, a compile-time division by zero error will
6209 occur. */
6210 
6211 #define FF (PCRE2_NOTEMPTY_SET|PCRE2_NE_ATST_SET)
6212 #define OO (PCRE2_NOTEMPTY|PCRE2_NOTEMPTY_ATSTART)
6213 options |= (re->flags & FF) / ((FF & (~FF+1)) / (OO & (~OO+1)));
6214 #undef FF
6215 #undef OO
6216 
6217 /* If the pattern was successfully studied with JIT support, we will run the
6218 JIT executable instead of the rest of this function. Most options must be set
6219 at compile time for the JIT code to be usable. */
6220 
6221 #ifdef SUPPORT_JIT
6222 use_jit = (re->executable_jit != NULL &&
6223           (options & ~PUBLIC_JIT_MATCH_OPTIONS) == 0);
6224 #endif
6225 
6226 /* Initialize UTF/UCP parameters. */
6227 
6228 #ifdef SUPPORT_UNICODE
6229 utf = (re->overall_options & PCRE2_UTF) != 0;
6230 allow_invalid = (re->overall_options & PCRE2_MATCH_INVALID_UTF) != 0;
6231 ucp = (re->overall_options & PCRE2_UCP) != 0;
6232 #endif  /* SUPPORT_UNICODE */
6233 
6234 /* Convert the partial matching flags into an integer. */
6235 
6236 mb->partial = ((options & PCRE2_PARTIAL_HARD) != 0)? 2 :
6237               ((options & PCRE2_PARTIAL_SOFT) != 0)? 1 : 0;
6238 
6239 /* Partial matching and PCRE2_ENDANCHORED are currently not allowed at the same
6240 time. */
6241 
6242 if (mb->partial != 0 &&
6243    ((re->overall_options | options) & PCRE2_ENDANCHORED) != 0)
6244   return PCRE2_ERROR_BADOPTION;
6245 
6246 /* It is an error to set an offset limit without setting the flag at compile
6247 time. */
6248 
6249 if (mcontext != NULL && mcontext->offset_limit != PCRE2_UNSET &&
6250      (re->overall_options & PCRE2_USE_OFFSET_LIMIT) == 0)
6251   return PCRE2_ERROR_BADOFFSETLIMIT;
6252 
6253 /* If the match data block was previously used with PCRE2_COPY_MATCHED_SUBJECT,
6254 free the memory that was obtained. Set the field to NULL for no match cases. */
6255 
6256 if ((match_data->flags & PCRE2_MD_COPIED_SUBJECT) != 0)
6257   {
6258   match_data->memctl.free((void *)match_data->subject,
6259     match_data->memctl.memory_data);
6260   match_data->flags &= ~PCRE2_MD_COPIED_SUBJECT;
6261   }
6262 match_data->subject = NULL;
6263 
6264 /* Zero the error offset in case the first code unit is invalid UTF. */
6265 
6266 match_data->startchar = 0;
6267 
6268 
6269 /* ============================= JIT matching ============================== */
6270 
6271 /* Prepare for JIT matching. Check a UTF string for validity unless no check is
6272 requested or invalid UTF can be handled. We check only the portion of the
6273 subject that might be inspected during matching - from the offset minus the
6274 maximum lookbehind to the given length. This saves time when a small part of a
6275 large subject is being matched by the use of a starting offset. Note that the
6276 maximum lookbehind is a number of characters, not code units. */
6277 
6278 #ifdef SUPPORT_JIT
6279 if (use_jit)
6280   {
6281 #ifdef SUPPORT_UNICODE
6282   if (utf && (options & PCRE2_NO_UTF_CHECK) == 0 && !allow_invalid)
6283     {
6284 #if PCRE2_CODE_UNIT_WIDTH != 32
6285     unsigned int i;
6286 #endif
6287 
6288     /* For 8-bit and 16-bit UTF, check that the first code unit is a valid
6289     character start. */
6290 
6291 #if PCRE2_CODE_UNIT_WIDTH != 32
6292     if (start_match < end_subject && NOT_FIRSTCU(*start_match))
6293       {
6294       if (start_offset > 0) return PCRE2_ERROR_BADUTFOFFSET;
6295 #if PCRE2_CODE_UNIT_WIDTH == 8
6296       return PCRE2_ERROR_UTF8_ERR20;  /* Isolated 0x80 byte */
6297 #else
6298       return PCRE2_ERROR_UTF16_ERR3;  /* Isolated low surrogate */
6299 #endif
6300       }
6301 #endif  /* WIDTH != 32 */
6302 
6303     /* Move back by the maximum lookbehind, just in case it happens at the very
6304     start of matching. */
6305 
6306 #if PCRE2_CODE_UNIT_WIDTH != 32
6307     for (i = re->max_lookbehind; i > 0 && start_match > subject; i--)
6308       {
6309       start_match--;
6310       while (start_match > subject &&
6311 #if PCRE2_CODE_UNIT_WIDTH == 8
6312       (*start_match & 0xc0) == 0x80)
6313 #else  /* 16-bit */
6314       (*start_match & 0xfc00) == 0xdc00)
6315 #endif
6316         start_match--;
6317       }
6318 #else  /* PCRE2_CODE_UNIT_WIDTH != 32 */
6319 
6320     /* In the 32-bit library, one code unit equals one character. However,
6321     we cannot just subtract the lookbehind and then compare pointers, because
6322     a very large lookbehind could create an invalid pointer. */
6323 
6324     if (start_offset >= re->max_lookbehind)
6325       start_match -= re->max_lookbehind;
6326     else
6327       start_match = subject;
6328 #endif  /* PCRE2_CODE_UNIT_WIDTH != 32 */
6329 
6330     /* Validate the relevant portion of the subject. Adjust the offset of an
6331     invalid code point to be an absolute offset in the whole string. */
6332 
6333     match_data->rc = PRIV(valid_utf)(start_match,
6334       length - (start_match - subject), &(match_data->startchar));
6335     if (match_data->rc != 0)
6336       {
6337       match_data->startchar += start_match - subject;
6338       return match_data->rc;
6339       }
6340     jit_checked_utf = TRUE;
6341     }
6342 #endif  /* SUPPORT_UNICODE */
6343 
6344   /* If JIT returns BADOPTION, which means that the selected complete or
6345   partial matching mode was not compiled, fall through to the interpreter. */
6346 
6347   rc = pcre2_jit_match(code, subject, length, start_offset, options,
6348     match_data, mcontext);
6349   if (rc != PCRE2_ERROR_JIT_BADOPTION)
6350     {
6351     if (rc >= 0 && (options & PCRE2_COPY_MATCHED_SUBJECT) != 0)
6352       {
6353       length = CU2BYTES(length + was_zero_terminated);
6354       match_data->subject = match_data->memctl.malloc(length,
6355         match_data->memctl.memory_data);
6356       if (match_data->subject == NULL) return PCRE2_ERROR_NOMEMORY;
6357       memcpy((void *)match_data->subject, subject, length);
6358       match_data->flags |= PCRE2_MD_COPIED_SUBJECT;
6359       }
6360     return rc;
6361     }
6362   }
6363 #endif  /* SUPPORT_JIT */
6364 
6365 /* ========================= End of JIT matching ========================== */
6366 
6367 
6368 /* Proceed with non-JIT matching. The default is to allow lookbehinds to the
6369 start of the subject. A UTF check when there is a non-zero offset may change
6370 this. */
6371 
6372 mb->check_subject = subject;
6373 
6374 /* If a UTF subject string was not checked for validity in the JIT code above,
6375 check it here, and handle support for invalid UTF strings. The check above
6376 happens only when invalid UTF is not supported and PCRE2_NO_UTF_CHECK is unset.
6377 If we get here in those circumstances, it means the subject string is valid,
6378 but for some reason JIT matching was not successful. There is no need to check
6379 the subject again.
6380 
6381 We check only the portion of the subject that might be inspected during
6382 matching - from the offset minus the maximum lookbehind to the given length.
6383 This saves time when a small part of a large subject is being matched by the
6384 use of a starting offset. Note that the maximum lookbehind is a number of
6385 characters, not code units.
6386 
6387 Note also that support for invalid UTF forces a check, overriding the setting
6388 of PCRE2_NO_UTF_CHECK. */
6389 
6390 #ifdef SUPPORT_UNICODE
6391 if (utf &&
6392 #ifdef SUPPORT_JIT
6393     !jit_checked_utf &&
6394 #endif
6395     ((options & PCRE2_NO_UTF_CHECK) == 0 || allow_invalid))
6396   {
6397 #if PCRE2_CODE_UNIT_WIDTH != 32
6398   BOOL skipped_bad_start = FALSE;
6399 #endif
6400 
6401   /* For 8-bit and 16-bit UTF, check that the first code unit is a valid
6402   character start. If we are handling invalid UTF, just skip over such code
6403   units. Otherwise, give an appropriate error. */
6404 
6405 #if PCRE2_CODE_UNIT_WIDTH != 32
6406   if (allow_invalid)
6407     {
6408     while (start_match < end_subject && NOT_FIRSTCU(*start_match))
6409       {
6410       start_match++;
6411       skipped_bad_start = TRUE;
6412       }
6413     }
6414   else if (start_match < end_subject && NOT_FIRSTCU(*start_match))
6415     {
6416     if (start_offset > 0) return PCRE2_ERROR_BADUTFOFFSET;
6417 #if PCRE2_CODE_UNIT_WIDTH == 8
6418     return PCRE2_ERROR_UTF8_ERR20;  /* Isolated 0x80 byte */
6419 #else
6420     return PCRE2_ERROR_UTF16_ERR3;  /* Isolated low surrogate */
6421 #endif
6422     }
6423 #endif  /* WIDTH != 32 */
6424 
6425   /* The mb->check_subject field points to the start of UTF checking;
6426   lookbehinds can go back no further than this. */
6427 
6428   mb->check_subject = start_match;
6429 
6430   /* Move back by the maximum lookbehind, just in case it happens at the very
6431   start of matching, but don't do this if we skipped bad 8-bit or 16-bit code
6432   units above. */
6433 
6434 #if PCRE2_CODE_UNIT_WIDTH != 32
6435   if (!skipped_bad_start)
6436     {
6437     unsigned int i;
6438     for (i = re->max_lookbehind; i > 0 && mb->check_subject > subject; i--)
6439       {
6440       mb->check_subject--;
6441       while (mb->check_subject > subject &&
6442 #if PCRE2_CODE_UNIT_WIDTH == 8
6443       (*mb->check_subject & 0xc0) == 0x80)
6444 #else  /* 16-bit */
6445       (*mb->check_subject & 0xfc00) == 0xdc00)
6446 #endif
6447         mb->check_subject--;
6448       }
6449     }
6450 #else  /* PCRE2_CODE_UNIT_WIDTH != 32 */
6451 
6452   /* In the 32-bit library, one code unit equals one character. However,
6453   we cannot just subtract the lookbehind and then compare pointers, because
6454   a very large lookbehind could create an invalid pointer. */
6455 
6456   if (start_offset >= re->max_lookbehind)
6457     mb->check_subject -= re->max_lookbehind;
6458   else
6459     mb->check_subject = subject;
6460 #endif  /* PCRE2_CODE_UNIT_WIDTH != 32 */
6461 
6462   /* Validate the relevant portion of the subject. There's a loop in case we
6463   encounter bad UTF in the characters preceding start_match which we are
6464   scanning because of a lookbehind. */
6465 
6466   for (;;)
6467     {
6468     match_data->rc = PRIV(valid_utf)(mb->check_subject,
6469       length - (mb->check_subject - subject), &(match_data->startchar));
6470 
6471     if (match_data->rc == 0) break;   /* Valid UTF string */
6472 
6473     /* Invalid UTF string. Adjust the offset to be an absolute offset in the
6474     whole string. If we are handling invalid UTF strings, set end_subject to
6475     stop before the bad code unit, and set the options to "not end of line".
6476     Otherwise return the error. */
6477 
6478     match_data->startchar += mb->check_subject - subject;
6479     if (!allow_invalid || match_data->rc > 0) return match_data->rc;
6480     end_subject = subject + match_data->startchar;
6481 
6482     /* If the end precedes start_match, it means there is invalid UTF in the
6483     extra code units we reversed over because of a lookbehind. Advance past the
6484     first bad code unit, and then skip invalid character starting code units in
6485     8-bit and 16-bit modes, and try again. */
6486 
6487     if (end_subject < start_match)
6488       {
6489       mb->check_subject = end_subject + 1;
6490 #if PCRE2_CODE_UNIT_WIDTH != 32
6491       while (mb->check_subject < start_match && NOT_FIRSTCU(*mb->check_subject))
6492         mb->check_subject++;
6493 #endif
6494       }
6495 
6496     /* Otherwise, set the not end of line option, and do the match. */
6497 
6498     else
6499       {
6500       fragment_options = PCRE2_NOTEOL;
6501       break;
6502       }
6503     }
6504   }
6505 #endif  /* SUPPORT_UNICODE */
6506 
6507 /* A NULL match context means "use a default context", but we take the memory
6508 control functions from the pattern. */
6509 
6510 if (mcontext == NULL)
6511   {
6512   mcontext = (pcre2_match_context *)(&PRIV(default_match_context));
6513   mb->memctl = re->memctl;
6514   }
6515 else mb->memctl = mcontext->memctl;
6516 
6517 anchored = ((re->overall_options | options) & PCRE2_ANCHORED) != 0;
6518 firstline = (re->overall_options & PCRE2_FIRSTLINE) != 0;
6519 startline = (re->flags & PCRE2_STARTLINE) != 0;
6520 bumpalong_limit = (mcontext->offset_limit == PCRE2_UNSET)?
6521   true_end_subject : subject + mcontext->offset_limit;
6522 
6523 /* Initialize and set up the fixed fields in the callout block, with a pointer
6524 in the match block. */
6525 
6526 mb->cb = &cb;
6527 cb.version = 2;
6528 cb.subject = subject;
6529 cb.subject_length = (PCRE2_SIZE)(end_subject - subject);
6530 cb.callout_flags = 0;
6531 
6532 /* Fill in the remaining fields in the match block, except for moptions, which
6533 gets set later. */
6534 
6535 mb->callout = mcontext->callout;
6536 mb->callout_data = mcontext->callout_data;
6537 
6538 mb->start_subject = subject;
6539 mb->start_offset = start_offset;
6540 mb->end_subject = end_subject;
6541 mb->hasthen = (re->flags & PCRE2_HASTHEN) != 0;
6542 mb->allowemptypartial = (re->max_lookbehind > 0) ||
6543     (re->flags & PCRE2_MATCH_EMPTY) != 0;
6544 mb->poptions = re->overall_options;          /* Pattern options */
6545 mb->ignore_skip_arg = 0;
6546 mb->mark = mb->nomatch_mark = NULL;          /* In case never set */
6547 
6548 /* The name table is needed for finding all the numbers associated with a
6549 given name, for condition testing. The code follows the name table. */
6550 
6551 mb->name_table = (PCRE2_UCHAR *)((uint8_t *)re + sizeof(pcre2_real_code));
6552 mb->name_count = re->name_count;
6553 mb->name_entry_size = re->name_entry_size;
6554 mb->start_code = mb->name_table + re->name_count * re->name_entry_size;
6555 
6556 /* Process the \R and newline settings. */
6557 
6558 mb->bsr_convention = re->bsr_convention;
6559 mb->nltype = NLTYPE_FIXED;
6560 switch(re->newline_convention)
6561   {
6562   case PCRE2_NEWLINE_CR:
6563   mb->nllen = 1;
6564   mb->nl[0] = CHAR_CR;
6565   break;
6566 
6567   case PCRE2_NEWLINE_LF:
6568   mb->nllen = 1;
6569   mb->nl[0] = CHAR_NL;
6570   break;
6571 
6572   case PCRE2_NEWLINE_NUL:
6573   mb->nllen = 1;
6574   mb->nl[0] = CHAR_NUL;
6575   break;
6576 
6577   case PCRE2_NEWLINE_CRLF:
6578   mb->nllen = 2;
6579   mb->nl[0] = CHAR_CR;
6580   mb->nl[1] = CHAR_NL;
6581   break;
6582 
6583   case PCRE2_NEWLINE_ANY:
6584   mb->nltype = NLTYPE_ANY;
6585   break;
6586 
6587   case PCRE2_NEWLINE_ANYCRLF:
6588   mb->nltype = NLTYPE_ANYCRLF;
6589   break;
6590 
6591   default: return PCRE2_ERROR_INTERNAL;
6592   }
6593 
6594 /* The backtracking frames have fixed data at the front, and a PCRE2_SIZE
6595 vector at the end, whose size depends on the number of capturing parentheses in
6596 the pattern. It is not used at all if there are no capturing parentheses.
6597 
6598   frame_size             is the total size of each frame
6599   mb->frame_vector_size  is the total usable size of the vector (rounded down
6600                            to a whole number of frames)
6601 
6602 The last of these is changed within the match() function if the frame vector
6603 has to be expanded. We therefore put it into the match block so that it is
6604 correct when calling match() more than once for non-anchored patterns. */
6605 
6606 frame_size = offsetof(heapframe, ovector) +
6607   re->top_bracket * 2 * sizeof(PCRE2_SIZE);
6608 
6609 /* Limits set in the pattern override the match context only if they are
6610 smaller. */
6611 
6612 mb->heap_limit = (mcontext->heap_limit < re->limit_heap)?
6613   mcontext->heap_limit : re->limit_heap;
6614 
6615 mb->match_limit = (mcontext->match_limit < re->limit_match)?
6616   mcontext->match_limit : re->limit_match;
6617 
6618 mb->match_limit_depth = (mcontext->depth_limit < re->limit_depth)?
6619   mcontext->depth_limit : re->limit_depth;
6620 
6621 /* If a pattern has very many capturing parentheses, the frame size may be very
6622 large. Ensure that there are at least 10 available frames by getting an initial
6623 vector on the heap if necessary, except when the heap limit prevents this. Get
6624 fewer if possible. (The heap limit is in kibibytes.) */
6625 
6626 if (frame_size <= START_FRAMES_SIZE/10)
6627   {
6628   mb->match_frames = mb->stack_frames;   /* Initial frame vector on the stack */
6629   mb->frame_vector_size = ((START_FRAMES_SIZE/frame_size) * frame_size);
6630   }
6631 else
6632   {
6633   mb->frame_vector_size = frame_size * 10;
6634   if ((mb->frame_vector_size / 1024) > mb->heap_limit)
6635     {
6636     if (frame_size > mb->heap_limit * 1024) return PCRE2_ERROR_HEAPLIMIT;
6637     mb->frame_vector_size = ((mb->heap_limit * 1024)/frame_size) * frame_size;
6638     }
6639   mb->match_frames = mb->memctl.malloc(mb->frame_vector_size,
6640     mb->memctl.memory_data);
6641   if (mb->match_frames == NULL) return PCRE2_ERROR_NOMEMORY;
6642   }
6643 
6644 mb->match_frames_top =
6645   (heapframe *)((char *)mb->match_frames + mb->frame_vector_size);
6646 
6647 /* Write to the ovector within the first frame to mark every capture unset and
6648 to avoid uninitialized memory read errors when it is copied to a new frame. */
6649 
6650 memset((char *)(mb->match_frames) + offsetof(heapframe, ovector), 0xff,
6651   re->top_bracket * 2 * sizeof(PCRE2_SIZE));
6652 
6653 /* Pointers to the individual character tables */
6654 
6655 mb->lcc = re->tables + lcc_offset;
6656 mb->fcc = re->tables + fcc_offset;
6657 mb->ctypes = re->tables + ctypes_offset;
6658 
6659 /* Set up the first code unit to match, if available. If there's no first code
6660 unit there may be a bitmap of possible first characters. */
6661 
6662 if ((re->flags & PCRE2_FIRSTSET) != 0)
6663   {
6664   has_first_cu = TRUE;
6665   first_cu = first_cu2 = (PCRE2_UCHAR)(re->first_codeunit);
6666   if ((re->flags & PCRE2_FIRSTCASELESS) != 0)
6667     {
6668     first_cu2 = TABLE_GET(first_cu, mb->fcc, first_cu);
6669 #ifdef SUPPORT_UNICODE
6670 #if PCRE2_CODE_UNIT_WIDTH == 8
6671     if (first_cu > 127 && ucp && !utf) first_cu2 = UCD_OTHERCASE(first_cu);
6672 #else
6673     if (first_cu > 127 && (utf || ucp)) first_cu2 = UCD_OTHERCASE(first_cu);
6674 #endif
6675 #endif  /* SUPPORT_UNICODE */
6676     }
6677   }
6678 else
6679   if (!startline && (re->flags & PCRE2_FIRSTMAPSET) != 0)
6680     start_bits = re->start_bitmap;
6681 
6682 /* There may also be a "last known required character" set. */
6683 
6684 if ((re->flags & PCRE2_LASTSET) != 0)
6685   {
6686   has_req_cu = TRUE;
6687   req_cu = req_cu2 = (PCRE2_UCHAR)(re->last_codeunit);
6688   if ((re->flags & PCRE2_LASTCASELESS) != 0)
6689     {
6690     req_cu2 = TABLE_GET(req_cu, mb->fcc, req_cu);
6691 #ifdef SUPPORT_UNICODE
6692 #if PCRE2_CODE_UNIT_WIDTH == 8
6693     if (req_cu > 127 && ucp && !utf) req_cu2 = UCD_OTHERCASE(req_cu);
6694 #else
6695     if (req_cu > 127 && (utf || ucp)) req_cu2 = UCD_OTHERCASE(req_cu);
6696 #endif
6697 #endif  /* SUPPORT_UNICODE */
6698     }
6699   }
6700 
6701 
6702 /* ==========================================================================*/
6703 
6704 /* Loop for handling unanchored repeated matching attempts; for anchored regexs
6705 the loop runs just once. */
6706 
6707 #ifdef SUPPORT_UNICODE
6708 FRAGMENT_RESTART:
6709 #endif
6710 
6711 start_partial = match_partial = NULL;
6712 mb->hitend = FALSE;
6713 
6714 #if PCRE2_CODE_UNIT_WIDTH == 8
6715 memchr_found_first_cu = NULL;
6716 memchr_found_first_cu2 = NULL;
6717 #endif
6718 
6719 for(;;)
6720   {
6721   PCRE2_SPTR new_start_match;
6722 
6723   /* ----------------- Start of match optimizations ---------------- */
6724 
6725   /* There are some optimizations that avoid running the match if a known
6726   starting point is not found, or if a known later code unit is not present.
6727   However, there is an option (settable at compile time) that disables these,
6728   for testing and for ensuring that all callouts do actually occur. */
6729 
6730   if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
6731     {
6732     /* If firstline is TRUE, the start of the match is constrained to the first
6733     line of a multiline string. That is, the match must be before or at the
6734     first newline following the start of matching. Temporarily adjust
6735     end_subject so that we stop the scans for a first code unit at a newline.
6736     If the match fails at the newline, later code breaks the loop. */
6737 
6738     if (firstline)
6739       {
6740       PCRE2_SPTR t = start_match;
6741 #ifdef SUPPORT_UNICODE
6742       if (utf)
6743         {
6744         while (t < end_subject && !IS_NEWLINE(t))
6745           {
6746           t++;
6747           ACROSSCHAR(t < end_subject, t, t++);
6748           }
6749         }
6750       else
6751 #endif
6752       while (t < end_subject && !IS_NEWLINE(t)) t++;
6753       end_subject = t;
6754       }
6755 
6756     /* Anchored: check the first code unit if one is recorded. This may seem
6757     pointless but it can help in detecting a no match case without scanning for
6758     the required code unit. */
6759 
6760     if (anchored)
6761       {
6762       if (has_first_cu || start_bits != NULL)
6763         {
6764         BOOL ok = start_match < end_subject;
6765         if (ok)
6766           {
6767           PCRE2_UCHAR c = UCHAR21TEST(start_match);
6768           ok = has_first_cu && (c == first_cu || c == first_cu2);
6769           if (!ok && start_bits != NULL)
6770             {
6771 #if PCRE2_CODE_UNIT_WIDTH != 8
6772             if (c > 255) c = 255;
6773 #endif
6774             ok = (start_bits[c/8] & (1u << (c&7))) != 0;
6775             }
6776           }
6777         if (!ok)
6778           {
6779           rc = MATCH_NOMATCH;
6780           break;
6781           }
6782         }
6783       }
6784 
6785     /* Not anchored. Advance to a unique first code unit if there is one. */
6786 
6787     else
6788       {
6789       if (has_first_cu)
6790         {
6791         if (first_cu != first_cu2)  /* Caseless */
6792           {
6793           /* In 16-bit and 32-bit modes we have to do our own search, so can
6794           look for both cases at once. */
6795 
6796 #if PCRE2_CODE_UNIT_WIDTH != 8
6797           PCRE2_UCHAR smc;
6798           while (start_match < end_subject &&
6799                 (smc = UCHAR21TEST(start_match)) != first_cu &&
6800                  smc != first_cu2)
6801             start_match++;
6802 #else
6803           /* In 8-bit mode, the use of memchr() gives a big speed up, even
6804           though we have to call it twice in order to find the earliest
6805           occurrence of the code unit in either of its cases. Caching is used
6806           to remember the positions of previously found code units. This can
6807           make a huge difference when the strings are very long and only one
6808           case is actually present. */
6809 
6810           PCRE2_SPTR pp1 = NULL;
6811           PCRE2_SPTR pp2 = NULL;
6812           PCRE2_SIZE searchlength = end_subject - start_match;
6813 
6814           /* If we haven't got a previously found position for first_cu, or if
6815           the current starting position is later, we need to do a search. If
6816           the code unit is not found, set it to the end. */
6817 
6818           if (memchr_found_first_cu == NULL ||
6819               start_match > memchr_found_first_cu)
6820             {
6821             pp1 = memchr(start_match, first_cu, searchlength);
6822             memchr_found_first_cu = (pp1 == NULL)? end_subject : pp1;
6823             }
6824 
6825           /* If the start is before a previously found position, use the
6826           previous position, or NULL if a previous search failed. */
6827 
6828           else pp1 = (memchr_found_first_cu == end_subject)? NULL :
6829             memchr_found_first_cu;
6830 
6831           /* Do the same thing for the other case. */
6832 
6833           if (memchr_found_first_cu2 == NULL ||
6834               start_match > memchr_found_first_cu2)
6835             {
6836             pp2 = memchr(start_match, first_cu2, searchlength);
6837             memchr_found_first_cu2 = (pp2 == NULL)? end_subject : pp2;
6838             }
6839 
6840           else pp2 = (memchr_found_first_cu2 == end_subject)? NULL :
6841             memchr_found_first_cu2;
6842 
6843           /* Set the start to the end of the subject if neither case was found.
6844           Otherwise, use the earlier found point. */
6845 
6846           if (pp1 == NULL)
6847             start_match = (pp2 == NULL)? end_subject : pp2;
6848           else
6849             start_match = (pp2 == NULL || pp1 < pp2)? pp1 : pp2;
6850 
6851 #endif  /* 8-bit handling */
6852           }
6853 
6854         /* The caseful case is much simpler. */
6855 
6856         else
6857           {
6858 #if PCRE2_CODE_UNIT_WIDTH != 8
6859           while (start_match < end_subject && UCHAR21TEST(start_match) !=
6860                  first_cu)
6861             start_match++;
6862 #else
6863           start_match = memchr(start_match, first_cu, end_subject - start_match);
6864           if (start_match == NULL) start_match = end_subject;
6865 #endif
6866           }
6867 
6868         /* If we can't find the required first code unit, having reached the
6869         true end of the subject, break the bumpalong loop, to force a match
6870         failure, except when doing partial matching, when we let the next cycle
6871         run at the end of the subject. To see why, consider the pattern
6872         /(?<=abc)def/, which partially matches "abc", even though the string
6873         does not contain the starting character "d". If we have not reached the
6874         true end of the subject (PCRE2_FIRSTLINE caused end_subject to be
6875         temporarily modified) we also let the cycle run, because the matching
6876         string is legitimately allowed to start with the first code unit of a
6877         newline. */
6878 
6879         if (mb->partial == 0 && start_match >= mb->end_subject)
6880           {
6881           rc = MATCH_NOMATCH;
6882           break;
6883           }
6884         }
6885 
6886       /* If there's no first code unit, advance to just after a linebreak for a
6887       multiline match if required. */
6888 
6889       else if (startline)
6890         {
6891         if (start_match > mb->start_subject + start_offset)
6892           {
6893 #ifdef SUPPORT_UNICODE
6894           if (utf)
6895             {
6896             while (start_match < end_subject && !WAS_NEWLINE(start_match))
6897               {
6898               start_match++;
6899               ACROSSCHAR(start_match < end_subject, start_match, start_match++);
6900               }
6901             }
6902           else
6903 #endif
6904           while (start_match < end_subject && !WAS_NEWLINE(start_match))
6905             start_match++;
6906 
6907           /* If we have just passed a CR and the newline option is ANY or
6908           ANYCRLF, and we are now at a LF, advance the match position by one
6909           more code unit. */
6910 
6911           if (start_match[-1] == CHAR_CR &&
6912                (mb->nltype == NLTYPE_ANY || mb->nltype == NLTYPE_ANYCRLF) &&
6913                start_match < end_subject &&
6914                UCHAR21TEST(start_match) == CHAR_NL)
6915             start_match++;
6916           }
6917         }
6918 
6919       /* If there's no first code unit or a requirement for a multiline line
6920       start, advance to a non-unique first code unit if any have been
6921       identified. The bitmap contains only 256 bits. When code units are 16 or
6922       32 bits wide, all code units greater than 254 set the 255 bit. */
6923 
6924       else if (start_bits != NULL)
6925         {
6926         while (start_match < end_subject)
6927           {
6928           uint32_t c = UCHAR21TEST(start_match);
6929 #if PCRE2_CODE_UNIT_WIDTH != 8
6930           if (c > 255) c = 255;
6931 #endif
6932           if ((start_bits[c/8] & (1u << (c&7))) != 0) break;
6933           start_match++;
6934           }
6935 
6936         /* See comment above in first_cu checking about the next few lines. */
6937 
6938         if (mb->partial == 0 && start_match >= mb->end_subject)
6939           {
6940           rc = MATCH_NOMATCH;
6941           break;
6942           }
6943         }
6944       }   /* End first code unit handling */
6945 
6946     /* Restore fudged end_subject */
6947 
6948     end_subject = mb->end_subject;
6949 
6950     /* The following two optimizations must be disabled for partial matching. */
6951 
6952     if (mb->partial == 0)
6953       {
6954       PCRE2_SPTR p;
6955 
6956       /* The minimum matching length is a lower bound; no string of that length
6957       may actually match the pattern. Although the value is, strictly, in
6958       characters, we treat it as code units to avoid spending too much time in
6959       this optimization. */
6960 
6961       if (end_subject - start_match < re->minlength)
6962         {
6963         rc = MATCH_NOMATCH;
6964         break;
6965         }
6966 
6967       /* If req_cu is set, we know that that code unit must appear in the
6968       subject for the (non-partial) match to succeed. If the first code unit is
6969       set, req_cu must be later in the subject; otherwise the test starts at
6970       the match point. This optimization can save a huge amount of backtracking
6971       in patterns with nested unlimited repeats that aren't going to match.
6972       Writing separate code for caseful/caseless versions makes it go faster,
6973       as does using an autoincrement and backing off on a match. As in the case
6974       of the first code unit, using memchr() in the 8-bit library gives a big
6975       speed up. Unlike the first_cu check above, we do not need to call
6976       memchr() twice in the caseless case because we only need to check for the
6977       presence of the character in either case, not find the first occurrence.
6978 
6979       The search can be skipped if the code unit was found later than the
6980       current starting point in a previous iteration of the bumpalong loop.
6981 
6982       HOWEVER: when the subject string is very, very long, searching to its end
6983       can take a long time, and give bad performance on quite ordinary
6984       anchored patterns. This showed up when somebody was matching something
6985       like /^\d+C/ on a 32-megabyte string... so we don't do this when the
6986       string is sufficiently long, but it's worth searching a lot more for
6987       unanchored patterns. */
6988 
6989       p = start_match + (has_first_cu? 1:0);
6990       if (has_req_cu && p > req_cu_ptr)
6991         {
6992         PCRE2_SIZE check_length = end_subject - start_match;
6993 
6994         if (check_length < REQ_CU_MAX ||
6995               (!anchored && check_length < REQ_CU_MAX * 1000))
6996           {
6997           if (req_cu != req_cu2)  /* Caseless */
6998             {
6999 #if PCRE2_CODE_UNIT_WIDTH != 8
7000             while (p < end_subject)
7001               {
7002               uint32_t pp = UCHAR21INCTEST(p);
7003               if (pp == req_cu || pp == req_cu2) { p--; break; }
7004               }
7005 #else  /* 8-bit code units */
7006             PCRE2_SPTR pp = p;
7007             p = memchr(pp, req_cu, end_subject - pp);
7008             if (p == NULL)
7009               {
7010               p = memchr(pp, req_cu2, end_subject - pp);
7011               if (p == NULL) p = end_subject;
7012               }
7013 #endif /* PCRE2_CODE_UNIT_WIDTH != 8 */
7014             }
7015 
7016           /* The caseful case */
7017 
7018           else
7019             {
7020 #if PCRE2_CODE_UNIT_WIDTH != 8
7021             while (p < end_subject)
7022               {
7023               if (UCHAR21INCTEST(p) == req_cu) { p--; break; }
7024               }
7025 
7026 #else  /* 8-bit code units */
7027             p = memchr(p, req_cu, end_subject - p);
7028             if (p == NULL) p = end_subject;
7029 #endif
7030             }
7031 
7032           /* If we can't find the required code unit, break the bumpalong loop,
7033           forcing a match failure. */
7034 
7035           if (p >= end_subject)
7036             {
7037             rc = MATCH_NOMATCH;
7038             break;
7039             }
7040 
7041           /* If we have found the required code unit, save the point where we
7042           found it, so that we don't search again next time round the bumpalong
7043           loop if the start hasn't yet passed this code unit. */
7044 
7045           req_cu_ptr = p;
7046           }
7047         }
7048       }
7049     }
7050 
7051   /* ------------ End of start of match optimizations ------------ */
7052 
7053   /* Give no match if we have passed the bumpalong limit. */
7054 
7055   if (start_match > bumpalong_limit)
7056     {
7057     rc = MATCH_NOMATCH;
7058     break;
7059     }
7060 
7061   /* OK, we can now run the match. If "hitend" is set afterwards, remember the
7062   first starting point for which a partial match was found. */
7063 
7064   cb.start_match = (PCRE2_SIZE)(start_match - subject);
7065   cb.callout_flags |= PCRE2_CALLOUT_STARTMATCH;
7066 
7067   mb->start_used_ptr = start_match;
7068   mb->last_used_ptr = start_match;
7069 #ifdef SUPPORT_UNICODE
7070   mb->moptions = options | fragment_options;
7071 #else
7072   mb->moptions = options;
7073 #endif
7074   mb->match_call_count = 0;
7075   mb->end_offset_top = 0;
7076   mb->skip_arg_count = 0;
7077 
7078   rc = match(start_match, mb->start_code, match_data->ovector,
7079     match_data->oveccount, re->top_bracket, frame_size, mb);
7080 
7081   if (mb->hitend && start_partial == NULL)
7082     {
7083     start_partial = mb->start_used_ptr;
7084     match_partial = start_match;
7085     }
7086 
7087   switch(rc)
7088     {
7089     /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched
7090     the SKIP's arg was not found. In this circumstance, Perl ignores the SKIP
7091     entirely. The only way we can do that is to re-do the match at the same
7092     point, with a flag to force SKIP with an argument to be ignored. Just
7093     treating this case as NOMATCH does not work because it does not check other
7094     alternatives in patterns such as A(*SKIP:A)B|AC when the subject is AC. */
7095 
7096     case MATCH_SKIP_ARG:
7097     new_start_match = start_match;
7098     mb->ignore_skip_arg = mb->skip_arg_count;
7099     break;
7100 
7101     /* SKIP passes back the next starting point explicitly, but if it is no
7102     greater than the match we have just done, treat it as NOMATCH. */
7103 
7104     case MATCH_SKIP:
7105     if (mb->verb_skip_ptr > start_match)
7106       {
7107       new_start_match = mb->verb_skip_ptr;
7108       break;
7109       }
7110     /* Fall through */
7111 
7112     /* NOMATCH and PRUNE advance by one character. THEN at this level acts
7113     exactly like PRUNE. Unset ignore SKIP-with-argument. */
7114 
7115     case MATCH_NOMATCH:
7116     case MATCH_PRUNE:
7117     case MATCH_THEN:
7118     mb->ignore_skip_arg = 0;
7119     new_start_match = start_match + 1;
7120 #ifdef SUPPORT_UNICODE
7121     if (utf)
7122       ACROSSCHAR(new_start_match < end_subject, new_start_match,
7123         new_start_match++);
7124 #endif
7125     break;
7126 
7127     /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
7128 
7129     case MATCH_COMMIT:
7130     rc = MATCH_NOMATCH;
7131     goto ENDLOOP;
7132 
7133     /* Any other return is either a match, or some kind of error. */
7134 
7135     default:
7136     goto ENDLOOP;
7137     }
7138 
7139   /* Control reaches here for the various types of "no match at this point"
7140   result. Reset the code to MATCH_NOMATCH for subsequent checking. */
7141 
7142   rc = MATCH_NOMATCH;
7143 
7144   /* If PCRE2_FIRSTLINE is set, the match must happen before or at the first
7145   newline in the subject (though it may continue over the newline). Therefore,
7146   if we have just failed to match, starting at a newline, do not continue. */
7147 
7148   if (firstline && IS_NEWLINE(start_match)) break;
7149 
7150   /* Advance to new matching position */
7151 
7152   start_match = new_start_match;
7153 
7154   /* Break the loop if the pattern is anchored or if we have passed the end of
7155   the subject. */
7156 
7157   if (anchored || start_match > end_subject) break;
7158 
7159   /* If we have just passed a CR and we are now at a LF, and the pattern does
7160   not contain any explicit matches for \r or \n, and the newline option is CRLF
7161   or ANY or ANYCRLF, advance the match position by one more code unit. In
7162   normal matching start_match will always be greater than the first position at
7163   this stage, but a failed *SKIP can cause a return at the same point, which is
7164   why the first test exists. */
7165 
7166   if (start_match > subject + start_offset &&
7167       start_match[-1] == CHAR_CR &&
7168       start_match < end_subject &&
7169       *start_match == CHAR_NL &&
7170       (re->flags & PCRE2_HASCRORLF) == 0 &&
7171         (mb->nltype == NLTYPE_ANY ||
7172          mb->nltype == NLTYPE_ANYCRLF ||
7173          mb->nllen == 2))
7174     start_match++;
7175 
7176   mb->mark = NULL;   /* Reset for start of next match attempt */
7177   }                  /* End of for(;;) "bumpalong" loop */
7178 
7179 /* ==========================================================================*/
7180 
7181 /* When we reach here, one of the following stopping conditions is true:
7182 
7183 (1) The match succeeded, either completely, or partially;
7184 
7185 (2) The pattern is anchored or the match was failed after (*COMMIT);
7186 
7187 (3) We are past the end of the subject or the bumpalong limit;
7188 
7189 (4) PCRE2_FIRSTLINE is set and we have failed to match at a newline, because
7190     this option requests that a match occur at or before the first newline in
7191     the subject.
7192 
7193 (5) Some kind of error occurred.
7194 
7195 */
7196 
7197 ENDLOOP:
7198 
7199 /* If end_subject != true_end_subject, it means we are handling invalid UTF,
7200 and have just processed a non-terminal fragment. If this resulted in no match
7201 or a partial match we must carry on to the next fragment (a partial match is
7202 returned to the caller only at the very end of the subject). A loop is used to
7203 avoid trying to match against empty fragments; if the pattern can match an
7204 empty string it would have done so already. */
7205 
7206 #ifdef SUPPORT_UNICODE
7207 if (utf && end_subject != true_end_subject &&
7208     (rc == MATCH_NOMATCH || rc == PCRE2_ERROR_PARTIAL))
7209   {
7210   for (;;)
7211     {
7212     /* Advance past the first bad code unit, and then skip invalid character
7213     starting code units in 8-bit and 16-bit modes. */
7214 
7215     start_match = end_subject + 1;
7216 
7217 #if PCRE2_CODE_UNIT_WIDTH != 32
7218     while (start_match < true_end_subject && NOT_FIRSTCU(*start_match))
7219       start_match++;
7220 #endif
7221 
7222     /* If we have hit the end of the subject, there isn't another non-empty
7223     fragment, so give up. */
7224 
7225     if (start_match >= true_end_subject)
7226       {
7227       rc = MATCH_NOMATCH;  /* In case it was partial */
7228       break;
7229       }
7230 
7231     /* Check the rest of the subject */
7232 
7233     mb->check_subject = start_match;
7234     rc = PRIV(valid_utf)(start_match, length - (start_match - subject),
7235       &(match_data->startchar));
7236 
7237     /* The rest of the subject is valid UTF. */
7238 
7239     if (rc == 0)
7240       {
7241       mb->end_subject = end_subject = true_end_subject;
7242       fragment_options = PCRE2_NOTBOL;
7243       goto FRAGMENT_RESTART;
7244       }
7245 
7246     /* A subsequent UTF error has been found; if the next fragment is
7247     non-empty, set up to process it. Otherwise, let the loop advance. */
7248 
7249     else if (rc < 0)
7250       {
7251       mb->end_subject = end_subject = start_match + match_data->startchar;
7252       if (end_subject > start_match)
7253         {
7254         fragment_options = PCRE2_NOTBOL|PCRE2_NOTEOL;
7255         goto FRAGMENT_RESTART;
7256         }
7257       }
7258     }
7259   }
7260 #endif  /* SUPPORT_UNICODE */
7261 
7262 /* Release an enlarged frame vector that is on the heap. */
7263 
7264 if (mb->match_frames != mb->stack_frames)
7265   mb->memctl.free(mb->match_frames, mb->memctl.memory_data);
7266 
7267 /* Fill in fields that are always returned in the match data. */
7268 
7269 match_data->code = re;
7270 match_data->mark = mb->mark;
7271 match_data->matchedby = PCRE2_MATCHEDBY_INTERPRETER;
7272 
7273 /* Handle a fully successful match. Set the return code to the number of
7274 captured strings, or 0 if there were too many to fit into the ovector, and then
7275 set the remaining returned values before returning. Make a copy of the subject
7276 string if requested. */
7277 
7278 if (rc == MATCH_MATCH)
7279   {
7280   match_data->rc = ((int)mb->end_offset_top >= 2 * match_data->oveccount)?
7281     0 : (int)mb->end_offset_top/2 + 1;
7282   match_data->startchar = start_match - subject;
7283   match_data->leftchar = mb->start_used_ptr - subject;
7284   match_data->rightchar = ((mb->last_used_ptr > mb->end_match_ptr)?
7285     mb->last_used_ptr : mb->end_match_ptr) - subject;
7286   if ((options & PCRE2_COPY_MATCHED_SUBJECT) != 0)
7287     {
7288     length = CU2BYTES(length + was_zero_terminated);
7289     match_data->subject = match_data->memctl.malloc(length,
7290       match_data->memctl.memory_data);
7291     if (match_data->subject == NULL) return PCRE2_ERROR_NOMEMORY;
7292     memcpy((void *)match_data->subject, subject, length);
7293     match_data->flags |= PCRE2_MD_COPIED_SUBJECT;
7294     }
7295   else match_data->subject = subject;
7296   return match_data->rc;
7297   }
7298 
7299 /* Control gets here if there has been a partial match, an error, or if the
7300 overall match attempt has failed at all permitted starting positions. Any mark
7301 data is in the nomatch_mark field. */
7302 
7303 match_data->mark = mb->nomatch_mark;
7304 
7305 /* For anything other than nomatch or partial match, just return the code. */
7306 
7307 if (rc != MATCH_NOMATCH && rc != PCRE2_ERROR_PARTIAL) match_data->rc = rc;
7308 
7309 /* Handle a partial match. If a "soft" partial match was requested, searching
7310 for a complete match will have continued, and the value of rc at this point
7311 will be MATCH_NOMATCH. For a "hard" partial match, it will already be
7312 PCRE2_ERROR_PARTIAL. */
7313 
7314 else if (match_partial != NULL)
7315   {
7316   match_data->subject = subject;
7317   match_data->ovector[0] = match_partial - subject;
7318   match_data->ovector[1] = end_subject - subject;
7319   match_data->startchar = match_partial - subject;
7320   match_data->leftchar = start_partial - subject;
7321   match_data->rightchar = end_subject - subject;
7322   match_data->rc = PCRE2_ERROR_PARTIAL;
7323   }
7324 
7325 /* Else this is the classic nomatch case. */
7326 
7327 else match_data->rc = PCRE2_ERROR_NOMATCH;
7328 
7329 return match_data->rc;
7330 }
7331 
7332 /* End of pcre2_match.c */
7333