xref: /PHP-7.1/ext/pcre/pcrelib/pcre_exec.c (revision 5f9df47e)
1 /*************************************************
2 *      Perl-Compatible Regular Expressions       *
3 *************************************************/
4 
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7 
8                        Written by Philip Hazel
9            Copyright (c) 1997-2014 University of Cambridge
10 
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14 
15     * Redistributions of source code must retain the above copyright notice,
16       this list of conditions and the following disclaimer.
17 
18     * Redistributions in binary form must reproduce the above copyright
19       notice, this list of conditions and the following disclaimer in the
20       documentation and/or other materials provided with the distribution.
21 
22     * Neither the name of the University of Cambridge nor the names of its
23       contributors may be used to endorse or promote products derived from
24       this software without specific prior written permission.
25 
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39 
40 /* This module contains pcre_exec(), the externally visible function that does
41 pattern matching using an NFA algorithm, trying to mimic Perl as closely as
42 possible. There are also some static supporting functions. */
43 
44 #include "config.h"
45 
46 #define NLBLOCK md             /* Block containing newline information */
47 #define PSSTART start_subject  /* Field containing processed string start */
48 #define PSEND   end_subject    /* Field containing processed string end */
49 
50 #include "pcre_internal.h"
51 
52 /* Undefine some potentially clashing cpp symbols */
53 
54 #undef min
55 #undef max
56 
/* The md->capture_last field uses the lower 16 bits for the last captured
substring (which can never be greater than 65535) and a bit in the top half
to mean "capture vector overflowed". This odd way of doing things was
implemented when it was realized that preserving and restoring the overflow bit
whenever the last capture number was saved/restored made for a neater
interface, and doing it this way saved on (a) another variable, which would
have increased the stack frame size (a big NO-NO in PCRE) and (b) another
separate set of save/restore instructions. The following defines are used in
implementing this. CAPLMASK and OVFLMASK are disjoint and together cover all
32 bits; OVFLBIT is the lowest bit of OVFLMASK. */

#define CAPLMASK    0x0000ffff    /* The bits used for last_capture */
#define OVFLMASK    0xffff0000    /* The bits used for the overflow flag */
#define OVFLBIT     0x00010000    /* The bit that is set for overflow */
70 
/* Values for setting in md->match_function_type to indicate two special types
of call to match(). We do it this way to save on using another stack variable,
as stack usage is to be discouraged. */

#define MATCH_CONDASSERT     1  /* Called to check a condition assertion */
#define MATCH_CBEGROUP       2  /* Could-be-empty unlimited repeat group */
77 
/* Non-error returns from the match() function. Error returns are externally
defined PCRE_ERROR_xxx codes, which are all negative. */

#define MATCH_MATCH        1
#define MATCH_NOMATCH      0

/* Special internal returns from the match() function. Make them sufficiently
negative to avoid the external error codes. */

#define MATCH_ACCEPT       (-999)
#define MATCH_KETRPOS      (-998)
#define MATCH_ONCE         (-997)
/* The next 5 must be kept together and in sequence so that a test that checks
for any one of them can use a range. MATCH_BACKTRACK_MIN and
MATCH_BACKTRACK_MAX name the two (inclusive) ends of that range. */
#define MATCH_COMMIT       (-996)
#define MATCH_PRUNE        (-995)
#define MATCH_SKIP         (-994)
#define MATCH_SKIP_ARG     (-993)
#define MATCH_THEN         (-992)
#define MATCH_BACKTRACK_MAX MATCH_THEN
#define MATCH_BACKTRACK_MIN MATCH_COMMIT
99 
/* Maximum number of ints of offset to save on the stack for recursive calls.
If the offset vector is bigger, malloc is used. This should be a multiple of 3,
because the offset vector is always a multiple of 3 long. */

#define REC_STACK_SAVE_MAX 30
105 
/* Min and max values for the common repeats; for the maxima, 0 => infinity.
The two tables are parallel: each has 11 entries and the same index selects
the minimum and the maximum for one kind of repeat. (The code that indexes
them is not in this part of the file; presumably the index is the repeat
opcode's offset from the first repeat opcode - confirm at the point of use.) */

static const char rep_min[] = { 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, };
static const char rep_max[] = { 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, };
110 
#ifdef PCRE_DEBUG
/*************************************************
*        Debugging function to print chars       *
*************************************************/

/* Print a sequence of chars in printable format, stopping at the end of the
subject if requested. A code unit for which isprint() is true is written as
itself; anything else is written in the form \x{hh}.

Arguments:
  p           points to characters
  length      number to print
  is_subject  TRUE if printing from within md->start_subject; the count is
                then clamped so that printing stops at md->end_subject
  md          pointer to matching data block. It must be valid on every call:
                md->utf is always read, and md->end_subject is read when
                is_subject is TRUE

Returns:     nothing
*/

static void
pchars(const pcre_uchar *p, int length, BOOL is_subject, match_data *md)
{
pcre_uint32 c;

/* Not referenced by name below; presumably the UCHAR21INCTEST macro expects a
variable called "utf" to be in scope in some library widths (confirm in
pcre_internal.h), so the name must not be changed. */

BOOL utf = md->utf;

if (is_subject && length > md->end_subject - p)
  length = (int)(md->end_subject - p);

while (length-- > 0)
  {
  c = UCHAR21INCTEST(p);

  /* isprint() is defined only for values representable as unsigned char (or
  EOF). A code unit can be wider than that, so anything above 255 goes
  straight to the hex form instead of being passed to isprint(). */

  if (c <= 255 && isprint((int)c)) printf("%c", (char)c);
    else printf("\\x{%02x}", c);
  }
}
#endif
138 
139 
140 
141 /*************************************************
142 *          Match a back-reference                *
143 *************************************************/
144 
145 /* Normally, if a back reference hasn't been set, the length that is passed is
146 negative, so the match always fails. However, in JavaScript compatibility mode,
147 the length passed is zero. Note that in caseless UTF-8 mode, the number of
148 subject bytes matched may be different to the number of reference bytes.
149 
150 Arguments:
151   offset      index into the offset vector
152   eptr        pointer into the subject
153   length      length of reference to be matched (number of bytes)
154   md          points to match data block
155   caseless    TRUE if caseless
156 
157 Returns:      >= 0 the number of subject bytes matched
158               -1 no match
159               -2 partial match; always given if at end subject
160 */
161 
162 static int
match_ref(int offset,register PCRE_PUCHAR eptr,int length,match_data * md,BOOL caseless)163 match_ref(int offset, register PCRE_PUCHAR eptr, int length, match_data *md,
164   BOOL caseless)
165 {
166 PCRE_PUCHAR eptr_start = eptr;
167 register PCRE_PUCHAR p = md->start_subject + md->offset_vector[offset];
168 #if defined SUPPORT_UTF && defined SUPPORT_UCP
169 BOOL utf = md->utf;
170 #endif
171 
172 #ifdef PCRE_DEBUG
173 if (eptr >= md->end_subject)
174   printf("matching subject <null>");
175 else
176   {
177   printf("matching subject ");
178   pchars(eptr, length, TRUE, md);
179   }
180 printf(" against backref ");
181 pchars(p, length, FALSE, md);
182 printf("\n");
183 #endif
184 
185 /* Always fail if reference not set (and not JavaScript compatible - in that
186 case the length is passed as zero). */
187 
188 if (length < 0) return -1;
189 
190 /* Separate the caseless case for speed. In UTF-8 mode we can only do this
191 properly if Unicode properties are supported. Otherwise, we can check only
192 ASCII characters. */
193 
194 if (caseless)
195   {
196 #if defined SUPPORT_UTF && defined SUPPORT_UCP
197   if (utf)
198     {
199     /* Match characters up to the end of the reference. NOTE: the number of
200     data units matched may differ, because in UTF-8 there are some characters
201     whose upper and lower case versions code have different numbers of bytes.
202     For example, U+023A (2 bytes in UTF-8) is the upper case version of U+2C65
203     (3 bytes in UTF-8); a sequence of 3 of the former uses 6 bytes, as does a
204     sequence of two of the latter. It is important, therefore, to check the
205     length along the reference, not along the subject (earlier code did this
206     wrong). */
207 
208     PCRE_PUCHAR endptr = p + length;
209     while (p < endptr)
210       {
211       pcre_uint32 c, d;
212       const ucd_record *ur;
213       if (eptr >= md->end_subject) return -2;   /* Partial match */
214       GETCHARINC(c, eptr);
215       GETCHARINC(d, p);
216       ur = GET_UCD(d);
217       if (c != d && c != d + ur->other_case)
218         {
219         const pcre_uint32 *pp = PRIV(ucd_caseless_sets) + ur->caseset;
220         for (;;)
221           {
222           if (c < *pp) return -1;
223           if (c == *pp++) break;
224           }
225         }
226       }
227     }
228   else
229 #endif
230 
231   /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
232   is no UCP support. */
233     {
234     while (length-- > 0)
235       {
236       pcre_uint32 cc, cp;
237       if (eptr >= md->end_subject) return -2;   /* Partial match */
238       cc = UCHAR21TEST(eptr);
239       cp = UCHAR21TEST(p);
240       if (TABLE_GET(cp, md->lcc, cp) != TABLE_GET(cc, md->lcc, cc)) return -1;
241       p++;
242       eptr++;
243       }
244     }
245   }
246 
247 /* In the caseful case, we can just compare the bytes, whether or not we
248 are in UTF-8 mode. */
249 
250 else
251   {
252   while (length-- > 0)
253     {
254     if (eptr >= md->end_subject) return -2;   /* Partial match */
255     if (UCHAR21INCTEST(p) != UCHAR21INCTEST(eptr)) return -1;
256     }
257   }
258 
259 return (int)(eptr - eptr_start);
260 }
261 
262 
263 
264 /***************************************************************************
265 ****************************************************************************
266                    RECURSION IN THE match() FUNCTION
267 
268 The match() function is highly recursive, though not every recursive call
269 increases the recursive depth. Nevertheless, some regular expressions can cause
270 it to recurse to a great depth. I was writing for Unix, so I just let it call
271 itself recursively. This uses the stack for saving everything that has to be
272 saved for a recursive call. On Unix, the stack can be large, and this works
273 fine.
274 
275 It turns out that on some non-Unix-like systems there are problems with
276 programs that use a lot of stack. (This despite the fact that every last chip
277 has oodles of memory these days, and techniques for extending the stack have
278 been known for decades.) So....
279 
280 There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
281 calls by keeping local variables that need to be preserved in blocks of memory
obtained from malloc() instead of on the stack. Macros are used to
283 achieve this so that the actual code doesn't look very different to what it
284 always used to.
285 
286 The original heap-recursive code used longjmp(). However, it seems that this
287 can be very slow on some operating systems. Following a suggestion from Stan
288 Switzer, the use of longjmp() has been abolished, at the cost of having to
289 provide a unique number for each call to RMATCH. There is no way of generating
290 a sequence of numbers at compile time in C. I have given them names, to make
291 them stand out more clearly.
292 
293 Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
294 FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
295 tests. Furthermore, not using longjmp() means that local dynamic variables
296 don't have indeterminate values; this has meant that the frame size can be
297 reduced because the result can be "passed back" by straight setting of the
298 variable instead of being passed in the frame.
299 ****************************************************************************
300 ***************************************************************************/
301 
/* Numbers for RMATCH calls. Each RMATCH call site passes a distinct one of
these as its last argument. When this list is changed, the code at HEAP_RETURN
below must be updated in sync. The values run consecutively from 1 (RM1) to
67 (RM67). */

enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
       RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
       RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
       RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
       RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
       RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
       RM61,  RM62, RM63, RM64, RM65, RM66, RM67 };
312 
/* These versions of the macros use the stack, as normal. There are debugging
versions and production versions. Note that the "rw" argument of RMATCH isn't
actually used in this definition.

RMATCH stores the result of the nested match() call in the caller's "rrc"
variable, and takes "mstart" and "rdepth" from the caller's scope rather than
as arguments. REGISTER is the storage class applied to the eptr and ecode
parameters of match(). */

#ifndef NO_RECURSE
#define REGISTER register

#ifdef PCRE_DEBUG
#define RMATCH(ra,rb,rc,rd,re,rw) \
  { \
  printf("match() called in line %d\n", __LINE__); \
  rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1); \
  printf("to line %d\n", __LINE__); \
  }
#define RRETURN(ra) \
  { \
  printf("match() returned %d from line %d\n", ra, __LINE__); \
  return ra; \
  }
#else
#define RMATCH(ra,rb,rc,rd,re,rw) \
  rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1)
#define RRETURN(ra) return ra
#endif

#else


/* These versions of the macros manage a private stack on the heap. Note that
the "rd" argument of RMATCH isn't actually used in this definition. It's the md
argument of match(), which never changes.

RMATCH moves to the frame after the current one, obtaining it with
PUBL(stack_malloc) only if the current frame has no successor yet, and gives
PCRE_ERROR_NOMEMORY (via RRETURN) if that allocation fails. It records "rw" in
the current frame's Xwhere, fills in the new frame's argument fields, makes
the new frame current, and jumps to HEAP_RECURSE. The label L_<rw> that it
defines is the point at which execution resumes afterwards.

REGISTER is empty here because the names it would qualify are replaced by
macros that refer to fields of the current frame. */

#define REGISTER

#define RMATCH(ra,rb,rc,rd,re,rw)\
  {\
  heapframe *newframe = frame->Xnextframe;\
  if (newframe == NULL)\
    {\
    newframe = (heapframe *)(PUBL(stack_malloc))(sizeof(heapframe));\
    if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
    newframe->Xnextframe = NULL;\
    frame->Xnextframe = newframe;\
    }\
  frame->Xwhere = rw;\
  newframe->Xeptr = ra;\
  newframe->Xecode = rb;\
  newframe->Xmstart = mstart;\
  newframe->Xoffset_top = rc;\
  newframe->Xeptrb = re;\
  newframe->Xrdepth = frame->Xrdepth + 1;\
  newframe->Xprevframe = frame;\
  frame = newframe;\
  DPRINTF(("restarting from line %d\n", __LINE__));\
  goto HEAP_RECURSE;\
  L_##rw:\
  DPRINTF(("jumped back to line %d\n", __LINE__));\
  }

/* RRETURN steps back to the previous frame. If there is one, the result is
left in "rrc" and control goes to HEAP_RETURN; if there is none, this is the
outermost level and the value is returned from match() itself. The frame that
is left behind is not freed here; it stays linked as the successor of the
previous frame. */

#define RRETURN(ra)\
  {\
  heapframe *oldframe = frame;\
  frame = oldframe->Xprevframe;\
  if (frame != NULL)\
    {\
    rrc = ra;\
    goto HEAP_RETURN;\
    }\
  return ra;\
  }


/* Structure for remembering the local variables in a private frame. Frames
are doubly linked through Xprevframe and Xnextframe. */

typedef struct heapframe {
  struct heapframe *Xprevframe;
  struct heapframe *Xnextframe;

  /* Function arguments that may change */

  PCRE_PUCHAR Xeptr;
  const pcre_uchar *Xecode;
  PCRE_PUCHAR Xmstart;
  int Xoffset_top;
  eptrblock *Xeptrb;
  unsigned int Xrdepth;

  /* Function local variables */

  PCRE_PUCHAR Xcallpat;
#ifdef SUPPORT_UTF
  PCRE_PUCHAR Xcharptr;
#endif
  PCRE_PUCHAR Xdata;
  PCRE_PUCHAR Xnext;
  PCRE_PUCHAR Xpp;
  PCRE_PUCHAR Xprev;
  PCRE_PUCHAR Xsaved_eptr;

  recursion_info Xnew_recursive;

  BOOL Xcur_is_word;
  BOOL Xcondition;
  BOOL Xprev_is_word;

#ifdef SUPPORT_UCP
  int Xprop_type;
  unsigned int Xprop_value;
  int Xprop_fail_result;
  int Xoclength;
  pcre_uchar Xocchars[6];
#endif

  int Xcodelink;
  int Xctype;
  unsigned int Xfc;
  int Xfi;
  int Xlength;
  int Xmax;
  int Xmin;
  unsigned int Xnumber;
  int Xoffset;
  unsigned int Xop;
  pcre_int32 Xsave_capture_last;
  int Xsave_offset1, Xsave_offset2, Xsave_offset3;
  int Xstacksave[REC_STACK_SAVE_MAX];

  eptrblock Xnewptrb;

  /* Where to jump back to: the "rw" number stored by RMATCH */

  int Xwhere;

} heapframe;

#endif
449 
450 
451 /***************************************************************************
452 ***************************************************************************/
453 
454 
455 
456 /*************************************************
457 *         Match from current position            *
458 *************************************************/
459 
460 /* This function is called recursively in many circumstances. Whenever it
461 returns a negative (error) response, the outer incarnation must also return the
462 same response. */
463 
/* These macros pack up tests that are used for partial matching, and which
appear several times in the code. We set the "hit end" flag if the pointer is
at the end of the subject and also past md->start_used_ptr (i.e. something has
been matched). For hard partial matching (md->partial greater than 1), we then
return PCRE_ERROR_PARTIAL immediately. The second one is used when we already
know we are past the end of the subject, so it omits the end-of-subject test.

Both macros refer to "md" and "eptr" in the scope where they are used, and
return by means of RRETURN. */

#define CHECK_PARTIAL()\
  if (md->partial != 0 && eptr >= md->end_subject && \
      eptr > md->start_used_ptr) \
    { \
    md->hitend = TRUE; \
    if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); \
    }

#define SCHECK_PARTIAL()\
  if (md->partial != 0 && eptr > md->start_used_ptr) \
    { \
    md->hitend = TRUE; \
    if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); \
    }
485 
486 
487 /* Performance note: It might be tempting to extract commonly used fields from
488 the md structure (e.g. utf, end_subject) into individual variables to improve
489 performance. Tests using gcc on a SPARC disproved this; in the first case, it
490 made performance worse.
491 
492 Arguments:
493    eptr        pointer to current character in subject
494    ecode       pointer to current position in compiled code
495    mstart      pointer to the current match start position (can be modified
496                  by encountering \K)
497    offset_top  current top pointer
498    md          pointer to "static" info for the match
499    eptrb       pointer to chain of blocks containing eptr at start of
500                  brackets - for testing for empty matches
501    rdepth      the recursion depth
502 
503 Returns:       MATCH_MATCH if matched            )  these values are >= 0
504                MATCH_NOMATCH if failed to match  )
505                a negative MATCH_xxx value for PRUNE, SKIP, etc
506                a negative PCRE_ERROR_xxx value if aborted by an error condition
507                  (e.g. stopped by repeated call or recursion limit)
508 */
509 
510 static int
match(REGISTER PCRE_PUCHAR eptr,REGISTER const pcre_uchar * ecode,PCRE_PUCHAR mstart,int offset_top,match_data * md,eptrblock * eptrb,unsigned int rdepth)511 match(REGISTER PCRE_PUCHAR eptr, REGISTER const pcre_uchar *ecode,
512   PCRE_PUCHAR mstart, int offset_top, match_data *md, eptrblock *eptrb,
513   unsigned int rdepth)
514 {
515 /* These variables do not need to be preserved over recursion in this function,
516 so they can be ordinary variables in all cases. Mark some of them with
517 "register" because they are used a lot in loops. */
518 
519 register int  rrc;         /* Returns from recursive calls */
520 register int  i;           /* Used for loops not involving calls to RMATCH() */
521 register pcre_uint32 c;    /* Character values not kept over RMATCH() calls */
522 register BOOL utf;         /* Local copy of UTF flag for speed */
523 
524 BOOL minimize, possessive; /* Quantifier options */
525 BOOL caseless;
526 int condcode;
527 
528 /* When recursion is not being used, all "local" variables that have to be
529 preserved over calls to RMATCH() are part of a "frame". We set up the top-level
530 frame on the stack here; subsequent instantiations are obtained from the heap
531 whenever RMATCH() does a "recursion". See the macro definitions above. Putting
532 the top-level on the stack rather than malloc-ing them all gives a performance
533 boost in many cases where there is not much "recursion". */
534 
535 #ifdef NO_RECURSE
536 heapframe *frame = (heapframe *)md->match_frames_base;
537 
538 /* Copy in the original argument variables */
539 
540 frame->Xeptr = eptr;
541 frame->Xecode = ecode;
542 frame->Xmstart = mstart;
543 frame->Xoffset_top = offset_top;
544 frame->Xeptrb = eptrb;
545 frame->Xrdepth = rdepth;
546 
547 /* This is where control jumps back to to effect "recursion" */
548 
549 HEAP_RECURSE:
550 
551 /* Macros make the argument variables come from the current frame */
552 
553 #define eptr               frame->Xeptr
554 #define ecode              frame->Xecode
555 #define mstart             frame->Xmstart
556 #define offset_top         frame->Xoffset_top
557 #define eptrb              frame->Xeptrb
558 #define rdepth             frame->Xrdepth
559 
560 /* Ditto for the local variables */
561 
562 #ifdef SUPPORT_UTF
563 #define charptr            frame->Xcharptr
564 #endif
565 #define callpat            frame->Xcallpat
566 #define codelink           frame->Xcodelink
567 #define data               frame->Xdata
568 #define next               frame->Xnext
569 #define pp                 frame->Xpp
570 #define prev               frame->Xprev
571 #define saved_eptr         frame->Xsaved_eptr
572 
573 #define new_recursive      frame->Xnew_recursive
574 
575 #define cur_is_word        frame->Xcur_is_word
576 #define condition          frame->Xcondition
577 #define prev_is_word       frame->Xprev_is_word
578 
579 #ifdef SUPPORT_UCP
580 #define prop_type          frame->Xprop_type
581 #define prop_value         frame->Xprop_value
582 #define prop_fail_result   frame->Xprop_fail_result
583 #define oclength           frame->Xoclength
584 #define occhars            frame->Xocchars
585 #endif
586 
587 #define ctype              frame->Xctype
588 #define fc                 frame->Xfc
589 #define fi                 frame->Xfi
590 #define length             frame->Xlength
591 #define max                frame->Xmax
592 #define min                frame->Xmin
593 #define number             frame->Xnumber
594 #define offset             frame->Xoffset
595 #define op                 frame->Xop
596 #define save_capture_last  frame->Xsave_capture_last
597 #define save_offset1       frame->Xsave_offset1
598 #define save_offset2       frame->Xsave_offset2
599 #define save_offset3       frame->Xsave_offset3
600 #define stacksave          frame->Xstacksave
601 
602 #define newptrb            frame->Xnewptrb
603 
604 /* When recursion is being used, local variables are allocated on the stack and
605 get preserved during recursion in the normal way. In this environment, fi and
606 i, and fc and c, can be the same variables. */
607 
608 #else         /* NO_RECURSE not defined */
609 #define fi i
610 #define fc c
611 
612 /* Many of the following variables are used only in small blocks of the code.
613 My normal style of coding would have declared them within each of those blocks.
614 However, in order to accommodate the version of this code that uses an external
615 "stack" implemented on the heap, it is easier to declare them all here, so the
616 declarations can be cut out in a block. The only declarations within blocks
617 below are for variables that do not have to be preserved over a recursive call
618 to RMATCH(). */
619 
620 #ifdef SUPPORT_UTF
621 const pcre_uchar *charptr;
622 #endif
623 const pcre_uchar *callpat;
624 const pcre_uchar *data;
625 const pcre_uchar *next;
626 PCRE_PUCHAR       pp;
627 const pcre_uchar *prev;
628 PCRE_PUCHAR       saved_eptr;
629 
630 recursion_info new_recursive;
631 
632 BOOL cur_is_word;
633 BOOL condition;
634 BOOL prev_is_word;
635 
636 #ifdef SUPPORT_UCP
637 int prop_type;
638 unsigned int prop_value;
639 int prop_fail_result;
640 int oclength;
641 pcre_uchar occhars[6];
642 #endif
643 
644 int codelink;
645 int ctype;
646 int length;
647 int max;
648 int min;
649 unsigned int number;
650 int offset;
651 unsigned int op;
652 pcre_int32 save_capture_last;
653 int save_offset1, save_offset2, save_offset3;
654 int stacksave[REC_STACK_SAVE_MAX];
655 
656 eptrblock newptrb;
657 
658 /* There is a special fudge for calling match() in a way that causes it to
659 measure the size of its basic stack frame when the stack is being used for
660 recursion. The second argument (ecode) being NULL triggers this behaviour. It
661 cannot normally ever be NULL. The return is the negated value of the frame
662 size. */
663 
664 if (ecode == NULL)
665   {
666   if (rdepth == 0)
667     return match((PCRE_PUCHAR)&rdepth, NULL, NULL, 0, NULL, NULL, 1);
668   else
669     {
670     int len = (char *)&rdepth - (char *)eptr;
671     return (len > 0)? -len : len;
672     }
673   }
674 #endif     /* NO_RECURSE */
675 
676 /* To save space on the stack and in the heap frame, I have doubled up on some
677 of the local variables that are used only in localised parts of the code, but
678 still need to be preserved over recursive calls of match(). These macros define
679 the alternative names that are used. */
680 
681 #define allow_zero    cur_is_word
682 #define cbegroup      condition
683 #define code_offset   codelink
684 #define condassert    condition
685 #define matched_once  prev_is_word
686 #define foc           number
687 #define save_mark     data
688 
/* These statements are here to stop the compiler complaining about
uninitialized variables. */
691 
692 #ifdef SUPPORT_UCP
693 prop_value = 0;
694 prop_fail_result = 0;
695 #endif
696 
697 
698 /* This label is used for tail recursion, which is used in a few cases even
699 when NO_RECURSE is not defined, in order to reduce the amount of stack that is
700 used. Thanks to Ian Taylor for noticing this possibility and sending the
701 original patch. */
702 
703 TAIL_RECURSE:
704 
705 /* OK, now we can get on with the real code of the function. Recursive calls
706 are specified by the macro RMATCH and RRETURN is used to return. When
707 NO_RECURSE is *not* defined, these just turn into a recursive call to match()
708 and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
709 defined). However, RMATCH isn't like a function call because it's quite a
710 complicated macro. It has to be used in one particular way. This shouldn't,
711 however, impact performance when true recursion is being used. */
712 
713 #ifdef SUPPORT_UTF
714 utf = md->utf;       /* Local copy of the flag */
715 #else
716 utf = FALSE;
717 #endif
718 
719 /* First check that we haven't called match() too many times, or that we
720 haven't exceeded the recursive call limit. */
721 
722 if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
723 if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
724 
725 /* At the start of a group with an unlimited repeat that may match an empty
726 string, the variable md->match_function_type is set to MATCH_CBEGROUP. It is
727 done this way to save having to use another function argument, which would take
728 up space on the stack. See also MATCH_CONDASSERT below.
729 
730 When MATCH_CBEGROUP is set, add the current subject pointer to the chain of
731 such remembered pointers, to be checked when we hit the closing ket, in order
732 to break infinite loops that match no characters. When match() is called in
733 other circumstances, don't add to the chain. The MATCH_CBEGROUP feature must
734 NOT be used with tail recursion, because the memory block that is used is on
735 the stack, so a new one may be required for each match(). */
736 
737 if (md->match_function_type == MATCH_CBEGROUP)
738   {
739   newptrb.epb_saved_eptr = eptr;
740   newptrb.epb_prev = eptrb;
741   eptrb = &newptrb;
742   md->match_function_type = 0;
743   }
744 
745 /* Now start processing the opcodes. */
746 
747 for (;;)
748   {
749   minimize = possessive = FALSE;
750   op = *ecode;
751 
752   switch(op)
753     {
754     case OP_MARK:
755     md->nomatch_mark = ecode + 2;
756     md->mark = NULL;    /* In case previously set by assertion */
757     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
758       eptrb, RM55);
759     if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
760          md->mark == NULL) md->mark = ecode + 2;
761 
762     /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
763     argument, and we must check whether that argument matches this MARK's
764     argument. It is passed back in md->start_match_ptr (an overloading of that
765     variable). If it does match, we reset that variable to the current subject
766     position and return MATCH_SKIP. Otherwise, pass back the return code
767     unaltered. */
768 
769     else if (rrc == MATCH_SKIP_ARG &&
770         STRCMP_UC_UC_TEST(ecode + 2, md->start_match_ptr) == 0)
771       {
772       md->start_match_ptr = eptr;
773       RRETURN(MATCH_SKIP);
774       }
775     RRETURN(rrc);
776 
777     case OP_FAIL:
778     RRETURN(MATCH_NOMATCH);
779 
780     case OP_COMMIT:
781     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
782       eptrb, RM52);
783     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
784     RRETURN(MATCH_COMMIT);
785 
786     case OP_PRUNE:
787     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
788       eptrb, RM51);
789     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
790     RRETURN(MATCH_PRUNE);
791 
792     case OP_PRUNE_ARG:
793     md->nomatch_mark = ecode + 2;
794     md->mark = NULL;    /* In case previously set by assertion */
795     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
796       eptrb, RM56);
797     if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
798          md->mark == NULL) md->mark = ecode + 2;
799     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
800     RRETURN(MATCH_PRUNE);
801 
802     case OP_SKIP:
803     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
804       eptrb, RM53);
805     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
806     md->start_match_ptr = eptr;   /* Pass back current position */
807     RRETURN(MATCH_SKIP);
808 
809     /* Note that, for Perl compatibility, SKIP with an argument does NOT set
810     nomatch_mark. When a pattern match ends with a SKIP_ARG for which there was
811     not a matching mark, we have to re-run the match, ignoring the SKIP_ARG
812     that failed and any that precede it (either they also failed, or were not
813     triggered). To do this, we maintain a count of executed SKIP_ARGs. If a
814     SKIP_ARG gets to top level, the match is re-run with md->ignore_skip_arg
815     set to the count of the one that failed. */
816 
817     case OP_SKIP_ARG:
818     md->skip_arg_count++;
819     if (md->skip_arg_count <= md->ignore_skip_arg)
820       {
821       ecode += PRIV(OP_lengths)[*ecode] + ecode[1];
822       break;
823       }
824     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
825       eptrb, RM57);
826     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
827 
828     /* Pass back the current skip name by overloading md->start_match_ptr and
829     returning the special MATCH_SKIP_ARG return code. This will either be
830     caught by a matching MARK, or get to the top, where it causes a rematch
831     with md->ignore_skip_arg set to the value of md->skip_arg_count. */
832 
833     md->start_match_ptr = ecode + 2;
834     RRETURN(MATCH_SKIP_ARG);
835 
836     /* For THEN (and THEN_ARG) we pass back the address of the opcode, so that
837     the branch in which it occurs can be determined. Overload the start of
838     match pointer to do this. */
839 
840     case OP_THEN:
841     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
842       eptrb, RM54);
843     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
844     md->start_match_ptr = ecode;
845     RRETURN(MATCH_THEN);
846 
847     case OP_THEN_ARG:
848     md->nomatch_mark = ecode + 2;
849     md->mark = NULL;    /* In case previously set by assertion */
850     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top,
851       md, eptrb, RM58);
852     if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
853          md->mark == NULL) md->mark = ecode + 2;
854     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
855     md->start_match_ptr = ecode;
856     RRETURN(MATCH_THEN);
857 
858     /* Handle an atomic group that does not contain any capturing parentheses.
859     This can be handled like an assertion. Prior to 8.13, all atomic groups
860     were handled this way. In 8.13, the code was changed as below for ONCE, so
861     that backups pass through the group and thereby reset captured values.
862     However, this uses a lot more stack, so in 8.20, atomic groups that do not
863     contain any captures generate OP_ONCE_NC, which can be handled in the old,
864     less stack intensive way.
865 
866     Check the alternative branches in turn - the matching won't pass the KET
867     for this kind of subpattern. If any one branch matches, we carry on as at
868     the end of a normal bracket, leaving the subject pointer, but resetting
869     the start-of-match value in case it was changed by \K. */
870 
871     case OP_ONCE_NC:
872     prev = ecode;
873     saved_eptr = eptr;
874     save_mark = md->mark;
875     do
876       {
877       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM64);
878       if (rrc == MATCH_MATCH)  /* Note: _not_ MATCH_ACCEPT */
879         {
880         mstart = md->start_match_ptr;
881         break;
882         }
883       if (rrc == MATCH_THEN)
884         {
885         next = ecode + GET(ecode,1);
886         if (md->start_match_ptr < next &&
887             (*ecode == OP_ALT || *next == OP_ALT))
888           rrc = MATCH_NOMATCH;
889         }
890 
891       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
892       ecode += GET(ecode,1);
893       md->mark = save_mark;
894       }
895     while (*ecode == OP_ALT);
896 
897     /* If we hit the end of the group (which could be repeated), fail */
898 
899     if (*ecode != OP_ONCE_NC && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
900 
901     /* Continue as from after the group, updating the offsets high water
902     mark, since extracts may have been taken. */
903 
904     do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
905 
906     offset_top = md->end_offset_top;
907     eptr = md->end_match_ptr;
908 
909     /* For a non-repeating ket, just continue at this level. This also
910     happens for a repeating ket if no characters were matched in the group.
911     This is the forcible breaking of infinite loops as implemented in Perl
912     5.005. */
913 
914     if (*ecode == OP_KET || eptr == saved_eptr)
915       {
916       ecode += 1+LINK_SIZE;
917       break;
918       }
919 
920     /* The repeating kets try the rest of the pattern or restart from the
921     preceding bracket, in the appropriate order. The second "call" of match()
922     uses tail recursion, to avoid using another stack frame. */
923 
924     if (*ecode == OP_KETRMIN)
925       {
926       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM65);
927       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
928       ecode = prev;
929       goto TAIL_RECURSE;
930       }
931     else  /* OP_KETRMAX */
932       {
933       RMATCH(eptr, prev, offset_top, md, eptrb, RM66);
934       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
935       ecode += 1 + LINK_SIZE;
936       goto TAIL_RECURSE;
937       }
938     /* Control never gets here */
939 
940     /* Handle a capturing bracket, other than those that are possessive with an
941     unlimited repeat. If there is space in the offset vector, save the current
942     subject position in the working slot at the top of the vector. We mustn't
943     change the current values of the data slot, because they may be set from a
944     previous iteration of this group, and be referred to by a reference inside
945     the group. A failure to match might occur after the group has succeeded,
946     if something later on doesn't match. For this reason, we need to restore
947     the working value and also the values of the final offsets, in case they
948     were set by a previous iteration of the same bracket.
949 
950     If there isn't enough space in the offset vector, treat this as if it were
951     a non-capturing bracket. Don't worry about setting the flag for the error
952     case here; that is handled in the code for KET. */
953 
954     case OP_CBRA:
955     case OP_SCBRA:
956     number = GET2(ecode, 1+LINK_SIZE);
957     offset = number << 1;
958 
959 #ifdef PCRE_DEBUG
960     printf("start bracket %d\n", number);
961     printf("subject=");
962     pchars(eptr, 16, TRUE, md);
963     printf("\n");
964 #endif
965 
966     if (offset < md->offset_max)
967       {
968       save_offset1 = md->offset_vector[offset];
969       save_offset2 = md->offset_vector[offset+1];
970       save_offset3 = md->offset_vector[md->offset_end - number];
971       save_capture_last = md->capture_last;
972       save_mark = md->mark;
973 
974       DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
975       md->offset_vector[md->offset_end - number] =
976         (int)(eptr - md->start_subject);
977 
978       for (;;)
979         {
980         if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
981         RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
982           eptrb, RM1);
983         if (rrc == MATCH_ONCE) break;  /* Backing up through an atomic group */
984 
985         /* If we backed up to a THEN, check whether it is within the current
986         branch by comparing the address of the THEN that is passed back with
987         the end of the branch. If it is within the current branch, and the
988         branch is one of two or more alternatives (it either starts or ends
989         with OP_ALT), we have reached the limit of THEN's action, so convert
990         the return code to NOMATCH, which will cause normal backtracking to
991         happen from now on. Otherwise, THEN is passed back to an outer
992         alternative. This implements Perl's treatment of parenthesized groups,
993         where a group not containing | does not affect the current alternative,
994         that is, (X) is NOT the same as (X|(*F)). */
995 
996         if (rrc == MATCH_THEN)
997           {
998           next = ecode + GET(ecode,1);
999           if (md->start_match_ptr < next &&
1000               (*ecode == OP_ALT || *next == OP_ALT))
1001             rrc = MATCH_NOMATCH;
1002           }
1003 
1004         /* Anything other than NOMATCH is passed back. */
1005 
1006         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1007         md->capture_last = save_capture_last;
1008         ecode += GET(ecode, 1);
1009         md->mark = save_mark;
1010         if (*ecode != OP_ALT) break;
1011         }
1012 
1013       DPRINTF(("bracket %d failed\n", number));
1014       md->offset_vector[offset] = save_offset1;
1015       md->offset_vector[offset+1] = save_offset2;
1016       md->offset_vector[md->offset_end - number] = save_offset3;
1017 
1018       /* At this point, rrc will be one of MATCH_ONCE or MATCH_NOMATCH. */
1019 
1020       RRETURN(rrc);
1021       }
1022 
1023     /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
1024     as a non-capturing bracket. */
1025 
1026     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1027     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1028 
1029     DPRINTF(("insufficient capture room: treat as non-capturing\n"));
1030 
1031     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1032     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1033 
1034     /* Non-capturing or atomic group, except for possessive with unlimited
1035     repeat and ONCE group with no captures. Loop for all the alternatives.
1036 
1037     When we get to the final alternative within the brackets, we used to return
1038     the result of a recursive call to match() whatever happened so it was
1039     possible to reduce stack usage by turning this into a tail recursion,
1040     except in the case of a possibly empty group. However, now that there is
1041     the possibility of (*THEN) occurring in the final alternative, this
1042     optimization is no longer always possible.
1043 
1044     We can optimize if we know there are no (*THEN)s in the pattern; at present
1045     this is the best that can be done.
1046 
1047     MATCH_ONCE is returned when the end of an atomic group is successfully
1048     reached, but subsequent matching fails. It passes back up the tree (causing
1049     captured values to be reset) until the original atomic group level is
1050     reached. This is tested by comparing md->once_target with the start of the
1051     group. At this point, the return is converted into MATCH_NOMATCH so that
1052     previous backup points can be taken. */
1053 
1054     case OP_ONCE:
1055     case OP_BRA:
1056     case OP_SBRA:
1057     DPRINTF(("start non-capturing bracket\n"));
1058 
1059     for (;;)
1060       {
1061       if (op >= OP_SBRA || op == OP_ONCE)
1062         md->match_function_type = MATCH_CBEGROUP;
1063 
1064       /* If this is not a possibly empty group, and there are no (*THEN)s in
1065       the pattern, and this is the final alternative, optimize as described
1066       above. */
1067 
1068       else if (!md->hasthen && ecode[GET(ecode, 1)] != OP_ALT)
1069         {
1070         ecode += PRIV(OP_lengths)[*ecode];
1071         goto TAIL_RECURSE;
1072         }
1073 
1074       /* In all other cases, we have to make another call to match(). */
1075 
1076       save_mark = md->mark;
1077       save_capture_last = md->capture_last;
1078       RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb,
1079         RM2);
1080 
1081       /* See comment in the code for capturing groups above about handling
1082       THEN. */
1083 
1084       if (rrc == MATCH_THEN)
1085         {
1086         next = ecode + GET(ecode,1);
1087         if (md->start_match_ptr < next &&
1088             (*ecode == OP_ALT || *next == OP_ALT))
1089           rrc = MATCH_NOMATCH;
1090         }
1091 
1092       if (rrc != MATCH_NOMATCH)
1093         {
1094         if (rrc == MATCH_ONCE)
1095           {
1096           const pcre_uchar *scode = ecode;
1097           if (*scode != OP_ONCE)           /* If not at start, find it */
1098             {
1099             while (*scode == OP_ALT) scode += GET(scode, 1);
1100             scode -= GET(scode, 1);
1101             }
1102           if (md->once_target == scode) rrc = MATCH_NOMATCH;
1103           }
1104         RRETURN(rrc);
1105         }
1106       ecode += GET(ecode, 1);
1107       md->mark = save_mark;
1108       if (*ecode != OP_ALT) break;
1109       md->capture_last = save_capture_last;
1110       }
1111 
1112     RRETURN(MATCH_NOMATCH);
1113 
1114     /* Handle possessive capturing brackets with an unlimited repeat. We come
1115     here from BRAZERO with allow_zero set TRUE. The offset_vector values are
1116     handled similarly to the normal case above. However, the matching is
1117     different. The end of these brackets will always be OP_KETRPOS, which
1118     returns MATCH_KETRPOS without going further in the pattern. By this means
1119     we can handle the group by iteration rather than recursion, thereby
1120     reducing the amount of stack needed. */
1121 
1122     case OP_CBRAPOS:
1123     case OP_SCBRAPOS:
1124     allow_zero = FALSE;
1125 
1126     POSSESSIVE_CAPTURE:
1127     number = GET2(ecode, 1+LINK_SIZE);
1128     offset = number << 1;
1129 
1130 #ifdef PCRE_DEBUG
1131     printf("start possessive bracket %d\n", number);
1132     printf("subject=");
1133     pchars(eptr, 16, TRUE, md);
1134     printf("\n");
1135 #endif
1136 
1137     if (offset >= md->offset_max) goto POSSESSIVE_NON_CAPTURE;
1138 
1139     matched_once = FALSE;
1140     code_offset = (int)(ecode - md->start_code);
1141 
1142     save_offset1 = md->offset_vector[offset];
1143     save_offset2 = md->offset_vector[offset+1];
1144     save_offset3 = md->offset_vector[md->offset_end - number];
1145     save_capture_last = md->capture_last;
1146 
1147     DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
1148 
1149     /* Each time round the loop, save the current subject position for use
1150     when the group matches. For MATCH_MATCH, the group has matched, so we
1151     restart it with a new subject starting position, remembering that we had
1152     at least one match. For MATCH_NOMATCH, carry on with the alternatives, as
1153     usual. If we haven't matched any alternatives in any iteration, check to
1154     see if a previous iteration matched. If so, the group has matched;
1155     continue from afterwards. Otherwise it has failed; restore the previous
1156     capture values before returning NOMATCH. */
1157 
1158     for (;;)
1159       {
1160       md->offset_vector[md->offset_end - number] =
1161         (int)(eptr - md->start_subject);
1162       if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1163       RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1164         eptrb, RM63);
1165       if (rrc == MATCH_KETRPOS)
1166         {
1167         offset_top = md->end_offset_top;
1168         ecode = md->start_code + code_offset;
1169         save_capture_last = md->capture_last;
1170         matched_once = TRUE;
1171         mstart = md->start_match_ptr;    /* In case \K changed it */
1172         if (eptr == md->end_match_ptr)   /* Matched an empty string */
1173           {
1174           do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1175           break;
1176           }
1177         eptr = md->end_match_ptr;
1178         continue;
1179         }
1180 
1181       /* See comment in the code for capturing groups above about handling
1182       THEN. */
1183 
1184       if (rrc == MATCH_THEN)
1185         {
1186         next = ecode + GET(ecode,1);
1187         if (md->start_match_ptr < next &&
1188             (*ecode == OP_ALT || *next == OP_ALT))
1189           rrc = MATCH_NOMATCH;
1190         }
1191 
1192       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1193       md->capture_last = save_capture_last;
1194       ecode += GET(ecode, 1);
1195       if (*ecode != OP_ALT) break;
1196       }
1197 
1198     if (!matched_once)
1199       {
1200       md->offset_vector[offset] = save_offset1;
1201       md->offset_vector[offset+1] = save_offset2;
1202       md->offset_vector[md->offset_end - number] = save_offset3;
1203       }
1204 
1205     if (allow_zero || matched_once)
1206       {
1207       ecode += 1 + LINK_SIZE;
1208       break;
1209       }
1210 
1211     RRETURN(MATCH_NOMATCH);
1212 
1213     /* Non-capturing possessive bracket with unlimited repeat. We come here
1214     from BRAZERO with allow_zero = TRUE. The code is similar to the above,
1215     without the capturing complication. It is written out separately for speed
1216     and cleanliness. */
1217 
1218     case OP_BRAPOS:
1219     case OP_SBRAPOS:
1220     allow_zero = FALSE;
1221 
1222     POSSESSIVE_NON_CAPTURE:
1223     matched_once = FALSE;
1224     code_offset = (int)(ecode - md->start_code);
1225     save_capture_last = md->capture_last;
1226 
1227     for (;;)
1228       {
1229       if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1230       RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1231         eptrb, RM48);
1232       if (rrc == MATCH_KETRPOS)
1233         {
1234         offset_top = md->end_offset_top;
1235         ecode = md->start_code + code_offset;
1236         matched_once = TRUE;
1237         mstart = md->start_match_ptr;   /* In case \K reset it */
1238         if (eptr == md->end_match_ptr)  /* Matched an empty string */
1239           {
1240           do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1241           break;
1242           }
1243         eptr = md->end_match_ptr;
1244         continue;
1245         }
1246 
1247       /* See comment in the code for capturing groups above about handling
1248       THEN. */
1249 
1250       if (rrc == MATCH_THEN)
1251         {
1252         next = ecode + GET(ecode,1);
1253         if (md->start_match_ptr < next &&
1254             (*ecode == OP_ALT || *next == OP_ALT))
1255           rrc = MATCH_NOMATCH;
1256         }
1257 
1258       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1259       ecode += GET(ecode, 1);
1260       if (*ecode != OP_ALT) break;
1261       md->capture_last = save_capture_last;
1262       }
1263 
1264     if (matched_once || allow_zero)
1265       {
1266       ecode += 1 + LINK_SIZE;
1267       break;
1268       }
1269     RRETURN(MATCH_NOMATCH);
1270 
1271     /* Control never reaches here. */
1272 
1273     /* Conditional group: compilation checked that there are no more than two
1274     branches. If the condition is false, skipping the first branch takes us
1275     past the end of the item if there is only one branch, but that's exactly
1276     what we want. */
1277 
1278     case OP_COND:
1279     case OP_SCOND:
1280 
1281     /* The variable codelink will be added to ecode when the condition is
1282     false, to get to the second branch. Setting it to the offset to the ALT
1283     or KET, then incrementing ecode achieves this effect. We now have ecode
1284     pointing to the condition or callout. */
1285 
1286     codelink = GET(ecode, 1);   /* Offset to the second branch */
1287     ecode += 1 + LINK_SIZE;     /* From this opcode */
1288 
1289     /* Because of the way auto-callout works during compile, a callout item is
1290     inserted between OP_COND and an assertion condition. */
1291 
1292     if (*ecode == OP_CALLOUT)
1293       {
1294       if (PUBL(callout) != NULL)
1295         {
1296         PUBL(callout_block) cb;
1297         cb.version          = 2;   /* Version 2 of the callout block */
1298         cb.callout_number   = ecode[1];
1299         cb.offset_vector    = md->offset_vector;
1300 #if defined COMPILE_PCRE8
1301         cb.subject          = (PCRE_SPTR)md->start_subject;
1302 #elif defined COMPILE_PCRE16
1303         cb.subject          = (PCRE_SPTR16)md->start_subject;
1304 #elif defined COMPILE_PCRE32
1305         cb.subject          = (PCRE_SPTR32)md->start_subject;
1306 #endif
1307         cb.subject_length   = (int)(md->end_subject - md->start_subject);
1308         cb.start_match      = (int)(mstart - md->start_subject);
1309         cb.current_position = (int)(eptr - md->start_subject);
1310         cb.pattern_position = GET(ecode, 2);
1311         cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1312         cb.capture_top      = offset_top/2;
1313         cb.capture_last     = md->capture_last & CAPLMASK;
1314         /* Internal change requires this for API compatibility. */
1315         if (cb.capture_last == 0) cb.capture_last = -1;
1316         cb.callout_data     = md->callout_data;
1317         cb.mark             = md->nomatch_mark;
1318         if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1319         if (rrc < 0) RRETURN(rrc);
1320         }
1321 
1322       /* Advance ecode past the callout, so it now points to the condition. We
1323       must adjust codelink so that the value of ecode+codelink is unchanged. */
1324 
1325       ecode += PRIV(OP_lengths)[OP_CALLOUT];
1326       codelink -= PRIV(OP_lengths)[OP_CALLOUT];
1327       }
1328 
1329     /* Test the various possible conditions */
1330 
1331     condition = FALSE;
1332     switch(condcode = *ecode)
1333       {
1334       case OP_RREF:         /* Numbered group recursion test */
1335       if (md->recursive != NULL)     /* Not recursing => FALSE */
1336         {
1337         unsigned int recno = GET2(ecode, 1);   /* Recursion group number*/
1338         condition = (recno == RREF_ANY || recno == md->recursive->group_num);
1339         }
1340       break;
1341 
1342       case OP_DNRREF:       /* Duplicate named group recursion test */
1343       if (md->recursive != NULL)
1344         {
1345         int count = GET2(ecode, 1 + IMM2_SIZE);
1346         pcre_uchar *slot = md->name_table + GET2(ecode, 1) * md->name_entry_size;
1347         while (count-- > 0)
1348           {
1349           unsigned int recno = GET2(slot, 0);
1350           condition = recno == md->recursive->group_num;
1351           if (condition) break;
1352           slot += md->name_entry_size;
1353           }
1354         }
1355       break;
1356 
1357       case OP_CREF:         /* Numbered group used test */
1358       offset = GET2(ecode, 1) << 1;  /* Doubled ref number */
1359       condition = offset < offset_top && md->offset_vector[offset] >= 0;
1360       break;
1361 
1362       case OP_DNCREF:      /* Duplicate named group used test */
1363         {
1364         int count = GET2(ecode, 1 + IMM2_SIZE);
1365         pcre_uchar *slot = md->name_table + GET2(ecode, 1) * md->name_entry_size;
1366         while (count-- > 0)
1367           {
1368           offset = GET2(slot, 0) << 1;
1369           condition = offset < offset_top && md->offset_vector[offset] >= 0;
1370           if (condition) break;
1371           slot += md->name_entry_size;
1372           }
1373         }
1374       break;
1375 
1376       case OP_DEF:     /* DEFINE - always false */
1377       case OP_FAIL:    /* From optimized (?!) condition */
1378       break;
1379 
1380       /* The condition is an assertion. Call match() to evaluate it - setting
1381       md->match_function_type to MATCH_CONDASSERT causes it to stop at the end
1382       of an assertion. */
1383 
1384       default:
1385       md->match_function_type = MATCH_CONDASSERT;
1386       RMATCH(eptr, ecode, offset_top, md, NULL, RM3);
1387       if (rrc == MATCH_MATCH)
1388         {
1389         if (md->end_offset_top > offset_top)
1390           offset_top = md->end_offset_top;  /* Captures may have happened */
1391         condition = TRUE;
1392 
1393         /* Advance ecode past the assertion to the start of the first branch,
1394         but adjust it so that the general choosing code below works. If the
1395         assertion has a quantifier that allows zero repeats we must skip over
1396         the BRAZERO. This is a lunatic thing to do, but somebody did! */
1397 
1398         if (*ecode == OP_BRAZERO) ecode++;
1399         ecode += GET(ecode, 1);
1400         while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1401         ecode += 1 + LINK_SIZE - PRIV(OP_lengths)[condcode];
1402         }
1403 
1404       /* PCRE doesn't allow the effect of (*THEN) to escape beyond an
1405       assertion; it is therefore treated as NOMATCH. Any other return is an
1406       error. */
1407 
1408       else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1409         {
1410         RRETURN(rrc);         /* Need braces because of following else */
1411         }
1412       break;
1413       }
1414 
1415     /* Choose branch according to the condition */
1416 
1417     ecode += condition? PRIV(OP_lengths)[condcode] : codelink;
1418 
1419     /* We are now at the branch that is to be obeyed. As there is only one, we
1420     can use tail recursion to avoid using another stack frame, except when
1421     there is unlimited repeat of a possibly empty group. In the latter case, a
1422     recursive call to match() is always required, unless the second alternative
1423     doesn't exist, in which case we can just plough on. Note that, for
1424     compatibility with Perl, the | in a conditional group is NOT treated as
1425     creating two alternatives. If a THEN is encountered in the branch, it
1426     propagates out to the enclosing alternative (unless nested in a deeper set
1427     of alternatives, of course). */
1428 
1429     if (condition || ecode[-(1+LINK_SIZE)] == OP_ALT)
1430       {
1431       if (op != OP_SCOND)
1432         {
1433         goto TAIL_RECURSE;
1434         }
1435 
1436       md->match_function_type = MATCH_CBEGROUP;
1437       RMATCH(eptr, ecode, offset_top, md, eptrb, RM49);
1438       RRETURN(rrc);
1439       }
1440 
1441      /* Condition false & no alternative; continue after the group. */
1442 
1443     else
1444       {
1445       }
1446     break;
1447 
1448 
1449     /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
1450     to close any currently open capturing brackets. */
1451 
1452     case OP_CLOSE:
1453     number = GET2(ecode, 1);   /* Must be less than 65536 */
1454     offset = number << 1;
1455 
1456 #ifdef PCRE_DEBUG
1457       printf("end bracket %d at *ACCEPT", number);
1458       printf("\n");
1459 #endif
1460 
1461     md->capture_last = (md->capture_last & OVFLMASK) | number;
1462     if (offset >= md->offset_max) md->capture_last |= OVFLBIT; else
1463       {
1464       md->offset_vector[offset] =
1465         md->offset_vector[md->offset_end - number];
1466       md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1467 
1468       /* If this group is at or above the current highwater mark, ensure that
1469       any groups between the current high water mark and this group are marked
1470       unset and then update the high water mark. */
1471 
1472       if (offset >= offset_top)
1473         {
1474         register int *iptr = md->offset_vector + offset_top;
1475         register int *iend = md->offset_vector + offset;
1476         while (iptr < iend) *iptr++ = -1;
1477         offset_top = offset + 2;
1478         }
1479       }
1480     ecode += 1 + IMM2_SIZE;
1481     break;
1482 
1483 
1484     /* End of the pattern, either real or forced. */
1485 
1486     case OP_END:
1487     case OP_ACCEPT:
1488     case OP_ASSERT_ACCEPT:
1489 
1490     /* If we have matched an empty string, fail if not in an assertion and not
1491     in a recursion if either PCRE_NOTEMPTY is set, or if PCRE_NOTEMPTY_ATSTART
1492     is set and we have matched at the start of the subject. In both cases,
1493     backtracking will then try other alternatives, if any. */
1494 
1495     if (eptr == mstart && op != OP_ASSERT_ACCEPT &&
1496          md->recursive == NULL &&
1497          (md->notempty ||
1498            (md->notempty_atstart &&
1499              mstart == md->start_subject + md->start_offset)))
1500       RRETURN(MATCH_NOMATCH);
1501 
1502     /* Otherwise, we have a match. */
1503 
1504     md->end_match_ptr = eptr;           /* Record where we ended */
1505     md->end_offset_top = offset_top;    /* and how many extracts were taken */
1506     md->start_match_ptr = mstart;       /* and the start (\K can modify) */
1507 
1508     /* For some reason, the macros don't work properly if an expression is
1509     given as the argument to RRETURN when the heap is in use. */
1510 
1511     rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
1512     RRETURN(rrc);
1513 
1514     /* Assertion brackets. Check the alternative branches in turn - the
1515     matching won't pass the KET for an assertion. If any one branch matches,
1516     the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
1517     start of each branch to move the current point backwards, so the code at
1518     this level is identical to the lookahead case. When the assertion is part
1519     of a condition, we want to return immediately afterwards. The caller of
1520     this incarnation of the match() function will have set MATCH_CONDASSERT in
1521     md->match_function_type, and one of these opcodes will be the first opcode
1522     that is processed. We use a local variable that is preserved over calls to
1523     match() to remember this case. */
1524 
1525     case OP_ASSERT:
1526     case OP_ASSERTBACK:
1527     save_mark = md->mark;
1528     if (md->match_function_type == MATCH_CONDASSERT)
1529       {
1530       condassert = TRUE;
1531       md->match_function_type = 0;
1532       }
1533     else condassert = FALSE;
1534 
1535     /* Loop for each branch */
1536 
1537     do
1538       {
1539       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM4);
1540 
1541       /* A match means that the assertion is true; break out of the loop
1542       that matches its alternatives. */
1543 
1544       if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1545         {
1546         mstart = md->start_match_ptr;   /* In case \K reset it */
1547         break;
1548         }
1549 
1550       /* If not matched, restore the previous mark setting. */
1551 
1552       md->mark = save_mark;
1553 
1554       /* See comment in the code for capturing groups above about handling
1555       THEN. */
1556 
1557       if (rrc == MATCH_THEN)
1558         {
1559         next = ecode + GET(ecode,1);
1560         if (md->start_match_ptr < next &&
1561             (*ecode == OP_ALT || *next == OP_ALT))
1562           rrc = MATCH_NOMATCH;
1563         }
1564 
1565       /* Anything other than NOMATCH causes the entire assertion to fail,
1566       passing back the return code. This includes COMMIT, SKIP, PRUNE and an
1567       uncaptured THEN, which means they take their normal effect. This
1568       consistent approach does not always have exactly the same effect as in
1569       Perl. */
1570 
1571       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1572       ecode += GET(ecode, 1);
1573       }
1574     while (*ecode == OP_ALT);   /* Continue for next alternative */
1575 
1576     /* If we have tried all the alternative branches, the assertion has
1577     failed. If not, we broke out after a match. */
1578 
1579     if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
1580 
1581     /* If checking an assertion for a condition, return MATCH_MATCH. */
1582 
1583     if (condassert) RRETURN(MATCH_MATCH);
1584 
1585     /* Continue from after a successful assertion, updating the offsets high
1586     water mark, since extracts may have been taken during the assertion. */
1587 
1588     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1589     ecode += 1 + LINK_SIZE;
1590     offset_top = md->end_offset_top;
1591     continue;
1592 
1593     /* Negative assertion: all branches must fail to match for the assertion to
1594     succeed. */
1595 
1596     case OP_ASSERT_NOT:
1597     case OP_ASSERTBACK_NOT:
1598     save_mark = md->mark;
1599     if (md->match_function_type == MATCH_CONDASSERT)
1600       {
1601       condassert = TRUE;
1602       md->match_function_type = 0;
1603       }
1604     else condassert = FALSE;
1605 
1606     /* Loop for each alternative branch. */
1607 
1608     do
1609       {
1610       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);
1611       md->mark = save_mark;   /* Always restore the mark setting */
1612 
1613       switch(rrc)
1614         {
1615         case MATCH_MATCH:            /* A successful match means */
1616         case MATCH_ACCEPT:           /* the assertion has failed. */
1617         RRETURN(MATCH_NOMATCH);
1618 
1619         case MATCH_NOMATCH:          /* Carry on with next branch */
1620         break;
1621 
1622         /* See comment in the code for capturing groups above about handling
1623         THEN. */
1624 
1625         case MATCH_THEN:
1626         next = ecode + GET(ecode,1);
1627         if (md->start_match_ptr < next &&
1628             (*ecode == OP_ALT || *next == OP_ALT))
1629           {
1630           rrc = MATCH_NOMATCH;
1631           break;
1632           }
1633         /* Otherwise fall through. */
1634 
1635         /* COMMIT, SKIP, PRUNE, and an uncaptured THEN cause the whole
1636         assertion to fail to match, without considering any more alternatives.
1637         Failing to match means the assertion is true. This is a consistent
1638         approach, but does not always have the same effect as in Perl. */
1639 
1640         case MATCH_COMMIT:
1641         case MATCH_SKIP:
1642         case MATCH_SKIP_ARG:
1643         case MATCH_PRUNE:
1644         do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1645         goto NEG_ASSERT_TRUE;   /* Break out of alternation loop */
1646 
1647         /* Anything else is an error */
1648 
1649         default:
1650         RRETURN(rrc);
1651         }
1652 
1653       /* Continue with next branch */
1654 
1655       ecode += GET(ecode,1);
1656       }
1657     while (*ecode == OP_ALT);
1658 
1659     /* All branches in the assertion failed to match. */
1660 
1661     NEG_ASSERT_TRUE:
1662     if (condassert) RRETURN(MATCH_MATCH);  /* Condition assertion */
1663     ecode += 1 + LINK_SIZE;                /* Continue with current branch */
1664     continue;
1665 
1666     /* Move the subject pointer back. This occurs only at the start of
1667     each branch of a lookbehind assertion. If we are too close to the start to
1668     move back, this match function fails. When working with UTF-8 we move
1669     back a number of characters, not bytes. */
1670 
1671     case OP_REVERSE:
1672 #ifdef SUPPORT_UTF
1673     if (utf)
1674       {
1675       i = GET(ecode, 1);
1676       while (i-- > 0)
1677         {
1678         eptr--;
1679         if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1680         BACKCHAR(eptr);
1681         }
1682       }
1683     else
1684 #endif
1685 
1686     /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
1687 
1688       {
1689       eptr -= GET(ecode, 1);
1690       if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1691       }
1692 
1693     /* Save the earliest consulted character, then skip to next op code */
1694 
1695     if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
1696     ecode += 1 + LINK_SIZE;
1697     break;
1698 
1699     /* The callout item calls an external function, if one is provided, passing
1700     details of the match so far. This is mainly for debugging, though the
1701     function is able to force a failure. */
1702 
1703     case OP_CALLOUT:
1704     if (PUBL(callout) != NULL)
1705       {
1706       PUBL(callout_block) cb;
1707       cb.version          = 2;   /* Version 2 of the callout block */
1708       cb.callout_number   = ecode[1];
1709       cb.offset_vector    = md->offset_vector;
1710 #if defined COMPILE_PCRE8
1711       cb.subject          = (PCRE_SPTR)md->start_subject;
1712 #elif defined COMPILE_PCRE16
1713       cb.subject          = (PCRE_SPTR16)md->start_subject;
1714 #elif defined COMPILE_PCRE32
1715       cb.subject          = (PCRE_SPTR32)md->start_subject;
1716 #endif
1717       cb.subject_length   = (int)(md->end_subject - md->start_subject);
1718       cb.start_match      = (int)(mstart - md->start_subject);
1719       cb.current_position = (int)(eptr - md->start_subject);
1720       cb.pattern_position = GET(ecode, 2);
1721       cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1722       cb.capture_top      = offset_top/2;
1723       cb.capture_last     = md->capture_last & CAPLMASK;
1724       /* Internal change requires this for API compatibility. */
1725       if (cb.capture_last == 0) cb.capture_last = -1;
1726       cb.callout_data     = md->callout_data;
1727       cb.mark             = md->nomatch_mark;
1728       if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1729       if (rrc < 0) RRETURN(rrc);
1730       }
1731     ecode += 2 + 2*LINK_SIZE;
1732     break;
1733 
1734     /* Recursion either matches the current regex, or some subexpression. The
1735     offset data is the offset to the starting bracket from the start of the
1736     whole pattern. (This is so that it works from duplicated subpatterns.)
1737 
1738     The state of the capturing groups is preserved over recursion, and
1739     re-instated afterwards. We don't know how many are started and not yet
1740     finished (offset_top records the completed total) so we just have to save
1741     all the potential data. There may be up to 65535 such values, which is too
1742     large to put on the stack, but using malloc for small numbers seems
1743     expensive. As a compromise, the stack is used when there are no more than
1744     REC_STACK_SAVE_MAX values to store; otherwise malloc is used.
1745 
1746     There are also other values that have to be saved. We use a chained
1747     sequence of blocks that actually live on the stack. Thanks to Robin Houston
1748     for the original version of this logic. It has, however, been hacked around
1749     a lot, so he is not to blame for the current way it works. */
1750 
1751     case OP_RECURSE:
1752       {
1753       recursion_info *ri;
1754       unsigned int recno;
1755 
1756       callpat = md->start_code + GET(ecode, 1);
1757       recno = (callpat == md->start_code)? 0 :
1758         GET2(callpat, 1 + LINK_SIZE);
1759 
1760       /* Check for repeating a recursion without advancing the subject pointer.
1761       This should catch convoluted mutual recursions. (Some simple cases are
1762       caught at compile time.) */
1763 
1764       for (ri = md->recursive; ri != NULL; ri = ri->prevrec)
1765         if (recno == ri->group_num && eptr == ri->subject_position)
1766           RRETURN(PCRE_ERROR_RECURSELOOP);
1767 
1768       /* Add to "recursing stack" */
1769 
1770       new_recursive.group_num = recno;
1771       new_recursive.saved_capture_last = md->capture_last;
1772       new_recursive.subject_position = eptr;
1773       new_recursive.prevrec = md->recursive;
1774       md->recursive = &new_recursive;
1775 
1776       /* Where to continue from afterwards */
1777 
1778       ecode += 1 + LINK_SIZE;
1779 
1780       /* Now save the offset data */
1781 
1782       new_recursive.saved_max = md->offset_end;
1783       if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
1784         new_recursive.offset_save = stacksave;
1785       else
1786         {
1787         new_recursive.offset_save =
1788           (int *)(PUBL(malloc))(new_recursive.saved_max * sizeof(int));
1789         if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
1790         }
1791       memcpy(new_recursive.offset_save, md->offset_vector,
1792             new_recursive.saved_max * sizeof(int));
1793 
1794       /* OK, now we can do the recursion. After processing each alternative,
1795       restore the offset data and the last captured value. If there were nested
1796       recursions, md->recursive might be changed, so reset it before looping.
1797       */
1798 
1799       DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1800       cbegroup = (*callpat >= OP_SBRA);
1801       do
1802         {
1803         if (cbegroup) md->match_function_type = MATCH_CBEGROUP;
1804         RMATCH(eptr, callpat + PRIV(OP_lengths)[*callpat], offset_top,
1805           md, eptrb, RM6);
1806         memcpy(md->offset_vector, new_recursive.offset_save,
1807             new_recursive.saved_max * sizeof(int));
1808         md->capture_last = new_recursive.saved_capture_last;
1809         md->recursive = new_recursive.prevrec;
1810         if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1811           {
1812           DPRINTF(("Recursion matched\n"));
1813           if (new_recursive.offset_save != stacksave)
1814             (PUBL(free))(new_recursive.offset_save);
1815 
1816           /* Set where we got to in the subject, and reset the start in case
1817           it was changed by \K. This *is* propagated back out of a recursion,
1818           for Perl compatibility. */
1819 
1820           eptr = md->end_match_ptr;
1821           mstart = md->start_match_ptr;
1822           goto RECURSION_MATCHED;        /* Exit loop; end processing */
1823           }
1824 
1825         /* PCRE does not allow THEN, SKIP, PRUNE or COMMIT to escape beyond a
1826         recursion; they cause a NOMATCH for the entire recursion. These codes
1827         are defined in a range that can be tested for. */
1828 
1829         if (rrc >= MATCH_BACKTRACK_MIN && rrc <= MATCH_BACKTRACK_MAX)
1830           {
1831           if (new_recursive.offset_save != stacksave)
1832             (PUBL(free))(new_recursive.offset_save);
1833           RRETURN(MATCH_NOMATCH);
1834           }
1835 
1836         /* Any return code other than NOMATCH is an error. */
1837 
1838         if (rrc != MATCH_NOMATCH)
1839           {
1840           DPRINTF(("Recursion gave error %d\n", rrc));
1841           if (new_recursive.offset_save != stacksave)
1842             (PUBL(free))(new_recursive.offset_save);
1843           RRETURN(rrc);
1844           }
1845 
1846         md->recursive = &new_recursive;
1847         callpat += GET(callpat, 1);
1848         }
1849       while (*callpat == OP_ALT);
1850 
1851       DPRINTF(("Recursion didn't match\n"));
1852       md->recursive = new_recursive.prevrec;
1853       if (new_recursive.offset_save != stacksave)
1854         (PUBL(free))(new_recursive.offset_save);
1855       RRETURN(MATCH_NOMATCH);
1856       }
1857 
1858     RECURSION_MATCHED:
1859     break;
1860 
1861     /* An alternation is the end of a branch; scan along to find the end of the
1862     bracketed group and go to there. */
1863 
1864     case OP_ALT:
1865     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1866     break;
1867 
1868     /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1869     indicating that it may occur zero times. It may repeat infinitely, or not
1870     at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1871     with fixed upper repeat limits are compiled as a number of copies, with the
1872     optional ones preceded by BRAZERO or BRAMINZERO. */
1873 
1874     case OP_BRAZERO:
1875     next = ecode + 1;
1876     RMATCH(eptr, next, offset_top, md, eptrb, RM10);
1877     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1878     do next += GET(next, 1); while (*next == OP_ALT);
1879     ecode = next + 1 + LINK_SIZE;
1880     break;
1881 
1882     case OP_BRAMINZERO:
1883     next = ecode + 1;
1884     do next += GET(next, 1); while (*next == OP_ALT);
1885     RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, eptrb, RM11);
1886     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1887     ecode++;
1888     break;
1889 
1890     case OP_SKIPZERO:
1891     next = ecode+1;
1892     do next += GET(next,1); while (*next == OP_ALT);
1893     ecode = next + 1 + LINK_SIZE;
1894     break;
1895 
1896     /* BRAPOSZERO occurs before a possessive bracket group. Don't do anything
1897     here; just jump to the group, with allow_zero set TRUE. */
1898 
1899     case OP_BRAPOSZERO:
1900     op = *(++ecode);
1901     allow_zero = TRUE;
1902     if (op == OP_CBRAPOS || op == OP_SCBRAPOS) goto POSSESSIVE_CAPTURE;
1903       goto POSSESSIVE_NON_CAPTURE;
1904 
1905     /* End of a group, repeated or non-repeating. */
1906 
1907     case OP_KET:
1908     case OP_KETRMIN:
1909     case OP_KETRMAX:
1910     case OP_KETRPOS:
1911     prev = ecode - GET(ecode, 1);
1912 
1913     /* If this was a group that remembered the subject start, in order to break
1914     infinite repeats of empty string matches, retrieve the subject start from
1915     the chain. Otherwise, set it NULL. */
1916 
1917     if (*prev >= OP_SBRA || *prev == OP_ONCE)
1918       {
1919       saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
1920       eptrb = eptrb->epb_prev;              /* Backup to previous group */
1921       }
1922     else saved_eptr = NULL;
1923 
1924     /* If we are at the end of an assertion group or a non-capturing atomic
1925     group, stop matching and return MATCH_MATCH, but record the current high
1926     water mark for use by positive assertions. We also need to record the match
1927     start in case it was changed by \K. */
1928 
1929     if ((*prev >= OP_ASSERT && *prev <= OP_ASSERTBACK_NOT) ||
1930          *prev == OP_ONCE_NC)
1931       {
1932       md->end_match_ptr = eptr;      /* For ONCE_NC */
1933       md->end_offset_top = offset_top;
1934       md->start_match_ptr = mstart;
1935       RRETURN(MATCH_MATCH);         /* Sets md->mark */
1936       }
1937 
1938     /* For capturing groups we have to check the group number back at the start
1939     and if necessary complete handling an extraction by setting the offsets and
1940     bumping the high water mark. Whole-pattern recursion is coded as a recurse
1941     into group 0, so it won't be picked up here. Instead, we catch it when the
1942     OP_END is reached. Other recursion is handled here. We just have to record
1943     the current subject position and start match pointer and give a MATCH
1944     return. */
1945 
1946     if (*prev == OP_CBRA || *prev == OP_SCBRA ||
1947         *prev == OP_CBRAPOS || *prev == OP_SCBRAPOS)
1948       {
1949       number = GET2(prev, 1+LINK_SIZE);
1950       offset = number << 1;
1951 
1952 #ifdef PCRE_DEBUG
1953       printf("end bracket %d", number);
1954       printf("\n");
1955 #endif
1956 
1957       /* Handle a recursively called group. */
1958 
1959       if (md->recursive != NULL && md->recursive->group_num == number)
1960         {
1961         md->end_match_ptr = eptr;
1962         md->start_match_ptr = mstart;
1963         RRETURN(MATCH_MATCH);
1964         }
1965 
1966       /* Deal with capturing */
1967 
1968       md->capture_last = (md->capture_last & OVFLMASK) | number;
1969       if (offset >= md->offset_max) md->capture_last |= OVFLBIT; else
1970         {
1971         /* If offset is greater than offset_top, it means that we are
1972         "skipping" a capturing group, and that group's offsets must be marked
1973         unset. In earlier versions of PCRE, all the offsets were unset at the
1974         start of matching, but this doesn't work because atomic groups and
1975         assertions can cause a value to be set that should later be unset.
1976         Example: matching /(?>(a))b|(a)c/ against "ac". This sets group 1 as
1977         part of the atomic group, but this is not on the final matching path,
1978         so must be unset when 2 is set. (If there is no group 2, there is no
1979         problem, because offset_top will then be 2, indicating no capture.) */
1980 
1981         if (offset > offset_top)
1982           {
1983           register int *iptr = md->offset_vector + offset_top;
1984           register int *iend = md->offset_vector + offset;
1985           while (iptr < iend) *iptr++ = -1;
1986           }
1987 
1988         /* Now make the extraction */
1989 
1990         md->offset_vector[offset] =
1991           md->offset_vector[md->offset_end - number];
1992         md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1993         if (offset_top <= offset) offset_top = offset + 2;
1994         }
1995       }
1996 
1997     /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
1998     and return the MATCH_KETRPOS. This makes it possible to do the repeats one
1999     at a time from the outer level, thus saving stack. This must precede the
2000     empty string test - in this case that test is done at the outer level. */
2001 
2002     if (*ecode == OP_KETRPOS)
2003       {
2004       md->start_match_ptr = mstart;    /* In case \K reset it */
2005       md->end_match_ptr = eptr;
2006       md->end_offset_top = offset_top;
2007       RRETURN(MATCH_KETRPOS);
2008       }
2009 
2010     /* For an ordinary non-repeating ket, just continue at this level. This
2011     also happens for a repeating ket if no characters were matched in the
2012     group. This is the forcible breaking of infinite loops as implemented in
2013     Perl 5.005. For a non-repeating atomic group that includes captures,
2014     establish a backup point by processing the rest of the pattern at a lower
2015     level. If this results in a NOMATCH return, pass MATCH_ONCE back to the
2016     original OP_ONCE level, thereby bypassing intermediate backup points, but
2017     resetting any captures that happened along the way. */
2018 
2019     if (*ecode == OP_KET || eptr == saved_eptr)
2020       {
2021       if (*prev == OP_ONCE)
2022         {
2023         RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM12);
2024         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2025         md->once_target = prev;  /* Level at which to change to MATCH_NOMATCH */
2026         RRETURN(MATCH_ONCE);
2027         }
2028       ecode += 1 + LINK_SIZE;    /* Carry on at this level */
2029       break;
2030       }
2031 
2032     /* The normal repeating kets try the rest of the pattern or restart from
2033     the preceding bracket, in the appropriate order. In the second case, we can
2034     use tail recursion to avoid using another stack frame, unless we have
2035     an atomic group or an unlimited repeat of a group that can match an empty
2036     string. */
2037 
2038     if (*ecode == OP_KETRMIN)
2039       {
2040       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM7);
2041       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2042       if (*prev == OP_ONCE)
2043         {
2044         RMATCH(eptr, prev, offset_top, md, eptrb, RM8);
2045         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2046         md->once_target = prev;  /* Level at which to change to MATCH_NOMATCH */
2047         RRETURN(MATCH_ONCE);
2048         }
2049       if (*prev >= OP_SBRA)    /* Could match an empty string */
2050         {
2051         RMATCH(eptr, prev, offset_top, md, eptrb, RM50);
2052         RRETURN(rrc);
2053         }
2054       ecode = prev;
2055       goto TAIL_RECURSE;
2056       }
2057     else  /* OP_KETRMAX */
2058       {
2059       RMATCH(eptr, prev, offset_top, md, eptrb, RM13);
2060       if (rrc == MATCH_ONCE && md->once_target == prev) rrc = MATCH_NOMATCH;
2061       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2062       if (*prev == OP_ONCE)
2063         {
2064         RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM9);
2065         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2066         md->once_target = prev;
2067         RRETURN(MATCH_ONCE);
2068         }
2069       ecode += 1 + LINK_SIZE;
2070       goto TAIL_RECURSE;
2071       }
2072     /* Control never gets here */
2073 
2074     /* Not multiline mode: start of subject assertion, unless notbol. */
2075 
2076     case OP_CIRC:
2077     if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
2078 
2079     /* Start of subject assertion */
2080 
2081     case OP_SOD:
2082     if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
2083     ecode++;
2084     break;
2085 
2086     /* Multiline mode: start of subject unless notbol, or after any newline. */
2087 
2088     case OP_CIRCM:
2089     if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
2090     if (eptr != md->start_subject &&
2091         (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
2092       RRETURN(MATCH_NOMATCH);
2093     ecode++;
2094     break;
2095 
2096     /* Start of match assertion */
2097 
2098     case OP_SOM:
2099     if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
2100     ecode++;
2101     break;
2102 
2103     /* Reset the start of match point */
2104 
2105     case OP_SET_SOM:
2106     mstart = eptr;
2107     ecode++;
2108     break;
2109 
2110     /* Multiline mode: assert before any newline, or before end of subject
2111     unless noteol is set. */
2112 
2113     case OP_DOLLM:
2114     if (eptr < md->end_subject)
2115       {
2116       if (!IS_NEWLINE(eptr))
2117         {
2118         if (md->partial != 0 &&
2119             eptr + 1 >= md->end_subject &&
2120             NLBLOCK->nltype == NLTYPE_FIXED &&
2121             NLBLOCK->nllen == 2 &&
2122             UCHAR21TEST(eptr) == NLBLOCK->nl[0])
2123           {
2124           md->hitend = TRUE;
2125           if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2126           }
2127         RRETURN(MATCH_NOMATCH);
2128         }
2129       }
2130     else
2131       {
2132       if (md->noteol) RRETURN(MATCH_NOMATCH);
2133       SCHECK_PARTIAL();
2134       }
2135     ecode++;
2136     break;
2137 
2138     /* Not multiline mode: assert before a terminating newline or before end of
2139     subject unless noteol is set. */
2140 
2141     case OP_DOLL:
2142     if (md->noteol) RRETURN(MATCH_NOMATCH);
2143     if (!md->endonly) goto ASSERT_NL_OR_EOS;
2144 
2145     /* ... else fall through for endonly */
2146 
2147     /* End of subject assertion (\z) */
2148 
2149     case OP_EOD:
2150     if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
2151     SCHECK_PARTIAL();
2152     ecode++;
2153     break;
2154 
2155     /* End of subject or ending \n assertion (\Z) */
2156 
2157     case OP_EODN:
2158     ASSERT_NL_OR_EOS:
2159     if (eptr < md->end_subject &&
2160         (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
2161       {
2162       if (md->partial != 0 &&
2163           eptr + 1 >= md->end_subject &&
2164           NLBLOCK->nltype == NLTYPE_FIXED &&
2165           NLBLOCK->nllen == 2 &&
2166           UCHAR21TEST(eptr) == NLBLOCK->nl[0])
2167         {
2168         md->hitend = TRUE;
2169         if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2170         }
2171       RRETURN(MATCH_NOMATCH);
2172       }
2173 
2174     /* Either at end of string or \n before end. */
2175 
2176     SCHECK_PARTIAL();
2177     ecode++;
2178     break;
2179 
2180     /* Word boundary assertions */
2181 
2182     case OP_NOT_WORD_BOUNDARY:
2183     case OP_WORD_BOUNDARY:
2184       {
2185 
2186       /* Find out if the previous and current characters are "word" characters.
2187       It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
2188       be "non-word" characters. Remember the earliest consulted character for
2189       partial matching. */
2190 
2191 #ifdef SUPPORT_UTF
2192       if (utf)
2193         {
2194         /* Get status of previous character */
2195 
2196         if (eptr == md->start_subject) prev_is_word = FALSE; else
2197           {
2198           PCRE_PUCHAR lastptr = eptr - 1;
2199           BACKCHAR(lastptr);
2200           if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
2201           GETCHAR(c, lastptr);
2202 #ifdef SUPPORT_UCP
2203           if (md->use_ucp)
2204             {
2205             if (c == '_') prev_is_word = TRUE; else
2206               {
2207               int cat = UCD_CATEGORY(c);
2208               prev_is_word = (cat == ucp_L || cat == ucp_N);
2209               }
2210             }
2211           else
2212 #endif
2213           prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
2214           }
2215 
2216         /* Get status of next character */
2217 
2218         if (eptr >= md->end_subject)
2219           {
2220           SCHECK_PARTIAL();
2221           cur_is_word = FALSE;
2222           }
2223         else
2224           {
2225           GETCHAR(c, eptr);
2226 #ifdef SUPPORT_UCP
2227           if (md->use_ucp)
2228             {
2229             if (c == '_') cur_is_word = TRUE; else
2230               {
2231               int cat = UCD_CATEGORY(c);
2232               cur_is_word = (cat == ucp_L || cat == ucp_N);
2233               }
2234             }
2235           else
2236 #endif
2237           cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
2238           }
2239         }
2240       else
2241 #endif
2242 
2243       /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for
2244       consistency with the behaviour of \w we do use it in this case. */
2245 
2246         {
2247         /* Get status of previous character */
2248 
2249         if (eptr == md->start_subject) prev_is_word = FALSE; else
2250           {
2251           if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
2252 #ifdef SUPPORT_UCP
2253           if (md->use_ucp)
2254             {
2255             c = eptr[-1];
2256             if (c == '_') prev_is_word = TRUE; else
2257               {
2258               int cat = UCD_CATEGORY(c);
2259               prev_is_word = (cat == ucp_L || cat == ucp_N);
2260               }
2261             }
2262           else
2263 #endif
2264           prev_is_word = MAX_255(eptr[-1])
2265             && ((md->ctypes[eptr[-1]] & ctype_word) != 0);
2266           }
2267 
2268         /* Get status of next character */
2269 
2270         if (eptr >= md->end_subject)
2271           {
2272           SCHECK_PARTIAL();
2273           cur_is_word = FALSE;
2274           }
2275         else
2276 #ifdef SUPPORT_UCP
2277         if (md->use_ucp)
2278           {
2279           c = *eptr;
2280           if (c == '_') cur_is_word = TRUE; else
2281             {
2282             int cat = UCD_CATEGORY(c);
2283             cur_is_word = (cat == ucp_L || cat == ucp_N);
2284             }
2285           }
2286         else
2287 #endif
2288         cur_is_word = MAX_255(*eptr)
2289           && ((md->ctypes[*eptr] & ctype_word) != 0);
2290         }
2291 
2292       /* Now see if the situation is what we want */
2293 
2294       if ((*ecode++ == OP_WORD_BOUNDARY)?
2295            cur_is_word == prev_is_word : cur_is_word != prev_is_word)
2296         RRETURN(MATCH_NOMATCH);
2297       }
2298     break;
2299 
2300     /* Match any single character type except newline; have to take care with
2301     CRLF newlines and partial matching. */
2302 
2303     case OP_ANY:
2304     if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
2305     if (md->partial != 0 &&
2306         eptr + 1 >= md->end_subject &&
2307         NLBLOCK->nltype == NLTYPE_FIXED &&
2308         NLBLOCK->nllen == 2 &&
2309         UCHAR21TEST(eptr) == NLBLOCK->nl[0])
2310       {
2311       md->hitend = TRUE;
2312       if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2313       }
2314 
2315     /* Fall through */
2316 
2317     /* Match any single character whatsoever. */
2318 
2319     case OP_ALLANY:
2320     if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */
2321       {                            /* not be updated before SCHECK_PARTIAL. */
2322       SCHECK_PARTIAL();
2323       RRETURN(MATCH_NOMATCH);
2324       }
2325     eptr++;
2326 #ifdef SUPPORT_UTF
2327     if (utf) ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
2328 #endif
2329     ecode++;
2330     break;
2331 
2332     /* Match a single byte, even in UTF-8 mode. This opcode really does match
2333     any byte, even newline, independent of the setting of PCRE_DOTALL. */
2334 
2335     case OP_ANYBYTE:
2336     if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */
2337       {                            /* not be updated before SCHECK_PARTIAL. */
2338       SCHECK_PARTIAL();
2339       RRETURN(MATCH_NOMATCH);
2340       }
2341     eptr++;
2342     ecode++;
2343     break;
2344 
2345     case OP_NOT_DIGIT:
2346     if (eptr >= md->end_subject)
2347       {
2348       SCHECK_PARTIAL();
2349       RRETURN(MATCH_NOMATCH);
2350       }
2351     GETCHARINCTEST(c, eptr);
2352     if (
2353 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2354        c < 256 &&
2355 #endif
2356        (md->ctypes[c] & ctype_digit) != 0
2357        )
2358       RRETURN(MATCH_NOMATCH);
2359     ecode++;
2360     break;
2361 
2362     case OP_DIGIT:
2363     if (eptr >= md->end_subject)
2364       {
2365       SCHECK_PARTIAL();
2366       RRETURN(MATCH_NOMATCH);
2367       }
2368     GETCHARINCTEST(c, eptr);
2369     if (
2370 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2371        c > 255 ||
2372 #endif
2373        (md->ctypes[c] & ctype_digit) == 0
2374        )
2375       RRETURN(MATCH_NOMATCH);
2376     ecode++;
2377     break;
2378 
2379     case OP_NOT_WHITESPACE:
2380     if (eptr >= md->end_subject)
2381       {
2382       SCHECK_PARTIAL();
2383       RRETURN(MATCH_NOMATCH);
2384       }
2385     GETCHARINCTEST(c, eptr);
2386     if (
2387 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2388        c < 256 &&
2389 #endif
2390        (md->ctypes[c] & ctype_space) != 0
2391        )
2392       RRETURN(MATCH_NOMATCH);
2393     ecode++;
2394     break;
2395 
2396     case OP_WHITESPACE:
2397     if (eptr >= md->end_subject)
2398       {
2399       SCHECK_PARTIAL();
2400       RRETURN(MATCH_NOMATCH);
2401       }
2402     GETCHARINCTEST(c, eptr);
2403     if (
2404 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2405        c > 255 ||
2406 #endif
2407        (md->ctypes[c] & ctype_space) == 0
2408        )
2409       RRETURN(MATCH_NOMATCH);
2410     ecode++;
2411     break;
2412 
2413     case OP_NOT_WORDCHAR:
2414     if (eptr >= md->end_subject)
2415       {
2416       SCHECK_PARTIAL();
2417       RRETURN(MATCH_NOMATCH);
2418       }
2419     GETCHARINCTEST(c, eptr);
2420     if (
2421 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2422        c < 256 &&
2423 #endif
2424        (md->ctypes[c] & ctype_word) != 0
2425        )
2426       RRETURN(MATCH_NOMATCH);
2427     ecode++;
2428     break;
2429 
2430     case OP_WORDCHAR:
2431     if (eptr >= md->end_subject)
2432       {
2433       SCHECK_PARTIAL();
2434       RRETURN(MATCH_NOMATCH);
2435       }
2436     GETCHARINCTEST(c, eptr);
2437     if (
2438 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2439        c > 255 ||
2440 #endif
2441        (md->ctypes[c] & ctype_word) == 0
2442        )
2443       RRETURN(MATCH_NOMATCH);
2444     ecode++;
2445     break;
2446 
2447     case OP_ANYNL:
2448     if (eptr >= md->end_subject)
2449       {
2450       SCHECK_PARTIAL();
2451       RRETURN(MATCH_NOMATCH);
2452       }
2453     GETCHARINCTEST(c, eptr);
2454     switch(c)
2455       {
2456       default: RRETURN(MATCH_NOMATCH);
2457 
2458       case CHAR_CR:
2459       if (eptr >= md->end_subject)
2460         {
2461         SCHECK_PARTIAL();
2462         }
2463       else if (UCHAR21TEST(eptr) == CHAR_LF) eptr++;
2464       break;
2465 
2466       case CHAR_LF:
2467       break;
2468 
2469       case CHAR_VT:
2470       case CHAR_FF:
2471       case CHAR_NEL:
2472 #ifndef EBCDIC
2473       case 0x2028:
2474       case 0x2029:
2475 #endif  /* Not EBCDIC */
2476       if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
2477       break;
2478       }
2479     ecode++;
2480     break;
2481 
2482     case OP_NOT_HSPACE:
2483     if (eptr >= md->end_subject)
2484       {
2485       SCHECK_PARTIAL();
2486       RRETURN(MATCH_NOMATCH);
2487       }
2488     GETCHARINCTEST(c, eptr);
2489     switch(c)
2490       {
2491       HSPACE_CASES: RRETURN(MATCH_NOMATCH);  /* Byte and multibyte cases */
2492       default: break;
2493       }
2494     ecode++;
2495     break;
2496 
2497     case OP_HSPACE:
2498     if (eptr >= md->end_subject)
2499       {
2500       SCHECK_PARTIAL();
2501       RRETURN(MATCH_NOMATCH);
2502       }
2503     GETCHARINCTEST(c, eptr);
2504     switch(c)
2505       {
2506       HSPACE_CASES: break;  /* Byte and multibyte cases */
2507       default: RRETURN(MATCH_NOMATCH);
2508       }
2509     ecode++;
2510     break;
2511 
2512     case OP_NOT_VSPACE:
2513     if (eptr >= md->end_subject)
2514       {
2515       SCHECK_PARTIAL();
2516       RRETURN(MATCH_NOMATCH);
2517       }
2518     GETCHARINCTEST(c, eptr);
2519     switch(c)
2520       {
2521       VSPACE_CASES: RRETURN(MATCH_NOMATCH);
2522       default: break;
2523       }
2524     ecode++;
2525     break;
2526 
2527     case OP_VSPACE:
2528     if (eptr >= md->end_subject)
2529       {
2530       SCHECK_PARTIAL();
2531       RRETURN(MATCH_NOMATCH);
2532       }
2533     GETCHARINCTEST(c, eptr);
2534     switch(c)
2535       {
2536       VSPACE_CASES: break;
2537       default: RRETURN(MATCH_NOMATCH);
2538       }
2539     ecode++;
2540     break;
2541 
2542 #ifdef SUPPORT_UCP
2543     /* Check the next character by Unicode property. We will get here only
2544     if the support is in the binary; otherwise a compile-time error occurs. */
2545 
2546     case OP_PROP:
2547     case OP_NOTPROP:
2548     if (eptr >= md->end_subject)
2549       {
2550       SCHECK_PARTIAL();
2551       RRETURN(MATCH_NOMATCH);
2552       }
2553     GETCHARINCTEST(c, eptr);
2554       {
2555       const pcre_uint32 *cp;
2556       const ucd_record *prop = GET_UCD(c);
2557 
2558       switch(ecode[1])
2559         {
2560         case PT_ANY:
2561         if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
2562         break;
2563 
2564         case PT_LAMP:
2565         if ((prop->chartype == ucp_Lu ||
2566              prop->chartype == ucp_Ll ||
2567              prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
2568           RRETURN(MATCH_NOMATCH);
2569         break;
2570 
2571         case PT_GC:
2572         if ((ecode[2] != PRIV(ucp_gentype)[prop->chartype]) == (op == OP_PROP))
2573           RRETURN(MATCH_NOMATCH);
2574         break;
2575 
2576         case PT_PC:
2577         if ((ecode[2] != prop->chartype) == (op == OP_PROP))
2578           RRETURN(MATCH_NOMATCH);
2579         break;
2580 
2581         case PT_SC:
2582         if ((ecode[2] != prop->script) == (op == OP_PROP))
2583           RRETURN(MATCH_NOMATCH);
2584         break;
2585 
2586         /* These are specials */
2587 
2588         case PT_ALNUM:
2589         if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2590              PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
2591           RRETURN(MATCH_NOMATCH);
2592         break;
2593 
2594         /* Perl space used to exclude VT, but from Perl 5.18 it is included,
2595         which means that Perl space and POSIX space are now identical. PCRE
2596         was changed at release 8.34. */
2597 
2598         case PT_SPACE:    /* Perl space */
2599         case PT_PXSPACE:  /* POSIX space */
2600         switch(c)
2601           {
2602           HSPACE_CASES:
2603           VSPACE_CASES:
2604           if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
2605           break;
2606 
2607           default:
2608           if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z) ==
2609             (op == OP_NOTPROP)) RRETURN(MATCH_NOMATCH);
2610           break;
2611           }
2612         break;
2613 
2614         case PT_WORD:
2615         if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2616              PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
2617              c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
2618           RRETURN(MATCH_NOMATCH);
2619         break;
2620 
2621         case PT_CLIST:
2622         cp = PRIV(ucd_caseless_sets) + ecode[2];
2623         for (;;)
2624           {
2625           if (c < *cp)
2626             { if (op == OP_PROP) { RRETURN(MATCH_NOMATCH); } else break; }
2627           if (c == *cp++)
2628             { if (op == OP_PROP) break; else { RRETURN(MATCH_NOMATCH); } }
2629           }
2630         break;
2631 
2632         case PT_UCNC:
2633         if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
2634              c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
2635              c >= 0xe000) == (op == OP_NOTPROP))
2636           RRETURN(MATCH_NOMATCH);
2637         break;
2638 
2639         /* This should never occur */
2640 
2641         default:
2642         RRETURN(PCRE_ERROR_INTERNAL);
2643         }
2644 
2645       ecode += 3;
2646       }
2647     break;
2648 
2649     /* Match an extended Unicode sequence. We will get here only if the support
2650     is in the binary; otherwise a compile-time error occurs. */
2651 
2652     case OP_EXTUNI:
2653     if (eptr >= md->end_subject)
2654       {
2655       SCHECK_PARTIAL();
2656       RRETURN(MATCH_NOMATCH);
2657       }
2658     else
2659       {
2660       int lgb, rgb;
2661       GETCHARINCTEST(c, eptr);
2662       lgb = UCD_GRAPHBREAK(c);
2663       while (eptr < md->end_subject)
2664         {
2665         int len = 1;
2666         if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
2667         rgb = UCD_GRAPHBREAK(c);
2668         if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
2669         lgb = rgb;
2670         eptr += len;
2671         }
2672       }
2673     CHECK_PARTIAL();
2674     ecode++;
2675     break;
2676 #endif  /* SUPPORT_UCP */
2677 
2678 
2679     /* Match a back reference, possibly repeatedly. Look past the end of the
2680     item to see if there is repeat information following. The code is similar
2681     to that for character classes, but repeated for efficiency. Then obey
2682     similar code to character type repeats - written out again for speed.
2683     However, if the referenced string is the empty string, always treat
2684     it as matched, any number of times (otherwise there could be infinite
2685     loops). If the reference is unset, there are two possibilities:
2686 
2687     (a) In the default, Perl-compatible state, set the length negative;
2688     this ensures that every attempt at a match fails. We can't just fail
2689     here, because of the possibility of quantifiers with zero minima.
2690 
2691     (b) If the JavaScript compatibility flag is set, set the length to zero
2692     so that the back reference matches an empty string.
2693 
2694     Otherwise, set the length to the length of what was matched by the
2695     referenced subpattern.
2696 
2697     The OP_REF and OP_REFI opcodes are used for a reference to a numbered group
2698     or to a non-duplicated named group. For a duplicated named group, OP_DNREF
2699     and OP_DNREFI are used. In this case we must scan the list of groups to
2700     which the name refers, and use the first one that is set. */
2701 
2702     case OP_DNREF:
2703     case OP_DNREFI:
2704     caseless = op == OP_DNREFI;
2705       {
2706       int count = GET2(ecode, 1+IMM2_SIZE);
2707       pcre_uchar *slot = md->name_table + GET2(ecode, 1) * md->name_entry_size;
2708       ecode += 1 + 2*IMM2_SIZE;
2709 
2710       /* Setting the default length first and initializing 'offset' avoids
2711       compiler warnings in the REF_REPEAT code. */
2712 
2713       length = (md->jscript_compat)? 0 : -1;
2714       offset = 0;
2715 
2716       while (count-- > 0)
2717         {
2718         offset = GET2(slot, 0) << 1;
2719         if (offset < offset_top && md->offset_vector[offset] >= 0)
2720           {
2721           length = md->offset_vector[offset+1] - md->offset_vector[offset];
2722           break;
2723           }
2724         slot += md->name_entry_size;
2725         }
2726       }
2727     goto REF_REPEAT;
2728 
2729     case OP_REF:
2730     case OP_REFI:
2731     caseless = op == OP_REFI;
2732     offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
2733     ecode += 1 + IMM2_SIZE;
2734     if (offset >= offset_top || md->offset_vector[offset] < 0)
2735       length = (md->jscript_compat)? 0 : -1;
2736     else
2737       length = md->offset_vector[offset+1] - md->offset_vector[offset];
2738 
2739     /* Set up for repetition, or handle the non-repeated case */
2740 
2741     REF_REPEAT:
2742     switch (*ecode)
2743       {
2744       case OP_CRSTAR:
2745       case OP_CRMINSTAR:
2746       case OP_CRPLUS:
2747       case OP_CRMINPLUS:
2748       case OP_CRQUERY:
2749       case OP_CRMINQUERY:
2750       c = *ecode++ - OP_CRSTAR;
2751       minimize = (c & 1) != 0;
2752       min = rep_min[c];                 /* Pick up values from tables; */
2753       max = rep_max[c];                 /* zero for max => infinity */
2754       if (max == 0) max = INT_MAX;
2755       break;
2756 
2757       case OP_CRRANGE:
2758       case OP_CRMINRANGE:
2759       minimize = (*ecode == OP_CRMINRANGE);
2760       min = GET2(ecode, 1);
2761       max = GET2(ecode, 1 + IMM2_SIZE);
2762       if (max == 0) max = INT_MAX;
2763       ecode += 1 + 2 * IMM2_SIZE;
2764       break;
2765 
2766       default:               /* No repeat follows */
2767       if ((length = match_ref(offset, eptr, length, md, caseless)) < 0)
2768         {
2769         if (length == -2) eptr = md->end_subject;   /* Partial match */
2770         CHECK_PARTIAL();
2771         RRETURN(MATCH_NOMATCH);
2772         }
2773       eptr += length;
2774       continue;              /* With the main loop */
2775       }
2776 
2777     /* Handle repeated back references. If the length of the reference is
2778     zero, just continue with the main loop. If the length is negative, it
2779     means the reference is unset in non-JavaScript-compatible mode. If the
2780     minimum is zero, we can continue at the same level without recursion. For
2781     any other minimum, carrying on will result in NOMATCH. */
2782 
2783     if (length == 0) continue;
2784     if (length < 0 && min == 0) continue;
2785 
2786     /* First, ensure the minimum number of matches are present. We get back
2787     the length of the reference string explicitly rather than passing the
2788     address of eptr, so that eptr can be a register variable. */
2789 
2790     for (i = 1; i <= min; i++)
2791       {
2792       int slength;
2793       if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2794         {
2795         if (slength == -2) eptr = md->end_subject;   /* Partial match */
2796         CHECK_PARTIAL();
2797         RRETURN(MATCH_NOMATCH);
2798         }
2799       eptr += slength;
2800       }
2801 
2802     /* If min = max, continue at the same level without recursion.
2803     They are not both allowed to be zero. */
2804 
2805     if (min == max) continue;
2806 
2807     /* If minimizing, keep trying and advancing the pointer */
2808 
2809     if (minimize)
2810       {
2811       for (fi = min;; fi++)
2812         {
2813         int slength;
2814         RMATCH(eptr, ecode, offset_top, md, eptrb, RM14);
2815         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2816         if (fi >= max) RRETURN(MATCH_NOMATCH);
2817         if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2818           {
2819           if (slength == -2) eptr = md->end_subject;   /* Partial match */
2820           CHECK_PARTIAL();
2821           RRETURN(MATCH_NOMATCH);
2822           }
2823         eptr += slength;
2824         }
2825       /* Control never gets here */
2826       }
2827 
2828     /* If maximizing, find the longest string and work backwards */
2829 
2830     else
2831       {
2832       pp = eptr;
2833       for (i = min; i < max; i++)
2834         {
2835         int slength;
2836         if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2837           {
2838           /* Can't use CHECK_PARTIAL because we don't want to update eptr in
2839           the soft partial matching case. */
2840 
2841           if (slength == -2 && md->partial != 0 &&
2842               md->end_subject > md->start_used_ptr)
2843             {
2844             md->hitend = TRUE;
2845             if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2846             }
2847           break;
2848           }
2849         eptr += slength;
2850         }
2851 
2852       while (eptr >= pp)
2853         {
2854         RMATCH(eptr, ecode, offset_top, md, eptrb, RM15);
2855         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2856         eptr -= length;
2857         }
2858       RRETURN(MATCH_NOMATCH);
2859       }
2860     /* Control never gets here */
2861 
2862     /* Match a bit-mapped character class, possibly repeatedly. This op code is
2863     used when all the characters in the class have values in the range 0-255,
2864     and either the matching is caseful, or the characters are in the range
2865     0-127 when UTF-8 processing is enabled. The only difference between
2866     OP_CLASS and OP_NCLASS occurs when a data character outside the range is
2867     encountered.
2868 
2869     First, look past the end of the item to see if there is repeat information
2870     following. Then obey similar code to character type repeats - written out
2871     again for speed. */
2872 
2873     case OP_NCLASS:
2874     case OP_CLASS:
2875       {
2876       /* The data variable is saved across frames, so the byte map needs to
2877       be stored there. */
2878 #define BYTE_MAP ((pcre_uint8 *)data)
2879       data = ecode + 1;                /* Save for matching */
2880       ecode += 1 + (32 / sizeof(pcre_uchar)); /* Advance past the item */
2881 
2882       switch (*ecode)
2883         {
2884         case OP_CRSTAR:
2885         case OP_CRMINSTAR:
2886         case OP_CRPLUS:
2887         case OP_CRMINPLUS:
2888         case OP_CRQUERY:
2889         case OP_CRMINQUERY:
2890         case OP_CRPOSSTAR:
2891         case OP_CRPOSPLUS:
2892         case OP_CRPOSQUERY:
2893         c = *ecode++ - OP_CRSTAR;
2894         if (c < OP_CRPOSSTAR - OP_CRSTAR) minimize = (c & 1) != 0;
2895         else possessive = TRUE;
2896         min = rep_min[c];                 /* Pick up values from tables; */
2897         max = rep_max[c];                 /* zero for max => infinity */
2898         if (max == 0) max = INT_MAX;
2899         break;
2900 
2901         case OP_CRRANGE:
2902         case OP_CRMINRANGE:
2903         case OP_CRPOSRANGE:
2904         minimize = (*ecode == OP_CRMINRANGE);
2905         possessive = (*ecode == OP_CRPOSRANGE);
2906         min = GET2(ecode, 1);
2907         max = GET2(ecode, 1 + IMM2_SIZE);
2908         if (max == 0) max = INT_MAX;
2909         ecode += 1 + 2 * IMM2_SIZE;
2910         break;
2911 
2912         default:               /* No repeat follows */
2913         min = max = 1;
2914         break;
2915         }
2916 
2917       /* First, ensure the minimum number of matches are present. */
2918 
2919 #ifdef SUPPORT_UTF
2920       if (utf)
2921         {
2922         for (i = 1; i <= min; i++)
2923           {
2924           if (eptr >= md->end_subject)
2925             {
2926             SCHECK_PARTIAL();
2927             RRETURN(MATCH_NOMATCH);
2928             }
2929           GETCHARINC(c, eptr);
2930           if (c > 255)
2931             {
2932             if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2933             }
2934           else
2935             if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2936           }
2937         }
2938       else
2939 #endif
2940       /* Not UTF mode */
2941         {
2942         for (i = 1; i <= min; i++)
2943           {
2944           if (eptr >= md->end_subject)
2945             {
2946             SCHECK_PARTIAL();
2947             RRETURN(MATCH_NOMATCH);
2948             }
2949           c = *eptr++;
2950 #ifndef COMPILE_PCRE8
2951           if (c > 255)
2952             {
2953             if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2954             }
2955           else
2956 #endif
2957             if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2958           }
2959         }
2960 
2961       /* If max == min we can continue with the main loop without the
2962       need to recurse. */
2963 
2964       if (min == max) continue;
2965 
2966       /* If minimizing, keep testing the rest of the expression and advancing
2967       the pointer while it matches the class. */
2968 
2969       if (minimize)
2970         {
2971 #ifdef SUPPORT_UTF
2972         if (utf)
2973           {
2974           for (fi = min;; fi++)
2975             {
2976             RMATCH(eptr, ecode, offset_top, md, eptrb, RM16);
2977             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2978             if (fi >= max) RRETURN(MATCH_NOMATCH);
2979             if (eptr >= md->end_subject)
2980               {
2981               SCHECK_PARTIAL();
2982               RRETURN(MATCH_NOMATCH);
2983               }
2984             GETCHARINC(c, eptr);
2985             if (c > 255)
2986               {
2987               if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2988               }
2989             else
2990               if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2991             }
2992           }
2993         else
2994 #endif
2995         /* Not UTF mode */
2996           {
2997           for (fi = min;; fi++)
2998             {
2999             RMATCH(eptr, ecode, offset_top, md, eptrb, RM17);
3000             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3001             if (fi >= max) RRETURN(MATCH_NOMATCH);
3002             if (eptr >= md->end_subject)
3003               {
3004               SCHECK_PARTIAL();
3005               RRETURN(MATCH_NOMATCH);
3006               }
3007             c = *eptr++;
3008 #ifndef COMPILE_PCRE8
3009             if (c > 255)
3010               {
3011               if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
3012               }
3013             else
3014 #endif
3015               if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
3016             }
3017           }
3018         /* Control never gets here */
3019         }
3020 
3021       /* If maximizing, find the longest possible run, then work backwards. */
3022 
3023       else
3024         {
3025         pp = eptr;
3026 
3027 #ifdef SUPPORT_UTF
3028         if (utf)
3029           {
3030           for (i = min; i < max; i++)
3031             {
3032             int len = 1;
3033             if (eptr >= md->end_subject)
3034               {
3035               SCHECK_PARTIAL();
3036               break;
3037               }
3038             GETCHARLEN(c, eptr, len);
3039             if (c > 255)
3040               {
3041               if (op == OP_CLASS) break;
3042               }
3043             else
3044               if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
3045             eptr += len;
3046             }
3047 
3048           if (possessive) continue;    /* No backtracking */
3049 
3050           for (;;)
3051             {
3052             RMATCH(eptr, ecode, offset_top, md, eptrb, RM18);
3053             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3054             if (eptr-- == pp) break;        /* Stop if tried at original pos */
3055             BACKCHAR(eptr);
3056             }
3057           }
3058         else
3059 #endif
3060           /* Not UTF mode */
3061           {
3062           for (i = min; i < max; i++)
3063             {
3064             if (eptr >= md->end_subject)
3065               {
3066               SCHECK_PARTIAL();
3067               break;
3068               }
3069             c = *eptr;
3070 #ifndef COMPILE_PCRE8
3071             if (c > 255)
3072               {
3073               if (op == OP_CLASS) break;
3074               }
3075             else
3076 #endif
3077               if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
3078             eptr++;
3079             }
3080 
3081           if (possessive) continue;    /* No backtracking */
3082 
3083           while (eptr >= pp)
3084             {
3085             RMATCH(eptr, ecode, offset_top, md, eptrb, RM19);
3086             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3087             eptr--;
3088             }
3089           }
3090 
3091         RRETURN(MATCH_NOMATCH);
3092         }
3093 #undef BYTE_MAP
3094       }
3095     /* Control never gets here */
3096 
3097 
3098     /* Match an extended character class. In the 8-bit library, this opcode is
3099     encountered only when UTF-8 mode is supported. In the 16-bit and
3100     32-bit libraries, codepoints greater than 255 may be encountered even when
3101     UTF is not supported. */
3102 
3103 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3104     case OP_XCLASS:
3105       {
3106       data = ecode + 1 + LINK_SIZE;                /* Save for matching */
3107       ecode += GET(ecode, 1);                      /* Advance past the item */
3108 
3109       switch (*ecode)
3110         {
3111         case OP_CRSTAR:
3112         case OP_CRMINSTAR:
3113         case OP_CRPLUS:
3114         case OP_CRMINPLUS:
3115         case OP_CRQUERY:
3116         case OP_CRMINQUERY:
3117         case OP_CRPOSSTAR:
3118         case OP_CRPOSPLUS:
3119         case OP_CRPOSQUERY:
3120         c = *ecode++ - OP_CRSTAR;
3121         if (c < OP_CRPOSSTAR - OP_CRSTAR) minimize = (c & 1) != 0;
3122         else possessive = TRUE;
3123         min = rep_min[c];                 /* Pick up values from tables; */
3124         max = rep_max[c];                 /* zero for max => infinity */
3125         if (max == 0) max = INT_MAX;
3126         break;
3127 
3128         case OP_CRRANGE:
3129         case OP_CRMINRANGE:
3130         case OP_CRPOSRANGE:
3131         minimize = (*ecode == OP_CRMINRANGE);
3132         possessive = (*ecode == OP_CRPOSRANGE);
3133         min = GET2(ecode, 1);
3134         max = GET2(ecode, 1 + IMM2_SIZE);
3135         if (max == 0) max = INT_MAX;
3136         ecode += 1 + 2 * IMM2_SIZE;
3137         break;
3138 
3139         default:               /* No repeat follows */
3140         min = max = 1;
3141         break;
3142         }
3143 
3144       /* First, ensure the minimum number of matches are present. */
3145 
3146       for (i = 1; i <= min; i++)
3147         {
3148         if (eptr >= md->end_subject)
3149           {
3150           SCHECK_PARTIAL();
3151           RRETURN(MATCH_NOMATCH);
3152           }
3153         GETCHARINCTEST(c, eptr);
3154         if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
3155         }
3156 
3157       /* If max == min we can continue with the main loop without the
3158       need to recurse. */
3159 
3160       if (min == max) continue;
3161 
3162       /* If minimizing, keep testing the rest of the expression and advancing
3163       the pointer while it matches the class. */
3164 
3165       if (minimize)
3166         {
3167         for (fi = min;; fi++)
3168           {
3169           RMATCH(eptr, ecode, offset_top, md, eptrb, RM20);
3170           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3171           if (fi >= max) RRETURN(MATCH_NOMATCH);
3172           if (eptr >= md->end_subject)
3173             {
3174             SCHECK_PARTIAL();
3175             RRETURN(MATCH_NOMATCH);
3176             }
3177           GETCHARINCTEST(c, eptr);
3178           if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
3179           }
3180         /* Control never gets here */
3181         }
3182 
3183       /* If maximizing, find the longest possible run, then work backwards. */
3184 
3185       else
3186         {
3187         pp = eptr;
3188         for (i = min; i < max; i++)
3189           {
3190           int len = 1;
3191           if (eptr >= md->end_subject)
3192             {
3193             SCHECK_PARTIAL();
3194             break;
3195             }
3196 #ifdef SUPPORT_UTF
3197           GETCHARLENTEST(c, eptr, len);
3198 #else
3199           c = *eptr;
3200 #endif
3201           if (!PRIV(xclass)(c, data, utf)) break;
3202           eptr += len;
3203           }
3204 
3205         if (possessive) continue;    /* No backtracking */
3206 
3207         for(;;)
3208           {
3209           RMATCH(eptr, ecode, offset_top, md, eptrb, RM21);
3210           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3211           if (eptr-- == pp) break;        /* Stop if tried at original pos */
3212 #ifdef SUPPORT_UTF
3213           if (utf) BACKCHAR(eptr);
3214 #endif
3215           }
3216         RRETURN(MATCH_NOMATCH);
3217         }
3218 
3219       /* Control never gets here */
3220       }
3221 #endif    /* End of XCLASS */
3222 
3223     /* Match a single character, casefully */
3224 
3225     case OP_CHAR:
3226 #ifdef SUPPORT_UTF
3227     if (utf)
3228       {
3229       length = 1;
3230       ecode++;
3231       GETCHARLEN(fc, ecode, length);
3232       if (length > md->end_subject - eptr)
3233         {
3234         CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
3235         RRETURN(MATCH_NOMATCH);
3236         }
3237       while (length-- > 0) if (*ecode++ != UCHAR21INC(eptr)) RRETURN(MATCH_NOMATCH);
3238       }
3239     else
3240 #endif
3241     /* Not UTF mode */
3242       {
3243       if (md->end_subject - eptr < 1)
3244         {
3245         SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
3246         RRETURN(MATCH_NOMATCH);
3247         }
3248       if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
3249       ecode += 2;
3250       }
3251     break;
3252 
3253     /* Match a single character, caselessly. If we are at the end of the
3254     subject, give up immediately. */
3255 
3256     case OP_CHARI:
3257     if (eptr >= md->end_subject)
3258       {
3259       SCHECK_PARTIAL();
3260       RRETURN(MATCH_NOMATCH);
3261       }
3262 
3263 #ifdef SUPPORT_UTF
3264     if (utf)
3265       {
3266       length = 1;
3267       ecode++;
3268       GETCHARLEN(fc, ecode, length);
3269 
3270       /* If the pattern character's value is < 128, we have only one byte, and
3271       we know that its other case must also be one byte long, so we can use the
3272       fast lookup table. We know that there is at least one byte left in the
3273       subject. */
3274 
3275       if (fc < 128)
3276         {
3277         pcre_uint32 cc = UCHAR21(eptr);
3278         if (md->lcc[fc] != TABLE_GET(cc, md->lcc, cc)) RRETURN(MATCH_NOMATCH);
3279         ecode++;
3280         eptr++;
3281         }
3282 
3283       /* Otherwise we must pick up the subject character. Note that we cannot
3284       use the value of "length" to check for sufficient bytes left, because the
3285       other case of the character may have more or fewer bytes.  */
3286 
3287       else
3288         {
3289         pcre_uint32 dc;
3290         GETCHARINC(dc, eptr);
3291         ecode += length;
3292 
3293         /* If we have Unicode property support, we can use it to test the other
3294         case of the character, if there is one. */
3295 
3296         if (fc != dc)
3297           {
3298 #ifdef SUPPORT_UCP
3299           if (dc != UCD_OTHERCASE(fc))
3300 #endif
3301             RRETURN(MATCH_NOMATCH);
3302           }
3303         }
3304       }
3305     else
3306 #endif   /* SUPPORT_UTF */
3307 
3308     /* Not UTF mode */
3309       {
3310       if (TABLE_GET(ecode[1], md->lcc, ecode[1])
3311           != TABLE_GET(*eptr, md->lcc, *eptr)) RRETURN(MATCH_NOMATCH);
3312       eptr++;
3313       ecode += 2;
3314       }
3315     break;
3316 
3317     /* Match a single character repeatedly. */
3318 
3319     case OP_EXACT:
3320     case OP_EXACTI:
3321     min = max = GET2(ecode, 1);
3322     ecode += 1 + IMM2_SIZE;
3323     goto REPEATCHAR;
3324 
3325     case OP_POSUPTO:
3326     case OP_POSUPTOI:
3327     possessive = TRUE;
3328     /* Fall through */
3329 
3330     case OP_UPTO:
3331     case OP_UPTOI:
3332     case OP_MINUPTO:
3333     case OP_MINUPTOI:
3334     min = 0;
3335     max = GET2(ecode, 1);
3336     minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI;
3337     ecode += 1 + IMM2_SIZE;
3338     goto REPEATCHAR;
3339 
3340     case OP_POSSTAR:
3341     case OP_POSSTARI:
3342     possessive = TRUE;
3343     min = 0;
3344     max = INT_MAX;
3345     ecode++;
3346     goto REPEATCHAR;
3347 
3348     case OP_POSPLUS:
3349     case OP_POSPLUSI:
3350     possessive = TRUE;
3351     min = 1;
3352     max = INT_MAX;
3353     ecode++;
3354     goto REPEATCHAR;
3355 
3356     case OP_POSQUERY:
3357     case OP_POSQUERYI:
3358     possessive = TRUE;
3359     min = 0;
3360     max = 1;
3361     ecode++;
3362     goto REPEATCHAR;
3363 
3364     case OP_STAR:
3365     case OP_STARI:
3366     case OP_MINSTAR:
3367     case OP_MINSTARI:
3368     case OP_PLUS:
3369     case OP_PLUSI:
3370     case OP_MINPLUS:
3371     case OP_MINPLUSI:
3372     case OP_QUERY:
3373     case OP_QUERYI:
3374     case OP_MINQUERY:
3375     case OP_MINQUERYI:
3376     c = *ecode++ - ((op < OP_STARI)? OP_STAR : OP_STARI);
3377     minimize = (c & 1) != 0;
3378     min = rep_min[c];                 /* Pick up values from tables; */
3379     max = rep_max[c];                 /* zero for max => infinity */
3380     if (max == 0) max = INT_MAX;
3381 
3382     /* Common code for all repeated single-character matches. We first check
3383     for the minimum number of characters. If the minimum equals the maximum, we
3384     are done. Otherwise, if minimizing, check the rest of the pattern for a
3385     match; if there isn't one, advance up to the maximum, one character at a
3386     time.
3387 
3388     If maximizing, advance up to the maximum number of matching characters,
3389     until eptr is past the end of the maximum run. If possessive, we are
3390     then done (no backing up). Otherwise, match at this position; anything
3391     other than no match is immediately returned. For nomatch, back up one
3392     character, unless we are matching \R and the last thing matched was
3393     \r\n, in which case, back up two bytes. When we reach the first optional
3394     character position, we can save stack by doing a tail recurse.
3395 
3396     The various UTF/non-UTF and caseful/caseless cases are handled separately,
3397     for speed. */
3398 
3399     REPEATCHAR:
3400 #ifdef SUPPORT_UTF
3401     if (utf)
3402       {
3403       length = 1;
3404       charptr = ecode;
3405       GETCHARLEN(fc, ecode, length);
3406       ecode += length;
3407 
3408       /* Handle multibyte character matching specially here. There is
3409       support for caseless matching if UCP support is present. */
3410 
3411       if (length > 1)
3412         {
3413 #ifdef SUPPORT_UCP
3414         pcre_uint32 othercase;
3415         if (op >= OP_STARI &&     /* Caseless */
3416             (othercase = UCD_OTHERCASE(fc)) != fc)
3417           oclength = PRIV(ord2utf)(othercase, occhars);
3418         else oclength = 0;
3419 #endif  /* SUPPORT_UCP */
3420 
3421         for (i = 1; i <= min; i++)
3422           {
3423           if (eptr <= md->end_subject - length &&
3424             memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3425 #ifdef SUPPORT_UCP
3426           else if (oclength > 0 &&
3427                    eptr <= md->end_subject - oclength &&
3428                    memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3429 #endif  /* SUPPORT_UCP */
3430           else
3431             {
3432             CHECK_PARTIAL();
3433             RRETURN(MATCH_NOMATCH);
3434             }
3435           }
3436 
3437         if (min == max) continue;
3438 
3439         if (minimize)
3440           {
3441           for (fi = min;; fi++)
3442             {
3443             RMATCH(eptr, ecode, offset_top, md, eptrb, RM22);
3444             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3445             if (fi >= max) RRETURN(MATCH_NOMATCH);
3446             if (eptr <= md->end_subject - length &&
3447               memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3448 #ifdef SUPPORT_UCP
3449             else if (oclength > 0 &&
3450                      eptr <= md->end_subject - oclength &&
3451                      memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3452 #endif  /* SUPPORT_UCP */
3453             else
3454               {
3455               CHECK_PARTIAL();
3456               RRETURN(MATCH_NOMATCH);
3457               }
3458             }
3459           /* Control never gets here */
3460           }
3461 
3462         else  /* Maximize */
3463           {
3464           pp = eptr;
3465           for (i = min; i < max; i++)
3466             {
3467             if (eptr <= md->end_subject - length &&
3468                 memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3469 #ifdef SUPPORT_UCP
3470             else if (oclength > 0 &&
3471                      eptr <= md->end_subject - oclength &&
3472                      memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3473 #endif  /* SUPPORT_UCP */
3474             else
3475               {
3476               CHECK_PARTIAL();
3477               break;
3478               }
3479             }
3480 
3481           if (possessive) continue;    /* No backtracking */
3482           for(;;)
3483             {
3484             if (eptr <= pp) goto TAIL_RECURSE;
3485             RMATCH(eptr, ecode, offset_top, md, eptrb, RM23);
3486             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3487 #ifdef SUPPORT_UCP
3488             eptr--;
3489             BACKCHAR(eptr);
3490 #else   /* without SUPPORT_UCP */
3491             eptr -= length;
3492 #endif  /* SUPPORT_UCP */
3493             }
3494           }
3495         /* Control never gets here */
3496         }
3497 
3498       /* If the length of a UTF-8 character is 1, we fall through here, and
3499       obey the code as for non-UTF-8 characters below, though in this case the
3500       value of fc will always be < 128. */
3501       }
3502     else
3503 #endif  /* SUPPORT_UTF */
3504       /* When not in UTF-8 mode, load a single-byte character. */
3505       fc = *ecode++;
3506 
3507     /* The value of fc at this point is always one character, though we may
3508     or may not be in UTF mode. The code is duplicated for the caseless and
3509     caseful cases, for speed, since matching characters is likely to be quite
3510     common. Each variant works in three steps. First, ensure that the minimum
3511     number of matches is present; if min == max, continue at the same level
3512     without recursing. Otherwise, if minimizing, try the rest of the pattern
3513     first and consume one more matching character after each failure, up to the
3514     maximum. If maximizing, consume as many as possible and work backwards. */
3515 
3516     DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
3517       max, (char *)eptr));
3518 
3519     if (op >= OP_STARI)  /* Caseless */
3520       {
3521 #ifdef COMPILE_PCRE8
3522       /* fc must be < 128 if UTF is enabled. */
3523       foc = md->fcc[fc];
3524 #else
3525 #ifdef SUPPORT_UTF
3526 #ifdef SUPPORT_UCP
3527       if (utf && fc > 127)
3528         foc = UCD_OTHERCASE(fc);
3529 #else
3530       if (utf && fc > 127)
3531         foc = fc;                       /* Without UCP the other case is fc itself */
3532 #endif /* SUPPORT_UCP */
3533       else
3534 #endif /* SUPPORT_UTF */
3535         foc = TABLE_GET(fc, md->fcc, fc);
3536 #endif /* COMPILE_PCRE8 */
3537 
3538       for (i = 1; i <= min; i++)        /* Step 1: the mandatory repeats */
3539         {
3540         pcre_uint32 cc;                 /* Faster than pcre_uchar */
3541         if (eptr >= md->end_subject)
3542           {
3543           SCHECK_PARTIAL();
3544           RRETURN(MATCH_NOMATCH);
3545           }
3546         cc = UCHAR21TEST(eptr);
3547         if (fc != cc && foc != cc) RRETURN(MATCH_NOMATCH);   /* Neither case */
3548         eptr++;
3549         }
3550       if (min == max) continue;         /* Exact count: nothing optional to try */
3551       if (minimize)
3552         {
3553         for (fi = min;; fi++)
3554           {
3555           pcre_uint32 cc;               /* Faster than pcre_uchar */
3556           RMATCH(eptr, ecode, offset_top, md, eptrb, RM24);
3557           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3558           if (fi >= max) RRETURN(MATCH_NOMATCH);   /* No more repeats allowed */
3559           if (eptr >= md->end_subject)
3560             {
3561             SCHECK_PARTIAL();
3562             RRETURN(MATCH_NOMATCH);
3563             }
3564           cc = UCHAR21TEST(eptr);
3565           if (fc != cc && foc != cc) RRETURN(MATCH_NOMATCH);
3566           eptr++;
3567           }
3568         /* Control never gets here */
3569         }
3570       else  /* Maximize */
3571         {
3572         pp = eptr;                      /* Where the optional repeats start */
3573         for (i = min; i < max; i++)
3574           {
3575           pcre_uint32 cc;               /* Faster than pcre_uchar */
3576           if (eptr >= md->end_subject)
3577             {
3578             SCHECK_PARTIAL();
3579             break;
3580             }
3581           cc = UCHAR21TEST(eptr);
3582           if (fc != cc && foc != cc) break;
3583           eptr++;
3584           }
3585         if (possessive) continue;       /* No backtracking */
3586         for (;;)
3587           {
3588           if (eptr == pp) goto TAIL_RECURSE;   /* All optional repeats given back */
3589           RMATCH(eptr, ecode, offset_top, md, eptrb, RM25);
3590           eptr--;                       /* NOTE(review): done before the rrc test; presumably harmless - confirm */
3591           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3592           }
3593         /* Control never gets here */
3594         }
3595       }
3596 
3597     /* Caseful comparisons (includes all multi-byte characters) */
3598 
3599     else
3600       {
3601       for (i = 1; i <= min; i++)        /* Step 1: the mandatory repeats */
3602         {
3603         if (eptr >= md->end_subject)
3604           {
3605           SCHECK_PARTIAL();
3606           RRETURN(MATCH_NOMATCH);
3607           }
3608         if (fc != UCHAR21INCTEST(eptr)) RRETURN(MATCH_NOMATCH);
3609         }
3610 
3611       if (min == max) continue;         /* Exact count: nothing optional to try */
3612 
3613       if (minimize)
3614         {
3615         for (fi = min;; fi++)
3616           {
3617           RMATCH(eptr, ecode, offset_top, md, eptrb, RM26);
3618           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3619           if (fi >= max) RRETURN(MATCH_NOMATCH);   /* No more repeats allowed */
3620           if (eptr >= md->end_subject)
3621             {
3622             SCHECK_PARTIAL();
3623             RRETURN(MATCH_NOMATCH);
3624             }
3625           if (fc != UCHAR21INCTEST(eptr)) RRETURN(MATCH_NOMATCH);
3626           }
3627         /* Control never gets here */
3628         }
3629       else  /* Maximize */
3630         {
3631         pp = eptr;                      /* Where the optional repeats start */
3632         for (i = min; i < max; i++)
3633           {
3634           if (eptr >= md->end_subject)
3635             {
3636             SCHECK_PARTIAL();
3637             break;
3638             }
3639           if (fc != UCHAR21TEST(eptr)) break;
3640           eptr++;
3641           }
3642         if (possessive) continue;    /* No backtracking */
3643         for (;;)
3644           {
3645           if (eptr == pp) goto TAIL_RECURSE;   /* All optional repeats given back */
3646           RMATCH(eptr, ecode, offset_top, md, eptrb, RM27);
3647           eptr--;                       /* NOTE(review): done before the rrc test; presumably harmless - confirm */
3648           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3649           }
3650         /* Control never gets here */
3651         }
3652       }
3653     /* Control never gets here */
3654 
3655     /* Match one subject character that must differ from the pattern character
3656     (OP_NOT) or from both of its cases (OP_NOTI). It can be multibyte. */
3657 
3658     case OP_NOT:
3659     case OP_NOTI:
3660     if (eptr >= md->end_subject)     /* No subject character left to test */
3661       {
3662       SCHECK_PARTIAL();
3663       RRETURN(MATCH_NOMATCH);
3664       }
3665 #ifdef SUPPORT_UTF
3666     if (utf)
3667       {
3668       register pcre_uint32 ch, och;  /* Pattern character and its other case */
3669 
3670       ecode++;                       /* Step over the opcode */
3671       GETCHARINC(ch, ecode);         /* ch = pattern character */
3672       GETCHARINC(c, eptr);           /* c = subject character */
3673 
3674       if (op == OP_NOT)
3675         {
3676         if (ch == c) RRETURN(MATCH_NOMATCH);
3677         }
3678       else
3679         {
3680 #ifdef SUPPORT_UCP
3681         if (ch > 127)
3682           och = UCD_OTHERCASE(ch);
3683 #else
3684         if (ch > 127)
3685           och = ch;                  /* Without UCP the other case is ch itself */
3686 #endif /* SUPPORT_UCP */
3687         else
3688           och = TABLE_GET(ch, md->fcc, ch);
3689         if (ch == c || och == c) RRETURN(MATCH_NOMATCH);   /* Either case fails */
3690         }
3691       }
3692     else
3693 #endif
3694       {
3695       register pcre_uint32 ch = ecode[1];   /* Pattern character follows the opcode */
3696       c = *eptr++;
3697       if (ch == c || (op == OP_NOTI && TABLE_GET(ch, md->fcc, ch) == c))
3698         RRETURN(MATCH_NOMATCH);
3699       ecode += 2;                    /* Opcode plus one character */
3700       }
3701     break;
3702 
3703     /* Match a negated single character repeatedly. Each case group below only
3704     sets up min and max (plus minimize or possessive where they apply) for its
3705     opcodes and then joins the common code at REPEATNOTCHAR. This is almost a
3706     repeat of the code for a repeated single character, but no nice way was found
3707     of commoning these up that avoids a positive/negative test for each character
3708     match, and character matching *is* what this is all about... */
3709 
3710     case OP_NOTEXACT:
3711     case OP_NOTEXACTI:
3712     min = max = GET2(ecode, 1);      /* Fixed count: both bounds are the operand */
3713     ecode += 1 + IMM2_SIZE;          /* Step over the opcode and the count */
3714     goto REPEATNOTCHAR;
3715 
3716     case OP_NOTUPTO:
3717     case OP_NOTUPTOI:
3718     case OP_NOTMINUPTO:
3719     case OP_NOTMINUPTOI:
3720     min = 0;
3721     max = GET2(ecode, 1);
3722     minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI;
3723     ecode += 1 + IMM2_SIZE;
3724     goto REPEATNOTCHAR;
3725 
3726     case OP_NOTPOSSTAR:
3727     case OP_NOTPOSSTARI:
3728     possessive = TRUE;
3729     min = 0;
3730     max = INT_MAX;                   /* INT_MAX stands for "no upper limit" */
3731     ecode++;
3732     goto REPEATNOTCHAR;
3733 
3734     case OP_NOTPOSPLUS:
3735     case OP_NOTPOSPLUSI:
3736     possessive = TRUE;
3737     min = 1;
3738     max = INT_MAX;
3739     ecode++;
3740     goto REPEATNOTCHAR;
3741 
3742     case OP_NOTPOSQUERY:
3743     case OP_NOTPOSQUERYI:
3744     possessive = TRUE;
3745     min = 0;
3746     max = 1;
3747     ecode++;
3748     goto REPEATNOTCHAR;
3749 
3750     case OP_NOTPOSUPTO:
3751     case OP_NOTPOSUPTOI:
3752     possessive = TRUE;
3753     min = 0;
3754     max = GET2(ecode, 1);
3755     ecode += 1 + IMM2_SIZE;
3756     goto REPEATNOTCHAR;
3757 
3758     case OP_NOTSTAR:
3759     case OP_NOTSTARI:
3760     case OP_NOTMINSTAR:
3761     case OP_NOTMINSTARI:
3762     case OP_NOTPLUS:
3763     case OP_NOTPLUSI:
3764     case OP_NOTMINPLUS:
3765     case OP_NOTMINPLUSI:
3766     case OP_NOTQUERY:
3767     case OP_NOTQUERYI:
3768     case OP_NOTMINQUERY:
3769     case OP_NOTMINQUERYI:
3770     c = *ecode++ - ((op >= OP_NOTSTARI)? OP_NOTSTARI: OP_NOTSTAR);
3771     minimize = (c & 1) != 0;          /* Odd offset => minimizing form */
3772     min = rep_min[c];                 /* Pick up values from tables; */
3773     max = rep_max[c];                 /* zero for max => infinity */
3774     if (max == 0) max = INT_MAX;
3775 
3776     /* Common code for all repeated negated single-character matches. */
3777 
3778     REPEATNOTCHAR:
3779     GETCHARINCTEST(fc, ecode);       /* fc = the character that must not match */
3780 
3781     /* The code is duplicated for the caseless and caseful cases, and again for
3782     UTF and non-UTF subjects, for speed, since matching characters is likely to
3783     be quite common. First, ensure that the minimum number of matches is
3784     present. If min == max, continue at the same level without recursing.
3785     Otherwise, if minimizing, keep trying the rest of the expression and
3786     advancing one matching character if failing, up to the maximum.
3787     Alternatively, if maximizing, find the maximum number and work backwards. */
3788 
3789     DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
3790       max, (char *)eptr));
3791 
3792     if (op >= OP_NOTSTARI)     /* Caseless */
3793       {
3794 #ifdef SUPPORT_UTF
3795 #ifdef SUPPORT_UCP
3796       if (utf && fc > 127)
3797         foc = UCD_OTHERCASE(fc);
3798 #else
3799       if (utf && fc > 127)
3800         foc = fc;                    /* Without UCP the other case is fc itself */
3801 #endif /* SUPPORT_UCP */
3802       else
3803 #endif /* SUPPORT_UTF */
3804         foc = TABLE_GET(fc, md->fcc, fc);
3805 
3806 #ifdef SUPPORT_UTF
3807       if (utf)
3808         {
3809         register pcre_uint32 d;      /* Current subject character */
3810         for (i = 1; i <= min; i++)   /* The mandatory repeats */
3811           {
3812           if (eptr >= md->end_subject)
3813             {
3814             SCHECK_PARTIAL();
3815             RRETURN(MATCH_NOMATCH);
3816             }
3817           GETCHARINC(d, eptr);
3818           if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);
3819           }
3820         }
3821       else
3822 #endif  /* SUPPORT_UTF */
3823       /* Not UTF mode */
3824         {
3825         for (i = 1; i <= min; i++)   /* The mandatory repeats */
3826           {
3827           if (eptr >= md->end_subject)
3828             {
3829             SCHECK_PARTIAL();
3830             RRETURN(MATCH_NOMATCH);
3831             }
3832           if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
3833           eptr++;
3834           }
3835         }
3836 
3837       if (min == max) continue;      /* Exact count: nothing optional to try */
3838 
3839       if (minimize)
3840         {
3841 #ifdef SUPPORT_UTF
3842         if (utf)
3843           {
3844           register pcre_uint32 d;
3845           for (fi = min;; fi++)
3846             {
3847             RMATCH(eptr, ecode, offset_top, md, eptrb, RM28);
3848             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3849             if (fi >= max) RRETURN(MATCH_NOMATCH);   /* No more repeats allowed */
3850             if (eptr >= md->end_subject)
3851               {
3852               SCHECK_PARTIAL();
3853               RRETURN(MATCH_NOMATCH);
3854               }
3855             GETCHARINC(d, eptr);
3856             if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);
3857             }
3858           }
3859         else
3860 #endif  /*SUPPORT_UTF */
3861         /* Not UTF mode */
3862           {
3863           for (fi = min;; fi++)
3864             {
3865             RMATCH(eptr, ecode, offset_top, md, eptrb, RM29);
3866             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3867             if (fi >= max) RRETURN(MATCH_NOMATCH);   /* No more repeats allowed */
3868             if (eptr >= md->end_subject)
3869               {
3870               SCHECK_PARTIAL();
3871               RRETURN(MATCH_NOMATCH);
3872               }
3873             if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
3874             eptr++;
3875             }
3876           }
3877         /* Control never gets here */
3878         }
3879 
3880       /* Maximize case */
3881 
3882       else
3883         {
3884         pp = eptr;                   /* Where the optional repeats start */
3885 
3886 #ifdef SUPPORT_UTF
3887         if (utf)
3888           {
3889           register pcre_uint32 d;
3890           for (i = min; i < max; i++)
3891             {
3892             int len = 1;
3893             if (eptr >= md->end_subject)
3894               {
3895               SCHECK_PARTIAL();
3896               break;
3897               }
3898             GETCHARLEN(d, eptr, len);
3899             if (fc == d || (unsigned int)foc == d) break;
3900             eptr += len;             /* Consume the character only once accepted */
3901             }
3902           if (possessive) continue;    /* No backtracking */
3903           for(;;)
3904             {
3905             if (eptr <= pp) goto TAIL_RECURSE;   /* All optional repeats given back */
3906             RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);
3907             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3908             eptr--;
3909             BACKCHAR(eptr);          /* presumably moves to the character start - confirm */
3910             }
3911           }
3912         else
3913 #endif  /* SUPPORT_UTF */
3914         /* Not UTF mode */
3915           {
3916           for (i = min; i < max; i++)
3917             {
3918             if (eptr >= md->end_subject)
3919               {
3920               SCHECK_PARTIAL();
3921               break;
3922               }
3923             if (fc == *eptr || foc == *eptr) break;
3924             eptr++;
3925             }
3926           if (possessive) continue;    /* No backtracking */
3927           for (;;)
3928             {
3929             if (eptr == pp) goto TAIL_RECURSE;   /* All optional repeats given back */
3930             RMATCH(eptr, ecode, offset_top, md, eptrb, RM31);
3931             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3932             eptr--;
3933             }
3934           }
3935         /* Control never gets here */
3936         }
3937       }
3938 
3939     /* Caseful comparisons */
3940 
3941     else
3942       {
3943 #ifdef SUPPORT_UTF
3944       if (utf)
3945         {
3946         register pcre_uint32 d;      /* Current subject character */
3947         for (i = 1; i <= min; i++)   /* The mandatory repeats */
3948           {
3949           if (eptr >= md->end_subject)
3950             {
3951             SCHECK_PARTIAL();
3952             RRETURN(MATCH_NOMATCH);
3953             }
3954           GETCHARINC(d, eptr);
3955           if (fc == d) RRETURN(MATCH_NOMATCH);
3956           }
3957         }
3958       else
3959 #endif
3960       /* Not UTF mode */
3961         {
3962         for (i = 1; i <= min; i++)   /* The mandatory repeats */
3963           {
3964           if (eptr >= md->end_subject)
3965             {
3966             SCHECK_PARTIAL();
3967             RRETURN(MATCH_NOMATCH);
3968             }
3969           if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
3970           }
3971         }
3972 
3973       if (min == max) continue;      /* Exact count: nothing optional to try */
3974 
3975       if (minimize)
3976         {
3977 #ifdef SUPPORT_UTF
3978         if (utf)
3979           {
3980           register pcre_uint32 d;
3981           for (fi = min;; fi++)
3982             {
3983             RMATCH(eptr, ecode, offset_top, md, eptrb, RM32);
3984             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3985             if (fi >= max) RRETURN(MATCH_NOMATCH);   /* No more repeats allowed */
3986             if (eptr >= md->end_subject)
3987               {
3988               SCHECK_PARTIAL();
3989               RRETURN(MATCH_NOMATCH);
3990               }
3991             GETCHARINC(d, eptr);
3992             if (fc == d) RRETURN(MATCH_NOMATCH);
3993             }
3994           }
3995         else
3996 #endif
3997         /* Not UTF mode */
3998           {
3999           for (fi = min;; fi++)
4000             {
4001             RMATCH(eptr, ecode, offset_top, md, eptrb, RM33);
4002             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4003             if (fi >= max) RRETURN(MATCH_NOMATCH);   /* No more repeats allowed */
4004             if (eptr >= md->end_subject)
4005               {
4006               SCHECK_PARTIAL();
4007               RRETURN(MATCH_NOMATCH);
4008               }
4009             if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
4010             }
4011           }
4012         /* Control never gets here */
4013         }
4014 
4015       /* Maximize case */
4016 
4017       else
4018         {
4019         pp = eptr;                   /* Where the optional repeats start */
4020 
4021 #ifdef SUPPORT_UTF
4022         if (utf)
4023           {
4024           register pcre_uint32 d;
4025           for (i = min; i < max; i++)
4026             {
4027             int len = 1;
4028             if (eptr >= md->end_subject)
4029               {
4030               SCHECK_PARTIAL();
4031               break;
4032               }
4033             GETCHARLEN(d, eptr, len);
4034             if (fc == d) break;
4035             eptr += len;             /* Consume the character only once accepted */
4036             }
4037           if (possessive) continue;    /* No backtracking */
4038           for(;;)
4039             {
4040             if (eptr <= pp) goto TAIL_RECURSE;   /* All optional repeats given back */
4041             RMATCH(eptr, ecode, offset_top, md, eptrb, RM34);
4042             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4043             eptr--;
4044             BACKCHAR(eptr);          /* presumably moves to the character start - confirm */
4045             }
4046           }
4047         else
4048 #endif
4049         /* Not UTF mode */
4050           {
4051           for (i = min; i < max; i++)
4052             {
4053             if (eptr >= md->end_subject)
4054               {
4055               SCHECK_PARTIAL();
4056               break;
4057               }
4058             if (fc == *eptr) break;
4059             eptr++;
4060             }
4061           if (possessive) continue;    /* No backtracking */
4062           for (;;)
4063             {
4064             if (eptr == pp) goto TAIL_RECURSE;   /* All optional repeats given back */
4065             RMATCH(eptr, ecode, offset_top, md, eptrb, RM35);
4066             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4067             eptr--;
4068             }
4069           }
4070         /* Control never gets here */
4071         }
4072       }
4073     /* Control never gets here */
4074 
4075     /* Match a single character type repeatedly. Each case group below only sets
4076     up the repeat limits and flags and then joins the shared code at REPEATTYPE.
4077     It is very similar to the single-character code, repeated for efficiency. */
4078 
4079     case OP_TYPEEXACT:
4080     min = max = GET2(ecode, 1);      /* Fixed count: both bounds are the operand */
4081     minimize = TRUE;                 /* NOTE(review): set although min == max; effect not visible here - confirm */
4082     ecode += 1 + IMM2_SIZE;          /* Step over the opcode and the count */
4083     goto REPEATTYPE;
4084 
4085     case OP_TYPEUPTO:
4086     case OP_TYPEMINUPTO:
4087     min = 0;
4088     max = GET2(ecode, 1);
4089     minimize = *ecode == OP_TYPEMINUPTO;
4090     ecode += 1 + IMM2_SIZE;
4091     goto REPEATTYPE;
4092 
4093     case OP_TYPEPOSSTAR:
4094     possessive = TRUE;
4095     min = 0;
4096     max = INT_MAX;                   /* INT_MAX stands for "no upper limit" */
4097     ecode++;
4098     goto REPEATTYPE;
4099 
4100     case OP_TYPEPOSPLUS:
4101     possessive = TRUE;
4102     min = 1;
4103     max = INT_MAX;
4104     ecode++;
4105     goto REPEATTYPE;
4106 
4107     case OP_TYPEPOSQUERY:
4108     possessive = TRUE;
4109     min = 0;
4110     max = 1;
4111     ecode++;
4112     goto REPEATTYPE;
4113 
4114     case OP_TYPEPOSUPTO:
4115     possessive = TRUE;
4116     min = 0;
4117     max = GET2(ecode, 1);
4118     ecode += 1 + IMM2_SIZE;
4119     goto REPEATTYPE;
4120 
4121     case OP_TYPESTAR:
4122     case OP_TYPEMINSTAR:
4123     case OP_TYPEPLUS:
4124     case OP_TYPEMINPLUS:
4125     case OP_TYPEQUERY:
4126     case OP_TYPEMINQUERY:
4127     c = *ecode++ - OP_TYPESTAR;       /* Offset of this opcode from OP_TYPESTAR */
4128     minimize = (c & 1) != 0;          /* Odd offset => minimizing form */
4129     min = rep_min[c];                 /* Pick up values from tables; */
4130     max = rep_max[c];                 /* zero for max => infinity */
4131     if (max == 0) max = INT_MAX;
4132 
4133     /* Common code for all repeated single character type matches. Note that
4134     in UTF-8 mode, '.' matches a character of any length, but for the other
4135     character types, the valid characters are all one-byte long. */
4136 
4137     REPEATTYPE:
4138     ctype = *ecode++;      /* Code for the character type */
4139 
4140 #ifdef SUPPORT_UCP
4141     if (ctype == OP_PROP || ctype == OP_NOTPROP)
4142       {
4143       prop_fail_result = ctype == OP_NOTPROP;
4144       prop_type = *ecode++;
4145       prop_value = *ecode++;
4146       }
4147     else prop_type = -1;
4148 #endif
4149 
4150     /* First, ensure the minimum number of matches are present. Use inline
4151     code for maximizing the speed, and do the type test once at the start
4152     (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
4153     is tidier. Also separate the UCP code, which can be the same for both UTF-8
4154     and single-bytes. */
4155 
4156     if (min > 0)
4157       {
4158 #ifdef SUPPORT_UCP
4159       if (prop_type >= 0)
4160         {
4161         switch(prop_type)
4162           {
4163           case PT_ANY:
4164           if (prop_fail_result) RRETURN(MATCH_NOMATCH);
4165           for (i = 1; i <= min; i++)
4166             {
4167             if (eptr >= md->end_subject)
4168               {
4169               SCHECK_PARTIAL();
4170               RRETURN(MATCH_NOMATCH);
4171               }
4172             GETCHARINCTEST(c, eptr);
4173             }
4174           break;
4175 
4176           case PT_LAMP:
4177           for (i = 1; i <= min; i++)
4178             {
4179             int chartype;
4180             if (eptr >= md->end_subject)
4181               {
4182               SCHECK_PARTIAL();
4183               RRETURN(MATCH_NOMATCH);
4184               }
4185             GETCHARINCTEST(c, eptr);
4186             chartype = UCD_CHARTYPE(c);
4187             if ((chartype == ucp_Lu ||
4188                  chartype == ucp_Ll ||
4189                  chartype == ucp_Lt) == prop_fail_result)
4190               RRETURN(MATCH_NOMATCH);
4191             }
4192           break;
4193 
4194           case PT_GC:
4195           for (i = 1; i <= min; i++)
4196             {
4197             if (eptr >= md->end_subject)
4198               {
4199               SCHECK_PARTIAL();
4200               RRETURN(MATCH_NOMATCH);
4201               }
4202             GETCHARINCTEST(c, eptr);
4203             if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
4204               RRETURN(MATCH_NOMATCH);
4205             }
4206           break;
4207 
4208           case PT_PC:
4209           for (i = 1; i <= min; i++)
4210             {
4211             if (eptr >= md->end_subject)
4212               {
4213               SCHECK_PARTIAL();
4214               RRETURN(MATCH_NOMATCH);
4215               }
4216             GETCHARINCTEST(c, eptr);
4217             if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
4218               RRETURN(MATCH_NOMATCH);
4219             }
4220           break;
4221 
4222           case PT_SC:
4223           for (i = 1; i <= min; i++)
4224             {
4225             if (eptr >= md->end_subject)
4226               {
4227               SCHECK_PARTIAL();
4228               RRETURN(MATCH_NOMATCH);
4229               }
4230             GETCHARINCTEST(c, eptr);
4231             if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
4232               RRETURN(MATCH_NOMATCH);
4233             }
4234           break;
4235 
4236           case PT_ALNUM:
4237           for (i = 1; i <= min; i++)
4238             {
4239             int category;
4240             if (eptr >= md->end_subject)
4241               {
4242               SCHECK_PARTIAL();
4243               RRETURN(MATCH_NOMATCH);
4244               }
4245             GETCHARINCTEST(c, eptr);
4246             category = UCD_CATEGORY(c);
4247             if ((category == ucp_L || category == ucp_N) == prop_fail_result)
4248               RRETURN(MATCH_NOMATCH);
4249             }
4250           break;
4251 
4252           /* Perl space used to exclude VT, but from Perl 5.18 it is included,
4253           which means that Perl space and POSIX space are now identical. PCRE
4254           was changed at release 8.34. */
4255 
4256           case PT_SPACE:    /* Perl space */
4257           case PT_PXSPACE:  /* POSIX space */
4258           for (i = 1; i <= min; i++)
4259             {
4260             if (eptr >= md->end_subject)
4261               {
4262               SCHECK_PARTIAL();
4263               RRETURN(MATCH_NOMATCH);
4264               }
4265             GETCHARINCTEST(c, eptr);
4266             switch(c)
4267               {
4268               HSPACE_CASES:
4269               VSPACE_CASES:
4270               if (prop_fail_result) RRETURN(MATCH_NOMATCH);
4271               break;
4272 
4273               default:
4274               if ((UCD_CATEGORY(c) == ucp_Z) == prop_fail_result)
4275                 RRETURN(MATCH_NOMATCH);
4276               break;
4277               }
4278             }
4279           break;
4280 
4281           case PT_WORD:
4282           for (i = 1; i <= min; i++)
4283             {
4284             int category;
4285             if (eptr >= md->end_subject)
4286               {
4287               SCHECK_PARTIAL();
4288               RRETURN(MATCH_NOMATCH);
4289               }
4290             GETCHARINCTEST(c, eptr);
4291             category = UCD_CATEGORY(c);
4292             if ((category == ucp_L || category == ucp_N || c == CHAR_UNDERSCORE)
4293                    == prop_fail_result)
4294               RRETURN(MATCH_NOMATCH);
4295             }
4296           break;
4297 
4298           case PT_CLIST:
4299           for (i = 1; i <= min; i++)
4300             {
4301             const pcre_uint32 *cp;
4302             if (eptr >= md->end_subject)
4303               {
4304               SCHECK_PARTIAL();
4305               RRETURN(MATCH_NOMATCH);
4306               }
4307             GETCHARINCTEST(c, eptr);
4308             cp = PRIV(ucd_caseless_sets) + prop_value;
4309             for (;;)
4310               {
4311               if (c < *cp)
4312                 { if (prop_fail_result) break; else { RRETURN(MATCH_NOMATCH); } }
4313               if (c == *cp++)
4314                 { if (prop_fail_result) { RRETURN(MATCH_NOMATCH); } else break; }
4315               }
4316             }
4317           break;
4318 
4319           case PT_UCNC:
4320           for (i = 1; i <= min; i++)
4321             {
4322             if (eptr >= md->end_subject)
4323               {
4324               SCHECK_PARTIAL();
4325               RRETURN(MATCH_NOMATCH);
4326               }
4327             GETCHARINCTEST(c, eptr);
4328             if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
4329                  c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
4330                  c >= 0xe000) == prop_fail_result)
4331               RRETURN(MATCH_NOMATCH);
4332             }
4333           break;
4334 
4335           /* This should not occur */
4336 
4337           default:
4338           RRETURN(PCRE_ERROR_INTERNAL);
4339           }
4340         }
4341 
4342       /* Match extended Unicode sequences. We will get here only if the
4343       support is in the binary; otherwise a compile-time error occurs. */
4344 
4345       else if (ctype == OP_EXTUNI)
4346         {
4347         for (i = 1; i <= min; i++)
4348           {
4349           if (eptr >= md->end_subject)
4350             {
4351             SCHECK_PARTIAL();
4352             RRETURN(MATCH_NOMATCH);
4353             }
4354           else
4355             {
4356             int lgb, rgb;
4357             GETCHARINCTEST(c, eptr);
4358             lgb = UCD_GRAPHBREAK(c);
4359            while (eptr < md->end_subject)
4360               {
4361               int len = 1;
4362               if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
4363               rgb = UCD_GRAPHBREAK(c);
4364               if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
4365               lgb = rgb;
4366               eptr += len;
4367               }
4368             }
4369           CHECK_PARTIAL();
4370           }
4371         }
4372 
4373       else
4374 #endif     /* SUPPORT_UCP */
4375 
4376 /* Handle all other cases when the coding is UTF-8 */
4377 
4378 #ifdef SUPPORT_UTF
4379       if (utf) switch(ctype)
4380         {
4381         case OP_ANY:
4382         for (i = 1; i <= min; i++)
4383           {
4384           if (eptr >= md->end_subject)
4385             {
4386             SCHECK_PARTIAL();
4387             RRETURN(MATCH_NOMATCH);
4388             }
4389           if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
4390           if (md->partial != 0 &&
4391               eptr + 1 >= md->end_subject &&
4392               NLBLOCK->nltype == NLTYPE_FIXED &&
4393               NLBLOCK->nllen == 2 &&
4394               UCHAR21(eptr) == NLBLOCK->nl[0])
4395             {
4396             md->hitend = TRUE;
4397             if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
4398             }
4399           eptr++;
4400           ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4401           }
4402         break;
4403 
4404         case OP_ALLANY:
4405         for (i = 1; i <= min; i++)
4406           {
4407           if (eptr >= md->end_subject)
4408             {
4409             SCHECK_PARTIAL();
4410             RRETURN(MATCH_NOMATCH);
4411             }
4412           eptr++;
4413           ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4414           }
4415         break;
4416 
4417         case OP_ANYBYTE:
4418         if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);
4419         eptr += min;
4420         break;
4421 
4422         case OP_ANYNL:
4423         for (i = 1; i <= min; i++)
4424           {
4425           if (eptr >= md->end_subject)
4426             {
4427             SCHECK_PARTIAL();
4428             RRETURN(MATCH_NOMATCH);
4429             }
4430           GETCHARINC(c, eptr);
4431           switch(c)
4432             {
4433             default: RRETURN(MATCH_NOMATCH);
4434 
4435             case CHAR_CR:
4436             if (eptr < md->end_subject && UCHAR21(eptr) == CHAR_LF) eptr++;
4437             break;
4438 
4439             case CHAR_LF:
4440             break;
4441 
4442             case CHAR_VT:
4443             case CHAR_FF:
4444             case CHAR_NEL:
4445 #ifndef EBCDIC
4446             case 0x2028:
4447             case 0x2029:
4448 #endif  /* Not EBCDIC */
4449             if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
4450             break;
4451             }
4452           }
4453         break;
4454 
4455         case OP_NOT_HSPACE:
4456         for (i = 1; i <= min; i++)
4457           {
4458           if (eptr >= md->end_subject)
4459             {
4460             SCHECK_PARTIAL();
4461             RRETURN(MATCH_NOMATCH);
4462             }
4463           GETCHARINC(c, eptr);
4464           switch(c)
4465             {
4466             HSPACE_CASES: RRETURN(MATCH_NOMATCH);  /* Byte and multibyte cases */
4467             default: break;
4468             }
4469           }
4470         break;
4471 
4472         case OP_HSPACE:
4473         for (i = 1; i <= min; i++)
4474           {
4475           if (eptr >= md->end_subject)
4476             {
4477             SCHECK_PARTIAL();
4478             RRETURN(MATCH_NOMATCH);
4479             }
4480           GETCHARINC(c, eptr);
4481           switch(c)
4482             {
4483             HSPACE_CASES: break;  /* Byte and multibyte cases */
4484             default: RRETURN(MATCH_NOMATCH);
4485             }
4486           }
4487         break;
4488 
4489         case OP_NOT_VSPACE:
4490         for (i = 1; i <= min; i++)
4491           {
4492           if (eptr >= md->end_subject)
4493             {
4494             SCHECK_PARTIAL();
4495             RRETURN(MATCH_NOMATCH);
4496             }
4497           GETCHARINC(c, eptr);
4498           switch(c)
4499             {
4500             VSPACE_CASES: RRETURN(MATCH_NOMATCH);
4501             default: break;
4502             }
4503           }
4504         break;
4505 
4506         case OP_VSPACE:
4507         for (i = 1; i <= min; i++)
4508           {
4509           if (eptr >= md->end_subject)
4510             {
4511             SCHECK_PARTIAL();
4512             RRETURN(MATCH_NOMATCH);
4513             }
4514           GETCHARINC(c, eptr);
4515           switch(c)
4516             {
4517             VSPACE_CASES: break;
4518             default: RRETURN(MATCH_NOMATCH);
4519             }
4520           }
4521         break;
4522 
4523         case OP_NOT_DIGIT:
4524         for (i = 1; i <= min; i++)
4525           {
4526           if (eptr >= md->end_subject)
4527             {
4528             SCHECK_PARTIAL();
4529             RRETURN(MATCH_NOMATCH);
4530             }
4531           GETCHARINC(c, eptr);
4532           if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
4533             RRETURN(MATCH_NOMATCH);
4534           }
4535         break;
4536 
4537         case OP_DIGIT:
4538         for (i = 1; i <= min; i++)
4539           {
4540           pcre_uint32 cc;
4541           if (eptr >= md->end_subject)
4542             {
4543             SCHECK_PARTIAL();
4544             RRETURN(MATCH_NOMATCH);
4545             }
4546           cc = UCHAR21(eptr);
4547           if (cc >= 128 || (md->ctypes[cc] & ctype_digit) == 0)
4548             RRETURN(MATCH_NOMATCH);
4549           eptr++;
4550           /* No need to skip more bytes - we know it's a 1-byte character */
4551           }
4552         break;
4553 
4554         case OP_NOT_WHITESPACE:
4555         for (i = 1; i <= min; i++)
4556           {
4557           pcre_uint32 cc;
4558           if (eptr >= md->end_subject)
4559             {
4560             SCHECK_PARTIAL();
4561             RRETURN(MATCH_NOMATCH);
4562             }
4563           cc = UCHAR21(eptr);
4564           if (cc < 128 && (md->ctypes[cc] & ctype_space) != 0)
4565             RRETURN(MATCH_NOMATCH);
4566           eptr++;
4567           ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4568           }
4569         break;
4570 
4571         case OP_WHITESPACE:
4572         for (i = 1; i <= min; i++)
4573           {
4574           pcre_uint32 cc;
4575           if (eptr >= md->end_subject)
4576             {
4577             SCHECK_PARTIAL();
4578             RRETURN(MATCH_NOMATCH);
4579             }
4580           cc = UCHAR21(eptr);
4581           if (cc >= 128 || (md->ctypes[cc] & ctype_space) == 0)
4582             RRETURN(MATCH_NOMATCH);
4583           eptr++;
4584           /* No need to skip more bytes - we know it's a 1-byte character */
4585           }
4586         break;
4587 
4588         case OP_NOT_WORDCHAR:
4589         for (i = 1; i <= min; i++)
4590           {
4591           pcre_uint32 cc;
4592           if (eptr >= md->end_subject)
4593             {
4594             SCHECK_PARTIAL();
4595             RRETURN(MATCH_NOMATCH);
4596             }
4597           cc = UCHAR21(eptr);
4598           if (cc < 128 && (md->ctypes[cc] & ctype_word) != 0)
4599             RRETURN(MATCH_NOMATCH);
4600           eptr++;
4601           ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4602           }
4603         break;
4604 
4605         case OP_WORDCHAR:
4606         for (i = 1; i <= min; i++)
4607           {
4608           pcre_uint32 cc;
4609           if (eptr >= md->end_subject)
4610             {
4611             SCHECK_PARTIAL();
4612             RRETURN(MATCH_NOMATCH);
4613             }
4614           cc = UCHAR21(eptr);
4615           if (cc >= 128 || (md->ctypes[cc] & ctype_word) == 0)
4616             RRETURN(MATCH_NOMATCH);
4617           eptr++;
4618           /* No need to skip more bytes - we know it's a 1-byte character */
4619           }
4620         break;
4621 
4622         default:
4623         RRETURN(PCRE_ERROR_INTERNAL);
4624         }  /* End switch(ctype) */
4625 
4626       else
4627 #endif     /* SUPPORT_UTF */
4628 
4629       /* Code for the non-UTF-8 case for minimum matching of operators other
4630       than OP_PROP and OP_NOTPROP. */
4631 
4632       switch(ctype)
4633         {
4634         case OP_ANY:
4635         for (i = 1; i <= min; i++)
4636           {
4637           if (eptr >= md->end_subject)
4638             {
4639             SCHECK_PARTIAL();
4640             RRETURN(MATCH_NOMATCH);
4641             }
4642           if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
4643           if (md->partial != 0 &&
4644               eptr + 1 >= md->end_subject &&
4645               NLBLOCK->nltype == NLTYPE_FIXED &&
4646               NLBLOCK->nllen == 2 &&
4647               *eptr == NLBLOCK->nl[0])
4648             {
4649             md->hitend = TRUE;
4650             if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
4651             }
4652           eptr++;
4653           }
4654         break;
4655 
4656         case OP_ALLANY:
4657         if (eptr > md->end_subject - min)
4658           {
4659           SCHECK_PARTIAL();
4660           RRETURN(MATCH_NOMATCH);
4661           }
4662         eptr += min;
4663         break;
4664 
4665         case OP_ANYBYTE:
4666         if (eptr > md->end_subject - min)
4667           {
4668           SCHECK_PARTIAL();
4669           RRETURN(MATCH_NOMATCH);
4670           }
4671         eptr += min;
4672         break;
4673 
4674         case OP_ANYNL:
4675         for (i = 1; i <= min; i++)
4676           {
4677           if (eptr >= md->end_subject)
4678             {
4679             SCHECK_PARTIAL();
4680             RRETURN(MATCH_NOMATCH);
4681             }
4682           switch(*eptr++)
4683             {
4684             default: RRETURN(MATCH_NOMATCH);
4685 
4686             case CHAR_CR:
4687             if (eptr < md->end_subject && *eptr == CHAR_LF) eptr++;
4688             break;
4689 
4690             case CHAR_LF:
4691             break;
4692 
4693             case CHAR_VT:
4694             case CHAR_FF:
4695             case CHAR_NEL:
4696 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4697             case 0x2028:
4698             case 0x2029:
4699 #endif
4700             if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
4701             break;
4702             }
4703           }
4704         break;
4705 
4706         case OP_NOT_HSPACE:
4707         for (i = 1; i <= min; i++)
4708           {
4709           if (eptr >= md->end_subject)
4710             {
4711             SCHECK_PARTIAL();
4712             RRETURN(MATCH_NOMATCH);
4713             }
4714           switch(*eptr++)
4715             {
4716             default: break;
4717             HSPACE_BYTE_CASES:
4718 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4719             HSPACE_MULTIBYTE_CASES:
4720 #endif
4721             RRETURN(MATCH_NOMATCH);
4722             }
4723           }
4724         break;
4725 
4726         case OP_HSPACE:
4727         for (i = 1; i <= min; i++)
4728           {
4729           if (eptr >= md->end_subject)
4730             {
4731             SCHECK_PARTIAL();
4732             RRETURN(MATCH_NOMATCH);
4733             }
4734           switch(*eptr++)
4735             {
4736             default: RRETURN(MATCH_NOMATCH);
4737             HSPACE_BYTE_CASES:
4738 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4739             HSPACE_MULTIBYTE_CASES:
4740 #endif
4741             break;
4742             }
4743           }
4744         break;
4745 
4746         case OP_NOT_VSPACE:
4747         for (i = 1; i <= min; i++)
4748           {
4749           if (eptr >= md->end_subject)
4750             {
4751             SCHECK_PARTIAL();
4752             RRETURN(MATCH_NOMATCH);
4753             }
4754           switch(*eptr++)
4755             {
4756             VSPACE_BYTE_CASES:
4757 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4758             VSPACE_MULTIBYTE_CASES:
4759 #endif
4760             RRETURN(MATCH_NOMATCH);
4761             default: break;
4762             }
4763           }
4764         break;
4765 
4766         case OP_VSPACE:
4767         for (i = 1; i <= min; i++)
4768           {
4769           if (eptr >= md->end_subject)
4770             {
4771             SCHECK_PARTIAL();
4772             RRETURN(MATCH_NOMATCH);
4773             }
4774           switch(*eptr++)
4775             {
4776             default: RRETURN(MATCH_NOMATCH);
4777             VSPACE_BYTE_CASES:
4778 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4779             VSPACE_MULTIBYTE_CASES:
4780 #endif
4781             break;
4782             }
4783           }
4784         break;
4785 
4786         case OP_NOT_DIGIT:
4787         for (i = 1; i <= min; i++)
4788           {
4789           if (eptr >= md->end_subject)
4790             {
4791             SCHECK_PARTIAL();
4792             RRETURN(MATCH_NOMATCH);
4793             }
4794           if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0)
4795             RRETURN(MATCH_NOMATCH);
4796           eptr++;
4797           }
4798         break;
4799 
4800         case OP_DIGIT:
4801         for (i = 1; i <= min; i++)
4802           {
4803           if (eptr >= md->end_subject)
4804             {
4805             SCHECK_PARTIAL();
4806             RRETURN(MATCH_NOMATCH);
4807             }
4808           if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0)
4809             RRETURN(MATCH_NOMATCH);
4810           eptr++;
4811           }
4812         break;
4813 
4814         case OP_NOT_WHITESPACE:
4815         for (i = 1; i <= min; i++)
4816           {
4817           if (eptr >= md->end_subject)
4818             {
4819             SCHECK_PARTIAL();
4820             RRETURN(MATCH_NOMATCH);
4821             }
4822           if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0)
4823             RRETURN(MATCH_NOMATCH);
4824           eptr++;
4825           }
4826         break;
4827 
4828         case OP_WHITESPACE:
4829         for (i = 1; i <= min; i++)
4830           {
4831           if (eptr >= md->end_subject)
4832             {
4833             SCHECK_PARTIAL();
4834             RRETURN(MATCH_NOMATCH);
4835             }
4836           if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0)
4837             RRETURN(MATCH_NOMATCH);
4838           eptr++;
4839           }
4840         break;
4841 
4842         case OP_NOT_WORDCHAR:
4843         for (i = 1; i <= min; i++)
4844           {
4845           if (eptr >= md->end_subject)
4846             {
4847             SCHECK_PARTIAL();
4848             RRETURN(MATCH_NOMATCH);
4849             }
4850           if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0)
4851             RRETURN(MATCH_NOMATCH);
4852           eptr++;
4853           }
4854         break;
4855 
4856         case OP_WORDCHAR:
4857         for (i = 1; i <= min; i++)
4858           {
4859           if (eptr >= md->end_subject)
4860             {
4861             SCHECK_PARTIAL();
4862             RRETURN(MATCH_NOMATCH);
4863             }
4864           if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0)
4865             RRETURN(MATCH_NOMATCH);
4866           eptr++;
4867           }
4868         break;
4869 
4870         default:
4871         RRETURN(PCRE_ERROR_INTERNAL);
4872         }
4873       }
4874 
4875     /* If min = max, continue at the same level without recursing */
4876 
4877     if (min == max) continue;
4878 
4879     /* If minimizing, we have to test the rest of the pattern before each
4880     subsequent match. Again, separate the UTF-8 case for speed, and also
4881     separate the UCP cases. */
4882 
4883     if (minimize)
4884       {
4885 #ifdef SUPPORT_UCP
4886       if (prop_type >= 0)
4887         {
4888         switch(prop_type)
4889           {
4890           case PT_ANY:
4891           for (fi = min;; fi++)
4892             {
4893             RMATCH(eptr, ecode, offset_top, md, eptrb, RM36);
4894             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4895             if (fi >= max) RRETURN(MATCH_NOMATCH);
4896             if (eptr >= md->end_subject)
4897               {
4898               SCHECK_PARTIAL();
4899               RRETURN(MATCH_NOMATCH);
4900               }
4901             GETCHARINCTEST(c, eptr);
4902             if (prop_fail_result) RRETURN(MATCH_NOMATCH);
4903             }
4904           /* Control never gets here */
4905 
4906           case PT_LAMP:
4907           for (fi = min;; fi++)
4908             {
4909             int chartype;
4910             RMATCH(eptr, ecode, offset_top, md, eptrb, RM37);
4911             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4912             if (fi >= max) RRETURN(MATCH_NOMATCH);
4913             if (eptr >= md->end_subject)
4914               {
4915               SCHECK_PARTIAL();
4916               RRETURN(MATCH_NOMATCH);
4917               }
4918             GETCHARINCTEST(c, eptr);
4919             chartype = UCD_CHARTYPE(c);
4920             if ((chartype == ucp_Lu ||
4921                  chartype == ucp_Ll ||
4922                  chartype == ucp_Lt) == prop_fail_result)
4923               RRETURN(MATCH_NOMATCH);
4924             }
4925           /* Control never gets here */
4926 
4927           case PT_GC:
4928           for (fi = min;; fi++)
4929             {
4930             RMATCH(eptr, ecode, offset_top, md, eptrb, RM38);
4931             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4932             if (fi >= max) RRETURN(MATCH_NOMATCH);
4933             if (eptr >= md->end_subject)
4934               {
4935               SCHECK_PARTIAL();
4936               RRETURN(MATCH_NOMATCH);
4937               }
4938             GETCHARINCTEST(c, eptr);
4939             if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
4940               RRETURN(MATCH_NOMATCH);
4941             }
4942           /* Control never gets here */
4943 
4944           case PT_PC:
4945           for (fi = min;; fi++)
4946             {
4947             RMATCH(eptr, ecode, offset_top, md, eptrb, RM39);
4948             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4949             if (fi >= max) RRETURN(MATCH_NOMATCH);
4950             if (eptr >= md->end_subject)
4951               {
4952               SCHECK_PARTIAL();
4953               RRETURN(MATCH_NOMATCH);
4954               }
4955             GETCHARINCTEST(c, eptr);
4956             if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
4957               RRETURN(MATCH_NOMATCH);
4958             }
4959           /* Control never gets here */
4960 
4961           case PT_SC:
4962           for (fi = min;; fi++)
4963             {
4964             RMATCH(eptr, ecode, offset_top, md, eptrb, RM40);
4965             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4966             if (fi >= max) RRETURN(MATCH_NOMATCH);
4967             if (eptr >= md->end_subject)
4968               {
4969               SCHECK_PARTIAL();
4970               RRETURN(MATCH_NOMATCH);
4971               }
4972             GETCHARINCTEST(c, eptr);
4973             if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
4974               RRETURN(MATCH_NOMATCH);
4975             }
4976           /* Control never gets here */
4977 
4978           case PT_ALNUM:
4979           for (fi = min;; fi++)
4980             {
4981             int category;
4982             RMATCH(eptr, ecode, offset_top, md, eptrb, RM59);
4983             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4984             if (fi >= max) RRETURN(MATCH_NOMATCH);
4985             if (eptr >= md->end_subject)
4986               {
4987               SCHECK_PARTIAL();
4988               RRETURN(MATCH_NOMATCH);
4989               }
4990             GETCHARINCTEST(c, eptr);
4991             category = UCD_CATEGORY(c);
4992             if ((category == ucp_L || category == ucp_N) == prop_fail_result)
4993               RRETURN(MATCH_NOMATCH);
4994             }
4995           /* Control never gets here */
4996 
4997           /* Perl space used to exclude VT, but from Perl 5.18 it is included,
4998           which means that Perl space and POSIX space are now identical. PCRE
4999           was changed at release 8.34. */
5000 
5001           case PT_SPACE:    /* Perl space */
5002           case PT_PXSPACE:  /* POSIX space */
5003           for (fi = min;; fi++)
5004             {
5005             RMATCH(eptr, ecode, offset_top, md, eptrb, RM61);
5006             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5007             if (fi >= max) RRETURN(MATCH_NOMATCH);
5008             if (eptr >= md->end_subject)
5009               {
5010               SCHECK_PARTIAL();
5011               RRETURN(MATCH_NOMATCH);
5012               }
5013             GETCHARINCTEST(c, eptr);
5014             switch(c)
5015               {
5016               HSPACE_CASES:
5017               VSPACE_CASES:
5018               if (prop_fail_result) RRETURN(MATCH_NOMATCH);
5019               break;
5020 
5021               default:
5022               if ((UCD_CATEGORY(c) == ucp_Z) == prop_fail_result)
5023                 RRETURN(MATCH_NOMATCH);
5024               break;
5025               }
5026             }
5027           /* Control never gets here */
5028 
5029           case PT_WORD:
5030           for (fi = min;; fi++)
5031             {
5032             int category;
5033             RMATCH(eptr, ecode, offset_top, md, eptrb, RM62);
5034             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5035             if (fi >= max) RRETURN(MATCH_NOMATCH);
5036             if (eptr >= md->end_subject)
5037               {
5038               SCHECK_PARTIAL();
5039               RRETURN(MATCH_NOMATCH);
5040               }
5041             GETCHARINCTEST(c, eptr);
5042             category = UCD_CATEGORY(c);
5043             if ((category == ucp_L ||
5044                  category == ucp_N ||
5045                  c == CHAR_UNDERSCORE)
5046                    == prop_fail_result)
5047               RRETURN(MATCH_NOMATCH);
5048             }
5049           /* Control never gets here */
5050 
5051           case PT_CLIST:
5052           for (fi = min;; fi++)
5053             {
5054             const pcre_uint32 *cp;
5055             RMATCH(eptr, ecode, offset_top, md, eptrb, RM67);
5056             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5057             if (fi >= max) RRETURN(MATCH_NOMATCH);
5058             if (eptr >= md->end_subject)
5059               {
5060               SCHECK_PARTIAL();
5061               RRETURN(MATCH_NOMATCH);
5062               }
5063             GETCHARINCTEST(c, eptr);
5064             cp = PRIV(ucd_caseless_sets) + prop_value;
5065             for (;;)
5066               {
5067               if (c < *cp)
5068                 { if (prop_fail_result) break; else { RRETURN(MATCH_NOMATCH); } }
5069               if (c == *cp++)
5070                 { if (prop_fail_result) { RRETURN(MATCH_NOMATCH); } else break; }
5071               }
5072             }
5073           /* Control never gets here */
5074 
5075           case PT_UCNC:
5076           for (fi = min;; fi++)
5077             {
5078             RMATCH(eptr, ecode, offset_top, md, eptrb, RM60);
5079             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5080             if (fi >= max) RRETURN(MATCH_NOMATCH);
5081             if (eptr >= md->end_subject)
5082               {
5083               SCHECK_PARTIAL();
5084               RRETURN(MATCH_NOMATCH);
5085               }
5086             GETCHARINCTEST(c, eptr);
5087             if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
5088                  c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
5089                  c >= 0xe000) == prop_fail_result)
5090               RRETURN(MATCH_NOMATCH);
5091             }
5092           /* Control never gets here */
5093 
5094           /* This should never occur */
5095           default:
5096           RRETURN(PCRE_ERROR_INTERNAL);
5097           }
5098         }
5099 
5100       /* Match extended Unicode sequences. We will get here only if the
5101       support is in the binary; otherwise a compile-time error occurs. */
5102 
5103       else if (ctype == OP_EXTUNI)
5104         {
5105         for (fi = min;; fi++)
5106           {
5107           RMATCH(eptr, ecode, offset_top, md, eptrb, RM41);
5108           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5109           if (fi >= max) RRETURN(MATCH_NOMATCH);
5110           if (eptr >= md->end_subject)
5111             {
5112             SCHECK_PARTIAL();
5113             RRETURN(MATCH_NOMATCH);
5114             }
5115           else
5116             {
5117             int lgb, rgb;
5118             GETCHARINCTEST(c, eptr);
5119             lgb = UCD_GRAPHBREAK(c);
5120             while (eptr < md->end_subject)
5121               {
5122               int len = 1;
5123               if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
5124               rgb = UCD_GRAPHBREAK(c);
5125               if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
5126               lgb = rgb;
5127               eptr += len;
5128               }
5129             }
5130           CHECK_PARTIAL();
5131           }
5132         }
5133       else
5134 #endif     /* SUPPORT_UCP */
5135 
5136 #ifdef SUPPORT_UTF
5137       if (utf)
5138         {
5139         for (fi = min;; fi++)
5140           {
5141           RMATCH(eptr, ecode, offset_top, md, eptrb, RM42);
5142           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5143           if (fi >= max) RRETURN(MATCH_NOMATCH);
5144           if (eptr >= md->end_subject)
5145             {
5146             SCHECK_PARTIAL();
5147             RRETURN(MATCH_NOMATCH);
5148             }
5149           if (ctype == OP_ANY && IS_NEWLINE(eptr))
5150             RRETURN(MATCH_NOMATCH);
5151           GETCHARINC(c, eptr);
5152           switch(ctype)
5153             {
5154             case OP_ANY:               /* This is the non-NL case */
5155             if (md->partial != 0 &&    /* Take care with CRLF partial */
5156                 eptr >= md->end_subject &&
5157                 NLBLOCK->nltype == NLTYPE_FIXED &&
5158                 NLBLOCK->nllen == 2 &&
5159                 c == NLBLOCK->nl[0])
5160               {
5161               md->hitend = TRUE;
5162               if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5163               }
5164             break;
5165 
5166             case OP_ALLANY:
5167             case OP_ANYBYTE:
5168             break;
5169 
5170             case OP_ANYNL:
5171             switch(c)
5172               {
5173               default: RRETURN(MATCH_NOMATCH);
5174               case CHAR_CR:
5175               if (eptr < md->end_subject && UCHAR21(eptr) == CHAR_LF) eptr++;
5176               break;
5177 
5178               case CHAR_LF:
5179               break;
5180 
5181               case CHAR_VT:
5182               case CHAR_FF:
5183               case CHAR_NEL:
5184 #ifndef EBCDIC
5185               case 0x2028:
5186               case 0x2029:
5187 #endif  /* Not EBCDIC */
5188               if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
5189               break;
5190               }
5191             break;
5192 
5193             case OP_NOT_HSPACE:
5194             switch(c)
5195               {
5196               HSPACE_CASES: RRETURN(MATCH_NOMATCH);
5197               default: break;
5198               }
5199             break;
5200 
5201             case OP_HSPACE:
5202             switch(c)
5203               {
5204               HSPACE_CASES: break;
5205               default: RRETURN(MATCH_NOMATCH);
5206               }
5207             break;
5208 
5209             case OP_NOT_VSPACE:
5210             switch(c)
5211               {
5212               VSPACE_CASES: RRETURN(MATCH_NOMATCH);
5213               default: break;
5214               }
5215             break;
5216 
5217             case OP_VSPACE:
5218             switch(c)
5219               {
5220               VSPACE_CASES: break;
5221               default: RRETURN(MATCH_NOMATCH);
5222               }
5223             break;
5224 
5225             case OP_NOT_DIGIT:
5226             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
5227               RRETURN(MATCH_NOMATCH);
5228             break;
5229 
5230             case OP_DIGIT:
5231             if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
5232               RRETURN(MATCH_NOMATCH);
5233             break;
5234 
5235             case OP_NOT_WHITESPACE:
5236             if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
5237               RRETURN(MATCH_NOMATCH);
5238             break;
5239 
5240             case OP_WHITESPACE:
5241             if (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
5242               RRETURN(MATCH_NOMATCH);
5243             break;
5244 
5245             case OP_NOT_WORDCHAR:
5246             if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
5247               RRETURN(MATCH_NOMATCH);
5248             break;
5249 
5250             case OP_WORDCHAR:
5251             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
5252               RRETURN(MATCH_NOMATCH);
5253             break;
5254 
5255             default:
5256             RRETURN(PCRE_ERROR_INTERNAL);
5257             }
5258           }
5259         }
5260       else
5261 #endif
5262       /* Not UTF mode */
5263         {
5264         for (fi = min;; fi++)
5265           {
5266           RMATCH(eptr, ecode, offset_top, md, eptrb, RM43);
5267           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5268           if (fi >= max) RRETURN(MATCH_NOMATCH);
5269           if (eptr >= md->end_subject)
5270             {
5271             SCHECK_PARTIAL();
5272             RRETURN(MATCH_NOMATCH);
5273             }
5274           if (ctype == OP_ANY && IS_NEWLINE(eptr))
5275             RRETURN(MATCH_NOMATCH);
5276           c = *eptr++;
5277           switch(ctype)
5278             {
5279             case OP_ANY:               /* This is the non-NL case */
5280             if (md->partial != 0 &&    /* Take care with CRLF partial */
5281                 eptr >= md->end_subject &&
5282                 NLBLOCK->nltype == NLTYPE_FIXED &&
5283                 NLBLOCK->nllen == 2 &&
5284                 c == NLBLOCK->nl[0])
5285               {
5286               md->hitend = TRUE;
5287               if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5288               }
5289             break;
5290 
5291             case OP_ALLANY:
5292             case OP_ANYBYTE:
5293             break;
5294 
5295             case OP_ANYNL:
5296             switch(c)
5297               {
5298               default: RRETURN(MATCH_NOMATCH);
5299               case CHAR_CR:
5300               if (eptr < md->end_subject && *eptr == CHAR_LF) eptr++;
5301               break;
5302 
5303               case CHAR_LF:
5304               break;
5305 
5306               case CHAR_VT:
5307               case CHAR_FF:
5308               case CHAR_NEL:
5309 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5310               case 0x2028:
5311               case 0x2029:
5312 #endif
5313               if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
5314               break;
5315               }
5316             break;
5317 
5318             case OP_NOT_HSPACE:
5319             switch(c)
5320               {
5321               default: break;
5322               HSPACE_BYTE_CASES:
5323 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5324               HSPACE_MULTIBYTE_CASES:
5325 #endif
5326               RRETURN(MATCH_NOMATCH);
5327               }
5328             break;
5329 
5330             case OP_HSPACE:
5331             switch(c)
5332               {
5333               default: RRETURN(MATCH_NOMATCH);
5334               HSPACE_BYTE_CASES:
5335 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5336               HSPACE_MULTIBYTE_CASES:
5337 #endif
5338               break;
5339               }
5340             break;
5341 
5342             case OP_NOT_VSPACE:
5343             switch(c)
5344               {
5345               default: break;
5346               VSPACE_BYTE_CASES:
5347 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5348               VSPACE_MULTIBYTE_CASES:
5349 #endif
5350               RRETURN(MATCH_NOMATCH);
5351               }
5352             break;
5353 
5354             case OP_VSPACE:
5355             switch(c)
5356               {
5357               default: RRETURN(MATCH_NOMATCH);
5358               VSPACE_BYTE_CASES:
5359 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5360               VSPACE_MULTIBYTE_CASES:
5361 #endif
5362               break;
5363               }
5364             break;
5365 
5366             case OP_NOT_DIGIT:
5367             if (MAX_255(c) && (md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
5368             break;
5369 
5370             case OP_DIGIT:
5371             if (!MAX_255(c) || (md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
5372             break;
5373 
5374             case OP_NOT_WHITESPACE:
5375             if (MAX_255(c) && (md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
5376             break;
5377 
5378             case OP_WHITESPACE:
5379             if (!MAX_255(c) || (md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
5380             break;
5381 
5382             case OP_NOT_WORDCHAR:
5383             if (MAX_255(c) && (md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);
5384             break;
5385 
5386             case OP_WORDCHAR:
5387             if (!MAX_255(c) || (md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);
5388             break;
5389 
5390             default:
5391             RRETURN(PCRE_ERROR_INTERNAL);
5392             }
5393           }
5394         }
5395       /* Control never gets here */
5396       }
5397 
5398     /* If maximizing, it is worth using inline code for speed, doing the type
5399     test once at the start (i.e. keep it out of the loop). Again, keep the
5400     UTF-8 and UCP stuff separate. */
5401 
5402     else
5403       {
5404       pp = eptr;  /* Remember where we started */
5405 
5406 #ifdef SUPPORT_UCP
5407       if (prop_type >= 0)
5408         {
5409         switch(prop_type)
5410           {
5411           case PT_ANY:
5412           for (i = min; i < max; i++)
5413             {
5414             int len = 1;
5415             if (eptr >= md->end_subject)
5416               {
5417               SCHECK_PARTIAL();
5418               break;
5419               }
5420             GETCHARLENTEST(c, eptr, len);
5421             if (prop_fail_result) break;
5422             eptr+= len;
5423             }
5424           break;
5425 
5426           case PT_LAMP:
5427           for (i = min; i < max; i++)
5428             {
5429             int chartype;
5430             int len = 1;
5431             if (eptr >= md->end_subject)
5432               {
5433               SCHECK_PARTIAL();
5434               break;
5435               }
5436             GETCHARLENTEST(c, eptr, len);
5437             chartype = UCD_CHARTYPE(c);
5438             if ((chartype == ucp_Lu ||
5439                  chartype == ucp_Ll ||
5440                  chartype == ucp_Lt) == prop_fail_result)
5441               break;
5442             eptr+= len;
5443             }
5444           break;
5445 
5446           case PT_GC:
5447           for (i = min; i < max; i++)
5448             {
5449             int len = 1;
5450             if (eptr >= md->end_subject)
5451               {
5452               SCHECK_PARTIAL();
5453               break;
5454               }
5455             GETCHARLENTEST(c, eptr, len);
5456             if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result) break;
5457             eptr+= len;
5458             }
5459           break;
5460 
5461           case PT_PC:
5462           for (i = min; i < max; i++)
5463             {
5464             int len = 1;
5465             if (eptr >= md->end_subject)
5466               {
5467               SCHECK_PARTIAL();
5468               break;
5469               }
5470             GETCHARLENTEST(c, eptr, len);
5471             if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result) break;
5472             eptr+= len;
5473             }
5474           break;
5475 
5476           case PT_SC:
5477           for (i = min; i < max; i++)
5478             {
5479             int len = 1;
5480             if (eptr >= md->end_subject)
5481               {
5482               SCHECK_PARTIAL();
5483               break;
5484               }
5485             GETCHARLENTEST(c, eptr, len);
5486             if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result) break;
5487             eptr+= len;
5488             }
5489           break;
5490 
5491           case PT_ALNUM:
5492           for (i = min; i < max; i++)
5493             {
5494             int category;
5495             int len = 1;
5496             if (eptr >= md->end_subject)
5497               {
5498               SCHECK_PARTIAL();
5499               break;
5500               }
5501             GETCHARLENTEST(c, eptr, len);
5502             category = UCD_CATEGORY(c);
5503             if ((category == ucp_L || category == ucp_N) == prop_fail_result)
5504               break;
5505             eptr+= len;
5506             }
5507           break;
5508 
5509           /* Perl space used to exclude VT, but from Perl 5.18 it is included,
5510           which means that Perl space and POSIX space are now identical. PCRE
5511           was changed at release 8.34. */
5512 
5513           case PT_SPACE:    /* Perl space */
5514           case PT_PXSPACE:  /* POSIX space */
5515           for (i = min; i < max; i++)
5516             {
5517             int len = 1;
5518             if (eptr >= md->end_subject)
5519               {
5520               SCHECK_PARTIAL();
5521               break;
5522               }
5523             GETCHARLENTEST(c, eptr, len);
5524             switch(c)
5525               {
5526               HSPACE_CASES:
5527               VSPACE_CASES:
5528               if (prop_fail_result) goto ENDLOOP99;  /* Break the loop */
5529               break;
5530 
5531               default:
5532               if ((UCD_CATEGORY(c) == ucp_Z) == prop_fail_result)
5533                 goto ENDLOOP99;   /* Break the loop */
5534               break;
5535               }
5536             eptr+= len;
5537             }
5538           ENDLOOP99:
5539           break;
5540 
5541           case PT_WORD:
5542           for (i = min; i < max; i++)
5543             {
5544             int category;
5545             int len = 1;
5546             if (eptr >= md->end_subject)
5547               {
5548               SCHECK_PARTIAL();
5549               break;
5550               }
5551             GETCHARLENTEST(c, eptr, len);
5552             category = UCD_CATEGORY(c);
5553             if ((category == ucp_L || category == ucp_N ||
5554                  c == CHAR_UNDERSCORE) == prop_fail_result)
5555               break;
5556             eptr+= len;
5557             }
5558           break;
5559 
5560           case PT_CLIST:
5561           for (i = min; i < max; i++)
5562             {
5563             const pcre_uint32 *cp;
5564             int len = 1;
5565             if (eptr >= md->end_subject)
5566               {
5567               SCHECK_PARTIAL();
5568               break;
5569               }
5570             GETCHARLENTEST(c, eptr, len);
5571             cp = PRIV(ucd_caseless_sets) + prop_value;
5572             for (;;)
5573               {
5574               if (c < *cp)
5575                 { if (prop_fail_result) break; else goto GOT_MAX; }
5576               if (c == *cp++)
5577                 { if (prop_fail_result) goto GOT_MAX; else break; }
5578               }
5579             eptr += len;
5580             }
5581           GOT_MAX:
5582           break;
5583 
5584           case PT_UCNC:
5585           for (i = min; i < max; i++)
5586             {
5587             int len = 1;
5588             if (eptr >= md->end_subject)
5589               {
5590               SCHECK_PARTIAL();
5591               break;
5592               }
5593             GETCHARLENTEST(c, eptr, len);
5594             if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
5595                  c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
5596                  c >= 0xe000) == prop_fail_result)
5597               break;
5598             eptr += len;
5599             }
5600           break;
5601 
5602           default:
5603           RRETURN(PCRE_ERROR_INTERNAL);
5604           }
5605 
5606         /* eptr is now past the end of the maximum run */
5607 
5608         if (possessive) continue;    /* No backtracking */
5609         for(;;)
5610           {
5611           if (eptr <= pp) goto TAIL_RECURSE;
5612           RMATCH(eptr, ecode, offset_top, md, eptrb, RM44);
5613           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5614           eptr--;
5615           if (utf) BACKCHAR(eptr);
5616           }
5617         }
5618 
5619       /* Match extended Unicode grapheme clusters. We will get here only if the
5620       support is in the binary; otherwise a compile-time error occurs. */
5621 
5622       else if (ctype == OP_EXTUNI)
5623         {
5624         for (i = min; i < max; i++)
5625           {
5626           if (eptr >= md->end_subject)
5627             {
5628             SCHECK_PARTIAL();
5629             break;
5630             }
5631           else
5632             {
5633             int lgb, rgb;
5634             GETCHARINCTEST(c, eptr);
5635             lgb = UCD_GRAPHBREAK(c);
5636             while (eptr < md->end_subject)
5637               {
5638               int len = 1;
5639               if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
5640               rgb = UCD_GRAPHBREAK(c);
5641               if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
5642               lgb = rgb;
5643               eptr += len;
5644               }
5645             }
5646           CHECK_PARTIAL();
5647           }
5648 
5649         /* eptr is now past the end of the maximum run */
5650 
5651         if (possessive) continue;    /* No backtracking */
5652 
5653         /* We use <= pp rather than == pp to detect the start of the run while
5654         backtracking because the use of \C in UTF mode can cause BACKCHAR to
5655         move back past pp. This is just palliative; the use of \C in UTF mode
5656         is fraught with danger. */
5657 
5658         for(;;)
5659           {
5660           int lgb, rgb;
5661           PCRE_PUCHAR fptr;
5662 
5663           if (eptr <= pp) goto TAIL_RECURSE;   /* At start of char run */
5664           RMATCH(eptr, ecode, offset_top, md, eptrb, RM45);
5665           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5666 
5667           /* Backtracking over an extended grapheme cluster involves inspecting
5668           the previous two characters (if present) to see if a break is
5669           permitted between them. */
5670 
5671           eptr--;
5672           if (!utf) c = *eptr; else
5673             {
5674             BACKCHAR(eptr);
5675             GETCHAR(c, eptr);
5676             }
5677           rgb = UCD_GRAPHBREAK(c);
5678 
5679           for (;;)
5680             {
5681             if (eptr <= pp) goto TAIL_RECURSE;   /* At start of char run */
5682             fptr = eptr - 1;
5683             if (!utf) c = *fptr; else
5684               {
5685               BACKCHAR(fptr);
5686               GETCHAR(c, fptr);
5687               }
5688             lgb = UCD_GRAPHBREAK(c);
5689             if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
5690             eptr = fptr;
5691             rgb = lgb;
5692             }
5693           }
5694         }
5695 
5696       else
5697 #endif   /* SUPPORT_UCP */
5698 
5699 #ifdef SUPPORT_UTF
5700       if (utf)
5701         {
5702         switch(ctype)
5703           {
5704           case OP_ANY:
5705           for (i = min; i < max; i++)
5706             {
5707             if (eptr >= md->end_subject)
5708               {
5709               SCHECK_PARTIAL();
5710               break;
5711               }
5712             if (IS_NEWLINE(eptr)) break;
5713             if (md->partial != 0 &&    /* Take care with CRLF partial */
5714                 eptr + 1 >= md->end_subject &&
5715                 NLBLOCK->nltype == NLTYPE_FIXED &&
5716                 NLBLOCK->nllen == 2 &&
5717                 UCHAR21(eptr) == NLBLOCK->nl[0])
5718               {
5719               md->hitend = TRUE;
5720               if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5721               }
5722             eptr++;
5723             ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
5724             }
5725           break;
5726 
5727           case OP_ALLANY:
5728           if (max < INT_MAX)
5729             {
5730             for (i = min; i < max; i++)
5731               {
5732               if (eptr >= md->end_subject)
5733                 {
5734                 SCHECK_PARTIAL();
5735                 break;
5736                 }
5737               eptr++;
5738               ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
5739               }
5740             }
5741           else
5742             {
5743             eptr = md->end_subject;   /* Unlimited UTF-8 repeat */
5744             SCHECK_PARTIAL();
5745             }
5746           break;
5747 
5748           /* The byte case is the same as non-UTF8 */
5749 
5750           case OP_ANYBYTE:
5751           c = max - min;
5752           if (c > (unsigned int)(md->end_subject - eptr))
5753             {
5754             eptr = md->end_subject;
5755             SCHECK_PARTIAL();
5756             }
5757           else eptr += c;
5758           break;
5759 
5760           case OP_ANYNL:
5761           for (i = min; i < max; i++)
5762             {
5763             int len = 1;
5764             if (eptr >= md->end_subject)
5765               {
5766               SCHECK_PARTIAL();
5767               break;
5768               }
5769             GETCHARLEN(c, eptr, len);
5770             if (c == CHAR_CR)
5771               {
5772               if (++eptr >= md->end_subject) break;
5773               if (UCHAR21(eptr) == CHAR_LF) eptr++;
5774               }
5775             else
5776               {
5777               if (c != CHAR_LF &&
5778                   (md->bsr_anycrlf ||
5779                    (c != CHAR_VT && c != CHAR_FF && c != CHAR_NEL
5780 #ifndef EBCDIC
5781                     && c != 0x2028 && c != 0x2029
5782 #endif  /* Not EBCDIC */
5783                     )))
5784                 break;
5785               eptr += len;
5786               }
5787             }
5788           break;
5789 
5790           case OP_NOT_HSPACE:
5791           case OP_HSPACE:
5792           for (i = min; i < max; i++)
5793             {
5794             BOOL gotspace;
5795             int len = 1;
5796             if (eptr >= md->end_subject)
5797               {
5798               SCHECK_PARTIAL();
5799               break;
5800               }
5801             GETCHARLEN(c, eptr, len);
5802             switch(c)
5803               {
5804               HSPACE_CASES: gotspace = TRUE; break;
5805               default: gotspace = FALSE; break;
5806               }
5807             if (gotspace == (ctype == OP_NOT_HSPACE)) break;
5808             eptr += len;
5809             }
5810           break;
5811 
5812           case OP_NOT_VSPACE:
5813           case OP_VSPACE:
5814           for (i = min; i < max; i++)
5815             {
5816             BOOL gotspace;
5817             int len = 1;
5818             if (eptr >= md->end_subject)
5819               {
5820               SCHECK_PARTIAL();
5821               break;
5822               }
5823             GETCHARLEN(c, eptr, len);
5824             switch(c)
5825               {
5826               VSPACE_CASES: gotspace = TRUE; break;
5827               default: gotspace = FALSE; break;
5828               }
5829             if (gotspace == (ctype == OP_NOT_VSPACE)) break;
5830             eptr += len;
5831             }
5832           break;
5833 
5834           case OP_NOT_DIGIT:
5835           for (i = min; i < max; i++)
5836             {
5837             int len = 1;
5838             if (eptr >= md->end_subject)
5839               {
5840               SCHECK_PARTIAL();
5841               break;
5842               }
5843             GETCHARLEN(c, eptr, len);
5844             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
5845             eptr+= len;
5846             }
5847           break;
5848 
5849           case OP_DIGIT:
5850           for (i = min; i < max; i++)
5851             {
5852             int len = 1;
5853             if (eptr >= md->end_subject)
5854               {
5855               SCHECK_PARTIAL();
5856               break;
5857               }
5858             GETCHARLEN(c, eptr, len);
5859             if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
5860             eptr+= len;
5861             }
5862           break;
5863 
5864           case OP_NOT_WHITESPACE:
5865           for (i = min; i < max; i++)
5866             {
5867             int len = 1;
5868             if (eptr >= md->end_subject)
5869               {
5870               SCHECK_PARTIAL();
5871               break;
5872               }
5873             GETCHARLEN(c, eptr, len);
5874             if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
5875             eptr+= len;
5876             }
5877           break;
5878 
5879           case OP_WHITESPACE:
5880           for (i = min; i < max; i++)
5881             {
5882             int len = 1;
5883             if (eptr >= md->end_subject)
5884               {
5885               SCHECK_PARTIAL();
5886               break;
5887               }
5888             GETCHARLEN(c, eptr, len);
5889             if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
5890             eptr+= len;
5891             }
5892           break;
5893 
5894           case OP_NOT_WORDCHAR:
5895           for (i = min; i < max; i++)
5896             {
5897             int len = 1;
5898             if (eptr >= md->end_subject)
5899               {
5900               SCHECK_PARTIAL();
5901               break;
5902               }
5903             GETCHARLEN(c, eptr, len);
5904             if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
5905             eptr+= len;
5906             }
5907           break;
5908 
5909           case OP_WORDCHAR:
5910           for (i = min; i < max; i++)
5911             {
5912             int len = 1;
5913             if (eptr >= md->end_subject)
5914               {
5915               SCHECK_PARTIAL();
5916               break;
5917               }
5918             GETCHARLEN(c, eptr, len);
5919             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
5920             eptr+= len;
5921             }
5922           break;
5923 
5924           default:
5925           RRETURN(PCRE_ERROR_INTERNAL);
5926           }
5927 
5928         if (possessive) continue;    /* No backtracking */
5929         for(;;)
5930           {
5931           if (eptr <= pp) goto TAIL_RECURSE;
5932           RMATCH(eptr, ecode, offset_top, md, eptrb, RM46);
5933           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5934           eptr--;
5935           BACKCHAR(eptr);
5936           if (ctype == OP_ANYNL && eptr > pp  && UCHAR21(eptr) == CHAR_NL &&
5937               UCHAR21(eptr - 1) == CHAR_CR) eptr--;
5938           }
5939         }
5940       else
5941 #endif  /* SUPPORT_UTF */
5942       /* Not UTF mode */
5943         {
5944         switch(ctype)
5945           {
5946           case OP_ANY:
5947           for (i = min; i < max; i++)
5948             {
5949             if (eptr >= md->end_subject)
5950               {
5951               SCHECK_PARTIAL();
5952               break;
5953               }
5954             if (IS_NEWLINE(eptr)) break;
5955             if (md->partial != 0 &&    /* Take care with CRLF partial */
5956                 eptr + 1 >= md->end_subject &&
5957                 NLBLOCK->nltype == NLTYPE_FIXED &&
5958                 NLBLOCK->nllen == 2 &&
5959                 *eptr == NLBLOCK->nl[0])
5960               {
5961               md->hitend = TRUE;
5962               if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5963               }
5964             eptr++;
5965             }
5966           break;
5967 
5968           case OP_ALLANY:
5969           case OP_ANYBYTE:
5970           c = max - min;
5971           if (c > (unsigned int)(md->end_subject - eptr))
5972             {
5973             eptr = md->end_subject;
5974             SCHECK_PARTIAL();
5975             }
5976           else eptr += c;
5977           break;
5978 
5979           case OP_ANYNL:
5980           for (i = min; i < max; i++)
5981             {
5982             if (eptr >= md->end_subject)
5983               {
5984               SCHECK_PARTIAL();
5985               break;
5986               }
5987             c = *eptr;
5988             if (c == CHAR_CR)
5989               {
5990               if (++eptr >= md->end_subject) break;
5991               if (*eptr == CHAR_LF) eptr++;
5992               }
5993             else
5994               {
5995               if (c != CHAR_LF && (md->bsr_anycrlf ||
5996                  (c != CHAR_VT && c != CHAR_FF && c != CHAR_NEL
5997 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5998                  && c != 0x2028 && c != 0x2029
5999 #endif
6000                  ))) break;
6001               eptr++;
6002               }
6003             }
6004           break;
6005 
6006           case OP_NOT_HSPACE:
6007           for (i = min; i < max; i++)
6008             {
6009             if (eptr >= md->end_subject)
6010               {
6011               SCHECK_PARTIAL();
6012               break;
6013               }
6014             switch(*eptr)
6015               {
6016               default: eptr++; break;
6017               HSPACE_BYTE_CASES:
6018 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
6019               HSPACE_MULTIBYTE_CASES:
6020 #endif
6021               goto ENDLOOP00;
6022               }
6023             }
6024           ENDLOOP00:
6025           break;
6026 
6027           case OP_HSPACE:
6028           for (i = min; i < max; i++)
6029             {
6030             if (eptr >= md->end_subject)
6031               {
6032               SCHECK_PARTIAL();
6033               break;
6034               }
6035             switch(*eptr)
6036               {
6037               default: goto ENDLOOP01;
6038               HSPACE_BYTE_CASES:
6039 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
6040               HSPACE_MULTIBYTE_CASES:
6041 #endif
6042               eptr++; break;
6043               }
6044             }
6045           ENDLOOP01:
6046           break;
6047 
6048           case OP_NOT_VSPACE:
6049           for (i = min; i < max; i++)
6050             {
6051             if (eptr >= md->end_subject)
6052               {
6053               SCHECK_PARTIAL();
6054               break;
6055               }
6056             switch(*eptr)
6057               {
6058               default: eptr++; break;
6059               VSPACE_BYTE_CASES:
6060 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
6061               VSPACE_MULTIBYTE_CASES:
6062 #endif
6063               goto ENDLOOP02;
6064               }
6065             }
6066           ENDLOOP02:
6067           break;
6068 
6069           case OP_VSPACE:
6070           for (i = min; i < max; i++)
6071             {
6072             if (eptr >= md->end_subject)
6073               {
6074               SCHECK_PARTIAL();
6075               break;
6076               }
6077             switch(*eptr)
6078               {
6079               default: goto ENDLOOP03;
6080               VSPACE_BYTE_CASES:
6081 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
6082               VSPACE_MULTIBYTE_CASES:
6083 #endif
6084               eptr++; break;
6085               }
6086             }
6087           ENDLOOP03:
6088           break;
6089 
6090           case OP_NOT_DIGIT:
6091           for (i = min; i < max; i++)
6092             {
6093             if (eptr >= md->end_subject)
6094               {
6095               SCHECK_PARTIAL();
6096               break;
6097               }
6098             if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0) break;
6099             eptr++;
6100             }
6101           break;
6102 
6103           case OP_DIGIT:
6104           for (i = min; i < max; i++)
6105             {
6106             if (eptr >= md->end_subject)
6107               {
6108               SCHECK_PARTIAL();
6109               break;
6110               }
6111             if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0) break;
6112             eptr++;
6113             }
6114           break;
6115 
6116           case OP_NOT_WHITESPACE:
6117           for (i = min; i < max; i++)
6118             {
6119             if (eptr >= md->end_subject)
6120               {
6121               SCHECK_PARTIAL();
6122               break;
6123               }
6124             if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0) break;
6125             eptr++;
6126             }
6127           break;
6128 
6129           case OP_WHITESPACE:
6130           for (i = min; i < max; i++)
6131             {
6132             if (eptr >= md->end_subject)
6133               {
6134               SCHECK_PARTIAL();
6135               break;
6136               }
6137             if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0) break;
6138             eptr++;
6139             }
6140           break;
6141 
6142           case OP_NOT_WORDCHAR:
6143           for (i = min; i < max; i++)
6144             {
6145             if (eptr >= md->end_subject)
6146               {
6147               SCHECK_PARTIAL();
6148               break;
6149               }
6150             if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0) break;
6151             eptr++;
6152             }
6153           break;
6154 
6155           case OP_WORDCHAR:
6156           for (i = min; i < max; i++)
6157             {
6158             if (eptr >= md->end_subject)
6159               {
6160               SCHECK_PARTIAL();
6161               break;
6162               }
6163             if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0) break;
6164             eptr++;
6165             }
6166           break;
6167 
6168           default:
6169           RRETURN(PCRE_ERROR_INTERNAL);
6170           }
6171 
6172         if (possessive) continue;    /* No backtracking */
6173         for (;;)
6174           {
6175           if (eptr == pp) goto TAIL_RECURSE;
6176           RMATCH(eptr, ecode, offset_top, md, eptrb, RM47);
6177           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6178           eptr--;
6179           if (ctype == OP_ANYNL && eptr > pp  && *eptr == CHAR_LF &&
6180               eptr[-1] == CHAR_CR) eptr--;
6181           }
6182         }
6183 
6184       /* Control never gets here */
6185       }
6186 
6187     /* There's been some horrible disaster. Arrival here can only mean there is
6188     something seriously wrong in the code above or the OP_xxx definitions. */
6189 
6190     default:
6191     DPRINTF(("Unknown opcode %d\n", *ecode));
6192     RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
6193     }
6194 
6195   /* Do not stick any code in here without much thought; it is assumed
6196   that "continue" in the code above comes out to here to repeat the main
6197   loop. */
6198 
6199   }             /* End of main loop */
6200 /* Control never reaches here */
6201 
6202 
6203 /* When compiling to use the heap rather than the stack for recursive calls to
6204 match(), the RRETURN() macro jumps here. The number that is saved in
6205 frame->Xwhere indicates which label we actually want to return to. */
6206 
6207 #ifdef NO_RECURSE
6208 #define LBL(val) case val: goto L_RM##val;
6209 HEAP_RETURN:
6210 switch (frame->Xwhere)
6211   {
6212   LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
6213   LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
6214   LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
6215   LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
6216   LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) LBL(64)
6217   LBL(65) LBL(66)
6218 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
6219   LBL(20) LBL(21)
6220 #endif
6221 #ifdef SUPPORT_UTF
6222   LBL(16) LBL(18)
6223   LBL(22) LBL(23) LBL(28) LBL(30)
6224   LBL(32) LBL(34) LBL(42) LBL(46)
6225 #ifdef SUPPORT_UCP
6226   LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
6227   LBL(59) LBL(60) LBL(61) LBL(62) LBL(67)
6228 #endif  /* SUPPORT_UCP */
6229 #endif  /* SUPPORT_UTF */
6230   default:
6231   DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
6232   return PCRE_ERROR_INTERNAL;
6233   }
6234 #undef LBL
6235 #endif  /* NO_RECURSE */
6236 }
6237 
6238 
/***************************************************************************
****************************************************************************
                   RECURSION IN THE match() FUNCTION

Undefine all the macros that were defined above to handle this. The names must
not stay defined beyond match(): later code in this file uses some of them as
ordinary identifiers (for example, pcre_exec() has a parameter called
"length"). */

/* These are undefined only when NO_RECURSE is set; the list is grouped exactly
as before. */

#ifdef NO_RECURSE
#undef eptr
#undef ecode
#undef mstart
#undef offset_top
#undef eptrb
#undef flags

#undef callpat
#undef charptr
#undef data
#undef next
#undef pp
#undef prev
#undef saved_eptr

#undef new_recursive

#undef cur_is_word
#undef condition
#undef prev_is_word

#undef ctype
#undef length
#undef max
#undef min
#undef number
#undef offset
#undef op
#undef save_capture_last
#undef save_offset1
#undef save_offset2
#undef save_offset3
#undef stacksave

#undef newptrb

#endif

/* These two are defined as macros in both cases, so they are undefined
unconditionally. */

#undef fc
#undef fi

/***************************************************************************
***************************************************************************/
6291 
6292 
#ifdef NO_RECURSE
/*************************************************
*          Release allocated heap frames         *
*************************************************/

/* This function releases all the allocated frames, that is, every frame that
is chained after the base frame through Xnextframe. The base frame is on the
machine stack, and so must not be freed; it is only used as the head of the
chain.

Argument: the address of the base frame
Returns:  nothing
*/

static void
release_match_heapframes (heapframe *frame_base)
{
heapframe *nextframe = frame_base->Xnextframe;

/* Detach the chain before freeing it, so that the base frame never holds a
pointer to released memory. This also makes a repeated call on the same base
frame a harmless no-op instead of a double free. */

frame_base->Xnextframe = NULL;

while (nextframe != NULL)
  {
  heapframe *oldframe = nextframe;
  nextframe = nextframe->Xnextframe;   /* Pick up the link before freeing */
  (PUBL(stack_free))(oldframe);
  }
}
#endif
6317 
6318 
6319 /*************************************************
6320 *         Execute a Regular Expression           *
6321 *************************************************/
6322 
6323 /* This function applies a compiled re to a subject string and picks out
6324 portions of the string if it matches. Two elements in the vector are set for
6325 each substring: the offsets to the start and end of the substring.
6326 
6327 Arguments:
6328   argument_re     points to the compiled expression
6329   extra_data      points to extra data or is NULL
6330   subject         points to the subject string
6331   length          length of subject string (may contain binary zeros)
6332   start_offset    where to start in the subject string
6333   options         option bits
6334   offsets         points to a vector of ints to be filled in with offsets
6335   offsetcount     the number of elements in the vector
6336 
6337 Returns:          > 0 => success; value is the number of elements filled in
6338                   = 0 => success, but offsets is not big enough
6339                    -1 => failed to match
6340                  < -1 => some kind of unexpected problem
6341 */
6342 
6343 #if defined COMPILE_PCRE8
6344 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_exec(const pcre * argument_re,const pcre_extra * extra_data,PCRE_SPTR subject,int length,int start_offset,int options,int * offsets,int offsetcount)6345 pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
6346   PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
6347   int offsetcount)
6348 #elif defined COMPILE_PCRE16
6349 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
6350 pcre16_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
6351   PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
6352   int offsetcount)
6353 #elif defined COMPILE_PCRE32
6354 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
6355 pcre32_exec(const pcre32 *argument_re, const pcre32_extra *extra_data,
6356   PCRE_SPTR32 subject, int length, int start_offset, int options, int *offsets,
6357   int offsetcount)
6358 #endif
6359 {
6360 int rc, ocount, arg_offset_max;
6361 int newline;
6362 BOOL using_temporary_offsets = FALSE;
6363 BOOL anchored;
6364 BOOL startline;
6365 BOOL firstline;
6366 BOOL utf;
6367 BOOL has_first_char = FALSE;
6368 BOOL has_req_char = FALSE;
6369 pcre_uchar first_char = 0;
6370 pcre_uchar first_char2 = 0;
6371 pcre_uchar req_char = 0;
6372 pcre_uchar req_char2 = 0;
6373 match_data match_block;
6374 match_data *md = &match_block;
6375 const pcre_uint8 *tables;
6376 const pcre_uint8 *start_bits = NULL;
6377 PCRE_PUCHAR start_match = (PCRE_PUCHAR)subject + start_offset;
6378 PCRE_PUCHAR end_subject;
6379 PCRE_PUCHAR start_partial = NULL;
6380 PCRE_PUCHAR match_partial = NULL;
6381 PCRE_PUCHAR req_char_ptr = start_match - 1;
6382 
6383 const pcre_study_data *study;
6384 const REAL_PCRE *re = (const REAL_PCRE *)argument_re;
6385 
6386 #ifdef NO_RECURSE
6387 heapframe frame_zero;
6388 frame_zero.Xprevframe = NULL;            /* Marks the top level */
6389 frame_zero.Xnextframe = NULL;            /* None are allocated yet */
6390 md->match_frames_base = &frame_zero;
6391 #endif
6392 
6393 /* Check for the special magic call that measures the size of the stack used
6394 per recursive call of match(). Without the funny casting for sizeof, a Windows
6395 compiler gave this error: "unary minus operator applied to unsigned type,
6396 result still unsigned". Hopefully the cast fixes that. */
6397 
6398 if (re == NULL && extra_data == NULL && subject == NULL && length == -999 &&
6399     start_offset == -999)
6400 #ifdef NO_RECURSE
6401   return -((int)sizeof(heapframe));
6402 #else
6403   return match(NULL, NULL, NULL, 0, NULL, NULL, 0);
6404 #endif
6405 
6406 /* Plausibility checks */
6407 
6408 if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
6409 if (re == NULL || subject == NULL || (offsets == NULL && offsetcount > 0))
6410   return PCRE_ERROR_NULL;
6411 if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
6412 if (length < 0) return PCRE_ERROR_BADLENGTH;
6413 if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
6414 
6415 /* Check that the first field in the block is the magic number. If it is not,
6416 return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
6417 REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
6418 means that the pattern is likely compiled with different endianness. */
6419 
6420 if (re->magic_number != MAGIC_NUMBER)
6421   return re->magic_number == REVERSED_MAGIC_NUMBER?
6422     PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
6423 if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
6424 
6425 /* These two settings are used in the code for checking a UTF-8 string that
6426 follows immediately afterwards. Other values in the md block are used only
6427 during "normal" pcre_exec() processing, not when the JIT support is in use,
6428 so they are set up later. */
6429 
6430 /* PCRE_UTF16 has the same value as PCRE_UTF8. */
6431 utf = md->utf = (re->options & PCRE_UTF8) != 0;
6432 md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
6433               ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
6434 
6435 /* Check a UTF-8 string if required. Pass back the character offset and error
6436 code for an invalid string if a results vector is available. */
6437 
6438 #ifdef SUPPORT_UTF
6439 if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
6440   {
6441   int erroroffset;
6442   int errorcode = PRIV(valid_utf)((PCRE_PUCHAR)subject, length, &erroroffset);
6443   if (errorcode != 0)
6444     {
6445     if (offsetcount >= 2)
6446       {
6447       offsets[0] = erroroffset;
6448       offsets[1] = errorcode;
6449       }
6450 #if defined COMPILE_PCRE8
6451     return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?
6452       PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
6453 #elif defined COMPILE_PCRE16
6454     return (errorcode <= PCRE_UTF16_ERR1 && md->partial > 1)?
6455       PCRE_ERROR_SHORTUTF16 : PCRE_ERROR_BADUTF16;
6456 #elif defined COMPILE_PCRE32
6457     return PCRE_ERROR_BADUTF32;
6458 #endif
6459     }
6460 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
6461   /* Check that a start_offset points to the start of a UTF character. */
6462   if (start_offset > 0 && start_offset < length &&
6463       NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
6464     return PCRE_ERROR_BADUTF8_OFFSET;
6465 #endif
6466   }
6467 #endif
6468 
6469 /* If the pattern was successfully studied with JIT support, run the JIT
6470 executable instead of the rest of this function. Most options must be set at
6471 compile time for the JIT code to be usable. Fallback to the normal code path if
6472 an unsupported flag is set. */
6473 
6474 #ifdef SUPPORT_JIT
6475 if (extra_data != NULL
6476     && (extra_data->flags & (PCRE_EXTRA_EXECUTABLE_JIT |
6477                              PCRE_EXTRA_TABLES)) == PCRE_EXTRA_EXECUTABLE_JIT
6478     && extra_data->executable_jit != NULL
6479     && (options & ~PUBLIC_JIT_EXEC_OPTIONS) == 0)
6480   {
6481   rc = PRIV(jit_exec)(extra_data, (const pcre_uchar *)subject, length,
6482        start_offset, options, offsets, offsetcount);
6483 
6484   /* PCRE_ERROR_JIT_BADOPTION means that the selected normal or partial
6485   matching mode is not compiled; we simply fall back to the interpreter. */
6486 
6487   if (rc != PCRE_ERROR_JIT_BADOPTION) return rc;
6488   }
6489 #endif
6490 
6491 /* Carry on with non-JIT matching. This information is for finding all the
6492 numbers associated with a given name, for condition testing. */
6493 
6494 md->name_table = (pcre_uchar *)re + re->name_table_offset;
6495 md->name_count = re->name_count;
6496 md->name_entry_size = re->name_entry_size;
6497 
6498 /* Fish out the optional data from the extra_data structure, first setting
6499 the default values. */
6500 
6501 study = NULL;
6502 md->match_limit = MATCH_LIMIT;
6503 md->match_limit_recursion = MATCH_LIMIT_RECURSION;
6504 md->callout_data = NULL;
6505 
6506 /* The table pointer is always in native byte order. */
6507 
6508 tables = re->tables;
6509 
6510 /* The two limit values override the defaults, whatever their value. */
6511 
6512 if (extra_data != NULL)
6513   {
6514   unsigned long int flags = extra_data->flags;
6515   if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
6516     study = (const pcre_study_data *)extra_data->study_data;
6517   if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
6518     md->match_limit = extra_data->match_limit;
6519   if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
6520     md->match_limit_recursion = extra_data->match_limit_recursion;
6521   if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
6522     md->callout_data = extra_data->callout_data;
6523   if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
6524   }
6525 
6526 /* Limits in the regex override only if they are smaller. */
6527 
6528 if ((re->flags & PCRE_MLSET) != 0 && re->limit_match < md->match_limit)
6529   md->match_limit = re->limit_match;
6530 
6531 if ((re->flags & PCRE_RLSET) != 0 &&
6532     re->limit_recursion < md->match_limit_recursion)
6533   md->match_limit_recursion = re->limit_recursion;
6534 
6535 /* If the exec call supplied NULL for tables, use the inbuilt ones. This
6536 is a feature that makes it possible to save compiled regex and re-use them
6537 in other programs later. */
6538 
6539 if (tables == NULL) tables = PRIV(default_tables);
6540 
6541 /* Set up other data */
6542 
6543 anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
6544 startline = (re->flags & PCRE_STARTLINE) != 0;
6545 firstline = (re->options & PCRE_FIRSTLINE) != 0;
6546 
6547 /* The code starts after the real_pcre block and the capture name table. */
6548 
6549 md->start_code = (const pcre_uchar *)re + re->name_table_offset +
6550   re->name_count * re->name_entry_size;
6551 
6552 md->start_subject = (PCRE_PUCHAR)subject;
6553 md->start_offset = start_offset;
6554 md->end_subject = md->start_subject + length;
6555 end_subject = md->end_subject;
6556 
6557 md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
6558 md->use_ucp = (re->options & PCRE_UCP) != 0;
6559 md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
6560 md->ignore_skip_arg = 0;
6561 
6562 /* Some options are unpacked into BOOL variables in the hope that testing
6563 them will be faster than individual option bits. */
6564 
6565 md->notbol = (options & PCRE_NOTBOL) != 0;
6566 md->noteol = (options & PCRE_NOTEOL) != 0;
6567 md->notempty = (options & PCRE_NOTEMPTY) != 0;
6568 md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
6569 
6570 md->hitend = FALSE;
6571 md->mark = md->nomatch_mark = NULL;     /* In case never set */
6572 
6573 md->recursive = NULL;                   /* No recursion at top level */
6574 md->hasthen = (re->flags & PCRE_HASTHEN) != 0;
6575 
6576 md->lcc = tables + lcc_offset;
6577 md->fcc = tables + fcc_offset;
6578 md->ctypes = tables + ctypes_offset;
6579 
6580 /* Handle different \R options. */
6581 
6582 switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
6583   {
6584   case 0:
6585   if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
6586     md->bsr_anycrlf = (re->options & PCRE_BSR_ANYCRLF) != 0;
6587   else
6588 #ifdef BSR_ANYCRLF
6589   md->bsr_anycrlf = TRUE;
6590 #else
6591   md->bsr_anycrlf = FALSE;
6592 #endif
6593   break;
6594 
6595   case PCRE_BSR_ANYCRLF:
6596   md->bsr_anycrlf = TRUE;
6597   break;
6598 
6599   case PCRE_BSR_UNICODE:
6600   md->bsr_anycrlf = FALSE;
6601   break;
6602 
6603   default: return PCRE_ERROR_BADNEWLINE;
6604   }
6605 
6606 /* Handle different types of newline. The three bits give eight cases. If
6607 nothing is set at run time, whatever was used at compile time applies. */
6608 
6609 switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options :
6610         (pcre_uint32)options) & PCRE_NEWLINE_BITS)
6611   {
6612   case 0: newline = NEWLINE; break;   /* Compile-time default */
6613   case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
6614   case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
6615   case PCRE_NEWLINE_CR+
6616        PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
6617   case PCRE_NEWLINE_ANY: newline = -1; break;
6618   case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
6619   default: return PCRE_ERROR_BADNEWLINE;
6620   }
6621 
6622 if (newline == -2)
6623   {
6624   md->nltype = NLTYPE_ANYCRLF;
6625   }
6626 else if (newline < 0)
6627   {
6628   md->nltype = NLTYPE_ANY;
6629   }
6630 else
6631   {
6632   md->nltype = NLTYPE_FIXED;
6633   if (newline > 255)
6634     {
6635     md->nllen = 2;
6636     md->nl[0] = (newline >> 8) & 255;
6637     md->nl[1] = newline & 255;
6638     }
6639   else
6640     {
6641     md->nllen = 1;
6642     md->nl[0] = newline;
6643     }
6644   }
6645 
6646 /* Partial matching was originally supported only for a restricted set of
6647 regexes; from release 8.00 there are no restrictions, but the bits are still
6648 defined (though never set). So there's no harm in leaving this code. */
6649 
6650 if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
6651   return PCRE_ERROR_BADPARTIAL;
6652 
6653 /* If the expression has got more back references than the offsets supplied can
6654 hold, we get a temporary chunk of working store to use during the matching.
6655 Otherwise, we can use the vector supplied, rounding down its size to a multiple
6656 of 3. */
6657 
6658 ocount = offsetcount - (offsetcount % 3);
6659 arg_offset_max = (2*ocount)/3;
6660 
6661 if (re->top_backref > 0 && re->top_backref >= ocount/3)
6662   {
6663   ocount = re->top_backref * 3 + 3;
6664   md->offset_vector = (int *)(PUBL(malloc))(ocount * sizeof(int));
6665   if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
6666   using_temporary_offsets = TRUE;
6667   DPRINTF(("Got memory to hold back references\n"));
6668   }
6669 else md->offset_vector = offsets;
6670 md->offset_end = ocount;
6671 md->offset_max = (2*ocount)/3;
6672 md->capture_last = 0;
6673 
6674 /* Reset the working variable associated with each extraction. These should
6675 never be used unless previously set, but they get saved and restored, and so we
6676 initialize them to avoid reading uninitialized locations. Also, unset the
6677 offsets for the matched string. This is really just for tidiness with callouts,
6678 in case they inspect these fields. */
6679 
6680 if (md->offset_vector != NULL)
6681   {
6682   register int *iptr = md->offset_vector + ocount;
6683   register int *iend = iptr - re->top_bracket;
6684   if (iend < md->offset_vector + 2) iend = md->offset_vector + 2;
6685   while (--iptr >= iend) *iptr = -1;
6686   if (offsetcount > 0) md->offset_vector[0] = -1;
6687   if (offsetcount > 1) md->offset_vector[1] = -1;
6688   }
6689 
6690 /* Set up the first character to match, if available. The first_char value is
6691 never set for an anchored regular expression, but the anchoring may be forced
6692 at run time, so we have to test for anchoring. The first char may be unset for
6693 an unanchored pattern, of course. If there's no first char and the pattern was
6694 studied, there may be a bitmap of possible first characters. */
6695 
6696 if (!anchored)
6697   {
6698   if ((re->flags & PCRE_FIRSTSET) != 0)
6699     {
6700     has_first_char = TRUE;
6701     first_char = first_char2 = (pcre_uchar)(re->first_char);
6702     if ((re->flags & PCRE_FCH_CASELESS) != 0)
6703       {
6704       first_char2 = TABLE_GET(first_char, md->fcc, first_char);
6705 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
6706       if (utf && first_char > 127)
6707         first_char2 = UCD_OTHERCASE(first_char);
6708 #endif
6709       }
6710     }
6711   else
6712     if (!startline && study != NULL &&
6713       (study->flags & PCRE_STUDY_MAPPED) != 0)
6714         start_bits = study->start_bits;
6715   }
6716 
6717 /* For anchored or unanchored matches, there may be a "last known required
6718 character" set. */
6719 
6720 if ((re->flags & PCRE_REQCHSET) != 0)
6721   {
6722   has_req_char = TRUE;
6723   req_char = req_char2 = (pcre_uchar)(re->req_char);
6724   if ((re->flags & PCRE_RCH_CASELESS) != 0)
6725     {
6726     req_char2 = TABLE_GET(req_char, md->fcc, req_char);
6727 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
6728     if (utf && req_char > 127)
6729       req_char2 = UCD_OTHERCASE(req_char);
6730 #endif
6731     }
6732   }
6733 
6734 
6735 /* ==========================================================================*/
6736 
6737 /* Loop for handling unanchored repeated matching attempts; for anchored regexes
6738 the loop runs just once. */
6739 
6740 for(;;)
6741   {
6742   PCRE_PUCHAR save_end_subject = end_subject;
6743   PCRE_PUCHAR new_start_match;
6744 
6745   /* If firstline is TRUE, the start of the match is constrained to the first
6746   line of a multiline string. That is, the match must be before or at the first
6747   newline. Implement this by temporarily adjusting end_subject so that we stop
6748   scanning at a newline. If the match fails at the newline, later code breaks
6749   this loop. */
6750 
6751   if (firstline)
6752     {
6753     PCRE_PUCHAR t = start_match;
6754 #ifdef SUPPORT_UTF
6755     if (utf)
6756       {
6757       while (t < md->end_subject && !IS_NEWLINE(t))
6758         {
6759         t++;
6760         ACROSSCHAR(t < end_subject, *t, t++);
6761         }
6762       }
6763     else
6764 #endif
6765     while (t < md->end_subject && !IS_NEWLINE(t)) t++;
6766     end_subject = t;
6767     }
6768 
6769   /* There are some optimizations that avoid running the match if a known
6770   starting point is not found, or if a known later character is not present.
6771   However, there is an option that disables these, for testing and for ensuring
6772   that all callouts do actually occur. The option can be set in the regex by
6773   (*NO_START_OPT) or passed in match-time options. */
6774 
6775   if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
6776     {
6777     /* Advance to a unique first char if there is one. */
6778 
6779     if (has_first_char)
6780       {
6781       pcre_uchar smc;
6782 
6783       if (first_char != first_char2)
6784         while (start_match < end_subject &&
6785           (smc = UCHAR21TEST(start_match)) != first_char && smc != first_char2)
6786           start_match++;
6787       else
6788         while (start_match < end_subject && UCHAR21TEST(start_match) != first_char)
6789           start_match++;
6790       }
6791 
6792     /* Or to just after a linebreak for a multiline match */
6793 
6794     else if (startline)
6795       {
6796       if (start_match > md->start_subject + start_offset)
6797         {
6798 #ifdef SUPPORT_UTF
6799         if (utf)
6800           {
6801           while (start_match < end_subject && !WAS_NEWLINE(start_match))
6802             {
6803             start_match++;
6804             ACROSSCHAR(start_match < end_subject, *start_match,
6805               start_match++);
6806             }
6807           }
6808         else
6809 #endif
6810         while (start_match < end_subject && !WAS_NEWLINE(start_match))
6811           start_match++;
6812 
6813         /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
6814         and we are now at a LF, advance the match position by one more character.
6815         */
6816 
6817         if (start_match[-1] == CHAR_CR &&
6818              (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
6819              start_match < end_subject &&
6820              UCHAR21TEST(start_match) == CHAR_NL)
6821           start_match++;
6822         }
6823       }
6824 
6825     /* Or to a non-unique first byte after study */
6826 
6827     else if (start_bits != NULL)
6828       {
6829       while (start_match < end_subject)
6830         {
6831         register pcre_uint32 c = UCHAR21TEST(start_match);
6832 #ifndef COMPILE_PCRE8
6833         if (c > 255) c = 255;
6834 #endif
6835         if ((start_bits[c/8] & (1 << (c&7))) != 0) break;
6836         start_match++;
6837         }
6838       }
6839     }   /* Starting optimizations */
6840 
6841   /* Restore fudged end_subject */
6842 
6843   end_subject = save_end_subject;
6844 
6845   /* The following two optimizations are disabled for partial matching or if
6846   disabling is explicitly requested. */
6847 
6848   if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0 && !md->partial)
6849     {
6850     /* If the pattern was studied, a minimum subject length may be set. This is
6851     a lower bound; no actual string of that length may actually match the
6852     pattern. Although the value is, strictly, in characters, we treat it as
6853     bytes to avoid spending too much time in this optimization. */
6854 
6855     if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
6856         (pcre_uint32)(end_subject - start_match) < study->minlength)
6857       {
6858       rc = MATCH_NOMATCH;
6859       break;
6860       }
6861 
6862     /* If req_char is set, we know that that character must appear in the
6863     subject for the match to succeed. If the first character is set, req_char
6864     must be later in the subject; otherwise the test starts at the match point.
6865     This optimization can save a huge amount of backtracking in patterns with
6866     nested unlimited repeats that aren't going to match. Writing separate code
6867     for cased/caseless versions makes it go faster, as does using an
6868     autoincrement and backing off on a match.
6869 
6870     HOWEVER: when the subject string is very, very long, searching to its end
6871     can take a long time, and give bad performance on quite ordinary patterns.
6872     This showed up when somebody was matching something like /^\d+C/ on a
6873     32-megabyte string... so we don't do this when the string is sufficiently
6874     long. */
6875 
6876     if (has_req_char && end_subject - start_match < REQ_BYTE_MAX)
6877       {
6878       register PCRE_PUCHAR p = start_match + (has_first_char? 1:0);
6879 
6880       /* We don't need to repeat the search if we haven't yet reached the
6881       place we found it at last time. */
6882 
6883       if (p > req_char_ptr)
6884         {
6885         if (req_char != req_char2)
6886           {
6887           while (p < end_subject)
6888             {
6889             register pcre_uint32 pp = UCHAR21INCTEST(p);
6890             if (pp == req_char || pp == req_char2) { p--; break; }
6891             }
6892           }
6893         else
6894           {
6895           while (p < end_subject)
6896             {
6897             if (UCHAR21INCTEST(p) == req_char) { p--; break; }
6898             }
6899           }
6900 
6901         /* If we can't find the required character, break the matching loop,
6902         forcing a match failure. */
6903 
6904         if (p >= end_subject)
6905           {
6906           rc = MATCH_NOMATCH;
6907           break;
6908           }
6909 
6910         /* If we have found the required character, save the point where we
6911         found it, so that we don't search again next time round the loop if
6912         the start hasn't passed this character yet. */
6913 
6914         req_char_ptr = p;
6915         }
6916       }
6917     }
6918 
6919 #ifdef PCRE_DEBUG  /* Sigh. Some compilers never learn. */
6920   printf(">>>> Match against: ");
6921   pchars(start_match, end_subject - start_match, TRUE, md);
6922   printf("\n");
6923 #endif
6924 
6925   /* OK, we can now run the match. If "hitend" is set afterwards, remember the
6926   first starting point for which a partial match was found. */
6927 
6928   md->start_match_ptr = start_match;
6929   md->start_used_ptr = start_match;
6930   md->match_call_count = 0;
6931   md->match_function_type = 0;
6932   md->end_offset_top = 0;
6933   md->skip_arg_count = 0;
6934   rc = match(start_match, md->start_code, start_match, 2, md, NULL, 0);
6935   if (md->hitend && start_partial == NULL)
6936     {
6937     start_partial = md->start_used_ptr;
6938     match_partial = start_match;
6939     }
6940 
6941   switch(rc)
6942     {
6943     /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched
6944     the SKIP's arg was not found. In this circumstance, Perl ignores the SKIP
6945     entirely. The only way we can do that is to re-do the match at the same
6946     point, with a flag to force SKIP with an argument to be ignored. Just
6947     treating this case as NOMATCH does not work because it does not check other
6948     alternatives in patterns such as A(*SKIP:A)B|AC when the subject is AC. */
6949 
6950     case MATCH_SKIP_ARG:
6951     new_start_match = start_match;
6952     md->ignore_skip_arg = md->skip_arg_count;
6953     break;
6954 
6955     /* SKIP passes back the next starting point explicitly, but if it is no
6956     greater than the match we have just done, treat it as NOMATCH. */
6957 
6958     case MATCH_SKIP:
6959     if (md->start_match_ptr > start_match)
6960       {
6961       new_start_match = md->start_match_ptr;
6962       break;
6963       }
6964     /* Fall through */
6965 
6966     /* NOMATCH and PRUNE advance by one character. THEN at this level acts
6967     exactly like PRUNE. Unset ignore SKIP-with-argument. */
6968 
6969     case MATCH_NOMATCH:
6970     case MATCH_PRUNE:
6971     case MATCH_THEN:
6972     md->ignore_skip_arg = 0;
6973     new_start_match = start_match + 1;
6974 #ifdef SUPPORT_UTF
6975     if (utf)
6976       ACROSSCHAR(new_start_match < end_subject, *new_start_match,
6977         new_start_match++);
6978 #endif
6979     break;
6980 
6981     /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
6982 
6983     case MATCH_COMMIT:
6984     rc = MATCH_NOMATCH;
6985     goto ENDLOOP;
6986 
6987     /* Any other return is either a match, or some kind of error. */
6988 
6989     default:
6990     goto ENDLOOP;
6991     }
6992 
6993   /* Control reaches here for the various types of "no match at this point"
6994   result. Reset the code to MATCH_NOMATCH for subsequent checking. */
6995 
6996   rc = MATCH_NOMATCH;
6997 
6998   /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
6999   newline in the subject (though it may continue over the newline). Therefore,
7000   if we have just failed to match, starting at a newline, do not continue. */
7001 
7002   if (firstline && IS_NEWLINE(start_match)) break;
7003 
7004   /* Advance to new matching position */
7005 
7006   start_match = new_start_match;
7007 
7008   /* Break the loop if the pattern is anchored or if we have passed the end of
7009   the subject. */
7010 
7011   if (anchored || start_match > end_subject) break;
7012 
7013   /* If we have just passed a CR and we are now at a LF, and the pattern does
7014   not contain any explicit matches for \r or \n, and the newline option is CRLF
7015   or ANY or ANYCRLF, advance the match position by one more character. In
7016   normal matching start_match will always be greater than the first position at
7017   this stage, but a failed *SKIP can cause a return at the same point, which is
7018   why the first test exists. */
7019 
7020   if (start_match > (PCRE_PUCHAR)subject + start_offset &&
7021       start_match[-1] == CHAR_CR &&
7022       start_match < end_subject &&
7023       *start_match == CHAR_NL &&
7024       (re->flags & PCRE_HASCRORLF) == 0 &&
7025         (md->nltype == NLTYPE_ANY ||
7026          md->nltype == NLTYPE_ANYCRLF ||
7027          md->nllen == 2))
7028     start_match++;
7029 
7030   md->mark = NULL;   /* Reset for start of next match attempt */
7031   }                  /* End of for(;;) "bumpalong" loop */
7032 
7033 /* ==========================================================================*/
7034 
7035 /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
7036 conditions is true:
7037 
7038 (1) The pattern is anchored or the match was failed by (*COMMIT);
7039 
7040 (2) We are past the end of the subject;
7041 
7042 (3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
7043     this option requests that a match occur at or before the first newline in
7044     the subject.
7045 
7046 When we have a match and the offset vector is big enough to deal with any
7047 backreferences, captured substring offsets will already be set up. In the case
7048 where we had to get some local store to hold offsets for backreference
7049 processing, copy those that we can. In this case there need not be overflow if
7050 certain parts of the pattern were not used, even though there are more
7051 capturing parentheses than vector slots. */
7052 
7053 ENDLOOP:
7054 
7055 if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
7056   {
7057   if (using_temporary_offsets)
7058     {
7059     if (arg_offset_max >= 4)
7060       {
7061       memcpy(offsets + 2, md->offset_vector + 2,
7062         (arg_offset_max - 2) * sizeof(int));
7063       DPRINTF(("Copied offsets from temporary memory\n"));
7064       }
7065     if (md->end_offset_top > arg_offset_max) md->capture_last |= OVFLBIT;
7066     DPRINTF(("Freeing temporary memory\n"));
7067     (PUBL(free))(md->offset_vector);
7068     }
7069 
7070   /* Set the return code to the number of captured strings, or 0 if there were
7071   too many to fit into the vector. */
7072 
7073   rc = ((md->capture_last & OVFLBIT) != 0 &&
7074          md->end_offset_top >= arg_offset_max)?
7075     0 : md->end_offset_top/2;
7076 
7077   /* If there is space in the offset vector, set any unused pairs at the end of
7078   the pattern to -1 for backwards compatibility. It is documented that this
7079   happens. In earlier versions, the whole set of potential capturing offsets
7080   was set to -1 each time round the loop, but this is handled differently now.
7081   "Gaps" are set to -1 dynamically instead (this fixes a bug). Thus, it is only
7082   those at the end that need unsetting here. We can't just unset them all at
7083   the start of the whole thing because they may get set in one branch that is
7084   not the final matching branch. */
7085 
7086   if (md->end_offset_top/2 <= re->top_bracket && offsets != NULL)
7087     {
7088     register int *iptr, *iend;
7089     int resetcount = 2 + re->top_bracket * 2;
7090     if (resetcount > offsetcount) resetcount = offsetcount;
7091     iptr = offsets + md->end_offset_top;
7092     iend = offsets + resetcount;
7093     while (iptr < iend) *iptr++ = -1;
7094     }
7095 
7096   /* If there is space, set up the whole thing as substring 0. The value of
7097   md->start_match_ptr might be modified if \K was encountered on the success
7098   matching path. */
7099 
7100   if (offsetcount < 2) rc = 0; else
7101     {
7102     offsets[0] = (int)(md->start_match_ptr - md->start_subject);
7103     offsets[1] = (int)(md->end_match_ptr - md->start_subject);
7104     }
7105 
7106   /* Return MARK data if requested */
7107 
7108   if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
7109     *(extra_data->mark) = (pcre_uchar *)md->mark;
7110   DPRINTF((">>>> returning %d\n", rc));
7111 #ifdef NO_RECURSE
7112   release_match_heapframes(&frame_zero);
7113 #endif
7114   return rc;
7115   }
7116 
7117 /* Control gets here if there has been an error, or if the overall match
7118 attempt has failed at all permitted starting positions. */
7119 
7120 if (using_temporary_offsets)
7121   {
7122   DPRINTF(("Freeing temporary memory\n"));
7123   (PUBL(free))(md->offset_vector);
7124   }
7125 
7126 /* For anything other than nomatch or partial match, just return the code. */
7127 
7128 if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL)
7129   {
7130   DPRINTF((">>>> error: returning %d\n", rc));
7131 #ifdef NO_RECURSE
7132   release_match_heapframes(&frame_zero);
7133 #endif
7134   return rc;
7135   }
7136 
7137 /* Handle partial matches - disable any mark data */
7138 
7139 if (match_partial != NULL)
7140   {
7141   DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
7142   md->mark = NULL;
7143   if (offsetcount > 1)
7144     {
7145     offsets[0] = (int)(start_partial - (PCRE_PUCHAR)subject);
7146     offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject);
7147     if (offsetcount > 2)
7148       offsets[2] = (int)(match_partial - (PCRE_PUCHAR)subject);
7149     }
7150   rc = PCRE_ERROR_PARTIAL;
7151   }
7152 
7153 /* This is the classic nomatch case */
7154 
7155 else
7156   {
7157   DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
7158   rc = PCRE_ERROR_NOMATCH;
7159   }
7160 
7161 /* Return the MARK data if it has been requested. */
7162 
7163 if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
7164   *(extra_data->mark) = (pcre_uchar *)md->nomatch_mark;
7165 #ifdef NO_RECURSE
7166   release_match_heapframes(&frame_zero);
7167 #endif
7168 return rc;
7169 }
7170 
7171 /* End of pcre_exec.c */
7172