xref: /PHP-5.3/ext/pcre/pcrelib/pcre_exec.c (revision 357ab3cb)
1 /*************************************************
2 *      Perl-Compatible Regular Expressions       *
3 *************************************************/
4 
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7 
8                        Written by Philip Hazel
9            Copyright (c) 1997-2012 University of Cambridge
10 
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14 
15     * Redistributions of source code must retain the above copyright notice,
16       this list of conditions and the following disclaimer.
17 
18     * Redistributions in binary form must reproduce the above copyright
19       notice, this list of conditions and the following disclaimer in the
20       documentation and/or other materials provided with the distribution.
21 
22     * Neither the name of the University of Cambridge nor the names of its
23       contributors may be used to endorse or promote products derived from
24       this software without specific prior written permission.
25 
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39 
40 /* This module contains pcre_exec(), the externally visible function that does
41 pattern matching using an NFA algorithm, trying to mimic Perl as closely as
42 possible. There are also some static supporting functions. */
43 
44 #ifdef HAVE_CONFIG_H
45 #include "config.h"
46 #endif
47 
48 #define NLBLOCK md             /* Block containing newline information */
49 #define PSSTART start_subject  /* Field containing processed string start */
50 #define PSEND   end_subject    /* Field containing processed string end */
51 
52 #include "pcre_internal.h"
53 
54 /* Undefine some potentially clashing cpp symbols */
55 
56 #undef min
57 #undef max
58 
59 /* Values for setting in md->match_function_type to indicate two special types
60 of call to match(). We do it this way to save on using another stack variable,
61 as stack usage is to be discouraged. */
62 
63 #define MATCH_CONDASSERT     1  /* Called to check a condition assertion */
64 #define MATCH_CBEGROUP       2  /* Could-be-empty unlimited repeat group */
65 
66 /* Non-error returns from the match() function. Error returns are externally
67 defined PCRE_ERROR_xxx codes, which are all negative. */
68 
69 #define MATCH_MATCH        1
70 #define MATCH_NOMATCH      0
71 
72 /* Special internal returns from the match() function. Make them sufficiently
73 negative to avoid the external error codes. */
74 
75 #define MATCH_ACCEPT       (-999)
76 #define MATCH_COMMIT       (-998)
77 #define MATCH_KETRPOS      (-997)
78 #define MATCH_ONCE         (-996)
79 #define MATCH_PRUNE        (-995)
80 #define MATCH_SKIP         (-994)
81 #define MATCH_SKIP_ARG     (-993)
82 #define MATCH_THEN         (-992)
83 
84 /* Maximum number of ints of offset to save on the stack for recursive calls.
85 If the offset vector is bigger, malloc is used. This should be a multiple of 3,
86 because the offset vector is always a multiple of 3 long. */
87 
88 #define REC_STACK_SAVE_MAX 30
89 
90 /* Min and max values for the common repeats; for the maxima, 0 => infinity */
91 
92 static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
93 static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
94 
95 #ifdef PCRE_DEBUG
96 /*************************************************
97 *        Debugging function to print chars       *
98 *************************************************/
99 
100 /* Print a sequence of chars in printable format, stopping at the end of the
101 subject if the requested.
102 
103 Arguments:
104   p           points to characters
105   length      number to print
106   is_subject  TRUE if printing from within md->start_subject
107   md          pointer to matching data block, if is_subject is TRUE
108 
109 Returns:     nothing
110 */
111 
112 static void
pchars(const pcre_uchar * p,int length,BOOL is_subject,match_data * md)113 pchars(const pcre_uchar *p, int length, BOOL is_subject, match_data *md)
114 {
115 pcre_uint32 c;
116 BOOL utf = md->utf;
117 if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
118 while (length-- > 0)
119   if (isprint(c = RAWUCHARINCTEST(p))) printf("%c", (char)c); else printf("\\x{%02x}", c);
120 }
121 #endif
122 
123 
124 
125 /*************************************************
126 *          Match a back-reference                *
127 *************************************************/
128 
129 /* Normally, if a back reference hasn't been set, the length that is passed is
130 negative, so the match always fails. However, in JavaScript compatibility mode,
131 the length passed is zero. Note that in caseless UTF-8 mode, the number of
132 subject bytes matched may be different to the number of reference bytes.
133 
134 Arguments:
135   offset      index into the offset vector
136   eptr        pointer into the subject
137   length      length of reference to be matched (number of bytes)
138   md          points to match data block
139   caseless    TRUE if caseless
140 
141 Returns:      >= 0 the number of subject bytes matched
142               -1 no match
143               -2 partial match; always given if at end subject
144 */
145 
146 static int
match_ref(int offset,register PCRE_PUCHAR eptr,int length,match_data * md,BOOL caseless)147 match_ref(int offset, register PCRE_PUCHAR eptr, int length, match_data *md,
148   BOOL caseless)
149 {
150 PCRE_PUCHAR eptr_start = eptr;
151 register PCRE_PUCHAR p = md->start_subject + md->offset_vector[offset];
152 #ifdef SUPPORT_UTF
153 BOOL utf = md->utf;
154 #endif
155 
156 #ifdef PCRE_DEBUG
157 if (eptr >= md->end_subject)
158   printf("matching subject <null>");
159 else
160   {
161   printf("matching subject ");
162   pchars(eptr, length, TRUE, md);
163   }
164 printf(" against backref ");
165 pchars(p, length, FALSE, md);
166 printf("\n");
167 #endif
168 
169 /* Always fail if reference not set (and not JavaScript compatible - in that
170 case the length is passed as zero). */
171 
172 if (length < 0) return -1;
173 
174 /* Separate the caseless case for speed. In UTF-8 mode we can only do this
175 properly if Unicode properties are supported. Otherwise, we can check only
176 ASCII characters. */
177 
178 if (caseless)
179   {
180 #ifdef SUPPORT_UTF
181 #ifdef SUPPORT_UCP
182   if (utf)
183     {
184     /* Match characters up to the end of the reference. NOTE: the number of
185     data units matched may differ, because in UTF-8 there are some characters
186     whose upper and lower case versions code have different numbers of bytes.
187     For example, U+023A (2 bytes in UTF-8) is the upper case version of U+2C65
188     (3 bytes in UTF-8); a sequence of 3 of the former uses 6 bytes, as does a
189     sequence of two of the latter. It is important, therefore, to check the
190     length along the reference, not along the subject (earlier code did this
191     wrong). */
192 
193     PCRE_PUCHAR endptr = p + length;
194     while (p < endptr)
195       {
196       pcre_uint32 c, d;
197       const ucd_record *ur;
198       if (eptr >= md->end_subject) return -2;   /* Partial match */
199       GETCHARINC(c, eptr);
200       GETCHARINC(d, p);
201       ur = GET_UCD(d);
202       if (c != d && c != d + ur->other_case)
203         {
204         const pcre_uint32 *pp = PRIV(ucd_caseless_sets) + ur->caseset;
205         for (;;)
206           {
207           if (c < *pp) return -1;
208           if (c == *pp++) break;
209           }
210         }
211       }
212     }
213   else
214 #endif
215 #endif
216 
217   /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
218   is no UCP support. */
219     {
220     while (length-- > 0)
221       {
222       pcre_uchar cc, cp;
223       if (eptr >= md->end_subject) return -2;   /* Partial match */
224       cc = RAWUCHARTEST(eptr);
225       cp = RAWUCHARTEST(p);
226       if (TABLE_GET(cp, md->lcc, cp) != TABLE_GET(cc, md->lcc, cc)) return -1;
227       p++;
228       eptr++;
229       }
230     }
231   }
232 
233 /* In the caseful case, we can just compare the bytes, whether or not we
234 are in UTF-8 mode. */
235 
236 else
237   {
238   while (length-- > 0)
239     {
240     if (eptr >= md->end_subject) return -2;   /* Partial match */
241     if (RAWUCHARINCTEST(p) != RAWUCHARINCTEST(eptr)) return -1;
242     }
243   }
244 
245 return (int)(eptr - eptr_start);
246 }
247 
248 
249 
250 /***************************************************************************
251 ****************************************************************************
252                    RECURSION IN THE match() FUNCTION
253 
254 The match() function is highly recursive, though not every recursive call
255 increases the recursive depth. Nevertheless, some regular expressions can cause
256 it to recurse to a great depth. I was writing for Unix, so I just let it call
257 itself recursively. This uses the stack for saving everything that has to be
258 saved for a recursive call. On Unix, the stack can be large, and this works
259 fine.
260 
261 It turns out that on some non-Unix-like systems there are problems with
262 programs that use a lot of stack. (This despite the fact that every last chip
263 has oodles of memory these days, and techniques for extending the stack have
264 been known for decades.) So....
265 
266 There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
267 calls by keeping local variables that need to be preserved in blocks of memory
268 obtained from malloc() instead of on the stack. Macros are used to
269 achieve this so that the actual code doesn't look very different to what it
270 always used to.
271 
272 The original heap-recursive code used longjmp(). However, it seems that this
273 can be very slow on some operating systems. Following a suggestion from Stan
274 Switzer, the use of longjmp() has been abolished, at the cost of having to
275 provide a unique number for each call to RMATCH. There is no way of generating
276 a sequence of numbers at compile time in C. I have given them names, to make
277 them stand out more clearly.
278 
279 Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
280 FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
281 tests. Furthermore, not using longjmp() means that local dynamic variables
282 don't have indeterminate values; this has meant that the frame size can be
283 reduced because the result can be "passed back" by straight setting of the
284 variable instead of being passed in the frame.
285 ****************************************************************************
286 ***************************************************************************/
287 
/* Identifiers for the individual RMATCH call sites, numbered consecutively
from 1. In the heap-frame build each RMATCH stores its identifier in the frame
and plants a label L_<identifier>, so every call site needs its own value. If
this list is changed, the code at HEAP_RETURN below must be updated in sync. */

enum {
  RM1 = 1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,
  RM9,     RM10, RM11, RM12, RM13, RM14, RM15, RM16,
  RM17,    RM18, RM19, RM20, RM21, RM22, RM23, RM24,
  RM25,    RM26, RM27, RM28, RM29, RM30, RM31, RM32,
  RM33,    RM34, RM35, RM36, RM37, RM38, RM39, RM40,
  RM41,    RM42, RM43, RM44, RM45, RM46, RM47, RM48,
  RM49,    RM50, RM51, RM52, RM53, RM54, RM55, RM56,
  RM57,    RM58, RM59, RM60, RM61, RM62, RM63, RM64,
  RM65,    RM66, RM67
};
298 
299 /* These versions of the macros use the stack, as normal. There are debugging
300 versions and production versions. Note that the "rw" argument of RMATCH isn't
301 actually used in this definition. */
302 
303 #ifndef NO_RECURSE
304 #define REGISTER register
305 
306 #ifdef PCRE_DEBUG
307 #define RMATCH(ra,rb,rc,rd,re,rw) \
308   { \
309   printf("match() called in line %d\n", __LINE__); \
310   rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1); \
311   printf("to line %d\n", __LINE__); \
312   }
313 #define RRETURN(ra) \
314   { \
315   printf("match() returned %d from line %d\n", ra, __LINE__); \
316   return ra; \
317   }
318 #else
319 #define RMATCH(ra,rb,rc,rd,re,rw) \
320   rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1)
321 #define RRETURN(ra) return ra
322 #endif
323 
324 #else
325 
326 
327 /* These versions of the macros manage a private stack on the heap. Note that
328 the "rd" argument of RMATCH isn't actually used in this definition. It's the md
329 argument of match(), which never changes. */
330 
331 #define REGISTER
332 
333 #define RMATCH(ra,rb,rc,rd,re,rw)\
334   {\
335   heapframe *newframe = frame->Xnextframe;\
336   if (newframe == NULL)\
337     {\
338     newframe = (heapframe *)(PUBL(stack_malloc))(sizeof(heapframe));\
339     if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
340     newframe->Xnextframe = NULL;\
341     frame->Xnextframe = newframe;\
342     }\
343   frame->Xwhere = rw;\
344   newframe->Xeptr = ra;\
345   newframe->Xecode = rb;\
346   newframe->Xmstart = mstart;\
347   newframe->Xoffset_top = rc;\
348   newframe->Xeptrb = re;\
349   newframe->Xrdepth = frame->Xrdepth + 1;\
350   newframe->Xprevframe = frame;\
351   frame = newframe;\
352   DPRINTF(("restarting from line %d\n", __LINE__));\
353   goto HEAP_RECURSE;\
354   L_##rw:\
355   DPRINTF(("jumped back to line %d\n", __LINE__));\
356   }
357 
358 #define RRETURN(ra)\
359   {\
360   heapframe *oldframe = frame;\
361   frame = oldframe->Xprevframe;\
362   if (frame != NULL)\
363     {\
364     rrc = ra;\
365     goto HEAP_RETURN;\
366     }\
367   return ra;\
368   }
369 
370 
371 /* Structure for remembering the local variables in a private frame */
372 
373 typedef struct heapframe {
374   struct heapframe *Xprevframe;
375   struct heapframe *Xnextframe;
376 
377   /* Function arguments that may change */
378 
379   PCRE_PUCHAR Xeptr;
380   const pcre_uchar *Xecode;
381   PCRE_PUCHAR Xmstart;
382   int Xoffset_top;
383   eptrblock *Xeptrb;
384   unsigned int Xrdepth;
385 
386   /* Function local variables */
387 
388   PCRE_PUCHAR Xcallpat;
389 #ifdef SUPPORT_UTF
390   PCRE_PUCHAR Xcharptr;
391 #endif
392   PCRE_PUCHAR Xdata;
393   PCRE_PUCHAR Xnext;
394   PCRE_PUCHAR Xpp;
395   PCRE_PUCHAR Xprev;
396   PCRE_PUCHAR Xsaved_eptr;
397 
398   recursion_info Xnew_recursive;
399 
400   BOOL Xcur_is_word;
401   BOOL Xcondition;
402   BOOL Xprev_is_word;
403 
404 #ifdef SUPPORT_UCP
405   int Xprop_type;
406   unsigned int Xprop_value;
407   int Xprop_fail_result;
408   int Xoclength;
409   pcre_uchar Xocchars[6];
410 #endif
411 
412   int Xcodelink;
413   int Xctype;
414   unsigned int Xfc;
415   int Xfi;
416   int Xlength;
417   int Xmax;
418   int Xmin;
419   int Xnumber;
420   int Xoffset;
421   int Xop;
422   int Xsave_capture_last;
423   int Xsave_offset1, Xsave_offset2, Xsave_offset3;
424   int Xstacksave[REC_STACK_SAVE_MAX];
425 
426   eptrblock Xnewptrb;
427 
428   /* Where to jump back to */
429 
430   int Xwhere;
431 
432 } heapframe;
433 
434 #endif
435 
436 
437 /***************************************************************************
438 ***************************************************************************/
439 
440 
441 
442 /*************************************************
443 *         Match from current position            *
444 *************************************************/
445 
446 /* This function is called recursively in many circumstances. Whenever it
447 returns a negative (error) response, the outer incarnation must also return the
448 same response. */
449 
450 /* These macros pack up tests that are used for partial matching, and which
451 appear several times in the code. We set the "hit end" flag if the pointer is
452 at the end of the subject and also past the start of the subject (i.e.
453 something has been matched). For hard partial matching, we then return
454 immediately. The second one is used when we already know we are past the end of
455 the subject. */
456 
457 #define CHECK_PARTIAL()\
458   if (md->partial != 0 && eptr >= md->end_subject && \
459       eptr > md->start_used_ptr) \
460     { \
461     md->hitend = TRUE; \
462     if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); \
463     }
464 
465 #define SCHECK_PARTIAL()\
466   if (md->partial != 0 && eptr > md->start_used_ptr) \
467     { \
468     md->hitend = TRUE; \
469     if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); \
470     }
471 
472 
473 /* Performance note: It might be tempting to extract commonly used fields from
474 the md structure (e.g. utf, end_subject) into individual variables to improve
475 performance. Tests using gcc on a SPARC disproved this; in the first case, it
476 made performance worse.
477 
478 Arguments:
479    eptr        pointer to current character in subject
480    ecode       pointer to current position in compiled code
481    mstart      pointer to the current match start position (can be modified
482                  by encountering \K)
483    offset_top  current top pointer
484    md          pointer to "static" info for the match
485    eptrb       pointer to chain of blocks containing eptr at start of
486                  brackets - for testing for empty matches
487    rdepth      the recursion depth
488 
489 Returns:       MATCH_MATCH if matched            )  these values are >= 0
490                MATCH_NOMATCH if failed to match  )
491                a negative MATCH_xxx value for PRUNE, SKIP, etc
492                a negative PCRE_ERROR_xxx value if aborted by an error condition
493                  (e.g. stopped by repeated call or recursion limit)
494 */
495 
496 static int
match(REGISTER PCRE_PUCHAR eptr,REGISTER const pcre_uchar * ecode,PCRE_PUCHAR mstart,int offset_top,match_data * md,eptrblock * eptrb,unsigned int rdepth)497 match(REGISTER PCRE_PUCHAR eptr, REGISTER const pcre_uchar *ecode,
498   PCRE_PUCHAR mstart, int offset_top, match_data *md, eptrblock *eptrb,
499   unsigned int rdepth)
500 {
501 /* These variables do not need to be preserved over recursion in this function,
502 so they can be ordinary variables in all cases. Mark some of them with
503 "register" because they are used a lot in loops. */
504 
505 register int  rrc;         /* Returns from recursive calls */
506 register int  i;           /* Used for loops not involving calls to RMATCH() */
507 register pcre_uint32 c;    /* Character values not kept over RMATCH() calls */
508 register BOOL utf;         /* Local copy of UTF flag for speed */
509 
510 BOOL minimize, possessive; /* Quantifier options */
511 BOOL caseless;
512 int condcode;
513 
514 /* When recursion is not being used, all "local" variables that have to be
515 preserved over calls to RMATCH() are part of a "frame". We set up the top-level
516 frame on the stack here; subsequent instantiations are obtained from the heap
517 whenever RMATCH() does a "recursion". See the macro definitions above. Putting
518 the top-level on the stack rather than malloc-ing them all gives a performance
519 boost in many cases where there is not much "recursion". */
520 
521 #ifdef NO_RECURSE
522 heapframe *frame = (heapframe *)md->match_frames_base;
523 
524 /* Copy in the original argument variables */
525 
526 frame->Xeptr = eptr;
527 frame->Xecode = ecode;
528 frame->Xmstart = mstart;
529 frame->Xoffset_top = offset_top;
530 frame->Xeptrb = eptrb;
531 frame->Xrdepth = rdepth;
532 
533 /* This is where control jumps back to to effect "recursion" */
534 
535 HEAP_RECURSE:
536 
537 /* Macros make the argument variables come from the current frame */
538 
539 #define eptr               frame->Xeptr
540 #define ecode              frame->Xecode
541 #define mstart             frame->Xmstart
542 #define offset_top         frame->Xoffset_top
543 #define eptrb              frame->Xeptrb
544 #define rdepth             frame->Xrdepth
545 
546 /* Ditto for the local variables */
547 
548 #ifdef SUPPORT_UTF
549 #define charptr            frame->Xcharptr
550 #endif
551 #define callpat            frame->Xcallpat
552 #define codelink           frame->Xcodelink
553 #define data               frame->Xdata
554 #define next               frame->Xnext
555 #define pp                 frame->Xpp
556 #define prev               frame->Xprev
557 #define saved_eptr         frame->Xsaved_eptr
558 
559 #define new_recursive      frame->Xnew_recursive
560 
561 #define cur_is_word        frame->Xcur_is_word
562 #define condition          frame->Xcondition
563 #define prev_is_word       frame->Xprev_is_word
564 
565 #ifdef SUPPORT_UCP
566 #define prop_type          frame->Xprop_type
567 #define prop_value         frame->Xprop_value
568 #define prop_fail_result   frame->Xprop_fail_result
569 #define oclength           frame->Xoclength
570 #define occhars            frame->Xocchars
571 #endif
572 
573 #define ctype              frame->Xctype
574 #define fc                 frame->Xfc
575 #define fi                 frame->Xfi
576 #define length             frame->Xlength
577 #define max                frame->Xmax
578 #define min                frame->Xmin
579 #define number             frame->Xnumber
580 #define offset             frame->Xoffset
581 #define op                 frame->Xop
582 #define save_capture_last  frame->Xsave_capture_last
583 #define save_offset1       frame->Xsave_offset1
584 #define save_offset2       frame->Xsave_offset2
585 #define save_offset3       frame->Xsave_offset3
586 #define stacksave          frame->Xstacksave
587 
588 #define newptrb            frame->Xnewptrb
589 
590 /* When recursion is being used, local variables are allocated on the stack and
591 get preserved during recursion in the normal way. In this environment, fi and
592 i, and fc and c, can be the same variables. */
593 
594 #else         /* NO_RECURSE not defined */
595 #define fi i
596 #define fc c
597 
598 /* Many of the following variables are used only in small blocks of the code.
599 My normal style of coding would have declared them within each of those blocks.
600 However, in order to accommodate the version of this code that uses an external
601 "stack" implemented on the heap, it is easier to declare them all here, so the
602 declarations can be cut out in a block. The only declarations within blocks
603 below are for variables that do not have to be preserved over a recursive call
604 to RMATCH(). */
605 
606 #ifdef SUPPORT_UTF
607 const pcre_uchar *charptr;
608 #endif
609 const pcre_uchar *callpat;
610 const pcre_uchar *data;
611 const pcre_uchar *next;
612 PCRE_PUCHAR       pp;
613 const pcre_uchar *prev;
614 PCRE_PUCHAR       saved_eptr;
615 
616 recursion_info new_recursive;
617 
618 BOOL cur_is_word;
619 BOOL condition;
620 BOOL prev_is_word;
621 
622 #ifdef SUPPORT_UCP
623 int prop_type;
624 unsigned int prop_value;
625 int prop_fail_result;
626 int oclength;
627 pcre_uchar occhars[6];
628 #endif
629 
630 int codelink;
631 int ctype;
632 int length;
633 int max;
634 int min;
635 unsigned int number;
636 int offset;
637 pcre_uchar op;
638 int save_capture_last;
639 int save_offset1, save_offset2, save_offset3;
640 int stacksave[REC_STACK_SAVE_MAX];
641 
642 eptrblock newptrb;
643 
644 /* There is a special fudge for calling match() in a way that causes it to
645 measure the size of its basic stack frame when the stack is being used for
646 recursion. The second argument (ecode) being NULL triggers this behaviour. It
647 cannot normally ever be NULL. The return is the negated value of the frame
648 size. */
649 
650 if (ecode == NULL)
651   {
652   if (rdepth == 0)
653     return match((PCRE_PUCHAR)&rdepth, NULL, NULL, 0, NULL, NULL, 1);
654   else
655     {
656     int len = (char *)&rdepth - (char *)eptr;
657     return (len > 0)? -len : len;
658     }
659   }
660 #endif     /* NO_RECURSE */
661 
662 /* To save space on the stack and in the heap frame, I have doubled up on some
663 of the local variables that are used only in localised parts of the code, but
664 still need to be preserved over recursive calls of match(). These macros define
665 the alternative names that are used. */
666 
667 #define allow_zero    cur_is_word
668 #define cbegroup      condition
669 #define code_offset   codelink
670 #define condassert    condition
671 #define matched_once  prev_is_word
672 #define foc           number
673 #define save_mark     data
674 
675 /* These statements are here to stop the compiler complaining about uninitialized
676 variables. */
677 
678 #ifdef SUPPORT_UCP
679 prop_value = 0;
680 prop_fail_result = 0;
681 #endif
682 
683 
684 /* This label is used for tail recursion, which is used in a few cases even
685 when NO_RECURSE is not defined, in order to reduce the amount of stack that is
686 used. Thanks to Ian Taylor for noticing this possibility and sending the
687 original patch. */
688 
689 TAIL_RECURSE:
690 
691 /* OK, now we can get on with the real code of the function. Recursive calls
692 are specified by the macro RMATCH and RRETURN is used to return. When
693 NO_RECURSE is *not* defined, these just turn into a recursive call to match()
694 and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
695 defined). However, RMATCH isn't like a function call because it's quite a
696 complicated macro. It has to be used in one particular way. This shouldn't,
697 however, impact performance when true recursion is being used. */
698 
699 #ifdef SUPPORT_UTF
700 utf = md->utf;       /* Local copy of the flag */
701 #else
702 utf = FALSE;
703 #endif
704 
705 /* First check that we haven't called match() too many times, or that we
706 haven't exceeded the recursive call limit. */
707 
708 if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
709 if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
710 
711 /* At the start of a group with an unlimited repeat that may match an empty
712 string, the variable md->match_function_type is set to MATCH_CBEGROUP. It is
713 done this way to save having to use another function argument, which would take
714 up space on the stack. See also MATCH_CONDASSERT below.
715 
716 When MATCH_CBEGROUP is set, add the current subject pointer to the chain of
717 such remembered pointers, to be checked when we hit the closing ket, in order
718 to break infinite loops that match no characters. When match() is called in
719 other circumstances, don't add to the chain. The MATCH_CBEGROUP feature must
720 NOT be used with tail recursion, because the memory block that is used is on
721 the stack, so a new one may be required for each match(). */
722 
723 if (md->match_function_type == MATCH_CBEGROUP)
724   {
725   newptrb.epb_saved_eptr = eptr;
726   newptrb.epb_prev = eptrb;
727   eptrb = &newptrb;
728   md->match_function_type = 0;
729   }
730 
731 /* Now start processing the opcodes. */
732 
733 for (;;)
734   {
735   minimize = possessive = FALSE;
736   op = *ecode;
737 
738   switch(op)
739     {
740     case OP_MARK:
741     md->nomatch_mark = ecode + 2;
742     md->mark = NULL;    /* In case previously set by assertion */
743     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
744       eptrb, RM55);
745     if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
746          md->mark == NULL) md->mark = ecode + 2;
747 
748     /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
749     argument, and we must check whether that argument matches this MARK's
750     argument. It is passed back in md->start_match_ptr (an overloading of that
751     variable). If it does match, we reset that variable to the current subject
752     position and return MATCH_SKIP. Otherwise, pass back the return code
753     unaltered. */
754 
755     else if (rrc == MATCH_SKIP_ARG &&
756         STRCMP_UC_UC_TEST(ecode + 2, md->start_match_ptr) == 0)
757       {
758       md->start_match_ptr = eptr;
759       RRETURN(MATCH_SKIP);
760       }
761     RRETURN(rrc);
762 
763     case OP_FAIL:
764     RRETURN(MATCH_NOMATCH);
765 
766     /* COMMIT overrides PRUNE, SKIP, and THEN */
767 
768     case OP_COMMIT:
769     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
770       eptrb, RM52);
771     if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&
772         rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&
773         rrc != MATCH_THEN)
774       RRETURN(rrc);
775     RRETURN(MATCH_COMMIT);
776 
777     /* PRUNE overrides THEN */
778 
779     case OP_PRUNE:
780     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
781       eptrb, RM51);
782     if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
783     RRETURN(MATCH_PRUNE);
784 
785     case OP_PRUNE_ARG:
786     md->nomatch_mark = ecode + 2;
787     md->mark = NULL;    /* In case previously set by assertion */
788     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
789       eptrb, RM56);
790     if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
791          md->mark == NULL) md->mark = ecode + 2;
792     if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
793     RRETURN(MATCH_PRUNE);
794 
795     /* SKIP overrides PRUNE and THEN */
796 
797     case OP_SKIP:
798     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
799       eptrb, RM53);
800     if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
801       RRETURN(rrc);
802     md->start_match_ptr = eptr;   /* Pass back current position */
803     RRETURN(MATCH_SKIP);
804 
805     /* Note that, for Perl compatibility, SKIP with an argument does NOT set
806     nomatch_mark. There is a flag that disables this opcode when re-matching a
807     pattern that ended with a SKIP for which there was not a matching MARK. */
808 
809     case OP_SKIP_ARG:
810     if (md->ignore_skip_arg)
811       {
812       ecode += PRIV(OP_lengths)[*ecode] + ecode[1];
813       break;
814       }
815     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
816       eptrb, RM57);
817     if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
818       RRETURN(rrc);
819 
820     /* Pass back the current skip name by overloading md->start_match_ptr and
821     returning the special MATCH_SKIP_ARG return code. This will either be
822     caught by a matching MARK, or get to the top, where it causes a rematch
823     with the md->ignore_skip_arg flag set. */
824 
825     md->start_match_ptr = ecode + 2;
826     RRETURN(MATCH_SKIP_ARG);
827 
828     /* For THEN (and THEN_ARG) we pass back the address of the opcode, so that
829     the branch in which it occurs can be determined. Overload the start of
830     match pointer to do this. */
831 
832     case OP_THEN:
833     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
834       eptrb, RM54);
835     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
836     md->start_match_ptr = ecode;
837     RRETURN(MATCH_THEN);
838 
839     case OP_THEN_ARG:
840     md->nomatch_mark = ecode + 2;
841     md->mark = NULL;    /* In case previously set by assertion */
842     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top,
843       md, eptrb, RM58);
844     if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
845          md->mark == NULL) md->mark = ecode + 2;
846     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
847     md->start_match_ptr = ecode;
848     RRETURN(MATCH_THEN);
849 
850     /* Handle an atomic group that does not contain any capturing parentheses.
851     This can be handled like an assertion. Prior to 8.13, all atomic groups
852     were handled this way. In 8.13, the code was changed as below for ONCE, so
853     that backups pass through the group and thereby reset captured values.
854     However, this uses a lot more stack, so in 8.20, atomic groups that do not
855     contain any captures generate OP_ONCE_NC, which can be handled in the old,
856     less stack intensive way.
857 
858     Check the alternative branches in turn - the matching won't pass the KET
859     for this kind of subpattern. If any one branch matches, we carry on as at
860     the end of a normal bracket, leaving the subject pointer, but resetting
861     the start-of-match value in case it was changed by \K. */
862 
863     case OP_ONCE_NC:
864     prev = ecode;
865     saved_eptr = eptr;
866     save_mark = md->mark;
867     do
868       {
869       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM64);
870       if (rrc == MATCH_MATCH)  /* Note: _not_ MATCH_ACCEPT */
871         {
872         mstart = md->start_match_ptr;
873         break;
874         }
875       if (rrc == MATCH_THEN)
876         {
877         next = ecode + GET(ecode,1);
878         if (md->start_match_ptr < next &&
879             (*ecode == OP_ALT || *next == OP_ALT))
880           rrc = MATCH_NOMATCH;
881         }
882 
883       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
884       ecode += GET(ecode,1);
885       md->mark = save_mark;
886       }
887     while (*ecode == OP_ALT);
888 
889     /* If we hit the end of the group (which could be repeated), fail */
890 
891     if (*ecode != OP_ONCE_NC && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
892 
893     /* Continue as from after the group, updating the offsets high water
894     mark, since extracts may have been taken. */
895 
896     do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
897 
898     offset_top = md->end_offset_top;
899     eptr = md->end_match_ptr;
900 
901     /* For a non-repeating ket, just continue at this level. This also
902     happens for a repeating ket if no characters were matched in the group.
903     This is the forcible breaking of infinite loops as implemented in Perl
904     5.005. */
905 
906     if (*ecode == OP_KET || eptr == saved_eptr)
907       {
908       ecode += 1+LINK_SIZE;
909       break;
910       }
911 
912     /* The repeating kets try the rest of the pattern or restart from the
913     preceding bracket, in the appropriate order. The second "call" of match()
914     uses tail recursion, to avoid using another stack frame. */
915 
916     if (*ecode == OP_KETRMIN)
917       {
918       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM65);
919       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
920       ecode = prev;
921       goto TAIL_RECURSE;
922       }
923     else  /* OP_KETRMAX */
924       {
925       RMATCH(eptr, prev, offset_top, md, eptrb, RM66);
926       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
927       ecode += 1 + LINK_SIZE;
928       goto TAIL_RECURSE;
929       }
930     /* Control never gets here */
931 
932     /* Handle a capturing bracket, other than those that are possessive with an
933     unlimited repeat. If there is space in the offset vector, save the current
934     subject position in the working slot at the top of the vector. We mustn't
935     change the current values of the data slot, because they may be set from a
936     previous iteration of this group, and be referred to by a reference inside
937     the group. A failure to match might occur after the group has succeeded,
938     if something later on doesn't match. For this reason, we need to restore
939     the working value and also the values of the final offsets, in case they
940     were set by a previous iteration of the same bracket.
941 
942     If there isn't enough space in the offset vector, treat this as if it were
943     a non-capturing bracket. Don't worry about setting the flag for the error
944     case here; that is handled in the code for KET. */
945 
946     case OP_CBRA:
947     case OP_SCBRA:
948     number = GET2(ecode, 1+LINK_SIZE);
949     offset = number << 1;
950 
951 #ifdef PCRE_DEBUG
952     printf("start bracket %d\n", number);
953     printf("subject=");
954     pchars(eptr, 16, TRUE, md);
955     printf("\n");
956 #endif
957 
958     if (offset < md->offset_max)
959       {
960       save_offset1 = md->offset_vector[offset];
961       save_offset2 = md->offset_vector[offset+1];
962       save_offset3 = md->offset_vector[md->offset_end - number];
963       save_capture_last = md->capture_last;
964       save_mark = md->mark;
965 
966       DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
967       md->offset_vector[md->offset_end - number] =
968         (int)(eptr - md->start_subject);
969 
970       for (;;)
971         {
972         if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
973         RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
974           eptrb, RM1);
975         if (rrc == MATCH_ONCE) break;  /* Backing up through an atomic group */
976 
977         /* If we backed up to a THEN, check whether it is within the current
978         branch by comparing the address of the THEN that is passed back with
979         the end of the branch. If it is within the current branch, and the
980         branch is one of two or more alternatives (it either starts or ends
981         with OP_ALT), we have reached the limit of THEN's action, so convert
982         the return code to NOMATCH, which will cause normal backtracking to
983         happen from now on. Otherwise, THEN is passed back to an outer
984         alternative. This implements Perl's treatment of parenthesized groups,
985         where a group not containing | does not affect the current alternative,
986         that is, (X) is NOT the same as (X|(*F)). */
987 
988         if (rrc == MATCH_THEN)
989           {
990           next = ecode + GET(ecode,1);
991           if (md->start_match_ptr < next &&
992               (*ecode == OP_ALT || *next == OP_ALT))
993             rrc = MATCH_NOMATCH;
994           }
995 
996         /* Anything other than NOMATCH is passed back. */
997 
998         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
999         md->capture_last = save_capture_last;
1000         ecode += GET(ecode, 1);
1001         md->mark = save_mark;
1002         if (*ecode != OP_ALT) break;
1003         }
1004 
1005       DPRINTF(("bracket %d failed\n", number));
1006       md->offset_vector[offset] = save_offset1;
1007       md->offset_vector[offset+1] = save_offset2;
1008       md->offset_vector[md->offset_end - number] = save_offset3;
1009 
1010       /* At this point, rrc will be one of MATCH_ONCE or MATCH_NOMATCH. */
1011 
1012       RRETURN(rrc);
1013       }
1014 
1015     /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
1016     as a non-capturing bracket. */
1017 
1018     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1019     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1020 
1021     DPRINTF(("insufficient capture room: treat as non-capturing\n"));
1022 
1023     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1024     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1025 
1026     /* Non-capturing or atomic group, except for possessive with unlimited
1027     repeat and ONCE group with no captures. Loop for all the alternatives.
1028 
1029     When we get to the final alternative within the brackets, we used to return
1030     the result of a recursive call to match() whatever happened so it was
1031     possible to reduce stack usage by turning this into a tail recursion,
1032     except in the case of a possibly empty group. However, now that there is
1033     the possibility of (*THEN) occurring in the final alternative, this
1034     optimization is no longer always possible.
1035 
1036     We can optimize if we know there are no (*THEN)s in the pattern; at present
1037     this is the best that can be done.
1038 
1039     MATCH_ONCE is returned when the end of an atomic group is successfully
1040     reached, but subsequent matching fails. It passes back up the tree (causing
1041     captured values to be reset) until the original atomic group level is
1042     reached. This is tested by comparing md->once_target with the start of the
1043     group. At this point, the return is converted into MATCH_NOMATCH so that
1044     previous backup points can be taken. */
1045 
1046     case OP_ONCE:
1047     case OP_BRA:
1048     case OP_SBRA:
1049     DPRINTF(("start non-capturing bracket\n"));
1050 
1051     for (;;)
1052       {
1053       if (op >= OP_SBRA || op == OP_ONCE)
1054         md->match_function_type = MATCH_CBEGROUP;
1055 
1056       /* If this is not a possibly empty group, and there are no (*THEN)s in
1057       the pattern, and this is the final alternative, optimize as described
1058       above. */
1059 
1060       else if (!md->hasthen && ecode[GET(ecode, 1)] != OP_ALT)
1061         {
1062         ecode += PRIV(OP_lengths)[*ecode];
1063         goto TAIL_RECURSE;
1064         }
1065 
1066       /* In all other cases, we have to make another call to match(). */
1067 
1068       save_mark = md->mark;
1069       RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb,
1070         RM2);
1071 
1072       /* See comment in the code for capturing groups above about handling
1073       THEN. */
1074 
1075       if (rrc == MATCH_THEN)
1076         {
1077         next = ecode + GET(ecode,1);
1078         if (md->start_match_ptr < next &&
1079             (*ecode == OP_ALT || *next == OP_ALT))
1080           rrc = MATCH_NOMATCH;
1081         }
1082 
1083       if (rrc != MATCH_NOMATCH)
1084         {
1085         if (rrc == MATCH_ONCE)
1086           {
1087           const pcre_uchar *scode = ecode;
1088           if (*scode != OP_ONCE)           /* If not at start, find it */
1089             {
1090             while (*scode == OP_ALT) scode += GET(scode, 1);
1091             scode -= GET(scode, 1);
1092             }
1093           if (md->once_target == scode) rrc = MATCH_NOMATCH;
1094           }
1095         RRETURN(rrc);
1096         }
1097       ecode += GET(ecode, 1);
1098       md->mark = save_mark;
1099       if (*ecode != OP_ALT) break;
1100       }
1101 
1102     RRETURN(MATCH_NOMATCH);
1103 
1104     /* Handle possessive capturing brackets with an unlimited repeat. We come
1105     here from BRAZERO with allow_zero set TRUE. The offset_vector values are
1106     handled similarly to the normal case above. However, the matching is
1107     different. The end of these brackets will always be OP_KETRPOS, which
1108     returns MATCH_KETRPOS without going further in the pattern. By this means
1109     we can handle the group by iteration rather than recursion, thereby
1110     reducing the amount of stack needed. */
1111 
1112     case OP_CBRAPOS:
1113     case OP_SCBRAPOS:
1114     allow_zero = FALSE;
1115 
1116     POSSESSIVE_CAPTURE:
1117     number = GET2(ecode, 1+LINK_SIZE);
1118     offset = number << 1;
1119 
1120 #ifdef PCRE_DEBUG
1121     printf("start possessive bracket %d\n", number);
1122     printf("subject=");
1123     pchars(eptr, 16, TRUE, md);
1124     printf("\n");
1125 #endif
1126 
1127     if (offset < md->offset_max)
1128       {
1129       matched_once = FALSE;
1130       code_offset = (int)(ecode - md->start_code);
1131 
1132       save_offset1 = md->offset_vector[offset];
1133       save_offset2 = md->offset_vector[offset+1];
1134       save_offset3 = md->offset_vector[md->offset_end - number];
1135       save_capture_last = md->capture_last;
1136 
1137       DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
1138 
1139       /* Each time round the loop, save the current subject position for use
1140       when the group matches. For MATCH_MATCH, the group has matched, so we
1141       restart it with a new subject starting position, remembering that we had
1142       at least one match. For MATCH_NOMATCH, carry on with the alternatives, as
1143       usual. If we haven't matched any alternatives in any iteration, check to
1144       see if a previous iteration matched. If so, the group has matched;
1145       continue from afterwards. Otherwise it has failed; restore the previous
1146       capture values before returning NOMATCH. */
1147 
1148       for (;;)
1149         {
1150         md->offset_vector[md->offset_end - number] =
1151           (int)(eptr - md->start_subject);
1152         if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1153         RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1154           eptrb, RM63);
1155         if (rrc == MATCH_KETRPOS)
1156           {
1157           offset_top = md->end_offset_top;
1158           eptr = md->end_match_ptr;
1159           ecode = md->start_code + code_offset;
1160           save_capture_last = md->capture_last;
1161           matched_once = TRUE;
1162           continue;
1163           }
1164 
1165         /* See comment in the code for capturing groups above about handling
1166         THEN. */
1167 
1168         if (rrc == MATCH_THEN)
1169           {
1170           next = ecode + GET(ecode,1);
1171           if (md->start_match_ptr < next &&
1172               (*ecode == OP_ALT || *next == OP_ALT))
1173             rrc = MATCH_NOMATCH;
1174           }
1175 
1176         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1177         md->capture_last = save_capture_last;
1178         ecode += GET(ecode, 1);
1179         if (*ecode != OP_ALT) break;
1180         }
1181 
1182       if (!matched_once)
1183         {
1184         md->offset_vector[offset] = save_offset1;
1185         md->offset_vector[offset+1] = save_offset2;
1186         md->offset_vector[md->offset_end - number] = save_offset3;
1187         }
1188 
1189       if (allow_zero || matched_once)
1190         {
1191         ecode += 1 + LINK_SIZE;
1192         break;
1193         }
1194 
1195       RRETURN(MATCH_NOMATCH);
1196       }
1197 
1198     /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
1199     as a non-capturing bracket. */
1200 
1201     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1202     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1203 
1204     DPRINTF(("insufficient capture room: treat as non-capturing\n"));
1205 
1206     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1207     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1208 
1209     /* Non-capturing possessive bracket with unlimited repeat. We come here
1210     from BRAZERO with allow_zero = TRUE. The code is similar to the above,
1211     without the capturing complication. It is written out separately for speed
1212     and cleanliness. */
1213 
1214     case OP_BRAPOS:
1215     case OP_SBRAPOS:
1216     allow_zero = FALSE;
1217 
1218     POSSESSIVE_NON_CAPTURE:
1219     matched_once = FALSE;
1220     code_offset = (int)(ecode - md->start_code);
1221 
1222     for (;;)
1223       {
1224       if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1225       RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1226         eptrb, RM48);
1227       if (rrc == MATCH_KETRPOS)
1228         {
1229         offset_top = md->end_offset_top;
1230         eptr = md->end_match_ptr;
1231         ecode = md->start_code + code_offset;
1232         matched_once = TRUE;
1233         continue;
1234         }
1235 
1236       /* See comment in the code for capturing groups above about handling
1237       THEN. */
1238 
1239       if (rrc == MATCH_THEN)
1240         {
1241         next = ecode + GET(ecode,1);
1242         if (md->start_match_ptr < next &&
1243             (*ecode == OP_ALT || *next == OP_ALT))
1244           rrc = MATCH_NOMATCH;
1245         }
1246 
1247       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1248       ecode += GET(ecode, 1);
1249       if (*ecode != OP_ALT) break;
1250       }
1251 
1252     if (matched_once || allow_zero)
1253       {
1254       ecode += 1 + LINK_SIZE;
1255       break;
1256       }
1257     RRETURN(MATCH_NOMATCH);
1258 
1259     /* Control never reaches here. */
1260 
1261     /* Conditional group: compilation checked that there are no more than
1262     two branches. If the condition is false, skipping the first branch takes us
1263     past the end if there is only one branch, but that's OK because that is
1264     exactly what going to the ket would do. */
1265 
1266     case OP_COND:
1267     case OP_SCOND:
1268     codelink = GET(ecode, 1);
1269 
1270     /* Because of the way auto-callout works during compile, a callout item is
1271     inserted between OP_COND and an assertion condition. */
1272 
1273     if (ecode[LINK_SIZE+1] == OP_CALLOUT)
1274       {
1275       if (PUBL(callout) != NULL)
1276         {
1277         PUBL(callout_block) cb;
1278         cb.version          = 2;   /* Version 2 of the callout block */
1279         cb.callout_number   = ecode[LINK_SIZE+2];
1280         cb.offset_vector    = md->offset_vector;
1281 #if defined COMPILE_PCRE8
1282         cb.subject          = (PCRE_SPTR)md->start_subject;
1283 #elif defined COMPILE_PCRE16
1284         cb.subject          = (PCRE_SPTR16)md->start_subject;
1285 #elif defined COMPILE_PCRE32
1286         cb.subject          = (PCRE_SPTR32)md->start_subject;
1287 #endif
1288         cb.subject_length   = (int)(md->end_subject - md->start_subject);
1289         cb.start_match      = (int)(mstart - md->start_subject);
1290         cb.current_position = (int)(eptr - md->start_subject);
1291         cb.pattern_position = GET(ecode, LINK_SIZE + 3);
1292         cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
1293         cb.capture_top      = offset_top/2;
1294         cb.capture_last     = md->capture_last;
1295         cb.callout_data     = md->callout_data;
1296         cb.mark             = md->nomatch_mark;
1297         if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1298         if (rrc < 0) RRETURN(rrc);
1299         }
1300       ecode += PRIV(OP_lengths)[OP_CALLOUT];
1301       }
1302 
1303     condcode = ecode[LINK_SIZE+1];
1304 
1305     /* Now see what the actual condition is */
1306 
1307     if (condcode == OP_RREF || condcode == OP_NRREF)    /* Recursion test */
1308       {
1309       if (md->recursive == NULL)                /* Not recursing => FALSE */
1310         {
1311         condition = FALSE;
1312         ecode += GET(ecode, 1);
1313         }
1314       else
1315         {
1316         unsigned int recno = GET2(ecode, LINK_SIZE + 2);   /* Recursion group number*/
1317         condition = (recno == RREF_ANY || recno == md->recursive->group_num);
1318 
1319         /* If the test is for recursion into a specific subpattern, and it is
1320         false, but the test was set up by name, scan the table to see if the
1321         name refers to any other numbers, and test them. The condition is true
1322         if any one is set. */
1323 
1324         if (!condition && condcode == OP_NRREF)
1325           {
1326           pcre_uchar *slotA = md->name_table;
1327           for (i = 0; i < md->name_count; i++)
1328             {
1329             if (GET2(slotA, 0) == recno) break;
1330             slotA += md->name_entry_size;
1331             }
1332 
1333           /* Found a name for the number - there can be only one; duplicate
1334           names for different numbers are allowed, but not vice versa. First
1335           scan down for duplicates. */
1336 
1337           if (i < md->name_count)
1338             {
1339             pcre_uchar *slotB = slotA;
1340             while (slotB > md->name_table)
1341               {
1342               slotB -= md->name_entry_size;
1343               if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1344                 {
1345                 condition = GET2(slotB, 0) == md->recursive->group_num;
1346                 if (condition) break;
1347                 }
1348               else break;
1349               }
1350 
1351             /* Scan up for duplicates */
1352 
1353             if (!condition)
1354               {
1355               slotB = slotA;
1356               for (i++; i < md->name_count; i++)
1357                 {
1358                 slotB += md->name_entry_size;
1359                 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1360                   {
1361                   condition = GET2(slotB, 0) == md->recursive->group_num;
1362                   if (condition) break;
1363                   }
1364                 else break;
1365                 }
1366               }
1367             }
1368           }
1369 
1370         /* Choose branch according to the condition */
1371 
1372         ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1);
1373         }
1374       }
1375 
1376     else if (condcode == OP_CREF || condcode == OP_NCREF)  /* Group used test */
1377       {
1378       offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
1379       condition = offset < offset_top && md->offset_vector[offset] >= 0;
1380 
1381       /* If the numbered capture is unset, but the reference was by name,
1382       scan the table to see if the name refers to any other numbers, and test
1383       them. The condition is true if any one is set. This is tediously similar
1384       to the code above, but not close enough to try to amalgamate. */
1385 
1386       if (!condition && condcode == OP_NCREF)
1387         {
1388         unsigned int refno = offset >> 1;
1389         pcre_uchar *slotA = md->name_table;
1390 
1391         for (i = 0; i < md->name_count; i++)
1392           {
1393           if (GET2(slotA, 0) == refno) break;
1394           slotA += md->name_entry_size;
1395           }
1396 
1397         /* Found a name for the number - there can be only one; duplicate names
1398         for different numbers are allowed, but not vice versa. First scan down
1399         for duplicates. */
1400 
1401         if (i < md->name_count)
1402           {
1403           pcre_uchar *slotB = slotA;
1404           while (slotB > md->name_table)
1405             {
1406             slotB -= md->name_entry_size;
1407             if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1408               {
1409               offset = GET2(slotB, 0) << 1;
1410               condition = offset < offset_top &&
1411                 md->offset_vector[offset] >= 0;
1412               if (condition) break;
1413               }
1414             else break;
1415             }
1416 
1417           /* Scan up for duplicates */
1418 
1419           if (!condition)
1420             {
1421             slotB = slotA;
1422             for (i++; i < md->name_count; i++)
1423               {
1424               slotB += md->name_entry_size;
1425               if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1426                 {
1427                 offset = GET2(slotB, 0) << 1;
1428                 condition = offset < offset_top &&
1429                   md->offset_vector[offset] >= 0;
1430                 if (condition) break;
1431                 }
1432               else break;
1433               }
1434             }
1435           }
1436         }
1437 
1438       /* Choose branch according to the condition */
1439 
1440       ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1);
1441       }
1442 
1443     else if (condcode == OP_DEF)     /* DEFINE - always false */
1444       {
1445       condition = FALSE;
1446       ecode += GET(ecode, 1);
1447       }
1448 
1449     /* The condition is an assertion. Call match() to evaluate it - setting
1450     md->match_function_type to MATCH_CONDASSERT causes it to stop at the end of
1451     an assertion. */
1452 
1453     else
1454       {
1455       md->match_function_type = MATCH_CONDASSERT;
1456       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM3);
1457       if (rrc == MATCH_MATCH)
1458         {
1459         if (md->end_offset_top > offset_top)
1460           offset_top = md->end_offset_top;  /* Captures may have happened */
1461         condition = TRUE;
1462         ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
1463         while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1464         }
1465 
1466       /* PCRE doesn't allow the effect of (*THEN) to escape beyond an
1467       assertion; it is therefore treated as NOMATCH. */
1468 
1469       else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1470         {
1471         RRETURN(rrc);         /* Need braces because of following else */
1472         }
1473       else
1474         {
1475         condition = FALSE;
1476         ecode += codelink;
1477         }
1478       }
1479 
1480     /* We are now at the branch that is to be obeyed. As there is only one, can
1481     use tail recursion to avoid using another stack frame, except when there is
1482     unlimited repeat of a possibly empty group. In the latter case, a recursive
1483     call to match() is always required, unless the second alternative doesn't
1484     exist, in which case we can just plough on. Note that, for compatibility
1485     with Perl, the | in a conditional group is NOT treated as creating two
1486     alternatives. If a THEN is encountered in the branch, it propagates out to
1487     the enclosing alternative (unless nested in a deeper set of alternatives,
1488     of course). */
1489 
1490     if (condition || *ecode == OP_ALT)
1491       {
1492       if (op != OP_SCOND)
1493         {
1494         ecode += 1 + LINK_SIZE;
1495         goto TAIL_RECURSE;
1496         }
1497 
1498       md->match_function_type = MATCH_CBEGROUP;
1499       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM49);
1500       RRETURN(rrc);
1501       }
1502 
1503      /* Condition false & no alternative; continue after the group. */
1504 
1505     else
1506       {
1507       ecode += 1 + LINK_SIZE;
1508       }
1509     break;
1510 
1511 
1512     /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
1513     to close any currently open capturing brackets. */
1514 
1515     case OP_CLOSE:
1516     number = GET2(ecode, 1);
1517     offset = number << 1;
1518 
1519 #ifdef PCRE_DEBUG
1520       printf("end bracket %d at *ACCEPT", number);
1521       printf("\n");
1522 #endif
1523 
1524     md->capture_last = number;
1525     if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1526       {
1527       md->offset_vector[offset] =
1528         md->offset_vector[md->offset_end - number];
1529       md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1530       if (offset_top <= offset) offset_top = offset + 2;
1531       }
1532     ecode += 1 + IMM2_SIZE;
1533     break;
1534 
1535 
1536     /* End of the pattern, either real or forced. */
1537 
1538     case OP_END:
1539     case OP_ACCEPT:
1540     case OP_ASSERT_ACCEPT:
1541 
1542     /* If we have matched an empty string, fail if not in an assertion and not
1543     in a recursion if either PCRE_NOTEMPTY is set, or if PCRE_NOTEMPTY_ATSTART
1544     is set and we have matched at the start of the subject. In both cases,
1545     backtracking will then try other alternatives, if any. */
1546 
1547     if (eptr == mstart && op != OP_ASSERT_ACCEPT &&
1548          md->recursive == NULL &&
1549          (md->notempty ||
1550            (md->notempty_atstart &&
1551              mstart == md->start_subject + md->start_offset)))
1552       RRETURN(MATCH_NOMATCH);
1553 
1554     /* Otherwise, we have a match. */
1555 
1556     md->end_match_ptr = eptr;           /* Record where we ended */
1557     md->end_offset_top = offset_top;    /* and how many extracts were taken */
1558     md->start_match_ptr = mstart;       /* and the start (\K can modify) */
1559 
1560     /* For some reason, the macros don't work properly if an expression is
1561     given as the argument to RRETURN when the heap is in use. */
1562 
1563     rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
1564     RRETURN(rrc);
1565 
1566     /* Assertion brackets. Check the alternative branches in turn - the
1567     matching won't pass the KET for an assertion. If any one branch matches,
1568     the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
1569     start of each branch to move the current point backwards, so the code at
1570     this level is identical to the lookahead case. When the assertion is part
1571     of a condition, we want to return immediately afterwards. The caller of
1572     this incarnation of the match() function will have set MATCH_CONDASSERT in
1573     md->match_function_type, and one of these opcodes will be the first opcode
1574     that is processed. We use a local variable that is preserved over calls to
1575     match() to remember this case. */
1576 
1577     case OP_ASSERT:
1578     case OP_ASSERTBACK:
1579     save_mark = md->mark;
1580     if (md->match_function_type == MATCH_CONDASSERT)
1581       {
1582       condassert = TRUE;
1583       md->match_function_type = 0;
1584       }
1585     else condassert = FALSE;
1586 
1587     do
1588       {
1589       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM4);
1590       if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1591         {
1592         mstart = md->start_match_ptr;   /* In case \K reset it */
1593         break;
1594         }
1595       md->mark = save_mark;
1596 
1597       /* A COMMIT failure must fail the entire assertion, without trying any
1598       subsequent branches. */
1599 
1600       if (rrc == MATCH_COMMIT) RRETURN(MATCH_NOMATCH);
1601 
1602       /* PCRE does not allow THEN to escape beyond an assertion; it
1603       is treated as NOMATCH. */
1604 
1605       if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1606       ecode += GET(ecode, 1);
1607       }
1608     while (*ecode == OP_ALT);
1609 
1610     if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
1611 
1612     /* If checking an assertion for a condition, return MATCH_MATCH. */
1613 
1614     if (condassert) RRETURN(MATCH_MATCH);
1615 
1616     /* Continue from after the assertion, updating the offsets high water
1617     mark, since extracts may have been taken during the assertion. */
1618 
1619     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1620     ecode += 1 + LINK_SIZE;
1621     offset_top = md->end_offset_top;
1622     continue;
1623 
1624     /* Negative assertion: all branches must fail to match. Encountering SKIP,
1625     PRUNE, or COMMIT means we must assume failure without checking subsequent
1626     branches. */
1627 
1628     case OP_ASSERT_NOT:
1629     case OP_ASSERTBACK_NOT:
1630     save_mark = md->mark;
1631     if (md->match_function_type == MATCH_CONDASSERT)
1632       {
1633       condassert = TRUE;
1634       md->match_function_type = 0;
1635       }
1636     else condassert = FALSE;
1637 
1638     do
1639       {
1640       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);
1641       md->mark = save_mark;
1642       if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) RRETURN(MATCH_NOMATCH);
1643       if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
1644         {
1645         do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1646         break;
1647         }
1648 
1649       /* PCRE does not allow THEN to escape beyond an assertion; it is treated
1650       as NOMATCH. */
1651 
1652       if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1653       ecode += GET(ecode,1);
1654       }
1655     while (*ecode == OP_ALT);
1656 
1657     if (condassert) RRETURN(MATCH_MATCH);  /* Condition assertion */
1658 
1659     ecode += 1 + LINK_SIZE;
1660     continue;
1661 
1662     /* Move the subject pointer back. This occurs only at the start of
1663     each branch of a lookbehind assertion. If we are too close to the start to
1664     move back, this match function fails. When working with UTF-8 we move
1665     back a number of characters, not bytes. */
1666 
1667     case OP_REVERSE:
1668 #ifdef SUPPORT_UTF
1669     if (utf)
1670       {
1671       i = GET(ecode, 1);
1672       while (i-- > 0)
1673         {
1674         eptr--;
1675         if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1676         BACKCHAR(eptr);
1677         }
1678       }
1679     else
1680 #endif
1681 
1682     /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
1683 
1684       {
1685       eptr -= GET(ecode, 1);
1686       if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1687       }
1688 
1689     /* Save the earliest consulted character, then skip to next op code */
1690 
1691     if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
1692     ecode += 1 + LINK_SIZE;
1693     break;
1694 
1695     /* The callout item calls an external function, if one is provided, passing
1696     details of the match so far. This is mainly for debugging, though the
1697     function is able to force a failure. */
1698 
1699     case OP_CALLOUT:
1700     if (PUBL(callout) != NULL)
1701       {
1702       PUBL(callout_block) cb;
1703       cb.version          = 2;   /* Version 2 of the callout block */
1704       cb.callout_number   = ecode[1];
1705       cb.offset_vector    = md->offset_vector;
1706 #if defined COMPILE_PCRE8
1707       cb.subject          = (PCRE_SPTR)md->start_subject;
1708 #elif defined COMPILE_PCRE16
1709       cb.subject          = (PCRE_SPTR16)md->start_subject;
1710 #elif defined COMPILE_PCRE32
1711       cb.subject          = (PCRE_SPTR32)md->start_subject;
1712 #endif
1713       cb.subject_length   = (int)(md->end_subject - md->start_subject);
1714       cb.start_match      = (int)(mstart - md->start_subject);
1715       cb.current_position = (int)(eptr - md->start_subject);
1716       cb.pattern_position = GET(ecode, 2);
1717       cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1718       cb.capture_top      = offset_top/2;
1719       cb.capture_last     = md->capture_last;
1720       cb.callout_data     = md->callout_data;
1721       cb.mark             = md->nomatch_mark;
1722       if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1723       if (rrc < 0) RRETURN(rrc);
1724       }
1725     ecode += 2 + 2*LINK_SIZE;
1726     break;
1727 
1728     /* Recursion either matches the current regex, or some subexpression. The
1729     offset data is the offset to the starting bracket from the start of the
1730     whole pattern. (This is so that it works from duplicated subpatterns.)
1731 
1732     The state of the capturing groups is preserved over recursion, and
1733     re-instated afterwards. We don't know how many are started and not yet
1734     finished (offset_top records the completed total) so we just have to save
1735     all the potential data. There may be up to 65535 such values, which is too
1736     large to put on the stack, but using malloc for small numbers seems
1737     expensive. As a compromise, the stack is used when there are no more than
1738     REC_STACK_SAVE_MAX values to store; otherwise malloc is used.
1739 
1740     There are also other values that have to be saved. We use a chained
1741     sequence of blocks that actually live on the stack. Thanks to Robin Houston
1742     for the original version of this logic. It has, however, been hacked around
1743     a lot, so he is not to blame for the current way it works. */
1744 
1745     case OP_RECURSE:
1746       {
1747       recursion_info *ri;
1748       unsigned int recno;
1749 
1750       callpat = md->start_code + GET(ecode, 1);
1751       recno = (callpat == md->start_code)? 0 :
1752         GET2(callpat, 1 + LINK_SIZE);
1753 
1754       /* Check for repeating a recursion without advancing the subject pointer.
1755       This should catch convoluted mutual recursions. (Some simple cases are
1756       caught at compile time.) */
1757 
1758       for (ri = md->recursive; ri != NULL; ri = ri->prevrec)
1759         if (recno == ri->group_num && eptr == ri->subject_position)
1760           RRETURN(PCRE_ERROR_RECURSELOOP);
1761 
1762       /* Add to "recursing stack" */
1763 
1764       new_recursive.group_num = recno;
1765       new_recursive.subject_position = eptr;
1766       new_recursive.prevrec = md->recursive;
1767       md->recursive = &new_recursive;
1768 
1769       /* Where to continue from afterwards */
1770 
1771       ecode += 1 + LINK_SIZE;
1772 
1773       /* Now save the offset data */
1774 
1775       new_recursive.saved_max = md->offset_end;
1776       if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
1777         new_recursive.offset_save = stacksave;
1778       else
1779         {
1780         new_recursive.offset_save =
1781           (int *)(PUBL(malloc))(new_recursive.saved_max * sizeof(int));
1782         if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
1783         }
1784       memcpy(new_recursive.offset_save, md->offset_vector,
1785             new_recursive.saved_max * sizeof(int));
1786 
1787       /* OK, now we can do the recursion. After processing each alternative,
1788       restore the offset data. If there were nested recursions, md->recursive
1789       might be changed, so reset it before looping. */
1790 
1791       DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1792       cbegroup = (*callpat >= OP_SBRA);
1793       do
1794         {
1795         if (cbegroup) md->match_function_type = MATCH_CBEGROUP;
1796         RMATCH(eptr, callpat + PRIV(OP_lengths)[*callpat], offset_top,
1797           md, eptrb, RM6);
1798         memcpy(md->offset_vector, new_recursive.offset_save,
1799             new_recursive.saved_max * sizeof(int));
1800         md->recursive = new_recursive.prevrec;
1801         if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1802           {
1803           DPRINTF(("Recursion matched\n"));
1804           if (new_recursive.offset_save != stacksave)
1805             (PUBL(free))(new_recursive.offset_save);
1806 
1807           /* Set where we got to in the subject, and reset the start in case
1808           it was changed by \K. This *is* propagated back out of a recursion,
1809           for Perl compatibility. */
1810 
1811           eptr = md->end_match_ptr;
1812           mstart = md->start_match_ptr;
1813           goto RECURSION_MATCHED;        /* Exit loop; end processing */
1814           }
1815 
1816         /* PCRE does not allow THEN or COMMIT to escape beyond a recursion; it
1817         is treated as NOMATCH. */
1818 
1819         else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN &&
1820                  rrc != MATCH_COMMIT)
1821           {
1822           DPRINTF(("Recursion gave error %d\n", rrc));
1823           if (new_recursive.offset_save != stacksave)
1824             (PUBL(free))(new_recursive.offset_save);
1825           RRETURN(rrc);
1826           }
1827 
1828         md->recursive = &new_recursive;
1829         callpat += GET(callpat, 1);
1830         }
1831       while (*callpat == OP_ALT);
1832 
1833       DPRINTF(("Recursion didn't match\n"));
1834       md->recursive = new_recursive.prevrec;
1835       if (new_recursive.offset_save != stacksave)
1836         (PUBL(free))(new_recursive.offset_save);
1837       RRETURN(MATCH_NOMATCH);
1838       }
1839 
1840     RECURSION_MATCHED:
1841     break;
1842 
1843     /* An alternation is the end of a branch; scan along to find the end of the
1844     bracketed group and go to there. */
1845 
1846     case OP_ALT:
1847     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1848     break;
1849 
1850     /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1851     indicating that it may occur zero times. It may repeat infinitely, or not
1852     at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1853     with fixed upper repeat limits are compiled as a number of copies, with the
1854     optional ones preceded by BRAZERO or BRAMINZERO. */
1855 
1856     case OP_BRAZERO:
1857     next = ecode + 1;
1858     RMATCH(eptr, next, offset_top, md, eptrb, RM10);
1859     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1860     do next += GET(next, 1); while (*next == OP_ALT);
1861     ecode = next + 1 + LINK_SIZE;
1862     break;
1863 
1864     case OP_BRAMINZERO:
1865     next = ecode + 1;
1866     do next += GET(next, 1); while (*next == OP_ALT);
1867     RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, eptrb, RM11);
1868     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1869     ecode++;
1870     break;
1871 
1872     case OP_SKIPZERO:
1873     next = ecode+1;
1874     do next += GET(next,1); while (*next == OP_ALT);
1875     ecode = next + 1 + LINK_SIZE;
1876     break;
1877 
1878     /* BRAPOSZERO occurs before a possessive bracket group. Don't do anything
1879     here; just jump to the group, with allow_zero set TRUE. */
1880 
1881     case OP_BRAPOSZERO:
1882     op = *(++ecode);
1883     allow_zero = TRUE;
1884     if (op == OP_CBRAPOS || op == OP_SCBRAPOS) goto POSSESSIVE_CAPTURE;
1885       goto POSSESSIVE_NON_CAPTURE;
1886 
1887     /* End of a group, repeated or non-repeating. */
1888 
1889     case OP_KET:
1890     case OP_KETRMIN:
1891     case OP_KETRMAX:
1892     case OP_KETRPOS:
1893     prev = ecode - GET(ecode, 1);
1894 
1895     /* If this was a group that remembered the subject start, in order to break
1896     infinite repeats of empty string matches, retrieve the subject start from
1897     the chain. Otherwise, set it NULL. */
1898 
1899     if (*prev >= OP_SBRA || *prev == OP_ONCE)
1900       {
1901       saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
1902       eptrb = eptrb->epb_prev;              /* Backup to previous group */
1903       }
1904     else saved_eptr = NULL;
1905 
1906     /* If we are at the end of an assertion group or a non-capturing atomic
1907     group, stop matching and return MATCH_MATCH, but record the current high
1908     water mark for use by positive assertions. We also need to record the match
1909     start in case it was changed by \K. */
1910 
1911     if ((*prev >= OP_ASSERT && *prev <= OP_ASSERTBACK_NOT) ||
1912          *prev == OP_ONCE_NC)
1913       {
1914       md->end_match_ptr = eptr;      /* For ONCE_NC */
1915       md->end_offset_top = offset_top;
1916       md->start_match_ptr = mstart;
1917       RRETURN(MATCH_MATCH);         /* Sets md->mark */
1918       }
1919 
1920     /* For capturing groups we have to check the group number back at the start
1921     and if necessary complete handling an extraction by setting the offsets and
1922     bumping the high water mark. Whole-pattern recursion is coded as a recurse
1923     into group 0, so it won't be picked up here. Instead, we catch it when the
1924     OP_END is reached. Other recursion is handled here. We just have to record
1925     the current subject position and start match pointer and give a MATCH
1926     return. */
1927 
1928     if (*prev == OP_CBRA || *prev == OP_SCBRA ||
1929         *prev == OP_CBRAPOS || *prev == OP_SCBRAPOS)
1930       {
1931       number = GET2(prev, 1+LINK_SIZE);
1932       offset = number << 1;
1933 
1934 #ifdef PCRE_DEBUG
1935       printf("end bracket %d", number);
1936       printf("\n");
1937 #endif
1938 
1939       /* Handle a recursively called group. */
1940 
1941       if (md->recursive != NULL && md->recursive->group_num == number)
1942         {
1943         md->end_match_ptr = eptr;
1944         md->start_match_ptr = mstart;
1945         RRETURN(MATCH_MATCH);
1946         }
1947 
1948       /* Deal with capturing */
1949 
1950       md->capture_last = number;
1951       if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1952         {
1953         /* If offset is greater than offset_top, it means that we are
1954         "skipping" a capturing group, and that group's offsets must be marked
1955         unset. In earlier versions of PCRE, all the offsets were unset at the
1956         start of matching, but this doesn't work because atomic groups and
1957         assertions can cause a value to be set that should later be unset.
1958         Example: matching /(?>(a))b|(a)c/ against "ac". This sets group 1 as
1959         part of the atomic group, but this is not on the final matching path,
1960         so must be unset when 2 is set. (If there is no group 2, there is no
1961         problem, because offset_top will then be 2, indicating no capture.) */
1962 
1963         if (offset > offset_top)
1964           {
1965           register int *iptr = md->offset_vector + offset_top;
1966           register int *iend = md->offset_vector + offset;
1967           while (iptr < iend) *iptr++ = -1;
1968           }
1969 
1970         /* Now make the extraction */
1971 
1972         md->offset_vector[offset] =
1973           md->offset_vector[md->offset_end - number];
1974         md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1975         if (offset_top <= offset) offset_top = offset + 2;
1976         }
1977       }
1978 
1979     /* For an ordinary non-repeating ket, just continue at this level. This
1980     also happens for a repeating ket if no characters were matched in the
1981     group. This is the forcible breaking of infinite loops as implemented in
1982     Perl 5.005. For a non-repeating atomic group that includes captures,
1983     establish a backup point by processing the rest of the pattern at a lower
1984     level. If this results in a NOMATCH return, pass MATCH_ONCE back to the
1985     original OP_ONCE level, thereby bypassing intermediate backup points, but
1986     resetting any captures that happened along the way. */
1987 
1988     if (*ecode == OP_KET || eptr == saved_eptr)
1989       {
1990       if (*prev == OP_ONCE)
1991         {
1992         RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM12);
1993         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1994         md->once_target = prev;  /* Level at which to change to MATCH_NOMATCH */
1995         RRETURN(MATCH_ONCE);
1996         }
1997       ecode += 1 + LINK_SIZE;    /* Carry on at this level */
1998       break;
1999       }
2000 
2001     /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
2002     and return the MATCH_KETRPOS. This makes it possible to do the repeats one
2003     at a time from the outer level, thus saving stack. */
2004 
2005     if (*ecode == OP_KETRPOS)
2006       {
2007       md->end_match_ptr = eptr;
2008       md->end_offset_top = offset_top;
2009       RRETURN(MATCH_KETRPOS);
2010       }
2011 
2012     /* The normal repeating kets try the rest of the pattern or restart from
2013     the preceding bracket, in the appropriate order. In the second case, we can
2014     use tail recursion to avoid using another stack frame, unless we have an
2015     atomic group or an unlimited repeat of a group that can match an empty
2016     string. */
2017 
2018     if (*ecode == OP_KETRMIN)
2019       {
2020       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM7);
2021       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2022       if (*prev == OP_ONCE)
2023         {
2024         RMATCH(eptr, prev, offset_top, md, eptrb, RM8);
2025         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2026         md->once_target = prev;  /* Level at which to change to MATCH_NOMATCH */
2027         RRETURN(MATCH_ONCE);
2028         }
2029       if (*prev >= OP_SBRA)    /* Could match an empty string */
2030         {
2031         RMATCH(eptr, prev, offset_top, md, eptrb, RM50);
2032         RRETURN(rrc);
2033         }
2034       ecode = prev;
2035       goto TAIL_RECURSE;
2036       }
2037     else  /* OP_KETRMAX */
2038       {
2039       RMATCH(eptr, prev, offset_top, md, eptrb, RM13);
2040       if (rrc == MATCH_ONCE && md->once_target == prev) rrc = MATCH_NOMATCH;
2041       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2042       if (*prev == OP_ONCE)
2043         {
2044         RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM9);
2045         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2046         md->once_target = prev;
2047         RRETURN(MATCH_ONCE);
2048         }
2049       ecode += 1 + LINK_SIZE;
2050       goto TAIL_RECURSE;
2051       }
2052     /* Control never gets here */
2053 
2054     /* Not multiline mode: start of subject assertion, unless notbol. */
2055 
2056     case OP_CIRC:
2057     if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
2058 
2059     /* Start of subject assertion */
2060 
2061     case OP_SOD:
2062     if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
2063     ecode++;
2064     break;
2065 
2066     /* Multiline mode: start of subject unless notbol, or after any newline. */
2067 
2068     case OP_CIRCM:
2069     if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
2070     if (eptr != md->start_subject &&
2071         (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
2072       RRETURN(MATCH_NOMATCH);
2073     ecode++;
2074     break;
2075 
2076     /* Start of match assertion */
2077 
2078     case OP_SOM:
2079     if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
2080     ecode++;
2081     break;
2082 
2083     /* Reset the start of match point */
2084 
2085     case OP_SET_SOM:
2086     mstart = eptr;
2087     ecode++;
2088     break;
2089 
2090     /* Multiline mode: assert before any newline, or before end of subject
2091     unless noteol is set. */
2092 
2093     case OP_DOLLM:
2094     if (eptr < md->end_subject)
2095       {
2096       if (!IS_NEWLINE(eptr))
2097         {
2098         if (md->partial != 0 &&
2099             eptr + 1 >= md->end_subject &&
2100             NLBLOCK->nltype == NLTYPE_FIXED &&
2101             NLBLOCK->nllen == 2 &&
2102             RAWUCHARTEST(eptr) == NLBLOCK->nl[0])
2103           {
2104           md->hitend = TRUE;
2105           if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2106           }
2107         RRETURN(MATCH_NOMATCH);
2108         }
2109       }
2110     else
2111       {
2112       if (md->noteol) RRETURN(MATCH_NOMATCH);
2113       SCHECK_PARTIAL();
2114       }
2115     ecode++;
2116     break;
2117 
2118     /* Not multiline mode: assert before a terminating newline or before end of
2119     subject unless noteol is set. */
2120 
2121     case OP_DOLL:
2122     if (md->noteol) RRETURN(MATCH_NOMATCH);
2123     if (!md->endonly) goto ASSERT_NL_OR_EOS;
2124 
2125     /* ... else fall through for endonly */
2126 
2127     /* End of subject assertion (\z) */
2128 
2129     case OP_EOD:
2130     if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
2131     SCHECK_PARTIAL();
2132     ecode++;
2133     break;
2134 
2135     /* End of subject or ending \n assertion (\Z) */
2136 
2137     case OP_EODN:
2138     ASSERT_NL_OR_EOS:
2139     if (eptr < md->end_subject &&
2140         (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
2141       {
2142       if (md->partial != 0 &&
2143           eptr + 1 >= md->end_subject &&
2144           NLBLOCK->nltype == NLTYPE_FIXED &&
2145           NLBLOCK->nllen == 2 &&
2146           RAWUCHARTEST(eptr) == NLBLOCK->nl[0])
2147         {
2148         md->hitend = TRUE;
2149         if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2150         }
2151       RRETURN(MATCH_NOMATCH);
2152       }
2153 
2154     /* Either at end of string or \n before end. */
2155 
2156     SCHECK_PARTIAL();
2157     ecode++;
2158     break;
2159 
2160     /* Word boundary assertions */
2161 
2162     case OP_NOT_WORD_BOUNDARY:
2163     case OP_WORD_BOUNDARY:
2164       {
2165 
2166       /* Find out if the previous and current characters are "word" characters.
2167       It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
2168       be "non-word" characters. Remember the earliest consulted character for
2169       partial matching. */
2170 
2171 #ifdef SUPPORT_UTF
2172       if (utf)
2173         {
2174         /* Get status of previous character */
2175 
2176         if (eptr == md->start_subject) prev_is_word = FALSE; else
2177           {
2178           PCRE_PUCHAR lastptr = eptr - 1;
2179           BACKCHAR(lastptr);
2180           if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
2181           GETCHAR(c, lastptr);
2182 #ifdef SUPPORT_UCP
2183           if (md->use_ucp)
2184             {
2185             if (c == '_') prev_is_word = TRUE; else
2186               {
2187               int cat = UCD_CATEGORY(c);
2188               prev_is_word = (cat == ucp_L || cat == ucp_N);
2189               }
2190             }
2191           else
2192 #endif
2193           prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
2194           }
2195 
2196         /* Get status of next character */
2197 
2198         if (eptr >= md->end_subject)
2199           {
2200           SCHECK_PARTIAL();
2201           cur_is_word = FALSE;
2202           }
2203         else
2204           {
2205           GETCHAR(c, eptr);
2206 #ifdef SUPPORT_UCP
2207           if (md->use_ucp)
2208             {
2209             if (c == '_') cur_is_word = TRUE; else
2210               {
2211               int cat = UCD_CATEGORY(c);
2212               cur_is_word = (cat == ucp_L || cat == ucp_N);
2213               }
2214             }
2215           else
2216 #endif
2217           cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
2218           }
2219         }
2220       else
2221 #endif
2222 
2223       /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for
2224       consistency with the behaviour of \w we do use it in this case. */
2225 
2226         {
2227         /* Get status of previous character */
2228 
2229         if (eptr == md->start_subject) prev_is_word = FALSE; else
2230           {
2231           if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
2232 #ifdef SUPPORT_UCP
2233           if (md->use_ucp)
2234             {
2235             c = eptr[-1];
2236             if (c == '_') prev_is_word = TRUE; else
2237               {
2238               int cat = UCD_CATEGORY(c);
2239               prev_is_word = (cat == ucp_L || cat == ucp_N);
2240               }
2241             }
2242           else
2243 #endif
2244           prev_is_word = MAX_255(eptr[-1])
2245             && ((md->ctypes[eptr[-1]] & ctype_word) != 0);
2246           }
2247 
2248         /* Get status of next character */
2249 
2250         if (eptr >= md->end_subject)
2251           {
2252           SCHECK_PARTIAL();
2253           cur_is_word = FALSE;
2254           }
2255         else
2256 #ifdef SUPPORT_UCP
2257         if (md->use_ucp)
2258           {
2259           c = *eptr;
2260           if (c == '_') cur_is_word = TRUE; else
2261             {
2262             int cat = UCD_CATEGORY(c);
2263             cur_is_word = (cat == ucp_L || cat == ucp_N);
2264             }
2265           }
2266         else
2267 #endif
2268         cur_is_word = MAX_255(*eptr)
2269           && ((md->ctypes[*eptr] & ctype_word) != 0);
2270         }
2271 
2272       /* Now see if the situation is what we want */
2273 
2274       if ((*ecode++ == OP_WORD_BOUNDARY)?
2275            cur_is_word == prev_is_word : cur_is_word != prev_is_word)
2276         RRETURN(MATCH_NOMATCH);
2277       }
2278     break;
2279 
2280     /* Match any single character type except newline; have to take care with
2281     CRLF newlines and partial matching. */
2282 
2283     case OP_ANY:
2284     if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
2285     if (md->partial != 0 &&
2286         eptr + 1 >= md->end_subject &&
2287         NLBLOCK->nltype == NLTYPE_FIXED &&
2288         NLBLOCK->nllen == 2 &&
2289         RAWUCHARTEST(eptr) == NLBLOCK->nl[0])
2290       {
2291       md->hitend = TRUE;
2292       if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2293       }
2294 
2295     /* Fall through */
2296 
2297     /* Match any single character whatsoever. */
2298 
2299     case OP_ALLANY:
2300     if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */
2301       {                            /* not be updated before SCHECK_PARTIAL. */
2302       SCHECK_PARTIAL();
2303       RRETURN(MATCH_NOMATCH);
2304       }
2305     eptr++;
2306 #ifdef SUPPORT_UTF
2307     if (utf) ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
2308 #endif
2309     ecode++;
2310     break;
2311 
2312     /* Match a single byte, even in UTF-8 mode. This opcode really does match
2313     any byte, even newline, independent of the setting of PCRE_DOTALL. */
2314 
2315     case OP_ANYBYTE:
2316     if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */
2317       {                            /* not be updated before SCHECK_PARTIAL. */
2318       SCHECK_PARTIAL();
2319       RRETURN(MATCH_NOMATCH);
2320       }
2321     eptr++;
2322     ecode++;
2323     break;
2324 
2325     case OP_NOT_DIGIT:
2326     if (eptr >= md->end_subject)
2327       {
2328       SCHECK_PARTIAL();
2329       RRETURN(MATCH_NOMATCH);
2330       }
2331     GETCHARINCTEST(c, eptr);
2332     if (
2333 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2334        c < 256 &&
2335 #endif
2336        (md->ctypes[c] & ctype_digit) != 0
2337        )
2338       RRETURN(MATCH_NOMATCH);
2339     ecode++;
2340     break;
2341 
2342     case OP_DIGIT:
2343     if (eptr >= md->end_subject)
2344       {
2345       SCHECK_PARTIAL();
2346       RRETURN(MATCH_NOMATCH);
2347       }
2348     GETCHARINCTEST(c, eptr);
2349     if (
2350 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2351        c > 255 ||
2352 #endif
2353        (md->ctypes[c] & ctype_digit) == 0
2354        )
2355       RRETURN(MATCH_NOMATCH);
2356     ecode++;
2357     break;
2358 
2359     case OP_NOT_WHITESPACE:
2360     if (eptr >= md->end_subject)
2361       {
2362       SCHECK_PARTIAL();
2363       RRETURN(MATCH_NOMATCH);
2364       }
2365     GETCHARINCTEST(c, eptr);
2366     if (
2367 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2368        c < 256 &&
2369 #endif
2370        (md->ctypes[c] & ctype_space) != 0
2371        )
2372       RRETURN(MATCH_NOMATCH);
2373     ecode++;
2374     break;
2375 
2376     case OP_WHITESPACE:
2377     if (eptr >= md->end_subject)
2378       {
2379       SCHECK_PARTIAL();
2380       RRETURN(MATCH_NOMATCH);
2381       }
2382     GETCHARINCTEST(c, eptr);
2383     if (
2384 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2385        c > 255 ||
2386 #endif
2387        (md->ctypes[c] & ctype_space) == 0
2388        )
2389       RRETURN(MATCH_NOMATCH);
2390     ecode++;
2391     break;
2392 
2393     case OP_NOT_WORDCHAR:
2394     if (eptr >= md->end_subject)
2395       {
2396       SCHECK_PARTIAL();
2397       RRETURN(MATCH_NOMATCH);
2398       }
2399     GETCHARINCTEST(c, eptr);
2400     if (
2401 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2402        c < 256 &&
2403 #endif
2404        (md->ctypes[c] & ctype_word) != 0
2405        )
2406       RRETURN(MATCH_NOMATCH);
2407     ecode++;
2408     break;
2409 
2410     case OP_WORDCHAR:
2411     if (eptr >= md->end_subject)
2412       {
2413       SCHECK_PARTIAL();
2414       RRETURN(MATCH_NOMATCH);
2415       }
2416     GETCHARINCTEST(c, eptr);
2417     if (
2418 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2419        c > 255 ||
2420 #endif
2421        (md->ctypes[c] & ctype_word) == 0
2422        )
2423       RRETURN(MATCH_NOMATCH);
2424     ecode++;
2425     break;
2426 
2427     case OP_ANYNL:
2428     if (eptr >= md->end_subject)
2429       {
2430       SCHECK_PARTIAL();
2431       RRETURN(MATCH_NOMATCH);
2432       }
2433     GETCHARINCTEST(c, eptr);
2434     switch(c)
2435       {
2436       default: RRETURN(MATCH_NOMATCH);
2437 
2438       case CHAR_CR:
2439       if (eptr >= md->end_subject)
2440         {
2441         SCHECK_PARTIAL();
2442         }
2443       else if (RAWUCHARTEST(eptr) == CHAR_LF) eptr++;
2444       break;
2445 
2446       case CHAR_LF:
2447       break;
2448 
2449       case CHAR_VT:
2450       case CHAR_FF:
2451       case CHAR_NEL:
2452 #ifndef EBCDIC
2453       case 0x2028:
2454       case 0x2029:
2455 #endif  /* Not EBCDIC */
2456       if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
2457       break;
2458       }
2459     ecode++;
2460     break;
2461 
2462     case OP_NOT_HSPACE:
2463     if (eptr >= md->end_subject)
2464       {
2465       SCHECK_PARTIAL();
2466       RRETURN(MATCH_NOMATCH);
2467       }
2468     GETCHARINCTEST(c, eptr);
2469     switch(c)
2470       {
2471       HSPACE_CASES: RRETURN(MATCH_NOMATCH);  /* Byte and multibyte cases */
2472       default: break;
2473       }
2474     ecode++;
2475     break;
2476 
2477     case OP_HSPACE:
2478     if (eptr >= md->end_subject)
2479       {
2480       SCHECK_PARTIAL();
2481       RRETURN(MATCH_NOMATCH);
2482       }
2483     GETCHARINCTEST(c, eptr);
2484     switch(c)
2485       {
2486       HSPACE_CASES: break;  /* Byte and multibyte cases */
2487       default: RRETURN(MATCH_NOMATCH);
2488       }
2489     ecode++;
2490     break;
2491 
2492     case OP_NOT_VSPACE:
2493     if (eptr >= md->end_subject)
2494       {
2495       SCHECK_PARTIAL();
2496       RRETURN(MATCH_NOMATCH);
2497       }
2498     GETCHARINCTEST(c, eptr);
2499     switch(c)
2500       {
2501       VSPACE_CASES: RRETURN(MATCH_NOMATCH);
2502       default: break;
2503       }
2504     ecode++;
2505     break;
2506 
2507     case OP_VSPACE:
2508     if (eptr >= md->end_subject)
2509       {
2510       SCHECK_PARTIAL();
2511       RRETURN(MATCH_NOMATCH);
2512       }
2513     GETCHARINCTEST(c, eptr);
2514     switch(c)
2515       {
2516       VSPACE_CASES: break;
2517       default: RRETURN(MATCH_NOMATCH);
2518       }
2519     ecode++;
2520     break;
2521 
2522 #ifdef SUPPORT_UCP
2523     /* Check the next character by Unicode property. We will get here only
2524     if the support is in the binary; otherwise a compile-time error occurs. */
2525 
2526     case OP_PROP:
2527     case OP_NOTPROP:
2528     if (eptr >= md->end_subject)
2529       {
2530       SCHECK_PARTIAL();
2531       RRETURN(MATCH_NOMATCH);
2532       }
2533     GETCHARINCTEST(c, eptr);
2534       {
2535       const pcre_uint32 *cp;
2536       const ucd_record *prop = GET_UCD(c);
2537 
2538       switch(ecode[1])
2539         {
2540         case PT_ANY:
2541         if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
2542         break;
2543 
2544         case PT_LAMP:
2545         if ((prop->chartype == ucp_Lu ||
2546              prop->chartype == ucp_Ll ||
2547              prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
2548           RRETURN(MATCH_NOMATCH);
2549         break;
2550 
2551         case PT_GC:
2552         if ((ecode[2] != PRIV(ucp_gentype)[prop->chartype]) == (op == OP_PROP))
2553           RRETURN(MATCH_NOMATCH);
2554         break;
2555 
2556         case PT_PC:
2557         if ((ecode[2] != prop->chartype) == (op == OP_PROP))
2558           RRETURN(MATCH_NOMATCH);
2559         break;
2560 
2561         case PT_SC:
2562         if ((ecode[2] != prop->script) == (op == OP_PROP))
2563           RRETURN(MATCH_NOMATCH);
2564         break;
2565 
2566         /* These are specials */
2567 
2568         case PT_ALNUM:
2569         if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2570              PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
2571           RRETURN(MATCH_NOMATCH);
2572         break;
2573 
2574         case PT_SPACE:    /* Perl space */
2575         if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
2576              c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
2577                == (op == OP_NOTPROP))
2578           RRETURN(MATCH_NOMATCH);
2579         break;
2580 
2581         case PT_PXSPACE:  /* POSIX space */
2582         if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
2583              c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
2584              c == CHAR_FF || c == CHAR_CR)
2585                == (op == OP_NOTPROP))
2586           RRETURN(MATCH_NOMATCH);
2587         break;
2588 
2589         case PT_WORD:
2590         if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2591              PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
2592              c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
2593           RRETURN(MATCH_NOMATCH);
2594         break;
2595 
2596         case PT_CLIST:
2597         cp = PRIV(ucd_caseless_sets) + ecode[2];
2598         for (;;)
2599           {
2600           if (c < *cp)
2601             { if (op == OP_PROP) { RRETURN(MATCH_NOMATCH); } else break; }
2602           if (c == *cp++)
2603             { if (op == OP_PROP) break; else { RRETURN(MATCH_NOMATCH); } }
2604           }
2605         break;
2606 
2607         /* This should never occur */
2608 
2609         default:
2610         RRETURN(PCRE_ERROR_INTERNAL);
2611         }
2612 
2613       ecode += 3;
2614       }
2615     break;
2616 
2617     /* Match an extended Unicode sequence. We will get here only if the support
2618     is in the binary; otherwise a compile-time error occurs. */
2619 
2620     case OP_EXTUNI:
2621     if (eptr >= md->end_subject)
2622       {
2623       SCHECK_PARTIAL();
2624       RRETURN(MATCH_NOMATCH);
2625       }
2626     else
2627       {
2628       int lgb, rgb;
2629       GETCHARINCTEST(c, eptr);
2630       lgb = UCD_GRAPHBREAK(c);
2631       while (eptr < md->end_subject)
2632         {
2633         int len = 1;
2634         if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
2635         rgb = UCD_GRAPHBREAK(c);
2636         if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
2637         lgb = rgb;
2638         eptr += len;
2639         }
2640       }
2641     CHECK_PARTIAL();
2642     ecode++;
2643     break;
2644 #endif  /* SUPPORT_UCP */
2645 
2646 
2647     /* Match a back reference, possibly repeatedly. Look past the end of the
2648     item to see if there is repeat information following. The code is similar
2649     to that for character classes, but repeated for efficiency. Then obey
2650     similar code to character type repeats - written out again for speed.
2651     However, if the referenced string is the empty string, always treat
2652     it as matched, any number of times (otherwise there could be infinite
2653     loops). */
2654 
2655     case OP_REF:
2656     case OP_REFI:
2657     caseless = op == OP_REFI;
2658     offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
2659     ecode += 1 + IMM2_SIZE;
2660 
2661     /* If the reference is unset, there are two possibilities:
2662 
2663     (a) In the default, Perl-compatible state, set the length negative;
2664     this ensures that every attempt at a match fails. We can't just fail
2665     here, because of the possibility of quantifiers with zero minima.
2666 
2667     (b) If the JavaScript compatibility flag is set, set the length to zero
2668     so that the back reference matches an empty string.
2669 
2670     Otherwise, set the length to the length of what was matched by the
2671     referenced subpattern. */
2672 
2673     if (offset >= offset_top || md->offset_vector[offset] < 0)
2674       length = (md->jscript_compat)? 0 : -1;
2675     else
2676       length = md->offset_vector[offset+1] - md->offset_vector[offset];
2677 
2678     /* Set up for repetition, or handle the non-repeated case */
2679 
2680     switch (*ecode)
2681       {
2682       case OP_CRSTAR:
2683       case OP_CRMINSTAR:
2684       case OP_CRPLUS:
2685       case OP_CRMINPLUS:
2686       case OP_CRQUERY:
2687       case OP_CRMINQUERY:
2688       c = *ecode++ - OP_CRSTAR;
2689       minimize = (c & 1) != 0;
2690       min = rep_min[c];                 /* Pick up values from tables; */
2691       max = rep_max[c];                 /* zero for max => infinity */
2692       if (max == 0) max = INT_MAX;
2693       break;
2694 
2695       case OP_CRRANGE:
2696       case OP_CRMINRANGE:
2697       minimize = (*ecode == OP_CRMINRANGE);
2698       min = GET2(ecode, 1);
2699       max = GET2(ecode, 1 + IMM2_SIZE);
2700       if (max == 0) max = INT_MAX;
2701       ecode += 1 + 2 * IMM2_SIZE;
2702       break;
2703 
2704       default:               /* No repeat follows */
2705       if ((length = match_ref(offset, eptr, length, md, caseless)) < 0)
2706         {
2707         if (length == -2) eptr = md->end_subject;   /* Partial match */
2708         CHECK_PARTIAL();
2709         RRETURN(MATCH_NOMATCH);
2710         }
2711       eptr += length;
2712       continue;              /* With the main loop */
2713       }
2714 
2715     /* Handle repeated back references. If the length of the reference is
2716     zero, just continue with the main loop. If the length is negative, it
2717     means the reference is unset in non-JavaScript-compatible mode. If the minimum is
2718     zero, we can continue at the same level without recursion. For any other
2719     minimum, carrying on will result in NOMATCH. */
2720 
2721     if (length == 0) continue;
2722     if (length < 0 && min == 0) continue;
2723 
2724     /* First, ensure the minimum number of matches are present. We get back
2725     the length of the reference string explicitly rather than passing the
2726     address of eptr, so that eptr can be a register variable. */
2727 
2728     for (i = 1; i <= min; i++)
2729       {
2730       int slength;
2731       if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2732         {
2733         if (slength == -2) eptr = md->end_subject;   /* Partial match */
2734         CHECK_PARTIAL();
2735         RRETURN(MATCH_NOMATCH);
2736         }
2737       eptr += slength;
2738       }
2739 
2740     /* If min = max, continue at the same level without recursion.
2741     They are not both allowed to be zero. */
2742 
2743     if (min == max) continue;
2744 
2745     /* If minimizing, keep trying and advancing the pointer */
2746 
2747     if (minimize)
2748       {
2749       for (fi = min;; fi++)
2750         {
2751         int slength;
2752         RMATCH(eptr, ecode, offset_top, md, eptrb, RM14);
2753         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2754         if (fi >= max) RRETURN(MATCH_NOMATCH);
2755         if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2756           {
2757           if (slength == -2) eptr = md->end_subject;   /* Partial match */
2758           CHECK_PARTIAL();
2759           RRETURN(MATCH_NOMATCH);
2760           }
2761         eptr += slength;
2762         }
2763       /* Control never gets here */
2764       }
2765 
2766     /* If maximizing, find the longest string and work backwards */
2767 
2768     else
2769       {
2770       pp = eptr;
2771       for (i = min; i < max; i++)
2772         {
2773         int slength;
2774         if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2775           {
2776           /* Can't use CHECK_PARTIAL because we don't want to update eptr in
2777           the soft partial matching case. */
2778 
2779           if (slength == -2 && md->partial != 0 &&
2780               md->end_subject > md->start_used_ptr)
2781             {
2782             md->hitend = TRUE;
2783             if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2784             }
2785           break;
2786           }
2787         eptr += slength;
2788         }
2789 
2790       while (eptr >= pp)
2791         {
2792         RMATCH(eptr, ecode, offset_top, md, eptrb, RM15);
2793         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2794         eptr -= length;
2795         }
2796       RRETURN(MATCH_NOMATCH);
2797       }
2798     /* Control never gets here */
2799 
2800     /* Match a bit-mapped character class, possibly repeatedly. This op code is
2801     used when all the characters in the class have values in the range 0-255,
2802     and either the matching is caseful, or the characters are in the range
2803     0-127 when UTF-8 processing is enabled. The only difference between
2804     OP_CLASS and OP_NCLASS occurs when a data character outside the range is
2805     encountered.
2806 
2807     First, look past the end of the item to see if there is repeat information
2808     following. Then obey similar code to character type repeats - written out
2809     again for speed. */
2810 
2811     case OP_NCLASS:
2812     case OP_CLASS:
2813       {
2814       /* The data variable is saved across frames, so the byte map needs to
2815       be stored there. */
2816 #define BYTE_MAP ((pcre_uint8 *)data)
2817       data = ecode + 1;                /* Save for matching */
2818       ecode += 1 + (32 / sizeof(pcre_uchar)); /* Advance past the item */
2819 
2820       switch (*ecode)
2821         {
2822         case OP_CRSTAR:
2823         case OP_CRMINSTAR:
2824         case OP_CRPLUS:
2825         case OP_CRMINPLUS:
2826         case OP_CRQUERY:
2827         case OP_CRMINQUERY:
2828         c = *ecode++ - OP_CRSTAR;
2829         minimize = (c & 1) != 0;
2830         min = rep_min[c];                 /* Pick up values from tables; */
2831         max = rep_max[c];                 /* zero for max => infinity */
2832         if (max == 0) max = INT_MAX;
2833         break;
2834 
2835         case OP_CRRANGE:
2836         case OP_CRMINRANGE:
2837         minimize = (*ecode == OP_CRMINRANGE);
2838         min = GET2(ecode, 1);
2839         max = GET2(ecode, 1 + IMM2_SIZE);
2840         if (max == 0) max = INT_MAX;
2841         ecode += 1 + 2 * IMM2_SIZE;
2842         break;
2843 
2844         default:               /* No repeat follows */
2845         min = max = 1;
2846         break;
2847         }
2848 
2849       /* First, ensure the minimum number of matches are present. */
2850 
2851 #ifdef SUPPORT_UTF
2852       if (utf)
2853         {
2854         for (i = 1; i <= min; i++)
2855           {
2856           if (eptr >= md->end_subject)
2857             {
2858             SCHECK_PARTIAL();
2859             RRETURN(MATCH_NOMATCH);
2860             }
2861           GETCHARINC(c, eptr);
2862           if (c > 255)
2863             {
2864             if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2865             }
2866           else
2867             if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2868           }
2869         }
2870       else
2871 #endif
2872       /* Not UTF mode */
2873         {
2874         for (i = 1; i <= min; i++)
2875           {
2876           if (eptr >= md->end_subject)
2877             {
2878             SCHECK_PARTIAL();
2879             RRETURN(MATCH_NOMATCH);
2880             }
2881           c = *eptr++;
2882 #ifndef COMPILE_PCRE8
2883           if (c > 255)
2884             {
2885             if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2886             }
2887           else
2888 #endif
2889             if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2890           }
2891         }
2892 
2893       /* If max == min we can continue with the main loop without the
2894       need to recurse. */
2895 
2896       if (min == max) continue;
2897 
2898       /* If minimizing, keep testing the rest of the expression and advancing
2899       the pointer while it matches the class. */
2900 
2901       if (minimize)
2902         {
2903 #ifdef SUPPORT_UTF
2904         if (utf)
2905           {
2906           for (fi = min;; fi++)
2907             {
2908             RMATCH(eptr, ecode, offset_top, md, eptrb, RM16);
2909             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2910             if (fi >= max) RRETURN(MATCH_NOMATCH);
2911             if (eptr >= md->end_subject)
2912               {
2913               SCHECK_PARTIAL();
2914               RRETURN(MATCH_NOMATCH);
2915               }
2916             GETCHARINC(c, eptr);
2917             if (c > 255)
2918               {
2919               if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2920               }
2921             else
2922               if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2923             }
2924           }
2925         else
2926 #endif
2927         /* Not UTF mode */
2928           {
2929           for (fi = min;; fi++)
2930             {
2931             RMATCH(eptr, ecode, offset_top, md, eptrb, RM17);
2932             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2933             if (fi >= max) RRETURN(MATCH_NOMATCH);
2934             if (eptr >= md->end_subject)
2935               {
2936               SCHECK_PARTIAL();
2937               RRETURN(MATCH_NOMATCH);
2938               }
2939             c = *eptr++;
2940 #ifndef COMPILE_PCRE8
2941             if (c > 255)
2942               {
2943               if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2944               }
2945             else
2946 #endif
2947               if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2948             }
2949           }
2950         /* Control never gets here */
2951         }
2952 
2953       /* If maximizing, find the longest possible run, then work backwards. */
2954 
2955       else
2956         {
2957         pp = eptr;
2958 
2959 #ifdef SUPPORT_UTF
2960         if (utf)
2961           {
2962           for (i = min; i < max; i++)
2963             {
2964             int len = 1;
2965             if (eptr >= md->end_subject)
2966               {
2967               SCHECK_PARTIAL();
2968               break;
2969               }
2970             GETCHARLEN(c, eptr, len);
2971             if (c > 255)
2972               {
2973               if (op == OP_CLASS) break;
2974               }
2975             else
2976               if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
2977             eptr += len;
2978             }
2979           for (;;)
2980             {
2981             RMATCH(eptr, ecode, offset_top, md, eptrb, RM18);
2982             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2983             if (eptr-- == pp) break;        /* Stop if tried at original pos */
2984             BACKCHAR(eptr);
2985             }
2986           }
2987         else
2988 #endif
2989           /* Not UTF mode */
2990           {
2991           for (i = min; i < max; i++)
2992             {
2993             if (eptr >= md->end_subject)
2994               {
2995               SCHECK_PARTIAL();
2996               break;
2997               }
2998             c = *eptr;
2999 #ifndef COMPILE_PCRE8
3000             if (c > 255)
3001               {
3002               if (op == OP_CLASS) break;
3003               }
3004             else
3005 #endif
3006               if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
3007             eptr++;
3008             }
3009           while (eptr >= pp)
3010             {
3011             RMATCH(eptr, ecode, offset_top, md, eptrb, RM19);
3012             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3013             eptr--;
3014             }
3015           }
3016 
3017         RRETURN(MATCH_NOMATCH);
3018         }
3019 #undef BYTE_MAP
3020       }
3021     /* Control never gets here */
3022 
3023 
3024     /* Match an extended character class. This opcode is encountered only
3025     when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
3026     mode, because Unicode properties are supported in non-UTF-8 mode. */
3027 
3028 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3029     case OP_XCLASS:
3030       {
3031       data = ecode + 1 + LINK_SIZE;                /* Save for matching */
3032       ecode += GET(ecode, 1);                      /* Advance past the item */
3033 
3034       switch (*ecode)
3035         {
3036         case OP_CRSTAR:
3037         case OP_CRMINSTAR:
3038         case OP_CRPLUS:
3039         case OP_CRMINPLUS:
3040         case OP_CRQUERY:
3041         case OP_CRMINQUERY:
3042         c = *ecode++ - OP_CRSTAR;
3043         minimize = (c & 1) != 0;
3044         min = rep_min[c];                 /* Pick up values from tables; */
3045         max = rep_max[c];                 /* zero for max => infinity */
3046         if (max == 0) max = INT_MAX;
3047         break;
3048 
3049         case OP_CRRANGE:
3050         case OP_CRMINRANGE:
3051         minimize = (*ecode == OP_CRMINRANGE);
3052         min = GET2(ecode, 1);
3053         max = GET2(ecode, 1 + IMM2_SIZE);
3054         if (max == 0) max = INT_MAX;
3055         ecode += 1 + 2 * IMM2_SIZE;
3056         break;
3057 
3058         default:               /* No repeat follows */
3059         min = max = 1;
3060         break;
3061         }
3062 
3063       /* First, ensure the minimum number of matches are present. */
3064 
3065       for (i = 1; i <= min; i++)
3066         {
3067         if (eptr >= md->end_subject)
3068           {
3069           SCHECK_PARTIAL();
3070           RRETURN(MATCH_NOMATCH);
3071           }
3072         GETCHARINCTEST(c, eptr);
3073         if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
3074         }
3075 
3076       /* If max == min we can continue with the main loop without the
3077       need to recurse. */
3078 
3079       if (min == max) continue;
3080 
3081       /* If minimizing, keep testing the rest of the expression and advancing
3082       the pointer while it matches the class. */
3083 
3084       if (minimize)
3085         {
3086         for (fi = min;; fi++)
3087           {
3088           RMATCH(eptr, ecode, offset_top, md, eptrb, RM20);
3089           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3090           if (fi >= max) RRETURN(MATCH_NOMATCH);
3091           if (eptr >= md->end_subject)
3092             {
3093             SCHECK_PARTIAL();
3094             RRETURN(MATCH_NOMATCH);
3095             }
3096           GETCHARINCTEST(c, eptr);
3097           if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
3098           }
3099         /* Control never gets here */
3100         }
3101 
3102       /* If maximizing, find the longest possible run, then work backwards. */
3103 
3104       else
3105         {
3106         pp = eptr;
3107         for (i = min; i < max; i++)
3108           {
3109           int len = 1;
3110           if (eptr >= md->end_subject)
3111             {
3112             SCHECK_PARTIAL();
3113             break;
3114             }
3115 #ifdef SUPPORT_UTF
3116           GETCHARLENTEST(c, eptr, len);
3117 #else
3118           c = *eptr;
3119 #endif
3120           if (!PRIV(xclass)(c, data, utf)) break;
3121           eptr += len;
3122           }
3123         for(;;)
3124           {
3125           RMATCH(eptr, ecode, offset_top, md, eptrb, RM21);
3126           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3127           if (eptr-- == pp) break;        /* Stop if tried at original pos */
3128 #ifdef SUPPORT_UTF
3129           if (utf) BACKCHAR(eptr);
3130 #endif
3131           }
3132         RRETURN(MATCH_NOMATCH);
3133         }
3134 
3135       /* Control never gets here */
3136       }
3137 #endif    /* End of XCLASS */
3138 
3139     /* Match a single character, casefully */
3140 
3141     case OP_CHAR:
3142 #ifdef SUPPORT_UTF
3143     if (utf)
3144       {
3145       length = 1;
3146       ecode++;
3147       GETCHARLEN(fc, ecode, length);
3148       if (length > md->end_subject - eptr)
3149         {
3150         CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
3151         RRETURN(MATCH_NOMATCH);
3152         }
3153       while (length-- > 0) if (*ecode++ != RAWUCHARINC(eptr)) RRETURN(MATCH_NOMATCH);
3154       }
3155     else
3156 #endif
3157     /* Not UTF mode */
3158       {
3159       if (md->end_subject - eptr < 1)
3160         {
3161         SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
3162         RRETURN(MATCH_NOMATCH);
3163         }
3164       if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
3165       ecode += 2;
3166       }
3167     break;
3168 
3169     /* Match a single character, caselessly. If we are at the end of the
3170     subject, give up immediately. */
3171 
3172     case OP_CHARI:
3173     if (eptr >= md->end_subject)
3174       {
3175       SCHECK_PARTIAL();
3176       RRETURN(MATCH_NOMATCH);
3177       }
3178 
3179 #ifdef SUPPORT_UTF
3180     if (utf)
3181       {
3182       length = 1;
3183       ecode++;
3184       GETCHARLEN(fc, ecode, length);
3185 
3186       /* If the pattern character's value is < 128, we have only one byte, and
3187       we know that its other case must also be one byte long, so we can use the
3188       fast lookup table. We know that there is at least one byte left in the
3189       subject. */
3190 
3191       if (fc < 128)
3192         {
3193         pcre_uchar cc = RAWUCHAR(eptr);
3194         if (md->lcc[fc] != TABLE_GET(cc, md->lcc, cc)) RRETURN(MATCH_NOMATCH);
3195         ecode++;
3196         eptr++;
3197         }
3198 
3199       /* Otherwise we must pick up the subject character. Note that we cannot
3200       use the value of "length" to check for sufficient bytes left, because the
3201       other case of the character may have more or fewer bytes.  */
3202 
3203       else
3204         {
3205         pcre_uint32 dc;
3206         GETCHARINC(dc, eptr);
3207         ecode += length;
3208 
3209         /* If we have Unicode property support, we can use it to test the other
3210         case of the character, if there is one. */
3211 
3212         if (fc != dc)
3213           {
3214 #ifdef SUPPORT_UCP
3215           if (dc != UCD_OTHERCASE(fc))
3216 #endif
3217             RRETURN(MATCH_NOMATCH);
3218           }
3219         }
3220       }
3221     else
3222 #endif   /* SUPPORT_UTF */
3223 
3224     /* Not UTF mode */
3225       {
3226       if (TABLE_GET(ecode[1], md->lcc, ecode[1])
3227           != TABLE_GET(*eptr, md->lcc, *eptr)) RRETURN(MATCH_NOMATCH);
3228       eptr++;
3229       ecode += 2;
3230       }
3231     break;
3232 
3233     /* Match a single character repeatedly. */
3234 
3235     case OP_EXACT:
3236     case OP_EXACTI:
3237     min = max = GET2(ecode, 1);
3238     ecode += 1 + IMM2_SIZE;
3239     goto REPEATCHAR;
3240 
3241     case OP_POSUPTO:
3242     case OP_POSUPTOI:
3243     possessive = TRUE;
3244     /* Fall through */
3245 
3246     case OP_UPTO:
3247     case OP_UPTOI:
3248     case OP_MINUPTO:
3249     case OP_MINUPTOI:
3250     min = 0;
3251     max = GET2(ecode, 1);
3252     minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI;
3253     ecode += 1 + IMM2_SIZE;
3254     goto REPEATCHAR;
3255 
3256     case OP_POSSTAR:
3257     case OP_POSSTARI:
3258     possessive = TRUE;
3259     min = 0;
3260     max = INT_MAX;
3261     ecode++;
3262     goto REPEATCHAR;
3263 
3264     case OP_POSPLUS:
3265     case OP_POSPLUSI:
3266     possessive = TRUE;
3267     min = 1;
3268     max = INT_MAX;
3269     ecode++;
3270     goto REPEATCHAR;
3271 
3272     case OP_POSQUERY:
3273     case OP_POSQUERYI:
3274     possessive = TRUE;
3275     min = 0;
3276     max = 1;
3277     ecode++;
3278     goto REPEATCHAR;
3279 
3280     case OP_STAR:
3281     case OP_STARI:
3282     case OP_MINSTAR:
3283     case OP_MINSTARI:
3284     case OP_PLUS:
3285     case OP_PLUSI:
3286     case OP_MINPLUS:
3287     case OP_MINPLUSI:
3288     case OP_QUERY:
3289     case OP_QUERYI:
3290     case OP_MINQUERY:
3291     case OP_MINQUERYI:
3292     c = *ecode++ - ((op < OP_STARI)? OP_STAR : OP_STARI);
3293     minimize = (c & 1) != 0;
3294     min = rep_min[c];                 /* Pick up values from tables; */
3295     max = rep_max[c];                 /* zero for max => infinity */
3296     if (max == 0) max = INT_MAX;
3297 
3298     /* Common code for all repeated single-character matches. */
3299 
3300     REPEATCHAR:
3301 #ifdef SUPPORT_UTF
3302     if (utf)
3303       {
3304       length = 1;
3305       charptr = ecode;
3306       GETCHARLEN(fc, ecode, length);
3307       ecode += length;
3308 
3309       /* Handle multibyte character matching specially here. There is
3310       support for caseless matching if UCP support is present. */
3311 
3312       if (length > 1)
3313         {
3314 #ifdef SUPPORT_UCP
3315         pcre_uint32 othercase;
3316         if (op >= OP_STARI &&     /* Caseless */
3317             (othercase = UCD_OTHERCASE(fc)) != fc)
3318           oclength = PRIV(ord2utf)(othercase, occhars);
3319         else oclength = 0;
3320 #endif  /* SUPPORT_UCP */
3321 
3322         for (i = 1; i <= min; i++)
3323           {
3324           if (eptr <= md->end_subject - length &&
3325             memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3326 #ifdef SUPPORT_UCP
3327           else if (oclength > 0 &&
3328                    eptr <= md->end_subject - oclength &&
3329                    memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3330 #endif  /* SUPPORT_UCP */
3331           else
3332             {
3333             CHECK_PARTIAL();
3334             RRETURN(MATCH_NOMATCH);
3335             }
3336           }
3337 
3338         if (min == max) continue;
3339 
3340         if (minimize)
3341           {
3342           for (fi = min;; fi++)
3343             {
3344             RMATCH(eptr, ecode, offset_top, md, eptrb, RM22);
3345             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3346             if (fi >= max) RRETURN(MATCH_NOMATCH);
3347             if (eptr <= md->end_subject - length &&
3348               memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3349 #ifdef SUPPORT_UCP
3350             else if (oclength > 0 &&
3351                      eptr <= md->end_subject - oclength &&
3352                      memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3353 #endif  /* SUPPORT_UCP */
3354             else
3355               {
3356               CHECK_PARTIAL();
3357               RRETURN(MATCH_NOMATCH);
3358               }
3359             }
3360           /* Control never gets here */
3361           }
3362 
3363         else  /* Maximize */
3364           {
3365           pp = eptr;
3366           for (i = min; i < max; i++)
3367             {
3368             if (eptr <= md->end_subject - length &&
3369                 memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3370 #ifdef SUPPORT_UCP
3371             else if (oclength > 0 &&
3372                      eptr <= md->end_subject - oclength &&
3373                      memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3374 #endif  /* SUPPORT_UCP */
3375             else
3376               {
3377               CHECK_PARTIAL();
3378               break;
3379               }
3380             }
3381 
3382           if (possessive) continue;
3383 
3384           for(;;)
3385             {
3386             RMATCH(eptr, ecode, offset_top, md, eptrb, RM23);
3387             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3388             if (eptr == pp) { RRETURN(MATCH_NOMATCH); }
3389 #ifdef SUPPORT_UCP
3390             eptr--;
3391             BACKCHAR(eptr);
3392 #else   /* without SUPPORT_UCP */
3393             eptr -= length;
3394 #endif  /* SUPPORT_UCP */
3395             }
3396           }
3397         /* Control never gets here */
3398         }
3399 
3400       /* If the length of a UTF-8 character is 1, we fall through here, and
3401       obey the code as for non-UTF-8 characters below, though in this case the
3402       value of fc will always be < 128. */
3403       }
3404     else
3405 #endif  /* SUPPORT_UTF */
3406       /* When not in UTF-8 mode, load a single-byte character. */
3407       fc = *ecode++;
3408 
3409     /* The value of fc at this point is always one character, though we may
3410     or may not be in UTF mode. The code is duplicated for the caseless and
3411     caseful cases, for speed, since matching characters is likely to be quite
3412     common. First, ensure the minimum number of matches are present. If min =
3413     max, continue at the same level without recursing. Otherwise, if
3414     minimizing, keep trying the rest of the expression and advancing one
3415     matching character if failing, up to the maximum. Alternatively, if
3416     maximizing, find the maximum number of characters and work backwards. */
3417 
3418     DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
3419       max, (char *)eptr));
3420 
3421     if (op >= OP_STARI)  /* Caseless */
3422       {
3423 #ifdef COMPILE_PCRE8
3424       /* fc must be < 128 if UTF is enabled. */
3425       foc = md->fcc[fc];
3426 #else
3427 #ifdef SUPPORT_UTF
3428 #ifdef SUPPORT_UCP
3429       if (utf && fc > 127)
3430         foc = UCD_OTHERCASE(fc);
3431 #else
3432       if (utf && fc > 127)
3433         foc = fc;
3434 #endif /* SUPPORT_UCP */
3435       else
3436 #endif /* SUPPORT_UTF */
3437         foc = TABLE_GET(fc, md->fcc, fc);
3438 #endif /* COMPILE_PCRE8 */
3439 
3440       for (i = 1; i <= min; i++)
3441         {
3442         pcre_uchar cc;
3443 
3444         if (eptr >= md->end_subject)
3445           {
3446           SCHECK_PARTIAL();
3447           RRETURN(MATCH_NOMATCH);
3448           }
3449         cc = RAWUCHARTEST(eptr);
3450         if (fc != cc && foc != cc) RRETURN(MATCH_NOMATCH);
3451         eptr++;
3452         }
3453       if (min == max) continue;
3454       if (minimize)
3455         {
3456         for (fi = min;; fi++)
3457           {
3458           pcre_uchar cc;
3459 
3460           RMATCH(eptr, ecode, offset_top, md, eptrb, RM24);
3461           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3462           if (fi >= max) RRETURN(MATCH_NOMATCH);
3463           if (eptr >= md->end_subject)
3464             {
3465             SCHECK_PARTIAL();
3466             RRETURN(MATCH_NOMATCH);
3467             }
3468           cc = RAWUCHARTEST(eptr);
3469           if (fc != cc && foc != cc) RRETURN(MATCH_NOMATCH);
3470           eptr++;
3471           }
3472         /* Control never gets here */
3473         }
3474       else  /* Maximize */
3475         {
3476         pp = eptr;
3477         for (i = min; i < max; i++)
3478           {
3479           pcre_uchar cc;
3480 
3481           if (eptr >= md->end_subject)
3482             {
3483             SCHECK_PARTIAL();
3484             break;
3485             }
3486           cc = RAWUCHARTEST(eptr);
3487           if (fc != cc && foc != cc) break;
3488           eptr++;
3489           }
3490 
3491         if (possessive) continue;
3492 
3493         while (eptr >= pp)
3494           {
3495           RMATCH(eptr, ecode, offset_top, md, eptrb, RM25);
3496           eptr--;
3497           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3498           }
3499         RRETURN(MATCH_NOMATCH);
3500         }
3501       /* Control never gets here */
3502       }
3503 
3504     /* Caseful comparisons (includes all multi-byte characters) */
3505 
3506     else
3507       {
3508       for (i = 1; i <= min; i++)
3509         {
3510         if (eptr >= md->end_subject)
3511           {
3512           SCHECK_PARTIAL();
3513           RRETURN(MATCH_NOMATCH);
3514           }
3515         if (fc != RAWUCHARINCTEST(eptr)) RRETURN(MATCH_NOMATCH);
3516         }
3517 
3518       if (min == max) continue;
3519 
3520       if (minimize)
3521         {
3522         for (fi = min;; fi++)
3523           {
3524           RMATCH(eptr, ecode, offset_top, md, eptrb, RM26);
3525           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3526           if (fi >= max) RRETURN(MATCH_NOMATCH);
3527           if (eptr >= md->end_subject)
3528             {
3529             SCHECK_PARTIAL();
3530             RRETURN(MATCH_NOMATCH);
3531             }
3532           if (fc != RAWUCHARINCTEST(eptr)) RRETURN(MATCH_NOMATCH);
3533           }
3534         /* Control never gets here */
3535         }
3536       else  /* Maximize */
3537         {
3538         pp = eptr;
3539         for (i = min; i < max; i++)
3540           {
3541           if (eptr >= md->end_subject)
3542             {
3543             SCHECK_PARTIAL();
3544             break;
3545             }
3546           if (fc != RAWUCHARTEST(eptr)) break;
3547           eptr++;
3548           }
3549         if (possessive) continue;
3550 
3551         while (eptr >= pp)
3552           {
3553           RMATCH(eptr, ecode, offset_top, md, eptrb, RM27);
3554           eptr--;
3555           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3556           }
3557         RRETURN(MATCH_NOMATCH);
3558         }
3559       }
3560     /* Control never gets here */
3561 
3562     /* Match a negated single character. In UTF mode the pattern character and
3563     the subject character are both fetched with GETCHARINC. */
3564 
3565     case OP_NOT:
3566     case OP_NOTI:
3567     if (eptr >= md->end_subject)
3568       {
3569       SCHECK_PARTIAL();
3570       RRETURN(MATCH_NOMATCH);
3571       }
3572 #ifdef SUPPORT_UTF
3573     if (utf)
3574       {
3575       register pcre_uint32 ch, och;
3576 
3577       ecode++;
3578       GETCHARINC(ch, ecode);
3579       GETCHARINC(c, eptr);
3580 
3581       if (op == OP_NOT)
3582         {
3583         if (ch == c) RRETURN(MATCH_NOMATCH);
3584         }
3585       else
3586         {
3587 #ifdef SUPPORT_UCP
3588         if (ch > 127)
3589           och = UCD_OTHERCASE(ch);
3590 #else
3591         if (ch > 127)
3592           och = ch;
3593 #endif /* SUPPORT_UCP */
3594         else
3595           och = TABLE_GET(ch, md->fcc, ch);
3596         if (ch == c || och == c) RRETURN(MATCH_NOMATCH);
3597         }
3598       }
3599     else
3600 #endif
3601       {
3602       register pcre_uint32 ch = ecode[1];
3603       c = *eptr++;
3604       if (ch == c || (op == OP_NOTI && TABLE_GET(ch, md->fcc, ch) == c))
3605         RRETURN(MATCH_NOMATCH);
3606       ecode += 2;
3607       }
3608     break;
3609 
3610     /* Match a negated single character repeatedly. This is almost a
3611     repeat of the code for a repeated single character, but I haven't found a
3612     nice way of commoning these up that doesn't require a test of the
3613     positive/negative option for each character match. Maybe that wouldn't add
3614     very much to the time taken, but character matching *is* what this is all
3615     about... */
3616 
3617     case OP_NOTEXACT:
3618     case OP_NOTEXACTI:
3619     min = max = GET2(ecode, 1);
3620     ecode += 1 + IMM2_SIZE;
3621     goto REPEATNOTCHAR;
3622 
3623     case OP_NOTUPTO:
3624     case OP_NOTUPTOI:
3625     case OP_NOTMINUPTO:
3626     case OP_NOTMINUPTOI:
3627     min = 0;
3628     max = GET2(ecode, 1);
3629     minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI;
3630     ecode += 1 + IMM2_SIZE;
3631     goto REPEATNOTCHAR;
3632 
3633     case OP_NOTPOSSTAR:
3634     case OP_NOTPOSSTARI:
3635     possessive = TRUE;
3636     min = 0;
3637     max = INT_MAX;
3638     ecode++;
3639     goto REPEATNOTCHAR;
3640 
3641     case OP_NOTPOSPLUS:
3642     case OP_NOTPOSPLUSI:
3643     possessive = TRUE;
3644     min = 1;
3645     max = INT_MAX;
3646     ecode++;
3647     goto REPEATNOTCHAR;
3648 
3649     case OP_NOTPOSQUERY:
3650     case OP_NOTPOSQUERYI:
3651     possessive = TRUE;
3652     min = 0;
3653     max = 1;
3654     ecode++;
3655     goto REPEATNOTCHAR;
3656 
3657     case OP_NOTPOSUPTO:
3658     case OP_NOTPOSUPTOI:
3659     possessive = TRUE;
3660     min = 0;
3661     max = GET2(ecode, 1);
3662     ecode += 1 + IMM2_SIZE;
3663     goto REPEATNOTCHAR;
3664 
3665     case OP_NOTSTAR:
3666     case OP_NOTSTARI:
3667     case OP_NOTMINSTAR:
3668     case OP_NOTMINSTARI:
3669     case OP_NOTPLUS:
3670     case OP_NOTPLUSI:
3671     case OP_NOTMINPLUS:
3672     case OP_NOTMINPLUSI:
3673     case OP_NOTQUERY:
3674     case OP_NOTQUERYI:
3675     case OP_NOTMINQUERY:
3676     case OP_NOTMINQUERYI:
3677     c = *ecode++ - ((op >= OP_NOTSTARI)? OP_NOTSTARI: OP_NOTSTAR);
3678     minimize = (c & 1) != 0;
3679     min = rep_min[c];                 /* Pick up values from tables; */
3680     max = rep_max[c];                 /* zero for max => infinity */
3681     if (max == 0) max = INT_MAX;
3682 
3683     /* Common code for all repeated negated single-character matches. */
3684 
3685     REPEATNOTCHAR:
3686     GETCHARINCTEST(fc, ecode);
3687 
3688     /* The code is duplicated for the caseless and caseful cases, for speed,
3689     since matching characters is likely to be quite common. First, ensure the
3690     minimum number of matches are present. If min = max, continue at the same
3691     level without recursing. Otherwise, if minimizing, keep trying the rest of
3692     the expression and advancing one matching character if failing, up to the
3693     maximum. Alternatively, if maximizing, find the maximum number of
3694     characters and work backwards. */
3695 
3696     DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
3697       max, (char *)eptr));
3698 
3699     if (op >= OP_NOTSTARI)     /* Caseless */
3700       {
3701 #ifdef SUPPORT_UTF
3702 #ifdef SUPPORT_UCP
3703       if (utf && fc > 127)
3704         foc = UCD_OTHERCASE(fc);
3705 #else
3706       if (utf && fc > 127)
3707         foc = fc;
3708 #endif /* SUPPORT_UCP */
3709       else
3710 #endif /* SUPPORT_UTF */
3711         foc = TABLE_GET(fc, md->fcc, fc);
3712 
3713 #ifdef SUPPORT_UTF
3714       if (utf)
3715         {
3716         register pcre_uint32 d;
3717         for (i = 1; i <= min; i++)
3718           {
3719           if (eptr >= md->end_subject)
3720             {
3721             SCHECK_PARTIAL();
3722             RRETURN(MATCH_NOMATCH);
3723             }
3724           GETCHARINC(d, eptr);
3725           if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);
3726           }
3727         }
3728       else
3729 #endif
3730       /* Not UTF mode */
3731         {
3732         for (i = 1; i <= min; i++)
3733           {
3734           if (eptr >= md->end_subject)
3735             {
3736             SCHECK_PARTIAL();
3737             RRETURN(MATCH_NOMATCH);
3738             }
3739           if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
3740           eptr++;
3741           }
3742         }
3743 
3744       if (min == max) continue;
3745 
3746       if (minimize)
3747         {
3748 #ifdef SUPPORT_UTF
3749         if (utf)
3750           {
3751           register pcre_uint32 d;
3752           for (fi = min;; fi++)
3753             {
3754             RMATCH(eptr, ecode, offset_top, md, eptrb, RM28);
3755             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3756             if (fi >= max) RRETURN(MATCH_NOMATCH);
3757             if (eptr >= md->end_subject)
3758               {
3759               SCHECK_PARTIAL();
3760               RRETURN(MATCH_NOMATCH);
3761               }
3762             GETCHARINC(d, eptr);
3763             if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);
3764             }
3765           }
3766         else
3767 #endif
3768         /* Not UTF mode */
3769           {
3770           for (fi = min;; fi++)
3771             {
3772             RMATCH(eptr, ecode, offset_top, md, eptrb, RM29);
3773             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3774             if (fi >= max) RRETURN(MATCH_NOMATCH);
3775             if (eptr >= md->end_subject)
3776               {
3777               SCHECK_PARTIAL();
3778               RRETURN(MATCH_NOMATCH);
3779               }
3780             if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
3781             eptr++;
3782             }
3783           }
3784         /* Control never gets here */
3785         }
3786 
3787       /* Maximize case */
3788 
3789       else
3790         {
3791         pp = eptr;
3792 
3793 #ifdef SUPPORT_UTF
3794         if (utf)
3795           {
3796           register pcre_uint32 d;
3797           for (i = min; i < max; i++)
3798             {
3799             int len = 1;
3800             if (eptr >= md->end_subject)
3801               {
3802               SCHECK_PARTIAL();
3803               break;
3804               }
3805             GETCHARLEN(d, eptr, len);
3806             if (fc == d || (unsigned int)foc == d) break;
3807             eptr += len;
3808             }
3809           if (possessive) continue;
3810           for(;;)
3811             {
3812             RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);
3813             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3814             if (eptr-- == pp) break;        /* Stop if tried at original pos */
3815             BACKCHAR(eptr);
3816             }
3817           }
3818         else
3819 #endif
3820         /* Not UTF mode */
3821           {
3822           for (i = min; i < max; i++)
3823             {
3824             if (eptr >= md->end_subject)
3825               {
3826               SCHECK_PARTIAL();
3827               break;
3828               }
3829             if (fc == *eptr || foc == *eptr) break;
3830             eptr++;
3831             }
3832           if (possessive) continue;
3833           while (eptr >= pp)
3834             {
3835             RMATCH(eptr, ecode, offset_top, md, eptrb, RM31);
3836             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3837             eptr--;
3838             }
3839           }
3840 
3841         RRETURN(MATCH_NOMATCH);
3842         }
3843       /* Control never gets here */
3844       }
3845 
3846     /* Caseful comparisons */
3847 
3848     else
3849       {
3850 #ifdef SUPPORT_UTF
3851       if (utf)
3852         {
3853         register pcre_uint32 d;
3854         for (i = 1; i <= min; i++)
3855           {
3856           if (eptr >= md->end_subject)
3857             {
3858             SCHECK_PARTIAL();
3859             RRETURN(MATCH_NOMATCH);
3860             }
3861           GETCHARINC(d, eptr);
3862           if (fc == d) RRETURN(MATCH_NOMATCH);
3863           }
3864         }
3865       else
3866 #endif
3867       /* Not UTF mode */
3868         {
3869         for (i = 1; i <= min; i++)
3870           {
3871           if (eptr >= md->end_subject)
3872             {
3873             SCHECK_PARTIAL();
3874             RRETURN(MATCH_NOMATCH);
3875             }
3876           if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
3877           }
3878         }
3879 
3880       if (min == max) continue;
3881 
3882       if (minimize)
3883         {
3884 #ifdef SUPPORT_UTF
3885         if (utf)
3886           {
3887           register pcre_uint32 d;
3888           for (fi = min;; fi++)
3889             {
3890             RMATCH(eptr, ecode, offset_top, md, eptrb, RM32);
3891             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3892             if (fi >= max) RRETURN(MATCH_NOMATCH);
3893             if (eptr >= md->end_subject)
3894               {
3895               SCHECK_PARTIAL();
3896               RRETURN(MATCH_NOMATCH);
3897               }
3898             GETCHARINC(d, eptr);
3899             if (fc == d) RRETURN(MATCH_NOMATCH);
3900             }
3901           }
3902         else
3903 #endif
3904         /* Not UTF mode */
3905           {
3906           for (fi = min;; fi++)
3907             {
3908             RMATCH(eptr, ecode, offset_top, md, eptrb, RM33);
3909             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3910             if (fi >= max) RRETURN(MATCH_NOMATCH);
3911             if (eptr >= md->end_subject)
3912               {
3913               SCHECK_PARTIAL();
3914               RRETURN(MATCH_NOMATCH);
3915               }
3916             if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
3917             }
3918           }
3919         /* Control never gets here */
3920         }
3921 
3922       /* Maximize case */
3923 
3924       else
3925         {
3926         pp = eptr;
3927 
3928 #ifdef SUPPORT_UTF
3929         if (utf)
3930           {
3931           register pcre_uint32 d;
3932           for (i = min; i < max; i++)
3933             {
3934             int len = 1;
3935             if (eptr >= md->end_subject)
3936               {
3937               SCHECK_PARTIAL();
3938               break;
3939               }
3940             GETCHARLEN(d, eptr, len);
3941             if (fc == d) break;
3942             eptr += len;
3943             }
3944           if (possessive) continue;
3945           for(;;)
3946             {
3947             RMATCH(eptr, ecode, offset_top, md, eptrb, RM34);
3948             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3949             if (eptr-- == pp) break;        /* Stop if tried at original pos */
3950             BACKCHAR(eptr);
3951             }
3952           }
3953         else
3954 #endif
3955         /* Not UTF mode */
3956           {
3957           for (i = min; i < max; i++)
3958             {
3959             if (eptr >= md->end_subject)
3960               {
3961               SCHECK_PARTIAL();
3962               break;
3963               }
3964             if (fc == *eptr) break;
3965             eptr++;
3966             }
3967           if (possessive) continue;
3968           while (eptr >= pp)
3969             {
3970             RMATCH(eptr, ecode, offset_top, md, eptrb, RM35);
3971             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3972             eptr--;
3973             }
3974           }
3975 
3976         RRETURN(MATCH_NOMATCH);
3977         }
3978       }
3979     /* Control never gets here */
3980 
3981     /* Match a single character type repeatedly; several different opcodes
3982     share code. This is very similar to the code for single characters, but we
3983     repeat it in the interests of efficiency. */
3984 
3985     case OP_TYPEEXACT:
3986     min = max = GET2(ecode, 1);
3987     minimize = TRUE;
3988     ecode += 1 + IMM2_SIZE;
3989     goto REPEATTYPE;
3990 
3991     case OP_TYPEUPTO:
3992     case OP_TYPEMINUPTO:
3993     min = 0;
3994     max = GET2(ecode, 1);
3995     minimize = *ecode == OP_TYPEMINUPTO;
3996     ecode += 1 + IMM2_SIZE;
3997     goto REPEATTYPE;
3998 
3999     case OP_TYPEPOSSTAR:
4000     possessive = TRUE;
4001     min = 0;
4002     max = INT_MAX;
4003     ecode++;
4004     goto REPEATTYPE;
4005 
4006     case OP_TYPEPOSPLUS:
4007     possessive = TRUE;
4008     min = 1;
4009     max = INT_MAX;
4010     ecode++;
4011     goto REPEATTYPE;
4012 
4013     case OP_TYPEPOSQUERY:
4014     possessive = TRUE;
4015     min = 0;
4016     max = 1;
4017     ecode++;
4018     goto REPEATTYPE;
4019 
4020     case OP_TYPEPOSUPTO:
4021     possessive = TRUE;
4022     min = 0;
4023     max = GET2(ecode, 1);
4024     ecode += 1 + IMM2_SIZE;
4025     goto REPEATTYPE;
4026 
4027     case OP_TYPESTAR:
4028     case OP_TYPEMINSTAR:
4029     case OP_TYPEPLUS:
4030     case OP_TYPEMINPLUS:
4031     case OP_TYPEQUERY:
4032     case OP_TYPEMINQUERY:
4033     c = *ecode++ - OP_TYPESTAR;
4034     minimize = (c & 1) != 0;
4035     min = rep_min[c];                 /* Pick up values from tables; */
4036     max = rep_max[c];                 /* zero for max => infinity */
4037     if (max == 0) max = INT_MAX;
4038 
4039     /* Common code for all repeated single character type matches. Note that
4040     in UTF-8 mode, '.' matches a character of any length, but for the other
4041     character types, the valid characters are all one-byte long. */
4042 
4043     REPEATTYPE:
4044     ctype = *ecode++;      /* Code for the character type */
4045 
4046 #ifdef SUPPORT_UCP
4047     if (ctype == OP_PROP || ctype == OP_NOTPROP)
4048       {
4049       prop_fail_result = ctype == OP_NOTPROP;
4050       prop_type = *ecode++;
4051       prop_value = *ecode++;
4052       }
4053     else prop_type = -1;
4054 #endif
4055 
4056     /* First, ensure the minimum number of matches are present. Use inline
4057     code for maximizing the speed, and do the type test once at the start
4058     (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
4059     is tidier. Also separate the UCP code, which can be the same for both UTF-8
4060     and single-bytes. */
4061 
4062     if (min > 0)
4063       {
4064 #ifdef SUPPORT_UCP
4065       if (prop_type >= 0)
4066         {
4067         switch(prop_type)
4068           {
4069           case PT_ANY:
4070           if (prop_fail_result) RRETURN(MATCH_NOMATCH);
4071           for (i = 1; i <= min; i++)
4072             {
4073             if (eptr >= md->end_subject)
4074               {
4075               SCHECK_PARTIAL();
4076               RRETURN(MATCH_NOMATCH);
4077               }
4078             GETCHARINCTEST(c, eptr);
4079             }
4080           break;
4081 
4082           case PT_LAMP:
4083           for (i = 1; i <= min; i++)
4084             {
4085             int chartype;
4086             if (eptr >= md->end_subject)
4087               {
4088               SCHECK_PARTIAL();
4089               RRETURN(MATCH_NOMATCH);
4090               }
4091             GETCHARINCTEST(c, eptr);
4092             chartype = UCD_CHARTYPE(c);
4093             if ((chartype == ucp_Lu ||
4094                  chartype == ucp_Ll ||
4095                  chartype == ucp_Lt) == prop_fail_result)
4096               RRETURN(MATCH_NOMATCH);
4097             }
4098           break;
4099 
4100           case PT_GC:
4101           for (i = 1; i <= min; i++)
4102             {
4103             if (eptr >= md->end_subject)
4104               {
4105               SCHECK_PARTIAL();
4106               RRETURN(MATCH_NOMATCH);
4107               }
4108             GETCHARINCTEST(c, eptr);
4109             if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
4110               RRETURN(MATCH_NOMATCH);
4111             }
4112           break;
4113 
4114           case PT_PC:
4115           for (i = 1; i <= min; i++)
4116             {
4117             if (eptr >= md->end_subject)
4118               {
4119               SCHECK_PARTIAL();
4120               RRETURN(MATCH_NOMATCH);
4121               }
4122             GETCHARINCTEST(c, eptr);
4123             if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
4124               RRETURN(MATCH_NOMATCH);
4125             }
4126           break;
4127 
4128           case PT_SC:
4129           for (i = 1; i <= min; i++)
4130             {
4131             if (eptr >= md->end_subject)
4132               {
4133               SCHECK_PARTIAL();
4134               RRETURN(MATCH_NOMATCH);
4135               }
4136             GETCHARINCTEST(c, eptr);
4137             if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
4138               RRETURN(MATCH_NOMATCH);
4139             }
4140           break;
4141 
4142           case PT_ALNUM:
4143           for (i = 1; i <= min; i++)
4144             {
4145             int category;
4146             if (eptr >= md->end_subject)
4147               {
4148               SCHECK_PARTIAL();
4149               RRETURN(MATCH_NOMATCH);
4150               }
4151             GETCHARINCTEST(c, eptr);
4152             category = UCD_CATEGORY(c);
4153             if ((category == ucp_L || category == ucp_N) == prop_fail_result)
4154               RRETURN(MATCH_NOMATCH);
4155             }
4156           break;
4157 
4158           case PT_SPACE:    /* Perl space */
4159           for (i = 1; i <= min; i++)
4160             {
4161             if (eptr >= md->end_subject)
4162               {
4163               SCHECK_PARTIAL();
4164               RRETURN(MATCH_NOMATCH);
4165               }
4166             GETCHARINCTEST(c, eptr);
4167             if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4168                  c == CHAR_FF || c == CHAR_CR)
4169                    == prop_fail_result)
4170               RRETURN(MATCH_NOMATCH);
4171             }
4172           break;
4173 
4174           case PT_PXSPACE:  /* POSIX space */
4175           for (i = 1; i <= min; i++)
4176             {
4177             if (eptr >= md->end_subject)
4178               {
4179               SCHECK_PARTIAL();
4180               RRETURN(MATCH_NOMATCH);
4181               }
4182             GETCHARINCTEST(c, eptr);
4183             if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4184                  c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
4185                    == prop_fail_result)
4186               RRETURN(MATCH_NOMATCH);
4187             }
4188           break;
4189 
4190           case PT_WORD:
4191           for (i = 1; i <= min; i++)
4192             {
4193             int category;
4194             if (eptr >= md->end_subject)
4195               {
4196               SCHECK_PARTIAL();
4197               RRETURN(MATCH_NOMATCH);
4198               }
4199             GETCHARINCTEST(c, eptr);
4200             category = UCD_CATEGORY(c);
4201             if ((category == ucp_L || category == ucp_N || c == CHAR_UNDERSCORE)
4202                    == prop_fail_result)
4203               RRETURN(MATCH_NOMATCH);
4204             }
4205           break;
4206 
4207           case PT_CLIST:
4208           for (i = 1; i <= min; i++)
4209             {
4210             const pcre_uint32 *cp;
4211             if (eptr >= md->end_subject)
4212               {
4213               SCHECK_PARTIAL();
4214               RRETURN(MATCH_NOMATCH);
4215               }
4216             GETCHARINCTEST(c, eptr);
4217             cp = PRIV(ucd_caseless_sets) + prop_value;
4218             for (;;)
4219               {
4220               if (c < *cp)
4221                 { if (prop_fail_result) break; else { RRETURN(MATCH_NOMATCH); } }
4222               if (c == *cp++)
4223                 { if (prop_fail_result) { RRETURN(MATCH_NOMATCH); } else break; }
4224               }
4225             }
4226           break;
4227 
4228           /* This should not occur */
4229 
4230           default:
4231           RRETURN(PCRE_ERROR_INTERNAL);
4232           }
4233         }
4234 
4235       /* Match extended Unicode sequences. We will get here only if the
4236       support is in the binary; otherwise a compile-time error occurs. */
4237 
4238       else if (ctype == OP_EXTUNI)
4239         {
4240         for (i = 1; i <= min; i++)
4241           {
4242           if (eptr >= md->end_subject)
4243             {
4244             SCHECK_PARTIAL();
4245             RRETURN(MATCH_NOMATCH);
4246             }
4247           else
4248             {
4249             int lgb, rgb;
4250             GETCHARINCTEST(c, eptr);
4251             lgb = UCD_GRAPHBREAK(c);
4252            while (eptr < md->end_subject)
4253               {
4254               int len = 1;
4255               if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
4256               rgb = UCD_GRAPHBREAK(c);
4257               if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
4258               lgb = rgb;
4259               eptr += len;
4260               }
4261             }
4262           CHECK_PARTIAL();
4263           }
4264         }
4265 
4266       else
4267 #endif     /* SUPPORT_UCP */
4268 
4269 /* Handle all other cases when the coding is UTF-8 */
4270 
4271 #ifdef SUPPORT_UTF
4272       if (utf) switch(ctype)
4273         {
4274         case OP_ANY:
4275         for (i = 1; i <= min; i++)
4276           {
4277           if (eptr >= md->end_subject)
4278             {
4279             SCHECK_PARTIAL();
4280             RRETURN(MATCH_NOMATCH);
4281             }
4282           if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
4283           if (md->partial != 0 &&
4284               eptr + 1 >= md->end_subject &&
4285               NLBLOCK->nltype == NLTYPE_FIXED &&
4286               NLBLOCK->nllen == 2 &&
4287               RAWUCHAR(eptr) == NLBLOCK->nl[0])
4288             {
4289             md->hitend = TRUE;
4290             if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
4291             }
4292           eptr++;
4293           ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4294           }
4295         break;
4296 
4297         case OP_ALLANY:
4298         for (i = 1; i <= min; i++)
4299           {
4300           if (eptr >= md->end_subject)
4301             {
4302             SCHECK_PARTIAL();
4303             RRETURN(MATCH_NOMATCH);
4304             }
4305           eptr++;
4306           ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4307           }
4308         break;
4309 
4310         case OP_ANYBYTE:
4311         if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);
4312         eptr += min;
4313         break;
4314 
4315         case OP_ANYNL:
4316         for (i = 1; i <= min; i++)
4317           {
4318           if (eptr >= md->end_subject)
4319             {
4320             SCHECK_PARTIAL();
4321             RRETURN(MATCH_NOMATCH);
4322             }
4323           GETCHARINC(c, eptr);
4324           switch(c)
4325             {
4326             default: RRETURN(MATCH_NOMATCH);
4327 
4328             case CHAR_CR:
4329             if (eptr < md->end_subject && RAWUCHAR(eptr) == CHAR_LF) eptr++;
4330             break;
4331 
4332             case CHAR_LF:
4333             break;
4334 
4335             case CHAR_VT:
4336             case CHAR_FF:
4337             case CHAR_NEL:
4338 #ifndef EBCDIC
4339             case 0x2028:
4340             case 0x2029:
4341 #endif  /* Not EBCDIC */
4342             if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
4343             break;
4344             }
4345           }
4346         break;
4347 
4348         case OP_NOT_HSPACE:
4349         for (i = 1; i <= min; i++)
4350           {
4351           if (eptr >= md->end_subject)
4352             {
4353             SCHECK_PARTIAL();
4354             RRETURN(MATCH_NOMATCH);
4355             }
4356           GETCHARINC(c, eptr);
4357           switch(c)
4358             {
4359             HSPACE_CASES: RRETURN(MATCH_NOMATCH);  /* Byte and multibyte cases */
4360             default: break;
4361             }
4362           }
4363         break;
4364 
4365         case OP_HSPACE:
4366         for (i = 1; i <= min; i++)
4367           {
4368           if (eptr >= md->end_subject)
4369             {
4370             SCHECK_PARTIAL();
4371             RRETURN(MATCH_NOMATCH);
4372             }
4373           GETCHARINC(c, eptr);
4374           switch(c)
4375             {
4376             HSPACE_CASES: break;  /* Byte and multibyte cases */
4377             default: RRETURN(MATCH_NOMATCH);
4378             }
4379           }
4380         break;
4381 
4382         case OP_NOT_VSPACE:
4383         for (i = 1; i <= min; i++)
4384           {
4385           if (eptr >= md->end_subject)
4386             {
4387             SCHECK_PARTIAL();
4388             RRETURN(MATCH_NOMATCH);
4389             }
4390           GETCHARINC(c, eptr);
4391           switch(c)
4392             {
4393             VSPACE_CASES: RRETURN(MATCH_NOMATCH);
4394             default: break;
4395             }
4396           }
4397         break;
4398 
4399         case OP_VSPACE:
4400         for (i = 1; i <= min; i++)
4401           {
4402           if (eptr >= md->end_subject)
4403             {
4404             SCHECK_PARTIAL();
4405             RRETURN(MATCH_NOMATCH);
4406             }
4407           GETCHARINC(c, eptr);
4408           switch(c)
4409             {
4410             VSPACE_CASES: break;
4411             default: RRETURN(MATCH_NOMATCH);
4412             }
4413           }
4414         break;
4415 
4416         case OP_NOT_DIGIT:
4417         for (i = 1; i <= min; i++)
4418           {
4419           if (eptr >= md->end_subject)
4420             {
4421             SCHECK_PARTIAL();
4422             RRETURN(MATCH_NOMATCH);
4423             }
4424           GETCHARINC(c, eptr);
4425           if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
4426             RRETURN(MATCH_NOMATCH);
4427           }
4428         break;
4429 
4430         case OP_DIGIT:
4431         for (i = 1; i <= min; i++)
4432           {
4433           pcre_uchar cc;
4434 
4435           if (eptr >= md->end_subject)
4436             {
4437             SCHECK_PARTIAL();
4438             RRETURN(MATCH_NOMATCH);
4439             }
4440           cc = RAWUCHAR(eptr);
4441           if (cc >= 128 || (md->ctypes[cc] & ctype_digit) == 0)
4442             RRETURN(MATCH_NOMATCH);
4443           eptr++;
4444           /* No need to skip more bytes - we know it's a 1-byte character */
4445           }
4446         break;
4447 
4448         case OP_NOT_WHITESPACE:
4449         for (i = 1; i <= min; i++)
4450           {
4451           pcre_uchar cc;
4452 
4453           if (eptr >= md->end_subject)
4454             {
4455             SCHECK_PARTIAL();
4456             RRETURN(MATCH_NOMATCH);
4457             }
4458           cc = RAWUCHAR(eptr);
4459           if (cc < 128 && (md->ctypes[cc] & ctype_space) != 0)
4460             RRETURN(MATCH_NOMATCH);
4461           eptr++;
4462           ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4463           }
4464         break;
4465 
4466         case OP_WHITESPACE:
4467         for (i = 1; i <= min; i++)
4468           {
4469           pcre_uchar cc;
4470 
4471           if (eptr >= md->end_subject)
4472             {
4473             SCHECK_PARTIAL();
4474             RRETURN(MATCH_NOMATCH);
4475             }
4476           cc = RAWUCHAR(eptr);
4477           if (cc >= 128 || (md->ctypes[cc] & ctype_space) == 0)
4478             RRETURN(MATCH_NOMATCH);
4479           eptr++;
4480           /* No need to skip more bytes - we know it's a 1-byte character */
4481           }
4482         break;
4483 
4484         case OP_NOT_WORDCHAR:
4485         for (i = 1; i <= min; i++)
4486           {
4487           pcre_uchar cc;
4488 
4489           if (eptr >= md->end_subject)
4490             {
4491             SCHECK_PARTIAL();
4492             RRETURN(MATCH_NOMATCH);
4493             }
4494           cc = RAWUCHAR(eptr);
4495           if (cc < 128 && (md->ctypes[cc] & ctype_word) != 0)
4496             RRETURN(MATCH_NOMATCH);
4497           eptr++;
4498           ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4499           }
4500         break;
4501 
4502         case OP_WORDCHAR:
4503         for (i = 1; i <= min; i++)
4504           {
4505           pcre_uchar cc;
4506 
4507           if (eptr >= md->end_subject)
4508             {
4509             SCHECK_PARTIAL();
4510             RRETURN(MATCH_NOMATCH);
4511             }
4512           cc = RAWUCHAR(eptr);
4513           if (cc >= 128 || (md->ctypes[cc] & ctype_word) == 0)
4514             RRETURN(MATCH_NOMATCH);
4515           eptr++;
4516           /* No need to skip more bytes - we know it's a 1-byte character */
4517           }
4518         break;
4519 
4520         default:
4521         RRETURN(PCRE_ERROR_INTERNAL);
4522         }  /* End switch(ctype) */
4523 
4524       else
4525 #endif     /* SUPPORT_UTF */
4526 
4527       /* Code for the non-UTF-8 case for minimum matching of operators other
4528       than OP_PROP and OP_NOTPROP. */
4529 
4530       switch(ctype)
4531         {
4532         case OP_ANY:
4533         for (i = 1; i <= min; i++)
4534           {
4535           if (eptr >= md->end_subject)
4536             {
4537             SCHECK_PARTIAL();
4538             RRETURN(MATCH_NOMATCH);
4539             }
4540           if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
4541           if (md->partial != 0 &&
4542               eptr + 1 >= md->end_subject &&
4543               NLBLOCK->nltype == NLTYPE_FIXED &&
4544               NLBLOCK->nllen == 2 &&
4545               *eptr == NLBLOCK->nl[0])
4546             {
4547             md->hitend = TRUE;
4548             if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
4549             }
4550           eptr++;
4551           }
4552         break;
4553 
4554         case OP_ALLANY:
4555         if (eptr > md->end_subject - min)
4556           {
4557           SCHECK_PARTIAL();
4558           RRETURN(MATCH_NOMATCH);
4559           }
4560         eptr += min;
4561         break;
4562 
4563         case OP_ANYBYTE:
4564         if (eptr > md->end_subject - min)
4565           {
4566           SCHECK_PARTIAL();
4567           RRETURN(MATCH_NOMATCH);
4568           }
4569         eptr += min;
4570         break;
4571 
4572         case OP_ANYNL:
4573         for (i = 1; i <= min; i++)
4574           {
4575           if (eptr >= md->end_subject)
4576             {
4577             SCHECK_PARTIAL();
4578             RRETURN(MATCH_NOMATCH);
4579             }
4580           switch(*eptr++)
4581             {
4582             default: RRETURN(MATCH_NOMATCH);
4583 
4584             case CHAR_CR:
4585             if (eptr < md->end_subject && *eptr == CHAR_LF) eptr++;
4586             break;
4587 
4588             case CHAR_LF:
4589             break;
4590 
4591             case CHAR_VT:
4592             case CHAR_FF:
4593             case CHAR_NEL:
4594 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4595             case 0x2028:
4596             case 0x2029:
4597 #endif
4598             if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
4599             break;
4600             }
4601           }
4602         break;
4603 
4604         case OP_NOT_HSPACE:
4605         for (i = 1; i <= min; i++)
4606           {
4607           if (eptr >= md->end_subject)
4608             {
4609             SCHECK_PARTIAL();
4610             RRETURN(MATCH_NOMATCH);
4611             }
4612           switch(*eptr++)
4613             {
4614             default: break;
4615             HSPACE_BYTE_CASES:
4616 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4617             HSPACE_MULTIBYTE_CASES:
4618 #endif
4619             RRETURN(MATCH_NOMATCH);
4620             }
4621           }
4622         break;
4623 
4624         case OP_HSPACE:
4625         for (i = 1; i <= min; i++)
4626           {
4627           if (eptr >= md->end_subject)
4628             {
4629             SCHECK_PARTIAL();
4630             RRETURN(MATCH_NOMATCH);
4631             }
4632           switch(*eptr++)
4633             {
4634             default: RRETURN(MATCH_NOMATCH);
4635             HSPACE_BYTE_CASES:
4636 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4637             HSPACE_MULTIBYTE_CASES:
4638 #endif
4639             break;
4640             }
4641           }
4642         break;
4643 
4644         case OP_NOT_VSPACE:
4645         for (i = 1; i <= min; i++)
4646           {
4647           if (eptr >= md->end_subject)
4648             {
4649             SCHECK_PARTIAL();
4650             RRETURN(MATCH_NOMATCH);
4651             }
4652           switch(*eptr++)
4653             {
4654             VSPACE_BYTE_CASES:
4655 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4656             VSPACE_MULTIBYTE_CASES:
4657 #endif
4658             RRETURN(MATCH_NOMATCH);
4659             default: break;
4660             }
4661           }
4662         break;
4663 
4664         case OP_VSPACE:
4665         for (i = 1; i <= min; i++)
4666           {
4667           if (eptr >= md->end_subject)
4668             {
4669             SCHECK_PARTIAL();
4670             RRETURN(MATCH_NOMATCH);
4671             }
4672           switch(*eptr++)
4673             {
4674             default: RRETURN(MATCH_NOMATCH);
4675             VSPACE_BYTE_CASES:
4676 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4677             VSPACE_MULTIBYTE_CASES:
4678 #endif
4679             break;
4680             }
4681           }
4682         break;
4683 
4684         case OP_NOT_DIGIT:
4685         for (i = 1; i <= min; i++)
4686           {
4687           if (eptr >= md->end_subject)
4688             {
4689             SCHECK_PARTIAL();
4690             RRETURN(MATCH_NOMATCH);
4691             }
4692           if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0)
4693             RRETURN(MATCH_NOMATCH);
4694           eptr++;
4695           }
4696         break;
4697 
4698         case OP_DIGIT:
4699         for (i = 1; i <= min; i++)
4700           {
4701           if (eptr >= md->end_subject)
4702             {
4703             SCHECK_PARTIAL();
4704             RRETURN(MATCH_NOMATCH);
4705             }
4706           if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0)
4707             RRETURN(MATCH_NOMATCH);
4708           eptr++;
4709           }
4710         break;
4711 
4712         case OP_NOT_WHITESPACE:
4713         for (i = 1; i <= min; i++)
4714           {
4715           if (eptr >= md->end_subject)
4716             {
4717             SCHECK_PARTIAL();
4718             RRETURN(MATCH_NOMATCH);
4719             }
4720           if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0)
4721             RRETURN(MATCH_NOMATCH);
4722           eptr++;
4723           }
4724         break;
4725 
4726         case OP_WHITESPACE:
4727         for (i = 1; i <= min; i++)
4728           {
4729           if (eptr >= md->end_subject)
4730             {
4731             SCHECK_PARTIAL();
4732             RRETURN(MATCH_NOMATCH);
4733             }
4734           if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0)
4735             RRETURN(MATCH_NOMATCH);
4736           eptr++;
4737           }
4738         break;
4739 
4740         case OP_NOT_WORDCHAR:
4741         for (i = 1; i <= min; i++)
4742           {
4743           if (eptr >= md->end_subject)
4744             {
4745             SCHECK_PARTIAL();
4746             RRETURN(MATCH_NOMATCH);
4747             }
4748           if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0)
4749             RRETURN(MATCH_NOMATCH);
4750           eptr++;
4751           }
4752         break;
4753 
4754         case OP_WORDCHAR:
4755         for (i = 1; i <= min; i++)
4756           {
4757           if (eptr >= md->end_subject)
4758             {
4759             SCHECK_PARTIAL();
4760             RRETURN(MATCH_NOMATCH);
4761             }
4762           if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0)
4763             RRETURN(MATCH_NOMATCH);
4764           eptr++;
4765           }
4766         break;
4767 
4768         default:
4769         RRETURN(PCRE_ERROR_INTERNAL);
4770         }
4771       }
4772 
4773     /* If min = max, continue at the same level without recursing */
4774 
4775     if (min == max) continue;
4776 
4777     /* If minimizing, we have to test the rest of the pattern before each
4778     subsequent match. Again, separate the UTF-8 case for speed, and also
4779     separate the UCP cases. */
4780 
4781     if (minimize)
4782       {
4783 #ifdef SUPPORT_UCP
4784       if (prop_type >= 0)
4785         {
4786         switch(prop_type)
4787           {
4788           case PT_ANY:
4789           for (fi = min;; fi++)
4790             {
4791             RMATCH(eptr, ecode, offset_top, md, eptrb, RM36);
4792             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4793             if (fi >= max) RRETURN(MATCH_NOMATCH);
4794             if (eptr >= md->end_subject)
4795               {
4796               SCHECK_PARTIAL();
4797               RRETURN(MATCH_NOMATCH);
4798               }
4799             GETCHARINCTEST(c, eptr);
4800             if (prop_fail_result) RRETURN(MATCH_NOMATCH);
4801             }
4802           /* Control never gets here */
4803 
4804           case PT_LAMP:
4805           for (fi = min;; fi++)
4806             {
4807             int chartype;
4808             RMATCH(eptr, ecode, offset_top, md, eptrb, RM37);
4809             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4810             if (fi >= max) RRETURN(MATCH_NOMATCH);
4811             if (eptr >= md->end_subject)
4812               {
4813               SCHECK_PARTIAL();
4814               RRETURN(MATCH_NOMATCH);
4815               }
4816             GETCHARINCTEST(c, eptr);
4817             chartype = UCD_CHARTYPE(c);
4818             if ((chartype == ucp_Lu ||
4819                  chartype == ucp_Ll ||
4820                  chartype == ucp_Lt) == prop_fail_result)
4821               RRETURN(MATCH_NOMATCH);
4822             }
4823           /* Control never gets here */
4824 
4825           case PT_GC:
4826           for (fi = min;; fi++)
4827             {
4828             RMATCH(eptr, ecode, offset_top, md, eptrb, RM38);
4829             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4830             if (fi >= max) RRETURN(MATCH_NOMATCH);
4831             if (eptr >= md->end_subject)
4832               {
4833               SCHECK_PARTIAL();
4834               RRETURN(MATCH_NOMATCH);
4835               }
4836             GETCHARINCTEST(c, eptr);
4837             if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
4838               RRETURN(MATCH_NOMATCH);
4839             }
4840           /* Control never gets here */
4841 
4842           case PT_PC:
4843           for (fi = min;; fi++)
4844             {
4845             RMATCH(eptr, ecode, offset_top, md, eptrb, RM39);
4846             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4847             if (fi >= max) RRETURN(MATCH_NOMATCH);
4848             if (eptr >= md->end_subject)
4849               {
4850               SCHECK_PARTIAL();
4851               RRETURN(MATCH_NOMATCH);
4852               }
4853             GETCHARINCTEST(c, eptr);
4854             if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
4855               RRETURN(MATCH_NOMATCH);
4856             }
4857           /* Control never gets here */
4858 
4859           case PT_SC:
4860           for (fi = min;; fi++)
4861             {
4862             RMATCH(eptr, ecode, offset_top, md, eptrb, RM40);
4863             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4864             if (fi >= max) RRETURN(MATCH_NOMATCH);
4865             if (eptr >= md->end_subject)
4866               {
4867               SCHECK_PARTIAL();
4868               RRETURN(MATCH_NOMATCH);
4869               }
4870             GETCHARINCTEST(c, eptr);
4871             if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
4872               RRETURN(MATCH_NOMATCH);
4873             }
4874           /* Control never gets here */
4875 
4876           case PT_ALNUM:
4877           for (fi = min;; fi++)
4878             {
4879             int category;
4880             RMATCH(eptr, ecode, offset_top, md, eptrb, RM59);
4881             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4882             if (fi >= max) RRETURN(MATCH_NOMATCH);
4883             if (eptr >= md->end_subject)
4884               {
4885               SCHECK_PARTIAL();
4886               RRETURN(MATCH_NOMATCH);
4887               }
4888             GETCHARINCTEST(c, eptr);
4889             category = UCD_CATEGORY(c);
4890             if ((category == ucp_L || category == ucp_N) == prop_fail_result)
4891               RRETURN(MATCH_NOMATCH);
4892             }
4893           /* Control never gets here */
4894 
4895           case PT_SPACE:    /* Perl space */
4896           for (fi = min;; fi++)
4897             {
4898             RMATCH(eptr, ecode, offset_top, md, eptrb, RM60);
4899             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4900             if (fi >= max) RRETURN(MATCH_NOMATCH);
4901             if (eptr >= md->end_subject)
4902               {
4903               SCHECK_PARTIAL();
4904               RRETURN(MATCH_NOMATCH);
4905               }
4906             GETCHARINCTEST(c, eptr);
4907             if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4908                  c == CHAR_FF || c == CHAR_CR)
4909                    == prop_fail_result)
4910               RRETURN(MATCH_NOMATCH);
4911             }
4912           /* Control never gets here */
4913 
4914           case PT_PXSPACE:  /* POSIX space */
4915           for (fi = min;; fi++)
4916             {
4917             RMATCH(eptr, ecode, offset_top, md, eptrb, RM61);
4918             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4919             if (fi >= max) RRETURN(MATCH_NOMATCH);
4920             if (eptr >= md->end_subject)
4921               {
4922               SCHECK_PARTIAL();
4923               RRETURN(MATCH_NOMATCH);
4924               }
4925             GETCHARINCTEST(c, eptr);
4926             if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4927                  c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
4928                    == prop_fail_result)
4929               RRETURN(MATCH_NOMATCH);
4930             }
4931           /* Control never gets here */
4932 
4933           case PT_WORD:
4934           for (fi = min;; fi++)
4935             {
4936             int category;
4937             RMATCH(eptr, ecode, offset_top, md, eptrb, RM62);
4938             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4939             if (fi >= max) RRETURN(MATCH_NOMATCH);
4940             if (eptr >= md->end_subject)
4941               {
4942               SCHECK_PARTIAL();
4943               RRETURN(MATCH_NOMATCH);
4944               }
4945             GETCHARINCTEST(c, eptr);
4946             category = UCD_CATEGORY(c);
4947             if ((category == ucp_L ||
4948                  category == ucp_N ||
4949                  c == CHAR_UNDERSCORE)
4950                    == prop_fail_result)
4951               RRETURN(MATCH_NOMATCH);
4952             }
4953           /* Control never gets here */
4954 
4955           case PT_CLIST:
4956           for (fi = min;; fi++)
4957             {
4958             const pcre_uint32 *cp;
4959             RMATCH(eptr, ecode, offset_top, md, eptrb, RM67);
4960             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4961             if (fi >= max) RRETURN(MATCH_NOMATCH);
4962             if (eptr >= md->end_subject)
4963               {
4964               SCHECK_PARTIAL();
4965               RRETURN(MATCH_NOMATCH);
4966               }
4967             GETCHARINCTEST(c, eptr);
4968             cp = PRIV(ucd_caseless_sets) + prop_value;
4969             for (;;)
4970               {
4971               if (c < *cp)
4972                 { if (prop_fail_result) break; else { RRETURN(MATCH_NOMATCH); } }
4973               if (c == *cp++)
4974                 { if (prop_fail_result) { RRETURN(MATCH_NOMATCH); } else break; }
4975               }
4976             }
4977           /* Control never gets here */
4978 
4979           /* This should never occur */
4980           default:
4981           RRETURN(PCRE_ERROR_INTERNAL);
4982           }
4983         }
4984 
4985       /* Match extended Unicode sequences. We will get here only if the
4986       support is in the binary; otherwise a compile-time error occurs. */
4987 
4988       else if (ctype == OP_EXTUNI)
4989         {
4990         for (fi = min;; fi++)
4991           {
4992           RMATCH(eptr, ecode, offset_top, md, eptrb, RM41);
4993           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4994           if (fi >= max) RRETURN(MATCH_NOMATCH);
4995           if (eptr >= md->end_subject)
4996             {
4997             SCHECK_PARTIAL();
4998             RRETURN(MATCH_NOMATCH);
4999             }
5000           else
5001             {
5002             int lgb, rgb;
5003             GETCHARINCTEST(c, eptr);
5004             lgb = UCD_GRAPHBREAK(c);
5005             while (eptr < md->end_subject)
5006               {
5007               int len = 1;
5008               if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
5009               rgb = UCD_GRAPHBREAK(c);
5010               if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
5011               lgb = rgb;
5012               eptr += len;
5013               }
5014             }
5015           CHECK_PARTIAL();
5016           }
5017         }
5018       else
5019 #endif     /* SUPPORT_UCP */
5020 
5021 #ifdef SUPPORT_UTF
5022       if (utf)
5023         {
5024         for (fi = min;; fi++)
5025           {
5026           RMATCH(eptr, ecode, offset_top, md, eptrb, RM42);
5027           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5028           if (fi >= max) RRETURN(MATCH_NOMATCH);
5029           if (eptr >= md->end_subject)
5030             {
5031             SCHECK_PARTIAL();
5032             RRETURN(MATCH_NOMATCH);
5033             }
5034           if (ctype == OP_ANY && IS_NEWLINE(eptr))
5035             RRETURN(MATCH_NOMATCH);
5036           GETCHARINC(c, eptr);
5037           switch(ctype)
5038             {
5039             case OP_ANY:               /* This is the non-NL case */
5040             if (md->partial != 0 &&    /* Take care with CRLF partial */
5041                 eptr >= md->end_subject &&
5042                 NLBLOCK->nltype == NLTYPE_FIXED &&
5043                 NLBLOCK->nllen == 2 &&
5044                 c == NLBLOCK->nl[0])
5045               {
5046               md->hitend = TRUE;
5047               if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5048               }
5049             break;
5050 
5051             case OP_ALLANY:
5052             case OP_ANYBYTE:
5053             break;
5054 
5055             case OP_ANYNL:
5056             switch(c)
5057               {
5058               default: RRETURN(MATCH_NOMATCH);
5059               case CHAR_CR:
5060               if (eptr < md->end_subject && RAWUCHAR(eptr) == CHAR_LF) eptr++;
5061               break;
5062 
5063               case CHAR_LF:
5064               break;
5065 
5066               case CHAR_VT:
5067               case CHAR_FF:
5068               case CHAR_NEL:
5069 #ifndef EBCDIC
5070               case 0x2028:
5071               case 0x2029:
5072 #endif  /* Not EBCDIC */
5073               if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
5074               break;
5075               }
5076             break;
5077 
5078             case OP_NOT_HSPACE:
5079             switch(c)
5080               {
5081               HSPACE_CASES: RRETURN(MATCH_NOMATCH);
5082               default: break;
5083               }
5084             break;
5085 
5086             case OP_HSPACE:
5087             switch(c)
5088               {
5089               HSPACE_CASES: break;
5090               default: RRETURN(MATCH_NOMATCH);
5091               }
5092             break;
5093 
5094             case OP_NOT_VSPACE:
5095             switch(c)
5096               {
5097               VSPACE_CASES: RRETURN(MATCH_NOMATCH);
5098               default: break;
5099               }
5100             break;
5101 
5102             case OP_VSPACE:
5103             switch(c)
5104               {
5105               VSPACE_CASES: break;
5106               default: RRETURN(MATCH_NOMATCH);
5107               }
5108             break;
5109 
5110             case OP_NOT_DIGIT:
5111             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
5112               RRETURN(MATCH_NOMATCH);
5113             break;
5114 
5115             case OP_DIGIT:
5116             if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
5117               RRETURN(MATCH_NOMATCH);
5118             break;
5119 
5120             case OP_NOT_WHITESPACE:
5121             if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
5122               RRETURN(MATCH_NOMATCH);
5123             break;
5124 
5125             case OP_WHITESPACE:
5126             if (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
5127               RRETURN(MATCH_NOMATCH);
5128             break;
5129 
5130             case OP_NOT_WORDCHAR:
5131             if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
5132               RRETURN(MATCH_NOMATCH);
5133             break;
5134 
5135             case OP_WORDCHAR:
5136             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
5137               RRETURN(MATCH_NOMATCH);
5138             break;
5139 
5140             default:
5141             RRETURN(PCRE_ERROR_INTERNAL);
5142             }
5143           }
5144         }
5145       else
5146 #endif
5147       /* Not UTF mode */
5148         {
5149         for (fi = min;; fi++)
5150           {
5151           RMATCH(eptr, ecode, offset_top, md, eptrb, RM43);
5152           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5153           if (fi >= max) RRETURN(MATCH_NOMATCH);
5154           if (eptr >= md->end_subject)
5155             {
5156             SCHECK_PARTIAL();
5157             RRETURN(MATCH_NOMATCH);
5158             }
5159           if (ctype == OP_ANY && IS_NEWLINE(eptr))
5160             RRETURN(MATCH_NOMATCH);
5161           c = *eptr++;
5162           switch(ctype)
5163             {
5164             case OP_ANY:               /* This is the non-NL case */
5165             if (md->partial != 0 &&    /* Take care with CRLF partial */
5166                 eptr >= md->end_subject &&
5167                 NLBLOCK->nltype == NLTYPE_FIXED &&
5168                 NLBLOCK->nllen == 2 &&
5169                 c == NLBLOCK->nl[0])
5170               {
5171               md->hitend = TRUE;
5172               if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5173               }
5174             break;
5175 
5176             case OP_ALLANY:
5177             case OP_ANYBYTE:
5178             break;
5179 
5180             case OP_ANYNL:
5181             switch(c)
5182               {
5183               default: RRETURN(MATCH_NOMATCH);
5184               case CHAR_CR:
5185               if (eptr < md->end_subject && *eptr == CHAR_LF) eptr++;
5186               break;
5187 
5188               case CHAR_LF:
5189               break;
5190 
5191               case CHAR_VT:
5192               case CHAR_FF:
5193               case CHAR_NEL:
5194 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5195               case 0x2028:
5196               case 0x2029:
5197 #endif
5198               if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
5199               break;
5200               }
5201             break;
5202 
5203             case OP_NOT_HSPACE:
5204             switch(c)
5205               {
5206               default: break;
5207               HSPACE_BYTE_CASES:
5208 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5209               HSPACE_MULTIBYTE_CASES:
5210 #endif
5211               RRETURN(MATCH_NOMATCH);
5212               }
5213             break;
5214 
5215             case OP_HSPACE:
5216             switch(c)
5217               {
5218               default: RRETURN(MATCH_NOMATCH);
5219               HSPACE_BYTE_CASES:
5220 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5221               HSPACE_MULTIBYTE_CASES:
5222 #endif
5223               break;
5224               }
5225             break;
5226 
5227             case OP_NOT_VSPACE:
5228             switch(c)
5229               {
5230               default: break;
5231               VSPACE_BYTE_CASES:
5232 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5233               VSPACE_MULTIBYTE_CASES:
5234 #endif
5235               RRETURN(MATCH_NOMATCH);
5236               }
5237             break;
5238 
5239             case OP_VSPACE:
5240             switch(c)
5241               {
5242               default: RRETURN(MATCH_NOMATCH);
5243               VSPACE_BYTE_CASES:
5244 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5245               VSPACE_MULTIBYTE_CASES:
5246 #endif
5247               break;
5248               }
5249             break;
5250 
5251             case OP_NOT_DIGIT:
5252             if (MAX_255(c) && (md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
5253             break;
5254 
5255             case OP_DIGIT:
5256             if (!MAX_255(c) || (md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
5257             break;
5258 
5259             case OP_NOT_WHITESPACE:
5260             if (MAX_255(c) && (md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
5261             break;
5262 
5263             case OP_WHITESPACE:
5264             if (!MAX_255(c) || (md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
5265             break;
5266 
5267             case OP_NOT_WORDCHAR:
5268             if (MAX_255(c) && (md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);
5269             break;
5270 
5271             case OP_WORDCHAR:
5272             if (!MAX_255(c) || (md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);
5273             break;
5274 
5275             default:
5276             RRETURN(PCRE_ERROR_INTERNAL);
5277             }
5278           }
5279         }
5280       /* Control never gets here */
5281       }
5282 
5283     /* If maximizing, it is worth using inline code for speed, doing the type
5284     test once at the start (i.e. keep it out of the loop). Again, keep the
5285     UTF-8 and UCP stuff separate. */
5286 
5287     else
5288       {
5289       pp = eptr;  /* Remember where we started */
5290 
5291 #ifdef SUPPORT_UCP
5292       if (prop_type >= 0)
5293         {
5294         switch(prop_type)
5295           {
5296           case PT_ANY:
5297           for (i = min; i < max; i++)
5298             {
5299             int len = 1;
5300             if (eptr >= md->end_subject)
5301               {
5302               SCHECK_PARTIAL();
5303               break;
5304               }
5305             GETCHARLENTEST(c, eptr, len);
5306             if (prop_fail_result) break;
5307             eptr+= len;
5308             }
5309           break;
5310 
5311           case PT_LAMP:
5312           for (i = min; i < max; i++)
5313             {
5314             int chartype;
5315             int len = 1;
5316             if (eptr >= md->end_subject)
5317               {
5318               SCHECK_PARTIAL();
5319               break;
5320               }
5321             GETCHARLENTEST(c, eptr, len);
5322             chartype = UCD_CHARTYPE(c);
5323             if ((chartype == ucp_Lu ||
5324                  chartype == ucp_Ll ||
5325                  chartype == ucp_Lt) == prop_fail_result)
5326               break;
5327             eptr+= len;
5328             }
5329           break;
5330 
5331           case PT_GC:
5332           for (i = min; i < max; i++)
5333             {
5334             int len = 1;
5335             if (eptr >= md->end_subject)
5336               {
5337               SCHECK_PARTIAL();
5338               break;
5339               }
5340             GETCHARLENTEST(c, eptr, len);
5341             if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result) break;
5342             eptr+= len;
5343             }
5344           break;
5345 
5346           case PT_PC:
5347           for (i = min; i < max; i++)
5348             {
5349             int len = 1;
5350             if (eptr >= md->end_subject)
5351               {
5352               SCHECK_PARTIAL();
5353               break;
5354               }
5355             GETCHARLENTEST(c, eptr, len);
5356             if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result) break;
5357             eptr+= len;
5358             }
5359           break;
5360 
5361           case PT_SC:
5362           for (i = min; i < max; i++)
5363             {
5364             int len = 1;
5365             if (eptr >= md->end_subject)
5366               {
5367               SCHECK_PARTIAL();
5368               break;
5369               }
5370             GETCHARLENTEST(c, eptr, len);
5371             if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result) break;
5372             eptr+= len;
5373             }
5374           break;
5375 
5376           case PT_ALNUM:
5377           for (i = min; i < max; i++)
5378             {
5379             int category;
5380             int len = 1;
5381             if (eptr >= md->end_subject)
5382               {
5383               SCHECK_PARTIAL();
5384               break;
5385               }
5386             GETCHARLENTEST(c, eptr, len);
5387             category = UCD_CATEGORY(c);
5388             if ((category == ucp_L || category == ucp_N) == prop_fail_result)
5389               break;
5390             eptr+= len;
5391             }
5392           break;
5393 
5394           case PT_SPACE:    /* Perl space */
5395           for (i = min; i < max; i++)
5396             {
5397             int len = 1;
5398             if (eptr >= md->end_subject)
5399               {
5400               SCHECK_PARTIAL();
5401               break;
5402               }
5403             GETCHARLENTEST(c, eptr, len);
5404             if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
5405                  c == CHAR_FF || c == CHAR_CR)
5406                  == prop_fail_result)
5407               break;
5408             eptr+= len;
5409             }
5410           break;
5411 
5412           case PT_PXSPACE:  /* POSIX space */
5413           for (i = min; i < max; i++)
5414             {
5415             int len = 1;
5416             if (eptr >= md->end_subject)
5417               {
5418               SCHECK_PARTIAL();
5419               break;
5420               }
5421             GETCHARLENTEST(c, eptr, len);
5422             if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
5423                  c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
5424                  == prop_fail_result)
5425               break;
5426             eptr+= len;
5427             }
5428           break;
5429 
5430           case PT_WORD:
5431           for (i = min; i < max; i++)
5432             {
5433             int category;
5434             int len = 1;
5435             if (eptr >= md->end_subject)
5436               {
5437               SCHECK_PARTIAL();
5438               break;
5439               }
5440             GETCHARLENTEST(c, eptr, len);
5441             category = UCD_CATEGORY(c);
5442             if ((category == ucp_L || category == ucp_N ||
5443                  c == CHAR_UNDERSCORE) == prop_fail_result)
5444               break;
5445             eptr+= len;
5446             }
5447           break;
5448 
5449           case PT_CLIST:
5450           for (i = min; i < max; i++)
5451             {
5452             const pcre_uint32 *cp;
5453             int len = 1;
5454             if (eptr >= md->end_subject)
5455               {
5456               SCHECK_PARTIAL();
5457               break;
5458               }
5459             GETCHARLENTEST(c, eptr, len);
5460             cp = PRIV(ucd_caseless_sets) + prop_value;
5461             for (;;)
5462               {
5463               if (c < *cp)
5464                 { if (prop_fail_result) break; else goto GOT_MAX; }
5465               if (c == *cp++)
5466                 { if (prop_fail_result) goto GOT_MAX; else break; }
5467               }
5468             eptr += len;
5469             }
5470           GOT_MAX:
5471           break;
5472 
5473           default:
5474           RRETURN(PCRE_ERROR_INTERNAL);
5475           }
5476 
5477         /* eptr is now past the end of the maximum run */
5478 
5479         if (possessive) continue;
5480         for(;;)
5481           {
5482           RMATCH(eptr, ecode, offset_top, md, eptrb, RM44);
5483           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5484           if (eptr-- == pp) break;        /* Stop if tried at original pos */
5485           if (utf) BACKCHAR(eptr);
5486           }
5487         }
5488 
5489       /* Match extended Unicode sequences. We will get here only if the
5490       support is in the binary; otherwise a compile-time error occurs. */
5491 
5492       else if (ctype == OP_EXTUNI)
5493         {
5494         for (i = min; i < max; i++)
5495           {
5496           if (eptr >= md->end_subject)
5497             {
5498             SCHECK_PARTIAL();
5499             break;
5500             }
5501           else
5502             {
5503             int lgb, rgb;
5504             GETCHARINCTEST(c, eptr);
5505             lgb = UCD_GRAPHBREAK(c);
5506             while (eptr < md->end_subject)
5507               {
5508               int len = 1;
5509               if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
5510               rgb = UCD_GRAPHBREAK(c);
5511               if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
5512               lgb = rgb;
5513               eptr += len;
5514               }
5515             }
5516           CHECK_PARTIAL();
5517           }
5518 
5519         /* eptr is now past the end of the maximum run */
5520 
5521         if (possessive) continue;
5522 
5523         for(;;)
5524           {
5525           RMATCH(eptr, ecode, offset_top, md, eptrb, RM45);
5526           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5527           if (eptr-- == pp) break;        /* Stop if tried at original pos */
5528           for (;;)                        /* Move back over one extended */
5529             {
5530             if (!utf) c = *eptr; else
5531               {
5532               BACKCHAR(eptr);
5533               GETCHAR(c, eptr);
5534               }
5535             if (UCD_CATEGORY(c) != ucp_M) break;
5536             eptr--;
5537             }
5538           }
5539         }
5540 
5541       else
5542 #endif   /* SUPPORT_UCP */
5543 
5544 #ifdef SUPPORT_UTF
5545       if (utf)
5546         {
5547         switch(ctype)
5548           {
5549           case OP_ANY:
5550           if (max < INT_MAX)
5551             {
5552             for (i = min; i < max; i++)
5553               {
5554               if (eptr >= md->end_subject)
5555                 {
5556                 SCHECK_PARTIAL();
5557                 break;
5558                 }
5559               if (IS_NEWLINE(eptr)) break;
5560               if (md->partial != 0 &&    /* Take care with CRLF partial */
5561                   eptr + 1 >= md->end_subject &&
5562                   NLBLOCK->nltype == NLTYPE_FIXED &&
5563                   NLBLOCK->nllen == 2 &&
5564                   RAWUCHAR(eptr) == NLBLOCK->nl[0])
5565                 {
5566                 md->hitend = TRUE;
5567                 if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5568                 }
5569               eptr++;
5570               ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
5571               }
5572             }
5573 
5574           /* Handle unlimited UTF-8 repeat */
5575 
5576           else
5577             {
5578             for (i = min; i < max; i++)
5579               {
5580               if (eptr >= md->end_subject)
5581                 {
5582                 SCHECK_PARTIAL();
5583                 break;
5584                 }
5585               if (IS_NEWLINE(eptr)) break;
5586               if (md->partial != 0 &&    /* Take care with CRLF partial */
5587                   eptr + 1 >= md->end_subject &&
5588                   NLBLOCK->nltype == NLTYPE_FIXED &&
5589                   NLBLOCK->nllen == 2 &&
5590                   RAWUCHAR(eptr) == NLBLOCK->nl[0])
5591                 {
5592                 md->hitend = TRUE;
5593                 if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5594                 }
5595               eptr++;
5596               ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
5597               }
5598             }
5599           break;
5600 
5601           case OP_ALLANY:
5602           if (max < INT_MAX)
5603             {
5604             for (i = min; i < max; i++)
5605               {
5606               if (eptr >= md->end_subject)
5607                 {
5608                 SCHECK_PARTIAL();
5609                 break;
5610                 }
5611               eptr++;
5612               ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
5613               }
5614             }
5615           else
5616             {
5617             eptr = md->end_subject;   /* Unlimited UTF-8 repeat */
5618             SCHECK_PARTIAL();
5619             }
5620           break;
5621 
5622           /* The byte case is the same as non-UTF8 */
5623 
5624           case OP_ANYBYTE:
5625           c = max - min;
5626           if (c > (unsigned int)(md->end_subject - eptr))
5627             {
5628             eptr = md->end_subject;
5629             SCHECK_PARTIAL();
5630             }
5631           else eptr += c;
5632           break;
5633 
5634           case OP_ANYNL:
5635           for (i = min; i < max; i++)
5636             {
5637             int len = 1;
5638             if (eptr >= md->end_subject)
5639               {
5640               SCHECK_PARTIAL();
5641               break;
5642               }
5643             GETCHARLEN(c, eptr, len);
5644             if (c == CHAR_CR)
5645               {
5646               if (++eptr >= md->end_subject) break;
5647               if (RAWUCHAR(eptr) == CHAR_LF) eptr++;
5648               }
5649             else
5650               {
5651               if (c != CHAR_LF &&
5652                   (md->bsr_anycrlf ||
5653                    (c != CHAR_VT && c != CHAR_FF && c != CHAR_NEL
5654 #ifndef EBCDIC
5655                     && c != 0x2028 && c != 0x2029
5656 #endif  /* Not EBCDIC */
5657                     )))
5658                 break;
5659               eptr += len;
5660               }
5661             }
5662           break;
5663 
5664           case OP_NOT_HSPACE:
5665           case OP_HSPACE:
5666           for (i = min; i < max; i++)
5667             {
5668             BOOL gotspace;
5669             int len = 1;
5670             if (eptr >= md->end_subject)
5671               {
5672               SCHECK_PARTIAL();
5673               break;
5674               }
5675             GETCHARLEN(c, eptr, len);
5676             switch(c)
5677               {
5678               HSPACE_CASES: gotspace = TRUE; break;
5679               default: gotspace = FALSE; break;
5680               }
5681             if (gotspace == (ctype == OP_NOT_HSPACE)) break;
5682             eptr += len;
5683             }
5684           break;
5685 
5686           case OP_NOT_VSPACE:
5687           case OP_VSPACE:
5688           for (i = min; i < max; i++)
5689             {
5690             BOOL gotspace;
5691             int len = 1;
5692             if (eptr >= md->end_subject)
5693               {
5694               SCHECK_PARTIAL();
5695               break;
5696               }
5697             GETCHARLEN(c, eptr, len);
5698             switch(c)
5699               {
5700               VSPACE_CASES: gotspace = TRUE; break;
5701               default: gotspace = FALSE; break;
5702               }
5703             if (gotspace == (ctype == OP_NOT_VSPACE)) break;
5704             eptr += len;
5705             }
5706           break;
5707 
5708           case OP_NOT_DIGIT:
5709           for (i = min; i < max; i++)
5710             {
5711             int len = 1;
5712             if (eptr >= md->end_subject)
5713               {
5714               SCHECK_PARTIAL();
5715               break;
5716               }
5717             GETCHARLEN(c, eptr, len);
5718             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
5719             eptr+= len;
5720             }
5721           break;
5722 
5723           case OP_DIGIT:
5724           for (i = min; i < max; i++)
5725             {
5726             int len = 1;
5727             if (eptr >= md->end_subject)
5728               {
5729               SCHECK_PARTIAL();
5730               break;
5731               }
5732             GETCHARLEN(c, eptr, len);
5733             if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
5734             eptr+= len;
5735             }
5736           break;
5737 
5738           case OP_NOT_WHITESPACE:
5739           for (i = min; i < max; i++)
5740             {
5741             int len = 1;
5742             if (eptr >= md->end_subject)
5743               {
5744               SCHECK_PARTIAL();
5745               break;
5746               }
5747             GETCHARLEN(c, eptr, len);
5748             if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
5749             eptr+= len;
5750             }
5751           break;
5752 
5753           case OP_WHITESPACE:
5754           for (i = min; i < max; i++)
5755             {
5756             int len = 1;
5757             if (eptr >= md->end_subject)
5758               {
5759               SCHECK_PARTIAL();
5760               break;
5761               }
5762             GETCHARLEN(c, eptr, len);
5763             if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
5764             eptr+= len;
5765             }
5766           break;
5767 
5768           case OP_NOT_WORDCHAR:
5769           for (i = min; i < max; i++)
5770             {
5771             int len = 1;
5772             if (eptr >= md->end_subject)
5773               {
5774               SCHECK_PARTIAL();
5775               break;
5776               }
5777             GETCHARLEN(c, eptr, len);
5778             if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
5779             eptr+= len;
5780             }
5781           break;
5782 
5783           case OP_WORDCHAR:
5784           for (i = min; i < max; i++)
5785             {
5786             int len = 1;
5787             if (eptr >= md->end_subject)
5788               {
5789               SCHECK_PARTIAL();
5790               break;
5791               }
5792             GETCHARLEN(c, eptr, len);
5793             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
5794             eptr+= len;
5795             }
5796           break;
5797 
5798           default:
5799           RRETURN(PCRE_ERROR_INTERNAL);
5800           }
5801 
5802         /* eptr is now past the end of the maximum run. If possessive, we are
5803         done (no backing up). Otherwise, match at this position; anything other
5804         than no match is immediately returned. For nomatch, back up one
5805         character, unless we are matching \R and the last thing matched was
5806         \r\n, in which case, back up two bytes. */
5807 
5808         if (possessive) continue;
5809         for(;;)
5810           {
5811           RMATCH(eptr, ecode, offset_top, md, eptrb, RM46);
5812           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5813           if (eptr-- == pp) break;        /* Stop if tried at original pos */
5814           BACKCHAR(eptr);
5815           if (ctype == OP_ANYNL && eptr > pp  && RAWUCHAR(eptr) == CHAR_NL &&
5816               RAWUCHAR(eptr - 1) == CHAR_CR) eptr--;
5817           }
5818         }
5819       else
5820 #endif  /* SUPPORT_UTF */
5821       /* Not UTF mode */
5822         {
5823         switch(ctype)
5824           {
5825           case OP_ANY:
5826           for (i = min; i < max; i++)
5827             {
5828             if (eptr >= md->end_subject)
5829               {
5830               SCHECK_PARTIAL();
5831               break;
5832               }
5833             if (IS_NEWLINE(eptr)) break;
5834             if (md->partial != 0 &&    /* Take care with CRLF partial */
5835                 eptr + 1 >= md->end_subject &&
5836                 NLBLOCK->nltype == NLTYPE_FIXED &&
5837                 NLBLOCK->nllen == 2 &&
5838                 *eptr == NLBLOCK->nl[0])
5839               {
5840               md->hitend = TRUE;
5841               if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5842               }
5843             eptr++;
5844             }
5845           break;
5846 
5847           case OP_ALLANY:
5848           case OP_ANYBYTE:
5849           c = max - min;
5850           if (c > (unsigned int)(md->end_subject - eptr))
5851             {
5852             eptr = md->end_subject;
5853             SCHECK_PARTIAL();
5854             }
5855           else eptr += c;
5856           break;
5857 
5858           case OP_ANYNL:
5859           for (i = min; i < max; i++)
5860             {
5861             if (eptr >= md->end_subject)
5862               {
5863               SCHECK_PARTIAL();
5864               break;
5865               }
5866             c = *eptr;
5867             if (c == CHAR_CR)
5868               {
5869               if (++eptr >= md->end_subject) break;
5870               if (*eptr == CHAR_LF) eptr++;
5871               }
5872             else
5873               {
5874               if (c != CHAR_LF && (md->bsr_anycrlf ||
5875                  (c != CHAR_VT && c != CHAR_FF && c != CHAR_NEL
5876 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5877                  && c != 0x2028 && c != 0x2029
5878 #endif
5879                  ))) break;
5880               eptr++;
5881               }
5882             }
5883           break;
5884 
5885           case OP_NOT_HSPACE:
5886           for (i = min; i < max; i++)
5887             {
5888             if (eptr >= md->end_subject)
5889               {
5890               SCHECK_PARTIAL();
5891               break;
5892               }
5893             switch(*eptr)
5894               {
5895               default: eptr++; break;
5896               HSPACE_BYTE_CASES:
5897 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5898               HSPACE_MULTIBYTE_CASES:
5899 #endif
5900               goto ENDLOOP00;
5901               }
5902             }
5903           ENDLOOP00:
5904           break;
5905 
5906           case OP_HSPACE:
5907           for (i = min; i < max; i++)
5908             {
5909             if (eptr >= md->end_subject)
5910               {
5911               SCHECK_PARTIAL();
5912               break;
5913               }
5914             switch(*eptr)
5915               {
5916               default: goto ENDLOOP01;
5917               HSPACE_BYTE_CASES:
5918 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5919               HSPACE_MULTIBYTE_CASES:
5920 #endif
5921               eptr++; break;
5922               }
5923             }
5924           ENDLOOP01:
5925           break;
5926 
5927           case OP_NOT_VSPACE:
5928           for (i = min; i < max; i++)
5929             {
5930             if (eptr >= md->end_subject)
5931               {
5932               SCHECK_PARTIAL();
5933               break;
5934               }
5935             switch(*eptr)
5936               {
5937               default: eptr++; break;
5938               VSPACE_BYTE_CASES:
5939 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5940               VSPACE_MULTIBYTE_CASES:
5941 #endif
5942               goto ENDLOOP02;
5943               }
5944             }
5945           ENDLOOP02:
5946           break;
5947 
5948           case OP_VSPACE:
5949           for (i = min; i < max; i++)
5950             {
5951             if (eptr >= md->end_subject)
5952               {
5953               SCHECK_PARTIAL();
5954               break;
5955               }
5956             switch(*eptr)
5957               {
5958               default: goto ENDLOOP03;
5959               VSPACE_BYTE_CASES:
5960 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5961               VSPACE_MULTIBYTE_CASES:
5962 #endif
5963               eptr++; break;
5964               }
5965             }
5966           ENDLOOP03:
5967           break;
5968 
5969           case OP_NOT_DIGIT:
5970           for (i = min; i < max; i++)
5971             {
5972             if (eptr >= md->end_subject)
5973               {
5974               SCHECK_PARTIAL();
5975               break;
5976               }
5977             if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0) break;
5978             eptr++;
5979             }
5980           break;
5981 
5982           case OP_DIGIT:
5983           for (i = min; i < max; i++)
5984             {
5985             if (eptr >= md->end_subject)
5986               {
5987               SCHECK_PARTIAL();
5988               break;
5989               }
5990             if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0) break;
5991             eptr++;
5992             }
5993           break;
5994 
5995           case OP_NOT_WHITESPACE:
5996           for (i = min; i < max; i++)
5997             {
5998             if (eptr >= md->end_subject)
5999               {
6000               SCHECK_PARTIAL();
6001               break;
6002               }
6003             if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0) break;
6004             eptr++;
6005             }
6006           break;
6007 
6008           case OP_WHITESPACE:
6009           for (i = min; i < max; i++)
6010             {
6011             if (eptr >= md->end_subject)
6012               {
6013               SCHECK_PARTIAL();
6014               break;
6015               }
6016             if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0) break;
6017             eptr++;
6018             }
6019           break;
6020 
6021           case OP_NOT_WORDCHAR:
6022           for (i = min; i < max; i++)
6023             {
6024             if (eptr >= md->end_subject)
6025               {
6026               SCHECK_PARTIAL();
6027               break;
6028               }
6029             if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0) break;
6030             eptr++;
6031             }
6032           break;
6033 
6034           case OP_WORDCHAR:
6035           for (i = min; i < max; i++)
6036             {
6037             if (eptr >= md->end_subject)
6038               {
6039               SCHECK_PARTIAL();
6040               break;
6041               }
6042             if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0) break;
6043             eptr++;
6044             }
6045           break;
6046 
6047           default:
6048           RRETURN(PCRE_ERROR_INTERNAL);
6049           }
6050 
6051         /* eptr is now past the end of the maximum run. If possessive, we are
6052         done (no backing up). Otherwise, match at this position; anything other
6053         than no match is immediately returned. For nomatch, back up one
6054         character (byte), unless we are matching \R and the last thing matched
6055         was \r\n, in which case, back up two bytes. */
6056 
6057         if (possessive) continue;
6058         while (eptr >= pp)
6059           {
6060           RMATCH(eptr, ecode, offset_top, md, eptrb, RM47);
6061           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6062           eptr--;
6063           if (ctype == OP_ANYNL && eptr > pp  && *eptr == CHAR_LF &&
6064               eptr[-1] == CHAR_CR) eptr--;
6065           }
6066         }
6067 
6068       /* Get here if we can't make it match with any permitted repetitions */
6069 
6070       RRETURN(MATCH_NOMATCH);
6071       }
6072     /* Control never gets here */
6073 
6074     /* There's been some horrible disaster. Arrival here can only mean there is
6075     something seriously wrong in the code above or the OP_xxx definitions. */
6076 
6077     default:
6078     DPRINTF(("Unknown opcode %d\n", *ecode));
6079     RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
6080     }
6081 
6082   /* Do not stick any code in here without much thought; it is assumed
6083   that "continue" in the code above comes out to here to repeat the main
6084   loop. */
6085 
6086   }             /* End of main loop */
6087 /* Control never reaches here */
6088 
6089 
6090 /* When compiling to use the heap rather than the stack for recursive calls to
6091 match(), the RRETURN() macro jumps here. The number that is saved in
6092 frame->Xwhere indicates which label we actually want to return to. */
6093 
6094 #ifdef NO_RECURSE
6095 #define LBL(val) case val: goto L_RM##val;
6096 HEAP_RETURN:
6097 switch (frame->Xwhere)
6098   {
6099   LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
6100   LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
6101   LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
6102   LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
6103   LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) LBL(64)
6104   LBL(65) LBL(66)
6105 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
6106   LBL(21)
6107 #endif
6108 #ifdef SUPPORT_UTF
6109   LBL(16) LBL(18) LBL(20)
6110   LBL(22) LBL(23) LBL(28) LBL(30)
6111   LBL(32) LBL(34) LBL(42) LBL(46)
6112 #ifdef SUPPORT_UCP
6113   LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
6114   LBL(59) LBL(60) LBL(61) LBL(62) LBL(67)
6115 #endif  /* SUPPORT_UCP */
6116 #endif  /* SUPPORT_UTF */
6117   default:
6118   DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
6119   return PCRE_ERROR_INTERNAL;
6120   }
6121 #undef LBL
6122 #endif  /* NO_RECURSE */
6123 }
6124 
6125 
6126 /***************************************************************************
6127 ****************************************************************************
6128                    RECURSION IN THE match() FUNCTION
6129 
6130 Undefine all the macros that were defined above to handle this. */
6131 
6132 #ifdef NO_RECURSE
6133 #undef eptr
6134 #undef ecode
6135 #undef mstart
6136 #undef offset_top
6137 #undef eptrb
6138 #undef flags
6139 
6140 #undef callpat
6141 #undef charptr
6142 #undef data
6143 #undef next
6144 #undef pp
6145 #undef prev
6146 #undef saved_eptr
6147 
6148 #undef new_recursive
6149 
6150 #undef cur_is_word
6151 #undef condition
6152 #undef prev_is_word
6153 
6154 #undef ctype
6155 #undef length
6156 #undef max
6157 #undef min
6158 #undef number
6159 #undef offset
6160 #undef op
6161 #undef save_capture_last
6162 #undef save_offset1
6163 #undef save_offset2
6164 #undef save_offset3
6165 #undef stacksave
6166 
6167 #undef newptrb
6168 
6169 #endif
6170 
6171 /* These two are defined as macros in both cases */
6172 
6173 #undef fc
6174 #undef fi
6175 
6176 /***************************************************************************
6177 ***************************************************************************/
6178 
6179 
6180 #ifdef NO_RECURSE
6181 /*************************************************
6182 *          Release allocated heap frames         *
6183 *************************************************/
6184 
6185 /* This function releases all the allocated frames. The base frame is on the
6186 machine stack, and so must not be freed.
6187 
6188 Argument: the address of the base frame
6189 Returns:  nothing
6190 */
6191 
6192 static void
release_match_heapframes(heapframe * frame_base)6193 release_match_heapframes (heapframe *frame_base)
6194 {
6195 heapframe *nextframe = frame_base->Xnextframe;
6196 while (nextframe != NULL)
6197   {
6198   heapframe *oldframe = nextframe;
6199   nextframe = nextframe->Xnextframe;
6200   (PUBL(stack_free))(oldframe);
6201   }
6202 }
6203 #endif
6204 
6205 
6206 /*************************************************
6207 *         Execute a Regular Expression           *
6208 *************************************************/
6209 
6210 /* This function applies a compiled re to a subject string and picks out
6211 portions of the string if it matches. Two elements in the vector are set for
6212 each substring: the offsets to the start and end of the substring.
6213 
6214 Arguments:
6215   argument_re     points to the compiled expression
6216   extra_data      points to extra data or is NULL
6217   subject         points to the subject string
6218   length          length of subject string (may contain binary zeros)
6219   start_offset    where to start in the subject string
6220   options         option bits
6221   offsets         points to a vector of ints to be filled in with offsets
6222   offsetcount     the number of elements in the vector
6223 
6224 Returns:          > 0 => success; value is the number of elements filled in
6225                   = 0 => success, but offsets is not big enough
6226                    -1 => failed to match
6227                  < -1 => some kind of unexpected problem
6228 */
6229 
6230 #if defined COMPILE_PCRE8
6231 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_exec(const pcre * argument_re,const pcre_extra * extra_data,PCRE_SPTR subject,int length,int start_offset,int options,int * offsets,int offsetcount)6232 pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
6233   PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
6234   int offsetcount)
6235 #elif defined COMPILE_PCRE16
6236 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
6237 pcre16_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
6238   PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
6239   int offsetcount)
6240 #elif defined COMPILE_PCRE32
6241 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
6242 pcre32_exec(const pcre32 *argument_re, const pcre32_extra *extra_data,
6243   PCRE_SPTR32 subject, int length, int start_offset, int options, int *offsets,
6244   int offsetcount)
6245 #endif
6246 {
6247 int rc, ocount, arg_offset_max;
6248 int newline;
6249 BOOL using_temporary_offsets = FALSE;
6250 BOOL anchored;
6251 BOOL startline;
6252 BOOL firstline;
6253 BOOL utf;
6254 BOOL has_first_char = FALSE;
6255 BOOL has_req_char = FALSE;
6256 pcre_uchar first_char = 0;
6257 pcre_uchar first_char2 = 0;
6258 pcre_uchar req_char = 0;
6259 pcre_uchar req_char2 = 0;
6260 match_data match_block;
6261 match_data *md = &match_block;
6262 const pcre_uint8 *tables;
6263 const pcre_uint8 *start_bits = NULL;
6264 PCRE_PUCHAR start_match = (PCRE_PUCHAR)subject + start_offset;
6265 PCRE_PUCHAR end_subject;
6266 PCRE_PUCHAR start_partial = NULL;
6267 PCRE_PUCHAR req_char_ptr = start_match - 1;
6268 
6269 const pcre_study_data *study;
6270 const REAL_PCRE *re = (const REAL_PCRE *)argument_re;
6271 
6272 #ifdef NO_RECURSE
6273 heapframe frame_zero;
6274 frame_zero.Xprevframe = NULL;            /* Marks the top level */
6275 frame_zero.Xnextframe = NULL;            /* None are allocated yet */
6276 md->match_frames_base = &frame_zero;
6277 #endif
6278 
6279 /* Check for the special magic call that measures the size of the stack used
6280 per recursive call of match(). Without the funny casting for sizeof, a Windows
6281 compiler gave this error: "unary minus operator applied to unsigned type,
6282 result still unsigned". Hopefully the cast fixes that. */
6283 
6284 if (re == NULL && extra_data == NULL && subject == NULL && length == -999 &&
6285     start_offset == -999)
6286 #ifdef NO_RECURSE
6287   return -((int)sizeof(heapframe));
6288 #else
6289   return match(NULL, NULL, NULL, 0, NULL, NULL, 0);
6290 #endif
6291 
6292 /* Plausibility checks */
6293 
6294 if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
6295 if (re == NULL || subject == NULL || (offsets == NULL && offsetcount > 0))
6296   return PCRE_ERROR_NULL;
6297 if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
6298 if (length < 0) return PCRE_ERROR_BADLENGTH;
6299 if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
6300 
6301 /* Check that the first field in the block is the magic number. If it is not,
6302 return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
6303 REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
6304 means that the pattern is likely compiled with different endianness. */
6305 
6306 if (re->magic_number != MAGIC_NUMBER)
6307   return re->magic_number == REVERSED_MAGIC_NUMBER?
6308     PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
6309 if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
6310 
6311 /* These two settings are used in the code for checking a UTF-8 string that
6312 follows immediately afterwards. Other values in the md block are used only
6313 during "normal" pcre_exec() processing, not when the JIT support is in use,
6314 so they are set up later. */
6315 
6316 /* PCRE_UTF16 has the same value as PCRE_UTF8. */
6317 utf = md->utf = (re->options & PCRE_UTF8) != 0;
6318 md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
6319               ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
6320 
6321 /* Check a UTF-8 string if required. Pass back the character offset and error
6322 code for an invalid string if a results vector is available. */
6323 
6324 #ifdef SUPPORT_UTF
6325 if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
6326   {
6327   int erroroffset;
6328   int errorcode = PRIV(valid_utf)((PCRE_PUCHAR)subject, length, &erroroffset);
6329   if (errorcode != 0)
6330     {
6331     if (offsetcount >= 2)
6332       {
6333       offsets[0] = erroroffset;
6334       offsets[1] = errorcode;
6335       }
6336 #if defined COMPILE_PCRE8
6337     return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?
6338       PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
6339 #elif defined COMPILE_PCRE16
6340     return (errorcode <= PCRE_UTF16_ERR1 && md->partial > 1)?
6341       PCRE_ERROR_SHORTUTF16 : PCRE_ERROR_BADUTF16;
6342 #elif defined COMPILE_PCRE32
6343     return PCRE_ERROR_BADUTF32;
6344 #endif
6345     }
6346 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
6347   /* Check that a start_offset points to the start of a UTF character. */
6348   if (start_offset > 0 && start_offset < length &&
6349       NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
6350     return PCRE_ERROR_BADUTF8_OFFSET;
6351 #endif
6352   }
6353 #endif
6354 
6355 /* If the pattern was successfully studied with JIT support, run the JIT
6356 executable instead of the rest of this function. Most options must be set at
6357 compile time for the JIT code to be usable. Fall back to the normal code path
6358 if an unsupported flag is set. */
6359 
6360 #ifdef SUPPORT_JIT
6361 if (extra_data != NULL
6362     && (extra_data->flags & (PCRE_EXTRA_EXECUTABLE_JIT |
6363                              PCRE_EXTRA_TABLES)) == PCRE_EXTRA_EXECUTABLE_JIT
6364     && extra_data->executable_jit != NULL
6365     && (options & ~PUBLIC_JIT_EXEC_OPTIONS) == 0)
6366   {
6367   rc = PRIV(jit_exec)(extra_data, (const pcre_uchar *)subject, length,
6368        start_offset, options, offsets, offsetcount);
6369 
6370   /* PCRE_ERROR_JIT_BADOPTION means the selected normal or partial matching
6371   mode is not compiled. In this case we simply fall back to the interpreter. */
6372 
6373   if (rc != PCRE_ERROR_JIT_BADOPTION) return rc;
6374   }
6375 #endif
6376 
6377 /* Carry on with non-JIT matching. This information is for finding all the
6378 numbers associated with a given name, for condition testing. */
6379 
6380 md->name_table = (pcre_uchar *)re + re->name_table_offset;
6381 md->name_count = re->name_count;
6382 md->name_entry_size = re->name_entry_size;
6383 
6384 /* Fish out the optional data from the extra_data structure, first setting
6385 the default values. */
6386 
6387 study = NULL;
6388 md->match_limit = MATCH_LIMIT;
6389 md->match_limit_recursion = MATCH_LIMIT_RECURSION;
6390 md->callout_data = NULL;
6391 
6392 /* The table pointer is always in native byte order. */
6393 
6394 tables = re->tables;
6395 
6396 if (extra_data != NULL)
6397   {
6398   register unsigned int flags = extra_data->flags;
6399   if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
6400     study = (const pcre_study_data *)extra_data->study_data;
6401   if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
6402     md->match_limit = extra_data->match_limit;
6403   if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
6404     md->match_limit_recursion = extra_data->match_limit_recursion;
6405   if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
6406     md->callout_data = extra_data->callout_data;
6407   if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
6408   }
6409 
6410 /* If the exec call supplied NULL for tables, use the inbuilt ones. This
6411 is a feature that makes it possible to save compiled regex and re-use them
6412 in other programs later. */
6413 
6414 if (tables == NULL) tables = PRIV(default_tables);
6415 
6416 /* Set up other data */
6417 
6418 anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
6419 startline = (re->flags & PCRE_STARTLINE) != 0;
6420 firstline = (re->options & PCRE_FIRSTLINE) != 0;
6421 
6422 /* The code starts after the real_pcre block and the capture name table. */
6423 
6424 md->start_code = (const pcre_uchar *)re + re->name_table_offset +
6425   re->name_count * re->name_entry_size;
6426 
6427 md->start_subject = (PCRE_PUCHAR)subject;
6428 md->start_offset = start_offset;
6429 md->end_subject = md->start_subject + length;
6430 end_subject = md->end_subject;
6431 
6432 md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
6433 md->use_ucp = (re->options & PCRE_UCP) != 0;
6434 md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
6435 md->ignore_skip_arg = FALSE;
6436 
6437 /* Some options are unpacked into BOOL variables in the hope that testing
6438 them will be faster than individual option bits. */
6439 
6440 md->notbol = (options & PCRE_NOTBOL) != 0;
6441 md->noteol = (options & PCRE_NOTEOL) != 0;
6442 md->notempty = (options & PCRE_NOTEMPTY) != 0;
6443 md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
6444 
6445 md->hitend = FALSE;
6446 md->mark = md->nomatch_mark = NULL;     /* In case never set */
6447 
6448 md->recursive = NULL;                   /* No recursion at top level */
6449 md->hasthen = (re->flags & PCRE_HASTHEN) != 0;
6450 
6451 md->lcc = tables + lcc_offset;
6452 md->fcc = tables + fcc_offset;
6453 md->ctypes = tables + ctypes_offset;
6454 
6455 /* Handle different \R options. */
6456 
6457 switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
6458   {
6459   case 0:
6460   if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
6461     md->bsr_anycrlf = (re->options & PCRE_BSR_ANYCRLF) != 0;
6462   else
6463 #ifdef BSR_ANYCRLF
6464   md->bsr_anycrlf = TRUE;
6465 #else
6466   md->bsr_anycrlf = FALSE;
6467 #endif
6468   break;
6469 
6470   case PCRE_BSR_ANYCRLF:
6471   md->bsr_anycrlf = TRUE;
6472   break;
6473 
6474   case PCRE_BSR_UNICODE:
6475   md->bsr_anycrlf = FALSE;
6476   break;
6477 
6478   default: return PCRE_ERROR_BADNEWLINE;
6479   }
6480 
6481 /* Handle different types of newline. The three bits give eight cases. If
6482 nothing is set at run time, whatever was used at compile time applies. */
6483 
6484 switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options :
6485         (pcre_uint32)options) & PCRE_NEWLINE_BITS)
6486   {
6487   case 0: newline = NEWLINE; break;   /* Compile-time default */
6488   case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
6489   case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
6490   case PCRE_NEWLINE_CR+
6491        PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
6492   case PCRE_NEWLINE_ANY: newline = -1; break;
6493   case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
6494   default: return PCRE_ERROR_BADNEWLINE;
6495   }
6496 
6497 if (newline == -2)
6498   {
6499   md->nltype = NLTYPE_ANYCRLF;
6500   }
6501 else if (newline < 0)
6502   {
6503   md->nltype = NLTYPE_ANY;
6504   }
6505 else
6506   {
6507   md->nltype = NLTYPE_FIXED;
6508   if (newline > 255)
6509     {
6510     md->nllen = 2;
6511     md->nl[0] = (newline >> 8) & 255;
6512     md->nl[1] = newline & 255;
6513     }
6514   else
6515     {
6516     md->nllen = 1;
6517     md->nl[0] = newline;
6518     }
6519   }
6520 
6521 /* Partial matching was originally supported only for a restricted set of
6522 regexes; from release 8.00 there are no restrictions, but the bits are still
6523 defined (though never set). So there's no harm in leaving this code. */
6524 
6525 if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
6526   return PCRE_ERROR_BADPARTIAL;
6527 
6528 /* If the expression has got more back references than the offsets supplied can
6529 hold, we get a temporary chunk of working store to use during the matching.
6530 Otherwise, we can use the vector supplied, rounding down its size to a multiple
6531 of 3. */
6532 
6533 ocount = offsetcount - (offsetcount % 3);
6534 arg_offset_max = (2*ocount)/3;
6535 
6536 if (re->top_backref > 0 && re->top_backref >= ocount/3)
6537   {
6538   ocount = re->top_backref * 3 + 3;
6539   md->offset_vector = (int *)(PUBL(malloc))(ocount * sizeof(int));
6540   if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
6541   using_temporary_offsets = TRUE;
6542   DPRINTF(("Got memory to hold back references\n"));
6543   }
6544 else md->offset_vector = offsets;
6545 
6546 md->offset_end = ocount;
6547 md->offset_max = (2*ocount)/3;
6548 md->offset_overflow = FALSE;
6549 md->capture_last = -1;
6550 
6551 /* Reset the working variable associated with each extraction. These should
6552 never be used unless previously set, but they get saved and restored, and so we
6553 initialize them to avoid reading uninitialized locations. Also, unset the
6554 offsets for the matched string. This is really just for tidiness with callouts,
6555 in case they inspect these fields. */
6556 
6557 if (md->offset_vector != NULL)
6558   {
6559   register int *iptr = md->offset_vector + ocount;
6560   register int *iend = iptr - re->top_bracket;
6561   if (iend < md->offset_vector + 2) iend = md->offset_vector + 2;
6562   while (--iptr >= iend) *iptr = -1;
6563   md->offset_vector[0] = md->offset_vector[1] = -1;
6564   }
6565 
6566 /* Set up the first character to match, if available. The first_char value is
6567 never set for an anchored regular expression, but the anchoring may be forced
6568 at run time, so we have to test for anchoring. The first char may be unset for
6569 an unanchored pattern, of course. If there's no first char and the pattern was
6570 studied, there may be a bitmap of possible first characters. */
6571 
6572 if (!anchored)
6573   {
6574   if ((re->flags & PCRE_FIRSTSET) != 0)
6575     {
6576     has_first_char = TRUE;
6577     first_char = first_char2 = (pcre_uchar)(re->first_char);
6578     if ((re->flags & PCRE_FCH_CASELESS) != 0)
6579       {
6580       first_char2 = TABLE_GET(first_char, md->fcc, first_char);
6581 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
6582       if (utf && first_char > 127)
6583         first_char2 = UCD_OTHERCASE(first_char);
6584 #endif
6585       }
6586     }
6587   else
6588     if (!startline && study != NULL &&
6589       (study->flags & PCRE_STUDY_MAPPED) != 0)
6590         start_bits = study->start_bits;
6591   }
6592 
6593 /* For anchored or unanchored matches, there may be a "last known required
6594 character" set. */
6595 
6596 if ((re->flags & PCRE_REQCHSET) != 0)
6597   {
6598   has_req_char = TRUE;
6599   req_char = req_char2 = (pcre_uchar)(re->req_char);
6600   if ((re->flags & PCRE_RCH_CASELESS) != 0)
6601     {
6602     req_char2 = TABLE_GET(req_char, md->fcc, req_char);
6603 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
6604     if (utf && req_char > 127)
6605       req_char2 = UCD_OTHERCASE(req_char);
6606 #endif
6607     }
6608   }
6609 
6610 
6611 /* ==========================================================================*/
6612 
6613 /* Loop for handling unanchored repeated matching attempts; for anchored regexes
6614 the loop runs just once. */
6615 
6616 for(;;)
6617   {
6618   PCRE_PUCHAR save_end_subject = end_subject;
6619   PCRE_PUCHAR new_start_match;
6620 
6621   /* If firstline is TRUE, the start of the match is constrained to the first
6622   line of a multiline string. That is, the match must be before or at the first
6623   newline. Implement this by temporarily adjusting end_subject so that we stop
6624   scanning at a newline. If the match fails at the newline, later code breaks
6625   this loop. */
6626 
6627   if (firstline)
6628     {
6629     PCRE_PUCHAR t = start_match;
6630 #ifdef SUPPORT_UTF
6631     if (utf)
6632       {
6633       while (t < md->end_subject && !IS_NEWLINE(t))
6634         {
6635         t++;
6636         ACROSSCHAR(t < end_subject, *t, t++);
6637         }
6638       }
6639     else
6640 #endif
6641     while (t < md->end_subject && !IS_NEWLINE(t)) t++;
6642     end_subject = t;
6643     }
6644 
6645   /* There are some optimizations that avoid running the match if a known
6646   starting point is not found, or if a known later character is not present.
6647   However, there is an option that disables these, for testing and for ensuring
6648   that all callouts do actually occur. The option can be set in the regex by
6649   (*NO_START_OPT) or passed in match-time options. */
6650 
6651   if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
6652     {
6653     /* Advance to a unique first char if there is one. */
6654 
6655     if (has_first_char)
6656       {
6657       pcre_uchar smc;
6658 
6659       if (first_char != first_char2)
6660         while (start_match < end_subject &&
6661           (smc = RAWUCHARTEST(start_match)) != first_char && smc != first_char2)
6662           start_match++;
6663       else
6664         while (start_match < end_subject && RAWUCHARTEST(start_match) != first_char)
6665           start_match++;
6666       }
6667 
6668     /* Or to just after a linebreak for a multiline match */
6669 
6670     else if (startline)
6671       {
6672       if (start_match > md->start_subject + start_offset)
6673         {
6674 #ifdef SUPPORT_UTF
6675         if (utf)
6676           {
6677           while (start_match < end_subject && !WAS_NEWLINE(start_match))
6678             {
6679             start_match++;
6680             ACROSSCHAR(start_match < end_subject, *start_match,
6681               start_match++);
6682             }
6683           }
6684         else
6685 #endif
6686         while (start_match < end_subject && !WAS_NEWLINE(start_match))
6687           start_match++;
6688 
6689         /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
6690         and we are now at a LF, advance the match position by one more character.
6691         */
6692 
6693         if (start_match[-1] == CHAR_CR &&
6694              (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
6695              start_match < end_subject &&
6696              RAWUCHARTEST(start_match) == CHAR_NL)
6697           start_match++;
6698         }
6699       }
6700 
6701     /* Or to a non-unique first byte after study */
6702 
6703     else if (start_bits != NULL)
6704       {
6705       while (start_match < end_subject)
6706         {
6707         register pcre_uint32 c = RAWUCHARTEST(start_match);
6708 #ifndef COMPILE_PCRE8
6709         if (c > 255) c = 255;
6710 #endif
6711         if ((start_bits[c/8] & (1 << (c&7))) == 0)
6712           {
6713           start_match++;
6714 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
6715           /* In non 8-bit mode, the iteration will stop for
6716           characters > 255 at the beginning or not stop at all. */
6717           if (utf)
6718             ACROSSCHAR(start_match < end_subject, *start_match,
6719               start_match++);
6720 #endif
6721           }
6722         else break;
6723         }
6724       }
6725     }   /* Starting optimizations */
6726 
6727   /* Restore fudged end_subject */
6728 
6729   end_subject = save_end_subject;
6730 
6731   /* The following two optimizations are disabled for partial matching or if
6732   disabling is explicitly requested. */
6733 
6734   if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0 && !md->partial)
6735     {
6736     /* If the pattern was studied, a minimum subject length may be set. This is
6737     a lower bound; no actual string of that length may actually match the
6738     pattern. Although the value is, strictly, in characters, we treat it as
6739     bytes to avoid spending too much time in this optimization. */
6740 
6741     if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
6742         (pcre_uint32)(end_subject - start_match) < study->minlength)
6743       {
6744       rc = MATCH_NOMATCH;
6745       break;
6746       }
6747 
6748     /* If req_char is set, we know that that character must appear in the
6749     subject for the match to succeed. If the first character is set, req_char
6750     must be later in the subject; otherwise the test starts at the match point.
6751     This optimization can save a huge amount of backtracking in patterns with
6752     nested unlimited repeats that aren't going to match. Writing separate code
6753     for cased/caseless versions makes it go faster, as does using an
6754     autoincrement and backing off on a match.
6755 
6756     HOWEVER: when the subject string is very, very long, searching to its end
6757     can take a long time, and give bad performance on quite ordinary patterns.
6758     This showed up when somebody was matching something like /^\d+C/ on a
6759     32-megabyte string... so we don't do this when the string is sufficiently
6760     long. */
6761 
6762     if (has_req_char && end_subject - start_match < REQ_BYTE_MAX)
6763       {
6764       register PCRE_PUCHAR p = start_match + (has_first_char? 1:0);
6765 
6766       /* We don't need to repeat the search if we haven't yet reached the
6767       place we found it at last time. */
6768 
6769       if (p > req_char_ptr)
6770         {
6771         if (req_char != req_char2)
6772           {
6773           while (p < end_subject)
6774             {
6775             register pcre_uint32 pp = RAWUCHARINCTEST(p);
6776             if (pp == req_char || pp == req_char2) { p--; break; }
6777             }
6778           }
6779         else
6780           {
6781           while (p < end_subject)
6782             {
6783             if (RAWUCHARINCTEST(p) == req_char) { p--; break; }
6784             }
6785           }
6786 
6787         /* If we can't find the required character, break the matching loop,
6788         forcing a match failure. */
6789 
6790         if (p >= end_subject)
6791           {
6792           rc = MATCH_NOMATCH;
6793           break;
6794           }
6795 
6796         /* If we have found the required character, save the point where we
6797         found it, so that we don't search again next time round the loop if
6798         the start hasn't passed this character yet. */
6799 
6800         req_char_ptr = p;
6801         }
6802       }
6803     }
6804 
6805 #ifdef PCRE_DEBUG  /* Sigh. Some compilers never learn. */
6806   printf(">>>> Match against: ");
6807   pchars(start_match, end_subject - start_match, TRUE, md);
6808   printf("\n");
6809 #endif
6810 
6811   /* OK, we can now run the match. If "hitend" is set afterwards, remember the
6812   first starting point for which a partial match was found. */
6813 
6814   md->start_match_ptr = start_match;
6815   md->start_used_ptr = start_match;
6816   md->match_call_count = 0;
6817   md->match_function_type = 0;
6818   md->end_offset_top = 0;
6819   rc = match(start_match, md->start_code, start_match, 2, md, NULL, 0);
6820   if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr;
6821 
6822   switch(rc)
6823     {
6824     /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched
6825     the SKIP's arg was not found. In this circumstance, Perl ignores the SKIP
6826     entirely. The only way we can do that is to re-do the match at the same
6827     point, with a flag to force SKIP with an argument to be ignored. Just
6828     treating this case as NOMATCH does not work because it does not check other
6829     alternatives in patterns such as A(*SKIP:A)B|AC when the subject is AC. */
6830 
6831     case MATCH_SKIP_ARG:
6832     new_start_match = start_match;
6833     md->ignore_skip_arg = TRUE;
6834     break;
6835 
6836     /* SKIP passes back the next starting point explicitly, but if it is the
6837     same as the match we have just done, treat it as NOMATCH. */
6838 
6839     case MATCH_SKIP:
6840     if (md->start_match_ptr != start_match)
6841       {
6842       new_start_match = md->start_match_ptr;
6843       break;
6844       }
6845     /* Fall through */
6846 
6847     /* NOMATCH and PRUNE advance by one character. THEN at this level acts
6848     exactly like PRUNE. Unset the ignore SKIP-with-argument flag. */
6849 
6850     case MATCH_NOMATCH:
6851     case MATCH_PRUNE:
6852     case MATCH_THEN:
6853     md->ignore_skip_arg = FALSE;
6854     new_start_match = start_match + 1;
6855 #ifdef SUPPORT_UTF
6856     if (utf)
6857       ACROSSCHAR(new_start_match < end_subject, *new_start_match,
6858         new_start_match++);
6859 #endif
6860     break;
6861 
6862     /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
6863 
6864     case MATCH_COMMIT:
6865     rc = MATCH_NOMATCH;
6866     goto ENDLOOP;
6867 
6868     /* Any other return is either a match, or some kind of error. */
6869 
6870     default:
6871     goto ENDLOOP;
6872     }
6873 
6874   /* Control reaches here for the various types of "no match at this point"
6875   result. Reset the code to MATCH_NOMATCH for subsequent checking. */
6876 
6877   rc = MATCH_NOMATCH;
6878 
6879   /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
6880   newline in the subject (though it may continue over the newline). Therefore,
6881   if we have just failed to match, starting at a newline, do not continue. */
6882 
6883   if (firstline && IS_NEWLINE(start_match)) break;
6884 
6885   /* Advance to new matching position */
6886 
6887   start_match = new_start_match;
6888 
6889   /* Break the loop if the pattern is anchored or if we have passed the end of
6890   the subject. */
6891 
6892   if (anchored || start_match > end_subject) break;
6893 
6894   /* If we have just passed a CR and we are now at a LF, and the pattern does
6895   not contain any explicit matches for \r or \n, and the newline option is CRLF
6896   or ANY or ANYCRLF, advance the match position by one more character. In
6897   normal matching start_match will always be greater than the first position at
6898   this stage, but a failed *SKIP can cause a return at the same point, which is
6899   why the first test exists. */
6900 
6901   if (start_match > (PCRE_PUCHAR)subject + start_offset &&
6902       start_match[-1] == CHAR_CR &&
6903       start_match < end_subject &&
6904       *start_match == CHAR_NL &&
6905       (re->flags & PCRE_HASCRORLF) == 0 &&
6906         (md->nltype == NLTYPE_ANY ||
6907          md->nltype == NLTYPE_ANYCRLF ||
6908          md->nllen == 2))
6909     start_match++;
6910 
6911   md->mark = NULL;   /* Reset for start of next match attempt */
6912   }                  /* End of for(;;) "bumpalong" loop */
6913 
6914 /* ==========================================================================*/
6915 
6916 /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
6917 conditions is true:
6918 
6919 (1) The pattern is anchored or the match was failed by (*COMMIT);
6920 
6921 (2) We are past the end of the subject;
6922 
6923 (3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
6924     this option requests that a match occur at or before the first newline in
6925     the subject.
6926 
6927 When we have a match and the offset vector is big enough to deal with any
6928 backreferences, captured substring offsets will already be set up. In the case
6929 where we had to get some local store to hold offsets for backreference
6930 processing, copy those that we can. In this case there need not be overflow if
6931 certain parts of the pattern were not used, even though there are more
6932 capturing parentheses than vector slots. */
6933 
6934 ENDLOOP:
6935 
6936 if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
6937   {
6938   if (using_temporary_offsets)
6939     {
6940     if (arg_offset_max >= 4)
6941       {
6942       memcpy(offsets + 2, md->offset_vector + 2,
6943         (arg_offset_max - 2) * sizeof(int));
6944       DPRINTF(("Copied offsets from temporary memory\n"));
6945       }
6946     if (md->end_offset_top > arg_offset_max) md->offset_overflow = TRUE;
6947     DPRINTF(("Freeing temporary memory\n"));
6948     (PUBL(free))(md->offset_vector);
6949     }
6950 
6951   /* Set the return code to the number of captured strings, or 0 if there were
6952   too many to fit into the vector. */
6953 
6954   rc = (md->offset_overflow && md->end_offset_top >= arg_offset_max)?
6955     0 : md->end_offset_top/2;
6956 
6957   /* If there is space in the offset vector, set any unused pairs at the end of
6958   the pattern to -1 for backwards compatibility. It is documented that this
6959   happens. In earlier versions, the whole set of potential capturing offsets
6960   was set to -1 each time round the loop, but this is handled differently now.
6961   "Gaps" are set to -1 dynamically instead (this fixes a bug). Thus, it is only
6962   those at the end that need unsetting here. We can't just unset them all at
6963   the start of the whole thing because they may get set in one branch that is
6964   not the final matching branch. */
6965 
6966   if (md->end_offset_top/2 <= re->top_bracket && offsets != NULL)
6967     {
6968     register int *iptr, *iend;
6969     int resetcount = 2 + re->top_bracket * 2;
6970     if (resetcount > offsetcount) resetcount = offsetcount;
6971     iptr = offsets + md->end_offset_top;
6972     iend = offsets + resetcount;
6973     while (iptr < iend) *iptr++ = -1;
6974     }
6975 
6976   /* If there is space, set up the whole thing as substring 0. The value of
6977   md->start_match_ptr might be modified if \K was encountered on the successful
6978   matching path. */
6979 
6980   if (offsetcount < 2) rc = 0; else
6981     {
6982     offsets[0] = (int)(md->start_match_ptr - md->start_subject);
6983     offsets[1] = (int)(md->end_match_ptr - md->start_subject);
6984     }
6985 
6986   /* Return MARK data if requested */
6987 
6988   if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
6989     *(extra_data->mark) = (pcre_uchar *)md->mark;
6990   DPRINTF((">>>> returning %d\n", rc));
6991 #ifdef NO_RECURSE
6992   release_match_heapframes(&frame_zero);
6993 #endif
6994   return rc;
6995   }
6996 
6997 /* Control gets here if there has been an error, or if the overall match
6998 attempt has failed at all permitted starting positions. */
6999 
7000 if (using_temporary_offsets)
7001   {
7002   DPRINTF(("Freeing temporary memory\n"));
7003   (PUBL(free))(md->offset_vector);
7004   }
7005 
7006 /* For anything other than nomatch or partial match, just return the code. */
7007 
7008 if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL)
7009   {
7010   DPRINTF((">>>> error: returning %d\n", rc));
7011 #ifdef NO_RECURSE
7012   release_match_heapframes(&frame_zero);
7013 #endif
7014   return rc;
7015   }
7016 
7017 /* Handle partial matches - disable any mark data */
7018 
7019 if (start_partial != NULL)
7020   {
7021   DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
7022   md->mark = NULL;
7023   if (offsetcount > 1)
7024     {
7025     offsets[0] = (int)(start_partial - (PCRE_PUCHAR)subject);
7026     offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject);
7027     }
7028   rc = PCRE_ERROR_PARTIAL;
7029   }
7030 
7031 /* This is the classic nomatch case */
7032 
7033 else
7034   {
7035   DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
7036   rc = PCRE_ERROR_NOMATCH;
7037   }
7038 
7039 /* Return the MARK data if it has been requested. */
7040 
7041 if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
7042   *(extra_data->mark) = (pcre_uchar *)md->nomatch_mark;
7043 #ifdef NO_RECURSE
7044   release_match_heapframes(&frame_zero);
7045 #endif
7046 return rc;
7047 }
7048 
7049 /* End of pcre_exec.c */
7050