1 /*************************************************
2 *      Perl-Compatible Regular Expressions       *
3 *************************************************/
4 
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7 
8                        Written by Philip Hazel
9      Original API code Copyright (c) 1997-2012 University of Cambridge
10           New API code Copyright (c) 2016-2022 University of Cambridge
11 
12 -----------------------------------------------------------------------------
13 Redistribution and use in source and binary forms, with or without
14 modification, are permitted provided that the following conditions are met:
15 
16     * Redistributions of source code must retain the above copyright notice,
17       this list of conditions and the following disclaimer.
18 
19     * Redistributions in binary form must reproduce the above copyright
20       notice, this list of conditions and the following disclaimer in the
21       documentation and/or other materials provided with the distribution.
22 
23     * Neither the name of the University of Cambridge nor the names of its
24       contributors may be used to endorse or promote products derived from
25       this software without specific prior written permission.
26 
27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 POSSIBILITY OF SUCH DAMAGE.
38 -----------------------------------------------------------------------------
39 */
40 
41 /* This module contains functions that scan a compiled pattern and change
42 repeats into possessive repeats where possible. */
43 
44 
45 #ifdef HAVE_CONFIG_H
46 #include "config.h"
47 #endif
48 
49 
50 #include "pcre2_internal.h"
51 
52 
53 /*************************************************
54 *        Tables for auto-possessification        *
55 *************************************************/
56 
57 /* This table is used to check whether auto-possessification is possible
58 between adjacent character-type opcodes. The left-hand (repeated) opcode is
59 used to select the row, and the right-hand opcode is use to select the column.
60 A value of 1 means that auto-possessification is OK. For example, the second
61 value in the first row means that \D+\d can be turned into \D++\d.
62 
63 The Unicode property types (\P and \p) have to be present to fill out the table
64 because of what their opcode values are, but the table values should always be
65 zero because property types are handled separately in the code. The last four
66 columns apply to items that cannot be repeated, so there is no need to have
67 rows for them. Note that OP_DIGIT etc. are generated only when PCRE_UCP is
68 *not* set. When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */
69 
70 #define APTROWS (LAST_AUTOTAB_LEFT_OP - FIRST_AUTOTAB_OP + 1)
71 #define APTCOLS (LAST_AUTOTAB_RIGHT_OP - FIRST_AUTOTAB_OP + 1)
72 
73 static const uint8_t autoposstab[APTROWS][APTCOLS] = {
74 /* \D \d \S \s \W \w  . .+ \C \P \p \R \H \h \V \v \X \Z \z  $ $M */
75   { 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 },  /* \D */
76   { 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1 },  /* \d */
77   { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1 },  /* \S */
78   { 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 },  /* \s */
79   { 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 },  /* \W */
80   { 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1 },  /* \w */
81   { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0 },  /* .  */
82   { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 },  /* .+ */
83   { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 },  /* \C */
84   { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },  /* \P */
85   { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },  /* \p */
86   { 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0 },  /* \R */
87   { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0 },  /* \H */
88   { 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0 },  /* \h */
89   { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0 },  /* \V */
90   { 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0 },  /* \v */
91   { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 }   /* \X */
92 };
93 
94 #ifdef SUPPORT_UNICODE
95 /* This table is used to check whether auto-possessification is possible
96 between adjacent Unicode property opcodes (OP_PROP and OP_NOTPROP). The
97 left-hand (repeated) opcode is used to select the row, and the right-hand
98 opcode is used to select the column. The values are as follows:
99 
100   0   Always return FALSE (never auto-possessify)
101   1   Character groups are distinct (possessify if both are OP_PROP)
102   2   Check character categories in the same group (general or particular)
103   3   TRUE if the two opcodes are not the same (PROP vs NOTPROP)
104 
105   4   Check left general category vs right particular category
106   5   Check right general category vs left particular category
107 
108   6   Left alphanum vs right general category
109   7   Left space vs right general category
110   8   Left word vs right general category
111 
112   9   Right alphanum vs left general category
113  10   Right space vs left general category
114  11   Right word vs left general category
115 
116  12   Left alphanum vs right particular category
117  13   Left space vs right particular category
118  14   Left word vs right particular category
119 
120  15   Right alphanum vs left particular category
121  16   Right space vs left particular category
122  17   Right word vs left particular category
123 */
124 
125 static const uint8_t propposstab[PT_TABSIZE][PT_TABSIZE] = {
126 /* ANY LAMP GC  PC  SC  SCX ALNUM SPACE PXSPACE WORD CLIST UCNC BIDICL BOOL */
127   { 0,  0,  0,  0,  0,   0,    0,    0,      0,   0,    0,   0,    0,    0 },  /* PT_ANY */
128   { 0,  3,  0,  0,  0,   0,    3,    1,      1,   0,    0,   0,    0,    0 },  /* PT_LAMP */
129   { 0,  0,  2,  4,  0,   0,    9,   10,     10,  11,    0,   0,    0,    0 },  /* PT_GC */
130   { 0,  0,  5,  2,  0,   0,   15,   16,     16,  17,    0,   0,    0,    0 },  /* PT_PC */
131   { 0,  0,  0,  0,  2,   2,    0,    0,      0,   0,    0,   0,    0,    0 },  /* PT_SC */
132   { 0,  0,  0,  0,  2,   2,    0,    0,      0,   0,    0,   0,    0,    0 },  /* PT_SCX */
133   { 0,  3,  6, 12,  0,   0,    3,    1,      1,   0,    0,   0,    0,    0 },  /* PT_ALNUM */
134   { 0,  1,  7, 13,  0,   0,    1,    3,      3,   1,    0,   0,    0,    0 },  /* PT_SPACE */
135   { 0,  1,  7, 13,  0,   0,    1,    3,      3,   1,    0,   0,    0,    0 },  /* PT_PXSPACE */
136   { 0,  0,  8, 14,  0,   0,    0,    1,      1,   3,    0,   0,    0,    0 },  /* PT_WORD */
137   { 0,  0,  0,  0,  0,   0,    0,    0,      0,   0,    0,   0,    0,    0 },  /* PT_CLIST */
138   { 0,  0,  0,  0,  0,   0,    0,    0,      0,   0,    0,   3,    0,    0 },  /* PT_UCNC */
139   { 0,  0,  0,  0,  0,   0,    0,    0,      0,   0,    0,   0,    0,    0 },  /* PT_BIDICL */
140   { 0,  0,  0,  0,  0,   0,    0,    0,      0,   0,    0,   0,    0,    0 }   /* PT_BOOL */
141 };
142 
/* Companion table for the case where one of two adjacent Unicode property
items (OP_PROP and OP_NOTPROP) names a general category and the other names a
particular category. The general category selects the row and the particular
category selects the column. An entry is 1 when the particular category does
not belong to the general category, and 0 when it does. Each row is laid out
in groups, one group per general category, in column order. */

static const uint8_t catposstab[7][30] = {
  /* Row C */
  { 0, 0, 0, 0, 0,             /* Cc Cf Cn Co Cs */
    1, 1, 1, 1, 1,             /* Ll Lm Lo Lt Lu */
    1, 1, 1,                   /* Mc Me Mn */
    1, 1, 1,                   /* Nd Nl No */
    1, 1, 1, 1, 1, 1, 1,       /* Pc Pd Pe Pf Pi Po Ps */
    1, 1, 1, 1,                /* Sc Sk Sm So */
    1, 1, 1 },                 /* Zl Zp Zs */

  /* Row L */
  { 1, 1, 1, 1, 1,             /* Cc Cf Cn Co Cs */
    0, 0, 0, 0, 0,             /* Ll Lm Lo Lt Lu */
    1, 1, 1,                   /* Mc Me Mn */
    1, 1, 1,                   /* Nd Nl No */
    1, 1, 1, 1, 1, 1, 1,       /* Pc Pd Pe Pf Pi Po Ps */
    1, 1, 1, 1,                /* Sc Sk Sm So */
    1, 1, 1 },                 /* Zl Zp Zs */

  /* Row M */
  { 1, 1, 1, 1, 1,             /* Cc Cf Cn Co Cs */
    1, 1, 1, 1, 1,             /* Ll Lm Lo Lt Lu */
    0, 0, 0,                   /* Mc Me Mn */
    1, 1, 1,                   /* Nd Nl No */
    1, 1, 1, 1, 1, 1, 1,       /* Pc Pd Pe Pf Pi Po Ps */
    1, 1, 1, 1,                /* Sc Sk Sm So */
    1, 1, 1 },                 /* Zl Zp Zs */

  /* Row N */
  { 1, 1, 1, 1, 1,             /* Cc Cf Cn Co Cs */
    1, 1, 1, 1, 1,             /* Ll Lm Lo Lt Lu */
    1, 1, 1,                   /* Mc Me Mn */
    0, 0, 0,                   /* Nd Nl No */
    1, 1, 1, 1, 1, 1, 1,       /* Pc Pd Pe Pf Pi Po Ps */
    1, 1, 1, 1,                /* Sc Sk Sm So */
    1, 1, 1 },                 /* Zl Zp Zs */

  /* Row P */
  { 1, 1, 1, 1, 1,             /* Cc Cf Cn Co Cs */
    1, 1, 1, 1, 1,             /* Ll Lm Lo Lt Lu */
    1, 1, 1,                   /* Mc Me Mn */
    1, 1, 1,                   /* Nd Nl No */
    0, 0, 0, 0, 0, 0, 0,       /* Pc Pd Pe Pf Pi Po Ps */
    1, 1, 1, 1,                /* Sc Sk Sm So */
    1, 1, 1 },                 /* Zl Zp Zs */

  /* Row S */
  { 1, 1, 1, 1, 1,             /* Cc Cf Cn Co Cs */
    1, 1, 1, 1, 1,             /* Ll Lm Lo Lt Lu */
    1, 1, 1,                   /* Mc Me Mn */
    1, 1, 1,                   /* Nd Nl No */
    1, 1, 1, 1, 1, 1, 1,       /* Pc Pd Pe Pf Pi Po Ps */
    0, 0, 0, 0,                /* Sc Sk Sm So */
    1, 1, 1 },                 /* Zl Zp Zs */

  /* Row Z */
  { 1, 1, 1, 1, 1,             /* Cc Cf Cn Co Cs */
    1, 1, 1, 1, 1,             /* Ll Lm Lo Lt Lu */
    1, 1, 1,                   /* Mc Me Mn */
    1, 1, 1,                   /* Nd Nl No */
    1, 1, 1, 1, 1, 1, 1,       /* Pc Pd Pe Pf Pi Po Ps */
    1, 1, 1, 1,                /* Sc Sk Sm So */
    0, 0, 0 }                  /* Zl Zp Zs */
};
160 
161 /* This table is used when checking ALNUM, (PX)SPACE, SPACE, and WORD against
162 a general or particular category. The properties in each row are those
163 that apply to the character set in question. Duplication means that a little
164 unnecessary work is done when checking, but this keeps things much simpler
165 because they can all use the same code. For more details see the comment where
166 this table is used.
167 
168 Note: SPACE and PXSPACE used to be different because Perl excluded VT from
169 "space", but from Perl 5.18 it's included, so both categories are treated the
170 same here. */
171 
172 static const uint8_t posspropstab[3][4] = {
173   { ucp_L, ucp_N, ucp_N, ucp_Nl },  /* ALNUM, 3rd and 4th values redundant */
174   { ucp_Z, ucp_Z, ucp_C, ucp_Cc },  /* SPACE and PXSPACE, 2nd value redundant */
175   { ucp_L, ucp_N, ucp_P, ucp_Po }   /* WORD */
176 };
177 #endif  /* SUPPORT_UNICODE */
178 
179 
180 
181 #ifdef SUPPORT_UNICODE
182 /*************************************************
183 *        Check a character and a property        *
184 *************************************************/
185 
186 /* This function is called by compare_opcodes() when a property item is
187 adjacent to a fixed character.
188 
189 Arguments:
190   c            the character
191   ptype        the property type
192   pdata        the data for the type
193   negated      TRUE if it's a negated property (\P or \p{^)
194 
195 Returns:       TRUE if auto-possessifying is OK
196 */
197 
198 static BOOL
check_char_prop(uint32_t c,unsigned int ptype,unsigned int pdata,BOOL negated)199 check_char_prop(uint32_t c, unsigned int ptype, unsigned int pdata,
200   BOOL negated)
201 {
202 BOOL ok;
203 const uint32_t *p;
204 const ucd_record *prop = GET_UCD(c);
205 
206 switch(ptype)
207   {
208   case PT_LAMP:
209   return (prop->chartype == ucp_Lu ||
210           prop->chartype == ucp_Ll ||
211           prop->chartype == ucp_Lt) == negated;
212 
213   case PT_GC:
214   return (pdata == PRIV(ucp_gentype)[prop->chartype]) == negated;
215 
216   case PT_PC:
217   return (pdata == prop->chartype) == negated;
218 
219   case PT_SC:
220   return (pdata == prop->script) == negated;
221 
222   case PT_SCX:
223   ok = (pdata == prop->script
224         || MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), pdata) != 0);
225   return ok == negated;
226 
227   /* These are specials */
228 
229   case PT_ALNUM:
230   return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
231           PRIV(ucp_gentype)[prop->chartype] == ucp_N) == negated;
232 
233   /* Perl space used to exclude VT, but from Perl 5.18 it is included, which
234   means that Perl space and POSIX space are now identical. PCRE was changed
235   at release 8.34. */
236 
237   case PT_SPACE:    /* Perl space */
238   case PT_PXSPACE:  /* POSIX space */
239   switch(c)
240     {
241     HSPACE_CASES:
242     VSPACE_CASES:
243     return negated;
244 
245     default:
246     return (PRIV(ucp_gentype)[prop->chartype] == ucp_Z) == negated;
247     }
248   break;  /* Control never reaches here */
249 
250   case PT_WORD:
251   return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
252           PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
253           c == CHAR_UNDERSCORE) == negated;
254 
255   case PT_CLIST:
256   p = PRIV(ucd_caseless_sets) + prop->caseset;
257   for (;;)
258     {
259     if (c < *p) return !negated;
260     if (c == *p++) return negated;
261     }
262   break;  /* Control never reaches here */
263 
264   /* Haven't yet thought these through. */
265 
266   case PT_BIDICL:
267   return FALSE;
268 
269   case PT_BOOL:
270   return FALSE;
271   }
272 
273 return FALSE;
274 }
275 #endif  /* SUPPORT_UNICODE */
276 
277 
278 
279 /*************************************************
280 *        Base opcode of repeated opcodes         *
281 *************************************************/
282 
283 /* Returns the base opcode for repeated single character type opcodes. If the
284 opcode is not a repeated character type, it returns with the original value.
285 
286 Arguments:  c opcode
287 Returns:    base opcode for the type
288 */
289 
290 static PCRE2_UCHAR
get_repeat_base(PCRE2_UCHAR c)291 get_repeat_base(PCRE2_UCHAR c)
292 {
293 return (c > OP_TYPEPOSUPTO)? c :
294        (c >= OP_TYPESTAR)?   OP_TYPESTAR :
295        (c >= OP_NOTSTARI)?   OP_NOTSTARI :
296        (c >= OP_NOTSTAR)?    OP_NOTSTAR :
297        (c >= OP_STARI)?      OP_STARI :
298                              OP_STAR;
299 }
300 
301 
302 /*************************************************
303 *        Fill the character property list        *
304 *************************************************/
305 
306 /* Checks whether the code points to an opcode that can take part in auto-
307 possessification, and if so, fills a list with its properties.
308 
309 Arguments:
310   code        points to start of expression
311   utf         TRUE if in UTF mode
312   ucp         TRUE if in UCP mode
313   fcc         points to the case-flipping table
314   list        points to output list
315               list[0] will be filled with the opcode
316               list[1] will be non-zero if this opcode
317                 can match an empty character string
318               list[2..7] depends on the opcode
319 
320 Returns:      points to the start of the next opcode if *code is accepted
321               NULL if *code is not accepted
322 */
323 
324 static PCRE2_SPTR
get_chr_property_list(PCRE2_SPTR code,BOOL utf,BOOL ucp,const uint8_t * fcc,uint32_t * list)325 get_chr_property_list(PCRE2_SPTR code, BOOL utf, BOOL ucp, const uint8_t *fcc,
326   uint32_t *list)
327 {
328 PCRE2_UCHAR c = *code;
329 PCRE2_UCHAR base;
330 PCRE2_SPTR end;
331 uint32_t chr;
332 
333 #ifdef SUPPORT_UNICODE
334 uint32_t *clist_dest;
335 const uint32_t *clist_src;
336 #else
337 (void)utf;    /* Suppress "unused parameter" compiler warnings */
338 (void)ucp;
339 #endif
340 
341 list[0] = c;
342 list[1] = FALSE;
343 code++;
344 
345 if (c >= OP_STAR && c <= OP_TYPEPOSUPTO)
346   {
347   base = get_repeat_base(c);
348   c -= (base - OP_STAR);
349 
350   if (c == OP_UPTO || c == OP_MINUPTO || c == OP_EXACT || c == OP_POSUPTO)
351     code += IMM2_SIZE;
352 
353   list[1] = (c != OP_PLUS && c != OP_MINPLUS && c != OP_EXACT &&
354              c != OP_POSPLUS);
355 
356   switch(base)
357     {
358     case OP_STAR:
359     list[0] = OP_CHAR;
360     break;
361 
362     case OP_STARI:
363     list[0] = OP_CHARI;
364     break;
365 
366     case OP_NOTSTAR:
367     list[0] = OP_NOT;
368     break;
369 
370     case OP_NOTSTARI:
371     list[0] = OP_NOTI;
372     break;
373 
374     case OP_TYPESTAR:
375     list[0] = *code;
376     code++;
377     break;
378     }
379   c = list[0];
380   }
381 
382 switch(c)
383   {
384   case OP_NOT_DIGIT:
385   case OP_DIGIT:
386   case OP_NOT_WHITESPACE:
387   case OP_WHITESPACE:
388   case OP_NOT_WORDCHAR:
389   case OP_WORDCHAR:
390   case OP_ANY:
391   case OP_ALLANY:
392   case OP_ANYNL:
393   case OP_NOT_HSPACE:
394   case OP_HSPACE:
395   case OP_NOT_VSPACE:
396   case OP_VSPACE:
397   case OP_EXTUNI:
398   case OP_EODN:
399   case OP_EOD:
400   case OP_DOLL:
401   case OP_DOLLM:
402   return code;
403 
404   case OP_CHAR:
405   case OP_NOT:
406   GETCHARINCTEST(chr, code);
407   list[2] = chr;
408   list[3] = NOTACHAR;
409   return code;
410 
411   case OP_CHARI:
412   case OP_NOTI:
413   list[0] = (c == OP_CHARI) ? OP_CHAR : OP_NOT;
414   GETCHARINCTEST(chr, code);
415   list[2] = chr;
416 
417 #ifdef SUPPORT_UNICODE
418   if (chr < 128 || (chr < 256 && !utf && !ucp))
419     list[3] = fcc[chr];
420   else
421     list[3] = UCD_OTHERCASE(chr);
422 #elif defined SUPPORT_WIDE_CHARS
423   list[3] = (chr < 256) ? fcc[chr] : chr;
424 #else
425   list[3] = fcc[chr];
426 #endif
427 
428   /* The othercase might be the same value. */
429 
430   if (chr == list[3])
431     list[3] = NOTACHAR;
432   else
433     list[4] = NOTACHAR;
434   return code;
435 
436 #ifdef SUPPORT_UNICODE
437   case OP_PROP:
438   case OP_NOTPROP:
439   if (code[0] != PT_CLIST)
440     {
441     list[2] = code[0];
442     list[3] = code[1];
443     return code + 2;
444     }
445 
446   /* Convert only if we have enough space. */
447 
448   clist_src = PRIV(ucd_caseless_sets) + code[1];
449   clist_dest = list + 2;
450   code += 2;
451 
452   do {
453      if (clist_dest >= list + 8)
454        {
455        /* Early return if there is not enough space. This should never
456        happen, since all clists are shorter than 5 character now. */
457        list[2] = code[0];
458        list[3] = code[1];
459        return code;
460        }
461      *clist_dest++ = *clist_src;
462      }
463   while(*clist_src++ != NOTACHAR);
464 
465   /* All characters are stored. The terminating NOTACHAR is copied from the
466   clist itself. */
467 
468   list[0] = (c == OP_PROP) ? OP_CHAR : OP_NOT;
469   return code;
470 #endif
471 
472   case OP_NCLASS:
473   case OP_CLASS:
474 #ifdef SUPPORT_WIDE_CHARS
475   case OP_XCLASS:
476   if (c == OP_XCLASS)
477     end = code + GET(code, 0) - 1;
478   else
479 #endif
480     end = code + 32 / sizeof(PCRE2_UCHAR);
481 
482   switch(*end)
483     {
484     case OP_CRSTAR:
485     case OP_CRMINSTAR:
486     case OP_CRQUERY:
487     case OP_CRMINQUERY:
488     case OP_CRPOSSTAR:
489     case OP_CRPOSQUERY:
490     list[1] = TRUE;
491     end++;
492     break;
493 
494     case OP_CRPLUS:
495     case OP_CRMINPLUS:
496     case OP_CRPOSPLUS:
497     end++;
498     break;
499 
500     case OP_CRRANGE:
501     case OP_CRMINRANGE:
502     case OP_CRPOSRANGE:
503     list[1] = (GET2(end, 1) == 0);
504     end += 1 + 2 * IMM2_SIZE;
505     break;
506     }
507   list[2] = (uint32_t)(end - code);
508   return end;
509   }
510 
511 return NULL;    /* Opcode not accepted */
512 }
513 
514 
515 
516 /*************************************************
517 *    Scan further character sets for match       *
518 *************************************************/
519 
520 /* Checks whether the base and the current opcode have a common character, in
521 which case the base cannot be possessified.
522 
523 Arguments:
524   code        points to the byte code
525   utf         TRUE in UTF mode
526   ucp         TRUE in UCP mode
527   cb          compile data block
528   base_list   the data list of the base opcode
529   base_end    the end of the base opcode
530   rec_limit   points to recursion depth counter
531 
532 Returns:      TRUE if the auto-possessification is possible
533 */
534 
535 static BOOL
compare_opcodes(PCRE2_SPTR code,BOOL utf,BOOL ucp,const compile_block * cb,const uint32_t * base_list,PCRE2_SPTR base_end,int * rec_limit)536 compare_opcodes(PCRE2_SPTR code, BOOL utf, BOOL ucp, const compile_block *cb,
537   const uint32_t *base_list, PCRE2_SPTR base_end, int *rec_limit)
538 {
539 PCRE2_UCHAR c;
540 uint32_t list[8];
541 const uint32_t *chr_ptr;
542 const uint32_t *ochr_ptr;
543 const uint32_t *list_ptr;
544 PCRE2_SPTR next_code;
545 #ifdef SUPPORT_WIDE_CHARS
546 PCRE2_SPTR xclass_flags;
547 #endif
548 const uint8_t *class_bitset;
549 const uint8_t *set1, *set2, *set_end;
550 uint32_t chr;
551 BOOL accepted, invert_bits;
552 BOOL entered_a_group = FALSE;
553 
554 if (--(*rec_limit) <= 0) return FALSE;  /* Recursion has gone too deep */
555 
/* Note: base_list[1] records whether the current opcode has a greedy
quantifier (represented by a non-zero value). This is different from
other character type lists, where this element records whether the character
iterator can match an empty string (also represented by a non-zero value). */
560 
561 for(;;)
562   {
563   PCRE2_SPTR bracode;
564 
565   /* All operations move the code pointer forward.
566   Therefore infinite recursions are not possible. */
567 
568   c = *code;
569 
570   /* Skip over callouts */
571 
572   if (c == OP_CALLOUT)
573     {
574     code += PRIV(OP_lengths)[c];
575     continue;
576     }
577 
578   if (c == OP_CALLOUT_STR)
579     {
580     code += GET(code, 1 + 2*LINK_SIZE);
581     continue;
582     }
583 
584   /* At the end of a branch, skip to the end of the group. */
585 
586   if (c == OP_ALT)
587     {
588     do code += GET(code, 1); while (*code == OP_ALT);
589     c = *code;
590     }
591 
592   /* Inspect the next opcode. */
593 
594   switch(c)
595     {
596     /* We can always possessify a greedy iterator at the end of the pattern,
597     which is reached after skipping over the final OP_KET. A non-greedy
598     iterator must never be possessified. */
599 
600     case OP_END:
601     return base_list[1] != 0;
602 
603     /* When an iterator is at the end of certain kinds of group we can inspect
604     what follows the group by skipping over the closing ket. Note that this
605     does not apply to OP_KETRMAX or OP_KETRMIN because what follows any given
606     iteration is variable (could be another iteration or could be the next
607     item). As these two opcodes are not listed in the next switch, they will
608     end up as the next code to inspect, and return FALSE by virtue of being
609     unsupported. */
610 
611     case OP_KET:
612     case OP_KETRPOS:
613     /* The non-greedy case cannot be converted to a possessive form. */
614 
615     if (base_list[1] == 0) return FALSE;
616 
617     /* If the bracket is capturing it might be referenced by an OP_RECURSE
618     so its last iterator can never be possessified if the pattern contains
619     recursions. (This could be improved by keeping a list of group numbers that
620     are called by recursion.) */
621 
622     bracode = code - GET(code, 1);
623     switch(*bracode)
624       {
625       case OP_CBRA:
626       case OP_SCBRA:
627       case OP_CBRAPOS:
628       case OP_SCBRAPOS:
629       if (cb->had_recurse) return FALSE;
630       break;
631 
632       /* A script run might have to backtrack if the iterated item can match
633       characters from more than one script. So give up unless repeating an
634       explicit character. */
635 
636       case OP_SCRIPT_RUN:
637       if (base_list[0] != OP_CHAR && base_list[0] != OP_CHARI)
638         return FALSE;
639       break;
640 
641       /* Atomic sub-patterns and assertions can always auto-possessify their
642       last iterator except for variable length lookbehinds. However, if the
643       group was entered as a result of checking a previous iterator, this is
644       not possible. */
645 
646       case OP_ASSERT:
647       case OP_ASSERT_NOT:
648       case OP_ONCE:
649       return !entered_a_group;
650 
651       case OP_ASSERTBACK:
652       case OP_ASSERTBACK_NOT:
653       return (bracode[1+LINK_SIZE] == OP_VREVERSE)? FALSE : !entered_a_group;
654 
655       /* Non-atomic assertions - don't possessify last iterator. This needs
656       more thought. */
657 
658       case OP_ASSERT_NA:
659       case OP_ASSERTBACK_NA:
660       return FALSE;
661       }
662 
663     /* Skip over the bracket and inspect what comes next. */
664 
665     code += PRIV(OP_lengths)[c];
666     continue;
667 
668     /* Handle cases where the next item is a group. */
669 
670     case OP_ONCE:
671     case OP_BRA:
672     case OP_CBRA:
673     next_code = code + GET(code, 1);
674     code += PRIV(OP_lengths)[c];
675 
676     /* Check each branch. We have to recurse a level for all but the last
677     branch. */
678 
679     while (*next_code == OP_ALT)
680       {
681       if (!compare_opcodes(code, utf, ucp, cb, base_list, base_end, rec_limit))
682         return FALSE;
683       code = next_code + 1 + LINK_SIZE;
684       next_code += GET(next_code, 1);
685       }
686 
687     entered_a_group = TRUE;
688     continue;
689 
690     case OP_BRAZERO:
691     case OP_BRAMINZERO:
692 
693     next_code = code + 1;
694     if (*next_code != OP_BRA && *next_code != OP_CBRA &&
695         *next_code != OP_ONCE) return FALSE;
696 
697     do next_code += GET(next_code, 1); while (*next_code == OP_ALT);
698 
699     /* The bracket content will be checked by the OP_BRA/OP_CBRA case above. */
700 
701     next_code += 1 + LINK_SIZE;
702     if (!compare_opcodes(next_code, utf, ucp, cb, base_list, base_end,
703          rec_limit))
704       return FALSE;
705 
706     code += PRIV(OP_lengths)[c];
707     continue;
708 
709     /* The next opcode does not need special handling; fall through and use it
710     to see if the base can be possessified. */
711 
712     default:
713     break;
714     }
715 
716   /* We now have the next appropriate opcode to compare with the base. Check
717   for a supported opcode, and load its properties. */
718 
719   code = get_chr_property_list(code, utf, ucp, cb->fcc, list);
720   if (code == NULL) return FALSE;    /* Unsupported */
721 
722   /* If either opcode is a small character list, set pointers for comparing
723   characters from that list with another list, or with a property. */
724 
725   if (base_list[0] == OP_CHAR)
726     {
727     chr_ptr = base_list + 2;
728     list_ptr = list;
729     }
730   else if (list[0] == OP_CHAR)
731     {
732     chr_ptr = list + 2;
733     list_ptr = base_list;
734     }
735 
736   /* Character bitsets can also be compared to certain opcodes. */
737 
738   else if (base_list[0] == OP_CLASS || list[0] == OP_CLASS
739 #if PCRE2_CODE_UNIT_WIDTH == 8
740       /* In 8 bit, non-UTF mode, OP_CLASS and OP_NCLASS are the same. */
741       || (!utf && (base_list[0] == OP_NCLASS || list[0] == OP_NCLASS))
742 #endif
743       )
744     {
745 #if PCRE2_CODE_UNIT_WIDTH == 8
746     if (base_list[0] == OP_CLASS || (!utf && base_list[0] == OP_NCLASS))
747 #else
748     if (base_list[0] == OP_CLASS)
749 #endif
750       {
751       set1 = (uint8_t *)(base_end - base_list[2]);
752       list_ptr = list;
753       }
754     else
755       {
756       set1 = (uint8_t *)(code - list[2]);
757       list_ptr = base_list;
758       }
759 
760     invert_bits = FALSE;
761     switch(list_ptr[0])
762       {
763       case OP_CLASS:
764       case OP_NCLASS:
765       set2 = (uint8_t *)
766         ((list_ptr == list ? code : base_end) - list_ptr[2]);
767       break;
768 
769 #ifdef SUPPORT_WIDE_CHARS
770       case OP_XCLASS:
771       xclass_flags = (list_ptr == list ? code : base_end) - list_ptr[2] + LINK_SIZE;
772       if ((*xclass_flags & XCL_HASPROP) != 0) return FALSE;
773       if ((*xclass_flags & XCL_MAP) == 0)
774         {
775         /* No bits are set for characters < 256. */
776         if (list[1] == 0) return (*xclass_flags & XCL_NOT) == 0;
777         /* Might be an empty repeat. */
778         continue;
779         }
780       set2 = (uint8_t *)(xclass_flags + 1);
781       break;
782 #endif
783 
784       case OP_NOT_DIGIT:
785       invert_bits = TRUE;
786       /* Fall through */
787       case OP_DIGIT:
788       set2 = (uint8_t *)(cb->cbits + cbit_digit);
789       break;
790 
791       case OP_NOT_WHITESPACE:
792       invert_bits = TRUE;
793       /* Fall through */
794       case OP_WHITESPACE:
795       set2 = (uint8_t *)(cb->cbits + cbit_space);
796       break;
797 
798       case OP_NOT_WORDCHAR:
799       invert_bits = TRUE;
800       /* Fall through */
801       case OP_WORDCHAR:
802       set2 = (uint8_t *)(cb->cbits + cbit_word);
803       break;
804 
805       default:
806       return FALSE;
807       }
808 
809     /* Because the bit sets are unaligned bytes, we need to perform byte
810     comparison here. */
811 
812     set_end = set1 + 32;
813     if (invert_bits)
814       {
815       do
816         {
817         if ((*set1++ & ~(*set2++)) != 0) return FALSE;
818         }
819       while (set1 < set_end);
820       }
821     else
822       {
823       do
824         {
825         if ((*set1++ & *set2++) != 0) return FALSE;
826         }
827       while (set1 < set_end);
828       }
829 
830     if (list[1] == 0) return TRUE;
831     /* Might be an empty repeat. */
832     continue;
833     }
834 
835   /* Some property combinations also acceptable. Unicode property opcodes are
836   processed specially; the rest can be handled with a lookup table. */
837 
838   else
839     {
840     uint32_t leftop, rightop;
841 
842     leftop = base_list[0];
843     rightop = list[0];
844 
845 #ifdef SUPPORT_UNICODE
846     accepted = FALSE; /* Always set in non-unicode case. */
847     if (leftop == OP_PROP || leftop == OP_NOTPROP)
848       {
849       if (rightop == OP_EOD)
850         accepted = TRUE;
851       else if (rightop == OP_PROP || rightop == OP_NOTPROP)
852         {
853         int n;
854         const uint8_t *p;
855         BOOL same = leftop == rightop;
856         BOOL lisprop = leftop == OP_PROP;
857         BOOL risprop = rightop == OP_PROP;
858         BOOL bothprop = lisprop && risprop;
859 
860         /* There's a table that specifies how each combination is to be
861         processed:
862           0   Always return FALSE (never auto-possessify)
863           1   Character groups are distinct (possessify if both are OP_PROP)
864           2   Check character categories in the same group (general or particular)
865           3   Return TRUE if the two opcodes are not the same
866           ... see comments below
867         */
868 
869         n = propposstab[base_list[2]][list[2]];
870         switch(n)
871           {
872           case 0: break;
873           case 1: accepted = bothprop; break;
874           case 2: accepted = (base_list[3] == list[3]) != same; break;
875           case 3: accepted = !same; break;
876 
877           case 4:  /* Left general category, right particular category */
878           accepted = risprop && catposstab[base_list[3]][list[3]] == same;
879           break;
880 
881           case 5:  /* Right general category, left particular category */
882           accepted = lisprop && catposstab[list[3]][base_list[3]] == same;
883           break;
884 
885           /* This code is logically tricky. Think hard before fiddling with it.
886           The posspropstab table has four entries per row. Each row relates to
887           one of PCRE's special properties such as ALNUM or SPACE or WORD.
888           Only WORD actually needs all four entries, but using repeats for the
889           others means they can all use the same code below.
890 
891           The first two entries in each row are Unicode general categories, and
892           apply always, because all the characters they include are part of the
893           PCRE character set. The third and fourth entries are a general and a
894           particular category, respectively, that include one or more relevant
895           characters. One or the other is used, depending on whether the check
896           is for a general or a particular category. However, in both cases the
897           category contains more characters than the specials that are defined
898           for the property being tested against. Therefore, it cannot be used
899           in a NOTPROP case.
900 
901           Example: the row for WORD contains ucp_L, ucp_N, ucp_P, ucp_Po.
902           Underscore is covered by ucp_P or ucp_Po. */
903 
904           case 6:  /* Left alphanum vs right general category */
905           case 7:  /* Left space vs right general category */
906           case 8:  /* Left word vs right general category */
907           p = posspropstab[n-6];
908           accepted = risprop && lisprop ==
909             (list[3] != p[0] &&
910              list[3] != p[1] &&
911             (list[3] != p[2] || !lisprop));
912           break;
913 
914           case 9:   /* Right alphanum vs left general category */
915           case 10:  /* Right space vs left general category */
916           case 11:  /* Right word vs left general category */
917           p = posspropstab[n-9];
918           accepted = lisprop && risprop ==
919             (base_list[3] != p[0] &&
920              base_list[3] != p[1] &&
921             (base_list[3] != p[2] || !risprop));
922           break;
923 
924           case 12:  /* Left alphanum vs right particular category */
925           case 13:  /* Left space vs right particular category */
926           case 14:  /* Left word vs right particular category */
927           p = posspropstab[n-12];
928           accepted = risprop && lisprop ==
929             (catposstab[p[0]][list[3]] &&
930              catposstab[p[1]][list[3]] &&
931             (list[3] != p[3] || !lisprop));
932           break;
933 
934           case 15:  /* Right alphanum vs left particular category */
935           case 16:  /* Right space vs left particular category */
936           case 17:  /* Right word vs left particular category */
937           p = posspropstab[n-15];
938           accepted = lisprop && risprop ==
939             (catposstab[p[0]][base_list[3]] &&
940              catposstab[p[1]][base_list[3]] &&
941             (base_list[3] != p[3] || !risprop));
942           break;
943           }
944         }
945       }
946 
947     else
948 #endif  /* SUPPORT_UNICODE */
949 
950     accepted = leftop >= FIRST_AUTOTAB_OP && leftop <= LAST_AUTOTAB_LEFT_OP &&
951            rightop >= FIRST_AUTOTAB_OP && rightop <= LAST_AUTOTAB_RIGHT_OP &&
952            autoposstab[leftop - FIRST_AUTOTAB_OP][rightop - FIRST_AUTOTAB_OP];
953 
954     if (!accepted) return FALSE;
955 
956     if (list[1] == 0) return TRUE;
957     /* Might be an empty repeat. */
958     continue;
959     }
960 
961   /* Control reaches here only if one of the items is a small character list.
962   All characters are checked against the other side. */
963 
964   do
965     {
966     chr = *chr_ptr;
967 
968     switch(list_ptr[0])
969       {
970       case OP_CHAR:
971       ochr_ptr = list_ptr + 2;
972       do
973         {
974         if (chr == *ochr_ptr) return FALSE;
975         ochr_ptr++;
976         }
977       while(*ochr_ptr != NOTACHAR);
978       break;
979 
980       case OP_NOT:
981       ochr_ptr = list_ptr + 2;
982       do
983         {
984         if (chr == *ochr_ptr)
985           break;
986         ochr_ptr++;
987         }
988       while(*ochr_ptr != NOTACHAR);
989       if (*ochr_ptr == NOTACHAR) return FALSE;   /* Not found */
990       break;
991 
992       /* Note that OP_DIGIT etc. are generated only when PCRE2_UCP is *not*
993       set. When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */
994 
995       case OP_DIGIT:
996       if (chr < 256 && (cb->ctypes[chr] & ctype_digit) != 0) return FALSE;
997       break;
998 
999       case OP_NOT_DIGIT:
1000       if (chr > 255 || (cb->ctypes[chr] & ctype_digit) == 0) return FALSE;
1001       break;
1002 
1003       case OP_WHITESPACE:
1004       if (chr < 256 && (cb->ctypes[chr] & ctype_space) != 0) return FALSE;
1005       break;
1006 
1007       case OP_NOT_WHITESPACE:
1008       if (chr > 255 || (cb->ctypes[chr] & ctype_space) == 0) return FALSE;
1009       break;
1010 
1011       case OP_WORDCHAR:
1012       if (chr < 255 && (cb->ctypes[chr] & ctype_word) != 0) return FALSE;
1013       break;
1014 
1015       case OP_NOT_WORDCHAR:
1016       if (chr > 255 || (cb->ctypes[chr] & ctype_word) == 0) return FALSE;
1017       break;
1018 
1019       case OP_HSPACE:
1020       switch(chr)
1021         {
1022         HSPACE_CASES: return FALSE;
1023         default: break;
1024         }
1025       break;
1026 
1027       case OP_NOT_HSPACE:
1028       switch(chr)
1029         {
1030         HSPACE_CASES: break;
1031         default: return FALSE;
1032         }
1033       break;
1034 
1035       case OP_ANYNL:
1036       case OP_VSPACE:
1037       switch(chr)
1038         {
1039         VSPACE_CASES: return FALSE;
1040         default: break;
1041         }
1042       break;
1043 
1044       case OP_NOT_VSPACE:
1045       switch(chr)
1046         {
1047         VSPACE_CASES: break;
1048         default: return FALSE;
1049         }
1050       break;
1051 
1052       case OP_DOLL:
1053       case OP_EODN:
1054       switch (chr)
1055         {
1056         case CHAR_CR:
1057         case CHAR_LF:
1058         case CHAR_VT:
1059         case CHAR_FF:
1060         case CHAR_NEL:
1061 #ifndef EBCDIC
1062         case 0x2028:
1063         case 0x2029:
1064 #endif  /* Not EBCDIC */
1065         return FALSE;
1066         }
1067       break;
1068 
1069       case OP_EOD:    /* Can always possessify before \z */
1070       break;
1071 
1072 #ifdef SUPPORT_UNICODE
1073       case OP_PROP:
1074       case OP_NOTPROP:
1075       if (!check_char_prop(chr, list_ptr[2], list_ptr[3],
1076             list_ptr[0] == OP_NOTPROP))
1077         return FALSE;
1078       break;
1079 #endif
1080 
1081       case OP_NCLASS:
1082       if (chr > 255) return FALSE;
1083       /* Fall through */
1084 
1085       case OP_CLASS:
1086       if (chr > 255) break;
1087       class_bitset = (uint8_t *)
1088         ((list_ptr == list ? code : base_end) - list_ptr[2]);
1089       if ((class_bitset[chr >> 3] & (1u << (chr & 7))) != 0) return FALSE;
1090       break;
1091 
1092 #ifdef SUPPORT_WIDE_CHARS
1093       case OP_XCLASS:
1094       if (PRIV(xclass)(chr, (list_ptr == list ? code : base_end) -
1095           list_ptr[2] + LINK_SIZE, utf)) return FALSE;
1096       break;
1097 #endif
1098 
1099       default:
1100       return FALSE;
1101       }
1102 
1103     chr_ptr++;
1104     }
1105   while(*chr_ptr != NOTACHAR);
1106 
1107   /* At least one character must be matched from this opcode. */
1108 
1109   if (list[1] == 0) return TRUE;
1110   }
1111 
/* Control never reaches here. There used to be a fail-safe return FALSE; here,
but some compilers complain about an unreachable statement. */
1114 }
1115 
1116 
1117 
1118 /*************************************************
1119 *    Scan compiled regex for auto-possession     *
1120 *************************************************/
1121 
1122 /* Replaces single character iterations with their possessive alternatives
1123 if appropriate. This function modifies the compiled opcode! Hitting a
1124 non-existent opcode may indicate a bug in PCRE2, but it can also be caused if a
1125 bad UTF string was compiled with PCRE2_NO_UTF_CHECK. The rec_limit catches
1126 overly complicated or large patterns. In these cases, the check just stops,
1127 leaving the remainder of the pattern unpossessified.
1128 
1129 Arguments:
1130   code        points to start of the byte code
1131   cb          compile data block
1132 
1133 Returns:      0 for success
1134               -1 if a non-existant opcode is encountered
1135 */
1136 
1137 int
PRIV(auto_possessify)1138 PRIV(auto_possessify)(PCRE2_UCHAR *code, const compile_block *cb)
1139 {
1140 PCRE2_UCHAR c;
1141 PCRE2_SPTR end;
1142 PCRE2_UCHAR *repeat_opcode;
1143 uint32_t list[8];
1144 int rec_limit = 1000;  /* Was 10,000 but clang+ASAN uses a lot of stack. */
1145 BOOL utf = (cb->external_options & PCRE2_UTF) != 0;
1146 BOOL ucp = (cb->external_options & PCRE2_UCP) != 0;
1147 
1148 for (;;)
1149   {
1150   c = *code;
1151 
1152   if (c >= OP_TABLE_LENGTH) return -1;   /* Something gone wrong */
1153 
1154   if (c >= OP_STAR && c <= OP_TYPEPOSUPTO)
1155     {
1156     c -= get_repeat_base(c) - OP_STAR;
1157     end = (c <= OP_MINUPTO) ?
1158       get_chr_property_list(code, utf, ucp, cb->fcc, list) : NULL;
1159     list[1] = c == OP_STAR || c == OP_PLUS || c == OP_QUERY || c == OP_UPTO;
1160 
1161     if (end != NULL && compare_opcodes(end, utf, ucp, cb, list, end,
1162         &rec_limit))
1163       {
1164       switch(c)
1165         {
1166         case OP_STAR:
1167         *code += OP_POSSTAR - OP_STAR;
1168         break;
1169 
1170         case OP_MINSTAR:
1171         *code += OP_POSSTAR - OP_MINSTAR;
1172         break;
1173 
1174         case OP_PLUS:
1175         *code += OP_POSPLUS - OP_PLUS;
1176         break;
1177 
1178         case OP_MINPLUS:
1179         *code += OP_POSPLUS - OP_MINPLUS;
1180         break;
1181 
1182         case OP_QUERY:
1183         *code += OP_POSQUERY - OP_QUERY;
1184         break;
1185 
1186         case OP_MINQUERY:
1187         *code += OP_POSQUERY - OP_MINQUERY;
1188         break;
1189 
1190         case OP_UPTO:
1191         *code += OP_POSUPTO - OP_UPTO;
1192         break;
1193 
1194         case OP_MINUPTO:
1195         *code += OP_POSUPTO - OP_MINUPTO;
1196         break;
1197         }
1198       }
1199     c = *code;
1200     }
1201   else if (c == OP_CLASS || c == OP_NCLASS || c == OP_XCLASS)
1202     {
1203 #ifdef SUPPORT_WIDE_CHARS
1204     if (c == OP_XCLASS)
1205       repeat_opcode = code + GET(code, 1);
1206     else
1207 #endif
1208       repeat_opcode = code + 1 + (32 / sizeof(PCRE2_UCHAR));
1209 
1210     c = *repeat_opcode;
1211     if (c >= OP_CRSTAR && c <= OP_CRMINRANGE)
1212       {
1213       /* The return from get_chr_property_list() will never be NULL when
1214       *code (aka c) is one of the three class opcodes. However, gcc with
1215       -fanalyzer notes that a NULL return is possible, and grumbles. Hence we
1216       put in a check. */
1217 
1218       end = get_chr_property_list(code, utf, ucp, cb->fcc, list);
1219       list[1] = (c & 1) == 0;
1220 
1221       if (end != NULL &&
1222           compare_opcodes(end, utf, ucp, cb, list, end, &rec_limit))
1223         {
1224         switch (c)
1225           {
1226           case OP_CRSTAR:
1227           case OP_CRMINSTAR:
1228           *repeat_opcode = OP_CRPOSSTAR;
1229           break;
1230 
1231           case OP_CRPLUS:
1232           case OP_CRMINPLUS:
1233           *repeat_opcode = OP_CRPOSPLUS;
1234           break;
1235 
1236           case OP_CRQUERY:
1237           case OP_CRMINQUERY:
1238           *repeat_opcode = OP_CRPOSQUERY;
1239           break;
1240 
1241           case OP_CRRANGE:
1242           case OP_CRMINRANGE:
1243           *repeat_opcode = OP_CRPOSRANGE;
1244           break;
1245           }
1246         }
1247       }
1248     c = *code;
1249     }
1250 
1251   switch(c)
1252     {
1253     case OP_END:
1254     return 0;
1255 
1256     case OP_TYPESTAR:
1257     case OP_TYPEMINSTAR:
1258     case OP_TYPEPLUS:
1259     case OP_TYPEMINPLUS:
1260     case OP_TYPEQUERY:
1261     case OP_TYPEMINQUERY:
1262     case OP_TYPEPOSSTAR:
1263     case OP_TYPEPOSPLUS:
1264     case OP_TYPEPOSQUERY:
1265     if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
1266     break;
1267 
1268     case OP_TYPEUPTO:
1269     case OP_TYPEMINUPTO:
1270     case OP_TYPEEXACT:
1271     case OP_TYPEPOSUPTO:
1272     if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
1273       code += 2;
1274     break;
1275 
1276     case OP_CALLOUT_STR:
1277     code += GET(code, 1 + 2*LINK_SIZE);
1278     break;
1279 
1280 #ifdef SUPPORT_WIDE_CHARS
1281     case OP_XCLASS:
1282     code += GET(code, 1);
1283     break;
1284 #endif
1285 
1286     case OP_MARK:
1287     case OP_COMMIT_ARG:
1288     case OP_PRUNE_ARG:
1289     case OP_SKIP_ARG:
1290     case OP_THEN_ARG:
1291     code += code[1];
1292     break;
1293     }
1294 
1295   /* Add in the fixed length from the table */
1296 
1297   code += PRIV(OP_lengths)[c];
1298 
1299   /* In UTF-8 and UTF-16 modes, opcodes that are followed by a character may be
1300   followed by a multi-byte character. The length in the table is a minimum, so
1301   we have to arrange to skip the extra code units. */
1302 
1303 #ifdef MAYBE_UTF_MULTI
1304   if (utf) switch(c)
1305     {
1306     case OP_CHAR:
1307     case OP_CHARI:
1308     case OP_NOT:
1309     case OP_NOTI:
1310     case OP_STAR:
1311     case OP_MINSTAR:
1312     case OP_PLUS:
1313     case OP_MINPLUS:
1314     case OP_QUERY:
1315     case OP_MINQUERY:
1316     case OP_UPTO:
1317     case OP_MINUPTO:
1318     case OP_EXACT:
1319     case OP_POSSTAR:
1320     case OP_POSPLUS:
1321     case OP_POSQUERY:
1322     case OP_POSUPTO:
1323     case OP_STARI:
1324     case OP_MINSTARI:
1325     case OP_PLUSI:
1326     case OP_MINPLUSI:
1327     case OP_QUERYI:
1328     case OP_MINQUERYI:
1329     case OP_UPTOI:
1330     case OP_MINUPTOI:
1331     case OP_EXACTI:
1332     case OP_POSSTARI:
1333     case OP_POSPLUSI:
1334     case OP_POSQUERYI:
1335     case OP_POSUPTOI:
1336     case OP_NOTSTAR:
1337     case OP_NOTMINSTAR:
1338     case OP_NOTPLUS:
1339     case OP_NOTMINPLUS:
1340     case OP_NOTQUERY:
1341     case OP_NOTMINQUERY:
1342     case OP_NOTUPTO:
1343     case OP_NOTMINUPTO:
1344     case OP_NOTEXACT:
1345     case OP_NOTPOSSTAR:
1346     case OP_NOTPOSPLUS:
1347     case OP_NOTPOSQUERY:
1348     case OP_NOTPOSUPTO:
1349     case OP_NOTSTARI:
1350     case OP_NOTMINSTARI:
1351     case OP_NOTPLUSI:
1352     case OP_NOTMINPLUSI:
1353     case OP_NOTQUERYI:
1354     case OP_NOTMINQUERYI:
1355     case OP_NOTUPTOI:
1356     case OP_NOTMINUPTOI:
1357     case OP_NOTEXACTI:
1358     case OP_NOTPOSSTARI:
1359     case OP_NOTPOSPLUSI:
1360     case OP_NOTPOSQUERYI:
1361     case OP_NOTPOSUPTOI:
1362     if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);
1363     break;
1364     }
1365 #else
1366   (void)(utf);  /* Keep compiler happy by referencing function argument */
1367 #endif  /* SUPPORT_WIDE_CHARS */
1368   }
1369 }
1370 
1371 /* End of pcre2_auto_possess.c */
1372