1 /*************************************************
2 *      Perl-Compatible Regular Expressions       *
3 *************************************************/
4 
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7 
8                        Written by Philip Hazel
9      Original API code Copyright (c) 1997-2012 University of Cambridge
10           New API code Copyright (c) 2016-2022 University of Cambridge
11 
12 -----------------------------------------------------------------------------
13 Redistribution and use in source and binary forms, with or without
14 modification, are permitted provided that the following conditions are met:
15 
16     * Redistributions of source code must retain the above copyright notice,
17       this list of conditions and the following disclaimer.
18 
19     * Redistributions in binary form must reproduce the above copyright
20       notice, this list of conditions and the following disclaimer in the
21       documentation and/or other materials provided with the distribution.
22 
23     * Neither the name of the University of Cambridge nor the names of its
24       contributors may be used to endorse or promote products derived from
25       this software without specific prior written permission.
26 
27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 POSSIBILITY OF SUCH DAMAGE.
38 -----------------------------------------------------------------------------
39 */
40 
41 /* This module contains functions that scan a compiled pattern and change
42 repeats into possessive repeats where possible. */
43 
44 
45 #ifdef HAVE_CONFIG_H
46 #include "config.h"
47 #endif
48 
49 
50 #include "pcre2_internal.h"
51 
52 
53 /*************************************************
54 *        Tables for auto-possessification        *
55 *************************************************/
56 
57 /* This table is used to check whether auto-possessification is possible
58 between adjacent character-type opcodes. The left-hand (repeated) opcode is
used to select the row, and the right-hand opcode is used to select the column.
60 A value of 1 means that auto-possessification is OK. For example, the second
61 value in the first row means that \D+\d can be turned into \D++\d.
62 
63 The Unicode property types (\P and \p) have to be present to fill out the table
64 because of what their opcode values are, but the table values should always be
65 zero because property types are handled separately in the code. The last four
66 columns apply to items that cannot be repeated, so there is no need to have
rows for them. Note that OP_DIGIT etc. are generated only when PCRE2_UCP is
68 *not* set. When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */
69 
/* Table dimensions. Rows span the opcodes FIRST_AUTOTAB_OP through
LAST_AUTOTAB_LEFT_OP and columns span FIRST_AUTOTAB_OP through
LAST_AUTOTAB_RIGHT_OP, so there are more columns than rows: some items can
only appear on the right-hand side. */

#define APTROWS (LAST_AUTOTAB_LEFT_OP - FIRST_AUTOTAB_OP + 1)
#define APTCOLS (LAST_AUTOTAB_RIGHT_OP - FIRST_AUTOTAB_OP + 1)

/* autoposstab[left][right] is 1 when the repeated left-hand item may be made
possessive in front of the right-hand item, and 0 otherwise. The lookup itself
is outside this view; presumably both indexes are opcode values with
FIRST_AUTOTAB_OP subtracted - confirm at the point of use. The row and column
labels below give the pattern item that each index stands for. */

static const uint8_t autoposstab[APTROWS][APTCOLS] = {
/* \D \d \S \s \W \w  . .+ \C \P \p \R \H \h \V \v \X \Z \z  $ $M */
  { 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 },  /* \D */
  { 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1 },  /* \d */
  { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1 },  /* \S */
  { 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 },  /* \s */
  { 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 },  /* \W */
  { 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1 },  /* \w */
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0 },  /* .  */
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 },  /* .+ */
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 },  /* \C */
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },  /* \P */
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },  /* \p */
  { 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0 },  /* \R */
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0 },  /* \H */
  { 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0 },  /* \h */
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0 },  /* \V */
  { 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0 },  /* \v */
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 }   /* \X */
};
93 
94 #ifdef SUPPORT_UNICODE
95 /* This table is used to check whether auto-possessification is possible
96 between adjacent Unicode property opcodes (OP_PROP and OP_NOTPROP). The
97 left-hand (repeated) opcode is used to select the row, and the right-hand
98 opcode is used to select the column. The values are as follows:
99 
100   0   Always return FALSE (never auto-possessify)
101   1   Character groups are distinct (possessify if both are OP_PROP)
102   2   Check character categories in the same group (general or particular)
103   3   TRUE if the two opcodes are not the same (PROP vs NOTPROP)
104 
105   4   Check left general category vs right particular category
106   5   Check right general category vs left particular category
107 
108   6   Left alphanum vs right general category
109   7   Left space vs right general category
110   8   Left word vs right general category
111 
112   9   Right alphanum vs left general category
113  10   Right space vs left general category
114  11   Right word vs left general category
115 
116  12   Left alphanum vs right particular category
117  13   Left space vs right particular category
118  14   Left word vs right particular category
119 
120  15   Right alphanum vs left particular category
121  16   Right space vs left particular category
122  17   Right word vs left particular category
123 */
124 
/* Indexed as propposstab[left property type][right property type], both being
PT_xxx values. compare_opcodes() reads propposstab[base_list[2]][list[2]] and
switches on the resulting action code. */

static const uint8_t propposstab[PT_TABSIZE][PT_TABSIZE] = {
/* ANY LAMP GC  PC  SC  SCX ALNUM SPACE PXSPACE WORD CLIST UCNC BIDICL BOOL */
  { 0,  0,  0,  0,  0,   0,    0,    0,      0,   0,    0,   0,    0,    0 },  /* PT_ANY */
  { 0,  3,  0,  0,  0,   0,    3,    1,      1,   0,    0,   0,    0,    0 },  /* PT_LAMP */
  { 0,  0,  2,  4,  0,   0,    9,   10,     10,  11,    0,   0,    0,    0 },  /* PT_GC */
  { 0,  0,  5,  2,  0,   0,   15,   16,     16,  17,    0,   0,    0,    0 },  /* PT_PC */
  { 0,  0,  0,  0,  2,   2,    0,    0,      0,   0,    0,   0,    0,    0 },  /* PT_SC */
  { 0,  0,  0,  0,  2,   2,    0,    0,      0,   0,    0,   0,    0,    0 },  /* PT_SCX */
  { 0,  3,  6, 12,  0,   0,    3,    1,      1,   0,    0,   0,    0,    0 },  /* PT_ALNUM */
  { 0,  1,  7, 13,  0,   0,    1,    3,      3,   1,    0,   0,    0,    0 },  /* PT_SPACE */
  { 0,  1,  7, 13,  0,   0,    1,    3,      3,   1,    0,   0,    0,    0 },  /* PT_PXSPACE */
  { 0,  0,  8, 14,  0,   0,    0,    1,      1,   3,    0,   0,    0,    0 },  /* PT_WORD */
  { 0,  0,  0,  0,  0,   0,    0,    0,      0,   0,    0,   0,    0,    0 },  /* PT_CLIST */
  { 0,  0,  0,  0,  0,   0,    0,    0,      0,   0,    0,   3,    0,    0 },  /* PT_UCNC */
  { 0,  0,  0,  0,  0,   0,    0,    0,      0,   0,    0,   0,    0,    0 },  /* PT_BIDICL */
  { 0,  0,  0,  0,  0,   0,    0,    0,      0,   0,    0,   0,    0,    0 }   /* PT_BOOL */
};
142 
143 /* This table is used to check whether auto-possessification is possible
144 between adjacent Unicode property opcodes (OP_PROP and OP_NOTPROP) when one
145 specifies a general category and the other specifies a particular category. The
146 row is selected by the general category and the column by the particular
147 category. The value is 1 if the particular category is not part of the general
148 category. */
149 
/* Seven general-category rows by thirty particular-category columns, in the
order given by the row and column labels. compare_opcodes() indexes this table
directly with category values taken from the compiled pattern, so the label
order presumably matches the numeric order of the ucp_xx constants - confirm
against the header that defines them. */

static const uint8_t catposstab[7][30] = {
/* Cc Cf Cn Co Cs Ll Lm Lo Lt Lu Mc Me Mn Nd Nl No Pc Pd Pe Pf Pi Po Ps Sc Sk Sm So Zl Zp Zs */
  { 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },  /* C */
  { 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },  /* L */
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },  /* M */
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },  /* N */
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1 },  /* P */
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1 },  /* S */
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0 }   /* Z */
};
160 
161 /* This table is used when checking ALNUM, (PX)SPACE, SPACE, and WORD against
162 a general or particular category. The properties in each row are those
163 that apply to the character set in question. Duplication means that a little
164 unnecessary work is done when checking, but this keeps things much simpler
165 because they can all use the same code. For more details see the comment where
166 this table is used.
167 
168 Note: SPACE and PXSPACE used to be different because Perl excluded VT from
169 "space", but from Perl 5.18 it's included, so both categories are treated the
170 same here. */
171 
/* One row per special property. compare_opcodes() selects the row by
subtracting 6, 9, 12 or 15 from the propposstab action code, so row 0 serves
ALNUM, row 1 serves SPACE/PXSPACE and row 2 serves WORD. Within a row, entries
0 and 1 are general categories that are always tested; entry 2 is a general
category consulted only when comparing against a general category, and entry 3
is a particular category consulted only when comparing against a particular
category. */

static const uint8_t posspropstab[3][4] = {
  { ucp_L, ucp_N, ucp_N, ucp_Nl },  /* ALNUM, 3rd and 4th values redundant */
  { ucp_Z, ucp_Z, ucp_C, ucp_Cc },  /* SPACE and PXSPACE, 2nd value redundant */
  { ucp_L, ucp_N, ucp_P, ucp_Po }   /* WORD */
};
177 #endif  /* SUPPORT_UNICODE */
178 
179 
180 
181 #ifdef SUPPORT_UNICODE
182 /*************************************************
183 *        Check a character and a property        *
184 *************************************************/
185 
186 /* This function is called by compare_opcodes() when a property item is
187 adjacent to a fixed character.
188 
189 Arguments:
190   c            the character
191   ptype        the property type
192   pdata        the data for the type
193   negated      TRUE if it's a negated property (\P or \p{^)
194 
195 Returns:       TRUE if auto-possessifying is OK
196 */
197 
198 static BOOL
check_char_prop(uint32_t c,unsigned int ptype,unsigned int pdata,BOOL negated)199 check_char_prop(uint32_t c, unsigned int ptype, unsigned int pdata,
200   BOOL negated)
201 {
202 BOOL ok;
203 const uint32_t *p;
204 const ucd_record *prop = GET_UCD(c);
205 
206 switch(ptype)
207   {
208   case PT_LAMP:
209   return (prop->chartype == ucp_Lu ||
210           prop->chartype == ucp_Ll ||
211           prop->chartype == ucp_Lt) == negated;
212 
213   case PT_GC:
214   return (pdata == PRIV(ucp_gentype)[prop->chartype]) == negated;
215 
216   case PT_PC:
217   return (pdata == prop->chartype) == negated;
218 
219   case PT_SC:
220   return (pdata == prop->script) == negated;
221 
222   case PT_SCX:
223   ok = (pdata == prop->script
224         || MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), pdata) != 0);
225   return ok == negated;
226 
227   /* These are specials */
228 
229   case PT_ALNUM:
230   return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
231           PRIV(ucp_gentype)[prop->chartype] == ucp_N) == negated;
232 
233   /* Perl space used to exclude VT, but from Perl 5.18 it is included, which
234   means that Perl space and POSIX space are now identical. PCRE was changed
235   at release 8.34. */
236 
237   case PT_SPACE:    /* Perl space */
238   case PT_PXSPACE:  /* POSIX space */
239   switch(c)
240     {
241     HSPACE_CASES:
242     VSPACE_CASES:
243     return negated;
244 
245     default:
246     return (PRIV(ucp_gentype)[prop->chartype] == ucp_Z) == negated;
247     }
248   break;  /* Control never reaches here */
249 
250   case PT_WORD:
251   return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
252           PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
253           c == CHAR_UNDERSCORE) == negated;
254 
255   case PT_CLIST:
256   p = PRIV(ucd_caseless_sets) + prop->caseset;
257   for (;;)
258     {
259     if (c < *p) return !negated;
260     if (c == *p++) return negated;
261     }
262   break;  /* Control never reaches here */
263 
264   /* Haven't yet thought these through. */
265 
266   case PT_BIDICL:
267   return FALSE;
268 
269   case PT_BOOL:
270   return FALSE;
271   }
272 
273 return FALSE;
274 }
275 #endif  /* SUPPORT_UNICODE */
276 
277 
278 
279 /*************************************************
280 *        Base opcode of repeated opcodes         *
281 *************************************************/
282 
283 /* Returns the base opcode for repeated single character type opcodes. If the
284 opcode is not a repeated character type, it returns with the original value.
285 
286 Arguments:  c opcode
287 Returns:    base opcode for the type
288 */
289 
290 static PCRE2_UCHAR
get_repeat_base(PCRE2_UCHAR c)291 get_repeat_base(PCRE2_UCHAR c)
292 {
293 return (c > OP_TYPEPOSUPTO)? c :
294        (c >= OP_TYPESTAR)?   OP_TYPESTAR :
295        (c >= OP_NOTSTARI)?   OP_NOTSTARI :
296        (c >= OP_NOTSTAR)?    OP_NOTSTAR :
297        (c >= OP_STARI)?      OP_STARI :
298                              OP_STAR;
299 }
300 
301 
302 /*************************************************
303 *        Fill the character property list        *
304 *************************************************/
305 
306 /* Checks whether the code points to an opcode that can take part in auto-
307 possessification, and if so, fills a list with its properties.
308 
309 Arguments:
310   code        points to start of expression
311   utf         TRUE if in UTF mode
312   ucp         TRUE if in UCP mode
313   fcc         points to the case-flipping table
314   list        points to output list
315               list[0] will be filled with the opcode
316               list[1] will be non-zero if this opcode
317                 can match an empty character string
318               list[2..7] depends on the opcode
319 
320 Returns:      points to the start of the next opcode if *code is accepted
321               NULL if *code is not accepted
322 */
323 
324 static PCRE2_SPTR
get_chr_property_list(PCRE2_SPTR code,BOOL utf,BOOL ucp,const uint8_t * fcc,uint32_t * list)325 get_chr_property_list(PCRE2_SPTR code, BOOL utf, BOOL ucp, const uint8_t *fcc,
326   uint32_t *list)
327 {
328 PCRE2_UCHAR c = *code;
329 PCRE2_UCHAR base;
330 PCRE2_SPTR end;
331 uint32_t chr;
332 
333 #ifdef SUPPORT_UNICODE
334 uint32_t *clist_dest;
335 const uint32_t *clist_src;
336 #else
337 (void)utf;    /* Suppress "unused parameter" compiler warnings */
338 (void)ucp;
339 #endif
340 
341 list[0] = c;
342 list[1] = FALSE;
343 code++;
344 
345 if (c >= OP_STAR && c <= OP_TYPEPOSUPTO)
346   {
347   base = get_repeat_base(c);
348   c -= (base - OP_STAR);
349 
350   if (c == OP_UPTO || c == OP_MINUPTO || c == OP_EXACT || c == OP_POSUPTO)
351     code += IMM2_SIZE;
352 
353   list[1] = (c != OP_PLUS && c != OP_MINPLUS && c != OP_EXACT &&
354              c != OP_POSPLUS);
355 
356   switch(base)
357     {
358     case OP_STAR:
359     list[0] = OP_CHAR;
360     break;
361 
362     case OP_STARI:
363     list[0] = OP_CHARI;
364     break;
365 
366     case OP_NOTSTAR:
367     list[0] = OP_NOT;
368     break;
369 
370     case OP_NOTSTARI:
371     list[0] = OP_NOTI;
372     break;
373 
374     case OP_TYPESTAR:
375     list[0] = *code;
376     code++;
377     break;
378     }
379   c = list[0];
380   }
381 
382 switch(c)
383   {
384   case OP_NOT_DIGIT:
385   case OP_DIGIT:
386   case OP_NOT_WHITESPACE:
387   case OP_WHITESPACE:
388   case OP_NOT_WORDCHAR:
389   case OP_WORDCHAR:
390   case OP_ANY:
391   case OP_ALLANY:
392   case OP_ANYNL:
393   case OP_NOT_HSPACE:
394   case OP_HSPACE:
395   case OP_NOT_VSPACE:
396   case OP_VSPACE:
397   case OP_EXTUNI:
398   case OP_EODN:
399   case OP_EOD:
400   case OP_DOLL:
401   case OP_DOLLM:
402   return code;
403 
404   case OP_CHAR:
405   case OP_NOT:
406   GETCHARINCTEST(chr, code);
407   list[2] = chr;
408   list[3] = NOTACHAR;
409   return code;
410 
411   case OP_CHARI:
412   case OP_NOTI:
413   list[0] = (c == OP_CHARI) ? OP_CHAR : OP_NOT;
414   GETCHARINCTEST(chr, code);
415   list[2] = chr;
416 
417 #ifdef SUPPORT_UNICODE
418   if (chr < 128 || (chr < 256 && !utf && !ucp))
419     list[3] = fcc[chr];
420   else
421     list[3] = UCD_OTHERCASE(chr);
422 #elif defined SUPPORT_WIDE_CHARS
423   list[3] = (chr < 256) ? fcc[chr] : chr;
424 #else
425   list[3] = fcc[chr];
426 #endif
427 
428   /* The othercase might be the same value. */
429 
430   if (chr == list[3])
431     list[3] = NOTACHAR;
432   else
433     list[4] = NOTACHAR;
434   return code;
435 
436 #ifdef SUPPORT_UNICODE
437   case OP_PROP:
438   case OP_NOTPROP:
439   if (code[0] != PT_CLIST)
440     {
441     list[2] = code[0];
442     list[3] = code[1];
443     return code + 2;
444     }
445 
446   /* Convert only if we have enough space. */
447 
448   clist_src = PRIV(ucd_caseless_sets) + code[1];
449   clist_dest = list + 2;
450   code += 2;
451 
452   do {
453      if (clist_dest >= list + 8)
454        {
455        /* Early return if there is not enough space. This should never
456        happen, since all clists are shorter than 5 character now. */
457        list[2] = code[0];
458        list[3] = code[1];
459        return code;
460        }
461      *clist_dest++ = *clist_src;
462      }
463   while(*clist_src++ != NOTACHAR);
464 
465   /* All characters are stored. The terminating NOTACHAR is copied from the
466   clist itself. */
467 
468   list[0] = (c == OP_PROP) ? OP_CHAR : OP_NOT;
469   return code;
470 #endif
471 
472   case OP_NCLASS:
473   case OP_CLASS:
474 #ifdef SUPPORT_WIDE_CHARS
475   case OP_XCLASS:
476   if (c == OP_XCLASS)
477     end = code + GET(code, 0) - 1;
478   else
479 #endif
480     end = code + 32 / sizeof(PCRE2_UCHAR);
481 
482   switch(*end)
483     {
484     case OP_CRSTAR:
485     case OP_CRMINSTAR:
486     case OP_CRQUERY:
487     case OP_CRMINQUERY:
488     case OP_CRPOSSTAR:
489     case OP_CRPOSQUERY:
490     list[1] = TRUE;
491     end++;
492     break;
493 
494     case OP_CRPLUS:
495     case OP_CRMINPLUS:
496     case OP_CRPOSPLUS:
497     end++;
498     break;
499 
500     case OP_CRRANGE:
501     case OP_CRMINRANGE:
502     case OP_CRPOSRANGE:
503     list[1] = (GET2(end, 1) == 0);
504     end += 1 + 2 * IMM2_SIZE;
505     break;
506     }
507   list[2] = (uint32_t)(end - code);
508   return end;
509   }
510 
511 return NULL;    /* Opcode not accepted */
512 }
513 
514 
515 
516 /*************************************************
517 *    Scan further character sets for match       *
518 *************************************************/
519 
520 /* Checks whether the base and the current opcode have a common character, in
521 which case the base cannot be possessified.
522 
523 Arguments:
524   code        points to the byte code
525   utf         TRUE in UTF mode
526   ucp         TRUE in UCP mode
527   cb          compile data block
528   base_list   the data list of the base opcode
529   base_end    the end of the base opcode
530   rec_limit   points to recursion depth counter
531 
532 Returns:      TRUE if the auto-possessification is possible
533 */
534 
535 static BOOL
compare_opcodes(PCRE2_SPTR code,BOOL utf,BOOL ucp,const compile_block * cb,const uint32_t * base_list,PCRE2_SPTR base_end,int * rec_limit)536 compare_opcodes(PCRE2_SPTR code, BOOL utf, BOOL ucp, const compile_block *cb,
537   const uint32_t *base_list, PCRE2_SPTR base_end, int *rec_limit)
538 {
539 PCRE2_UCHAR c;
540 uint32_t list[8];
541 const uint32_t *chr_ptr;
542 const uint32_t *ochr_ptr;
543 const uint32_t *list_ptr;
544 PCRE2_SPTR next_code;
545 #ifdef SUPPORT_WIDE_CHARS
546 PCRE2_SPTR xclass_flags;
547 #endif
548 const uint8_t *class_bitset;
549 const uint8_t *set1, *set2, *set_end;
550 uint32_t chr;
551 BOOL accepted, invert_bits;
552 BOOL entered_a_group = FALSE;
553 
554 if (--(*rec_limit) <= 0) return FALSE;  /* Recursion has gone too deep */
555 
/* Note: the base_list[1] contains whether the current opcode has a greedy
(represented by a non-zero value) quantifier. This is different from
other character type lists, which store here whether the character iterator
can match an empty string (also represented by a non-zero value). */
560 
561 for(;;)
562   {
563   /* All operations move the code pointer forward.
564   Therefore infinite recursions are not possible. */
565 
566   c = *code;
567 
568   /* Skip over callouts */
569 
570   if (c == OP_CALLOUT)
571     {
572     code += PRIV(OP_lengths)[c];
573     continue;
574     }
575 
576   if (c == OP_CALLOUT_STR)
577     {
578     code += GET(code, 1 + 2*LINK_SIZE);
579     continue;
580     }
581 
582   /* At the end of a branch, skip to the end of the group. */
583 
584   if (c == OP_ALT)
585     {
586     do code += GET(code, 1); while (*code == OP_ALT);
587     c = *code;
588     }
589 
590   /* Inspect the next opcode. */
591 
592   switch(c)
593     {
594     /* We can always possessify a greedy iterator at the end of the pattern,
595     which is reached after skipping over the final OP_KET. A non-greedy
596     iterator must never be possessified. */
597 
598     case OP_END:
599     return base_list[1] != 0;
600 
601     /* When an iterator is at the end of certain kinds of group we can inspect
602     what follows the group by skipping over the closing ket. Note that this
603     does not apply to OP_KETRMAX or OP_KETRMIN because what follows any given
604     iteration is variable (could be another iteration or could be the next
605     item). As these two opcodes are not listed in the next switch, they will
606     end up as the next code to inspect, and return FALSE by virtue of being
607     unsupported. */
608 
609     case OP_KET:
610     case OP_KETRPOS:
611     /* The non-greedy case cannot be converted to a possessive form. */
612 
613     if (base_list[1] == 0) return FALSE;
614 
615     /* If the bracket is capturing it might be referenced by an OP_RECURSE
616     so its last iterator can never be possessified if the pattern contains
617     recursions. (This could be improved by keeping a list of group numbers that
618     are called by recursion.) */
619 
620     switch(*(code - GET(code, 1)))
621       {
622       case OP_CBRA:
623       case OP_SCBRA:
624       case OP_CBRAPOS:
625       case OP_SCBRAPOS:
626       if (cb->had_recurse) return FALSE;
627       break;
628 
629       /* A script run might have to backtrack if the iterated item can match
630       characters from more than one script. So give up unless repeating an
631       explicit character. */
632 
633       case OP_SCRIPT_RUN:
634       if (base_list[0] != OP_CHAR && base_list[0] != OP_CHARI)
635         return FALSE;
636       break;
637 
638       /* Atomic sub-patterns and assertions can always auto-possessify their
639       last iterator. However, if the group was entered as a result of checking
640       a previous iterator, this is not possible. */
641 
642       case OP_ASSERT:
643       case OP_ASSERT_NOT:
644       case OP_ASSERTBACK:
645       case OP_ASSERTBACK_NOT:
646       case OP_ONCE:
647       return !entered_a_group;
648 
649       /* Non-atomic assertions - don't possessify last iterator. This needs
650       more thought. */
651 
652       case OP_ASSERT_NA:
653       case OP_ASSERTBACK_NA:
654       return FALSE;
655       }
656 
657     /* Skip over the bracket and inspect what comes next. */
658 
659     code += PRIV(OP_lengths)[c];
660     continue;
661 
662     /* Handle cases where the next item is a group. */
663 
664     case OP_ONCE:
665     case OP_BRA:
666     case OP_CBRA:
667     next_code = code + GET(code, 1);
668     code += PRIV(OP_lengths)[c];
669 
670     /* Check each branch. We have to recurse a level for all but the last
671     branch. */
672 
673     while (*next_code == OP_ALT)
674       {
675       if (!compare_opcodes(code, utf, ucp, cb, base_list, base_end, rec_limit))
676         return FALSE;
677       code = next_code + 1 + LINK_SIZE;
678       next_code += GET(next_code, 1);
679       }
680 
681     entered_a_group = TRUE;
682     continue;
683 
684     case OP_BRAZERO:
685     case OP_BRAMINZERO:
686 
687     next_code = code + 1;
688     if (*next_code != OP_BRA && *next_code != OP_CBRA &&
689         *next_code != OP_ONCE) return FALSE;
690 
691     do next_code += GET(next_code, 1); while (*next_code == OP_ALT);
692 
693     /* The bracket content will be checked by the OP_BRA/OP_CBRA case above. */
694 
695     next_code += 1 + LINK_SIZE;
696     if (!compare_opcodes(next_code, utf, ucp, cb, base_list, base_end,
697          rec_limit))
698       return FALSE;
699 
700     code += PRIV(OP_lengths)[c];
701     continue;
702 
703     /* The next opcode does not need special handling; fall through and use it
704     to see if the base can be possessified. */
705 
706     default:
707     break;
708     }
709 
710   /* We now have the next appropriate opcode to compare with the base. Check
711   for a supported opcode, and load its properties. */
712 
713   code = get_chr_property_list(code, utf, ucp, cb->fcc, list);
714   if (code == NULL) return FALSE;    /* Unsupported */
715 
716   /* If either opcode is a small character list, set pointers for comparing
717   characters from that list with another list, or with a property. */
718 
719   if (base_list[0] == OP_CHAR)
720     {
721     chr_ptr = base_list + 2;
722     list_ptr = list;
723     }
724   else if (list[0] == OP_CHAR)
725     {
726     chr_ptr = list + 2;
727     list_ptr = base_list;
728     }
729 
730   /* Character bitsets can also be compared to certain opcodes. */
731 
732   else if (base_list[0] == OP_CLASS || list[0] == OP_CLASS
733 #if PCRE2_CODE_UNIT_WIDTH == 8
734       /* In 8 bit, non-UTF mode, OP_CLASS and OP_NCLASS are the same. */
735       || (!utf && (base_list[0] == OP_NCLASS || list[0] == OP_NCLASS))
736 #endif
737       )
738     {
739 #if PCRE2_CODE_UNIT_WIDTH == 8
740     if (base_list[0] == OP_CLASS || (!utf && base_list[0] == OP_NCLASS))
741 #else
742     if (base_list[0] == OP_CLASS)
743 #endif
744       {
745       set1 = (uint8_t *)(base_end - base_list[2]);
746       list_ptr = list;
747       }
748     else
749       {
750       set1 = (uint8_t *)(code - list[2]);
751       list_ptr = base_list;
752       }
753 
754     invert_bits = FALSE;
755     switch(list_ptr[0])
756       {
757       case OP_CLASS:
758       case OP_NCLASS:
759       set2 = (uint8_t *)
760         ((list_ptr == list ? code : base_end) - list_ptr[2]);
761       break;
762 
763 #ifdef SUPPORT_WIDE_CHARS
764       case OP_XCLASS:
765       xclass_flags = (list_ptr == list ? code : base_end) - list_ptr[2] + LINK_SIZE;
766       if ((*xclass_flags & XCL_HASPROP) != 0) return FALSE;
767       if ((*xclass_flags & XCL_MAP) == 0)
768         {
769         /* No bits are set for characters < 256. */
770         if (list[1] == 0) return (*xclass_flags & XCL_NOT) == 0;
771         /* Might be an empty repeat. */
772         continue;
773         }
774       set2 = (uint8_t *)(xclass_flags + 1);
775       break;
776 #endif
777 
778       case OP_NOT_DIGIT:
779       invert_bits = TRUE;
780       /* Fall through */
781       case OP_DIGIT:
782       set2 = (uint8_t *)(cb->cbits + cbit_digit);
783       break;
784 
785       case OP_NOT_WHITESPACE:
786       invert_bits = TRUE;
787       /* Fall through */
788       case OP_WHITESPACE:
789       set2 = (uint8_t *)(cb->cbits + cbit_space);
790       break;
791 
792       case OP_NOT_WORDCHAR:
793       invert_bits = TRUE;
794       /* Fall through */
795       case OP_WORDCHAR:
796       set2 = (uint8_t *)(cb->cbits + cbit_word);
797       break;
798 
799       default:
800       return FALSE;
801       }
802 
803     /* Because the bit sets are unaligned bytes, we need to perform byte
804     comparison here. */
805 
806     set_end = set1 + 32;
807     if (invert_bits)
808       {
809       do
810         {
811         if ((*set1++ & ~(*set2++)) != 0) return FALSE;
812         }
813       while (set1 < set_end);
814       }
815     else
816       {
817       do
818         {
819         if ((*set1++ & *set2++) != 0) return FALSE;
820         }
821       while (set1 < set_end);
822       }
823 
824     if (list[1] == 0) return TRUE;
825     /* Might be an empty repeat. */
826     continue;
827     }
828 
829   /* Some property combinations also acceptable. Unicode property opcodes are
830   processed specially; the rest can be handled with a lookup table. */
831 
832   else
833     {
834     uint32_t leftop, rightop;
835 
836     leftop = base_list[0];
837     rightop = list[0];
838 
839 #ifdef SUPPORT_UNICODE
840     accepted = FALSE; /* Always set in non-unicode case. */
841     if (leftop == OP_PROP || leftop == OP_NOTPROP)
842       {
843       if (rightop == OP_EOD)
844         accepted = TRUE;
845       else if (rightop == OP_PROP || rightop == OP_NOTPROP)
846         {
847         int n;
848         const uint8_t *p;
849         BOOL same = leftop == rightop;
850         BOOL lisprop = leftop == OP_PROP;
851         BOOL risprop = rightop == OP_PROP;
852         BOOL bothprop = lisprop && risprop;
853 
854         /* There's a table that specifies how each combination is to be
855         processed:
856           0   Always return FALSE (never auto-possessify)
857           1   Character groups are distinct (possessify if both are OP_PROP)
858           2   Check character categories in the same group (general or particular)
859           3   Return TRUE if the two opcodes are not the same
860           ... see comments below
861         */
862 
863         n = propposstab[base_list[2]][list[2]];
864         switch(n)
865           {
866           case 0: break;
867           case 1: accepted = bothprop; break;
868           case 2: accepted = (base_list[3] == list[3]) != same; break;
869           case 3: accepted = !same; break;
870 
871           case 4:  /* Left general category, right particular category */
872           accepted = risprop && catposstab[base_list[3]][list[3]] == same;
873           break;
874 
875           case 5:  /* Right general category, left particular category */
876           accepted = lisprop && catposstab[list[3]][base_list[3]] == same;
877           break;
878 
879           /* This code is logically tricky. Think hard before fiddling with it.
880           The posspropstab table has four entries per row. Each row relates to
881           one of PCRE's special properties such as ALNUM or SPACE or WORD.
882           Only WORD actually needs all four entries, but using repeats for the
883           others means they can all use the same code below.
884 
885           The first two entries in each row are Unicode general categories, and
886           apply always, because all the characters they include are part of the
887           PCRE character set. The third and fourth entries are a general and a
888           particular category, respectively, that include one or more relevant
889           characters. One or the other is used, depending on whether the check
890           is for a general or a particular category. However, in both cases the
891           category contains more characters than the specials that are defined
892           for the property being tested against. Therefore, it cannot be used
893           in a NOTPROP case.
894 
895           Example: the row for WORD contains ucp_L, ucp_N, ucp_P, ucp_Po.
896           Underscore is covered by ucp_P or ucp_Po. */
897 
898           case 6:  /* Left alphanum vs right general category */
899           case 7:  /* Left space vs right general category */
900           case 8:  /* Left word vs right general category */
901           p = posspropstab[n-6];
902           accepted = risprop && lisprop ==
903             (list[3] != p[0] &&
904              list[3] != p[1] &&
905             (list[3] != p[2] || !lisprop));
906           break;
907 
908           case 9:   /* Right alphanum vs left general category */
909           case 10:  /* Right space vs left general category */
910           case 11:  /* Right word vs left general category */
911           p = posspropstab[n-9];
912           accepted = lisprop && risprop ==
913             (base_list[3] != p[0] &&
914              base_list[3] != p[1] &&
915             (base_list[3] != p[2] || !risprop));
916           break;
917 
918           case 12:  /* Left alphanum vs right particular category */
919           case 13:  /* Left space vs right particular category */
920           case 14:  /* Left word vs right particular category */
921           p = posspropstab[n-12];
922           accepted = risprop && lisprop ==
923             (catposstab[p[0]][list[3]] &&
924              catposstab[p[1]][list[3]] &&
925             (list[3] != p[3] || !lisprop));
926           break;
927 
928           case 15:  /* Right alphanum vs left particular category */
929           case 16:  /* Right space vs left particular category */
930           case 17:  /* Right word vs left particular category */
931           p = posspropstab[n-15];
932           accepted = lisprop && risprop ==
933             (catposstab[p[0]][base_list[3]] &&
934              catposstab[p[1]][base_list[3]] &&
935             (base_list[3] != p[3] || !risprop));
936           break;
937           }
938         }
939       }
940 
941     else
942 #endif  /* SUPPORT_UNICODE */
943 
944     accepted = leftop >= FIRST_AUTOTAB_OP && leftop <= LAST_AUTOTAB_LEFT_OP &&
945            rightop >= FIRST_AUTOTAB_OP && rightop <= LAST_AUTOTAB_RIGHT_OP &&
946            autoposstab[leftop - FIRST_AUTOTAB_OP][rightop - FIRST_AUTOTAB_OP];
947 
948     if (!accepted) return FALSE;
949 
950     if (list[1] == 0) return TRUE;
951     /* Might be an empty repeat. */
952     continue;
953     }
954 
955   /* Control reaches here only if one of the items is a small character list.
956   All characters are checked against the other side. */
957 
958   do
959     {
960     chr = *chr_ptr;
961 
962     switch(list_ptr[0])
963       {
964       case OP_CHAR:
965       ochr_ptr = list_ptr + 2;
966       do
967         {
968         if (chr == *ochr_ptr) return FALSE;
969         ochr_ptr++;
970         }
971       while(*ochr_ptr != NOTACHAR);
972       break;
973 
974       case OP_NOT:
975       ochr_ptr = list_ptr + 2;
976       do
977         {
978         if (chr == *ochr_ptr)
979           break;
980         ochr_ptr++;
981         }
982       while(*ochr_ptr != NOTACHAR);
983       if (*ochr_ptr == NOTACHAR) return FALSE;   /* Not found */
984       break;
985 
986       /* Note that OP_DIGIT etc. are generated only when PCRE2_UCP is *not*
987       set. When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */
988 
989       case OP_DIGIT:
990       if (chr < 256 && (cb->ctypes[chr] & ctype_digit) != 0) return FALSE;
991       break;
992 
993       case OP_NOT_DIGIT:
994       if (chr > 255 || (cb->ctypes[chr] & ctype_digit) == 0) return FALSE;
995       break;
996 
997       case OP_WHITESPACE:
998       if (chr < 256 && (cb->ctypes[chr] & ctype_space) != 0) return FALSE;
999       break;
1000 
1001       case OP_NOT_WHITESPACE:
1002       if (chr > 255 || (cb->ctypes[chr] & ctype_space) == 0) return FALSE;
1003       break;
1004 
1005       case OP_WORDCHAR:
1006       if (chr < 255 && (cb->ctypes[chr] & ctype_word) != 0) return FALSE;
1007       break;
1008 
1009       case OP_NOT_WORDCHAR:
1010       if (chr > 255 || (cb->ctypes[chr] & ctype_word) == 0) return FALSE;
1011       break;
1012 
1013       case OP_HSPACE:
1014       switch(chr)
1015         {
1016         HSPACE_CASES: return FALSE;
1017         default: break;
1018         }
1019       break;
1020 
1021       case OP_NOT_HSPACE:
1022       switch(chr)
1023         {
1024         HSPACE_CASES: break;
1025         default: return FALSE;
1026         }
1027       break;
1028 
1029       case OP_ANYNL:
1030       case OP_VSPACE:
1031       switch(chr)
1032         {
1033         VSPACE_CASES: return FALSE;
1034         default: break;
1035         }
1036       break;
1037 
1038       case OP_NOT_VSPACE:
1039       switch(chr)
1040         {
1041         VSPACE_CASES: break;
1042         default: return FALSE;
1043         }
1044       break;
1045 
1046       case OP_DOLL:
1047       case OP_EODN:
1048       switch (chr)
1049         {
1050         case CHAR_CR:
1051         case CHAR_LF:
1052         case CHAR_VT:
1053         case CHAR_FF:
1054         case CHAR_NEL:
1055 #ifndef EBCDIC
1056         case 0x2028:
1057         case 0x2029:
1058 #endif  /* Not EBCDIC */
1059         return FALSE;
1060         }
1061       break;
1062 
1063       case OP_EOD:    /* Can always possessify before \z */
1064       break;
1065 
1066 #ifdef SUPPORT_UNICODE
1067       case OP_PROP:
1068       case OP_NOTPROP:
1069       if (!check_char_prop(chr, list_ptr[2], list_ptr[3],
1070             list_ptr[0] == OP_NOTPROP))
1071         return FALSE;
1072       break;
1073 #endif
1074 
1075       case OP_NCLASS:
1076       if (chr > 255) return FALSE;
1077       /* Fall through */
1078 
1079       case OP_CLASS:
1080       if (chr > 255) break;
1081       class_bitset = (uint8_t *)
1082         ((list_ptr == list ? code : base_end) - list_ptr[2]);
1083       if ((class_bitset[chr >> 3] & (1u << (chr & 7))) != 0) return FALSE;
1084       break;
1085 
1086 #ifdef SUPPORT_WIDE_CHARS
1087       case OP_XCLASS:
1088       if (PRIV(xclass)(chr, (list_ptr == list ? code : base_end) -
1089           list_ptr[2] + LINK_SIZE, utf)) return FALSE;
1090       break;
1091 #endif
1092 
1093       default:
1094       return FALSE;
1095       }
1096 
1097     chr_ptr++;
1098     }
1099   while(*chr_ptr != NOTACHAR);
1100 
1101   /* At least one character must be matched from this opcode. */
1102 
1103   if (list[1] == 0) return TRUE;
1104   }
1105 
1106 /* Control never reaches here. There used to be a fail-safe return FALSE; here,
1107 but some compilers complain about an unreachable statement. */
1108 }
1109 
1110 
1111 
1112 /*************************************************
1113 *    Scan compiled regex for auto-possession     *
1114 *************************************************/
1115 
1116 /* Replaces single character iterations with their possessive alternatives
1117 if appropriate. This function modifies the compiled opcode! Hitting a
1118 non-existent opcode may indicate a bug in PCRE2, but it can also be caused if a
1119 bad UTF string was compiled with PCRE2_NO_UTF_CHECK. The rec_limit catches
1120 overly complicated or large patterns. In these cases, the check just stops,
1121 leaving the remainder of the pattern unpossessified.
1122 
1123 Arguments:
1124   code        points to start of the byte code
1125   cb          compile data block
1126 
1127 Returns:      0 for success
1128               -1 if a non-existant opcode is encountered
1129 */
1130 
1131 int
PRIV(auto_possessify)1132 PRIV(auto_possessify)(PCRE2_UCHAR *code, const compile_block *cb)
1133 {
1134 PCRE2_UCHAR c;
1135 PCRE2_SPTR end;
1136 PCRE2_UCHAR *repeat_opcode;
1137 uint32_t list[8];
1138 int rec_limit = 1000;  /* Was 10,000 but clang+ASAN uses a lot of stack. */
1139 BOOL utf = (cb->external_options & PCRE2_UTF) != 0;
1140 BOOL ucp = (cb->external_options & PCRE2_UCP) != 0;
1141 
1142 for (;;)
1143   {
1144   c = *code;
1145 
1146   if (c >= OP_TABLE_LENGTH) return -1;   /* Something gone wrong */
1147 
1148   if (c >= OP_STAR && c <= OP_TYPEPOSUPTO)
1149     {
1150     c -= get_repeat_base(c) - OP_STAR;
1151     end = (c <= OP_MINUPTO) ?
1152       get_chr_property_list(code, utf, ucp, cb->fcc, list) : NULL;
1153     list[1] = c == OP_STAR || c == OP_PLUS || c == OP_QUERY || c == OP_UPTO;
1154 
1155     if (end != NULL && compare_opcodes(end, utf, ucp, cb, list, end,
1156         &rec_limit))
1157       {
1158       switch(c)
1159         {
1160         case OP_STAR:
1161         *code += OP_POSSTAR - OP_STAR;
1162         break;
1163 
1164         case OP_MINSTAR:
1165         *code += OP_POSSTAR - OP_MINSTAR;
1166         break;
1167 
1168         case OP_PLUS:
1169         *code += OP_POSPLUS - OP_PLUS;
1170         break;
1171 
1172         case OP_MINPLUS:
1173         *code += OP_POSPLUS - OP_MINPLUS;
1174         break;
1175 
1176         case OP_QUERY:
1177         *code += OP_POSQUERY - OP_QUERY;
1178         break;
1179 
1180         case OP_MINQUERY:
1181         *code += OP_POSQUERY - OP_MINQUERY;
1182         break;
1183 
1184         case OP_UPTO:
1185         *code += OP_POSUPTO - OP_UPTO;
1186         break;
1187 
1188         case OP_MINUPTO:
1189         *code += OP_POSUPTO - OP_MINUPTO;
1190         break;
1191         }
1192       }
1193     c = *code;
1194     }
1195   else if (c == OP_CLASS || c == OP_NCLASS || c == OP_XCLASS)
1196     {
1197 #ifdef SUPPORT_WIDE_CHARS
1198     if (c == OP_XCLASS)
1199       repeat_opcode = code + GET(code, 1);
1200     else
1201 #endif
1202       repeat_opcode = code + 1 + (32 / sizeof(PCRE2_UCHAR));
1203 
1204     c = *repeat_opcode;
1205     if (c >= OP_CRSTAR && c <= OP_CRMINRANGE)
1206       {
1207       /* The return from get_chr_property_list() will never be NULL when
1208       *code (aka c) is one of the three class opcodes. However, gcc with
1209       -fanalyzer notes that a NULL return is possible, and grumbles. Hence we
1210       put in a check. */
1211 
1212       end = get_chr_property_list(code, utf, ucp, cb->fcc, list);
1213       list[1] = (c & 1) == 0;
1214 
1215       if (end != NULL &&
1216           compare_opcodes(end, utf, ucp, cb, list, end, &rec_limit))
1217         {
1218         switch (c)
1219           {
1220           case OP_CRSTAR:
1221           case OP_CRMINSTAR:
1222           *repeat_opcode = OP_CRPOSSTAR;
1223           break;
1224 
1225           case OP_CRPLUS:
1226           case OP_CRMINPLUS:
1227           *repeat_opcode = OP_CRPOSPLUS;
1228           break;
1229 
1230           case OP_CRQUERY:
1231           case OP_CRMINQUERY:
1232           *repeat_opcode = OP_CRPOSQUERY;
1233           break;
1234 
1235           case OP_CRRANGE:
1236           case OP_CRMINRANGE:
1237           *repeat_opcode = OP_CRPOSRANGE;
1238           break;
1239           }
1240         }
1241       }
1242     c = *code;
1243     }
1244 
1245   switch(c)
1246     {
1247     case OP_END:
1248     return 0;
1249 
1250     case OP_TYPESTAR:
1251     case OP_TYPEMINSTAR:
1252     case OP_TYPEPLUS:
1253     case OP_TYPEMINPLUS:
1254     case OP_TYPEQUERY:
1255     case OP_TYPEMINQUERY:
1256     case OP_TYPEPOSSTAR:
1257     case OP_TYPEPOSPLUS:
1258     case OP_TYPEPOSQUERY:
1259     if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
1260     break;
1261 
1262     case OP_TYPEUPTO:
1263     case OP_TYPEMINUPTO:
1264     case OP_TYPEEXACT:
1265     case OP_TYPEPOSUPTO:
1266     if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
1267       code += 2;
1268     break;
1269 
1270     case OP_CALLOUT_STR:
1271     code += GET(code, 1 + 2*LINK_SIZE);
1272     break;
1273 
1274 #ifdef SUPPORT_WIDE_CHARS
1275     case OP_XCLASS:
1276     code += GET(code, 1);
1277     break;
1278 #endif
1279 
1280     case OP_MARK:
1281     case OP_COMMIT_ARG:
1282     case OP_PRUNE_ARG:
1283     case OP_SKIP_ARG:
1284     case OP_THEN_ARG:
1285     code += code[1];
1286     break;
1287     }
1288 
1289   /* Add in the fixed length from the table */
1290 
1291   code += PRIV(OP_lengths)[c];
1292 
1293   /* In UTF-8 and UTF-16 modes, opcodes that are followed by a character may be
1294   followed by a multi-byte character. The length in the table is a minimum, so
1295   we have to arrange to skip the extra code units. */
1296 
1297 #ifdef MAYBE_UTF_MULTI
1298   if (utf) switch(c)
1299     {
1300     case OP_CHAR:
1301     case OP_CHARI:
1302     case OP_NOT:
1303     case OP_NOTI:
1304     case OP_STAR:
1305     case OP_MINSTAR:
1306     case OP_PLUS:
1307     case OP_MINPLUS:
1308     case OP_QUERY:
1309     case OP_MINQUERY:
1310     case OP_UPTO:
1311     case OP_MINUPTO:
1312     case OP_EXACT:
1313     case OP_POSSTAR:
1314     case OP_POSPLUS:
1315     case OP_POSQUERY:
1316     case OP_POSUPTO:
1317     case OP_STARI:
1318     case OP_MINSTARI:
1319     case OP_PLUSI:
1320     case OP_MINPLUSI:
1321     case OP_QUERYI:
1322     case OP_MINQUERYI:
1323     case OP_UPTOI:
1324     case OP_MINUPTOI:
1325     case OP_EXACTI:
1326     case OP_POSSTARI:
1327     case OP_POSPLUSI:
1328     case OP_POSQUERYI:
1329     case OP_POSUPTOI:
1330     case OP_NOTSTAR:
1331     case OP_NOTMINSTAR:
1332     case OP_NOTPLUS:
1333     case OP_NOTMINPLUS:
1334     case OP_NOTQUERY:
1335     case OP_NOTMINQUERY:
1336     case OP_NOTUPTO:
1337     case OP_NOTMINUPTO:
1338     case OP_NOTEXACT:
1339     case OP_NOTPOSSTAR:
1340     case OP_NOTPOSPLUS:
1341     case OP_NOTPOSQUERY:
1342     case OP_NOTPOSUPTO:
1343     case OP_NOTSTARI:
1344     case OP_NOTMINSTARI:
1345     case OP_NOTPLUSI:
1346     case OP_NOTMINPLUSI:
1347     case OP_NOTQUERYI:
1348     case OP_NOTMINQUERYI:
1349     case OP_NOTUPTOI:
1350     case OP_NOTMINUPTOI:
1351     case OP_NOTEXACTI:
1352     case OP_NOTPOSSTARI:
1353     case OP_NOTPOSPLUSI:
1354     case OP_NOTPOSQUERYI:
1355     case OP_NOTPOSUPTOI:
1356     if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);
1357     break;
1358     }
1359 #else
1360   (void)(utf);  /* Keep compiler happy by referencing function argument */
1361 #endif  /* SUPPORT_WIDE_CHARS */
1362   }
1363 }
1364 
1365 /* End of pcre2_auto_possess.c */
1366