1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Original API code Copyright (c) 1997-2012 University of Cambridge
10 New API code Copyright (c) 2016-2021 University of Cambridge
11
12 -----------------------------------------------------------------------------
13 Redistribution and use in source and binary forms, with or without
14 modification, are permitted provided that the following conditions are met:
15
16 * Redistributions of source code must retain the above copyright notice,
17 this list of conditions and the following disclaimer.
18
19 * Redistributions in binary form must reproduce the above copyright
20 notice, this list of conditions and the following disclaimer in the
21 documentation and/or other materials provided with the distribution.
22
23 * Neither the name of the University of Cambridge nor the names of its
24 contributors may be used to endorse or promote products derived from
25 this software without specific prior written permission.
26
27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 POSSIBILITY OF SUCH DAMAGE.
38 -----------------------------------------------------------------------------
39 */
40
41 /* This module contains functions that scan a compiled pattern and change
42 repeats into possessive repeats where possible. */
43
44
45 #ifdef HAVE_CONFIG_H
46 #include "config.h"
47 #endif
48
49
50 #include "pcre2_internal.h"
51
52
53 /*************************************************
54 * Tables for auto-possessification *
55 *************************************************/
56
57 /* This table is used to check whether auto-possessification is possible
58 between adjacent character-type opcodes. The left-hand (repeated) opcode is
59 used to select the row, and the right-hand opcode is use to select the column.
60 A value of 1 means that auto-possessification is OK. For example, the second
61 value in the first row means that \D+\d can be turned into \D++\d.
62
63 The Unicode property types (\P and \p) have to be present to fill out the table
64 because of what their opcode values are, but the table values should always be
65 zero because property types are handled separately in the code. The last four
66 columns apply to items that cannot be repeated, so there is no need to have
67 rows for them. Note that OP_DIGIT etc. are generated only when PCRE_UCP is
68 *not* set. When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */
69
70 #define APTROWS (LAST_AUTOTAB_LEFT_OP - FIRST_AUTOTAB_OP + 1)
71 #define APTCOLS (LAST_AUTOTAB_RIGHT_OP - FIRST_AUTOTAB_OP + 1)
72
73 static const uint8_t autoposstab[APTROWS][APTCOLS] = {
74 /* \D \d \S \s \W \w . .+ \C \P \p \R \H \h \V \v \X \Z \z $ $M */
75 { 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 }, /* \D */
76 { 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1 }, /* \d */
77 { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1 }, /* \S */
78 { 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 }, /* \s */
79 { 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 }, /* \W */
80 { 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1 }, /* \w */
81 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0 }, /* . */
82 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 }, /* .+ */
83 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 }, /* \C */
84 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* \P */
85 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* \p */
86 { 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0 }, /* \R */
87 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0 }, /* \H */
88 { 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0 }, /* \h */
89 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0 }, /* \V */
90 { 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0 }, /* \v */
91 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 } /* \X */
92 };
93
94 #ifdef SUPPORT_UNICODE
95 /* This table is used to check whether auto-possessification is possible
96 between adjacent Unicode property opcodes (OP_PROP and OP_NOTPROP). The
97 left-hand (repeated) opcode is used to select the row, and the right-hand
98 opcode is used to select the column. The values are as follows:
99
100 0 Always return FALSE (never auto-possessify)
101 1 Character groups are distinct (possessify if both are OP_PROP)
102 2 Check character categories in the same group (general or particular)
103 3 TRUE if the two opcodes are not the same (PROP vs NOTPROP)
104
105 4 Check left general category vs right particular category
106 5 Check right general category vs left particular category
107
108 6 Left alphanum vs right general category
109 7 Left space vs right general category
110 8 Left word vs right general category
111
112 9 Right alphanum vs left general category
113 10 Right space vs left general category
114 11 Right word vs left general category
115
116 12 Left alphanum vs right particular category
117 13 Left space vs right particular category
118 14 Left word vs right particular category
119
120 15 Right alphanum vs left particular category
121 16 Right space vs left particular category
122 17 Right word vs left particular category
123 */
124
125 static const uint8_t propposstab[PT_TABSIZE][PT_TABSIZE] = {
126 /* ANY LAMP GC PC SC ALNUM SPACE PXSPACE WORD CLIST UCNC */
127 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_ANY */
128 { 0, 3, 0, 0, 0, 3, 1, 1, 0, 0, 0 }, /* PT_LAMP */
129 { 0, 0, 2, 4, 0, 9, 10, 10, 11, 0, 0 }, /* PT_GC */
130 { 0, 0, 5, 2, 0, 15, 16, 16, 17, 0, 0 }, /* PT_PC */
131 { 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0 }, /* PT_SC */
132 { 0, 3, 6, 12, 0, 3, 1, 1, 0, 0, 0 }, /* PT_ALNUM */
133 { 0, 1, 7, 13, 0, 1, 3, 3, 1, 0, 0 }, /* PT_SPACE */
134 { 0, 1, 7, 13, 0, 1, 3, 3, 1, 0, 0 }, /* PT_PXSPACE */
135 { 0, 0, 8, 14, 0, 0, 1, 1, 3, 0, 0 }, /* PT_WORD */
136 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_CLIST */
137 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3 } /* PT_UCNC */
138 };
139
/* catposstab[general][particular] is consulted when two adjacent Unicode
property opcodes (OP_PROP and OP_NOTPROP) are compared and one of them names a
general category while the other names a particular category. The general
category selects the row and the particular category selects the column. An
entry is 1 when the particular category does not belong to the general
category, and 0 when it does. */

static const uint8_t catposstab[7][30] = {
/* Columns, in order:
    Cc Cf Cn Co Cs Ll Lm Lo Lt Lu Mc Me Mn Nd Nl No Pc Pd Pe Pf Pi Po Ps Sc Sk Sm So Zl Zp Zs */

  /* general C */
  { 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
  /* general L */
  { 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
  /* general M */
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
  /* general N */
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
  /* general P */
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1 },
  /* general S */
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1 },
  /* general Z */
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0 }
};
157
158 /* This table is used when checking ALNUM, (PX)SPACE, SPACE, and WORD against
159 a general or particular category. The properties in each row are those
160 that apply to the character set in question. Duplication means that a little
161 unnecessary work is done when checking, but this keeps things much simpler
162 because they can all use the same code. For more details see the comment where
163 this table is used.
164
165 Note: SPACE and PXSPACE used to be different because Perl excluded VT from
166 "space", but from Perl 5.18 it's included, so both categories are treated the
167 same here. */
168
169 static const uint8_t posspropstab[3][4] = {
170 { ucp_L, ucp_N, ucp_N, ucp_Nl }, /* ALNUM, 3rd and 4th values redundant */
171 { ucp_Z, ucp_Z, ucp_C, ucp_Cc }, /* SPACE and PXSPACE, 2nd value redundant */
172 { ucp_L, ucp_N, ucp_P, ucp_Po } /* WORD */
173 };
174 #endif /* SUPPORT_UNICODE */
175
176
177
178 #ifdef SUPPORT_UNICODE
179 /*************************************************
180 * Check a character and a property *
181 *************************************************/
182
183 /* This function is called by compare_opcodes() when a property item is
184 adjacent to a fixed character.
185
186 Arguments:
187 c the character
188 ptype the property type
189 pdata the data for the type
190 negated TRUE if it's a negated property (\P or \p{^)
191
192 Returns: TRUE if auto-possessifying is OK
193 */
194
195 static BOOL
check_char_prop(uint32_t c,unsigned int ptype,unsigned int pdata,BOOL negated)196 check_char_prop(uint32_t c, unsigned int ptype, unsigned int pdata,
197 BOOL negated)
198 {
199 const uint32_t *p;
200 const ucd_record *prop = GET_UCD(c);
201
202 switch(ptype)
203 {
204 case PT_LAMP:
205 return (prop->chartype == ucp_Lu ||
206 prop->chartype == ucp_Ll ||
207 prop->chartype == ucp_Lt) == negated;
208
209 case PT_GC:
210 return (pdata == PRIV(ucp_gentype)[prop->chartype]) == negated;
211
212 case PT_PC:
213 return (pdata == prop->chartype) == negated;
214
215 case PT_SC:
216 return (pdata == prop->script) == negated;
217
218 /* These are specials */
219
220 case PT_ALNUM:
221 return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
222 PRIV(ucp_gentype)[prop->chartype] == ucp_N) == negated;
223
224 /* Perl space used to exclude VT, but from Perl 5.18 it is included, which
225 means that Perl space and POSIX space are now identical. PCRE was changed
226 at release 8.34. */
227
228 case PT_SPACE: /* Perl space */
229 case PT_PXSPACE: /* POSIX space */
230 switch(c)
231 {
232 HSPACE_CASES:
233 VSPACE_CASES:
234 return negated;
235
236 default:
237 return (PRIV(ucp_gentype)[prop->chartype] == ucp_Z) == negated;
238 }
239 break; /* Control never reaches here */
240
241 case PT_WORD:
242 return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
243 PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
244 c == CHAR_UNDERSCORE) == negated;
245
246 case PT_CLIST:
247 p = PRIV(ucd_caseless_sets) + prop->caseset;
248 for (;;)
249 {
250 if (c < *p) return !negated;
251 if (c == *p++) return negated;
252 }
253 break; /* Control never reaches here */
254 }
255
256 return FALSE;
257 }
258 #endif /* SUPPORT_UNICODE */
259
260
261
262 /*************************************************
263 * Base opcode of repeated opcodes *
264 *************************************************/
265
266 /* Returns the base opcode for repeated single character type opcodes. If the
267 opcode is not a repeated character type, it returns with the original value.
268
269 Arguments: c opcode
270 Returns: base opcode for the type
271 */
272
273 static PCRE2_UCHAR
get_repeat_base(PCRE2_UCHAR c)274 get_repeat_base(PCRE2_UCHAR c)
275 {
276 return (c > OP_TYPEPOSUPTO)? c :
277 (c >= OP_TYPESTAR)? OP_TYPESTAR :
278 (c >= OP_NOTSTARI)? OP_NOTSTARI :
279 (c >= OP_NOTSTAR)? OP_NOTSTAR :
280 (c >= OP_STARI)? OP_STARI :
281 OP_STAR;
282 }
283
284
285 /*************************************************
286 * Fill the character property list *
287 *************************************************/
288
289 /* Checks whether the code points to an opcode that can take part in auto-
290 possessification, and if so, fills a list with its properties.
291
292 Arguments:
293 code points to start of expression
294 utf TRUE if in UTF mode
295 ucp TRUE if in UCP mode
296 fcc points to the case-flipping table
297 list points to output list
298 list[0] will be filled with the opcode
299 list[1] will be non-zero if this opcode
300 can match an empty character string
301 list[2..7] depends on the opcode
302
303 Returns: points to the start of the next opcode if *code is accepted
304 NULL if *code is not accepted
305 */
306
307 static PCRE2_SPTR
get_chr_property_list(PCRE2_SPTR code,BOOL utf,BOOL ucp,const uint8_t * fcc,uint32_t * list)308 get_chr_property_list(PCRE2_SPTR code, BOOL utf, BOOL ucp, const uint8_t *fcc,
309 uint32_t *list)
310 {
311 PCRE2_UCHAR c = *code;
312 PCRE2_UCHAR base;
313 PCRE2_SPTR end;
314 uint32_t chr;
315
316 #ifdef SUPPORT_UNICODE
317 uint32_t *clist_dest;
318 const uint32_t *clist_src;
319 #else
320 (void)utf; /* Suppress "unused parameter" compiler warnings */
321 (void)ucp;
322 #endif
323
324 list[0] = c;
325 list[1] = FALSE;
326 code++;
327
328 if (c >= OP_STAR && c <= OP_TYPEPOSUPTO)
329 {
330 base = get_repeat_base(c);
331 c -= (base - OP_STAR);
332
333 if (c == OP_UPTO || c == OP_MINUPTO || c == OP_EXACT || c == OP_POSUPTO)
334 code += IMM2_SIZE;
335
336 list[1] = (c != OP_PLUS && c != OP_MINPLUS && c != OP_EXACT &&
337 c != OP_POSPLUS);
338
339 switch(base)
340 {
341 case OP_STAR:
342 list[0] = OP_CHAR;
343 break;
344
345 case OP_STARI:
346 list[0] = OP_CHARI;
347 break;
348
349 case OP_NOTSTAR:
350 list[0] = OP_NOT;
351 break;
352
353 case OP_NOTSTARI:
354 list[0] = OP_NOTI;
355 break;
356
357 case OP_TYPESTAR:
358 list[0] = *code;
359 code++;
360 break;
361 }
362 c = list[0];
363 }
364
365 switch(c)
366 {
367 case OP_NOT_DIGIT:
368 case OP_DIGIT:
369 case OP_NOT_WHITESPACE:
370 case OP_WHITESPACE:
371 case OP_NOT_WORDCHAR:
372 case OP_WORDCHAR:
373 case OP_ANY:
374 case OP_ALLANY:
375 case OP_ANYNL:
376 case OP_NOT_HSPACE:
377 case OP_HSPACE:
378 case OP_NOT_VSPACE:
379 case OP_VSPACE:
380 case OP_EXTUNI:
381 case OP_EODN:
382 case OP_EOD:
383 case OP_DOLL:
384 case OP_DOLLM:
385 return code;
386
387 case OP_CHAR:
388 case OP_NOT:
389 GETCHARINCTEST(chr, code);
390 list[2] = chr;
391 list[3] = NOTACHAR;
392 return code;
393
394 case OP_CHARI:
395 case OP_NOTI:
396 list[0] = (c == OP_CHARI) ? OP_CHAR : OP_NOT;
397 GETCHARINCTEST(chr, code);
398 list[2] = chr;
399
400 #ifdef SUPPORT_UNICODE
401 if (chr < 128 || (chr < 256 && !utf && !ucp))
402 list[3] = fcc[chr];
403 else
404 list[3] = UCD_OTHERCASE(chr);
405 #elif defined SUPPORT_WIDE_CHARS
406 list[3] = (chr < 256) ? fcc[chr] : chr;
407 #else
408 list[3] = fcc[chr];
409 #endif
410
411 /* The othercase might be the same value. */
412
413 if (chr == list[3])
414 list[3] = NOTACHAR;
415 else
416 list[4] = NOTACHAR;
417 return code;
418
419 #ifdef SUPPORT_UNICODE
420 case OP_PROP:
421 case OP_NOTPROP:
422 if (code[0] != PT_CLIST)
423 {
424 list[2] = code[0];
425 list[3] = code[1];
426 return code + 2;
427 }
428
429 /* Convert only if we have enough space. */
430
431 clist_src = PRIV(ucd_caseless_sets) + code[1];
432 clist_dest = list + 2;
433 code += 2;
434
435 do {
436 if (clist_dest >= list + 8)
437 {
438 /* Early return if there is not enough space. This should never
439 happen, since all clists are shorter than 5 character now. */
440 list[2] = code[0];
441 list[3] = code[1];
442 return code;
443 }
444 *clist_dest++ = *clist_src;
445 }
446 while(*clist_src++ != NOTACHAR);
447
448 /* All characters are stored. The terminating NOTACHAR is copied from the
449 clist itself. */
450
451 list[0] = (c == OP_PROP) ? OP_CHAR : OP_NOT;
452 return code;
453 #endif
454
455 case OP_NCLASS:
456 case OP_CLASS:
457 #ifdef SUPPORT_WIDE_CHARS
458 case OP_XCLASS:
459 if (c == OP_XCLASS)
460 end = code + GET(code, 0) - 1;
461 else
462 #endif
463 end = code + 32 / sizeof(PCRE2_UCHAR);
464
465 switch(*end)
466 {
467 case OP_CRSTAR:
468 case OP_CRMINSTAR:
469 case OP_CRQUERY:
470 case OP_CRMINQUERY:
471 case OP_CRPOSSTAR:
472 case OP_CRPOSQUERY:
473 list[1] = TRUE;
474 end++;
475 break;
476
477 case OP_CRPLUS:
478 case OP_CRMINPLUS:
479 case OP_CRPOSPLUS:
480 end++;
481 break;
482
483 case OP_CRRANGE:
484 case OP_CRMINRANGE:
485 case OP_CRPOSRANGE:
486 list[1] = (GET2(end, 1) == 0);
487 end += 1 + 2 * IMM2_SIZE;
488 break;
489 }
490 list[2] = (uint32_t)(end - code);
491 return end;
492 }
493
494 return NULL; /* Opcode not accepted */
495 }
496
497
498
499 /*************************************************
500 * Scan further character sets for match *
501 *************************************************/
502
503 /* Checks whether the base and the current opcode have a common character, in
504 which case the base cannot be possessified.
505
506 Arguments:
507 code points to the byte code
508 utf TRUE in UTF mode
509 ucp TRUE in UCP mode
510 cb compile data block
511 base_list the data list of the base opcode
512 base_end the end of the base opcode
513 rec_limit points to recursion depth counter
514
515 Returns: TRUE if the auto-possessification is possible
516 */
517
518 static BOOL
compare_opcodes(PCRE2_SPTR code,BOOL utf,BOOL ucp,const compile_block * cb,const uint32_t * base_list,PCRE2_SPTR base_end,int * rec_limit)519 compare_opcodes(PCRE2_SPTR code, BOOL utf, BOOL ucp, const compile_block *cb,
520 const uint32_t *base_list, PCRE2_SPTR base_end, int *rec_limit)
521 {
522 PCRE2_UCHAR c;
523 uint32_t list[8];
524 const uint32_t *chr_ptr;
525 const uint32_t *ochr_ptr;
526 const uint32_t *list_ptr;
527 PCRE2_SPTR next_code;
528 #ifdef SUPPORT_WIDE_CHARS
529 PCRE2_SPTR xclass_flags;
530 #endif
531 const uint8_t *class_bitset;
532 const uint8_t *set1, *set2, *set_end;
533 uint32_t chr;
534 BOOL accepted, invert_bits;
535 BOOL entered_a_group = FALSE;
536
537 if (--(*rec_limit) <= 0) return FALSE; /* Recursion has gone too deep */
538
539 /* Note: the base_list[1] contains whether the current opcode has a greedy
540 (represented by a non-zero value) quantifier. This is a different from
541 other character type lists, which store here that the character iterator
542 matches to an empty string (also represented by a non-zero value). */
543
544 for(;;)
545 {
546 /* All operations move the code pointer forward.
547 Therefore infinite recursions are not possible. */
548
549 c = *code;
550
551 /* Skip over callouts */
552
553 if (c == OP_CALLOUT)
554 {
555 code += PRIV(OP_lengths)[c];
556 continue;
557 }
558
559 if (c == OP_CALLOUT_STR)
560 {
561 code += GET(code, 1 + 2*LINK_SIZE);
562 continue;
563 }
564
565 /* At the end of a branch, skip to the end of the group. */
566
567 if (c == OP_ALT)
568 {
569 do code += GET(code, 1); while (*code == OP_ALT);
570 c = *code;
571 }
572
573 /* Inspect the next opcode. */
574
575 switch(c)
576 {
577 /* We can always possessify a greedy iterator at the end of the pattern,
578 which is reached after skipping over the final OP_KET. A non-greedy
579 iterator must never be possessified. */
580
581 case OP_END:
582 return base_list[1] != 0;
583
584 /* When an iterator is at the end of certain kinds of group we can inspect
585 what follows the group by skipping over the closing ket. Note that this
586 does not apply to OP_KETRMAX or OP_KETRMIN because what follows any given
587 iteration is variable (could be another iteration or could be the next
588 item). As these two opcodes are not listed in the next switch, they will
589 end up as the next code to inspect, and return FALSE by virtue of being
590 unsupported. */
591
592 case OP_KET:
593 case OP_KETRPOS:
594 /* The non-greedy case cannot be converted to a possessive form. */
595
596 if (base_list[1] == 0) return FALSE;
597
598 /* If the bracket is capturing it might be referenced by an OP_RECURSE
599 so its last iterator can never be possessified if the pattern contains
600 recursions. (This could be improved by keeping a list of group numbers that
601 are called by recursion.) */
602
603 switch(*(code - GET(code, 1)))
604 {
605 case OP_CBRA:
606 case OP_SCBRA:
607 case OP_CBRAPOS:
608 case OP_SCBRAPOS:
609 if (cb->had_recurse) return FALSE;
610 break;
611
612 /* A script run might have to backtrack if the iterated item can match
613 characters from more than one script. So give up unless repeating an
614 explicit character. */
615
616 case OP_SCRIPT_RUN:
617 if (base_list[0] != OP_CHAR && base_list[0] != OP_CHARI)
618 return FALSE;
619 break;
620
621 /* Atomic sub-patterns and assertions can always auto-possessify their
622 last iterator. However, if the group was entered as a result of checking
623 a previous iterator, this is not possible. */
624
625 case OP_ASSERT:
626 case OP_ASSERT_NOT:
627 case OP_ASSERTBACK:
628 case OP_ASSERTBACK_NOT:
629 case OP_ONCE:
630 return !entered_a_group;
631
632 /* Non-atomic assertions - don't possessify last iterator. This needs
633 more thought. */
634
635 case OP_ASSERT_NA:
636 case OP_ASSERTBACK_NA:
637 return FALSE;
638 }
639
640 /* Skip over the bracket and inspect what comes next. */
641
642 code += PRIV(OP_lengths)[c];
643 continue;
644
645 /* Handle cases where the next item is a group. */
646
647 case OP_ONCE:
648 case OP_BRA:
649 case OP_CBRA:
650 next_code = code + GET(code, 1);
651 code += PRIV(OP_lengths)[c];
652
653 /* Check each branch. We have to recurse a level for all but the last
654 branch. */
655
656 while (*next_code == OP_ALT)
657 {
658 if (!compare_opcodes(code, utf, ucp, cb, base_list, base_end, rec_limit))
659 return FALSE;
660 code = next_code + 1 + LINK_SIZE;
661 next_code += GET(next_code, 1);
662 }
663
664 entered_a_group = TRUE;
665 continue;
666
667 case OP_BRAZERO:
668 case OP_BRAMINZERO:
669
670 next_code = code + 1;
671 if (*next_code != OP_BRA && *next_code != OP_CBRA &&
672 *next_code != OP_ONCE) return FALSE;
673
674 do next_code += GET(next_code, 1); while (*next_code == OP_ALT);
675
676 /* The bracket content will be checked by the OP_BRA/OP_CBRA case above. */
677
678 next_code += 1 + LINK_SIZE;
679 if (!compare_opcodes(next_code, utf, ucp, cb, base_list, base_end,
680 rec_limit))
681 return FALSE;
682
683 code += PRIV(OP_lengths)[c];
684 continue;
685
686 /* The next opcode does not need special handling; fall through and use it
687 to see if the base can be possessified. */
688
689 default:
690 break;
691 }
692
693 /* We now have the next appropriate opcode to compare with the base. Check
694 for a supported opcode, and load its properties. */
695
696 code = get_chr_property_list(code, utf, ucp, cb->fcc, list);
697 if (code == NULL) return FALSE; /* Unsupported */
698
699 /* If either opcode is a small character list, set pointers for comparing
700 characters from that list with another list, or with a property. */
701
702 if (base_list[0] == OP_CHAR)
703 {
704 chr_ptr = base_list + 2;
705 list_ptr = list;
706 }
707 else if (list[0] == OP_CHAR)
708 {
709 chr_ptr = list + 2;
710 list_ptr = base_list;
711 }
712
713 /* Character bitsets can also be compared to certain opcodes. */
714
715 else if (base_list[0] == OP_CLASS || list[0] == OP_CLASS
716 #if PCRE2_CODE_UNIT_WIDTH == 8
717 /* In 8 bit, non-UTF mode, OP_CLASS and OP_NCLASS are the same. */
718 || (!utf && (base_list[0] == OP_NCLASS || list[0] == OP_NCLASS))
719 #endif
720 )
721 {
722 #if PCRE2_CODE_UNIT_WIDTH == 8
723 if (base_list[0] == OP_CLASS || (!utf && base_list[0] == OP_NCLASS))
724 #else
725 if (base_list[0] == OP_CLASS)
726 #endif
727 {
728 set1 = (uint8_t *)(base_end - base_list[2]);
729 list_ptr = list;
730 }
731 else
732 {
733 set1 = (uint8_t *)(code - list[2]);
734 list_ptr = base_list;
735 }
736
737 invert_bits = FALSE;
738 switch(list_ptr[0])
739 {
740 case OP_CLASS:
741 case OP_NCLASS:
742 set2 = (uint8_t *)
743 ((list_ptr == list ? code : base_end) - list_ptr[2]);
744 break;
745
746 #ifdef SUPPORT_WIDE_CHARS
747 case OP_XCLASS:
748 xclass_flags = (list_ptr == list ? code : base_end) - list_ptr[2] + LINK_SIZE;
749 if ((*xclass_flags & XCL_HASPROP) != 0) return FALSE;
750 if ((*xclass_flags & XCL_MAP) == 0)
751 {
752 /* No bits are set for characters < 256. */
753 if (list[1] == 0) return (*xclass_flags & XCL_NOT) == 0;
754 /* Might be an empty repeat. */
755 continue;
756 }
757 set2 = (uint8_t *)(xclass_flags + 1);
758 break;
759 #endif
760
761 case OP_NOT_DIGIT:
762 invert_bits = TRUE;
763 /* Fall through */
764 case OP_DIGIT:
765 set2 = (uint8_t *)(cb->cbits + cbit_digit);
766 break;
767
768 case OP_NOT_WHITESPACE:
769 invert_bits = TRUE;
770 /* Fall through */
771 case OP_WHITESPACE:
772 set2 = (uint8_t *)(cb->cbits + cbit_space);
773 break;
774
775 case OP_NOT_WORDCHAR:
776 invert_bits = TRUE;
777 /* Fall through */
778 case OP_WORDCHAR:
779 set2 = (uint8_t *)(cb->cbits + cbit_word);
780 break;
781
782 default:
783 return FALSE;
784 }
785
786 /* Because the bit sets are unaligned bytes, we need to perform byte
787 comparison here. */
788
789 set_end = set1 + 32;
790 if (invert_bits)
791 {
792 do
793 {
794 if ((*set1++ & ~(*set2++)) != 0) return FALSE;
795 }
796 while (set1 < set_end);
797 }
798 else
799 {
800 do
801 {
802 if ((*set1++ & *set2++) != 0) return FALSE;
803 }
804 while (set1 < set_end);
805 }
806
807 if (list[1] == 0) return TRUE;
808 /* Might be an empty repeat. */
809 continue;
810 }
811
812 /* Some property combinations also acceptable. Unicode property opcodes are
813 processed specially; the rest can be handled with a lookup table. */
814
815 else
816 {
817 uint32_t leftop, rightop;
818
819 leftop = base_list[0];
820 rightop = list[0];
821
822 #ifdef SUPPORT_UNICODE
823 accepted = FALSE; /* Always set in non-unicode case. */
824 if (leftop == OP_PROP || leftop == OP_NOTPROP)
825 {
826 if (rightop == OP_EOD)
827 accepted = TRUE;
828 else if (rightop == OP_PROP || rightop == OP_NOTPROP)
829 {
830 int n;
831 const uint8_t *p;
832 BOOL same = leftop == rightop;
833 BOOL lisprop = leftop == OP_PROP;
834 BOOL risprop = rightop == OP_PROP;
835 BOOL bothprop = lisprop && risprop;
836
837 /* There's a table that specifies how each combination is to be
838 processed:
839 0 Always return FALSE (never auto-possessify)
840 1 Character groups are distinct (possessify if both are OP_PROP)
841 2 Check character categories in the same group (general or particular)
842 3 Return TRUE if the two opcodes are not the same
843 ... see comments below
844 */
845
846 n = propposstab[base_list[2]][list[2]];
847 switch(n)
848 {
849 case 0: break;
850 case 1: accepted = bothprop; break;
851 case 2: accepted = (base_list[3] == list[3]) != same; break;
852 case 3: accepted = !same; break;
853
854 case 4: /* Left general category, right particular category */
855 accepted = risprop && catposstab[base_list[3]][list[3]] == same;
856 break;
857
858 case 5: /* Right general category, left particular category */
859 accepted = lisprop && catposstab[list[3]][base_list[3]] == same;
860 break;
861
862 /* This code is logically tricky. Think hard before fiddling with it.
863 The posspropstab table has four entries per row. Each row relates to
864 one of PCRE's special properties such as ALNUM or SPACE or WORD.
865 Only WORD actually needs all four entries, but using repeats for the
866 others means they can all use the same code below.
867
868 The first two entries in each row are Unicode general categories, and
869 apply always, because all the characters they include are part of the
870 PCRE character set. The third and fourth entries are a general and a
871 particular category, respectively, that include one or more relevant
872 characters. One or the other is used, depending on whether the check
873 is for a general or a particular category. However, in both cases the
874 category contains more characters than the specials that are defined
875 for the property being tested against. Therefore, it cannot be used
876 in a NOTPROP case.
877
878 Example: the row for WORD contains ucp_L, ucp_N, ucp_P, ucp_Po.
879 Underscore is covered by ucp_P or ucp_Po. */
880
881 case 6: /* Left alphanum vs right general category */
882 case 7: /* Left space vs right general category */
883 case 8: /* Left word vs right general category */
884 p = posspropstab[n-6];
885 accepted = risprop && lisprop ==
886 (list[3] != p[0] &&
887 list[3] != p[1] &&
888 (list[3] != p[2] || !lisprop));
889 break;
890
891 case 9: /* Right alphanum vs left general category */
892 case 10: /* Right space vs left general category */
893 case 11: /* Right word vs left general category */
894 p = posspropstab[n-9];
895 accepted = lisprop && risprop ==
896 (base_list[3] != p[0] &&
897 base_list[3] != p[1] &&
898 (base_list[3] != p[2] || !risprop));
899 break;
900
901 case 12: /* Left alphanum vs right particular category */
902 case 13: /* Left space vs right particular category */
903 case 14: /* Left word vs right particular category */
904 p = posspropstab[n-12];
905 accepted = risprop && lisprop ==
906 (catposstab[p[0]][list[3]] &&
907 catposstab[p[1]][list[3]] &&
908 (list[3] != p[3] || !lisprop));
909 break;
910
911 case 15: /* Right alphanum vs left particular category */
912 case 16: /* Right space vs left particular category */
913 case 17: /* Right word vs left particular category */
914 p = posspropstab[n-15];
915 accepted = lisprop && risprop ==
916 (catposstab[p[0]][base_list[3]] &&
917 catposstab[p[1]][base_list[3]] &&
918 (base_list[3] != p[3] || !risprop));
919 break;
920 }
921 }
922 }
923
924 else
925 #endif /* SUPPORT_UNICODE */
926
927 accepted = leftop >= FIRST_AUTOTAB_OP && leftop <= LAST_AUTOTAB_LEFT_OP &&
928 rightop >= FIRST_AUTOTAB_OP && rightop <= LAST_AUTOTAB_RIGHT_OP &&
929 autoposstab[leftop - FIRST_AUTOTAB_OP][rightop - FIRST_AUTOTAB_OP];
930
931 if (!accepted) return FALSE;
932
933 if (list[1] == 0) return TRUE;
934 /* Might be an empty repeat. */
935 continue;
936 }
937
938 /* Control reaches here only if one of the items is a small character list.
939 All characters are checked against the other side. */
940
941 do
942 {
943 chr = *chr_ptr;
944
945 switch(list_ptr[0])
946 {
947 case OP_CHAR:
948 ochr_ptr = list_ptr + 2;
949 do
950 {
951 if (chr == *ochr_ptr) return FALSE;
952 ochr_ptr++;
953 }
954 while(*ochr_ptr != NOTACHAR);
955 break;
956
957 case OP_NOT:
958 ochr_ptr = list_ptr + 2;
959 do
960 {
961 if (chr == *ochr_ptr)
962 break;
963 ochr_ptr++;
964 }
965 while(*ochr_ptr != NOTACHAR);
966 if (*ochr_ptr == NOTACHAR) return FALSE; /* Not found */
967 break;
968
969 /* Note that OP_DIGIT etc. are generated only when PCRE2_UCP is *not*
970 set. When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */
971
972 case OP_DIGIT:
973 if (chr < 256 && (cb->ctypes[chr] & ctype_digit) != 0) return FALSE;
974 break;
975
976 case OP_NOT_DIGIT:
977 if (chr > 255 || (cb->ctypes[chr] & ctype_digit) == 0) return FALSE;
978 break;
979
980 case OP_WHITESPACE:
981 if (chr < 256 && (cb->ctypes[chr] & ctype_space) != 0) return FALSE;
982 break;
983
984 case OP_NOT_WHITESPACE:
985 if (chr > 255 || (cb->ctypes[chr] & ctype_space) == 0) return FALSE;
986 break;
987
988 case OP_WORDCHAR:
989 if (chr < 255 && (cb->ctypes[chr] & ctype_word) != 0) return FALSE;
990 break;
991
992 case OP_NOT_WORDCHAR:
993 if (chr > 255 || (cb->ctypes[chr] & ctype_word) == 0) return FALSE;
994 break;
995
996 case OP_HSPACE:
997 switch(chr)
998 {
999 HSPACE_CASES: return FALSE;
1000 default: break;
1001 }
1002 break;
1003
1004 case OP_NOT_HSPACE:
1005 switch(chr)
1006 {
1007 HSPACE_CASES: break;
1008 default: return FALSE;
1009 }
1010 break;
1011
1012 case OP_ANYNL:
1013 case OP_VSPACE:
1014 switch(chr)
1015 {
1016 VSPACE_CASES: return FALSE;
1017 default: break;
1018 }
1019 break;
1020
1021 case OP_NOT_VSPACE:
1022 switch(chr)
1023 {
1024 VSPACE_CASES: break;
1025 default: return FALSE;
1026 }
1027 break;
1028
1029 case OP_DOLL:
1030 case OP_EODN:
1031 switch (chr)
1032 {
1033 case CHAR_CR:
1034 case CHAR_LF:
1035 case CHAR_VT:
1036 case CHAR_FF:
1037 case CHAR_NEL:
1038 #ifndef EBCDIC
1039 case 0x2028:
1040 case 0x2029:
1041 #endif /* Not EBCDIC */
1042 return FALSE;
1043 }
1044 break;
1045
1046 case OP_EOD: /* Can always possessify before \z */
1047 break;
1048
1049 #ifdef SUPPORT_UNICODE
1050 case OP_PROP:
1051 case OP_NOTPROP:
1052 if (!check_char_prop(chr, list_ptr[2], list_ptr[3],
1053 list_ptr[0] == OP_NOTPROP))
1054 return FALSE;
1055 break;
1056 #endif
1057
1058 case OP_NCLASS:
1059 if (chr > 255) return FALSE;
1060 /* Fall through */
1061
1062 case OP_CLASS:
1063 if (chr > 255) break;
1064 class_bitset = (uint8_t *)
1065 ((list_ptr == list ? code : base_end) - list_ptr[2]);
1066 if ((class_bitset[chr >> 3] & (1u << (chr & 7))) != 0) return FALSE;
1067 break;
1068
1069 #ifdef SUPPORT_WIDE_CHARS
1070 case OP_XCLASS:
1071 if (PRIV(xclass)(chr, (list_ptr == list ? code : base_end) -
1072 list_ptr[2] + LINK_SIZE, utf)) return FALSE;
1073 break;
1074 #endif
1075
1076 default:
1077 return FALSE;
1078 }
1079
1080 chr_ptr++;
1081 }
1082 while(*chr_ptr != NOTACHAR);
1083
1084 /* At least one character must be matched from this opcode. */
1085
1086 if (list[1] == 0) return TRUE;
1087 }
1088
/* Control never reaches here. There used to be a fail-safe return FALSE; here,
but some compilers complain about an unreachable statement. */
1091 }
1092
1093
1094
1095 /*************************************************
1096 * Scan compiled regex for auto-possession *
1097 *************************************************/
1098
1099 /* Replaces single character iterations with their possessive alternatives
1100 if appropriate. This function modifies the compiled opcode! Hitting a
1101 non-existent opcode may indicate a bug in PCRE2, but it can also be caused if a
1102 bad UTF string was compiled with PCRE2_NO_UTF_CHECK. The rec_limit catches
1103 overly complicated or large patterns. In these cases, the check just stops,
1104 leaving the remainder of the pattern unpossessified.
1105
1106 Arguments:
1107 code points to start of the byte code
1108 cb compile data block
1109
1110 Returns: 0 for success
                -1 if a non-existent opcode is encountered
1112 */
1113
1114 int
PRIV(auto_possessify)1115 PRIV(auto_possessify)(PCRE2_UCHAR *code, const compile_block *cb)
1116 {
1117 PCRE2_UCHAR c;
1118 PCRE2_SPTR end;
1119 PCRE2_UCHAR *repeat_opcode;
1120 uint32_t list[8];
1121 int rec_limit = 1000; /* Was 10,000 but clang+ASAN uses a lot of stack. */
1122 BOOL utf = (cb->external_options & PCRE2_UTF) != 0;
1123 BOOL ucp = (cb->external_options & PCRE2_UCP) != 0;
1124
1125 for (;;)
1126 {
1127 c = *code;
1128
1129 if (c >= OP_TABLE_LENGTH) return -1; /* Something gone wrong */
1130
1131 if (c >= OP_STAR && c <= OP_TYPEPOSUPTO)
1132 {
1133 c -= get_repeat_base(c) - OP_STAR;
1134 end = (c <= OP_MINUPTO) ?
1135 get_chr_property_list(code, utf, ucp, cb->fcc, list) : NULL;
1136 list[1] = c == OP_STAR || c == OP_PLUS || c == OP_QUERY || c == OP_UPTO;
1137
1138 if (end != NULL && compare_opcodes(end, utf, ucp, cb, list, end,
1139 &rec_limit))
1140 {
1141 switch(c)
1142 {
1143 case OP_STAR:
1144 *code += OP_POSSTAR - OP_STAR;
1145 break;
1146
1147 case OP_MINSTAR:
1148 *code += OP_POSSTAR - OP_MINSTAR;
1149 break;
1150
1151 case OP_PLUS:
1152 *code += OP_POSPLUS - OP_PLUS;
1153 break;
1154
1155 case OP_MINPLUS:
1156 *code += OP_POSPLUS - OP_MINPLUS;
1157 break;
1158
1159 case OP_QUERY:
1160 *code += OP_POSQUERY - OP_QUERY;
1161 break;
1162
1163 case OP_MINQUERY:
1164 *code += OP_POSQUERY - OP_MINQUERY;
1165 break;
1166
1167 case OP_UPTO:
1168 *code += OP_POSUPTO - OP_UPTO;
1169 break;
1170
1171 case OP_MINUPTO:
1172 *code += OP_POSUPTO - OP_MINUPTO;
1173 break;
1174 }
1175 }
1176 c = *code;
1177 }
1178 else if (c == OP_CLASS || c == OP_NCLASS || c == OP_XCLASS)
1179 {
1180 #ifdef SUPPORT_WIDE_CHARS
1181 if (c == OP_XCLASS)
1182 repeat_opcode = code + GET(code, 1);
1183 else
1184 #endif
1185 repeat_opcode = code + 1 + (32 / sizeof(PCRE2_UCHAR));
1186
1187 c = *repeat_opcode;
1188 if (c >= OP_CRSTAR && c <= OP_CRMINRANGE)
1189 {
1190 /* The return from get_chr_property_list() will never be NULL when
1191 *code (aka c) is one of the three class opcodes. However, gcc with
1192 -fanalyzer notes that a NULL return is possible, and grumbles. Hence we
1193 put in a check. */
1194
1195 end = get_chr_property_list(code, utf, ucp, cb->fcc, list);
1196 list[1] = (c & 1) == 0;
1197
1198 if (end != NULL &&
1199 compare_opcodes(end, utf, ucp, cb, list, end, &rec_limit))
1200 {
1201 switch (c)
1202 {
1203 case OP_CRSTAR:
1204 case OP_CRMINSTAR:
1205 *repeat_opcode = OP_CRPOSSTAR;
1206 break;
1207
1208 case OP_CRPLUS:
1209 case OP_CRMINPLUS:
1210 *repeat_opcode = OP_CRPOSPLUS;
1211 break;
1212
1213 case OP_CRQUERY:
1214 case OP_CRMINQUERY:
1215 *repeat_opcode = OP_CRPOSQUERY;
1216 break;
1217
1218 case OP_CRRANGE:
1219 case OP_CRMINRANGE:
1220 *repeat_opcode = OP_CRPOSRANGE;
1221 break;
1222 }
1223 }
1224 }
1225 c = *code;
1226 }
1227
1228 switch(c)
1229 {
1230 case OP_END:
1231 return 0;
1232
1233 case OP_TYPESTAR:
1234 case OP_TYPEMINSTAR:
1235 case OP_TYPEPLUS:
1236 case OP_TYPEMINPLUS:
1237 case OP_TYPEQUERY:
1238 case OP_TYPEMINQUERY:
1239 case OP_TYPEPOSSTAR:
1240 case OP_TYPEPOSPLUS:
1241 case OP_TYPEPOSQUERY:
1242 if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
1243 break;
1244
1245 case OP_TYPEUPTO:
1246 case OP_TYPEMINUPTO:
1247 case OP_TYPEEXACT:
1248 case OP_TYPEPOSUPTO:
1249 if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
1250 code += 2;
1251 break;
1252
1253 case OP_CALLOUT_STR:
1254 code += GET(code, 1 + 2*LINK_SIZE);
1255 break;
1256
1257 #ifdef SUPPORT_WIDE_CHARS
1258 case OP_XCLASS:
1259 code += GET(code, 1);
1260 break;
1261 #endif
1262
1263 case OP_MARK:
1264 case OP_COMMIT_ARG:
1265 case OP_PRUNE_ARG:
1266 case OP_SKIP_ARG:
1267 case OP_THEN_ARG:
1268 code += code[1];
1269 break;
1270 }
1271
1272 /* Add in the fixed length from the table */
1273
1274 code += PRIV(OP_lengths)[c];
1275
1276 /* In UTF-8 and UTF-16 modes, opcodes that are followed by a character may be
1277 followed by a multi-byte character. The length in the table is a minimum, so
1278 we have to arrange to skip the extra code units. */
1279
1280 #ifdef MAYBE_UTF_MULTI
1281 if (utf) switch(c)
1282 {
1283 case OP_CHAR:
1284 case OP_CHARI:
1285 case OP_NOT:
1286 case OP_NOTI:
1287 case OP_STAR:
1288 case OP_MINSTAR:
1289 case OP_PLUS:
1290 case OP_MINPLUS:
1291 case OP_QUERY:
1292 case OP_MINQUERY:
1293 case OP_UPTO:
1294 case OP_MINUPTO:
1295 case OP_EXACT:
1296 case OP_POSSTAR:
1297 case OP_POSPLUS:
1298 case OP_POSQUERY:
1299 case OP_POSUPTO:
1300 case OP_STARI:
1301 case OP_MINSTARI:
1302 case OP_PLUSI:
1303 case OP_MINPLUSI:
1304 case OP_QUERYI:
1305 case OP_MINQUERYI:
1306 case OP_UPTOI:
1307 case OP_MINUPTOI:
1308 case OP_EXACTI:
1309 case OP_POSSTARI:
1310 case OP_POSPLUSI:
1311 case OP_POSQUERYI:
1312 case OP_POSUPTOI:
1313 case OP_NOTSTAR:
1314 case OP_NOTMINSTAR:
1315 case OP_NOTPLUS:
1316 case OP_NOTMINPLUS:
1317 case OP_NOTQUERY:
1318 case OP_NOTMINQUERY:
1319 case OP_NOTUPTO:
1320 case OP_NOTMINUPTO:
1321 case OP_NOTEXACT:
1322 case OP_NOTPOSSTAR:
1323 case OP_NOTPOSPLUS:
1324 case OP_NOTPOSQUERY:
1325 case OP_NOTPOSUPTO:
1326 case OP_NOTSTARI:
1327 case OP_NOTMINSTARI:
1328 case OP_NOTPLUSI:
1329 case OP_NOTMINPLUSI:
1330 case OP_NOTQUERYI:
1331 case OP_NOTMINQUERYI:
1332 case OP_NOTUPTOI:
1333 case OP_NOTMINUPTOI:
1334 case OP_NOTEXACTI:
1335 case OP_NOTPOSSTARI:
1336 case OP_NOTPOSPLUSI:
1337 case OP_NOTPOSQUERYI:
1338 case OP_NOTPOSUPTOI:
1339 if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);
1340 break;
1341 }
1342 #else
1343 (void)(utf); /* Keep compiler happy by referencing function argument */
1344 #endif /* SUPPORT_WIDE_CHARS */
1345 }
1346 }
1347
1348 /* End of pcre2_auto_possess.c */
1349