1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Original API code Copyright (c) 1997-2012 University of Cambridge
10 New API code Copyright (c) 2016-2023 University of Cambridge
11
12 -----------------------------------------------------------------------------
13 Redistribution and use in source and binary forms, with or without
14 modification, are permitted provided that the following conditions are met:
15
16 * Redistributions of source code must retain the above copyright notice,
17 this list of conditions and the following disclaimer.
18
19 * Redistributions in binary form must reproduce the above copyright
20 notice, this list of conditions and the following disclaimer in the
21 documentation and/or other materials provided with the distribution.
22
23 * Neither the name of the University of Cambridge nor the names of its
24 contributors may be used to endorse or promote products derived from
25 this software without specific prior written permission.
26
27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 POSSIBILITY OF SUCH DAMAGE.
38 -----------------------------------------------------------------------------
39 */
40
41
42 /* This module contains a PCRE private debugging function for printing out the
43 internal form of a compiled regular expression, along with some supporting
44 local functions. This source file is #included in pcre2test.c at each supported
45 code unit width, with PCRE2_SUFFIX set appropriately, just like the functions
46 that comprise the library. It can also optionally be included in
47 pcre2_compile.c for detailed debugging in error situations. */
48
49
50 /* Tables of operator names. The same 8-bit table is used for all code unit
51 widths, so it must be defined only once. The list itself is defined in
52 pcre2_internal.h, which is #included by pcre2test before this file. */
53
54 #ifndef OP_LISTS_DEFINED
55 static const char *OP_names[] = { OP_NAME_LIST };
56 #define OP_LISTS_DEFINED
57 #endif
58
/* This file is #included once per supported code unit width, so every function
and table defined below is renamed through PCRE2_SUFFIX to get a
mode-dependent name. */

#define OP_lengths            PCRE2_SUFFIX(OP_lengths_)
#define get_ucpname           PCRE2_SUFFIX(get_ucpname_)
#define pcre2_printint        PCRE2_SUFFIX(pcre2_printint_)
#define print_char            PCRE2_SUFFIX(print_char_)
#define print_custring        PCRE2_SUFFIX(print_custring_)
#define print_custring_bylen  PCRE2_SUFFIX(print_custring_bylen_)
#define print_prop            PCRE2_SUFFIX(print_prop_)
68
69 /* Table of sizes for the fixed-length opcodes. It's defined in a macro so that
70 the definition is next to the definition of the opcodes in pcre2_internal.h.
71 The contents of the table are, however, mode-dependent. */
72
73 static const uint8_t OP_lengths[] = { OP_LENGTHS };
74
75
76
77 /*************************************************
78 * Print one character from a string *
79 *************************************************/
80
81 /* In UTF mode the character may occupy more than one code unit.
82
83 Arguments:
84 f file to write to
85 ptr pointer to first code unit of the character
86 utf TRUE if string is UTF (will be FALSE if UTF is not supported)
87
88 Returns: number of additional code units used
89 */
90
91 static unsigned int
print_char(FILE * f,PCRE2_SPTR ptr,BOOL utf)92 print_char(FILE *f, PCRE2_SPTR ptr, BOOL utf)
93 {
94 uint32_t c = *ptr;
95 BOOL one_code_unit = !utf;
96
97 /* If UTF is supported and requested, check for a valid single code unit. */
98
99 #ifdef SUPPORT_UNICODE
100 if (utf)
101 {
102 #if PCRE2_CODE_UNIT_WIDTH == 8
103 one_code_unit = c < 0x80;
104 #elif PCRE2_CODE_UNIT_WIDTH == 16
105 one_code_unit = (c & 0xfc00) != 0xd800;
106 #else
107 one_code_unit = (c & 0xfffff800u) != 0xd800u;
108 #endif /* CODE_UNIT_WIDTH */
109 }
110 #endif /* SUPPORT_UNICODE */
111
112 /* Handle a valid one-code-unit character at any width. */
113
114 if (one_code_unit)
115 {
116 if (PRINTABLE(c)) fprintf(f, "%c", (char)c);
117 else if (c < 0x80) fprintf(f, "\\x%02x", c);
118 else fprintf(f, "\\x{%02x}", c);
119 return 0;
120 }
121
122 /* Code for invalid UTF code units and multi-unit UTF characters is different
123 for each width. If UTF is not supported, control should never get here, but we
124 need a return statement to keep the compiler happy. */
125
126 #ifndef SUPPORT_UNICODE
127 return 0;
128 #else
129
130 /* Malformed UTF-8 should occur only if the sanity check has been turned off.
131 Rather than swallow random bytes, just stop if we hit a bad one. Print it with
132 \X instead of \x as an indication. */
133
134 #if PCRE2_CODE_UNIT_WIDTH == 8
135 if ((c & 0xc0) != 0xc0)
136 {
137 fprintf(f, "\\X{%x}", c); /* Invalid starting byte */
138 return 0;
139 }
140 else
141 {
142 int i;
143 int a = PRIV(utf8_table4)[c & 0x3f]; /* Number of additional bytes */
144 int s = 6*a;
145 c = (c & PRIV(utf8_table3)[a]) << s;
146 for (i = 1; i <= a; i++)
147 {
148 if ((ptr[i] & 0xc0) != 0x80)
149 {
150 fprintf(f, "\\X{%x}", c); /* Invalid secondary byte */
151 return i - 1;
152 }
153 s -= 6;
154 c |= (ptr[i] & 0x3f) << s;
155 }
156 fprintf(f, "\\x{%x}", c);
157 return a;
158 }
159 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
160
161 /* UTF-16: rather than swallow a low surrogate, just stop if we hit a bad one.
162 Print it with \X instead of \x as an indication. */
163
164 #if PCRE2_CODE_UNIT_WIDTH == 16
165 if ((ptr[1] & 0xfc00) != 0xdc00)
166 {
167 fprintf(f, "\\X{%x}", c);
168 return 0;
169 }
170 c = (((c & 0x3ff) << 10) | (ptr[1] & 0x3ff)) + 0x10000;
171 fprintf(f, "\\x{%x}", c);
172 return 1;
173 #endif /* PCRE2_CODE_UNIT_WIDTH == 16 */
174
175 /* For UTF-32 we get here only for a malformed code unit, which should only
176 occur if the sanity check has been turned off. Print it with \X instead of \x
177 as an indication. */
178
179 #if PCRE2_CODE_UNIT_WIDTH == 32
180 fprintf(f, "\\X{%x}", c);
181 return 0;
182 #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
183 #endif /* SUPPORT_UNICODE */
184 }
185
186
187
188 /*************************************************
189 * Print string as a list of code units *
190 *************************************************/
191
192 /* These take no account of UTF as they always print each individual code unit.
193 The string is zero-terminated for print_custring(); the length is given for
194 print_custring_bylen().
195
196 Arguments:
197 f file to write to
198 ptr point to the string
199 len length for print_custring_bylen()
200
201 Returns: nothing
202 */
203
204 static void
print_custring(FILE * f,PCRE2_SPTR ptr)205 print_custring(FILE *f, PCRE2_SPTR ptr)
206 {
207 while (*ptr != '\0')
208 {
209 uint32_t c = *ptr++;
210 if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x{%x}", c);
211 }
212 }
213
214 static void
print_custring_bylen(FILE * f,PCRE2_SPTR ptr,PCRE2_UCHAR len)215 print_custring_bylen(FILE *f, PCRE2_SPTR ptr, PCRE2_UCHAR len)
216 {
217 for (; len > 0; len--)
218 {
219 uint32_t c = *ptr++;
220 if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x{%x}", c);
221 }
222 }
223
224
225
/*************************************************
*          Find Unicode property name            *
*************************************************/

/* When there is no UTF/UCP support, the table of names does not exist. This
function should not be called in such configurations, because a pattern that
tries to use Unicode properties won't compile. Rather than put lots of #ifdefs
into the main code, however, there is just one in this function, and "??" is
returned when Unicode is not supported.

The table contains both full names and their abbreviations, so some fiddling is
done to try to get the full name. The table is scanned from its last entry
towards its first. A 3-character name for a script property is accepted at
once; otherwise the longer of the first two matching names is used. A PT_SC
request also matches PT_SCX entries.

Arguments:
  ptype       the property type
  pvalue      the property value

Returns:      the name, or "??" if nothing matches
*/

static const char *
get_ucpname(unsigned int ptype, unsigned int pvalue)
{
#ifdef SUPPORT_UNICODE
const char *best = "??";
size_t bestlen = 0;
int matches = 0;
unsigned int alt_type = (ptype == PT_SC)? PT_SCX : ptype;

for (int idx = PRIV(utt_size) - 1; idx >= 0; idx--)
  {
  const ucp_type_table *entry = PRIV(utt) + idx;
  const char *name;
  size_t namelen;

  /* Skip entries for other properties. */

  if (pvalue != entry->value) continue;
  if (ptype != entry->type && alt_type != entry->type) continue;

  name = PRIV(utt_names) + entry->name_offset;
  namelen = strlen(name);

  /* A 3-character script name wins outright. */

  if (namelen == 3 && (entry->type == PT_SC || entry->type == PT_SCX))
    return name;

  /* Otherwise remember the longest name; stop after two candidates. */

  if (namelen > bestlen)
    {
    best = name;
    bestlen = namelen;
    }

  matches++;
  if (matches >= 2) break;
  }

return best;

#else   /* No UTF support */
(void)ptype;
(void)pvalue;
return "??";
#endif  /* SUPPORT_UNICODE */
}
281
282
283
284 /*************************************************
285 * Print Unicode property value *
286 *************************************************/
287
288 /* "Normal" properties can be printed from tables. The PT_CLIST property is a
289 pseudo-property that contains a pointer to a list of case-equivalent
290 characters.
291
292 Arguments:
293 f file to write to
294 code pointer in the compiled code
295 before text to print before
296 after text to print after
297
298 Returns: nothing
299 */
300
301 static void
print_prop(FILE * f,PCRE2_SPTR code,const char * before,const char * after)302 print_prop(FILE *f, PCRE2_SPTR code, const char *before, const char *after)
303 {
304 if (code[1] != PT_CLIST)
305 {
306 const char *sc = (code[1] == PT_SC)? "script:" : "";
307 const char *s = get_ucpname(code[1], code[2]);
308 fprintf(f, "%s%s %s%c%s%s", before, OP_names[*code], sc, toupper(s[0]), s+1, after);
309 }
310 else
311 {
312 const uint32_t *p = PRIV(ucd_caseless_sets) + code[2];
313 fprintf (f, "%s%sclist", before, (*code == OP_PROP)? "" : "not ");
314 while (*p < NOTACHAR) fprintf(f, " %04x", *p++);
315 fprintf(f, "%s", after);
316 }
317 }
318
319
320
321 /*************************************************
322 * Print compiled pattern *
323 *************************************************/
324
325 /* The print_lengths flag controls whether offsets and lengths of items are
326 printed. Lenths can be turned off from pcre2test so that automatic tests on
327 bytecode can be written that do not depend on the value of LINK_SIZE.
328
329 Arguments:
330 re a compiled pattern
331 f the file to write to
332 print_lengths show various lengths
333
334 Returns: nothing
335 */
336
337 static void
pcre2_printint(pcre2_code * re,FILE * f,BOOL print_lengths)338 pcre2_printint(pcre2_code *re, FILE *f, BOOL print_lengths)
339 {
340 PCRE2_SPTR codestart, nametable, code;
341 uint32_t nesize = re->name_entry_size;
342 BOOL utf = (re->overall_options & PCRE2_UTF) != 0;
343
344 nametable = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code));
345 code = codestart = nametable + re->name_count * re->name_entry_size;
346
347 for(;;)
348 {
349 PCRE2_SPTR ccode;
350 uint32_t c;
351 int i;
352 const char *flag = " ";
353 unsigned int extra = 0;
354
355 if (print_lengths)
356 fprintf(f, "%3d ", (int)(code - codestart));
357 else
358 fprintf(f, " ");
359
360 switch(*code)
361 {
362 /* ========================================================================== */
363 /* These cases are never obeyed. This is a fudge that causes a compile-
364 time error if the vectors OP_names or OP_lengths, which are indexed
365 by opcode, are not the correct length. It seems to be the only way to do
366 such a check at compile time, as the sizeof() operator does not work in
367 the C preprocessor. */
368
369 case OP_TABLE_LENGTH:
370 case OP_TABLE_LENGTH +
371 ((sizeof(OP_names)/sizeof(const char *) == OP_TABLE_LENGTH) &&
372 (sizeof(OP_lengths) == OP_TABLE_LENGTH)):
373 return;
374 /* ========================================================================== */
375
376 case OP_END:
377 fprintf(f, " %s\n", OP_names[*code]);
378 fprintf(f, "------------------------------------------------------------------\n");
379 return;
380
381 case OP_CHAR:
382 fprintf(f, " ");
383 do
384 {
385 code++;
386 code += 1 + print_char(f, code, utf);
387 }
388 while (*code == OP_CHAR);
389 fprintf(f, "\n");
390 continue;
391
392 case OP_CHARI:
393 fprintf(f, " /i ");
394 do
395 {
396 code++;
397 code += 1 + print_char(f, code, utf);
398 }
399 while (*code == OP_CHARI);
400 fprintf(f, "\n");
401 continue;
402
403 case OP_CBRA:
404 case OP_CBRAPOS:
405 case OP_SCBRA:
406 case OP_SCBRAPOS:
407 if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
408 else fprintf(f, " ");
409 fprintf(f, "%s %d", OP_names[*code], GET2(code, 1+LINK_SIZE));
410 break;
411
412 case OP_BRA:
413 case OP_BRAPOS:
414 case OP_SBRA:
415 case OP_SBRAPOS:
416 case OP_KETRMAX:
417 case OP_KETRMIN:
418 case OP_KETRPOS:
419 case OP_ALT:
420 case OP_KET:
421 case OP_ASSERT:
422 case OP_ASSERT_NOT:
423 case OP_ASSERTBACK:
424 case OP_ASSERTBACK_NOT:
425 case OP_ASSERT_NA:
426 case OP_ASSERTBACK_NA:
427 case OP_ONCE:
428 case OP_SCRIPT_RUN:
429 case OP_COND:
430 case OP_SCOND:
431 if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
432 else fprintf(f, " ");
433 fprintf(f, "%s", OP_names[*code]);
434 break;
435
436 case OP_REVERSE:
437 if (print_lengths) fprintf(f, "%3d ", GET2(code, 1));
438 else fprintf(f, " ");
439 fprintf(f, "%s", OP_names[*code]);
440 break;
441
442 case OP_VREVERSE:
443 if (print_lengths) fprintf(f, "%3d %d ", GET2(code, 1),
444 GET2(code, 1 + IMM2_SIZE));
445 else fprintf(f, " ");
446 fprintf(f, "%s", OP_names[*code]);
447 break;
448
449 case OP_CLOSE:
450 fprintf(f, " %s %d", OP_names[*code], GET2(code, 1));
451 break;
452
453 case OP_CREF:
454 fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
455 break;
456
457 case OP_DNCREF:
458 {
459 PCRE2_SPTR entry = nametable + (GET2(code, 1) * nesize) + IMM2_SIZE;
460 fprintf(f, " %s Cond ref <", flag);
461 print_custring(f, entry);
462 fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
463 }
464 break;
465
466 case OP_RREF:
467 c = GET2(code, 1);
468 if (c == RREF_ANY)
469 fprintf(f, " Cond recurse any");
470 else
471 fprintf(f, " Cond recurse %d", c);
472 break;
473
474 case OP_DNRREF:
475 {
476 PCRE2_SPTR entry = nametable + (GET2(code, 1) * nesize) + IMM2_SIZE;
477 fprintf(f, " %s Cond recurse <", flag);
478 print_custring(f, entry);
479 fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
480 }
481 break;
482
483 case OP_FALSE:
484 fprintf(f, " Cond false");
485 break;
486
487 case OP_TRUE:
488 fprintf(f, " Cond true");
489 break;
490
491 case OP_STARI:
492 case OP_MINSTARI:
493 case OP_POSSTARI:
494 case OP_PLUSI:
495 case OP_MINPLUSI:
496 case OP_POSPLUSI:
497 case OP_QUERYI:
498 case OP_MINQUERYI:
499 case OP_POSQUERYI:
500 flag = "/i";
501 /* Fall through */
502 case OP_STAR:
503 case OP_MINSTAR:
504 case OP_POSSTAR:
505 case OP_PLUS:
506 case OP_MINPLUS:
507 case OP_POSPLUS:
508 case OP_QUERY:
509 case OP_MINQUERY:
510 case OP_POSQUERY:
511 case OP_TYPESTAR:
512 case OP_TYPEMINSTAR:
513 case OP_TYPEPOSSTAR:
514 case OP_TYPEPLUS:
515 case OP_TYPEMINPLUS:
516 case OP_TYPEPOSPLUS:
517 case OP_TYPEQUERY:
518 case OP_TYPEMINQUERY:
519 case OP_TYPEPOSQUERY:
520 fprintf(f, " %s ", flag);
521
522 if (*code >= OP_TYPESTAR)
523 {
524 if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
525 {
526 print_prop(f, code + 1, "", " ");
527 extra = 2;
528 }
529 else fprintf(f, "%s", OP_names[code[1]]);
530 }
531 else extra = print_char(f, code+1, utf);
532 fprintf(f, "%s", OP_names[*code]);
533 break;
534
535 case OP_EXACTI:
536 case OP_UPTOI:
537 case OP_MINUPTOI:
538 case OP_POSUPTOI:
539 flag = "/i";
540 /* Fall through */
541 case OP_EXACT:
542 case OP_UPTO:
543 case OP_MINUPTO:
544 case OP_POSUPTO:
545 fprintf(f, " %s ", flag);
546 extra = print_char(f, code + 1 + IMM2_SIZE, utf);
547 fprintf(f, "{");
548 if (*code != OP_EXACT && *code != OP_EXACTI) fprintf(f, "0,");
549 fprintf(f, "%d}", GET2(code,1));
550 if (*code == OP_MINUPTO || *code == OP_MINUPTOI) fprintf(f, "?");
551 else if (*code == OP_POSUPTO || *code == OP_POSUPTOI) fprintf(f, "+");
552 break;
553
554 case OP_TYPEEXACT:
555 case OP_TYPEUPTO:
556 case OP_TYPEMINUPTO:
557 case OP_TYPEPOSUPTO:
558 if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
559 {
560 print_prop(f, code + IMM2_SIZE + 1, " ", " ");
561 extra = 2;
562 }
563 else fprintf(f, " %s", OP_names[code[1 + IMM2_SIZE]]);
564 fprintf(f, "{");
565 if (*code != OP_TYPEEXACT) fprintf(f, "0,");
566 fprintf(f, "%d}", GET2(code,1));
567 if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
568 else if (*code == OP_TYPEPOSUPTO) fprintf(f, "+");
569 break;
570
571 case OP_NOTI:
572 flag = "/i";
573 /* Fall through */
574 case OP_NOT:
575 fprintf(f, " %s [^", flag);
576 extra = print_char(f, code + 1, utf);
577 fprintf(f, "]");
578 break;
579
580 case OP_NOTSTARI:
581 case OP_NOTMINSTARI:
582 case OP_NOTPOSSTARI:
583 case OP_NOTPLUSI:
584 case OP_NOTMINPLUSI:
585 case OP_NOTPOSPLUSI:
586 case OP_NOTQUERYI:
587 case OP_NOTMINQUERYI:
588 case OP_NOTPOSQUERYI:
589 flag = "/i";
590 /* Fall through */
591
592 case OP_NOTSTAR:
593 case OP_NOTMINSTAR:
594 case OP_NOTPOSSTAR:
595 case OP_NOTPLUS:
596 case OP_NOTMINPLUS:
597 case OP_NOTPOSPLUS:
598 case OP_NOTQUERY:
599 case OP_NOTMINQUERY:
600 case OP_NOTPOSQUERY:
601 fprintf(f, " %s [^", flag);
602 extra = print_char(f, code + 1, utf);
603 fprintf(f, "]%s", OP_names[*code]);
604 break;
605
606 case OP_NOTEXACTI:
607 case OP_NOTUPTOI:
608 case OP_NOTMINUPTOI:
609 case OP_NOTPOSUPTOI:
610 flag = "/i";
611 /* Fall through */
612
613 case OP_NOTEXACT:
614 case OP_NOTUPTO:
615 case OP_NOTMINUPTO:
616 case OP_NOTPOSUPTO:
617 fprintf(f, " %s [^", flag);
618 extra = print_char(f, code + 1 + IMM2_SIZE, utf);
619 fprintf(f, "]{");
620 if (*code != OP_NOTEXACT && *code != OP_NOTEXACTI) fprintf(f, "0,");
621 fprintf(f, "%d}", GET2(code,1));
622 if (*code == OP_NOTMINUPTO || *code == OP_NOTMINUPTOI) fprintf(f, "?");
623 else
624 if (*code == OP_NOTPOSUPTO || *code == OP_NOTPOSUPTOI) fprintf(f, "+");
625 break;
626
627 case OP_RECURSE:
628 if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
629 else fprintf(f, " ");
630 fprintf(f, "%s", OP_names[*code]);
631 break;
632
633 case OP_REFI:
634 flag = "/i";
635 /* Fall through */
636 case OP_REF:
637 fprintf(f, " %s \\%d", flag, GET2(code,1));
638 ccode = code + OP_lengths[*code];
639 goto CLASS_REF_REPEAT;
640
641 case OP_DNREFI:
642 flag = "/i";
643 /* Fall through */
644 case OP_DNREF:
645 {
646 PCRE2_SPTR entry = nametable + (GET2(code, 1) * nesize) + IMM2_SIZE;
647 fprintf(f, " %s \\k<", flag);
648 print_custring(f, entry);
649 fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
650 }
651 ccode = code + OP_lengths[*code];
652 goto CLASS_REF_REPEAT;
653
654 case OP_CALLOUT:
655 fprintf(f, " %s %d %d %d", OP_names[*code], code[1 + 2*LINK_SIZE],
656 GET(code, 1), GET(code, 1 + LINK_SIZE));
657 break;
658
659 case OP_CALLOUT_STR:
660 c = code[1 + 4*LINK_SIZE];
661 fprintf(f, " %s %c", OP_names[*code], c);
662 extra = GET(code, 1 + 2*LINK_SIZE);
663 print_custring_bylen(f, code + 2 + 4*LINK_SIZE, extra - 3 - 4*LINK_SIZE);
664 for (i = 0; PRIV(callout_start_delims)[i] != 0; i++)
665 if (c == PRIV(callout_start_delims)[i])
666 {
667 c = PRIV(callout_end_delims)[i];
668 break;
669 }
670 fprintf(f, "%c %d %d %d", c, GET(code, 1 + 3*LINK_SIZE), GET(code, 1),
671 GET(code, 1 + LINK_SIZE));
672 break;
673
674 case OP_PROP:
675 case OP_NOTPROP:
676 print_prop(f, code, " ", "");
677 break;
678
679 /* OP_XCLASS cannot occur in 8-bit, non-UTF mode. However, there's no harm
680 in having this code always here, and it makes it less messy without all
681 those #ifdefs. */
682
683 case OP_CLASS:
684 case OP_NCLASS:
685 case OP_XCLASS:
686 {
687 BOOL printmap, invertmap;
688
689 fprintf(f, " [");
690
691 /* Negative XCLASS has an inverted map whereas the original opcodes have
692 already done the inversion. */
693
694 invertmap = FALSE;
695 if (*code == OP_XCLASS)
696 {
697 extra = GET(code, 1);
698 ccode = code + LINK_SIZE + 1;
699 printmap = (*ccode & XCL_MAP) != 0;
700 if ((*ccode & XCL_NOT) != 0)
701 {
702 invertmap = (*ccode & XCL_HASPROP) == 0;
703 fprintf(f, "^");
704 }
705 ccode++;
706 }
707 else /* CLASS or NCLASS */
708 {
709 printmap = TRUE;
710 ccode = code + 1;
711 }
712
713 /* Print a bit map */
714
715 if (printmap)
716 {
717 uint8_t inverted_map[32];
718 uint8_t *map = (uint8_t *)ccode;
719
720 if (invertmap)
721 {
722 /* Using 255 ^ instead of ~ avoids clang sanitize warning. */
723 for (i = 0; i < 32; i++) inverted_map[i] = 255 ^ map[i];
724 map = inverted_map;
725 }
726
727 for (i = 0; i < 256; i++)
728 {
729 if ((map[i/8] & (1u << (i&7))) != 0)
730 {
731 int j;
732 for (j = i+1; j < 256; j++)
733 if ((map[j/8] & (1u << (j&7))) == 0) break;
734 if (i == '-' || i == ']') fprintf(f, "\\");
735 if (PRINTABLE(i)) fprintf(f, "%c", i);
736 else fprintf(f, "\\x%02x", i);
737 if (--j > i)
738 {
739 if (j != i + 1) fprintf(f, "-");
740 if (j == '-' || j == ']') fprintf(f, "\\");
741 if (PRINTABLE(j)) fprintf(f, "%c", j);
742 else fprintf(f, "\\x%02x", j);
743 }
744 i = j;
745 }
746 }
747 ccode += 32 / sizeof(PCRE2_UCHAR);
748 }
749 }
750
751 /* For an XCLASS there is always some additional data */
752
753 if (*code == OP_XCLASS)
754 {
755 PCRE2_UCHAR ch;
756 while ((ch = *ccode++) != XCL_END)
757 {
758 const char *notch = "";
759
760 switch(ch)
761 {
762 case XCL_NOTPROP:
763 notch = "^";
764 /* Fall through */
765
766 case XCL_PROP:
767 {
768 unsigned int ptype = *ccode++;
769 unsigned int pvalue = *ccode++;
770 const char *s;
771
772 switch(ptype)
773 {
774 case PT_PXGRAPH:
775 fprintf(f, "[:%sgraph:]", notch);
776 break;
777
778 case PT_PXPRINT:
779 fprintf(f, "[:%sprint:]", notch);
780 break;
781
782 case PT_PXPUNCT:
783 fprintf(f, "[:%spunct:]", notch);
784 break;
785
786 case PT_PXXDIGIT:
787 fprintf(f, "[:%sxdigit:]", notch);
788 break;
789
790 default:
791 s = get_ucpname(ptype, pvalue);
792 fprintf(f, "\\%c{%c%s}", ((notch[0] == '^')? 'P':'p'),
793 toupper(s[0]), s+1);
794 break;
795 }
796 }
797 break;
798
799 default:
800 ccode += 1 + print_char(f, ccode, utf);
801 if (ch == XCL_RANGE)
802 {
803 fprintf(f, "-");
804 ccode += 1 + print_char(f, ccode, utf);
805 }
806 break;
807 }
808 }
809 }
810
811 /* Indicate a non-UTF class which was created by negation */
812
813 fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : "");
814
815 /* Handle repeats after a class or a back reference */
816
817 CLASS_REF_REPEAT:
818 switch(*ccode)
819 {
820 unsigned int min, max;
821
822 case OP_CRSTAR:
823 case OP_CRMINSTAR:
824 case OP_CRPLUS:
825 case OP_CRMINPLUS:
826 case OP_CRQUERY:
827 case OP_CRMINQUERY:
828 case OP_CRPOSSTAR:
829 case OP_CRPOSPLUS:
830 case OP_CRPOSQUERY:
831 fprintf(f, "%s", OP_names[*ccode]);
832 extra += OP_lengths[*ccode];
833 break;
834
835 case OP_CRRANGE:
836 case OP_CRMINRANGE:
837 case OP_CRPOSRANGE:
838 min = GET2(ccode,1);
839 max = GET2(ccode,1 + IMM2_SIZE);
840 if (max == 0) fprintf(f, "{%u,}", min);
841 else fprintf(f, "{%u,%u}", min, max);
842 if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
843 else if (*ccode == OP_CRPOSRANGE) fprintf(f, "+");
844 extra += OP_lengths[*ccode];
845 break;
846
847 /* Do nothing if it's not a repeat; this code stops picky compilers
848 warning about the lack of a default code path. */
849
850 default:
851 break;
852 }
853 break;
854
855 case OP_MARK:
856 case OP_COMMIT_ARG:
857 case OP_PRUNE_ARG:
858 case OP_SKIP_ARG:
859 case OP_THEN_ARG:
860 fprintf(f, " %s ", OP_names[*code]);
861 print_custring_bylen(f, code + 2, code[1]);
862 extra += code[1];
863 break;
864
865 case OP_THEN:
866 fprintf(f, " %s", OP_names[*code]);
867 break;
868
869 case OP_CIRCM:
870 case OP_DOLLM:
871 flag = "/m";
872 /* Fall through */
873
874 /* Anything else is just an item with no data, but possibly a flag. */
875
876 default:
877 fprintf(f, " %s %s", flag, OP_names[*code]);
878 break;
879 }
880
881 code += OP_lengths[*code] + extra;
882 fprintf(f, "\n");
883 }
884 }
885
886 /* End of pcre2_printint.c */
887