xref: /php-src/ext/pcre/pcre2lib/pcre2_printint.c (revision ae5beff6)
1 /*************************************************
2 *      Perl-Compatible Regular Expressions       *
3 *************************************************/
4 
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7 
8                        Written by Philip Hazel
9      Original API code Copyright (c) 1997-2012 University of Cambridge
10           New API code Copyright (c) 2016-2023 University of Cambridge
11 
12 -----------------------------------------------------------------------------
13 Redistribution and use in source and binary forms, with or without
14 modification, are permitted provided that the following conditions are met:
15 
16     * Redistributions of source code must retain the above copyright notice,
17       this list of conditions and the following disclaimer.
18 
19     * Redistributions in binary form must reproduce the above copyright
20       notice, this list of conditions and the following disclaimer in the
21       documentation and/or other materials provided with the distribution.
22 
23     * Neither the name of the University of Cambridge nor the names of its
24       contributors may be used to endorse or promote products derived from
25       this software without specific prior written permission.
26 
27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 POSSIBILITY OF SUCH DAMAGE.
38 -----------------------------------------------------------------------------
39 */
40 
41 
42 /* This module contains a PCRE private debugging function for printing out the
43 internal form of a compiled regular expression, along with some supporting
44 local functions. This source file is #included in pcre2test.c at each supported
45 code unit width, with PCRE2_SUFFIX set appropriately, just like the functions
46 that comprise the library. It can also optionally be included in
47 pcre2_compile.c for detailed debugging in error situations. */
48 
49 
50 /* Tables of operator names. The same 8-bit table is used for all code unit
51 widths, so it must be defined only once. The list itself is defined in
52 pcre2_internal.h, which is #included by pcre2test before this file. */
53 
54 #ifndef OP_LISTS_DEFINED
55 static const char *OP_names[] = { OP_NAME_LIST };
56 #define OP_LISTS_DEFINED
57 #endif
58 
/* Every function and table defined below must have a mode-dependent name, so
that the copies produced for different code unit widths do not clash. Each
local identifier is therefore mapped through PCRE2_SUFFIX. */

/* The table of opcode lengths */
#define OP_lengths            PCRE2_SUFFIX(OP_lengths_)

/* Character and string printing helpers */
#define print_char            PCRE2_SUFFIX(print_char_)
#define print_custring        PCRE2_SUFFIX(print_custring_)
#define print_custring_bylen  PCRE2_SUFFIX(print_custring_bylen_)

/* Unicode property helpers */
#define get_ucpname           PCRE2_SUFFIX(get_ucpname_)
#define print_prop            PCRE2_SUFFIX(print_prop_)

/* The main printing function */
#define pcre2_printint        PCRE2_SUFFIX(pcre2_printint_)
68 
69 /* Table of sizes for the fixed-length opcodes. It's defined in a macro so that
70 the definition is next to the definition of the opcodes in pcre2_internal.h.
71 The contents of the table are, however, mode-dependent. */
72 
73 static const uint8_t OP_lengths[] = { OP_LENGTHS };
74 
75 
76 
77 /*************************************************
78 *       Print one character from a string        *
79 *************************************************/
80 
81 /* In UTF mode the character may occupy more than one code unit.
82 
83 Arguments:
84   f           file to write to
85   ptr         pointer to first code unit of the character
86   utf         TRUE if string is UTF (will be FALSE if UTF is not supported)
87 
88 Returns:      number of additional code units used
89 */
90 
91 static unsigned int
print_char(FILE * f,PCRE2_SPTR ptr,BOOL utf)92 print_char(FILE *f, PCRE2_SPTR ptr, BOOL utf)
93 {
94 uint32_t c = *ptr;
95 BOOL one_code_unit = !utf;
96 
97 /* If UTF is supported and requested, check for a valid single code unit. */
98 
99 #ifdef SUPPORT_UNICODE
100 if (utf)
101   {
102 #if PCRE2_CODE_UNIT_WIDTH == 8
103   one_code_unit = c < 0x80;
104 #elif PCRE2_CODE_UNIT_WIDTH == 16
105   one_code_unit = (c & 0xfc00) != 0xd800;
106 #else
107   one_code_unit = (c & 0xfffff800u) != 0xd800u;
108 #endif  /* CODE_UNIT_WIDTH */
109   }
110 #endif  /* SUPPORT_UNICODE */
111 
112 /* Handle a valid one-code-unit character at any width. */
113 
114 if (one_code_unit)
115   {
116   if (PRINTABLE(c)) fprintf(f, "%c", (char)c);
117   else if (c < 0x80) fprintf(f, "\\x%02x", c);
118   else fprintf(f, "\\x{%02x}", c);
119   return 0;
120   }
121 
122 /* Code for invalid UTF code units and multi-unit UTF characters is different
123 for each width. If UTF is not supported, control should never get here, but we
124 need a return statement to keep the compiler happy. */
125 
126 #ifndef SUPPORT_UNICODE
127 return 0;
128 #else
129 
130 /* Malformed UTF-8 should occur only if the sanity check has been turned off.
131 Rather than swallow random bytes, just stop if we hit a bad one. Print it with
132 \X instead of \x as an indication. */
133 
134 #if PCRE2_CODE_UNIT_WIDTH == 8
135 if ((c & 0xc0) != 0xc0)
136   {
137   fprintf(f, "\\X{%x}", c);       /* Invalid starting byte */
138   return 0;
139   }
140 else
141   {
142   int i;
143   int a = PRIV(utf8_table4)[c & 0x3f];  /* Number of additional bytes */
144   int s = 6*a;
145   c = (c & PRIV(utf8_table3)[a]) << s;
146   for (i = 1; i <= a; i++)
147     {
148     if ((ptr[i] & 0xc0) != 0x80)
149       {
150       fprintf(f, "\\X{%x}", c);   /* Invalid secondary byte */
151       return i - 1;
152       }
153     s -= 6;
154     c |= (ptr[i] & 0x3f) << s;
155     }
156   fprintf(f, "\\x{%x}", c);
157   return a;
158 }
159 #endif  /* PCRE2_CODE_UNIT_WIDTH == 8 */
160 
161 /* UTF-16: rather than swallow a low surrogate, just stop if we hit a bad one.
162 Print it with \X instead of \x as an indication. */
163 
164 #if PCRE2_CODE_UNIT_WIDTH == 16
165 if ((ptr[1] & 0xfc00) != 0xdc00)
166   {
167   fprintf(f, "\\X{%x}", c);
168   return 0;
169   }
170 c = (((c & 0x3ff) << 10) | (ptr[1] & 0x3ff)) + 0x10000;
171 fprintf(f, "\\x{%x}", c);
172 return 1;
173 #endif  /* PCRE2_CODE_UNIT_WIDTH == 16 */
174 
175 /* For UTF-32 we get here only for a malformed code unit, which should only
176 occur if the sanity check has been turned off. Print it with \X instead of \x
177 as an indication. */
178 
179 #if PCRE2_CODE_UNIT_WIDTH == 32
180 fprintf(f, "\\X{%x}", c);
181 return 0;
182 #endif  /* PCRE2_CODE_UNIT_WIDTH == 32 */
183 #endif  /* SUPPORT_UNICODE */
184 }
185 
186 
187 
188 /*************************************************
189 *     Print string as a list of code units       *
190 *************************************************/
191 
192 /* These take no account of UTF as they always print each individual code unit.
193 The string is zero-terminated for print_custring(); the length is given for
194 print_custring_bylen().
195 
196 Arguments:
197   f          file to write to
198   ptr        point to the string
199   len        length for print_custring_bylen()
200 
201 Returns:     nothing
202 */
203 
204 static void
print_custring(FILE * f,PCRE2_SPTR ptr)205 print_custring(FILE *f, PCRE2_SPTR ptr)
206 {
207 while (*ptr != '\0')
208   {
209   uint32_t c = *ptr++;
210   if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x{%x}", c);
211   }
212 }
213 
214 static void
print_custring_bylen(FILE * f,PCRE2_SPTR ptr,PCRE2_UCHAR len)215 print_custring_bylen(FILE *f, PCRE2_SPTR ptr, PCRE2_UCHAR len)
216 {
217 for (; len > 0; len--)
218   {
219   uint32_t c = *ptr++;
220   if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x{%x}", c);
221   }
222 }
223 
224 
225 
/*************************************************
*          Find Unicode property name            *
*************************************************/

/* Returns the name of a Unicode property, given its type and value, or "??"
if no table entry matches.

When there is no UTF/UCP support, the table of names does not exist. This
function should not be called in such configurations, because a pattern that
tries to use Unicode properties won't compile. Rather than scatter #ifdefs
through the main code, however, there is just one, in this function.

The table contains both full names and their abbreviations, so a little
fiddling is needed to try to get the full name. The table is scanned from its
last entry towards its first. A 3-character name for a script (PT_SC or PT_SCX)
is accepted at once; otherwise the longer of the first two matching names is
used. A request for PT_SC also matches entries of type PT_SCX. */

static const char *
get_ucpname(unsigned int ptype, unsigned int pvalue)
{
#ifdef SUPPORT_UNICODE
int matches = 0;
size_t best_len = 0;
const char *best = "??";
unsigned int alt_type = (ptype == PT_SC)? PT_SCX : ptype;
int idx = PRIV(utt_size);

while (--idx >= 0)
  {
  const ucp_type_table *entry = &PRIV(utt)[idx];
  const char *name;
  size_t name_len;

  /* Skip entries for a different value or a different property type. */

  if (pvalue != entry->value) continue;
  if (entry->type != ptype && entry->type != alt_type) continue;

  name = PRIV(utt_names) + entry->name_offset;
  name_len = strlen(name);

  /* A 3-character script name is taken straight away. */

  if (name_len == 3 && (entry->type == PT_SC || entry->type == PT_SCX))
    return name;

  /* Otherwise remember the longest name so far, and stop after two. */

  if (name_len > best_len)
    {
    best = name;
    best_len = name_len;
    }

  if (++matches >= 2) break;
  }

return best;

#else   /* No UTF support */
(void)ptype;
(void)pvalue;
return "??";
#endif  /* SUPPORT_UNICODE */
}
281 
282 
283 
284 /*************************************************
285 *       Print Unicode property value             *
286 *************************************************/
287 
288 /* "Normal" properties can be printed from tables. The PT_CLIST property is a
289 pseudo-property that contains a pointer to a list of case-equivalent
290 characters.
291 
292 Arguments:
293   f            file to write to
294   code         pointer in the compiled code
295   before       text to print before
296   after        text to print after
297 
298 Returns:       nothing
299 */
300 
301 static void
print_prop(FILE * f,PCRE2_SPTR code,const char * before,const char * after)302 print_prop(FILE *f, PCRE2_SPTR code, const char *before, const char *after)
303 {
304 if (code[1] != PT_CLIST)
305   {
306   const char *sc = (code[1] == PT_SC)? "script:" : "";
307   const char *s = get_ucpname(code[1], code[2]);
308   fprintf(f, "%s%s %s%c%s%s", before, OP_names[*code], sc, toupper(s[0]), s+1, after);
309   }
310 else
311   {
312   const uint32_t *p = PRIV(ucd_caseless_sets) + code[2];
313   fprintf (f, "%s%sclist", before, (*code == OP_PROP)? "" : "not ");
314   while (*p < NOTACHAR) fprintf(f, " %04x", *p++);
315   fprintf(f, "%s", after);
316   }
317 }
318 
319 
320 
321 /*************************************************
322 *            Print compiled pattern              *
323 *************************************************/
324 
325 /* The print_lengths flag controls whether offsets and lengths of items are
326 printed. Lenths can be turned off from pcre2test so that automatic tests on
327 bytecode can be written that do not depend on the value of LINK_SIZE.
328 
329 Arguments:
330   re              a compiled pattern
331   f               the file to write to
332   print_lengths   show various lengths
333 
334 Returns:          nothing
335 */
336 
337 static void
pcre2_printint(pcre2_code * re,FILE * f,BOOL print_lengths)338 pcre2_printint(pcre2_code *re, FILE *f, BOOL print_lengths)
339 {
340 PCRE2_SPTR codestart, nametable, code;
341 uint32_t nesize = re->name_entry_size;
342 BOOL utf = (re->overall_options & PCRE2_UTF) != 0;
343 
344 nametable = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code));
345 code = codestart = nametable + re->name_count * re->name_entry_size;
346 
347 for(;;)
348   {
349   PCRE2_SPTR ccode;
350   uint32_t c;
351   int i;
352   const char *flag = "  ";
353   unsigned int extra = 0;
354 
355   if (print_lengths)
356     fprintf(f, "%3d ", (int)(code - codestart));
357   else
358     fprintf(f, "    ");
359 
360   switch(*code)
361     {
362 /* ========================================================================== */
363       /* These cases are never obeyed. This is a fudge that causes a compile-
364       time error if the vectors OP_names or OP_lengths, which are indexed
365       by opcode, are not the correct length. It seems to be the only way to do
366       such a check at compile time, as the sizeof() operator does not work in
367       the C preprocessor. */
368 
369       case OP_TABLE_LENGTH:
370       case OP_TABLE_LENGTH +
371         ((sizeof(OP_names)/sizeof(const char *) == OP_TABLE_LENGTH) &&
372         (sizeof(OP_lengths) == OP_TABLE_LENGTH)):
373       return;
374 /* ========================================================================== */
375 
376     case OP_END:
377     fprintf(f, "    %s\n", OP_names[*code]);
378     fprintf(f, "------------------------------------------------------------------\n");
379     return;
380 
381     case OP_CHAR:
382     fprintf(f, "    ");
383     do
384       {
385       code++;
386       code += 1 + print_char(f, code, utf);
387       }
388     while (*code == OP_CHAR);
389     fprintf(f, "\n");
390     continue;
391 
392     case OP_CHARI:
393     fprintf(f, " /i ");
394     do
395       {
396       code++;
397       code += 1 + print_char(f, code, utf);
398       }
399     while (*code == OP_CHARI);
400     fprintf(f, "\n");
401     continue;
402 
403     case OP_CBRA:
404     case OP_CBRAPOS:
405     case OP_SCBRA:
406     case OP_SCBRAPOS:
407     if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
408       else fprintf(f, "    ");
409     fprintf(f, "%s %d", OP_names[*code], GET2(code, 1+LINK_SIZE));
410     break;
411 
412     case OP_BRA:
413     case OP_BRAPOS:
414     case OP_SBRA:
415     case OP_SBRAPOS:
416     case OP_KETRMAX:
417     case OP_KETRMIN:
418     case OP_KETRPOS:
419     case OP_ALT:
420     case OP_KET:
421     case OP_ASSERT:
422     case OP_ASSERT_NOT:
423     case OP_ASSERTBACK:
424     case OP_ASSERTBACK_NOT:
425     case OP_ASSERT_NA:
426     case OP_ASSERTBACK_NA:
427     case OP_ONCE:
428     case OP_SCRIPT_RUN:
429     case OP_COND:
430     case OP_SCOND:
431     if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
432       else fprintf(f, "    ");
433     fprintf(f, "%s", OP_names[*code]);
434     break;
435 
436     case OP_REVERSE:
437     if (print_lengths) fprintf(f, "%3d ", GET2(code, 1));
438       else fprintf(f, "    ");
439     fprintf(f, "%s", OP_names[*code]);
440     break;
441 
442     case OP_VREVERSE:
443     if (print_lengths) fprintf(f, "%3d %d ", GET2(code, 1),
444       GET2(code, 1 + IMM2_SIZE));
445     else fprintf(f, "    ");
446     fprintf(f, "%s", OP_names[*code]);
447     break;
448 
449     case OP_CLOSE:
450     fprintf(f, "    %s %d", OP_names[*code], GET2(code, 1));
451     break;
452 
453     case OP_CREF:
454     fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
455     break;
456 
457     case OP_DNCREF:
458       {
459       PCRE2_SPTR entry = nametable + (GET2(code, 1) * nesize) + IMM2_SIZE;
460       fprintf(f, " %s Cond ref <", flag);
461       print_custring(f, entry);
462       fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
463       }
464     break;
465 
466     case OP_RREF:
467     c = GET2(code, 1);
468     if (c == RREF_ANY)
469       fprintf(f, "    Cond recurse any");
470     else
471       fprintf(f, "    Cond recurse %d", c);
472     break;
473 
474     case OP_DNRREF:
475       {
476       PCRE2_SPTR entry = nametable + (GET2(code, 1) * nesize) + IMM2_SIZE;
477       fprintf(f, " %s Cond recurse <", flag);
478       print_custring(f, entry);
479       fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
480       }
481     break;
482 
483     case OP_FALSE:
484     fprintf(f, "    Cond false");
485     break;
486 
487     case OP_TRUE:
488     fprintf(f, "    Cond true");
489     break;
490 
491     case OP_STARI:
492     case OP_MINSTARI:
493     case OP_POSSTARI:
494     case OP_PLUSI:
495     case OP_MINPLUSI:
496     case OP_POSPLUSI:
497     case OP_QUERYI:
498     case OP_MINQUERYI:
499     case OP_POSQUERYI:
500     flag = "/i";
501     /* Fall through */
502     case OP_STAR:
503     case OP_MINSTAR:
504     case OP_POSSTAR:
505     case OP_PLUS:
506     case OP_MINPLUS:
507     case OP_POSPLUS:
508     case OP_QUERY:
509     case OP_MINQUERY:
510     case OP_POSQUERY:
511     case OP_TYPESTAR:
512     case OP_TYPEMINSTAR:
513     case OP_TYPEPOSSTAR:
514     case OP_TYPEPLUS:
515     case OP_TYPEMINPLUS:
516     case OP_TYPEPOSPLUS:
517     case OP_TYPEQUERY:
518     case OP_TYPEMINQUERY:
519     case OP_TYPEPOSQUERY:
520     fprintf(f, " %s ", flag);
521 
522     if (*code >= OP_TYPESTAR)
523       {
524       if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
525         {
526         print_prop(f, code + 1, "", " ");
527         extra = 2;
528         }
529       else fprintf(f, "%s", OP_names[code[1]]);
530       }
531     else extra = print_char(f, code+1, utf);
532     fprintf(f, "%s", OP_names[*code]);
533     break;
534 
535     case OP_EXACTI:
536     case OP_UPTOI:
537     case OP_MINUPTOI:
538     case OP_POSUPTOI:
539     flag = "/i";
540     /* Fall through */
541     case OP_EXACT:
542     case OP_UPTO:
543     case OP_MINUPTO:
544     case OP_POSUPTO:
545     fprintf(f, " %s ", flag);
546     extra = print_char(f, code + 1 + IMM2_SIZE, utf);
547     fprintf(f, "{");
548     if (*code != OP_EXACT && *code != OP_EXACTI) fprintf(f, "0,");
549     fprintf(f, "%d}", GET2(code,1));
550     if (*code == OP_MINUPTO || *code == OP_MINUPTOI) fprintf(f, "?");
551       else if (*code == OP_POSUPTO || *code == OP_POSUPTOI) fprintf(f, "+");
552     break;
553 
554     case OP_TYPEEXACT:
555     case OP_TYPEUPTO:
556     case OP_TYPEMINUPTO:
557     case OP_TYPEPOSUPTO:
558     if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
559       {
560       print_prop(f, code + IMM2_SIZE + 1, "    ", " ");
561       extra = 2;
562       }
563     else fprintf(f, "    %s", OP_names[code[1 + IMM2_SIZE]]);
564     fprintf(f, "{");
565     if (*code != OP_TYPEEXACT) fprintf(f, "0,");
566     fprintf(f, "%d}", GET2(code,1));
567     if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
568       else if (*code == OP_TYPEPOSUPTO) fprintf(f, "+");
569     break;
570 
571     case OP_NOTI:
572     flag = "/i";
573     /* Fall through */
574     case OP_NOT:
575     fprintf(f, " %s [^", flag);
576     extra = print_char(f, code + 1, utf);
577     fprintf(f, "]");
578     break;
579 
580     case OP_NOTSTARI:
581     case OP_NOTMINSTARI:
582     case OP_NOTPOSSTARI:
583     case OP_NOTPLUSI:
584     case OP_NOTMINPLUSI:
585     case OP_NOTPOSPLUSI:
586     case OP_NOTQUERYI:
587     case OP_NOTMINQUERYI:
588     case OP_NOTPOSQUERYI:
589     flag = "/i";
590     /* Fall through */
591 
592     case OP_NOTSTAR:
593     case OP_NOTMINSTAR:
594     case OP_NOTPOSSTAR:
595     case OP_NOTPLUS:
596     case OP_NOTMINPLUS:
597     case OP_NOTPOSPLUS:
598     case OP_NOTQUERY:
599     case OP_NOTMINQUERY:
600     case OP_NOTPOSQUERY:
601     fprintf(f, " %s [^", flag);
602     extra = print_char(f, code + 1, utf);
603     fprintf(f, "]%s", OP_names[*code]);
604     break;
605 
606     case OP_NOTEXACTI:
607     case OP_NOTUPTOI:
608     case OP_NOTMINUPTOI:
609     case OP_NOTPOSUPTOI:
610     flag = "/i";
611     /* Fall through */
612 
613     case OP_NOTEXACT:
614     case OP_NOTUPTO:
615     case OP_NOTMINUPTO:
616     case OP_NOTPOSUPTO:
617     fprintf(f, " %s [^", flag);
618     extra = print_char(f, code + 1 + IMM2_SIZE, utf);
619     fprintf(f, "]{");
620     if (*code != OP_NOTEXACT && *code != OP_NOTEXACTI) fprintf(f, "0,");
621     fprintf(f, "%d}", GET2(code,1));
622     if (*code == OP_NOTMINUPTO || *code == OP_NOTMINUPTOI) fprintf(f, "?");
623       else
624     if (*code == OP_NOTPOSUPTO || *code == OP_NOTPOSUPTOI) fprintf(f, "+");
625     break;
626 
627     case OP_RECURSE:
628     if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
629       else fprintf(f, "    ");
630     fprintf(f, "%s", OP_names[*code]);
631     break;
632 
633     case OP_REFI:
634     flag = "/i";
635     /* Fall through */
636     case OP_REF:
637     fprintf(f, " %s \\%d", flag, GET2(code,1));
638     ccode = code + OP_lengths[*code];
639     goto CLASS_REF_REPEAT;
640 
641     case OP_DNREFI:
642     flag = "/i";
643     /* Fall through */
644     case OP_DNREF:
645       {
646       PCRE2_SPTR entry = nametable + (GET2(code, 1) * nesize) + IMM2_SIZE;
647       fprintf(f, " %s \\k<", flag);
648       print_custring(f, entry);
649       fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
650       }
651     ccode = code + OP_lengths[*code];
652     goto CLASS_REF_REPEAT;
653 
654     case OP_CALLOUT:
655     fprintf(f, "    %s %d %d %d", OP_names[*code], code[1 + 2*LINK_SIZE],
656       GET(code, 1), GET(code, 1 + LINK_SIZE));
657     break;
658 
659     case OP_CALLOUT_STR:
660     c = code[1 + 4*LINK_SIZE];
661     fprintf(f, "    %s %c", OP_names[*code], c);
662     extra = GET(code, 1 + 2*LINK_SIZE);
663     print_custring_bylen(f, code + 2 + 4*LINK_SIZE, extra - 3 - 4*LINK_SIZE);
664     for (i = 0; PRIV(callout_start_delims)[i] != 0; i++)
665       if (c == PRIV(callout_start_delims)[i])
666         {
667         c = PRIV(callout_end_delims)[i];
668         break;
669         }
670     fprintf(f, "%c %d %d %d", c, GET(code, 1 + 3*LINK_SIZE), GET(code, 1),
671       GET(code, 1 + LINK_SIZE));
672     break;
673 
674     case OP_PROP:
675     case OP_NOTPROP:
676     print_prop(f, code, "    ", "");
677     break;
678 
679     /* OP_XCLASS cannot occur in 8-bit, non-UTF mode. However, there's no harm
680     in having this code always here, and it makes it less messy without all
681     those #ifdefs. */
682 
683     case OP_CLASS:
684     case OP_NCLASS:
685     case OP_XCLASS:
686       {
687       BOOL printmap, invertmap;
688 
689       fprintf(f, "    [");
690 
691       /* Negative XCLASS has an inverted map whereas the original opcodes have
692       already done the inversion. */
693 
694       invertmap = FALSE;
695       if (*code == OP_XCLASS)
696         {
697         extra = GET(code, 1);
698         ccode = code + LINK_SIZE + 1;
699         printmap = (*ccode & XCL_MAP) != 0;
700         if ((*ccode & XCL_NOT) != 0)
701           {
702           invertmap = (*ccode & XCL_HASPROP) == 0;
703           fprintf(f, "^");
704           }
705         ccode++;
706         }
707       else  /* CLASS or NCLASS */
708         {
709         printmap = TRUE;
710         ccode = code + 1;
711         }
712 
713       /* Print a bit map */
714 
715       if (printmap)
716         {
717         uint8_t inverted_map[32];
718         uint8_t *map = (uint8_t *)ccode;
719 
720         if (invertmap)
721           {
722           /* Using 255 ^ instead of ~ avoids clang sanitize warning. */
723           for (i = 0; i < 32; i++) inverted_map[i] = 255 ^ map[i];
724           map = inverted_map;
725           }
726 
727         for (i = 0; i < 256; i++)
728           {
729           if ((map[i/8] & (1u << (i&7))) != 0)
730             {
731             int j;
732             for (j = i+1; j < 256; j++)
733               if ((map[j/8] & (1u << (j&7))) == 0) break;
734             if (i == '-' || i == ']') fprintf(f, "\\");
735             if (PRINTABLE(i)) fprintf(f, "%c", i);
736               else fprintf(f, "\\x%02x", i);
737             if (--j > i)
738               {
739               if (j != i + 1) fprintf(f, "-");
740               if (j == '-' || j == ']') fprintf(f, "\\");
741               if (PRINTABLE(j)) fprintf(f, "%c", j);
742                 else fprintf(f, "\\x%02x", j);
743               }
744             i = j;
745             }
746           }
747         ccode += 32 / sizeof(PCRE2_UCHAR);
748         }
749       }
750 
751     /* For an XCLASS there is always some additional data */
752 
753     if (*code == OP_XCLASS)
754       {
755       PCRE2_UCHAR ch;
756       while ((ch = *ccode++) != XCL_END)
757         {
758         const char *notch = "";
759 
760         switch(ch)
761           {
762           case XCL_NOTPROP:
763           notch = "^";
764           /* Fall through */
765 
766           case XCL_PROP:
767             {
768             unsigned int ptype = *ccode++;
769             unsigned int pvalue = *ccode++;
770             const char *s;
771 
772             switch(ptype)
773               {
774               case PT_PXGRAPH:
775               fprintf(f, "[:%sgraph:]", notch);
776               break;
777 
778               case PT_PXPRINT:
779               fprintf(f, "[:%sprint:]", notch);
780               break;
781 
782               case PT_PXPUNCT:
783               fprintf(f, "[:%spunct:]", notch);
784               break;
785 
786               case PT_PXXDIGIT:
787               fprintf(f, "[:%sxdigit:]", notch);
788               break;
789 
790               default:
791               s = get_ucpname(ptype, pvalue);
792               fprintf(f, "\\%c{%c%s}", ((notch[0] == '^')? 'P':'p'),
793                 toupper(s[0]), s+1);
794               break;
795               }
796             }
797           break;
798 
799           default:
800           ccode += 1 + print_char(f, ccode, utf);
801           if (ch == XCL_RANGE)
802             {
803             fprintf(f, "-");
804             ccode += 1 + print_char(f, ccode, utf);
805             }
806           break;
807           }
808         }
809       }
810 
811     /* Indicate a non-UTF class which was created by negation */
812 
813     fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : "");
814 
815     /* Handle repeats after a class or a back reference */
816 
817     CLASS_REF_REPEAT:
818     switch(*ccode)
819       {
820       unsigned int min, max;
821 
822       case OP_CRSTAR:
823       case OP_CRMINSTAR:
824       case OP_CRPLUS:
825       case OP_CRMINPLUS:
826       case OP_CRQUERY:
827       case OP_CRMINQUERY:
828       case OP_CRPOSSTAR:
829       case OP_CRPOSPLUS:
830       case OP_CRPOSQUERY:
831       fprintf(f, "%s", OP_names[*ccode]);
832       extra += OP_lengths[*ccode];
833       break;
834 
835       case OP_CRRANGE:
836       case OP_CRMINRANGE:
837       case OP_CRPOSRANGE:
838       min = GET2(ccode,1);
839       max = GET2(ccode,1 + IMM2_SIZE);
840       if (max == 0) fprintf(f, "{%u,}", min);
841       else fprintf(f, "{%u,%u}", min, max);
842       if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
843       else if (*ccode == OP_CRPOSRANGE) fprintf(f, "+");
844       extra += OP_lengths[*ccode];
845       break;
846 
847       /* Do nothing if it's not a repeat; this code stops picky compilers
848       warning about the lack of a default code path. */
849 
850       default:
851       break;
852       }
853     break;
854 
855     case OP_MARK:
856     case OP_COMMIT_ARG:
857     case OP_PRUNE_ARG:
858     case OP_SKIP_ARG:
859     case OP_THEN_ARG:
860     fprintf(f, "    %s ", OP_names[*code]);
861     print_custring_bylen(f, code + 2, code[1]);
862     extra += code[1];
863     break;
864 
865     case OP_THEN:
866     fprintf(f, "    %s", OP_names[*code]);
867     break;
868 
869     case OP_CIRCM:
870     case OP_DOLLM:
871     flag = "/m";
872     /* Fall through */
873 
874     /* Anything else is just an item with no data, but possibly a flag. */
875 
876     default:
877     fprintf(f, " %s %s", flag, OP_names[*code]);
878     break;
879     }
880 
881   code += OP_lengths[*code] + extra;
882   fprintf(f, "\n");
883   }
884 }
885 
886 /* End of pcre2_printint.c */
887