xref: /PHP-8.0/ext/pcre/pcre2lib/pcre2_printint.c (revision 225117af)
1 /*************************************************
2 *      Perl-Compatible Regular Expressions       *
3 *************************************************/
4 
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7 
8                        Written by Philip Hazel
9      Original API code Copyright (c) 1997-2012 University of Cambridge
10           New API code Copyright (c) 2016-2019 University of Cambridge
11 
12 -----------------------------------------------------------------------------
13 Redistribution and use in source and binary forms, with or without
14 modification, are permitted provided that the following conditions are met:
15 
16     * Redistributions of source code must retain the above copyright notice,
17       this list of conditions and the following disclaimer.
18 
19     * Redistributions in binary form must reproduce the above copyright
20       notice, this list of conditions and the following disclaimer in the
21       documentation and/or other materials provided with the distribution.
22 
23     * Neither the name of the University of Cambridge nor the names of its
24       contributors may be used to endorse or promote products derived from
25       this software without specific prior written permission.
26 
27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 POSSIBILITY OF SUCH DAMAGE.
38 -----------------------------------------------------------------------------
39 */
40 
41 
42 /* This module contains a PCRE private debugging function for printing out the
43 internal form of a compiled regular expression, along with some supporting
44 local functions. This source file is #included in pcre2test.c at each supported
45 code unit width, with PCRE2_SUFFIX set appropriately, just like the functions
46 that comprise the library. It can also optionally be included in
47 pcre2_compile.c for detailed debugging in error situations. */
48 
49 
50 /* Tables of operator names. The same 8-bit table is used for all code unit
51 widths, so it must be defined only once. The list itself is defined in
52 pcre2_internal.h, which is #included by pcre2test before this file. */
53 
/* OP_LISTS_DEFINED guards the definition of OP_names, so that the table is
emitted only the first time this file is included. */
54 #ifndef OP_LISTS_DEFINED
55 static const char *OP_names[] = { OP_NAME_LIST };
56 #define OP_LISTS_DEFINED
57 #endif
58 
59 /* The functions and tables herein must all have mode-dependent names. */
/* Each generic name below is routed through PCRE2_SUFFIX(), so every
inclusion of this file gets its own set of identifiers. */
60 
61 #define OP_lengths            PCRE2_SUFFIX(OP_lengths_)
62 #define get_ucpname           PCRE2_SUFFIX(get_ucpname_)
63 #define pcre2_printint        PCRE2_SUFFIX(pcre2_printint_)
64 #define print_char            PCRE2_SUFFIX(print_char_)
65 #define print_custring        PCRE2_SUFFIX(print_custring_)
66 #define print_custring_bylen  PCRE2_SUFFIX(print_custring_bylen_)
67 #define print_prop            PCRE2_SUFFIX(print_prop_)
68 
69 /* Table of sizes for the fixed-length opcodes. It's defined in a macro so that
70 the definition is next to the definition of the opcodes in pcre2_internal.h.
71 The contents of the table are, however, mode-dependent. */
72 
/* Indexed by opcode; pcre2_printint() adds OP_lengths[*code] to step from one
item to the next. */
73 static const uint8_t OP_lengths[] = { OP_LENGTHS };
74 
75 
76 
77 /*************************************************
78 *       Print one character from a string        *
79 *************************************************/
80 
81 /* In UTF mode the character may occupy more than one code unit.
82 
83 Arguments:
84   f           file to write to
85   ptr         pointer to first code unit of the character
86   utf         TRUE if string is UTF (will be FALSE if UTF is not supported)
87 
88 Returns:      number of additional code units used
89 */
90 
91 static unsigned int
print_char(FILE * f,PCRE2_SPTR ptr,BOOL utf)92 print_char(FILE *f, PCRE2_SPTR ptr, BOOL utf)
93 {
94 uint32_t c = *ptr;
95 BOOL one_code_unit = !utf;
96 
97 /* If UTF is supported and requested, check for a valid single code unit. */
98 
99 #ifdef SUPPORT_UNICODE
100 if (utf)
101   {
102 #if PCRE2_CODE_UNIT_WIDTH == 8
103   one_code_unit = c < 0x80;
104 #elif PCRE2_CODE_UNIT_WIDTH == 16
105   one_code_unit = (c & 0xfc00) != 0xd800;
106 #else
107   one_code_unit = (c & 0xfffff800u) != 0xd800u;
108 #endif  /* CODE_UNIT_WIDTH */
109   }
110 #endif  /* SUPPORT_UNICODE */
111 
112 /* Handle a valid one-code-unit character at any width. */
113 
114 if (one_code_unit)
115   {
116   if (PRINTABLE(c)) fprintf(f, "%c", (char)c);
117   else if (c < 0x80) fprintf(f, "\\x%02x", c);
118   else fprintf(f, "\\x{%02x}", c);
119   return 0;
120   }
121 
122 /* Code for invalid UTF code units and multi-unit UTF characters is different
123 for each width. If UTF is not supported, control should never get here, but we
124 need a return statement to keep the compiler happy. */
125 
126 #ifndef SUPPORT_UNICODE
127 return 0;
128 #else
129 
130 /* Malformed UTF-8 should occur only if the sanity check has been turned off.
131 Rather than swallow random bytes, just stop if we hit a bad one. Print it with
132 \X instead of \x as an indication. */
133 
134 #if PCRE2_CODE_UNIT_WIDTH == 8
135 if ((c & 0xc0) != 0xc0)
136   {
137   fprintf(f, "\\X{%x}", c);       /* Invalid starting byte */
138   return 0;
139   }
140 else
141   {
142   int i;
143   int a = PRIV(utf8_table4)[c & 0x3f];  /* Number of additional bytes */
144   int s = 6*a;
145   c = (c & PRIV(utf8_table3)[a]) << s;
146   for (i = 1; i <= a; i++)
147     {
148     if ((ptr[i] & 0xc0) != 0x80)
149       {
150       fprintf(f, "\\X{%x}", c);   /* Invalid secondary byte */
151       return i - 1;
152       }
153     s -= 6;
154     c |= (ptr[i] & 0x3f) << s;
155     }
156   fprintf(f, "\\x{%x}", c);
157   return a;
158 }
159 #endif  /* PCRE2_CODE_UNIT_WIDTH == 8 */
160 
161 /* UTF-16: rather than swallow a low surrogate, just stop if we hit a bad one.
162 Print it with \X instead of \x as an indication. */
163 
164 #if PCRE2_CODE_UNIT_WIDTH == 16
165 if ((ptr[1] & 0xfc00) != 0xdc00)
166   {
167   fprintf(f, "\\X{%x}", c);
168   return 0;
169   }
170 c = (((c & 0x3ff) << 10) | (ptr[1] & 0x3ff)) + 0x10000;
171 fprintf(f, "\\x{%x}", c);
172 return 1;
173 #endif  /* PCRE2_CODE_UNIT_WIDTH == 16 */
174 
175 /* For UTF-32 we get here only for a malformed code unit, which should only
176 occur if the sanity check has been turned off. Print it with \X instead of \x
177 as an indication. */
178 
179 #if PCRE2_CODE_UNIT_WIDTH == 32
180 fprintf(f, "\\X{%x}", c);
181 return 0;
182 #endif  /* PCRE2_CODE_UNIT_WIDTH == 32 */
183 #endif  /* SUPPORT_UNICODE */
184 }
185 
186 
187 
188 /*************************************************
189 *     Print string as a list of code units       *
190 *************************************************/
191 
192 /* These take no account of UTF as they always print each individual code unit.
193 The string is zero-terminated for print_custring(); the length is given for
194 print_custring_bylen().
195 
196 Arguments:
197   f          file to write to
198   ptr        point to the string
199   len        length for print_custring_bylen()
200 
201 Returns:     nothing
202 */
203 
204 static void
print_custring(FILE * f,PCRE2_SPTR ptr)205 print_custring(FILE *f, PCRE2_SPTR ptr)
206 {
207 while (*ptr != '\0')
208   {
209   uint32_t c = *ptr++;
210   if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x{%x}", c);
211   }
212 }
213 
214 static void
print_custring_bylen(FILE * f,PCRE2_SPTR ptr,PCRE2_UCHAR len)215 print_custring_bylen(FILE *f, PCRE2_SPTR ptr, PCRE2_UCHAR len)
216 {
217 for (; len > 0; len--)
218   {
219   uint32_t c = *ptr++;
220   if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x{%x}", c);
221   }
222 }
223 
224 
225 
/*************************************************
*          Find Unicode property name            *
*************************************************/

/* Looks up the name for a (type, value) property pair by scanning the
property table from its last entry down to its first. The string "??" is
returned when no entry matches.

The table of names does not exist when there is no Unicode support. This
function should not be called in that configuration, because a pattern that
uses Unicode properties will not compile, but keeping one #ifdef here saves
scattering many through the main code.

Arguments:
  ptype      property type
  pvalue     property value

Returns:     the property name, or "??"
*/

static const char *
get_ucpname(unsigned int ptype, unsigned int pvalue)
{
#ifdef SUPPORT_UNICODE
  int idx = PRIV(utt_size);

  while (--idx >= 0) {
    if (PRIV(utt)[idx].type == ptype && PRIV(utt)[idx].value == pvalue) {
      return PRIV(utt_names) + PRIV(utt)[idx].name_offset;
    }
  }
  return "??";
#else   /* No UTF support */
  (void)ptype;
  (void)pvalue;
  return "??";
#endif  /* SUPPORT_UNICODE */
}
251 
252 
253 
254 /*************************************************
255 *       Print Unicode property value             *
256 *************************************************/
257 
258 /* "Normal" properties can be printed from tables. The PT_CLIST property is a
259 pseudo-property that contains a pointer to a list of case-equivalent
260 characters.
261 
262 Arguments:
263   f            file to write to
264   code         pointer in the compiled code
265   before       text to print before
266   after        text to print after
267 
268 Returns:       nothing
269 */
270 
271 static void
print_prop(FILE * f,PCRE2_SPTR code,const char * before,const char * after)272 print_prop(FILE *f, PCRE2_SPTR code, const char *before, const char *after)
273 {
274 if (code[1] != PT_CLIST)
275   {
276   fprintf(f, "%s%s %s%s", before, OP_names[*code], get_ucpname(code[1],
277     code[2]), after);
278   }
279 else
280   {
281   const char *not = (*code == OP_PROP)? "" : "not ";
282   const uint32_t *p = PRIV(ucd_caseless_sets) + code[2];
283   fprintf (f, "%s%sclist", before, not);
284   while (*p < NOTACHAR) fprintf(f, " %04x", *p++);
285   fprintf(f, "%s", after);
286   }
287 }
288 
289 
290 
291 /*************************************************
292 *            Print compiled pattern              *
293 *************************************************/
294 
295 /* The print_lengths flag controls whether offsets and lengths of items are
296 printed. Lengths can be turned off from pcre2test so that automatic tests on
297 bytecode can be written that do not depend on the value of LINK_SIZE.
298 
299 Arguments:
300   re              a compiled pattern
301   f               the file to write to
302   print_lengths   show various lengths
303 
304 Returns:          nothing
305 */
306 
307 static void
pcre2_printint(pcre2_code * re,FILE * f,BOOL print_lengths)308 pcre2_printint(pcre2_code *re, FILE *f, BOOL print_lengths)
309 {
310 PCRE2_SPTR codestart, nametable, code;
311 uint32_t nesize = re->name_entry_size;
312 BOOL utf = (re->overall_options & PCRE2_UTF) != 0;
313 
314 nametable = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code));
315 code = codestart = nametable + re->name_count * re->name_entry_size;
316 
317 for(;;)
318   {
319   PCRE2_SPTR ccode;
320   uint32_t c;
321   int i;
322   const char *flag = "  ";
323   unsigned int extra = 0;
324 
325   if (print_lengths)
326     fprintf(f, "%3d ", (int)(code - codestart));
327   else
328     fprintf(f, "    ");
329 
330   switch(*code)
331     {
332 /* ========================================================================== */
333       /* These cases are never obeyed. This is a fudge that causes a compile-
334       time error if the vectors OP_names or OP_lengths, which are indexed
335       by opcode, are not the correct length. It seems to be the only way to do
336       such a check at compile time, as the sizeof() operator does not work in
337       the C preprocessor. */
338 
339       case OP_TABLE_LENGTH:
340       case OP_TABLE_LENGTH +
341         ((sizeof(OP_names)/sizeof(const char *) == OP_TABLE_LENGTH) &&
342         (sizeof(OP_lengths) == OP_TABLE_LENGTH)):
343       return;
344 /* ========================================================================== */
345 
346     case OP_END:
347     fprintf(f, "    %s\n", OP_names[*code]);
348     fprintf(f, "------------------------------------------------------------------\n");
349     return;
350 
351     case OP_CHAR:
352     fprintf(f, "    ");
353     do
354       {
355       code++;
356       code += 1 + print_char(f, code, utf);
357       }
358     while (*code == OP_CHAR);
359     fprintf(f, "\n");
360     continue;
361 
362     case OP_CHARI:
363     fprintf(f, " /i ");
364     do
365       {
366       code++;
367       code += 1 + print_char(f, code, utf);
368       }
369     while (*code == OP_CHARI);
370     fprintf(f, "\n");
371     continue;
372 
373     case OP_CBRA:
374     case OP_CBRAPOS:
375     case OP_SCBRA:
376     case OP_SCBRAPOS:
377     if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
378       else fprintf(f, "    ");
379     fprintf(f, "%s %d", OP_names[*code], GET2(code, 1+LINK_SIZE));
380     break;
381 
382     case OP_BRA:
383     case OP_BRAPOS:
384     case OP_SBRA:
385     case OP_SBRAPOS:
386     case OP_KETRMAX:
387     case OP_KETRMIN:
388     case OP_KETRPOS:
389     case OP_ALT:
390     case OP_KET:
391     case OP_ASSERT:
392     case OP_ASSERT_NOT:
393     case OP_ASSERTBACK:
394     case OP_ASSERTBACK_NOT:
395     case OP_ASSERT_NA:
396     case OP_ASSERTBACK_NA:
397     case OP_ONCE:
398     case OP_SCRIPT_RUN:
399     case OP_COND:
400     case OP_SCOND:
401     case OP_REVERSE:
402     if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
403       else fprintf(f, "    ");
404     fprintf(f, "%s", OP_names[*code]);
405     break;
406 
407     case OP_CLOSE:
408     fprintf(f, "    %s %d", OP_names[*code], GET2(code, 1));
409     break;
410 
411     case OP_CREF:
412     fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
413     break;
414 
415     case OP_DNCREF:
416       {
417       PCRE2_SPTR entry = nametable + (GET2(code, 1) * nesize) + IMM2_SIZE;
418       fprintf(f, " %s Cond ref <", flag);
419       print_custring(f, entry);
420       fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
421       }
422     break;
423 
424     case OP_RREF:
425     c = GET2(code, 1);
426     if (c == RREF_ANY)
427       fprintf(f, "    Cond recurse any");
428     else
429       fprintf(f, "    Cond recurse %d", c);
430     break;
431 
432     case OP_DNRREF:
433       {
434       PCRE2_SPTR entry = nametable + (GET2(code, 1) * nesize) + IMM2_SIZE;
435       fprintf(f, " %s Cond recurse <", flag);
436       print_custring(f, entry);
437       fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
438       }
439     break;
440 
441     case OP_FALSE:
442     fprintf(f, "    Cond false");
443     break;
444 
445     case OP_TRUE:
446     fprintf(f, "    Cond true");
447     break;
448 
449     case OP_STARI:
450     case OP_MINSTARI:
451     case OP_POSSTARI:
452     case OP_PLUSI:
453     case OP_MINPLUSI:
454     case OP_POSPLUSI:
455     case OP_QUERYI:
456     case OP_MINQUERYI:
457     case OP_POSQUERYI:
458     flag = "/i";
459     /* Fall through */
460     case OP_STAR:
461     case OP_MINSTAR:
462     case OP_POSSTAR:
463     case OP_PLUS:
464     case OP_MINPLUS:
465     case OP_POSPLUS:
466     case OP_QUERY:
467     case OP_MINQUERY:
468     case OP_POSQUERY:
469     case OP_TYPESTAR:
470     case OP_TYPEMINSTAR:
471     case OP_TYPEPOSSTAR:
472     case OP_TYPEPLUS:
473     case OP_TYPEMINPLUS:
474     case OP_TYPEPOSPLUS:
475     case OP_TYPEQUERY:
476     case OP_TYPEMINQUERY:
477     case OP_TYPEPOSQUERY:
478     fprintf(f, " %s ", flag);
479 
480     if (*code >= OP_TYPESTAR)
481       {
482       if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
483         {
484         print_prop(f, code + 1, "", " ");
485         extra = 2;
486         }
487       else fprintf(f, "%s", OP_names[code[1]]);
488       }
489     else extra = print_char(f, code+1, utf);
490     fprintf(f, "%s", OP_names[*code]);
491     break;
492 
493     case OP_EXACTI:
494     case OP_UPTOI:
495     case OP_MINUPTOI:
496     case OP_POSUPTOI:
497     flag = "/i";
498     /* Fall through */
499     case OP_EXACT:
500     case OP_UPTO:
501     case OP_MINUPTO:
502     case OP_POSUPTO:
503     fprintf(f, " %s ", flag);
504     extra = print_char(f, code + 1 + IMM2_SIZE, utf);
505     fprintf(f, "{");
506     if (*code != OP_EXACT && *code != OP_EXACTI) fprintf(f, "0,");
507     fprintf(f, "%d}", GET2(code,1));
508     if (*code == OP_MINUPTO || *code == OP_MINUPTOI) fprintf(f, "?");
509       else if (*code == OP_POSUPTO || *code == OP_POSUPTOI) fprintf(f, "+");
510     break;
511 
512     case OP_TYPEEXACT:
513     case OP_TYPEUPTO:
514     case OP_TYPEMINUPTO:
515     case OP_TYPEPOSUPTO:
516     if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
517       {
518       print_prop(f, code + IMM2_SIZE + 1, "    ", " ");
519       extra = 2;
520       }
521     else fprintf(f, "    %s", OP_names[code[1 + IMM2_SIZE]]);
522     fprintf(f, "{");
523     if (*code != OP_TYPEEXACT) fprintf(f, "0,");
524     fprintf(f, "%d}", GET2(code,1));
525     if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
526       else if (*code == OP_TYPEPOSUPTO) fprintf(f, "+");
527     break;
528 
529     case OP_NOTI:
530     flag = "/i";
531     /* Fall through */
532     case OP_NOT:
533     fprintf(f, " %s [^", flag);
534     extra = print_char(f, code + 1, utf);
535     fprintf(f, "]");
536     break;
537 
538     case OP_NOTSTARI:
539     case OP_NOTMINSTARI:
540     case OP_NOTPOSSTARI:
541     case OP_NOTPLUSI:
542     case OP_NOTMINPLUSI:
543     case OP_NOTPOSPLUSI:
544     case OP_NOTQUERYI:
545     case OP_NOTMINQUERYI:
546     case OP_NOTPOSQUERYI:
547     flag = "/i";
548     /* Fall through */
549 
550     case OP_NOTSTAR:
551     case OP_NOTMINSTAR:
552     case OP_NOTPOSSTAR:
553     case OP_NOTPLUS:
554     case OP_NOTMINPLUS:
555     case OP_NOTPOSPLUS:
556     case OP_NOTQUERY:
557     case OP_NOTMINQUERY:
558     case OP_NOTPOSQUERY:
559     fprintf(f, " %s [^", flag);
560     extra = print_char(f, code + 1, utf);
561     fprintf(f, "]%s", OP_names[*code]);
562     break;
563 
564     case OP_NOTEXACTI:
565     case OP_NOTUPTOI:
566     case OP_NOTMINUPTOI:
567     case OP_NOTPOSUPTOI:
568     flag = "/i";
569     /* Fall through */
570 
571     case OP_NOTEXACT:
572     case OP_NOTUPTO:
573     case OP_NOTMINUPTO:
574     case OP_NOTPOSUPTO:
575     fprintf(f, " %s [^", flag);
576     extra = print_char(f, code + 1 + IMM2_SIZE, utf);
577     fprintf(f, "]{");
578     if (*code != OP_NOTEXACT && *code != OP_NOTEXACTI) fprintf(f, "0,");
579     fprintf(f, "%d}", GET2(code,1));
580     if (*code == OP_NOTMINUPTO || *code == OP_NOTMINUPTOI) fprintf(f, "?");
581       else
582     if (*code == OP_NOTPOSUPTO || *code == OP_NOTPOSUPTOI) fprintf(f, "+");
583     break;
584 
585     case OP_RECURSE:
586     if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
587       else fprintf(f, "    ");
588     fprintf(f, "%s", OP_names[*code]);
589     break;
590 
591     case OP_REFI:
592     flag = "/i";
593     /* Fall through */
594     case OP_REF:
595     fprintf(f, " %s \\%d", flag, GET2(code,1));
596     ccode = code + OP_lengths[*code];
597     goto CLASS_REF_REPEAT;
598 
599     case OP_DNREFI:
600     flag = "/i";
601     /* Fall through */
602     case OP_DNREF:
603       {
604       PCRE2_SPTR entry = nametable + (GET2(code, 1) * nesize) + IMM2_SIZE;
605       fprintf(f, " %s \\k<", flag);
606       print_custring(f, entry);
607       fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
608       }
609     ccode = code + OP_lengths[*code];
610     goto CLASS_REF_REPEAT;
611 
612     case OP_CALLOUT:
613     fprintf(f, "    %s %d %d %d", OP_names[*code], code[1 + 2*LINK_SIZE],
614       GET(code, 1), GET(code, 1 + LINK_SIZE));
615     break;
616 
617     case OP_CALLOUT_STR:
618     c = code[1 + 4*LINK_SIZE];
619     fprintf(f, "    %s %c", OP_names[*code], c);
620     extra = GET(code, 1 + 2*LINK_SIZE);
621     print_custring_bylen(f, code + 2 + 4*LINK_SIZE, extra - 3 - 4*LINK_SIZE);
622     for (i = 0; PRIV(callout_start_delims)[i] != 0; i++)
623       if (c == PRIV(callout_start_delims)[i])
624         {
625         c = PRIV(callout_end_delims)[i];
626         break;
627         }
628     fprintf(f, "%c %d %d %d", c, GET(code, 1 + 3*LINK_SIZE), GET(code, 1),
629       GET(code, 1 + LINK_SIZE));
630     break;
631 
632     case OP_PROP:
633     case OP_NOTPROP:
634     print_prop(f, code, "    ", "");
635     break;
636 
637     /* OP_XCLASS cannot occur in 8-bit, non-UTF mode. However, there's no harm
638     in having this code always here, and it makes it less messy without all
639     those #ifdefs. */
640 
641     case OP_CLASS:
642     case OP_NCLASS:
643     case OP_XCLASS:
644       {
645       unsigned int min, max;
646       BOOL printmap;
647       BOOL invertmap = FALSE;
648       uint8_t *map;
649       uint8_t inverted_map[32];
650 
651       fprintf(f, "    [");
652 
653       if (*code == OP_XCLASS)
654         {
655         extra = GET(code, 1);
656         ccode = code + LINK_SIZE + 1;
657         printmap = (*ccode & XCL_MAP) != 0;
658         if ((*ccode & XCL_NOT) != 0)
659           {
660           invertmap = (*ccode & XCL_HASPROP) == 0;
661           fprintf(f, "^");
662           }
663         ccode++;
664         }
665       else
666         {
667         printmap = TRUE;
668         ccode = code + 1;
669         }
670 
671       /* Print a bit map */
672 
673       if (printmap)
674         {
675         map = (uint8_t *)ccode;
676         if (invertmap)
677           {
678           /* Using 255 ^ instead of ~ avoids clang sanitize warning. */
679           for (i = 0; i < 32; i++) inverted_map[i] = 255 ^ map[i];
680           map = inverted_map;
681           }
682 
683         for (i = 0; i < 256; i++)
684           {
685           if ((map[i/8] & (1u << (i&7))) != 0)
686             {
687             int j;
688             for (j = i+1; j < 256; j++)
689               if ((map[j/8] & (1u << (j&7))) == 0) break;
690             if (i == '-' || i == ']') fprintf(f, "\\");
691             if (PRINTABLE(i)) fprintf(f, "%c", i);
692               else fprintf(f, "\\x%02x", i);
693             if (--j > i)
694               {
695               if (j != i + 1) fprintf(f, "-");
696               if (j == '-' || j == ']') fprintf(f, "\\");
697               if (PRINTABLE(j)) fprintf(f, "%c", j);
698                 else fprintf(f, "\\x%02x", j);
699               }
700             i = j;
701             }
702           }
703         ccode += 32 / sizeof(PCRE2_UCHAR);
704         }
705 
706       /* For an XCLASS there is always some additional data */
707 
708       if (*code == OP_XCLASS)
709         {
710         PCRE2_UCHAR ch;
711         while ((ch = *ccode++) != XCL_END)
712           {
713           BOOL not = FALSE;
714           const char *notch = "";
715 
716           switch(ch)
717             {
718             case XCL_NOTPROP:
719             not = TRUE;
720             notch = "^";
721             /* Fall through */
722 
723             case XCL_PROP:
724               {
725               unsigned int ptype = *ccode++;
726               unsigned int pvalue = *ccode++;
727 
728               switch(ptype)
729                 {
730                 case PT_PXGRAPH:
731                 fprintf(f, "[:%sgraph:]", notch);
732                 break;
733 
734                 case PT_PXPRINT:
735                 fprintf(f, "[:%sprint:]", notch);
736                 break;
737 
738                 case PT_PXPUNCT:
739                 fprintf(f, "[:%spunct:]", notch);
740                 break;
741 
742                 default:
743                 fprintf(f, "\\%c{%s}", (not? 'P':'p'),
744                   get_ucpname(ptype, pvalue));
745                 break;
746                 }
747               }
748             break;
749 
750             default:
751             ccode += 1 + print_char(f, ccode, utf);
752             if (ch == XCL_RANGE)
753               {
754               fprintf(f, "-");
755               ccode += 1 + print_char(f, ccode, utf);
756               }
757             break;
758             }
759           }
760         }
761 
762       /* Indicate a non-UTF class which was created by negation */
763 
764       fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : "");
765 
766       /* Handle repeats after a class or a back reference */
767 
768       CLASS_REF_REPEAT:
769       switch(*ccode)
770         {
771         case OP_CRSTAR:
772         case OP_CRMINSTAR:
773         case OP_CRPLUS:
774         case OP_CRMINPLUS:
775         case OP_CRQUERY:
776         case OP_CRMINQUERY:
777         case OP_CRPOSSTAR:
778         case OP_CRPOSPLUS:
779         case OP_CRPOSQUERY:
780         fprintf(f, "%s", OP_names[*ccode]);
781         extra += OP_lengths[*ccode];
782         break;
783 
784         case OP_CRRANGE:
785         case OP_CRMINRANGE:
786         case OP_CRPOSRANGE:
787         min = GET2(ccode,1);
788         max = GET2(ccode,1 + IMM2_SIZE);
789         if (max == 0) fprintf(f, "{%u,}", min);
790         else fprintf(f, "{%u,%u}", min, max);
791         if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
792         else if (*ccode == OP_CRPOSRANGE) fprintf(f, "+");
793         extra += OP_lengths[*ccode];
794         break;
795 
796         /* Do nothing if it's not a repeat; this code stops picky compilers
797         warning about the lack of a default code path. */
798 
799         default:
800         break;
801         }
802       }
803     break;
804 
805     case OP_MARK:
806     case OP_COMMIT_ARG:
807     case OP_PRUNE_ARG:
808     case OP_SKIP_ARG:
809     case OP_THEN_ARG:
810     fprintf(f, "    %s ", OP_names[*code]);
811     print_custring_bylen(f, code + 2, code[1]);
812     extra += code[1];
813     break;
814 
815     case OP_THEN:
816     fprintf(f, "    %s", OP_names[*code]);
817     break;
818 
819     case OP_CIRCM:
820     case OP_DOLLM:
821     flag = "/m";
822     /* Fall through */
823 
824     /* Anything else is just an item with no data, but possibly a flag. */
825 
826     default:
827     fprintf(f, " %s %s", flag, OP_names[*code]);
828     break;
829     }
830 
831   code += OP_lengths[*code] + extra;
832   fprintf(f, "\n");
833   }
834 }
835 
836 /* End of pcre2_printint.c */
837