/*************************************************
*      Perl-Compatible Regular Expressions       *
*************************************************/

/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
     Original API code Copyright (c) 1997-2012 University of Cambridge
          New API code Copyright (c) 2016-2019 University of Cambridge

-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

    * Neither the name of the University of Cambridge nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/


/* This module contains a PCRE private debugging function for printing out the
internal form of a compiled regular expression, along with some supporting
local functions. This source file is #included in pcre2test.c at each supported
code unit width, with PCRE2_SUFFIX set appropriately, just like the functions
that comprise the library. It can also optionally be included in
pcre2_compile.c for detailed debugging in error situations. */


50 /* Tables of operator names. The same 8-bit table is used for all code unit
51 widths, so it must be defined only once. The list itself is defined in
52 pcre2_internal.h, which is #included by pcre2test before this file. */
53
54 #ifndef OP_LISTS_DEFINED
55 static const char *OP_names[] = { OP_NAME_LIST };
56 #define OP_LISTS_DEFINED
57 #endif
58
59 /* The functions and tables herein must all have mode-dependent names. */
60
61 #define OP_lengths PCRE2_SUFFIX(OP_lengths_)
62 #define get_ucpname PCRE2_SUFFIX(get_ucpname_)
63 #define pcre2_printint PCRE2_SUFFIX(pcre2_printint_)
64 #define print_char PCRE2_SUFFIX(print_char_)
65 #define print_custring PCRE2_SUFFIX(print_custring_)
66 #define print_custring_bylen PCRE2_SUFFIX(print_custring_bylen_)
67 #define print_prop PCRE2_SUFFIX(print_prop_)
68
69 /* Table of sizes for the fixed-length opcodes. It's defined in a macro so that
70 the definition is next to the definition of the opcodes in pcre2_internal.h.
71 The contents of the table are, however, mode-dependent. */
72
73 static const uint8_t OP_lengths[] = { OP_LENGTHS };
74
75
76
77 /*************************************************
78 * Print one character from a string *
79 *************************************************/
80
81 /* In UTF mode the character may occupy more than one code unit.
82
83 Arguments:
84 f file to write to
85 ptr pointer to first code unit of the character
86 utf TRUE if string is UTF (will be FALSE if UTF is not supported)
87
88 Returns: number of additional code units used
89 */
90
91 static unsigned int
print_char(FILE * f,PCRE2_SPTR ptr,BOOL utf)92 print_char(FILE *f, PCRE2_SPTR ptr, BOOL utf)
93 {
94 uint32_t c = *ptr;
95 BOOL one_code_unit = !utf;
96
97 /* If UTF is supported and requested, check for a valid single code unit. */
98
99 #ifdef SUPPORT_UNICODE
100 if (utf)
101 {
102 #if PCRE2_CODE_UNIT_WIDTH == 8
103 one_code_unit = c < 0x80;
104 #elif PCRE2_CODE_UNIT_WIDTH == 16
105 one_code_unit = (c & 0xfc00) != 0xd800;
106 #else
107 one_code_unit = (c & 0xfffff800u) != 0xd800u;
108 #endif /* CODE_UNIT_WIDTH */
109 }
110 #endif /* SUPPORT_UNICODE */
111
112 /* Handle a valid one-code-unit character at any width. */
113
114 if (one_code_unit)
115 {
116 if (PRINTABLE(c)) fprintf(f, "%c", (char)c);
117 else if (c < 0x80) fprintf(f, "\\x%02x", c);
118 else fprintf(f, "\\x{%02x}", c);
119 return 0;
120 }
121
122 /* Code for invalid UTF code units and multi-unit UTF characters is different
123 for each width. If UTF is not supported, control should never get here, but we
124 need a return statement to keep the compiler happy. */
125
126 #ifndef SUPPORT_UNICODE
127 return 0;
128 #else
129
130 /* Malformed UTF-8 should occur only if the sanity check has been turned off.
131 Rather than swallow random bytes, just stop if we hit a bad one. Print it with
132 \X instead of \x as an indication. */
133
134 #if PCRE2_CODE_UNIT_WIDTH == 8
135 if ((c & 0xc0) != 0xc0)
136 {
137 fprintf(f, "\\X{%x}", c); /* Invalid starting byte */
138 return 0;
139 }
140 else
141 {
142 int i;
143 int a = PRIV(utf8_table4)[c & 0x3f]; /* Number of additional bytes */
144 int s = 6*a;
145 c = (c & PRIV(utf8_table3)[a]) << s;
146 for (i = 1; i <= a; i++)
147 {
148 if ((ptr[i] & 0xc0) != 0x80)
149 {
150 fprintf(f, "\\X{%x}", c); /* Invalid secondary byte */
151 return i - 1;
152 }
153 s -= 6;
154 c |= (ptr[i] & 0x3f) << s;
155 }
156 fprintf(f, "\\x{%x}", c);
157 return a;
158 }
159 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
160
161 /* UTF-16: rather than swallow a low surrogate, just stop if we hit a bad one.
162 Print it with \X instead of \x as an indication. */
163
164 #if PCRE2_CODE_UNIT_WIDTH == 16
165 if ((ptr[1] & 0xfc00) != 0xdc00)
166 {
167 fprintf(f, "\\X{%x}", c);
168 return 0;
169 }
170 c = (((c & 0x3ff) << 10) | (ptr[1] & 0x3ff)) + 0x10000;
171 fprintf(f, "\\x{%x}", c);
172 return 1;
173 #endif /* PCRE2_CODE_UNIT_WIDTH == 16 */
174
175 /* For UTF-32 we get here only for a malformed code unit, which should only
176 occur if the sanity check has been turned off. Print it with \X instead of \x
177 as an indication. */
178
179 #if PCRE2_CODE_UNIT_WIDTH == 32
180 fprintf(f, "\\X{%x}", c);
181 return 0;
182 #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
183 #endif /* SUPPORT_UNICODE */
184 }
185
186
187
188 /*************************************************
189 * Print string as a list of code units *
190 *************************************************/
191
192 /* These take no account of UTF as they always print each individual code unit.
193 The string is zero-terminated for print_custring(); the length is given for
194 print_custring_bylen().
195
196 Arguments:
197 f file to write to
198 ptr point to the string
199 len length for print_custring_bylen()
200
201 Returns: nothing
202 */
203
204 static void
print_custring(FILE * f,PCRE2_SPTR ptr)205 print_custring(FILE *f, PCRE2_SPTR ptr)
206 {
207 while (*ptr != '\0')
208 {
209 uint32_t c = *ptr++;
210 if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x{%x}", c);
211 }
212 }
213
214 static void
print_custring_bylen(FILE * f,PCRE2_SPTR ptr,PCRE2_UCHAR len)215 print_custring_bylen(FILE *f, PCRE2_SPTR ptr, PCRE2_UCHAR len)
216 {
217 for (; len > 0; len--)
218 {
219 uint32_t c = *ptr++;
220 if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x{%x}", c);
221 }
222 }
223
224
225
/*************************************************
*          Find Unicode property name            *
*************************************************/

/* When there is no UTF/UCP support, the table of names does not exist. This
function should not be called in such configurations, because a pattern that
tries to use Unicode properties won't compile. Rather than put lots of #ifdefs
into the main code, however, we just put one into this function.

Arguments:
  ptype       property type to look up
  pvalue      property value to look up

Returns:      pointer to the name of the matching table entry, or "??" if no
              entry matches or there is no Unicode support
*/

static const char *
get_ucpname(unsigned int ptype, unsigned int pvalue)
{
#ifdef SUPPORT_UNICODE
int i;

/* Scan from the end of the table towards the start, so that when several
entries match, the one with the highest index supplies the name. */

for (i = PRIV(utt_size) - 1; i >= 0; i--)
  {
  if (ptype == PRIV(utt)[i].type && pvalue == PRIV(utt)[i].value)
    return PRIV(utt_names) + PRIV(utt)[i].name_offset;
  }
return "??";
#else   /* No UTF support */
(void)ptype;
(void)pvalue;
return "??";
#endif  /* SUPPORT_UNICODE */
}



254 /*************************************************
255 * Print Unicode property value *
256 *************************************************/
257
258 /* "Normal" properties can be printed from tables. The PT_CLIST property is a
259 pseudo-property that contains a pointer to a list of case-equivalent
260 characters.
261
262 Arguments:
263 f file to write to
264 code pointer in the compiled code
265 before text to print before
266 after text to print after
267
268 Returns: nothing
269 */
270
271 static void
print_prop(FILE * f,PCRE2_SPTR code,const char * before,const char * after)272 print_prop(FILE *f, PCRE2_SPTR code, const char *before, const char *after)
273 {
274 if (code[1] != PT_CLIST)
275 {
276 fprintf(f, "%s%s %s%s", before, OP_names[*code], get_ucpname(code[1],
277 code[2]), after);
278 }
279 else
280 {
281 const char *not = (*code == OP_PROP)? "" : "not ";
282 const uint32_t *p = PRIV(ucd_caseless_sets) + code[2];
283 fprintf (f, "%s%sclist", before, not);
284 while (*p < NOTACHAR) fprintf(f, " %04x", *p++);
285 fprintf(f, "%s", after);
286 }
287 }
288
289
290
291 /*************************************************
292 * Print compiled pattern *
293 *************************************************/
294
295 /* The print_lengths flag controls whether offsets and lengths of items are
296 printed. Lenths can be turned off from pcre2test so that automatic tests on
297 bytecode can be written that do not depend on the value of LINK_SIZE.
298
299 Arguments:
300 re a compiled pattern
301 f the file to write to
302 print_lengths show various lengths
303
304 Returns: nothing
305 */
306
307 static void
pcre2_printint(pcre2_code * re,FILE * f,BOOL print_lengths)308 pcre2_printint(pcre2_code *re, FILE *f, BOOL print_lengths)
309 {
310 PCRE2_SPTR codestart, nametable, code;
311 uint32_t nesize = re->name_entry_size;
312 BOOL utf = (re->overall_options & PCRE2_UTF) != 0;
313
314 nametable = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code));
315 code = codestart = nametable + re->name_count * re->name_entry_size;
316
317 for(;;)
318 {
319 PCRE2_SPTR ccode;
320 uint32_t c;
321 int i;
322 const char *flag = " ";
323 unsigned int extra = 0;
324
325 if (print_lengths)
326 fprintf(f, "%3d ", (int)(code - codestart));
327 else
328 fprintf(f, " ");
329
330 switch(*code)
331 {
332 /* ========================================================================== */
333 /* These cases are never obeyed. This is a fudge that causes a compile-
334 time error if the vectors OP_names or OP_lengths, which are indexed
335 by opcode, are not the correct length. It seems to be the only way to do
336 such a check at compile time, as the sizeof() operator does not work in
337 the C preprocessor. */
338
339 case OP_TABLE_LENGTH:
340 case OP_TABLE_LENGTH +
341 ((sizeof(OP_names)/sizeof(const char *) == OP_TABLE_LENGTH) &&
342 (sizeof(OP_lengths) == OP_TABLE_LENGTH)):
343 return;
344 /* ========================================================================== */
345
346 case OP_END:
347 fprintf(f, " %s\n", OP_names[*code]);
348 fprintf(f, "------------------------------------------------------------------\n");
349 return;
350
351 case OP_CHAR:
352 fprintf(f, " ");
353 do
354 {
355 code++;
356 code += 1 + print_char(f, code, utf);
357 }
358 while (*code == OP_CHAR);
359 fprintf(f, "\n");
360 continue;
361
362 case OP_CHARI:
363 fprintf(f, " /i ");
364 do
365 {
366 code++;
367 code += 1 + print_char(f, code, utf);
368 }
369 while (*code == OP_CHARI);
370 fprintf(f, "\n");
371 continue;
372
373 case OP_CBRA:
374 case OP_CBRAPOS:
375 case OP_SCBRA:
376 case OP_SCBRAPOS:
377 if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
378 else fprintf(f, " ");
379 fprintf(f, "%s %d", OP_names[*code], GET2(code, 1+LINK_SIZE));
380 break;
381
382 case OP_BRA:
383 case OP_BRAPOS:
384 case OP_SBRA:
385 case OP_SBRAPOS:
386 case OP_KETRMAX:
387 case OP_KETRMIN:
388 case OP_KETRPOS:
389 case OP_ALT:
390 case OP_KET:
391 case OP_ASSERT:
392 case OP_ASSERT_NOT:
393 case OP_ASSERTBACK:
394 case OP_ASSERTBACK_NOT:
395 case OP_ASSERT_NA:
396 case OP_ASSERTBACK_NA:
397 case OP_ONCE:
398 case OP_SCRIPT_RUN:
399 case OP_COND:
400 case OP_SCOND:
401 case OP_REVERSE:
402 if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
403 else fprintf(f, " ");
404 fprintf(f, "%s", OP_names[*code]);
405 break;
406
407 case OP_CLOSE:
408 fprintf(f, " %s %d", OP_names[*code], GET2(code, 1));
409 break;
410
411 case OP_CREF:
412 fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
413 break;
414
415 case OP_DNCREF:
416 {
417 PCRE2_SPTR entry = nametable + (GET2(code, 1) * nesize) + IMM2_SIZE;
418 fprintf(f, " %s Cond ref <", flag);
419 print_custring(f, entry);
420 fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
421 }
422 break;
423
424 case OP_RREF:
425 c = GET2(code, 1);
426 if (c == RREF_ANY)
427 fprintf(f, " Cond recurse any");
428 else
429 fprintf(f, " Cond recurse %d", c);
430 break;
431
432 case OP_DNRREF:
433 {
434 PCRE2_SPTR entry = nametable + (GET2(code, 1) * nesize) + IMM2_SIZE;
435 fprintf(f, " %s Cond recurse <", flag);
436 print_custring(f, entry);
437 fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
438 }
439 break;
440
441 case OP_FALSE:
442 fprintf(f, " Cond false");
443 break;
444
445 case OP_TRUE:
446 fprintf(f, " Cond true");
447 break;
448
449 case OP_STARI:
450 case OP_MINSTARI:
451 case OP_POSSTARI:
452 case OP_PLUSI:
453 case OP_MINPLUSI:
454 case OP_POSPLUSI:
455 case OP_QUERYI:
456 case OP_MINQUERYI:
457 case OP_POSQUERYI:
458 flag = "/i";
459 /* Fall through */
460 case OP_STAR:
461 case OP_MINSTAR:
462 case OP_POSSTAR:
463 case OP_PLUS:
464 case OP_MINPLUS:
465 case OP_POSPLUS:
466 case OP_QUERY:
467 case OP_MINQUERY:
468 case OP_POSQUERY:
469 case OP_TYPESTAR:
470 case OP_TYPEMINSTAR:
471 case OP_TYPEPOSSTAR:
472 case OP_TYPEPLUS:
473 case OP_TYPEMINPLUS:
474 case OP_TYPEPOSPLUS:
475 case OP_TYPEQUERY:
476 case OP_TYPEMINQUERY:
477 case OP_TYPEPOSQUERY:
478 fprintf(f, " %s ", flag);
479
480 if (*code >= OP_TYPESTAR)
481 {
482 if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
483 {
484 print_prop(f, code + 1, "", " ");
485 extra = 2;
486 }
487 else fprintf(f, "%s", OP_names[code[1]]);
488 }
489 else extra = print_char(f, code+1, utf);
490 fprintf(f, "%s", OP_names[*code]);
491 break;
492
493 case OP_EXACTI:
494 case OP_UPTOI:
495 case OP_MINUPTOI:
496 case OP_POSUPTOI:
497 flag = "/i";
498 /* Fall through */
499 case OP_EXACT:
500 case OP_UPTO:
501 case OP_MINUPTO:
502 case OP_POSUPTO:
503 fprintf(f, " %s ", flag);
504 extra = print_char(f, code + 1 + IMM2_SIZE, utf);
505 fprintf(f, "{");
506 if (*code != OP_EXACT && *code != OP_EXACTI) fprintf(f, "0,");
507 fprintf(f, "%d}", GET2(code,1));
508 if (*code == OP_MINUPTO || *code == OP_MINUPTOI) fprintf(f, "?");
509 else if (*code == OP_POSUPTO || *code == OP_POSUPTOI) fprintf(f, "+");
510 break;
511
512 case OP_TYPEEXACT:
513 case OP_TYPEUPTO:
514 case OP_TYPEMINUPTO:
515 case OP_TYPEPOSUPTO:
516 if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
517 {
518 print_prop(f, code + IMM2_SIZE + 1, " ", " ");
519 extra = 2;
520 }
521 else fprintf(f, " %s", OP_names[code[1 + IMM2_SIZE]]);
522 fprintf(f, "{");
523 if (*code != OP_TYPEEXACT) fprintf(f, "0,");
524 fprintf(f, "%d}", GET2(code,1));
525 if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
526 else if (*code == OP_TYPEPOSUPTO) fprintf(f, "+");
527 break;
528
529 case OP_NOTI:
530 flag = "/i";
531 /* Fall through */
532 case OP_NOT:
533 fprintf(f, " %s [^", flag);
534 extra = print_char(f, code + 1, utf);
535 fprintf(f, "]");
536 break;
537
538 case OP_NOTSTARI:
539 case OP_NOTMINSTARI:
540 case OP_NOTPOSSTARI:
541 case OP_NOTPLUSI:
542 case OP_NOTMINPLUSI:
543 case OP_NOTPOSPLUSI:
544 case OP_NOTQUERYI:
545 case OP_NOTMINQUERYI:
546 case OP_NOTPOSQUERYI:
547 flag = "/i";
548 /* Fall through */
549
550 case OP_NOTSTAR:
551 case OP_NOTMINSTAR:
552 case OP_NOTPOSSTAR:
553 case OP_NOTPLUS:
554 case OP_NOTMINPLUS:
555 case OP_NOTPOSPLUS:
556 case OP_NOTQUERY:
557 case OP_NOTMINQUERY:
558 case OP_NOTPOSQUERY:
559 fprintf(f, " %s [^", flag);
560 extra = print_char(f, code + 1, utf);
561 fprintf(f, "]%s", OP_names[*code]);
562 break;
563
564 case OP_NOTEXACTI:
565 case OP_NOTUPTOI:
566 case OP_NOTMINUPTOI:
567 case OP_NOTPOSUPTOI:
568 flag = "/i";
569 /* Fall through */
570
571 case OP_NOTEXACT:
572 case OP_NOTUPTO:
573 case OP_NOTMINUPTO:
574 case OP_NOTPOSUPTO:
575 fprintf(f, " %s [^", flag);
576 extra = print_char(f, code + 1 + IMM2_SIZE, utf);
577 fprintf(f, "]{");
578 if (*code != OP_NOTEXACT && *code != OP_NOTEXACTI) fprintf(f, "0,");
579 fprintf(f, "%d}", GET2(code,1));
580 if (*code == OP_NOTMINUPTO || *code == OP_NOTMINUPTOI) fprintf(f, "?");
581 else
582 if (*code == OP_NOTPOSUPTO || *code == OP_NOTPOSUPTOI) fprintf(f, "+");
583 break;
584
585 case OP_RECURSE:
586 if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
587 else fprintf(f, " ");
588 fprintf(f, "%s", OP_names[*code]);
589 break;
590
591 case OP_REFI:
592 flag = "/i";
593 /* Fall through */
594 case OP_REF:
595 fprintf(f, " %s \\%d", flag, GET2(code,1));
596 ccode = code + OP_lengths[*code];
597 goto CLASS_REF_REPEAT;
598
599 case OP_DNREFI:
600 flag = "/i";
601 /* Fall through */
602 case OP_DNREF:
603 {
604 PCRE2_SPTR entry = nametable + (GET2(code, 1) * nesize) + IMM2_SIZE;
605 fprintf(f, " %s \\k<", flag);
606 print_custring(f, entry);
607 fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
608 }
609 ccode = code + OP_lengths[*code];
610 goto CLASS_REF_REPEAT;
611
612 case OP_CALLOUT:
613 fprintf(f, " %s %d %d %d", OP_names[*code], code[1 + 2*LINK_SIZE],
614 GET(code, 1), GET(code, 1 + LINK_SIZE));
615 break;
616
617 case OP_CALLOUT_STR:
618 c = code[1 + 4*LINK_SIZE];
619 fprintf(f, " %s %c", OP_names[*code], c);
620 extra = GET(code, 1 + 2*LINK_SIZE);
621 print_custring_bylen(f, code + 2 + 4*LINK_SIZE, extra - 3 - 4*LINK_SIZE);
622 for (i = 0; PRIV(callout_start_delims)[i] != 0; i++)
623 if (c == PRIV(callout_start_delims)[i])
624 {
625 c = PRIV(callout_end_delims)[i];
626 break;
627 }
628 fprintf(f, "%c %d %d %d", c, GET(code, 1 + 3*LINK_SIZE), GET(code, 1),
629 GET(code, 1 + LINK_SIZE));
630 break;
631
632 case OP_PROP:
633 case OP_NOTPROP:
634 print_prop(f, code, " ", "");
635 break;
636
637 /* OP_XCLASS cannot occur in 8-bit, non-UTF mode. However, there's no harm
638 in having this code always here, and it makes it less messy without all
639 those #ifdefs. */
640
641 case OP_CLASS:
642 case OP_NCLASS:
643 case OP_XCLASS:
644 {
645 unsigned int min, max;
646 BOOL printmap;
647 BOOL invertmap = FALSE;
648 uint8_t *map;
649 uint8_t inverted_map[32];
650
651 fprintf(f, " [");
652
653 if (*code == OP_XCLASS)
654 {
655 extra = GET(code, 1);
656 ccode = code + LINK_SIZE + 1;
657 printmap = (*ccode & XCL_MAP) != 0;
658 if ((*ccode & XCL_NOT) != 0)
659 {
660 invertmap = (*ccode & XCL_HASPROP) == 0;
661 fprintf(f, "^");
662 }
663 ccode++;
664 }
665 else
666 {
667 printmap = TRUE;
668 ccode = code + 1;
669 }
670
671 /* Print a bit map */
672
673 if (printmap)
674 {
675 map = (uint8_t *)ccode;
676 if (invertmap)
677 {
678 /* Using 255 ^ instead of ~ avoids clang sanitize warning. */
679 for (i = 0; i < 32; i++) inverted_map[i] = 255 ^ map[i];
680 map = inverted_map;
681 }
682
683 for (i = 0; i < 256; i++)
684 {
685 if ((map[i/8] & (1u << (i&7))) != 0)
686 {
687 int j;
688 for (j = i+1; j < 256; j++)
689 if ((map[j/8] & (1u << (j&7))) == 0) break;
690 if (i == '-' || i == ']') fprintf(f, "\\");
691 if (PRINTABLE(i)) fprintf(f, "%c", i);
692 else fprintf(f, "\\x%02x", i);
693 if (--j > i)
694 {
695 if (j != i + 1) fprintf(f, "-");
696 if (j == '-' || j == ']') fprintf(f, "\\");
697 if (PRINTABLE(j)) fprintf(f, "%c", j);
698 else fprintf(f, "\\x%02x", j);
699 }
700 i = j;
701 }
702 }
703 ccode += 32 / sizeof(PCRE2_UCHAR);
704 }
705
706 /* For an XCLASS there is always some additional data */
707
708 if (*code == OP_XCLASS)
709 {
710 PCRE2_UCHAR ch;
711 while ((ch = *ccode++) != XCL_END)
712 {
713 BOOL not = FALSE;
714 const char *notch = "";
715
716 switch(ch)
717 {
718 case XCL_NOTPROP:
719 not = TRUE;
720 notch = "^";
721 /* Fall through */
722
723 case XCL_PROP:
724 {
725 unsigned int ptype = *ccode++;
726 unsigned int pvalue = *ccode++;
727
728 switch(ptype)
729 {
730 case PT_PXGRAPH:
731 fprintf(f, "[:%sgraph:]", notch);
732 break;
733
734 case PT_PXPRINT:
735 fprintf(f, "[:%sprint:]", notch);
736 break;
737
738 case PT_PXPUNCT:
739 fprintf(f, "[:%spunct:]", notch);
740 break;
741
742 default:
743 fprintf(f, "\\%c{%s}", (not? 'P':'p'),
744 get_ucpname(ptype, pvalue));
745 break;
746 }
747 }
748 break;
749
750 default:
751 ccode += 1 + print_char(f, ccode, utf);
752 if (ch == XCL_RANGE)
753 {
754 fprintf(f, "-");
755 ccode += 1 + print_char(f, ccode, utf);
756 }
757 break;
758 }
759 }
760 }
761
762 /* Indicate a non-UTF class which was created by negation */
763
764 fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : "");
765
766 /* Handle repeats after a class or a back reference */
767
768 CLASS_REF_REPEAT:
769 switch(*ccode)
770 {
771 case OP_CRSTAR:
772 case OP_CRMINSTAR:
773 case OP_CRPLUS:
774 case OP_CRMINPLUS:
775 case OP_CRQUERY:
776 case OP_CRMINQUERY:
777 case OP_CRPOSSTAR:
778 case OP_CRPOSPLUS:
779 case OP_CRPOSQUERY:
780 fprintf(f, "%s", OP_names[*ccode]);
781 extra += OP_lengths[*ccode];
782 break;
783
784 case OP_CRRANGE:
785 case OP_CRMINRANGE:
786 case OP_CRPOSRANGE:
787 min = GET2(ccode,1);
788 max = GET2(ccode,1 + IMM2_SIZE);
789 if (max == 0) fprintf(f, "{%u,}", min);
790 else fprintf(f, "{%u,%u}", min, max);
791 if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
792 else if (*ccode == OP_CRPOSRANGE) fprintf(f, "+");
793 extra += OP_lengths[*ccode];
794 break;
795
796 /* Do nothing if it's not a repeat; this code stops picky compilers
797 warning about the lack of a default code path. */
798
799 default:
800 break;
801 }
802 }
803 break;
804
805 case OP_MARK:
806 case OP_COMMIT_ARG:
807 case OP_PRUNE_ARG:
808 case OP_SKIP_ARG:
809 case OP_THEN_ARG:
810 fprintf(f, " %s ", OP_names[*code]);
811 print_custring_bylen(f, code + 2, code[1]);
812 extra += code[1];
813 break;
814
815 case OP_THEN:
816 fprintf(f, " %s", OP_names[*code]);
817 break;
818
819 case OP_CIRCM:
820 case OP_DOLLM:
821 flag = "/m";
822 /* Fall through */
823
824 /* Anything else is just an item with no data, but possibly a flag. */
825
826 default:
827 fprintf(f, " %s %s", flag, OP_names[*code]);
828 break;
829 }
830
831 code += OP_lengths[*code] + extra;
832 fprintf(f, "\n");
833 }
834 }
835
/* End of pcre2_printint.c */