1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Original API code Copyright (c) 1997-2012 University of Cambridge
10 New API code Copyright (c) 2016-2018 University of Cambridge
11
12 -----------------------------------------------------------------------------
13 Redistribution and use in source and binary forms, with or without
14 modification, are permitted provided that the following conditions are met:
15
16 * Redistributions of source code must retain the above copyright notice,
17 this list of conditions and the following disclaimer.
18
19 * Redistributions in binary form must reproduce the above copyright
20 notice, this list of conditions and the following disclaimer in the
21 documentation and/or other materials provided with the distribution.
22
23 * Neither the name of the University of Cambridge nor the names of its
24 contributors may be used to endorse or promote products derived from
25 this software without specific prior written permission.
26
27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 POSSIBILITY OF SUCH DAMAGE.
38 -----------------------------------------------------------------------------
39 */
40
41
42 /* This module contains a PCRE private debugging function for printing out the
43 internal form of a compiled regular expression, along with some supporting
44 local functions. This source file is #included in pcre2test.c at each supported
45 code unit width, with PCRE2_SUFFIX set appropriately, just like the functions
46 that comprise the library. It can also optionally be included in
47 pcre2_compile.c for detailed debugging in error situations. */
48
49
50 /* Tables of operator names. The same 8-bit table is used for all code unit
51 widths, so it must be defined only once. The list itself is defined in
52 pcre2_internal.h, which is #included by pcre2test before this file. */
53
54 #ifndef OP_LISTS_DEFINED
55 static const char *OP_names[] = { OP_NAME_LIST };
56 #define OP_LISTS_DEFINED
57 #endif
58
59 /* The functions and tables herein must all have mode-dependent names. */
60
61 #define OP_lengths PCRE2_SUFFIX(OP_lengths_)
62 #define get_ucpname PCRE2_SUFFIX(get_ucpname_)
63 #define pcre2_printint PCRE2_SUFFIX(pcre2_printint_)
64 #define print_char PCRE2_SUFFIX(print_char_)
65 #define print_custring PCRE2_SUFFIX(print_custring_)
66 #define print_custring_bylen PCRE2_SUFFIX(print_custring_bylen_)
67 #define print_prop PCRE2_SUFFIX(print_prop_)
68
69 /* Table of sizes for the fixed-length opcodes. It's defined in a macro so that
70 the definition is next to the definition of the opcodes in pcre2_internal.h.
71 The contents of the table are, however, mode-dependent. */
72
73 static const uint8_t OP_lengths[] = { OP_LENGTHS };
74
75
76
77 /*************************************************
78 * Print one character from a string *
79 *************************************************/
80
81 /* In UTF mode the character may occupy more than one code unit.
82
83 Arguments:
84 f file to write to
85 ptr pointer to first code unit of the character
86 utf TRUE if string is UTF (will be FALSE if UTF is not supported)
87
88 Returns: number of additional code units used
89 */
90
91 static unsigned int
print_char(FILE * f,PCRE2_SPTR ptr,BOOL utf)92 print_char(FILE *f, PCRE2_SPTR ptr, BOOL utf)
93 {
94 uint32_t c = *ptr;
95 BOOL one_code_unit = !utf;
96
97 /* If UTF is supported and requested, check for a valid single code unit. */
98
99 #ifdef SUPPORT_UNICODE
100 if (utf)
101 {
102 #if PCRE2_CODE_UNIT_WIDTH == 8
103 one_code_unit = c < 0x80;
104 #elif PCRE2_CODE_UNIT_WIDTH == 16
105 one_code_unit = (c & 0xfc00) != 0xd800;
106 #else
107 one_code_unit = (c & 0xfffff800u) != 0xd800u;
108 #endif /* CODE_UNIT_WIDTH */
109 }
110 #endif /* SUPPORT_UNICODE */
111
112 /* Handle a valid one-code-unit character at any width. */
113
114 if (one_code_unit)
115 {
116 if (PRINTABLE(c)) fprintf(f, "%c", (char)c);
117 else if (c < 0x80) fprintf(f, "\\x%02x", c);
118 else fprintf(f, "\\x{%02x}", c);
119 return 0;
120 }
121
122 /* Code for invalid UTF code units and multi-unit UTF characters is different
123 for each width. If UTF is not supported, control should never get here, but we
124 need a return statement to keep the compiler happy. */
125
126 #ifndef SUPPORT_UNICODE
127 return 0;
128 #else
129
130 /* Malformed UTF-8 should occur only if the sanity check has been turned off.
131 Rather than swallow random bytes, just stop if we hit a bad one. Print it with
132 \X instead of \x as an indication. */
133
134 #if PCRE2_CODE_UNIT_WIDTH == 8
135 if ((c & 0xc0) != 0xc0)
136 {
137 fprintf(f, "\\X{%x}", c); /* Invalid starting byte */
138 return 0;
139 }
140 else
141 {
142 int i;
143 int a = PRIV(utf8_table4)[c & 0x3f]; /* Number of additional bytes */
144 int s = 6*a;
145 c = (c & PRIV(utf8_table3)[a]) << s;
146 for (i = 1; i <= a; i++)
147 {
148 if ((ptr[i] & 0xc0) != 0x80)
149 {
150 fprintf(f, "\\X{%x}", c); /* Invalid secondary byte */
151 return i - 1;
152 }
153 s -= 6;
154 c |= (ptr[i] & 0x3f) << s;
155 }
156 fprintf(f, "\\x{%x}", c);
157 return a;
158 }
159 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
160
161 /* UTF-16: rather than swallow a low surrogate, just stop if we hit a bad one.
162 Print it with \X instead of \x as an indication. */
163
164 #if PCRE2_CODE_UNIT_WIDTH == 16
165 if ((ptr[1] & 0xfc00) != 0xdc00)
166 {
167 fprintf(f, "\\X{%x}", c);
168 return 0;
169 }
170 c = (((c & 0x3ff) << 10) | (ptr[1] & 0x3ff)) + 0x10000;
171 fprintf(f, "\\x{%x}", c);
172 return 1;
173 #endif /* PCRE2_CODE_UNIT_WIDTH == 16 */
174
175 /* For UTF-32 we get here only for a malformed code unit, which should only
176 occur if the sanity check has been turned off. Print it with \X instead of \x
177 as an indication. */
178
179 #if PCRE2_CODE_UNIT_WIDTH == 32
180 fprintf(f, "\\X{%x}", c);
181 return 0;
182 #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
183 #endif /* SUPPORT_UNICODE */
184 }
185
186
187
188 /*************************************************
189 * Print string as a list of code units *
190 *************************************************/
191
192 /* These take no account of UTF as they always print each individual code unit.
193 The string is zero-terminated for print_custring(); the length is given for
194 print_custring_bylen().
195
196 Arguments:
197 f file to write to
198 ptr point to the string
199 len length for print_custring_bylen()
200
201 Returns: nothing
202 */
203
204 static void
print_custring(FILE * f,PCRE2_SPTR ptr)205 print_custring(FILE *f, PCRE2_SPTR ptr)
206 {
207 while (*ptr != '\0')
208 {
209 uint32_t c = *ptr++;
210 if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x{%x}", c);
211 }
212 }
213
214 static void
print_custring_bylen(FILE * f,PCRE2_SPTR ptr,PCRE2_UCHAR len)215 print_custring_bylen(FILE *f, PCRE2_SPTR ptr, PCRE2_UCHAR len)
216 {
217 for (; len > 0; len--)
218 {
219 uint32_t c = *ptr++;
220 if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x{%x}", c);
221 }
222 }
223
224
225
/*************************************************
*          Find Unicode property name            *
*************************************************/

/* When there is no UTF/UCP support, the table of names does not exist. This
function should not be called in such configurations, because a pattern that
tries to use Unicode properties won't compile. Rather than put lots of #ifdefs
into the main code, however, we just put one into this function.

Arguments:
  ptype       property type to look for
  pvalue      property value to look for

Returns:      pointer to the name of the table entry whose type and value
              both match, or "??" if there is no such entry (always "??"
              when Unicode support is not compiled in)
*/

static const char *
get_ucpname(unsigned int ptype, unsigned int pvalue)
{
#ifdef SUPPORT_UNICODE
int i;

/* Scan from the end of the table; i is -1 on exit if nothing matched. */

for (i = PRIV(utt_size) - 1; i >= 0; i--)
  {
  if (ptype == PRIV(utt)[i].type && pvalue == PRIV(utt)[i].value) break;
  }
return (i >= 0)? PRIV(utt_names) + PRIV(utt)[i].name_offset : "??";
#else   /* No UTF support */
(void)ptype;
(void)pvalue;
return "??";
#endif  /* SUPPORT_UNICODE */
}
251
252
253
254 /*************************************************
255 * Print Unicode property value *
256 *************************************************/
257
258 /* "Normal" properties can be printed from tables. The PT_CLIST property is a
259 pseudo-property that contains a pointer to a list of case-equivalent
260 characters.
261
262 Arguments:
263 f file to write to
264 code pointer in the compiled code
265 before text to print before
266 after text to print after
267
268 Returns: nothing
269 */
270
271 static void
print_prop(FILE * f,PCRE2_SPTR code,const char * before,const char * after)272 print_prop(FILE *f, PCRE2_SPTR code, const char *before, const char *after)
273 {
274 if (code[1] != PT_CLIST)
275 {
276 fprintf(f, "%s%s %s%s", before, OP_names[*code], get_ucpname(code[1],
277 code[2]), after);
278 }
279 else
280 {
281 const char *not = (*code == OP_PROP)? "" : "not ";
282 const uint32_t *p = PRIV(ucd_caseless_sets) + code[2];
283 fprintf (f, "%s%sclist", before, not);
284 while (*p < NOTACHAR) fprintf(f, " %04x", *p++);
285 fprintf(f, "%s", after);
286 }
287 }
288
289
290
291 /*************************************************
292 * Print compiled pattern *
293 *************************************************/
294
295 /* The print_lengths flag controls whether offsets and lengths of items are
296 printed. Lenths can be turned off from pcre2test so that automatic tests on
297 bytecode can be written that do not depend on the value of LINK_SIZE.
298
299 Arguments:
300 re a compiled pattern
301 f the file to write to
302 print_lengths show various lengths
303
304 Returns: nothing
305 */
306
307 static void
pcre2_printint(pcre2_code * re,FILE * f,BOOL print_lengths)308 pcre2_printint(pcre2_code *re, FILE *f, BOOL print_lengths)
309 {
310 PCRE2_SPTR codestart, nametable, code;
311 uint32_t nesize = re->name_entry_size;
312 BOOL utf = (re->overall_options & PCRE2_UTF) != 0;
313
314 nametable = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code));
315 code = codestart = nametable + re->name_count * re->name_entry_size;
316
317 for(;;)
318 {
319 PCRE2_SPTR ccode;
320 uint32_t c;
321 int i;
322 const char *flag = " ";
323 unsigned int extra = 0;
324
325 if (print_lengths)
326 fprintf(f, "%3d ", (int)(code - codestart));
327 else
328 fprintf(f, " ");
329
330 switch(*code)
331 {
332 /* ========================================================================== */
333 /* These cases are never obeyed. This is a fudge that causes a compile-
334 time error if the vectors OP_names or OP_lengths, which are indexed
335 by opcode, are not the correct length. It seems to be the only way to do
336 such a check at compile time, as the sizeof() operator does not work in
337 the C preprocessor. */
338
339 case OP_TABLE_LENGTH:
340 case OP_TABLE_LENGTH +
341 ((sizeof(OP_names)/sizeof(const char *) == OP_TABLE_LENGTH) &&
342 (sizeof(OP_lengths) == OP_TABLE_LENGTH)):
343 return;
344 /* ========================================================================== */
345
346 case OP_END:
347 fprintf(f, " %s\n", OP_names[*code]);
348 fprintf(f, "------------------------------------------------------------------\n");
349 return;
350
351 case OP_CHAR:
352 fprintf(f, " ");
353 do
354 {
355 code++;
356 code += 1 + print_char(f, code, utf);
357 }
358 while (*code == OP_CHAR);
359 fprintf(f, "\n");
360 continue;
361
362 case OP_CHARI:
363 fprintf(f, " /i ");
364 do
365 {
366 code++;
367 code += 1 + print_char(f, code, utf);
368 }
369 while (*code == OP_CHARI);
370 fprintf(f, "\n");
371 continue;
372
373 case OP_CBRA:
374 case OP_CBRAPOS:
375 case OP_SCBRA:
376 case OP_SCBRAPOS:
377 if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
378 else fprintf(f, " ");
379 fprintf(f, "%s %d", OP_names[*code], GET2(code, 1+LINK_SIZE));
380 break;
381
382 case OP_BRA:
383 case OP_BRAPOS:
384 case OP_SBRA:
385 case OP_SBRAPOS:
386 case OP_KETRMAX:
387 case OP_KETRMIN:
388 case OP_KETRPOS:
389 case OP_ALT:
390 case OP_KET:
391 case OP_ASSERT:
392 case OP_ASSERT_NOT:
393 case OP_ASSERTBACK:
394 case OP_ASSERTBACK_NOT:
395 case OP_ONCE:
396 case OP_COND:
397 case OP_SCOND:
398 case OP_REVERSE:
399 if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
400 else fprintf(f, " ");
401 fprintf(f, "%s", OP_names[*code]);
402 break;
403
404 case OP_CLOSE:
405 fprintf(f, " %s %d", OP_names[*code], GET2(code, 1));
406 break;
407
408 case OP_CREF:
409 fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
410 break;
411
412 case OP_DNCREF:
413 {
414 PCRE2_SPTR entry = nametable + (GET2(code, 1) * nesize) + IMM2_SIZE;
415 fprintf(f, " %s Cond ref <", flag);
416 print_custring(f, entry);
417 fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
418 }
419 break;
420
421 case OP_RREF:
422 c = GET2(code, 1);
423 if (c == RREF_ANY)
424 fprintf(f, " Cond recurse any");
425 else
426 fprintf(f, " Cond recurse %d", c);
427 break;
428
429 case OP_DNRREF:
430 {
431 PCRE2_SPTR entry = nametable + (GET2(code, 1) * nesize) + IMM2_SIZE;
432 fprintf(f, " %s Cond recurse <", flag);
433 print_custring(f, entry);
434 fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
435 }
436 break;
437
438 case OP_FALSE:
439 fprintf(f, " Cond false");
440 break;
441
442 case OP_TRUE:
443 fprintf(f, " Cond true");
444 break;
445
446 case OP_STARI:
447 case OP_MINSTARI:
448 case OP_POSSTARI:
449 case OP_PLUSI:
450 case OP_MINPLUSI:
451 case OP_POSPLUSI:
452 case OP_QUERYI:
453 case OP_MINQUERYI:
454 case OP_POSQUERYI:
455 flag = "/i";
456 /* Fall through */
457 case OP_STAR:
458 case OP_MINSTAR:
459 case OP_POSSTAR:
460 case OP_PLUS:
461 case OP_MINPLUS:
462 case OP_POSPLUS:
463 case OP_QUERY:
464 case OP_MINQUERY:
465 case OP_POSQUERY:
466 case OP_TYPESTAR:
467 case OP_TYPEMINSTAR:
468 case OP_TYPEPOSSTAR:
469 case OP_TYPEPLUS:
470 case OP_TYPEMINPLUS:
471 case OP_TYPEPOSPLUS:
472 case OP_TYPEQUERY:
473 case OP_TYPEMINQUERY:
474 case OP_TYPEPOSQUERY:
475 fprintf(f, " %s ", flag);
476
477 if (*code >= OP_TYPESTAR)
478 {
479 if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
480 {
481 print_prop(f, code + 1, "", " ");
482 extra = 2;
483 }
484 else fprintf(f, "%s", OP_names[code[1]]);
485 }
486 else extra = print_char(f, code+1, utf);
487 fprintf(f, "%s", OP_names[*code]);
488 break;
489
490 case OP_EXACTI:
491 case OP_UPTOI:
492 case OP_MINUPTOI:
493 case OP_POSUPTOI:
494 flag = "/i";
495 /* Fall through */
496 case OP_EXACT:
497 case OP_UPTO:
498 case OP_MINUPTO:
499 case OP_POSUPTO:
500 fprintf(f, " %s ", flag);
501 extra = print_char(f, code + 1 + IMM2_SIZE, utf);
502 fprintf(f, "{");
503 if (*code != OP_EXACT && *code != OP_EXACTI) fprintf(f, "0,");
504 fprintf(f, "%d}", GET2(code,1));
505 if (*code == OP_MINUPTO || *code == OP_MINUPTOI) fprintf(f, "?");
506 else if (*code == OP_POSUPTO || *code == OP_POSUPTOI) fprintf(f, "+");
507 break;
508
509 case OP_TYPEEXACT:
510 case OP_TYPEUPTO:
511 case OP_TYPEMINUPTO:
512 case OP_TYPEPOSUPTO:
513 if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
514 {
515 print_prop(f, code + IMM2_SIZE + 1, " ", " ");
516 extra = 2;
517 }
518 else fprintf(f, " %s", OP_names[code[1 + IMM2_SIZE]]);
519 fprintf(f, "{");
520 if (*code != OP_TYPEEXACT) fprintf(f, "0,");
521 fprintf(f, "%d}", GET2(code,1));
522 if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
523 else if (*code == OP_TYPEPOSUPTO) fprintf(f, "+");
524 break;
525
526 case OP_NOTI:
527 flag = "/i";
528 /* Fall through */
529 case OP_NOT:
530 fprintf(f, " %s [^", flag);
531 extra = print_char(f, code + 1, utf);
532 fprintf(f, "]");
533 break;
534
535 case OP_NOTSTARI:
536 case OP_NOTMINSTARI:
537 case OP_NOTPOSSTARI:
538 case OP_NOTPLUSI:
539 case OP_NOTMINPLUSI:
540 case OP_NOTPOSPLUSI:
541 case OP_NOTQUERYI:
542 case OP_NOTMINQUERYI:
543 case OP_NOTPOSQUERYI:
544 flag = "/i";
545 /* Fall through */
546
547 case OP_NOTSTAR:
548 case OP_NOTMINSTAR:
549 case OP_NOTPOSSTAR:
550 case OP_NOTPLUS:
551 case OP_NOTMINPLUS:
552 case OP_NOTPOSPLUS:
553 case OP_NOTQUERY:
554 case OP_NOTMINQUERY:
555 case OP_NOTPOSQUERY:
556 fprintf(f, " %s [^", flag);
557 extra = print_char(f, code + 1, utf);
558 fprintf(f, "]%s", OP_names[*code]);
559 break;
560
561 case OP_NOTEXACTI:
562 case OP_NOTUPTOI:
563 case OP_NOTMINUPTOI:
564 case OP_NOTPOSUPTOI:
565 flag = "/i";
566 /* Fall through */
567
568 case OP_NOTEXACT:
569 case OP_NOTUPTO:
570 case OP_NOTMINUPTO:
571 case OP_NOTPOSUPTO:
572 fprintf(f, " %s [^", flag);
573 extra = print_char(f, code + 1 + IMM2_SIZE, utf);
574 fprintf(f, "]{");
575 if (*code != OP_NOTEXACT && *code != OP_NOTEXACTI) fprintf(f, "0,");
576 fprintf(f, "%d}", GET2(code,1));
577 if (*code == OP_NOTMINUPTO || *code == OP_NOTMINUPTOI) fprintf(f, "?");
578 else
579 if (*code == OP_NOTPOSUPTO || *code == OP_NOTPOSUPTOI) fprintf(f, "+");
580 break;
581
582 case OP_RECURSE:
583 if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
584 else fprintf(f, " ");
585 fprintf(f, "%s", OP_names[*code]);
586 break;
587
588 case OP_REFI:
589 flag = "/i";
590 /* Fall through */
591 case OP_REF:
592 fprintf(f, " %s \\%d", flag, GET2(code,1));
593 ccode = code + OP_lengths[*code];
594 goto CLASS_REF_REPEAT;
595
596 case OP_DNREFI:
597 flag = "/i";
598 /* Fall through */
599 case OP_DNREF:
600 {
601 PCRE2_SPTR entry = nametable + (GET2(code, 1) * nesize) + IMM2_SIZE;
602 fprintf(f, " %s \\k<", flag);
603 print_custring(f, entry);
604 fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
605 }
606 ccode = code + OP_lengths[*code];
607 goto CLASS_REF_REPEAT;
608
609 case OP_CALLOUT:
610 fprintf(f, " %s %d %d %d", OP_names[*code], code[1 + 2*LINK_SIZE],
611 GET(code, 1), GET(code, 1 + LINK_SIZE));
612 break;
613
614 case OP_CALLOUT_STR:
615 c = code[1 + 4*LINK_SIZE];
616 fprintf(f, " %s %c", OP_names[*code], c);
617 extra = GET(code, 1 + 2*LINK_SIZE);
618 print_custring_bylen(f, code + 2 + 4*LINK_SIZE, extra - 3 - 4*LINK_SIZE);
619 for (i = 0; PRIV(callout_start_delims)[i] != 0; i++)
620 if (c == PRIV(callout_start_delims)[i])
621 {
622 c = PRIV(callout_end_delims)[i];
623 break;
624 }
625 fprintf(f, "%c %d %d %d", c, GET(code, 1 + 3*LINK_SIZE), GET(code, 1),
626 GET(code, 1 + LINK_SIZE));
627 break;
628
629 case OP_PROP:
630 case OP_NOTPROP:
631 print_prop(f, code, " ", "");
632 break;
633
634 /* OP_XCLASS cannot occur in 8-bit, non-UTF mode. However, there's no harm
635 in having this code always here, and it makes it less messy without all
636 those #ifdefs. */
637
638 case OP_CLASS:
639 case OP_NCLASS:
640 case OP_XCLASS:
641 {
642 unsigned int min, max;
643 BOOL printmap;
644 BOOL invertmap = FALSE;
645 uint8_t *map;
646 uint8_t inverted_map[32];
647
648 fprintf(f, " [");
649
650 if (*code == OP_XCLASS)
651 {
652 extra = GET(code, 1);
653 ccode = code + LINK_SIZE + 1;
654 printmap = (*ccode & XCL_MAP) != 0;
655 if ((*ccode & XCL_NOT) != 0)
656 {
657 invertmap = (*ccode & XCL_HASPROP) == 0;
658 fprintf(f, "^");
659 }
660 ccode++;
661 }
662 else
663 {
664 printmap = TRUE;
665 ccode = code + 1;
666 }
667
668 /* Print a bit map */
669
670 if (printmap)
671 {
672 map = (uint8_t *)ccode;
673 if (invertmap)
674 {
675 for (i = 0; i < 32; i++) inverted_map[i] = ~map[i];
676 map = inverted_map;
677 }
678
679 for (i = 0; i < 256; i++)
680 {
681 if ((map[i/8] & (1 << (i&7))) != 0)
682 {
683 int j;
684 for (j = i+1; j < 256; j++)
685 if ((map[j/8] & (1 << (j&7))) == 0) break;
686 if (i == '-' || i == ']') fprintf(f, "\\");
687 if (PRINTABLE(i)) fprintf(f, "%c", i);
688 else fprintf(f, "\\x%02x", i);
689 if (--j > i)
690 {
691 if (j != i + 1) fprintf(f, "-");
692 if (j == '-' || j == ']') fprintf(f, "\\");
693 if (PRINTABLE(j)) fprintf(f, "%c", j);
694 else fprintf(f, "\\x%02x", j);
695 }
696 i = j;
697 }
698 }
699 ccode += 32 / sizeof(PCRE2_UCHAR);
700 }
701
702 /* For an XCLASS there is always some additional data */
703
704 if (*code == OP_XCLASS)
705 {
706 PCRE2_UCHAR ch;
707 while ((ch = *ccode++) != XCL_END)
708 {
709 BOOL not = FALSE;
710 const char *notch = "";
711
712 switch(ch)
713 {
714 case XCL_NOTPROP:
715 not = TRUE;
716 notch = "^";
717 /* Fall through */
718
719 case XCL_PROP:
720 {
721 unsigned int ptype = *ccode++;
722 unsigned int pvalue = *ccode++;
723
724 switch(ptype)
725 {
726 case PT_PXGRAPH:
727 fprintf(f, "[:%sgraph:]", notch);
728 break;
729
730 case PT_PXPRINT:
731 fprintf(f, "[:%sprint:]", notch);
732 break;
733
734 case PT_PXPUNCT:
735 fprintf(f, "[:%spunct:]", notch);
736 break;
737
738 default:
739 fprintf(f, "\\%c{%s}", (not? 'P':'p'),
740 get_ucpname(ptype, pvalue));
741 break;
742 }
743 }
744 break;
745
746 default:
747 ccode += 1 + print_char(f, ccode, utf);
748 if (ch == XCL_RANGE)
749 {
750 fprintf(f, "-");
751 ccode += 1 + print_char(f, ccode, utf);
752 }
753 break;
754 }
755 }
756 }
757
758 /* Indicate a non-UTF class which was created by negation */
759
760 fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : "");
761
762 /* Handle repeats after a class or a back reference */
763
764 CLASS_REF_REPEAT:
765 switch(*ccode)
766 {
767 case OP_CRSTAR:
768 case OP_CRMINSTAR:
769 case OP_CRPLUS:
770 case OP_CRMINPLUS:
771 case OP_CRQUERY:
772 case OP_CRMINQUERY:
773 case OP_CRPOSSTAR:
774 case OP_CRPOSPLUS:
775 case OP_CRPOSQUERY:
776 fprintf(f, "%s", OP_names[*ccode]);
777 extra += OP_lengths[*ccode];
778 break;
779
780 case OP_CRRANGE:
781 case OP_CRMINRANGE:
782 case OP_CRPOSRANGE:
783 min = GET2(ccode,1);
784 max = GET2(ccode,1 + IMM2_SIZE);
785 if (max == 0) fprintf(f, "{%u,}", min);
786 else fprintf(f, "{%u,%u}", min, max);
787 if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
788 else if (*ccode == OP_CRPOSRANGE) fprintf(f, "+");
789 extra += OP_lengths[*ccode];
790 break;
791
792 /* Do nothing if it's not a repeat; this code stops picky compilers
793 warning about the lack of a default code path. */
794
795 default:
796 break;
797 }
798 }
799 break;
800
801 case OP_MARK:
802 case OP_COMMIT_ARG:
803 case OP_PRUNE_ARG:
804 case OP_SKIP_ARG:
805 case OP_THEN_ARG:
806 fprintf(f, " %s ", OP_names[*code]);
807 print_custring_bylen(f, code + 2, code[1]);
808 extra += code[1];
809 break;
810
811 case OP_THEN:
812 fprintf(f, " %s", OP_names[*code]);
813 break;
814
815 case OP_CIRCM:
816 case OP_DOLLM:
817 flag = "/m";
818 /* Fall through */
819
820 /* Anything else is just an item with no data, but possibly a flag. */
821
822 default:
823 fprintf(f, " %s %s", flag, OP_names[*code]);
824 break;
825 }
826
827 code += OP_lengths[*code] + extra;
828 fprintf(f, "\n");
829 }
830 }
831
832 /* End of pcre2_printint.c */
833