1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Original API code Copyright (c) 1997-2012 University of Cambridge
10 New API code Copyright (c) 2016-2022 University of Cambridge
11
12 -----------------------------------------------------------------------------
13 Redistribution and use in source and binary forms, with or without
14 modification, are permitted provided that the following conditions are met:
15
16 * Redistributions of source code must retain the above copyright notice,
17 this list of conditions and the following disclaimer.
18
19 * Redistributions in binary form must reproduce the above copyright
20 notice, this list of conditions and the following disclaimer in the
21 documentation and/or other materials provided with the distribution.
22
23 * Neither the name of the University of Cambridge nor the names of its
24 contributors may be used to endorse or promote products derived from
25 this software without specific prior written permission.
26
27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 POSSIBILITY OF SUCH DAMAGE.
38 -----------------------------------------------------------------------------
39 */
40
41
42 /* This module contains a PCRE private debugging function for printing out the
43 internal form of a compiled regular expression, along with some supporting
44 local functions. This source file is #included in pcre2test.c at each supported
45 code unit width, with PCRE2_SUFFIX set appropriately, just like the functions
46 that comprise the library. It can also optionally be included in
47 pcre2_compile.c for detailed debugging in error situations. */
48
49
50 /* Tables of operator names. The same 8-bit table is used for all code unit
51 widths, so it must be defined only once. The list itself is defined in
52 pcre2_internal.h, which is #included by pcre2test before this file. */
53
/* OP_names is indexed by opcode: pcre2_printint() looks names up as
OP_names[*code]. The OP_LISTS_DEFINED guard ensures the table is defined only
the first time this file is #included. */

#ifndef OP_LISTS_DEFINED
static const char *OP_names[] = { OP_NAME_LIST };
#define OP_LISTS_DEFINED
#endif

/* The functions and tables herein must all have mode-dependent names. Each
macro below maps the short name used in this file onto a name built with
PCRE2_SUFFIX (presumably decorated with the code unit width - confirm in the
header that defines PCRE2_SUFFIX), so that the file can be #included more than
once without the definitions clashing. */

#define OP_lengths            PCRE2_SUFFIX(OP_lengths_)
#define get_ucpname           PCRE2_SUFFIX(get_ucpname_)
#define pcre2_printint        PCRE2_SUFFIX(pcre2_printint_)
#define print_char            PCRE2_SUFFIX(print_char_)
#define print_custring        PCRE2_SUFFIX(print_custring_)
#define print_custring_bylen  PCRE2_SUFFIX(print_custring_bylen_)
#define print_prop            PCRE2_SUFFIX(print_prop_)

/* Table of sizes for the fixed-length opcodes. It's defined in a macro so that
the definition is next to the definition of the opcodes in pcre2_internal.h.
The contents of the table are, however, mode-dependent. The table is indexed by
opcode: pcre2_printint() uses OP_lengths[*code] to step from one item to the
next. */

static const uint8_t OP_lengths[] = { OP_LENGTHS };
74
75
76
77 /*************************************************
78 * Print one character from a string *
79 *************************************************/
80
81 /* In UTF mode the character may occupy more than one code unit.
82
83 Arguments:
84 f file to write to
85 ptr pointer to first code unit of the character
86 utf TRUE if string is UTF (will be FALSE if UTF is not supported)
87
88 Returns: number of additional code units used
89 */
90
91 static unsigned int
print_char(FILE * f,PCRE2_SPTR ptr,BOOL utf)92 print_char(FILE *f, PCRE2_SPTR ptr, BOOL utf)
93 {
94 uint32_t c = *ptr;
95 BOOL one_code_unit = !utf;
96
97 /* If UTF is supported and requested, check for a valid single code unit. */
98
99 #ifdef SUPPORT_UNICODE
100 if (utf)
101 {
102 #if PCRE2_CODE_UNIT_WIDTH == 8
103 one_code_unit = c < 0x80;
104 #elif PCRE2_CODE_UNIT_WIDTH == 16
105 one_code_unit = (c & 0xfc00) != 0xd800;
106 #else
107 one_code_unit = (c & 0xfffff800u) != 0xd800u;
108 #endif /* CODE_UNIT_WIDTH */
109 }
110 #endif /* SUPPORT_UNICODE */
111
112 /* Handle a valid one-code-unit character at any width. */
113
114 if (one_code_unit)
115 {
116 if (PRINTABLE(c)) fprintf(f, "%c", (char)c);
117 else if (c < 0x80) fprintf(f, "\\x%02x", c);
118 else fprintf(f, "\\x{%02x}", c);
119 return 0;
120 }
121
122 /* Code for invalid UTF code units and multi-unit UTF characters is different
123 for each width. If UTF is not supported, control should never get here, but we
124 need a return statement to keep the compiler happy. */
125
126 #ifndef SUPPORT_UNICODE
127 return 0;
128 #else
129
130 /* Malformed UTF-8 should occur only if the sanity check has been turned off.
131 Rather than swallow random bytes, just stop if we hit a bad one. Print it with
132 \X instead of \x as an indication. */
133
134 #if PCRE2_CODE_UNIT_WIDTH == 8
135 if ((c & 0xc0) != 0xc0)
136 {
137 fprintf(f, "\\X{%x}", c); /* Invalid starting byte */
138 return 0;
139 }
140 else
141 {
142 int i;
143 int a = PRIV(utf8_table4)[c & 0x3f]; /* Number of additional bytes */
144 int s = 6*a;
145 c = (c & PRIV(utf8_table3)[a]) << s;
146 for (i = 1; i <= a; i++)
147 {
148 if ((ptr[i] & 0xc0) != 0x80)
149 {
150 fprintf(f, "\\X{%x}", c); /* Invalid secondary byte */
151 return i - 1;
152 }
153 s -= 6;
154 c |= (ptr[i] & 0x3f) << s;
155 }
156 fprintf(f, "\\x{%x}", c);
157 return a;
158 }
159 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
160
161 /* UTF-16: rather than swallow a low surrogate, just stop if we hit a bad one.
162 Print it with \X instead of \x as an indication. */
163
164 #if PCRE2_CODE_UNIT_WIDTH == 16
165 if ((ptr[1] & 0xfc00) != 0xdc00)
166 {
167 fprintf(f, "\\X{%x}", c);
168 return 0;
169 }
170 c = (((c & 0x3ff) << 10) | (ptr[1] & 0x3ff)) + 0x10000;
171 fprintf(f, "\\x{%x}", c);
172 return 1;
173 #endif /* PCRE2_CODE_UNIT_WIDTH == 16 */
174
175 /* For UTF-32 we get here only for a malformed code unit, which should only
176 occur if the sanity check has been turned off. Print it with \X instead of \x
177 as an indication. */
178
179 #if PCRE2_CODE_UNIT_WIDTH == 32
180 fprintf(f, "\\X{%x}", c);
181 return 0;
182 #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
183 #endif /* SUPPORT_UNICODE */
184 }
185
186
187
188 /*************************************************
189 * Print string as a list of code units *
190 *************************************************/
191
192 /* These take no account of UTF as they always print each individual code unit.
193 The string is zero-terminated for print_custring(); the length is given for
194 print_custring_bylen().
195
196 Arguments:
197 f file to write to
198 ptr point to the string
199 len length for print_custring_bylen()
200
201 Returns: nothing
202 */
203
204 static void
print_custring(FILE * f,PCRE2_SPTR ptr)205 print_custring(FILE *f, PCRE2_SPTR ptr)
206 {
207 while (*ptr != '\0')
208 {
209 uint32_t c = *ptr++;
210 if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x{%x}", c);
211 }
212 }
213
214 static void
print_custring_bylen(FILE * f,PCRE2_SPTR ptr,PCRE2_UCHAR len)215 print_custring_bylen(FILE *f, PCRE2_SPTR ptr, PCRE2_UCHAR len)
216 {
217 for (; len > 0; len--)
218 {
219 uint32_t c = *ptr++;
220 if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x{%x}", c);
221 }
222 }
223
224
225
226 /*************************************************
227 * Find Unicode property name *
228 *************************************************/
229
230 /* When there is no UTF/UCP support, the table of names does not exist. This
231 function should not be called in such configurations, because a pattern that
232 tries to use Unicode properties won't compile. Rather than put lots of #ifdefs
233 into the main code, however, we just put one into this function.
234
235 Now that the table contains both full names and their abbreviations, we do some
236 fiddling to try to get the full name, which is either the longer of two found
237 names, or a 3-character script name. */
238
static const char *
get_ucpname(unsigned int ptype, unsigned int pvalue)
{
/* Returns the name to print for the property (ptype, pvalue), or "??" when no
table entry matches or when Unicode support is not compiled in. */

#ifdef SUPPORT_UNICODE
const char *best = "??";
size_t best_len = 0;
int matches = 0;
unsigned int alt_type = ptype;
int idx = PRIV(utt_size) - 1;

/* A script property may be listed in the table under either script type. */

if (ptype == PT_SC) alt_type = PT_SCX;

/* Scan the table from the end towards the start. At most two matching entries
are examined. A 3-character script name is accepted immediately; otherwise the
longest name seen is kept. */

while (idx >= 0)
  {
  const ucp_type_table *entry = PRIV(utt) + idx;
  const char *name;
  size_t name_len;

  idx--;

  if (pvalue != entry->value) continue;
  if (ptype != entry->type && alt_type != entry->type) continue;

  name = PRIV(utt_names) + entry->name_offset;
  name_len = strlen(name);

  if (name_len == 3 && (entry->type == PT_SC || entry->type == PT_SCX))
    {
    best = name;
    break;
    }

  if (name_len > best_len)
    {
    best = name;
    best_len = name_len;
    }

  matches++;
  if (matches >= 2) break;
  }

return best;

#else   /* No UTF support */
(void)ptype;
(void)pvalue;
return "??";
#endif  /* SUPPORT_UNICODE */
}
281
282
283
284 /*************************************************
285 * Print Unicode property value *
286 *************************************************/
287
288 /* "Normal" properties can be printed from tables. The PT_CLIST property is a
289 pseudo-property that contains a pointer to a list of case-equivalent
290 characters.
291
292 Arguments:
293 f file to write to
294 code pointer in the compiled code
295 before text to print before
296 after text to print after
297
298 Returns: nothing
299 */
300
301 static void
print_prop(FILE * f,PCRE2_SPTR code,const char * before,const char * after)302 print_prop(FILE *f, PCRE2_SPTR code, const char *before, const char *after)
303 {
304 if (code[1] != PT_CLIST)
305 {
306 const char *sc = (code[1] == PT_SC)? "script:" : "";
307 const char *s = get_ucpname(code[1], code[2]);
308 fprintf(f, "%s%s %s%c%s%s", before, OP_names[*code], sc, toupper(s[0]), s+1, after);
309 }
310 else
311 {
312 const char *not = (*code == OP_PROP)? "" : "not ";
313 const uint32_t *p = PRIV(ucd_caseless_sets) + code[2];
314 fprintf (f, "%s%sclist", before, not);
315 while (*p < NOTACHAR) fprintf(f, " %04x", *p++);
316 fprintf(f, "%s", after);
317 }
318 }
319
320
321
322 /*************************************************
323 * Print compiled pattern *
324 *************************************************/
325
326 /* The print_lengths flag controls whether offsets and lengths of items are
327 printed. Lengths can be turned off from pcre2test so that automatic tests on
328 bytecode can be written that do not depend on the value of LINK_SIZE.
329
330 Arguments:
331 re a compiled pattern
332 f the file to write to
333 print_lengths show various lengths
334
335 Returns: nothing
336 */
337
338 static void
pcre2_printint(pcre2_code * re,FILE * f,BOOL print_lengths)339 pcre2_printint(pcre2_code *re, FILE *f, BOOL print_lengths)
340 {
341 PCRE2_SPTR codestart, nametable, code;
342 uint32_t nesize = re->name_entry_size;
343 BOOL utf = (re->overall_options & PCRE2_UTF) != 0;
344
345 nametable = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code));
346 code = codestart = nametable + re->name_count * re->name_entry_size;
347
348 for(;;)
349 {
350 PCRE2_SPTR ccode;
351 uint32_t c;
352 int i;
353 const char *flag = " ";
354 unsigned int extra = 0;
355
356 if (print_lengths)
357 fprintf(f, "%3d ", (int)(code - codestart));
358 else
359 fprintf(f, " ");
360
361 switch(*code)
362 {
363 /* ========================================================================== */
364 /* These cases are never obeyed. This is a fudge that causes a compile-
365 time error if the vectors OP_names or OP_lengths, which are indexed
366 by opcode, are not the correct length. It seems to be the only way to do
367 such a check at compile time, as the sizeof() operator does not work in
368 the C preprocessor. */
369
370 case OP_TABLE_LENGTH:
371 case OP_TABLE_LENGTH +
372 ((sizeof(OP_names)/sizeof(const char *) == OP_TABLE_LENGTH) &&
373 (sizeof(OP_lengths) == OP_TABLE_LENGTH)):
374 return;
375 /* ========================================================================== */
376
377 case OP_END:
378 fprintf(f, " %s\n", OP_names[*code]);
379 fprintf(f, "------------------------------------------------------------------\n");
380 return;
381
382 case OP_CHAR:
383 fprintf(f, " ");
384 do
385 {
386 code++;
387 code += 1 + print_char(f, code, utf);
388 }
389 while (*code == OP_CHAR);
390 fprintf(f, "\n");
391 continue;
392
393 case OP_CHARI:
394 fprintf(f, " /i ");
395 do
396 {
397 code++;
398 code += 1 + print_char(f, code, utf);
399 }
400 while (*code == OP_CHARI);
401 fprintf(f, "\n");
402 continue;
403
404 case OP_CBRA:
405 case OP_CBRAPOS:
406 case OP_SCBRA:
407 case OP_SCBRAPOS:
408 if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
409 else fprintf(f, " ");
410 fprintf(f, "%s %d", OP_names[*code], GET2(code, 1+LINK_SIZE));
411 break;
412
413 case OP_BRA:
414 case OP_BRAPOS:
415 case OP_SBRA:
416 case OP_SBRAPOS:
417 case OP_KETRMAX:
418 case OP_KETRMIN:
419 case OP_KETRPOS:
420 case OP_ALT:
421 case OP_KET:
422 case OP_ASSERT:
423 case OP_ASSERT_NOT:
424 case OP_ASSERTBACK:
425 case OP_ASSERTBACK_NOT:
426 case OP_ASSERT_NA:
427 case OP_ASSERTBACK_NA:
428 case OP_ONCE:
429 case OP_SCRIPT_RUN:
430 case OP_COND:
431 case OP_SCOND:
432 case OP_REVERSE:
433 if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
434 else fprintf(f, " ");
435 fprintf(f, "%s", OP_names[*code]);
436 break;
437
438 case OP_CLOSE:
439 fprintf(f, " %s %d", OP_names[*code], GET2(code, 1));
440 break;
441
442 case OP_CREF:
443 fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
444 break;
445
446 case OP_DNCREF:
447 {
448 PCRE2_SPTR entry = nametable + (GET2(code, 1) * nesize) + IMM2_SIZE;
449 fprintf(f, " %s Cond ref <", flag);
450 print_custring(f, entry);
451 fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
452 }
453 break;
454
455 case OP_RREF:
456 c = GET2(code, 1);
457 if (c == RREF_ANY)
458 fprintf(f, " Cond recurse any");
459 else
460 fprintf(f, " Cond recurse %d", c);
461 break;
462
463 case OP_DNRREF:
464 {
465 PCRE2_SPTR entry = nametable + (GET2(code, 1) * nesize) + IMM2_SIZE;
466 fprintf(f, " %s Cond recurse <", flag);
467 print_custring(f, entry);
468 fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
469 }
470 break;
471
472 case OP_FALSE:
473 fprintf(f, " Cond false");
474 break;
475
476 case OP_TRUE:
477 fprintf(f, " Cond true");
478 break;
479
480 case OP_STARI:
481 case OP_MINSTARI:
482 case OP_POSSTARI:
483 case OP_PLUSI:
484 case OP_MINPLUSI:
485 case OP_POSPLUSI:
486 case OP_QUERYI:
487 case OP_MINQUERYI:
488 case OP_POSQUERYI:
489 flag = "/i";
490 /* Fall through */
491 case OP_STAR:
492 case OP_MINSTAR:
493 case OP_POSSTAR:
494 case OP_PLUS:
495 case OP_MINPLUS:
496 case OP_POSPLUS:
497 case OP_QUERY:
498 case OP_MINQUERY:
499 case OP_POSQUERY:
500 case OP_TYPESTAR:
501 case OP_TYPEMINSTAR:
502 case OP_TYPEPOSSTAR:
503 case OP_TYPEPLUS:
504 case OP_TYPEMINPLUS:
505 case OP_TYPEPOSPLUS:
506 case OP_TYPEQUERY:
507 case OP_TYPEMINQUERY:
508 case OP_TYPEPOSQUERY:
509 fprintf(f, " %s ", flag);
510
511 if (*code >= OP_TYPESTAR)
512 {
513 if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
514 {
515 print_prop(f, code + 1, "", " ");
516 extra = 2;
517 }
518 else fprintf(f, "%s", OP_names[code[1]]);
519 }
520 else extra = print_char(f, code+1, utf);
521 fprintf(f, "%s", OP_names[*code]);
522 break;
523
524 case OP_EXACTI:
525 case OP_UPTOI:
526 case OP_MINUPTOI:
527 case OP_POSUPTOI:
528 flag = "/i";
529 /* Fall through */
530 case OP_EXACT:
531 case OP_UPTO:
532 case OP_MINUPTO:
533 case OP_POSUPTO:
534 fprintf(f, " %s ", flag);
535 extra = print_char(f, code + 1 + IMM2_SIZE, utf);
536 fprintf(f, "{");
537 if (*code != OP_EXACT && *code != OP_EXACTI) fprintf(f, "0,");
538 fprintf(f, "%d}", GET2(code,1));
539 if (*code == OP_MINUPTO || *code == OP_MINUPTOI) fprintf(f, "?");
540 else if (*code == OP_POSUPTO || *code == OP_POSUPTOI) fprintf(f, "+");
541 break;
542
543 case OP_TYPEEXACT:
544 case OP_TYPEUPTO:
545 case OP_TYPEMINUPTO:
546 case OP_TYPEPOSUPTO:
547 if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
548 {
549 print_prop(f, code + IMM2_SIZE + 1, " ", " ");
550 extra = 2;
551 }
552 else fprintf(f, " %s", OP_names[code[1 + IMM2_SIZE]]);
553 fprintf(f, "{");
554 if (*code != OP_TYPEEXACT) fprintf(f, "0,");
555 fprintf(f, "%d}", GET2(code,1));
556 if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
557 else if (*code == OP_TYPEPOSUPTO) fprintf(f, "+");
558 break;
559
560 case OP_NOTI:
561 flag = "/i";
562 /* Fall through */
563 case OP_NOT:
564 fprintf(f, " %s [^", flag);
565 extra = print_char(f, code + 1, utf);
566 fprintf(f, "]");
567 break;
568
569 case OP_NOTSTARI:
570 case OP_NOTMINSTARI:
571 case OP_NOTPOSSTARI:
572 case OP_NOTPLUSI:
573 case OP_NOTMINPLUSI:
574 case OP_NOTPOSPLUSI:
575 case OP_NOTQUERYI:
576 case OP_NOTMINQUERYI:
577 case OP_NOTPOSQUERYI:
578 flag = "/i";
579 /* Fall through */
580
581 case OP_NOTSTAR:
582 case OP_NOTMINSTAR:
583 case OP_NOTPOSSTAR:
584 case OP_NOTPLUS:
585 case OP_NOTMINPLUS:
586 case OP_NOTPOSPLUS:
587 case OP_NOTQUERY:
588 case OP_NOTMINQUERY:
589 case OP_NOTPOSQUERY:
590 fprintf(f, " %s [^", flag);
591 extra = print_char(f, code + 1, utf);
592 fprintf(f, "]%s", OP_names[*code]);
593 break;
594
595 case OP_NOTEXACTI:
596 case OP_NOTUPTOI:
597 case OP_NOTMINUPTOI:
598 case OP_NOTPOSUPTOI:
599 flag = "/i";
600 /* Fall through */
601
602 case OP_NOTEXACT:
603 case OP_NOTUPTO:
604 case OP_NOTMINUPTO:
605 case OP_NOTPOSUPTO:
606 fprintf(f, " %s [^", flag);
607 extra = print_char(f, code + 1 + IMM2_SIZE, utf);
608 fprintf(f, "]{");
609 if (*code != OP_NOTEXACT && *code != OP_NOTEXACTI) fprintf(f, "0,");
610 fprintf(f, "%d}", GET2(code,1));
611 if (*code == OP_NOTMINUPTO || *code == OP_NOTMINUPTOI) fprintf(f, "?");
612 else
613 if (*code == OP_NOTPOSUPTO || *code == OP_NOTPOSUPTOI) fprintf(f, "+");
614 break;
615
616 case OP_RECURSE:
617 if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
618 else fprintf(f, " ");
619 fprintf(f, "%s", OP_names[*code]);
620 break;
621
622 case OP_REFI:
623 flag = "/i";
624 /* Fall through */
625 case OP_REF:
626 fprintf(f, " %s \\%d", flag, GET2(code,1));
627 ccode = code + OP_lengths[*code];
628 goto CLASS_REF_REPEAT;
629
630 case OP_DNREFI:
631 flag = "/i";
632 /* Fall through */
633 case OP_DNREF:
634 {
635 PCRE2_SPTR entry = nametable + (GET2(code, 1) * nesize) + IMM2_SIZE;
636 fprintf(f, " %s \\k<", flag);
637 print_custring(f, entry);
638 fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
639 }
640 ccode = code + OP_lengths[*code];
641 goto CLASS_REF_REPEAT;
642
643 case OP_CALLOUT:
644 fprintf(f, " %s %d %d %d", OP_names[*code], code[1 + 2*LINK_SIZE],
645 GET(code, 1), GET(code, 1 + LINK_SIZE));
646 break;
647
648 case OP_CALLOUT_STR:
649 c = code[1 + 4*LINK_SIZE];
650 fprintf(f, " %s %c", OP_names[*code], c);
651 extra = GET(code, 1 + 2*LINK_SIZE);
652 print_custring_bylen(f, code + 2 + 4*LINK_SIZE, extra - 3 - 4*LINK_SIZE);
653 for (i = 0; PRIV(callout_start_delims)[i] != 0; i++)
654 if (c == PRIV(callout_start_delims)[i])
655 {
656 c = PRIV(callout_end_delims)[i];
657 break;
658 }
659 fprintf(f, "%c %d %d %d", c, GET(code, 1 + 3*LINK_SIZE), GET(code, 1),
660 GET(code, 1 + LINK_SIZE));
661 break;
662
663 case OP_PROP:
664 case OP_NOTPROP:
665 print_prop(f, code, " ", "");
666 break;
667
668 /* OP_XCLASS cannot occur in 8-bit, non-UTF mode. However, there's no harm
669 in having this code always here, and it makes it less messy without all
670 those #ifdefs. */
671
672 case OP_CLASS:
673 case OP_NCLASS:
674 case OP_XCLASS:
675 {
676 unsigned int min, max;
677 BOOL printmap;
678 BOOL invertmap = FALSE;
679 uint8_t *map;
680 uint8_t inverted_map[32];
681
682 fprintf(f, " [");
683
684 if (*code == OP_XCLASS)
685 {
686 extra = GET(code, 1);
687 ccode = code + LINK_SIZE + 1;
688 printmap = (*ccode & XCL_MAP) != 0;
689 if ((*ccode & XCL_NOT) != 0)
690 {
691 invertmap = (*ccode & XCL_HASPROP) == 0;
692 fprintf(f, "^");
693 }
694 ccode++;
695 }
696 else
697 {
698 printmap = TRUE;
699 ccode = code + 1;
700 }
701
702 /* Print a bit map */
703
704 if (printmap)
705 {
706 map = (uint8_t *)ccode;
707 if (invertmap)
708 {
709 /* Using 255 ^ instead of ~ avoids clang sanitize warning. */
710 for (i = 0; i < 32; i++) inverted_map[i] = 255 ^ map[i];
711 map = inverted_map;
712 }
713
714 for (i = 0; i < 256; i++)
715 {
716 if ((map[i/8] & (1u << (i&7))) != 0)
717 {
718 int j;
719 for (j = i+1; j < 256; j++)
720 if ((map[j/8] & (1u << (j&7))) == 0) break;
721 if (i == '-' || i == ']') fprintf(f, "\\");
722 if (PRINTABLE(i)) fprintf(f, "%c", i);
723 else fprintf(f, "\\x%02x", i);
724 if (--j > i)
725 {
726 if (j != i + 1) fprintf(f, "-");
727 if (j == '-' || j == ']') fprintf(f, "\\");
728 if (PRINTABLE(j)) fprintf(f, "%c", j);
729 else fprintf(f, "\\x%02x", j);
730 }
731 i = j;
732 }
733 }
734 ccode += 32 / sizeof(PCRE2_UCHAR);
735 }
736
737 /* For an XCLASS there is always some additional data */
738
739 if (*code == OP_XCLASS)
740 {
741 PCRE2_UCHAR ch;
742 while ((ch = *ccode++) != XCL_END)
743 {
744 BOOL not = FALSE;
745 const char *notch = "";
746
747 switch(ch)
748 {
749 case XCL_NOTPROP:
750 not = TRUE;
751 notch = "^";
752 /* Fall through */
753
754 case XCL_PROP:
755 {
756 unsigned int ptype = *ccode++;
757 unsigned int pvalue = *ccode++;
758 const char *s;
759
760 switch(ptype)
761 {
762 case PT_PXGRAPH:
763 fprintf(f, "[:%sgraph:]", notch);
764 break;
765
766 case PT_PXPRINT:
767 fprintf(f, "[:%sprint:]", notch);
768 break;
769
770 case PT_PXPUNCT:
771 fprintf(f, "[:%spunct:]", notch);
772 break;
773
774 default:
775 s = get_ucpname(ptype, pvalue);
776 fprintf(f, "\\%c{%c%s}", (not? 'P':'p'), toupper(s[0]), s+1);
777 break;
778 }
779 }
780 break;
781
782 default:
783 ccode += 1 + print_char(f, ccode, utf);
784 if (ch == XCL_RANGE)
785 {
786 fprintf(f, "-");
787 ccode += 1 + print_char(f, ccode, utf);
788 }
789 break;
790 }
791 }
792 }
793
794 /* Indicate a non-UTF class which was created by negation */
795
796 fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : "");
797
798 /* Handle repeats after a class or a back reference */
799
800 CLASS_REF_REPEAT:
801 switch(*ccode)
802 {
803 case OP_CRSTAR:
804 case OP_CRMINSTAR:
805 case OP_CRPLUS:
806 case OP_CRMINPLUS:
807 case OP_CRQUERY:
808 case OP_CRMINQUERY:
809 case OP_CRPOSSTAR:
810 case OP_CRPOSPLUS:
811 case OP_CRPOSQUERY:
812 fprintf(f, "%s", OP_names[*ccode]);
813 extra += OP_lengths[*ccode];
814 break;
815
816 case OP_CRRANGE:
817 case OP_CRMINRANGE:
818 case OP_CRPOSRANGE:
819 min = GET2(ccode,1);
820 max = GET2(ccode,1 + IMM2_SIZE);
821 if (max == 0) fprintf(f, "{%u,}", min);
822 else fprintf(f, "{%u,%u}", min, max);
823 if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
824 else if (*ccode == OP_CRPOSRANGE) fprintf(f, "+");
825 extra += OP_lengths[*ccode];
826 break;
827
828 /* Do nothing if it's not a repeat; this code stops picky compilers
829 warning about the lack of a default code path. */
830
831 default:
832 break;
833 }
834 }
835 break;
836
837 case OP_MARK:
838 case OP_COMMIT_ARG:
839 case OP_PRUNE_ARG:
840 case OP_SKIP_ARG:
841 case OP_THEN_ARG:
842 fprintf(f, " %s ", OP_names[*code]);
843 print_custring_bylen(f, code + 2, code[1]);
844 extra += code[1];
845 break;
846
847 case OP_THEN:
848 fprintf(f, " %s", OP_names[*code]);
849 break;
850
851 case OP_CIRCM:
852 case OP_DOLLM:
853 flag = "/m";
854 /* Fall through */
855
856 /* Anything else is just an item with no data, but possibly a flag. */
857
858 default:
859 fprintf(f, " %s %s", flag, OP_names[*code]);
860 break;
861 }
862
863 code += OP_lengths[*code] + extra;
864 fprintf(f, "\n");
865 }
866 }
867
868 /* End of pcre2_printint.c */
869