1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2012 University of Cambridge
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40
41 /* This module contains a PCRE private debugging function for printing out the
42 internal form of a compiled regular expression, along with some supporting
43 local functions. This source file is used in two places:
44
45 (1) It is #included by pcre_compile.c when it is compiled in debugging mode
46 (PCRE_DEBUG defined in pcre_internal.h). It is not included in production
47 compiles. In this case PCRE_INCLUDED is defined.
48
49 (2) It is also compiled separately and linked with pcretest.c, which can be
50 asked to print out a compiled regex for debugging purposes. */
51
52 #ifndef PCRE_INCLUDED
53
54 #include "config.h"
55
56 /* For pcretest program. */
57 #define PRIV(name) name
58
59 /* We have to include pcre_internal.h because we need the internal info for
60 displaying the results of pcre_study() and we also need to know about the
61 internal macros, structures, and other internal data values; pcretest has
62 "inside information" compared to a program that strictly follows the PCRE API.
63
64 Although pcre_internal.h does itself include pcre.h, we explicitly include it
65 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
66 appropriately for an application, not for building PCRE. */
67
68 #include "pcre.h"
69 #include "pcre_internal.h"
70
71 /* These are the functions that are contained within. It doesn't seem worth
72 having a separate .h file just for this. */
73
74 #endif /* PCRE_INCLUDED */
75
/* Forward declaration of the printing entry point defined at the end of this
file. Exactly one of the three prototypes is selected by whichever of
COMPILE_PCRE8, COMPILE_PCRE16 or COMPILE_PCRE32 is defined. When this file is
#included into another source (PCRE_INCLUDED defined), the lone "static" below
is prepended to the selected prototype, giving the function internal linkage. */
76 #ifdef PCRE_INCLUDED
77 static /* Keep the following function as private. */
78 #endif
79
80 #if defined COMPILE_PCRE8
81 void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
82 #elif defined COMPILE_PCRE16
83 void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
84 #elif defined COMPILE_PCRE32
85 void pcre32_printint(pcre *external_re, FILE *f, BOOL print_lengths);
86 #endif
87
88 /* Macro that decides whether a character should be output as a literal or in
89 hexadecimal. We don't use isprint() because that can vary from system to system
90 (even without the use of locales) and we want the output always to be the same,
91 for testing purposes. */
92
#ifdef EBCDIC
/* EBCDIC: code points 0x40 (space) up to, but not including, 0xff. */
#define PRINTABLE(c) ((c) >= 0x40 && (c) < 0xff)
#else
/* ASCII: code points 0x20 (space) up to, but not including, 0x7f (DEL). */
#define PRINTABLE(c) ((c) >= 0x20 && (c) < 0x7f)
#endif
98
99 /* The table of operator names, indexed by opcode value. */
100
101 static const char *priv_OP_names[] = { OP_NAME_LIST };
102
103 /* The table of operator lengths, indexed by opcode value. The printing
function in this file adds the entry for the current opcode to the code pointer
in order to step to the next item, and also uses it to account for a repeat
item that follows a class or back reference. In addition, the sizes of both
tables are compared with OP_TABLE_LENGTH by a compile-time check inside the
printing function's switch (thus catching update omissions). */
106
107 static const pcre_uint8 priv_OP_lengths[] = { OP_LENGTHS };
108
109
110
111 /*************************************************
112 * Print single- or multi-byte character *
113 *************************************************/
114
115 static unsigned int
print_char(FILE * f,pcre_uchar * ptr,BOOL utf)116 print_char(FILE *f, pcre_uchar *ptr, BOOL utf)
117 {
118 pcre_uint32 c = *ptr;
119
120 #ifndef SUPPORT_UTF
121
122 (void)utf; /* Avoid compiler warning */
123 if (PRINTABLE(c)) fprintf(f, "%c", (char)c);
124 else if (c <= 0x80) fprintf(f, "\\x%02x", c);
125 else fprintf(f, "\\x{%x}", c);
126 return 0;
127
128 #else
129
130 #if defined COMPILE_PCRE8
131
132 if (!utf || (c & 0xc0) != 0xc0)
133 {
134 if (PRINTABLE(c)) fprintf(f, "%c", (char)c);
135 else if (c < 0x80) fprintf(f, "\\x%02x", c);
136 else fprintf(f, "\\x{%02x}", c);
137 return 0;
138 }
139 else
140 {
141 int i;
142 int a = PRIV(utf8_table4)[c & 0x3f]; /* Number of additional bytes */
143 int s = 6*a;
144 c = (c & PRIV(utf8_table3)[a]) << s;
145 for (i = 1; i <= a; i++)
146 {
147 /* This is a check for malformed UTF-8; it should only occur if the sanity
148 check has been turned off. Rather than swallow random bytes, just stop if
149 we hit a bad one. Print it with \X instead of \x as an indication. */
150
151 if ((ptr[i] & 0xc0) != 0x80)
152 {
153 fprintf(f, "\\X{%x}", c);
154 return i - 1;
155 }
156
157 /* The byte is OK */
158
159 s -= 6;
160 c |= (ptr[i] & 0x3f) << s;
161 }
162 fprintf(f, "\\x{%x}", c);
163 return a;
164 }
165
166 #elif defined COMPILE_PCRE16
167
168 if (!utf || (c & 0xfc00) != 0xd800)
169 {
170 if (PRINTABLE(c)) fprintf(f, "%c", (char)c);
171 else if (c <= 0x80) fprintf(f, "\\x%02x", c);
172 else fprintf(f, "\\x{%02x}", c);
173 return 0;
174 }
175 else
176 {
177 /* This is a check for malformed UTF-16; it should only occur if the sanity
178 check has been turned off. Rather than swallow a low surrogate, just stop if
179 we hit a bad one. Print it with \X instead of \x as an indication. */
180
181 if ((ptr[1] & 0xfc00) != 0xdc00)
182 {
183 fprintf(f, "\\X{%x}", c);
184 return 0;
185 }
186
187 c = (((c & 0x3ff) << 10) | (ptr[1] & 0x3ff)) + 0x10000;
188 fprintf(f, "\\x{%x}", c);
189 return 1;
190 }
191
192 #elif defined COMPILE_PCRE32
193
194 if (!utf || (c & 0xfffff800u) != 0xd800u)
195 {
196 if (PRINTABLE(c)) fprintf(f, "%c", (char)c);
197 else if (c <= 0x80) fprintf(f, "\\x%02x", c);
198 else fprintf(f, "\\x{%x}", c);
199 return 0;
200 }
201 else
202 {
203 /* This is a check for malformed UTF-32; it should only occur if the sanity
204 check has been turned off. Rather than swallow a surrogate, just stop if
205 we hit one. Print it with \X instead of \x as an indication. */
206 fprintf(f, "\\X{%x}", c);
207 return 0;
208 }
209
210 #endif /* COMPILE_PCRE[8|16|32] */
211
212 #endif /* SUPPORT_UTF */
213 }
214
215 /*************************************************
216 * Print uchar string (regardless of utf) *
217 *************************************************/
218
219 static void
print_puchar(FILE * f,PCRE_PUCHAR ptr)220 print_puchar(FILE *f, PCRE_PUCHAR ptr)
221 {
222 while (*ptr != '\0')
223 {
224 register pcre_uint32 c = *ptr++;
225 if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x{%x}", c);
226 }
227 }
228
229 /*************************************************
230 * Find Unicode property name *
231 *************************************************/
232
/* Look up the printable name of a Unicode property.

Arguments:
  ptype    the property type
  pvalue   the property value

Returns: a pointer to the name found in the property name table, or "??" if
         there is no table entry with this type and value, or if the code was
         compiled without SUPPORT_UCP.

The table is scanned from its last entry towards its first, so if several
entries match, the one with the highest index is used. */

static const char *
get_ucpname(unsigned int ptype, unsigned int pvalue)
{
#ifdef SUPPORT_UCP
  int idx = PRIV(utt_size) - 1;

  while (idx >= 0)
    {
    if (PRIV(utt)[idx].type == ptype && PRIV(utt)[idx].value == pvalue)
      return PRIV(utt_names) + PRIV(utt)[idx].name_offset;
    idx--;
    }

  return "??";
#else
  /* Both arguments are deliberately "used" here purely to keep compilers
  from warning about unused parameters; the result is "??" either way. */
  ptype = ptype * pvalue;
  return (ptype == pvalue)? "??" : "??";
#endif
}
249
250
251 /*************************************************
252 * Print Unicode property value *
253 *************************************************/
254
255 /* "Normal" properties can be printed from tables. The PT_CLIST property is a
256 pseudo-property that contains a pointer to a list of case-equivalent
257 characters. This is used only when UCP support is available and UTF mode is
258 selected. It should never occur otherwise, but just in case it does, have
259 something ready to print. */
260
261 static void
print_prop(FILE * f,pcre_uchar * code,const char * before,const char * after)262 print_prop(FILE *f, pcre_uchar *code, const char *before, const char *after)
263 {
264 if (code[1] != PT_CLIST)
265 {
266 fprintf(f, "%s%s %s%s", before, priv_OP_names[*code], get_ucpname(code[1],
267 code[2]), after);
268 }
269 else
270 {
271 const char *not = (*code == OP_PROP)? "" : "not ";
272 #ifndef SUPPORT_UCP
273 fprintf(f, "%s%sclist %d%s", before, not, code[2], after);
274 #else
275 const pcre_uint32 *p = PRIV(ucd_caseless_sets) + code[2];
276 fprintf (f, "%s%sclist", before, not);
277 while (*p < NOTACHAR) fprintf(f, " %04x", *p++);
278 fprintf(f, "%s", after);
279 #endif
280 }
281 }
282
283
284
285
286 /*************************************************
287 * Print compiled regex *
288 *************************************************/
289
290 /* Make this function work for a regex with integers in either byte order.
291 However, we assume that what we are passed is a compiled regex. The
292 print_lengths flag controls whether offsets and lengths of items are printed.
293 They can be turned off from pcretest so that automatic tests on bytecode can be
294 written that do not depend on the value of LINK_SIZE. */
295
/* Write a listing of the compiled pattern to a stream, one item per line.

Arguments:
  external_re     the compiled pattern; it is cast to REAL_PCRE so that the
                  header fields can be read
  f               the stream to write to
  print_lengths   if true, each line starts with the item's offset from the
                  start of the code, and bracket-like items and OP_RECURSE
                  also show the value read by GET(code, 1)

Returns: nothing. The listing ends when OP_END is reached; its name is printed,
         followed by a line of dashes.

The function is named pcre_printint, pcre16_printint or pcre32_printint
according to which COMPILE_PCREnn macro is defined. */
296 #ifdef PCRE_INCLUDED
297 static /* Keep the following function as private. */
298 #endif
299 #if defined COMPILE_PCRE8
300 void
pcre_printint(pcre * external_re,FILE * f,BOOL print_lengths)301 pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths)
302 #elif defined COMPILE_PCRE16
303 void
304 pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths)
305 #elif defined COMPILE_PCRE32
306 void
307 pcre32_printint(pcre *external_re, FILE *f, BOOL print_lengths)
308 #endif
309 {
310 REAL_PCRE *re = (REAL_PCRE *)external_re;
311 pcre_uchar *codestart, *code;
312 BOOL utf;
313
314 unsigned int options = re->options;
315 int offset = re->name_table_offset;
316 int count = re->name_count;
317 int size = re->name_entry_size;
318
/* If the magic number does not match, the header is taken to be in the
opposite byte order: swap the two low-order bytes of offset, count and size,
and reverse the four bytes of options. */
319 if (re->magic_number != MAGIC_NUMBER)
320 {
321 offset = ((offset << 8) & 0xff00) | ((offset >> 8) & 0xff);
322 count = ((count << 8) & 0xff00) | ((count >> 8) & 0xff);
323 size = ((size << 8) & 0xff00) | ((size >> 8) & 0xff);
324 options = ((options << 24) & 0xff000000) |
325 ((options << 8) & 0x00ff0000) |
326 ((options >> 8) & 0x0000ff00) |
327 ((options >> 24) & 0x000000ff);
328 }
329
/* The first item of code is found by stepping past the name table: offset,
then count entries of size each. The arithmetic is in pcre_uchar units. */
330 code = codestart = (pcre_uchar *)re + offset + count * size;
331 /* PCRE_UTF(16|32) have the same value as PCRE_UTF8. */
332 utf = (options & PCRE_UTF8) != 0;
333
/* Each pass of this loop prints one line. The only exit is the return in the
OP_END case. */
334 for(;;)
335 {
336 pcre_uchar *ccode;
337 const char *flag = " ";
338 pcre_uint32 c;
/* extra counts the code units this item occupies over and above the value in
priv_OP_lengths for its opcode; it is added in at the bottom of the loop. */
339 unsigned int extra = 0;
340
341 if (print_lengths)
342 fprintf(f, "%3d ", (int)(code - codestart));
343 else
344 fprintf(f, " ");
345
346 switch(*code)
347 {
348 /* ========================================================================== */
349 /* These cases are never obeyed. This is a fudge that causes a compile-
350 time error if the vectors OP_names or OP_lengths, which are indexed
351 by opcode, are not the correct length. It seems to be the only way to do
352 such a check at compile time, as the sizeof() operator does not work in
353 the C preprocessor. */
354
/* How the check works: if both sizes are right, the second label is
OP_TABLE_LENGTH + 1 and is distinct from the first; if either is wrong, it is
OP_TABLE_LENGTH + 0, which duplicates the first label and will not compile. */
355 case OP_TABLE_LENGTH:
356 case OP_TABLE_LENGTH +
357 ((sizeof(priv_OP_names)/sizeof(const char *) == OP_TABLE_LENGTH) &&
358 (sizeof(priv_OP_lengths) == OP_TABLE_LENGTH)):
359 break;
360 /* ========================================================================== */
361
362 case OP_END:
363 fprintf(f, " %s\n", priv_OP_names[*code]);
364 fprintf(f, "------------------------------------------------------------------\n");
365 return;
366
/* A run of consecutive OP_CHAR items is printed on a single line. The code
pointer is advanced inside the do-loop, so "continue" is used to bypass the
common advance at the bottom of the main loop. */
367 case OP_CHAR:
368 fprintf(f, " ");
369 do
370 {
371 code++;
372 code += 1 + print_char(f, code, utf);
373 }
374 while (*code == OP_CHAR);
375 fprintf(f, "\n");
376 continue;
377
/* The same treatment for a run of caseless characters. */
378 case OP_CHARI:
379 fprintf(f, " /i ");
380 do
381 {
382 code++;
383 code += 1 + print_char(f, code, utf);
384 }
385 while (*code == OP_CHARI);
386 fprintf(f, "\n");
387 continue;
388
/* Capturing brackets: the link value (if wanted), the opcode name, and the
number read from just after the link. */
389 case OP_CBRA:
390 case OP_CBRAPOS:
391 case OP_SCBRA:
392 case OP_SCBRAPOS:
393 if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
394 else fprintf(f, " ");
395 fprintf(f, "%s %d", priv_OP_names[*code], GET2(code, 1+LINK_SIZE));
396 break;
397
/* Items that are printed as the link value (if wanted) and the opcode name. */
398 case OP_BRA:
399 case OP_BRAPOS:
400 case OP_SBRA:
401 case OP_SBRAPOS:
402 case OP_KETRMAX:
403 case OP_KETRMIN:
404 case OP_KETRPOS:
405 case OP_ALT:
406 case OP_KET:
407 case OP_ASSERT:
408 case OP_ASSERT_NOT:
409 case OP_ASSERTBACK:
410 case OP_ASSERTBACK_NOT:
411 case OP_ONCE:
412 case OP_ONCE_NC:
413 case OP_COND:
414 case OP_SCOND:
415 case OP_REVERSE:
416 if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
417 else fprintf(f, " ");
418 fprintf(f, "%s", priv_OP_names[*code]);
419 break;
420
421 case OP_CLOSE:
422 fprintf(f, " %s %d", priv_OP_names[*code], GET2(code, 1));
423 break;
424
425 case OP_CREF:
426 fprintf(f, "%3d %s", GET2(code,1), priv_OP_names[*code]);
427 break;
428
/* The first operand is an index into the name table; the name text starts
IMM2_SIZE units into the selected entry. The second operand is printed after
the name. */
429 case OP_DNCREF:
430 {
431 pcre_uchar *entry = (pcre_uchar *)re + offset + (GET2(code, 1) * size) +
432 IMM2_SIZE;
433 fprintf(f, " %s Cond ref <", flag);
434 print_puchar(f, entry);
435 fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
436 }
437 break;
438
439 case OP_RREF:
440 c = GET2(code, 1);
441 if (c == RREF_ANY)
442 fprintf(f, " Cond recurse any");
443 else
444 fprintf(f, " Cond recurse %d", c);
445 break;
446
/* Name-table lookup as for OP_DNCREF above. */
447 case OP_DNRREF:
448 {
449 pcre_uchar *entry = (pcre_uchar *)re + offset + (GET2(code, 1) * size) +
450 IMM2_SIZE;
451 fprintf(f, " %s Cond recurse <", flag);
452 print_puchar(f, entry);
453 fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
454 }
455 break;
456
457 case OP_DEF:
458 fprintf(f, " Cond def");
459 break;
460
/* Single-item repeats. The caseless opcodes set the flag and then share the
code for the caseful ones. */
461 case OP_STARI:
462 case OP_MINSTARI:
463 case OP_POSSTARI:
464 case OP_PLUSI:
465 case OP_MINPLUSI:
466 case OP_POSPLUSI:
467 case OP_QUERYI:
468 case OP_MINQUERYI:
469 case OP_POSQUERYI:
470 flag = "/i";
471 /* Fall through */
472 case OP_STAR:
473 case OP_MINSTAR:
474 case OP_POSSTAR:
475 case OP_PLUS:
476 case OP_MINPLUS:
477 case OP_POSPLUS:
478 case OP_QUERY:
479 case OP_MINQUERY:
480 case OP_POSQUERY:
481 case OP_TYPESTAR:
482 case OP_TYPEMINSTAR:
483 case OP_TYPEPOSSTAR:
484 case OP_TYPEPLUS:
485 case OP_TYPEMINPLUS:
486 case OP_TYPEPOSPLUS:
487 case OP_TYPEQUERY:
488 case OP_TYPEMINQUERY:
489 case OP_TYPEPOSQUERY:
490 fprintf(f, " %s ", flag);
/* NOTE(review): this test appears to rely on the OP_TYPExxx opcodes listed
above all having values of at least OP_TYPESTAR and the other opcodes in this
group having lower values - confirm against the opcode definitions. For the
OP_TYPExxx opcodes the operand is another opcode (possibly a property, whose
type and value take two further units); otherwise it is a character. */
491 if (*code >= OP_TYPESTAR)
492 {
493 if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
494 {
495 print_prop(f, code + 1, "", " ");
496 extra = 2;
497 }
498 else fprintf(f, "%s", priv_OP_names[code[1]]);
499 }
500 else extra = print_char(f, code+1, utf);
501 fprintf(f, "%s", priv_OP_names[*code]);
502 break;
503
/* Counted repeats of a character: the count comes first and the character
follows it, IMM2_SIZE units later. Everything except an exact repeat is shown
with a lower bound of zero. */
504 case OP_EXACTI:
505 case OP_UPTOI:
506 case OP_MINUPTOI:
507 case OP_POSUPTOI:
508 flag = "/i";
509 /* Fall through */
510 case OP_EXACT:
511 case OP_UPTO:
512 case OP_MINUPTO:
513 case OP_POSUPTO:
514 fprintf(f, " %s ", flag);
515 extra = print_char(f, code + 1 + IMM2_SIZE, utf);
516 fprintf(f, "{");
517 if (*code != OP_EXACT && *code != OP_EXACTI) fprintf(f, "0,");
518 fprintf(f, "%d}", GET2(code,1));
519 if (*code == OP_MINUPTO || *code == OP_MINUPTOI) fprintf(f, "?");
520 else if (*code == OP_POSUPTO || *code == OP_POSUPTOI) fprintf(f, "+");
521 break;
522
/* Counted repeats of a character type; the operand after the count is an
opcode, possibly a property with its two data units. */
523 case OP_TYPEEXACT:
524 case OP_TYPEUPTO:
525 case OP_TYPEMINUPTO:
526 case OP_TYPEPOSUPTO:
527 if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
528 {
529 print_prop(f, code + IMM2_SIZE + 1, " ", " ");
530 extra = 2;
531 }
532 else fprintf(f, " %s", priv_OP_names[code[1 + IMM2_SIZE]]);
533 fprintf(f, "{");
534 if (*code != OP_TYPEEXACT) fprintf(f, "0,");
535 fprintf(f, "%d}", GET2(code,1));
536 if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
537 else if (*code == OP_TYPEPOSUPTO) fprintf(f, "+");
538 break;
539
/* A negated single character is shown as a negated class, [^x]. */
540 case OP_NOTI:
541 flag = "/i";
542 /* Fall through */
543 case OP_NOT:
544 fprintf(f, " %s [^", flag);
545 extra = print_char(f, code + 1, utf);
546 fprintf(f, "]");
547 break;
548
549 case OP_NOTSTARI:
550 case OP_NOTMINSTARI:
551 case OP_NOTPOSSTARI:
552 case OP_NOTPLUSI:
553 case OP_NOTMINPLUSI:
554 case OP_NOTPOSPLUSI:
555 case OP_NOTQUERYI:
556 case OP_NOTMINQUERYI:
557 case OP_NOTPOSQUERYI:
558 flag = "/i";
559 /* Fall through */
560
561 case OP_NOTSTAR:
562 case OP_NOTMINSTAR:
563 case OP_NOTPOSSTAR:
564 case OP_NOTPLUS:
565 case OP_NOTMINPLUS:
566 case OP_NOTPOSPLUS:
567 case OP_NOTQUERY:
568 case OP_NOTMINQUERY:
569 case OP_NOTPOSQUERY:
570 fprintf(f, " %s [^", flag);
571 extra = print_char(f, code + 1, utf);
572 fprintf(f, "]%s", priv_OP_names[*code]);
573 break;
574
575 case OP_NOTEXACTI:
576 case OP_NOTUPTOI:
577 case OP_NOTMINUPTOI:
578 case OP_NOTPOSUPTOI:
579 flag = "/i";
580 /* Fall through */
581
582 case OP_NOTEXACT:
583 case OP_NOTUPTO:
584 case OP_NOTMINUPTO:
585 case OP_NOTPOSUPTO:
586 fprintf(f, " %s [^", flag);
587 extra = print_char(f, code + 1 + IMM2_SIZE, utf);
588 fprintf(f, "]{");
589 if (*code != OP_NOTEXACT && *code != OP_NOTEXACTI) fprintf(f, "0,");
590 fprintf(f, "%d}", GET2(code,1));
591 if (*code == OP_NOTMINUPTO || *code == OP_NOTMINUPTOI) fprintf(f, "?");
592 else
593 if (*code == OP_NOTPOSUPTO || *code == OP_NOTPOSUPTOI) fprintf(f, "+");
594 break;
595
596 case OP_RECURSE:
597 if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
598 else fprintf(f, " ");
599 fprintf(f, "%s", priv_OP_names[*code]);
600 break;
601
/* Back references. After printing the reference, ccode is set to the item
that follows it and control jumps into the class-handling block below, so that
a following repeat item is printed on the same line. */
602 case OP_REFI:
603 flag = "/i";
604 /* Fall through */
605 case OP_REF:
606 fprintf(f, " %s \\%d", flag, GET2(code,1));
607 ccode = code + priv_OP_lengths[*code];
608 goto CLASS_REF_REPEAT;
609
610 case OP_DNREFI:
611 flag = "/i";
612 /* Fall through */
613 case OP_DNREF:
614 {
615 pcre_uchar *entry = (pcre_uchar *)re + offset + (GET2(code, 1) * size) +
616 IMM2_SIZE;
617 fprintf(f, " %s \\k<", flag);
618 print_puchar(f, entry);
619 fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
620 }
621 ccode = code + priv_OP_lengths[*code];
622 goto CLASS_REF_REPEAT;
623
624 case OP_CALLOUT:
625 fprintf(f, " %s %d %d %d", priv_OP_names[*code], code[1], GET(code,2),
626 GET(code, 2 + LINK_SIZE));
627 break;
628
629 case OP_PROP:
630 case OP_NOTPROP:
631 print_prop(f, code, " ", "");
632 break;
633
634 /* OP_XCLASS cannot occur in 8-bit, non-UTF mode. However, there's no harm
635 in having this code always here, and it makes it less messy without all
636 those #ifdefs. */
637
638 case OP_CLASS:
639 case OP_NCLASS:
640 case OP_XCLASS:
641 {
642 int i;
643 unsigned int min, max;
644 BOOL printmap;
645 BOOL invertmap = FALSE;
646 pcre_uint8 *map;
647 pcre_uint8 inverted_map[32];
648
649 fprintf(f, " [");
650
/* For an XCLASS, extra is set from GET(code, 1), and the unit after the link
holds flag bits: whether a bit map is present, and whether the class is
negated. The map is inverted for printing only when the class is negated and
the XCL_HASPROP bit is not set. OP_CLASS and OP_NCLASS always have a map,
directly after the opcode. */
651 if (*code == OP_XCLASS)
652 {
653 extra = GET(code, 1);
654 ccode = code + LINK_SIZE + 1;
655 printmap = (*ccode & XCL_MAP) != 0;
656 if ((*ccode & XCL_NOT) != 0)
657 {
658 invertmap = (*ccode & XCL_HASPROP) == 0;
659 fprintf(f, "^");
660 }
661 ccode++;
662 }
663 else
664 {
665 printmap = TRUE;
666 ccode = code + 1;
667 }
668
669 /* Print a bit map */
670
/* The map is 32 bytes, read as 256 bits: bit (i&7) of byte i/8 is tested for
character value i. */
671 if (printmap)
672 {
673 map = (pcre_uint8 *)ccode;
674 if (invertmap)
675 {
676 for (i = 0; i < 32; i++) inverted_map[i] = ~map[i];
677 map = inverted_map;
678 }
679
680 for (i = 0; i < 256; i++)
681 {
682 if ((map[i/8] & (1 << (i&7))) != 0)
683 {
684 int j;
/* Find the first character after i that is not in the class. */
685 for (j = i+1; j < 256; j++)
686 if ((map[j/8] & (1 << (j&7))) == 0) break;
687 if (i == '-' || i == ']') fprintf(f, "\\");
688 if (PRINTABLE(i)) fprintf(f, "%c", i);
689 else fprintf(f, "\\x%02x", i);
/* After the decrement j is the last character of the run. A run of exactly
two characters is printed without the hyphen. */
690 if (--j > i)
691 {
692 if (j != i + 1) fprintf(f, "-");
693 if (j == '-' || j == ']') fprintf(f, "\\");
694 if (PRINTABLE(j)) fprintf(f, "%c", j);
695 else fprintf(f, "\\x%02x", j);
696 }
/* Resume scanning after the run (the loop increment steps past j). */
697 i = j;
698 }
699 }
/* Step over the 32-byte map, measured in code units. */
700 ccode += 32 / sizeof(pcre_uchar);
701 }
702
703 /* For an XCLASS there is always some additional data */
704
705 if (*code == OP_XCLASS)
706 {
707 pcre_uchar ch;
708 while ((ch = *ccode++) != XCL_END)
709 {
710 BOOL not = FALSE;
711 const char *notch = "";
712
713 switch(ch)
714 {
715 case XCL_NOTPROP:
716 not = TRUE;
717 notch = "^";
718 /* Fall through */
719
720 case XCL_PROP:
721 {
722 unsigned int ptype = *ccode++;
723 unsigned int pvalue = *ccode++;
724
725 switch(ptype)
726 {
727 case PT_PXGRAPH:
728 fprintf(f, "[:%sgraph:]", notch);
729 break;
730
731 case PT_PXPRINT:
732 fprintf(f, "[:%sprint:]", notch);
733 break;
734
735 case PT_PXPUNCT:
736 fprintf(f, "[:%spunct:]", notch);
737 break;
738
739 default:
740 fprintf(f, "\\%c{%s}", (not? 'P':'p'),
741 get_ucpname(ptype, pvalue));
742 break;
743 }
744 }
745 break;
746
/* Any other item is a character, or for XCL_RANGE two characters that are the
ends of a range. */
747 default:
748 ccode += 1 + print_char(f, ccode, utf);
749 if (ch == XCL_RANGE)
750 {
751 fprintf(f, "-");
752 ccode += 1 + print_char(f, ccode, utf);
753 }
754 break;
755 }
756 }
757 }
758
759 /* Indicate a non-UTF class which was created by negation */
760
761 fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : "");
762
763 /* Handle repeats after a class or a back reference */
764
/* This label is also the target of the gotos in the back reference cases
above. At this point ccode addresses the item that follows the class or
reference; if it is a repeat, it is printed here and its length is added to
extra so that the main loop steps over it as well. */
765 CLASS_REF_REPEAT:
766 switch(*ccode)
767 {
768 case OP_CRSTAR:
769 case OP_CRMINSTAR:
770 case OP_CRPLUS:
771 case OP_CRMINPLUS:
772 case OP_CRQUERY:
773 case OP_CRMINQUERY:
774 case OP_CRPOSSTAR:
775 case OP_CRPOSPLUS:
776 case OP_CRPOSQUERY:
777 fprintf(f, "%s", priv_OP_names[*ccode]);
778 extra += priv_OP_lengths[*ccode];
779 break;
780
/* A maximum of zero is printed as an open-ended range. */
781 case OP_CRRANGE:
782 case OP_CRMINRANGE:
783 case OP_CRPOSRANGE:
784 min = GET2(ccode,1);
785 max = GET2(ccode,1 + IMM2_SIZE);
786 if (max == 0) fprintf(f, "{%u,}", min);
787 else fprintf(f, "{%u,%u}", min, max);
788 if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
789 else if (*ccode == OP_CRPOSRANGE) fprintf(f, "+");
790 extra += priv_OP_lengths[*ccode];
791 break;
792
793 /* Do nothing if it's not a repeat; this code stops picky compilers
794 warning about the lack of a default code path. */
795
796 default:
797 break;
798 }
799 }
800 break;
801
/* Verbs with an argument: the text printed starts at code + 2, and the value
in code[1] is added to extra (presumably the length of that text - confirm
against the code that emits these items). */
802 case OP_MARK:
803 case OP_PRUNE_ARG:
804 case OP_SKIP_ARG:
805 case OP_THEN_ARG:
806 fprintf(f, " %s ", priv_OP_names[*code]);
807 print_puchar(f, code + 2);
808 extra += code[1];
809 break;
810
811 case OP_THEN:
812 fprintf(f, " %s", priv_OP_names[*code]);
813 break;
814
815 case OP_CIRCM:
816 case OP_DOLLM:
817 flag = "/m";
818 /* Fall through */
819
820 /* Anything else is just an item with no data, but possibly a flag. */
821
822 default:
823 fprintf(f, " %s %s", flag, priv_OP_names[*code]);
824 break;
825 }
826
/* Advance to the next item: the table length for this opcode plus whatever
additional units were noted in extra while printing it. */
827 code += priv_OP_lengths[*code] + extra;
828 fprintf(f, "\n");
829 }
830 }
831
832 /* End of pcre_printint.src */
833