1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Original API code Copyright (c) 1997-2012 University of Cambridge
10 New API code Copyright (c) 2016-2018 University of Cambridge
11
12 -----------------------------------------------------------------------------
13 Redistribution and use in source and binary forms, with or without
14 modification, are permitted provided that the following conditions are met:
15
16 * Redistributions of source code must retain the above copyright notice,
17 this list of conditions and the following disclaimer.
18
19 * Redistributions in binary form must reproduce the above copyright
20 notice, this list of conditions and the following disclaimer in the
21 documentation and/or other materials provided with the distribution.
22
23 * Neither the name of the University of Cambridge nor the names of its
24 contributors may be used to endorse or promote products derived from
25 this software without specific prior written permission.
26
27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 POSSIBILITY OF SUCH DAMAGE.
38 -----------------------------------------------------------------------------
39 */
40
41
42 #ifdef HAVE_CONFIG_H
43 #include "config.h"
44 #endif
45
46 #include "pcre2_internal.h"
47
48
49 /*************************************************
50 * Return info about compiled pattern *
51 *************************************************/
52
53 /*
54 Arguments:
55 code points to compiled code
56 what what information is required
57 where where to put the information; if NULL, return length
58
59 Returns: 0 when data returned
60 > 0 when length requested
61 < 0 on error or unset value
62 */
63
64 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_pattern_info(const pcre2_code * code,uint32_t what,void * where)65 pcre2_pattern_info(const pcre2_code *code, uint32_t what, void *where)
66 {
67 const pcre2_real_code *re = (pcre2_real_code *)code;
68
69 if (where == NULL) /* Requests field length */
70 {
71 switch(what)
72 {
73 case PCRE2_INFO_ALLOPTIONS:
74 case PCRE2_INFO_ARGOPTIONS:
75 case PCRE2_INFO_BACKREFMAX:
76 case PCRE2_INFO_BSR:
77 case PCRE2_INFO_CAPTURECOUNT:
78 case PCRE2_INFO_DEPTHLIMIT:
79 case PCRE2_INFO_EXTRAOPTIONS:
80 case PCRE2_INFO_FIRSTCODETYPE:
81 case PCRE2_INFO_FIRSTCODEUNIT:
82 case PCRE2_INFO_HASBACKSLASHC:
83 case PCRE2_INFO_HASCRORLF:
84 case PCRE2_INFO_HEAPLIMIT:
85 case PCRE2_INFO_JCHANGED:
86 case PCRE2_INFO_LASTCODETYPE:
87 case PCRE2_INFO_LASTCODEUNIT:
88 case PCRE2_INFO_MATCHEMPTY:
89 case PCRE2_INFO_MATCHLIMIT:
90 case PCRE2_INFO_MAXLOOKBEHIND:
91 case PCRE2_INFO_MINLENGTH:
92 case PCRE2_INFO_NAMEENTRYSIZE:
93 case PCRE2_INFO_NAMECOUNT:
94 case PCRE2_INFO_NEWLINE:
95 return sizeof(uint32_t);
96
97 case PCRE2_INFO_FIRSTBITMAP:
98 return sizeof(const uint8_t *);
99
100 case PCRE2_INFO_JITSIZE:
101 case PCRE2_INFO_SIZE:
102 case PCRE2_INFO_FRAMESIZE:
103 return sizeof(size_t);
104
105 case PCRE2_INFO_NAMETABLE:
106 return sizeof(PCRE2_SPTR);
107 }
108 }
109
110 if (re == NULL) return PCRE2_ERROR_NULL;
111
112 /* Check that the first field in the block is the magic number. If it is not,
113 return with PCRE2_ERROR_BADMAGIC. */
114
115 if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
116
117 /* Check that this pattern was compiled in the correct bit mode */
118
119 if ((re->flags & (PCRE2_CODE_UNIT_WIDTH/8)) == 0) return PCRE2_ERROR_BADMODE;
120
121 switch(what)
122 {
123 case PCRE2_INFO_ALLOPTIONS:
124 *((uint32_t *)where) = re->overall_options;
125 break;
126
127 case PCRE2_INFO_ARGOPTIONS:
128 *((uint32_t *)where) = re->compile_options;
129 break;
130
131 case PCRE2_INFO_BACKREFMAX:
132 *((uint32_t *)where) = re->top_backref;
133 break;
134
135 case PCRE2_INFO_BSR:
136 *((uint32_t *)where) = re->bsr_convention;
137 break;
138
139 case PCRE2_INFO_CAPTURECOUNT:
140 *((uint32_t *)where) = re->top_bracket;
141 break;
142
143 case PCRE2_INFO_DEPTHLIMIT:
144 *((uint32_t *)where) = re->limit_depth;
145 if (re->limit_depth == UINT32_MAX) return PCRE2_ERROR_UNSET;
146 break;
147
148 case PCRE2_INFO_EXTRAOPTIONS:
149 *((uint32_t *)where) = re->extra_options;
150 break;
151
152 case PCRE2_INFO_FIRSTCODETYPE:
153 *((uint32_t *)where) = ((re->flags & PCRE2_FIRSTSET) != 0)? 1 :
154 ((re->flags & PCRE2_STARTLINE) != 0)? 2 : 0;
155 break;
156
157 case PCRE2_INFO_FIRSTCODEUNIT:
158 *((uint32_t *)where) = ((re->flags & PCRE2_FIRSTSET) != 0)?
159 re->first_codeunit : 0;
160 break;
161
162 case PCRE2_INFO_FIRSTBITMAP:
163 *((const uint8_t **)where) = ((re->flags & PCRE2_FIRSTMAPSET) != 0)?
164 &(re->start_bitmap[0]) : NULL;
165 break;
166
167 case PCRE2_INFO_FRAMESIZE:
168 *((size_t *)where) = offsetof(heapframe, ovector) +
169 re->top_bracket * 2 * sizeof(PCRE2_SIZE);
170 break;
171
172 case PCRE2_INFO_HASBACKSLASHC:
173 *((uint32_t *)where) = (re->flags & PCRE2_HASBKC) != 0;
174 break;
175
176 case PCRE2_INFO_HASCRORLF:
177 *((uint32_t *)where) = (re->flags & PCRE2_HASCRORLF) != 0;
178 break;
179
180 case PCRE2_INFO_HEAPLIMIT:
181 *((uint32_t *)where) = re->limit_heap;
182 if (re->limit_heap == UINT32_MAX) return PCRE2_ERROR_UNSET;
183 break;
184
185 case PCRE2_INFO_JCHANGED:
186 *((uint32_t *)where) = (re->flags & PCRE2_JCHANGED) != 0;
187 break;
188
189 case PCRE2_INFO_JITSIZE:
190 #ifdef SUPPORT_JIT
191 *((size_t *)where) = (re->executable_jit != NULL)?
192 PRIV(jit_get_size)(re->executable_jit) : 0;
193 #else
194 *((size_t *)where) = 0;
195 #endif
196 break;
197
198 case PCRE2_INFO_LASTCODETYPE:
199 *((uint32_t *)where) = ((re->flags & PCRE2_LASTSET) != 0)? 1 : 0;
200 break;
201
202 case PCRE2_INFO_LASTCODEUNIT:
203 *((uint32_t *)where) = ((re->flags & PCRE2_LASTSET) != 0)?
204 re->last_codeunit : 0;
205 break;
206
207 case PCRE2_INFO_MATCHEMPTY:
208 *((uint32_t *)where) = (re->flags & PCRE2_MATCH_EMPTY) != 0;
209 break;
210
211 case PCRE2_INFO_MATCHLIMIT:
212 *((uint32_t *)where) = re->limit_match;
213 if (re->limit_match == UINT32_MAX) return PCRE2_ERROR_UNSET;
214 break;
215
216 case PCRE2_INFO_MAXLOOKBEHIND:
217 *((uint32_t *)where) = re->max_lookbehind;
218 break;
219
220 case PCRE2_INFO_MINLENGTH:
221 *((uint32_t *)where) = re->minlength;
222 break;
223
224 case PCRE2_INFO_NAMEENTRYSIZE:
225 *((uint32_t *)where) = re->name_entry_size;
226 break;
227
228 case PCRE2_INFO_NAMECOUNT:
229 *((uint32_t *)where) = re->name_count;
230 break;
231
232 case PCRE2_INFO_NAMETABLE:
233 *((PCRE2_SPTR *)where) = (PCRE2_SPTR)((char *)re + sizeof(pcre2_real_code));
234 break;
235
236 case PCRE2_INFO_NEWLINE:
237 *((uint32_t *)where) = re->newline_convention;
238 break;
239
240 case PCRE2_INFO_SIZE:
241 *((size_t *)where) = re->blocksize;
242 break;
243
244 default: return PCRE2_ERROR_BADOPTION;
245 }
246
247 return 0;
248 }
249
250
251
252 /*************************************************
253 * Callout enumerator *
254 *************************************************/
255
256 /*
257 Arguments:
258 code points to compiled code
259 callback function called for each callout block
260 callout_data user data passed to the callback
261
262 Returns: 0 when successfully completed
263 < 0 on local error
264 != 0 for callback error
265 */
266
267 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_callout_enumerate(const pcre2_code * code,int (* callback)(pcre2_callout_enumerate_block *,void *),void * callout_data)268 pcre2_callout_enumerate(const pcre2_code *code,
269 int (*callback)(pcre2_callout_enumerate_block *, void *), void *callout_data)
270 {
271 pcre2_real_code *re = (pcre2_real_code *)code;
272 pcre2_callout_enumerate_block cb;
273 PCRE2_SPTR cc;
274 #ifdef SUPPORT_UNICODE
275 BOOL utf;
276 #endif
277
278 if (re == NULL) return PCRE2_ERROR_NULL;
279
280 #ifdef SUPPORT_UNICODE
281 utf = (re->overall_options & PCRE2_UTF) != 0;
282 #endif
283
284 /* Check that the first field in the block is the magic number. If it is not,
285 return with PCRE2_ERROR_BADMAGIC. */
286
287 if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
288
289 /* Check that this pattern was compiled in the correct bit mode */
290
291 if ((re->flags & (PCRE2_CODE_UNIT_WIDTH/8)) == 0) return PCRE2_ERROR_BADMODE;
292
293 cb.version = 0;
294 cc = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code))
295 + re->name_count * re->name_entry_size;
296
297 while (TRUE)
298 {
299 int rc;
300 switch (*cc)
301 {
302 case OP_END:
303 return 0;
304
305 case OP_CHAR:
306 case OP_CHARI:
307 case OP_NOT:
308 case OP_NOTI:
309 case OP_STAR:
310 case OP_MINSTAR:
311 case OP_PLUS:
312 case OP_MINPLUS:
313 case OP_QUERY:
314 case OP_MINQUERY:
315 case OP_UPTO:
316 case OP_MINUPTO:
317 case OP_EXACT:
318 case OP_POSSTAR:
319 case OP_POSPLUS:
320 case OP_POSQUERY:
321 case OP_POSUPTO:
322 case OP_STARI:
323 case OP_MINSTARI:
324 case OP_PLUSI:
325 case OP_MINPLUSI:
326 case OP_QUERYI:
327 case OP_MINQUERYI:
328 case OP_UPTOI:
329 case OP_MINUPTOI:
330 case OP_EXACTI:
331 case OP_POSSTARI:
332 case OP_POSPLUSI:
333 case OP_POSQUERYI:
334 case OP_POSUPTOI:
335 case OP_NOTSTAR:
336 case OP_NOTMINSTAR:
337 case OP_NOTPLUS:
338 case OP_NOTMINPLUS:
339 case OP_NOTQUERY:
340 case OP_NOTMINQUERY:
341 case OP_NOTUPTO:
342 case OP_NOTMINUPTO:
343 case OP_NOTEXACT:
344 case OP_NOTPOSSTAR:
345 case OP_NOTPOSPLUS:
346 case OP_NOTPOSQUERY:
347 case OP_NOTPOSUPTO:
348 case OP_NOTSTARI:
349 case OP_NOTMINSTARI:
350 case OP_NOTPLUSI:
351 case OP_NOTMINPLUSI:
352 case OP_NOTQUERYI:
353 case OP_NOTMINQUERYI:
354 case OP_NOTUPTOI:
355 case OP_NOTMINUPTOI:
356 case OP_NOTEXACTI:
357 case OP_NOTPOSSTARI:
358 case OP_NOTPOSPLUSI:
359 case OP_NOTPOSQUERYI:
360 case OP_NOTPOSUPTOI:
361 cc += PRIV(OP_lengths)[*cc];
362 #ifdef SUPPORT_UNICODE
363 if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
364 #endif
365 break;
366
367 case OP_TYPESTAR:
368 case OP_TYPEMINSTAR:
369 case OP_TYPEPLUS:
370 case OP_TYPEMINPLUS:
371 case OP_TYPEQUERY:
372 case OP_TYPEMINQUERY:
373 case OP_TYPEUPTO:
374 case OP_TYPEMINUPTO:
375 case OP_TYPEEXACT:
376 case OP_TYPEPOSSTAR:
377 case OP_TYPEPOSPLUS:
378 case OP_TYPEPOSQUERY:
379 case OP_TYPEPOSUPTO:
380 cc += PRIV(OP_lengths)[*cc];
381 #ifdef SUPPORT_UNICODE
382 if (cc[-1] == OP_PROP || cc[-1] == OP_NOTPROP) cc += 2;
383 #endif
384 break;
385
386 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
387 case OP_XCLASS:
388 cc += GET(cc, 1);
389 break;
390 #endif
391
392 case OP_MARK:
393 case OP_COMMIT_ARG:
394 case OP_PRUNE_ARG:
395 case OP_SKIP_ARG:
396 case OP_THEN_ARG:
397 cc += PRIV(OP_lengths)[*cc] + cc[1];
398 break;
399
400 case OP_CALLOUT:
401 cb.pattern_position = GET(cc, 1);
402 cb.next_item_length = GET(cc, 1 + LINK_SIZE);
403 cb.callout_number = cc[1 + 2*LINK_SIZE];
404 cb.callout_string_offset = 0;
405 cb.callout_string_length = 0;
406 cb.callout_string = NULL;
407 rc = callback(&cb, callout_data);
408 if (rc != 0) return rc;
409 cc += PRIV(OP_lengths)[*cc];
410 break;
411
412 case OP_CALLOUT_STR:
413 cb.pattern_position = GET(cc, 1);
414 cb.next_item_length = GET(cc, 1 + LINK_SIZE);
415 cb.callout_number = 0;
416 cb.callout_string_offset = GET(cc, 1 + 3*LINK_SIZE);
417 cb.callout_string_length =
418 GET(cc, 1 + 2*LINK_SIZE) - (1 + 4*LINK_SIZE) - 2;
419 cb.callout_string = cc + (1 + 4*LINK_SIZE) + 1;
420 rc = callback(&cb, callout_data);
421 if (rc != 0) return rc;
422 cc += GET(cc, 1 + 2*LINK_SIZE);
423 break;
424
425 default:
426 cc += PRIV(OP_lengths)[*cc];
427 break;
428 }
429 }
430 }
431
432 /* End of pcre2_pattern_info.c */
433