1 /*************************************************
2 *      Perl-Compatible Regular Expressions       *
3 *************************************************/
4 
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7 
8                        Written by Philip Hazel
9      Original API code Copyright (c) 1997-2012 University of Cambridge
10           New API code Copyright (c) 2016-2018 University of Cambridge
11 
12 -----------------------------------------------------------------------------
13 Redistribution and use in source and binary forms, with or without
14 modification, are permitted provided that the following conditions are met:
15 
16     * Redistributions of source code must retain the above copyright notice,
17       this list of conditions and the following disclaimer.
18 
19     * Redistributions in binary form must reproduce the above copyright
20       notice, this list of conditions and the following disclaimer in the
21       documentation and/or other materials provided with the distribution.
22 
23     * Neither the name of the University of Cambridge nor the names of its
24       contributors may be used to endorse or promote products derived from
25       this software without specific prior written permission.
26 
27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 POSSIBILITY OF SUCH DAMAGE.
38 -----------------------------------------------------------------------------
39 */
40 
41 
42 #ifdef HAVE_CONFIG_H
43 #include "config.h"
44 #endif
45 
46 #include "pcre2_internal.h"
47 
48 
49 /*************************************************
50 *        Return info about compiled pattern      *
51 *************************************************/
52 
53 /*
54 Arguments:
55   code          points to compiled code
56   what          what information is required
57   where         where to put the information; if NULL, return length
58 
59 Returns:        0 when data returned
60                 > 0 when length requested
61                 < 0 on error or unset value
62 */
63 
64 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_pattern_info(const pcre2_code * code,uint32_t what,void * where)65 pcre2_pattern_info(const pcre2_code *code, uint32_t what, void *where)
66 {
67 const pcre2_real_code *re = (pcre2_real_code *)code;
68 
69 if (where == NULL)   /* Requests field length */
70   {
71   switch(what)
72     {
73     case PCRE2_INFO_ALLOPTIONS:
74     case PCRE2_INFO_ARGOPTIONS:
75     case PCRE2_INFO_BACKREFMAX:
76     case PCRE2_INFO_BSR:
77     case PCRE2_INFO_CAPTURECOUNT:
78     case PCRE2_INFO_DEPTHLIMIT:
79     case PCRE2_INFO_EXTRAOPTIONS:
80     case PCRE2_INFO_FIRSTCODETYPE:
81     case PCRE2_INFO_FIRSTCODEUNIT:
82     case PCRE2_INFO_HASBACKSLASHC:
83     case PCRE2_INFO_HASCRORLF:
84     case PCRE2_INFO_HEAPLIMIT:
85     case PCRE2_INFO_JCHANGED:
86     case PCRE2_INFO_LASTCODETYPE:
87     case PCRE2_INFO_LASTCODEUNIT:
88     case PCRE2_INFO_MATCHEMPTY:
89     case PCRE2_INFO_MATCHLIMIT:
90     case PCRE2_INFO_MAXLOOKBEHIND:
91     case PCRE2_INFO_MINLENGTH:
92     case PCRE2_INFO_NAMEENTRYSIZE:
93     case PCRE2_INFO_NAMECOUNT:
94     case PCRE2_INFO_NEWLINE:
95     return sizeof(uint32_t);
96 
97     case PCRE2_INFO_FIRSTBITMAP:
98     return sizeof(const uint8_t *);
99 
100     case PCRE2_INFO_JITSIZE:
101     case PCRE2_INFO_SIZE:
102     case PCRE2_INFO_FRAMESIZE:
103     return sizeof(size_t);
104 
105     case PCRE2_INFO_NAMETABLE:
106     return sizeof(PCRE2_SPTR);
107     }
108   }
109 
110 if (re == NULL) return PCRE2_ERROR_NULL;
111 
112 /* Check that the first field in the block is the magic number. If it is not,
113 return with PCRE2_ERROR_BADMAGIC. */
114 
115 if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
116 
117 /* Check that this pattern was compiled in the correct bit mode */
118 
119 if ((re->flags & (PCRE2_CODE_UNIT_WIDTH/8)) == 0) return PCRE2_ERROR_BADMODE;
120 
121 switch(what)
122   {
123   case PCRE2_INFO_ALLOPTIONS:
124   *((uint32_t *)where) = re->overall_options;
125   break;
126 
127   case PCRE2_INFO_ARGOPTIONS:
128   *((uint32_t *)where) = re->compile_options;
129   break;
130 
131   case PCRE2_INFO_BACKREFMAX:
132   *((uint32_t *)where) = re->top_backref;
133   break;
134 
135   case PCRE2_INFO_BSR:
136   *((uint32_t *)where) = re->bsr_convention;
137   break;
138 
139   case PCRE2_INFO_CAPTURECOUNT:
140   *((uint32_t *)where) = re->top_bracket;
141   break;
142 
143   case PCRE2_INFO_DEPTHLIMIT:
144   *((uint32_t *)where) = re->limit_depth;
145   if (re->limit_depth == UINT32_MAX) return PCRE2_ERROR_UNSET;
146   break;
147 
148   case PCRE2_INFO_EXTRAOPTIONS:
149   *((uint32_t *)where) = re->extra_options;
150   break;
151 
152   case PCRE2_INFO_FIRSTCODETYPE:
153   *((uint32_t *)where) = ((re->flags & PCRE2_FIRSTSET) != 0)? 1 :
154                          ((re->flags & PCRE2_STARTLINE) != 0)? 2 : 0;
155   break;
156 
157   case PCRE2_INFO_FIRSTCODEUNIT:
158   *((uint32_t *)where) = ((re->flags & PCRE2_FIRSTSET) != 0)?
159     re->first_codeunit : 0;
160   break;
161 
162   case PCRE2_INFO_FIRSTBITMAP:
163   *((const uint8_t **)where) = ((re->flags & PCRE2_FIRSTMAPSET) != 0)?
164     &(re->start_bitmap[0]) : NULL;
165   break;
166 
167   case PCRE2_INFO_FRAMESIZE:
168   *((size_t *)where) = offsetof(heapframe, ovector) +
169     re->top_bracket * 2 * sizeof(PCRE2_SIZE);
170   break;
171 
172   case PCRE2_INFO_HASBACKSLASHC:
173   *((uint32_t *)where) = (re->flags & PCRE2_HASBKC) != 0;
174   break;
175 
176   case PCRE2_INFO_HASCRORLF:
177   *((uint32_t *)where) = (re->flags & PCRE2_HASCRORLF) != 0;
178   break;
179 
180   case PCRE2_INFO_HEAPLIMIT:
181   *((uint32_t *)where) = re->limit_heap;
182   if (re->limit_heap == UINT32_MAX) return PCRE2_ERROR_UNSET;
183   break;
184 
185   case PCRE2_INFO_JCHANGED:
186   *((uint32_t *)where) = (re->flags & PCRE2_JCHANGED) != 0;
187   break;
188 
189   case PCRE2_INFO_JITSIZE:
190 #ifdef SUPPORT_JIT
191   *((size_t *)where) = (re->executable_jit != NULL)?
192     PRIV(jit_get_size)(re->executable_jit) : 0;
193 #else
194   *((size_t *)where) = 0;
195 #endif
196   break;
197 
198   case PCRE2_INFO_LASTCODETYPE:
199   *((uint32_t *)where) = ((re->flags & PCRE2_LASTSET) != 0)? 1 : 0;
200   break;
201 
202   case PCRE2_INFO_LASTCODEUNIT:
203   *((uint32_t *)where) = ((re->flags & PCRE2_LASTSET) != 0)?
204     re->last_codeunit : 0;
205   break;
206 
207   case PCRE2_INFO_MATCHEMPTY:
208   *((uint32_t *)where) = (re->flags & PCRE2_MATCH_EMPTY) != 0;
209   break;
210 
211   case PCRE2_INFO_MATCHLIMIT:
212   *((uint32_t *)where) = re->limit_match;
213   if (re->limit_match == UINT32_MAX) return PCRE2_ERROR_UNSET;
214   break;
215 
216   case PCRE2_INFO_MAXLOOKBEHIND:
217   *((uint32_t *)where) = re->max_lookbehind;
218   break;
219 
220   case PCRE2_INFO_MINLENGTH:
221   *((uint32_t *)where) = re->minlength;
222   break;
223 
224   case PCRE2_INFO_NAMEENTRYSIZE:
225   *((uint32_t *)where) = re->name_entry_size;
226   break;
227 
228   case PCRE2_INFO_NAMECOUNT:
229   *((uint32_t *)where) = re->name_count;
230   break;
231 
232   case PCRE2_INFO_NAMETABLE:
233   *((PCRE2_SPTR *)where) = (PCRE2_SPTR)((char *)re + sizeof(pcre2_real_code));
234   break;
235 
236   case PCRE2_INFO_NEWLINE:
237   *((uint32_t *)where) = re->newline_convention;
238   break;
239 
240   case PCRE2_INFO_SIZE:
241   *((size_t *)where) = re->blocksize;
242   break;
243 
244   default: return PCRE2_ERROR_BADOPTION;
245   }
246 
247 return 0;
248 }
249 
250 
251 
252 /*************************************************
253 *              Callout enumerator                *
254 *************************************************/
255 
256 /*
257 Arguments:
258   code          points to compiled code
259   callback      function called for each callout block
260   callout_data  user data passed to the callback
261 
262 Returns:        0 when successfully completed
263                 < 0 on local error
264                != 0 for callback error
265 */
266 
267 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_callout_enumerate(const pcre2_code * code,int (* callback)(pcre2_callout_enumerate_block *,void *),void * callout_data)268 pcre2_callout_enumerate(const pcre2_code *code,
269   int (*callback)(pcre2_callout_enumerate_block *, void *), void *callout_data)
270 {
271 pcre2_real_code *re = (pcre2_real_code *)code;
272 pcre2_callout_enumerate_block cb;
273 PCRE2_SPTR cc;
274 #ifdef SUPPORT_UNICODE
275 BOOL utf;
276 #endif
277 
278 if (re == NULL) return PCRE2_ERROR_NULL;
279 
280 #ifdef SUPPORT_UNICODE
281 utf = (re->overall_options & PCRE2_UTF) != 0;
282 #endif
283 
284 /* Check that the first field in the block is the magic number. If it is not,
285 return with PCRE2_ERROR_BADMAGIC. */
286 
287 if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
288 
289 /* Check that this pattern was compiled in the correct bit mode */
290 
291 if ((re->flags & (PCRE2_CODE_UNIT_WIDTH/8)) == 0) return PCRE2_ERROR_BADMODE;
292 
293 cb.version = 0;
294 cc = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code))
295      + re->name_count * re->name_entry_size;
296 
297 while (TRUE)
298   {
299   int rc;
300   switch (*cc)
301     {
302     case OP_END:
303     return 0;
304 
305     case OP_CHAR:
306     case OP_CHARI:
307     case OP_NOT:
308     case OP_NOTI:
309     case OP_STAR:
310     case OP_MINSTAR:
311     case OP_PLUS:
312     case OP_MINPLUS:
313     case OP_QUERY:
314     case OP_MINQUERY:
315     case OP_UPTO:
316     case OP_MINUPTO:
317     case OP_EXACT:
318     case OP_POSSTAR:
319     case OP_POSPLUS:
320     case OP_POSQUERY:
321     case OP_POSUPTO:
322     case OP_STARI:
323     case OP_MINSTARI:
324     case OP_PLUSI:
325     case OP_MINPLUSI:
326     case OP_QUERYI:
327     case OP_MINQUERYI:
328     case OP_UPTOI:
329     case OP_MINUPTOI:
330     case OP_EXACTI:
331     case OP_POSSTARI:
332     case OP_POSPLUSI:
333     case OP_POSQUERYI:
334     case OP_POSUPTOI:
335     case OP_NOTSTAR:
336     case OP_NOTMINSTAR:
337     case OP_NOTPLUS:
338     case OP_NOTMINPLUS:
339     case OP_NOTQUERY:
340     case OP_NOTMINQUERY:
341     case OP_NOTUPTO:
342     case OP_NOTMINUPTO:
343     case OP_NOTEXACT:
344     case OP_NOTPOSSTAR:
345     case OP_NOTPOSPLUS:
346     case OP_NOTPOSQUERY:
347     case OP_NOTPOSUPTO:
348     case OP_NOTSTARI:
349     case OP_NOTMINSTARI:
350     case OP_NOTPLUSI:
351     case OP_NOTMINPLUSI:
352     case OP_NOTQUERYI:
353     case OP_NOTMINQUERYI:
354     case OP_NOTUPTOI:
355     case OP_NOTMINUPTOI:
356     case OP_NOTEXACTI:
357     case OP_NOTPOSSTARI:
358     case OP_NOTPOSPLUSI:
359     case OP_NOTPOSQUERYI:
360     case OP_NOTPOSUPTOI:
361     cc += PRIV(OP_lengths)[*cc];
362 #ifdef SUPPORT_UNICODE
363     if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
364 #endif
365     break;
366 
367     case OP_TYPESTAR:
368     case OP_TYPEMINSTAR:
369     case OP_TYPEPLUS:
370     case OP_TYPEMINPLUS:
371     case OP_TYPEQUERY:
372     case OP_TYPEMINQUERY:
373     case OP_TYPEUPTO:
374     case OP_TYPEMINUPTO:
375     case OP_TYPEEXACT:
376     case OP_TYPEPOSSTAR:
377     case OP_TYPEPOSPLUS:
378     case OP_TYPEPOSQUERY:
379     case OP_TYPEPOSUPTO:
380     cc += PRIV(OP_lengths)[*cc];
381 #ifdef SUPPORT_UNICODE
382     if (cc[-1] == OP_PROP || cc[-1] == OP_NOTPROP) cc += 2;
383 #endif
384     break;
385 
386 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
387     case OP_XCLASS:
388     cc += GET(cc, 1);
389     break;
390 #endif
391 
392     case OP_MARK:
393     case OP_COMMIT_ARG:
394     case OP_PRUNE_ARG:
395     case OP_SKIP_ARG:
396     case OP_THEN_ARG:
397     cc += PRIV(OP_lengths)[*cc] + cc[1];
398     break;
399 
400     case OP_CALLOUT:
401     cb.pattern_position = GET(cc, 1);
402     cb.next_item_length = GET(cc, 1 + LINK_SIZE);
403     cb.callout_number = cc[1 + 2*LINK_SIZE];
404     cb.callout_string_offset = 0;
405     cb.callout_string_length = 0;
406     cb.callout_string = NULL;
407     rc = callback(&cb, callout_data);
408     if (rc != 0) return rc;
409     cc += PRIV(OP_lengths)[*cc];
410     break;
411 
412     case OP_CALLOUT_STR:
413     cb.pattern_position = GET(cc, 1);
414     cb.next_item_length = GET(cc, 1 + LINK_SIZE);
415     cb.callout_number = 0;
416     cb.callout_string_offset = GET(cc, 1 + 3*LINK_SIZE);
417     cb.callout_string_length =
418       GET(cc, 1 + 2*LINK_SIZE) - (1 + 4*LINK_SIZE) - 2;
419     cb.callout_string = cc + (1 + 4*LINK_SIZE) + 1;
420     rc = callback(&cb, callout_data);
421     if (rc != 0) return rc;
422     cc += GET(cc, 1 + 2*LINK_SIZE);
423     break;
424 
425     default:
426     cc += PRIV(OP_lengths)[*cc];
427     break;
428     }
429   }
430 }
431 
432 /* End of pcre2_pattern_info.c */
433