1 /*************************************************
2 *      Perl-Compatible Regular Expressions       *
3 *************************************************/
4 
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7 
8                        Written by Philip Hazel
9      Original API code Copyright (c) 1997-2012 University of Cambridge
10           New API code Copyright (c) 2016-2018 University of Cambridge
11 
12 -----------------------------------------------------------------------------
13 Redistribution and use in source and binary forms, with or without
14 modification, are permitted provided that the following conditions are met:
15 
16     * Redistributions of source code must retain the above copyright notice,
17       this list of conditions and the following disclaimer.
18 
19     * Redistributions in binary form must reproduce the above copyright
20       notice, this list of conditions and the following disclaimer in the
21       documentation and/or other materials provided with the distribution.
22 
23     * Neither the name of the University of Cambridge nor the names of its
24       contributors may be used to endorse or promote products derived from
25       this software without specific prior written permission.
26 
27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 POSSIBILITY OF SUCH DAMAGE.
38 -----------------------------------------------------------------------------
39 */
40 
41 
/* This module contains a single function that scans through a compiled pattern
until it finds a capturing bracket with the given number, or, if the number is
negative, an instance of OP_REVERSE for a lookbehind. The function is called
from pcre2_compile.c and also from pcre2_study.c when finding the minimum
matching length. */
47 
48 
49 #ifdef HAVE_CONFIG_H
50 #include "config.h"
51 #endif
52 
53 #include "pcre2_internal.h"
54 
55 
56 /*************************************************
57 *    Scan compiled regex for specific bracket    *
58 *************************************************/
59 
/*
Arguments:
  code        points to start of expression
  utf         TRUE in UTF mode
  number      the required bracket number, or negative to find a lookbehind

Returns:      pointer to the opcode for the bracket (or to the OP_REVERSE
              opcode when number is negative), or NULL if not found
*/
68 
69 PCRE2_SPTR
PRIV(find_bracket)70 PRIV(find_bracket)(PCRE2_SPTR code, BOOL utf, int number)
71 {
72 for (;;)
73   {
74   PCRE2_UCHAR c = *code;
75 
76   if (c == OP_END) return NULL;
77 
78   /* XCLASS is used for classes that cannot be represented just by a bit map.
79   This includes negated single high-valued characters. CALLOUT_STR is used for
80   callouts with string arguments. In both cases the length in the table is
81   zero; the actual length is stored in the compiled code. */
82 
83   if (c == OP_XCLASS) code += GET(code, 1);
84     else if (c == OP_CALLOUT_STR) code += GET(code, 1 + 2*LINK_SIZE);
85 
86   /* Handle lookbehind */
87 
88   else if (c == OP_REVERSE)
89     {
90     if (number < 0) return (PCRE2_UCHAR *)code;
91     code += PRIV(OP_lengths)[c];
92     }
93 
94   /* Handle capturing bracket */
95 
96   else if (c == OP_CBRA || c == OP_SCBRA ||
97            c == OP_CBRAPOS || c == OP_SCBRAPOS)
98     {
99     int n = (int)GET2(code, 1+LINK_SIZE);
100     if (n == number) return (PCRE2_UCHAR *)code;
101     code += PRIV(OP_lengths)[c];
102     }
103 
104   /* Otherwise, we can get the item's length from the table, except that for
105   repeated character types, we have to test for \p and \P, which have an extra
106   two bytes of parameters, and for MARK/PRUNE/SKIP/THEN with an argument, we
107   must add in its length. */
108 
109   else
110     {
111     switch(c)
112       {
113       case OP_TYPESTAR:
114       case OP_TYPEMINSTAR:
115       case OP_TYPEPLUS:
116       case OP_TYPEMINPLUS:
117       case OP_TYPEQUERY:
118       case OP_TYPEMINQUERY:
119       case OP_TYPEPOSSTAR:
120       case OP_TYPEPOSPLUS:
121       case OP_TYPEPOSQUERY:
122       if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
123       break;
124 
125       case OP_TYPEUPTO:
126       case OP_TYPEMINUPTO:
127       case OP_TYPEEXACT:
128       case OP_TYPEPOSUPTO:
129       if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
130         code += 2;
131       break;
132 
133       case OP_MARK:
134       case OP_COMMIT_ARG:
135       case OP_PRUNE_ARG:
136       case OP_SKIP_ARG:
137       case OP_THEN_ARG:
138       code += code[1];
139       break;
140       }
141 
142     /* Add in the fixed length from the table */
143 
144     code += PRIV(OP_lengths)[c];
145 
146   /* In UTF-8 and UTF-16 modes, opcodes that are followed by a character may be
147   followed by a multi-byte character. The length in the table is a minimum, so
148   we have to arrange to skip the extra bytes. */
149 
150 #ifdef MAYBE_UTF_MULTI
151     if (utf) switch(c)
152       {
153       case OP_CHAR:
154       case OP_CHARI:
155       case OP_NOT:
156       case OP_NOTI:
157       case OP_EXACT:
158       case OP_EXACTI:
159       case OP_NOTEXACT:
160       case OP_NOTEXACTI:
161       case OP_UPTO:
162       case OP_UPTOI:
163       case OP_NOTUPTO:
164       case OP_NOTUPTOI:
165       case OP_MINUPTO:
166       case OP_MINUPTOI:
167       case OP_NOTMINUPTO:
168       case OP_NOTMINUPTOI:
169       case OP_POSUPTO:
170       case OP_POSUPTOI:
171       case OP_NOTPOSUPTO:
172       case OP_NOTPOSUPTOI:
173       case OP_STAR:
174       case OP_STARI:
175       case OP_NOTSTAR:
176       case OP_NOTSTARI:
177       case OP_MINSTAR:
178       case OP_MINSTARI:
179       case OP_NOTMINSTAR:
180       case OP_NOTMINSTARI:
181       case OP_POSSTAR:
182       case OP_POSSTARI:
183       case OP_NOTPOSSTAR:
184       case OP_NOTPOSSTARI:
185       case OP_PLUS:
186       case OP_PLUSI:
187       case OP_NOTPLUS:
188       case OP_NOTPLUSI:
189       case OP_MINPLUS:
190       case OP_MINPLUSI:
191       case OP_NOTMINPLUS:
192       case OP_NOTMINPLUSI:
193       case OP_POSPLUS:
194       case OP_POSPLUSI:
195       case OP_NOTPOSPLUS:
196       case OP_NOTPOSPLUSI:
197       case OP_QUERY:
198       case OP_QUERYI:
199       case OP_NOTQUERY:
200       case OP_NOTQUERYI:
201       case OP_MINQUERY:
202       case OP_MINQUERYI:
203       case OP_NOTMINQUERY:
204       case OP_NOTMINQUERYI:
205       case OP_POSQUERY:
206       case OP_POSQUERYI:
207       case OP_NOTPOSQUERY:
208       case OP_NOTPOSQUERYI:
209       if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);
210       break;
211       }
212 #else
213     (void)(utf);  /* Keep compiler happy by referencing function argument */
214 #endif  /* MAYBE_UTF_MULTI */
215     }
216   }
217 }
218 
219 /* End of pcre2_find_bracket.c */
220