/*************************************************
*      Perl-Compatible Regular Expressions       *
*************************************************/

/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
            This module by Zoltan Herczeg and Sebastian Pop
     Original API code Copyright (c) 1997-2012 University of Cambridge
          New API code Copyright (c) 2016-2019 University of Cambridge

-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

    * Neither the name of the University of Cambridge nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
41
/* Choose the name of the function generated by this inclusion.

The including file defines exactly one selector (FFCS, FFCS_2, FFCS_MASK,
FFCPS_0, FFCPS_1 or FFCPS_DEFAULT) and optionally FF_UTF. The first selector
that is defined wins; when FF_UTF is also defined the "_utf" flavour of the
name is used. If no selector is defined, FF_FUN is left untouched. */

# if defined(FFCS)
#  if defined(FF_UTF)
#   define FF_FUN ffcs_utf
#  else
#   define FF_FUN ffcs
#  endif

# elif defined(FFCS_2)
#  if defined(FF_UTF)
#   define FF_FUN ffcs_2_utf
#  else
#   define FF_FUN ffcs_2
#  endif

# elif defined(FFCS_MASK)
#  if defined(FF_UTF)
#   define FF_FUN ffcs_mask_utf
#  else
#   define FF_FUN ffcs_mask
#  endif

# elif defined(FFCPS_0)
#  if defined(FF_UTF)
#   define FF_FUN ffcps_0_utf
#  else
#   define FF_FUN ffcps_0
#  endif

# elif defined(FFCPS_1)
#  if defined(FF_UTF)
#   define FF_FUN ffcps_1_utf
#  else
#   define FF_FUN ffcps_1
#  endif

# elif defined(FFCPS_DEFAULT)
#  if defined(FF_UTF)
#   define FF_FUN ffcps_default_utf
#  else
#   define FF_FUN ffcps_default
#  endif
# endif
84
85 #if (defined(__GNUC__) && __SANITIZE_ADDRESS__) \
86 || (defined(__clang__) \
87 && ((__clang_major__ == 3 && __clang_minor__ >= 3) || (__clang_major__ > 3)))
88 __attribute__((no_sanitize_address))
89 #endif
FF_FUN(sljit_u8 * str_end,sljit_u8 ** str_ptr,sljit_uw offs1,sljit_uw offs2,sljit_uw chars)90 static sljit_u8* SLJIT_FUNC FF_FUN(sljit_u8 *str_end, sljit_u8 **str_ptr, sljit_uw offs1, sljit_uw offs2, sljit_uw chars)
91 #undef FF_FUN
92 {
93 quad_word qw;
94 int_char ic;
95
96 SLJIT_UNUSED_ARG(offs1);
97 SLJIT_UNUSED_ARG(offs2);
98
99 ic.x = chars;
100
101 #if defined(FFCS)
102 sljit_u8 c1 = ic.c.c1;
103 vect_t vc1 = VDUPQ(c1);
104
105 #elif defined(FFCS_2)
106 sljit_u8 c1 = ic.c.c1;
107 vect_t vc1 = VDUPQ(c1);
108 sljit_u8 c2 = ic.c.c2;
109 vect_t vc2 = VDUPQ(c2);
110
111 #elif defined(FFCS_MASK)
112 sljit_u8 c1 = ic.c.c1;
113 vect_t vc1 = VDUPQ(c1);
114 sljit_u8 mask = ic.c.c2;
115 vect_t vmask = VDUPQ(mask);
116 #endif
117
118 #if defined(FFCPS)
119 compare_type compare1_type = compare_match1;
120 compare_type compare2_type = compare_match1;
121 vect_t cmp1a, cmp1b, cmp2a, cmp2b;
122 const sljit_u32 diff = IN_UCHARS(offs1 - offs2);
123 PCRE2_UCHAR char1a = ic.c.c1;
124 PCRE2_UCHAR char2a = ic.c.c3;
125
126 # ifdef FFCPS_CHAR1A2A
127 cmp1a = VDUPQ(char1a);
128 cmp2a = VDUPQ(char2a);
129 cmp1b = VDUPQ(0); /* to avoid errors on older compilers -Werror=maybe-uninitialized */
130 cmp2b = VDUPQ(0); /* to avoid errors on older compilers -Werror=maybe-uninitialized */
131 # else
132 PCRE2_UCHAR char1b = ic.c.c2;
133 PCRE2_UCHAR char2b = ic.c.c4;
134 if (char1a == char1b)
135 {
136 cmp1a = VDUPQ(char1a);
137 cmp1b = VDUPQ(0); /* to avoid errors on older compilers -Werror=maybe-uninitialized */
138 }
139 else
140 {
141 sljit_u32 bit1 = char1a ^ char1b;
142 if (is_powerof2(bit1))
143 {
144 compare1_type = compare_match1i;
145 cmp1a = VDUPQ(char1a | bit1);
146 cmp1b = VDUPQ(bit1);
147 }
148 else
149 {
150 compare1_type = compare_match2;
151 cmp1a = VDUPQ(char1a);
152 cmp1b = VDUPQ(char1b);
153 }
154 }
155
156 if (char2a == char2b)
157 {
158 cmp2a = VDUPQ(char2a);
159 cmp2b = VDUPQ(0); /* to avoid errors on older compilers -Werror=maybe-uninitialized */
160 }
161 else
162 {
163 sljit_u32 bit2 = char2a ^ char2b;
164 if (is_powerof2(bit2))
165 {
166 compare2_type = compare_match1i;
167 cmp2a = VDUPQ(char2a | bit2);
168 cmp2b = VDUPQ(bit2);
169 }
170 else
171 {
172 compare2_type = compare_match2;
173 cmp2a = VDUPQ(char2a);
174 cmp2b = VDUPQ(char2b);
175 }
176 }
177 # endif
178
179 *str_ptr += IN_UCHARS(offs1);
180 #endif
181
182 #if PCRE2_CODE_UNIT_WIDTH != 8
183 vect_t char_mask = VDUPQ(0xff);
184 #endif
185
186 #if defined(FF_UTF)
187 restart:;
188 #endif
189
190 #if defined(FFCPS)
191 if (*str_ptr >= str_end)
192 return NULL;
193 sljit_u8 *p1 = *str_ptr - diff;
194 #endif
195 sljit_s32 align_offset = ((uint64_t)*str_ptr & 0xf);
196 *str_ptr = (sljit_u8 *) ((uint64_t)*str_ptr & ~0xf);
197 vect_t data = VLD1Q(*str_ptr);
198 #if PCRE2_CODE_UNIT_WIDTH != 8
199 data = VANDQ(data, char_mask);
200 #endif
201
202 #if defined(FFCS)
203 vect_t eq = VCEQQ(data, vc1);
204
205 #elif defined(FFCS_2)
206 vect_t eq1 = VCEQQ(data, vc1);
207 vect_t eq2 = VCEQQ(data, vc2);
208 vect_t eq = VORRQ(eq1, eq2);
209
210 #elif defined(FFCS_MASK)
211 vect_t eq = VORRQ(data, vmask);
212 eq = VCEQQ(eq, vc1);
213
214 #elif defined(FFCPS)
215 # if defined(FFCPS_DIFF1)
216 vect_t prev_data = data;
217 # endif
218
219 vect_t data2;
220 if (p1 < *str_ptr)
221 {
222 data2 = VLD1Q(*str_ptr - diff);
223 #if PCRE2_CODE_UNIT_WIDTH != 8
224 data2 = VANDQ(data2, char_mask);
225 #endif
226 }
227 else
228 data2 = shift_left_n_lanes(data, offs1 - offs2);
229
230 if (compare1_type == compare_match1)
231 data = VCEQQ(data, cmp1a);
232 else
233 data = fast_forward_char_pair_compare(compare1_type, data, cmp1a, cmp1b);
234
235 if (compare2_type == compare_match1)
236 data2 = VCEQQ(data2, cmp2a);
237 else
238 data2 = fast_forward_char_pair_compare(compare2_type, data2, cmp2a, cmp2b);
239
240 vect_t eq = VANDQ(data, data2);
241 #endif
242
243 VST1Q(qw.mem, eq);
244 /* Ignore matches before the first STR_PTR. */
245 if (align_offset < 8)
246 {
247 qw.dw[0] >>= align_offset * 8;
248 if (qw.dw[0])
249 {
250 *str_ptr += align_offset + __builtin_ctzll(qw.dw[0]) / 8;
251 goto match;
252 }
253 if (qw.dw[1])
254 {
255 *str_ptr += 8 + __builtin_ctzll(qw.dw[1]) / 8;
256 goto match;
257 }
258 }
259 else
260 {
261 qw.dw[1] >>= (align_offset - 8) * 8;
262 if (qw.dw[1])
263 {
264 *str_ptr += align_offset + __builtin_ctzll(qw.dw[1]) / 8;
265 goto match;
266 }
267 }
268 *str_ptr += 16;
269
270 while (*str_ptr < str_end)
271 {
272 vect_t orig_data = VLD1Q(*str_ptr);
273 #if PCRE2_CODE_UNIT_WIDTH != 8
274 orig_data = VANDQ(orig_data, char_mask);
275 #endif
276 data = orig_data;
277
278 #if defined(FFCS)
279 eq = VCEQQ(data, vc1);
280
281 #elif defined(FFCS_2)
282 eq1 = VCEQQ(data, vc1);
283 eq2 = VCEQQ(data, vc2);
284 eq = VORRQ(eq1, eq2);
285
286 #elif defined(FFCS_MASK)
287 eq = VORRQ(data, vmask);
288 eq = VCEQQ(eq, vc1);
289 #endif
290
291 #if defined(FFCPS)
292 # if defined (FFCPS_DIFF1)
293 data2 = VEXTQ(prev_data, data, VECTOR_FACTOR - 1);
294 # else
295 data2 = VLD1Q(*str_ptr - diff);
296 # if PCRE2_CODE_UNIT_WIDTH != 8
297 data2 = VANDQ(data2, char_mask);
298 # endif
299 # endif
300
301 # ifdef FFCPS_CHAR1A2A
302 data = VCEQQ(data, cmp1a);
303 data2 = VCEQQ(data2, cmp2a);
304 # else
305 if (compare1_type == compare_match1)
306 data = VCEQQ(data, cmp1a);
307 else
308 data = fast_forward_char_pair_compare(compare1_type, data, cmp1a, cmp1b);
309 if (compare2_type == compare_match1)
310 data2 = VCEQQ(data2, cmp2a);
311 else
312 data2 = fast_forward_char_pair_compare(compare2_type, data2, cmp2a, cmp2b);
313 # endif
314
315 eq = VANDQ(data, data2);
316 #endif
317
318 VST1Q(qw.mem, eq);
319 if (qw.dw[0])
320 *str_ptr += __builtin_ctzll(qw.dw[0]) / 8;
321 else if (qw.dw[1])
322 *str_ptr += 8 + __builtin_ctzll(qw.dw[1]) / 8;
323 else {
324 *str_ptr += 16;
325 #if defined (FFCPS_DIFF1)
326 prev_data = orig_data;
327 #endif
328 continue;
329 }
330
331 match:;
332 if (*str_ptr >= str_end)
333 /* Failed match. */
334 return NULL;
335
336 #if defined(FF_UTF)
337 if (utf_continue((PCRE2_SPTR)*str_ptr - offs1))
338 {
339 /* Not a match. */
340 *str_ptr += IN_UCHARS(1);
341 goto restart;
342 }
343 #endif
344
345 /* Match. */
346 #if defined (FFCPS)
347 *str_ptr -= IN_UCHARS(offs1);
348 #endif
349 return *str_ptr;
350 }
351
352 /* Failed match. */
353 return NULL;
354 }
355