1 /*
2 +----------------------------------------------------------------------+
3 | PHP Version 7 |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 1997-2018 The PHP Group |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
15 | Author: Andrei Zmievski <andrei@php.net> |
16 +----------------------------------------------------------------------+
17 */
18
19 /* $Id$ */
20
21 #include "php.h"
22 #include "php_ini.h"
23 #include "php_globals.h"
24 #include "php_pcre.h"
25 #include "ext/standard/info.h"
26 #include "ext/standard/basic_functions.h"
27 #include "zend_smart_str.h"
28
29 #if HAVE_PCRE || HAVE_BUNDLED_PCRE
30
31 #include "ext/standard/php_string.h"
32
/*
 * Subpattern ordering for global matches. php_pcre_match_impl() reads the
 * ordering from the low byte of the flags argument (flags & 0xff).
 */
#define PREG_PATTERN_ORDER			1
#define PREG_SET_ORDER				2

/* Modifier bits that may be OR-ed into the match flags. */
#define PREG_OFFSET_CAPTURE			(1 << 8)
#define PREG_UNMATCHED_AS_NULL		(1 << 9)

/* Flags exported to userland as PREG_SPLIT_*. */
#define PREG_SPLIT_NO_EMPTY			(1 << 0)
#define PREG_SPLIT_DELIM_CAPTURE	(1 << 1)
#define PREG_SPLIT_OFFSET_CAPTURE	(1 << 2)

/* Recorded in a cache entry's preg_options when the 'e' modifier is seen. */
#define PREG_REPLACE_EVAL			(1 << 0)

/* Flag exported to userland as PREG_GREP_INVERT. */
#define PREG_GREP_INVERT			(1 << 0)

/* Number of cached patterns at which old entries start being evicted. */
#define PCRE_CACHE_SIZE				4096

/* not fully functional workaround for libpcre < 8.0, see bug #70232 */
#ifndef PCRE_NOTEMPTY_ATSTART
# define PCRE_NOTEMPTY_ATSTART PCRE_NOTEMPTY
#endif
52
/*
 * Error codes stored in PCRE_G(error_code) and exported to userland as the
 * PREG_*_ERROR constants. The numeric values are spelled out because they
 * are visible to scripts.
 */
enum {
	PHP_PCRE_NO_ERROR              = 0,
	PHP_PCRE_INTERNAL_ERROR        = 1,
	PHP_PCRE_BACKTRACK_LIMIT_ERROR = 2,
	PHP_PCRE_RECURSION_LIMIT_ERROR = 3,
	PHP_PCRE_BAD_UTF8_ERROR        = 4,
	PHP_PCRE_BAD_UTF8_OFFSET_ERROR = 5,
	PHP_PCRE_JIT_STACKLIMIT_ERROR  = 6
};
62
63
/* Storage for the extension's module globals (read through PCRE_G() in this file). */
PHPAPI ZEND_DECLARE_MODULE_GLOBALS(pcre)

#ifdef HAVE_PCRE_JIT_SUPPORT
/* Start and maximum sizes handed to pcre_jit_stack_alloc() in PHP_RINIT_FUNCTION(pcre). */
#define PCRE_JIT_STACK_MIN_SIZE (32 * 1024)
#define PCRE_JIT_STACK_MAX_SIZE (64 * 1024)
/* JIT stack: allocated in PHP_RINIT_FUNCTION(pcre) while still NULL, attached to
   studied patterns, and released in PHP_GSHUTDOWN_FUNCTION(pcre). */
ZEND_TLS pcre_jit_stack *jit_stack = NULL;
#endif
#if defined(ZTS)
/*
 * Mutex taken around the pcre_jit_stack_alloc() and pcre_study() calls in
 * this file. It is created and destroyed on the main thread only.
 *
 * Every macro expands to exactly one statement and must be followed by a
 * semicolon at the call site. In particular php_pcre_mutex_free() resets
 * pcre_mt only when it actually freed the mutex; the reset used to sit
 * outside the `if` and ran unconditionally.
 */
static MUTEX_T pcre_mt = NULL;
#define php_pcre_mutex_alloc() \
	do { \
		if (tsrm_is_main_thread() && !pcre_mt) { \
			pcre_mt = tsrm_mutex_alloc(); \
		} \
	} while (0)
#define php_pcre_mutex_free() \
	do { \
		if (tsrm_is_main_thread() && pcre_mt) { \
			tsrm_mutex_free(pcre_mt); \
			pcre_mt = NULL; \
		} \
	} while (0)
#define php_pcre_mutex_lock()	tsrm_mutex_lock(pcre_mt)
#define php_pcre_mutex_unlock()	tsrm_mutex_unlock(pcre_mt)
#else
/* Non-thread-safe builds: no locking is needed, each macro is a no-op statement. */
#define php_pcre_mutex_alloc()	((void)0)
#define php_pcre_mutex_free()	((void)0)
#define php_pcre_mutex_lock()	((void)0)
#define php_pcre_mutex_unlock()	((void)0)
#endif
83
pcre_handle_exec_error(int pcre_code)84 static void pcre_handle_exec_error(int pcre_code) /* {{{ */
85 {
86 int preg_code = 0;
87
88 switch (pcre_code) {
89 case PCRE_ERROR_MATCHLIMIT:
90 preg_code = PHP_PCRE_BACKTRACK_LIMIT_ERROR;
91 break;
92
93 case PCRE_ERROR_RECURSIONLIMIT:
94 preg_code = PHP_PCRE_RECURSION_LIMIT_ERROR;
95 break;
96
97 case PCRE_ERROR_BADUTF8:
98 preg_code = PHP_PCRE_BAD_UTF8_ERROR;
99 break;
100
101 case PCRE_ERROR_BADUTF8_OFFSET:
102 preg_code = PHP_PCRE_BAD_UTF8_OFFSET_ERROR;
103 break;
104
105 #ifdef HAVE_PCRE_JIT_SUPPORT
106 case PCRE_ERROR_JIT_STACKLIMIT:
107 preg_code = PHP_PCRE_JIT_STACKLIMIT_ERROR;
108 break;
109 #endif
110
111 default:
112 preg_code = PHP_PCRE_INTERNAL_ERROR;
113 break;
114 }
115
116 PCRE_G(error_code) = preg_code;
117 }
118 /* }}} */
119
/* Destructor installed on PCRE_G(pcre_cache): releases the compiled pattern,
   its study data, the locale tables (when built with setlocale support) and
   finally the persistent entry itself. */
static void php_free_pcre_cache(zval *data) /* {{{ */
{
	pcre_cache_entry *entry = (pcre_cache_entry *) Z_PTR_P(data);

	if (entry == NULL) {
		return;
	}

	pcre_free(entry->re);
	if (entry->extra != NULL) {
		pcre_free_study(entry->extra);
	}
#if HAVE_SETLOCALE
	if (entry->tables != NULL) {
		pefree((void *) entry->tables, 1);
	}
#endif
	pefree(entry, 1);
}
/* }}} */
134
PHP_GINIT_FUNCTION(pcre)135 static PHP_GINIT_FUNCTION(pcre) /* {{{ */
136 {
137 zend_hash_init(&pcre_globals->pcre_cache, 0, NULL, php_free_pcre_cache, 1);
138 pcre_globals->backtrack_limit = 0;
139 pcre_globals->recursion_limit = 0;
140 pcre_globals->error_code = PHP_PCRE_NO_ERROR;
141 }
142 /* }}} */
143
PHP_GSHUTDOWN_FUNCTION(pcre)144 static PHP_GSHUTDOWN_FUNCTION(pcre) /* {{{ */
145 {
146 zend_hash_destroy(&pcre_globals->pcre_cache);
147
148 #ifdef HAVE_PCRE_JIT_SUPPORT
149 /* Stack may only be destroyed when no cached patterns
150 possibly associated with it do exist. */
151 if (jit_stack) {
152 pcre_jit_stack_free(jit_stack);
153 jit_stack = NULL;
154 }
155 #endif
156
157 }
158 /* }}} */
159
/* INI directives of the extension; each one is bound to the field of
   zend_pcre_globals named in its fifth argument. The limits are copied into
   pcre_extra.match_limit / match_limit_recursion before patterns are run. */
PHP_INI_BEGIN()
	STD_PHP_INI_ENTRY("pcre.backtrack_limit", "1000000", PHP_INI_ALL, OnUpdateLong, backtrack_limit, zend_pcre_globals, pcre_globals)
	STD_PHP_INI_ENTRY("pcre.recursion_limit", "100000", PHP_INI_ALL, OnUpdateLong, recursion_limit, zend_pcre_globals, pcre_globals)
#ifdef HAVE_PCRE_JIT_SUPPORT
	/* Only available when PHP was built with PCRE JIT support. */
	STD_PHP_INI_ENTRY("pcre.jit", "1", PHP_INI_ALL, OnUpdateBool, jit, zend_pcre_globals, pcre_globals)
#endif
PHP_INI_END()
167
168
/* {{{ PHP_MINFO_FUNCTION(pcre)
 * Prints the extension's info table: library version, JIT availability,
 * optional Valgrind support, followed by the INI entries.
 */
static PHP_MINFO_FUNCTION(pcre)
{
	const char *jit_support;
#ifdef HAVE_PCRE_JIT_SUPPORT
	int jit_available = 0;
#endif

	php_info_print_table_start();
	php_info_print_table_row(2, "PCRE (Perl Compatible Regular Expressions) Support", "enabled");
	php_info_print_table_row(2, "PCRE Library Version", pcre_version());

#ifdef HAVE_PCRE_JIT_SUPPORT
	/* pcre_config() returns non-zero when it cannot answer the query. */
	if (pcre_config(PCRE_CONFIG_JIT, &jit_available) != 0) {
		jit_support = "unknown";
	} else if (jit_available) {
		jit_support = "enabled";
	} else {
		jit_support = "disabled";
	}
#else
	jit_support = "not compiled in";
#endif
	php_info_print_table_row(2, "PCRE JIT Support", jit_support);

#ifdef HAVE_PCRE_VALGRIND_SUPPORT
	php_info_print_table_row(2, "PCRE Valgrind Support", "enabled");
#endif

	php_info_print_table_end();

	DISPLAY_INI_ENTRIES();
}
/* }}} */
199
/* {{{ PHP_MINIT_FUNCTION(pcre)
 * Module startup: registers the INI entries, creates the mutex (a no-op in
 * non-ZTS builds) and exports the PREG_* / PCRE_VERSION constants.
 */
static PHP_MINIT_FUNCTION(pcre)
{
	REGISTER_INI_ENTRIES();

	php_pcre_mutex_alloc();

	/* Flags accepted by the matching, splitting and grep functions. */
	REGISTER_LONG_CONSTANT("PREG_PATTERN_ORDER", PREG_PATTERN_ORDER, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("PREG_SET_ORDER", PREG_SET_ORDER, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("PREG_OFFSET_CAPTURE", PREG_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("PREG_UNMATCHED_AS_NULL", PREG_UNMATCHED_AS_NULL, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("PREG_SPLIT_NO_EMPTY", PREG_SPLIT_NO_EMPTY, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("PREG_SPLIT_DELIM_CAPTURE", PREG_SPLIT_DELIM_CAPTURE, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("PREG_SPLIT_OFFSET_CAPTURE", PREG_SPLIT_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("PREG_GREP_INVERT", PREG_GREP_INVERT, CONST_CS | CONST_PERSISTENT);

	/* Error codes, i.e. the values pcre_handle_exec_error() stores in PCRE_G(error_code). */
	REGISTER_LONG_CONSTANT("PREG_NO_ERROR", PHP_PCRE_NO_ERROR, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("PREG_INTERNAL_ERROR", PHP_PCRE_INTERNAL_ERROR, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("PREG_BACKTRACK_LIMIT_ERROR", PHP_PCRE_BACKTRACK_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("PREG_RECURSION_LIMIT_ERROR", PHP_PCRE_RECURSION_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_ERROR", PHP_PCRE_BAD_UTF8_ERROR, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_OFFSET_ERROR", PHP_PCRE_BAD_UTF8_OFFSET_ERROR, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("PREG_JIT_STACKLIMIT_ERROR", PHP_PCRE_JIT_STACKLIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
	/* Version string reported by the PCRE library in use. */
	REGISTER_STRING_CONSTANT("PCRE_VERSION", (char *)pcre_version(), CONST_CS | CONST_PERSISTENT);

	return SUCCESS;
}
/* }}} */
228
/* {{{ PHP_MSHUTDOWN_FUNCTION(pcre)
 * Module shutdown: undoes PHP_MINIT_FUNCTION(pcre) by unregistering the INI
 * entries and releasing the mutex.
 */
static PHP_MSHUTDOWN_FUNCTION(pcre)
{
	UNREGISTER_INI_ENTRIES();

	php_pcre_mutex_free();

	return SUCCESS;
}
/* }}} */
239
#ifdef HAVE_PCRE_JIT_SUPPORT
/* {{{ PHP_RINIT_FUNCTION(pcre)
 * Request startup: makes sure a JIT stack exists when pcre.jit is enabled.
 */
static PHP_RINIT_FUNCTION(pcre)
{
	/* Nothing to do when JIT is switched off or a stack was allocated earlier. */
	if (!PCRE_G(jit) || jit_stack != NULL) {
		return SUCCESS;
	}

	php_pcre_mutex_lock();
	jit_stack = pcre_jit_stack_alloc(PCRE_JIT_STACK_MIN_SIZE, PCRE_JIT_STACK_MAX_SIZE);
	php_pcre_mutex_unlock();

	return SUCCESS;
}
/* }}} */
#endif
254
255 /* {{{ static pcre_clean_cache */
pcre_clean_cache(zval * data,void * arg)256 static int pcre_clean_cache(zval *data, void *arg)
257 {
258 pcre_cache_entry *pce = (pcre_cache_entry *) Z_PTR_P(data);
259 int *num_clean = (int *)arg;
260
261 if (*num_clean > 0 && !pce->refcount) {
262 (*num_clean)--;
263 return ZEND_HASH_APPLY_REMOVE;
264 } else {
265 return ZEND_HASH_APPLY_KEEP;
266 }
267 }
268 /* }}} */
269
270 /* {{{ static make_subpats_table */
make_subpats_table(int num_subpats,pcre_cache_entry * pce)271 static char **make_subpats_table(int num_subpats, pcre_cache_entry *pce)
272 {
273 pcre_extra *extra = pce->extra;
274 int name_cnt = pce->name_count, name_size, ni = 0;
275 int rc;
276 char *name_table;
277 unsigned short name_idx;
278 char **subpat_names;
279 int rc1, rc2;
280
281 rc1 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMETABLE, &name_table);
282 rc2 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMEENTRYSIZE, &name_size);
283 rc = rc2 ? rc2 : rc1;
284 if (rc < 0) {
285 php_error_docref(NULL, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
286 return NULL;
287 }
288
289 subpat_names = (char **)ecalloc(num_subpats, sizeof(char *));
290 while (ni++ < name_cnt) {
291 name_idx = 0x100 * (unsigned char)name_table[0] + (unsigned char)name_table[1];
292 subpat_names[name_idx] = name_table + 2;
293 if (is_numeric_string(subpat_names[name_idx], strlen(subpat_names[name_idx]), NULL, NULL, 0) > 0) {
294 php_error_docref(NULL, E_WARNING, "Numeric named subpatterns are not allowed");
295 efree(subpat_names);
296 return NULL;
297 }
298 name_table += name_size;
299 }
300 return subpat_names;
301 }
302 /* }}} */
303
304 /* {{{ static calculate_unit_length */
305 /* Calculates the byte length of the next character. Assumes valid UTF-8 for PCRE_UTF8. */
calculate_unit_length(pcre_cache_entry * pce,char * start)306 static zend_always_inline int calculate_unit_length(pcre_cache_entry *pce, char *start)
307 {
308 int unit_len;
309
310 if (pce->compile_options & PCRE_UTF8) {
311 char *end = start;
312
313 /* skip continuation bytes */
314 while ((*++end & 0xC0) == 0x80);
315 unit_len = end - start;
316 } else {
317 unit_len = 1;
318 }
319 return unit_len;
320 }
321 /* }}} */
322
323 /* {{{ pcre_get_compiled_regex_cache
324 */
pcre_get_compiled_regex_cache_ex(zend_string * regex,int locale_aware)325 PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, int locale_aware)
326 {
327 pcre *re = NULL;
328 pcre_extra *extra;
329 int coptions = 0;
330 int soptions = 0;
331 const char *error;
332 int erroffset;
333 char delimiter;
334 char start_delimiter;
335 char end_delimiter;
336 char *p, *pp;
337 char *pattern;
338 int do_study = 0;
339 int poptions = 0;
340 unsigned const char *tables = NULL;
341 pcre_cache_entry *pce;
342 pcre_cache_entry new_entry;
343 int rc;
344 zend_string *key;
345
346 #if HAVE_SETLOCALE
347 if (locale_aware && BG(locale_string) &&
348 (ZSTR_LEN(BG(locale_string)) != 1 && ZSTR_VAL(BG(locale_string))[0] != 'C')) {
349 key = zend_string_alloc(ZSTR_LEN(regex) + ZSTR_LEN(BG(locale_string)) + 1, 0);
350 memcpy(ZSTR_VAL(key), ZSTR_VAL(BG(locale_string)), ZSTR_LEN(BG(locale_string)) + 1);
351 memcpy(ZSTR_VAL(key) + ZSTR_LEN(BG(locale_string)), ZSTR_VAL(regex), ZSTR_LEN(regex) + 1);
352 } else
353 #endif
354 {
355 key = regex;
356 }
357
358 /* Try to lookup the cached regex entry, and if successful, just pass
359 back the compiled pattern, otherwise go on and compile it. */
360 pce = zend_hash_find_ptr(&PCRE_G(pcre_cache), key);
361 if (pce) {
362 #if HAVE_SETLOCALE
363 if (key != regex) {
364 zend_string_release(key);
365 }
366 #endif
367 return pce;
368 }
369
370 p = ZSTR_VAL(regex);
371
372 /* Parse through the leading whitespace, and display a warning if we
373 get to the end without encountering a delimiter. */
374 while (isspace((int)*(unsigned char *)p)) p++;
375 if (*p == 0) {
376 #if HAVE_SETLOCALE
377 if (key != regex) {
378 zend_string_release(key);
379 }
380 #endif
381 php_error_docref(NULL, E_WARNING,
382 p < ZSTR_VAL(regex) + ZSTR_LEN(regex) ? "Null byte in regex" : "Empty regular expression");
383 pcre_handle_exec_error(PCRE_ERROR_INTERNAL);
384 return NULL;
385 }
386
387 /* Get the delimiter and display a warning if it is alphanumeric
388 or a backslash. */
389 delimiter = *p++;
390 if (isalnum((int)*(unsigned char *)&delimiter) || delimiter == '\\') {
391 #if HAVE_SETLOCALE
392 if (key != regex) {
393 zend_string_release(key);
394 }
395 #endif
396 php_error_docref(NULL,E_WARNING, "Delimiter must not be alphanumeric or backslash");
397 pcre_handle_exec_error(PCRE_ERROR_INTERNAL);
398 return NULL;
399 }
400
401 start_delimiter = delimiter;
402 if ((pp = strchr("([{< )]}> )]}>", delimiter)))
403 delimiter = pp[5];
404 end_delimiter = delimiter;
405
406 pp = p;
407
408 if (start_delimiter == end_delimiter) {
409 /* We need to iterate through the pattern, searching for the ending delimiter,
410 but skipping the backslashed delimiters. If the ending delimiter is not
411 found, display a warning. */
412 while (*pp != 0) {
413 if (*pp == '\\' && pp[1] != 0) pp++;
414 else if (*pp == delimiter)
415 break;
416 pp++;
417 }
418 } else {
419 /* We iterate through the pattern, searching for the matching ending
420 * delimiter. For each matching starting delimiter, we increment nesting
421 * level, and decrement it for each matching ending delimiter. If we
422 * reach the end of the pattern without matching, display a warning.
423 */
424 int brackets = 1; /* brackets nesting level */
425 while (*pp != 0) {
426 if (*pp == '\\' && pp[1] != 0) pp++;
427 else if (*pp == end_delimiter && --brackets <= 0)
428 break;
429 else if (*pp == start_delimiter)
430 brackets++;
431 pp++;
432 }
433 }
434
435 if (*pp == 0) {
436 #if HAVE_SETLOCALE
437 if (key != regex) {
438 zend_string_release(key);
439 }
440 #endif
441 if (pp < ZSTR_VAL(regex) + ZSTR_LEN(regex)) {
442 php_error_docref(NULL,E_WARNING, "Null byte in regex");
443 } else if (start_delimiter == end_delimiter) {
444 php_error_docref(NULL,E_WARNING, "No ending delimiter '%c' found", delimiter);
445 } else {
446 php_error_docref(NULL,E_WARNING, "No ending matching delimiter '%c' found", delimiter);
447 }
448 pcre_handle_exec_error(PCRE_ERROR_INTERNAL);
449 return NULL;
450 }
451
452 /* Make a copy of the actual pattern. */
453 pattern = estrndup(p, pp-p);
454
455 /* Move on to the options */
456 pp++;
457
458 /* Parse through the options, setting appropriate flags. Display
459 a warning if we encounter an unknown modifier. */
460 while (pp < ZSTR_VAL(regex) + ZSTR_LEN(regex)) {
461 switch (*pp++) {
462 /* Perl compatible options */
463 case 'i': coptions |= PCRE_CASELESS; break;
464 case 'm': coptions |= PCRE_MULTILINE; break;
465 case 's': coptions |= PCRE_DOTALL; break;
466 case 'x': coptions |= PCRE_EXTENDED; break;
467
468 /* PCRE specific options */
469 case 'A': coptions |= PCRE_ANCHORED; break;
470 case 'D': coptions |= PCRE_DOLLAR_ENDONLY;break;
471 case 'S': do_study = 1; break;
472 case 'U': coptions |= PCRE_UNGREEDY; break;
473 case 'X': coptions |= PCRE_EXTRA; break;
474 case 'u': coptions |= PCRE_UTF8;
475 /* In PCRE, by default, \d, \D, \s, \S, \w, and \W recognize only ASCII
476 characters, even in UTF-8 mode. However, this can be changed by setting
477 the PCRE_UCP option. */
478 #ifdef PCRE_UCP
479 coptions |= PCRE_UCP;
480 #endif
481 break;
482 case 'J': coptions |= PCRE_DUPNAMES; break;
483
484 /* Custom preg options */
485 case 'e': poptions |= PREG_REPLACE_EVAL; break;
486
487 case ' ':
488 case '\n':
489 case '\r':
490 break;
491
492 default:
493 if (pp[-1]) {
494 php_error_docref(NULL,E_WARNING, "Unknown modifier '%c'", pp[-1]);
495 } else {
496 php_error_docref(NULL,E_WARNING, "Null byte in regex");
497 }
498 pcre_handle_exec_error(PCRE_ERROR_INTERNAL);
499 efree(pattern);
500 #if HAVE_SETLOCALE
501 if (key != regex) {
502 zend_string_release(key);
503 }
504 #endif
505 return NULL;
506 }
507 }
508
509 #if HAVE_SETLOCALE
510 if (key != regex) {
511 tables = pcre_maketables();
512 }
513 #endif
514
515 /* Compile pattern and display a warning if compilation failed. */
516 re = pcre_compile(pattern,
517 coptions,
518 &error,
519 &erroffset,
520 tables);
521
522 if (re == NULL) {
523 #if HAVE_SETLOCALE
524 if (key != regex) {
525 zend_string_release(key);
526 }
527 #endif
528 php_error_docref(NULL,E_WARNING, "Compilation failed: %s at offset %d", error, erroffset);
529 pcre_handle_exec_error(PCRE_ERROR_INTERNAL);
530 efree(pattern);
531 if (tables) {
532 pefree((void*)tables, 1);
533 }
534 return NULL;
535 }
536
537 #ifdef HAVE_PCRE_JIT_SUPPORT
538 if (PCRE_G(jit)) {
539 /* Enable PCRE JIT compiler */
540 do_study = 1;
541 soptions |= PCRE_STUDY_JIT_COMPILE;
542 }
543 #endif
544
545 /* If study option was specified, study the pattern and
546 store the result in extra for passing to pcre_exec. */
547 if (do_study) {
548 php_pcre_mutex_lock();
549 extra = pcre_study(re, soptions, &error);
550 php_pcre_mutex_unlock();
551 if (extra) {
552 extra->flags |= PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
553 extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
554 extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
555 #ifdef HAVE_PCRE_JIT_SUPPORT
556 if (PCRE_G(jit) && jit_stack) {
557 pcre_assign_jit_stack(extra, NULL, jit_stack);
558 }
559 #endif
560 }
561 if (error != NULL) {
562 php_error_docref(NULL, E_WARNING, "Error while studying pattern");
563 pcre_handle_exec_error(PCRE_ERROR_INTERNAL);
564 }
565 } else {
566 extra = NULL;
567 }
568
569 efree(pattern);
570
571 /*
572 * If we reached cache limit, clean out the items from the head of the list;
573 * these are supposedly the oldest ones (but not necessarily the least used
574 * ones).
575 */
576 if (!pce && zend_hash_num_elements(&PCRE_G(pcre_cache)) == PCRE_CACHE_SIZE) {
577 int num_clean = PCRE_CACHE_SIZE / 8;
578 zend_hash_apply_with_argument(&PCRE_G(pcre_cache), pcre_clean_cache, &num_clean);
579 }
580
581 /* Store the compiled pattern and extra info in the cache. */
582 new_entry.re = re;
583 new_entry.extra = extra;
584 new_entry.preg_options = poptions;
585 new_entry.compile_options = coptions;
586 #if HAVE_SETLOCALE
587 new_entry.tables = tables;
588 #endif
589 new_entry.refcount = 0;
590
591 rc = pcre_fullinfo(re, extra, PCRE_INFO_CAPTURECOUNT, &new_entry.capture_count);
592 if (rc < 0) {
593 #if HAVE_SETLOCALE
594 if (key != regex) {
595 zend_string_release(key);
596 }
597 #endif
598 php_error_docref(NULL, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
599 pcre_handle_exec_error(PCRE_ERROR_INTERNAL);
600 return NULL;
601 }
602
603 rc = pcre_fullinfo(re, extra, PCRE_INFO_NAMECOUNT, &new_entry.name_count);
604 if (rc < 0) {
605 #if HAVE_SETLOCALE
606 if (key != regex) {
607 zend_string_release(key);
608 }
609 #endif
610 php_error_docref(NULL, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
611 pcre_handle_exec_error(PCRE_ERROR_INTERNAL);
612 return NULL;
613 }
614
615 /*
616 * Interned strings are not duplicated when stored in HashTable,
617 * but all the interned strings created during HTTP request are removed
618 * at end of request. However PCRE_G(pcre_cache) must be consistent
619 * on the next request as well. So we disable usage of interned strings
620 * as hash keys especually for this table.
621 * See bug #63180
622 */
623 if (!ZSTR_IS_INTERNED(key) || !(GC_FLAGS(key) & IS_STR_PERMANENT)) {
624 pce = zend_hash_str_update_mem(&PCRE_G(pcre_cache),
625 ZSTR_VAL(key), ZSTR_LEN(key), &new_entry, sizeof(pcre_cache_entry));
626 #if HAVE_SETLOCALE
627 if (key != regex) {
628 zend_string_release(key);
629 }
630 #endif
631 } else {
632 pce = zend_hash_update_mem(&PCRE_G(pcre_cache), key, &new_entry, sizeof(pcre_cache_entry));
633 }
634
635 return pce;
636 }
637 /* }}} */
638
/* {{{ pcre_get_compiled_regex_cache
 * Convenience wrapper around pcre_get_compiled_regex_cache_ex() that always
 * requests the locale-aware lookup.
 */
PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex)
{
	/* The second argument is locale_aware. */
	return pcre_get_compiled_regex_cache_ex(regex, 1);
}
/* }}} */
646
647 /* {{{ pcre_get_compiled_regex
648 */
pcre_get_compiled_regex(zend_string * regex,pcre_extra ** extra,int * preg_options)649 PHPAPI pcre* pcre_get_compiled_regex(zend_string *regex, pcre_extra **extra, int *preg_options)
650 {
651 pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex);
652
653 if (extra) {
654 *extra = pce ? pce->extra : NULL;
655 }
656 if (preg_options) {
657 *preg_options = pce ? pce->preg_options : 0;
658 }
659
660 return pce ? pce->re : NULL;
661 }
662 /* }}} */
663
664 /* {{{ pcre_get_compiled_regex_ex
665 */
pcre_get_compiled_regex_ex(zend_string * regex,pcre_extra ** extra,int * preg_options,int * compile_options)666 PHPAPI pcre* pcre_get_compiled_regex_ex(zend_string *regex, pcre_extra **extra, int *preg_options, int *compile_options)
667 {
668 pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex);
669
670 if (extra) {
671 *extra = pce ? pce->extra : NULL;
672 }
673 if (preg_options) {
674 *preg_options = pce ? pce->preg_options : 0;
675 }
676 if (compile_options) {
677 *compile_options = pce ? pce->compile_options : 0;
678 }
679
680 return pce ? pce->re : NULL;
681 }
682 /* }}} */
683
684 /* {{{ add_offset_pair */
add_offset_pair(zval * result,char * str,int len,int offset,char * name,int unmatched_as_null)685 static inline void add_offset_pair(zval *result, char *str, int len, int offset, char *name, int unmatched_as_null)
686 {
687 zval match_pair, tmp;
688
689 array_init_size(&match_pair, 2);
690
691 /* Add (match, offset) to the return value */
692 if (offset < 0) {
693 if (unmatched_as_null) {
694 ZVAL_NULL(&tmp);
695 } else {
696 ZVAL_EMPTY_STRING(&tmp);
697 }
698 } else {
699 ZVAL_STRINGL(&tmp, str, len);
700 }
701 zend_hash_next_index_insert_new(Z_ARRVAL(match_pair), &tmp);
702 ZVAL_LONG(&tmp, offset);
703 zend_hash_next_index_insert_new(Z_ARRVAL(match_pair), &tmp);
704
705 if (name) {
706 Z_ADDREF(match_pair);
707 zend_hash_str_update(Z_ARRVAL_P(result), name, strlen(name), &match_pair);
708 }
709 zend_hash_next_index_insert(Z_ARRVAL_P(result), &match_pair);
710 }
711 /* }}} */
712
/* Shared implementation behind the userland match functions: parses the
   arguments, fetches the compiled pattern and hands over to
   php_pcre_match_impl(). `global` selects global matching. Returns FALSE to
   the script on argument errors, over-long subjects or pattern errors. */
static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) /* {{{ */
{
	zend_string		 *regex;			/* Regular expression */
	zend_string		 *subject;			/* String to match against */
	zval			 *subpats = NULL;	/* Array for subpatterns */
	zend_long		  flags = 0;		/* Match control flags */
	zend_long		  start_offset = 0;	/* Where the new search starts */
	pcre_cache_entry *pce;				/* Compiled regular expression */

	ZEND_PARSE_PARAMETERS_START(2, 5)
		Z_PARAM_STR(regex)
		Z_PARAM_STR(subject)
		Z_PARAM_OPTIONAL
		Z_PARAM_ZVAL_DEREF(subpats)
		Z_PARAM_LONG(flags)
		Z_PARAM_LONG(start_offset)
	ZEND_PARSE_PARAMETERS_END_EX(RETURN_FALSE);

	/* The implementation takes the subject length as an int. */
	if (ZEND_SIZE_T_INT_OVFL(ZSTR_LEN(subject))) {
		php_error_docref(NULL, E_WARNING, "Subject is too long");
		RETURN_FALSE;
	}

	/* Compile regex or get it from cache. */
	pce = pcre_get_compiled_regex_cache(regex);
	if (pce == NULL) {
		RETURN_FALSE;
	}

	/* A non-zero refcount keeps pcre_clean_cache() from evicting the entry
	   while the match is running. */
	pce->refcount++;
	php_pcre_match_impl(pce, ZSTR_VAL(subject), (int)ZSTR_LEN(subject), return_value, subpats,
		global, ZEND_NUM_ARGS() >= 4, flags, start_offset);
	pce->refcount--;
}
/* }}} */
748
749 /* {{{ php_pcre_match_impl() */
php_pcre_match_impl(pcre_cache_entry * pce,char * subject,int subject_len,zval * return_value,zval * subpats,int global,int use_flags,zend_long flags,zend_long start_offset)750 PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value,
751 zval *subpats, int global, int use_flags, zend_long flags, zend_long start_offset)
752 {
753 zval result_set, /* Holds a set of subpatterns after
754 a global match */
755 *match_sets = NULL; /* An array of sets of matches for each
756 subpattern after a global match */
757 pcre_extra *extra = pce->extra;/* Holds results of studying */
758 pcre_extra extra_data; /* Used locally for exec options */
759 int no_utf_check = 0; /* Execution options */
760 int count = 0; /* Count of matched subpatterns */
761 int *offsets; /* Array of subpattern offsets */
762 int num_subpats; /* Number of captured subpatterns */
763 int size_offsets; /* Size of the offsets array */
764 int matched; /* Has anything matched */
765 int g_notempty = 0; /* If the match should not be empty */
766 char **subpat_names; /* Array for named subpatterns */
767 int i;
768 int subpats_order; /* Order of subpattern matches */
769 int offset_capture; /* Capture match offsets: yes/no */
770 int unmatched_as_null; /* Null non-matches: yes/no */
771 unsigned char *mark = NULL; /* Target for MARK name */
772 zval marks; /* Array of marks for PREG_PATTERN_ORDER */
773
774 ALLOCA_FLAG(use_heap);
775
776 ZVAL_UNDEF(&marks);
777
778 /* Overwrite the passed-in value for subpatterns with an empty array. */
779 if (subpats != NULL) {
780 zval_ptr_dtor(subpats);
781 array_init(subpats);
782 }
783
784 subpats_order = global ? PREG_PATTERN_ORDER : 0;
785
786 if (use_flags) {
787 offset_capture = flags & PREG_OFFSET_CAPTURE;
788 unmatched_as_null = flags & PREG_UNMATCHED_AS_NULL;
789
790 /*
791 * subpats_order is pre-set to pattern mode so we change it only if
792 * necessary.
793 */
794 if (flags & 0xff) {
795 subpats_order = flags & 0xff;
796 }
797 if ((global && (subpats_order < PREG_PATTERN_ORDER || subpats_order > PREG_SET_ORDER)) ||
798 (!global && subpats_order != 0)) {
799 php_error_docref(NULL, E_WARNING, "Invalid flags specified");
800 return;
801 }
802 } else {
803 offset_capture = 0;
804 unmatched_as_null = 0;
805 }
806
807 /* Negative offset counts from the end of the string. */
808 if (start_offset < 0) {
809 start_offset = subject_len + start_offset;
810 if (start_offset < 0) {
811 start_offset = 0;
812 }
813 }
814
815 if (extra == NULL) {
816 extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
817 extra = &extra_data;
818 }
819 extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
820 extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
821 #ifdef PCRE_EXTRA_MARK
822 extra->mark = &mark;
823 extra->flags |= PCRE_EXTRA_MARK;
824 #endif
825
826 /* Calculate the size of the offsets array, and allocate memory for it. */
827 num_subpats = pce->capture_count + 1;
828 size_offsets = num_subpats * 3;
829
830 /*
831 * Build a mapping from subpattern numbers to their names. We will
832 * allocate the table only if there are any named subpatterns.
833 */
834 subpat_names = NULL;
835 if (pce->name_count > 0) {
836 subpat_names = make_subpats_table(num_subpats, pce);
837 if (!subpat_names) {
838 RETURN_FALSE;
839 }
840 }
841
842 if (size_offsets <= 32) {
843 offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
844 } else {
845 offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
846 }
847 memset(offsets, 0, size_offsets*sizeof(int));
848 /* Allocate match sets array and initialize the values. */
849 if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
850 match_sets = (zval *)safe_emalloc(num_subpats, sizeof(zval), 0);
851 for (i=0; i<num_subpats; i++) {
852 array_init(&match_sets[i]);
853 }
854 }
855
856 matched = 0;
857 PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
858
859 #ifdef HAVE_PCRE_JIT_SUPPORT
860 if (!(pce->compile_options & PCRE_UTF8)) {
861 no_utf_check = PCRE_NO_UTF8_CHECK;
862 }
863 #endif
864
865 do {
866 /* Execute the regular expression. */
867 #ifdef HAVE_PCRE_JIT_SUPPORT
868 if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT)
869 && no_utf_check && !g_notempty) {
870 if (start_offset < 0 || start_offset > subject_len) {
871 pcre_handle_exec_error(PCRE_ERROR_BADOFFSET);
872 break;
873 }
874 count = pcre_jit_exec(pce->re, extra, subject, (int)subject_len, (int)start_offset,
875 no_utf_check|g_notempty, offsets, size_offsets, jit_stack);
876 } else
877 #endif
878 count = pcre_exec(pce->re, extra, subject, (int)subject_len, (int)start_offset,
879 no_utf_check|g_notempty, offsets, size_offsets);
880
881 /* the string was already proved to be valid UTF-8 */
882 no_utf_check = PCRE_NO_UTF8_CHECK;
883
884 /* Check for too many substrings condition. */
885 if (count == 0) {
886 php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
887 count = size_offsets/3;
888 }
889
890 /* If something has matched */
891 if (count > 0) {
892 matched++;
893
894 /* If subpatterns array has been passed, fill it in with values. */
895 if (subpats != NULL) {
896 /* Try to get the list of substrings and display a warning if failed. */
897 if (offsets[1] - offsets[0] < 0) {
898 if (subpat_names) {
899 efree(subpat_names);
900 }
901 if (size_offsets <= 32) {
902 free_alloca(offsets, use_heap);
903 } else {
904 efree(offsets);
905 }
906 if (match_sets) efree(match_sets);
907 php_error_docref(NULL, E_WARNING, "Get subpatterns list failed");
908 RETURN_FALSE;
909 }
910
911 if (global) { /* global pattern matching */
912 if (subpats && subpats_order == PREG_PATTERN_ORDER) {
913 /* For each subpattern, insert it into the appropriate array. */
914 if (offset_capture) {
915 for (i = 0; i < count; i++) {
916 add_offset_pair(&match_sets[i], subject + offsets[i<<1],
917 offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL, unmatched_as_null);
918 }
919 } else {
920 for (i = 0; i < count; i++) {
921 if (offsets[i<<1] < 0) {
922 if (unmatched_as_null) {
923 add_next_index_null(&match_sets[i]);
924 } else {
925 add_next_index_str(&match_sets[i], ZSTR_EMPTY_ALLOC());
926 }
927 } else {
928 add_next_index_stringl(&match_sets[i], subject + offsets[i<<1],
929 offsets[(i<<1)+1] - offsets[i<<1]);
930 }
931 }
932 }
933 /* Add MARK, if available */
934 if (mark) {
935 if (Z_TYPE(marks) == IS_UNDEF) {
936 array_init(&marks);
937 }
938 add_index_string(&marks, matched - 1, (char *) mark);
939 }
940 /*
941 * If the number of captured subpatterns on this run is
942 * less than the total possible number, pad the result
943 * arrays with NULLs or empty strings.
944 */
945 if (count < num_subpats) {
946 for (; i < num_subpats; i++) {
947 if (unmatched_as_null) {
948 add_next_index_null(&match_sets[i]);
949 } else {
950 add_next_index_str(&match_sets[i], ZSTR_EMPTY_ALLOC());
951 }
952 }
953 }
954 } else {
955 /* Allocate the result set array */
956 array_init_size(&result_set, count + (mark ? 1 : 0));
957
958 /* Add all the subpatterns to it */
959 if (subpat_names) {
960 if (offset_capture) {
961 for (i = 0; i < count; i++) {
962 add_offset_pair(&result_set, subject + offsets[i<<1],
963 offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], subpat_names[i], unmatched_as_null);
964 }
965 } else {
966 for (i = 0; i < count; i++) {
967 if (subpat_names[i]) {
968 if (offsets[i<<1] < 0) {
969 if (unmatched_as_null) {
970 add_assoc_null(&result_set, subpat_names[i]);
971 } else {
972 add_assoc_str(&result_set, subpat_names[i], ZSTR_EMPTY_ALLOC());
973 }
974 } else {
975 add_assoc_stringl(&result_set, subpat_names[i], subject + offsets[i<<1],
976 offsets[(i<<1)+1] - offsets[i<<1]);
977 }
978 }
979 if (offsets[i<<1] < 0) {
980 if (unmatched_as_null) {
981 add_next_index_null(&result_set);
982 } else {
983 add_next_index_str(&result_set, ZSTR_EMPTY_ALLOC());
984 }
985 } else {
986 add_next_index_stringl(&result_set, subject + offsets[i<<1],
987 offsets[(i<<1)+1] - offsets[i<<1]);
988 }
989 }
990 }
991 } else {
992 if (offset_capture) {
993 for (i = 0; i < count; i++) {
994 add_offset_pair(&result_set, subject + offsets[i<<1],
995 offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL, unmatched_as_null);
996 }
997 } else {
998 for (i = 0; i < count; i++) {
999 if (offsets[i<<1] < 0) {
1000 if (unmatched_as_null) {
1001 add_next_index_null(&result_set);
1002 } else {
1003 add_next_index_str(&result_set, ZSTR_EMPTY_ALLOC());
1004 }
1005 } else {
1006 add_next_index_stringl(&result_set, subject + offsets[i<<1],
1007 offsets[(i<<1)+1] - offsets[i<<1]);
1008 }
1009 }
1010 }
1011 }
1012 /* Add MARK, if available */
1013 if (mark) {
1014 add_assoc_string_ex(&result_set, "MARK", sizeof("MARK") - 1, (char *)mark);
1015 }
1016 /* And add it to the output array */
1017 zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &result_set);
1018 }
1019 } else { /* single pattern matching */
1020 /* For each subpattern, insert it into the subpatterns array. */
1021 if (subpat_names) {
1022 if (offset_capture) {
1023 for (i = 0; i < count; i++) {
1024 add_offset_pair(subpats, subject + offsets[i<<1],
1025 offsets[(i<<1)+1] - offsets[i<<1],
1026 offsets[i<<1], subpat_names[i], unmatched_as_null);
1027 }
1028 } else {
1029 for (i = 0; i < count; i++) {
1030 if (subpat_names[i]) {
1031 if (offsets[i<<1] < 0) {
1032 if (unmatched_as_null) {
1033 add_assoc_null(subpats, subpat_names[i]);
1034 } else {
1035 add_assoc_str(subpats, subpat_names[i], ZSTR_EMPTY_ALLOC());
1036 }
1037 } else {
1038 add_assoc_stringl(subpats, subpat_names[i], subject + offsets[i<<1],
1039 offsets[(i<<1)+1] - offsets[i<<1]);
1040 }
1041 }
1042 if (offsets[i<<1] < 0) {
1043 if (unmatched_as_null) {
1044 add_next_index_null(subpats);
1045 } else {
1046 add_next_index_str(subpats, ZSTR_EMPTY_ALLOC());
1047 }
1048 } else {
1049 add_next_index_stringl(subpats, subject + offsets[i<<1],
1050 offsets[(i<<1)+1] - offsets[i<<1]);
1051 }
1052 }
1053 }
1054 } else {
1055 if (offset_capture) {
1056 for (i = 0; i < count; i++) {
1057 add_offset_pair(subpats, subject + offsets[i<<1],
1058 offsets[(i<<1)+1] - offsets[i<<1],
1059 offsets[i<<1], NULL, unmatched_as_null);
1060 }
1061 } else {
1062 for (i = 0; i < count; i++) {
1063 if (offsets[i<<1] < 0) {
1064 if (unmatched_as_null) {
1065 add_next_index_null(subpats);
1066 } else {
1067 add_next_index_str(subpats, ZSTR_EMPTY_ALLOC());
1068 }
1069 } else {
1070 add_next_index_stringl(subpats, subject + offsets[i<<1],
1071 offsets[(i<<1)+1] - offsets[i<<1]);
1072 }
1073 }
1074 }
1075 }
1076 /* Add MARK, if available */
1077 if (mark) {
1078 add_assoc_string_ex(subpats, "MARK", sizeof("MARK") - 1, (char *)mark);
1079 }
1080 break;
1081 }
1082 }
1083
1084 /* Advance to the next piece. */
1085 start_offset = offsets[1];
1086
1087 /* If we have matched an empty string, mimic what Perl's /g options does.
1088 This turns out to be rather cunning. First we set PCRE_NOTEMPTY_ATSTART and try
1089 the match again at the same point. If this fails (picked up above) we
1090 advance to the next character. */
1091 g_notempty = (start_offset == offsets[0]) ? PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED : 0;
1092
1093 } else if (count == PCRE_ERROR_NOMATCH) {
1094 /* If we previously set PCRE_NOTEMPTY_ATSTART after a null match,
1095 this is not necessarily the end. We need to advance
1096 the start offset, and continue. Fudge the offset values
1097 to achieve this, unless we're already at the end of the string. */
1098 if (g_notempty != 0 && start_offset < subject_len) {
1099 int unit_len = calculate_unit_length(pce, subject + start_offset);
1100
1101 start_offset += unit_len;
1102 g_notempty = 0;
1103 } else
1104 break;
1105 } else {
1106 pcre_handle_exec_error(count);
1107 break;
1108 }
1109 } while (global);
1110
1111 /* Add the match sets to the output array and clean up */
1112 if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
1113 if (subpat_names) {
1114 for (i = 0; i < num_subpats; i++) {
1115 if (subpat_names[i]) {
1116 zend_hash_str_update(Z_ARRVAL_P(subpats), subpat_names[i],
1117 strlen(subpat_names[i]), &match_sets[i]);
1118 Z_ADDREF(match_sets[i]);
1119 }
1120 zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i]);
1121 }
1122 } else {
1123 for (i = 0; i < num_subpats; i++) {
1124 zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i]);
1125 }
1126 }
1127 efree(match_sets);
1128
1129 if (Z_TYPE(marks) != IS_UNDEF) {
1130 add_assoc_zval(subpats, "MARK", &marks);
1131 }
1132 }
1133
1134 if (size_offsets <= 32) {
1135 free_alloca(offsets, use_heap);
1136 } else {
1137 efree(offsets);
1138 }
1139 if (subpat_names) {
1140 efree(subpat_names);
1141 }
1142
1143 /* Did we encounter an error? */
1144 if (PCRE_G(error_code) == PHP_PCRE_NO_ERROR) {
1145 RETVAL_LONG(matched);
1146 } else {
1147 RETVAL_FALSE;
1148 }
1149 }
1150 /* }}} */
1151
1152 /* {{{ proto int preg_match(string pattern, string subject [, array &subpatterns [, int flags [, int offset]]])
1153 Perform a Perl-style regular expression match */
PHP_FUNCTION(preg_match)1154 static PHP_FUNCTION(preg_match)
1155 {
1156 php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
1157 }
1158 /* }}} */
1159
1160 /* {{{ proto int preg_match_all(string pattern, string subject [, array &subpatterns [, int flags [, int offset]]])
1161 Perform a Perl-style global regular expression match */
PHP_FUNCTION(preg_match_all)1162 static PHP_FUNCTION(preg_match_all)
1163 {
1164 php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
1165 }
1166 /* }}} */
1167
1168 /* {{{ preg_get_backref
1169 */
/*
 * Parse a backreference token starting at *str. The first character is the
 * introducer (the callers pass a pointer to '\\' or '$'); it is skipped
 * without being validated, except that "${" opens the braced form.
 *
 * Accepted shapes: <intro>N, <intro>NN, ${N} and ${NN}, with N a decimal digit.
 *
 * Returns 1 on success, storing the number in *backref and moving *str past
 * the token. Returns 0 otherwise and leaves *str where it was; *backref may
 * already have been written if the failure is a missing closing brace.
 */
static int preg_get_backref(char **str, int *backref)
{
	char *cursor = *str;
	int braced;
	int number;

	/* An introducer that is the last character cannot start a reference. */
	if (cursor[1] == '\0') {
		return 0;
	}

	braced = (cursor[0] == '$' && cursor[1] == '{');
	cursor += braced ? 2 : 1;

	/* At least one digit is mandatory. */
	if (*cursor < '0' || *cursor > '9') {
		return 0;
	}
	number = *cursor++ - '0';

	/* An optional second digit; anything beyond that is ordinary text. */
	if (*cursor >= '0' && *cursor <= '9') {
		number = number * 10 + *cursor++ - '0';
	}
	*backref = number;

	if (braced) {
		if (*cursor != '}') {
			return 0;
		}
		cursor++;
	}

	*str = cursor;
	return 1;
}
1205 /* }}} */
1206
1207 /* {{{ preg_do_repl_func
1208 */
preg_do_repl_func(zend_fcall_info * fci,zend_fcall_info_cache * fcc,char * subject,int * offsets,char ** subpat_names,int count,unsigned char * mark)1209 static zend_string *preg_do_repl_func(zend_fcall_info *fci, zend_fcall_info_cache *fcc, char *subject, int *offsets, char **subpat_names, int count, unsigned char *mark)
1210 {
1211 zend_string *result_str;
1212 zval retval; /* Function return value */
1213 zval arg; /* Argument to pass to function */
1214 int i;
1215
1216 array_init_size(&arg, count + (mark ? 1 : 0));
1217 if (subpat_names) {
1218 for (i = 0; i < count; i++) {
1219 if (subpat_names[i]) {
1220 add_assoc_stringl(&arg, subpat_names[i], &subject[offsets[i<<1]] , offsets[(i<<1)+1] - offsets[i<<1]);
1221 }
1222 add_next_index_stringl(&arg, &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1]);
1223 }
1224 } else {
1225 for (i = 0; i < count; i++) {
1226 add_next_index_stringl(&arg, &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1]);
1227 }
1228 }
1229 if (mark) {
1230 add_assoc_string(&arg, "MARK", (char *) mark);
1231 }
1232
1233 fci->retval = &retval;
1234 fci->param_count = 1;
1235 fci->params = &arg;
1236 fci->no_separation = 0;
1237
1238 if (zend_call_function(fci, fcc) == SUCCESS && Z_TYPE(retval) != IS_UNDEF) {
1239 result_str = zval_get_string(&retval);
1240 zval_ptr_dtor(&retval);
1241 } else {
1242 if (!EG(exception)) {
1243 php_error_docref(NULL, E_WARNING, "Unable to call custom replacement function");
1244 }
1245
1246 result_str = zend_string_init(&subject[offsets[0]], offsets[1] - offsets[0], 0);
1247 }
1248
1249 zval_ptr_dtor(&arg);
1250
1251 return result_str;
1252 }
1253 /* }}} */
1254
1255 /* {{{ php_pcre_replace
1256 */
php_pcre_replace(zend_string * regex,zend_string * subject_str,char * subject,int subject_len,zend_string * replace_str,int limit,int * replace_count)1257 PHPAPI zend_string *php_pcre_replace(zend_string *regex,
1258 zend_string *subject_str,
1259 char *subject, int subject_len,
1260 zend_string *replace_str,
1261 int limit, int *replace_count)
1262 {
1263 pcre_cache_entry *pce; /* Compiled regular expression */
1264 zend_string *result; /* Function result */
1265
1266 /* Compile regex or get it from cache. */
1267 if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
1268 return NULL;
1269 }
1270 pce->refcount++;
1271 result = php_pcre_replace_impl(pce, subject_str, subject, subject_len, replace_str,
1272 limit, replace_count);
1273 pce->refcount--;
1274
1275 return result;
1276 }
1277 /* }}} */
1278
1279 /* {{{ php_pcre_replace_impl() */
php_pcre_replace_impl(pcre_cache_entry * pce,zend_string * subject_str,char * subject,int subject_len,zend_string * replace_str,int limit,int * replace_count)1280 PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *subject_str, char *subject, int subject_len, zend_string *replace_str, int limit, int *replace_count)
1281 {
1282 pcre_extra *extra = pce->extra;/* Holds results of studying */
1283 pcre_extra extra_data; /* Used locally for exec options */
1284 int no_utf_check = 0; /* Execution options */
1285 int count = 0; /* Count of matched subpatterns */
1286 int *offsets; /* Array of subpattern offsets */
1287 char **subpat_names; /* Array for named subpatterns */
1288 int num_subpats; /* Number of captured subpatterns */
1289 int size_offsets; /* Size of the offsets array */
1290 size_t new_len; /* Length of needed storage */
1291 size_t alloc_len; /* Actual allocated length */
1292 int match_len; /* Length of the current match */
1293 int backref; /* Backreference number */
1294 int start_offset; /* Where the new search starts */
1295 int g_notempty=0; /* If the match should not be empty */
1296 char *walkbuf, /* Location of current replacement in the result */
1297 *walk, /* Used to walk the replacement string */
1298 *match, /* The current match */
1299 *piece, /* The current piece of subject */
1300 *replace_end, /* End of replacement string */
1301 walk_last; /* Last walked character */
1302 size_t result_len; /* Length of result */
1303 zend_string *result; /* Result of replacement */
1304
1305 ALLOCA_FLAG(use_heap);
1306
1307 if (extra == NULL) {
1308 extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1309 extra = &extra_data;
1310 }
1311
1312 extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
1313 extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
1314
1315 if (UNEXPECTED(pce->preg_options & PREG_REPLACE_EVAL)) {
1316 php_error_docref(NULL, E_WARNING, "The /e modifier is no longer supported, use preg_replace_callback instead");
1317 return NULL;
1318 }
1319
1320 /* Calculate the size of the offsets array, and allocate memory for it. */
1321 num_subpats = pce->capture_count + 1;
1322 size_offsets = num_subpats * 3;
1323 if (size_offsets <= 32) {
1324 offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
1325 } else {
1326 offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
1327 }
1328
1329 /*
1330 * Build a mapping from subpattern numbers to their names. We will
1331 * allocate the table only if there are any named subpatterns.
1332 */
1333 subpat_names = NULL;
1334 if (UNEXPECTED(pce->name_count > 0)) {
1335 subpat_names = make_subpats_table(num_subpats, pce);
1336 if (!subpat_names) {
1337 if (size_offsets <= 32) {
1338 free_alloca(offsets, use_heap);
1339 } else {
1340 efree(offsets);
1341 }
1342 return NULL;
1343 }
1344 }
1345
1346 alloc_len = 0;
1347 result = NULL;
1348
1349 /* Initialize */
1350 match = NULL;
1351 start_offset = 0;
1352 result_len = 0;
1353 PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1354
1355 #ifdef HAVE_PCRE_JIT_SUPPORT
1356 if (!(pce->compile_options & PCRE_UTF8)) {
1357 no_utf_check = PCRE_NO_UTF8_CHECK;
1358 }
1359 #endif
1360
1361 #ifdef PCRE_EXTRA_MARK
1362 extra->flags &= ~PCRE_EXTRA_MARK;
1363 #endif
1364
1365 while (1) {
1366 /* Execute the regular expression. */
1367 #ifdef HAVE_PCRE_JIT_SUPPORT
1368 if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT)
1369 && no_utf_check && !g_notempty) {
1370 count = pcre_jit_exec(pce->re, extra, subject, subject_len, start_offset,
1371 no_utf_check|g_notempty, offsets, size_offsets, jit_stack);
1372 } else
1373 #endif
1374 count = pcre_exec(pce->re, extra, subject, subject_len, start_offset,
1375 no_utf_check|g_notempty, offsets, size_offsets);
1376
1377 /* the string was already proved to be valid UTF-8 */
1378 no_utf_check = PCRE_NO_UTF8_CHECK;
1379
1380 /* Check for too many substrings condition. */
1381 if (UNEXPECTED(count == 0)) {
1382 php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
1383 count = size_offsets / 3;
1384 }
1385
1386 piece = subject + start_offset;
1387
1388 /* if (EXPECTED(count > 0 && (limit == -1 || limit > 0))) */
1389 if (count > 0 && (offsets[1] - offsets[0] >= 0) && limit) {
1390 zend_bool simple_string = 1;
1391
1392 if (replace_count) {
1393 ++*replace_count;
1394 }
1395
1396 /* Set the match location in subject */
1397 match = subject + offsets[0];
1398
1399 new_len = result_len + offsets[0] - start_offset; /* part before the match */
1400
1401 walk = ZSTR_VAL(replace_str);
1402 replace_end = walk + ZSTR_LEN(replace_str);
1403 walk_last = 0;
1404
1405 while (walk < replace_end) {
1406 if ('\\' == *walk || '$' == *walk) {
1407 simple_string = 0;
1408 if (walk_last == '\\') {
1409 walk++;
1410 walk_last = 0;
1411 continue;
1412 }
1413 if (preg_get_backref(&walk, &backref)) {
1414 if (backref < count)
1415 new_len += offsets[(backref<<1)+1] - offsets[backref<<1];
1416 continue;
1417 }
1418 }
1419 new_len++;
1420 walk++;
1421 walk_last = walk[-1];
1422 }
1423
1424 if (new_len >= alloc_len) {
1425 alloc_len = zend_safe_address_guarded(2, new_len, alloc_len);
1426 if (result == NULL) {
1427 result = zend_string_alloc(alloc_len, 0);
1428 } else {
1429 result = zend_string_extend(result, alloc_len, 0);
1430 }
1431 }
1432
1433 if (match-piece > 0) {
1434 /* copy the part of the string before the match */
1435 memcpy(&ZSTR_VAL(result)[result_len], piece, match-piece);
1436 result_len += (match-piece);
1437 }
1438
1439 if (simple_string) {
1440 /* copy replacement */
1441 memcpy(&ZSTR_VAL(result)[result_len], ZSTR_VAL(replace_str), ZSTR_LEN(replace_str)+1);
1442 result_len += ZSTR_LEN(replace_str);
1443 } else {
1444 /* copy replacement and backrefs */
1445 walkbuf = ZSTR_VAL(result) + result_len;
1446
1447 walk = ZSTR_VAL(replace_str);
1448 walk_last = 0;
1449 while (walk < replace_end) {
1450 if ('\\' == *walk || '$' == *walk) {
1451 if (walk_last == '\\') {
1452 *(walkbuf-1) = *walk++;
1453 walk_last = 0;
1454 continue;
1455 }
1456 if (preg_get_backref(&walk, &backref)) {
1457 if (backref < count) {
1458 match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
1459 memcpy(walkbuf, subject + offsets[backref<<1], match_len);
1460 walkbuf += match_len;
1461 }
1462 continue;
1463 }
1464 }
1465 *walkbuf++ = *walk++;
1466 walk_last = walk[-1];
1467 }
1468 *walkbuf = '\0';
1469 /* increment the result length by how much we've added to the string */
1470 result_len += (walkbuf - (ZSTR_VAL(result) + result_len));
1471 }
1472
1473 if (limit) {
1474 limit--;
1475 }
1476
1477 /* Advance to the next piece. */
1478 start_offset = offsets[1];
1479
1480 /* If we have matched an empty string, mimic what Perl's /g options does.
1481 This turns out to be rather cunning. First we set PCRE_NOTEMPTY_ATSTART and try
1482 the match again at the same point. If this fails (picked up above) we
1483 advance to the next character. */
1484 g_notempty = (start_offset == offsets[0]) ? PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED : 0;
1485
1486 } else if (count == PCRE_ERROR_NOMATCH || limit == 0) {
1487 /* If we previously set PCRE_NOTEMPTY_ATSTART after a null match,
1488 this is not necessarily the end. We need to advance
1489 the start offset, and continue. Fudge the offset values
1490 to achieve this, unless we're already at the end of the string. */
1491 if (g_notempty != 0 && start_offset < subject_len) {
1492 int unit_len = calculate_unit_length(pce, piece);
1493
1494 start_offset += unit_len;
1495 memcpy(ZSTR_VAL(result) + result_len, piece, unit_len);
1496 result_len += unit_len;
1497 g_notempty = 0;
1498 } else {
1499 if (!result && subject_str) {
1500 result = zend_string_copy(subject_str);
1501 break;
1502 }
1503 new_len = result_len + subject_len - start_offset;
1504 if (new_len >= alloc_len) {
1505 alloc_len = new_len; /* now we know exactly how long it is */
1506 if (NULL != result) {
1507 result = zend_string_realloc(result, alloc_len, 0);
1508 } else {
1509 result = zend_string_alloc(alloc_len, 0);
1510 }
1511 }
1512 /* stick that last bit of string on our output */
1513 memcpy(ZSTR_VAL(result) + result_len, piece, subject_len - start_offset);
1514 result_len += subject_len - start_offset;
1515 ZSTR_VAL(result)[result_len] = '\0';
1516 ZSTR_LEN(result) = result_len;
1517 break;
1518 }
1519 } else {
1520 pcre_handle_exec_error(count);
1521 if (result) {
1522 zend_string_release(result);
1523 result = NULL;
1524 }
1525 break;
1526 }
1527 }
1528
1529 if (size_offsets <= 32) {
1530 free_alloca(offsets, use_heap);
1531 } else {
1532 efree(offsets);
1533 }
1534 if (UNEXPECTED(subpat_names)) {
1535 efree(subpat_names);
1536 }
1537
1538 return result;
1539 }
1540 /* }}} */
1541
1542 /* {{{ php_pcre_replace_func_impl() */
/*
 * Core of callback-based replacement for an already compiled pattern.
 *
 * Scans `subject` (`subject_len` bytes) for matches of `pce`; for each
 * replaced match the callback described by fci/fcc is invoked through
 * preg_do_repl_func() and its string result is spliced into the output.
 *
 * subject_str   - optional zend_string holding `subject`; if it is non-NULL
 *                 and no output buffer was ever needed, a new reference to
 *                 it is returned instead of a fresh copy.
 * limit         - 0 performs no replacement; any other value allows
 *                 replacement and is decremented once per replaced match
 *                 (callers use -1 for "no limit").
 * replace_count - if non-NULL, incremented once per replaced match.
 *
 * Returns the resulting string, or NULL when the pattern carries the /e
 * modifier, when the subpattern name table cannot be built, or when
 * pcre_exec() fails with anything other than "no match" (the failure is
 * passed to pcre_handle_exec_error()).
 */
static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_string *subject_str, char *subject, int subject_len, zend_fcall_info *fci, zend_fcall_info_cache *fcc, int limit, int *replace_count)
{
	pcre_extra		*extra = pce->extra;/* Holds results of studying */
	pcre_extra		 extra_data;		/* Used locally for exec options */
	int				 no_utf_check = 0;	/* Execution options */
	int				 count = 0;			/* Count of matched subpatterns */
	int				*offsets;			/* Array of subpattern offsets */
	char			**subpat_names;		/* Array for named subpatterns */
	int				 num_subpats;		/* Number of captured subpatterns */
	int				 size_offsets;		/* Size of the offsets array */
	size_t			 new_len;			/* Length of needed storage */
	size_t			 alloc_len;			/* Actual allocated length */
	int				 start_offset;		/* Where the new search starts */
	int				 g_notempty=0;		/* If the match should not be empty */
	char			*match,				/* The current match */
					*piece;				/* The current piece of subject */
	size_t			 result_len;		/* Length of result */
	unsigned char	*mark = NULL;		/* Target for MARK name */
	zend_string		*result;			/* Result of replacement */
	zend_string		*eval_result=NULL;	/* Result of custom function */

	ALLOCA_FLAG(use_heap);

	/* Without study data, use a local pcre_extra just to carry the limits. */
	if (extra == NULL) {
		extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
		extra = &extra_data;
	}

	extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
	extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);

	if (UNEXPECTED(pce->preg_options & PREG_REPLACE_EVAL)) {
		php_error_docref(NULL, E_WARNING, "The /e modifier is no longer supported, use preg_replace_callback instead");
		return NULL;
	}

	/* Calculate the size of the offsets array, and allocate memory for it. */
	num_subpats = pce->capture_count + 1;
	size_offsets = num_subpats * 3;
	if (size_offsets <= 32) {
		offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
	} else {
		offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
	}

	/*
	 * Build a mapping from subpattern numbers to their names. We will
	 * allocate the table only if there are any named subpatterns.
	 */
	subpat_names = NULL;
	if (UNEXPECTED(pce->name_count > 0)) {
		subpat_names = make_subpats_table(num_subpats, pce);
		if (!subpat_names) {
			if (size_offsets <= 32) {
				free_alloca(offsets, use_heap);
			} else {
				efree(offsets);
			}
			return NULL;
		}
	}

	alloc_len = 0;
	result = NULL;

	/* Initialize */
	match = NULL;
	start_offset = 0;
	result_len = 0;
	PCRE_G(error_code) = PHP_PCRE_NO_ERROR;

#ifdef HAVE_PCRE_JIT_SUPPORT
	if (!(pce->compile_options & PCRE_UTF8)) {
		no_utf_check = PCRE_NO_UTF8_CHECK;
	}
#endif

#ifdef PCRE_EXTRA_MARK
	/* Have PCRE report the MARK name so it can be passed to the callback. */
	extra->mark = &mark;
	extra->flags |= PCRE_EXTRA_MARK;
#endif

	while (1) {
		/* Execute the regular expression. */
#ifdef HAVE_PCRE_JIT_SUPPORT
		if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT)
		 && no_utf_check && !g_notempty) {
			count = pcre_jit_exec(pce->re, extra, subject, subject_len, start_offset,
								  no_utf_check|g_notempty, offsets, size_offsets, jit_stack);
		} else
#endif
		count = pcre_exec(pce->re, extra, subject, subject_len, start_offset,
						  no_utf_check|g_notempty, offsets, size_offsets);

		/* the string was already proved to be valid UTF-8 */
		no_utf_check = PCRE_NO_UTF8_CHECK;

		/* Check for too many substrings condition. */
		if (UNEXPECTED(count == 0)) {
			php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
			count = size_offsets / 3;
		}

		piece = subject + start_offset;

		/* if (EXPECTED(count > 0 && (limit == -1 || limit > 0))) */
		if (count > 0 && (offsets[1] - offsets[0] >= 0) && limit) {
			if (replace_count) {
				++*replace_count;
			}

			/* Set the match location in subject */
			match = subject + offsets[0];

			new_len = result_len + offsets[0] - start_offset; /* part before the match */

			/* Use custom function to get replacement string and its length. */
			eval_result = preg_do_repl_func(fci, fcc, subject, offsets, subpat_names, count, mark);
			ZEND_ASSERT(eval_result);
			new_len = zend_safe_address_guarded(1, ZSTR_LEN(eval_result), new_len);
			if (new_len >= alloc_len) {
				alloc_len = zend_safe_address_guarded(2, new_len, alloc_len);
				if (result == NULL) {
					result = zend_string_alloc(alloc_len, 0);
				} else {
					result = zend_string_extend(result, alloc_len, 0);
				}
			}

			if (match-piece > 0) {
				/* copy the part of the string before the match */
				memcpy(ZSTR_VAL(result) + result_len, piece, match-piece);
				result_len += (int)(match-piece);
			}

			/* If using custom function, copy result to the buffer and clean up. */
			memcpy(ZSTR_VAL(result) + result_len, ZSTR_VAL(eval_result), ZSTR_LEN(eval_result));
			result_len += (int)ZSTR_LEN(eval_result);
			zend_string_release(eval_result);

			if (limit) {
				limit--;
			}

			/* Advance to the next piece. */
			start_offset = offsets[1];

			/* If we have matched an empty string, mimic what Perl's /g option does.
			   This turns out to be rather cunning. First we set PCRE_NOTEMPTY_ATSTART and try
			   the match again at the same point. If this fails (picked up below) we
			   advance to the next character. */
			g_notempty = (start_offset == offsets[0]) ? PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED : 0;

#ifdef PCRE_EXTRA_MARK
			/* replace function may use the same regex recursively */
			extra->mark = &mark;
			extra->flags |= PCRE_EXTRA_MARK;
#endif
		} else if (count == PCRE_ERROR_NOMATCH || limit == 0) {
			/* If we previously set PCRE_NOTEMPTY_ATSTART after a null match,
			   this is not necessarily the end. We need to advance
			   the start offset, and continue. Fudge the offset values
			   to achieve this, unless we're already at the end of the string. */
			if (g_notempty != 0 && start_offset < subject_len) {
				int unit_len = calculate_unit_length(pce, piece);

				/* The buffer was sized for the previous match only, so the
				   skipped unit (which may be longer than one byte) is not
				   guaranteed to fit; grow the buffer before copying it. */
				new_len = result_len + unit_len;
				if (new_len >= alloc_len) {
					alloc_len = zend_safe_address_guarded(2, new_len, alloc_len);
					if (result == NULL) {
						result = zend_string_alloc(alloc_len, 0);
					} else {
						result = zend_string_extend(result, alloc_len, 0);
					}
				}

				start_offset += unit_len;
				memcpy(ZSTR_VAL(result) + result_len, piece, unit_len);
				result_len += unit_len;
				g_notempty = 0;
			} else {
				/* Nothing was ever written: hand back the subject itself. */
				if (!result && subject_str) {
					result = zend_string_copy(subject_str);
					break;
				}
				new_len = result_len + subject_len - start_offset;
				if (new_len >= alloc_len) {
					alloc_len = new_len; /* now we know exactly how long it is */
					if (NULL != result) {
						result = zend_string_realloc(result, alloc_len, 0);
					} else {
						result = zend_string_alloc(alloc_len, 0);
					}
				}
				/* stick that last bit of string on our output */
				memcpy(ZSTR_VAL(result) + result_len, piece, subject_len - start_offset);
				result_len += subject_len - start_offset;
				ZSTR_VAL(result)[result_len] = '\0';
				ZSTR_LEN(result) = result_len;
				break;
			}
		} else {
			pcre_handle_exec_error(count);
			if (result) {
				zend_string_release(result);
				result = NULL;
			}
			break;
		}
	}

	if (size_offsets <= 32) {
		free_alloca(offsets, use_heap);
	} else {
		efree(offsets);
	}
	if (UNEXPECTED(subpat_names)) {
		efree(subpat_names);
	}

	return result;
}
1755 /* }}} */
1756
1757 /* {{{ php_pcre_replace_func
1758 */
php_pcre_replace_func(zend_string * regex,zend_string * subject_str,zend_fcall_info * fci,zend_fcall_info_cache * fcc,int limit,int * replace_count)1759 static zend_always_inline zend_string *php_pcre_replace_func(zend_string *regex,
1760 zend_string *subject_str,
1761 zend_fcall_info *fci, zend_fcall_info_cache *fcc,
1762 int limit, int *replace_count)
1763 {
1764 pcre_cache_entry *pce; /* Compiled regular expression */
1765 zend_string *result; /* Function result */
1766
1767 /* Compile regex or get it from cache. */
1768 if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
1769 return NULL;
1770 }
1771 pce->refcount++;
1772 result = php_pcre_replace_func_impl(pce, subject_str, ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), fci, fcc,
1773 limit, replace_count);
1774 pce->refcount--;
1775
1776 return result;
1777 }
1778 /* }}} */
1779
1780 /* {{{ php_pcre_replace_array
1781 */
/*
 * Apply every pattern in the `regex` table to the subject in turn, feeding
 * the output of one replacement into the next.
 *
 * If `replace` is an array, the n-th pattern is paired with the n-th defined
 * slot of that array, and patterns beyond the last slot use an empty string.
 * Otherwise `replace` is read as a string and used for every pattern.
 *
 * The reference to subject_str held by the caller is consumed. Returns the
 * final string (subject_str itself if `regex` is empty), or NULL as soon as
 * one replacement fails.
 */
static zend_string *php_pcre_replace_array(HashTable *regex, zval *replace, zend_string *subject_str, int limit, int *replace_count)
{
	zval *pattern_zv;
	/* NULL means "scalar replacement shared by all patterns". */
	HashTable *replacements = (Z_TYPE_P(replace) == IS_ARRAY) ? Z_ARRVAL_P(replace) : NULL;
	uint32_t next_slot = 0;

	/* For each entry in the regex array, get the entry */
	ZEND_HASH_FOREACH_VAL(regex, pattern_zv) {
		/* Make sure we're dealing with strings. */
		zend_string *pattern = zval_get_string(pattern_zv);
		zend_string *replacement;
		zend_string *replaced;

		if (replacements == NULL) {
			replacement = Z_STR_P(replace);
		} else {
			/* Take the next defined slot, skipping holes in the table. */
			replacement = NULL;
			while (next_slot != replacements->nNumUsed) {
				zval *slot = &replacements->arData[next_slot++].val;

				if (Z_TYPE_P(slot) != IS_UNDEF) {
					replacement = zval_get_string(slot);
					break;
				}
			}
			if (replacement == NULL) {
				/* Ran out of replacements. */
				replacement = ZSTR_EMPTY_ALLOC();
			}
		}

		/* Do the actual replacement and put the result back into subject_str
		   for further replacements. */
		replaced = php_pcre_replace(pattern,
									subject_str,
									ZSTR_VAL(subject_str),
									(int)ZSTR_LEN(subject_str),
									replacement,
									limit,
									replace_count);

		/* Only per-pattern replacements carry a reference of their own. */
		if (replacements != NULL) {
			zend_string_release(replacement);
		}
		zend_string_release(pattern);
		zend_string_release(subject_str);
		subject_str = replaced;

		if (UNEXPECTED(subject_str == NULL)) {
			break;
		}
	} ZEND_HASH_FOREACH_END();

	return subject_str;
}
1859 /* }}} */
1860
1861 /* {{{ php_replace_in_subject
1862 */
php_replace_in_subject(zval * regex,zval * replace,zval * subject,int limit,int * replace_count)1863 static zend_always_inline zend_string *php_replace_in_subject(zval *regex, zval *replace, zval *subject, int limit, int *replace_count)
1864 {
1865 zend_string *result;
1866 zend_string *subject_str = zval_get_string(subject);
1867
1868 if (UNEXPECTED(ZEND_SIZE_T_INT_OVFL(ZSTR_LEN(subject_str)))) {
1869 zend_string_release(subject_str);
1870 php_error_docref(NULL, E_WARNING, "Subject is too long");
1871 result = NULL;
1872 } else if (Z_TYPE_P(regex) != IS_ARRAY) {
1873 result = php_pcre_replace(Z_STR_P(regex),
1874 subject_str,
1875 ZSTR_VAL(subject_str),
1876 (int)ZSTR_LEN(subject_str),
1877 Z_STR_P(replace),
1878 limit,
1879 replace_count);
1880 zend_string_release(subject_str);
1881 } else {
1882 result = php_pcre_replace_array(Z_ARRVAL_P(regex),
1883 replace,
1884 subject_str,
1885 limit,
1886 replace_count);
1887 }
1888 return result;
1889 }
1890 /* }}} */
1891
1892 /* {{{ php_replace_in_subject_func
1893 */
php_replace_in_subject_func(zval * regex,zend_fcall_info * fci,zend_fcall_info_cache * fcc,zval * subject,int limit,int * replace_count)1894 static zend_string *php_replace_in_subject_func(zval *regex, zend_fcall_info *fci, zend_fcall_info_cache *fcc, zval *subject, int limit, int *replace_count)
1895 {
1896 zval *regex_entry;
1897 zend_string *result;
1898 zend_string *subject_str = zval_get_string(subject);
1899
1900 if (UNEXPECTED(ZEND_SIZE_T_INT_OVFL(ZSTR_LEN(subject_str)))) {
1901 php_error_docref(NULL, E_WARNING, "Subject is too long");
1902 return NULL;
1903 }
1904
1905 if (Z_TYPE_P(regex) != IS_ARRAY) {
1906 result = php_pcre_replace_func(Z_STR_P(regex),
1907 subject_str,
1908 fci, fcc,
1909 limit,
1910 replace_count);
1911 zend_string_release(subject_str);
1912 return result;
1913 } else {
1914 /* If regex is an array */
1915
1916 /* For each entry in the regex array, get the entry */
1917 ZEND_HASH_FOREACH_VAL(Z_ARRVAL_P(regex), regex_entry) {
1918 /* Make sure we're dealing with strings. */
1919 zend_string *regex_str = zval_get_string(regex_entry);
1920
1921 /* Do the actual replacement and put the result back into subject_str
1922 for further replacements. */
1923 result = php_pcre_replace_func(regex_str,
1924 subject_str,
1925 fci, fcc,
1926 limit,
1927 replace_count);
1928 zend_string_release(regex_str);
1929 zend_string_release(subject_str);
1930 subject_str = result;
1931 if (UNEXPECTED(result == NULL)) {
1932 break;
1933 }
1934 } ZEND_HASH_FOREACH_END();
1935
1936 return subject_str;
1937 }
1938 }
1939 /* }}} */
1940
1941 /* {{{ preg_replace_func_impl
1942 */
/*
 * Shared driver for callback-based replacement.
 *
 * A non-array `regex` is converted to a string in place. A non-array
 * `subject` yields a string in return_value (NULL on failure). An array
 * subject yields an array in return_value that keeps the original keys and
 * omits entries whose replacement failed.
 *
 * limit_val is clamped into int range before use: negative values become -1
 * and values above INT_MAX become INT_MAX. Without the clamp the implicit
 * zend_long -> int narrowing could turn a large limit into 0 and silently
 * disable replacement.
 *
 * Returns the total number of replacements performed.
 */
static int preg_replace_func_impl(zval *return_value, zval *regex, zend_fcall_info *fci, zend_fcall_info_cache *fcc, zval *subject, zend_long limit_val)
{
	zend_string	*result;
	int replace_count = 0;
	int limit;

	if (limit_val < 0) {
		limit = -1;
	} else if (limit_val > (zend_long)INT_MAX) {
		limit = INT_MAX;
	} else {
		limit = (int)limit_val;
	}

	if (Z_TYPE_P(regex) != IS_ARRAY) {
		convert_to_string_ex(regex);
	}

	if (Z_TYPE_P(subject) != IS_ARRAY) {
		result = php_replace_in_subject_func(regex, fci, fcc, subject, limit, &replace_count);
		if (result != NULL) {
			RETVAL_STR(result);
		} else {
			RETVAL_NULL();
		}
	} else {
		/* if subject is an array */
		zval		*subject_entry, zv;
		zend_string	*string_key;
		zend_ulong	 num_key;

		array_init_size(return_value, zend_hash_num_elements(Z_ARRVAL_P(subject)));

		/* For each subject entry, convert it to string, then perform replacement
		   and add the result to the return_value array. */
		ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(subject), num_key, string_key, subject_entry) {
			result = php_replace_in_subject_func(regex, fci, fcc, subject_entry, limit, &replace_count);
			if (result != NULL) {
				/* Add to return array */
				ZVAL_STR(&zv, result);
				if (string_key) {
					zend_hash_add_new(Z_ARRVAL_P(return_value), string_key, &zv);
				} else {
					zend_hash_index_add_new(Z_ARRVAL_P(return_value), num_key, &zv);
				}
			}
		} ZEND_HASH_FOREACH_END();
	}

	return replace_count;
}
1985 /* }}} */
1986
1987 /* {{{ preg_replace_common
1988 */
preg_replace_common(INTERNAL_FUNCTION_PARAMETERS,int is_filter)1989 static void preg_replace_common(INTERNAL_FUNCTION_PARAMETERS, int is_filter)
1990 {
1991 zval *regex, *replace, *subject, *zcount = NULL;
1992 zend_long limit = -1;
1993 int replace_count = 0;
1994 zend_string *result;
1995 int old_replace_count;
1996
1997 /* Get function parameters and do error-checking. */
1998 ZEND_PARSE_PARAMETERS_START(3, 5)
1999 Z_PARAM_ZVAL(regex)
2000 Z_PARAM_ZVAL(replace)
2001 Z_PARAM_ZVAL(subject)
2002 Z_PARAM_OPTIONAL
2003 Z_PARAM_LONG(limit)
2004 Z_PARAM_ZVAL_DEREF(zcount)
2005 ZEND_PARSE_PARAMETERS_END();
2006
2007 if (Z_TYPE_P(replace) != IS_ARRAY) {
2008 convert_to_string_ex(replace);
2009 if (Z_TYPE_P(regex) != IS_ARRAY) {
2010 convert_to_string_ex(regex);
2011 }
2012 } else {
2013 if (Z_TYPE_P(regex) != IS_ARRAY) {
2014 php_error_docref(NULL, E_WARNING, "Parameter mismatch, pattern is a string while replacement is an array");
2015 RETURN_FALSE;
2016 }
2017 }
2018
2019 if (Z_TYPE_P(subject) != IS_ARRAY) {
2020 old_replace_count = replace_count;
2021 result = php_replace_in_subject(regex,
2022 replace,
2023 subject,
2024 limit,
2025 &replace_count);
2026 if (result != NULL) {
2027 if (!is_filter || replace_count > old_replace_count) {
2028 RETVAL_STR(result);
2029 } else {
2030 zend_string_release(result);
2031 RETVAL_NULL();
2032 }
2033 } else {
2034 RETVAL_NULL();
2035 }
2036 } else {
2037 /* if subject is an array */
2038 zval *subject_entry, zv;
2039 zend_string *string_key;
2040 zend_ulong num_key;
2041
2042 array_init_size(return_value, zend_hash_num_elements(Z_ARRVAL_P(subject)));
2043
2044 /* For each subject entry, convert it to string, then perform replacement
2045 and add the result to the return_value array. */
2046 ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(subject), num_key, string_key, subject_entry) {
2047 old_replace_count = replace_count;
2048 result = php_replace_in_subject(regex,
2049 replace,
2050 subject_entry,
2051 limit,
2052 &replace_count);
2053 if (result != NULL) {
2054 if (!is_filter || replace_count > old_replace_count) {
2055 /* Add to return array */
2056 ZVAL_STR(&zv, result);
2057 if (string_key) {
2058 zend_hash_add_new(Z_ARRVAL_P(return_value), string_key, &zv);
2059 } else {
2060 zend_hash_index_add_new(Z_ARRVAL_P(return_value), num_key, &zv);
2061 }
2062 } else {
2063 zend_string_release(result);
2064 }
2065 }
2066 } ZEND_HASH_FOREACH_END();
2067 }
2068
2069 if (zcount) {
2070 zval_ptr_dtor(zcount);
2071 ZVAL_LONG(zcount, replace_count);
2072 }
2073 }
2074 /* }}} */
2075
2076 /* {{{ proto mixed preg_replace(mixed regex, mixed replace, mixed subject [, int limit [, int &count]])
2077 Perform Perl-style regular expression replacement. */
PHP_FUNCTION(preg_replace)2078 static PHP_FUNCTION(preg_replace)
2079 {
2080 preg_replace_common(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
2081 }
2082 /* }}} */
2083
2084 /* {{{ proto mixed preg_replace_callback(mixed regex, mixed callback, mixed subject [, int limit [, int &count]])
2085 Perform Perl-style regular expression replacement using replacement callback. */
PHP_FUNCTION(preg_replace_callback)2086 static PHP_FUNCTION(preg_replace_callback)
2087 {
2088 zval *regex, *replace, *subject, *zcount = NULL;
2089 zend_long limit = -1;
2090 int replace_count;
2091 zend_fcall_info fci;
2092 zend_fcall_info_cache fcc;
2093
2094 /* Get function parameters and do error-checking. */
2095 ZEND_PARSE_PARAMETERS_START(3, 5)
2096 Z_PARAM_ZVAL(regex)
2097 Z_PARAM_ZVAL(replace)
2098 Z_PARAM_ZVAL(subject)
2099 Z_PARAM_OPTIONAL
2100 Z_PARAM_LONG(limit)
2101 Z_PARAM_ZVAL_DEREF(zcount)
2102 ZEND_PARSE_PARAMETERS_END();
2103
2104 if (!zend_is_callable_ex(replace, NULL, 0, NULL, &fcc, NULL)) {
2105 zend_string *callback_name = zend_get_callable_name(replace);
2106 php_error_docref(NULL, E_WARNING, "Requires argument 2, '%s', to be a valid callback", ZSTR_VAL(callback_name));
2107 zend_string_release(callback_name);
2108 ZVAL_STR(return_value, zval_get_string(subject));
2109 return;
2110 }
2111
2112 fci.size = sizeof(fci);
2113 fci.object = NULL;
2114 ZVAL_COPY_VALUE(&fci.function_name, replace);
2115
2116 replace_count = preg_replace_func_impl(return_value, regex, &fci, &fcc, subject, limit);
2117 if (zcount) {
2118 zval_ptr_dtor(zcount);
2119 ZVAL_LONG(zcount, replace_count);
2120 }
2121 }
2122 /* }}} */
2123
2124 /* {{{ proto mixed preg_replace_callback_array(array pattern, mixed subject [, int limit [, int &count]])
2125 Perform Perl-style regular expression replacement using replacement callback. */
PHP_FUNCTION(preg_replace_callback_array)2126 static PHP_FUNCTION(preg_replace_callback_array)
2127 {
2128 zval regex, zv, *replace, *subject, *pattern, *zcount = NULL;
2129 zend_long limit = -1;
2130 zend_string *str_idx;
2131 int replace_count = 0;
2132 zend_fcall_info fci;
2133 zend_fcall_info_cache fcc;
2134
2135 /* Get function parameters and do error-checking. */
2136 ZEND_PARSE_PARAMETERS_START(2, 4)
2137 Z_PARAM_ARRAY(pattern)
2138 Z_PARAM_ZVAL(subject)
2139 Z_PARAM_OPTIONAL
2140 Z_PARAM_LONG(limit)
2141 Z_PARAM_ZVAL_DEREF(zcount)
2142 ZEND_PARSE_PARAMETERS_END();
2143
2144 fci.size = sizeof(fci);
2145 fci.object = NULL;
2146
2147 ZEND_HASH_FOREACH_STR_KEY_VAL(Z_ARRVAL_P(pattern), str_idx, replace) {
2148 if (str_idx) {
2149 ZVAL_STR_COPY(®ex, str_idx);
2150 } else {
2151 php_error_docref(NULL, E_WARNING, "Delimiter must not be alphanumeric or backslash");
2152 zval_ptr_dtor(return_value);
2153 RETURN_NULL();
2154 }
2155
2156 if (!zend_is_callable_ex(replace, NULL, 0, NULL, &fcc, NULL)) {
2157 zend_string *callback_name = zend_get_callable_name(replace);
2158 php_error_docref(NULL, E_WARNING, "'%s' is not a valid callback", ZSTR_VAL(callback_name));
2159 zend_string_release(callback_name);
2160 zval_ptr_dtor(®ex);
2161 zval_ptr_dtor(return_value);
2162 ZVAL_COPY(return_value, subject);
2163 return;
2164 }
2165
2166 ZVAL_COPY_VALUE(&fci.function_name, replace);
2167
2168 replace_count += preg_replace_func_impl(&zv, ®ex, &fci, &fcc, subject, limit);
2169 if (subject != return_value) {
2170 subject = return_value;
2171 } else {
2172 zval_ptr_dtor(return_value);
2173 }
2174
2175 zval_ptr_dtor(®ex);
2176
2177 ZVAL_COPY_VALUE(return_value, &zv);
2178
2179 if (UNEXPECTED(EG(exception))) {
2180 zval_ptr_dtor(return_value);
2181 RETURN_NULL();
2182 }
2183 } ZEND_HASH_FOREACH_END();
2184
2185 if (zcount) {
2186 zval_ptr_dtor(zcount);
2187 ZVAL_LONG(zcount, replace_count);
2188 }
2189 }
2190 /* }}} */
2191
2192 /* {{{ proto mixed preg_filter(mixed regex, mixed replace, mixed subject [, int limit [, int &count]])
2193 Perform Perl-style regular expression replacement and only return matches. */
PHP_FUNCTION(preg_filter)2194 static PHP_FUNCTION(preg_filter)
2195 {
2196 preg_replace_common(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
2197 }
2198 /* }}} */
2199
2200 /* {{{ proto array preg_split(string pattern, string subject [, int limit [, int flags]])
2201 Split string into an array using a perl-style regular expression as a delimiter */
PHP_FUNCTION(preg_split)2202 static PHP_FUNCTION(preg_split)
2203 {
2204 zend_string *regex; /* Regular expression */
2205 zend_string *subject; /* String to match against */
2206 zend_long limit_val = -1;/* Integer value of limit */
2207 zend_long flags = 0; /* Match control flags */
2208 pcre_cache_entry *pce; /* Compiled regular expression */
2209
2210 /* Get function parameters and do error checking */
2211 ZEND_PARSE_PARAMETERS_START(2, 4)
2212 Z_PARAM_STR(regex)
2213 Z_PARAM_STR(subject)
2214 Z_PARAM_OPTIONAL
2215 Z_PARAM_LONG(limit_val)
2216 Z_PARAM_LONG(flags)
2217 ZEND_PARSE_PARAMETERS_END_EX(RETURN_FALSE);
2218
2219 if (ZEND_SIZE_T_INT_OVFL(ZSTR_LEN(subject))) {
2220 php_error_docref(NULL, E_WARNING, "Subject is too long");
2221 RETURN_FALSE;
2222 }
2223
2224 /* Compile regex or get it from cache. */
2225 if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
2226 RETURN_FALSE;
2227 }
2228
2229 pce->refcount++;
2230 php_pcre_split_impl(pce, subject, return_value, (int)limit_val, flags);
2231 pce->refcount--;
2232 }
2233 /* }}} */
2234
2235 /* {{{ php_pcre_split
2236 */
php_pcre_split_impl(pcre_cache_entry * pce,zend_string * subject_str,zval * return_value,zend_long limit_val,zend_long flags)2237 PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str, zval *return_value,
2238 zend_long limit_val, zend_long flags)
2239 {
2240 pcre_extra *extra = pce->extra;/* Holds results of studying */
2241 pcre_extra extra_data; /* Used locally for exec options */
2242 int *offsets; /* Array of subpattern offsets */
2243 int size_offsets; /* Size of the offsets array */
2244 int no_utf_check = 0; /* Execution options */
2245 int count = 0; /* Count of matched subpatterns */
2246 int start_offset; /* Where the new search starts */
2247 int next_offset; /* End of the last delimiter match + 1 */
2248 int g_notempty = 0; /* If the match should not be empty */
2249 char *last_match; /* Location of last match */
2250 int no_empty; /* If NO_EMPTY flag is set */
2251 int delim_capture; /* If delimiters should be captured */
2252 int offset_capture; /* If offsets should be captured */
2253 zval tmp;
2254 ALLOCA_FLAG(use_heap);
2255
2256 no_empty = flags & PREG_SPLIT_NO_EMPTY;
2257 delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE;
2258 offset_capture = flags & PREG_SPLIT_OFFSET_CAPTURE;
2259
2260 if (limit_val == 0) {
2261 limit_val = -1;
2262 }
2263
2264 if (extra == NULL) {
2265 extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2266 extra = &extra_data;
2267 }
2268 extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
2269 extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
2270 #ifdef PCRE_EXTRA_MARK
2271 extra->flags &= ~PCRE_EXTRA_MARK;
2272 #endif
2273
2274 /* Initialize return value */
2275 array_init(return_value);
2276
2277 /* Calculate the size of the offsets array, and allocate memory for it. */
2278 size_offsets = (pce->capture_count + 1) * 3;
2279 if (size_offsets <= 32) {
2280 offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
2281 } else {
2282 offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
2283 }
2284
2285 /* Start at the beginning of the string */
2286 start_offset = 0;
2287 next_offset = 0;
2288 last_match = ZSTR_VAL(subject_str);
2289 PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
2290
2291 #ifdef HAVE_PCRE_JIT_SUPPORT
2292 if (!(pce->compile_options & PCRE_UTF8)) {
2293 no_utf_check = PCRE_NO_UTF8_CHECK;
2294 }
2295 #endif
2296
2297 /* Get next piece if no limit or limit not yet reached and something matched*/
2298 while ((limit_val == -1 || limit_val > 1)) {
2299 #ifdef HAVE_PCRE_JIT_SUPPORT
2300 if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT)
2301 && no_utf_check && !g_notempty) {
2302 count = pcre_jit_exec(pce->re, extra, ZSTR_VAL(subject_str),
2303 ZSTR_LEN(subject_str), start_offset,
2304 no_utf_check|g_notempty, offsets, size_offsets, jit_stack);
2305 } else
2306 #endif
2307 count = pcre_exec(pce->re, extra, ZSTR_VAL(subject_str),
2308 ZSTR_LEN(subject_str), start_offset,
2309 no_utf_check|g_notempty, offsets, size_offsets);
2310
2311 /* the string was already proved to be valid UTF-8 */
2312 no_utf_check = PCRE_NO_UTF8_CHECK;
2313
2314 /* Check for too many substrings condition. */
2315 if (count == 0) {
2316 php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
2317 count = size_offsets/3;
2318 }
2319
2320 /* If something matched */
2321 if (count > 0 && (offsets[1] - offsets[0] >= 0)) {
2322 if (!no_empty || &ZSTR_VAL(subject_str)[offsets[0]] != last_match) {
2323
2324 if (offset_capture) {
2325 /* Add (match, offset) pair to the return value */
2326 add_offset_pair(return_value, last_match, (int)(&ZSTR_VAL(subject_str)[offsets[0]]-last_match), next_offset, NULL, 0);
2327 } else {
2328 /* Add the piece to the return value */
2329 ZVAL_STRINGL(&tmp, last_match, &ZSTR_VAL(subject_str)[offsets[0]]-last_match);
2330 zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
2331 }
2332
2333 /* One less left to do */
2334 if (limit_val != -1)
2335 limit_val--;
2336 }
2337
2338 last_match = &ZSTR_VAL(subject_str)[offsets[1]];
2339 next_offset = offsets[1];
2340
2341 if (delim_capture) {
2342 int i, match_len;
2343 for (i = 1; i < count; i++) {
2344 match_len = offsets[(i<<1)+1] - offsets[i<<1];
2345 /* If we have matched a delimiter */
2346 if (!no_empty || match_len > 0) {
2347 if (offset_capture) {
2348 add_offset_pair(return_value, &ZSTR_VAL(subject_str)[offsets[i<<1]], match_len, offsets[i<<1], NULL, 0);
2349 } else {
2350 ZVAL_STRINGL(&tmp, &ZSTR_VAL(subject_str)[offsets[i<<1]], match_len);
2351 zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
2352 }
2353 }
2354 }
2355 }
2356
2357 /* Advance to the position right after the last full match */
2358 start_offset = offsets[1];
2359
2360 /* If we have matched an empty string, mimic what Perl's /g options does.
2361 This turns out to be rather cunning. First we set PCRE_NOTEMPTY_ATSTART and try
2362 the match again at the same point. If this fails (picked up above) we
2363 advance to the next character. */
2364 g_notempty = (start_offset == offsets[0])? PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED : 0;
2365
2366 } else if (count == PCRE_ERROR_NOMATCH) {
2367 /* If we previously set PCRE_NOTEMPTY_ATSTART after a null match,
2368 this is not necessarily the end. We need to advance
2369 the start offset, and continue. Fudge the offset values
2370 to achieve this, unless we're already at the end of the string. */
2371 if (g_notempty != 0 && start_offset < ZSTR_LEN(subject_str)) {
2372 start_offset += calculate_unit_length(pce, ZSTR_VAL(subject_str) + start_offset);
2373 g_notempty = 0;
2374 } else {
2375 break;
2376 }
2377 } else {
2378 pcre_handle_exec_error(count);
2379 break;
2380 }
2381 }
2382
2383
2384 start_offset = (int)(last_match - ZSTR_VAL(subject_str)); /* the offset might have been incremented, but without further successful matches */
2385
2386 if (!no_empty || start_offset < ZSTR_LEN(subject_str)) {
2387 if (offset_capture) {
2388 /* Add the last (match, offset) pair to the return value */
2389 add_offset_pair(return_value, &ZSTR_VAL(subject_str)[start_offset], ZSTR_LEN(subject_str) - start_offset, start_offset, NULL, 0);
2390 } else {
2391 /* Add the last piece to the return value */
2392 if (last_match == ZSTR_VAL(subject_str)) {
2393 ZVAL_STR_COPY(&tmp, subject_str);
2394 } else {
2395 ZVAL_STRINGL(&tmp, last_match, ZSTR_VAL(subject_str) + ZSTR_LEN(subject_str) - last_match);
2396 }
2397 zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
2398 }
2399 }
2400
2401
2402 /* Clean up */
2403 if (size_offsets <= 32) {
2404 free_alloca(offsets, use_heap);
2405 } else {
2406 efree(offsets);
2407 }
2408 }
2409 /* }}} */
2410
2411 /* {{{ proto string preg_quote(string str [, string delim_char])
2412 Quote regular expression characters plus an optional character */
PHP_FUNCTION(preg_quote)2413 static PHP_FUNCTION(preg_quote)
2414 {
2415 zend_string *str; /* Input string argument */
2416 zend_string *delim = NULL; /* Additional delimiter argument */
2417 char *in_str; /* Input string */
2418 char *in_str_end; /* End of the input string */
2419 zend_string *out_str; /* Output string with quoted characters */
2420 size_t extra_len; /* Number of additional characters */
2421 char *p, /* Iterator for input string */
2422 *q, /* Iterator for output string */
2423 delim_char = '\0', /* Delimiter character to be quoted */
2424 c; /* Current character */
2425
2426 /* Get the arguments and check for errors */
2427 ZEND_PARSE_PARAMETERS_START(1, 2)
2428 Z_PARAM_STR(str)
2429 Z_PARAM_OPTIONAL
2430 Z_PARAM_STR_EX(delim, 1, 0)
2431 ZEND_PARSE_PARAMETERS_END();
2432
2433 /* Nothing to do if we got an empty string */
2434 if (ZSTR_LEN(str) == 0) {
2435 RETURN_EMPTY_STRING();
2436 }
2437
2438 in_str = ZSTR_VAL(str);
2439 in_str_end = in_str + ZSTR_LEN(str);
2440
2441 if (delim) {
2442 delim_char = ZSTR_VAL(delim)[0];
2443 }
2444
2445 /* Go through the string and quote necessary characters */
2446 extra_len = 0;
2447 p = in_str;
2448 do {
2449 c = *p;
2450 switch(c) {
2451 case '.':
2452 case '\\':
2453 case '+':
2454 case '*':
2455 case '?':
2456 case '[':
2457 case '^':
2458 case ']':
2459 case '$':
2460 case '(':
2461 case ')':
2462 case '{':
2463 case '}':
2464 case '=':
2465 case '!':
2466 case '>':
2467 case '<':
2468 case '|':
2469 case ':':
2470 case '-':
2471 extra_len++;
2472 break;
2473
2474 case '\0':
2475 extra_len+=3;
2476 break;
2477
2478 default:
2479 if (c == delim_char) {
2480 extra_len++;
2481 }
2482 break;
2483 }
2484 p++;
2485 } while (p != in_str_end);
2486
2487 if (extra_len == 0) {
2488 RETURN_STR_COPY(str);
2489 }
2490
2491 /* Allocate enough memory so that even if each character
2492 is quoted, we won't run out of room */
2493 out_str = zend_string_safe_alloc(1, ZSTR_LEN(str), extra_len, 0);
2494 q = ZSTR_VAL(out_str);
2495 p = in_str;
2496
2497 do {
2498 c = *p;
2499 switch(c) {
2500 case '.':
2501 case '\\':
2502 case '+':
2503 case '*':
2504 case '?':
2505 case '[':
2506 case '^':
2507 case ']':
2508 case '$':
2509 case '(':
2510 case ')':
2511 case '{':
2512 case '}':
2513 case '=':
2514 case '!':
2515 case '>':
2516 case '<':
2517 case '|':
2518 case ':':
2519 case '-':
2520 *q++ = '\\';
2521 *q++ = c;
2522 break;
2523
2524 case '\0':
2525 *q++ = '\\';
2526 *q++ = '0';
2527 *q++ = '0';
2528 *q++ = '0';
2529 break;
2530
2531 default:
2532 if (c == delim_char) {
2533 *q++ = '\\';
2534 }
2535 *q++ = c;
2536 break;
2537 }
2538 p++;
2539 } while (p != in_str_end);
2540 *q = '\0';
2541
2542 RETURN_NEW_STR(out_str);
2543 }
2544 /* }}} */
2545
2546 /* {{{ proto array preg_grep(string regex, array input [, int flags])
2547 Searches array and returns entries which match regex */
PHP_FUNCTION(preg_grep)2548 static PHP_FUNCTION(preg_grep)
2549 {
2550 zend_string *regex; /* Regular expression */
2551 zval *input; /* Input array */
2552 zend_long flags = 0; /* Match control flags */
2553 pcre_cache_entry *pce; /* Compiled regular expression */
2554
2555 /* Get arguments and do error checking */
2556 ZEND_PARSE_PARAMETERS_START(2, 3)
2557 Z_PARAM_STR(regex)
2558 Z_PARAM_ARRAY(input)
2559 Z_PARAM_OPTIONAL
2560 Z_PARAM_LONG(flags)
2561 ZEND_PARSE_PARAMETERS_END();
2562
2563 /* Compile regex or get it from cache. */
2564 if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
2565 RETURN_FALSE;
2566 }
2567
2568 pce->refcount++;
2569 php_pcre_grep_impl(pce, input, return_value, flags);
2570 pce->refcount--;
2571 }
2572 /* }}} */
2573
php_pcre_grep_impl(pcre_cache_entry * pce,zval * input,zval * return_value,zend_long flags)2574 PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return_value, zend_long flags) /* {{{ */
2575 {
2576 zval *entry; /* An entry in the input array */
2577 pcre_extra *extra = pce->extra;/* Holds results of studying */
2578 pcre_extra extra_data; /* Used locally for exec options */
2579 int *offsets; /* Array of subpattern offsets */
2580 int size_offsets; /* Size of the offsets array */
2581 int count = 0; /* Count of matched subpatterns */
2582 int no_utf_check = 0; /* Execution options */
2583 zend_string *string_key;
2584 zend_ulong num_key;
2585 zend_bool invert; /* Whether to return non-matching
2586 entries */
2587 ALLOCA_FLAG(use_heap);
2588
2589 invert = flags & PREG_GREP_INVERT ? 1 : 0;
2590
2591 if (extra == NULL) {
2592 extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2593 extra = &extra_data;
2594 }
2595 extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
2596 extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
2597 #ifdef PCRE_EXTRA_MARK
2598 extra->flags &= ~PCRE_EXTRA_MARK;
2599 #endif
2600
2601 /* Calculate the size of the offsets array, and allocate memory for it. */
2602 size_offsets = (pce->capture_count + 1) * 3;
2603 if (size_offsets <= 32) {
2604 offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
2605 } else {
2606 offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
2607 }
2608
2609 /* Initialize return array */
2610 array_init(return_value);
2611
2612 PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
2613
2614 #ifdef HAVE_PCRE_JIT_SUPPORT
2615 no_utf_check = (pce->compile_options & PCRE_UTF8) ? 0 : PCRE_NO_UTF8_CHECK;
2616 #endif
2617
2618 /* Go through the input array */
2619 ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(input), num_key, string_key, entry) {
2620 zend_string *subject_str = zval_get_string(entry);
2621
2622 /* Perform the match */
2623 #ifdef HAVE_PCRE_JIT_SUPPORT
2624 if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT)
2625 && no_utf_check) {
2626 count = pcre_jit_exec(pce->re, extra, ZSTR_VAL(subject_str),
2627 (int)ZSTR_LEN(subject_str), 0,
2628 no_utf_check, offsets, size_offsets, jit_stack);
2629 } else
2630 #endif
2631 count = pcre_exec(pce->re, extra, ZSTR_VAL(subject_str),
2632 (int)ZSTR_LEN(subject_str), 0,
2633 no_utf_check, offsets, size_offsets);
2634
2635 /* Check for too many substrings condition. */
2636 if (count == 0) {
2637 php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
2638 count = size_offsets/3;
2639 } else if (count < 0 && count != PCRE_ERROR_NOMATCH) {
2640 pcre_handle_exec_error(count);
2641 zend_string_release(subject_str);
2642 break;
2643 }
2644
2645 /* If the entry fits our requirements */
2646 if ((count > 0 && !invert) || (count == PCRE_ERROR_NOMATCH && invert)) {
2647 if (Z_REFCOUNTED_P(entry)) {
2648 Z_ADDREF_P(entry);
2649 }
2650
2651 /* Add to return array */
2652 if (string_key) {
2653 zend_hash_update(Z_ARRVAL_P(return_value), string_key, entry);
2654 } else {
2655 zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, entry);
2656 }
2657 }
2658
2659 zend_string_release(subject_str);
2660 } ZEND_HASH_FOREACH_END();
2661
2662 /* Clean up */
2663 if (size_offsets <= 32) {
2664 free_alloca(offsets, use_heap);
2665 } else {
2666 efree(offsets);
2667 }
2668 }
2669 /* }}} */
2670
2671 /* {{{ proto int preg_last_error()
2672 Returns the error code of the last regexp execution. */
PHP_FUNCTION(preg_last_error)2673 static PHP_FUNCTION(preg_last_error)
2674 {
2675 ZEND_PARSE_PARAMETERS_START(0, 0)
2676 ZEND_PARSE_PARAMETERS_END();
2677
2678 RETURN_LONG(PCRE_G(error_code));
2679 }
2680 /* }}} */
2681
/* {{{ module definition structures */
2683
2684 /* {{{ arginfo */
2685 ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match, 0, 0, 2)
2686 ZEND_ARG_INFO(0, pattern)
2687 ZEND_ARG_INFO(0, subject)
2688 ZEND_ARG_INFO(1, subpatterns) /* array */
2689 ZEND_ARG_INFO(0, flags)
2690 ZEND_ARG_INFO(0, offset)
2691 ZEND_END_ARG_INFO()
2692
2693 ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match_all, 0, 0, 2)
2694 ZEND_ARG_INFO(0, pattern)
2695 ZEND_ARG_INFO(0, subject)
2696 ZEND_ARG_INFO(1, subpatterns) /* array */
2697 ZEND_ARG_INFO(0, flags)
2698 ZEND_ARG_INFO(0, offset)
2699 ZEND_END_ARG_INFO()
2700
2701 ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace, 0, 0, 3)
2702 ZEND_ARG_INFO(0, regex)
2703 ZEND_ARG_INFO(0, replace)
2704 ZEND_ARG_INFO(0, subject)
2705 ZEND_ARG_INFO(0, limit)
2706 ZEND_ARG_INFO(1, count)
2707 ZEND_END_ARG_INFO()
2708
2709 ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace_callback, 0, 0, 3)
2710 ZEND_ARG_INFO(0, regex)
2711 ZEND_ARG_INFO(0, callback)
2712 ZEND_ARG_INFO(0, subject)
2713 ZEND_ARG_INFO(0, limit)
2714 ZEND_ARG_INFO(1, count)
2715 ZEND_END_ARG_INFO()
2716
2717 ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace_callback_array, 0, 0, 2)
2718 ZEND_ARG_INFO(0, pattern)
2719 ZEND_ARG_INFO(0, subject)
2720 ZEND_ARG_INFO(0, limit)
2721 ZEND_ARG_INFO(1, count)
2722 ZEND_END_ARG_INFO()
2723
2724 ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_split, 0, 0, 2)
2725 ZEND_ARG_INFO(0, pattern)
2726 ZEND_ARG_INFO(0, subject)
2727 ZEND_ARG_INFO(0, limit)
2728 ZEND_ARG_INFO(0, flags)
2729 ZEND_END_ARG_INFO()
2730
2731 ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_quote, 0, 0, 1)
2732 ZEND_ARG_INFO(0, str)
2733 ZEND_ARG_INFO(0, delim_char)
2734 ZEND_END_ARG_INFO()
2735
2736 ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_grep, 0, 0, 2)
2737 ZEND_ARG_INFO(0, regex)
2738 ZEND_ARG_INFO(0, input) /* array */
2739 ZEND_ARG_INFO(0, flags)
2740 ZEND_END_ARG_INFO()
2741
2742 ZEND_BEGIN_ARG_INFO(arginfo_preg_last_error, 0)
2743 ZEND_END_ARG_INFO()
2744 /* }}} */
2745
2746 static const zend_function_entry pcre_functions[] = {
2747 PHP_FE(preg_match, arginfo_preg_match)
2748 PHP_FE(preg_match_all, arginfo_preg_match_all)
2749 PHP_FE(preg_replace, arginfo_preg_replace)
2750 PHP_FE(preg_replace_callback, arginfo_preg_replace_callback)
2751 PHP_FE(preg_replace_callback_array, arginfo_preg_replace_callback_array)
2752 PHP_FE(preg_filter, arginfo_preg_replace)
2753 PHP_FE(preg_split, arginfo_preg_split)
2754 PHP_FE(preg_quote, arginfo_preg_quote)
2755 PHP_FE(preg_grep, arginfo_preg_grep)
2756 PHP_FE(preg_last_error, arginfo_preg_last_error)
2757 PHP_FE_END
2758 };
2759
2760 zend_module_entry pcre_module_entry = {
2761 STANDARD_MODULE_HEADER,
2762 "pcre",
2763 pcre_functions,
2764 PHP_MINIT(pcre),
2765 PHP_MSHUTDOWN(pcre),
2766 #ifdef HAVE_PCRE_JIT_SUPPORT
2767 PHP_RINIT(pcre),
2768 #else
2769 NULL,
2770 #endif
2771 NULL,
2772 PHP_MINFO(pcre),
2773 PHP_PCRE_VERSION,
2774 PHP_MODULE_GLOBALS(pcre),
2775 PHP_GINIT(pcre),
2776 PHP_GSHUTDOWN(pcre),
2777 NULL,
2778 STANDARD_MODULE_PROPERTIES_EX
2779 };
2780
2781 #ifdef COMPILE_DL_PCRE
2782 ZEND_GET_MODULE(pcre)
2783 #endif
2784
/* }}} */
2786
2787 #endif /* HAVE_PCRE || HAVE_BUNDLED_PCRE */
2788
/*
 * Local variables:
 * tab-width: 4
 * c-basic-offset: 4
 * End:
 * vim600: sw=4 ts=4 fdm=marker
 * vim<600: sw=4 ts=4
 */
2797