1 /*
2 +----------------------------------------------------------------------+
3 | PHP Version 7 |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 1997-2017 The PHP Group |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
15 | Author: Andrei Zmievski <andrei@php.net> |
16 +----------------------------------------------------------------------+
17 */
18
19 /* $Id$ */
20
21 #include "php.h"
22 #include "php_ini.h"
23 #include "php_globals.h"
24 #include "php_pcre.h"
25 #include "ext/standard/info.h"
26 #include "ext/standard/basic_functions.h"
27 #include "zend_smart_str.h"
28
29 #if HAVE_PCRE || HAVE_BUNDLED_PCRE
30
31 #include "ext/standard/php_string.h"
32
/* Result layouts for preg_match_all(); stored in the low byte of the flags. */
#define PREG_PATTERN_ORDER			1
#define PREG_SET_ORDER				2
/* Modifier bit that may be OR-ed with the layout above. */
#define PREG_OFFSET_CAPTURE			(1<<8)

/* preg_split() flags. */
#define PREG_SPLIT_NO_EMPTY			(1<<0)
#define PREG_SPLIT_DELIM_CAPTURE	(1<<1)
#define PREG_SPLIT_OFFSET_CAPTURE	(1<<2)

/* Internal pattern option set by the 'e' modifier. */
#define PREG_REPLACE_EVAL			(1<<0)

/* preg_grep() flag. */
#define PREG_GREP_INVERT			(1<<0)

/* Number of cached compiled patterns that triggers a partial cache purge. */
#define PCRE_CACHE_SIZE 4096

/* not fully functional workaround for libpcre < 8.0, see bug #70232 */
#ifndef PCRE_NOTEMPTY_ATSTART
# define PCRE_NOTEMPTY_ATSTART PCRE_NOTEMPTY
#endif

/* Error codes stored in PCRE_G(error_code) and exported as PREG_*_ERROR. */
enum {
	PHP_PCRE_NO_ERROR              = 0,
	PHP_PCRE_INTERNAL_ERROR        = 1,
	PHP_PCRE_BACKTRACK_LIMIT_ERROR = 2,
	PHP_PCRE_RECURSION_LIMIT_ERROR = 3,
	PHP_PCRE_BAD_UTF8_ERROR        = 4,
	PHP_PCRE_BAD_UTF8_OFFSET_ERROR = 5,
	PHP_PCRE_JIT_STACKLIMIT_ERROR  = 6
};
61
62
63 PHPAPI ZEND_DECLARE_MODULE_GLOBALS(pcre)
64
65 #ifdef HAVE_PCRE_JIT_SUPPORT
66 #define PCRE_JIT_STACK_MIN_SIZE (32 * 1024)
67 #define PCRE_JIT_STACK_MAX_SIZE (64 * 1024)
68 ZEND_TLS pcre_jit_stack *jit_stack = NULL;
69 #endif
70
pcre_handle_exec_error(int pcre_code)71 static void pcre_handle_exec_error(int pcre_code) /* {{{ */
72 {
73 int preg_code = 0;
74
75 switch (pcre_code) {
76 case PCRE_ERROR_MATCHLIMIT:
77 preg_code = PHP_PCRE_BACKTRACK_LIMIT_ERROR;
78 break;
79
80 case PCRE_ERROR_RECURSIONLIMIT:
81 preg_code = PHP_PCRE_RECURSION_LIMIT_ERROR;
82 break;
83
84 case PCRE_ERROR_BADUTF8:
85 preg_code = PHP_PCRE_BAD_UTF8_ERROR;
86 break;
87
88 case PCRE_ERROR_BADUTF8_OFFSET:
89 preg_code = PHP_PCRE_BAD_UTF8_OFFSET_ERROR;
90 break;
91
92 #ifdef HAVE_PCRE_JIT_SUPPORT
93 case PCRE_ERROR_JIT_STACKLIMIT:
94 preg_code = PHP_PCRE_JIT_STACKLIMIT_ERROR;
95 break;
96 #endif
97
98 default:
99 preg_code = PHP_PCRE_INTERNAL_ERROR;
100 break;
101 }
102
103 PCRE_G(error_code) = preg_code;
104 }
105 /* }}} */
106
/*
 * Destructor installed on the pattern cache hash table: releases the compiled
 * pattern, its study data, the locale tables (when built with setlocale
 * support) and finally the cache entry itself.
 */
static void php_free_pcre_cache(zval *data) /* {{{ */
{
	pcre_cache_entry *entry = (pcre_cache_entry *) Z_PTR_P(data);

	if (entry != NULL) {
		pcre_free(entry->re);
		if (entry->extra) {
			pcre_free_study(entry->extra);
		}
#if HAVE_SETLOCALE
		if ((void*)entry->tables) {
			pefree((void*)entry->tables, 1);
		}
#endif
		pefree(entry, 1);
	}
}
/* }}} */
121
PHP_GINIT_FUNCTION(pcre)122 static PHP_GINIT_FUNCTION(pcre) /* {{{ */
123 {
124 zend_hash_init(&pcre_globals->pcre_cache, 0, NULL, php_free_pcre_cache, 1);
125 pcre_globals->backtrack_limit = 0;
126 pcre_globals->recursion_limit = 0;
127 pcre_globals->error_code = PHP_PCRE_NO_ERROR;
128 }
129 /* }}} */
130
PHP_GSHUTDOWN_FUNCTION(pcre)131 static PHP_GSHUTDOWN_FUNCTION(pcre) /* {{{ */
132 {
133 zend_hash_destroy(&pcre_globals->pcre_cache);
134
135 #ifdef HAVE_PCRE_JIT_SUPPORT
136 /* Stack may only be destroyed when no cached patterns
137 possibly associated with it do exist. */
138 if (jit_stack) {
139 pcre_jit_stack_free(jit_stack);
140 jit_stack = NULL;
141 }
142 #endif
143
144 }
145 /* }}} */
146
147 PHP_INI_BEGIN()
148 STD_PHP_INI_ENTRY("pcre.backtrack_limit", "1000000", PHP_INI_ALL, OnUpdateLong, backtrack_limit, zend_pcre_globals, pcre_globals)
149 STD_PHP_INI_ENTRY("pcre.recursion_limit", "100000", PHP_INI_ALL, OnUpdateLong, recursion_limit, zend_pcre_globals, pcre_globals)
150 #ifdef HAVE_PCRE_JIT_SUPPORT
151 STD_PHP_INI_ENTRY("pcre.jit", "1", PHP_INI_ALL, OnUpdateBool, jit, zend_pcre_globals, pcre_globals)
152 #endif
PHP_INI_END()153 PHP_INI_END()
154
155
/* {{{ PHP_MINFO_FUNCTION(pcre)
 * Prints the phpinfo() section: library version, JIT availability and the
 * extension's INI entries. */
static PHP_MINFO_FUNCTION(pcre)
{
	const char *jit_status;
#ifdef HAVE_PCRE_JIT_SUPPORT
	int jit_available = 0;
#endif

	php_info_print_table_start();
	php_info_print_table_row(2, "PCRE (Perl Compatible Regular Expressions) Support", "enabled" );
	php_info_print_table_row(2, "PCRE Library Version", pcre_version() );

#ifdef HAVE_PCRE_JIT_SUPPORT
	/* pcre_config() returns non-zero when the query itself failed. */
	if (pcre_config(PCRE_CONFIG_JIT, &jit_available) != 0) {
		jit_status = "unknown";
	} else if (jit_available) {
		jit_status = "enabled";
	} else {
		jit_status = "disabled";
	}
#else
	jit_status = "not compiled in";
#endif
	php_info_print_table_row(2, "PCRE JIT Support", jit_status);

	php_info_print_table_end();

	DISPLAY_INI_ENTRIES();
}
/* }}} */
182
183 /* {{{ PHP_MINIT_FUNCTION(pcre) */
PHP_MINIT_FUNCTION(pcre)184 static PHP_MINIT_FUNCTION(pcre)
185 {
186 REGISTER_INI_ENTRIES();
187
188 REGISTER_LONG_CONSTANT("PREG_PATTERN_ORDER", PREG_PATTERN_ORDER, CONST_CS | CONST_PERSISTENT);
189 REGISTER_LONG_CONSTANT("PREG_SET_ORDER", PREG_SET_ORDER, CONST_CS | CONST_PERSISTENT);
190 REGISTER_LONG_CONSTANT("PREG_OFFSET_CAPTURE", PREG_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
191 REGISTER_LONG_CONSTANT("PREG_SPLIT_NO_EMPTY", PREG_SPLIT_NO_EMPTY, CONST_CS | CONST_PERSISTENT);
192 REGISTER_LONG_CONSTANT("PREG_SPLIT_DELIM_CAPTURE", PREG_SPLIT_DELIM_CAPTURE, CONST_CS | CONST_PERSISTENT);
193 REGISTER_LONG_CONSTANT("PREG_SPLIT_OFFSET_CAPTURE", PREG_SPLIT_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
194 REGISTER_LONG_CONSTANT("PREG_GREP_INVERT", PREG_GREP_INVERT, CONST_CS | CONST_PERSISTENT);
195
196 REGISTER_LONG_CONSTANT("PREG_NO_ERROR", PHP_PCRE_NO_ERROR, CONST_CS | CONST_PERSISTENT);
197 REGISTER_LONG_CONSTANT("PREG_INTERNAL_ERROR", PHP_PCRE_INTERNAL_ERROR, CONST_CS | CONST_PERSISTENT);
198 REGISTER_LONG_CONSTANT("PREG_BACKTRACK_LIMIT_ERROR", PHP_PCRE_BACKTRACK_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
199 REGISTER_LONG_CONSTANT("PREG_RECURSION_LIMIT_ERROR", PHP_PCRE_RECURSION_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
200 REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_ERROR", PHP_PCRE_BAD_UTF8_ERROR, CONST_CS | CONST_PERSISTENT);
201 REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_OFFSET_ERROR", PHP_PCRE_BAD_UTF8_OFFSET_ERROR, CONST_CS | CONST_PERSISTENT);
202 REGISTER_LONG_CONSTANT("PREG_JIT_STACKLIMIT_ERROR", PHP_PCRE_JIT_STACKLIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
203 REGISTER_STRING_CONSTANT("PCRE_VERSION", (char *)pcre_version(), CONST_CS | CONST_PERSISTENT);
204
205 return SUCCESS;
206 }
207 /* }}} */
208
209 /* {{{ PHP_MSHUTDOWN_FUNCTION(pcre) */
PHP_MSHUTDOWN_FUNCTION(pcre)210 static PHP_MSHUTDOWN_FUNCTION(pcre)
211 {
212 UNREGISTER_INI_ENTRIES();
213
214 return SUCCESS;
215 }
216 /* }}} */
217
#ifdef HAVE_PCRE_JIT_SUPPORT
/* {{{ PHP_RINIT_FUNCTION(pcre)
 * Request start-up: allocates the JIT stack the first time a request begins
 * with pcre.jit enabled. An already allocated stack is reused. */
static PHP_RINIT_FUNCTION(pcre)
{
	if (!PCRE_G(jit) || jit_stack != NULL) {
		return SUCCESS;
	}

	jit_stack = pcre_jit_stack_alloc(PCRE_JIT_STACK_MIN_SIZE,PCRE_JIT_STACK_MAX_SIZE);
	return SUCCESS;
}
/* }}} */
#endif
230
231 /* {{{ static pcre_clean_cache */
pcre_clean_cache(zval * data,void * arg)232 static int pcre_clean_cache(zval *data, void *arg)
233 {
234 pcre_cache_entry *pce = (pcre_cache_entry *) Z_PTR_P(data);
235 int *num_clean = (int *)arg;
236
237 if (*num_clean > 0 && !pce->refcount) {
238 (*num_clean)--;
239 return ZEND_HASH_APPLY_REMOVE;
240 } else {
241 return ZEND_HASH_APPLY_KEEP;
242 }
243 }
244 /* }}} */
245
246 /* {{{ static make_subpats_table */
make_subpats_table(int num_subpats,pcre_cache_entry * pce)247 static char **make_subpats_table(int num_subpats, pcre_cache_entry *pce)
248 {
249 pcre_extra *extra = pce->extra;
250 int name_cnt = pce->name_count, name_size, ni = 0;
251 int rc;
252 char *name_table;
253 unsigned short name_idx;
254 char **subpat_names;
255 int rc1, rc2;
256
257 rc1 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMETABLE, &name_table);
258 rc2 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMEENTRYSIZE, &name_size);
259 rc = rc2 ? rc2 : rc1;
260 if (rc < 0) {
261 php_error_docref(NULL, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
262 return NULL;
263 }
264
265 subpat_names = (char **)ecalloc(num_subpats, sizeof(char *));
266 while (ni++ < name_cnt) {
267 name_idx = 0x100 * (unsigned char)name_table[0] + (unsigned char)name_table[1];
268 subpat_names[name_idx] = name_table + 2;
269 if (is_numeric_string(subpat_names[name_idx], strlen(subpat_names[name_idx]), NULL, NULL, 0) > 0) {
270 php_error_docref(NULL, E_WARNING, "Numeric named subpatterns are not allowed");
271 efree(subpat_names);
272 return NULL;
273 }
274 name_table += name_size;
275 }
276 return subpat_names;
277 }
278 /* }}} */
279
280 /* {{{ static calculate_unit_length */
281 /* Calculates the byte length of the next character. Assumes valid UTF-8 for PCRE_UTF8. */
calculate_unit_length(pcre_cache_entry * pce,char * start)282 static zend_always_inline int calculate_unit_length(pcre_cache_entry *pce, char *start)
283 {
284 int unit_len;
285
286 if (pce->compile_options & PCRE_UTF8) {
287 char *end = start;
288
289 /* skip continuation bytes */
290 while ((*++end & 0xC0) == 0x80);
291 unit_len = end - start;
292 } else {
293 unit_len = 1;
294 }
295 return unit_len;
296 }
297 /* }}} */
298
299 /* {{{ pcre_get_compiled_regex_cache
300 */
pcre_get_compiled_regex_cache(zend_string * regex)301 PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex)
302 {
303 pcre *re = NULL;
304 pcre_extra *extra;
305 int coptions = 0;
306 int soptions = 0;
307 const char *error;
308 int erroffset;
309 char delimiter;
310 char start_delimiter;
311 char end_delimiter;
312 char *p, *pp;
313 char *pattern;
314 int do_study = 0;
315 int poptions = 0;
316 unsigned const char *tables = NULL;
317 pcre_cache_entry *pce;
318 pcre_cache_entry new_entry;
319 int rc;
320 zend_string *key;
321
322 #if HAVE_SETLOCALE
323 if (BG(locale_string) &&
324 (ZSTR_LEN(BG(locale_string)) != 1 && ZSTR_VAL(BG(locale_string))[0] != 'C')) {
325 key = zend_string_alloc(ZSTR_LEN(regex) + ZSTR_LEN(BG(locale_string)) + 1, 0);
326 memcpy(ZSTR_VAL(key), ZSTR_VAL(BG(locale_string)), ZSTR_LEN(BG(locale_string)) + 1);
327 memcpy(ZSTR_VAL(key) + ZSTR_LEN(BG(locale_string)), ZSTR_VAL(regex), ZSTR_LEN(regex) + 1);
328 } else
329 #endif
330 {
331 key = regex;
332 }
333
334 /* Try to lookup the cached regex entry, and if successful, just pass
335 back the compiled pattern, otherwise go on and compile it. */
336 pce = zend_hash_find_ptr(&PCRE_G(pcre_cache), key);
337 if (pce) {
338 #if HAVE_SETLOCALE
339 if (key != regex) {
340 zend_string_release(key);
341 }
342 #endif
343 return pce;
344 }
345
346 p = ZSTR_VAL(regex);
347
348 /* Parse through the leading whitespace, and display a warning if we
349 get to the end without encountering a delimiter. */
350 while (isspace((int)*(unsigned char *)p)) p++;
351 if (*p == 0) {
352 #if HAVE_SETLOCALE
353 if (key != regex) {
354 zend_string_release(key);
355 }
356 #endif
357 php_error_docref(NULL, E_WARNING,
358 p < ZSTR_VAL(regex) + ZSTR_LEN(regex) ? "Null byte in regex" : "Empty regular expression");
359 pcre_handle_exec_error(PCRE_ERROR_INTERNAL);
360 return NULL;
361 }
362
363 /* Get the delimiter and display a warning if it is alphanumeric
364 or a backslash. */
365 delimiter = *p++;
366 if (isalnum((int)*(unsigned char *)&delimiter) || delimiter == '\\') {
367 #if HAVE_SETLOCALE
368 if (key != regex) {
369 zend_string_release(key);
370 }
371 #endif
372 php_error_docref(NULL,E_WARNING, "Delimiter must not be alphanumeric or backslash");
373 pcre_handle_exec_error(PCRE_ERROR_INTERNAL);
374 return NULL;
375 }
376
377 start_delimiter = delimiter;
378 if ((pp = strchr("([{< )]}> )]}>", delimiter)))
379 delimiter = pp[5];
380 end_delimiter = delimiter;
381
382 pp = p;
383
384 if (start_delimiter == end_delimiter) {
385 /* We need to iterate through the pattern, searching for the ending delimiter,
386 but skipping the backslashed delimiters. If the ending delimiter is not
387 found, display a warning. */
388 while (*pp != 0) {
389 if (*pp == '\\' && pp[1] != 0) pp++;
390 else if (*pp == delimiter)
391 break;
392 pp++;
393 }
394 } else {
395 /* We iterate through the pattern, searching for the matching ending
396 * delimiter. For each matching starting delimiter, we increment nesting
397 * level, and decrement it for each matching ending delimiter. If we
398 * reach the end of the pattern without matching, display a warning.
399 */
400 int brackets = 1; /* brackets nesting level */
401 while (*pp != 0) {
402 if (*pp == '\\' && pp[1] != 0) pp++;
403 else if (*pp == end_delimiter && --brackets <= 0)
404 break;
405 else if (*pp == start_delimiter)
406 brackets++;
407 pp++;
408 }
409 }
410
411 if (*pp == 0) {
412 #if HAVE_SETLOCALE
413 if (key != regex) {
414 zend_string_release(key);
415 }
416 #endif
417 if (pp < ZSTR_VAL(regex) + ZSTR_LEN(regex)) {
418 php_error_docref(NULL,E_WARNING, "Null byte in regex");
419 } else if (start_delimiter == end_delimiter) {
420 php_error_docref(NULL,E_WARNING, "No ending delimiter '%c' found", delimiter);
421 } else {
422 php_error_docref(NULL,E_WARNING, "No ending matching delimiter '%c' found", delimiter);
423 }
424 pcre_handle_exec_error(PCRE_ERROR_INTERNAL);
425 return NULL;
426 }
427
428 /* Make a copy of the actual pattern. */
429 pattern = estrndup(p, pp-p);
430
431 /* Move on to the options */
432 pp++;
433
434 /* Parse through the options, setting appropriate flags. Display
435 a warning if we encounter an unknown modifier. */
436 while (pp < ZSTR_VAL(regex) + ZSTR_LEN(regex)) {
437 switch (*pp++) {
438 /* Perl compatible options */
439 case 'i': coptions |= PCRE_CASELESS; break;
440 case 'm': coptions |= PCRE_MULTILINE; break;
441 case 's': coptions |= PCRE_DOTALL; break;
442 case 'x': coptions |= PCRE_EXTENDED; break;
443
444 /* PCRE specific options */
445 case 'A': coptions |= PCRE_ANCHORED; break;
446 case 'D': coptions |= PCRE_DOLLAR_ENDONLY;break;
447 case 'S': do_study = 1; break;
448 case 'U': coptions |= PCRE_UNGREEDY; break;
449 case 'X': coptions |= PCRE_EXTRA; break;
450 case 'u': coptions |= PCRE_UTF8;
451 /* In PCRE, by default, \d, \D, \s, \S, \w, and \W recognize only ASCII
452 characters, even in UTF-8 mode. However, this can be changed by setting
453 the PCRE_UCP option. */
454 #ifdef PCRE_UCP
455 coptions |= PCRE_UCP;
456 #endif
457 break;
458 case 'J': coptions |= PCRE_DUPNAMES; break;
459
460 /* Custom preg options */
461 case 'e': poptions |= PREG_REPLACE_EVAL; break;
462
463 case ' ':
464 case '\n':
465 break;
466
467 default:
468 if (pp[-1]) {
469 php_error_docref(NULL,E_WARNING, "Unknown modifier '%c'", pp[-1]);
470 } else {
471 php_error_docref(NULL,E_WARNING, "Null byte in regex");
472 }
473 pcre_handle_exec_error(PCRE_ERROR_INTERNAL);
474 efree(pattern);
475 #if HAVE_SETLOCALE
476 if (key != regex) {
477 zend_string_release(key);
478 }
479 #endif
480 return NULL;
481 }
482 }
483
484 #if HAVE_SETLOCALE
485 if (key != regex) {
486 tables = pcre_maketables();
487 }
488 #endif
489
490 /* Compile pattern and display a warning if compilation failed. */
491 re = pcre_compile(pattern,
492 coptions,
493 &error,
494 &erroffset,
495 tables);
496
497 if (re == NULL) {
498 #if HAVE_SETLOCALE
499 if (key != regex) {
500 zend_string_release(key);
501 }
502 #endif
503 php_error_docref(NULL,E_WARNING, "Compilation failed: %s at offset %d", error, erroffset);
504 pcre_handle_exec_error(PCRE_ERROR_INTERNAL);
505 efree(pattern);
506 if (tables) {
507 pefree((void*)tables, 1);
508 }
509 return NULL;
510 }
511
512 #ifdef HAVE_PCRE_JIT_SUPPORT
513 if (PCRE_G(jit)) {
514 /* Enable PCRE JIT compiler */
515 do_study = 1;
516 soptions |= PCRE_STUDY_JIT_COMPILE;
517 }
518 #endif
519
520 /* If study option was specified, study the pattern and
521 store the result in extra for passing to pcre_exec. */
522 if (do_study) {
523 extra = pcre_study(re, soptions, &error);
524 if (extra) {
525 extra->flags |= PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
526 extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
527 extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
528 #ifdef HAVE_PCRE_JIT_SUPPORT
529 if (PCRE_G(jit) && jit_stack) {
530 pcre_assign_jit_stack(extra, NULL, jit_stack);
531 }
532 #endif
533 }
534 if (error != NULL) {
535 php_error_docref(NULL, E_WARNING, "Error while studying pattern");
536 pcre_handle_exec_error(PCRE_ERROR_INTERNAL);
537 }
538 } else {
539 extra = NULL;
540 }
541
542 efree(pattern);
543
544 /*
545 * If we reached cache limit, clean out the items from the head of the list;
546 * these are supposedly the oldest ones (but not necessarily the least used
547 * ones).
548 */
549 if (!pce && zend_hash_num_elements(&PCRE_G(pcre_cache)) == PCRE_CACHE_SIZE) {
550 int num_clean = PCRE_CACHE_SIZE / 8;
551 zend_hash_apply_with_argument(&PCRE_G(pcre_cache), pcre_clean_cache, &num_clean);
552 }
553
554 /* Store the compiled pattern and extra info in the cache. */
555 new_entry.re = re;
556 new_entry.extra = extra;
557 new_entry.preg_options = poptions;
558 new_entry.compile_options = coptions;
559 #if HAVE_SETLOCALE
560 new_entry.locale = NULL;
561 new_entry.tables = tables;
562 #endif
563 new_entry.refcount = 0;
564
565 rc = pcre_fullinfo(re, extra, PCRE_INFO_CAPTURECOUNT, &new_entry.capture_count);
566 if (rc < 0) {
567 #if HAVE_SETLOCALE
568 if (key != regex) {
569 zend_string_release(key);
570 }
571 #endif
572 php_error_docref(NULL, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
573 pcre_handle_exec_error(PCRE_ERROR_INTERNAL);
574 return NULL;
575 }
576
577 rc = pcre_fullinfo(re, extra, PCRE_INFO_NAMECOUNT, &new_entry.name_count);
578 if (rc < 0) {
579 #if HAVE_SETLOCALE
580 if (key != regex) {
581 zend_string_release(key);
582 }
583 #endif
584 php_error_docref(NULL, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
585 pcre_handle_exec_error(PCRE_ERROR_INTERNAL);
586 return NULL;
587 }
588
589 /*
590 * Interned strings are not duplicated when stored in HashTable,
591 * but all the interned strings created during HTTP request are removed
592 * at end of request. However PCRE_G(pcre_cache) must be consistent
593 * on the next request as well. So we disable usage of interned strings
594 * as hash keys especually for this table.
595 * See bug #63180
596 */
597 if (!ZSTR_IS_INTERNED(key) || !(GC_FLAGS(key) & IS_STR_PERMANENT)) {
598 pce = zend_hash_str_update_mem(&PCRE_G(pcre_cache),
599 ZSTR_VAL(key), ZSTR_LEN(key), &new_entry, sizeof(pcre_cache_entry));
600 #if HAVE_SETLOCALE
601 if (key != regex) {
602 zend_string_release(key);
603 }
604 #endif
605 } else {
606 pce = zend_hash_update_mem(&PCRE_G(pcre_cache), key, &new_entry, sizeof(pcre_cache_entry));
607 }
608
609 return pce;
610 }
611 /* }}} */
612
613 /* {{{ pcre_get_compiled_regex
614 */
pcre_get_compiled_regex(zend_string * regex,pcre_extra ** extra,int * preg_options)615 PHPAPI pcre* pcre_get_compiled_regex(zend_string *regex, pcre_extra **extra, int *preg_options)
616 {
617 pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex);
618
619 if (extra) {
620 *extra = pce ? pce->extra : NULL;
621 }
622 if (preg_options) {
623 *preg_options = pce ? pce->preg_options : 0;
624 }
625
626 return pce ? pce->re : NULL;
627 }
628 /* }}} */
629
630 /* {{{ pcre_get_compiled_regex_ex
631 */
pcre_get_compiled_regex_ex(zend_string * regex,pcre_extra ** extra,int * preg_options,int * compile_options)632 PHPAPI pcre* pcre_get_compiled_regex_ex(zend_string *regex, pcre_extra **extra, int *preg_options, int *compile_options)
633 {
634 pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex);
635
636 if (extra) {
637 *extra = pce ? pce->extra : NULL;
638 }
639 if (preg_options) {
640 *preg_options = pce ? pce->preg_options : 0;
641 }
642 if (compile_options) {
643 *compile_options = pce ? pce->compile_options : 0;
644 }
645
646 return pce ? pce->re : NULL;
647 }
648 /* }}} */
649
650 /* {{{ add_offset_pair */
add_offset_pair(zval * result,char * str,int len,int offset,char * name)651 static inline void add_offset_pair(zval *result, char *str, int len, int offset, char *name)
652 {
653 zval match_pair, tmp;
654
655 array_init_size(&match_pair, 2);
656
657 /* Add (match, offset) to the return value */
658 ZVAL_STRINGL(&tmp, str, len);
659 zend_hash_next_index_insert_new(Z_ARRVAL(match_pair), &tmp);
660 ZVAL_LONG(&tmp, offset);
661 zend_hash_next_index_insert_new(Z_ARRVAL(match_pair), &tmp);
662
663 if (name) {
664 Z_ADDREF(match_pair);
665 zend_hash_str_update(Z_ARRVAL_P(result), name, strlen(name), &match_pair);
666 }
667 zend_hash_next_index_insert(Z_ARRVAL_P(result), &match_pair);
668 }
669 /* }}} */
670
/*
 * Shared implementation of preg_match() (global == 0) and preg_match_all()
 * (global == 1): parses the userland arguments, fetches the compiled pattern
 * and hands over to php_pcre_match_impl(). Returns FALSE to userland when the
 * arguments are invalid, the subject length does not fit in an int, or the
 * pattern cannot be compiled.
 */
static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) /* {{{ */
{
	zend_string *regex;					/* Regular expression */
	zend_string *subject;				/* String to match against */
	zval *subpats = NULL;				/* Array for subpatterns */
	zend_long flags = 0;				/* Match control flags */
	zend_long start_offset = 0;			/* Where the new search starts */
	pcre_cache_entry *pce;				/* Compiled regular expression */

	ZEND_PARSE_PARAMETERS_START(2, 5)
		Z_PARAM_STR(regex)
		Z_PARAM_STR(subject)
		Z_PARAM_OPTIONAL
		Z_PARAM_ZVAL_EX(subpats, 0, 1)
		Z_PARAM_LONG(flags)
		Z_PARAM_LONG(start_offset)
	ZEND_PARSE_PARAMETERS_END_EX(RETURN_FALSE);

	/* The implementation takes the subject length as an int. */
	if (ZEND_SIZE_T_INT_OVFL(ZSTR_LEN(subject))) {
		php_error_docref(NULL, E_WARNING, "Subject is too long");
		RETURN_FALSE;
	}

	/* Compile regex or get it from cache. */
	pce = pcre_get_compiled_regex_cache(regex);
	if (pce == NULL) {
		RETURN_FALSE;
	}

	/* Hold a reference while matching so a cache purge cannot evict the entry. */
	pce->refcount++;
	php_pcre_match_impl(pce, ZSTR_VAL(subject), (int)ZSTR_LEN(subject), return_value, subpats,
		global, ZEND_NUM_ARGS() >= 4, flags, start_offset);
	pce->refcount--;
}
/* }}} */
706
707 /* {{{ php_pcre_match_impl() */
php_pcre_match_impl(pcre_cache_entry * pce,char * subject,int subject_len,zval * return_value,zval * subpats,int global,int use_flags,zend_long flags,zend_long start_offset)708 PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value,
709 zval *subpats, int global, int use_flags, zend_long flags, zend_long start_offset)
710 {
711 zval result_set, /* Holds a set of subpatterns after
712 a global match */
713 *match_sets = NULL; /* An array of sets of matches for each
714 subpattern after a global match */
715 pcre_extra *extra = pce->extra;/* Holds results of studying */
716 pcre_extra extra_data; /* Used locally for exec options */
717 int exoptions = 0; /* Execution options */
718 int count = 0; /* Count of matched subpatterns */
719 int *offsets; /* Array of subpattern offsets */
720 int num_subpats; /* Number of captured subpatterns */
721 int size_offsets; /* Size of the offsets array */
722 int matched; /* Has anything matched */
723 int g_notempty = 0; /* If the match should not be empty */
724 const char **stringlist; /* Holds list of subpatterns */
725 char **subpat_names; /* Array for named subpatterns */
726 int i;
727 int subpats_order; /* Order of subpattern matches */
728 int offset_capture; /* Capture match offsets: yes/no */
729 unsigned char *mark = NULL; /* Target for MARK name */
730 zval marks; /* Array of marks for PREG_PATTERN_ORDER */
731 ALLOCA_FLAG(use_heap);
732
733 ZVAL_UNDEF(&marks);
734
735 /* Overwrite the passed-in value for subpatterns with an empty array. */
736 if (subpats != NULL) {
737 zval_ptr_dtor(subpats);
738 array_init(subpats);
739 }
740
741 subpats_order = global ? PREG_PATTERN_ORDER : 0;
742
743 if (use_flags) {
744 offset_capture = flags & PREG_OFFSET_CAPTURE;
745
746 /*
747 * subpats_order is pre-set to pattern mode so we change it only if
748 * necessary.
749 */
750 if (flags & 0xff) {
751 subpats_order = flags & 0xff;
752 }
753 if ((global && (subpats_order < PREG_PATTERN_ORDER || subpats_order > PREG_SET_ORDER)) ||
754 (!global && subpats_order != 0)) {
755 php_error_docref(NULL, E_WARNING, "Invalid flags specified");
756 return;
757 }
758 } else {
759 offset_capture = 0;
760 }
761
762 /* Negative offset counts from the end of the string. */
763 if (start_offset < 0) {
764 start_offset = subject_len + start_offset;
765 if (start_offset < 0) {
766 start_offset = 0;
767 }
768 }
769
770 if (extra == NULL) {
771 extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
772 extra = &extra_data;
773 }
774 extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
775 extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
776 #ifdef PCRE_EXTRA_MARK
777 extra->mark = &mark;
778 extra->flags |= PCRE_EXTRA_MARK;
779 #endif
780
781 /* Calculate the size of the offsets array, and allocate memory for it. */
782 num_subpats = pce->capture_count + 1;
783 size_offsets = num_subpats * 3;
784
785 /*
786 * Build a mapping from subpattern numbers to their names. We will
787 * allocate the table only if there are any named subpatterns.
788 */
789 subpat_names = NULL;
790 if (pce->name_count > 0) {
791 subpat_names = make_subpats_table(num_subpats, pce);
792 if (!subpat_names) {
793 RETURN_FALSE;
794 }
795 }
796
797 if (size_offsets <= 32) {
798 offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
799 } else {
800 offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
801 }
802 memset(offsets, 0, size_offsets*sizeof(int));
803 /* Allocate match sets array and initialize the values. */
804 if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
805 match_sets = (zval *)safe_emalloc(num_subpats, sizeof(zval), 0);
806 for (i=0; i<num_subpats; i++) {
807 array_init(&match_sets[i]);
808 }
809 }
810
811 matched = 0;
812 PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
813
814 do {
815 /* Execute the regular expression. */
816 count = pcre_exec(pce->re, extra, subject, (int)subject_len, (int)start_offset,
817 exoptions|g_notempty, offsets, size_offsets);
818
819 /* the string was already proved to be valid UTF-8 */
820 exoptions |= PCRE_NO_UTF8_CHECK;
821
822 /* Check for too many substrings condition. */
823 if (count == 0) {
824 php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
825 count = size_offsets/3;
826 }
827
828 /* If something has matched */
829 if (count > 0) {
830 matched++;
831
832 /* If subpatterns array has been passed, fill it in with values. */
833 if (subpats != NULL) {
834 /* Try to get the list of substrings and display a warning if failed. */
835 if ((offsets[1] - offsets[0] < 0) || pcre_get_substring_list(subject, offsets, count, &stringlist) < 0) {
836 if (subpat_names) {
837 efree(subpat_names);
838 }
839 if (size_offsets <= 32) {
840 free_alloca(offsets, use_heap);
841 } else {
842 efree(offsets);
843 }
844 if (match_sets) efree(match_sets);
845 php_error_docref(NULL, E_WARNING, "Get subpatterns list failed");
846 RETURN_FALSE;
847 }
848
849 if (global) { /* global pattern matching */
850 if (subpats && subpats_order == PREG_PATTERN_ORDER) {
851 /* For each subpattern, insert it into the appropriate array. */
852 if (offset_capture) {
853 for (i = 0; i < count; i++) {
854 add_offset_pair(&match_sets[i], (char *)stringlist[i],
855 offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL);
856 }
857 } else {
858 for (i = 0; i < count; i++) {
859 add_next_index_stringl(&match_sets[i], (char *)stringlist[i],
860 offsets[(i<<1)+1] - offsets[i<<1]);
861 }
862 }
863 /* Add MARK, if available */
864 if (mark) {
865 if (Z_TYPE(marks) == IS_UNDEF) {
866 array_init(&marks);
867 }
868 add_index_string(&marks, matched - 1, (char *) mark);
869 }
870 /*
871 * If the number of captured subpatterns on this run is
872 * less than the total possible number, pad the result
873 * arrays with empty strings.
874 */
875 if (count < num_subpats) {
876 for (; i < num_subpats; i++) {
877 add_next_index_string(&match_sets[i], "");
878 }
879 }
880 } else {
881 /* Allocate the result set array */
882 array_init_size(&result_set, count + (mark ? 1 : 0));
883
884 /* Add all the subpatterns to it */
885 if (subpat_names) {
886 if (offset_capture) {
887 for (i = 0; i < count; i++) {
888 add_offset_pair(&result_set, (char *)stringlist[i],
889 offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], subpat_names[i]);
890 }
891 } else {
892 for (i = 0; i < count; i++) {
893 if (subpat_names[i]) {
894 add_assoc_stringl(&result_set, subpat_names[i], (char *)stringlist[i],
895 offsets[(i<<1)+1] - offsets[i<<1]);
896 }
897 add_next_index_stringl(&result_set, (char *)stringlist[i],
898 offsets[(i<<1)+1] - offsets[i<<1]);
899 }
900 }
901 } else {
902 if (offset_capture) {
903 for (i = 0; i < count; i++) {
904 add_offset_pair(&result_set, (char *)stringlist[i],
905 offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL);
906 }
907 } else {
908 for (i = 0; i < count; i++) {
909 add_next_index_stringl(&result_set, (char *)stringlist[i],
910 offsets[(i<<1)+1] - offsets[i<<1]);
911 }
912 }
913 }
914 /* Add MARK, if available */
915 if (mark) {
916 add_assoc_string_ex(&result_set, "MARK", sizeof("MARK") - 1, (char *)mark);
917 }
918 /* And add it to the output array */
919 zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &result_set);
920 }
921 } else { /* single pattern matching */
922 /* For each subpattern, insert it into the subpatterns array. */
923 if (subpat_names) {
924 if (offset_capture) {
925 for (i = 0; i < count; i++) {
926 add_offset_pair(subpats, (char *)stringlist[i],
927 offsets[(i<<1)+1] - offsets[i<<1],
928 offsets[i<<1], subpat_names[i]);
929 }
930 } else {
931 for (i = 0; i < count; i++) {
932 if (subpat_names[i]) {
933 add_assoc_stringl(subpats, subpat_names[i], (char *)stringlist[i],
934 offsets[(i<<1)+1] - offsets[i<<1]);
935 }
936 add_next_index_stringl(subpats, (char *)stringlist[i],
937 offsets[(i<<1)+1] - offsets[i<<1]);
938 }
939 }
940 } else {
941 if (offset_capture) {
942 for (i = 0; i < count; i++) {
943 add_offset_pair(subpats, (char *)stringlist[i],
944 offsets[(i<<1)+1] - offsets[i<<1],
945 offsets[i<<1], NULL);
946 }
947 } else {
948 for (i = 0; i < count; i++) {
949 add_next_index_stringl(subpats, (char *)stringlist[i],
950 offsets[(i<<1)+1] - offsets[i<<1]);
951 }
952 }
953 }
954 /* Add MARK, if available */
955 if (mark) {
956 add_assoc_string_ex(subpats, "MARK", sizeof("MARK") - 1, (char *)mark);
957 }
958 }
959
960 pcre_free((void *) stringlist);
961 }
962 } else if (count == PCRE_ERROR_NOMATCH) {
963 /* If we previously set PCRE_NOTEMPTY_ATSTART after a null match,
964 this is not necessarily the end. We need to advance
965 the start offset, and continue. Fudge the offset values
966 to achieve this, unless we're already at the end of the string. */
967 if (g_notempty != 0 && start_offset < subject_len) {
968 int unit_len = calculate_unit_length(pce, subject + start_offset);
969
970 offsets[0] = (int)start_offset;
971 offsets[1] = (int)(start_offset + unit_len);
972 } else
973 break;
974 } else {
975 pcre_handle_exec_error(count);
976 break;
977 }
978
979 /* If we have matched an empty string, mimic what Perl's /g options does.
980 This turns out to be rather cunning. First we set PCRE_NOTEMPTY_ATSTART and try
981 the match again at the same point. If this fails (picked up above) we
982 advance to the next character. */
983 g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED : 0;
984
985 /* Advance to the position right after the last full match */
986 start_offset = offsets[1];
987 } while (global);
988
989 /* Add the match sets to the output array and clean up */
990 if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
991 if (subpat_names) {
992 for (i = 0; i < num_subpats; i++) {
993 if (subpat_names[i]) {
994 zend_hash_str_update(Z_ARRVAL_P(subpats), subpat_names[i],
995 strlen(subpat_names[i]), &match_sets[i]);
996 Z_ADDREF(match_sets[i]);
997 }
998 zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i]);
999 }
1000 } else {
1001 for (i = 0; i < num_subpats; i++) {
1002 zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i]);
1003 }
1004 }
1005 efree(match_sets);
1006
1007 if (Z_TYPE(marks) != IS_UNDEF) {
1008 add_assoc_zval(subpats, "MARK", &marks);
1009 }
1010 }
1011
1012 if (size_offsets <= 32) {
1013 free_alloca(offsets, use_heap);
1014 } else {
1015 efree(offsets);
1016 }
1017 if (subpat_names) {
1018 efree(subpat_names);
1019 }
1020
1021 /* Did we encounter an error? */
1022 if (PCRE_G(error_code) == PHP_PCRE_NO_ERROR) {
1023 RETVAL_LONG(matched);
1024 } else {
1025 RETVAL_FALSE;
1026 }
1027 }
1028 /* }}} */
1029
1030 /* {{{ proto int preg_match(string pattern, string subject [, array &subpatterns [, int flags [, int offset]]])
1031 Perform a Perl-style regular expression match */
PHP_FUNCTION(preg_match)1032 static PHP_FUNCTION(preg_match)
1033 {
1034 php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
1035 }
1036 /* }}} */
1037
1038 /* {{{ proto int preg_match_all(string pattern, string subject [, array &subpatterns [, int flags [, int offset]]])
1039 Perform a Perl-style global regular expression match */
PHP_FUNCTION(preg_match_all)1040 static PHP_FUNCTION(preg_match_all)
1041 {
1042 php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
1043 }
1044 /* }}} */
1045
/* {{{ preg_get_backref
 * Try to read a back-reference at *str, which must point at the introducing
 * '\' or '$'. Accepted forms are \N, $N and ${N}, where N is one or two
 * decimal digits.
 *
 * On success the number is stored in *backref, *str is moved just past the
 * reference and 1 is returned. On failure 0 is returned and *str is left
 * untouched; *backref is only written once at least one digit has been read
 * (so a malformed "${N" still stores N).
 */
static int preg_get_backref(char **str, int *backref)
{
	const char *cursor = *str;
	int braced;
	int number;

	/* An introducer that is the last character cannot start a reference. */
	if (cursor[1] == '\0') {
		return 0;
	}

	/* Only '$' may be followed by a braced number. */
	braced = (cursor[0] == '$' && cursor[1] == '{');
	cursor += braced ? 2 : 1;

	/* First digit is mandatory. */
	if (*cursor < '0' || *cursor > '9') {
		return 0;
	}
	number = *cursor++ - '0';

	/* Second digit is optional. */
	if (*cursor >= '0' && *cursor <= '9') {
		number = number * 10 + *cursor++ - '0';
	}
	*backref = number;

	/* A braced reference has to be closed. */
	if (braced) {
		if (*cursor != '}') {
			return 0;
		}
		cursor++;
	}

	*str = (char *)cursor;
	return 1;
}
1083 /* }}} */
1084
1085 /* {{{ preg_do_repl_func
1086 */
preg_do_repl_func(zval * function,char * subject,int * offsets,char ** subpat_names,int count,unsigned char * mark)1087 static zend_string *preg_do_repl_func(zval *function, char *subject, int *offsets, char **subpat_names, int count, unsigned char *mark)
1088 {
1089 zend_string *result_str;
1090 zval retval; /* Function return value */
1091 zval args[1]; /* Argument to pass to function */
1092 int i;
1093
1094 array_init_size(&args[0], count + (mark ? 1 : 0));
1095 if (subpat_names) {
1096 for (i = 0; i < count; i++) {
1097 if (subpat_names[i]) {
1098 add_assoc_stringl(&args[0], subpat_names[i], &subject[offsets[i<<1]] , offsets[(i<<1)+1] - offsets[i<<1]);
1099 }
1100 add_next_index_stringl(&args[0], &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1]);
1101 }
1102 } else {
1103 for (i = 0; i < count; i++) {
1104 add_next_index_stringl(&args[0], &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1]);
1105 }
1106 }
1107 if (mark) {
1108 add_assoc_string(&args[0], "MARK", (char *) mark);
1109 }
1110
1111 if (call_user_function_ex(EG(function_table), NULL, function, &retval, 1, args, 0, NULL) == SUCCESS && Z_TYPE(retval) != IS_UNDEF) {
1112 result_str = zval_get_string(&retval);
1113 zval_ptr_dtor(&retval);
1114 } else {
1115 if (!EG(exception)) {
1116 php_error_docref(NULL, E_WARNING, "Unable to call custom replacement function");
1117 }
1118
1119 result_str = zend_string_init(&subject[offsets[0]], offsets[1] - offsets[0], 0);
1120 }
1121
1122 zval_ptr_dtor(&args[0]);
1123
1124 return result_str;
1125 }
1126 /* }}} */
1127
1128 /* {{{ php_pcre_replace
1129 */
php_pcre_replace(zend_string * regex,zend_string * subject_str,char * subject,int subject_len,zval * replace_val,int is_callable_replace,int limit,int * replace_count)1130 PHPAPI zend_string *php_pcre_replace(zend_string *regex,
1131 zend_string *subject_str,
1132 char *subject, int subject_len,
1133 zval *replace_val, int is_callable_replace,
1134 int limit, int *replace_count)
1135 {
1136 pcre_cache_entry *pce; /* Compiled regular expression */
1137 zend_string *result; /* Function result */
1138
1139 /* Compile regex or get it from cache. */
1140 if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
1141 return NULL;
1142 }
1143 pce->refcount++;
1144 result = php_pcre_replace_impl(pce, subject_str, subject, subject_len, replace_val,
1145 is_callable_replace, limit, replace_count);
1146 pce->refcount--;
1147
1148 return result;
1149 }
1150 /* }}} */
1151
1152 /* {{{ php_pcre_replace_impl() */
php_pcre_replace_impl(pcre_cache_entry * pce,zend_string * subject_str,char * subject,int subject_len,zval * replace_val,int is_callable_replace,int limit,int * replace_count)1153 PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *subject_str, char *subject, int subject_len, zval *replace_val, int is_callable_replace, int limit, int *replace_count)
1154 {
1155 pcre_extra *extra = pce->extra;/* Holds results of studying */
1156 pcre_extra extra_data; /* Used locally for exec options */
1157 int exoptions = 0; /* Execution options */
1158 int count = 0; /* Count of matched subpatterns */
1159 int *offsets; /* Array of subpattern offsets */
1160 char **subpat_names; /* Array for named subpatterns */
1161 int num_subpats; /* Number of captured subpatterns */
1162 int size_offsets; /* Size of the offsets array */
1163 size_t new_len; /* Length of needed storage */
1164 size_t alloc_len; /* Actual allocated length */
1165 int match_len; /* Length of the current match */
1166 int backref; /* Backreference number */
1167 int start_offset; /* Where the new search starts */
1168 int g_notempty=0; /* If the match should not be empty */
1169 char *replace=NULL, /* Replacement string */
1170 *walkbuf, /* Location of current replacement in the result */
1171 *walk, /* Used to walk the replacement string */
1172 *match, /* The current match */
1173 *piece, /* The current piece of subject */
1174 *replace_end=NULL, /* End of replacement string */
1175 walk_last; /* Last walked character */
1176 size_t result_len; /* Length of result */
1177 unsigned char *mark = NULL; /* Target for MARK name */
1178 zend_string *result; /* Result of replacement */
1179 zend_string *eval_result=NULL; /* Result of custom function */
1180
1181 ALLOCA_FLAG(use_heap);
1182
1183 if (extra == NULL) {
1184 extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1185 extra = &extra_data;
1186 }
1187
1188 extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
1189 extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
1190
1191 if (UNEXPECTED(pce->preg_options & PREG_REPLACE_EVAL)) {
1192 php_error_docref(NULL, E_WARNING, "The /e modifier is no longer supported, use preg_replace_callback instead");
1193 return NULL;
1194 }
1195
1196 if (!is_callable_replace) {
1197 replace = Z_STRVAL_P(replace_val);
1198 replace_end = replace + Z_STRLEN_P(replace_val);
1199 }
1200
1201 /* Calculate the size of the offsets array, and allocate memory for it. */
1202 num_subpats = pce->capture_count + 1;
1203 size_offsets = num_subpats * 3;
1204 if (size_offsets <= 32) {
1205 offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
1206 } else {
1207 offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
1208 }
1209
1210 /*
1211 * Build a mapping from subpattern numbers to their names. We will
1212 * allocate the table only if there are any named subpatterns.
1213 */
1214 subpat_names = NULL;
1215 if (UNEXPECTED(pce->name_count > 0)) {
1216 subpat_names = make_subpats_table(num_subpats, pce);
1217 if (!subpat_names) {
1218 if (size_offsets <= 32) {
1219 free_alloca(offsets, use_heap);
1220 } else {
1221 efree(offsets);
1222 }
1223 return NULL;
1224 }
1225 }
1226
1227 alloc_len = 0;
1228 result = NULL;
1229
1230 /* Initialize */
1231 match = NULL;
1232 start_offset = 0;
1233 result_len = 0;
1234 PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1235
1236 while (1) {
1237 #ifdef PCRE_EXTRA_MARK
1238 extra->mark = &mark;
1239 extra->flags |= PCRE_EXTRA_MARK;
1240 #endif
1241 /* Execute the regular expression. */
1242 count = pcre_exec(pce->re, extra, subject, subject_len, start_offset,
1243 exoptions|g_notempty, offsets, size_offsets);
1244
1245 /* the string was already proved to be valid UTF-8 */
1246 exoptions |= PCRE_NO_UTF8_CHECK;
1247
1248 /* Check for too many substrings condition. */
1249 if (UNEXPECTED(count == 0)) {
1250 php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
1251 count = size_offsets / 3;
1252 }
1253
1254 piece = subject + start_offset;
1255
1256 /* if (EXPECTED(count > 0 && (limit == -1 || limit > 0))) */
1257 if (EXPECTED(count > 0 && (offsets[1] - offsets[0] >= 0) && limit)) {
1258 if (UNEXPECTED(replace_count)) {
1259 ++*replace_count;
1260 }
1261
1262 /* Set the match location in subject */
1263 match = subject + offsets[0];
1264
1265 new_len = result_len + offsets[0] - start_offset; /* part before the match */
1266
1267 /* if (!is_callable_replace) */
1268 if (EXPECTED(replace)) {
1269 /* do regular substitution */
1270 walk = replace;
1271 walk_last = 0;
1272
1273 while (walk < replace_end) {
1274 if ('\\' == *walk || '$' == *walk) {
1275 if (walk_last == '\\') {
1276 walk++;
1277 walk_last = 0;
1278 continue;
1279 }
1280 if (preg_get_backref(&walk, &backref)) {
1281 if (backref < count)
1282 new_len += offsets[(backref<<1)+1] - offsets[backref<<1];
1283 continue;
1284 }
1285 }
1286 new_len++;
1287 walk++;
1288 walk_last = walk[-1];
1289 }
1290
1291 if (new_len >= alloc_len) {
1292 alloc_len = zend_safe_address_guarded(2, new_len, alloc_len);
1293 if (result == NULL) {
1294 result = zend_string_alloc(alloc_len, 0);
1295 } else {
1296 result = zend_string_extend(result, alloc_len, 0);
1297 }
1298 }
1299
1300 /* copy the part of the string before the match */
1301 memcpy(&ZSTR_VAL(result)[result_len], piece, match-piece);
1302 result_len += (match-piece);
1303
1304 /* copy replacement and backrefs */
1305 walkbuf = ZSTR_VAL(result) + result_len;
1306
1307 walk = replace;
1308 walk_last = 0;
1309 while (walk < replace_end) {
1310 if ('\\' == *walk || '$' == *walk) {
1311 if (walk_last == '\\') {
1312 *(walkbuf-1) = *walk++;
1313 walk_last = 0;
1314 continue;
1315 }
1316 if (preg_get_backref(&walk, &backref)) {
1317 if (backref < count) {
1318 match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
1319 memcpy(walkbuf, subject + offsets[backref<<1], match_len);
1320 walkbuf += match_len;
1321 }
1322 continue;
1323 }
1324 }
1325 *walkbuf++ = *walk++;
1326 walk_last = walk[-1];
1327 }
1328 *walkbuf = '\0';
1329 /* increment the result length by how much we've added to the string */
1330 result_len += (walkbuf - (ZSTR_VAL(result) + result_len));
1331 } else {
1332 /* Use custom function to get replacement string and its length. */
1333 eval_result = preg_do_repl_func(replace_val, subject, offsets, subpat_names, count, mark);
1334 ZEND_ASSERT(eval_result);
1335 new_len = zend_safe_address_guarded(1, ZSTR_LEN(eval_result), new_len);
1336 if (new_len >= alloc_len) {
1337 alloc_len = zend_safe_address_guarded(2, new_len, alloc_len);
1338 if (result == NULL) {
1339 result = zend_string_alloc(alloc_len, 0);
1340 } else {
1341 result = zend_string_extend(result, alloc_len, 0);
1342 }
1343 }
1344 /* copy the part of the string before the match */
1345 memcpy(ZSTR_VAL(result) + result_len, piece, match-piece);
1346 result_len += (int)(match-piece);
1347
1348 /* copy replacement and backrefs */
1349 walkbuf = ZSTR_VAL(result) + result_len;
1350
1351 /* If using custom function, copy result to the buffer and clean up. */
1352 memcpy(walkbuf, ZSTR_VAL(eval_result), ZSTR_LEN(eval_result));
1353 result_len += (int)ZSTR_LEN(eval_result);
1354 zend_string_release(eval_result);
1355 }
1356
1357 if (EXPECTED(limit)) {
1358 limit--;
1359 }
1360 } else if (count == PCRE_ERROR_NOMATCH || UNEXPECTED(limit == 0)) {
1361 /* If we previously set PCRE_NOTEMPTY_ATSTART after a null match,
1362 this is not necessarily the end. We need to advance
1363 the start offset, and continue. Fudge the offset values
1364 to achieve this, unless we're already at the end of the string. */
1365 if (g_notempty != 0 && start_offset < subject_len) {
1366 int unit_len = calculate_unit_length(pce, piece);
1367
1368 offsets[0] = start_offset;
1369 offsets[1] = start_offset + unit_len;
1370 memcpy(ZSTR_VAL(result) + result_len, piece, unit_len);
1371 result_len += unit_len;
1372 } else {
1373 if (!result && subject_str) {
1374 result = zend_string_copy(subject_str);
1375 break;
1376 }
1377 new_len = result_len + subject_len - start_offset;
1378 if (new_len >= alloc_len) {
1379 alloc_len = new_len; /* now we know exactly how long it is */
1380 if (NULL != result) {
1381 result = zend_string_realloc(result, alloc_len, 0);
1382 } else {
1383 result = zend_string_alloc(alloc_len, 0);
1384 }
1385 }
1386 /* stick that last bit of string on our output */
1387 memcpy(ZSTR_VAL(result) + result_len, piece, subject_len - start_offset);
1388 result_len += subject_len - start_offset;
1389 ZSTR_VAL(result)[result_len] = '\0';
1390 ZSTR_LEN(result) = result_len;
1391 break;
1392 }
1393 } else {
1394 pcre_handle_exec_error(count);
1395 if (result) {
1396 zend_string_free(result);
1397 result = NULL;
1398 }
1399 break;
1400 }
1401
1402 /* If we have matched an empty string, mimic what Perl's /g options does.
1403 This turns out to be rather cunning. First we set PCRE_NOTEMPTY_ATSTART and try
1404 the match again at the same point. If this fails (picked up above) we
1405 advance to the next character. */
1406 g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED : 0;
1407
1408 /* Advance to the next piece. */
1409 start_offset = offsets[1];
1410 }
1411
1412 if (size_offsets <= 32) {
1413 free_alloca(offsets, use_heap);
1414 } else {
1415 efree(offsets);
1416 }
1417 if (UNEXPECTED(subpat_names)) {
1418 efree(subpat_names);
1419 }
1420
1421 return result;
1422 }
1423 /* }}} */
1424
1425 /* {{{ php_replace_in_subject
1426 */
php_replace_in_subject(zval * regex,zval * replace,zval * subject,int limit,int is_callable_replace,int * replace_count)1427 static zend_string *php_replace_in_subject(zval *regex, zval *replace, zval *subject, int limit, int is_callable_replace, int *replace_count)
1428 {
1429 zval *regex_entry,
1430 *replace_value,
1431 empty_replace;
1432 zend_string *result;
1433 uint32_t replace_idx;
1434 zend_string *subject_str = zval_get_string(subject);
1435
1436 /* FIXME: This might need to be changed to ZSTR_EMPTY_ALLOC(). Check if this zval could be dtor()'ed somehow */
1437 ZVAL_EMPTY_STRING(&empty_replace);
1438
1439 if (ZEND_SIZE_T_INT_OVFL(ZSTR_LEN(subject_str))) {
1440 php_error_docref(NULL, E_WARNING, "Subject is too long");
1441 return NULL;
1442 }
1443
1444 /* If regex is an array */
1445 if (Z_TYPE_P(regex) == IS_ARRAY) {
1446 replace_value = replace;
1447 replace_idx = 0;
1448
1449 /* For each entry in the regex array, get the entry */
1450 ZEND_HASH_FOREACH_VAL(Z_ARRVAL_P(regex), regex_entry) {
1451 zval replace_str;
1452 /* Make sure we're dealing with strings. */
1453 zend_string *regex_str = zval_get_string(regex_entry);
1454
1455 ZVAL_UNDEF(&replace_str);
1456 /* If replace is an array and not a callable construct */
1457 if (Z_TYPE_P(replace) == IS_ARRAY && !is_callable_replace) {
1458 /* Get current entry */
1459 while (replace_idx < Z_ARRVAL_P(replace)->nNumUsed) {
1460 if (Z_TYPE(Z_ARRVAL_P(replace)->arData[replace_idx].val) != IS_UNDEF) {
1461 ZVAL_COPY(&replace_str, &Z_ARRVAL_P(replace)->arData[replace_idx].val);
1462 break;
1463 }
1464 replace_idx++;
1465 }
1466 if (!Z_ISUNDEF(replace_str)) {
1467 if (!is_callable_replace) {
1468 convert_to_string(&replace_str);
1469 }
1470 replace_value = &replace_str;
1471 replace_idx++;
1472 } else {
1473 /* We've run out of replacement strings, so use an empty one */
1474 replace_value = &empty_replace;
1475 }
1476 }
1477
1478 /* Do the actual replacement and put the result back into subject_str
1479 for further replacements. */
1480 if ((result = php_pcre_replace(regex_str,
1481 subject_str,
1482 ZSTR_VAL(subject_str),
1483 (int)ZSTR_LEN(subject_str),
1484 replace_value,
1485 is_callable_replace,
1486 limit,
1487 replace_count)) != NULL) {
1488 zend_string_release(subject_str);
1489 subject_str = result;
1490 } else {
1491 zend_string_release(subject_str);
1492 zend_string_release(regex_str);
1493 zval_dtor(&replace_str);
1494 return NULL;
1495 }
1496
1497 zend_string_release(regex_str);
1498 zval_dtor(&replace_str);
1499 } ZEND_HASH_FOREACH_END();
1500
1501 return subject_str;
1502 } else {
1503 result = php_pcre_replace(Z_STR_P(regex),
1504 subject_str,
1505 ZSTR_VAL(subject_str),
1506 (int)ZSTR_LEN(subject_str),
1507 replace,
1508 is_callable_replace,
1509 limit,
1510 replace_count);
1511 zend_string_release(subject_str);
1512 return result;
1513 }
1514 }
1515 /* }}} */
1516
1517 /* {{{ preg_replace_impl
1518 */
preg_replace_impl(zval * return_value,zval * regex,zval * replace,zval * subject,zend_long limit_val,int is_callable_replace,int is_filter)1519 static int preg_replace_impl(zval *return_value, zval *regex, zval *replace, zval *subject, zend_long limit_val, int is_callable_replace, int is_filter)
1520 {
1521 zval *subject_entry;
1522 zend_string *result;
1523 zend_string *string_key;
1524 zend_ulong num_key;
1525 int replace_count = 0, old_replace_count;
1526
1527 if (Z_TYPE_P(replace) != IS_ARRAY && (Z_TYPE_P(replace) != IS_OBJECT || !is_callable_replace)) {
1528 convert_to_string_ex(replace);
1529 }
1530
1531 if (Z_TYPE_P(regex) != IS_ARRAY) {
1532 convert_to_string_ex(regex);
1533 }
1534
1535 /* if subject is an array */
1536 if (Z_TYPE_P(subject) == IS_ARRAY) {
1537 array_init_size(return_value, zend_hash_num_elements(Z_ARRVAL_P(subject)));
1538
1539 /* For each subject entry, convert it to string, then perform replacement
1540 and add the result to the return_value array. */
1541 ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(subject), num_key, string_key, subject_entry) {
1542 old_replace_count = replace_count;
1543 if ((result = php_replace_in_subject(regex, replace, subject_entry, limit_val, is_callable_replace, &replace_count)) != NULL) {
1544 if (!is_filter || replace_count > old_replace_count) {
1545 /* Add to return array */
1546 zval zv;
1547
1548 ZVAL_STR(&zv, result);
1549 if (string_key) {
1550 zend_hash_add_new(Z_ARRVAL_P(return_value), string_key, &zv);
1551 } else {
1552 zend_hash_index_add_new(Z_ARRVAL_P(return_value), num_key, &zv);
1553 }
1554 } else {
1555 zend_string_release(result);
1556 }
1557 }
1558 } ZEND_HASH_FOREACH_END();
1559 } else {
1560 /* if subject is not an array */
1561 old_replace_count = replace_count;
1562 if ((result = php_replace_in_subject(regex, replace, subject, limit_val, is_callable_replace, &replace_count)) != NULL) {
1563 if (!is_filter || replace_count > old_replace_count) {
1564 RETVAL_STR(result);
1565 } else {
1566 zend_string_release(result);
1567 RETVAL_NULL();
1568 }
1569 } else {
1570 RETVAL_NULL();
1571 }
1572 }
1573
1574 return replace_count;
1575 }
1576 /* }}} */
1577
1578 /* {{{ proto mixed preg_replace(mixed regex, mixed replace, mixed subject [, int limit [, int &count]])
1579 Perform Perl-style regular expression replacement. */
PHP_FUNCTION(preg_replace)1580 static PHP_FUNCTION(preg_replace)
1581 {
1582 zval *regex, *replace, *subject, *zcount = NULL;
1583 zend_long limit = -1;
1584 int replace_count;
1585
1586 /* Get function parameters and do error-checking. */
1587 ZEND_PARSE_PARAMETERS_START(3, 5)
1588 Z_PARAM_ZVAL(regex)
1589 Z_PARAM_ZVAL(replace)
1590 Z_PARAM_ZVAL(subject)
1591 Z_PARAM_OPTIONAL
1592 Z_PARAM_LONG(limit)
1593 Z_PARAM_ZVAL_EX(zcount, 0, 1)
1594 ZEND_PARSE_PARAMETERS_END();
1595
1596 if (Z_TYPE_P(replace) == IS_ARRAY && Z_TYPE_P(regex) != IS_ARRAY) {
1597 php_error_docref(NULL, E_WARNING, "Parameter mismatch, pattern is a string while replacement is an array");
1598 RETURN_FALSE;
1599 }
1600
1601 replace_count = preg_replace_impl(return_value, regex, replace, subject, limit, 0, 0);
1602 if (zcount) {
1603 zval_ptr_dtor(zcount);
1604 ZVAL_LONG(zcount, replace_count);
1605 }
1606 }
1607 /* }}} */
1608
1609 /* {{{ proto mixed preg_replace_callback(mixed regex, mixed callback, mixed subject [, int limit [, int &count]])
1610 Perform Perl-style regular expression replacement using replacement callback. */
PHP_FUNCTION(preg_replace_callback)1611 static PHP_FUNCTION(preg_replace_callback)
1612 {
1613 zval *regex, *replace, *subject, *zcount = NULL;
1614 zend_long limit = -1;
1615 zend_string *callback_name;
1616 int replace_count;
1617
1618 /* Get function parameters and do error-checking. */
1619 ZEND_PARSE_PARAMETERS_START(3, 5)
1620 Z_PARAM_ZVAL(regex)
1621 Z_PARAM_ZVAL(replace)
1622 Z_PARAM_ZVAL(subject)
1623 Z_PARAM_OPTIONAL
1624 Z_PARAM_LONG(limit)
1625 Z_PARAM_ZVAL_EX(zcount, 0, 1)
1626 ZEND_PARSE_PARAMETERS_END();
1627
1628 if (!zend_is_callable(replace, 0, &callback_name)) {
1629 php_error_docref(NULL, E_WARNING, "Requires argument 2, '%s', to be a valid callback", ZSTR_VAL(callback_name));
1630 zend_string_release(callback_name);
1631 ZVAL_COPY(return_value, subject);
1632 return;
1633 }
1634 zend_string_release(callback_name);
1635
1636 replace_count = preg_replace_impl(return_value, regex, replace, subject, limit, 1, 0);
1637 if (zcount) {
1638 zval_ptr_dtor(zcount);
1639 ZVAL_LONG(zcount, replace_count);
1640 }
1641 }
1642 /* }}} */
1643
1644 /* {{{ proto mixed preg_replace_callback_array(array pattern, mixed subject [, int limit [, int &count]])
1645 Perform Perl-style regular expression replacement using replacement callback. */
PHP_FUNCTION(preg_replace_callback_array)1646 static PHP_FUNCTION(preg_replace_callback_array)
1647 {
1648 zval regex, zv, *replace, *subject, *pattern, *zcount = NULL;
1649 zend_long limit = -1;
1650 zend_string *str_idx;
1651 zend_string *callback_name;
1652 int replace_count = 0;
1653
1654 /* Get function parameters and do error-checking. */
1655 ZEND_PARSE_PARAMETERS_START(2, 4)
1656 Z_PARAM_ARRAY(pattern)
1657 Z_PARAM_ZVAL(subject)
1658 Z_PARAM_OPTIONAL
1659 Z_PARAM_LONG(limit)
1660 Z_PARAM_ZVAL_EX(zcount, 0, 1)
1661 ZEND_PARSE_PARAMETERS_END();
1662
1663 ZEND_HASH_FOREACH_STR_KEY_VAL(Z_ARRVAL_P(pattern), str_idx, replace) {
1664 if (str_idx) {
1665 ZVAL_STR_COPY(®ex, str_idx);
1666 } else {
1667 php_error_docref(NULL, E_WARNING, "Delimiter must not be alphanumeric or backslash");
1668 zval_ptr_dtor(return_value);
1669 RETURN_NULL();
1670 }
1671
1672 if (!zend_is_callable(replace, 0, &callback_name)) {
1673 php_error_docref(NULL, E_WARNING, "'%s' is not a valid callback", ZSTR_VAL(callback_name));
1674 zend_string_release(callback_name);
1675 zval_ptr_dtor(®ex);
1676 zval_ptr_dtor(return_value);
1677 ZVAL_COPY(return_value, subject);
1678 return;
1679 }
1680 zend_string_release(callback_name);
1681
1682 if (Z_ISNULL_P(return_value)) {
1683 replace_count += preg_replace_impl(&zv, ®ex, replace, subject, limit, 1, 0);
1684 } else {
1685 replace_count += preg_replace_impl(&zv, ®ex, replace, return_value, limit, 1, 0);
1686 zval_ptr_dtor(return_value);
1687 }
1688
1689 zval_ptr_dtor(®ex);
1690
1691 ZVAL_COPY_VALUE(return_value, &zv);
1692
1693 if (UNEXPECTED(EG(exception))) {
1694 zval_ptr_dtor(return_value);
1695 RETURN_NULL();
1696 }
1697 } ZEND_HASH_FOREACH_END();
1698
1699 if (zcount) {
1700 zval_ptr_dtor(zcount);
1701 ZVAL_LONG(zcount, replace_count);
1702 }
1703 }
1704 /* }}} */
1705
1706 /* {{{ proto mixed preg_filter(mixed regex, mixed replace, mixed subject [, int limit [, int &count]])
1707 Perform Perl-style regular expression replacement and only return matches. */
PHP_FUNCTION(preg_filter)1708 static PHP_FUNCTION(preg_filter)
1709 {
1710 zval *regex, *replace, *subject, *zcount = NULL;
1711 zend_long limit = -1;
1712 int replace_count;
1713
1714 /* Get function parameters and do error-checking. */
1715 ZEND_PARSE_PARAMETERS_START(3, 5)
1716 Z_PARAM_ZVAL(regex)
1717 Z_PARAM_ZVAL(replace)
1718 Z_PARAM_ZVAL(subject)
1719 Z_PARAM_OPTIONAL
1720 Z_PARAM_LONG(limit)
1721 Z_PARAM_ZVAL_EX(zcount, 0, 1)
1722 ZEND_PARSE_PARAMETERS_END();
1723
1724 if (Z_TYPE_P(replace) == IS_ARRAY && Z_TYPE_P(regex) != IS_ARRAY) {
1725 php_error_docref(NULL, E_WARNING, "Parameter mismatch, pattern is a string while replacement is an array");
1726 RETURN_FALSE;
1727 }
1728
1729 replace_count = preg_replace_impl(return_value, regex, replace, subject, limit, 0, 1);
1730 if (zcount) {
1731 zval_ptr_dtor(zcount);
1732 ZVAL_LONG(zcount, replace_count);
1733 }
1734 }
1735 /* }}} */
1736
1737 /* {{{ proto array preg_split(string pattern, string subject [, int limit [, int flags]])
1738 Split string into an array using a perl-style regular expression as a delimiter */
PHP_FUNCTION(preg_split)1739 static PHP_FUNCTION(preg_split)
1740 {
1741 zend_string *regex; /* Regular expression */
1742 zend_string *subject; /* String to match against */
1743 zend_long limit_val = -1;/* Integer value of limit */
1744 zend_long flags = 0; /* Match control flags */
1745 pcre_cache_entry *pce; /* Compiled regular expression */
1746
1747 /* Get function parameters and do error checking */
1748 ZEND_PARSE_PARAMETERS_START(2, 4)
1749 Z_PARAM_STR(regex)
1750 Z_PARAM_STR(subject)
1751 Z_PARAM_OPTIONAL
1752 Z_PARAM_LONG(limit_val)
1753 Z_PARAM_LONG(flags)
1754 ZEND_PARSE_PARAMETERS_END_EX(RETURN_FALSE);
1755
1756 if (ZEND_SIZE_T_INT_OVFL(ZSTR_LEN(subject))) {
1757 php_error_docref(NULL, E_WARNING, "Subject is too long");
1758 RETURN_FALSE;
1759 }
1760
1761 /* Compile regex or get it from cache. */
1762 if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
1763 RETURN_FALSE;
1764 }
1765
1766 pce->refcount++;
1767 php_pcre_split_impl(pce, ZSTR_VAL(subject), (int)ZSTR_LEN(subject), return_value, (int)limit_val, flags);
1768 pce->refcount--;
1769 }
1770 /* }}} */
1771
1772 /* {{{ php_pcre_split
1773 */
php_pcre_split_impl(pcre_cache_entry * pce,char * subject,int subject_len,zval * return_value,zend_long limit_val,zend_long flags)1774 PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value,
1775 zend_long limit_val, zend_long flags)
1776 {
1777 pcre_extra *extra = pce->extra;/* Holds results of studying */
1778 pcre_extra extra_data; /* Used locally for exec options */
1779 int *offsets; /* Array of subpattern offsets */
1780 int size_offsets; /* Size of the offsets array */
1781 int exoptions = 0; /* Execution options */
1782 int count = 0; /* Count of matched subpatterns */
1783 int start_offset; /* Where the new search starts */
1784 int next_offset; /* End of the last delimiter match + 1 */
1785 int g_notempty = 0; /* If the match should not be empty */
1786 char *last_match; /* Location of last match */
1787 int no_empty; /* If NO_EMPTY flag is set */
1788 int delim_capture; /* If delimiters should be captured */
1789 int offset_capture; /* If offsets should be captured */
1790 zval tmp;
1791 ALLOCA_FLAG(use_heap);
1792
1793 no_empty = flags & PREG_SPLIT_NO_EMPTY;
1794 delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE;
1795 offset_capture = flags & PREG_SPLIT_OFFSET_CAPTURE;
1796
1797 if (limit_val == 0) {
1798 limit_val = -1;
1799 }
1800
1801 if (extra == NULL) {
1802 extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1803 extra = &extra_data;
1804 }
1805 extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
1806 extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
1807 #ifdef PCRE_EXTRA_MARK
1808 extra->flags &= ~PCRE_EXTRA_MARK;
1809 #endif
1810
1811 /* Initialize return value */
1812 array_init(return_value);
1813
1814 /* Calculate the size of the offsets array, and allocate memory for it. */
1815 size_offsets = (pce->capture_count + 1) * 3;
1816 if (size_offsets <= 32) {
1817 offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
1818 } else {
1819 offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
1820 }
1821
1822 /* Start at the beginning of the string */
1823 start_offset = 0;
1824 next_offset = 0;
1825 last_match = subject;
1826 PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1827
1828 /* Get next piece if no limit or limit not yet reached and something matched*/
1829 while ((limit_val == -1 || limit_val > 1)) {
1830 count = pcre_exec(pce->re, extra, subject,
1831 subject_len, start_offset,
1832 exoptions|g_notempty, offsets, size_offsets);
1833
1834 /* the string was already proved to be valid UTF-8 */
1835 exoptions |= PCRE_NO_UTF8_CHECK;
1836
1837 /* Check for too many substrings condition. */
1838 if (count == 0) {
1839 php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
1840 count = size_offsets/3;
1841 }
1842
1843 /* If something matched */
1844 if (count > 0 && (offsets[1] - offsets[0] >= 0)) {
1845 if (!no_empty || &subject[offsets[0]] != last_match) {
1846
1847 if (offset_capture) {
1848 /* Add (match, offset) pair to the return value */
1849 add_offset_pair(return_value, last_match, (int)(&subject[offsets[0]]-last_match), next_offset, NULL);
1850 } else {
1851 /* Add the piece to the return value */
1852 ZVAL_STRINGL(&tmp, last_match, &subject[offsets[0]]-last_match);
1853 zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
1854 }
1855
1856 /* One less left to do */
1857 if (limit_val != -1)
1858 limit_val--;
1859 }
1860
1861 last_match = &subject[offsets[1]];
1862 next_offset = offsets[1];
1863
1864 if (delim_capture) {
1865 int i, match_len;
1866 for (i = 1; i < count; i++) {
1867 match_len = offsets[(i<<1)+1] - offsets[i<<1];
1868 /* If we have matched a delimiter */
1869 if (!no_empty || match_len > 0) {
1870 if (offset_capture) {
1871 add_offset_pair(return_value, &subject[offsets[i<<1]], match_len, offsets[i<<1], NULL);
1872 } else {
1873 ZVAL_STRINGL(&tmp, &subject[offsets[i<<1]], match_len);
1874 zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
1875 }
1876 }
1877 }
1878 }
1879 } else if (count == PCRE_ERROR_NOMATCH) {
1880 /* If we previously set PCRE_NOTEMPTY_ATSTART after a null match,
1881 this is not necessarily the end. We need to advance
1882 the start offset, and continue. Fudge the offset values
1883 to achieve this, unless we're already at the end of the string. */
1884 if (g_notempty != 0 && start_offset < subject_len) {
1885 offsets[0] = start_offset;
1886 offsets[1] = start_offset + calculate_unit_length(pce, subject + start_offset);
1887 } else {
1888 break;
1889 }
1890 } else {
1891 pcre_handle_exec_error(count);
1892 break;
1893 }
1894
1895 /* If we have matched an empty string, mimic what Perl's /g options does.
1896 This turns out to be rather cunning. First we set PCRE_NOTEMPTY_ATSTART and try
1897 the match again at the same point. If this fails (picked up above) we
1898 advance to the next character. */
1899 g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED : 0;
1900
1901 /* Advance to the position right after the last full match */
1902 start_offset = offsets[1];
1903 }
1904
1905
1906 start_offset = (int)(last_match - subject); /* the offset might have been incremented, but without further successful matches */
1907
1908 if (!no_empty || start_offset < subject_len)
1909 {
1910 if (offset_capture) {
1911 /* Add the last (match, offset) pair to the return value */
1912 add_offset_pair(return_value, &subject[start_offset], subject_len - start_offset, start_offset, NULL);
1913 } else {
1914 /* Add the last piece to the return value */
1915 ZVAL_STRINGL(&tmp, last_match, subject + subject_len - last_match);
1916 zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
1917 }
1918 }
1919
1920
1921 /* Clean up */
1922 if (size_offsets <= 32) {
1923 free_alloca(offsets, use_heap);
1924 } else {
1925 efree(offsets);
1926 }
1927 }
1928 /* }}} */
1929
1930 /* {{{ proto string preg_quote(string str [, string delim_char])
1931 Quote regular expression characters plus an optional character */
PHP_FUNCTION(preg_quote)1932 static PHP_FUNCTION(preg_quote)
1933 {
1934 size_t in_str_len;
1935 char *in_str; /* Input string argument */
1936 char *in_str_end; /* End of the input string */
1937 size_t delim_len = 0;
1938 char *delim = NULL; /* Additional delimiter argument */
1939 zend_string *out_str; /* Output string with quoted characters */
1940 char *p, /* Iterator for input string */
1941 *q, /* Iterator for output string */
1942 delim_char=0, /* Delimiter character to be quoted */
1943 c; /* Current character */
1944 zend_bool quote_delim = 0; /* Whether to quote additional delim char */
1945
1946 /* Get the arguments and check for errors */
1947 ZEND_PARSE_PARAMETERS_START(1, 2)
1948 Z_PARAM_STRING(in_str, in_str_len)
1949 Z_PARAM_OPTIONAL
1950 Z_PARAM_STRING(delim, delim_len)
1951 ZEND_PARSE_PARAMETERS_END();
1952
1953 in_str_end = in_str + in_str_len;
1954
1955 /* Nothing to do if we got an empty string */
1956 if (in_str == in_str_end) {
1957 RETURN_EMPTY_STRING();
1958 }
1959
1960 if (delim && *delim) {
1961 delim_char = delim[0];
1962 quote_delim = 1;
1963 }
1964
1965 /* Allocate enough memory so that even if each character
1966 is quoted, we won't run out of room */
1967 out_str = zend_string_safe_alloc(4, in_str_len, 0, 0);
1968
1969 /* Go through the string and quote necessary characters */
1970 for (p = in_str, q = ZSTR_VAL(out_str); p != in_str_end; p++) {
1971 c = *p;
1972 switch(c) {
1973 case '.':
1974 case '\\':
1975 case '+':
1976 case '*':
1977 case '?':
1978 case '[':
1979 case '^':
1980 case ']':
1981 case '$':
1982 case '(':
1983 case ')':
1984 case '{':
1985 case '}':
1986 case '=':
1987 case '!':
1988 case '>':
1989 case '<':
1990 case '|':
1991 case ':':
1992 case '-':
1993 *q++ = '\\';
1994 *q++ = c;
1995 break;
1996
1997 case '\0':
1998 *q++ = '\\';
1999 *q++ = '0';
2000 *q++ = '0';
2001 *q++ = '0';
2002 break;
2003
2004 default:
2005 if (quote_delim && c == delim_char)
2006 *q++ = '\\';
2007 *q++ = c;
2008 break;
2009 }
2010 }
2011 *q = '\0';
2012
2013 /* Reallocate string and return it */
2014 out_str = zend_string_truncate(out_str, q - ZSTR_VAL(out_str), 0);
2015 RETURN_NEW_STR(out_str);
2016 }
2017 /* }}} */
2018
2019 /* {{{ proto array preg_grep(string regex, array input [, int flags])
2020 Searches array and returns entries which match regex */
PHP_FUNCTION(preg_grep)2021 static PHP_FUNCTION(preg_grep)
2022 {
2023 zend_string *regex; /* Regular expression */
2024 zval *input; /* Input array */
2025 zend_long flags = 0; /* Match control flags */
2026 pcre_cache_entry *pce; /* Compiled regular expression */
2027
2028 /* Get arguments and do error checking */
2029 ZEND_PARSE_PARAMETERS_START(2, 3)
2030 Z_PARAM_STR(regex)
2031 Z_PARAM_ARRAY(input)
2032 Z_PARAM_OPTIONAL
2033 Z_PARAM_LONG(flags)
2034 ZEND_PARSE_PARAMETERS_END();
2035
2036 /* Compile regex or get it from cache. */
2037 if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
2038 RETURN_FALSE;
2039 }
2040
2041 pce->refcount++;
2042 php_pcre_grep_impl(pce, input, return_value, flags);
2043 pce->refcount--;
2044 }
2045 /* }}} */
2046
php_pcre_grep_impl(pcre_cache_entry * pce,zval * input,zval * return_value,zend_long flags)2047 PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return_value, zend_long flags) /* {{{ */
2048 {
2049 zval *entry; /* An entry in the input array */
2050 pcre_extra *extra = pce->extra;/* Holds results of studying */
2051 pcre_extra extra_data; /* Used locally for exec options */
2052 int *offsets; /* Array of subpattern offsets */
2053 int size_offsets; /* Size of the offsets array */
2054 int count = 0; /* Count of matched subpatterns */
2055 zend_string *string_key;
2056 zend_ulong num_key;
2057 zend_bool invert; /* Whether to return non-matching
2058 entries */
2059 ALLOCA_FLAG(use_heap);
2060
2061 invert = flags & PREG_GREP_INVERT ? 1 : 0;
2062
2063 if (extra == NULL) {
2064 extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2065 extra = &extra_data;
2066 }
2067 extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
2068 extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
2069 #ifdef PCRE_EXTRA_MARK
2070 extra->flags &= ~PCRE_EXTRA_MARK;
2071 #endif
2072
2073 /* Calculate the size of the offsets array, and allocate memory for it. */
2074 size_offsets = (pce->capture_count + 1) * 3;
2075 if (size_offsets <= 32) {
2076 offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
2077 } else {
2078 offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
2079 }
2080
2081 /* Initialize return array */
2082 array_init(return_value);
2083
2084 PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
2085
2086 /* Go through the input array */
2087 ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(input), num_key, string_key, entry) {
2088 zend_string *subject_str = zval_get_string(entry);
2089
2090 /* Perform the match */
2091 count = pcre_exec(pce->re, extra, ZSTR_VAL(subject_str),
2092 (int)ZSTR_LEN(subject_str), 0,
2093 0, offsets, size_offsets);
2094
2095 /* Check for too many substrings condition. */
2096 if (count == 0) {
2097 php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
2098 count = size_offsets/3;
2099 } else if (count < 0 && count != PCRE_ERROR_NOMATCH) {
2100 pcre_handle_exec_error(count);
2101 zend_string_release(subject_str);
2102 break;
2103 }
2104
2105 /* If the entry fits our requirements */
2106 if ((count > 0 && !invert) || (count == PCRE_ERROR_NOMATCH && invert)) {
2107 if (Z_REFCOUNTED_P(entry)) {
2108 Z_ADDREF_P(entry);
2109 }
2110
2111 /* Add to return array */
2112 if (string_key) {
2113 zend_hash_update(Z_ARRVAL_P(return_value), string_key, entry);
2114 } else {
2115 zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, entry);
2116 }
2117 }
2118
2119 zend_string_release(subject_str);
2120 } ZEND_HASH_FOREACH_END();
2121
2122 /* Clean up */
2123 if (size_offsets <= 32) {
2124 free_alloca(offsets, use_heap);
2125 } else {
2126 efree(offsets);
2127 }
2128 }
2129 /* }}} */
2130
2131 /* {{{ proto int preg_last_error()
2132 Returns the error code of the last regexp execution. */
PHP_FUNCTION(preg_last_error)2133 static PHP_FUNCTION(preg_last_error)
2134 {
2135 ZEND_PARSE_PARAMETERS_START(0, 0)
2136 ZEND_PARSE_PARAMETERS_END();
2137
2138 RETURN_LONG(PCRE_G(error_code));
2139 }
2140 /* }}} */
2141
2142 /* {{{ module definition structures */
2143
2144 /* {{{ arginfo */
2145 ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match, 0, 0, 2)
2146 ZEND_ARG_INFO(0, pattern)
2147 ZEND_ARG_INFO(0, subject)
2148 ZEND_ARG_INFO(1, subpatterns) /* array */
2149 ZEND_ARG_INFO(0, flags)
2150 ZEND_ARG_INFO(0, offset)
2151 ZEND_END_ARG_INFO()
2152
2153 ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match_all, 0, 0, 2)
2154 ZEND_ARG_INFO(0, pattern)
2155 ZEND_ARG_INFO(0, subject)
2156 ZEND_ARG_INFO(1, subpatterns) /* array */
2157 ZEND_ARG_INFO(0, flags)
2158 ZEND_ARG_INFO(0, offset)
2159 ZEND_END_ARG_INFO()
2160
2161 ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace, 0, 0, 3)
2162 ZEND_ARG_INFO(0, regex)
2163 ZEND_ARG_INFO(0, replace)
2164 ZEND_ARG_INFO(0, subject)
2165 ZEND_ARG_INFO(0, limit)
2166 ZEND_ARG_INFO(1, count)
2167 ZEND_END_ARG_INFO()
2168
2169 ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace_callback, 0, 0, 3)
2170 ZEND_ARG_INFO(0, regex)
2171 ZEND_ARG_INFO(0, callback)
2172 ZEND_ARG_INFO(0, subject)
2173 ZEND_ARG_INFO(0, limit)
2174 ZEND_ARG_INFO(1, count)
2175 ZEND_END_ARG_INFO()
2176
2177 ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace_callback_array, 0, 0, 2)
2178 ZEND_ARG_INFO(0, pattern)
2179 ZEND_ARG_INFO(0, subject)
2180 ZEND_ARG_INFO(0, limit)
2181 ZEND_ARG_INFO(1, count)
2182 ZEND_END_ARG_INFO()
2183
2184 ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_split, 0, 0, 2)
2185 ZEND_ARG_INFO(0, pattern)
2186 ZEND_ARG_INFO(0, subject)
2187 ZEND_ARG_INFO(0, limit)
2188 ZEND_ARG_INFO(0, flags)
2189 ZEND_END_ARG_INFO()
2190
2191 ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_quote, 0, 0, 1)
2192 ZEND_ARG_INFO(0, str)
2193 ZEND_ARG_INFO(0, delim_char)
2194 ZEND_END_ARG_INFO()
2195
2196 ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_grep, 0, 0, 2)
2197 ZEND_ARG_INFO(0, regex)
2198 ZEND_ARG_INFO(0, input) /* array */
2199 ZEND_ARG_INFO(0, flags)
2200 ZEND_END_ARG_INFO()
2201
2202 ZEND_BEGIN_ARG_INFO(arginfo_preg_last_error, 0)
2203 ZEND_END_ARG_INFO()
2204 /* }}} */
2205
2206 static const zend_function_entry pcre_functions[] = {
2207 PHP_FE(preg_match, arginfo_preg_match)
2208 PHP_FE(preg_match_all, arginfo_preg_match_all)
2209 PHP_FE(preg_replace, arginfo_preg_replace)
2210 PHP_FE(preg_replace_callback, arginfo_preg_replace_callback)
2211 PHP_FE(preg_replace_callback_array, arginfo_preg_replace_callback_array)
2212 PHP_FE(preg_filter, arginfo_preg_replace)
2213 PHP_FE(preg_split, arginfo_preg_split)
2214 PHP_FE(preg_quote, arginfo_preg_quote)
2215 PHP_FE(preg_grep, arginfo_preg_grep)
2216 PHP_FE(preg_last_error, arginfo_preg_last_error)
2217 PHP_FE_END
2218 };
2219
2220 zend_module_entry pcre_module_entry = {
2221 STANDARD_MODULE_HEADER,
2222 "pcre",
2223 pcre_functions,
2224 PHP_MINIT(pcre),
2225 PHP_MSHUTDOWN(pcre),
2226 #ifdef HAVE_PCRE_JIT_SUPPORT
2227 PHP_RINIT(pcre),
2228 #else
2229 NULL,
2230 #endif
2231 NULL,
2232 PHP_MINFO(pcre),
2233 PHP_PCRE_VERSION,
2234 PHP_MODULE_GLOBALS(pcre),
2235 PHP_GINIT(pcre),
2236 PHP_GSHUTDOWN(pcre),
2237 NULL,
2238 STANDARD_MODULE_PROPERTIES_EX
2239 };
2240
2241 #ifdef COMPILE_DL_PCRE
2242 ZEND_GET_MODULE(pcre)
2243 #endif
2244
2245 /* }}} */
2246
2247 #endif /* HAVE_PCRE || HAVE_BUNDLED_PCRE */
2248
2249 /*
2250 * Local variables:
2251 * tab-width: 4
2252 * c-basic-offset: 4
2253 * End:
2254 * vim600: sw=4 ts=4 fdm=marker
2255 * vim<600: sw=4 ts=4
2256 */
2257