1 /*
2 +----------------------------------------------------------------------+
3 | PHP Version 5 |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 1997-2014 The PHP Group |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
15 | Author: Andrei Zmievski <andrei@php.net> |
16 +----------------------------------------------------------------------+
17 */
18
19 /* $Id$ */
20
21 #include "php.h"
22 #include "php_ini.h"
23 #include "php_globals.h"
24 #include "php_pcre.h"
25 #include "ext/standard/info.h"
26 #include "ext/standard/php_smart_str.h"
27
28 #if HAVE_PCRE || HAVE_BUNDLED_PCRE
29
30 #include "ext/standard/php_string.h"
31
/* Result layout selectors for global matching; carried in the low byte of the flags argument. */
#define PREG_PATTERN_ORDER			1
#define PREG_SET_ORDER				2
/* Flag bit above the low byte: return every match together with its offset. */
#define PREG_OFFSET_CAPTURE			(1 << 8)

/* Flags registered for userland as PREG_SPLIT_*. */
#define PREG_SPLIT_NO_EMPTY			(1 << 0)
#define PREG_SPLIT_DELIM_CAPTURE	(1 << 1)
#define PREG_SPLIT_OFFSET_CAPTURE	(1 << 2)

/* preg-level option recorded when a pattern carries the 'e' modifier. */
#define PREG_REPLACE_EVAL			(1 << 0)

/* Flag registered for userland as PREG_GREP_INVERT. */
#define PREG_GREP_INVERT			(1 << 0)

/* Entry count at which part of the compiled-regex cache is evicted. */
#define PCRE_CACHE_SIZE				4096
45
/*
 * Values stored in PCRE_G(error_code). The same numbers are exported to
 * userland as the PREG_*_ERROR constants, so they must not be renumbered.
 */
enum {
	PHP_PCRE_NO_ERROR              = 0,
	PHP_PCRE_INTERNAL_ERROR        = 1,
	PHP_PCRE_BACKTRACK_LIMIT_ERROR = 2,
	PHP_PCRE_RECURSION_LIMIT_ERROR = 3,
	PHP_PCRE_BAD_UTF8_ERROR        = 4,
	PHP_PCRE_BAD_UTF8_OFFSET_ERROR = 5
};
54
55
ZEND_DECLARE_MODULE_GLOBALS(pcre)56 ZEND_DECLARE_MODULE_GLOBALS(pcre)
57
58
59 static void pcre_handle_exec_error(int pcre_code TSRMLS_DC) /* {{{ */
60 {
61 int preg_code = 0;
62
63 switch (pcre_code) {
64 case PCRE_ERROR_MATCHLIMIT:
65 preg_code = PHP_PCRE_BACKTRACK_LIMIT_ERROR;
66 break;
67
68 case PCRE_ERROR_RECURSIONLIMIT:
69 preg_code = PHP_PCRE_RECURSION_LIMIT_ERROR;
70 break;
71
72 case PCRE_ERROR_BADUTF8:
73 preg_code = PHP_PCRE_BAD_UTF8_ERROR;
74 break;
75
76 case PCRE_ERROR_BADUTF8_OFFSET:
77 preg_code = PHP_PCRE_BAD_UTF8_OFFSET_ERROR;
78 break;
79
80 default:
81 preg_code = PHP_PCRE_INTERNAL_ERROR;
82 break;
83 }
84
85 PCRE_G(error_code) = preg_code;
86 }
87 /* }}} */
88
php_free_pcre_cache(void * data)89 static void php_free_pcre_cache(void *data) /* {{{ */
90 {
91 pcre_cache_entry *pce = (pcre_cache_entry *) data;
92 if (!pce) return;
93 pefree(pce->re, 1);
94 if (pce->extra) pefree(pce->extra, 1);
95 #if HAVE_SETLOCALE
96 if ((void*)pce->tables) pefree((void*)pce->tables, 1);
97 pefree(pce->locale, 1);
98 #endif
99 }
100 /* }}} */
101
PHP_GINIT_FUNCTION(pcre)102 static PHP_GINIT_FUNCTION(pcre) /* {{{ */
103 {
104 zend_hash_init(&pcre_globals->pcre_cache, 0, NULL, php_free_pcre_cache, 1);
105 pcre_globals->backtrack_limit = 0;
106 pcre_globals->recursion_limit = 0;
107 pcre_globals->error_code = PHP_PCRE_NO_ERROR;
108 }
109 /* }}} */
110
PHP_GSHUTDOWN_FUNCTION(pcre)111 static PHP_GSHUTDOWN_FUNCTION(pcre) /* {{{ */
112 {
113 zend_hash_destroy(&pcre_globals->pcre_cache);
114 }
115 /* }}} */
116
117 PHP_INI_BEGIN()
118 STD_PHP_INI_ENTRY("pcre.backtrack_limit", "1000000", PHP_INI_ALL, OnUpdateLong, backtrack_limit, zend_pcre_globals, pcre_globals)
119 STD_PHP_INI_ENTRY("pcre.recursion_limit", "100000", PHP_INI_ALL, OnUpdateLong, recursion_limit, zend_pcre_globals, pcre_globals)
PHP_INI_END()120 PHP_INI_END()
121
122
/* {{{ PHP_MINFO_FUNCTION(pcre)
 * Print the module's info table (support flag and the PCRE library version)
 * followed by its INI entries.
 */
static PHP_MINFO_FUNCTION(pcre)
{
	php_info_print_table_start();
	php_info_print_table_row(2,
		"PCRE (Perl Compatible Regular Expressions) Support",
		"enabled");
	php_info_print_table_row(2,
		"PCRE Library Version",
		pcre_version());
	php_info_print_table_end();

	DISPLAY_INI_ENTRIES();
}
/* }}} */
134
135 /* {{{ PHP_MINIT_FUNCTION(pcre) */
PHP_MINIT_FUNCTION(pcre)136 static PHP_MINIT_FUNCTION(pcre)
137 {
138 REGISTER_INI_ENTRIES();
139
140 REGISTER_LONG_CONSTANT("PREG_PATTERN_ORDER", PREG_PATTERN_ORDER, CONST_CS | CONST_PERSISTENT);
141 REGISTER_LONG_CONSTANT("PREG_SET_ORDER", PREG_SET_ORDER, CONST_CS | CONST_PERSISTENT);
142 REGISTER_LONG_CONSTANT("PREG_OFFSET_CAPTURE", PREG_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
143 REGISTER_LONG_CONSTANT("PREG_SPLIT_NO_EMPTY", PREG_SPLIT_NO_EMPTY, CONST_CS | CONST_PERSISTENT);
144 REGISTER_LONG_CONSTANT("PREG_SPLIT_DELIM_CAPTURE", PREG_SPLIT_DELIM_CAPTURE, CONST_CS | CONST_PERSISTENT);
145 REGISTER_LONG_CONSTANT("PREG_SPLIT_OFFSET_CAPTURE", PREG_SPLIT_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
146 REGISTER_LONG_CONSTANT("PREG_GREP_INVERT", PREG_GREP_INVERT, CONST_CS | CONST_PERSISTENT);
147
148 REGISTER_LONG_CONSTANT("PREG_NO_ERROR", PHP_PCRE_NO_ERROR, CONST_CS | CONST_PERSISTENT);
149 REGISTER_LONG_CONSTANT("PREG_INTERNAL_ERROR", PHP_PCRE_INTERNAL_ERROR, CONST_CS | CONST_PERSISTENT);
150 REGISTER_LONG_CONSTANT("PREG_BACKTRACK_LIMIT_ERROR", PHP_PCRE_BACKTRACK_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
151 REGISTER_LONG_CONSTANT("PREG_RECURSION_LIMIT_ERROR", PHP_PCRE_RECURSION_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
152 REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_ERROR", PHP_PCRE_BAD_UTF8_ERROR, CONST_CS | CONST_PERSISTENT);
153 REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_OFFSET_ERROR", PHP_PCRE_BAD_UTF8_OFFSET_ERROR, CONST_CS | CONST_PERSISTENT);
154 REGISTER_STRING_CONSTANT("PCRE_VERSION", (char *)pcre_version(), CONST_CS | CONST_PERSISTENT);
155
156 return SUCCESS;
157 }
158 /* }}} */
159
160 /* {{{ PHP_MSHUTDOWN_FUNCTION(pcre) */
PHP_MSHUTDOWN_FUNCTION(pcre)161 static PHP_MSHUTDOWN_FUNCTION(pcre)
162 {
163 UNREGISTER_INI_ENTRIES();
164
165 return SUCCESS;
166 }
167 /* }}} */
168
169 /* {{{ static pcre_clean_cache */
pcre_clean_cache(void * data,void * arg TSRMLS_DC)170 static int pcre_clean_cache(void *data, void *arg TSRMLS_DC)
171 {
172 int *num_clean = (int *)arg;
173
174 if (*num_clean > 0) {
175 (*num_clean)--;
176 return 1;
177 } else {
178 return 0;
179 }
180 }
181 /* }}} */
182
183 /* {{{ static make_subpats_table */
make_subpats_table(int num_subpats,pcre_cache_entry * pce TSRMLS_DC)184 static char **make_subpats_table(int num_subpats, pcre_cache_entry *pce TSRMLS_DC)
185 {
186 pcre_extra *extra = pce->extra;
187 int name_cnt = 0, name_size, ni = 0;
188 int rc;
189 char *name_table;
190 unsigned short name_idx;
191 char **subpat_names = (char **)ecalloc(num_subpats, sizeof(char *));
192
193 rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMECOUNT, &name_cnt);
194 if (rc < 0) {
195 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
196 efree(subpat_names);
197 return NULL;
198 }
199 if (name_cnt > 0) {
200 int rc1, rc2;
201
202 rc1 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMETABLE, &name_table);
203 rc2 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMEENTRYSIZE, &name_size);
204 rc = rc2 ? rc2 : rc1;
205 if (rc < 0) {
206 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
207 efree(subpat_names);
208 return NULL;
209 }
210
211 while (ni++ < name_cnt) {
212 name_idx = 0xff * (unsigned char)name_table[0] + (unsigned char)name_table[1];
213 subpat_names[name_idx] = name_table + 2;
214 if (is_numeric_string(subpat_names[name_idx], strlen(subpat_names[name_idx]), NULL, NULL, 0) > 0) {
215 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Numeric named subpatterns are not allowed");
216 efree(subpat_names);
217 return NULL;
218 }
219 name_table += name_size;
220 }
221 }
222
223 return subpat_names;
224 }
225 /* }}} */
226
227 /* {{{ pcre_get_compiled_regex_cache
228 */
pcre_get_compiled_regex_cache(char * regex,int regex_len TSRMLS_DC)229 PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(char *regex, int regex_len TSRMLS_DC)
230 {
231 pcre *re = NULL;
232 pcre_extra *extra;
233 int coptions = 0;
234 int soptions = 0;
235 const char *error;
236 int erroffset;
237 char delimiter;
238 char start_delimiter;
239 char end_delimiter;
240 char *p, *pp;
241 char *pattern;
242 int do_study = 0;
243 int poptions = 0;
244 int count = 0;
245 unsigned const char *tables = NULL;
246 #if HAVE_SETLOCALE
247 char *locale;
248 #endif
249 pcre_cache_entry *pce;
250 pcre_cache_entry new_entry;
251 char *tmp = NULL;
252
253 #if HAVE_SETLOCALE
254 # if defined(PHP_WIN32) && defined(ZTS)
255 _configthreadlocale(_ENABLE_PER_THREAD_LOCALE);
256 # endif
257 locale = setlocale(LC_CTYPE, NULL);
258 #endif
259
260 /* Try to lookup the cached regex entry, and if successful, just pass
261 back the compiled pattern, otherwise go on and compile it. */
262 if (zend_hash_find(&PCRE_G(pcre_cache), regex, regex_len+1, (void **)&pce) == SUCCESS) {
263 /*
264 * We use a quick pcre_fullinfo() check to see whether cache is corrupted, and if it
265 * is, we flush it and compile the pattern from scratch.
266 */
267 if (pcre_fullinfo(pce->re, NULL, PCRE_INFO_CAPTURECOUNT, &count) == PCRE_ERROR_BADMAGIC) {
268 zend_hash_clean(&PCRE_G(pcre_cache));
269 } else {
270 #if HAVE_SETLOCALE
271 if (!strcmp(pce->locale, locale)) {
272 #endif
273 return pce;
274 #if HAVE_SETLOCALE
275 }
276 #endif
277 }
278 }
279
280 p = regex;
281
282 /* Parse through the leading whitespace, and display a warning if we
283 get to the end without encountering a delimiter. */
284 while (isspace((int)*(unsigned char *)p)) p++;
285 if (*p == 0) {
286 php_error_docref(NULL TSRMLS_CC, E_WARNING,
287 p < regex + regex_len ? "Null byte in regex" : "Empty regular expression");
288 return NULL;
289 }
290
291 /* Get the delimiter and display a warning if it is alphanumeric
292 or a backslash. */
293 delimiter = *p++;
294 if (isalnum((int)*(unsigned char *)&delimiter) || delimiter == '\\') {
295 php_error_docref(NULL TSRMLS_CC,E_WARNING, "Delimiter must not be alphanumeric or backslash");
296 return NULL;
297 }
298
299 start_delimiter = delimiter;
300 if ((pp = strchr("([{< )]}> )]}>", delimiter)))
301 delimiter = pp[5];
302 end_delimiter = delimiter;
303
304 pp = p;
305
306 if (start_delimiter == end_delimiter) {
307 /* We need to iterate through the pattern, searching for the ending delimiter,
308 but skipping the backslashed delimiters. If the ending delimiter is not
309 found, display a warning. */
310 while (*pp != 0) {
311 if (*pp == '\\' && pp[1] != 0) pp++;
312 else if (*pp == delimiter)
313 break;
314 pp++;
315 }
316 } else {
317 /* We iterate through the pattern, searching for the matching ending
318 * delimiter. For each matching starting delimiter, we increment nesting
319 * level, and decrement it for each matching ending delimiter. If we
320 * reach the end of the pattern without matching, display a warning.
321 */
322 int brackets = 1; /* brackets nesting level */
323 while (*pp != 0) {
324 if (*pp == '\\' && pp[1] != 0) pp++;
325 else if (*pp == end_delimiter && --brackets <= 0)
326 break;
327 else if (*pp == start_delimiter)
328 brackets++;
329 pp++;
330 }
331 }
332
333 if (*pp == 0) {
334 if (pp < regex + regex_len) {
335 php_error_docref(NULL TSRMLS_CC,E_WARNING, "Null byte in regex");
336 } else if (start_delimiter == end_delimiter) {
337 php_error_docref(NULL TSRMLS_CC,E_WARNING, "No ending delimiter '%c' found", delimiter);
338 } else {
339 php_error_docref(NULL TSRMLS_CC,E_WARNING, "No ending matching delimiter '%c' found", delimiter);
340 }
341 return NULL;
342 }
343
344 /* Make a copy of the actual pattern. */
345 pattern = estrndup(p, pp-p);
346
347 /* Move on to the options */
348 pp++;
349
350 /* Parse through the options, setting appropriate flags. Display
351 a warning if we encounter an unknown modifier. */
352 while (pp < regex + regex_len) {
353 switch (*pp++) {
354 /* Perl compatible options */
355 case 'i': coptions |= PCRE_CASELESS; break;
356 case 'm': coptions |= PCRE_MULTILINE; break;
357 case 's': coptions |= PCRE_DOTALL; break;
358 case 'x': coptions |= PCRE_EXTENDED; break;
359
360 /* PCRE specific options */
361 case 'A': coptions |= PCRE_ANCHORED; break;
362 case 'D': coptions |= PCRE_DOLLAR_ENDONLY;break;
363 case 'S': do_study = 1; break;
364 case 'U': coptions |= PCRE_UNGREEDY; break;
365 case 'X': coptions |= PCRE_EXTRA; break;
366 case 'u': coptions |= PCRE_UTF8;
367 /* In PCRE, by default, \d, \D, \s, \S, \w, and \W recognize only ASCII
368 characters, even in UTF-8 mode. However, this can be changed by setting
369 the PCRE_UCP option. */
370 #ifdef PCRE_UCP
371 coptions |= PCRE_UCP;
372 #endif
373 break;
374
375 /* Custom preg options */
376 case 'e': poptions |= PREG_REPLACE_EVAL; break;
377
378 case ' ':
379 case '\n':
380 break;
381
382 default:
383 if (pp[-1]) {
384 php_error_docref(NULL TSRMLS_CC,E_WARNING, "Unknown modifier '%c'", pp[-1]);
385 } else {
386 php_error_docref(NULL TSRMLS_CC,E_WARNING, "Null byte in regex");
387 }
388 efree(pattern);
389 return NULL;
390 }
391 }
392
393 #if HAVE_SETLOCALE
394 if (strcmp(locale, "C"))
395 tables = pcre_maketables();
396 #endif
397
398 /* Compile pattern and display a warning if compilation failed. */
399 re = pcre_compile(pattern,
400 coptions,
401 &error,
402 &erroffset,
403 tables);
404
405 if (re == NULL) {
406 php_error_docref(NULL TSRMLS_CC,E_WARNING, "Compilation failed: %s at offset %d", error, erroffset);
407 efree(pattern);
408 if (tables) {
409 pefree((void*)tables, 1);
410 }
411 return NULL;
412 }
413
414 /* If study option was specified, study the pattern and
415 store the result in extra for passing to pcre_exec. */
416 if (do_study) {
417 extra = pcre_study(re, soptions, &error);
418 if (extra) {
419 extra->flags |= PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
420 }
421 if (error != NULL) {
422 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Error while studying pattern");
423 }
424 } else {
425 extra = NULL;
426 }
427
428 efree(pattern);
429
430 /*
431 * If we reached cache limit, clean out the items from the head of the list;
432 * these are supposedly the oldest ones (but not necessarily the least used
433 * ones).
434 */
435 if (zend_hash_num_elements(&PCRE_G(pcre_cache)) == PCRE_CACHE_SIZE) {
436 int num_clean = PCRE_CACHE_SIZE / 8;
437 zend_hash_apply_with_argument(&PCRE_G(pcre_cache), pcre_clean_cache, &num_clean TSRMLS_CC);
438 }
439
440 /* Store the compiled pattern and extra info in the cache. */
441 new_entry.re = re;
442 new_entry.extra = extra;
443 new_entry.preg_options = poptions;
444 new_entry.compile_options = coptions;
445 #if HAVE_SETLOCALE
446 new_entry.locale = pestrdup(locale, 1);
447 new_entry.tables = tables;
448 #endif
449
450 /*
451 * Interned strings are not duplicated when stored in HashTable,
452 * but all the interned strings created during HTTP request are removed
453 * at end of request. However PCRE_G(pcre_cache) must be consistent
454 * on the next request as well. So we disable usage of interned strings
455 * as hash keys especually for this table.
456 * See bug #63180
457 */
458 if (IS_INTERNED(regex)) {
459 regex = tmp = estrndup(regex, regex_len);
460 }
461
462 zend_hash_update(&PCRE_G(pcre_cache), regex, regex_len+1, (void *)&new_entry,
463 sizeof(pcre_cache_entry), (void**)&pce);
464
465 if (tmp) {
466 efree(tmp);
467 }
468
469 return pce;
470 }
471 /* }}} */
472
473 /* {{{ pcre_get_compiled_regex
474 */
pcre_get_compiled_regex(char * regex,pcre_extra ** extra,int * preg_options TSRMLS_DC)475 PHPAPI pcre* pcre_get_compiled_regex(char *regex, pcre_extra **extra, int *preg_options TSRMLS_DC)
476 {
477 pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex, strlen(regex) TSRMLS_CC);
478
479 if (extra) {
480 *extra = pce ? pce->extra : NULL;
481 }
482 if (preg_options) {
483 *preg_options = pce ? pce->preg_options : 0;
484 }
485
486 return pce ? pce->re : NULL;
487 }
488 /* }}} */
489
490 /* {{{ pcre_get_compiled_regex_ex
491 */
pcre_get_compiled_regex_ex(char * regex,pcre_extra ** extra,int * preg_options,int * compile_options TSRMLS_DC)492 PHPAPI pcre* pcre_get_compiled_regex_ex(char *regex, pcre_extra **extra, int *preg_options, int *compile_options TSRMLS_DC)
493 {
494 pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex, strlen(regex) TSRMLS_CC);
495
496 if (extra) {
497 *extra = pce ? pce->extra : NULL;
498 }
499 if (preg_options) {
500 *preg_options = pce ? pce->preg_options : 0;
501 }
502 if (compile_options) {
503 *compile_options = pce ? pce->compile_options : 0;
504 }
505
506 return pce ? pce->re : NULL;
507 }
508 /* }}} */
509
510 /* {{{ add_offset_pair */
add_offset_pair(zval * result,char * str,int len,int offset,char * name)511 static inline void add_offset_pair(zval *result, char *str, int len, int offset, char *name)
512 {
513 zval *match_pair;
514
515 ALLOC_ZVAL(match_pair);
516 array_init(match_pair);
517 INIT_PZVAL(match_pair);
518
519 /* Add (match, offset) to the return value */
520 add_next_index_stringl(match_pair, str, len, 1);
521 add_next_index_long(match_pair, offset);
522
523 if (name) {
524 zval_add_ref(&match_pair);
525 zend_hash_update(Z_ARRVAL_P(result), name, strlen(name)+1, &match_pair, sizeof(zval *), NULL);
526 }
527 zend_hash_next_index_insert(Z_ARRVAL_P(result), &match_pair, sizeof(zval *), NULL);
528 }
529 /* }}} */
530
/* {{{ php_do_pcre_match
 * Shared implementation of preg_match() (global == 0) and preg_match_all()
 * (global != 0): parse the userland arguments, obtain the compiled pattern
 * and delegate to php_pcre_match_impl(). Returns FALSE to userland when the
 * arguments cannot be parsed or the pattern cannot be compiled.
 */
static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global)
{
	/* parameters */
	char			 *regex;			/* Regular expression */
	char			 *subject;			/* String to match against */
	int				  regex_len;
	int				  subject_len;
	pcre_cache_entry *pce;				/* Compiled regular expression */
	zval			 *subpats = NULL;	/* Array for subpatterns */
	long			  flags = 0;		/* Match control flags */
	long			  start_offset = 0;	/* Where the new search starts */

	/* "ss|zll": pattern, subject [, subpatterns [, flags [, offset]]] */
	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|zll", &regex, &regex_len,
							  &subject, &subject_len, &subpats, &flags, &start_offset) == FAILURE) {
		RETURN_FALSE;
	}

	/* Compile regex or get it from cache. */
	if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) {
		RETURN_FALSE;
	}

	/* Flags are only honoured when the caller actually passed a 4th argument. */
	php_pcre_match_impl(pce, subject, subject_len, return_value, subpats,
		global, ZEND_NUM_ARGS() >= 4, flags, start_offset TSRMLS_CC);
}
/* }}} */
557
558 /* {{{ php_pcre_match_impl() */
php_pcre_match_impl(pcre_cache_entry * pce,char * subject,int subject_len,zval * return_value,zval * subpats,int global,int use_flags,long flags,long start_offset TSRMLS_DC)559 PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value,
560 zval *subpats, int global, int use_flags, long flags, long start_offset TSRMLS_DC)
561 {
562 zval *result_set, /* Holds a set of subpatterns after
563 a global match */
564 **match_sets = NULL; /* An array of sets of matches for each
565 subpattern after a global match */
566 pcre_extra *extra = pce->extra;/* Holds results of studying */
567 pcre_extra extra_data; /* Used locally for exec options */
568 int exoptions = 0; /* Execution options */
569 int count = 0; /* Count of matched subpatterns */
570 int *offsets; /* Array of subpattern offsets */
571 int num_subpats; /* Number of captured subpatterns */
572 int size_offsets; /* Size of the offsets array */
573 int matched; /* Has anything matched */
574 int g_notempty = 0; /* If the match should not be empty */
575 const char **stringlist; /* Holds list of subpatterns */
576 char **subpat_names; /* Array for named subpatterns */
577 int i, rc;
578 int subpats_order; /* Order of subpattern matches */
579 int offset_capture; /* Capture match offsets: yes/no */
580
581 /* Overwrite the passed-in value for subpatterns with an empty array. */
582 if (subpats != NULL) {
583 zval_dtor(subpats);
584 array_init(subpats);
585 }
586
587 subpats_order = global ? PREG_PATTERN_ORDER : 0;
588
589 if (use_flags) {
590 offset_capture = flags & PREG_OFFSET_CAPTURE;
591
592 /*
593 * subpats_order is pre-set to pattern mode so we change it only if
594 * necessary.
595 */
596 if (flags & 0xff) {
597 subpats_order = flags & 0xff;
598 }
599 if ((global && (subpats_order < PREG_PATTERN_ORDER || subpats_order > PREG_SET_ORDER)) ||
600 (!global && subpats_order != 0)) {
601 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid flags specified");
602 return;
603 }
604 } else {
605 offset_capture = 0;
606 }
607
608 /* Negative offset counts from the end of the string. */
609 if (start_offset < 0) {
610 start_offset = subject_len + start_offset;
611 if (start_offset < 0) {
612 start_offset = 0;
613 }
614 }
615
616 if (extra == NULL) {
617 extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
618 extra = &extra_data;
619 }
620 extra->match_limit = PCRE_G(backtrack_limit);
621 extra->match_limit_recursion = PCRE_G(recursion_limit);
622
623 /* Calculate the size of the offsets array, and allocate memory for it. */
624 rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &num_subpats);
625 if (rc < 0) {
626 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
627 RETURN_FALSE;
628 }
629 num_subpats++;
630 size_offsets = num_subpats * 3;
631
632 /*
633 * Build a mapping from subpattern numbers to their names. We will always
634 * allocate the table, even though there may be no named subpatterns. This
635 * avoids somewhat more complicated logic in the inner loops.
636 */
637 subpat_names = make_subpats_table(num_subpats, pce TSRMLS_CC);
638 if (!subpat_names) {
639 RETURN_FALSE;
640 }
641
642 offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
643 memset(offsets, 0, size_offsets*sizeof(int));
644 /* Allocate match sets array and initialize the values. */
645 if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
646 match_sets = (zval **)safe_emalloc(num_subpats, sizeof(zval *), 0);
647 for (i=0; i<num_subpats; i++) {
648 ALLOC_ZVAL(match_sets[i]);
649 array_init(match_sets[i]);
650 INIT_PZVAL(match_sets[i]);
651 }
652 }
653
654 matched = 0;
655 PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
656
657 do {
658 /* Execute the regular expression. */
659 count = pcre_exec(pce->re, extra, subject, subject_len, start_offset,
660 exoptions|g_notempty, offsets, size_offsets);
661
662 /* the string was already proved to be valid UTF-8 */
663 exoptions |= PCRE_NO_UTF8_CHECK;
664
665 /* Check for too many substrings condition. */
666 if (count == 0) {
667 php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Matched, but too many substrings");
668 count = size_offsets/3;
669 }
670
671 /* If something has matched */
672 if (count > 0) {
673 matched++;
674
675 /* If subpatterns array has been passed, fill it in with values. */
676 if (subpats != NULL) {
677 /* Try to get the list of substrings and display a warning if failed. */
678 if ((offsets[1] - offsets[0] < 0) || pcre_get_substring_list(subject, offsets, count, &stringlist) < 0) {
679 efree(subpat_names);
680 efree(offsets);
681 if (match_sets) efree(match_sets);
682 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Get subpatterns list failed");
683 RETURN_FALSE;
684 }
685
686 if (global) { /* global pattern matching */
687 if (subpats && subpats_order == PREG_PATTERN_ORDER) {
688 /* For each subpattern, insert it into the appropriate array. */
689 for (i = 0; i < count; i++) {
690 if (offset_capture) {
691 add_offset_pair(match_sets[i], (char *)stringlist[i],
692 offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL);
693 } else {
694 add_next_index_stringl(match_sets[i], (char *)stringlist[i],
695 offsets[(i<<1)+1] - offsets[i<<1], 1);
696 }
697 }
698 /*
699 * If the number of captured subpatterns on this run is
700 * less than the total possible number, pad the result
701 * arrays with empty strings.
702 */
703 if (count < num_subpats) {
704 for (; i < num_subpats; i++) {
705 add_next_index_string(match_sets[i], "", 1);
706 }
707 }
708 } else {
709 /* Allocate the result set array */
710 ALLOC_ZVAL(result_set);
711 array_init(result_set);
712 INIT_PZVAL(result_set);
713
714 /* Add all the subpatterns to it */
715 for (i = 0; i < count; i++) {
716 if (offset_capture) {
717 add_offset_pair(result_set, (char *)stringlist[i],
718 offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], subpat_names[i]);
719 } else {
720 if (subpat_names[i]) {
721 add_assoc_stringl(result_set, subpat_names[i], (char *)stringlist[i],
722 offsets[(i<<1)+1] - offsets[i<<1], 1);
723 }
724 add_next_index_stringl(result_set, (char *)stringlist[i],
725 offsets[(i<<1)+1] - offsets[i<<1], 1);
726 }
727 }
728 /* And add it to the output array */
729 zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &result_set, sizeof(zval *), NULL);
730 }
731 } else { /* single pattern matching */
732 /* For each subpattern, insert it into the subpatterns array. */
733 for (i = 0; i < count; i++) {
734 if (offset_capture) {
735 add_offset_pair(subpats, (char *)stringlist[i],
736 offsets[(i<<1)+1] - offsets[i<<1],
737 offsets[i<<1], subpat_names[i]);
738 } else {
739 if (subpat_names[i]) {
740 add_assoc_stringl(subpats, subpat_names[i], (char *)stringlist[i],
741 offsets[(i<<1)+1] - offsets[i<<1], 1);
742 }
743 add_next_index_stringl(subpats, (char *)stringlist[i],
744 offsets[(i<<1)+1] - offsets[i<<1], 1);
745 }
746 }
747 }
748
749 pcre_free((void *) stringlist);
750 }
751 } else if (count == PCRE_ERROR_NOMATCH) {
752 /* If we previously set PCRE_NOTEMPTY after a null match,
753 this is not necessarily the end. We need to advance
754 the start offset, and continue. Fudge the offset values
755 to achieve this, unless we're already at the end of the string. */
756 if (g_notempty != 0 && start_offset < subject_len) {
757 offsets[0] = start_offset;
758 offsets[1] = start_offset + 1;
759 } else
760 break;
761 } else {
762 pcre_handle_exec_error(count TSRMLS_CC);
763 break;
764 }
765
766 /* If we have matched an empty string, mimic what Perl's /g options does.
767 This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
768 the match again at the same point. If this fails (picked up above) we
769 advance to the next character. */
770 g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
771
772 /* Advance to the position right after the last full match */
773 start_offset = offsets[1];
774 } while (global);
775
776 /* Add the match sets to the output array and clean up */
777 if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
778 for (i = 0; i < num_subpats; i++) {
779 if (subpat_names[i]) {
780 zend_hash_update(Z_ARRVAL_P(subpats), subpat_names[i],
781 strlen(subpat_names[i])+1, &match_sets[i], sizeof(zval *), NULL);
782 Z_ADDREF_P(match_sets[i]);
783 }
784 zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i], sizeof(zval *), NULL);
785 }
786 efree(match_sets);
787 }
788
789 efree(offsets);
790 efree(subpat_names);
791
792 /* Did we encounter an error? */
793 if (PCRE_G(error_code) == PHP_PCRE_NO_ERROR) {
794 RETVAL_LONG(matched);
795 } else {
796 RETVAL_FALSE;
797 }
798 }
799 /* }}} */
800
801 /* {{{ proto int preg_match(string pattern, string subject [, array &subpatterns [, int flags [, int offset]]])
802 Perform a Perl-style regular expression match */
PHP_FUNCTION(preg_match)803 static PHP_FUNCTION(preg_match)
804 {
805 php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
806 }
807 /* }}} */
808
809 /* {{{ proto int preg_match_all(string pattern, string subject [, array &subpatterns [, int flags [, int offset]]])
810 Perform a Perl-style global regular expression match */
PHP_FUNCTION(preg_match_all)811 static PHP_FUNCTION(preg_match_all)
812 {
813 php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
814 }
815 /* }}} */
816
/* {{{ preg_get_backref
 * Try to parse a back-reference at *str, which must point at its introducer
 * character. Accepted forms are <introducer>N, <introducer>NN and, only when
 * the introducer is '$', ${N} / ${NN}, where N is a decimal digit.
 *
 * On success returns 1, stores the group number in *backref and advances
 * *str past the reference. On failure returns 0 and leaves *str unchanged;
 * *backref may already have been written by then.
 */
static int preg_get_backref(char **str, int *backref)
{
	char *cursor = *str;
	int braced = 0;

	/* An introducer at the very end of the string cannot start a reference. */
	if (cursor[1] == 0) {
		return 0;
	}

	if (cursor[0] == '$' && cursor[1] == '{') {
		braced = 1;
		cursor++;
	}
	cursor++;

	/* At least one digit is mandatory ... */
	if (!(*cursor >= '0' && *cursor <= '9')) {
		return 0;
	}
	*backref = *cursor - '0';
	cursor++;

	/* ... and at most one more is consumed. */
	if (*cursor >= '0' && *cursor <= '9') {
		*backref = *backref * 10 + *cursor - '0';
		cursor++;
	}

	if (braced) {
		if (*cursor != '}') {
			return 0;
		}
		cursor++;
	}

	*str = cursor;
	return 1;
}
/* }}} */
855
856 /* {{{ preg_do_repl_func
857 */
preg_do_repl_func(zval * function,char * subject,int * offsets,char ** subpat_names,int count,char ** result TSRMLS_DC)858 static int preg_do_repl_func(zval *function, char *subject, int *offsets, char **subpat_names, int count, char **result TSRMLS_DC)
859 {
860 zval *retval_ptr; /* Function return value */
861 zval **args[1]; /* Argument to pass to function */
862 zval *subpats; /* Captured subpatterns */
863 int result_len; /* Return value length */
864 int i;
865
866 MAKE_STD_ZVAL(subpats);
867 array_init(subpats);
868 for (i = 0; i < count; i++) {
869 if (subpat_names[i]) {
870 add_assoc_stringl(subpats, subpat_names[i], &subject[offsets[i<<1]] , offsets[(i<<1)+1] - offsets[i<<1], 1);
871 }
872 add_next_index_stringl(subpats, &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1], 1);
873 }
874 args[0] = &subpats;
875
876 if (call_user_function_ex(EG(function_table), NULL, function, &retval_ptr, 1, args, 0, NULL TSRMLS_CC) == SUCCESS && retval_ptr) {
877 convert_to_string_ex(&retval_ptr);
878 *result = estrndup(Z_STRVAL_P(retval_ptr), Z_STRLEN_P(retval_ptr));
879 result_len = Z_STRLEN_P(retval_ptr);
880 zval_ptr_dtor(&retval_ptr);
881 } else {
882 if (!EG(exception)) {
883 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to call custom replacement function");
884 }
885 result_len = offsets[1] - offsets[0];
886 *result = estrndup(&subject[offsets[0]], result_len);
887 }
888
889 zval_ptr_dtor(&subpats);
890
891 return result_len;
892 }
893 /* }}} */
894
895 /* {{{ preg_do_eval
896 */
/*
 * Builds a piece of PHP code out of eval_str, evaluates it and hands back the
 * stringified result.
 *
 * Every backreference recognised by preg_get_backref() is replaced by the
 * text of the corresponding captured subpattern (taken from subject via
 * offsets) after passing it through php_addslashes(). References to groups
 * at or beyond count are replaced by nothing. A '\' or '$' that directly
 * follows a backslash is taken literally and replaces that backslash.
 *
 * On return *result points to an estrndup()'ed copy of the evaluation result
 * (the caller frees it) and the function returns its length. A failed
 * evaluation raises E_ERROR, which does not return.
 */
static int preg_do_eval(char *eval_str, int eval_str_len, char *subject,
						int *offsets, int count, char **result TSRMLS_DC)
{
	zval		 eval_ret;						/* Value produced by the evaluated code */
	smart_str	 code = {0};					/* Code assembled from eval_str */
	char		*cursor = eval_str;				/* Read position inside eval_str */
	char		*pending = eval_str;			/* First byte not yet copied into code */
	char		*stop = eval_str + eval_str_len;	/* One past the end of eval_str */
	char		 prev = 0;						/* Character walked just before cursor */
	char		*description;					/* Label used for the evaluated code */
	int			 ref;							/* Backreference number */
	int			 len;							/* Length of the evaluation result */

	while (cursor < stop) {
		if (*cursor == '\\' || *cursor == '$') {
			/* Flush the literal text collected so far */
			smart_str_appendl(&code, pending, cursor - pending);

			if (prev == '\\') {
				/* Escaped character: it overwrites the backslash just flushed */
				code.c[code.len - 1] = *cursor++;
				pending = cursor;
				prev = 0;
				continue;
			}

			pending = cursor;
			if (preg_get_backref(&cursor, &ref)) {
				char	*escaped = "";
				int		 escaped_len = 0;

				if (ref < count) {
					char	*captured = subject + offsets[ref << 1];
					int		 captured_len = offsets[(ref << 1) + 1] - offsets[ref << 1];

					if (captured_len) {
						escaped = php_addslashes(captured, captured_len, &escaped_len, 0 TSRMLS_CC);
					} else {
						escaped = captured;
					}
				}
				smart_str_appendl(&code, escaped, escaped_len);
				pending = cursor;

				/* Only a non-empty escaped string was allocated by php_addslashes() */
				if (escaped_len) {
					efree(escaped);
				}
				continue;
			}
		}
		prev = *cursor++;
	}
	smart_str_appendl(&code, pending, cursor - pending);
	smart_str_0(&code);

	description = zend_make_compiled_string_description("regexp code" TSRMLS_CC);
	/* Run the code */
	if (zend_eval_stringl(code.c, code.len, &eval_ret, description TSRMLS_CC) == FAILURE) {
		efree(description);
		php_error_docref(NULL TSRMLS_CC,E_ERROR, "Failed evaluating code: %s%s", PHP_EOL, code.c);
		/* zend_error() does not return in this case */
	}
	efree(description);
	convert_to_string(&eval_ret);

	/* Hand a private copy of the result to the caller */
	len = Z_STRLEN(eval_ret);
	*result = estrndup(Z_STRVAL(eval_ret), len);

	zval_dtor(&eval_ret);
	smart_str_free(&code);

	return len;
}
981 /* }}} */
982
983 /* {{{ php_pcre_replace
984 */
php_pcre_replace(char * regex,int regex_len,char * subject,int subject_len,zval * replace_val,int is_callable_replace,int * result_len,int limit,int * replace_count TSRMLS_DC)985 PHPAPI char *php_pcre_replace(char *regex, int regex_len,
986 char *subject, int subject_len,
987 zval *replace_val, int is_callable_replace,
988 int *result_len, int limit, int *replace_count TSRMLS_DC)
989 {
990 pcre_cache_entry *pce; /* Compiled regular expression */
991
992 /* Compile regex or get it from cache. */
993 if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) {
994 return NULL;
995 }
996
997 return php_pcre_replace_impl(pce, subject, subject_len, replace_val,
998 is_callable_replace, result_len, limit, replace_count TSRMLS_CC);
999 }
1000 /* }}} */
1001
1002 /* {{{ php_pcre_replace_impl() */
php_pcre_replace_impl(pcre_cache_entry * pce,char * subject,int subject_len,zval * replace_val,int is_callable_replace,int * result_len,int limit,int * replace_count TSRMLS_DC)1003 PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *replace_val,
1004 int is_callable_replace, int *result_len, int limit, int *replace_count TSRMLS_DC)
1005 {
1006 pcre_extra *extra = pce->extra;/* Holds results of studying */
1007 pcre_extra extra_data; /* Used locally for exec options */
1008 int exoptions = 0; /* Execution options */
1009 int count = 0; /* Count of matched subpatterns */
1010 int *offsets; /* Array of subpattern offsets */
1011 char **subpat_names; /* Array for named subpatterns */
1012 int num_subpats; /* Number of captured subpatterns */
1013 int size_offsets; /* Size of the offsets array */
1014 int new_len; /* Length of needed storage */
1015 int alloc_len; /* Actual allocated length */
1016 int eval_result_len=0; /* Length of the eval'ed or
1017 function-returned string */
1018 int match_len; /* Length of the current match */
1019 int backref; /* Backreference number */
1020 int eval; /* If the replacement string should be eval'ed */
1021 int start_offset; /* Where the new search starts */
1022 int g_notempty=0; /* If the match should not be empty */
1023 int replace_len=0; /* Length of replacement string */
1024 char *result, /* Result of replacement */
1025 *replace=NULL, /* Replacement string */
1026 *new_buf, /* Temporary buffer for re-allocation */
1027 *walkbuf, /* Location of current replacement in the result */
1028 *walk, /* Used to walk the replacement string */
1029 *match, /* The current match */
1030 *piece, /* The current piece of subject */
1031 *replace_end=NULL, /* End of replacement string */
1032 *eval_result, /* Result of eval or custom function */
1033 walk_last; /* Last walked character */
1034 int rc;
1035
1036 if (extra == NULL) {
1037 extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1038 extra = &extra_data;
1039 }
1040 extra->match_limit = PCRE_G(backtrack_limit);
1041 extra->match_limit_recursion = PCRE_G(recursion_limit);
1042
1043 eval = pce->preg_options & PREG_REPLACE_EVAL;
1044 if (is_callable_replace) {
1045 if (eval) {
1046 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Modifier /e cannot be used with replacement callback");
1047 return NULL;
1048 }
1049 } else {
1050 replace = Z_STRVAL_P(replace_val);
1051 replace_len = Z_STRLEN_P(replace_val);
1052 replace_end = replace + replace_len;
1053 }
1054
1055 /* Calculate the size of the offsets array, and allocate memory for it. */
1056 rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &num_subpats);
1057 if (rc < 0) {
1058 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
1059 return NULL;
1060 }
1061 num_subpats++;
1062 size_offsets = num_subpats * 3;
1063
1064 /*
1065 * Build a mapping from subpattern numbers to their names. We will always
1066 * allocate the table, even though there may be no named subpatterns. This
1067 * avoids somewhat more complicated logic in the inner loops.
1068 */
1069 subpat_names = make_subpats_table(num_subpats, pce TSRMLS_CC);
1070 if (!subpat_names) {
1071 return NULL;
1072 }
1073
1074 offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
1075
1076 alloc_len = 2 * subject_len + 1;
1077 result = safe_emalloc(alloc_len, sizeof(char), 0);
1078
1079 /* Initialize */
1080 match = NULL;
1081 *result_len = 0;
1082 start_offset = 0;
1083 PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1084
1085 while (1) {
1086 /* Execute the regular expression. */
1087 count = pcre_exec(pce->re, extra, subject, subject_len, start_offset,
1088 exoptions|g_notempty, offsets, size_offsets);
1089
1090 /* the string was already proved to be valid UTF-8 */
1091 exoptions |= PCRE_NO_UTF8_CHECK;
1092
1093 /* Check for too many substrings condition. */
1094 if (count == 0) {
1095 php_error_docref(NULL TSRMLS_CC,E_NOTICE, "Matched, but too many substrings");
1096 count = size_offsets/3;
1097 }
1098
1099 piece = subject + start_offset;
1100
1101 if (count > 0 && (offsets[1] - offsets[0] >= 0) && (limit == -1 || limit > 0)) {
1102 if (replace_count) {
1103 ++*replace_count;
1104 }
1105 /* Set the match location in subject */
1106 match = subject + offsets[0];
1107
1108 new_len = *result_len + offsets[0] - start_offset; /* part before the match */
1109
1110 /* If evaluating, do it and add the return string's length */
1111 if (eval) {
1112 eval_result_len = preg_do_eval(replace, replace_len, subject,
1113 offsets, count, &eval_result TSRMLS_CC);
1114 new_len += eval_result_len;
1115 } else if (is_callable_replace) {
1116 /* Use custom function to get replacement string and its length. */
1117 eval_result_len = preg_do_repl_func(replace_val, subject, offsets, subpat_names, count, &eval_result TSRMLS_CC);
1118 new_len += eval_result_len;
1119 } else { /* do regular substitution */
1120 walk = replace;
1121 walk_last = 0;
1122 while (walk < replace_end) {
1123 if ('\\' == *walk || '$' == *walk) {
1124 if (walk_last == '\\') {
1125 walk++;
1126 walk_last = 0;
1127 continue;
1128 }
1129 if (preg_get_backref(&walk, &backref)) {
1130 if (backref < count)
1131 new_len += offsets[(backref<<1)+1] - offsets[backref<<1];
1132 continue;
1133 }
1134 }
1135 new_len++;
1136 walk++;
1137 walk_last = walk[-1];
1138 }
1139 }
1140
1141 if (new_len + 1 > alloc_len) {
1142 alloc_len = 1 + alloc_len + 2 * new_len;
1143 new_buf = emalloc(alloc_len);
1144 memcpy(new_buf, result, *result_len);
1145 efree(result);
1146 result = new_buf;
1147 }
1148 /* copy the part of the string before the match */
1149 memcpy(&result[*result_len], piece, match-piece);
1150 *result_len += match-piece;
1151
1152 /* copy replacement and backrefs */
1153 walkbuf = result + *result_len;
1154
1155 /* If evaluating or using custom function, copy result to the buffer
1156 * and clean up. */
1157 if (eval || is_callable_replace) {
1158 memcpy(walkbuf, eval_result, eval_result_len);
1159 *result_len += eval_result_len;
1160 STR_FREE(eval_result);
1161 } else { /* do regular backreference copying */
1162 walk = replace;
1163 walk_last = 0;
1164 while (walk < replace_end) {
1165 if ('\\' == *walk || '$' == *walk) {
1166 if (walk_last == '\\') {
1167 *(walkbuf-1) = *walk++;
1168 walk_last = 0;
1169 continue;
1170 }
1171 if (preg_get_backref(&walk, &backref)) {
1172 if (backref < count) {
1173 match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
1174 memcpy(walkbuf, subject + offsets[backref<<1], match_len);
1175 walkbuf += match_len;
1176 }
1177 continue;
1178 }
1179 }
1180 *walkbuf++ = *walk++;
1181 walk_last = walk[-1];
1182 }
1183 *walkbuf = '\0';
1184 /* increment the result length by how much we've added to the string */
1185 *result_len += walkbuf - (result + *result_len);
1186 }
1187
1188 if (limit != -1)
1189 limit--;
1190
1191 } else if (count == PCRE_ERROR_NOMATCH || limit == 0) {
1192 /* If we previously set PCRE_NOTEMPTY after a null match,
1193 this is not necessarily the end. We need to advance
1194 the start offset, and continue. Fudge the offset values
1195 to achieve this, unless we're already at the end of the string. */
1196 if (g_notempty != 0 && start_offset < subject_len) {
1197 offsets[0] = start_offset;
1198 offsets[1] = start_offset + 1;
1199 memcpy(&result[*result_len], piece, 1);
1200 (*result_len)++;
1201 } else {
1202 new_len = *result_len + subject_len - start_offset;
1203 if (new_len + 1 > alloc_len) {
1204 alloc_len = new_len + 1; /* now we know exactly how long it is */
1205 new_buf = safe_emalloc(alloc_len, sizeof(char), 0);
1206 memcpy(new_buf, result, *result_len);
1207 efree(result);
1208 result = new_buf;
1209 }
1210 /* stick that last bit of string on our output */
1211 memcpy(&result[*result_len], piece, subject_len - start_offset);
1212 *result_len += subject_len - start_offset;
1213 result[*result_len] = '\0';
1214 break;
1215 }
1216 } else {
1217 pcre_handle_exec_error(count TSRMLS_CC);
1218 efree(result);
1219 result = NULL;
1220 break;
1221 }
1222
1223 /* If we have matched an empty string, mimic what Perl's /g options does.
1224 This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
1225 the match again at the same point. If this fails (picked up above) we
1226 advance to the next character. */
1227 g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
1228
1229 /* Advance to the next piece. */
1230 start_offset = offsets[1];
1231 }
1232
1233 efree(offsets);
1234 efree(subpat_names);
1235
1236 return result;
1237 }
1238 /* }}} */
1239
1240 /* {{{ php_replace_in_subject
1241 */
/*
 * Applies regex / replace to a single subject and returns the new string.
 *
 * *subject is converted to a string in place. When regex is an array, each
 * pattern is applied in turn to the output of the previous one; if replace is
 * an array too (and is not a callback) the patterns are paired with its
 * entries in iteration order, and an empty string is used once the
 * replacements run out. When regex is not an array it is used as a single
 * pattern string.
 *
 * Returns an emalloc()'ed string with its length in *result_len, or NULL as
 * soon as php_pcre_replace() fails. limit, is_callable_replace and
 * replace_count are passed through to php_pcre_replace() unchanged.
 */
static char *php_replace_in_subject(zval *regex, zval *replace, zval **subject, int *result_len, int limit, int is_callable_replace, int *replace_count TSRMLS_DC)
{
	zval		**regex_entry,
				**replace_entry = NULL,
				 *replace_value,
				  empty_replace;
	char		 *subject_value,
				 *result;
	int			  subject_len;
	int			  replace_is_list;	/* replace is an array of replacement strings */

	/* Make sure we're dealing with strings. */
	convert_to_string_ex(subject);
	/* FIXME: This might need to be changed to STR_EMPTY_ALLOC(). Check if this zval could be dtor()'ed somehow */
	ZVAL_STRINGL(&empty_replace, "", 0, 0);

	/* Single pattern: one replacement pass over the subject is all we need */
	if (Z_TYPE_P(regex) != IS_ARRAY) {
		return php_pcre_replace(Z_STRVAL_P(regex),
								Z_STRLEN_P(regex),
								Z_STRVAL_PP(subject),
								Z_STRLEN_PP(subject),
								replace,
								is_callable_replace,
								result_len,
								limit,
								replace_count TSRMLS_CC);
	}

	/* Duplicate subject string for repeated replacement */
	subject_value = estrndup(Z_STRVAL_PP(subject), Z_STRLEN_PP(subject));
	subject_len = Z_STRLEN_PP(subject);
	*result_len = subject_len;

	zend_hash_internal_pointer_reset(Z_ARRVAL_P(regex));

	replace_value = replace;
	replace_is_list = (Z_TYPE_P(replace) == IS_ARRAY && !is_callable_replace);
	if (replace_is_list) {
		zend_hash_internal_pointer_reset(Z_ARRVAL_P(replace));
	}

	/* For each entry in the regex array, get the entry */
	while (zend_hash_get_current_data(Z_ARRVAL_P(regex), (void **)&regex_entry) == SUCCESS) {
		/* Make sure we're dealing with strings. */
		convert_to_string_ex(regex_entry);

		/* If replace is an array and not a callable construct */
		if (replace_is_list) {
			/* Get current entry */
			if (zend_hash_get_current_data(Z_ARRVAL_P(replace), (void **)&replace_entry) == SUCCESS) {
				convert_to_string_ex(replace_entry);
				replace_value = *replace_entry;
				zend_hash_move_forward(Z_ARRVAL_P(replace));
			} else {
				/* We've run out of replacement strings, so use an empty one */
				replace_value = &empty_replace;
			}
		}

		/* Do the actual replacement and put the result back into subject_value
		   for further replacements. */
		result = php_pcre_replace(Z_STRVAL_PP(regex_entry),
								  Z_STRLEN_PP(regex_entry),
								  subject_value,
								  subject_len,
								  replace_value,
								  is_callable_replace,
								  result_len,
								  limit,
								  replace_count TSRMLS_CC);

		/* The previous intermediate string is no longer needed either way */
		efree(subject_value);
		if (result == NULL) {
			return NULL;
		}
		subject_value = result;
		subject_len = *result_len;

		zend_hash_move_forward(Z_ARRVAL_P(regex));
	}

	return subject_value;
}
1326 /* }}} */
1327
1328 /* {{{ preg_replace_impl
1329 */
/*
 * Shared implementation of preg_replace(), preg_replace_callback() and
 * preg_filter().
 *
 *  - is_callable_replace: the second argument is a callback rather than a
 *    replacement string or array; it is validated with zend_is_callable().
 *  - is_filter: only subjects in which at least one replacement happened are
 *    returned.
 *
 * An array subject yields an array with the original keys; a subject for
 * which the replacement fails is left out of it. A scalar subject yields the
 * resulting string. When a fifth argument was passed it receives the total
 * number of replacements.
 */
static void preg_replace_impl(INTERNAL_FUNCTION_PARAMETERS, int is_callable_replace, int is_filter)
{
	zval		   **regex,
				   **replace,
				   **subject,
				   **subject_entry,
				   **zcount = NULL;
	char			*result;
	int				 result_len;
	int				 limit_val = -1;
	long			 limit = -1;
	char			*string_key;
	ulong			 num_key;
	char			*callback_name;
	int				 replace_count=0, old_replace_count;

	/* Get function parameters and do error-checking. */
	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ZZZ|lZ", &regex, &replace, &subject, &limit, &zcount) == FAILURE) {
		return;
	}

	if (!is_callable_replace && Z_TYPE_PP(replace) == IS_ARRAY && Z_TYPE_PP(regex) != IS_ARRAY) {
		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Parameter mismatch, pattern is a string while replacement is an array");
		RETURN_FALSE;
	}

	SEPARATE_ZVAL(replace);
	/* Arrays are kept as they are, and so are objects used as callbacks */
	if (Z_TYPE_PP(replace) != IS_ARRAY && (Z_TYPE_PP(replace) != IS_OBJECT || !is_callable_replace)) {
		convert_to_string_ex(replace);
	}
	if (is_callable_replace) {
		if (!zend_is_callable(*replace, 0, &callback_name TSRMLS_CC)) {
			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Requires argument 2, '%s', to be a valid callback", callback_name);
			efree(callback_name);
			/* Invalid callback: the subject is returned untouched */
			MAKE_COPY_ZVAL(subject, return_value);
			return;
		}
		efree(callback_name);
	}

	SEPARATE_ZVAL(regex);
	SEPARATE_ZVAL(subject);

	if (ZEND_NUM_ARGS() > 3) {
		limit_val = limit;
	}

	if (Z_TYPE_PP(regex) != IS_ARRAY)
		convert_to_string_ex(regex);

	/* if subject is an array */
	if (Z_TYPE_PP(subject) == IS_ARRAY) {
		array_init(return_value);
		zend_hash_internal_pointer_reset(Z_ARRVAL_PP(subject));

		/* For each subject entry, convert it to string, then perform replacement
		   and add the result to the return_value array. */
		while (zend_hash_get_current_data(Z_ARRVAL_PP(subject), (void **)&subject_entry) == SUCCESS) {
			SEPARATE_ZVAL(subject_entry);
			old_replace_count = replace_count;
			if ((result = php_replace_in_subject(*regex, *replace, subject_entry, &result_len, limit_val, is_callable_replace, &replace_count TSRMLS_CC)) != NULL) {
				if (!is_filter || replace_count > old_replace_count) {
					/* Add to return array; the array takes ownership of result */
					switch(zend_hash_get_current_key(Z_ARRVAL_PP(subject), &string_key, &num_key, 0))
					{
						case HASH_KEY_IS_STRING:
							add_assoc_stringl(return_value, string_key, result, result_len, 0);
							break;

						case HASH_KEY_IS_LONG:
							add_index_stringl(return_value, num_key, result, result_len, 0);
							break;

						default:
							/* No usable key: nothing took ownership of result */
							efree(result);
							break;
					}
				} else {
					efree(result);
				}
			}

			zend_hash_move_forward(Z_ARRVAL_PP(subject));
		}
	} else {	/* if subject is not an array */
		old_replace_count = replace_count;
		if ((result = php_replace_in_subject(*regex, *replace, subject, &result_len, limit_val, is_callable_replace, &replace_count TSRMLS_CC)) != NULL) {
			if (!is_filter || replace_count > old_replace_count) {
				RETVAL_STRINGL(result, result_len, 0);
			} else {
				efree(result);
			}
		}
	}
	if (ZEND_NUM_ARGS() > 4) {
		/* Overwrite the by-reference count argument with the total */
		zval_dtor(*zcount);
		ZVAL_LONG(*zcount, replace_count);
	}

}
1426 /* }}} */
1427
1428 /* {{{ proto mixed preg_replace(mixed regex, mixed replace, mixed subject [, int limit [, int &count]])
1429 Perform Perl-style regular expression replacement. */
PHP_FUNCTION(preg_replace)1430 static PHP_FUNCTION(preg_replace)
1431 {
1432 preg_replace_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 0);
1433 }
1434 /* }}} */
1435
1436 /* {{{ proto mixed preg_replace_callback(mixed regex, mixed callback, mixed subject [, int limit [, int &count]])
1437 Perform Perl-style regular expression replacement using replacement callback. */
PHP_FUNCTION(preg_replace_callback)1438 static PHP_FUNCTION(preg_replace_callback)
1439 {
1440 preg_replace_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1, 0);
1441 }
1442 /* }}} */
1443
1444 /* {{{ proto mixed preg_filter(mixed regex, mixed replace, mixed subject [, int limit [, int &count]])
1445 Perform Perl-style regular expression replacement and only return matches. */
PHP_FUNCTION(preg_filter)1446 static PHP_FUNCTION(preg_filter)
1447 {
1448 preg_replace_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 1);
1449 }
1450 /* }}} */
1451
1452 /* {{{ proto array preg_split(string pattern, string subject [, int limit [, int flags]])
1453 Split string into an array using a perl-style regular expression as a delimiter */
PHP_FUNCTION(preg_split)1454 static PHP_FUNCTION(preg_split)
1455 {
1456 char *regex; /* Regular expression */
1457 char *subject; /* String to match against */
1458 int regex_len;
1459 int subject_len;
1460 long limit_val = -1;/* Integer value of limit */
1461 long flags = 0; /* Match control flags */
1462 pcre_cache_entry *pce; /* Compiled regular expression */
1463
1464 /* Get function parameters and do error checking */
1465 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ll", ®ex, ®ex_len,
1466 &subject, &subject_len, &limit_val, &flags) == FAILURE) {
1467 RETURN_FALSE;
1468 }
1469
1470 /* Compile regex or get it from cache. */
1471 if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) {
1472 RETURN_FALSE;
1473 }
1474
1475 php_pcre_split_impl(pce, subject, subject_len, return_value, limit_val, flags TSRMLS_CC);
1476 }
1477 /* }}} */
1478
1479 /* {{{ php_pcre_split
1480 */
php_pcre_split_impl(pcre_cache_entry * pce,char * subject,int subject_len,zval * return_value,long limit_val,long flags TSRMLS_DC)1481 PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value,
1482 long limit_val, long flags TSRMLS_DC)
1483 {
1484 pcre_extra *extra = NULL; /* Holds results of studying */
1485 pcre *re_bump = NULL; /* Regex instance for empty matches */
1486 pcre_extra *extra_bump = NULL; /* Almost dummy */
1487 pcre_extra extra_data; /* Used locally for exec options */
1488 int *offsets; /* Array of subpattern offsets */
1489 int size_offsets; /* Size of the offsets array */
1490 int exoptions = 0; /* Execution options */
1491 int count = 0; /* Count of matched subpatterns */
1492 int start_offset; /* Where the new search starts */
1493 int next_offset; /* End of the last delimiter match + 1 */
1494 int g_notempty = 0; /* If the match should not be empty */
1495 char *last_match; /* Location of last match */
1496 int rc;
1497 int no_empty; /* If NO_EMPTY flag is set */
1498 int delim_capture; /* If delimiters should be captured */
1499 int offset_capture; /* If offsets should be captured */
1500
1501 no_empty = flags & PREG_SPLIT_NO_EMPTY;
1502 delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE;
1503 offset_capture = flags & PREG_SPLIT_OFFSET_CAPTURE;
1504
1505 if (limit_val == 0) {
1506 limit_val = -1;
1507 }
1508
1509 if (extra == NULL) {
1510 extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1511 extra = &extra_data;
1512 }
1513 extra->match_limit = PCRE_G(backtrack_limit);
1514 extra->match_limit_recursion = PCRE_G(recursion_limit);
1515
1516 /* Initialize return value */
1517 array_init(return_value);
1518
1519 /* Calculate the size of the offsets array, and allocate memory for it. */
1520 rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &size_offsets);
1521 if (rc < 0) {
1522 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
1523 RETURN_FALSE;
1524 }
1525 size_offsets = (size_offsets + 1) * 3;
1526 offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
1527
1528 /* Start at the beginning of the string */
1529 start_offset = 0;
1530 next_offset = 0;
1531 last_match = subject;
1532 PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1533
1534 /* Get next piece if no limit or limit not yet reached and something matched*/
1535 while ((limit_val == -1 || limit_val > 1)) {
1536 count = pcre_exec(pce->re, extra, subject,
1537 subject_len, start_offset,
1538 exoptions|g_notempty, offsets, size_offsets);
1539
1540 /* the string was already proved to be valid UTF-8 */
1541 exoptions |= PCRE_NO_UTF8_CHECK;
1542
1543 /* Check for too many substrings condition. */
1544 if (count == 0) {
1545 php_error_docref(NULL TSRMLS_CC,E_NOTICE, "Matched, but too many substrings");
1546 count = size_offsets/3;
1547 }
1548
1549 /* If something matched */
1550 if (count > 0 && (offsets[1] - offsets[0] >= 0)) {
1551 if (!no_empty || &subject[offsets[0]] != last_match) {
1552
1553 if (offset_capture) {
1554 /* Add (match, offset) pair to the return value */
1555 add_offset_pair(return_value, last_match, &subject[offsets[0]]-last_match, next_offset, NULL);
1556 } else {
1557 /* Add the piece to the return value */
1558 add_next_index_stringl(return_value, last_match,
1559 &subject[offsets[0]]-last_match, 1);
1560 }
1561
1562 /* One less left to do */
1563 if (limit_val != -1)
1564 limit_val--;
1565 }
1566
1567 last_match = &subject[offsets[1]];
1568 next_offset = offsets[1];
1569
1570 if (delim_capture) {
1571 int i, match_len;
1572 for (i = 1; i < count; i++) {
1573 match_len = offsets[(i<<1)+1] - offsets[i<<1];
1574 /* If we have matched a delimiter */
1575 if (!no_empty || match_len > 0) {
1576 if (offset_capture) {
1577 add_offset_pair(return_value, &subject[offsets[i<<1]], match_len, offsets[i<<1], NULL);
1578 } else {
1579 add_next_index_stringl(return_value,
1580 &subject[offsets[i<<1]],
1581 match_len, 1);
1582 }
1583 }
1584 }
1585 }
1586 } else if (count == PCRE_ERROR_NOMATCH) {
1587 /* If we previously set PCRE_NOTEMPTY after a null match,
1588 this is not necessarily the end. We need to advance
1589 the start offset, and continue. Fudge the offset values
1590 to achieve this, unless we're already at the end of the string. */
1591 if (g_notempty != 0 && start_offset < subject_len) {
1592 if (pce->compile_options & PCRE_UTF8) {
1593 if (re_bump == NULL) {
1594 int dummy;
1595
1596 if ((re_bump = pcre_get_compiled_regex("/./us", &extra_bump, &dummy TSRMLS_CC)) == NULL) {
1597 RETURN_FALSE;
1598 }
1599 }
1600 count = pcre_exec(re_bump, extra_bump, subject,
1601 subject_len, start_offset,
1602 exoptions, offsets, size_offsets);
1603 if (count < 1) {
1604 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown error");
1605 RETURN_FALSE;
1606 }
1607 } else {
1608 offsets[0] = start_offset;
1609 offsets[1] = start_offset + 1;
1610 }
1611 } else
1612 break;
1613 } else {
1614 pcre_handle_exec_error(count TSRMLS_CC);
1615 break;
1616 }
1617
1618 /* If we have matched an empty string, mimic what Perl's /g options does.
1619 This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
1620 the match again at the same point. If this fails (picked up above) we
1621 advance to the next character. */
1622 g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
1623
1624 /* Advance to the position right after the last full match */
1625 start_offset = offsets[1];
1626 }
1627
1628
1629 start_offset = last_match - subject; /* the offset might have been incremented, but without further successful matches */
1630
1631 if (!no_empty || start_offset < subject_len)
1632 {
1633 if (offset_capture) {
1634 /* Add the last (match, offset) pair to the return value */
1635 add_offset_pair(return_value, &subject[start_offset], subject_len - start_offset, start_offset, NULL);
1636 } else {
1637 /* Add the last piece to the return value */
1638 add_next_index_stringl(return_value, last_match, subject + subject_len - last_match, 1);
1639 }
1640 }
1641
1642
1643 /* Clean up */
1644 efree(offsets);
1645 }
1646 /* }}} */
1647
1648 /* {{{ proto string preg_quote(string str [, string delim_char])
1649 Quote regular expression characters plus an optional character */
PHP_FUNCTION(preg_quote)1650 static PHP_FUNCTION(preg_quote)
1651 {
1652 int in_str_len;
1653 char *in_str; /* Input string argument */
1654 char *in_str_end; /* End of the input string */
1655 int delim_len = 0;
1656 char *delim = NULL; /* Additional delimiter argument */
1657 char *out_str, /* Output string with quoted characters */
1658 *p, /* Iterator for input string */
1659 *q, /* Iterator for output string */
1660 delim_char=0, /* Delimiter character to be quoted */
1661 c; /* Current character */
1662 zend_bool quote_delim = 0; /* Whether to quote additional delim char */
1663
1664 /* Get the arguments and check for errors */
1665 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s", &in_str, &in_str_len,
1666 &delim, &delim_len) == FAILURE) {
1667 return;
1668 }
1669
1670 in_str_end = in_str + in_str_len;
1671
1672 /* Nothing to do if we got an empty string */
1673 if (in_str == in_str_end) {
1674 RETURN_EMPTY_STRING();
1675 }
1676
1677 if (delim && *delim) {
1678 delim_char = delim[0];
1679 quote_delim = 1;
1680 }
1681
1682 /* Allocate enough memory so that even if each character
1683 is quoted, we won't run out of room */
1684 out_str = safe_emalloc(4, in_str_len, 1);
1685
1686 /* Go through the string and quote necessary characters */
1687 for(p = in_str, q = out_str; p != in_str_end; p++) {
1688 c = *p;
1689 switch(c) {
1690 case '.':
1691 case '\\':
1692 case '+':
1693 case '*':
1694 case '?':
1695 case '[':
1696 case '^':
1697 case ']':
1698 case '$':
1699 case '(':
1700 case ')':
1701 case '{':
1702 case '}':
1703 case '=':
1704 case '!':
1705 case '>':
1706 case '<':
1707 case '|':
1708 case ':':
1709 case '-':
1710 *q++ = '\\';
1711 *q++ = c;
1712 break;
1713
1714 case '\0':
1715 *q++ = '\\';
1716 *q++ = '0';
1717 *q++ = '0';
1718 *q++ = '0';
1719 break;
1720
1721 default:
1722 if (quote_delim && c == delim_char)
1723 *q++ = '\\';
1724 *q++ = c;
1725 break;
1726 }
1727 }
1728 *q = '\0';
1729
1730 /* Reallocate string and return it */
1731 RETVAL_STRINGL(erealloc(out_str, q - out_str + 1), q - out_str, 0);
1732 }
1733 /* }}} */
1734
1735 /* {{{ proto array preg_grep(string regex, array input [, int flags])
1736 Searches array and returns entries which match regex */
PHP_FUNCTION(preg_grep)1737 static PHP_FUNCTION(preg_grep)
1738 {
1739 char *regex; /* Regular expression */
1740 int regex_len;
1741 zval *input; /* Input array */
1742 long flags = 0; /* Match control flags */
1743 pcre_cache_entry *pce; /* Compiled regular expression */
1744
1745 /* Get arguments and do error checking */
1746 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sa|l", ®ex, ®ex_len,
1747 &input, &flags) == FAILURE) {
1748 return;
1749 }
1750
1751 /* Compile regex or get it from cache. */
1752 if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) {
1753 RETURN_FALSE;
1754 }
1755
1756 php_pcre_grep_impl(pce, input, return_value, flags TSRMLS_CC);
1757 }
1758 /* }}} */
1759
php_pcre_grep_impl(pcre_cache_entry * pce,zval * input,zval * return_value,long flags TSRMLS_DC)1760 PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return_value, long flags TSRMLS_DC) /* {{{ */
1761 {
1762 zval **entry; /* An entry in the input array */
1763 pcre_extra *extra = pce->extra;/* Holds results of studying */
1764 pcre_extra extra_data; /* Used locally for exec options */
1765 int *offsets; /* Array of subpattern offsets */
1766 int size_offsets; /* Size of the offsets array */
1767 int count = 0; /* Count of matched subpatterns */
1768 char *string_key;
1769 ulong num_key;
1770 zend_bool invert; /* Whether to return non-matching
1771 entries */
1772 int rc;
1773
1774 invert = flags & PREG_GREP_INVERT ? 1 : 0;
1775
1776 if (extra == NULL) {
1777 extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1778 extra = &extra_data;
1779 }
1780 extra->match_limit = PCRE_G(backtrack_limit);
1781 extra->match_limit_recursion = PCRE_G(recursion_limit);
1782
1783 /* Calculate the size of the offsets array, and allocate memory for it. */
1784 rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &size_offsets);
1785 if (rc < 0) {
1786 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
1787 RETURN_FALSE;
1788 }
1789 size_offsets = (size_offsets + 1) * 3;
1790 offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
1791
1792 /* Initialize return array */
1793 array_init(return_value);
1794
1795 PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1796
1797 /* Go through the input array */
1798 zend_hash_internal_pointer_reset(Z_ARRVAL_P(input));
1799 while (zend_hash_get_current_data(Z_ARRVAL_P(input), (void **)&entry) == SUCCESS) {
1800 zval subject = **entry;
1801
1802 if (Z_TYPE_PP(entry) != IS_STRING) {
1803 zval_copy_ctor(&subject);
1804 convert_to_string(&subject);
1805 }
1806
1807 /* Perform the match */
1808 count = pcre_exec(pce->re, extra, Z_STRVAL(subject),
1809 Z_STRLEN(subject), 0,
1810 0, offsets, size_offsets);
1811
1812 /* Check for too many substrings condition. */
1813 if (count == 0) {
1814 php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Matched, but too many substrings");
1815 count = size_offsets/3;
1816 } else if (count < 0 && count != PCRE_ERROR_NOMATCH) {
1817 pcre_handle_exec_error(count TSRMLS_CC);
1818 break;
1819 }
1820
1821 /* If the entry fits our requirements */
1822 if ((count > 0 && !invert) || (count == PCRE_ERROR_NOMATCH && invert)) {
1823
1824 Z_ADDREF_PP(entry);
1825
1826 /* Add to return array */
1827 switch (zend_hash_get_current_key(Z_ARRVAL_P(input), &string_key, &num_key, 0))
1828 {
1829 case HASH_KEY_IS_STRING:
1830 zend_hash_update(Z_ARRVAL_P(return_value), string_key,
1831 strlen(string_key)+1, entry, sizeof(zval *), NULL);
1832 break;
1833
1834 case HASH_KEY_IS_LONG:
1835 zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, entry,
1836 sizeof(zval *), NULL);
1837 break;
1838 }
1839 }
1840
1841 if (Z_TYPE_PP(entry) != IS_STRING) {
1842 zval_dtor(&subject);
1843 }
1844
1845 zend_hash_move_forward(Z_ARRVAL_P(input));
1846 }
1847 zend_hash_internal_pointer_reset(Z_ARRVAL_P(input));
1848 /* Clean up */
1849 efree(offsets);
1850 }
1851 /* }}} */
1852
1853 /* {{{ proto int preg_last_error()
1854 Returns the error code of the last regexp execution. */
PHP_FUNCTION(preg_last_error)1855 static PHP_FUNCTION(preg_last_error)
1856 {
1857 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "") == FAILURE) {
1858 return;
1859 }
1860
1861 RETURN_LONG(PCRE_G(error_code));
1862 }
1863 /* }}} */
1864
1865 /* {{{ module definition structures */
1866
1867 /* {{{ arginfo */
1868 ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match, 0, 0, 2)
1869 ZEND_ARG_INFO(0, pattern)
1870 ZEND_ARG_INFO(0, subject)
1871 ZEND_ARG_INFO(1, subpatterns) /* array */
1872 ZEND_ARG_INFO(0, flags)
1873 ZEND_ARG_INFO(0, offset)
1874 ZEND_END_ARG_INFO()
1875
1876 ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match_all, 0, 0, 2)
1877 ZEND_ARG_INFO(0, pattern)
1878 ZEND_ARG_INFO(0, subject)
1879 ZEND_ARG_INFO(1, subpatterns) /* array */
1880 ZEND_ARG_INFO(0, flags)
1881 ZEND_ARG_INFO(0, offset)
1882 ZEND_END_ARG_INFO()
1883
1884 ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace, 0, 0, 3)
1885 ZEND_ARG_INFO(0, regex)
1886 ZEND_ARG_INFO(0, replace)
1887 ZEND_ARG_INFO(0, subject)
1888 ZEND_ARG_INFO(0, limit)
1889 ZEND_ARG_INFO(1, count)
1890 ZEND_END_ARG_INFO()
1891
1892 ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace_callback, 0, 0, 3)
1893 ZEND_ARG_INFO(0, regex)
1894 ZEND_ARG_INFO(0, callback)
1895 ZEND_ARG_INFO(0, subject)
1896 ZEND_ARG_INFO(0, limit)
1897 ZEND_ARG_INFO(1, count)
1898 ZEND_END_ARG_INFO()
1899
1900 ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_split, 0, 0, 2)
1901 ZEND_ARG_INFO(0, pattern)
1902 ZEND_ARG_INFO(0, subject)
1903 ZEND_ARG_INFO(0, limit)
1904 ZEND_ARG_INFO(0, flags)
1905 ZEND_END_ARG_INFO()
1906
1907 ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_quote, 0, 0, 1)
1908 ZEND_ARG_INFO(0, str)
1909 ZEND_ARG_INFO(0, delim_char)
1910 ZEND_END_ARG_INFO()
1911
1912 ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_grep, 0, 0, 2)
1913 ZEND_ARG_INFO(0, regex)
1914 ZEND_ARG_INFO(0, input) /* array */
1915 ZEND_ARG_INFO(0, flags)
1916 ZEND_END_ARG_INFO()
1917
1918 ZEND_BEGIN_ARG_INFO(arginfo_preg_last_error, 0)
1919 ZEND_END_ARG_INFO()
1920 /* }}} */
1921
1922 static const zend_function_entry pcre_functions[] = {
1923 PHP_FE(preg_match, arginfo_preg_match)
1924 PHP_FE(preg_match_all, arginfo_preg_match_all)
1925 PHP_FE(preg_replace, arginfo_preg_replace)
1926 PHP_FE(preg_replace_callback, arginfo_preg_replace_callback)
1927 PHP_FE(preg_filter, arginfo_preg_replace)
1928 PHP_FE(preg_split, arginfo_preg_split)
1929 PHP_FE(preg_quote, arginfo_preg_quote)
1930 PHP_FE(preg_grep, arginfo_preg_grep)
1931 PHP_FE(preg_last_error, arginfo_preg_last_error)
1932 PHP_FE_END
1933 };
1934
1935 zend_module_entry pcre_module_entry = {
1936 STANDARD_MODULE_HEADER,
1937 "pcre",
1938 pcre_functions,
1939 PHP_MINIT(pcre),
1940 PHP_MSHUTDOWN(pcre),
1941 NULL,
1942 NULL,
1943 PHP_MINFO(pcre),
1944 NO_VERSION_YET,
1945 PHP_MODULE_GLOBALS(pcre),
1946 PHP_GINIT(pcre),
1947 PHP_GSHUTDOWN(pcre),
1948 NULL,
1949 STANDARD_MODULE_PROPERTIES_EX
1950 };
1951
/* Emit the ZEND_GET_MODULE(pcre) definition only when COMPILE_DL_PCRE is
   defined (by convention, a build of pcre as a loadable module - confirm
   against the build configuration). */
#if defined(COMPILE_DL_PCRE)
ZEND_GET_MODULE(pcre)
#endif
1955
1956 /* }}} */
1957
1958 #endif /* HAVE_PCRE || HAVE_BUNDLED_PCRE */
1959
1960 /*
1961 * Local variables:
1962 * tab-width: 4
1963 * c-basic-offset: 4
1964 * End:
1965 * vim600: sw=4 ts=4 fdm=marker
1966 * vim<600: sw=4 ts=4
1967 */
1968