1 /*
2 +----------------------------------------------------------------------+
3 | PHP Version 5 |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 1997-2013 The PHP Group |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
15 | Author: Andrei Zmievski <andrei@php.net> |
16 +----------------------------------------------------------------------+
17 */
18
19 /* $Id$ */
20
21 #include "php.h"
22 #include "php_ini.h"
23 #include "php_globals.h"
24 #include "php_pcre.h"
25 #include "ext/standard/info.h"
26 #include "ext/standard/php_smart_str.h"
27
28 #if HAVE_PCRE || HAVE_BUNDLED_PCRE
29
30 #include "ext/standard/php_string.h"
31
/* Result ordering for global matches. The ordering is read from the low byte
   of the flags argument (flags & 0xff) in php_pcre_match_impl(). */
32 #define PREG_PATTERN_ORDER 1
33 #define PREG_SET_ORDER 2
/* When set, each match is reported as a (string, offset) pair. */
34 #define PREG_OFFSET_CAPTURE (1<<8)
35
/* Bit flags exported to userland as the PREG_SPLIT_* constants. */
36 #define PREG_SPLIT_NO_EMPTY (1<<0)
37 #define PREG_SPLIT_DELIM_CAPTURE (1<<1)
38 #define PREG_SPLIT_OFFSET_CAPTURE (1<<2)
39
/* Internal option recorded in preg_options when the 'e' modifier is present. */
40 #define PREG_REPLACE_EVAL (1<<0)
41
/* Bit flag exported to userland as PREG_GREP_INVERT. */
42 #define PREG_GREP_INVERT (1<<0)
43
/* Number of cached compiled patterns at which the cache evicts
   PCRE_CACHE_SIZE / 8 entries before inserting a new one. */
44 #define PCRE_CACHE_SIZE 4096
45
/* Values stored in PCRE_G(error_code); exported to userland as the
   PREG_*_ERROR constants in MINIT. */
46 enum {
47 PHP_PCRE_NO_ERROR = 0,
48 PHP_PCRE_INTERNAL_ERROR,
49 PHP_PCRE_BACKTRACK_LIMIT_ERROR,
50 PHP_PCRE_RECURSION_LIMIT_ERROR,
51 PHP_PCRE_BAD_UTF8_ERROR,
52 PHP_PCRE_BAD_UTF8_OFFSET_ERROR
53 };
54
55
/* Storage for the module globals accessed through PCRE_G(). */
ZEND_DECLARE_MODULE_GLOBALS(pcre)56 ZEND_DECLARE_MODULE_GLOBALS(pcre)
57
58
59 static void pcre_handle_exec_error(int pcre_code TSRMLS_DC) /* {{{ */
60 {
61 int preg_code = 0;
62
63 switch (pcre_code) {
64 case PCRE_ERROR_MATCHLIMIT:
65 preg_code = PHP_PCRE_BACKTRACK_LIMIT_ERROR;
66 break;
67
68 case PCRE_ERROR_RECURSIONLIMIT:
69 preg_code = PHP_PCRE_RECURSION_LIMIT_ERROR;
70 break;
71
72 case PCRE_ERROR_BADUTF8:
73 preg_code = PHP_PCRE_BAD_UTF8_ERROR;
74 break;
75
76 case PCRE_ERROR_BADUTF8_OFFSET:
77 preg_code = PHP_PCRE_BAD_UTF8_OFFSET_ERROR;
78 break;
79
80 default:
81 preg_code = PHP_PCRE_INTERNAL_ERROR;
82 break;
83 }
84
85 PCRE_G(error_code) = preg_code;
86 }
87 /* }}} */
88
php_free_pcre_cache(void * data)89 static void php_free_pcre_cache(void *data) /* {{{ */
90 {
91 pcre_cache_entry *pce = (pcre_cache_entry *) data;
92 if (!pce) return;
93 pefree(pce->re, 1);
94 if (pce->extra) pefree(pce->extra, 1);
95 #if HAVE_SETLOCALE
96 if ((void*)pce->tables) pefree((void*)pce->tables, 1);
97 pefree(pce->locale, 1);
98 #endif
99 }
100 /* }}} */
101
PHP_GINIT_FUNCTION(pcre)102 static PHP_GINIT_FUNCTION(pcre) /* {{{ */
103 {
104 zend_hash_init(&pcre_globals->pcre_cache, 0, NULL, php_free_pcre_cache, 1);
105 pcre_globals->backtrack_limit = 0;
106 pcre_globals->recursion_limit = 0;
107 pcre_globals->error_code = PHP_PCRE_NO_ERROR;
108 }
109 /* }}} */
110
PHP_GSHUTDOWN_FUNCTION(pcre)111 static PHP_GSHUTDOWN_FUNCTION(pcre) /* {{{ */
112 {
113 zend_hash_destroy(&pcre_globals->pcre_cache);
114 }
115 /* }}} */
116
/* INI settings of the extension. Both are changeable at any level
   (PHP_INI_ALL) and are written straight into the module globals; the values
   are handed to PCRE as match_limit / match_limit_recursion before matching. */
117 PHP_INI_BEGIN()
118 STD_PHP_INI_ENTRY("pcre.backtrack_limit", "1000000", PHP_INI_ALL, OnUpdateLong, backtrack_limit, zend_pcre_globals, pcre_globals)
119 STD_PHP_INI_ENTRY("pcre.recursion_limit", "100000", PHP_INI_ALL, OnUpdateLong, recursion_limit, zend_pcre_globals, pcre_globals)
PHP_INI_END()120 PHP_INI_END()
121
122
/* {{{ PHP_MINFO_FUNCTION(pcre)
 * Emit the phpinfo() section of the extension: a two-row table (support flag
 * and PCRE library version) followed by the extension's INI entries. */
static PHP_MINFO_FUNCTION(pcre)
{
	const char *library_version = pcre_version();

	php_info_print_table_start();
	php_info_print_table_row(2, "PCRE (Perl Compatible Regular Expressions) Support", "enabled");
	php_info_print_table_row(2, "PCRE Library Version", library_version);
	php_info_print_table_end();

	DISPLAY_INI_ENTRIES();
}
/* }}} */
134
135 /* {{{ PHP_MINIT_FUNCTION(pcre) */
/* Module start-up: registers the INI entries and exports the PREG_* flag and
   error constants plus PCRE_VERSION. All constants are case-sensitive
   (CONST_CS) and persistent (CONST_PERSISTENT). Always returns SUCCESS. */
PHP_MINIT_FUNCTION(pcre)136 static PHP_MINIT_FUNCTION(pcre)
137 {
138 REGISTER_INI_ENTRIES();
139
/* Flags accepted by the matching, splitting and grep functions. */
140 REGISTER_LONG_CONSTANT("PREG_PATTERN_ORDER", PREG_PATTERN_ORDER, CONST_CS | CONST_PERSISTENT);
141 REGISTER_LONG_CONSTANT("PREG_SET_ORDER", PREG_SET_ORDER, CONST_CS | CONST_PERSISTENT);
142 REGISTER_LONG_CONSTANT("PREG_OFFSET_CAPTURE", PREG_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
143 REGISTER_LONG_CONSTANT("PREG_SPLIT_NO_EMPTY", PREG_SPLIT_NO_EMPTY, CONST_CS | CONST_PERSISTENT);
144 REGISTER_LONG_CONSTANT("PREG_SPLIT_DELIM_CAPTURE", PREG_SPLIT_DELIM_CAPTURE, CONST_CS | CONST_PERSISTENT);
145 REGISTER_LONG_CONSTANT("PREG_SPLIT_OFFSET_CAPTURE", PREG_SPLIT_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
146 REGISTER_LONG_CONSTANT("PREG_GREP_INVERT", PREG_GREP_INVERT, CONST_CS | CONST_PERSISTENT);
147
/* Error codes, i.e. the possible values of PCRE_G(error_code). */
148 REGISTER_LONG_CONSTANT("PREG_NO_ERROR", PHP_PCRE_NO_ERROR, CONST_CS | CONST_PERSISTENT);
149 REGISTER_LONG_CONSTANT("PREG_INTERNAL_ERROR", PHP_PCRE_INTERNAL_ERROR, CONST_CS | CONST_PERSISTENT);
150 REGISTER_LONG_CONSTANT("PREG_BACKTRACK_LIMIT_ERROR", PHP_PCRE_BACKTRACK_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
151 REGISTER_LONG_CONSTANT("PREG_RECURSION_LIMIT_ERROR", PHP_PCRE_RECURSION_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
152 REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_ERROR", PHP_PCRE_BAD_UTF8_ERROR, CONST_CS | CONST_PERSISTENT);
153 REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_OFFSET_ERROR", PHP_PCRE_BAD_UTF8_OFFSET_ERROR, CONST_CS | CONST_PERSISTENT);
/* Version string as reported by the PCRE library itself. */
154 REGISTER_STRING_CONSTANT("PCRE_VERSION", (char *)pcre_version(), CONST_CS | CONST_PERSISTENT);
155
156 return SUCCESS;
157 }
158 /* }}} */
159
160 /* {{{ PHP_MSHUTDOWN_FUNCTION(pcre) */
/* Module shutdown: removes the INI entries registered in MINIT.
   Always returns SUCCESS. */
PHP_MSHUTDOWN_FUNCTION(pcre)161 static PHP_MSHUTDOWN_FUNCTION(pcre)
162 {
163 UNREGISTER_INI_ENTRIES();
164
165 return SUCCESS;
166 }
167 /* }}} */
168
169 /* {{{ static pcre_clean_cache */
pcre_clean_cache(void * data,void * arg TSRMLS_DC)170 static int pcre_clean_cache(void *data, void *arg TSRMLS_DC)
171 {
172 int *num_clean = (int *)arg;
173
174 if (*num_clean > 0) {
175 (*num_clean)--;
176 return 1;
177 } else {
178 return 0;
179 }
180 }
181 /* }}} */
182
183 /* {{{ static make_subpats_table */
make_subpats_table(int num_subpats,pcre_cache_entry * pce TSRMLS_DC)184 static char **make_subpats_table(int num_subpats, pcre_cache_entry *pce TSRMLS_DC)
185 {
186 pcre_extra *extra = pce->extra;
187 int name_cnt = 0, name_size, ni = 0;
188 int rc;
189 char *name_table;
190 unsigned short name_idx;
191 char **subpat_names = (char **)ecalloc(num_subpats, sizeof(char *));
192
193 rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMECOUNT, &name_cnt);
194 if (rc < 0) {
195 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
196 efree(subpat_names);
197 return NULL;
198 }
199 if (name_cnt > 0) {
200 int rc1, rc2;
201
202 rc1 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMETABLE, &name_table);
203 rc2 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMEENTRYSIZE, &name_size);
204 rc = rc2 ? rc2 : rc1;
205 if (rc < 0) {
206 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
207 efree(subpat_names);
208 return NULL;
209 }
210
211 while (ni++ < name_cnt) {
212 name_idx = 0xff * (unsigned char)name_table[0] + (unsigned char)name_table[1];
213 subpat_names[name_idx] = name_table + 2;
214 if (is_numeric_string(subpat_names[name_idx], strlen(subpat_names[name_idx]), NULL, NULL, 0) > 0) {
215 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Numeric named subpatterns are not allowed");
216 efree(subpat_names);
217 return NULL;
218 }
219 name_table += name_size;
220 }
221 }
222
223 return subpat_names;
224 }
225 /* }}} */
226
227 /* {{{ pcre_get_compiled_regex_cache
228 */
/* Return the cache entry for a delimited PHP regex ("/pattern/modifiers").
 *
 * regex     - NUL-terminated regex string including delimiters and modifiers
 * regex_len - length of regex; the cache key is regex with its trailing NUL
 *             (regex_len+1 bytes)
 *
 * On a cache hit the stored entry is returned directly (when locale support
 * is compiled in, only if it was compiled under the current LC_CTYPE locale).
 * Otherwise the string is split into pattern and modifiers, compiled with
 * pcre_compile(), optionally studied, stored in PCRE_G(pcre_cache) and the
 * pointer to the stored entry is returned.
 *
 * Returns NULL after raising an E_WARNING when the regex is empty, has an
 * invalid or unterminated delimiter, carries an unknown modifier, or fails
 * to compile. */
pcre_get_compiled_regex_cache(char * regex,int regex_len TSRMLS_DC)229 PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(char *regex, int regex_len TSRMLS_DC)
230 {
231 pcre *re = NULL;
232 pcre_extra *extra;
233 int coptions = 0;
234 int soptions = 0;
235 const char *error;
236 int erroffset;
237 char delimiter;
238 char start_delimiter;
239 char end_delimiter;
240 char *p, *pp;
241 char *pattern;
242 int do_study = 0;
243 int poptions = 0;
244 int count = 0;
245 unsigned const char *tables = NULL;
246 #if HAVE_SETLOCALE
247 char *locale;
248 #endif
249 pcre_cache_entry *pce;
250 pcre_cache_entry new_entry;
251
252 #if HAVE_SETLOCALE
253 # if defined(PHP_WIN32) && defined(ZTS)
254 _configthreadlocale(_ENABLE_PER_THREAD_LOCALE);
255 # endif
/* Query (do not change) the current LC_CTYPE locale name. */
256 locale = setlocale(LC_CTYPE, NULL);
257 #endif
258
259 /* Try to lookup the cached regex entry, and if successful, just pass
260 back the compiled pattern, otherwise go on and compile it. */
261 if (zend_hash_find(&PCRE_G(pcre_cache), regex, regex_len+1, (void **)&pce) == SUCCESS) {
262 /*
263 * We use a quick pcre_fullinfo() check to see whether cache is corrupted, and if it
264 * is, we flush it and compile the pattern from scratch.
265 */
266 if (pcre_fullinfo(pce->re, NULL, PCRE_INFO_CAPTURECOUNT, &count) == PCRE_ERROR_BADMAGIC) {
267 zend_hash_clean(&PCRE_G(pcre_cache));
268 } else {
269 #if HAVE_SETLOCALE
/* An entry compiled under a different locale is not reused; execution falls
   through to a fresh compile whose result replaces it in the cache. */
270 if (!strcmp(pce->locale, locale)) {
271 #endif
272 return pce;
273 #if HAVE_SETLOCALE
274 }
275 #endif
276 }
277 }
278
279 p = regex;
280
281 /* Parse through the leading whitespace, and display a warning if we
282 get to the end without encountering a delimiter. */
283 while (isspace((int)*(unsigned char *)p)) p++;
284 if (*p == 0) {
285 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty regular expression");
286 return NULL;
287 }
288
289 /* Get the delimiter and display a warning if it is alphanumeric
290 or a backslash. */
291 delimiter = *p++;
292 if (isalnum((int)*(unsigned char *)&delimiter) || delimiter == '\\') {
293 php_error_docref(NULL TSRMLS_CC,E_WARNING, "Delimiter must not be alphanumeric or backslash");
294 return NULL;
295 }
296
297 start_delimiter = delimiter;
/* Lookup table trick: the character five positions after an opening bracket
   is its closing counterpart, and five positions after a closing bracket is
   that same closing bracket. Any other delimiter is left unchanged. */
298 if ((pp = strchr("([{< )]}> )]}>", delimiter)))
299 delimiter = pp[5];
300 end_delimiter = delimiter;
301
302 if (start_delimiter == end_delimiter) {
303 /* We need to iterate through the pattern, searching for the ending delimiter,
304 but skipping the backslashed delimiters. If the ending delimiter is not
305 found, display a warning. */
306 pp = p;
307 while (*pp != 0) {
308 if (*pp == '\\' && pp[1] != 0) pp++;
309 else if (*pp == delimiter)
310 break;
311 pp++;
312 }
313 if (*pp == 0) {
314 php_error_docref(NULL TSRMLS_CC,E_WARNING, "No ending delimiter '%c' found", delimiter);
315 return NULL;
316 }
317 } else {
318 /* We iterate through the pattern, searching for the matching ending
319 * delimiter. For each matching starting delimiter, we increment nesting
320 * level, and decrement it for each matching ending delimiter. If we
321 * reach the end of the pattern without matching, display a warning.
322 */
323 int brackets = 1; /* brackets nesting level */
324 pp = p;
325 while (*pp != 0) {
326 if (*pp == '\\' && pp[1] != 0) pp++;
327 else if (*pp == end_delimiter && --brackets <= 0)
328 break;
329 else if (*pp == start_delimiter)
330 brackets++;
331 pp++;
332 }
333 if (*pp == 0) {
334 php_error_docref(NULL TSRMLS_CC,E_WARNING, "No ending matching delimiter '%c' found", end_delimiter);
335 return NULL;
336 }
337 }
338
339 /* Make a copy of the actual pattern. */
/* p points just past the opening delimiter and pp at the closing one. The
   copy is owned by this function and freed on every path below. */
340 pattern = estrndup(p, pp-p);
341
342 /* Move on to the options */
343 pp++;
344
345 /* Parse through the options, setting appropriate flags. Display
346 a warning if we encounter an unknown modifier. */
347 while (*pp != 0) {
348 switch (*pp++) {
349 /* Perl compatible options */
350 case 'i': coptions |= PCRE_CASELESS; break;
351 case 'm': coptions |= PCRE_MULTILINE; break;
352 case 's': coptions |= PCRE_DOTALL; break;
353 case 'x': coptions |= PCRE_EXTENDED; break;
354
355 /* PCRE specific options */
356 case 'A': coptions |= PCRE_ANCHORED; break;
357 case 'D': coptions |= PCRE_DOLLAR_ENDONLY;break;
358 case 'S': do_study = 1; break;
359 case 'U': coptions |= PCRE_UNGREEDY; break;
360 case 'X': coptions |= PCRE_EXTRA; break;
361 case 'u': coptions |= PCRE_UTF8;
362 /* In PCRE, by default, \d, \D, \s, \S, \w, and \W recognize only ASCII
363 characters, even in UTF-8 mode. However, this can be changed by setting
364 the PCRE_UCP option. */
365 #ifdef PCRE_UCP
366 coptions |= PCRE_UCP;
367 #endif
368 break;
369
370 /* Custom preg options */
371 case 'e': poptions |= PREG_REPLACE_EVAL; break;
372
/* Spaces and newlines between or after modifiers are ignored. */
373 case ' ':
374 case '\n':
375 break;
376
377 default:
378 php_error_docref(NULL TSRMLS_CC,E_WARNING, "Unknown modifier '%c'", pp[-1]);
379 efree(pattern);
380 return NULL;
381 }
382 }
383
384 #if HAVE_SETLOCALE
/* Character tables are only built when the locale is not "C"; otherwise
   tables stays NULL and is passed to pcre_compile() as such. */
385 if (strcmp(locale, "C"))
386 tables = pcre_maketables();
387 #endif
388
389 /* Compile pattern and display a warning if compilation failed. */
390 re = pcre_compile(pattern,
391 coptions,
392 &error,
393 &erroffset,
394 tables);
395
396 if (re == NULL) {
397 php_error_docref(NULL TSRMLS_CC,E_WARNING, "Compilation failed: %s at offset %d", error, erroffset);
398 efree(pattern);
399 if (tables) {
400 pefree((void*)tables, 1);
401 }
402 return NULL;
403 }
404
405 /* If study option was specified, study the pattern and
406 store the result in extra for passing to pcre_exec. */
407 if (do_study) {
408 extra = pcre_study(re, soptions, &error);
409 if (extra) {
/* Mark the limit fields as valid; their values are filled in right before
   each match from the INI settings. */
410 extra->flags |= PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
411 }
/* A study error is only reported; the compiled pattern is still cached. */
412 if (error != NULL) {
413 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Error while studying pattern");
414 }
415 } else {
416 extra = NULL;
417 }
418
419 efree(pattern);
420
421 /*
422 * If we reached cache limit, clean out the items from the head of the list;
423 * these are supposedly the oldest ones (but not necessarily the least used
424 * ones).
425 */
426 if (zend_hash_num_elements(&PCRE_G(pcre_cache)) == PCRE_CACHE_SIZE) {
427 int num_clean = PCRE_CACHE_SIZE / 8;
428 zend_hash_apply_with_argument(&PCRE_G(pcre_cache), pcre_clean_cache, &num_clean TSRMLS_CC);
429 }
430
431 /* Store the compiled pattern and extra info in the cache. */
432 new_entry.re = re;
433 new_entry.extra = extra;
434 new_entry.preg_options = poptions;
435 new_entry.compile_options = coptions;
436 #if HAVE_SETLOCALE
437 new_entry.locale = pestrdup(locale, 1);
438 new_entry.tables = tables;
439 #endif
/* The hash table stores a copy of new_entry; pce receives the address of that
   stored copy, which is what gets returned to the caller. */
440 zend_hash_update(&PCRE_G(pcre_cache), regex, regex_len+1, (void *)&new_entry,
441 sizeof(pcre_cache_entry), (void**)&pce);
442
443 return pce;
444 }
445 /* }}} */
446
447 /* {{{ pcre_get_compiled_regex
448 */
pcre_get_compiled_regex(char * regex,pcre_extra ** extra,int * preg_options TSRMLS_DC)449 PHPAPI pcre* pcre_get_compiled_regex(char *regex, pcre_extra **extra, int *preg_options TSRMLS_DC)
450 {
451 pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex, strlen(regex) TSRMLS_CC);
452
453 if (extra) {
454 *extra = pce ? pce->extra : NULL;
455 }
456 if (preg_options) {
457 *preg_options = pce ? pce->preg_options : 0;
458 }
459
460 return pce ? pce->re : NULL;
461 }
462 /* }}} */
463
464 /* {{{ pcre_get_compiled_regex_ex
465 */
pcre_get_compiled_regex_ex(char * regex,pcre_extra ** extra,int * preg_options,int * compile_options TSRMLS_DC)466 PHPAPI pcre* pcre_get_compiled_regex_ex(char *regex, pcre_extra **extra, int *preg_options, int *compile_options TSRMLS_DC)
467 {
468 pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex, strlen(regex) TSRMLS_CC);
469
470 if (extra) {
471 *extra = pce ? pce->extra : NULL;
472 }
473 if (preg_options) {
474 *preg_options = pce ? pce->preg_options : 0;
475 }
476 if (compile_options) {
477 *compile_options = pce ? pce->compile_options : 0;
478 }
479
480 return pce ? pce->re : NULL;
481 }
482 /* }}} */
483
484 /* {{{ add_offset_pair */
add_offset_pair(zval * result,char * str,int len,int offset,char * name)485 static inline void add_offset_pair(zval *result, char *str, int len, int offset, char *name)
486 {
487 zval *match_pair;
488
489 ALLOC_ZVAL(match_pair);
490 array_init(match_pair);
491 INIT_PZVAL(match_pair);
492
493 /* Add (match, offset) to the return value */
494 add_next_index_stringl(match_pair, str, len, 1);
495 add_next_index_long(match_pair, offset);
496
497 if (name) {
498 zval_add_ref(&match_pair);
499 zend_hash_update(Z_ARRVAL_P(result), name, strlen(name)+1, &match_pair, sizeof(zval *), NULL);
500 }
501 zend_hash_next_index_insert(Z_ARRVAL_P(result), &match_pair, sizeof(zval *), NULL);
502 }
503 /* }}} */
504
/* {{{ php_do_pcre_match
 * Shared implementation of preg_match() (global == 0) and preg_match_all()
 * (global != 0): parses the userland arguments, obtains the compiled pattern
 * and delegates to php_pcre_match_impl().
 *
 * Arguments are (pattern, subject, subpatterns, flags, offset); the
 * subpatterns array is mandatory for the global variant and optional
 * otherwise. Returns FALSE to userland when argument parsing or pattern
 * compilation fails. */
static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global)
{
	/* parameters */
	char             *regex;            /* Regular expression */
	char             *subject;          /* String to match against */
	int               regex_len;
	int               subject_len;
	pcre_cache_entry *pce;              /* Compiled regular expression */
	zval             *subpats = NULL;   /* Array for subpatterns */
	long              flags = 0;        /* Match control flags */
	long              start_offset = 0; /* Where the new search starts */

	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, ((global) ? "ssz|ll" : "ss|zll"), &regex, &regex_len,
							  &subject, &subject_len, &subpats, &flags, &start_offset) == FAILURE) {
		RETURN_FALSE;
	}

	/* Compile regex or get it from cache. */
	if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) {
		RETURN_FALSE;
	}

	/* Flags are only honoured when the caller actually passed a 4th argument. */
	php_pcre_match_impl(pce, subject, subject_len, return_value, subpats,
		global, ZEND_NUM_ARGS() >= 4, flags, start_offset TSRMLS_CC);
}
/* }}} */
531
532 /* {{{ php_pcre_match_impl() */
php_pcre_match_impl(pcre_cache_entry * pce,char * subject,int subject_len,zval * return_value,zval * subpats,int global,int use_flags,long flags,long start_offset TSRMLS_DC)533 PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value,
534 zval *subpats, int global, int use_flags, long flags, long start_offset TSRMLS_DC)
535 {
536 zval *result_set, /* Holds a set of subpatterns after
537 a global match */
538 **match_sets = NULL; /* An array of sets of matches for each
539 subpattern after a global match */
540 pcre_extra *extra = pce->extra;/* Holds results of studying */
541 pcre_extra extra_data; /* Used locally for exec options */
542 int exoptions = 0; /* Execution options */
543 int count = 0; /* Count of matched subpatterns */
544 int *offsets; /* Array of subpattern offsets */
545 int num_subpats; /* Number of captured subpatterns */
546 int size_offsets; /* Size of the offsets array */
547 int matched; /* Has anything matched */
548 int g_notempty = 0; /* If the match should not be empty */
549 const char **stringlist; /* Holds list of subpatterns */
550 char **subpat_names; /* Array for named subpatterns */
551 int i, rc;
552 int subpats_order; /* Order of subpattern matches */
553 int offset_capture; /* Capture match offsets: yes/no */
554
555 /* Overwrite the passed-in value for subpatterns with an empty array. */
556 if (subpats != NULL) {
557 zval_dtor(subpats);
558 array_init(subpats);
559 }
560
561 subpats_order = global ? PREG_PATTERN_ORDER : 0;
562
563 if (use_flags) {
564 offset_capture = flags & PREG_OFFSET_CAPTURE;
565
566 /*
567 * subpats_order is pre-set to pattern mode so we change it only if
568 * necessary.
569 */
570 if (flags & 0xff) {
571 subpats_order = flags & 0xff;
572 }
573 if ((global && (subpats_order < PREG_PATTERN_ORDER || subpats_order > PREG_SET_ORDER)) ||
574 (!global && subpats_order != 0)) {
575 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid flags specified");
576 return;
577 }
578 } else {
579 offset_capture = 0;
580 }
581
582 /* Negative offset counts from the end of the string. */
583 if (start_offset < 0) {
584 start_offset = subject_len + start_offset;
585 if (start_offset < 0) {
586 start_offset = 0;
587 }
588 }
589
590 if (extra == NULL) {
591 extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
592 extra = &extra_data;
593 }
594 extra->match_limit = PCRE_G(backtrack_limit);
595 extra->match_limit_recursion = PCRE_G(recursion_limit);
596
597 /* Calculate the size of the offsets array, and allocate memory for it. */
598 rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &num_subpats);
599 if (rc < 0) {
600 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
601 RETURN_FALSE;
602 }
603 num_subpats++;
604 size_offsets = num_subpats * 3;
605
606 /*
607 * Build a mapping from subpattern numbers to their names. We will always
608 * allocate the table, even though there may be no named subpatterns. This
609 * avoids somewhat more complicated logic in the inner loops.
610 */
611 subpat_names = make_subpats_table(num_subpats, pce TSRMLS_CC);
612 if (!subpat_names) {
613 RETURN_FALSE;
614 }
615
616 offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
617
618 /* Allocate match sets array and initialize the values. */
619 if (global && subpats_order == PREG_PATTERN_ORDER) {
620 match_sets = (zval **)safe_emalloc(num_subpats, sizeof(zval *), 0);
621 for (i=0; i<num_subpats; i++) {
622 ALLOC_ZVAL(match_sets[i]);
623 array_init(match_sets[i]);
624 INIT_PZVAL(match_sets[i]);
625 }
626 }
627
628 matched = 0;
629 PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
630
631 do {
632 /* Execute the regular expression. */
633 count = pcre_exec(pce->re, extra, subject, subject_len, start_offset,
634 exoptions|g_notempty, offsets, size_offsets);
635
636 /* the string was already proved to be valid UTF-8 */
637 exoptions |= PCRE_NO_UTF8_CHECK;
638
639 /* Check for too many substrings condition. */
640 if (count == 0) {
641 php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Matched, but too many substrings");
642 count = size_offsets/3;
643 }
644
645 /* If something has matched */
646 if (count > 0) {
647 matched++;
648
649 /* If subpatterns array has been passed, fill it in with values. */
650 if (subpats != NULL) {
651 /* Try to get the list of substrings and display a warning if failed. */
652 if (pcre_get_substring_list(subject, offsets, count, &stringlist) < 0) {
653 efree(subpat_names);
654 efree(offsets);
655 if (match_sets) efree(match_sets);
656 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Get subpatterns list failed");
657 RETURN_FALSE;
658 }
659
660 if (global) { /* global pattern matching */
661 if (subpats_order == PREG_PATTERN_ORDER) {
662 /* For each subpattern, insert it into the appropriate array. */
663 for (i = 0; i < count; i++) {
664 if (offset_capture) {
665 add_offset_pair(match_sets[i], (char *)stringlist[i],
666 offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL);
667 } else {
668 add_next_index_stringl(match_sets[i], (char *)stringlist[i],
669 offsets[(i<<1)+1] - offsets[i<<1], 1);
670 }
671 }
672 /*
673 * If the number of captured subpatterns on this run is
674 * less than the total possible number, pad the result
675 * arrays with empty strings.
676 */
677 if (count < num_subpats) {
678 for (; i < num_subpats; i++) {
679 add_next_index_string(match_sets[i], "", 1);
680 }
681 }
682 } else {
683 /* Allocate the result set array */
684 ALLOC_ZVAL(result_set);
685 array_init(result_set);
686 INIT_PZVAL(result_set);
687
688 /* Add all the subpatterns to it */
689 for (i = 0; i < count; i++) {
690 if (offset_capture) {
691 add_offset_pair(result_set, (char *)stringlist[i],
692 offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], subpat_names[i]);
693 } else {
694 if (subpat_names[i]) {
695 add_assoc_stringl(result_set, subpat_names[i], (char *)stringlist[i],
696 offsets[(i<<1)+1] - offsets[i<<1], 1);
697 }
698 add_next_index_stringl(result_set, (char *)stringlist[i],
699 offsets[(i<<1)+1] - offsets[i<<1], 1);
700 }
701 }
702 /* And add it to the output array */
703 zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &result_set, sizeof(zval *), NULL);
704 }
705 } else { /* single pattern matching */
706 /* For each subpattern, insert it into the subpatterns array. */
707 for (i = 0; i < count; i++) {
708 if (offset_capture) {
709 add_offset_pair(subpats, (char *)stringlist[i],
710 offsets[(i<<1)+1] - offsets[i<<1],
711 offsets[i<<1], subpat_names[i]);
712 } else {
713 if (subpat_names[i]) {
714 add_assoc_stringl(subpats, subpat_names[i], (char *)stringlist[i],
715 offsets[(i<<1)+1] - offsets[i<<1], 1);
716 }
717 add_next_index_stringl(subpats, (char *)stringlist[i],
718 offsets[(i<<1)+1] - offsets[i<<1], 1);
719 }
720 }
721 }
722
723 pcre_free((void *) stringlist);
724 }
725 } else if (count == PCRE_ERROR_NOMATCH) {
726 /* If we previously set PCRE_NOTEMPTY after a null match,
727 this is not necessarily the end. We need to advance
728 the start offset, and continue. Fudge the offset values
729 to achieve this, unless we're already at the end of the string. */
730 if (g_notempty != 0 && start_offset < subject_len) {
731 offsets[0] = start_offset;
732 offsets[1] = start_offset + 1;
733 } else
734 break;
735 } else {
736 pcre_handle_exec_error(count TSRMLS_CC);
737 break;
738 }
739
740 /* If we have matched an empty string, mimic what Perl's /g options does.
741 This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
742 the match again at the same point. If this fails (picked up above) we
743 advance to the next character. */
744 g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
745
746 /* Advance to the position right after the last full match */
747 start_offset = offsets[1];
748 } while (global);
749
750 /* Add the match sets to the output array and clean up */
751 if (global && subpats_order == PREG_PATTERN_ORDER) {
752 for (i = 0; i < num_subpats; i++) {
753 if (subpat_names[i]) {
754 zend_hash_update(Z_ARRVAL_P(subpats), subpat_names[i],
755 strlen(subpat_names[i])+1, &match_sets[i], sizeof(zval *), NULL);
756 Z_ADDREF_P(match_sets[i]);
757 }
758 zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i], sizeof(zval *), NULL);
759 }
760 efree(match_sets);
761 }
762
763 efree(offsets);
764 efree(subpat_names);
765
766 /* Did we encounter an error? */
767 if (PCRE_G(error_code) == PHP_PCRE_NO_ERROR) {
768 RETVAL_LONG(matched);
769 } else {
770 RETVAL_FALSE;
771 }
772 }
773 /* }}} */
774
775 /* {{{ proto int preg_match(string pattern, string subject [, array &subpatterns [, int flags [, int offset]]])
776 Perform a Perl-style regular expression match */
PHP_FUNCTION(preg_match)777 static PHP_FUNCTION(preg_match)
778 {
/* Second argument 0 selects the non-global (single match) mode. */
779 php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
780 }
781 /* }}} */
782
783 /* {{{ proto int preg_match_all(string pattern, string subject, array &subpatterns [, int flags [, int offset]])
784 Perform a Perl-style global regular expression match */
PHP_FUNCTION(preg_match_all)785 static PHP_FUNCTION(preg_match_all)
786 {
/* Second argument 1 selects the global (all matches) mode. */
787 php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
788 }
789 /* }}} */
790
/* {{{ preg_get_backref
 * Try to parse a back-reference at *str, which must point at its introducer
 * character. Recognised forms are <introducer>N, <introducer>NN and ${N} /
 * ${NN}, where N is a decimal digit (at most two digits are consumed).
 *
 * On success the reference number is stored in *backref, *str is advanced
 * past the reference and 1 is returned. On failure 0 is returned and *str is
 * left untouched; *backref may already have been overwritten. */
static int preg_get_backref(char **str, int *backref)
{
	char *cursor = *str;
	int braced;

	/* An introducer at the very end of the string cannot start a reference. */
	if (cursor[1] == 0) {
		return 0;
	}

	braced = (cursor[0] == '$' && cursor[1] == '{');
	cursor += braced ? 2 : 1;

	/* First digit is mandatory. */
	if (*cursor < '0' || *cursor > '9') {
		return 0;
	}
	*backref = *cursor - '0';
	cursor++;

	/* Second digit is optional. */
	if (*cursor >= '0' && *cursor <= '9') {
		*backref = *backref * 10 + *cursor - '0';
		cursor++;
	}

	/* The braced form must be closed right after the digits. */
	if (braced) {
		if (*cursor != '}') {
			return 0;
		}
		cursor++;
	}

	*str = cursor;
	return 1;
}
/* }}} */
829
830 /* {{{ preg_do_repl_func
831 */
preg_do_repl_func(zval * function,char * subject,int * offsets,char ** subpat_names,int count,char ** result TSRMLS_DC)832 static int preg_do_repl_func(zval *function, char *subject, int *offsets, char **subpat_names, int count, char **result TSRMLS_DC)
833 {
834 zval *retval_ptr; /* Function return value */
835 zval **args[1]; /* Argument to pass to function */
836 zval *subpats; /* Captured subpatterns */
837 int result_len; /* Return value length */
838 int i;
839
840 MAKE_STD_ZVAL(subpats);
841 array_init(subpats);
842 for (i = 0; i < count; i++) {
843 if (subpat_names[i]) {
844 add_assoc_stringl(subpats, subpat_names[i], &subject[offsets[i<<1]] , offsets[(i<<1)+1] - offsets[i<<1], 1);
845 }
846 add_next_index_stringl(subpats, &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1], 1);
847 }
848 args[0] = &subpats;
849
850 if (call_user_function_ex(EG(function_table), NULL, function, &retval_ptr, 1, args, 0, NULL TSRMLS_CC) == SUCCESS && retval_ptr) {
851 convert_to_string_ex(&retval_ptr);
852 *result = estrndup(Z_STRVAL_P(retval_ptr), Z_STRLEN_P(retval_ptr));
853 result_len = Z_STRLEN_P(retval_ptr);
854 zval_ptr_dtor(&retval_ptr);
855 } else {
856 if (!EG(exception)) {
857 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to call custom replacement function");
858 }
859 result_len = offsets[1] - offsets[0];
860 *result = estrndup(&subject[offsets[0]], result_len);
861 }
862
863 zval_ptr_dtor(&subpats);
864
865 return result_len;
866 }
867 /* }}} */
868
869 /* {{{ preg_do_eval
870 */
/* Build PHP code from the replacement string and evaluate it (the 'e'
 * modifier).
 *
 * eval_str / eval_str_len - replacement code template
 * subject, offsets, count - subject string and the offsets/count produced by
 *                           the match whose captures are substituted
 * result                  - receives a newly allocated copy of the
 *                           evaluation result converted to string
 *
 * Back-references in the template are replaced by the corresponding captured
 * text passed through php_addslashes_ex(); references to groups >= count
 * become empty strings. Returns the length of *result. An evaluation failure
 * raises E_ERROR.
 *
 * NOTE(review): the evaluated code embeds text taken from the subject; the
 * only protection visible here is the php_addslashes_ex() escaping. */
preg_do_eval(char * eval_str,int eval_str_len,char * subject,int * offsets,int count,char ** result TSRMLS_DC)871 static int preg_do_eval(char *eval_str, int eval_str_len, char *subject,
872 int *offsets, int count, char **result TSRMLS_DC)
873 {
874 zval retval; /* Return value from evaluation */
875 char *eval_str_end, /* End of eval string */
876 *match, /* Current match for a backref */
877 *esc_match, /* Quote-escaped match */
878 *walk, /* Used to walk the code string */
879 *segment, /* Start of segment to append while walking */
880 walk_last; /* Last walked character */
881 int match_len; /* Length of the match */
882 int esc_match_len; /* Length of the quote-escaped match */
883 int result_len; /* Length of the result of the evaluation */
884 int backref; /* Current backref */
885 char *compiled_string_description;
886 smart_str code = {0};
887
888 eval_str_end = eval_str + eval_str_len;
889 walk = segment = eval_str;
890 walk_last = 0;
891
892 while (walk < eval_str_end) {
893 /* If found a backreference.. */
894 if ('\\' == *walk || '$' == *walk) {
/* Flush the literal text collected since the last substitution. */
895 smart_str_appendl(&code, segment, walk - segment);
896 if (walk_last == '\\') {
/* The introducer is itself preceded by a backslash: overwrite that backslash
   (the last byte just appended) with the introducer and do not treat it as a
   back-reference. */
897 code.c[code.len-1] = *walk++;
898 segment = walk;
899 walk_last = 0;
900 continue;
901 }
902 segment = walk;
903 if (preg_get_backref(&walk, &backref)) {
904 if (backref < count) {
905 /* Find the corresponding string match and substitute it
906 in instead of the backref */
907 match = subject + offsets[backref<<1];
908 match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
909 if (match_len) {
910 esc_match = php_addslashes_ex(match, match_len, &esc_match_len, 0, 1 TSRMLS_CC);
911 } else {
912 esc_match = match;
913 esc_match_len = 0;
914 }
915 } else {
916 esc_match = "";
917 esc_match_len = 0;
918 }
919 smart_str_appendl(&code, esc_match, esc_match_len);
920
921 segment = walk;
922
923 /* Clean up and reassign */
/* A non-zero length can only come from the php_addslashes_ex() branch above;
   presumably that call returns a freshly allocated buffer - verify. */
924 if (esc_match_len)
925 efree(esc_match);
926 continue;
927 }
928 }
929 walk++;
930 walk_last = walk[-1];
931 }
/* Append the trailing literal text and NUL-terminate the code buffer. */
932 smart_str_appendl(&code, segment, walk - segment);
933 smart_str_0(&code);
934
935 compiled_string_description = zend_make_compiled_string_description("regexp code" TSRMLS_CC);
936 /* Run the code */
937 if (zend_eval_stringl(code.c, code.len, &retval, compiled_string_description TSRMLS_CC) == FAILURE) {
938 efree(compiled_string_description);
939 php_error_docref(NULL TSRMLS_CC,E_ERROR, "Failed evaluating code: %s%s", PHP_EOL, code.c);
940 /* zend_error() does not return in this case */
941 }
942 efree(compiled_string_description);
943 convert_to_string(&retval);
944
945 /* Save the return value and its length */
946 *result = estrndup(Z_STRVAL(retval), Z_STRLEN(retval));
947 result_len = Z_STRLEN(retval);
948
949 /* Clean up */
950 zval_dtor(&retval);
951 smart_str_free(&code);
952
953 return result_len;
954 }
955 /* }}} */
956
957 /* {{{ php_pcre_replace
958 */
php_pcre_replace(char * regex,int regex_len,char * subject,int subject_len,zval * replace_val,int is_callable_replace,int * result_len,int limit,int * replace_count TSRMLS_DC)959 PHPAPI char *php_pcre_replace(char *regex, int regex_len,
960 char *subject, int subject_len,
961 zval *replace_val, int is_callable_replace,
962 int *result_len, int limit, int *replace_count TSRMLS_DC)
963 {
964 pcre_cache_entry *pce; /* Compiled regular expression */
965
966 /* Compile regex or get it from cache. */
967 if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) {
968 return NULL;
969 }
970
971 return php_pcre_replace_impl(pce, subject, subject_len, replace_val,
972 is_callable_replace, result_len, limit, replace_count TSRMLS_CC);
973 }
974 /* }}} */
975
976 /* {{{ php_pcre_replace_impl() */
php_pcre_replace_impl(pcre_cache_entry * pce,char * subject,int subject_len,zval * replace_val,int is_callable_replace,int * result_len,int limit,int * replace_count TSRMLS_DC)977 PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *replace_val,
978 int is_callable_replace, int *result_len, int limit, int *replace_count TSRMLS_DC)
979 {
980 pcre_extra *extra = pce->extra;/* Holds results of studying */
981 pcre_extra extra_data; /* Used locally for exec options */
982 int exoptions = 0; /* Execution options */
983 int count = 0; /* Count of matched subpatterns */
984 int *offsets; /* Array of subpattern offsets */
985 char **subpat_names; /* Array for named subpatterns */
986 int num_subpats; /* Number of captured subpatterns */
987 int size_offsets; /* Size of the offsets array */
988 int new_len; /* Length of needed storage */
989 int alloc_len; /* Actual allocated length */
990 int eval_result_len=0; /* Length of the eval'ed or
991 function-returned string */
992 int match_len; /* Length of the current match */
993 int backref; /* Backreference number */
994 int eval; /* If the replacement string should be eval'ed */
995 int start_offset; /* Where the new search starts */
996 int g_notempty=0; /* If the match should not be empty */
997 int replace_len=0; /* Length of replacement string */
998 char *result, /* Result of replacement */
999 *replace=NULL, /* Replacement string */
1000 *new_buf, /* Temporary buffer for re-allocation */
1001 *walkbuf, /* Location of current replacement in the result */
1002 *walk, /* Used to walk the replacement string */
1003 *match, /* The current match */
1004 *piece, /* The current piece of subject */
1005 *replace_end=NULL, /* End of replacement string */
1006 *eval_result, /* Result of eval or custom function */
1007 walk_last; /* Last walked character */
1008 int rc;
1009
1010 if (extra == NULL) {
1011 extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1012 extra = &extra_data;
1013 }
1014 extra->match_limit = PCRE_G(backtrack_limit);
1015 extra->match_limit_recursion = PCRE_G(recursion_limit);
1016
1017 eval = pce->preg_options & PREG_REPLACE_EVAL;
1018 if (is_callable_replace) {
1019 if (eval) {
1020 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Modifier /e cannot be used with replacement callback");
1021 return NULL;
1022 }
1023 } else {
1024 replace = Z_STRVAL_P(replace_val);
1025 replace_len = Z_STRLEN_P(replace_val);
1026 replace_end = replace + replace_len;
1027 }
1028
1029 /* Calculate the size of the offsets array, and allocate memory for it. */
1030 rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &num_subpats);
1031 if (rc < 0) {
1032 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
1033 return NULL;
1034 }
1035 num_subpats++;
1036 size_offsets = num_subpats * 3;
1037
1038 /*
1039 * Build a mapping from subpattern numbers to their names. We will always
1040 * allocate the table, even though there may be no named subpatterns. This
1041 * avoids somewhat more complicated logic in the inner loops.
1042 */
1043 subpat_names = make_subpats_table(num_subpats, pce TSRMLS_CC);
1044 if (!subpat_names) {
1045 return NULL;
1046 }
1047
1048 offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
1049
1050 alloc_len = 2 * subject_len + 1;
1051 result = safe_emalloc(alloc_len, sizeof(char), 0);
1052
1053 /* Initialize */
1054 match = NULL;
1055 *result_len = 0;
1056 start_offset = 0;
1057 PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1058
1059 while (1) {
1060 /* Execute the regular expression. */
1061 count = pcre_exec(pce->re, extra, subject, subject_len, start_offset,
1062 exoptions|g_notempty, offsets, size_offsets);
1063
1064 /* the string was already proved to be valid UTF-8 */
1065 exoptions |= PCRE_NO_UTF8_CHECK;
1066
1067 /* Check for too many substrings condition. */
1068 if (count == 0) {
1069 php_error_docref(NULL TSRMLS_CC,E_NOTICE, "Matched, but too many substrings");
1070 count = size_offsets/3;
1071 }
1072
1073 piece = subject + start_offset;
1074
1075 if (count > 0 && (limit == -1 || limit > 0)) {
1076 if (replace_count) {
1077 ++*replace_count;
1078 }
1079 /* Set the match location in subject */
1080 match = subject + offsets[0];
1081
1082 new_len = *result_len + offsets[0] - start_offset; /* part before the match */
1083
1084 /* If evaluating, do it and add the return string's length */
1085 if (eval) {
1086 eval_result_len = preg_do_eval(replace, replace_len, subject,
1087 offsets, count, &eval_result TSRMLS_CC);
1088 new_len += eval_result_len;
1089 } else if (is_callable_replace) {
1090 /* Use custom function to get replacement string and its length. */
1091 eval_result_len = preg_do_repl_func(replace_val, subject, offsets, subpat_names, count, &eval_result TSRMLS_CC);
1092 new_len += eval_result_len;
1093 } else { /* do regular substitution */
1094 walk = replace;
1095 walk_last = 0;
1096 while (walk < replace_end) {
1097 if ('\\' == *walk || '$' == *walk) {
1098 if (walk_last == '\\') {
1099 walk++;
1100 walk_last = 0;
1101 continue;
1102 }
1103 if (preg_get_backref(&walk, &backref)) {
1104 if (backref < count)
1105 new_len += offsets[(backref<<1)+1] - offsets[backref<<1];
1106 continue;
1107 }
1108 }
1109 new_len++;
1110 walk++;
1111 walk_last = walk[-1];
1112 }
1113 }
1114
1115 if (new_len + 1 > alloc_len) {
1116 alloc_len = 1 + alloc_len + 2 * new_len;
1117 new_buf = emalloc(alloc_len);
1118 memcpy(new_buf, result, *result_len);
1119 efree(result);
1120 result = new_buf;
1121 }
1122 /* copy the part of the string before the match */
1123 memcpy(&result[*result_len], piece, match-piece);
1124 *result_len += match-piece;
1125
1126 /* copy replacement and backrefs */
1127 walkbuf = result + *result_len;
1128
1129 /* If evaluating or using custom function, copy result to the buffer
1130 * and clean up. */
1131 if (eval || is_callable_replace) {
1132 memcpy(walkbuf, eval_result, eval_result_len);
1133 *result_len += eval_result_len;
1134 STR_FREE(eval_result);
1135 } else { /* do regular backreference copying */
1136 walk = replace;
1137 walk_last = 0;
1138 while (walk < replace_end) {
1139 if ('\\' == *walk || '$' == *walk) {
1140 if (walk_last == '\\') {
1141 *(walkbuf-1) = *walk++;
1142 walk_last = 0;
1143 continue;
1144 }
1145 if (preg_get_backref(&walk, &backref)) {
1146 if (backref < count) {
1147 match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
1148 memcpy(walkbuf, subject + offsets[backref<<1], match_len);
1149 walkbuf += match_len;
1150 }
1151 continue;
1152 }
1153 }
1154 *walkbuf++ = *walk++;
1155 walk_last = walk[-1];
1156 }
1157 *walkbuf = '\0';
1158 /* increment the result length by how much we've added to the string */
1159 *result_len += walkbuf - (result + *result_len);
1160 }
1161
1162 if (limit != -1)
1163 limit--;
1164
1165 } else if (count == PCRE_ERROR_NOMATCH || limit == 0) {
1166 /* If we previously set PCRE_NOTEMPTY after a null match,
1167 this is not necessarily the end. We need to advance
1168 the start offset, and continue. Fudge the offset values
1169 to achieve this, unless we're already at the end of the string. */
1170 if (g_notempty != 0 && start_offset < subject_len) {
1171 offsets[0] = start_offset;
1172 offsets[1] = start_offset + 1;
1173 memcpy(&result[*result_len], piece, 1);
1174 (*result_len)++;
1175 } else {
1176 new_len = *result_len + subject_len - start_offset;
1177 if (new_len + 1 > alloc_len) {
1178 alloc_len = new_len + 1; /* now we know exactly how long it is */
1179 new_buf = safe_emalloc(alloc_len, sizeof(char), 0);
1180 memcpy(new_buf, result, *result_len);
1181 efree(result);
1182 result = new_buf;
1183 }
1184 /* stick that last bit of string on our output */
1185 memcpy(&result[*result_len], piece, subject_len - start_offset);
1186 *result_len += subject_len - start_offset;
1187 result[*result_len] = '\0';
1188 break;
1189 }
1190 } else {
1191 pcre_handle_exec_error(count TSRMLS_CC);
1192 efree(result);
1193 result = NULL;
1194 break;
1195 }
1196
1197 /* If we have matched an empty string, mimic what Perl's /g options does.
1198 This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
1199 the match again at the same point. If this fails (picked up above) we
1200 advance to the next character. */
1201 g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
1202
1203 /* Advance to the next piece. */
1204 start_offset = offsets[1];
1205 }
1206
1207 efree(offsets);
1208 efree(subpat_names);
1209
1210 return result;
1211 }
1212 /* }}} */
1213
1214 /* {{{ php_replace_in_subject
1215 */
php_replace_in_subject(zval * regex,zval * replace,zval ** subject,int * result_len,int limit,int is_callable_replace,int * replace_count TSRMLS_DC)1216 static char *php_replace_in_subject(zval *regex, zval *replace, zval **subject, int *result_len, int limit, int is_callable_replace, int *replace_count TSRMLS_DC)
1217 {
1218 zval **regex_entry,
1219 **replace_entry = NULL,
1220 *replace_value,
1221 empty_replace;
1222 char *subject_value,
1223 *result;
1224 int subject_len;
1225
1226 /* Make sure we're dealing with strings. */
1227 convert_to_string_ex(subject);
1228 /* FIXME: This might need to be changed to STR_EMPTY_ALLOC(). Check if this zval could be dtor()'ed somehow */
1229 ZVAL_STRINGL(&empty_replace, "", 0, 0);
1230
1231 /* If regex is an array */
1232 if (Z_TYPE_P(regex) == IS_ARRAY) {
1233 /* Duplicate subject string for repeated replacement */
1234 subject_value = estrndup(Z_STRVAL_PP(subject), Z_STRLEN_PP(subject));
1235 subject_len = Z_STRLEN_PP(subject);
1236 *result_len = subject_len;
1237
1238 zend_hash_internal_pointer_reset(Z_ARRVAL_P(regex));
1239
1240 replace_value = replace;
1241 if (Z_TYPE_P(replace) == IS_ARRAY && !is_callable_replace)
1242 zend_hash_internal_pointer_reset(Z_ARRVAL_P(replace));
1243
1244 /* For each entry in the regex array, get the entry */
1245 while (zend_hash_get_current_data(Z_ARRVAL_P(regex), (void **)®ex_entry) == SUCCESS) {
1246 /* Make sure we're dealing with strings. */
1247 convert_to_string_ex(regex_entry);
1248
1249 /* If replace is an array and not a callable construct */
1250 if (Z_TYPE_P(replace) == IS_ARRAY && !is_callable_replace) {
1251 /* Get current entry */
1252 if (zend_hash_get_current_data(Z_ARRVAL_P(replace), (void **)&replace_entry) == SUCCESS) {
1253 if (!is_callable_replace) {
1254 convert_to_string_ex(replace_entry);
1255 }
1256 replace_value = *replace_entry;
1257 zend_hash_move_forward(Z_ARRVAL_P(replace));
1258 } else {
1259 /* We've run out of replacement strings, so use an empty one */
1260 replace_value = &empty_replace;
1261 }
1262 }
1263
1264 /* Do the actual replacement and put the result back into subject_value
1265 for further replacements. */
1266 if ((result = php_pcre_replace(Z_STRVAL_PP(regex_entry),
1267 Z_STRLEN_PP(regex_entry),
1268 subject_value,
1269 subject_len,
1270 replace_value,
1271 is_callable_replace,
1272 result_len,
1273 limit,
1274 replace_count TSRMLS_CC)) != NULL) {
1275 efree(subject_value);
1276 subject_value = result;
1277 subject_len = *result_len;
1278 } else {
1279 efree(subject_value);
1280 return NULL;
1281 }
1282
1283 zend_hash_move_forward(Z_ARRVAL_P(regex));
1284 }
1285
1286 return subject_value;
1287 } else {
1288 result = php_pcre_replace(Z_STRVAL_P(regex),
1289 Z_STRLEN_P(regex),
1290 Z_STRVAL_PP(subject),
1291 Z_STRLEN_PP(subject),
1292 replace,
1293 is_callable_replace,
1294 result_len,
1295 limit,
1296 replace_count TSRMLS_CC);
1297 return result;
1298 }
1299 }
1300 /* }}} */
1301
1302 /* {{{ preg_replace_impl
1303 */
preg_replace_impl(INTERNAL_FUNCTION_PARAMETERS,int is_callable_replace,int is_filter)1304 static void preg_replace_impl(INTERNAL_FUNCTION_PARAMETERS, int is_callable_replace, int is_filter)
1305 {
1306 zval **regex,
1307 **replace,
1308 **subject,
1309 **subject_entry,
1310 **zcount = NULL;
1311 char *result;
1312 int result_len;
1313 int limit_val = -1;
1314 long limit = -1;
1315 char *string_key;
1316 ulong num_key;
1317 char *callback_name;
1318 int replace_count=0, old_replace_count;
1319
1320 /* Get function parameters and do error-checking. */
1321 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ZZZ|lZ", ®ex, &replace, &subject, &limit, &zcount) == FAILURE) {
1322 return;
1323 }
1324
1325 if (!is_callable_replace && Z_TYPE_PP(replace) == IS_ARRAY && Z_TYPE_PP(regex) != IS_ARRAY) {
1326 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Parameter mismatch, pattern is a string while replacement is an array");
1327 RETURN_FALSE;
1328 }
1329
1330 SEPARATE_ZVAL(replace);
1331 if (Z_TYPE_PP(replace) != IS_ARRAY && (Z_TYPE_PP(replace) != IS_OBJECT || !is_callable_replace)) {
1332 convert_to_string_ex(replace);
1333 }
1334 if (is_callable_replace) {
1335 if (!zend_is_callable(*replace, 0, &callback_name TSRMLS_CC)) {
1336 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Requires argument 2, '%s', to be a valid callback", callback_name);
1337 efree(callback_name);
1338 MAKE_COPY_ZVAL(subject, return_value);
1339 return;
1340 }
1341 efree(callback_name);
1342 }
1343
1344 SEPARATE_ZVAL(regex);
1345 SEPARATE_ZVAL(subject);
1346
1347 if (ZEND_NUM_ARGS() > 3) {
1348 limit_val = limit;
1349 }
1350
1351 if (Z_TYPE_PP(regex) != IS_ARRAY)
1352 convert_to_string_ex(regex);
1353
1354 /* if subject is an array */
1355 if (Z_TYPE_PP(subject) == IS_ARRAY) {
1356 array_init(return_value);
1357 zend_hash_internal_pointer_reset(Z_ARRVAL_PP(subject));
1358
1359 /* For each subject entry, convert it to string, then perform replacement
1360 and add the result to the return_value array. */
1361 while (zend_hash_get_current_data(Z_ARRVAL_PP(subject), (void **)&subject_entry) == SUCCESS) {
1362 SEPARATE_ZVAL(subject_entry);
1363 old_replace_count = replace_count;
1364 if ((result = php_replace_in_subject(*regex, *replace, subject_entry, &result_len, limit_val, is_callable_replace, &replace_count TSRMLS_CC)) != NULL) {
1365 if (!is_filter || replace_count > old_replace_count) {
1366 /* Add to return array */
1367 switch(zend_hash_get_current_key(Z_ARRVAL_PP(subject), &string_key, &num_key, 0))
1368 {
1369 case HASH_KEY_IS_STRING:
1370 add_assoc_stringl(return_value, string_key, result, result_len, 0);
1371 break;
1372
1373 case HASH_KEY_IS_LONG:
1374 add_index_stringl(return_value, num_key, result, result_len, 0);
1375 break;
1376 }
1377 } else {
1378 efree(result);
1379 }
1380 }
1381
1382 zend_hash_move_forward(Z_ARRVAL_PP(subject));
1383 }
1384 } else { /* if subject is not an array */
1385 old_replace_count = replace_count;
1386 if ((result = php_replace_in_subject(*regex, *replace, subject, &result_len, limit_val, is_callable_replace, &replace_count TSRMLS_CC)) != NULL) {
1387 if (!is_filter || replace_count > old_replace_count) {
1388 RETVAL_STRINGL(result, result_len, 0);
1389 } else {
1390 efree(result);
1391 }
1392 }
1393 }
1394 if (ZEND_NUM_ARGS() > 4) {
1395 zval_dtor(*zcount);
1396 ZVAL_LONG(*zcount, replace_count);
1397 }
1398
1399 }
1400 /* }}} */
1401
/* {{{ proto mixed preg_replace(mixed regex, mixed replace, mixed subject [, int limit [, int &count]])
   Perform Perl-style regular expression replacement. */
static PHP_FUNCTION(preg_replace)
{
	/* is_callable_replace = 0 (replace is a string/array), is_filter = 0 */
	preg_replace_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 0);
}
/* }}} */
1409
/* {{{ proto mixed preg_replace_callback(mixed regex, mixed callback, mixed subject [, int limit [, int &count]])
   Perform Perl-style regular expression replacement using replacement callback. */
static PHP_FUNCTION(preg_replace_callback)
{
	/* is_callable_replace = 1 (second argument is a callback), is_filter = 0 */
	preg_replace_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1, 0);
}
/* }}} */
1417
/* {{{ proto mixed preg_filter(mixed regex, mixed replace, mixed subject [, int limit [, int &count]])
   Perform Perl-style regular expression replacement and only return matches. */
static PHP_FUNCTION(preg_filter)
{
	/* is_callable_replace = 0, is_filter = 1 (drop subjects with no replacement) */
	preg_replace_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 1);
}
/* }}} */
1425
1426 /* {{{ proto array preg_split(string pattern, string subject [, int limit [, int flags]])
1427 Split string into an array using a perl-style regular expression as a delimiter */
PHP_FUNCTION(preg_split)1428 static PHP_FUNCTION(preg_split)
1429 {
1430 char *regex; /* Regular expression */
1431 char *subject; /* String to match against */
1432 int regex_len;
1433 int subject_len;
1434 long limit_val = -1;/* Integer value of limit */
1435 long flags = 0; /* Match control flags */
1436 pcre_cache_entry *pce; /* Compiled regular expression */
1437
1438 /* Get function parameters and do error checking */
1439 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ll", ®ex, ®ex_len,
1440 &subject, &subject_len, &limit_val, &flags) == FAILURE) {
1441 RETURN_FALSE;
1442 }
1443
1444 /* Compile regex or get it from cache. */
1445 if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) {
1446 RETURN_FALSE;
1447 }
1448
1449 php_pcre_split_impl(pce, subject, subject_len, return_value, limit_val, flags TSRMLS_CC);
1450 }
1451 /* }}} */
1452
1453 /* {{{ php_pcre_split
1454 */
php_pcre_split_impl(pcre_cache_entry * pce,char * subject,int subject_len,zval * return_value,long limit_val,long flags TSRMLS_DC)1455 PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value,
1456 long limit_val, long flags TSRMLS_DC)
1457 {
1458 pcre_extra *extra = NULL; /* Holds results of studying */
1459 pcre *re_bump = NULL; /* Regex instance for empty matches */
1460 pcre_extra *extra_bump = NULL; /* Almost dummy */
1461 pcre_extra extra_data; /* Used locally for exec options */
1462 int *offsets; /* Array of subpattern offsets */
1463 int size_offsets; /* Size of the offsets array */
1464 int exoptions = 0; /* Execution options */
1465 int count = 0; /* Count of matched subpatterns */
1466 int start_offset; /* Where the new search starts */
1467 int next_offset; /* End of the last delimiter match + 1 */
1468 int g_notempty = 0; /* If the match should not be empty */
1469 char *last_match; /* Location of last match */
1470 int rc;
1471 int no_empty; /* If NO_EMPTY flag is set */
1472 int delim_capture; /* If delimiters should be captured */
1473 int offset_capture; /* If offsets should be captured */
1474
1475 no_empty = flags & PREG_SPLIT_NO_EMPTY;
1476 delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE;
1477 offset_capture = flags & PREG_SPLIT_OFFSET_CAPTURE;
1478
1479 if (limit_val == 0) {
1480 limit_val = -1;
1481 }
1482
1483 if (extra == NULL) {
1484 extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1485 extra = &extra_data;
1486 }
1487 extra->match_limit = PCRE_G(backtrack_limit);
1488 extra->match_limit_recursion = PCRE_G(recursion_limit);
1489
1490 /* Initialize return value */
1491 array_init(return_value);
1492
1493 /* Calculate the size of the offsets array, and allocate memory for it. */
1494 rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &size_offsets);
1495 if (rc < 0) {
1496 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
1497 RETURN_FALSE;
1498 }
1499 size_offsets = (size_offsets + 1) * 3;
1500 offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
1501
1502 /* Start at the beginning of the string */
1503 start_offset = 0;
1504 next_offset = 0;
1505 last_match = subject;
1506 PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1507
1508 /* Get next piece if no limit or limit not yet reached and something matched*/
1509 while ((limit_val == -1 || limit_val > 1)) {
1510 count = pcre_exec(pce->re, extra, subject,
1511 subject_len, start_offset,
1512 exoptions|g_notempty, offsets, size_offsets);
1513
1514 /* the string was already proved to be valid UTF-8 */
1515 exoptions |= PCRE_NO_UTF8_CHECK;
1516
1517 /* Check for too many substrings condition. */
1518 if (count == 0) {
1519 php_error_docref(NULL TSRMLS_CC,E_NOTICE, "Matched, but too many substrings");
1520 count = size_offsets/3;
1521 }
1522
1523 /* If something matched */
1524 if (count > 0) {
1525 if (!no_empty || &subject[offsets[0]] != last_match) {
1526
1527 if (offset_capture) {
1528 /* Add (match, offset) pair to the return value */
1529 add_offset_pair(return_value, last_match, &subject[offsets[0]]-last_match, next_offset, NULL);
1530 } else {
1531 /* Add the piece to the return value */
1532 add_next_index_stringl(return_value, last_match,
1533 &subject[offsets[0]]-last_match, 1);
1534 }
1535
1536 /* One less left to do */
1537 if (limit_val != -1)
1538 limit_val--;
1539 }
1540
1541 last_match = &subject[offsets[1]];
1542 next_offset = offsets[1];
1543
1544 if (delim_capture) {
1545 int i, match_len;
1546 for (i = 1; i < count; i++) {
1547 match_len = offsets[(i<<1)+1] - offsets[i<<1];
1548 /* If we have matched a delimiter */
1549 if (!no_empty || match_len > 0) {
1550 if (offset_capture) {
1551 add_offset_pair(return_value, &subject[offsets[i<<1]], match_len, offsets[i<<1], NULL);
1552 } else {
1553 add_next_index_stringl(return_value,
1554 &subject[offsets[i<<1]],
1555 match_len, 1);
1556 }
1557 }
1558 }
1559 }
1560 } else if (count == PCRE_ERROR_NOMATCH) {
1561 /* If we previously set PCRE_NOTEMPTY after a null match,
1562 this is not necessarily the end. We need to advance
1563 the start offset, and continue. Fudge the offset values
1564 to achieve this, unless we're already at the end of the string. */
1565 if (g_notempty != 0 && start_offset < subject_len) {
1566 if (pce->compile_options & PCRE_UTF8) {
1567 if (re_bump == NULL) {
1568 int dummy;
1569
1570 if ((re_bump = pcre_get_compiled_regex("/./us", &extra_bump, &dummy TSRMLS_CC)) == NULL) {
1571 RETURN_FALSE;
1572 }
1573 }
1574 count = pcre_exec(re_bump, extra_bump, subject,
1575 subject_len, start_offset,
1576 exoptions, offsets, size_offsets);
1577 if (count < 1) {
1578 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown error");
1579 RETURN_FALSE;
1580 }
1581 } else {
1582 offsets[0] = start_offset;
1583 offsets[1] = start_offset + 1;
1584 }
1585 } else
1586 break;
1587 } else {
1588 pcre_handle_exec_error(count TSRMLS_CC);
1589 break;
1590 }
1591
1592 /* If we have matched an empty string, mimic what Perl's /g options does.
1593 This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
1594 the match again at the same point. If this fails (picked up above) we
1595 advance to the next character. */
1596 g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
1597
1598 /* Advance to the position right after the last full match */
1599 start_offset = offsets[1];
1600 }
1601
1602
1603 start_offset = last_match - subject; /* the offset might have been incremented, but without further successful matches */
1604
1605 if (!no_empty || start_offset < subject_len)
1606 {
1607 if (offset_capture) {
1608 /* Add the last (match, offset) pair to the return value */
1609 add_offset_pair(return_value, &subject[start_offset], subject_len - start_offset, start_offset, NULL);
1610 } else {
1611 /* Add the last piece to the return value */
1612 add_next_index_stringl(return_value, last_match, subject + subject_len - last_match, 1);
1613 }
1614 }
1615
1616
1617 /* Clean up */
1618 efree(offsets);
1619 }
1620 /* }}} */
1621
1622 /* {{{ proto string preg_quote(string str [, string delim_char])
1623 Quote regular expression characters plus an optional character */
PHP_FUNCTION(preg_quote)1624 static PHP_FUNCTION(preg_quote)
1625 {
1626 int in_str_len;
1627 char *in_str; /* Input string argument */
1628 char *in_str_end; /* End of the input string */
1629 int delim_len = 0;
1630 char *delim = NULL; /* Additional delimiter argument */
1631 char *out_str, /* Output string with quoted characters */
1632 *p, /* Iterator for input string */
1633 *q, /* Iterator for output string */
1634 delim_char=0, /* Delimiter character to be quoted */
1635 c; /* Current character */
1636 zend_bool quote_delim = 0; /* Whether to quote additional delim char */
1637
1638 /* Get the arguments and check for errors */
1639 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s", &in_str, &in_str_len,
1640 &delim, &delim_len) == FAILURE) {
1641 return;
1642 }
1643
1644 in_str_end = in_str + in_str_len;
1645
1646 /* Nothing to do if we got an empty string */
1647 if (in_str == in_str_end) {
1648 RETURN_EMPTY_STRING();
1649 }
1650
1651 if (delim && *delim) {
1652 delim_char = delim[0];
1653 quote_delim = 1;
1654 }
1655
1656 /* Allocate enough memory so that even if each character
1657 is quoted, we won't run out of room */
1658 out_str = safe_emalloc(4, in_str_len, 1);
1659
1660 /* Go through the string and quote necessary characters */
1661 for(p = in_str, q = out_str; p != in_str_end; p++) {
1662 c = *p;
1663 switch(c) {
1664 case '.':
1665 case '\\':
1666 case '+':
1667 case '*':
1668 case '?':
1669 case '[':
1670 case '^':
1671 case ']':
1672 case '$':
1673 case '(':
1674 case ')':
1675 case '{':
1676 case '}':
1677 case '=':
1678 case '!':
1679 case '>':
1680 case '<':
1681 case '|':
1682 case ':':
1683 case '-':
1684 *q++ = '\\';
1685 *q++ = c;
1686 break;
1687
1688 case '\0':
1689 *q++ = '\\';
1690 *q++ = '0';
1691 *q++ = '0';
1692 *q++ = '0';
1693 break;
1694
1695 default:
1696 if (quote_delim && c == delim_char)
1697 *q++ = '\\';
1698 *q++ = c;
1699 break;
1700 }
1701 }
1702 *q = '\0';
1703
1704 /* Reallocate string and return it */
1705 RETVAL_STRINGL(erealloc(out_str, q - out_str + 1), q - out_str, 0);
1706 }
1707 /* }}} */
1708
1709 /* {{{ proto array preg_grep(string regex, array input [, int flags])
1710 Searches array and returns entries which match regex */
PHP_FUNCTION(preg_grep)1711 static PHP_FUNCTION(preg_grep)
1712 {
1713 char *regex; /* Regular expression */
1714 int regex_len;
1715 zval *input; /* Input array */
1716 long flags = 0; /* Match control flags */
1717 pcre_cache_entry *pce; /* Compiled regular expression */
1718
1719 /* Get arguments and do error checking */
1720 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sa|l", ®ex, ®ex_len,
1721 &input, &flags) == FAILURE) {
1722 return;
1723 }
1724
1725 /* Compile regex or get it from cache. */
1726 if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) {
1727 RETURN_FALSE;
1728 }
1729
1730 php_pcre_grep_impl(pce, input, return_value, flags TSRMLS_CC);
1731 }
1732 /* }}} */
1733
php_pcre_grep_impl(pcre_cache_entry * pce,zval * input,zval * return_value,long flags TSRMLS_DC)1734 PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return_value, long flags TSRMLS_DC) /* {{{ */
1735 {
1736 zval **entry; /* An entry in the input array */
1737 pcre_extra *extra = pce->extra;/* Holds results of studying */
1738 pcre_extra extra_data; /* Used locally for exec options */
1739 int *offsets; /* Array of subpattern offsets */
1740 int size_offsets; /* Size of the offsets array */
1741 int count = 0; /* Count of matched subpatterns */
1742 char *string_key;
1743 ulong num_key;
1744 zend_bool invert; /* Whether to return non-matching
1745 entries */
1746 int rc;
1747
1748 invert = flags & PREG_GREP_INVERT ? 1 : 0;
1749
1750 if (extra == NULL) {
1751 extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1752 extra = &extra_data;
1753 }
1754 extra->match_limit = PCRE_G(backtrack_limit);
1755 extra->match_limit_recursion = PCRE_G(recursion_limit);
1756
1757 /* Calculate the size of the offsets array, and allocate memory for it. */
1758 rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &size_offsets);
1759 if (rc < 0) {
1760 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
1761 RETURN_FALSE;
1762 }
1763 size_offsets = (size_offsets + 1) * 3;
1764 offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
1765
1766 /* Initialize return array */
1767 array_init(return_value);
1768
1769 PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1770
1771 /* Go through the input array */
1772 zend_hash_internal_pointer_reset(Z_ARRVAL_P(input));
1773 while (zend_hash_get_current_data(Z_ARRVAL_P(input), (void **)&entry) == SUCCESS) {
1774 zval subject = **entry;
1775
1776 if (Z_TYPE_PP(entry) != IS_STRING) {
1777 zval_copy_ctor(&subject);
1778 convert_to_string(&subject);
1779 }
1780
1781 /* Perform the match */
1782 count = pcre_exec(pce->re, extra, Z_STRVAL(subject),
1783 Z_STRLEN(subject), 0,
1784 0, offsets, size_offsets);
1785
1786 /* Check for too many substrings condition. */
1787 if (count == 0) {
1788 php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Matched, but too many substrings");
1789 count = size_offsets/3;
1790 } else if (count < 0 && count != PCRE_ERROR_NOMATCH) {
1791 pcre_handle_exec_error(count TSRMLS_CC);
1792 break;
1793 }
1794
1795 /* If the entry fits our requirements */
1796 if ((count > 0 && !invert) || (count == PCRE_ERROR_NOMATCH && invert)) {
1797
1798 Z_ADDREF_PP(entry);
1799
1800 /* Add to return array */
1801 switch (zend_hash_get_current_key(Z_ARRVAL_P(input), &string_key, &num_key, 0))
1802 {
1803 case HASH_KEY_IS_STRING:
1804 zend_hash_update(Z_ARRVAL_P(return_value), string_key,
1805 strlen(string_key)+1, entry, sizeof(zval *), NULL);
1806 break;
1807
1808 case HASH_KEY_IS_LONG:
1809 zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, entry,
1810 sizeof(zval *), NULL);
1811 break;
1812 }
1813 }
1814
1815 if (Z_TYPE_PP(entry) != IS_STRING) {
1816 zval_dtor(&subject);
1817 }
1818
1819 zend_hash_move_forward(Z_ARRVAL_P(input));
1820 }
1821 zend_hash_internal_pointer_reset(Z_ARRVAL_P(input));
1822 /* Clean up */
1823 efree(offsets);
1824 }
1825 /* }}} */
1826
1827 /* {{{ proto int preg_last_error()
1828 Returns the error code of the last regexp execution. */
PHP_FUNCTION(preg_last_error)1829 static PHP_FUNCTION(preg_last_error)
1830 {
1831 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "") == FAILURE) {
1832 return;
1833 }
1834
1835 RETURN_LONG(PCRE_G(error_code));
1836 }
1837 /* }}} */
1838
1839 /* {{{ module definition structures */
1840
/* {{{ arginfo */
/* Argument descriptors for the functions listed in pcre_functions[].
 * NOTE(review): per the Zend API, the first ZEND_ARG_INFO argument flags a
 * by-reference parameter and the last ZEND_BEGIN_ARG_INFO_EX argument is the
 * number of required arguments -- confirm against zend_API.h. */
ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match, 0, 0, 2)
    ZEND_ARG_INFO(0, pattern)
    ZEND_ARG_INFO(0, subject)
    ZEND_ARG_INFO(1, subpatterns) /* array */
    ZEND_ARG_INFO(0, flags)
    ZEND_ARG_INFO(0, offset)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match_all, 0, 0, 3)
    ZEND_ARG_INFO(0, pattern)
    ZEND_ARG_INFO(0, subject)
    ZEND_ARG_INFO(1, subpatterns) /* array */
    ZEND_ARG_INFO(0, flags)
    ZEND_ARG_INFO(0, offset)
ZEND_END_ARG_INFO()

/* Also used for preg_filter, which takes the same arguments */
ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace, 0, 0, 3)
    ZEND_ARG_INFO(0, regex)
    ZEND_ARG_INFO(0, replace)
    ZEND_ARG_INFO(0, subject)
    ZEND_ARG_INFO(0, limit)
    ZEND_ARG_INFO(1, count)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace_callback, 0, 0, 3)
    ZEND_ARG_INFO(0, regex)
    ZEND_ARG_INFO(0, callback)
    ZEND_ARG_INFO(0, subject)
    ZEND_ARG_INFO(0, limit)
    ZEND_ARG_INFO(1, count)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_split, 0, 0, 2)
    ZEND_ARG_INFO(0, pattern)
    ZEND_ARG_INFO(0, subject)
    ZEND_ARG_INFO(0, limit)
    ZEND_ARG_INFO(0, flags)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_quote, 0, 0, 1)
    ZEND_ARG_INFO(0, str)
    ZEND_ARG_INFO(0, delim_char)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_grep, 0, 0, 2)
    ZEND_ARG_INFO(0, regex)
    ZEND_ARG_INFO(0, input) /* array */
    ZEND_ARG_INFO(0, flags)
ZEND_END_ARG_INFO()

/* preg_last_error() takes no arguments */
ZEND_BEGIN_ARG_INFO(arginfo_preg_last_error, 0)
ZEND_END_ARG_INFO()
/* }}} */
1895
/* Userland functions exported by the pcre extension, each paired with its
 * argument descriptor. preg_filter reuses arginfo_preg_replace. */
static const zend_function_entry pcre_functions[] = {
	PHP_FE(preg_match,				arginfo_preg_match)
	PHP_FE(preg_match_all,			arginfo_preg_match_all)
	PHP_FE(preg_replace,			arginfo_preg_replace)
	PHP_FE(preg_replace_callback,	arginfo_preg_replace_callback)
	PHP_FE(preg_filter,				arginfo_preg_replace)
	PHP_FE(preg_split,				arginfo_preg_split)
	PHP_FE(preg_quote,				arginfo_preg_quote)
	PHP_FE(preg_grep,				arginfo_preg_grep)
	PHP_FE(preg_last_error,			arginfo_preg_last_error)
	PHP_FE_END
};
1908
/* Module descriptor for the "pcre" extension.
 * NOTE(review): the field meanings below follow the usual zend_module_entry
 * layout -- confirm against zend_modules.h. */
zend_module_entry pcre_module_entry = {
	STANDARD_MODULE_HEADER,
	"pcre",							/* extension name */
	pcre_functions,					/* exported functions */
	PHP_MINIT(pcre),				/* module startup */
	PHP_MSHUTDOWN(pcre),			/* module shutdown */
	NULL,							/* presumably request startup: none */
	NULL,							/* presumably request shutdown: none */
	PHP_MINFO(pcre),				/* phpinfo() section */
	NO_VERSION_YET,
	PHP_MODULE_GLOBALS(pcre),		/* module globals */
	PHP_GINIT(pcre),				/* globals constructor */
	PHP_GSHUTDOWN(pcre),			/* globals destructor */
	NULL,
	STANDARD_MODULE_PROPERTIES_EX
};

/* Only emitted when COMPILE_DL_PCRE is defined */
#ifdef COMPILE_DL_PCRE
ZEND_GET_MODULE(pcre)
#endif
1929
1930 /* }}} */
1931
1932 #endif /* HAVE_PCRE || HAVE_BUNDLED_PCRE */
1933
1934 /*
1935 * Local variables:
1936 * tab-width: 4
1937 * c-basic-offset: 4
1938 * End:
1939 * vim600: sw=4 ts=4 fdm=marker
1940 * vim<600: sw=4 ts=4
1941 */
1942