xref: /PHP-7.2/ext/pcre/php_pcre.c (revision a8f60ac9)
1 /*
2    +----------------------------------------------------------------------+
3    | PHP Version 7                                                        |
4    +----------------------------------------------------------------------+
5    | Copyright (c) 1997-2018 The PHP Group                                |
6    +----------------------------------------------------------------------+
7    | This source file is subject to version 3.01 of the PHP license,      |
8    | that is bundled with this package in the file LICENSE, and is        |
9    | available through the world-wide-web at the following url:           |
10    | http://www.php.net/license/3_01.txt                                  |
11    | If you did not receive a copy of the PHP license and are unable to   |
12    | obtain it through the world-wide-web, please send a note to          |
13    | license@php.net so we can mail you a copy immediately.               |
14    +----------------------------------------------------------------------+
15    | Author: Andrei Zmievski <andrei@php.net>                             |
16    +----------------------------------------------------------------------+
17  */
18 
19 /* $Id$ */
20 
21 #include "php.h"
22 #include "php_ini.h"
23 #include "php_globals.h"
24 #include "php_pcre.h"
25 #include "ext/standard/info.h"
26 #include "ext/standard/basic_functions.h"
27 #include "zend_smart_str.h"
28 
29 #if HAVE_PCRE || HAVE_BUNDLED_PCRE
30 
31 #include "ext/standard/php_string.h"
32 
/* Sub-pattern ordering modes; carried in the low byte of the match flags. */
#define PREG_PATTERN_ORDER			1
#define PREG_SET_ORDER				2

/* Match flag bits that sit above the ordering byte. */
#define PREG_OFFSET_CAPTURE			(1 << 8)
#define PREG_UNMATCHED_AS_NULL		(1 << 9)

/* Split flag bits. */
#define PREG_SPLIT_NO_EMPTY			(1 << 0)
#define PREG_SPLIT_DELIM_CAPTURE	(1 << 1)
#define PREG_SPLIT_OFFSET_CAPTURE	(1 << 2)

/* Recorded in preg_options when a pattern carries the 'e' modifier. */
#define PREG_REPLACE_EVAL			(1 << 0)

/* Grep flag bit. */
#define PREG_GREP_INVERT			(1 << 0)

/* Number of cached patterns at which old entries start being evicted. */
#define PCRE_CACHE_SIZE 4096

/* not fully functional workaround for libpcre < 8.0, see bug #70232 */
#ifndef PCRE_NOTEMPTY_ATSTART
# define PCRE_NOTEMPTY_ATSTART PCRE_NOTEMPTY
#endif
52 
/* Error codes stored in PCRE_G(error_code) and exported to userland as the
   PREG_*_ERROR constants. The numeric values are spelled out because they
   are visible to scripts. */
enum {
	PHP_PCRE_NO_ERROR              = 0,
	PHP_PCRE_INTERNAL_ERROR        = 1,
	PHP_PCRE_BACKTRACK_LIMIT_ERROR = 2,
	PHP_PCRE_RECURSION_LIMIT_ERROR = 3,
	PHP_PCRE_BAD_UTF8_ERROR        = 4,
	PHP_PCRE_BAD_UTF8_OFFSET_ERROR = 5,
	PHP_PCRE_JIT_STACKLIMIT_ERROR  = 6
};
62 
63 
64 PHPAPI ZEND_DECLARE_MODULE_GLOBALS(pcre)
65 
66 #ifdef HAVE_PCRE_JIT_SUPPORT
67 #define PCRE_JIT_STACK_MIN_SIZE (32 * 1024)
68 #define PCRE_JIT_STACK_MAX_SIZE (64 * 1024)
69 ZEND_TLS pcre_jit_stack *jit_stack = NULL;
70 #endif
#if defined(ZTS)
/* Process-wide mutex taken around pcre_study() and JIT stack allocation. */
static MUTEX_T pcre_mt = NULL;

/*
 * Each macro expands to exactly one statement and carries no trailing
 * semicolon, so it behaves correctly as the body of an if/else. Invoke
 * them as ordinary statements: `php_pcre_mutex_lock();`.
 */

/* Creates the mutex once, from the main thread only. */
#define php_pcre_mutex_alloc() \
	do { \
		if (tsrm_is_main_thread() && !pcre_mt) { \
			pcre_mt = tsrm_mutex_alloc(); \
		} \
	} while (0)

/* Destroys the mutex from the main thread. The pointer is reset only when
   the mutex was actually freed; it is no longer cleared unconditionally. */
#define php_pcre_mutex_free() \
	do { \
		if (tsrm_is_main_thread() && pcre_mt) { \
			tsrm_mutex_free(pcre_mt); \
			pcre_mt = NULL; \
		} \
	} while (0)

#define php_pcre_mutex_lock() \
	do { tsrm_mutex_lock(pcre_mt); } while (0)
#define php_pcre_mutex_unlock() \
	do { tsrm_mutex_unlock(pcre_mt); } while (0)
#else
/* Non-thread-safe builds need no locking; the macros expand to nothing. */
#define php_pcre_mutex_alloc()
#define php_pcre_mutex_free()
#define php_pcre_mutex_lock()
#define php_pcre_mutex_unlock()
#endif
83 
pcre_handle_exec_error(int pcre_code)84 static void pcre_handle_exec_error(int pcre_code) /* {{{ */
85 {
86 	int preg_code = 0;
87 
88 	switch (pcre_code) {
89 		case PCRE_ERROR_MATCHLIMIT:
90 			preg_code = PHP_PCRE_BACKTRACK_LIMIT_ERROR;
91 			break;
92 
93 		case PCRE_ERROR_RECURSIONLIMIT:
94 			preg_code = PHP_PCRE_RECURSION_LIMIT_ERROR;
95 			break;
96 
97 		case PCRE_ERROR_BADUTF8:
98 			preg_code = PHP_PCRE_BAD_UTF8_ERROR;
99 			break;
100 
101 		case PCRE_ERROR_BADUTF8_OFFSET:
102 			preg_code = PHP_PCRE_BAD_UTF8_OFFSET_ERROR;
103 			break;
104 
105 #ifdef HAVE_PCRE_JIT_SUPPORT
106 		case PCRE_ERROR_JIT_STACKLIMIT:
107 			preg_code = PHP_PCRE_JIT_STACKLIMIT_ERROR;
108 			break;
109 #endif
110 
111 		default:
112 			preg_code = PHP_PCRE_INTERNAL_ERROR;
113 			break;
114 	}
115 
116 	PCRE_G(error_code) = preg_code;
117 }
118 /* }}} */
119 
static void php_free_pcre_cache(zval *data) /* {{{ */
{
	/* Destructor for pcre_cache values: frees the compiled pattern, its
	   study data, the locale tables (if any) and the entry itself. */
	pcre_cache_entry *entry = (pcre_cache_entry *) Z_PTR_P(data);

	if (entry != NULL) {
		pcre_free(entry->re);

		if (entry->extra != NULL) {
			pcre_free_study(entry->extra);
		}
#if HAVE_SETLOCALE
		if ((void*)entry->tables != NULL) {
			pefree((void*)entry->tables, 1);
		}
#endif
		pefree(entry, 1);
	}
}
/* }}} */
134 
PHP_GINIT_FUNCTION(pcre)135 static PHP_GINIT_FUNCTION(pcre) /* {{{ */
136 {
137 	zend_hash_init(&pcre_globals->pcre_cache, 0, NULL, php_free_pcre_cache, 1);
138 	pcre_globals->backtrack_limit = 0;
139 	pcre_globals->recursion_limit = 0;
140 	pcre_globals->error_code      = PHP_PCRE_NO_ERROR;
141 }
142 /* }}} */
143 
PHP_GSHUTDOWN_FUNCTION(pcre)144 static PHP_GSHUTDOWN_FUNCTION(pcre) /* {{{ */
145 {
146 	zend_hash_destroy(&pcre_globals->pcre_cache);
147 
148 #ifdef HAVE_PCRE_JIT_SUPPORT
149 	/* Stack may only be destroyed when no cached patterns
150 	 	possibly associated with it do exist. */
151 	if (jit_stack) {
152 		pcre_jit_stack_free(jit_stack);
153 		jit_stack = NULL;
154 	}
155 #endif
156 
157 }
158 /* }}} */
159 
160 PHP_INI_BEGIN()
161 	STD_PHP_INI_ENTRY("pcre.backtrack_limit", "1000000", PHP_INI_ALL, OnUpdateLong, backtrack_limit, zend_pcre_globals, pcre_globals)
162 	STD_PHP_INI_ENTRY("pcre.recursion_limit", "100000",  PHP_INI_ALL, OnUpdateLong, recursion_limit, zend_pcre_globals, pcre_globals)
163 #ifdef HAVE_PCRE_JIT_SUPPORT
164 	STD_PHP_INI_ENTRY("pcre.jit",             "1",       PHP_INI_ALL, OnUpdateBool, jit,             zend_pcre_globals, pcre_globals)
165 #endif
PHP_INI_END()166 PHP_INI_END()
167 
168 
/* {{{ PHP_MINFO_FUNCTION(pcre)
 * Prints the extension's info table: library version, JIT availability
 * and, when compiled in, Valgrind support, followed by the INI entries.
 */
static PHP_MINFO_FUNCTION(pcre)
{
	php_info_print_table_start();
	php_info_print_table_row(2, "PCRE (Perl Compatible Regular Expressions) Support", "enabled" );
	php_info_print_table_row(2, "PCRE Library Version", pcre_version() );

#ifdef HAVE_PCRE_JIT_SUPPORT
	{
		int jit_available = 0;
		const char *jit_state = "unknown";

		/* pcre_config() returns 0 when the query itself succeeded. */
		if (pcre_config(PCRE_CONFIG_JIT, &jit_available) == 0) {
			jit_state = jit_available ? "enabled" : "disabled";
		}
		php_info_print_table_row(2, "PCRE JIT Support", jit_state);
	}
#else
	php_info_print_table_row(2, "PCRE JIT Support", "not compiled in" );
#endif

#ifdef HAVE_PCRE_VALGRIND_SUPPORT
	php_info_print_table_row(2, "PCRE Valgrind Support", "enabled" );
#endif

	php_info_print_table_end();

	DISPLAY_INI_ENTRIES();
}
/* }}} */
199 
200 /* {{{ PHP_MINIT_FUNCTION(pcre) */
PHP_MINIT_FUNCTION(pcre)201 static PHP_MINIT_FUNCTION(pcre)
202 {
203 	REGISTER_INI_ENTRIES();
204 
205 	php_pcre_mutex_alloc();
206 
207 	REGISTER_LONG_CONSTANT("PREG_PATTERN_ORDER", PREG_PATTERN_ORDER, CONST_CS | CONST_PERSISTENT);
208 	REGISTER_LONG_CONSTANT("PREG_SET_ORDER", PREG_SET_ORDER, CONST_CS | CONST_PERSISTENT);
209 	REGISTER_LONG_CONSTANT("PREG_OFFSET_CAPTURE", PREG_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
210 	REGISTER_LONG_CONSTANT("PREG_UNMATCHED_AS_NULL", PREG_UNMATCHED_AS_NULL, CONST_CS | CONST_PERSISTENT);
211 	REGISTER_LONG_CONSTANT("PREG_SPLIT_NO_EMPTY", PREG_SPLIT_NO_EMPTY, CONST_CS | CONST_PERSISTENT);
212 	REGISTER_LONG_CONSTANT("PREG_SPLIT_DELIM_CAPTURE", PREG_SPLIT_DELIM_CAPTURE, CONST_CS | CONST_PERSISTENT);
213 	REGISTER_LONG_CONSTANT("PREG_SPLIT_OFFSET_CAPTURE", PREG_SPLIT_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
214 	REGISTER_LONG_CONSTANT("PREG_GREP_INVERT", PREG_GREP_INVERT, CONST_CS | CONST_PERSISTENT);
215 
216 	REGISTER_LONG_CONSTANT("PREG_NO_ERROR", PHP_PCRE_NO_ERROR, CONST_CS | CONST_PERSISTENT);
217 	REGISTER_LONG_CONSTANT("PREG_INTERNAL_ERROR", PHP_PCRE_INTERNAL_ERROR, CONST_CS | CONST_PERSISTENT);
218 	REGISTER_LONG_CONSTANT("PREG_BACKTRACK_LIMIT_ERROR", PHP_PCRE_BACKTRACK_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
219 	REGISTER_LONG_CONSTANT("PREG_RECURSION_LIMIT_ERROR", PHP_PCRE_RECURSION_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
220 	REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_ERROR", PHP_PCRE_BAD_UTF8_ERROR, CONST_CS | CONST_PERSISTENT);
221 	REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_OFFSET_ERROR", PHP_PCRE_BAD_UTF8_OFFSET_ERROR, CONST_CS | CONST_PERSISTENT);
222 	REGISTER_LONG_CONSTANT("PREG_JIT_STACKLIMIT_ERROR", PHP_PCRE_JIT_STACKLIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
223 	REGISTER_STRING_CONSTANT("PCRE_VERSION", (char *)pcre_version(), CONST_CS | CONST_PERSISTENT);
224 
225 	return SUCCESS;
226 }
227 /* }}} */
228 
229 /* {{{ PHP_MSHUTDOWN_FUNCTION(pcre) */
PHP_MSHUTDOWN_FUNCTION(pcre)230 static PHP_MSHUTDOWN_FUNCTION(pcre)
231 {
232 	UNREGISTER_INI_ENTRIES();
233 
234 	php_pcre_mutex_free();
235 
236 	return SUCCESS;
237 }
238 /* }}} */
239 
#ifdef HAVE_PCRE_JIT_SUPPORT
/* {{{ PHP_RINIT_FUNCTION(pcre)
 * Lazily allocates the JIT stack on the first request that has pcre.jit
 * enabled; the allocation is done under the PCRE mutex.
 */
static PHP_RINIT_FUNCTION(pcre)
{
	/* Nothing to do when JIT is off or the stack already exists. */
	if (!PCRE_G(jit) || jit_stack != NULL) {
		return SUCCESS;
	}

	php_pcre_mutex_lock();
	jit_stack = pcre_jit_stack_alloc(PCRE_JIT_STACK_MIN_SIZE, PCRE_JIT_STACK_MAX_SIZE);
	php_pcre_mutex_unlock();

	return SUCCESS;
}
/* }}} */
#endif
254 
255 /* {{{ static pcre_clean_cache */
pcre_clean_cache(zval * data,void * arg)256 static int pcre_clean_cache(zval *data, void *arg)
257 {
258 	pcre_cache_entry *pce = (pcre_cache_entry *) Z_PTR_P(data);
259 	int *num_clean = (int *)arg;
260 
261 	if (*num_clean > 0 && !pce->refcount) {
262 		(*num_clean)--;
263 		return ZEND_HASH_APPLY_REMOVE;
264 	} else {
265 		return ZEND_HASH_APPLY_KEEP;
266 	}
267 }
268 /* }}} */
269 
270 /* {{{ static make_subpats_table */
make_subpats_table(int num_subpats,pcre_cache_entry * pce)271 static char **make_subpats_table(int num_subpats, pcre_cache_entry *pce)
272 {
273 	pcre_extra *extra = pce->extra;
274 	int name_cnt = pce->name_count, name_size, ni = 0;
275 	int rc;
276 	char *name_table;
277 	unsigned short name_idx;
278 	char **subpat_names;
279 	int rc1, rc2;
280 
281 	rc1 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMETABLE, &name_table);
282 	rc2 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMEENTRYSIZE, &name_size);
283 	rc = rc2 ? rc2 : rc1;
284 	if (rc < 0) {
285 		php_error_docref(NULL, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
286 		return NULL;
287 	}
288 
289 	subpat_names = (char **)ecalloc(num_subpats, sizeof(char *));
290 	while (ni++ < name_cnt) {
291 		name_idx = 0x100 * (unsigned char)name_table[0] + (unsigned char)name_table[1];
292 		subpat_names[name_idx] = name_table + 2;
293 		if (is_numeric_string(subpat_names[name_idx], strlen(subpat_names[name_idx]), NULL, NULL, 0) > 0) {
294 			php_error_docref(NULL, E_WARNING, "Numeric named subpatterns are not allowed");
295 			efree(subpat_names);
296 			return NULL;
297 		}
298 		name_table += name_size;
299 	}
300 	return subpat_names;
301 }
302 /* }}} */
303 
304 /* {{{ static calculate_unit_length */
305 /* Calculates the byte length of the next character. Assumes valid UTF-8 for PCRE_UTF8. */
calculate_unit_length(pcre_cache_entry * pce,char * start)306 static zend_always_inline int calculate_unit_length(pcre_cache_entry *pce, char *start)
307 {
308 	int unit_len;
309 
310 	if (pce->compile_options & PCRE_UTF8) {
311 		char *end = start;
312 
313 		/* skip continuation bytes */
314 		while ((*++end & 0xC0) == 0x80);
315 		unit_len = end - start;
316 	} else {
317 		unit_len = 1;
318 	}
319 	return unit_len;
320 }
321 /* }}} */
322 
323 /* {{{ pcre_get_compiled_regex_cache
324  */
pcre_get_compiled_regex_cache_ex(zend_string * regex,int locale_aware)325 PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, int locale_aware)
326 {
327 	pcre				*re = NULL;
328 	pcre_extra			*extra;
329 	int					 coptions = 0;
330 	int					 soptions = 0;
331 	const char			*error;
332 	int					 erroffset;
333 	char				 delimiter;
334 	char				 start_delimiter;
335 	char				 end_delimiter;
336 	char				*p, *pp;
337 	char				*pattern;
338 	int					 do_study = 0;
339 	int					 poptions = 0;
340 	unsigned const char *tables = NULL;
341 	pcre_cache_entry	*pce;
342 	pcre_cache_entry	 new_entry;
343 	int					 rc;
344 	zend_string 		*key;
345 
346 #if HAVE_SETLOCALE
347 	if (locale_aware && BG(locale_string) &&
348 		(ZSTR_LEN(BG(locale_string)) != 1 && ZSTR_VAL(BG(locale_string))[0] != 'C')) {
349 		key = zend_string_alloc(ZSTR_LEN(regex) + ZSTR_LEN(BG(locale_string)) + 1, 0);
350 		memcpy(ZSTR_VAL(key), ZSTR_VAL(BG(locale_string)), ZSTR_LEN(BG(locale_string)) + 1);
351 		memcpy(ZSTR_VAL(key) + ZSTR_LEN(BG(locale_string)), ZSTR_VAL(regex), ZSTR_LEN(regex) + 1);
352 	} else
353 #endif
354 	{
355 		key = regex;
356 	}
357 
358 	/* Try to lookup the cached regex entry, and if successful, just pass
359 	   back the compiled pattern, otherwise go on and compile it. */
360 	pce = zend_hash_find_ptr(&PCRE_G(pcre_cache), key);
361 	if (pce) {
362 #if HAVE_SETLOCALE
363 		if (key != regex) {
364 			zend_string_release(key);
365 		}
366 #endif
367 		return pce;
368 	}
369 
370 	p = ZSTR_VAL(regex);
371 
372 	/* Parse through the leading whitespace, and display a warning if we
373 	   get to the end without encountering a delimiter. */
374 	while (isspace((int)*(unsigned char *)p)) p++;
375 	if (*p == 0) {
376 #if HAVE_SETLOCALE
377 		if (key != regex) {
378 			zend_string_release(key);
379 		}
380 #endif
381 		php_error_docref(NULL, E_WARNING,
382 						 p < ZSTR_VAL(regex) + ZSTR_LEN(regex) ? "Null byte in regex" : "Empty regular expression");
383 		pcre_handle_exec_error(PCRE_ERROR_INTERNAL);
384 		return NULL;
385 	}
386 
387 	/* Get the delimiter and display a warning if it is alphanumeric
388 	   or a backslash. */
389 	delimiter = *p++;
390 	if (isalnum((int)*(unsigned char *)&delimiter) || delimiter == '\\') {
391 #if HAVE_SETLOCALE
392 		if (key != regex) {
393 			zend_string_release(key);
394 		}
395 #endif
396 		php_error_docref(NULL,E_WARNING, "Delimiter must not be alphanumeric or backslash");
397 		pcre_handle_exec_error(PCRE_ERROR_INTERNAL);
398 		return NULL;
399 	}
400 
401 	start_delimiter = delimiter;
402 	if ((pp = strchr("([{< )]}> )]}>", delimiter)))
403 		delimiter = pp[5];
404 	end_delimiter = delimiter;
405 
406 	pp = p;
407 
408 	if (start_delimiter == end_delimiter) {
409 		/* We need to iterate through the pattern, searching for the ending delimiter,
410 		   but skipping the backslashed delimiters.  If the ending delimiter is not
411 		   found, display a warning. */
412 		while (*pp != 0) {
413 			if (*pp == '\\' && pp[1] != 0) pp++;
414 			else if (*pp == delimiter)
415 				break;
416 			pp++;
417 		}
418 	} else {
419 		/* We iterate through the pattern, searching for the matching ending
420 		 * delimiter. For each matching starting delimiter, we increment nesting
421 		 * level, and decrement it for each matching ending delimiter. If we
422 		 * reach the end of the pattern without matching, display a warning.
423 		 */
424 		int brackets = 1; 	/* brackets nesting level */
425 		while (*pp != 0) {
426 			if (*pp == '\\' && pp[1] != 0) pp++;
427 			else if (*pp == end_delimiter && --brackets <= 0)
428 				break;
429 			else if (*pp == start_delimiter)
430 				brackets++;
431 			pp++;
432 		}
433 	}
434 
435 	if (*pp == 0) {
436 #if HAVE_SETLOCALE
437 		if (key != regex) {
438 			zend_string_release(key);
439 		}
440 #endif
441 		if (pp < ZSTR_VAL(regex) + ZSTR_LEN(regex)) {
442 			php_error_docref(NULL,E_WARNING, "Null byte in regex");
443 		} else if (start_delimiter == end_delimiter) {
444 			php_error_docref(NULL,E_WARNING, "No ending delimiter '%c' found", delimiter);
445 		} else {
446 			php_error_docref(NULL,E_WARNING, "No ending matching delimiter '%c' found", delimiter);
447 		}
448 		pcre_handle_exec_error(PCRE_ERROR_INTERNAL);
449 		return NULL;
450 	}
451 
452 	/* Make a copy of the actual pattern. */
453 	pattern = estrndup(p, pp-p);
454 
455 	/* Move on to the options */
456 	pp++;
457 
458 	/* Parse through the options, setting appropriate flags.  Display
459 	   a warning if we encounter an unknown modifier. */
460 	while (pp < ZSTR_VAL(regex) + ZSTR_LEN(regex)) {
461 		switch (*pp++) {
462 			/* Perl compatible options */
463 			case 'i':	coptions |= PCRE_CASELESS;		break;
464 			case 'm':	coptions |= PCRE_MULTILINE;		break;
465 			case 's':	coptions |= PCRE_DOTALL;		break;
466 			case 'x':	coptions |= PCRE_EXTENDED;		break;
467 
468 			/* PCRE specific options */
469 			case 'A':	coptions |= PCRE_ANCHORED;		break;
470 			case 'D':	coptions |= PCRE_DOLLAR_ENDONLY;break;
471 			case 'S':	do_study  = 1;					break;
472 			case 'U':	coptions |= PCRE_UNGREEDY;		break;
473 			case 'X':	coptions |= PCRE_EXTRA;			break;
474 			case 'u':	coptions |= PCRE_UTF8;
475 	/* In  PCRE,  by  default, \d, \D, \s, \S, \w, and \W recognize only ASCII
476        characters, even in UTF-8 mode. However, this can be changed by setting
477        the PCRE_UCP option. */
478 #ifdef PCRE_UCP
479 						coptions |= PCRE_UCP;
480 #endif
481 				break;
482 			case 'J':	coptions |= PCRE_DUPNAMES;		break;
483 
484 			/* Custom preg options */
485 			case 'e':	poptions |= PREG_REPLACE_EVAL;	break;
486 
487 			case ' ':
488 			case '\n':
489 			case '\r':
490 				break;
491 
492 			default:
493 				if (pp[-1]) {
494 					php_error_docref(NULL,E_WARNING, "Unknown modifier '%c'", pp[-1]);
495 				} else {
496 					php_error_docref(NULL,E_WARNING, "Null byte in regex");
497 				}
498 				pcre_handle_exec_error(PCRE_ERROR_INTERNAL);
499 				efree(pattern);
500 #if HAVE_SETLOCALE
501 				if (key != regex) {
502 					zend_string_release(key);
503 				}
504 #endif
505 				return NULL;
506 		}
507 	}
508 
509 #if HAVE_SETLOCALE
510 	if (key != regex) {
511 		tables = pcre_maketables();
512 	}
513 #endif
514 
515 	/* Compile pattern and display a warning if compilation failed. */
516 	re = pcre_compile(pattern,
517 					  coptions,
518 					  &error,
519 					  &erroffset,
520 					  tables);
521 
522 	if (re == NULL) {
523 #if HAVE_SETLOCALE
524 		if (key != regex) {
525 			zend_string_release(key);
526 		}
527 #endif
528 		php_error_docref(NULL,E_WARNING, "Compilation failed: %s at offset %d", error, erroffset);
529 		pcre_handle_exec_error(PCRE_ERROR_INTERNAL);
530 		efree(pattern);
531 		if (tables) {
532 			pefree((void*)tables, 1);
533 		}
534 		return NULL;
535 	}
536 
537 #ifdef HAVE_PCRE_JIT_SUPPORT
538 	if (PCRE_G(jit)) {
539 		/* Enable PCRE JIT compiler */
540 		do_study = 1;
541 		soptions |= PCRE_STUDY_JIT_COMPILE;
542 	}
543 #endif
544 
545 	/* If study option was specified, study the pattern and
546 	   store the result in extra for passing to pcre_exec. */
547 	if (do_study) {
548 		php_pcre_mutex_lock();
549 		extra = pcre_study(re, soptions, &error);
550 		php_pcre_mutex_unlock();
551 		if (extra) {
552 			extra->flags |= PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
553 			extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
554 			extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
555 #ifdef HAVE_PCRE_JIT_SUPPORT
556 			if (PCRE_G(jit) && jit_stack) {
557 				pcre_assign_jit_stack(extra, NULL, jit_stack);
558 			}
559 #endif
560 		}
561 		if (error != NULL) {
562 			php_error_docref(NULL, E_WARNING, "Error while studying pattern");
563 			pcre_handle_exec_error(PCRE_ERROR_INTERNAL);
564 		}
565 	} else {
566 		extra = NULL;
567 	}
568 
569 	efree(pattern);
570 
571 	/*
572 	 * If we reached cache limit, clean out the items from the head of the list;
573 	 * these are supposedly the oldest ones (but not necessarily the least used
574 	 * ones).
575 	 */
576 	if (!pce && zend_hash_num_elements(&PCRE_G(pcre_cache)) == PCRE_CACHE_SIZE) {
577 		int num_clean = PCRE_CACHE_SIZE / 8;
578 		zend_hash_apply_with_argument(&PCRE_G(pcre_cache), pcre_clean_cache, &num_clean);
579 	}
580 
581 	/* Store the compiled pattern and extra info in the cache. */
582 	new_entry.re = re;
583 	new_entry.extra = extra;
584 	new_entry.preg_options = poptions;
585 	new_entry.compile_options = coptions;
586 #if HAVE_SETLOCALE
587 	new_entry.tables = tables;
588 #endif
589 	new_entry.refcount = 0;
590 
591 	rc = pcre_fullinfo(re, extra, PCRE_INFO_CAPTURECOUNT, &new_entry.capture_count);
592 	if (rc < 0) {
593 #if HAVE_SETLOCALE
594 		if (key != regex) {
595 			zend_string_release(key);
596 		}
597 #endif
598 		php_error_docref(NULL, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
599 		pcre_handle_exec_error(PCRE_ERROR_INTERNAL);
600 		return NULL;
601 	}
602 
603 	rc = pcre_fullinfo(re, extra, PCRE_INFO_NAMECOUNT, &new_entry.name_count);
604 	if (rc < 0) {
605 #if HAVE_SETLOCALE
606 		if (key != regex) {
607 			zend_string_release(key);
608 		}
609 #endif
610 		php_error_docref(NULL, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
611 		pcre_handle_exec_error(PCRE_ERROR_INTERNAL);
612 		return NULL;
613 	}
614 
615 	/*
616 	 * Interned strings are not duplicated when stored in HashTable,
617 	 * but all the interned strings created during HTTP request are removed
618 	 * at end of request. However PCRE_G(pcre_cache) must be consistent
619 	 * on the next request as well. So we disable usage of interned strings
620 	 * as hash keys especually for this table.
621 	 * See bug #63180
622 	 */
623 	if (!ZSTR_IS_INTERNED(key) || !(GC_FLAGS(key) & IS_STR_PERMANENT)) {
624 		pce = zend_hash_str_update_mem(&PCRE_G(pcre_cache),
625 				ZSTR_VAL(key), ZSTR_LEN(key), &new_entry, sizeof(pcre_cache_entry));
626 #if HAVE_SETLOCALE
627 		if (key != regex) {
628 			zend_string_release(key);
629 		}
630 #endif
631 	} else {
632 		pce = zend_hash_update_mem(&PCRE_G(pcre_cache), key, &new_entry, sizeof(pcre_cache_entry));
633 	}
634 
635 	return pce;
636 }
637 /* }}} */
638 
639 /* {{{ pcre_get_compiled_regex_cache
640  */
pcre_get_compiled_regex_cache(zend_string * regex)641 PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex)
642 {
643 	return pcre_get_compiled_regex_cache_ex(regex, 1);
644 }
645 /* }}} */
646 
647 /* {{{ pcre_get_compiled_regex
648  */
pcre_get_compiled_regex(zend_string * regex,pcre_extra ** extra,int * preg_options)649 PHPAPI pcre* pcre_get_compiled_regex(zend_string *regex, pcre_extra **extra, int *preg_options)
650 {
651 	pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex);
652 
653 	if (extra) {
654 		*extra = pce ? pce->extra : NULL;
655 	}
656 	if (preg_options) {
657 		*preg_options = pce ? pce->preg_options : 0;
658 	}
659 
660 	return pce ? pce->re : NULL;
661 }
662 /* }}} */
663 
664 /* {{{ pcre_get_compiled_regex_ex
665  */
pcre_get_compiled_regex_ex(zend_string * regex,pcre_extra ** extra,int * preg_options,int * compile_options)666 PHPAPI pcre* pcre_get_compiled_regex_ex(zend_string *regex, pcre_extra **extra, int *preg_options, int *compile_options)
667 {
668 	pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex);
669 
670 	if (extra) {
671 		*extra = pce ? pce->extra : NULL;
672 	}
673 	if (preg_options) {
674 		*preg_options = pce ? pce->preg_options : 0;
675 	}
676 	if (compile_options) {
677 		*compile_options = pce ? pce->compile_options : 0;
678 	}
679 
680 	return pce ? pce->re : NULL;
681 }
682 /* }}} */
683 
684 /* {{{ add_offset_pair */
add_offset_pair(zval * result,char * str,int len,int offset,char * name,int unmatched_as_null)685 static inline void add_offset_pair(zval *result, char *str, int len, int offset, char *name, int unmatched_as_null)
686 {
687 	zval match_pair, tmp;
688 
689 	array_init_size(&match_pair, 2);
690 
691 	/* Add (match, offset) to the return value */
692 	if (offset < 0) {
693 		if (unmatched_as_null) {
694 			ZVAL_NULL(&tmp);
695 		} else {
696 			ZVAL_EMPTY_STRING(&tmp);
697 		}
698 	} else {
699 		ZVAL_STRINGL(&tmp, str, len);
700 	}
701 	zend_hash_next_index_insert_new(Z_ARRVAL(match_pair), &tmp);
702 	ZVAL_LONG(&tmp, offset);
703 	zend_hash_next_index_insert_new(Z_ARRVAL(match_pair), &tmp);
704 
705 	if (name) {
706 		Z_ADDREF(match_pair);
707 		zend_hash_str_update(Z_ARRVAL_P(result), name, strlen(name), &match_pair);
708 	}
709 	zend_hash_next_index_insert(Z_ARRVAL_P(result), &match_pair);
710 }
711 /* }}} */
712 
static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) /* {{{ */
{
	/* Shared entry point of the matching functions: parses the arguments,
	   fetches the compiled pattern and delegates to php_pcre_match_impl().
	   Returns FALSE on bad arguments, an over-long subject or a pattern
	   that cannot be compiled. */
	zend_string		 *regex;			/* Regular expression */
	zend_string		 *subject;			/* String to match against */
	zval			 *subpats = NULL;	/* Array for subpatterns */
	zend_long		  flags = 0;		/* Match control flags */
	zend_long		  start_offset = 0;	/* Where the new search starts */
	pcre_cache_entry *entry;			/* Compiled regular expression */
	int				  subject_len;
	int				  use_flags;

	ZEND_PARSE_PARAMETERS_START(2, 5)
		Z_PARAM_STR(regex)
		Z_PARAM_STR(subject)
		Z_PARAM_OPTIONAL
		Z_PARAM_ZVAL_DEREF(subpats)
		Z_PARAM_LONG(flags)
		Z_PARAM_LONG(start_offset)
	ZEND_PARSE_PARAMETERS_END_EX(RETURN_FALSE);

	/* The implementation takes the subject length as an int. */
	if (ZEND_SIZE_T_INT_OVFL(ZSTR_LEN(subject))) {
		php_error_docref(NULL, E_WARNING, "Subject is too long");
		RETURN_FALSE;
	}
	subject_len = (int)ZSTR_LEN(subject);

	/* Compile regex or get it from cache. */
	entry = pcre_get_compiled_regex_cache(regex);
	if (entry == NULL) {
		RETURN_FALSE;
	}

	/* Flags are honoured only when the fourth argument was passed. */
	use_flags = ZEND_NUM_ARGS() >= 4;

	/* Hold a reference for the duration of the match so that cache
	   eviction skips this entry. */
	entry->refcount++;
	php_pcre_match_impl(entry, ZSTR_VAL(subject), subject_len, return_value, subpats,
		global, use_flags, flags, start_offset);
	entry->refcount--;
}
/* }}} */
748 
749 /* {{{ php_pcre_match_impl() */
php_pcre_match_impl(pcre_cache_entry * pce,char * subject,int subject_len,zval * return_value,zval * subpats,int global,int use_flags,zend_long flags,zend_long start_offset)750 PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value,
751 	zval *subpats, int global, int use_flags, zend_long flags, zend_long start_offset)
752 {
753 	zval			 result_set,		/* Holds a set of subpatterns after
754 										   a global match */
755 					*match_sets = NULL;	/* An array of sets of matches for each
756 										   subpattern after a global match */
757 	pcre_extra		*extra = pce->extra;/* Holds results of studying */
758 	pcre_extra		 extra_data;		/* Used locally for exec options */
759 	int				 no_utf_check = 0;  /* Execution options */
760 	int				 count = 0;			/* Count of matched subpatterns */
761 	int				*offsets;			/* Array of subpattern offsets */
762 	int				 num_subpats;		/* Number of captured subpatterns */
763 	int				 size_offsets;		/* Size of the offsets array */
764 	int				 matched;			/* Has anything matched */
765 	int				 g_notempty = 0;	/* If the match should not be empty */
766 	char 		   **subpat_names;		/* Array for named subpatterns */
767 	int				 i;
768 	int				 subpats_order;		/* Order of subpattern matches */
769 	int				 offset_capture;	/* Capture match offsets: yes/no */
770 	int				 unmatched_as_null;	/* Null non-matches: yes/no */
771 	unsigned char   *mark = NULL;		/* Target for MARK name */
772 	zval			 marks;				/* Array of marks for PREG_PATTERN_ORDER */
773 
774 	ALLOCA_FLAG(use_heap);
775 
776 	ZVAL_UNDEF(&marks);
777 
778 	/* Overwrite the passed-in value for subpatterns with an empty array. */
779 	if (subpats != NULL) {
780 		zval_ptr_dtor(subpats);
781 		array_init(subpats);
782 	}
783 
784 	subpats_order = global ? PREG_PATTERN_ORDER : 0;
785 
786 	if (use_flags) {
787 		offset_capture = flags & PREG_OFFSET_CAPTURE;
788 		unmatched_as_null = flags & PREG_UNMATCHED_AS_NULL;
789 
790 		/*
791 		 * subpats_order is pre-set to pattern mode so we change it only if
792 		 * necessary.
793 		 */
794 		if (flags & 0xff) {
795 			subpats_order = flags & 0xff;
796 		}
797 		if ((global && (subpats_order < PREG_PATTERN_ORDER || subpats_order > PREG_SET_ORDER)) ||
798 			(!global && subpats_order != 0)) {
799 			php_error_docref(NULL, E_WARNING, "Invalid flags specified");
800 			return;
801 		}
802 	} else {
803 		offset_capture = 0;
804 		unmatched_as_null = 0;
805 	}
806 
807 	/* Negative offset counts from the end of the string. */
808 	if (start_offset < 0) {
809 		start_offset = subject_len + start_offset;
810 		if (start_offset < 0) {
811 			start_offset = 0;
812 		}
813 	}
814 
815 	if (extra == NULL) {
816 		extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
817 		extra = &extra_data;
818 	}
819 	extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
820 	extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
821 #ifdef PCRE_EXTRA_MARK
822 	extra->mark = &mark;
823 	extra->flags |= PCRE_EXTRA_MARK;
824 #endif
825 
826 	/* Calculate the size of the offsets array, and allocate memory for it. */
827 	num_subpats = pce->capture_count + 1;
828 	size_offsets = num_subpats * 3;
829 
830 	/*
831 	 * Build a mapping from subpattern numbers to their names. We will
832 	 * allocate the table only if there are any named subpatterns.
833 	 */
834 	subpat_names = NULL;
835 	if (pce->name_count > 0) {
836 		subpat_names = make_subpats_table(num_subpats, pce);
837 		if (!subpat_names) {
838 			RETURN_FALSE;
839 		}
840 	}
841 
842 	if (size_offsets <= 32) {
843 		offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
844 	} else {
845 		offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
846 	}
847 	memset(offsets, 0, size_offsets*sizeof(int));
848 	/* Allocate match sets array and initialize the values. */
849 	if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
850 		match_sets = (zval *)safe_emalloc(num_subpats, sizeof(zval), 0);
851 		for (i=0; i<num_subpats; i++) {
852 			array_init(&match_sets[i]);
853 		}
854 	}
855 
856 	matched = 0;
857 	PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
858 
859 #ifdef HAVE_PCRE_JIT_SUPPORT
860 	if (!(pce->compile_options & PCRE_UTF8)) {
861 		no_utf_check = PCRE_NO_UTF8_CHECK;
862 	}
863 #endif
864 
865 	do {
866 		/* Execute the regular expression. */
867 #ifdef HAVE_PCRE_JIT_SUPPORT
868 		if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT)
869 		 && no_utf_check && !g_notempty) {
870 			if (start_offset < 0 || start_offset > subject_len) {
871 				pcre_handle_exec_error(PCRE_ERROR_BADOFFSET);
872 				break;
873 			}
874 			count = pcre_jit_exec(pce->re, extra, subject, (int)subject_len, (int)start_offset,
875 						  no_utf_check|g_notempty, offsets, size_offsets, jit_stack);
876 		} else
877 #endif
878 		count = pcre_exec(pce->re, extra, subject, (int)subject_len, (int)start_offset,
879 						  no_utf_check|g_notempty, offsets, size_offsets);
880 
881 		/* the string was already proved to be valid UTF-8 */
882 		no_utf_check = PCRE_NO_UTF8_CHECK;
883 
884 		/* Check for too many substrings condition. */
885 		if (count == 0) {
886 			php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
887 			count = size_offsets/3;
888 		}
889 
890 		/* If something has matched */
891 		if (count > 0) {
892 			matched++;
893 
894 			/* If subpatterns array has been passed, fill it in with values. */
895 			if (subpats != NULL) {
896 				/* Try to get the list of substrings and display a warning if failed. */
897 				if (offsets[1] - offsets[0] < 0) {
898 					if (subpat_names) {
899 						efree(subpat_names);
900 					}
901 					if (size_offsets <= 32) {
902 						free_alloca(offsets, use_heap);
903 					} else {
904 						efree(offsets);
905 					}
906 					if (match_sets) efree(match_sets);
907 					php_error_docref(NULL, E_WARNING, "Get subpatterns list failed");
908 					RETURN_FALSE;
909 				}
910 
911 				if (global) {	/* global pattern matching */
912 					if (subpats && subpats_order == PREG_PATTERN_ORDER) {
913 						/* For each subpattern, insert it into the appropriate array. */
914 						if (offset_capture) {
915 							for (i = 0; i < count; i++) {
916 								add_offset_pair(&match_sets[i], subject + offsets[i<<1],
917 												offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL, unmatched_as_null);
918 							}
919 						} else {
920 							for (i = 0; i < count; i++) {
921 								if (offsets[i<<1] < 0) {
922 									if (unmatched_as_null) {
923 										add_next_index_null(&match_sets[i]);
924 									} else {
925 										add_next_index_str(&match_sets[i], ZSTR_EMPTY_ALLOC());
926 									}
927 								} else {
928 									add_next_index_stringl(&match_sets[i], subject + offsets[i<<1],
929 														   offsets[(i<<1)+1] - offsets[i<<1]);
930 								}
931 							}
932 						}
933 						/* Add MARK, if available */
934 						if (mark) {
935 							if (Z_TYPE(marks) == IS_UNDEF) {
936 								array_init(&marks);
937 							}
938 							add_index_string(&marks, matched - 1, (char *) mark);
939 						}
940 						/*
941 						 * If the number of captured subpatterns on this run is
942 						 * less than the total possible number, pad the result
943 						 * arrays with NULLs or empty strings.
944 						 */
945 						if (count < num_subpats) {
946 							for (; i < num_subpats; i++) {
947 								if (unmatched_as_null) {
948 									add_next_index_null(&match_sets[i]);
949 								} else {
950 									add_next_index_str(&match_sets[i], ZSTR_EMPTY_ALLOC());
951 								}
952 							}
953 						}
954 					} else {
955 						/* Allocate the result set array */
956 						array_init_size(&result_set, count + (mark ? 1 : 0));
957 
958 						/* Add all the subpatterns to it */
959 						if (subpat_names) {
960 							if (offset_capture) {
961 								for (i = 0; i < count; i++) {
962 									add_offset_pair(&result_set, subject + offsets[i<<1],
963 													offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], subpat_names[i], unmatched_as_null);
964 								}
965 							} else {
966 								for (i = 0; i < count; i++) {
967 									if (subpat_names[i]) {
968 										if (offsets[i<<1] < 0) {
969 											if (unmatched_as_null) {
970 												add_assoc_null(&result_set, subpat_names[i]);
971 											} else {
972 												add_assoc_str(&result_set, subpat_names[i], ZSTR_EMPTY_ALLOC());
973 											}
974 										} else {
975 											add_assoc_stringl(&result_set, subpat_names[i], subject + offsets[i<<1],
976 															  offsets[(i<<1)+1] - offsets[i<<1]);
977 										}
978 									}
979 									if (offsets[i<<1] < 0) {
980 										if (unmatched_as_null) {
981 											add_next_index_null(&result_set);
982 										} else {
983 											add_next_index_str(&result_set, ZSTR_EMPTY_ALLOC());
984 										}
985 									} else {
986 										add_next_index_stringl(&result_set, subject + offsets[i<<1],
987 															   offsets[(i<<1)+1] - offsets[i<<1]);
988 									}
989 								}
990 							}
991 						} else {
992 							if (offset_capture) {
993 								for (i = 0; i < count; i++) {
994 									add_offset_pair(&result_set, subject + offsets[i<<1],
995 													offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL, unmatched_as_null);
996 								}
997 							} else {
998 								for (i = 0; i < count; i++) {
999 									if (offsets[i<<1] < 0) {
1000 										if (unmatched_as_null) {
1001 											add_next_index_null(&result_set);
1002 										} else {
1003 											add_next_index_str(&result_set, ZSTR_EMPTY_ALLOC());
1004 										}
1005 									} else {
1006 										add_next_index_stringl(&result_set, subject + offsets[i<<1],
1007 															   offsets[(i<<1)+1] - offsets[i<<1]);
1008 									}
1009 								}
1010 							}
1011 						}
1012 						/* Add MARK, if available */
1013 						if (mark) {
1014 							add_assoc_string_ex(&result_set, "MARK", sizeof("MARK") - 1, (char *)mark);
1015 						}
1016 						/* And add it to the output array */
1017 						zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &result_set);
1018 					}
1019 				} else {			/* single pattern matching */
1020 					/* For each subpattern, insert it into the subpatterns array. */
1021 					if (subpat_names) {
1022 						if (offset_capture) {
1023 							for (i = 0; i < count; i++) {
1024 								add_offset_pair(subpats, subject + offsets[i<<1],
1025 												offsets[(i<<1)+1] - offsets[i<<1],
1026 												offsets[i<<1], subpat_names[i], unmatched_as_null);
1027 							}
1028 						} else {
1029 							for (i = 0; i < count; i++) {
1030 								if (subpat_names[i]) {
1031 									if (offsets[i<<1] < 0) {
1032 										if (unmatched_as_null) {
1033 											add_assoc_null(subpats, subpat_names[i]);
1034 										} else {
1035 											add_assoc_str(subpats, subpat_names[i], ZSTR_EMPTY_ALLOC());
1036 										}
1037 									} else {
1038 										add_assoc_stringl(subpats, subpat_names[i], subject + offsets[i<<1],
1039 														  offsets[(i<<1)+1] - offsets[i<<1]);
1040 									}
1041 								}
1042 								if (offsets[i<<1] < 0) {
1043 									if (unmatched_as_null) {
1044 										add_next_index_null(subpats);
1045 									} else {
1046 										add_next_index_str(subpats, ZSTR_EMPTY_ALLOC());
1047 									}
1048 								} else {
1049 									add_next_index_stringl(subpats, subject + offsets[i<<1],
1050 														   offsets[(i<<1)+1] - offsets[i<<1]);
1051 								}
1052 							}
1053 						}
1054 					} else {
1055 						if (offset_capture) {
1056 							for (i = 0; i < count; i++) {
1057 								add_offset_pair(subpats, subject + offsets[i<<1],
1058 												offsets[(i<<1)+1] - offsets[i<<1],
1059 												offsets[i<<1], NULL, unmatched_as_null);
1060 							}
1061 						} else {
1062 							for (i = 0; i < count; i++) {
1063 								if (offsets[i<<1] < 0) {
1064 									if (unmatched_as_null) {
1065 										add_next_index_null(subpats);
1066 									} else {
1067 										add_next_index_str(subpats, ZSTR_EMPTY_ALLOC());
1068 									}
1069 								} else {
1070 									add_next_index_stringl(subpats, subject + offsets[i<<1],
1071 														   offsets[(i<<1)+1] - offsets[i<<1]);
1072 								}
1073 							}
1074 						}
1075 					}
1076 					/* Add MARK, if available */
1077 					if (mark) {
1078 						add_assoc_string_ex(subpats, "MARK", sizeof("MARK") - 1, (char *)mark);
1079 					}
1080 					break;
1081 				}
1082 			}
1083 
1084 			/* Advance to the next piece. */
1085 			start_offset = offsets[1];
1086 
1087 			/* If we have matched an empty string, mimic what Perl's /g options does.
1088 			   This turns out to be rather cunning. First we set PCRE_NOTEMPTY_ATSTART and try
1089 			   the match again at the same point. If this fails (picked up above) we
1090 			   advance to the next character. */
1091 			g_notempty = (start_offset == offsets[0]) ? PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED : 0;
1092 
1093 		} else if (count == PCRE_ERROR_NOMATCH) {
1094 			/* If we previously set PCRE_NOTEMPTY_ATSTART after a null match,
1095 			   this is not necessarily the end. We need to advance
1096 			   the start offset, and continue. Fudge the offset values
1097 			   to achieve this, unless we're already at the end of the string. */
1098 			if (g_notempty != 0 && start_offset < subject_len) {
1099 				int unit_len = calculate_unit_length(pce, subject + start_offset);
1100 
1101 				start_offset += unit_len;
1102 				g_notempty = 0;
1103 			} else
1104 				break;
1105 		} else {
1106 			pcre_handle_exec_error(count);
1107 			break;
1108 		}
1109 	} while (global);
1110 
1111 	/* Add the match sets to the output array and clean up */
1112 	if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
1113 		if (subpat_names) {
1114 			for (i = 0; i < num_subpats; i++) {
1115 				if (subpat_names[i]) {
1116 					zend_hash_str_update(Z_ARRVAL_P(subpats), subpat_names[i],
1117 									 strlen(subpat_names[i]), &match_sets[i]);
1118 					Z_ADDREF(match_sets[i]);
1119 				}
1120 				zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i]);
1121 			}
1122 		} else {
1123 			for (i = 0; i < num_subpats; i++) {
1124 				zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i]);
1125 			}
1126 		}
1127 		efree(match_sets);
1128 
1129 		if (Z_TYPE(marks) != IS_UNDEF) {
1130 			add_assoc_zval(subpats, "MARK", &marks);
1131 		}
1132 	}
1133 
1134 	if (size_offsets <= 32) {
1135 		free_alloca(offsets, use_heap);
1136 	} else {
1137 		efree(offsets);
1138 	}
1139 	if (subpat_names) {
1140 		efree(subpat_names);
1141 	}
1142 
1143 	/* Did we encounter an error? */
1144 	if (PCRE_G(error_code) == PHP_PCRE_NO_ERROR) {
1145 		RETVAL_LONG(matched);
1146 	} else {
1147 		RETVAL_FALSE;
1148 	}
1149 }
1150 /* }}} */
1151 
1152 /* {{{ proto int preg_match(string pattern, string subject [, array &subpatterns [, int flags [, int offset]]])
1153    Perform a Perl-style regular expression match */
PHP_FUNCTION(preg_match)1154 static PHP_FUNCTION(preg_match)
1155 {
1156 	php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
1157 }
1158 /* }}} */
1159 
1160 /* {{{ proto int preg_match_all(string pattern, string subject [, array &subpatterns [, int flags [, int offset]]])
1161    Perform a Perl-style global regular expression match */
PHP_FUNCTION(preg_match_all)1162 static PHP_FUNCTION(preg_match_all)
1163 {
1164 	php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
1165 }
1166 /* }}} */
1167 
1168 /* {{{ preg_get_backref
1169  */
/* Parses a back-reference of the form "\N", "$N" or "${N}" (N = one or two
 * decimal digits) located at *str.
 *
 * str     - in/out: points at the introducer character ('\\' or '$'); on
 *           success it is advanced past the whole reference, on failure it
 *           is left untouched.
 * backref - out: receives the parsed group number. It may already have been
 *           written when a "${N" form fails for lack of the closing brace.
 *
 * Returns 1 if a reference was parsed, 0 otherwise. The string must be
 * NUL-terminated.
 */
static int preg_get_backref(char **str, int *backref)
{
	char *cursor = *str;
	int braced;

	/* An introducer that is the last character cannot start a reference. */
	if (cursor[1] == '\0') {
		return 0;
	}

	/* Only '$' supports the braced form; skip the introducer (and the brace). */
	braced = (cursor[0] == '$' && cursor[1] == '{');
	cursor += braced ? 2 : 1;

	/* First digit is mandatory. */
	if (*cursor < '0' || *cursor > '9') {
		return 0;
	}
	*backref = *cursor - '0';
	cursor++;

	/* Second digit is optional. */
	if (*cursor >= '0' && *cursor <= '9') {
		*backref = *backref * 10 + *cursor - '0';
		cursor++;
	}

	if (braced) {
		if (*cursor != '}') {
			return 0;
		}
		cursor++;
	}

	*str = cursor;
	return 1;
}
1205 /* }}} */
1206 
1207 /* {{{ preg_do_repl_func
1208  */
preg_do_repl_func(zend_fcall_info * fci,zend_fcall_info_cache * fcc,char * subject,int * offsets,char ** subpat_names,int count,unsigned char * mark)1209 static zend_string *preg_do_repl_func(zend_fcall_info *fci, zend_fcall_info_cache *fcc, char *subject, int *offsets, char **subpat_names, int count, unsigned char *mark)
1210 {
1211 	zend_string *result_str;
1212 	zval		 retval;			/* Function return value */
1213 	zval	     arg;				/* Argument to pass to function */
1214 	int			 i;
1215 
1216 	array_init_size(&arg, count + (mark ? 1 : 0));
1217 	if (subpat_names) {
1218 		for (i = 0; i < count; i++) {
1219 			if (subpat_names[i]) {
1220 				add_assoc_stringl(&arg, subpat_names[i], &subject[offsets[i<<1]] , offsets[(i<<1)+1] - offsets[i<<1]);
1221 			}
1222 			add_next_index_stringl(&arg, &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1]);
1223 		}
1224 	} else {
1225 		for (i = 0; i < count; i++) {
1226 			add_next_index_stringl(&arg, &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1]);
1227 		}
1228 	}
1229 	if (mark) {
1230 		add_assoc_string(&arg, "MARK", (char *) mark);
1231 	}
1232 
1233 	fci->retval = &retval;
1234 	fci->param_count = 1;
1235 	fci->params = &arg;
1236 	fci->no_separation = 0;
1237 
1238 	if (zend_call_function(fci, fcc) == SUCCESS && Z_TYPE(retval) != IS_UNDEF) {
1239 		result_str = zval_get_string(&retval);
1240 		zval_ptr_dtor(&retval);
1241 	} else {
1242 		if (!EG(exception)) {
1243 			php_error_docref(NULL, E_WARNING, "Unable to call custom replacement function");
1244 		}
1245 
1246 		result_str = zend_string_init(&subject[offsets[0]], offsets[1] - offsets[0], 0);
1247 	}
1248 
1249 	zval_ptr_dtor(&arg);
1250 
1251 	return result_str;
1252 }
1253 /* }}} */
1254 
1255 /* {{{ php_pcre_replace
1256  */
php_pcre_replace(zend_string * regex,zend_string * subject_str,char * subject,int subject_len,zend_string * replace_str,int limit,int * replace_count)1257 PHPAPI zend_string *php_pcre_replace(zend_string *regex,
1258 							  zend_string *subject_str,
1259 							  char *subject, int subject_len,
1260 							  zend_string *replace_str,
1261 							  int limit, int *replace_count)
1262 {
1263 	pcre_cache_entry	*pce;			    /* Compiled regular expression */
1264 	zend_string	 		*result;			/* Function result */
1265 
1266 	/* Compile regex or get it from cache. */
1267 	if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
1268 		return NULL;
1269 	}
1270 	pce->refcount++;
1271 	result = php_pcre_replace_impl(pce, subject_str, subject, subject_len, replace_str,
1272 		limit, replace_count);
1273 	pce->refcount--;
1274 
1275 	return result;
1276 }
1277 /* }}} */
1278 
1279 /* {{{ php_pcre_replace_impl() */
php_pcre_replace_impl(pcre_cache_entry * pce,zend_string * subject_str,char * subject,int subject_len,zend_string * replace_str,int limit,int * replace_count)1280 PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *subject_str, char *subject, int subject_len, zend_string *replace_str, int limit, int *replace_count)
1281 {
1282 	pcre_extra		*extra = pce->extra;/* Holds results of studying */
1283 	pcre_extra		 extra_data;		/* Used locally for exec options */
1284 	int				 no_utf_check = 0;	/* Execution options */
1285 	int				 count = 0;			/* Count of matched subpatterns */
1286 	int				*offsets;			/* Array of subpattern offsets */
1287 	char 			**subpat_names;		/* Array for named subpatterns */
1288 	int				 num_subpats;		/* Number of captured subpatterns */
1289 	int				 size_offsets;		/* Size of the offsets array */
1290 	size_t			 new_len;			/* Length of needed storage */
1291 	size_t			 alloc_len;			/* Actual allocated length */
1292 	int				 match_len;			/* Length of the current match */
1293 	int				 backref;			/* Backreference number */
1294 	int				 start_offset;		/* Where the new search starts */
1295 	int				 g_notempty=0;		/* If the match should not be empty */
1296 	char			*walkbuf,			/* Location of current replacement in the result */
1297 					*walk,				/* Used to walk the replacement string */
1298 					*match,				/* The current match */
1299 					*piece,				/* The current piece of subject */
1300 					*replace_end,		/* End of replacement string */
1301 					 walk_last;			/* Last walked character */
1302 	size_t			result_len; 		/* Length of result */
1303 	zend_string		*result;			/* Result of replacement */
1304 
1305 	ALLOCA_FLAG(use_heap);
1306 
1307 	if (extra == NULL) {
1308 		extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1309 		extra = &extra_data;
1310 	}
1311 
1312 	extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
1313 	extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
1314 
1315 	if (UNEXPECTED(pce->preg_options & PREG_REPLACE_EVAL)) {
1316 		php_error_docref(NULL, E_WARNING, "The /e modifier is no longer supported, use preg_replace_callback instead");
1317 		return NULL;
1318 	}
1319 
1320 	/* Calculate the size of the offsets array, and allocate memory for it. */
1321 	num_subpats = pce->capture_count + 1;
1322 	size_offsets = num_subpats * 3;
1323 	if (size_offsets <= 32) {
1324 		offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
1325 	} else {
1326 		offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
1327 	}
1328 
1329 	/*
1330 	 * Build a mapping from subpattern numbers to their names. We will
1331 	 * allocate the table only if there are any named subpatterns.
1332 	 */
1333 	subpat_names = NULL;
1334 	if (UNEXPECTED(pce->name_count > 0)) {
1335 		subpat_names = make_subpats_table(num_subpats, pce);
1336 		if (!subpat_names) {
1337 			if (size_offsets <= 32) {
1338 				free_alloca(offsets, use_heap);
1339 			} else {
1340 				efree(offsets);
1341 			}
1342 			return NULL;
1343 		}
1344 	}
1345 
1346 	alloc_len = 0;
1347 	result = NULL;
1348 
1349 	/* Initialize */
1350 	match = NULL;
1351 	start_offset = 0;
1352 	result_len = 0;
1353 	PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1354 
1355 #ifdef HAVE_PCRE_JIT_SUPPORT
1356 	if (!(pce->compile_options & PCRE_UTF8)) {
1357 		no_utf_check = PCRE_NO_UTF8_CHECK;
1358 	}
1359 #endif
1360 
1361 #ifdef PCRE_EXTRA_MARK
1362 	extra->flags &= ~PCRE_EXTRA_MARK;
1363 #endif
1364 
1365 	while (1) {
1366 		/* Execute the regular expression. */
1367 #ifdef HAVE_PCRE_JIT_SUPPORT
1368 		if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT)
1369 		 && no_utf_check && !g_notempty) {
1370 			count = pcre_jit_exec(pce->re, extra, subject, subject_len, start_offset,
1371 						  no_utf_check|g_notempty, offsets, size_offsets, jit_stack);
1372 		} else
1373 #endif
1374 		count = pcre_exec(pce->re, extra, subject, subject_len, start_offset,
1375 						  no_utf_check|g_notempty, offsets, size_offsets);
1376 
1377 		/* the string was already proved to be valid UTF-8 */
1378 		no_utf_check = PCRE_NO_UTF8_CHECK;
1379 
1380 		/* Check for too many substrings condition. */
1381 		if (UNEXPECTED(count == 0)) {
1382 			php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
1383 			count = size_offsets / 3;
1384 		}
1385 
1386 		piece = subject + start_offset;
1387 
1388 		/* if (EXPECTED(count > 0 && (limit == -1 || limit > 0))) */
1389 		if (count > 0 && (offsets[1] - offsets[0] >= 0) && limit) {
1390 			zend_bool simple_string = 1;
1391 
1392 			if (replace_count) {
1393 				++*replace_count;
1394 			}
1395 
1396 			/* Set the match location in subject */
1397 			match = subject + offsets[0];
1398 
1399 			new_len = result_len + offsets[0] - start_offset; /* part before the match */
1400 
1401 			walk = ZSTR_VAL(replace_str);
1402 			replace_end = walk + ZSTR_LEN(replace_str);
1403 			walk_last = 0;
1404 
1405 			while (walk < replace_end) {
1406 				if ('\\' == *walk || '$' == *walk) {
1407 					simple_string = 0;
1408 					if (walk_last == '\\') {
1409 						walk++;
1410 						walk_last = 0;
1411 						continue;
1412 					}
1413 					if (preg_get_backref(&walk, &backref)) {
1414 						if (backref < count)
1415 							new_len += offsets[(backref<<1)+1] - offsets[backref<<1];
1416 						continue;
1417 					}
1418 				}
1419 				new_len++;
1420 				walk++;
1421 				walk_last = walk[-1];
1422 			}
1423 
1424 			if (new_len >= alloc_len) {
1425 				alloc_len = zend_safe_address_guarded(2, new_len, alloc_len);
1426 				if (result == NULL) {
1427 					result = zend_string_alloc(alloc_len, 0);
1428 				} else {
1429 					result = zend_string_extend(result, alloc_len, 0);
1430 				}
1431 			}
1432 
1433 			if (match-piece > 0) {
1434 				/* copy the part of the string before the match */
1435 				memcpy(&ZSTR_VAL(result)[result_len], piece, match-piece);
1436 				result_len += (match-piece);
1437 			}
1438 
1439 			if (simple_string) {
1440 				/* copy replacement */
1441 				memcpy(&ZSTR_VAL(result)[result_len], ZSTR_VAL(replace_str), ZSTR_LEN(replace_str)+1);
1442 				result_len += ZSTR_LEN(replace_str);
1443 			} else {
1444 				/* copy replacement and backrefs */
1445 				walkbuf = ZSTR_VAL(result) + result_len;
1446 
1447 				walk = ZSTR_VAL(replace_str);
1448 				walk_last = 0;
1449 				while (walk < replace_end) {
1450 					if ('\\' == *walk || '$' == *walk) {
1451 						if (walk_last == '\\') {
1452 							*(walkbuf-1) = *walk++;
1453 							walk_last = 0;
1454 							continue;
1455 						}
1456 						if (preg_get_backref(&walk, &backref)) {
1457 							if (backref < count) {
1458 								match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
1459 								memcpy(walkbuf, subject + offsets[backref<<1], match_len);
1460 								walkbuf += match_len;
1461 							}
1462 							continue;
1463 						}
1464 					}
1465 					*walkbuf++ = *walk++;
1466 					walk_last = walk[-1];
1467 				}
1468 				*walkbuf = '\0';
1469 				/* increment the result length by how much we've added to the string */
1470 				result_len += (walkbuf - (ZSTR_VAL(result) + result_len));
1471 			}
1472 
1473 			if (limit) {
1474 				limit--;
1475 			}
1476 
1477 			/* Advance to the next piece. */
1478 			start_offset = offsets[1];
1479 
1480 			/* If we have matched an empty string, mimic what Perl's /g options does.
1481 			   This turns out to be rather cunning. First we set PCRE_NOTEMPTY_ATSTART and try
1482 			   the match again at the same point. If this fails (picked up above) we
1483 			   advance to the next character. */
1484 			g_notempty = (start_offset == offsets[0]) ? PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED : 0;
1485 
1486 		} else if (count == PCRE_ERROR_NOMATCH || limit == 0) {
1487 			/* If we previously set PCRE_NOTEMPTY_ATSTART after a null match,
1488 			   this is not necessarily the end. We need to advance
1489 			   the start offset, and continue. Fudge the offset values
1490 			   to achieve this, unless we're already at the end of the string. */
1491 			if (g_notempty != 0 && start_offset < subject_len) {
1492 				int unit_len = calculate_unit_length(pce, piece);
1493 
1494 				start_offset += unit_len;
1495 				memcpy(ZSTR_VAL(result) + result_len, piece, unit_len);
1496 				result_len += unit_len;
1497 				g_notempty = 0;
1498 			} else {
1499 				if (!result && subject_str) {
1500 					result = zend_string_copy(subject_str);
1501 					break;
1502 				}
1503 				new_len = result_len + subject_len - start_offset;
1504 				if (new_len >= alloc_len) {
1505 					alloc_len = new_len; /* now we know exactly how long it is */
1506 					if (NULL != result) {
1507 						result = zend_string_realloc(result, alloc_len, 0);
1508 					} else {
1509 						result = zend_string_alloc(alloc_len, 0);
1510 					}
1511 				}
1512 				/* stick that last bit of string on our output */
1513 				memcpy(ZSTR_VAL(result) + result_len, piece, subject_len - start_offset);
1514 				result_len += subject_len - start_offset;
1515 				ZSTR_VAL(result)[result_len] = '\0';
1516 				ZSTR_LEN(result) = result_len;
1517 				break;
1518 			}
1519 		} else {
1520 			pcre_handle_exec_error(count);
1521 			if (result) {
1522 				zend_string_release(result);
1523 				result = NULL;
1524 			}
1525 			break;
1526 		}
1527 	}
1528 
1529 	if (size_offsets <= 32) {
1530 		free_alloca(offsets, use_heap);
1531 	} else {
1532 		efree(offsets);
1533 	}
1534 	if (UNEXPECTED(subpat_names)) {
1535 		efree(subpat_names);
1536 	}
1537 
1538 	return result;
1539 }
1540 /* }}} */
1541 
1542 /* {{{ php_pcre_replace_func_impl() */
/* Replaces matches of the compiled pattern pce in subject with the string
 * produced by a user callback (invoked through preg_do_repl_func()).
 *
 * pce           - compiled pattern (cache entry)
 * subject_str   - optional zend_string wrapping subject; when non-NULL and no
 *                 replacement took place, a new reference to it is returned
 *                 instead of a fresh copy
 * subject/len   - the text to search
 * fci/fcc       - call info and cache of the replacement callback
 * limit         - a replacement is performed only while this is non-zero; it
 *                 is decremented after each one, so a negative value never
 *                 reaches zero in practice
 * replace_count - if non-NULL, incremented once per replacement
 *
 * Resets PCRE_G(error_code) before matching. Returns the resulting string,
 * or NULL if the pattern carries the /e modifier, the name table could not be
 * built, or the matcher reported an error.
 */
static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_string *subject_str, char *subject, int subject_len, zend_fcall_info *fci, zend_fcall_info_cache *fcc, int limit, int *replace_count)
{
	pcre_extra		*extra = pce->extra;/* Holds results of studying */
	pcre_extra		 extra_data;		/* Used locally for exec options */
	int				 no_utf_check = 0;	/* Execution options */
	int				 count = 0;			/* Count of matched subpatterns */
	int				*offsets;			/* Array of subpattern offsets */
	char 			**subpat_names;		/* Array for named subpatterns */
	int				 num_subpats;		/* Number of captured subpatterns */
	int				 size_offsets;		/* Size of the offsets array */
	size_t			 new_len;			/* Length of needed storage */
	size_t			 alloc_len;			/* Actual allocated length */
	int				 start_offset;		/* Where the new search starts */
	int				 g_notempty=0;		/* If the match should not be empty */
	char			*match,				/* The current match */
					*piece;				/* The current piece of subject */
	size_t			result_len; 		/* Length of result */
	unsigned char   *mark = NULL;       /* Target for MARK name */
	zend_string		*result;			/* Result of replacement */
	zend_string     *eval_result=NULL;  /* Result of custom function */

	ALLOCA_FLAG(use_heap);

	if (extra == NULL) {
		extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
		extra = &extra_data;
	}

	extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
	extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);

	if (UNEXPECTED(pce->preg_options & PREG_REPLACE_EVAL)) {
		php_error_docref(NULL, E_WARNING, "The /e modifier is no longer supported, use preg_replace_callback instead");
		return NULL;
	}

	/* Calculate the size of the offsets array, and allocate memory for it. */
	num_subpats = pce->capture_count + 1;
	size_offsets = num_subpats * 3;
	if (size_offsets <= 32) {
		offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
	} else {
		offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
	}

	/*
	 * Build a mapping from subpattern numbers to their names. We will
	 * allocate the table only if there are any named subpatterns.
	 */
	subpat_names = NULL;
	if (UNEXPECTED(pce->name_count > 0)) {
		subpat_names = make_subpats_table(num_subpats, pce);
		if (!subpat_names) {
			if (size_offsets <= 32) {
				free_alloca(offsets, use_heap);
			} else {
				efree(offsets);
			}
			return NULL;
		}
	}

	alloc_len = 0;
	result = NULL;

	/* Initialize */
	match = NULL;
	start_offset = 0;
	result_len = 0;
	PCRE_G(error_code) = PHP_PCRE_NO_ERROR;

#ifdef HAVE_PCRE_JIT_SUPPORT
	if (!(pce->compile_options & PCRE_UTF8)) {
		no_utf_check = PCRE_NO_UTF8_CHECK;
	}
#endif

#ifdef PCRE_EXTRA_MARK
	extra->mark = &mark;
	extra->flags |= PCRE_EXTRA_MARK;
#endif

	while (1) {
		/* Execute the regular expression. */
#ifdef HAVE_PCRE_JIT_SUPPORT
		if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT)
		 && no_utf_check && !g_notempty) {
			count = pcre_jit_exec(pce->re, extra, subject, subject_len, start_offset,
						  no_utf_check|g_notempty, offsets, size_offsets, jit_stack);
		} else
#endif
		count = pcre_exec(pce->re, extra, subject, subject_len, start_offset,
						  no_utf_check|g_notempty, offsets, size_offsets);

		/* the string was already proved to be valid UTF-8 */
		no_utf_check = PCRE_NO_UTF8_CHECK;

		/* Check for too many substrings condition. */
		if (UNEXPECTED(count == 0)) {
			php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
			count = size_offsets / 3;
		}

		piece = subject + start_offset;

		/* if (EXPECTED(count > 0 && (limit == -1 || limit > 0))) */
		if (count > 0 && (offsets[1] - offsets[0] >= 0) && limit) {
			if (replace_count) {
				++*replace_count;
			}

			/* Set the match location in subject */
			match = subject + offsets[0];

			new_len = result_len + offsets[0] - start_offset; /* part before the match */

			/* Use custom function to get replacement string and its length. */
			eval_result = preg_do_repl_func(fci, fcc, subject, offsets, subpat_names, count, mark);
			ZEND_ASSERT(eval_result);
			new_len = zend_safe_address_guarded(1, ZSTR_LEN(eval_result), new_len);
			if (new_len >= alloc_len) {
				alloc_len = zend_safe_address_guarded(2, new_len, alloc_len);
				if (result == NULL) {
					result = zend_string_alloc(alloc_len, 0);
				} else {
					result = zend_string_extend(result, alloc_len, 0);
				}
			}

			if (match-piece > 0) {
				/* copy the part of the string before the match */
				memcpy(ZSTR_VAL(result) + result_len, piece, match-piece);
				result_len += (int)(match-piece);
			}

			/* If using custom function, copy result to the buffer and clean up. */
			memcpy(ZSTR_VAL(result) + result_len, ZSTR_VAL(eval_result), ZSTR_LEN(eval_result));
			result_len += (int)ZSTR_LEN(eval_result);
			zend_string_release(eval_result);

			if (limit) {
				limit--;
			}

			/* Advance to the next piece. */
			start_offset = offsets[1];

			/* If we have matched an empty string, mimic what Perl's /g options does.
			   This turns out to be rather cunning. First we set PCRE_NOTEMPTY_ATSTART and try
			   the match again at the same point. If this fails (picked up above) we
			   advance to the next character. */
			g_notempty = (start_offset == offsets[0]) ? PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED : 0;

#ifdef PCRE_EXTRA_MARK
			/* replace function may use the same regex recursively */
			extra->mark = &mark;
			extra->flags |= PCRE_EXTRA_MARK;
#endif
		} else if (count == PCRE_ERROR_NOMATCH || limit == 0) {
			/* If we previously set PCRE_NOTEMPTY_ATSTART after a null match,
			   this is not necessarily the end. We need to advance
			   the start offset, and continue. Fudge the offset values
			   to achieve this, unless we're already at the end of the string. */
			if (g_notempty != 0 && start_offset < subject_len) {
				int unit_len = calculate_unit_length(pce, piece);

				/* The buffer was sized for the previous match only, so it may
				   have as little as one spare byte; grow it before copying
				   the skipped (possibly multi-byte) unit. */
				new_len = result_len + unit_len;
				if (new_len >= alloc_len) {
					alloc_len = zend_safe_address_guarded(2, new_len, alloc_len);
					if (result == NULL) {
						result = zend_string_alloc(alloc_len, 0);
					} else {
						result = zend_string_extend(result, alloc_len, 0);
					}
				}

				start_offset += unit_len;
				memcpy(ZSTR_VAL(result) + result_len, piece, unit_len);
				result_len += unit_len;
				g_notempty = 0;
			} else {
				if (!result && subject_str) {
					/* Nothing was replaced: hand back the subject itself. */
					result = zend_string_copy(subject_str);
					break;
				}
				new_len = result_len + subject_len - start_offset;
				if (new_len >= alloc_len) {
					alloc_len = new_len; /* now we know exactly how long it is */
					if (NULL != result) {
						result = zend_string_realloc(result, alloc_len, 0);
					} else {
						result = zend_string_alloc(alloc_len, 0);
					}
				}
				/* stick that last bit of string on our output */
				memcpy(ZSTR_VAL(result) + result_len, piece, subject_len - start_offset);
				result_len += subject_len - start_offset;
				ZSTR_VAL(result)[result_len] = '\0';
				ZSTR_LEN(result) = result_len;
				break;
			}
		} else {
			pcre_handle_exec_error(count);
			if (result) {
				zend_string_release(result);
				result = NULL;
			}
			break;
		}
	}

	if (size_offsets <= 32) {
		free_alloca(offsets, use_heap);
	} else {
		efree(offsets);
	}
	if (UNEXPECTED(subpat_names)) {
		efree(subpat_names);
	}

	return result;
}
1755 /* }}} */
1756 
1757 /* {{{ php_pcre_replace_func
1758  */
php_pcre_replace_func(zend_string * regex,zend_string * subject_str,zend_fcall_info * fci,zend_fcall_info_cache * fcc,int limit,int * replace_count)1759 static zend_always_inline zend_string *php_pcre_replace_func(zend_string *regex,
1760 							  zend_string *subject_str,
1761 							  zend_fcall_info *fci, zend_fcall_info_cache *fcc,
1762 							  int limit, int *replace_count)
1763 {
1764 	pcre_cache_entry	*pce;			    /* Compiled regular expression */
1765 	zend_string	 		*result;			/* Function result */
1766 
1767 	/* Compile regex or get it from cache. */
1768 	if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
1769 		return NULL;
1770 	}
1771 	pce->refcount++;
1772 	result = php_pcre_replace_func_impl(pce, subject_str, ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), fci, fcc,
1773 		limit, replace_count);
1774 	pce->refcount--;
1775 
1776 	return result;
1777 }
1778 /* }}} */
1779 
1780 /* {{{ php_pcre_replace_array
1781  */
/* Runs every pattern of `regex` over the subject in sequence; the output of
 * one replacement becomes the input of the next.
 *
 * `replace` is either an array (its defined slots are paired with the
 * patterns in order, and the empty string is used once they run out) or a
 * string zval shared by all patterns.
 *
 * The reference to subject_str passed in is released as soon as the first
 * pattern has been processed. The return value is the last result, NULL if a
 * replacement failed, or subject_str itself when `regex` has no entries. */
static zend_string *php_pcre_replace_array(HashTable *regex, zval *replace, zend_string *subject_str, int limit, int *replace_count)
{
	/* NULL means "one replacement string for every pattern". */
	HashTable *replacements = (Z_TYPE_P(replace) == IS_ARRAY) ? Z_ARRVAL_P(replace) : NULL;
	uint32_t   next_slot = 0;
	zval      *pattern_zv;

	ZEND_HASH_FOREACH_VAL(regex, pattern_zv) {
		/* Patterns are always handled as strings. */
		zend_string *pattern = zval_get_string(pattern_zv);
		zend_string *replacement;
		zend_string *replaced;

		if (replacements == NULL) {
			replacement = Z_STR_P(replace);
		} else {
			/* Take the next defined slot of the replacement array,
			   stepping over holes left by removed elements. */
			replacement = NULL;
			while (next_slot != replacements->nNumUsed) {
				zval *candidate = &replacements->arData[next_slot].val;

				next_slot++;
				if (Z_TYPE_P(candidate) != IS_UNDEF) {
					replacement = zval_get_string(candidate);
					break;
				}
			}
			if (replacement == NULL) {
				/* More patterns than replacements: use "". */
				replacement = ZSTR_EMPTY_ALLOC();
			}
		}

		replaced = php_pcre_replace(pattern,
									subject_str,
									ZSTR_VAL(subject_str),
									(int)ZSTR_LEN(subject_str),
									replacement,
									limit,
									replace_count);

		/* Only the per-pattern replacement string is owned here; the
		   shared one still belongs to the `replace` zval. */
		if (replacements != NULL) {
			zend_string_release(replacement);
		}
		zend_string_release(pattern);
		zend_string_release(subject_str);

		/* The result becomes the subject of the next pattern. */
		subject_str = replaced;
		if (UNEXPECTED(replaced == NULL)) {
			break;
		}
	} ZEND_HASH_FOREACH_END();

	return subject_str;
}
1858 }
1859 /* }}} */
1860 
1861 /* {{{ php_replace_in_subject
1862  */
php_replace_in_subject(zval * regex,zval * replace,zval * subject,int limit,int * replace_count)1863 static zend_always_inline zend_string *php_replace_in_subject(zval *regex, zval *replace, zval *subject, int limit, int *replace_count)
1864 {
1865 	zend_string *result;
1866 	zend_string *subject_str = zval_get_string(subject);
1867 
1868 	if (UNEXPECTED(ZEND_SIZE_T_INT_OVFL(ZSTR_LEN(subject_str)))) {
1869 		zend_string_release(subject_str);
1870 		php_error_docref(NULL, E_WARNING, "Subject is too long");
1871 		result = NULL;
1872 	} else if (Z_TYPE_P(regex) != IS_ARRAY) {
1873 		result = php_pcre_replace(Z_STR_P(regex),
1874 								  subject_str,
1875 								  ZSTR_VAL(subject_str),
1876 								  (int)ZSTR_LEN(subject_str),
1877 								  Z_STR_P(replace),
1878 								  limit,
1879 								  replace_count);
1880 		zend_string_release(subject_str);
1881 	} else {
1882 		result = php_pcre_replace_array(Z_ARRVAL_P(regex),
1883 										replace,
1884 										subject_str,
1885 										limit,
1886 										replace_count);
1887 	}
1888 	return result;
1889 }
1890 /* }}} */
1891 
1892 /* {{{ php_replace_in_subject_func
1893  */
php_replace_in_subject_func(zval * regex,zend_fcall_info * fci,zend_fcall_info_cache * fcc,zval * subject,int limit,int * replace_count)1894 static zend_string *php_replace_in_subject_func(zval *regex, zend_fcall_info *fci, zend_fcall_info_cache *fcc, zval *subject, int limit, int *replace_count)
1895 {
1896 	zval		*regex_entry;
1897 	zend_string *result;
1898 	zend_string	*subject_str = zval_get_string(subject);
1899 
1900 	if (UNEXPECTED(ZEND_SIZE_T_INT_OVFL(ZSTR_LEN(subject_str)))) {
1901 		php_error_docref(NULL, E_WARNING, "Subject is too long");
1902 		return NULL;
1903 	}
1904 
1905 	if (Z_TYPE_P(regex) != IS_ARRAY) {
1906 		result = php_pcre_replace_func(Z_STR_P(regex),
1907 								  subject_str,
1908 								  fci, fcc,
1909 								  limit,
1910 								  replace_count);
1911 		zend_string_release(subject_str);
1912 		return result;
1913 	} else {
1914 		/* If regex is an array */
1915 
1916 		/* For each entry in the regex array, get the entry */
1917 		ZEND_HASH_FOREACH_VAL(Z_ARRVAL_P(regex), regex_entry) {
1918 			/* Make sure we're dealing with strings. */
1919 			zend_string *regex_str = zval_get_string(regex_entry);
1920 
1921 			/* Do the actual replacement and put the result back into subject_str
1922 			   for further replacements. */
1923 			result = php_pcre_replace_func(regex_str,
1924 										   subject_str,
1925 										   fci, fcc,
1926 										   limit,
1927 										   replace_count);
1928 			zend_string_release(regex_str);
1929 			zend_string_release(subject_str);
1930 			subject_str = result;
1931 			if (UNEXPECTED(result == NULL)) {
1932 				break;
1933 			}
1934 		} ZEND_HASH_FOREACH_END();
1935 
1936 		return subject_str;
1937 	}
1938 }
1939 /* }}} */
1940 
1941 /* {{{ preg_replace_func_impl
1942  */
/* Shared body of the callback-based replace functions.
 *
 * A non-array `regex` is converted to a string in place. For a non-array
 * subject, return_value receives the resulting string, or NULL when the
 * replacement produced nothing. For an array subject, return_value becomes an
 * array holding, under the original keys, the result for every element whose
 * replacement produced a string.
 *
 * Returns the replacement count accumulated over all subjects. */
static int preg_replace_func_impl(zval *return_value, zval *regex, zend_fcall_info *fci, zend_fcall_info_cache *fcc, zval *subject, zend_long limit_val)
{
	int			 total_replacements = 0;
	zend_string	*replaced;

	if (Z_TYPE_P(regex) != IS_ARRAY) {
		convert_to_string_ex(regex);
	}

	if (Z_TYPE_P(subject) == IS_ARRAY) {
		zend_string	*key;
		zend_ulong	 idx;
		zval		*element;
		zval		 entry;

		array_init_size(return_value, zend_hash_num_elements(Z_ARRVAL_P(subject)));

		ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(subject), idx, key, element) {
			replaced = php_replace_in_subject_func(regex, fci, fcc, element, limit_val, &total_replacements);
			if (replaced == NULL) {
				/* Elements without a result are left out. */
				continue;
			}

			/* Keep the element under its original key. */
			ZVAL_STR(&entry, replaced);
			if (key != NULL) {
				zend_hash_add_new(Z_ARRVAL_P(return_value), key, &entry);
			} else {
				zend_hash_index_add_new(Z_ARRVAL_P(return_value), idx, &entry);
			}
		} ZEND_HASH_FOREACH_END();

		return total_replacements;
	}

	/* Single subject. */
	replaced = php_replace_in_subject_func(regex, fci, fcc, subject, limit_val, &total_replacements);
	if (replaced == NULL) {
		ZVAL_NULL(return_value);
	} else {
		ZVAL_STR(return_value, replaced);
	}

	return total_replacements;
}
1984 }
1985 /* }}} */
1986 
1987 /* {{{ preg_replace_common
1988  */
/* Common body of preg_replace() and preg_filter().
 *
 * Accepts (regex, replace, subject [, limit [, &count]]). An array
 * replacement combined with a non-array pattern is rejected with a warning
 * and FALSE. With is_filter set, a subject is only reported when the
 * replacement count grew while processing it; otherwise every non-NULL
 * result is reported. When `count` was passed it receives the total number
 * of replacements. */
static void preg_replace_common(INTERNAL_FUNCTION_PARAMETERS, int is_filter)
{
	zval *regex, *replace, *subject, *zcount = NULL;
	zend_long limit = -1;
	zend_string	*replaced;
	int total = 0;		/* replacements performed so far */
	int before;			/* value of `total` before the current subject */

	/* Get function parameters and do error-checking. */
	ZEND_PARSE_PARAMETERS_START(3, 5)
		Z_PARAM_ZVAL(regex)
		Z_PARAM_ZVAL(replace)
		Z_PARAM_ZVAL(subject)
		Z_PARAM_OPTIONAL
		Z_PARAM_LONG(limit)
		Z_PARAM_ZVAL_DEREF(zcount)
	ZEND_PARSE_PARAMETERS_END();

	if (Z_TYPE_P(replace) == IS_ARRAY) {
		/* A list of replacements needs a list of patterns. */
		if (Z_TYPE_P(regex) != IS_ARRAY) {
			php_error_docref(NULL, E_WARNING, "Parameter mismatch, pattern is a string while replacement is an array");
			RETURN_FALSE;
		}
	} else {
		convert_to_string_ex(replace);
		if (Z_TYPE_P(regex) != IS_ARRAY) {
			convert_to_string_ex(regex);
		}
	}

	if (Z_TYPE_P(subject) == IS_ARRAY) {
		zend_string	*key;
		zend_ulong	 idx;
		zval		*element;
		zval		 slot;

		array_init_size(return_value, zend_hash_num_elements(Z_ARRVAL_P(subject)));

		/* Process each element and store accepted results under the
		   element's original key. */
		ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(subject), idx, key, element) {
			before = total;
			replaced = php_replace_in_subject(regex,
											  replace,
											  element,
											  limit,
											  &total);
			if (replaced == NULL) {
				continue;
			}
			if (is_filter && total <= before) {
				/* Filter mode: nothing was replaced in this element. */
				zend_string_release(replaced);
				continue;
			}

			ZVAL_STR(&slot, replaced);
			if (key != NULL) {
				zend_hash_add_new(Z_ARRVAL_P(return_value), key, &slot);
			} else {
				zend_hash_index_add_new(Z_ARRVAL_P(return_value), idx, &slot);
			}
		} ZEND_HASH_FOREACH_END();
	} else {
		before = total;
		replaced = php_replace_in_subject(regex,
										  replace,
										  subject,
										  limit,
										  &total);
		if (replaced == NULL) {
			RETVAL_NULL();
		} else if (is_filter && total <= before) {
			/* Filter mode: nothing was replaced in the subject. */
			zend_string_release(replaced);
			RETVAL_NULL();
		} else {
			RETVAL_STR(replaced);
		}
	}

	if (zcount != NULL) {
		zval_ptr_dtor(zcount);
		ZVAL_LONG(zcount, total);
	}
}
2073 }
2074 /* }}} */
2075 
2076 /* {{{ proto mixed preg_replace(mixed regex, mixed replace, mixed subject [, int limit [, int &count]])
2077    Perform Perl-style regular expression replacement. */
static PHP_FUNCTION(preg_replace)
{
	/* All work happens in preg_replace_common(); is_filter = 0 means every
	   non-NULL result is returned, whether or not anything was replaced. */
	preg_replace_common(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
}
2081 }
2082 /* }}} */
2083 
2084 /* {{{ proto mixed preg_replace_callback(mixed regex, mixed callback, mixed subject [, int limit [, int &count]])
2085    Perform Perl-style regular expression replacement using replacement callback. */
PHP_FUNCTION(preg_replace_callback)2086 static PHP_FUNCTION(preg_replace_callback)
2087 {
2088 	zval *regex, *replace, *subject, *zcount = NULL;
2089 	zend_long limit = -1;
2090 	int replace_count;
2091 	zend_fcall_info fci;
2092 	zend_fcall_info_cache fcc;
2093 
2094 	/* Get function parameters and do error-checking. */
2095 	ZEND_PARSE_PARAMETERS_START(3, 5)
2096 		Z_PARAM_ZVAL(regex)
2097 		Z_PARAM_ZVAL(replace)
2098 		Z_PARAM_ZVAL(subject)
2099 		Z_PARAM_OPTIONAL
2100 		Z_PARAM_LONG(limit)
2101 		Z_PARAM_ZVAL_DEREF(zcount)
2102 	ZEND_PARSE_PARAMETERS_END();
2103 
2104 	if (!zend_is_callable_ex(replace, NULL, 0, NULL, &fcc, NULL)) {
2105 		zend_string	*callback_name = zend_get_callable_name(replace);
2106 		php_error_docref(NULL, E_WARNING, "Requires argument 2, '%s', to be a valid callback", ZSTR_VAL(callback_name));
2107 		zend_string_release(callback_name);
2108 		ZVAL_STR(return_value, zval_get_string(subject));
2109 		return;
2110 	}
2111 
2112 	fci.size = sizeof(fci);
2113 	fci.object = NULL;
2114 	ZVAL_COPY_VALUE(&fci.function_name, replace);
2115 
2116 	replace_count = preg_replace_func_impl(return_value, regex, &fci, &fcc, subject, limit);
2117 	if (zcount) {
2118 		zval_ptr_dtor(zcount);
2119 		ZVAL_LONG(zcount, replace_count);
2120 	}
2121 }
2122 /* }}} */
2123 
2124 /* {{{ proto mixed preg_replace_callback_array(array pattern, mixed subject [, int limit [, int &count]])
2125    Perform Perl-style regular expression replacement using replacement callback. */
PHP_FUNCTION(preg_replace_callback_array)2126 static PHP_FUNCTION(preg_replace_callback_array)
2127 {
2128 	zval regex, zv, *replace, *subject, *pattern, *zcount = NULL;
2129 	zend_long limit = -1;
2130 	zend_string *str_idx;
2131 	int replace_count = 0;
2132 	zend_fcall_info fci;
2133 	zend_fcall_info_cache fcc;
2134 
2135 	/* Get function parameters and do error-checking. */
2136 	ZEND_PARSE_PARAMETERS_START(2, 4)
2137 		Z_PARAM_ARRAY(pattern)
2138 		Z_PARAM_ZVAL(subject)
2139 		Z_PARAM_OPTIONAL
2140 		Z_PARAM_LONG(limit)
2141 		Z_PARAM_ZVAL_DEREF(zcount)
2142 	ZEND_PARSE_PARAMETERS_END();
2143 
2144 	fci.size = sizeof(fci);
2145 	fci.object = NULL;
2146 
2147 	ZEND_HASH_FOREACH_STR_KEY_VAL(Z_ARRVAL_P(pattern), str_idx, replace) {
2148 		if (str_idx) {
2149 			ZVAL_STR_COPY(&regex, str_idx);
2150 		} else {
2151 			php_error_docref(NULL, E_WARNING, "Delimiter must not be alphanumeric or backslash");
2152 			zval_ptr_dtor(return_value);
2153 			RETURN_NULL();
2154 		}
2155 
2156 		if (!zend_is_callable_ex(replace, NULL, 0, NULL, &fcc, NULL)) {
2157 			zend_string *callback_name = zend_get_callable_name(replace);
2158 			php_error_docref(NULL, E_WARNING, "'%s' is not a valid callback", ZSTR_VAL(callback_name));
2159 			zend_string_release(callback_name);
2160 			zval_ptr_dtor(&regex);
2161 			zval_ptr_dtor(return_value);
2162 			ZVAL_COPY(return_value, subject);
2163 			return;
2164 		}
2165 
2166 		ZVAL_COPY_VALUE(&fci.function_name, replace);
2167 
2168 		replace_count += preg_replace_func_impl(&zv, &regex, &fci, &fcc, subject, limit);
2169 		if (subject != return_value) {
2170 			subject = return_value;
2171 		} else {
2172 			zval_ptr_dtor(return_value);
2173 		}
2174 
2175 		zval_ptr_dtor(&regex);
2176 
2177 		ZVAL_COPY_VALUE(return_value, &zv);
2178 
2179 		if (UNEXPECTED(EG(exception))) {
2180 			zval_ptr_dtor(return_value);
2181 			RETURN_NULL();
2182 		}
2183 	} ZEND_HASH_FOREACH_END();
2184 
2185 	if (zcount) {
2186 		zval_ptr_dtor(zcount);
2187 		ZVAL_LONG(zcount, replace_count);
2188 	}
2189 }
2190 /* }}} */
2191 
2192 /* {{{ proto mixed preg_filter(mixed regex, mixed replace, mixed subject [, int limit [, int &count]])
2193    Perform Perl-style regular expression replacement and only return matches. */
static PHP_FUNCTION(preg_filter)
{
	/* Same implementation as preg_replace(), but is_filter = 1: a subject is
	   only returned when at least one replacement was made in it. */
	preg_replace_common(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
}
2197 }
2198 /* }}} */
2199 
2200 /* {{{ proto array preg_split(string pattern, string subject [, int limit [, int flags]])
2201    Split string into an array using a perl-style regular expression as a delimiter */
PHP_FUNCTION(preg_split)2202 static PHP_FUNCTION(preg_split)
2203 {
2204 	zend_string			*regex;			/* Regular expression */
2205 	zend_string			*subject;		/* String to match against */
2206 	zend_long			 limit_val = -1;/* Integer value of limit */
2207 	zend_long			 flags = 0;		/* Match control flags */
2208 	pcre_cache_entry	*pce;			/* Compiled regular expression */
2209 
2210 	/* Get function parameters and do error checking */
2211 	ZEND_PARSE_PARAMETERS_START(2, 4)
2212 		Z_PARAM_STR(regex)
2213 		Z_PARAM_STR(subject)
2214 		Z_PARAM_OPTIONAL
2215 		Z_PARAM_LONG(limit_val)
2216 		Z_PARAM_LONG(flags)
2217 	ZEND_PARSE_PARAMETERS_END_EX(RETURN_FALSE);
2218 
2219 	if (ZEND_SIZE_T_INT_OVFL(ZSTR_LEN(subject))) {
2220 			php_error_docref(NULL, E_WARNING, "Subject is too long");
2221 			RETURN_FALSE;
2222 	}
2223 
2224 	/* Compile regex or get it from cache. */
2225 	if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
2226 		RETURN_FALSE;
2227 	}
2228 
2229 	pce->refcount++;
2230 	php_pcre_split_impl(pce, subject, return_value, (int)limit_val, flags);
2231 	pce->refcount--;
2232 }
2233 /* }}} */
2234 
2235 /* {{{ php_pcre_split
2236  */
/* Splits subject_str at every match of the compiled pattern and stores the
 * pieces in return_value, which is initialised as a new array here.
 *
 *  pce          compiled pattern (cache entry)
 *  subject_str  string to split
 *  limit_val    -1 or 0: no limit. Otherwise it is decremented for every
 *               piece added by the match loop, and the loop stops once it
 *               reaches 1; whatever is left of the subject is then appended
 *               as the final piece.
 *  flags        bitmask of PREG_SPLIT_NO_EMPTY, PREG_SPLIT_DELIM_CAPTURE and
 *               PREG_SPLIT_OFFSET_CAPTURE
 *
 * Matching errors are reported through pcre_handle_exec_error(); the pieces
 * collected up to that point plus the remainder are still returned. */
PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str, zval *return_value,
	zend_long limit_val, zend_long flags)
{
	pcre_extra		*extra = pce->extra;/* Holds results of studying */
	pcre_extra		 extra_data;		/* Used locally for exec options */
	int				*offsets;			/* Array of subpattern offsets */
	int				 size_offsets;		/* Size of the offsets array */
	int				 no_utf_check = 0;	/* Execution options */
	int				 count = 0;			/* Count of matched subpatterns */
	int				 start_offset;		/* Where the new search starts */
	int				 next_offset;		/* End of the last delimiter match + 1 */
	int				 g_notempty = 0;	/* If the match should not be empty */
	char			*last_match;		/* Location of last match */
	int				 no_empty;			/* If NO_EMPTY flag is set */
	int				 delim_capture; 	/* If delimiters should be captured */
	int				 offset_capture;	/* If offsets should be captured */
	zval			 tmp;
	ALLOCA_FLAG(use_heap);

	no_empty = flags & PREG_SPLIT_NO_EMPTY;
	delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE;
	offset_capture = flags & PREG_SPLIT_OFFSET_CAPTURE;

	/* A limit of 0 means "no limit", same as -1. */
	if (limit_val == 0) {
		limit_val = -1;
	}

	/* Without study data a local pcre_extra carries the match limits. */
	if (extra == NULL) {
		extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
		extra = &extra_data;
	}
	extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
	extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
#ifdef PCRE_EXTRA_MARK
	extra->flags &= ~PCRE_EXTRA_MARK;
#endif

	/* Initialize return value */
	array_init(return_value);

	/* Calculate the size of the offsets array, and allocate memory for it.
	   Small vectors go through do_alloca(), larger ones through
	   safe_emalloc(); the clean-up at the end mirrors this choice. */
	size_offsets = (pce->capture_count + 1) * 3;
	if (size_offsets <= 32) {
		offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
	} else {
		offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
	}

	/* Start at the beginning of the string */
	start_offset = 0;
	next_offset = 0;
	last_match = ZSTR_VAL(subject_str);
	PCRE_G(error_code) = PHP_PCRE_NO_ERROR;

#ifdef HAVE_PCRE_JIT_SUPPORT
	if (!(pce->compile_options & PCRE_UTF8)) {
		no_utf_check = PCRE_NO_UTF8_CHECK;
	}
#endif

	/* Look for the next delimiter while there is no limit or the limit has
	   not been reached yet; the loop body breaks out when nothing matches. */
	while ((limit_val == -1 || limit_val > 1)) {
#ifdef HAVE_PCRE_JIT_SUPPORT
		/* The JIT entry point is only used when UTF-8 checking is off and
		   no not-empty retry is pending. */
		if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT)
		 && no_utf_check && !g_notempty) {
			count = pcre_jit_exec(pce->re, extra, ZSTR_VAL(subject_str),
						  ZSTR_LEN(subject_str), start_offset,
						  no_utf_check|g_notempty, offsets, size_offsets, jit_stack);
		} else
#endif
		count = pcre_exec(pce->re, extra, ZSTR_VAL(subject_str),
						  ZSTR_LEN(subject_str), start_offset,
						  no_utf_check|g_notempty, offsets, size_offsets);

		/* the string was already proved to be valid UTF-8 */
		no_utf_check = PCRE_NO_UTF8_CHECK;

		/* Check for too many substrings condition. */
		if (count == 0) {
			php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
			count = size_offsets/3;
		}

		/* If something matched */
		if (count > 0 && (offsets[1] - offsets[0] >= 0)) {
			/* With NO_EMPTY, a delimiter that starts exactly where the
			   previous one ended produces no piece. */
			if (!no_empty || &ZSTR_VAL(subject_str)[offsets[0]] != last_match) {

				if (offset_capture) {
					/* Add (match, offset) pair to the return value */
					add_offset_pair(return_value, last_match, (int)(&ZSTR_VAL(subject_str)[offsets[0]]-last_match), next_offset, NULL, 0);
				} else {
					/* Add the piece to the return value */
					ZVAL_STRINGL(&tmp, last_match, &ZSTR_VAL(subject_str)[offsets[0]]-last_match);
					zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
				}

				/* One less left to do */
				if (limit_val != -1)
					limit_val--;
			}

			/* The next piece starts right after this delimiter. */
			last_match = &ZSTR_VAL(subject_str)[offsets[1]];
			next_offset = offsets[1];

			if (delim_capture) {
				int i, match_len;
				/* Index 0 is the whole delimiter; only the capture
				   groups (1..count-1) are added. */
				for (i = 1; i < count; i++) {
					match_len = offsets[(i<<1)+1] - offsets[i<<1];
					/* If we have matched a delimiter */
					if (!no_empty || match_len > 0) {
						if (offset_capture) {
							add_offset_pair(return_value, &ZSTR_VAL(subject_str)[offsets[i<<1]], match_len, offsets[i<<1], NULL, 0);
						} else {
							ZVAL_STRINGL(&tmp, &ZSTR_VAL(subject_str)[offsets[i<<1]], match_len);
							zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
						}
					}
				}
			}

			/* Advance to the position right after the last full match */
			start_offset = offsets[1];

			/* If we have matched an empty string, mimic what Perl's /g options does.
			   This turns out to be rather cunning. First we set PCRE_NOTEMPTY_ATSTART and try
			   the match again at the same point. If this fails (picked up above) we
			   advance to the next character. */
			g_notempty = (start_offset == offsets[0])? PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED : 0;

		} else if (count == PCRE_ERROR_NOMATCH) {
			/* If we previously set PCRE_NOTEMPTY_ATSTART after a null match,
			   this is not necessarily the end. We need to advance
			   the start offset, and continue. Fudge the offset values
			   to achieve this, unless we're already at the end of the string. */
			if (g_notempty != 0 && start_offset < ZSTR_LEN(subject_str)) {
				start_offset += calculate_unit_length(pce, ZSTR_VAL(subject_str) + start_offset);
				g_notempty = 0;
			} else {
				break;
			}
		} else {
			/* Any other outcome is reported as an execution error. */
			pcre_handle_exec_error(count);
			break;
		}
	}


	start_offset = (int)(last_match - ZSTR_VAL(subject_str)); /* the offset might have been incremented, but without further successful matches */

	/* Append whatever follows the last delimiter (skipped with NO_EMPTY
	   when nothing is left). */
	if (!no_empty || start_offset < ZSTR_LEN(subject_str)) {
		if (offset_capture) {
			/* Add the last (match, offset) pair to the return value */
			add_offset_pair(return_value, &ZSTR_VAL(subject_str)[start_offset], ZSTR_LEN(subject_str) - start_offset, start_offset, NULL, 0);
		} else {
			/* Add the last piece to the return value */
			if (last_match == ZSTR_VAL(subject_str)) {
				/* Nothing was split off: reuse the subject string itself
				   instead of copying it. */
				ZVAL_STR_COPY(&tmp, subject_str);
			} else {
				ZVAL_STRINGL(&tmp, last_match, ZSTR_VAL(subject_str) + ZSTR_LEN(subject_str) - last_match);
			}
			zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
		}
	}


	/* Clean up */
	if (size_offsets <= 32) {
		free_alloca(offsets, use_heap);
	} else {
		efree(offsets);
	}
}
2408 }
2409 /* }}} */
2410 
2411 /* {{{ proto string preg_quote(string str [, string delim_char])
2412    Quote regular expression characters plus an optional character */
PHP_FUNCTION(preg_quote)2413 static PHP_FUNCTION(preg_quote)
2414 {
2415 	zend_string *str;       		/* Input string argument */
2416 	zend_string	*delim = NULL;		/* Additional delimiter argument */
2417 	char		*in_str;			/* Input string */
2418 	char		*in_str_end;    	/* End of the input string */
2419 	zend_string	*out_str;			/* Output string with quoted characters */
2420 	size_t       extra_len;         /* Number of additional characters */
2421 	char 		*p,					/* Iterator for input string */
2422 				*q,					/* Iterator for output string */
2423 				 delim_char = '\0',	/* Delimiter character to be quoted */
2424 				 c;					/* Current character */
2425 
2426 	/* Get the arguments and check for errors */
2427 	ZEND_PARSE_PARAMETERS_START(1, 2)
2428 		Z_PARAM_STR(str)
2429 		Z_PARAM_OPTIONAL
2430 		Z_PARAM_STR_EX(delim, 1, 0)
2431 	ZEND_PARSE_PARAMETERS_END();
2432 
2433 	/* Nothing to do if we got an empty string */
2434 	if (ZSTR_LEN(str) == 0) {
2435 		RETURN_EMPTY_STRING();
2436 	}
2437 
2438 	in_str = ZSTR_VAL(str);
2439 	in_str_end = in_str + ZSTR_LEN(str);
2440 
2441 	if (delim) {
2442 		delim_char = ZSTR_VAL(delim)[0];
2443 	}
2444 
2445 	/* Go through the string and quote necessary characters */
2446 	extra_len = 0;
2447 	p = in_str;
2448 	do {
2449 		c = *p;
2450 		switch(c) {
2451 			case '.':
2452 			case '\\':
2453 			case '+':
2454 			case '*':
2455 			case '?':
2456 			case '[':
2457 			case '^':
2458 			case ']':
2459 			case '$':
2460 			case '(':
2461 			case ')':
2462 			case '{':
2463 			case '}':
2464 			case '=':
2465 			case '!':
2466 			case '>':
2467 			case '<':
2468 			case '|':
2469 			case ':':
2470 			case '-':
2471 				extra_len++;
2472 				break;
2473 
2474 			case '\0':
2475 				extra_len+=3;
2476 				break;
2477 
2478 			default:
2479 				if (c == delim_char) {
2480 					extra_len++;
2481 				}
2482 				break;
2483 		}
2484 		p++;
2485 	} while (p != in_str_end);
2486 
2487 	if (extra_len == 0) {
2488 		RETURN_STR_COPY(str);
2489 	}
2490 
2491 	/* Allocate enough memory so that even if each character
2492 	   is quoted, we won't run out of room */
2493 	out_str = zend_string_safe_alloc(1, ZSTR_LEN(str), extra_len, 0);
2494 	q = ZSTR_VAL(out_str);
2495 	p = in_str;
2496 
2497 	do {
2498 		c = *p;
2499 		switch(c) {
2500 			case '.':
2501 			case '\\':
2502 			case '+':
2503 			case '*':
2504 			case '?':
2505 			case '[':
2506 			case '^':
2507 			case ']':
2508 			case '$':
2509 			case '(':
2510 			case ')':
2511 			case '{':
2512 			case '}':
2513 			case '=':
2514 			case '!':
2515 			case '>':
2516 			case '<':
2517 			case '|':
2518 			case ':':
2519 			case '-':
2520 				*q++ = '\\';
2521 				*q++ = c;
2522 				break;
2523 
2524 			case '\0':
2525 				*q++ = '\\';
2526 				*q++ = '0';
2527 				*q++ = '0';
2528 				*q++ = '0';
2529 				break;
2530 
2531 			default:
2532 				if (c == delim_char) {
2533 					*q++ = '\\';
2534 				}
2535 				*q++ = c;
2536 				break;
2537 		}
2538 		p++;
2539 	} while (p != in_str_end);
2540 	*q = '\0';
2541 
2542 	RETURN_NEW_STR(out_str);
2543 }
2544 /* }}} */
2545 
2546 /* {{{ proto array preg_grep(string regex, array input [, int flags])
2547    Searches array and returns entries which match regex */
PHP_FUNCTION(preg_grep)2548 static PHP_FUNCTION(preg_grep)
2549 {
2550 	zend_string			*regex;			/* Regular expression */
2551 	zval				*input;			/* Input array */
2552 	zend_long			 flags = 0;		/* Match control flags */
2553 	pcre_cache_entry	*pce;			/* Compiled regular expression */
2554 
2555 	/* Get arguments and do error checking */
2556 	ZEND_PARSE_PARAMETERS_START(2, 3)
2557 		Z_PARAM_STR(regex)
2558 		Z_PARAM_ARRAY(input)
2559 		Z_PARAM_OPTIONAL
2560 		Z_PARAM_LONG(flags)
2561 	ZEND_PARSE_PARAMETERS_END();
2562 
2563 	/* Compile regex or get it from cache. */
2564 	if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
2565 		RETURN_FALSE;
2566 	}
2567 
2568 	pce->refcount++;
2569 	php_pcre_grep_impl(pce, input, return_value, flags);
2570 	pce->refcount--;
2571 }
2572 /* }}} */
2573 
php_pcre_grep_impl(pcre_cache_entry * pce,zval * input,zval * return_value,zend_long flags)2574 PHPAPI void  php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return_value, zend_long flags) /* {{{ */
2575 {
2576 	zval		    *entry;				/* An entry in the input array */
2577 	pcre_extra		*extra = pce->extra;/* Holds results of studying */
2578 	pcre_extra		 extra_data;		/* Used locally for exec options */
2579 	int				*offsets;			/* Array of subpattern offsets */
2580 	int				 size_offsets;		/* Size of the offsets array */
2581 	int				 count = 0;			/* Count of matched subpatterns */
2582 	int				 no_utf_check = 0;		/* Execution options */
2583 	zend_string		*string_key;
2584 	zend_ulong		 num_key;
2585 	zend_bool		 invert;			/* Whether to return non-matching
2586 										   entries */
2587 	ALLOCA_FLAG(use_heap);
2588 
2589 	invert = flags & PREG_GREP_INVERT ? 1 : 0;
2590 
2591 	if (extra == NULL) {
2592 		extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2593 		extra = &extra_data;
2594 	}
2595 	extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
2596 	extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
2597 #ifdef PCRE_EXTRA_MARK
2598 	extra->flags &= ~PCRE_EXTRA_MARK;
2599 #endif
2600 
2601 	/* Calculate the size of the offsets array, and allocate memory for it. */
2602 	size_offsets = (pce->capture_count + 1) * 3;
2603 	if (size_offsets <= 32) {
2604 		offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
2605 	} else {
2606 		offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
2607 	}
2608 
2609 	/* Initialize return array */
2610 	array_init(return_value);
2611 
2612 	PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
2613 
2614 #ifdef HAVE_PCRE_JIT_SUPPORT
2615 	no_utf_check = (pce->compile_options & PCRE_UTF8) ? 0 : PCRE_NO_UTF8_CHECK;
2616 #endif
2617 
2618 	/* Go through the input array */
2619 	ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(input), num_key, string_key, entry) {
2620 		zend_string *subject_str = zval_get_string(entry);
2621 
2622 		/* Perform the match */
2623 #ifdef HAVE_PCRE_JIT_SUPPORT
2624 		if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT)
2625 		 && no_utf_check) {
2626 			count = pcre_jit_exec(pce->re, extra, ZSTR_VAL(subject_str),
2627 						  (int)ZSTR_LEN(subject_str), 0,
2628 						  no_utf_check, offsets, size_offsets, jit_stack);
2629 		} else
2630 #endif
2631 		count = pcre_exec(pce->re, extra, ZSTR_VAL(subject_str),
2632 						  (int)ZSTR_LEN(subject_str), 0,
2633 						  no_utf_check, offsets, size_offsets);
2634 
2635 		/* Check for too many substrings condition. */
2636 		if (count == 0) {
2637 			php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
2638 			count = size_offsets/3;
2639 		} else if (count < 0 && count != PCRE_ERROR_NOMATCH) {
2640 			pcre_handle_exec_error(count);
2641 			zend_string_release(subject_str);
2642 			break;
2643 		}
2644 
2645 		/* If the entry fits our requirements */
2646 		if ((count > 0 && !invert) || (count == PCRE_ERROR_NOMATCH && invert)) {
2647 			if (Z_REFCOUNTED_P(entry)) {
2648 			   	Z_ADDREF_P(entry);
2649 			}
2650 
2651 			/* Add to return array */
2652 			if (string_key) {
2653 				zend_hash_update(Z_ARRVAL_P(return_value), string_key, entry);
2654 			} else {
2655 				zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, entry);
2656 			}
2657 		}
2658 
2659 		zend_string_release(subject_str);
2660 	} ZEND_HASH_FOREACH_END();
2661 
2662 	/* Clean up */
2663 	if (size_offsets <= 32) {
2664 		free_alloca(offsets, use_heap);
2665 	} else {
2666 		efree(offsets);
2667 	}
2668 }
2669 /* }}} */
2670 
2671 /* {{{ proto int preg_last_error()
2672    Returns the error code of the last regexp execution. */
static PHP_FUNCTION(preg_last_error)
{
	/* Takes no arguments. */
	ZEND_PARSE_PARAMETERS_START(0, 0)
	ZEND_PARSE_PARAMETERS_END();

	/* The code lives in the module globals; the split and grep
	   implementations reset it to PHP_PCRE_NO_ERROR before matching. */
	RETURN_LONG(PCRE_G(error_code));
}
2679 }
2680 /* }}} */
2681 
2682 /* {{{ module definition structures */
2683 
2684 /* {{{ arginfo */
/* Argument descriptions for the functions registered in pcre_functions[].
   The last argument of each ZEND_BEGIN_ARG_INFO_EX() is the number of
   required arguments; ZEND_ARG_INFO(1, ...) marks a by-reference argument. */
ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match, 0, 0, 2)
    ZEND_ARG_INFO(0, pattern)
    ZEND_ARG_INFO(0, subject)
    ZEND_ARG_INFO(1, subpatterns) /* array */
    ZEND_ARG_INFO(0, flags)
    ZEND_ARG_INFO(0, offset)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match_all, 0, 0, 2)
    ZEND_ARG_INFO(0, pattern)
    ZEND_ARG_INFO(0, subject)
    ZEND_ARG_INFO(1, subpatterns) /* array */
    ZEND_ARG_INFO(0, flags)
    ZEND_ARG_INFO(0, offset)
ZEND_END_ARG_INFO()

/* Also used for preg_filter(), which takes the same arguments. */
ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace, 0, 0, 3)
    ZEND_ARG_INFO(0, regex)
    ZEND_ARG_INFO(0, replace)
    ZEND_ARG_INFO(0, subject)
    ZEND_ARG_INFO(0, limit)
    ZEND_ARG_INFO(1, count)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace_callback, 0, 0, 3)
    ZEND_ARG_INFO(0, regex)
    ZEND_ARG_INFO(0, callback)
    ZEND_ARG_INFO(0, subject)
    ZEND_ARG_INFO(0, limit)
    ZEND_ARG_INFO(1, count)
ZEND_END_ARG_INFO()

/* Here `pattern` is the array mapping pattern strings to callbacks. */
ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace_callback_array, 0, 0, 2)
    ZEND_ARG_INFO(0, pattern)
    ZEND_ARG_INFO(0, subject)
    ZEND_ARG_INFO(0, limit)
    ZEND_ARG_INFO(1, count)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_split, 0, 0, 2)
    ZEND_ARG_INFO(0, pattern)
    ZEND_ARG_INFO(0, subject)
    ZEND_ARG_INFO(0, limit)
    ZEND_ARG_INFO(0, flags)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_quote, 0, 0, 1)
    ZEND_ARG_INFO(0, str)
    ZEND_ARG_INFO(0, delim_char)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_grep, 0, 0, 2)
    ZEND_ARG_INFO(0, regex)
    ZEND_ARG_INFO(0, input) /* array */
    ZEND_ARG_INFO(0, flags)
ZEND_END_ARG_INFO()

/* preg_last_error() takes no arguments. */
ZEND_BEGIN_ARG_INFO(arginfo_preg_last_error, 0)
ZEND_END_ARG_INFO()
2744 /* }}} */
2745 
2746 static const zend_function_entry pcre_functions[] = {
2747 	PHP_FE(preg_match,					arginfo_preg_match)
2748 	PHP_FE(preg_match_all,				arginfo_preg_match_all)
2749 	PHP_FE(preg_replace,				arginfo_preg_replace)
2750 	PHP_FE(preg_replace_callback,		arginfo_preg_replace_callback)
2751 	PHP_FE(preg_replace_callback_array,	arginfo_preg_replace_callback_array)
2752 	PHP_FE(preg_filter,					arginfo_preg_replace)
2753 	PHP_FE(preg_split,					arginfo_preg_split)
2754 	PHP_FE(preg_quote,					arginfo_preg_quote)
2755 	PHP_FE(preg_grep,					arginfo_preg_grep)
2756 	PHP_FE(preg_last_error,				arginfo_preg_last_error)
2757 	PHP_FE_END
2758 };
2759 
2760 zend_module_entry pcre_module_entry = {
2761 	STANDARD_MODULE_HEADER,
2762    "pcre",
2763 	pcre_functions,
2764 	PHP_MINIT(pcre),
2765 	PHP_MSHUTDOWN(pcre),
2766 #ifdef HAVE_PCRE_JIT_SUPPORT
2767 	PHP_RINIT(pcre),
2768 #else
2769 	NULL,
2770 #endif
2771 	NULL,
2772 	PHP_MINFO(pcre),
2773 	PHP_PCRE_VERSION,
2774 	PHP_MODULE_GLOBALS(pcre),
2775 	PHP_GINIT(pcre),
2776 	PHP_GSHUTDOWN(pcre),
2777 	NULL,
2778 	STANDARD_MODULE_PROPERTIES_EX
2779 };
2780 
/* Emit the module lookup entry point only when COMPILE_DL_PCRE is defined. */
#if defined(COMPILE_DL_PCRE)
ZEND_GET_MODULE(pcre)
#endif
2784 
2785 /* }}} */
2786 
2787 #endif /* HAVE_PCRE || HAVE_BUNDLED_PCRE */
2788 
2789 /*
2790  * Local variables:
2791  * tab-width: 4
2792  * c-basic-offset: 4
2793  * End:
2794  * vim600: sw=4 ts=4 fdm=marker
2795  * vim<600: sw=4 ts=4
2796  */
2797