xref: /PHP-7.3/ext/pcre/php_pcre.c (revision 13bfa9f5)
1 /*
2    +----------------------------------------------------------------------+
3    | PHP Version 7                                                        |
4    +----------------------------------------------------------------------+
5    | Copyright (c) 1997-2018 The PHP Group                                |
6    +----------------------------------------------------------------------+
7    | This source file is subject to version 3.01 of the PHP license,      |
8    | that is bundled with this package in the file LICENSE, and is        |
9    | available through the world-wide-web at the following url:           |
10    | http://www.php.net/license/3_01.txt                                  |
11    | If you did not receive a copy of the PHP license and are unable to   |
12    | obtain it through the world-wide-web, please send a note to          |
13    | license@php.net so we can mail you a copy immediately.               |
14    +----------------------------------------------------------------------+
15    | Author: Andrei Zmievski <andrei@php.net>                             |
16    +----------------------------------------------------------------------+
17  */
18 
19 #include "php.h"
20 #include "php_ini.h"
21 #include "php_globals.h"
22 #include "php_pcre.h"
23 #include "ext/standard/info.h"
24 #include "ext/standard/basic_functions.h"
25 #include "zend_smart_str.h"
26 
27 #if HAVE_PCRE || HAVE_BUNDLED_PCRE
28 
29 #include "ext/standard/php_string.h"
30 
/* Match-result flags; each is registered in MINIT as a PHP constant of the
 * same name. */
31 #define PREG_PATTERN_ORDER			1
32 #define PREG_SET_ORDER				2
33 #define PREG_OFFSET_CAPTURE			(1<<8)
34 #define PREG_UNMATCHED_AS_NULL		(1<<9)
35 
/* Split flags; each is registered in MINIT as a PHP constant of the same name. */
36 #define	PREG_SPLIT_NO_EMPTY			(1<<0)
37 #define PREG_SPLIT_DELIM_CAPTURE	(1<<1)
38 #define PREG_SPLIT_OFFSET_CAPTURE	(1<<2)
39 
/* Internal preg_options bit set when the 'e' modifier is parsed; the pattern
 * is then rejected with a warning. Not registered as a PHP constant here. */
40 #define PREG_REPLACE_EVAL			(1<<0)
41 
/* Registered in MINIT as the PHP constant PREG_GREP_INVERT. */
42 #define PREG_GREP_INVERT			(1<<0)
43 
/* Internal preg_options bit set on a cache entry when JIT compilation of the
 * pattern succeeded and reported a non-zero JIT size. */
44 #define PREG_JIT                    (1<<3)
45 
/* Number of cached patterns at which the cache is trimmed; one eighth of this
 * many unreferenced entries are evicted before a new entry is added. */
46 #define PCRE_CACHE_SIZE 4096
47 
/* One compiled pattern as stored in PCRE_G(pcre_cache). */
48 struct _pcre_cache_entry {
	/* Compiled pattern; freed with pcre2_code_free() by the cache destructor. */
49 	pcre2_code *re;
	/* PHP-level flags (PREG_JIT, PREG_REPLACE_EVAL). */
50 	uint32_t preg_options;
	/* Filled from PCRE2_INFO_CAPTURECOUNT when the entry is created. */
51 	uint32_t capture_count;
	/* Filled from PCRE2_INFO_NAMECOUNT when the entry is created. */
52 	uint32_t name_count;
	/* PCRE2 option bits the pattern was compiled with. */
53 	uint32_t compile_options;
	/* Extra compile-context options in effect for this pattern. */
54 	uint32_t extra_compile_options;
	/* Non-zero while the entry is in use; such entries are skipped by cache eviction. */
55 	uint32_t refcount;
56 };
57 
/* Error codes stored in PCRE_G(error_code). Each value is registered in MINIT
 * as the matching PREG_*_ERROR constant, so the numeric values are part of the
 * userland contract and are spelled out explicitly. */
enum {
	PHP_PCRE_NO_ERROR              = 0,
	PHP_PCRE_INTERNAL_ERROR        = 1,
	PHP_PCRE_BACKTRACK_LIMIT_ERROR = 2,
	PHP_PCRE_RECURSION_LIMIT_ERROR = 3,
	PHP_PCRE_BAD_UTF8_ERROR        = 4,
	PHP_PCRE_BAD_UTF8_OFFSET_ERROR = 5,
	PHP_PCRE_JIT_STACKLIMIT_ERROR  = 6
};
67 
68 
69 PHPAPI ZEND_DECLARE_MODULE_GLOBALS(pcre)
70 
71 #ifdef HAVE_PCRE_JIT_SUPPORT
72 #define PCRE_JIT_STACK_MIN_SIZE (32 * 1024)
73 #define PCRE_JIT_STACK_MAX_SIZE (192 * 1024)
74 ZEND_TLS pcre2_jit_stack *jit_stack = NULL;
75 #endif
76 ZEND_TLS pcre2_general_context *gctx = NULL;
77 /* These two are global per thread for now. Though it is possible to use these
78  	per pattern. Either one can copy it and use in pce, or one does no global
79 	contexts at all, but creates for every pce. */
80 ZEND_TLS pcre2_compile_context *cctx = NULL;
81 ZEND_TLS pcre2_match_context   *mctx = NULL;
82 ZEND_TLS pcre2_match_data      *mdata = NULL;
83 ZEND_TLS zend_bool              mdata_used = 0;
84 ZEND_TLS uint8_t pcre2_init_ok = 0;
85 #if defined(ZTS) && defined(HAVE_PCRE_JIT_SUPPORT)
86 static MUTEX_T pcre_mt = NULL;
87 #define php_pcre_mutex_alloc() \
88 	if (tsrm_is_main_thread() && !pcre_mt) pcre_mt = tsrm_mutex_alloc();
89 #define php_pcre_mutex_free() \
90 	if (tsrm_is_main_thread() && pcre_mt) { tsrm_mutex_free(pcre_mt); pcre_mt = NULL; }
91 #define php_pcre_mutex_lock() tsrm_mutex_lock(pcre_mt);
92 #define php_pcre_mutex_unlock() tsrm_mutex_unlock(pcre_mt);
93 #else
94 #define php_pcre_mutex_alloc()
95 #define php_pcre_mutex_free()
96 #define php_pcre_mutex_lock()
97 #define php_pcre_mutex_unlock()
98 #endif
99 
#if HAVE_SETLOCALE
/* Character tables generated by pcre2_maketables(), keyed by locale string. */
ZEND_TLS HashTable char_tables;

/* Destructor for char_tables entries: releases the persistent table buffer
 * held in the zval's pointer slot. */
static void php_pcre_free_char_table(zval *data)
{/*{{{*/
	pefree(Z_PTR_P(data), 1);
}/*}}}*/
#endif
109 
pcre_handle_exec_error(int pcre_code)110 static void pcre_handle_exec_error(int pcre_code) /* {{{ */
111 {
112 	int preg_code = 0;
113 
114 	switch (pcre_code) {
115 		case PCRE2_ERROR_MATCHLIMIT:
116 			preg_code = PHP_PCRE_BACKTRACK_LIMIT_ERROR;
117 			break;
118 
119 		case PCRE2_ERROR_RECURSIONLIMIT:
120 			preg_code = PHP_PCRE_RECURSION_LIMIT_ERROR;
121 			break;
122 
123 		case PCRE2_ERROR_BADUTFOFFSET:
124 			preg_code = PHP_PCRE_BAD_UTF8_OFFSET_ERROR;
125 			break;
126 
127 #ifdef HAVE_PCRE_JIT_SUPPORT
128 		case PCRE2_ERROR_JIT_STACKLIMIT:
129 			preg_code = PHP_PCRE_JIT_STACKLIMIT_ERROR;
130 			break;
131 #endif
132 
133 		default:
134 			if (pcre_code <= PCRE2_ERROR_UTF8_ERR1 && pcre_code >= PCRE2_ERROR_UTF8_ERR21) {
135 				preg_code = PHP_PCRE_BAD_UTF8_ERROR;
136 			} else  {
137 				preg_code = PHP_PCRE_INTERNAL_ERROR;
138 			}
139 			break;
140 	}
141 
142 	PCRE_G(error_code) = preg_code;
143 }
144 /* }}} */
145 
/* Destructor for pcre_cache entries: frees the compiled pattern and then the
 * persistent entry itself. A NULL entry pointer is ignored. */
static void php_free_pcre_cache(zval *data) /* {{{ */
{
	pcre_cache_entry *entry = (pcre_cache_entry *) Z_PTR_P(data);

	if (entry != NULL) {
		pcre2_code_free(entry->re);
		pefree(entry, 1);
	}
}
/* }}} */
154 
/* Allocation callback given to pcre2_general_context_create(); allocates
 * persistent memory. The user-data argument is unused. */
static void *php_pcre_malloc(PCRE2_SIZE size, void *data)
{/*{{{*/
	return pemalloc(size, 1);
}/*}}}*/
160 
/* Deallocation callback given to pcre2_general_context_create(); releases a
 * block obtained from php_pcre_malloc(). The user-data argument is unused. */
static void php_pcre_free(void *block, void *data)
{/*{{{*/
	(void) data;

	pefree(block, 1);
}/*}}}*/
165 
/* Extra compile-context options applied to every pattern unless the 'X'
 * modifier clears them for a single compile. */
166 #define PHP_PCRE_DEFAULT_EXTRA_COPTIONS PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL
/* Size passed to pcre2_match_data_create() for the shared, preallocated match
 * data block; patterns with capture_count + 1 above this get their own block. */
167 #define PHP_PCRE_PREALLOC_MDATA_SIZE 32
168 
/* Creates whichever of the PCRE2 contexts (general, compile, match), the JIT
 * stack and the preallocated match data do not exist yet. Objects that already
 * exist are reused. Sets pcre2_init_ok to 1 when everything is available and to
 * 0 as soon as one creation fails.
 *
 * jit: when non-zero (and JIT support is compiled in) a JIT stack is created too.
 */
static void php_pcre_init_pcre2(uint8_t jit)
{/*{{{*/
	if (gctx == NULL
			&& (gctx = pcre2_general_context_create(php_pcre_malloc, php_pcre_free, NULL)) == NULL) {
		goto failed;
	}

	if (cctx == NULL && (cctx = pcre2_compile_context_create(gctx)) == NULL) {
		goto failed;
	}

	/* XXX The 'X' modifier is the default behavior in PCRE2. This option is
		called dangerous in the manual, as typos in patterns can cause
		unexpected results. We might want to switch to the default PCRE2
		behavior, too, thus causing a certain BC break. */
	pcre2_set_compile_extra_options(cctx, PHP_PCRE_DEFAULT_EXTRA_COPTIONS);

	if (mctx == NULL && (mctx = pcre2_match_context_create(gctx)) == NULL) {
		goto failed;
	}

#ifdef HAVE_PCRE_JIT_SUPPORT
	if (jit && jit_stack == NULL) {
		jit_stack = pcre2_jit_stack_create(PCRE_JIT_STACK_MIN_SIZE, PCRE_JIT_STACK_MAX_SIZE, gctx);
		if (jit_stack == NULL) {
			goto failed;
		}
	}
#endif

	if (mdata == NULL
			&& (mdata = pcre2_match_data_create(PHP_PCRE_PREALLOC_MDATA_SIZE, gctx)) == NULL) {
		goto failed;
	}

	pcre2_init_ok = 1;
	return;

failed:
	/* Objects created so far are kept; a later retry picks up where this stopped. */
	pcre2_init_ok = 0;
}/*}}}*/
221 
php_pcre_shutdown_pcre2(void)222 static void php_pcre_shutdown_pcre2(void)
223 {/*{{{*/
224 	if (gctx) {
225 		pcre2_general_context_free(gctx);
226 		gctx = NULL;
227 	}
228 
229 	if (cctx) {
230 		pcre2_compile_context_free(cctx);
231 		cctx = NULL;
232 	}
233 
234 	if (mctx) {
235 		pcre2_match_context_free(mctx);
236 		mctx = NULL;
237 	}
238 
239 #ifdef HAVE_PCRE_JIT_SUPPORT
240 	/* Stack may only be destroyed when no cached patterns
241 	 	possibly associated with it do exist. */
242 	if (jit_stack) {
243 		pcre2_jit_stack_free(jit_stack);
244 		jit_stack = NULL;
245 	}
246 #endif
247 
248 	if (mdata) {
249 		pcre2_match_data_free(mdata);
250 		mdata = NULL;
251 	}
252 
253 	pcre2_init_ok = 0;
254 }/*}}}*/
255 
PHP_GINIT_FUNCTION(pcre)256 static PHP_GINIT_FUNCTION(pcre) /* {{{ */
257 {
258 	php_pcre_mutex_alloc();
259 
260 	zend_hash_init(&pcre_globals->pcre_cache, 0, NULL, php_free_pcre_cache, 1);
261 	pcre_globals->backtrack_limit = 0;
262 	pcre_globals->recursion_limit = 0;
263 	pcre_globals->error_code      = PHP_PCRE_NO_ERROR;
264 #ifdef HAVE_PCRE_JIT_SUPPORT
265 	pcre_globals->jit = 1;
266 #endif
267 
268 	php_pcre_init_pcre2(1);
269 #if HAVE_SETLOCALE
270 	zend_hash_init(&char_tables, 1, NULL, php_pcre_free_char_table, 1);
271 #endif
272 }
273 /* }}} */
274 
PHP_GSHUTDOWN_FUNCTION(pcre)275 static PHP_GSHUTDOWN_FUNCTION(pcre) /* {{{ */
276 {
277 	zend_hash_destroy(&pcre_globals->pcre_cache);
278 
279 	php_pcre_shutdown_pcre2();
280 #if HAVE_SETLOCALE
281 	zend_hash_destroy(&char_tables);
282 #endif
283 
284 	php_pcre_mutex_free();
285 }
286 /* }}} */
287 
PHP_INI_MH(OnUpdateBacktrackLimit)288 static PHP_INI_MH(OnUpdateBacktrackLimit)
289 {/*{{{*/
290 	OnUpdateLong(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
291 	if (mctx) {
292 		pcre2_set_match_limit(mctx, (uint32_t)PCRE_G(backtrack_limit));
293 	}
294 
295 	return SUCCESS;
296 }/*}}}*/
297 
PHP_INI_MH(OnUpdateRecursionLimit)298 static PHP_INI_MH(OnUpdateRecursionLimit)
299 {/*{{{*/
300 	OnUpdateLong(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
301 	if (mctx) {
302 		pcre2_set_depth_limit(mctx, (uint32_t)PCRE_G(recursion_limit));
303 	}
304 
305 	return SUCCESS;
306 }/*}}}*/
307 
308 #ifdef HAVE_PCRE_JIT_SUPPORT
PHP_INI_MH(OnUpdateJit)309 static PHP_INI_MH(OnUpdateJit)
310 {/*{{{*/
311 	OnUpdateBool(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
312 	if (PCRE_G(jit) && jit_stack) {
313 		pcre2_jit_stack_assign(mctx, NULL, jit_stack);
314 	} else {
315 		pcre2_jit_stack_assign(mctx, NULL, NULL);
316 	}
317 
318 	return SUCCESS;
319 }/*}}}*/
320 #endif
321 
/* INI settings of the extension. Each entry is bound to a field of
 * zend_pcre_globals and routed through a handler that also pushes the new
 * value into the PCRE2 match context. pcre.jit exists only when JIT support
 * is compiled in. */
322 PHP_INI_BEGIN()
323 	STD_PHP_INI_ENTRY("pcre.backtrack_limit", "1000000", PHP_INI_ALL, OnUpdateBacktrackLimit, backtrack_limit, zend_pcre_globals, pcre_globals)
324 	STD_PHP_INI_ENTRY("pcre.recursion_limit", "100000",  PHP_INI_ALL, OnUpdateRecursionLimit, recursion_limit, zend_pcre_globals, pcre_globals)
325 #ifdef HAVE_PCRE_JIT_SUPPORT
326 	STD_PHP_INI_ENTRY("pcre.jit",             "1",       PHP_INI_ALL, OnUpdateJit, jit,             zend_pcre_globals, pcre_globals)
327 #endif
PHP_INI_END()328 PHP_INI_END()
329 
330 static char *_pcre2_config_str(uint32_t what)
331 {/*{{{*/
332 	int len = pcre2_config(what, NULL);
333 	char *ret = (char *) malloc(len + 1);
334 
335 	len = pcre2_config(what, ret);
336 	if (!len) {
337 		free(ret);
338 		return NULL;
339 	}
340 
341 	return ret;
342 }/*}}}*/
343 
344 /* {{{ PHP_MINFO_FUNCTION(pcre) */
PHP_MINFO_FUNCTION(pcre)345 static PHP_MINFO_FUNCTION(pcre)
346 {
347 #ifdef HAVE_PCRE_JIT_SUPPORT
348 	uint32_t flag = 0;
349 	char *jit_target = _pcre2_config_str(PCRE2_CONFIG_JITTARGET);
350 #endif
351 	char *version = _pcre2_config_str(PCRE2_CONFIG_VERSION);
352 	char *unicode = _pcre2_config_str(PCRE2_CONFIG_UNICODE_VERSION);
353 
354 	php_info_print_table_start();
355 	php_info_print_table_row(2, "PCRE (Perl Compatible Regular Expressions) Support", "enabled" );
356 	php_info_print_table_row(2, "PCRE Library Version", version);
357 	free(version);
358 	php_info_print_table_row(2, "PCRE Unicode Version", unicode);
359 	free(unicode);
360 
361 #ifdef HAVE_PCRE_JIT_SUPPORT
362 	if (!pcre2_config(PCRE2_CONFIG_JIT, &flag)) {
363 		php_info_print_table_row(2, "PCRE JIT Support", flag ? "enabled" : "disabled");
364 	} else {
365 		php_info_print_table_row(2, "PCRE JIT Support", "unknown" );
366 	}
367 	if (jit_target) {
368 		php_info_print_table_row(2, "PCRE JIT Target", jit_target);
369 	}
370 	free(jit_target);
371 #else
372 	php_info_print_table_row(2, "PCRE JIT Support", "not compiled in" );
373 #endif
374 
375 #ifdef HAVE_PCRE_VALGRIND_SUPPORT
376 	php_info_print_table_row(2, "PCRE Valgrind Support", "enabled" );
377 #endif
378 
379 	php_info_print_table_end();
380 
381 	DISPLAY_INI_ENTRIES();
382 }
383 /* }}} */
384 
385 /* {{{ PHP_MINIT_FUNCTION(pcre) */
PHP_MINIT_FUNCTION(pcre)386 static PHP_MINIT_FUNCTION(pcre)
387 {
388 	char *version;
389 
390 #ifdef HAVE_PCRE_JIT_SUPPORT
391 	if (UNEXPECTED(!pcre2_init_ok)) {
392 		/* Retry. */
393 		php_pcre_init_pcre2(PCRE_G(jit));
394 		if (!pcre2_init_ok) {
395 			return FAILURE;
396 		}
397 	}
398 #endif
399 
400 	REGISTER_INI_ENTRIES();
401 
402 	REGISTER_LONG_CONSTANT("PREG_PATTERN_ORDER", PREG_PATTERN_ORDER, CONST_CS | CONST_PERSISTENT);
403 	REGISTER_LONG_CONSTANT("PREG_SET_ORDER", PREG_SET_ORDER, CONST_CS | CONST_PERSISTENT);
404 	REGISTER_LONG_CONSTANT("PREG_OFFSET_CAPTURE", PREG_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
405 	REGISTER_LONG_CONSTANT("PREG_UNMATCHED_AS_NULL", PREG_UNMATCHED_AS_NULL, CONST_CS | CONST_PERSISTENT);
406 	REGISTER_LONG_CONSTANT("PREG_SPLIT_NO_EMPTY", PREG_SPLIT_NO_EMPTY, CONST_CS | CONST_PERSISTENT);
407 	REGISTER_LONG_CONSTANT("PREG_SPLIT_DELIM_CAPTURE", PREG_SPLIT_DELIM_CAPTURE, CONST_CS | CONST_PERSISTENT);
408 	REGISTER_LONG_CONSTANT("PREG_SPLIT_OFFSET_CAPTURE", PREG_SPLIT_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
409 	REGISTER_LONG_CONSTANT("PREG_GREP_INVERT", PREG_GREP_INVERT, CONST_CS | CONST_PERSISTENT);
410 
411 	REGISTER_LONG_CONSTANT("PREG_NO_ERROR", PHP_PCRE_NO_ERROR, CONST_CS | CONST_PERSISTENT);
412 	REGISTER_LONG_CONSTANT("PREG_INTERNAL_ERROR", PHP_PCRE_INTERNAL_ERROR, CONST_CS | CONST_PERSISTENT);
413 	REGISTER_LONG_CONSTANT("PREG_BACKTRACK_LIMIT_ERROR", PHP_PCRE_BACKTRACK_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
414 	REGISTER_LONG_CONSTANT("PREG_RECURSION_LIMIT_ERROR", PHP_PCRE_RECURSION_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
415 	REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_ERROR", PHP_PCRE_BAD_UTF8_ERROR, CONST_CS | CONST_PERSISTENT);
416 	REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_OFFSET_ERROR", PHP_PCRE_BAD_UTF8_OFFSET_ERROR, CONST_CS | CONST_PERSISTENT);
417 	REGISTER_LONG_CONSTANT("PREG_JIT_STACKLIMIT_ERROR", PHP_PCRE_JIT_STACKLIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
418 	version = _pcre2_config_str(PCRE2_CONFIG_VERSION);
419 	REGISTER_STRING_CONSTANT("PCRE_VERSION", version, CONST_CS | CONST_PERSISTENT);
420 	free(version);
421 	REGISTER_LONG_CONSTANT("PCRE_VERSION_MAJOR", PCRE2_MAJOR, CONST_CS | CONST_PERSISTENT);
422 	REGISTER_LONG_CONSTANT("PCRE_VERSION_MINOR", PCRE2_MINOR, CONST_CS | CONST_PERSISTENT);
423 
424 #ifdef HAVE_PCRE_JIT_SUPPORT
425 	REGISTER_BOOL_CONSTANT("PCRE_JIT_SUPPORT", 1, CONST_CS | CONST_PERSISTENT);
426 #else
427 	REGISTER_BOOL_CONSTANT("PCRE_JIT_SUPPORT", 0, CONST_CS | CONST_PERSISTENT);
428 #endif
429 
430 	return SUCCESS;
431 }
432 /* }}} */
433 
434 /* {{{ PHP_MSHUTDOWN_FUNCTION(pcre) */
PHP_MSHUTDOWN_FUNCTION(pcre)435 static PHP_MSHUTDOWN_FUNCTION(pcre)
436 {
437 	UNREGISTER_INI_ENTRIES();
438 
439 	return SUCCESS;
440 }
441 /* }}} */
442 
443 #ifdef HAVE_PCRE_JIT_SUPPORT
444 /* {{{ PHP_RINIT_FUNCTION(pcre) */
PHP_RINIT_FUNCTION(pcre)445 static PHP_RINIT_FUNCTION(pcre)
446 {
447 	if (UNEXPECTED(!pcre2_init_ok)) {
448 		/* Retry. */
449 		php_pcre_mutex_lock();
450 		php_pcre_init_pcre2(PCRE_G(jit));
451 		if (!pcre2_init_ok) {
452 			php_pcre_mutex_unlock();
453 			return FAILURE;
454 		}
455 		php_pcre_mutex_unlock();
456 	}
457 
458 	mdata_used = 0;
459 
460 	return SUCCESS;
461 }
462 /* }}} */
463 #endif
464 
465 /* {{{ static pcre_clean_cache */
/* Hash apply callback used to trim the pattern cache.
 *
 * data: zval holding a pcre_cache_entry pointer.
 * arg:  pointer to an int holding how many entries may still be evicted.
 *
 * Evicts the entry (and decrements the budget) when budget remains and the
 * entry is not referenced; keeps it otherwise.
 */
static int pcre_clean_cache(zval *data, void *arg)
{
	pcre_cache_entry *entry = (pcre_cache_entry *) Z_PTR_P(data);
	int *budget = (int *)arg;

	if (*budget <= 0 || entry->refcount) {
		return ZEND_HASH_APPLY_KEEP;
	}

	--*budget;
	return ZEND_HASH_APPLY_REMOVE;
}
/* }}} */
479 
480 /* {{{ static make_subpats_table */
/* Builds a lookup table from capture group number to group name.
 *
 * num_subpats: number of slots to allocate (one per capture group index).
 * pce:         cache entry whose compiled pattern supplies the name table.
 *
 * Returns an ecalloc()ed array of num_subpats pointers; unnamed groups stay
 * NULL and named ones point into the pattern's own name table (the strings
 * are not copied). The caller frees the array with efree(). Returns NULL,
 * after raising a warning, when the pattern info cannot be read or a group
 * name is a numeric string.
 */
static char **make_subpats_table(uint32_t num_subpats, pcre_cache_entry *pce)
{
	uint32_t name_cnt = pce->name_count, name_size, ni;
	char *name_table;
	unsigned short name_idx;
	char **subpat_names;
	int rc1, rc2;

	rc1 = pcre2_pattern_info(pce->re, PCRE2_INFO_NAMETABLE, &name_table);
	rc2 = pcre2_pattern_info(pce->re, PCRE2_INFO_NAMEENTRYSIZE, &name_size);
	if (rc1 < 0 || rc2 < 0) {
		php_error_docref(NULL, E_WARNING, "Internal pcre2_pattern_info() error %d", rc1 < 0 ? rc1 : rc2);
		return NULL;
	}

	subpat_names = (char **)ecalloc(num_subpats, sizeof(char *));
	for (ni = 0; ni < name_cnt; ni++) {
		/* Each entry starts with the group number as two bytes, most
			significant first, followed by the NUL-terminated name. */
		name_idx = 0x100 * (unsigned char)name_table[0] + (unsigned char)name_table[1];
		subpat_names[name_idx] = name_table + 2;
		if (is_numeric_string(subpat_names[name_idx], strlen(subpat_names[name_idx]), NULL, NULL, 0) > 0) {
			php_error_docref(NULL, E_WARNING, "Numeric named subpatterns are not allowed");
			efree(subpat_names);
			return NULL;
		}
		name_table += name_size;
	}
	return subpat_names;
}
/* }}} */
510 
511 /* {{{ static calculate_unit_length */
512 /* Calculates the byte length of the next character. Assumes valid UTF-8 for PCRE2_UTF. */
calculate_unit_length(pcre_cache_entry * pce,char * start)513 static zend_always_inline size_t calculate_unit_length(pcre_cache_entry *pce, char *start)
514 {
515 	size_t unit_len;
516 
517 	if (pce->compile_options & PCRE2_UTF) {
518 		char *end = start;
519 
520 		/* skip continuation bytes */
521 		while ((*++end & 0xC0) == 0x80);
522 		unit_len = end - start;
523 	} else {
524 		unit_len = 1;
525 	}
526 	return unit_len;
527 }
528 /* }}} */
529 
530 /* {{{ pcre_get_compiled_regex_cache_ex
531  */
pcre_get_compiled_regex_cache_ex(zend_string * regex,int locale_aware)532 PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, int locale_aware)
533 {
534 	pcre2_code			*re = NULL;
535 	uint32_t			 coptions = 0;
536 	uint32_t			 extra_coptions = PHP_PCRE_DEFAULT_EXTRA_COPTIONS;
537 	PCRE2_UCHAR	         error[128];
538 	PCRE2_SIZE           erroffset;
539 	int                  errnumber;
540 	char				 delimiter;
541 	char				 start_delimiter;
542 	char				 end_delimiter;
543 	char				*p, *pp;
544 	char				*pattern;
545 	size_t				 pattern_len;
546 	uint32_t			 poptions = 0;
547 #if HAVE_SETLOCALE
548 	const uint8_t       *tables = NULL;
549 #endif
550 	zval                *zv;
551 	pcre_cache_entry	 new_entry;
552 	int					 rc;
553 	zend_string 		*key;
554 	pcre_cache_entry *ret;
555 
556 #if HAVE_SETLOCALE
557 	if (locale_aware && BG(locale_string) &&
558 		(ZSTR_LEN(BG(locale_string)) != 1 && ZSTR_VAL(BG(locale_string))[0] != 'C')) {
559 		key = zend_string_alloc(ZSTR_LEN(regex) + ZSTR_LEN(BG(locale_string)) + 1, 0);
560 		memcpy(ZSTR_VAL(key), ZSTR_VAL(BG(locale_string)), ZSTR_LEN(BG(locale_string)) + 1);
561 		memcpy(ZSTR_VAL(key) + ZSTR_LEN(BG(locale_string)), ZSTR_VAL(regex), ZSTR_LEN(regex) + 1);
562 	} else
563 #endif
564 	{
565 		key = regex;
566 	}
567 
568 	/* Try to lookup the cached regex entry, and if successful, just pass
569 	   back the compiled pattern, otherwise go on and compile it. */
570 	zv = zend_hash_find(&PCRE_G(pcre_cache), key);
571 	if (zv) {
572 #if HAVE_SETLOCALE
573 		if (key != regex) {
574 			zend_string_release_ex(key, 0);
575 		}
576 #endif
577 		return (pcre_cache_entry*)Z_PTR_P(zv);
578 	}
579 
580 	p = ZSTR_VAL(regex);
581 
582 	/* Parse through the leading whitespace, and display a warning if we
583 	   get to the end without encountering a delimiter. */
584 	while (isspace((int)*(unsigned char *)p)) p++;
585 	if (*p == 0) {
586 #if HAVE_SETLOCALE
587 		if (key != regex) {
588 			zend_string_release_ex(key, 0);
589 		}
590 #endif
591 		php_error_docref(NULL, E_WARNING,
592 						 p < ZSTR_VAL(regex) + ZSTR_LEN(regex) ? "Null byte in regex" : "Empty regular expression");
593 		pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
594 		return NULL;
595 	}
596 
597 	/* Get the delimiter and display a warning if it is alphanumeric
598 	   or a backslash. */
599 	delimiter = *p++;
600 	if (isalnum((int)*(unsigned char *)&delimiter) || delimiter == '\\') {
601 #if HAVE_SETLOCALE
602 		if (key != regex) {
603 			zend_string_release_ex(key, 0);
604 		}
605 #endif
606 		php_error_docref(NULL,E_WARNING, "Delimiter must not be alphanumeric or backslash");
607 		pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
608 		return NULL;
609 	}
610 
611 	start_delimiter = delimiter;
612 	if ((pp = strchr("([{< )]}> )]}>", delimiter)))
613 		delimiter = pp[5];
614 	end_delimiter = delimiter;
615 
616 	pp = p;
617 
618 	if (start_delimiter == end_delimiter) {
619 		/* We need to iterate through the pattern, searching for the ending delimiter,
620 		   but skipping the backslashed delimiters.  If the ending delimiter is not
621 		   found, display a warning. */
622 		while (*pp != 0) {
623 			if (*pp == '\\' && pp[1] != 0) pp++;
624 			else if (*pp == delimiter)
625 				break;
626 			pp++;
627 		}
628 	} else {
629 		/* We iterate through the pattern, searching for the matching ending
630 		 * delimiter. For each matching starting delimiter, we increment nesting
631 		 * level, and decrement it for each matching ending delimiter. If we
632 		 * reach the end of the pattern without matching, display a warning.
633 		 */
634 		int brackets = 1; 	/* brackets nesting level */
635 		while (*pp != 0) {
636 			if (*pp == '\\' && pp[1] != 0) pp++;
637 			else if (*pp == end_delimiter && --brackets <= 0)
638 				break;
639 			else if (*pp == start_delimiter)
640 				brackets++;
641 			pp++;
642 		}
643 	}
644 
645 	if (*pp == 0) {
646 #if HAVE_SETLOCALE
647 		if (key != regex) {
648 			zend_string_release_ex(key, 0);
649 		}
650 #endif
651 		if (pp < ZSTR_VAL(regex) + ZSTR_LEN(regex)) {
652 			php_error_docref(NULL,E_WARNING, "Null byte in regex");
653 		} else if (start_delimiter == end_delimiter) {
654 			php_error_docref(NULL,E_WARNING, "No ending delimiter '%c' found", delimiter);
655 		} else {
656 			php_error_docref(NULL,E_WARNING, "No ending matching delimiter '%c' found", delimiter);
657 		}
658 		pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
659 		return NULL;
660 	}
661 
662 	/* Make a copy of the actual pattern. */
663 	pattern_len = pp - p;
664 	pattern = estrndup(p, pattern_len);
665 
666 	/* Move on to the options */
667 	pp++;
668 
669 	/* Parse through the options, setting appropriate flags.  Display
670 	   a warning if we encounter an unknown modifier. */
671 	while (pp < ZSTR_VAL(regex) + ZSTR_LEN(regex)) {
672 		switch (*pp++) {
673 			/* Perl compatible options */
674 			case 'i':	coptions |= PCRE2_CASELESS;		break;
675 			case 'm':	coptions |= PCRE2_MULTILINE;		break;
676 			case 's':	coptions |= PCRE2_DOTALL;		break;
677 			case 'x':	coptions |= PCRE2_EXTENDED;		break;
678 
679 			/* PCRE specific options */
680 			case 'A':	coptions |= PCRE2_ANCHORED;		break;
681 			case 'D':	coptions |= PCRE2_DOLLAR_ENDONLY;break;
682 			case 'S':	/* Pass. */					break;
683 			case 'U':	coptions |= PCRE2_UNGREEDY;		break;
684 			case 'X':	extra_coptions &= ~PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL;			break;
685 			case 'u':	coptions |= PCRE2_UTF;
686 	/* In  PCRE,  by  default, \d, \D, \s, \S, \w, and \W recognize only ASCII
687        characters, even in UTF-8 mode. However, this can be changed by setting
688        the PCRE2_UCP option. */
689 #ifdef PCRE2_UCP
690 						coptions |= PCRE2_UCP;
691 #endif
692 				break;
693 			case 'J':	coptions |= PCRE2_DUPNAMES;		break;
694 
695 			/* Custom preg options */
696 			case 'e':	poptions |= PREG_REPLACE_EVAL;	break;
697 
698 			case ' ':
699 			case '\n':
700 			case '\r':
701 				break;
702 
703 			default:
704 				if (pp[-1]) {
705 					php_error_docref(NULL,E_WARNING, "Unknown modifier '%c'", pp[-1]);
706 				} else {
707 					php_error_docref(NULL,E_WARNING, "Null byte in regex");
708 				}
709 				pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
710 				efree(pattern);
711 #if HAVE_SETLOCALE
712 				if (key != regex) {
713 					zend_string_release_ex(key, 0);
714 				}
715 #endif
716 				return NULL;
717 		}
718 	}
719 
720 	if (poptions & PREG_REPLACE_EVAL) {
721 		php_error_docref(NULL, E_WARNING, "The /e modifier is no longer supported, use preg_replace_callback instead");
722 		pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
723 		efree(pattern);
724 #if HAVE_SETLOCALE
725 		if (key != regex) {
726 			zend_string_release_ex(key, 0);
727 		}
728 #endif
729 		return NULL;
730 	}
731 
732 #if HAVE_SETLOCALE
733 	if (key != regex) {
734 		tables = (uint8_t *)zend_hash_find_ptr(&char_tables, BG(locale_string));
735 		if (!tables) {
736 			zend_string *_k;
737 			tables = pcre2_maketables(gctx);
738 			if (UNEXPECTED(!tables)) {
739 				php_error_docref(NULL,E_WARNING, "Failed to generate locale character tables");
740 				pcre_handle_exec_error(PCRE2_ERROR_NOMEMORY);
741 				zend_string_release_ex(key, 0);
742 				efree(pattern);
743 				return NULL;
744 			}
745 			_k = zend_string_init(ZSTR_VAL(BG(locale_string)), ZSTR_LEN(BG(locale_string)), 1);
746 			zend_hash_add_ptr(&char_tables, _k, (void *)tables);
747 			zend_string_release(_k);
748 		}
749 		pcre2_set_character_tables(cctx, tables);
750 	}
751 #endif
752 
753 	/* Set extra options for the compile context. */
754 	if (PHP_PCRE_DEFAULT_EXTRA_COPTIONS != extra_coptions) {
755 		pcre2_set_compile_extra_options(cctx, extra_coptions);
756 	}
757 
758 	/* Compile pattern and display a warning if compilation failed. */
759 	re = pcre2_compile((PCRE2_SPTR)pattern, pattern_len, coptions, &errnumber, &erroffset, cctx);
760 
761 	/* Reset the compile context extra options to default. */
762 	if (PHP_PCRE_DEFAULT_EXTRA_COPTIONS != extra_coptions) {
763 		pcre2_set_compile_extra_options(cctx, PHP_PCRE_DEFAULT_EXTRA_COPTIONS);
764 	}
765 
766 	if (re == NULL) {
767 #if HAVE_SETLOCALE
768 		if (key != regex) {
769 			zend_string_release_ex(key, 0);
770 		}
771 #endif
772 		pcre2_get_error_message(errnumber, error, sizeof(error));
773 		php_error_docref(NULL,E_WARNING, "Compilation failed: %s at offset %zu", error, erroffset);
774 		pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
775 		efree(pattern);
776 		return NULL;
777 	}
778 
779 #ifdef HAVE_PCRE_JIT_SUPPORT
780 	if (PCRE_G(jit)) {
781 		/* Enable PCRE JIT compiler */
782 		rc = pcre2_jit_compile(re, PCRE2_JIT_COMPLETE);
783 		if (EXPECTED(rc >= 0)) {
784 			size_t jit_size = 0;
785 			if (!pcre2_pattern_info(re, PCRE2_INFO_JITSIZE, &jit_size) && jit_size > 0) {
786 				poptions |= PREG_JIT;
787 			}
788 		} else if (rc == PCRE2_ERROR_NOMEMORY) {
789 			php_error_docref(NULL, E_WARNING,
790 				"Allocation of JIT memory failed, PCRE JIT will be disabled. "
791 				"This is likely caused by security restrictions. "
792 				"Either grant PHP permission to allocate executable memory, or set pcre.jit=0");
793 			PCRE_G(jit) = 0;
794 		} else {
795 			pcre2_get_error_message(rc, error, sizeof(error));
796 			php_error_docref(NULL, E_WARNING, "JIT compilation failed: %s", error);
797 			pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
798 		}
799 	}
800 #endif
801 	efree(pattern);
802 
803 	/*
804 	 * If we reached cache limit, clean out the items from the head of the list;
805 	 * these are supposedly the oldest ones (but not necessarily the least used
806 	 * ones).
807 	 */
808 	if (zend_hash_num_elements(&PCRE_G(pcre_cache)) == PCRE_CACHE_SIZE) {
809 		int num_clean = PCRE_CACHE_SIZE / 8;
810 		zend_hash_apply_with_argument(&PCRE_G(pcre_cache), pcre_clean_cache, &num_clean);
811 	}
812 
813 	/* Store the compiled pattern and extra info in the cache. */
814 	new_entry.re = re;
815 	new_entry.preg_options = poptions;
816 	new_entry.compile_options = coptions;
817 	new_entry.extra_compile_options = extra_coptions;
818 	new_entry.refcount = 0;
819 
820 	rc = pcre2_pattern_info(re, PCRE2_INFO_CAPTURECOUNT, &new_entry.capture_count);
821 	if (rc < 0) {
822 #if HAVE_SETLOCALE
823 		if (key != regex) {
824 			zend_string_release_ex(key, 0);
825 		}
826 #endif
827 		php_error_docref(NULL, E_WARNING, "Internal pcre2_pattern_info() error %d", rc);
828 		pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
829 		return NULL;
830 	}
831 
832 	rc = pcre2_pattern_info(re, PCRE2_INFO_NAMECOUNT, &new_entry.name_count);
833 	if (rc < 0) {
834 #if HAVE_SETLOCALE
835 		if (key != regex) {
836 			zend_string_release_ex(key, 0);
837 		}
838 #endif
839 		php_error_docref(NULL, E_WARNING, "Internal pcre_pattern_info() error %d", rc);
840 		pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
841 		return NULL;
842 	}
843 
844 	/*
845 	 * Interned strings are not duplicated when stored in HashTable,
846 	 * but all the interned strings created during HTTP request are removed
847 	 * at end of request. However PCRE_G(pcre_cache) must be consistent
848 	 * on the next request as well. So we disable usage of interned strings
849 	 * as hash keys especually for this table.
850 	 * See bug #63180
851 	 */
852 	if (!(GC_FLAGS(key) & IS_STR_PERMANENT)) {
853 		zend_string *str = zend_string_init(ZSTR_VAL(key), ZSTR_LEN(key), 1);
854 
855 		GC_MAKE_PERSISTENT_LOCAL(str);
856 
857 #if HAVE_SETLOCALE
858 		if (key != regex) {
859 			zend_string_release_ex(key, 0);
860 		}
861 #endif
862 		ret = zend_hash_add_new_mem(&PCRE_G(pcre_cache), str, &new_entry, sizeof(pcre_cache_entry));
863 		zend_string_release(str);
864 	} else {
865 		ret = zend_hash_add_new_mem(&PCRE_G(pcre_cache), key, &new_entry, sizeof(pcre_cache_entry));
866 	}
867 
868 	return ret;
869 }
870 /* }}} */
871 
872 /* {{{ pcre_get_compiled_regex_cache
873  */
pcre_get_compiled_regex_cache(zend_string * regex)874 PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex)
875 {
876 	return pcre_get_compiled_regex_cache_ex(regex, 1);
877 }
878 /* }}} */
879 
880 /* {{{ pcre_get_compiled_regex
881  */
pcre_get_compiled_regex(zend_string * regex,uint32_t * capture_count,uint32_t * preg_options)882 PHPAPI pcre2_code *pcre_get_compiled_regex(zend_string *regex, uint32_t *capture_count, uint32_t *preg_options)
883 {
884 	pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex);
885 
886 	if (preg_options) {
887 		*preg_options = 0;
888 	}
889 	if (capture_count) {
890 		*capture_count = pce ? pce->capture_count : 0;
891 	}
892 
893 	return pce ? pce->re : NULL;
894 }
895 /* }}} */
896 
897 /* {{{ pcre_get_compiled_regex_ex
898  */
pcre_get_compiled_regex_ex(zend_string * regex,uint32_t * capture_count,uint32_t * preg_options,uint32_t * compile_options)899 PHPAPI pcre2_code* pcre_get_compiled_regex_ex(zend_string *regex, uint32_t *capture_count, uint32_t *preg_options, uint32_t *compile_options)
900 {
901 	pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex);
902 
903 	if (preg_options) {
904 		*preg_options = 0;
905 	}
906 	if (compile_options) {
907 		*compile_options = pce ? pce->compile_options : 0;
908 	}
909 	if (capture_count) {
910 		*capture_count = pce ? pce->capture_count : 0;
911 	}
912 
913 	return pce ? pce->re : NULL;
914 }
915 /* }}} */
916 
917 /* XXX For the cases where it's only about match yes/no and no capture
918 		required, perhaps just a minimum sized data would suffice. */
php_pcre_create_match_data(uint32_t capture_count,pcre2_code * re)919 PHPAPI pcre2_match_data *php_pcre_create_match_data(uint32_t capture_count, pcre2_code *re)
920 {/*{{{*/
921 
922 	assert(NULL != re);
923 
924 	if (EXPECTED(!mdata_used)) {
925 		int rc = 0;
926 
927 		if (!capture_count) {
928 			/* As we deal with a non cached pattern, no other way to gather this info. */
929 			rc = pcre2_pattern_info(re, PCRE2_INFO_CAPTURECOUNT, &capture_count);
930 		}
931 
932 		if (rc >= 0 && capture_count + 1 <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
933 			mdata_used = 1;
934 			return mdata;
935 		}
936 	}
937 
938 	return pcre2_match_data_create_from_pattern(re, gctx);
939 }/*}}}*/
940 
php_pcre_free_match_data(pcre2_match_data * match_data)941 PHPAPI void php_pcre_free_match_data(pcre2_match_data *match_data)
942 {/*{{{*/
943 	if (UNEXPECTED(match_data != mdata)) {
944 		pcre2_match_data_free(match_data);
945 	} else {
946 		mdata_used = 0;
947 	}
948 }/*}}}*/
949 
950 /* {{{ add_offset_pair */
/* Appends a two-element [match, offset] array to result.
 *
 * result:            array receiving the pair.
 * str, len:          matched text; ignored when offset is PCRE2_UNSET.
 * offset:            offset stored as the second element.
 * name:              optional group name; when given the pair is also stored
 *                    under that string key.
 * unmatched_as_null: for an unset group, store NULL instead of an empty string.
 */
static inline void add_offset_pair(zval *result, char *str, size_t len, PCRE2_SIZE offset, char *name, uint32_t unmatched_as_null)
{
	zval pair, element;

	array_init_size(&pair, 2);

	/* First element: the matched text, or the placeholder for an unset group. */
	if (PCRE2_UNSET != offset) {
		ZVAL_STRINGL(&element, str, len);
	} else if (unmatched_as_null) {
		ZVAL_NULL(&element);
	} else {
		ZVAL_EMPTY_STRING(&element);
	}
	zend_hash_next_index_insert_new(Z_ARRVAL(pair), &element);

	/* Second element: the offset. */
	ZVAL_LONG(&element, offset);
	zend_hash_next_index_insert_new(Z_ARRVAL(pair), &element);

	if (name != NULL) {
		/* The pair is stored twice, so it needs a second reference. */
		Z_ADDREF(pair);
		zend_hash_str_update(Z_ARRVAL_P(result), name, strlen(name), &pair);
	}
	zend_hash_next_index_insert(Z_ARRVAL_P(result), &pair);
}
/* }}} */
978 
/*
 * Shared body of preg_match() (global == 0) and preg_match_all()
 * (global != 0): parses the userland arguments, resolves the pattern to a
 * cache entry and delegates to php_pcre_match_impl(). Returns FALSE to
 * userland when argument parsing or the pattern lookup fails.
 */
static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) /* {{{ */
{
	zend_string		 *pattern;				/* Regular expression source */
	zend_string		 *haystack;				/* String to search in */
	zval			 *matches = NULL;		/* Optional by-reference result array */
	zend_long		  match_flags = 0;		/* PREG_* flags */
	zend_long		  offset = 0;			/* Where to start searching */
	pcre_cache_entry *entry;				/* Compiled regular expression */

	ZEND_PARSE_PARAMETERS_START(2, 5)
		Z_PARAM_STR(pattern)
		Z_PARAM_STR(haystack)
		Z_PARAM_OPTIONAL
		Z_PARAM_ZVAL_DEREF(matches)
		Z_PARAM_LONG(match_flags)
		Z_PARAM_LONG(offset)
	ZEND_PARSE_PARAMETERS_END_EX(RETURN_FALSE);

	/* Take the compiled pattern from the cache, compiling it if needed. */
	entry = pcre_get_compiled_regex_cache(pattern);
	if (NULL == entry) {
		RETURN_FALSE;
	}

	/* The entry's refcount is raised for the duration of the match
	   (presumably so the cache keeps it alive meanwhile). The flags are only
	   honoured when the caller actually passed a fourth argument. */
	entry->refcount++;
	php_pcre_match_impl(entry, ZSTR_VAL(haystack), ZSTR_LEN(haystack), return_value, matches,
		global, ZEND_NUM_ARGS() >= 4, match_flags, offset);
	entry->refcount--;
}
/* }}} */
1009 
1010 /* {{{ php_pcre_match_impl() */
php_pcre_match_impl(pcre_cache_entry * pce,char * subject,size_t subject_len,zval * return_value,zval * subpats,int global,int use_flags,zend_long flags,zend_off_t start_offset)1011 PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, size_t subject_len, zval *return_value,
1012 	zval *subpats, int global, int use_flags, zend_long flags, zend_off_t start_offset)
1013 {
1014 	zval			 result_set,		/* Holds a set of subpatterns after
1015 										   a global match */
1016 					*match_sets = NULL;	/* An array of sets of matches for each
1017 										   subpattern after a global match */
1018 	uint32_t		 options;			/* Execution options */
1019 	int				 count;				/* Count of matched subpatterns */
1020 	PCRE2_SIZE		*offsets;			/* Array of subpattern offsets */
1021 	uint32_t		 num_subpats;		/* Number of captured subpatterns */
1022 	int				 matched;			/* Has anything matched */
1023 	char 		   **subpat_names;		/* Array for named subpatterns */
1024 	size_t			 i;
1025 	uint32_t		 subpats_order;		/* Order of subpattern matches */
1026 	uint32_t		 offset_capture;	/* Capture match offsets: yes/no */
1027 	uint32_t		 unmatched_as_null;	/* Null non-matches: yes/no */
1028 	PCRE2_SPTR       mark = NULL;		/* Target for MARK name */
1029 	zval			 marks;				/* Array of marks for PREG_PATTERN_ORDER */
1030 	pcre2_match_data *match_data;
1031 	PCRE2_SIZE		 start_offset2;
1032 
1033 	ZVAL_UNDEF(&marks);
1034 
1035 	/* Overwrite the passed-in value for subpatterns with an empty array. */
1036 	if (subpats != NULL) {
1037 		zval_ptr_dtor(subpats);
1038 		array_init(subpats);
1039 	}
1040 
1041 	subpats_order = global ? PREG_PATTERN_ORDER : 0;
1042 
1043 	if (use_flags) {
1044 		offset_capture = flags & PREG_OFFSET_CAPTURE;
1045 		unmatched_as_null = flags & PREG_UNMATCHED_AS_NULL;
1046 
1047 		/*
1048 		 * subpats_order is pre-set to pattern mode so we change it only if
1049 		 * necessary.
1050 		 */
1051 		if (flags & 0xff) {
1052 			subpats_order = flags & 0xff;
1053 		}
1054 		if ((global && (subpats_order < PREG_PATTERN_ORDER || subpats_order > PREG_SET_ORDER)) ||
1055 			(!global && subpats_order != 0)) {
1056 			php_error_docref(NULL, E_WARNING, "Invalid flags specified");
1057 			return;
1058 		}
1059 	} else {
1060 		offset_capture = 0;
1061 		unmatched_as_null = 0;
1062 	}
1063 
1064 	/* Negative offset counts from the end of the string. */
1065 	if (start_offset < 0) {
1066 		if ((PCRE2_SIZE)-start_offset <= subject_len) {
1067 			start_offset2 = subject_len + start_offset;
1068 		} else {
1069 			start_offset2 = 0;
1070 		}
1071 	} else {
1072 		start_offset2 = (PCRE2_SIZE)start_offset;
1073 	}
1074 
1075 	if (start_offset2 > subject_len) {
1076 		pcre_handle_exec_error(PCRE2_ERROR_BADOFFSET);
1077 		RETURN_FALSE;
1078 	}
1079 
1080 	/* Calculate the size of the offsets array, and allocate memory for it. */
1081 	num_subpats = pce->capture_count + 1;
1082 
1083 	/*
1084 	 * Build a mapping from subpattern numbers to their names. We will
1085 	 * allocate the table only if there are any named subpatterns.
1086 	 */
1087 	subpat_names = NULL;
1088 	if (pce->name_count > 0) {
1089 		subpat_names = make_subpats_table(num_subpats, pce);
1090 		if (!subpat_names) {
1091 			RETURN_FALSE;
1092 		}
1093 	}
1094 
1095 	/* Allocate match sets array and initialize the values. */
1096 	if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
1097 		match_sets = (zval *)safe_emalloc(num_subpats, sizeof(zval), 0);
1098 		for (i=0; i<num_subpats; i++) {
1099 			array_init(&match_sets[i]);
1100 		}
1101 	}
1102 
1103 	matched = 0;
1104 	PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1105 
1106 	if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
1107 		match_data = mdata;
1108 	} else {
1109 		match_data = pcre2_match_data_create_from_pattern(pce->re, gctx);
1110 		if (!match_data) {
1111 			PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1112 			if (subpat_names) {
1113 				efree(subpat_names);
1114 			}
1115 			if (match_sets) {
1116 				efree(match_sets);
1117 			}
1118 			RETURN_FALSE;
1119 		}
1120 	}
1121 
1122 	options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
1123 
1124 	/* Execute the regular expression. */
1125 #ifdef HAVE_PCRE_JIT_SUPPORT
1126 	if ((pce->preg_options & PREG_JIT) && options) {
1127 		count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1128 				PCRE2_NO_UTF_CHECK, match_data, mctx);
1129 	} else
1130 #endif
1131 	count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1132 			options, match_data, mctx);
1133 
1134 	while (1) {
1135 		/* If something has matched */
1136 		if (count >= 0) {
1137 			/* Check for too many substrings condition. */
1138 			if (UNEXPECTED(count == 0)) {
1139 				php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
1140 				count = num_subpats;
1141 			}
1142 
1143 matched:
1144 			matched++;
1145 
1146 			offsets = pcre2_get_ovector_pointer(match_data);
1147 
1148 			/* If subpatterns array has been passed, fill it in with values. */
1149 			if (subpats != NULL) {
1150 				/* Try to get the list of substrings and display a warning if failed. */
1151 				if (offsets[1] < offsets[0]) {
1152 					if (subpat_names) {
1153 						efree(subpat_names);
1154 					}
1155 					if (match_sets) efree(match_sets);
1156 					php_error_docref(NULL, E_WARNING, "Get subpatterns list failed");
1157 					RETURN_FALSE;
1158 				}
1159 
1160 				if (global) {	/* global pattern matching */
1161 					if (subpats && subpats_order == PREG_PATTERN_ORDER) {
1162 						/* For each subpattern, insert it into the appropriate array. */
1163 						if (offset_capture) {
1164 							for (i = 0; i < count; i++) {
1165 								add_offset_pair(&match_sets[i], subject + offsets[i<<1],
1166 												offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL, unmatched_as_null);
1167 							}
1168 						} else {
1169 							for (i = 0; i < count; i++) {
1170 								if (PCRE2_UNSET == offsets[i<<1]) {
1171 									if (unmatched_as_null) {
1172 										add_next_index_null(&match_sets[i]);
1173 									} else {
1174 										add_next_index_str(&match_sets[i], ZSTR_EMPTY_ALLOC());
1175 									}
1176 								} else {
1177 									add_next_index_stringl(&match_sets[i], subject + offsets[i<<1],
1178 														   offsets[(i<<1)+1] - offsets[i<<1]);
1179 								}
1180 							}
1181 						}
1182 						mark = pcre2_get_mark(match_data);
1183 						/* Add MARK, if available */
1184 						if (mark) {
1185 							if (Z_TYPE(marks) == IS_UNDEF) {
1186 								array_init(&marks);
1187 							}
1188 							add_index_string(&marks, matched - 1, (char *) mark);
1189 						}
1190 						/*
1191 						 * If the number of captured subpatterns on this run is
1192 						 * less than the total possible number, pad the result
1193 						 * arrays with NULLs or empty strings.
1194 						 */
1195 						if (count < num_subpats) {
1196 							for (; i < num_subpats; i++) {
1197 								if (unmatched_as_null) {
1198 									add_next_index_null(&match_sets[i]);
1199 								} else {
1200 									add_next_index_str(&match_sets[i], ZSTR_EMPTY_ALLOC());
1201 								}
1202 							}
1203 						}
1204 					} else {
1205 						/* Allocate the result set array */
1206 						array_init_size(&result_set, count + (mark ? 1 : 0));
1207 
1208 						/* Add all the subpatterns to it */
1209 						if (subpat_names) {
1210 							if (offset_capture) {
1211 								for (i = 0; i < count; i++) {
1212 									add_offset_pair(&result_set, subject + offsets[i<<1],
1213 													offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], subpat_names[i], unmatched_as_null);
1214 								}
1215 							} else {
1216 								for (i = 0; i < count; i++) {
1217 									if (subpat_names[i]) {
1218 										if (PCRE2_UNSET == offsets[i<<1]) {
1219 											if (unmatched_as_null) {
1220 												add_assoc_null(&result_set, subpat_names[i]);
1221 											} else {
1222 												add_assoc_str(&result_set, subpat_names[i], ZSTR_EMPTY_ALLOC());
1223 											}
1224 										} else {
1225 											add_assoc_stringl(&result_set, subpat_names[i], subject + offsets[i<<1],
1226 															  offsets[(i<<1)+1] - offsets[i<<1]);
1227 										}
1228 									}
1229 									if (PCRE2_UNSET == offsets[i<<1]) {
1230 										if (unmatched_as_null) {
1231 											add_next_index_null(&result_set);
1232 										} else {
1233 											add_next_index_str(&result_set, ZSTR_EMPTY_ALLOC());
1234 										}
1235 									} else {
1236 										add_next_index_stringl(&result_set, subject + offsets[i<<1],
1237 															   offsets[(i<<1)+1] - offsets[i<<1]);
1238 									}
1239 								}
1240 							}
1241 						} else {
1242 							if (offset_capture) {
1243 								for (i = 0; i < count; i++) {
1244 									add_offset_pair(&result_set, subject + offsets[i<<1],
1245 													offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL, unmatched_as_null);
1246 								}
1247 							} else {
1248 								for (i = 0; i < count; i++) {
1249 									if (PCRE2_UNSET == offsets[i<<1]) {
1250 										if (unmatched_as_null) {
1251 											add_next_index_null(&result_set);
1252 										} else {
1253 											add_next_index_str(&result_set, ZSTR_EMPTY_ALLOC());
1254 										}
1255 									} else {
1256 										add_next_index_stringl(&result_set, subject + offsets[i<<1],
1257 															   offsets[(i<<1)+1] - offsets[i<<1]);
1258 									}
1259 								}
1260 							}
1261 						}
1262 						/* Add MARK, if available */
1263 						mark = pcre2_get_mark(match_data);
1264 						if (mark) {
1265 							add_assoc_string_ex(&result_set, "MARK", sizeof("MARK") - 1, (char *)mark);
1266 						}
1267 						/* And add it to the output array */
1268 						zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &result_set);
1269 					}
1270 				} else {			/* single pattern matching */
1271 					/* For each subpattern, insert it into the subpatterns array. */
1272 					if (subpat_names) {
1273 						if (offset_capture) {
1274 							for (i = 0; i < count; i++) {
1275 								add_offset_pair(subpats, subject + offsets[i<<1],
1276 												offsets[(i<<1)+1] - offsets[i<<1],
1277 												offsets[i<<1], subpat_names[i], unmatched_as_null);
1278 							}
1279 						} else {
1280 							for (i = 0; i < count; i++) {
1281 								if (subpat_names[i]) {
1282 									if (PCRE2_UNSET == offsets[i<<1]) {
1283 										if (unmatched_as_null) {
1284 											add_assoc_null(subpats, subpat_names[i]);
1285 										} else {
1286 											add_assoc_str(subpats, subpat_names[i], ZSTR_EMPTY_ALLOC());
1287 										}
1288 									} else {
1289 										add_assoc_stringl(subpats, subpat_names[i], subject + offsets[i<<1],
1290 														  offsets[(i<<1)+1] - offsets[i<<1]);
1291 									}
1292 								}
1293 								if (PCRE2_UNSET == offsets[i<<1]) {
1294 									if (unmatched_as_null) {
1295 										add_next_index_null(subpats);
1296 									} else {
1297 										add_next_index_str(subpats, ZSTR_EMPTY_ALLOC());
1298 									}
1299 								} else {
1300 									add_next_index_stringl(subpats, subject + offsets[i<<1],
1301 														   offsets[(i<<1)+1] - offsets[i<<1]);
1302 								}
1303 							}
1304 						}
1305 					} else {
1306 						if (offset_capture) {
1307 							for (i = 0; i < count; i++) {
1308 								add_offset_pair(subpats, subject + offsets[i<<1],
1309 												offsets[(i<<1)+1] - offsets[i<<1],
1310 												offsets[i<<1], NULL, unmatched_as_null);
1311 							}
1312 						} else {
1313 							for (i = 0; i < count; i++) {
1314 								if (PCRE2_UNSET == offsets[i<<1]) {
1315 									if (unmatched_as_null) {
1316 										add_next_index_null(subpats);
1317 									} else {
1318 										add_next_index_str(subpats, ZSTR_EMPTY_ALLOC());
1319 									}
1320 								} else {
1321 									add_next_index_stringl(subpats, subject + offsets[i<<1],
1322 														   offsets[(i<<1)+1] - offsets[i<<1]);
1323 								}
1324 							}
1325 						}
1326 					}
1327 					/* Add MARK, if available */
1328 					mark = pcre2_get_mark(match_data);
1329 					if (mark) {
1330 						add_assoc_string_ex(subpats, "MARK", sizeof("MARK") - 1, (char *)mark);
1331 					}
1332 					break;
1333 				}
1334 			}
1335 
1336 			/* Advance to the next piece. */
1337 			start_offset2 = offsets[1];
1338 
1339 			/* If we have matched an empty string, mimic what Perl's /g options does.
1340 			   This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
1341 			   the match again at the same point. If this fails (picked up above) we
1342 			   advance to the next character. */
1343 			if (start_offset2 == offsets[0]) {
1344 				count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1345 					PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
1346 				if (count >= 0) {
1347 					if (global) {
1348 						goto matched;
1349 					} else {
1350 						break;
1351 					}
1352 				} else if (count == PCRE2_ERROR_NOMATCH) {
1353 					/* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
1354 					   this is not necessarily the end. We need to advance
1355 					   the start offset, and continue. Fudge the offset values
1356 					   to achieve this, unless we're already at the end of the string. */
1357 					if (start_offset2 < subject_len) {
1358 						size_t unit_len = calculate_unit_length(pce, subject + start_offset2);
1359 
1360 						start_offset2 += unit_len;
1361 					} else {
1362 						break;
1363 					}
1364 				} else {
1365 					goto error;
1366 				}
1367 			}
1368 		} else if (count == PCRE2_ERROR_NOMATCH) {
1369 			break;
1370 		} else {
1371 error:
1372 			pcre_handle_exec_error(count);
1373 			break;
1374 		}
1375 
1376 		if (!global) {
1377 			break;
1378 		}
1379 
1380 		/* Execute the regular expression. */
1381 #ifdef HAVE_PCRE_JIT_SUPPORT
1382 		if ((pce->preg_options & PREG_JIT)) {
1383 			if (PCRE2_UNSET == start_offset2 || start_offset2 > subject_len) {
1384 				pcre_handle_exec_error(PCRE2_ERROR_BADOFFSET);
1385 				break;
1386 			}
1387 			count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1388 					PCRE2_NO_UTF_CHECK, match_data, mctx);
1389 		} else
1390 #endif
1391 		count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1392 				PCRE2_NO_UTF_CHECK, match_data, mctx);
1393 	}
1394 	if (match_data != mdata) {
1395 		pcre2_match_data_free(match_data);
1396 	}
1397 
1398 	/* Add the match sets to the output array and clean up */
1399 	if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
1400 		if (subpat_names) {
1401 			for (i = 0; i < num_subpats; i++) {
1402 				if (subpat_names[i]) {
1403 					zend_hash_str_update(Z_ARRVAL_P(subpats), subpat_names[i],
1404 									 strlen(subpat_names[i]), &match_sets[i]);
1405 					Z_ADDREF(match_sets[i]);
1406 				}
1407 				zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i]);
1408 			}
1409 		} else {
1410 			for (i = 0; i < num_subpats; i++) {
1411 				zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i]);
1412 			}
1413 		}
1414 		efree(match_sets);
1415 
1416 		if (Z_TYPE(marks) != IS_UNDEF) {
1417 			add_assoc_zval(subpats, "MARK", &marks);
1418 		}
1419 	}
1420 
1421 	if (subpat_names) {
1422 		efree(subpat_names);
1423 	}
1424 
1425 	/* Did we encounter an error? */
1426 	if (PCRE_G(error_code) == PHP_PCRE_NO_ERROR) {
1427 		RETVAL_LONG(matched);
1428 	} else {
1429 		RETVAL_FALSE;
1430 	}
1431 }
1432 /* }}} */
1433 
1434 /* {{{ proto int preg_match(string pattern, string subject [, array &subpatterns [, int flags [, int offset]]])
1435    Perform a Perl-style regular expression match */
PHP_FUNCTION(preg_match)1436 static PHP_FUNCTION(preg_match)
1437 {
1438 	php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
1439 }
1440 /* }}} */
1441 
1442 /* {{{ proto int preg_match_all(string pattern, string subject [, array &subpatterns [, int flags [, int offset]]])
1443    Perform a Perl-style global regular expression match */
PHP_FUNCTION(preg_match_all)1444 static PHP_FUNCTION(preg_match_all)
1445 {
1446 	php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
1447 }
1448 /* }}} */
1449 
/* {{{ preg_get_backref
 * Try to read a back-reference that starts at **str.
 *
 * (*str)[0] is the introducer character. It is skipped without being
 * inspected, except that '$' immediately followed by '{' opens the braced
 * form, which must be closed by '}'. One or two decimal digits follow.
 *
 * On success the number is stored in *backref, *str is moved past the
 * reference and 1 is returned. On failure 0 is returned and *str is left
 * alone (*backref may already have been written).
 */
static int preg_get_backref(char **str, int *backref)
{
	char *cursor = *str;
	int braced;

	/* Nothing follows the introducer: not a reference. */
	if ('\0' == cursor[1]) {
		return 0;
	}

	braced = ('$' == cursor[0] && '{' == cursor[1]);

	/* Step over the introducer, and over the '{' when there is one. */
	cursor += braced ? 2 : 1;

	/* First digit is mandatory. */
	if (*cursor < '0' || *cursor > '9') {
		return 0;
	}
	*backref = *cursor - '0';
	cursor++;

	/* Second digit is optional. */
	if (*cursor >= '0' && *cursor <= '9') {
		*backref = *backref * 10 + *cursor - '0';
		cursor++;
	}

	if (braced) {
		if ('}' != *cursor) {
			return 0;
		}
		cursor++;
	}

	*str = cursor;
	return 1;
}
/* }}} */
1488 
1489 /* {{{ preg_do_repl_func
1490  */
preg_do_repl_func(zend_fcall_info * fci,zend_fcall_info_cache * fcc,char * subject,PCRE2_SIZE * offsets,char ** subpat_names,int count,const PCRE2_SPTR mark)1491 static zend_string *preg_do_repl_func(zend_fcall_info *fci, zend_fcall_info_cache *fcc, char *subject, PCRE2_SIZE *offsets, char **subpat_names, int count, const PCRE2_SPTR mark)
1492 {
1493 	zend_string *result_str;
1494 	zval		 retval;			/* Function return value */
1495 	zval	     arg;				/* Argument to pass to function */
1496 	int			 i;
1497 
1498 	array_init_size(&arg, count + (mark ? 1 : 0));
1499 	if (subpat_names) {
1500 		for (i = 0; i < count; i++) {
1501 			if (subpat_names[i]) {
1502 				add_assoc_stringl(&arg, subpat_names[i], &subject[offsets[i<<1]] , offsets[(i<<1)+1] - offsets[i<<1]);
1503 			}
1504 			add_next_index_stringl(&arg, &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1]);
1505 		}
1506 	} else {
1507 		for (i = 0; i < count; i++) {
1508 			add_next_index_stringl(&arg, &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1]);
1509 		}
1510 	}
1511 	if (mark) {
1512 		add_assoc_string(&arg, "MARK", (char *) mark);
1513 	}
1514 
1515 	fci->retval = &retval;
1516 	fci->param_count = 1;
1517 	fci->params = &arg;
1518 	fci->no_separation = 0;
1519 
1520 	if (zend_call_function(fci, fcc) == SUCCESS && Z_TYPE(retval) != IS_UNDEF) {
1521 		if (EXPECTED(Z_TYPE(retval) == IS_STRING)) {
1522 			result_str = Z_STR(retval);
1523 		} else {
1524 			result_str = zval_get_string_func(&retval);
1525 			zval_ptr_dtor(&retval);
1526 		}
1527 	} else {
1528 		if (!EG(exception)) {
1529 			php_error_docref(NULL, E_WARNING, "Unable to call custom replacement function");
1530 		}
1531 
1532 		result_str = zend_string_init(&subject[offsets[0]], offsets[1] - offsets[0], 0);
1533 	}
1534 
1535 	zval_ptr_dtor(&arg);
1536 
1537 	return result_str;
1538 }
1539 /* }}} */
1540 
1541 /* {{{ php_pcre_replace
1542  */
php_pcre_replace(zend_string * regex,zend_string * subject_str,char * subject,size_t subject_len,zend_string * replace_str,size_t limit,size_t * replace_count)1543 PHPAPI zend_string *php_pcre_replace(zend_string *regex,
1544 							  zend_string *subject_str,
1545 							  char *subject, size_t subject_len,
1546 							  zend_string *replace_str,
1547 							  size_t limit, size_t *replace_count)
1548 {
1549 	pcre_cache_entry	*pce;			    /* Compiled regular expression */
1550 	zend_string	 		*result;			/* Function result */
1551 
1552 	/* Compile regex or get it from cache. */
1553 	if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
1554 		return NULL;
1555 	}
1556 	pce->refcount++;
1557 	result = php_pcre_replace_impl(pce, subject_str, subject, subject_len, replace_str,
1558 		limit, replace_count);
1559 	pce->refcount--;
1560 
1561 	return result;
1562 }
1563 /* }}} */
1564 
1565 /* {{{ php_pcre_replace_impl() */
php_pcre_replace_impl(pcre_cache_entry * pce,zend_string * subject_str,char * subject,size_t subject_len,zend_string * replace_str,size_t limit,size_t * replace_count)1566 PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *subject_str, char *subject, size_t subject_len, zend_string *replace_str, size_t limit, size_t *replace_count)
1567 {
1568 	uint32_t		 options;			/* Execution options */
1569 	int				 count;				/* Count of matched subpatterns */
1570 	PCRE2_SIZE		*offsets;			/* Array of subpattern offsets */
1571 	uint32_t		 num_subpats;		/* Number of captured subpatterns */
1572 	size_t			 new_len;			/* Length of needed storage */
1573 	size_t			 alloc_len;			/* Actual allocated length */
1574 	size_t			 match_len;			/* Length of the current match */
1575 	int				 backref;			/* Backreference number */
1576 	PCRE2_SIZE		 start_offset;		/* Where the new search starts */
1577 	size_t			 last_end_offset;	/* Where the last search ended */
1578 	char			*walkbuf,			/* Location of current replacement in the result */
1579 					*walk,				/* Used to walk the replacement string */
1580 					*match,				/* The current match */
1581 					*piece,				/* The current piece of subject */
1582 					*replace_end,		/* End of replacement string */
1583 					 walk_last;			/* Last walked character */
1584 	size_t			result_len; 		/* Length of result */
1585 	zend_string		*result;			/* Result of replacement */
1586 	pcre2_match_data *match_data;
1587 
1588 	/* Calculate the size of the offsets array, and allocate memory for it. */
1589 	num_subpats = pce->capture_count + 1;
1590 	alloc_len = 0;
1591 	result = NULL;
1592 
1593 	/* Initialize */
1594 	match = NULL;
1595 	start_offset = 0;
1596 	last_end_offset = 0;
1597 	result_len = 0;
1598 	PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1599 
1600 	if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
1601 		match_data = mdata;
1602 	} else {
1603 		match_data = pcre2_match_data_create_from_pattern(pce->re, gctx);
1604 		if (!match_data) {
1605 			PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1606 			return NULL;
1607 		}
1608 	}
1609 
1610 	options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
1611 
1612 	/* Execute the regular expression. */
1613 #ifdef HAVE_PCRE_JIT_SUPPORT
1614 	if ((pce->preg_options & PREG_JIT) && options) {
1615 		count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1616 				PCRE2_NO_UTF_CHECK, match_data, mctx);
1617 	} else
1618 #endif
1619 	count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1620 			options, match_data, mctx);
1621 
1622 	while (1) {
1623 		piece = subject + last_end_offset;
1624 
1625 		if (count >= 0 && limit > 0) {
1626 			zend_bool simple_string;
1627 
1628 			/* Check for too many substrings condition. */
1629 			if (UNEXPECTED(count == 0)) {
1630 				php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
1631 				count = num_subpats;
1632 			}
1633 
1634 matched:
1635 			offsets = pcre2_get_ovector_pointer(match_data);
1636 
1637 			if (UNEXPECTED(offsets[1] < offsets[0])) {
1638 				PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1639 				if (result) {
1640 					zend_string_release_ex(result, 0);
1641 					result = NULL;
1642 				}
1643 				break;
1644 			}
1645 
1646 			if (replace_count) {
1647 				++*replace_count;
1648 			}
1649 
1650 			/* Set the match location in subject */
1651 			match = subject + offsets[0];
1652 
1653 			new_len = result_len + offsets[0] - last_end_offset; /* part before the match */
1654 
1655 			walk = ZSTR_VAL(replace_str);
1656 			replace_end = walk + ZSTR_LEN(replace_str);
1657 			walk_last = 0;
1658 			simple_string = 1;
1659 			while (walk < replace_end) {
1660 				if ('\\' == *walk || '$' == *walk) {
1661 					simple_string = 0;
1662 					if (walk_last == '\\') {
1663 						walk++;
1664 						walk_last = 0;
1665 						continue;
1666 					}
1667 					if (preg_get_backref(&walk, &backref)) {
1668 						if (backref < count)
1669 							new_len += offsets[(backref<<1)+1] - offsets[backref<<1];
1670 						continue;
1671 					}
1672 				}
1673 				new_len++;
1674 				walk++;
1675 				walk_last = walk[-1];
1676 			}
1677 
1678 			if (new_len >= alloc_len) {
1679 				alloc_len = zend_safe_address_guarded(2, new_len, alloc_len);
1680 				if (result == NULL) {
1681 					result = zend_string_alloc(alloc_len, 0);
1682 				} else {
1683 					result = zend_string_extend(result, alloc_len, 0);
1684 				}
1685 			}
1686 
1687 			if (match-piece > 0) {
1688 				/* copy the part of the string before the match */
1689 				memcpy(&ZSTR_VAL(result)[result_len], piece, match-piece);
1690 				result_len += (match-piece);
1691 			}
1692 
1693 			if (simple_string) {
1694 				/* copy replacement */
1695 				memcpy(&ZSTR_VAL(result)[result_len], ZSTR_VAL(replace_str), ZSTR_LEN(replace_str)+1);
1696 				result_len += ZSTR_LEN(replace_str);
1697 			} else {
1698 				/* copy replacement and backrefs */
1699 				walkbuf = ZSTR_VAL(result) + result_len;
1700 
1701 				walk = ZSTR_VAL(replace_str);
1702 				walk_last = 0;
1703 				while (walk < replace_end) {
1704 					if ('\\' == *walk || '$' == *walk) {
1705 						if (walk_last == '\\') {
1706 							*(walkbuf-1) = *walk++;
1707 							walk_last = 0;
1708 							continue;
1709 						}
1710 						if (preg_get_backref(&walk, &backref)) {
1711 							if (backref < count) {
1712 								match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
1713 								memcpy(walkbuf, subject + offsets[backref<<1], match_len);
1714 								walkbuf += match_len;
1715 							}
1716 							continue;
1717 						}
1718 					}
1719 					*walkbuf++ = *walk++;
1720 					walk_last = walk[-1];
1721 				}
1722 				*walkbuf = '\0';
1723 				/* increment the result length by how much we've added to the string */
1724 				result_len += (walkbuf - (ZSTR_VAL(result) + result_len));
1725 			}
1726 
1727 			limit--;
1728 
1729 			/* Advance to the next piece. */
1730 			start_offset = last_end_offset = offsets[1];
1731 
1732 			/* If we have matched an empty string, mimic what Perl's /g options does.
1733 			   This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
1734 			   the match again at the same point. If this fails (picked up above) we
1735 			   advance to the next character. */
1736 			if (start_offset == offsets[0]) {
1737 				count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1738 					PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
1739 
1740 				piece = subject + start_offset;
1741 				if (count >= 0 && limit > 0) {
1742 					goto matched;
1743 				} else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
1744 					/* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
1745 					   this is not necessarily the end. We need to advance
1746 					   the start offset, and continue. Fudge the offset values
1747 					   to achieve this, unless we're already at the end of the string. */
1748 					if (start_offset < subject_len) {
1749 						size_t unit_len = calculate_unit_length(pce, piece);
1750 						start_offset += unit_len;
1751 					} else {
1752 						goto not_matched;
1753 					}
1754 				} else {
1755 					goto error;
1756 				}
1757 			}
1758 
1759 		} else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
1760 not_matched:
1761 			if (!result && subject_str) {
1762 				result = zend_string_copy(subject_str);
1763 				break;
1764 			}
1765 			new_len = result_len + subject_len - last_end_offset;
1766 			if (new_len >= alloc_len) {
1767 				alloc_len = new_len; /* now we know exactly how long it is */
1768 				if (NULL != result) {
1769 					result = zend_string_realloc(result, alloc_len, 0);
1770 				} else {
1771 					result = zend_string_alloc(alloc_len, 0);
1772 				}
1773 			}
1774 			/* stick that last bit of string on our output */
1775 			memcpy(ZSTR_VAL(result) + result_len, piece, subject_len - last_end_offset);
1776 			result_len += subject_len - last_end_offset;
1777 			ZSTR_VAL(result)[result_len] = '\0';
1778 			ZSTR_LEN(result) = result_len;
1779 			break;
1780 		} else {
1781 error:
1782 			pcre_handle_exec_error(count);
1783 			if (result) {
1784 				zend_string_release_ex(result, 0);
1785 				result = NULL;
1786 			}
1787 			break;
1788 		}
1789 
1790 #ifdef HAVE_PCRE_JIT_SUPPORT
1791 		if (pce->preg_options & PREG_JIT) {
1792 			count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1793 					PCRE2_NO_UTF_CHECK, match_data, mctx);
1794 		} else
1795 #endif
1796 		count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1797 					PCRE2_NO_UTF_CHECK, match_data, mctx);
1798 	}
1799 	if (match_data != mdata) {
1800 		pcre2_match_data_free(match_data);
1801 	}
1802 
1803 	return result;
1804 }
1805 /* }}} */
1806 
1807 /* {{{ php_pcre_replace_func_impl() */
php_pcre_replace_func_impl(pcre_cache_entry * pce,zend_string * subject_str,char * subject,size_t subject_len,zend_fcall_info * fci,zend_fcall_info_cache * fcc,size_t limit,size_t * replace_count)1808 static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_string *subject_str, char *subject, size_t subject_len, zend_fcall_info *fci, zend_fcall_info_cache *fcc, size_t limit, size_t *replace_count)
1809 {
1810 	uint32_t		 options;			/* Execution options */
1811 	int				 count;				/* Count of matched subpatterns */
1812 	PCRE2_SIZE		*offsets;			/* Array of subpattern offsets */
1813 	char 			**subpat_names;		/* Array for named subpatterns */
1814 	uint32_t		 num_subpats;		/* Number of captured subpatterns */
1815 	size_t			 new_len;			/* Length of needed storage */
1816 	size_t			 alloc_len;			/* Actual allocated length */
1817 	PCRE2_SIZE		 start_offset;		/* Where the new search starts */
1818 	size_t			 last_end_offset;	/* Where the last search ended */
1819 	char			*match,				/* The current match */
1820 					*piece;				/* The current piece of subject */
1821 	size_t			result_len; 		/* Length of result */
1822 	zend_string		*result;			/* Result of replacement */
1823 	zend_string     *eval_result;		/* Result of custom function */
1824 	pcre2_match_data *match_data;
1825 	zend_bool old_mdata_used;
1826 
1827 	/* Calculate the size of the offsets array, and allocate memory for it. */
1828 	num_subpats = pce->capture_count + 1;
1829 
1830 	/*
1831 	 * Build a mapping from subpattern numbers to their names. We will
1832 	 * allocate the table only if there are any named subpatterns.
1833 	 */
1834 	subpat_names = NULL;
1835 	if (UNEXPECTED(pce->name_count > 0)) {
1836 		subpat_names = make_subpats_table(num_subpats, pce);
1837 		if (!subpat_names) {
1838 			return NULL;
1839 		}
1840 	}
1841 
1842 	alloc_len = 0;
1843 	result = NULL;
1844 
1845 	/* Initialize */
1846 	match = NULL;
1847 	start_offset = 0;
1848 	last_end_offset = 0;
1849 	result_len = 0;
1850 	PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1851 
1852 	old_mdata_used = mdata_used;
1853 	if (!old_mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
1854 		mdata_used = 1;
1855 		match_data = mdata;
1856 	} else {
1857 		match_data = pcre2_match_data_create_from_pattern(pce->re, gctx);
1858 		if (!match_data) {
1859 			PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1860 			if (subpat_names) {
1861 				efree(subpat_names);
1862 			}
1863 			mdata_used = old_mdata_used;
1864 			return NULL;
1865 		}
1866 	}
1867 
1868 	options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
1869 
1870 	/* Execute the regular expression. */
1871 #ifdef HAVE_PCRE_JIT_SUPPORT
1872 	if ((pce->preg_options & PREG_JIT) && options) {
1873 		count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1874 				PCRE2_NO_UTF_CHECK, match_data, mctx);
1875 	} else
1876 #endif
1877 	count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1878 			options, match_data, mctx);
1879 
1880 	while (1) {
1881 		piece = subject + last_end_offset;
1882 
1883 		if (count >= 0 && limit) {
1884 			/* Check for too many substrings condition. */
1885 			if (UNEXPECTED(count == 0)) {
1886 				php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
1887 				count = num_subpats;
1888 			}
1889 
1890 matched:
1891 			offsets = pcre2_get_ovector_pointer(match_data);
1892 
1893 			if (UNEXPECTED(offsets[1] < offsets[0])) {
1894 				PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1895 				if (result) {
1896 					zend_string_release_ex(result, 0);
1897 					result = NULL;
1898 				}
1899 				break;
1900 			}
1901 
1902 			if (replace_count) {
1903 				++*replace_count;
1904 			}
1905 
1906 			/* Set the match location in subject */
1907 			match = subject + offsets[0];
1908 
1909 			new_len = result_len + offsets[0] - last_end_offset; /* part before the match */
1910 
1911 			/* Use custom function to get replacement string and its length. */
1912 			eval_result = preg_do_repl_func(fci, fcc, subject, offsets, subpat_names, count,
1913 				pcre2_get_mark(match_data));
1914 
1915 			ZEND_ASSERT(eval_result);
1916 			new_len = zend_safe_address_guarded(1, ZSTR_LEN(eval_result), new_len);
1917 			if (new_len >= alloc_len) {
1918 				alloc_len = zend_safe_address_guarded(2, new_len, alloc_len);
1919 				if (result == NULL) {
1920 					result = zend_string_alloc(alloc_len, 0);
1921 				} else {
1922 					result = zend_string_extend(result, alloc_len, 0);
1923 				}
1924 			}
1925 
1926 			if (match-piece > 0) {
1927 				/* copy the part of the string before the match */
1928 				memcpy(ZSTR_VAL(result) + result_len, piece, match-piece);
1929 				result_len += (match-piece);
1930 			}
1931 
1932 			/* If using custom function, copy result to the buffer and clean up. */
1933 			memcpy(ZSTR_VAL(result) + result_len, ZSTR_VAL(eval_result), ZSTR_LEN(eval_result));
1934 			result_len += ZSTR_LEN(eval_result);
1935 			zend_string_release_ex(eval_result, 0);
1936 
1937 			limit--;
1938 
1939 			/* Advance to the next piece. */
1940 			start_offset = last_end_offset = offsets[1];
1941 
1942 			/* If we have matched an empty string, mimic what Perl's /g options does.
1943 			   This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
1944 			   the match again at the same point. If this fails (picked up above) we
1945 			   advance to the next character. */
1946 			if (start_offset == offsets[0]) {
1947 				count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1948 					PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
1949 
1950 				piece = subject + start_offset;
1951 				if (count >= 0 && limit) {
1952 					goto matched;
1953 				} else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
1954 					/* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
1955 					   this is not necessarily the end. We need to advance
1956 					   the start offset, and continue. Fudge the offset values
1957 					   to achieve this, unless we're already at the end of the string. */
1958 					if (start_offset < subject_len) {
1959 						size_t unit_len = calculate_unit_length(pce, piece);
1960 						start_offset += unit_len;
1961 					} else {
1962 						goto not_matched;
1963 					}
1964 				} else {
1965 					goto error;
1966 				}
1967 			}
1968 
1969 		} else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
1970 not_matched:
1971 			if (!result && subject_str) {
1972 				result = zend_string_copy(subject_str);
1973 				break;
1974 			}
1975 			new_len = result_len + subject_len - last_end_offset;
1976 			if (new_len >= alloc_len) {
1977 				alloc_len = new_len; /* now we know exactly how long it is */
1978 				if (NULL != result) {
1979 					result = zend_string_realloc(result, alloc_len, 0);
1980 				} else {
1981 					result = zend_string_alloc(alloc_len, 0);
1982 				}
1983 			}
1984 			/* stick that last bit of string on our output */
1985 			memcpy(ZSTR_VAL(result) + result_len, piece, subject_len - last_end_offset);
1986 			result_len += subject_len - last_end_offset;
1987 			ZSTR_VAL(result)[result_len] = '\0';
1988 			ZSTR_LEN(result) = result_len;
1989 			break;
1990 		} else {
1991 error:
1992 			pcre_handle_exec_error(count);
1993 			if (result) {
1994 				zend_string_release_ex(result, 0);
1995 				result = NULL;
1996 			}
1997 			break;
1998 		}
1999 #ifdef HAVE_PCRE_JIT_SUPPORT
2000 		if ((pce->preg_options & PREG_JIT)) {
2001 			count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
2002 					PCRE2_NO_UTF_CHECK, match_data, mctx);
2003 		} else
2004 #endif
2005 		count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
2006 				PCRE2_NO_UTF_CHECK, match_data, mctx);
2007 	}
2008 	if (match_data != mdata) {
2009 		pcre2_match_data_free(match_data);
2010 	}
2011 	mdata_used = old_mdata_used;
2012 
2013 	if (UNEXPECTED(subpat_names)) {
2014 		efree(subpat_names);
2015 	}
2016 
2017 	return result;
2018 }
2019 /* }}} */
2020 
2021 /* {{{ php_pcre_replace_func
2022  */
php_pcre_replace_func(zend_string * regex,zend_string * subject_str,zend_fcall_info * fci,zend_fcall_info_cache * fcc,size_t limit,size_t * replace_count)2023 static zend_always_inline zend_string *php_pcre_replace_func(zend_string *regex,
2024 							  zend_string *subject_str,
2025 							  zend_fcall_info *fci, zend_fcall_info_cache *fcc,
2026 							  size_t limit, size_t *replace_count)
2027 {
2028 	pcre_cache_entry	*pce;			    /* Compiled regular expression */
2029 	zend_string	 		*result;			/* Function result */
2030 
2031 	/* Compile regex or get it from cache. */
2032 	if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
2033 		return NULL;
2034 	}
2035 	pce->refcount++;
2036 	result = php_pcre_replace_func_impl(pce, subject_str, ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), fci, fcc,
2037 		limit, replace_count);
2038 	pce->refcount--;
2039 
2040 	return result;
2041 }
2042 /* }}} */
2043 
2044 /* {{{ php_pcre_replace_array
2045  */
php_pcre_replace_array(HashTable * regex,zval * replace,zend_string * subject_str,size_t limit,size_t * replace_count)2046 static zend_string *php_pcre_replace_array(HashTable *regex, zval *replace, zend_string *subject_str, size_t limit, size_t *replace_count)
2047 {
2048 	zval		*regex_entry;
2049 	zend_string *result;
2050 	zend_string *replace_str, *tmp_replace_str;
2051 
2052 	if (Z_TYPE_P(replace) == IS_ARRAY) {
2053 		uint32_t replace_idx = 0;
2054 		HashTable *replace_ht = Z_ARRVAL_P(replace);
2055 
2056 		/* For each entry in the regex array, get the entry */
2057 		ZEND_HASH_FOREACH_VAL(regex, regex_entry) {
2058 			/* Make sure we're dealing with strings. */
2059 			zend_string *tmp_regex_str;
2060 			zend_string *regex_str = zval_get_tmp_string(regex_entry, &tmp_regex_str);
2061 			zval *zv;
2062 
2063 			/* Get current entry */
2064 			while (1) {
2065 				if (replace_idx == replace_ht->nNumUsed) {
2066 					replace_str = ZSTR_EMPTY_ALLOC();
2067 					tmp_replace_str = NULL;
2068 					break;
2069 				}
2070 				zv = &replace_ht->arData[replace_idx].val;
2071 				replace_idx++;
2072 				if (Z_TYPE_P(zv) != IS_UNDEF) {
2073 					replace_str = zval_get_tmp_string(zv, &tmp_replace_str);
2074 					break;
2075 				}
2076 			}
2077 
2078 			/* Do the actual replacement and put the result back into subject_str
2079 			   for further replacements. */
2080 			result = php_pcre_replace(regex_str,
2081 									  subject_str,
2082 									  ZSTR_VAL(subject_str),
2083 									  ZSTR_LEN(subject_str),
2084 									  replace_str,
2085 									  limit,
2086 									  replace_count);
2087 			zend_tmp_string_release(tmp_replace_str);
2088 			zend_tmp_string_release(tmp_regex_str);
2089 			zend_string_release_ex(subject_str, 0);
2090 			subject_str = result;
2091 			if (UNEXPECTED(result == NULL)) {
2092 				break;
2093 			}
2094 		} ZEND_HASH_FOREACH_END();
2095 
2096 	} else {
2097 		replace_str = Z_STR_P(replace);
2098 
2099 		/* For each entry in the regex array, get the entry */
2100 		ZEND_HASH_FOREACH_VAL(regex, regex_entry) {
2101 			/* Make sure we're dealing with strings. */
2102 			zend_string *tmp_regex_str;
2103 			zend_string *regex_str = zval_get_tmp_string(regex_entry, &tmp_regex_str);
2104 
2105 			/* Do the actual replacement and put the result back into subject_str
2106 			   for further replacements. */
2107 			result = php_pcre_replace(regex_str,
2108 									  subject_str,
2109 									  ZSTR_VAL(subject_str),
2110 									  ZSTR_LEN(subject_str),
2111 									  replace_str,
2112 									  limit,
2113 									  replace_count);
2114 			zend_tmp_string_release(tmp_regex_str);
2115 			zend_string_release_ex(subject_str, 0);
2116 			subject_str = result;
2117 
2118 			if (UNEXPECTED(result == NULL)) {
2119 				break;
2120 			}
2121 		} ZEND_HASH_FOREACH_END();
2122 	}
2123 
2124 	return subject_str;
2125 }
2126 /* }}} */
2127 
2128 /* {{{ php_replace_in_subject
2129  */
php_replace_in_subject(zval * regex,zval * replace,zval * subject,size_t limit,size_t * replace_count)2130 static zend_always_inline zend_string *php_replace_in_subject(zval *regex, zval *replace, zval *subject, size_t limit, size_t *replace_count)
2131 {
2132 	zend_string *result;
2133 	zend_string *subject_str = zval_get_string(subject);
2134 
2135 	if (Z_TYPE_P(regex) != IS_ARRAY) {
2136 		result = php_pcre_replace(Z_STR_P(regex),
2137 								  subject_str,
2138 								  ZSTR_VAL(subject_str),
2139 								  ZSTR_LEN(subject_str),
2140 								  Z_STR_P(replace),
2141 								  limit,
2142 								  replace_count);
2143 		zend_string_release_ex(subject_str, 0);
2144 	} else {
2145 		result = php_pcre_replace_array(Z_ARRVAL_P(regex),
2146 										replace,
2147 										subject_str,
2148 										limit,
2149 										replace_count);
2150 	}
2151 	return result;
2152 }
2153 /* }}} */
2154 
2155 /* {{{ php_replace_in_subject_func
2156  */
php_replace_in_subject_func(zval * regex,zend_fcall_info * fci,zend_fcall_info_cache * fcc,zval * subject,size_t limit,size_t * replace_count)2157 static zend_string *php_replace_in_subject_func(zval *regex, zend_fcall_info *fci, zend_fcall_info_cache *fcc, zval *subject, size_t limit, size_t *replace_count)
2158 {
2159 	zend_string *result;
2160 	zend_string	*subject_str = zval_get_string(subject);
2161 
2162 	if (Z_TYPE_P(regex) != IS_ARRAY) {
2163 		result = php_pcre_replace_func(Z_STR_P(regex),
2164 								  subject_str,
2165 								  fci, fcc,
2166 								  limit,
2167 								  replace_count);
2168 		zend_string_release_ex(subject_str, 0);
2169 		return result;
2170 	} else {
2171 		zval		*regex_entry;
2172 
2173 		/* If regex is an array */
2174 
2175 		/* For each entry in the regex array, get the entry */
2176 		ZEND_HASH_FOREACH_VAL(Z_ARRVAL_P(regex), regex_entry) {
2177 			/* Make sure we're dealing with strings. */
2178 			zend_string *tmp_regex_str;
2179 			zend_string *regex_str = zval_get_tmp_string(regex_entry, &tmp_regex_str);
2180 
2181 			/* Do the actual replacement and put the result back into subject_str
2182 			   for further replacements. */
2183 			result = php_pcre_replace_func(regex_str,
2184 										   subject_str,
2185 										   fci, fcc,
2186 										   limit,
2187 										   replace_count);
2188 			zend_tmp_string_release(tmp_regex_str);
2189 			zend_string_release_ex(subject_str, 0);
2190 			subject_str = result;
2191 			if (UNEXPECTED(result == NULL)) {
2192 				break;
2193 			}
2194 		} ZEND_HASH_FOREACH_END();
2195 
2196 		return subject_str;
2197 	}
2198 }
2199 /* }}} */
2200 
2201 /* {{{ preg_replace_func_impl
2202  */
preg_replace_func_impl(zval * return_value,zval * regex,zend_fcall_info * fci,zend_fcall_info_cache * fcc,zval * subject,zend_long limit_val)2203 static size_t preg_replace_func_impl(zval *return_value, zval *regex, zend_fcall_info *fci, zend_fcall_info_cache *fcc, zval *subject, zend_long limit_val)
2204 {
2205 	zend_string	*result;
2206 	size_t replace_count = 0;
2207 
2208 	if (Z_TYPE_P(regex) != IS_ARRAY) {
2209 		convert_to_string_ex(regex);
2210 	}
2211 
2212 	if (Z_TYPE_P(subject) != IS_ARRAY) {
2213 		result = php_replace_in_subject_func(regex, fci, fcc, subject, limit_val, &replace_count);
2214 		if (result != NULL) {
2215 			RETVAL_STR(result);
2216 		} else {
2217 			RETVAL_NULL();
2218 		}
2219 	} else {
2220 		/* if subject is an array */
2221 		zval		*subject_entry, zv;
2222 		zend_string	*string_key;
2223 		zend_ulong	 num_key;
2224 
2225 		array_init_size(return_value, zend_hash_num_elements(Z_ARRVAL_P(subject)));
2226 
2227 		/* For each subject entry, convert it to string, then perform replacement
2228 		   and add the result to the return_value array. */
2229 		ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(subject), num_key, string_key, subject_entry) {
2230 			result = php_replace_in_subject_func(regex, fci, fcc, subject_entry, limit_val, &replace_count);
2231 			if (result != NULL) {
2232 				/* Add to return array */
2233 				ZVAL_STR(&zv, result);
2234 				if (string_key) {
2235 					zend_hash_add_new(Z_ARRVAL_P(return_value), string_key, &zv);
2236 				} else {
2237 					zend_hash_index_add_new(Z_ARRVAL_P(return_value), num_key, &zv);
2238 				}
2239 			}
2240 		} ZEND_HASH_FOREACH_END();
2241 	}
2242 
2243 	return replace_count;
2244 }
2245 /* }}} */
2246 
2247 /* {{{ preg_replace_common
2248  */
preg_replace_common(INTERNAL_FUNCTION_PARAMETERS,int is_filter)2249 static void preg_replace_common(INTERNAL_FUNCTION_PARAMETERS, int is_filter)
2250 {
2251 	zval *regex, *replace, *subject, *zcount = NULL;
2252 	zend_long limit = -1;
2253 	size_t replace_count = 0;
2254 	zend_string	*result;
2255 	size_t old_replace_count;
2256 
2257 	/* Get function parameters and do error-checking. */
2258 	ZEND_PARSE_PARAMETERS_START(3, 5)
2259 		Z_PARAM_ZVAL(regex)
2260 		Z_PARAM_ZVAL(replace)
2261 		Z_PARAM_ZVAL(subject)
2262 		Z_PARAM_OPTIONAL
2263 		Z_PARAM_LONG(limit)
2264 		Z_PARAM_ZVAL_DEREF(zcount)
2265 	ZEND_PARSE_PARAMETERS_END();
2266 
2267 	if (Z_TYPE_P(replace) != IS_ARRAY) {
2268 		convert_to_string_ex(replace);
2269 		if (Z_TYPE_P(regex) != IS_ARRAY) {
2270 			convert_to_string_ex(regex);
2271 		}
2272 	} else {
2273 		if (Z_TYPE_P(regex) != IS_ARRAY) {
2274 			php_error_docref(NULL, E_WARNING, "Parameter mismatch, pattern is a string while replacement is an array");
2275 			RETURN_FALSE;
2276 		}
2277 	}
2278 
2279 	if (Z_TYPE_P(subject) != IS_ARRAY) {
2280 		old_replace_count = replace_count;
2281 		result = php_replace_in_subject(regex,
2282 										replace,
2283 										subject,
2284 										limit,
2285 										&replace_count);
2286 		if (result != NULL) {
2287 			if (!is_filter || replace_count > old_replace_count) {
2288 				RETVAL_STR(result);
2289 			} else {
2290 				zend_string_release_ex(result, 0);
2291 				RETVAL_NULL();
2292 			}
2293 		} else {
2294 			RETVAL_NULL();
2295 		}
2296 	} else {
2297 		/* if subject is an array */
2298 		zval		*subject_entry, zv;
2299 		zend_string	*string_key;
2300 		zend_ulong	 num_key;
2301 
2302 		array_init_size(return_value, zend_hash_num_elements(Z_ARRVAL_P(subject)));
2303 
2304 		/* For each subject entry, convert it to string, then perform replacement
2305 		   and add the result to the return_value array. */
2306 		ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(subject), num_key, string_key, subject_entry) {
2307 			old_replace_count = replace_count;
2308 			result = php_replace_in_subject(regex,
2309 											replace,
2310 											subject_entry,
2311 											limit,
2312 											&replace_count);
2313 			if (result != NULL) {
2314 				if (!is_filter || replace_count > old_replace_count) {
2315 					/* Add to return array */
2316 					ZVAL_STR(&zv, result);
2317 					if (string_key) {
2318 						zend_hash_add_new(Z_ARRVAL_P(return_value), string_key, &zv);
2319 					} else {
2320 						zend_hash_index_add_new(Z_ARRVAL_P(return_value), num_key, &zv);
2321 					}
2322 				} else {
2323 					zend_string_release_ex(result, 0);
2324 				}
2325 			}
2326 		} ZEND_HASH_FOREACH_END();
2327 	}
2328 
2329 	if (zcount) {
2330 		zval_ptr_dtor(zcount);
2331 		ZVAL_LONG(zcount, replace_count);
2332 	}
2333 }
2334 /* }}} */
2335 
2336 /* {{{ proto mixed preg_replace(mixed regex, mixed replace, mixed subject [, int limit [, int &count]])
2337    Perform Perl-style regular expression replacement. */
PHP_FUNCTION(preg_replace)2338 static PHP_FUNCTION(preg_replace)
2339 {
2340 	preg_replace_common(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
2341 }
2342 /* }}} */
2343 
2344 /* {{{ proto mixed preg_replace_callback(mixed regex, mixed callback, mixed subject [, int limit [, int &count]])
2345    Perform Perl-style regular expression replacement using replacement callback. */
PHP_FUNCTION(preg_replace_callback)2346 static PHP_FUNCTION(preg_replace_callback)
2347 {
2348 	zval *regex, *replace, *subject, *zcount = NULL;
2349 	zend_long limit = -1;
2350 	size_t replace_count;
2351 	zend_fcall_info fci;
2352 	zend_fcall_info_cache fcc;
2353 
2354 	/* Get function parameters and do error-checking. */
2355 	ZEND_PARSE_PARAMETERS_START(3, 5)
2356 		Z_PARAM_ZVAL(regex)
2357 		Z_PARAM_ZVAL(replace)
2358 		Z_PARAM_ZVAL(subject)
2359 		Z_PARAM_OPTIONAL
2360 		Z_PARAM_LONG(limit)
2361 		Z_PARAM_ZVAL_DEREF(zcount)
2362 	ZEND_PARSE_PARAMETERS_END();
2363 
2364 	if (!zend_is_callable_ex(replace, NULL, 0, NULL, &fcc, NULL)) {
2365 		zend_string	*callback_name = zend_get_callable_name(replace);
2366 		php_error_docref(NULL, E_WARNING, "Requires argument 2, '%s', to be a valid callback", ZSTR_VAL(callback_name));
2367 		zend_string_release_ex(callback_name, 0);
2368 		ZVAL_STR(return_value, zval_get_string(subject));
2369 		return;
2370 	}
2371 
2372 	fci.size = sizeof(fci);
2373 	fci.object = NULL;
2374 	ZVAL_COPY_VALUE(&fci.function_name, replace);
2375 
2376 	replace_count = preg_replace_func_impl(return_value, regex, &fci, &fcc, subject, limit);
2377 	if (zcount) {
2378 		zval_ptr_dtor(zcount);
2379 		ZVAL_LONG(zcount, replace_count);
2380 	}
2381 }
2382 /* }}} */
2383 
2384 /* {{{ proto mixed preg_replace_callback_array(array pattern, mixed subject [, int limit [, int &count]])
2385    Perform Perl-style regular expression replacement using replacement callback. */
PHP_FUNCTION(preg_replace_callback_array)2386 static PHP_FUNCTION(preg_replace_callback_array)
2387 {
2388 	zval regex, zv, *replace, *subject, *pattern, *zcount = NULL;
2389 	zend_long limit = -1;
2390 	zend_string *str_idx;
2391 	size_t replace_count = 0;
2392 	zend_fcall_info fci;
2393 	zend_fcall_info_cache fcc;
2394 
2395 	/* Get function parameters and do error-checking. */
2396 	ZEND_PARSE_PARAMETERS_START(2, 4)
2397 		Z_PARAM_ARRAY(pattern)
2398 		Z_PARAM_ZVAL(subject)
2399 		Z_PARAM_OPTIONAL
2400 		Z_PARAM_LONG(limit)
2401 		Z_PARAM_ZVAL_DEREF(zcount)
2402 	ZEND_PARSE_PARAMETERS_END();
2403 
2404 	fci.size = sizeof(fci);
2405 	fci.object = NULL;
2406 
2407 	ZEND_HASH_FOREACH_STR_KEY_VAL(Z_ARRVAL_P(pattern), str_idx, replace) {
2408 		if (str_idx) {
2409 			ZVAL_STR_COPY(&regex, str_idx);
2410 		} else {
2411 			php_error_docref(NULL, E_WARNING, "Delimiter must not be alphanumeric or backslash");
2412 			zval_ptr_dtor(return_value);
2413 			RETURN_NULL();
2414 		}
2415 
2416 		if (!zend_is_callable_ex(replace, NULL, 0, NULL, &fcc, NULL)) {
2417 			zend_string *callback_name = zend_get_callable_name(replace);
2418 			php_error_docref(NULL, E_WARNING, "'%s' is not a valid callback", ZSTR_VAL(callback_name));
2419 			zend_string_release_ex(callback_name, 0);
2420 			zval_ptr_dtor(&regex);
2421 			zval_ptr_dtor(return_value);
2422 			ZVAL_COPY(return_value, subject);
2423 			return;
2424 		}
2425 
2426 		ZVAL_COPY_VALUE(&fci.function_name, replace);
2427 
2428 		replace_count += preg_replace_func_impl(&zv, &regex, &fci, &fcc, subject, limit);
2429 		if (subject != return_value) {
2430 			subject = return_value;
2431 		} else {
2432 			zval_ptr_dtor(return_value);
2433 		}
2434 
2435 		zval_ptr_dtor(&regex);
2436 
2437 		ZVAL_COPY_VALUE(return_value, &zv);
2438 
2439 		if (UNEXPECTED(EG(exception))) {
2440 			zval_ptr_dtor(return_value);
2441 			RETURN_NULL();
2442 		}
2443 	} ZEND_HASH_FOREACH_END();
2444 
2445 	if (zcount) {
2446 		zval_ptr_dtor(zcount);
2447 		ZVAL_LONG(zcount, replace_count);
2448 	}
2449 }
2450 /* }}} */
2451 
2452 /* {{{ proto mixed preg_filter(mixed regex, mixed replace, mixed subject [, int limit [, int &count]])
2453    Perform Perl-style regular expression replacement and only return matches. */
PHP_FUNCTION(preg_filter)2454 static PHP_FUNCTION(preg_filter)
2455 {
2456 	preg_replace_common(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
2457 }
2458 /* }}} */
2459 
2460 /* {{{ proto array preg_split(string pattern, string subject [, int limit [, int flags]])
2461    Split string into an array using a perl-style regular expression as a delimiter */
PHP_FUNCTION(preg_split)2462 static PHP_FUNCTION(preg_split)
2463 {
2464 	zend_string			*regex;			/* Regular expression */
2465 	zend_string			*subject;		/* String to match against */
2466 	zend_long			 limit_val = -1;/* Integer value of limit */
2467 	zend_long			 flags = 0;		/* Match control flags */
2468 	pcre_cache_entry	*pce;			/* Compiled regular expression */
2469 
2470 	/* Get function parameters and do error checking */
2471 	ZEND_PARSE_PARAMETERS_START(2, 4)
2472 		Z_PARAM_STR(regex)
2473 		Z_PARAM_STR(subject)
2474 		Z_PARAM_OPTIONAL
2475 		Z_PARAM_LONG(limit_val)
2476 		Z_PARAM_LONG(flags)
2477 	ZEND_PARSE_PARAMETERS_END_EX(RETURN_FALSE);
2478 
2479 	/* Compile regex or get it from cache. */
2480 	if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
2481 		RETURN_FALSE;
2482 	}
2483 
2484 	pce->refcount++;
2485 	php_pcre_split_impl(pce, subject, return_value, limit_val, flags);
2486 	pce->refcount--;
2487 }
2488 /* }}} */
2489 
2490 /* {{{ php_pcre_split
2491  */
php_pcre_split_impl(pcre_cache_entry * pce,zend_string * subject_str,zval * return_value,zend_long limit_val,zend_long flags)2492 PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str, zval *return_value,
2493 	zend_long limit_val, zend_long flags)
2494 {
2495 	PCRE2_SIZE		*offsets;			/* Array of subpattern offsets */
2496 	uint32_t		 options;			/* Execution options */
2497 	int				 count;				/* Count of matched subpatterns */
2498 	PCRE2_SIZE		 start_offset;		/* Where the new search starts */
2499 	PCRE2_SIZE		 next_offset;		/* End of the last delimiter match + 1 */
2500 	char			*last_match;		/* Location of last match */
2501 	uint32_t		 no_empty;			/* If NO_EMPTY flag is set */
2502 	uint32_t		 delim_capture; 	/* If delimiters should be captured */
2503 	uint32_t		 offset_capture;	/* If offsets should be captured */
2504 	uint32_t		 num_subpats;		/* Number of captured subpatterns */
2505 	zval			 tmp;
2506 	pcre2_match_data *match_data;
2507 
2508 	no_empty = flags & PREG_SPLIT_NO_EMPTY;
2509 	delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE;
2510 	offset_capture = flags & PREG_SPLIT_OFFSET_CAPTURE;
2511 
2512 	/* Initialize return value */
2513 	array_init(return_value);
2514 
2515 	/* Calculate the size of the offsets array, and allocate memory for it. */
2516 	num_subpats = pce->capture_count + 1;
2517 
2518 	/* Start at the beginning of the string */
2519 	start_offset = 0;
2520 	next_offset = 0;
2521 	last_match = ZSTR_VAL(subject_str);
2522 	PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
2523 
2524 
2525 	if (limit_val == -1) {
2526 		/* pass */
2527 	} else if (limit_val == 0) {
2528 		limit_val = -1;
2529 	} else if (limit_val <= 1) {
2530 		goto last;
2531 	}
2532 
2533 	if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
2534 		match_data = mdata;
2535 	} else {
2536 		match_data = pcre2_match_data_create_from_pattern(pce->re, gctx);
2537 		if (!match_data) {
2538 			PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
2539 			zval_ptr_dtor(return_value);
2540 			RETURN_FALSE;
2541 		}
2542 	}
2543 
2544 	options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
2545 
2546 #ifdef HAVE_PCRE_JIT_SUPPORT
2547 	if ((pce->preg_options & PREG_JIT) && options) {
2548 		count = pcre2_jit_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset,
2549 				PCRE2_NO_UTF_CHECK, match_data, mctx);
2550 	} else
2551 #endif
2552 	count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset,
2553 			options, match_data, mctx);
2554 
2555 	while (1) {
2556 		/* If something matched */
2557 		if (count >= 0) {
2558 			/* Check for too many substrings condition. */
2559 			if (UNEXPECTED(count == 0)) {
2560 				php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
2561 				count = num_subpats;
2562 			}
2563 
2564 matched:
2565 			offsets = pcre2_get_ovector_pointer(match_data);
2566 
2567 			if (UNEXPECTED(offsets[1] < offsets[0])) {
2568 				PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
2569 				break;
2570 			}
2571 
2572 			if (!no_empty || &ZSTR_VAL(subject_str)[offsets[0]] != last_match) {
2573 
2574 				if (offset_capture) {
2575 					/* Add (match, offset) pair to the return value */
2576 					add_offset_pair(return_value, last_match, (&ZSTR_VAL(subject_str)[offsets[0]]-last_match), next_offset, NULL, 0);
2577 				} else {
2578 					/* Add the piece to the return value */
2579 					ZVAL_STRINGL(&tmp, last_match, &ZSTR_VAL(subject_str)[offsets[0]]-last_match);
2580 					zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
2581 				}
2582 
2583 				/* One less left to do */
2584 				if (limit_val != -1)
2585 					limit_val--;
2586 			}
2587 
2588 			last_match = &ZSTR_VAL(subject_str)[offsets[1]];
2589 			next_offset = offsets[1];
2590 
2591 			if (delim_capture) {
2592 				size_t i, match_len;
2593 				for (i = 1; i < count; i++) {
2594 					match_len = offsets[(i<<1)+1] - offsets[i<<1];
2595 					/* If we have matched a delimiter */
2596 					if (!no_empty || match_len > 0) {
2597 						if (offset_capture) {
2598 							add_offset_pair(return_value, &ZSTR_VAL(subject_str)[offsets[i<<1]], match_len, offsets[i<<1], NULL, 0);
2599 						} else {
2600 							ZVAL_STRINGL(&tmp, &ZSTR_VAL(subject_str)[offsets[i<<1]], match_len);
2601 							zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
2602 						}
2603 					}
2604 				}
2605 			}
2606 
2607 			/* Advance to the position right after the last full match */
2608 			start_offset = offsets[1];
2609 
2610 			/* If we have matched an empty string, mimic what Perl's /g options does.
2611 			   This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
2612 			   the match again at the same point. If this fails (picked up above) we
2613 			   advance to the next character. */
2614 			if (start_offset == offsets[0]) {
2615 				count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset,
2616 					PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
2617 				if (count >= 0) {
2618 					goto matched;
2619 				} else if (count == PCRE2_ERROR_NOMATCH) {
2620 					/* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
2621 					   this is not necessarily the end. We need to advance
2622 					   the start offset, and continue. Fudge the offset values
2623 					   to achieve this, unless we're already at the end of the string. */
2624 					if (start_offset < ZSTR_LEN(subject_str)) {
2625 						start_offset += calculate_unit_length(pce, ZSTR_VAL(subject_str) + start_offset);
2626 					} else {
2627 						break;
2628 					}
2629 				} else {
2630 					goto error;
2631 				}
2632 			}
2633 
2634 		} else if (count == PCRE2_ERROR_NOMATCH) {
2635 			break;
2636 		} else {
2637 error:
2638 			pcre_handle_exec_error(count);
2639 			break;
2640 		}
2641 
2642 		/* Get next piece if no limit or limit not yet reached and something matched*/
2643 		if (limit_val != -1 && limit_val <= 1) {
2644 			break;
2645 		}
2646 
2647 #ifdef HAVE_PCRE_JIT_SUPPORT
2648 		if (pce->preg_options & PREG_JIT) {
2649 			count = pcre2_jit_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset,
2650 					PCRE2_NO_UTF_CHECK, match_data, mctx);
2651 		} else
2652 #endif
2653 		count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset,
2654 				PCRE2_NO_UTF_CHECK, match_data, mctx);
2655 	}
2656 	if (match_data != mdata) {
2657 		pcre2_match_data_free(match_data);
2658 	}
2659 
2660 	if (PCRE_G(error_code) != PHP_PCRE_NO_ERROR) {
2661 		zval_ptr_dtor(return_value);
2662 		RETURN_FALSE;
2663 	}
2664 
2665 last:
2666 	start_offset = (last_match - ZSTR_VAL(subject_str)); /* the offset might have been incremented, but without further successful matches */
2667 
2668 	if (!no_empty || start_offset < ZSTR_LEN(subject_str)) {
2669 		if (offset_capture) {
2670 			/* Add the last (match, offset) pair to the return value */
2671 			add_offset_pair(return_value, &ZSTR_VAL(subject_str)[start_offset], ZSTR_LEN(subject_str) - start_offset, start_offset, NULL, 0);
2672 		} else {
2673 			/* Add the last piece to the return value */
2674 			if (last_match == ZSTR_VAL(subject_str)) {
2675 				ZVAL_STR_COPY(&tmp, subject_str);
2676 			} else {
2677 				ZVAL_STRINGL(&tmp, last_match, ZSTR_VAL(subject_str) + ZSTR_LEN(subject_str) - last_match);
2678 			}
2679 			zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
2680 		}
2681 	}
2682 }
2683 /* }}} */
2684 
2685 /* {{{ proto string preg_quote(string str [, string delim_char])
2686    Quote regular expression characters plus an optional character */
PHP_FUNCTION(preg_quote)2687 static PHP_FUNCTION(preg_quote)
2688 {
2689 	zend_string *str;       		/* Input string argument */
2690 	zend_string	*delim = NULL;		/* Additional delimiter argument */
2691 	char		*in_str;			/* Input string */
2692 	char		*in_str_end;    	/* End of the input string */
2693 	zend_string	*out_str;			/* Output string with quoted characters */
2694 	size_t       extra_len;         /* Number of additional characters */
2695 	char 		*p,					/* Iterator for input string */
2696 				*q,					/* Iterator for output string */
2697 				 delim_char = '\0',	/* Delimiter character to be quoted */
2698 				 c;					/* Current character */
2699 
2700 	/* Get the arguments and check for errors */
2701 	ZEND_PARSE_PARAMETERS_START(1, 2)
2702 		Z_PARAM_STR(str)
2703 		Z_PARAM_OPTIONAL
2704 		Z_PARAM_STR_EX(delim, 1, 0)
2705 	ZEND_PARSE_PARAMETERS_END();
2706 
2707 	/* Nothing to do if we got an empty string */
2708 	if (ZSTR_LEN(str) == 0) {
2709 		RETURN_EMPTY_STRING();
2710 	}
2711 
2712 	in_str = ZSTR_VAL(str);
2713 	in_str_end = in_str + ZSTR_LEN(str);
2714 
2715 	if (delim) {
2716 		delim_char = ZSTR_VAL(delim)[0];
2717 	}
2718 
2719 	/* Go through the string and quote necessary characters */
2720 	extra_len = 0;
2721 	p = in_str;
2722 	do {
2723 		c = *p;
2724 		switch(c) {
2725 			case '.':
2726 			case '\\':
2727 			case '+':
2728 			case '*':
2729 			case '?':
2730 			case '[':
2731 			case '^':
2732 			case ']':
2733 			case '$':
2734 			case '(':
2735 			case ')':
2736 			case '{':
2737 			case '}':
2738 			case '=':
2739 			case '!':
2740 			case '>':
2741 			case '<':
2742 			case '|':
2743 			case ':':
2744 			case '-':
2745 			case '#':
2746 				extra_len++;
2747 				break;
2748 
2749 			case '\0':
2750 				extra_len+=3;
2751 				break;
2752 
2753 			default:
2754 				if (c == delim_char) {
2755 					extra_len++;
2756 				}
2757 				break;
2758 		}
2759 		p++;
2760 	} while (p != in_str_end);
2761 
2762 	if (extra_len == 0) {
2763 		RETURN_STR_COPY(str);
2764 	}
2765 
2766 	/* Allocate enough memory so that even if each character
2767 	   is quoted, we won't run out of room */
2768 	out_str = zend_string_safe_alloc(1, ZSTR_LEN(str), extra_len, 0);
2769 	q = ZSTR_VAL(out_str);
2770 	p = in_str;
2771 
2772 	do {
2773 		c = *p;
2774 		switch(c) {
2775 			case '.':
2776 			case '\\':
2777 			case '+':
2778 			case '*':
2779 			case '?':
2780 			case '[':
2781 			case '^':
2782 			case ']':
2783 			case '$':
2784 			case '(':
2785 			case ')':
2786 			case '{':
2787 			case '}':
2788 			case '=':
2789 			case '!':
2790 			case '>':
2791 			case '<':
2792 			case '|':
2793 			case ':':
2794 			case '-':
2795 			case '#':
2796 				*q++ = '\\';
2797 				*q++ = c;
2798 				break;
2799 
2800 			case '\0':
2801 				*q++ = '\\';
2802 				*q++ = '0';
2803 				*q++ = '0';
2804 				*q++ = '0';
2805 				break;
2806 
2807 			default:
2808 				if (c == delim_char) {
2809 					*q++ = '\\';
2810 				}
2811 				*q++ = c;
2812 				break;
2813 		}
2814 		p++;
2815 	} while (p != in_str_end);
2816 	*q = '\0';
2817 
2818 	RETURN_NEW_STR(out_str);
2819 }
2820 /* }}} */
2821 
2822 /* {{{ proto array preg_grep(string regex, array input [, int flags])
2823    Searches array and returns entries which match regex */
PHP_FUNCTION(preg_grep)2824 static PHP_FUNCTION(preg_grep)
2825 {
2826 	zend_string			*regex;			/* Regular expression */
2827 	zval				*input;			/* Input array */
2828 	zend_long			 flags = 0;		/* Match control flags */
2829 	pcre_cache_entry	*pce;			/* Compiled regular expression */
2830 
2831 	/* Get arguments and do error checking */
2832 	ZEND_PARSE_PARAMETERS_START(2, 3)
2833 		Z_PARAM_STR(regex)
2834 		Z_PARAM_ARRAY(input)
2835 		Z_PARAM_OPTIONAL
2836 		Z_PARAM_LONG(flags)
2837 	ZEND_PARSE_PARAMETERS_END();
2838 
2839 	/* Compile regex or get it from cache. */
2840 	if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
2841 		RETURN_FALSE;
2842 	}
2843 
2844 	pce->refcount++;
2845 	php_pcre_grep_impl(pce, input, return_value, flags);
2846 	pce->refcount--;
2847 }
2848 /* }}} */
2849 
php_pcre_grep_impl(pcre_cache_entry * pce,zval * input,zval * return_value,zend_long flags)2850 PHPAPI void  php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return_value, zend_long flags) /* {{{ */
2851 {
2852 	zval            *entry;             /* An entry in the input array */
2853 	uint32_t		 num_subpats;		/* Number of captured subpatterns */
2854 	int				 count;				/* Count of matched subpatterns */
2855 	uint32_t		 options;			/* Execution options */
2856 	zend_string		*string_key;
2857 	zend_ulong		 num_key;
2858 	zend_bool		 invert;			/* Whether to return non-matching
2859 										   entries */
2860 	pcre2_match_data *match_data;
2861 	invert = flags & PREG_GREP_INVERT ? 1 : 0;
2862 
2863 	/* Calculate the size of the offsets array, and allocate memory for it. */
2864 	num_subpats = pce->capture_count + 1;
2865 
2866 	/* Initialize return array */
2867 	array_init(return_value);
2868 
2869 	PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
2870 
2871 	if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
2872 		match_data = mdata;
2873 	} else {
2874 		match_data = pcre2_match_data_create_from_pattern(pce->re, gctx);
2875 		if (!match_data) {
2876 			PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
2877 			return;
2878 		}
2879 	}
2880 
2881 	options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
2882 
2883 	/* Go through the input array */
2884 	ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(input), num_key, string_key, entry) {
2885 		zend_string *tmp_subject_str;
2886 		zend_string *subject_str = zval_get_tmp_string(entry, &tmp_subject_str);
2887 
2888 		/* Perform the match */
2889 #ifdef HAVE_PCRE_JIT_SUPPORT
2890 		if ((pce->preg_options & PREG_JIT) && options) {
2891 			count = pcre2_jit_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), 0,
2892 					PCRE2_NO_UTF_CHECK, match_data, mctx);
2893 		} else
2894 #endif
2895 		count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), 0,
2896 				options, match_data, mctx);
2897 
2898 		/* If the entry fits our requirements */
2899 		if (count >= 0) {
2900 			/* Check for too many substrings condition. */
2901 			if (UNEXPECTED(count == 0)) {
2902 				php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
2903 			}
2904 			if (!invert) {
2905 				Z_TRY_ADDREF_P(entry);
2906 
2907 				/* Add to return array */
2908 				if (string_key) {
2909 					zend_hash_update(Z_ARRVAL_P(return_value), string_key, entry);
2910 				} else {
2911 					zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, entry);
2912 				}
2913 			}
2914 		} else if (count == PCRE2_ERROR_NOMATCH) {
2915 			if (invert) {
2916 				Z_TRY_ADDREF_P(entry);
2917 
2918 				/* Add to return array */
2919 				if (string_key) {
2920 					zend_hash_update(Z_ARRVAL_P(return_value), string_key, entry);
2921 				} else {
2922 					zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, entry);
2923 				}
2924 			}
2925 		} else {
2926 			pcre_handle_exec_error(count);
2927 			zend_tmp_string_release(tmp_subject_str);
2928 			break;
2929 		}
2930 
2931 		zend_tmp_string_release(tmp_subject_str);
2932 	} ZEND_HASH_FOREACH_END();
2933 	if (match_data != mdata) {
2934 		pcre2_match_data_free(match_data);
2935 	}
2936 }
2937 /* }}} */
2938 
2939 /* {{{ proto int preg_last_error()
2940    Returns the error code of the last regexp execution. */
PHP_FUNCTION(preg_last_error)2941 static PHP_FUNCTION(preg_last_error)
2942 {
2943 	ZEND_PARSE_PARAMETERS_START(0, 0)
2944 	ZEND_PARSE_PARAMETERS_END();
2945 
2946 	RETURN_LONG(PCRE_G(error_code));
2947 }
2948 /* }}} */
2949 
2950 /* {{{ module definition structures */
2951 
2952 /* {{{ arginfo */
/* Argument descriptors for the functions listed in pcre_functions[].
   In each ZEND_BEGIN_ARG_INFO_EX() the last number is the count of required
   arguments; in each ZEND_ARG_INFO() the leading 1 marks an argument that is
   declared by-reference, 0 one that is passed by value. */

/* preg_match: pattern and subject required; subpatterns is by-reference. */
2953 ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match, 0, 0, 2)
2954     ZEND_ARG_INFO(0, pattern)
2955     ZEND_ARG_INFO(0, subject)
2956     ZEND_ARG_INFO(1, subpatterns) /* array */
2957     ZEND_ARG_INFO(0, flags)
2958     ZEND_ARG_INFO(0, offset)
2959 ZEND_END_ARG_INFO()
2960 
/* preg_match_all: same argument list as preg_match. */
2961 ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match_all, 0, 0, 2)
2962     ZEND_ARG_INFO(0, pattern)
2963     ZEND_ARG_INFO(0, subject)
2964     ZEND_ARG_INFO(1, subpatterns) /* array */
2965     ZEND_ARG_INFO(0, flags)
2966     ZEND_ARG_INFO(0, offset)
2967 ZEND_END_ARG_INFO()
2968 
/* preg_replace (also used for preg_filter in pcre_functions[]):
   three required arguments; count is by-reference. */
2969 ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace, 0, 0, 3)
2970     ZEND_ARG_INFO(0, regex)
2971     ZEND_ARG_INFO(0, replace)
2972     ZEND_ARG_INFO(0, subject)
2973     ZEND_ARG_INFO(0, limit)
2974     ZEND_ARG_INFO(1, count)
2975 ZEND_END_ARG_INFO()
2976 
/* preg_replace_callback: like preg_replace, with a callback in place of the
   replacement. */
2977 ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace_callback, 0, 0, 3)
2978     ZEND_ARG_INFO(0, regex)
2979     ZEND_ARG_INFO(0, callback)
2980     ZEND_ARG_INFO(0, subject)
2981     ZEND_ARG_INFO(0, limit)
2982     ZEND_ARG_INFO(1, count)
2983 ZEND_END_ARG_INFO()
2984 
/* preg_replace_callback_array: pattern and subject required; count is
   by-reference. */
2985 ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace_callback_array, 0, 0, 2)
2986     ZEND_ARG_INFO(0, pattern)
2987     ZEND_ARG_INFO(0, subject)
2988     ZEND_ARG_INFO(0, limit)
2989     ZEND_ARG_INFO(1, count)
2990 ZEND_END_ARG_INFO()
2991 
/* preg_split: pattern and subject required; limit and flags optional. */
2992 ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_split, 0, 0, 2)
2993     ZEND_ARG_INFO(0, pattern)
2994     ZEND_ARG_INFO(0, subject)
2995     ZEND_ARG_INFO(0, limit)
2996     ZEND_ARG_INFO(0, flags)
2997 ZEND_END_ARG_INFO()
2998 
/* preg_quote: str required, delim_char optional (matches the 1..2 argument
   range parsed by the function itself). */
2999 ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_quote, 0, 0, 1)
3000     ZEND_ARG_INFO(0, str)
3001     ZEND_ARG_INFO(0, delim_char)
3002 ZEND_END_ARG_INFO()
3003 
/* preg_grep: regex and input required, flags optional (matches the 2..3
   argument range parsed by the function itself). */
3004 ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_grep, 0, 0, 2)
3005     ZEND_ARG_INFO(0, regex)
3006     ZEND_ARG_INFO(0, input) /* array */
3007     ZEND_ARG_INFO(0, flags)
3008 ZEND_END_ARG_INFO()
3009 
/* preg_last_error: no arguments. */
3010 ZEND_BEGIN_ARG_INFO(arginfo_preg_last_error, 0)
3011 ZEND_END_ARG_INFO()
3012 /* }}} */
3013 
/* Function table referenced from pcre_module_entry: one PHP_FE() row per
   userland function, pairing the function with its argument descriptor, and
   closed by PHP_FE_END. preg_filter has no descriptor of its own and reuses
   arginfo_preg_replace. */
3014 static const zend_function_entry pcre_functions[] = {
3015 	PHP_FE(preg_match,					arginfo_preg_match)
3016 	PHP_FE(preg_match_all,				arginfo_preg_match_all)
3017 	PHP_FE(preg_replace,				arginfo_preg_replace)
3018 	PHP_FE(preg_replace_callback,		arginfo_preg_replace_callback)
3019 	PHP_FE(preg_replace_callback_array,	arginfo_preg_replace_callback_array)
3020 	PHP_FE(preg_filter,					arginfo_preg_replace)
3021 	PHP_FE(preg_split,					arginfo_preg_split)
3022 	PHP_FE(preg_quote,					arginfo_preg_quote)
3023 	PHP_FE(preg_grep,					arginfo_preg_grep)
3024 	PHP_FE(preg_last_error,				arginfo_preg_last_error)
3025 	PHP_FE_END
3026 };
3027 
/* Module descriptor for the "pcre" extension: names the extension, points at
   pcre_functions[] and wires up the lifecycle hooks. A request-startup hook
   is installed only when HAVE_PCRE_JIT_SUPPORT is defined. The slot meanings
   noted for the NULL entries follow the zend_module_entry field order. */
3028 zend_module_entry pcre_module_entry = {
3029 	STANDARD_MODULE_HEADER,
3030    "pcre", /* extension name */
3031 	pcre_functions, /* function table */
3032 	PHP_MINIT(pcre), /* module startup */
3033 	PHP_MSHUTDOWN(pcre), /* module shutdown */
3034 #ifdef HAVE_PCRE_JIT_SUPPORT
3035 	PHP_RINIT(pcre), /* request startup, JIT builds only */
3036 #else
3037 	NULL, /* no request startup hook without JIT support */
3038 #endif
3039 	NULL, /* request shutdown slot: unused */
3040 	PHP_MINFO(pcre), /* module info hook */
3041 	PHP_PCRE_VERSION, /* extension version */
3042 	PHP_MODULE_GLOBALS(pcre), /* module globals */
3043 	PHP_GINIT(pcre), /* globals constructor */
3044 	PHP_GSHUTDOWN(pcre), /* globals destructor */
3045 	NULL, /* post-deactivate slot: unused */
3046 	STANDARD_MODULE_PROPERTIES_EX
3047 };
3048 
/* Only when COMPILE_DL_PCRE is defined: expand ZEND_GET_MODULE for "pcre"
   (the Zend macro that provides the module lookup entry point). */
#if defined(COMPILE_DL_PCRE)
ZEND_GET_MODULE(pcre)
#endif
3052 
3053 /* }}} */
3054 
3055 PHPAPI pcre2_match_context *php_pcre_mctx(void)
3056 {/*{{{*/
3057 	return mctx;
3058 }/*}}}*/
3059 
php_pcre_gctx(void)3060 PHPAPI pcre2_general_context *php_pcre_gctx(void)
3061 {/*{{{*/
3062 	return gctx;
3063 }/*}}}*/
3064 
php_pcre_cctx(void)3065 PHPAPI pcre2_compile_context *php_pcre_cctx(void)
3066 {/*{{{*/
3067 	return cctx;
3068 }/*}}}*/
3069 
php_pcre_pce_incref(pcre_cache_entry * pce)3070 PHPAPI void php_pcre_pce_incref(pcre_cache_entry *pce)
3071 {/*{{{*/
3072 	assert(NULL != pce);
3073 	pce->refcount++;
3074 }/*}}}*/
3075 
php_pcre_pce_decref(pcre_cache_entry * pce)3076 PHPAPI void php_pcre_pce_decref(pcre_cache_entry *pce)
3077 {/*{{{*/
3078 	assert(NULL != pce);
3079 	assert(0 != pce->refcount);
3080 	pce->refcount--;
3081 }/*}}}*/
3082 
php_pcre_pce_re(pcre_cache_entry * pce)3083 PHPAPI pcre2_code *php_pcre_pce_re(pcre_cache_entry *pce)
3084 {/*{{{*/
3085 	assert(NULL != pce);
3086 	return pce->re;
3087 }/*}}}*/
3088 
3089 #endif /* HAVE_PCRE || HAVE_BUNDLED_PCRE */
3090 
3091 /*
3092  * Local variables:
3093  * tab-width: 4
3094  * c-basic-offset: 4
3095  * End:
3096  * vim600: sw=4 ts=4 fdm=marker
3097  * vim<600: sw=4 ts=4
3098  */
3099