xref: /PHP-8.4/ext/pcre/php_pcre.c (revision c4bb0755)
1 /*
2    +----------------------------------------------------------------------+
3    | Copyright (c) The PHP Group                                          |
4    +----------------------------------------------------------------------+
5    | This source file is subject to version 3.01 of the PHP license,      |
6    | that is bundled with this package in the file LICENSE, and is        |
7    | available through the world-wide-web at the following url:           |
8    | https://www.php.net/license/3_01.txt                                 |
9    | If you did not receive a copy of the PHP license and are unable to   |
10    | obtain it through the world-wide-web, please send a note to          |
11    | license@php.net so we can mail you a copy immediately.               |
12    +----------------------------------------------------------------------+
13    | Author: Andrei Zmievski <andrei@php.net>                             |
14    +----------------------------------------------------------------------+
15  */
16 
17 #include "php.h"
18 #include "php_ini.h"
19 #include "php_pcre.h"
20 #include "ext/standard/info.h"
21 #include "ext/standard/basic_functions.h"
22 #include "zend_smart_str.h"
23 #include "SAPI.h"
24 
/* Result-ordering modes and flag bits. The names suggest the flags argument of
 * the preg_match*() family; their consumers are outside this section. */
#define PREG_PATTERN_ORDER        1
#define PREG_SET_ORDER            2
#define PREG_OFFSET_CAPTURE       (1<<8)
#define PREG_UNMATCHED_AS_NULL    (1<<9)

/* Flag bits whose names suggest preg_split() -- confirm against callers. */
#define PREG_SPLIT_NO_EMPTY       (1<<0)
#define PREG_SPLIT_DELIM_CAPTURE  (1<<1)
#define PREG_SPLIT_OFFSET_CAPTURE (1<<2)

/* Flag bit whose name suggests preg_grep() -- confirm against callers. */
#define PREG_GREP_INVERT          (1<<0)

/* Set in pcre_cache_entry.preg_options once a pattern has been JIT-compiled. */
#define PREG_JIT                  (1<<3)

/* Entry count at which the compiled-pattern cache evicts unreferenced entries. */
#define PCRE_CACHE_SIZE 4096

/* 1 when the build has PCRE2 JIT support, 0 otherwise. */
#ifdef HAVE_PCRE_JIT_SUPPORT
#define PHP_PCRE_JIT_SUPPORT 1
#else
#define PHP_PCRE_JIT_SUPPORT 0
#endif
45 
/* PCRE2 version string: filled in by MINIT through _pcre2_config_str() and
 * released with free() in MSHUTDOWN. */
char *php_pcre_version;
47 
48 #include "php_pcre_arginfo.h"
49 
50 struct _pcre_cache_entry {
51 	pcre2_code *re;
52 	/* Pointer is not NULL when there are named captures.
53 	 * Length is equal to capture_count + 1 to account for capture group 0. */
54 	zend_string **subpats_table;
55 	uint32_t preg_options;
56 	uint32_t capture_count;
57 	uint32_t compile_options;
58 	uint32_t refcount;
59 };
60 
61 PHPAPI ZEND_DECLARE_MODULE_GLOBALS(pcre)
62 
63 #ifdef HAVE_PCRE_JIT_SUPPORT
64 #define PCRE_JIT_STACK_MIN_SIZE (32 * 1024)
65 #define PCRE_JIT_STACK_MAX_SIZE (192 * 1024)
66 ZEND_TLS pcre2_jit_stack *jit_stack = NULL;
67 #endif
68 /* General context using (infallible) system allocator. */
69 ZEND_TLS pcre2_general_context *gctx = NULL;
70 /* These two are global per thread for now. Though it is possible to use these
71  	per pattern. Either one can copy it and use in pce, or one does no global
72 	contexts at all, but creates for every pce. */
73 ZEND_TLS pcre2_compile_context *cctx = NULL;
74 ZEND_TLS pcre2_match_context   *mctx = NULL;
75 ZEND_TLS pcre2_match_data      *mdata = NULL;
76 ZEND_TLS bool              mdata_used = 0;
77 ZEND_TLS uint8_t pcre2_init_ok = 0;
/*
 * Mutex helpers, active only in ZTS builds with JIT support.
 *
 * Every macro expands to a single statement (or expression) WITHOUT a trailing
 * semicolon, so the usual `php_pcre_mutex_lock();` call form is safe inside
 * unbraced if/else bodies. Allocation and release only act on the main thread.
 */
#if defined(ZTS) && defined(HAVE_PCRE_JIT_SUPPORT)
static MUTEX_T pcre_mt = NULL;
#define php_pcre_mutex_alloc() \
	do { \
		if (tsrm_is_main_thread() && !pcre_mt) { \
			pcre_mt = tsrm_mutex_alloc(); \
		} \
	} while (0)
#define php_pcre_mutex_free() \
	do { \
		if (tsrm_is_main_thread() && pcre_mt) { \
			tsrm_mutex_free(pcre_mt); \
			pcre_mt = NULL; \
		} \
	} while (0)
#define php_pcre_mutex_lock() tsrm_mutex_lock(pcre_mt)
#define php_pcre_mutex_unlock() tsrm_mutex_unlock(pcre_mt)
#else
/* No locking needed: expand to a harmless no-op statement. */
#define php_pcre_mutex_alloc() ((void)0)
#define php_pcre_mutex_free() ((void)0)
#define php_pcre_mutex_lock() ((void)0)
#define php_pcre_mutex_unlock() ((void)0)
#endif
92 
93 ZEND_TLS HashTable char_tables;
94 
95 static void free_subpats_table(zend_string **subpat_names, uint32_t num_subpats);
96 
/* Destructor for char_tables entries: releases the persistently allocated table. */
static void php_pcre_free_char_table(zval *data)
{/*{{{*/
	pefree(Z_PTR_P(data), 1);
}/*}}}*/
102 
pcre_handle_exec_error(int pcre_code)103 static void pcre_handle_exec_error(int pcre_code) /* {{{ */
104 {
105 	int preg_code = 0;
106 
107 	switch (pcre_code) {
108 		case PCRE2_ERROR_MATCHLIMIT:
109 			preg_code = PHP_PCRE_BACKTRACK_LIMIT_ERROR;
110 			break;
111 
112 		case PCRE2_ERROR_RECURSIONLIMIT:
113 			preg_code = PHP_PCRE_RECURSION_LIMIT_ERROR;
114 			break;
115 
116 		case PCRE2_ERROR_BADUTFOFFSET:
117 			preg_code = PHP_PCRE_BAD_UTF8_OFFSET_ERROR;
118 			break;
119 
120 #ifdef HAVE_PCRE_JIT_SUPPORT
121 		case PCRE2_ERROR_JIT_STACKLIMIT:
122 			preg_code = PHP_PCRE_JIT_STACKLIMIT_ERROR;
123 			break;
124 #endif
125 
126 		default:
127 			if (pcre_code <= PCRE2_ERROR_UTF8_ERR1 && pcre_code >= PCRE2_ERROR_UTF8_ERR21) {
128 				preg_code = PHP_PCRE_BAD_UTF8_ERROR;
129 			} else  {
130 				preg_code = PHP_PCRE_INTERNAL_ERROR;
131 			}
132 			break;
133 	}
134 
135 	PCRE_G(error_code) = preg_code;
136 }
137 /* }}} */
138 
/* Returns the static, human-readable message for a PHP_PCRE_* error code;
 * unrecognised codes yield "Unknown error". */
static const char *php_pcre_get_error_msg(php_pcre_error_code error_code) /* {{{ */
{
	const char *msg;

	switch (error_code) {
		case PHP_PCRE_NO_ERROR:
			msg = "No error";
			break;
		case PHP_PCRE_INTERNAL_ERROR:
			msg = "Internal error";
			break;
		case PHP_PCRE_BAD_UTF8_ERROR:
			msg = "Malformed UTF-8 characters, possibly incorrectly encoded";
			break;
		case PHP_PCRE_BAD_UTF8_OFFSET_ERROR:
			msg = "The offset did not correspond to the beginning of a valid UTF-8 code point";
			break;
		case PHP_PCRE_BACKTRACK_LIMIT_ERROR:
			msg = "Backtrack limit exhausted";
			break;
		case PHP_PCRE_RECURSION_LIMIT_ERROR:
			msg = "Recursion limit exhausted";
			break;
#ifdef HAVE_PCRE_JIT_SUPPORT
		case PHP_PCRE_JIT_STACKLIMIT_ERROR:
			msg = "JIT stack limit exhausted";
			break;
#endif
		default:
			msg = "Unknown error";
			break;
	}

	return msg;
}
/* }}} */
165 
/* Destructor for pcre_cache entries: frees the subpattern name table (if any),
 * the compiled pattern and finally the entry itself. */
static void php_free_pcre_cache(zval *data) /* {{{ */
{
	pcre_cache_entry *entry = (pcre_cache_entry *) Z_PTR_P(data);

	if (entry != NULL) {
		if (entry->subpats_table != NULL) {
			/* The table has one slot per capture group plus group 0. */
			free_subpats_table(entry->subpats_table, entry->capture_count + 1);
		}
		pcre2_code_free(entry->re);
		free(entry);
	}
}
/* }}} */
177 
/* PCRE2 allocation callback backed by the persistent allocator; `data` is unused. */
static void *php_pcre_malloc(PCRE2_SIZE size, void *data)
{
	void *block = pemalloc(size, 1);

	return block;
}
182 
/* PCRE2 release callback paired with php_pcre_malloc(); `data` is unused. */
static void php_pcre_free(void *block, void *data)
{
	pefree(block, 1);
}
187 
/* PCRE2 allocation callback backed by emalloc(); `data` is unused. */
static void *php_pcre_emalloc(PCRE2_SIZE size, void *data)
{
	void *block = emalloc(size);

	return block;
}
192 
/* PCRE2 release callback paired with php_pcre_emalloc(); `data` is unused. */
static void php_pcre_efree(void *block, void *data)
{
	efree(block);
}
197 
/* Extra compile options applied to every pattern. pcre 10.38 needs
 * PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK, which is disabled by default. */
#ifdef PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK
#define PHP_PCRE_DEFAULT_EXTRA_COPTIONS PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK
#else
#define PHP_PCRE_DEFAULT_EXTRA_COPTIONS 0
#endif

/* Size passed to pcre2_match_data_create() for the shared, preallocated match data. */
#define PHP_PCRE_PREALLOC_MDATA_SIZE 32
206 
php_pcre_init_pcre2(uint8_t jit)207 static void php_pcre_init_pcre2(uint8_t jit)
208 {/*{{{*/
209 	if (!gctx) {
210 		gctx = pcre2_general_context_create(php_pcre_malloc, php_pcre_free, NULL);
211 		if (!gctx) {
212 			pcre2_init_ok = 0;
213 			return;
214 		}
215 	}
216 
217 	if (!cctx) {
218 		cctx = pcre2_compile_context_create(gctx);
219 		if (!cctx) {
220 			pcre2_init_ok = 0;
221 			return;
222 		}
223 	}
224 
225 	pcre2_set_compile_extra_options(cctx, PHP_PCRE_DEFAULT_EXTRA_COPTIONS);
226 
227 	if (!mctx) {
228 		mctx = pcre2_match_context_create(gctx);
229 		if (!mctx) {
230 			pcre2_init_ok = 0;
231 			return;
232 		}
233 	}
234 
235 #ifdef HAVE_PCRE_JIT_SUPPORT
236 	if (jit && !jit_stack) {
237 		jit_stack = pcre2_jit_stack_create(PCRE_JIT_STACK_MIN_SIZE, PCRE_JIT_STACK_MAX_SIZE, gctx);
238 		if (!jit_stack) {
239 			pcre2_init_ok = 0;
240 			return;
241 		}
242 	}
243 #endif
244 
245 	if (!mdata) {
246 		mdata = pcre2_match_data_create(PHP_PCRE_PREALLOC_MDATA_SIZE, gctx);
247 		if (!mdata) {
248 			pcre2_init_ok = 0;
249 			return;
250 		}
251 	}
252 
253 	pcre2_init_ok = 1;
254 }/*}}}*/
255 
php_pcre_shutdown_pcre2(void)256 static void php_pcre_shutdown_pcre2(void)
257 {/*{{{*/
258 	if (gctx) {
259 		pcre2_general_context_free(gctx);
260 		gctx = NULL;
261 	}
262 
263 	if (cctx) {
264 		pcre2_compile_context_free(cctx);
265 		cctx = NULL;
266 	}
267 
268 	if (mctx) {
269 		pcre2_match_context_free(mctx);
270 		mctx = NULL;
271 	}
272 
273 #ifdef HAVE_PCRE_JIT_SUPPORT
274 	/* Stack may only be destroyed when no cached patterns
275 	 	possibly associated with it do exist. */
276 	if (jit_stack) {
277 		pcre2_jit_stack_free(jit_stack);
278 		jit_stack = NULL;
279 	}
280 #endif
281 
282 	if (mdata) {
283 		pcre2_match_data_free(mdata);
284 		mdata = NULL;
285 	}
286 
287 	pcre2_init_ok = 0;
288 }/*}}}*/
289 
PHP_GINIT_FUNCTION(pcre)290 static PHP_GINIT_FUNCTION(pcre) /* {{{ */
291 {
292 	php_pcre_mutex_alloc();
293 
294 	zend_hash_init(&pcre_globals->pcre_cache, 0, NULL, php_free_pcre_cache, 1);
295 
296 	pcre_globals->backtrack_limit = 0;
297 	pcre_globals->recursion_limit = 0;
298 	pcre_globals->error_code      = PHP_PCRE_NO_ERROR;
299 	ZVAL_UNDEF(&pcre_globals->unmatched_null_pair);
300 	ZVAL_UNDEF(&pcre_globals->unmatched_empty_pair);
301 #ifdef HAVE_PCRE_JIT_SUPPORT
302 	pcre_globals->jit = 1;
303 #endif
304 
305 	php_pcre_init_pcre2(1);
306 	zend_hash_init(&char_tables, 1, NULL, php_pcre_free_char_table, 1);
307 }
308 /* }}} */
309 
PHP_GSHUTDOWN_FUNCTION(pcre)310 static PHP_GSHUTDOWN_FUNCTION(pcre) /* {{{ */
311 {
312 	zend_hash_destroy(&pcre_globals->pcre_cache);
313 
314 	php_pcre_shutdown_pcre2();
315 	zend_hash_destroy(&char_tables);
316 	php_pcre_mutex_free();
317 }
318 /* }}} */
319 
PHP_INI_MH(OnUpdateBacktrackLimit)320 static PHP_INI_MH(OnUpdateBacktrackLimit)
321 {/*{{{*/
322 	OnUpdateLong(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
323 	if (mctx) {
324 		pcre2_set_match_limit(mctx, (uint32_t)PCRE_G(backtrack_limit));
325 	}
326 
327 	return SUCCESS;
328 }/*}}}*/
329 
PHP_INI_MH(OnUpdateRecursionLimit)330 static PHP_INI_MH(OnUpdateRecursionLimit)
331 {/*{{{*/
332 	OnUpdateLong(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
333 	if (mctx) {
334 		pcre2_set_depth_limit(mctx, (uint32_t)PCRE_G(recursion_limit));
335 	}
336 
337 	return SUCCESS;
338 }/*}}}*/
339 
#ifdef HAVE_PCRE_JIT_SUPPORT
/* INI handler for pcre.jit: stores the flag through OnUpdateBool(), then
 * assigns the JIT stack to the match context when JIT is on and a stack
 * exists, or clears the assignment otherwise. */
static PHP_INI_MH(OnUpdateJit)
{/*{{{*/
	OnUpdateBool(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);

	pcre2_jit_stack_assign(mctx, NULL, (PCRE_G(jit) && jit_stack) ? jit_stack : NULL);

	return SUCCESS;
}/*}}}*/
#endif
353 
354 PHP_INI_BEGIN()
355 	STD_PHP_INI_ENTRY("pcre.backtrack_limit", "1000000", PHP_INI_ALL, OnUpdateBacktrackLimit, backtrack_limit, zend_pcre_globals, pcre_globals)
356 	STD_PHP_INI_ENTRY("pcre.recursion_limit", "100000",  PHP_INI_ALL, OnUpdateRecursionLimit, recursion_limit, zend_pcre_globals, pcre_globals)
357 #ifdef HAVE_PCRE_JIT_SUPPORT
358 	STD_PHP_INI_BOOLEAN("pcre.jit",           "1",       PHP_INI_ALL, OnUpdateJit,            jit,             zend_pcre_globals, pcre_globals)
359 #endif
PHP_INI_END()360 PHP_INI_END()
361 
/* Fetches a string-valued pcre2_config() item into a freshly malloc()ed,
 * NUL-terminated buffer.
 *
 * what  One of the string-valued PCRE2_CONFIG_* selectors.
 *
 * Returns the buffer (caller releases it with free()), or NULL when the item is
 * unavailable, pcre2_config() reports an error, or the allocation fails. */
static char *_pcre2_config_str(uint32_t what)
{/*{{{*/
	/* First call only asks for the required length; errors come back negative. */
	int len = pcre2_config(what, NULL);
	if (len <= 0) {
		return NULL;
	}

	char *ret = malloc((size_t)len + 1);
	if (!ret) {
		return NULL;
	}

	if (pcre2_config(what, ret) <= 0) {
		free(ret);
		return NULL;
	}

	return ret;
}/*}}}*/
375 
376 /* {{{ PHP_MINFO_FUNCTION(pcre) */
PHP_MINFO_FUNCTION(pcre)377 static PHP_MINFO_FUNCTION(pcre)
378 {
379 #ifdef HAVE_PCRE_JIT_SUPPORT
380 	uint32_t flag = 0;
381 	char *jit_target = _pcre2_config_str(PCRE2_CONFIG_JITTARGET);
382 #endif
383 	char *version = _pcre2_config_str(PCRE2_CONFIG_VERSION);
384 	char *unicode = _pcre2_config_str(PCRE2_CONFIG_UNICODE_VERSION);
385 
386 	php_info_print_table_start();
387 	php_info_print_table_row(2, "PCRE (Perl Compatible Regular Expressions) Support", "enabled" );
388 	php_info_print_table_row(2, "PCRE Library Version", version);
389 	free(version);
390 	php_info_print_table_row(2, "PCRE Unicode Version", unicode);
391 	free(unicode);
392 
393 #ifdef HAVE_PCRE_JIT_SUPPORT
394 	if (!pcre2_config(PCRE2_CONFIG_JIT, &flag)) {
395 		php_info_print_table_row(2, "PCRE JIT Support", flag ? "enabled" : "disabled");
396 	} else {
397 		php_info_print_table_row(2, "PCRE JIT Support", "unknown" );
398 	}
399 	if (jit_target) {
400 		php_info_print_table_row(2, "PCRE JIT Target", jit_target);
401 	}
402 	free(jit_target);
403 #else
404 	php_info_print_table_row(2, "PCRE JIT Support", "not compiled in" );
405 #endif
406 
407 #ifdef HAVE_PCRE_VALGRIND_SUPPORT
408 	php_info_print_table_row(2, "PCRE Valgrind Support", "enabled" );
409 #endif
410 
411 	php_info_print_table_end();
412 
413 	DISPLAY_INI_ENTRIES();
414 }
415 /* }}} */
416 
417 /* {{{ PHP_MINIT_FUNCTION(pcre) */
PHP_MINIT_FUNCTION(pcre)418 static PHP_MINIT_FUNCTION(pcre)
419 {
420 #ifdef HAVE_PCRE_JIT_SUPPORT
421 	if (UNEXPECTED(!pcre2_init_ok)) {
422 		/* Retry. */
423 		php_pcre_init_pcre2(PCRE_G(jit));
424 		if (!pcre2_init_ok) {
425 			return FAILURE;
426 		}
427 	}
428 #endif
429 
430 	REGISTER_INI_ENTRIES();
431 
432 	php_pcre_version = _pcre2_config_str(PCRE2_CONFIG_VERSION);
433 
434 	register_php_pcre_symbols(module_number);
435 
436 	return SUCCESS;
437 }
438 /* }}} */
439 
440 /* {{{ PHP_MSHUTDOWN_FUNCTION(pcre) */
PHP_MSHUTDOWN_FUNCTION(pcre)441 static PHP_MSHUTDOWN_FUNCTION(pcre)
442 {
443 	UNREGISTER_INI_ENTRIES();
444 
445 	free(php_pcre_version);
446 
447 	return SUCCESS;
448 }
449 /* }}} */
450 
451 /* {{{ PHP_RINIT_FUNCTION(pcre) */
PHP_RINIT_FUNCTION(pcre)452 static PHP_RINIT_FUNCTION(pcre)
453 {
454 #ifdef HAVE_PCRE_JIT_SUPPORT
455 	if (UNEXPECTED(!pcre2_init_ok)) {
456 		/* Retry. */
457 		php_pcre_mutex_lock();
458 		php_pcre_init_pcre2(PCRE_G(jit));
459 		if (!pcre2_init_ok) {
460 			php_pcre_mutex_unlock();
461 			return FAILURE;
462 		}
463 		php_pcre_mutex_unlock();
464 	}
465 
466 	mdata_used = 0;
467 #endif
468 
469 	PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
470 	PCRE_G(gctx_zmm) = pcre2_general_context_create(php_pcre_emalloc, php_pcre_efree, NULL);
471 	if (!PCRE_G(gctx_zmm)) {
472 		return FAILURE;
473 	}
474 
475 	return SUCCESS;
476 }
477 /* }}} */
478 
PHP_RSHUTDOWN_FUNCTION(pcre)479 static PHP_RSHUTDOWN_FUNCTION(pcre)
480 {
481 	pcre2_general_context_free(PCRE_G(gctx_zmm));
482 	PCRE_G(gctx_zmm) = NULL;
483 
484 	zval_ptr_dtor(&PCRE_G(unmatched_null_pair));
485 	zval_ptr_dtor(&PCRE_G(unmatched_empty_pair));
486 	ZVAL_UNDEF(&PCRE_G(unmatched_null_pair));
487 	ZVAL_UNDEF(&PCRE_G(unmatched_empty_pair));
488 	return SUCCESS;
489 }
490 
491 /* {{{ static pcre_clean_cache */
pcre_clean_cache(zval * data,void * arg)492 static int pcre_clean_cache(zval *data, void *arg)
493 {
494 	pcre_cache_entry *pce = (pcre_cache_entry *) Z_PTR_P(data);
495 	int *num_clean = (int *)arg;
496 
497 	if (!pce->refcount) {
498 		if (--(*num_clean) == 0) {
499 			return ZEND_HASH_APPLY_REMOVE|ZEND_HASH_APPLY_STOP;
500 		}
501 		return ZEND_HASH_APPLY_REMOVE;
502 	} else {
503 		return ZEND_HASH_APPLY_KEEP;
504 	}
505 }
506 /* }}} */
507 
/* Releases every non-NULL name in a subpattern name table of `num_subpats`
 * slots, then the persistently allocated table itself. */
static void free_subpats_table(zend_string **subpat_names, uint32_t num_subpats) {
	zend_string **slot = subpat_names;
	zend_string **end = subpat_names + num_subpats;

	for (; slot < end; slot++) {
		if (*slot != NULL) {
			zend_string_release_ex(*slot, true);
		}
	}
	pefree(subpat_names, true);
}
517 
518 /* {{{ static make_subpats_table */
make_subpats_table(uint32_t name_cnt,pcre_cache_entry * pce)519 static zend_string **make_subpats_table(uint32_t name_cnt, pcre_cache_entry *pce)
520 {
521 	uint32_t num_subpats = pce->capture_count + 1;
522 	uint32_t name_size, ni = 0;
523 	char *name_table;
524 	zend_string **subpat_names;
525 	int rc1, rc2;
526 
527 	rc1 = pcre2_pattern_info(pce->re, PCRE2_INFO_NAMETABLE, &name_table);
528 	rc2 = pcre2_pattern_info(pce->re, PCRE2_INFO_NAMEENTRYSIZE, &name_size);
529 	if (rc1 < 0 || rc2 < 0) {
530 		php_error_docref(NULL, E_WARNING, "Internal pcre2_pattern_info() error %d", rc1 < 0 ? rc1 : rc2);
531 		return NULL;
532 	}
533 
534 	subpat_names = pecalloc(num_subpats, sizeof(zend_string *), true);
535 	while (ni++ < name_cnt) {
536 		unsigned short name_idx = 0x100 * (unsigned char)name_table[0] + (unsigned char)name_table[1];
537 		const char *name = name_table + 2;
538 		/* Note: this makes a persistent string when the cache is not request-based because the string
539 		 * has to outlive the request. In that case, they will only be used within this thread
540 		 * and never be shared.
541 		 * Although we will be storing them in user-exposed arrays, they cannot cause problems
542 		 * because they only live in this thread and the last reference is deleted on shutdown
543 		 * instead of by user code. */
544 		subpat_names[name_idx] = zend_string_init(name, strlen(name), true);
545 		GC_MAKE_PERSISTENT_LOCAL(subpat_names[name_idx]);
546 		name_table += name_size;
547 	}
548 	return subpat_names;
549 }
550 /* }}} */
551 
552 /* {{{ static calculate_unit_length */
553 /* Calculates the byte length of the next character. Assumes valid UTF-8 for PCRE2_UTF. */
calculate_unit_length(pcre_cache_entry * pce,const char * start)554 static zend_always_inline size_t calculate_unit_length(pcre_cache_entry *pce, const char *start)
555 {
556 	size_t unit_len;
557 
558 	if (pce->compile_options & PCRE2_UTF) {
559 		const char *end = start;
560 
561 		/* skip continuation bytes */
562 		while ((*++end & 0xC0) == 0x80);
563 		unit_len = end - start;
564 	} else {
565 		unit_len = 1;
566 	}
567 	return unit_len;
568 }
569 /* }}} */
570 
571 /* {{{ pcre_get_compiled_regex_cache */
pcre_get_compiled_regex_cache_ex(zend_string * regex,bool locale_aware)572 PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, bool locale_aware)
573 {
574 	pcre2_code			*re = NULL;
575 #if 10 == PCRE2_MAJOR && 37 == PCRE2_MINOR && !defined(HAVE_BUNDLED_PCRE)
576 	uint32_t			 coptions = PCRE2_NO_START_OPTIMIZE;
577 #else
578 	uint32_t			 coptions = 0;
579 #endif
580 	uint32_t			 eoptions = PHP_PCRE_DEFAULT_EXTRA_COPTIONS;
581 	PCRE2_UCHAR	         error[128];
582 	PCRE2_SIZE           erroffset;
583 	int                  errnumber;
584 	char				 delimiter;
585 	char				 start_delimiter;
586 	char				 end_delimiter;
587 	char				*p, *pp;
588 	char				*pattern;
589 	size_t				 pattern_len;
590 	uint32_t			 poptions = 0;
591 	const uint8_t       *tables = NULL;
592 	zval                *zv;
593 	pcre_cache_entry	 new_entry;
594 	int					 rc;
595 	zend_string 		*key;
596 	pcre_cache_entry	*ret;
597 
598 	if (locale_aware && BG(ctype_string)) {
599 		key = zend_string_concat2(
600 			ZSTR_VAL(BG(ctype_string)), ZSTR_LEN(BG(ctype_string)),
601 			ZSTR_VAL(regex), ZSTR_LEN(regex));
602 	} else {
603 		key = regex;
604 	}
605 
606 	/* Try to lookup the cached regex entry, and if successful, just pass
607 	   back the compiled pattern, otherwise go on and compile it. */
608 	zv = zend_hash_find(&PCRE_G(pcre_cache), key);
609 	if (zv) {
610 		if (key != regex) {
611 			zend_string_release_ex(key, 0);
612 		}
613 		return (pcre_cache_entry*)Z_PTR_P(zv);
614 	}
615 
616 	p = ZSTR_VAL(regex);
617 	const char* end_p = ZSTR_VAL(regex) + ZSTR_LEN(regex);
618 
619 	/* Parse through the leading whitespace, and display a warning if we
620 	   get to the end without encountering a delimiter. */
621 	while (isspace((int)*(unsigned char *)p)) p++;
622 	if (p >= end_p) {
623 		if (key != regex) {
624 			zend_string_release_ex(key, 0);
625 		}
626 		php_error_docref(NULL, E_WARNING, "Empty regular expression");
627 		pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
628 		return NULL;
629 	}
630 
631 	/* Get the delimiter and display a warning if it is alphanumeric
632 	   or a backslash. */
633 	delimiter = *p++;
634 	if (isalnum((int)*(unsigned char *)&delimiter) || delimiter == '\\' || delimiter == '\0') {
635 		if (key != regex) {
636 			zend_string_release_ex(key, 0);
637 		}
638 		php_error_docref(NULL, E_WARNING, "Delimiter must not be alphanumeric, backslash, or NUL byte");
639 		pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
640 		return NULL;
641 	}
642 
643 	start_delimiter = delimiter;
644 	if ((pp = strchr("([{< )]}> )]}>", delimiter)))
645 		delimiter = pp[5];
646 	end_delimiter = delimiter;
647 
648 	pp = p;
649 
650 	if (start_delimiter == end_delimiter) {
651 		/* We need to iterate through the pattern, searching for the ending delimiter,
652 		   but skipping the backslashed delimiters.  If the ending delimiter is not
653 		   found, display a warning. */
654 		while (pp < end_p) {
655 			if (*pp == '\\' && pp + 1 < end_p) pp++;
656 			else if (*pp == delimiter)
657 				break;
658 			pp++;
659 		}
660 	} else {
661 		/* We iterate through the pattern, searching for the matching ending
662 		 * delimiter. For each matching starting delimiter, we increment nesting
663 		 * level, and decrement it for each matching ending delimiter. If we
664 		 * reach the end of the pattern without matching, display a warning.
665 		 */
666 		int brackets = 1; 	/* brackets nesting level */
667 		while (pp < end_p) {
668 			if (*pp == '\\' && pp + 1 < end_p) pp++;
669 			else if (*pp == end_delimiter && --brackets <= 0)
670 				break;
671 			else if (*pp == start_delimiter)
672 				brackets++;
673 			pp++;
674 		}
675 	}
676 
677 	if (pp >= end_p) {
678 		if (key != regex) {
679 			zend_string_release_ex(key, 0);
680 		}
681 		if (start_delimiter == end_delimiter) {
682 			php_error_docref(NULL,E_WARNING, "No ending delimiter '%c' found", delimiter);
683 		} else {
684 			php_error_docref(NULL,E_WARNING, "No ending matching delimiter '%c' found", delimiter);
685 		}
686 		pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
687 		return NULL;
688 	}
689 
690 	/* Make a copy of the actual pattern. */
691 	pattern_len = pp - p;
692 	pattern = estrndup(p, pattern_len);
693 
694 	/* Move on to the options */
695 	pp++;
696 
697 	/* Parse through the options, setting appropriate flags.  Display
698 	   a warning if we encounter an unknown modifier. */
699 	while (pp < end_p) {
700 		switch (*pp++) {
701 			/* Perl compatible options */
702 			case 'i':	coptions |= PCRE2_CASELESS;		break;
703 			case 'm':	coptions |= PCRE2_MULTILINE;		break;
704 			case 'n':	coptions |= PCRE2_NO_AUTO_CAPTURE;	break;
705 			case 's':	coptions |= PCRE2_DOTALL;		break;
706 			case 'x':	coptions |= PCRE2_EXTENDED;		break;
707 
708 			/* PCRE specific options */
709 			case 'A':	coptions |= PCRE2_ANCHORED;		break;
710 			case 'D':	coptions |= PCRE2_DOLLAR_ENDONLY;break;
711 #ifdef PCRE2_EXTRA_CASELESS_RESTRICT
712 			case 'r':	eoptions |= PCRE2_EXTRA_CASELESS_RESTRICT; break;
713 #endif
714 			case 'S':	/* Pass. */					break;
715 			case 'X':	/* Pass. */					break;
716 			case 'U':	coptions |= PCRE2_UNGREEDY;		break;
717 			case 'u':	coptions |= PCRE2_UTF;
718 	/* In  PCRE,  by  default, \d, \D, \s, \S, \w, and \W recognize only ASCII
719 	   characters, even in UTF-8 mode. However, this can be changed by setting
720 	   the PCRE2_UCP option. */
721 #ifdef PCRE2_UCP
722 						coptions |= PCRE2_UCP;
723 #endif
724 				break;
725 			case 'J':	coptions |= PCRE2_DUPNAMES;		break;
726 
727 			case ' ':
728 			case '\n':
729 			case '\r':
730 				break;
731 
732 			case 'e': /* legacy eval */
733 			default:
734 				if (pp[-1]) {
735 					php_error_docref(NULL, E_WARNING, "Unknown modifier '%c'", pp[-1]);
736 				} else {
737 					php_error_docref(NULL, E_WARNING, "NUL byte is not a valid modifier");
738 				}
739 				pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
740 				efree(pattern);
741 				if (key != regex) {
742 					zend_string_release_ex(key, 0);
743 				}
744 				return NULL;
745 		}
746 	}
747 
748 	if (key != regex) {
749 		tables = (uint8_t *)zend_hash_find_ptr(&char_tables, BG(ctype_string));
750 		if (!tables) {
751 			zend_string *_k;
752 			tables = pcre2_maketables(gctx);
753 			if (UNEXPECTED(!tables)) {
754 				php_error_docref(NULL,E_WARNING, "Failed to generate locale character tables");
755 				pcre_handle_exec_error(PCRE2_ERROR_NOMEMORY);
756 				zend_string_release_ex(key, 0);
757 				efree(pattern);
758 				return NULL;
759 			}
760 			_k = zend_string_init(ZSTR_VAL(BG(ctype_string)), ZSTR_LEN(BG(ctype_string)), 1);
761 			GC_MAKE_PERSISTENT_LOCAL(_k);
762 			zend_hash_add_ptr(&char_tables, _k, (void *)tables);
763 			zend_string_release(_k);
764 		}
765 	}
766 	pcre2_set_character_tables(cctx, tables);
767 
768 	pcre2_set_compile_extra_options(cctx, eoptions);
769 
770 	/* Compile pattern and display a warning if compilation failed. */
771 	re = pcre2_compile((PCRE2_SPTR)pattern, pattern_len, coptions, &errnumber, &erroffset, cctx);
772 
773 	if (re == NULL) {
774 		if (key != regex) {
775 			zend_string_release_ex(key, 0);
776 		}
777 		pcre2_get_error_message(errnumber, error, sizeof(error));
778 		php_error_docref(NULL,E_WARNING, "Compilation failed: %s at offset %zu", error, erroffset);
779 		pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
780 		efree(pattern);
781 		return NULL;
782 	}
783 
784 #ifdef HAVE_PCRE_JIT_SUPPORT
785 	if (PCRE_G(jit)) {
786 		/* Enable PCRE JIT compiler */
787 		rc = pcre2_jit_compile(re, PCRE2_JIT_COMPLETE);
788 		if (EXPECTED(rc >= 0)) {
789 			size_t jit_size = 0;
790 			if (!pcre2_pattern_info(re, PCRE2_INFO_JITSIZE, &jit_size) && jit_size > 0) {
791 				poptions |= PREG_JIT;
792 			}
793 		} else if (rc == PCRE2_ERROR_NOMEMORY) {
794 			php_error_docref(NULL, E_WARNING,
795 				"Allocation of JIT memory failed, PCRE JIT will be disabled. "
796 				"This is likely caused by security restrictions. "
797 				"Either grant PHP permission to allocate executable memory, or set pcre.jit=0");
798 			PCRE_G(jit) = 0;
799 		} else {
800 			pcre2_get_error_message(rc, error, sizeof(error));
801 			php_error_docref(NULL, E_WARNING, "JIT compilation failed: %s", error);
802 			pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
803 		}
804 	}
805 #endif
806 	efree(pattern);
807 
808 	/*
809 	 * If we reached cache limit, clean out the items from the head of the list;
810 	 * these are supposedly the oldest ones (but not necessarily the least used
811 	 * ones).
812 	 */
813 	if (zend_hash_num_elements(&PCRE_G(pcre_cache)) == PCRE_CACHE_SIZE) {
814 		int num_clean = PCRE_CACHE_SIZE / 8;
815 		zend_hash_apply_with_argument(&PCRE_G(pcre_cache), pcre_clean_cache, &num_clean);
816 	}
817 
818 	/* Store the compiled pattern and extra info in the cache. */
819 	new_entry.re = re;
820 	new_entry.preg_options = poptions;
821 	new_entry.compile_options = coptions;
822 	new_entry.refcount = 0;
823 
824 	rc = pcre2_pattern_info(re, PCRE2_INFO_CAPTURECOUNT, &new_entry.capture_count);
825 	if (rc < 0) {
826 		if (key != regex) {
827 			zend_string_release_ex(key, 0);
828 		}
829 		php_error_docref(NULL, E_WARNING, "Internal pcre2_pattern_info() error %d", rc);
830 		pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
831 		return NULL;
832 	}
833 
834 	uint32_t name_count;
835 	rc = pcre2_pattern_info(re, PCRE2_INFO_NAMECOUNT, &name_count);
836 	if (rc < 0) {
837 		if (key != regex) {
838 			zend_string_release_ex(key, 0);
839 		}
840 		php_error_docref(NULL, E_WARNING, "Internal pcre_pattern_info() error %d", rc);
841 		pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
842 		return NULL;
843 	}
844 
845 	/* Compute and cache the subpattern table to avoid computing it again over and over. */
846 	if (name_count > 0) {
847 		new_entry.subpats_table = make_subpats_table(name_count, &new_entry);
848 		if (!new_entry.subpats_table) {
849 			if (key != regex) {
850 				zend_string_release_ex(key, false);
851 			}
852 			/* Warning already emitted by make_subpats_table() */
853 			pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
854 			return NULL;
855 		}
856 	} else {
857 		new_entry.subpats_table = NULL;
858 	}
859 
860 	/*
861 	 * Interned strings are not duplicated when stored in HashTable,
862 	 * but all the interned strings created during HTTP request are removed
863 	 * at end of request. However PCRE_G(pcre_cache) must be consistent
864 	 * on the next request as well. So we disable usage of interned strings
865 	 * as hash keys especually for this table.
866 	 * See bug #63180
867 	 */
868 	if (!(GC_FLAGS(key) & IS_STR_PERMANENT)) {
869 		zend_string *str = zend_string_init(ZSTR_VAL(key), ZSTR_LEN(key), 1);
870 		GC_MAKE_PERSISTENT_LOCAL(str);
871 
872 		ret = zend_hash_add_new_mem(&PCRE_G(pcre_cache), str, &new_entry, sizeof(pcre_cache_entry));
873 		zend_string_release(str);
874 	} else {
875 		ret = zend_hash_add_new_mem(&PCRE_G(pcre_cache), key, &new_entry, sizeof(pcre_cache_entry));
876 	}
877 
878 	if (key != regex) {
879 		zend_string_release_ex(key, 0);
880 	}
881 
882 	return ret;
883 }
884 /* }}} */
885 
886 /* {{{ pcre_get_compiled_regex_cache */
pcre_get_compiled_regex_cache(zend_string * regex)887 PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex)
888 {
889 	return pcre_get_compiled_regex_cache_ex(regex, true);
890 }
891 /* }}} */
892 
893 /* {{{ pcre_get_compiled_regex */
pcre_get_compiled_regex(zend_string * regex,uint32_t * capture_count)894 PHPAPI pcre2_code *pcre_get_compiled_regex(zend_string *regex, uint32_t *capture_count)
895 {
896 	pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex);
897 
898 	if (capture_count) {
899 		*capture_count = pce ? pce->capture_count : 0;
900 	}
901 
902 	return pce ? pce->re : NULL;
903 }
904 /* }}} */
905 
906 /* XXX For the cases where it's only about match yes/no and no capture
907 		required, perhaps just a minimum sized data would suffice. */
php_pcre_create_match_data(uint32_t capture_count,pcre2_code * re)908 PHPAPI pcre2_match_data *php_pcre_create_match_data(uint32_t capture_count, pcre2_code *re)
909 {/*{{{*/
910 
911 	assert(NULL != re);
912 
913 	if (EXPECTED(!mdata_used)) {
914 		int rc = 0;
915 
916 		if (!capture_count) {
917 			/* As we deal with a non cached pattern, no other way to gather this info. */
918 			rc = pcre2_pattern_info(re, PCRE2_INFO_CAPTURECOUNT, &capture_count);
919 		}
920 
921 		if (rc >= 0 && capture_count + 1 <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
922 			mdata_used = 1;
923 			return mdata;
924 		}
925 	}
926 
927 	return pcre2_match_data_create_from_pattern(re, gctx);
928 }/*}}}*/
929 
php_pcre_free_match_data(pcre2_match_data * match_data)930 PHPAPI void php_pcre_free_match_data(pcre2_match_data *match_data)
931 {/*{{{*/
932 	if (UNEXPECTED(match_data != mdata)) {
933 		pcre2_match_data_free(match_data);
934 	} else {
935 		mdata_used = 0;
936 	}
937 }/*}}}*/
938 
/* Stores a new two-element array (null, -1) in `pair`. */
static void init_unmatched_null_pair(zval *pair) {
	zval match, offset;

	ZVAL_NULL(&match);
	ZVAL_LONG(&offset, -1);
	ZVAL_ARR(pair, zend_new_pair(&match, &offset));
}
945 
/* Stores a new two-element array ("", -1) in `pair`. */
static void init_unmatched_empty_pair(zval *pair) {
	zval match, offset;

	ZVAL_EMPTY_STRING(&match);
	ZVAL_LONG(&offset, -1);
	ZVAL_ARR(pair, zend_new_pair(&match, &offset));
}
952 
populate_match_value_str(zval * val,const char * subject,PCRE2_SIZE start_offset,PCRE2_SIZE end_offset)953 static zend_always_inline void populate_match_value_str(
954 		zval *val, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset) {
955 	ZVAL_STRINGL_FAST(val, subject + start_offset, end_offset - start_offset);
956 }
957 
populate_match_value(zval * val,const char * subject,PCRE2_SIZE start_offset,PCRE2_SIZE end_offset,bool unmatched_as_null)958 static zend_always_inline void populate_match_value(
959 		zval *val, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset,
960 		bool unmatched_as_null) {
961 	if (PCRE2_UNSET == start_offset) {
962 		if (unmatched_as_null) {
963 			ZVAL_NULL(val);
964 		} else {
965 			ZVAL_EMPTY_STRING(val);
966 		}
967 	} else {
968 		populate_match_value_str(val, subject, start_offset, end_offset);
969 	}
970 }
971 
/* Store `val` under `name` in `subpats` and take a reference on it when it was
 * actually stored.
 *
 * With the DUPNAMES option several subpatterns may share one name. The entry
 * that carries a real value has to win: a matched value always overwrites,
 * an unmatched one is only added while the name is still absent. */
static inline void add_named(
		HashTable *const subpats, zend_string *name, zval *val, bool unmatched) {
	if (unmatched) {
		if (zend_hash_add(subpats, name, val) == NULL) {
			/* Name already present: keep the existing entry, no new reference. */
			return;
		}
	} else {
		zend_hash_update(subpats, name, val);
	}

	Z_TRY_ADDREF_P(val);
}
985 
986 /* {{{ add_offset_pair */
add_offset_pair(HashTable * const result,const char * subject,PCRE2_SIZE start_offset,PCRE2_SIZE end_offset,zend_string * name,zend_long unmatched_as_null)987 static inline void add_offset_pair(
988 		HashTable *const result, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset,
989 		zend_string *name, zend_long unmatched_as_null)
990 {
991 	zval match_pair;
992 
993 	/* Add (match, offset) to the return value */
994 	if (PCRE2_UNSET == start_offset) {
995 		if (unmatched_as_null) {
996 			do {
997 				if (Z_ISUNDEF(PCRE_G(unmatched_null_pair))) {
998 					if (UNEXPECTED(EG(flags) & EG_FLAGS_IN_SHUTDOWN)) {
999 						init_unmatched_null_pair(&match_pair);
1000 						break;
1001 					} else {
1002 						init_unmatched_null_pair(&PCRE_G(unmatched_null_pair));
1003 					}
1004 				}
1005 				ZVAL_COPY(&match_pair, &PCRE_G(unmatched_null_pair));
1006 			} while (0);
1007 		} else {
1008 			do {
1009 				if (Z_ISUNDEF(PCRE_G(unmatched_empty_pair))) {
1010 					if (UNEXPECTED(EG(flags) & EG_FLAGS_IN_SHUTDOWN)) {
1011 						init_unmatched_empty_pair(&match_pair);
1012 						break;
1013 					} else {
1014 						init_unmatched_empty_pair(&PCRE_G(unmatched_empty_pair));
1015 					}
1016 				}
1017 				ZVAL_COPY(&match_pair, &PCRE_G(unmatched_empty_pair));
1018 			} while (0);
1019 		}
1020 	} else {
1021 		zval val1, val2;
1022 		populate_match_value_str(&val1, subject, start_offset, end_offset);
1023 		ZVAL_LONG(&val2, start_offset);
1024 		ZVAL_ARR(&match_pair, zend_new_pair(&val1, &val2));
1025 	}
1026 
1027 	if (name) {
1028 		add_named(result, name, &match_pair, start_offset == PCRE2_UNSET);
1029 	}
1030 	zend_hash_next_index_insert_new(result, &match_pair);
1031 }
1032 /* }}} */
1033 
/* Fill the array `subpats` with the result of a single match.
 *
 * The first `count` groups are read from `offsets` (a start/end pair per
 * group). Each one is appended by index and, when `subpat_names` has an entry
 * for it, also stored under that name. With PREG_OFFSET_CAPTURE in `flags`
 * every entry is a (match, offset) pair. With PREG_UNMATCHED_AS_NULL the
 * groups from `count` up to `num_subpats` are padded with NULL entries.
 * A non-NULL `mark` is stored under the "MARK" key. */
static void populate_subpat_array(
		zval *subpats, const char *subject, PCRE2_SIZE *offsets, zend_string **subpat_names,
		uint32_t num_subpats, int count, const PCRE2_SPTR mark, zend_long flags) {
	const bool offset_capture = (flags & PREG_OFFSET_CAPTURE) != 0;
	const bool unmatched_as_null = (flags & PREG_UNMATCHED_AS_NULL) != 0;
	HashTable *subpats_ht = Z_ARRVAL_P(subpats);
	zval val;
	int i;

	/* Groups reported by the match. */
	for (i = 0; i < count; i++) {
		zend_string *name = subpat_names ? subpat_names[i] : NULL;
		const PCRE2_SIZE from = offsets[2*i];
		const PCRE2_SIZE to = offsets[2*i+1];

		if (offset_capture) {
			add_offset_pair(subpats_ht, subject, from, to, name, unmatched_as_null);
			continue;
		}

		populate_match_value(&val, subject, from, to, unmatched_as_null);
		if (name) {
			add_named(subpats_ht, name, &val, from == PCRE2_UNSET);
		}
		zend_hash_next_index_insert_new(subpats_ht, &val);
	}

	/* Groups beyond `count` are only materialised when NULL padding is requested. */
	if (unmatched_as_null) {
		for (i = count; i < num_subpats; i++) {
			zend_string *name = subpat_names ? subpat_names[i] : NULL;

			if (offset_capture) {
				add_offset_pair(subpats_ht, NULL, PCRE2_UNSET, PCRE2_UNSET, name, 1);
				continue;
			}

			ZVAL_NULL(&val);
			if (name) {
				/* Never overwrite a value already stored under this name. */
				zend_hash_add(subpats_ht, name, &val);
			}
			zend_hash_next_index_insert_new(subpats_ht, &val);
		}
	}

	/* Add MARK, if available */
	if (mark) {
		add_assoc_string_ex(subpats, "MARK", sizeof("MARK") - 1, (char *)mark);
	}
}
1102 
/* Common entry point behind preg_match() and preg_match_all(): parses the
 * userland arguments, fetches the compiled pattern and runs the match with
 * `global` selecting single or global matching. */
static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, bool global) /* {{{ */
{
	zend_string		 *regex;			/* Regular expression */
	zend_string		 *subject;			/* String to match against */
	zval			 *subpats = NULL;	/* Array for subpatterns */
	zend_long		  flags = 0;		/* Match control flags */
	zend_long		  start_offset = 0;	/* Where the new search starts */
	pcre_cache_entry *pce;				/* Compiled regular expression */

	ZEND_PARSE_PARAMETERS_START(2, 5)
		Z_PARAM_STR(regex)
		Z_PARAM_STR(subject)
		Z_PARAM_OPTIONAL
		Z_PARAM_ZVAL(subpats)
		Z_PARAM_LONG(flags)
		Z_PARAM_LONG(start_offset)
	ZEND_PARSE_PARAMETERS_END();

	/* Look the pattern up in the cache, compiling it if necessary. */
	pce = pcre_get_compiled_regex_cache(regex);
	if (pce == NULL) {
		RETURN_FALSE;
	}

	/* php_pcre_match_impl() negates negative offsets, so the most negative
	 * value is rejected up front. */
	if (start_offset == ZEND_LONG_MIN) {
		zend_argument_value_error(5, "must be greater than " ZEND_LONG_FMT, ZEND_LONG_MIN);
		RETURN_THROWS();
	}

	/* Hold a reference on the cache entry for the duration of the match. */
	pce->refcount++;
	php_pcre_match_impl(pce, subject, return_value, subpats, global, flags, start_offset);
	pce->refcount--;
}
/* }}} */
1138 
is_known_valid_utf8(zend_string * subject_str,PCRE2_SIZE start_offset)1139 static zend_always_inline bool is_known_valid_utf8(
1140 		zend_string *subject_str, PCRE2_SIZE start_offset) {
1141 	if (!ZSTR_IS_VALID_UTF8(subject_str)) {
1142 		/* We don't know whether the string is valid UTF-8 or not. */
1143 		return 0;
1144 	}
1145 
1146 	if (start_offset == ZSTR_LEN(subject_str)) {
1147 		/* Degenerate case: Offset points to end of string. */
1148 		return 1;
1149 	}
1150 
1151 	/* Check that the offset does not point to an UTF-8 continuation byte. */
1152 	return (ZSTR_VAL(subject_str)[start_offset] & 0xc0) != 0x80;
1153 }
1154 
1155 /* {{{ php_pcre_match_impl() */
php_pcre_match_impl(pcre_cache_entry * pce,zend_string * subject_str,zval * return_value,zval * subpats,bool global,zend_long flags,zend_off_t start_offset)1156 PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str, zval *return_value,
1157 	zval *subpats, bool global, zend_long flags, zend_off_t start_offset)
1158 {
1159 	zval			 result_set;		/* Holds a set of subpatterns after
1160 										   a global match */
1161 	HashTable	   **match_sets = NULL;	/* An array of sets of matches for each
1162 										   subpattern after a global match */
1163 	uint32_t		 options;			/* Execution options */
1164 	int				 count;				/* Count of matched subpatterns */
1165 	uint32_t		 num_subpats;		/* Number of captured subpatterns */
1166 	int				 matched;			/* Has anything matched */
1167 	zend_string	   **subpat_names;		/* Array for named subpatterns */
1168 	size_t			 i;
1169 	uint32_t		 subpats_order;		/* Order of subpattern matches */
1170 	uint32_t		 offset_capture;	/* Capture match offsets: yes/no */
1171 	zend_long		 unmatched_as_null;	/* Null non-matches: yes/no */
1172 	PCRE2_SPTR       mark = NULL;		/* Target for MARK name */
1173 	HashTable		*marks = NULL;		/* Array of marks for PREG_PATTERN_ORDER */
1174 	pcre2_match_data *match_data;
1175 	PCRE2_SIZE		 start_offset2, orig_start_offset;
1176 
1177 	char *subject = ZSTR_VAL(subject_str);
1178 	size_t subject_len = ZSTR_LEN(subject_str);
1179 
1180 	/* Overwrite the passed-in value for subpatterns with an empty array. */
1181 	if (subpats != NULL) {
1182 		subpats = zend_try_array_init(subpats);
1183 		if (!subpats) {
1184 			RETURN_THROWS();
1185 		}
1186 	}
1187 
1188 	subpats_order = global ? PREG_PATTERN_ORDER : 0;
1189 
1190 	if (flags) {
1191 		offset_capture = flags & PREG_OFFSET_CAPTURE;
1192 		unmatched_as_null = flags & PREG_UNMATCHED_AS_NULL;
1193 
1194 		/*
1195 		 * subpats_order is pre-set to pattern mode so we change it only if
1196 		 * necessary.
1197 		 */
1198 		if (flags & 0xff) {
1199 			subpats_order = flags & 0xff;
1200 			if ((global && (subpats_order < PREG_PATTERN_ORDER || subpats_order > PREG_SET_ORDER)) ||
1201 				(!global && subpats_order != 0)) {
1202 				zend_argument_value_error(4, "must be a PREG_* constant");
1203 				RETURN_THROWS();
1204 			}
1205 		}
1206 	} else {
1207 		offset_capture = 0;
1208 		unmatched_as_null = 0;
1209 	}
1210 
1211 	/* Negative offset counts from the end of the string. */
1212 	if (start_offset < 0) {
1213 		if ((PCRE2_SIZE)-start_offset <= subject_len) {
1214 			start_offset2 = subject_len + start_offset;
1215 		} else {
1216 			start_offset2 = 0;
1217 		}
1218 	} else {
1219 		start_offset2 = (PCRE2_SIZE)start_offset;
1220 	}
1221 
1222 	if (start_offset2 > subject_len) {
1223 		pcre_handle_exec_error(PCRE2_ERROR_BADOFFSET);
1224 		RETURN_FALSE;
1225 	}
1226 
1227 	/* Calculate the size of the offsets array, and allocate memory for it. */
1228 	num_subpats = pce->capture_count + 1;
1229 
1230 	/*
1231 	 * Build a mapping from subpattern numbers to their names. We will
1232 	 * allocate the table only if there are any named subpatterns.
1233 	 */
1234 	subpat_names = NULL;
1235 	if (subpats) {
1236 		subpat_names = pce->subpats_table;
1237 	}
1238 
1239 	matched = 0;
1240 	PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1241 
1242 	if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
1243 		match_data = mdata;
1244 	} else {
1245 		match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
1246 		if (!match_data) {
1247 			PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1248 			RETURN_FALSE;
1249 		}
1250 	}
1251 
1252 	/* Allocate match sets array and initialize the values. */
1253 	if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
1254 		match_sets = safe_emalloc(num_subpats, sizeof(HashTable *), 0);
1255 		for (i=0; i<num_subpats; i++) {
1256 			match_sets[i] = zend_new_array(0);
1257 		}
1258 	}
1259 
1260 	/* Array of subpattern offsets */
1261 	PCRE2_SIZE *const offsets = pcre2_get_ovector_pointer(match_data);
1262 
1263 	orig_start_offset = start_offset2;
1264 	options =
1265 		(pce->compile_options & PCRE2_UTF) && !is_known_valid_utf8(subject_str, orig_start_offset)
1266 			? 0 : PCRE2_NO_UTF_CHECK;
1267 
1268 	/* Execute the regular expression. */
1269 #ifdef HAVE_PCRE_JIT_SUPPORT
1270 	if ((pce->preg_options & PREG_JIT) && options) {
1271 		count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1272 				PCRE2_NO_UTF_CHECK, match_data, mctx);
1273 	} else
1274 #endif
1275 	count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1276 			options, match_data, mctx);
1277 
1278 	while (1) {
1279 		/* If something has matched */
1280 		if (count >= 0) {
1281 			/* Check for too many substrings condition. */
1282 			if (UNEXPECTED(count == 0)) {
1283 				php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
1284 				count = num_subpats;
1285 			}
1286 
1287 matched:
1288 			matched++;
1289 
1290 			/* If subpatterns array has been passed, fill it in with values. */
1291 			if (subpats != NULL) {
1292 				/* Try to get the list of substrings and display a warning if failed. */
1293 				if (UNEXPECTED(offsets[1] < offsets[0])) {
1294 					if (match_sets) efree(match_sets);
1295 					php_error_docref(NULL, E_WARNING, "Get subpatterns list failed");
1296 					RETURN_FALSE;
1297 				}
1298 
1299 				if (global) {	/* global pattern matching */
1300 					if (subpats_order == PREG_PATTERN_ORDER) {
1301 						/* For each subpattern, insert it into the appropriate array. */
1302 						if (offset_capture) {
1303 							for (i = 0; i < count; i++) {
1304 								add_offset_pair(
1305 									match_sets[i], subject, offsets[2*i], offsets[2*i+1],
1306 									NULL, unmatched_as_null);
1307 							}
1308 						} else {
1309 							for (i = 0; i < count; i++) {
1310 								zval val;
1311 								populate_match_value(
1312 									&val, subject, offsets[2*i], offsets[2*i+1], unmatched_as_null);
1313 								zend_hash_next_index_insert_new(match_sets[i], &val);
1314 							}
1315 						}
1316 						mark = pcre2_get_mark(match_data);
1317 						/* Add MARK, if available */
1318 						if (mark) {
1319 							if (!marks) {
1320 								marks = zend_new_array(0);
1321 							}
1322 							zval tmp;
1323 							ZVAL_STRING(&tmp, (char *) mark);
1324 							zend_hash_index_add_new(marks, matched - 1, &tmp);
1325 						}
1326 						/*
1327 						 * If the number of captured subpatterns on this run is
1328 						 * less than the total possible number, pad the result
1329 						 * arrays with NULLs or empty strings.
1330 						 */
1331 						if (count < num_subpats) {
1332 							for (int i = count; i < num_subpats; i++) {
1333 								if (offset_capture) {
1334 									add_offset_pair(
1335 										match_sets[i], NULL, PCRE2_UNSET, PCRE2_UNSET,
1336 										NULL, unmatched_as_null);
1337 								} else if (unmatched_as_null) {
1338 									zval tmp;
1339 									ZVAL_NULL(&tmp);
1340 									zend_hash_next_index_insert_new(match_sets[i], &tmp);
1341 								} else {
1342 									zval tmp;
1343 									ZVAL_EMPTY_STRING(&tmp);
1344 									zend_hash_next_index_insert_new(match_sets[i], &tmp);
1345 								}
1346 							}
1347 						}
1348 					} else {
1349 						/* Allocate and populate the result set array */
1350 						mark = pcre2_get_mark(match_data);
1351 						array_init_size(&result_set, count + (mark ? 1 : 0));
1352 						populate_subpat_array(
1353 							&result_set, subject, offsets, subpat_names,
1354 							num_subpats, count, mark, flags);
1355 						/* And add it to the output array */
1356 						zend_hash_next_index_insert_new(Z_ARRVAL_P(subpats), &result_set);
1357 					}
1358 				} else {			/* single pattern matching */
1359 					/* For each subpattern, insert it into the subpatterns array. */
1360 					mark = pcre2_get_mark(match_data);
1361 					populate_subpat_array(
1362 						subpats, subject, offsets, subpat_names, num_subpats, count, mark, flags);
1363 					break;
1364 				}
1365 			}
1366 
1367 			/* Advance to the next piece. */
1368 			start_offset2 = offsets[1];
1369 
1370 			/* If we have matched an empty string, mimic what Perl's /g options does.
1371 			   This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
1372 			   the match again at the same point. If this fails (picked up above) we
1373 			   advance to the next character. */
1374 			if (start_offset2 == offsets[0]) {
1375 				count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1376 					PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
1377 				if (count >= 0) {
1378 					if (global) {
1379 						goto matched;
1380 					} else {
1381 						break;
1382 					}
1383 				} else if (count == PCRE2_ERROR_NOMATCH) {
1384 					/* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
1385 					   this is not necessarily the end. We need to advance
1386 					   the start offset, and continue. Fudge the offset values
1387 					   to achieve this, unless we're already at the end of the string. */
1388 					if (start_offset2 < subject_len) {
1389 						size_t unit_len = calculate_unit_length(pce, subject + start_offset2);
1390 
1391 						start_offset2 += unit_len;
1392 					} else {
1393 						break;
1394 					}
1395 				} else {
1396 					goto error;
1397 				}
1398 			}
1399 		} else if (count == PCRE2_ERROR_NOMATCH) {
1400 			break;
1401 		} else {
1402 error:
1403 			pcre_handle_exec_error(count);
1404 			break;
1405 		}
1406 
1407 		if (!global) {
1408 			break;
1409 		}
1410 
1411 		/* Execute the regular expression. */
1412 #ifdef HAVE_PCRE_JIT_SUPPORT
1413 		if ((pce->preg_options & PREG_JIT)) {
1414 			if (start_offset2 > subject_len) {
1415 				pcre_handle_exec_error(PCRE2_ERROR_BADOFFSET);
1416 				break;
1417 			}
1418 			count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1419 					PCRE2_NO_UTF_CHECK, match_data, mctx);
1420 		} else
1421 #endif
1422 		count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1423 				PCRE2_NO_UTF_CHECK, match_data, mctx);
1424 	}
1425 	if (match_data != mdata) {
1426 		pcre2_match_data_free(match_data);
1427 	}
1428 
1429 	/* Add the match sets to the output array and clean up */
1430 	if (match_sets) {
1431 		if (subpat_names) {
1432 			for (i = 0; i < num_subpats; i++) {
1433 				zval wrapper;
1434 				ZVAL_ARR(&wrapper, match_sets[i]);
1435 				if (subpat_names[i]) {
1436 					zend_hash_update(Z_ARRVAL_P(subpats), subpat_names[i], &wrapper);
1437 					GC_ADDREF(match_sets[i]);
1438 				}
1439 				zend_hash_next_index_insert_new(Z_ARRVAL_P(subpats), &wrapper);
1440 			}
1441 		} else {
1442 			for (i = 0; i < num_subpats; i++) {
1443 				zval wrapper;
1444 				ZVAL_ARR(&wrapper, match_sets[i]);
1445 				zend_hash_next_index_insert_new(Z_ARRVAL_P(subpats), &wrapper);
1446 			}
1447 		}
1448 		efree(match_sets);
1449 
1450 		if (marks) {
1451 			zval tmp;
1452 			ZVAL_ARR(&tmp, marks);
1453 			zend_hash_str_update(Z_ARRVAL_P(subpats), "MARK", sizeof("MARK") - 1, &tmp);
1454 		}
1455 	}
1456 
1457 	if (PCRE_G(error_code) == PHP_PCRE_NO_ERROR) {
1458 		/* If there was no error and we're in /u mode, remember that the string is valid UTF-8. */
1459 		if ((pce->compile_options & PCRE2_UTF)
1460 				&& !ZSTR_IS_INTERNED(subject_str) && orig_start_offset == 0) {
1461 			GC_ADD_FLAGS(subject_str, IS_STR_VALID_UTF8);
1462 		}
1463 
1464 		RETVAL_LONG(matched);
1465 	} else {
1466 		RETVAL_FALSE;
1467 	}
1468 }
1469 /* }}} */
1470 
1471 /* {{{ Perform a Perl-style regular expression match */
PHP_FUNCTION(preg_match)1472 PHP_FUNCTION(preg_match)
1473 {
1474 	php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, false);
1475 }
1476 /* }}} */
1477 
1478 ZEND_FRAMELESS_FUNCTION(preg_match, 2)
1479 {
1480 	zval regex_tmp, subject_tmp;
1481 	zend_string *regex, *subject;
1482 
1483 	Z_FLF_PARAM_STR(1, regex, regex_tmp);
1484 	Z_FLF_PARAM_STR(2, subject, subject_tmp);
1485 
1486 	/* Compile regex or get it from cache. */
1487 	pcre_cache_entry *pce;
1488 	if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
1489 		RETURN_FALSE;
1490 	}
1491 
1492 	pce->refcount++;
1493 	php_pcre_match_impl(pce, subject, return_value, /* subpats */ NULL,
1494 		/* global */ false, /* flags */ 0, /* start_offset */ 0);
1495 	pce->refcount--;
1496 
1497 flf_clean:
1498 	Z_FLF_PARAM_FREE_STR(1, regex_tmp);
1499 	Z_FLF_PARAM_FREE_STR(2, subject_tmp);
1500 }
1501 
1502 /* {{{ Perform a Perl-style global regular expression match */
PHP_FUNCTION(preg_match_all)1503 PHP_FUNCTION(preg_match_all)
1504 {
1505 	php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, true);
1506 }
1507 /* }}} */
1508 
/* {{{ preg_get_backref */
/* Try to read a back-reference at *str, which must point at the introducing
 * character. Accepted forms are the introducer followed by one or two decimal
 * digits, or "${" + one or two digits + "}".
 *
 * On success the number is stored in *backref, *str is moved past the
 * reference and 1 is returned. On failure 0 is returned and *str is left
 * alone (*backref may already have been written). */
static int preg_get_backref(char **str, int *backref)
{
	char *cursor = *str;
	int braced;

	/* The introducer is the last character: nothing can follow it. */
	if (cursor[1] == '\0') {
		return 0;
	}

	/* Step over the introducer and, for the "${" form, the brace as well. */
	braced = (cursor[0] == '$' && cursor[1] == '{');
	cursor += braced ? 2 : 1;

	/* At least one digit is mandatory. */
	if (*cursor < '0' || *cursor > '9') {
		return 0;
	}
	*backref = *cursor - '0';
	cursor++;

	/* An optional second digit. */
	if (*cursor >= '0' && *cursor <= '9') {
		*backref = *backref * 10 + *cursor - '0';
		cursor++;
	}

	/* The braced form has to be closed right after the digits. */
	if (braced) {
		if (*cursor != '}') {
			return 0;
		}
		cursor++;
	}

	*str = cursor;
	return 1;
}
/* }}} */
1546 
1547 /* {{{ preg_do_repl_func */
preg_do_repl_func(zend_fcall_info * fci,zend_fcall_info_cache * fcc,const char * subject,PCRE2_SIZE * offsets,zend_string ** subpat_names,uint32_t num_subpats,int count,const PCRE2_SPTR mark,zend_long flags)1548 static zend_string *preg_do_repl_func(zend_fcall_info *fci, zend_fcall_info_cache *fcc, const char *subject, PCRE2_SIZE *offsets, zend_string **subpat_names, uint32_t num_subpats, int count, const PCRE2_SPTR mark, zend_long flags)
1549 {
1550 	zend_string *result_str;
1551 	zval		 retval;			/* Function return value */
1552 	zval	     arg;				/* Argument to pass to function */
1553 
1554 	array_init_size(&arg, count + (mark ? 1 : 0));
1555 	populate_subpat_array(&arg, subject, offsets, subpat_names, num_subpats, count, mark, flags);
1556 
1557 	fci->retval = &retval;
1558 	fci->param_count = 1;
1559 	fci->params = &arg;
1560 
1561 	if (zend_call_function(fci, fcc) == SUCCESS && Z_TYPE(retval) != IS_UNDEF) {
1562 		if (EXPECTED(Z_TYPE(retval) == IS_STRING)) {
1563 			result_str = Z_STR(retval);
1564 		} else {
1565 			result_str = zval_get_string_func(&retval);
1566 			zval_ptr_dtor(&retval);
1567 		}
1568 	} else {
1569 		if (!EG(exception)) {
1570 			php_error_docref(NULL, E_WARNING, "Unable to call custom replacement function");
1571 		}
1572 
1573 		result_str = zend_string_init(&subject[offsets[0]], offsets[1] - offsets[0], 0);
1574 	}
1575 
1576 	zval_ptr_dtor(&arg);
1577 
1578 	return result_str;
1579 }
1580 /* }}} */
1581 
1582 /* {{{ php_pcre_replace */
php_pcre_replace(zend_string * regex,zend_string * subject_str,const char * subject,size_t subject_len,zend_string * replace_str,size_t limit,size_t * replace_count)1583 PHPAPI zend_string *php_pcre_replace(zend_string *regex,
1584 							  zend_string *subject_str,
1585 							  const char *subject, size_t subject_len,
1586 							  zend_string *replace_str,
1587 							  size_t limit, size_t *replace_count)
1588 {
1589 	pcre_cache_entry	*pce;			    /* Compiled regular expression */
1590 	zend_string	 		*result;			/* Function result */
1591 
1592 	/* Abort on pending exception, e.g. thrown from __toString(). */
1593 	if (UNEXPECTED(EG(exception))) {
1594 		return NULL;
1595 	}
1596 
1597 	/* Compile regex or get it from cache. */
1598 	if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
1599 		return NULL;
1600 	}
1601 	pce->refcount++;
1602 	result = php_pcre_replace_impl(pce, subject_str, subject, subject_len, replace_str,
1603 		limit, replace_count);
1604 	pce->refcount--;
1605 
1606 	return result;
1607 }
1608 /* }}} */
1609 
1610 /* {{{ php_pcre_replace_impl() */
php_pcre_replace_impl(pcre_cache_entry * pce,zend_string * subject_str,const char * subject,size_t subject_len,zend_string * replace_str,size_t limit,size_t * replace_count)1611 PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *subject_str, const char *subject, size_t subject_len, zend_string *replace_str, size_t limit, size_t *replace_count)
1612 {
1613 	uint32_t		 options;			/* Execution options */
1614 	int				 count;				/* Count of matched subpatterns */
1615 	uint32_t		 num_subpats;		/* Number of captured subpatterns */
1616 	size_t			 new_len;			/* Length of needed storage */
1617 	size_t			 alloc_len;			/* Actual allocated length */
1618 	size_t			 match_len;			/* Length of the current match */
1619 	int				 backref;			/* Backreference number */
1620 	PCRE2_SIZE		 start_offset;		/* Where the new search starts */
1621 	size_t			 last_end_offset;	/* Where the last search ended */
1622 	char			*walkbuf,			/* Location of current replacement in the result */
1623 					*walk,				/* Used to walk the replacement string */
1624 					 walk_last;			/* Last walked character */
1625 	const char		*match,				/* The current match */
1626 					*piece,				/* The current piece of subject */
1627 					*replace_end;		/* End of replacement string */
1628 	size_t			result_len; 		/* Length of result */
1629 	zend_string		*result;			/* Result of replacement */
1630 	pcre2_match_data *match_data;
1631 
1632 	/* Calculate the size of the offsets array, and allocate memory for it. */
1633 	num_subpats = pce->capture_count + 1;
1634 	alloc_len = 0;
1635 	result = NULL;
1636 
1637 	/* Initialize */
1638 	match = NULL;
1639 	start_offset = 0;
1640 	last_end_offset = 0;
1641 	result_len = 0;
1642 	PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1643 
1644 	if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
1645 		match_data = mdata;
1646 	} else {
1647 		match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
1648 		if (!match_data) {
1649 			PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1650 			return NULL;
1651 		}
1652 	}
1653 
1654 	options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
1655 
1656 	/* Array of subpattern offsets */
1657 	PCRE2_SIZE *const offsets = pcre2_get_ovector_pointer(match_data);
1658 
1659 	/* Execute the regular expression. */
1660 #ifdef HAVE_PCRE_JIT_SUPPORT
1661 	if ((pce->preg_options & PREG_JIT) && options) {
1662 		count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1663 				PCRE2_NO_UTF_CHECK, match_data, mctx);
1664 	} else
1665 #endif
1666 	count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1667 			options, match_data, mctx);
1668 
1669 	while (1) {
1670 		piece = subject + last_end_offset;
1671 
1672 		if (count >= 0 && limit > 0) {
1673 			bool simple_string;
1674 
1675 			/* Check for too many substrings condition. */
1676 			if (UNEXPECTED(count == 0)) {
1677 				php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
1678 				count = num_subpats;
1679 			}
1680 
1681 matched:
1682 			if (UNEXPECTED(offsets[1] < offsets[0])) {
1683 				PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1684 				if (result) {
1685 					zend_string_release_ex(result, 0);
1686 					result = NULL;
1687 				}
1688 				break;
1689 			}
1690 
1691 			if (replace_count) {
1692 				++*replace_count;
1693 			}
1694 
1695 			/* Set the match location in subject */
1696 			match = subject + offsets[0];
1697 
1698 			new_len = result_len + offsets[0] - last_end_offset; /* part before the match */
1699 
1700 			walk = ZSTR_VAL(replace_str);
1701 			replace_end = walk + ZSTR_LEN(replace_str);
1702 			walk_last = 0;
1703 			simple_string = 1;
1704 			while (walk < replace_end) {
1705 				if ('\\' == *walk || '$' == *walk) {
1706 					simple_string = 0;
1707 					if (walk_last == '\\') {
1708 						walk++;
1709 						walk_last = 0;
1710 						continue;
1711 					}
1712 					if (preg_get_backref(&walk, &backref)) {
1713 						if (backref < count)
1714 							new_len += offsets[(backref<<1)+1] - offsets[backref<<1];
1715 						continue;
1716 					}
1717 				}
1718 				new_len++;
1719 				walk++;
1720 				walk_last = walk[-1];
1721 			}
1722 
1723 			if (new_len >= alloc_len) {
1724 				alloc_len = zend_safe_address_guarded(2, new_len, ZSTR_MAX_OVERHEAD) - ZSTR_MAX_OVERHEAD;
1725 				if (result == NULL) {
1726 					result = zend_string_alloc(alloc_len, 0);
1727 				} else {
1728 					result = zend_string_extend(result, alloc_len, 0);
1729 				}
1730 			}
1731 
1732 			if (match-piece > 0) {
1733 				/* copy the part of the string before the match */
1734 				memcpy(&ZSTR_VAL(result)[result_len], piece, match-piece);
1735 				result_len += (match-piece);
1736 			}
1737 
1738 			if (simple_string) {
1739 				/* copy replacement */
1740 				memcpy(&ZSTR_VAL(result)[result_len], ZSTR_VAL(replace_str), ZSTR_LEN(replace_str)+1);
1741 				result_len += ZSTR_LEN(replace_str);
1742 			} else {
1743 				/* copy replacement and backrefs */
1744 				walkbuf = ZSTR_VAL(result) + result_len;
1745 
1746 				walk = ZSTR_VAL(replace_str);
1747 				walk_last = 0;
1748 				while (walk < replace_end) {
1749 					if ('\\' == *walk || '$' == *walk) {
1750 						if (walk_last == '\\') {
1751 							*(walkbuf-1) = *walk++;
1752 							walk_last = 0;
1753 							continue;
1754 						}
1755 						if (preg_get_backref(&walk, &backref)) {
1756 							if (backref < count) {
1757 								if (offsets[backref<<1] < SIZE_MAX) {
1758 									match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
1759 									walkbuf = zend_mempcpy(walkbuf, subject + offsets[backref << 1], match_len);
1760 								}
1761 							}
1762 							continue;
1763 						}
1764 					}
1765 					*walkbuf++ = *walk++;
1766 					walk_last = walk[-1];
1767 				}
1768 				*walkbuf = '\0';
1769 				/* increment the result length by how much we've added to the string */
1770 				result_len += (walkbuf - (ZSTR_VAL(result) + result_len));
1771 			}
1772 
1773 			limit--;
1774 
1775 			/* Advance to the next piece. */
1776 			start_offset = last_end_offset = offsets[1];
1777 
1778 			/* If we have matched an empty string, mimic what Perl's /g options does.
1779 			   This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
1780 			   the match again at the same point. If this fails (picked up above) we
1781 			   advance to the next character. */
1782 			if (start_offset == offsets[0]) {
1783 				count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1784 					PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
1785 
1786 				piece = subject + start_offset;
1787 				if (count >= 0 && limit > 0) {
1788 					goto matched;
1789 				} else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
1790 					/* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
1791 					   this is not necessarily the end. We need to advance
1792 					   the start offset, and continue. Fudge the offset values
1793 					   to achieve this, unless we're already at the end of the string. */
1794 					if (start_offset < subject_len) {
1795 						size_t unit_len = calculate_unit_length(pce, piece);
1796 						start_offset += unit_len;
1797 					} else {
1798 						goto not_matched;
1799 					}
1800 				} else {
1801 					goto error;
1802 				}
1803 			}
1804 
1805 		} else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
1806 not_matched:
1807 			if (!result && subject_str) {
1808 				result = zend_string_copy(subject_str);
1809 				break;
1810 			}
1811 			/* now we know exactly how long it is */
1812 			alloc_len = result_len + subject_len - last_end_offset;
1813 			if (NULL != result) {
1814 				result = zend_string_realloc(result, alloc_len, 0);
1815 			} else {
1816 				result = zend_string_alloc(alloc_len, 0);
1817 			}
1818 			/* stick that last bit of string on our output */
1819 			memcpy(ZSTR_VAL(result) + result_len, piece, subject_len - last_end_offset);
1820 			result_len += subject_len - last_end_offset;
1821 			ZSTR_VAL(result)[result_len] = '\0';
1822 			ZSTR_LEN(result) = result_len;
1823 			break;
1824 		} else {
1825 error:
1826 			pcre_handle_exec_error(count);
1827 			if (result) {
1828 				zend_string_release_ex(result, 0);
1829 				result = NULL;
1830 			}
1831 			break;
1832 		}
1833 
1834 #ifdef HAVE_PCRE_JIT_SUPPORT
1835 		if (pce->preg_options & PREG_JIT) {
1836 			count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1837 					PCRE2_NO_UTF_CHECK, match_data, mctx);
1838 		} else
1839 #endif
1840 		count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1841 					PCRE2_NO_UTF_CHECK, match_data, mctx);
1842 	}
1843 	if (match_data != mdata) {
1844 		pcre2_match_data_free(match_data);
1845 	}
1846 
1847 	return result;
1848 }
1849 /* }}} */
1850 
1851 /* {{{ php_pcre_replace_func_impl() */
/*
 * Replace matches of the compiled pattern `pce` in `subject` with strings
 * obtained from a user callback (via preg_do_repl_func()).
 *
 * pce            compiled pattern; supplies the pcre2 code, capture count,
 *                named-subpattern table and option bits read below.
 * subject_str    optional zend_string owning the subject. When it is given
 *                and no result buffer was ever created, the function returns
 *                zend_string_copy(subject_str) instead of building a string.
 * subject/subject_len  bytes and length actually matched against.
 * fci/fcc        callback information forwarded to preg_do_repl_func().
 * limit          number of replacements still allowed; decremented after each
 *                replacement. Once it is 0, further matches are not replaced
 *                and the rest of the subject is copied through unchanged.
 * replace_count  optional counter, incremented once per replacement.
 * flags          forwarded to preg_do_repl_func().
 *
 * Returns the resulting string, or NULL when match data cannot be created,
 * when the match reports an inverted range, or when PCRE2 returns an error
 * other than PCRE2_ERROR_NOMATCH. PCRE_G(error_code) is reset on entry.
 */
php_pcre_replace_func_impl(pcre_cache_entry * pce,zend_string * subject_str,const char * subject,size_t subject_len,zend_fcall_info * fci,zend_fcall_info_cache * fcc,size_t limit,size_t * replace_count,zend_long flags)1852 static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_string *subject_str, const char *subject, size_t subject_len, zend_fcall_info *fci, zend_fcall_info_cache *fcc, size_t limit, size_t *replace_count, zend_long flags)
1853 {
1854 	uint32_t		 options;			/* Execution options */
1855 	int				 count;				/* Count of matched subpatterns */
1856 	zend_string		**subpat_names;		/* Array for named subpatterns */
1857 	uint32_t		 num_subpats;		/* Number of captured subpatterns */
1858 	size_t			 new_len;			/* Length of needed storage */
1859 	size_t			 alloc_len;			/* Actual allocated length */
1860 	PCRE2_SIZE		 start_offset;		/* Where the new search starts */
1861 	size_t			 last_end_offset;	/* Where the last search ended */
1862 	const char		*match,				/* The current match */
1863 					*piece;				/* The current piece of subject */
1864 	size_t			result_len; 		/* Length of result */
1865 	zend_string		*result;			/* Result of replacement */
1866 	zend_string     *eval_result;		/* Result of custom function */
1867 	pcre2_match_data *match_data;
1868 	bool old_mdata_used;
1869 
1870 	/* Calculate the size of the offsets array, and allocate memory for it. */
1871 	num_subpats = pce->capture_count + 1;
1872 	subpat_names = pce->subpats_table;
1873 
1874 	alloc_len = 0;
1875 	result = NULL;
1876 
1877 	/* Initialize */
1878 	match = NULL;
1879 	start_offset = 0;
1880 	last_end_offset = 0;
1881 	result_len = 0;
1882 	PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1883 
	/* Take the shared `mdata` block when nobody else has marked it as in use
	   and the pattern's subpattern count fits its preallocated size; otherwise
	   create a private block. The previous flag value is remembered so it can
	   be put back on every exit path.
	   NOTE(review): marking it used presumably protects it from preg_* calls
	   made by the callback itself - confirm against the other users of mdata. */
1884 	old_mdata_used = mdata_used;
1885 	if (!old_mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
1886 		mdata_used = 1;
1887 		match_data = mdata;
1888 	} else {
1889 		match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
1890 		if (!match_data) {
1891 			PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1892 			mdata_used = old_mdata_used;
1893 			return NULL;
1894 		}
1895 	}
1896 
	/* Patterns compiled with PCRE2_UTF run their first match with options 0,
	   i.e. without PCRE2_NO_UTF_CHECK; every later match in this function
	   passes PCRE2_NO_UTF_CHECK explicitly. */
1897 	options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
1898 
1899 	/* Array of subpattern offsets */
1900 	PCRE2_SIZE *const offsets = pcre2_get_ovector_pointer(match_data);
1901 
1902 	/* Execute the regular expression. */
	/* The JIT entry point is used for the first match only when `options` is
	   non-zero, which by the assignment above means a non-UTF pattern. */
1903 #ifdef HAVE_PCRE_JIT_SUPPORT
1904 	if ((pce->preg_options & PREG_JIT) && options) {
1905 		count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1906 				PCRE2_NO_UTF_CHECK, match_data, mctx);
1907 	} else
1908 #endif
1909 	count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1910 			options, match_data, mctx);
1911 
1912 	while (1) {
1913 		piece = subject + last_end_offset;
1914 
		/* A match is only acted upon while replacements are still allowed. */
1915 		if (count >= 0 && limit) {
1916 			/* Check for too many substrings condition. */
1917 			if (UNEXPECTED(count == 0)) {
1918 				php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
1919 				count = num_subpats;
1920 			}
1921 
			/* Also entered by goto from the empty-match retry further down. */
1922 matched:
			/* A match whose end precedes its start is treated as an internal
			   error: drop any partial result and return NULL. */
1923 			if (UNEXPECTED(offsets[1] < offsets[0])) {
1924 				PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1925 				if (result) {
1926 					zend_string_release_ex(result, 0);
1927 					result = NULL;
1928 				}
1929 				break;
1930 			}
1931 
1932 			if (replace_count) {
1933 				++*replace_count;
1934 			}
1935 
1936 			/* Set the match location in subject */
1937 			match = subject + offsets[0];
1938 
1939 			new_len = result_len + offsets[0] - last_end_offset; /* part before the match */
1940 
1941 			/* Use custom function to get replacement string and its length. */
1942 			eval_result = preg_do_repl_func(
1943 				fci, fcc, subject, offsets, subpat_names, num_subpats, count,
1944 				pcre2_get_mark(match_data), flags);
1945 
			/* Add the callback result's length to the required size and grow
			   the output buffer when it no longer fits.
			   NOTE(review): zend_safe_address_guarded() is assumed to be an
			   overflow-checked multiply-add, which would make the new capacity
			   twice the required length - confirm. */
1946 			ZEND_ASSERT(eval_result);
1947 			new_len = zend_safe_address_guarded(1, ZSTR_LEN(eval_result) + ZSTR_MAX_OVERHEAD, new_len) -ZSTR_MAX_OVERHEAD;
1948 			if (new_len >= alloc_len) {
1949 				alloc_len = zend_safe_address_guarded(2, new_len, ZSTR_MAX_OVERHEAD) - ZSTR_MAX_OVERHEAD;
1950 				if (result == NULL) {
1951 					result = zend_string_alloc(alloc_len, 0);
1952 				} else {
1953 					result = zend_string_extend(result, alloc_len, 0);
1954 				}
1955 			}
1956 
1957 			if (match-piece > 0) {
1958 				/* copy the part of the string before the match */
1959 				memcpy(ZSTR_VAL(result) + result_len, piece, match-piece);
1960 				result_len += (match-piece);
1961 			}
1962 
1963 			/* If using custom function, copy result to the buffer and clean up. */
1964 			memcpy(ZSTR_VAL(result) + result_len, ZSTR_VAL(eval_result), ZSTR_LEN(eval_result));
1965 			result_len += ZSTR_LEN(eval_result);
1966 			zend_string_release_ex(eval_result, 0);
1967 
1968 			limit--;
1969 
1970 			/* Advance to the next piece. */
1971 			start_offset = last_end_offset = offsets[1];
1972 
1973 			/* If we have matched an empty string, mimic what Perl's /g options does.
1974 			   This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
1975 			   the match again at the same point. If this fails (picked up above) we
1976 			   advance to the next character. */
1977 			if (start_offset == offsets[0]) {
1978 				count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1979 					PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
1980 
1981 				piece = subject + start_offset;
1982 				if (count >= 0 && limit) {
1983 					goto matched;
1984 				} else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
1985 					/* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
1986 					   this is not necessarily the end. We need to advance
1987 					   the start offset, and continue. Fudge the offset values
1988 					   to achieve this, unless we're already at the end of the string. */
1989 					if (start_offset < subject_len) {
1990 						size_t unit_len = calculate_unit_length(pce, piece);
1991 						start_offset += unit_len;
1992 					} else {
1993 						goto not_matched;
1994 					}
1995 				} else {
1996 					goto error;
1997 				}
1998 			}
1999 
2000 		} else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
2001 not_matched:
			/* No output buffer exists, so nothing was replaced: when the
			   caller supplied the subject as a zend_string, return it through
			   zend_string_copy() rather than building a new string. */
2002 			if (!result && subject_str) {
2003 				result = zend_string_copy(subject_str);
2004 				break;
2005 			}
2006 			/* now we know exactly how long it is */
2007 			alloc_len = result_len + subject_len - last_end_offset;
2008 			if (NULL != result) {
2009 				result = zend_string_realloc(result, alloc_len, 0);
2010 			} else {
2011 				result = zend_string_alloc(alloc_len, 0);
2012 			}
2013 			/* stick that last bit of string on our output */
2014 			memcpy(ZSTR_VAL(result) + result_len, piece, subject_len - last_end_offset);
2015 			result_len += subject_len - last_end_offset;
2016 			ZSTR_VAL(result)[result_len] = '\0';
2017 			ZSTR_LEN(result) = result_len;
2018 			break;
2019 		} else {
2020 error:
			/* Any other negative return value: report it through
			   pcre_handle_exec_error() and discard the partial result. */
2021 			pcre_handle_exec_error(count);
2022 			if (result) {
2023 				zend_string_release_ex(result, 0);
2024 				result = NULL;
2025 			}
2026 			break;
2027 		}
		/* Search for the next match starting at start_offset. */
2028 #ifdef HAVE_PCRE_JIT_SUPPORT
2029 		if ((pce->preg_options & PREG_JIT)) {
2030 			count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
2031 					PCRE2_NO_UTF_CHECK, match_data, mctx);
2032 		} else
2033 #endif
2034 		count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
2035 				PCRE2_NO_UTF_CHECK, match_data, mctx);
2036 	}
	/* Only a privately created match data block is freed; the shared one is
	   handed back by restoring the flag saved on entry. */
2037 	if (match_data != mdata) {
2038 		pcre2_match_data_free(match_data);
2039 	}
2040 	mdata_used = old_mdata_used;
2041 
2042 	return result;
2043 }
2044 /* }}} */
2045 
2046 /* {{{ php_pcre_replace_func */
php_pcre_replace_func(zend_string * regex,zend_string * subject_str,zend_fcall_info * fci,zend_fcall_info_cache * fcc,size_t limit,size_t * replace_count,zend_long flags)2047 static zend_always_inline zend_string *php_pcre_replace_func(zend_string *regex,
2048 							  zend_string *subject_str,
2049 							  zend_fcall_info *fci, zend_fcall_info_cache *fcc,
2050 							  size_t limit, size_t *replace_count, zend_long flags)
2051 {
2052 	pcre_cache_entry	*pce;			    /* Compiled regular expression */
2053 	zend_string	 		*result;			/* Function result */
2054 
2055 	/* Compile regex or get it from cache. */
2056 	if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
2057 		return NULL;
2058 	}
2059 	pce->refcount++;
2060 	result = php_pcre_replace_func_impl(
2061 		pce, subject_str, ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), fci, fcc,
2062 		limit, replace_count, flags);
2063 	pce->refcount--;
2064 
2065 	return result;
2066 }
2067 /* }}} */
2068 
2069 /* {{{ php_pcre_replace_array */
/*
 * Apply a list of patterns to one subject, feeding the output of each
 * replacement into the next one.
 *
 * regex          array of patterns; every entry is converted to a string.
 * replace_str    replacement used for all patterns when replace_ht is NULL.
 * replace_ht     optional array of replacements, consumed in storage order
 *                next to the patterns; once it is exhausted the empty string
 *                is used for the remaining patterns.
 * subject_str    starting subject. The function takes its own reference and
 *                releases the working string after every pass.
 * limit, replace_count  forwarded unchanged to php_pcre_replace().
 *
 * Returns the final string, or NULL as soon as php_pcre_replace() returns
 * NULL for one of the patterns.
 */
static zend_string *php_pcre_replace_array(HashTable *regex,
	zend_string *replace_str, HashTable *replace_ht,
	zend_string *subject_str, size_t limit, size_t *replace_count)
{
	zval *pattern_zv;
	uint32_t next_slot = 0;	/* next replace_ht slot to look at */

	zend_string_addref(subject_str);

	if (!replace_ht) {
		ZEND_ASSERT(replace_str != NULL);
	}

	ZEND_HASH_FOREACH_VAL(regex, pattern_zv) {
		/* Patterns are always handled as strings. */
		zend_string *pattern_tmp;
		zend_string *pattern = zval_get_tmp_string(pattern_zv, &pattern_tmp);
		zend_string *replacement = replace_str;
		zend_string *replacement_tmp = NULL;

		if (replace_ht) {
			bool found = false;

			/* Walk forward past unused slots until a live entry shows up. */
			while (!found && next_slot != replace_ht->nNumUsed) {
				zval *slot = ZEND_HASH_ELEMENT(replace_ht, next_slot);

				next_slot++;
				if (Z_TYPE_P(slot) != IS_UNDEF) {
					replacement = zval_get_tmp_string(slot, &replacement_tmp);
					found = true;
				}
			}
			if (!found) {
				/* More patterns than replacements: substitute "". */
				replacement = ZSTR_EMPTY_ALLOC();
				replacement_tmp = NULL;
			}
		}

		/* Replace, then make the outcome the subject of the next pass. */
		zend_string *replaced = php_pcre_replace(pattern, subject_str,
			ZSTR_VAL(subject_str), ZSTR_LEN(subject_str),
			replacement, limit, replace_count);

		zend_tmp_string_release(replacement_tmp);
		zend_tmp_string_release(pattern_tmp);
		zend_string_release_ex(subject_str, 0);
		subject_str = replaced;
		if (UNEXPECTED(replaced == NULL)) {
			break;
		}
	} ZEND_HASH_FOREACH_END();

	return subject_str;
}
2143 /* }}} */
2144 
2145 /* {{{ php_replace_in_subject */
php_replace_in_subject(zend_string * regex_str,HashTable * regex_ht,zend_string * replace_str,HashTable * replace_ht,zend_string * subject,size_t limit,size_t * replace_count)2146 static zend_always_inline zend_string *php_replace_in_subject(
2147 	zend_string *regex_str, HashTable *regex_ht,
2148 	zend_string *replace_str, HashTable *replace_ht,
2149 	zend_string *subject, size_t limit, size_t *replace_count)
2150 {
2151 	zend_string *result;
2152 
2153 	if (regex_str) {
2154 		ZEND_ASSERT(replace_str != NULL);
2155 		result = php_pcre_replace(regex_str, subject, ZSTR_VAL(subject), ZSTR_LEN(subject),
2156 			replace_str, limit, replace_count);
2157 	} else {
2158 		ZEND_ASSERT(regex_ht != NULL);
2159 		result = php_pcre_replace_array(regex_ht, replace_str, replace_ht, subject,
2160 			limit, replace_count);
2161 	}
2162 	return result;
2163 }
2164 /* }}} */
2165 
2166 /* {{{ php_replace_in_subject_func */
/*
 * Callback-based replacement for one subject string, with either a single
 * pattern (regex_str) or an array of patterns (regex_ht).
 *
 * For an array, the patterns are applied one after another and each result
 * becomes the subject of the following pattern. The function holds its own
 * reference on the working string and releases it after every pass.
 *
 * Returns the final string, or NULL as soon as php_pcre_replace_func()
 * returns NULL.
 */
static zend_string *php_replace_in_subject_func(zend_string *regex_str, HashTable *regex_ht,
	zend_fcall_info *fci, zend_fcall_info_cache *fcc,
	zend_string *subject, size_t limit, size_t *replace_count, zend_long flags)
{
	/* Single pattern: nothing to chain. */
	if (regex_str) {
		return php_pcre_replace_func(
			regex_str, subject, fci, fcc, limit, replace_count, flags);
	}

	ZEND_ASSERT(regex_ht != NULL);

	zval *pattern_zv;
	zend_string *current = subject;

	zend_string_addref(current);

	ZEND_HASH_FOREACH_VAL(regex_ht, pattern_zv) {
		/* Patterns are always handled as strings. */
		zend_string *pattern_tmp;
		zend_string *pattern = zval_get_tmp_string(pattern_zv, &pattern_tmp);

		zend_string *next = php_pcre_replace_func(
			pattern, current, fci, fcc, limit, replace_count, flags);

		zend_tmp_string_release(pattern_tmp);
		zend_string_release(current);
		current = next;
		if (UNEXPECTED(current == NULL)) {
			break;
		}
	} ZEND_HASH_FOREACH_END();

	return current;
}
2206 /* }}} */
2207 
2208 /* {{{ preg_replace_func_impl */
preg_replace_func_impl(zval * return_value,zend_string * regex_str,HashTable * regex_ht,zend_fcall_info * fci,zend_fcall_info_cache * fcc,zend_string * subject_str,HashTable * subject_ht,zend_long limit_val,zend_long flags)2209 static size_t preg_replace_func_impl(zval *return_value,
2210 	zend_string *regex_str, HashTable *regex_ht,
2211 	zend_fcall_info *fci, zend_fcall_info_cache *fcc,
2212 	zend_string *subject_str, HashTable *subject_ht, zend_long limit_val, zend_long flags)
2213 {
2214 	zend_string	*result;
2215 	size_t replace_count = 0;
2216 
2217 	if (subject_str) {
2218 		result = php_replace_in_subject_func(
2219 			regex_str, regex_ht, fci, fcc, subject_str, limit_val, &replace_count, flags);
2220 		if (result != NULL) {
2221 			RETVAL_STR(result);
2222 		} else {
2223 			RETVAL_NULL();
2224 		}
2225 	} else {
2226 		/* if subject is an array */
2227 		zval		*subject_entry, zv;
2228 		zend_string	*string_key;
2229 		zend_ulong	 num_key;
2230 
2231 		ZEND_ASSERT(subject_ht != NULL);
2232 
2233 		array_init_size(return_value, zend_hash_num_elements(subject_ht));
2234 		HashTable *return_value_ht = Z_ARRVAL_P(return_value);
2235 
2236 		/* For each subject entry, convert it to string, then perform replacement
2237 		   and add the result to the return_value array. */
2238 		ZEND_HASH_FOREACH_KEY_VAL(subject_ht, num_key, string_key, subject_entry) {
2239 			zend_string *tmp_subject_entry_str;
2240 			zend_string *subject_entry_str = zval_get_tmp_string(subject_entry, &tmp_subject_entry_str);
2241 
2242 			result = php_replace_in_subject_func(
2243 				regex_str, regex_ht, fci, fcc, subject_entry_str, limit_val, &replace_count, flags);
2244 			if (result != NULL) {
2245 				/* Add to return array */
2246 				ZVAL_STR(&zv, result);
2247 				if (string_key) {
2248 					zend_hash_add_new(return_value_ht, string_key, &zv);
2249 				} else {
2250 					zend_hash_index_add_new(return_value_ht, num_key, &zv);
2251 				}
2252 			}
2253 			zend_tmp_string_release(tmp_subject_entry_str);
2254 		} ZEND_HASH_FOREACH_END();
2255 	}
2256 
2257 	return replace_count;
2258 }
2259 /* }}} */
2260 
/*
 * Common body of preg_replace() and preg_filter().
 *
 * Pattern, replacement and subject each arrive as either a string or an
 * array. A replacement array combined with a string pattern is rejected with
 * a type error.
 *
 * is_filter selects preg_filter() semantics: a subject is only returned when
 * at least one replacement was counted for it. A string subject without
 * replacements yields null, an array entry without replacements is dropped.
 *
 * When zcount is given, the total number of replacements is assigned to it.
 */
static void _preg_replace_common(
	zval *return_value,
	HashTable *regex_ht, zend_string *regex_str,
	HashTable *replace_ht, zend_string *replace_str,
	HashTable *subject_ht, zend_string *subject_str,
	zend_long limit,
	zval *zcount,
	bool is_filter
) {
	size_t total = 0;

	/* An array of replacements only makes sense with an array of patterns. */
	if (replace_ht && !regex_ht) {
		zend_argument_type_error(1, "must be of type array when argument #2 ($replacement) is an array, string given");
		RETURN_THROWS();
	}

	if (subject_str) {
		const size_t before = total;
		zend_string *replaced = php_replace_in_subject(regex_str, regex_ht,
			replace_str, replace_ht, subject_str, limit, &total);

		if (replaced == NULL) {
			RETVAL_NULL();
		} else if (is_filter && total <= before) {
			/* Filtering and nothing was replaced: discard the string. */
			zend_string_release_ex(replaced, 0);
			RETVAL_NULL();
		} else {
			RETVAL_STR(replaced);
		}
	} else {
		/* Array subject. */
		zval *entry;
		zend_string *str_key;
		zend_ulong idx;

		ZEND_ASSERT(subject_ht != NULL);

		array_init_size(return_value, zend_hash_num_elements(subject_ht));
		HashTable *out_ht = Z_ARRVAL_P(return_value);

		ZEND_HASH_FOREACH_KEY_VAL(subject_ht, idx, str_key, entry) {
			const size_t before = total;
			/* Each entry is converted to a string before replacing. */
			zend_string *entry_tmp;
			zend_string *entry_str = zval_get_tmp_string(entry, &entry_tmp);
			zend_string *replaced = php_replace_in_subject(regex_str, regex_ht,
				replace_str, replace_ht, entry_str, limit, &total);

			if (replaced != NULL) {
				if (is_filter && total <= before) {
					/* Filtering and this entry saw no replacement. */
					zend_string_release_ex(replaced, 0);
				} else {
					zval boxed;

					/* Store the result under the key it came from. */
					ZVAL_STR(&boxed, replaced);
					if (str_key) {
						zend_hash_add_new(out_ht, str_key, &boxed);
					} else {
						zend_hash_index_add_new(out_ht, idx, &boxed);
					}
				}
			}
			zend_tmp_string_release(entry_tmp);
		} ZEND_HASH_FOREACH_END();
	}

	if (zcount) {
		ZEND_TRY_ASSIGN_REF_LONG(zcount, total);
	}
}
2335 
2336 /* {{{ preg_replace_common */
/*
 * Argument parsing shared by preg_replace() and preg_filter().
 *
 * Accepts three to five arguments: pattern, replacement and subject (each an
 * array or a string), then an optional integer limit (default -1) and an
 * optional count argument. Everything is forwarded to _preg_replace_common()
 * together with is_filter.
 */
static void preg_replace_common(INTERNAL_FUNCTION_PARAMETERS, bool is_filter)
{
	/* Pattern */
	HashTable *regex_ht;
	zend_string *regex_str;
	/* Replacement */
	HashTable *replace_ht;
	zend_string *replace_str;
	/* Subject */
	HashTable *subject_ht;
	zend_string *subject_str;
	/* Optional arguments */
	zend_long limit = -1;
	zval *zcount = NULL;

	ZEND_PARSE_PARAMETERS_START(3, 5)
		Z_PARAM_ARRAY_HT_OR_STR(regex_ht, regex_str)
		Z_PARAM_ARRAY_HT_OR_STR(replace_ht, replace_str)
		Z_PARAM_ARRAY_HT_OR_STR(subject_ht, subject_str)
		Z_PARAM_OPTIONAL
		Z_PARAM_LONG(limit)
		Z_PARAM_ZVAL(zcount)
	ZEND_PARSE_PARAMETERS_END();

	_preg_replace_common(return_value,
		regex_ht, regex_str,
		replace_ht, replace_str,
		subject_ht, subject_str,
		limit, zcount, is_filter);
}
2361 /* }}} */
2362 
2363 /* {{{ Perform Perl-style regular expression replacement. */
PHP_FUNCTION(preg_replace)2364 PHP_FUNCTION(preg_replace)
2365 {
2366 	preg_replace_common(INTERNAL_FUNCTION_PARAM_PASSTHRU, false);
2367 }
2368 /* }}} */
2369 
2370 ZEND_FRAMELESS_FUNCTION(preg_replace, 3)
2371 {
2372 	zend_string *regex_str, *replace_str, *subject_str;
2373 	HashTable *regex_ht, *replace_ht, *subject_ht;
2374 	zval regex_tmp, replace_tmp, subject_tmp;
2375 
2376 	Z_FLF_PARAM_ARRAY_HT_OR_STR(1, regex_ht, regex_str, regex_tmp);
2377 	Z_FLF_PARAM_ARRAY_HT_OR_STR(2, replace_ht, replace_str, replace_tmp);
2378 	Z_FLF_PARAM_ARRAY_HT_OR_STR(3, subject_ht, subject_str, subject_tmp);
2379 
2380 	_preg_replace_common(
2381 		return_value,
2382 		regex_ht, regex_str,
2383 		replace_ht, replace_str,
2384 		subject_ht, subject_str,
2385 		/* limit */ -1, /* zcount */ NULL, /* is_filter */ false);
2386 
2387 flf_clean:;
2388 	Z_FLF_PARAM_FREE_STR(1, regex_tmp);
2389 	Z_FLF_PARAM_FREE_STR(2, replace_tmp);
2390 	Z_FLF_PARAM_FREE_STR(3, subject_tmp);
2391 }
2392 
2393 /* {{{ Perform Perl-style regular expression replacement using replacement callback. */
PHP_FUNCTION(preg_replace_callback)2394 PHP_FUNCTION(preg_replace_callback)
2395 {
2396 	zval *zcount = NULL;
2397 	zend_string *regex_str;
2398 	HashTable *regex_ht;
2399 	zend_string *subject_str;
2400 	HashTable *subject_ht;
2401 	zend_long limit = -1, flags = 0;
2402 	size_t replace_count;
2403 	zend_fcall_info fci;
2404 	zend_fcall_info_cache fcc;
2405 
2406 	/* Get function parameters and do error-checking. */
2407 	ZEND_PARSE_PARAMETERS_START(3, 6)
2408 		Z_PARAM_ARRAY_HT_OR_STR(regex_ht, regex_str)
2409 		Z_PARAM_FUNC(fci, fcc)
2410 		Z_PARAM_ARRAY_HT_OR_STR(subject_ht, subject_str)
2411 		Z_PARAM_OPTIONAL
2412 		Z_PARAM_LONG(limit)
2413 		Z_PARAM_ZVAL(zcount)
2414 		Z_PARAM_LONG(flags)
2415 	ZEND_PARSE_PARAMETERS_END();
2416 
2417 	replace_count = preg_replace_func_impl(return_value, regex_str, regex_ht,
2418 		&fci, &fcc,
2419 		subject_str, subject_ht, limit, flags);
2420 	if (zcount) {
2421 		ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count);
2422 	}
2423 }
2424 /* }}} */
2425 
2426 /* {{{ Perform Perl-style regular expression replacement using replacement callback. */
PHP_FUNCTION(preg_replace_callback_array)2427 PHP_FUNCTION(preg_replace_callback_array)
2428 {
2429 	zval zv, *replace, *zcount = NULL;
2430 	HashTable *pattern, *subject_ht;
2431 	zend_string *subject_str, *str_idx_regex;
2432 	zend_long limit = -1, flags = 0;
2433 	size_t replace_count = 0;
2434 	zend_fcall_info fci;
2435 	zend_fcall_info_cache fcc;
2436 
2437 	/* Get function parameters and do error-checking. */
2438 	ZEND_PARSE_PARAMETERS_START(2, 5)
2439 		Z_PARAM_ARRAY_HT(pattern)
2440 		Z_PARAM_ARRAY_HT_OR_STR(subject_ht, subject_str)
2441 		Z_PARAM_OPTIONAL
2442 		Z_PARAM_LONG(limit)
2443 		Z_PARAM_ZVAL(zcount)
2444 		Z_PARAM_LONG(flags)
2445 	ZEND_PARSE_PARAMETERS_END();
2446 
2447 	fci.size = sizeof(fci);
2448 	fci.object = NULL;
2449 	fci.named_params = NULL;
2450 
2451 	if (subject_ht) {
2452 		GC_TRY_ADDREF(subject_ht);
2453 	} else {
2454 		GC_TRY_ADDREF(subject_str);
2455 	}
2456 
2457 	ZEND_HASH_FOREACH_STR_KEY_VAL(pattern, str_idx_regex, replace) {
2458 		if (!zend_is_callable_ex(replace, NULL, 0, NULL, &fcc, NULL)) {
2459 			zend_argument_type_error(1, "must contain only valid callbacks");
2460 			goto error;
2461 		}
2462 		if (!str_idx_regex) {
2463 			zend_argument_type_error(1, "must contain only string patterns as keys");
2464 			goto error;
2465 		}
2466 
2467 		ZVAL_COPY_VALUE(&fci.function_name, replace);
2468 
2469 		replace_count += preg_replace_func_impl(&zv, str_idx_regex, /* regex_ht */ NULL, &fci, &fcc,
2470 			subject_str, subject_ht, limit, flags);
2471 		switch (Z_TYPE(zv)) {
2472 			case IS_ARRAY:
2473 				ZEND_ASSERT(subject_ht);
2474 				zend_array_release(subject_ht);
2475 				subject_ht = Z_ARR(zv);
2476 				break;
2477 			case IS_STRING:
2478 				ZEND_ASSERT(subject_str);
2479 				zend_string_release(subject_str);
2480 				subject_str = Z_STR(zv);
2481 				break;
2482 			case IS_NULL:
2483 				RETVAL_NULL();
2484 				goto error;
2485 			EMPTY_SWITCH_DEFAULT_CASE()
2486 		}
2487 
2488 		if (EG(exception)) {
2489 			goto error;
2490 		}
2491 	} ZEND_HASH_FOREACH_END();
2492 
2493 	if (zcount) {
2494 		ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count);
2495 	}
2496 
2497 	if (subject_ht) {
2498 		RETVAL_ARR(subject_ht);
2499 		// Unset the type_flags of immutable arrays to prevent the VM from performing refcounting
2500 		if (GC_FLAGS(subject_ht) & IS_ARRAY_IMMUTABLE) {
2501 			Z_TYPE_FLAGS_P(return_value) = 0;
2502 		}
2503 		return;
2504 	} else {
2505 		RETURN_STR(subject_str);
2506 	}
2507 
2508 error:
2509 	if (subject_ht) {
2510 		zend_array_release(subject_ht);
2511 	} else {
2512 		zend_string_release(subject_str);
2513 	}
2514 }
2515 /* }}} */
2516 
2517 /* {{{ Perform Perl-style regular expression replacement and only return matches. */
PHP_FUNCTION(preg_filter)2518 PHP_FUNCTION(preg_filter)
2519 {
2520 	preg_replace_common(INTERNAL_FUNCTION_PARAM_PASSTHRU, true);
2521 }
2522 /* }}} */
2523 
2524 /* {{{ Split string into an array using a perl-style regular expression as a delimiter */
PHP_FUNCTION(preg_split)2525 PHP_FUNCTION(preg_split)
2526 {
2527 	zend_string			*regex;			/* Regular expression */
2528 	zend_string			*subject;		/* String to match against */
2529 	zend_long			 limit_val = -1;/* Integer value of limit */
2530 	zend_long			 flags = 0;		/* Match control flags */
2531 	pcre_cache_entry	*pce;			/* Compiled regular expression */
2532 
2533 	/* Get function parameters and do error checking */
2534 	ZEND_PARSE_PARAMETERS_START(2, 4)
2535 		Z_PARAM_STR(regex)
2536 		Z_PARAM_STR(subject)
2537 		Z_PARAM_OPTIONAL
2538 		Z_PARAM_LONG(limit_val)
2539 		Z_PARAM_LONG(flags)
2540 	ZEND_PARSE_PARAMETERS_END();
2541 
2542 	/* Compile regex or get it from cache. */
2543 	if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
2544 		RETURN_FALSE;
2545 	}
2546 
2547 	pce->refcount++;
2548 	php_pcre_split_impl(pce, subject, return_value, limit_val, flags);
2549 	pce->refcount--;
2550 }
2551 /* }}} */
2552 
2553 /* {{{ php_pcre_split */
/*
 * Split `subject_str` at every match of the compiled pattern `pce` and store
 * the pieces in `return_value`.
 *
 * return_value is first initialised as an array. If match data cannot be
 * created, or PCRE_G(error_code) is set once the loop ends, the array is
 * destroyed and return_value becomes false.
 *
 * limit_val:
 *   -1          no limit;
 *    0          treated like -1;
 *    otherwise  any value <= 1 skips matching and emits the whole subject as
 *               a single piece; larger values are decremented for each piece
 *               emitted, and matching stops once the value reaches 1, so the
 *               remaining text becomes the last piece.
 *
 * flags:
 *   PREG_SPLIT_NO_EMPTY        pieces (and captured delimiters) of length 0
 *                              are not emitted;
 *   PREG_SPLIT_DELIM_CAPTURE   captured subpatterns of each match are emitted
 *                              as well;
 *   PREG_SPLIT_OFFSET_CAPTURE  entries are added through add_offset_pair()
 *                              instead of as plain strings.
 */
php_pcre_split_impl(pcre_cache_entry * pce,zend_string * subject_str,zval * return_value,zend_long limit_val,zend_long flags)2554 PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str, zval *return_value,
2555 	zend_long limit_val, zend_long flags)
2556 {
2557 	uint32_t		 options;			/* Execution options */
2558 	int				 count;				/* Count of matched subpatterns */
2559 	PCRE2_SIZE		 start_offset;		/* Where the new search starts */
2560 	PCRE2_SIZE		 last_match_offset;	/* Location of last match */
2561 	uint32_t		 no_empty;			/* If NO_EMPTY flag is set */
2562 	uint32_t		 delim_capture; 	/* If delimiters should be captured */
2563 	uint32_t		 offset_capture;	/* If offsets should be captured */
2564 	uint32_t		 num_subpats;		/* Number of captured subpatterns */
2565 	zval			 tmp;
2566 	pcre2_match_data *match_data;
2567 	char *subject = ZSTR_VAL(subject_str);
2568 
2569 	no_empty = flags & PREG_SPLIT_NO_EMPTY;
2570 	delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE;
2571 	offset_capture = flags & PREG_SPLIT_OFFSET_CAPTURE;
2572 
2573 	/* Initialize return value */
2574 	array_init(return_value);
2575 	HashTable *return_value_ht = Z_ARRVAL_P(return_value);
2576 
2577 	/* Calculate the size of the offsets array, and allocate memory for it. */
2578 	num_subpats = pce->capture_count + 1;
2579 
2580 	/* Start at the beginning of the string */
2581 	start_offset = 0;
2582 	last_match_offset = 0;
2583 	PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
2584 
	/* Normalise the limit: 0 means "no limit" like -1; any other value that
	   is <= 1 cannot produce more than one piece, so matching is skipped and
	   the whole subject is emitted at `last`. */
2585 	if (limit_val == -1) {
2586 		/* pass */
2587 	} else if (limit_val == 0) {
2588 		limit_val = -1;
2589 	} else if (limit_val <= 1) {
2590 		goto last;
2591 	}
2592 
	/* Use the shared `mdata` block when it is not marked as in use and the
	   subpattern count fits its preallocated size; otherwise create a private
	   block, which is freed after the loop. */
2593 	if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
2594 		match_data = mdata;
2595 	} else {
2596 		match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
2597 		if (!match_data) {
2598 			PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
2599 			zval_ptr_dtor(return_value);
2600 			RETURN_FALSE;
2601 		}
2602 	}
2603 
	/* Patterns compiled with PCRE2_UTF run their first match with options 0,
	   i.e. without PCRE2_NO_UTF_CHECK; later matches pass PCRE2_NO_UTF_CHECK
	   explicitly. */
2604 	options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
2605 
2606 	/* Array of subpattern offsets */
2607 	PCRE2_SIZE *const offsets = pcre2_get_ovector_pointer(match_data);
2608 
	/* First match. The JIT entry point is only used when `options` is
	   non-zero, which by the assignment above means a non-UTF pattern. */
2609 #ifdef HAVE_PCRE_JIT_SUPPORT
2610 	if ((pce->preg_options & PREG_JIT) && options) {
2611 		count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
2612 				PCRE2_NO_UTF_CHECK, match_data, mctx);
2613 	} else
2614 #endif
2615 	count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
2616 			options, match_data, mctx);
2617 
2618 	while (1) {
2619 		/* If something matched */
2620 		if (count >= 0) {
2621 			/* Check for too many substrings condition. */
2622 			if (UNEXPECTED(count == 0)) {
2623 				php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
2624 				count = num_subpats;
2625 			}
2626 
			/* Also entered by goto from the empty-match retry further down. */
2627 matched:
			/* A match whose end precedes its start is treated as an internal
			   error; the check after the loop turns it into a false return. */
2628 			if (UNEXPECTED(offsets[1] < offsets[0])) {
2629 				PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
2630 				break;
2631 			}
2632 
			/* Emit the piece bounded by the end of the previous match and
			   the start of this one, unless it is empty and NO_EMPTY is set. */
2633 			if (!no_empty || offsets[0] != last_match_offset) {
2634 				if (offset_capture) {
2635 					/* Add (match, offset) pair to the return value */
2636 					add_offset_pair(
2637 						return_value_ht, subject, last_match_offset, offsets[0],
2638 						NULL, 0);
2639 				} else {
2640 					/* Add the piece to the return value */
2641 					populate_match_value_str(&tmp, subject, last_match_offset, offsets[0]);
2642 					zend_hash_next_index_insert_new(return_value_ht, &tmp);
2643 				}
2644 
2645 				/* One less left to do */
2646 				if (limit_val != -1)
2647 					limit_val--;
2648 			}
2649 
			/* With DELIM_CAPTURE, each captured subpattern (ovector pairs
			   1 .. count-1) is emitted too; these do not touch limit_val. */
2650 			if (delim_capture) {
2651 				size_t i;
2652 				for (i = 1; i < count; i++) {
2653 					/* If we have matched a delimiter */
2654 					if (!no_empty || offsets[2*i] != offsets[2*i+1]) {
2655 						if (offset_capture) {
2656 							add_offset_pair(
2657 								return_value_ht, subject, offsets[2*i], offsets[2*i+1], NULL, 0);
2658 						} else {
2659 							populate_match_value_str(&tmp, subject, offsets[2*i], offsets[2*i+1]);
2660 							zend_hash_next_index_insert_new(return_value_ht, &tmp);
2661 						}
2662 					}
2663 				}
2664 			}
2665 
2666 			/* Advance to the position right after the last full match */
2667 			start_offset = last_match_offset = offsets[1];
2668 
2669 			/* If we have matched an empty string, mimic what Perl's /g options does.
2670 			   This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
2671 			   the match again at the same point. If this fails (picked up above) we
2672 			   advance to the next character. */
2673 			if (start_offset == offsets[0]) {
2674 				/* Get next piece if no limit or limit not yet reached and something matched*/
2675 				if (limit_val != -1 && limit_val <= 1) {
2676 					break;
2677 				}
2678 				count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
2679 					PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
2680 				if (count >= 0) {
2681 					goto matched;
2682 				} else if (count == PCRE2_ERROR_NOMATCH) {
2683 					/* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
2684 					   this is not necessarily the end. We need to advance
2685 					   the start offset, and continue. Fudge the offset values
2686 					   to achieve this, unless we're already at the end of the string. */
2687 					if (start_offset < ZSTR_LEN(subject_str)) {
2688 						start_offset += calculate_unit_length(pce, subject + start_offset);
2689 					} else {
2690 						break;
2691 					}
2692 				} else {
2693 					goto error;
2694 				}
2695 			}
2696 
2697 		} else if (count == PCRE2_ERROR_NOMATCH) {
2698 			break;
2699 		} else {
2700 error:
			/* Any other negative return value is reported through
			   pcre_handle_exec_error(); the check after the loop then
			   decides whether the call returns false. */
2701 			pcre_handle_exec_error(count);
2702 			break;
2703 		}
2704 
2705 		/* Get next piece if no limit or limit not yet reached and something matched*/
2706 		if (limit_val != -1 && limit_val <= 1) {
2707 			break;
2708 		}
2709 
		/* Search for the next match starting at start_offset. */
2710 #ifdef HAVE_PCRE_JIT_SUPPORT
2711 		if (pce->preg_options & PREG_JIT) {
2712 			count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
2713 					PCRE2_NO_UTF_CHECK, match_data, mctx);
2714 		} else
2715 #endif
2716 		count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
2717 				PCRE2_NO_UTF_CHECK, match_data, mctx);
2718 	}
	/* Only a privately created match data block is freed. */
2719 	if (match_data != mdata) {
2720 		pcre2_match_data_free(match_data);
2721 	}
2722 
	/* An error recorded during matching discards the partial array. */
2723 	if (PCRE_G(error_code) != PHP_PCRE_NO_ERROR) {
2724 		zval_ptr_dtor(return_value);
2725 		RETURN_FALSE;
2726 	}
2727 
2728 last:
2729 	start_offset = last_match_offset; /* the offset might have been incremented, but without further successful matches */
2730 
	/* Emit whatever follows the last match, unless it is empty and NO_EMPTY
	   is set. */
2731 	if (!no_empty || start_offset < ZSTR_LEN(subject_str)) {
2732 		if (offset_capture) {
2733 			/* Add the last (match, offset) pair to the return value */
2734 			add_offset_pair(return_value_ht, subject, start_offset, ZSTR_LEN(subject_str), NULL, 0);
2735 		} else {
2736 			/* Add the last piece to the return value */
			/* Offset 0 means the piece is the whole subject, so the original
			   zend_string is stored via ZVAL_STR_COPY instead of building a
			   new string. */
2737 			if (start_offset == 0) {
2738 				ZVAL_STR_COPY(&tmp, subject_str);
2739 			} else {
2740 				populate_match_value_str(&tmp, subject, start_offset, ZSTR_LEN(subject_str));
2741 			}
2742 			zend_hash_next_index_insert_new(return_value_ht, &tmp);
2743 		}
2744 	}
2745 }
2746 /* }}} */
2747 
2748 /* {{{ Quote regular expression characters plus an optional character */
PHP_FUNCTION(preg_quote)2749 PHP_FUNCTION(preg_quote)
2750 {
2751 	zend_string *str;       		/* Input string argument */
2752 	zend_string	*delim = NULL;		/* Additional delimiter argument */
2753 	char		*in_str;			/* Input string */
2754 	char		*in_str_end;    	/* End of the input string */
2755 	zend_string	*out_str;			/* Output string with quoted characters */
2756 	size_t       extra_len;         /* Number of additional characters */
2757 	char 		*p,					/* Iterator for input string */
2758 				*q,					/* Iterator for output string */
2759 				 delim_char = '\0',	/* Delimiter character to be quoted */
2760 				 c;					/* Current character */
2761 
2762 	/* Get the arguments and check for errors */
2763 	ZEND_PARSE_PARAMETERS_START(1, 2)
2764 		Z_PARAM_STR(str)
2765 		Z_PARAM_OPTIONAL
2766 		Z_PARAM_STR_OR_NULL(delim)
2767 	ZEND_PARSE_PARAMETERS_END();
2768 
2769 	/* Nothing to do if we got an empty string */
2770 	if (ZSTR_LEN(str) == 0) {
2771 		RETURN_EMPTY_STRING();
2772 	}
2773 
2774 	in_str = ZSTR_VAL(str);
2775 	in_str_end = in_str + ZSTR_LEN(str);
2776 
2777 	if (delim) {
2778 		delim_char = ZSTR_VAL(delim)[0];
2779 	}
2780 
2781 	/* Go through the string and quote necessary characters */
2782 	extra_len = 0;
2783 	p = in_str;
2784 	do {
2785 		c = *p;
2786 		switch(c) {
2787 			case '.':
2788 			case '\\':
2789 			case '+':
2790 			case '*':
2791 			case '?':
2792 			case '[':
2793 			case '^':
2794 			case ']':
2795 			case '$':
2796 			case '(':
2797 			case ')':
2798 			case '{':
2799 			case '}':
2800 			case '=':
2801 			case '!':
2802 			case '>':
2803 			case '<':
2804 			case '|':
2805 			case ':':
2806 			case '-':
2807 			case '#':
2808 				extra_len++;
2809 				break;
2810 
2811 			case '\0':
2812 				extra_len+=3;
2813 				break;
2814 
2815 			default:
2816 				if (c == delim_char) {
2817 					extra_len++;
2818 				}
2819 				break;
2820 		}
2821 		p++;
2822 	} while (p != in_str_end);
2823 
2824 	if (extra_len == 0) {
2825 		RETURN_STR_COPY(str);
2826 	}
2827 
2828 	/* Allocate enough memory so that even if each character
2829 	   is quoted, we won't run out of room */
2830 	out_str = zend_string_safe_alloc(1, ZSTR_LEN(str), extra_len, 0);
2831 	q = ZSTR_VAL(out_str);
2832 	p = in_str;
2833 
2834 	do {
2835 		c = *p;
2836 		switch(c) {
2837 			case '.':
2838 			case '\\':
2839 			case '+':
2840 			case '*':
2841 			case '?':
2842 			case '[':
2843 			case '^':
2844 			case ']':
2845 			case '$':
2846 			case '(':
2847 			case ')':
2848 			case '{':
2849 			case '}':
2850 			case '=':
2851 			case '!':
2852 			case '>':
2853 			case '<':
2854 			case '|':
2855 			case ':':
2856 			case '-':
2857 			case '#':
2858 				*q++ = '\\';
2859 				*q++ = c;
2860 				break;
2861 
2862 			case '\0':
2863 				*q++ = '\\';
2864 				*q++ = '0';
2865 				*q++ = '0';
2866 				*q++ = '0';
2867 				break;
2868 
2869 			default:
2870 				if (c == delim_char) {
2871 					*q++ = '\\';
2872 				}
2873 				*q++ = c;
2874 				break;
2875 		}
2876 		p++;
2877 	} while (p != in_str_end);
2878 	*q = '\0';
2879 
2880 	RETURN_NEW_STR(out_str);
2881 }
2882 /* }}} */
2883 
2884 /* {{{ Searches array and returns entries which match regex */
PHP_FUNCTION(preg_grep)2885 PHP_FUNCTION(preg_grep)
2886 {
2887 	zend_string			*regex;			/* Regular expression */
2888 	zval				*input;			/* Input array */
2889 	zend_long			 flags = 0;		/* Match control flags */
2890 	pcre_cache_entry	*pce;			/* Compiled regular expression */
2891 
2892 	/* Get arguments and do error checking */
2893 	ZEND_PARSE_PARAMETERS_START(2, 3)
2894 		Z_PARAM_STR(regex)
2895 		Z_PARAM_ARRAY(input)
2896 		Z_PARAM_OPTIONAL
2897 		Z_PARAM_LONG(flags)
2898 	ZEND_PARSE_PARAMETERS_END();
2899 
2900 	/* Compile regex or get it from cache. */
2901 	if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
2902 		RETURN_FALSE;
2903 	}
2904 
2905 	pce->refcount++;
2906 	php_pcre_grep_impl(pce, input, return_value, flags);
2907 	pce->refcount--;
2908 }
2909 /* }}} */
2910 
php_pcre_grep_impl(pcre_cache_entry * pce,zval * input,zval * return_value,zend_long flags)2911 PHPAPI void  php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return_value, zend_long flags) /* {{{ */
2912 {
2913 	zval            *entry;             /* An entry in the input array */
2914 	uint32_t		 num_subpats;		/* Number of captured subpatterns */
2915 	int				 count;				/* Count of matched subpatterns */
2916 	uint32_t		 options;			/* Execution options */
2917 	zend_string		*string_key;
2918 	zend_ulong		 num_key;
2919 	bool		 invert;			/* Whether to return non-matching
2920 										   entries */
2921 	pcre2_match_data *match_data;
2922 	invert = flags & PREG_GREP_INVERT ? 1 : 0;
2923 
2924 	/* Calculate the size of the offsets array, and allocate memory for it. */
2925 	num_subpats = pce->capture_count + 1;
2926 
2927 	/* Initialize return array */
2928 	array_init(return_value);
2929 	HashTable *return_value_ht = Z_ARRVAL_P(return_value);
2930 
2931 	PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
2932 
2933 	if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
2934 		match_data = mdata;
2935 	} else {
2936 		match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
2937 		if (!match_data) {
2938 			PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
2939 			return;
2940 		}
2941 	}
2942 
2943 	options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
2944 
2945 	/* Go through the input array */
2946 	ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(input), num_key, string_key, entry) {
2947 		zend_string *tmp_subject_str;
2948 		zend_string *subject_str = zval_get_tmp_string(entry, &tmp_subject_str);
2949 
2950 		/* Perform the match */
2951 #ifdef HAVE_PCRE_JIT_SUPPORT
2952 		if ((pce->preg_options & PREG_JIT) && options) {
2953 			count = pcre2_jit_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), 0,
2954 					PCRE2_NO_UTF_CHECK, match_data, mctx);
2955 		} else
2956 #endif
2957 		count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), 0,
2958 				options, match_data, mctx);
2959 
2960 		/* If the entry fits our requirements */
2961 		if (count >= 0) {
2962 			/* Check for too many substrings condition. */
2963 			if (UNEXPECTED(count == 0)) {
2964 				php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
2965 			}
2966 			if (!invert) {
2967 				Z_TRY_ADDREF_P(entry);
2968 
2969 				/* Add to return array */
2970 				if (string_key) {
2971 					zend_hash_update(return_value_ht, string_key, entry);
2972 				} else {
2973 					zend_hash_index_update(return_value_ht, num_key, entry);
2974 				}
2975 			}
2976 		} else if (count == PCRE2_ERROR_NOMATCH) {
2977 			if (invert) {
2978 				Z_TRY_ADDREF_P(entry);
2979 
2980 				/* Add to return array */
2981 				if (string_key) {
2982 					zend_hash_update(return_value_ht, string_key, entry);
2983 				} else {
2984 					zend_hash_index_update(return_value_ht, num_key, entry);
2985 				}
2986 			}
2987 		} else {
2988 			pcre_handle_exec_error(count);
2989 			zend_tmp_string_release(tmp_subject_str);
2990 			break;
2991 		}
2992 
2993 		zend_tmp_string_release(tmp_subject_str);
2994 	} ZEND_HASH_FOREACH_END();
2995 	if (match_data != mdata) {
2996 		pcre2_match_data_free(match_data);
2997 	}
2998 }
2999 /* }}} */
3000 
3001 /* {{{ Returns the error code of the last regexp execution. */
PHP_FUNCTION(preg_last_error)3002 PHP_FUNCTION(preg_last_error)
3003 {
3004 	ZEND_PARSE_PARAMETERS_NONE();
3005 
3006 	RETURN_LONG(PCRE_G(error_code));
3007 }
3008 /* }}} */
3009 
3010 /* {{{ Returns the error message of the last regexp execution. */
PHP_FUNCTION(preg_last_error_msg)3011 PHP_FUNCTION(preg_last_error_msg)
3012 {
3013 	ZEND_PARSE_PARAMETERS_NONE();
3014 
3015 	RETURN_STRING(php_pcre_get_error_msg(PCRE_G(error_code)));
3016 }
3017 /* }}} */
3018 
3019 /* {{{ module definition structures */
3020 
3021 zend_module_entry pcre_module_entry = {
3022 	STANDARD_MODULE_HEADER,
3023 	"pcre",
3024 	ext_functions,
3025 	PHP_MINIT(pcre),
3026 	PHP_MSHUTDOWN(pcre),
3027 	PHP_RINIT(pcre),
3028 	PHP_RSHUTDOWN(pcre),
3029 	PHP_MINFO(pcre),
3030 	PHP_PCRE_VERSION,
3031 	PHP_MODULE_GLOBALS(pcre),
3032 	PHP_GINIT(pcre),
3033 	PHP_GSHUTDOWN(pcre),
3034 	NULL,
3035 	STANDARD_MODULE_PROPERTIES_EX
3036 };
3037 
3038 #ifdef COMPILE_DL_PCRE
ZEND_GET_MODULE(pcre)3039 ZEND_GET_MODULE(pcre)
3040 #endif
3041 
3042 /* }}} */
3043 
3044 PHPAPI pcre2_match_context *php_pcre_mctx(void)
3045 {/*{{{*/
3046 	return mctx;
3047 }/*}}}*/
3048 
php_pcre_gctx(void)3049 PHPAPI pcre2_general_context *php_pcre_gctx(void)
3050 {/*{{{*/
3051 	return gctx;
3052 }/*}}}*/
3053 
php_pcre_cctx(void)3054 PHPAPI pcre2_compile_context *php_pcre_cctx(void)
3055 {/*{{{*/
3056 	return cctx;
3057 }/*}}}*/
3058 
php_pcre_pce_incref(pcre_cache_entry * pce)3059 PHPAPI void php_pcre_pce_incref(pcre_cache_entry *pce)
3060 {/*{{{*/
3061 	assert(NULL != pce);
3062 	pce->refcount++;
3063 }/*}}}*/
3064 
php_pcre_pce_decref(pcre_cache_entry * pce)3065 PHPAPI void php_pcre_pce_decref(pcre_cache_entry *pce)
3066 {/*{{{*/
3067 	assert(NULL != pce);
3068 	assert(0 != pce->refcount);
3069 	pce->refcount--;
3070 }/*}}}*/
3071 
php_pcre_pce_re(pcre_cache_entry * pce)3072 PHPAPI pcre2_code *php_pcre_pce_re(pcre_cache_entry *pce)
3073 {/*{{{*/
3074 	assert(NULL != pce);
3075 	return pce->re;
3076 }/*}}}*/
3077