xref: /PHP-7.4/ext/pcre/php_pcre.c (revision 712fc54e)
1 /*
2    +----------------------------------------------------------------------+
3    | PHP Version 7                                                        |
4    +----------------------------------------------------------------------+
5    | Copyright (c) The PHP Group                                          |
6    +----------------------------------------------------------------------+
7    | This source file is subject to version 3.01 of the PHP license,      |
8    | that is bundled with this package in the file LICENSE, and is        |
9    | available through the world-wide-web at the following url:           |
10    | http://www.php.net/license/3_01.txt                                  |
11    | If you did not receive a copy of the PHP license and are unable to   |
12    | obtain it through the world-wide-web, please send a note to          |
13    | license@php.net so we can mail you a copy immediately.               |
14    +----------------------------------------------------------------------+
15    | Author: Andrei Zmievski <andrei@php.net>                             |
16    +----------------------------------------------------------------------+
17  */
18 
19 #include "php.h"
20 #include "php_ini.h"
21 #include "php_globals.h"
22 #include "php_pcre.h"
23 #include "ext/standard/info.h"
24 #include "ext/standard/basic_functions.h"
25 #include "zend_smart_str.h"
26 #include "SAPI.h"
27 
28 #include "ext/standard/php_string.h"
29 
/* Result-shaping flags; exported to userland as PREG_* constants in MINIT. */
#define PREG_PATTERN_ORDER          1
#define PREG_SET_ORDER              2
#define PREG_OFFSET_CAPTURE         (1 << 8)
#define PREG_UNMATCHED_AS_NULL      (1 << 9)

/* Split flags; exported to userland as PREG_SPLIT_* constants in MINIT. */
#define PREG_SPLIT_NO_EMPTY         (1 << 0)
#define PREG_SPLIT_DELIM_CAPTURE    (1 << 1)
#define PREG_SPLIT_OFFSET_CAPTURE   (1 << 2)

/* Internal marker for the 'e' pattern modifier (rejected at compile time). */
#define PREG_REPLACE_EVAL           (1 << 0)

/* Grep flag; exported to userland as PREG_GREP_INVERT in MINIT. */
#define PREG_GREP_INVERT            (1 << 0)

/* Internal preg_options bit: the pattern was successfully JIT-compiled. */
#define PREG_JIT                    (1 << 3)

/* Number of cached patterns at which old entries start being evicted. */
#define PCRE_CACHE_SIZE             4096
46 
47 struct _pcre_cache_entry {
48 	pcre2_code *re;
49 	uint32_t preg_options;
50 	uint32_t capture_count;
51 	uint32_t name_count;
52 	uint32_t compile_options;
53 	uint32_t extra_compile_options;
54 	uint32_t refcount;
55 };
56 
/* Error codes stored in PCRE_G(error_code); exported as PREG_*_ERROR in MINIT. */
enum {
	PHP_PCRE_NO_ERROR              = 0,
	PHP_PCRE_INTERNAL_ERROR        = 1,
	PHP_PCRE_BACKTRACK_LIMIT_ERROR = 2,
	PHP_PCRE_RECURSION_LIMIT_ERROR = 3,
	PHP_PCRE_BAD_UTF8_ERROR        = 4,
	PHP_PCRE_BAD_UTF8_OFFSET_ERROR = 5,
	PHP_PCRE_JIT_STACKLIMIT_ERROR  = 6
};
66 
67 
68 PHPAPI ZEND_DECLARE_MODULE_GLOBALS(pcre)
69 
70 #ifdef HAVE_PCRE_JIT_SUPPORT
71 #define PCRE_JIT_STACK_MIN_SIZE (32 * 1024)
72 #define PCRE_JIT_STACK_MAX_SIZE (192 * 1024)
73 ZEND_TLS pcre2_jit_stack *jit_stack = NULL;
74 #endif
75 ZEND_TLS pcre2_general_context *gctx = NULL;
76 /* These two are global per thread for now. Though it is possible to use these
77  	per pattern. Either one can copy it and use in pce, or one does no global
78 	contexts at all, but creates for every pce. */
79 ZEND_TLS pcre2_compile_context *cctx = NULL;
80 ZEND_TLS pcre2_match_context   *mctx = NULL;
81 ZEND_TLS pcre2_match_data      *mdata = NULL;
82 ZEND_TLS zend_bool              mdata_used = 0;
83 ZEND_TLS uint8_t pcre2_init_ok = 0;
84 #if defined(ZTS) && defined(HAVE_PCRE_JIT_SUPPORT)
85 static MUTEX_T pcre_mt = NULL;
86 #define php_pcre_mutex_alloc() \
87 	if (tsrm_is_main_thread() && !pcre_mt) pcre_mt = tsrm_mutex_alloc();
88 #define php_pcre_mutex_free() \
89 	if (tsrm_is_main_thread() && pcre_mt) { tsrm_mutex_free(pcre_mt); pcre_mt = NULL; }
90 #define php_pcre_mutex_lock() tsrm_mutex_lock(pcre_mt);
91 #define php_pcre_mutex_unlock() tsrm_mutex_unlock(pcre_mt);
92 #else
93 #define php_pcre_mutex_alloc()
94 #define php_pcre_mutex_free()
95 #define php_pcre_mutex_lock()
96 #define php_pcre_mutex_unlock()
97 #endif
98 
99 ZEND_TLS HashTable char_tables;
100 
/* Destructor for char_tables: frees the persistently allocated table pointer. */
static void php_pcre_free_char_table(zval *data)
{/*{{{*/
	pefree(Z_PTR_P(data), 1);
}/*}}}*/
106 
pcre_handle_exec_error(int pcre_code)107 static void pcre_handle_exec_error(int pcre_code) /* {{{ */
108 {
109 	int preg_code = 0;
110 
111 	switch (pcre_code) {
112 		case PCRE2_ERROR_MATCHLIMIT:
113 			preg_code = PHP_PCRE_BACKTRACK_LIMIT_ERROR;
114 			break;
115 
116 		case PCRE2_ERROR_RECURSIONLIMIT:
117 			preg_code = PHP_PCRE_RECURSION_LIMIT_ERROR;
118 			break;
119 
120 		case PCRE2_ERROR_BADUTFOFFSET:
121 			preg_code = PHP_PCRE_BAD_UTF8_OFFSET_ERROR;
122 			break;
123 
124 #ifdef HAVE_PCRE_JIT_SUPPORT
125 		case PCRE2_ERROR_JIT_STACKLIMIT:
126 			preg_code = PHP_PCRE_JIT_STACKLIMIT_ERROR;
127 			break;
128 #endif
129 
130 		default:
131 			if (pcre_code <= PCRE2_ERROR_UTF8_ERR1 && pcre_code >= PCRE2_ERROR_UTF8_ERR21) {
132 				preg_code = PHP_PCRE_BAD_UTF8_ERROR;
133 			} else  {
134 				preg_code = PHP_PCRE_INTERNAL_ERROR;
135 			}
136 			break;
137 	}
138 
139 	PCRE_G(error_code) = preg_code;
140 }
141 /* }}} */
142 
/* Destructor for the persistent pattern cache: entry memory is released with free(). */
static void php_free_pcre_cache(zval *data) /* {{{ */
{
	pcre_cache_entry *entry = (pcre_cache_entry *) Z_PTR_P(data);

	if (entry != NULL) {
		pcre2_code_free(entry->re);
		free(entry);
	}
}
/* }}} */
151 
/* Destructor for the per-request pattern cache: entry memory is released with efree(). */
static void php_efree_pcre_cache(zval *data) /* {{{ */
{
	pcre_cache_entry *entry = (pcre_cache_entry *) Z_PTR_P(data);

	if (entry != NULL) {
		pcre2_code_free(entry->re);
		efree(entry);
	}
}
/* }}} */
160 
/* Allocation callback given to PCRE2: persistent allocation; `data` is not used. */
static void *php_pcre_malloc(PCRE2_SIZE size, void *data)
{/*{{{*/
	return pemalloc(size, 1);
}/*}}}*/
166 
/* Release callback given to PCRE2, paired with php_pcre_malloc(); `data` is not used. */
static void php_pcre_free(void *block, void *data)
{/*{{{*/
	pefree(block, 1);
}/*}}}*/
171 
/* Extra compile options applied to every pattern unless a modifier overrides them. */
#ifndef PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK
#define PHP_PCRE_DEFAULT_EXTRA_COPTIONS PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL
#else
	/* pcre 10.38 needs PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK, disabled by default */
#define PHP_PCRE_DEFAULT_EXTRA_COPTIONS (PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL|PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK)
#endif

/* Size passed to pcre2_match_data_create() for the shared, preallocated block. */
#define PHP_PCRE_PREALLOC_MDATA_SIZE 32
180 
php_pcre_init_pcre2(uint8_t jit)181 static void php_pcre_init_pcre2(uint8_t jit)
182 {/*{{{*/
183 	if (!gctx) {
184 		gctx = pcre2_general_context_create(php_pcre_malloc, php_pcre_free, NULL);
185 		if (!gctx) {
186 			pcre2_init_ok = 0;
187 			return;
188 		}
189 	}
190 
191 	if (!cctx) {
192 		cctx = pcre2_compile_context_create(gctx);
193 		if (!cctx) {
194 			pcre2_init_ok = 0;
195 			return;
196 		}
197 	}
198 
199 	/* XXX The 'X' modifier is the default behavior in PCRE2. This option is
200 		called dangerous in the manual, as typos in patterns can cause
201 		unexpected results. We might want to to switch to the default PCRE2
202 		behavior, too, thus causing a certain BC break. */
203 	pcre2_set_compile_extra_options(cctx, PHP_PCRE_DEFAULT_EXTRA_COPTIONS);
204 
205 	if (!mctx) {
206 		mctx = pcre2_match_context_create(gctx);
207 		if (!mctx) {
208 			pcre2_init_ok = 0;
209 			return;
210 		}
211 	}
212 
213 #ifdef HAVE_PCRE_JIT_SUPPORT
214 	if (jit && !jit_stack) {
215 		jit_stack = pcre2_jit_stack_create(PCRE_JIT_STACK_MIN_SIZE, PCRE_JIT_STACK_MAX_SIZE, gctx);
216 		if (!jit_stack) {
217 			pcre2_init_ok = 0;
218 			return;
219 		}
220 	}
221 #endif
222 
223 	if (!mdata) {
224 		mdata = pcre2_match_data_create(PHP_PCRE_PREALLOC_MDATA_SIZE, gctx);
225 		if (!mdata) {
226 			pcre2_init_ok = 0;
227 			return;
228 		}
229 	}
230 
231 	pcre2_init_ok = 1;
232 }/*}}}*/
233 
php_pcre_shutdown_pcre2(void)234 static void php_pcre_shutdown_pcre2(void)
235 {/*{{{*/
236 	if (gctx) {
237 		pcre2_general_context_free(gctx);
238 		gctx = NULL;
239 	}
240 
241 	if (cctx) {
242 		pcre2_compile_context_free(cctx);
243 		cctx = NULL;
244 	}
245 
246 	if (mctx) {
247 		pcre2_match_context_free(mctx);
248 		mctx = NULL;
249 	}
250 
251 #ifdef HAVE_PCRE_JIT_SUPPORT
252 	/* Stack may only be destroyed when no cached patterns
253 	 	possibly associated with it do exist. */
254 	if (jit_stack) {
255 		pcre2_jit_stack_free(jit_stack);
256 		jit_stack = NULL;
257 	}
258 #endif
259 
260 	if (mdata) {
261 		pcre2_match_data_free(mdata);
262 		mdata = NULL;
263 	}
264 
265 	pcre2_init_ok = 0;
266 }/*}}}*/
267 
PHP_GINIT_FUNCTION(pcre)268 static PHP_GINIT_FUNCTION(pcre) /* {{{ */
269 {
270 	php_pcre_mutex_alloc();
271 
272 	/* If we're on the CLI SAPI, there will only be one request, so we don't need the
273 	 * cache to survive after RSHUTDOWN. */
274 	pcre_globals->per_request_cache = strcmp(sapi_module.name, "cli") == 0;
275 	if (!pcre_globals->per_request_cache) {
276 		zend_hash_init(&pcre_globals->pcre_cache, 0, NULL, php_free_pcre_cache, 1);
277 	}
278 
279 	pcre_globals->backtrack_limit = 0;
280 	pcre_globals->recursion_limit = 0;
281 	pcre_globals->error_code      = PHP_PCRE_NO_ERROR;
282 	ZVAL_UNDEF(&pcre_globals->unmatched_null_pair);
283 	ZVAL_UNDEF(&pcre_globals->unmatched_empty_pair);
284 #ifdef HAVE_PCRE_JIT_SUPPORT
285 	pcre_globals->jit = 1;
286 #endif
287 
288 	php_pcre_init_pcre2(1);
289 	zend_hash_init(&char_tables, 1, NULL, php_pcre_free_char_table, 1);
290 }
291 /* }}} */
292 
PHP_GSHUTDOWN_FUNCTION(pcre)293 static PHP_GSHUTDOWN_FUNCTION(pcre) /* {{{ */
294 {
295 	if (!pcre_globals->per_request_cache) {
296 		zend_hash_destroy(&pcre_globals->pcre_cache);
297 	}
298 
299 	php_pcre_shutdown_pcre2();
300 	zend_hash_destroy(&char_tables);
301 	php_pcre_mutex_free();
302 }
303 /* }}} */
304 
PHP_INI_MH(OnUpdateBacktrackLimit)305 static PHP_INI_MH(OnUpdateBacktrackLimit)
306 {/*{{{*/
307 	OnUpdateLong(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
308 	if (mctx) {
309 		pcre2_set_match_limit(mctx, (uint32_t)PCRE_G(backtrack_limit));
310 	}
311 
312 	return SUCCESS;
313 }/*}}}*/
314 
PHP_INI_MH(OnUpdateRecursionLimit)315 static PHP_INI_MH(OnUpdateRecursionLimit)
316 {/*{{{*/
317 	OnUpdateLong(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
318 	if (mctx) {
319 		pcre2_set_depth_limit(mctx, (uint32_t)PCRE_G(recursion_limit));
320 	}
321 
322 	return SUCCESS;
323 }/*}}}*/
324 
#ifdef HAVE_PCRE_JIT_SUPPORT
/* INI handler for pcre.jit: stores the flag, then attaches the JIT stack to
 * the match context when JIT is on and a stack exists, or detaches it otherwise. */
static PHP_INI_MH(OnUpdateJit)
{/*{{{*/
	pcre2_jit_stack *stack_to_assign = NULL;

	OnUpdateBool(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);

	if (PCRE_G(jit) && jit_stack) {
		stack_to_assign = jit_stack;
	}
	pcre2_jit_stack_assign(mctx, NULL, stack_to_assign);

	return SUCCESS;
}/*}}}*/
#endif
338 
339 PHP_INI_BEGIN()
340 	STD_PHP_INI_ENTRY("pcre.backtrack_limit", "1000000", PHP_INI_ALL, OnUpdateBacktrackLimit, backtrack_limit, zend_pcre_globals, pcre_globals)
341 	STD_PHP_INI_ENTRY("pcre.recursion_limit", "100000",  PHP_INI_ALL, OnUpdateRecursionLimit, recursion_limit, zend_pcre_globals, pcre_globals)
342 #ifdef HAVE_PCRE_JIT_SUPPORT
343 	STD_PHP_INI_ENTRY("pcre.jit",             "1",       PHP_INI_ALL, OnUpdateJit, jit,             zend_pcre_globals, pcre_globals)
344 #endif
PHP_INI_END()345 PHP_INI_END()
346 
/* Fetch a string-valued pcre2_config() item.
 *
 * what - PCRE2_CONFIG_* selector of a string item.
 *
 * Returns a malloc()ed, NUL-terminated copy that the caller must free(), or
 * NULL when the item is unavailable (pcre2_config() reports an error or an
 * empty length) or when memory allocation fails. */
static char *_pcre2_config_str(uint32_t what)
{/*{{{*/
	char *ret;
	/* First call with a NULL buffer only asks for the required length. */
	int len = pcre2_config(what, NULL);

	/* Negative values are PCRE2 error codes; never turn them into a size. */
	if (len <= 0) {
		return NULL;
	}

	ret = malloc((size_t) len + 1);
	if (ret == NULL) {
		return NULL;
	}

	len = pcre2_config(what, ret);
	if (len <= 0) {
		free(ret);
		return NULL;
	}

	return ret;
}/*}}}*/
360 
361 /* {{{ PHP_MINFO_FUNCTION(pcre) */
PHP_MINFO_FUNCTION(pcre)362 static PHP_MINFO_FUNCTION(pcre)
363 {
364 #ifdef HAVE_PCRE_JIT_SUPPORT
365 	uint32_t flag = 0;
366 	char *jit_target = _pcre2_config_str(PCRE2_CONFIG_JITTARGET);
367 #endif
368 	char *version = _pcre2_config_str(PCRE2_CONFIG_VERSION);
369 	char *unicode = _pcre2_config_str(PCRE2_CONFIG_UNICODE_VERSION);
370 
371 	php_info_print_table_start();
372 	php_info_print_table_row(2, "PCRE (Perl Compatible Regular Expressions) Support", "enabled" );
373 	php_info_print_table_row(2, "PCRE Library Version", version);
374 	free(version);
375 	php_info_print_table_row(2, "PCRE Unicode Version", unicode);
376 	free(unicode);
377 
378 #ifdef HAVE_PCRE_JIT_SUPPORT
379 	if (!pcre2_config(PCRE2_CONFIG_JIT, &flag)) {
380 		php_info_print_table_row(2, "PCRE JIT Support", flag ? "enabled" : "disabled");
381 	} else {
382 		php_info_print_table_row(2, "PCRE JIT Support", "unknown" );
383 	}
384 	if (jit_target) {
385 		php_info_print_table_row(2, "PCRE JIT Target", jit_target);
386 	}
387 	free(jit_target);
388 #else
389 	php_info_print_table_row(2, "PCRE JIT Support", "not compiled in" );
390 #endif
391 
392 #ifdef HAVE_PCRE_VALGRIND_SUPPORT
393 	php_info_print_table_row(2, "PCRE Valgrind Support", "enabled" );
394 #endif
395 
396 	php_info_print_table_end();
397 
398 	DISPLAY_INI_ENTRIES();
399 }
400 /* }}} */
401 
402 /* {{{ PHP_MINIT_FUNCTION(pcre) */
PHP_MINIT_FUNCTION(pcre)403 static PHP_MINIT_FUNCTION(pcre)
404 {
405 	char *version;
406 
407 #ifdef HAVE_PCRE_JIT_SUPPORT
408 	if (UNEXPECTED(!pcre2_init_ok)) {
409 		/* Retry. */
410 		php_pcre_init_pcre2(PCRE_G(jit));
411 		if (!pcre2_init_ok) {
412 			return FAILURE;
413 		}
414 	}
415 #endif
416 
417 	REGISTER_INI_ENTRIES();
418 
419 	REGISTER_LONG_CONSTANT("PREG_PATTERN_ORDER", PREG_PATTERN_ORDER, CONST_CS | CONST_PERSISTENT);
420 	REGISTER_LONG_CONSTANT("PREG_SET_ORDER", PREG_SET_ORDER, CONST_CS | CONST_PERSISTENT);
421 	REGISTER_LONG_CONSTANT("PREG_OFFSET_CAPTURE", PREG_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
422 	REGISTER_LONG_CONSTANT("PREG_UNMATCHED_AS_NULL", PREG_UNMATCHED_AS_NULL, CONST_CS | CONST_PERSISTENT);
423 	REGISTER_LONG_CONSTANT("PREG_SPLIT_NO_EMPTY", PREG_SPLIT_NO_EMPTY, CONST_CS | CONST_PERSISTENT);
424 	REGISTER_LONG_CONSTANT("PREG_SPLIT_DELIM_CAPTURE", PREG_SPLIT_DELIM_CAPTURE, CONST_CS | CONST_PERSISTENT);
425 	REGISTER_LONG_CONSTANT("PREG_SPLIT_OFFSET_CAPTURE", PREG_SPLIT_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
426 	REGISTER_LONG_CONSTANT("PREG_GREP_INVERT", PREG_GREP_INVERT, CONST_CS | CONST_PERSISTENT);
427 
428 	REGISTER_LONG_CONSTANT("PREG_NO_ERROR", PHP_PCRE_NO_ERROR, CONST_CS | CONST_PERSISTENT);
429 	REGISTER_LONG_CONSTANT("PREG_INTERNAL_ERROR", PHP_PCRE_INTERNAL_ERROR, CONST_CS | CONST_PERSISTENT);
430 	REGISTER_LONG_CONSTANT("PREG_BACKTRACK_LIMIT_ERROR", PHP_PCRE_BACKTRACK_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
431 	REGISTER_LONG_CONSTANT("PREG_RECURSION_LIMIT_ERROR", PHP_PCRE_RECURSION_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
432 	REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_ERROR", PHP_PCRE_BAD_UTF8_ERROR, CONST_CS | CONST_PERSISTENT);
433 	REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_OFFSET_ERROR", PHP_PCRE_BAD_UTF8_OFFSET_ERROR, CONST_CS | CONST_PERSISTENT);
434 	REGISTER_LONG_CONSTANT("PREG_JIT_STACKLIMIT_ERROR", PHP_PCRE_JIT_STACKLIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
435 	version = _pcre2_config_str(PCRE2_CONFIG_VERSION);
436 	REGISTER_STRING_CONSTANT("PCRE_VERSION", version, CONST_CS | CONST_PERSISTENT);
437 	free(version);
438 	REGISTER_LONG_CONSTANT("PCRE_VERSION_MAJOR", PCRE2_MAJOR, CONST_CS | CONST_PERSISTENT);
439 	REGISTER_LONG_CONSTANT("PCRE_VERSION_MINOR", PCRE2_MINOR, CONST_CS | CONST_PERSISTENT);
440 
441 #ifdef HAVE_PCRE_JIT_SUPPORT
442 	REGISTER_BOOL_CONSTANT("PCRE_JIT_SUPPORT", 1, CONST_CS | CONST_PERSISTENT);
443 #else
444 	REGISTER_BOOL_CONSTANT("PCRE_JIT_SUPPORT", 0, CONST_CS | CONST_PERSISTENT);
445 #endif
446 
447 	return SUCCESS;
448 }
449 /* }}} */
450 
451 /* {{{ PHP_MSHUTDOWN_FUNCTION(pcre) */
PHP_MSHUTDOWN_FUNCTION(pcre)452 static PHP_MSHUTDOWN_FUNCTION(pcre)
453 {
454 	UNREGISTER_INI_ENTRIES();
455 
456 	return SUCCESS;
457 }
458 /* }}} */
459 
460 /* {{{ PHP_RINIT_FUNCTION(pcre) */
PHP_RINIT_FUNCTION(pcre)461 static PHP_RINIT_FUNCTION(pcre)
462 {
463 #ifdef HAVE_PCRE_JIT_SUPPORT
464 	if (UNEXPECTED(!pcre2_init_ok)) {
465 		/* Retry. */
466 		php_pcre_mutex_lock();
467 		php_pcre_init_pcre2(PCRE_G(jit));
468 		if (!pcre2_init_ok) {
469 			php_pcre_mutex_unlock();
470 			return FAILURE;
471 		}
472 		php_pcre_mutex_unlock();
473 	}
474 
475 	mdata_used = 0;
476 #endif
477 
478 	if (PCRE_G(per_request_cache)) {
479 		zend_hash_init(&PCRE_G(pcre_cache), 0, NULL, php_efree_pcre_cache, 0);
480 	}
481 
482 	return SUCCESS;
483 }
484 /* }}} */
485 
PHP_RSHUTDOWN_FUNCTION(pcre)486 static PHP_RSHUTDOWN_FUNCTION(pcre)
487 {
488 	if (PCRE_G(per_request_cache)) {
489 		zend_hash_destroy(&PCRE_G(pcre_cache));
490 	}
491 
492 	zval_ptr_dtor(&PCRE_G(unmatched_null_pair));
493 	zval_ptr_dtor(&PCRE_G(unmatched_empty_pair));
494 	ZVAL_UNDEF(&PCRE_G(unmatched_null_pair));
495 	ZVAL_UNDEF(&PCRE_G(unmatched_empty_pair));
496 	return SUCCESS;
497 }
498 
499 /* {{{ static pcre_clean_cache */
pcre_clean_cache(zval * data,void * arg)500 static int pcre_clean_cache(zval *data, void *arg)
501 {
502 	pcre_cache_entry *pce = (pcre_cache_entry *) Z_PTR_P(data);
503 	int *num_clean = (int *)arg;
504 
505 	if (*num_clean > 0 && !pce->refcount) {
506 		(*num_clean)--;
507 		return ZEND_HASH_APPLY_REMOVE;
508 	} else {
509 		return ZEND_HASH_APPLY_KEEP;
510 	}
511 }
512 /* }}} */
513 
/* Release every non-NULL name in a table built by make_subpats_table(), then
 * free the table itself. num_subpats is the number of slots in the table. */
static void free_subpats_table(zend_string **subpat_names, uint32_t num_subpats) {
	uint32_t slot = 0;

	while (slot < num_subpats) {
		zend_string *name = subpat_names[slot];

		if (name != NULL) {
			zend_string_release(name);
		}
		slot++;
	}
	efree(subpat_names);
}
523 
524 /* {{{ static make_subpats_table */
make_subpats_table(uint32_t num_subpats,pcre_cache_entry * pce)525 static zend_string **make_subpats_table(uint32_t num_subpats, pcre_cache_entry *pce)
526 {
527 	uint32_t name_cnt = pce->name_count, name_size, ni = 0;
528 	char *name_table;
529 	zend_string **subpat_names;
530 	int rc1, rc2;
531 
532 	rc1 = pcre2_pattern_info(pce->re, PCRE2_INFO_NAMETABLE, &name_table);
533 	rc2 = pcre2_pattern_info(pce->re, PCRE2_INFO_NAMEENTRYSIZE, &name_size);
534 	if (rc1 < 0 || rc2 < 0) {
535 		php_error_docref(NULL, E_WARNING, "Internal pcre2_pattern_info() error %d", rc1 < 0 ? rc1 : rc2);
536 		return NULL;
537 	}
538 
539 	subpat_names = ecalloc(num_subpats, sizeof(zend_string *));
540 	while (ni++ < name_cnt) {
541 		unsigned short name_idx = 0x100 * (unsigned char)name_table[0] + (unsigned char)name_table[1];
542 		const char *name = name_table + 2;
543 		subpat_names[name_idx] = zend_string_init(name, strlen(name), 0);
544 		if (is_numeric_string(ZSTR_VAL(subpat_names[name_idx]), ZSTR_LEN(subpat_names[name_idx]), NULL, NULL, 0) > 0) {
545 			php_error_docref(NULL, E_WARNING, "Numeric named subpatterns are not allowed");
546 			free_subpats_table(subpat_names, num_subpats);
547 			return NULL;
548 		}
549 		name_table += name_size;
550 	}
551 	return subpat_names;
552 }
553 /* }}} */
554 
555 /* {{{ static calculate_unit_length */
556 /* Calculates the byte length of the next character. Assumes valid UTF-8 for PCRE2_UTF. */
calculate_unit_length(pcre_cache_entry * pce,char * start)557 static zend_always_inline size_t calculate_unit_length(pcre_cache_entry *pce, char *start)
558 {
559 	size_t unit_len;
560 
561 	if (pce->compile_options & PCRE2_UTF) {
562 		char *end = start;
563 
564 		/* skip continuation bytes */
565 		while ((*++end & 0xC0) == 0x80);
566 		unit_len = end - start;
567 	} else {
568 		unit_len = 1;
569 	}
570 	return unit_len;
571 }
572 /* }}} */
573 
574 /* {{{ pcre_get_compiled_regex_cache
575  */
pcre_get_compiled_regex_cache_ex(zend_string * regex,int locale_aware)576 PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, int locale_aware)
577 {
578 	pcre2_code			*re = NULL;
579 #if 10 == PCRE2_MAJOR && 37 == PCRE2_MINOR
580 	uint32_t			 coptions = PCRE2_NO_START_OPTIMIZE;
581 #else
582 	uint32_t			 coptions = 0;
583 #endif
584 	uint32_t			 extra_coptions = PHP_PCRE_DEFAULT_EXTRA_COPTIONS;
585 	PCRE2_UCHAR	         error[128];
586 	PCRE2_SIZE           erroffset;
587 	int                  errnumber;
588 	char				 delimiter;
589 	char				 start_delimiter;
590 	char				 end_delimiter;
591 	char				*p, *pp;
592 	char				*pattern;
593 	size_t				 pattern_len;
594 	uint32_t			 poptions = 0;
595 	const uint8_t       *tables = NULL;
596 	zval                *zv;
597 	pcre_cache_entry	 new_entry;
598 	int					 rc;
599 	zend_string 		*key;
600 	pcre_cache_entry *ret;
601 
602 	if (locale_aware && BG(locale_string) &&
603 		(ZSTR_LEN(BG(locale_string)) != 1 && ZSTR_VAL(BG(locale_string))[0] != 'C')) {
604 		key = zend_string_alloc(ZSTR_LEN(regex) + ZSTR_LEN(BG(locale_string)) + 1, 0);
605 		memcpy(ZSTR_VAL(key), ZSTR_VAL(BG(locale_string)), ZSTR_LEN(BG(locale_string)) + 1);
606 		memcpy(ZSTR_VAL(key) + ZSTR_LEN(BG(locale_string)), ZSTR_VAL(regex), ZSTR_LEN(regex) + 1);
607 	} else {
608 		key = regex;
609 	}
610 
611 	/* Try to lookup the cached regex entry, and if successful, just pass
612 	   back the compiled pattern, otherwise go on and compile it. */
613 	zv = zend_hash_find(&PCRE_G(pcre_cache), key);
614 	if (zv) {
615 		if (key != regex) {
616 			zend_string_release_ex(key, 0);
617 		}
618 		return (pcre_cache_entry*)Z_PTR_P(zv);
619 	}
620 
621 	p = ZSTR_VAL(regex);
622 
623 	/* Parse through the leading whitespace, and display a warning if we
624 	   get to the end without encountering a delimiter. */
625 	while (isspace((int)*(unsigned char *)p)) p++;
626 	if (*p == 0) {
627 		if (key != regex) {
628 			zend_string_release_ex(key, 0);
629 		}
630 		php_error_docref(NULL, E_WARNING,
631 						 p < ZSTR_VAL(regex) + ZSTR_LEN(regex) ? "Null byte in regex" : "Empty regular expression");
632 		pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
633 		return NULL;
634 	}
635 
636 	/* Get the delimiter and display a warning if it is alphanumeric
637 	   or a backslash. */
638 	delimiter = *p++;
639 	if (isalnum((int)*(unsigned char *)&delimiter) || delimiter == '\\') {
640 		if (key != regex) {
641 			zend_string_release_ex(key, 0);
642 		}
643 		php_error_docref(NULL,E_WARNING, "Delimiter must not be alphanumeric or backslash");
644 		pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
645 		return NULL;
646 	}
647 
648 	start_delimiter = delimiter;
649 	if ((pp = strchr("([{< )]}> )]}>", delimiter)))
650 		delimiter = pp[5];
651 	end_delimiter = delimiter;
652 
653 	pp = p;
654 
655 	if (start_delimiter == end_delimiter) {
656 		/* We need to iterate through the pattern, searching for the ending delimiter,
657 		   but skipping the backslashed delimiters.  If the ending delimiter is not
658 		   found, display a warning. */
659 		while (*pp != 0) {
660 			if (*pp == '\\' && pp[1] != 0) pp++;
661 			else if (*pp == delimiter)
662 				break;
663 			pp++;
664 		}
665 	} else {
666 		/* We iterate through the pattern, searching for the matching ending
667 		 * delimiter. For each matching starting delimiter, we increment nesting
668 		 * level, and decrement it for each matching ending delimiter. If we
669 		 * reach the end of the pattern without matching, display a warning.
670 		 */
671 		int brackets = 1; 	/* brackets nesting level */
672 		while (*pp != 0) {
673 			if (*pp == '\\' && pp[1] != 0) pp++;
674 			else if (*pp == end_delimiter && --brackets <= 0)
675 				break;
676 			else if (*pp == start_delimiter)
677 				brackets++;
678 			pp++;
679 		}
680 	}
681 
682 	if (*pp == 0) {
683 		if (key != regex) {
684 			zend_string_release_ex(key, 0);
685 		}
686 		if (pp < ZSTR_VAL(regex) + ZSTR_LEN(regex)) {
687 			php_error_docref(NULL,E_WARNING, "Null byte in regex");
688 		} else if (start_delimiter == end_delimiter) {
689 			php_error_docref(NULL,E_WARNING, "No ending delimiter '%c' found", delimiter);
690 		} else {
691 			php_error_docref(NULL,E_WARNING, "No ending matching delimiter '%c' found", delimiter);
692 		}
693 		pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
694 		return NULL;
695 	}
696 
697 	/* Make a copy of the actual pattern. */
698 	pattern_len = pp - p;
699 	pattern = estrndup(p, pattern_len);
700 
701 	/* Move on to the options */
702 	pp++;
703 
704 	/* Parse through the options, setting appropriate flags.  Display
705 	   a warning if we encounter an unknown modifier. */
706 	while (pp < ZSTR_VAL(regex) + ZSTR_LEN(regex)) {
707 		switch (*pp++) {
708 			/* Perl compatible options */
709 			case 'i':	coptions |= PCRE2_CASELESS;		break;
710 			case 'm':	coptions |= PCRE2_MULTILINE;		break;
711 			case 's':	coptions |= PCRE2_DOTALL;		break;
712 			case 'x':	coptions |= PCRE2_EXTENDED;		break;
713 
714 			/* PCRE specific options */
715 			case 'A':	coptions |= PCRE2_ANCHORED;		break;
716 			case 'D':	coptions |= PCRE2_DOLLAR_ENDONLY;break;
717 			case 'S':	/* Pass. */					break;
718 			case 'U':	coptions |= PCRE2_UNGREEDY;		break;
719 			case 'X':	extra_coptions &= ~PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL;			break;
720 			case 'u':	coptions |= PCRE2_UTF;
721 	/* In  PCRE,  by  default, \d, \D, \s, \S, \w, and \W recognize only ASCII
722        characters, even in UTF-8 mode. However, this can be changed by setting
723        the PCRE2_UCP option. */
724 #ifdef PCRE2_UCP
725 						coptions |= PCRE2_UCP;
726 #endif
727 				break;
728 			case 'J':	coptions |= PCRE2_DUPNAMES;		break;
729 
730 			/* Custom preg options */
731 			case 'e':	poptions |= PREG_REPLACE_EVAL;	break;
732 
733 			case ' ':
734 			case '\n':
735 			case '\r':
736 				break;
737 
738 			default:
739 				if (pp[-1]) {
740 					php_error_docref(NULL,E_WARNING, "Unknown modifier '%c'", pp[-1]);
741 				} else {
742 					php_error_docref(NULL,E_WARNING, "Null byte in regex");
743 				}
744 				pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
745 				efree(pattern);
746 				if (key != regex) {
747 					zend_string_release_ex(key, 0);
748 				}
749 				return NULL;
750 		}
751 	}
752 
753 	if (poptions & PREG_REPLACE_EVAL) {
754 		php_error_docref(NULL, E_WARNING, "The /e modifier is no longer supported, use preg_replace_callback instead");
755 		pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
756 		efree(pattern);
757 		if (key != regex) {
758 			zend_string_release_ex(key, 0);
759 		}
760 		return NULL;
761 	}
762 
763 	if (key != regex) {
764 		tables = (uint8_t *)zend_hash_find_ptr(&char_tables, BG(locale_string));
765 		if (!tables) {
766 			zend_string *_k;
767 			tables = pcre2_maketables(gctx);
768 			if (UNEXPECTED(!tables)) {
769 				php_error_docref(NULL,E_WARNING, "Failed to generate locale character tables");
770 				pcre_handle_exec_error(PCRE2_ERROR_NOMEMORY);
771 				zend_string_release_ex(key, 0);
772 				efree(pattern);
773 				return NULL;
774 			}
775 			_k = zend_string_init(ZSTR_VAL(BG(locale_string)), ZSTR_LEN(BG(locale_string)), 1);
776 			GC_MAKE_PERSISTENT_LOCAL(_k);
777 			zend_hash_add_ptr(&char_tables, _k, (void *)tables);
778 			zend_string_release(_k);
779 		}
780 	}
781 	pcre2_set_character_tables(cctx, tables);
782 
783 	/* Set extra options for the compile context. */
784 	if (PHP_PCRE_DEFAULT_EXTRA_COPTIONS != extra_coptions) {
785 		pcre2_set_compile_extra_options(cctx, extra_coptions);
786 	}
787 
788 	/* Compile pattern and display a warning if compilation failed. */
789 	re = pcre2_compile((PCRE2_SPTR)pattern, pattern_len, coptions, &errnumber, &erroffset, cctx);
790 
791 	/* Reset the compile context extra options to default. */
792 	if (PHP_PCRE_DEFAULT_EXTRA_COPTIONS != extra_coptions) {
793 		pcre2_set_compile_extra_options(cctx, PHP_PCRE_DEFAULT_EXTRA_COPTIONS);
794 	}
795 
796 	if (re == NULL) {
797 		if (key != regex) {
798 			zend_string_release_ex(key, 0);
799 		}
800 		pcre2_get_error_message(errnumber, error, sizeof(error));
801 		php_error_docref(NULL,E_WARNING, "Compilation failed: %s at offset %zu", error, erroffset);
802 		pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
803 		efree(pattern);
804 		return NULL;
805 	}
806 
807 #ifdef HAVE_PCRE_JIT_SUPPORT
808 	if (PCRE_G(jit)) {
809 		/* Enable PCRE JIT compiler */
810 		rc = pcre2_jit_compile(re, PCRE2_JIT_COMPLETE);
811 		if (EXPECTED(rc >= 0)) {
812 			size_t jit_size = 0;
813 			if (!pcre2_pattern_info(re, PCRE2_INFO_JITSIZE, &jit_size) && jit_size > 0) {
814 				poptions |= PREG_JIT;
815 			}
816 		} else if (rc == PCRE2_ERROR_NOMEMORY) {
817 			php_error_docref(NULL, E_WARNING,
818 				"Allocation of JIT memory failed, PCRE JIT will be disabled. "
819 				"This is likely caused by security restrictions. "
820 				"Either grant PHP permission to allocate executable memory, or set pcre.jit=0");
821 			PCRE_G(jit) = 0;
822 		} else {
823 			pcre2_get_error_message(rc, error, sizeof(error));
824 			php_error_docref(NULL, E_WARNING, "JIT compilation failed: %s", error);
825 			pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
826 		}
827 	}
828 #endif
829 	efree(pattern);
830 
831 	/*
832 	 * If we reached cache limit, clean out the items from the head of the list;
833 	 * these are supposedly the oldest ones (but not necessarily the least used
834 	 * ones).
835 	 */
836 	if (zend_hash_num_elements(&PCRE_G(pcre_cache)) == PCRE_CACHE_SIZE) {
837 		int num_clean = PCRE_CACHE_SIZE / 8;
838 		zend_hash_apply_with_argument(&PCRE_G(pcre_cache), pcre_clean_cache, &num_clean);
839 	}
840 
841 	/* Store the compiled pattern and extra info in the cache. */
842 	new_entry.re = re;
843 	new_entry.preg_options = poptions;
844 	new_entry.compile_options = coptions;
845 	new_entry.extra_compile_options = extra_coptions;
846 	new_entry.refcount = 0;
847 
848 	rc = pcre2_pattern_info(re, PCRE2_INFO_CAPTURECOUNT, &new_entry.capture_count);
849 	if (rc < 0) {
850 		if (key != regex) {
851 			zend_string_release_ex(key, 0);
852 		}
853 		php_error_docref(NULL, E_WARNING, "Internal pcre2_pattern_info() error %d", rc);
854 		pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
855 		return NULL;
856 	}
857 
858 	rc = pcre2_pattern_info(re, PCRE2_INFO_NAMECOUNT, &new_entry.name_count);
859 	if (rc < 0) {
860 		if (key != regex) {
861 			zend_string_release_ex(key, 0);
862 		}
863 		php_error_docref(NULL, E_WARNING, "Internal pcre_pattern_info() error %d", rc);
864 		pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
865 		return NULL;
866 	}
867 
868 	/*
869 	 * Interned strings are not duplicated when stored in HashTable,
870 	 * but all the interned strings created during HTTP request are removed
871 	 * at end of request. However PCRE_G(pcre_cache) must be consistent
872 	 * on the next request as well. So we disable usage of interned strings
873 	 * as hash keys especually for this table.
874 	 * See bug #63180
875 	 */
876 	if (!(GC_FLAGS(key) & IS_STR_PERMANENT) && !PCRE_G(per_request_cache)) {
877 		zend_string *str = zend_string_init(ZSTR_VAL(key), ZSTR_LEN(key), 1);
878 		GC_MAKE_PERSISTENT_LOCAL(str);
879 
880 		ret = zend_hash_add_new_mem(&PCRE_G(pcre_cache), str, &new_entry, sizeof(pcre_cache_entry));
881 		zend_string_release(str);
882 	} else {
883 		ret = zend_hash_add_new_mem(&PCRE_G(pcre_cache), key, &new_entry, sizeof(pcre_cache_entry));
884 	}
885 
886 	if (key != regex) {
887 		zend_string_release_ex(key, 0);
888 	}
889 
890 	return ret;
891 }
892 /* }}} */
893 
894 /* {{{ pcre_get_compiled_regex_cache
895  */
pcre_get_compiled_regex_cache(zend_string * regex)896 PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex)
897 {
898 	return pcre_get_compiled_regex_cache_ex(regex, 1);
899 }
900 /* }}} */
901 
902 /* {{{ pcre_get_compiled_regex
903  */
pcre_get_compiled_regex(zend_string * regex,uint32_t * capture_count)904 PHPAPI pcre2_code *pcre_get_compiled_regex(zend_string *regex, uint32_t *capture_count)
905 {
906 	pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex);
907 
908 	if (capture_count) {
909 		*capture_count = pce ? pce->capture_count : 0;
910 	}
911 
912 	return pce ? pce->re : NULL;
913 }
914 /* }}} */
915 
916 /* {{{ pcre_get_compiled_regex_ex
917  */
pcre_get_compiled_regex_ex(zend_string * regex,uint32_t * capture_count,uint32_t * preg_options,uint32_t * compile_options)918 PHPAPI pcre2_code* pcre_get_compiled_regex_ex(zend_string *regex, uint32_t *capture_count, uint32_t *preg_options, uint32_t *compile_options)
919 {
920 	pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex);
921 
922 	if (preg_options) {
923 		*preg_options = pce ? pce->preg_options : 0;
924 	}
925 	if (compile_options) {
926 		*compile_options = pce ? pce->compile_options : 0;
927 	}
928 	if (capture_count) {
929 		*capture_count = pce ? pce->capture_count : 0;
930 	}
931 
932 	return pce ? pce->re : NULL;
933 }
934 /* }}} */
935 
936 /* XXX For the cases where it's only about match yes/no and no capture
937 		required, perhaps just a minimum sized data would suffice. */
php_pcre_create_match_data(uint32_t capture_count,pcre2_code * re)938 PHPAPI pcre2_match_data *php_pcre_create_match_data(uint32_t capture_count, pcre2_code *re)
939 {/*{{{*/
940 
941 	assert(NULL != re);
942 
943 	if (EXPECTED(!mdata_used)) {
944 		int rc = 0;
945 
946 		if (!capture_count) {
947 			/* As we deal with a non cached pattern, no other way to gather this info. */
948 			rc = pcre2_pattern_info(re, PCRE2_INFO_CAPTURECOUNT, &capture_count);
949 		}
950 
951 		if (rc >= 0 && capture_count + 1 <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
952 			mdata_used = 1;
953 			return mdata;
954 		}
955 	}
956 
957 	return pcre2_match_data_create_from_pattern(re, gctx);
958 }/*}}}*/
959 
php_pcre_free_match_data(pcre2_match_data * match_data)960 PHPAPI void php_pcre_free_match_data(pcre2_match_data *match_data)
961 {/*{{{*/
962 	if (UNEXPECTED(match_data != mdata)) {
963 		pcre2_match_data_free(match_data);
964 	} else {
965 		mdata_used = 0;
966 	}
967 }/*}}}*/
968 
init_unmatched_null_pair()969 static void init_unmatched_null_pair() {
970 	zval val1, val2;
971 	ZVAL_NULL(&val1);
972 	ZVAL_LONG(&val2, -1);
973 	ZVAL_ARR(&PCRE_G(unmatched_null_pair), zend_new_pair(&val1, &val2));
974 }
975 
init_unmatched_empty_pair()976 static void init_unmatched_empty_pair() {
977 	zval val1, val2;
978 	ZVAL_EMPTY_STRING(&val1);
979 	ZVAL_LONG(&val2, -1);
980 	ZVAL_ARR(&PCRE_G(unmatched_empty_pair), zend_new_pair(&val1, &val2));
981 }
982 
populate_match_value_str(zval * val,const char * subject,PCRE2_SIZE start_offset,PCRE2_SIZE end_offset)983 static zend_always_inline void populate_match_value_str(
984 		zval *val, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset) {
985 	if (start_offset == end_offset) {
986 		ZVAL_EMPTY_STRING(val);
987 	} else if (start_offset + 1 == end_offset) {
988 		ZVAL_INTERNED_STR(val, ZSTR_CHAR((unsigned char) subject[start_offset]));
989 	} else {
990 		ZVAL_STRINGL(val, subject + start_offset, end_offset - start_offset);
991 	}
992 }
993 
/* Like populate_match_value_str(), but first handles a group that did not
 * participate in the match (start_offset == PCRE2_UNSET): such a group becomes
 * NULL when `unmatched_as_null` is set and an empty string otherwise. */
static inline void populate_match_value(
		zval *val, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset,
		uint32_t unmatched_as_null) {
	if (start_offset != PCRE2_UNSET) {
		populate_match_value_str(val, subject, start_offset, end_offset);
		return;
	}

	if (unmatched_as_null) {
		ZVAL_NULL(val);
		return;
	}

	ZVAL_EMPTY_STRING(val);
}
1007 
/* Stores `val` under `name` in the `subpats` array and takes a reference on
 * it when it was actually stored.
 *
 * With the DUPNAMES option several subpatterns may share one name. A matched
 * value always overwrites the slot; an unmatched one is only added when the
 * name is not present yet, so the entry that carries a value wins. */
static inline void add_named(
		zval *subpats, zend_string *name, zval *val, zend_bool unmatched) {
	HashTable *target = Z_ARRVAL_P(subpats);

	if (unmatched) {
		if (zend_hash_add(target, name, val) == NULL) {
			/* Name already taken: keep the existing entry, no reference added. */
			return;
		}
	} else {
		zend_hash_update(target, name, val);
	}

	Z_TRY_ADDREF_P(val);
}
1021 
1022 /* {{{ add_offset_pair */
add_offset_pair(zval * result,const char * subject,PCRE2_SIZE start_offset,PCRE2_SIZE end_offset,zend_string * name,uint32_t unmatched_as_null)1023 static inline void add_offset_pair(
1024 		zval *result, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset,
1025 		zend_string *name, uint32_t unmatched_as_null)
1026 {
1027 	zval match_pair;
1028 
1029 	/* Add (match, offset) to the return value */
1030 	if (PCRE2_UNSET == start_offset) {
1031 		if (unmatched_as_null) {
1032 			if (Z_ISUNDEF(PCRE_G(unmatched_null_pair))) {
1033 				init_unmatched_null_pair();
1034 			}
1035 			ZVAL_COPY(&match_pair, &PCRE_G(unmatched_null_pair));
1036 		} else {
1037 			if (Z_ISUNDEF(PCRE_G(unmatched_empty_pair))) {
1038 				init_unmatched_empty_pair();
1039 			}
1040 			ZVAL_COPY(&match_pair, &PCRE_G(unmatched_empty_pair));
1041 		}
1042 	} else {
1043 		zval val1, val2;
1044 		populate_match_value_str(&val1, subject, start_offset, end_offset);
1045 		ZVAL_LONG(&val2, start_offset);
1046 		ZVAL_ARR(&match_pair, zend_new_pair(&val1, &val2));
1047 	}
1048 
1049 	if (name) {
1050 		add_named(result, name, &match_pair, start_offset == PCRE2_UNSET);
1051 	}
1052 	zend_hash_next_index_insert(Z_ARRVAL_P(result), &match_pair);
1053 }
1054 /* }}} */
1055 
/* Fills the `subpats` array with the results of one match.
 *
 *  - offsets:      start/end offset pairs, two entries per group
 *  - subpat_names: optional group-index -> name table (NULL if unused);
 *                  named groups are stored under their name as well as
 *                  under their numeric index
 *  - num_subpats:  total number of groups of the pattern
 *  - count:        number of groups filled in by this match
 *  - mark:         optional mark name, stored under the "MARK" key
 *  - flags:        PREG_OFFSET_CAPTURE selects (match, offset) pairs,
 *                  PREG_UNMATCHED_AS_NULL selects NULL for unset groups and
 *                  pads the trailing groups [count, num_subpats) with NULLs
 */
static void populate_subpat_array(
		zval *subpats, char *subject, PCRE2_SIZE *offsets, zend_string **subpat_names,
		uint32_t num_subpats, int count, const PCRE2_SPTR mark, zend_long flags) {
	const zend_bool with_offsets = (flags & PREG_OFFSET_CAPTURE) != 0;
	const zend_bool null_unmatched = (flags & PREG_UNMATCHED_AS_NULL) != 0;
	zval entry;
	int idx;

	if (with_offsets) {
		/* add_offset_pair() copes with a NULL name on its own. */
		for (idx = 0; idx < count; idx++) {
			add_offset_pair(
				subpats, subject, offsets[2*idx], offsets[2*idx+1],
				subpat_names ? subpat_names[idx] : NULL, null_unmatched);
		}
		if (null_unmatched) {
			for (idx = count; idx < num_subpats; idx++) {
				add_offset_pair(
					subpats, NULL, PCRE2_UNSET, PCRE2_UNSET,
					subpat_names ? subpat_names[idx] : NULL, 1);
			}
		}
	} else {
		for (idx = 0; idx < count; idx++) {
			populate_match_value(
				&entry, subject, offsets[2*idx], offsets[2*idx+1], null_unmatched);
			if (subpat_names && subpat_names[idx]) {
				add_named(subpats, subpat_names[idx], &entry, offsets[2*idx] == PCRE2_UNSET);
			}
			zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &entry);
		}
		if (null_unmatched) {
			for (idx = count; idx < num_subpats; idx++) {
				ZVAL_NULL(&entry);
				if (subpat_names && subpat_names[idx]) {
					/* add (not update): never overwrite a name that already has a value */
					zend_hash_add(Z_ARRVAL_P(subpats), subpat_names[idx], &entry);
				}
				zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &entry);
			}
		}
	}

	/* Add MARK, if available */
	if (mark) {
		add_assoc_string_ex(subpats, "MARK", sizeof("MARK") - 1, (char *)mark);
	}
}
1123 
/* Shared body of the two userland match functions: parses the arguments,
 * obtains the cache entry for the pattern and delegates to
 * php_pcre_match_impl(). `global` is passed straight through. Returns FALSE
 * to userland when argument parsing fails or no cache entry is available. */
static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) /* {{{ */
{
	zend_string		 *regex;			/* pattern */
	zend_string		 *subject;			/* string being searched */
	zval			 *subpats = NULL;	/* optional output array */
	zend_long		  flags = 0;		/* match control flags */
	zend_long		  start_offset = 0;	/* offset the search starts at */
	pcre_cache_entry *pce;				/* cache entry of the pattern */

	ZEND_PARSE_PARAMETERS_START(2, 5)
		Z_PARAM_STR(regex)
		Z_PARAM_STR(subject)
		Z_PARAM_OPTIONAL
		Z_PARAM_ZVAL(subpats)
		Z_PARAM_LONG(flags)
		Z_PARAM_LONG(start_offset)
	ZEND_PARSE_PARAMETERS_END_EX(RETURN_FALSE);

	/* Compile the pattern or fetch it from the cache. */
	pce = pcre_get_compiled_regex_cache(regex);
	if (pce == NULL) {
		RETURN_FALSE;
	}

	/* The entry's refcount is raised for the duration of the match. */
	pce->refcount++;
	php_pcre_match_impl(pce, subject, return_value, subpats,
		global, ZEND_NUM_ARGS() >= 4, flags, start_offset);
	pce->refcount--;
}
/* }}} */
1154 
is_known_valid_utf8(zend_string * subject_str,PCRE2_SIZE start_offset)1155 static zend_always_inline zend_bool is_known_valid_utf8(
1156 		zend_string *subject_str, PCRE2_SIZE start_offset) {
1157 	if (!(GC_FLAGS(subject_str) & IS_STR_VALID_UTF8)) {
1158 		/* We don't know whether the string is valid UTF-8 or not. */
1159 		return 0;
1160 	}
1161 
1162 	if (start_offset == ZSTR_LEN(subject_str)) {
1163 		/* Degenerate case: Offset points to end of string. */
1164 		return 1;
1165 	}
1166 
1167 	/* Check that the offset does not point to an UTF-8 continuation byte. */
1168 	return (ZSTR_VAL(subject_str)[start_offset] & 0xc0) != 0x80;
1169 }
1170 
1171 /* {{{ php_pcre_match_impl() */
php_pcre_match_impl(pcre_cache_entry * pce,zend_string * subject_str,zval * return_value,zval * subpats,int global,int use_flags,zend_long flags,zend_off_t start_offset)1172 PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str, zval *return_value,
1173 	zval *subpats, int global, int use_flags, zend_long flags, zend_off_t start_offset)
1174 {
1175 	zval			 result_set,		/* Holds a set of subpatterns after
1176 										   a global match */
1177 					*match_sets = NULL;	/* An array of sets of matches for each
1178 										   subpattern after a global match */
1179 	uint32_t		 options;			/* Execution options */
1180 	int				 count;				/* Count of matched subpatterns */
1181 	PCRE2_SIZE		*offsets;			/* Array of subpattern offsets */
1182 	uint32_t		 num_subpats;		/* Number of captured subpatterns */
1183 	int				 matched;			/* Has anything matched */
1184 	zend_string	   **subpat_names;		/* Array for named subpatterns */
1185 	size_t			 i;
1186 	uint32_t		 subpats_order;		/* Order of subpattern matches */
1187 	uint32_t		 offset_capture;	/* Capture match offsets: yes/no */
1188 	uint32_t		 unmatched_as_null;	/* Null non-matches: yes/no */
1189 	PCRE2_SPTR       mark = NULL;		/* Target for MARK name */
1190 	zval			 marks;				/* Array of marks for PREG_PATTERN_ORDER */
1191 	pcre2_match_data *match_data;
1192 	PCRE2_SIZE		 start_offset2, orig_start_offset;
1193 
1194 	char *subject = ZSTR_VAL(subject_str);
1195 	size_t subject_len = ZSTR_LEN(subject_str);
1196 
1197 	ZVAL_UNDEF(&marks);
1198 
1199 	/* Overwrite the passed-in value for subpatterns with an empty array. */
1200 	if (subpats != NULL) {
1201 		subpats = zend_try_array_init(subpats);
1202 		if (!subpats) {
1203 			return;
1204 		}
1205 	}
1206 
1207 	subpats_order = global ? PREG_PATTERN_ORDER : 0;
1208 
1209 	if (use_flags) {
1210 		offset_capture = flags & PREG_OFFSET_CAPTURE;
1211 		unmatched_as_null = flags & PREG_UNMATCHED_AS_NULL;
1212 
1213 		/*
1214 		 * subpats_order is pre-set to pattern mode so we change it only if
1215 		 * necessary.
1216 		 */
1217 		if (flags & 0xff) {
1218 			subpats_order = flags & 0xff;
1219 		}
1220 		if ((global && (subpats_order < PREG_PATTERN_ORDER || subpats_order > PREG_SET_ORDER)) ||
1221 			(!global && subpats_order != 0)) {
1222 			php_error_docref(NULL, E_WARNING, "Invalid flags specified");
1223 			return;
1224 		}
1225 	} else {
1226 		offset_capture = 0;
1227 		unmatched_as_null = 0;
1228 	}
1229 
1230 	/* Negative offset counts from the end of the string. */
1231 	if (start_offset < 0) {
1232 		if ((PCRE2_SIZE)-start_offset <= subject_len) {
1233 			start_offset2 = subject_len + start_offset;
1234 		} else {
1235 			start_offset2 = 0;
1236 		}
1237 	} else {
1238 		start_offset2 = (PCRE2_SIZE)start_offset;
1239 	}
1240 
1241 	if (start_offset2 > subject_len) {
1242 		pcre_handle_exec_error(PCRE2_ERROR_BADOFFSET);
1243 		RETURN_FALSE;
1244 	}
1245 
1246 	/* Calculate the size of the offsets array, and allocate memory for it. */
1247 	num_subpats = pce->capture_count + 1;
1248 
1249 	/*
1250 	 * Build a mapping from subpattern numbers to their names. We will
1251 	 * allocate the table only if there are any named subpatterns.
1252 	 */
1253 	subpat_names = NULL;
1254 	if (subpats && pce->name_count > 0) {
1255 		subpat_names = make_subpats_table(num_subpats, pce);
1256 		if (!subpat_names) {
1257 			RETURN_FALSE;
1258 		}
1259 	}
1260 
1261 	/* Allocate match sets array and initialize the values. */
1262 	if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
1263 		match_sets = (zval *)safe_emalloc(num_subpats, sizeof(zval), 0);
1264 		for (i=0; i<num_subpats; i++) {
1265 			array_init(&match_sets[i]);
1266 		}
1267 	}
1268 
1269 	matched = 0;
1270 	PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1271 
1272 	if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
1273 		match_data = mdata;
1274 	} else {
1275 		match_data = pcre2_match_data_create_from_pattern(pce->re, gctx);
1276 		if (!match_data) {
1277 			PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1278 			if (subpat_names) {
1279 				free_subpats_table(subpat_names, num_subpats);
1280 			}
1281 			if (match_sets) {
1282 				efree(match_sets);
1283 			}
1284 			RETURN_FALSE;
1285 		}
1286 	}
1287 
1288 	orig_start_offset = start_offset2;
1289 	options =
1290 		(pce->compile_options & PCRE2_UTF) && !is_known_valid_utf8(subject_str, orig_start_offset)
1291 			? 0 : PCRE2_NO_UTF_CHECK;
1292 
1293 	/* Execute the regular expression. */
1294 #ifdef HAVE_PCRE_JIT_SUPPORT
1295 	if ((pce->preg_options & PREG_JIT) && options) {
1296 		count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1297 				PCRE2_NO_UTF_CHECK, match_data, mctx);
1298 	} else
1299 #endif
1300 	count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1301 			options, match_data, mctx);
1302 
1303 	while (1) {
1304 		/* If something has matched */
1305 		if (count >= 0) {
1306 			/* Check for too many substrings condition. */
1307 			if (UNEXPECTED(count == 0)) {
1308 				php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
1309 				count = num_subpats;
1310 			}
1311 
1312 matched:
1313 			matched++;
1314 
1315 			offsets = pcre2_get_ovector_pointer(match_data);
1316 
1317 			/* If subpatterns array has been passed, fill it in with values. */
1318 			if (subpats != NULL) {
1319 				/* Try to get the list of substrings and display a warning if failed. */
1320 				if (offsets[1] < offsets[0]) {
1321 					if (subpat_names) {
1322 						free_subpats_table(subpat_names, num_subpats);
1323 					}
1324 					if (match_sets) efree(match_sets);
1325 					php_error_docref(NULL, E_WARNING, "Get subpatterns list failed");
1326 					RETURN_FALSE;
1327 				}
1328 
1329 				if (global) {	/* global pattern matching */
1330 					if (subpats && subpats_order == PREG_PATTERN_ORDER) {
1331 						/* For each subpattern, insert it into the appropriate array. */
1332 						if (offset_capture) {
1333 							for (i = 0; i < count; i++) {
1334 								add_offset_pair(
1335 									&match_sets[i], subject, offsets[2*i], offsets[2*i+1],
1336 									NULL, unmatched_as_null);
1337 							}
1338 						} else {
1339 							for (i = 0; i < count; i++) {
1340 								zval val;
1341 								populate_match_value(
1342 									&val, subject, offsets[2*i], offsets[2*i+1], unmatched_as_null);
1343 								zend_hash_next_index_insert_new(Z_ARRVAL(match_sets[i]), &val);
1344 							}
1345 						}
1346 						mark = pcre2_get_mark(match_data);
1347 						/* Add MARK, if available */
1348 						if (mark) {
1349 							if (Z_TYPE(marks) == IS_UNDEF) {
1350 								array_init(&marks);
1351 							}
1352 							add_index_string(&marks, matched - 1, (char *) mark);
1353 						}
1354 						/*
1355 						 * If the number of captured subpatterns on this run is
1356 						 * less than the total possible number, pad the result
1357 						 * arrays with NULLs or empty strings.
1358 						 */
1359 						if (count < num_subpats) {
1360 							for (; i < num_subpats; i++) {
1361 								if (offset_capture) {
1362 									add_offset_pair(
1363 										&match_sets[i], NULL, PCRE2_UNSET, PCRE2_UNSET,
1364 										NULL, unmatched_as_null);
1365 								} else if (unmatched_as_null) {
1366 									add_next_index_null(&match_sets[i]);
1367 								} else {
1368 									add_next_index_str(&match_sets[i], ZSTR_EMPTY_ALLOC());
1369 								}
1370 							}
1371 						}
1372 					} else {
1373 						/* Allocate and populate the result set array */
1374 						array_init_size(&result_set, count + (mark ? 1 : 0));
1375 						mark = pcre2_get_mark(match_data);
1376 						populate_subpat_array(
1377 							&result_set, subject, offsets, subpat_names,
1378 							num_subpats, count, mark, flags);
1379 						/* And add it to the output array */
1380 						zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &result_set);
1381 					}
1382 				} else {			/* single pattern matching */
1383 					/* For each subpattern, insert it into the subpatterns array. */
1384 					mark = pcre2_get_mark(match_data);
1385 					populate_subpat_array(
1386 						subpats, subject, offsets, subpat_names, num_subpats, count, mark, flags);
1387 					break;
1388 				}
1389 			}
1390 
1391 			/* Advance to the next piece. */
1392 			start_offset2 = offsets[1];
1393 
1394 			/* If we have matched an empty string, mimic what Perl's /g options does.
1395 			   This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
1396 			   the match again at the same point. If this fails (picked up above) we
1397 			   advance to the next character. */
1398 			if (start_offset2 == offsets[0]) {
1399 				count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1400 					PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
1401 				if (count >= 0) {
1402 					if (global) {
1403 						goto matched;
1404 					} else {
1405 						break;
1406 					}
1407 				} else if (count == PCRE2_ERROR_NOMATCH) {
1408 					/* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
1409 					   this is not necessarily the end. We need to advance
1410 					   the start offset, and continue. Fudge the offset values
1411 					   to achieve this, unless we're already at the end of the string. */
1412 					if (start_offset2 < subject_len) {
1413 						size_t unit_len = calculate_unit_length(pce, subject + start_offset2);
1414 
1415 						start_offset2 += unit_len;
1416 					} else {
1417 						break;
1418 					}
1419 				} else {
1420 					goto error;
1421 				}
1422 			}
1423 		} else if (count == PCRE2_ERROR_NOMATCH) {
1424 			break;
1425 		} else {
1426 error:
1427 			pcre_handle_exec_error(count);
1428 			break;
1429 		}
1430 
1431 		if (!global) {
1432 			break;
1433 		}
1434 
1435 		/* Execute the regular expression. */
1436 #ifdef HAVE_PCRE_JIT_SUPPORT
1437 		if ((pce->preg_options & PREG_JIT)) {
1438 			if (PCRE2_UNSET == start_offset2 || start_offset2 > subject_len) {
1439 				pcre_handle_exec_error(PCRE2_ERROR_BADOFFSET);
1440 				break;
1441 			}
1442 			count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1443 					PCRE2_NO_UTF_CHECK, match_data, mctx);
1444 		} else
1445 #endif
1446 		count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1447 				PCRE2_NO_UTF_CHECK, match_data, mctx);
1448 	}
1449 	if (match_data != mdata) {
1450 		pcre2_match_data_free(match_data);
1451 	}
1452 
1453 	/* Add the match sets to the output array and clean up */
1454 	if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
1455 		if (subpat_names) {
1456 			for (i = 0; i < num_subpats; i++) {
1457 				if (subpat_names[i]) {
1458 					zend_hash_update(Z_ARRVAL_P(subpats), subpat_names[i], &match_sets[i]);
1459 					Z_ADDREF(match_sets[i]);
1460 				}
1461 				zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i]);
1462 			}
1463 		} else {
1464 			for (i = 0; i < num_subpats; i++) {
1465 				zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i]);
1466 			}
1467 		}
1468 		efree(match_sets);
1469 
1470 		if (Z_TYPE(marks) != IS_UNDEF) {
1471 			add_assoc_zval(subpats, "MARK", &marks);
1472 		}
1473 	}
1474 
1475 	if (subpat_names) {
1476 		free_subpats_table(subpat_names, num_subpats);
1477 	}
1478 
1479 	if (PCRE_G(error_code) == PHP_PCRE_NO_ERROR) {
1480 		/* If there was no error and we're in /u mode, remember that the string is valid UTF-8. */
1481 		if ((pce->compile_options & PCRE2_UTF)
1482 				&& !ZSTR_IS_INTERNED(subject_str) && orig_start_offset == 0) {
1483 			GC_ADD_FLAGS(subject_str, IS_STR_VALID_UTF8);
1484 		}
1485 
1486 		RETVAL_LONG(matched);
1487 	} else {
1488 		RETVAL_FALSE;
1489 	}
1490 }
1491 /* }}} */
1492 
1493 /* {{{ proto int preg_match(string pattern, string subject [, array &subpatterns [, int flags [, int offset]]])
1494    Perform a Perl-style regular expression match */
PHP_FUNCTION(preg_match)1495 static PHP_FUNCTION(preg_match)
1496 {
1497 	php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
1498 }
1499 /* }}} */
1500 
1501 /* {{{ proto int preg_match_all(string pattern, string subject [, array &subpatterns [, int flags [, int offset]]])
1502    Perform a Perl-style global regular expression match */
PHP_FUNCTION(preg_match_all)1503 static PHP_FUNCTION(preg_match_all)
1504 {
1505 	php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
1506 }
1507 /* }}} */
1508 
/* {{{ preg_get_backref
 * Tries to parse a back-reference at *str, which points at the introducing
 * character. Accepted forms are <char>N, <char>NN and, when the introducer
 * is '$', ${N} / ${NN} (N = decimal digit).
 *
 * On success the reference number is stored in *backref, *str is advanced
 * past the reference and 1 is returned. On failure 0 is returned and *str is
 * left alone; *backref may already have been overwritten with the first
 * digit(s) that were read.
 */
static int preg_get_backref(char **str, int *backref)
{
	char *cursor = *str;
	int braced = 0;

	/* Nothing follows the introducer. */
	if (cursor[1] == 0) {
		return 0;
	}

	if (cursor[0] == '$' && cursor[1] == '{') {
		braced = 1;
		cursor++;
	}
	cursor++;

	/* First digit is mandatory. */
	if (*cursor < '0' || *cursor > '9') {
		return 0;
	}
	*backref = *cursor - '0';
	cursor++;

	/* Optional second digit. */
	if (*cursor >= '0' && *cursor <= '9') {
		*backref = *backref * 10 + *cursor - '0';
		cursor++;
	}

	if (braced) {
		if (*cursor != '}') {
			return 0;
		}
		cursor++;
	}

	*str = cursor;
	return 1;
}
/* }}} */
1547 
1548 /* {{{ preg_do_repl_func
1549  */
preg_do_repl_func(zend_fcall_info * fci,zend_fcall_info_cache * fcc,char * subject,PCRE2_SIZE * offsets,zend_string ** subpat_names,uint32_t num_subpats,int count,const PCRE2_SPTR mark,zend_long flags)1550 static zend_string *preg_do_repl_func(zend_fcall_info *fci, zend_fcall_info_cache *fcc, char *subject, PCRE2_SIZE *offsets, zend_string **subpat_names, uint32_t num_subpats, int count, const PCRE2_SPTR mark, zend_long flags)
1551 {
1552 	zend_string *result_str;
1553 	zval		 retval;			/* Function return value */
1554 	zval	     arg;				/* Argument to pass to function */
1555 
1556 	array_init_size(&arg, count + (mark ? 1 : 0));
1557 	populate_subpat_array(&arg, subject, offsets, subpat_names, num_subpats, count, mark, flags);
1558 
1559 	fci->retval = &retval;
1560 	fci->param_count = 1;
1561 	fci->params = &arg;
1562 	fci->no_separation = 0;
1563 
1564 	if (zend_call_function(fci, fcc) == SUCCESS && Z_TYPE(retval) != IS_UNDEF) {
1565 		if (EXPECTED(Z_TYPE(retval) == IS_STRING)) {
1566 			result_str = Z_STR(retval);
1567 		} else {
1568 			result_str = zval_get_string_func(&retval);
1569 			zval_ptr_dtor(&retval);
1570 		}
1571 	} else {
1572 		if (!EG(exception)) {
1573 			php_error_docref(NULL, E_WARNING, "Unable to call custom replacement function");
1574 		}
1575 
1576 		result_str = zend_string_init(&subject[offsets[0]], offsets[1] - offsets[0], 0);
1577 	}
1578 
1579 	zval_ptr_dtor(&arg);
1580 
1581 	return result_str;
1582 }
1583 /* }}} */
1584 
1585 /* {{{ php_pcre_replace
1586  */
php_pcre_replace(zend_string * regex,zend_string * subject_str,char * subject,size_t subject_len,zend_string * replace_str,size_t limit,size_t * replace_count)1587 PHPAPI zend_string *php_pcre_replace(zend_string *regex,
1588 							  zend_string *subject_str,
1589 							  char *subject, size_t subject_len,
1590 							  zend_string *replace_str,
1591 							  size_t limit, size_t *replace_count)
1592 {
1593 	pcre_cache_entry	*pce;			    /* Compiled regular expression */
1594 	zend_string	 		*result;			/* Function result */
1595 
1596 	/* Abort on pending exception, e.g. thrown from __toString(). */
1597 	if (UNEXPECTED(EG(exception))) {
1598 		return NULL;
1599 	}
1600 
1601 	/* Compile regex or get it from cache. */
1602 	if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
1603 		return NULL;
1604 	}
1605 	pce->refcount++;
1606 	result = php_pcre_replace_impl(pce, subject_str, subject, subject_len, replace_str,
1607 		limit, replace_count);
1608 	pce->refcount--;
1609 
1610 	return result;
1611 }
1612 /* }}} */
1613 
1614 /* {{{ php_pcre_replace_impl() */
php_pcre_replace_impl(pcre_cache_entry * pce,zend_string * subject_str,char * subject,size_t subject_len,zend_string * replace_str,size_t limit,size_t * replace_count)1615 PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *subject_str, char *subject, size_t subject_len, zend_string *replace_str, size_t limit, size_t *replace_count)
1616 {
1617 	uint32_t		 options;			/* Execution options */
1618 	int				 count;				/* Count of matched subpatterns */
1619 	PCRE2_SIZE		*offsets;			/* Array of subpattern offsets */
1620 	uint32_t		 num_subpats;		/* Number of captured subpatterns */
1621 	size_t			 new_len;			/* Length of needed storage */
1622 	size_t			 alloc_len;			/* Actual allocated length */
1623 	size_t			 match_len;			/* Length of the current match */
1624 	int				 backref;			/* Backreference number */
1625 	PCRE2_SIZE		 start_offset;		/* Where the new search starts */
1626 	size_t			 last_end_offset;	/* Where the last search ended */
1627 	char			*walkbuf,			/* Location of current replacement in the result */
1628 					*walk,				/* Used to walk the replacement string */
1629 					*match,				/* The current match */
1630 					*piece,				/* The current piece of subject */
1631 					*replace_end,		/* End of replacement string */
1632 					 walk_last;			/* Last walked character */
1633 	size_t			result_len; 		/* Length of result */
1634 	zend_string		*result;			/* Result of replacement */
1635 	pcre2_match_data *match_data;
1636 
1637 	/* Calculate the size of the offsets array, and allocate memory for it. */
1638 	num_subpats = pce->capture_count + 1;
1639 	alloc_len = 0;
1640 	result = NULL;
1641 
1642 	/* Initialize */
1643 	match = NULL;
1644 	start_offset = 0;
1645 	last_end_offset = 0;
1646 	result_len = 0;
1647 	PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1648 
1649 	if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
1650 		match_data = mdata;
1651 	} else {
1652 		match_data = pcre2_match_data_create_from_pattern(pce->re, gctx);
1653 		if (!match_data) {
1654 			PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1655 			return NULL;
1656 		}
1657 	}
1658 
1659 	options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
1660 
1661 	/* Execute the regular expression. */
1662 #ifdef HAVE_PCRE_JIT_SUPPORT
1663 	if ((pce->preg_options & PREG_JIT) && options) {
1664 		count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1665 				PCRE2_NO_UTF_CHECK, match_data, mctx);
1666 	} else
1667 #endif
1668 	count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1669 			options, match_data, mctx);
1670 
1671 	while (1) {
1672 		piece = subject + last_end_offset;
1673 
1674 		if (count >= 0 && limit > 0) {
1675 			zend_bool simple_string;
1676 
1677 			/* Check for too many substrings condition. */
1678 			if (UNEXPECTED(count == 0)) {
1679 				php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
1680 				count = num_subpats;
1681 			}
1682 
1683 matched:
1684 			offsets = pcre2_get_ovector_pointer(match_data);
1685 
1686 			if (UNEXPECTED(offsets[1] < offsets[0])) {
1687 				PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1688 				if (result) {
1689 					zend_string_release_ex(result, 0);
1690 					result = NULL;
1691 				}
1692 				break;
1693 			}
1694 
1695 			if (replace_count) {
1696 				++*replace_count;
1697 			}
1698 
1699 			/* Set the match location in subject */
1700 			match = subject + offsets[0];
1701 
1702 			new_len = result_len + offsets[0] - last_end_offset; /* part before the match */
1703 
1704 			walk = ZSTR_VAL(replace_str);
1705 			replace_end = walk + ZSTR_LEN(replace_str);
1706 			walk_last = 0;
1707 			simple_string = 1;
1708 			while (walk < replace_end) {
1709 				if ('\\' == *walk || '$' == *walk) {
1710 					simple_string = 0;
1711 					if (walk_last == '\\') {
1712 						walk++;
1713 						walk_last = 0;
1714 						continue;
1715 					}
1716 					if (preg_get_backref(&walk, &backref)) {
1717 						if (backref < count)
1718 							new_len += offsets[(backref<<1)+1] - offsets[backref<<1];
1719 						continue;
1720 					}
1721 				}
1722 				new_len++;
1723 				walk++;
1724 				walk_last = walk[-1];
1725 			}
1726 
1727 			if (new_len >= alloc_len) {
1728 				alloc_len = zend_safe_address_guarded(2, new_len, ZSTR_MAX_OVERHEAD) - ZSTR_MAX_OVERHEAD;
1729 				if (result == NULL) {
1730 					result = zend_string_alloc(alloc_len, 0);
1731 				} else {
1732 					result = zend_string_extend(result, alloc_len, 0);
1733 				}
1734 			}
1735 
1736 			if (match-piece > 0) {
1737 				/* copy the part of the string before the match */
1738 				memcpy(&ZSTR_VAL(result)[result_len], piece, match-piece);
1739 				result_len += (match-piece);
1740 			}
1741 
1742 			if (simple_string) {
1743 				/* copy replacement */
1744 				memcpy(&ZSTR_VAL(result)[result_len], ZSTR_VAL(replace_str), ZSTR_LEN(replace_str)+1);
1745 				result_len += ZSTR_LEN(replace_str);
1746 			} else {
1747 				/* copy replacement and backrefs */
1748 				walkbuf = ZSTR_VAL(result) + result_len;
1749 
1750 				walk = ZSTR_VAL(replace_str);
1751 				walk_last = 0;
1752 				while (walk < replace_end) {
1753 					if ('\\' == *walk || '$' == *walk) {
1754 						if (walk_last == '\\') {
1755 							*(walkbuf-1) = *walk++;
1756 							walk_last = 0;
1757 							continue;
1758 						}
1759 						if (preg_get_backref(&walk, &backref)) {
1760 							if (backref < count) {
1761 								match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
1762 								memcpy(walkbuf, subject + offsets[backref<<1], match_len);
1763 								walkbuf += match_len;
1764 							}
1765 							continue;
1766 						}
1767 					}
1768 					*walkbuf++ = *walk++;
1769 					walk_last = walk[-1];
1770 				}
1771 				*walkbuf = '\0';
1772 				/* increment the result length by how much we've added to the string */
1773 				result_len += (walkbuf - (ZSTR_VAL(result) + result_len));
1774 			}
1775 
1776 			limit--;
1777 
1778 			/* Advance to the next piece. */
1779 			start_offset = last_end_offset = offsets[1];
1780 
1781 			/* If we have matched an empty string, mimic what Perl's /g options does.
1782 			   This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
1783 			   the match again at the same point. If this fails (picked up above) we
1784 			   advance to the next character. */
1785 			if (start_offset == offsets[0]) {
1786 				count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1787 					PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
1788 
1789 				piece = subject + start_offset;
1790 				if (count >= 0 && limit > 0) {
1791 					goto matched;
1792 				} else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
1793 					/* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
1794 					   this is not necessarily the end. We need to advance
1795 					   the start offset, and continue. Fudge the offset values
1796 					   to achieve this, unless we're already at the end of the string. */
1797 					if (start_offset < subject_len) {
1798 						size_t unit_len = calculate_unit_length(pce, piece);
1799 						start_offset += unit_len;
1800 					} else {
1801 						goto not_matched;
1802 					}
1803 				} else {
1804 					goto error;
1805 				}
1806 			}
1807 
1808 		} else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
1809 not_matched:
1810 			if (!result && subject_str) {
1811 				result = zend_string_copy(subject_str);
1812 				break;
1813 			}
1814 			/* now we know exactly how long it is */
1815 			alloc_len = result_len + subject_len - last_end_offset;
1816 			if (NULL != result) {
1817 				result = zend_string_realloc(result, alloc_len, 0);
1818 			} else {
1819 				result = zend_string_alloc(alloc_len, 0);
1820 			}
1821 			/* stick that last bit of string on our output */
1822 			memcpy(ZSTR_VAL(result) + result_len, piece, subject_len - last_end_offset);
1823 			result_len += subject_len - last_end_offset;
1824 			ZSTR_VAL(result)[result_len] = '\0';
1825 			ZSTR_LEN(result) = result_len;
1826 			break;
1827 		} else {
1828 error:
1829 			pcre_handle_exec_error(count);
1830 			if (result) {
1831 				zend_string_release_ex(result, 0);
1832 				result = NULL;
1833 			}
1834 			break;
1835 		}
1836 
1837 #ifdef HAVE_PCRE_JIT_SUPPORT
1838 		if (pce->preg_options & PREG_JIT) {
1839 			count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1840 					PCRE2_NO_UTF_CHECK, match_data, mctx);
1841 		} else
1842 #endif
1843 		count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1844 					PCRE2_NO_UTF_CHECK, match_data, mctx);
1845 	}
1846 	if (match_data != mdata) {
1847 		pcre2_match_data_free(match_data);
1848 	}
1849 
1850 	return result;
1851 }
1852 /* }}} */
1853 
1854 /* {{{ php_pcre_replace_func_impl() */
php_pcre_replace_func_impl(pcre_cache_entry * pce,zend_string * subject_str,char * subject,size_t subject_len,zend_fcall_info * fci,zend_fcall_info_cache * fcc,size_t limit,size_t * replace_count,zend_long flags)1855 static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_string *subject_str, char *subject, size_t subject_len, zend_fcall_info *fci, zend_fcall_info_cache *fcc, size_t limit, size_t *replace_count, zend_long flags)
1856 {
1857 	uint32_t		 options;			/* Execution options */
1858 	int				 count;				/* Count of matched subpatterns */
1859 	PCRE2_SIZE		*offsets;			/* Array of subpattern offsets */
1860 	zend_string		**subpat_names;		/* Array for named subpatterns */
1861 	uint32_t		 num_subpats;		/* Number of captured subpatterns */
1862 	size_t			 new_len;			/* Length of needed storage */
1863 	size_t			 alloc_len;			/* Actual allocated length */
1864 	PCRE2_SIZE		 start_offset;		/* Where the new search starts */
1865 	size_t			 last_end_offset;	/* Where the last search ended */
1866 	char			*match,				/* The current match */
1867 					*piece;				/* The current piece of subject */
1868 	size_t			result_len; 		/* Length of result */
1869 	zend_string		*result;			/* Result of replacement */
1870 	zend_string     *eval_result;		/* Result of custom function */
1871 	pcre2_match_data *match_data;
1872 	zend_bool old_mdata_used;
1873 
1874 	/* Calculate the size of the offsets array, and allocate memory for it. */
1875 	num_subpats = pce->capture_count + 1;
1876 
1877 	/*
1878 	 * Build a mapping from subpattern numbers to their names. We will
1879 	 * allocate the table only if there are any named subpatterns.
1880 	 */
1881 	subpat_names = NULL;
1882 	if (UNEXPECTED(pce->name_count > 0)) {
1883 		subpat_names = make_subpats_table(num_subpats, pce);
1884 		if (!subpat_names) {
1885 			return NULL;
1886 		}
1887 	}
1888 
1889 	alloc_len = 0;
1890 	result = NULL;
1891 
1892 	/* Initialize */
1893 	match = NULL;
1894 	start_offset = 0;
1895 	last_end_offset = 0;
1896 	result_len = 0;
1897 	PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1898 
1899 	old_mdata_used = mdata_used;
1900 	if (!old_mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
1901 		mdata_used = 1;
1902 		match_data = mdata;
1903 	} else {
1904 		match_data = pcre2_match_data_create_from_pattern(pce->re, gctx);
1905 		if (!match_data) {
1906 			PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1907 			if (subpat_names) {
1908 				free_subpats_table(subpat_names, num_subpats);
1909 			}
1910 			mdata_used = old_mdata_used;
1911 			return NULL;
1912 		}
1913 	}
1914 
1915 	options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
1916 
1917 	/* Execute the regular expression. */
1918 #ifdef HAVE_PCRE_JIT_SUPPORT
1919 	if ((pce->preg_options & PREG_JIT) && options) {
1920 		count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1921 				PCRE2_NO_UTF_CHECK, match_data, mctx);
1922 	} else
1923 #endif
1924 	count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1925 			options, match_data, mctx);
1926 
1927 	while (1) {
1928 		piece = subject + last_end_offset;
1929 
1930 		if (count >= 0 && limit) {
1931 			/* Check for too many substrings condition. */
1932 			if (UNEXPECTED(count == 0)) {
1933 				php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
1934 				count = num_subpats;
1935 			}
1936 
1937 matched:
1938 			offsets = pcre2_get_ovector_pointer(match_data);
1939 
1940 			if (UNEXPECTED(offsets[1] < offsets[0])) {
1941 				PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1942 				if (result) {
1943 					zend_string_release_ex(result, 0);
1944 					result = NULL;
1945 				}
1946 				break;
1947 			}
1948 
1949 			if (replace_count) {
1950 				++*replace_count;
1951 			}
1952 
1953 			/* Set the match location in subject */
1954 			match = subject + offsets[0];
1955 
1956 			new_len = result_len + offsets[0] - last_end_offset; /* part before the match */
1957 
1958 			/* Use custom function to get replacement string and its length. */
1959 			eval_result = preg_do_repl_func(
1960 				fci, fcc, subject, offsets, subpat_names, num_subpats, count,
1961 				pcre2_get_mark(match_data), flags);
1962 
1963 			ZEND_ASSERT(eval_result);
1964 			new_len = zend_safe_address_guarded(1, ZSTR_LEN(eval_result) + ZSTR_MAX_OVERHEAD, new_len) -ZSTR_MAX_OVERHEAD;
1965 			if (new_len >= alloc_len) {
1966 				alloc_len = zend_safe_address_guarded(2, new_len, ZSTR_MAX_OVERHEAD) - ZSTR_MAX_OVERHEAD;
1967 				if (result == NULL) {
1968 					result = zend_string_alloc(alloc_len, 0);
1969 				} else {
1970 					result = zend_string_extend(result, alloc_len, 0);
1971 				}
1972 			}
1973 
1974 			if (match-piece > 0) {
1975 				/* copy the part of the string before the match */
1976 				memcpy(ZSTR_VAL(result) + result_len, piece, match-piece);
1977 				result_len += (match-piece);
1978 			}
1979 
1980 			/* If using custom function, copy result to the buffer and clean up. */
1981 			memcpy(ZSTR_VAL(result) + result_len, ZSTR_VAL(eval_result), ZSTR_LEN(eval_result));
1982 			result_len += ZSTR_LEN(eval_result);
1983 			zend_string_release_ex(eval_result, 0);
1984 
1985 			limit--;
1986 
1987 			/* Advance to the next piece. */
1988 			start_offset = last_end_offset = offsets[1];
1989 
1990 			/* If we have matched an empty string, mimic what Perl's /g options does.
1991 			   This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
1992 			   the match again at the same point. If this fails (picked up above) we
1993 			   advance to the next character. */
1994 			if (start_offset == offsets[0]) {
1995 				count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1996 					PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
1997 
1998 				piece = subject + start_offset;
1999 				if (count >= 0 && limit) {
2000 					goto matched;
2001 				} else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
2002 					/* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
2003 					   this is not necessarily the end. We need to advance
2004 					   the start offset, and continue. Fudge the offset values
2005 					   to achieve this, unless we're already at the end of the string. */
2006 					if (start_offset < subject_len) {
2007 						size_t unit_len = calculate_unit_length(pce, piece);
2008 						start_offset += unit_len;
2009 					} else {
2010 						goto not_matched;
2011 					}
2012 				} else {
2013 					goto error;
2014 				}
2015 			}
2016 
2017 		} else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
2018 not_matched:
2019 			if (!result && subject_str) {
2020 				result = zend_string_copy(subject_str);
2021 				break;
2022 			}
2023 			/* now we know exactly how long it is */
2024 			alloc_len = result_len + subject_len - last_end_offset;
2025 			if (NULL != result) {
2026 				result = zend_string_realloc(result, alloc_len, 0);
2027 			} else {
2028 				result = zend_string_alloc(alloc_len, 0);
2029 			}
2030 			/* stick that last bit of string on our output */
2031 			memcpy(ZSTR_VAL(result) + result_len, piece, subject_len - last_end_offset);
2032 			result_len += subject_len - last_end_offset;
2033 			ZSTR_VAL(result)[result_len] = '\0';
2034 			ZSTR_LEN(result) = result_len;
2035 			break;
2036 		} else {
2037 error:
2038 			pcre_handle_exec_error(count);
2039 			if (result) {
2040 				zend_string_release_ex(result, 0);
2041 				result = NULL;
2042 			}
2043 			break;
2044 		}
2045 #ifdef HAVE_PCRE_JIT_SUPPORT
2046 		if ((pce->preg_options & PREG_JIT)) {
2047 			count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
2048 					PCRE2_NO_UTF_CHECK, match_data, mctx);
2049 		} else
2050 #endif
2051 		count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
2052 				PCRE2_NO_UTF_CHECK, match_data, mctx);
2053 	}
2054 	if (match_data != mdata) {
2055 		pcre2_match_data_free(match_data);
2056 	}
2057 	mdata_used = old_mdata_used;
2058 
2059 	if (UNEXPECTED(subpat_names)) {
2060 		free_subpats_table(subpat_names, num_subpats);
2061 	}
2062 
2063 	return result;
2064 }
2065 /* }}} */
2066 
2067 /* {{{ php_pcre_replace_func
2068  */
php_pcre_replace_func(zend_string * regex,zend_string * subject_str,zend_fcall_info * fci,zend_fcall_info_cache * fcc,size_t limit,size_t * replace_count,zend_long flags)2069 static zend_always_inline zend_string *php_pcre_replace_func(zend_string *regex,
2070 							  zend_string *subject_str,
2071 							  zend_fcall_info *fci, zend_fcall_info_cache *fcc,
2072 							  size_t limit, size_t *replace_count, zend_long flags)
2073 {
2074 	pcre_cache_entry	*pce;			    /* Compiled regular expression */
2075 	zend_string	 		*result;			/* Function result */
2076 
2077 	/* Compile regex or get it from cache. */
2078 	if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
2079 		return NULL;
2080 	}
2081 	pce->refcount++;
2082 	result = php_pcre_replace_func_impl(
2083 		pce, subject_str, ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), fci, fcc,
2084 		limit, replace_count, flags);
2085 	pce->refcount--;
2086 
2087 	return result;
2088 }
2089 /* }}} */
2090 
2091 /* {{{ php_pcre_replace_array
2092  */
php_pcre_replace_array(HashTable * regex,zval * replace,zend_string * subject_str,size_t limit,size_t * replace_count)2093 static zend_string *php_pcre_replace_array(HashTable *regex, zval *replace, zend_string *subject_str, size_t limit, size_t *replace_count)
2094 {
2095 	zval		*regex_entry;
2096 	zend_string *result;
2097 	zend_string *replace_str, *tmp_replace_str;
2098 
2099 	if (Z_TYPE_P(replace) == IS_ARRAY) {
2100 		uint32_t replace_idx = 0;
2101 		HashTable *replace_ht = Z_ARRVAL_P(replace);
2102 
2103 		/* For each entry in the regex array, get the entry */
2104 		ZEND_HASH_FOREACH_VAL(regex, regex_entry) {
2105 			/* Make sure we're dealing with strings. */
2106 			zend_string *tmp_regex_str;
2107 			zend_string *regex_str = zval_get_tmp_string(regex_entry, &tmp_regex_str);
2108 			zval *zv;
2109 
2110 			/* Get current entry */
2111 			while (1) {
2112 				if (replace_idx == replace_ht->nNumUsed) {
2113 					replace_str = ZSTR_EMPTY_ALLOC();
2114 					tmp_replace_str = NULL;
2115 					break;
2116 				}
2117 				zv = &replace_ht->arData[replace_idx].val;
2118 				replace_idx++;
2119 				if (Z_TYPE_P(zv) != IS_UNDEF) {
2120 					replace_str = zval_get_tmp_string(zv, &tmp_replace_str);
2121 					break;
2122 				}
2123 			}
2124 
2125 			/* Do the actual replacement and put the result back into subject_str
2126 			   for further replacements. */
2127 			result = php_pcre_replace(regex_str,
2128 									  subject_str,
2129 									  ZSTR_VAL(subject_str),
2130 									  ZSTR_LEN(subject_str),
2131 									  replace_str,
2132 									  limit,
2133 									  replace_count);
2134 			zend_tmp_string_release(tmp_replace_str);
2135 			zend_tmp_string_release(tmp_regex_str);
2136 			zend_string_release_ex(subject_str, 0);
2137 			subject_str = result;
2138 			if (UNEXPECTED(result == NULL)) {
2139 				break;
2140 			}
2141 		} ZEND_HASH_FOREACH_END();
2142 
2143 	} else {
2144 		replace_str = Z_STR_P(replace);
2145 
2146 		/* For each entry in the regex array, get the entry */
2147 		ZEND_HASH_FOREACH_VAL(regex, regex_entry) {
2148 			/* Make sure we're dealing with strings. */
2149 			zend_string *tmp_regex_str;
2150 			zend_string *regex_str = zval_get_tmp_string(regex_entry, &tmp_regex_str);
2151 
2152 			/* Do the actual replacement and put the result back into subject_str
2153 			   for further replacements. */
2154 			result = php_pcre_replace(regex_str,
2155 									  subject_str,
2156 									  ZSTR_VAL(subject_str),
2157 									  ZSTR_LEN(subject_str),
2158 									  replace_str,
2159 									  limit,
2160 									  replace_count);
2161 			zend_tmp_string_release(tmp_regex_str);
2162 			zend_string_release_ex(subject_str, 0);
2163 			subject_str = result;
2164 
2165 			if (UNEXPECTED(result == NULL)) {
2166 				break;
2167 			}
2168 		} ZEND_HASH_FOREACH_END();
2169 	}
2170 
2171 	return subject_str;
2172 }
2173 /* }}} */
2174 
2175 /* {{{ php_replace_in_subject
2176  */
php_replace_in_subject(zval * regex,zval * replace,zval * subject,size_t limit,size_t * replace_count)2177 static zend_always_inline zend_string *php_replace_in_subject(zval *regex, zval *replace, zval *subject, size_t limit, size_t *replace_count)
2178 {
2179 	zend_string *result;
2180 	zend_string *subject_str = zval_get_string(subject);
2181 
2182 	if (Z_TYPE_P(regex) != IS_ARRAY) {
2183 		result = php_pcre_replace(Z_STR_P(regex),
2184 								  subject_str,
2185 								  ZSTR_VAL(subject_str),
2186 								  ZSTR_LEN(subject_str),
2187 								  Z_STR_P(replace),
2188 								  limit,
2189 								  replace_count);
2190 		zend_string_release_ex(subject_str, 0);
2191 	} else {
2192 		result = php_pcre_replace_array(Z_ARRVAL_P(regex),
2193 										replace,
2194 										subject_str,
2195 										limit,
2196 										replace_count);
2197 	}
2198 	return result;
2199 }
2200 /* }}} */
2201 
2202 /* {{{ php_replace_in_subject_func
2203  */
php_replace_in_subject_func(zval * regex,zend_fcall_info * fci,zend_fcall_info_cache * fcc,zval * subject,size_t limit,size_t * replace_count,zend_long flags)2204 static zend_string *php_replace_in_subject_func(zval *regex, zend_fcall_info *fci, zend_fcall_info_cache *fcc, zval *subject, size_t limit, size_t *replace_count, zend_long flags)
2205 {
2206 	zend_string *result;
2207 	zend_string	*subject_str = zval_get_string(subject);
2208 
2209 	if (Z_TYPE_P(regex) != IS_ARRAY) {
2210 		result = php_pcre_replace_func(
2211 			Z_STR_P(regex), subject_str, fci, fcc, limit, replace_count, flags);
2212 		zend_string_release_ex(subject_str, 0);
2213 		return result;
2214 	} else {
2215 		zval		*regex_entry;
2216 
2217 		/* If regex is an array */
2218 
2219 		/* For each entry in the regex array, get the entry */
2220 		ZEND_HASH_FOREACH_VAL(Z_ARRVAL_P(regex), regex_entry) {
2221 			/* Make sure we're dealing with strings. */
2222 			zend_string *tmp_regex_str;
2223 			zend_string *regex_str = zval_get_tmp_string(regex_entry, &tmp_regex_str);
2224 
2225 			/* Do the actual replacement and put the result back into subject_str
2226 			   for further replacements. */
2227 			result = php_pcre_replace_func(
2228 				regex_str, subject_str, fci, fcc, limit, replace_count, flags);
2229 			zend_tmp_string_release(tmp_regex_str);
2230 			zend_string_release_ex(subject_str, 0);
2231 			subject_str = result;
2232 			if (UNEXPECTED(result == NULL)) {
2233 				break;
2234 			}
2235 		} ZEND_HASH_FOREACH_END();
2236 
2237 		return subject_str;
2238 	}
2239 }
2240 /* }}} */
2241 
2242 /* {{{ preg_replace_func_impl
2243  */
preg_replace_func_impl(zval * return_value,zval * regex,zend_fcall_info * fci,zend_fcall_info_cache * fcc,zval * subject,zend_long limit_val,zend_long flags)2244 static size_t preg_replace_func_impl(zval *return_value, zval *regex, zend_fcall_info *fci, zend_fcall_info_cache *fcc, zval *subject, zend_long limit_val, zend_long flags)
2245 {
2246 	zend_string	*result;
2247 	size_t replace_count = 0;
2248 
2249 	if (Z_TYPE_P(regex) != IS_ARRAY) {
2250 		convert_to_string_ex(regex);
2251 	}
2252 
2253 	if (Z_TYPE_P(subject) != IS_ARRAY) {
2254 		result = php_replace_in_subject_func(
2255 			regex, fci, fcc, subject, limit_val, &replace_count, flags);
2256 		if (result != NULL) {
2257 			RETVAL_STR(result);
2258 		} else {
2259 			RETVAL_NULL();
2260 		}
2261 	} else {
2262 		/* if subject is an array */
2263 		zval		*subject_entry, zv;
2264 		zend_string	*string_key;
2265 		zend_ulong	 num_key;
2266 
2267 		array_init_size(return_value, zend_hash_num_elements(Z_ARRVAL_P(subject)));
2268 
2269 		/* For each subject entry, convert it to string, then perform replacement
2270 		   and add the result to the return_value array. */
2271 		ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(subject), num_key, string_key, subject_entry) {
2272 			result = php_replace_in_subject_func(
2273 				regex, fci, fcc, subject_entry, limit_val, &replace_count, flags);
2274 			if (result != NULL) {
2275 				/* Add to return array */
2276 				ZVAL_STR(&zv, result);
2277 				if (string_key) {
2278 					zend_hash_add_new(Z_ARRVAL_P(return_value), string_key, &zv);
2279 				} else {
2280 					zend_hash_index_add_new(Z_ARRVAL_P(return_value), num_key, &zv);
2281 				}
2282 			}
2283 		} ZEND_HASH_FOREACH_END();
2284 	}
2285 
2286 	return replace_count;
2287 }
2288 /* }}} */
2289 
2290 /* {{{ preg_replace_common
2291  */
preg_replace_common(INTERNAL_FUNCTION_PARAMETERS,int is_filter)2292 static void preg_replace_common(INTERNAL_FUNCTION_PARAMETERS, int is_filter)
2293 {
2294 	zval *regex, *replace, *subject, *zcount = NULL;
2295 	zend_long limit = -1;
2296 	size_t replace_count = 0;
2297 	zend_string	*result;
2298 	size_t old_replace_count;
2299 
2300 	/* Get function parameters and do error-checking. */
2301 	ZEND_PARSE_PARAMETERS_START(3, 5)
2302 		Z_PARAM_ZVAL(regex)
2303 		Z_PARAM_ZVAL(replace)
2304 		Z_PARAM_ZVAL(subject)
2305 		Z_PARAM_OPTIONAL
2306 		Z_PARAM_LONG(limit)
2307 		Z_PARAM_ZVAL(zcount)
2308 	ZEND_PARSE_PARAMETERS_END();
2309 
2310 	if (Z_TYPE_P(replace) != IS_ARRAY) {
2311 		convert_to_string_ex(replace);
2312 		if (Z_TYPE_P(regex) != IS_ARRAY) {
2313 			convert_to_string_ex(regex);
2314 		}
2315 	} else {
2316 		if (Z_TYPE_P(regex) != IS_ARRAY) {
2317 			php_error_docref(NULL, E_WARNING, "Parameter mismatch, pattern is a string while replacement is an array");
2318 			RETURN_FALSE;
2319 		}
2320 	}
2321 
2322 	if (Z_TYPE_P(subject) != IS_ARRAY) {
2323 		old_replace_count = replace_count;
2324 		result = php_replace_in_subject(regex,
2325 										replace,
2326 										subject,
2327 										limit,
2328 										&replace_count);
2329 		if (result != NULL) {
2330 			if (!is_filter || replace_count > old_replace_count) {
2331 				RETVAL_STR(result);
2332 			} else {
2333 				zend_string_release_ex(result, 0);
2334 				RETVAL_NULL();
2335 			}
2336 		} else {
2337 			RETVAL_NULL();
2338 		}
2339 	} else {
2340 		/* if subject is an array */
2341 		zval		*subject_entry, zv;
2342 		zend_string	*string_key;
2343 		zend_ulong	 num_key;
2344 
2345 		array_init_size(return_value, zend_hash_num_elements(Z_ARRVAL_P(subject)));
2346 
2347 		/* For each subject entry, convert it to string, then perform replacement
2348 		   and add the result to the return_value array. */
2349 		ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(subject), num_key, string_key, subject_entry) {
2350 			old_replace_count = replace_count;
2351 			result = php_replace_in_subject(regex,
2352 											replace,
2353 											subject_entry,
2354 											limit,
2355 											&replace_count);
2356 			if (result != NULL) {
2357 				if (!is_filter || replace_count > old_replace_count) {
2358 					/* Add to return array */
2359 					ZVAL_STR(&zv, result);
2360 					if (string_key) {
2361 						zend_hash_add_new(Z_ARRVAL_P(return_value), string_key, &zv);
2362 					} else {
2363 						zend_hash_index_add_new(Z_ARRVAL_P(return_value), num_key, &zv);
2364 					}
2365 				} else {
2366 					zend_string_release_ex(result, 0);
2367 				}
2368 			}
2369 		} ZEND_HASH_FOREACH_END();
2370 	}
2371 
2372 	if (zcount) {
2373 		ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count);
2374 	}
2375 }
2376 /* }}} */
2377 
2378 /* {{{ proto mixed preg_replace(mixed regex, mixed replace, mixed subject [, int limit [, int &count]])
2379    Perform Perl-style regular expression replacement. */
PHP_FUNCTION(preg_replace)2380 static PHP_FUNCTION(preg_replace)
2381 {
2382 	preg_replace_common(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
2383 }
2384 /* }}} */
2385 
2386 /* {{{ proto mixed preg_replace_callback(mixed regex, mixed callback, mixed subject [, int limit [, int &count]])
2387    Perform Perl-style regular expression replacement using replacement callback. */
PHP_FUNCTION(preg_replace_callback)2388 static PHP_FUNCTION(preg_replace_callback)
2389 {
2390 	zval *regex, *replace, *subject, *zcount = NULL;
2391 	zend_long limit = -1, flags = 0;
2392 	size_t replace_count;
2393 	zend_fcall_info fci;
2394 	zend_fcall_info_cache fcc;
2395 
2396 	/* Get function parameters and do error-checking. */
2397 	ZEND_PARSE_PARAMETERS_START(3, 6)
2398 		Z_PARAM_ZVAL(regex)
2399 		Z_PARAM_ZVAL(replace)
2400 		Z_PARAM_ZVAL(subject)
2401 		Z_PARAM_OPTIONAL
2402 		Z_PARAM_LONG(limit)
2403 		Z_PARAM_ZVAL(zcount)
2404 		Z_PARAM_LONG(flags)
2405 	ZEND_PARSE_PARAMETERS_END();
2406 
2407 	if (!zend_is_callable_ex(replace, NULL, 0, NULL, &fcc, NULL)) {
2408 		zend_string	*callback_name = zend_get_callable_name(replace);
2409 		php_error_docref(NULL, E_WARNING, "Requires argument 2, '%s', to be a valid callback", ZSTR_VAL(callback_name));
2410 		zend_string_release_ex(callback_name, 0);
2411 		ZVAL_STR(return_value, zval_get_string(subject));
2412 		return;
2413 	}
2414 
2415 	fci.size = sizeof(fci);
2416 	fci.object = NULL;
2417 	ZVAL_COPY_VALUE(&fci.function_name, replace);
2418 
2419 	replace_count = preg_replace_func_impl(return_value, regex, &fci, &fcc, subject, limit, flags);
2420 	if (zcount) {
2421 		ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count);
2422 	}
2423 }
2424 /* }}} */
2425 
2426 /* {{{ proto mixed preg_replace_callback_array(array pattern, mixed subject [, int limit [, int &count]])
2427    Perform Perl-style regular expression replacement using replacement callback. */
PHP_FUNCTION(preg_replace_callback_array)2428 static PHP_FUNCTION(preg_replace_callback_array)
2429 {
2430 	zval regex, zv, *replace, *subject, *pattern, *zcount = NULL;
2431 	zend_long limit = -1, flags = 0;
2432 	zend_string *str_idx;
2433 	size_t replace_count = 0;
2434 	zend_fcall_info fci;
2435 	zend_fcall_info_cache fcc;
2436 
2437 	/* Get function parameters and do error-checking. */
2438 	ZEND_PARSE_PARAMETERS_START(2, 5)
2439 		Z_PARAM_ARRAY(pattern)
2440 		Z_PARAM_ZVAL(subject)
2441 		Z_PARAM_OPTIONAL
2442 		Z_PARAM_LONG(limit)
2443 		Z_PARAM_ZVAL(zcount)
2444 		Z_PARAM_LONG(flags)
2445 	ZEND_PARSE_PARAMETERS_END();
2446 
2447 	fci.size = sizeof(fci);
2448 	fci.object = NULL;
2449 
2450 	ZEND_HASH_FOREACH_STR_KEY_VAL(Z_ARRVAL_P(pattern), str_idx, replace) {
2451 		if (str_idx) {
2452 			ZVAL_STR_COPY(&regex, str_idx);
2453 		} else {
2454 			php_error_docref(NULL, E_WARNING, "Delimiter must not be alphanumeric or backslash");
2455 			zval_ptr_dtor(return_value);
2456 			RETURN_NULL();
2457 		}
2458 
2459 		if (!zend_is_callable_ex(replace, NULL, 0, NULL, &fcc, NULL)) {
2460 			zend_string *callback_name = zend_get_callable_name(replace);
2461 			php_error_docref(NULL, E_WARNING, "'%s' is not a valid callback", ZSTR_VAL(callback_name));
2462 			zend_string_release_ex(callback_name, 0);
2463 			zval_ptr_dtor(&regex);
2464 			zval_ptr_dtor(return_value);
2465 			ZVAL_COPY(return_value, subject);
2466 			return;
2467 		}
2468 
2469 		ZVAL_COPY_VALUE(&fci.function_name, replace);
2470 
2471 		replace_count += preg_replace_func_impl(&zv, &regex, &fci, &fcc, subject, limit, flags);
2472 		if (subject != return_value) {
2473 			subject = return_value;
2474 		} else {
2475 			zval_ptr_dtor(return_value);
2476 		}
2477 
2478 		zval_ptr_dtor(&regex);
2479 
2480 		ZVAL_COPY_VALUE(return_value, &zv);
2481 
2482 		if (UNEXPECTED(EG(exception))) {
2483 			zval_ptr_dtor(return_value);
2484 			RETURN_NULL();
2485 		}
2486 	} ZEND_HASH_FOREACH_END();
2487 
2488 	if (zcount) {
2489 		ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count);
2490 	}
2491 }
2492 /* }}} */
2493 
2494 /* {{{ proto mixed preg_filter(mixed regex, mixed replace, mixed subject [, int limit [, int &count]])
2495    Perform Perl-style regular expression replacement and only return matches. */
PHP_FUNCTION(preg_filter)2496 static PHP_FUNCTION(preg_filter)
2497 {
2498 	preg_replace_common(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
2499 }
2500 /* }}} */
2501 
2502 /* {{{ proto array preg_split(string pattern, string subject [, int limit [, int flags]])
2503    Split string into an array using a perl-style regular expression as a delimiter */
PHP_FUNCTION(preg_split)2504 static PHP_FUNCTION(preg_split)
2505 {
2506 	zend_string			*regex;			/* Regular expression */
2507 	zend_string			*subject;		/* String to match against */
2508 	zend_long			 limit_val = -1;/* Integer value of limit */
2509 	zend_long			 flags = 0;		/* Match control flags */
2510 	pcre_cache_entry	*pce;			/* Compiled regular expression */
2511 
2512 	/* Get function parameters and do error checking */
2513 	ZEND_PARSE_PARAMETERS_START(2, 4)
2514 		Z_PARAM_STR(regex)
2515 		Z_PARAM_STR(subject)
2516 		Z_PARAM_OPTIONAL
2517 		Z_PARAM_LONG(limit_val)
2518 		Z_PARAM_LONG(flags)
2519 	ZEND_PARSE_PARAMETERS_END_EX(RETURN_FALSE);
2520 
2521 	/* Compile regex or get it from cache. */
2522 	if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
2523 		RETURN_FALSE;
2524 	}
2525 
2526 	pce->refcount++;
2527 	php_pcre_split_impl(pce, subject, return_value, limit_val, flags);
2528 	pce->refcount--;
2529 }
2530 /* }}} */
2531 
2532 /* {{{ php_pcre_split
2533  */
php_pcre_split_impl(pcre_cache_entry * pce,zend_string * subject_str,zval * return_value,zend_long limit_val,zend_long flags)2534 PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str, zval *return_value,
2535 	zend_long limit_val, zend_long flags)
2536 {
2537 	PCRE2_SIZE		*offsets;			/* Array of subpattern offsets */
2538 	uint32_t		 options;			/* Execution options */
2539 	int				 count;				/* Count of matched subpatterns */
2540 	PCRE2_SIZE		 start_offset;		/* Where the new search starts */
2541 	PCRE2_SIZE		 last_match_offset;	/* Location of last match */
2542 	uint32_t		 no_empty;			/* If NO_EMPTY flag is set */
2543 	uint32_t		 delim_capture; 	/* If delimiters should be captured */
2544 	uint32_t		 offset_capture;	/* If offsets should be captured */
2545 	uint32_t		 num_subpats;		/* Number of captured subpatterns */
2546 	zval			 tmp;
2547 	pcre2_match_data *match_data;
2548 	char *subject = ZSTR_VAL(subject_str);
2549 
2550 	no_empty = flags & PREG_SPLIT_NO_EMPTY;
2551 	delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE;
2552 	offset_capture = flags & PREG_SPLIT_OFFSET_CAPTURE;
2553 
2554 	/* Initialize return value */
2555 	array_init(return_value);
2556 
2557 	/* Calculate the size of the offsets array, and allocate memory for it. */
2558 	num_subpats = pce->capture_count + 1;
2559 
2560 	/* Start at the beginning of the string */
2561 	start_offset = 0;
2562 	last_match_offset = 0;
2563 	PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
2564 
2565 	if (limit_val == -1) {
2566 		/* pass */
2567 	} else if (limit_val == 0) {
2568 		limit_val = -1;
2569 	} else if (limit_val <= 1) {
2570 		goto last;
2571 	}
2572 
2573 	if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
2574 		match_data = mdata;
2575 	} else {
2576 		match_data = pcre2_match_data_create_from_pattern(pce->re, gctx);
2577 		if (!match_data) {
2578 			PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
2579 			zval_ptr_dtor(return_value);
2580 			RETURN_FALSE;
2581 		}
2582 	}
2583 
2584 	options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
2585 
2586 #ifdef HAVE_PCRE_JIT_SUPPORT
2587 	if ((pce->preg_options & PREG_JIT) && options) {
2588 		count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
2589 				PCRE2_NO_UTF_CHECK, match_data, mctx);
2590 	} else
2591 #endif
2592 	count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
2593 			options, match_data, mctx);
2594 
2595 	while (1) {
2596 		/* If something matched */
2597 		if (count >= 0) {
2598 			/* Check for too many substrings condition. */
2599 			if (UNEXPECTED(count == 0)) {
2600 				php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
2601 				count = num_subpats;
2602 			}
2603 
2604 matched:
2605 			offsets = pcre2_get_ovector_pointer(match_data);
2606 
2607 			if (UNEXPECTED(offsets[1] < offsets[0])) {
2608 				PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
2609 				break;
2610 			}
2611 
2612 			if (!no_empty || offsets[0] != last_match_offset) {
2613 				if (offset_capture) {
2614 					/* Add (match, offset) pair to the return value */
2615 					add_offset_pair(
2616 						return_value, subject, last_match_offset, offsets[0],
2617 						NULL, 0);
2618 				} else {
2619 					/* Add the piece to the return value */
2620 					populate_match_value_str(&tmp, subject, last_match_offset, offsets[0]);
2621 					zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
2622 				}
2623 
2624 				/* One less left to do */
2625 				if (limit_val != -1)
2626 					limit_val--;
2627 			}
2628 
2629 			if (delim_capture) {
2630 				size_t i;
2631 				for (i = 1; i < count; i++) {
2632 					/* If we have matched a delimiter */
2633 					if (!no_empty || offsets[2*i] != offsets[2*i+1]) {
2634 						if (offset_capture) {
2635 							add_offset_pair(
2636 								return_value, subject, offsets[2*i], offsets[2*i+1], NULL, 0);
2637 						} else {
2638 							populate_match_value_str(&tmp, subject, offsets[2*i], offsets[2*i+1]);
2639 							zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
2640 						}
2641 					}
2642 				}
2643 			}
2644 
2645 			/* Advance to the position right after the last full match */
2646 			start_offset = last_match_offset = offsets[1];
2647 
2648 			/* If we have matched an empty string, mimic what Perl's /g options does.
2649 			   This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
2650 			   the match again at the same point. If this fails (picked up above) we
2651 			   advance to the next character. */
2652 			if (start_offset == offsets[0]) {
2653 				/* Get next piece if no limit or limit not yet reached and something matched*/
2654 				if (limit_val != -1 && limit_val <= 1) {
2655 					break;
2656 				}
2657 				count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
2658 					PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
2659 				if (count >= 0) {
2660 					goto matched;
2661 				} else if (count == PCRE2_ERROR_NOMATCH) {
2662 					/* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
2663 					   this is not necessarily the end. We need to advance
2664 					   the start offset, and continue. Fudge the offset values
2665 					   to achieve this, unless we're already at the end of the string. */
2666 					if (start_offset < ZSTR_LEN(subject_str)) {
2667 						start_offset += calculate_unit_length(pce, subject + start_offset);
2668 					} else {
2669 						break;
2670 					}
2671 				} else {
2672 					goto error;
2673 				}
2674 			}
2675 
2676 		} else if (count == PCRE2_ERROR_NOMATCH) {
2677 			break;
2678 		} else {
2679 error:
2680 			pcre_handle_exec_error(count);
2681 			break;
2682 		}
2683 
2684 		/* Get next piece if no limit or limit not yet reached and something matched*/
2685 		if (limit_val != -1 && limit_val <= 1) {
2686 			break;
2687 		}
2688 
2689 #ifdef HAVE_PCRE_JIT_SUPPORT
2690 		if (pce->preg_options & PREG_JIT) {
2691 			count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
2692 					PCRE2_NO_UTF_CHECK, match_data, mctx);
2693 		} else
2694 #endif
2695 		count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
2696 				PCRE2_NO_UTF_CHECK, match_data, mctx);
2697 	}
2698 	if (match_data != mdata) {
2699 		pcre2_match_data_free(match_data);
2700 	}
2701 
2702 	if (PCRE_G(error_code) != PHP_PCRE_NO_ERROR) {
2703 		zval_ptr_dtor(return_value);
2704 		RETURN_FALSE;
2705 	}
2706 
2707 last:
2708 	start_offset = last_match_offset; /* the offset might have been incremented, but without further successful matches */
2709 
2710 	if (!no_empty || start_offset < ZSTR_LEN(subject_str)) {
2711 		if (offset_capture) {
2712 			/* Add the last (match, offset) pair to the return value */
2713 			add_offset_pair(return_value, subject, start_offset, ZSTR_LEN(subject_str), NULL, 0);
2714 		} else {
2715 			/* Add the last piece to the return value */
2716 			if (start_offset == 0) {
2717 				ZVAL_STR_COPY(&tmp, subject_str);
2718 			} else {
2719 				populate_match_value_str(&tmp, subject, start_offset, ZSTR_LEN(subject_str));
2720 			}
2721 			zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
2722 		}
2723 	}
2724 }
2725 /* }}} */
2726 
2727 /* {{{ proto string preg_quote(string str [, string delim_char])
2728    Quote regular expression characters plus an optional character */
PHP_FUNCTION(preg_quote)2729 static PHP_FUNCTION(preg_quote)
2730 {
2731 	zend_string *str;       		/* Input string argument */
2732 	zend_string	*delim = NULL;		/* Additional delimiter argument */
2733 	char		*in_str;			/* Input string */
2734 	char		*in_str_end;    	/* End of the input string */
2735 	zend_string	*out_str;			/* Output string with quoted characters */
2736 	size_t       extra_len;         /* Number of additional characters */
2737 	char 		*p,					/* Iterator for input string */
2738 				*q,					/* Iterator for output string */
2739 				 delim_char = '\0',	/* Delimiter character to be quoted */
2740 				 c;					/* Current character */
2741 
2742 	/* Get the arguments and check for errors */
2743 	ZEND_PARSE_PARAMETERS_START(1, 2)
2744 		Z_PARAM_STR(str)
2745 		Z_PARAM_OPTIONAL
2746 		Z_PARAM_STR_EX(delim, 1, 0)
2747 	ZEND_PARSE_PARAMETERS_END();
2748 
2749 	/* Nothing to do if we got an empty string */
2750 	if (ZSTR_LEN(str) == 0) {
2751 		RETURN_EMPTY_STRING();
2752 	}
2753 
2754 	in_str = ZSTR_VAL(str);
2755 	in_str_end = in_str + ZSTR_LEN(str);
2756 
2757 	if (delim) {
2758 		delim_char = ZSTR_VAL(delim)[0];
2759 	}
2760 
2761 	/* Go through the string and quote necessary characters */
2762 	extra_len = 0;
2763 	p = in_str;
2764 	do {
2765 		c = *p;
2766 		switch(c) {
2767 			case '.':
2768 			case '\\':
2769 			case '+':
2770 			case '*':
2771 			case '?':
2772 			case '[':
2773 			case '^':
2774 			case ']':
2775 			case '$':
2776 			case '(':
2777 			case ')':
2778 			case '{':
2779 			case '}':
2780 			case '=':
2781 			case '!':
2782 			case '>':
2783 			case '<':
2784 			case '|':
2785 			case ':':
2786 			case '-':
2787 			case '#':
2788 				extra_len++;
2789 				break;
2790 
2791 			case '\0':
2792 				extra_len+=3;
2793 				break;
2794 
2795 			default:
2796 				if (c == delim_char) {
2797 					extra_len++;
2798 				}
2799 				break;
2800 		}
2801 		p++;
2802 	} while (p != in_str_end);
2803 
2804 	if (extra_len == 0) {
2805 		RETURN_STR_COPY(str);
2806 	}
2807 
2808 	/* Allocate enough memory so that even if each character
2809 	   is quoted, we won't run out of room */
2810 	out_str = zend_string_safe_alloc(1, ZSTR_LEN(str), extra_len, 0);
2811 	q = ZSTR_VAL(out_str);
2812 	p = in_str;
2813 
2814 	do {
2815 		c = *p;
2816 		switch(c) {
2817 			case '.':
2818 			case '\\':
2819 			case '+':
2820 			case '*':
2821 			case '?':
2822 			case '[':
2823 			case '^':
2824 			case ']':
2825 			case '$':
2826 			case '(':
2827 			case ')':
2828 			case '{':
2829 			case '}':
2830 			case '=':
2831 			case '!':
2832 			case '>':
2833 			case '<':
2834 			case '|':
2835 			case ':':
2836 			case '-':
2837 			case '#':
2838 				*q++ = '\\';
2839 				*q++ = c;
2840 				break;
2841 
2842 			case '\0':
2843 				*q++ = '\\';
2844 				*q++ = '0';
2845 				*q++ = '0';
2846 				*q++ = '0';
2847 				break;
2848 
2849 			default:
2850 				if (c == delim_char) {
2851 					*q++ = '\\';
2852 				}
2853 				*q++ = c;
2854 				break;
2855 		}
2856 		p++;
2857 	} while (p != in_str_end);
2858 	*q = '\0';
2859 
2860 	RETURN_NEW_STR(out_str);
2861 }
2862 /* }}} */
2863 
2864 /* {{{ proto array preg_grep(string regex, array input [, int flags])
2865    Searches array and returns entries which match regex */
PHP_FUNCTION(preg_grep)2866 static PHP_FUNCTION(preg_grep)
2867 {
2868 	zend_string			*regex;			/* Regular expression */
2869 	zval				*input;			/* Input array */
2870 	zend_long			 flags = 0;		/* Match control flags */
2871 	pcre_cache_entry	*pce;			/* Compiled regular expression */
2872 
2873 	/* Get arguments and do error checking */
2874 	ZEND_PARSE_PARAMETERS_START(2, 3)
2875 		Z_PARAM_STR(regex)
2876 		Z_PARAM_ARRAY(input)
2877 		Z_PARAM_OPTIONAL
2878 		Z_PARAM_LONG(flags)
2879 	ZEND_PARSE_PARAMETERS_END();
2880 
2881 	/* Compile regex or get it from cache. */
2882 	if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
2883 		RETURN_FALSE;
2884 	}
2885 
2886 	pce->refcount++;
2887 	php_pcre_grep_impl(pce, input, return_value, flags);
2888 	pce->refcount--;
2889 }
2890 /* }}} */
2891 
php_pcre_grep_impl(pcre_cache_entry * pce,zval * input,zval * return_value,zend_long flags)2892 PHPAPI void  php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return_value, zend_long flags) /* {{{ */
2893 {
2894 	zval            *entry;             /* An entry in the input array */
2895 	uint32_t		 num_subpats;		/* Number of captured subpatterns */
2896 	int				 count;				/* Count of matched subpatterns */
2897 	uint32_t		 options;			/* Execution options */
2898 	zend_string		*string_key;
2899 	zend_ulong		 num_key;
2900 	zend_bool		 invert;			/* Whether to return non-matching
2901 										   entries */
2902 	pcre2_match_data *match_data;
2903 	invert = flags & PREG_GREP_INVERT ? 1 : 0;
2904 
2905 	/* Calculate the size of the offsets array, and allocate memory for it. */
2906 	num_subpats = pce->capture_count + 1;
2907 
2908 	/* Initialize return array */
2909 	array_init(return_value);
2910 
2911 	PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
2912 
2913 	if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
2914 		match_data = mdata;
2915 	} else {
2916 		match_data = pcre2_match_data_create_from_pattern(pce->re, gctx);
2917 		if (!match_data) {
2918 			PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
2919 			return;
2920 		}
2921 	}
2922 
2923 	options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
2924 
2925 	/* Go through the input array */
2926 	ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(input), num_key, string_key, entry) {
2927 		zend_string *tmp_subject_str;
2928 		zend_string *subject_str = zval_get_tmp_string(entry, &tmp_subject_str);
2929 
2930 		/* Perform the match */
2931 #ifdef HAVE_PCRE_JIT_SUPPORT
2932 		if ((pce->preg_options & PREG_JIT) && options) {
2933 			count = pcre2_jit_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), 0,
2934 					PCRE2_NO_UTF_CHECK, match_data, mctx);
2935 		} else
2936 #endif
2937 		count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), 0,
2938 				options, match_data, mctx);
2939 
2940 		/* If the entry fits our requirements */
2941 		if (count >= 0) {
2942 			/* Check for too many substrings condition. */
2943 			if (UNEXPECTED(count == 0)) {
2944 				php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
2945 			}
2946 			if (!invert) {
2947 				Z_TRY_ADDREF_P(entry);
2948 
2949 				/* Add to return array */
2950 				if (string_key) {
2951 					zend_hash_update(Z_ARRVAL_P(return_value), string_key, entry);
2952 				} else {
2953 					zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, entry);
2954 				}
2955 			}
2956 		} else if (count == PCRE2_ERROR_NOMATCH) {
2957 			if (invert) {
2958 				Z_TRY_ADDREF_P(entry);
2959 
2960 				/* Add to return array */
2961 				if (string_key) {
2962 					zend_hash_update(Z_ARRVAL_P(return_value), string_key, entry);
2963 				} else {
2964 					zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, entry);
2965 				}
2966 			}
2967 		} else {
2968 			pcre_handle_exec_error(count);
2969 			zend_tmp_string_release(tmp_subject_str);
2970 			break;
2971 		}
2972 
2973 		zend_tmp_string_release(tmp_subject_str);
2974 	} ZEND_HASH_FOREACH_END();
2975 	if (match_data != mdata) {
2976 		pcre2_match_data_free(match_data);
2977 	}
2978 }
2979 /* }}} */
2980 
2981 /* {{{ proto int preg_last_error()
2982    Returns the error code of the last regexp execution. */
PHP_FUNCTION(preg_last_error)2983 static PHP_FUNCTION(preg_last_error)
2984 {
2985 	ZEND_PARSE_PARAMETERS_NONE();
2986 
2987 	RETURN_LONG(PCRE_G(error_code));
2988 }
2989 /* }}} */
2990 
2991 /* {{{ module definition structures */
2992 
2993 /* {{{ arginfo */
2994 ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match, 0, 0, 2)
2995     ZEND_ARG_INFO(0, pattern)
2996     ZEND_ARG_INFO(0, subject)
2997     ZEND_ARG_INFO(1, subpatterns) /* array */
2998     ZEND_ARG_INFO(0, flags)
2999     ZEND_ARG_INFO(0, offset)
3000 ZEND_END_ARG_INFO()
3001 
3002 ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match_all, 0, 0, 2)
3003     ZEND_ARG_INFO(0, pattern)
3004     ZEND_ARG_INFO(0, subject)
3005     ZEND_ARG_INFO(1, subpatterns) /* array */
3006     ZEND_ARG_INFO(0, flags)
3007     ZEND_ARG_INFO(0, offset)
3008 ZEND_END_ARG_INFO()
3009 
3010 ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace, 0, 0, 3)
3011     ZEND_ARG_INFO(0, regex)
3012     ZEND_ARG_INFO(0, replace)
3013     ZEND_ARG_INFO(0, subject)
3014     ZEND_ARG_INFO(0, limit)
3015     ZEND_ARG_INFO(1, count)
3016 ZEND_END_ARG_INFO()
3017 
3018 ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace_callback, 0, 0, 3)
3019     ZEND_ARG_INFO(0, regex)
3020     ZEND_ARG_INFO(0, callback)
3021     ZEND_ARG_INFO(0, subject)
3022     ZEND_ARG_INFO(0, limit)
3023     ZEND_ARG_INFO(1, count)
3024     ZEND_ARG_INFO(0, flags)
3025 ZEND_END_ARG_INFO()
3026 
3027 ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace_callback_array, 0, 0, 2)
3028     ZEND_ARG_INFO(0, pattern)
3029     ZEND_ARG_INFO(0, subject)
3030     ZEND_ARG_INFO(0, limit)
3031     ZEND_ARG_INFO(1, count)
3032     ZEND_ARG_INFO(0, flags)
3033 ZEND_END_ARG_INFO()
3034 
3035 ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_split, 0, 0, 2)
3036     ZEND_ARG_INFO(0, pattern)
3037     ZEND_ARG_INFO(0, subject)
3038     ZEND_ARG_INFO(0, limit)
3039     ZEND_ARG_INFO(0, flags)
3040 ZEND_END_ARG_INFO()
3041 
3042 ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_quote, 0, 0, 1)
3043     ZEND_ARG_INFO(0, str)
3044     ZEND_ARG_INFO(0, delim_char)
3045 ZEND_END_ARG_INFO()
3046 
3047 ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_grep, 0, 0, 2)
3048     ZEND_ARG_INFO(0, regex)
3049     ZEND_ARG_INFO(0, input) /* array */
3050     ZEND_ARG_INFO(0, flags)
3051 ZEND_END_ARG_INFO()
3052 
3053 ZEND_BEGIN_ARG_INFO(arginfo_preg_last_error, 0)
3054 ZEND_END_ARG_INFO()
3055 /* }}} */
3056 
3057 static const zend_function_entry pcre_functions[] = {
3058 	PHP_FE(preg_match,					arginfo_preg_match)
3059 	PHP_FE(preg_match_all,				arginfo_preg_match_all)
3060 	PHP_FE(preg_replace,				arginfo_preg_replace)
3061 	PHP_FE(preg_replace_callback,		arginfo_preg_replace_callback)
3062 	PHP_FE(preg_replace_callback_array,	arginfo_preg_replace_callback_array)
3063 	PHP_FE(preg_filter,					arginfo_preg_replace)
3064 	PHP_FE(preg_split,					arginfo_preg_split)
3065 	PHP_FE(preg_quote,					arginfo_preg_quote)
3066 	PHP_FE(preg_grep,					arginfo_preg_grep)
3067 	PHP_FE(preg_last_error,				arginfo_preg_last_error)
3068 	PHP_FE_END
3069 };
3070 
3071 zend_module_entry pcre_module_entry = {
3072 	STANDARD_MODULE_HEADER,
3073    "pcre",
3074 	pcre_functions,
3075 	PHP_MINIT(pcre),
3076 	PHP_MSHUTDOWN(pcre),
3077 	PHP_RINIT(pcre),
3078 	PHP_RSHUTDOWN(pcre),
3079 	PHP_MINFO(pcre),
3080 	PHP_PCRE_VERSION,
3081 	PHP_MODULE_GLOBALS(pcre),
3082 	PHP_GINIT(pcre),
3083 	PHP_GSHUTDOWN(pcre),
3084 	NULL,
3085 	STANDARD_MODULE_PROPERTIES_EX
3086 };
3087 
3088 #ifdef COMPILE_DL_PCRE
ZEND_GET_MODULE(pcre)3089 ZEND_GET_MODULE(pcre)
3090 #endif
3091 
3092 /* }}} */
3093 
3094 PHPAPI pcre2_match_context *php_pcre_mctx(void)
3095 {/*{{{*/
3096 	return mctx;
3097 }/*}}}*/
3098 
php_pcre_gctx(void)3099 PHPAPI pcre2_general_context *php_pcre_gctx(void)
3100 {/*{{{*/
3101 	return gctx;
3102 }/*}}}*/
3103 
php_pcre_cctx(void)3104 PHPAPI pcre2_compile_context *php_pcre_cctx(void)
3105 {/*{{{*/
3106 	return cctx;
3107 }/*}}}*/
3108 
php_pcre_pce_incref(pcre_cache_entry * pce)3109 PHPAPI void php_pcre_pce_incref(pcre_cache_entry *pce)
3110 {/*{{{*/
3111 	assert(NULL != pce);
3112 	pce->refcount++;
3113 }/*}}}*/
3114 
php_pcre_pce_decref(pcre_cache_entry * pce)3115 PHPAPI void php_pcre_pce_decref(pcre_cache_entry *pce)
3116 {/*{{{*/
3117 	assert(NULL != pce);
3118 	assert(0 != pce->refcount);
3119 	pce->refcount--;
3120 }/*}}}*/
3121 
php_pcre_pce_re(pcre_cache_entry * pce)3122 PHPAPI pcre2_code *php_pcre_pce_re(pcre_cache_entry *pce)
3123 {/*{{{*/
3124 	assert(NULL != pce);
3125 	return pce->re;
3126 }/*}}}*/
3127