xref: /php-src/ext/pcre/php_pcre.c (revision 2414b3d7)
1 /*
2    +----------------------------------------------------------------------+
3    | Copyright (c) The PHP Group                                          |
4    +----------------------------------------------------------------------+
5    | This source file is subject to version 3.01 of the PHP license,      |
6    | that is bundled with this package in the file LICENSE, and is        |
7    | available through the world-wide-web at the following url:           |
8    | http://www.php.net/license/3_01.txt                                  |
9    | If you did not receive a copy of the PHP license and are unable to   |
10    | obtain it through the world-wide-web, please send a note to          |
11    | license@php.net so we can mail you a copy immediately.               |
12    +----------------------------------------------------------------------+
13    | Author: Andrei Zmievski <andrei@php.net>                             |
14    +----------------------------------------------------------------------+
15  */
16 
17 #include "php.h"
18 #include "php_ini.h"
19 #include "php_globals.h"
20 #include "php_pcre.h"
21 #include "php_pcre_arginfo.h"
22 #include "ext/standard/info.h"
23 #include "ext/standard/basic_functions.h"
24 #include "zend_smart_str.h"
25 #include "SAPI.h"
26 
27 #include "ext/standard/php_string.h"
28 
29 #define PREG_PATTERN_ORDER			1
30 #define PREG_SET_ORDER				2
31 #define PREG_OFFSET_CAPTURE			(1<<8)
32 #define PREG_UNMATCHED_AS_NULL		(1<<9)
33 
34 #define	PREG_SPLIT_NO_EMPTY			(1<<0)
35 #define PREG_SPLIT_DELIM_CAPTURE	(1<<1)
36 #define PREG_SPLIT_OFFSET_CAPTURE	(1<<2)
37 
38 #define PREG_REPLACE_EVAL			(1<<0)
39 
40 #define PREG_GREP_INVERT			(1<<0)
41 
42 #define PREG_JIT                    (1<<3)
43 
44 #define PCRE_CACHE_SIZE 4096
45 
46 struct _pcre_cache_entry {
47 	pcre2_code *re;
48 	uint32_t preg_options;
49 	uint32_t capture_count;
50 	uint32_t name_count;
51 	uint32_t compile_options;
52 	uint32_t refcount;
53 };
54 
55 PHPAPI ZEND_DECLARE_MODULE_GLOBALS(pcre)
56 
57 #ifdef HAVE_PCRE_JIT_SUPPORT
58 #define PCRE_JIT_STACK_MIN_SIZE (32 * 1024)
59 #define PCRE_JIT_STACK_MAX_SIZE (192 * 1024)
60 ZEND_TLS pcre2_jit_stack *jit_stack = NULL;
61 #endif
62 ZEND_TLS pcre2_general_context *gctx = NULL;
63 /* These two are global per thread for now. Though it is possible to use these
64  	per pattern. Either one can copy it and use in pce, or one does no global
65 	contexts at all, but creates for every pce. */
66 ZEND_TLS pcre2_compile_context *cctx = NULL;
67 ZEND_TLS pcre2_match_context   *mctx = NULL;
68 ZEND_TLS pcre2_match_data      *mdata = NULL;
69 ZEND_TLS zend_bool              mdata_used = 0;
70 ZEND_TLS uint8_t pcre2_init_ok = 0;
71 #if defined(ZTS) && defined(HAVE_PCRE_JIT_SUPPORT)
72 static MUTEX_T pcre_mt = NULL;
73 #define php_pcre_mutex_alloc() \
74 	if (tsrm_is_main_thread() && !pcre_mt) pcre_mt = tsrm_mutex_alloc();
75 #define php_pcre_mutex_free() \
76 	if (tsrm_is_main_thread() && pcre_mt) { tsrm_mutex_free(pcre_mt); pcre_mt = NULL; }
77 #define php_pcre_mutex_lock() tsrm_mutex_lock(pcre_mt);
78 #define php_pcre_mutex_unlock() tsrm_mutex_unlock(pcre_mt);
79 #else
80 #define php_pcre_mutex_alloc()
81 #define php_pcre_mutex_free()
82 #define php_pcre_mutex_lock()
83 #define php_pcre_mutex_unlock()
84 #endif
85 
86 ZEND_TLS HashTable char_tables;
87 
/* Destructor for char_tables elements: frees the persistently allocated
 * table stored as the element's pointer value. */
static void php_pcre_free_char_table(zval *data)
{/*{{{*/
	pefree(Z_PTR_P(data), 1);
}/*}}}*/
93 
pcre_handle_exec_error(int pcre_code)94 static void pcre_handle_exec_error(int pcre_code) /* {{{ */
95 {
96 	int preg_code = 0;
97 
98 	switch (pcre_code) {
99 		case PCRE2_ERROR_MATCHLIMIT:
100 			preg_code = PHP_PCRE_BACKTRACK_LIMIT_ERROR;
101 			break;
102 
103 		case PCRE2_ERROR_RECURSIONLIMIT:
104 			preg_code = PHP_PCRE_RECURSION_LIMIT_ERROR;
105 			break;
106 
107 		case PCRE2_ERROR_BADUTFOFFSET:
108 			preg_code = PHP_PCRE_BAD_UTF8_OFFSET_ERROR;
109 			break;
110 
111 #ifdef HAVE_PCRE_JIT_SUPPORT
112 		case PCRE2_ERROR_JIT_STACKLIMIT:
113 			preg_code = PHP_PCRE_JIT_STACKLIMIT_ERROR;
114 			break;
115 #endif
116 
117 		default:
118 			if (pcre_code <= PCRE2_ERROR_UTF8_ERR1 && pcre_code >= PCRE2_ERROR_UTF8_ERR21) {
119 				preg_code = PHP_PCRE_BAD_UTF8_ERROR;
120 			} else  {
121 				preg_code = PHP_PCRE_INTERNAL_ERROR;
122 			}
123 			break;
124 	}
125 
126 	PCRE_G(error_code) = preg_code;
127 }
128 /* }}} */
129 
/* Returns a static, human readable description for a PHP_PCRE_* error code;
 * codes that are not listed yield "Unknown error". */
static const char *php_pcre_get_error_msg(php_pcre_error_code error_code) /* {{{ */
{
	const char *msg = "Unknown error";

	switch (error_code) {
		case PHP_PCRE_NO_ERROR:
			msg = "No error";
			break;
		case PHP_PCRE_INTERNAL_ERROR:
			msg = "Internal error";
			break;
		case PHP_PCRE_BAD_UTF8_ERROR:
			msg = "Malformed UTF-8 characters, possibly incorrectly encoded";
			break;
		case PHP_PCRE_BAD_UTF8_OFFSET_ERROR:
			msg = "The offset did not correspond to the beginning of a valid UTF-8 code point";
			break;
		case PHP_PCRE_BACKTRACK_LIMIT_ERROR:
			msg = "Backtrack limit exhausted";
			break;
		case PHP_PCRE_RECURSION_LIMIT_ERROR:
			msg = "Recursion limit exhausted";
			break;

#ifdef HAVE_PCRE_JIT_SUPPORT
		case PHP_PCRE_JIT_STACKLIMIT_ERROR:
			msg = "JIT stack limit exhausted";
			break;
#endif

		default:
			break;
	}

	return msg;
}
155 /* }}} */
156 
/* Destructor for the persistent pattern cache: frees the compiled code and
 * the entry itself, which is released with free(). */
static void php_free_pcre_cache(zval *data) /* {{{ */
{
	pcre_cache_entry *entry = (pcre_cache_entry *) Z_PTR_P(data);

	if (entry != NULL) {
		pcre2_code_free(entry->re);
		free(entry);
	}
}
164 /* }}} */
165 
/* Destructor for the per-request pattern cache: frees the compiled code and
 * the entry itself, which is released with efree(). */
static void php_efree_pcre_cache(zval *data) /* {{{ */
{
	pcre_cache_entry *entry = (pcre_cache_entry *) Z_PTR_P(data);

	if (entry != NULL) {
		pcre2_code_free(entry->re);
		efree(entry);
	}
}
173 /* }}} */
174 
/* Allocation callback registered with the PCRE2 general context; memory is
 * taken from the persistent allocator. The user data pointer is unused. */
static void *php_pcre_malloc(PCRE2_SIZE size, void *data)
{/*{{{*/
	(void) data;
	return pemalloc(size, 1);
}/*}}}*/
180 
/* Release callback registered with the PCRE2 general context; counterpart
 * of php_pcre_malloc(). The user data pointer is unused. */
static void php_pcre_free(void *block, void *data)
{/*{{{*/
	(void) data;
	pefree(block, 1);
}/*}}}*/
185 
186 #define PHP_PCRE_PREALLOC_MDATA_SIZE 32
187 
php_pcre_init_pcre2(uint8_t jit)188 static void php_pcre_init_pcre2(uint8_t jit)
189 {/*{{{*/
190 	if (!gctx) {
191 		gctx = pcre2_general_context_create(php_pcre_malloc, php_pcre_free, NULL);
192 		if (!gctx) {
193 			pcre2_init_ok = 0;
194 			return;
195 		}
196 	}
197 
198 	if (!cctx) {
199 		cctx = pcre2_compile_context_create(gctx);
200 		if (!cctx) {
201 			pcre2_init_ok = 0;
202 			return;
203 		}
204 	}
205 
206 	if (!mctx) {
207 		mctx = pcre2_match_context_create(gctx);
208 		if (!mctx) {
209 			pcre2_init_ok = 0;
210 			return;
211 		}
212 	}
213 
214 #ifdef HAVE_PCRE_JIT_SUPPORT
215 	if (jit && !jit_stack) {
216 		jit_stack = pcre2_jit_stack_create(PCRE_JIT_STACK_MIN_SIZE, PCRE_JIT_STACK_MAX_SIZE, gctx);
217 		if (!jit_stack) {
218 			pcre2_init_ok = 0;
219 			return;
220 		}
221 	}
222 #endif
223 
224 	if (!mdata) {
225 		mdata = pcre2_match_data_create(PHP_PCRE_PREALLOC_MDATA_SIZE, gctx);
226 		if (!mdata) {
227 			pcre2_init_ok = 0;
228 			return;
229 		}
230 	}
231 
232 	pcre2_init_ok = 1;
233 }/*}}}*/
234 
php_pcre_shutdown_pcre2(void)235 static void php_pcre_shutdown_pcre2(void)
236 {/*{{{*/
237 	if (gctx) {
238 		pcre2_general_context_free(gctx);
239 		gctx = NULL;
240 	}
241 
242 	if (cctx) {
243 		pcre2_compile_context_free(cctx);
244 		cctx = NULL;
245 	}
246 
247 	if (mctx) {
248 		pcre2_match_context_free(mctx);
249 		mctx = NULL;
250 	}
251 
252 #ifdef HAVE_PCRE_JIT_SUPPORT
253 	/* Stack may only be destroyed when no cached patterns
254 	 	possibly associated with it do exist. */
255 	if (jit_stack) {
256 		pcre2_jit_stack_free(jit_stack);
257 		jit_stack = NULL;
258 	}
259 #endif
260 
261 	if (mdata) {
262 		pcre2_match_data_free(mdata);
263 		mdata = NULL;
264 	}
265 
266 	pcre2_init_ok = 0;
267 }/*}}}*/
268 
PHP_GINIT_FUNCTION(pcre)269 static PHP_GINIT_FUNCTION(pcre) /* {{{ */
270 {
271 	php_pcre_mutex_alloc();
272 
273 	/* If we're on the CLI SAPI, there will only be one request, so we don't need the
274 	 * cache to survive after RSHUTDOWN. */
275 	pcre_globals->per_request_cache = strcmp(sapi_module.name, "cli") == 0;
276 	if (!pcre_globals->per_request_cache) {
277 		zend_hash_init(&pcre_globals->pcre_cache, 0, NULL, php_free_pcre_cache, 1);
278 	}
279 
280 	pcre_globals->backtrack_limit = 0;
281 	pcre_globals->recursion_limit = 0;
282 	pcre_globals->error_code      = PHP_PCRE_NO_ERROR;
283 	ZVAL_UNDEF(&pcre_globals->unmatched_null_pair);
284 	ZVAL_UNDEF(&pcre_globals->unmatched_empty_pair);
285 #ifdef HAVE_PCRE_JIT_SUPPORT
286 	pcre_globals->jit = 1;
287 #endif
288 
289 	php_pcre_init_pcre2(1);
290 	zend_hash_init(&char_tables, 1, NULL, php_pcre_free_char_table, 1);
291 }
292 /* }}} */
293 
PHP_GSHUTDOWN_FUNCTION(pcre)294 static PHP_GSHUTDOWN_FUNCTION(pcre) /* {{{ */
295 {
296 	if (!pcre_globals->per_request_cache) {
297 		zend_hash_destroy(&pcre_globals->pcre_cache);
298 	}
299 
300 	php_pcre_shutdown_pcre2();
301 	zend_hash_destroy(&char_tables);
302 	php_pcre_mutex_free();
303 }
304 /* }}} */
305 
PHP_INI_MH(OnUpdateBacktrackLimit)306 static PHP_INI_MH(OnUpdateBacktrackLimit)
307 {/*{{{*/
308 	OnUpdateLong(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
309 	if (mctx) {
310 		pcre2_set_match_limit(mctx, (uint32_t)PCRE_G(backtrack_limit));
311 	}
312 
313 	return SUCCESS;
314 }/*}}}*/
315 
PHP_INI_MH(OnUpdateRecursionLimit)316 static PHP_INI_MH(OnUpdateRecursionLimit)
317 {/*{{{*/
318 	OnUpdateLong(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
319 	if (mctx) {
320 		pcre2_set_depth_limit(mctx, (uint32_t)PCRE_G(recursion_limit));
321 	}
322 
323 	return SUCCESS;
324 }/*}}}*/
325 
326 #ifdef HAVE_PCRE_JIT_SUPPORT
PHP_INI_MH(OnUpdateJit)327 static PHP_INI_MH(OnUpdateJit)
328 {/*{{{*/
329 	OnUpdateBool(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
330 	if (PCRE_G(jit) && jit_stack) {
331 		pcre2_jit_stack_assign(mctx, NULL, jit_stack);
332 	} else {
333 		pcre2_jit_stack_assign(mctx, NULL, NULL);
334 	}
335 
336 	return SUCCESS;
337 }/*}}}*/
338 #endif
339 
340 PHP_INI_BEGIN()
341 	STD_PHP_INI_ENTRY("pcre.backtrack_limit", "1000000", PHP_INI_ALL, OnUpdateBacktrackLimit, backtrack_limit, zend_pcre_globals, pcre_globals)
342 	STD_PHP_INI_ENTRY("pcre.recursion_limit", "100000",  PHP_INI_ALL, OnUpdateRecursionLimit, recursion_limit, zend_pcre_globals, pcre_globals)
343 #ifdef HAVE_PCRE_JIT_SUPPORT
344 	STD_PHP_INI_ENTRY("pcre.jit",             "1",       PHP_INI_ALL, OnUpdateJit, jit,             zend_pcre_globals, pcre_globals)
345 #endif
PHP_INI_END()346 PHP_INI_END()
347 
/* Fetches a string-valued pcre2_config() item into a freshly malloc()ed
 * buffer.
 *
 * what: one of the PCRE2_CONFIG_* selectors that yield a string.
 *
 * Returns the buffer, which the caller must release with free(), or NULL
 * when the item is unavailable, the library reports an error, or the
 * allocation fails. */
static char *_pcre2_config_str(uint32_t what)
{/*{{{*/
	char *ret;
	/* With a NULL output pointer pcre2_config() only reports the required
	 * length; errors are reported as negative values. */
	int len = pcre2_config(what, NULL);

	if (len <= 0) {
		return NULL;
	}

	ret = malloc((size_t) len + 1);
	if (ret == NULL) {
		return NULL;
	}

	len = pcre2_config(what, ret);
	if (len <= 0) {
		free(ret);
		return NULL;
	}

	return ret;
}/*}}}*/
361 
362 /* {{{ PHP_MINFO_FUNCTION(pcre) */
PHP_MINFO_FUNCTION(pcre)363 static PHP_MINFO_FUNCTION(pcre)
364 {
365 #ifdef HAVE_PCRE_JIT_SUPPORT
366 	uint32_t flag = 0;
367 	char *jit_target = _pcre2_config_str(PCRE2_CONFIG_JITTARGET);
368 #endif
369 	char *version = _pcre2_config_str(PCRE2_CONFIG_VERSION);
370 	char *unicode = _pcre2_config_str(PCRE2_CONFIG_UNICODE_VERSION);
371 
372 	php_info_print_table_start();
373 	php_info_print_table_row(2, "PCRE (Perl Compatible Regular Expressions) Support", "enabled" );
374 	php_info_print_table_row(2, "PCRE Library Version", version);
375 	free(version);
376 	php_info_print_table_row(2, "PCRE Unicode Version", unicode);
377 	free(unicode);
378 
379 #ifdef HAVE_PCRE_JIT_SUPPORT
380 	if (!pcre2_config(PCRE2_CONFIG_JIT, &flag)) {
381 		php_info_print_table_row(2, "PCRE JIT Support", flag ? "enabled" : "disabled");
382 	} else {
383 		php_info_print_table_row(2, "PCRE JIT Support", "unknown" );
384 	}
385 	if (jit_target) {
386 		php_info_print_table_row(2, "PCRE JIT Target", jit_target);
387 	}
388 	free(jit_target);
389 #else
390 	php_info_print_table_row(2, "PCRE JIT Support", "not compiled in" );
391 #endif
392 
393 #ifdef HAVE_PCRE_VALGRIND_SUPPORT
394 	php_info_print_table_row(2, "PCRE Valgrind Support", "enabled" );
395 #endif
396 
397 	php_info_print_table_end();
398 
399 	DISPLAY_INI_ENTRIES();
400 }
401 /* }}} */
402 
403 /* {{{ PHP_MINIT_FUNCTION(pcre) */
PHP_MINIT_FUNCTION(pcre)404 static PHP_MINIT_FUNCTION(pcre)
405 {
406 	char *version;
407 
408 #ifdef HAVE_PCRE_JIT_SUPPORT
409 	if (UNEXPECTED(!pcre2_init_ok)) {
410 		/* Retry. */
411 		php_pcre_init_pcre2(PCRE_G(jit));
412 		if (!pcre2_init_ok) {
413 			return FAILURE;
414 		}
415 	}
416 #endif
417 
418 	REGISTER_INI_ENTRIES();
419 
420 	REGISTER_LONG_CONSTANT("PREG_PATTERN_ORDER", PREG_PATTERN_ORDER, CONST_CS | CONST_PERSISTENT);
421 	REGISTER_LONG_CONSTANT("PREG_SET_ORDER", PREG_SET_ORDER, CONST_CS | CONST_PERSISTENT);
422 	REGISTER_LONG_CONSTANT("PREG_OFFSET_CAPTURE", PREG_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
423 	REGISTER_LONG_CONSTANT("PREG_UNMATCHED_AS_NULL", PREG_UNMATCHED_AS_NULL, CONST_CS | CONST_PERSISTENT);
424 	REGISTER_LONG_CONSTANT("PREG_SPLIT_NO_EMPTY", PREG_SPLIT_NO_EMPTY, CONST_CS | CONST_PERSISTENT);
425 	REGISTER_LONG_CONSTANT("PREG_SPLIT_DELIM_CAPTURE", PREG_SPLIT_DELIM_CAPTURE, CONST_CS | CONST_PERSISTENT);
426 	REGISTER_LONG_CONSTANT("PREG_SPLIT_OFFSET_CAPTURE", PREG_SPLIT_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
427 	REGISTER_LONG_CONSTANT("PREG_GREP_INVERT", PREG_GREP_INVERT, CONST_CS | CONST_PERSISTENT);
428 
429 	REGISTER_LONG_CONSTANT("PREG_NO_ERROR", PHP_PCRE_NO_ERROR, CONST_CS | CONST_PERSISTENT);
430 	REGISTER_LONG_CONSTANT("PREG_INTERNAL_ERROR", PHP_PCRE_INTERNAL_ERROR, CONST_CS | CONST_PERSISTENT);
431 	REGISTER_LONG_CONSTANT("PREG_BACKTRACK_LIMIT_ERROR", PHP_PCRE_BACKTRACK_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
432 	REGISTER_LONG_CONSTANT("PREG_RECURSION_LIMIT_ERROR", PHP_PCRE_RECURSION_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
433 	REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_ERROR", PHP_PCRE_BAD_UTF8_ERROR, CONST_CS | CONST_PERSISTENT);
434 	REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_OFFSET_ERROR", PHP_PCRE_BAD_UTF8_OFFSET_ERROR, CONST_CS | CONST_PERSISTENT);
435 	REGISTER_LONG_CONSTANT("PREG_JIT_STACKLIMIT_ERROR", PHP_PCRE_JIT_STACKLIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
436 	version = _pcre2_config_str(PCRE2_CONFIG_VERSION);
437 	REGISTER_STRING_CONSTANT("PCRE_VERSION", version, CONST_CS | CONST_PERSISTENT);
438 	free(version);
439 	REGISTER_LONG_CONSTANT("PCRE_VERSION_MAJOR", PCRE2_MAJOR, CONST_CS | CONST_PERSISTENT);
440 	REGISTER_LONG_CONSTANT("PCRE_VERSION_MINOR", PCRE2_MINOR, CONST_CS | CONST_PERSISTENT);
441 
442 #ifdef HAVE_PCRE_JIT_SUPPORT
443 	REGISTER_BOOL_CONSTANT("PCRE_JIT_SUPPORT", 1, CONST_CS | CONST_PERSISTENT);
444 #else
445 	REGISTER_BOOL_CONSTANT("PCRE_JIT_SUPPORT", 0, CONST_CS | CONST_PERSISTENT);
446 #endif
447 
448 	return SUCCESS;
449 }
450 /* }}} */
451 
452 /* {{{ PHP_MSHUTDOWN_FUNCTION(pcre) */
PHP_MSHUTDOWN_FUNCTION(pcre)453 static PHP_MSHUTDOWN_FUNCTION(pcre)
454 {
455 	UNREGISTER_INI_ENTRIES();
456 
457 	return SUCCESS;
458 }
459 /* }}} */
460 
461 /* {{{ PHP_RINIT_FUNCTION(pcre) */
PHP_RINIT_FUNCTION(pcre)462 static PHP_RINIT_FUNCTION(pcre)
463 {
464 #ifdef HAVE_PCRE_JIT_SUPPORT
465 	if (UNEXPECTED(!pcre2_init_ok)) {
466 		/* Retry. */
467 		php_pcre_mutex_lock();
468 		php_pcre_init_pcre2(PCRE_G(jit));
469 		if (!pcre2_init_ok) {
470 			php_pcre_mutex_unlock();
471 			return FAILURE;
472 		}
473 		php_pcre_mutex_unlock();
474 	}
475 
476 	mdata_used = 0;
477 #endif
478 
479 	if (PCRE_G(per_request_cache)) {
480 		zend_hash_init(&PCRE_G(pcre_cache), 0, NULL, php_efree_pcre_cache, 0);
481 	}
482 
483 	return SUCCESS;
484 }
485 /* }}} */
486 
PHP_RSHUTDOWN_FUNCTION(pcre)487 static PHP_RSHUTDOWN_FUNCTION(pcre)
488 {
489 	if (PCRE_G(per_request_cache)) {
490 		zend_hash_destroy(&PCRE_G(pcre_cache));
491 	}
492 
493 	zval_ptr_dtor(&PCRE_G(unmatched_null_pair));
494 	zval_ptr_dtor(&PCRE_G(unmatched_empty_pair));
495 	ZVAL_UNDEF(&PCRE_G(unmatched_null_pair));
496 	ZVAL_UNDEF(&PCRE_G(unmatched_empty_pair));
497 	return SUCCESS;
498 }
499 
500 /* {{{ static pcre_clean_cache */
/* Hash apply callback used to evict cache entries. `arg` points to an int
 * holding how many entries may still be removed; an entry is removed (and
 * the counter decremented) only while that budget is positive and the
 * entry's refcount is zero. */
static int pcre_clean_cache(zval *data, void *arg)
{
	int *remaining = (int *) arg;
	pcre_cache_entry *entry = (pcre_cache_entry *) Z_PTR_P(data);

	if (*remaining <= 0 || entry->refcount) {
		return ZEND_HASH_APPLY_KEEP;
	}

	(*remaining)--;
	return ZEND_HASH_APPLY_REMOVE;
}
513 /* }}} */
514 
/* Releases every non-NULL name in a table built by make_subpats_table()
 * and then the table itself. */
static void free_subpats_table(zend_string **subpat_names, uint32_t num_subpats) {
	zend_string **slot = subpat_names;
	zend_string **end = subpat_names + num_subpats;

	for (; slot < end; slot++) {
		if (*slot != NULL) {
			zend_string_release(*slot);
		}
	}
	efree(subpat_names);
}
524 
525 /* {{{ static make_subpats_table */
make_subpats_table(uint32_t num_subpats,pcre_cache_entry * pce)526 static zend_string **make_subpats_table(uint32_t num_subpats, pcre_cache_entry *pce)
527 {
528 	uint32_t name_cnt = pce->name_count, name_size, ni = 0;
529 	char *name_table;
530 	zend_string **subpat_names;
531 	int rc1, rc2;
532 
533 	rc1 = pcre2_pattern_info(pce->re, PCRE2_INFO_NAMETABLE, &name_table);
534 	rc2 = pcre2_pattern_info(pce->re, PCRE2_INFO_NAMEENTRYSIZE, &name_size);
535 	if (rc1 < 0 || rc2 < 0) {
536 		php_error_docref(NULL, E_WARNING, "Internal pcre2_pattern_info() error %d", rc1 < 0 ? rc1 : rc2);
537 		return NULL;
538 	}
539 
540 	subpat_names = ecalloc(num_subpats, sizeof(zend_string *));
541 	while (ni++ < name_cnt) {
542 		unsigned short name_idx = 0x100 * (unsigned char)name_table[0] + (unsigned char)name_table[1];
543 		const char *name = name_table + 2;
544 		subpat_names[name_idx] = zend_string_init(name, strlen(name), 0);
545 		if (is_numeric_string(ZSTR_VAL(subpat_names[name_idx]), ZSTR_LEN(subpat_names[name_idx]), NULL, NULL, 0) > 0) {
546 			php_error_docref(NULL, E_WARNING, "Numeric named subpatterns are not allowed");
547 			free_subpats_table(subpat_names, num_subpats);
548 			return NULL;
549 		}
550 		name_table += name_size;
551 	}
552 	return subpat_names;
553 }
554 /* }}} */
555 
556 /* {{{ static calculate_unit_length */
557 /* Calculates the byte length of the next character. Assumes valid UTF-8 for PCRE2_UTF. */
calculate_unit_length(pcre_cache_entry * pce,char * start)558 static zend_always_inline size_t calculate_unit_length(pcre_cache_entry *pce, char *start)
559 {
560 	size_t unit_len;
561 
562 	if (pce->compile_options & PCRE2_UTF) {
563 		char *end = start;
564 
565 		/* skip continuation bytes */
566 		while ((*++end & 0xC0) == 0x80);
567 		unit_len = end - start;
568 	} else {
569 		unit_len = 1;
570 	}
571 	return unit_len;
572 }
573 /* }}} */
574 
575 /* {{{ pcre_get_compiled_regex_cache
576  */
pcre_get_compiled_regex_cache_ex(zend_string * regex,int locale_aware)577 PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, int locale_aware)
578 {
579 	pcre2_code			*re = NULL;
580 	uint32_t			 coptions = 0;
581 	PCRE2_UCHAR	         error[128];
582 	PCRE2_SIZE           erroffset;
583 	int                  errnumber;
584 	char				 delimiter;
585 	char				 start_delimiter;
586 	char				 end_delimiter;
587 	char				*p, *pp;
588 	char				*pattern;
589 	size_t				 pattern_len;
590 	uint32_t			 poptions = 0;
591 	const uint8_t       *tables = NULL;
592 	zval                *zv;
593 	pcre_cache_entry	 new_entry;
594 	int					 rc;
595 	zend_string 		*key;
596 	pcre_cache_entry *ret;
597 
598 	if (locale_aware && BG(ctype_string)) {
599 		key = zend_string_concat2(
600 			ZSTR_VAL(BG(ctype_string)), ZSTR_LEN(BG(ctype_string)),
601 			ZSTR_VAL(regex), ZSTR_LEN(regex));
602 	} else {
603 		key = regex;
604 	}
605 
606 	/* Try to lookup the cached regex entry, and if successful, just pass
607 	   back the compiled pattern, otherwise go on and compile it. */
608 	zv = zend_hash_find(&PCRE_G(pcre_cache), key);
609 	if (zv) {
610 		if (key != regex) {
611 			zend_string_release_ex(key, 0);
612 		}
613 		return (pcre_cache_entry*)Z_PTR_P(zv);
614 	}
615 
616 	p = ZSTR_VAL(regex);
617 
618 	/* Parse through the leading whitespace, and display a warning if we
619 	   get to the end without encountering a delimiter. */
620 	while (isspace((int)*(unsigned char *)p)) p++;
621 	if (*p == 0) {
622 		if (key != regex) {
623 			zend_string_release_ex(key, 0);
624 		}
625 		php_error_docref(NULL, E_WARNING,
626 						 p < ZSTR_VAL(regex) + ZSTR_LEN(regex) ? "Null byte in regex" : "Empty regular expression");
627 		pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
628 		return NULL;
629 	}
630 
631 	/* Get the delimiter and display a warning if it is alphanumeric
632 	   or a backslash. */
633 	delimiter = *p++;
634 	if (isalnum((int)*(unsigned char *)&delimiter) || delimiter == '\\') {
635 		if (key != regex) {
636 			zend_string_release_ex(key, 0);
637 		}
638 		php_error_docref(NULL,E_WARNING, "Delimiter must not be alphanumeric or backslash");
639 		pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
640 		return NULL;
641 	}
642 
643 	start_delimiter = delimiter;
644 	if ((pp = strchr("([{< )]}> )]}>", delimiter)))
645 		delimiter = pp[5];
646 	end_delimiter = delimiter;
647 
648 	pp = p;
649 
650 	if (start_delimiter == end_delimiter) {
651 		/* We need to iterate through the pattern, searching for the ending delimiter,
652 		   but skipping the backslashed delimiters.  If the ending delimiter is not
653 		   found, display a warning. */
654 		while (*pp != 0) {
655 			if (*pp == '\\' && pp[1] != 0) pp++;
656 			else if (*pp == delimiter)
657 				break;
658 			pp++;
659 		}
660 	} else {
661 		/* We iterate through the pattern, searching for the matching ending
662 		 * delimiter. For each matching starting delimiter, we increment nesting
663 		 * level, and decrement it for each matching ending delimiter. If we
664 		 * reach the end of the pattern without matching, display a warning.
665 		 */
666 		int brackets = 1; 	/* brackets nesting level */
667 		while (*pp != 0) {
668 			if (*pp == '\\' && pp[1] != 0) pp++;
669 			else if (*pp == end_delimiter && --brackets <= 0)
670 				break;
671 			else if (*pp == start_delimiter)
672 				brackets++;
673 			pp++;
674 		}
675 	}
676 
677 	if (*pp == 0) {
678 		if (key != regex) {
679 			zend_string_release_ex(key, 0);
680 		}
681 		if (pp < ZSTR_VAL(regex) + ZSTR_LEN(regex)) {
682 			php_error_docref(NULL,E_WARNING, "Null byte in regex");
683 		} else if (start_delimiter == end_delimiter) {
684 			php_error_docref(NULL,E_WARNING, "No ending delimiter '%c' found", delimiter);
685 		} else {
686 			php_error_docref(NULL,E_WARNING, "No ending matching delimiter '%c' found", delimiter);
687 		}
688 		pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
689 		return NULL;
690 	}
691 
692 	/* Make a copy of the actual pattern. */
693 	pattern_len = pp - p;
694 	pattern = estrndup(p, pattern_len);
695 
696 	/* Move on to the options */
697 	pp++;
698 
699 	/* Parse through the options, setting appropriate flags.  Display
700 	   a warning if we encounter an unknown modifier. */
701 	while (pp < ZSTR_VAL(regex) + ZSTR_LEN(regex)) {
702 		switch (*pp++) {
703 			/* Perl compatible options */
704 			case 'i':	coptions |= PCRE2_CASELESS;		break;
705 			case 'm':	coptions |= PCRE2_MULTILINE;		break;
706 			case 's':	coptions |= PCRE2_DOTALL;		break;
707 			case 'x':	coptions |= PCRE2_EXTENDED;		break;
708 
709 			/* PCRE specific options */
710 			case 'A':	coptions |= PCRE2_ANCHORED;		break;
711 			case 'D':	coptions |= PCRE2_DOLLAR_ENDONLY;break;
712 			case 'S':	/* Pass. */					break;
713 			case 'X':	/* Pass. */					break;
714 			case 'U':	coptions |= PCRE2_UNGREEDY;		break;
715 			case 'u':	coptions |= PCRE2_UTF;
716 	/* In  PCRE,  by  default, \d, \D, \s, \S, \w, and \W recognize only ASCII
717        characters, even in UTF-8 mode. However, this can be changed by setting
718        the PCRE2_UCP option. */
719 #ifdef PCRE2_UCP
720 						coptions |= PCRE2_UCP;
721 #endif
722 				break;
723 			case 'J':	coptions |= PCRE2_DUPNAMES;		break;
724 
725 			/* Custom preg options */
726 			case 'e':	poptions |= PREG_REPLACE_EVAL;	break;
727 
728 			case ' ':
729 			case '\n':
730 			case '\r':
731 				break;
732 
733 			default:
734 				if (pp[-1]) {
735 					php_error_docref(NULL,E_WARNING, "Unknown modifier '%c'", pp[-1]);
736 				} else {
737 					php_error_docref(NULL,E_WARNING, "Null byte in regex");
738 				}
739 				pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
740 				efree(pattern);
741 				if (key != regex) {
742 					zend_string_release_ex(key, 0);
743 				}
744 				return NULL;
745 		}
746 	}
747 
748 	if (poptions & PREG_REPLACE_EVAL) {
749 		php_error_docref(NULL, E_WARNING, "The /e modifier is no longer supported, use preg_replace_callback instead");
750 		pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
751 		efree(pattern);
752 		if (key != regex) {
753 			zend_string_release_ex(key, 0);
754 		}
755 		return NULL;
756 	}
757 
758 	if (key != regex) {
759 		tables = (uint8_t *)zend_hash_find_ptr(&char_tables, BG(ctype_string));
760 		if (!tables) {
761 			zend_string *_k;
762 			tables = pcre2_maketables(gctx);
763 			if (UNEXPECTED(!tables)) {
764 				php_error_docref(NULL,E_WARNING, "Failed to generate locale character tables");
765 				pcre_handle_exec_error(PCRE2_ERROR_NOMEMORY);
766 				zend_string_release_ex(key, 0);
767 				efree(pattern);
768 				return NULL;
769 			}
770 			_k = zend_string_init(ZSTR_VAL(BG(ctype_string)), ZSTR_LEN(BG(ctype_string)), 1);
771 			GC_MAKE_PERSISTENT_LOCAL(_k);
772 			zend_hash_add_ptr(&char_tables, _k, (void *)tables);
773 			zend_string_release(_k);
774 		}
775 		pcre2_set_character_tables(cctx, tables);
776 	}
777 
778 	/* Compile pattern and display a warning if compilation failed. */
779 	re = pcre2_compile((PCRE2_SPTR)pattern, pattern_len, coptions, &errnumber, &erroffset, cctx);
780 
781 	if (re == NULL) {
782 		if (key != regex) {
783 			zend_string_release_ex(key, 0);
784 		}
785 		pcre2_get_error_message(errnumber, error, sizeof(error));
786 		php_error_docref(NULL,E_WARNING, "Compilation failed: %s at offset %zu", error, erroffset);
787 		pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
788 		efree(pattern);
789 		return NULL;
790 	}
791 
792 #ifdef HAVE_PCRE_JIT_SUPPORT
793 	if (PCRE_G(jit)) {
794 		/* Enable PCRE JIT compiler */
795 		rc = pcre2_jit_compile(re, PCRE2_JIT_COMPLETE);
796 		if (EXPECTED(rc >= 0)) {
797 			size_t jit_size = 0;
798 			if (!pcre2_pattern_info(re, PCRE2_INFO_JITSIZE, &jit_size) && jit_size > 0) {
799 				poptions |= PREG_JIT;
800 			}
801 		} else if (rc == PCRE2_ERROR_NOMEMORY) {
802 			php_error_docref(NULL, E_WARNING,
803 				"Allocation of JIT memory failed, PCRE JIT will be disabled. "
804 				"This is likely caused by security restrictions. "
805 				"Either grant PHP permission to allocate executable memory, or set pcre.jit=0");
806 			PCRE_G(jit) = 0;
807 		} else {
808 			pcre2_get_error_message(rc, error, sizeof(error));
809 			php_error_docref(NULL, E_WARNING, "JIT compilation failed: %s", error);
810 			pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
811 		}
812 	}
813 #endif
814 	efree(pattern);
815 
816 	/*
817 	 * If we reached cache limit, clean out the items from the head of the list;
818 	 * these are supposedly the oldest ones (but not necessarily the least used
819 	 * ones).
820 	 */
821 	if (zend_hash_num_elements(&PCRE_G(pcre_cache)) == PCRE_CACHE_SIZE) {
822 		int num_clean = PCRE_CACHE_SIZE / 8;
823 		zend_hash_apply_with_argument(&PCRE_G(pcre_cache), pcre_clean_cache, &num_clean);
824 	}
825 
826 	/* Store the compiled pattern and extra info in the cache. */
827 	new_entry.re = re;
828 	new_entry.preg_options = poptions;
829 	new_entry.compile_options = coptions;
830 	new_entry.refcount = 0;
831 
832 	rc = pcre2_pattern_info(re, PCRE2_INFO_CAPTURECOUNT, &new_entry.capture_count);
833 	if (rc < 0) {
834 		if (key != regex) {
835 			zend_string_release_ex(key, 0);
836 		}
837 		php_error_docref(NULL, E_WARNING, "Internal pcre2_pattern_info() error %d", rc);
838 		pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
839 		return NULL;
840 	}
841 
842 	rc = pcre2_pattern_info(re, PCRE2_INFO_NAMECOUNT, &new_entry.name_count);
843 	if (rc < 0) {
844 		if (key != regex) {
845 			zend_string_release_ex(key, 0);
846 		}
847 		php_error_docref(NULL, E_WARNING, "Internal pcre_pattern_info() error %d", rc);
848 		pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
849 		return NULL;
850 	}
851 
852 	/*
853 	 * Interned strings are not duplicated when stored in HashTable,
854 	 * but all the interned strings created during HTTP request are removed
855 	 * at end of request. However PCRE_G(pcre_cache) must be consistent
856 	 * on the next request as well. So we disable usage of interned strings
857 	 * as hash keys especually for this table.
858 	 * See bug #63180
859 	 */
860 	if (!(GC_FLAGS(key) & IS_STR_PERMANENT) && !PCRE_G(per_request_cache)) {
861 		zend_string *str = zend_string_init(ZSTR_VAL(key), ZSTR_LEN(key), 1);
862 		GC_MAKE_PERSISTENT_LOCAL(str);
863 
864 		ret = zend_hash_add_new_mem(&PCRE_G(pcre_cache), str, &new_entry, sizeof(pcre_cache_entry));
865 		zend_string_release(str);
866 	} else {
867 		ret = zend_hash_add_new_mem(&PCRE_G(pcre_cache), key, &new_entry, sizeof(pcre_cache_entry));
868 	}
869 
870 	if (key != regex) {
871 		zend_string_release_ex(key, 0);
872 	}
873 
874 	return ret;
875 }
876 /* }}} */
877 
878 /* {{{ pcre_get_compiled_regex_cache
879  */
pcre_get_compiled_regex_cache(zend_string * regex)880 PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex)
881 {
882 	return pcre_get_compiled_regex_cache_ex(regex, 1);
883 }
884 /* }}} */
885 
886 /* {{{ pcre_get_compiled_regex
887  */
pcre_get_compiled_regex(zend_string * regex,uint32_t * capture_count)888 PHPAPI pcre2_code *pcre_get_compiled_regex(zend_string *regex, uint32_t *capture_count)
889 {
890 	pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex);
891 
892 	if (capture_count) {
893 		*capture_count = pce ? pce->capture_count : 0;
894 	}
895 
896 	return pce ? pce->re : NULL;
897 }
898 /* }}} */
899 
900 /* {{{ pcre_get_compiled_regex_ex
901  */
pcre_get_compiled_regex_ex(zend_string * regex,uint32_t * capture_count,uint32_t * preg_options,uint32_t * compile_options)902 PHPAPI pcre2_code* pcre_get_compiled_regex_ex(zend_string *regex, uint32_t *capture_count, uint32_t *preg_options, uint32_t *compile_options)
903 {
904 	pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex);
905 
906 	if (preg_options) {
907 		*preg_options = pce ? pce->preg_options : 0;
908 	}
909 	if (compile_options) {
910 		*compile_options = pce ? pce->compile_options : 0;
911 	}
912 	if (capture_count) {
913 		*capture_count = pce ? pce->capture_count : 0;
914 	}
915 
916 	return pce ? pce->re : NULL;
917 }
918 /* }}} */
919 
920 /* XXX For the cases where it's only about match yes/no and no capture
921 		required, perhaps just a minimum sized data would suffice. */
php_pcre_create_match_data(uint32_t capture_count,pcre2_code * re)922 PHPAPI pcre2_match_data *php_pcre_create_match_data(uint32_t capture_count, pcre2_code *re)
923 {/*{{{*/
924 
925 	assert(NULL != re);
926 
927 	if (EXPECTED(!mdata_used)) {
928 		int rc = 0;
929 
930 		if (!capture_count) {
931 			/* As we deal with a non cached pattern, no other way to gather this info. */
932 			rc = pcre2_pattern_info(re, PCRE2_INFO_CAPTURECOUNT, &capture_count);
933 		}
934 
935 		if (rc >= 0 && capture_count + 1 <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
936 			mdata_used = 1;
937 			return mdata;
938 		}
939 	}
940 
941 	return pcre2_match_data_create_from_pattern(re, gctx);
942 }/*}}}*/
943 
php_pcre_free_match_data(pcre2_match_data * match_data)944 PHPAPI void php_pcre_free_match_data(pcre2_match_data *match_data)
945 {/*{{{*/
946 	if (UNEXPECTED(match_data != mdata)) {
947 		pcre2_match_data_free(match_data);
948 	} else {
949 		mdata_used = 0;
950 	}
951 }/*}}}*/
952 
init_unmatched_null_pair()953 static void init_unmatched_null_pair() {
954 	zval val1, val2;
955 	ZVAL_NULL(&val1);
956 	ZVAL_LONG(&val2, -1);
957 	ZVAL_ARR(&PCRE_G(unmatched_null_pair), zend_new_pair(&val1, &val2));
958 }
959 
init_unmatched_empty_pair()960 static void init_unmatched_empty_pair() {
961 	zval val1, val2;
962 	ZVAL_EMPTY_STRING(&val1);
963 	ZVAL_LONG(&val2, -1);
964 	ZVAL_ARR(&PCRE_G(unmatched_empty_pair), zend_new_pair(&val1, &val2));
965 }
966 
populate_match_value_str(zval * val,const char * subject,PCRE2_SIZE start_offset,PCRE2_SIZE end_offset)967 static zend_always_inline void populate_match_value_str(
968 		zval *val, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset) {
969 	if (start_offset == end_offset) {
970 		ZVAL_EMPTY_STRING(val);
971 	} else if (start_offset + 1 == end_offset) {
972 		ZVAL_INTERNED_STR(val, ZSTR_CHAR((unsigned char) subject[start_offset]));
973 	} else {
974 		ZVAL_STRINGL(val, subject + start_offset, end_offset - start_offset);
975 	}
976 }
977 
/* Fills `val` for one capture group. A group whose start offset is
 * PCRE2_UNSET becomes NULL when `unmatched_as_null` is set and an empty
 * string otherwise; any other group gets its matched text. */
static inline void populate_match_value(
		zval *val, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset,
		uint32_t unmatched_as_null) {
	if (PCRE2_UNSET != start_offset) {
		populate_match_value_str(val, subject, start_offset, end_offset);
		return;
	}

	if (unmatched_as_null) {
		ZVAL_NULL(val);
	} else {
		ZVAL_EMPTY_STRING(val);
	}
}
991 
/* Stores `val` under `name` in the `subpats` array and adds a reference to
 * it when it was stored.
 * With DUPNAMES several groups can share a name, and the one that actually
 * has a value must win: a matched value overwrites whatever is there, while
 * an unmatched one is only added if the name is still free. */
static inline void add_named(
		zval *subpats, zend_string *name, zval *val, zend_bool unmatched) {
	HashTable *target = Z_ARRVAL_P(subpats);

	if (unmatched) {
		if (zend_hash_add(target, name, val) == NULL) {
			/* Name already taken: keep the existing entry, no new reference. */
			return;
		}
	} else {
		zend_hash_update(target, name, val);
	}

	Z_TRY_ADDREF_P(val);
}
1005 
1006 /* {{{ add_offset_pair */
add_offset_pair(zval * result,const char * subject,PCRE2_SIZE start_offset,PCRE2_SIZE end_offset,zend_string * name,uint32_t unmatched_as_null)1007 static inline void add_offset_pair(
1008 		zval *result, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset,
1009 		zend_string *name, uint32_t unmatched_as_null)
1010 {
1011 	zval match_pair;
1012 
1013 	/* Add (match, offset) to the return value */
1014 	if (PCRE2_UNSET == start_offset) {
1015 		if (unmatched_as_null) {
1016 			if (Z_ISUNDEF(PCRE_G(unmatched_null_pair))) {
1017 				init_unmatched_null_pair();
1018 			}
1019 			ZVAL_COPY(&match_pair, &PCRE_G(unmatched_null_pair));
1020 		} else {
1021 			if (Z_ISUNDEF(PCRE_G(unmatched_empty_pair))) {
1022 				init_unmatched_empty_pair();
1023 			}
1024 			ZVAL_COPY(&match_pair, &PCRE_G(unmatched_empty_pair));
1025 		}
1026 	} else {
1027 		zval val1, val2;
1028 		populate_match_value_str(&val1, subject, start_offset, end_offset);
1029 		ZVAL_LONG(&val2, start_offset);
1030 		ZVAL_ARR(&match_pair, zend_new_pair(&val1, &val2));
1031 	}
1032 
1033 	if (name) {
1034 		add_named(result, name, &match_pair, start_offset == PCRE2_UNSET);
1035 	}
1036 	zend_hash_next_index_insert(Z_ARRVAL_P(result), &match_pair);
1037 }
1038 /* }}} */
1039 
/* Fills `subpats` with the capture groups of a single match.
 *
 *   offsets       ovector of the match (start/end offset per group)
 *   subpat_names  optional table mapping group number to name (may be NULL,
 *                 and individual slots may be NULL)
 *   num_subpats   total number of groups in the pattern
 *   count         number of groups set by this match
 *   mark          optional mark name, stored under the "MARK" key
 *   flags         PREG_OFFSET_CAPTURE / PREG_UNMATCHED_AS_NULL bits are used
 *
 * Every group is appended by index; named groups are additionally stored
 * under their name. Groups from `count` up to `num_subpats` are only
 * emitted when PREG_UNMATCHED_AS_NULL is requested. */
static void populate_subpat_array(
		zval *subpats, char *subject, PCRE2_SIZE *offsets, zend_string **subpat_names,
		uint32_t num_subpats, int count, const PCRE2_SPTR mark, zend_long flags) {
	const zend_bool with_offsets = (flags & PREG_OFFSET_CAPTURE) != 0;
	const zend_bool null_unmatched = (flags & PREG_UNMATCHED_AS_NULL) != 0;
	int idx;

	/* Groups reported by this match. */
	for (idx = 0; idx < count; idx++) {
		zend_string *group_name = subpat_names ? subpat_names[idx] : NULL;
		PCRE2_SIZE from = offsets[2*idx];
		PCRE2_SIZE to = offsets[2*idx+1];

		if (with_offsets) {
			add_offset_pair(subpats, subject, from, to, group_name, null_unmatched);
		} else {
			zval entry;

			populate_match_value(&entry, subject, from, to, null_unmatched);
			if (group_name) {
				add_named(subpats, group_name, &entry, from == PCRE2_UNSET);
			}
			zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &entry);
		}
	}

	/* Trailing groups the match did not report. */
	if (null_unmatched) {
		for (idx = count; idx < num_subpats; idx++) {
			zend_string *group_name = subpat_names ? subpat_names[idx] : NULL;

			if (with_offsets) {
				add_offset_pair(subpats, NULL, PCRE2_UNSET, PCRE2_UNSET, group_name, 1);
			} else {
				zval entry;

				ZVAL_NULL(&entry);
				if (group_name) {
					zend_hash_add(Z_ARRVAL_P(subpats), group_name, &entry);
				}
				zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &entry);
			}
		}
	}

	/* Add MARK, if available */
	if (mark) {
		add_assoc_string_ex(subpats, "MARK", sizeof("MARK") - 1, (char *)mark);
	}
}
1107 
/* Common body of preg_match() and preg_match_all(): parses the userland
 * arguments (pattern, subject, optional subpatterns, flags and offset),
 * obtains the cache entry for the pattern and delegates to
 * php_pcre_match_impl(). `global` is forwarded unchanged. Returns FALSE to
 * userland when no cache entry could be obtained. */
static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) /* {{{ */
{
	zend_string		 *regex;			/* Pattern as passed by the caller */
	zend_string		 *subject;			/* Text to search */
	zval			 *subpats = NULL;	/* Optional output array */
	zend_long		  flags = 0;		/* Optional flag bits */
	zend_long		  start_offset = 0;	/* Optional search start */
	pcre_cache_entry *entry;			/* Cache entry for the pattern */

	ZEND_PARSE_PARAMETERS_START(2, 5)
		Z_PARAM_STR(regex)
		Z_PARAM_STR(subject)
		Z_PARAM_OPTIONAL
		Z_PARAM_ZVAL(subpats)
		Z_PARAM_LONG(flags)
		Z_PARAM_LONG(start_offset)
	ZEND_PARSE_PARAMETERS_END();

	/* Fetch the entry from the cache, compiling the pattern if needed. */
	entry = pcre_get_compiled_regex_cache(regex);
	if (entry == NULL) {
		RETURN_FALSE;
	}

	/* The entry's refcount is raised for the duration of the match. */
	entry->refcount++;
	php_pcre_match_impl(entry, subject, return_value, subpats,
		global, ZEND_NUM_ARGS() >= 4, flags, start_offset);
	entry->refcount--;
}
1137 /* }}} */
1138 
is_known_valid_utf8(zend_string * subject_str,PCRE2_SIZE start_offset)1139 static zend_always_inline zend_bool is_known_valid_utf8(
1140 		zend_string *subject_str, PCRE2_SIZE start_offset) {
1141 	if (!(GC_FLAGS(subject_str) & IS_STR_VALID_UTF8)) {
1142 		/* We don't know whether the string is valid UTF-8 or not. */
1143 		return 0;
1144 	}
1145 
1146 	if (start_offset == ZSTR_LEN(subject_str)) {
1147 		/* Degenerate case: Offset points to end of string. */
1148 		return 1;
1149 	}
1150 
1151 	/* Check that the offset does not point to an UTF-8 continuation byte. */
1152 	return (ZSTR_VAL(subject_str)[start_offset] & 0xc0) != 0x80;
1153 }
1154 
1155 /* {{{ php_pcre_match_impl() */
php_pcre_match_impl(pcre_cache_entry * pce,zend_string * subject_str,zval * return_value,zval * subpats,int global,int use_flags,zend_long flags,zend_off_t start_offset)1156 PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str, zval *return_value,
1157 	zval *subpats, int global, int use_flags, zend_long flags, zend_off_t start_offset)
1158 {
1159 	zval			 result_set,		/* Holds a set of subpatterns after
1160 										   a global match */
1161 					*match_sets = NULL;	/* An array of sets of matches for each
1162 										   subpattern after a global match */
1163 	uint32_t		 options;			/* Execution options */
1164 	int				 count;				/* Count of matched subpatterns */
1165 	PCRE2_SIZE		*offsets;			/* Array of subpattern offsets */
1166 	uint32_t		 num_subpats;		/* Number of captured subpatterns */
1167 	int				 matched;			/* Has anything matched */
1168 	zend_string	   **subpat_names;		/* Array for named subpatterns */
1169 	size_t			 i;
1170 	uint32_t		 subpats_order;		/* Order of subpattern matches */
1171 	uint32_t		 offset_capture;	/* Capture match offsets: yes/no */
1172 	uint32_t		 unmatched_as_null;	/* Null non-matches: yes/no */
1173 	PCRE2_SPTR       mark = NULL;		/* Target for MARK name */
1174 	zval			 marks;				/* Array of marks for PREG_PATTERN_ORDER */
1175 	pcre2_match_data *match_data;
1176 	PCRE2_SIZE		 start_offset2, orig_start_offset;
1177 
1178 	char *subject = ZSTR_VAL(subject_str);
1179 	size_t subject_len = ZSTR_LEN(subject_str);
1180 
1181 	ZVAL_UNDEF(&marks);
1182 
1183 	/* Overwrite the passed-in value for subpatterns with an empty array. */
1184 	if (subpats != NULL) {
1185 		subpats = zend_try_array_init(subpats);
1186 		if (!subpats) {
1187 			return;
1188 		}
1189 	}
1190 
1191 	subpats_order = global ? PREG_PATTERN_ORDER : 0;
1192 
1193 	if (use_flags) {
1194 		offset_capture = flags & PREG_OFFSET_CAPTURE;
1195 		unmatched_as_null = flags & PREG_UNMATCHED_AS_NULL;
1196 
1197 		/*
1198 		 * subpats_order is pre-set to pattern mode so we change it only if
1199 		 * necessary.
1200 		 */
1201 		if (flags & 0xff) {
1202 			subpats_order = flags & 0xff;
1203 		}
1204 		if ((global && (subpats_order < PREG_PATTERN_ORDER || subpats_order > PREG_SET_ORDER)) ||
1205 			(!global && subpats_order != 0)) {
1206 			php_error_docref(NULL, E_WARNING, "Invalid flags specified");
1207 			return;
1208 		}
1209 	} else {
1210 		offset_capture = 0;
1211 		unmatched_as_null = 0;
1212 	}
1213 
1214 	/* Negative offset counts from the end of the string. */
1215 	if (start_offset < 0) {
1216 		if ((PCRE2_SIZE)-start_offset <= subject_len) {
1217 			start_offset2 = subject_len + start_offset;
1218 		} else {
1219 			start_offset2 = 0;
1220 		}
1221 	} else {
1222 		start_offset2 = (PCRE2_SIZE)start_offset;
1223 	}
1224 
1225 	if (start_offset2 > subject_len) {
1226 		pcre_handle_exec_error(PCRE2_ERROR_BADOFFSET);
1227 		RETURN_FALSE;
1228 	}
1229 
1230 	/* Calculate the size of the offsets array, and allocate memory for it. */
1231 	num_subpats = pce->capture_count + 1;
1232 
1233 	/*
1234 	 * Build a mapping from subpattern numbers to their names. We will
1235 	 * allocate the table only if there are any named subpatterns.
1236 	 */
1237 	subpat_names = NULL;
1238 	if (subpats && pce->name_count > 0) {
1239 		subpat_names = make_subpats_table(num_subpats, pce);
1240 		if (!subpat_names) {
1241 			RETURN_FALSE;
1242 		}
1243 	}
1244 
1245 	/* Allocate match sets array and initialize the values. */
1246 	if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
1247 		match_sets = (zval *)safe_emalloc(num_subpats, sizeof(zval), 0);
1248 		for (i=0; i<num_subpats; i++) {
1249 			array_init(&match_sets[i]);
1250 		}
1251 	}
1252 
1253 	matched = 0;
1254 	PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1255 
1256 	if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
1257 		match_data = mdata;
1258 	} else {
1259 		match_data = pcre2_match_data_create_from_pattern(pce->re, gctx);
1260 		if (!match_data) {
1261 			PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1262 			if (subpat_names) {
1263 				free_subpats_table(subpat_names, num_subpats);
1264 			}
1265 			if (match_sets) {
1266 				efree(match_sets);
1267 			}
1268 			RETURN_FALSE;
1269 		}
1270 	}
1271 
1272 	orig_start_offset = start_offset2;
1273 	options =
1274 		(pce->compile_options & PCRE2_UTF) && !is_known_valid_utf8(subject_str, orig_start_offset)
1275 			? 0 : PCRE2_NO_UTF_CHECK;
1276 
1277 	/* Execute the regular expression. */
1278 #ifdef HAVE_PCRE_JIT_SUPPORT
1279 	if ((pce->preg_options & PREG_JIT) && options) {
1280 		count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1281 				PCRE2_NO_UTF_CHECK, match_data, mctx);
1282 	} else
1283 #endif
1284 	count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1285 			options, match_data, mctx);
1286 
1287 	while (1) {
1288 		/* If something has matched */
1289 		if (count >= 0) {
1290 			/* Check for too many substrings condition. */
1291 			if (UNEXPECTED(count == 0)) {
1292 				php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
1293 				count = num_subpats;
1294 			}
1295 
1296 matched:
1297 			matched++;
1298 
1299 			offsets = pcre2_get_ovector_pointer(match_data);
1300 
1301 			/* If subpatterns array has been passed, fill it in with values. */
1302 			if (subpats != NULL) {
1303 				/* Try to get the list of substrings and display a warning if failed. */
1304 				if (offsets[1] < offsets[0]) {
1305 					if (subpat_names) {
1306 						free_subpats_table(subpat_names, num_subpats);
1307 					}
1308 					if (match_sets) efree(match_sets);
1309 					php_error_docref(NULL, E_WARNING, "Get subpatterns list failed");
1310 					RETURN_FALSE;
1311 				}
1312 
1313 				if (global) {	/* global pattern matching */
1314 					if (subpats && subpats_order == PREG_PATTERN_ORDER) {
1315 						/* For each subpattern, insert it into the appropriate array. */
1316 						if (offset_capture) {
1317 							for (i = 0; i < count; i++) {
1318 								add_offset_pair(
1319 									&match_sets[i], subject, offsets[2*i], offsets[2*i+1],
1320 									NULL, unmatched_as_null);
1321 							}
1322 						} else {
1323 							for (i = 0; i < count; i++) {
1324 								zval val;
1325 								populate_match_value(
1326 									&val, subject, offsets[2*i], offsets[2*i+1], unmatched_as_null);
1327 								zend_hash_next_index_insert_new(Z_ARRVAL(match_sets[i]), &val);
1328 							}
1329 						}
1330 						mark = pcre2_get_mark(match_data);
1331 						/* Add MARK, if available */
1332 						if (mark) {
1333 							if (Z_TYPE(marks) == IS_UNDEF) {
1334 								array_init(&marks);
1335 							}
1336 							add_index_string(&marks, matched - 1, (char *) mark);
1337 						}
1338 						/*
1339 						 * If the number of captured subpatterns on this run is
1340 						 * less than the total possible number, pad the result
1341 						 * arrays with NULLs or empty strings.
1342 						 */
1343 						if (count < num_subpats) {
1344 							for (; i < num_subpats; i++) {
1345 								if (offset_capture) {
1346 									add_offset_pair(
1347 										&match_sets[i], NULL, PCRE2_UNSET, PCRE2_UNSET,
1348 										NULL, unmatched_as_null);
1349 								} else if (unmatched_as_null) {
1350 									add_next_index_null(&match_sets[i]);
1351 								} else {
1352 									add_next_index_str(&match_sets[i], ZSTR_EMPTY_ALLOC());
1353 								}
1354 							}
1355 						}
1356 					} else {
1357 						/* Allocate and populate the result set array */
1358 						array_init_size(&result_set, count + (mark ? 1 : 0));
1359 						mark = pcre2_get_mark(match_data);
1360 						populate_subpat_array(
1361 							&result_set, subject, offsets, subpat_names,
1362 							num_subpats, count, mark, flags);
1363 						/* And add it to the output array */
1364 						zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &result_set);
1365 					}
1366 				} else {			/* single pattern matching */
1367 					/* For each subpattern, insert it into the subpatterns array. */
1368 					mark = pcre2_get_mark(match_data);
1369 					populate_subpat_array(
1370 						subpats, subject, offsets, subpat_names, num_subpats, count, mark, flags);
1371 					break;
1372 				}
1373 			}
1374 
1375 			/* Advance to the next piece. */
1376 			start_offset2 = offsets[1];
1377 
1378 			/* If we have matched an empty string, mimic what Perl's /g options does.
1379 			   This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
1380 			   the match again at the same point. If this fails (picked up above) we
1381 			   advance to the next character. */
1382 			if (start_offset2 == offsets[0]) {
1383 				count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1384 					PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
1385 				if (count >= 0) {
1386 					if (global) {
1387 						goto matched;
1388 					} else {
1389 						break;
1390 					}
1391 				} else if (count == PCRE2_ERROR_NOMATCH) {
1392 					/* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
1393 					   this is not necessarily the end. We need to advance
1394 					   the start offset, and continue. Fudge the offset values
1395 					   to achieve this, unless we're already at the end of the string. */
1396 					if (start_offset2 < subject_len) {
1397 						size_t unit_len = calculate_unit_length(pce, subject + start_offset2);
1398 
1399 						start_offset2 += unit_len;
1400 					} else {
1401 						break;
1402 					}
1403 				} else {
1404 					goto error;
1405 				}
1406 			}
1407 		} else if (count == PCRE2_ERROR_NOMATCH) {
1408 			break;
1409 		} else {
1410 error:
1411 			pcre_handle_exec_error(count);
1412 			break;
1413 		}
1414 
1415 		if (!global) {
1416 			break;
1417 		}
1418 
1419 		/* Execute the regular expression. */
1420 #ifdef HAVE_PCRE_JIT_SUPPORT
1421 		if ((pce->preg_options & PREG_JIT)) {
1422 			if (PCRE2_UNSET == start_offset2 || start_offset2 > subject_len) {
1423 				pcre_handle_exec_error(PCRE2_ERROR_BADOFFSET);
1424 				break;
1425 			}
1426 			count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1427 					PCRE2_NO_UTF_CHECK, match_data, mctx);
1428 		} else
1429 #endif
1430 		count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1431 				PCRE2_NO_UTF_CHECK, match_data, mctx);
1432 	}
1433 	if (match_data != mdata) {
1434 		pcre2_match_data_free(match_data);
1435 	}
1436 
1437 	/* Add the match sets to the output array and clean up */
1438 	if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
1439 		if (subpat_names) {
1440 			for (i = 0; i < num_subpats; i++) {
1441 				if (subpat_names[i]) {
1442 					zend_hash_update(Z_ARRVAL_P(subpats), subpat_names[i], &match_sets[i]);
1443 					Z_ADDREF(match_sets[i]);
1444 				}
1445 				zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i]);
1446 			}
1447 		} else {
1448 			for (i = 0; i < num_subpats; i++) {
1449 				zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i]);
1450 			}
1451 		}
1452 		efree(match_sets);
1453 
1454 		if (Z_TYPE(marks) != IS_UNDEF) {
1455 			add_assoc_zval(subpats, "MARK", &marks);
1456 		}
1457 	}
1458 
1459 	if (subpat_names) {
1460 		free_subpats_table(subpat_names, num_subpats);
1461 	}
1462 
1463 	if (PCRE_G(error_code) == PHP_PCRE_NO_ERROR) {
1464 		/* If there was no error and we're in /u mode, remember that the string is valid UTF-8. */
1465 		if ((pce->compile_options & PCRE2_UTF)
1466 				&& !ZSTR_IS_INTERNED(subject_str) && orig_start_offset == 0) {
1467 			GC_ADD_FLAGS(subject_str, IS_STR_VALID_UTF8);
1468 		}
1469 
1470 		RETVAL_LONG(matched);
1471 	} else {
1472 		RETVAL_FALSE;
1473 	}
1474 }
1475 /* }}} */
1476 
1477 /* {{{ proto int preg_match(string pattern, string subject [, array &subpatterns [, int flags [, int offset]]])
1478    Perform a Perl-style regular expression match */
PHP_FUNCTION(preg_match)1479 PHP_FUNCTION(preg_match)
1480 {
1481 	php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
1482 }
1483 /* }}} */
1484 
1485 /* {{{ proto int preg_match_all(string pattern, string subject [, array &subpatterns [, int flags [, int offset]]])
1486    Perform a Perl-style global regular expression match */
PHP_FUNCTION(preg_match_all)1487 PHP_FUNCTION(preg_match_all)
1488 {
1489 	php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
1490 }
1491 /* }}} */
1492 
/* {{{ preg_get_backref
 * Tries to read a back-reference at *str. The first character is the
 * introducer (the callers pass a position holding '\\' or '$'); it is
 * followed by one or two decimal digits. After '$' the digits may be
 * wrapped in braces, as in "${12}".
 *
 * On success stores the group number in *backref, moves *str just past the
 * reference and returns 1. On failure returns 0 and leaves *str alone;
 * *backref may already hold the digits that were read before the failure.
 */
static int preg_get_backref(char **str, int *backref)
{
	char *cursor = *str;
	int braced;

	/* A lone introducer at the end of the string is not a reference. */
	if (cursor[1] == '\0') {
		return 0;
	}

	/* Skip the introducer, plus the opening brace of the "${" form. */
	braced = (cursor[0] == '$' && cursor[1] == '{');
	cursor += braced ? 2 : 1;

	/* First digit is mandatory. */
	if (*cursor < '0' || *cursor > '9') {
		return 0;
	}
	*backref = *cursor - '0';
	cursor++;

	/* Second digit is optional. */
	if (*cursor >= '0' && *cursor <= '9') {
		*backref = *backref * 10 + (*cursor - '0');
		cursor++;
	}

	if (braced) {
		if (*cursor != '}') {
			return 0;
		}
		cursor++;
	}

	*str = cursor;
	return 1;
}
1530 /* }}} */
1531 
1532 /* {{{ preg_do_repl_func
1533  */
preg_do_repl_func(zend_fcall_info * fci,zend_fcall_info_cache * fcc,char * subject,PCRE2_SIZE * offsets,zend_string ** subpat_names,uint32_t num_subpats,int count,const PCRE2_SPTR mark,zend_long flags)1534 static zend_string *preg_do_repl_func(zend_fcall_info *fci, zend_fcall_info_cache *fcc, char *subject, PCRE2_SIZE *offsets, zend_string **subpat_names, uint32_t num_subpats, int count, const PCRE2_SPTR mark, zend_long flags)
1535 {
1536 	zend_string *result_str;
1537 	zval		 retval;			/* Function return value */
1538 	zval	     arg;				/* Argument to pass to function */
1539 
1540 	array_init_size(&arg, count + (mark ? 1 : 0));
1541 	populate_subpat_array(&arg, subject, offsets, subpat_names, num_subpats, count, mark, flags);
1542 
1543 	fci->retval = &retval;
1544 	fci->param_count = 1;
1545 	fci->params = &arg;
1546 	fci->no_separation = 0;
1547 
1548 	if (zend_call_function(fci, fcc) == SUCCESS && Z_TYPE(retval) != IS_UNDEF) {
1549 		if (EXPECTED(Z_TYPE(retval) == IS_STRING)) {
1550 			result_str = Z_STR(retval);
1551 		} else {
1552 			result_str = zval_get_string_func(&retval);
1553 			zval_ptr_dtor(&retval);
1554 		}
1555 	} else {
1556 		if (!EG(exception)) {
1557 			php_error_docref(NULL, E_WARNING, "Unable to call custom replacement function");
1558 		}
1559 
1560 		result_str = zend_string_init(&subject[offsets[0]], offsets[1] - offsets[0], 0);
1561 	}
1562 
1563 	zval_ptr_dtor(&arg);
1564 
1565 	return result_str;
1566 }
1567 /* }}} */
1568 
1569 /* {{{ php_pcre_replace
1570  */
php_pcre_replace(zend_string * regex,zend_string * subject_str,char * subject,size_t subject_len,zend_string * replace_str,size_t limit,size_t * replace_count)1571 PHPAPI zend_string *php_pcre_replace(zend_string *regex,
1572 							  zend_string *subject_str,
1573 							  char *subject, size_t subject_len,
1574 							  zend_string *replace_str,
1575 							  size_t limit, size_t *replace_count)
1576 {
1577 	pcre_cache_entry	*pce;			    /* Compiled regular expression */
1578 	zend_string	 		*result;			/* Function result */
1579 
1580 	/* Abort on pending exception, e.g. thrown from __toString(). */
1581 	if (UNEXPECTED(EG(exception))) {
1582 		return NULL;
1583 	}
1584 
1585 	/* Compile regex or get it from cache. */
1586 	if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
1587 		return NULL;
1588 	}
1589 	pce->refcount++;
1590 	result = php_pcre_replace_impl(pce, subject_str, subject, subject_len, replace_str,
1591 		limit, replace_count);
1592 	pce->refcount--;
1593 
1594 	return result;
1595 }
1596 /* }}} */
1597 
1598 /* {{{ php_pcre_replace_impl() */
php_pcre_replace_impl(pcre_cache_entry * pce,zend_string * subject_str,char * subject,size_t subject_len,zend_string * replace_str,size_t limit,size_t * replace_count)1599 PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *subject_str, char *subject, size_t subject_len, zend_string *replace_str, size_t limit, size_t *replace_count)
1600 {
1601 	uint32_t		 options;			/* Execution options */
1602 	int				 count;				/* Count of matched subpatterns */
1603 	PCRE2_SIZE		*offsets;			/* Array of subpattern offsets */
1604 	uint32_t		 num_subpats;		/* Number of captured subpatterns */
1605 	size_t			 new_len;			/* Length of needed storage */
1606 	size_t			 alloc_len;			/* Actual allocated length */
1607 	size_t			 match_len;			/* Length of the current match */
1608 	int				 backref;			/* Backreference number */
1609 	PCRE2_SIZE		 start_offset;		/* Where the new search starts */
1610 	size_t			 last_end_offset;	/* Where the last search ended */
1611 	char			*walkbuf,			/* Location of current replacement in the result */
1612 					*walk,				/* Used to walk the replacement string */
1613 					*match,				/* The current match */
1614 					*piece,				/* The current piece of subject */
1615 					*replace_end,		/* End of replacement string */
1616 					 walk_last;			/* Last walked character */
1617 	size_t			result_len; 		/* Length of result */
1618 	zend_string		*result;			/* Result of replacement */
1619 	pcre2_match_data *match_data;
1620 
1621 	/* Calculate the size of the offsets array, and allocate memory for it. */
1622 	num_subpats = pce->capture_count + 1;
1623 	alloc_len = 0;
1624 	result = NULL;
1625 
1626 	/* Initialize */
1627 	match = NULL;
1628 	start_offset = 0;
1629 	last_end_offset = 0;
1630 	result_len = 0;
1631 	PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1632 
1633 	if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
1634 		match_data = mdata;
1635 	} else {
1636 		match_data = pcre2_match_data_create_from_pattern(pce->re, gctx);
1637 		if (!match_data) {
1638 			PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1639 			return NULL;
1640 		}
1641 	}
1642 
1643 	options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
1644 
1645 	/* Execute the regular expression. */
1646 #ifdef HAVE_PCRE_JIT_SUPPORT
1647 	if ((pce->preg_options & PREG_JIT) && options) {
1648 		count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1649 				PCRE2_NO_UTF_CHECK, match_data, mctx);
1650 	} else
1651 #endif
1652 	count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1653 			options, match_data, mctx);
1654 
1655 	while (1) {
1656 		piece = subject + last_end_offset;
1657 
1658 		if (count >= 0 && limit > 0) {
1659 			zend_bool simple_string;
1660 
1661 			/* Check for too many substrings condition. */
1662 			if (UNEXPECTED(count == 0)) {
1663 				php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
1664 				count = num_subpats;
1665 			}
1666 
1667 matched:
1668 			offsets = pcre2_get_ovector_pointer(match_data);
1669 
1670 			if (UNEXPECTED(offsets[1] < offsets[0])) {
1671 				PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1672 				if (result) {
1673 					zend_string_release_ex(result, 0);
1674 					result = NULL;
1675 				}
1676 				break;
1677 			}
1678 
1679 			if (replace_count) {
1680 				++*replace_count;
1681 			}
1682 
1683 			/* Set the match location in subject */
1684 			match = subject + offsets[0];
1685 
1686 			new_len = result_len + offsets[0] - last_end_offset; /* part before the match */
1687 
1688 			walk = ZSTR_VAL(replace_str);
1689 			replace_end = walk + ZSTR_LEN(replace_str);
1690 			walk_last = 0;
1691 			simple_string = 1;
1692 			while (walk < replace_end) {
1693 				if ('\\' == *walk || '$' == *walk) {
1694 					simple_string = 0;
1695 					if (walk_last == '\\') {
1696 						walk++;
1697 						walk_last = 0;
1698 						continue;
1699 					}
1700 					if (preg_get_backref(&walk, &backref)) {
1701 						if (backref < count)
1702 							new_len += offsets[(backref<<1)+1] - offsets[backref<<1];
1703 						continue;
1704 					}
1705 				}
1706 				new_len++;
1707 				walk++;
1708 				walk_last = walk[-1];
1709 			}
1710 
1711 			if (new_len >= alloc_len) {
1712 				alloc_len = zend_safe_address_guarded(2, new_len, alloc_len);
1713 				if (result == NULL) {
1714 					result = zend_string_alloc(alloc_len, 0);
1715 				} else {
1716 					result = zend_string_extend(result, alloc_len, 0);
1717 				}
1718 			}
1719 
1720 			if (match-piece > 0) {
1721 				/* copy the part of the string before the match */
1722 				memcpy(&ZSTR_VAL(result)[result_len], piece, match-piece);
1723 				result_len += (match-piece);
1724 			}
1725 
1726 			if (simple_string) {
1727 				/* copy replacement */
1728 				memcpy(&ZSTR_VAL(result)[result_len], ZSTR_VAL(replace_str), ZSTR_LEN(replace_str)+1);
1729 				result_len += ZSTR_LEN(replace_str);
1730 			} else {
1731 				/* copy replacement and backrefs */
1732 				walkbuf = ZSTR_VAL(result) + result_len;
1733 
1734 				walk = ZSTR_VAL(replace_str);
1735 				walk_last = 0;
1736 				while (walk < replace_end) {
1737 					if ('\\' == *walk || '$' == *walk) {
1738 						if (walk_last == '\\') {
1739 							*(walkbuf-1) = *walk++;
1740 							walk_last = 0;
1741 							continue;
1742 						}
1743 						if (preg_get_backref(&walk, &backref)) {
1744 							if (backref < count) {
1745 								match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
1746 								memcpy(walkbuf, subject + offsets[backref<<1], match_len);
1747 								walkbuf += match_len;
1748 							}
1749 							continue;
1750 						}
1751 					}
1752 					*walkbuf++ = *walk++;
1753 					walk_last = walk[-1];
1754 				}
1755 				*walkbuf = '\0';
1756 				/* increment the result length by how much we've added to the string */
1757 				result_len += (walkbuf - (ZSTR_VAL(result) + result_len));
1758 			}
1759 
1760 			limit--;
1761 
1762 			/* Advance to the next piece. */
1763 			start_offset = last_end_offset = offsets[1];
1764 
1765 			/* If we have matched an empty string, mimic what Perl's /g options does.
1766 			   This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
1767 			   the match again at the same point. If this fails (picked up above) we
1768 			   advance to the next character. */
1769 			if (start_offset == offsets[0]) {
1770 				count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1771 					PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
1772 
1773 				piece = subject + start_offset;
1774 				if (count >= 0 && limit > 0) {
1775 					goto matched;
1776 				} else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
1777 					/* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
1778 					   this is not necessarily the end. We need to advance
1779 					   the start offset, and continue. Fudge the offset values
1780 					   to achieve this, unless we're already at the end of the string. */
1781 					if (start_offset < subject_len) {
1782 						size_t unit_len = calculate_unit_length(pce, piece);
1783 						start_offset += unit_len;
1784 					} else {
1785 						goto not_matched;
1786 					}
1787 				} else {
1788 					goto error;
1789 				}
1790 			}
1791 
1792 		} else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
1793 not_matched:
1794 			if (!result && subject_str) {
1795 				result = zend_string_copy(subject_str);
1796 				break;
1797 			}
1798 			new_len = result_len + subject_len - last_end_offset;
1799 			if (new_len >= alloc_len) {
1800 				alloc_len = new_len; /* now we know exactly how long it is */
1801 				if (NULL != result) {
1802 					result = zend_string_realloc(result, alloc_len, 0);
1803 				} else {
1804 					result = zend_string_alloc(alloc_len, 0);
1805 				}
1806 			}
1807 			/* stick that last bit of string on our output */
1808 			memcpy(ZSTR_VAL(result) + result_len, piece, subject_len - last_end_offset);
1809 			result_len += subject_len - last_end_offset;
1810 			ZSTR_VAL(result)[result_len] = '\0';
1811 			ZSTR_LEN(result) = result_len;
1812 			break;
1813 		} else {
1814 error:
1815 			pcre_handle_exec_error(count);
1816 			if (result) {
1817 				zend_string_release_ex(result, 0);
1818 				result = NULL;
1819 			}
1820 			break;
1821 		}
1822 
1823 #ifdef HAVE_PCRE_JIT_SUPPORT
1824 		if (pce->preg_options & PREG_JIT) {
1825 			count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1826 					PCRE2_NO_UTF_CHECK, match_data, mctx);
1827 		} else
1828 #endif
1829 		count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1830 					PCRE2_NO_UTF_CHECK, match_data, mctx);
1831 	}
1832 	if (match_data != mdata) {
1833 		pcre2_match_data_free(match_data);
1834 	}
1835 
1836 	return result;
1837 }
1838 /* }}} */
1839 
1840 /* {{{ php_pcre_replace_func_impl() */
/*
 * Replace every match of the compiled pattern `pce` in `subject` with the
 * string produced by the user callback (invoked through preg_do_repl_func()).
 *
 *   pce            compiled pattern cache entry
 *   subject_str    optional zend_string holding the subject; when it is
 *                  non-NULL and no replacement was made, it is returned via
 *                  zend_string_copy() instead of building a new string
 *   subject,
 *   subject_len    raw subject buffer and its length
 *   fci, fcc       callback call info, forwarded to preg_do_repl_func()
 *   limit          remaining number of replacements allowed; decremented after
 *                  each replacement, matching stops once it reaches 0
 *   replace_count  optional counter, incremented once per replacement
 *   flags          forwarded unchanged to preg_do_repl_func()
 *
 * Returns the resulting string, or NULL on failure. On a match-data allocation
 * failure or an inconsistent offset vector PCRE_G(error_code) is set to
 * PHP_PCRE_INTERNAL_ERROR; other match errors go through
 * pcre_handle_exec_error(). NULL is also returned (without touching
 * PCRE_G(error_code) here) when make_subpats_table() fails.
 */
php_pcre_replace_func_impl(pcre_cache_entry * pce,zend_string * subject_str,char * subject,size_t subject_len,zend_fcall_info * fci,zend_fcall_info_cache * fcc,size_t limit,size_t * replace_count,zend_long flags)1841 static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_string *subject_str, char *subject, size_t subject_len, zend_fcall_info *fci, zend_fcall_info_cache *fcc, size_t limit, size_t *replace_count, zend_long flags)
1842 {
1843 	uint32_t		 options;			/* Execution options */
1844 	int				 count;				/* Count of matched subpatterns */
1845 	PCRE2_SIZE		*offsets;			/* Array of subpattern offsets */
1846 	zend_string		**subpat_names;		/* Array for named subpatterns */
1847 	uint32_t		 num_subpats;		/* Number of captured subpatterns */
1848 	size_t			 new_len;			/* Length of needed storage */
1849 	size_t			 alloc_len;			/* Actual allocated length */
1850 	PCRE2_SIZE		 start_offset;		/* Where the new search starts */
1851 	size_t			 last_end_offset;	/* Where the last search ended */
1852 	char			*match,				/* The current match */
1853 					*piece;				/* The current piece of subject */
1854 	size_t			result_len; 		/* Length of result */
1855 	zend_string		*result;			/* Result of replacement */
1856 	zend_string     *eval_result;		/* Result of custom function */
1857 	pcre2_match_data *match_data;
1858 	zend_bool old_mdata_used;
1859 
1860 	/* Calculate the size of the offsets array, and allocate memory for it. */
1861 	num_subpats = pce->capture_count + 1;
1862 
1863 	/*
1864 	 * Build a mapping from subpattern numbers to their names. We will
1865 	 * allocate the table only if there are any named subpatterns.
1866 	 */
1867 	subpat_names = NULL;
1868 	if (UNEXPECTED(pce->name_count > 0)) {
1869 		subpat_names = make_subpats_table(num_subpats, pce);
1870 		if (!subpat_names) {
1871 			return NULL;
1872 		}
1873 	}
1874 
1875 	alloc_len = 0;
1876 	result = NULL;
1877 
1878 	/* Initialize */
1879 	match = NULL;
1880 	start_offset = 0;
1881 	last_end_offset = 0;
1882 	result_len = 0;
1883 	PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1884 
	/* Use the preallocated match data block when it is not already in use and
	   is large enough, flagging it as in use for the duration of this call
	   (the previous flag value is restored before returning); otherwise
	   allocate a dedicated block for this pattern.
	   NOTE(review): presumably the flag protects against nested PCRE calls made
	   from inside the user callback - confirm against the other users of mdata. */
1885 	old_mdata_used = mdata_used;
1886 	if (!old_mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
1887 		mdata_used = 1;
1888 		match_data = mdata;
1889 	} else {
1890 		match_data = pcre2_match_data_create_from_pattern(pce->re, gctx);
1891 		if (!match_data) {
1892 			PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1893 			if (subpat_names) {
1894 				free_subpats_table(subpat_names, num_subpats);
1895 			}
1896 			mdata_used = old_mdata_used;
1897 			return NULL;
1898 		}
1899 	}
1900 
	/* For PCRE2_UTF patterns options is 0, so only this first match call is
	   made without PCRE2_NO_UTF_CHECK (and without the JIT fast path); every
	   later match call in this function passes PCRE2_NO_UTF_CHECK. */
1901 	options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
1902 
1903 	/* Execute the regular expression. */
1904 #ifdef HAVE_PCRE_JIT_SUPPORT
1905 	if ((pce->preg_options & PREG_JIT) && options) {
1906 		count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1907 				PCRE2_NO_UTF_CHECK, match_data, mctx);
1908 	} else
1909 #endif
1910 	count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1911 			options, match_data, mctx);
1912 
	/* Each iteration consumes the outcome (count) of the most recent match
	   attempt; the next attempt is issued at the bottom of the loop. */
1913 	while (1) {
1914 		piece = subject + last_end_offset;
1915 
1916 		if (count >= 0 && limit) {
1917 			/* Check for too many substrings condition. */
1918 			if (UNEXPECTED(count == 0)) {
1919 				php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
1920 				count = num_subpats;
1921 			}
1922 
1923 matched:
1924 			offsets = pcre2_get_ovector_pointer(match_data);
1925 
			/* A match that ends before it starts cannot be spliced into the result */
1926 			if (UNEXPECTED(offsets[1] < offsets[0])) {
1927 				PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1928 				if (result) {
1929 					zend_string_release_ex(result, 0);
1930 					result = NULL;
1931 				}
1932 				break;
1933 			}
1934 
1935 			if (replace_count) {
1936 				++*replace_count;
1937 			}
1938 
1939 			/* Set the match location in subject */
1940 			match = subject + offsets[0];
1941 
1942 			new_len = result_len + offsets[0] - last_end_offset; /* part before the match */
1943 
1944 			/* Use custom function to get replacement string and its length. */
1945 			eval_result = preg_do_repl_func(
1946 				fci, fcc, subject, offsets, subpat_names, num_subpats, count,
1947 				pcre2_get_mark(match_data), flags);
1948 
1949 			ZEND_ASSERT(eval_result);
			/* Add the replacement length, then grow the buffer to
			   2 * new_len + alloc_len when it is too small (both computed
			   through zend_safe_address_guarded()). */
1950 			new_len = zend_safe_address_guarded(1, ZSTR_LEN(eval_result), new_len);
1951 			if (new_len >= alloc_len) {
1952 				alloc_len = zend_safe_address_guarded(2, new_len, alloc_len);
1953 				if (result == NULL) {
1954 					result = zend_string_alloc(alloc_len, 0);
1955 				} else {
1956 					result = zend_string_extend(result, alloc_len, 0);
1957 				}
1958 			}
1959 
1960 			if (match-piece > 0) {
1961 				/* copy the part of the string before the match */
1962 				memcpy(ZSTR_VAL(result) + result_len, piece, match-piece);
1963 				result_len += (match-piece);
1964 			}
1965 
1966 			/* If using custom function, copy result to the buffer and clean up. */
1967 			memcpy(ZSTR_VAL(result) + result_len, ZSTR_VAL(eval_result), ZSTR_LEN(eval_result));
1968 			result_len += ZSTR_LEN(eval_result);
1969 			zend_string_release_ex(eval_result, 0);
1970 
1971 			limit--;
1972 
1973 			/* Advance to the next piece. */
1974 			start_offset = last_end_offset = offsets[1];
1975 
1976 			/* If we have matched an empty string, mimic what Perl's /g options does.
1977 			   This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
1978 			   the match again at the same point. If this fails (picked up above) we
1979 			   advance to the next character. */
1980 			if (start_offset == offsets[0]) {
1981 				count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1982 					PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
1983 
1984 				piece = subject + start_offset;
1985 				if (count >= 0 && limit) {
1986 					goto matched;
1987 				} else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
1988 					/* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
1989 					   this is not necessarily the end. We need to advance
1990 					   the start offset, and continue. Fudge the offset values
1991 					   to achieve this, unless we're already at the end of the string. */
1992 					if (start_offset < subject_len) {
1993 						size_t unit_len = calculate_unit_length(pce, piece);
1994 						start_offset += unit_len;
1995 					} else {
1996 						goto not_matched;
1997 					}
1998 				} else {
1999 					goto error;
2000 				}
2001 			}
2002 
2003 		} else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
2004 not_matched:
			/* result is still NULL only if no replacement was made: hand back
			   the caller's zend_string instead of building a copy. */
2005 			if (!result && subject_str) {
2006 				result = zend_string_copy(subject_str);
2007 				break;
2008 			}
2009 			new_len = result_len + subject_len - last_end_offset;
2010 			if (new_len >= alloc_len) {
2011 				alloc_len = new_len; /* now we know exactly how long it is */
2012 				if (NULL != result) {
2013 					result = zend_string_realloc(result, alloc_len, 0);
2014 				} else {
2015 					result = zend_string_alloc(alloc_len, 0);
2016 				}
2017 			}
2018 			/* stick that last bit of string on our output */
2019 			memcpy(ZSTR_VAL(result) + result_len, piece, subject_len - last_end_offset);
2020 			result_len += subject_len - last_end_offset;
2021 			ZSTR_VAL(result)[result_len] = '\0';
2022 			ZSTR_LEN(result) = result_len;
2023 			break;
2024 		} else {
2025 error:
			/* Any other negative count is a match error: report it and drop
			   whatever has been built so far. */
2026 			pcre_handle_exec_error(count);
2027 			if (result) {
2028 				zend_string_release_ex(result, 0);
2029 				result = NULL;
2030 			}
2031 			break;
2032 		}
		/* Look for the next match, starting at start_offset */
2033 #ifdef HAVE_PCRE_JIT_SUPPORT
2034 		if ((pce->preg_options & PREG_JIT)) {
2035 			count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
2036 					PCRE2_NO_UTF_CHECK, match_data, mctx);
2037 		} else
2038 #endif
2039 		count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
2040 				PCRE2_NO_UTF_CHECK, match_data, mctx);
2041 	}
	/* Only a dedicated match data block is freed; the preallocated one is
	   just handed back by restoring the in-use flag. */
2042 	if (match_data != mdata) {
2043 		pcre2_match_data_free(match_data);
2044 	}
2045 	mdata_used = old_mdata_used;
2046 
2047 	if (UNEXPECTED(subpat_names)) {
2048 		free_subpats_table(subpat_names, num_subpats);
2049 	}
2050 
2051 	return result;
2052 }
2053 /* }}} */
2054 
2055 /* {{{ php_pcre_replace_func
2056  */
php_pcre_replace_func(zend_string * regex,zend_string * subject_str,zend_fcall_info * fci,zend_fcall_info_cache * fcc,size_t limit,size_t * replace_count,zend_long flags)2057 static zend_always_inline zend_string *php_pcre_replace_func(zend_string *regex,
2058 							  zend_string *subject_str,
2059 							  zend_fcall_info *fci, zend_fcall_info_cache *fcc,
2060 							  size_t limit, size_t *replace_count, zend_long flags)
2061 {
2062 	pcre_cache_entry	*pce;			    /* Compiled regular expression */
2063 	zend_string	 		*result;			/* Function result */
2064 
2065 	/* Compile regex or get it from cache. */
2066 	if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
2067 		return NULL;
2068 	}
2069 	pce->refcount++;
2070 	result = php_pcre_replace_func_impl(
2071 		pce, subject_str, ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), fci, fcc,
2072 		limit, replace_count, flags);
2073 	pce->refcount--;
2074 
2075 	return result;
2076 }
2077 /* }}} */
2078 
2079 /* {{{ php_pcre_replace_array
2080  */
/*
 * Apply every pattern of the `regex` array to the subject, one after the
 * other, feeding the output of each replacement into the next one.
 *
 * When `replace` is an array, the n-th pattern is paired with the n-th used
 * slot of that array; once the replacement array runs out, the empty string
 * is used. Otherwise `replace` is read as a string and used for all patterns.
 *
 * The function consumes the caller's reference to `subject_str`: it is
 * released after each replacement and the final string (or NULL as soon as a
 * replacement fails) is returned in its place.
 */
static zend_string *php_pcre_replace_array(HashTable *regex, zval *replace, zend_string *subject_str, size_t limit, size_t *replace_count)
{
	zval *pattern_zv;
	/* Non-NULL only when the replacements were given as an array. */
	HashTable *replace_ht = (Z_TYPE_P(replace) == IS_ARRAY) ? Z_ARRVAL_P(replace) : NULL;
	uint32_t next_slot = 0;

	ZEND_HASH_FOREACH_VAL(regex, pattern_zv) {
		zend_string *pattern_tmp;
		zend_string *replacement_tmp = NULL;
		zend_string *replacement;
		zend_string *replaced;
		/* Patterns are always handled as strings. */
		zend_string *pattern = zval_get_tmp_string(pattern_zv, &pattern_tmp);

		if (replace_ht == NULL) {
			replacement = Z_STR_P(replace);
		} else {
			zval *slot = NULL;

			/* Walk forward to the next slot that actually holds a value. */
			while (next_slot != replace_ht->nNumUsed) {
				zval *candidate = &replace_ht->arData[next_slot++].val;

				if (Z_TYPE_P(candidate) != IS_UNDEF) {
					slot = candidate;
					break;
				}
			}

			if (slot != NULL) {
				replacement = zval_get_tmp_string(slot, &replacement_tmp);
			} else {
				/* No replacement left for this pattern. */
				replacement = ZSTR_EMPTY_ALLOC();
			}
		}

		/* Replace, then let the result become the subject of the next round. */
		replaced = php_pcre_replace(pattern,
				subject_str, ZSTR_VAL(subject_str), ZSTR_LEN(subject_str),
				replacement, limit, replace_count);

		zend_tmp_string_release(replacement_tmp);
		zend_tmp_string_release(pattern_tmp);
		zend_string_release_ex(subject_str, 0);
		subject_str = replaced;

		if (UNEXPECTED(replaced == NULL)) {
			break;
		}
	} ZEND_HASH_FOREACH_END();

	return subject_str;
}
2161 /* }}} */
2162 
2163 /* {{{ php_replace_in_subject
2164  */
php_replace_in_subject(zval * regex,zval * replace,zval * subject,size_t limit,size_t * replace_count)2165 static zend_always_inline zend_string *php_replace_in_subject(zval *regex, zval *replace, zval *subject, size_t limit, size_t *replace_count)
2166 {
2167 	zend_string *result;
2168 	zend_string *subject_str = zval_get_string(subject);
2169 
2170 	if (Z_TYPE_P(regex) != IS_ARRAY) {
2171 		result = php_pcre_replace(Z_STR_P(regex),
2172 								  subject_str,
2173 								  ZSTR_VAL(subject_str),
2174 								  ZSTR_LEN(subject_str),
2175 								  Z_STR_P(replace),
2176 								  limit,
2177 								  replace_count);
2178 		zend_string_release_ex(subject_str, 0);
2179 	} else {
2180 		result = php_pcre_replace_array(Z_ARRVAL_P(regex),
2181 										replace,
2182 										subject_str,
2183 										limit,
2184 										replace_count);
2185 	}
2186 	return result;
2187 }
2188 /* }}} */
2189 
2190 /* {{{ php_replace_in_subject_func
2191  */
php_replace_in_subject_func(zval * regex,zend_fcall_info * fci,zend_fcall_info_cache * fcc,zval * subject,size_t limit,size_t * replace_count,zend_long flags)2192 static zend_string *php_replace_in_subject_func(zval *regex, zend_fcall_info *fci, zend_fcall_info_cache *fcc, zval *subject, size_t limit, size_t *replace_count, zend_long flags)
2193 {
2194 	zend_string *result;
2195 	zend_string	*subject_str = zval_get_string(subject);
2196 
2197 	if (Z_TYPE_P(regex) != IS_ARRAY) {
2198 		result = php_pcre_replace_func(
2199 			Z_STR_P(regex), subject_str, fci, fcc, limit, replace_count, flags);
2200 		zend_string_release_ex(subject_str, 0);
2201 		return result;
2202 	} else {
2203 		zval		*regex_entry;
2204 
2205 		/* If regex is an array */
2206 
2207 		/* For each entry in the regex array, get the entry */
2208 		ZEND_HASH_FOREACH_VAL(Z_ARRVAL_P(regex), regex_entry) {
2209 			/* Make sure we're dealing with strings. */
2210 			zend_string *tmp_regex_str;
2211 			zend_string *regex_str = zval_get_tmp_string(regex_entry, &tmp_regex_str);
2212 
2213 			/* Do the actual replacement and put the result back into subject_str
2214 			   for further replacements. */
2215 			result = php_pcre_replace_func(
2216 				regex_str, subject_str, fci, fcc, limit, replace_count, flags);
2217 			zend_tmp_string_release(tmp_regex_str);
2218 			zend_string_release_ex(subject_str, 0);
2219 			subject_str = result;
2220 			if (UNEXPECTED(result == NULL)) {
2221 				break;
2222 			}
2223 		} ZEND_HASH_FOREACH_END();
2224 
2225 		return subject_str;
2226 	}
2227 }
2228 /* }}} */
2229 
2230 /* {{{ preg_replace_func_impl
2231  */
preg_replace_func_impl(zval * return_value,zval * regex,zend_fcall_info * fci,zend_fcall_info_cache * fcc,zval * subject,zend_long limit_val,zend_long flags)2232 static size_t preg_replace_func_impl(zval *return_value, zval *regex, zend_fcall_info *fci, zend_fcall_info_cache *fcc, zval *subject, zend_long limit_val, zend_long flags)
2233 {
2234 	zend_string	*result;
2235 	size_t replace_count = 0;
2236 
2237 	if (Z_TYPE_P(regex) != IS_ARRAY) {
2238 		convert_to_string_ex(regex);
2239 	}
2240 
2241 	if (Z_TYPE_P(subject) != IS_ARRAY) {
2242 		result = php_replace_in_subject_func(
2243 			regex, fci, fcc, subject, limit_val, &replace_count, flags);
2244 		if (result != NULL) {
2245 			RETVAL_STR(result);
2246 		} else {
2247 			RETVAL_NULL();
2248 		}
2249 	} else {
2250 		/* if subject is an array */
2251 		zval		*subject_entry, zv;
2252 		zend_string	*string_key;
2253 		zend_ulong	 num_key;
2254 
2255 		array_init_size(return_value, zend_hash_num_elements(Z_ARRVAL_P(subject)));
2256 
2257 		/* For each subject entry, convert it to string, then perform replacement
2258 		   and add the result to the return_value array. */
2259 		ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(subject), num_key, string_key, subject_entry) {
2260 			result = php_replace_in_subject_func(
2261 				regex, fci, fcc, subject_entry, limit_val, &replace_count, flags);
2262 			if (result != NULL) {
2263 				/* Add to return array */
2264 				ZVAL_STR(&zv, result);
2265 				if (string_key) {
2266 					zend_hash_add_new(Z_ARRVAL_P(return_value), string_key, &zv);
2267 				} else {
2268 					zend_hash_index_add_new(Z_ARRVAL_P(return_value), num_key, &zv);
2269 				}
2270 			}
2271 		} ZEND_HASH_FOREACH_END();
2272 	}
2273 
2274 	return replace_count;
2275 }
2276 /* }}} */
2277 
2278 /* {{{ preg_replace_common
2279  */
/*
 * Common body of preg_replace() and preg_filter().
 *
 * Arguments: regex, replace, subject [, limit [, &count]].
 * An array `replace` requires an array `regex` (a type error is thrown
 * otherwise); non-array values are converted to strings in place.
 *
 * With is_filter set, a subject is only returned if at least one replacement
 * was made in it; otherwise NULL is returned (or, for array subjects, the
 * entry is dropped). Failed replacements yield NULL / are dropped as well.
 * The total replacement count is assigned to `count` when it was passed.
 */
static void preg_replace_common(INTERNAL_FUNCTION_PARAMETERS, int is_filter)
{
	zval *regex, *replace, *subject, *zcount = NULL;
	zend_long limit = -1;
	size_t replace_count = 0;
	size_t count_before;
	zend_string *replaced;

	/* Get function parameters and do error-checking. */
	ZEND_PARSE_PARAMETERS_START(3, 5)
		Z_PARAM_ZVAL(regex)
		Z_PARAM_ZVAL(replace)
		Z_PARAM_ZVAL(subject)
		Z_PARAM_OPTIONAL
		Z_PARAM_LONG(limit)
		Z_PARAM_ZVAL(zcount)
	ZEND_PARSE_PARAMETERS_END();

	if (Z_TYPE_P(replace) == IS_ARRAY) {
		/* An array of replacements only makes sense with an array of patterns. */
		if (Z_TYPE_P(regex) != IS_ARRAY) {
			zend_argument_type_error(1, "must be of type array when argument #2 ($replace) is an array, %s given", zend_zval_type_name(regex));
			RETURN_THROWS();
		}
	} else {
		convert_to_string_ex(replace);
		if (Z_TYPE_P(regex) != IS_ARRAY) {
			convert_to_string_ex(regex);
		}
	}

	if (Z_TYPE_P(subject) == IS_ARRAY) {
		zval *entry, wrapped;
		zend_string *str_key;
		zend_ulong int_key;

		array_init_size(return_value, zend_hash_num_elements(Z_ARRVAL_P(subject)));

		/* Replace in every entry; results keep the key of their entry. */
		ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(subject), int_key, str_key, entry) {
			count_before = replace_count;
			replaced = php_replace_in_subject(regex, replace, entry, limit, &replace_count);
			if (replaced == NULL) {
				continue;
			}

			if (is_filter && replace_count <= count_before) {
				/* Filtering, and this entry did not match: drop it. */
				zend_string_release_ex(replaced, 0);
				continue;
			}

			ZVAL_STR(&wrapped, replaced);
			if (str_key) {
				zend_hash_add_new(Z_ARRVAL_P(return_value), str_key, &wrapped);
			} else {
				zend_hash_index_add_new(Z_ARRVAL_P(return_value), int_key, &wrapped);
			}
		} ZEND_HASH_FOREACH_END();
	} else {
		count_before = replace_count;
		replaced = php_replace_in_subject(regex, replace, subject, limit, &replace_count);

		if (replaced == NULL) {
			RETVAL_NULL();
		} else if (is_filter && replace_count <= count_before) {
			/* Filtering, and nothing matched: discard the result. */
			zend_string_release_ex(replaced, 0);
			RETVAL_NULL();
		} else {
			RETVAL_STR(replaced);
		}
	}

	if (zcount) {
		ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count);
	}
}
2364 /* }}} */
2365 
2366 /* {{{ proto mixed preg_replace(mixed regex, mixed replace, mixed subject [, int limit [, int &count]])
2367    Perform Perl-style regular expression replacement. */
PHP_FUNCTION(preg_replace)2368 PHP_FUNCTION(preg_replace)
2369 {
2370 	preg_replace_common(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
2371 }
2372 /* }}} */
2373 
2374 /* {{{ proto mixed preg_replace_callback(mixed regex, mixed callback, mixed subject [, int limit [, int &count]])
2375    Perform Perl-style regular expression replacement using replacement callback. */
PHP_FUNCTION(preg_replace_callback)2376 PHP_FUNCTION(preg_replace_callback)
2377 {
2378 	zval *regex, *replace, *subject, *zcount = NULL;
2379 	zend_long limit = -1, flags = 0;
2380 	size_t replace_count;
2381 	zend_fcall_info fci;
2382 	zend_fcall_info_cache fcc;
2383 
2384 	/* Get function parameters and do error-checking. */
2385 	ZEND_PARSE_PARAMETERS_START(3, 6)
2386 		Z_PARAM_ZVAL(regex)
2387 		Z_PARAM_ZVAL(replace)
2388 		Z_PARAM_ZVAL(subject)
2389 		Z_PARAM_OPTIONAL
2390 		Z_PARAM_LONG(limit)
2391 		Z_PARAM_ZVAL(zcount)
2392 		Z_PARAM_LONG(flags)
2393 	ZEND_PARSE_PARAMETERS_END();
2394 
2395 	if (!zend_is_callable_ex(replace, NULL, 0, NULL, &fcc, NULL)) {
2396 		zend_string	*callback_name = zend_get_callable_name(replace);
2397 		php_error_docref(NULL, E_WARNING, "Requires argument 2, '%s', to be a valid callback", ZSTR_VAL(callback_name));
2398 		zend_string_release_ex(callback_name, 0);
2399 		ZVAL_STR(return_value, zval_get_string(subject));
2400 		return;
2401 	}
2402 
2403 	fci.size = sizeof(fci);
2404 	fci.object = NULL;
2405 	ZVAL_COPY_VALUE(&fci.function_name, replace);
2406 
2407 	replace_count = preg_replace_func_impl(return_value, regex, &fci, &fcc, subject, limit, flags);
2408 	if (zcount) {
2409 		ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count);
2410 	}
2411 }
2412 /* }}} */
2413 
2414 /* {{{ proto mixed preg_replace_callback_array(array pattern, mixed subject [, int limit [, int &count]])
2415    Perform Perl-style regular expression replacement using replacement callback. */
PHP_FUNCTION(preg_replace_callback_array)2416 PHP_FUNCTION(preg_replace_callback_array)
2417 {
2418 	zval regex, zv, *replace, *subject, *pattern, *zcount = NULL;
2419 	zend_long limit = -1, flags = 0;
2420 	zend_string *str_idx;
2421 	size_t replace_count = 0;
2422 	zend_fcall_info fci;
2423 	zend_fcall_info_cache fcc;
2424 
2425 	/* Get function parameters and do error-checking. */
2426 	ZEND_PARSE_PARAMETERS_START(2, 5)
2427 		Z_PARAM_ARRAY(pattern)
2428 		Z_PARAM_ZVAL(subject)
2429 		Z_PARAM_OPTIONAL
2430 		Z_PARAM_LONG(limit)
2431 		Z_PARAM_ZVAL(zcount)
2432 		Z_PARAM_LONG(flags)
2433 	ZEND_PARSE_PARAMETERS_END();
2434 
2435 	fci.size = sizeof(fci);
2436 	fci.object = NULL;
2437 
2438 	ZEND_HASH_FOREACH_STR_KEY_VAL(Z_ARRVAL_P(pattern), str_idx, replace) {
2439 		if (str_idx) {
2440 			ZVAL_STR_COPY(&regex, str_idx);
2441 		} else {
2442 			php_error_docref(NULL, E_WARNING, "Delimiter must not be alphanumeric or backslash");
2443 			zval_ptr_dtor(return_value);
2444 			RETURN_NULL();
2445 		}
2446 
2447 		if (!zend_is_callable_ex(replace, NULL, 0, NULL, &fcc, NULL)) {
2448 			zend_string *callback_name = zend_get_callable_name(replace);
2449 			php_error_docref(NULL, E_WARNING, "'%s' is not a valid callback", ZSTR_VAL(callback_name));
2450 			zend_string_release_ex(callback_name, 0);
2451 			zval_ptr_dtor(&regex);
2452 			zval_ptr_dtor(return_value);
2453 			ZVAL_COPY(return_value, subject);
2454 			return;
2455 		}
2456 
2457 		ZVAL_COPY_VALUE(&fci.function_name, replace);
2458 
2459 		replace_count += preg_replace_func_impl(&zv, &regex, &fci, &fcc, subject, limit, flags);
2460 		if (subject != return_value) {
2461 			subject = return_value;
2462 		} else {
2463 			zval_ptr_dtor(return_value);
2464 		}
2465 
2466 		zval_ptr_dtor(&regex);
2467 
2468 		ZVAL_COPY_VALUE(return_value, &zv);
2469 
2470 		if (UNEXPECTED(EG(exception))) {
2471 			zval_ptr_dtor(return_value);
2472 			RETURN_NULL();
2473 		}
2474 	} ZEND_HASH_FOREACH_END();
2475 
2476 	if (zcount) {
2477 		ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count);
2478 	}
2479 }
2480 /* }}} */
2481 
2482 /* {{{ proto mixed preg_filter(mixed regex, mixed replace, mixed subject [, int limit [, int &count]])
2483    Perform Perl-style regular expression replacement and only return matches. */
PHP_FUNCTION(preg_filter)2484 PHP_FUNCTION(preg_filter)
2485 {
2486 	preg_replace_common(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
2487 }
2488 /* }}} */
2489 
2490 /* {{{ proto array preg_split(string pattern, string subject [, int limit [, int flags]])
2491    Split string into an array using a perl-style regular expression as a delimiter */
PHP_FUNCTION(preg_split)2492 PHP_FUNCTION(preg_split)
2493 {
2494 	zend_string			*regex;			/* Regular expression */
2495 	zend_string			*subject;		/* String to match against */
2496 	zend_long			 limit_val = -1;/* Integer value of limit */
2497 	zend_long			 flags = 0;		/* Match control flags */
2498 	pcre_cache_entry	*pce;			/* Compiled regular expression */
2499 
2500 	/* Get function parameters and do error checking */
2501 	ZEND_PARSE_PARAMETERS_START(2, 4)
2502 		Z_PARAM_STR(regex)
2503 		Z_PARAM_STR(subject)
2504 		Z_PARAM_OPTIONAL
2505 		Z_PARAM_LONG(limit_val)
2506 		Z_PARAM_LONG(flags)
2507 	ZEND_PARSE_PARAMETERS_END();
2508 
2509 	/* Compile regex or get it from cache. */
2510 	if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
2511 		RETURN_FALSE;
2512 	}
2513 
2514 	pce->refcount++;
2515 	php_pcre_split_impl(pce, subject, return_value, limit_val, flags);
2516 	pce->refcount--;
2517 }
2518 /* }}} */
2519 
2520 /* {{{ php_pcre_split_impl
2521  */
/*
 * Split `subject_str` at every match of the compiled pattern `pce` and store
 * the pieces in `return_value`, which is initialised as an array here.
 *
 *   limit_val  -1 or 0: no limit on the number of pieces; any other value
 *              <= 1: no matching is done and the whole subject is the only
 *              piece; otherwise matching stops once the remaining limit
 *              drops to 1 and the rest of the subject becomes the last piece
 *   flags      PREG_SPLIT_NO_EMPTY        skip empty pieces
 *              PREG_SPLIT_DELIM_CAPTURE   also add captured subpatterns
 *              PREG_SPLIT_OFFSET_CAPTURE  add pieces through add_offset_pair()
 *                                         together with their offsets
 *
 * On failure (match-data allocation, inconsistent offsets, or a match error
 * recorded in PCRE_G(error_code)) the array is destroyed and `return_value`
 * is set to FALSE.
 */
php_pcre_split_impl(pcre_cache_entry * pce,zend_string * subject_str,zval * return_value,zend_long limit_val,zend_long flags)2522 PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str, zval *return_value,
2523 	zend_long limit_val, zend_long flags)
2524 {
2525 	PCRE2_SIZE		*offsets;			/* Array of subpattern offsets */
2526 	uint32_t		 options;			/* Execution options */
2527 	int				 count;				/* Count of matched subpatterns */
2528 	PCRE2_SIZE		 start_offset;		/* Where the new search starts */
2529 	PCRE2_SIZE		 last_match_offset;	/* Location of last match */
2530 	uint32_t		 no_empty;			/* If NO_EMPTY flag is set */
2531 	uint32_t		 delim_capture; 	/* If delimiters should be captured */
2532 	uint32_t		 offset_capture;	/* If offsets should be captured */
2533 	uint32_t		 num_subpats;		/* Number of captured subpatterns */
2534 	zval			 tmp;
2535 	pcre2_match_data *match_data;
2536 	char *subject = ZSTR_VAL(subject_str);
2537 
2538 	no_empty = flags & PREG_SPLIT_NO_EMPTY;
2539 	delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE;
2540 	offset_capture = flags & PREG_SPLIT_OFFSET_CAPTURE;
2541 
2542 	/* Initialize return value */
2543 	array_init(return_value);
2544 
2545 	/* Calculate the size of the offsets array, and allocate memory for it. */
2546 	num_subpats = pce->capture_count + 1;
2547 
2548 	/* Start at the beginning of the string */
2549 	start_offset = 0;
2550 	last_match_offset = 0;
2551 	PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
2552 
	/* Normalise the limit: -1 and 0 both mean "no limit" (kept as -1 from here
	   on); any other value <= 1 skips matching entirely and jumps to the code
	   that adds the rest of the subject (here: all of it) as the final piece. */
2553 	if (limit_val == -1) {
2554 		/* pass */
2555 	} else if (limit_val == 0) {
2556 		limit_val = -1;
2557 	} else if (limit_val <= 1) {
2558 		goto last;
2559 	}
2560 
	/* Borrow the preallocated match data block when it is free and large
	   enough; otherwise allocate a dedicated block for this pattern. */
2561 	if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
2562 		match_data = mdata;
2563 	} else {
2564 		match_data = pcre2_match_data_create_from_pattern(pce->re, gctx);
2565 		if (!match_data) {
2566 			PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
2567 			zval_ptr_dtor(return_value);
2568 			RETURN_FALSE;
2569 		}
2570 	}
2571 
	/* For PCRE2_UTF patterns options is 0, so only this first match call is
	   made without PCRE2_NO_UTF_CHECK (and without the JIT fast path); every
	   later match call in this function passes PCRE2_NO_UTF_CHECK. */
2572 	options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
2573 
2574 #ifdef HAVE_PCRE_JIT_SUPPORT
2575 	if ((pce->preg_options & PREG_JIT) && options) {
2576 		count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
2577 				PCRE2_NO_UTF_CHECK, match_data, mctx);
2578 	} else
2579 #endif
2580 	count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
2581 			options, match_data, mctx);
2582 
	/* Each iteration consumes the outcome (count) of the most recent match
	   attempt; the next attempt is issued at the bottom of the loop. */
2583 	while (1) {
2584 		/* If something matched */
2585 		if (count >= 0) {
2586 			/* Check for too many substrings condition. */
2587 			if (UNEXPECTED(count == 0)) {
2588 				php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
2589 				count = num_subpats;
2590 			}
2591 
2592 matched:
2593 			offsets = pcre2_get_ovector_pointer(match_data);
2594 
			/* A match that ends before it starts cannot delimit a piece */
2595 			if (UNEXPECTED(offsets[1] < offsets[0])) {
2596 				PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
2597 				break;
2598 			}
2599 
			/* The piece is the text between the end of the previous match and
			   the start of this one; with NO_EMPTY, zero-length pieces are
			   skipped and do not count against the limit. */
2600 			if (!no_empty || offsets[0] != last_match_offset) {
2601 				if (offset_capture) {
2602 					/* Add (match, offset) pair to the return value */
2603 					add_offset_pair(
2604 						return_value, subject, last_match_offset, offsets[0],
2605 						NULL, 0);
2606 				} else {
2607 					/* Add the piece to the return value */
2608 					populate_match_value_str(&tmp, subject, last_match_offset, offsets[0]);
2609 					zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
2610 				}
2611 
2612 				/* One less left to do */
2613 				if (limit_val != -1)
2614 					limit_val--;
2615 			}
2616 
			/* Captured subpatterns (index 1 and up) are added after the piece;
			   with NO_EMPTY, zero-length captures are skipped. */
2617 			if (delim_capture) {
2618 				size_t i;
2619 				for (i = 1; i < count; i++) {
2620 					/* If we have matched a delimiter */
2621 					if (!no_empty || offsets[2*i] != offsets[2*i+1]) {
2622 						if (offset_capture) {
2623 							add_offset_pair(
2624 								return_value, subject, offsets[2*i], offsets[2*i+1], NULL, 0);
2625 						} else {
2626 							populate_match_value_str(&tmp, subject, offsets[2*i], offsets[2*i+1]);
2627 							zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
2628 						}
2629 					}
2630 				}
2631 			}
2632 
2633 			/* Advance to the position right after the last full match */
2634 			start_offset = last_match_offset = offsets[1];
2635 
2636 			/* If we have matched an empty string, mimic what Perl's /g options does.
2637 			   This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
2638 			   the match again at the same point. If this fails (picked up above) we
2639 			   advance to the next character. */
2640 			if (start_offset == offsets[0]) {
2641 				count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
2642 					PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
2643 				if (count >= 0) {
2644 					goto matched;
2645 				} else if (count == PCRE2_ERROR_NOMATCH) {
2646 					/* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
2647 					   this is not necessarily the end. We need to advance
2648 					   the start offset, and continue. Fudge the offset values
2649 					   to achieve this, unless we're already at the end of the string. */
2650 					if (start_offset < ZSTR_LEN(subject_str)) {
2651 						start_offset += calculate_unit_length(pce, subject + start_offset);
2652 					} else {
2653 						break;
2654 					}
2655 				} else {
2656 					goto error;
2657 				}
2658 			}
2659 
2660 		} else if (count == PCRE2_ERROR_NOMATCH) {
2661 			break;
2662 		} else {
2663 error:
2664 			pcre_handle_exec_error(count);
2665 			break;
2666 		}
2667 
2668 		/* Get next piece if no limit or limit not yet reached and something matched*/
2669 		if (limit_val != -1 && limit_val <= 1) {
2670 			break;
2671 		}
2672 
2673 #ifdef HAVE_PCRE_JIT_SUPPORT
2674 		if (pce->preg_options & PREG_JIT) {
2675 			count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
2676 					PCRE2_NO_UTF_CHECK, match_data, mctx);
2677 		} else
2678 #endif
2679 		count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
2680 				PCRE2_NO_UTF_CHECK, match_data, mctx);
2681 	}
	/* Only a dedicated match data block needs to be freed */
2682 	if (match_data != mdata) {
2683 		pcre2_match_data_free(match_data);
2684 	}
2685 
	/* Any error recorded during matching discards the partial result */
2686 	if (PCRE_G(error_code) != PHP_PCRE_NO_ERROR) {
2687 		zval_ptr_dtor(return_value);
2688 		RETURN_FALSE;
2689 	}
2690 
2691 last:
2692 	start_offset = last_match_offset; /* the offset might have been incremented, but without further successful matches */
2693 
	/* Whatever follows the last match is the final piece; with NO_EMPTY it is
	   only added when it is not empty. */
2694 	if (!no_empty || start_offset < ZSTR_LEN(subject_str)) {
2695 		if (offset_capture) {
2696 			/* Add the last (match, offset) pair to the return value */
2697 			add_offset_pair(return_value, subject, start_offset, ZSTR_LEN(subject_str), NULL, 0);
2698 		} else {
2699 			/* Add the last piece to the return value */
			/* Offset 0 means the piece is the whole subject, so the original
			   zend_string is reused instead of building a new one. */
2700 			if (start_offset == 0) {
2701 				ZVAL_STR_COPY(&tmp, subject_str);
2702 			} else {
2703 				populate_match_value_str(&tmp, subject, start_offset, ZSTR_LEN(subject_str));
2704 			}
2705 			zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
2706 		}
2707 	}
2708 }
2709 /* }}} */
2710 
2711 /* {{{ proto string preg_quote(string str [, string delim_char])
2712    Quote regular expression characters plus an optional character */
PHP_FUNCTION(preg_quote)2713 PHP_FUNCTION(preg_quote)
2714 {
2715 	zend_string *str;       		/* Input string argument */
2716 	zend_string	*delim = NULL;		/* Additional delimiter argument */
2717 	char		*in_str;			/* Input string */
2718 	char		*in_str_end;    	/* End of the input string */
2719 	zend_string	*out_str;			/* Output string with quoted characters */
2720 	size_t       extra_len;         /* Number of additional characters */
2721 	char 		*p,					/* Iterator for input string */
2722 				*q,					/* Iterator for output string */
2723 				 delim_char = '\0',	/* Delimiter character to be quoted */
2724 				 c;					/* Current character */
2725 
2726 	/* Get the arguments and check for errors */
2727 	ZEND_PARSE_PARAMETERS_START(1, 2)
2728 		Z_PARAM_STR(str)
2729 		Z_PARAM_OPTIONAL
2730 		Z_PARAM_STR_EX(delim, 1, 0)
2731 	ZEND_PARSE_PARAMETERS_END();
2732 
2733 	/* Nothing to do if we got an empty string */
2734 	if (ZSTR_LEN(str) == 0) {
2735 		RETURN_EMPTY_STRING();
2736 	}
2737 
2738 	in_str = ZSTR_VAL(str);
2739 	in_str_end = in_str + ZSTR_LEN(str);
2740 
2741 	if (delim) {
2742 		delim_char = ZSTR_VAL(delim)[0];
2743 	}
2744 
2745 	/* Go through the string and quote necessary characters */
2746 	extra_len = 0;
2747 	p = in_str;
2748 	do {
2749 		c = *p;
2750 		switch(c) {
2751 			case '.':
2752 			case '\\':
2753 			case '+':
2754 			case '*':
2755 			case '?':
2756 			case '[':
2757 			case '^':
2758 			case ']':
2759 			case '$':
2760 			case '(':
2761 			case ')':
2762 			case '{':
2763 			case '}':
2764 			case '=':
2765 			case '!':
2766 			case '>':
2767 			case '<':
2768 			case '|':
2769 			case ':':
2770 			case '-':
2771 			case '#':
2772 				extra_len++;
2773 				break;
2774 
2775 			case '\0':
2776 				extra_len+=3;
2777 				break;
2778 
2779 			default:
2780 				if (c == delim_char) {
2781 					extra_len++;
2782 				}
2783 				break;
2784 		}
2785 		p++;
2786 	} while (p != in_str_end);
2787 
2788 	if (extra_len == 0) {
2789 		RETURN_STR_COPY(str);
2790 	}
2791 
2792 	/* Allocate enough memory so that even if each character
2793 	   is quoted, we won't run out of room */
2794 	out_str = zend_string_safe_alloc(1, ZSTR_LEN(str), extra_len, 0);
2795 	q = ZSTR_VAL(out_str);
2796 	p = in_str;
2797 
2798 	do {
2799 		c = *p;
2800 		switch(c) {
2801 			case '.':
2802 			case '\\':
2803 			case '+':
2804 			case '*':
2805 			case '?':
2806 			case '[':
2807 			case '^':
2808 			case ']':
2809 			case '$':
2810 			case '(':
2811 			case ')':
2812 			case '{':
2813 			case '}':
2814 			case '=':
2815 			case '!':
2816 			case '>':
2817 			case '<':
2818 			case '|':
2819 			case ':':
2820 			case '-':
2821 			case '#':
2822 				*q++ = '\\';
2823 				*q++ = c;
2824 				break;
2825 
2826 			case '\0':
2827 				*q++ = '\\';
2828 				*q++ = '0';
2829 				*q++ = '0';
2830 				*q++ = '0';
2831 				break;
2832 
2833 			default:
2834 				if (c == delim_char) {
2835 					*q++ = '\\';
2836 				}
2837 				*q++ = c;
2838 				break;
2839 		}
2840 		p++;
2841 	} while (p != in_str_end);
2842 	*q = '\0';
2843 
2844 	RETURN_NEW_STR(out_str);
2845 }
2846 /* }}} */
2847 
2848 /* {{{ proto array preg_grep(string regex, array input [, int flags])
2849    Searches array and returns entries which match regex */
PHP_FUNCTION(preg_grep)2850 PHP_FUNCTION(preg_grep)
2851 {
2852 	zend_string			*regex;			/* Regular expression */
2853 	zval				*input;			/* Input array */
2854 	zend_long			 flags = 0;		/* Match control flags */
2855 	pcre_cache_entry	*pce;			/* Compiled regular expression */
2856 
2857 	/* Get arguments and do error checking */
2858 	ZEND_PARSE_PARAMETERS_START(2, 3)
2859 		Z_PARAM_STR(regex)
2860 		Z_PARAM_ARRAY(input)
2861 		Z_PARAM_OPTIONAL
2862 		Z_PARAM_LONG(flags)
2863 	ZEND_PARSE_PARAMETERS_END();
2864 
2865 	/* Compile regex or get it from cache. */
2866 	if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
2867 		RETURN_FALSE;
2868 	}
2869 
2870 	pce->refcount++;
2871 	php_pcre_grep_impl(pce, input, return_value, flags);
2872 	pce->refcount--;
2873 }
2874 /* }}} */
2875 
php_pcre_grep_impl(pcre_cache_entry * pce,zval * input,zval * return_value,zend_long flags)2876 PHPAPI void  php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return_value, zend_long flags) /* {{{ */
2877 {
2878 	zval            *entry;             /* An entry in the input array */
2879 	uint32_t		 num_subpats;		/* Number of captured subpatterns */
2880 	int				 count;				/* Count of matched subpatterns */
2881 	uint32_t		 options;			/* Execution options */
2882 	zend_string		*string_key;
2883 	zend_ulong		 num_key;
2884 	zend_bool		 invert;			/* Whether to return non-matching
2885 										   entries */
2886 	pcre2_match_data *match_data;
2887 	invert = flags & PREG_GREP_INVERT ? 1 : 0;
2888 
2889 	/* Calculate the size of the offsets array, and allocate memory for it. */
2890 	num_subpats = pce->capture_count + 1;
2891 
2892 	/* Initialize return array */
2893 	array_init(return_value);
2894 
2895 	PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
2896 
2897 	if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
2898 		match_data = mdata;
2899 	} else {
2900 		match_data = pcre2_match_data_create_from_pattern(pce->re, gctx);
2901 		if (!match_data) {
2902 			PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
2903 			return;
2904 		}
2905 	}
2906 
2907 	options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
2908 
2909 	/* Go through the input array */
2910 	ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(input), num_key, string_key, entry) {
2911 		zend_string *tmp_subject_str;
2912 		zend_string *subject_str = zval_get_tmp_string(entry, &tmp_subject_str);
2913 
2914 		/* Perform the match */
2915 #ifdef HAVE_PCRE_JIT_SUPPORT
2916 		if ((pce->preg_options & PREG_JIT) && options) {
2917 			count = pcre2_jit_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), 0,
2918 					PCRE2_NO_UTF_CHECK, match_data, mctx);
2919 		} else
2920 #endif
2921 		count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), 0,
2922 				options, match_data, mctx);
2923 
2924 		/* If the entry fits our requirements */
2925 		if (count >= 0) {
2926 			/* Check for too many substrings condition. */
2927 			if (UNEXPECTED(count == 0)) {
2928 				php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
2929 			}
2930 			if (!invert) {
2931 				Z_TRY_ADDREF_P(entry);
2932 
2933 				/* Add to return array */
2934 				if (string_key) {
2935 					zend_hash_update(Z_ARRVAL_P(return_value), string_key, entry);
2936 				} else {
2937 					zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, entry);
2938 				}
2939 			}
2940 		} else if (count == PCRE2_ERROR_NOMATCH) {
2941 			if (invert) {
2942 				Z_TRY_ADDREF_P(entry);
2943 
2944 				/* Add to return array */
2945 				if (string_key) {
2946 					zend_hash_update(Z_ARRVAL_P(return_value), string_key, entry);
2947 				} else {
2948 					zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, entry);
2949 				}
2950 			}
2951 		} else {
2952 			pcre_handle_exec_error(count);
2953 			zend_tmp_string_release(tmp_subject_str);
2954 			break;
2955 		}
2956 
2957 		zend_tmp_string_release(tmp_subject_str);
2958 	} ZEND_HASH_FOREACH_END();
2959 	if (match_data != mdata) {
2960 		pcre2_match_data_free(match_data);
2961 	}
2962 }
2963 /* }}} */
2964 
2965 /* {{{ proto int preg_last_error()
2966    Returns the error code of the last regexp execution. */
PHP_FUNCTION(preg_last_error)2967 PHP_FUNCTION(preg_last_error)
2968 {
2969 	ZEND_PARSE_PARAMETERS_NONE();
2970 
2971 	RETURN_LONG(PCRE_G(error_code));
2972 }
2973 /* }}} */
2974 
2975 /* {{{ proto string preg_last_error_msg()
2976    Returns the error message of the last regexp execution. */
PHP_FUNCTION(preg_last_error_msg)2977 PHP_FUNCTION(preg_last_error_msg)
2978 {
2979     ZEND_PARSE_PARAMETERS_NONE();
2980 
2981     RETURN_STRING(php_pcre_get_error_msg(PCRE_G(error_code)));
2982 }
2983 /* }}} */
2984 
2985 /* {{{ module definition structures */
2986 
2987 zend_module_entry pcre_module_entry = {
2988 	STANDARD_MODULE_HEADER,
2989    "pcre",
2990 	ext_functions,
2991 	PHP_MINIT(pcre),
2992 	PHP_MSHUTDOWN(pcre),
2993 	PHP_RINIT(pcre),
2994 	PHP_RSHUTDOWN(pcre),
2995 	PHP_MINFO(pcre),
2996 	PHP_PCRE_VERSION,
2997 	PHP_MODULE_GLOBALS(pcre),
2998 	PHP_GINIT(pcre),
2999 	PHP_GSHUTDOWN(pcre),
3000 	NULL,
3001 	STANDARD_MODULE_PROPERTIES_EX
3002 };
3003 
3004 #ifdef COMPILE_DL_PCRE
ZEND_GET_MODULE(pcre)3005 ZEND_GET_MODULE(pcre)
3006 #endif
3007 
3008 /* }}} */
3009 
3010 PHPAPI pcre2_match_context *php_pcre_mctx(void)
3011 {/*{{{*/
3012 	return mctx;
3013 }/*}}}*/
3014 
php_pcre_gctx(void)3015 PHPAPI pcre2_general_context *php_pcre_gctx(void)
3016 {/*{{{*/
3017 	return gctx;
3018 }/*}}}*/
3019 
php_pcre_cctx(void)3020 PHPAPI pcre2_compile_context *php_pcre_cctx(void)
3021 {/*{{{*/
3022 	return cctx;
3023 }/*}}}*/
3024 
php_pcre_pce_incref(pcre_cache_entry * pce)3025 PHPAPI void php_pcre_pce_incref(pcre_cache_entry *pce)
3026 {/*{{{*/
3027 	assert(NULL != pce);
3028 	pce->refcount++;
3029 }/*}}}*/
3030 
php_pcre_pce_decref(pcre_cache_entry * pce)3031 PHPAPI void php_pcre_pce_decref(pcre_cache_entry *pce)
3032 {/*{{{*/
3033 	assert(NULL != pce);
3034 	assert(0 != pce->refcount);
3035 	pce->refcount--;
3036 }/*}}}*/
3037 
php_pcre_pce_re(pcre_cache_entry * pce)3038 PHPAPI pcre2_code *php_pcre_pce_re(pcre_cache_entry *pce)
3039 {/*{{{*/
3040 	assert(NULL != pce);
3041 	return pce->re;
3042 }/*}}}*/
3043