xref: /PHP-8.0/ext/pcre/php_pcre.c (revision 712fc54e)
1 /*
2    +----------------------------------------------------------------------+
3    | Copyright (c) The PHP Group                                          |
4    +----------------------------------------------------------------------+
5    | This source file is subject to version 3.01 of the PHP license,      |
6    | that is bundled with this package in the file LICENSE, and is        |
7    | available through the world-wide-web at the following url:           |
8    | http://www.php.net/license/3_01.txt                                  |
9    | If you did not receive a copy of the PHP license and are unable to   |
10    | obtain it through the world-wide-web, please send a note to          |
11    | license@php.net so we can mail you a copy immediately.               |
12    +----------------------------------------------------------------------+
13    | Author: Andrei Zmievski <andrei@php.net>                             |
14    +----------------------------------------------------------------------+
15  */
16 
17 #include "php.h"
18 #include "php_ini.h"
19 #include "php_globals.h"
20 #include "php_pcre.h"
21 #include "php_pcre_arginfo.h"
22 #include "ext/standard/info.h"
23 #include "ext/standard/basic_functions.h"
24 #include "zend_smart_str.h"
25 #include "SAPI.h"
26 
27 #include "ext/standard/php_string.h"
28 
/* Match-result flags; exported to userland as PREG_* constants in MINIT. */
#define PREG_PATTERN_ORDER          1
#define PREG_SET_ORDER              2
#define PREG_OFFSET_CAPTURE         (1 << 8)
#define PREG_UNMATCHED_AS_NULL      (1 << 9)

/* Split flags; exported to userland as PREG_SPLIT_* constants in MINIT. */
#define PREG_SPLIT_NO_EMPTY         (1 << 0)
#define PREG_SPLIT_DELIM_CAPTURE    (1 << 1)
#define PREG_SPLIT_OFFSET_CAPTURE   (1 << 2)

/* Set by the 'e' pattern modifier, which the pattern compiler rejects. */
#define PREG_REPLACE_EVAL           (1 << 0)

#define PREG_GREP_INVERT            (1 << 0)

/* Internal preg option: recorded on a cache entry whose pattern was JIT-compiled. */
#define PREG_JIT                    (1 << 3)

/* Number of cached patterns at which old entries start being evicted. */
#define PCRE_CACHE_SIZE 4096
45 
46 struct _pcre_cache_entry {
47 	pcre2_code *re;
48 	uint32_t preg_options;
49 	uint32_t capture_count;
50 	uint32_t name_count;
51 	uint32_t compile_options;
52 	uint32_t refcount;
53 };
54 
55 PHPAPI ZEND_DECLARE_MODULE_GLOBALS(pcre)
56 
57 #ifdef HAVE_PCRE_JIT_SUPPORT
58 #define PCRE_JIT_STACK_MIN_SIZE (32 * 1024)
59 #define PCRE_JIT_STACK_MAX_SIZE (192 * 1024)
60 ZEND_TLS pcre2_jit_stack *jit_stack = NULL;
61 #endif
62 /* General context using (infallible) system allocator. */
63 ZEND_TLS pcre2_general_context *gctx = NULL;
64 /* These two are global per thread for now. Though it is possible to use these
65  	per pattern. Either one can copy it and use in pce, or one does no global
66 	contexts at all, but creates for every pce. */
67 ZEND_TLS pcre2_compile_context *cctx = NULL;
68 ZEND_TLS pcre2_match_context   *mctx = NULL;
69 ZEND_TLS pcre2_match_data      *mdata = NULL;
70 ZEND_TLS zend_bool              mdata_used = 0;
71 ZEND_TLS uint8_t pcre2_init_ok = 0;
/*
 * Process-wide mutex guarding the PCRE2 re-initialisation retry in RINIT.
 * It only exists for thread-safe builds with JIT support; otherwise the
 * helpers expand to no-op statements.
 *
 * Every helper is a single statement that requires a trailing semicolon at
 * the call site, so it is safe inside unbraced if/else bodies.
 */
#if defined(ZTS) && defined(HAVE_PCRE_JIT_SUPPORT)
static MUTEX_T pcre_mt = NULL;
/* Allocate the mutex once, from the main thread only. */
#define php_pcre_mutex_alloc() \
	do { \
		if (tsrm_is_main_thread() && !pcre_mt) { \
			pcre_mt = tsrm_mutex_alloc(); \
		} \
	} while (0)
/* Release the mutex, from the main thread only. */
#define php_pcre_mutex_free() \
	do { \
		if (tsrm_is_main_thread() && pcre_mt) { \
			tsrm_mutex_free(pcre_mt); \
			pcre_mt = NULL; \
		} \
	} while (0)
#define php_pcre_mutex_lock() \
	do { tsrm_mutex_lock(pcre_mt); } while (0)
#define php_pcre_mutex_unlock() \
	do { tsrm_mutex_unlock(pcre_mt); } while (0)
#else
#define php_pcre_mutex_alloc() do { } while (0)
#define php_pcre_mutex_free() do { } while (0)
#define php_pcre_mutex_lock() do { } while (0)
#define php_pcre_mutex_unlock() do { } while (0)
#endif
86 
87 ZEND_TLS HashTable char_tables;
88 
php_pcre_free_char_table(zval * data)89 static void php_pcre_free_char_table(zval *data)
90 {/*{{{*/
91 	void *ptr = Z_PTR_P(data);
92 	pefree(ptr, 1);
93 }/*}}}*/
94 
pcre_handle_exec_error(int pcre_code)95 static void pcre_handle_exec_error(int pcre_code) /* {{{ */
96 {
97 	int preg_code = 0;
98 
99 	switch (pcre_code) {
100 		case PCRE2_ERROR_MATCHLIMIT:
101 			preg_code = PHP_PCRE_BACKTRACK_LIMIT_ERROR;
102 			break;
103 
104 		case PCRE2_ERROR_RECURSIONLIMIT:
105 			preg_code = PHP_PCRE_RECURSION_LIMIT_ERROR;
106 			break;
107 
108 		case PCRE2_ERROR_BADUTFOFFSET:
109 			preg_code = PHP_PCRE_BAD_UTF8_OFFSET_ERROR;
110 			break;
111 
112 #ifdef HAVE_PCRE_JIT_SUPPORT
113 		case PCRE2_ERROR_JIT_STACKLIMIT:
114 			preg_code = PHP_PCRE_JIT_STACKLIMIT_ERROR;
115 			break;
116 #endif
117 
118 		default:
119 			if (pcre_code <= PCRE2_ERROR_UTF8_ERR1 && pcre_code >= PCRE2_ERROR_UTF8_ERR21) {
120 				preg_code = PHP_PCRE_BAD_UTF8_ERROR;
121 			} else  {
122 				preg_code = PHP_PCRE_INTERNAL_ERROR;
123 			}
124 			break;
125 	}
126 
127 	PCRE_G(error_code) = preg_code;
128 }
129 /* }}} */
130 
/* Returns the static, human-readable message for a PHP_PCRE_* error code;
 * codes without a message yield "Unknown error". */
static const char *php_pcre_get_error_msg(php_pcre_error_code error_code) /* {{{ */
{
	const char *msg;

	switch (error_code) {
		case PHP_PCRE_NO_ERROR:
			msg = "No error";
			break;
		case PHP_PCRE_INTERNAL_ERROR:
			msg = "Internal error";
			break;
		case PHP_PCRE_BAD_UTF8_ERROR:
			msg = "Malformed UTF-8 characters, possibly incorrectly encoded";
			break;
		case PHP_PCRE_BAD_UTF8_OFFSET_ERROR:
			msg = "The offset did not correspond to the beginning of a valid UTF-8 code point";
			break;
		case PHP_PCRE_BACKTRACK_LIMIT_ERROR:
			msg = "Backtrack limit exhausted";
			break;
		case PHP_PCRE_RECURSION_LIMIT_ERROR:
			msg = "Recursion limit exhausted";
			break;
#ifdef HAVE_PCRE_JIT_SUPPORT
		case PHP_PCRE_JIT_STACKLIMIT_ERROR:
			msg = "JIT stack limit exhausted";
			break;
#endif
		default:
			msg = "Unknown error";
			break;
	}

	return msg;
}
/* }}} */
157 
/* Cache destructor used when entries were allocated persistently: frees the
 * compiled pattern, then the entry itself with free(). */
static void php_free_pcre_cache(zval *data) /* {{{ */
{
	pcre_cache_entry *entry = (pcre_cache_entry *) Z_PTR_P(data);

	if (entry) {
		pcre2_code_free(entry->re);
		free(entry);
	}
}
/* }}} */
166 
/* Cache destructor used for the per-request cache: frees the compiled
 * pattern, then the entry itself with efree(). */
static void php_efree_pcre_cache(zval *data) /* {{{ */
{
	pcre_cache_entry *entry = (pcre_cache_entry *) Z_PTR_P(data);

	if (entry) {
		pcre2_code_free(entry->re);
		efree(entry);
	}
}
/* }}} */
175 
/* PCRE2 allocation callback backed by the persistent allocator.
 * The user data pointer is not used. */
static void *php_pcre_malloc(PCRE2_SIZE size, void *data)
{
	void *block = pemalloc(size, 1);

	return block;
}
180 
/* PCRE2 release callback matching php_pcre_malloc().
 * The user data pointer is not used. */
static void php_pcre_free(void *block, void *data)
{
	pefree(block, 1);
}
185 
/* PCRE2 allocation callback backed by emalloc().
 * The user data pointer is not used. */
static void *php_pcre_emalloc(PCRE2_SIZE size, void *data)
{
	void *block = emalloc(size);

	return block;
}
190 
/* PCRE2 release callback matching php_pcre_emalloc().
 * The user data pointer is not used. */
static void php_pcre_efree(void *block, void *data)
{
	efree(block);
}
195 
/* Extra compile options applied to the shared compile context.
 * pcre 10.38 needs PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK, disabled by default. */
#ifdef PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK
# define PHP_PCRE_DEFAULT_EXTRA_COPTIONS PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK
#else
# define PHP_PCRE_DEFAULT_EXTRA_COPTIONS 0
#endif

/* Number of ovector pairs in the preallocated match data block. */
#define PHP_PCRE_PREALLOC_MDATA_SIZE 32
204 
php_pcre_init_pcre2(uint8_t jit)205 static void php_pcre_init_pcre2(uint8_t jit)
206 {/*{{{*/
207 	if (!gctx) {
208 		gctx = pcre2_general_context_create(php_pcre_malloc, php_pcre_free, NULL);
209 		if (!gctx) {
210 			pcre2_init_ok = 0;
211 			return;
212 		}
213 	}
214 
215 	if (!cctx) {
216 		cctx = pcre2_compile_context_create(gctx);
217 		if (!cctx) {
218 			pcre2_init_ok = 0;
219 			return;
220 		}
221 	}
222 
223 	pcre2_set_compile_extra_options(cctx, PHP_PCRE_DEFAULT_EXTRA_COPTIONS);
224 
225 	if (!mctx) {
226 		mctx = pcre2_match_context_create(gctx);
227 		if (!mctx) {
228 			pcre2_init_ok = 0;
229 			return;
230 		}
231 	}
232 
233 #ifdef HAVE_PCRE_JIT_SUPPORT
234 	if (jit && !jit_stack) {
235 		jit_stack = pcre2_jit_stack_create(PCRE_JIT_STACK_MIN_SIZE, PCRE_JIT_STACK_MAX_SIZE, gctx);
236 		if (!jit_stack) {
237 			pcre2_init_ok = 0;
238 			return;
239 		}
240 	}
241 #endif
242 
243 	if (!mdata) {
244 		mdata = pcre2_match_data_create(PHP_PCRE_PREALLOC_MDATA_SIZE, gctx);
245 		if (!mdata) {
246 			pcre2_init_ok = 0;
247 			return;
248 		}
249 	}
250 
251 	pcre2_init_ok = 1;
252 }/*}}}*/
253 
php_pcre_shutdown_pcre2(void)254 static void php_pcre_shutdown_pcre2(void)
255 {/*{{{*/
256 	if (gctx) {
257 		pcre2_general_context_free(gctx);
258 		gctx = NULL;
259 	}
260 
261 	if (cctx) {
262 		pcre2_compile_context_free(cctx);
263 		cctx = NULL;
264 	}
265 
266 	if (mctx) {
267 		pcre2_match_context_free(mctx);
268 		mctx = NULL;
269 	}
270 
271 #ifdef HAVE_PCRE_JIT_SUPPORT
272 	/* Stack may only be destroyed when no cached patterns
273 	 	possibly associated with it do exist. */
274 	if (jit_stack) {
275 		pcre2_jit_stack_free(jit_stack);
276 		jit_stack = NULL;
277 	}
278 #endif
279 
280 	if (mdata) {
281 		pcre2_match_data_free(mdata);
282 		mdata = NULL;
283 	}
284 
285 	pcre2_init_ok = 0;
286 }/*}}}*/
287 
PHP_GINIT_FUNCTION(pcre)288 static PHP_GINIT_FUNCTION(pcre) /* {{{ */
289 {
290 	php_pcre_mutex_alloc();
291 
292 	/* If we're on the CLI SAPI, there will only be one request, so we don't need the
293 	 * cache to survive after RSHUTDOWN. */
294 	pcre_globals->per_request_cache = strcmp(sapi_module.name, "cli") == 0;
295 	if (!pcre_globals->per_request_cache) {
296 		zend_hash_init(&pcre_globals->pcre_cache, 0, NULL, php_free_pcre_cache, 1);
297 	}
298 
299 	pcre_globals->backtrack_limit = 0;
300 	pcre_globals->recursion_limit = 0;
301 	pcre_globals->error_code      = PHP_PCRE_NO_ERROR;
302 	ZVAL_UNDEF(&pcre_globals->unmatched_null_pair);
303 	ZVAL_UNDEF(&pcre_globals->unmatched_empty_pair);
304 #ifdef HAVE_PCRE_JIT_SUPPORT
305 	pcre_globals->jit = 1;
306 #endif
307 
308 	php_pcre_init_pcre2(1);
309 	zend_hash_init(&char_tables, 1, NULL, php_pcre_free_char_table, 1);
310 }
311 /* }}} */
312 
PHP_GSHUTDOWN_FUNCTION(pcre)313 static PHP_GSHUTDOWN_FUNCTION(pcre) /* {{{ */
314 {
315 	if (!pcre_globals->per_request_cache) {
316 		zend_hash_destroy(&pcre_globals->pcre_cache);
317 	}
318 
319 	php_pcre_shutdown_pcre2();
320 	zend_hash_destroy(&char_tables);
321 	php_pcre_mutex_free();
322 }
323 /* }}} */
324 
PHP_INI_MH(OnUpdateBacktrackLimit)325 static PHP_INI_MH(OnUpdateBacktrackLimit)
326 {/*{{{*/
327 	OnUpdateLong(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
328 	if (mctx) {
329 		pcre2_set_match_limit(mctx, (uint32_t)PCRE_G(backtrack_limit));
330 	}
331 
332 	return SUCCESS;
333 }/*}}}*/
334 
PHP_INI_MH(OnUpdateRecursionLimit)335 static PHP_INI_MH(OnUpdateRecursionLimit)
336 {/*{{{*/
337 	OnUpdateLong(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
338 	if (mctx) {
339 		pcre2_set_depth_limit(mctx, (uint32_t)PCRE_G(recursion_limit));
340 	}
341 
342 	return SUCCESS;
343 }/*}}}*/
344 
#ifdef HAVE_PCRE_JIT_SUPPORT
/* INI handler for pcre.jit: stores the flag through OnUpdateBool(), then
 * attaches the thread's JIT stack to the match context when JIT is enabled
 * and a stack exists, or detaches it otherwise. Always reports SUCCESS. */
static PHP_INI_MH(OnUpdateJit)
{/*{{{*/
	pcre2_jit_stack *stack = NULL;

	OnUpdateBool(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);

	if (PCRE_G(jit) && jit_stack) {
		stack = jit_stack;
	}
	pcre2_jit_stack_assign(mctx, NULL, stack);

	return SUCCESS;
}/*}}}*/
#endif
358 
359 PHP_INI_BEGIN()
360 	STD_PHP_INI_ENTRY("pcre.backtrack_limit", "1000000", PHP_INI_ALL, OnUpdateBacktrackLimit, backtrack_limit, zend_pcre_globals, pcre_globals)
361 	STD_PHP_INI_ENTRY("pcre.recursion_limit", "100000",  PHP_INI_ALL, OnUpdateRecursionLimit, recursion_limit, zend_pcre_globals, pcre_globals)
362 #ifdef HAVE_PCRE_JIT_SUPPORT
363 	STD_PHP_INI_ENTRY("pcre.jit",             "1",       PHP_INI_ALL, OnUpdateJit, jit,             zend_pcre_globals, pcre_globals)
364 #endif
PHP_INI_END()365 PHP_INI_END()
366 
/*
 * Fetches a string-valued PCRE2 build-time setting.
 *
 * what: one of the PCRE2_CONFIG_* selectors that yields a string.
 *
 * Returns a malloc()ed, NUL-terminated copy that the caller must free(), or
 * NULL when the setting is unavailable (pcre2_config() reports an error or an
 * empty length) or the buffer cannot be allocated.
 */
static char *_pcre2_config_str(uint32_t what)
{/*{{{*/
	char *ret;
	/* With a NULL buffer pcre2_config() only reports the required length;
	 * errors are negative and must not reach the size computation below. */
	int len = pcre2_config(what, NULL);

	if (len <= 0) {
		return NULL;
	}

	ret = (char *) malloc((size_t) len + 1);
	if (!ret) {
		return NULL;
	}

	len = pcre2_config(what, ret);
	if (len <= 0) {
		free(ret);
		return NULL;
	}

	return ret;
}/*}}}*/
380 
381 /* {{{ PHP_MINFO_FUNCTION(pcre) */
PHP_MINFO_FUNCTION(pcre)382 static PHP_MINFO_FUNCTION(pcre)
383 {
384 #ifdef HAVE_PCRE_JIT_SUPPORT
385 	uint32_t flag = 0;
386 	char *jit_target = _pcre2_config_str(PCRE2_CONFIG_JITTARGET);
387 #endif
388 	char *version = _pcre2_config_str(PCRE2_CONFIG_VERSION);
389 	char *unicode = _pcre2_config_str(PCRE2_CONFIG_UNICODE_VERSION);
390 
391 	php_info_print_table_start();
392 	php_info_print_table_row(2, "PCRE (Perl Compatible Regular Expressions) Support", "enabled" );
393 	php_info_print_table_row(2, "PCRE Library Version", version);
394 	free(version);
395 	php_info_print_table_row(2, "PCRE Unicode Version", unicode);
396 	free(unicode);
397 
398 #ifdef HAVE_PCRE_JIT_SUPPORT
399 	if (!pcre2_config(PCRE2_CONFIG_JIT, &flag)) {
400 		php_info_print_table_row(2, "PCRE JIT Support", flag ? "enabled" : "disabled");
401 	} else {
402 		php_info_print_table_row(2, "PCRE JIT Support", "unknown" );
403 	}
404 	if (jit_target) {
405 		php_info_print_table_row(2, "PCRE JIT Target", jit_target);
406 	}
407 	free(jit_target);
408 #else
409 	php_info_print_table_row(2, "PCRE JIT Support", "not compiled in" );
410 #endif
411 
412 #ifdef HAVE_PCRE_VALGRIND_SUPPORT
413 	php_info_print_table_row(2, "PCRE Valgrind Support", "enabled" );
414 #endif
415 
416 	php_info_print_table_end();
417 
418 	DISPLAY_INI_ENTRIES();
419 }
420 /* }}} */
421 
422 /* {{{ PHP_MINIT_FUNCTION(pcre) */
PHP_MINIT_FUNCTION(pcre)423 static PHP_MINIT_FUNCTION(pcre)
424 {
425 	char *version;
426 
427 #ifdef HAVE_PCRE_JIT_SUPPORT
428 	if (UNEXPECTED(!pcre2_init_ok)) {
429 		/* Retry. */
430 		php_pcre_init_pcre2(PCRE_G(jit));
431 		if (!pcre2_init_ok) {
432 			return FAILURE;
433 		}
434 	}
435 #endif
436 
437 	REGISTER_INI_ENTRIES();
438 
439 	REGISTER_LONG_CONSTANT("PREG_PATTERN_ORDER", PREG_PATTERN_ORDER, CONST_CS | CONST_PERSISTENT);
440 	REGISTER_LONG_CONSTANT("PREG_SET_ORDER", PREG_SET_ORDER, CONST_CS | CONST_PERSISTENT);
441 	REGISTER_LONG_CONSTANT("PREG_OFFSET_CAPTURE", PREG_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
442 	REGISTER_LONG_CONSTANT("PREG_UNMATCHED_AS_NULL", PREG_UNMATCHED_AS_NULL, CONST_CS | CONST_PERSISTENT);
443 	REGISTER_LONG_CONSTANT("PREG_SPLIT_NO_EMPTY", PREG_SPLIT_NO_EMPTY, CONST_CS | CONST_PERSISTENT);
444 	REGISTER_LONG_CONSTANT("PREG_SPLIT_DELIM_CAPTURE", PREG_SPLIT_DELIM_CAPTURE, CONST_CS | CONST_PERSISTENT);
445 	REGISTER_LONG_CONSTANT("PREG_SPLIT_OFFSET_CAPTURE", PREG_SPLIT_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
446 	REGISTER_LONG_CONSTANT("PREG_GREP_INVERT", PREG_GREP_INVERT, CONST_CS | CONST_PERSISTENT);
447 
448 	REGISTER_LONG_CONSTANT("PREG_NO_ERROR", PHP_PCRE_NO_ERROR, CONST_CS | CONST_PERSISTENT);
449 	REGISTER_LONG_CONSTANT("PREG_INTERNAL_ERROR", PHP_PCRE_INTERNAL_ERROR, CONST_CS | CONST_PERSISTENT);
450 	REGISTER_LONG_CONSTANT("PREG_BACKTRACK_LIMIT_ERROR", PHP_PCRE_BACKTRACK_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
451 	REGISTER_LONG_CONSTANT("PREG_RECURSION_LIMIT_ERROR", PHP_PCRE_RECURSION_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
452 	REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_ERROR", PHP_PCRE_BAD_UTF8_ERROR, CONST_CS | CONST_PERSISTENT);
453 	REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_OFFSET_ERROR", PHP_PCRE_BAD_UTF8_OFFSET_ERROR, CONST_CS | CONST_PERSISTENT);
454 	REGISTER_LONG_CONSTANT("PREG_JIT_STACKLIMIT_ERROR", PHP_PCRE_JIT_STACKLIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
455 	version = _pcre2_config_str(PCRE2_CONFIG_VERSION);
456 	REGISTER_STRING_CONSTANT("PCRE_VERSION", version, CONST_CS | CONST_PERSISTENT);
457 	free(version);
458 	REGISTER_LONG_CONSTANT("PCRE_VERSION_MAJOR", PCRE2_MAJOR, CONST_CS | CONST_PERSISTENT);
459 	REGISTER_LONG_CONSTANT("PCRE_VERSION_MINOR", PCRE2_MINOR, CONST_CS | CONST_PERSISTENT);
460 
461 #ifdef HAVE_PCRE_JIT_SUPPORT
462 	REGISTER_BOOL_CONSTANT("PCRE_JIT_SUPPORT", 1, CONST_CS | CONST_PERSISTENT);
463 #else
464 	REGISTER_BOOL_CONSTANT("PCRE_JIT_SUPPORT", 0, CONST_CS | CONST_PERSISTENT);
465 #endif
466 
467 	return SUCCESS;
468 }
469 /* }}} */
470 
471 /* {{{ PHP_MSHUTDOWN_FUNCTION(pcre) */
PHP_MSHUTDOWN_FUNCTION(pcre)472 static PHP_MSHUTDOWN_FUNCTION(pcre)
473 {
474 	UNREGISTER_INI_ENTRIES();
475 
476 	return SUCCESS;
477 }
478 /* }}} */
479 
480 /* {{{ PHP_RINIT_FUNCTION(pcre) */
PHP_RINIT_FUNCTION(pcre)481 static PHP_RINIT_FUNCTION(pcre)
482 {
483 #ifdef HAVE_PCRE_JIT_SUPPORT
484 	if (UNEXPECTED(!pcre2_init_ok)) {
485 		/* Retry. */
486 		php_pcre_mutex_lock();
487 		php_pcre_init_pcre2(PCRE_G(jit));
488 		if (!pcre2_init_ok) {
489 			php_pcre_mutex_unlock();
490 			return FAILURE;
491 		}
492 		php_pcre_mutex_unlock();
493 	}
494 
495 	mdata_used = 0;
496 #endif
497 
498 	PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
499 	PCRE_G(gctx_zmm) = pcre2_general_context_create(php_pcre_emalloc, php_pcre_efree, NULL);
500 	if (!PCRE_G(gctx_zmm)) {
501 		return FAILURE;
502 	}
503 
504 	if (PCRE_G(per_request_cache)) {
505 		zend_hash_init(&PCRE_G(pcre_cache), 0, NULL, php_efree_pcre_cache, 0);
506 	}
507 
508 	return SUCCESS;
509 }
510 /* }}} */
511 
PHP_RSHUTDOWN_FUNCTION(pcre)512 static PHP_RSHUTDOWN_FUNCTION(pcre)
513 {
514 	pcre2_general_context_free(PCRE_G(gctx_zmm));
515 	PCRE_G(gctx_zmm) = NULL;
516 
517 	if (PCRE_G(per_request_cache)) {
518 		zend_hash_destroy(&PCRE_G(pcre_cache));
519 	}
520 
521 	zval_ptr_dtor(&PCRE_G(unmatched_null_pair));
522 	zval_ptr_dtor(&PCRE_G(unmatched_empty_pair));
523 	ZVAL_UNDEF(&PCRE_G(unmatched_null_pair));
524 	ZVAL_UNDEF(&PCRE_G(unmatched_empty_pair));
525 	return SUCCESS;
526 }
527 
528 /* {{{ static pcre_clean_cache */
pcre_clean_cache(zval * data,void * arg)529 static int pcre_clean_cache(zval *data, void *arg)
530 {
531 	pcre_cache_entry *pce = (pcre_cache_entry *) Z_PTR_P(data);
532 	int *num_clean = (int *)arg;
533 
534 	if (*num_clean > 0 && !pce->refcount) {
535 		(*num_clean)--;
536 		return ZEND_HASH_APPLY_REMOVE;
537 	} else {
538 		return ZEND_HASH_APPLY_KEEP;
539 	}
540 }
541 /* }}} */
542 
/* Releases every non-NULL name in a table of `num_subpats` slots, then the
 * table itself. */
static void free_subpats_table(zend_string **subpat_names, uint32_t num_subpats) {
	zend_string **slot = subpat_names;
	zend_string **end = subpat_names + num_subpats;

	for (; slot < end; slot++) {
		if (*slot) {
			zend_string_release(*slot);
		}
	}
	efree(subpat_names);
}
552 
553 /* {{{ static make_subpats_table */
make_subpats_table(uint32_t num_subpats,pcre_cache_entry * pce)554 static zend_string **make_subpats_table(uint32_t num_subpats, pcre_cache_entry *pce)
555 {
556 	uint32_t name_cnt = pce->name_count, name_size, ni = 0;
557 	char *name_table;
558 	zend_string **subpat_names;
559 	int rc1, rc2;
560 
561 	rc1 = pcre2_pattern_info(pce->re, PCRE2_INFO_NAMETABLE, &name_table);
562 	rc2 = pcre2_pattern_info(pce->re, PCRE2_INFO_NAMEENTRYSIZE, &name_size);
563 	if (rc1 < 0 || rc2 < 0) {
564 		php_error_docref(NULL, E_WARNING, "Internal pcre2_pattern_info() error %d", rc1 < 0 ? rc1 : rc2);
565 		return NULL;
566 	}
567 
568 	subpat_names = ecalloc(num_subpats, sizeof(zend_string *));
569 	while (ni++ < name_cnt) {
570 		unsigned short name_idx = 0x100 * (unsigned char)name_table[0] + (unsigned char)name_table[1];
571 		const char *name = name_table + 2;
572 		subpat_names[name_idx] = zend_string_init(name, strlen(name), 0);
573 		if (is_numeric_string(ZSTR_VAL(subpat_names[name_idx]), ZSTR_LEN(subpat_names[name_idx]), NULL, NULL, 0) > 0) {
574 			php_error_docref(NULL, E_WARNING, "Numeric named subpatterns are not allowed");
575 			free_subpats_table(subpat_names, num_subpats);
576 			return NULL;
577 		}
578 		name_table += name_size;
579 	}
580 	return subpat_names;
581 }
582 /* }}} */
583 
584 /* {{{ static calculate_unit_length */
585 /* Calculates the byte length of the next character. Assumes valid UTF-8 for PCRE2_UTF. */
calculate_unit_length(pcre_cache_entry * pce,const char * start)586 static zend_always_inline size_t calculate_unit_length(pcre_cache_entry *pce, const char *start)
587 {
588 	size_t unit_len;
589 
590 	if (pce->compile_options & PCRE2_UTF) {
591 		const char *end = start;
592 
593 		/* skip continuation bytes */
594 		while ((*++end & 0xC0) == 0x80);
595 		unit_len = end - start;
596 	} else {
597 		unit_len = 1;
598 	}
599 	return unit_len;
600 }
601 /* }}} */
602 
603 /* {{{ pcre_get_compiled_regex_cache */
pcre_get_compiled_regex_cache_ex(zend_string * regex,int locale_aware)604 PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, int locale_aware)
605 {
606 	pcre2_code			*re = NULL;
607 #if 10 == PCRE2_MAJOR && 37 == PCRE2_MINOR
608 	uint32_t			 coptions = PCRE2_NO_START_OPTIMIZE;
609 #else
610 	uint32_t			 coptions = 0;
611 #endif
612 	PCRE2_UCHAR	         error[128];
613 	PCRE2_SIZE           erroffset;
614 	int                  errnumber;
615 	char				 delimiter;
616 	char				 start_delimiter;
617 	char				 end_delimiter;
618 	char				*p, *pp;
619 	char				*pattern;
620 	size_t				 pattern_len;
621 	uint32_t			 poptions = 0;
622 	const uint8_t       *tables = NULL;
623 	zval                *zv;
624 	pcre_cache_entry	 new_entry;
625 	int					 rc;
626 	zend_string 		*key;
627 	pcre_cache_entry *ret;
628 
629 	if (locale_aware && BG(ctype_string)) {
630 		key = zend_string_concat2(
631 			ZSTR_VAL(BG(ctype_string)), ZSTR_LEN(BG(ctype_string)),
632 			ZSTR_VAL(regex), ZSTR_LEN(regex));
633 	} else {
634 		key = regex;
635 	}
636 
637 	/* Try to lookup the cached regex entry, and if successful, just pass
638 	   back the compiled pattern, otherwise go on and compile it. */
639 	zv = zend_hash_find(&PCRE_G(pcre_cache), key);
640 	if (zv) {
641 		if (key != regex) {
642 			zend_string_release_ex(key, 0);
643 		}
644 		return (pcre_cache_entry*)Z_PTR_P(zv);
645 	}
646 
647 	p = ZSTR_VAL(regex);
648 
649 	/* Parse through the leading whitespace, and display a warning if we
650 	   get to the end without encountering a delimiter. */
651 	while (isspace((int)*(unsigned char *)p)) p++;
652 	if (*p == 0) {
653 		if (key != regex) {
654 			zend_string_release_ex(key, 0);
655 		}
656 		php_error_docref(NULL, E_WARNING,
657 						 p < ZSTR_VAL(regex) + ZSTR_LEN(regex) ? "Null byte in regex" : "Empty regular expression");
658 		pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
659 		return NULL;
660 	}
661 
662 	/* Get the delimiter and display a warning if it is alphanumeric
663 	   or a backslash. */
664 	delimiter = *p++;
665 	if (isalnum((int)*(unsigned char *)&delimiter) || delimiter == '\\') {
666 		if (key != regex) {
667 			zend_string_release_ex(key, 0);
668 		}
669 		php_error_docref(NULL,E_WARNING, "Delimiter must not be alphanumeric or backslash");
670 		pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
671 		return NULL;
672 	}
673 
674 	start_delimiter = delimiter;
675 	if ((pp = strchr("([{< )]}> )]}>", delimiter)))
676 		delimiter = pp[5];
677 	end_delimiter = delimiter;
678 
679 	pp = p;
680 
681 	if (start_delimiter == end_delimiter) {
682 		/* We need to iterate through the pattern, searching for the ending delimiter,
683 		   but skipping the backslashed delimiters.  If the ending delimiter is not
684 		   found, display a warning. */
685 		while (*pp != 0) {
686 			if (*pp == '\\' && pp[1] != 0) pp++;
687 			else if (*pp == delimiter)
688 				break;
689 			pp++;
690 		}
691 	} else {
692 		/* We iterate through the pattern, searching for the matching ending
693 		 * delimiter. For each matching starting delimiter, we increment nesting
694 		 * level, and decrement it for each matching ending delimiter. If we
695 		 * reach the end of the pattern without matching, display a warning.
696 		 */
697 		int brackets = 1; 	/* brackets nesting level */
698 		while (*pp != 0) {
699 			if (*pp == '\\' && pp[1] != 0) pp++;
700 			else if (*pp == end_delimiter && --brackets <= 0)
701 				break;
702 			else if (*pp == start_delimiter)
703 				brackets++;
704 			pp++;
705 		}
706 	}
707 
708 	if (*pp == 0) {
709 		if (key != regex) {
710 			zend_string_release_ex(key, 0);
711 		}
712 		if (pp < ZSTR_VAL(regex) + ZSTR_LEN(regex)) {
713 			php_error_docref(NULL,E_WARNING, "Null byte in regex");
714 		} else if (start_delimiter == end_delimiter) {
715 			php_error_docref(NULL,E_WARNING, "No ending delimiter '%c' found", delimiter);
716 		} else {
717 			php_error_docref(NULL,E_WARNING, "No ending matching delimiter '%c' found", delimiter);
718 		}
719 		pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
720 		return NULL;
721 	}
722 
723 	/* Make a copy of the actual pattern. */
724 	pattern_len = pp - p;
725 	pattern = estrndup(p, pattern_len);
726 
727 	/* Move on to the options */
728 	pp++;
729 
730 	/* Parse through the options, setting appropriate flags.  Display
731 	   a warning if we encounter an unknown modifier. */
732 	while (pp < ZSTR_VAL(regex) + ZSTR_LEN(regex)) {
733 		switch (*pp++) {
734 			/* Perl compatible options */
735 			case 'i':	coptions |= PCRE2_CASELESS;		break;
736 			case 'm':	coptions |= PCRE2_MULTILINE;		break;
737 			case 's':	coptions |= PCRE2_DOTALL;		break;
738 			case 'x':	coptions |= PCRE2_EXTENDED;		break;
739 
740 			/* PCRE specific options */
741 			case 'A':	coptions |= PCRE2_ANCHORED;		break;
742 			case 'D':	coptions |= PCRE2_DOLLAR_ENDONLY;break;
743 			case 'S':	/* Pass. */					break;
744 			case 'X':	/* Pass. */					break;
745 			case 'U':	coptions |= PCRE2_UNGREEDY;		break;
746 			case 'u':	coptions |= PCRE2_UTF;
747 	/* In  PCRE,  by  default, \d, \D, \s, \S, \w, and \W recognize only ASCII
748        characters, even in UTF-8 mode. However, this can be changed by setting
749        the PCRE2_UCP option. */
750 #ifdef PCRE2_UCP
751 						coptions |= PCRE2_UCP;
752 #endif
753 				break;
754 			case 'J':	coptions |= PCRE2_DUPNAMES;		break;
755 
756 			/* Custom preg options */
757 			case 'e':	poptions |= PREG_REPLACE_EVAL;	break;
758 
759 			case ' ':
760 			case '\n':
761 			case '\r':
762 				break;
763 
764 			default:
765 				if (pp[-1]) {
766 					php_error_docref(NULL,E_WARNING, "Unknown modifier '%c'", pp[-1]);
767 				} else {
768 					php_error_docref(NULL,E_WARNING, "Null byte in regex");
769 				}
770 				pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
771 				efree(pattern);
772 				if (key != regex) {
773 					zend_string_release_ex(key, 0);
774 				}
775 				return NULL;
776 		}
777 	}
778 
779 	if (poptions & PREG_REPLACE_EVAL) {
780 		php_error_docref(NULL, E_WARNING, "The /e modifier is no longer supported, use preg_replace_callback instead");
781 		pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
782 		efree(pattern);
783 		if (key != regex) {
784 			zend_string_release_ex(key, 0);
785 		}
786 		return NULL;
787 	}
788 
789 	if (key != regex) {
790 		tables = (uint8_t *)zend_hash_find_ptr(&char_tables, BG(ctype_string));
791 		if (!tables) {
792 			zend_string *_k;
793 			tables = pcre2_maketables(gctx);
794 			if (UNEXPECTED(!tables)) {
795 				php_error_docref(NULL,E_WARNING, "Failed to generate locale character tables");
796 				pcre_handle_exec_error(PCRE2_ERROR_NOMEMORY);
797 				zend_string_release_ex(key, 0);
798 				efree(pattern);
799 				return NULL;
800 			}
801 			_k = zend_string_init(ZSTR_VAL(BG(ctype_string)), ZSTR_LEN(BG(ctype_string)), 1);
802 			GC_MAKE_PERSISTENT_LOCAL(_k);
803 			zend_hash_add_ptr(&char_tables, _k, (void *)tables);
804 			zend_string_release(_k);
805 		}
806 	}
807 	pcre2_set_character_tables(cctx, tables);
808 
809 	/* Compile pattern and display a warning if compilation failed. */
810 	re = pcre2_compile((PCRE2_SPTR)pattern, pattern_len, coptions, &errnumber, &erroffset, cctx);
811 
812 	if (re == NULL) {
813 		if (key != regex) {
814 			zend_string_release_ex(key, 0);
815 		}
816 		pcre2_get_error_message(errnumber, error, sizeof(error));
817 		php_error_docref(NULL,E_WARNING, "Compilation failed: %s at offset %zu", error, erroffset);
818 		pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
819 		efree(pattern);
820 		return NULL;
821 	}
822 
823 #ifdef HAVE_PCRE_JIT_SUPPORT
824 	if (PCRE_G(jit)) {
825 		/* Enable PCRE JIT compiler */
826 		rc = pcre2_jit_compile(re, PCRE2_JIT_COMPLETE);
827 		if (EXPECTED(rc >= 0)) {
828 			size_t jit_size = 0;
829 			if (!pcre2_pattern_info(re, PCRE2_INFO_JITSIZE, &jit_size) && jit_size > 0) {
830 				poptions |= PREG_JIT;
831 			}
832 		} else if (rc == PCRE2_ERROR_NOMEMORY) {
833 			php_error_docref(NULL, E_WARNING,
834 				"Allocation of JIT memory failed, PCRE JIT will be disabled. "
835 				"This is likely caused by security restrictions. "
836 				"Either grant PHP permission to allocate executable memory, or set pcre.jit=0");
837 			PCRE_G(jit) = 0;
838 		} else {
839 			pcre2_get_error_message(rc, error, sizeof(error));
840 			php_error_docref(NULL, E_WARNING, "JIT compilation failed: %s", error);
841 			pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
842 		}
843 	}
844 #endif
845 	efree(pattern);
846 
847 	/*
848 	 * If we reached cache limit, clean out the items from the head of the list;
849 	 * these are supposedly the oldest ones (but not necessarily the least used
850 	 * ones).
851 	 */
852 	if (zend_hash_num_elements(&PCRE_G(pcre_cache)) == PCRE_CACHE_SIZE) {
853 		int num_clean = PCRE_CACHE_SIZE / 8;
854 		zend_hash_apply_with_argument(&PCRE_G(pcre_cache), pcre_clean_cache, &num_clean);
855 	}
856 
857 	/* Store the compiled pattern and extra info in the cache. */
858 	new_entry.re = re;
859 	new_entry.preg_options = poptions;
860 	new_entry.compile_options = coptions;
861 	new_entry.refcount = 0;
862 
863 	rc = pcre2_pattern_info(re, PCRE2_INFO_CAPTURECOUNT, &new_entry.capture_count);
864 	if (rc < 0) {
865 		if (key != regex) {
866 			zend_string_release_ex(key, 0);
867 		}
868 		php_error_docref(NULL, E_WARNING, "Internal pcre2_pattern_info() error %d", rc);
869 		pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
870 		return NULL;
871 	}
872 
873 	rc = pcre2_pattern_info(re, PCRE2_INFO_NAMECOUNT, &new_entry.name_count);
874 	if (rc < 0) {
875 		if (key != regex) {
876 			zend_string_release_ex(key, 0);
877 		}
878 		php_error_docref(NULL, E_WARNING, "Internal pcre_pattern_info() error %d", rc);
879 		pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
880 		return NULL;
881 	}
882 
883 	/*
884 	 * Interned strings are not duplicated when stored in HashTable,
885 	 * but all the interned strings created during HTTP request are removed
886 	 * at end of request. However PCRE_G(pcre_cache) must be consistent
887 	 * on the next request as well. So we disable usage of interned strings
888 	 * as hash keys especually for this table.
889 	 * See bug #63180
890 	 */
891 	if (!(GC_FLAGS(key) & IS_STR_PERMANENT) && !PCRE_G(per_request_cache)) {
892 		zend_string *str = zend_string_init(ZSTR_VAL(key), ZSTR_LEN(key), 1);
893 		GC_MAKE_PERSISTENT_LOCAL(str);
894 
895 		ret = zend_hash_add_new_mem(&PCRE_G(pcre_cache), str, &new_entry, sizeof(pcre_cache_entry));
896 		zend_string_release(str);
897 	} else {
898 		ret = zend_hash_add_new_mem(&PCRE_G(pcre_cache), key, &new_entry, sizeof(pcre_cache_entry));
899 	}
900 
901 	if (key != regex) {
902 		zend_string_release_ex(key, 0);
903 	}
904 
905 	return ret;
906 }
907 /* }}} */
908 
909 /* {{{ pcre_get_compiled_regex_cache */
pcre_get_compiled_regex_cache(zend_string * regex)910 PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex)
911 {
912 	return pcre_get_compiled_regex_cache_ex(regex, 1);
913 }
914 /* }}} */
915 
916 /* {{{ pcre_get_compiled_regex */
pcre_get_compiled_regex(zend_string * regex,uint32_t * capture_count)917 PHPAPI pcre2_code *pcre_get_compiled_regex(zend_string *regex, uint32_t *capture_count)
918 {
919 	pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex);
920 
921 	if (capture_count) {
922 		*capture_count = pce ? pce->capture_count : 0;
923 	}
924 
925 	return pce ? pce->re : NULL;
926 }
927 /* }}} */
928 
929 /* {{{ pcre_get_compiled_regex_ex */
pcre_get_compiled_regex_ex(zend_string * regex,uint32_t * capture_count,uint32_t * preg_options,uint32_t * compile_options)930 PHPAPI pcre2_code* pcre_get_compiled_regex_ex(zend_string *regex, uint32_t *capture_count, uint32_t *preg_options, uint32_t *compile_options)
931 {
932 	pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex);
933 
934 	if (preg_options) {
935 		*preg_options = pce ? pce->preg_options : 0;
936 	}
937 	if (compile_options) {
938 		*compile_options = pce ? pce->compile_options : 0;
939 	}
940 	if (capture_count) {
941 		*capture_count = pce ? pce->capture_count : 0;
942 	}
943 
944 	return pce ? pce->re : NULL;
945 }
946 /* }}} */
947 
948 /* XXX For the cases where it's only about match yes/no and no capture
949 		required, perhaps just a minimum sized data would suffice. */
php_pcre_create_match_data(uint32_t capture_count,pcre2_code * re)950 PHPAPI pcre2_match_data *php_pcre_create_match_data(uint32_t capture_count, pcre2_code *re)
951 {/*{{{*/
952 
953 	assert(NULL != re);
954 
955 	if (EXPECTED(!mdata_used)) {
956 		int rc = 0;
957 
958 		if (!capture_count) {
959 			/* As we deal with a non cached pattern, no other way to gather this info. */
960 			rc = pcre2_pattern_info(re, PCRE2_INFO_CAPTURECOUNT, &capture_count);
961 		}
962 
963 		if (rc >= 0 && capture_count + 1 <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
964 			mdata_used = 1;
965 			return mdata;
966 		}
967 	}
968 
969 	return pcre2_match_data_create_from_pattern(re, gctx);
970 }/*}}}*/
971 
php_pcre_free_match_data(pcre2_match_data * match_data)972 PHPAPI void php_pcre_free_match_data(pcre2_match_data *match_data)
973 {/*{{{*/
974 	if (UNEXPECTED(match_data != mdata)) {
975 		pcre2_match_data_free(match_data);
976 	} else {
977 		mdata_used = 0;
978 	}
979 }/*}}}*/
980 
init_unmatched_null_pair()981 static void init_unmatched_null_pair() {
982 	zval val1, val2;
983 	ZVAL_NULL(&val1);
984 	ZVAL_LONG(&val2, -1);
985 	ZVAL_ARR(&PCRE_G(unmatched_null_pair), zend_new_pair(&val1, &val2));
986 }
987 
init_unmatched_empty_pair()988 static void init_unmatched_empty_pair() {
989 	zval val1, val2;
990 	ZVAL_EMPTY_STRING(&val1);
991 	ZVAL_LONG(&val2, -1);
992 	ZVAL_ARR(&PCRE_G(unmatched_empty_pair), zend_new_pair(&val1, &val2));
993 }
994 
populate_match_value_str(zval * val,const char * subject,PCRE2_SIZE start_offset,PCRE2_SIZE end_offset)995 static zend_always_inline void populate_match_value_str(
996 		zval *val, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset) {
997 	ZVAL_STRINGL_FAST(val, subject + start_offset, end_offset - start_offset);
998 }
999 
/* Fills `val` for one capture group: the matched substring when the group
 * participated in the match; otherwise NULL if unmatched_as_null is set,
 * or an empty string if it is not. */
static inline void populate_match_value(
		zval *val, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset,
		uint32_t unmatched_as_null) {
	if (PCRE2_UNSET != start_offset) {
		populate_match_value_str(val, subject, start_offset, end_offset);
		return;
	}

	/* The group did not participate in the match. */
	if (unmatched_as_null) {
		ZVAL_NULL(val);
		return;
	}

	ZVAL_EMPTY_STRING(val);
}
1013 
/* Stores `val` under `name` in the `subpats` array.
 *
 * If the DUPNAMES option is used, multiple subpatterns might have the same
 * name; the one that actually has a value must win. A matched value therefore
 * overwrites any existing entry, while an unmatched one is only added when
 * the name is not present yet. A reference is added to `val` only when it
 * was actually stored. */
static inline void add_named(
		zval *subpats, zend_string *name, zval *val, zend_bool unmatched) {
	HashTable *target = Z_ARRVAL_P(subpats);

	if (unmatched) {
		if (zend_hash_add(target, name, val) == NULL) {
			/* Name already present: keep the existing entry. */
			return;
		}
	} else {
		zend_hash_update(target, name, val);
	}

	Z_TRY_ADDREF_P(val);
}
1027 
1028 /* {{{ add_offset_pair */
add_offset_pair(zval * result,const char * subject,PCRE2_SIZE start_offset,PCRE2_SIZE end_offset,zend_string * name,uint32_t unmatched_as_null)1029 static inline void add_offset_pair(
1030 		zval *result, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset,
1031 		zend_string *name, uint32_t unmatched_as_null)
1032 {
1033 	zval match_pair;
1034 
1035 	/* Add (match, offset) to the return value */
1036 	if (PCRE2_UNSET == start_offset) {
1037 		if (unmatched_as_null) {
1038 			if (Z_ISUNDEF(PCRE_G(unmatched_null_pair))) {
1039 				init_unmatched_null_pair();
1040 			}
1041 			ZVAL_COPY(&match_pair, &PCRE_G(unmatched_null_pair));
1042 		} else {
1043 			if (Z_ISUNDEF(PCRE_G(unmatched_empty_pair))) {
1044 				init_unmatched_empty_pair();
1045 			}
1046 			ZVAL_COPY(&match_pair, &PCRE_G(unmatched_empty_pair));
1047 		}
1048 	} else {
1049 		zval val1, val2;
1050 		populate_match_value_str(&val1, subject, start_offset, end_offset);
1051 		ZVAL_LONG(&val2, start_offset);
1052 		ZVAL_ARR(&match_pair, zend_new_pair(&val1, &val2));
1053 	}
1054 
1055 	if (name) {
1056 		add_named(result, name, &match_pair, start_offset == PCRE2_UNSET);
1057 	}
1058 	zend_hash_next_index_insert(Z_ARRVAL_P(result), &match_pair);
1059 }
1060 /* }}} */
1061 
/* Fills `subpats` with the capture groups of a single match.
 *
 * subpats       array zval that receives the entries
 * subject       subject the offsets refer to
 * offsets       (start, end) offset pairs, one pair per group
 * subpat_names  optional table mapping group number to name (NULL entries
 *               for unnamed groups); NULL when the pattern has no names
 * num_subpats   total number of groups in the pattern, group 0 included
 * count         number of groups set by this match
 * mark          MARK name to store under "MARK", or NULL
 * flags         PREG_OFFSET_CAPTURE and PREG_UNMATCHED_AS_NULL are honoured
 *
 * Groups beyond `count` are padded only when PREG_UNMATCHED_AS_NULL is set. */
static void populate_subpat_array(
		zval *subpats, const char *subject, PCRE2_SIZE *offsets, zend_string **subpat_names,
		uint32_t num_subpats, int count, const PCRE2_SPTR mark, zend_long flags) {
	zend_bool offset_capture = (flags & PREG_OFFSET_CAPTURE) != 0;
	zend_bool unmatched_as_null = (flags & PREG_UNMATCHED_AS_NULL) != 0;
	zval val;
	int idx;

	if (offset_capture) {
		/* Every entry is a (value, offset) pair. */
		for (idx = 0; idx < count; idx++) {
			add_offset_pair(
				subpats, subject, offsets[2*idx], offsets[2*idx+1],
				subpat_names ? subpat_names[idx] : NULL, unmatched_as_null);
		}
		if (unmatched_as_null) {
			for (idx = count; idx < num_subpats; idx++) {
				add_offset_pair(
					subpats, NULL, PCRE2_UNSET, PCRE2_UNSET,
					subpat_names ? subpat_names[idx] : NULL, 1);
			}
		}
	} else {
		/* Every entry is a plain value. */
		for (idx = 0; idx < count; idx++) {
			populate_match_value(
				&val, subject, offsets[2*idx], offsets[2*idx+1], unmatched_as_null);
			if (subpat_names && subpat_names[idx]) {
				add_named(subpats, subpat_names[idx], &val, offsets[2*idx] == PCRE2_UNSET);
			}
			zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &val);
		}
		if (unmatched_as_null) {
			for (idx = count; idx < num_subpats; idx++) {
				if (subpat_names) {
					ZVAL_NULL(&val);
					if (subpat_names[idx]) {
						/* Add only: never overwrite a same-named group that matched. */
						zend_hash_add(Z_ARRVAL_P(subpats), subpat_names[idx], &val);
					}
					zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &val);
				} else {
					add_next_index_null(subpats);
				}
			}
		}
	}

	/* Add MARK, if available */
	if (mark) {
		add_assoc_string_ex(subpats, "MARK", sizeof("MARK") - 1, (char *)mark);
	}
}
1129 
/* Shared implementation behind preg_match() (global == 0) and
 * preg_match_all() (global == 1): parses the PHP-level arguments, fetches the
 * compiled pattern and delegates to php_pcre_match_impl(). Returns false to
 * the script when the pattern cannot be obtained. */
static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) /* {{{ */
{
	zend_string		 *regex;			/* Regular expression */
	zend_string		 *subject;			/* String to match against */
	zval			 *subpats = NULL;	/* Array for subpatterns */
	zend_long		  flags = 0;		/* Match control flags */
	zend_long		  start_offset = 0;	/* Where the new search starts */
	pcre_cache_entry *pce;				/* Compiled regular expression */
	int				  use_flags;		/* Was the flags argument passed? */

	ZEND_PARSE_PARAMETERS_START(2, 5)
		Z_PARAM_STR(regex)
		Z_PARAM_STR(subject)
		Z_PARAM_OPTIONAL
		Z_PARAM_ZVAL(subpats)
		Z_PARAM_LONG(flags)
		Z_PARAM_LONG(start_offset)
	ZEND_PARSE_PARAMETERS_END();

	/* Compile regex or get it from cache. */
	pce = pcre_get_compiled_regex_cache(regex);
	if (pce == NULL) {
		RETURN_FALSE;
	}

	/* Flags are honoured only when the fourth argument was actually passed. */
	use_flags = ZEND_NUM_ARGS() >= 4;

	/* Hold a reference on the entry for the duration of the match
	 * (presumably so the cache does not drop it meanwhile - confirm against
	 * the cache cleanup code). */
	pce->refcount++;
	php_pcre_match_impl(pce, subject, return_value, subpats,
		global, use_flags, flags, start_offset);
	pce->refcount--;
}
/* }}} */
1160 
is_known_valid_utf8(zend_string * subject_str,PCRE2_SIZE start_offset)1161 static zend_always_inline zend_bool is_known_valid_utf8(
1162 		zend_string *subject_str, PCRE2_SIZE start_offset) {
1163 	if (!(GC_FLAGS(subject_str) & IS_STR_VALID_UTF8)) {
1164 		/* We don't know whether the string is valid UTF-8 or not. */
1165 		return 0;
1166 	}
1167 
1168 	if (start_offset == ZSTR_LEN(subject_str)) {
1169 		/* Degenerate case: Offset points to end of string. */
1170 		return 1;
1171 	}
1172 
1173 	/* Check that the offset does not point to an UTF-8 continuation byte. */
1174 	return (ZSTR_VAL(subject_str)[start_offset] & 0xc0) != 0x80;
1175 }
1176 
1177 /* {{{ php_pcre_match_impl() */
php_pcre_match_impl(pcre_cache_entry * pce,zend_string * subject_str,zval * return_value,zval * subpats,int global,int use_flags,zend_long flags,zend_off_t start_offset)1178 PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str, zval *return_value,
1179 	zval *subpats, int global, int use_flags, zend_long flags, zend_off_t start_offset)
1180 {
1181 	zval			 result_set,		/* Holds a set of subpatterns after
1182 										   a global match */
1183 					*match_sets = NULL;	/* An array of sets of matches for each
1184 										   subpattern after a global match */
1185 	uint32_t		 options;			/* Execution options */
1186 	int				 count;				/* Count of matched subpatterns */
1187 	PCRE2_SIZE		*offsets;			/* Array of subpattern offsets */
1188 	uint32_t		 num_subpats;		/* Number of captured subpatterns */
1189 	int				 matched;			/* Has anything matched */
1190 	zend_string	   **subpat_names;		/* Array for named subpatterns */
1191 	size_t			 i;
1192 	uint32_t		 subpats_order;		/* Order of subpattern matches */
1193 	uint32_t		 offset_capture;	/* Capture match offsets: yes/no */
1194 	uint32_t		 unmatched_as_null;	/* Null non-matches: yes/no */
1195 	PCRE2_SPTR       mark = NULL;		/* Target for MARK name */
1196 	zval			 marks;				/* Array of marks for PREG_PATTERN_ORDER */
1197 	pcre2_match_data *match_data;
1198 	PCRE2_SIZE		 start_offset2, orig_start_offset;
1199 
1200 	char *subject = ZSTR_VAL(subject_str);
1201 	size_t subject_len = ZSTR_LEN(subject_str);
1202 
1203 	ZVAL_UNDEF(&marks);
1204 
1205 	/* Overwrite the passed-in value for subpatterns with an empty array. */
1206 	if (subpats != NULL) {
1207 		subpats = zend_try_array_init(subpats);
1208 		if (!subpats) {
1209 			RETURN_THROWS();
1210 		}
1211 	}
1212 
1213 	subpats_order = global ? PREG_PATTERN_ORDER : 0;
1214 
1215 	if (use_flags) {
1216 		offset_capture = flags & PREG_OFFSET_CAPTURE;
1217 		unmatched_as_null = flags & PREG_UNMATCHED_AS_NULL;
1218 
1219 		/*
1220 		 * subpats_order is pre-set to pattern mode so we change it only if
1221 		 * necessary.
1222 		 */
1223 		if (flags & 0xff) {
1224 			subpats_order = flags & 0xff;
1225 		}
1226 		if ((global && (subpats_order < PREG_PATTERN_ORDER || subpats_order > PREG_SET_ORDER)) ||
1227 			(!global && subpats_order != 0)) {
1228 			zend_argument_value_error(4, "must be a PREG_* constant");
1229 			RETURN_THROWS();
1230 		}
1231 	} else {
1232 		offset_capture = 0;
1233 		unmatched_as_null = 0;
1234 	}
1235 
1236 	/* Negative offset counts from the end of the string. */
1237 	if (start_offset < 0) {
1238 		if ((PCRE2_SIZE)-start_offset <= subject_len) {
1239 			start_offset2 = subject_len + start_offset;
1240 		} else {
1241 			start_offset2 = 0;
1242 		}
1243 	} else {
1244 		start_offset2 = (PCRE2_SIZE)start_offset;
1245 	}
1246 
1247 	if (start_offset2 > subject_len) {
1248 		pcre_handle_exec_error(PCRE2_ERROR_BADOFFSET);
1249 		RETURN_FALSE;
1250 	}
1251 
1252 	/* Calculate the size of the offsets array, and allocate memory for it. */
1253 	num_subpats = pce->capture_count + 1;
1254 
1255 	/*
1256 	 * Build a mapping from subpattern numbers to their names. We will
1257 	 * allocate the table only if there are any named subpatterns.
1258 	 */
1259 	subpat_names = NULL;
1260 	if (subpats && pce->name_count > 0) {
1261 		subpat_names = make_subpats_table(num_subpats, pce);
1262 		if (!subpat_names) {
1263 			RETURN_FALSE;
1264 		}
1265 	}
1266 
1267 	/* Allocate match sets array and initialize the values. */
1268 	if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
1269 		match_sets = (zval *)safe_emalloc(num_subpats, sizeof(zval), 0);
1270 		for (i=0; i<num_subpats; i++) {
1271 			array_init(&match_sets[i]);
1272 		}
1273 	}
1274 
1275 	matched = 0;
1276 	PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1277 
1278 	if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
1279 		match_data = mdata;
1280 	} else {
1281 		match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
1282 		if (!match_data) {
1283 			PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1284 			if (subpat_names) {
1285 				free_subpats_table(subpat_names, num_subpats);
1286 			}
1287 			if (match_sets) {
1288 				efree(match_sets);
1289 			}
1290 			RETURN_FALSE;
1291 		}
1292 	}
1293 
1294 	orig_start_offset = start_offset2;
1295 	options =
1296 		(pce->compile_options & PCRE2_UTF) && !is_known_valid_utf8(subject_str, orig_start_offset)
1297 			? 0 : PCRE2_NO_UTF_CHECK;
1298 
1299 	/* Execute the regular expression. */
1300 #ifdef HAVE_PCRE_JIT_SUPPORT
1301 	if ((pce->preg_options & PREG_JIT) && options) {
1302 		count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1303 				PCRE2_NO_UTF_CHECK, match_data, mctx);
1304 	} else
1305 #endif
1306 	count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1307 			options, match_data, mctx);
1308 
1309 	while (1) {
1310 		/* If something has matched */
1311 		if (count >= 0) {
1312 			/* Check for too many substrings condition. */
1313 			if (UNEXPECTED(count == 0)) {
1314 				php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
1315 				count = num_subpats;
1316 			}
1317 
1318 matched:
1319 			matched++;
1320 
1321 			offsets = pcre2_get_ovector_pointer(match_data);
1322 
1323 			/* If subpatterns array has been passed, fill it in with values. */
1324 			if (subpats != NULL) {
1325 				/* Try to get the list of substrings and display a warning if failed. */
1326 				if (offsets[1] < offsets[0]) {
1327 					if (subpat_names) {
1328 						free_subpats_table(subpat_names, num_subpats);
1329 					}
1330 					if (match_sets) efree(match_sets);
1331 					php_error_docref(NULL, E_WARNING, "Get subpatterns list failed");
1332 					RETURN_FALSE;
1333 				}
1334 
1335 				if (global) {	/* global pattern matching */
1336 					if (subpats && subpats_order == PREG_PATTERN_ORDER) {
1337 						/* For each subpattern, insert it into the appropriate array. */
1338 						if (offset_capture) {
1339 							for (i = 0; i < count; i++) {
1340 								add_offset_pair(
1341 									&match_sets[i], subject, offsets[2*i], offsets[2*i+1],
1342 									NULL, unmatched_as_null);
1343 							}
1344 						} else {
1345 							for (i = 0; i < count; i++) {
1346 								zval val;
1347 								populate_match_value(
1348 									&val, subject, offsets[2*i], offsets[2*i+1], unmatched_as_null);
1349 								zend_hash_next_index_insert_new(Z_ARRVAL(match_sets[i]), &val);
1350 							}
1351 						}
1352 						mark = pcre2_get_mark(match_data);
1353 						/* Add MARK, if available */
1354 						if (mark) {
1355 							if (Z_TYPE(marks) == IS_UNDEF) {
1356 								array_init(&marks);
1357 							}
1358 							add_index_string(&marks, matched - 1, (char *) mark);
1359 						}
1360 						/*
1361 						 * If the number of captured subpatterns on this run is
1362 						 * less than the total possible number, pad the result
1363 						 * arrays with NULLs or empty strings.
1364 						 */
1365 						if (count < num_subpats) {
1366 							for (; i < num_subpats; i++) {
1367 								if (offset_capture) {
1368 									add_offset_pair(
1369 										&match_sets[i], NULL, PCRE2_UNSET, PCRE2_UNSET,
1370 										NULL, unmatched_as_null);
1371 								} else if (unmatched_as_null) {
1372 									add_next_index_null(&match_sets[i]);
1373 								} else {
1374 									add_next_index_str(&match_sets[i], ZSTR_EMPTY_ALLOC());
1375 								}
1376 							}
1377 						}
1378 					} else {
1379 						/* Allocate and populate the result set array */
1380 						array_init_size(&result_set, count + (mark ? 1 : 0));
1381 						mark = pcre2_get_mark(match_data);
1382 						populate_subpat_array(
1383 							&result_set, subject, offsets, subpat_names,
1384 							num_subpats, count, mark, flags);
1385 						/* And add it to the output array */
1386 						zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &result_set);
1387 					}
1388 				} else {			/* single pattern matching */
1389 					/* For each subpattern, insert it into the subpatterns array. */
1390 					mark = pcre2_get_mark(match_data);
1391 					populate_subpat_array(
1392 						subpats, subject, offsets, subpat_names, num_subpats, count, mark, flags);
1393 					break;
1394 				}
1395 			}
1396 
1397 			/* Advance to the next piece. */
1398 			start_offset2 = offsets[1];
1399 
1400 			/* If we have matched an empty string, mimic what Perl's /g options does.
1401 			   This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
1402 			   the match again at the same point. If this fails (picked up above) we
1403 			   advance to the next character. */
1404 			if (start_offset2 == offsets[0]) {
1405 				count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1406 					PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
1407 				if (count >= 0) {
1408 					if (global) {
1409 						goto matched;
1410 					} else {
1411 						break;
1412 					}
1413 				} else if (count == PCRE2_ERROR_NOMATCH) {
1414 					/* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
1415 					   this is not necessarily the end. We need to advance
1416 					   the start offset, and continue. Fudge the offset values
1417 					   to achieve this, unless we're already at the end of the string. */
1418 					if (start_offset2 < subject_len) {
1419 						size_t unit_len = calculate_unit_length(pce, subject + start_offset2);
1420 
1421 						start_offset2 += unit_len;
1422 					} else {
1423 						break;
1424 					}
1425 				} else {
1426 					goto error;
1427 				}
1428 			}
1429 		} else if (count == PCRE2_ERROR_NOMATCH) {
1430 			break;
1431 		} else {
1432 error:
1433 			pcre_handle_exec_error(count);
1434 			break;
1435 		}
1436 
1437 		if (!global) {
1438 			break;
1439 		}
1440 
1441 		/* Execute the regular expression. */
1442 #ifdef HAVE_PCRE_JIT_SUPPORT
1443 		if ((pce->preg_options & PREG_JIT)) {
1444 			if (PCRE2_UNSET == start_offset2 || start_offset2 > subject_len) {
1445 				pcre_handle_exec_error(PCRE2_ERROR_BADOFFSET);
1446 				break;
1447 			}
1448 			count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1449 					PCRE2_NO_UTF_CHECK, match_data, mctx);
1450 		} else
1451 #endif
1452 		count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1453 				PCRE2_NO_UTF_CHECK, match_data, mctx);
1454 	}
1455 	if (match_data != mdata) {
1456 		pcre2_match_data_free(match_data);
1457 	}
1458 
1459 	/* Add the match sets to the output array and clean up */
1460 	if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
1461 		if (subpat_names) {
1462 			for (i = 0; i < num_subpats; i++) {
1463 				if (subpat_names[i]) {
1464 					zend_hash_update(Z_ARRVAL_P(subpats), subpat_names[i], &match_sets[i]);
1465 					Z_ADDREF(match_sets[i]);
1466 				}
1467 				zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i]);
1468 			}
1469 		} else {
1470 			for (i = 0; i < num_subpats; i++) {
1471 				zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i]);
1472 			}
1473 		}
1474 		efree(match_sets);
1475 
1476 		if (Z_TYPE(marks) != IS_UNDEF) {
1477 			add_assoc_zval(subpats, "MARK", &marks);
1478 		}
1479 	}
1480 
1481 	if (subpat_names) {
1482 		free_subpats_table(subpat_names, num_subpats);
1483 	}
1484 
1485 	if (PCRE_G(error_code) == PHP_PCRE_NO_ERROR) {
1486 		/* If there was no error and we're in /u mode, remember that the string is valid UTF-8. */
1487 		if ((pce->compile_options & PCRE2_UTF)
1488 				&& !ZSTR_IS_INTERNED(subject_str) && orig_start_offset == 0) {
1489 			GC_ADD_FLAGS(subject_str, IS_STR_VALID_UTF8);
1490 		}
1491 
1492 		RETVAL_LONG(matched);
1493 	} else {
1494 		RETVAL_FALSE;
1495 	}
1496 }
1497 /* }}} */
1498 
1499 /* {{{ Perform a Perl-style regular expression match */
PHP_FUNCTION(preg_match)1500 PHP_FUNCTION(preg_match)
1501 {
1502 	php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
1503 }
1504 /* }}} */
1505 
1506 /* {{{ Perform a Perl-style global regular expression match */
PHP_FUNCTION(preg_match_all)1507 PHP_FUNCTION(preg_match_all)
1508 {
1509 	php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
1510 }
1511 /* }}} */
1512 
/* {{{ preg_get_backref
 * Tries to parse a backreference at *str, which the callers point at a
 * '\' or '$'. Accepted forms are the introducer followed by one or two
 * decimal digits ("\1", "$12") and the braced dollar form "${1}" / "${12}".
 *
 * On success stores the number in *backref, advances *str past the
 * reference and returns 1. On failure returns 0 and leaves *str untouched;
 * *backref may already have been written if at least one digit was read. */
static int preg_get_backref(char **str, int *backref)
{
	char *cursor = *str;
	int braced;

	/* Nothing follows the introducer. */
	if (cursor[1] == '\0') {
		return 0;
	}

	/* Only '$' may open a braced reference. */
	braced = (cursor[0] == '$' && cursor[1] == '{');
	cursor += braced ? 2 : 1;

	/* First digit is mandatory. */
	if (*cursor < '0' || *cursor > '9') {
		return 0;
	}
	*backref = *cursor - '0';
	cursor++;

	/* Optional second digit. */
	if (*cursor >= '0' && *cursor <= '9') {
		*backref = *backref * 10 + *cursor - '0';
		cursor++;
	}

	if (braced) {
		if (*cursor != '}') {
			return 0;
		}
		cursor++;
	}

	*str = cursor;
	return 1;
}
/* }}} */
1550 
1551 /* {{{ preg_do_repl_func */
preg_do_repl_func(zend_fcall_info * fci,zend_fcall_info_cache * fcc,const char * subject,PCRE2_SIZE * offsets,zend_string ** subpat_names,uint32_t num_subpats,int count,const PCRE2_SPTR mark,zend_long flags)1552 static zend_string *preg_do_repl_func(zend_fcall_info *fci, zend_fcall_info_cache *fcc, const char *subject, PCRE2_SIZE *offsets, zend_string **subpat_names, uint32_t num_subpats, int count, const PCRE2_SPTR mark, zend_long flags)
1553 {
1554 	zend_string *result_str;
1555 	zval		 retval;			/* Function return value */
1556 	zval	     arg;				/* Argument to pass to function */
1557 
1558 	array_init_size(&arg, count + (mark ? 1 : 0));
1559 	populate_subpat_array(&arg, subject, offsets, subpat_names, num_subpats, count, mark, flags);
1560 
1561 	fci->retval = &retval;
1562 	fci->param_count = 1;
1563 	fci->params = &arg;
1564 
1565 	if (zend_call_function(fci, fcc) == SUCCESS && Z_TYPE(retval) != IS_UNDEF) {
1566 		if (EXPECTED(Z_TYPE(retval) == IS_STRING)) {
1567 			result_str = Z_STR(retval);
1568 		} else {
1569 			result_str = zval_get_string_func(&retval);
1570 			zval_ptr_dtor(&retval);
1571 		}
1572 	} else {
1573 		if (!EG(exception)) {
1574 			php_error_docref(NULL, E_WARNING, "Unable to call custom replacement function");
1575 		}
1576 
1577 		result_str = zend_string_init(&subject[offsets[0]], offsets[1] - offsets[0], 0);
1578 	}
1579 
1580 	zval_ptr_dtor(&arg);
1581 
1582 	return result_str;
1583 }
1584 /* }}} */
1585 
1586 /* {{{ php_pcre_replace */
php_pcre_replace(zend_string * regex,zend_string * subject_str,const char * subject,size_t subject_len,zend_string * replace_str,size_t limit,size_t * replace_count)1587 PHPAPI zend_string *php_pcre_replace(zend_string *regex,
1588 							  zend_string *subject_str,
1589 							  const char *subject, size_t subject_len,
1590 							  zend_string *replace_str,
1591 							  size_t limit, size_t *replace_count)
1592 {
1593 	pcre_cache_entry	*pce;			    /* Compiled regular expression */
1594 	zend_string	 		*result;			/* Function result */
1595 
1596 	/* Abort on pending exception, e.g. thrown from __toString(). */
1597 	if (UNEXPECTED(EG(exception))) {
1598 		return NULL;
1599 	}
1600 
1601 	/* Compile regex or get it from cache. */
1602 	if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
1603 		return NULL;
1604 	}
1605 	pce->refcount++;
1606 	result = php_pcre_replace_impl(pce, subject_str, subject, subject_len, replace_str,
1607 		limit, replace_count);
1608 	pce->refcount--;
1609 
1610 	return result;
1611 }
1612 /* }}} */
1613 
/* {{{ php_pcre_replace_impl()
 * Replaces matches of the compiled pattern `pce` in `subject` with
 * `replace_str`, expanding the backreferences recognised by
 * preg_get_backref().
 *
 * pce            compiled pattern (cache entry)
 * subject_str    optional zend_string holding the subject; when non-NULL and
 *                nothing was replaced, zend_string_copy(subject_str) is
 *                returned instead of building a new string
 * subject        subject bytes
 * subject_len    length of `subject`
 * replace_str    replacement template
 * limit          number of replacements still allowed; decremented for each
 *                replacement, and matching stops once it reaches 0
 * replace_count  optional counter, incremented once per replacement
 *
 * Returns the resulting string, or NULL on error (the error is recorded in
 * PCRE_G(error_code), directly or through pcre_handle_exec_error()).
 */
PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *subject_str, const char *subject, size_t subject_len, zend_string *replace_str, size_t limit, size_t *replace_count)
{
	uint32_t		 options;			/* Execution options */
	int				 count;				/* Count of matched subpatterns */
	PCRE2_SIZE		*offsets;			/* Array of subpattern offsets */
	uint32_t		 num_subpats;		/* Number of captured subpatterns */
	size_t			 new_len;			/* Length of needed storage */
	size_t			 alloc_len;			/* Actual allocated length */
	size_t			 match_len;			/* Length of the current match */
	int				 backref;			/* Backreference number */
	PCRE2_SIZE		 start_offset;		/* Where the new search starts */
	size_t			 last_end_offset;	/* Where the last search ended */
	char			*walkbuf,			/* Location of current replacement in the result */
					*walk,				/* Used to walk the replacement string */
					 walk_last;			/* Last walked character */
	const char		*match,				/* The current match */
					*piece,				/* The current piece of subject */
					*replace_end;		/* End of replacement string */
	size_t			result_len; 		/* Length of result */
	zend_string		*result;			/* Result of replacement */
	pcre2_match_data *match_data;

	/* Calculate the size of the offsets array, and allocate memory for it. */
	num_subpats = pce->capture_count + 1;
	alloc_len = 0;
	result = NULL;

	/* Initialize */
	match = NULL;
	start_offset = 0;
	last_end_offset = 0;
	result_len = 0;
	PCRE_G(error_code) = PHP_PCRE_NO_ERROR;

	/* Use the shared match data block when it is free and large enough,
	 * otherwise create one from the pattern. */
	if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
		match_data = mdata;
	} else {
		match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
		if (!match_data) {
			PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
			return NULL;
		}
	}

	/* UTF patterns get the subject validated on the first match only; all
	 * later calls below pass PCRE2_NO_UTF_CHECK. */
	options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;

	/* Execute the regular expression. */
#ifdef HAVE_PCRE_JIT_SUPPORT
	/* The JIT entry point is used only when no UTF check is needed. */
	if ((pce->preg_options & PREG_JIT) && options) {
		count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
				PCRE2_NO_UTF_CHECK, match_data, mctx);
	} else
#endif
	count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
			options, match_data, mctx);

	while (1) {
		piece = subject + last_end_offset;

		if (count >= 0 && limit > 0) {
			zend_bool simple_string;

			/* Check for too many substrings condition. */
			if (UNEXPECTED(count == 0)) {
				php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
				count = num_subpats;
			}

matched:
			offsets = pcre2_get_ovector_pointer(match_data);

			/* A match that ends before it starts is treated as an internal error. */
			if (UNEXPECTED(offsets[1] < offsets[0])) {
				PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
				if (result) {
					zend_string_release_ex(result, 0);
					result = NULL;
				}
				break;
			}

			if (replace_count) {
				++*replace_count;
			}

			/* Set the match location in subject */
			match = subject + offsets[0];

			new_len = result_len + offsets[0] - last_end_offset; /* part before the match */

			/* First pass over the replacement: compute the expanded length and
			 * detect whether it contains any '\' or '$' at all. */
			walk = ZSTR_VAL(replace_str);
			replace_end = walk + ZSTR_LEN(replace_str);
			walk_last = 0;
			simple_string = 1;
			while (walk < replace_end) {
				if ('\\' == *walk || '$' == *walk) {
					simple_string = 0;
					/* Escaped '\' or '$': it will overwrite the preceding
					 * backslash in the output, so it adds no length. */
					if (walk_last == '\\') {
						walk++;
						walk_last = 0;
						continue;
					}
					if (preg_get_backref(&walk, &backref)) {
						if (backref < count)
							new_len += offsets[(backref<<1)+1] - offsets[backref<<1];
						continue;
					}
				}
				new_len++;
				walk++;
				walk_last = walk[-1];
			}

			/* Grow the result buffer (to roughly twice the needed size) when required. */
			if (new_len >= alloc_len) {
				alloc_len = zend_safe_address_guarded(2, new_len, ZSTR_MAX_OVERHEAD) - ZSTR_MAX_OVERHEAD;
				if (result == NULL) {
					result = zend_string_alloc(alloc_len, 0);
				} else {
					result = zend_string_extend(result, alloc_len, 0);
				}
			}

			if (match-piece > 0) {
				/* copy the part of the string before the match */
				memcpy(&ZSTR_VAL(result)[result_len], piece, match-piece);
				result_len += (match-piece);
			}

			if (simple_string) {
				/* copy replacement (including its terminating NUL byte) */
				memcpy(&ZSTR_VAL(result)[result_len], ZSTR_VAL(replace_str), ZSTR_LEN(replace_str)+1);
				result_len += ZSTR_LEN(replace_str);
			} else {
				/* copy replacement and backrefs */
				walkbuf = ZSTR_VAL(result) + result_len;

				walk = ZSTR_VAL(replace_str);
				walk_last = 0;
				while (walk < replace_end) {
					if ('\\' == *walk || '$' == *walk) {
						if (walk_last == '\\') {
							/* Escaped character replaces the backslash already written. */
							*(walkbuf-1) = *walk++;
							walk_last = 0;
							continue;
						}
						if (preg_get_backref(&walk, &backref)) {
							/* References to groups beyond `count` expand to nothing. */
							if (backref < count) {
								match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
								memcpy(walkbuf, subject + offsets[backref<<1], match_len);
								walkbuf += match_len;
							}
							continue;
						}
					}
					*walkbuf++ = *walk++;
					walk_last = walk[-1];
				}
				*walkbuf = '\0';
				/* increment the result length by how much we've added to the string */
				result_len += (walkbuf - (ZSTR_VAL(result) + result_len));
			}

			limit--;

			/* Advance to the next piece. */
			start_offset = last_end_offset = offsets[1];

			/* If we have matched an empty string, mimic what Perl's /g option does.
			   This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
			   the match again at the same point. If this fails (picked up above) we
			   advance to the next character. */
			if (start_offset == offsets[0]) {
				count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
					PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);

				piece = subject + start_offset;
				if (count >= 0 && limit > 0) {
					goto matched;
				} else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
					/* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
					   this is not necessarily the end. We need to advance
					   the start offset, and continue. Fudge the offset values
					   to achieve this, unless we're already at the end of the string. */
					if (start_offset < subject_len) {
						size_t unit_len = calculate_unit_length(pce, piece);
						start_offset += unit_len;
					} else {
						goto not_matched;
					}
				} else {
					goto error;
				}
			}

		} else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
not_matched:
			/* Nothing was replaced and the caller supplied the subject as a
			 * zend_string: hand that back instead of building a new string. */
			if (!result && subject_str) {
				result = zend_string_copy(subject_str);
				break;
			}
			/* now we know exactly how long it is */
			alloc_len = result_len + subject_len - last_end_offset;
			if (NULL != result) {
				result = zend_string_realloc(result, alloc_len, 0);
			} else {
				result = zend_string_alloc(alloc_len, 0);
			}
			/* stick that last bit of string on our output */
			memcpy(ZSTR_VAL(result) + result_len, piece, subject_len - last_end_offset);
			result_len += subject_len - last_end_offset;
			ZSTR_VAL(result)[result_len] = '\0';
			ZSTR_LEN(result) = result_len;
			break;
		} else {
error:
			/* Record the PCRE2 error and discard any partial result. */
			pcre_handle_exec_error(count);
			if (result) {
				zend_string_release_ex(result, 0);
				result = NULL;
			}
			break;
		}

		/* Look for the next match, starting at start_offset. */
#ifdef HAVE_PCRE_JIT_SUPPORT
		if (pce->preg_options & PREG_JIT) {
			count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
					PCRE2_NO_UTF_CHECK, match_data, mctx);
		} else
#endif
		count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
					PCRE2_NO_UTF_CHECK, match_data, mctx);
	}
	/* Only a block created above is freed; the shared one is left alone. */
	if (match_data != mdata) {
		pcre2_match_data_free(match_data);
	}

	return result;
}
/* }}} */
1853 
1854 /* {{{ php_pcre_replace_func_impl() */
php_pcre_replace_func_impl(pcre_cache_entry * pce,zend_string * subject_str,const char * subject,size_t subject_len,zend_fcall_info * fci,zend_fcall_info_cache * fcc,size_t limit,size_t * replace_count,zend_long flags)1855 static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_string *subject_str, const char *subject, size_t subject_len, zend_fcall_info *fci, zend_fcall_info_cache *fcc, size_t limit, size_t *replace_count, zend_long flags)
1856 {
1857 	uint32_t		 options;			/* Execution options */
1858 	int				 count;				/* Count of matched subpatterns */
1859 	PCRE2_SIZE		*offsets;			/* Array of subpattern offsets */
1860 	zend_string		**subpat_names;		/* Array for named subpatterns */
1861 	uint32_t		 num_subpats;		/* Number of captured subpatterns */
1862 	size_t			 new_len;			/* Length of needed storage */
1863 	size_t			 alloc_len;			/* Actual allocated length */
1864 	PCRE2_SIZE		 start_offset;		/* Where the new search starts */
1865 	size_t			 last_end_offset;	/* Where the last search ended */
1866 	const char		*match,				/* The current match */
1867 					*piece;				/* The current piece of subject */
1868 	size_t			result_len; 		/* Length of result */
1869 	zend_string		*result;			/* Result of replacement */
1870 	zend_string     *eval_result;		/* Result of custom function */
1871 	pcre2_match_data *match_data;
1872 	zend_bool old_mdata_used;
1873 
1874 	/* Calculate the size of the offsets array, and allocate memory for it. */
1875 	num_subpats = pce->capture_count + 1;
1876 
1877 	/*
1878 	 * Build a mapping from subpattern numbers to their names. We will
1879 	 * allocate the table only if there are any named subpatterns.
1880 	 */
1881 	subpat_names = NULL;
1882 	if (UNEXPECTED(pce->name_count > 0)) {
1883 		subpat_names = make_subpats_table(num_subpats, pce);
1884 		if (!subpat_names) {
1885 			return NULL;
1886 		}
1887 	}
1888 
1889 	alloc_len = 0;
1890 	result = NULL;
1891 
1892 	/* Initialize */
1893 	match = NULL;
1894 	start_offset = 0;
1895 	last_end_offset = 0;
1896 	result_len = 0;
1897 	PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1898 
1899 	old_mdata_used = mdata_used;
1900 	if (!old_mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
1901 		mdata_used = 1;
1902 		match_data = mdata;
1903 	} else {
1904 		match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
1905 		if (!match_data) {
1906 			PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1907 			if (subpat_names) {
1908 				free_subpats_table(subpat_names, num_subpats);
1909 			}
1910 			mdata_used = old_mdata_used;
1911 			return NULL;
1912 		}
1913 	}
1914 
1915 	options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
1916 
1917 	/* Execute the regular expression. */
1918 #ifdef HAVE_PCRE_JIT_SUPPORT
1919 	if ((pce->preg_options & PREG_JIT) && options) {
1920 		count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1921 				PCRE2_NO_UTF_CHECK, match_data, mctx);
1922 	} else
1923 #endif
1924 	count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1925 			options, match_data, mctx);
1926 
1927 	while (1) {
1928 		piece = subject + last_end_offset;
1929 
1930 		if (count >= 0 && limit) {
1931 			/* Check for too many substrings condition. */
1932 			if (UNEXPECTED(count == 0)) {
1933 				php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
1934 				count = num_subpats;
1935 			}
1936 
1937 matched:
1938 			offsets = pcre2_get_ovector_pointer(match_data);
1939 
1940 			if (UNEXPECTED(offsets[1] < offsets[0])) {
1941 				PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1942 				if (result) {
1943 					zend_string_release_ex(result, 0);
1944 					result = NULL;
1945 				}
1946 				break;
1947 			}
1948 
1949 			if (replace_count) {
1950 				++*replace_count;
1951 			}
1952 
1953 			/* Set the match location in subject */
1954 			match = subject + offsets[0];
1955 
1956 			new_len = result_len + offsets[0] - last_end_offset; /* part before the match */
1957 
1958 			/* Use custom function to get replacement string and its length. */
1959 			eval_result = preg_do_repl_func(
1960 				fci, fcc, subject, offsets, subpat_names, num_subpats, count,
1961 				pcre2_get_mark(match_data), flags);
1962 
1963 			ZEND_ASSERT(eval_result);
1964 			new_len = zend_safe_address_guarded(1, ZSTR_LEN(eval_result) + ZSTR_MAX_OVERHEAD, new_len) -ZSTR_MAX_OVERHEAD;
1965 			if (new_len >= alloc_len) {
1966 				alloc_len = zend_safe_address_guarded(2, new_len, ZSTR_MAX_OVERHEAD) - ZSTR_MAX_OVERHEAD;
1967 				if (result == NULL) {
1968 					result = zend_string_alloc(alloc_len, 0);
1969 				} else {
1970 					result = zend_string_extend(result, alloc_len, 0);
1971 				}
1972 			}
1973 
1974 			if (match-piece > 0) {
1975 				/* copy the part of the string before the match */
1976 				memcpy(ZSTR_VAL(result) + result_len, piece, match-piece);
1977 				result_len += (match-piece);
1978 			}
1979 
1980 			/* If using custom function, copy result to the buffer and clean up. */
1981 			memcpy(ZSTR_VAL(result) + result_len, ZSTR_VAL(eval_result), ZSTR_LEN(eval_result));
1982 			result_len += ZSTR_LEN(eval_result);
1983 			zend_string_release_ex(eval_result, 0);
1984 
1985 			limit--;
1986 
1987 			/* Advance to the next piece. */
1988 			start_offset = last_end_offset = offsets[1];
1989 
1990 			/* If we have matched an empty string, mimic what Perl's /g options does.
1991 			   This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
1992 			   the match again at the same point. If this fails (picked up above) we
1993 			   advance to the next character. */
1994 			if (start_offset == offsets[0]) {
1995 				count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1996 					PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
1997 
1998 				piece = subject + start_offset;
1999 				if (count >= 0 && limit) {
2000 					goto matched;
2001 				} else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
2002 					/* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
2003 					   this is not necessarily the end. We need to advance
2004 					   the start offset, and continue. Fudge the offset values
2005 					   to achieve this, unless we're already at the end of the string. */
2006 					if (start_offset < subject_len) {
2007 						size_t unit_len = calculate_unit_length(pce, piece);
2008 						start_offset += unit_len;
2009 					} else {
2010 						goto not_matched;
2011 					}
2012 				} else {
2013 					goto error;
2014 				}
2015 			}
2016 
2017 		} else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
2018 not_matched:
2019 			if (!result && subject_str) {
2020 				result = zend_string_copy(subject_str);
2021 				break;
2022 			}
2023 			/* now we know exactly how long it is */
2024 			alloc_len = result_len + subject_len - last_end_offset;
2025 			if (NULL != result) {
2026 				result = zend_string_realloc(result, alloc_len, 0);
2027 			} else {
2028 				result = zend_string_alloc(alloc_len, 0);
2029 			}
2030 			/* stick that last bit of string on our output */
2031 			memcpy(ZSTR_VAL(result) + result_len, piece, subject_len - last_end_offset);
2032 			result_len += subject_len - last_end_offset;
2033 			ZSTR_VAL(result)[result_len] = '\0';
2034 			ZSTR_LEN(result) = result_len;
2035 			break;
2036 		} else {
2037 error:
2038 			pcre_handle_exec_error(count);
2039 			if (result) {
2040 				zend_string_release_ex(result, 0);
2041 				result = NULL;
2042 			}
2043 			break;
2044 		}
2045 #ifdef HAVE_PCRE_JIT_SUPPORT
2046 		if ((pce->preg_options & PREG_JIT)) {
2047 			count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
2048 					PCRE2_NO_UTF_CHECK, match_data, mctx);
2049 		} else
2050 #endif
2051 		count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
2052 				PCRE2_NO_UTF_CHECK, match_data, mctx);
2053 	}
2054 	if (match_data != mdata) {
2055 		pcre2_match_data_free(match_data);
2056 	}
2057 	mdata_used = old_mdata_used;
2058 
2059 	if (UNEXPECTED(subpat_names)) {
2060 		free_subpats_table(subpat_names, num_subpats);
2061 	}
2062 
2063 	return result;
2064 }
2065 /* }}} */
2066 
2067 /* {{{ php_pcre_replace_func */
php_pcre_replace_func(zend_string * regex,zend_string * subject_str,zend_fcall_info * fci,zend_fcall_info_cache * fcc,size_t limit,size_t * replace_count,zend_long flags)2068 static zend_always_inline zend_string *php_pcre_replace_func(zend_string *regex,
2069 							  zend_string *subject_str,
2070 							  zend_fcall_info *fci, zend_fcall_info_cache *fcc,
2071 							  size_t limit, size_t *replace_count, zend_long flags)
2072 {
2073 	pcre_cache_entry	*pce;			    /* Compiled regular expression */
2074 	zend_string	 		*result;			/* Function result */
2075 
2076 	/* Compile regex or get it from cache. */
2077 	if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
2078 		return NULL;
2079 	}
2080 	pce->refcount++;
2081 	result = php_pcre_replace_func_impl(
2082 		pce, subject_str, ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), fci, fcc,
2083 		limit, replace_count, flags);
2084 	pce->refcount--;
2085 
2086 	return result;
2087 }
2088 /* }}} */
2089 
2090 /* {{{ php_pcre_replace_array */
php_pcre_replace_array(HashTable * regex,zend_string * replace_str,HashTable * replace_ht,zend_string * subject_str,size_t limit,size_t * replace_count)2091 static zend_string *php_pcre_replace_array(HashTable *regex,
2092 	zend_string *replace_str, HashTable *replace_ht,
2093 	zend_string *subject_str, size_t limit, size_t *replace_count)
2094 {
2095 	zval		*regex_entry;
2096 	zend_string *result;
2097 
2098 	zend_string_addref(subject_str);
2099 
2100 	if (replace_ht) {
2101 		uint32_t replace_idx = 0;
2102 
2103 		/* For each entry in the regex array, get the entry */
2104 		ZEND_HASH_FOREACH_VAL(regex, regex_entry) {
2105 			/* Make sure we're dealing with strings. */
2106 			zend_string *tmp_regex_str;
2107 			zend_string *regex_str = zval_get_tmp_string(regex_entry, &tmp_regex_str);
2108 			zend_string *replace_entry_str, *tmp_replace_entry_str;
2109 			zval *zv;
2110 
2111 			/* Get current entry */
2112 			while (1) {
2113 				if (replace_idx == replace_ht->nNumUsed) {
2114 					replace_entry_str = ZSTR_EMPTY_ALLOC();
2115 					tmp_replace_entry_str = NULL;
2116 					break;
2117 				}
2118 				zv = &replace_ht->arData[replace_idx].val;
2119 				replace_idx++;
2120 				if (Z_TYPE_P(zv) != IS_UNDEF) {
2121 					replace_entry_str = zval_get_tmp_string(zv, &tmp_replace_entry_str);
2122 					break;
2123 				}
2124 			}
2125 
2126 			/* Do the actual replacement and put the result back into subject_str
2127 			   for further replacements. */
2128 			result = php_pcre_replace(regex_str, subject_str, ZSTR_VAL(subject_str),
2129 				ZSTR_LEN(subject_str), replace_entry_str, limit, replace_count);
2130 			zend_tmp_string_release(tmp_replace_entry_str);
2131 			zend_tmp_string_release(tmp_regex_str);
2132 			zend_string_release_ex(subject_str, 0);
2133 			subject_str = result;
2134 			if (UNEXPECTED(result == NULL)) {
2135 				break;
2136 			}
2137 		} ZEND_HASH_FOREACH_END();
2138 
2139 	} else {
2140 		ZEND_ASSERT(replace_str != NULL);
2141 
2142 		/* For each entry in the regex array, get the entry */
2143 		ZEND_HASH_FOREACH_VAL(regex, regex_entry) {
2144 			/* Make sure we're dealing with strings. */
2145 			zend_string *tmp_regex_str;
2146 			zend_string *regex_str = zval_get_tmp_string(regex_entry, &tmp_regex_str);
2147 
2148 			/* Do the actual replacement and put the result back into subject_str
2149 			   for further replacements. */
2150 			result = php_pcre_replace(regex_str, subject_str, ZSTR_VAL(subject_str),
2151 				ZSTR_LEN(subject_str), replace_str, limit, replace_count);
2152 			zend_tmp_string_release(tmp_regex_str);
2153 			zend_string_release_ex(subject_str, 0);
2154 			subject_str = result;
2155 
2156 			if (UNEXPECTED(result == NULL)) {
2157 				break;
2158 			}
2159 		} ZEND_HASH_FOREACH_END();
2160 	}
2161 
2162 	return subject_str;
2163 }
2164 /* }}} */
2165 
2166 /* {{{ php_replace_in_subject */
php_replace_in_subject(zend_string * regex_str,HashTable * regex_ht,zend_string * replace_str,HashTable * replace_ht,zend_string * subject,size_t limit,size_t * replace_count)2167 static zend_always_inline zend_string *php_replace_in_subject(
2168 	zend_string *regex_str, HashTable *regex_ht,
2169 	zend_string *replace_str, HashTable *replace_ht,
2170 	zend_string *subject, size_t limit, size_t *replace_count)
2171 {
2172 	zend_string *result;
2173 
2174 	if (regex_str) {
2175 		ZEND_ASSERT(replace_str != NULL);
2176 		result = php_pcre_replace(regex_str, subject, ZSTR_VAL(subject), ZSTR_LEN(subject),
2177 			replace_str, limit, replace_count);
2178 	} else {
2179 		ZEND_ASSERT(regex_ht != NULL);
2180 		result = php_pcre_replace_array(regex_ht, replace_str, replace_ht, subject,
2181 			limit, replace_count);
2182 	}
2183 	return result;
2184 }
2185 /* }}} */
2186 
2187 /* {{{ php_replace_in_subject_func */
php_replace_in_subject_func(zend_string * regex_str,HashTable * regex_ht,zend_fcall_info * fci,zend_fcall_info_cache * fcc,zend_string * subject,size_t limit,size_t * replace_count,zend_long flags)2188 static zend_string *php_replace_in_subject_func(zend_string *regex_str, HashTable *regex_ht,
2189 	zend_fcall_info *fci, zend_fcall_info_cache *fcc,
2190 	zend_string *subject, size_t limit, size_t *replace_count, zend_long flags)
2191 {
2192 	zend_string *result;
2193 
2194 	if (regex_str) {
2195 		result = php_pcre_replace_func(
2196 			regex_str, subject, fci, fcc, limit, replace_count, flags);
2197 		return result;
2198 	} else {
2199 		/* If regex is an array */
2200 		zval		*regex_entry;
2201 
2202 		ZEND_ASSERT(regex_ht != NULL);
2203 
2204 		zend_string_addref(subject);
2205 
2206 		/* For each entry in the regex array, get the entry */
2207 		ZEND_HASH_FOREACH_VAL(regex_ht, regex_entry) {
2208 			/* Make sure we're dealing with strings. */
2209 			zend_string *tmp_regex_entry_str;
2210 			zend_string *regex_entry_str = zval_get_tmp_string(regex_entry, &tmp_regex_entry_str);
2211 
2212 			/* Do the actual replacement and put the result back into subject
2213 			   for further replacements. */
2214 			result = php_pcre_replace_func(
2215 				regex_entry_str, subject, fci, fcc, limit, replace_count, flags);
2216 			zend_tmp_string_release(tmp_regex_entry_str);
2217 			zend_string_release(subject);
2218 			subject = result;
2219 			if (UNEXPECTED(result == NULL)) {
2220 				break;
2221 			}
2222 		} ZEND_HASH_FOREACH_END();
2223 
2224 		return subject;
2225 	}
2226 }
2227 /* }}} */
2228 
2229 /* {{{ preg_replace_func_impl */
preg_replace_func_impl(zval * return_value,zend_string * regex_str,HashTable * regex_ht,zend_fcall_info * fci,zend_fcall_info_cache * fcc,zend_string * subject_str,HashTable * subject_ht,zend_long limit_val,zend_long flags)2230 static size_t preg_replace_func_impl(zval *return_value,
2231 	zend_string *regex_str, HashTable *regex_ht,
2232 	zend_fcall_info *fci, zend_fcall_info_cache *fcc,
2233 	zend_string *subject_str, HashTable *subject_ht, zend_long limit_val, zend_long flags)
2234 {
2235 	zend_string	*result;
2236 	size_t replace_count = 0;
2237 
2238 	if (subject_str) {
2239 		result = php_replace_in_subject_func(
2240 			regex_str, regex_ht, fci, fcc, subject_str, limit_val, &replace_count, flags);
2241 		if (result != NULL) {
2242 			RETVAL_STR(result);
2243 		} else {
2244 			RETVAL_NULL();
2245 		}
2246 	} else {
2247 		/* if subject is an array */
2248 		zval		*subject_entry, zv;
2249 		zend_string	*string_key;
2250 		zend_ulong	 num_key;
2251 
2252 		ZEND_ASSERT(subject_ht != NULL);
2253 
2254 		array_init_size(return_value, zend_hash_num_elements(subject_ht));
2255 
2256 		/* For each subject entry, convert it to string, then perform replacement
2257 		   and add the result to the return_value array. */
2258 		ZEND_HASH_FOREACH_KEY_VAL(subject_ht, num_key, string_key, subject_entry) {
2259 			zend_string *tmp_subject_entry_str;
2260 			zend_string *subject_entry_str = zval_get_tmp_string(subject_entry, &tmp_subject_entry_str);
2261 
2262 			result = php_replace_in_subject_func(
2263 				regex_str, regex_ht, fci, fcc, subject_entry_str, limit_val, &replace_count, flags);
2264 			if (result != NULL) {
2265 				/* Add to return array */
2266 				ZVAL_STR(&zv, result);
2267 				if (string_key) {
2268 					zend_hash_add_new(Z_ARRVAL_P(return_value), string_key, &zv);
2269 				} else {
2270 					zend_hash_index_add_new(Z_ARRVAL_P(return_value), num_key, &zv);
2271 				}
2272 			}
2273 			zend_tmp_string_release(tmp_subject_entry_str);
2274 		} ZEND_HASH_FOREACH_END();
2275 	}
2276 
2277 	return replace_count;
2278 }
2279 /* }}} */
2280 
2281 /* {{{ preg_replace_common */
preg_replace_common(INTERNAL_FUNCTION_PARAMETERS,bool is_filter)2282 static void preg_replace_common(INTERNAL_FUNCTION_PARAMETERS, bool is_filter)
2283 {
2284 	zval *zcount = NULL;
2285 	zend_string *regex_str;
2286 	HashTable *regex_ht;
2287 	zend_string *replace_str;
2288 	HashTable *replace_ht;
2289 	zend_string *subject_str;
2290 	HashTable *subject_ht;
2291 	zend_long limit = -1;
2292 	size_t replace_count = 0;
2293 	zend_string	*result;
2294 	size_t old_replace_count;
2295 
2296 	/* Get function parameters and do error-checking. */
2297 	ZEND_PARSE_PARAMETERS_START(3, 5)
2298 		Z_PARAM_ARRAY_HT_OR_STR(regex_ht, regex_str)
2299 		Z_PARAM_ARRAY_HT_OR_STR(replace_ht, replace_str)
2300 		Z_PARAM_ARRAY_HT_OR_STR(subject_ht, subject_str)
2301 		Z_PARAM_OPTIONAL
2302 		Z_PARAM_LONG(limit)
2303 		Z_PARAM_ZVAL(zcount)
2304 	ZEND_PARSE_PARAMETERS_END();
2305 
2306 	/* If replace is an array then the regex argument needs to also be an array */
2307 	if (replace_ht && !regex_ht) {
2308 		zend_argument_type_error(1, "must be of type array when argument #2 ($replacement) is an array, string given");
2309 		RETURN_THROWS();
2310 	}
2311 
2312 	if (subject_str) {
2313 		old_replace_count = replace_count;
2314 		result = php_replace_in_subject(regex_str, regex_ht, replace_str, replace_ht,
2315 			subject_str, limit, &replace_count);
2316 		if (result != NULL) {
2317 			if (!is_filter || replace_count > old_replace_count) {
2318 				RETVAL_STR(result);
2319 			} else {
2320 				zend_string_release_ex(result, 0);
2321 				RETVAL_NULL();
2322 			}
2323 		} else {
2324 			RETVAL_NULL();
2325 		}
2326 	} else {
2327 		/* if subject is an array */
2328 		zval		*subject_entry, zv;
2329 		zend_string	*string_key;
2330 		zend_ulong	 num_key;
2331 
2332 		ZEND_ASSERT(subject_ht != NULL);
2333 
2334 		array_init_size(return_value, zend_hash_num_elements(subject_ht));
2335 
2336 		/* For each subject entry, convert it to string, then perform replacement
2337 		   and add the result to the return_value array. */
2338 		ZEND_HASH_FOREACH_KEY_VAL(subject_ht, num_key, string_key, subject_entry) {
2339 			old_replace_count = replace_count;
2340 			zend_string *tmp_subject_entry_str;
2341 			zend_string *subject_entry_str = zval_get_tmp_string(subject_entry, &tmp_subject_entry_str);
2342 			result = php_replace_in_subject(regex_str, regex_ht, replace_str, replace_ht,
2343 				subject_entry_str, limit, &replace_count);
2344 
2345 			if (result != NULL) {
2346 				if (!is_filter || replace_count > old_replace_count) {
2347 					/* Add to return array */
2348 					ZVAL_STR(&zv, result);
2349 					if (string_key) {
2350 						zend_hash_add_new(Z_ARRVAL_P(return_value), string_key, &zv);
2351 					} else {
2352 						zend_hash_index_add_new(Z_ARRVAL_P(return_value), num_key, &zv);
2353 					}
2354 				} else {
2355 					zend_string_release_ex(result, 0);
2356 				}
2357 			}
2358 			zend_tmp_string_release(tmp_subject_entry_str);
2359 		} ZEND_HASH_FOREACH_END();
2360 	}
2361 
2362 	if (zcount) {
2363 		ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count);
2364 	}
2365 }
2366 /* }}} */
2367 
2368 /* {{{ Perform Perl-style regular expression replacement. */
PHP_FUNCTION(preg_replace)2369 PHP_FUNCTION(preg_replace)
2370 {
2371 	preg_replace_common(INTERNAL_FUNCTION_PARAM_PASSTHRU, false);
2372 }
2373 /* }}} */
2374 
2375 /* {{{ Perform Perl-style regular expression replacement using replacement callback. */
PHP_FUNCTION(preg_replace_callback)2376 PHP_FUNCTION(preg_replace_callback)
2377 {
2378 	zval *zcount = NULL;
2379 	zend_string *regex_str;
2380 	HashTable *regex_ht;
2381 	zend_string *subject_str;
2382 	HashTable *subject_ht;
2383 	zend_long limit = -1, flags = 0;
2384 	size_t replace_count;
2385 	zend_fcall_info fci;
2386 	zend_fcall_info_cache fcc;
2387 
2388 	/* Get function parameters and do error-checking. */
2389 	ZEND_PARSE_PARAMETERS_START(3, 6)
2390 		Z_PARAM_ARRAY_HT_OR_STR(regex_ht, regex_str)
2391 		Z_PARAM_FUNC(fci, fcc)
2392 		Z_PARAM_ARRAY_HT_OR_STR(subject_ht, subject_str)
2393 		Z_PARAM_OPTIONAL
2394 		Z_PARAM_LONG(limit)
2395 		Z_PARAM_ZVAL(zcount)
2396 		Z_PARAM_LONG(flags)
2397 	ZEND_PARSE_PARAMETERS_END();
2398 
2399 	replace_count = preg_replace_func_impl(return_value, regex_str, regex_ht,
2400 		&fci, &fcc,
2401 		subject_str, subject_ht, limit, flags);
2402 	if (zcount) {
2403 		ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count);
2404 	}
2405 }
2406 /* }}} */
2407 
2408 /* {{{ Perform Perl-style regular expression replacement using replacement callback. */
PHP_FUNCTION(preg_replace_callback_array)2409 PHP_FUNCTION(preg_replace_callback_array)
2410 {
2411 	zval zv, *replace, *zcount = NULL;
2412 	HashTable *pattern, *subject_ht;
2413 	zend_string *subject_str, *str_idx_regex;
2414 	zend_long limit = -1, flags = 0;
2415 	size_t replace_count = 0;
2416 	zend_fcall_info fci;
2417 	zend_fcall_info_cache fcc;
2418 
2419 	/* Get function parameters and do error-checking. */
2420 	ZEND_PARSE_PARAMETERS_START(2, 5)
2421 		Z_PARAM_ARRAY_HT(pattern)
2422 		Z_PARAM_ARRAY_HT_OR_STR(subject_ht, subject_str)
2423 		Z_PARAM_OPTIONAL
2424 		Z_PARAM_LONG(limit)
2425 		Z_PARAM_ZVAL(zcount)
2426 		Z_PARAM_LONG(flags)
2427 	ZEND_PARSE_PARAMETERS_END();
2428 
2429 	fci.size = sizeof(fci);
2430 	fci.object = NULL;
2431 	fci.named_params = NULL;
2432 
2433 	if (subject_ht) {
2434 		GC_TRY_ADDREF(subject_ht);
2435 	} else {
2436 		GC_TRY_ADDREF(subject_str);
2437 	}
2438 
2439 	ZEND_HASH_FOREACH_STR_KEY_VAL(pattern, str_idx_regex, replace) {
2440 		if (!str_idx_regex) {
2441 			php_error_docref(NULL, E_WARNING, "Delimiter must not be alphanumeric or backslash");
2442 			RETVAL_NULL();
2443 			goto error;
2444 		}
2445 
2446 		if (!zend_is_callable_ex(replace, NULL, 0, NULL, &fcc, NULL)) {
2447 			zend_argument_type_error(1, "must contain only valid callbacks");
2448 			goto error;
2449 		}
2450 
2451 		ZVAL_COPY_VALUE(&fci.function_name, replace);
2452 
2453 		replace_count += preg_replace_func_impl(&zv, str_idx_regex, /* regex_ht */ NULL, &fci, &fcc,
2454 			subject_str, subject_ht, limit, flags);
2455 		switch (Z_TYPE(zv)) {
2456 			case IS_ARRAY:
2457 				ZEND_ASSERT(subject_ht);
2458 				zend_array_release(subject_ht);
2459 				subject_ht = Z_ARR(zv);
2460 				break;
2461 			case IS_STRING:
2462 				ZEND_ASSERT(subject_str);
2463 				zend_string_release(subject_str);
2464 				subject_str = Z_STR(zv);
2465 				break;
2466 			case IS_NULL:
2467 				RETVAL_NULL();
2468 				goto error;
2469 			EMPTY_SWITCH_DEFAULT_CASE()
2470 		}
2471 
2472 		if (EG(exception)) {
2473 			goto error;
2474 		}
2475 	} ZEND_HASH_FOREACH_END();
2476 
2477 	if (zcount) {
2478 		ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count);
2479 	}
2480 
2481 	if (subject_ht) {
2482 		RETURN_ARR(subject_ht);
2483 	} else {
2484 		RETURN_STR(subject_str);
2485 	}
2486 
2487 error:
2488 	if (subject_ht) {
2489 		zend_array_release(subject_ht);
2490 	} else {
2491 		zend_string_release(subject_str);
2492 	}
2493 }
2494 /* }}} */
2495 
2496 /* {{{ Perform Perl-style regular expression replacement and only return matches. */
PHP_FUNCTION(preg_filter)2497 PHP_FUNCTION(preg_filter)
2498 {
2499 	preg_replace_common(INTERNAL_FUNCTION_PARAM_PASSTHRU, true);
2500 }
2501 /* }}} */
2502 
2503 /* {{{ Split string into an array using a perl-style regular expression as a delimiter */
PHP_FUNCTION(preg_split)2504 PHP_FUNCTION(preg_split)
2505 {
2506 	zend_string			*regex;			/* Regular expression */
2507 	zend_string			*subject;		/* String to match against */
2508 	zend_long			 limit_val = -1;/* Integer value of limit */
2509 	zend_long			 flags = 0;		/* Match control flags */
2510 	pcre_cache_entry	*pce;			/* Compiled regular expression */
2511 
2512 	/* Get function parameters and do error checking */
2513 	ZEND_PARSE_PARAMETERS_START(2, 4)
2514 		Z_PARAM_STR(regex)
2515 		Z_PARAM_STR(subject)
2516 		Z_PARAM_OPTIONAL
2517 		Z_PARAM_LONG(limit_val)
2518 		Z_PARAM_LONG(flags)
2519 	ZEND_PARSE_PARAMETERS_END();
2520 
2521 	/* Compile regex or get it from cache. */
2522 	if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
2523 		RETURN_FALSE;
2524 	}
2525 
2526 	pce->refcount++;
2527 	php_pcre_split_impl(pce, subject, return_value, limit_val, flags);
2528 	pce->refcount--;
2529 }
2530 /* }}} */
2531 
2532 /* {{{ php_pcre_split */
php_pcre_split_impl(pcre_cache_entry * pce,zend_string * subject_str,zval * return_value,zend_long limit_val,zend_long flags)2533 PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str, zval *return_value,
2534 	zend_long limit_val, zend_long flags)
2535 {
2536 	PCRE2_SIZE		*offsets;			/* Array of subpattern offsets */
2537 	uint32_t		 options;			/* Execution options */
2538 	int				 count;				/* Count of matched subpatterns */
2539 	PCRE2_SIZE		 start_offset;		/* Where the new search starts */
2540 	PCRE2_SIZE		 last_match_offset;	/* Location of last match */
2541 	uint32_t		 no_empty;			/* If NO_EMPTY flag is set */
2542 	uint32_t		 delim_capture; 	/* If delimiters should be captured */
2543 	uint32_t		 offset_capture;	/* If offsets should be captured */
2544 	uint32_t		 num_subpats;		/* Number of captured subpatterns */
2545 	zval			 tmp;
2546 	pcre2_match_data *match_data;
2547 	char *subject = ZSTR_VAL(subject_str);
2548 
2549 	no_empty = flags & PREG_SPLIT_NO_EMPTY;
2550 	delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE;
2551 	offset_capture = flags & PREG_SPLIT_OFFSET_CAPTURE;
2552 
2553 	/* Initialize return value */
2554 	array_init(return_value);
2555 
2556 	/* Calculate the size of the offsets array, and allocate memory for it. */
2557 	num_subpats = pce->capture_count + 1;
2558 
2559 	/* Start at the beginning of the string */
2560 	start_offset = 0;
2561 	last_match_offset = 0;
2562 	PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
2563 
2564 	if (limit_val == -1) {
2565 		/* pass */
2566 	} else if (limit_val == 0) {
2567 		limit_val = -1;
2568 	} else if (limit_val <= 1) {
2569 		goto last;
2570 	}
2571 
2572 	if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
2573 		match_data = mdata;
2574 	} else {
2575 		match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
2576 		if (!match_data) {
2577 			PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
2578 			zval_ptr_dtor(return_value);
2579 			RETURN_FALSE;
2580 		}
2581 	}
2582 
2583 	options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
2584 
2585 #ifdef HAVE_PCRE_JIT_SUPPORT
2586 	if ((pce->preg_options & PREG_JIT) && options) {
2587 		count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
2588 				PCRE2_NO_UTF_CHECK, match_data, mctx);
2589 	} else
2590 #endif
2591 	count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
2592 			options, match_data, mctx);
2593 
2594 	while (1) {
2595 		/* If something matched */
2596 		if (count >= 0) {
2597 			/* Check for too many substrings condition. */
2598 			if (UNEXPECTED(count == 0)) {
2599 				php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
2600 				count = num_subpats;
2601 			}
2602 
2603 matched:
2604 			offsets = pcre2_get_ovector_pointer(match_data);
2605 
2606 			if (UNEXPECTED(offsets[1] < offsets[0])) {
2607 				PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
2608 				break;
2609 			}
2610 
2611 			if (!no_empty || offsets[0] != last_match_offset) {
2612 				if (offset_capture) {
2613 					/* Add (match, offset) pair to the return value */
2614 					add_offset_pair(
2615 						return_value, subject, last_match_offset, offsets[0],
2616 						NULL, 0);
2617 				} else {
2618 					/* Add the piece to the return value */
2619 					populate_match_value_str(&tmp, subject, last_match_offset, offsets[0]);
2620 					zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
2621 				}
2622 
2623 				/* One less left to do */
2624 				if (limit_val != -1)
2625 					limit_val--;
2626 			}
2627 
2628 			if (delim_capture) {
2629 				size_t i;
2630 				for (i = 1; i < count; i++) {
2631 					/* If we have matched a delimiter */
2632 					if (!no_empty || offsets[2*i] != offsets[2*i+1]) {
2633 						if (offset_capture) {
2634 							add_offset_pair(
2635 								return_value, subject, offsets[2*i], offsets[2*i+1], NULL, 0);
2636 						} else {
2637 							populate_match_value_str(&tmp, subject, offsets[2*i], offsets[2*i+1]);
2638 							zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
2639 						}
2640 					}
2641 				}
2642 			}
2643 
2644 			/* Advance to the position right after the last full match */
2645 			start_offset = last_match_offset = offsets[1];
2646 
2647 			/* If we have matched an empty string, mimic what Perl's /g options does.
2648 			   This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
2649 			   the match again at the same point. If this fails (picked up above) we
2650 			   advance to the next character. */
2651 			if (start_offset == offsets[0]) {
2652 				/* Get next piece if no limit or limit not yet reached and something matched*/
2653 				if (limit_val != -1 && limit_val <= 1) {
2654 					break;
2655 				}
2656 				count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
2657 					PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
2658 				if (count >= 0) {
2659 					goto matched;
2660 				} else if (count == PCRE2_ERROR_NOMATCH) {
2661 					/* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
2662 					   this is not necessarily the end. We need to advance
2663 					   the start offset, and continue. Fudge the offset values
2664 					   to achieve this, unless we're already at the end of the string. */
2665 					if (start_offset < ZSTR_LEN(subject_str)) {
2666 						start_offset += calculate_unit_length(pce, subject + start_offset);
2667 					} else {
2668 						break;
2669 					}
2670 				} else {
2671 					goto error;
2672 				}
2673 			}
2674 
2675 		} else if (count == PCRE2_ERROR_NOMATCH) {
2676 			break;
2677 		} else {
2678 error:
2679 			pcre_handle_exec_error(count);
2680 			break;
2681 		}
2682 
2683 		/* Get next piece if no limit or limit not yet reached and something matched*/
2684 		if (limit_val != -1 && limit_val <= 1) {
2685 			break;
2686 		}
2687 
2688 #ifdef HAVE_PCRE_JIT_SUPPORT
2689 		if (pce->preg_options & PREG_JIT) {
2690 			count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
2691 					PCRE2_NO_UTF_CHECK, match_data, mctx);
2692 		} else
2693 #endif
2694 		count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
2695 				PCRE2_NO_UTF_CHECK, match_data, mctx);
2696 	}
2697 	if (match_data != mdata) {
2698 		pcre2_match_data_free(match_data);
2699 	}
2700 
2701 	if (PCRE_G(error_code) != PHP_PCRE_NO_ERROR) {
2702 		zval_ptr_dtor(return_value);
2703 		RETURN_FALSE;
2704 	}
2705 
2706 last:
2707 	start_offset = last_match_offset; /* the offset might have been incremented, but without further successful matches */
2708 
2709 	if (!no_empty || start_offset < ZSTR_LEN(subject_str)) {
2710 		if (offset_capture) {
2711 			/* Add the last (match, offset) pair to the return value */
2712 			add_offset_pair(return_value, subject, start_offset, ZSTR_LEN(subject_str), NULL, 0);
2713 		} else {
2714 			/* Add the last piece to the return value */
2715 			if (start_offset == 0) {
2716 				ZVAL_STR_COPY(&tmp, subject_str);
2717 			} else {
2718 				populate_match_value_str(&tmp, subject, start_offset, ZSTR_LEN(subject_str));
2719 			}
2720 			zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
2721 		}
2722 	}
2723 }
2724 /* }}} */
2725 
2726 /* {{{ Quote regular expression characters plus an optional character */
PHP_FUNCTION(preg_quote)2727 PHP_FUNCTION(preg_quote)
2728 {
2729 	zend_string *str;       		/* Input string argument */
2730 	zend_string	*delim = NULL;		/* Additional delimiter argument */
2731 	char		*in_str;			/* Input string */
2732 	char		*in_str_end;    	/* End of the input string */
2733 	zend_string	*out_str;			/* Output string with quoted characters */
2734 	size_t       extra_len;         /* Number of additional characters */
2735 	char 		*p,					/* Iterator for input string */
2736 				*q,					/* Iterator for output string */
2737 				 delim_char = '\0',	/* Delimiter character to be quoted */
2738 				 c;					/* Current character */
2739 
2740 	/* Get the arguments and check for errors */
2741 	ZEND_PARSE_PARAMETERS_START(1, 2)
2742 		Z_PARAM_STR(str)
2743 		Z_PARAM_OPTIONAL
2744 		Z_PARAM_STR_EX(delim, 1, 0)
2745 	ZEND_PARSE_PARAMETERS_END();
2746 
2747 	/* Nothing to do if we got an empty string */
2748 	if (ZSTR_LEN(str) == 0) {
2749 		RETURN_EMPTY_STRING();
2750 	}
2751 
2752 	in_str = ZSTR_VAL(str);
2753 	in_str_end = in_str + ZSTR_LEN(str);
2754 
2755 	if (delim) {
2756 		delim_char = ZSTR_VAL(delim)[0];
2757 	}
2758 
2759 	/* Go through the string and quote necessary characters */
2760 	extra_len = 0;
2761 	p = in_str;
2762 	do {
2763 		c = *p;
2764 		switch(c) {
2765 			case '.':
2766 			case '\\':
2767 			case '+':
2768 			case '*':
2769 			case '?':
2770 			case '[':
2771 			case '^':
2772 			case ']':
2773 			case '$':
2774 			case '(':
2775 			case ')':
2776 			case '{':
2777 			case '}':
2778 			case '=':
2779 			case '!':
2780 			case '>':
2781 			case '<':
2782 			case '|':
2783 			case ':':
2784 			case '-':
2785 			case '#':
2786 				extra_len++;
2787 				break;
2788 
2789 			case '\0':
2790 				extra_len+=3;
2791 				break;
2792 
2793 			default:
2794 				if (c == delim_char) {
2795 					extra_len++;
2796 				}
2797 				break;
2798 		}
2799 		p++;
2800 	} while (p != in_str_end);
2801 
2802 	if (extra_len == 0) {
2803 		RETURN_STR_COPY(str);
2804 	}
2805 
2806 	/* Allocate enough memory so that even if each character
2807 	   is quoted, we won't run out of room */
2808 	out_str = zend_string_safe_alloc(1, ZSTR_LEN(str), extra_len, 0);
2809 	q = ZSTR_VAL(out_str);
2810 	p = in_str;
2811 
2812 	do {
2813 		c = *p;
2814 		switch(c) {
2815 			case '.':
2816 			case '\\':
2817 			case '+':
2818 			case '*':
2819 			case '?':
2820 			case '[':
2821 			case '^':
2822 			case ']':
2823 			case '$':
2824 			case '(':
2825 			case ')':
2826 			case '{':
2827 			case '}':
2828 			case '=':
2829 			case '!':
2830 			case '>':
2831 			case '<':
2832 			case '|':
2833 			case ':':
2834 			case '-':
2835 			case '#':
2836 				*q++ = '\\';
2837 				*q++ = c;
2838 				break;
2839 
2840 			case '\0':
2841 				*q++ = '\\';
2842 				*q++ = '0';
2843 				*q++ = '0';
2844 				*q++ = '0';
2845 				break;
2846 
2847 			default:
2848 				if (c == delim_char) {
2849 					*q++ = '\\';
2850 				}
2851 				*q++ = c;
2852 				break;
2853 		}
2854 		p++;
2855 	} while (p != in_str_end);
2856 	*q = '\0';
2857 
2858 	RETURN_NEW_STR(out_str);
2859 }
2860 /* }}} */
2861 
2862 /* {{{ Searches array and returns entries which match regex */
PHP_FUNCTION(preg_grep)2863 PHP_FUNCTION(preg_grep)
2864 {
2865 	zend_string			*regex;			/* Regular expression */
2866 	zval				*input;			/* Input array */
2867 	zend_long			 flags = 0;		/* Match control flags */
2868 	pcre_cache_entry	*pce;			/* Compiled regular expression */
2869 
2870 	/* Get arguments and do error checking */
2871 	ZEND_PARSE_PARAMETERS_START(2, 3)
2872 		Z_PARAM_STR(regex)
2873 		Z_PARAM_ARRAY(input)
2874 		Z_PARAM_OPTIONAL
2875 		Z_PARAM_LONG(flags)
2876 	ZEND_PARSE_PARAMETERS_END();
2877 
2878 	/* Compile regex or get it from cache. */
2879 	if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
2880 		RETURN_FALSE;
2881 	}
2882 
2883 	pce->refcount++;
2884 	php_pcre_grep_impl(pce, input, return_value, flags);
2885 	pce->refcount--;
2886 }
2887 /* }}} */
2888 
php_pcre_grep_impl(pcre_cache_entry * pce,zval * input,zval * return_value,zend_long flags)2889 PHPAPI void  php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return_value, zend_long flags) /* {{{ */
2890 {
2891 	zval            *entry;             /* An entry in the input array */
2892 	uint32_t		 num_subpats;		/* Number of captured subpatterns */
2893 	int				 count;				/* Count of matched subpatterns */
2894 	uint32_t		 options;			/* Execution options */
2895 	zend_string		*string_key;
2896 	zend_ulong		 num_key;
2897 	zend_bool		 invert;			/* Whether to return non-matching
2898 										   entries */
2899 	pcre2_match_data *match_data;
2900 	invert = flags & PREG_GREP_INVERT ? 1 : 0;
2901 
2902 	/* Calculate the size of the offsets array, and allocate memory for it. */
2903 	num_subpats = pce->capture_count + 1;
2904 
2905 	/* Initialize return array */
2906 	array_init(return_value);
2907 
2908 	PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
2909 
2910 	if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
2911 		match_data = mdata;
2912 	} else {
2913 		match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
2914 		if (!match_data) {
2915 			PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
2916 			return;
2917 		}
2918 	}
2919 
2920 	options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
2921 
2922 	/* Go through the input array */
2923 	ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(input), num_key, string_key, entry) {
2924 		zend_string *tmp_subject_str;
2925 		zend_string *subject_str = zval_get_tmp_string(entry, &tmp_subject_str);
2926 
2927 		/* Perform the match */
2928 #ifdef HAVE_PCRE_JIT_SUPPORT
2929 		if ((pce->preg_options & PREG_JIT) && options) {
2930 			count = pcre2_jit_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), 0,
2931 					PCRE2_NO_UTF_CHECK, match_data, mctx);
2932 		} else
2933 #endif
2934 		count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), 0,
2935 				options, match_data, mctx);
2936 
2937 		/* If the entry fits our requirements */
2938 		if (count >= 0) {
2939 			/* Check for too many substrings condition. */
2940 			if (UNEXPECTED(count == 0)) {
2941 				php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
2942 			}
2943 			if (!invert) {
2944 				Z_TRY_ADDREF_P(entry);
2945 
2946 				/* Add to return array */
2947 				if (string_key) {
2948 					zend_hash_update(Z_ARRVAL_P(return_value), string_key, entry);
2949 				} else {
2950 					zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, entry);
2951 				}
2952 			}
2953 		} else if (count == PCRE2_ERROR_NOMATCH) {
2954 			if (invert) {
2955 				Z_TRY_ADDREF_P(entry);
2956 
2957 				/* Add to return array */
2958 				if (string_key) {
2959 					zend_hash_update(Z_ARRVAL_P(return_value), string_key, entry);
2960 				} else {
2961 					zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, entry);
2962 				}
2963 			}
2964 		} else {
2965 			pcre_handle_exec_error(count);
2966 			zend_tmp_string_release(tmp_subject_str);
2967 			break;
2968 		}
2969 
2970 		zend_tmp_string_release(tmp_subject_str);
2971 	} ZEND_HASH_FOREACH_END();
2972 	if (match_data != mdata) {
2973 		pcre2_match_data_free(match_data);
2974 	}
2975 }
2976 /* }}} */
2977 
2978 /* {{{ Returns the error code of the last regexp execution. */
PHP_FUNCTION(preg_last_error)2979 PHP_FUNCTION(preg_last_error)
2980 {
2981 	ZEND_PARSE_PARAMETERS_NONE();
2982 
2983 	RETURN_LONG(PCRE_G(error_code));
2984 }
2985 /* }}} */
2986 
2987 /* {{{ Returns the error message of the last regexp execution. */
PHP_FUNCTION(preg_last_error_msg)2988 PHP_FUNCTION(preg_last_error_msg)
2989 {
2990     ZEND_PARSE_PARAMETERS_NONE();
2991 
2992     RETURN_STRING(php_pcre_get_error_msg(PCRE_G(error_code)));
2993 }
2994 /* }}} */
2995 
2996 /* {{{ module definition structures */
2997 
2998 zend_module_entry pcre_module_entry = {
2999 	STANDARD_MODULE_HEADER,
3000    "pcre",
3001 	ext_functions,
3002 	PHP_MINIT(pcre),
3003 	PHP_MSHUTDOWN(pcre),
3004 	PHP_RINIT(pcre),
3005 	PHP_RSHUTDOWN(pcre),
3006 	PHP_MINFO(pcre),
3007 	PHP_PCRE_VERSION,
3008 	PHP_MODULE_GLOBALS(pcre),
3009 	PHP_GINIT(pcre),
3010 	PHP_GSHUTDOWN(pcre),
3011 	NULL,
3012 	STANDARD_MODULE_PROPERTIES_EX
3013 };
3014 
3015 #ifdef COMPILE_DL_PCRE
ZEND_GET_MODULE(pcre)3016 ZEND_GET_MODULE(pcre)
3017 #endif
3018 
3019 /* }}} */
3020 
3021 PHPAPI pcre2_match_context *php_pcre_mctx(void)
3022 {/*{{{*/
3023 	return mctx;
3024 }/*}}}*/
3025 
php_pcre_gctx(void)3026 PHPAPI pcre2_general_context *php_pcre_gctx(void)
3027 {/*{{{*/
3028 	return gctx;
3029 }/*}}}*/
3030 
php_pcre_cctx(void)3031 PHPAPI pcre2_compile_context *php_pcre_cctx(void)
3032 {/*{{{*/
3033 	return cctx;
3034 }/*}}}*/
3035 
php_pcre_pce_incref(pcre_cache_entry * pce)3036 PHPAPI void php_pcre_pce_incref(pcre_cache_entry *pce)
3037 {/*{{{*/
3038 	assert(NULL != pce);
3039 	pce->refcount++;
3040 }/*}}}*/
3041 
php_pcre_pce_decref(pcre_cache_entry * pce)3042 PHPAPI void php_pcre_pce_decref(pcre_cache_entry *pce)
3043 {/*{{{*/
3044 	assert(NULL != pce);
3045 	assert(0 != pce->refcount);
3046 	pce->refcount--;
3047 }/*}}}*/
3048 
php_pcre_pce_re(pcre_cache_entry * pce)3049 PHPAPI pcre2_code *php_pcre_pce_re(pcre_cache_entry *pce)
3050 {/*{{{*/
3051 	assert(NULL != pce);
3052 	return pce->re;
3053 }/*}}}*/
3054