1 /*
2 +----------------------------------------------------------------------+
3 | Copyright (c) The PHP Group |
4 +----------------------------------------------------------------------+
5 | This source file is subject to version 3.01 of the PHP license, |
6 | that is bundled with this package in the file LICENSE, and is |
7 | available through the world-wide-web at the following url: |
8 | https://www.php.net/license/3_01.txt |
9 | If you did not receive a copy of the PHP license and are unable to |
10 | obtain it through the world-wide-web, please send a note to |
11 | license@php.net so we can mail you a copy immediately. |
12 +----------------------------------------------------------------------+
13 | Author: Andrei Zmievski <andrei@php.net> |
14 +----------------------------------------------------------------------+
15 */
16
17 #include "php.h"
18 #include "php_ini.h"
19 #include "php_pcre.h"
20 #include "ext/standard/info.h"
21 #include "ext/standard/basic_functions.h"
22 #include "zend_smart_str.h"
23 #include "SAPI.h"
24
/* Result ordering selectors (plain values, not bit flags). */
#define PREG_PATTERN_ORDER 1
#define PREG_SET_ORDER 2
/* Bit flags; populate_subpat_array() tests both of these against its `flags` argument. */
#define PREG_OFFSET_CAPTURE (1<<8)
#define PREG_UNMATCHED_AS_NULL (1<<9)

/* Bit flags for the split operation (presumably preg_split(); not used in this part of the file). */
#define PREG_SPLIT_NO_EMPTY (1<<0)
#define PREG_SPLIT_DELIM_CAPTURE (1<<1)
#define PREG_SPLIT_OFFSET_CAPTURE (1<<2)

/* Bit flag for the grep operation (presumably preg_grep(); not used in this part of the file). */
#define PREG_GREP_INVERT (1<<0)

/* Stored in pcre_cache_entry.preg_options when JIT compilation produced code for the pattern. */
#define PREG_JIT (1<<3)

/* Number of cached patterns at which pcre_get_compiled_regex_cache_ex() evicts
 * up to PCRE_CACHE_SIZE / 8 unreferenced entries before adding a new one. */
#define PCRE_CACHE_SIZE 4096

/* Compile-time JIT availability as a 0/1 value. */
#ifdef HAVE_PCRE_JIT_SUPPORT
#define PHP_PCRE_JIT_SUPPORT 1
#else
#define PHP_PCRE_JIT_SUPPORT 0
#endif

/* PCRE2 version string; allocated in MINIT via _pcre2_config_str() and freed in MSHUTDOWN. */
char *php_pcre_version;
47
48 #include "php_pcre_arginfo.h"
49
/* One compiled pattern as stored in PCRE_G(pcre_cache). */
struct _pcre_cache_entry {
	/* Compiled pattern; released with pcre2_code_free() when the entry is destroyed. */
	pcre2_code *re;
	/* Pointer is not NULL when there are named captures.
	 * Length is equal to capture_count + 1 to account for capture group 0. */
	zend_string **subpats_table;
	/* PHP-side flags for the pattern (PREG_JIT is set here after a successful JIT compile). */
	uint32_t preg_options;
	/* Value reported by pcre2_pattern_info(PCRE2_INFO_CAPTURECOUNT). */
	uint32_t capture_count;
	/* Option bits that were passed to pcre2_compile(). */
	uint32_t compile_options;
	/* Entries with a non-zero refcount are kept by pcre_clean_cache(). */
	uint32_t refcount;
};
60
PHPAPI ZEND_DECLARE_MODULE_GLOBALS(pcre)

#ifdef HAVE_PCRE_JIT_SUPPORT
/* Start and maximum sizes passed to pcre2_jit_stack_create(). */
#define PCRE_JIT_STACK_MIN_SIZE (32 * 1024)
#define PCRE_JIT_STACK_MAX_SIZE (192 * 1024)
/* JIT stack created in php_pcre_init_pcre2() and freed in php_pcre_shutdown_pcre2(). */
ZEND_TLS pcre2_jit_stack *jit_stack = NULL;
#endif
/* General context using (infallible) system allocator. */
ZEND_TLS pcre2_general_context *gctx = NULL;
/* These two are global per thread for now. Though it is possible to use these
   per pattern. Either one can copy it and use in pce, or one does no global
   contexts at all, but creates for every pce. */
ZEND_TLS pcre2_compile_context *cctx = NULL;
ZEND_TLS pcre2_match_context *mctx = NULL;
/* Preallocated match data block handed out by php_pcre_create_match_data(). */
ZEND_TLS pcre2_match_data *mdata = NULL;
/* Non-zero while `mdata` is handed out; cleared by php_pcre_free_match_data(). */
ZEND_TLS bool mdata_used = 0;
/* Set to 1 by php_pcre_init_pcre2() only when every object above was created. */
ZEND_TLS uint8_t pcre2_init_ok = 0;
#if defined(ZTS) && defined(HAVE_PCRE_JIT_SUPPORT)
/* Mutex taken around the initialisation retry in RINIT; allocated and freed on the main thread only. */
static MUTEX_T pcre_mt = NULL;
#define php_pcre_mutex_alloc() \
	if (tsrm_is_main_thread() && !pcre_mt) pcre_mt = tsrm_mutex_alloc();
#define php_pcre_mutex_free() \
	if (tsrm_is_main_thread() && pcre_mt) { tsrm_mutex_free(pcre_mt); pcre_mt = NULL; }
#define php_pcre_mutex_lock() tsrm_mutex_lock(pcre_mt);
#define php_pcre_mutex_unlock() tsrm_mutex_unlock(pcre_mt);
#else
/* Without ZTS + JIT the mutex helpers expand to nothing. */
#define php_pcre_mutex_alloc()
#define php_pcre_mutex_free()
#define php_pcre_mutex_lock()
#define php_pcre_mutex_unlock()
#endif

/* Character tables built by pcre2_maketables(), keyed by the BG(ctype_string) value. */
ZEND_TLS HashTable char_tables;

/* Defined further down; needed earlier by php_free_pcre_cache(). */
static void free_subpats_table(zend_string **subpat_names, uint32_t num_subpats);
96
/* Destructor registered for the char_tables hash: frees one stored table
 * through the persistent branch of pefree(). */
static void php_pcre_free_char_table(zval *data)
{/*{{{*/
	pefree(Z_PTR_P(data), 1);
}/*}}}*/
102
pcre_handle_exec_error(int pcre_code)103 static void pcre_handle_exec_error(int pcre_code) /* {{{ */
104 {
105 int preg_code = 0;
106
107 switch (pcre_code) {
108 case PCRE2_ERROR_MATCHLIMIT:
109 preg_code = PHP_PCRE_BACKTRACK_LIMIT_ERROR;
110 break;
111
112 case PCRE2_ERROR_RECURSIONLIMIT:
113 preg_code = PHP_PCRE_RECURSION_LIMIT_ERROR;
114 break;
115
116 case PCRE2_ERROR_BADUTFOFFSET:
117 preg_code = PHP_PCRE_BAD_UTF8_OFFSET_ERROR;
118 break;
119
120 #ifdef HAVE_PCRE_JIT_SUPPORT
121 case PCRE2_ERROR_JIT_STACKLIMIT:
122 preg_code = PHP_PCRE_JIT_STACKLIMIT_ERROR;
123 break;
124 #endif
125
126 default:
127 if (pcre_code <= PCRE2_ERROR_UTF8_ERR1 && pcre_code >= PCRE2_ERROR_UTF8_ERR21) {
128 preg_code = PHP_PCRE_BAD_UTF8_ERROR;
129 } else {
130 preg_code = PHP_PCRE_INTERNAL_ERROR;
131 }
132 break;
133 }
134
135 PCRE_G(error_code) = preg_code;
136 }
137 /* }}} */
138
/* {{{ Return the static, human-readable message for a PHP_PCRE_* error code. */
static const char *php_pcre_get_error_msg(php_pcre_error_code error_code)
{
	const char *msg;

	switch (error_code) {
		case PHP_PCRE_NO_ERROR:
			msg = "No error";
			break;
		case PHP_PCRE_INTERNAL_ERROR:
			msg = "Internal error";
			break;
		case PHP_PCRE_BAD_UTF8_ERROR:
			msg = "Malformed UTF-8 characters, possibly incorrectly encoded";
			break;
		case PHP_PCRE_BAD_UTF8_OFFSET_ERROR:
			msg = "The offset did not correspond to the beginning of a valid UTF-8 code point";
			break;
		case PHP_PCRE_BACKTRACK_LIMIT_ERROR:
			msg = "Backtrack limit exhausted";
			break;
		case PHP_PCRE_RECURSION_LIMIT_ERROR:
			msg = "Recursion limit exhausted";
			break;
#ifdef HAVE_PCRE_JIT_SUPPORT
		case PHP_PCRE_JIT_STACKLIMIT_ERROR:
			msg = "JIT stack limit exhausted";
			break;
#endif
		default:
			msg = "Unknown error";
			break;
	}

	return msg;
}
/* }}} */
165
/* {{{ Destructor for PCRE_G(pcre_cache) entries: releases the name table (if any),
 * the compiled pattern and the entry itself. A NULL entry is ignored. */
static void php_free_pcre_cache(zval *data)
{
	pcre_cache_entry *entry = (pcre_cache_entry *) Z_PTR_P(data);

	if (entry != NULL) {
		if (entry->subpats_table != NULL) {
			/* The table holds one slot per capture group plus group 0. */
			free_subpats_table(entry->subpats_table, entry->capture_count + 1);
		}
		pcre2_code_free(entry->re);
		free(entry);
	}
}
/* }}} */
177
/* PCRE2 allocation callback backed by pemalloc(..., 1); `data` is unused. */
static void *php_pcre_malloc(PCRE2_SIZE size, void *data)
{
	void *block = pemalloc(size, 1);

	return block;
}
182
/* PCRE2 release callback matching php_pcre_malloc(); `data` is unused. */
static void php_pcre_free(void *block, void *data)
{
	pefree(block, 1);
	return;
}
187
/* PCRE2 allocation callback backed by emalloc(); `data` is unused.
 * Installed on the per-request context created in RINIT. */
static void *php_pcre_emalloc(PCRE2_SIZE size, void *data)
{
	void *block = emalloc(size);

	return block;
}
192
/* PCRE2 release callback matching php_pcre_emalloc(); `data` is unused. */
static void php_pcre_efree(void *block, void *data)
{
	efree(block);
	return;
}
197
/* Extra compile options applied to the shared compile context in php_pcre_init_pcre2()
 * and used as the starting value for each pattern's extra options. */
#ifdef PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK
/* pcre 10.38 needs PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK, disabled by default */
#define PHP_PCRE_DEFAULT_EXTRA_COPTIONS PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK
#else
#define PHP_PCRE_DEFAULT_EXTRA_COPTIONS 0
#endif

/* Size passed to pcre2_match_data_create() for the shared `mdata`; patterns with
 * capture_count + 1 <= this value can reuse it (see php_pcre_create_match_data()). */
#define PHP_PCRE_PREALLOC_MDATA_SIZE 32
206
php_pcre_init_pcre2(uint8_t jit)207 static void php_pcre_init_pcre2(uint8_t jit)
208 {/*{{{*/
209 if (!gctx) {
210 gctx = pcre2_general_context_create(php_pcre_malloc, php_pcre_free, NULL);
211 if (!gctx) {
212 pcre2_init_ok = 0;
213 return;
214 }
215 }
216
217 if (!cctx) {
218 cctx = pcre2_compile_context_create(gctx);
219 if (!cctx) {
220 pcre2_init_ok = 0;
221 return;
222 }
223 }
224
225 pcre2_set_compile_extra_options(cctx, PHP_PCRE_DEFAULT_EXTRA_COPTIONS);
226
227 if (!mctx) {
228 mctx = pcre2_match_context_create(gctx);
229 if (!mctx) {
230 pcre2_init_ok = 0;
231 return;
232 }
233 }
234
235 #ifdef HAVE_PCRE_JIT_SUPPORT
236 if (jit && !jit_stack) {
237 jit_stack = pcre2_jit_stack_create(PCRE_JIT_STACK_MIN_SIZE, PCRE_JIT_STACK_MAX_SIZE, gctx);
238 if (!jit_stack) {
239 pcre2_init_ok = 0;
240 return;
241 }
242 }
243 #endif
244
245 if (!mdata) {
246 mdata = pcre2_match_data_create(PHP_PCRE_PREALLOC_MDATA_SIZE, gctx);
247 if (!mdata) {
248 pcre2_init_ok = 0;
249 return;
250 }
251 }
252
253 pcre2_init_ok = 1;
254 }/*}}}*/
255
php_pcre_shutdown_pcre2(void)256 static void php_pcre_shutdown_pcre2(void)
257 {/*{{{*/
258 if (gctx) {
259 pcre2_general_context_free(gctx);
260 gctx = NULL;
261 }
262
263 if (cctx) {
264 pcre2_compile_context_free(cctx);
265 cctx = NULL;
266 }
267
268 if (mctx) {
269 pcre2_match_context_free(mctx);
270 mctx = NULL;
271 }
272
273 #ifdef HAVE_PCRE_JIT_SUPPORT
274 /* Stack may only be destroyed when no cached patterns
275 possibly associated with it do exist. */
276 if (jit_stack) {
277 pcre2_jit_stack_free(jit_stack);
278 jit_stack = NULL;
279 }
280 #endif
281
282 if (mdata) {
283 pcre2_match_data_free(mdata);
284 mdata = NULL;
285 }
286
287 pcre2_init_ok = 0;
288 }/*}}}*/
289
PHP_GINIT_FUNCTION(pcre)290 static PHP_GINIT_FUNCTION(pcre) /* {{{ */
291 {
292 php_pcre_mutex_alloc();
293
294 zend_hash_init(&pcre_globals->pcre_cache, 0, NULL, php_free_pcre_cache, 1);
295
296 pcre_globals->backtrack_limit = 0;
297 pcre_globals->recursion_limit = 0;
298 pcre_globals->error_code = PHP_PCRE_NO_ERROR;
299 ZVAL_UNDEF(&pcre_globals->unmatched_null_pair);
300 ZVAL_UNDEF(&pcre_globals->unmatched_empty_pair);
301 #ifdef HAVE_PCRE_JIT_SUPPORT
302 pcre_globals->jit = 1;
303 #endif
304
305 php_pcre_init_pcre2(1);
306 zend_hash_init(&char_tables, 1, NULL, php_pcre_free_char_table, 1);
307 }
308 /* }}} */
309
PHP_GSHUTDOWN_FUNCTION(pcre)310 static PHP_GSHUTDOWN_FUNCTION(pcre) /* {{{ */
311 {
312 zend_hash_destroy(&pcre_globals->pcre_cache);
313
314 php_pcre_shutdown_pcre2();
315 zend_hash_destroy(&char_tables);
316 php_pcre_mutex_free();
317 }
318 /* }}} */
319
PHP_INI_MH(OnUpdateBacktrackLimit)320 static PHP_INI_MH(OnUpdateBacktrackLimit)
321 {/*{{{*/
322 OnUpdateLong(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
323 if (mctx) {
324 pcre2_set_match_limit(mctx, (uint32_t)PCRE_G(backtrack_limit));
325 }
326
327 return SUCCESS;
328 }/*}}}*/
329
PHP_INI_MH(OnUpdateRecursionLimit)330 static PHP_INI_MH(OnUpdateRecursionLimit)
331 {/*{{{*/
332 OnUpdateLong(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
333 if (mctx) {
334 pcre2_set_depth_limit(mctx, (uint32_t)PCRE_G(recursion_limit));
335 }
336
337 return SUCCESS;
338 }/*}}}*/
339
#ifdef HAVE_PCRE_JIT_SUPPORT
/* INI handler for pcre.jit: stores the boolean through OnUpdateBool() (its
 * result is not inspected), then assigns the JIT stack to the match context
 * when JIT is on and a stack exists, or detaches it otherwise.
 * Always returns SUCCESS. */
static PHP_INI_MH(OnUpdateJit)
{/*{{{*/
	OnUpdateBool(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);

	pcre2_jit_stack_assign(mctx, NULL, (PCRE_G(jit) && jit_stack) ? jit_stack : NULL);

	return SUCCESS;
}/*}}}*/
#endif
353
/* INI settings of the extension. Each entry names its default value, its
 * update handler and the zend_pcre_globals field it is bound to. */
PHP_INI_BEGIN()
	/* Handler forwards the value to pcre2_set_match_limit(). */
	STD_PHP_INI_ENTRY("pcre.backtrack_limit", "1000000", PHP_INI_ALL, OnUpdateBacktrackLimit, backtrack_limit, zend_pcre_globals, pcre_globals)
	/* Handler forwards the value to pcre2_set_depth_limit(). */
	STD_PHP_INI_ENTRY("pcre.recursion_limit", "100000", PHP_INI_ALL, OnUpdateRecursionLimit, recursion_limit, zend_pcre_globals, pcre_globals)
#ifdef HAVE_PCRE_JIT_SUPPORT
	/* Only registered when JIT support is compiled in. */
	STD_PHP_INI_BOOLEAN("pcre.jit", "1", PHP_INI_ALL, OnUpdateJit, jit, zend_pcre_globals, pcre_globals)
#endif
PHP_INI_END()
361
/* Fetch a string-valued pcre2_config() item into a freshly malloc()ed buffer.
 *
 * `what` is one of the PCRE2_CONFIG_* selectors that yield a string.
 * Returns the NUL-terminated string, which the caller must free(), or NULL
 * when the item is not available, pcre2_config() reports an error, or the
 * allocation fails.
 */
static char *_pcre2_config_str(uint32_t what)
{/*{{{*/
	char *ret;
	/* First call with a NULL buffer only reports the required length. */
	int len = pcre2_config(what, NULL);

	/* A negative value is an error code (e.g. an unrecognised selector);
	 * it must not be used as an allocation size. */
	if (len <= 0) {
		return NULL;
	}

	ret = malloc((size_t)len + 1);
	if (ret == NULL) {
		return NULL;
	}

	len = pcre2_config(what, ret);
	if (len <= 0) {
		/* Nothing usable was written; do not return an unfilled buffer. */
		free(ret);
		return NULL;
	}

	return ret;
}/*}}}*/
375
376 /* {{{ PHP_MINFO_FUNCTION(pcre) */
PHP_MINFO_FUNCTION(pcre)377 static PHP_MINFO_FUNCTION(pcre)
378 {
379 #ifdef HAVE_PCRE_JIT_SUPPORT
380 uint32_t flag = 0;
381 char *jit_target = _pcre2_config_str(PCRE2_CONFIG_JITTARGET);
382 #endif
383 char *version = _pcre2_config_str(PCRE2_CONFIG_VERSION);
384 char *unicode = _pcre2_config_str(PCRE2_CONFIG_UNICODE_VERSION);
385
386 php_info_print_table_start();
387 php_info_print_table_row(2, "PCRE (Perl Compatible Regular Expressions) Support", "enabled" );
388 php_info_print_table_row(2, "PCRE Library Version", version);
389 free(version);
390 php_info_print_table_row(2, "PCRE Unicode Version", unicode);
391 free(unicode);
392
393 #ifdef HAVE_PCRE_JIT_SUPPORT
394 if (!pcre2_config(PCRE2_CONFIG_JIT, &flag)) {
395 php_info_print_table_row(2, "PCRE JIT Support", flag ? "enabled" : "disabled");
396 } else {
397 php_info_print_table_row(2, "PCRE JIT Support", "unknown" );
398 }
399 if (jit_target) {
400 php_info_print_table_row(2, "PCRE JIT Target", jit_target);
401 }
402 free(jit_target);
403 #else
404 php_info_print_table_row(2, "PCRE JIT Support", "not compiled in" );
405 #endif
406
407 #ifdef HAVE_PCRE_VALGRIND_SUPPORT
408 php_info_print_table_row(2, "PCRE Valgrind Support", "enabled" );
409 #endif
410
411 php_info_print_table_end();
412
413 DISPLAY_INI_ENTRIES();
414 }
415 /* }}} */
416
417 /* {{{ PHP_MINIT_FUNCTION(pcre) */
PHP_MINIT_FUNCTION(pcre)418 static PHP_MINIT_FUNCTION(pcre)
419 {
420 #ifdef HAVE_PCRE_JIT_SUPPORT
421 if (UNEXPECTED(!pcre2_init_ok)) {
422 /* Retry. */
423 php_pcre_init_pcre2(PCRE_G(jit));
424 if (!pcre2_init_ok) {
425 return FAILURE;
426 }
427 }
428 #endif
429
430 REGISTER_INI_ENTRIES();
431
432 php_pcre_version = _pcre2_config_str(PCRE2_CONFIG_VERSION);
433
434 register_php_pcre_symbols(module_number);
435
436 return SUCCESS;
437 }
438 /* }}} */
439
440 /* {{{ PHP_MSHUTDOWN_FUNCTION(pcre) */
PHP_MSHUTDOWN_FUNCTION(pcre)441 static PHP_MSHUTDOWN_FUNCTION(pcre)
442 {
443 UNREGISTER_INI_ENTRIES();
444
445 free(php_pcre_version);
446
447 return SUCCESS;
448 }
449 /* }}} */
450
451 /* {{{ PHP_RINIT_FUNCTION(pcre) */
PHP_RINIT_FUNCTION(pcre)452 static PHP_RINIT_FUNCTION(pcre)
453 {
454 #ifdef HAVE_PCRE_JIT_SUPPORT
455 if (UNEXPECTED(!pcre2_init_ok)) {
456 /* Retry. */
457 php_pcre_mutex_lock();
458 php_pcre_init_pcre2(PCRE_G(jit));
459 if (!pcre2_init_ok) {
460 php_pcre_mutex_unlock();
461 return FAILURE;
462 }
463 php_pcre_mutex_unlock();
464 }
465
466 mdata_used = 0;
467 #endif
468
469 PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
470 PCRE_G(gctx_zmm) = pcre2_general_context_create(php_pcre_emalloc, php_pcre_efree, NULL);
471 if (!PCRE_G(gctx_zmm)) {
472 return FAILURE;
473 }
474
475 return SUCCESS;
476 }
477 /* }}} */
478
PHP_RSHUTDOWN_FUNCTION(pcre)479 static PHP_RSHUTDOWN_FUNCTION(pcre)
480 {
481 pcre2_general_context_free(PCRE_G(gctx_zmm));
482 PCRE_G(gctx_zmm) = NULL;
483
484 zval_ptr_dtor(&PCRE_G(unmatched_null_pair));
485 zval_ptr_dtor(&PCRE_G(unmatched_empty_pair));
486 ZVAL_UNDEF(&PCRE_G(unmatched_null_pair));
487 ZVAL_UNDEF(&PCRE_G(unmatched_empty_pair));
488 return SUCCESS;
489 }
490
491 /* {{{ static pcre_clean_cache */
pcre_clean_cache(zval * data,void * arg)492 static int pcre_clean_cache(zval *data, void *arg)
493 {
494 pcre_cache_entry *pce = (pcre_cache_entry *) Z_PTR_P(data);
495 int *num_clean = (int *)arg;
496
497 if (!pce->refcount) {
498 if (--(*num_clean) == 0) {
499 return ZEND_HASH_APPLY_REMOVE|ZEND_HASH_APPLY_STOP;
500 }
501 return ZEND_HASH_APPLY_REMOVE;
502 } else {
503 return ZEND_HASH_APPLY_KEEP;
504 }
505 }
506 /* }}} */
507
/* Release every non-NULL name in a subpattern name table of `num_subpats`
 * slots, then free the table itself (persistent allocation). */
static void free_subpats_table(zend_string **subpat_names, uint32_t num_subpats) {
	for (uint32_t slot = 0; slot < num_subpats; slot++) {
		zend_string *name = subpat_names[slot];

		if (name != NULL) {
			zend_string_release_ex(name, true);
		}
	}
	pefree(subpat_names, true);
}
517
518 /* {{{ static make_subpats_table */
make_subpats_table(uint32_t name_cnt,pcre_cache_entry * pce)519 static zend_string **make_subpats_table(uint32_t name_cnt, pcre_cache_entry *pce)
520 {
521 uint32_t num_subpats = pce->capture_count + 1;
522 uint32_t name_size, ni = 0;
523 char *name_table;
524 zend_string **subpat_names;
525 int rc1, rc2;
526
527 rc1 = pcre2_pattern_info(pce->re, PCRE2_INFO_NAMETABLE, &name_table);
528 rc2 = pcre2_pattern_info(pce->re, PCRE2_INFO_NAMEENTRYSIZE, &name_size);
529 if (rc1 < 0 || rc2 < 0) {
530 php_error_docref(NULL, E_WARNING, "Internal pcre2_pattern_info() error %d", rc1 < 0 ? rc1 : rc2);
531 return NULL;
532 }
533
534 subpat_names = pecalloc(num_subpats, sizeof(zend_string *), true);
535 while (ni++ < name_cnt) {
536 unsigned short name_idx = 0x100 * (unsigned char)name_table[0] + (unsigned char)name_table[1];
537 const char *name = name_table + 2;
538 /* Note: this makes a persistent string when the cache is not request-based because the string
539 * has to outlive the request. In that case, they will only be used within this thread
540 * and never be shared.
541 * Although we will be storing them in user-exposed arrays, they cannot cause problems
542 * because they only live in this thread and the last reference is deleted on shutdown
543 * instead of by user code. */
544 subpat_names[name_idx] = zend_string_init(name, strlen(name), true);
545 GC_MAKE_PERSISTENT_LOCAL(subpat_names[name_idx]);
546 name_table += name_size;
547 }
548 return subpat_names;
549 }
550 /* }}} */
551
552 /* {{{ static calculate_unit_length */
553 /* Calculates the byte length of the next character. Assumes valid UTF-8 for PCRE2_UTF. */
calculate_unit_length(pcre_cache_entry * pce,const char * start)554 static zend_always_inline size_t calculate_unit_length(pcre_cache_entry *pce, const char *start)
555 {
556 size_t unit_len;
557
558 if (pce->compile_options & PCRE2_UTF) {
559 const char *end = start;
560
561 /* skip continuation bytes */
562 while ((*++end & 0xC0) == 0x80);
563 unit_len = end - start;
564 } else {
565 unit_len = 1;
566 }
567 return unit_len;
568 }
569 /* }}} */
570
571 /* {{{ pcre_get_compiled_regex_cache */
pcre_get_compiled_regex_cache_ex(zend_string * regex,bool locale_aware)572 PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, bool locale_aware)
573 {
574 pcre2_code *re = NULL;
575 #if 10 == PCRE2_MAJOR && 37 == PCRE2_MINOR && !defined(HAVE_BUNDLED_PCRE)
576 uint32_t coptions = PCRE2_NO_START_OPTIMIZE;
577 #else
578 uint32_t coptions = 0;
579 #endif
580 uint32_t eoptions = PHP_PCRE_DEFAULT_EXTRA_COPTIONS;
581 PCRE2_UCHAR error[128];
582 PCRE2_SIZE erroffset;
583 int errnumber;
584 char delimiter;
585 char start_delimiter;
586 char end_delimiter;
587 char *p, *pp;
588 char *pattern;
589 size_t pattern_len;
590 uint32_t poptions = 0;
591 const uint8_t *tables = NULL;
592 zval *zv;
593 pcre_cache_entry new_entry;
594 int rc;
595 zend_string *key;
596 pcre_cache_entry *ret;
597
598 if (locale_aware && BG(ctype_string)) {
599 key = zend_string_concat2(
600 ZSTR_VAL(BG(ctype_string)), ZSTR_LEN(BG(ctype_string)),
601 ZSTR_VAL(regex), ZSTR_LEN(regex));
602 } else {
603 key = regex;
604 }
605
606 /* Try to lookup the cached regex entry, and if successful, just pass
607 back the compiled pattern, otherwise go on and compile it. */
608 zv = zend_hash_find(&PCRE_G(pcre_cache), key);
609 if (zv) {
610 if (key != regex) {
611 zend_string_release_ex(key, 0);
612 }
613 return (pcre_cache_entry*)Z_PTR_P(zv);
614 }
615
616 p = ZSTR_VAL(regex);
617 const char* end_p = ZSTR_VAL(regex) + ZSTR_LEN(regex);
618
619 /* Parse through the leading whitespace, and display a warning if we
620 get to the end without encountering a delimiter. */
621 while (isspace((int)*(unsigned char *)p)) p++;
622 if (p >= end_p) {
623 if (key != regex) {
624 zend_string_release_ex(key, 0);
625 }
626 php_error_docref(NULL, E_WARNING, "Empty regular expression");
627 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
628 return NULL;
629 }
630
631 /* Get the delimiter and display a warning if it is alphanumeric
632 or a backslash. */
633 delimiter = *p++;
634 if (isalnum((int)*(unsigned char *)&delimiter) || delimiter == '\\' || delimiter == '\0') {
635 if (key != regex) {
636 zend_string_release_ex(key, 0);
637 }
638 php_error_docref(NULL, E_WARNING, "Delimiter must not be alphanumeric, backslash, or NUL byte");
639 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
640 return NULL;
641 }
642
643 start_delimiter = delimiter;
644 if ((pp = strchr("([{< )]}> )]}>", delimiter)))
645 delimiter = pp[5];
646 end_delimiter = delimiter;
647
648 pp = p;
649
650 if (start_delimiter == end_delimiter) {
651 /* We need to iterate through the pattern, searching for the ending delimiter,
652 but skipping the backslashed delimiters. If the ending delimiter is not
653 found, display a warning. */
654 while (pp < end_p) {
655 if (*pp == '\\' && pp + 1 < end_p) pp++;
656 else if (*pp == delimiter)
657 break;
658 pp++;
659 }
660 } else {
661 /* We iterate through the pattern, searching for the matching ending
662 * delimiter. For each matching starting delimiter, we increment nesting
663 * level, and decrement it for each matching ending delimiter. If we
664 * reach the end of the pattern without matching, display a warning.
665 */
666 int brackets = 1; /* brackets nesting level */
667 while (pp < end_p) {
668 if (*pp == '\\' && pp + 1 < end_p) pp++;
669 else if (*pp == end_delimiter && --brackets <= 0)
670 break;
671 else if (*pp == start_delimiter)
672 brackets++;
673 pp++;
674 }
675 }
676
677 if (pp >= end_p) {
678 if (key != regex) {
679 zend_string_release_ex(key, 0);
680 }
681 if (start_delimiter == end_delimiter) {
682 php_error_docref(NULL,E_WARNING, "No ending delimiter '%c' found", delimiter);
683 } else {
684 php_error_docref(NULL,E_WARNING, "No ending matching delimiter '%c' found", delimiter);
685 }
686 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
687 return NULL;
688 }
689
690 /* Make a copy of the actual pattern. */
691 pattern_len = pp - p;
692 pattern = estrndup(p, pattern_len);
693
694 /* Move on to the options */
695 pp++;
696
697 /* Parse through the options, setting appropriate flags. Display
698 a warning if we encounter an unknown modifier. */
699 while (pp < end_p) {
700 switch (*pp++) {
701 /* Perl compatible options */
702 case 'i': coptions |= PCRE2_CASELESS; break;
703 case 'm': coptions |= PCRE2_MULTILINE; break;
704 case 'n': coptions |= PCRE2_NO_AUTO_CAPTURE; break;
705 case 's': coptions |= PCRE2_DOTALL; break;
706 case 'x': coptions |= PCRE2_EXTENDED; break;
707
708 /* PCRE specific options */
709 case 'A': coptions |= PCRE2_ANCHORED; break;
710 case 'D': coptions |= PCRE2_DOLLAR_ENDONLY;break;
711 #ifdef PCRE2_EXTRA_CASELESS_RESTRICT
712 case 'r': eoptions |= PCRE2_EXTRA_CASELESS_RESTRICT; break;
713 #endif
714 case 'S': /* Pass. */ break;
715 case 'X': /* Pass. */ break;
716 case 'U': coptions |= PCRE2_UNGREEDY; break;
717 case 'u': coptions |= PCRE2_UTF;
718 /* In PCRE, by default, \d, \D, \s, \S, \w, and \W recognize only ASCII
719 characters, even in UTF-8 mode. However, this can be changed by setting
720 the PCRE2_UCP option. */
721 #ifdef PCRE2_UCP
722 coptions |= PCRE2_UCP;
723 #endif
724 break;
725 case 'J': coptions |= PCRE2_DUPNAMES; break;
726
727 case ' ':
728 case '\n':
729 case '\r':
730 break;
731
732 case 'e': /* legacy eval */
733 default:
734 if (pp[-1]) {
735 php_error_docref(NULL, E_WARNING, "Unknown modifier '%c'", pp[-1]);
736 } else {
737 php_error_docref(NULL, E_WARNING, "NUL byte is not a valid modifier");
738 }
739 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
740 efree(pattern);
741 if (key != regex) {
742 zend_string_release_ex(key, 0);
743 }
744 return NULL;
745 }
746 }
747
748 if (key != regex) {
749 tables = (uint8_t *)zend_hash_find_ptr(&char_tables, BG(ctype_string));
750 if (!tables) {
751 zend_string *_k;
752 tables = pcre2_maketables(gctx);
753 if (UNEXPECTED(!tables)) {
754 php_error_docref(NULL,E_WARNING, "Failed to generate locale character tables");
755 pcre_handle_exec_error(PCRE2_ERROR_NOMEMORY);
756 zend_string_release_ex(key, 0);
757 efree(pattern);
758 return NULL;
759 }
760 _k = zend_string_init(ZSTR_VAL(BG(ctype_string)), ZSTR_LEN(BG(ctype_string)), 1);
761 GC_MAKE_PERSISTENT_LOCAL(_k);
762 zend_hash_add_ptr(&char_tables, _k, (void *)tables);
763 zend_string_release(_k);
764 }
765 }
766 pcre2_set_character_tables(cctx, tables);
767
768 pcre2_set_compile_extra_options(cctx, eoptions);
769
770 /* Compile pattern and display a warning if compilation failed. */
771 re = pcre2_compile((PCRE2_SPTR)pattern, pattern_len, coptions, &errnumber, &erroffset, cctx);
772
773 if (re == NULL) {
774 if (key != regex) {
775 zend_string_release_ex(key, 0);
776 }
777 pcre2_get_error_message(errnumber, error, sizeof(error));
778 php_error_docref(NULL,E_WARNING, "Compilation failed: %s at offset %zu", error, erroffset);
779 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
780 efree(pattern);
781 return NULL;
782 }
783
784 #ifdef HAVE_PCRE_JIT_SUPPORT
785 if (PCRE_G(jit)) {
786 /* Enable PCRE JIT compiler */
787 rc = pcre2_jit_compile(re, PCRE2_JIT_COMPLETE);
788 if (EXPECTED(rc >= 0)) {
789 size_t jit_size = 0;
790 if (!pcre2_pattern_info(re, PCRE2_INFO_JITSIZE, &jit_size) && jit_size > 0) {
791 poptions |= PREG_JIT;
792 }
793 } else if (rc == PCRE2_ERROR_NOMEMORY) {
794 php_error_docref(NULL, E_WARNING,
795 "Allocation of JIT memory failed, PCRE JIT will be disabled. "
796 "This is likely caused by security restrictions. "
797 "Either grant PHP permission to allocate executable memory, or set pcre.jit=0");
798 PCRE_G(jit) = 0;
799 } else {
800 pcre2_get_error_message(rc, error, sizeof(error));
801 php_error_docref(NULL, E_WARNING, "JIT compilation failed: %s", error);
802 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
803 }
804 }
805 #endif
806 efree(pattern);
807
808 /*
809 * If we reached cache limit, clean out the items from the head of the list;
810 * these are supposedly the oldest ones (but not necessarily the least used
811 * ones).
812 */
813 if (zend_hash_num_elements(&PCRE_G(pcre_cache)) == PCRE_CACHE_SIZE) {
814 int num_clean = PCRE_CACHE_SIZE / 8;
815 zend_hash_apply_with_argument(&PCRE_G(pcre_cache), pcre_clean_cache, &num_clean);
816 }
817
818 /* Store the compiled pattern and extra info in the cache. */
819 new_entry.re = re;
820 new_entry.preg_options = poptions;
821 new_entry.compile_options = coptions;
822 new_entry.refcount = 0;
823
824 rc = pcre2_pattern_info(re, PCRE2_INFO_CAPTURECOUNT, &new_entry.capture_count);
825 if (rc < 0) {
826 if (key != regex) {
827 zend_string_release_ex(key, 0);
828 }
829 php_error_docref(NULL, E_WARNING, "Internal pcre2_pattern_info() error %d", rc);
830 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
831 return NULL;
832 }
833
834 uint32_t name_count;
835 rc = pcre2_pattern_info(re, PCRE2_INFO_NAMECOUNT, &name_count);
836 if (rc < 0) {
837 if (key != regex) {
838 zend_string_release_ex(key, 0);
839 }
840 php_error_docref(NULL, E_WARNING, "Internal pcre_pattern_info() error %d", rc);
841 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
842 return NULL;
843 }
844
845 /* Compute and cache the subpattern table to avoid computing it again over and over. */
846 if (name_count > 0) {
847 new_entry.subpats_table = make_subpats_table(name_count, &new_entry);
848 if (!new_entry.subpats_table) {
849 if (key != regex) {
850 zend_string_release_ex(key, false);
851 }
852 /* Warning already emitted by make_subpats_table() */
853 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
854 return NULL;
855 }
856 } else {
857 new_entry.subpats_table = NULL;
858 }
859
860 /*
861 * Interned strings are not duplicated when stored in HashTable,
862 * but all the interned strings created during HTTP request are removed
863 * at end of request. However PCRE_G(pcre_cache) must be consistent
864 * on the next request as well. So we disable usage of interned strings
865 * as hash keys especually for this table.
866 * See bug #63180
867 */
868 if (!(GC_FLAGS(key) & IS_STR_PERMANENT)) {
869 zend_string *str = zend_string_init(ZSTR_VAL(key), ZSTR_LEN(key), 1);
870 GC_MAKE_PERSISTENT_LOCAL(str);
871
872 ret = zend_hash_add_new_mem(&PCRE_G(pcre_cache), str, &new_entry, sizeof(pcre_cache_entry));
873 zend_string_release(str);
874 } else {
875 ret = zend_hash_add_new_mem(&PCRE_G(pcre_cache), key, &new_entry, sizeof(pcre_cache_entry));
876 }
877
878 if (key != regex) {
879 zend_string_release_ex(key, 0);
880 }
881
882 return ret;
883 }
884 /* }}} */
885
886 /* {{{ pcre_get_compiled_regex_cache */
pcre_get_compiled_regex_cache(zend_string * regex)887 PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex)
888 {
889 return pcre_get_compiled_regex_cache_ex(regex, true);
890 }
891 /* }}} */
892
893 /* {{{ pcre_get_compiled_regex */
pcre_get_compiled_regex(zend_string * regex,uint32_t * capture_count)894 PHPAPI pcre2_code *pcre_get_compiled_regex(zend_string *regex, uint32_t *capture_count)
895 {
896 pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex);
897
898 if (capture_count) {
899 *capture_count = pce ? pce->capture_count : 0;
900 }
901
902 return pce ? pce->re : NULL;
903 }
904 /* }}} */
905
906 /* XXX For the cases where it's only about match yes/no and no capture
907 required, perhaps just a minimum sized data would suffice. */
php_pcre_create_match_data(uint32_t capture_count,pcre2_code * re)908 PHPAPI pcre2_match_data *php_pcre_create_match_data(uint32_t capture_count, pcre2_code *re)
909 {/*{{{*/
910
911 assert(NULL != re);
912
913 if (EXPECTED(!mdata_used)) {
914 int rc = 0;
915
916 if (!capture_count) {
917 /* As we deal with a non cached pattern, no other way to gather this info. */
918 rc = pcre2_pattern_info(re, PCRE2_INFO_CAPTURECOUNT, &capture_count);
919 }
920
921 if (rc >= 0 && capture_count + 1 <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
922 mdata_used = 1;
923 return mdata;
924 }
925 }
926
927 return pcre2_match_data_create_from_pattern(re, gctx);
928 }/*}}}*/
929
php_pcre_free_match_data(pcre2_match_data * match_data)930 PHPAPI void php_pcre_free_match_data(pcre2_match_data *match_data)
931 {/*{{{*/
932 if (UNEXPECTED(match_data != mdata)) {
933 pcre2_match_data_free(match_data);
934 } else {
935 mdata_used = 0;
936 }
937 }/*}}}*/
938
/* Store a new two-element array [NULL, -1] in `pair`. */
static void init_unmatched_null_pair(zval *pair) {
	zval match_value, match_offset;

	ZVAL_NULL(&match_value);
	ZVAL_LONG(&match_offset, -1);

	ZVAL_ARR(pair, zend_new_pair(&match_value, &match_offset));
}
945
/* Store a new two-element array ["", -1] in `pair`. */
static void init_unmatched_empty_pair(zval *pair) {
	zval match_value, match_offset;

	ZVAL_EMPTY_STRING(&match_value);
	ZVAL_LONG(&match_offset, -1);

	ZVAL_ARR(pair, zend_new_pair(&match_value, &match_offset));
}
952
populate_match_value_str(zval * val,const char * subject,PCRE2_SIZE start_offset,PCRE2_SIZE end_offset)953 static zend_always_inline void populate_match_value_str(
954 zval *val, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset) {
955 ZVAL_STRINGL_FAST(val, subject + start_offset, end_offset - start_offset);
956 }
957
populate_match_value(zval * val,const char * subject,PCRE2_SIZE start_offset,PCRE2_SIZE end_offset,bool unmatched_as_null)958 static zend_always_inline void populate_match_value(
959 zval *val, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset,
960 bool unmatched_as_null) {
961 if (PCRE2_UNSET == start_offset) {
962 if (unmatched_as_null) {
963 ZVAL_NULL(val);
964 } else {
965 ZVAL_EMPTY_STRING(val);
966 }
967 } else {
968 populate_match_value_str(val, subject, start_offset, end_offset);
969 }
970 }
971
/* Store `val` under `name` in `subpats` and take a reference on it when it
 * was actually stored. */
static inline void add_named(
		HashTable *const subpats, zend_string *name, zval *val, bool unmatched) {
	/* If the DUPNAMES option is used, multiple subpatterns might have the same name.
	 * In this case we want to preserve the one that actually has a value:
	 * an unmatched value is only added when the name is not present yet,
	 * a matched one always replaces what is there. */
	if (unmatched) {
		if (zend_hash_add(subpats, name, val) == NULL) {
			/* Name already present: nothing stored, no reference taken. */
			return;
		}
	} else {
		zend_hash_update(subpats, name, val);
	}

	Z_TRY_ADDREF_P(val);
}
985
986 /* {{{ add_offset_pair */
add_offset_pair(HashTable * const result,const char * subject,PCRE2_SIZE start_offset,PCRE2_SIZE end_offset,zend_string * name,zend_long unmatched_as_null)987 static inline void add_offset_pair(
988 HashTable *const result, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset,
989 zend_string *name, zend_long unmatched_as_null)
990 {
991 zval match_pair;
992
993 /* Add (match, offset) to the return value */
994 if (PCRE2_UNSET == start_offset) {
995 if (unmatched_as_null) {
996 do {
997 if (Z_ISUNDEF(PCRE_G(unmatched_null_pair))) {
998 if (UNEXPECTED(EG(flags) & EG_FLAGS_IN_SHUTDOWN)) {
999 init_unmatched_null_pair(&match_pair);
1000 break;
1001 } else {
1002 init_unmatched_null_pair(&PCRE_G(unmatched_null_pair));
1003 }
1004 }
1005 ZVAL_COPY(&match_pair, &PCRE_G(unmatched_null_pair));
1006 } while (0);
1007 } else {
1008 do {
1009 if (Z_ISUNDEF(PCRE_G(unmatched_empty_pair))) {
1010 if (UNEXPECTED(EG(flags) & EG_FLAGS_IN_SHUTDOWN)) {
1011 init_unmatched_empty_pair(&match_pair);
1012 break;
1013 } else {
1014 init_unmatched_empty_pair(&PCRE_G(unmatched_empty_pair));
1015 }
1016 }
1017 ZVAL_COPY(&match_pair, &PCRE_G(unmatched_empty_pair));
1018 } while (0);
1019 }
1020 } else {
1021 zval val1, val2;
1022 populate_match_value_str(&val1, subject, start_offset, end_offset);
1023 ZVAL_LONG(&val2, start_offset);
1024 ZVAL_ARR(&match_pair, zend_new_pair(&val1, &val2));
1025 }
1026
1027 if (name) {
1028 add_named(result, name, &match_pair, start_offset == PCRE2_UNSET);
1029 }
1030 zend_hash_next_index_insert_new(result, &match_pair);
1031 }
1032 /* }}} */
1033
/* Fill the array `subpats` with the capture groups of a single match.
 *
 *   offsets      - ovector; group g occupies offsets[2*g] and offsets[2*g+1].
 *   subpat_names - optional table of group names indexed by group number
 *                  (entries may be NULL); named groups are stored under both
 *                  their name and their numeric index.
 *   num_subpats  - total number of groups in the pattern, group 0 included.
 *   count        - number of groups that were set for this match.
 *   mark         - optional mark name, stored under the "MARK" key.
 *   flags        - PREG_OFFSET_CAPTURE selects (match, offset) pairs,
 *                  PREG_UNMATCHED_AS_NULL selects NULL placeholders and pads
 *                  the groups in [count, num_subpats). */
static void populate_subpat_array(
		zval *subpats, const char *subject, PCRE2_SIZE *offsets, zend_string **subpat_names,
		uint32_t num_subpats, int count, const PCRE2_SPTR mark, zend_long flags) {
	const zend_long with_offsets = flags & PREG_OFFSET_CAPTURE;
	const zend_long null_unmatched = flags & PREG_UNMATCHED_AS_NULL;
	HashTable *const ht = Z_ARRVAL_P(subpats);
	zval entry;
	int idx;

	if (with_offsets) {
		/* (match, offset) pairs; add_offset_pair() copes with a NULL name. */
		for (idx = 0; idx < count; idx++) {
			add_offset_pair(
				ht, subject, offsets[2*idx], offsets[2*idx+1],
				subpat_names ? subpat_names[idx] : NULL, null_unmatched);
		}
		if (null_unmatched) {
			for (idx = count; idx < num_subpats; idx++) {
				add_offset_pair(
					ht, NULL, PCRE2_UNSET, PCRE2_UNSET,
					subpat_names ? subpat_names[idx] : NULL, 1);
			}
		}
	} else if (subpat_names) {
		/* Plain values, pattern has named groups. */
		for (idx = 0; idx < count; idx++) {
			populate_match_value(
				&entry, subject, offsets[2*idx], offsets[2*idx+1], null_unmatched);
			if (subpat_names[idx]) {
				add_named(ht, subpat_names[idx], &entry, offsets[2*idx] == PCRE2_UNSET);
			}
			zend_hash_next_index_insert_new(ht, &entry);
		}
		if (null_unmatched) {
			for (idx = count; idx < num_subpats; idx++) {
				ZVAL_NULL(&entry);
				if (subpat_names[idx]) {
					/* Never overwrite a value stored by a same-named group. */
					zend_hash_add(ht, subpat_names[idx], &entry);
				}
				zend_hash_next_index_insert_new(ht, &entry);
			}
		}
	} else {
		/* Plain values, numeric keys only. */
		for (idx = 0; idx < count; idx++) {
			populate_match_value(
				&entry, subject, offsets[2*idx], offsets[2*idx+1], null_unmatched);
			zend_hash_next_index_insert_new(ht, &entry);
		}
		if (null_unmatched) {
			for (idx = count; idx < num_subpats; idx++) {
				add_next_index_null(subpats);
			}
		}
	}

	/* Add MARK, if available */
	if (mark) {
		add_assoc_string_ex(subpats, "MARK", sizeof("MARK") - 1, (char *)mark);
	}
}
1102
/* Shared entry point of preg_match() and preg_match_all(): parses the userland
 * arguments, looks up the compiled pattern and delegates to
 * php_pcre_match_impl(). `global` selects match-all behaviour. */
static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, bool global) /* {{{ */
{
	zend_string *regex;            /* Pattern source */
	zend_string *subject;          /* Subject string */
	zval *subpats = NULL;          /* Optional output array for captures */
	zend_long flags = 0;           /* PREG_* control flags */
	zend_long start_offset = 0;    /* Offset at which the search begins */
	pcre_cache_entry *pce;         /* Compiled pattern */

	ZEND_PARSE_PARAMETERS_START(2, 5)
		Z_PARAM_STR(regex)
		Z_PARAM_STR(subject)
		Z_PARAM_OPTIONAL
		Z_PARAM_ZVAL(subpats)
		Z_PARAM_LONG(flags)
		Z_PARAM_LONG(start_offset)
	ZEND_PARSE_PARAMETERS_END();

	/* Fetch the pattern from the cache, compiling it if necessary. */
	pce = pcre_get_compiled_regex_cache(regex);
	if (pce == NULL) {
		RETURN_FALSE;
	}

	/* ZEND_LONG_MIN cannot be negated when the offset is resolved later on. */
	if (start_offset == ZEND_LONG_MIN) {
		zend_argument_value_error(5, "must be greater than " ZEND_LONG_FMT, ZEND_LONG_MIN);
		RETURN_THROWS();
	}

	/* The refcount is held for the duration of the match. */
	pce->refcount++;
	php_pcre_match_impl(pce, subject, return_value, subpats, global, flags, start_offset);
	pce->refcount--;
}
/* }}} */
1138
is_known_valid_utf8(zend_string * subject_str,PCRE2_SIZE start_offset)1139 static zend_always_inline bool is_known_valid_utf8(
1140 zend_string *subject_str, PCRE2_SIZE start_offset) {
1141 if (!ZSTR_IS_VALID_UTF8(subject_str)) {
1142 /* We don't know whether the string is valid UTF-8 or not. */
1143 return 0;
1144 }
1145
1146 if (start_offset == ZSTR_LEN(subject_str)) {
1147 /* Degenerate case: Offset points to end of string. */
1148 return 1;
1149 }
1150
1151 /* Check that the offset does not point to an UTF-8 continuation byte. */
1152 return (ZSTR_VAL(subject_str)[start_offset] & 0xc0) != 0x80;
1153 }
1154
1155 /* {{{ php_pcre_match_impl() */
php_pcre_match_impl(pcre_cache_entry * pce,zend_string * subject_str,zval * return_value,zval * subpats,bool global,zend_long flags,zend_off_t start_offset)1156 PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str, zval *return_value,
1157 zval *subpats, bool global, zend_long flags, zend_off_t start_offset)
1158 {
1159 zval result_set; /* Holds a set of subpatterns after
1160 a global match */
1161 HashTable **match_sets = NULL; /* An array of sets of matches for each
1162 subpattern after a global match */
1163 uint32_t options; /* Execution options */
1164 int count; /* Count of matched subpatterns */
1165 uint32_t num_subpats; /* Number of captured subpatterns */
1166 int matched; /* Has anything matched */
1167 zend_string **subpat_names; /* Array for named subpatterns */
1168 size_t i;
1169 uint32_t subpats_order; /* Order of subpattern matches */
1170 uint32_t offset_capture; /* Capture match offsets: yes/no */
1171 zend_long unmatched_as_null; /* Null non-matches: yes/no */
1172 PCRE2_SPTR mark = NULL; /* Target for MARK name */
1173 HashTable *marks = NULL; /* Array of marks for PREG_PATTERN_ORDER */
1174 pcre2_match_data *match_data;
1175 PCRE2_SIZE start_offset2, orig_start_offset;
1176
1177 char *subject = ZSTR_VAL(subject_str);
1178 size_t subject_len = ZSTR_LEN(subject_str);
1179
1180 /* Overwrite the passed-in value for subpatterns with an empty array. */
1181 if (subpats != NULL) {
1182 subpats = zend_try_array_init(subpats);
1183 if (!subpats) {
1184 RETURN_THROWS();
1185 }
1186 }
1187
1188 subpats_order = global ? PREG_PATTERN_ORDER : 0;
1189
1190 if (flags) {
1191 offset_capture = flags & PREG_OFFSET_CAPTURE;
1192 unmatched_as_null = flags & PREG_UNMATCHED_AS_NULL;
1193
1194 /*
1195 * subpats_order is pre-set to pattern mode so we change it only if
1196 * necessary.
1197 */
1198 if (flags & 0xff) {
1199 subpats_order = flags & 0xff;
1200 if ((global && (subpats_order < PREG_PATTERN_ORDER || subpats_order > PREG_SET_ORDER)) ||
1201 (!global && subpats_order != 0)) {
1202 zend_argument_value_error(4, "must be a PREG_* constant");
1203 RETURN_THROWS();
1204 }
1205 }
1206 } else {
1207 offset_capture = 0;
1208 unmatched_as_null = 0;
1209 }
1210
1211 /* Negative offset counts from the end of the string. */
1212 if (start_offset < 0) {
1213 if ((PCRE2_SIZE)-start_offset <= subject_len) {
1214 start_offset2 = subject_len + start_offset;
1215 } else {
1216 start_offset2 = 0;
1217 }
1218 } else {
1219 start_offset2 = (PCRE2_SIZE)start_offset;
1220 }
1221
1222 if (start_offset2 > subject_len) {
1223 pcre_handle_exec_error(PCRE2_ERROR_BADOFFSET);
1224 RETURN_FALSE;
1225 }
1226
1227 /* Calculate the size of the offsets array, and allocate memory for it. */
1228 num_subpats = pce->capture_count + 1;
1229
1230 /*
1231 * Build a mapping from subpattern numbers to their names. We will
1232 * allocate the table only if there are any named subpatterns.
1233 */
1234 subpat_names = NULL;
1235 if (subpats) {
1236 subpat_names = pce->subpats_table;
1237 }
1238
1239 matched = 0;
1240 PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1241
1242 if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
1243 match_data = mdata;
1244 } else {
1245 match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
1246 if (!match_data) {
1247 PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1248 RETURN_FALSE;
1249 }
1250 }
1251
1252 /* Allocate match sets array and initialize the values. */
1253 if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
1254 match_sets = safe_emalloc(num_subpats, sizeof(HashTable *), 0);
1255 for (i=0; i<num_subpats; i++) {
1256 match_sets[i] = zend_new_array(0);
1257 }
1258 }
1259
1260 /* Array of subpattern offsets */
1261 PCRE2_SIZE *const offsets = pcre2_get_ovector_pointer(match_data);
1262
1263 orig_start_offset = start_offset2;
1264 options =
1265 (pce->compile_options & PCRE2_UTF) && !is_known_valid_utf8(subject_str, orig_start_offset)
1266 ? 0 : PCRE2_NO_UTF_CHECK;
1267
1268 /* Execute the regular expression. */
1269 #ifdef HAVE_PCRE_JIT_SUPPORT
1270 if ((pce->preg_options & PREG_JIT) && options) {
1271 count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1272 PCRE2_NO_UTF_CHECK, match_data, mctx);
1273 } else
1274 #endif
1275 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1276 options, match_data, mctx);
1277
1278 while (1) {
1279 /* If something has matched */
1280 if (count >= 0) {
1281 /* Check for too many substrings condition. */
1282 if (UNEXPECTED(count == 0)) {
1283 php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
1284 count = num_subpats;
1285 }
1286
1287 matched:
1288 matched++;
1289
1290 /* If subpatterns array has been passed, fill it in with values. */
1291 if (subpats != NULL) {
1292 /* Try to get the list of substrings and display a warning if failed. */
1293 if (UNEXPECTED(offsets[1] < offsets[0])) {
1294 if (match_sets) efree(match_sets);
1295 php_error_docref(NULL, E_WARNING, "Get subpatterns list failed");
1296 RETURN_FALSE;
1297 }
1298
1299 if (global) { /* global pattern matching */
1300 if (subpats_order == PREG_PATTERN_ORDER) {
1301 /* For each subpattern, insert it into the appropriate array. */
1302 if (offset_capture) {
1303 for (i = 0; i < count; i++) {
1304 add_offset_pair(
1305 match_sets[i], subject, offsets[2*i], offsets[2*i+1],
1306 NULL, unmatched_as_null);
1307 }
1308 } else {
1309 for (i = 0; i < count; i++) {
1310 zval val;
1311 populate_match_value(
1312 &val, subject, offsets[2*i], offsets[2*i+1], unmatched_as_null);
1313 zend_hash_next_index_insert_new(match_sets[i], &val);
1314 }
1315 }
1316 mark = pcre2_get_mark(match_data);
1317 /* Add MARK, if available */
1318 if (mark) {
1319 if (!marks) {
1320 marks = zend_new_array(0);
1321 }
1322 zval tmp;
1323 ZVAL_STRING(&tmp, (char *) mark);
1324 zend_hash_index_add_new(marks, matched - 1, &tmp);
1325 }
1326 /*
1327 * If the number of captured subpatterns on this run is
1328 * less than the total possible number, pad the result
1329 * arrays with NULLs or empty strings.
1330 */
1331 if (count < num_subpats) {
1332 for (int i = count; i < num_subpats; i++) {
1333 if (offset_capture) {
1334 add_offset_pair(
1335 match_sets[i], NULL, PCRE2_UNSET, PCRE2_UNSET,
1336 NULL, unmatched_as_null);
1337 } else if (unmatched_as_null) {
1338 zval tmp;
1339 ZVAL_NULL(&tmp);
1340 zend_hash_next_index_insert_new(match_sets[i], &tmp);
1341 } else {
1342 zval tmp;
1343 ZVAL_EMPTY_STRING(&tmp);
1344 zend_hash_next_index_insert_new(match_sets[i], &tmp);
1345 }
1346 }
1347 }
1348 } else {
1349 /* Allocate and populate the result set array */
1350 mark = pcre2_get_mark(match_data);
1351 array_init_size(&result_set, count + (mark ? 1 : 0));
1352 populate_subpat_array(
1353 &result_set, subject, offsets, subpat_names,
1354 num_subpats, count, mark, flags);
1355 /* And add it to the output array */
1356 zend_hash_next_index_insert_new(Z_ARRVAL_P(subpats), &result_set);
1357 }
1358 } else { /* single pattern matching */
1359 /* For each subpattern, insert it into the subpatterns array. */
1360 mark = pcre2_get_mark(match_data);
1361 populate_subpat_array(
1362 subpats, subject, offsets, subpat_names, num_subpats, count, mark, flags);
1363 break;
1364 }
1365 }
1366
1367 /* Advance to the next piece. */
1368 start_offset2 = offsets[1];
1369
1370 /* If we have matched an empty string, mimic what Perl's /g options does.
1371 This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
1372 the match again at the same point. If this fails (picked up above) we
1373 advance to the next character. */
1374 if (start_offset2 == offsets[0]) {
1375 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1376 PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
1377 if (count >= 0) {
1378 if (global) {
1379 goto matched;
1380 } else {
1381 break;
1382 }
1383 } else if (count == PCRE2_ERROR_NOMATCH) {
1384 /* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
1385 this is not necessarily the end. We need to advance
1386 the start offset, and continue. Fudge the offset values
1387 to achieve this, unless we're already at the end of the string. */
1388 if (start_offset2 < subject_len) {
1389 size_t unit_len = calculate_unit_length(pce, subject + start_offset2);
1390
1391 start_offset2 += unit_len;
1392 } else {
1393 break;
1394 }
1395 } else {
1396 goto error;
1397 }
1398 }
1399 } else if (count == PCRE2_ERROR_NOMATCH) {
1400 break;
1401 } else {
1402 error:
1403 pcre_handle_exec_error(count);
1404 break;
1405 }
1406
1407 if (!global) {
1408 break;
1409 }
1410
1411 /* Execute the regular expression. */
1412 #ifdef HAVE_PCRE_JIT_SUPPORT
1413 if ((pce->preg_options & PREG_JIT)) {
1414 if (start_offset2 > subject_len) {
1415 pcre_handle_exec_error(PCRE2_ERROR_BADOFFSET);
1416 break;
1417 }
1418 count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1419 PCRE2_NO_UTF_CHECK, match_data, mctx);
1420 } else
1421 #endif
1422 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1423 PCRE2_NO_UTF_CHECK, match_data, mctx);
1424 }
1425 if (match_data != mdata) {
1426 pcre2_match_data_free(match_data);
1427 }
1428
1429 /* Add the match sets to the output array and clean up */
1430 if (match_sets) {
1431 if (subpat_names) {
1432 for (i = 0; i < num_subpats; i++) {
1433 zval wrapper;
1434 ZVAL_ARR(&wrapper, match_sets[i]);
1435 if (subpat_names[i]) {
1436 zend_hash_update(Z_ARRVAL_P(subpats), subpat_names[i], &wrapper);
1437 GC_ADDREF(match_sets[i]);
1438 }
1439 zend_hash_next_index_insert_new(Z_ARRVAL_P(subpats), &wrapper);
1440 }
1441 } else {
1442 for (i = 0; i < num_subpats; i++) {
1443 zval wrapper;
1444 ZVAL_ARR(&wrapper, match_sets[i]);
1445 zend_hash_next_index_insert_new(Z_ARRVAL_P(subpats), &wrapper);
1446 }
1447 }
1448 efree(match_sets);
1449
1450 if (marks) {
1451 zval tmp;
1452 ZVAL_ARR(&tmp, marks);
1453 zend_hash_str_update(Z_ARRVAL_P(subpats), "MARK", sizeof("MARK") - 1, &tmp);
1454 }
1455 }
1456
1457 if (PCRE_G(error_code) == PHP_PCRE_NO_ERROR) {
1458 /* If there was no error and we're in /u mode, remember that the string is valid UTF-8. */
1459 if ((pce->compile_options & PCRE2_UTF)
1460 && !ZSTR_IS_INTERNED(subject_str) && orig_start_offset == 0) {
1461 GC_ADD_FLAGS(subject_str, IS_STR_VALID_UTF8);
1462 }
1463
1464 RETVAL_LONG(matched);
1465 } else {
1466 RETVAL_FALSE;
1467 }
1468 }
1469 /* }}} */
1470
/* {{{ Perform a Perl-style regular expression match */
/* Userland preg_match(): forwards the call unchanged to php_do_pcre_match()
 * with global matching switched off. */
PHP_FUNCTION(preg_match)
{
	php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, false);
}
/* }}} */
1477
1478 ZEND_FRAMELESS_FUNCTION(preg_match, 2)
1479 {
1480 zval regex_tmp, subject_tmp;
1481 zend_string *regex, *subject;
1482
1483 Z_FLF_PARAM_STR(1, regex, regex_tmp);
1484 Z_FLF_PARAM_STR(2, subject, subject_tmp);
1485
1486 /* Compile regex or get it from cache. */
1487 pcre_cache_entry *pce;
1488 if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
1489 RETURN_FALSE;
1490 }
1491
1492 pce->refcount++;
1493 php_pcre_match_impl(pce, subject, return_value, /* subpats */ NULL,
1494 /* global */ false, /* flags */ 0, /* start_offset */ 0);
1495 pce->refcount--;
1496
1497 flf_clean:
1498 Z_FLF_PARAM_FREE_STR(1, regex_tmp);
1499 Z_FLF_PARAM_FREE_STR(2, subject_tmp);
1500 }
1501
/* {{{ Perform a Perl-style global regular expression match */
/* Userland preg_match_all(): forwards the call unchanged to
 * php_do_pcre_match() with global matching switched on. */
PHP_FUNCTION(preg_match_all)
{
	php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, true);
}
/* }}} */
1508
/* {{{ preg_get_backref */
/* Try to parse a back-reference at *str.
 *
 * *str must point at the introducing character (the callers in this file use
 * '\\' or '$') of a NUL-terminated string. Recognised forms are the introducer
 * followed by one or two decimal digits, and "${" + one or two digits + "}".
 *
 * On success the reference number is stored in *backref, *str is advanced past
 * the reference and 1 is returned. On failure 0 is returned and *str is left
 * unchanged; *backref may already have been written in that case. */
static int preg_get_backref(char **str, int *backref)
{
	char *cursor = *str;
	int braced;

	/* An introducer at the very end of the string cannot start a reference. */
	if (cursor[1] == '\0') {
		return 0;
	}

	/* Only the '$' introducer supports the braced form. */
	braced = (cursor[0] == '$' && cursor[1] == '{');
	cursor += braced ? 2 : 1;

	/* At least one digit is mandatory. */
	if (*cursor < '0' || *cursor > '9') {
		return 0;
	}
	*backref = *cursor - '0';
	cursor++;

	/* An optional second digit; anything after that is not consumed. */
	if (*cursor >= '0' && *cursor <= '9') {
		*backref = *backref * 10 + *cursor - '0';
		cursor++;
	}

	if (braced) {
		if (*cursor != '}') {
			return 0;
		}
		cursor++;
	}

	*str = cursor;
	return 1;
}
/* }}} */
1546
1547 /* {{{ preg_do_repl_func */
preg_do_repl_func(zend_fcall_info * fci,zend_fcall_info_cache * fcc,const char * subject,PCRE2_SIZE * offsets,zend_string ** subpat_names,uint32_t num_subpats,int count,const PCRE2_SPTR mark,zend_long flags)1548 static zend_string *preg_do_repl_func(zend_fcall_info *fci, zend_fcall_info_cache *fcc, const char *subject, PCRE2_SIZE *offsets, zend_string **subpat_names, uint32_t num_subpats, int count, const PCRE2_SPTR mark, zend_long flags)
1549 {
1550 zend_string *result_str;
1551 zval retval; /* Function return value */
1552 zval arg; /* Argument to pass to function */
1553
1554 array_init_size(&arg, count + (mark ? 1 : 0));
1555 populate_subpat_array(&arg, subject, offsets, subpat_names, num_subpats, count, mark, flags);
1556
1557 fci->retval = &retval;
1558 fci->param_count = 1;
1559 fci->params = &arg;
1560
1561 if (zend_call_function(fci, fcc) == SUCCESS && Z_TYPE(retval) != IS_UNDEF) {
1562 if (EXPECTED(Z_TYPE(retval) == IS_STRING)) {
1563 result_str = Z_STR(retval);
1564 } else {
1565 result_str = zval_get_string_func(&retval);
1566 zval_ptr_dtor(&retval);
1567 }
1568 } else {
1569 if (!EG(exception)) {
1570 php_error_docref(NULL, E_WARNING, "Unable to call custom replacement function");
1571 }
1572
1573 result_str = zend_string_init(&subject[offsets[0]], offsets[1] - offsets[0], 0);
1574 }
1575
1576 zval_ptr_dtor(&arg);
1577
1578 return result_str;
1579 }
1580 /* }}} */
1581
1582 /* {{{ php_pcre_replace */
php_pcre_replace(zend_string * regex,zend_string * subject_str,const char * subject,size_t subject_len,zend_string * replace_str,size_t limit,size_t * replace_count)1583 PHPAPI zend_string *php_pcre_replace(zend_string *regex,
1584 zend_string *subject_str,
1585 const char *subject, size_t subject_len,
1586 zend_string *replace_str,
1587 size_t limit, size_t *replace_count)
1588 {
1589 pcre_cache_entry *pce; /* Compiled regular expression */
1590 zend_string *result; /* Function result */
1591
1592 /* Abort on pending exception, e.g. thrown from __toString(). */
1593 if (UNEXPECTED(EG(exception))) {
1594 return NULL;
1595 }
1596
1597 /* Compile regex or get it from cache. */
1598 if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
1599 return NULL;
1600 }
1601 pce->refcount++;
1602 result = php_pcre_replace_impl(pce, subject_str, subject, subject_len, replace_str,
1603 limit, replace_count);
1604 pce->refcount--;
1605
1606 return result;
1607 }
1608 /* }}} */
1609
/* {{{ php_pcre_replace_impl() */
/* Replace matches of the compiled pattern `pce` in `subject` using the
 * replacement template `replace_str`.
 *
 *   pce           - compiled pattern (cache entry).
 *   subject_str   - optional zend_string for the subject; when it is non-NULL
 *                   and no replacement was made, it is returned with an added
 *                   reference instead of building a new string.
 *   subject,
 *   subject_len   - subject bytes and their length.
 *   replace_str   - replacement template. Back-references as recognised by
 *                   preg_get_backref() ("\N", "$N", "${N}") are replaced with
 *                   the text of group N, or with nothing if the group is not
 *                   available. A '\\' or '$' that directly follows a literal
 *                   backslash is emitted in place of that backslash.
 *   limit         - maximum number of replacements; the loop stops replacing
 *                   once it reaches 0.
 *   replace_count - when non-NULL, incremented once per replacement (it is
 *                   not reset here).
 *
 * Returns the resulting string, or NULL on failure: match data could not be
 * allocated, a match reported end < start, or the matcher returned an error
 * other than "no match" (passed to pcre_handle_exec_error()).
 */
PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *subject_str, const char *subject, size_t subject_len, zend_string *replace_str, size_t limit, size_t *replace_count)
{
	uint32_t		 options;			/* Execution options */
	int				 count;				/* Count of matched subpatterns */
	uint32_t		 num_subpats;		/* Number of captured subpatterns */
	size_t			 new_len;			/* Length of needed storage */
	size_t			 alloc_len;			/* Actual allocated length */
	size_t			 match_len;			/* Length of the current match */
	int				 backref;			/* Backreference number */
	PCRE2_SIZE		 start_offset;		/* Where the new search starts */
	size_t			 last_end_offset;	/* Where the last search ended */
	char			*walkbuf,			/* Location of current replacement in the result */
					*walk,				/* Used to walk the replacement string */
					 walk_last;			/* Last walked character */
	const char		*match,				/* The current match */
					*piece,				/* The current piece of subject */
					*replace_end;		/* End of replacement string */
	size_t			result_len;			/* Length of result */
	zend_string		*result;			/* Result of replacement */
	pcre2_match_data *match_data;

	/* Total number of groups, including group 0 (the whole match). */
	num_subpats = pce->capture_count + 1;
	alloc_len = 0;
	result = NULL;

	/* Initialize */
	match = NULL;
	start_offset = 0;
	last_end_offset = 0;
	result_len = 0;
	PCRE_G(error_code) = PHP_PCRE_NO_ERROR;

	/* Use the preallocated match data block when it is free and large enough,
	 * otherwise allocate one that is released at the end of this function. */
	if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
		match_data = mdata;
	} else {
		match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
		if (!match_data) {
			PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
			return NULL;
		}
	}

	/* For /u patterns the first match runs with the UTF check enabled
	 * (options == 0); all later matches pass PCRE2_NO_UTF_CHECK. */
	options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;

	/* Array of subpattern offsets */
	PCRE2_SIZE *const offsets = pcre2_get_ovector_pointer(match_data);

	/* Execute the regular expression. The JIT path is taken for the first
	 * match only when no UTF check is needed (options != 0). */
#ifdef HAVE_PCRE_JIT_SUPPORT
	if ((pce->preg_options & PREG_JIT) && options) {
		count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
				PCRE2_NO_UTF_CHECK, match_data, mctx);
	} else
#endif
	count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
			options, match_data, mctx);

	while (1) {
		/* Unprocessed part of the subject starts where the last match ended. */
		piece = subject + last_end_offset;

		if (count >= 0 && limit > 0) {
			bool simple_string;

			/* Check for too many substrings condition. */
			if (UNEXPECTED(count == 0)) {
				php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
				count = num_subpats;
			}

matched:
			/* A match that ends before it starts cannot be processed. */
			if (UNEXPECTED(offsets[1] < offsets[0])) {
				PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
				if (result) {
					zend_string_release_ex(result, 0);
					result = NULL;
				}
				break;
			}

			if (replace_count) {
				++*replace_count;
			}

			/* Set the match location in subject */
			match = subject + offsets[0];

			new_len = result_len + offsets[0] - last_end_offset; /* part before the match */

			/* First pass over the template: compute the length of the
			 * expanded replacement and detect whether it contains any '\\'
			 * or '$' at all (if not, it can be copied verbatim below). */
			walk = ZSTR_VAL(replace_str);
			replace_end = walk + ZSTR_LEN(replace_str);
			walk_last = 0;
			simple_string = 1;
			while (walk < replace_end) {
				if ('\\' == *walk || '$' == *walk) {
					simple_string = 0;
					if (walk_last == '\\') {
						/* Escaped character: it takes the place of the
						 * backslash already counted, so no length is added. */
						walk++;
						walk_last = 0;
						continue;
					}
					if (preg_get_backref(&walk, &backref)) {
						if (backref < count)
							new_len += offsets[(backref<<1)+1] - offsets[backref<<1];
						continue;
					}
				}
				new_len++;
				walk++;
				walk_last = walk[-1];
			}

			/* Grow the result buffer when needed; the new capacity is derived
			 * from twice the required length. */
			if (new_len >= alloc_len) {
				alloc_len = zend_safe_address_guarded(2, new_len, ZSTR_MAX_OVERHEAD) - ZSTR_MAX_OVERHEAD;
				if (result == NULL) {
					result = zend_string_alloc(alloc_len, 0);
				} else {
					result = zend_string_extend(result, alloc_len, 0);
				}
			}

			if (match-piece > 0) {
				/* copy the part of the string before the match */
				memcpy(&ZSTR_VAL(result)[result_len], piece, match-piece);
				result_len += (match-piece);
			}

			if (simple_string) {
				/* copy replacement (including its terminating NUL byte) */
				memcpy(&ZSTR_VAL(result)[result_len], ZSTR_VAL(replace_str), ZSTR_LEN(replace_str)+1);
				result_len += ZSTR_LEN(replace_str);
			} else {
				/* copy replacement and backrefs */
				walkbuf = ZSTR_VAL(result) + result_len;

				/* Second pass over the template, mirroring the length
				 * computation above but writing the output this time. */
				walk = ZSTR_VAL(replace_str);
				walk_last = 0;
				while (walk < replace_end) {
					if ('\\' == *walk || '$' == *walk) {
						if (walk_last == '\\') {
							/* Overwrite the backslash emitted just before. */
							*(walkbuf-1) = *walk++;
							walk_last = 0;
							continue;
						}
						if (preg_get_backref(&walk, &backref)) {
							if (backref < count) {
								/* Skip groups whose start offset is SIZE_MAX. */
								if (offsets[backref<<1] < SIZE_MAX) {
									match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
									walkbuf = zend_mempcpy(walkbuf, subject + offsets[backref << 1], match_len);
								}
							}
							continue;
						}
					}
					*walkbuf++ = *walk++;
					walk_last = walk[-1];
				}
				*walkbuf = '\0';
				/* increment the result length by how much we've added to the string */
				result_len += (walkbuf - (ZSTR_VAL(result) + result_len));
			}

			limit--;

			/* Advance to the next piece. */
			start_offset = last_end_offset = offsets[1];

			/* If we have matched an empty string, mimic what Perl's /g options does.
			   This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
			   the match again at the same point. If this fails (picked up above) we
			   advance to the next character. */
			if (start_offset == offsets[0]) {
				count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
					PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);

				piece = subject + start_offset;
				if (count >= 0 && limit > 0) {
					goto matched;
				} else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
					/* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
					   this is not necessarily the end. We need to advance
					   the start offset, and continue. Fudge the offset values
					   to achieve this, unless we're already at the end of the string. */
					if (start_offset < subject_len) {
						size_t unit_len = calculate_unit_length(pce, piece);
						start_offset += unit_len;
					} else {
						goto not_matched;
					}
				} else {
					goto error;
				}
			}

		} else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
not_matched:
			/* Nothing was replaced and the caller supplied the subject as a
			 * zend_string: hand it back with an extra reference. */
			if (!result && subject_str) {
				result = zend_string_copy(subject_str);
				break;
			}
			/* now we know exactly how long it is */
			alloc_len = result_len + subject_len - last_end_offset;
			if (NULL != result) {
				result = zend_string_realloc(result, alloc_len, 0);
			} else {
				result = zend_string_alloc(alloc_len, 0);
			}
			/* stick that last bit of string on our output */
			memcpy(ZSTR_VAL(result) + result_len, piece, subject_len - last_end_offset);
			result_len += subject_len - last_end_offset;
			ZSTR_VAL(result)[result_len] = '\0';
			ZSTR_LEN(result) = result_len;
			break;
		} else {
error:
			/* Matcher error: report it and discard any partial result. */
			pcre_handle_exec_error(count);
			if (result) {
				zend_string_release_ex(result, 0);
				result = NULL;
			}
			break;
		}

		/* Look for the next match; no UTF check is requested from here on. */
#ifdef HAVE_PCRE_JIT_SUPPORT
		if (pce->preg_options & PREG_JIT) {
			count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
					PCRE2_NO_UTF_CHECK, match_data, mctx);
		} else
#endif
		count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
					PCRE2_NO_UTF_CHECK, match_data, mctx);
	}
	if (match_data != mdata) {
		pcre2_match_data_free(match_data);
	}

	return result;
}
/* }}} */
1850
1851 /* {{{ php_pcre_replace_func_impl() */
php_pcre_replace_func_impl(pcre_cache_entry * pce,zend_string * subject_str,const char * subject,size_t subject_len,zend_fcall_info * fci,zend_fcall_info_cache * fcc,size_t limit,size_t * replace_count,zend_long flags)1852 static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_string *subject_str, const char *subject, size_t subject_len, zend_fcall_info *fci, zend_fcall_info_cache *fcc, size_t limit, size_t *replace_count, zend_long flags)
1853 {
1854 uint32_t options; /* Execution options */
1855 int count; /* Count of matched subpatterns */
1856 zend_string **subpat_names; /* Array for named subpatterns */
1857 uint32_t num_subpats; /* Number of captured subpatterns */
1858 size_t new_len; /* Length of needed storage */
1859 size_t alloc_len; /* Actual allocated length */
1860 PCRE2_SIZE start_offset; /* Where the new search starts */
1861 size_t last_end_offset; /* Where the last search ended */
1862 const char *match, /* The current match */
1863 *piece; /* The current piece of subject */
1864 size_t result_len; /* Length of result */
1865 zend_string *result; /* Result of replacement */
1866 zend_string *eval_result; /* Result of custom function */
1867 pcre2_match_data *match_data;
1868 bool old_mdata_used;
1869
1870 /* Calculate the size of the offsets array, and allocate memory for it. */
1871 num_subpats = pce->capture_count + 1;
1872 subpat_names = pce->subpats_table;
1873
1874 alloc_len = 0;
1875 result = NULL;
1876
1877 /* Initialize */
1878 match = NULL;
1879 start_offset = 0;
1880 last_end_offset = 0;
1881 result_len = 0;
1882 PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1883
1884 old_mdata_used = mdata_used;
1885 if (!old_mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
1886 mdata_used = 1;
1887 match_data = mdata;
1888 } else {
1889 match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
1890 if (!match_data) {
1891 PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1892 mdata_used = old_mdata_used;
1893 return NULL;
1894 }
1895 }
1896
1897 options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
1898
1899 /* Array of subpattern offsets */
1900 PCRE2_SIZE *const offsets = pcre2_get_ovector_pointer(match_data);
1901
1902 /* Execute the regular expression. */
1903 #ifdef HAVE_PCRE_JIT_SUPPORT
1904 if ((pce->preg_options & PREG_JIT) && options) {
1905 count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1906 PCRE2_NO_UTF_CHECK, match_data, mctx);
1907 } else
1908 #endif
1909 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1910 options, match_data, mctx);
1911
1912 while (1) {
1913 piece = subject + last_end_offset;
1914
1915 if (count >= 0 && limit) {
1916 /* Check for too many substrings condition. */
1917 if (UNEXPECTED(count == 0)) {
1918 php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
1919 count = num_subpats;
1920 }
1921
1922 matched:
1923 if (UNEXPECTED(offsets[1] < offsets[0])) {
1924 PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1925 if (result) {
1926 zend_string_release_ex(result, 0);
1927 result = NULL;
1928 }
1929 break;
1930 }
1931
1932 if (replace_count) {
1933 ++*replace_count;
1934 }
1935
1936 /* Set the match location in subject */
1937 match = subject + offsets[0];
1938
1939 new_len = result_len + offsets[0] - last_end_offset; /* part before the match */
1940
1941 /* Use custom function to get replacement string and its length. */
1942 eval_result = preg_do_repl_func(
1943 fci, fcc, subject, offsets, subpat_names, num_subpats, count,
1944 pcre2_get_mark(match_data), flags);
1945
1946 ZEND_ASSERT(eval_result);
1947 new_len = zend_safe_address_guarded(1, ZSTR_LEN(eval_result) + ZSTR_MAX_OVERHEAD, new_len) -ZSTR_MAX_OVERHEAD;
1948 if (new_len >= alloc_len) {
1949 alloc_len = zend_safe_address_guarded(2, new_len, ZSTR_MAX_OVERHEAD) - ZSTR_MAX_OVERHEAD;
1950 if (result == NULL) {
1951 result = zend_string_alloc(alloc_len, 0);
1952 } else {
1953 result = zend_string_extend(result, alloc_len, 0);
1954 }
1955 }
1956
1957 if (match-piece > 0) {
1958 /* copy the part of the string before the match */
1959 memcpy(ZSTR_VAL(result) + result_len, piece, match-piece);
1960 result_len += (match-piece);
1961 }
1962
1963 /* If using custom function, copy result to the buffer and clean up. */
1964 memcpy(ZSTR_VAL(result) + result_len, ZSTR_VAL(eval_result), ZSTR_LEN(eval_result));
1965 result_len += ZSTR_LEN(eval_result);
1966 zend_string_release_ex(eval_result, 0);
1967
1968 limit--;
1969
1970 /* Advance to the next piece. */
1971 start_offset = last_end_offset = offsets[1];
1972
1973 /* If we have matched an empty string, mimic what Perl's /g options does.
1974 This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
1975 the match again at the same point. If this fails (picked up above) we
1976 advance to the next character. */
1977 if (start_offset == offsets[0]) {
1978 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1979 PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
1980
1981 piece = subject + start_offset;
1982 if (count >= 0 && limit) {
1983 goto matched;
1984 } else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
1985 /* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
1986 this is not necessarily the end. We need to advance
1987 the start offset, and continue. Fudge the offset values
1988 to achieve this, unless we're already at the end of the string. */
1989 if (start_offset < subject_len) {
1990 size_t unit_len = calculate_unit_length(pce, piece);
1991 start_offset += unit_len;
1992 } else {
1993 goto not_matched;
1994 }
1995 } else {
1996 goto error;
1997 }
1998 }
1999
2000 } else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
2001 not_matched:
2002 if (!result && subject_str) {
2003 result = zend_string_copy(subject_str);
2004 break;
2005 }
2006 /* now we know exactly how long it is */
2007 alloc_len = result_len + subject_len - last_end_offset;
2008 if (NULL != result) {
2009 result = zend_string_realloc(result, alloc_len, 0);
2010 } else {
2011 result = zend_string_alloc(alloc_len, 0);
2012 }
2013 /* stick that last bit of string on our output */
2014 memcpy(ZSTR_VAL(result) + result_len, piece, subject_len - last_end_offset);
2015 result_len += subject_len - last_end_offset;
2016 ZSTR_VAL(result)[result_len] = '\0';
2017 ZSTR_LEN(result) = result_len;
2018 break;
2019 } else {
2020 error:
2021 pcre_handle_exec_error(count);
2022 if (result) {
2023 zend_string_release_ex(result, 0);
2024 result = NULL;
2025 }
2026 break;
2027 }
2028 #ifdef HAVE_PCRE_JIT_SUPPORT
2029 if ((pce->preg_options & PREG_JIT)) {
2030 count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
2031 PCRE2_NO_UTF_CHECK, match_data, mctx);
2032 } else
2033 #endif
2034 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
2035 PCRE2_NO_UTF_CHECK, match_data, mctx);
2036 }
2037 if (match_data != mdata) {
2038 pcre2_match_data_free(match_data);
2039 }
2040 mdata_used = old_mdata_used;
2041
2042 return result;
2043 }
2044 /* }}} */
2045
2046 /* {{{ php_pcre_replace_func */
php_pcre_replace_func(zend_string * regex,zend_string * subject_str,zend_fcall_info * fci,zend_fcall_info_cache * fcc,size_t limit,size_t * replace_count,zend_long flags)2047 static zend_always_inline zend_string *php_pcre_replace_func(zend_string *regex,
2048 zend_string *subject_str,
2049 zend_fcall_info *fci, zend_fcall_info_cache *fcc,
2050 size_t limit, size_t *replace_count, zend_long flags)
2051 {
2052 pcre_cache_entry *pce; /* Compiled regular expression */
2053 zend_string *result; /* Function result */
2054
2055 /* Compile regex or get it from cache. */
2056 if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
2057 return NULL;
2058 }
2059 pce->refcount++;
2060 result = php_pcre_replace_func_impl(
2061 pce, subject_str, ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), fci, fcc,
2062 limit, replace_count, flags);
2063 pce->refcount--;
2064
2065 return result;
2066 }
2067 /* }}} */
2068
2069 /* {{{ php_pcre_replace_array */
php_pcre_replace_array(HashTable * regex,zend_string * replace_str,HashTable * replace_ht,zend_string * subject_str,size_t limit,size_t * replace_count)2070 static zend_string *php_pcre_replace_array(HashTable *regex,
2071 zend_string *replace_str, HashTable *replace_ht,
2072 zend_string *subject_str, size_t limit, size_t *replace_count)
2073 {
2074 zval *regex_entry;
2075 zend_string *result;
2076
2077 zend_string_addref(subject_str);
2078
2079 if (replace_ht) {
2080 uint32_t replace_idx = 0;
2081
2082 /* For each entry in the regex array, get the entry */
2083 ZEND_HASH_FOREACH_VAL(regex, regex_entry) {
2084 /* Make sure we're dealing with strings. */
2085 zend_string *tmp_regex_str;
2086 zend_string *regex_str = zval_get_tmp_string(regex_entry, &tmp_regex_str);
2087 zend_string *replace_entry_str, *tmp_replace_entry_str;
2088 zval *zv;
2089
2090 /* Get current entry */
2091 while (1) {
2092 if (replace_idx == replace_ht->nNumUsed) {
2093 replace_entry_str = ZSTR_EMPTY_ALLOC();
2094 tmp_replace_entry_str = NULL;
2095 break;
2096 }
2097 zv = ZEND_HASH_ELEMENT(replace_ht, replace_idx);
2098 replace_idx++;
2099 if (Z_TYPE_P(zv) != IS_UNDEF) {
2100 replace_entry_str = zval_get_tmp_string(zv, &tmp_replace_entry_str);
2101 break;
2102 }
2103 }
2104
2105 /* Do the actual replacement and put the result back into subject_str
2106 for further replacements. */
2107 result = php_pcre_replace(regex_str, subject_str, ZSTR_VAL(subject_str),
2108 ZSTR_LEN(subject_str), replace_entry_str, limit, replace_count);
2109 zend_tmp_string_release(tmp_replace_entry_str);
2110 zend_tmp_string_release(tmp_regex_str);
2111 zend_string_release_ex(subject_str, 0);
2112 subject_str = result;
2113 if (UNEXPECTED(result == NULL)) {
2114 break;
2115 }
2116 } ZEND_HASH_FOREACH_END();
2117
2118 } else {
2119 ZEND_ASSERT(replace_str != NULL);
2120
2121 /* For each entry in the regex array, get the entry */
2122 ZEND_HASH_FOREACH_VAL(regex, regex_entry) {
2123 /* Make sure we're dealing with strings. */
2124 zend_string *tmp_regex_str;
2125 zend_string *regex_str = zval_get_tmp_string(regex_entry, &tmp_regex_str);
2126
2127 /* Do the actual replacement and put the result back into subject_str
2128 for further replacements. */
2129 result = php_pcre_replace(regex_str, subject_str, ZSTR_VAL(subject_str),
2130 ZSTR_LEN(subject_str), replace_str, limit, replace_count);
2131 zend_tmp_string_release(tmp_regex_str);
2132 zend_string_release_ex(subject_str, 0);
2133 subject_str = result;
2134
2135 if (UNEXPECTED(result == NULL)) {
2136 break;
2137 }
2138 } ZEND_HASH_FOREACH_END();
2139 }
2140
2141 return subject_str;
2142 }
2143 /* }}} */
2144
2145 /* {{{ php_replace_in_subject */
php_replace_in_subject(zend_string * regex_str,HashTable * regex_ht,zend_string * replace_str,HashTable * replace_ht,zend_string * subject,size_t limit,size_t * replace_count)2146 static zend_always_inline zend_string *php_replace_in_subject(
2147 zend_string *regex_str, HashTable *regex_ht,
2148 zend_string *replace_str, HashTable *replace_ht,
2149 zend_string *subject, size_t limit, size_t *replace_count)
2150 {
2151 zend_string *result;
2152
2153 if (regex_str) {
2154 ZEND_ASSERT(replace_str != NULL);
2155 result = php_pcre_replace(regex_str, subject, ZSTR_VAL(subject), ZSTR_LEN(subject),
2156 replace_str, limit, replace_count);
2157 } else {
2158 ZEND_ASSERT(regex_ht != NULL);
2159 result = php_pcre_replace_array(regex_ht, replace_str, replace_ht, subject,
2160 limit, replace_count);
2161 }
2162 return result;
2163 }
2164 /* }}} */
2165
2166 /* {{{ php_replace_in_subject_func */
php_replace_in_subject_func(zend_string * regex_str,HashTable * regex_ht,zend_fcall_info * fci,zend_fcall_info_cache * fcc,zend_string * subject,size_t limit,size_t * replace_count,zend_long flags)2167 static zend_string *php_replace_in_subject_func(zend_string *regex_str, HashTable *regex_ht,
2168 zend_fcall_info *fci, zend_fcall_info_cache *fcc,
2169 zend_string *subject, size_t limit, size_t *replace_count, zend_long flags)
2170 {
2171 zend_string *result;
2172
2173 if (regex_str) {
2174 result = php_pcre_replace_func(
2175 regex_str, subject, fci, fcc, limit, replace_count, flags);
2176 return result;
2177 } else {
2178 /* If regex is an array */
2179 zval *regex_entry;
2180
2181 ZEND_ASSERT(regex_ht != NULL);
2182
2183 zend_string_addref(subject);
2184
2185 /* For each entry in the regex array, get the entry */
2186 ZEND_HASH_FOREACH_VAL(regex_ht, regex_entry) {
2187 /* Make sure we're dealing with strings. */
2188 zend_string *tmp_regex_entry_str;
2189 zend_string *regex_entry_str = zval_get_tmp_string(regex_entry, &tmp_regex_entry_str);
2190
2191 /* Do the actual replacement and put the result back into subject
2192 for further replacements. */
2193 result = php_pcre_replace_func(
2194 regex_entry_str, subject, fci, fcc, limit, replace_count, flags);
2195 zend_tmp_string_release(tmp_regex_entry_str);
2196 zend_string_release(subject);
2197 subject = result;
2198 if (UNEXPECTED(result == NULL)) {
2199 break;
2200 }
2201 } ZEND_HASH_FOREACH_END();
2202
2203 return subject;
2204 }
2205 }
2206 /* }}} */
2207
2208 /* {{{ preg_replace_func_impl */
preg_replace_func_impl(zval * return_value,zend_string * regex_str,HashTable * regex_ht,zend_fcall_info * fci,zend_fcall_info_cache * fcc,zend_string * subject_str,HashTable * subject_ht,zend_long limit_val,zend_long flags)2209 static size_t preg_replace_func_impl(zval *return_value,
2210 zend_string *regex_str, HashTable *regex_ht,
2211 zend_fcall_info *fci, zend_fcall_info_cache *fcc,
2212 zend_string *subject_str, HashTable *subject_ht, zend_long limit_val, zend_long flags)
2213 {
2214 zend_string *result;
2215 size_t replace_count = 0;
2216
2217 if (subject_str) {
2218 result = php_replace_in_subject_func(
2219 regex_str, regex_ht, fci, fcc, subject_str, limit_val, &replace_count, flags);
2220 if (result != NULL) {
2221 RETVAL_STR(result);
2222 } else {
2223 RETVAL_NULL();
2224 }
2225 } else {
2226 /* if subject is an array */
2227 zval *subject_entry, zv;
2228 zend_string *string_key;
2229 zend_ulong num_key;
2230
2231 ZEND_ASSERT(subject_ht != NULL);
2232
2233 array_init_size(return_value, zend_hash_num_elements(subject_ht));
2234 HashTable *return_value_ht = Z_ARRVAL_P(return_value);
2235
2236 /* For each subject entry, convert it to string, then perform replacement
2237 and add the result to the return_value array. */
2238 ZEND_HASH_FOREACH_KEY_VAL(subject_ht, num_key, string_key, subject_entry) {
2239 zend_string *tmp_subject_entry_str;
2240 zend_string *subject_entry_str = zval_get_tmp_string(subject_entry, &tmp_subject_entry_str);
2241
2242 result = php_replace_in_subject_func(
2243 regex_str, regex_ht, fci, fcc, subject_entry_str, limit_val, &replace_count, flags);
2244 if (result != NULL) {
2245 /* Add to return array */
2246 ZVAL_STR(&zv, result);
2247 if (string_key) {
2248 zend_hash_add_new(return_value_ht, string_key, &zv);
2249 } else {
2250 zend_hash_index_add_new(return_value_ht, num_key, &zv);
2251 }
2252 }
2253 zend_tmp_string_release(tmp_subject_entry_str);
2254 } ZEND_HASH_FOREACH_END();
2255 }
2256
2257 return replace_count;
2258 }
2259 /* }}} */
2260
/* Shared implementation of the string-replacement functions.
 *
 * Each of pattern, replacement and subject arrives as either a HashTable or a
 * string (the other member of the pair is NULL). An array replacement is only
 * accepted together with an array pattern; otherwise a type error is raised
 * for argument 1.
 *
 * Result stored in `return_value`:
 *  - string subject: the replaced string, or null when the replacement
 *    returned NULL or, in filter mode, when nothing was replaced;
 *  - array subject: an array of replaced entries under their original keys,
 *    leaving out entries whose replacement returned NULL and, in filter mode,
 *    entries in which nothing was replaced.
 *
 * When `zcount` is non-NULL it receives the total number of replacements. */
static void _preg_replace_common(
	zval *return_value,
	HashTable *regex_ht, zend_string *regex_str,
	HashTable *replace_ht, zend_string *replace_str,
	HashTable *subject_ht, zend_string *subject_str,
	zend_long limit,
	zval *zcount,
	bool is_filter
) {
	size_t total_replacements = 0;

	/* If replace is an array then the regex argument needs to also be an array */
	if (replace_ht && !regex_ht) {
		zend_argument_type_error(1, "must be of type array when argument #2 ($replacement) is an array, string given");
		RETURN_THROWS();
	}

	if (subject_str == NULL) {
		/* Subject given as an array. */
		zval *entry, element;
		zend_string *str_key;
		zend_ulong int_key;

		ZEND_ASSERT(subject_ht != NULL);

		array_init_size(return_value, zend_hash_num_elements(subject_ht));
		HashTable *out_ht = Z_ARRVAL_P(return_value);

		ZEND_HASH_FOREACH_KEY_VAL(subject_ht, int_key, str_key, entry) {
			/* Counter value before this entry, used to detect whether the
			 * entry itself was changed. */
			size_t count_before = total_replacements;
			zend_string *entry_tmp;
			zend_string *entry_str = zval_get_tmp_string(entry, &entry_tmp);
			zend_string *replaced = php_replace_in_subject(regex_str, regex_ht,
				replace_str, replace_ht, entry_str, limit, &total_replacements);

			if (replaced != NULL) {
				if (is_filter && total_replacements <= count_before) {
					/* Filter mode drops entries without any replacement. */
					zend_string_release_ex(replaced, 0);
				} else {
					/* Store the result under the key of the source entry. */
					ZVAL_STR(&element, replaced);
					if (str_key) {
						zend_hash_add_new(out_ht, str_key, &element);
					} else {
						zend_hash_index_add_new(out_ht, int_key, &element);
					}
				}
			}
			zend_tmp_string_release(entry_tmp);
		} ZEND_HASH_FOREACH_END();
	} else {
		size_t count_before = total_replacements;
		zend_string *replaced = php_replace_in_subject(regex_str, regex_ht,
			replace_str, replace_ht, subject_str, limit, &total_replacements);

		if (replaced == NULL) {
			RETVAL_NULL();
		} else if (is_filter && total_replacements <= count_before) {
			/* Filter mode yields null when nothing was replaced. */
			zend_string_release_ex(replaced, 0);
			RETVAL_NULL();
		} else {
			RETVAL_STR(replaced);
		}
	}

	if (zcount) {
		ZEND_TRY_ASSIGN_REF_LONG(zcount, total_replacements);
	}
}
2335
2336 /* {{{ preg_replace_common */
preg_replace_common(INTERNAL_FUNCTION_PARAMETERS,bool is_filter)2337 static void preg_replace_common(INTERNAL_FUNCTION_PARAMETERS, bool is_filter)
2338 {
2339 zend_string *regex_str, *replace_str, *subject_str;
2340 HashTable *regex_ht, *replace_ht, *subject_ht;
2341 zend_long limit = -1;
2342 zval *zcount = NULL;
2343
2344 /* Get function parameters and do error-checking. */
2345 ZEND_PARSE_PARAMETERS_START(3, 5)
2346 Z_PARAM_ARRAY_HT_OR_STR(regex_ht, regex_str)
2347 Z_PARAM_ARRAY_HT_OR_STR(replace_ht, replace_str)
2348 Z_PARAM_ARRAY_HT_OR_STR(subject_ht, subject_str)
2349 Z_PARAM_OPTIONAL
2350 Z_PARAM_LONG(limit)
2351 Z_PARAM_ZVAL(zcount)
2352 ZEND_PARSE_PARAMETERS_END();
2353
2354 _preg_replace_common(
2355 return_value,
2356 regex_ht, regex_str,
2357 replace_ht, replace_str,
2358 subject_ht, subject_str,
2359 limit, zcount, is_filter);
2360 }
2361 /* }}} */
2362
/* {{{ Perform Perl-style regular expression replacement. */
PHP_FUNCTION(preg_replace)
{
	/* Shared implementation, not in filter mode (is_filter = false). */
	preg_replace_common(INTERNAL_FUNCTION_PARAM_PASSTHRU, false);
}
/* }}} */
2369
/* Frameless variant of preg_replace for exactly three arguments (pattern,
 * replacement, subject). It forwards to _preg_replace_common() with a limit
 * of -1, no count output and filter mode off. */
ZEND_FRAMELESS_FUNCTION(preg_replace, 3)
{
	zend_string *regex_str, *replace_str, *subject_str;
	HashTable *regex_ht, *replace_ht, *subject_ht;
	zval regex_tmp, replace_tmp, subject_tmp;

	/* Fetch each argument as either an array or a string.
	 * NOTE(review): these macros are defined elsewhere; presumably they jump
	 * to flf_clean when an argument has the wrong type and use the *_tmp
	 * zvals for temporary strings - confirm against their definition. */
	Z_FLF_PARAM_ARRAY_HT_OR_STR(1, regex_ht, regex_str, regex_tmp);
	Z_FLF_PARAM_ARRAY_HT_OR_STR(2, replace_ht, replace_str, replace_tmp);
	Z_FLF_PARAM_ARRAY_HT_OR_STR(3, subject_ht, subject_str, subject_tmp);

	_preg_replace_common(
		return_value,
		regex_ht, regex_str,
		replace_ht, replace_str,
		subject_ht, subject_str,
		/* limit */ -1, /* zcount */ NULL, /* is_filter */ false);

	/* Cleanup of the per-argument temporaries, reached on the normal path by
	 * falling through from the call above. */
flf_clean:;
	Z_FLF_PARAM_FREE_STR(1, regex_tmp);
	Z_FLF_PARAM_FREE_STR(2, replace_tmp);
	Z_FLF_PARAM_FREE_STR(3, subject_tmp);
}
2392
2393 /* {{{ Perform Perl-style regular expression replacement using replacement callback. */
PHP_FUNCTION(preg_replace_callback)2394 PHP_FUNCTION(preg_replace_callback)
2395 {
2396 zval *zcount = NULL;
2397 zend_string *regex_str;
2398 HashTable *regex_ht;
2399 zend_string *subject_str;
2400 HashTable *subject_ht;
2401 zend_long limit = -1, flags = 0;
2402 size_t replace_count;
2403 zend_fcall_info fci;
2404 zend_fcall_info_cache fcc;
2405
2406 /* Get function parameters and do error-checking. */
2407 ZEND_PARSE_PARAMETERS_START(3, 6)
2408 Z_PARAM_ARRAY_HT_OR_STR(regex_ht, regex_str)
2409 Z_PARAM_FUNC(fci, fcc)
2410 Z_PARAM_ARRAY_HT_OR_STR(subject_ht, subject_str)
2411 Z_PARAM_OPTIONAL
2412 Z_PARAM_LONG(limit)
2413 Z_PARAM_ZVAL(zcount)
2414 Z_PARAM_LONG(flags)
2415 ZEND_PARSE_PARAMETERS_END();
2416
2417 replace_count = preg_replace_func_impl(return_value, regex_str, regex_ht,
2418 &fci, &fcc,
2419 subject_str, subject_ht, limit, flags);
2420 if (zcount) {
2421 ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count);
2422 }
2423 }
2424 /* }}} */
2425
/* {{{ Perform Perl-style regular expression replacement using an array of pattern => callback pairs. */
/* For every entry of `pattern` (string key = regex, value = callback) the
 * callback replacement is run on the current subject, and its result becomes
 * the subject for the next entry. The final subject (string or array) is
 * returned; the optional count argument receives the summed replacement count.
 * A non-callable value or a non-string key raises a type error for
 * argument 1. */
PHP_FUNCTION(preg_replace_callback_array)
{
	zval zv, *replace, *zcount = NULL;
	HashTable *pattern, *subject_ht;
	zend_string *subject_str, *str_idx_regex;
	zend_long limit = -1, flags = 0;
	size_t replace_count = 0;
	zend_fcall_info fci;
	zend_fcall_info_cache fcc;

	/* Get function parameters and do error-checking. */
	ZEND_PARSE_PARAMETERS_START(2, 5)
		Z_PARAM_ARRAY_HT(pattern)
		Z_PARAM_ARRAY_HT_OR_STR(subject_ht, subject_str)
		Z_PARAM_OPTIONAL
		Z_PARAM_LONG(limit)
		Z_PARAM_ZVAL(zcount)
		Z_PARAM_LONG(flags)
	ZEND_PARSE_PARAMETERS_END();

	/* Only these fci members are set here; function_name is filled in per
	 * callback inside the loop below. */
	fci.size = sizeof(fci);
	fci.object = NULL;
	fci.named_params = NULL;

	/* Take a reference on the incoming subject. From here on the function owns
	 * one reference to the "current" subject, which is released whenever it is
	 * replaced by a result (in the loop) or on the error path. */
	if (subject_ht) {
		GC_TRY_ADDREF(subject_ht);
	} else {
		GC_TRY_ADDREF(subject_str);
	}

	ZEND_HASH_FOREACH_STR_KEY_VAL(pattern, str_idx_regex, replace) {
		/* The value must be callable; fcc is filled in for the call. */
		if (!zend_is_callable_ex(replace, NULL, 0, NULL, &fcc, NULL)) {
			zend_argument_type_error(1, "must contain only valid callbacks");
			goto error;
		}
		/* The key must be a string (the regex); integer keys are rejected. */
		if (!str_idx_regex) {
			zend_argument_type_error(1, "must contain only string patterns as keys");
			goto error;
		}

		ZVAL_COPY_VALUE(&fci.function_name, replace);

		/* Run this pattern/callback pair on the current subject; the result is
		 * written to zv. */
		replace_count += preg_replace_func_impl(&zv, str_idx_regex, /* regex_ht */ NULL, &fci, &fcc,
			subject_str, subject_ht, limit, flags);
		switch (Z_TYPE(zv)) {
			case IS_ARRAY:
				/* Array subject: swap the previous array for the new one. */
				ZEND_ASSERT(subject_ht);
				zend_array_release(subject_ht);
				subject_ht = Z_ARR(zv);
				break;
			case IS_STRING:
				/* String subject: swap the previous string for the new one. */
				ZEND_ASSERT(subject_str);
				zend_string_release(subject_str);
				subject_str = Z_STR(zv);
				break;
			case IS_NULL:
				/* The replacement produced null: return null and release the
				 * subject still held. */
				RETVAL_NULL();
				goto error;
			EMPTY_SWITCH_DEFAULT_CASE()
		}

		/* Stop if an exception is pending after this round. */
		if (EG(exception)) {
			goto error;
		}
	} ZEND_HASH_FOREACH_END();

	if (zcount) {
		ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count);
	}

	/* Hand the reference held on the current subject over to return_value. */
	if (subject_ht) {
		RETVAL_ARR(subject_ht);
		// Unset the type_flags of immutable arrays to prevent the VM from performing refcounting
		if (GC_FLAGS(subject_ht) & IS_ARRAY_IMMUTABLE) {
			Z_TYPE_FLAGS_P(return_value) = 0;
		}
		return;
	} else {
		RETURN_STR(subject_str);
	}

	/* Error/abort path: drop the reference held on the current subject. */
error:
	if (subject_ht) {
		zend_array_release(subject_ht);
	} else {
		zend_string_release(subject_str);
	}
}
/* }}} */
2516
/* {{{ Perform Perl-style regular expression replacement and only return matches. */
PHP_FUNCTION(preg_filter)
{
	/* Shared implementation in filter mode (is_filter = true). */
	preg_replace_common(INTERNAL_FUNCTION_PARAM_PASSTHRU, true);
}
/* }}} */
2523
2524 /* {{{ Split string into an array using a perl-style regular expression as a delimiter */
PHP_FUNCTION(preg_split)2525 PHP_FUNCTION(preg_split)
2526 {
2527 zend_string *regex; /* Regular expression */
2528 zend_string *subject; /* String to match against */
2529 zend_long limit_val = -1;/* Integer value of limit */
2530 zend_long flags = 0; /* Match control flags */
2531 pcre_cache_entry *pce; /* Compiled regular expression */
2532
2533 /* Get function parameters and do error checking */
2534 ZEND_PARSE_PARAMETERS_START(2, 4)
2535 Z_PARAM_STR(regex)
2536 Z_PARAM_STR(subject)
2537 Z_PARAM_OPTIONAL
2538 Z_PARAM_LONG(limit_val)
2539 Z_PARAM_LONG(flags)
2540 ZEND_PARSE_PARAMETERS_END();
2541
2542 /* Compile regex or get it from cache. */
2543 if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
2544 RETURN_FALSE;
2545 }
2546
2547 pce->refcount++;
2548 php_pcre_split_impl(pce, subject, return_value, limit_val, flags);
2549 pce->refcount--;
2550 }
2551 /* }}} */
2552
/* {{{ php_pcre_split_impl */
/* Splits `subject_str` at every match of the compiled pattern `pce` and stores
 * the pieces in `return_value` as an array.
 *
 * limit_val: -1 and 0 both mean "no limit". Any other value <= 1 skips
 *            matching entirely and yields the subject as a single piece.
 *            Otherwise matching stops once only one more piece may be added,
 *            and the rest of the subject becomes that last piece.
 * flags:     bitmask of PREG_SPLIT_NO_EMPTY (skip empty pieces),
 *            PREG_SPLIT_DELIM_CAPTURE (also add captured groups of the
 *            delimiter) and PREG_SPLIT_OFFSET_CAPTURE (add offset pairs
 *            instead of plain strings).
 *
 * PCRE_G(error_code) is reset on entry. If match data cannot be created or a
 * match error is recorded, the array is destroyed and `return_value` is set to
 * false. */
PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str, zval *return_value,
	zend_long limit_val, zend_long flags)
{
	uint32_t		 options;			/* Execution options */
	int				 count;				/* Count of matched subpatterns */
	PCRE2_SIZE		 start_offset;		/* Where the new search starts */
	PCRE2_SIZE		 last_match_offset;	/* Location of last match */
	uint32_t		 no_empty;			/* If NO_EMPTY flag is set */
	uint32_t		 delim_capture; 	/* If delimiters should be captured */
	uint32_t		 offset_capture;	/* If offsets should be captured */
	uint32_t		 num_subpats;		/* Number of captured subpatterns */
	zval			 tmp;
	pcre2_match_data *match_data;
	char *subject = ZSTR_VAL(subject_str);

	no_empty = flags & PREG_SPLIT_NO_EMPTY;
	delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE;
	offset_capture = flags & PREG_SPLIT_OFFSET_CAPTURE;

	/* Initialize return value */
	array_init(return_value);
	HashTable *return_value_ht = Z_ARRVAL_P(return_value);

	/* Number of capture groups plus one for the whole match (group 0). */
	num_subpats = pce->capture_count + 1;

	/* Start at the beginning of the string */
	start_offset = 0;
	last_match_offset = 0;
	PCRE_G(error_code) = PHP_PCRE_NO_ERROR;

	/* Normalize the limit: 0 is treated like -1 (no limit); any other value
	 * <= 1 means no split is performed, so jump straight to emitting the
	 * subject as the only piece. That jump skips everything that touches
	 * match_data. */
	if (limit_val == -1) {
		/* pass */
	} else if (limit_val == 0) {
		limit_val = -1;
	} else if (limit_val <= 1) {
		goto last;
	}

	/* Use the preallocated match data when it is free and large enough,
	 * otherwise create one sized for this pattern. */
	if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
		match_data = mdata;
	} else {
		match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
		if (!match_data) {
			PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
			zval_ptr_dtor(return_value);
			RETURN_FALSE;
		}
	}

	/* UTF patterns get no extra option on the first match; non-UTF patterns
	 * pass PCRE2_NO_UTF_CHECK. */
	options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;

	/* Array of subpattern offsets */
	PCRE2_SIZE *const offsets = pcre2_get_ovector_pointer(match_data);

	/* First match attempt. The JIT entry point is only used when `options` is
	 * non-zero, i.e. for non-UTF patterns. */
#ifdef HAVE_PCRE_JIT_SUPPORT
	if ((pce->preg_options & PREG_JIT) && options) {
		count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
				PCRE2_NO_UTF_CHECK, match_data, mctx);
	} else
#endif
	count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
			options, match_data, mctx);

	while (1) {
		/* If something matched */
		if (count >= 0) {
			/* Check for too many substrings condition. */
			if (UNEXPECTED(count == 0)) {
				php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
				count = num_subpats;
			}

matched:
			/* A match whose end lies before its start is reported as an
			 * internal error. */
			if (UNEXPECTED(offsets[1] < offsets[0])) {
				PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
				break;
			}

			/* Emit the piece between the previous match and this one, unless
			 * it is empty and empty pieces are suppressed. */
			if (!no_empty || offsets[0] != last_match_offset) {
				if (offset_capture) {
					/* Add (match, offset) pair to the return value */
					add_offset_pair(
						return_value_ht, subject, last_match_offset, offsets[0],
						NULL, 0);
				} else {
					/* Add the piece to the return value */
					populate_match_value_str(&tmp, subject, last_match_offset, offsets[0]);
					zend_hash_next_index_insert_new(return_value_ht, &tmp);
				}

				/* One less left to do */
				if (limit_val != -1)
					limit_val--;
			}

			/* Optionally emit the capture groups of the delimiter itself
			 * (groups 1..count-1); these do not count against the limit. */
			if (delim_capture) {
				size_t i;
				for (i = 1; i < count; i++) {
					/* If we have matched a delimiter */
					if (!no_empty || offsets[2*i] != offsets[2*i+1]) {
						if (offset_capture) {
							add_offset_pair(
								return_value_ht, subject, offsets[2*i], offsets[2*i+1], NULL, 0);
						} else {
							populate_match_value_str(&tmp, subject, offsets[2*i], offsets[2*i+1]);
							zend_hash_next_index_insert_new(return_value_ht, &tmp);
						}
					}
				}
			}

			/* Advance to the position right after the last full match */
			start_offset = last_match_offset = offsets[1];

			/* If we have matched an empty string, mimic what Perl's /g options does.
			   This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
			   the match again at the same point. If this fails (picked up above) we
			   advance to the next character. */
			if (start_offset == offsets[0]) {
				/* Get next piece if no limit or limit not yet reached and something matched*/
				if (limit_val != -1 && limit_val <= 1) {
					break;
				}
				count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
					PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
				if (count >= 0) {
					goto matched;
				} else if (count == PCRE2_ERROR_NOMATCH) {
					/* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
					   this is not necessarily the end. We need to advance
					   the start offset, and continue. Fudge the offset values
					   to achieve this, unless we're already at the end of the string. */
					if (start_offset < ZSTR_LEN(subject_str)) {
						start_offset += calculate_unit_length(pce, subject + start_offset);
					} else {
						break;
					}
				} else {
					goto error;
				}
			}

		} else if (count == PCRE2_ERROR_NOMATCH) {
			/* No further delimiter: the remainder is handled after the loop. */
			break;
		} else {
error:
			/* Any other negative count is a match error. */
			pcre_handle_exec_error(count);
			break;
		}

		/* Get next piece if no limit or limit not yet reached and something matched*/
		if (limit_val != -1 && limit_val <= 1) {
			break;
		}

		/* Look for the next delimiter. Unlike the first attempt, this always
		 * passes PCRE2_NO_UTF_CHECK. */
#ifdef HAVE_PCRE_JIT_SUPPORT
		if (pce->preg_options & PREG_JIT) {
			count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
					PCRE2_NO_UTF_CHECK, match_data, mctx);
		} else
#endif
		count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
				PCRE2_NO_UTF_CHECK, match_data, mctx);
	}
	/* Free the match data only if it was created here. */
	if (match_data != mdata) {
		pcre2_match_data_free(match_data);
	}

	/* An error recorded during matching turns the result into false. */
	if (PCRE_G(error_code) != PHP_PCRE_NO_ERROR) {
		zval_ptr_dtor(return_value);
		RETURN_FALSE;
	}

	/* Reached either by falling through after the loop or directly via the
	 * "goto last" above (no split performed). */
last:
	start_offset = last_match_offset; /* the offset might have been incremented, but without further successful matches */

	/* Emit the remainder of the subject, unless it is empty and empty pieces
	 * are suppressed. */
	if (!no_empty || start_offset < ZSTR_LEN(subject_str)) {
		if (offset_capture) {
			/* Add the last (match, offset) pair to the return value */
			add_offset_pair(return_value_ht, subject, start_offset, ZSTR_LEN(subject_str), NULL, 0);
		} else {
			/* Add the last piece to the return value */
			if (start_offset == 0) {
				/* Nothing was split off: reuse the subject string itself. */
				ZVAL_STR_COPY(&tmp, subject_str);
			} else {
				populate_match_value_str(&tmp, subject, start_offset, ZSTR_LEN(subject_str));
			}
			zend_hash_next_index_insert_new(return_value_ht, &tmp);
		}
	}
}
/* }}} */
2747
2748 /* {{{ Quote regular expression characters plus an optional character */
PHP_FUNCTION(preg_quote)2749 PHP_FUNCTION(preg_quote)
2750 {
2751 zend_string *str; /* Input string argument */
2752 zend_string *delim = NULL; /* Additional delimiter argument */
2753 char *in_str; /* Input string */
2754 char *in_str_end; /* End of the input string */
2755 zend_string *out_str; /* Output string with quoted characters */
2756 size_t extra_len; /* Number of additional characters */
2757 char *p, /* Iterator for input string */
2758 *q, /* Iterator for output string */
2759 delim_char = '\0', /* Delimiter character to be quoted */
2760 c; /* Current character */
2761
2762 /* Get the arguments and check for errors */
2763 ZEND_PARSE_PARAMETERS_START(1, 2)
2764 Z_PARAM_STR(str)
2765 Z_PARAM_OPTIONAL
2766 Z_PARAM_STR_OR_NULL(delim)
2767 ZEND_PARSE_PARAMETERS_END();
2768
2769 /* Nothing to do if we got an empty string */
2770 if (ZSTR_LEN(str) == 0) {
2771 RETURN_EMPTY_STRING();
2772 }
2773
2774 in_str = ZSTR_VAL(str);
2775 in_str_end = in_str + ZSTR_LEN(str);
2776
2777 if (delim) {
2778 delim_char = ZSTR_VAL(delim)[0];
2779 }
2780
2781 /* Go through the string and quote necessary characters */
2782 extra_len = 0;
2783 p = in_str;
2784 do {
2785 c = *p;
2786 switch(c) {
2787 case '.':
2788 case '\\':
2789 case '+':
2790 case '*':
2791 case '?':
2792 case '[':
2793 case '^':
2794 case ']':
2795 case '$':
2796 case '(':
2797 case ')':
2798 case '{':
2799 case '}':
2800 case '=':
2801 case '!':
2802 case '>':
2803 case '<':
2804 case '|':
2805 case ':':
2806 case '-':
2807 case '#':
2808 extra_len++;
2809 break;
2810
2811 case '\0':
2812 extra_len+=3;
2813 break;
2814
2815 default:
2816 if (c == delim_char) {
2817 extra_len++;
2818 }
2819 break;
2820 }
2821 p++;
2822 } while (p != in_str_end);
2823
2824 if (extra_len == 0) {
2825 RETURN_STR_COPY(str);
2826 }
2827
2828 /* Allocate enough memory so that even if each character
2829 is quoted, we won't run out of room */
2830 out_str = zend_string_safe_alloc(1, ZSTR_LEN(str), extra_len, 0);
2831 q = ZSTR_VAL(out_str);
2832 p = in_str;
2833
2834 do {
2835 c = *p;
2836 switch(c) {
2837 case '.':
2838 case '\\':
2839 case '+':
2840 case '*':
2841 case '?':
2842 case '[':
2843 case '^':
2844 case ']':
2845 case '$':
2846 case '(':
2847 case ')':
2848 case '{':
2849 case '}':
2850 case '=':
2851 case '!':
2852 case '>':
2853 case '<':
2854 case '|':
2855 case ':':
2856 case '-':
2857 case '#':
2858 *q++ = '\\';
2859 *q++ = c;
2860 break;
2861
2862 case '\0':
2863 *q++ = '\\';
2864 *q++ = '0';
2865 *q++ = '0';
2866 *q++ = '0';
2867 break;
2868
2869 default:
2870 if (c == delim_char) {
2871 *q++ = '\\';
2872 }
2873 *q++ = c;
2874 break;
2875 }
2876 p++;
2877 } while (p != in_str_end);
2878 *q = '\0';
2879
2880 RETURN_NEW_STR(out_str);
2881 }
2882 /* }}} */
2883
2884 /* {{{ Searches array and returns entries which match regex */
PHP_FUNCTION(preg_grep)2885 PHP_FUNCTION(preg_grep)
2886 {
2887 zend_string *regex; /* Regular expression */
2888 zval *input; /* Input array */
2889 zend_long flags = 0; /* Match control flags */
2890 pcre_cache_entry *pce; /* Compiled regular expression */
2891
2892 /* Get arguments and do error checking */
2893 ZEND_PARSE_PARAMETERS_START(2, 3)
2894 Z_PARAM_STR(regex)
2895 Z_PARAM_ARRAY(input)
2896 Z_PARAM_OPTIONAL
2897 Z_PARAM_LONG(flags)
2898 ZEND_PARSE_PARAMETERS_END();
2899
2900 /* Compile regex or get it from cache. */
2901 if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
2902 RETURN_FALSE;
2903 }
2904
2905 pce->refcount++;
2906 php_pcre_grep_impl(pce, input, return_value, flags);
2907 pce->refcount--;
2908 }
2909 /* }}} */
2910
php_pcre_grep_impl(pcre_cache_entry * pce,zval * input,zval * return_value,zend_long flags)2911 PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return_value, zend_long flags) /* {{{ */
2912 {
2913 zval *entry; /* An entry in the input array */
2914 uint32_t num_subpats; /* Number of captured subpatterns */
2915 int count; /* Count of matched subpatterns */
2916 uint32_t options; /* Execution options */
2917 zend_string *string_key;
2918 zend_ulong num_key;
2919 bool invert; /* Whether to return non-matching
2920 entries */
2921 pcre2_match_data *match_data;
2922 invert = flags & PREG_GREP_INVERT ? 1 : 0;
2923
2924 /* Calculate the size of the offsets array, and allocate memory for it. */
2925 num_subpats = pce->capture_count + 1;
2926
2927 /* Initialize return array */
2928 array_init(return_value);
2929 HashTable *return_value_ht = Z_ARRVAL_P(return_value);
2930
2931 PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
2932
2933 if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
2934 match_data = mdata;
2935 } else {
2936 match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
2937 if (!match_data) {
2938 PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
2939 return;
2940 }
2941 }
2942
2943 options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
2944
2945 /* Go through the input array */
2946 ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(input), num_key, string_key, entry) {
2947 zend_string *tmp_subject_str;
2948 zend_string *subject_str = zval_get_tmp_string(entry, &tmp_subject_str);
2949
2950 /* Perform the match */
2951 #ifdef HAVE_PCRE_JIT_SUPPORT
2952 if ((pce->preg_options & PREG_JIT) && options) {
2953 count = pcre2_jit_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), 0,
2954 PCRE2_NO_UTF_CHECK, match_data, mctx);
2955 } else
2956 #endif
2957 count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), 0,
2958 options, match_data, mctx);
2959
2960 /* If the entry fits our requirements */
2961 if (count >= 0) {
2962 /* Check for too many substrings condition. */
2963 if (UNEXPECTED(count == 0)) {
2964 php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
2965 }
2966 if (!invert) {
2967 Z_TRY_ADDREF_P(entry);
2968
2969 /* Add to return array */
2970 if (string_key) {
2971 zend_hash_update(return_value_ht, string_key, entry);
2972 } else {
2973 zend_hash_index_update(return_value_ht, num_key, entry);
2974 }
2975 }
2976 } else if (count == PCRE2_ERROR_NOMATCH) {
2977 if (invert) {
2978 Z_TRY_ADDREF_P(entry);
2979
2980 /* Add to return array */
2981 if (string_key) {
2982 zend_hash_update(return_value_ht, string_key, entry);
2983 } else {
2984 zend_hash_index_update(return_value_ht, num_key, entry);
2985 }
2986 }
2987 } else {
2988 pcre_handle_exec_error(count);
2989 zend_tmp_string_release(tmp_subject_str);
2990 break;
2991 }
2992
2993 zend_tmp_string_release(tmp_subject_str);
2994 } ZEND_HASH_FOREACH_END();
2995 if (match_data != mdata) {
2996 pcre2_match_data_free(match_data);
2997 }
2998 }
2999 /* }}} */
3000
3001 /* {{{ Returns the error code of the last regexp execution. */
PHP_FUNCTION(preg_last_error)3002 PHP_FUNCTION(preg_last_error)
3003 {
3004 ZEND_PARSE_PARAMETERS_NONE();
3005
3006 RETURN_LONG(PCRE_G(error_code));
3007 }
3008 /* }}} */
3009
3010 /* {{{ Returns the error message of the last regexp execution. */
PHP_FUNCTION(preg_last_error_msg)3011 PHP_FUNCTION(preg_last_error_msg)
3012 {
3013 ZEND_PARSE_PARAMETERS_NONE();
3014
3015 RETURN_STRING(php_pcre_get_error_msg(PCRE_G(error_code)));
3016 }
3017 /* }}} */
3018
/* {{{ module definition structures */

/* Module descriptor for the "pcre" extension. The initializer is positional,
 * so the order of the entries below must not be changed. */
zend_module_entry pcre_module_entry = {
	STANDARD_MODULE_HEADER,
	"pcre",                     /* extension name */
	ext_functions,              /* function table (presumably from php_pcre_arginfo.h — confirm) */
	PHP_MINIT(pcre),            /* MINIT hook */
	PHP_MSHUTDOWN(pcre),        /* MSHUTDOWN hook */
	PHP_RINIT(pcre),            /* RINIT hook */
	PHP_RSHUTDOWN(pcre),        /* RSHUTDOWN hook */
	PHP_MINFO(pcre),            /* MINFO hook */
	PHP_PCRE_VERSION,           /* extension version */
	PHP_MODULE_GLOBALS(pcre),   /* module globals descriptor */
	PHP_GINIT(pcre),            /* GINIT hook */
	PHP_GSHUTDOWN(pcre),        /* GSHUTDOWN hook */
	NULL,                       /* NOTE(review): presumably the unused post-deactivate slot — confirm against zend_modules.h */
	STANDARD_MODULE_PROPERTIES_EX
};

/* ZEND_GET_MODULE is only emitted when COMPILE_DL_PCRE is defined. */
#ifdef COMPILE_DL_PCRE
ZEND_GET_MODULE(pcre)
#endif

/* }}} */
3043
3044 PHPAPI pcre2_match_context *php_pcre_mctx(void)
3045 {/*{{{*/
3046 return mctx;
3047 }/*}}}*/
3048
php_pcre_gctx(void)3049 PHPAPI pcre2_general_context *php_pcre_gctx(void)
3050 {/*{{{*/
3051 return gctx;
3052 }/*}}}*/
3053
php_pcre_cctx(void)3054 PHPAPI pcre2_compile_context *php_pcre_cctx(void)
3055 {/*{{{*/
3056 return cctx;
3057 }/*}}}*/
3058
php_pcre_pce_incref(pcre_cache_entry * pce)3059 PHPAPI void php_pcre_pce_incref(pcre_cache_entry *pce)
3060 {/*{{{*/
3061 assert(NULL != pce);
3062 pce->refcount++;
3063 }/*}}}*/
3064
php_pcre_pce_decref(pcre_cache_entry * pce)3065 PHPAPI void php_pcre_pce_decref(pcre_cache_entry *pce)
3066 {/*{{{*/
3067 assert(NULL != pce);
3068 assert(0 != pce->refcount);
3069 pce->refcount--;
3070 }/*}}}*/
3071
php_pcre_pce_re(pcre_cache_entry * pce)3072 PHPAPI pcre2_code *php_pcre_pce_re(pcre_cache_entry *pce)
3073 {/*{{{*/
3074 assert(NULL != pce);
3075 return pce->re;
3076 }/*}}}*/
3077