1 /*
2 +----------------------------------------------------------------------+
3 | Copyright (c) The PHP Group |
4 +----------------------------------------------------------------------+
5 | This source file is subject to version 3.01 of the PHP license, |
6 | that is bundled with this package in the file LICENSE, and is |
7 | available through the world-wide-web at the following url: |
8 | https://www.php.net/license/3_01.txt |
9 | If you did not receive a copy of the PHP license and are unable to |
10 | obtain it through the world-wide-web, please send a note to |
11 | license@php.net so we can mail you a copy immediately. |
12 +----------------------------------------------------------------------+
13 | Author: Andrei Zmievski <andrei@php.net> |
14 +----------------------------------------------------------------------+
15 */
16
17 #include "php.h"
18 #include "php_ini.h"
19 #include "php_globals.h"
20 #include "php_pcre.h"
21 #include "ext/standard/info.h"
22 #include "ext/standard/basic_functions.h"
23 #include "zend_smart_str.h"
24 #include "SAPI.h"
25
26 #include "ext/standard/php_string.h"
27
/* Result ordering modes and capture flags. PREG_OFFSET_CAPTURE and
 * PREG_UNMATCHED_AS_NULL are bit flags tested against the `flags` argument
 * when subpattern arrays are populated. */
#define PREG_PATTERN_ORDER			1
#define PREG_SET_ORDER				2
#define PREG_OFFSET_CAPTURE			(1<<8)
#define PREG_UNMATCHED_AS_NULL		(1<<9)

/* Bit flags; judging by the names these are for preg_split() (their users are
 * not in this part of the file -- confirm there). */
#define	PREG_SPLIT_NO_EMPTY			(1<<0)
#define PREG_SPLIT_DELIM_CAPTURE	(1<<1)
#define PREG_SPLIT_OFFSET_CAPTURE	(1<<2)

/* Recorded for the 'e' pattern modifier; a pattern carrying it is rejected by
 * the pattern compiler with a warning. */
#define PREG_REPLACE_EVAL			(1<<0)

/* NOTE(review): presumably inverts preg_grep() matching -- confirm at its user. */
#define PREG_GREP_INVERT			(1<<0)

/* Set in a cache entry's preg_options when JIT compilation produced code. */
#define PREG_JIT                    (1<<3)

/* Number of cached patterns at which a batch of unreferenced entries is evicted. */
#define PCRE_CACHE_SIZE 4096

/* PHP_PCRE_JIT_SUPPORT is always defined: 1 when built with JIT, 0 otherwise. */
#ifdef HAVE_PCRE_JIT_SUPPORT
#define PHP_PCRE_JIT_SUPPORT 1
#else
#define PHP_PCRE_JIT_SUPPORT 0
#endif

/* PCRE2 version string; allocated in MINIT and released in MSHUTDOWN. */
char *php_pcre_version;
52
53 #include "php_pcre_arginfo.h"
54
/* One compiled pattern as stored in PCRE_G(pcre_cache). */
struct _pcre_cache_entry {
	pcre2_code *re;           /* compiled pattern; freed with pcre2_code_free() by the cache destructors */
	uint32_t preg_options;    /* PHP-side flags (PREG_JIT, ...) gathered while compiling */
	uint32_t capture_count;   /* value of PCRE2_INFO_CAPTURECOUNT for `re` */
	uint32_t name_count;      /* value of PCRE2_INFO_NAMECOUNT for `re` */
	uint32_t compile_options; /* PCRE2 compile option bits derived from the pattern modifiers */
	uint32_t refcount;        /* entries with a non-zero refcount are skipped by cache eviction */
};
63
PHPAPI ZEND_DECLARE_MODULE_GLOBALS(pcre)

#ifdef HAVE_PCRE_JIT_SUPPORT
/* Start and maximum size passed to pcre2_jit_stack_create() for the per-thread JIT stack. */
#define PCRE_JIT_STACK_MIN_SIZE (32 * 1024)
#define PCRE_JIT_STACK_MAX_SIZE (192 * 1024)
ZEND_TLS pcre2_jit_stack *jit_stack = NULL;
#endif
/* General context using (infallible) system allocator. */
ZEND_TLS pcre2_general_context *gctx = NULL;
/* These two are global per thread for now. Though it is possible to use these
 	per pattern. Either one can copy it and use in pce, or one does no global
	contexts at all, but creates for every pce. */
ZEND_TLS pcre2_compile_context *cctx = NULL;
ZEND_TLS pcre2_match_context   *mctx = NULL;
/* Preallocated match data created with PHP_PCRE_PREALLOC_MDATA_SIZE; handed
 * out by php_pcre_create_match_data() when the pattern is small enough. */
ZEND_TLS pcre2_match_data      *mdata = NULL;
/* Non-zero while `mdata` is handed out and not yet returned. */
ZEND_TLS bool              mdata_used = 0;
/* 1 once php_pcre_init_pcre2() has created everything above, 0 otherwise. */
ZEND_TLS uint8_t pcre2_init_ok = 0;
81 #if defined(ZTS) && defined(HAVE_PCRE_JIT_SUPPORT)
82 static MUTEX_T pcre_mt = NULL;
83 #define php_pcre_mutex_alloc() \
84 if (tsrm_is_main_thread() && !pcre_mt) pcre_mt = tsrm_mutex_alloc();
85 #define php_pcre_mutex_free() \
86 if (tsrm_is_main_thread() && pcre_mt) { tsrm_mutex_free(pcre_mt); pcre_mt = NULL; }
87 #define php_pcre_mutex_lock() tsrm_mutex_lock(pcre_mt);
88 #define php_pcre_mutex_unlock() tsrm_mutex_unlock(pcre_mt);
89 #else
90 #define php_pcre_mutex_alloc()
91 #define php_pcre_mutex_free()
92 #define php_pcre_mutex_lock()
93 #define php_pcre_mutex_unlock()
94 #endif
95
96 ZEND_TLS HashTable char_tables;
97
/* Destructor for char_tables entries: releases the persistently allocated
 * character table stored as the element's pointer. */
static void php_pcre_free_char_table(zval *data)
{/*{{{*/
	pefree(Z_PTR_P(data), 1);
}/*}}}*/
103
pcre_handle_exec_error(int pcre_code)104 static void pcre_handle_exec_error(int pcre_code) /* {{{ */
105 {
106 int preg_code = 0;
107
108 switch (pcre_code) {
109 case PCRE2_ERROR_MATCHLIMIT:
110 preg_code = PHP_PCRE_BACKTRACK_LIMIT_ERROR;
111 break;
112
113 case PCRE2_ERROR_RECURSIONLIMIT:
114 preg_code = PHP_PCRE_RECURSION_LIMIT_ERROR;
115 break;
116
117 case PCRE2_ERROR_BADUTFOFFSET:
118 preg_code = PHP_PCRE_BAD_UTF8_OFFSET_ERROR;
119 break;
120
121 #ifdef HAVE_PCRE_JIT_SUPPORT
122 case PCRE2_ERROR_JIT_STACKLIMIT:
123 preg_code = PHP_PCRE_JIT_STACKLIMIT_ERROR;
124 break;
125 #endif
126
127 default:
128 if (pcre_code <= PCRE2_ERROR_UTF8_ERR1 && pcre_code >= PCRE2_ERROR_UTF8_ERR21) {
129 preg_code = PHP_PCRE_BAD_UTF8_ERROR;
130 } else {
131 preg_code = PHP_PCRE_INTERNAL_ERROR;
132 }
133 break;
134 }
135
136 PCRE_G(error_code) = preg_code;
137 }
138 /* }}} */
139
/* Return the static, human-readable message for a PHP_PCRE_* error code;
 * unrecognised codes yield "Unknown error". */
static const char *php_pcre_get_error_msg(php_pcre_error_code error_code) /* {{{ */
{
	const char *msg;

	switch (error_code) {
		case PHP_PCRE_NO_ERROR:
			msg = "No error";
			break;
		case PHP_PCRE_INTERNAL_ERROR:
			msg = "Internal error";
			break;
		case PHP_PCRE_BAD_UTF8_ERROR:
			msg = "Malformed UTF-8 characters, possibly incorrectly encoded";
			break;
		case PHP_PCRE_BAD_UTF8_OFFSET_ERROR:
			msg = "The offset did not correspond to the beginning of a valid UTF-8 code point";
			break;
		case PHP_PCRE_BACKTRACK_LIMIT_ERROR:
			msg = "Backtrack limit exhausted";
			break;
		case PHP_PCRE_RECURSION_LIMIT_ERROR:
			msg = "Recursion limit exhausted";
			break;
#ifdef HAVE_PCRE_JIT_SUPPORT
		case PHP_PCRE_JIT_STACKLIMIT_ERROR:
			msg = "JIT stack limit exhausted";
			break;
#endif
		default:
			msg = "Unknown error";
			break;
	}

	return msg;
}
/* }}} */
166
/* Destructor for the persistent pattern cache: frees the compiled pattern and
 * the entry itself (allocated with the system allocator). */
static void php_free_pcre_cache(zval *data) /* {{{ */
{
	pcre_cache_entry *entry = (pcre_cache_entry *) Z_PTR_P(data);

	if (entry != NULL) {
		pcre2_code_free(entry->re);
		free(entry);
	}
}
/* }}} */
175
/* Destructor for the per-request pattern cache: frees the compiled pattern and
 * the entry itself (allocated with the request allocator). */
static void php_efree_pcre_cache(zval *data) /* {{{ */
{
	pcre_cache_entry *entry = (pcre_cache_entry *) Z_PTR_P(data);

	if (entry != NULL) {
		pcre2_code_free(entry->re);
		efree(entry);
	}
}
/* }}} */
184
/* PCRE2 allocation callback backed by the persistent allocator; the user data
 * pointer is not used. */
static void *php_pcre_malloc(PCRE2_SIZE size, void *data)
{
	(void) data;

	return pemalloc(size, 1);
}
189
/* PCRE2 deallocation callback matching php_pcre_malloc(); the user data
 * pointer is not used. */
static void php_pcre_free(void *block, void *data)
{
	(void) data;

	pefree(block, 1);
}
194
/* PCRE2 allocation callback backed by the request allocator (emalloc); the
 * user data pointer is not used. */
static void *php_pcre_emalloc(PCRE2_SIZE size, void *data)
{
	(void) data;

	return emalloc(size);
}
199
/* PCRE2 deallocation callback matching php_pcre_emalloc(); the user data
 * pointer is not used. */
static void php_pcre_efree(void *block, void *data)
{
	(void) data;

	efree(block);
}
204
/* Extra compile options applied to the shared compile context at init time. */
#ifdef PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK
/* pcre 10.38 needs PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK, disabled by default */
#define PHP_PCRE_DEFAULT_EXTRA_COPTIONS PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK
#else
#define PHP_PCRE_DEFAULT_EXTRA_COPTIONS 0
#endif

/* Size passed to pcre2_match_data_create() for the preallocated per-thread
 * match data; patterns with capture_count + 1 above this get their own block. */
#define PHP_PCRE_PREALLOC_MDATA_SIZE 32
213
/*
 * Create whichever of the per-thread PCRE2 objects do not exist yet: general,
 * compile and match contexts, the JIT stack (only when `jit` is non-zero and
 * JIT support is compiled in) and the preallocated match data.
 *
 * On return pcre2_init_ok is 1 if everything exists, 0 if any creation failed.
 * Objects created before a failure are kept, so a later call can resume.
 */
static void php_pcre_init_pcre2(uint8_t jit)
{/*{{{*/
	/* Assume failure; only the very last statement reports success. */
	pcre2_init_ok = 0;

	if (gctx == NULL) {
		gctx = pcre2_general_context_create(php_pcre_malloc, php_pcre_free, NULL);
	}
	if (gctx == NULL) {
		return;
	}

	if (cctx == NULL) {
		cctx = pcre2_compile_context_create(gctx);
	}
	if (cctx == NULL) {
		return;
	}

	/* Re-applied on every call, also when the compile context already existed. */
	pcre2_set_compile_extra_options(cctx, PHP_PCRE_DEFAULT_EXTRA_COPTIONS);

	if (mctx == NULL) {
		mctx = pcre2_match_context_create(gctx);
	}
	if (mctx == NULL) {
		return;
	}

#ifdef HAVE_PCRE_JIT_SUPPORT
	if (jit && jit_stack == NULL) {
		jit_stack = pcre2_jit_stack_create(PCRE_JIT_STACK_MIN_SIZE, PCRE_JIT_STACK_MAX_SIZE, gctx);
		if (jit_stack == NULL) {
			return;
		}
	}
#endif

	if (mdata == NULL) {
		mdata = pcre2_match_data_create(PHP_PCRE_PREALLOC_MDATA_SIZE, gctx);
	}
	if (mdata == NULL) {
		return;
	}

	pcre2_init_ok = 1;
}/*}}}*/
262
php_pcre_shutdown_pcre2(void)263 static void php_pcre_shutdown_pcre2(void)
264 {/*{{{*/
265 if (gctx) {
266 pcre2_general_context_free(gctx);
267 gctx = NULL;
268 }
269
270 if (cctx) {
271 pcre2_compile_context_free(cctx);
272 cctx = NULL;
273 }
274
275 if (mctx) {
276 pcre2_match_context_free(mctx);
277 mctx = NULL;
278 }
279
280 #ifdef HAVE_PCRE_JIT_SUPPORT
281 /* Stack may only be destroyed when no cached patterns
282 possibly associated with it do exist. */
283 if (jit_stack) {
284 pcre2_jit_stack_free(jit_stack);
285 jit_stack = NULL;
286 }
287 #endif
288
289 if (mdata) {
290 pcre2_match_data_free(mdata);
291 mdata = NULL;
292 }
293
294 pcre2_init_ok = 0;
295 }/*}}}*/
296
PHP_GINIT_FUNCTION(pcre)297 static PHP_GINIT_FUNCTION(pcre) /* {{{ */
298 {
299 php_pcre_mutex_alloc();
300
301 /* If we're on the CLI SAPI, there will only be one request, so we don't need the
302 * cache to survive after RSHUTDOWN. */
303 pcre_globals->per_request_cache = strcmp(sapi_module.name, "cli") == 0;
304 if (!pcre_globals->per_request_cache) {
305 zend_hash_init(&pcre_globals->pcre_cache, 0, NULL, php_free_pcre_cache, 1);
306 }
307
308 pcre_globals->backtrack_limit = 0;
309 pcre_globals->recursion_limit = 0;
310 pcre_globals->error_code = PHP_PCRE_NO_ERROR;
311 ZVAL_UNDEF(&pcre_globals->unmatched_null_pair);
312 ZVAL_UNDEF(&pcre_globals->unmatched_empty_pair);
313 #ifdef HAVE_PCRE_JIT_SUPPORT
314 pcre_globals->jit = 1;
315 #endif
316
317 php_pcre_init_pcre2(1);
318 zend_hash_init(&char_tables, 1, NULL, php_pcre_free_char_table, 1);
319 }
320 /* }}} */
321
/* Globals destructor: tears down what the globals constructor set up. */
static PHP_GSHUTDOWN_FUNCTION(pcre) /* {{{ */
{
	/* The persistent cache only exists when it was not configured per-request. */
	if (!pcre_globals->per_request_cache) {
		zend_hash_destroy(&pcre_globals->pcre_cache);
	}

	php_pcre_shutdown_pcre2();
	/* Frees every cached locale character table via php_pcre_free_char_table(). */
	zend_hash_destroy(&char_tables);
	php_pcre_mutex_free();
}
/* }}} */
333
PHP_INI_MH(OnUpdateBacktrackLimit)334 static PHP_INI_MH(OnUpdateBacktrackLimit)
335 {/*{{{*/
336 OnUpdateLong(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
337 if (mctx) {
338 pcre2_set_match_limit(mctx, (uint32_t)PCRE_G(backtrack_limit));
339 }
340
341 return SUCCESS;
342 }/*}}}*/
343
PHP_INI_MH(OnUpdateRecursionLimit)344 static PHP_INI_MH(OnUpdateRecursionLimit)
345 {/*{{{*/
346 OnUpdateLong(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
347 if (mctx) {
348 pcre2_set_depth_limit(mctx, (uint32_t)PCRE_G(recursion_limit));
349 }
350
351 return SUCCESS;
352 }/*}}}*/
353
#ifdef HAVE_PCRE_JIT_SUPPORT
/* INI handler for pcre.jit: stores the flag and assigns the per-thread JIT
 * stack to the match context when JIT is on and a stack exists; otherwise the
 * assignment is cleared by passing NULL. */
static PHP_INI_MH(OnUpdateJit)
{/*{{{*/
	pcre2_jit_stack *stack;

	OnUpdateBool(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);

	stack = (PCRE_G(jit) && jit_stack) ? jit_stack : NULL;
	pcre2_jit_stack_assign(mctx, NULL, stack);

	return SUCCESS;
}/*}}}*/
#endif
367
/* INI settings of the extension. Defaults: pcre.backtrack_limit=1000000,
 * pcre.recursion_limit=100000 and (JIT builds only) pcre.jit=1. All of them
 * are registered as PHP_INI_ALL and routed through the handlers above. */
PHP_INI_BEGIN()
	STD_PHP_INI_ENTRY("pcre.backtrack_limit", "1000000", PHP_INI_ALL, OnUpdateBacktrackLimit, backtrack_limit, zend_pcre_globals, pcre_globals)
	STD_PHP_INI_ENTRY("pcre.recursion_limit", "100000",  PHP_INI_ALL, OnUpdateRecursionLimit, recursion_limit, zend_pcre_globals, pcre_globals)
#ifdef HAVE_PCRE_JIT_SUPPORT
	STD_PHP_INI_BOOLEAN("pcre.jit",           "1",       PHP_INI_ALL, OnUpdateJit,            jit,             zend_pcre_globals, pcre_globals)
#endif
PHP_INI_END()
375
/*
 * Fetch a string-valued PCRE2 build-time option.
 *
 * `what` is a PCRE2_CONFIG_* selector whose value is a string. The result is
 * a NUL-terminated buffer obtained from malloc() which the caller must
 * release with free(), or NULL when the option is unavailable, empty, or
 * memory could not be allocated.
 */
static char *_pcre2_config_str(uint32_t what)
{/*{{{*/
	char *ret;
	/* First call with a NULL buffer only queries the required length; a
	 * negative value is a PCRE2 error code (e.g. unsupported option). */
	int len = pcre2_config(what, NULL);

	if (len <= 0) {
		return NULL;
	}

	ret = malloc((size_t) len + 1);
	if (ret == NULL) {
		return NULL;
	}

	len = pcre2_config(what, ret);
	if (len <= 0) {
		free(ret);
		return NULL;
	}

	return ret;
}/*}}}*/
389
390 /* {{{ PHP_MINFO_FUNCTION(pcre) */
PHP_MINFO_FUNCTION(pcre)391 static PHP_MINFO_FUNCTION(pcre)
392 {
393 #ifdef HAVE_PCRE_JIT_SUPPORT
394 uint32_t flag = 0;
395 char *jit_target = _pcre2_config_str(PCRE2_CONFIG_JITTARGET);
396 #endif
397 char *version = _pcre2_config_str(PCRE2_CONFIG_VERSION);
398 char *unicode = _pcre2_config_str(PCRE2_CONFIG_UNICODE_VERSION);
399
400 php_info_print_table_start();
401 php_info_print_table_row(2, "PCRE (Perl Compatible Regular Expressions) Support", "enabled" );
402 php_info_print_table_row(2, "PCRE Library Version", version);
403 free(version);
404 php_info_print_table_row(2, "PCRE Unicode Version", unicode);
405 free(unicode);
406
407 #ifdef HAVE_PCRE_JIT_SUPPORT
408 if (!pcre2_config(PCRE2_CONFIG_JIT, &flag)) {
409 php_info_print_table_row(2, "PCRE JIT Support", flag ? "enabled" : "disabled");
410 } else {
411 php_info_print_table_row(2, "PCRE JIT Support", "unknown" );
412 }
413 if (jit_target) {
414 php_info_print_table_row(2, "PCRE JIT Target", jit_target);
415 }
416 free(jit_target);
417 #else
418 php_info_print_table_row(2, "PCRE JIT Support", "not compiled in" );
419 #endif
420
421 #ifdef HAVE_PCRE_VALGRIND_SUPPORT
422 php_info_print_table_row(2, "PCRE Valgrind Support", "enabled" );
423 #endif
424
425 php_info_print_table_end();
426
427 DISPLAY_INI_ENTRIES();
428 }
429 /* }}} */
430
431 /* {{{ PHP_MINIT_FUNCTION(pcre) */
PHP_MINIT_FUNCTION(pcre)432 static PHP_MINIT_FUNCTION(pcre)
433 {
434 #ifdef HAVE_PCRE_JIT_SUPPORT
435 if (UNEXPECTED(!pcre2_init_ok)) {
436 /* Retry. */
437 php_pcre_init_pcre2(PCRE_G(jit));
438 if (!pcre2_init_ok) {
439 return FAILURE;
440 }
441 }
442 #endif
443
444 REGISTER_INI_ENTRIES();
445
446 php_pcre_version = _pcre2_config_str(PCRE2_CONFIG_VERSION);
447
448 register_php_pcre_symbols(module_number);
449
450 return SUCCESS;
451 }
452 /* }}} */
453
454 /* {{{ PHP_MSHUTDOWN_FUNCTION(pcre) */
PHP_MSHUTDOWN_FUNCTION(pcre)455 static PHP_MSHUTDOWN_FUNCTION(pcre)
456 {
457 UNREGISTER_INI_ENTRIES();
458
459 free(php_pcre_version);
460
461 return SUCCESS;
462 }
463 /* }}} */
464
465 /* {{{ PHP_RINIT_FUNCTION(pcre) */
PHP_RINIT_FUNCTION(pcre)466 static PHP_RINIT_FUNCTION(pcre)
467 {
468 #ifdef HAVE_PCRE_JIT_SUPPORT
469 if (UNEXPECTED(!pcre2_init_ok)) {
470 /* Retry. */
471 php_pcre_mutex_lock();
472 php_pcre_init_pcre2(PCRE_G(jit));
473 if (!pcre2_init_ok) {
474 php_pcre_mutex_unlock();
475 return FAILURE;
476 }
477 php_pcre_mutex_unlock();
478 }
479
480 mdata_used = 0;
481 #endif
482
483 PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
484 PCRE_G(gctx_zmm) = pcre2_general_context_create(php_pcre_emalloc, php_pcre_efree, NULL);
485 if (!PCRE_G(gctx_zmm)) {
486 return FAILURE;
487 }
488
489 if (PCRE_G(per_request_cache)) {
490 zend_hash_init(&PCRE_G(pcre_cache), 0, NULL, php_efree_pcre_cache, 0);
491 }
492
493 return SUCCESS;
494 }
495 /* }}} */
496
PHP_RSHUTDOWN_FUNCTION(pcre)497 static PHP_RSHUTDOWN_FUNCTION(pcre)
498 {
499 pcre2_general_context_free(PCRE_G(gctx_zmm));
500 PCRE_G(gctx_zmm) = NULL;
501
502 if (PCRE_G(per_request_cache)) {
503 zend_hash_destroy(&PCRE_G(pcre_cache));
504 }
505
506 zval_ptr_dtor(&PCRE_G(unmatched_null_pair));
507 zval_ptr_dtor(&PCRE_G(unmatched_empty_pair));
508 ZVAL_UNDEF(&PCRE_G(unmatched_null_pair));
509 ZVAL_UNDEF(&PCRE_G(unmatched_empty_pair));
510 return SUCCESS;
511 }
512
513 /* {{{ static pcre_clean_cache */
pcre_clean_cache(zval * data,void * arg)514 static int pcre_clean_cache(zval *data, void *arg)
515 {
516 pcre_cache_entry *pce = (pcre_cache_entry *) Z_PTR_P(data);
517 int *num_clean = (int *)arg;
518
519 if (*num_clean > 0 && !pce->refcount) {
520 (*num_clean)--;
521 return ZEND_HASH_APPLY_REMOVE;
522 } else {
523 return ZEND_HASH_APPLY_KEEP;
524 }
525 }
526 /* }}} */
527
/* Release every name stored in a subpattern name table of `num_subpats`
 * slots (empty slots are skipped), then the table itself. */
static void free_subpats_table(zend_string **subpat_names, uint32_t num_subpats) {
	uint32_t slot = 0;

	while (slot < num_subpats) {
		zend_string *name = subpat_names[slot++];

		if (name != NULL) {
			zend_string_release(name);
		}
	}

	efree(subpat_names);
}
537
538 /* {{{ static make_subpats_table */
make_subpats_table(uint32_t num_subpats,pcre_cache_entry * pce)539 static zend_string **make_subpats_table(uint32_t num_subpats, pcre_cache_entry *pce)
540 {
541 uint32_t name_cnt = pce->name_count, name_size, ni = 0;
542 char *name_table;
543 zend_string **subpat_names;
544 int rc1, rc2;
545
546 rc1 = pcre2_pattern_info(pce->re, PCRE2_INFO_NAMETABLE, &name_table);
547 rc2 = pcre2_pattern_info(pce->re, PCRE2_INFO_NAMEENTRYSIZE, &name_size);
548 if (rc1 < 0 || rc2 < 0) {
549 php_error_docref(NULL, E_WARNING, "Internal pcre2_pattern_info() error %d", rc1 < 0 ? rc1 : rc2);
550 return NULL;
551 }
552
553 subpat_names = ecalloc(num_subpats, sizeof(zend_string *));
554 while (ni++ < name_cnt) {
555 unsigned short name_idx = 0x100 * (unsigned char)name_table[0] + (unsigned char)name_table[1];
556 const char *name = name_table + 2;
557 subpat_names[name_idx] = zend_string_init(name, strlen(name), 0);
558 if (is_numeric_string(ZSTR_VAL(subpat_names[name_idx]), ZSTR_LEN(subpat_names[name_idx]), NULL, NULL, 0) > 0) {
559 php_error_docref(NULL, E_WARNING, "Numeric named subpatterns are not allowed");
560 free_subpats_table(subpat_names, num_subpats);
561 return NULL;
562 }
563 name_table += name_size;
564 }
565 return subpat_names;
566 }
567 /* }}} */
568
569 /* {{{ static calculate_unit_length */
570 /* Calculates the byte length of the next character. Assumes valid UTF-8 for PCRE2_UTF. */
calculate_unit_length(pcre_cache_entry * pce,const char * start)571 static zend_always_inline size_t calculate_unit_length(pcre_cache_entry *pce, const char *start)
572 {
573 size_t unit_len;
574
575 if (pce->compile_options & PCRE2_UTF) {
576 const char *end = start;
577
578 /* skip continuation bytes */
579 while ((*++end & 0xC0) == 0x80);
580 unit_len = end - start;
581 } else {
582 unit_len = 1;
583 }
584 return unit_len;
585 }
586 /* }}} */
587
588 /* {{{ pcre_get_compiled_regex_cache */
pcre_get_compiled_regex_cache_ex(zend_string * regex,int locale_aware)589 PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, int locale_aware)
590 {
591 pcre2_code *re = NULL;
592 #if 10 == PCRE2_MAJOR && 37 == PCRE2_MINOR && !HAVE_BUNDLED_PCRE
593 uint32_t coptions = PCRE2_NO_START_OPTIMIZE;
594 #else
595 uint32_t coptions = 0;
596 #endif
597 PCRE2_UCHAR error[128];
598 PCRE2_SIZE erroffset;
599 int errnumber;
600 char delimiter;
601 char start_delimiter;
602 char end_delimiter;
603 char *p, *pp;
604 char *pattern;
605 size_t pattern_len;
606 uint32_t poptions = 0;
607 const uint8_t *tables = NULL;
608 zval *zv;
609 pcre_cache_entry new_entry;
610 int rc;
611 zend_string *key;
612 pcre_cache_entry *ret;
613
614 if (locale_aware && BG(ctype_string)) {
615 key = zend_string_concat2(
616 ZSTR_VAL(BG(ctype_string)), ZSTR_LEN(BG(ctype_string)),
617 ZSTR_VAL(regex), ZSTR_LEN(regex));
618 } else {
619 key = regex;
620 }
621
622 /* Try to lookup the cached regex entry, and if successful, just pass
623 back the compiled pattern, otherwise go on and compile it. */
624 zv = zend_hash_find(&PCRE_G(pcre_cache), key);
625 if (zv) {
626 if (key != regex) {
627 zend_string_release_ex(key, 0);
628 }
629 return (pcre_cache_entry*)Z_PTR_P(zv);
630 }
631
632 p = ZSTR_VAL(regex);
633 const char* end_p = ZSTR_VAL(regex) + ZSTR_LEN(regex);
634
635 /* Parse through the leading whitespace, and display a warning if we
636 get to the end without encountering a delimiter. */
637 while (isspace((int)*(unsigned char *)p)) p++;
638 if (p >= end_p) {
639 if (key != regex) {
640 zend_string_release_ex(key, 0);
641 }
642 php_error_docref(NULL, E_WARNING, "Empty regular expression");
643 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
644 return NULL;
645 }
646
647 /* Get the delimiter and display a warning if it is alphanumeric
648 or a backslash. */
649 delimiter = *p++;
650 if (isalnum((int)*(unsigned char *)&delimiter) || delimiter == '\\' || delimiter == '\0') {
651 if (key != regex) {
652 zend_string_release_ex(key, 0);
653 }
654 php_error_docref(NULL, E_WARNING, "Delimiter must not be alphanumeric, backslash, or NUL");
655 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
656 return NULL;
657 }
658
659 start_delimiter = delimiter;
660 if ((pp = strchr("([{< )]}> )]}>", delimiter)))
661 delimiter = pp[5];
662 end_delimiter = delimiter;
663
664 pp = p;
665
666 if (start_delimiter == end_delimiter) {
667 /* We need to iterate through the pattern, searching for the ending delimiter,
668 but skipping the backslashed delimiters. If the ending delimiter is not
669 found, display a warning. */
670 while (pp < end_p) {
671 if (*pp == '\\' && pp + 1 < end_p) pp++;
672 else if (*pp == delimiter)
673 break;
674 pp++;
675 }
676 } else {
677 /* We iterate through the pattern, searching for the matching ending
678 * delimiter. For each matching starting delimiter, we increment nesting
679 * level, and decrement it for each matching ending delimiter. If we
680 * reach the end of the pattern without matching, display a warning.
681 */
682 int brackets = 1; /* brackets nesting level */
683 while (pp < end_p) {
684 if (*pp == '\\' && pp + 1 < end_p) pp++;
685 else if (*pp == end_delimiter && --brackets <= 0)
686 break;
687 else if (*pp == start_delimiter)
688 brackets++;
689 pp++;
690 }
691 }
692
693 if (pp >= end_p) {
694 if (key != regex) {
695 zend_string_release_ex(key, 0);
696 }
697 if (start_delimiter == end_delimiter) {
698 php_error_docref(NULL,E_WARNING, "No ending delimiter '%c' found", delimiter);
699 } else {
700 php_error_docref(NULL,E_WARNING, "No ending matching delimiter '%c' found", delimiter);
701 }
702 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
703 return NULL;
704 }
705
706 /* Make a copy of the actual pattern. */
707 pattern_len = pp - p;
708 pattern = estrndup(p, pattern_len);
709
710 /* Move on to the options */
711 pp++;
712
713 /* Parse through the options, setting appropriate flags. Display
714 a warning if we encounter an unknown modifier. */
715 while (pp < end_p) {
716 switch (*pp++) {
717 /* Perl compatible options */
718 case 'i': coptions |= PCRE2_CASELESS; break;
719 case 'm': coptions |= PCRE2_MULTILINE; break;
720 case 'n': coptions |= PCRE2_NO_AUTO_CAPTURE; break;
721 case 's': coptions |= PCRE2_DOTALL; break;
722 case 'x': coptions |= PCRE2_EXTENDED; break;
723
724 /* PCRE specific options */
725 case 'A': coptions |= PCRE2_ANCHORED; break;
726 case 'D': coptions |= PCRE2_DOLLAR_ENDONLY;break;
727 case 'S': /* Pass. */ break;
728 case 'X': /* Pass. */ break;
729 case 'U': coptions |= PCRE2_UNGREEDY; break;
730 case 'u': coptions |= PCRE2_UTF;
731 /* In PCRE, by default, \d, \D, \s, \S, \w, and \W recognize only ASCII
732 characters, even in UTF-8 mode. However, this can be changed by setting
733 the PCRE2_UCP option. */
734 #ifdef PCRE2_UCP
735 coptions |= PCRE2_UCP;
736 #endif
737 break;
738 case 'J': coptions |= PCRE2_DUPNAMES; break;
739
740 /* Custom preg options */
741 case 'e': poptions |= PREG_REPLACE_EVAL; break;
742
743 case ' ':
744 case '\n':
745 case '\r':
746 break;
747
748 default:
749 if (pp[-1]) {
750 php_error_docref(NULL, E_WARNING, "Unknown modifier '%c'", pp[-1]);
751 } else {
752 php_error_docref(NULL, E_WARNING, "NUL is not a valid modifier");
753 }
754 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
755 efree(pattern);
756 if (key != regex) {
757 zend_string_release_ex(key, 0);
758 }
759 return NULL;
760 }
761 }
762
763 if (poptions & PREG_REPLACE_EVAL) {
764 php_error_docref(NULL, E_WARNING, "The /e modifier is no longer supported, use preg_replace_callback instead");
765 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
766 efree(pattern);
767 if (key != regex) {
768 zend_string_release_ex(key, 0);
769 }
770 return NULL;
771 }
772
773 if (key != regex) {
774 tables = (uint8_t *)zend_hash_find_ptr(&char_tables, BG(ctype_string));
775 if (!tables) {
776 zend_string *_k;
777 tables = pcre2_maketables(gctx);
778 if (UNEXPECTED(!tables)) {
779 php_error_docref(NULL,E_WARNING, "Failed to generate locale character tables");
780 pcre_handle_exec_error(PCRE2_ERROR_NOMEMORY);
781 zend_string_release_ex(key, 0);
782 efree(pattern);
783 return NULL;
784 }
785 _k = zend_string_init(ZSTR_VAL(BG(ctype_string)), ZSTR_LEN(BG(ctype_string)), 1);
786 GC_MAKE_PERSISTENT_LOCAL(_k);
787 zend_hash_add_ptr(&char_tables, _k, (void *)tables);
788 zend_string_release(_k);
789 }
790 }
791 pcre2_set_character_tables(cctx, tables);
792
793 /* Compile pattern and display a warning if compilation failed. */
794 re = pcre2_compile((PCRE2_SPTR)pattern, pattern_len, coptions, &errnumber, &erroffset, cctx);
795
796 if (re == NULL) {
797 if (key != regex) {
798 zend_string_release_ex(key, 0);
799 }
800 pcre2_get_error_message(errnumber, error, sizeof(error));
801 php_error_docref(NULL,E_WARNING, "Compilation failed: %s at offset %zu", error, erroffset);
802 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
803 efree(pattern);
804 return NULL;
805 }
806
807 #ifdef HAVE_PCRE_JIT_SUPPORT
808 if (PCRE_G(jit)) {
809 /* Enable PCRE JIT compiler */
810 rc = pcre2_jit_compile(re, PCRE2_JIT_COMPLETE);
811 if (EXPECTED(rc >= 0)) {
812 size_t jit_size = 0;
813 if (!pcre2_pattern_info(re, PCRE2_INFO_JITSIZE, &jit_size) && jit_size > 0) {
814 poptions |= PREG_JIT;
815 }
816 } else if (rc == PCRE2_ERROR_NOMEMORY) {
817 php_error_docref(NULL, E_WARNING,
818 "Allocation of JIT memory failed, PCRE JIT will be disabled. "
819 "This is likely caused by security restrictions. "
820 "Either grant PHP permission to allocate executable memory, or set pcre.jit=0");
821 PCRE_G(jit) = 0;
822 } else {
823 pcre2_get_error_message(rc, error, sizeof(error));
824 php_error_docref(NULL, E_WARNING, "JIT compilation failed: %s", error);
825 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
826 }
827 }
828 #endif
829 efree(pattern);
830
831 /*
832 * If we reached cache limit, clean out the items from the head of the list;
833 * these are supposedly the oldest ones (but not necessarily the least used
834 * ones).
835 */
836 if (zend_hash_num_elements(&PCRE_G(pcre_cache)) == PCRE_CACHE_SIZE) {
837 int num_clean = PCRE_CACHE_SIZE / 8;
838 zend_hash_apply_with_argument(&PCRE_G(pcre_cache), pcre_clean_cache, &num_clean);
839 }
840
841 /* Store the compiled pattern and extra info in the cache. */
842 new_entry.re = re;
843 new_entry.preg_options = poptions;
844 new_entry.compile_options = coptions;
845 new_entry.refcount = 0;
846
847 rc = pcre2_pattern_info(re, PCRE2_INFO_CAPTURECOUNT, &new_entry.capture_count);
848 if (rc < 0) {
849 if (key != regex) {
850 zend_string_release_ex(key, 0);
851 }
852 php_error_docref(NULL, E_WARNING, "Internal pcre2_pattern_info() error %d", rc);
853 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
854 return NULL;
855 }
856
857 rc = pcre2_pattern_info(re, PCRE2_INFO_NAMECOUNT, &new_entry.name_count);
858 if (rc < 0) {
859 if (key != regex) {
860 zend_string_release_ex(key, 0);
861 }
862 php_error_docref(NULL, E_WARNING, "Internal pcre_pattern_info() error %d", rc);
863 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
864 return NULL;
865 }
866
867 /*
868 * Interned strings are not duplicated when stored in HashTable,
869 * but all the interned strings created during HTTP request are removed
870 * at end of request. However PCRE_G(pcre_cache) must be consistent
871 * on the next request as well. So we disable usage of interned strings
872 * as hash keys especually for this table.
873 * See bug #63180
874 */
875 if (!(GC_FLAGS(key) & IS_STR_PERMANENT) && !PCRE_G(per_request_cache)) {
876 zend_string *str = zend_string_init(ZSTR_VAL(key), ZSTR_LEN(key), 1);
877 GC_MAKE_PERSISTENT_LOCAL(str);
878
879 ret = zend_hash_add_new_mem(&PCRE_G(pcre_cache), str, &new_entry, sizeof(pcre_cache_entry));
880 zend_string_release(str);
881 } else {
882 ret = zend_hash_add_new_mem(&PCRE_G(pcre_cache), key, &new_entry, sizeof(pcre_cache_entry));
883 }
884
885 if (key != regex) {
886 zend_string_release_ex(key, 0);
887 }
888
889 return ret;
890 }
891 /* }}} */
892
/* {{{ pcre_get_compiled_regex_cache
 * Convenience wrapper: looks up or compiles `regex` with locale awareness
 * enabled. Returns the cache entry, or NULL on failure. */
PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex)
{
	return pcre_get_compiled_regex_cache_ex(regex, 1);
}
/* }}} */
899
900 /* {{{ pcre_get_compiled_regex */
pcre_get_compiled_regex(zend_string * regex,uint32_t * capture_count)901 PHPAPI pcre2_code *pcre_get_compiled_regex(zend_string *regex, uint32_t *capture_count)
902 {
903 pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex);
904
905 if (capture_count) {
906 *capture_count = pce ? pce->capture_count : 0;
907 }
908
909 return pce ? pce->re : NULL;
910 }
911 /* }}} */
912
913 /* XXX For the cases where it's only about match yes/no and no capture
914 required, perhaps just a minimum sized data would suffice. */
php_pcre_create_match_data(uint32_t capture_count,pcre2_code * re)915 PHPAPI pcre2_match_data *php_pcre_create_match_data(uint32_t capture_count, pcre2_code *re)
916 {/*{{{*/
917
918 assert(NULL != re);
919
920 if (EXPECTED(!mdata_used)) {
921 int rc = 0;
922
923 if (!capture_count) {
924 /* As we deal with a non cached pattern, no other way to gather this info. */
925 rc = pcre2_pattern_info(re, PCRE2_INFO_CAPTURECOUNT, &capture_count);
926 }
927
928 if (rc >= 0 && capture_count + 1 <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
929 mdata_used = 1;
930 return mdata;
931 }
932 }
933
934 return pcre2_match_data_create_from_pattern(re, gctx);
935 }/*}}}*/
936
php_pcre_free_match_data(pcre2_match_data * match_data)937 PHPAPI void php_pcre_free_match_data(pcre2_match_data *match_data)
938 {/*{{{*/
939 if (UNEXPECTED(match_data != mdata)) {
940 pcre2_match_data_free(match_data);
941 } else {
942 mdata_used = 0;
943 }
944 }/*}}}*/
945
init_unmatched_null_pair(void)946 static void init_unmatched_null_pair(void) {
947 zval val1, val2;
948 ZVAL_NULL(&val1);
949 ZVAL_LONG(&val2, -1);
950 ZVAL_ARR(&PCRE_G(unmatched_null_pair), zend_new_pair(&val1, &val2));
951 }
952
init_unmatched_empty_pair(void)953 static void init_unmatched_empty_pair(void) {
954 zval val1, val2;
955 ZVAL_EMPTY_STRING(&val1);
956 ZVAL_LONG(&val2, -1);
957 ZVAL_ARR(&PCRE_G(unmatched_empty_pair), zend_new_pair(&val1, &val2));
958 }
959
/* Store the substring subject[start_offset, end_offset) in `val` as a string. */
static zend_always_inline void populate_match_value_str(
		zval *val, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset) {
	ZVAL_STRINGL_FAST(val, subject + start_offset, end_offset - start_offset);
}
964
/* Fill `val` with the matched substring, or, for a group that did not
 * participate in the match (start offset PCRE2_UNSET), with null or an empty
 * string depending on `unmatched_as_null`. */
static inline void populate_match_value(
		zval *val, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset,
		uint32_t unmatched_as_null) {
	if (start_offset != PCRE2_UNSET) {
		populate_match_value_str(val, subject, start_offset, end_offset);
		return;
	}

	if (unmatched_as_null) {
		ZVAL_NULL(val);
		return;
	}

	ZVAL_EMPTY_STRING(val);
}
978
/* Store `val` under `name` in the `subpats` array and take a reference on it
 * when it was actually stored. */
static inline void add_named(
		zval *subpats, zend_string *name, zval *val, bool unmatched) {
	HashTable *ht = Z_ARRVAL_P(subpats);

	/* If the DUPNAMES option is used, multiple subpatterns might have the same name.
	 * In this case we want to preserve the one that actually has a value. */
	if (unmatched) {
		/* Never overwrite an existing entry with an unmatched value. */
		if (!zend_hash_add(ht, name, val)) {
			return;
		}
	} else {
		zend_hash_update(ht, name, val);
	}

	Z_TRY_ADDREF_P(val);
}
992
993 /* {{{ add_offset_pair */
add_offset_pair(zval * result,const char * subject,PCRE2_SIZE start_offset,PCRE2_SIZE end_offset,zend_string * name,uint32_t unmatched_as_null)994 static inline void add_offset_pair(
995 zval *result, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset,
996 zend_string *name, uint32_t unmatched_as_null)
997 {
998 zval match_pair;
999
1000 /* Add (match, offset) to the return value */
1001 if (PCRE2_UNSET == start_offset) {
1002 if (unmatched_as_null) {
1003 if (Z_ISUNDEF(PCRE_G(unmatched_null_pair))) {
1004 init_unmatched_null_pair();
1005 }
1006 ZVAL_COPY(&match_pair, &PCRE_G(unmatched_null_pair));
1007 } else {
1008 if (Z_ISUNDEF(PCRE_G(unmatched_empty_pair))) {
1009 init_unmatched_empty_pair();
1010 }
1011 ZVAL_COPY(&match_pair, &PCRE_G(unmatched_empty_pair));
1012 }
1013 } else {
1014 zval val1, val2;
1015 populate_match_value_str(&val1, subject, start_offset, end_offset);
1016 ZVAL_LONG(&val2, start_offset);
1017 ZVAL_ARR(&match_pair, zend_new_pair(&val1, &val2));
1018 }
1019
1020 if (name) {
1021 add_named(result, name, &match_pair, start_offset == PCRE2_UNSET);
1022 }
1023 zend_hash_next_index_insert(Z_ARRVAL_P(result), &match_pair);
1024 }
1025 /* }}} */
1026
/* Fill the `subpats` array with the results of a single match.
 *
 *  - offsets:      start/end offset pairs, two entries per group
 *  - subpat_names: optional table mapping group number to name (may be NULL,
 *                  and individual entries may be NULL)
 *  - num_subpats:  total number of groups of the pattern
 *  - count:        number of groups for which `offsets` holds values
 *  - mark:         optional mark name, stored under the "MARK" key
 *  - flags:        PREG_OFFSET_CAPTURE and PREG_UNMATCHED_AS_NULL are honoured
 *
 * Every group is appended under the next numeric index; named groups are
 * additionally stored under their name. Groups in [count, num_subpats) are
 * only emitted when PREG_UNMATCHED_AS_NULL is set. */
static void populate_subpat_array(
		zval *subpats, const char *subject, PCRE2_SIZE *offsets, zend_string **subpat_names,
		uint32_t num_subpats, int count, const PCRE2_SPTR mark, zend_long flags) {
	const bool with_offsets = (flags & PREG_OFFSET_CAPTURE) != 0;
	const bool null_for_unmatched = (flags & PREG_UNMATCHED_AS_NULL) != 0;
	int i;

	/* Groups reported by the matcher. */
	for (i = 0; i < count; i++) {
		zend_string *group_name = subpat_names ? subpat_names[i] : NULL;

		if (with_offsets) {
			add_offset_pair(
				subpats, subject, offsets[2*i], offsets[2*i+1],
				group_name, null_for_unmatched);
		} else {
			zval entry;

			populate_match_value(
				&entry, subject, offsets[2*i], offsets[2*i+1], null_for_unmatched);
			if (group_name) {
				add_named(subpats, group_name, &entry, offsets[2*i] == PCRE2_UNSET);
			}
			zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &entry);
		}
	}

	/* Trailing groups the matcher did not report at all. */
	if (null_for_unmatched) {
		for (i = count; i < num_subpats; i++) {
			zend_string *group_name = subpat_names ? subpat_names[i] : NULL;

			if (with_offsets) {
				add_offset_pair(subpats, NULL, PCRE2_UNSET, PCRE2_UNSET, group_name, 1);
			} else {
				zval entry;

				ZVAL_NULL(&entry);
				if (group_name) {
					/* Do not clobber a same-named group that already has an entry. */
					zend_hash_add(Z_ARRVAL_P(subpats), group_name, &entry);
				}
				zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &entry);
			}
		}
	}

	/* Add MARK, if available */
	if (mark) {
		add_assoc_string_ex(subpats, "MARK", sizeof("MARK") - 1, (char *)mark);
	}
}
1094
/* Shared argument handling for preg_match() (global == 0) and
 * preg_match_all() (global == 1): parses the PHP arguments, looks up the
 * compiled pattern and delegates to php_pcre_match_impl(). */
static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) /* {{{ */
{
	zend_string      *regex;             /* Pattern source */
	zend_string      *subject;           /* Text to search */
	zval             *subpats = NULL;    /* Optional output argument for the captures */
	zend_long         flags = 0;         /* PREG_* flags */
	zend_long         start_offset = 0;  /* Offset at which the search begins */
	pcre_cache_entry *pce;               /* Compiled pattern */

	ZEND_PARSE_PARAMETERS_START(2, 5)
		Z_PARAM_STR(regex)
		Z_PARAM_STR(subject)
		Z_PARAM_OPTIONAL
		Z_PARAM_ZVAL(subpats)
		Z_PARAM_LONG(flags)
		Z_PARAM_LONG(start_offset)
	ZEND_PARSE_PARAMETERS_END();

	/* Fetch the compiled pattern, compiling it if it is not cached yet. */
	pce = pcre_get_compiled_regex_cache(regex);
	if (pce == NULL) {
		RETURN_FALSE;
	}

	/* The entry's refcount is raised for the duration of the call
	 * (presumably so the cache does not drop it meanwhile). The flags are
	 * only honoured when the caller passed at least four arguments. */
	++pce->refcount;
	php_pcre_match_impl(pce, subject, return_value, subpats,
		global, ZEND_NUM_ARGS() >= 4, flags, start_offset);
	--pce->refcount;
}
/* }}} */
1125
is_known_valid_utf8(zend_string * subject_str,PCRE2_SIZE start_offset)1126 static zend_always_inline bool is_known_valid_utf8(
1127 zend_string *subject_str, PCRE2_SIZE start_offset) {
1128 if (!(GC_FLAGS(subject_str) & IS_STR_VALID_UTF8)) {
1129 /* We don't know whether the string is valid UTF-8 or not. */
1130 return 0;
1131 }
1132
1133 if (start_offset == ZSTR_LEN(subject_str)) {
1134 /* Degenerate case: Offset points to end of string. */
1135 return 1;
1136 }
1137
1138 /* Check that the offset does not point to an UTF-8 continuation byte. */
1139 return (ZSTR_VAL(subject_str)[start_offset] & 0xc0) != 0x80;
1140 }
1141
1142 /* {{{ php_pcre_match_impl() */
php_pcre_match_impl(pcre_cache_entry * pce,zend_string * subject_str,zval * return_value,zval * subpats,int global,int use_flags,zend_long flags,zend_off_t start_offset)1143 PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str, zval *return_value,
1144 zval *subpats, int global, int use_flags, zend_long flags, zend_off_t start_offset)
1145 {
1146 zval result_set, /* Holds a set of subpatterns after
1147 a global match */
1148 *match_sets = NULL; /* An array of sets of matches for each
1149 subpattern after a global match */
1150 uint32_t options; /* Execution options */
1151 int count; /* Count of matched subpatterns */
1152 PCRE2_SIZE *offsets; /* Array of subpattern offsets */
1153 uint32_t num_subpats; /* Number of captured subpatterns */
1154 int matched; /* Has anything matched */
1155 zend_string **subpat_names; /* Array for named subpatterns */
1156 size_t i;
1157 uint32_t subpats_order; /* Order of subpattern matches */
1158 uint32_t offset_capture; /* Capture match offsets: yes/no */
1159 uint32_t unmatched_as_null; /* Null non-matches: yes/no */
1160 PCRE2_SPTR mark = NULL; /* Target for MARK name */
1161 zval marks; /* Array of marks for PREG_PATTERN_ORDER */
1162 pcre2_match_data *match_data;
1163 PCRE2_SIZE start_offset2, orig_start_offset;
1164
1165 char *subject = ZSTR_VAL(subject_str);
1166 size_t subject_len = ZSTR_LEN(subject_str);
1167
1168 ZVAL_UNDEF(&marks);
1169
1170 /* Overwrite the passed-in value for subpatterns with an empty array. */
1171 if (subpats != NULL) {
1172 subpats = zend_try_array_init(subpats);
1173 if (!subpats) {
1174 RETURN_THROWS();
1175 }
1176 }
1177
1178 subpats_order = global ? PREG_PATTERN_ORDER : 0;
1179
1180 if (use_flags) {
1181 offset_capture = flags & PREG_OFFSET_CAPTURE;
1182 unmatched_as_null = flags & PREG_UNMATCHED_AS_NULL;
1183
1184 /*
1185 * subpats_order is pre-set to pattern mode so we change it only if
1186 * necessary.
1187 */
1188 if (flags & 0xff) {
1189 subpats_order = flags & 0xff;
1190 }
1191 if ((global && (subpats_order < PREG_PATTERN_ORDER || subpats_order > PREG_SET_ORDER)) ||
1192 (!global && subpats_order != 0)) {
1193 zend_argument_value_error(4, "must be a PREG_* constant");
1194 RETURN_THROWS();
1195 }
1196 } else {
1197 offset_capture = 0;
1198 unmatched_as_null = 0;
1199 }
1200
1201 /* Negative offset counts from the end of the string. */
1202 if (start_offset < 0) {
1203 if ((PCRE2_SIZE)-start_offset <= subject_len) {
1204 start_offset2 = subject_len + start_offset;
1205 } else {
1206 start_offset2 = 0;
1207 }
1208 } else {
1209 start_offset2 = (PCRE2_SIZE)start_offset;
1210 }
1211
1212 if (start_offset2 > subject_len) {
1213 pcre_handle_exec_error(PCRE2_ERROR_BADOFFSET);
1214 RETURN_FALSE;
1215 }
1216
1217 /* Calculate the size of the offsets array, and allocate memory for it. */
1218 num_subpats = pce->capture_count + 1;
1219
1220 /*
1221 * Build a mapping from subpattern numbers to their names. We will
1222 * allocate the table only if there are any named subpatterns.
1223 */
1224 subpat_names = NULL;
1225 if (subpats && pce->name_count > 0) {
1226 subpat_names = make_subpats_table(num_subpats, pce);
1227 if (!subpat_names) {
1228 RETURN_FALSE;
1229 }
1230 }
1231
1232 /* Allocate match sets array and initialize the values. */
1233 if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
1234 match_sets = (zval *)safe_emalloc(num_subpats, sizeof(zval), 0);
1235 for (i=0; i<num_subpats; i++) {
1236 array_init(&match_sets[i]);
1237 }
1238 }
1239
1240 matched = 0;
1241 PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1242
1243 if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
1244 match_data = mdata;
1245 } else {
1246 match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
1247 if (!match_data) {
1248 PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1249 if (subpat_names) {
1250 free_subpats_table(subpat_names, num_subpats);
1251 }
1252 if (match_sets) {
1253 efree(match_sets);
1254 }
1255 RETURN_FALSE;
1256 }
1257 }
1258
1259 orig_start_offset = start_offset2;
1260 options =
1261 (pce->compile_options & PCRE2_UTF) && !is_known_valid_utf8(subject_str, orig_start_offset)
1262 ? 0 : PCRE2_NO_UTF_CHECK;
1263
1264 /* Execute the regular expression. */
1265 #ifdef HAVE_PCRE_JIT_SUPPORT
1266 if ((pce->preg_options & PREG_JIT) && options) {
1267 count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1268 PCRE2_NO_UTF_CHECK, match_data, mctx);
1269 } else
1270 #endif
1271 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1272 options, match_data, mctx);
1273
1274 while (1) {
1275 /* If something has matched */
1276 if (count >= 0) {
1277 /* Check for too many substrings condition. */
1278 if (UNEXPECTED(count == 0)) {
1279 php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
1280 count = num_subpats;
1281 }
1282
1283 matched:
1284 matched++;
1285
1286 offsets = pcre2_get_ovector_pointer(match_data);
1287
1288 /* If subpatterns array has been passed, fill it in with values. */
1289 if (subpats != NULL) {
1290 /* Try to get the list of substrings and display a warning if failed. */
1291 if (offsets[1] < offsets[0]) {
1292 if (subpat_names) {
1293 free_subpats_table(subpat_names, num_subpats);
1294 }
1295 if (match_sets) efree(match_sets);
1296 php_error_docref(NULL, E_WARNING, "Get subpatterns list failed");
1297 RETURN_FALSE;
1298 }
1299
1300 if (global) { /* global pattern matching */
1301 if (subpats && subpats_order == PREG_PATTERN_ORDER) {
1302 /* For each subpattern, insert it into the appropriate array. */
1303 if (offset_capture) {
1304 for (i = 0; i < count; i++) {
1305 add_offset_pair(
1306 &match_sets[i], subject, offsets[2*i], offsets[2*i+1],
1307 NULL, unmatched_as_null);
1308 }
1309 } else {
1310 for (i = 0; i < count; i++) {
1311 zval val;
1312 populate_match_value(
1313 &val, subject, offsets[2*i], offsets[2*i+1], unmatched_as_null);
1314 zend_hash_next_index_insert_new(Z_ARRVAL(match_sets[i]), &val);
1315 }
1316 }
1317 mark = pcre2_get_mark(match_data);
1318 /* Add MARK, if available */
1319 if (mark) {
1320 if (Z_TYPE(marks) == IS_UNDEF) {
1321 array_init(&marks);
1322 }
1323 add_index_string(&marks, matched - 1, (char *) mark);
1324 }
1325 /*
1326 * If the number of captured subpatterns on this run is
1327 * less than the total possible number, pad the result
1328 * arrays with NULLs or empty strings.
1329 */
1330 if (count < num_subpats) {
1331 for (; i < num_subpats; i++) {
1332 if (offset_capture) {
1333 add_offset_pair(
1334 &match_sets[i], NULL, PCRE2_UNSET, PCRE2_UNSET,
1335 NULL, unmatched_as_null);
1336 } else if (unmatched_as_null) {
1337 add_next_index_null(&match_sets[i]);
1338 } else {
1339 add_next_index_str(&match_sets[i], ZSTR_EMPTY_ALLOC());
1340 }
1341 }
1342 }
1343 } else {
1344 /* Allocate and populate the result set array */
1345 array_init_size(&result_set, count + (mark ? 1 : 0));
1346 mark = pcre2_get_mark(match_data);
1347 populate_subpat_array(
1348 &result_set, subject, offsets, subpat_names,
1349 num_subpats, count, mark, flags);
1350 /* And add it to the output array */
1351 zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &result_set);
1352 }
1353 } else { /* single pattern matching */
1354 /* For each subpattern, insert it into the subpatterns array. */
1355 mark = pcre2_get_mark(match_data);
1356 populate_subpat_array(
1357 subpats, subject, offsets, subpat_names, num_subpats, count, mark, flags);
1358 break;
1359 }
1360 }
1361
1362 /* Advance to the next piece. */
1363 start_offset2 = offsets[1];
1364
1365 /* If we have matched an empty string, mimic what Perl's /g options does.
1366 This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
1367 the match again at the same point. If this fails (picked up above) we
1368 advance to the next character. */
1369 if (start_offset2 == offsets[0]) {
1370 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1371 PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
1372 if (count >= 0) {
1373 if (global) {
1374 goto matched;
1375 } else {
1376 break;
1377 }
1378 } else if (count == PCRE2_ERROR_NOMATCH) {
1379 /* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
1380 this is not necessarily the end. We need to advance
1381 the start offset, and continue. Fudge the offset values
1382 to achieve this, unless we're already at the end of the string. */
1383 if (start_offset2 < subject_len) {
1384 size_t unit_len = calculate_unit_length(pce, subject + start_offset2);
1385
1386 start_offset2 += unit_len;
1387 } else {
1388 break;
1389 }
1390 } else {
1391 goto error;
1392 }
1393 }
1394 } else if (count == PCRE2_ERROR_NOMATCH) {
1395 break;
1396 } else {
1397 error:
1398 pcre_handle_exec_error(count);
1399 break;
1400 }
1401
1402 if (!global) {
1403 break;
1404 }
1405
1406 /* Execute the regular expression. */
1407 #ifdef HAVE_PCRE_JIT_SUPPORT
1408 if ((pce->preg_options & PREG_JIT)) {
1409 if (PCRE2_UNSET == start_offset2 || start_offset2 > subject_len) {
1410 pcre_handle_exec_error(PCRE2_ERROR_BADOFFSET);
1411 break;
1412 }
1413 count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1414 PCRE2_NO_UTF_CHECK, match_data, mctx);
1415 } else
1416 #endif
1417 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1418 PCRE2_NO_UTF_CHECK, match_data, mctx);
1419 }
1420 if (match_data != mdata) {
1421 pcre2_match_data_free(match_data);
1422 }
1423
1424 /* Add the match sets to the output array and clean up */
1425 if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
1426 if (subpat_names) {
1427 for (i = 0; i < num_subpats; i++) {
1428 if (subpat_names[i]) {
1429 zend_hash_update(Z_ARRVAL_P(subpats), subpat_names[i], &match_sets[i]);
1430 Z_ADDREF(match_sets[i]);
1431 }
1432 zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i]);
1433 }
1434 } else {
1435 for (i = 0; i < num_subpats; i++) {
1436 zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i]);
1437 }
1438 }
1439 efree(match_sets);
1440
1441 if (Z_TYPE(marks) != IS_UNDEF) {
1442 add_assoc_zval(subpats, "MARK", &marks);
1443 }
1444 }
1445
1446 if (subpat_names) {
1447 free_subpats_table(subpat_names, num_subpats);
1448 }
1449
1450 if (PCRE_G(error_code) == PHP_PCRE_NO_ERROR) {
1451 /* If there was no error and we're in /u mode, remember that the string is valid UTF-8. */
1452 if ((pce->compile_options & PCRE2_UTF)
1453 && !ZSTR_IS_INTERNED(subject_str) && orig_start_offset == 0) {
1454 GC_ADD_FLAGS(subject_str, IS_STR_VALID_UTF8);
1455 }
1456
1457 RETVAL_LONG(matched);
1458 } else {
1459 RETVAL_FALSE;
1460 }
1461 }
1462 /* }}} */
1463
/* {{{ Perform a Perl-style regular expression match */
/* Userland entry point; forwards to php_do_pcre_match() with global = 0,
 * i.e. matching stops after the first match. */
PHP_FUNCTION(preg_match)
{
	php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
}
/* }}} */
1470
/* {{{ Perform a Perl-style global regular expression match */
/* Userland entry point; forwards to php_do_pcre_match() with global = 1,
 * i.e. matching continues over the whole subject. */
PHP_FUNCTION(preg_match_all)
{
	php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
}
/* }}} */
1477
/* {{{ preg_get_backref */
/* Try to read a backreference at *str, which points at the introducing
 * character ('\\' or '$') of a replacement string.
 *
 * Accepted forms are the introducer followed by one or two decimal digits,
 * or "${" followed by one or two decimal digits and a closing '}'.
 *
 * On success the number is stored in *backref, *str is moved past the
 * reference and 1 is returned. On failure 0 is returned and *str is left
 * untouched (*backref may already have been written). */
static int preg_get_backref(char **str, int *backref)
{
	char *cursor = *str;
	int braced;

	/* An introducer at the very end of the string cannot start a reference. */
	if (cursor[1] == '\0') {
		return 0;
	}

	/* Only '$' may be followed by an opening brace. */
	braced = (cursor[0] == '$' && cursor[1] == '{');
	cursor += braced ? 2 : 1;

	/* The first digit is mandatory. */
	if (*cursor < '0' || *cursor > '9') {
		return 0;
	}
	*backref = *cursor - '0';
	cursor++;

	/* An optional second digit; anything beyond that is literal text. */
	if (*cursor >= '0' && *cursor <= '9') {
		*backref = *backref * 10 + *cursor - '0';
		cursor++;
	}

	if (braced) {
		if (*cursor != '}') {
			return 0;
		}
		cursor++;
	}

	*str = cursor;
	return 1;
}
/* }}} */
1515
1516 /* {{{ preg_do_repl_func */
preg_do_repl_func(zend_fcall_info * fci,zend_fcall_info_cache * fcc,const char * subject,PCRE2_SIZE * offsets,zend_string ** subpat_names,uint32_t num_subpats,int count,const PCRE2_SPTR mark,zend_long flags)1517 static zend_string *preg_do_repl_func(zend_fcall_info *fci, zend_fcall_info_cache *fcc, const char *subject, PCRE2_SIZE *offsets, zend_string **subpat_names, uint32_t num_subpats, int count, const PCRE2_SPTR mark, zend_long flags)
1518 {
1519 zend_string *result_str;
1520 zval retval; /* Function return value */
1521 zval arg; /* Argument to pass to function */
1522
1523 array_init_size(&arg, count + (mark ? 1 : 0));
1524 populate_subpat_array(&arg, subject, offsets, subpat_names, num_subpats, count, mark, flags);
1525
1526 fci->retval = &retval;
1527 fci->param_count = 1;
1528 fci->params = &arg;
1529
1530 if (zend_call_function(fci, fcc) == SUCCESS && Z_TYPE(retval) != IS_UNDEF) {
1531 if (EXPECTED(Z_TYPE(retval) == IS_STRING)) {
1532 result_str = Z_STR(retval);
1533 } else {
1534 result_str = zval_get_string_func(&retval);
1535 zval_ptr_dtor(&retval);
1536 }
1537 } else {
1538 if (!EG(exception)) {
1539 php_error_docref(NULL, E_WARNING, "Unable to call custom replacement function");
1540 }
1541
1542 result_str = zend_string_init(&subject[offsets[0]], offsets[1] - offsets[0], 0);
1543 }
1544
1545 zval_ptr_dtor(&arg);
1546
1547 return result_str;
1548 }
1549 /* }}} */
1550
1551 /* {{{ php_pcre_replace */
php_pcre_replace(zend_string * regex,zend_string * subject_str,const char * subject,size_t subject_len,zend_string * replace_str,size_t limit,size_t * replace_count)1552 PHPAPI zend_string *php_pcre_replace(zend_string *regex,
1553 zend_string *subject_str,
1554 const char *subject, size_t subject_len,
1555 zend_string *replace_str,
1556 size_t limit, size_t *replace_count)
1557 {
1558 pcre_cache_entry *pce; /* Compiled regular expression */
1559 zend_string *result; /* Function result */
1560
1561 /* Abort on pending exception, e.g. thrown from __toString(). */
1562 if (UNEXPECTED(EG(exception))) {
1563 return NULL;
1564 }
1565
1566 /* Compile regex or get it from cache. */
1567 if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
1568 return NULL;
1569 }
1570 pce->refcount++;
1571 result = php_pcre_replace_impl(pce, subject_str, subject, subject_len, replace_str,
1572 limit, replace_count);
1573 pce->refcount--;
1574
1575 return result;
1576 }
1577 /* }}} */
1578
/* {{{ php_pcre_replace_impl() */
/* Replace up to `limit` matches of the compiled pattern `pce` in `subject`
 * (`subject_len` bytes) with `replace_str`.
 *
 * Inside `replace_str`, references recognised by preg_get_backref() are
 * substituted with the text of the corresponding group, and a backslash
 * followed by '\\' or '$' yields that character literally.
 *
 *  - subject_str:   optional zend_string; when non-NULL and nothing was
 *                   replaced, a copy of it (zend_string_copy) is returned
 *                   instead of building a new string.
 *  - replace_count: optional; incremented once per replacement.
 *
 * Returns the resulting string, or NULL on failure, in which case any
 * partially built result has been released. */
PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *subject_str, const char *subject, size_t subject_len, zend_string *replace_str, size_t limit, size_t *replace_count)
{
	uint32_t		 options;			/* Execution options */
	int				 count;				/* Count of matched subpatterns */
	PCRE2_SIZE		*offsets;			/* Array of subpattern offsets */
	uint32_t		 num_subpats;		/* Number of captured subpatterns */
	size_t			 new_len;			/* Length of needed storage */
	size_t			 alloc_len;			/* Actual allocated length */
	size_t			 match_len;			/* Length of the current match */
	int				 backref;			/* Backreference number */
	PCRE2_SIZE		 start_offset;		/* Where the new search starts */
	size_t			 last_end_offset;	/* Where the last search ended */
	char			*walkbuf,			/* Location of current replacement in the result */
					*walk,				/* Used to walk the replacement string */
					 walk_last;			/* Last walked character */
	const char		*match,				/* The current match */
					*piece,				/* The current piece of subject */
					*replace_end;		/* End of replacement string */
	size_t			result_len; 		/* Length of result */
	zend_string		*result;			/* Result of replacement */
	pcre2_match_data *match_data;

	/* One slot per capturing group plus one for the whole match. */
	num_subpats = pce->capture_count + 1;
	alloc_len = 0;
	result = NULL;

	/* Initialize */
	match = NULL;
	start_offset = 0;
	last_end_offset = 0;
	result_len = 0;
	PCRE_G(error_code) = PHP_PCRE_NO_ERROR;

	/* Use the preallocated match data block when it is free and big enough,
	 * otherwise allocate one for this call (released at the end). */
	if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
		match_data = mdata;
	} else {
		match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
		if (!match_data) {
			PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
			return NULL;
		}
	}

	/* For UTF patterns the first match runs with options == 0, i.e. without
	 * PCRE2_NO_UTF_CHECK; all later matches below pass PCRE2_NO_UTF_CHECK. */
	options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;

	/* Execute the regular expression. */
#ifdef HAVE_PCRE_JIT_SUPPORT
	/* The JIT entry point is only taken when options is non-zero, so a first
	 * match that still needs the UTF check goes through pcre2_match(). */
	if ((pce->preg_options & PREG_JIT) && options) {
		count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
				PCRE2_NO_UTF_CHECK, match_data, mctx);
	} else
#endif
	count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
			options, match_data, mctx);

	while (1) {
		/* Start of the not yet copied part of the subject. */
		piece = subject + last_end_offset;

		if (count >= 0 && limit > 0) {
			bool simple_string;

			/* Check for too many substrings condition. */
			if (UNEXPECTED(count == 0)) {
				php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
				count = num_subpats;
			}

matched:
			offsets = pcre2_get_ovector_pointer(match_data);

			/* A match that ends before it starts cannot be replaced. */
			if (UNEXPECTED(offsets[1] < offsets[0])) {
				PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
				if (result) {
					zend_string_release_ex(result, 0);
					result = NULL;
				}
				break;
			}

			if (replace_count) {
				++*replace_count;
			}

			/* Set the match location in subject */
			match = subject + offsets[0];

			new_len = result_len + offsets[0] - last_end_offset; /* part before the match */

			/* First pass over the replacement string: only compute how many
			 * bytes the expanded replacement needs. It must stay in step with
			 * the copying pass further down. */
			walk = ZSTR_VAL(replace_str);
			replace_end = walk + ZSTR_LEN(replace_str);
			walk_last = 0;
			simple_string = 1;
			while (walk < replace_end) {
				if ('\\' == *walk || '$' == *walk) {
					simple_string = 0;
					if (walk_last == '\\') {
						/* Escaped character: it takes the place of the backslash
						 * already counted, so the length does not grow. */
						walk++;
						walk_last = 0;
						continue;
					}
					if (preg_get_backref(&walk, &backref)) {
						/* References to groups beyond `count` contribute nothing. */
						if (backref < count)
							new_len += offsets[(backref<<1)+1] - offsets[backref<<1];
						continue;
					}
				}
				new_len++;
				walk++;
				walk_last = walk[-1];
			}

			/* Grow the result buffer to twice the required length when the
			 * current allocation is not strictly larger than what is needed. */
			if (new_len >= alloc_len) {
				alloc_len = zend_safe_address_guarded(2, new_len, ZSTR_MAX_OVERHEAD) - ZSTR_MAX_OVERHEAD;
				if (result == NULL) {
					result = zend_string_alloc(alloc_len, 0);
				} else {
					result = zend_string_extend(result, alloc_len, 0);
				}
			}

			if (match-piece > 0) {
				/* copy the part of the string before the match */
				memcpy(&ZSTR_VAL(result)[result_len], piece, match-piece);
				result_len += (match-piece);
			}

			if (simple_string) {
				/* copy replacement (the +1 also copies the terminating NUL byte) */
				memcpy(&ZSTR_VAL(result)[result_len], ZSTR_VAL(replace_str), ZSTR_LEN(replace_str)+1);
				result_len += ZSTR_LEN(replace_str);
			} else {
				/* copy replacement and backrefs */
				walkbuf = ZSTR_VAL(result) + result_len;

				walk = ZSTR_VAL(replace_str);
				walk_last = 0;
				while (walk < replace_end) {
					if ('\\' == *walk || '$' == *walk) {
						if (walk_last == '\\') {
							/* Overwrite the backslash copied in the previous step
							 * with the escaped character. */
							*(walkbuf-1) = *walk++;
							walk_last = 0;
							continue;
						}
						if (preg_get_backref(&walk, &backref)) {
							if (backref < count) {
								match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
								memcpy(walkbuf, subject + offsets[backref<<1], match_len);
								walkbuf += match_len;
							}
							continue;
						}
					}
					*walkbuf++ = *walk++;
					walk_last = walk[-1];
				}
				*walkbuf = '\0';
				/* increment the result length by how much we've added to the string */
				result_len += (walkbuf - (ZSTR_VAL(result) + result_len));
			}

			limit--;

			/* Advance to the next piece. */
			start_offset = last_end_offset = offsets[1];

			/* If we have matched an empty string, mimic what Perl's /g options does.
			   This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
			   the match again at the same point. If this fails (picked up above) we
			   advance to the next character. */
			if (start_offset == offsets[0]) {
				count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
					PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);

				piece = subject + start_offset;
				if (count >= 0 && limit > 0) {
					goto matched;
				} else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
					/* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
					   this is not necessarily the end. We need to advance
					   the start offset, and continue. Fudge the offset values
					   to achieve this, unless we're already at the end of the string. */
					if (start_offset < subject_len) {
						size_t unit_len = calculate_unit_length(pce, piece);
						start_offset += unit_len;
					} else {
						goto not_matched;
					}
				} else {
					goto error;
				}
			}

		} else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
not_matched:
			/* Nothing was replaced at all: hand back the subject itself. */
			if (!result && subject_str) {
				result = zend_string_copy(subject_str);
				break;
			}
			/* now we know exactly how long it is */
			alloc_len = result_len + subject_len - last_end_offset;
			if (NULL != result) {
				result = zend_string_realloc(result, alloc_len, 0);
			} else {
				result = zend_string_alloc(alloc_len, 0);
			}
			/* stick that last bit of string on our output */
			memcpy(ZSTR_VAL(result) + result_len, piece, subject_len - last_end_offset);
			result_len += subject_len - last_end_offset;
			ZSTR_VAL(result)[result_len] = '\0';
			ZSTR_LEN(result) = result_len;
			break;
		} else {
error:
			/* Report the matcher error and discard any partial result. */
			pcre_handle_exec_error(count);
			if (result) {
				zend_string_release_ex(result, 0);
				result = NULL;
			}
			break;
		}

		/* Look for the next match, starting at start_offset. */
#ifdef HAVE_PCRE_JIT_SUPPORT
		if (pce->preg_options & PREG_JIT) {
			count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
					PCRE2_NO_UTF_CHECK, match_data, mctx);
		} else
#endif
		count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
					PCRE2_NO_UTF_CHECK, match_data, mctx);
	}
	/* Only a match data block allocated for this call is released. */
	if (match_data != mdata) {
		pcre2_match_data_free(match_data);
	}

	return result;
}
/* }}} */
1818
1819 /* {{{ php_pcre_replace_func_impl() */
php_pcre_replace_func_impl(pcre_cache_entry * pce,zend_string * subject_str,const char * subject,size_t subject_len,zend_fcall_info * fci,zend_fcall_info_cache * fcc,size_t limit,size_t * replace_count,zend_long flags)1820 static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_string *subject_str, const char *subject, size_t subject_len, zend_fcall_info *fci, zend_fcall_info_cache *fcc, size_t limit, size_t *replace_count, zend_long flags)
1821 {
1822 uint32_t options; /* Execution options */
1823 int count; /* Count of matched subpatterns */
1824 PCRE2_SIZE *offsets; /* Array of subpattern offsets */
1825 zend_string **subpat_names; /* Array for named subpatterns */
1826 uint32_t num_subpats; /* Number of captured subpatterns */
1827 size_t new_len; /* Length of needed storage */
1828 size_t alloc_len; /* Actual allocated length */
1829 PCRE2_SIZE start_offset; /* Where the new search starts */
1830 size_t last_end_offset; /* Where the last search ended */
1831 const char *match, /* The current match */
1832 *piece; /* The current piece of subject */
1833 size_t result_len; /* Length of result */
1834 zend_string *result; /* Result of replacement */
1835 zend_string *eval_result; /* Result of custom function */
1836 pcre2_match_data *match_data;
1837 bool old_mdata_used;
1838
1839 /* Calculate the size of the offsets array, and allocate memory for it. */
1840 num_subpats = pce->capture_count + 1;
1841
1842 /*
1843 * Build a mapping from subpattern numbers to their names. We will
1844 * allocate the table only if there are any named subpatterns.
1845 */
1846 subpat_names = NULL;
1847 if (UNEXPECTED(pce->name_count > 0)) {
1848 subpat_names = make_subpats_table(num_subpats, pce);
1849 if (!subpat_names) {
1850 return NULL;
1851 }
1852 }
1853
1854 alloc_len = 0;
1855 result = NULL;
1856
1857 /* Initialize */
1858 match = NULL;
1859 start_offset = 0;
1860 last_end_offset = 0;
1861 result_len = 0;
1862 PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1863
1864 old_mdata_used = mdata_used;
1865 if (!old_mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
1866 mdata_used = 1;
1867 match_data = mdata;
1868 } else {
1869 match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
1870 if (!match_data) {
1871 PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1872 if (subpat_names) {
1873 free_subpats_table(subpat_names, num_subpats);
1874 }
1875 mdata_used = old_mdata_used;
1876 return NULL;
1877 }
1878 }
1879
1880 options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
1881
1882 /* Execute the regular expression. */
1883 #ifdef HAVE_PCRE_JIT_SUPPORT
1884 if ((pce->preg_options & PREG_JIT) && options) {
1885 count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1886 PCRE2_NO_UTF_CHECK, match_data, mctx);
1887 } else
1888 #endif
1889 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1890 options, match_data, mctx);
1891
1892 while (1) {
1893 piece = subject + last_end_offset;
1894
1895 if (count >= 0 && limit) {
1896 /* Check for too many substrings condition. */
1897 if (UNEXPECTED(count == 0)) {
1898 php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
1899 count = num_subpats;
1900 }
1901
1902 matched:
1903 offsets = pcre2_get_ovector_pointer(match_data);
1904
1905 if (UNEXPECTED(offsets[1] < offsets[0])) {
1906 PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1907 if (result) {
1908 zend_string_release_ex(result, 0);
1909 result = NULL;
1910 }
1911 break;
1912 }
1913
1914 if (replace_count) {
1915 ++*replace_count;
1916 }
1917
1918 /* Set the match location in subject */
1919 match = subject + offsets[0];
1920
1921 new_len = result_len + offsets[0] - last_end_offset; /* part before the match */
1922
1923 /* Use custom function to get replacement string and its length. */
1924 eval_result = preg_do_repl_func(
1925 fci, fcc, subject, offsets, subpat_names, num_subpats, count,
1926 pcre2_get_mark(match_data), flags);
1927
1928 ZEND_ASSERT(eval_result);
1929 new_len = zend_safe_address_guarded(1, ZSTR_LEN(eval_result) + ZSTR_MAX_OVERHEAD, new_len) -ZSTR_MAX_OVERHEAD;
1930 if (new_len >= alloc_len) {
1931 alloc_len = zend_safe_address_guarded(2, new_len, ZSTR_MAX_OVERHEAD) - ZSTR_MAX_OVERHEAD;
1932 if (result == NULL) {
1933 result = zend_string_alloc(alloc_len, 0);
1934 } else {
1935 result = zend_string_extend(result, alloc_len, 0);
1936 }
1937 }
1938
1939 if (match-piece > 0) {
1940 /* copy the part of the string before the match */
1941 memcpy(ZSTR_VAL(result) + result_len, piece, match-piece);
1942 result_len += (match-piece);
1943 }
1944
1945 /* If using custom function, copy result to the buffer and clean up. */
1946 memcpy(ZSTR_VAL(result) + result_len, ZSTR_VAL(eval_result), ZSTR_LEN(eval_result));
1947 result_len += ZSTR_LEN(eval_result);
1948 zend_string_release_ex(eval_result, 0);
1949
1950 limit--;
1951
1952 /* Advance to the next piece. */
1953 start_offset = last_end_offset = offsets[1];
1954
1955 /* If we have matched an empty string, mimic what Perl's /g options does.
1956 This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
1957 the match again at the same point. If this fails (picked up above) we
1958 advance to the next character. */
1959 if (start_offset == offsets[0]) {
1960 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1961 PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
1962
1963 piece = subject + start_offset;
1964 if (count >= 0 && limit) {
1965 goto matched;
1966 } else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
1967 /* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
1968 this is not necessarily the end. We need to advance
1969 the start offset, and continue. Fudge the offset values
1970 to achieve this, unless we're already at the end of the string. */
1971 if (start_offset < subject_len) {
1972 size_t unit_len = calculate_unit_length(pce, piece);
1973 start_offset += unit_len;
1974 } else {
1975 goto not_matched;
1976 }
1977 } else {
1978 goto error;
1979 }
1980 }
1981
1982 } else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
1983 not_matched:
1984 if (!result && subject_str) {
1985 result = zend_string_copy(subject_str);
1986 break;
1987 }
1988 /* now we know exactly how long it is */
1989 alloc_len = result_len + subject_len - last_end_offset;
1990 if (NULL != result) {
1991 result = zend_string_realloc(result, alloc_len, 0);
1992 } else {
1993 result = zend_string_alloc(alloc_len, 0);
1994 }
1995 /* stick that last bit of string on our output */
1996 memcpy(ZSTR_VAL(result) + result_len, piece, subject_len - last_end_offset);
1997 result_len += subject_len - last_end_offset;
1998 ZSTR_VAL(result)[result_len] = '\0';
1999 ZSTR_LEN(result) = result_len;
2000 break;
2001 } else {
2002 error:
2003 pcre_handle_exec_error(count);
2004 if (result) {
2005 zend_string_release_ex(result, 0);
2006 result = NULL;
2007 }
2008 break;
2009 }
2010 #ifdef HAVE_PCRE_JIT_SUPPORT
2011 if ((pce->preg_options & PREG_JIT)) {
2012 count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
2013 PCRE2_NO_UTF_CHECK, match_data, mctx);
2014 } else
2015 #endif
2016 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
2017 PCRE2_NO_UTF_CHECK, match_data, mctx);
2018 }
2019 if (match_data != mdata) {
2020 pcre2_match_data_free(match_data);
2021 }
2022 mdata_used = old_mdata_used;
2023
2024 if (UNEXPECTED(subpat_names)) {
2025 free_subpats_table(subpat_names, num_subpats);
2026 }
2027
2028 return result;
2029 }
2030 /* }}} */
2031
2032 /* {{{ php_pcre_replace_func */
php_pcre_replace_func(zend_string * regex,zend_string * subject_str,zend_fcall_info * fci,zend_fcall_info_cache * fcc,size_t limit,size_t * replace_count,zend_long flags)2033 static zend_always_inline zend_string *php_pcre_replace_func(zend_string *regex,
2034 zend_string *subject_str,
2035 zend_fcall_info *fci, zend_fcall_info_cache *fcc,
2036 size_t limit, size_t *replace_count, zend_long flags)
2037 {
2038 pcre_cache_entry *pce; /* Compiled regular expression */
2039 zend_string *result; /* Function result */
2040
2041 /* Compile regex or get it from cache. */
2042 if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
2043 return NULL;
2044 }
2045 pce->refcount++;
2046 result = php_pcre_replace_func_impl(
2047 pce, subject_str, ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), fci, fcc,
2048 limit, replace_count, flags);
2049 pce->refcount--;
2050
2051 return result;
2052 }
2053 /* }}} */
2054
2055 /* {{{ php_pcre_replace_array */
php_pcre_replace_array(HashTable * regex,zend_string * replace_str,HashTable * replace_ht,zend_string * subject_str,size_t limit,size_t * replace_count)2056 static zend_string *php_pcre_replace_array(HashTable *regex,
2057 zend_string *replace_str, HashTable *replace_ht,
2058 zend_string *subject_str, size_t limit, size_t *replace_count)
2059 {
2060 zval *regex_entry;
2061 zend_string *result;
2062
2063 zend_string_addref(subject_str);
2064
2065 if (replace_ht) {
2066 uint32_t replace_idx = 0;
2067
2068 /* For each entry in the regex array, get the entry */
2069 ZEND_HASH_FOREACH_VAL(regex, regex_entry) {
2070 /* Make sure we're dealing with strings. */
2071 zend_string *tmp_regex_str;
2072 zend_string *regex_str = zval_get_tmp_string(regex_entry, &tmp_regex_str);
2073 zend_string *replace_entry_str, *tmp_replace_entry_str;
2074 zval *zv;
2075
2076 /* Get current entry */
2077 while (1) {
2078 if (replace_idx == replace_ht->nNumUsed) {
2079 replace_entry_str = ZSTR_EMPTY_ALLOC();
2080 tmp_replace_entry_str = NULL;
2081 break;
2082 }
2083 zv = ZEND_HASH_ELEMENT(replace_ht, replace_idx);
2084 replace_idx++;
2085 if (Z_TYPE_P(zv) != IS_UNDEF) {
2086 replace_entry_str = zval_get_tmp_string(zv, &tmp_replace_entry_str);
2087 break;
2088 }
2089 }
2090
2091 /* Do the actual replacement and put the result back into subject_str
2092 for further replacements. */
2093 result = php_pcre_replace(regex_str, subject_str, ZSTR_VAL(subject_str),
2094 ZSTR_LEN(subject_str), replace_entry_str, limit, replace_count);
2095 zend_tmp_string_release(tmp_replace_entry_str);
2096 zend_tmp_string_release(tmp_regex_str);
2097 zend_string_release_ex(subject_str, 0);
2098 subject_str = result;
2099 if (UNEXPECTED(result == NULL)) {
2100 break;
2101 }
2102 } ZEND_HASH_FOREACH_END();
2103
2104 } else {
2105 ZEND_ASSERT(replace_str != NULL);
2106
2107 /* For each entry in the regex array, get the entry */
2108 ZEND_HASH_FOREACH_VAL(regex, regex_entry) {
2109 /* Make sure we're dealing with strings. */
2110 zend_string *tmp_regex_str;
2111 zend_string *regex_str = zval_get_tmp_string(regex_entry, &tmp_regex_str);
2112
2113 /* Do the actual replacement and put the result back into subject_str
2114 for further replacements. */
2115 result = php_pcre_replace(regex_str, subject_str, ZSTR_VAL(subject_str),
2116 ZSTR_LEN(subject_str), replace_str, limit, replace_count);
2117 zend_tmp_string_release(tmp_regex_str);
2118 zend_string_release_ex(subject_str, 0);
2119 subject_str = result;
2120
2121 if (UNEXPECTED(result == NULL)) {
2122 break;
2123 }
2124 } ZEND_HASH_FOREACH_END();
2125 }
2126
2127 return subject_str;
2128 }
2129 /* }}} */
2130
2131 /* {{{ php_replace_in_subject */
php_replace_in_subject(zend_string * regex_str,HashTable * regex_ht,zend_string * replace_str,HashTable * replace_ht,zend_string * subject,size_t limit,size_t * replace_count)2132 static zend_always_inline zend_string *php_replace_in_subject(
2133 zend_string *regex_str, HashTable *regex_ht,
2134 zend_string *replace_str, HashTable *replace_ht,
2135 zend_string *subject, size_t limit, size_t *replace_count)
2136 {
2137 zend_string *result;
2138
2139 if (regex_str) {
2140 ZEND_ASSERT(replace_str != NULL);
2141 result = php_pcre_replace(regex_str, subject, ZSTR_VAL(subject), ZSTR_LEN(subject),
2142 replace_str, limit, replace_count);
2143 } else {
2144 ZEND_ASSERT(regex_ht != NULL);
2145 result = php_pcre_replace_array(regex_ht, replace_str, replace_ht, subject,
2146 limit, replace_count);
2147 }
2148 return result;
2149 }
2150 /* }}} */
2151
2152 /* {{{ php_replace_in_subject_func */
php_replace_in_subject_func(zend_string * regex_str,HashTable * regex_ht,zend_fcall_info * fci,zend_fcall_info_cache * fcc,zend_string * subject,size_t limit,size_t * replace_count,zend_long flags)2153 static zend_string *php_replace_in_subject_func(zend_string *regex_str, HashTable *regex_ht,
2154 zend_fcall_info *fci, zend_fcall_info_cache *fcc,
2155 zend_string *subject, size_t limit, size_t *replace_count, zend_long flags)
2156 {
2157 zend_string *result;
2158
2159 if (regex_str) {
2160 result = php_pcre_replace_func(
2161 regex_str, subject, fci, fcc, limit, replace_count, flags);
2162 return result;
2163 } else {
2164 /* If regex is an array */
2165 zval *regex_entry;
2166
2167 ZEND_ASSERT(regex_ht != NULL);
2168
2169 zend_string_addref(subject);
2170
2171 /* For each entry in the regex array, get the entry */
2172 ZEND_HASH_FOREACH_VAL(regex_ht, regex_entry) {
2173 /* Make sure we're dealing with strings. */
2174 zend_string *tmp_regex_entry_str;
2175 zend_string *regex_entry_str = zval_get_tmp_string(regex_entry, &tmp_regex_entry_str);
2176
2177 /* Do the actual replacement and put the result back into subject
2178 for further replacements. */
2179 result = php_pcre_replace_func(
2180 regex_entry_str, subject, fci, fcc, limit, replace_count, flags);
2181 zend_tmp_string_release(tmp_regex_entry_str);
2182 zend_string_release(subject);
2183 subject = result;
2184 if (UNEXPECTED(result == NULL)) {
2185 break;
2186 }
2187 } ZEND_HASH_FOREACH_END();
2188
2189 return subject;
2190 }
2191 }
2192 /* }}} */
2193
2194 /* {{{ preg_replace_func_impl */
preg_replace_func_impl(zval * return_value,zend_string * regex_str,HashTable * regex_ht,zend_fcall_info * fci,zend_fcall_info_cache * fcc,zend_string * subject_str,HashTable * subject_ht,zend_long limit_val,zend_long flags)2195 static size_t preg_replace_func_impl(zval *return_value,
2196 zend_string *regex_str, HashTable *regex_ht,
2197 zend_fcall_info *fci, zend_fcall_info_cache *fcc,
2198 zend_string *subject_str, HashTable *subject_ht, zend_long limit_val, zend_long flags)
2199 {
2200 zend_string *result;
2201 size_t replace_count = 0;
2202
2203 if (subject_str) {
2204 result = php_replace_in_subject_func(
2205 regex_str, regex_ht, fci, fcc, subject_str, limit_val, &replace_count, flags);
2206 if (result != NULL) {
2207 RETVAL_STR(result);
2208 } else {
2209 RETVAL_NULL();
2210 }
2211 } else {
2212 /* if subject is an array */
2213 zval *subject_entry, zv;
2214 zend_string *string_key;
2215 zend_ulong num_key;
2216
2217 ZEND_ASSERT(subject_ht != NULL);
2218
2219 array_init_size(return_value, zend_hash_num_elements(subject_ht));
2220
2221 /* For each subject entry, convert it to string, then perform replacement
2222 and add the result to the return_value array. */
2223 ZEND_HASH_FOREACH_KEY_VAL(subject_ht, num_key, string_key, subject_entry) {
2224 zend_string *tmp_subject_entry_str;
2225 zend_string *subject_entry_str = zval_get_tmp_string(subject_entry, &tmp_subject_entry_str);
2226
2227 result = php_replace_in_subject_func(
2228 regex_str, regex_ht, fci, fcc, subject_entry_str, limit_val, &replace_count, flags);
2229 if (result != NULL) {
2230 /* Add to return array */
2231 ZVAL_STR(&zv, result);
2232 if (string_key) {
2233 zend_hash_add_new(Z_ARRVAL_P(return_value), string_key, &zv);
2234 } else {
2235 zend_hash_index_add_new(Z_ARRVAL_P(return_value), num_key, &zv);
2236 }
2237 }
2238 zend_tmp_string_release(tmp_subject_entry_str);
2239 } ZEND_HASH_FOREACH_END();
2240 }
2241
2242 return replace_count;
2243 }
2244 /* }}} */
2245
2246 /* {{{ preg_replace_common */
preg_replace_common(INTERNAL_FUNCTION_PARAMETERS,bool is_filter)2247 static void preg_replace_common(INTERNAL_FUNCTION_PARAMETERS, bool is_filter)
2248 {
2249 zval *zcount = NULL;
2250 zend_string *regex_str;
2251 HashTable *regex_ht;
2252 zend_string *replace_str;
2253 HashTable *replace_ht;
2254 zend_string *subject_str;
2255 HashTable *subject_ht;
2256 zend_long limit = -1;
2257 size_t replace_count = 0;
2258 zend_string *result;
2259 size_t old_replace_count;
2260
2261 /* Get function parameters and do error-checking. */
2262 ZEND_PARSE_PARAMETERS_START(3, 5)
2263 Z_PARAM_ARRAY_HT_OR_STR(regex_ht, regex_str)
2264 Z_PARAM_ARRAY_HT_OR_STR(replace_ht, replace_str)
2265 Z_PARAM_ARRAY_HT_OR_STR(subject_ht, subject_str)
2266 Z_PARAM_OPTIONAL
2267 Z_PARAM_LONG(limit)
2268 Z_PARAM_ZVAL(zcount)
2269 ZEND_PARSE_PARAMETERS_END();
2270
2271 /* If replace is an array then the regex argument needs to also be an array */
2272 if (replace_ht && !regex_ht) {
2273 zend_argument_type_error(1, "must be of type array when argument #2 ($replacement) is an array, string given");
2274 RETURN_THROWS();
2275 }
2276
2277 if (subject_str) {
2278 old_replace_count = replace_count;
2279 result = php_replace_in_subject(regex_str, regex_ht, replace_str, replace_ht,
2280 subject_str, limit, &replace_count);
2281 if (result != NULL) {
2282 if (!is_filter || replace_count > old_replace_count) {
2283 RETVAL_STR(result);
2284 } else {
2285 zend_string_release_ex(result, 0);
2286 RETVAL_NULL();
2287 }
2288 } else {
2289 RETVAL_NULL();
2290 }
2291 } else {
2292 /* if subject is an array */
2293 zval *subject_entry, zv;
2294 zend_string *string_key;
2295 zend_ulong num_key;
2296
2297 ZEND_ASSERT(subject_ht != NULL);
2298
2299 array_init_size(return_value, zend_hash_num_elements(subject_ht));
2300
2301 /* For each subject entry, convert it to string, then perform replacement
2302 and add the result to the return_value array. */
2303 ZEND_HASH_FOREACH_KEY_VAL(subject_ht, num_key, string_key, subject_entry) {
2304 old_replace_count = replace_count;
2305 zend_string *tmp_subject_entry_str;
2306 zend_string *subject_entry_str = zval_get_tmp_string(subject_entry, &tmp_subject_entry_str);
2307 result = php_replace_in_subject(regex_str, regex_ht, replace_str, replace_ht,
2308 subject_entry_str, limit, &replace_count);
2309
2310 if (result != NULL) {
2311 if (!is_filter || replace_count > old_replace_count) {
2312 /* Add to return array */
2313 ZVAL_STR(&zv, result);
2314 if (string_key) {
2315 zend_hash_add_new(Z_ARRVAL_P(return_value), string_key, &zv);
2316 } else {
2317 zend_hash_index_add_new(Z_ARRVAL_P(return_value), num_key, &zv);
2318 }
2319 } else {
2320 zend_string_release_ex(result, 0);
2321 }
2322 }
2323 zend_tmp_string_release(tmp_subject_entry_str);
2324 } ZEND_HASH_FOREACH_END();
2325 }
2326
2327 if (zcount) {
2328 ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count);
2329 }
2330 }
2331 /* }}} */
2332
/* {{{ Perform Perl-style regular expression replacement. */
PHP_FUNCTION(preg_replace)
{
	/* is_filter = false: every subject is returned, whether or not anything
	 * was replaced in it. */
	preg_replace_common(INTERNAL_FUNCTION_PARAM_PASSTHRU, false);
}
/* }}} */
2339
/* {{{ Perform Perl-style regular expression replacement using replacement callback.
 * Arguments: pattern (string or array), callback, subject (string or array),
 * optional limit (defaults to -1), optional count output, optional flags. */
PHP_FUNCTION(preg_replace_callback)
{
	zval *zcount = NULL;
	zend_string *regex_str;
	HashTable *regex_ht;
	zend_string *subject_str;
	HashTable *subject_ht;
	zend_long limit = -1, flags = 0;
	size_t replace_count;
	zend_fcall_info fci;
	zend_fcall_info_cache fcc;

	/* Get function parameters and do error-checking. */
	ZEND_PARSE_PARAMETERS_START(3, 6)
		Z_PARAM_ARRAY_HT_OR_STR(regex_ht, regex_str)
		Z_PARAM_FUNC(fci, fcc)
		Z_PARAM_ARRAY_HT_OR_STR(subject_ht, subject_str)
		Z_PARAM_OPTIONAL
		Z_PARAM_LONG(limit)
		Z_PARAM_ZVAL(zcount)
		Z_PARAM_LONG(flags)
	ZEND_PARSE_PARAMETERS_END();

	/* The helper fills return_value and reports the number of replacements. */
	replace_count = preg_replace_func_impl(return_value, regex_str, regex_ht,
		&fci, &fcc,
		subject_str, subject_ht, limit, flags);
	/* Hand the count back only if the caller supplied the count argument. */
	if (zcount) {
		ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count);
	}
}
/* }}} */
2372
/* {{{ Perform Perl-style regular expression replacement using an array that maps
 * each pattern (string key) to its replacement callback (value). The pairs are
 * applied in array order, each one operating on the result of the previous one.
 * Returns the final string/array, or NULL if a step produced NULL. */
PHP_FUNCTION(preg_replace_callback_array)
{
	zval zv, *replace, *zcount = NULL;
	HashTable *pattern, *subject_ht;
	zend_string *subject_str, *str_idx_regex;
	zend_long limit = -1, flags = 0;
	size_t replace_count = 0;
	zend_fcall_info fci;
	zend_fcall_info_cache fcc;

	/* Get function parameters and do error-checking. */
	ZEND_PARSE_PARAMETERS_START(2, 5)
		Z_PARAM_ARRAY_HT(pattern)
		Z_PARAM_ARRAY_HT_OR_STR(subject_ht, subject_str)
		Z_PARAM_OPTIONAL
		Z_PARAM_LONG(limit)
		Z_PARAM_ZVAL(zcount)
		Z_PARAM_LONG(flags)
	ZEND_PARSE_PARAMETERS_END();

	/* fci is built by hand here; function_name is set per pattern below.
	 * NOTE(review): the remaining fields are presumably filled in by the
	 * callee before the call is made - confirm. */
	fci.size = sizeof(fci);
	fci.object = NULL;
	fci.named_params = NULL;

	/* Take our own reference on the subject: each iteration releases the
	 * current subject and replaces it with the freshly produced result. */
	if (subject_ht) {
		GC_TRY_ADDREF(subject_ht);
	} else {
		GC_TRY_ADDREF(subject_str);
	}

	ZEND_HASH_FOREACH_STR_KEY_VAL(pattern, str_idx_regex, replace) {
		/* The value must be callable; this also fills fcc. */
		if (!zend_is_callable_ex(replace, NULL, 0, NULL, &fcc, NULL)) {
			zend_argument_type_error(1, "must contain only valid callbacks");
			goto error;
		}
		/* Integer keys carry no pattern and are rejected. */
		if (!str_idx_regex) {
			zend_argument_type_error(1, "must contain only string patterns as keys");
			goto error;
		}

		ZVAL_COPY_VALUE(&fci.function_name, replace);

		/* Run this pattern/callback pair; the result lands in the temporary zv. */
		replace_count += preg_replace_func_impl(&zv, str_idx_regex, /* regex_ht */ NULL, &fci, &fcc,
			subject_str, subject_ht, limit, flags);
		switch (Z_TYPE(zv)) {
			case IS_ARRAY:
				/* Array subject: swap in the new array for the next pair. */
				ZEND_ASSERT(subject_ht);
				zend_array_release(subject_ht);
				subject_ht = Z_ARR(zv);
				break;
			case IS_STRING:
				/* String subject: swap in the new string for the next pair. */
				ZEND_ASSERT(subject_str);
				zend_string_release(subject_str);
				subject_str = Z_STR(zv);
				break;
			case IS_NULL:
				/* The replacement produced NULL: return NULL and clean up. */
				RETVAL_NULL();
				goto error;
			EMPTY_SWITCH_DEFAULT_CASE()
		}

		/* Stop at the first pending exception. */
		if (EG(exception)) {
			goto error;
		}
	} ZEND_HASH_FOREACH_END();

	if (zcount) {
		ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count);
	}

	/* Success: the reference we hold on the subject moves to return_value. */
	if (subject_ht) {
		RETVAL_ARR(subject_ht);
		// Unset the type_flags of immutable arrays to prevent the VM from performing refcounting
		if (GC_FLAGS(subject_ht) & IS_ARRAY_IMMUTABLE) {
			Z_TYPE_FLAGS_P(return_value) = 0;
		}
		return;
	} else {
		RETURN_STR(subject_str);
	}

error:
	/* Failure: drop the reference we hold on the current subject. */
	if (subject_ht) {
		zend_array_release(subject_ht);
	} else {
		zend_string_release(subject_str);
	}
}
/* }}} */
2463
/* {{{ Perform Perl-style regular expression replacement and only return matches. */
PHP_FUNCTION(preg_filter)
{
	/* is_filter = true: subjects in which nothing was replaced are not
	 * returned. */
	preg_replace_common(INTERNAL_FUNCTION_PARAM_PASSTHRU, true);
}
/* }}} */
2470
2471 /* {{{ Split string into an array using a perl-style regular expression as a delimiter */
PHP_FUNCTION(preg_split)2472 PHP_FUNCTION(preg_split)
2473 {
2474 zend_string *regex; /* Regular expression */
2475 zend_string *subject; /* String to match against */
2476 zend_long limit_val = -1;/* Integer value of limit */
2477 zend_long flags = 0; /* Match control flags */
2478 pcre_cache_entry *pce; /* Compiled regular expression */
2479
2480 /* Get function parameters and do error checking */
2481 ZEND_PARSE_PARAMETERS_START(2, 4)
2482 Z_PARAM_STR(regex)
2483 Z_PARAM_STR(subject)
2484 Z_PARAM_OPTIONAL
2485 Z_PARAM_LONG(limit_val)
2486 Z_PARAM_LONG(flags)
2487 ZEND_PARSE_PARAMETERS_END();
2488
2489 /* Compile regex or get it from cache. */
2490 if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
2491 RETURN_FALSE;
2492 }
2493
2494 pce->refcount++;
2495 php_pcre_split_impl(pce, subject, return_value, limit_val, flags);
2496 pce->refcount--;
2497 }
2498 /* }}} */
2499
/* {{{ php_pcre_split_impl
 * Split `subject_str` on matches of the compiled pattern `pce` and store the
 * pieces in `return_value` as an array.
 *
 * limit_val: -1 and 0 both mean "no limit". Any other value <= 1 skips
 *            matching altogether, so the whole subject is the only piece.
 *            Larger values bound the number of pieces.
 * flags:     bitmask of PREG_SPLIT_NO_EMPTY, PREG_SPLIT_DELIM_CAPTURE and
 *            PREG_SPLIT_OFFSET_CAPTURE.
 *
 * On a match error, or when match data cannot be allocated, the array is
 * destroyed and `return_value` is set to false; PCRE_G(error_code) carries the
 * reason. */
PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str, zval *return_value,
	zend_long limit_val, zend_long flags)
{
	PCRE2_SIZE *offsets; /* Array of subpattern offsets */
	uint32_t options; /* Execution options */
	int count; /* Count of matched subpatterns */
	PCRE2_SIZE start_offset; /* Where the new search starts */
	PCRE2_SIZE last_match_offset; /* Location of last match */
	uint32_t no_empty; /* If NO_EMPTY flag is set */
	uint32_t delim_capture; /* If delimiters should be captured */
	uint32_t offset_capture; /* If offsets should be captured */
	uint32_t num_subpats; /* Number of captured subpatterns */
	zval tmp;
	pcre2_match_data *match_data;
	char *subject = ZSTR_VAL(subject_str);

	no_empty = flags & PREG_SPLIT_NO_EMPTY;
	delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE;
	offset_capture = flags & PREG_SPLIT_OFFSET_CAPTURE;

	/* Initialize return value */
	array_init(return_value);

	/* Number of offset pairs a match can produce: all capture groups plus the
	   whole match. Used below to decide whether the preallocated match data
	   is large enough. */
	num_subpats = pce->capture_count + 1;

	/* Start at the beginning of the string */
	start_offset = 0;
	last_match_offset = 0;
	PCRE_G(error_code) = PHP_PCRE_NO_ERROR;

	/* Normalize the limit: afterwards -1 means "unlimited". */
	if (limit_val == -1) {
		/* pass */
	} else if (limit_val == 0) {
		limit_val = -1;
	} else if (limit_val <= 1) {
		/* Only one piece allowed: no matching needed. */
		goto last;
	}

	/* Use the shared preallocated match data when it is free and big enough,
	   otherwise allocate match data sized for this pattern. */
	if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
		match_data = mdata;
	} else {
		match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
		if (!match_data) {
			PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
			zval_ptr_dtor(return_value);
			RETURN_FALSE;
		}
	}

	/* For UTF patterns the first match runs without PCRE2_NO_UTF_CHECK;
	   every later match passes PCRE2_NO_UTF_CHECK. */
	options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;

#ifdef HAVE_PCRE_JIT_SUPPORT
	/* The JIT entry point is only used for this first match when no UTF
	   check is required. */
	if ((pce->preg_options & PREG_JIT) && options) {
		count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
				PCRE2_NO_UTF_CHECK, match_data, mctx);
	} else
#endif
	count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
			options, match_data, mctx);

	while (1) {
		/* If something matched */
		if (count >= 0) {
			/* Check for too many substrings condition. */
			if (UNEXPECTED(count == 0)) {
				php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
				count = num_subpats;
			}

matched:
			offsets = pcre2_get_ovector_pointer(match_data);

			/* A match that ends before it starts is treated as an internal error. */
			if (UNEXPECTED(offsets[1] < offsets[0])) {
				PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
				break;
			}

			/* Emit the piece between the previous match and this one, unless
			   it is empty and NO_EMPTY was requested. */
			if (!no_empty || offsets[0] != last_match_offset) {
				if (offset_capture) {
					/* Add (match, offset) pair to the return value */
					add_offset_pair(
						return_value, subject, last_match_offset, offsets[0],
						NULL, 0);
				} else {
					/* Add the piece to the return value */
					populate_match_value_str(&tmp, subject, last_match_offset, offsets[0]);
					zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
				}

				/* One less left to do */
				if (limit_val != -1)
					limit_val--;
			}

			/* With DELIM_CAPTURE, also emit each captured group of the delimiter. */
			if (delim_capture) {
				size_t i;
				for (i = 1; i < count; i++) {
					/* If we have matched a delimiter */
					if (!no_empty || offsets[2*i] != offsets[2*i+1]) {
						if (offset_capture) {
							add_offset_pair(
								return_value, subject, offsets[2*i], offsets[2*i+1], NULL, 0);
						} else {
							populate_match_value_str(&tmp, subject, offsets[2*i], offsets[2*i+1]);
							zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
						}
					}
				}
			}

			/* Advance to the position right after the last full match */
			start_offset = last_match_offset = offsets[1];

			/* If we have matched an empty string, mimic what Perl's /g options does.
			   This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
			   the match again at the same point. If this fails (picked up above) we
			   advance to the next character. */
			if (start_offset == offsets[0]) {
				/* Get next piece if no limit or limit not yet reached and something matched*/
				if (limit_val != -1 && limit_val <= 1) {
					break;
				}
				count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
					PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
				if (count >= 0) {
					goto matched;
				} else if (count == PCRE2_ERROR_NOMATCH) {
					/* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
					   this is not necessarily the end. We need to advance
					   the start offset, and continue. Fudge the offset values
					   to achieve this, unless we're already at the end of the string. */
					if (start_offset < ZSTR_LEN(subject_str)) {
						start_offset += calculate_unit_length(pce, subject + start_offset);
					} else {
						break;
					}
				} else {
					goto error;
				}
			}

		} else if (count == PCRE2_ERROR_NOMATCH) {
			/* No further delimiter: the remainder is added after the loop. */
			break;
		} else {
error:
			pcre_handle_exec_error(count);
			break;
		}

		/* Get next piece if no limit or limit not yet reached and something matched*/
		if (limit_val != -1 && limit_val <= 1) {
			break;
		}

#ifdef HAVE_PCRE_JIT_SUPPORT
		if (pce->preg_options & PREG_JIT) {
			count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
					PCRE2_NO_UTF_CHECK, match_data, mctx);
		} else
#endif
		count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
				PCRE2_NO_UTF_CHECK, match_data, mctx);
	}
	/* Free the match data only if it is not the shared preallocated block. */
	if (match_data != mdata) {
		pcre2_match_data_free(match_data);
	}

	/* Any error recorded during matching turns the result into false. */
	if (PCRE_G(error_code) != PHP_PCRE_NO_ERROR) {
		zval_ptr_dtor(return_value);
		RETURN_FALSE;
	}

last:
	start_offset = last_match_offset; /* the offset might have been incremented, but without further successful matches */

	/* Append the text after the last delimiter, unless it is empty and
	   NO_EMPTY was requested. */
	if (!no_empty || start_offset < ZSTR_LEN(subject_str)) {
		if (offset_capture) {
			/* Add the last (match, offset) pair to the return value */
			add_offset_pair(return_value, subject, start_offset, ZSTR_LEN(subject_str), NULL, 0);
		} else {
			/* Add the last piece to the return value */
			if (start_offset == 0) {
				/* Nothing was split off: reuse the subject string itself. */
				ZVAL_STR_COPY(&tmp, subject_str);
			} else {
				populate_match_value_str(&tmp, subject, start_offset, ZSTR_LEN(subject_str));
			}
			zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
		}
	}
}
/* }}} */
2693
2694 /* {{{ Quote regular expression characters plus an optional character */
PHP_FUNCTION(preg_quote)2695 PHP_FUNCTION(preg_quote)
2696 {
2697 zend_string *str; /* Input string argument */
2698 zend_string *delim = NULL; /* Additional delimiter argument */
2699 char *in_str; /* Input string */
2700 char *in_str_end; /* End of the input string */
2701 zend_string *out_str; /* Output string with quoted characters */
2702 size_t extra_len; /* Number of additional characters */
2703 char *p, /* Iterator for input string */
2704 *q, /* Iterator for output string */
2705 delim_char = '\0', /* Delimiter character to be quoted */
2706 c; /* Current character */
2707
2708 /* Get the arguments and check for errors */
2709 ZEND_PARSE_PARAMETERS_START(1, 2)
2710 Z_PARAM_STR(str)
2711 Z_PARAM_OPTIONAL
2712 Z_PARAM_STR_OR_NULL(delim)
2713 ZEND_PARSE_PARAMETERS_END();
2714
2715 /* Nothing to do if we got an empty string */
2716 if (ZSTR_LEN(str) == 0) {
2717 RETURN_EMPTY_STRING();
2718 }
2719
2720 in_str = ZSTR_VAL(str);
2721 in_str_end = in_str + ZSTR_LEN(str);
2722
2723 if (delim) {
2724 delim_char = ZSTR_VAL(delim)[0];
2725 }
2726
2727 /* Go through the string and quote necessary characters */
2728 extra_len = 0;
2729 p = in_str;
2730 do {
2731 c = *p;
2732 switch(c) {
2733 case '.':
2734 case '\\':
2735 case '+':
2736 case '*':
2737 case '?':
2738 case '[':
2739 case '^':
2740 case ']':
2741 case '$':
2742 case '(':
2743 case ')':
2744 case '{':
2745 case '}':
2746 case '=':
2747 case '!':
2748 case '>':
2749 case '<':
2750 case '|':
2751 case ':':
2752 case '-':
2753 case '#':
2754 extra_len++;
2755 break;
2756
2757 case '\0':
2758 extra_len+=3;
2759 break;
2760
2761 default:
2762 if (c == delim_char) {
2763 extra_len++;
2764 }
2765 break;
2766 }
2767 p++;
2768 } while (p != in_str_end);
2769
2770 if (extra_len == 0) {
2771 RETURN_STR_COPY(str);
2772 }
2773
2774 /* Allocate enough memory so that even if each character
2775 is quoted, we won't run out of room */
2776 out_str = zend_string_safe_alloc(1, ZSTR_LEN(str), extra_len, 0);
2777 q = ZSTR_VAL(out_str);
2778 p = in_str;
2779
2780 do {
2781 c = *p;
2782 switch(c) {
2783 case '.':
2784 case '\\':
2785 case '+':
2786 case '*':
2787 case '?':
2788 case '[':
2789 case '^':
2790 case ']':
2791 case '$':
2792 case '(':
2793 case ')':
2794 case '{':
2795 case '}':
2796 case '=':
2797 case '!':
2798 case '>':
2799 case '<':
2800 case '|':
2801 case ':':
2802 case '-':
2803 case '#':
2804 *q++ = '\\';
2805 *q++ = c;
2806 break;
2807
2808 case '\0':
2809 *q++ = '\\';
2810 *q++ = '0';
2811 *q++ = '0';
2812 *q++ = '0';
2813 break;
2814
2815 default:
2816 if (c == delim_char) {
2817 *q++ = '\\';
2818 }
2819 *q++ = c;
2820 break;
2821 }
2822 p++;
2823 } while (p != in_str_end);
2824 *q = '\0';
2825
2826 RETURN_NEW_STR(out_str);
2827 }
2828 /* }}} */
2829
2830 /* {{{ Searches array and returns entries which match regex */
PHP_FUNCTION(preg_grep)2831 PHP_FUNCTION(preg_grep)
2832 {
2833 zend_string *regex; /* Regular expression */
2834 zval *input; /* Input array */
2835 zend_long flags = 0; /* Match control flags */
2836 pcre_cache_entry *pce; /* Compiled regular expression */
2837
2838 /* Get arguments and do error checking */
2839 ZEND_PARSE_PARAMETERS_START(2, 3)
2840 Z_PARAM_STR(regex)
2841 Z_PARAM_ARRAY(input)
2842 Z_PARAM_OPTIONAL
2843 Z_PARAM_LONG(flags)
2844 ZEND_PARSE_PARAMETERS_END();
2845
2846 /* Compile regex or get it from cache. */
2847 if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
2848 RETURN_FALSE;
2849 }
2850
2851 pce->refcount++;
2852 php_pcre_grep_impl(pce, input, return_value, flags);
2853 pce->refcount--;
2854 }
2855 /* }}} */
2856
php_pcre_grep_impl(pcre_cache_entry * pce,zval * input,zval * return_value,zend_long flags)2857 PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return_value, zend_long flags) /* {{{ */
2858 {
2859 zval *entry; /* An entry in the input array */
2860 uint32_t num_subpats; /* Number of captured subpatterns */
2861 int count; /* Count of matched subpatterns */
2862 uint32_t options; /* Execution options */
2863 zend_string *string_key;
2864 zend_ulong num_key;
2865 bool invert; /* Whether to return non-matching
2866 entries */
2867 pcre2_match_data *match_data;
2868 invert = flags & PREG_GREP_INVERT ? 1 : 0;
2869
2870 /* Calculate the size of the offsets array, and allocate memory for it. */
2871 num_subpats = pce->capture_count + 1;
2872
2873 /* Initialize return array */
2874 array_init(return_value);
2875
2876 PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
2877
2878 if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
2879 match_data = mdata;
2880 } else {
2881 match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
2882 if (!match_data) {
2883 PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
2884 return;
2885 }
2886 }
2887
2888 options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
2889
2890 /* Go through the input array */
2891 ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(input), num_key, string_key, entry) {
2892 zend_string *tmp_subject_str;
2893 zend_string *subject_str = zval_get_tmp_string(entry, &tmp_subject_str);
2894
2895 /* Perform the match */
2896 #ifdef HAVE_PCRE_JIT_SUPPORT
2897 if ((pce->preg_options & PREG_JIT) && options) {
2898 count = pcre2_jit_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), 0,
2899 PCRE2_NO_UTF_CHECK, match_data, mctx);
2900 } else
2901 #endif
2902 count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), 0,
2903 options, match_data, mctx);
2904
2905 /* If the entry fits our requirements */
2906 if (count >= 0) {
2907 /* Check for too many substrings condition. */
2908 if (UNEXPECTED(count == 0)) {
2909 php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
2910 }
2911 if (!invert) {
2912 Z_TRY_ADDREF_P(entry);
2913
2914 /* Add to return array */
2915 if (string_key) {
2916 zend_hash_update(Z_ARRVAL_P(return_value), string_key, entry);
2917 } else {
2918 zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, entry);
2919 }
2920 }
2921 } else if (count == PCRE2_ERROR_NOMATCH) {
2922 if (invert) {
2923 Z_TRY_ADDREF_P(entry);
2924
2925 /* Add to return array */
2926 if (string_key) {
2927 zend_hash_update(Z_ARRVAL_P(return_value), string_key, entry);
2928 } else {
2929 zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, entry);
2930 }
2931 }
2932 } else {
2933 pcre_handle_exec_error(count);
2934 zend_tmp_string_release(tmp_subject_str);
2935 break;
2936 }
2937
2938 zend_tmp_string_release(tmp_subject_str);
2939 } ZEND_HASH_FOREACH_END();
2940 if (match_data != mdata) {
2941 pcre2_match_data_free(match_data);
2942 }
2943 }
2944 /* }}} */
2945
2946 /* {{{ Returns the error code of the last regexp execution. */
PHP_FUNCTION(preg_last_error)2947 PHP_FUNCTION(preg_last_error)
2948 {
2949 ZEND_PARSE_PARAMETERS_NONE();
2950
2951 RETURN_LONG(PCRE_G(error_code));
2952 }
2953 /* }}} */
2954
2955 /* {{{ Returns the error message of the last regexp execution. */
PHP_FUNCTION(preg_last_error_msg)2956 PHP_FUNCTION(preg_last_error_msg)
2957 {
2958 ZEND_PARSE_PARAMETERS_NONE();
2959
2960 RETURN_STRING(php_pcre_get_error_msg(PCRE_G(error_code)));
2961 }
2962 /* }}} */
2963
2964 /* {{{ module definition structures */
2965
2966 zend_module_entry pcre_module_entry = {
2967 STANDARD_MODULE_HEADER,
2968 "pcre",
2969 ext_functions,
2970 PHP_MINIT(pcre),
2971 PHP_MSHUTDOWN(pcre),
2972 PHP_RINIT(pcre),
2973 PHP_RSHUTDOWN(pcre),
2974 PHP_MINFO(pcre),
2975 PHP_PCRE_VERSION,
2976 PHP_MODULE_GLOBALS(pcre),
2977 PHP_GINIT(pcre),
2978 PHP_GSHUTDOWN(pcre),
2979 NULL,
2980 STANDARD_MODULE_PROPERTIES_EX
2981 };
2982
2983 #ifdef COMPILE_DL_PCRE
ZEND_GET_MODULE(pcre)2984 ZEND_GET_MODULE(pcre)
2985 #endif
2986
2987 /* }}} */
2988
2989 PHPAPI pcre2_match_context *php_pcre_mctx(void)
2990 {/*{{{*/
2991 return mctx;
2992 }/*}}}*/
2993
php_pcre_gctx(void)2994 PHPAPI pcre2_general_context *php_pcre_gctx(void)
2995 {/*{{{*/
2996 return gctx;
2997 }/*}}}*/
2998
php_pcre_cctx(void)2999 PHPAPI pcre2_compile_context *php_pcre_cctx(void)
3000 {/*{{{*/
3001 return cctx;
3002 }/*}}}*/
3003
php_pcre_pce_incref(pcre_cache_entry * pce)3004 PHPAPI void php_pcre_pce_incref(pcre_cache_entry *pce)
3005 {/*{{{*/
3006 assert(NULL != pce);
3007 pce->refcount++;
3008 }/*}}}*/
3009
php_pcre_pce_decref(pcre_cache_entry * pce)3010 PHPAPI void php_pcre_pce_decref(pcre_cache_entry *pce)
3011 {/*{{{*/
3012 assert(NULL != pce);
3013 assert(0 != pce->refcount);
3014 pce->refcount--;
3015 }/*}}}*/
3016
php_pcre_pce_re(pcre_cache_entry * pce)3017 PHPAPI pcre2_code *php_pcre_pce_re(pcre_cache_entry *pce)
3018 {/*{{{*/
3019 assert(NULL != pce);
3020 return pce->re;
3021 }/*}}}*/
3022