1 /*
2 +----------------------------------------------------------------------+
3 | Copyright (c) The PHP Group |
4 +----------------------------------------------------------------------+
5 | This source file is subject to version 3.01 of the PHP license, |
6 | that is bundled with this package in the file LICENSE, and is |
7 | available through the world-wide-web at the following url: |
8 | https://www.php.net/license/3_01.txt |
9 | If you did not receive a copy of the PHP license and are unable to |
10 | obtain it through the world-wide-web, please send a note to |
11 | license@php.net so we can mail you a copy immediately. |
12 +----------------------------------------------------------------------+
13 | Author: Andrei Zmievski <andrei@php.net> |
14 +----------------------------------------------------------------------+
15 */
16
17 #include "php.h"
18 #include "php_ini.h"
19 #include "php_globals.h"
20 #include "php_pcre.h"
21 #include "ext/standard/info.h"
22 #include "ext/standard/basic_functions.h"
23 #include "zend_smart_str.h"
24 #include "SAPI.h"
25
26 #include "ext/standard/php_string.h"
27
/* Result ordering and capture flags for the matching functions. */
#define PREG_PATTERN_ORDER			1
#define PREG_SET_ORDER				2
#define PREG_OFFSET_CAPTURE			(1<<8)
#define PREG_UNMATCHED_AS_NULL		(1<<9)

/* PREG_SPLIT_* flags. */
#define PREG_SPLIT_NO_EMPTY			(1<<0)
#define PREG_SPLIT_DELIM_CAPTURE	(1<<1)
#define PREG_SPLIT_OFFSET_CAPTURE	(1<<2)

/* Set by the 'e' pattern modifier; patterns carrying it are rejected at compile time. */
#define PREG_REPLACE_EVAL			(1<<0)

#define PREG_GREP_INVERT			(1<<0)

/* Stored in a cache entry's preg_options when the pattern was JIT compiled. */
#define PREG_JIT					(1<<3)

/* Number of cached patterns at which old entries start being evicted. */
#define PCRE_CACHE_SIZE 4096

#ifdef HAVE_PCRE_JIT_SUPPORT
#define PHP_PCRE_JIT_SUPPORT 1
#else
#define PHP_PCRE_JIT_SUPPORT 0
#endif

/* PCRE2 version string; allocated in MINIT and released in MSHUTDOWN. */
char *php_pcre_version;
52
53 #include "php_pcre_arginfo.h"
54
55 struct _pcre_cache_entry {
56 pcre2_code *re;
57 uint32_t preg_options;
58 uint32_t capture_count;
59 uint32_t name_count;
60 uint32_t compile_options;
61 uint32_t refcount;
62 };
63
64 PHPAPI ZEND_DECLARE_MODULE_GLOBALS(pcre)
65
66 #ifdef HAVE_PCRE_JIT_SUPPORT
67 #define PCRE_JIT_STACK_MIN_SIZE (32 * 1024)
68 #define PCRE_JIT_STACK_MAX_SIZE (192 * 1024)
69 ZEND_TLS pcre2_jit_stack *jit_stack = NULL;
70 #endif
71 /* General context using (infallible) system allocator. */
72 ZEND_TLS pcre2_general_context *gctx = NULL;
73 /* These two are global per thread for now. Though it is possible to use these
74 per pattern. Either one can copy it and use in pce, or one does no global
75 contexts at all, but creates for every pce. */
76 ZEND_TLS pcre2_compile_context *cctx = NULL;
77 ZEND_TLS pcre2_match_context *mctx = NULL;
78 ZEND_TLS pcre2_match_data *mdata = NULL;
79 ZEND_TLS bool mdata_used = 0;
80 ZEND_TLS uint8_t pcre2_init_ok = 0;
81 #if defined(ZTS) && defined(HAVE_PCRE_JIT_SUPPORT)
82 static MUTEX_T pcre_mt = NULL;
83 #define php_pcre_mutex_alloc() \
84 if (tsrm_is_main_thread() && !pcre_mt) pcre_mt = tsrm_mutex_alloc();
85 #define php_pcre_mutex_free() \
86 if (tsrm_is_main_thread() && pcre_mt) { tsrm_mutex_free(pcre_mt); pcre_mt = NULL; }
87 #define php_pcre_mutex_lock() tsrm_mutex_lock(pcre_mt);
88 #define php_pcre_mutex_unlock() tsrm_mutex_unlock(pcre_mt);
89 #else
90 #define php_pcre_mutex_alloc()
91 #define php_pcre_mutex_free()
92 #define php_pcre_mutex_lock()
93 #define php_pcre_mutex_unlock()
94 #endif
95
96 ZEND_TLS HashTable char_tables;
97
/* Destructor for char_tables: releases the persistently allocated table held by the slot. */
static void php_pcre_free_char_table(zval *data)
{/*{{{*/
	pefree(Z_PTR_P(data), 1);
}/*}}}*/
103
pcre_handle_exec_error(int pcre_code)104 static void pcre_handle_exec_error(int pcre_code) /* {{{ */
105 {
106 int preg_code = 0;
107
108 switch (pcre_code) {
109 case PCRE2_ERROR_MATCHLIMIT:
110 preg_code = PHP_PCRE_BACKTRACK_LIMIT_ERROR;
111 break;
112
113 case PCRE2_ERROR_RECURSIONLIMIT:
114 preg_code = PHP_PCRE_RECURSION_LIMIT_ERROR;
115 break;
116
117 case PCRE2_ERROR_BADUTFOFFSET:
118 preg_code = PHP_PCRE_BAD_UTF8_OFFSET_ERROR;
119 break;
120
121 #ifdef HAVE_PCRE_JIT_SUPPORT
122 case PCRE2_ERROR_JIT_STACKLIMIT:
123 preg_code = PHP_PCRE_JIT_STACKLIMIT_ERROR;
124 break;
125 #endif
126
127 default:
128 if (pcre_code <= PCRE2_ERROR_UTF8_ERR1 && pcre_code >= PCRE2_ERROR_UTF8_ERR21) {
129 preg_code = PHP_PCRE_BAD_UTF8_ERROR;
130 } else {
131 preg_code = PHP_PCRE_INTERNAL_ERROR;
132 }
133 break;
134 }
135
136 PCRE_G(error_code) = preg_code;
137 }
138 /* }}} */
139
/* Returns the static, human readable message for a PHP_PCRE_* error code. */
static const char *php_pcre_get_error_msg(php_pcre_error_code error_code) /* {{{ */
{
	const char *msg;

	switch (error_code) {
		case PHP_PCRE_NO_ERROR:
			msg = "No error";
			break;
		case PHP_PCRE_INTERNAL_ERROR:
			msg = "Internal error";
			break;
		case PHP_PCRE_BAD_UTF8_ERROR:
			msg = "Malformed UTF-8 characters, possibly incorrectly encoded";
			break;
		case PHP_PCRE_BAD_UTF8_OFFSET_ERROR:
			msg = "The offset did not correspond to the beginning of a valid UTF-8 code point";
			break;
		case PHP_PCRE_BACKTRACK_LIMIT_ERROR:
			msg = "Backtrack limit exhausted";
			break;
		case PHP_PCRE_RECURSION_LIMIT_ERROR:
			msg = "Recursion limit exhausted";
			break;

#ifdef HAVE_PCRE_JIT_SUPPORT
		case PHP_PCRE_JIT_STACKLIMIT_ERROR:
			msg = "JIT stack limit exhausted";
			break;
#endif

		default:
			msg = "Unknown error";
			break;
	}

	return msg;
}
/* }}} */
166
/* Destructor for the persistent pattern cache: frees the compiled code, then the entry via free(). */
static void php_free_pcre_cache(zval *data) /* {{{ */
{
	pcre_cache_entry *entry = (pcre_cache_entry *) Z_PTR_P(data);

	if (entry != NULL) {
		pcre2_code_free(entry->re);
		free(entry);
	}
}
/* }}} */
175
/* Destructor for the per-request pattern cache: frees the compiled code, then the entry via efree(). */
static void php_efree_pcre_cache(zval *data) /* {{{ */
{
	pcre_cache_entry *entry = (pcre_cache_entry *) Z_PTR_P(data);

	if (entry != NULL) {
		pcre2_code_free(entry->re);
		efree(entry);
	}
}
/* }}} */
184
/* PCRE2 allocation callback backed by persistent pemalloc(); the user data pointer is unused. */
static void *php_pcre_malloc(PCRE2_SIZE size, void *data)
{
	(void) data;

	return pemalloc(size, 1);
}
189
/* PCRE2 release callback matching php_pcre_malloc(); the user data pointer is unused. */
static void php_pcre_free(void *block, void *data)
{
	(void) data;

	pefree(block, 1);
}
194
/* PCRE2 allocation callback backed by emalloc(); the user data pointer is unused. */
static void *php_pcre_emalloc(PCRE2_SIZE size, void *data)
{
	(void) data;

	return emalloc(size);
}
199
/* PCRE2 release callback matching php_pcre_emalloc(); the user data pointer is unused. */
static void php_pcre_efree(void *block, void *data)
{
	(void) data;

	efree(block);
}
204
/* Extra compile options applied to the shared compile context. */
#ifdef PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK
/* pcre 10.38 needs PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK, disabled by default */
# define PHP_PCRE_DEFAULT_EXTRA_COPTIONS PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK
#else
# define PHP_PCRE_DEFAULT_EXTRA_COPTIONS 0
#endif

/* Number of ovector pairs in the preallocated per-thread match data block. */
#define PHP_PCRE_PREALLOC_MDATA_SIZE 32
213
php_pcre_init_pcre2(uint8_t jit)214 static void php_pcre_init_pcre2(uint8_t jit)
215 {/*{{{*/
216 if (!gctx) {
217 gctx = pcre2_general_context_create(php_pcre_malloc, php_pcre_free, NULL);
218 if (!gctx) {
219 pcre2_init_ok = 0;
220 return;
221 }
222 }
223
224 if (!cctx) {
225 cctx = pcre2_compile_context_create(gctx);
226 if (!cctx) {
227 pcre2_init_ok = 0;
228 return;
229 }
230 }
231
232 pcre2_set_compile_extra_options(cctx, PHP_PCRE_DEFAULT_EXTRA_COPTIONS);
233
234 if (!mctx) {
235 mctx = pcre2_match_context_create(gctx);
236 if (!mctx) {
237 pcre2_init_ok = 0;
238 return;
239 }
240 }
241
242 #ifdef HAVE_PCRE_JIT_SUPPORT
243 if (jit && !jit_stack) {
244 jit_stack = pcre2_jit_stack_create(PCRE_JIT_STACK_MIN_SIZE, PCRE_JIT_STACK_MAX_SIZE, gctx);
245 if (!jit_stack) {
246 pcre2_init_ok = 0;
247 return;
248 }
249 }
250 #endif
251
252 if (!mdata) {
253 mdata = pcre2_match_data_create(PHP_PCRE_PREALLOC_MDATA_SIZE, gctx);
254 if (!mdata) {
255 pcre2_init_ok = 0;
256 return;
257 }
258 }
259
260 pcre2_init_ok = 1;
261 }/*}}}*/
262
php_pcre_shutdown_pcre2(void)263 static void php_pcre_shutdown_pcre2(void)
264 {/*{{{*/
265 if (gctx) {
266 pcre2_general_context_free(gctx);
267 gctx = NULL;
268 }
269
270 if (cctx) {
271 pcre2_compile_context_free(cctx);
272 cctx = NULL;
273 }
274
275 if (mctx) {
276 pcre2_match_context_free(mctx);
277 mctx = NULL;
278 }
279
280 #ifdef HAVE_PCRE_JIT_SUPPORT
281 /* Stack may only be destroyed when no cached patterns
282 possibly associated with it do exist. */
283 if (jit_stack) {
284 pcre2_jit_stack_free(jit_stack);
285 jit_stack = NULL;
286 }
287 #endif
288
289 if (mdata) {
290 pcre2_match_data_free(mdata);
291 mdata = NULL;
292 }
293
294 pcre2_init_ok = 0;
295 }/*}}}*/
296
PHP_GINIT_FUNCTION(pcre)297 static PHP_GINIT_FUNCTION(pcre) /* {{{ */
298 {
299 php_pcre_mutex_alloc();
300
301 /* If we're on the CLI SAPI, there will only be one request, so we don't need the
302 * cache to survive after RSHUTDOWN. */
303 pcre_globals->per_request_cache = strcmp(sapi_module.name, "cli") == 0;
304 if (!pcre_globals->per_request_cache) {
305 zend_hash_init(&pcre_globals->pcre_cache, 0, NULL, php_free_pcre_cache, 1);
306 }
307
308 pcre_globals->backtrack_limit = 0;
309 pcre_globals->recursion_limit = 0;
310 pcre_globals->error_code = PHP_PCRE_NO_ERROR;
311 ZVAL_UNDEF(&pcre_globals->unmatched_null_pair);
312 ZVAL_UNDEF(&pcre_globals->unmatched_empty_pair);
313 #ifdef HAVE_PCRE_JIT_SUPPORT
314 pcre_globals->jit = 1;
315 #endif
316
317 php_pcre_init_pcre2(1);
318 zend_hash_init(&char_tables, 1, NULL, php_pcre_free_char_table, 1);
319 }
320 /* }}} */
321
PHP_GSHUTDOWN_FUNCTION(pcre)322 static PHP_GSHUTDOWN_FUNCTION(pcre) /* {{{ */
323 {
324 if (!pcre_globals->per_request_cache) {
325 zend_hash_destroy(&pcre_globals->pcre_cache);
326 }
327
328 php_pcre_shutdown_pcre2();
329 zend_hash_destroy(&char_tables);
330 php_pcre_mutex_free();
331 }
332 /* }}} */
333
PHP_INI_MH(OnUpdateBacktrackLimit)334 static PHP_INI_MH(OnUpdateBacktrackLimit)
335 {/*{{{*/
336 OnUpdateLong(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
337 if (mctx) {
338 pcre2_set_match_limit(mctx, (uint32_t)PCRE_G(backtrack_limit));
339 }
340
341 return SUCCESS;
342 }/*}}}*/
343
PHP_INI_MH(OnUpdateRecursionLimit)344 static PHP_INI_MH(OnUpdateRecursionLimit)
345 {/*{{{*/
346 OnUpdateLong(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
347 if (mctx) {
348 pcre2_set_depth_limit(mctx, (uint32_t)PCRE_G(recursion_limit));
349 }
350
351 return SUCCESS;
352 }/*}}}*/
353
#ifdef HAVE_PCRE_JIT_SUPPORT
/*
 * INI handler for pcre.jit: stores the flag through OnUpdateBool() and then
 * attaches the JIT stack to the match context (or detaches it when JIT is off
 * or no stack exists). Nothing is assigned while the match context has not
 * been created yet, mirroring the other limit handlers.
 * Returns FAILURE if the value could not be stored.
 */
static PHP_INI_MH(OnUpdateJit)
{/*{{{*/
	if (OnUpdateBool(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage) == FAILURE) {
		return FAILURE;
	}

	if (mctx) {
		if (PCRE_G(jit) && jit_stack) {
			pcre2_jit_stack_assign(mctx, NULL, jit_stack);
		} else {
			pcre2_jit_stack_assign(mctx, NULL, NULL);
		}
	}

	return SUCCESS;
}/*}}}*/
#endif
367
368 PHP_INI_BEGIN()
369 STD_PHP_INI_ENTRY("pcre.backtrack_limit", "1000000", PHP_INI_ALL, OnUpdateBacktrackLimit, backtrack_limit, zend_pcre_globals, pcre_globals)
370 STD_PHP_INI_ENTRY("pcre.recursion_limit", "100000", PHP_INI_ALL, OnUpdateRecursionLimit, recursion_limit, zend_pcre_globals, pcre_globals)
371 #ifdef HAVE_PCRE_JIT_SUPPORT
372 STD_PHP_INI_BOOLEAN("pcre.jit", "1", PHP_INI_ALL, OnUpdateJit, jit, zend_pcre_globals, pcre_globals)
373 #endif
PHP_INI_END()374 PHP_INI_END()
375
/*
 * Fetches a string-valued pcre2_config() item into a malloc()ed buffer.
 *
 * what  PCRE2_CONFIG_* selector of a string item.
 *
 * Returns the buffer (the caller releases it with free()), or NULL when
 * pcre2_config() reports an error or an empty result, or when the buffer
 * cannot be allocated.
 */
static char *_pcre2_config_str(uint32_t what)
{/*{{{*/
	char *ret;
	/* The first call with a NULL buffer only reports the required length. */
	int len = pcre2_config(what, NULL);

	/* Negative values are PCRE2 error codes and must never reach malloc(). */
	if (len <= 0) {
		return NULL;
	}

	ret = malloc((size_t) len + 1);
	if (!ret) {
		return NULL;
	}

	len = pcre2_config(what, ret);
	if (len <= 0) {
		free(ret);
		return NULL;
	}

	return ret;
}/*}}}*/
389
390 /* {{{ PHP_MINFO_FUNCTION(pcre) */
PHP_MINFO_FUNCTION(pcre)391 static PHP_MINFO_FUNCTION(pcre)
392 {
393 #ifdef HAVE_PCRE_JIT_SUPPORT
394 uint32_t flag = 0;
395 char *jit_target = _pcre2_config_str(PCRE2_CONFIG_JITTARGET);
396 #endif
397 char *version = _pcre2_config_str(PCRE2_CONFIG_VERSION);
398 char *unicode = _pcre2_config_str(PCRE2_CONFIG_UNICODE_VERSION);
399
400 php_info_print_table_start();
401 php_info_print_table_row(2, "PCRE (Perl Compatible Regular Expressions) Support", "enabled" );
402 php_info_print_table_row(2, "PCRE Library Version", version);
403 free(version);
404 php_info_print_table_row(2, "PCRE Unicode Version", unicode);
405 free(unicode);
406
407 #ifdef HAVE_PCRE_JIT_SUPPORT
408 if (!pcre2_config(PCRE2_CONFIG_JIT, &flag)) {
409 php_info_print_table_row(2, "PCRE JIT Support", flag ? "enabled" : "disabled");
410 } else {
411 php_info_print_table_row(2, "PCRE JIT Support", "unknown" );
412 }
413 if (jit_target) {
414 php_info_print_table_row(2, "PCRE JIT Target", jit_target);
415 }
416 free(jit_target);
417 #else
418 php_info_print_table_row(2, "PCRE JIT Support", "not compiled in" );
419 #endif
420
421 #ifdef HAVE_PCRE_VALGRIND_SUPPORT
422 php_info_print_table_row(2, "PCRE Valgrind Support", "enabled" );
423 #endif
424
425 php_info_print_table_end();
426
427 DISPLAY_INI_ENTRIES();
428 }
429 /* }}} */
430
431 /* {{{ PHP_MINIT_FUNCTION(pcre) */
PHP_MINIT_FUNCTION(pcre)432 static PHP_MINIT_FUNCTION(pcre)
433 {
434 #ifdef HAVE_PCRE_JIT_SUPPORT
435 if (UNEXPECTED(!pcre2_init_ok)) {
436 /* Retry. */
437 php_pcre_init_pcre2(PCRE_G(jit));
438 if (!pcre2_init_ok) {
439 return FAILURE;
440 }
441 }
442 #endif
443
444 REGISTER_INI_ENTRIES();
445
446 php_pcre_version = _pcre2_config_str(PCRE2_CONFIG_VERSION);
447
448 register_php_pcre_symbols(module_number);
449
450 return SUCCESS;
451 }
452 /* }}} */
453
454 /* {{{ PHP_MSHUTDOWN_FUNCTION(pcre) */
PHP_MSHUTDOWN_FUNCTION(pcre)455 static PHP_MSHUTDOWN_FUNCTION(pcre)
456 {
457 UNREGISTER_INI_ENTRIES();
458
459 free(php_pcre_version);
460
461 return SUCCESS;
462 }
463 /* }}} */
464
465 /* {{{ PHP_RINIT_FUNCTION(pcre) */
PHP_RINIT_FUNCTION(pcre)466 static PHP_RINIT_FUNCTION(pcre)
467 {
468 #ifdef HAVE_PCRE_JIT_SUPPORT
469 if (UNEXPECTED(!pcre2_init_ok)) {
470 /* Retry. */
471 php_pcre_mutex_lock();
472 php_pcre_init_pcre2(PCRE_G(jit));
473 if (!pcre2_init_ok) {
474 php_pcre_mutex_unlock();
475 return FAILURE;
476 }
477 php_pcre_mutex_unlock();
478 }
479
480 mdata_used = 0;
481 #endif
482
483 PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
484 PCRE_G(gctx_zmm) = pcre2_general_context_create(php_pcre_emalloc, php_pcre_efree, NULL);
485 if (!PCRE_G(gctx_zmm)) {
486 return FAILURE;
487 }
488
489 if (PCRE_G(per_request_cache)) {
490 zend_hash_init(&PCRE_G(pcre_cache), 0, NULL, php_efree_pcre_cache, 0);
491 }
492
493 return SUCCESS;
494 }
495 /* }}} */
496
PHP_RSHUTDOWN_FUNCTION(pcre)497 static PHP_RSHUTDOWN_FUNCTION(pcre)
498 {
499 pcre2_general_context_free(PCRE_G(gctx_zmm));
500 PCRE_G(gctx_zmm) = NULL;
501
502 if (PCRE_G(per_request_cache)) {
503 zend_hash_destroy(&PCRE_G(pcre_cache));
504 }
505
506 zval_ptr_dtor(&PCRE_G(unmatched_null_pair));
507 zval_ptr_dtor(&PCRE_G(unmatched_empty_pair));
508 ZVAL_UNDEF(&PCRE_G(unmatched_null_pair));
509 ZVAL_UNDEF(&PCRE_G(unmatched_empty_pair));
510 return SUCCESS;
511 }
512
513 /* {{{ static pcre_clean_cache */
pcre_clean_cache(zval * data,void * arg)514 static int pcre_clean_cache(zval *data, void *arg)
515 {
516 pcre_cache_entry *pce = (pcre_cache_entry *) Z_PTR_P(data);
517 int *num_clean = (int *)arg;
518
519 if (*num_clean > 0 && !pce->refcount) {
520 (*num_clean)--;
521 return ZEND_HASH_APPLY_REMOVE;
522 } else {
523 return ZEND_HASH_APPLY_KEEP;
524 }
525 }
526 /* }}} */
527
/* Releases every name held in a table built by make_subpats_table() and then the table itself. */
static void free_subpats_table(zend_string **subpat_names, uint32_t num_subpats) {
	for (uint32_t idx = 0; idx < num_subpats; idx++) {
		zend_string *name = subpat_names[idx];

		/* Slots of unnamed groups are NULL. */
		if (name != NULL) {
			zend_string_release(name);
		}
	}

	efree(subpat_names);
}
537
538 /* {{{ static make_subpats_table */
make_subpats_table(uint32_t num_subpats,pcre_cache_entry * pce)539 static zend_string **make_subpats_table(uint32_t num_subpats, pcre_cache_entry *pce)
540 {
541 uint32_t name_cnt = pce->name_count, name_size, ni = 0;
542 char *name_table;
543 zend_string **subpat_names;
544 int rc1, rc2;
545
546 rc1 = pcre2_pattern_info(pce->re, PCRE2_INFO_NAMETABLE, &name_table);
547 rc2 = pcre2_pattern_info(pce->re, PCRE2_INFO_NAMEENTRYSIZE, &name_size);
548 if (rc1 < 0 || rc2 < 0) {
549 php_error_docref(NULL, E_WARNING, "Internal pcre2_pattern_info() error %d", rc1 < 0 ? rc1 : rc2);
550 return NULL;
551 }
552
553 subpat_names = ecalloc(num_subpats, sizeof(zend_string *));
554 while (ni++ < name_cnt) {
555 unsigned short name_idx = 0x100 * (unsigned char)name_table[0] + (unsigned char)name_table[1];
556 const char *name = name_table + 2;
557 subpat_names[name_idx] = zend_string_init(name, strlen(name), 0);
558 if (is_numeric_string(ZSTR_VAL(subpat_names[name_idx]), ZSTR_LEN(subpat_names[name_idx]), NULL, NULL, 0) > 0) {
559 php_error_docref(NULL, E_WARNING, "Numeric named subpatterns are not allowed");
560 free_subpats_table(subpat_names, num_subpats);
561 return NULL;
562 }
563 name_table += name_size;
564 }
565 return subpat_names;
566 }
567 /* }}} */
568
569 /* {{{ static calculate_unit_length */
570 /* Calculates the byte length of the next character. Assumes valid UTF-8 for PCRE2_UTF. */
calculate_unit_length(pcre_cache_entry * pce,const char * start)571 static zend_always_inline size_t calculate_unit_length(pcre_cache_entry *pce, const char *start)
572 {
573 size_t unit_len;
574
575 if (pce->compile_options & PCRE2_UTF) {
576 const char *end = start;
577
578 /* skip continuation bytes */
579 while ((*++end & 0xC0) == 0x80);
580 unit_len = end - start;
581 } else {
582 unit_len = 1;
583 }
584 return unit_len;
585 }
586 /* }}} */
587
588 /* {{{ pcre_get_compiled_regex_cache */
pcre_get_compiled_regex_cache_ex(zend_string * regex,int locale_aware)589 PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, int locale_aware)
590 {
591 pcre2_code *re = NULL;
592 #if 10 == PCRE2_MAJOR && 37 == PCRE2_MINOR && !HAVE_BUNDLED_PCRE
593 uint32_t coptions = PCRE2_NO_START_OPTIMIZE;
594 #else
595 uint32_t coptions = 0;
596 #endif
597 PCRE2_UCHAR error[128];
598 PCRE2_SIZE erroffset;
599 int errnumber;
600 char delimiter;
601 char start_delimiter;
602 char end_delimiter;
603 char *p, *pp;
604 char *pattern;
605 size_t pattern_len;
606 uint32_t poptions = 0;
607 const uint8_t *tables = NULL;
608 zval *zv;
609 pcre_cache_entry new_entry;
610 int rc;
611 zend_string *key;
612 pcre_cache_entry *ret;
613
614 if (locale_aware && BG(ctype_string)) {
615 key = zend_string_concat2(
616 ZSTR_VAL(BG(ctype_string)), ZSTR_LEN(BG(ctype_string)),
617 ZSTR_VAL(regex), ZSTR_LEN(regex));
618 } else {
619 key = regex;
620 }
621
622 /* Try to lookup the cached regex entry, and if successful, just pass
623 back the compiled pattern, otherwise go on and compile it. */
624 zv = zend_hash_find(&PCRE_G(pcre_cache), key);
625 if (zv) {
626 if (key != regex) {
627 zend_string_release_ex(key, 0);
628 }
629 return (pcre_cache_entry*)Z_PTR_P(zv);
630 }
631
632 p = ZSTR_VAL(regex);
633 const char* end_p = ZSTR_VAL(regex) + ZSTR_LEN(regex);
634
635 /* Parse through the leading whitespace, and display a warning if we
636 get to the end without encountering a delimiter. */
637 while (isspace((int)*(unsigned char *)p)) p++;
638 if (p >= end_p) {
639 if (key != regex) {
640 zend_string_release_ex(key, 0);
641 }
642 php_error_docref(NULL, E_WARNING, "Empty regular expression");
643 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
644 return NULL;
645 }
646
647 /* Get the delimiter and display a warning if it is alphanumeric
648 or a backslash. */
649 delimiter = *p++;
650 if (isalnum((int)*(unsigned char *)&delimiter) || delimiter == '\\' || delimiter == '\0') {
651 if (key != regex) {
652 zend_string_release_ex(key, 0);
653 }
654 php_error_docref(NULL, E_WARNING, "Delimiter must not be alphanumeric, backslash, or NUL");
655 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
656 return NULL;
657 }
658
659 start_delimiter = delimiter;
660 if ((pp = strchr("([{< )]}> )]}>", delimiter)))
661 delimiter = pp[5];
662 end_delimiter = delimiter;
663
664 pp = p;
665
666 if (start_delimiter == end_delimiter) {
667 /* We need to iterate through the pattern, searching for the ending delimiter,
668 but skipping the backslashed delimiters. If the ending delimiter is not
669 found, display a warning. */
670 while (pp < end_p) {
671 if (*pp == '\\' && pp + 1 < end_p) pp++;
672 else if (*pp == delimiter)
673 break;
674 pp++;
675 }
676 } else {
677 /* We iterate through the pattern, searching for the matching ending
678 * delimiter. For each matching starting delimiter, we increment nesting
679 * level, and decrement it for each matching ending delimiter. If we
680 * reach the end of the pattern without matching, display a warning.
681 */
682 int brackets = 1; /* brackets nesting level */
683 while (pp < end_p) {
684 if (*pp == '\\' && pp + 1 < end_p) pp++;
685 else if (*pp == end_delimiter && --brackets <= 0)
686 break;
687 else if (*pp == start_delimiter)
688 brackets++;
689 pp++;
690 }
691 }
692
693 if (pp >= end_p) {
694 if (key != regex) {
695 zend_string_release_ex(key, 0);
696 }
697 if (start_delimiter == end_delimiter) {
698 php_error_docref(NULL,E_WARNING, "No ending delimiter '%c' found", delimiter);
699 } else {
700 php_error_docref(NULL,E_WARNING, "No ending matching delimiter '%c' found", delimiter);
701 }
702 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
703 return NULL;
704 }
705
706 /* Make a copy of the actual pattern. */
707 pattern_len = pp - p;
708 pattern = estrndup(p, pattern_len);
709
710 /* Move on to the options */
711 pp++;
712
713 /* Parse through the options, setting appropriate flags. Display
714 a warning if we encounter an unknown modifier. */
715 while (pp < end_p) {
716 switch (*pp++) {
717 /* Perl compatible options */
718 case 'i': coptions |= PCRE2_CASELESS; break;
719 case 'm': coptions |= PCRE2_MULTILINE; break;
720 case 'n': coptions |= PCRE2_NO_AUTO_CAPTURE; break;
721 case 's': coptions |= PCRE2_DOTALL; break;
722 case 'x': coptions |= PCRE2_EXTENDED; break;
723
724 /* PCRE specific options */
725 case 'A': coptions |= PCRE2_ANCHORED; break;
726 case 'D': coptions |= PCRE2_DOLLAR_ENDONLY;break;
727 case 'S': /* Pass. */ break;
728 case 'X': /* Pass. */ break;
729 case 'U': coptions |= PCRE2_UNGREEDY; break;
730 case 'u': coptions |= PCRE2_UTF;
731 /* In PCRE, by default, \d, \D, \s, \S, \w, and \W recognize only ASCII
732 characters, even in UTF-8 mode. However, this can be changed by setting
733 the PCRE2_UCP option. */
734 #ifdef PCRE2_UCP
735 coptions |= PCRE2_UCP;
736 #endif
737 break;
738 case 'J': coptions |= PCRE2_DUPNAMES; break;
739
740 /* Custom preg options */
741 case 'e': poptions |= PREG_REPLACE_EVAL; break;
742
743 case ' ':
744 case '\n':
745 case '\r':
746 break;
747
748 default:
749 if (pp[-1]) {
750 php_error_docref(NULL, E_WARNING, "Unknown modifier '%c'", pp[-1]);
751 } else {
752 php_error_docref(NULL, E_WARNING, "NUL is not a valid modifier");
753 }
754 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
755 efree(pattern);
756 if (key != regex) {
757 zend_string_release_ex(key, 0);
758 }
759 return NULL;
760 }
761 }
762
763 if (poptions & PREG_REPLACE_EVAL) {
764 php_error_docref(NULL, E_WARNING, "The /e modifier is no longer supported, use preg_replace_callback instead");
765 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
766 efree(pattern);
767 if (key != regex) {
768 zend_string_release_ex(key, 0);
769 }
770 return NULL;
771 }
772
773 if (key != regex) {
774 tables = (uint8_t *)zend_hash_find_ptr(&char_tables, BG(ctype_string));
775 if (!tables) {
776 zend_string *_k;
777 tables = pcre2_maketables(gctx);
778 if (UNEXPECTED(!tables)) {
779 php_error_docref(NULL,E_WARNING, "Failed to generate locale character tables");
780 pcre_handle_exec_error(PCRE2_ERROR_NOMEMORY);
781 zend_string_release_ex(key, 0);
782 efree(pattern);
783 return NULL;
784 }
785 _k = zend_string_init(ZSTR_VAL(BG(ctype_string)), ZSTR_LEN(BG(ctype_string)), 1);
786 GC_MAKE_PERSISTENT_LOCAL(_k);
787 zend_hash_add_ptr(&char_tables, _k, (void *)tables);
788 zend_string_release(_k);
789 }
790 }
791 pcre2_set_character_tables(cctx, tables);
792
793 /* Compile pattern and display a warning if compilation failed. */
794 re = pcre2_compile((PCRE2_SPTR)pattern, pattern_len, coptions, &errnumber, &erroffset, cctx);
795
796 if (re == NULL) {
797 if (key != regex) {
798 zend_string_release_ex(key, 0);
799 }
800 pcre2_get_error_message(errnumber, error, sizeof(error));
801 php_error_docref(NULL,E_WARNING, "Compilation failed: %s at offset %zu", error, erroffset);
802 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
803 efree(pattern);
804 return NULL;
805 }
806
807 #ifdef HAVE_PCRE_JIT_SUPPORT
808 if (PCRE_G(jit)) {
809 /* Enable PCRE JIT compiler */
810 rc = pcre2_jit_compile(re, PCRE2_JIT_COMPLETE);
811 if (EXPECTED(rc >= 0)) {
812 size_t jit_size = 0;
813 if (!pcre2_pattern_info(re, PCRE2_INFO_JITSIZE, &jit_size) && jit_size > 0) {
814 poptions |= PREG_JIT;
815 }
816 } else if (rc == PCRE2_ERROR_NOMEMORY) {
817 php_error_docref(NULL, E_WARNING,
818 "Allocation of JIT memory failed, PCRE JIT will be disabled. "
819 "This is likely caused by security restrictions. "
820 "Either grant PHP permission to allocate executable memory, or set pcre.jit=0");
821 PCRE_G(jit) = 0;
822 } else {
823 pcre2_get_error_message(rc, error, sizeof(error));
824 php_error_docref(NULL, E_WARNING, "JIT compilation failed: %s", error);
825 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
826 }
827 }
828 #endif
829 efree(pattern);
830
831 /*
832 * If we reached cache limit, clean out the items from the head of the list;
833 * these are supposedly the oldest ones (but not necessarily the least used
834 * ones).
835 */
836 if (zend_hash_num_elements(&PCRE_G(pcre_cache)) == PCRE_CACHE_SIZE) {
837 int num_clean = PCRE_CACHE_SIZE / 8;
838 zend_hash_apply_with_argument(&PCRE_G(pcre_cache), pcre_clean_cache, &num_clean);
839 }
840
841 /* Store the compiled pattern and extra info in the cache. */
842 new_entry.re = re;
843 new_entry.preg_options = poptions;
844 new_entry.compile_options = coptions;
845 new_entry.refcount = 0;
846
847 rc = pcre2_pattern_info(re, PCRE2_INFO_CAPTURECOUNT, &new_entry.capture_count);
848 if (rc < 0) {
849 if (key != regex) {
850 zend_string_release_ex(key, 0);
851 }
852 php_error_docref(NULL, E_WARNING, "Internal pcre2_pattern_info() error %d", rc);
853 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
854 return NULL;
855 }
856
857 rc = pcre2_pattern_info(re, PCRE2_INFO_NAMECOUNT, &new_entry.name_count);
858 if (rc < 0) {
859 if (key != regex) {
860 zend_string_release_ex(key, 0);
861 }
862 php_error_docref(NULL, E_WARNING, "Internal pcre_pattern_info() error %d", rc);
863 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
864 return NULL;
865 }
866
867 /*
868 * Interned strings are not duplicated when stored in HashTable,
869 * but all the interned strings created during HTTP request are removed
870 * at end of request. However PCRE_G(pcre_cache) must be consistent
871 * on the next request as well. So we disable usage of interned strings
872 * as hash keys especually for this table.
873 * See bug #63180
874 */
875 if (!(GC_FLAGS(key) & IS_STR_PERMANENT) && !PCRE_G(per_request_cache)) {
876 zend_string *str = zend_string_init(ZSTR_VAL(key), ZSTR_LEN(key), 1);
877 GC_MAKE_PERSISTENT_LOCAL(str);
878
879 ret = zend_hash_add_new_mem(&PCRE_G(pcre_cache), str, &new_entry, sizeof(pcre_cache_entry));
880 zend_string_release(str);
881 } else {
882 ret = zend_hash_add_new_mem(&PCRE_G(pcre_cache), key, &new_entry, sizeof(pcre_cache_entry));
883 }
884
885 if (key != regex) {
886 zend_string_release_ex(key, 0);
887 }
888
889 return ret;
890 }
891 /* }}} */
892
893 /* {{{ pcre_get_compiled_regex_cache */
pcre_get_compiled_regex_cache(zend_string * regex)894 PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex)
895 {
896 return pcre_get_compiled_regex_cache_ex(regex, 1);
897 }
898 /* }}} */
899
900 /* {{{ pcre_get_compiled_regex */
pcre_get_compiled_regex(zend_string * regex,uint32_t * capture_count)901 PHPAPI pcre2_code *pcre_get_compiled_regex(zend_string *regex, uint32_t *capture_count)
902 {
903 pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex);
904
905 if (capture_count) {
906 *capture_count = pce ? pce->capture_count : 0;
907 }
908
909 return pce ? pce->re : NULL;
910 }
911 /* }}} */
912
913 /* XXX For the cases where it's only about match yes/no and no capture
914 required, perhaps just a minimum sized data would suffice. */
php_pcre_create_match_data(uint32_t capture_count,pcre2_code * re)915 PHPAPI pcre2_match_data *php_pcre_create_match_data(uint32_t capture_count, pcre2_code *re)
916 {/*{{{*/
917
918 assert(NULL != re);
919
920 if (EXPECTED(!mdata_used)) {
921 int rc = 0;
922
923 if (!capture_count) {
924 /* As we deal with a non cached pattern, no other way to gather this info. */
925 rc = pcre2_pattern_info(re, PCRE2_INFO_CAPTURECOUNT, &capture_count);
926 }
927
928 if (rc >= 0 && capture_count + 1 <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
929 mdata_used = 1;
930 return mdata;
931 }
932 }
933
934 return pcre2_match_data_create_from_pattern(re, gctx);
935 }/*}}}*/
936
php_pcre_free_match_data(pcre2_match_data * match_data)937 PHPAPI void php_pcre_free_match_data(pcre2_match_data *match_data)
938 {/*{{{*/
939 if (UNEXPECTED(match_data != mdata)) {
940 pcre2_match_data_free(match_data);
941 } else {
942 mdata_used = 0;
943 }
944 }/*}}}*/
945
init_unmatched_null_pair(void)946 static void init_unmatched_null_pair(void) {
947 zval val1, val2;
948 ZVAL_NULL(&val1);
949 ZVAL_LONG(&val2, -1);
950 ZVAL_ARR(&PCRE_G(unmatched_null_pair), zend_new_pair(&val1, &val2));
951 }
952
init_unmatched_empty_pair(void)953 static void init_unmatched_empty_pair(void) {
954 zval val1, val2;
955 ZVAL_EMPTY_STRING(&val1);
956 ZVAL_LONG(&val2, -1);
957 ZVAL_ARR(&PCRE_G(unmatched_empty_pair), zend_new_pair(&val1, &val2));
958 }
959
populate_match_value_str(zval * val,const char * subject,PCRE2_SIZE start_offset,PCRE2_SIZE end_offset)960 static zend_always_inline void populate_match_value_str(
961 zval *val, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset) {
962 ZVAL_STRINGL_FAST(val, subject + start_offset, end_offset - start_offset);
963 }
964
/*
 * Stores the value of one capture group into `val`: the matched substring,
 * or, for an unset group (start_offset == PCRE2_UNSET), NULL or the empty
 * string depending on unmatched_as_null.
 */
static inline void populate_match_value(
		zval *val, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset,
		uint32_t unmatched_as_null) {
	if (start_offset != PCRE2_UNSET) {
		populate_match_value_str(val, subject, start_offset, end_offset);
		return;
	}

	if (unmatched_as_null) {
		ZVAL_NULL(val);
	} else {
		ZVAL_EMPTY_STRING(val);
	}
}
978
/*
 * Stores `val` under `name` in the subpats array and takes a reference on it
 * when it was actually stored.
 */
static inline void add_named(
		zval *subpats, zend_string *name, zval *val, bool unmatched) {
	HashTable *ht = Z_ARRVAL_P(subpats);

	/* If the DUPNAMES option is used, multiple subpatterns might have the same name.
	 * In this case we want to preserve the one that actually has a value. */
	if (unmatched) {
		/* Never overwrite an existing value with an unmatched one. */
		if (zend_hash_add(ht, name, val) == NULL) {
			return;
		}
	} else {
		zend_hash_update(ht, name, val);
	}

	Z_TRY_ADDREF_P(val);
}
992
993 /* {{{ add_offset_pair */
/* Append a [matched text, byte offset] pair to `result`, and additionally
 * store it under `name` when a name is given.
 *
 * For a group that did not participate (start_offset == PCRE2_UNSET) a copy
 * of one of the two lazily initialised per-request placeholder pairs is used;
 * `unmatched_as_null` selects which one. */
static inline void add_offset_pair(
		zval *result, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset,
		zend_string *name, uint32_t unmatched_as_null)
{
	const bool is_unset = (start_offset == PCRE2_UNSET);
	zval pair;

	if (!is_unset) {
		/* Regular case: build a fresh (match, offset) pair. */
		zval text, position;

		populate_match_value_str(&text, subject, start_offset, end_offset);
		ZVAL_LONG(&position, start_offset);
		ZVAL_ARR(&pair, zend_new_pair(&text, &position));
	} else if (unmatched_as_null) {
		if (Z_ISUNDEF(PCRE_G(unmatched_null_pair))) {
			init_unmatched_null_pair();
		}
		ZVAL_COPY(&pair, &PCRE_G(unmatched_null_pair));
	} else {
		if (Z_ISUNDEF(PCRE_G(unmatched_empty_pair))) {
			init_unmatched_empty_pair();
		}
		ZVAL_COPY(&pair, &PCRE_G(unmatched_empty_pair));
	}

	/* The named entry (if any) goes in first, then the numeric one. */
	if (name) {
		add_named(result, name, &pair, is_unset);
	}
	zend_hash_next_index_insert(Z_ARRVAL_P(result), &pair);
}
1025 /* }}} */
1026
/* Fill `subpats` with the captures of a single match.
 *
 * subpats       Array zval that receives the entries.
 * subject       Subject buffer the offsets refer to.
 * offsets       Offset vector: pair i is offsets[2*i] .. offsets[2*i+1].
 * subpat_names  Optional table mapping group number to name (NULL entries
 *               for unnamed groups); NULL when the pattern has no names.
 * num_subpats   Total number of groups including group 0.
 * count         Number of leading groups covered by `offsets` for this match.
 * mark          Mark name to store under "MARK", or NULL.
 * flags         PREG_OFFSET_CAPTURE and PREG_UNMATCHED_AS_NULL are honoured.
 *
 * Every group is appended under the next numeric index; named groups are
 * stored under their name first. Groups beyond `count` are only emitted
 * when PREG_UNMATCHED_AS_NULL is set. */
static void populate_subpat_array(
		zval *subpats, const char *subject, PCRE2_SIZE *offsets, zend_string **subpat_names,
		uint32_t num_subpats, int count, const PCRE2_SPTR mark, zend_long flags) {
	const bool with_offsets = (flags & PREG_OFFSET_CAPTURE) != 0;
	const bool null_for_unmatched = (flags & PREG_UNMATCHED_AS_NULL) != 0;
	HashTable *target = Z_ARRVAL_P(subpats);
	int group;

	/* Groups reported by the matcher (some of them may still be unset). */
	for (group = 0; group < count; group++) {
		zend_string *group_name = subpat_names ? subpat_names[group] : NULL;
		PCRE2_SIZE from = offsets[2*group];
		PCRE2_SIZE to = offsets[2*group+1];

		if (with_offsets) {
			add_offset_pair(subpats, subject, from, to, group_name, null_for_unmatched);
		} else {
			zval entry;

			populate_match_value(&entry, subject, from, to, null_for_unmatched);
			if (group_name) {
				add_named(subpats, group_name, &entry, from == PCRE2_UNSET);
			}
			zend_hash_next_index_insert(target, &entry);
		}
	}

	/* Trailing groups the matcher did not report: padded only in NULL mode. */
	if (null_for_unmatched) {
		for (group = count; group < num_subpats; group++) {
			zend_string *group_name = subpat_names ? subpat_names[group] : NULL;

			if (with_offsets) {
				add_offset_pair(subpats, NULL, PCRE2_UNSET, PCRE2_UNSET, group_name, 1);
			} else {
				zval entry;

				ZVAL_NULL(&entry);
				if (group_name) {
					/* Never overwrite a value stored earlier under the same name. */
					zend_hash_add(target, group_name, &entry);
				}
				zend_hash_next_index_insert(target, &entry);
			}
		}
	}

	/* Add MARK, if available */
	if (mark) {
		add_assoc_string_ex(subpats, "MARK", sizeof("MARK") - 1, (char *)mark);
	}
}
1094
/* Shared implementation of preg_match() and preg_match_all(): parses the
 * userland arguments, looks up (or compiles) the pattern and delegates to
 * php_pcre_match_impl(). `global` is forwarded unchanged. */
static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) /* {{{ */
{
	zend_string *pattern;			/* Regular expression */
	zend_string *haystack;			/* String to match against */
	zval *matches = NULL;			/* Optional by-reference output array */
	zend_long match_flags = 0;		/* Match control flags */
	zend_long offset = 0;			/* Where the search starts */
	pcre_cache_entry *entry;		/* Compiled regular expression */

	ZEND_PARSE_PARAMETERS_START(2, 5)
		Z_PARAM_STR(pattern)
		Z_PARAM_STR(haystack)
		Z_PARAM_OPTIONAL
		Z_PARAM_ZVAL(matches)
		Z_PARAM_LONG(match_flags)
		Z_PARAM_LONG(offset)
	ZEND_PARSE_PARAMETERS_END();

	/* Compile regex or get it from cache. */
	entry = pcre_get_compiled_regex_cache(pattern);
	if (entry == NULL) {
		RETURN_FALSE;
	}

	/* The smallest integer is rejected up front: the implementation negates
	 * negative offsets to count from the end of the subject. */
	if (offset == ZEND_LONG_MIN) {
		zend_argument_value_error(5, "must be greater than " ZEND_LONG_FMT, ZEND_LONG_MIN);
		RETURN_THROWS();
	}

	/* Hold a reference on the cache entry for the duration of the match.
	 * Flags are only honoured when the fourth argument was actually passed. */
	entry->refcount++;
	php_pcre_match_impl(entry, haystack, return_value, matches,
		global, ZEND_NUM_ARGS() >= 4, match_flags, offset);
	entry->refcount--;
}
1129 /* }}} */
1130
is_known_valid_utf8(zend_string * subject_str,PCRE2_SIZE start_offset)1131 static zend_always_inline bool is_known_valid_utf8(
1132 zend_string *subject_str, PCRE2_SIZE start_offset) {
1133 if (!(GC_FLAGS(subject_str) & IS_STR_VALID_UTF8)) {
1134 /* We don't know whether the string is valid UTF-8 or not. */
1135 return 0;
1136 }
1137
1138 if (start_offset == ZSTR_LEN(subject_str)) {
1139 /* Degenerate case: Offset points to end of string. */
1140 return 1;
1141 }
1142
1143 /* Check that the offset does not point to an UTF-8 continuation byte. */
1144 return (ZSTR_VAL(subject_str)[start_offset] & 0xc0) != 0x80;
1145 }
1146
1147 /* {{{ php_pcre_match_impl() */
php_pcre_match_impl(pcre_cache_entry * pce,zend_string * subject_str,zval * return_value,zval * subpats,int global,int use_flags,zend_long flags,zend_off_t start_offset)1148 PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str, zval *return_value,
1149 zval *subpats, int global, int use_flags, zend_long flags, zend_off_t start_offset)
1150 {
1151 zval result_set, /* Holds a set of subpatterns after
1152 a global match */
1153 *match_sets = NULL; /* An array of sets of matches for each
1154 subpattern after a global match */
1155 uint32_t options; /* Execution options */
1156 int count; /* Count of matched subpatterns */
1157 PCRE2_SIZE *offsets; /* Array of subpattern offsets */
1158 uint32_t num_subpats; /* Number of captured subpatterns */
1159 int matched; /* Has anything matched */
1160 zend_string **subpat_names; /* Array for named subpatterns */
1161 size_t i;
1162 uint32_t subpats_order; /* Order of subpattern matches */
1163 uint32_t offset_capture; /* Capture match offsets: yes/no */
1164 uint32_t unmatched_as_null; /* Null non-matches: yes/no */
1165 PCRE2_SPTR mark = NULL; /* Target for MARK name */
1166 zval marks; /* Array of marks for PREG_PATTERN_ORDER */
1167 pcre2_match_data *match_data;
1168 PCRE2_SIZE start_offset2, orig_start_offset;
1169
1170 char *subject = ZSTR_VAL(subject_str);
1171 size_t subject_len = ZSTR_LEN(subject_str);
1172
1173 ZVAL_UNDEF(&marks);
1174
1175 /* Overwrite the passed-in value for subpatterns with an empty array. */
1176 if (subpats != NULL) {
1177 subpats = zend_try_array_init(subpats);
1178 if (!subpats) {
1179 RETURN_THROWS();
1180 }
1181 }
1182
1183 subpats_order = global ? PREG_PATTERN_ORDER : 0;
1184
1185 if (use_flags) {
1186 offset_capture = flags & PREG_OFFSET_CAPTURE;
1187 unmatched_as_null = flags & PREG_UNMATCHED_AS_NULL;
1188
1189 /*
1190 * subpats_order is pre-set to pattern mode so we change it only if
1191 * necessary.
1192 */
1193 if (flags & 0xff) {
1194 subpats_order = flags & 0xff;
1195 }
1196 if ((global && (subpats_order < PREG_PATTERN_ORDER || subpats_order > PREG_SET_ORDER)) ||
1197 (!global && subpats_order != 0)) {
1198 zend_argument_value_error(4, "must be a PREG_* constant");
1199 RETURN_THROWS();
1200 }
1201 } else {
1202 offset_capture = 0;
1203 unmatched_as_null = 0;
1204 }
1205
1206 /* Negative offset counts from the end of the string. */
1207 if (start_offset < 0) {
1208 if ((PCRE2_SIZE)-start_offset <= subject_len) {
1209 start_offset2 = subject_len + start_offset;
1210 } else {
1211 start_offset2 = 0;
1212 }
1213 } else {
1214 start_offset2 = (PCRE2_SIZE)start_offset;
1215 }
1216
1217 if (start_offset2 > subject_len) {
1218 pcre_handle_exec_error(PCRE2_ERROR_BADOFFSET);
1219 RETURN_FALSE;
1220 }
1221
1222 /* Calculate the size of the offsets array, and allocate memory for it. */
1223 num_subpats = pce->capture_count + 1;
1224
1225 /*
1226 * Build a mapping from subpattern numbers to their names. We will
1227 * allocate the table only if there are any named subpatterns.
1228 */
1229 subpat_names = NULL;
1230 if (subpats && pce->name_count > 0) {
1231 subpat_names = make_subpats_table(num_subpats, pce);
1232 if (!subpat_names) {
1233 RETURN_FALSE;
1234 }
1235 }
1236
1237 /* Allocate match sets array and initialize the values. */
1238 if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
1239 match_sets = (zval *)safe_emalloc(num_subpats, sizeof(zval), 0);
1240 for (i=0; i<num_subpats; i++) {
1241 array_init(&match_sets[i]);
1242 }
1243 }
1244
1245 matched = 0;
1246 PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1247
1248 if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
1249 match_data = mdata;
1250 } else {
1251 match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
1252 if (!match_data) {
1253 PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1254 if (subpat_names) {
1255 free_subpats_table(subpat_names, num_subpats);
1256 }
1257 if (match_sets) {
1258 efree(match_sets);
1259 }
1260 RETURN_FALSE;
1261 }
1262 }
1263
1264 orig_start_offset = start_offset2;
1265 options =
1266 (pce->compile_options & PCRE2_UTF) && !is_known_valid_utf8(subject_str, orig_start_offset)
1267 ? 0 : PCRE2_NO_UTF_CHECK;
1268
1269 /* Execute the regular expression. */
1270 #ifdef HAVE_PCRE_JIT_SUPPORT
1271 if ((pce->preg_options & PREG_JIT) && options) {
1272 count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1273 PCRE2_NO_UTF_CHECK, match_data, mctx);
1274 } else
1275 #endif
1276 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1277 options, match_data, mctx);
1278
1279 while (1) {
1280 /* If something has matched */
1281 if (count >= 0) {
1282 /* Check for too many substrings condition. */
1283 if (UNEXPECTED(count == 0)) {
1284 php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
1285 count = num_subpats;
1286 }
1287
1288 matched:
1289 matched++;
1290
1291 offsets = pcre2_get_ovector_pointer(match_data);
1292
1293 /* If subpatterns array has been passed, fill it in with values. */
1294 if (subpats != NULL) {
1295 /* Try to get the list of substrings and display a warning if failed. */
1296 if (offsets[1] < offsets[0]) {
1297 if (subpat_names) {
1298 free_subpats_table(subpat_names, num_subpats);
1299 }
1300 if (match_sets) efree(match_sets);
1301 php_error_docref(NULL, E_WARNING, "Get subpatterns list failed");
1302 RETURN_FALSE;
1303 }
1304
1305 if (global) { /* global pattern matching */
1306 if (subpats && subpats_order == PREG_PATTERN_ORDER) {
1307 /* For each subpattern, insert it into the appropriate array. */
1308 if (offset_capture) {
1309 for (i = 0; i < count; i++) {
1310 add_offset_pair(
1311 &match_sets[i], subject, offsets[2*i], offsets[2*i+1],
1312 NULL, unmatched_as_null);
1313 }
1314 } else {
1315 for (i = 0; i < count; i++) {
1316 zval val;
1317 populate_match_value(
1318 &val, subject, offsets[2*i], offsets[2*i+1], unmatched_as_null);
1319 zend_hash_next_index_insert_new(Z_ARRVAL(match_sets[i]), &val);
1320 }
1321 }
1322 mark = pcre2_get_mark(match_data);
1323 /* Add MARK, if available */
1324 if (mark) {
1325 if (Z_TYPE(marks) == IS_UNDEF) {
1326 array_init(&marks);
1327 }
1328 add_index_string(&marks, matched - 1, (char *) mark);
1329 }
1330 /*
1331 * If the number of captured subpatterns on this run is
1332 * less than the total possible number, pad the result
1333 * arrays with NULLs or empty strings.
1334 */
1335 if (count < num_subpats) {
1336 for (; i < num_subpats; i++) {
1337 if (offset_capture) {
1338 add_offset_pair(
1339 &match_sets[i], NULL, PCRE2_UNSET, PCRE2_UNSET,
1340 NULL, unmatched_as_null);
1341 } else if (unmatched_as_null) {
1342 add_next_index_null(&match_sets[i]);
1343 } else {
1344 add_next_index_str(&match_sets[i], ZSTR_EMPTY_ALLOC());
1345 }
1346 }
1347 }
1348 } else {
1349 /* Allocate and populate the result set array */
1350 array_init_size(&result_set, count + (mark ? 1 : 0));
1351 mark = pcre2_get_mark(match_data);
1352 populate_subpat_array(
1353 &result_set, subject, offsets, subpat_names,
1354 num_subpats, count, mark, flags);
1355 /* And add it to the output array */
1356 zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &result_set);
1357 }
1358 } else { /* single pattern matching */
1359 /* For each subpattern, insert it into the subpatterns array. */
1360 mark = pcre2_get_mark(match_data);
1361 populate_subpat_array(
1362 subpats, subject, offsets, subpat_names, num_subpats, count, mark, flags);
1363 break;
1364 }
1365 }
1366
1367 /* Advance to the next piece. */
1368 start_offset2 = offsets[1];
1369
1370 /* If we have matched an empty string, mimic what Perl's /g options does.
1371 This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
1372 the match again at the same point. If this fails (picked up above) we
1373 advance to the next character. */
1374 if (start_offset2 == offsets[0]) {
1375 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1376 PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
1377 if (count >= 0) {
1378 if (global) {
1379 goto matched;
1380 } else {
1381 break;
1382 }
1383 } else if (count == PCRE2_ERROR_NOMATCH) {
1384 /* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
1385 this is not necessarily the end. We need to advance
1386 the start offset, and continue. Fudge the offset values
1387 to achieve this, unless we're already at the end of the string. */
1388 if (start_offset2 < subject_len) {
1389 size_t unit_len = calculate_unit_length(pce, subject + start_offset2);
1390
1391 start_offset2 += unit_len;
1392 } else {
1393 break;
1394 }
1395 } else {
1396 goto error;
1397 }
1398 }
1399 } else if (count == PCRE2_ERROR_NOMATCH) {
1400 break;
1401 } else {
1402 error:
1403 pcre_handle_exec_error(count);
1404 break;
1405 }
1406
1407 if (!global) {
1408 break;
1409 }
1410
1411 /* Execute the regular expression. */
1412 #ifdef HAVE_PCRE_JIT_SUPPORT
1413 if ((pce->preg_options & PREG_JIT)) {
1414 if (PCRE2_UNSET == start_offset2 || start_offset2 > subject_len) {
1415 pcre_handle_exec_error(PCRE2_ERROR_BADOFFSET);
1416 break;
1417 }
1418 count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1419 PCRE2_NO_UTF_CHECK, match_data, mctx);
1420 } else
1421 #endif
1422 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1423 PCRE2_NO_UTF_CHECK, match_data, mctx);
1424 }
1425 if (match_data != mdata) {
1426 pcre2_match_data_free(match_data);
1427 }
1428
1429 /* Add the match sets to the output array and clean up */
1430 if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
1431 if (subpat_names) {
1432 for (i = 0; i < num_subpats; i++) {
1433 if (subpat_names[i]) {
1434 zend_hash_update(Z_ARRVAL_P(subpats), subpat_names[i], &match_sets[i]);
1435 Z_ADDREF(match_sets[i]);
1436 }
1437 zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i]);
1438 }
1439 } else {
1440 for (i = 0; i < num_subpats; i++) {
1441 zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i]);
1442 }
1443 }
1444 efree(match_sets);
1445
1446 if (Z_TYPE(marks) != IS_UNDEF) {
1447 add_assoc_zval(subpats, "MARK", &marks);
1448 }
1449 }
1450
1451 if (subpat_names) {
1452 free_subpats_table(subpat_names, num_subpats);
1453 }
1454
1455 if (PCRE_G(error_code) == PHP_PCRE_NO_ERROR) {
1456 /* If there was no error and we're in /u mode, remember that the string is valid UTF-8. */
1457 if ((pce->compile_options & PCRE2_UTF)
1458 && !ZSTR_IS_INTERNED(subject_str) && orig_start_offset == 0) {
1459 GC_ADD_FLAGS(subject_str, IS_STR_VALID_UTF8);
1460 }
1461
1462 RETVAL_LONG(matched);
1463 } else {
1464 RETVAL_FALSE;
1465 }
1466 }
1467 /* }}} */
1468
1469 /* {{{ Perform a Perl-style regular expression match */
/* Userland preg_match(): forwards the call unchanged to php_do_pcre_match()
 * with global = 0. */
PHP_FUNCTION(preg_match)
{
	php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
}
1474 /* }}} */
1475
1476 /* {{{ Perform a Perl-style global regular expression match */
/* Userland preg_match_all(): forwards the call unchanged to
 * php_do_pcre_match() with global = 1. */
PHP_FUNCTION(preg_match_all)
{
	php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
}
1481 /* }}} */
1482
1483 /* {{{ preg_get_backref */
/* Parse a back-reference at *str in a replacement string.
 *
 * *str must point at the introducing character ('\\' or '$'). Accepted forms
 * are the introducer followed by one or two decimal digits, and "${" followed
 * by one or two digits and a closing '}'.
 *
 * On success returns 1, stores the group number in *backref and moves *str
 * just past the reference. On failure returns 0 and leaves *str untouched;
 * *backref may already have been written if the digits parsed but the
 * closing brace was missing. */
static int preg_get_backref(char **str, int *backref)
{
	char *cursor = *str;
	int braced;
	int number;

	/* An introducer with nothing after it is not a reference. */
	if (cursor[1] == '\0') {
		return 0;
	}

	/* Only '$' may be followed by a brace. */
	braced = (cursor[0] == '$' && cursor[1] == '{');
	cursor += braced ? 2 : 1;

	/* First digit is mandatory. */
	if (*cursor < '0' || *cursor > '9') {
		return 0;
	}
	number = *cursor - '0';
	cursor++;

	/* Second digit is optional; a third one is never consumed. */
	if (*cursor >= '0' && *cursor <= '9') {
		number = number * 10 + *cursor - '0';
		cursor++;
	}
	*backref = number;

	if (braced) {
		if (*cursor != '}') {
			return 0;
		}
		cursor++;
	}

	*str = cursor;
	return 1;
}
1519 /* }}} */
1520
1521 /* {{{ preg_do_repl_func */
/* Obtain the replacement for one match by calling the user callback.
 *
 * The callback receives a single argument: the subpattern array built by
 * populate_subpat_array() for this match. A string return value is taken over
 * as is; any other defined value is converted to a string. When the call
 * fails or yields no value, the matched text itself is returned (with a
 * warning unless an exception is pending).
 *
 * Always returns a string that the caller has to release. */
static zend_string *preg_do_repl_func(zend_fcall_info *fci, zend_fcall_info_cache *fcc, const char *subject, PCRE2_SIZE *offsets, zend_string **subpat_names, uint32_t num_subpats, int count, const PCRE2_SPTR mark, zend_long flags)
{
	zval callback_arg;			/* Argument to pass to function */
	zval callback_ret;			/* Function return value */
	zend_string *replacement;
	bool have_value;

	array_init_size(&callback_arg, count + (mark ? 1 : 0));
	populate_subpat_array(&callback_arg, subject, offsets, subpat_names, num_subpats, count, mark, flags);

	fci->retval = &callback_ret;
	fci->param_count = 1;
	fci->params = &callback_arg;

	/* The return slot is only inspected when the call itself succeeded. */
	have_value = zend_call_function(fci, fcc) == SUCCESS && Z_TYPE(callback_ret) != IS_UNDEF;

	if (!have_value) {
		if (!EG(exception)) {
			php_error_docref(NULL, E_WARNING, "Unable to call custom replacement function");
		}

		/* Fall back to the unmodified match. */
		replacement = zend_string_init(&subject[offsets[0]], offsets[1] - offsets[0], 0);
	} else if (EXPECTED(Z_TYPE(callback_ret) == IS_STRING)) {
		/* Take over the string held by the return value. */
		replacement = Z_STR(callback_ret);
	} else {
		replacement = zval_get_string_func(&callback_ret);
		zval_ptr_dtor(&callback_ret);
	}

	zval_ptr_dtor(&callback_arg);

	return replacement;
}
1554 /* }}} */
1555
1556 /* {{{ php_pcre_replace */
php_pcre_replace(zend_string * regex,zend_string * subject_str,const char * subject,size_t subject_len,zend_string * replace_str,size_t limit,size_t * replace_count)1557 PHPAPI zend_string *php_pcre_replace(zend_string *regex,
1558 zend_string *subject_str,
1559 const char *subject, size_t subject_len,
1560 zend_string *replace_str,
1561 size_t limit, size_t *replace_count)
1562 {
1563 pcre_cache_entry *pce; /* Compiled regular expression */
1564 zend_string *result; /* Function result */
1565
1566 /* Abort on pending exception, e.g. thrown from __toString(). */
1567 if (UNEXPECTED(EG(exception))) {
1568 return NULL;
1569 }
1570
1571 /* Compile regex or get it from cache. */
1572 if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
1573 return NULL;
1574 }
1575 pce->refcount++;
1576 result = php_pcre_replace_impl(pce, subject_str, subject, subject_len, replace_str,
1577 limit, replace_count);
1578 pce->refcount--;
1579
1580 return result;
1581 }
1582 /* }}} */
1583
1584 /* {{{ php_pcre_replace_impl() */
/* Replace matches of a compiled pattern in `subject` with `replace_str`.
 *
 * pce            Compiled pattern cache entry.
 * subject_str    Subject as a zend_string, or NULL. When non-NULL and no
 *                replacement was made, a new reference to it is returned
 *                instead of a copy.
 * subject        Subject bytes.
 * subject_len    Length of `subject` in bytes.
 * replace_str    Replacement template; "\N", "$N" and "${N}" (N = one or two
 *                digits) insert capture group N, a backslash before '\' or
 *                '$' makes that character literal.
 * limit          Maximum number of replacements; decremented once per
 *                replacement, matching stops when it reaches 0.
 * replace_count  Optional counter, incremented once per replacement.
 *
 * Returns the resulting string, or NULL on error (PCRE_G(error_code) is set
 * by this function or by pcre_handle_exec_error()). */
PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *subject_str, const char *subject, size_t subject_len, zend_string *replace_str, size_t limit, size_t *replace_count)
{
	uint32_t		 options;			/* Execution options */
	int				 count;				/* Count of matched subpatterns */
	PCRE2_SIZE		*offsets;			/* Array of subpattern offsets */
	uint32_t		 num_subpats;		/* Number of captured subpatterns */
	size_t			 new_len;			/* Length of needed storage */
	size_t			 alloc_len;			/* Actual allocated length */
	size_t			 match_len;			/* Length of the current match */
	int				 backref;			/* Backreference number */
	PCRE2_SIZE		 start_offset;		/* Where the new search starts */
	size_t			 last_end_offset;	/* Where the last search ended */
	char			*walkbuf,			/* Location of current replacement in the result */
					*walk,				/* Used to walk the replacement string */
					 walk_last;			/* Last walked character */
	const char		*match,				/* The current match */
					*piece,				/* The current piece of subject */
					*replace_end;		/* End of replacement string */
	size_t			result_len;			/* Length of result */
	zend_string		*result;			/* Result of replacement */
	pcre2_match_data *match_data;

	/* Number of offset pairs: every capture group plus the whole match. */
	num_subpats = pce->capture_count + 1;
	alloc_len = 0;
	result = NULL;

	/* Initialize */
	match = NULL;
	start_offset = 0;
	last_end_offset = 0;
	result_len = 0;
	PCRE_G(error_code) = PHP_PCRE_NO_ERROR;

	/* Use the preallocated match data block when it is free and large
	 * enough, otherwise allocate one sized for this pattern. */
	if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
		match_data = mdata;
	} else {
		match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
		if (!match_data) {
			PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
			return NULL;
		}
	}

	/* UTF patterns get the UTF-8 validity check on the first match only;
	 * every later call below passes PCRE2_NO_UTF_CHECK. */
	options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;

	/* Execute the regular expression. The JIT entry point is used for the
	 * first match only when no UTF check is required. */
#ifdef HAVE_PCRE_JIT_SUPPORT
	if ((pce->preg_options & PREG_JIT) && options) {
		count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
				PCRE2_NO_UTF_CHECK, match_data, mctx);
	} else
#endif
	count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
			options, match_data, mctx);

	while (1) {
		/* Unprocessed part of the subject starts where the last match ended. */
		piece = subject + last_end_offset;

		if (count >= 0 && limit > 0) {
			bool simple_string;

			/* Check for too many substrings condition. */
			if (UNEXPECTED(count == 0)) {
				php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
				count = num_subpats;
			}

matched:
			offsets = pcre2_get_ovector_pointer(match_data);

			/* A match that ends before it starts cannot be replaced. */
			if (UNEXPECTED(offsets[1] < offsets[0])) {
				PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
				if (result) {
					zend_string_release_ex(result, 0);
					result = NULL;
				}
				break;
			}

			if (replace_count) {
				++*replace_count;
			}

			/* Set the match location in subject */
			match = subject + offsets[0];

			new_len = result_len + offsets[0] - last_end_offset; /* part before the match */

			/* First pass over the replacement: compute how many bytes it
			 * expands to and detect whether it is a plain string without
			 * any '\' or '$'. */
			walk = ZSTR_VAL(replace_str);
			replace_end = walk + ZSTR_LEN(replace_str);
			walk_last = 0;
			simple_string = 1;
			while (walk < replace_end) {
				if ('\\' == *walk || '$' == *walk) {
					simple_string = 0;
					/* Escaped '\' or '$': the preceding backslash was already counted. */
					if (walk_last == '\\') {
						walk++;
						walk_last = 0;
						continue;
					}
					if (preg_get_backref(&walk, &backref)) {
						/* References to groups beyond `count` expand to nothing. */
						if (backref < count)
							new_len += offsets[(backref<<1)+1] - offsets[backref<<1];
						continue;
					}
				}
				new_len++;
				walk++;
				walk_last = walk[-1];
			}

			/* Grow the result buffer to twice the needed length when it is too small. */
			if (new_len >= alloc_len) {
				alloc_len = zend_safe_address_guarded(2, new_len, ZSTR_MAX_OVERHEAD) - ZSTR_MAX_OVERHEAD;
				if (result == NULL) {
					result = zend_string_alloc(alloc_len, 0);
				} else {
					result = zend_string_extend(result, alloc_len, 0);
				}
			}

			if (match-piece > 0) {
				/* copy the part of the string before the match */
				memcpy(&ZSTR_VAL(result)[result_len], piece, match-piece);
				result_len += (match-piece);
			}

			if (simple_string) {
				/* copy replacement, including its terminating NUL byte */
				memcpy(&ZSTR_VAL(result)[result_len], ZSTR_VAL(replace_str), ZSTR_LEN(replace_str)+1);
				result_len += ZSTR_LEN(replace_str);
			} else {
				/* copy replacement and backrefs */
				walkbuf = ZSTR_VAL(result) + result_len;

				/* Second pass: same walk as above, this time writing output. */
				walk = ZSTR_VAL(replace_str);
				walk_last = 0;
				while (walk < replace_end) {
					if ('\\' == *walk || '$' == *walk) {
						if (walk_last == '\\') {
							/* Overwrite the backslash written in the previous step. */
							*(walkbuf-1) = *walk++;
							walk_last = 0;
							continue;
						}
						if (preg_get_backref(&walk, &backref)) {
							if (backref < count) {
								/* Skip groups that did not participate in the match. */
								if (offsets[backref<<1] < SIZE_MAX) {
									match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
									memcpy(walkbuf, subject + offsets[backref<<1], match_len);
									walkbuf += match_len;
								}
							}
							continue;
						}
					}
					*walkbuf++ = *walk++;
					walk_last = walk[-1];
				}
				*walkbuf = '\0';
				/* increment the result length by how much we've added to the string */
				result_len += (walkbuf - (ZSTR_VAL(result) + result_len));
			}

			limit--;

			/* Advance to the next piece. */
			start_offset = last_end_offset = offsets[1];

			/* If we have matched an empty string, mimic what Perl's /g options does.
			   This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
			   the match again at the same point. If this fails (picked up above) we
			   advance to the next character. */
			if (start_offset == offsets[0]) {
				count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
					PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);

				piece = subject + start_offset;
				if (count >= 0 && limit > 0) {
					goto matched;
				} else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
					/* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
					   this is not necessarily the end. We need to advance
					   the start offset, and continue. Fudge the offset values
					   to achieve this, unless we're already at the end of the string. */
					if (start_offset < subject_len) {
						size_t unit_len = calculate_unit_length(pce, piece);
						start_offset += unit_len;
					} else {
						goto not_matched;
					}
				} else {
					goto error;
				}
			}

		} else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
not_matched:
			/* Nothing was replaced and the caller supplied the subject as a
			 * zend_string: hand back a new reference to it instead of copying. */
			if (!result && subject_str) {
				result = zend_string_copy(subject_str);
				break;
			}
			/* now we know exactly how long it is */
			alloc_len = result_len + subject_len - last_end_offset;
			if (NULL != result) {
				result = zend_string_realloc(result, alloc_len, 0);
			} else {
				result = zend_string_alloc(alloc_len, 0);
			}
			/* stick that last bit of string on our output */
			memcpy(ZSTR_VAL(result) + result_len, piece, subject_len - last_end_offset);
			result_len += subject_len - last_end_offset;
			ZSTR_VAL(result)[result_len] = '\0';
			ZSTR_LEN(result) = result_len;
			break;
		} else {
error:
			/* Matcher error: record it and discard any partial result. */
			pcre_handle_exec_error(count);
			if (result) {
				zend_string_release_ex(result, 0);
				result = NULL;
			}
			break;
		}

		/* Look for the next match, starting at start_offset. */
#ifdef HAVE_PCRE_JIT_SUPPORT
		if (pce->preg_options & PREG_JIT) {
			count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
					PCRE2_NO_UTF_CHECK, match_data, mctx);
		} else
#endif
		count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
					PCRE2_NO_UTF_CHECK, match_data, mctx);
	}
	/* Only a block allocated for this call is freed; the preallocated one is kept. */
	if (match_data != mdata) {
		pcre2_match_data_free(match_data);
	}

	return result;
}
1824 /* }}} */
1825
1826 /* {{{ php_pcre_replace_func_impl() */
php_pcre_replace_func_impl(pcre_cache_entry * pce,zend_string * subject_str,const char * subject,size_t subject_len,zend_fcall_info * fci,zend_fcall_info_cache * fcc,size_t limit,size_t * replace_count,zend_long flags)1827 static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_string *subject_str, const char *subject, size_t subject_len, zend_fcall_info *fci, zend_fcall_info_cache *fcc, size_t limit, size_t *replace_count, zend_long flags)
1828 {
1829 uint32_t options; /* Execution options */
1830 int count; /* Count of matched subpatterns */
1831 PCRE2_SIZE *offsets; /* Array of subpattern offsets */
1832 zend_string **subpat_names; /* Array for named subpatterns */
1833 uint32_t num_subpats; /* Number of captured subpatterns */
1834 size_t new_len; /* Length of needed storage */
1835 size_t alloc_len; /* Actual allocated length */
1836 PCRE2_SIZE start_offset; /* Where the new search starts */
1837 size_t last_end_offset; /* Where the last search ended */
1838 const char *match, /* The current match */
1839 *piece; /* The current piece of subject */
1840 size_t result_len; /* Length of result */
1841 zend_string *result; /* Result of replacement */
1842 zend_string *eval_result; /* Result of custom function */
1843 pcre2_match_data *match_data;
1844 bool old_mdata_used;
1845
1846 /* Calculate the size of the offsets array, and allocate memory for it. */
1847 num_subpats = pce->capture_count + 1;
1848
1849 /*
1850 * Build a mapping from subpattern numbers to their names. We will
1851 * allocate the table only if there are any named subpatterns.
1852 */
1853 subpat_names = NULL;
1854 if (UNEXPECTED(pce->name_count > 0)) {
1855 subpat_names = make_subpats_table(num_subpats, pce);
1856 if (!subpat_names) {
1857 return NULL;
1858 }
1859 }
1860
1861 alloc_len = 0;
1862 result = NULL;
1863
1864 /* Initialize */
1865 match = NULL;
1866 start_offset = 0;
1867 last_end_offset = 0;
1868 result_len = 0;
1869 PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1870
1871 old_mdata_used = mdata_used;
1872 if (!old_mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
1873 mdata_used = 1;
1874 match_data = mdata;
1875 } else {
1876 match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
1877 if (!match_data) {
1878 PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1879 if (subpat_names) {
1880 free_subpats_table(subpat_names, num_subpats);
1881 }
1882 mdata_used = old_mdata_used;
1883 return NULL;
1884 }
1885 }
1886
1887 options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
1888
1889 /* Execute the regular expression. */
1890 #ifdef HAVE_PCRE_JIT_SUPPORT
1891 if ((pce->preg_options & PREG_JIT) && options) {
1892 count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1893 PCRE2_NO_UTF_CHECK, match_data, mctx);
1894 } else
1895 #endif
1896 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1897 options, match_data, mctx);
1898
1899 while (1) {
1900 piece = subject + last_end_offset;
1901
1902 if (count >= 0 && limit) {
1903 /* Check for too many substrings condition. */
1904 if (UNEXPECTED(count == 0)) {
1905 php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
1906 count = num_subpats;
1907 }
1908
1909 matched:
1910 offsets = pcre2_get_ovector_pointer(match_data);
1911
1912 if (UNEXPECTED(offsets[1] < offsets[0])) {
1913 PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1914 if (result) {
1915 zend_string_release_ex(result, 0);
1916 result = NULL;
1917 }
1918 break;
1919 }
1920
1921 if (replace_count) {
1922 ++*replace_count;
1923 }
1924
1925 /* Set the match location in subject */
1926 match = subject + offsets[0];
1927
1928 new_len = result_len + offsets[0] - last_end_offset; /* part before the match */
1929
1930 /* Use custom function to get replacement string and its length. */
1931 eval_result = preg_do_repl_func(
1932 fci, fcc, subject, offsets, subpat_names, num_subpats, count,
1933 pcre2_get_mark(match_data), flags);
1934
1935 ZEND_ASSERT(eval_result);
1936 new_len = zend_safe_address_guarded(1, ZSTR_LEN(eval_result) + ZSTR_MAX_OVERHEAD, new_len) -ZSTR_MAX_OVERHEAD;
1937 if (new_len >= alloc_len) {
1938 alloc_len = zend_safe_address_guarded(2, new_len, ZSTR_MAX_OVERHEAD) - ZSTR_MAX_OVERHEAD;
1939 if (result == NULL) {
1940 result = zend_string_alloc(alloc_len, 0);
1941 } else {
1942 result = zend_string_extend(result, alloc_len, 0);
1943 }
1944 }
1945
1946 if (match-piece > 0) {
1947 /* copy the part of the string before the match */
1948 memcpy(ZSTR_VAL(result) + result_len, piece, match-piece);
1949 result_len += (match-piece);
1950 }
1951
1952 /* If using custom function, copy result to the buffer and clean up. */
1953 memcpy(ZSTR_VAL(result) + result_len, ZSTR_VAL(eval_result), ZSTR_LEN(eval_result));
1954 result_len += ZSTR_LEN(eval_result);
1955 zend_string_release_ex(eval_result, 0);
1956
1957 limit--;
1958
1959 /* Advance to the next piece. */
1960 start_offset = last_end_offset = offsets[1];
1961
1962 /* If we have matched an empty string, mimic what Perl's /g options does.
1963 This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
1964 the match again at the same point. If this fails (picked up above) we
1965 advance to the next character. */
1966 if (start_offset == offsets[0]) {
1967 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1968 PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
1969
1970 piece = subject + start_offset;
1971 if (count >= 0 && limit) {
1972 goto matched;
1973 } else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
1974 /* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
1975 this is not necessarily the end. We need to advance
1976 the start offset, and continue. Fudge the offset values
1977 to achieve this, unless we're already at the end of the string. */
1978 if (start_offset < subject_len) {
1979 size_t unit_len = calculate_unit_length(pce, piece);
1980 start_offset += unit_len;
1981 } else {
1982 goto not_matched;
1983 }
1984 } else {
1985 goto error;
1986 }
1987 }
1988
1989 } else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
1990 not_matched:
1991 if (!result && subject_str) {
1992 result = zend_string_copy(subject_str);
1993 break;
1994 }
1995 /* now we know exactly how long it is */
1996 alloc_len = result_len + subject_len - last_end_offset;
1997 if (NULL != result) {
1998 result = zend_string_realloc(result, alloc_len, 0);
1999 } else {
2000 result = zend_string_alloc(alloc_len, 0);
2001 }
2002 /* stick that last bit of string on our output */
2003 memcpy(ZSTR_VAL(result) + result_len, piece, subject_len - last_end_offset);
2004 result_len += subject_len - last_end_offset;
2005 ZSTR_VAL(result)[result_len] = '\0';
2006 ZSTR_LEN(result) = result_len;
2007 break;
2008 } else {
2009 error:
2010 pcre_handle_exec_error(count);
2011 if (result) {
2012 zend_string_release_ex(result, 0);
2013 result = NULL;
2014 }
2015 break;
2016 }
2017 #ifdef HAVE_PCRE_JIT_SUPPORT
2018 if ((pce->preg_options & PREG_JIT)) {
2019 count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
2020 PCRE2_NO_UTF_CHECK, match_data, mctx);
2021 } else
2022 #endif
2023 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
2024 PCRE2_NO_UTF_CHECK, match_data, mctx);
2025 }
2026 if (match_data != mdata) {
2027 pcre2_match_data_free(match_data);
2028 }
2029 mdata_used = old_mdata_used;
2030
2031 if (UNEXPECTED(subpat_names)) {
2032 free_subpats_table(subpat_names, num_subpats);
2033 }
2034
2035 return result;
2036 }
2037 /* }}} */
2038
2039 /* {{{ php_pcre_replace_func */
php_pcre_replace_func(zend_string * regex,zend_string * subject_str,zend_fcall_info * fci,zend_fcall_info_cache * fcc,size_t limit,size_t * replace_count,zend_long flags)2040 static zend_always_inline zend_string *php_pcre_replace_func(zend_string *regex,
2041 zend_string *subject_str,
2042 zend_fcall_info *fci, zend_fcall_info_cache *fcc,
2043 size_t limit, size_t *replace_count, zend_long flags)
2044 {
2045 pcre_cache_entry *pce; /* Compiled regular expression */
2046 zend_string *result; /* Function result */
2047
2048 /* Compile regex or get it from cache. */
2049 if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
2050 return NULL;
2051 }
2052 pce->refcount++;
2053 result = php_pcre_replace_func_impl(
2054 pce, subject_str, ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), fci, fcc,
2055 limit, replace_count, flags);
2056 pce->refcount--;
2057
2058 return result;
2059 }
2060 /* }}} */
2061
2062 /* {{{ php_pcre_replace_array */
/* Apply every pattern in `regex` to the subject in turn, feeding the output
 * of one replacement into the next.
 *
 * When `replace_ht` is given, the n-th pattern is paired with the n-th
 * defined slot of `replace_ht`; once the replacements run out the empty
 * string is used. Otherwise `replace_str` is used for every pattern.
 *
 * Returns the final string (caller owns one reference), or NULL as soon as
 * one of the replacements returns NULL. */
static zend_string *php_pcre_replace_array(HashTable *regex,
	zend_string *replace_str, HashTable *replace_ht,
	zend_string *subject_str, size_t limit, size_t *replace_count)
{
	zval *pattern_zv;
	zend_string *current = subject_str;
	uint32_t next_slot = 0;	/* Next raw slot of replace_ht to inspect */

	/* Own a reference so every round can release its input uniformly. */
	zend_string_addref(current);

	if (!replace_ht) {
		ZEND_ASSERT(replace_str != NULL);
	}

	ZEND_HASH_FOREACH_VAL(regex, pattern_zv) {
		/* Make sure we're dealing with strings. */
		zend_string *pattern_tmp;
		zend_string *pattern = zval_get_tmp_string(pattern_zv, &pattern_tmp);
		zend_string *replacement_tmp = NULL;
		zend_string *replacement = replace_str;
		zend_string *next;

		if (replace_ht) {
			/* Default once the replacement array has been exhausted. */
			replacement = ZSTR_EMPTY_ALLOC();

			/* Walk forward to the next slot that actually holds a value. */
			while (next_slot != replace_ht->nNumUsed) {
				zval *slot = ZEND_HASH_ELEMENT(replace_ht, next_slot);

				next_slot++;
				if (Z_TYPE_P(slot) != IS_UNDEF) {
					replacement = zval_get_tmp_string(slot, &replacement_tmp);
					break;
				}
			}
		}

		/* Replace, then make the result the subject of the next round. */
		next = php_pcre_replace(pattern, current, ZSTR_VAL(current),
				ZSTR_LEN(current), replacement, limit, replace_count);
		zend_tmp_string_release(replacement_tmp);
		zend_tmp_string_release(pattern_tmp);
		zend_string_release_ex(current, 0);
		current = next;

		if (UNEXPECTED(current == NULL)) {
			break;
		}
	} ZEND_HASH_FOREACH_END();

	return current;
}
2136 /* }}} */
2137
2138 /* {{{ php_replace_in_subject */
php_replace_in_subject(zend_string * regex_str,HashTable * regex_ht,zend_string * replace_str,HashTable * replace_ht,zend_string * subject,size_t limit,size_t * replace_count)2139 static zend_always_inline zend_string *php_replace_in_subject(
2140 zend_string *regex_str, HashTable *regex_ht,
2141 zend_string *replace_str, HashTable *replace_ht,
2142 zend_string *subject, size_t limit, size_t *replace_count)
2143 {
2144 zend_string *result;
2145
2146 if (regex_str) {
2147 ZEND_ASSERT(replace_str != NULL);
2148 result = php_pcre_replace(regex_str, subject, ZSTR_VAL(subject), ZSTR_LEN(subject),
2149 replace_str, limit, replace_count);
2150 } else {
2151 ZEND_ASSERT(regex_ht != NULL);
2152 result = php_pcre_replace_array(regex_ht, replace_str, replace_ht, subject,
2153 limit, replace_count);
2154 }
2155 return result;
2156 }
2157 /* }}} */
2158
2159 /* {{{ php_replace_in_subject_func */
/* Callback-based counterpart of php_replace_in_subject(). With a single
 * pattern string the call is forwarded directly; with an array of patterns
 * each one is applied in turn to the output of the previous one.
 *
 * Returns the resulting string, or NULL as soon as a replacement yields
 * NULL. */
static zend_string *php_replace_in_subject_func(zend_string *regex_str, HashTable *regex_ht,
	zend_fcall_info *fci, zend_fcall_info_cache *fcc,
	zend_string *subject, size_t limit, size_t *replace_count, zend_long flags)
{
	zval *pattern_zv;
	zend_string *current;

	if (regex_str) {
		return php_pcre_replace_func(
			regex_str, subject, fci, fcc, limit, replace_count, flags);
	}

	/* regex is an array from here on */
	ZEND_ASSERT(regex_ht != NULL);

	/* Own a reference so each round can release its input uniformly. */
	current = subject;
	zend_string_addref(current);

	ZEND_HASH_FOREACH_VAL(regex_ht, pattern_zv) {
		/* Make sure we're dealing with strings. */
		zend_string *pattern_tmp;
		zend_string *pattern = zval_get_tmp_string(pattern_zv, &pattern_tmp);

		/* Replace, then make the result the subject of the next round. */
		zend_string *next = php_pcre_replace_func(
			pattern, current, fci, fcc, limit, replace_count, flags);

		zend_tmp_string_release(pattern_tmp);
		zend_string_release(current);
		current = next;

		if (UNEXPECTED(current == NULL)) {
			break;
		}
	} ZEND_HASH_FOREACH_END();

	return current;
}
2199 /* }}} */
2200
2201 /* {{{ preg_replace_func_impl */
preg_replace_func_impl(zval * return_value,zend_string * regex_str,HashTable * regex_ht,zend_fcall_info * fci,zend_fcall_info_cache * fcc,zend_string * subject_str,HashTable * subject_ht,zend_long limit_val,zend_long flags)2202 static size_t preg_replace_func_impl(zval *return_value,
2203 zend_string *regex_str, HashTable *regex_ht,
2204 zend_fcall_info *fci, zend_fcall_info_cache *fcc,
2205 zend_string *subject_str, HashTable *subject_ht, zend_long limit_val, zend_long flags)
2206 {
2207 zend_string *result;
2208 size_t replace_count = 0;
2209
2210 if (subject_str) {
2211 result = php_replace_in_subject_func(
2212 regex_str, regex_ht, fci, fcc, subject_str, limit_val, &replace_count, flags);
2213 if (result != NULL) {
2214 RETVAL_STR(result);
2215 } else {
2216 RETVAL_NULL();
2217 }
2218 } else {
2219 /* if subject is an array */
2220 zval *subject_entry, zv;
2221 zend_string *string_key;
2222 zend_ulong num_key;
2223
2224 ZEND_ASSERT(subject_ht != NULL);
2225
2226 array_init_size(return_value, zend_hash_num_elements(subject_ht));
2227
2228 /* For each subject entry, convert it to string, then perform replacement
2229 and add the result to the return_value array. */
2230 ZEND_HASH_FOREACH_KEY_VAL(subject_ht, num_key, string_key, subject_entry) {
2231 zend_string *tmp_subject_entry_str;
2232 zend_string *subject_entry_str = zval_get_tmp_string(subject_entry, &tmp_subject_entry_str);
2233
2234 result = php_replace_in_subject_func(
2235 regex_str, regex_ht, fci, fcc, subject_entry_str, limit_val, &replace_count, flags);
2236 if (result != NULL) {
2237 /* Add to return array */
2238 ZVAL_STR(&zv, result);
2239 if (string_key) {
2240 zend_hash_add_new(Z_ARRVAL_P(return_value), string_key, &zv);
2241 } else {
2242 zend_hash_index_add_new(Z_ARRVAL_P(return_value), num_key, &zv);
2243 }
2244 }
2245 zend_tmp_string_release(tmp_subject_entry_str);
2246 } ZEND_HASH_FOREACH_END();
2247 }
2248
2249 return replace_count;
2250 }
2251 /* }}} */
2252
2253 /* {{{ preg_replace_common */
/* Shared body of preg_replace() and preg_filter().
 *
 * With is_filter set, a subject is only reported when at least one
 * replacement was counted for it: a string subject yields NULL otherwise and
 * an array entry is left out of the result. Without is_filter every non-NULL
 * replacement result is reported. The optional by-reference count argument
 * receives the total number of replacements. */
static void preg_replace_common(INTERNAL_FUNCTION_PARAMETERS, bool is_filter)
{
	zend_string *regex_str, *replace_str, *subject_str;
	HashTable *regex_ht, *replace_ht, *subject_ht;
	zval *zcount = NULL;
	zend_long limit = -1;
	size_t replace_count = 0;

	/* Get function parameters and do error-checking. */
	ZEND_PARSE_PARAMETERS_START(3, 5)
		Z_PARAM_ARRAY_HT_OR_STR(regex_ht, regex_str)
		Z_PARAM_ARRAY_HT_OR_STR(replace_ht, replace_str)
		Z_PARAM_ARRAY_HT_OR_STR(subject_ht, subject_str)
		Z_PARAM_OPTIONAL
		Z_PARAM_LONG(limit)
		Z_PARAM_ZVAL(zcount)
	ZEND_PARSE_PARAMETERS_END();

	/* An array of replacements only makes sense with an array of patterns. */
	if (replace_ht && !regex_ht) {
		zend_argument_type_error(1, "must be of type array when argument #2 ($replacement) is an array, string given");
		RETURN_THROWS();
	}

	if (subject_str) {
		size_t before = replace_count;
		zend_string *replaced = php_replace_in_subject(regex_str, regex_ht,
			replace_str, replace_ht, subject_str, limit, &replace_count);

		if (replaced == NULL) {
			RETVAL_NULL();
		} else if (is_filter && replace_count <= before) {
			/* Filter mode and nothing was replaced: drop the result. */
			zend_string_release_ex(replaced, 0);
			RETVAL_NULL();
		} else {
			RETVAL_STR(replaced);
		}
	} else {
		/* subject is an array */
		zval *entry, wrapped;
		zend_string *str_key;
		zend_ulong idx;

		ZEND_ASSERT(subject_ht != NULL);

		array_init_size(return_value, zend_hash_num_elements(subject_ht));

		/* Stringify each entry, run the replacement and store the outcome
		   under the entry's original key. */
		ZEND_HASH_FOREACH_KEY_VAL(subject_ht, idx, str_key, entry) {
			size_t before = replace_count;
			zend_string *entry_tmp;
			zend_string *entry_str = zval_get_tmp_string(entry, &entry_tmp);
			zend_string *replaced = php_replace_in_subject(regex_str, regex_ht,
				replace_str, replace_ht, entry_str, limit, &replace_count);

			if (replaced != NULL) {
				if (is_filter && replace_count <= before) {
					/* Filter mode and nothing was replaced: skip the entry. */
					zend_string_release_ex(replaced, 0);
				} else {
					ZVAL_STR(&wrapped, replaced);
					if (str_key) {
						zend_hash_add_new(Z_ARRVAL_P(return_value), str_key, &wrapped);
					} else {
						zend_hash_index_add_new(Z_ARRVAL_P(return_value), idx, &wrapped);
					}
				}
			}
			zend_tmp_string_release(entry_tmp);
		} ZEND_HASH_FOREACH_END();
	}

	if (zcount) {
		ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count);
	}
}
2338 /* }}} */
2339
2340 /* {{{ Perform Perl-style regular expression replacement. */
/* Userland entry point: forwards its arguments unchanged to
 * preg_replace_common() with is_filter disabled, so every non-NULL
 * replacement result is returned whether or not anything was replaced. */
PHP_FUNCTION(preg_replace)
{
	preg_replace_common(INTERNAL_FUNCTION_PARAM_PASSTHRU, false);
}
2345 /* }}} */
2346
2347 /* {{{ Perform Perl-style regular expression replacement using replacement callback. */
PHP_FUNCTION(preg_replace_callback)2348 PHP_FUNCTION(preg_replace_callback)
2349 {
2350 zval *zcount = NULL;
2351 zend_string *regex_str;
2352 HashTable *regex_ht;
2353 zend_string *subject_str;
2354 HashTable *subject_ht;
2355 zend_long limit = -1, flags = 0;
2356 size_t replace_count;
2357 zend_fcall_info fci;
2358 zend_fcall_info_cache fcc;
2359
2360 /* Get function parameters and do error-checking. */
2361 ZEND_PARSE_PARAMETERS_START(3, 6)
2362 Z_PARAM_ARRAY_HT_OR_STR(regex_ht, regex_str)
2363 Z_PARAM_FUNC(fci, fcc)
2364 Z_PARAM_ARRAY_HT_OR_STR(subject_ht, subject_str)
2365 Z_PARAM_OPTIONAL
2366 Z_PARAM_LONG(limit)
2367 Z_PARAM_ZVAL(zcount)
2368 Z_PARAM_LONG(flags)
2369 ZEND_PARSE_PARAMETERS_END();
2370
2371 replace_count = preg_replace_func_impl(return_value, regex_str, regex_ht,
2372 &fci, &fcc,
2373 subject_str, subject_ht, limit, flags);
2374 if (zcount) {
2375 ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count);
2376 }
2377 }
2378 /* }}} */
2379
/* {{{ Perform Perl-style regular expression replacement using an array that maps each pattern to its replacement callback. */
PHP_FUNCTION(preg_replace_callback_array)2381 PHP_FUNCTION(preg_replace_callback_array)
2382 {
2383 zval zv, *replace, *zcount = NULL;
2384 HashTable *pattern, *subject_ht;
2385 zend_string *subject_str, *str_idx_regex;
2386 zend_long limit = -1, flags = 0;
2387 size_t replace_count = 0;
2388 zend_fcall_info fci;
2389 zend_fcall_info_cache fcc;
2390
2391 /* Get function parameters and do error-checking. */
2392 ZEND_PARSE_PARAMETERS_START(2, 5)
2393 Z_PARAM_ARRAY_HT(pattern)
2394 Z_PARAM_ARRAY_HT_OR_STR(subject_ht, subject_str)
2395 Z_PARAM_OPTIONAL
2396 Z_PARAM_LONG(limit)
2397 Z_PARAM_ZVAL(zcount)
2398 Z_PARAM_LONG(flags)
2399 ZEND_PARSE_PARAMETERS_END();
2400
2401 fci.size = sizeof(fci);
2402 fci.object = NULL;
2403 fci.named_params = NULL;
2404
2405 if (subject_ht) {
2406 GC_TRY_ADDREF(subject_ht);
2407 } else {
2408 GC_TRY_ADDREF(subject_str);
2409 }
2410
2411 ZEND_HASH_FOREACH_STR_KEY_VAL(pattern, str_idx_regex, replace) {
2412 if (!zend_is_callable_ex(replace, NULL, 0, NULL, &fcc, NULL)) {
2413 zend_argument_type_error(1, "must contain only valid callbacks");
2414 goto error;
2415 }
2416 if (!str_idx_regex) {
2417 zend_argument_type_error(1, "must contain only string patterns as keys");
2418 goto error;
2419 }
2420
2421 ZVAL_COPY_VALUE(&fci.function_name, replace);
2422
2423 replace_count += preg_replace_func_impl(&zv, str_idx_regex, /* regex_ht */ NULL, &fci, &fcc,
2424 subject_str, subject_ht, limit, flags);
2425 switch (Z_TYPE(zv)) {
2426 case IS_ARRAY:
2427 ZEND_ASSERT(subject_ht);
2428 zend_array_release(subject_ht);
2429 subject_ht = Z_ARR(zv);
2430 break;
2431 case IS_STRING:
2432 ZEND_ASSERT(subject_str);
2433 zend_string_release(subject_str);
2434 subject_str = Z_STR(zv);
2435 break;
2436 case IS_NULL:
2437 RETVAL_NULL();
2438 goto error;
2439 EMPTY_SWITCH_DEFAULT_CASE()
2440 }
2441
2442 if (EG(exception)) {
2443 goto error;
2444 }
2445 } ZEND_HASH_FOREACH_END();
2446
2447 if (zcount) {
2448 ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count);
2449 }
2450
2451 if (subject_ht) {
2452 RETVAL_ARR(subject_ht);
2453 // Unset the type_flags of immutable arrays to prevent the VM from performing refcounting
2454 if (GC_FLAGS(subject_ht) & IS_ARRAY_IMMUTABLE) {
2455 Z_TYPE_FLAGS_P(return_value) = 0;
2456 }
2457 return;
2458 } else {
2459 RETURN_STR(subject_str);
2460 }
2461
2462 error:
2463 if (subject_ht) {
2464 zend_array_release(subject_ht);
2465 } else {
2466 zend_string_release(subject_str);
2467 }
2468 }
2469 /* }}} */
2470
2471 /* {{{ Perform Perl-style regular expression replacement and only return matches. */
/* Userland entry point: forwards its arguments unchanged to
 * preg_replace_common() with is_filter enabled, so only subjects for which
 * at least one replacement was counted are returned. */
PHP_FUNCTION(preg_filter)
{
	preg_replace_common(INTERNAL_FUNCTION_PARAM_PASSTHRU, true);
}
2476 /* }}} */
2477
2478 /* {{{ Split string into an array using a perl-style regular expression as a delimiter */
PHP_FUNCTION(preg_split)2479 PHP_FUNCTION(preg_split)
2480 {
2481 zend_string *regex; /* Regular expression */
2482 zend_string *subject; /* String to match against */
2483 zend_long limit_val = -1;/* Integer value of limit */
2484 zend_long flags = 0; /* Match control flags */
2485 pcre_cache_entry *pce; /* Compiled regular expression */
2486
2487 /* Get function parameters and do error checking */
2488 ZEND_PARSE_PARAMETERS_START(2, 4)
2489 Z_PARAM_STR(regex)
2490 Z_PARAM_STR(subject)
2491 Z_PARAM_OPTIONAL
2492 Z_PARAM_LONG(limit_val)
2493 Z_PARAM_LONG(flags)
2494 ZEND_PARSE_PARAMETERS_END();
2495
2496 /* Compile regex or get it from cache. */
2497 if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
2498 RETURN_FALSE;
2499 }
2500
2501 pce->refcount++;
2502 php_pcre_split_impl(pce, subject, return_value, limit_val, flags);
2503 pce->refcount--;
2504 }
2505 /* }}} */
2506
/* {{{ php_pcre_split_impl */
/* Split subject_str at every match of the compiled pattern `pce` and store
 * the pieces in return_value, which is initialised as an array here.
 *
 * limit_val: -1 means unlimited and 0 is treated as -1. Any other value <= 1
 *            skips matching entirely and goes straight to the final-piece
 *            step. Otherwise the counter is decremented for each piece added
 *            and matching stops once it has dropped to 1.
 * flags:     PREG_SPLIT_NO_EMPTY, PREG_SPLIT_DELIM_CAPTURE and
 *            PREG_SPLIT_OFFSET_CAPTURE are honoured.
 *
 * On a match error, or when match data cannot be allocated, return_value is
 * destroyed and set to false; PCRE_G(error_code) carries the reason. */
PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str, zval *return_value,
	zend_long limit_val, zend_long flags)
{
	PCRE2_SIZE		*offsets;			/* Array of subpattern offsets */
	uint32_t		 options;			/* Execution options */
	int				 count;				/* Count of matched subpatterns */
	PCRE2_SIZE		 start_offset;		/* Where the new search starts */
	PCRE2_SIZE		 last_match_offset;	/* Location of last match */
	uint32_t		 no_empty;			/* If NO_EMPTY flag is set */
	uint32_t		 delim_capture;		/* If delimiters should be captured */
	uint32_t		 offset_capture;	/* If offsets should be captured */
	uint32_t		 num_subpats;		/* Number of captured subpatterns */
	zval			 tmp;
	pcre2_match_data *match_data;
	char *subject = ZSTR_VAL(subject_str);

	no_empty = flags & PREG_SPLIT_NO_EMPTY;
	delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE;
	offset_capture = flags & PREG_SPLIT_OFFSET_CAPTURE;

	/* Initialize return value */
	array_init(return_value);

	/* Number of offset pairs a match can produce: whole match plus captures. */
	num_subpats = pce->capture_count + 1;

	/* Start at the beginning of the string */
	start_offset = 0;
	last_match_offset = 0;
	PCRE_G(error_code) = PHP_PCRE_NO_ERROR;

	if (limit_val == -1) {
		/* pass */
	} else if (limit_val == 0) {
		/* A limit of 0 means "no limit", same as -1. */
		limit_val = -1;
	} else if (limit_val <= 1) {
		/* Nothing to split off: emit the subject as the single last piece.
		   This jump happens before match_data is set up, so nothing needs
		   freeing. */
		goto last;
	}

	/* Use the preallocated match data when it is free and large enough,
	   otherwise allocate one sized for this pattern. */
	if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
		match_data = mdata;
	} else {
		match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
		if (!match_data) {
			PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
			zval_ptr_dtor(return_value);
			RETURN_FALSE;
		}
	}

	/* For patterns compiled with PCRE2_UTF the first match runs without
	   PCRE2_NO_UTF_CHECK; every later match in this function passes
	   PCRE2_NO_UTF_CHECK. */
	options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;

#ifdef HAVE_PCRE_JIT_SUPPORT
	/* The JIT path is only taken for the first match when options is
	   non-zero, i.e. for non-UTF patterns. */
	if ((pce->preg_options & PREG_JIT) && options) {
		count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
				PCRE2_NO_UTF_CHECK, match_data, mctx);
	} else
#endif
	count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
			options, match_data, mctx);

	while (1) {
		/* If something matched */
		if (count >= 0) {
			/* Check for too many substrings condition. */
			if (UNEXPECTED(count == 0)) {
				php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
				count = num_subpats;
			}

matched:
			/* Also entered by goto from the empty-match retry below. */
			offsets = pcre2_get_ovector_pointer(match_data);

			/* A match that ends before it starts is reported as an internal
			   error; the check after the loop turns that into false. */
			if (UNEXPECTED(offsets[1] < offsets[0])) {
				PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
				break;
			}

			/* Emit the piece between the previous match and this one, unless
			   it is empty and NO_EMPTY was requested. */
			if (!no_empty || offsets[0] != last_match_offset) {
				if (offset_capture) {
					/* Add (match, offset) pair to the return value */
					add_offset_pair(
						return_value, subject, last_match_offset, offsets[0],
						NULL, 0);
				} else {
					/* Add the piece to the return value */
					populate_match_value_str(&tmp, subject, last_match_offset, offsets[0]);
					zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
				}

				/* One less left to do */
				if (limit_val != -1)
					limit_val--;
			}

			/* With DELIM_CAPTURE the captured groups (index 1 and up) are
			   added as well; these do not count against the limit. */
			if (delim_capture) {
				size_t i;
				for (i = 1; i < count; i++) {
					/* If we have matched a delimiter */
					if (!no_empty || offsets[2*i] != offsets[2*i+1]) {
						if (offset_capture) {
							add_offset_pair(
								return_value, subject, offsets[2*i], offsets[2*i+1], NULL, 0);
						} else {
							populate_match_value_str(&tmp, subject, offsets[2*i], offsets[2*i+1]);
							zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
						}
					}
				}
			}

			/* Advance to the position right after the last full match */
			start_offset = last_match_offset = offsets[1];

			/* If we have matched an empty string, mimic what Perl's /g options does.
			   This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
			   the match again at the same point. If this fails (picked up above) we
			   advance to the next character. */
			if (start_offset == offsets[0]) {
				/* Get next piece if no limit or limit not yet reached and something matched*/
				if (limit_val != -1 && limit_val <= 1) {
					break;
				}
				count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
					PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
				if (count >= 0) {
					goto matched;
				} else if (count == PCRE2_ERROR_NOMATCH) {
					/* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
					   this is not necessarily the end. We need to advance
					   the start offset, and continue. Fudge the offset values
					   to achieve this, unless we're already at the end of the string. */
					if (start_offset < ZSTR_LEN(subject_str)) {
						start_offset += calculate_unit_length(pce, subject + start_offset);
					} else {
						break;
					}
				} else {
					goto error;
				}
			}

		} else if (count == PCRE2_ERROR_NOMATCH) {
			/* No further match: the remainder is handled after the loop. */
			break;
		} else {
error:
			/* Also entered by goto when the empty-match retry fails. */
			pcre_handle_exec_error(count);
			break;
		}

		/* Get next piece if no limit or limit not yet reached and something matched*/
		if (limit_val != -1 && limit_val <= 1) {
			break;
		}

#ifdef HAVE_PCRE_JIT_SUPPORT
		if (pce->preg_options & PREG_JIT) {
			count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
					PCRE2_NO_UTF_CHECK, match_data, mctx);
		} else
#endif
		count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
				PCRE2_NO_UTF_CHECK, match_data, mctx);
	}
	/* Only free match data that was allocated above, never the shared one. */
	if (match_data != mdata) {
		pcre2_match_data_free(match_data);
	}

	/* Any error recorded during matching discards the partial result. */
	if (PCRE_G(error_code) != PHP_PCRE_NO_ERROR) {
		zval_ptr_dtor(return_value);
		RETURN_FALSE;
	}

last:
	start_offset = last_match_offset; /* the offset might have been incremented, but without further successful matches */

	/* Emit whatever follows the last match, unless it is empty and NO_EMPTY
	   was requested. */
	if (!no_empty || start_offset < ZSTR_LEN(subject_str)) {
		if (offset_capture) {
			/* Add the last (match, offset) pair to the return value */
			add_offset_pair(return_value, subject, start_offset, ZSTR_LEN(subject_str), NULL, 0);
		} else {
			/* Add the last piece to the return value */
			if (start_offset == 0) {
				/* The last piece is the whole subject: share the original
				   string instead of copying it. */
				ZVAL_STR_COPY(&tmp, subject_str);
			} else {
				populate_match_value_str(&tmp, subject, start_offset, ZSTR_LEN(subject_str));
			}
			zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
		}
	}
}
2699 /* }}} */
2700
2701 /* {{{ Quote regular expression characters plus an optional character */
PHP_FUNCTION(preg_quote)2702 PHP_FUNCTION(preg_quote)
2703 {
2704 zend_string *str; /* Input string argument */
2705 zend_string *delim = NULL; /* Additional delimiter argument */
2706 char *in_str; /* Input string */
2707 char *in_str_end; /* End of the input string */
2708 zend_string *out_str; /* Output string with quoted characters */
2709 size_t extra_len; /* Number of additional characters */
2710 char *p, /* Iterator for input string */
2711 *q, /* Iterator for output string */
2712 delim_char = '\0', /* Delimiter character to be quoted */
2713 c; /* Current character */
2714
2715 /* Get the arguments and check for errors */
2716 ZEND_PARSE_PARAMETERS_START(1, 2)
2717 Z_PARAM_STR(str)
2718 Z_PARAM_OPTIONAL
2719 Z_PARAM_STR_OR_NULL(delim)
2720 ZEND_PARSE_PARAMETERS_END();
2721
2722 /* Nothing to do if we got an empty string */
2723 if (ZSTR_LEN(str) == 0) {
2724 RETURN_EMPTY_STRING();
2725 }
2726
2727 in_str = ZSTR_VAL(str);
2728 in_str_end = in_str + ZSTR_LEN(str);
2729
2730 if (delim) {
2731 delim_char = ZSTR_VAL(delim)[0];
2732 }
2733
2734 /* Go through the string and quote necessary characters */
2735 extra_len = 0;
2736 p = in_str;
2737 do {
2738 c = *p;
2739 switch(c) {
2740 case '.':
2741 case '\\':
2742 case '+':
2743 case '*':
2744 case '?':
2745 case '[':
2746 case '^':
2747 case ']':
2748 case '$':
2749 case '(':
2750 case ')':
2751 case '{':
2752 case '}':
2753 case '=':
2754 case '!':
2755 case '>':
2756 case '<':
2757 case '|':
2758 case ':':
2759 case '-':
2760 case '#':
2761 extra_len++;
2762 break;
2763
2764 case '\0':
2765 extra_len+=3;
2766 break;
2767
2768 default:
2769 if (c == delim_char) {
2770 extra_len++;
2771 }
2772 break;
2773 }
2774 p++;
2775 } while (p != in_str_end);
2776
2777 if (extra_len == 0) {
2778 RETURN_STR_COPY(str);
2779 }
2780
2781 /* Allocate enough memory so that even if each character
2782 is quoted, we won't run out of room */
2783 out_str = zend_string_safe_alloc(1, ZSTR_LEN(str), extra_len, 0);
2784 q = ZSTR_VAL(out_str);
2785 p = in_str;
2786
2787 do {
2788 c = *p;
2789 switch(c) {
2790 case '.':
2791 case '\\':
2792 case '+':
2793 case '*':
2794 case '?':
2795 case '[':
2796 case '^':
2797 case ']':
2798 case '$':
2799 case '(':
2800 case ')':
2801 case '{':
2802 case '}':
2803 case '=':
2804 case '!':
2805 case '>':
2806 case '<':
2807 case '|':
2808 case ':':
2809 case '-':
2810 case '#':
2811 *q++ = '\\';
2812 *q++ = c;
2813 break;
2814
2815 case '\0':
2816 *q++ = '\\';
2817 *q++ = '0';
2818 *q++ = '0';
2819 *q++ = '0';
2820 break;
2821
2822 default:
2823 if (c == delim_char) {
2824 *q++ = '\\';
2825 }
2826 *q++ = c;
2827 break;
2828 }
2829 p++;
2830 } while (p != in_str_end);
2831 *q = '\0';
2832
2833 RETURN_NEW_STR(out_str);
2834 }
2835 /* }}} */
2836
2837 /* {{{ Searches array and returns entries which match regex */
PHP_FUNCTION(preg_grep)2838 PHP_FUNCTION(preg_grep)
2839 {
2840 zend_string *regex; /* Regular expression */
2841 zval *input; /* Input array */
2842 zend_long flags = 0; /* Match control flags */
2843 pcre_cache_entry *pce; /* Compiled regular expression */
2844
2845 /* Get arguments and do error checking */
2846 ZEND_PARSE_PARAMETERS_START(2, 3)
2847 Z_PARAM_STR(regex)
2848 Z_PARAM_ARRAY(input)
2849 Z_PARAM_OPTIONAL
2850 Z_PARAM_LONG(flags)
2851 ZEND_PARSE_PARAMETERS_END();
2852
2853 /* Compile regex or get it from cache. */
2854 if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
2855 RETURN_FALSE;
2856 }
2857
2858 pce->refcount++;
2859 php_pcre_grep_impl(pce, input, return_value, flags);
2860 pce->refcount--;
2861 }
2862 /* }}} */
2863
php_pcre_grep_impl(pcre_cache_entry * pce,zval * input,zval * return_value,zend_long flags)2864 PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return_value, zend_long flags) /* {{{ */
2865 {
2866 zval *entry; /* An entry in the input array */
2867 uint32_t num_subpats; /* Number of captured subpatterns */
2868 int count; /* Count of matched subpatterns */
2869 uint32_t options; /* Execution options */
2870 zend_string *string_key;
2871 zend_ulong num_key;
2872 bool invert; /* Whether to return non-matching
2873 entries */
2874 pcre2_match_data *match_data;
2875 invert = flags & PREG_GREP_INVERT ? 1 : 0;
2876
2877 /* Calculate the size of the offsets array, and allocate memory for it. */
2878 num_subpats = pce->capture_count + 1;
2879
2880 /* Initialize return array */
2881 array_init(return_value);
2882
2883 PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
2884
2885 if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
2886 match_data = mdata;
2887 } else {
2888 match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
2889 if (!match_data) {
2890 PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
2891 return;
2892 }
2893 }
2894
2895 options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
2896
2897 /* Go through the input array */
2898 ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(input), num_key, string_key, entry) {
2899 zend_string *tmp_subject_str;
2900 zend_string *subject_str = zval_get_tmp_string(entry, &tmp_subject_str);
2901
2902 /* Perform the match */
2903 #ifdef HAVE_PCRE_JIT_SUPPORT
2904 if ((pce->preg_options & PREG_JIT) && options) {
2905 count = pcre2_jit_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), 0,
2906 PCRE2_NO_UTF_CHECK, match_data, mctx);
2907 } else
2908 #endif
2909 count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), 0,
2910 options, match_data, mctx);
2911
2912 /* If the entry fits our requirements */
2913 if (count >= 0) {
2914 /* Check for too many substrings condition. */
2915 if (UNEXPECTED(count == 0)) {
2916 php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
2917 }
2918 if (!invert) {
2919 Z_TRY_ADDREF_P(entry);
2920
2921 /* Add to return array */
2922 if (string_key) {
2923 zend_hash_update(Z_ARRVAL_P(return_value), string_key, entry);
2924 } else {
2925 zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, entry);
2926 }
2927 }
2928 } else if (count == PCRE2_ERROR_NOMATCH) {
2929 if (invert) {
2930 Z_TRY_ADDREF_P(entry);
2931
2932 /* Add to return array */
2933 if (string_key) {
2934 zend_hash_update(Z_ARRVAL_P(return_value), string_key, entry);
2935 } else {
2936 zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, entry);
2937 }
2938 }
2939 } else {
2940 pcre_handle_exec_error(count);
2941 zend_tmp_string_release(tmp_subject_str);
2942 break;
2943 }
2944
2945 zend_tmp_string_release(tmp_subject_str);
2946 } ZEND_HASH_FOREACH_END();
2947 if (match_data != mdata) {
2948 pcre2_match_data_free(match_data);
2949 }
2950 }
2951 /* }}} */
2952
2953 /* {{{ Returns the error code of the last regexp execution. */
PHP_FUNCTION(preg_last_error)2954 PHP_FUNCTION(preg_last_error)
2955 {
2956 ZEND_PARSE_PARAMETERS_NONE();
2957
2958 RETURN_LONG(PCRE_G(error_code));
2959 }
2960 /* }}} */
2961
2962 /* {{{ Returns the error message of the last regexp execution. */
PHP_FUNCTION(preg_last_error_msg)2963 PHP_FUNCTION(preg_last_error_msg)
2964 {
2965 ZEND_PARSE_PARAMETERS_NONE();
2966
2967 RETURN_STRING(php_pcre_get_error_msg(PCRE_G(error_code)));
2968 }
2969 /* }}} */
2970
2971 /* {{{ module definition structures */
2972
2973 zend_module_entry pcre_module_entry = {
2974 STANDARD_MODULE_HEADER,
2975 "pcre",
2976 ext_functions,
2977 PHP_MINIT(pcre),
2978 PHP_MSHUTDOWN(pcre),
2979 PHP_RINIT(pcre),
2980 PHP_RSHUTDOWN(pcre),
2981 PHP_MINFO(pcre),
2982 PHP_PCRE_VERSION,
2983 PHP_MODULE_GLOBALS(pcre),
2984 PHP_GINIT(pcre),
2985 PHP_GSHUTDOWN(pcre),
2986 NULL,
2987 STANDARD_MODULE_PROPERTIES_EX
2988 };
2989
/* Emit ZEND_GET_MODULE(pcre) only when COMPILE_DL_PCRE is defined
 * (presumably a dynamically loadable build; confirm against the build config). */
#if defined(COMPILE_DL_PCRE)
ZEND_GET_MODULE(pcre)
#endif /* COMPILE_DL_PCRE */
2993
2994 /* }}} */
2995
2996 PHPAPI pcre2_match_context *php_pcre_mctx(void)
2997 {/*{{{*/
2998 return mctx;
2999 }/*}}}*/
3000
php_pcre_gctx(void)3001 PHPAPI pcre2_general_context *php_pcre_gctx(void)
3002 {/*{{{*/
3003 return gctx;
3004 }/*}}}*/
3005
php_pcre_cctx(void)3006 PHPAPI pcre2_compile_context *php_pcre_cctx(void)
3007 {/*{{{*/
3008 return cctx;
3009 }/*}}}*/
3010
php_pcre_pce_incref(pcre_cache_entry * pce)3011 PHPAPI void php_pcre_pce_incref(pcre_cache_entry *pce)
3012 {/*{{{*/
3013 assert(NULL != pce);
3014 pce->refcount++;
3015 }/*}}}*/
3016
php_pcre_pce_decref(pcre_cache_entry * pce)3017 PHPAPI void php_pcre_pce_decref(pcre_cache_entry *pce)
3018 {/*{{{*/
3019 assert(NULL != pce);
3020 assert(0 != pce->refcount);
3021 pce->refcount--;
3022 }/*}}}*/
3023
php_pcre_pce_re(pcre_cache_entry * pce)3024 PHPAPI pcre2_code *php_pcre_pce_re(pcre_cache_entry *pce)
3025 {/*{{{*/
3026 assert(NULL != pce);
3027 return pce->re;
3028 }/*}}}*/
3029