1 /*
2 +----------------------------------------------------------------------+
3 | Copyright (c) The PHP Group |
4 +----------------------------------------------------------------------+
5 | This source file is subject to version 3.01 of the PHP license, |
6 | that is bundled with this package in the file LICENSE, and is |
7 | available through the world-wide-web at the following url: |
8 | https://www.php.net/license/3_01.txt |
9 | If you did not receive a copy of the PHP license and are unable to |
10 | obtain it through the world-wide-web, please send a note to |
11 | license@php.net so we can mail you a copy immediately. |
12 +----------------------------------------------------------------------+
13 | Author: Andrei Zmievski <andrei@php.net> |
14 +----------------------------------------------------------------------+
15 */
16
17 #include "php.h"
18 #include "php_ini.h"
19 #include "php_pcre.h"
20 #include "ext/standard/info.h"
21 #include "ext/standard/basic_functions.h"
22 #include "zend_smart_str.h"
23 #include "SAPI.h"
24
/* Result ordering selectors and capture flags (PREG_* values). */
#define PREG_PATTERN_ORDER          1
#define PREG_SET_ORDER              2
#define PREG_OFFSET_CAPTURE         (1 << 8)
#define PREG_UNMATCHED_AS_NULL      (1 << 9)

/* PREG_SPLIT_* flag bits. */
#define PREG_SPLIT_NO_EMPTY         (1 << 0)
#define PREG_SPLIT_DELIM_CAPTURE    (1 << 1)
#define PREG_SPLIT_OFFSET_CAPTURE   (1 << 2)

/* PREG_GREP_* flag bits. */
#define PREG_GREP_INVERT            (1 << 0)

/* Set in a cache entry's preg_options when JIT code was generated for the pattern. */
#define PREG_JIT                    (1 << 3)

/* Number of cached patterns at which old entries start being evicted. */
#define PCRE_CACHE_SIZE             4096

/* 1 when the build has PCRE JIT support, 0 otherwise. */
#if defined(HAVE_PCRE_JIT_SUPPORT)
# define PHP_PCRE_JIT_SUPPORT 1
#else
# define PHP_PCRE_JIT_SUPPORT 0
#endif

/* PCRE2 version string; filled in during module startup and freed at module shutdown. */
char *php_pcre_version;
47
48 #include "php_pcre_arginfo.h"
49
/* One compiled pattern as stored in PCRE_G(pcre_cache). */
struct _pcre_cache_entry {
	/* Compiled pattern; released with pcre2_code_free() when the entry is destroyed. */
	pcre2_code *re;
	/* Pointer is not NULL when there are named captures.
	 * Length is equal to capture_count + 1 to account for capture group 0. */
	zend_string **subpats_table;
	/* PREG_* flags derived while compiling (PREG_JIT when JIT code was generated). */
	uint32_t preg_options;
	/* Number of capture groups as reported by PCRE2_INFO_CAPTURECOUNT. */
	uint32_t capture_count;
	/* PCRE2 option bits the pattern was compiled with. */
	uint32_t compile_options;
	/* Entries with a non-zero refcount are never evicted by pcre_clean_cache(). */
	uint32_t refcount;
};
60
PHPAPI ZEND_DECLARE_MODULE_GLOBALS(pcre)

#ifdef HAVE_PCRE_JIT_SUPPORT
/* Start and maximum sizes handed to pcre2_jit_stack_create(). */
#define PCRE_JIT_STACK_MIN_SIZE (32 * 1024)
#define PCRE_JIT_STACK_MAX_SIZE (192 * 1024)
ZEND_TLS pcre2_jit_stack *jit_stack = NULL;
#endif
/* General context using (infallible) system allocator. */
ZEND_TLS pcre2_general_context *gctx = NULL;
/* These two are global per thread for now. Though it is possible to use these
	per pattern. Either one can copy it and use in pce, or one does no global
	contexts at all, but creates for every pce. */
ZEND_TLS pcre2_compile_context *cctx = NULL;
ZEND_TLS pcre2_match_context *mctx = NULL;
/* Preallocated match data (PHP_PCRE_PREALLOC_MDATA_SIZE pairs); mdata_used is
 * set while php_pcre_create_match_data() has handed it out. */
ZEND_TLS pcre2_match_data *mdata = NULL;
ZEND_TLS bool mdata_used = 0;
/* Set to 1 by php_pcre_init_pcre2() once every context above exists. */
ZEND_TLS uint8_t pcre2_init_ok = 0;
#if defined(ZTS) && defined(HAVE_PCRE_JIT_SUPPORT)
/* Mutex created/destroyed on the main thread only; taken around the
 * initialization retry in PHP_RINIT. Note that the macros below already end in
 * a statement terminator (or are a bare `if`), so use them only as standalone
 * statements. */
static MUTEX_T pcre_mt = NULL;
#define php_pcre_mutex_alloc() \
	if (tsrm_is_main_thread() && !pcre_mt) pcre_mt = tsrm_mutex_alloc();
#define php_pcre_mutex_free() \
	if (tsrm_is_main_thread() && pcre_mt) { tsrm_mutex_free(pcre_mt); pcre_mt = NULL; }
#define php_pcre_mutex_lock() tsrm_mutex_lock(pcre_mt);
#define php_pcre_mutex_unlock() tsrm_mutex_unlock(pcre_mt);
#else
/* No locking needed: the macros expand to nothing. */
#define php_pcre_mutex_alloc()
#define php_pcre_mutex_free()
#define php_pcre_mutex_lock()
#define php_pcre_mutex_unlock()
#endif

/* Character tables generated by pcre2_maketables(), keyed by the ctype locale string. */
ZEND_TLS HashTable char_tables;

static void free_subpats_table(zend_string **subpat_names, uint32_t num_subpats, bool persistent);
96
/* Destructor for char_tables: releases the persistently allocated table held in the bucket. */
static void php_pcre_free_char_table(zval *data)
{/*{{{*/
	pefree(Z_PTR_P(data), 1);
}/*}}}*/
102
pcre_handle_exec_error(int pcre_code)103 static void pcre_handle_exec_error(int pcre_code) /* {{{ */
104 {
105 int preg_code = 0;
106
107 switch (pcre_code) {
108 case PCRE2_ERROR_MATCHLIMIT:
109 preg_code = PHP_PCRE_BACKTRACK_LIMIT_ERROR;
110 break;
111
112 case PCRE2_ERROR_RECURSIONLIMIT:
113 preg_code = PHP_PCRE_RECURSION_LIMIT_ERROR;
114 break;
115
116 case PCRE2_ERROR_BADUTFOFFSET:
117 preg_code = PHP_PCRE_BAD_UTF8_OFFSET_ERROR;
118 break;
119
120 #ifdef HAVE_PCRE_JIT_SUPPORT
121 case PCRE2_ERROR_JIT_STACKLIMIT:
122 preg_code = PHP_PCRE_JIT_STACKLIMIT_ERROR;
123 break;
124 #endif
125
126 default:
127 if (pcre_code <= PCRE2_ERROR_UTF8_ERR1 && pcre_code >= PCRE2_ERROR_UTF8_ERR21) {
128 preg_code = PHP_PCRE_BAD_UTF8_ERROR;
129 } else {
130 preg_code = PHP_PCRE_INTERNAL_ERROR;
131 }
132 break;
133 }
134
135 PCRE_G(error_code) = preg_code;
136 }
137 /* }}} */
138
/* Returns the static, human-readable message for a PHP_PCRE_* error code;
 * unrecognised codes yield "Unknown error". */
static const char *php_pcre_get_error_msg(php_pcre_error_code error_code) /* {{{ */
{
	const char *message;

	switch (error_code) {
		case PHP_PCRE_NO_ERROR:
			message = "No error";
			break;
		case PHP_PCRE_INTERNAL_ERROR:
			message = "Internal error";
			break;
		case PHP_PCRE_BAD_UTF8_ERROR:
			message = "Malformed UTF-8 characters, possibly incorrectly encoded";
			break;
		case PHP_PCRE_BAD_UTF8_OFFSET_ERROR:
			message = "The offset did not correspond to the beginning of a valid UTF-8 code point";
			break;
		case PHP_PCRE_BACKTRACK_LIMIT_ERROR:
			message = "Backtrack limit exhausted";
			break;
		case PHP_PCRE_RECURSION_LIMIT_ERROR:
			message = "Recursion limit exhausted";
			break;
#ifdef HAVE_PCRE_JIT_SUPPORT
		case PHP_PCRE_JIT_STACKLIMIT_ERROR:
			message = "JIT stack limit exhausted";
			break;
#endif
		default:
			message = "Unknown error";
			break;
	}

	return message;
}
/* }}} */
165
/* Destructor for the persistent pattern cache: frees the names table (if any),
 * the compiled pattern and the entry itself. */
static void php_free_pcre_cache(zval *data) /* {{{ */
{
	pcre_cache_entry *entry = (pcre_cache_entry *) Z_PTR_P(data);

	if (entry != NULL) {
		if (entry->subpats_table != NULL) {
			/* The table has one slot per capture group plus group 0. */
			free_subpats_table(entry->subpats_table, entry->capture_count + 1, true);
		}
		pcre2_code_free(entry->re);
		free(entry);
	}
}
/* }}} */
177
/* Destructor for the per-request pattern cache: same as php_free_pcre_cache()
 * but for entries and name tables allocated with the request allocator. */
static void php_efree_pcre_cache(zval *data) /* {{{ */
{
	pcre_cache_entry *entry = (pcre_cache_entry *) Z_PTR_P(data);

	if (entry != NULL) {
		if (entry->subpats_table != NULL) {
			/* The table has one slot per capture group plus group 0. */
			free_subpats_table(entry->subpats_table, entry->capture_count + 1, false);
		}
		pcre2_code_free(entry->re);
		efree(entry);
	}
}
/* }}} */
189
/* PCRE2 allocation callback backed by the persistent allocator; `data` is unused. */
static void *php_pcre_malloc(PCRE2_SIZE size, void *data)
{
	(void) data;

	return pemalloc(size, 1);
}
194
/* PCRE2 release callback paired with php_pcre_malloc(); `data` is unused. */
static void php_pcre_free(void *block, void *data)
{
	(void) data;

	pefree(block, 1);
}
199
/* PCRE2 allocation callback backed by the request allocator; `data` is unused. */
static void *php_pcre_emalloc(PCRE2_SIZE size, void *data)
{
	(void) data;

	return emalloc(size);
}
204
/* PCRE2 release callback paired with php_pcre_emalloc(); `data` is unused. */
static void php_pcre_efree(void *block, void *data)
{
	(void) data;

	efree(block);
}
209
/* Extra compile options applied to every pattern.
 * pcre 10.38 needs PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK, disabled by default. */
#if defined(PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK)
# define PHP_PCRE_DEFAULT_EXTRA_COPTIONS PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK
#else
# define PHP_PCRE_DEFAULT_EXTRA_COPTIONS 0
#endif

/* Size of the preallocated per-thread match data block. */
#define PHP_PCRE_PREALLOC_MDATA_SIZE 32
218
php_pcre_init_pcre2(uint8_t jit)219 static void php_pcre_init_pcre2(uint8_t jit)
220 {/*{{{*/
221 if (!gctx) {
222 gctx = pcre2_general_context_create(php_pcre_malloc, php_pcre_free, NULL);
223 if (!gctx) {
224 pcre2_init_ok = 0;
225 return;
226 }
227 }
228
229 if (!cctx) {
230 cctx = pcre2_compile_context_create(gctx);
231 if (!cctx) {
232 pcre2_init_ok = 0;
233 return;
234 }
235 }
236
237 pcre2_set_compile_extra_options(cctx, PHP_PCRE_DEFAULT_EXTRA_COPTIONS);
238
239 if (!mctx) {
240 mctx = pcre2_match_context_create(gctx);
241 if (!mctx) {
242 pcre2_init_ok = 0;
243 return;
244 }
245 }
246
247 #ifdef HAVE_PCRE_JIT_SUPPORT
248 if (jit && !jit_stack) {
249 jit_stack = pcre2_jit_stack_create(PCRE_JIT_STACK_MIN_SIZE, PCRE_JIT_STACK_MAX_SIZE, gctx);
250 if (!jit_stack) {
251 pcre2_init_ok = 0;
252 return;
253 }
254 }
255 #endif
256
257 if (!mdata) {
258 mdata = pcre2_match_data_create(PHP_PCRE_PREALLOC_MDATA_SIZE, gctx);
259 if (!mdata) {
260 pcre2_init_ok = 0;
261 return;
262 }
263 }
264
265 pcre2_init_ok = 1;
266 }/*}}}*/
267
php_pcre_shutdown_pcre2(void)268 static void php_pcre_shutdown_pcre2(void)
269 {/*{{{*/
270 if (gctx) {
271 pcre2_general_context_free(gctx);
272 gctx = NULL;
273 }
274
275 if (cctx) {
276 pcre2_compile_context_free(cctx);
277 cctx = NULL;
278 }
279
280 if (mctx) {
281 pcre2_match_context_free(mctx);
282 mctx = NULL;
283 }
284
285 #ifdef HAVE_PCRE_JIT_SUPPORT
286 /* Stack may only be destroyed when no cached patterns
287 possibly associated with it do exist. */
288 if (jit_stack) {
289 pcre2_jit_stack_free(jit_stack);
290 jit_stack = NULL;
291 }
292 #endif
293
294 if (mdata) {
295 pcre2_match_data_free(mdata);
296 mdata = NULL;
297 }
298
299 pcre2_init_ok = 0;
300 }/*}}}*/
301
PHP_GINIT_FUNCTION(pcre)302 static PHP_GINIT_FUNCTION(pcre) /* {{{ */
303 {
304 php_pcre_mutex_alloc();
305
306 /* If we're on the CLI SAPI, there will only be one request, so we don't need the
307 * cache to survive after RSHUTDOWN. */
308 pcre_globals->per_request_cache = strcmp(sapi_module.name, "cli") == 0;
309 if (!pcre_globals->per_request_cache) {
310 zend_hash_init(&pcre_globals->pcre_cache, 0, NULL, php_free_pcre_cache, 1);
311 }
312
313 pcre_globals->backtrack_limit = 0;
314 pcre_globals->recursion_limit = 0;
315 pcre_globals->error_code = PHP_PCRE_NO_ERROR;
316 ZVAL_UNDEF(&pcre_globals->unmatched_null_pair);
317 ZVAL_UNDEF(&pcre_globals->unmatched_empty_pair);
318 #ifdef HAVE_PCRE_JIT_SUPPORT
319 pcre_globals->jit = 1;
320 #endif
321
322 php_pcre_init_pcre2(1);
323 zend_hash_init(&char_tables, 1, NULL, php_pcre_free_char_table, 1);
324 }
325 /* }}} */
326
PHP_GSHUTDOWN_FUNCTION(pcre)327 static PHP_GSHUTDOWN_FUNCTION(pcre) /* {{{ */
328 {
329 if (!pcre_globals->per_request_cache) {
330 zend_hash_destroy(&pcre_globals->pcre_cache);
331 }
332
333 php_pcre_shutdown_pcre2();
334 zend_hash_destroy(&char_tables);
335 php_pcre_mutex_free();
336 }
337 /* }}} */
338
PHP_INI_MH(OnUpdateBacktrackLimit)339 static PHP_INI_MH(OnUpdateBacktrackLimit)
340 {/*{{{*/
341 OnUpdateLong(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
342 if (mctx) {
343 pcre2_set_match_limit(mctx, (uint32_t)PCRE_G(backtrack_limit));
344 }
345
346 return SUCCESS;
347 }/*}}}*/
348
PHP_INI_MH(OnUpdateRecursionLimit)349 static PHP_INI_MH(OnUpdateRecursionLimit)
350 {/*{{{*/
351 OnUpdateLong(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
352 if (mctx) {
353 pcre2_set_depth_limit(mctx, (uint32_t)PCRE_G(recursion_limit));
354 }
355
356 return SUCCESS;
357 }/*}}}*/
358
#ifdef HAVE_PCRE_JIT_SUPPORT
/* INI handler for pcre.jit: stores the flag through OnUpdateBool, then attaches
 * the JIT stack to the match context when JIT is on and a stack exists, or
 * detaches it otherwise. Always reports SUCCESS. */
static PHP_INI_MH(OnUpdateJit)
{/*{{{*/
	(void) OnUpdateBool(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);

	pcre2_jit_stack *stack_to_assign = NULL;
	if (PCRE_G(jit) && jit_stack) {
		stack_to_assign = jit_stack;
	}
	pcre2_jit_stack_assign(mctx, NULL, stack_to_assign);

	return SUCCESS;
}/*}}}*/
#endif
372
373 PHP_INI_BEGIN()
374 STD_PHP_INI_ENTRY("pcre.backtrack_limit", "1000000", PHP_INI_ALL, OnUpdateBacktrackLimit, backtrack_limit, zend_pcre_globals, pcre_globals)
375 STD_PHP_INI_ENTRY("pcre.recursion_limit", "100000", PHP_INI_ALL, OnUpdateRecursionLimit, recursion_limit, zend_pcre_globals, pcre_globals)
376 #ifdef HAVE_PCRE_JIT_SUPPORT
377 STD_PHP_INI_BOOLEAN("pcre.jit", "1", PHP_INI_ALL, OnUpdateJit, jit, zend_pcre_globals, pcre_globals)
378 #endif
PHP_INI_END()379 PHP_INI_END()
380
/* Fetches a string-valued pcre2_config() item into a freshly malloc()ed buffer.
 *
 * what: one of the string-valued PCRE2_CONFIG_* selectors.
 *
 * Returns the NUL-terminated value, which the caller must release with free(),
 * or NULL when the item is unavailable (pcre2_config() reports an error or an
 * empty length) or the buffer cannot be allocated. */
static char *_pcre2_config_str(uint32_t what)
{/*{{{*/
	/* First call with a NULL buffer only asks for the required length.
	 * A negative value is a PCRE2 error code (e.g. PCRE2_ERROR_BADOPTION) and
	 * must never reach malloc(). */
	int len = pcre2_config(what, NULL);
	if (len <= 0) {
		return NULL;
	}

	char *ret = (char *) malloc((size_t) len + 1);
	if (ret == NULL) {
		return NULL;
	}

	/* Second call fills the buffer; treat errors like "no value" so callers
	 * never see an uninitialised string. */
	len = pcre2_config(what, ret);
	if (len <= 0) {
		free(ret);
		return NULL;
	}

	return ret;
}/*}}}*/
394
395 /* {{{ PHP_MINFO_FUNCTION(pcre) */
PHP_MINFO_FUNCTION(pcre)396 static PHP_MINFO_FUNCTION(pcre)
397 {
398 #ifdef HAVE_PCRE_JIT_SUPPORT
399 uint32_t flag = 0;
400 char *jit_target = _pcre2_config_str(PCRE2_CONFIG_JITTARGET);
401 #endif
402 char *version = _pcre2_config_str(PCRE2_CONFIG_VERSION);
403 char *unicode = _pcre2_config_str(PCRE2_CONFIG_UNICODE_VERSION);
404
405 php_info_print_table_start();
406 php_info_print_table_row(2, "PCRE (Perl Compatible Regular Expressions) Support", "enabled" );
407 php_info_print_table_row(2, "PCRE Library Version", version);
408 free(version);
409 php_info_print_table_row(2, "PCRE Unicode Version", unicode);
410 free(unicode);
411
412 #ifdef HAVE_PCRE_JIT_SUPPORT
413 if (!pcre2_config(PCRE2_CONFIG_JIT, &flag)) {
414 php_info_print_table_row(2, "PCRE JIT Support", flag ? "enabled" : "disabled");
415 } else {
416 php_info_print_table_row(2, "PCRE JIT Support", "unknown" );
417 }
418 if (jit_target) {
419 php_info_print_table_row(2, "PCRE JIT Target", jit_target);
420 }
421 free(jit_target);
422 #else
423 php_info_print_table_row(2, "PCRE JIT Support", "not compiled in" );
424 #endif
425
426 #ifdef HAVE_PCRE_VALGRIND_SUPPORT
427 php_info_print_table_row(2, "PCRE Valgrind Support", "enabled" );
428 #endif
429
430 php_info_print_table_end();
431
432 DISPLAY_INI_ENTRIES();
433 }
434 /* }}} */
435
436 /* {{{ PHP_MINIT_FUNCTION(pcre) */
PHP_MINIT_FUNCTION(pcre)437 static PHP_MINIT_FUNCTION(pcre)
438 {
439 #ifdef HAVE_PCRE_JIT_SUPPORT
440 if (UNEXPECTED(!pcre2_init_ok)) {
441 /* Retry. */
442 php_pcre_init_pcre2(PCRE_G(jit));
443 if (!pcre2_init_ok) {
444 return FAILURE;
445 }
446 }
447 #endif
448
449 REGISTER_INI_ENTRIES();
450
451 php_pcre_version = _pcre2_config_str(PCRE2_CONFIG_VERSION);
452
453 register_php_pcre_symbols(module_number);
454
455 return SUCCESS;
456 }
457 /* }}} */
458
459 /* {{{ PHP_MSHUTDOWN_FUNCTION(pcre) */
PHP_MSHUTDOWN_FUNCTION(pcre)460 static PHP_MSHUTDOWN_FUNCTION(pcre)
461 {
462 UNREGISTER_INI_ENTRIES();
463
464 free(php_pcre_version);
465
466 return SUCCESS;
467 }
468 /* }}} */
469
470 /* {{{ PHP_RINIT_FUNCTION(pcre) */
PHP_RINIT_FUNCTION(pcre)471 static PHP_RINIT_FUNCTION(pcre)
472 {
473 #ifdef HAVE_PCRE_JIT_SUPPORT
474 if (UNEXPECTED(!pcre2_init_ok)) {
475 /* Retry. */
476 php_pcre_mutex_lock();
477 php_pcre_init_pcre2(PCRE_G(jit));
478 if (!pcre2_init_ok) {
479 php_pcre_mutex_unlock();
480 return FAILURE;
481 }
482 php_pcre_mutex_unlock();
483 }
484
485 mdata_used = 0;
486 #endif
487
488 PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
489 PCRE_G(gctx_zmm) = pcre2_general_context_create(php_pcre_emalloc, php_pcre_efree, NULL);
490 if (!PCRE_G(gctx_zmm)) {
491 return FAILURE;
492 }
493
494 if (PCRE_G(per_request_cache)) {
495 zend_hash_init(&PCRE_G(pcre_cache), 0, NULL, php_efree_pcre_cache, 0);
496 }
497
498 return SUCCESS;
499 }
500 /* }}} */
501
PHP_RSHUTDOWN_FUNCTION(pcre)502 static PHP_RSHUTDOWN_FUNCTION(pcre)
503 {
504 pcre2_general_context_free(PCRE_G(gctx_zmm));
505 PCRE_G(gctx_zmm) = NULL;
506
507 if (PCRE_G(per_request_cache)) {
508 zend_hash_destroy(&PCRE_G(pcre_cache));
509 }
510
511 zval_ptr_dtor(&PCRE_G(unmatched_null_pair));
512 zval_ptr_dtor(&PCRE_G(unmatched_empty_pair));
513 ZVAL_UNDEF(&PCRE_G(unmatched_null_pair));
514 ZVAL_UNDEF(&PCRE_G(unmatched_empty_pair));
515 return SUCCESS;
516 }
517
518 /* {{{ static pcre_clean_cache */
pcre_clean_cache(zval * data,void * arg)519 static int pcre_clean_cache(zval *data, void *arg)
520 {
521 pcre_cache_entry *pce = (pcre_cache_entry *) Z_PTR_P(data);
522 int *num_clean = (int *)arg;
523
524 if (*num_clean > 0 && !pce->refcount) {
525 (*num_clean)--;
526 return ZEND_HASH_APPLY_REMOVE;
527 } else {
528 return ZEND_HASH_APPLY_KEEP;
529 }
530 }
531 /* }}} */
532
/* Releases every non-NULL name in a subpattern-name table of `num_subpats`
 * slots, then the table itself. `persistent` must match how the table and its
 * strings were allocated. */
static void free_subpats_table(zend_string **subpat_names, uint32_t num_subpats, bool persistent) {
	zend_string **slot = subpat_names;
	zend_string **const end = subpat_names + num_subpats;

	for (; slot < end; slot++) {
		if (*slot != NULL) {
			zend_string_release_ex(*slot, persistent);
		}
	}
	pefree(subpat_names, persistent);
}
542
543 /* {{{ static make_subpats_table */
make_subpats_table(uint32_t name_cnt,pcre_cache_entry * pce,bool persistent)544 static zend_string **make_subpats_table(uint32_t name_cnt, pcre_cache_entry *pce, bool persistent)
545 {
546 uint32_t num_subpats = pce->capture_count + 1;
547 uint32_t name_size, ni = 0;
548 char *name_table;
549 zend_string **subpat_names;
550 int rc1, rc2;
551
552 rc1 = pcre2_pattern_info(pce->re, PCRE2_INFO_NAMETABLE, &name_table);
553 rc2 = pcre2_pattern_info(pce->re, PCRE2_INFO_NAMEENTRYSIZE, &name_size);
554 if (rc1 < 0 || rc2 < 0) {
555 php_error_docref(NULL, E_WARNING, "Internal pcre2_pattern_info() error %d", rc1 < 0 ? rc1 : rc2);
556 return NULL;
557 }
558
559 subpat_names = pecalloc(num_subpats, sizeof(zend_string *), persistent);
560 while (ni++ < name_cnt) {
561 unsigned short name_idx = 0x100 * (unsigned char)name_table[0] + (unsigned char)name_table[1];
562 const char *name = name_table + 2;
563 /* Note: this makes a persistent string when the cache is not request-based because the string
564 * has to outlive the request. In that case, they will only be used within this thread
565 * and never be shared.
566 * Although we will be storing them in user-exposed arrays, they cannot cause problems
567 * because they only live in this thread and the last reference is deleted on shutdown
568 * instead of by user code. */
569 subpat_names[name_idx] = zend_string_init(name, strlen(name), persistent);
570 if (persistent) {
571 GC_MAKE_PERSISTENT_LOCAL(subpat_names[name_idx]);
572 }
573 name_table += name_size;
574 }
575 return subpat_names;
576 }
577 /* }}} */
578
579 /* {{{ static calculate_unit_length */
580 /* Calculates the byte length of the next character. Assumes valid UTF-8 for PCRE2_UTF. */
calculate_unit_length(pcre_cache_entry * pce,const char * start)581 static zend_always_inline size_t calculate_unit_length(pcre_cache_entry *pce, const char *start)
582 {
583 size_t unit_len;
584
585 if (pce->compile_options & PCRE2_UTF) {
586 const char *end = start;
587
588 /* skip continuation bytes */
589 while ((*++end & 0xC0) == 0x80);
590 unit_len = end - start;
591 } else {
592 unit_len = 1;
593 }
594 return unit_len;
595 }
596 /* }}} */
597
598 /* {{{ pcre_get_compiled_regex_cache */
pcre_get_compiled_regex_cache_ex(zend_string * regex,bool locale_aware)599 PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, bool locale_aware)
600 {
601 pcre2_code *re = NULL;
602 #if 10 == PCRE2_MAJOR && 37 == PCRE2_MINOR && !defined(HAVE_BUNDLED_PCRE)
603 uint32_t coptions = PCRE2_NO_START_OPTIMIZE;
604 #else
605 uint32_t coptions = 0;
606 #endif
607 uint32_t eoptions = PHP_PCRE_DEFAULT_EXTRA_COPTIONS;
608 PCRE2_UCHAR error[128];
609 PCRE2_SIZE erroffset;
610 int errnumber;
611 char delimiter;
612 char start_delimiter;
613 char end_delimiter;
614 char *p, *pp;
615 char *pattern;
616 size_t pattern_len;
617 uint32_t poptions = 0;
618 const uint8_t *tables = NULL;
619 zval *zv;
620 pcre_cache_entry new_entry;
621 int rc;
622 zend_string *key;
623 pcre_cache_entry *ret;
624
625 if (locale_aware && BG(ctype_string)) {
626 key = zend_string_concat2(
627 ZSTR_VAL(BG(ctype_string)), ZSTR_LEN(BG(ctype_string)),
628 ZSTR_VAL(regex), ZSTR_LEN(regex));
629 } else {
630 key = regex;
631 }
632
633 /* Try to lookup the cached regex entry, and if successful, just pass
634 back the compiled pattern, otherwise go on and compile it. */
635 zv = zend_hash_find(&PCRE_G(pcre_cache), key);
636 if (zv) {
637 if (key != regex) {
638 zend_string_release_ex(key, 0);
639 }
640 return (pcre_cache_entry*)Z_PTR_P(zv);
641 }
642
643 p = ZSTR_VAL(regex);
644 const char* end_p = ZSTR_VAL(regex) + ZSTR_LEN(regex);
645
646 /* Parse through the leading whitespace, and display a warning if we
647 get to the end without encountering a delimiter. */
648 while (isspace((int)*(unsigned char *)p)) p++;
649 if (p >= end_p) {
650 if (key != regex) {
651 zend_string_release_ex(key, 0);
652 }
653 php_error_docref(NULL, E_WARNING, "Empty regular expression");
654 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
655 return NULL;
656 }
657
658 /* Get the delimiter and display a warning if it is alphanumeric
659 or a backslash. */
660 delimiter = *p++;
661 if (isalnum((int)*(unsigned char *)&delimiter) || delimiter == '\\' || delimiter == '\0') {
662 if (key != regex) {
663 zend_string_release_ex(key, 0);
664 }
665 php_error_docref(NULL, E_WARNING, "Delimiter must not be alphanumeric, backslash, or NUL byte");
666 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
667 return NULL;
668 }
669
670 start_delimiter = delimiter;
671 if ((pp = strchr("([{< )]}> )]}>", delimiter)))
672 delimiter = pp[5];
673 end_delimiter = delimiter;
674
675 pp = p;
676
677 if (start_delimiter == end_delimiter) {
678 /* We need to iterate through the pattern, searching for the ending delimiter,
679 but skipping the backslashed delimiters. If the ending delimiter is not
680 found, display a warning. */
681 while (pp < end_p) {
682 if (*pp == '\\' && pp + 1 < end_p) pp++;
683 else if (*pp == delimiter)
684 break;
685 pp++;
686 }
687 } else {
688 /* We iterate through the pattern, searching for the matching ending
689 * delimiter. For each matching starting delimiter, we increment nesting
690 * level, and decrement it for each matching ending delimiter. If we
691 * reach the end of the pattern without matching, display a warning.
692 */
693 int brackets = 1; /* brackets nesting level */
694 while (pp < end_p) {
695 if (*pp == '\\' && pp + 1 < end_p) pp++;
696 else if (*pp == end_delimiter && --brackets <= 0)
697 break;
698 else if (*pp == start_delimiter)
699 brackets++;
700 pp++;
701 }
702 }
703
704 if (pp >= end_p) {
705 if (key != regex) {
706 zend_string_release_ex(key, 0);
707 }
708 if (start_delimiter == end_delimiter) {
709 php_error_docref(NULL,E_WARNING, "No ending delimiter '%c' found", delimiter);
710 } else {
711 php_error_docref(NULL,E_WARNING, "No ending matching delimiter '%c' found", delimiter);
712 }
713 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
714 return NULL;
715 }
716
717 /* Make a copy of the actual pattern. */
718 pattern_len = pp - p;
719 pattern = estrndup(p, pattern_len);
720
721 /* Move on to the options */
722 pp++;
723
724 /* Parse through the options, setting appropriate flags. Display
725 a warning if we encounter an unknown modifier. */
726 while (pp < end_p) {
727 switch (*pp++) {
728 /* Perl compatible options */
729 case 'i': coptions |= PCRE2_CASELESS; break;
730 case 'm': coptions |= PCRE2_MULTILINE; break;
731 case 'n': coptions |= PCRE2_NO_AUTO_CAPTURE; break;
732 case 's': coptions |= PCRE2_DOTALL; break;
733 case 'x': coptions |= PCRE2_EXTENDED; break;
734
735 /* PCRE specific options */
736 case 'A': coptions |= PCRE2_ANCHORED; break;
737 case 'D': coptions |= PCRE2_DOLLAR_ENDONLY;break;
738 #ifdef PCRE2_EXTRA_CASELESS_RESTRICT
739 case 'r': eoptions |= PCRE2_EXTRA_CASELESS_RESTRICT; break;
740 #endif
741 case 'S': /* Pass. */ break;
742 case 'X': /* Pass. */ break;
743 case 'U': coptions |= PCRE2_UNGREEDY; break;
744 case 'u': coptions |= PCRE2_UTF;
745 /* In PCRE, by default, \d, \D, \s, \S, \w, and \W recognize only ASCII
746 characters, even in UTF-8 mode. However, this can be changed by setting
747 the PCRE2_UCP option. */
748 #ifdef PCRE2_UCP
749 coptions |= PCRE2_UCP;
750 #endif
751 break;
752 case 'J': coptions |= PCRE2_DUPNAMES; break;
753
754 case ' ':
755 case '\n':
756 case '\r':
757 break;
758
759 case 'e': /* legacy eval */
760 default:
761 if (pp[-1]) {
762 php_error_docref(NULL, E_WARNING, "Unknown modifier '%c'", pp[-1]);
763 } else {
764 php_error_docref(NULL, E_WARNING, "NUL byte is not a valid modifier");
765 }
766 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
767 efree(pattern);
768 if (key != regex) {
769 zend_string_release_ex(key, 0);
770 }
771 return NULL;
772 }
773 }
774
775 if (key != regex) {
776 tables = (uint8_t *)zend_hash_find_ptr(&char_tables, BG(ctype_string));
777 if (!tables) {
778 zend_string *_k;
779 tables = pcre2_maketables(gctx);
780 if (UNEXPECTED(!tables)) {
781 php_error_docref(NULL,E_WARNING, "Failed to generate locale character tables");
782 pcre_handle_exec_error(PCRE2_ERROR_NOMEMORY);
783 zend_string_release_ex(key, 0);
784 efree(pattern);
785 return NULL;
786 }
787 _k = zend_string_init(ZSTR_VAL(BG(ctype_string)), ZSTR_LEN(BG(ctype_string)), 1);
788 GC_MAKE_PERSISTENT_LOCAL(_k);
789 zend_hash_add_ptr(&char_tables, _k, (void *)tables);
790 zend_string_release(_k);
791 }
792 }
793 pcre2_set_character_tables(cctx, tables);
794
795 pcre2_set_compile_extra_options(cctx, eoptions);
796
797 /* Compile pattern and display a warning if compilation failed. */
798 re = pcre2_compile((PCRE2_SPTR)pattern, pattern_len, coptions, &errnumber, &erroffset, cctx);
799
800 if (re == NULL) {
801 if (key != regex) {
802 zend_string_release_ex(key, 0);
803 }
804 pcre2_get_error_message(errnumber, error, sizeof(error));
805 php_error_docref(NULL,E_WARNING, "Compilation failed: %s at offset %zu", error, erroffset);
806 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
807 efree(pattern);
808 return NULL;
809 }
810
811 #ifdef HAVE_PCRE_JIT_SUPPORT
812 if (PCRE_G(jit)) {
813 /* Enable PCRE JIT compiler */
814 rc = pcre2_jit_compile(re, PCRE2_JIT_COMPLETE);
815 if (EXPECTED(rc >= 0)) {
816 size_t jit_size = 0;
817 if (!pcre2_pattern_info(re, PCRE2_INFO_JITSIZE, &jit_size) && jit_size > 0) {
818 poptions |= PREG_JIT;
819 }
820 } else if (rc == PCRE2_ERROR_NOMEMORY) {
821 php_error_docref(NULL, E_WARNING,
822 "Allocation of JIT memory failed, PCRE JIT will be disabled. "
823 "This is likely caused by security restrictions. "
824 "Either grant PHP permission to allocate executable memory, or set pcre.jit=0");
825 PCRE_G(jit) = 0;
826 } else {
827 pcre2_get_error_message(rc, error, sizeof(error));
828 php_error_docref(NULL, E_WARNING, "JIT compilation failed: %s", error);
829 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
830 }
831 }
832 #endif
833 efree(pattern);
834
835 /*
836 * If we reached cache limit, clean out the items from the head of the list;
837 * these are supposedly the oldest ones (but not necessarily the least used
838 * ones).
839 */
840 if (zend_hash_num_elements(&PCRE_G(pcre_cache)) == PCRE_CACHE_SIZE) {
841 int num_clean = PCRE_CACHE_SIZE / 8;
842 zend_hash_apply_with_argument(&PCRE_G(pcre_cache), pcre_clean_cache, &num_clean);
843 }
844
845 /* Store the compiled pattern and extra info in the cache. */
846 new_entry.re = re;
847 new_entry.preg_options = poptions;
848 new_entry.compile_options = coptions;
849 new_entry.refcount = 0;
850
851 rc = pcre2_pattern_info(re, PCRE2_INFO_CAPTURECOUNT, &new_entry.capture_count);
852 if (rc < 0) {
853 if (key != regex) {
854 zend_string_release_ex(key, 0);
855 }
856 php_error_docref(NULL, E_WARNING, "Internal pcre2_pattern_info() error %d", rc);
857 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
858 return NULL;
859 }
860
861 uint32_t name_count;
862 rc = pcre2_pattern_info(re, PCRE2_INFO_NAMECOUNT, &name_count);
863 if (rc < 0) {
864 if (key != regex) {
865 zend_string_release_ex(key, 0);
866 }
867 php_error_docref(NULL, E_WARNING, "Internal pcre_pattern_info() error %d", rc);
868 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
869 return NULL;
870 }
871
872 /* Compute and cache the subpattern table to avoid computing it again over and over. */
873 if (name_count > 0) {
874 new_entry.subpats_table = make_subpats_table(name_count, &new_entry, !PCRE_G(per_request_cache));
875 if (!new_entry.subpats_table) {
876 if (key != regex) {
877 zend_string_release_ex(key, false);
878 }
879 /* Warning already emitted by make_subpats_table() */
880 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
881 return NULL;
882 }
883 } else {
884 new_entry.subpats_table = NULL;
885 }
886
887 /*
888 * Interned strings are not duplicated when stored in HashTable,
889 * but all the interned strings created during HTTP request are removed
890 * at end of request. However PCRE_G(pcre_cache) must be consistent
891 * on the next request as well. So we disable usage of interned strings
892 * as hash keys especually for this table.
893 * See bug #63180
894 */
895 if (!(GC_FLAGS(key) & IS_STR_PERMANENT) && !PCRE_G(per_request_cache)) {
896 zend_string *str = zend_string_init(ZSTR_VAL(key), ZSTR_LEN(key), 1);
897 GC_MAKE_PERSISTENT_LOCAL(str);
898
899 ret = zend_hash_add_new_mem(&PCRE_G(pcre_cache), str, &new_entry, sizeof(pcre_cache_entry));
900 zend_string_release(str);
901 } else {
902 ret = zend_hash_add_new_mem(&PCRE_G(pcre_cache), key, &new_entry, sizeof(pcre_cache_entry));
903 }
904
905 if (key != regex) {
906 zend_string_release_ex(key, 0);
907 }
908
909 return ret;
910 }
911 /* }}} */
912
913 /* {{{ pcre_get_compiled_regex_cache */
pcre_get_compiled_regex_cache(zend_string * regex)914 PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex)
915 {
916 return pcre_get_compiled_regex_cache_ex(regex, true);
917 }
918 /* }}} */
919
920 /* {{{ pcre_get_compiled_regex */
pcre_get_compiled_regex(zend_string * regex,uint32_t * capture_count)921 PHPAPI pcre2_code *pcre_get_compiled_regex(zend_string *regex, uint32_t *capture_count)
922 {
923 pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex);
924
925 if (capture_count) {
926 *capture_count = pce ? pce->capture_count : 0;
927 }
928
929 return pce ? pce->re : NULL;
930 }
931 /* }}} */
932
933 /* XXX For the cases where it's only about match yes/no and no capture
934 required, perhaps just a minimum sized data would suffice. */
php_pcre_create_match_data(uint32_t capture_count,pcre2_code * re)935 PHPAPI pcre2_match_data *php_pcre_create_match_data(uint32_t capture_count, pcre2_code *re)
936 {/*{{{*/
937
938 assert(NULL != re);
939
940 if (EXPECTED(!mdata_used)) {
941 int rc = 0;
942
943 if (!capture_count) {
944 /* As we deal with a non cached pattern, no other way to gather this info. */
945 rc = pcre2_pattern_info(re, PCRE2_INFO_CAPTURECOUNT, &capture_count);
946 }
947
948 if (rc >= 0 && capture_count + 1 <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
949 mdata_used = 1;
950 return mdata;
951 }
952 }
953
954 return pcre2_match_data_create_from_pattern(re, gctx);
955 }/*}}}*/
956
php_pcre_free_match_data(pcre2_match_data * match_data)957 PHPAPI void php_pcre_free_match_data(pcre2_match_data *match_data)
958 {/*{{{*/
959 if (UNEXPECTED(match_data != mdata)) {
960 pcre2_match_data_free(match_data);
961 } else {
962 mdata_used = 0;
963 }
964 }/*}}}*/
965
init_unmatched_null_pair(void)966 static void init_unmatched_null_pair(void) {
967 zval val1, val2;
968 ZVAL_NULL(&val1);
969 ZVAL_LONG(&val2, -1);
970 ZVAL_ARR(&PCRE_G(unmatched_null_pair), zend_new_pair(&val1, &val2));
971 }
972
init_unmatched_empty_pair(void)973 static void init_unmatched_empty_pair(void) {
974 zval val1, val2;
975 ZVAL_EMPTY_STRING(&val1);
976 ZVAL_LONG(&val2, -1);
977 ZVAL_ARR(&PCRE_G(unmatched_empty_pair), zend_new_pair(&val1, &val2));
978 }
979
populate_match_value_str(zval * val,const char * subject,PCRE2_SIZE start_offset,PCRE2_SIZE end_offset)980 static zend_always_inline void populate_match_value_str(
981 zval *val, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset) {
982 ZVAL_STRINGL_FAST(val, subject + start_offset, end_offset - start_offset);
983 }
984
populate_match_value(zval * val,const char * subject,PCRE2_SIZE start_offset,PCRE2_SIZE end_offset,bool unmatched_as_null)985 static zend_always_inline void populate_match_value(
986 zval *val, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset,
987 bool unmatched_as_null) {
988 if (PCRE2_UNSET == start_offset) {
989 if (unmatched_as_null) {
990 ZVAL_NULL(val);
991 } else {
992 ZVAL_EMPTY_STRING(val);
993 }
994 } else {
995 populate_match_value_str(val, subject, start_offset, end_offset);
996 }
997 }
998
/* Stores `val` under `name` in `subpats` and takes a reference on it when it
 * was actually stored. */
static inline void add_named(
		HashTable *const subpats, zend_string *name, zval *val, bool unmatched) {
	/* If the DUPNAMES option is used, multiple subpatterns might have the same name.
	 * In this case we want to preserve the one that actually has a value. */
	if (unmatched) {
		/* Only fill the slot if nothing is there yet. */
		if (zend_hash_add(subpats, name, val) == NULL) {
			return;
		}
	} else {
		zend_hash_update(subpats, name, val);
	}

	Z_TRY_ADDREF_P(val);
}
1012
1013 /* {{{ add_offset_pair */
add_offset_pair(HashTable * const result,const char * subject,PCRE2_SIZE start_offset,PCRE2_SIZE end_offset,zend_string * name,zend_long unmatched_as_null)1014 static inline void add_offset_pair(
1015 HashTable *const result, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset,
1016 zend_string *name, zend_long unmatched_as_null)
1017 {
1018 zval match_pair;
1019
1020 /* Add (match, offset) to the return value */
1021 if (PCRE2_UNSET == start_offset) {
1022 if (unmatched_as_null) {
1023 if (Z_ISUNDEF(PCRE_G(unmatched_null_pair))) {
1024 init_unmatched_null_pair();
1025 }
1026 ZVAL_COPY(&match_pair, &PCRE_G(unmatched_null_pair));
1027 } else {
1028 if (Z_ISUNDEF(PCRE_G(unmatched_empty_pair))) {
1029 init_unmatched_empty_pair();
1030 }
1031 ZVAL_COPY(&match_pair, &PCRE_G(unmatched_empty_pair));
1032 }
1033 } else {
1034 zval val1, val2;
1035 populate_match_value_str(&val1, subject, start_offset, end_offset);
1036 ZVAL_LONG(&val2, start_offset);
1037 ZVAL_ARR(&match_pair, zend_new_pair(&val1, &val2));
1038 }
1039
1040 if (name) {
1041 add_named(result, name, &match_pair, start_offset == PCRE2_UNSET);
1042 }
1043 zend_hash_next_index_insert_new(result, &match_pair);
1044 }
1045 /* }}} */
1046
/* Fill the array zval `subpats` with the captures of a single match.
 *
 *  - offsets:      start/end offset pairs, two entries per group
 *  - subpat_names: per-group name table (entries may be NULL), or NULL when
 *                  no named keys should be produced
 *  - num_subpats:  total number of groups, including group 0
 *  - count:        number of groups for which `offsets` holds data
 *  - mark:         optional mark name, stored under the "MARK" key
 *  - flags:        PREG_OFFSET_CAPTURE stores (match, offset) pairs instead of
 *                  plain strings; PREG_UNMATCHED_AS_NULL stores NULL for unset
 *                  groups and pads the groups beyond `count` with NULLs
 */
static void populate_subpat_array(
		zval *subpats, const char *subject, PCRE2_SIZE *offsets, zend_string **subpat_names,
		uint32_t num_subpats, int count, const PCRE2_SPTR mark, zend_long flags) {
	HashTable *dest = Z_ARRVAL_P(subpats);
	const zend_long with_offsets = flags & PREG_OFFSET_CAPTURE;
	const zend_long null_for_unmatched = flags & PREG_UNMATCHED_AS_NULL;
	int i;

	/* Groups for which the match produced offsets. */
	for (i = 0; i < count; i++) {
		zend_string *group_name = subpat_names ? subpat_names[i] : NULL;
		const PCRE2_SIZE group_start = offsets[2*i];
		const PCRE2_SIZE group_end = offsets[2*i+1];

		if (with_offsets) {
			add_offset_pair(dest, subject, group_start, group_end, group_name, null_for_unmatched);
		} else {
			zval val;

			populate_match_value(&val, subject, group_start, group_end, null_for_unmatched);
			if (group_name) {
				add_named(dest, group_name, &val, group_start == PCRE2_UNSET);
			}
			zend_hash_next_index_insert_new(dest, &val);
		}
	}

	/* Remaining groups are only emitted (as NULLs) when explicitly requested. */
	if (null_for_unmatched) {
		for (i = count; i < num_subpats; i++) {
			zend_string *group_name = subpat_names ? subpat_names[i] : NULL;

			if (with_offsets) {
				add_offset_pair(dest, NULL, PCRE2_UNSET, PCRE2_UNSET, group_name, 1);
			} else if (subpat_names) {
				zval val;

				ZVAL_NULL(&val);
				if (group_name) {
					/* Never overwrite a same-named group that already has an entry. */
					zend_hash_add(dest, group_name, &val);
				}
				zend_hash_next_index_insert_new(dest, &val);
			} else {
				add_next_index_null(subpats);
			}
		}
	}

	/* Add MARK, if available */
	if (mark) {
		add_assoc_string_ex(subpats, "MARK", sizeof("MARK") - 1, (char *)mark);
	}
}
1115
/* Common entry point behind preg_match() (global == false) and
 * preg_match_all() (global == true): parses the userland arguments, looks up
 * the compiled pattern and hands over to php_pcre_match_impl(). Returns false
 * to userland when the pattern cannot be compiled. */
static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, bool global) /* {{{ */
{
	zend_string *regex;                /* Regular expression */
	zend_string *subject;              /* String to match against */
	zval *subpats = NULL;              /* Optional output array for subpatterns */
	zend_long flags = 0;               /* Match control flags */
	zend_long start_offset = 0;        /* Where the search starts */
	pcre_cache_entry *pce;             /* Compiled regular expression */

	ZEND_PARSE_PARAMETERS_START(2, 5)
		Z_PARAM_STR(regex)
		Z_PARAM_STR(subject)
		Z_PARAM_OPTIONAL
		Z_PARAM_ZVAL(subpats)
		Z_PARAM_LONG(flags)
		Z_PARAM_LONG(start_offset)
	ZEND_PARSE_PARAMETERS_END();

	/* Compile the pattern, or fetch it from the cache. */
	pce = pcre_get_compiled_regex_cache(regex);
	if (pce == NULL) {
		RETURN_FALSE;
	}

	/* Hold a reference on the cache entry for the duration of the match. */
	pce->refcount++;
	php_pcre_match_impl(pce, subject, return_value, subpats, global, flags, start_offset);
	pce->refcount--;
}
/* }}} */
1146
is_known_valid_utf8(zend_string * subject_str,PCRE2_SIZE start_offset)1147 static zend_always_inline bool is_known_valid_utf8(
1148 zend_string *subject_str, PCRE2_SIZE start_offset) {
1149 if (!ZSTR_IS_VALID_UTF8(subject_str)) {
1150 /* We don't know whether the string is valid UTF-8 or not. */
1151 return 0;
1152 }
1153
1154 if (start_offset == ZSTR_LEN(subject_str)) {
1155 /* Degenerate case: Offset points to end of string. */
1156 return 1;
1157 }
1158
1159 /* Check that the offset does not point to an UTF-8 continuation byte. */
1160 return (ZSTR_VAL(subject_str)[start_offset] & 0xc0) != 0x80;
1161 }
1162
1163 /* {{{ php_pcre_match_impl() */
php_pcre_match_impl(pcre_cache_entry * pce,zend_string * subject_str,zval * return_value,zval * subpats,bool global,zend_long flags,zend_off_t start_offset)1164 PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str, zval *return_value,
1165 zval *subpats, bool global, zend_long flags, zend_off_t start_offset)
1166 {
1167 zval result_set; /* Holds a set of subpatterns after
1168 a global match */
1169 HashTable **match_sets = NULL; /* An array of sets of matches for each
1170 subpattern after a global match */
1171 uint32_t options; /* Execution options */
1172 int count; /* Count of matched subpatterns */
1173 uint32_t num_subpats; /* Number of captured subpatterns */
1174 int matched; /* Has anything matched */
1175 zend_string **subpat_names; /* Array for named subpatterns */
1176 size_t i;
1177 uint32_t subpats_order; /* Order of subpattern matches */
1178 uint32_t offset_capture; /* Capture match offsets: yes/no */
1179 zend_long unmatched_as_null; /* Null non-matches: yes/no */
1180 PCRE2_SPTR mark = NULL; /* Target for MARK name */
1181 HashTable *marks = NULL; /* Array of marks for PREG_PATTERN_ORDER */
1182 pcre2_match_data *match_data;
1183 PCRE2_SIZE start_offset2, orig_start_offset;
1184
1185 char *subject = ZSTR_VAL(subject_str);
1186 size_t subject_len = ZSTR_LEN(subject_str);
1187
1188 /* Overwrite the passed-in value for subpatterns with an empty array. */
1189 if (subpats != NULL) {
1190 subpats = zend_try_array_init(subpats);
1191 if (!subpats) {
1192 RETURN_THROWS();
1193 }
1194 }
1195
1196 subpats_order = global ? PREG_PATTERN_ORDER : 0;
1197
1198 if (flags) {
1199 offset_capture = flags & PREG_OFFSET_CAPTURE;
1200 unmatched_as_null = flags & PREG_UNMATCHED_AS_NULL;
1201
1202 /*
1203 * subpats_order is pre-set to pattern mode so we change it only if
1204 * necessary.
1205 */
1206 if (flags & 0xff) {
1207 subpats_order = flags & 0xff;
1208 if ((global && (subpats_order < PREG_PATTERN_ORDER || subpats_order > PREG_SET_ORDER)) ||
1209 (!global && subpats_order != 0)) {
1210 zend_argument_value_error(4, "must be a PREG_* constant");
1211 RETURN_THROWS();
1212 }
1213 }
1214 } else {
1215 offset_capture = 0;
1216 unmatched_as_null = 0;
1217 }
1218
1219 /* Negative offset counts from the end of the string. */
1220 if (start_offset < 0) {
1221 if ((PCRE2_SIZE)-start_offset <= subject_len) {
1222 start_offset2 = subject_len + start_offset;
1223 } else {
1224 start_offset2 = 0;
1225 }
1226 } else {
1227 start_offset2 = (PCRE2_SIZE)start_offset;
1228 }
1229
1230 if (start_offset2 > subject_len) {
1231 pcre_handle_exec_error(PCRE2_ERROR_BADOFFSET);
1232 RETURN_FALSE;
1233 }
1234
1235 /* Calculate the size of the offsets array, and allocate memory for it. */
1236 num_subpats = pce->capture_count + 1;
1237
1238 /*
1239 * Build a mapping from subpattern numbers to their names. We will
1240 * allocate the table only if there are any named subpatterns.
1241 */
1242 subpat_names = NULL;
1243 if (subpats) {
1244 subpat_names = pce->subpats_table;
1245 }
1246
1247 matched = 0;
1248 PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1249
1250 if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
1251 match_data = mdata;
1252 } else {
1253 match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
1254 if (!match_data) {
1255 PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1256 RETURN_FALSE;
1257 }
1258 }
1259
1260 /* Allocate match sets array and initialize the values. */
1261 if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
1262 match_sets = safe_emalloc(num_subpats, sizeof(HashTable *), 0);
1263 for (i=0; i<num_subpats; i++) {
1264 match_sets[i] = zend_new_array(0);
1265 }
1266 }
1267
1268 /* Array of subpattern offsets */
1269 PCRE2_SIZE *const offsets = pcre2_get_ovector_pointer(match_data);
1270
1271 orig_start_offset = start_offset2;
1272 options =
1273 (pce->compile_options & PCRE2_UTF) && !is_known_valid_utf8(subject_str, orig_start_offset)
1274 ? 0 : PCRE2_NO_UTF_CHECK;
1275
1276 /* Execute the regular expression. */
1277 #ifdef HAVE_PCRE_JIT_SUPPORT
1278 if ((pce->preg_options & PREG_JIT) && options) {
1279 count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1280 PCRE2_NO_UTF_CHECK, match_data, mctx);
1281 } else
1282 #endif
1283 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1284 options, match_data, mctx);
1285
1286 while (1) {
1287 /* If something has matched */
1288 if (count >= 0) {
1289 /* Check for too many substrings condition. */
1290 if (UNEXPECTED(count == 0)) {
1291 php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
1292 count = num_subpats;
1293 }
1294
1295 matched:
1296 matched++;
1297
1298 /* If subpatterns array has been passed, fill it in with values. */
1299 if (subpats != NULL) {
1300 /* Try to get the list of substrings and display a warning if failed. */
1301 if (UNEXPECTED(offsets[1] < offsets[0])) {
1302 if (match_sets) efree(match_sets);
1303 php_error_docref(NULL, E_WARNING, "Get subpatterns list failed");
1304 RETURN_FALSE;
1305 }
1306
1307 if (global) { /* global pattern matching */
1308 if (subpats_order == PREG_PATTERN_ORDER) {
1309 /* For each subpattern, insert it into the appropriate array. */
1310 if (offset_capture) {
1311 for (i = 0; i < count; i++) {
1312 add_offset_pair(
1313 match_sets[i], subject, offsets[2*i], offsets[2*i+1],
1314 NULL, unmatched_as_null);
1315 }
1316 } else {
1317 for (i = 0; i < count; i++) {
1318 zval val;
1319 populate_match_value(
1320 &val, subject, offsets[2*i], offsets[2*i+1], unmatched_as_null);
1321 zend_hash_next_index_insert_new(match_sets[i], &val);
1322 }
1323 }
1324 mark = pcre2_get_mark(match_data);
1325 /* Add MARK, if available */
1326 if (mark) {
1327 if (!marks) {
1328 marks = zend_new_array(0);
1329 }
1330 zval tmp;
1331 ZVAL_STRING(&tmp, (char *) mark);
1332 zend_hash_index_add_new(marks, matched - 1, &tmp);
1333 }
1334 /*
1335 * If the number of captured subpatterns on this run is
1336 * less than the total possible number, pad the result
1337 * arrays with NULLs or empty strings.
1338 */
1339 if (count < num_subpats) {
1340 for (int i = count; i < num_subpats; i++) {
1341 if (offset_capture) {
1342 add_offset_pair(
1343 match_sets[i], NULL, PCRE2_UNSET, PCRE2_UNSET,
1344 NULL, unmatched_as_null);
1345 } else if (unmatched_as_null) {
1346 zval tmp;
1347 ZVAL_NULL(&tmp);
1348 zend_hash_next_index_insert_new(match_sets[i], &tmp);
1349 } else {
1350 zval tmp;
1351 ZVAL_EMPTY_STRING(&tmp);
1352 zend_hash_next_index_insert_new(match_sets[i], &tmp);
1353 }
1354 }
1355 }
1356 } else {
1357 /* Allocate and populate the result set array */
1358 mark = pcre2_get_mark(match_data);
1359 array_init_size(&result_set, count + (mark ? 1 : 0));
1360 populate_subpat_array(
1361 &result_set, subject, offsets, subpat_names,
1362 num_subpats, count, mark, flags);
1363 /* And add it to the output array */
1364 zend_hash_next_index_insert_new(Z_ARRVAL_P(subpats), &result_set);
1365 }
1366 } else { /* single pattern matching */
1367 /* For each subpattern, insert it into the subpatterns array. */
1368 mark = pcre2_get_mark(match_data);
1369 populate_subpat_array(
1370 subpats, subject, offsets, subpat_names, num_subpats, count, mark, flags);
1371 break;
1372 }
1373 }
1374
1375 /* Advance to the next piece. */
1376 start_offset2 = offsets[1];
1377
1378 /* If we have matched an empty string, mimic what Perl's /g options does.
1379 This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
1380 the match again at the same point. If this fails (picked up above) we
1381 advance to the next character. */
1382 if (start_offset2 == offsets[0]) {
1383 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1384 PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
1385 if (count >= 0) {
1386 if (global) {
1387 goto matched;
1388 } else {
1389 break;
1390 }
1391 } else if (count == PCRE2_ERROR_NOMATCH) {
1392 /* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
1393 this is not necessarily the end. We need to advance
1394 the start offset, and continue. Fudge the offset values
1395 to achieve this, unless we're already at the end of the string. */
1396 if (start_offset2 < subject_len) {
1397 size_t unit_len = calculate_unit_length(pce, subject + start_offset2);
1398
1399 start_offset2 += unit_len;
1400 } else {
1401 break;
1402 }
1403 } else {
1404 goto error;
1405 }
1406 }
1407 } else if (count == PCRE2_ERROR_NOMATCH) {
1408 break;
1409 } else {
1410 error:
1411 pcre_handle_exec_error(count);
1412 break;
1413 }
1414
1415 if (!global) {
1416 break;
1417 }
1418
1419 /* Execute the regular expression. */
1420 #ifdef HAVE_PCRE_JIT_SUPPORT
1421 if ((pce->preg_options & PREG_JIT)) {
1422 if (start_offset2 > subject_len) {
1423 pcre_handle_exec_error(PCRE2_ERROR_BADOFFSET);
1424 break;
1425 }
1426 count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1427 PCRE2_NO_UTF_CHECK, match_data, mctx);
1428 } else
1429 #endif
1430 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1431 PCRE2_NO_UTF_CHECK, match_data, mctx);
1432 }
1433 if (match_data != mdata) {
1434 pcre2_match_data_free(match_data);
1435 }
1436
1437 /* Add the match sets to the output array and clean up */
1438 if (match_sets) {
1439 if (subpat_names) {
1440 for (i = 0; i < num_subpats; i++) {
1441 zval wrapper;
1442 ZVAL_ARR(&wrapper, match_sets[i]);
1443 if (subpat_names[i]) {
1444 zend_hash_update(Z_ARRVAL_P(subpats), subpat_names[i], &wrapper);
1445 GC_ADDREF(match_sets[i]);
1446 }
1447 zend_hash_next_index_insert_new(Z_ARRVAL_P(subpats), &wrapper);
1448 }
1449 } else {
1450 for (i = 0; i < num_subpats; i++) {
1451 zval wrapper;
1452 ZVAL_ARR(&wrapper, match_sets[i]);
1453 zend_hash_next_index_insert_new(Z_ARRVAL_P(subpats), &wrapper);
1454 }
1455 }
1456 efree(match_sets);
1457
1458 if (marks) {
1459 zval tmp;
1460 ZVAL_ARR(&tmp, marks);
1461 zend_hash_str_update(Z_ARRVAL_P(subpats), "MARK", sizeof("MARK") - 1, &tmp);
1462 }
1463 }
1464
1465 if (PCRE_G(error_code) == PHP_PCRE_NO_ERROR) {
1466 /* If there was no error and we're in /u mode, remember that the string is valid UTF-8. */
1467 if ((pce->compile_options & PCRE2_UTF)
1468 && !ZSTR_IS_INTERNED(subject_str) && orig_start_offset == 0) {
1469 GC_ADD_FLAGS(subject_str, IS_STR_VALID_UTF8);
1470 }
1471
1472 RETVAL_LONG(matched);
1473 } else {
1474 RETVAL_FALSE;
1475 }
1476 }
1477 /* }}} */
1478
/* {{{ Perform a Perl-style regular expression match */
/* Userland preg_match(): forwards to php_do_pcre_match() with global
 * matching disabled. */
PHP_FUNCTION(preg_match)
{
	php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, false);
}
/* }}} */
1485
1486 ZEND_FRAMELESS_FUNCTION(preg_match, 2)
1487 {
1488 zval regex_tmp, subject_tmp;
1489 zend_string *regex, *subject;
1490
1491 Z_FLF_PARAM_STR(1, regex, regex_tmp);
1492 Z_FLF_PARAM_STR(2, subject, subject_tmp);
1493
1494 /* Compile regex or get it from cache. */
1495 pcre_cache_entry *pce;
1496 if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
1497 RETURN_FALSE;
1498 }
1499
1500 pce->refcount++;
1501 php_pcre_match_impl(pce, subject, return_value, /* subpats */ NULL,
1502 /* global */ false, /* flags */ 0, /* start_offset */ 0);
1503 pce->refcount--;
1504
1505 flf_clean:
1506 Z_FLF_PARAM_FREE_STR(1, regex_tmp);
1507 Z_FLF_PARAM_FREE_STR(2, subject_tmp);
1508 }
1509
/* {{{ Perform a Perl-style global regular expression match */
/* Userland preg_match_all(): forwards to php_do_pcre_match() with global
 * matching enabled. */
PHP_FUNCTION(preg_match_all)
{
	php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, true);
}
/* }}} */
1516
/* {{{ preg_get_backref */
/* Try to read a back-reference at *str, which must point at the introducing
 * '\\' or '$' of a NUL-terminated replacement string.
 *
 * Recognised forms are \N, $N and ${N}, where N is one or two decimal digits.
 * On success the reference number is stored in *backref, *str is moved past
 * the reference and 1 is returned. On failure 0 is returned and *str is left
 * alone (*backref may already have been written). */
static int preg_get_backref(char **str, int *backref)
{
	char *cursor = *str;
	int braced;

	/* Nothing follows the introducer: cannot be a reference. */
	if (cursor[1] == '\0') {
		return 0;
	}

	/* Only '$' can open the braced form; skip the introducer (and the brace). */
	braced = (cursor[0] == '$' && cursor[1] == '{');
	cursor += braced ? 2 : 1;

	/* First digit is mandatory. */
	if (*cursor < '0' || *cursor > '9') {
		return 0;
	}
	*backref = *cursor - '0';
	cursor++;

	/* Second digit is optional. */
	if (*cursor >= '0' && *cursor <= '9') {
		*backref = *backref * 10 + *cursor - '0';
		cursor++;
	}

	/* The braced form must be closed right after the digits. */
	if (braced) {
		if (*cursor != '}') {
			return 0;
		}
		cursor++;
	}

	*str = cursor;
	return 1;
}
/* }}} */
1554
1555 /* {{{ preg_do_repl_func */
preg_do_repl_func(zend_fcall_info * fci,zend_fcall_info_cache * fcc,const char * subject,PCRE2_SIZE * offsets,zend_string ** subpat_names,uint32_t num_subpats,int count,const PCRE2_SPTR mark,zend_long flags)1556 static zend_string *preg_do_repl_func(zend_fcall_info *fci, zend_fcall_info_cache *fcc, const char *subject, PCRE2_SIZE *offsets, zend_string **subpat_names, uint32_t num_subpats, int count, const PCRE2_SPTR mark, zend_long flags)
1557 {
1558 zend_string *result_str;
1559 zval retval; /* Function return value */
1560 zval arg; /* Argument to pass to function */
1561
1562 array_init_size(&arg, count + (mark ? 1 : 0));
1563 populate_subpat_array(&arg, subject, offsets, subpat_names, num_subpats, count, mark, flags);
1564
1565 fci->retval = &retval;
1566 fci->param_count = 1;
1567 fci->params = &arg;
1568
1569 if (zend_call_function(fci, fcc) == SUCCESS && Z_TYPE(retval) != IS_UNDEF) {
1570 if (EXPECTED(Z_TYPE(retval) == IS_STRING)) {
1571 result_str = Z_STR(retval);
1572 } else {
1573 result_str = zval_get_string_func(&retval);
1574 zval_ptr_dtor(&retval);
1575 }
1576 } else {
1577 if (!EG(exception)) {
1578 php_error_docref(NULL, E_WARNING, "Unable to call custom replacement function");
1579 }
1580
1581 result_str = zend_string_init(&subject[offsets[0]], offsets[1] - offsets[0], 0);
1582 }
1583
1584 zval_ptr_dtor(&arg);
1585
1586 return result_str;
1587 }
1588 /* }}} */
1589
1590 /* {{{ php_pcre_replace */
php_pcre_replace(zend_string * regex,zend_string * subject_str,const char * subject,size_t subject_len,zend_string * replace_str,size_t limit,size_t * replace_count)1591 PHPAPI zend_string *php_pcre_replace(zend_string *regex,
1592 zend_string *subject_str,
1593 const char *subject, size_t subject_len,
1594 zend_string *replace_str,
1595 size_t limit, size_t *replace_count)
1596 {
1597 pcre_cache_entry *pce; /* Compiled regular expression */
1598 zend_string *result; /* Function result */
1599
1600 /* Abort on pending exception, e.g. thrown from __toString(). */
1601 if (UNEXPECTED(EG(exception))) {
1602 return NULL;
1603 }
1604
1605 /* Compile regex or get it from cache. */
1606 if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
1607 return NULL;
1608 }
1609 pce->refcount++;
1610 result = php_pcre_replace_impl(pce, subject_str, subject, subject_len, replace_str,
1611 limit, replace_count);
1612 pce->refcount--;
1613
1614 return result;
1615 }
1616 /* }}} */
1617
/* {{{ php_pcre_replace_impl() */
/*
 * Replace up to `limit` matches of the compiled pattern `pce` in
 * `subject` (length `subject_len`) with `replace_str`.
 *
 * Inside the replacement, "\N", "$N" and "${N}" (one or two digits, see
 * preg_get_backref()) insert the text of capture group N; references to a
 * group beyond those reported for the match insert nothing. A backslash
 * directly before '\\' or '$' makes that character literal.
 *
 *  - subject_str:   optional zend_string for the subject; when non-NULL and
 *                   no replacement was made, zend_string_copy(subject_str) is
 *                   returned instead of building a new string.
 *  - replace_count: when non-NULL, incremented once per replacement made.
 *
 * Returns the resulting string, or NULL on failure. PCRE_G(error_code) is
 * reset to PHP_PCRE_NO_ERROR on entry.
 */
PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *subject_str, const char *subject, size_t subject_len, zend_string *replace_str, size_t limit, size_t *replace_count)
{
	uint32_t		 options;			/* Execution options */
	int				 count;				/* Count of matched subpatterns */
	uint32_t		 num_subpats;		/* Number of captured subpatterns */
	size_t			 new_len;			/* Length of needed storage */
	size_t			 alloc_len;			/* Actual allocated length */
	size_t			 match_len;			/* Length of the current match */
	int				 backref;			/* Backreference number */
	PCRE2_SIZE		 start_offset;		/* Where the new search starts */
	size_t			 last_end_offset;	/* Where the last search ended */
	char			*walkbuf,			/* Location of current replacement in the result */
					*walk,				/* Used to walk the replacement string */
					 walk_last;			/* Last walked character */
	const char		*match,				/* The current match */
					*piece,				/* The current piece of subject */
					*replace_end;		/* End of replacement string */
	size_t			result_len; 		/* Length of result */
	zend_string		*result;			/* Result of replacement */
	pcre2_match_data *match_data;

	/* Number of capture slots: every group of the pattern plus group 0. */
	num_subpats = pce->capture_count + 1;
	alloc_len = 0;
	result = NULL;

	/* Initialize */
	match = NULL;
	start_offset = 0;
	last_end_offset = 0;
	result_len = 0;
	PCRE_G(error_code) = PHP_PCRE_NO_ERROR;

	/* Use the shared match data block when it is free and large enough,
	 * otherwise create one sized for this pattern. */
	if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
		match_data = mdata;
	} else {
		match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
		if (!match_data) {
			PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
			return NULL;
		}
	}

	/* 0 requests the UTF check (for /u patterns), otherwise it is skipped. */
	options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;

	/* Array of subpattern offsets */
	PCRE2_SIZE *const offsets = pcre2_get_ovector_pointer(match_data);

	/* Execute the regular expression. The JIT entry point is only used for
	 * the first match when no UTF check is required. */
#ifdef HAVE_PCRE_JIT_SUPPORT
	if ((pce->preg_options & PREG_JIT) && options) {
		count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
				PCRE2_NO_UTF_CHECK, match_data, mctx);
	} else
#endif
	count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
			options, match_data, mctx);

	while (1) {
		/* Unconsumed part of the subject: starts where the last match ended. */
		piece = subject + last_end_offset;

		if (count >= 0 && limit > 0) {
			bool simple_string;

			/* Check for too many substrings condition. */
			if (UNEXPECTED(count == 0)) {
				php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
				count = num_subpats;
			}

matched:
			/* A match that ends before it starts cannot be processed. */
			if (UNEXPECTED(offsets[1] < offsets[0])) {
				PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
				if (result) {
					zend_string_release_ex(result, 0);
					result = NULL;
				}
				break;
			}

			if (replace_count) {
				++*replace_count;
			}

			/* Set the match location in subject */
			match = subject + offsets[0];

			new_len = result_len + offsets[0] - last_end_offset; /* part before the match */

			/* First pass over the replacement: only measure how much room the
			 * expanded text needs, and note whether it contains any '\\' or
			 * '$' at all (if not it can be copied verbatim below). */
			walk = ZSTR_VAL(replace_str);
			replace_end = walk + ZSTR_LEN(replace_str);
			walk_last = 0;
			simple_string = 1;
			while (walk < replace_end) {
				if ('\\' == *walk || '$' == *walk) {
					simple_string = 0;
					if (walk_last == '\\') {
						/* Escaped character: it will overwrite the backslash
						 * already counted, so the length does not grow. */
						walk++;
						walk_last = 0;
						continue;
					}
					if (preg_get_backref(&walk, &backref)) {
						if (backref < count)
							new_len += offsets[(backref<<1)+1] - offsets[backref<<1];
						continue;
					}
				}
				new_len++;
				walk++;
				walk_last = walk[-1];
			}

			/* Grow the result buffer; the capacity is doubled relative to
			 * what is needed right now. */
			if (new_len >= alloc_len) {
				alloc_len = zend_safe_address_guarded(2, new_len, ZSTR_MAX_OVERHEAD) - ZSTR_MAX_OVERHEAD;
				if (result == NULL) {
					result = zend_string_alloc(alloc_len, 0);
				} else {
					result = zend_string_extend(result, alloc_len, 0);
				}
			}

			if (match-piece > 0) {
				/* copy the part of the string before the match */
				memcpy(&ZSTR_VAL(result)[result_len], piece, match-piece);
				result_len += (match-piece);
			}

			if (simple_string) {
				/* copy replacement (the +1 also copies its terminating NUL) */
				memcpy(&ZSTR_VAL(result)[result_len], ZSTR_VAL(replace_str), ZSTR_LEN(replace_str)+1);
				result_len += ZSTR_LEN(replace_str);
			} else {
				/* copy replacement and backrefs */
				walkbuf = ZSTR_VAL(result) + result_len;

				/* Second pass: same walk as above, this time writing output. */
				walk = ZSTR_VAL(replace_str);
				walk_last = 0;
				while (walk < replace_end) {
					if ('\\' == *walk || '$' == *walk) {
						if (walk_last == '\\') {
							/* Replace the backslash written in the previous
							 * step with the escaped character. */
							*(walkbuf-1) = *walk++;
							walk_last = 0;
							continue;
						}
						if (preg_get_backref(&walk, &backref)) {
							if (backref < count) {
								match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
								walkbuf = zend_mempcpy(walkbuf, subject + offsets[backref << 1], match_len);
							}
							continue;
						}
					}
					*walkbuf++ = *walk++;
					walk_last = walk[-1];
				}
				*walkbuf = '\0';
				/* increment the result length by how much we've added to the string */
				result_len += (walkbuf - (ZSTR_VAL(result) + result_len));
			}

			limit--;

			/* Advance to the next piece. */
			start_offset = last_end_offset = offsets[1];

			/* If we have matched an empty string, mimic what Perl's /g options does.
			   This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
			   the match again at the same point. If this fails (picked up above) we
			   advance to the next character. */
			if (start_offset == offsets[0]) {
				count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
					PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);

				piece = subject + start_offset;
				if (count >= 0 && limit > 0) {
					goto matched;
				} else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
					/* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
					   this is not necessarily the end. We need to advance
					   the start offset, and continue. Fudge the offset values
					   to achieve this, unless we're already at the end of the string. */
					if (start_offset < subject_len) {
						size_t unit_len = calculate_unit_length(pce, piece);
						start_offset += unit_len;
					} else {
						goto not_matched;
					}
				} else {
					goto error;
				}
			}

		} else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
not_matched:
			/* Nothing was replaced and the caller supplied the subject as a
			 * zend_string: hand that string back via zend_string_copy(). */
			if (!result && subject_str) {
				result = zend_string_copy(subject_str);
				break;
			}
			/* now we know exactly how long it is */
			alloc_len = result_len + subject_len - last_end_offset;
			if (NULL != result) {
				result = zend_string_realloc(result, alloc_len, 0);
			} else {
				result = zend_string_alloc(alloc_len, 0);
			}
			/* stick that last bit of string on our output */
			memcpy(ZSTR_VAL(result) + result_len, piece, subject_len - last_end_offset);
			result_len += subject_len - last_end_offset;
			ZSTR_VAL(result)[result_len] = '\0';
			ZSTR_LEN(result) = result_len;
			break;
		} else {
error:
			/* Any other negative count is a matching error: report it and
			 * discard the partial result. */
			pcre_handle_exec_error(count);
			if (result) {
				zend_string_release_ex(result, 0);
				result = NULL;
			}
			break;
		}

		/* Look for the next match; the UTF check is not repeated. */
#ifdef HAVE_PCRE_JIT_SUPPORT
		if (pce->preg_options & PREG_JIT) {
			count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
					PCRE2_NO_UTF_CHECK, match_data, mctx);
		} else
#endif
		count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
					PCRE2_NO_UTF_CHECK, match_data, mctx);
	}
	/* Only a privately created match data block is destroyed here. */
	if (match_data != mdata) {
		pcre2_match_data_free(match_data);
	}

	return result;
}
/* }}} */
1856
1857 /* {{{ php_pcre_replace_func_impl() */
php_pcre_replace_func_impl(pcre_cache_entry * pce,zend_string * subject_str,const char * subject,size_t subject_len,zend_fcall_info * fci,zend_fcall_info_cache * fcc,size_t limit,size_t * replace_count,zend_long flags)1858 static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_string *subject_str, const char *subject, size_t subject_len, zend_fcall_info *fci, zend_fcall_info_cache *fcc, size_t limit, size_t *replace_count, zend_long flags)
1859 {
1860 uint32_t options; /* Execution options */
1861 int count; /* Count of matched subpatterns */
1862 zend_string **subpat_names; /* Array for named subpatterns */
1863 uint32_t num_subpats; /* Number of captured subpatterns */
1864 size_t new_len; /* Length of needed storage */
1865 size_t alloc_len; /* Actual allocated length */
1866 PCRE2_SIZE start_offset; /* Where the new search starts */
1867 size_t last_end_offset; /* Where the last search ended */
1868 const char *match, /* The current match */
1869 *piece; /* The current piece of subject */
1870 size_t result_len; /* Length of result */
1871 zend_string *result; /* Result of replacement */
1872 zend_string *eval_result; /* Result of custom function */
1873 pcre2_match_data *match_data;
1874 bool old_mdata_used;
1875
1876 /* Calculate the size of the offsets array, and allocate memory for it. */
1877 num_subpats = pce->capture_count + 1;
1878 subpat_names = pce->subpats_table;
1879
1880 alloc_len = 0;
1881 result = NULL;
1882
1883 /* Initialize */
1884 match = NULL;
1885 start_offset = 0;
1886 last_end_offset = 0;
1887 result_len = 0;
1888 PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1889
1890 old_mdata_used = mdata_used;
1891 if (!old_mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
1892 mdata_used = 1;
1893 match_data = mdata;
1894 } else {
1895 match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
1896 if (!match_data) {
1897 PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1898 mdata_used = old_mdata_used;
1899 return NULL;
1900 }
1901 }
1902
1903 options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
1904
1905 /* Array of subpattern offsets */
1906 PCRE2_SIZE *const offsets = pcre2_get_ovector_pointer(match_data);
1907
1908 /* Execute the regular expression. */
1909 #ifdef HAVE_PCRE_JIT_SUPPORT
1910 if ((pce->preg_options & PREG_JIT) && options) {
1911 count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1912 PCRE2_NO_UTF_CHECK, match_data, mctx);
1913 } else
1914 #endif
1915 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1916 options, match_data, mctx);
1917
1918 while (1) {
1919 piece = subject + last_end_offset;
1920
1921 if (count >= 0 && limit) {
1922 /* Check for too many substrings condition. */
1923 if (UNEXPECTED(count == 0)) {
1924 php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
1925 count = num_subpats;
1926 }
1927
1928 matched:
1929 if (UNEXPECTED(offsets[1] < offsets[0])) {
1930 PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1931 if (result) {
1932 zend_string_release_ex(result, 0);
1933 result = NULL;
1934 }
1935 break;
1936 }
1937
1938 if (replace_count) {
1939 ++*replace_count;
1940 }
1941
1942 /* Set the match location in subject */
1943 match = subject + offsets[0];
1944
1945 new_len = result_len + offsets[0] - last_end_offset; /* part before the match */
1946
1947 /* Use custom function to get replacement string and its length. */
1948 eval_result = preg_do_repl_func(
1949 fci, fcc, subject, offsets, subpat_names, num_subpats, count,
1950 pcre2_get_mark(match_data), flags);
1951
1952 ZEND_ASSERT(eval_result);
1953 new_len = zend_safe_address_guarded(1, ZSTR_LEN(eval_result) + ZSTR_MAX_OVERHEAD, new_len) -ZSTR_MAX_OVERHEAD;
1954 if (new_len >= alloc_len) {
1955 alloc_len = zend_safe_address_guarded(2, new_len, ZSTR_MAX_OVERHEAD) - ZSTR_MAX_OVERHEAD;
1956 if (result == NULL) {
1957 result = zend_string_alloc(alloc_len, 0);
1958 } else {
1959 result = zend_string_extend(result, alloc_len, 0);
1960 }
1961 }
1962
1963 if (match-piece > 0) {
1964 /* copy the part of the string before the match */
1965 memcpy(ZSTR_VAL(result) + result_len, piece, match-piece);
1966 result_len += (match-piece);
1967 }
1968
1969 /* If using custom function, copy result to the buffer and clean up. */
1970 memcpy(ZSTR_VAL(result) + result_len, ZSTR_VAL(eval_result), ZSTR_LEN(eval_result));
1971 result_len += ZSTR_LEN(eval_result);
1972 zend_string_release_ex(eval_result, 0);
1973
1974 limit--;
1975
1976 /* Advance to the next piece. */
1977 start_offset = last_end_offset = offsets[1];
1978
1979 /* If we have matched an empty string, mimic what Perl's /g options does.
1980 This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
1981 the match again at the same point. If this fails (picked up above) we
1982 advance to the next character. */
1983 if (start_offset == offsets[0]) {
1984 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1985 PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
1986
1987 piece = subject + start_offset;
1988 if (count >= 0 && limit) {
1989 goto matched;
1990 } else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
1991 /* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
1992 this is not necessarily the end. We need to advance
1993 the start offset, and continue. Fudge the offset values
1994 to achieve this, unless we're already at the end of the string. */
1995 if (start_offset < subject_len) {
1996 size_t unit_len = calculate_unit_length(pce, piece);
1997 start_offset += unit_len;
1998 } else {
1999 goto not_matched;
2000 }
2001 } else {
2002 goto error;
2003 }
2004 }
2005
2006 } else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
2007 not_matched:
2008 if (!result && subject_str) {
2009 result = zend_string_copy(subject_str);
2010 break;
2011 }
2012 /* now we know exactly how long it is */
2013 alloc_len = result_len + subject_len - last_end_offset;
2014 if (NULL != result) {
2015 result = zend_string_realloc(result, alloc_len, 0);
2016 } else {
2017 result = zend_string_alloc(alloc_len, 0);
2018 }
2019 /* stick that last bit of string on our output */
2020 memcpy(ZSTR_VAL(result) + result_len, piece, subject_len - last_end_offset);
2021 result_len += subject_len - last_end_offset;
2022 ZSTR_VAL(result)[result_len] = '\0';
2023 ZSTR_LEN(result) = result_len;
2024 break;
2025 } else {
2026 error:
2027 pcre_handle_exec_error(count);
2028 if (result) {
2029 zend_string_release_ex(result, 0);
2030 result = NULL;
2031 }
2032 break;
2033 }
2034 #ifdef HAVE_PCRE_JIT_SUPPORT
2035 if ((pce->preg_options & PREG_JIT)) {
2036 count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
2037 PCRE2_NO_UTF_CHECK, match_data, mctx);
2038 } else
2039 #endif
2040 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
2041 PCRE2_NO_UTF_CHECK, match_data, mctx);
2042 }
2043 if (match_data != mdata) {
2044 pcre2_match_data_free(match_data);
2045 }
2046 mdata_used = old_mdata_used;
2047
2048 return result;
2049 }
2050 /* }}} */
2051
2052 /* {{{ php_pcre_replace_func */
php_pcre_replace_func(zend_string * regex,zend_string * subject_str,zend_fcall_info * fci,zend_fcall_info_cache * fcc,size_t limit,size_t * replace_count,zend_long flags)2053 static zend_always_inline zend_string *php_pcre_replace_func(zend_string *regex,
2054 zend_string *subject_str,
2055 zend_fcall_info *fci, zend_fcall_info_cache *fcc,
2056 size_t limit, size_t *replace_count, zend_long flags)
2057 {
2058 pcre_cache_entry *pce; /* Compiled regular expression */
2059 zend_string *result; /* Function result */
2060
2061 /* Compile regex or get it from cache. */
2062 if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
2063 return NULL;
2064 }
2065 pce->refcount++;
2066 result = php_pcre_replace_func_impl(
2067 pce, subject_str, ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), fci, fcc,
2068 limit, replace_count, flags);
2069 pce->refcount--;
2070
2071 return result;
2072 }
2073 /* }}} */
2074
2075 /* {{{ php_pcre_replace_array */
/* Apply every pattern stored in `regex` to `subject_str`, one after another,
 * feeding the output of each replacement into the next one.
 *
 * With `replace_ht` set, each pattern is paired with the next defined slot of
 * that table, and with an empty string once the table is exhausted. Without
 * it, `replace_str` is used for every pattern.
 *
 * Returns the final string, or NULL as soon as a php_pcre_replace() call
 * returns NULL. */
static zend_string *php_pcre_replace_array(HashTable *regex,
	zend_string *replace_str, HashTable *replace_ht,
	zend_string *subject_str, size_t limit, size_t *replace_count)
{
	zval *pattern_zv;
	zend_string *replaced;

	/* The loops below release the current subject after every pass, so take
	 * a reference of our own first. */
	zend_string_addref(subject_str);

	if (!replace_ht) {
		ZEND_ASSERT(replace_str != NULL);

		/* Same replacement string for every pattern. */
		ZEND_HASH_FOREACH_VAL(regex, pattern_zv) {
			zend_string *pattern_tmp;
			zend_string *pattern = zval_get_tmp_string(pattern_zv, &pattern_tmp);

			replaced = php_pcre_replace(pattern, subject_str, ZSTR_VAL(subject_str),
				ZSTR_LEN(subject_str), replace_str, limit, replace_count);
			zend_tmp_string_release(pattern_tmp);
			zend_string_release_ex(subject_str, 0);

			/* The result becomes the subject of the next pattern. */
			subject_str = replaced;
			if (UNEXPECTED(replaced == NULL)) {
				break;
			}
		} ZEND_HASH_FOREACH_END();

		return subject_str;
	}

	/* Position of the next replacement slot to look at. */
	uint32_t next_slot = 0;

	ZEND_HASH_FOREACH_VAL(regex, pattern_zv) {
		zend_string *pattern_tmp;
		zend_string *pattern = zval_get_tmp_string(pattern_zv, &pattern_tmp);
		zend_string *replacement, *replacement_tmp;

		/* Pick the next defined replacement, skipping IS_UNDEF slots. */
		for (;;) {
			if (next_slot == replace_ht->nNumUsed) {
				/* Ran out of replacements: substitute the empty string. */
				replacement = ZSTR_EMPTY_ALLOC();
				replacement_tmp = NULL;
				break;
			}

			zval *slot = ZEND_HASH_ELEMENT(replace_ht, next_slot);
			next_slot++;
			if (Z_TYPE_P(slot) != IS_UNDEF) {
				replacement = zval_get_tmp_string(slot, &replacement_tmp);
				break;
			}
		}

		replaced = php_pcre_replace(pattern, subject_str, ZSTR_VAL(subject_str),
			ZSTR_LEN(subject_str), replacement, limit, replace_count);
		zend_tmp_string_release(replacement_tmp);
		zend_tmp_string_release(pattern_tmp);
		zend_string_release_ex(subject_str, 0);

		/* The result becomes the subject of the next pattern. */
		subject_str = replaced;
		if (UNEXPECTED(replaced == NULL)) {
			break;
		}
	} ZEND_HASH_FOREACH_END();

	return subject_str;
}
2149 /* }}} */
2150
2151 /* {{{ php_replace_in_subject */
php_replace_in_subject(zend_string * regex_str,HashTable * regex_ht,zend_string * replace_str,HashTable * replace_ht,zend_string * subject,size_t limit,size_t * replace_count)2152 static zend_always_inline zend_string *php_replace_in_subject(
2153 zend_string *regex_str, HashTable *regex_ht,
2154 zend_string *replace_str, HashTable *replace_ht,
2155 zend_string *subject, size_t limit, size_t *replace_count)
2156 {
2157 zend_string *result;
2158
2159 if (regex_str) {
2160 ZEND_ASSERT(replace_str != NULL);
2161 result = php_pcre_replace(regex_str, subject, ZSTR_VAL(subject), ZSTR_LEN(subject),
2162 replace_str, limit, replace_count);
2163 } else {
2164 ZEND_ASSERT(regex_ht != NULL);
2165 result = php_pcre_replace_array(regex_ht, replace_str, replace_ht, subject,
2166 limit, replace_count);
2167 }
2168 return result;
2169 }
2170 /* }}} */
2171
2172 /* {{{ php_replace_in_subject_func */
/* Callback flavour of php_replace_in_subject(): replace matches in `subject`
 * using the user callback described by `fci`/`fcc`.
 *
 * With a single pattern (`regex_str`) the result of php_pcre_replace_func()
 * is returned directly. With an array of patterns (`regex_ht`) each pattern
 * is applied to the output of the previous one; the chain stops and NULL is
 * returned as soon as one step returns NULL. */
static zend_string *php_replace_in_subject_func(zend_string *regex_str, HashTable *regex_ht,
	zend_fcall_info *fci, zend_fcall_info_cache *fcc,
	zend_string *subject, size_t limit, size_t *replace_count, zend_long flags)
{
	if (regex_str) {
		return php_pcre_replace_func(
			regex_str, subject, fci, fcc, limit, replace_count, flags);
	}

	/* Pattern was given as an array. */
	ZEND_ASSERT(regex_ht != NULL);

	/* Every pass releases the current subject, so own a reference up front. */
	zend_string_addref(subject);

	zval *pattern_zv;
	ZEND_HASH_FOREACH_VAL(regex_ht, pattern_zv) {
		zend_string *pattern_tmp;
		zend_string *pattern = zval_get_tmp_string(pattern_zv, &pattern_tmp);
		zend_string *replaced = php_pcre_replace_func(
			pattern, subject, fci, fcc, limit, replace_count, flags);

		zend_tmp_string_release(pattern_tmp);
		zend_string_release(subject);

		/* Chain: this result is the input of the next pattern. */
		subject = replaced;
		if (UNEXPECTED(replaced == NULL)) {
			break;
		}
	} ZEND_HASH_FOREACH_END();

	return subject;
}
2212 /* }}} */
2213
2214 /* {{{ preg_replace_func_impl */
preg_replace_func_impl(zval * return_value,zend_string * regex_str,HashTable * regex_ht,zend_fcall_info * fci,zend_fcall_info_cache * fcc,zend_string * subject_str,HashTable * subject_ht,zend_long limit_val,zend_long flags)2215 static size_t preg_replace_func_impl(zval *return_value,
2216 zend_string *regex_str, HashTable *regex_ht,
2217 zend_fcall_info *fci, zend_fcall_info_cache *fcc,
2218 zend_string *subject_str, HashTable *subject_ht, zend_long limit_val, zend_long flags)
2219 {
2220 zend_string *result;
2221 size_t replace_count = 0;
2222
2223 if (subject_str) {
2224 result = php_replace_in_subject_func(
2225 regex_str, regex_ht, fci, fcc, subject_str, limit_val, &replace_count, flags);
2226 if (result != NULL) {
2227 RETVAL_STR(result);
2228 } else {
2229 RETVAL_NULL();
2230 }
2231 } else {
2232 /* if subject is an array */
2233 zval *subject_entry, zv;
2234 zend_string *string_key;
2235 zend_ulong num_key;
2236
2237 ZEND_ASSERT(subject_ht != NULL);
2238
2239 array_init_size(return_value, zend_hash_num_elements(subject_ht));
2240 HashTable *return_value_ht = Z_ARRVAL_P(return_value);
2241
2242 /* For each subject entry, convert it to string, then perform replacement
2243 and add the result to the return_value array. */
2244 ZEND_HASH_FOREACH_KEY_VAL(subject_ht, num_key, string_key, subject_entry) {
2245 zend_string *tmp_subject_entry_str;
2246 zend_string *subject_entry_str = zval_get_tmp_string(subject_entry, &tmp_subject_entry_str);
2247
2248 result = php_replace_in_subject_func(
2249 regex_str, regex_ht, fci, fcc, subject_entry_str, limit_val, &replace_count, flags);
2250 if (result != NULL) {
2251 /* Add to return array */
2252 ZVAL_STR(&zv, result);
2253 if (string_key) {
2254 zend_hash_add_new(return_value_ht, string_key, &zv);
2255 } else {
2256 zend_hash_index_add_new(return_value_ht, num_key, &zv);
2257 }
2258 }
2259 zend_tmp_string_release(tmp_subject_entry_str);
2260 } ZEND_HASH_FOREACH_END();
2261 }
2262
2263 return replace_count;
2264 }
2265 /* }}} */
2266
/* Core of preg_replace() / preg_filter() once the arguments are parsed.
 *
 * Exactly one of each (`*_ht`, `*_str`) pair is expected to be set. An array
 * replacement combined with a string pattern raises an argument type error.
 *
 * - String subject: `return_value` becomes the replaced string, or NULL when
 *   the replacement returned NULL or (in filter mode) nothing was replaced.
 * - Array subject: `return_value` becomes an array keyed like the input; in
 *   filter mode, entries in which nothing was replaced are dropped.
 *
 * When `zcount` is given, it receives the total number of replacements. */
static void _preg_replace_common(
	zval *return_value,
	HashTable *regex_ht, zend_string *regex_str,
	HashTable *replace_ht, zend_string *replace_str,
	HashTable *subject_ht, zend_string *subject_str,
	zend_long limit,
	zval *zcount,
	bool is_filter
) {
	size_t total_replacements = 0;

	/* An array of replacements is only accepted together with an array of patterns. */
	if (replace_ht && !regex_ht) {
		zend_argument_type_error(1, "must be of type array when argument #2 ($replacement) is an array, string given");
		RETURN_THROWS();
	}

	if (subject_str) {
		const size_t count_before = total_replacements;
		zend_string *replaced = php_replace_in_subject(regex_str, regex_ht, replace_str, replace_ht,
			subject_str, limit, &total_replacements);

		if (replaced == NULL) {
			RETVAL_NULL();
		} else if (is_filter && total_replacements <= count_before) {
			/* Filter mode and the counter did not move: discard the result. */
			zend_string_release_ex(replaced, 0);
			RETVAL_NULL();
		} else {
			RETVAL_STR(replaced);
		}
	} else {
		/* Subject was given as an array. */
		ZEND_ASSERT(subject_ht != NULL);

		array_init_size(return_value, zend_hash_num_elements(subject_ht));
		HashTable *out_ht = Z_ARRVAL_P(return_value);

		zval *entry;
		zend_string *str_key;
		zend_ulong idx_key;

		/* Stringify each entry, run the replacement, and store the result
		 * under the same key in the output array. */
		ZEND_HASH_FOREACH_KEY_VAL(subject_ht, idx_key, str_key, entry) {
			const size_t count_before = total_replacements;
			zend_string *entry_tmp;
			zend_string *entry_str = zval_get_tmp_string(entry, &entry_tmp);
			zend_string *replaced = php_replace_in_subject(regex_str, regex_ht, replace_str, replace_ht,
				entry_str, limit, &total_replacements);

			if (replaced != NULL) {
				if (is_filter && total_replacements <= count_before) {
					/* Filter mode and nothing replaced in this entry: skip it. */
					zend_string_release_ex(replaced, 0);
				} else {
					zval replaced_zv;

					ZVAL_STR(&replaced_zv, replaced);
					if (str_key) {
						zend_hash_add_new(out_ht, str_key, &replaced_zv);
					} else {
						zend_hash_index_add_new(out_ht, idx_key, &replaced_zv);
					}
				}
			}
			zend_tmp_string_release(entry_tmp);
		} ZEND_HASH_FOREACH_END();
	}

	if (zcount) {
		ZEND_TRY_ASSIGN_REF_LONG(zcount, total_replacements);
	}
}
2341
2342 /* {{{ preg_replace_common */
/* Shared entry point for the preg_replace() and preg_filter() userland
 * functions: parses (pattern, replacement, subject[, limit[, &count]]) and
 * forwards everything to _preg_replace_common().
 *
 * Each of the first three arguments may be a string or an array; the parsing
 * macros fill either the `*_str` or the `*_ht` variable of the pair.
 * `is_filter` is passed through unchanged. */
static void preg_replace_common(INTERNAL_FUNCTION_PARAMETERS, bool is_filter)
{
	zend_string *regex_str, *replace_str, *subject_str;
	HashTable *regex_ht, *replace_ht, *subject_ht;
	zend_long limit = -1; /* Default when the optional limit is omitted */
	zval *zcount = NULL; /* Optional by-reference replacement counter */

	/* Get function parameters and do error-checking. */
	ZEND_PARSE_PARAMETERS_START(3, 5)
		Z_PARAM_ARRAY_HT_OR_STR(regex_ht, regex_str)
		Z_PARAM_ARRAY_HT_OR_STR(replace_ht, replace_str)
		Z_PARAM_ARRAY_HT_OR_STR(subject_ht, subject_str)
		Z_PARAM_OPTIONAL
		Z_PARAM_LONG(limit)
		Z_PARAM_ZVAL(zcount)
	ZEND_PARSE_PARAMETERS_END();

	_preg_replace_common(
		return_value,
		regex_ht, regex_str,
		replace_ht, replace_str,
		subject_ht, subject_str,
		limit, zcount, is_filter);
}
2367 /* }}} */
2368
2369 /* {{{ Perform Perl-style regular expression replacement. */
PHP_FUNCTION(preg_replace)
{
	/* is_filter = false: subjects are returned even when nothing was replaced. */
	preg_replace_common(INTERNAL_FUNCTION_PARAM_PASSTHRU, false);
}
2374 /* }}} */
2375
/* Frameless three-argument variant of preg_replace(): takes pattern,
 * replacement and subject only, so the limit is fixed to -1 and no
 * replacement counter is reported. */
ZEND_FRAMELESS_FUNCTION(preg_replace, 3)
{
	zend_string *regex_str, *replace_str, *subject_str;
	HashTable *regex_ht, *replace_ht, *subject_ht;
	/* Scratch zvals handed to the parameter macros and cleaned up at flf_clean. */
	zval regex_tmp, replace_tmp, subject_tmp;

	Z_FLF_PARAM_ARRAY_HT_OR_STR(1, regex_ht, regex_str, regex_tmp);
	Z_FLF_PARAM_ARRAY_HT_OR_STR(2, replace_ht, replace_str, replace_tmp);
	Z_FLF_PARAM_ARRAY_HT_OR_STR(3, subject_ht, subject_str, subject_tmp);

	_preg_replace_common(
		return_value,
		regex_ht, regex_str,
		replace_ht, replace_str,
		subject_ht, subject_str,
		/* limit */ -1, /* zcount */ NULL, /* is_filter */ false);

/* NOTE(review): presumably also the jump target of the Z_FLF_PARAM_* macros
 * when an argument is rejected - confirm against the macro definitions. */
flf_clean:;
	Z_FLF_PARAM_FREE_STR(1, regex_tmp);
	Z_FLF_PARAM_FREE_STR(2, replace_tmp);
	Z_FLF_PARAM_FREE_STR(3, subject_tmp);
}
2398
2399 /* {{{ Perform Perl-style regular expression replacement using replacement callback. */
PHP_FUNCTION(preg_replace_callback)
{
	zval *zcount = NULL; /* Optional by-reference replacement counter */
	/* Pattern: either a single string or an array of patterns. */
	zend_string *regex_str;
	HashTable *regex_ht;
	/* Subject: either a single string or an array of subjects. */
	zend_string *subject_str;
	HashTable *subject_ht;
	zend_long limit = -1, flags = 0;
	size_t replace_count;
	/* The user callback, as resolved by Z_PARAM_FUNC. */
	zend_fcall_info fci;
	zend_fcall_info_cache fcc;

	/* Get function parameters and do error-checking. */
	ZEND_PARSE_PARAMETERS_START(3, 6)
		Z_PARAM_ARRAY_HT_OR_STR(regex_ht, regex_str)
		Z_PARAM_FUNC(fci, fcc)
		Z_PARAM_ARRAY_HT_OR_STR(subject_ht, subject_str)
		Z_PARAM_OPTIONAL
		Z_PARAM_LONG(limit)
		Z_PARAM_ZVAL(zcount)
		Z_PARAM_LONG(flags)
	ZEND_PARSE_PARAMETERS_END();

	/* Fills return_value and reports how many replacements were made. */
	replace_count = preg_replace_func_impl(return_value, regex_str, regex_ht,
		&fci, &fcc,
		subject_str, subject_ht, limit, flags);
	if (zcount) {
		ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count);
	}
}
2430 /* }}} */
2431
/* {{{ Perform Perl-style regular expression replacement using an array of pattern => callback pairs. */
PHP_FUNCTION(preg_replace_callback_array)
{
	zval zv, *replace, *zcount = NULL;
	HashTable *pattern, *subject_ht;
	zend_string *subject_str, *str_idx_regex;
	zend_long limit = -1, flags = 0;
	size_t replace_count = 0;
	zend_fcall_info fci;
	zend_fcall_info_cache fcc;

	/* Get function parameters and do error-checking. */
	ZEND_PARSE_PARAMETERS_START(2, 5)
		Z_PARAM_ARRAY_HT(pattern)
		Z_PARAM_ARRAY_HT_OR_STR(subject_ht, subject_str)
		Z_PARAM_OPTIONAL
		Z_PARAM_LONG(limit)
		Z_PARAM_ZVAL(zcount)
		Z_PARAM_LONG(flags)
	ZEND_PARSE_PARAMETERS_END();

	/* No callback is parsed from the arguments here, so these fci fields are
	 * set by hand; function_name is filled in per pattern inside the loop. */
	fci.size = sizeof(fci);
	fci.object = NULL;
	fci.named_params = NULL;

	/* Each iteration releases the current subject and replaces it with the
	 * result of that step, so take a reference on the caller's value first. */
	if (subject_ht) {
		GC_TRY_ADDREF(subject_ht);
	} else {
		GC_TRY_ADDREF(subject_str);
	}

	/* Keys of `pattern` are the regexes, values are the callbacks. */
	ZEND_HASH_FOREACH_STR_KEY_VAL(pattern, str_idx_regex, replace) {
		if (!zend_is_callable_ex(replace, NULL, 0, NULL, &fcc, NULL)) {
			zend_argument_type_error(1, "must contain only valid callbacks");
			goto error;
		}
		if (!str_idx_regex) {
			/* Integer key: there is no pattern string to use. */
			zend_argument_type_error(1, "must contain only string patterns as keys");
			goto error;
		}

		ZVAL_COPY_VALUE(&fci.function_name, replace);

		/* Run this pattern/callback pair over the current subject; the result
		 * lands in zv and becomes the subject for the next pair. */
		replace_count += preg_replace_func_impl(&zv, str_idx_regex, /* regex_ht */ NULL, &fci, &fcc,
			subject_str, subject_ht, limit, flags);
		switch (Z_TYPE(zv)) {
			case IS_ARRAY:
				ZEND_ASSERT(subject_ht);
				zend_array_release(subject_ht);
				subject_ht = Z_ARR(zv);
				break;
			case IS_STRING:
				ZEND_ASSERT(subject_str);
				zend_string_release(subject_str);
				subject_str = Z_STR(zv);
				break;
			case IS_NULL:
				/* The replacement step returned NULL: return NULL and drop
				 * the subject reference still held. */
				RETVAL_NULL();
				goto error;
			EMPTY_SWITCH_DEFAULT_CASE()
		}

		if (EG(exception)) {
			goto error;
		}
	} ZEND_HASH_FOREACH_END();

	if (zcount) {
		ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count);
	}

	/* Success: the reference held on the current subject is handed over to
	 * return_value. */
	if (subject_ht) {
		RETVAL_ARR(subject_ht);
		// Unset the type_flags of immutable arrays to prevent the VM from performing refcounting
		if (GC_FLAGS(subject_ht) & IS_ARRAY_IMMUTABLE) {
			Z_TYPE_FLAGS_P(return_value) = 0;
		}
		return;
	} else {
		RETURN_STR(subject_str);
	}

/* Failure paths: release the reference held on the current subject. */
error:
	if (subject_ht) {
		zend_array_release(subject_ht);
	} else {
		zend_string_release(subject_str);
	}
}
2521 /* }}} */
2522
2523 /* {{{ Perform Perl-style regular expression replacement and only return matches. */
PHP_FUNCTION(preg_filter)
{
	/* is_filter = true: subjects in which nothing was replaced are dropped. */
	preg_replace_common(INTERNAL_FUNCTION_PARAM_PASSTHRU, true);
}
2528 /* }}} */
2529
2530 /* {{{ Split string into an array using a perl-style regular expression as a delimiter */
PHP_FUNCTION(preg_split)
{
	zend_string *regex; /* Regular expression */
	zend_string *subject; /* String to match against */
	zend_long limit_val = -1;/* Integer value of limit */
	zend_long flags = 0; /* Match control flags */
	pcre_cache_entry *pce; /* Compiled regular expression */

	/* Get function parameters and do error checking */
	ZEND_PARSE_PARAMETERS_START(2, 4)
		Z_PARAM_STR(regex)
		Z_PARAM_STR(subject)
		Z_PARAM_OPTIONAL
		Z_PARAM_LONG(limit_val)
		Z_PARAM_LONG(flags)
	ZEND_PARSE_PARAMETERS_END();

	/* Compile regex or get it from cache; return false when that fails. */
	if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
		RETURN_FALSE;
	}

	/* Hold a reference on the cache entry while the split runs. */
	pce->refcount++;
	php_pcre_split_impl(pce, subject, return_value, limit_val, flags);
	pce->refcount--;
}
2557 /* }}} */
2558
/* {{{ php_pcre_split_impl */
/* Split `subject_str` at every match of the compiled pattern `pce` and store
 * the pieces in `return_value` as an array.
 *
 * limit_val: -1 and 0 both mean "no limit"; any other value <= 1 skips
 *            matching and yields the whole subject as the only piece;
 *            larger values are counted down as pieces are emitted.
 * flags:     PREG_SPLIT_NO_EMPTY skips empty pieces,
 *            PREG_SPLIT_DELIM_CAPTURE also emits the capture groups of each
 *            delimiter match, PREG_SPLIT_OFFSET_CAPTURE emits pieces through
 *            add_offset_pair() instead of as plain strings.
 *
 * If match data cannot be created, or an error code was recorded while
 * matching, the array is destroyed and `return_value` is set to false. */
PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str, zval *return_value,
	zend_long limit_val, zend_long flags)
{
	uint32_t options; /* Execution options */
	int count; /* Count of matched subpatterns */
	PCRE2_SIZE start_offset; /* Where the new search starts */
	PCRE2_SIZE last_match_offset; /* Location of last match */
	uint32_t no_empty; /* If NO_EMPTY flag is set */
	uint32_t delim_capture; /* If delimiters should be captured */
	uint32_t offset_capture; /* If offsets should be captured */
	uint32_t num_subpats; /* Number of captured subpatterns */
	zval tmp;
	pcre2_match_data *match_data;
	char *subject = ZSTR_VAL(subject_str);

	no_empty = flags & PREG_SPLIT_NO_EMPTY;
	delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE;
	offset_capture = flags & PREG_SPLIT_OFFSET_CAPTURE;

	/* Initialize return value */
	array_init(return_value);
	HashTable *return_value_ht = Z_ARRVAL_P(return_value);

	/* Number of capture groups, counting group 0 (the whole match). */
	num_subpats = pce->capture_count + 1;

	/* Start at the beginning of the string */
	start_offset = 0;
	last_match_offset = 0;
	PCRE_G(error_code) = PHP_PCRE_NO_ERROR;

	/* Normalize the limit: 0 is treated like -1 (no limit); any other value
	 * <= 1 leaves no room for a split, so only the final piece is emitted. */
	if (limit_val == -1) {
		/* pass */
	} else if (limit_val == 0) {
		limit_val = -1;
	} else if (limit_val <= 1) {
		goto last;
	}

	/* Use the shared `mdata` block when it is not marked as in use and the
	 * pattern has at most PHP_PCRE_PREALLOC_MDATA_SIZE groups; otherwise
	 * create match data sized for this pattern. */
	if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
		match_data = mdata;
	} else {
		match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
		if (!match_data) {
			PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
			zval_ptr_dtor(return_value);
			RETURN_FALSE;
		}
	}

	/* Patterns compiled with PCRE2_UTF get options == 0 for the first match;
	 * all other patterns pass PCRE2_NO_UTF_CHECK. */
	options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;

	/* Array of subpattern offsets */
	PCRE2_SIZE *const offsets = pcre2_get_ovector_pointer(match_data);

	/* First match. The JIT entry point is only used when `options` is
	 * non-zero, i.e. for patterns compiled without PCRE2_UTF. */
#ifdef HAVE_PCRE_JIT_SUPPORT
	if ((pce->preg_options & PREG_JIT) && options) {
		count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
				PCRE2_NO_UTF_CHECK, match_data, mctx);
	} else
#endif
	count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
			options, match_data, mctx);

	while (1) {
		/* If something matched */
		if (count >= 0) {
			/* Check for too many substrings condition. */
			if (UNEXPECTED(count == 0)) {
				php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
				count = num_subpats;
			}

matched:
			/* A match that ends before it starts is reported as an internal error. */
			if (UNEXPECTED(offsets[1] < offsets[0])) {
				PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
				break;
			}

			/* Emit the piece between the previous match and this one, unless
			 * it is empty and empty pieces are suppressed. */
			if (!no_empty || offsets[0] != last_match_offset) {
				if (offset_capture) {
					/* Add (match, offset) pair to the return value */
					add_offset_pair(
						return_value_ht, subject, last_match_offset, offsets[0],
						NULL, 0);
				} else {
					/* Add the piece to the return value */
					populate_match_value_str(&tmp, subject, last_match_offset, offsets[0]);
					zend_hash_next_index_insert_new(return_value_ht, &tmp);
				}

				/* One less left to do */
				if (limit_val != -1)
					limit_val--;
			}

			/* Optionally emit the delimiter's capture groups (group 0 is skipped). */
			if (delim_capture) {
				size_t i;
				for (i = 1; i < count; i++) {
					/* If we have matched a delimiter */
					if (!no_empty || offsets[2*i] != offsets[2*i+1]) {
						if (offset_capture) {
							add_offset_pair(
								return_value_ht, subject, offsets[2*i], offsets[2*i+1], NULL, 0);
						} else {
							populate_match_value_str(&tmp, subject, offsets[2*i], offsets[2*i+1]);
							zend_hash_next_index_insert_new(return_value_ht, &tmp);
						}
					}
				}
			}

			/* Advance to the position right after the last full match */
			start_offset = last_match_offset = offsets[1];

			/* If we have matched an empty string, mimic what Perl's /g option does.
			   This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
			   the match again at the same point. If this fails (picked up above) we
			   advance to the next character. */
			if (start_offset == offsets[0]) {
				/* Get next piece if no limit or limit not yet reached and something matched */
				if (limit_val != -1 && limit_val <= 1) {
					break;
				}
				count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
					PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
				if (count >= 0) {
					goto matched;
				} else if (count == PCRE2_ERROR_NOMATCH) {
					/* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
					   this is not necessarily the end. We need to advance
					   the start offset, and continue. Fudge the offset values
					   to achieve this, unless we're already at the end of the string. */
					if (start_offset < ZSTR_LEN(subject_str)) {
						start_offset += calculate_unit_length(pce, subject + start_offset);
					} else {
						break;
					}
				} else {
					goto error;
				}
			}

		} else if (count == PCRE2_ERROR_NOMATCH) {
			/* No further delimiter: leave the loop and emit the final piece. */
			break;
		} else {
error:
			pcre_handle_exec_error(count);
			break;
		}

		/* Get next piece if no limit or limit not yet reached and something matched */
		if (limit_val != -1 && limit_val <= 1) {
			break;
		}

		/* Subsequent matches always pass PCRE2_NO_UTF_CHECK. */
#ifdef HAVE_PCRE_JIT_SUPPORT
		if (pce->preg_options & PREG_JIT) {
			count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
					PCRE2_NO_UTF_CHECK, match_data, mctx);
		} else
#endif
		count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
				PCRE2_NO_UTF_CHECK, match_data, mctx);
	}
	/* Only match data created in this call is freed; the shared block is kept. */
	if (match_data != mdata) {
		pcre2_match_data_free(match_data);
	}

	/* An error recorded during matching discards the partial result. */
	if (PCRE_G(error_code) != PHP_PCRE_NO_ERROR) {
		zval_ptr_dtor(return_value);
		RETURN_FALSE;
	}

last:
	start_offset = last_match_offset; /* the offset might have been incremented, but without further successful matches */

	/* Emit whatever follows the last match, unless it is empty and empty
	 * pieces are suppressed. */
	if (!no_empty || start_offset < ZSTR_LEN(subject_str)) {
		if (offset_capture) {
			/* Add the last (match, offset) pair to the return value */
			add_offset_pair(return_value_ht, subject, start_offset, ZSTR_LEN(subject_str), NULL, 0);
		} else {
			/* Add the last piece to the return value */
			if (start_offset == 0) {
				/* The piece is the whole subject: share the string instead of copying it. */
				ZVAL_STR_COPY(&tmp, subject_str);
			} else {
				populate_match_value_str(&tmp, subject, start_offset, ZSTR_LEN(subject_str));
			}
			zend_hash_next_index_insert_new(return_value_ht, &tmp);
		}
	}
}
2752 /* }}} */
2753
2754 /* {{{ Quote regular expression characters plus an optional character */
PHP_FUNCTION(preg_quote)2755 PHP_FUNCTION(preg_quote)
2756 {
2757 zend_string *str; /* Input string argument */
2758 zend_string *delim = NULL; /* Additional delimiter argument */
2759 char *in_str; /* Input string */
2760 char *in_str_end; /* End of the input string */
2761 zend_string *out_str; /* Output string with quoted characters */
2762 size_t extra_len; /* Number of additional characters */
2763 char *p, /* Iterator for input string */
2764 *q, /* Iterator for output string */
2765 delim_char = '\0', /* Delimiter character to be quoted */
2766 c; /* Current character */
2767
2768 /* Get the arguments and check for errors */
2769 ZEND_PARSE_PARAMETERS_START(1, 2)
2770 Z_PARAM_STR(str)
2771 Z_PARAM_OPTIONAL
2772 Z_PARAM_STR_OR_NULL(delim)
2773 ZEND_PARSE_PARAMETERS_END();
2774
2775 /* Nothing to do if we got an empty string */
2776 if (ZSTR_LEN(str) == 0) {
2777 RETURN_EMPTY_STRING();
2778 }
2779
2780 in_str = ZSTR_VAL(str);
2781 in_str_end = in_str + ZSTR_LEN(str);
2782
2783 if (delim) {
2784 delim_char = ZSTR_VAL(delim)[0];
2785 }
2786
2787 /* Go through the string and quote necessary characters */
2788 extra_len = 0;
2789 p = in_str;
2790 do {
2791 c = *p;
2792 switch(c) {
2793 case '.':
2794 case '\\':
2795 case '+':
2796 case '*':
2797 case '?':
2798 case '[':
2799 case '^':
2800 case ']':
2801 case '$':
2802 case '(':
2803 case ')':
2804 case '{':
2805 case '}':
2806 case '=':
2807 case '!':
2808 case '>':
2809 case '<':
2810 case '|':
2811 case ':':
2812 case '-':
2813 case '#':
2814 extra_len++;
2815 break;
2816
2817 case '\0':
2818 extra_len+=3;
2819 break;
2820
2821 default:
2822 if (c == delim_char) {
2823 extra_len++;
2824 }
2825 break;
2826 }
2827 p++;
2828 } while (p != in_str_end);
2829
2830 if (extra_len == 0) {
2831 RETURN_STR_COPY(str);
2832 }
2833
2834 /* Allocate enough memory so that even if each character
2835 is quoted, we won't run out of room */
2836 out_str = zend_string_safe_alloc(1, ZSTR_LEN(str), extra_len, 0);
2837 q = ZSTR_VAL(out_str);
2838 p = in_str;
2839
2840 do {
2841 c = *p;
2842 switch(c) {
2843 case '.':
2844 case '\\':
2845 case '+':
2846 case '*':
2847 case '?':
2848 case '[':
2849 case '^':
2850 case ']':
2851 case '$':
2852 case '(':
2853 case ')':
2854 case '{':
2855 case '}':
2856 case '=':
2857 case '!':
2858 case '>':
2859 case '<':
2860 case '|':
2861 case ':':
2862 case '-':
2863 case '#':
2864 *q++ = '\\';
2865 *q++ = c;
2866 break;
2867
2868 case '\0':
2869 *q++ = '\\';
2870 *q++ = '0';
2871 *q++ = '0';
2872 *q++ = '0';
2873 break;
2874
2875 default:
2876 if (c == delim_char) {
2877 *q++ = '\\';
2878 }
2879 *q++ = c;
2880 break;
2881 }
2882 p++;
2883 } while (p != in_str_end);
2884 *q = '\0';
2885
2886 RETURN_NEW_STR(out_str);
2887 }
2888 /* }}} */
2889
2890 /* {{{ Searches array and returns entries which match regex */
PHP_FUNCTION(preg_grep)
{
	zend_string *regex; /* Regular expression */
	zval *input; /* Input array */
	zend_long flags = 0; /* Match control flags */
	pcre_cache_entry *pce; /* Compiled regular expression */

	/* Get arguments and do error checking */
	ZEND_PARSE_PARAMETERS_START(2, 3)
		Z_PARAM_STR(regex)
		Z_PARAM_ARRAY(input)
		Z_PARAM_OPTIONAL
		Z_PARAM_LONG(flags)
	ZEND_PARSE_PARAMETERS_END();

	/* Compile regex or get it from cache; return false when that fails. */
	if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
		RETURN_FALSE;
	}

	/* Hold a reference on the cache entry while the grep runs. */
	pce->refcount++;
	php_pcre_grep_impl(pce, input, return_value, flags);
	pce->refcount--;
}
2915 /* }}} */
2916
php_pcre_grep_impl(pcre_cache_entry * pce,zval * input,zval * return_value,zend_long flags)2917 PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return_value, zend_long flags) /* {{{ */
2918 {
2919 zval *entry; /* An entry in the input array */
2920 uint32_t num_subpats; /* Number of captured subpatterns */
2921 int count; /* Count of matched subpatterns */
2922 uint32_t options; /* Execution options */
2923 zend_string *string_key;
2924 zend_ulong num_key;
2925 bool invert; /* Whether to return non-matching
2926 entries */
2927 pcre2_match_data *match_data;
2928 invert = flags & PREG_GREP_INVERT ? 1 : 0;
2929
2930 /* Calculate the size of the offsets array, and allocate memory for it. */
2931 num_subpats = pce->capture_count + 1;
2932
2933 /* Initialize return array */
2934 array_init(return_value);
2935 HashTable *return_value_ht = Z_ARRVAL_P(return_value);
2936
2937 PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
2938
2939 if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
2940 match_data = mdata;
2941 } else {
2942 match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
2943 if (!match_data) {
2944 PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
2945 return;
2946 }
2947 }
2948
2949 options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
2950
2951 /* Go through the input array */
2952 ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(input), num_key, string_key, entry) {
2953 zend_string *tmp_subject_str;
2954 zend_string *subject_str = zval_get_tmp_string(entry, &tmp_subject_str);
2955
2956 /* Perform the match */
2957 #ifdef HAVE_PCRE_JIT_SUPPORT
2958 if ((pce->preg_options & PREG_JIT) && options) {
2959 count = pcre2_jit_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), 0,
2960 PCRE2_NO_UTF_CHECK, match_data, mctx);
2961 } else
2962 #endif
2963 count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), 0,
2964 options, match_data, mctx);
2965
2966 /* If the entry fits our requirements */
2967 if (count >= 0) {
2968 /* Check for too many substrings condition. */
2969 if (UNEXPECTED(count == 0)) {
2970 php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
2971 }
2972 if (!invert) {
2973 Z_TRY_ADDREF_P(entry);
2974
2975 /* Add to return array */
2976 if (string_key) {
2977 zend_hash_update(return_value_ht, string_key, entry);
2978 } else {
2979 zend_hash_index_update(return_value_ht, num_key, entry);
2980 }
2981 }
2982 } else if (count == PCRE2_ERROR_NOMATCH) {
2983 if (invert) {
2984 Z_TRY_ADDREF_P(entry);
2985
2986 /* Add to return array */
2987 if (string_key) {
2988 zend_hash_update(return_value_ht, string_key, entry);
2989 } else {
2990 zend_hash_index_update(return_value_ht, num_key, entry);
2991 }
2992 }
2993 } else {
2994 pcre_handle_exec_error(count);
2995 zend_tmp_string_release(tmp_subject_str);
2996 break;
2997 }
2998
2999 zend_tmp_string_release(tmp_subject_str);
3000 } ZEND_HASH_FOREACH_END();
3001 if (match_data != mdata) {
3002 pcre2_match_data_free(match_data);
3003 }
3004 }
3005 /* }}} */
3006
3007 /* {{{ Returns the error code of the last regexp execution. */
PHP_FUNCTION(preg_last_error)3008 PHP_FUNCTION(preg_last_error)
3009 {
3010 ZEND_PARSE_PARAMETERS_NONE();
3011
3012 RETURN_LONG(PCRE_G(error_code));
3013 }
3014 /* }}} */
3015
3016 /* {{{ Returns the error message of the last regexp execution. */
PHP_FUNCTION(preg_last_error_msg)3017 PHP_FUNCTION(preg_last_error_msg)
3018 {
3019 ZEND_PARSE_PARAMETERS_NONE();
3020
3021 RETURN_STRING(php_pcre_get_error_msg(PCRE_G(error_code)));
3022 }
3023 /* }}} */
3024
/* {{{ module definition structures */

/* Module descriptor for the "pcre" extension. The initializer is positional,
 * so the order of the entries below must not be changed. */
zend_module_entry pcre_module_entry = {
	STANDARD_MODULE_HEADER,
	"pcre",                     /* extension name */
	ext_functions,              /* function table */
	PHP_MINIT(pcre),            /* module startup callback */
	PHP_MSHUTDOWN(pcre),        /* module shutdown callback */
	PHP_RINIT(pcre),            /* request startup callback */
	PHP_RSHUTDOWN(pcre),        /* request shutdown callback */
	PHP_MINFO(pcre),            /* module info callback */
	PHP_PCRE_VERSION,           /* extension version */
	PHP_MODULE_GLOBALS(pcre),   /* module globals */
	PHP_GINIT(pcre),            /* globals constructor */
	PHP_GSHUTDOWN(pcre),        /* globals destructor */
	NULL,                       /* no post-deactivate handler */
	STANDARD_MODULE_PROPERTIES_EX
};

/* The get_module() entry point is emitted only when COMPILE_DL_PCRE is defined. */
#ifdef COMPILE_DL_PCRE
ZEND_GET_MODULE(pcre)
#endif

/* }}} */
3049
3050 PHPAPI pcre2_match_context *php_pcre_mctx(void)
3051 {/*{{{*/
3052 return mctx;
3053 }/*}}}*/
3054
php_pcre_gctx(void)3055 PHPAPI pcre2_general_context *php_pcre_gctx(void)
3056 {/*{{{*/
3057 return gctx;
3058 }/*}}}*/
3059
php_pcre_cctx(void)3060 PHPAPI pcre2_compile_context *php_pcre_cctx(void)
3061 {/*{{{*/
3062 return cctx;
3063 }/*}}}*/
3064
php_pcre_pce_incref(pcre_cache_entry * pce)3065 PHPAPI void php_pcre_pce_incref(pcre_cache_entry *pce)
3066 {/*{{{*/
3067 assert(NULL != pce);
3068 pce->refcount++;
3069 }/*}}}*/
3070
php_pcre_pce_decref(pcre_cache_entry * pce)3071 PHPAPI void php_pcre_pce_decref(pcre_cache_entry *pce)
3072 {/*{{{*/
3073 assert(NULL != pce);
3074 assert(0 != pce->refcount);
3075 pce->refcount--;
3076 }/*}}}*/
3077
php_pcre_pce_re(pcre_cache_entry * pce)3078 PHPAPI pcre2_code *php_pcre_pce_re(pcre_cache_entry *pce)
3079 {/*{{{*/
3080 assert(NULL != pce);
3081 return pce->re;
3082 }/*}}}*/
3083