1 /*
2 +----------------------------------------------------------------------+
3 | Copyright (c) The PHP Group |
4 +----------------------------------------------------------------------+
5 | This source file is subject to version 3.01 of the PHP license, |
6 | that is bundled with this package in the file LICENSE, and is |
7 | available through the world-wide-web at the following url: |
8 | https://www.php.net/license/3_01.txt |
9 | If you did not receive a copy of the PHP license and are unable to |
10 | obtain it through the world-wide-web, please send a note to |
11 | license@php.net so we can mail you a copy immediately. |
12 +----------------------------------------------------------------------+
13 | Author: Andrei Zmievski <andrei@php.net> |
14 +----------------------------------------------------------------------+
15 */
16
17 #include "php.h"
18 #include "php_ini.h"
19 #include "php_globals.h"
20 #include "php_pcre.h"
21 #include "ext/standard/info.h"
22 #include "ext/standard/basic_functions.h"
23 #include "zend_smart_str.h"
24 #include "SAPI.h"
25
26 #include "ext/standard/php_string.h"
27
/* Ordering selectors are plain values; the capture flags are single-bit masks. */
#define PREG_PATTERN_ORDER			1
#define PREG_SET_ORDER				2
#define PREG_OFFSET_CAPTURE			(1 << 8)
#define PREG_UNMATCHED_AS_NULL		(1 << 9)

/* Single-bit flags for the split family. */
#define PREG_SPLIT_NO_EMPTY			(1 << 0)
#define PREG_SPLIT_DELIM_CAPTURE	(1 << 1)
#define PREG_SPLIT_OFFSET_CAPTURE	(1 << 2)

/* Single-bit flag for the grep family. */
#define PREG_GREP_INVERT			(1 << 0)

/* Stored in pcre_cache_entry.preg_options when JIT code exists for the pattern. */
#define PREG_JIT					(1 << 3)

/* Number of cached patterns at which old entries start being evicted. */
#define PCRE_CACHE_SIZE				4096

/* Compile-time JIT availability as a 0/1 value. */
#ifndef HAVE_PCRE_JIT_SUPPORT
# define PHP_PCRE_JIT_SUPPORT		0
#else
# define PHP_PCRE_JIT_SUPPORT		1
#endif
48
/* PCRE2 version string; malloc'ed at module startup and freed at module shutdown. */
char *php_pcre_version = NULL;
50
51 #include "php_pcre_arginfo.h"
52
53 struct _pcre_cache_entry {
54 pcre2_code *re;
55 uint32_t preg_options;
56 uint32_t capture_count;
57 uint32_t name_count;
58 uint32_t compile_options;
59 uint32_t refcount;
60 };
61
62 PHPAPI ZEND_DECLARE_MODULE_GLOBALS(pcre)
63
64 #ifdef HAVE_PCRE_JIT_SUPPORT
65 #define PCRE_JIT_STACK_MIN_SIZE (32 * 1024)
66 #define PCRE_JIT_STACK_MAX_SIZE (192 * 1024)
67 ZEND_TLS pcre2_jit_stack *jit_stack = NULL;
68 #endif
69 /* General context using (infallible) system allocator. */
70 ZEND_TLS pcre2_general_context *gctx = NULL;
71 /* These two are global per thread for now. Though it is possible to use these
72 per pattern. Either one can copy it and use in pce, or one does no global
73 contexts at all, but creates for every pce. */
74 ZEND_TLS pcre2_compile_context *cctx = NULL;
75 ZEND_TLS pcre2_match_context *mctx = NULL;
76 ZEND_TLS pcre2_match_data *mdata = NULL;
77 ZEND_TLS bool mdata_used = 0;
78 ZEND_TLS uint8_t pcre2_init_ok = 0;
79 #if defined(ZTS) && defined(HAVE_PCRE_JIT_SUPPORT)
80 static MUTEX_T pcre_mt = NULL;
81 #define php_pcre_mutex_alloc() \
82 if (tsrm_is_main_thread() && !pcre_mt) pcre_mt = tsrm_mutex_alloc();
83 #define php_pcre_mutex_free() \
84 if (tsrm_is_main_thread() && pcre_mt) { tsrm_mutex_free(pcre_mt); pcre_mt = NULL; }
85 #define php_pcre_mutex_lock() tsrm_mutex_lock(pcre_mt);
86 #define php_pcre_mutex_unlock() tsrm_mutex_unlock(pcre_mt);
87 #else
88 #define php_pcre_mutex_alloc()
89 #define php_pcre_mutex_free()
90 #define php_pcre_mutex_lock()
91 #define php_pcre_mutex_unlock()
92 #endif
93
94 ZEND_TLS HashTable char_tables;
95
/* Destructor of the char_tables hash: frees the persistent block held by the entry. */
static void php_pcre_free_char_table(zval *data)
{/*{{{*/
	pefree(Z_PTR_P(data), 1);
}/*}}}*/
101
pcre_handle_exec_error(int pcre_code)102 static void pcre_handle_exec_error(int pcre_code) /* {{{ */
103 {
104 int preg_code = 0;
105
106 switch (pcre_code) {
107 case PCRE2_ERROR_MATCHLIMIT:
108 preg_code = PHP_PCRE_BACKTRACK_LIMIT_ERROR;
109 break;
110
111 case PCRE2_ERROR_RECURSIONLIMIT:
112 preg_code = PHP_PCRE_RECURSION_LIMIT_ERROR;
113 break;
114
115 case PCRE2_ERROR_BADUTFOFFSET:
116 preg_code = PHP_PCRE_BAD_UTF8_OFFSET_ERROR;
117 break;
118
119 #ifdef HAVE_PCRE_JIT_SUPPORT
120 case PCRE2_ERROR_JIT_STACKLIMIT:
121 preg_code = PHP_PCRE_JIT_STACKLIMIT_ERROR;
122 break;
123 #endif
124
125 default:
126 if (pcre_code <= PCRE2_ERROR_UTF8_ERR1 && pcre_code >= PCRE2_ERROR_UTF8_ERR21) {
127 preg_code = PHP_PCRE_BAD_UTF8_ERROR;
128 } else {
129 preg_code = PHP_PCRE_INTERNAL_ERROR;
130 }
131 break;
132 }
133
134 PCRE_G(error_code) = preg_code;
135 }
136 /* }}} */
137
/* Returns the static, human-readable message for a PHP_PCRE_*_ERROR code;
 * unrecognised codes yield "Unknown error". */
static const char *php_pcre_get_error_msg(php_pcre_error_code error_code) /* {{{ */
{
	const char *msg;

	switch (error_code) {
		case PHP_PCRE_NO_ERROR:
			msg = "No error";
			break;
		case PHP_PCRE_INTERNAL_ERROR:
			msg = "Internal error";
			break;
		case PHP_PCRE_BAD_UTF8_ERROR:
			msg = "Malformed UTF-8 characters, possibly incorrectly encoded";
			break;
		case PHP_PCRE_BAD_UTF8_OFFSET_ERROR:
			msg = "The offset did not correspond to the beginning of a valid UTF-8 code point";
			break;
		case PHP_PCRE_BACKTRACK_LIMIT_ERROR:
			msg = "Backtrack limit exhausted";
			break;
		case PHP_PCRE_RECURSION_LIMIT_ERROR:
			msg = "Recursion limit exhausted";
			break;

#ifdef HAVE_PCRE_JIT_SUPPORT
		case PHP_PCRE_JIT_STACKLIMIT_ERROR:
			msg = "JIT stack limit exhausted";
			break;
#endif

		default:
			msg = "Unknown error";
			break;
	}

	return msg;
}
/* }}} */
164
/* Cache destructor for entries allocated with the system allocator: frees the
 * compiled pattern, then the entry. A NULL entry is ignored. */
static void php_free_pcre_cache(zval *data) /* {{{ */
{
	pcre_cache_entry *entry = (pcre_cache_entry *) Z_PTR_P(data);

	if (entry != NULL) {
		pcre2_code_free(entry->re);
		free(entry);
	}
}
/* }}} */
173
/* Cache destructor for entries allocated with emalloc (used for the
 * per-request cache): frees the compiled pattern, then the entry.
 * A NULL entry is ignored. */
static void php_efree_pcre_cache(zval *data) /* {{{ */
{
	pcre_cache_entry *entry = (pcre_cache_entry *) Z_PTR_P(data);

	if (entry != NULL) {
		pcre2_code_free(entry->re);
		efree(entry);
	}
}
/* }}} */
182
/* PCRE2 allocation callback backed by the persistent allocator; `data` is unused. */
static void *php_pcre_malloc(PCRE2_SIZE size, void *data)
{
	void *mem = pemalloc(size, 1);

	(void)data;
	return mem;
}
187
/* PCRE2 release callback paired with php_pcre_malloc(); `data` is unused. */
static void php_pcre_free(void *block, void *data)
{
	(void)data;
	pefree(block, 1);
}
192
/* PCRE2 allocation callback backed by emalloc(); `data` is unused. */
static void *php_pcre_emalloc(PCRE2_SIZE size, void *data)
{
	void *mem = emalloc(size);

	(void)data;
	return mem;
}
197
/* PCRE2 release callback paired with php_pcre_emalloc(); `data` is unused. */
static void php_pcre_efree(void *block, void *data)
{
	(void)data;
	efree(block);
}
202
/* pcre 10.38 needs PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK, disabled by default.
   Fall back to no extra compile options when the library does not define it. */
#ifndef PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK
# define PHP_PCRE_DEFAULT_EXTRA_COPTIONS 0
#else
# define PHP_PCRE_DEFAULT_EXTRA_COPTIONS PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK
#endif

/* Number of offset pairs in the preallocated match data block. */
#define PHP_PCRE_PREALLOC_MDATA_SIZE 32
211
php_pcre_init_pcre2(uint8_t jit)212 static void php_pcre_init_pcre2(uint8_t jit)
213 {/*{{{*/
214 if (!gctx) {
215 gctx = pcre2_general_context_create(php_pcre_malloc, php_pcre_free, NULL);
216 if (!gctx) {
217 pcre2_init_ok = 0;
218 return;
219 }
220 }
221
222 if (!cctx) {
223 cctx = pcre2_compile_context_create(gctx);
224 if (!cctx) {
225 pcre2_init_ok = 0;
226 return;
227 }
228 }
229
230 pcre2_set_compile_extra_options(cctx, PHP_PCRE_DEFAULT_EXTRA_COPTIONS);
231
232 if (!mctx) {
233 mctx = pcre2_match_context_create(gctx);
234 if (!mctx) {
235 pcre2_init_ok = 0;
236 return;
237 }
238 }
239
240 #ifdef HAVE_PCRE_JIT_SUPPORT
241 if (jit && !jit_stack) {
242 jit_stack = pcre2_jit_stack_create(PCRE_JIT_STACK_MIN_SIZE, PCRE_JIT_STACK_MAX_SIZE, gctx);
243 if (!jit_stack) {
244 pcre2_init_ok = 0;
245 return;
246 }
247 }
248 #endif
249
250 if (!mdata) {
251 mdata = pcre2_match_data_create(PHP_PCRE_PREALLOC_MDATA_SIZE, gctx);
252 if (!mdata) {
253 pcre2_init_ok = 0;
254 return;
255 }
256 }
257
258 pcre2_init_ok = 1;
259 }/*}}}*/
260
php_pcre_shutdown_pcre2(void)261 static void php_pcre_shutdown_pcre2(void)
262 {/*{{{*/
263 if (gctx) {
264 pcre2_general_context_free(gctx);
265 gctx = NULL;
266 }
267
268 if (cctx) {
269 pcre2_compile_context_free(cctx);
270 cctx = NULL;
271 }
272
273 if (mctx) {
274 pcre2_match_context_free(mctx);
275 mctx = NULL;
276 }
277
278 #ifdef HAVE_PCRE_JIT_SUPPORT
279 /* Stack may only be destroyed when no cached patterns
280 possibly associated with it do exist. */
281 if (jit_stack) {
282 pcre2_jit_stack_free(jit_stack);
283 jit_stack = NULL;
284 }
285 #endif
286
287 if (mdata) {
288 pcre2_match_data_free(mdata);
289 mdata = NULL;
290 }
291
292 pcre2_init_ok = 0;
293 }/*}}}*/
294
PHP_GINIT_FUNCTION(pcre)295 static PHP_GINIT_FUNCTION(pcre) /* {{{ */
296 {
297 php_pcre_mutex_alloc();
298
299 /* If we're on the CLI SAPI, there will only be one request, so we don't need the
300 * cache to survive after RSHUTDOWN. */
301 pcre_globals->per_request_cache = strcmp(sapi_module.name, "cli") == 0;
302 if (!pcre_globals->per_request_cache) {
303 zend_hash_init(&pcre_globals->pcre_cache, 0, NULL, php_free_pcre_cache, 1);
304 }
305
306 pcre_globals->backtrack_limit = 0;
307 pcre_globals->recursion_limit = 0;
308 pcre_globals->error_code = PHP_PCRE_NO_ERROR;
309 ZVAL_UNDEF(&pcre_globals->unmatched_null_pair);
310 ZVAL_UNDEF(&pcre_globals->unmatched_empty_pair);
311 #ifdef HAVE_PCRE_JIT_SUPPORT
312 pcre_globals->jit = 1;
313 #endif
314
315 php_pcre_init_pcre2(1);
316 zend_hash_init(&char_tables, 1, NULL, php_pcre_free_char_table, 1);
317 }
318 /* }}} */
319
PHP_GSHUTDOWN_FUNCTION(pcre)320 static PHP_GSHUTDOWN_FUNCTION(pcre) /* {{{ */
321 {
322 if (!pcre_globals->per_request_cache) {
323 zend_hash_destroy(&pcre_globals->pcre_cache);
324 }
325
326 php_pcre_shutdown_pcre2();
327 zend_hash_destroy(&char_tables);
328 php_pcre_mutex_free();
329 }
330 /* }}} */
331
PHP_INI_MH(OnUpdateBacktrackLimit)332 static PHP_INI_MH(OnUpdateBacktrackLimit)
333 {/*{{{*/
334 OnUpdateLong(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
335 if (mctx) {
336 pcre2_set_match_limit(mctx, (uint32_t)PCRE_G(backtrack_limit));
337 }
338
339 return SUCCESS;
340 }/*}}}*/
341
PHP_INI_MH(OnUpdateRecursionLimit)342 static PHP_INI_MH(OnUpdateRecursionLimit)
343 {/*{{{*/
344 OnUpdateLong(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
345 if (mctx) {
346 pcre2_set_depth_limit(mctx, (uint32_t)PCRE_G(recursion_limit));
347 }
348
349 return SUCCESS;
350 }/*}}}*/
351
#ifdef HAVE_PCRE_JIT_SUPPORT
/* INI handler for pcre.jit: stores the flag through OnUpdateBool(), then
 * assigns the thread's JIT stack to the match context when JIT is on and a
 * stack exists, or clears the assignment otherwise. Always reports SUCCESS. */
static PHP_INI_MH(OnUpdateJit)
{/*{{{*/
	void *stack = NULL;

	OnUpdateBool(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);

	if (PCRE_G(jit) && jit_stack) {
		stack = jit_stack;
	}
	pcre2_jit_stack_assign(mctx, NULL, stack);

	return SUCCESS;
}/*}}}*/
#endif
365
366 PHP_INI_BEGIN()
367 STD_PHP_INI_ENTRY("pcre.backtrack_limit", "1000000", PHP_INI_ALL, OnUpdateBacktrackLimit, backtrack_limit, zend_pcre_globals, pcre_globals)
368 STD_PHP_INI_ENTRY("pcre.recursion_limit", "100000", PHP_INI_ALL, OnUpdateRecursionLimit, recursion_limit, zend_pcre_globals, pcre_globals)
369 #ifdef HAVE_PCRE_JIT_SUPPORT
370 STD_PHP_INI_BOOLEAN("pcre.jit", "1", PHP_INI_ALL, OnUpdateJit, jit, zend_pcre_globals, pcre_globals)
371 #endif
PHP_INI_END()372 PHP_INI_END()
373
/* Fetches a string-valued pcre2_config() item into a freshly malloc'ed buffer.
 *
 * Returns the buffer, which the caller must release with free(), or NULL when
 * the item is not available, the library reports an error, or memory cannot
 * be obtained. */
static char *_pcre2_config_str(uint32_t what)
{/*{{{*/
	char *ret;
	/* With a NULL destination pcre2_config() only reports the required size;
	   a negative value is an error code, not a length. */
	int len = pcre2_config(what, NULL);

	if (len <= 0) {
		return NULL;
	}

	ret = malloc((size_t)len + 1);
	if (ret == NULL) {
		return NULL;
	}

	len = pcre2_config(what, ret);
	if (len <= 0) {
		free(ret);
		return NULL;
	}

	return ret;
}/*}}}*/
387
388 /* {{{ PHP_MINFO_FUNCTION(pcre) */
PHP_MINFO_FUNCTION(pcre)389 static PHP_MINFO_FUNCTION(pcre)
390 {
391 #ifdef HAVE_PCRE_JIT_SUPPORT
392 uint32_t flag = 0;
393 char *jit_target = _pcre2_config_str(PCRE2_CONFIG_JITTARGET);
394 #endif
395 char *version = _pcre2_config_str(PCRE2_CONFIG_VERSION);
396 char *unicode = _pcre2_config_str(PCRE2_CONFIG_UNICODE_VERSION);
397
398 php_info_print_table_start();
399 php_info_print_table_row(2, "PCRE (Perl Compatible Regular Expressions) Support", "enabled" );
400 php_info_print_table_row(2, "PCRE Library Version", version);
401 free(version);
402 php_info_print_table_row(2, "PCRE Unicode Version", unicode);
403 free(unicode);
404
405 #ifdef HAVE_PCRE_JIT_SUPPORT
406 if (!pcre2_config(PCRE2_CONFIG_JIT, &flag)) {
407 php_info_print_table_row(2, "PCRE JIT Support", flag ? "enabled" : "disabled");
408 } else {
409 php_info_print_table_row(2, "PCRE JIT Support", "unknown" );
410 }
411 if (jit_target) {
412 php_info_print_table_row(2, "PCRE JIT Target", jit_target);
413 }
414 free(jit_target);
415 #else
416 php_info_print_table_row(2, "PCRE JIT Support", "not compiled in" );
417 #endif
418
419 #ifdef HAVE_PCRE_VALGRIND_SUPPORT
420 php_info_print_table_row(2, "PCRE Valgrind Support", "enabled" );
421 #endif
422
423 php_info_print_table_end();
424
425 DISPLAY_INI_ENTRIES();
426 }
427 /* }}} */
428
429 /* {{{ PHP_MINIT_FUNCTION(pcre) */
PHP_MINIT_FUNCTION(pcre)430 static PHP_MINIT_FUNCTION(pcre)
431 {
432 #ifdef HAVE_PCRE_JIT_SUPPORT
433 if (UNEXPECTED(!pcre2_init_ok)) {
434 /* Retry. */
435 php_pcre_init_pcre2(PCRE_G(jit));
436 if (!pcre2_init_ok) {
437 return FAILURE;
438 }
439 }
440 #endif
441
442 REGISTER_INI_ENTRIES();
443
444 php_pcre_version = _pcre2_config_str(PCRE2_CONFIG_VERSION);
445
446 register_php_pcre_symbols(module_number);
447
448 return SUCCESS;
449 }
450 /* }}} */
451
452 /* {{{ PHP_MSHUTDOWN_FUNCTION(pcre) */
PHP_MSHUTDOWN_FUNCTION(pcre)453 static PHP_MSHUTDOWN_FUNCTION(pcre)
454 {
455 UNREGISTER_INI_ENTRIES();
456
457 free(php_pcre_version);
458
459 return SUCCESS;
460 }
461 /* }}} */
462
463 /* {{{ PHP_RINIT_FUNCTION(pcre) */
PHP_RINIT_FUNCTION(pcre)464 static PHP_RINIT_FUNCTION(pcre)
465 {
466 #ifdef HAVE_PCRE_JIT_SUPPORT
467 if (UNEXPECTED(!pcre2_init_ok)) {
468 /* Retry. */
469 php_pcre_mutex_lock();
470 php_pcre_init_pcre2(PCRE_G(jit));
471 if (!pcre2_init_ok) {
472 php_pcre_mutex_unlock();
473 return FAILURE;
474 }
475 php_pcre_mutex_unlock();
476 }
477
478 mdata_used = 0;
479 #endif
480
481 PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
482 PCRE_G(gctx_zmm) = pcre2_general_context_create(php_pcre_emalloc, php_pcre_efree, NULL);
483 if (!PCRE_G(gctx_zmm)) {
484 return FAILURE;
485 }
486
487 if (PCRE_G(per_request_cache)) {
488 zend_hash_init(&PCRE_G(pcre_cache), 0, NULL, php_efree_pcre_cache, 0);
489 }
490
491 return SUCCESS;
492 }
493 /* }}} */
494
PHP_RSHUTDOWN_FUNCTION(pcre)495 static PHP_RSHUTDOWN_FUNCTION(pcre)
496 {
497 pcre2_general_context_free(PCRE_G(gctx_zmm));
498 PCRE_G(gctx_zmm) = NULL;
499
500 if (PCRE_G(per_request_cache)) {
501 zend_hash_destroy(&PCRE_G(pcre_cache));
502 }
503
504 zval_ptr_dtor(&PCRE_G(unmatched_null_pair));
505 zval_ptr_dtor(&PCRE_G(unmatched_empty_pair));
506 ZVAL_UNDEF(&PCRE_G(unmatched_null_pair));
507 ZVAL_UNDEF(&PCRE_G(unmatched_empty_pair));
508 return SUCCESS;
509 }
510
511 /* {{{ static pcre_clean_cache */
pcre_clean_cache(zval * data,void * arg)512 static int pcre_clean_cache(zval *data, void *arg)
513 {
514 pcre_cache_entry *pce = (pcre_cache_entry *) Z_PTR_P(data);
515 int *num_clean = (int *)arg;
516
517 if (*num_clean > 0 && !pce->refcount) {
518 (*num_clean)--;
519 return ZEND_HASH_APPLY_REMOVE;
520 } else {
521 return ZEND_HASH_APPLY_KEEP;
522 }
523 }
524 /* }}} */
525
/* Releases every non-NULL name in a table of `num_subpats` slots, then frees
 * the table itself. */
static void free_subpats_table(zend_string **subpat_names, uint32_t num_subpats) {
	zend_string **slot = subpat_names;
	zend_string **const end = subpat_names + num_subpats;

	for (; slot != end; slot++) {
		if (*slot != NULL) {
			zend_string_release_ex(*slot, false);
		}
	}
	efree(subpat_names);
}
535
536 /* {{{ static make_subpats_table */
make_subpats_table(uint32_t num_subpats,pcre_cache_entry * pce)537 static zend_string **make_subpats_table(uint32_t num_subpats, pcre_cache_entry *pce)
538 {
539 uint32_t name_cnt = pce->name_count, name_size, ni = 0;
540 char *name_table;
541 zend_string **subpat_names;
542 int rc1, rc2;
543
544 rc1 = pcre2_pattern_info(pce->re, PCRE2_INFO_NAMETABLE, &name_table);
545 rc2 = pcre2_pattern_info(pce->re, PCRE2_INFO_NAMEENTRYSIZE, &name_size);
546 if (rc1 < 0 || rc2 < 0) {
547 php_error_docref(NULL, E_WARNING, "Internal pcre2_pattern_info() error %d", rc1 < 0 ? rc1 : rc2);
548 return NULL;
549 }
550
551 subpat_names = ecalloc(num_subpats, sizeof(zend_string *));
552 while (ni++ < name_cnt) {
553 unsigned short name_idx = 0x100 * (unsigned char)name_table[0] + (unsigned char)name_table[1];
554 const char *name = name_table + 2;
555 subpat_names[name_idx] = zend_string_init(name, strlen(name), 0);
556 if (is_numeric_string(ZSTR_VAL(subpat_names[name_idx]), ZSTR_LEN(subpat_names[name_idx]), NULL, NULL, 0) > 0) {
557 php_error_docref(NULL, E_WARNING, "Numeric named subpatterns are not allowed");
558 free_subpats_table(subpat_names, num_subpats);
559 return NULL;
560 }
561 name_table += name_size;
562 }
563 return subpat_names;
564 }
565 /* }}} */
566
567 /* {{{ static calculate_unit_length */
568 /* Calculates the byte length of the next character. Assumes valid UTF-8 for PCRE2_UTF. */
calculate_unit_length(pcre_cache_entry * pce,const char * start)569 static zend_always_inline size_t calculate_unit_length(pcre_cache_entry *pce, const char *start)
570 {
571 size_t unit_len;
572
573 if (pce->compile_options & PCRE2_UTF) {
574 const char *end = start;
575
576 /* skip continuation bytes */
577 while ((*++end & 0xC0) == 0x80);
578 unit_len = end - start;
579 } else {
580 unit_len = 1;
581 }
582 return unit_len;
583 }
584 /* }}} */
585
586 /* {{{ pcre_get_compiled_regex_cache */
pcre_get_compiled_regex_cache_ex(zend_string * regex,bool locale_aware)587 PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, bool locale_aware)
588 {
589 pcre2_code *re = NULL;
590 #if 10 == PCRE2_MAJOR && 37 == PCRE2_MINOR && !HAVE_BUNDLED_PCRE
591 uint32_t coptions = PCRE2_NO_START_OPTIMIZE;
592 #else
593 uint32_t coptions = 0;
594 #endif
595 PCRE2_UCHAR error[128];
596 PCRE2_SIZE erroffset;
597 int errnumber;
598 char delimiter;
599 char start_delimiter;
600 char end_delimiter;
601 char *p, *pp;
602 char *pattern;
603 size_t pattern_len;
604 uint32_t poptions = 0;
605 const uint8_t *tables = NULL;
606 zval *zv;
607 pcre_cache_entry new_entry;
608 int rc;
609 zend_string *key;
610 pcre_cache_entry *ret;
611
612 if (locale_aware && BG(ctype_string)) {
613 key = zend_string_concat2(
614 ZSTR_VAL(BG(ctype_string)), ZSTR_LEN(BG(ctype_string)),
615 ZSTR_VAL(regex), ZSTR_LEN(regex));
616 } else {
617 key = regex;
618 }
619
620 /* Try to lookup the cached regex entry, and if successful, just pass
621 back the compiled pattern, otherwise go on and compile it. */
622 zv = zend_hash_find(&PCRE_G(pcre_cache), key);
623 if (zv) {
624 if (key != regex) {
625 zend_string_release_ex(key, 0);
626 }
627 return (pcre_cache_entry*)Z_PTR_P(zv);
628 }
629
630 p = ZSTR_VAL(regex);
631 const char* end_p = ZSTR_VAL(regex) + ZSTR_LEN(regex);
632
633 /* Parse through the leading whitespace, and display a warning if we
634 get to the end without encountering a delimiter. */
635 while (isspace((int)*(unsigned char *)p)) p++;
636 if (p >= end_p) {
637 if (key != regex) {
638 zend_string_release_ex(key, 0);
639 }
640 php_error_docref(NULL, E_WARNING, "Empty regular expression");
641 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
642 return NULL;
643 }
644
645 /* Get the delimiter and display a warning if it is alphanumeric
646 or a backslash. */
647 delimiter = *p++;
648 if (isalnum((int)*(unsigned char *)&delimiter) || delimiter == '\\' || delimiter == '\0') {
649 if (key != regex) {
650 zend_string_release_ex(key, 0);
651 }
652 php_error_docref(NULL, E_WARNING, "Delimiter must not be alphanumeric, backslash, or NUL byte");
653 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
654 return NULL;
655 }
656
657 start_delimiter = delimiter;
658 if ((pp = strchr("([{< )]}> )]}>", delimiter)))
659 delimiter = pp[5];
660 end_delimiter = delimiter;
661
662 pp = p;
663
664 if (start_delimiter == end_delimiter) {
665 /* We need to iterate through the pattern, searching for the ending delimiter,
666 but skipping the backslashed delimiters. If the ending delimiter is not
667 found, display a warning. */
668 while (pp < end_p) {
669 if (*pp == '\\' && pp + 1 < end_p) pp++;
670 else if (*pp == delimiter)
671 break;
672 pp++;
673 }
674 } else {
675 /* We iterate through the pattern, searching for the matching ending
676 * delimiter. For each matching starting delimiter, we increment nesting
677 * level, and decrement it for each matching ending delimiter. If we
678 * reach the end of the pattern without matching, display a warning.
679 */
680 int brackets = 1; /* brackets nesting level */
681 while (pp < end_p) {
682 if (*pp == '\\' && pp + 1 < end_p) pp++;
683 else if (*pp == end_delimiter && --brackets <= 0)
684 break;
685 else if (*pp == start_delimiter)
686 brackets++;
687 pp++;
688 }
689 }
690
691 if (pp >= end_p) {
692 if (key != regex) {
693 zend_string_release_ex(key, 0);
694 }
695 if (start_delimiter == end_delimiter) {
696 php_error_docref(NULL,E_WARNING, "No ending delimiter '%c' found", delimiter);
697 } else {
698 php_error_docref(NULL,E_WARNING, "No ending matching delimiter '%c' found", delimiter);
699 }
700 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
701 return NULL;
702 }
703
704 /* Make a copy of the actual pattern. */
705 pattern_len = pp - p;
706 pattern = estrndup(p, pattern_len);
707
708 /* Move on to the options */
709 pp++;
710
711 /* Parse through the options, setting appropriate flags. Display
712 a warning if we encounter an unknown modifier. */
713 while (pp < end_p) {
714 switch (*pp++) {
715 /* Perl compatible options */
716 case 'i': coptions |= PCRE2_CASELESS; break;
717 case 'm': coptions |= PCRE2_MULTILINE; break;
718 case 'n': coptions |= PCRE2_NO_AUTO_CAPTURE; break;
719 case 's': coptions |= PCRE2_DOTALL; break;
720 case 'x': coptions |= PCRE2_EXTENDED; break;
721
722 /* PCRE specific options */
723 case 'A': coptions |= PCRE2_ANCHORED; break;
724 case 'D': coptions |= PCRE2_DOLLAR_ENDONLY;break;
725 case 'S': /* Pass. */ break;
726 case 'X': /* Pass. */ break;
727 case 'U': coptions |= PCRE2_UNGREEDY; break;
728 case 'u': coptions |= PCRE2_UTF;
729 /* In PCRE, by default, \d, \D, \s, \S, \w, and \W recognize only ASCII
730 characters, even in UTF-8 mode. However, this can be changed by setting
731 the PCRE2_UCP option. */
732 #ifdef PCRE2_UCP
733 coptions |= PCRE2_UCP;
734 #endif
735 break;
736 case 'J': coptions |= PCRE2_DUPNAMES; break;
737
738 case ' ':
739 case '\n':
740 case '\r':
741 break;
742
743 case 'e': /* legacy eval */
744 default:
745 if (pp[-1]) {
746 php_error_docref(NULL, E_WARNING, "Unknown modifier '%c'", pp[-1]);
747 } else {
748 php_error_docref(NULL, E_WARNING, "NUL byte is not a valid modifier");
749 }
750 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
751 efree(pattern);
752 if (key != regex) {
753 zend_string_release_ex(key, 0);
754 }
755 return NULL;
756 }
757 }
758
759 if (key != regex) {
760 tables = (uint8_t *)zend_hash_find_ptr(&char_tables, BG(ctype_string));
761 if (!tables) {
762 zend_string *_k;
763 tables = pcre2_maketables(gctx);
764 if (UNEXPECTED(!tables)) {
765 php_error_docref(NULL,E_WARNING, "Failed to generate locale character tables");
766 pcre_handle_exec_error(PCRE2_ERROR_NOMEMORY);
767 zend_string_release_ex(key, 0);
768 efree(pattern);
769 return NULL;
770 }
771 _k = zend_string_init(ZSTR_VAL(BG(ctype_string)), ZSTR_LEN(BG(ctype_string)), 1);
772 GC_MAKE_PERSISTENT_LOCAL(_k);
773 zend_hash_add_ptr(&char_tables, _k, (void *)tables);
774 zend_string_release(_k);
775 }
776 }
777 pcre2_set_character_tables(cctx, tables);
778
779 /* Compile pattern and display a warning if compilation failed. */
780 re = pcre2_compile((PCRE2_SPTR)pattern, pattern_len, coptions, &errnumber, &erroffset, cctx);
781
782 if (re == NULL) {
783 if (key != regex) {
784 zend_string_release_ex(key, 0);
785 }
786 pcre2_get_error_message(errnumber, error, sizeof(error));
787 php_error_docref(NULL,E_WARNING, "Compilation failed: %s at offset %zu", error, erroffset);
788 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
789 efree(pattern);
790 return NULL;
791 }
792
793 #ifdef HAVE_PCRE_JIT_SUPPORT
794 if (PCRE_G(jit)) {
795 /* Enable PCRE JIT compiler */
796 rc = pcre2_jit_compile(re, PCRE2_JIT_COMPLETE);
797 if (EXPECTED(rc >= 0)) {
798 size_t jit_size = 0;
799 if (!pcre2_pattern_info(re, PCRE2_INFO_JITSIZE, &jit_size) && jit_size > 0) {
800 poptions |= PREG_JIT;
801 }
802 } else if (rc == PCRE2_ERROR_NOMEMORY) {
803 php_error_docref(NULL, E_WARNING,
804 "Allocation of JIT memory failed, PCRE JIT will be disabled. "
805 "This is likely caused by security restrictions. "
806 "Either grant PHP permission to allocate executable memory, or set pcre.jit=0");
807 PCRE_G(jit) = 0;
808 } else {
809 pcre2_get_error_message(rc, error, sizeof(error));
810 php_error_docref(NULL, E_WARNING, "JIT compilation failed: %s", error);
811 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
812 }
813 }
814 #endif
815 efree(pattern);
816
817 /*
818 * If we reached cache limit, clean out the items from the head of the list;
819 * these are supposedly the oldest ones (but not necessarily the least used
820 * ones).
821 */
822 if (zend_hash_num_elements(&PCRE_G(pcre_cache)) == PCRE_CACHE_SIZE) {
823 int num_clean = PCRE_CACHE_SIZE / 8;
824 zend_hash_apply_with_argument(&PCRE_G(pcre_cache), pcre_clean_cache, &num_clean);
825 }
826
827 /* Store the compiled pattern and extra info in the cache. */
828 new_entry.re = re;
829 new_entry.preg_options = poptions;
830 new_entry.compile_options = coptions;
831 new_entry.refcount = 0;
832
833 rc = pcre2_pattern_info(re, PCRE2_INFO_CAPTURECOUNT, &new_entry.capture_count);
834 if (rc < 0) {
835 if (key != regex) {
836 zend_string_release_ex(key, 0);
837 }
838 php_error_docref(NULL, E_WARNING, "Internal pcre2_pattern_info() error %d", rc);
839 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
840 return NULL;
841 }
842
843 rc = pcre2_pattern_info(re, PCRE2_INFO_NAMECOUNT, &new_entry.name_count);
844 if (rc < 0) {
845 if (key != regex) {
846 zend_string_release_ex(key, 0);
847 }
848 php_error_docref(NULL, E_WARNING, "Internal pcre_pattern_info() error %d", rc);
849 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
850 return NULL;
851 }
852
853 /*
854 * Interned strings are not duplicated when stored in HashTable,
855 * but all the interned strings created during HTTP request are removed
856 * at end of request. However PCRE_G(pcre_cache) must be consistent
857 * on the next request as well. So we disable usage of interned strings
858 * as hash keys especually for this table.
859 * See bug #63180
860 */
861 if (!(GC_FLAGS(key) & IS_STR_PERMANENT) && !PCRE_G(per_request_cache)) {
862 zend_string *str = zend_string_init(ZSTR_VAL(key), ZSTR_LEN(key), 1);
863 GC_MAKE_PERSISTENT_LOCAL(str);
864
865 ret = zend_hash_add_new_mem(&PCRE_G(pcre_cache), str, &new_entry, sizeof(pcre_cache_entry));
866 zend_string_release(str);
867 } else {
868 ret = zend_hash_add_new_mem(&PCRE_G(pcre_cache), key, &new_entry, sizeof(pcre_cache_entry));
869 }
870
871 if (key != regex) {
872 zend_string_release_ex(key, 0);
873 }
874
875 return ret;
876 }
877 /* }}} */
878
879 /* {{{ pcre_get_compiled_regex_cache */
pcre_get_compiled_regex_cache(zend_string * regex)880 PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex)
881 {
882 return pcre_get_compiled_regex_cache_ex(regex, true);
883 }
884 /* }}} */
885
886 /* {{{ pcre_get_compiled_regex */
pcre_get_compiled_regex(zend_string * regex,uint32_t * capture_count)887 PHPAPI pcre2_code *pcre_get_compiled_regex(zend_string *regex, uint32_t *capture_count)
888 {
889 pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex);
890
891 if (capture_count) {
892 *capture_count = pce ? pce->capture_count : 0;
893 }
894
895 return pce ? pce->re : NULL;
896 }
897 /* }}} */
898
899 /* XXX For the cases where it's only about match yes/no and no capture
900 required, perhaps just a minimum sized data would suffice. */
php_pcre_create_match_data(uint32_t capture_count,pcre2_code * re)901 PHPAPI pcre2_match_data *php_pcre_create_match_data(uint32_t capture_count, pcre2_code *re)
902 {/*{{{*/
903
904 assert(NULL != re);
905
906 if (EXPECTED(!mdata_used)) {
907 int rc = 0;
908
909 if (!capture_count) {
910 /* As we deal with a non cached pattern, no other way to gather this info. */
911 rc = pcre2_pattern_info(re, PCRE2_INFO_CAPTURECOUNT, &capture_count);
912 }
913
914 if (rc >= 0 && capture_count + 1 <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
915 mdata_used = 1;
916 return mdata;
917 }
918 }
919
920 return pcre2_match_data_create_from_pattern(re, gctx);
921 }/*}}}*/
922
php_pcre_free_match_data(pcre2_match_data * match_data)923 PHPAPI void php_pcre_free_match_data(pcre2_match_data *match_data)
924 {/*{{{*/
925 if (UNEXPECTED(match_data != mdata)) {
926 pcre2_match_data_free(match_data);
927 } else {
928 mdata_used = 0;
929 }
930 }/*}}}*/
931
init_unmatched_null_pair(void)932 static void init_unmatched_null_pair(void) {
933 zval val1, val2;
934 ZVAL_NULL(&val1);
935 ZVAL_LONG(&val2, -1);
936 ZVAL_ARR(&PCRE_G(unmatched_null_pair), zend_new_pair(&val1, &val2));
937 }
938
init_unmatched_empty_pair(void)939 static void init_unmatched_empty_pair(void) {
940 zval val1, val2;
941 ZVAL_EMPTY_STRING(&val1);
942 ZVAL_LONG(&val2, -1);
943 ZVAL_ARR(&PCRE_G(unmatched_empty_pair), zend_new_pair(&val1, &val2));
944 }
945
populate_match_value_str(zval * val,const char * subject,PCRE2_SIZE start_offset,PCRE2_SIZE end_offset)946 static zend_always_inline void populate_match_value_str(
947 zval *val, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset) {
948 ZVAL_STRINGL_FAST(val, subject + start_offset, end_offset - start_offset);
949 }
950
populate_match_value(zval * val,const char * subject,PCRE2_SIZE start_offset,PCRE2_SIZE end_offset,bool unmatched_as_null)951 static zend_always_inline void populate_match_value(
952 zval *val, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset,
953 bool unmatched_as_null) {
954 if (PCRE2_UNSET == start_offset) {
955 if (unmatched_as_null) {
956 ZVAL_NULL(val);
957 } else {
958 ZVAL_EMPTY_STRING(val);
959 }
960 } else {
961 populate_match_value_str(val, subject, start_offset, end_offset);
962 }
963 }
964
/* Stores `val` under `name` in `subpats` and takes a reference on it when it
 * was stored. */
static inline void add_named(
		HashTable *const subpats, zend_string *name, zval *val, bool unmatched) {
	/* If the DUPNAMES option is used, multiple subpatterns might have the same name.
	 * In this case we want to preserve the one that actually has a value:
	 * an unmatched value never replaces an existing key, a matched one always does. */
	if (unmatched) {
		if (zend_hash_add(subpats, name, val) == NULL) {
			/* Key already present: nothing stored, so no reference to add. */
			return;
		}
	} else {
		zend_hash_update(subpats, name, val);
	}

	Z_TRY_ADDREF_P(val);
}
978
979 /* {{{ add_offset_pair */
add_offset_pair(HashTable * const result,const char * subject,PCRE2_SIZE start_offset,PCRE2_SIZE end_offset,zend_string * name,zend_long unmatched_as_null)980 static inline void add_offset_pair(
981 HashTable *const result, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset,
982 zend_string *name, zend_long unmatched_as_null)
983 {
984 zval match_pair;
985
986 /* Add (match, offset) to the return value */
987 if (PCRE2_UNSET == start_offset) {
988 if (unmatched_as_null) {
989 if (Z_ISUNDEF(PCRE_G(unmatched_null_pair))) {
990 init_unmatched_null_pair();
991 }
992 ZVAL_COPY(&match_pair, &PCRE_G(unmatched_null_pair));
993 } else {
994 if (Z_ISUNDEF(PCRE_G(unmatched_empty_pair))) {
995 init_unmatched_empty_pair();
996 }
997 ZVAL_COPY(&match_pair, &PCRE_G(unmatched_empty_pair));
998 }
999 } else {
1000 zval val1, val2;
1001 populate_match_value_str(&val1, subject, start_offset, end_offset);
1002 ZVAL_LONG(&val2, start_offset);
1003 ZVAL_ARR(&match_pair, zend_new_pair(&val1, &val2));
1004 }
1005
1006 if (name) {
1007 add_named(result, name, &match_pair, start_offset == PCRE2_UNSET);
1008 }
1009 zend_hash_next_index_insert_new(result, &match_pair);
1010 }
1011 /* }}} */
1012
/* Fills the array `subpats` with the capture groups of a single match.
 *
 * The first `count` groups are taken from `offsets`, which holds a start/end
 * pair per group pointing into `subject`. When PREG_UNMATCHED_AS_NULL is set
 * in `flags`, the groups from `count` up to `num_subpats` are appended as
 * unmatched entries as well. With PREG_OFFSET_CAPTURE every group is stored
 * through add_offset_pair() instead of as a plain value. If `subpat_names`
 * is given, a group that has a name is stored under that name in addition to
 * its numeric entry. A non-NULL `mark` is finally added under the "MARK" key. */
static void populate_subpat_array(
		zval *subpats, const char *subject, PCRE2_SIZE *offsets, zend_string **subpat_names,
		uint32_t num_subpats, int count, const PCRE2_SPTR mark, zend_long flags) {
	const zend_long offset_capture = flags & PREG_OFFSET_CAPTURE;
	const zend_long unmatched_as_null = flags & PREG_UNMATCHED_AS_NULL;
	HashTable *ht = Z_ARRVAL_P(subpats);
	zval entry;
	int idx;

	/* Groups reported by the matcher. */
	for (idx = 0; idx < count; idx++) {
		zend_string *group_name = subpat_names ? subpat_names[idx] : NULL;
		const PCRE2_SIZE from = offsets[2*idx];
		const PCRE2_SIZE to = offsets[2*idx+1];

		if (offset_capture) {
			add_offset_pair(ht, subject, from, to, group_name, unmatched_as_null);
			continue;
		}

		populate_match_value(&entry, subject, from, to, unmatched_as_null);
		if (group_name) {
			add_named(ht, group_name, &entry, from == PCRE2_UNSET);
		}
		zend_hash_next_index_insert_new(ht, &entry);
	}

	/* Trailing groups the matcher did not report; only padded in NULL mode. */
	if (unmatched_as_null) {
		for (idx = count; idx < num_subpats; idx++) {
			zend_string *group_name = subpat_names ? subpat_names[idx] : NULL;

			if (offset_capture) {
				add_offset_pair(ht, NULL, PCRE2_UNSET, PCRE2_UNSET, group_name, 1);
			} else if (subpat_names) {
				ZVAL_NULL(&entry);
				if (group_name) {
					/* Does not replace an entry already stored under this name. */
					zend_hash_add(ht, group_name, &entry);
				}
				zend_hash_next_index_insert_new(ht, &entry);
			} else {
				add_next_index_null(subpats);
			}
		}
	}

	/* Add MARK, if available */
	if (mark) {
		add_assoc_string_ex(subpats, "MARK", sizeof("MARK") - 1, (char *)mark);
	}
}
1081
/* Shared body of preg_match() and preg_match_all(): parses the userland
 * arguments, looks up (or compiles) the pattern and hands over to
 * php_pcre_match_impl(). `global` selects match-all behaviour. */
static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, bool global) /* {{{ */
{
	zend_string *regex;              /* pattern source */
	zend_string *subject;            /* string to search */
	zval *subpats = NULL;            /* optional output array */
	zend_long flags = 0;             /* PREG_* flags */
	zend_long start_offset = 0;      /* where the search begins */
	pcre_cache_entry *pce;

	ZEND_PARSE_PARAMETERS_START(2, 5)
		Z_PARAM_STR(regex)
		Z_PARAM_STR(subject)
		Z_PARAM_OPTIONAL
		Z_PARAM_ZVAL(subpats)
		Z_PARAM_LONG(flags)
		Z_PARAM_LONG(start_offset)
	ZEND_PARSE_PARAMETERS_END();

	/* Fetch the compiled pattern from the cache, compiling it if needed. */
	pce = pcre_get_compiled_regex_cache(regex);
	if (pce == NULL) {
		RETURN_FALSE;
	}

	/* The entry's refcount is raised for the duration of the match
	 * (presumably to keep the cache entry alive while it is in use). */
	pce->refcount++;
	php_pcre_match_impl(pce, subject, return_value, subpats,
		global, flags, start_offset);
	pce->refcount--;
}
/* }}} */
1112
is_known_valid_utf8(zend_string * subject_str,PCRE2_SIZE start_offset)1113 static zend_always_inline bool is_known_valid_utf8(
1114 zend_string *subject_str, PCRE2_SIZE start_offset) {
1115 if (!ZSTR_IS_VALID_UTF8(subject_str)) {
1116 /* We don't know whether the string is valid UTF-8 or not. */
1117 return 0;
1118 }
1119
1120 if (start_offset == ZSTR_LEN(subject_str)) {
1121 /* Degenerate case: Offset points to end of string. */
1122 return 1;
1123 }
1124
1125 /* Check that the offset does not point to an UTF-8 continuation byte. */
1126 return (ZSTR_VAL(subject_str)[start_offset] & 0xc0) != 0x80;
1127 }
1128
1129 /* {{{ php_pcre_match_impl() */
php_pcre_match_impl(pcre_cache_entry * pce,zend_string * subject_str,zval * return_value,zval * subpats,bool global,zend_long flags,zend_off_t start_offset)1130 PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str, zval *return_value,
1131 zval *subpats, bool global, zend_long flags, zend_off_t start_offset)
1132 {
1133 zval result_set; /* Holds a set of subpatterns after
1134 a global match */
1135 HashTable **match_sets = NULL; /* An array of sets of matches for each
1136 subpattern after a global match */
1137 uint32_t options; /* Execution options */
1138 int count; /* Count of matched subpatterns */
1139 uint32_t num_subpats; /* Number of captured subpatterns */
1140 int matched; /* Has anything matched */
1141 zend_string **subpat_names; /* Array for named subpatterns */
1142 size_t i;
1143 uint32_t subpats_order; /* Order of subpattern matches */
1144 uint32_t offset_capture; /* Capture match offsets: yes/no */
1145 zend_long unmatched_as_null; /* Null non-matches: yes/no */
1146 PCRE2_SPTR mark = NULL; /* Target for MARK name */
1147 HashTable *marks = NULL; /* Array of marks for PREG_PATTERN_ORDER */
1148 pcre2_match_data *match_data;
1149 PCRE2_SIZE start_offset2, orig_start_offset;
1150
1151 char *subject = ZSTR_VAL(subject_str);
1152 size_t subject_len = ZSTR_LEN(subject_str);
1153
1154 /* Overwrite the passed-in value for subpatterns with an empty array. */
1155 if (subpats != NULL) {
1156 subpats = zend_try_array_init(subpats);
1157 if (!subpats) {
1158 RETURN_THROWS();
1159 }
1160 }
1161
1162 subpats_order = global ? PREG_PATTERN_ORDER : 0;
1163
1164 if (flags) {
1165 offset_capture = flags & PREG_OFFSET_CAPTURE;
1166 unmatched_as_null = flags & PREG_UNMATCHED_AS_NULL;
1167
1168 /*
1169 * subpats_order is pre-set to pattern mode so we change it only if
1170 * necessary.
1171 */
1172 if (flags & 0xff) {
1173 subpats_order = flags & 0xff;
1174 if ((global && (subpats_order < PREG_PATTERN_ORDER || subpats_order > PREG_SET_ORDER)) ||
1175 (!global && subpats_order != 0)) {
1176 zend_argument_value_error(4, "must be a PREG_* constant");
1177 RETURN_THROWS();
1178 }
1179 }
1180 } else {
1181 offset_capture = 0;
1182 unmatched_as_null = 0;
1183 }
1184
1185 /* Negative offset counts from the end of the string. */
1186 if (start_offset < 0) {
1187 if ((PCRE2_SIZE)-start_offset <= subject_len) {
1188 start_offset2 = subject_len + start_offset;
1189 } else {
1190 start_offset2 = 0;
1191 }
1192 } else {
1193 start_offset2 = (PCRE2_SIZE)start_offset;
1194 }
1195
1196 if (start_offset2 > subject_len) {
1197 pcre_handle_exec_error(PCRE2_ERROR_BADOFFSET);
1198 RETURN_FALSE;
1199 }
1200
1201 /* Calculate the size of the offsets array, and allocate memory for it. */
1202 num_subpats = pce->capture_count + 1;
1203
1204 /*
1205 * Build a mapping from subpattern numbers to their names. We will
1206 * allocate the table only if there are any named subpatterns.
1207 */
1208 subpat_names = NULL;
1209 if (subpats && pce->name_count > 0) {
1210 subpat_names = make_subpats_table(num_subpats, pce);
1211 if (!subpat_names) {
1212 RETURN_FALSE;
1213 }
1214 }
1215
1216 matched = 0;
1217 PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1218
1219 if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
1220 match_data = mdata;
1221 } else {
1222 match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
1223 if (!match_data) {
1224 PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1225 if (subpat_names) {
1226 free_subpats_table(subpat_names, num_subpats);
1227 }
1228 RETURN_FALSE;
1229 }
1230 }
1231
1232 /* Allocate match sets array and initialize the values. */
1233 if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
1234 match_sets = safe_emalloc(num_subpats, sizeof(HashTable *), 0);
1235 for (i=0; i<num_subpats; i++) {
1236 match_sets[i] = zend_new_array(0);
1237 }
1238 }
1239
1240 /* Array of subpattern offsets */
1241 PCRE2_SIZE *const offsets = pcre2_get_ovector_pointer(match_data);
1242
1243 orig_start_offset = start_offset2;
1244 options =
1245 (pce->compile_options & PCRE2_UTF) && !is_known_valid_utf8(subject_str, orig_start_offset)
1246 ? 0 : PCRE2_NO_UTF_CHECK;
1247
1248 /* Execute the regular expression. */
1249 #ifdef HAVE_PCRE_JIT_SUPPORT
1250 if ((pce->preg_options & PREG_JIT) && options) {
1251 count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1252 PCRE2_NO_UTF_CHECK, match_data, mctx);
1253 } else
1254 #endif
1255 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1256 options, match_data, mctx);
1257
1258 while (1) {
1259 /* If something has matched */
1260 if (count >= 0) {
1261 /* Check for too many substrings condition. */
1262 if (UNEXPECTED(count == 0)) {
1263 php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
1264 count = num_subpats;
1265 }
1266
1267 matched:
1268 matched++;
1269
1270 /* If subpatterns array has been passed, fill it in with values. */
1271 if (subpats != NULL) {
1272 /* Try to get the list of substrings and display a warning if failed. */
1273 if (UNEXPECTED(offsets[1] < offsets[0])) {
1274 if (subpat_names) {
1275 free_subpats_table(subpat_names, num_subpats);
1276 }
1277 if (match_sets) efree(match_sets);
1278 php_error_docref(NULL, E_WARNING, "Get subpatterns list failed");
1279 RETURN_FALSE;
1280 }
1281
1282 if (global) { /* global pattern matching */
1283 if (subpats_order == PREG_PATTERN_ORDER) {
1284 /* For each subpattern, insert it into the appropriate array. */
1285 if (offset_capture) {
1286 for (i = 0; i < count; i++) {
1287 add_offset_pair(
1288 match_sets[i], subject, offsets[2*i], offsets[2*i+1],
1289 NULL, unmatched_as_null);
1290 }
1291 } else {
1292 for (i = 0; i < count; i++) {
1293 zval val;
1294 populate_match_value(
1295 &val, subject, offsets[2*i], offsets[2*i+1], unmatched_as_null);
1296 zend_hash_next_index_insert_new(match_sets[i], &val);
1297 }
1298 }
1299 mark = pcre2_get_mark(match_data);
1300 /* Add MARK, if available */
1301 if (mark) {
1302 if (!marks) {
1303 marks = zend_new_array(0);
1304 }
1305 zval tmp;
1306 ZVAL_STRING(&tmp, (char *) mark);
1307 zend_hash_index_add_new(marks, matched - 1, &tmp);
1308 }
1309 /*
1310 * If the number of captured subpatterns on this run is
1311 * less than the total possible number, pad the result
1312 * arrays with NULLs or empty strings.
1313 */
1314 if (count < num_subpats) {
1315 for (int i = count; i < num_subpats; i++) {
1316 if (offset_capture) {
1317 add_offset_pair(
1318 match_sets[i], NULL, PCRE2_UNSET, PCRE2_UNSET,
1319 NULL, unmatched_as_null);
1320 } else if (unmatched_as_null) {
1321 zval tmp;
1322 ZVAL_NULL(&tmp);
1323 zend_hash_next_index_insert_new(match_sets[i], &tmp);
1324 } else {
1325 zval tmp;
1326 ZVAL_EMPTY_STRING(&tmp);
1327 zend_hash_next_index_insert_new(match_sets[i], &tmp);
1328 }
1329 }
1330 }
1331 } else {
1332 /* Allocate and populate the result set array */
1333 mark = pcre2_get_mark(match_data);
1334 array_init_size(&result_set, count + (mark ? 1 : 0));
1335 populate_subpat_array(
1336 &result_set, subject, offsets, subpat_names,
1337 num_subpats, count, mark, flags);
1338 /* And add it to the output array */
1339 zend_hash_next_index_insert_new(Z_ARRVAL_P(subpats), &result_set);
1340 }
1341 } else { /* single pattern matching */
1342 /* For each subpattern, insert it into the subpatterns array. */
1343 mark = pcre2_get_mark(match_data);
1344 populate_subpat_array(
1345 subpats, subject, offsets, subpat_names, num_subpats, count, mark, flags);
1346 break;
1347 }
1348 }
1349
1350 /* Advance to the next piece. */
1351 start_offset2 = offsets[1];
1352
1353 /* If we have matched an empty string, mimic what Perl's /g options does.
1354 This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
1355 the match again at the same point. If this fails (picked up above) we
1356 advance to the next character. */
1357 if (start_offset2 == offsets[0]) {
1358 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1359 PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
1360 if (count >= 0) {
1361 if (global) {
1362 goto matched;
1363 } else {
1364 break;
1365 }
1366 } else if (count == PCRE2_ERROR_NOMATCH) {
1367 /* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
1368 this is not necessarily the end. We need to advance
1369 the start offset, and continue. Fudge the offset values
1370 to achieve this, unless we're already at the end of the string. */
1371 if (start_offset2 < subject_len) {
1372 size_t unit_len = calculate_unit_length(pce, subject + start_offset2);
1373
1374 start_offset2 += unit_len;
1375 } else {
1376 break;
1377 }
1378 } else {
1379 goto error;
1380 }
1381 }
1382 } else if (count == PCRE2_ERROR_NOMATCH) {
1383 break;
1384 } else {
1385 error:
1386 pcre_handle_exec_error(count);
1387 break;
1388 }
1389
1390 if (!global) {
1391 break;
1392 }
1393
1394 /* Execute the regular expression. */
1395 #ifdef HAVE_PCRE_JIT_SUPPORT
1396 if ((pce->preg_options & PREG_JIT)) {
1397 if (start_offset2 > subject_len) {
1398 pcre_handle_exec_error(PCRE2_ERROR_BADOFFSET);
1399 break;
1400 }
1401 count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1402 PCRE2_NO_UTF_CHECK, match_data, mctx);
1403 } else
1404 #endif
1405 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1406 PCRE2_NO_UTF_CHECK, match_data, mctx);
1407 }
1408 if (match_data != mdata) {
1409 pcre2_match_data_free(match_data);
1410 }
1411
1412 /* Add the match sets to the output array and clean up */
1413 if (match_sets) {
1414 if (subpat_names) {
1415 for (i = 0; i < num_subpats; i++) {
1416 zval wrapper;
1417 ZVAL_ARR(&wrapper, match_sets[i]);
1418 if (subpat_names[i]) {
1419 zend_hash_update(Z_ARRVAL_P(subpats), subpat_names[i], &wrapper);
1420 GC_ADDREF(match_sets[i]);
1421 }
1422 zend_hash_next_index_insert_new(Z_ARRVAL_P(subpats), &wrapper);
1423 }
1424 } else {
1425 for (i = 0; i < num_subpats; i++) {
1426 zval wrapper;
1427 ZVAL_ARR(&wrapper, match_sets[i]);
1428 zend_hash_next_index_insert_new(Z_ARRVAL_P(subpats), &wrapper);
1429 }
1430 }
1431 efree(match_sets);
1432
1433 if (marks) {
1434 zval tmp;
1435 ZVAL_ARR(&tmp, marks);
1436 zend_hash_str_update(Z_ARRVAL_P(subpats), "MARK", sizeof("MARK") - 1, &tmp);
1437 }
1438 }
1439
1440 if (subpat_names) {
1441 free_subpats_table(subpat_names, num_subpats);
1442 }
1443
1444 if (PCRE_G(error_code) == PHP_PCRE_NO_ERROR) {
1445 /* If there was no error and we're in /u mode, remember that the string is valid UTF-8. */
1446 if ((pce->compile_options & PCRE2_UTF)
1447 && !ZSTR_IS_INTERNED(subject_str) && orig_start_offset == 0) {
1448 GC_ADD_FLAGS(subject_str, IS_STR_VALID_UTF8);
1449 }
1450
1451 RETVAL_LONG(matched);
1452 } else {
1453 RETVAL_FALSE;
1454 }
1455 }
1456 /* }}} */
1457
1458 /* {{{ Perform a Perl-style regular expression match */
PHP_FUNCTION(preg_match)1459 PHP_FUNCTION(preg_match)
1460 {
1461 php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, false);
1462 }
1463 /* }}} */
1464
1465 ZEND_FRAMELESS_FUNCTION(preg_match, 2)
1466 {
1467 zval regex_tmp, subject_tmp;
1468 zend_string *regex, *subject;
1469
1470 Z_FLF_PARAM_STR(1, regex, regex_tmp);
1471 Z_FLF_PARAM_STR(2, subject, subject_tmp);
1472
1473 /* Compile regex or get it from cache. */
1474 pcre_cache_entry *pce;
1475 if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
1476 RETURN_FALSE;
1477 }
1478
1479 pce->refcount++;
1480 php_pcre_match_impl(pce, subject, return_value, /* subpats */ NULL,
1481 /* global */ false, /* flags */ 0, /* start_offset */ 0);
1482 pce->refcount--;
1483
1484 flf_clean:
1485 Z_FLF_PARAM_FREE_STR(1, regex_tmp);
1486 Z_FLF_PARAM_FREE_STR(2, subject_tmp);
1487 }
1488
1489 /* {{{ Perform a Perl-style global regular expression match */
PHP_FUNCTION(preg_match_all)1490 PHP_FUNCTION(preg_match_all)
1491 {
1492 php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, true);
1493 }
1494 /* }}} */
1495
/* {{{ preg_get_backref
 * Parses a back-reference of the form "Xn", "Xnn" or "${n}" / "${nn}" at *str,
 * where X is the single introducer character *str points at (it is skipped
 * without being inspected, apart from recognising the "${" form).
 *
 * On success the reference number is written to *backref, *str is advanced
 * past the reference and 1 is returned. On failure 0 is returned and *str is
 * left unchanged (*backref may already have been written). The input must be
 * NUL-terminated. */
static int preg_get_backref(char **str, int *backref)
{
	char *cursor = *str;
	int braced;

	/* An introducer that is the last character cannot start a reference. */
	if (cursor[1] == '\0') {
		return 0;
	}

	/* Skip the introducer, plus the opening brace of the "${" form. */
	braced = (cursor[0] == '$' && cursor[1] == '{');
	cursor += braced ? 2 : 1;

	/* At least one digit is required. */
	if (*cursor < '0' || *cursor > '9') {
		return 0;
	}
	*backref = *cursor++ - '0';

	/* An optional second digit; at most two digits are consumed. */
	if (*cursor >= '0' && *cursor <= '9') {
		*backref = *backref * 10 + *cursor - '0';
		cursor++;
	}

	/* The braced form must be closed right after the digits. */
	if (braced) {
		if (*cursor != '}') {
			return 0;
		}
		cursor++;
	}

	*str = cursor;
	return 1;
}
/* }}} */
1533
1534 /* {{{ preg_do_repl_func */
preg_do_repl_func(zend_fcall_info * fci,zend_fcall_info_cache * fcc,const char * subject,PCRE2_SIZE * offsets,zend_string ** subpat_names,uint32_t num_subpats,int count,const PCRE2_SPTR mark,zend_long flags)1535 static zend_string *preg_do_repl_func(zend_fcall_info *fci, zend_fcall_info_cache *fcc, const char *subject, PCRE2_SIZE *offsets, zend_string **subpat_names, uint32_t num_subpats, int count, const PCRE2_SPTR mark, zend_long flags)
1536 {
1537 zend_string *result_str;
1538 zval retval; /* Function return value */
1539 zval arg; /* Argument to pass to function */
1540
1541 array_init_size(&arg, count + (mark ? 1 : 0));
1542 populate_subpat_array(&arg, subject, offsets, subpat_names, num_subpats, count, mark, flags);
1543
1544 fci->retval = &retval;
1545 fci->param_count = 1;
1546 fci->params = &arg;
1547
1548 if (zend_call_function(fci, fcc) == SUCCESS && Z_TYPE(retval) != IS_UNDEF) {
1549 if (EXPECTED(Z_TYPE(retval) == IS_STRING)) {
1550 result_str = Z_STR(retval);
1551 } else {
1552 result_str = zval_get_string_func(&retval);
1553 zval_ptr_dtor(&retval);
1554 }
1555 } else {
1556 if (!EG(exception)) {
1557 php_error_docref(NULL, E_WARNING, "Unable to call custom replacement function");
1558 }
1559
1560 result_str = zend_string_init(&subject[offsets[0]], offsets[1] - offsets[0], 0);
1561 }
1562
1563 zval_ptr_dtor(&arg);
1564
1565 return result_str;
1566 }
1567 /* }}} */
1568
1569 /* {{{ php_pcre_replace */
php_pcre_replace(zend_string * regex,zend_string * subject_str,const char * subject,size_t subject_len,zend_string * replace_str,size_t limit,size_t * replace_count)1570 PHPAPI zend_string *php_pcre_replace(zend_string *regex,
1571 zend_string *subject_str,
1572 const char *subject, size_t subject_len,
1573 zend_string *replace_str,
1574 size_t limit, size_t *replace_count)
1575 {
1576 pcre_cache_entry *pce; /* Compiled regular expression */
1577 zend_string *result; /* Function result */
1578
1579 /* Abort on pending exception, e.g. thrown from __toString(). */
1580 if (UNEXPECTED(EG(exception))) {
1581 return NULL;
1582 }
1583
1584 /* Compile regex or get it from cache. */
1585 if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
1586 return NULL;
1587 }
1588 pce->refcount++;
1589 result = php_pcre_replace_impl(pce, subject_str, subject, subject_len, replace_str,
1590 limit, replace_count);
1591 pce->refcount--;
1592
1593 return result;
1594 }
1595 /* }}} */
1596
1597 /* {{{ php_pcre_replace_impl() */
php_pcre_replace_impl(pcre_cache_entry * pce,zend_string * subject_str,const char * subject,size_t subject_len,zend_string * replace_str,size_t limit,size_t * replace_count)1598 PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *subject_str, const char *subject, size_t subject_len, zend_string *replace_str, size_t limit, size_t *replace_count)
1599 {
1600 uint32_t options; /* Execution options */
1601 int count; /* Count of matched subpatterns */
1602 uint32_t num_subpats; /* Number of captured subpatterns */
1603 size_t new_len; /* Length of needed storage */
1604 size_t alloc_len; /* Actual allocated length */
1605 size_t match_len; /* Length of the current match */
1606 int backref; /* Backreference number */
1607 PCRE2_SIZE start_offset; /* Where the new search starts */
1608 size_t last_end_offset; /* Where the last search ended */
1609 char *walkbuf, /* Location of current replacement in the result */
1610 *walk, /* Used to walk the replacement string */
1611 walk_last; /* Last walked character */
1612 const char *match, /* The current match */
1613 *piece, /* The current piece of subject */
1614 *replace_end; /* End of replacement string */
1615 size_t result_len; /* Length of result */
1616 zend_string *result; /* Result of replacement */
1617 pcre2_match_data *match_data;
1618
1619 /* Calculate the size of the offsets array, and allocate memory for it. */
1620 num_subpats = pce->capture_count + 1;
1621 alloc_len = 0;
1622 result = NULL;
1623
1624 /* Initialize */
1625 match = NULL;
1626 start_offset = 0;
1627 last_end_offset = 0;
1628 result_len = 0;
1629 PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1630
1631 if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
1632 match_data = mdata;
1633 } else {
1634 match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
1635 if (!match_data) {
1636 PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1637 return NULL;
1638 }
1639 }
1640
1641 options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
1642
1643 /* Array of subpattern offsets */
1644 PCRE2_SIZE *const offsets = pcre2_get_ovector_pointer(match_data);
1645
1646 /* Execute the regular expression. */
1647 #ifdef HAVE_PCRE_JIT_SUPPORT
1648 if ((pce->preg_options & PREG_JIT) && options) {
1649 count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1650 PCRE2_NO_UTF_CHECK, match_data, mctx);
1651 } else
1652 #endif
1653 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1654 options, match_data, mctx);
1655
1656 while (1) {
1657 piece = subject + last_end_offset;
1658
1659 if (count >= 0 && limit > 0) {
1660 bool simple_string;
1661
1662 /* Check for too many substrings condition. */
1663 if (UNEXPECTED(count == 0)) {
1664 php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
1665 count = num_subpats;
1666 }
1667
1668 matched:
1669 if (UNEXPECTED(offsets[1] < offsets[0])) {
1670 PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1671 if (result) {
1672 zend_string_release_ex(result, 0);
1673 result = NULL;
1674 }
1675 break;
1676 }
1677
1678 if (replace_count) {
1679 ++*replace_count;
1680 }
1681
1682 /* Set the match location in subject */
1683 match = subject + offsets[0];
1684
1685 new_len = result_len + offsets[0] - last_end_offset; /* part before the match */
1686
1687 walk = ZSTR_VAL(replace_str);
1688 replace_end = walk + ZSTR_LEN(replace_str);
1689 walk_last = 0;
1690 simple_string = 1;
1691 while (walk < replace_end) {
1692 if ('\\' == *walk || '$' == *walk) {
1693 simple_string = 0;
1694 if (walk_last == '\\') {
1695 walk++;
1696 walk_last = 0;
1697 continue;
1698 }
1699 if (preg_get_backref(&walk, &backref)) {
1700 if (backref < count)
1701 new_len += offsets[(backref<<1)+1] - offsets[backref<<1];
1702 continue;
1703 }
1704 }
1705 new_len++;
1706 walk++;
1707 walk_last = walk[-1];
1708 }
1709
1710 if (new_len >= alloc_len) {
1711 alloc_len = zend_safe_address_guarded(2, new_len, ZSTR_MAX_OVERHEAD) - ZSTR_MAX_OVERHEAD;
1712 if (result == NULL) {
1713 result = zend_string_alloc(alloc_len, 0);
1714 } else {
1715 result = zend_string_extend(result, alloc_len, 0);
1716 }
1717 }
1718
1719 if (match-piece > 0) {
1720 /* copy the part of the string before the match */
1721 memcpy(&ZSTR_VAL(result)[result_len], piece, match-piece);
1722 result_len += (match-piece);
1723 }
1724
1725 if (simple_string) {
1726 /* copy replacement */
1727 memcpy(&ZSTR_VAL(result)[result_len], ZSTR_VAL(replace_str), ZSTR_LEN(replace_str)+1);
1728 result_len += ZSTR_LEN(replace_str);
1729 } else {
1730 /* copy replacement and backrefs */
1731 walkbuf = ZSTR_VAL(result) + result_len;
1732
1733 walk = ZSTR_VAL(replace_str);
1734 walk_last = 0;
1735 while (walk < replace_end) {
1736 if ('\\' == *walk || '$' == *walk) {
1737 if (walk_last == '\\') {
1738 *(walkbuf-1) = *walk++;
1739 walk_last = 0;
1740 continue;
1741 }
1742 if (preg_get_backref(&walk, &backref)) {
1743 if (backref < count) {
1744 match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
1745 walkbuf = zend_mempcpy(walkbuf, subject + offsets[backref << 1], match_len);
1746 }
1747 continue;
1748 }
1749 }
1750 *walkbuf++ = *walk++;
1751 walk_last = walk[-1];
1752 }
1753 *walkbuf = '\0';
1754 /* increment the result length by how much we've added to the string */
1755 result_len += (walkbuf - (ZSTR_VAL(result) + result_len));
1756 }
1757
1758 limit--;
1759
1760 /* Advance to the next piece. */
1761 start_offset = last_end_offset = offsets[1];
1762
1763 /* If we have matched an empty string, mimic what Perl's /g options does.
1764 This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
1765 the match again at the same point. If this fails (picked up above) we
1766 advance to the next character. */
1767 if (start_offset == offsets[0]) {
1768 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1769 PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
1770
1771 piece = subject + start_offset;
1772 if (count >= 0 && limit > 0) {
1773 goto matched;
1774 } else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
1775 /* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
1776 this is not necessarily the end. We need to advance
1777 the start offset, and continue. Fudge the offset values
1778 to achieve this, unless we're already at the end of the string. */
1779 if (start_offset < subject_len) {
1780 size_t unit_len = calculate_unit_length(pce, piece);
1781 start_offset += unit_len;
1782 } else {
1783 goto not_matched;
1784 }
1785 } else {
1786 goto error;
1787 }
1788 }
1789
1790 } else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
1791 not_matched:
1792 if (!result && subject_str) {
1793 result = zend_string_copy(subject_str);
1794 break;
1795 }
1796 /* now we know exactly how long it is */
1797 alloc_len = result_len + subject_len - last_end_offset;
1798 if (NULL != result) {
1799 result = zend_string_realloc(result, alloc_len, 0);
1800 } else {
1801 result = zend_string_alloc(alloc_len, 0);
1802 }
1803 /* stick that last bit of string on our output */
1804 memcpy(ZSTR_VAL(result) + result_len, piece, subject_len - last_end_offset);
1805 result_len += subject_len - last_end_offset;
1806 ZSTR_VAL(result)[result_len] = '\0';
1807 ZSTR_LEN(result) = result_len;
1808 break;
1809 } else {
1810 error:
1811 pcre_handle_exec_error(count);
1812 if (result) {
1813 zend_string_release_ex(result, 0);
1814 result = NULL;
1815 }
1816 break;
1817 }
1818
1819 #ifdef HAVE_PCRE_JIT_SUPPORT
1820 if (pce->preg_options & PREG_JIT) {
1821 count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1822 PCRE2_NO_UTF_CHECK, match_data, mctx);
1823 } else
1824 #endif
1825 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1826 PCRE2_NO_UTF_CHECK, match_data, mctx);
1827 }
1828 if (match_data != mdata) {
1829 pcre2_match_data_free(match_data);
1830 }
1831
1832 return result;
1833 }
1834 /* }}} */
1835
1836 /* {{{ php_pcre_replace_func_impl() */
php_pcre_replace_func_impl(pcre_cache_entry * pce,zend_string * subject_str,const char * subject,size_t subject_len,zend_fcall_info * fci,zend_fcall_info_cache * fcc,size_t limit,size_t * replace_count,zend_long flags)1837 static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_string *subject_str, const char *subject, size_t subject_len, zend_fcall_info *fci, zend_fcall_info_cache *fcc, size_t limit, size_t *replace_count, zend_long flags)
1838 {
1839 uint32_t options; /* Execution options */
1840 int count; /* Count of matched subpatterns */
1841 zend_string **subpat_names; /* Array for named subpatterns */
1842 uint32_t num_subpats; /* Number of captured subpatterns */
1843 size_t new_len; /* Length of needed storage */
1844 size_t alloc_len; /* Actual allocated length */
1845 PCRE2_SIZE start_offset; /* Where the new search starts */
1846 size_t last_end_offset; /* Where the last search ended */
1847 const char *match, /* The current match */
1848 *piece; /* The current piece of subject */
1849 size_t result_len; /* Length of result */
1850 zend_string *result; /* Result of replacement */
1851 zend_string *eval_result; /* Result of custom function */
1852 pcre2_match_data *match_data;
1853 bool old_mdata_used;
1854
1855 /* Calculate the size of the offsets array, and allocate memory for it. */
1856 num_subpats = pce->capture_count + 1;
1857
1858 /*
1859 * Build a mapping from subpattern numbers to their names. We will
1860 * allocate the table only if there are any named subpatterns.
1861 */
1862 subpat_names = NULL;
1863 if (UNEXPECTED(pce->name_count > 0)) {
1864 subpat_names = make_subpats_table(num_subpats, pce);
1865 if (!subpat_names) {
1866 return NULL;
1867 }
1868 }
1869
1870 alloc_len = 0;
1871 result = NULL;
1872
1873 /* Initialize */
1874 match = NULL;
1875 start_offset = 0;
1876 last_end_offset = 0;
1877 result_len = 0;
1878 PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1879
1880 old_mdata_used = mdata_used;
1881 if (!old_mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
1882 mdata_used = 1;
1883 match_data = mdata;
1884 } else {
1885 match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
1886 if (!match_data) {
1887 PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1888 if (subpat_names) {
1889 free_subpats_table(subpat_names, num_subpats);
1890 }
1891 mdata_used = old_mdata_used;
1892 return NULL;
1893 }
1894 }
1895
1896 options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
1897
1898 /* Array of subpattern offsets */
1899 PCRE2_SIZE *const offsets = pcre2_get_ovector_pointer(match_data);
1900
1901 /* Execute the regular expression. */
1902 #ifdef HAVE_PCRE_JIT_SUPPORT
1903 if ((pce->preg_options & PREG_JIT) && options) {
1904 count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1905 PCRE2_NO_UTF_CHECK, match_data, mctx);
1906 } else
1907 #endif
1908 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1909 options, match_data, mctx);
1910
1911 while (1) {
1912 piece = subject + last_end_offset;
1913
1914 if (count >= 0 && limit) {
1915 /* Check for too many substrings condition. */
1916 if (UNEXPECTED(count == 0)) {
1917 php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
1918 count = num_subpats;
1919 }
1920
1921 matched:
1922 if (UNEXPECTED(offsets[1] < offsets[0])) {
1923 PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1924 if (result) {
1925 zend_string_release_ex(result, 0);
1926 result = NULL;
1927 }
1928 break;
1929 }
1930
1931 if (replace_count) {
1932 ++*replace_count;
1933 }
1934
1935 /* Set the match location in subject */
1936 match = subject + offsets[0];
1937
1938 new_len = result_len + offsets[0] - last_end_offset; /* part before the match */
1939
1940 /* Use custom function to get replacement string and its length. */
1941 eval_result = preg_do_repl_func(
1942 fci, fcc, subject, offsets, subpat_names, num_subpats, count,
1943 pcre2_get_mark(match_data), flags);
1944
1945 ZEND_ASSERT(eval_result);
1946 new_len = zend_safe_address_guarded(1, ZSTR_LEN(eval_result) + ZSTR_MAX_OVERHEAD, new_len) -ZSTR_MAX_OVERHEAD;
1947 if (new_len >= alloc_len) {
1948 alloc_len = zend_safe_address_guarded(2, new_len, ZSTR_MAX_OVERHEAD) - ZSTR_MAX_OVERHEAD;
1949 if (result == NULL) {
1950 result = zend_string_alloc(alloc_len, 0);
1951 } else {
1952 result = zend_string_extend(result, alloc_len, 0);
1953 }
1954 }
1955
1956 if (match-piece > 0) {
1957 /* copy the part of the string before the match */
1958 memcpy(ZSTR_VAL(result) + result_len, piece, match-piece);
1959 result_len += (match-piece);
1960 }
1961
1962 /* If using custom function, copy result to the buffer and clean up. */
1963 memcpy(ZSTR_VAL(result) + result_len, ZSTR_VAL(eval_result), ZSTR_LEN(eval_result));
1964 result_len += ZSTR_LEN(eval_result);
1965 zend_string_release_ex(eval_result, 0);
1966
1967 limit--;
1968
1969 /* Advance to the next piece. */
1970 start_offset = last_end_offset = offsets[1];
1971
1972 /* If we have matched an empty string, mimic what Perl's /g options does.
1973 This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
1974 the match again at the same point. If this fails (picked up above) we
1975 advance to the next character. */
1976 if (start_offset == offsets[0]) {
1977 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1978 PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
1979
1980 piece = subject + start_offset;
1981 if (count >= 0 && limit) {
1982 goto matched;
1983 } else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
1984 /* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
1985 this is not necessarily the end. We need to advance
1986 the start offset, and continue. Fudge the offset values
1987 to achieve this, unless we're already at the end of the string. */
1988 if (start_offset < subject_len) {
1989 size_t unit_len = calculate_unit_length(pce, piece);
1990 start_offset += unit_len;
1991 } else {
1992 goto not_matched;
1993 }
1994 } else {
1995 goto error;
1996 }
1997 }
1998
1999 } else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
2000 not_matched:
2001 if (!result && subject_str) {
2002 result = zend_string_copy(subject_str);
2003 break;
2004 }
2005 /* now we know exactly how long it is */
2006 alloc_len = result_len + subject_len - last_end_offset;
2007 if (NULL != result) {
2008 result = zend_string_realloc(result, alloc_len, 0);
2009 } else {
2010 result = zend_string_alloc(alloc_len, 0);
2011 }
2012 /* stick that last bit of string on our output */
2013 memcpy(ZSTR_VAL(result) + result_len, piece, subject_len - last_end_offset);
2014 result_len += subject_len - last_end_offset;
2015 ZSTR_VAL(result)[result_len] = '\0';
2016 ZSTR_LEN(result) = result_len;
2017 break;
2018 } else {
2019 error:
2020 pcre_handle_exec_error(count);
2021 if (result) {
2022 zend_string_release_ex(result, 0);
2023 result = NULL;
2024 }
2025 break;
2026 }
2027 #ifdef HAVE_PCRE_JIT_SUPPORT
2028 if ((pce->preg_options & PREG_JIT)) {
2029 count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
2030 PCRE2_NO_UTF_CHECK, match_data, mctx);
2031 } else
2032 #endif
2033 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
2034 PCRE2_NO_UTF_CHECK, match_data, mctx);
2035 }
2036 if (match_data != mdata) {
2037 pcre2_match_data_free(match_data);
2038 }
2039 mdata_used = old_mdata_used;
2040
2041 if (UNEXPECTED(subpat_names)) {
2042 free_subpats_table(subpat_names, num_subpats);
2043 }
2044
2045 return result;
2046 }
2047 /* }}} */
2048
2049 /* {{{ php_pcre_replace_func */
php_pcre_replace_func(zend_string * regex,zend_string * subject_str,zend_fcall_info * fci,zend_fcall_info_cache * fcc,size_t limit,size_t * replace_count,zend_long flags)2050 static zend_always_inline zend_string *php_pcre_replace_func(zend_string *regex,
2051 zend_string *subject_str,
2052 zend_fcall_info *fci, zend_fcall_info_cache *fcc,
2053 size_t limit, size_t *replace_count, zend_long flags)
2054 {
2055 pcre_cache_entry *pce; /* Compiled regular expression */
2056 zend_string *result; /* Function result */
2057
2058 /* Compile regex or get it from cache. */
2059 if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
2060 return NULL;
2061 }
2062 pce->refcount++;
2063 result = php_pcre_replace_func_impl(
2064 pce, subject_str, ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), fci, fcc,
2065 limit, replace_count, flags);
2066 pce->refcount--;
2067
2068 return result;
2069 }
2070 /* }}} */
2071
2072 /* {{{ php_pcre_replace_array */
php_pcre_replace_array(HashTable * regex,zend_string * replace_str,HashTable * replace_ht,zend_string * subject_str,size_t limit,size_t * replace_count)2073 static zend_string *php_pcre_replace_array(HashTable *regex,
2074 zend_string *replace_str, HashTable *replace_ht,
2075 zend_string *subject_str, size_t limit, size_t *replace_count)
2076 {
2077 zval *regex_entry;
2078 zend_string *result;
2079
2080 zend_string_addref(subject_str);
2081
2082 if (replace_ht) {
2083 uint32_t replace_idx = 0;
2084
2085 /* For each entry in the regex array, get the entry */
2086 ZEND_HASH_FOREACH_VAL(regex, regex_entry) {
2087 /* Make sure we're dealing with strings. */
2088 zend_string *tmp_regex_str;
2089 zend_string *regex_str = zval_get_tmp_string(regex_entry, &tmp_regex_str);
2090 zend_string *replace_entry_str, *tmp_replace_entry_str;
2091 zval *zv;
2092
2093 /* Get current entry */
2094 while (1) {
2095 if (replace_idx == replace_ht->nNumUsed) {
2096 replace_entry_str = ZSTR_EMPTY_ALLOC();
2097 tmp_replace_entry_str = NULL;
2098 break;
2099 }
2100 zv = ZEND_HASH_ELEMENT(replace_ht, replace_idx);
2101 replace_idx++;
2102 if (Z_TYPE_P(zv) != IS_UNDEF) {
2103 replace_entry_str = zval_get_tmp_string(zv, &tmp_replace_entry_str);
2104 break;
2105 }
2106 }
2107
2108 /* Do the actual replacement and put the result back into subject_str
2109 for further replacements. */
2110 result = php_pcre_replace(regex_str, subject_str, ZSTR_VAL(subject_str),
2111 ZSTR_LEN(subject_str), replace_entry_str, limit, replace_count);
2112 zend_tmp_string_release(tmp_replace_entry_str);
2113 zend_tmp_string_release(tmp_regex_str);
2114 zend_string_release_ex(subject_str, 0);
2115 subject_str = result;
2116 if (UNEXPECTED(result == NULL)) {
2117 break;
2118 }
2119 } ZEND_HASH_FOREACH_END();
2120
2121 } else {
2122 ZEND_ASSERT(replace_str != NULL);
2123
2124 /* For each entry in the regex array, get the entry */
2125 ZEND_HASH_FOREACH_VAL(regex, regex_entry) {
2126 /* Make sure we're dealing with strings. */
2127 zend_string *tmp_regex_str;
2128 zend_string *regex_str = zval_get_tmp_string(regex_entry, &tmp_regex_str);
2129
2130 /* Do the actual replacement and put the result back into subject_str
2131 for further replacements. */
2132 result = php_pcre_replace(regex_str, subject_str, ZSTR_VAL(subject_str),
2133 ZSTR_LEN(subject_str), replace_str, limit, replace_count);
2134 zend_tmp_string_release(tmp_regex_str);
2135 zend_string_release_ex(subject_str, 0);
2136 subject_str = result;
2137
2138 if (UNEXPECTED(result == NULL)) {
2139 break;
2140 }
2141 } ZEND_HASH_FOREACH_END();
2142 }
2143
2144 return subject_str;
2145 }
2146 /* }}} */
2147
2148 /* {{{ php_replace_in_subject */
php_replace_in_subject(zend_string * regex_str,HashTable * regex_ht,zend_string * replace_str,HashTable * replace_ht,zend_string * subject,size_t limit,size_t * replace_count)2149 static zend_always_inline zend_string *php_replace_in_subject(
2150 zend_string *regex_str, HashTable *regex_ht,
2151 zend_string *replace_str, HashTable *replace_ht,
2152 zend_string *subject, size_t limit, size_t *replace_count)
2153 {
2154 zend_string *result;
2155
2156 if (regex_str) {
2157 ZEND_ASSERT(replace_str != NULL);
2158 result = php_pcre_replace(regex_str, subject, ZSTR_VAL(subject), ZSTR_LEN(subject),
2159 replace_str, limit, replace_count);
2160 } else {
2161 ZEND_ASSERT(regex_ht != NULL);
2162 result = php_pcre_replace_array(regex_ht, replace_str, replace_ht, subject,
2163 limit, replace_count);
2164 }
2165 return result;
2166 }
2167 /* }}} */
2168
2169 /* {{{ php_replace_in_subject_func */
php_replace_in_subject_func(zend_string * regex_str,HashTable * regex_ht,zend_fcall_info * fci,zend_fcall_info_cache * fcc,zend_string * subject,size_t limit,size_t * replace_count,zend_long flags)2170 static zend_string *php_replace_in_subject_func(zend_string *regex_str, HashTable *regex_ht,
2171 zend_fcall_info *fci, zend_fcall_info_cache *fcc,
2172 zend_string *subject, size_t limit, size_t *replace_count, zend_long flags)
2173 {
2174 zend_string *result;
2175
2176 if (regex_str) {
2177 result = php_pcre_replace_func(
2178 regex_str, subject, fci, fcc, limit, replace_count, flags);
2179 return result;
2180 } else {
2181 /* If regex is an array */
2182 zval *regex_entry;
2183
2184 ZEND_ASSERT(regex_ht != NULL);
2185
2186 zend_string_addref(subject);
2187
2188 /* For each entry in the regex array, get the entry */
2189 ZEND_HASH_FOREACH_VAL(regex_ht, regex_entry) {
2190 /* Make sure we're dealing with strings. */
2191 zend_string *tmp_regex_entry_str;
2192 zend_string *regex_entry_str = zval_get_tmp_string(regex_entry, &tmp_regex_entry_str);
2193
2194 /* Do the actual replacement and put the result back into subject
2195 for further replacements. */
2196 result = php_pcre_replace_func(
2197 regex_entry_str, subject, fci, fcc, limit, replace_count, flags);
2198 zend_tmp_string_release(tmp_regex_entry_str);
2199 zend_string_release(subject);
2200 subject = result;
2201 if (UNEXPECTED(result == NULL)) {
2202 break;
2203 }
2204 } ZEND_HASH_FOREACH_END();
2205
2206 return subject;
2207 }
2208 }
2209 /* }}} */
2210
2211 /* {{{ preg_replace_func_impl */
preg_replace_func_impl(zval * return_value,zend_string * regex_str,HashTable * regex_ht,zend_fcall_info * fci,zend_fcall_info_cache * fcc,zend_string * subject_str,HashTable * subject_ht,zend_long limit_val,zend_long flags)2212 static size_t preg_replace_func_impl(zval *return_value,
2213 zend_string *regex_str, HashTable *regex_ht,
2214 zend_fcall_info *fci, zend_fcall_info_cache *fcc,
2215 zend_string *subject_str, HashTable *subject_ht, zend_long limit_val, zend_long flags)
2216 {
2217 zend_string *result;
2218 size_t replace_count = 0;
2219
2220 if (subject_str) {
2221 result = php_replace_in_subject_func(
2222 regex_str, regex_ht, fci, fcc, subject_str, limit_val, &replace_count, flags);
2223 if (result != NULL) {
2224 RETVAL_STR(result);
2225 } else {
2226 RETVAL_NULL();
2227 }
2228 } else {
2229 /* if subject is an array */
2230 zval *subject_entry, zv;
2231 zend_string *string_key;
2232 zend_ulong num_key;
2233
2234 ZEND_ASSERT(subject_ht != NULL);
2235
2236 array_init_size(return_value, zend_hash_num_elements(subject_ht));
2237 HashTable *return_value_ht = Z_ARRVAL_P(return_value);
2238
2239 /* For each subject entry, convert it to string, then perform replacement
2240 and add the result to the return_value array. */
2241 ZEND_HASH_FOREACH_KEY_VAL(subject_ht, num_key, string_key, subject_entry) {
2242 zend_string *tmp_subject_entry_str;
2243 zend_string *subject_entry_str = zval_get_tmp_string(subject_entry, &tmp_subject_entry_str);
2244
2245 result = php_replace_in_subject_func(
2246 regex_str, regex_ht, fci, fcc, subject_entry_str, limit_val, &replace_count, flags);
2247 if (result != NULL) {
2248 /* Add to return array */
2249 ZVAL_STR(&zv, result);
2250 if (string_key) {
2251 zend_hash_add_new(return_value_ht, string_key, &zv);
2252 } else {
2253 zend_hash_index_add_new(return_value_ht, num_key, &zv);
2254 }
2255 }
2256 zend_tmp_string_release(tmp_subject_entry_str);
2257 } ZEND_HASH_FOREACH_END();
2258 }
2259
2260 return replace_count;
2261 }
2262 /* }}} */
2263
/* Core of preg_replace()/preg_filter() once the arguments are parsed.
 *
 * Throws a type error when the replacement is an array but the pattern is
 * not. For a string subject `return_value` becomes the replaced string, or
 * null if the replacement returned NULL or (filter mode) nothing was
 * replaced. For an array subject `return_value` becomes an array of the
 * results under their original keys; in filter mode entries without any
 * replacement are left out. When `zcount` is given it receives the total
 * number of replacements. */
static void _preg_replace_common(
	zval *return_value,
	HashTable *regex_ht, zend_string *regex_str,
	HashTable *replace_ht, zend_string *replace_str,
	HashTable *subject_ht, zend_string *subject_str,
	zend_long limit,
	zval *zcount,
	bool is_filter
) {
	size_t replace_count = 0;

	/* An array of replacements only makes sense with an array of patterns. */
	if (replace_ht && !regex_ht) {
		zend_argument_type_error(1, "must be of type array when argument #2 ($replacement) is an array, string given");
		RETURN_THROWS();
	}

	if (subject_str) {
		const size_t before = replace_count;
		zend_string *replaced = php_replace_in_subject(regex_str, regex_ht, replace_str, replace_ht,
			subject_str, limit, &replace_count);

		if (replaced == NULL) {
			RETVAL_NULL();
		} else if (is_filter && replace_count <= before) {
			/* Filter mode and nothing was replaced: discard the copy. */
			zend_string_release_ex(replaced, 0);
			RETVAL_NULL();
		} else {
			RETVAL_STR(replaced);
		}
	} else {
		/* The subject is an array. */
		ZEND_ASSERT(subject_ht != NULL);

		array_init_size(return_value, zend_hash_num_elements(subject_ht));
		HashTable *out = Z_ARRVAL_P(return_value);

		zval *entry;
		zend_string *str_key;
		zend_ulong num_key;

		ZEND_HASH_FOREACH_KEY_VAL(subject_ht, num_key, str_key, entry) {
			/* Remember the running total so we can tell whether this
			 * particular entry had any replacement. */
			const size_t before = replace_count;
			zend_string *entry_tmp;
			zend_string *entry_str = zval_get_tmp_string(entry, &entry_tmp);
			zend_string *replaced = php_replace_in_subject(regex_str, regex_ht, replace_str, replace_ht,
				entry_str, limit, &replace_count);

			if (replaced != NULL) {
				if (is_filter && replace_count <= before) {
					zend_string_release_ex(replaced, 0);
				} else {
					/* Keep the result under the key the entry had. */
					zval item;

					ZVAL_STR(&item, replaced);
					if (str_key) {
						zend_hash_add_new(out, str_key, &item);
					} else {
						zend_hash_index_add_new(out, num_key, &item);
					}
				}
			}
			zend_tmp_string_release(entry_tmp);
		} ZEND_HASH_FOREACH_END();
	}

	if (zcount) {
		ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count);
	}
}
2338
2339 /* {{{ preg_replace_common */
preg_replace_common(INTERNAL_FUNCTION_PARAMETERS,bool is_filter)2340 static void preg_replace_common(INTERNAL_FUNCTION_PARAMETERS, bool is_filter)
2341 {
2342 zend_string *regex_str, *replace_str, *subject_str;
2343 HashTable *regex_ht, *replace_ht, *subject_ht;
2344 zend_long limit = -1;
2345 zval *zcount = NULL;
2346
2347 /* Get function parameters and do error-checking. */
2348 ZEND_PARSE_PARAMETERS_START(3, 5)
2349 Z_PARAM_ARRAY_HT_OR_STR(regex_ht, regex_str)
2350 Z_PARAM_ARRAY_HT_OR_STR(replace_ht, replace_str)
2351 Z_PARAM_ARRAY_HT_OR_STR(subject_ht, subject_str)
2352 Z_PARAM_OPTIONAL
2353 Z_PARAM_LONG(limit)
2354 Z_PARAM_ZVAL(zcount)
2355 ZEND_PARSE_PARAMETERS_END();
2356
2357 _preg_replace_common(
2358 return_value,
2359 regex_ht, regex_str,
2360 replace_ht, replace_str,
2361 subject_ht, subject_str,
2362 limit, zcount, is_filter);
2363 }
2364 /* }}} */
2365
2366 /* {{{ Perform Perl-style regular expression replacement. */
PHP_FUNCTION(preg_replace)2367 PHP_FUNCTION(preg_replace)
2368 {
2369 preg_replace_common(INTERNAL_FUNCTION_PARAM_PASSTHRU, false);
2370 }
2371 /* }}} */
2372
2373 ZEND_FRAMELESS_FUNCTION(preg_replace, 3)
2374 {
2375 zend_string *regex_str, *replace_str, *subject_str;
2376 HashTable *regex_ht, *replace_ht, *subject_ht;
2377 zval regex_tmp, replace_tmp, subject_tmp;
2378
2379 Z_FLF_PARAM_ARRAY_HT_OR_STR(1, regex_ht, regex_str, regex_tmp);
2380 Z_FLF_PARAM_ARRAY_HT_OR_STR(2, replace_ht, replace_str, replace_tmp);
2381 Z_FLF_PARAM_ARRAY_HT_OR_STR(3, subject_ht, subject_str, subject_tmp);
2382
2383 _preg_replace_common(
2384 return_value,
2385 regex_ht, regex_str,
2386 replace_ht, replace_str,
2387 subject_ht, subject_str,
2388 /* limit */ -1, /* zcount */ NULL, /* is_filter */ false);
2389
2390 flf_clean:;
2391 Z_FLF_PARAM_FREE_STR(1, regex_tmp);
2392 Z_FLF_PARAM_FREE_STR(2, replace_tmp);
2393 Z_FLF_PARAM_FREE_STR(3, subject_tmp);
2394 }
2395
2396 /* {{{ Perform Perl-style regular expression replacement using replacement callback. */
PHP_FUNCTION(preg_replace_callback)2397 PHP_FUNCTION(preg_replace_callback)
2398 {
2399 zval *zcount = NULL;
2400 zend_string *regex_str;
2401 HashTable *regex_ht;
2402 zend_string *subject_str;
2403 HashTable *subject_ht;
2404 zend_long limit = -1, flags = 0;
2405 size_t replace_count;
2406 zend_fcall_info fci;
2407 zend_fcall_info_cache fcc;
2408
2409 /* Get function parameters and do error-checking. */
2410 ZEND_PARSE_PARAMETERS_START(3, 6)
2411 Z_PARAM_ARRAY_HT_OR_STR(regex_ht, regex_str)
2412 Z_PARAM_FUNC(fci, fcc)
2413 Z_PARAM_ARRAY_HT_OR_STR(subject_ht, subject_str)
2414 Z_PARAM_OPTIONAL
2415 Z_PARAM_LONG(limit)
2416 Z_PARAM_ZVAL(zcount)
2417 Z_PARAM_LONG(flags)
2418 ZEND_PARSE_PARAMETERS_END();
2419
2420 replace_count = preg_replace_func_impl(return_value, regex_str, regex_ht,
2421 &fci, &fcc,
2422 subject_str, subject_ht, limit, flags);
2423 if (zcount) {
2424 ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count);
2425 }
2426 }
2427 /* }}} */
2428
2429 /* {{{ Perform Perl-style regular expression replacement using replacement callback. */
PHP_FUNCTION(preg_replace_callback_array)2430 PHP_FUNCTION(preg_replace_callback_array)
2431 {
2432 zval zv, *replace, *zcount = NULL;
2433 HashTable *pattern, *subject_ht;
2434 zend_string *subject_str, *str_idx_regex;
2435 zend_long limit = -1, flags = 0;
2436 size_t replace_count = 0;
2437 zend_fcall_info fci;
2438 zend_fcall_info_cache fcc;
2439
2440 /* Get function parameters and do error-checking. */
2441 ZEND_PARSE_PARAMETERS_START(2, 5)
2442 Z_PARAM_ARRAY_HT(pattern)
2443 Z_PARAM_ARRAY_HT_OR_STR(subject_ht, subject_str)
2444 Z_PARAM_OPTIONAL
2445 Z_PARAM_LONG(limit)
2446 Z_PARAM_ZVAL(zcount)
2447 Z_PARAM_LONG(flags)
2448 ZEND_PARSE_PARAMETERS_END();
2449
2450 fci.size = sizeof(fci);
2451 fci.object = NULL;
2452 fci.named_params = NULL;
2453
2454 if (subject_ht) {
2455 GC_TRY_ADDREF(subject_ht);
2456 } else {
2457 GC_TRY_ADDREF(subject_str);
2458 }
2459
2460 ZEND_HASH_FOREACH_STR_KEY_VAL(pattern, str_idx_regex, replace) {
2461 if (!zend_is_callable_ex(replace, NULL, 0, NULL, &fcc, NULL)) {
2462 zend_argument_type_error(1, "must contain only valid callbacks");
2463 goto error;
2464 }
2465 if (!str_idx_regex) {
2466 zend_argument_type_error(1, "must contain only string patterns as keys");
2467 goto error;
2468 }
2469
2470 ZVAL_COPY_VALUE(&fci.function_name, replace);
2471
2472 replace_count += preg_replace_func_impl(&zv, str_idx_regex, /* regex_ht */ NULL, &fci, &fcc,
2473 subject_str, subject_ht, limit, flags);
2474 switch (Z_TYPE(zv)) {
2475 case IS_ARRAY:
2476 ZEND_ASSERT(subject_ht);
2477 zend_array_release(subject_ht);
2478 subject_ht = Z_ARR(zv);
2479 break;
2480 case IS_STRING:
2481 ZEND_ASSERT(subject_str);
2482 zend_string_release(subject_str);
2483 subject_str = Z_STR(zv);
2484 break;
2485 case IS_NULL:
2486 RETVAL_NULL();
2487 goto error;
2488 EMPTY_SWITCH_DEFAULT_CASE()
2489 }
2490
2491 if (EG(exception)) {
2492 goto error;
2493 }
2494 } ZEND_HASH_FOREACH_END();
2495
2496 if (zcount) {
2497 ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count);
2498 }
2499
2500 if (subject_ht) {
2501 RETVAL_ARR(subject_ht);
2502 // Unset the type_flags of immutable arrays to prevent the VM from performing refcounting
2503 if (GC_FLAGS(subject_ht) & IS_ARRAY_IMMUTABLE) {
2504 Z_TYPE_FLAGS_P(return_value) = 0;
2505 }
2506 return;
2507 } else {
2508 RETURN_STR(subject_str);
2509 }
2510
2511 error:
2512 if (subject_ht) {
2513 zend_array_release(subject_ht);
2514 } else {
2515 zend_string_release(subject_str);
2516 }
2517 }
2518 /* }}} */
2519
2520 /* {{{ Perform Perl-style regular expression replacement and only return matches. */
PHP_FUNCTION(preg_filter)2521 PHP_FUNCTION(preg_filter)
2522 {
2523 preg_replace_common(INTERNAL_FUNCTION_PARAM_PASSTHRU, true);
2524 }
2525 /* }}} */
2526
2527 /* {{{ Split string into an array using a perl-style regular expression as a delimiter */
PHP_FUNCTION(preg_split)2528 PHP_FUNCTION(preg_split)
2529 {
2530 zend_string *regex; /* Regular expression */
2531 zend_string *subject; /* String to match against */
2532 zend_long limit_val = -1;/* Integer value of limit */
2533 zend_long flags = 0; /* Match control flags */
2534 pcre_cache_entry *pce; /* Compiled regular expression */
2535
2536 /* Get function parameters and do error checking */
2537 ZEND_PARSE_PARAMETERS_START(2, 4)
2538 Z_PARAM_STR(regex)
2539 Z_PARAM_STR(subject)
2540 Z_PARAM_OPTIONAL
2541 Z_PARAM_LONG(limit_val)
2542 Z_PARAM_LONG(flags)
2543 ZEND_PARSE_PARAMETERS_END();
2544
2545 /* Compile regex or get it from cache. */
2546 if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
2547 RETURN_FALSE;
2548 }
2549
2550 pce->refcount++;
2551 php_pcre_split_impl(pce, subject, return_value, limit_val, flags);
2552 pce->refcount--;
2553 }
2554 /* }}} */
2555
2556 /* {{{ php_pcre_split */
php_pcre_split_impl(pcre_cache_entry * pce,zend_string * subject_str,zval * return_value,zend_long limit_val,zend_long flags)2557 PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str, zval *return_value,
2558 zend_long limit_val, zend_long flags)
2559 {
2560 uint32_t options; /* Execution options */
2561 int count; /* Count of matched subpatterns */
2562 PCRE2_SIZE start_offset; /* Where the new search starts */
2563 PCRE2_SIZE last_match_offset; /* Location of last match */
2564 uint32_t no_empty; /* If NO_EMPTY flag is set */
2565 uint32_t delim_capture; /* If delimiters should be captured */
2566 uint32_t offset_capture; /* If offsets should be captured */
2567 uint32_t num_subpats; /* Number of captured subpatterns */
2568 zval tmp;
2569 pcre2_match_data *match_data;
2570 char *subject = ZSTR_VAL(subject_str);
2571
2572 no_empty = flags & PREG_SPLIT_NO_EMPTY;
2573 delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE;
2574 offset_capture = flags & PREG_SPLIT_OFFSET_CAPTURE;
2575
2576 /* Initialize return value */
2577 array_init(return_value);
2578 HashTable *return_value_ht = Z_ARRVAL_P(return_value);
2579
2580 /* Calculate the size of the offsets array, and allocate memory for it. */
2581 num_subpats = pce->capture_count + 1;
2582
2583 /* Start at the beginning of the string */
2584 start_offset = 0;
2585 last_match_offset = 0;
2586 PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
2587
2588 if (limit_val == -1) {
2589 /* pass */
2590 } else if (limit_val == 0) {
2591 limit_val = -1;
2592 } else if (limit_val <= 1) {
2593 goto last;
2594 }
2595
2596 if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
2597 match_data = mdata;
2598 } else {
2599 match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
2600 if (!match_data) {
2601 PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
2602 zval_ptr_dtor(return_value);
2603 RETURN_FALSE;
2604 }
2605 }
2606
2607 options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
2608
2609 /* Array of subpattern offsets */
2610 PCRE2_SIZE *const offsets = pcre2_get_ovector_pointer(match_data);
2611
2612 #ifdef HAVE_PCRE_JIT_SUPPORT
2613 if ((pce->preg_options & PREG_JIT) && options) {
2614 count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
2615 PCRE2_NO_UTF_CHECK, match_data, mctx);
2616 } else
2617 #endif
2618 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
2619 options, match_data, mctx);
2620
2621 while (1) {
2622 /* If something matched */
2623 if (count >= 0) {
2624 /* Check for too many substrings condition. */
2625 if (UNEXPECTED(count == 0)) {
2626 php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
2627 count = num_subpats;
2628 }
2629
2630 matched:
2631 if (UNEXPECTED(offsets[1] < offsets[0])) {
2632 PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
2633 break;
2634 }
2635
2636 if (!no_empty || offsets[0] != last_match_offset) {
2637 if (offset_capture) {
2638 /* Add (match, offset) pair to the return value */
2639 add_offset_pair(
2640 return_value_ht, subject, last_match_offset, offsets[0],
2641 NULL, 0);
2642 } else {
2643 /* Add the piece to the return value */
2644 populate_match_value_str(&tmp, subject, last_match_offset, offsets[0]);
2645 zend_hash_next_index_insert_new(return_value_ht, &tmp);
2646 }
2647
2648 /* One less left to do */
2649 if (limit_val != -1)
2650 limit_val--;
2651 }
2652
2653 if (delim_capture) {
2654 size_t i;
2655 for (i = 1; i < count; i++) {
2656 /* If we have matched a delimiter */
2657 if (!no_empty || offsets[2*i] != offsets[2*i+1]) {
2658 if (offset_capture) {
2659 add_offset_pair(
2660 return_value_ht, subject, offsets[2*i], offsets[2*i+1], NULL, 0);
2661 } else {
2662 populate_match_value_str(&tmp, subject, offsets[2*i], offsets[2*i+1]);
2663 zend_hash_next_index_insert_new(return_value_ht, &tmp);
2664 }
2665 }
2666 }
2667 }
2668
2669 /* Advance to the position right after the last full match */
2670 start_offset = last_match_offset = offsets[1];
2671
2672 /* If we have matched an empty string, mimic what Perl's /g options does.
2673 This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
2674 the match again at the same point. If this fails (picked up above) we
2675 advance to the next character. */
2676 if (start_offset == offsets[0]) {
2677 /* Get next piece if no limit or limit not yet reached and something matched*/
2678 if (limit_val != -1 && limit_val <= 1) {
2679 break;
2680 }
2681 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
2682 PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
2683 if (count >= 0) {
2684 goto matched;
2685 } else if (count == PCRE2_ERROR_NOMATCH) {
2686 /* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
2687 this is not necessarily the end. We need to advance
2688 the start offset, and continue. Fudge the offset values
2689 to achieve this, unless we're already at the end of the string. */
2690 if (start_offset < ZSTR_LEN(subject_str)) {
2691 start_offset += calculate_unit_length(pce, subject + start_offset);
2692 } else {
2693 break;
2694 }
2695 } else {
2696 goto error;
2697 }
2698 }
2699
2700 } else if (count == PCRE2_ERROR_NOMATCH) {
2701 break;
2702 } else {
2703 error:
2704 pcre_handle_exec_error(count);
2705 break;
2706 }
2707
2708 /* Get next piece if no limit or limit not yet reached and something matched*/
2709 if (limit_val != -1 && limit_val <= 1) {
2710 break;
2711 }
2712
2713 #ifdef HAVE_PCRE_JIT_SUPPORT
2714 if (pce->preg_options & PREG_JIT) {
2715 count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
2716 PCRE2_NO_UTF_CHECK, match_data, mctx);
2717 } else
2718 #endif
2719 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
2720 PCRE2_NO_UTF_CHECK, match_data, mctx);
2721 }
2722 if (match_data != mdata) {
2723 pcre2_match_data_free(match_data);
2724 }
2725
2726 if (PCRE_G(error_code) != PHP_PCRE_NO_ERROR) {
2727 zval_ptr_dtor(return_value);
2728 RETURN_FALSE;
2729 }
2730
2731 last:
2732 start_offset = last_match_offset; /* the offset might have been incremented, but without further successful matches */
2733
2734 if (!no_empty || start_offset < ZSTR_LEN(subject_str)) {
2735 if (offset_capture) {
2736 /* Add the last (match, offset) pair to the return value */
2737 add_offset_pair(return_value_ht, subject, start_offset, ZSTR_LEN(subject_str), NULL, 0);
2738 } else {
2739 /* Add the last piece to the return value */
2740 if (start_offset == 0) {
2741 ZVAL_STR_COPY(&tmp, subject_str);
2742 } else {
2743 populate_match_value_str(&tmp, subject, start_offset, ZSTR_LEN(subject_str));
2744 }
2745 zend_hash_next_index_insert_new(return_value_ht, &tmp);
2746 }
2747 }
2748 }
2749 /* }}} */
2750
2751 /* {{{ Quote regular expression characters plus an optional character */
PHP_FUNCTION(preg_quote)2752 PHP_FUNCTION(preg_quote)
2753 {
2754 zend_string *str; /* Input string argument */
2755 zend_string *delim = NULL; /* Additional delimiter argument */
2756 char *in_str; /* Input string */
2757 char *in_str_end; /* End of the input string */
2758 zend_string *out_str; /* Output string with quoted characters */
2759 size_t extra_len; /* Number of additional characters */
2760 char *p, /* Iterator for input string */
2761 *q, /* Iterator for output string */
2762 delim_char = '\0', /* Delimiter character to be quoted */
2763 c; /* Current character */
2764
2765 /* Get the arguments and check for errors */
2766 ZEND_PARSE_PARAMETERS_START(1, 2)
2767 Z_PARAM_STR(str)
2768 Z_PARAM_OPTIONAL
2769 Z_PARAM_STR_OR_NULL(delim)
2770 ZEND_PARSE_PARAMETERS_END();
2771
2772 /* Nothing to do if we got an empty string */
2773 if (ZSTR_LEN(str) == 0) {
2774 RETURN_EMPTY_STRING();
2775 }
2776
2777 in_str = ZSTR_VAL(str);
2778 in_str_end = in_str + ZSTR_LEN(str);
2779
2780 if (delim) {
2781 delim_char = ZSTR_VAL(delim)[0];
2782 }
2783
2784 /* Go through the string and quote necessary characters */
2785 extra_len = 0;
2786 p = in_str;
2787 do {
2788 c = *p;
2789 switch(c) {
2790 case '.':
2791 case '\\':
2792 case '+':
2793 case '*':
2794 case '?':
2795 case '[':
2796 case '^':
2797 case ']':
2798 case '$':
2799 case '(':
2800 case ')':
2801 case '{':
2802 case '}':
2803 case '=':
2804 case '!':
2805 case '>':
2806 case '<':
2807 case '|':
2808 case ':':
2809 case '-':
2810 case '#':
2811 extra_len++;
2812 break;
2813
2814 case '\0':
2815 extra_len+=3;
2816 break;
2817
2818 default:
2819 if (c == delim_char) {
2820 extra_len++;
2821 }
2822 break;
2823 }
2824 p++;
2825 } while (p != in_str_end);
2826
2827 if (extra_len == 0) {
2828 RETURN_STR_COPY(str);
2829 }
2830
2831 /* Allocate enough memory so that even if each character
2832 is quoted, we won't run out of room */
2833 out_str = zend_string_safe_alloc(1, ZSTR_LEN(str), extra_len, 0);
2834 q = ZSTR_VAL(out_str);
2835 p = in_str;
2836
2837 do {
2838 c = *p;
2839 switch(c) {
2840 case '.':
2841 case '\\':
2842 case '+':
2843 case '*':
2844 case '?':
2845 case '[':
2846 case '^':
2847 case ']':
2848 case '$':
2849 case '(':
2850 case ')':
2851 case '{':
2852 case '}':
2853 case '=':
2854 case '!':
2855 case '>':
2856 case '<':
2857 case '|':
2858 case ':':
2859 case '-':
2860 case '#':
2861 *q++ = '\\';
2862 *q++ = c;
2863 break;
2864
2865 case '\0':
2866 *q++ = '\\';
2867 *q++ = '0';
2868 *q++ = '0';
2869 *q++ = '0';
2870 break;
2871
2872 default:
2873 if (c == delim_char) {
2874 *q++ = '\\';
2875 }
2876 *q++ = c;
2877 break;
2878 }
2879 p++;
2880 } while (p != in_str_end);
2881 *q = '\0';
2882
2883 RETURN_NEW_STR(out_str);
2884 }
2885 /* }}} */
2886
2887 /* {{{ Searches array and returns entries which match regex */
PHP_FUNCTION(preg_grep)2888 PHP_FUNCTION(preg_grep)
2889 {
2890 zend_string *regex; /* Regular expression */
2891 zval *input; /* Input array */
2892 zend_long flags = 0; /* Match control flags */
2893 pcre_cache_entry *pce; /* Compiled regular expression */
2894
2895 /* Get arguments and do error checking */
2896 ZEND_PARSE_PARAMETERS_START(2, 3)
2897 Z_PARAM_STR(regex)
2898 Z_PARAM_ARRAY(input)
2899 Z_PARAM_OPTIONAL
2900 Z_PARAM_LONG(flags)
2901 ZEND_PARSE_PARAMETERS_END();
2902
2903 /* Compile regex or get it from cache. */
2904 if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
2905 RETURN_FALSE;
2906 }
2907
2908 pce->refcount++;
2909 php_pcre_grep_impl(pce, input, return_value, flags);
2910 pce->refcount--;
2911 }
2912 /* }}} */
2913
php_pcre_grep_impl(pcre_cache_entry * pce,zval * input,zval * return_value,zend_long flags)2914 PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return_value, zend_long flags) /* {{{ */
2915 {
2916 zval *entry; /* An entry in the input array */
2917 uint32_t num_subpats; /* Number of captured subpatterns */
2918 int count; /* Count of matched subpatterns */
2919 uint32_t options; /* Execution options */
2920 zend_string *string_key;
2921 zend_ulong num_key;
2922 bool invert; /* Whether to return non-matching
2923 entries */
2924 pcre2_match_data *match_data;
2925 invert = flags & PREG_GREP_INVERT ? 1 : 0;
2926
2927 /* Calculate the size of the offsets array, and allocate memory for it. */
2928 num_subpats = pce->capture_count + 1;
2929
2930 /* Initialize return array */
2931 array_init(return_value);
2932 HashTable *return_value_ht = Z_ARRVAL_P(return_value);
2933
2934 PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
2935
2936 if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
2937 match_data = mdata;
2938 } else {
2939 match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
2940 if (!match_data) {
2941 PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
2942 return;
2943 }
2944 }
2945
2946 options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
2947
2948 /* Go through the input array */
2949 ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(input), num_key, string_key, entry) {
2950 zend_string *tmp_subject_str;
2951 zend_string *subject_str = zval_get_tmp_string(entry, &tmp_subject_str);
2952
2953 /* Perform the match */
2954 #ifdef HAVE_PCRE_JIT_SUPPORT
2955 if ((pce->preg_options & PREG_JIT) && options) {
2956 count = pcre2_jit_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), 0,
2957 PCRE2_NO_UTF_CHECK, match_data, mctx);
2958 } else
2959 #endif
2960 count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), 0,
2961 options, match_data, mctx);
2962
2963 /* If the entry fits our requirements */
2964 if (count >= 0) {
2965 /* Check for too many substrings condition. */
2966 if (UNEXPECTED(count == 0)) {
2967 php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
2968 }
2969 if (!invert) {
2970 Z_TRY_ADDREF_P(entry);
2971
2972 /* Add to return array */
2973 if (string_key) {
2974 zend_hash_update(return_value_ht, string_key, entry);
2975 } else {
2976 zend_hash_index_update(return_value_ht, num_key, entry);
2977 }
2978 }
2979 } else if (count == PCRE2_ERROR_NOMATCH) {
2980 if (invert) {
2981 Z_TRY_ADDREF_P(entry);
2982
2983 /* Add to return array */
2984 if (string_key) {
2985 zend_hash_update(return_value_ht, string_key, entry);
2986 } else {
2987 zend_hash_index_update(return_value_ht, num_key, entry);
2988 }
2989 }
2990 } else {
2991 pcre_handle_exec_error(count);
2992 zend_tmp_string_release(tmp_subject_str);
2993 break;
2994 }
2995
2996 zend_tmp_string_release(tmp_subject_str);
2997 } ZEND_HASH_FOREACH_END();
2998 if (match_data != mdata) {
2999 pcre2_match_data_free(match_data);
3000 }
3001 }
3002 /* }}} */
3003
3004 /* {{{ Returns the error code of the last regexp execution. */
PHP_FUNCTION(preg_last_error)3005 PHP_FUNCTION(preg_last_error)
3006 {
3007 ZEND_PARSE_PARAMETERS_NONE();
3008
3009 RETURN_LONG(PCRE_G(error_code));
3010 }
3011 /* }}} */
3012
3013 /* {{{ Returns the error message of the last regexp execution. */
PHP_FUNCTION(preg_last_error_msg)3014 PHP_FUNCTION(preg_last_error_msg)
3015 {
3016 ZEND_PARSE_PARAMETERS_NONE();
3017
3018 RETURN_STRING(php_pcre_get_error_msg(PCRE_G(error_code)));
3019 }
3020 /* }}} */
3021
3022 /* {{{ module definition structures */
3023
/* Module descriptor for the "pcre" extension. The initializer is positional,
 * so the order of the entries below must not be changed. */
zend_module_entry pcre_module_entry = {
	STANDARD_MODULE_HEADER,
	"pcre",                 /* extension name */
	ext_functions,          /* function table; presumably generated in php_pcre_arginfo.h - confirm */
	PHP_MINIT(pcre),        /* module init hook */
	PHP_MSHUTDOWN(pcre),    /* module shutdown hook */
	PHP_RINIT(pcre),        /* request init hook */
	PHP_RSHUTDOWN(pcre),    /* request shutdown hook */
	PHP_MINFO(pcre),        /* module info hook */
	PHP_PCRE_VERSION,       /* version string */
	PHP_MODULE_GLOBALS(pcre),
	PHP_GINIT(pcre),        /* globals init hook */
	PHP_GSHUTDOWN(pcre),    /* globals shutdown hook */
	NULL,                   /* NOTE(review): unused hook slot, presumably post-deactivate - confirm */
	STANDARD_MODULE_PROPERTIES_EX
};
3040
/* Emitted only when COMPILE_DL_PCRE is defined (presumably for builds of the
 * extension as a separately loadable module - confirm). */
#ifdef COMPILE_DL_PCRE
ZEND_GET_MODULE(pcre)
#endif
3044
3045 /* }}} */
3046
/* Exported accessor for `mctx`, the match context this file passes to
 * pcre2_match() and pcre2_jit_match(). The pointer is returned as-is. */
PHPAPI pcre2_match_context *php_pcre_mctx(void)
{/*{{{*/
	return mctx;
}/*}}}*/
3051
/* Exported accessor for `gctx`, this file's PCRE2 general context. The
 * pointer is returned as-is. */
PHPAPI pcre2_general_context *php_pcre_gctx(void)
{/*{{{*/
	return gctx;
}/*}}}*/
3056
/* Exported accessor for `cctx`, this file's PCRE2 compile context. The
 * pointer is returned as-is. */
PHPAPI pcre2_compile_context *php_pcre_cctx(void)
{/*{{{*/
	return cctx;
}/*}}}*/
3061
php_pcre_pce_incref(pcre_cache_entry * pce)3062 PHPAPI void php_pcre_pce_incref(pcre_cache_entry *pce)
3063 {/*{{{*/
3064 assert(NULL != pce);
3065 pce->refcount++;
3066 }/*}}}*/
3067
php_pcre_pce_decref(pcre_cache_entry * pce)3068 PHPAPI void php_pcre_pce_decref(pcre_cache_entry *pce)
3069 {/*{{{*/
3070 assert(NULL != pce);
3071 assert(0 != pce->refcount);
3072 pce->refcount--;
3073 }/*}}}*/
3074
php_pcre_pce_re(pcre_cache_entry * pce)3075 PHPAPI pcre2_code *php_pcre_pce_re(pcre_cache_entry *pce)
3076 {/*{{{*/
3077 assert(NULL != pce);
3078 return pce->re;
3079 }/*}}}*/
3080