1 /*
2 +----------------------------------------------------------------------+
3 | Copyright (c) The PHP Group |
4 +----------------------------------------------------------------------+
5 | This source file is subject to version 3.01 of the PHP license, |
6 | that is bundled with this package in the file LICENSE, and is |
7 | available through the world-wide-web at the following url: |
8 | https://www.php.net/license/3_01.txt |
9 | If you did not receive a copy of the PHP license and are unable to |
10 | obtain it through the world-wide-web, please send a note to |
11 | license@php.net so we can mail you a copy immediately. |
12 +----------------------------------------------------------------------+
13 | Author: Andrei Zmievski <andrei@php.net> |
14 +----------------------------------------------------------------------+
15 */
16
17 #include "php.h"
18 #include "php_ini.h"
19 #include "php_globals.h"
20 #include "php_pcre.h"
21 #include "ext/standard/info.h"
22 #include "ext/standard/basic_functions.h"
23 #include "zend_smart_str.h"
24 #include "SAPI.h"
25
26 #include "ext/standard/php_string.h"
27
/* NOTE(review): presumably the two result-ordering modes of preg_match_all();
 * their users are not visible in this part of the file -- confirm. */
#define PREG_PATTERN_ORDER 1
#define PREG_SET_ORDER 2
/* Bit flags tested against the `flags` argument of populate_subpat_array(). */
#define PREG_OFFSET_CAPTURE (1<<8)
#define PREG_UNMATCHED_AS_NULL (1<<9)

/* NOTE(review): presumably flag bits for preg_split() -- confirm against the callers. */
#define PREG_SPLIT_NO_EMPTY (1<<0)
#define PREG_SPLIT_DELIM_CAPTURE (1<<1)
#define PREG_SPLIT_OFFSET_CAPTURE (1<<2)

/* NOTE(review): presumably the flag bit for preg_grep() -- confirm against the callers. */
#define PREG_GREP_INVERT (1<<0)

/* Stored in pcre_cache_entry.preg_options when JIT compilation produced code. */
#define PREG_JIT (1<<3)

/* Element count at which pcre_get_compiled_regex_cache_ex() evicts
 * PCRE_CACHE_SIZE / 8 unreferenced entries before adding a new one. */
#define PCRE_CACHE_SIZE 4096

/* 1 when built with HAVE_PCRE_JIT_SUPPORT, 0 otherwise. */
#ifdef HAVE_PCRE_JIT_SUPPORT
#define PHP_PCRE_JIT_SUPPORT 1
#else
#define PHP_PCRE_JIT_SUPPORT 0
#endif
48
49 char *php_pcre_version;
50
51 #include "php_pcre_arginfo.h"
52
/* One compiled pattern as stored in PCRE_G(pcre_cache). */
struct _pcre_cache_entry {
	pcre2_code *re;           /* compiled pattern; freed with pcre2_code_free() by the cache destructors */
	uint32_t preg_options;    /* PHP-side flags, e.g. PREG_JIT */
	uint32_t capture_count;   /* PCRE2_INFO_CAPTURECOUNT of `re` */
	uint32_t name_count;      /* PCRE2_INFO_NAMECOUNT of `re` */
	uint32_t compile_options; /* option bits that were passed to pcre2_compile() */
	uint32_t refcount;        /* entries with a non-zero value are never evicted by pcre_clean_cache() */
};
61
62 PHPAPI ZEND_DECLARE_MODULE_GLOBALS(pcre)
63
#ifdef HAVE_PCRE_JIT_SUPPORT
/* Start and maximum sizes handed to pcre2_jit_stack_create(). */
#define PCRE_JIT_STACK_MIN_SIZE (32 * 1024)
#define PCRE_JIT_STACK_MAX_SIZE (192 * 1024)
/* JIT stack created by php_pcre_init_pcre2() when JIT is requested. */
ZEND_TLS pcre2_jit_stack *jit_stack = NULL;
#endif
/* General context using (infallible) system allocator. */
ZEND_TLS pcre2_general_context *gctx = NULL;
/* These two are global per thread for now, though they could also be used
	per pattern: either copy them into each pce, or drop the global
	contexts entirely and create them for every pce. */
ZEND_TLS pcre2_compile_context *cctx = NULL;
ZEND_TLS pcre2_match_context *mctx = NULL;
/* Preallocated match data block (PHP_PCRE_PREALLOC_MDATA_SIZE pairs) handed out
	by php_pcre_create_match_data(); mdata_used is set while it is lent out. */
ZEND_TLS pcre2_match_data *mdata = NULL;
ZEND_TLS bool mdata_used = 0;
/* Set to 1 by php_pcre_init_pcre2() once all of the objects above exist. */
ZEND_TLS uint8_t pcre2_init_ok = 0;
#if defined(ZTS) && defined(HAVE_PCRE_JIT_SUPPORT)
/* Mutex taken around the initialisation retry in RINIT. It is allocated and
	freed only when the caller is the main thread. */
static MUTEX_T pcre_mt = NULL;
#define php_pcre_mutex_alloc() \
	if (tsrm_is_main_thread() && !pcre_mt) pcre_mt = tsrm_mutex_alloc();
#define php_pcre_mutex_free() \
	if (tsrm_is_main_thread() && pcre_mt) { tsrm_mutex_free(pcre_mt); pcre_mt = NULL; }
#define php_pcre_mutex_lock() tsrm_mutex_lock(pcre_mt);
#define php_pcre_mutex_unlock() tsrm_mutex_unlock(pcre_mt);
#else
/* Without ZTS or without JIT support the mutex operations expand to nothing. */
#define php_pcre_mutex_alloc()
#define php_pcre_mutex_free()
#define php_pcre_mutex_lock()
#define php_pcre_mutex_unlock()
#endif

/* Character tables produced by pcre2_maketables(), keyed by BG(ctype_string). */
ZEND_TLS HashTable char_tables;
95
/* Destructor for char_tables: releases the persistently allocated table
 * that the hash slot points at. */
static void php_pcre_free_char_table(zval *data)
{/*{{{*/
	pefree(Z_PTR_P(data), 1);
}/*}}}*/
101
pcre_handle_exec_error(int pcre_code)102 static void pcre_handle_exec_error(int pcre_code) /* {{{ */
103 {
104 int preg_code = 0;
105
106 switch (pcre_code) {
107 case PCRE2_ERROR_MATCHLIMIT:
108 preg_code = PHP_PCRE_BACKTRACK_LIMIT_ERROR;
109 break;
110
111 case PCRE2_ERROR_RECURSIONLIMIT:
112 preg_code = PHP_PCRE_RECURSION_LIMIT_ERROR;
113 break;
114
115 case PCRE2_ERROR_BADUTFOFFSET:
116 preg_code = PHP_PCRE_BAD_UTF8_OFFSET_ERROR;
117 break;
118
119 #ifdef HAVE_PCRE_JIT_SUPPORT
120 case PCRE2_ERROR_JIT_STACKLIMIT:
121 preg_code = PHP_PCRE_JIT_STACKLIMIT_ERROR;
122 break;
123 #endif
124
125 default:
126 if (pcre_code <= PCRE2_ERROR_UTF8_ERR1 && pcre_code >= PCRE2_ERROR_UTF8_ERR21) {
127 preg_code = PHP_PCRE_BAD_UTF8_ERROR;
128 } else {
129 preg_code = PHP_PCRE_INTERNAL_ERROR;
130 }
131 break;
132 }
133
134 PCRE_G(error_code) = preg_code;
135 }
136 /* }}} */
137
/* Returns the static, human-readable message for a PHP_PCRE_*_ERROR code;
 * codes without a dedicated message yield "Unknown error". */
static const char *php_pcre_get_error_msg(php_pcre_error_code error_code) /* {{{ */
{
	const char *msg = "Unknown error";

	switch (error_code) {
		case PHP_PCRE_NO_ERROR:
			msg = "No error";
			break;
		case PHP_PCRE_INTERNAL_ERROR:
			msg = "Internal error";
			break;
		case PHP_PCRE_BAD_UTF8_ERROR:
			msg = "Malformed UTF-8 characters, possibly incorrectly encoded";
			break;
		case PHP_PCRE_BAD_UTF8_OFFSET_ERROR:
			msg = "The offset did not correspond to the beginning of a valid UTF-8 code point";
			break;
		case PHP_PCRE_BACKTRACK_LIMIT_ERROR:
			msg = "Backtrack limit exhausted";
			break;
		case PHP_PCRE_RECURSION_LIMIT_ERROR:
			msg = "Recursion limit exhausted";
			break;
#ifdef HAVE_PCRE_JIT_SUPPORT
		case PHP_PCRE_JIT_STACKLIMIT_ERROR:
			msg = "JIT stack limit exhausted";
			break;
#endif
		default:
			break;
	}

	return msg;
}
/* }}} */
164
/* Destructor for the persistent pattern cache: frees the compiled code and
 * the entry itself (allocated with the system allocator). */
static void php_free_pcre_cache(zval *data) /* {{{ */
{
	pcre_cache_entry *entry = (pcre_cache_entry *) Z_PTR_P(data);

	if (entry != NULL) {
		pcre2_code_free(entry->re);
		free(entry);
	}
}
/* }}} */
173
/* Destructor for the per-request pattern cache: frees the compiled code and
 * the entry itself (allocated with the request allocator). */
static void php_efree_pcre_cache(zval *data) /* {{{ */
{
	pcre_cache_entry *entry = (pcre_cache_entry *) Z_PTR_P(data);

	if (entry != NULL) {
		pcre2_code_free(entry->re);
		efree(entry);
	}
}
/* }}} */
182
/* PCRE2 allocation callback backed by the persistent allocator; `data` is unused. */
static void *php_pcre_malloc(PCRE2_SIZE size, void *data)
{
	void *block = pemalloc(size, 1);

	return block;
}
187
/* PCRE2 release callback paired with php_pcre_malloc(); `data` is unused. */
static void php_pcre_free(void *block, void *data)
{
	const int persistent = 1;

	pefree(block, persistent);
}
192
/* PCRE2 allocation callback backed by the request allocator; `data` is unused. */
static void *php_pcre_emalloc(PCRE2_SIZE size, void *data)
{
	void *block = emalloc(size);

	return block;
}
197
/* PCRE2 release callback paired with php_pcre_emalloc(); `data` is unused. */
static void php_pcre_efree(void *block, void *data)
{
	void *request_block = block;

	efree(request_block);
}
202
/* Extra compile options applied to the compile context by default. */
#ifdef PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK
/* pcre 10.38 needs PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK, disabled by default */
#define PHP_PCRE_DEFAULT_EXTRA_COPTIONS PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK
#else
#define PHP_PCRE_DEFAULT_EXTRA_COPTIONS 0
#endif

/* Size passed to pcre2_match_data_create() for the shared `mdata` block.
 * php_pcre_create_match_data() only hands that block out when
 * capture_count + 1 does not exceed this value. */
#define PHP_PCRE_PREALLOC_MDATA_SIZE 32
211
php_pcre_init_pcre2(uint8_t jit)212 static void php_pcre_init_pcre2(uint8_t jit)
213 {/*{{{*/
214 if (!gctx) {
215 gctx = pcre2_general_context_create(php_pcre_malloc, php_pcre_free, NULL);
216 if (!gctx) {
217 pcre2_init_ok = 0;
218 return;
219 }
220 }
221
222 if (!cctx) {
223 cctx = pcre2_compile_context_create(gctx);
224 if (!cctx) {
225 pcre2_init_ok = 0;
226 return;
227 }
228 }
229
230 pcre2_set_compile_extra_options(cctx, PHP_PCRE_DEFAULT_EXTRA_COPTIONS);
231
232 if (!mctx) {
233 mctx = pcre2_match_context_create(gctx);
234 if (!mctx) {
235 pcre2_init_ok = 0;
236 return;
237 }
238 }
239
240 #ifdef HAVE_PCRE_JIT_SUPPORT
241 if (jit && !jit_stack) {
242 jit_stack = pcre2_jit_stack_create(PCRE_JIT_STACK_MIN_SIZE, PCRE_JIT_STACK_MAX_SIZE, gctx);
243 if (!jit_stack) {
244 pcre2_init_ok = 0;
245 return;
246 }
247 }
248 #endif
249
250 if (!mdata) {
251 mdata = pcre2_match_data_create(PHP_PCRE_PREALLOC_MDATA_SIZE, gctx);
252 if (!mdata) {
253 pcre2_init_ok = 0;
254 return;
255 }
256 }
257
258 pcre2_init_ok = 1;
259 }/*}}}*/
260
php_pcre_shutdown_pcre2(void)261 static void php_pcre_shutdown_pcre2(void)
262 {/*{{{*/
263 if (gctx) {
264 pcre2_general_context_free(gctx);
265 gctx = NULL;
266 }
267
268 if (cctx) {
269 pcre2_compile_context_free(cctx);
270 cctx = NULL;
271 }
272
273 if (mctx) {
274 pcre2_match_context_free(mctx);
275 mctx = NULL;
276 }
277
278 #ifdef HAVE_PCRE_JIT_SUPPORT
279 /* Stack may only be destroyed when no cached patterns
280 possibly associated with it do exist. */
281 if (jit_stack) {
282 pcre2_jit_stack_free(jit_stack);
283 jit_stack = NULL;
284 }
285 #endif
286
287 if (mdata) {
288 pcre2_match_data_free(mdata);
289 mdata = NULL;
290 }
291
292 pcre2_init_ok = 0;
293 }/*}}}*/
294
PHP_GINIT_FUNCTION(pcre)295 static PHP_GINIT_FUNCTION(pcre) /* {{{ */
296 {
297 php_pcre_mutex_alloc();
298
299 /* If we're on the CLI SAPI, there will only be one request, so we don't need the
300 * cache to survive after RSHUTDOWN. */
301 pcre_globals->per_request_cache = strcmp(sapi_module.name, "cli") == 0;
302 if (!pcre_globals->per_request_cache) {
303 zend_hash_init(&pcre_globals->pcre_cache, 0, NULL, php_free_pcre_cache, 1);
304 }
305
306 pcre_globals->backtrack_limit = 0;
307 pcre_globals->recursion_limit = 0;
308 pcre_globals->error_code = PHP_PCRE_NO_ERROR;
309 ZVAL_UNDEF(&pcre_globals->unmatched_null_pair);
310 ZVAL_UNDEF(&pcre_globals->unmatched_empty_pair);
311 #ifdef HAVE_PCRE_JIT_SUPPORT
312 pcre_globals->jit = 1;
313 #endif
314
315 php_pcre_init_pcre2(1);
316 zend_hash_init(&char_tables, 1, NULL, php_pcre_free_char_table, 1);
317 }
318 /* }}} */
319
PHP_GSHUTDOWN_FUNCTION(pcre)320 static PHP_GSHUTDOWN_FUNCTION(pcre) /* {{{ */
321 {
322 if (!pcre_globals->per_request_cache) {
323 zend_hash_destroy(&pcre_globals->pcre_cache);
324 }
325
326 php_pcre_shutdown_pcre2();
327 zend_hash_destroy(&char_tables);
328 php_pcre_mutex_free();
329 }
330 /* }}} */
331
PHP_INI_MH(OnUpdateBacktrackLimit)332 static PHP_INI_MH(OnUpdateBacktrackLimit)
333 {/*{{{*/
334 OnUpdateLong(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
335 if (mctx) {
336 pcre2_set_match_limit(mctx, (uint32_t)PCRE_G(backtrack_limit));
337 }
338
339 return SUCCESS;
340 }/*}}}*/
341
PHP_INI_MH(OnUpdateRecursionLimit)342 static PHP_INI_MH(OnUpdateRecursionLimit)
343 {/*{{{*/
344 OnUpdateLong(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
345 if (mctx) {
346 pcre2_set_depth_limit(mctx, (uint32_t)PCRE_G(recursion_limit));
347 }
348
349 return SUCCESS;
350 }/*}}}*/
351
352 #ifdef HAVE_PCRE_JIT_SUPPORT
PHP_INI_MH(OnUpdateJit)353 static PHP_INI_MH(OnUpdateJit)
354 {/*{{{*/
355 OnUpdateBool(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
356 if (PCRE_G(jit) && jit_stack) {
357 pcre2_jit_stack_assign(mctx, NULL, jit_stack);
358 } else {
359 pcre2_jit_stack_assign(mctx, NULL, NULL);
360 }
361
362 return SUCCESS;
363 }/*}}}*/
364 #endif
365
/* INI directives of the extension. Each entry binds a zend_pcre_globals field to
 * its change handler; all of them are changeable at PHP_INI_ALL level. pcre.jit
 * is only registered when built with HAVE_PCRE_JIT_SUPPORT. */
PHP_INI_BEGIN()
	STD_PHP_INI_ENTRY("pcre.backtrack_limit", "1000000", PHP_INI_ALL, OnUpdateBacktrackLimit, backtrack_limit, zend_pcre_globals, pcre_globals)
	STD_PHP_INI_ENTRY("pcre.recursion_limit", "100000", PHP_INI_ALL, OnUpdateRecursionLimit, recursion_limit, zend_pcre_globals, pcre_globals)
#ifdef HAVE_PCRE_JIT_SUPPORT
	STD_PHP_INI_BOOLEAN("pcre.jit", "1", PHP_INI_ALL, OnUpdateJit, jit, zend_pcre_globals, pcre_globals)
#endif
PHP_INI_END()
373
/* Fetches a string-valued pcre2_config() item.
 *
 * `what` is the PCRE2_CONFIG_* selector. Returns a malloc()ed, NUL-terminated
 * copy that the caller must free(), or NULL when the item is unavailable
 * (pcre2_config() reports an error or a zero length) or memory is exhausted.
 */
static char *_pcre2_config_str(uint32_t what)
{/*{{{*/
	char *ret;
	/* First call with a NULL buffer only asks for the required length. */
	int len = pcre2_config(what, NULL);

	/* Negative values are PCRE2 error codes (e.g. an unsupported item);
	 * they must not be turned into an allocation size. */
	if (len <= 0) {
		return NULL;
	}

	ret = malloc((size_t) len + 1);
	if (ret == NULL) {
		return NULL;
	}

	len = pcre2_config(what, ret);
	if (len <= 0) {
		/* Nothing usable was written into the buffer. */
		free(ret);
		return NULL;
	}

	return ret;
}/*}}}*/
387
388 /* {{{ PHP_MINFO_FUNCTION(pcre) */
PHP_MINFO_FUNCTION(pcre)389 static PHP_MINFO_FUNCTION(pcre)
390 {
391 #ifdef HAVE_PCRE_JIT_SUPPORT
392 uint32_t flag = 0;
393 char *jit_target = _pcre2_config_str(PCRE2_CONFIG_JITTARGET);
394 #endif
395 char *version = _pcre2_config_str(PCRE2_CONFIG_VERSION);
396 char *unicode = _pcre2_config_str(PCRE2_CONFIG_UNICODE_VERSION);
397
398 php_info_print_table_start();
399 php_info_print_table_row(2, "PCRE (Perl Compatible Regular Expressions) Support", "enabled" );
400 php_info_print_table_row(2, "PCRE Library Version", version);
401 free(version);
402 php_info_print_table_row(2, "PCRE Unicode Version", unicode);
403 free(unicode);
404
405 #ifdef HAVE_PCRE_JIT_SUPPORT
406 if (!pcre2_config(PCRE2_CONFIG_JIT, &flag)) {
407 php_info_print_table_row(2, "PCRE JIT Support", flag ? "enabled" : "disabled");
408 } else {
409 php_info_print_table_row(2, "PCRE JIT Support", "unknown" );
410 }
411 if (jit_target) {
412 php_info_print_table_row(2, "PCRE JIT Target", jit_target);
413 }
414 free(jit_target);
415 #else
416 php_info_print_table_row(2, "PCRE JIT Support", "not compiled in" );
417 #endif
418
419 #ifdef HAVE_PCRE_VALGRIND_SUPPORT
420 php_info_print_table_row(2, "PCRE Valgrind Support", "enabled" );
421 #endif
422
423 php_info_print_table_end();
424
425 DISPLAY_INI_ENTRIES();
426 }
427 /* }}} */
428
429 /* {{{ PHP_MINIT_FUNCTION(pcre) */
PHP_MINIT_FUNCTION(pcre)430 static PHP_MINIT_FUNCTION(pcre)
431 {
432 #ifdef HAVE_PCRE_JIT_SUPPORT
433 if (UNEXPECTED(!pcre2_init_ok)) {
434 /* Retry. */
435 php_pcre_init_pcre2(PCRE_G(jit));
436 if (!pcre2_init_ok) {
437 return FAILURE;
438 }
439 }
440 #endif
441
442 REGISTER_INI_ENTRIES();
443
444 php_pcre_version = _pcre2_config_str(PCRE2_CONFIG_VERSION);
445
446 register_php_pcre_symbols(module_number);
447
448 return SUCCESS;
449 }
450 /* }}} */
451
452 /* {{{ PHP_MSHUTDOWN_FUNCTION(pcre) */
PHP_MSHUTDOWN_FUNCTION(pcre)453 static PHP_MSHUTDOWN_FUNCTION(pcre)
454 {
455 UNREGISTER_INI_ENTRIES();
456
457 free(php_pcre_version);
458
459 return SUCCESS;
460 }
461 /* }}} */
462
463 /* {{{ PHP_RINIT_FUNCTION(pcre) */
PHP_RINIT_FUNCTION(pcre)464 static PHP_RINIT_FUNCTION(pcre)
465 {
466 #ifdef HAVE_PCRE_JIT_SUPPORT
467 if (UNEXPECTED(!pcre2_init_ok)) {
468 /* Retry. */
469 php_pcre_mutex_lock();
470 php_pcre_init_pcre2(PCRE_G(jit));
471 if (!pcre2_init_ok) {
472 php_pcre_mutex_unlock();
473 return FAILURE;
474 }
475 php_pcre_mutex_unlock();
476 }
477
478 mdata_used = 0;
479 #endif
480
481 PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
482 PCRE_G(gctx_zmm) = pcre2_general_context_create(php_pcre_emalloc, php_pcre_efree, NULL);
483 if (!PCRE_G(gctx_zmm)) {
484 return FAILURE;
485 }
486
487 if (PCRE_G(per_request_cache)) {
488 zend_hash_init(&PCRE_G(pcre_cache), 0, NULL, php_efree_pcre_cache, 0);
489 }
490
491 return SUCCESS;
492 }
493 /* }}} */
494
PHP_RSHUTDOWN_FUNCTION(pcre)495 static PHP_RSHUTDOWN_FUNCTION(pcre)
496 {
497 pcre2_general_context_free(PCRE_G(gctx_zmm));
498 PCRE_G(gctx_zmm) = NULL;
499
500 if (PCRE_G(per_request_cache)) {
501 zend_hash_destroy(&PCRE_G(pcre_cache));
502 }
503
504 zval_ptr_dtor(&PCRE_G(unmatched_null_pair));
505 zval_ptr_dtor(&PCRE_G(unmatched_empty_pair));
506 ZVAL_UNDEF(&PCRE_G(unmatched_null_pair));
507 ZVAL_UNDEF(&PCRE_G(unmatched_empty_pair));
508 return SUCCESS;
509 }
510
511 /* {{{ static pcre_clean_cache */
pcre_clean_cache(zval * data,void * arg)512 static int pcre_clean_cache(zval *data, void *arg)
513 {
514 pcre_cache_entry *pce = (pcre_cache_entry *) Z_PTR_P(data);
515 int *num_clean = (int *)arg;
516
517 if (*num_clean > 0 && !pce->refcount) {
518 (*num_clean)--;
519 return ZEND_HASH_APPLY_REMOVE;
520 } else {
521 return ZEND_HASH_APPLY_KEEP;
522 }
523 }
524 /* }}} */
525
/* Releases every non-NULL name in a table built by make_subpats_table() and
 * then the table itself. */
static void free_subpats_table(zend_string **subpat_names, uint32_t num_subpats) {
	for (uint32_t idx = 0; idx != num_subpats; ++idx) {
		zend_string *entry = subpat_names[idx];

		if (entry != NULL) {
			zend_string_release_ex(entry, false);
		}
	}
	efree(subpat_names);
}
535
536 /* {{{ static make_subpats_table */
make_subpats_table(uint32_t num_subpats,pcre_cache_entry * pce)537 static zend_string **make_subpats_table(uint32_t num_subpats, pcre_cache_entry *pce)
538 {
539 uint32_t name_cnt = pce->name_count, name_size, ni = 0;
540 char *name_table;
541 zend_string **subpat_names;
542 int rc1, rc2;
543
544 rc1 = pcre2_pattern_info(pce->re, PCRE2_INFO_NAMETABLE, &name_table);
545 rc2 = pcre2_pattern_info(pce->re, PCRE2_INFO_NAMEENTRYSIZE, &name_size);
546 if (rc1 < 0 || rc2 < 0) {
547 php_error_docref(NULL, E_WARNING, "Internal pcre2_pattern_info() error %d", rc1 < 0 ? rc1 : rc2);
548 return NULL;
549 }
550
551 subpat_names = ecalloc(num_subpats, sizeof(zend_string *));
552 while (ni++ < name_cnt) {
553 unsigned short name_idx = 0x100 * (unsigned char)name_table[0] + (unsigned char)name_table[1];
554 const char *name = name_table + 2;
555 subpat_names[name_idx] = zend_string_init(name, strlen(name), 0);
556 if (is_numeric_string(ZSTR_VAL(subpat_names[name_idx]), ZSTR_LEN(subpat_names[name_idx]), NULL, NULL, 0) > 0) {
557 php_error_docref(NULL, E_WARNING, "Numeric named subpatterns are not allowed");
558 free_subpats_table(subpat_names, num_subpats);
559 return NULL;
560 }
561 name_table += name_size;
562 }
563 return subpat_names;
564 }
565 /* }}} */
566
567 /* {{{ static calculate_unit_length */
568 /* Calculates the byte length of the next character. Assumes valid UTF-8 for PCRE2_UTF. */
calculate_unit_length(pcre_cache_entry * pce,const char * start)569 static zend_always_inline size_t calculate_unit_length(pcre_cache_entry *pce, const char *start)
570 {
571 size_t unit_len;
572
573 if (pce->compile_options & PCRE2_UTF) {
574 const char *end = start;
575
576 /* skip continuation bytes */
577 while ((*++end & 0xC0) == 0x80);
578 unit_len = end - start;
579 } else {
580 unit_len = 1;
581 }
582 return unit_len;
583 }
584 /* }}} */
585
586 /* {{{ pcre_get_compiled_regex_cache */
pcre_get_compiled_regex_cache_ex(zend_string * regex,bool locale_aware)587 PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, bool locale_aware)
588 {
589 pcre2_code *re = NULL;
590 #if 10 == PCRE2_MAJOR && 37 == PCRE2_MINOR && !HAVE_BUNDLED_PCRE
591 uint32_t coptions = PCRE2_NO_START_OPTIMIZE;
592 #else
593 uint32_t coptions = 0;
594 #endif
595 uint32_t eoptions = PHP_PCRE_DEFAULT_EXTRA_COPTIONS;
596 PCRE2_UCHAR error[128];
597 PCRE2_SIZE erroffset;
598 int errnumber;
599 char delimiter;
600 char start_delimiter;
601 char end_delimiter;
602 char *p, *pp;
603 char *pattern;
604 size_t pattern_len;
605 uint32_t poptions = 0;
606 const uint8_t *tables = NULL;
607 zval *zv;
608 pcre_cache_entry new_entry;
609 int rc;
610 zend_string *key;
611 pcre_cache_entry *ret;
612
613 if (locale_aware && BG(ctype_string)) {
614 key = zend_string_concat2(
615 ZSTR_VAL(BG(ctype_string)), ZSTR_LEN(BG(ctype_string)),
616 ZSTR_VAL(regex), ZSTR_LEN(regex));
617 } else {
618 key = regex;
619 }
620
621 /* Try to lookup the cached regex entry, and if successful, just pass
622 back the compiled pattern, otherwise go on and compile it. */
623 zv = zend_hash_find(&PCRE_G(pcre_cache), key);
624 if (zv) {
625 if (key != regex) {
626 zend_string_release_ex(key, 0);
627 }
628 return (pcre_cache_entry*)Z_PTR_P(zv);
629 }
630
631 p = ZSTR_VAL(regex);
632 const char* end_p = ZSTR_VAL(regex) + ZSTR_LEN(regex);
633
634 /* Parse through the leading whitespace, and display a warning if we
635 get to the end without encountering a delimiter. */
636 while (isspace((int)*(unsigned char *)p)) p++;
637 if (p >= end_p) {
638 if (key != regex) {
639 zend_string_release_ex(key, 0);
640 }
641 php_error_docref(NULL, E_WARNING, "Empty regular expression");
642 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
643 return NULL;
644 }
645
646 /* Get the delimiter and display a warning if it is alphanumeric
647 or a backslash. */
648 delimiter = *p++;
649 if (isalnum((int)*(unsigned char *)&delimiter) || delimiter == '\\' || delimiter == '\0') {
650 if (key != regex) {
651 zend_string_release_ex(key, 0);
652 }
653 php_error_docref(NULL, E_WARNING, "Delimiter must not be alphanumeric, backslash, or NUL byte");
654 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
655 return NULL;
656 }
657
658 start_delimiter = delimiter;
659 if ((pp = strchr("([{< )]}> )]}>", delimiter)))
660 delimiter = pp[5];
661 end_delimiter = delimiter;
662
663 pp = p;
664
665 if (start_delimiter == end_delimiter) {
666 /* We need to iterate through the pattern, searching for the ending delimiter,
667 but skipping the backslashed delimiters. If the ending delimiter is not
668 found, display a warning. */
669 while (pp < end_p) {
670 if (*pp == '\\' && pp + 1 < end_p) pp++;
671 else if (*pp == delimiter)
672 break;
673 pp++;
674 }
675 } else {
676 /* We iterate through the pattern, searching for the matching ending
677 * delimiter. For each matching starting delimiter, we increment nesting
678 * level, and decrement it for each matching ending delimiter. If we
679 * reach the end of the pattern without matching, display a warning.
680 */
681 int brackets = 1; /* brackets nesting level */
682 while (pp < end_p) {
683 if (*pp == '\\' && pp + 1 < end_p) pp++;
684 else if (*pp == end_delimiter && --brackets <= 0)
685 break;
686 else if (*pp == start_delimiter)
687 brackets++;
688 pp++;
689 }
690 }
691
692 if (pp >= end_p) {
693 if (key != regex) {
694 zend_string_release_ex(key, 0);
695 }
696 if (start_delimiter == end_delimiter) {
697 php_error_docref(NULL,E_WARNING, "No ending delimiter '%c' found", delimiter);
698 } else {
699 php_error_docref(NULL,E_WARNING, "No ending matching delimiter '%c' found", delimiter);
700 }
701 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
702 return NULL;
703 }
704
705 /* Make a copy of the actual pattern. */
706 pattern_len = pp - p;
707 pattern = estrndup(p, pattern_len);
708
709 /* Move on to the options */
710 pp++;
711
712 /* Parse through the options, setting appropriate flags. Display
713 a warning if we encounter an unknown modifier. */
714 while (pp < end_p) {
715 switch (*pp++) {
716 /* Perl compatible options */
717 case 'i': coptions |= PCRE2_CASELESS; break;
718 case 'm': coptions |= PCRE2_MULTILINE; break;
719 case 'n': coptions |= PCRE2_NO_AUTO_CAPTURE; break;
720 case 's': coptions |= PCRE2_DOTALL; break;
721 case 'x': coptions |= PCRE2_EXTENDED; break;
722
723 /* PCRE specific options */
724 case 'A': coptions |= PCRE2_ANCHORED; break;
725 case 'D': coptions |= PCRE2_DOLLAR_ENDONLY;break;
726 #ifdef PCRE2_EXTRA_CASELESS_RESTRICT
727 case 'r': eoptions |= PCRE2_EXTRA_CASELESS_RESTRICT; break;
728 #endif
729 case 'S': /* Pass. */ break;
730 case 'X': /* Pass. */ break;
731 case 'U': coptions |= PCRE2_UNGREEDY; break;
732 case 'u': coptions |= PCRE2_UTF;
733 /* In PCRE, by default, \d, \D, \s, \S, \w, and \W recognize only ASCII
734 characters, even in UTF-8 mode. However, this can be changed by setting
735 the PCRE2_UCP option. */
736 #ifdef PCRE2_UCP
737 coptions |= PCRE2_UCP;
738 #endif
739 break;
740 case 'J': coptions |= PCRE2_DUPNAMES; break;
741
742 case ' ':
743 case '\n':
744 case '\r':
745 break;
746
747 case 'e': /* legacy eval */
748 default:
749 if (pp[-1]) {
750 php_error_docref(NULL, E_WARNING, "Unknown modifier '%c'", pp[-1]);
751 } else {
752 php_error_docref(NULL, E_WARNING, "NUL byte is not a valid modifier");
753 }
754 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
755 efree(pattern);
756 if (key != regex) {
757 zend_string_release_ex(key, 0);
758 }
759 return NULL;
760 }
761 }
762
763 if (key != regex) {
764 tables = (uint8_t *)zend_hash_find_ptr(&char_tables, BG(ctype_string));
765 if (!tables) {
766 zend_string *_k;
767 tables = pcre2_maketables(gctx);
768 if (UNEXPECTED(!tables)) {
769 php_error_docref(NULL,E_WARNING, "Failed to generate locale character tables");
770 pcre_handle_exec_error(PCRE2_ERROR_NOMEMORY);
771 zend_string_release_ex(key, 0);
772 efree(pattern);
773 return NULL;
774 }
775 _k = zend_string_init(ZSTR_VAL(BG(ctype_string)), ZSTR_LEN(BG(ctype_string)), 1);
776 GC_MAKE_PERSISTENT_LOCAL(_k);
777 zend_hash_add_ptr(&char_tables, _k, (void *)tables);
778 zend_string_release(_k);
779 }
780 }
781 pcre2_set_character_tables(cctx, tables);
782
783 pcre2_set_compile_extra_options(cctx, eoptions);
784
785 /* Compile pattern and display a warning if compilation failed. */
786 re = pcre2_compile((PCRE2_SPTR)pattern, pattern_len, coptions, &errnumber, &erroffset, cctx);
787
788 if (re == NULL) {
789 if (key != regex) {
790 zend_string_release_ex(key, 0);
791 }
792 pcre2_get_error_message(errnumber, error, sizeof(error));
793 php_error_docref(NULL,E_WARNING, "Compilation failed: %s at offset %zu", error, erroffset);
794 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
795 efree(pattern);
796 return NULL;
797 }
798
799 #ifdef HAVE_PCRE_JIT_SUPPORT
800 if (PCRE_G(jit)) {
801 /* Enable PCRE JIT compiler */
802 rc = pcre2_jit_compile(re, PCRE2_JIT_COMPLETE);
803 if (EXPECTED(rc >= 0)) {
804 size_t jit_size = 0;
805 if (!pcre2_pattern_info(re, PCRE2_INFO_JITSIZE, &jit_size) && jit_size > 0) {
806 poptions |= PREG_JIT;
807 }
808 } else if (rc == PCRE2_ERROR_NOMEMORY) {
809 php_error_docref(NULL, E_WARNING,
810 "Allocation of JIT memory failed, PCRE JIT will be disabled. "
811 "This is likely caused by security restrictions. "
812 "Either grant PHP permission to allocate executable memory, or set pcre.jit=0");
813 PCRE_G(jit) = 0;
814 } else {
815 pcre2_get_error_message(rc, error, sizeof(error));
816 php_error_docref(NULL, E_WARNING, "JIT compilation failed: %s", error);
817 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
818 }
819 }
820 #endif
821 efree(pattern);
822
823 /*
824 * If we reached cache limit, clean out the items from the head of the list;
825 * these are supposedly the oldest ones (but not necessarily the least used
826 * ones).
827 */
828 if (zend_hash_num_elements(&PCRE_G(pcre_cache)) == PCRE_CACHE_SIZE) {
829 int num_clean = PCRE_CACHE_SIZE / 8;
830 zend_hash_apply_with_argument(&PCRE_G(pcre_cache), pcre_clean_cache, &num_clean);
831 }
832
833 /* Store the compiled pattern and extra info in the cache. */
834 new_entry.re = re;
835 new_entry.preg_options = poptions;
836 new_entry.compile_options = coptions;
837 new_entry.refcount = 0;
838
839 rc = pcre2_pattern_info(re, PCRE2_INFO_CAPTURECOUNT, &new_entry.capture_count);
840 if (rc < 0) {
841 if (key != regex) {
842 zend_string_release_ex(key, 0);
843 }
844 php_error_docref(NULL, E_WARNING, "Internal pcre2_pattern_info() error %d", rc);
845 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
846 return NULL;
847 }
848
849 rc = pcre2_pattern_info(re, PCRE2_INFO_NAMECOUNT, &new_entry.name_count);
850 if (rc < 0) {
851 if (key != regex) {
852 zend_string_release_ex(key, 0);
853 }
854 php_error_docref(NULL, E_WARNING, "Internal pcre_pattern_info() error %d", rc);
855 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
856 return NULL;
857 }
858
859 /*
860 * Interned strings are not duplicated when stored in HashTable,
861 * but all the interned strings created during HTTP request are removed
862 * at end of request. However PCRE_G(pcre_cache) must be consistent
863 * on the next request as well. So we disable usage of interned strings
864 * as hash keys especually for this table.
865 * See bug #63180
866 */
867 if (!(GC_FLAGS(key) & IS_STR_PERMANENT) && !PCRE_G(per_request_cache)) {
868 zend_string *str = zend_string_init(ZSTR_VAL(key), ZSTR_LEN(key), 1);
869 GC_MAKE_PERSISTENT_LOCAL(str);
870
871 ret = zend_hash_add_new_mem(&PCRE_G(pcre_cache), str, &new_entry, sizeof(pcre_cache_entry));
872 zend_string_release(str);
873 } else {
874 ret = zend_hash_add_new_mem(&PCRE_G(pcre_cache), key, &new_entry, sizeof(pcre_cache_entry));
875 }
876
877 if (key != regex) {
878 zend_string_release_ex(key, 0);
879 }
880
881 return ret;
882 }
883 /* }}} */
884
885 /* {{{ pcre_get_compiled_regex_cache */
pcre_get_compiled_regex_cache(zend_string * regex)886 PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex)
887 {
888 return pcre_get_compiled_regex_cache_ex(regex, true);
889 }
890 /* }}} */
891
892 /* {{{ pcre_get_compiled_regex */
pcre_get_compiled_regex(zend_string * regex,uint32_t * capture_count)893 PHPAPI pcre2_code *pcre_get_compiled_regex(zend_string *regex, uint32_t *capture_count)
894 {
895 pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex);
896
897 if (capture_count) {
898 *capture_count = pce ? pce->capture_count : 0;
899 }
900
901 return pce ? pce->re : NULL;
902 }
903 /* }}} */
904
905 /* XXX For the cases where it's only about match yes/no and no capture
906 required, perhaps just a minimum sized data would suffice. */
php_pcre_create_match_data(uint32_t capture_count,pcre2_code * re)907 PHPAPI pcre2_match_data *php_pcre_create_match_data(uint32_t capture_count, pcre2_code *re)
908 {/*{{{*/
909
910 assert(NULL != re);
911
912 if (EXPECTED(!mdata_used)) {
913 int rc = 0;
914
915 if (!capture_count) {
916 /* As we deal with a non cached pattern, no other way to gather this info. */
917 rc = pcre2_pattern_info(re, PCRE2_INFO_CAPTURECOUNT, &capture_count);
918 }
919
920 if (rc >= 0 && capture_count + 1 <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
921 mdata_used = 1;
922 return mdata;
923 }
924 }
925
926 return pcre2_match_data_create_from_pattern(re, gctx);
927 }/*}}}*/
928
php_pcre_free_match_data(pcre2_match_data * match_data)929 PHPAPI void php_pcre_free_match_data(pcre2_match_data *match_data)
930 {/*{{{*/
931 if (UNEXPECTED(match_data != mdata)) {
932 pcre2_match_data_free(match_data);
933 } else {
934 mdata_used = 0;
935 }
936 }/*}}}*/
937
init_unmatched_null_pair(void)938 static void init_unmatched_null_pair(void) {
939 zval val1, val2;
940 ZVAL_NULL(&val1);
941 ZVAL_LONG(&val2, -1);
942 ZVAL_ARR(&PCRE_G(unmatched_null_pair), zend_new_pair(&val1, &val2));
943 }
944
init_unmatched_empty_pair(void)945 static void init_unmatched_empty_pair(void) {
946 zval val1, val2;
947 ZVAL_EMPTY_STRING(&val1);
948 ZVAL_LONG(&val2, -1);
949 ZVAL_ARR(&PCRE_G(unmatched_empty_pair), zend_new_pair(&val1, &val2));
950 }
951
populate_match_value_str(zval * val,const char * subject,PCRE2_SIZE start_offset,PCRE2_SIZE end_offset)952 static zend_always_inline void populate_match_value_str(
953 zval *val, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset) {
954 ZVAL_STRINGL_FAST(val, subject + start_offset, end_offset - start_offset);
955 }
956
populate_match_value(zval * val,const char * subject,PCRE2_SIZE start_offset,PCRE2_SIZE end_offset,bool unmatched_as_null)957 static zend_always_inline void populate_match_value(
958 zval *val, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset,
959 bool unmatched_as_null) {
960 if (PCRE2_UNSET == start_offset) {
961 if (unmatched_as_null) {
962 ZVAL_NULL(val);
963 } else {
964 ZVAL_EMPTY_STRING(val);
965 }
966 } else {
967 populate_match_value_str(val, subject, start_offset, end_offset);
968 }
969 }
970
/* Stores `val` under `name` in `subpats`, taking an extra reference when the
 * value was actually stored. */
static inline void add_named(
		HashTable *const subpats, zend_string *name, zval *val, bool unmatched) {
	/* If the DUPNAMES option is used, multiple subpatterns might have the same name.
	 * In this case we want to preserve the one that actually has a value. */
	if (unmatched) {
		/* Never overwrite an existing entry with an unmatched one. */
		if (zend_hash_add(subpats, name, val) == NULL) {
			return;
		}
	} else {
		zend_hash_update(subpats, name, val);
	}

	Z_TRY_ADDREF_P(val);
}
984
985 /* {{{ add_offset_pair */
add_offset_pair(HashTable * const result,const char * subject,PCRE2_SIZE start_offset,PCRE2_SIZE end_offset,zend_string * name,zend_long unmatched_as_null)986 static inline void add_offset_pair(
987 HashTable *const result, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset,
988 zend_string *name, zend_long unmatched_as_null)
989 {
990 zval match_pair;
991
992 /* Add (match, offset) to the return value */
993 if (PCRE2_UNSET == start_offset) {
994 if (unmatched_as_null) {
995 if (Z_ISUNDEF(PCRE_G(unmatched_null_pair))) {
996 init_unmatched_null_pair();
997 }
998 ZVAL_COPY(&match_pair, &PCRE_G(unmatched_null_pair));
999 } else {
1000 if (Z_ISUNDEF(PCRE_G(unmatched_empty_pair))) {
1001 init_unmatched_empty_pair();
1002 }
1003 ZVAL_COPY(&match_pair, &PCRE_G(unmatched_empty_pair));
1004 }
1005 } else {
1006 zval val1, val2;
1007 populate_match_value_str(&val1, subject, start_offset, end_offset);
1008 ZVAL_LONG(&val2, start_offset);
1009 ZVAL_ARR(&match_pair, zend_new_pair(&val1, &val2));
1010 }
1011
1012 if (name) {
1013 add_named(result, name, &match_pair, start_offset == PCRE2_UNSET);
1014 }
1015 zend_hash_next_index_insert_new(result, &match_pair);
1016 }
1017 /* }}} */
1018
/* Fill the array held in subpats with the capture groups of one match.
 *
 *  subject       string the offsets refer to
 *  offsets       offset vector; group n occupies offsets[2n] and offsets[2n+1]
 *  subpat_names  optional table mapping group number to name (entries may be
 *                NULL); named groups are stored under the name as well as
 *                under the next numeric index
 *  num_subpats   total number of groups of the pattern
 *  count         number of groups reported for this match
 *  mark          optional MARK name, stored under the "MARK" key
 *  flags         PREG_OFFSET_CAPTURE stores (match, offset) pairs instead of
 *                plain values; PREG_UNMATCHED_AS_NULL additionally pads the
 *                groups from count up to num_subpats
 */
static void populate_subpat_array(
		zval *subpats, const char *subject, PCRE2_SIZE *offsets, zend_string **subpat_names,
		uint32_t num_subpats, int count, const PCRE2_SPTR mark, zend_long flags) {
	const zend_long with_offsets = flags & PREG_OFFSET_CAPTURE;
	const zend_long null_unmatched = flags & PREG_UNMATCHED_AS_NULL;
	HashTable *const target = Z_ARRVAL_P(subpats);
	int group;

	if (with_offsets) {
		for (group = 0; group < count; group++) {
			add_offset_pair(
				target, subject, offsets[2*group], offsets[2*group+1],
				subpat_names ? subpat_names[group] : NULL, null_unmatched);
		}
		if (null_unmatched) {
			/* Trailing groups that were not reported at all. */
			for (group = count; group < num_subpats; group++) {
				add_offset_pair(
					target, NULL, PCRE2_UNSET, PCRE2_UNSET,
					subpat_names ? subpat_names[group] : NULL, 1);
			}
		}
	} else {
		zval entry;

		for (group = 0; group < count; group++) {
			populate_match_value(
				&entry, subject, offsets[2*group], offsets[2*group+1], null_unmatched);
			if (subpat_names && subpat_names[group]) {
				add_named(target, subpat_names[group], &entry, offsets[2*group] == PCRE2_UNSET);
			}
			zend_hash_next_index_insert_new(target, &entry);
		}
		if (null_unmatched) {
			for (group = count; group < num_subpats; group++) {
				if (!subpat_names) {
					add_next_index_null(subpats);
					continue;
				}
				ZVAL_NULL(&entry);
				if (subpat_names[group]) {
					/* Add only: an entry already stored under this name is kept. */
					zend_hash_add(target, subpat_names[group], &entry);
				}
				zend_hash_next_index_insert_new(target, &entry);
			}
		}
	}

	/* Add MARK, if available */
	if (mark) {
		add_assoc_string_ex(subpats, "MARK", sizeof("MARK") - 1, (char *)mark);
	}
}
1087
/* Common body of preg_match() and preg_match_all(): parses
 * (regex, subject[, subpats[, flags[, start_offset]]]), obtains the compiled
 * pattern and hands everything to php_pcre_match_impl(). Returns false to
 * userland when no compiled pattern could be obtained. */
static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, bool global) /* {{{ */
{
	zend_string *regex;            /* Regular expression */
	zend_string *subject;          /* String to match against */
	zval *subpats = NULL;          /* Array for subpatterns */
	zend_long flags = 0;           /* Match control flags */
	zend_long start_offset = 0;    /* Where the new search starts */
	pcre_cache_entry *pce;         /* Compiled regular expression */

	ZEND_PARSE_PARAMETERS_START(2, 5)
		Z_PARAM_STR(regex)
		Z_PARAM_STR(subject)
		Z_PARAM_OPTIONAL
		Z_PARAM_ZVAL(subpats)
		Z_PARAM_LONG(flags)
		Z_PARAM_LONG(start_offset)
	ZEND_PARSE_PARAMETERS_END();

	/* Compile regex or get it from cache. */
	pce = pcre_get_compiled_regex_cache(regex);
	if (pce == NULL) {
		RETURN_FALSE;
	}

	/* The entry's refcount is raised for the duration of the match
	 * (presumably so the cache does not drop it while in use). */
	pce->refcount++;
	php_pcre_match_impl(pce, subject, return_value, subpats, global, flags, start_offset);
	pce->refcount--;
}
/* }}} */
1118
is_known_valid_utf8(zend_string * subject_str,PCRE2_SIZE start_offset)1119 static zend_always_inline bool is_known_valid_utf8(
1120 zend_string *subject_str, PCRE2_SIZE start_offset) {
1121 if (!ZSTR_IS_VALID_UTF8(subject_str)) {
1122 /* We don't know whether the string is valid UTF-8 or not. */
1123 return 0;
1124 }
1125
1126 if (start_offset == ZSTR_LEN(subject_str)) {
1127 /* Degenerate case: Offset points to end of string. */
1128 return 1;
1129 }
1130
1131 /* Check that the offset does not point to an UTF-8 continuation byte. */
1132 return (ZSTR_VAL(subject_str)[start_offset] & 0xc0) != 0x80;
1133 }
1134
1135 /* {{{ php_pcre_match_impl() */
php_pcre_match_impl(pcre_cache_entry * pce,zend_string * subject_str,zval * return_value,zval * subpats,bool global,zend_long flags,zend_off_t start_offset)1136 PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str, zval *return_value,
1137 zval *subpats, bool global, zend_long flags, zend_off_t start_offset)
1138 {
1139 zval result_set; /* Holds a set of subpatterns after
1140 a global match */
1141 HashTable **match_sets = NULL; /* An array of sets of matches for each
1142 subpattern after a global match */
1143 uint32_t options; /* Execution options */
1144 int count; /* Count of matched subpatterns */
1145 uint32_t num_subpats; /* Number of captured subpatterns */
1146 int matched; /* Has anything matched */
1147 zend_string **subpat_names; /* Array for named subpatterns */
1148 size_t i;
1149 uint32_t subpats_order; /* Order of subpattern matches */
1150 uint32_t offset_capture; /* Capture match offsets: yes/no */
1151 zend_long unmatched_as_null; /* Null non-matches: yes/no */
1152 PCRE2_SPTR mark = NULL; /* Target for MARK name */
1153 HashTable *marks = NULL; /* Array of marks for PREG_PATTERN_ORDER */
1154 pcre2_match_data *match_data;
1155 PCRE2_SIZE start_offset2, orig_start_offset;
1156
1157 char *subject = ZSTR_VAL(subject_str);
1158 size_t subject_len = ZSTR_LEN(subject_str);
1159
1160 /* Overwrite the passed-in value for subpatterns with an empty array. */
1161 if (subpats != NULL) {
1162 subpats = zend_try_array_init(subpats);
1163 if (!subpats) {
1164 RETURN_THROWS();
1165 }
1166 }
1167
1168 subpats_order = global ? PREG_PATTERN_ORDER : 0;
1169
1170 if (flags) {
1171 offset_capture = flags & PREG_OFFSET_CAPTURE;
1172 unmatched_as_null = flags & PREG_UNMATCHED_AS_NULL;
1173
1174 /*
1175 * subpats_order is pre-set to pattern mode so we change it only if
1176 * necessary.
1177 */
1178 if (flags & 0xff) {
1179 subpats_order = flags & 0xff;
1180 if ((global && (subpats_order < PREG_PATTERN_ORDER || subpats_order > PREG_SET_ORDER)) ||
1181 (!global && subpats_order != 0)) {
1182 zend_argument_value_error(4, "must be a PREG_* constant");
1183 RETURN_THROWS();
1184 }
1185 }
1186 } else {
1187 offset_capture = 0;
1188 unmatched_as_null = 0;
1189 }
1190
1191 /* Negative offset counts from the end of the string. */
1192 if (start_offset < 0) {
1193 if ((PCRE2_SIZE)-start_offset <= subject_len) {
1194 start_offset2 = subject_len + start_offset;
1195 } else {
1196 start_offset2 = 0;
1197 }
1198 } else {
1199 start_offset2 = (PCRE2_SIZE)start_offset;
1200 }
1201
1202 if (start_offset2 > subject_len) {
1203 pcre_handle_exec_error(PCRE2_ERROR_BADOFFSET);
1204 RETURN_FALSE;
1205 }
1206
1207 /* Calculate the size of the offsets array, and allocate memory for it. */
1208 num_subpats = pce->capture_count + 1;
1209
1210 /*
1211 * Build a mapping from subpattern numbers to their names. We will
1212 * allocate the table only if there are any named subpatterns.
1213 */
1214 subpat_names = NULL;
1215 if (subpats && pce->name_count > 0) {
1216 subpat_names = make_subpats_table(num_subpats, pce);
1217 if (!subpat_names) {
1218 RETURN_FALSE;
1219 }
1220 }
1221
1222 matched = 0;
1223 PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1224
1225 if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
1226 match_data = mdata;
1227 } else {
1228 match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
1229 if (!match_data) {
1230 PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1231 if (subpat_names) {
1232 free_subpats_table(subpat_names, num_subpats);
1233 }
1234 RETURN_FALSE;
1235 }
1236 }
1237
1238 /* Allocate match sets array and initialize the values. */
1239 if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
1240 match_sets = safe_emalloc(num_subpats, sizeof(HashTable *), 0);
1241 for (i=0; i<num_subpats; i++) {
1242 match_sets[i] = zend_new_array(0);
1243 }
1244 }
1245
1246 /* Array of subpattern offsets */
1247 PCRE2_SIZE *const offsets = pcre2_get_ovector_pointer(match_data);
1248
1249 orig_start_offset = start_offset2;
1250 options =
1251 (pce->compile_options & PCRE2_UTF) && !is_known_valid_utf8(subject_str, orig_start_offset)
1252 ? 0 : PCRE2_NO_UTF_CHECK;
1253
1254 /* Execute the regular expression. */
1255 #ifdef HAVE_PCRE_JIT_SUPPORT
1256 if ((pce->preg_options & PREG_JIT) && options) {
1257 count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1258 PCRE2_NO_UTF_CHECK, match_data, mctx);
1259 } else
1260 #endif
1261 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1262 options, match_data, mctx);
1263
1264 while (1) {
1265 /* If something has matched */
1266 if (count >= 0) {
1267 /* Check for too many substrings condition. */
1268 if (UNEXPECTED(count == 0)) {
1269 php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
1270 count = num_subpats;
1271 }
1272
1273 matched:
1274 matched++;
1275
1276 /* If subpatterns array has been passed, fill it in with values. */
1277 if (subpats != NULL) {
1278 /* Try to get the list of substrings and display a warning if failed. */
1279 if (UNEXPECTED(offsets[1] < offsets[0])) {
1280 if (subpat_names) {
1281 free_subpats_table(subpat_names, num_subpats);
1282 }
1283 if (match_sets) efree(match_sets);
1284 php_error_docref(NULL, E_WARNING, "Get subpatterns list failed");
1285 RETURN_FALSE;
1286 }
1287
1288 if (global) { /* global pattern matching */
1289 if (subpats_order == PREG_PATTERN_ORDER) {
1290 /* For each subpattern, insert it into the appropriate array. */
1291 if (offset_capture) {
1292 for (i = 0; i < count; i++) {
1293 add_offset_pair(
1294 match_sets[i], subject, offsets[2*i], offsets[2*i+1],
1295 NULL, unmatched_as_null);
1296 }
1297 } else {
1298 for (i = 0; i < count; i++) {
1299 zval val;
1300 populate_match_value(
1301 &val, subject, offsets[2*i], offsets[2*i+1], unmatched_as_null);
1302 zend_hash_next_index_insert_new(match_sets[i], &val);
1303 }
1304 }
1305 mark = pcre2_get_mark(match_data);
1306 /* Add MARK, if available */
1307 if (mark) {
1308 if (!marks) {
1309 marks = zend_new_array(0);
1310 }
1311 zval tmp;
1312 ZVAL_STRING(&tmp, (char *) mark);
1313 zend_hash_index_add_new(marks, matched - 1, &tmp);
1314 }
1315 /*
1316 * If the number of captured subpatterns on this run is
1317 * less than the total possible number, pad the result
1318 * arrays with NULLs or empty strings.
1319 */
1320 if (count < num_subpats) {
1321 for (int i = count; i < num_subpats; i++) {
1322 if (offset_capture) {
1323 add_offset_pair(
1324 match_sets[i], NULL, PCRE2_UNSET, PCRE2_UNSET,
1325 NULL, unmatched_as_null);
1326 } else if (unmatched_as_null) {
1327 zval tmp;
1328 ZVAL_NULL(&tmp);
1329 zend_hash_next_index_insert_new(match_sets[i], &tmp);
1330 } else {
1331 zval tmp;
1332 ZVAL_EMPTY_STRING(&tmp);
1333 zend_hash_next_index_insert_new(match_sets[i], &tmp);
1334 }
1335 }
1336 }
1337 } else {
1338 /* Allocate and populate the result set array */
1339 mark = pcre2_get_mark(match_data);
1340 array_init_size(&result_set, count + (mark ? 1 : 0));
1341 populate_subpat_array(
1342 &result_set, subject, offsets, subpat_names,
1343 num_subpats, count, mark, flags);
1344 /* And add it to the output array */
1345 zend_hash_next_index_insert_new(Z_ARRVAL_P(subpats), &result_set);
1346 }
1347 } else { /* single pattern matching */
1348 /* For each subpattern, insert it into the subpatterns array. */
1349 mark = pcre2_get_mark(match_data);
1350 populate_subpat_array(
1351 subpats, subject, offsets, subpat_names, num_subpats, count, mark, flags);
1352 break;
1353 }
1354 }
1355
1356 /* Advance to the next piece. */
1357 start_offset2 = offsets[1];
1358
1359 /* If we have matched an empty string, mimic what Perl's /g options does.
1360 This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
1361 the match again at the same point. If this fails (picked up above) we
1362 advance to the next character. */
1363 if (start_offset2 == offsets[0]) {
1364 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1365 PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
1366 if (count >= 0) {
1367 if (global) {
1368 goto matched;
1369 } else {
1370 break;
1371 }
1372 } else if (count == PCRE2_ERROR_NOMATCH) {
1373 /* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
1374 this is not necessarily the end. We need to advance
1375 the start offset, and continue. Fudge the offset values
1376 to achieve this, unless we're already at the end of the string. */
1377 if (start_offset2 < subject_len) {
1378 size_t unit_len = calculate_unit_length(pce, subject + start_offset2);
1379
1380 start_offset2 += unit_len;
1381 } else {
1382 break;
1383 }
1384 } else {
1385 goto error;
1386 }
1387 }
1388 } else if (count == PCRE2_ERROR_NOMATCH) {
1389 break;
1390 } else {
1391 error:
1392 pcre_handle_exec_error(count);
1393 break;
1394 }
1395
1396 if (!global) {
1397 break;
1398 }
1399
1400 /* Execute the regular expression. */
1401 #ifdef HAVE_PCRE_JIT_SUPPORT
1402 if ((pce->preg_options & PREG_JIT)) {
1403 if (start_offset2 > subject_len) {
1404 pcre_handle_exec_error(PCRE2_ERROR_BADOFFSET);
1405 break;
1406 }
1407 count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1408 PCRE2_NO_UTF_CHECK, match_data, mctx);
1409 } else
1410 #endif
1411 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1412 PCRE2_NO_UTF_CHECK, match_data, mctx);
1413 }
1414 if (match_data != mdata) {
1415 pcre2_match_data_free(match_data);
1416 }
1417
1418 /* Add the match sets to the output array and clean up */
1419 if (match_sets) {
1420 if (subpat_names) {
1421 for (i = 0; i < num_subpats; i++) {
1422 zval wrapper;
1423 ZVAL_ARR(&wrapper, match_sets[i]);
1424 if (subpat_names[i]) {
1425 zend_hash_update(Z_ARRVAL_P(subpats), subpat_names[i], &wrapper);
1426 GC_ADDREF(match_sets[i]);
1427 }
1428 zend_hash_next_index_insert_new(Z_ARRVAL_P(subpats), &wrapper);
1429 }
1430 } else {
1431 for (i = 0; i < num_subpats; i++) {
1432 zval wrapper;
1433 ZVAL_ARR(&wrapper, match_sets[i]);
1434 zend_hash_next_index_insert_new(Z_ARRVAL_P(subpats), &wrapper);
1435 }
1436 }
1437 efree(match_sets);
1438
1439 if (marks) {
1440 zval tmp;
1441 ZVAL_ARR(&tmp, marks);
1442 zend_hash_str_update(Z_ARRVAL_P(subpats), "MARK", sizeof("MARK") - 1, &tmp);
1443 }
1444 }
1445
1446 if (subpat_names) {
1447 free_subpats_table(subpat_names, num_subpats);
1448 }
1449
1450 if (PCRE_G(error_code) == PHP_PCRE_NO_ERROR) {
1451 /* If there was no error and we're in /u mode, remember that the string is valid UTF-8. */
1452 if ((pce->compile_options & PCRE2_UTF)
1453 && !ZSTR_IS_INTERNED(subject_str) && orig_start_offset == 0) {
1454 GC_ADD_FLAGS(subject_str, IS_STR_VALID_UTF8);
1455 }
1456
1457 RETVAL_LONG(matched);
1458 } else {
1459 RETVAL_FALSE;
1460 }
1461 }
1462 /* }}} */
1463
/* {{{ Perform a Perl-style regular expression match */
/* Userland preg_match(): forwards to php_do_pcre_match() with global = false,
 * i.e. the search stops after the first match. */
PHP_FUNCTION(preg_match)
{
	php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, false);
}
/* }}} */
1470
1471 ZEND_FRAMELESS_FUNCTION(preg_match, 2)
1472 {
1473 zval regex_tmp, subject_tmp;
1474 zend_string *regex, *subject;
1475
1476 Z_FLF_PARAM_STR(1, regex, regex_tmp);
1477 Z_FLF_PARAM_STR(2, subject, subject_tmp);
1478
1479 /* Compile regex or get it from cache. */
1480 pcre_cache_entry *pce;
1481 if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
1482 RETURN_FALSE;
1483 }
1484
1485 pce->refcount++;
1486 php_pcre_match_impl(pce, subject, return_value, /* subpats */ NULL,
1487 /* global */ false, /* flags */ 0, /* start_offset */ 0);
1488 pce->refcount--;
1489
1490 flf_clean:
1491 Z_FLF_PARAM_FREE_STR(1, regex_tmp);
1492 Z_FLF_PARAM_FREE_STR(2, subject_tmp);
1493 }
1494
/* {{{ Perform a Perl-style global regular expression match */
/* Userland preg_match_all(): forwards to php_do_pcre_match() with
 * global = true, i.e. the search continues past the first match. */
PHP_FUNCTION(preg_match_all)
{
	php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, true);
}
/* }}} */
1501
/* {{{ preg_get_backref */
/* Try to parse a backreference at *str, which must point at its introducing
 * character. Accepted forms are <c>N, <c>NN and ${N} / ${NN}, where <c> is the
 * introducer (the braced form requires '$') and N is a decimal digit; at most
 * two digits are consumed.
 *
 * On success returns 1, stores the group number in *backref and advances *str
 * past the reference. On failure returns 0 and leaves *str untouched; *backref
 * may already have been overwritten when the digits parsed but the closing
 * brace was missing. */
static int preg_get_backref(char **str, int *backref)
{
	char *cursor = *str;
	int braced;

	/* An introducer at the very end of the string cannot start a reference. */
	if (cursor[1] == '\0') {
		return 0;
	}

	/* Step over the introducer, and over the opening brace of "${". */
	braced = (cursor[0] == '$' && cursor[1] == '{');
	cursor += braced ? 2 : 1;

	/* First digit is mandatory. */
	if (*cursor < '0' || *cursor > '9') {
		return 0;
	}
	*backref = *cursor - '0';
	cursor++;

	/* Second digit is optional. */
	if (*cursor >= '0' && *cursor <= '9') {
		*backref = *backref * 10 + (*cursor - '0');
		cursor++;
	}

	if (braced) {
		if (*cursor != '}') {
			return 0;
		}
		cursor++;
	}

	*str = cursor;
	return 1;
}
/* }}} */
1539
1540 /* {{{ preg_do_repl_func */
preg_do_repl_func(zend_fcall_info * fci,zend_fcall_info_cache * fcc,const char * subject,PCRE2_SIZE * offsets,zend_string ** subpat_names,uint32_t num_subpats,int count,const PCRE2_SPTR mark,zend_long flags)1541 static zend_string *preg_do_repl_func(zend_fcall_info *fci, zend_fcall_info_cache *fcc, const char *subject, PCRE2_SIZE *offsets, zend_string **subpat_names, uint32_t num_subpats, int count, const PCRE2_SPTR mark, zend_long flags)
1542 {
1543 zend_string *result_str;
1544 zval retval; /* Function return value */
1545 zval arg; /* Argument to pass to function */
1546
1547 array_init_size(&arg, count + (mark ? 1 : 0));
1548 populate_subpat_array(&arg, subject, offsets, subpat_names, num_subpats, count, mark, flags);
1549
1550 fci->retval = &retval;
1551 fci->param_count = 1;
1552 fci->params = &arg;
1553
1554 if (zend_call_function(fci, fcc) == SUCCESS && Z_TYPE(retval) != IS_UNDEF) {
1555 if (EXPECTED(Z_TYPE(retval) == IS_STRING)) {
1556 result_str = Z_STR(retval);
1557 } else {
1558 result_str = zval_get_string_func(&retval);
1559 zval_ptr_dtor(&retval);
1560 }
1561 } else {
1562 if (!EG(exception)) {
1563 php_error_docref(NULL, E_WARNING, "Unable to call custom replacement function");
1564 }
1565
1566 result_str = zend_string_init(&subject[offsets[0]], offsets[1] - offsets[0], 0);
1567 }
1568
1569 zval_ptr_dtor(&arg);
1570
1571 return result_str;
1572 }
1573 /* }}} */
1574
1575 /* {{{ php_pcre_replace */
php_pcre_replace(zend_string * regex,zend_string * subject_str,const char * subject,size_t subject_len,zend_string * replace_str,size_t limit,size_t * replace_count)1576 PHPAPI zend_string *php_pcre_replace(zend_string *regex,
1577 zend_string *subject_str,
1578 const char *subject, size_t subject_len,
1579 zend_string *replace_str,
1580 size_t limit, size_t *replace_count)
1581 {
1582 pcre_cache_entry *pce; /* Compiled regular expression */
1583 zend_string *result; /* Function result */
1584
1585 /* Abort on pending exception, e.g. thrown from __toString(). */
1586 if (UNEXPECTED(EG(exception))) {
1587 return NULL;
1588 }
1589
1590 /* Compile regex or get it from cache. */
1591 if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
1592 return NULL;
1593 }
1594 pce->refcount++;
1595 result = php_pcre_replace_impl(pce, subject_str, subject, subject_len, replace_str,
1596 limit, replace_count);
1597 pce->refcount--;
1598
1599 return result;
1600 }
1601 /* }}} */
1602
/* {{{ php_pcre_replace_impl() */
/*
 * Replace matches of the compiled pattern pce in subject (subject_len bytes)
 * with replace_str, expanding backreferences recognised by preg_get_backref().
 *
 *  subject_str    optional zend_string for the subject; when nothing has been
 *                 replaced and it is non-NULL, it is returned via
 *                 zend_string_copy() instead of building a new string
 *  limit          maximum number of replacements to perform
 *  replace_count  optional counter, incremented once per replacement
 *
 * Returns the resulting string, or NULL on failure (any partial result is
 * released first).
 */
PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *subject_str, const char *subject, size_t subject_len, zend_string *replace_str, size_t limit, size_t *replace_count)
{
	uint32_t		 options;			/* Execution options */
	int				 count;				/* Count of matched subpatterns */
	uint32_t		 num_subpats;		/* Number of captured subpatterns */
	size_t			 new_len;			/* Length of needed storage */
	size_t			 alloc_len;			/* Actual allocated length */
	size_t			 match_len;			/* Length of the current match */
	int				 backref;			/* Backreference number */
	PCRE2_SIZE		 start_offset;		/* Where the new search starts */
	size_t			 last_end_offset;	/* Where the last search ended */
	char			*walkbuf,			/* Location of current replacement in the result */
					*walk,				/* Used to walk the replacement string */
					 walk_last;			/* Last walked character */
	const char		*match,				/* The current match */
					*piece,				/* The current piece of subject */
					*replace_end;		/* End of replacement string */
	size_t			result_len; 		/* Length of result */
	zend_string		*result;			/* Result of replacement */
	pcre2_match_data *match_data;

	/* Number of offset pairs: every capture group plus one. */
	num_subpats = pce->capture_count + 1;
	alloc_len = 0;
	result = NULL;

	/* Initialize */
	match = NULL;
	start_offset = 0;
	last_end_offset = 0;
	result_len = 0;
	PCRE_G(error_code) = PHP_PCRE_NO_ERROR;

	/* Use the preallocated match data when it is free and large enough;
	 * otherwise allocate a dedicated block that is released before returning. */
	if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
		match_data = mdata;
	} else {
		match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
		if (!match_data) {
			PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
			return NULL;
		}
	}

	/* In UTF mode the first match is run without PCRE2_NO_UTF_CHECK, so the
	 * subject gets validated; all later calls below pass PCRE2_NO_UTF_CHECK. */
	options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;

	/* Array of subpattern offsets */
	PCRE2_SIZE *const offsets = pcre2_get_ovector_pointer(match_data);

	/* Execute the regular expression. The JIT entry point is only taken when
	 * no UTF-8 check is required (options != 0). */
#ifdef HAVE_PCRE_JIT_SUPPORT
	if ((pce->preg_options & PREG_JIT) && options) {
		count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
				PCRE2_NO_UTF_CHECK, match_data, mctx);
	} else
#endif
	count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
			options, match_data, mctx);

	while (1) {
		/* Unconsumed part of the subject, starting where the last match ended. */
		piece = subject + last_end_offset;

		if (count >= 0 && limit > 0) {
			bool simple_string;

			/* Check for too many substrings condition. */
			if (UNEXPECTED(count == 0)) {
				php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
				count = num_subpats;
			}

matched:
			/* A match that ends before it starts cannot be spliced; treat it
			 * as an internal error and discard the partial result. */
			if (UNEXPECTED(offsets[1] < offsets[0])) {
				PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
				if (result) {
					zend_string_release_ex(result, 0);
					result = NULL;
				}
				break;
			}

			if (replace_count) {
				++*replace_count;
			}

			/* Set the match location in subject */
			match = subject + offsets[0];

			new_len = result_len + offsets[0] - last_end_offset; /* part before the match */

			/* First pass over the replacement: only compute how many bytes
			 * it expands to, so the buffer can be grown before copying. */
			walk = ZSTR_VAL(replace_str);
			replace_end = walk + ZSTR_LEN(replace_str);
			walk_last = 0;
			simple_string = 1;
			while (walk < replace_end) {
				if ('\\' == *walk || '$' == *walk) {
					simple_string = 0;
					if (walk_last == '\\') {
						/* Escaped '\' or '$': the preceding backslash was
						 * already counted and the copy pass overwrites it,
						 * so nothing is added here. */
						walk++;
						walk_last = 0;
						continue;
					}
					if (preg_get_backref(&walk, &backref)) {
						/* References to groups beyond count expand to nothing. */
						if (backref < count)
							new_len += offsets[(backref<<1)+1] - offsets[backref<<1];
						continue;
					}
				}
				new_len++;
				walk++;
				walk_last = walk[-1];
			}

			/* Grow the buffer (sized from twice the needed length) whenever
			 * the needed length reaches the current allocation. */
			if (new_len >= alloc_len) {
				alloc_len = zend_safe_address_guarded(2, new_len, ZSTR_MAX_OVERHEAD) - ZSTR_MAX_OVERHEAD;
				if (result == NULL) {
					result = zend_string_alloc(alloc_len, 0);
				} else {
					result = zend_string_extend(result, alloc_len, 0);
				}
			}

			if (match-piece > 0) {
				/* copy the part of the string before the match */
				memcpy(&ZSTR_VAL(result)[result_len], piece, match-piece);
				result_len += (match-piece);
			}

			if (simple_string) {
				/* copy replacement (one extra byte past its length is copied too) */
				memcpy(&ZSTR_VAL(result)[result_len], ZSTR_VAL(replace_str), ZSTR_LEN(replace_str)+1);
				result_len += ZSTR_LEN(replace_str);
			} else {
				/* copy replacement and backrefs */
				walkbuf = ZSTR_VAL(result) + result_len;

				/* Second pass: same walk as above, this time emitting bytes. */
				walk = ZSTR_VAL(replace_str);
				walk_last = 0;
				while (walk < replace_end) {
					if ('\\' == *walk || '$' == *walk) {
						if (walk_last == '\\') {
							/* Overwrite the backslash emitted on the previous
							 * iteration with the character it escapes. */
							*(walkbuf-1) = *walk++;
							walk_last = 0;
							continue;
						}
						if (preg_get_backref(&walk, &backref)) {
							if (backref < count) {
								match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
								walkbuf = zend_mempcpy(walkbuf, subject + offsets[backref << 1], match_len);
							}
							continue;
						}
					}
					*walkbuf++ = *walk++;
					walk_last = walk[-1];
				}
				*walkbuf = '\0';
				/* increment the result length by how much we've added to the string */
				result_len += (walkbuf - (ZSTR_VAL(result) + result_len));
			}

			limit--;

			/* Advance to the next piece. */
			start_offset = last_end_offset = offsets[1];

			/* If we have matched an empty string, mimic what Perl's /g options does.
			   This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
			   the match again at the same point. If this fails (picked up above) we
			   advance to the next character. */
			if (start_offset == offsets[0]) {
				count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
					PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);

				piece = subject + start_offset;
				if (count >= 0 && limit > 0) {
					goto matched;
				} else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
					/* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
					   this is not necessarily the end. We need to advance
					   the start offset, and continue. Fudge the offset values
					   to achieve this, unless we're already at the end of the string. */
					if (start_offset < subject_len) {
						size_t unit_len = calculate_unit_length(pce, piece);
						start_offset += unit_len;
					} else {
						goto not_matched;
					}
				} else {
					goto error;
				}
			}

		} else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
not_matched:
			/* Nothing was replaced: hand back the original string object
			 * when the caller supplied one. */
			if (!result && subject_str) {
				result = zend_string_copy(subject_str);
				break;
			}
			/* now we know exactly how long it is */
			alloc_len = result_len + subject_len - last_end_offset;
			if (NULL != result) {
				result = zend_string_realloc(result, alloc_len, 0);
			} else {
				result = zend_string_alloc(alloc_len, 0);
			}
			/* stick that last bit of string on our output */
			memcpy(ZSTR_VAL(result) + result_len, piece, subject_len - last_end_offset);
			result_len += subject_len - last_end_offset;
			ZSTR_VAL(result)[result_len] = '\0';
			ZSTR_LEN(result) = result_len;
			break;
		} else {
error:
			/* Any other negative count is a match error: report it and
			 * discard the partial result. */
			pcre_handle_exec_error(count);
			if (result) {
				zend_string_release_ex(result, 0);
				result = NULL;
			}
			break;
		}

		/* Look for the next match from the updated start offset. */
#ifdef HAVE_PCRE_JIT_SUPPORT
		if (pce->preg_options & PREG_JIT) {
			count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
					PCRE2_NO_UTF_CHECK, match_data, mctx);
		} else
#endif
		count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
					PCRE2_NO_UTF_CHECK, match_data, mctx);
	}
	/* Only a dedicated match data block is freed; the preallocated one is reused. */
	if (match_data != mdata) {
		pcre2_match_data_free(match_data);
	}

	return result;
}
/* }}} */
1841
1842 /* {{{ php_pcre_replace_func_impl() */
php_pcre_replace_func_impl(pcre_cache_entry * pce,zend_string * subject_str,const char * subject,size_t subject_len,zend_fcall_info * fci,zend_fcall_info_cache * fcc,size_t limit,size_t * replace_count,zend_long flags)1843 static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_string *subject_str, const char *subject, size_t subject_len, zend_fcall_info *fci, zend_fcall_info_cache *fcc, size_t limit, size_t *replace_count, zend_long flags)
1844 {
1845 uint32_t options; /* Execution options */
1846 int count; /* Count of matched subpatterns */
1847 zend_string **subpat_names; /* Array for named subpatterns */
1848 uint32_t num_subpats; /* Number of captured subpatterns */
1849 size_t new_len; /* Length of needed storage */
1850 size_t alloc_len; /* Actual allocated length */
1851 PCRE2_SIZE start_offset; /* Where the new search starts */
1852 size_t last_end_offset; /* Where the last search ended */
1853 const char *match, /* The current match */
1854 *piece; /* The current piece of subject */
1855 size_t result_len; /* Length of result */
1856 zend_string *result; /* Result of replacement */
1857 zend_string *eval_result; /* Result of custom function */
1858 pcre2_match_data *match_data;
1859 bool old_mdata_used;
1860
1861 /* Calculate the size of the offsets array, and allocate memory for it. */
1862 num_subpats = pce->capture_count + 1;
1863
1864 /*
1865 * Build a mapping from subpattern numbers to their names. We will
1866 * allocate the table only if there are any named subpatterns.
1867 */
1868 subpat_names = NULL;
1869 if (UNEXPECTED(pce->name_count > 0)) {
1870 subpat_names = make_subpats_table(num_subpats, pce);
1871 if (!subpat_names) {
1872 return NULL;
1873 }
1874 }
1875
1876 alloc_len = 0;
1877 result = NULL;
1878
1879 /* Initialize */
1880 match = NULL;
1881 start_offset = 0;
1882 last_end_offset = 0;
1883 result_len = 0;
1884 PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1885
1886 old_mdata_used = mdata_used;
1887 if (!old_mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
1888 mdata_used = 1;
1889 match_data = mdata;
1890 } else {
1891 match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
1892 if (!match_data) {
1893 PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1894 if (subpat_names) {
1895 free_subpats_table(subpat_names, num_subpats);
1896 }
1897 mdata_used = old_mdata_used;
1898 return NULL;
1899 }
1900 }
1901
1902 options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
1903
1904 /* Array of subpattern offsets */
1905 PCRE2_SIZE *const offsets = pcre2_get_ovector_pointer(match_data);
1906
1907 /* Execute the regular expression. */
1908 #ifdef HAVE_PCRE_JIT_SUPPORT
1909 if ((pce->preg_options & PREG_JIT) && options) {
1910 count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1911 PCRE2_NO_UTF_CHECK, match_data, mctx);
1912 } else
1913 #endif
1914 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1915 options, match_data, mctx);
1916
1917 while (1) {
1918 piece = subject + last_end_offset;
1919
1920 if (count >= 0 && limit) {
1921 /* Check for too many substrings condition. */
1922 if (UNEXPECTED(count == 0)) {
1923 php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
1924 count = num_subpats;
1925 }
1926
1927 matched:
1928 if (UNEXPECTED(offsets[1] < offsets[0])) {
1929 PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1930 if (result) {
1931 zend_string_release_ex(result, 0);
1932 result = NULL;
1933 }
1934 break;
1935 }
1936
1937 if (replace_count) {
1938 ++*replace_count;
1939 }
1940
1941 /* Set the match location in subject */
1942 match = subject + offsets[0];
1943
1944 new_len = result_len + offsets[0] - last_end_offset; /* part before the match */
1945
1946 /* Use custom function to get replacement string and its length. */
1947 eval_result = preg_do_repl_func(
1948 fci, fcc, subject, offsets, subpat_names, num_subpats, count,
1949 pcre2_get_mark(match_data), flags);
1950
1951 ZEND_ASSERT(eval_result);
1952 new_len = zend_safe_address_guarded(1, ZSTR_LEN(eval_result) + ZSTR_MAX_OVERHEAD, new_len) -ZSTR_MAX_OVERHEAD;
1953 if (new_len >= alloc_len) {
1954 alloc_len = zend_safe_address_guarded(2, new_len, ZSTR_MAX_OVERHEAD) - ZSTR_MAX_OVERHEAD;
1955 if (result == NULL) {
1956 result = zend_string_alloc(alloc_len, 0);
1957 } else {
1958 result = zend_string_extend(result, alloc_len, 0);
1959 }
1960 }
1961
1962 if (match-piece > 0) {
1963 /* copy the part of the string before the match */
1964 memcpy(ZSTR_VAL(result) + result_len, piece, match-piece);
1965 result_len += (match-piece);
1966 }
1967
1968 /* If using custom function, copy result to the buffer and clean up. */
1969 memcpy(ZSTR_VAL(result) + result_len, ZSTR_VAL(eval_result), ZSTR_LEN(eval_result));
1970 result_len += ZSTR_LEN(eval_result);
1971 zend_string_release_ex(eval_result, 0);
1972
1973 limit--;
1974
1975 /* Advance to the next piece. */
1976 start_offset = last_end_offset = offsets[1];
1977
1978 /* If we have matched an empty string, mimic what Perl's /g options does.
1979 This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
1980 the match again at the same point. If this fails (picked up above) we
1981 advance to the next character. */
1982 if (start_offset == offsets[0]) {
1983 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1984 PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
1985
1986 piece = subject + start_offset;
1987 if (count >= 0 && limit) {
1988 goto matched;
1989 } else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
1990 /* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
1991 this is not necessarily the end. We need to advance
1992 the start offset, and continue. Fudge the offset values
1993 to achieve this, unless we're already at the end of the string. */
1994 if (start_offset < subject_len) {
1995 size_t unit_len = calculate_unit_length(pce, piece);
1996 start_offset += unit_len;
1997 } else {
1998 goto not_matched;
1999 }
2000 } else {
2001 goto error;
2002 }
2003 }
2004
2005 } else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
2006 not_matched:
2007 if (!result && subject_str) {
2008 result = zend_string_copy(subject_str);
2009 break;
2010 }
2011 /* now we know exactly how long it is */
2012 alloc_len = result_len + subject_len - last_end_offset;
2013 if (NULL != result) {
2014 result = zend_string_realloc(result, alloc_len, 0);
2015 } else {
2016 result = zend_string_alloc(alloc_len, 0);
2017 }
2018 /* stick that last bit of string on our output */
2019 memcpy(ZSTR_VAL(result) + result_len, piece, subject_len - last_end_offset);
2020 result_len += subject_len - last_end_offset;
2021 ZSTR_VAL(result)[result_len] = '\0';
2022 ZSTR_LEN(result) = result_len;
2023 break;
2024 } else {
2025 error:
2026 pcre_handle_exec_error(count);
2027 if (result) {
2028 zend_string_release_ex(result, 0);
2029 result = NULL;
2030 }
2031 break;
2032 }
2033 #ifdef HAVE_PCRE_JIT_SUPPORT
2034 if ((pce->preg_options & PREG_JIT)) {
2035 count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
2036 PCRE2_NO_UTF_CHECK, match_data, mctx);
2037 } else
2038 #endif
2039 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
2040 PCRE2_NO_UTF_CHECK, match_data, mctx);
2041 }
2042 if (match_data != mdata) {
2043 pcre2_match_data_free(match_data);
2044 }
2045 mdata_used = old_mdata_used;
2046
2047 if (UNEXPECTED(subpat_names)) {
2048 free_subpats_table(subpat_names, num_subpats);
2049 }
2050
2051 return result;
2052 }
2053 /* }}} */
2054
2055 /* {{{ php_pcre_replace_func */
php_pcre_replace_func(zend_string * regex,zend_string * subject_str,zend_fcall_info * fci,zend_fcall_info_cache * fcc,size_t limit,size_t * replace_count,zend_long flags)2056 static zend_always_inline zend_string *php_pcre_replace_func(zend_string *regex,
2057 zend_string *subject_str,
2058 zend_fcall_info *fci, zend_fcall_info_cache *fcc,
2059 size_t limit, size_t *replace_count, zend_long flags)
2060 {
2061 pcre_cache_entry *pce; /* Compiled regular expression */
2062 zend_string *result; /* Function result */
2063
2064 /* Compile regex or get it from cache. */
2065 if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
2066 return NULL;
2067 }
2068 pce->refcount++;
2069 result = php_pcre_replace_func_impl(
2070 pce, subject_str, ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), fci, fcc,
2071 limit, replace_count, flags);
2072 pce->refcount--;
2073
2074 return result;
2075 }
2076 /* }}} */
2077
2078 /* {{{ php_pcre_replace_array */
php_pcre_replace_array(HashTable * regex,zend_string * replace_str,HashTable * replace_ht,zend_string * subject_str,size_t limit,size_t * replace_count)2079 static zend_string *php_pcre_replace_array(HashTable *regex,
2080 zend_string *replace_str, HashTable *replace_ht,
2081 zend_string *subject_str, size_t limit, size_t *replace_count)
2082 {
2083 zval *regex_entry;
2084 zend_string *result;
2085
2086 zend_string_addref(subject_str);
2087
2088 if (replace_ht) {
2089 uint32_t replace_idx = 0;
2090
2091 /* For each entry in the regex array, get the entry */
2092 ZEND_HASH_FOREACH_VAL(regex, regex_entry) {
2093 /* Make sure we're dealing with strings. */
2094 zend_string *tmp_regex_str;
2095 zend_string *regex_str = zval_get_tmp_string(regex_entry, &tmp_regex_str);
2096 zend_string *replace_entry_str, *tmp_replace_entry_str;
2097 zval *zv;
2098
2099 /* Get current entry */
2100 while (1) {
2101 if (replace_idx == replace_ht->nNumUsed) {
2102 replace_entry_str = ZSTR_EMPTY_ALLOC();
2103 tmp_replace_entry_str = NULL;
2104 break;
2105 }
2106 zv = ZEND_HASH_ELEMENT(replace_ht, replace_idx);
2107 replace_idx++;
2108 if (Z_TYPE_P(zv) != IS_UNDEF) {
2109 replace_entry_str = zval_get_tmp_string(zv, &tmp_replace_entry_str);
2110 break;
2111 }
2112 }
2113
2114 /* Do the actual replacement and put the result back into subject_str
2115 for further replacements. */
2116 result = php_pcre_replace(regex_str, subject_str, ZSTR_VAL(subject_str),
2117 ZSTR_LEN(subject_str), replace_entry_str, limit, replace_count);
2118 zend_tmp_string_release(tmp_replace_entry_str);
2119 zend_tmp_string_release(tmp_regex_str);
2120 zend_string_release_ex(subject_str, 0);
2121 subject_str = result;
2122 if (UNEXPECTED(result == NULL)) {
2123 break;
2124 }
2125 } ZEND_HASH_FOREACH_END();
2126
2127 } else {
2128 ZEND_ASSERT(replace_str != NULL);
2129
2130 /* For each entry in the regex array, get the entry */
2131 ZEND_HASH_FOREACH_VAL(regex, regex_entry) {
2132 /* Make sure we're dealing with strings. */
2133 zend_string *tmp_regex_str;
2134 zend_string *regex_str = zval_get_tmp_string(regex_entry, &tmp_regex_str);
2135
2136 /* Do the actual replacement and put the result back into subject_str
2137 for further replacements. */
2138 result = php_pcre_replace(regex_str, subject_str, ZSTR_VAL(subject_str),
2139 ZSTR_LEN(subject_str), replace_str, limit, replace_count);
2140 zend_tmp_string_release(tmp_regex_str);
2141 zend_string_release_ex(subject_str, 0);
2142 subject_str = result;
2143
2144 if (UNEXPECTED(result == NULL)) {
2145 break;
2146 }
2147 } ZEND_HASH_FOREACH_END();
2148 }
2149
2150 return subject_str;
2151 }
2152 /* }}} */
2153
2154 /* {{{ php_replace_in_subject */
php_replace_in_subject(zend_string * regex_str,HashTable * regex_ht,zend_string * replace_str,HashTable * replace_ht,zend_string * subject,size_t limit,size_t * replace_count)2155 static zend_always_inline zend_string *php_replace_in_subject(
2156 zend_string *regex_str, HashTable *regex_ht,
2157 zend_string *replace_str, HashTable *replace_ht,
2158 zend_string *subject, size_t limit, size_t *replace_count)
2159 {
2160 zend_string *result;
2161
2162 if (regex_str) {
2163 ZEND_ASSERT(replace_str != NULL);
2164 result = php_pcre_replace(regex_str, subject, ZSTR_VAL(subject), ZSTR_LEN(subject),
2165 replace_str, limit, replace_count);
2166 } else {
2167 ZEND_ASSERT(regex_ht != NULL);
2168 result = php_pcre_replace_array(regex_ht, replace_str, replace_ht, subject,
2169 limit, replace_count);
2170 }
2171 return result;
2172 }
2173 /* }}} */
2174
2175 /* {{{ php_replace_in_subject_func */
php_replace_in_subject_func(zend_string * regex_str,HashTable * regex_ht,zend_fcall_info * fci,zend_fcall_info_cache * fcc,zend_string * subject,size_t limit,size_t * replace_count,zend_long flags)2176 static zend_string *php_replace_in_subject_func(zend_string *regex_str, HashTable *regex_ht,
2177 zend_fcall_info *fci, zend_fcall_info_cache *fcc,
2178 zend_string *subject, size_t limit, size_t *replace_count, zend_long flags)
2179 {
2180 zend_string *result;
2181
2182 if (regex_str) {
2183 result = php_pcre_replace_func(
2184 regex_str, subject, fci, fcc, limit, replace_count, flags);
2185 return result;
2186 } else {
2187 /* If regex is an array */
2188 zval *regex_entry;
2189
2190 ZEND_ASSERT(regex_ht != NULL);
2191
2192 zend_string_addref(subject);
2193
2194 /* For each entry in the regex array, get the entry */
2195 ZEND_HASH_FOREACH_VAL(regex_ht, regex_entry) {
2196 /* Make sure we're dealing with strings. */
2197 zend_string *tmp_regex_entry_str;
2198 zend_string *regex_entry_str = zval_get_tmp_string(regex_entry, &tmp_regex_entry_str);
2199
2200 /* Do the actual replacement and put the result back into subject
2201 for further replacements. */
2202 result = php_pcre_replace_func(
2203 regex_entry_str, subject, fci, fcc, limit, replace_count, flags);
2204 zend_tmp_string_release(tmp_regex_entry_str);
2205 zend_string_release(subject);
2206 subject = result;
2207 if (UNEXPECTED(result == NULL)) {
2208 break;
2209 }
2210 } ZEND_HASH_FOREACH_END();
2211
2212 return subject;
2213 }
2214 }
2215 /* }}} */
2216
2217 /* {{{ preg_replace_func_impl */
preg_replace_func_impl(zval * return_value,zend_string * regex_str,HashTable * regex_ht,zend_fcall_info * fci,zend_fcall_info_cache * fcc,zend_string * subject_str,HashTable * subject_ht,zend_long limit_val,zend_long flags)2218 static size_t preg_replace_func_impl(zval *return_value,
2219 zend_string *regex_str, HashTable *regex_ht,
2220 zend_fcall_info *fci, zend_fcall_info_cache *fcc,
2221 zend_string *subject_str, HashTable *subject_ht, zend_long limit_val, zend_long flags)
2222 {
2223 zend_string *result;
2224 size_t replace_count = 0;
2225
2226 if (subject_str) {
2227 result = php_replace_in_subject_func(
2228 regex_str, regex_ht, fci, fcc, subject_str, limit_val, &replace_count, flags);
2229 if (result != NULL) {
2230 RETVAL_STR(result);
2231 } else {
2232 RETVAL_NULL();
2233 }
2234 } else {
2235 /* if subject is an array */
2236 zval *subject_entry, zv;
2237 zend_string *string_key;
2238 zend_ulong num_key;
2239
2240 ZEND_ASSERT(subject_ht != NULL);
2241
2242 array_init_size(return_value, zend_hash_num_elements(subject_ht));
2243 HashTable *return_value_ht = Z_ARRVAL_P(return_value);
2244
2245 /* For each subject entry, convert it to string, then perform replacement
2246 and add the result to the return_value array. */
2247 ZEND_HASH_FOREACH_KEY_VAL(subject_ht, num_key, string_key, subject_entry) {
2248 zend_string *tmp_subject_entry_str;
2249 zend_string *subject_entry_str = zval_get_tmp_string(subject_entry, &tmp_subject_entry_str);
2250
2251 result = php_replace_in_subject_func(
2252 regex_str, regex_ht, fci, fcc, subject_entry_str, limit_val, &replace_count, flags);
2253 if (result != NULL) {
2254 /* Add to return array */
2255 ZVAL_STR(&zv, result);
2256 if (string_key) {
2257 zend_hash_add_new(return_value_ht, string_key, &zv);
2258 } else {
2259 zend_hash_index_add_new(return_value_ht, num_key, &zv);
2260 }
2261 }
2262 zend_tmp_string_release(tmp_subject_entry_str);
2263 } ZEND_HASH_FOREACH_END();
2264 }
2265
2266 return replace_count;
2267 }
2268 /* }}} */
2269
/* Core of preg_replace()/preg_filter() once the arguments are parsed.
 *
 * Throws an argument type error when the replacement is an array but the
 * pattern is not. Otherwise performs the replacement on the string subject,
 * or on each entry of the array subject, and writes the result into
 * `return_value`. In filter mode a subject is only kept when the replacement
 * counter grew while processing it. If `zcount` is given it receives the
 * total number of replacements. */
static void _preg_replace_common(
	zval *return_value,
	HashTable *regex_ht, zend_string *regex_str,
	HashTable *replace_ht, zend_string *replace_str,
	HashTable *subject_ht, zend_string *subject_str,
	zend_long limit,
	zval *zcount,
	bool is_filter
) {
	size_t replace_count = 0;

	/* If replace is an array then the regex argument needs to also be an array */
	if (replace_ht && !regex_ht) {
		zend_argument_type_error(1, "must be of type array when argument #2 ($replacement) is an array, string given");
		RETURN_THROWS();
	}

	if (subject_str) {
		size_t count_before = replace_count;
		zend_string *replaced = php_replace_in_subject(regex_str, regex_ht,
			replace_str, replace_ht, subject_str, limit, &replace_count);

		if (replaced == NULL) {
			RETVAL_NULL();
		} else if (is_filter && replace_count <= count_before) {
			/* Filter mode and nothing was replaced: discard the result. */
			zend_string_release_ex(replaced, 0);
			RETVAL_NULL();
		} else {
			RETVAL_STR(replaced);
		}
	} else {
		/* Subject is an array. */
		ZEND_ASSERT(subject_ht != NULL);

		array_init_size(return_value, zend_hash_num_elements(subject_ht));
		HashTable *out_ht = Z_ARRVAL_P(return_value);

		zval *entry;
		zend_string *str_key;
		zend_ulong int_key;

		ZEND_HASH_FOREACH_KEY_VAL(subject_ht, int_key, str_key, entry) {
			size_t count_before = replace_count;
			zend_string *entry_tmp;
			zend_string *entry_str = zval_get_tmp_string(entry, &entry_tmp);

			zend_string *replaced = php_replace_in_subject(regex_str, regex_ht,
				replace_str, replace_ht, entry_str, limit, &replace_count);

			if (replaced != NULL) {
				if (is_filter && replace_count <= count_before) {
					/* Filter mode: entries without a replacement are dropped. */
					zend_string_release_ex(replaced, 0);
				} else {
					zval out;
					ZVAL_STR(&out, replaced);
					if (str_key) {
						zend_hash_add_new(out_ht, str_key, &out);
					} else {
						zend_hash_index_add_new(out_ht, int_key, &out);
					}
				}
			}

			zend_tmp_string_release(entry_tmp);
		} ZEND_HASH_FOREACH_END();
	}

	if (zcount) {
		ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count);
	}
}
2344
2345 /* {{{ preg_replace_common */
preg_replace_common(INTERNAL_FUNCTION_PARAMETERS,bool is_filter)2346 static void preg_replace_common(INTERNAL_FUNCTION_PARAMETERS, bool is_filter)
2347 {
2348 zend_string *regex_str, *replace_str, *subject_str;
2349 HashTable *regex_ht, *replace_ht, *subject_ht;
2350 zend_long limit = -1;
2351 zval *zcount = NULL;
2352
2353 /* Get function parameters and do error-checking. */
2354 ZEND_PARSE_PARAMETERS_START(3, 5)
2355 Z_PARAM_ARRAY_HT_OR_STR(regex_ht, regex_str)
2356 Z_PARAM_ARRAY_HT_OR_STR(replace_ht, replace_str)
2357 Z_PARAM_ARRAY_HT_OR_STR(subject_ht, subject_str)
2358 Z_PARAM_OPTIONAL
2359 Z_PARAM_LONG(limit)
2360 Z_PARAM_ZVAL(zcount)
2361 ZEND_PARSE_PARAMETERS_END();
2362
2363 _preg_replace_common(
2364 return_value,
2365 regex_ht, regex_str,
2366 replace_ht, replace_str,
2367 subject_ht, subject_str,
2368 limit, zcount, is_filter);
2369 }
2370 /* }}} */
2371
/* {{{ Perform Perl-style regular expression replacement.
 * Thin wrapper: forwards its arguments to preg_replace_common() with
 * is_filter = false, so every result is returned whether or not a
 * replacement took place. */
PHP_FUNCTION(preg_replace)
{
	preg_replace_common(INTERNAL_FUNCTION_PARAM_PASSTHRU, false);
}
/* }}} */
2378
/* Frameless variant of preg_replace() for the three-argument form.
 * Reads (pattern, replacement, subject) and calls _preg_replace_common() with
 * limit -1, no count out-parameter and is_filter = false. */
ZEND_FRAMELESS_FUNCTION(preg_replace, 3)
{
	zend_string *regex_str, *replace_str, *subject_str;
	HashTable *regex_ht, *replace_ht, *subject_ht;
	/* Scratch zvals handed to the parameter macros and released at flf_clean. */
	zval regex_tmp, replace_tmp, subject_tmp;

	/* Each argument may be either an array or a string. */
	Z_FLF_PARAM_ARRAY_HT_OR_STR(1, regex_ht, regex_str, regex_tmp);
	Z_FLF_PARAM_ARRAY_HT_OR_STR(2, replace_ht, replace_str, replace_tmp);
	Z_FLF_PARAM_ARRAY_HT_OR_STR(3, subject_ht, subject_str, subject_tmp);

	_preg_replace_common(
		return_value,
		regex_ht, regex_str,
		replace_ht, replace_str,
		subject_ht, subject_str,
		/* limit */ -1, /* zcount */ NULL, /* is_filter */ false);

/* NOTE(review): nothing in this function jumps here explicitly; presumably the
 * Z_FLF_PARAM_* macros above `goto flf_clean` on failure — confirm against
 * their definitions. */
flf_clean:;
	Z_FLF_PARAM_FREE_STR(1, regex_tmp);
	Z_FLF_PARAM_FREE_STR(2, replace_tmp);
	Z_FLF_PARAM_FREE_STR(3, subject_tmp);
}
2401
2402 /* {{{ Perform Perl-style regular expression replacement using replacement callback. */
PHP_FUNCTION(preg_replace_callback)2403 PHP_FUNCTION(preg_replace_callback)
2404 {
2405 zval *zcount = NULL;
2406 zend_string *regex_str;
2407 HashTable *regex_ht;
2408 zend_string *subject_str;
2409 HashTable *subject_ht;
2410 zend_long limit = -1, flags = 0;
2411 size_t replace_count;
2412 zend_fcall_info fci;
2413 zend_fcall_info_cache fcc;
2414
2415 /* Get function parameters and do error-checking. */
2416 ZEND_PARSE_PARAMETERS_START(3, 6)
2417 Z_PARAM_ARRAY_HT_OR_STR(regex_ht, regex_str)
2418 Z_PARAM_FUNC(fci, fcc)
2419 Z_PARAM_ARRAY_HT_OR_STR(subject_ht, subject_str)
2420 Z_PARAM_OPTIONAL
2421 Z_PARAM_LONG(limit)
2422 Z_PARAM_ZVAL(zcount)
2423 Z_PARAM_LONG(flags)
2424 ZEND_PARSE_PARAMETERS_END();
2425
2426 replace_count = preg_replace_func_impl(return_value, regex_str, regex_ht,
2427 &fci, &fcc,
2428 subject_str, subject_ht, limit, flags);
2429 if (zcount) {
2430 ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count);
2431 }
2432 }
2433 /* }}} */
2434
2435 /* {{{ Perform Perl-style regular expression replacement using replacement callback. */
PHP_FUNCTION(preg_replace_callback_array)2436 PHP_FUNCTION(preg_replace_callback_array)
2437 {
2438 zval zv, *replace, *zcount = NULL;
2439 HashTable *pattern, *subject_ht;
2440 zend_string *subject_str, *str_idx_regex;
2441 zend_long limit = -1, flags = 0;
2442 size_t replace_count = 0;
2443 zend_fcall_info fci;
2444 zend_fcall_info_cache fcc;
2445
2446 /* Get function parameters and do error-checking. */
2447 ZEND_PARSE_PARAMETERS_START(2, 5)
2448 Z_PARAM_ARRAY_HT(pattern)
2449 Z_PARAM_ARRAY_HT_OR_STR(subject_ht, subject_str)
2450 Z_PARAM_OPTIONAL
2451 Z_PARAM_LONG(limit)
2452 Z_PARAM_ZVAL(zcount)
2453 Z_PARAM_LONG(flags)
2454 ZEND_PARSE_PARAMETERS_END();
2455
2456 fci.size = sizeof(fci);
2457 fci.object = NULL;
2458 fci.named_params = NULL;
2459
2460 if (subject_ht) {
2461 GC_TRY_ADDREF(subject_ht);
2462 } else {
2463 GC_TRY_ADDREF(subject_str);
2464 }
2465
2466 ZEND_HASH_FOREACH_STR_KEY_VAL(pattern, str_idx_regex, replace) {
2467 if (!zend_is_callable_ex(replace, NULL, 0, NULL, &fcc, NULL)) {
2468 zend_argument_type_error(1, "must contain only valid callbacks");
2469 goto error;
2470 }
2471 if (!str_idx_regex) {
2472 zend_argument_type_error(1, "must contain only string patterns as keys");
2473 goto error;
2474 }
2475
2476 ZVAL_COPY_VALUE(&fci.function_name, replace);
2477
2478 replace_count += preg_replace_func_impl(&zv, str_idx_regex, /* regex_ht */ NULL, &fci, &fcc,
2479 subject_str, subject_ht, limit, flags);
2480 switch (Z_TYPE(zv)) {
2481 case IS_ARRAY:
2482 ZEND_ASSERT(subject_ht);
2483 zend_array_release(subject_ht);
2484 subject_ht = Z_ARR(zv);
2485 break;
2486 case IS_STRING:
2487 ZEND_ASSERT(subject_str);
2488 zend_string_release(subject_str);
2489 subject_str = Z_STR(zv);
2490 break;
2491 case IS_NULL:
2492 RETVAL_NULL();
2493 goto error;
2494 EMPTY_SWITCH_DEFAULT_CASE()
2495 }
2496
2497 if (EG(exception)) {
2498 goto error;
2499 }
2500 } ZEND_HASH_FOREACH_END();
2501
2502 if (zcount) {
2503 ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count);
2504 }
2505
2506 if (subject_ht) {
2507 RETVAL_ARR(subject_ht);
2508 // Unset the type_flags of immutable arrays to prevent the VM from performing refcounting
2509 if (GC_FLAGS(subject_ht) & IS_ARRAY_IMMUTABLE) {
2510 Z_TYPE_FLAGS_P(return_value) = 0;
2511 }
2512 return;
2513 } else {
2514 RETURN_STR(subject_str);
2515 }
2516
2517 error:
2518 if (subject_ht) {
2519 zend_array_release(subject_ht);
2520 } else {
2521 zend_string_release(subject_str);
2522 }
2523 }
2524 /* }}} */
2525
/* {{{ Perform Perl-style regular expression replacement and only return matches.
 * Thin wrapper: forwards its arguments to preg_replace_common() with
 * is_filter = true. In that mode _preg_replace_common() keeps a subject only
 * when the replacement count increased while processing it. */
PHP_FUNCTION(preg_filter)
{
	preg_replace_common(INTERNAL_FUNCTION_PARAM_PASSTHRU, true);
}
/* }}} */
2532
2533 /* {{{ Split string into an array using a perl-style regular expression as a delimiter */
PHP_FUNCTION(preg_split)2534 PHP_FUNCTION(preg_split)
2535 {
2536 zend_string *regex; /* Regular expression */
2537 zend_string *subject; /* String to match against */
2538 zend_long limit_val = -1;/* Integer value of limit */
2539 zend_long flags = 0; /* Match control flags */
2540 pcre_cache_entry *pce; /* Compiled regular expression */
2541
2542 /* Get function parameters and do error checking */
2543 ZEND_PARSE_PARAMETERS_START(2, 4)
2544 Z_PARAM_STR(regex)
2545 Z_PARAM_STR(subject)
2546 Z_PARAM_OPTIONAL
2547 Z_PARAM_LONG(limit_val)
2548 Z_PARAM_LONG(flags)
2549 ZEND_PARSE_PARAMETERS_END();
2550
2551 /* Compile regex or get it from cache. */
2552 if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
2553 RETURN_FALSE;
2554 }
2555
2556 pce->refcount++;
2557 php_pcre_split_impl(pce, subject, return_value, limit_val, flags);
2558 pce->refcount--;
2559 }
2560 /* }}} */
2561
/* {{{ php_pcre_split_impl
 * Splits `subject_str` at every match of the compiled pattern `pce` and
 * stores the pieces in `return_value` as an array.
 *
 * limit_val: -1 and 0 both mean "no limit"; any other value <= 1 skips
 *            matching altogether and goes straight to emitting the remainder
 *            of the subject as the only piece.
 * flags:     PREG_SPLIT_NO_EMPTY        - skip empty pieces,
 *            PREG_SPLIT_DELIM_CAPTURE   - also add captured groups of the delimiter,
 *            PREG_SPLIT_OFFSET_CAPTURE  - add pieces through add_offset_pair()
 *                                         instead of as plain strings.
 *
 * PCRE_G(error_code) is reset on entry. If it is set by the time matching
 * ends, or match data cannot be created, the array is destroyed and
 * `return_value` is set through RETURN_FALSE. */
PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str, zval *return_value,
	zend_long limit_val, zend_long flags)
{
	uint32_t		 options;			/* Execution options */
	int				 count;				/* Count of matched subpatterns */
	PCRE2_SIZE		 start_offset;		/* Where the new search starts */
	PCRE2_SIZE		 last_match_offset;	/* Location of last match */
	uint32_t		 no_empty;			/* If NO_EMPTY flag is set */
	uint32_t		 delim_capture; 	/* If delimiters should be captured */
	uint32_t		 offset_capture;	/* If offsets should be captured */
	uint32_t		 num_subpats;		/* Number of captured subpatterns */
	zval			 tmp;
	pcre2_match_data *match_data;
	char *subject = ZSTR_VAL(subject_str);

	no_empty = flags & PREG_SPLIT_NO_EMPTY;
	delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE;
	offset_capture = flags & PREG_SPLIT_OFFSET_CAPTURE;

	/* Initialize return value */
	array_init(return_value);
	HashTable *return_value_ht = Z_ARRVAL_P(return_value);

	/* Capture groups plus one for the whole match. Used below to decide
	   whether the shared match data can be used, and as the subpattern
	   count when PCRE reports "too many substrings". */
	num_subpats = pce->capture_count + 1;

	/* Start at the beginning of the string */
	start_offset = 0;
	last_match_offset = 0;
	PCRE_G(error_code) = PHP_PCRE_NO_ERROR;

	/* Normalise the limit: 0 is treated like -1 (unlimited); any other value
	   that leaves room for at most one piece skips matching entirely. */
	if (limit_val == -1) {
		/* pass */
	} else if (limit_val == 0) {
		limit_val = -1;
	} else if (limit_val <= 1) {
		goto last;
	}

	/* Use the shared match data when it is not in use and large enough for
	   this pattern; otherwise create a dedicated one (freed after the loop). */
	if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
		match_data = mdata;
	} else {
		match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
		if (!match_data) {
			PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
			zval_ptr_dtor(return_value);
			RETURN_FALSE;
		}
	}

	/* Options for the first match only: 0 for patterns compiled with
	   PCRE2_UTF, PCRE2_NO_UTF_CHECK otherwise. All later matches pass
	   PCRE2_NO_UTF_CHECK unconditionally. */
	options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;

	/* Array of subpattern offsets */
	PCRE2_SIZE *const offsets = pcre2_get_ovector_pointer(match_data);

	/* First match. The JIT entry point is taken only when `options` is
	   non-zero, i.e. for patterns without PCRE2_UTF. */
#ifdef HAVE_PCRE_JIT_SUPPORT
	if ((pce->preg_options & PREG_JIT) && options) {
		count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
				PCRE2_NO_UTF_CHECK, match_data, mctx);
	} else
#endif
	count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
			options, match_data, mctx);

	while (1) {
		/* If something matched */
		if (count >= 0) {
			/* Check for too many substrings condition. */
			if (UNEXPECTED(count == 0)) {
				php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
				count = num_subpats;
			}

matched:
			/* A match that ends before it starts is reported as an internal error. */
			if (UNEXPECTED(offsets[1] < offsets[0])) {
				PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
				break;
			}

			/* Emit the piece between the previous match and this one,
			   unless it is empty and empty pieces are suppressed. */
			if (!no_empty || offsets[0] != last_match_offset) {
				if (offset_capture) {
					/* Add (match, offset) pair to the return value */
					add_offset_pair(
						return_value_ht, subject, last_match_offset, offsets[0],
						NULL, 0);
				} else {
					/* Add the piece to the return value */
					populate_match_value_str(&tmp, subject, last_match_offset, offsets[0]);
					zend_hash_next_index_insert_new(return_value_ht, &tmp);
				}

				/* One less left to do */
				if (limit_val != -1)
					limit_val--;
			}

			/* Optionally emit the delimiter's capture groups (index 1 and up). */
			if (delim_capture) {
				size_t i;
				for (i = 1; i < count; i++) {
					/* If we have matched a delimiter */
					if (!no_empty || offsets[2*i] != offsets[2*i+1]) {
						if (offset_capture) {
							add_offset_pair(
								return_value_ht, subject, offsets[2*i], offsets[2*i+1], NULL, 0);
						} else {
							populate_match_value_str(&tmp, subject, offsets[2*i], offsets[2*i+1]);
							zend_hash_next_index_insert_new(return_value_ht, &tmp);
						}
					}
				}
			}

			/* Advance to the position right after the last full match */
			start_offset = last_match_offset = offsets[1];

			/* If we have matched an empty string, mimic what Perl's /g options does.
			   This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
			   the match again at the same point. If this fails (picked up above) we
			   advance to the next character. */
			if (start_offset == offsets[0]) {
				/* Stop here once the limit leaves room for only the final piece. */
				if (limit_val != -1 && limit_val <= 1) {
					break;
				}
				count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
					PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
				if (count >= 0) {
					goto matched;
				} else if (count == PCRE2_ERROR_NOMATCH) {
					/* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
					   this is not necessarily the end. We need to advance
					   the start offset, and continue. Fudge the offset values
					   to achieve this, unless we're already at the end of the string. */
					if (start_offset < ZSTR_LEN(subject_str)) {
						start_offset += calculate_unit_length(pce, subject + start_offset);
					} else {
						break;
					}
				} else {
					goto error;
				}
			}

		} else if (count == PCRE2_ERROR_NOMATCH) {
			/* No further delimiter: leave the loop and emit the remainder. */
			break;
		} else {
error:
			/* Any other negative result goes to pcre_handle_exec_error(). */
			pcre_handle_exec_error(count);
			break;
		}

		/* Stop here once the limit leaves room for only the final piece. */
		if (limit_val != -1 && limit_val <= 1) {
			break;
		}

		/* Look for the next delimiter starting at start_offset. */
#ifdef HAVE_PCRE_JIT_SUPPORT
		if (pce->preg_options & PREG_JIT) {
			count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
					PCRE2_NO_UTF_CHECK, match_data, mctx);
		} else
#endif
		count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
				PCRE2_NO_UTF_CHECK, match_data, mctx);
	}
	/* Only dedicated match data is freed; the shared one is left alone. */
	if (match_data != mdata) {
		pcre2_match_data_free(match_data);
	}

	/* An error recorded during matching discards the partial result. */
	if (PCRE_G(error_code) != PHP_PCRE_NO_ERROR) {
		zval_ptr_dtor(return_value);
		RETURN_FALSE;
	}

last:
	start_offset = last_match_offset; /* the offset might have been incremented, but without further successful matches */

	/* Emit whatever follows the last match, unless it is empty and empty
	   pieces are suppressed. */
	if (!no_empty || start_offset < ZSTR_LEN(subject_str)) {
		if (offset_capture) {
			/* Add the last (match, offset) pair to the return value */
			add_offset_pair(return_value_ht, subject, start_offset, ZSTR_LEN(subject_str), NULL, 0);
		} else {
			/* Add the last piece to the return value */
			if (start_offset == 0) {
				/* Nothing was consumed: share the subject string itself. */
				ZVAL_STR_COPY(&tmp, subject_str);
			} else {
				populate_match_value_str(&tmp, subject, start_offset, ZSTR_LEN(subject_str));
			}
			zend_hash_next_index_insert_new(return_value_ht, &tmp);
		}
	}
}
/* }}} */
2756
2757 /* {{{ Quote regular expression characters plus an optional character */
PHP_FUNCTION(preg_quote)2758 PHP_FUNCTION(preg_quote)
2759 {
2760 zend_string *str; /* Input string argument */
2761 zend_string *delim = NULL; /* Additional delimiter argument */
2762 char *in_str; /* Input string */
2763 char *in_str_end; /* End of the input string */
2764 zend_string *out_str; /* Output string with quoted characters */
2765 size_t extra_len; /* Number of additional characters */
2766 char *p, /* Iterator for input string */
2767 *q, /* Iterator for output string */
2768 delim_char = '\0', /* Delimiter character to be quoted */
2769 c; /* Current character */
2770
2771 /* Get the arguments and check for errors */
2772 ZEND_PARSE_PARAMETERS_START(1, 2)
2773 Z_PARAM_STR(str)
2774 Z_PARAM_OPTIONAL
2775 Z_PARAM_STR_OR_NULL(delim)
2776 ZEND_PARSE_PARAMETERS_END();
2777
2778 /* Nothing to do if we got an empty string */
2779 if (ZSTR_LEN(str) == 0) {
2780 RETURN_EMPTY_STRING();
2781 }
2782
2783 in_str = ZSTR_VAL(str);
2784 in_str_end = in_str + ZSTR_LEN(str);
2785
2786 if (delim) {
2787 delim_char = ZSTR_VAL(delim)[0];
2788 }
2789
2790 /* Go through the string and quote necessary characters */
2791 extra_len = 0;
2792 p = in_str;
2793 do {
2794 c = *p;
2795 switch(c) {
2796 case '.':
2797 case '\\':
2798 case '+':
2799 case '*':
2800 case '?':
2801 case '[':
2802 case '^':
2803 case ']':
2804 case '$':
2805 case '(':
2806 case ')':
2807 case '{':
2808 case '}':
2809 case '=':
2810 case '!':
2811 case '>':
2812 case '<':
2813 case '|':
2814 case ':':
2815 case '-':
2816 case '#':
2817 extra_len++;
2818 break;
2819
2820 case '\0':
2821 extra_len+=3;
2822 break;
2823
2824 default:
2825 if (c == delim_char) {
2826 extra_len++;
2827 }
2828 break;
2829 }
2830 p++;
2831 } while (p != in_str_end);
2832
2833 if (extra_len == 0) {
2834 RETURN_STR_COPY(str);
2835 }
2836
2837 /* Allocate enough memory so that even if each character
2838 is quoted, we won't run out of room */
2839 out_str = zend_string_safe_alloc(1, ZSTR_LEN(str), extra_len, 0);
2840 q = ZSTR_VAL(out_str);
2841 p = in_str;
2842
2843 do {
2844 c = *p;
2845 switch(c) {
2846 case '.':
2847 case '\\':
2848 case '+':
2849 case '*':
2850 case '?':
2851 case '[':
2852 case '^':
2853 case ']':
2854 case '$':
2855 case '(':
2856 case ')':
2857 case '{':
2858 case '}':
2859 case '=':
2860 case '!':
2861 case '>':
2862 case '<':
2863 case '|':
2864 case ':':
2865 case '-':
2866 case '#':
2867 *q++ = '\\';
2868 *q++ = c;
2869 break;
2870
2871 case '\0':
2872 *q++ = '\\';
2873 *q++ = '0';
2874 *q++ = '0';
2875 *q++ = '0';
2876 break;
2877
2878 default:
2879 if (c == delim_char) {
2880 *q++ = '\\';
2881 }
2882 *q++ = c;
2883 break;
2884 }
2885 p++;
2886 } while (p != in_str_end);
2887 *q = '\0';
2888
2889 RETURN_NEW_STR(out_str);
2890 }
2891 /* }}} */
2892
2893 /* {{{ Searches array and returns entries which match regex */
PHP_FUNCTION(preg_grep)2894 PHP_FUNCTION(preg_grep)
2895 {
2896 zend_string *regex; /* Regular expression */
2897 zval *input; /* Input array */
2898 zend_long flags = 0; /* Match control flags */
2899 pcre_cache_entry *pce; /* Compiled regular expression */
2900
2901 /* Get arguments and do error checking */
2902 ZEND_PARSE_PARAMETERS_START(2, 3)
2903 Z_PARAM_STR(regex)
2904 Z_PARAM_ARRAY(input)
2905 Z_PARAM_OPTIONAL
2906 Z_PARAM_LONG(flags)
2907 ZEND_PARSE_PARAMETERS_END();
2908
2909 /* Compile regex or get it from cache. */
2910 if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
2911 RETURN_FALSE;
2912 }
2913
2914 pce->refcount++;
2915 php_pcre_grep_impl(pce, input, return_value, flags);
2916 pce->refcount--;
2917 }
2918 /* }}} */
2919
php_pcre_grep_impl(pcre_cache_entry * pce,zval * input,zval * return_value,zend_long flags)2920 PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return_value, zend_long flags) /* {{{ */
2921 {
2922 zval *entry; /* An entry in the input array */
2923 uint32_t num_subpats; /* Number of captured subpatterns */
2924 int count; /* Count of matched subpatterns */
2925 uint32_t options; /* Execution options */
2926 zend_string *string_key;
2927 zend_ulong num_key;
2928 bool invert; /* Whether to return non-matching
2929 entries */
2930 pcre2_match_data *match_data;
2931 invert = flags & PREG_GREP_INVERT ? 1 : 0;
2932
2933 /* Calculate the size of the offsets array, and allocate memory for it. */
2934 num_subpats = pce->capture_count + 1;
2935
2936 /* Initialize return array */
2937 array_init(return_value);
2938 HashTable *return_value_ht = Z_ARRVAL_P(return_value);
2939
2940 PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
2941
2942 if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
2943 match_data = mdata;
2944 } else {
2945 match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
2946 if (!match_data) {
2947 PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
2948 return;
2949 }
2950 }
2951
2952 options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
2953
2954 /* Go through the input array */
2955 ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(input), num_key, string_key, entry) {
2956 zend_string *tmp_subject_str;
2957 zend_string *subject_str = zval_get_tmp_string(entry, &tmp_subject_str);
2958
2959 /* Perform the match */
2960 #ifdef HAVE_PCRE_JIT_SUPPORT
2961 if ((pce->preg_options & PREG_JIT) && options) {
2962 count = pcre2_jit_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), 0,
2963 PCRE2_NO_UTF_CHECK, match_data, mctx);
2964 } else
2965 #endif
2966 count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), 0,
2967 options, match_data, mctx);
2968
2969 /* If the entry fits our requirements */
2970 if (count >= 0) {
2971 /* Check for too many substrings condition. */
2972 if (UNEXPECTED(count == 0)) {
2973 php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
2974 }
2975 if (!invert) {
2976 Z_TRY_ADDREF_P(entry);
2977
2978 /* Add to return array */
2979 if (string_key) {
2980 zend_hash_update(return_value_ht, string_key, entry);
2981 } else {
2982 zend_hash_index_update(return_value_ht, num_key, entry);
2983 }
2984 }
2985 } else if (count == PCRE2_ERROR_NOMATCH) {
2986 if (invert) {
2987 Z_TRY_ADDREF_P(entry);
2988
2989 /* Add to return array */
2990 if (string_key) {
2991 zend_hash_update(return_value_ht, string_key, entry);
2992 } else {
2993 zend_hash_index_update(return_value_ht, num_key, entry);
2994 }
2995 }
2996 } else {
2997 pcre_handle_exec_error(count);
2998 zend_tmp_string_release(tmp_subject_str);
2999 break;
3000 }
3001
3002 zend_tmp_string_release(tmp_subject_str);
3003 } ZEND_HASH_FOREACH_END();
3004 if (match_data != mdata) {
3005 pcre2_match_data_free(match_data);
3006 }
3007 }
3008 /* }}} */
3009
3010 /* {{{ Returns the error code of the last regexp execution. */
PHP_FUNCTION(preg_last_error)3011 PHP_FUNCTION(preg_last_error)
3012 {
3013 ZEND_PARSE_PARAMETERS_NONE();
3014
3015 RETURN_LONG(PCRE_G(error_code));
3016 }
3017 /* }}} */
3018
3019 /* {{{ Returns the error message of the last regexp execution. */
PHP_FUNCTION(preg_last_error_msg)3020 PHP_FUNCTION(preg_last_error_msg)
3021 {
3022 ZEND_PARSE_PARAMETERS_NONE();
3023
3024 RETURN_STRING(php_pcre_get_error_msg(PCRE_G(error_code)));
3025 }
3026 /* }}} */
3027
3028 /* {{{ module definition structures */
3029
3030 zend_module_entry pcre_module_entry = {
3031 STANDARD_MODULE_HEADER,
3032 "pcre",
3033 ext_functions,
3034 PHP_MINIT(pcre),
3035 PHP_MSHUTDOWN(pcre),
3036 PHP_RINIT(pcre),
3037 PHP_RSHUTDOWN(pcre),
3038 PHP_MINFO(pcre),
3039 PHP_PCRE_VERSION,
3040 PHP_MODULE_GLOBALS(pcre),
3041 PHP_GINIT(pcre),
3042 PHP_GSHUTDOWN(pcre),
3043 NULL,
3044 STANDARD_MODULE_PROPERTIES_EX
3045 };
3046
3047 #ifdef COMPILE_DL_PCRE
ZEND_GET_MODULE(pcre)3048 ZEND_GET_MODULE(pcre)
3049 #endif
3050
3051 /* }}} */
3052
3053 PHPAPI pcre2_match_context *php_pcre_mctx(void)
3054 {/*{{{*/
3055 return mctx;
3056 }/*}}}*/
3057
php_pcre_gctx(void)3058 PHPAPI pcre2_general_context *php_pcre_gctx(void)
3059 {/*{{{*/
3060 return gctx;
3061 }/*}}}*/
3062
php_pcre_cctx(void)3063 PHPAPI pcre2_compile_context *php_pcre_cctx(void)
3064 {/*{{{*/
3065 return cctx;
3066 }/*}}}*/
3067
php_pcre_pce_incref(pcre_cache_entry * pce)3068 PHPAPI void php_pcre_pce_incref(pcre_cache_entry *pce)
3069 {/*{{{*/
3070 assert(NULL != pce);
3071 pce->refcount++;
3072 }/*}}}*/
3073
php_pcre_pce_decref(pcre_cache_entry * pce)3074 PHPAPI void php_pcre_pce_decref(pcre_cache_entry *pce)
3075 {/*{{{*/
3076 assert(NULL != pce);
3077 assert(0 != pce->refcount);
3078 pce->refcount--;
3079 }/*}}}*/
3080
php_pcre_pce_re(pcre_cache_entry * pce)3081 PHPAPI pcre2_code *php_pcre_pce_re(pcre_cache_entry *pce)
3082 {/*{{{*/
3083 assert(NULL != pce);
3084 return pce->re;
3085 }/*}}}*/
3086