1 /*
2 +----------------------------------------------------------------------+
3 | Copyright (c) The PHP Group |
4 +----------------------------------------------------------------------+
5 | This source file is subject to version 3.01 of the PHP license, |
6 | that is bundled with this package in the file LICENSE, and is |
7 | available through the world-wide-web at the following url: |
8 | https://www.php.net/license/3_01.txt |
9 | If you did not receive a copy of the PHP license and are unable to |
10 | obtain it through the world-wide-web, please send a note to |
11 | license@php.net so we can mail you a copy immediately. |
12 +----------------------------------------------------------------------+
13 | Author: Andrei Zmievski <andrei@php.net> |
14 +----------------------------------------------------------------------+
15 */
16
17 #include "php.h"
18 #include "php_ini.h"
19 #include "php_globals.h"
20 #include "php_pcre.h"
21 #include "php_pcre_arginfo.h"
22 #include "ext/standard/info.h"
23 #include "ext/standard/basic_functions.h"
24 #include "zend_smart_str.h"
25 #include "SAPI.h"
26
27 #include "ext/standard/php_string.h"
28
/* Flag values exported to userland under the same names in MINIT. */
#define PREG_PATTERN_ORDER 1
#define PREG_SET_ORDER 2
#define PREG_OFFSET_CAPTURE (1<<8)
#define PREG_UNMATCHED_AS_NULL (1<<9)

/* Split flags, also exported to userland in MINIT. */
#define PREG_SPLIT_NO_EMPTY (1<<0)
#define PREG_SPLIT_DELIM_CAPTURE (1<<1)
#define PREG_SPLIT_OFFSET_CAPTURE (1<<2)

/* Internal preg option set by the 'e' modifier; the regex compiler rejects it. */
#define PREG_REPLACE_EVAL (1<<0)

/* Grep flag, exported to userland in MINIT. */
#define PREG_GREP_INVERT (1<<0)

/* Internal preg option recorded on a cache entry once JIT compilation produced code. */
#define PREG_JIT (1<<3)

/* Number of cached patterns at which part of the cache gets evicted. */
#define PCRE_CACHE_SIZE 4096
45
/* One compiled pattern as stored in PCRE_G(pcre_cache). */
struct _pcre_cache_entry {
pcre2_code *re; /* compiled pattern; released with pcre2_code_free() by the cache destructors */
uint32_t preg_options; /* PHP-side option bits (e.g. PREG_JIT) */
uint32_t capture_count; /* value of PCRE2_INFO_CAPTURECOUNT for `re` */
uint32_t name_count; /* value of PCRE2_INFO_NAMECOUNT for `re` */
uint32_t compile_options; /* PCRE2 option bits the pattern was compiled with */
uint32_t refcount; /* entries with a non-zero count are skipped by pcre_clean_cache() */
};
54
/* Module globals of the pcre extension (accessed through PCRE_G()). */
PHPAPI ZEND_DECLARE_MODULE_GLOBALS(pcre)

#ifdef HAVE_PCRE_JIT_SUPPORT
/* Start and maximum size handed to pcre2_jit_stack_create(). */
#define PCRE_JIT_STACK_MIN_SIZE (32 * 1024)
#define PCRE_JIT_STACK_MAX_SIZE (192 * 1024)
/* JIT stack created by php_pcre_init_pcre2() and attached to mctx by OnUpdateJit. */
ZEND_TLS pcre2_jit_stack *jit_stack = NULL;
#endif
/* General context using (infallible) system allocator. */
ZEND_TLS pcre2_general_context *gctx = NULL;
/* These two are global per thread for now. Though it is possible to use these
 per pattern. Either one can copy it and use in pce, or one does no global
 contexts at all, but creates for every pce. */
ZEND_TLS pcre2_compile_context *cctx = NULL;
ZEND_TLS pcre2_match_context *mctx = NULL;
/* Preallocated match data block (PHP_PCRE_PREALLOC_MDATA_SIZE pairs) that
 php_pcre_create_match_data() hands out while mdata_used is 0. */
ZEND_TLS pcre2_match_data *mdata = NULL;
/* Non-zero while `mdata` is lent out; cleared by php_pcre_free_match_data(). */
ZEND_TLS bool mdata_used = 0;
/* Set to 1 once php_pcre_init_pcre2() created every context, 0 otherwise. */
ZEND_TLS uint8_t pcre2_init_ok = 0;
#if defined(ZTS) && defined(HAVE_PCRE_JIT_SUPPORT)
/* Mutex shared by all threads (deliberately not ZEND_TLS); it is allocated
   and released by the main thread only. */
static MUTEX_T pcre_mt = NULL;
/* Every helper expands to exactly one statement via do { } while (0), so it
   behaves like a function call inside unbraced if/else bodies. The caller
   supplies the terminating semicolon. */
#define php_pcre_mutex_alloc() \
	do { \
		if (tsrm_is_main_thread() && !pcre_mt) { \
			pcre_mt = tsrm_mutex_alloc(); \
		} \
	} while (0)
#define php_pcre_mutex_free() \
	do { \
		if (tsrm_is_main_thread() && pcre_mt) { \
			tsrm_mutex_free(pcre_mt); \
			pcre_mt = NULL; \
		} \
	} while (0)
#define php_pcre_mutex_lock() \
	do { \
		tsrm_mutex_lock(pcre_mt); \
	} while (0)
#define php_pcre_mutex_unlock() \
	do { \
		tsrm_mutex_unlock(pcre_mt); \
	} while (0)
#else
/* No locking needed in this configuration: the helpers are no-ops. */
#define php_pcre_mutex_alloc() do { } while (0)
#define php_pcre_mutex_free() do { } while (0)
#define php_pcre_mutex_lock() do { } while (0)
#define php_pcre_mutex_unlock() do { } while (0)
#endif
86
87 ZEND_TLS HashTable char_tables;
88
php_pcre_free_char_table(zval * data)89 static void php_pcre_free_char_table(zval *data)
90 {/*{{{*/
91 void *ptr = Z_PTR_P(data);
92 pefree(ptr, 1);
93 }/*}}}*/
94
pcre_handle_exec_error(int pcre_code)95 static void pcre_handle_exec_error(int pcre_code) /* {{{ */
96 {
97 int preg_code = 0;
98
99 switch (pcre_code) {
100 case PCRE2_ERROR_MATCHLIMIT:
101 preg_code = PHP_PCRE_BACKTRACK_LIMIT_ERROR;
102 break;
103
104 case PCRE2_ERROR_RECURSIONLIMIT:
105 preg_code = PHP_PCRE_RECURSION_LIMIT_ERROR;
106 break;
107
108 case PCRE2_ERROR_BADUTFOFFSET:
109 preg_code = PHP_PCRE_BAD_UTF8_OFFSET_ERROR;
110 break;
111
112 #ifdef HAVE_PCRE_JIT_SUPPORT
113 case PCRE2_ERROR_JIT_STACKLIMIT:
114 preg_code = PHP_PCRE_JIT_STACKLIMIT_ERROR;
115 break;
116 #endif
117
118 default:
119 if (pcre_code <= PCRE2_ERROR_UTF8_ERR1 && pcre_code >= PCRE2_ERROR_UTF8_ERR21) {
120 preg_code = PHP_PCRE_BAD_UTF8_ERROR;
121 } else {
122 preg_code = PHP_PCRE_INTERNAL_ERROR;
123 }
124 break;
125 }
126
127 PCRE_G(error_code) = preg_code;
128 }
129 /* }}} */
130
/* Returns the static, human-readable message for a PHP_PCRE_* error code;
   codes that are not listed yield "Unknown error". */
static const char *php_pcre_get_error_msg(php_pcre_error_code error_code) /* {{{ */
{
	const char *msg = "Unknown error";

	switch (error_code) {
		case PHP_PCRE_NO_ERROR:
			msg = "No error";
			break;
		case PHP_PCRE_INTERNAL_ERROR:
			msg = "Internal error";
			break;
		case PHP_PCRE_BAD_UTF8_ERROR:
			msg = "Malformed UTF-8 characters, possibly incorrectly encoded";
			break;
		case PHP_PCRE_BAD_UTF8_OFFSET_ERROR:
			msg = "The offset did not correspond to the beginning of a valid UTF-8 code point";
			break;
		case PHP_PCRE_BACKTRACK_LIMIT_ERROR:
			msg = "Backtrack limit exhausted";
			break;
		case PHP_PCRE_RECURSION_LIMIT_ERROR:
			msg = "Recursion limit exhausted";
			break;

#ifdef HAVE_PCRE_JIT_SUPPORT
		case PHP_PCRE_JIT_STACKLIMIT_ERROR:
			msg = "JIT stack limit exhausted";
			break;
#endif

		default:
			break;
	}

	return msg;
}
/* }}} */
157
/* Destructor installed on the persistent pattern cache (see GINIT): frees
   the compiled pattern and then the entry itself with free(). */
static void php_free_pcre_cache(zval *data) /* {{{ */
{
	pcre_cache_entry *entry = (pcre_cache_entry *) Z_PTR_P(data);

	if (entry != NULL) {
		pcre2_code_free(entry->re);
		free(entry);
	}
}
/* }}} */
166
/* Destructor installed on the per-request pattern cache (see RINIT): frees
   the compiled pattern and then the entry itself with efree(). */
static void php_efree_pcre_cache(zval *data) /* {{{ */
{
	pcre_cache_entry *entry = (pcre_cache_entry *) Z_PTR_P(data);

	if (entry != NULL) {
		pcre2_code_free(entry->re);
		efree(entry);
	}
}
/* }}} */
175
/* PCRE2 allocation callback used by `gctx`: allocates `size` bytes with
   pemalloc(size, 1). The user-data pointer is not used. */
static void *php_pcre_malloc(PCRE2_SIZE size, void *data)
{
	(void) data;

	return pemalloc(size, 1);
}
180
/* PCRE2 release callback used by `gctx`: counterpart of php_pcre_malloc(),
   frees `block` with pefree(block, 1). The user-data pointer is not used. */
static void php_pcre_free(void *block, void *data)
{
	(void) data;

	pefree(block, 1);
}
185
/* PCRE2 allocation callback used by PCRE_G(gctx_zmm): allocates `size` bytes
   with emalloc(). The user-data pointer is not used. */
static void *php_pcre_emalloc(PCRE2_SIZE size, void *data)
{
	(void) data;

	return emalloc(size);
}
190
/* PCRE2 release callback used by PCRE_G(gctx_zmm): counterpart of
   php_pcre_emalloc(), frees `block` with efree(). The user-data pointer is
   not used. */
static void php_pcre_efree(void *block, void *data)
{
	(void) data;

	efree(block);
}
195
/* Extra compile options applied to the shared compile context in
   php_pcre_init_pcre2(). */
#ifdef PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK
/* pcre 10.38 needs PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK, disabled by default */
#define PHP_PCRE_DEFAULT_EXTRA_COPTIONS PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK
#else
#define PHP_PCRE_DEFAULT_EXTRA_COPTIONS 0
#endif

/* Size of the preallocated match data block `mdata`; patterns whose
   capture_count + 1 exceeds it get a freshly created block instead. */
#define PHP_PCRE_PREALLOC_MDATA_SIZE 32
204
php_pcre_init_pcre2(uint8_t jit)205 static void php_pcre_init_pcre2(uint8_t jit)
206 {/*{{{*/
207 if (!gctx) {
208 gctx = pcre2_general_context_create(php_pcre_malloc, php_pcre_free, NULL);
209 if (!gctx) {
210 pcre2_init_ok = 0;
211 return;
212 }
213 }
214
215 if (!cctx) {
216 cctx = pcre2_compile_context_create(gctx);
217 if (!cctx) {
218 pcre2_init_ok = 0;
219 return;
220 }
221 }
222
223 pcre2_set_compile_extra_options(cctx, PHP_PCRE_DEFAULT_EXTRA_COPTIONS);
224
225 if (!mctx) {
226 mctx = pcre2_match_context_create(gctx);
227 if (!mctx) {
228 pcre2_init_ok = 0;
229 return;
230 }
231 }
232
233 #ifdef HAVE_PCRE_JIT_SUPPORT
234 if (jit && !jit_stack) {
235 jit_stack = pcre2_jit_stack_create(PCRE_JIT_STACK_MIN_SIZE, PCRE_JIT_STACK_MAX_SIZE, gctx);
236 if (!jit_stack) {
237 pcre2_init_ok = 0;
238 return;
239 }
240 }
241 #endif
242
243 if (!mdata) {
244 mdata = pcre2_match_data_create(PHP_PCRE_PREALLOC_MDATA_SIZE, gctx);
245 if (!mdata) {
246 pcre2_init_ok = 0;
247 return;
248 }
249 }
250
251 pcre2_init_ok = 1;
252 }/*}}}*/
253
php_pcre_shutdown_pcre2(void)254 static void php_pcre_shutdown_pcre2(void)
255 {/*{{{*/
256 if (gctx) {
257 pcre2_general_context_free(gctx);
258 gctx = NULL;
259 }
260
261 if (cctx) {
262 pcre2_compile_context_free(cctx);
263 cctx = NULL;
264 }
265
266 if (mctx) {
267 pcre2_match_context_free(mctx);
268 mctx = NULL;
269 }
270
271 #ifdef HAVE_PCRE_JIT_SUPPORT
272 /* Stack may only be destroyed when no cached patterns
273 possibly associated with it do exist. */
274 if (jit_stack) {
275 pcre2_jit_stack_free(jit_stack);
276 jit_stack = NULL;
277 }
278 #endif
279
280 if (mdata) {
281 pcre2_match_data_free(mdata);
282 mdata = NULL;
283 }
284
285 pcre2_init_ok = 0;
286 }/*}}}*/
287
PHP_GINIT_FUNCTION(pcre)288 static PHP_GINIT_FUNCTION(pcre) /* {{{ */
289 {
290 php_pcre_mutex_alloc();
291
292 /* If we're on the CLI SAPI, there will only be one request, so we don't need the
293 * cache to survive after RSHUTDOWN. */
294 pcre_globals->per_request_cache = strcmp(sapi_module.name, "cli") == 0;
295 if (!pcre_globals->per_request_cache) {
296 zend_hash_init(&pcre_globals->pcre_cache, 0, NULL, php_free_pcre_cache, 1);
297 }
298
299 pcre_globals->backtrack_limit = 0;
300 pcre_globals->recursion_limit = 0;
301 pcre_globals->error_code = PHP_PCRE_NO_ERROR;
302 ZVAL_UNDEF(&pcre_globals->unmatched_null_pair);
303 ZVAL_UNDEF(&pcre_globals->unmatched_empty_pair);
304 #ifdef HAVE_PCRE_JIT_SUPPORT
305 pcre_globals->jit = 1;
306 #endif
307
308 php_pcre_init_pcre2(1);
309 zend_hash_init(&char_tables, 1, NULL, php_pcre_free_char_table, 1);
310 }
311 /* }}} */
312
PHP_GSHUTDOWN_FUNCTION(pcre)313 static PHP_GSHUTDOWN_FUNCTION(pcre) /* {{{ */
314 {
315 if (!pcre_globals->per_request_cache) {
316 zend_hash_destroy(&pcre_globals->pcre_cache);
317 }
318
319 php_pcre_shutdown_pcre2();
320 zend_hash_destroy(&char_tables);
321 php_pcre_mutex_free();
322 }
323 /* }}} */
324
PHP_INI_MH(OnUpdateBacktrackLimit)325 static PHP_INI_MH(OnUpdateBacktrackLimit)
326 {/*{{{*/
327 OnUpdateLong(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
328 if (mctx) {
329 pcre2_set_match_limit(mctx, (uint32_t)PCRE_G(backtrack_limit));
330 }
331
332 return SUCCESS;
333 }/*}}}*/
334
PHP_INI_MH(OnUpdateRecursionLimit)335 static PHP_INI_MH(OnUpdateRecursionLimit)
336 {/*{{{*/
337 OnUpdateLong(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
338 if (mctx) {
339 pcre2_set_depth_limit(mctx, (uint32_t)PCRE_G(recursion_limit));
340 }
341
342 return SUCCESS;
343 }/*}}}*/
344
#ifdef HAVE_PCRE_JIT_SUPPORT
/* INI handler for pcre.jit: stores the flag through OnUpdateBool() and then
   attaches the JIT stack to the match context when JIT is enabled and a stack
   exists, or detaches it (NULL) otherwise. Always reports SUCCESS. */
static PHP_INI_MH(OnUpdateJit)
{/*{{{*/
	pcre2_jit_stack *stack;

	OnUpdateBool(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);

	stack = (PCRE_G(jit) && jit_stack) ? jit_stack : NULL;
	pcre2_jit_stack_assign(mctx, NULL, stack);

	return SUCCESS;
}/*}}}*/
#endif
358
/* INI settings of the extension. Each entry is bound to a field of the module
   globals and to the OnUpdate* handler that also pushes the new value into
   the shared match context. */
PHP_INI_BEGIN()
STD_PHP_INI_ENTRY("pcre.backtrack_limit", "1000000", PHP_INI_ALL, OnUpdateBacktrackLimit, backtrack_limit, zend_pcre_globals, pcre_globals)
STD_PHP_INI_ENTRY("pcre.recursion_limit", "100000", PHP_INI_ALL, OnUpdateRecursionLimit, recursion_limit, zend_pcre_globals, pcre_globals)
#ifdef HAVE_PCRE_JIT_SUPPORT
/* Only registered when JIT support is compiled in. */
STD_PHP_INI_ENTRY("pcre.jit", "1", PHP_INI_ALL, OnUpdateJit, jit, zend_pcre_globals, pcre_globals)
#endif
PHP_INI_END()
366
/* Fetches a string-valued pcre2_config() item.
 *
 * what: a PCRE2_CONFIG_* selector that yields a string.
 *
 * Returns a malloc()ed, NUL-terminated copy that the caller must free(), or
 * NULL when the item is unavailable (pcre2_config() reports an error such as
 * PCRE2_ERROR_BADOPTION, or a zero length) or when memory cannot be
 * allocated. */
static char *_pcre2_config_str(uint32_t what)
{/*{{{*/
	char *ret;
	int len = pcre2_config(what, NULL);

	/* A negative result is an error code, not a length; it must never reach
	   malloc(). */
	if (len <= 0) {
		return NULL;
	}

	ret = malloc((size_t) len + 1);
	if (!ret) {
		return NULL;
	}

	if (pcre2_config(what, ret) <= 0) {
		free(ret);
		return NULL;
	}

	return ret;
}/*}}}*/
380
381 /* {{{ PHP_MINFO_FUNCTION(pcre) */
PHP_MINFO_FUNCTION(pcre)382 static PHP_MINFO_FUNCTION(pcre)
383 {
384 #ifdef HAVE_PCRE_JIT_SUPPORT
385 uint32_t flag = 0;
386 char *jit_target = _pcre2_config_str(PCRE2_CONFIG_JITTARGET);
387 #endif
388 char *version = _pcre2_config_str(PCRE2_CONFIG_VERSION);
389 char *unicode = _pcre2_config_str(PCRE2_CONFIG_UNICODE_VERSION);
390
391 php_info_print_table_start();
392 php_info_print_table_row(2, "PCRE (Perl Compatible Regular Expressions) Support", "enabled" );
393 php_info_print_table_row(2, "PCRE Library Version", version);
394 free(version);
395 php_info_print_table_row(2, "PCRE Unicode Version", unicode);
396 free(unicode);
397
398 #ifdef HAVE_PCRE_JIT_SUPPORT
399 if (!pcre2_config(PCRE2_CONFIG_JIT, &flag)) {
400 php_info_print_table_row(2, "PCRE JIT Support", flag ? "enabled" : "disabled");
401 } else {
402 php_info_print_table_row(2, "PCRE JIT Support", "unknown" );
403 }
404 if (jit_target) {
405 php_info_print_table_row(2, "PCRE JIT Target", jit_target);
406 }
407 free(jit_target);
408 #else
409 php_info_print_table_row(2, "PCRE JIT Support", "not compiled in" );
410 #endif
411
412 #ifdef HAVE_PCRE_VALGRIND_SUPPORT
413 php_info_print_table_row(2, "PCRE Valgrind Support", "enabled" );
414 #endif
415
416 php_info_print_table_end();
417
418 DISPLAY_INI_ENTRIES();
419 }
420 /* }}} */
421
422 /* {{{ PHP_MINIT_FUNCTION(pcre) */
PHP_MINIT_FUNCTION(pcre)423 static PHP_MINIT_FUNCTION(pcre)
424 {
425 char *version;
426
427 #ifdef HAVE_PCRE_JIT_SUPPORT
428 if (UNEXPECTED(!pcre2_init_ok)) {
429 /* Retry. */
430 php_pcre_init_pcre2(PCRE_G(jit));
431 if (!pcre2_init_ok) {
432 return FAILURE;
433 }
434 }
435 #endif
436
437 REGISTER_INI_ENTRIES();
438
439 REGISTER_LONG_CONSTANT("PREG_PATTERN_ORDER", PREG_PATTERN_ORDER, CONST_CS | CONST_PERSISTENT);
440 REGISTER_LONG_CONSTANT("PREG_SET_ORDER", PREG_SET_ORDER, CONST_CS | CONST_PERSISTENT);
441 REGISTER_LONG_CONSTANT("PREG_OFFSET_CAPTURE", PREG_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
442 REGISTER_LONG_CONSTANT("PREG_UNMATCHED_AS_NULL", PREG_UNMATCHED_AS_NULL, CONST_CS | CONST_PERSISTENT);
443 REGISTER_LONG_CONSTANT("PREG_SPLIT_NO_EMPTY", PREG_SPLIT_NO_EMPTY, CONST_CS | CONST_PERSISTENT);
444 REGISTER_LONG_CONSTANT("PREG_SPLIT_DELIM_CAPTURE", PREG_SPLIT_DELIM_CAPTURE, CONST_CS | CONST_PERSISTENT);
445 REGISTER_LONG_CONSTANT("PREG_SPLIT_OFFSET_CAPTURE", PREG_SPLIT_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
446 REGISTER_LONG_CONSTANT("PREG_GREP_INVERT", PREG_GREP_INVERT, CONST_CS | CONST_PERSISTENT);
447
448 REGISTER_LONG_CONSTANT("PREG_NO_ERROR", PHP_PCRE_NO_ERROR, CONST_CS | CONST_PERSISTENT);
449 REGISTER_LONG_CONSTANT("PREG_INTERNAL_ERROR", PHP_PCRE_INTERNAL_ERROR, CONST_CS | CONST_PERSISTENT);
450 REGISTER_LONG_CONSTANT("PREG_BACKTRACK_LIMIT_ERROR", PHP_PCRE_BACKTRACK_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
451 REGISTER_LONG_CONSTANT("PREG_RECURSION_LIMIT_ERROR", PHP_PCRE_RECURSION_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
452 REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_ERROR", PHP_PCRE_BAD_UTF8_ERROR, CONST_CS | CONST_PERSISTENT);
453 REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_OFFSET_ERROR", PHP_PCRE_BAD_UTF8_OFFSET_ERROR, CONST_CS | CONST_PERSISTENT);
454 REGISTER_LONG_CONSTANT("PREG_JIT_STACKLIMIT_ERROR", PHP_PCRE_JIT_STACKLIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
455 version = _pcre2_config_str(PCRE2_CONFIG_VERSION);
456 REGISTER_STRING_CONSTANT("PCRE_VERSION", version, CONST_CS | CONST_PERSISTENT);
457 free(version);
458 REGISTER_LONG_CONSTANT("PCRE_VERSION_MAJOR", PCRE2_MAJOR, CONST_CS | CONST_PERSISTENT);
459 REGISTER_LONG_CONSTANT("PCRE_VERSION_MINOR", PCRE2_MINOR, CONST_CS | CONST_PERSISTENT);
460
461 #ifdef HAVE_PCRE_JIT_SUPPORT
462 REGISTER_BOOL_CONSTANT("PCRE_JIT_SUPPORT", 1, CONST_CS | CONST_PERSISTENT);
463 #else
464 REGISTER_BOOL_CONSTANT("PCRE_JIT_SUPPORT", 0, CONST_CS | CONST_PERSISTENT);
465 #endif
466
467 return SUCCESS;
468 }
469 /* }}} */
470
471 /* {{{ PHP_MSHUTDOWN_FUNCTION(pcre) */
PHP_MSHUTDOWN_FUNCTION(pcre)472 static PHP_MSHUTDOWN_FUNCTION(pcre)
473 {
474 UNREGISTER_INI_ENTRIES();
475
476 return SUCCESS;
477 }
478 /* }}} */
479
480 /* {{{ PHP_RINIT_FUNCTION(pcre) */
PHP_RINIT_FUNCTION(pcre)481 static PHP_RINIT_FUNCTION(pcre)
482 {
483 #ifdef HAVE_PCRE_JIT_SUPPORT
484 if (UNEXPECTED(!pcre2_init_ok)) {
485 /* Retry. */
486 php_pcre_mutex_lock();
487 php_pcre_init_pcre2(PCRE_G(jit));
488 if (!pcre2_init_ok) {
489 php_pcre_mutex_unlock();
490 return FAILURE;
491 }
492 php_pcre_mutex_unlock();
493 }
494
495 mdata_used = 0;
496 #endif
497
498 PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
499 PCRE_G(gctx_zmm) = pcre2_general_context_create(php_pcre_emalloc, php_pcre_efree, NULL);
500 if (!PCRE_G(gctx_zmm)) {
501 return FAILURE;
502 }
503
504 if (PCRE_G(per_request_cache)) {
505 zend_hash_init(&PCRE_G(pcre_cache), 0, NULL, php_efree_pcre_cache, 0);
506 }
507
508 return SUCCESS;
509 }
510 /* }}} */
511
PHP_RSHUTDOWN_FUNCTION(pcre)512 static PHP_RSHUTDOWN_FUNCTION(pcre)
513 {
514 pcre2_general_context_free(PCRE_G(gctx_zmm));
515 PCRE_G(gctx_zmm) = NULL;
516
517 if (PCRE_G(per_request_cache)) {
518 zend_hash_destroy(&PCRE_G(pcre_cache));
519 }
520
521 zval_ptr_dtor(&PCRE_G(unmatched_null_pair));
522 zval_ptr_dtor(&PCRE_G(unmatched_empty_pair));
523 ZVAL_UNDEF(&PCRE_G(unmatched_null_pair));
524 ZVAL_UNDEF(&PCRE_G(unmatched_empty_pair));
525 return SUCCESS;
526 }
527
528 /* {{{ static pcre_clean_cache */
pcre_clean_cache(zval * data,void * arg)529 static int pcre_clean_cache(zval *data, void *arg)
530 {
531 pcre_cache_entry *pce = (pcre_cache_entry *) Z_PTR_P(data);
532 int *num_clean = (int *)arg;
533
534 if (*num_clean > 0 && !pce->refcount) {
535 (*num_clean)--;
536 return ZEND_HASH_APPLY_REMOVE;
537 } else {
538 return ZEND_HASH_APPLY_KEEP;
539 }
540 }
541 /* }}} */
542
/* Releases every non-NULL name in a table built by make_subpats_table() and
   then the table itself. `num_subpats` is the number of slots in the table. */
static void free_subpats_table(zend_string **subpat_names, uint32_t num_subpats) {
	zend_string **slot = subpat_names;
	zend_string **const end = subpat_names + num_subpats;

	for (; slot != end; slot++) {
		if (*slot != NULL) {
			zend_string_release(*slot);
		}
	}

	efree(subpat_names);
}
552
553 /* {{{ static make_subpats_table */
make_subpats_table(uint32_t num_subpats,pcre_cache_entry * pce)554 static zend_string **make_subpats_table(uint32_t num_subpats, pcre_cache_entry *pce)
555 {
556 uint32_t name_cnt = pce->name_count, name_size, ni = 0;
557 char *name_table;
558 zend_string **subpat_names;
559 int rc1, rc2;
560
561 rc1 = pcre2_pattern_info(pce->re, PCRE2_INFO_NAMETABLE, &name_table);
562 rc2 = pcre2_pattern_info(pce->re, PCRE2_INFO_NAMEENTRYSIZE, &name_size);
563 if (rc1 < 0 || rc2 < 0) {
564 php_error_docref(NULL, E_WARNING, "Internal pcre2_pattern_info() error %d", rc1 < 0 ? rc1 : rc2);
565 return NULL;
566 }
567
568 subpat_names = ecalloc(num_subpats, sizeof(zend_string *));
569 while (ni++ < name_cnt) {
570 unsigned short name_idx = 0x100 * (unsigned char)name_table[0] + (unsigned char)name_table[1];
571 const char *name = name_table + 2;
572 subpat_names[name_idx] = zend_string_init(name, strlen(name), 0);
573 if (is_numeric_string(ZSTR_VAL(subpat_names[name_idx]), ZSTR_LEN(subpat_names[name_idx]), NULL, NULL, 0) > 0) {
574 php_error_docref(NULL, E_WARNING, "Numeric named subpatterns are not allowed");
575 free_subpats_table(subpat_names, num_subpats);
576 return NULL;
577 }
578 name_table += name_size;
579 }
580 return subpat_names;
581 }
582 /* }}} */
583
584 /* {{{ static calculate_unit_length */
585 /* Calculates the byte length of the next character. Assumes valid UTF-8 for PCRE2_UTF. */
calculate_unit_length(pcre_cache_entry * pce,const char * start)586 static zend_always_inline size_t calculate_unit_length(pcre_cache_entry *pce, const char *start)
587 {
588 size_t unit_len;
589
590 if (pce->compile_options & PCRE2_UTF) {
591 const char *end = start;
592
593 /* skip continuation bytes */
594 while ((*++end & 0xC0) == 0x80);
595 unit_len = end - start;
596 } else {
597 unit_len = 1;
598 }
599 return unit_len;
600 }
601 /* }}} */
602
603 /* {{{ pcre_get_compiled_regex_cache */
pcre_get_compiled_regex_cache_ex(zend_string * regex,int locale_aware)604 PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, int locale_aware)
605 {
606 pcre2_code *re = NULL;
607 #if 10 == PCRE2_MAJOR && 37 == PCRE2_MINOR && !HAVE_BUNDLED_PCRE
608 uint32_t coptions = PCRE2_NO_START_OPTIMIZE;
609 #else
610 uint32_t coptions = 0;
611 #endif
612 PCRE2_UCHAR error[128];
613 PCRE2_SIZE erroffset;
614 int errnumber;
615 char delimiter;
616 char start_delimiter;
617 char end_delimiter;
618 char *p, *pp;
619 char *pattern;
620 size_t pattern_len;
621 uint32_t poptions = 0;
622 const uint8_t *tables = NULL;
623 zval *zv;
624 pcre_cache_entry new_entry;
625 int rc;
626 zend_string *key;
627 pcre_cache_entry *ret;
628
629 if (locale_aware && BG(ctype_string)) {
630 key = zend_string_concat2(
631 ZSTR_VAL(BG(ctype_string)), ZSTR_LEN(BG(ctype_string)),
632 ZSTR_VAL(regex), ZSTR_LEN(regex));
633 } else {
634 key = regex;
635 }
636
637 /* Try to lookup the cached regex entry, and if successful, just pass
638 back the compiled pattern, otherwise go on and compile it. */
639 zv = zend_hash_find(&PCRE_G(pcre_cache), key);
640 if (zv) {
641 if (key != regex) {
642 zend_string_release_ex(key, 0);
643 }
644 return (pcre_cache_entry*)Z_PTR_P(zv);
645 }
646
647 p = ZSTR_VAL(regex);
648
649 /* Parse through the leading whitespace, and display a warning if we
650 get to the end without encountering a delimiter. */
651 while (isspace((int)*(unsigned char *)p)) p++;
652 if (*p == 0) {
653 if (key != regex) {
654 zend_string_release_ex(key, 0);
655 }
656 php_error_docref(NULL, E_WARNING,
657 p < ZSTR_VAL(regex) + ZSTR_LEN(regex) ? "Null byte in regex" : "Empty regular expression");
658 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
659 return NULL;
660 }
661
662 /* Get the delimiter and display a warning if it is alphanumeric
663 or a backslash. */
664 delimiter = *p++;
665 if (isalnum((int)*(unsigned char *)&delimiter) || delimiter == '\\') {
666 if (key != regex) {
667 zend_string_release_ex(key, 0);
668 }
669 php_error_docref(NULL,E_WARNING, "Delimiter must not be alphanumeric or backslash");
670 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
671 return NULL;
672 }
673
674 start_delimiter = delimiter;
675 if ((pp = strchr("([{< )]}> )]}>", delimiter)))
676 delimiter = pp[5];
677 end_delimiter = delimiter;
678
679 pp = p;
680
681 if (start_delimiter == end_delimiter) {
682 /* We need to iterate through the pattern, searching for the ending delimiter,
683 but skipping the backslashed delimiters. If the ending delimiter is not
684 found, display a warning. */
685 while (*pp != 0) {
686 if (*pp == '\\' && pp[1] != 0) pp++;
687 else if (*pp == delimiter)
688 break;
689 pp++;
690 }
691 } else {
692 /* We iterate through the pattern, searching for the matching ending
693 * delimiter. For each matching starting delimiter, we increment nesting
694 * level, and decrement it for each matching ending delimiter. If we
695 * reach the end of the pattern without matching, display a warning.
696 */
697 int brackets = 1; /* brackets nesting level */
698 while (*pp != 0) {
699 if (*pp == '\\' && pp[1] != 0) pp++;
700 else if (*pp == end_delimiter && --brackets <= 0)
701 break;
702 else if (*pp == start_delimiter)
703 brackets++;
704 pp++;
705 }
706 }
707
708 if (*pp == 0) {
709 if (key != regex) {
710 zend_string_release_ex(key, 0);
711 }
712 if (pp < ZSTR_VAL(regex) + ZSTR_LEN(regex)) {
713 php_error_docref(NULL,E_WARNING, "Null byte in regex");
714 } else if (start_delimiter == end_delimiter) {
715 php_error_docref(NULL,E_WARNING, "No ending delimiter '%c' found", delimiter);
716 } else {
717 php_error_docref(NULL,E_WARNING, "No ending matching delimiter '%c' found", delimiter);
718 }
719 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
720 return NULL;
721 }
722
723 /* Make a copy of the actual pattern. */
724 pattern_len = pp - p;
725 pattern = estrndup(p, pattern_len);
726
727 /* Move on to the options */
728 pp++;
729
730 /* Parse through the options, setting appropriate flags. Display
731 a warning if we encounter an unknown modifier. */
732 while (pp < ZSTR_VAL(regex) + ZSTR_LEN(regex)) {
733 switch (*pp++) {
734 /* Perl compatible options */
735 case 'i': coptions |= PCRE2_CASELESS; break;
736 case 'm': coptions |= PCRE2_MULTILINE; break;
737 case 's': coptions |= PCRE2_DOTALL; break;
738 case 'x': coptions |= PCRE2_EXTENDED; break;
739
740 /* PCRE specific options */
741 case 'A': coptions |= PCRE2_ANCHORED; break;
742 case 'D': coptions |= PCRE2_DOLLAR_ENDONLY;break;
743 case 'S': /* Pass. */ break;
744 case 'X': /* Pass. */ break;
745 case 'U': coptions |= PCRE2_UNGREEDY; break;
746 case 'u': coptions |= PCRE2_UTF;
747 /* In PCRE, by default, \d, \D, \s, \S, \w, and \W recognize only ASCII
748 characters, even in UTF-8 mode. However, this can be changed by setting
749 the PCRE2_UCP option. */
750 #ifdef PCRE2_UCP
751 coptions |= PCRE2_UCP;
752 #endif
753 break;
754 case 'J': coptions |= PCRE2_DUPNAMES; break;
755
756 /* Custom preg options */
757 case 'e': poptions |= PREG_REPLACE_EVAL; break;
758
759 case ' ':
760 case '\n':
761 case '\r':
762 break;
763
764 default:
765 if (pp[-1]) {
766 php_error_docref(NULL,E_WARNING, "Unknown modifier '%c'", pp[-1]);
767 } else {
768 php_error_docref(NULL,E_WARNING, "Null byte in regex");
769 }
770 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
771 efree(pattern);
772 if (key != regex) {
773 zend_string_release_ex(key, 0);
774 }
775 return NULL;
776 }
777 }
778
779 if (poptions & PREG_REPLACE_EVAL) {
780 php_error_docref(NULL, E_WARNING, "The /e modifier is no longer supported, use preg_replace_callback instead");
781 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
782 efree(pattern);
783 if (key != regex) {
784 zend_string_release_ex(key, 0);
785 }
786 return NULL;
787 }
788
789 if (key != regex) {
790 tables = (uint8_t *)zend_hash_find_ptr(&char_tables, BG(ctype_string));
791 if (!tables) {
792 zend_string *_k;
793 tables = pcre2_maketables(gctx);
794 if (UNEXPECTED(!tables)) {
795 php_error_docref(NULL,E_WARNING, "Failed to generate locale character tables");
796 pcre_handle_exec_error(PCRE2_ERROR_NOMEMORY);
797 zend_string_release_ex(key, 0);
798 efree(pattern);
799 return NULL;
800 }
801 _k = zend_string_init(ZSTR_VAL(BG(ctype_string)), ZSTR_LEN(BG(ctype_string)), 1);
802 GC_MAKE_PERSISTENT_LOCAL(_k);
803 zend_hash_add_ptr(&char_tables, _k, (void *)tables);
804 zend_string_release(_k);
805 }
806 }
807 pcre2_set_character_tables(cctx, tables);
808
809 /* Compile pattern and display a warning if compilation failed. */
810 re = pcre2_compile((PCRE2_SPTR)pattern, pattern_len, coptions, &errnumber, &erroffset, cctx);
811
812 if (re == NULL) {
813 if (key != regex) {
814 zend_string_release_ex(key, 0);
815 }
816 pcre2_get_error_message(errnumber, error, sizeof(error));
817 php_error_docref(NULL,E_WARNING, "Compilation failed: %s at offset %zu", error, erroffset);
818 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
819 efree(pattern);
820 return NULL;
821 }
822
823 #ifdef HAVE_PCRE_JIT_SUPPORT
824 if (PCRE_G(jit)) {
825 /* Enable PCRE JIT compiler */
826 rc = pcre2_jit_compile(re, PCRE2_JIT_COMPLETE);
827 if (EXPECTED(rc >= 0)) {
828 size_t jit_size = 0;
829 if (!pcre2_pattern_info(re, PCRE2_INFO_JITSIZE, &jit_size) && jit_size > 0) {
830 poptions |= PREG_JIT;
831 }
832 } else if (rc == PCRE2_ERROR_NOMEMORY) {
833 php_error_docref(NULL, E_WARNING,
834 "Allocation of JIT memory failed, PCRE JIT will be disabled. "
835 "This is likely caused by security restrictions. "
836 "Either grant PHP permission to allocate executable memory, or set pcre.jit=0");
837 PCRE_G(jit) = 0;
838 } else {
839 pcre2_get_error_message(rc, error, sizeof(error));
840 php_error_docref(NULL, E_WARNING, "JIT compilation failed: %s", error);
841 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
842 }
843 }
844 #endif
845 efree(pattern);
846
847 /*
848 * If we reached cache limit, clean out the items from the head of the list;
849 * these are supposedly the oldest ones (but not necessarily the least used
850 * ones).
851 */
852 if (zend_hash_num_elements(&PCRE_G(pcre_cache)) == PCRE_CACHE_SIZE) {
853 int num_clean = PCRE_CACHE_SIZE / 8;
854 zend_hash_apply_with_argument(&PCRE_G(pcre_cache), pcre_clean_cache, &num_clean);
855 }
856
857 /* Store the compiled pattern and extra info in the cache. */
858 new_entry.re = re;
859 new_entry.preg_options = poptions;
860 new_entry.compile_options = coptions;
861 new_entry.refcount = 0;
862
863 rc = pcre2_pattern_info(re, PCRE2_INFO_CAPTURECOUNT, &new_entry.capture_count);
864 if (rc < 0) {
865 if (key != regex) {
866 zend_string_release_ex(key, 0);
867 }
868 php_error_docref(NULL, E_WARNING, "Internal pcre2_pattern_info() error %d", rc);
869 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
870 return NULL;
871 }
872
873 rc = pcre2_pattern_info(re, PCRE2_INFO_NAMECOUNT, &new_entry.name_count);
874 if (rc < 0) {
875 if (key != regex) {
876 zend_string_release_ex(key, 0);
877 }
878 php_error_docref(NULL, E_WARNING, "Internal pcre_pattern_info() error %d", rc);
879 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
880 return NULL;
881 }
882
883 /*
884 * Interned strings are not duplicated when stored in HashTable,
885 * but all the interned strings created during HTTP request are removed
886 * at end of request. However PCRE_G(pcre_cache) must be consistent
887 * on the next request as well. So we disable usage of interned strings
888 * as hash keys especually for this table.
889 * See bug #63180
890 */
891 if (!(GC_FLAGS(key) & IS_STR_PERMANENT) && !PCRE_G(per_request_cache)) {
892 zend_string *str = zend_string_init(ZSTR_VAL(key), ZSTR_LEN(key), 1);
893 GC_MAKE_PERSISTENT_LOCAL(str);
894
895 ret = zend_hash_add_new_mem(&PCRE_G(pcre_cache), str, &new_entry, sizeof(pcre_cache_entry));
896 zend_string_release(str);
897 } else {
898 ret = zend_hash_add_new_mem(&PCRE_G(pcre_cache), key, &new_entry, sizeof(pcre_cache_entry));
899 }
900
901 if (key != regex) {
902 zend_string_release_ex(key, 0);
903 }
904
905 return ret;
906 }
907 /* }}} */
908
909 /* {{{ pcre_get_compiled_regex_cache */
pcre_get_compiled_regex_cache(zend_string * regex)910 PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex)
911 {
912 return pcre_get_compiled_regex_cache_ex(regex, 1);
913 }
914 /* }}} */
915
916 /* {{{ pcre_get_compiled_regex */
pcre_get_compiled_regex(zend_string * regex,uint32_t * capture_count)917 PHPAPI pcre2_code *pcre_get_compiled_regex(zend_string *regex, uint32_t *capture_count)
918 {
919 pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex);
920
921 if (capture_count) {
922 *capture_count = pce ? pce->capture_count : 0;
923 }
924
925 return pce ? pce->re : NULL;
926 }
927 /* }}} */
928
929 /* {{{ pcre_get_compiled_regex_ex */
pcre_get_compiled_regex_ex(zend_string * regex,uint32_t * capture_count,uint32_t * preg_options,uint32_t * compile_options)930 PHPAPI pcre2_code* pcre_get_compiled_regex_ex(zend_string *regex, uint32_t *capture_count, uint32_t *preg_options, uint32_t *compile_options)
931 {
932 pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex);
933
934 if (preg_options) {
935 *preg_options = pce ? pce->preg_options : 0;
936 }
937 if (compile_options) {
938 *compile_options = pce ? pce->compile_options : 0;
939 }
940 if (capture_count) {
941 *capture_count = pce ? pce->capture_count : 0;
942 }
943
944 return pce ? pce->re : NULL;
945 }
946 /* }}} */
947
948 /* XXX For the cases where it's only about match yes/no and no capture
949 required, perhaps just a minimum sized data would suffice. */
php_pcre_create_match_data(uint32_t capture_count,pcre2_code * re)950 PHPAPI pcre2_match_data *php_pcre_create_match_data(uint32_t capture_count, pcre2_code *re)
951 {/*{{{*/
952
953 assert(NULL != re);
954
955 if (EXPECTED(!mdata_used)) {
956 int rc = 0;
957
958 if (!capture_count) {
959 /* As we deal with a non cached pattern, no other way to gather this info. */
960 rc = pcre2_pattern_info(re, PCRE2_INFO_CAPTURECOUNT, &capture_count);
961 }
962
963 if (rc >= 0 && capture_count + 1 <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
964 mdata_used = 1;
965 return mdata;
966 }
967 }
968
969 return pcre2_match_data_create_from_pattern(re, gctx);
970 }/*}}}*/
971
php_pcre_free_match_data(pcre2_match_data * match_data)972 PHPAPI void php_pcre_free_match_data(pcre2_match_data *match_data)
973 {/*{{{*/
974 if (UNEXPECTED(match_data != mdata)) {
975 pcre2_match_data_free(match_data);
976 } else {
977 mdata_used = 0;
978 }
979 }/*}}}*/
980
init_unmatched_null_pair(void)981 static void init_unmatched_null_pair(void) {
982 zval val1, val2;
983 ZVAL_NULL(&val1);
984 ZVAL_LONG(&val2, -1);
985 ZVAL_ARR(&PCRE_G(unmatched_null_pair), zend_new_pair(&val1, &val2));
986 }
987
init_unmatched_empty_pair(void)988 static void init_unmatched_empty_pair(void) {
989 zval val1, val2;
990 ZVAL_EMPTY_STRING(&val1);
991 ZVAL_LONG(&val2, -1);
992 ZVAL_ARR(&PCRE_G(unmatched_empty_pair), zend_new_pair(&val1, &val2));
993 }
994
populate_match_value_str(zval * val,const char * subject,PCRE2_SIZE start_offset,PCRE2_SIZE end_offset)995 static zend_always_inline void populate_match_value_str(
996 zval *val, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset) {
997 ZVAL_STRINGL_FAST(val, subject + start_offset, end_offset - start_offset);
998 }
999
/* Fill `val` with the captured text, or - when the group is unset
 * (start_offset == PCRE2_UNSET) - with NULL or "" depending on
 * `unmatched_as_null`. */
static inline void populate_match_value(
		zval *val, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset,
		uint32_t unmatched_as_null) {
	if (start_offset != PCRE2_UNSET) {
		populate_match_value_str(val, subject, start_offset, end_offset);
		return;
	}

	if (unmatched_as_null) {
		ZVAL_NULL(val);
	} else {
		ZVAL_EMPTY_STRING(val);
	}
}
1013
/* Store `val` under `name` in the `subpats` array.
 *
 * With the DUPNAMES option several groups can share one name. A group that
 * matched always overwrites the slot, whereas an unmatched one is only added
 * if the name is not present yet, so the entry that actually has a value is
 * kept. A reference is added to `val` only if it was stored. */
static inline void add_named(
		zval *subpats, zend_string *name, zval *val, bool unmatched) {
	HashTable *table = Z_ARRVAL_P(subpats);

	if (unmatched) {
		if (zend_hash_add(table, name, val) == NULL) {
			/* Name already taken: nothing stored, no reference to add. */
			return;
		}
	} else {
		zend_hash_update(table, name, val);
	}

	Z_TRY_ADDREF_P(val);
}
1027
1028 /* {{{ add_offset_pair */
/* Append a (match, offset) pair to `result`, and also store it under `name`
 * when a name is given. Unset groups reuse the lazily created shared
 * [null, -1] or ["", -1] pair kept in the module globals. */
static inline void add_offset_pair(
		zval *result, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset,
		zend_string *name, uint32_t unmatched_as_null)
{
	const bool is_unset = (start_offset == PCRE2_UNSET);
	zval pair;

	if (!is_unset) {
		zval text, position;

		populate_match_value_str(&text, subject, start_offset, end_offset);
		ZVAL_LONG(&position, start_offset);
		ZVAL_ARR(&pair, zend_new_pair(&text, &position));
	} else if (unmatched_as_null) {
		if (Z_ISUNDEF(PCRE_G(unmatched_null_pair))) {
			init_unmatched_null_pair();
		}
		ZVAL_COPY(&pair, &PCRE_G(unmatched_null_pair));
	} else {
		if (Z_ISUNDEF(PCRE_G(unmatched_empty_pair))) {
			init_unmatched_empty_pair();
		}
		ZVAL_COPY(&pair, &PCRE_G(unmatched_empty_pair));
	}

	if (name) {
		add_named(result, name, &pair, is_unset);
	}
	zend_hash_next_index_insert(Z_ARRVAL_P(result), &pair);
}
1060 /* }}} */
1061
/* Fill the `subpats` array with the captures of one match.
 *
 * subpats      - destination array (numeric keys, plus named keys if any)
 * subject      - subject string the offsets refer to
 * offsets      - PCRE2 ovector: start/end offset pairs, one pair per group
 * subpat_names - group-number -> name table, or NULL if there are no names
 * num_subpats  - total number of groups in the pattern (including group 0)
 * count        - number of groups reported for this match
 * mark         - MARK name to store under "MARK", or NULL
 * flags        - PREG_OFFSET_CAPTURE / PREG_UNMATCHED_AS_NULL are honoured
 */
static void populate_subpat_array(
		zval *subpats, const char *subject, PCRE2_SIZE *offsets, zend_string **subpat_names,
		uint32_t num_subpats, int count, const PCRE2_SPTR mark, zend_long flags) {
	const bool want_offsets = (flags & PREG_OFFSET_CAPTURE) != 0;
	const bool null_unmatched = (flags & PREG_UNMATCHED_AS_NULL) != 0;
	zend_string *name;
	zval entry;
	int n;

	/* Groups reported for this match attempt. */
	for (n = 0; n < count; n++) {
		name = subpat_names ? subpat_names[n] : NULL;

		if (want_offsets) {
			add_offset_pair(
				subpats, subject, offsets[2*n], offsets[2*n+1], name, null_unmatched);
			continue;
		}

		populate_match_value(
			&entry, subject, offsets[2*n], offsets[2*n+1], null_unmatched);
		if (name) {
			add_named(subpats, name, &entry, offsets[2*n] == PCRE2_UNSET);
		}
		zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &entry);
	}

	/* Trailing groups that were not reported are only materialised when
	 * PREG_UNMATCHED_AS_NULL is requested. */
	if (null_unmatched) {
		for (n = count; n < num_subpats; n++) {
			name = subpat_names ? subpat_names[n] : NULL;

			if (want_offsets) {
				add_offset_pair(subpats, NULL, PCRE2_UNSET, PCRE2_UNSET, name, 1);
				continue;
			}

			ZVAL_NULL(&entry);
			if (name) {
				/* Do not clobber a same-named group that did match. */
				zend_hash_add(Z_ARRVAL_P(subpats), name, &entry);
			}
			zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &entry);
		}
	}

	/* Add MARK, if available */
	if (mark) {
		add_assoc_string_ex(subpats, "MARK", sizeof("MARK") - 1, (char *)mark);
	}
}
1129
/* Shared implementation of preg_match() (global == 0) and preg_match_all()
 * (global != 0): parses the userland arguments, resolves the pattern through
 * the regex cache and delegates to php_pcre_match_impl(). */
static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) /* {{{ */
{
	zend_string *pattern;            /* Regular expression */
	zend_string *haystack;           /* String to match against */
	zval *matches_out = NULL;        /* Array for subpatterns */
	zend_long match_flags = 0;       /* Match control flags */
	zend_long offset = 0;            /* Where the new search starts */
	pcre_cache_entry *entry;         /* Compiled regular expression */

	ZEND_PARSE_PARAMETERS_START(2, 5)
		Z_PARAM_STR(pattern)
		Z_PARAM_STR(haystack)
		Z_PARAM_OPTIONAL
		Z_PARAM_ZVAL(matches_out)
		Z_PARAM_LONG(match_flags)
		Z_PARAM_LONG(offset)
	ZEND_PARSE_PARAMETERS_END();

	/* Compile regex or get it from cache. */
	entry = pcre_get_compiled_regex_cache(pattern);
	if (entry == NULL) {
		RETURN_FALSE;
	}

	/* The entry's refcount is raised for the duration of the match. */
	entry->refcount++;
	php_pcre_match_impl(entry, haystack, return_value, matches_out,
		global, ZEND_NUM_ARGS() >= 4, match_flags, offset);
	entry->refcount--;
}
1159 /* }}} */
1160
is_known_valid_utf8(zend_string * subject_str,PCRE2_SIZE start_offset)1161 static zend_always_inline bool is_known_valid_utf8(
1162 zend_string *subject_str, PCRE2_SIZE start_offset) {
1163 if (!(GC_FLAGS(subject_str) & IS_STR_VALID_UTF8)) {
1164 /* We don't know whether the string is valid UTF-8 or not. */
1165 return 0;
1166 }
1167
1168 if (start_offset == ZSTR_LEN(subject_str)) {
1169 /* Degenerate case: Offset points to end of string. */
1170 return 1;
1171 }
1172
1173 /* Check that the offset does not point to an UTF-8 continuation byte. */
1174 return (ZSTR_VAL(subject_str)[start_offset] & 0xc0) != 0x80;
1175 }
1176
1177 /* {{{ php_pcre_match_impl() */
php_pcre_match_impl(pcre_cache_entry * pce,zend_string * subject_str,zval * return_value,zval * subpats,int global,int use_flags,zend_long flags,zend_off_t start_offset)1178 PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str, zval *return_value,
1179 zval *subpats, int global, int use_flags, zend_long flags, zend_off_t start_offset)
1180 {
1181 zval result_set, /* Holds a set of subpatterns after
1182 a global match */
1183 *match_sets = NULL; /* An array of sets of matches for each
1184 subpattern after a global match */
1185 uint32_t options; /* Execution options */
1186 int count; /* Count of matched subpatterns */
1187 PCRE2_SIZE *offsets; /* Array of subpattern offsets */
1188 uint32_t num_subpats; /* Number of captured subpatterns */
1189 int matched; /* Has anything matched */
1190 zend_string **subpat_names; /* Array for named subpatterns */
1191 size_t i;
1192 uint32_t subpats_order; /* Order of subpattern matches */
1193 uint32_t offset_capture; /* Capture match offsets: yes/no */
1194 uint32_t unmatched_as_null; /* Null non-matches: yes/no */
1195 PCRE2_SPTR mark = NULL; /* Target for MARK name */
1196 zval marks; /* Array of marks for PREG_PATTERN_ORDER */
1197 pcre2_match_data *match_data;
1198 PCRE2_SIZE start_offset2, orig_start_offset;
1199
1200 char *subject = ZSTR_VAL(subject_str);
1201 size_t subject_len = ZSTR_LEN(subject_str);
1202
1203 ZVAL_UNDEF(&marks);
1204
1205 /* Overwrite the passed-in value for subpatterns with an empty array. */
1206 if (subpats != NULL) {
1207 subpats = zend_try_array_init(subpats);
1208 if (!subpats) {
1209 RETURN_THROWS();
1210 }
1211 }
1212
1213 subpats_order = global ? PREG_PATTERN_ORDER : 0;
1214
1215 if (use_flags) {
1216 offset_capture = flags & PREG_OFFSET_CAPTURE;
1217 unmatched_as_null = flags & PREG_UNMATCHED_AS_NULL;
1218
1219 /*
1220 * subpats_order is pre-set to pattern mode so we change it only if
1221 * necessary.
1222 */
1223 if (flags & 0xff) {
1224 subpats_order = flags & 0xff;
1225 }
1226 if ((global && (subpats_order < PREG_PATTERN_ORDER || subpats_order > PREG_SET_ORDER)) ||
1227 (!global && subpats_order != 0)) {
1228 zend_argument_value_error(4, "must be a PREG_* constant");
1229 RETURN_THROWS();
1230 }
1231 } else {
1232 offset_capture = 0;
1233 unmatched_as_null = 0;
1234 }
1235
1236 /* Negative offset counts from the end of the string. */
1237 if (start_offset < 0) {
1238 if ((PCRE2_SIZE)-start_offset <= subject_len) {
1239 start_offset2 = subject_len + start_offset;
1240 } else {
1241 start_offset2 = 0;
1242 }
1243 } else {
1244 start_offset2 = (PCRE2_SIZE)start_offset;
1245 }
1246
1247 if (start_offset2 > subject_len) {
1248 pcre_handle_exec_error(PCRE2_ERROR_BADOFFSET);
1249 RETURN_FALSE;
1250 }
1251
1252 /* Calculate the size of the offsets array, and allocate memory for it. */
1253 num_subpats = pce->capture_count + 1;
1254
1255 /*
1256 * Build a mapping from subpattern numbers to their names. We will
1257 * allocate the table only if there are any named subpatterns.
1258 */
1259 subpat_names = NULL;
1260 if (subpats && pce->name_count > 0) {
1261 subpat_names = make_subpats_table(num_subpats, pce);
1262 if (!subpat_names) {
1263 RETURN_FALSE;
1264 }
1265 }
1266
1267 /* Allocate match sets array and initialize the values. */
1268 if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
1269 match_sets = (zval *)safe_emalloc(num_subpats, sizeof(zval), 0);
1270 for (i=0; i<num_subpats; i++) {
1271 array_init(&match_sets[i]);
1272 }
1273 }
1274
1275 matched = 0;
1276 PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1277
1278 if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
1279 match_data = mdata;
1280 } else {
1281 match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
1282 if (!match_data) {
1283 PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1284 if (subpat_names) {
1285 free_subpats_table(subpat_names, num_subpats);
1286 }
1287 if (match_sets) {
1288 efree(match_sets);
1289 }
1290 RETURN_FALSE;
1291 }
1292 }
1293
1294 orig_start_offset = start_offset2;
1295 options =
1296 (pce->compile_options & PCRE2_UTF) && !is_known_valid_utf8(subject_str, orig_start_offset)
1297 ? 0 : PCRE2_NO_UTF_CHECK;
1298
1299 /* Execute the regular expression. */
1300 #ifdef HAVE_PCRE_JIT_SUPPORT
1301 if ((pce->preg_options & PREG_JIT) && options) {
1302 count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1303 PCRE2_NO_UTF_CHECK, match_data, mctx);
1304 } else
1305 #endif
1306 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1307 options, match_data, mctx);
1308
1309 while (1) {
1310 /* If something has matched */
1311 if (count >= 0) {
1312 /* Check for too many substrings condition. */
1313 if (UNEXPECTED(count == 0)) {
1314 php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
1315 count = num_subpats;
1316 }
1317
1318 matched:
1319 matched++;
1320
1321 offsets = pcre2_get_ovector_pointer(match_data);
1322
1323 /* If subpatterns array has been passed, fill it in with values. */
1324 if (subpats != NULL) {
1325 /* Try to get the list of substrings and display a warning if failed. */
1326 if (offsets[1] < offsets[0]) {
1327 if (subpat_names) {
1328 free_subpats_table(subpat_names, num_subpats);
1329 }
1330 if (match_sets) efree(match_sets);
1331 php_error_docref(NULL, E_WARNING, "Get subpatterns list failed");
1332 RETURN_FALSE;
1333 }
1334
1335 if (global) { /* global pattern matching */
1336 if (subpats && subpats_order == PREG_PATTERN_ORDER) {
1337 /* For each subpattern, insert it into the appropriate array. */
1338 if (offset_capture) {
1339 for (i = 0; i < count; i++) {
1340 add_offset_pair(
1341 &match_sets[i], subject, offsets[2*i], offsets[2*i+1],
1342 NULL, unmatched_as_null);
1343 }
1344 } else {
1345 for (i = 0; i < count; i++) {
1346 zval val;
1347 populate_match_value(
1348 &val, subject, offsets[2*i], offsets[2*i+1], unmatched_as_null);
1349 zend_hash_next_index_insert_new(Z_ARRVAL(match_sets[i]), &val);
1350 }
1351 }
1352 mark = pcre2_get_mark(match_data);
1353 /* Add MARK, if available */
1354 if (mark) {
1355 if (Z_TYPE(marks) == IS_UNDEF) {
1356 array_init(&marks);
1357 }
1358 add_index_string(&marks, matched - 1, (char *) mark);
1359 }
1360 /*
1361 * If the number of captured subpatterns on this run is
1362 * less than the total possible number, pad the result
1363 * arrays with NULLs or empty strings.
1364 */
1365 if (count < num_subpats) {
1366 for (; i < num_subpats; i++) {
1367 if (offset_capture) {
1368 add_offset_pair(
1369 &match_sets[i], NULL, PCRE2_UNSET, PCRE2_UNSET,
1370 NULL, unmatched_as_null);
1371 } else if (unmatched_as_null) {
1372 add_next_index_null(&match_sets[i]);
1373 } else {
1374 add_next_index_str(&match_sets[i], ZSTR_EMPTY_ALLOC());
1375 }
1376 }
1377 }
1378 } else {
1379 /* Allocate and populate the result set array */
1380 array_init_size(&result_set, count + (mark ? 1 : 0));
1381 mark = pcre2_get_mark(match_data);
1382 populate_subpat_array(
1383 &result_set, subject, offsets, subpat_names,
1384 num_subpats, count, mark, flags);
1385 /* And add it to the output array */
1386 zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &result_set);
1387 }
1388 } else { /* single pattern matching */
1389 /* For each subpattern, insert it into the subpatterns array. */
1390 mark = pcre2_get_mark(match_data);
1391 populate_subpat_array(
1392 subpats, subject, offsets, subpat_names, num_subpats, count, mark, flags);
1393 break;
1394 }
1395 }
1396
1397 /* Advance to the next piece. */
1398 start_offset2 = offsets[1];
1399
1400 /* If we have matched an empty string, mimic what Perl's /g options does.
1401 This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
1402 the match again at the same point. If this fails (picked up above) we
1403 advance to the next character. */
1404 if (start_offset2 == offsets[0]) {
1405 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1406 PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
1407 if (count >= 0) {
1408 if (global) {
1409 goto matched;
1410 } else {
1411 break;
1412 }
1413 } else if (count == PCRE2_ERROR_NOMATCH) {
1414 /* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
1415 this is not necessarily the end. We need to advance
1416 the start offset, and continue. Fudge the offset values
1417 to achieve this, unless we're already at the end of the string. */
1418 if (start_offset2 < subject_len) {
1419 size_t unit_len = calculate_unit_length(pce, subject + start_offset2);
1420
1421 start_offset2 += unit_len;
1422 } else {
1423 break;
1424 }
1425 } else {
1426 goto error;
1427 }
1428 }
1429 } else if (count == PCRE2_ERROR_NOMATCH) {
1430 break;
1431 } else {
1432 error:
1433 pcre_handle_exec_error(count);
1434 break;
1435 }
1436
1437 if (!global) {
1438 break;
1439 }
1440
1441 /* Execute the regular expression. */
1442 #ifdef HAVE_PCRE_JIT_SUPPORT
1443 if ((pce->preg_options & PREG_JIT)) {
1444 if (PCRE2_UNSET == start_offset2 || start_offset2 > subject_len) {
1445 pcre_handle_exec_error(PCRE2_ERROR_BADOFFSET);
1446 break;
1447 }
1448 count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1449 PCRE2_NO_UTF_CHECK, match_data, mctx);
1450 } else
1451 #endif
1452 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1453 PCRE2_NO_UTF_CHECK, match_data, mctx);
1454 }
1455 if (match_data != mdata) {
1456 pcre2_match_data_free(match_data);
1457 }
1458
1459 /* Add the match sets to the output array and clean up */
1460 if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
1461 if (subpat_names) {
1462 for (i = 0; i < num_subpats; i++) {
1463 if (subpat_names[i]) {
1464 zend_hash_update(Z_ARRVAL_P(subpats), subpat_names[i], &match_sets[i]);
1465 Z_ADDREF(match_sets[i]);
1466 }
1467 zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i]);
1468 }
1469 } else {
1470 for (i = 0; i < num_subpats; i++) {
1471 zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i]);
1472 }
1473 }
1474 efree(match_sets);
1475
1476 if (Z_TYPE(marks) != IS_UNDEF) {
1477 add_assoc_zval(subpats, "MARK", &marks);
1478 }
1479 }
1480
1481 if (subpat_names) {
1482 free_subpats_table(subpat_names, num_subpats);
1483 }
1484
1485 if (PCRE_G(error_code) == PHP_PCRE_NO_ERROR) {
1486 /* If there was no error and we're in /u mode, remember that the string is valid UTF-8. */
1487 if ((pce->compile_options & PCRE2_UTF)
1488 && !ZSTR_IS_INTERNED(subject_str) && orig_start_offset == 0) {
1489 GC_ADD_FLAGS(subject_str, IS_STR_VALID_UTF8);
1490 }
1491
1492 RETVAL_LONG(matched);
1493 } else {
1494 RETVAL_FALSE;
1495 }
1496 }
1497 /* }}} */
1498
1499 /* {{{ Perform a Perl-style regular expression match */
PHP_FUNCTION(preg_match)1500 PHP_FUNCTION(preg_match)
1501 {
1502 php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
1503 }
1504 /* }}} */
1505
1506 /* {{{ Perform a Perl-style global regular expression match */
PHP_FUNCTION(preg_match_all)1507 PHP_FUNCTION(preg_match_all)
1508 {
1509 php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
1510 }
1511 /* }}} */
1512
1513 /* {{{ preg_get_backref */
/* Parse a backreference token located at *str.
 *
 * Accepted forms are an introducer character followed by one or two decimal
 * digits (e.g. `\1`, `$12`), or the braced form `${N}` / `${NN}`. The first
 * character is not inspected except to detect the `${` form; callers only
 * invoke this on '\\' or '$'. The buffer must be NUL-terminated.
 *
 * str     - in/out: on success advanced past the whole token
 * backref - out: receives the parsed group number (0..99) on success
 *
 * Returns 1 on success. Returns 0 if no valid backreference is present, in
 * which case neither *str nor *backref is modified.
 */
static int preg_get_backref(char **str, int *backref)
{
	char *walk = *str;
	int in_brace = 0;
	int number;

	/* A lone introducer at the end of the string is not a backreference. */
	if (walk[1] == 0) {
		return 0;
	}

	if (*walk == '$' && walk[1] == '{') {
		in_brace = 1;
		walk++;
	}
	walk++;

	/* First digit is mandatory. */
	if (*walk < '0' || *walk > '9') {
		return 0;
	}
	number = *walk - '0';
	walk++;

	/* Second digit is optional. */
	if (*walk >= '0' && *walk <= '9') {
		number = number * 10 + (*walk - '0');
		walk++;
	}

	if (in_brace) {
		if (*walk != '}') {
			return 0;
		}
		walk++;
	}

	/* Commit the outputs only once the whole token has been validated. */
	*backref = number;
	*str = walk;
	return 1;
}
1549 /* }}} */
1550
1551 /* {{{ preg_do_repl_func */
/* Invoke the user callback for one match and return the replacement string.
 *
 * The callback receives a single array argument built by
 * populate_subpat_array(). A non-string return value is converted to a
 * string. If the call fails, a warning is raised (unless an exception is
 * already pending) and a copy of the matched text is returned instead. */
static zend_string *preg_do_repl_func(zend_fcall_info *fci, zend_fcall_info_cache *fcc, const char *subject, PCRE2_SIZE *offsets, zend_string **subpat_names, uint32_t num_subpats, int count, const PCRE2_SPTR mark, zend_long flags)
{
	zval callback_ret;         /* Function return value */
	zval match_arr;            /* Argument to pass to function */
	zend_string *replacement;
	bool call_ok;

	array_init_size(&match_arr, count + (mark ? 1 : 0));
	populate_subpat_array(&match_arr, subject, offsets, subpat_names, num_subpats, count, mark, flags);

	fci->params = &match_arr;
	fci->param_count = 1;
	fci->retval = &callback_ret;

	call_ok = zend_call_function(fci, fcc) == SUCCESS && Z_TYPE(callback_ret) != IS_UNDEF;

	if (!call_ok) {
		if (!EG(exception)) {
			php_error_docref(NULL, E_WARNING, "Unable to call custom replacement function");
		}
		/* Fall back to the text that was matched. */
		replacement = zend_string_init(&subject[offsets[0]], offsets[1] - offsets[0], 0);
	} else if (EXPECTED(Z_TYPE(callback_ret) == IS_STRING)) {
		/* Take the string straight out of the return value. */
		replacement = Z_STR(callback_ret);
	} else {
		replacement = zval_get_string_func(&callback_ret);
		zval_ptr_dtor(&callback_ret);
	}

	zval_ptr_dtor(&match_arr);

	return replacement;
}
1584 /* }}} */
1585
1586 /* {{{ php_pcre_replace */
php_pcre_replace(zend_string * regex,zend_string * subject_str,const char * subject,size_t subject_len,zend_string * replace_str,size_t limit,size_t * replace_count)1587 PHPAPI zend_string *php_pcre_replace(zend_string *regex,
1588 zend_string *subject_str,
1589 const char *subject, size_t subject_len,
1590 zend_string *replace_str,
1591 size_t limit, size_t *replace_count)
1592 {
1593 pcre_cache_entry *pce; /* Compiled regular expression */
1594 zend_string *result; /* Function result */
1595
1596 /* Abort on pending exception, e.g. thrown from __toString(). */
1597 if (UNEXPECTED(EG(exception))) {
1598 return NULL;
1599 }
1600
1601 /* Compile regex or get it from cache. */
1602 if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
1603 return NULL;
1604 }
1605 pce->refcount++;
1606 result = php_pcre_replace_impl(pce, subject_str, subject, subject_len, replace_str,
1607 limit, replace_count);
1608 pce->refcount--;
1609
1610 return result;
1611 }
1612 /* }}} */
1613
1614 /* {{{ php_pcre_replace_impl() */
/* Replace matches of the compiled pattern `pce` in `subject` with
 * `replace_str`, expanding the backreference tokens recognised by
 * preg_get_backref().
 *
 * pce           - compiled pattern cache entry
 * subject_str   - may be NULL; when non-NULL and no replacement was made, the
 *                 result is zend_string_copy(subject_str) rather than a newly
 *                 built string
 * subject       - bytes to search
 * subject_len   - length of `subject`
 * replace_str   - replacement template
 * limit         - maximum number of replacements; no further replacement is
 *                 made once it has dropped to 0
 * replace_count - optional counter, incremented once per replacement
 *
 * Returns the resulting string, or NULL on failure. Internal failures set
 * PCRE_G(error_code) directly; PCRE2 execution errors are reported through
 * pcre_handle_exec_error().
 */
PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *subject_str, const char *subject, size_t subject_len, zend_string *replace_str, size_t limit, size_t *replace_count)
{
	uint32_t		 options;			/* Execution options */
	int				 count;				/* Count of matched subpatterns */
	PCRE2_SIZE		*offsets;			/* Array of subpattern offsets */
	uint32_t		 num_subpats;		/* Number of captured subpatterns */
	size_t			 new_len;			/* Length of needed storage */
	size_t			 alloc_len;			/* Actual allocated length */
	size_t			 match_len;			/* Length of the current match */
	int				 backref;			/* Backreference number */
	PCRE2_SIZE		 start_offset;		/* Where the new search starts */
	size_t			 last_end_offset;	/* Where the last search ended */
	char			*walkbuf,			/* Location of current replacement in the result */
					*walk,				/* Used to walk the replacement string */
					 walk_last;			/* Last walked character */
	const char		*match,				/* The current match */
					*piece,				/* The current piece of subject */
					*replace_end;		/* End of replacement string */
	size_t			result_len;			/* Length of result */
	zend_string		*result;			/* Result of replacement */
	pcre2_match_data *match_data;

	/* Total number of groups, including group 0 (the whole match). */
	num_subpats = pce->capture_count + 1;
	alloc_len = 0;
	result = NULL;

	/* Initialize */
	match = NULL;
	start_offset = 0;
	last_end_offset = 0;
	result_len = 0;
	PCRE_G(error_code) = PHP_PCRE_NO_ERROR;

	/* Use the shared match data block when it is free and large enough,
	 * otherwise create one for this pattern. */
	if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
		match_data = mdata;
	} else {
		match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
		if (!match_data) {
			PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
			return NULL;
		}
	}

	/* In UTF mode the first match attempt passes no options, so
	 * PCRE2_NO_UTF_CHECK is not set for it; every later attempt below
	 * passes PCRE2_NO_UTF_CHECK. */
	options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;

	/* Execute the regular expression. */
#ifdef HAVE_PCRE_JIT_SUPPORT
	if ((pce->preg_options & PREG_JIT) && options) {
		count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
				PCRE2_NO_UTF_CHECK, match_data, mctx);
	} else
#endif
	count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
			options, match_data, mctx);

	while (1) {
		/* Unreplaced text starts where the previous match ended. */
		piece = subject + last_end_offset;

		if (count >= 0 && limit > 0) {
			bool simple_string;

			/* Check for too many substrings condition. */
			if (UNEXPECTED(count == 0)) {
				php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
				count = num_subpats;
			}

matched:
			offsets = pcre2_get_ovector_pointer(match_data);

			/* A match that ends before it starts is treated as an internal
			 * error; any partial result is discarded. */
			if (UNEXPECTED(offsets[1] < offsets[0])) {
				PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
				if (result) {
					zend_string_release_ex(result, 0);
					result = NULL;
				}
				break;
			}

			if (replace_count) {
				++*replace_count;
			}

			/* Set the match location in subject */
			match = subject + offsets[0];

			new_len = result_len + offsets[0] - last_end_offset; /* part before the match */

			/* First pass over the replacement template: compute how many
			 * bytes the expanded replacement needs, and detect whether it
			 * contains any '\\' or '$' at all. */
			walk = ZSTR_VAL(replace_str);
			replace_end = walk + ZSTR_LEN(replace_str);
			walk_last = 0;
			simple_string = 1;
			while (walk < replace_end) {
				if ('\\' == *walk || '$' == *walk) {
					simple_string = 0;
					/* A character escaped by a preceding backslash takes the
					 * backslash's place, so it adds no length. */
					if (walk_last == '\\') {
						walk++;
						walk_last = 0;
						continue;
					}
					if (preg_get_backref(&walk, &backref)) {
						/* References to groups beyond `count` expand to nothing. */
						if (backref < count)
							new_len += offsets[(backref<<1)+1] - offsets[backref<<1];
						continue;
					}
				}
				new_len++;
				walk++;
				walk_last = walk[-1];
			}

			/* Grow the result buffer to twice the needed length when the
			 * current allocation does not leave room. */
			if (new_len >= alloc_len) {
				alloc_len = zend_safe_address_guarded(2, new_len, ZSTR_MAX_OVERHEAD) - ZSTR_MAX_OVERHEAD;
				if (result == NULL) {
					result = zend_string_alloc(alloc_len, 0);
				} else {
					result = zend_string_extend(result, alloc_len, 0);
				}
			}

			if (match-piece > 0) {
				/* copy the part of the string before the match */
				memcpy(&ZSTR_VAL(result)[result_len], piece, match-piece);
				result_len += (match-piece);
			}

			if (simple_string) {
				/* copy replacement (including its terminating NUL byte) */
				memcpy(&ZSTR_VAL(result)[result_len], ZSTR_VAL(replace_str), ZSTR_LEN(replace_str)+1);
				result_len += ZSTR_LEN(replace_str);
			} else {
				/* copy replacement and backrefs */
				walkbuf = ZSTR_VAL(result) + result_len;

				/* Second pass over the template: same walk as above, this
				 * time writing the expanded bytes. */
				walk = ZSTR_VAL(replace_str);
				walk_last = 0;
				while (walk < replace_end) {
					if ('\\' == *walk || '$' == *walk) {
						if (walk_last == '\\') {
							/* Overwrite the backslash already emitted with
							 * the character it escapes. */
							*(walkbuf-1) = *walk++;
							walk_last = 0;
							continue;
						}
						if (preg_get_backref(&walk, &backref)) {
							if (backref < count) {
								match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
								memcpy(walkbuf, subject + offsets[backref<<1], match_len);
								walkbuf += match_len;
							}
							continue;
						}
					}
					*walkbuf++ = *walk++;
					walk_last = walk[-1];
				}
				*walkbuf = '\0';
				/* increment the result length by how much we've added to the string */
				result_len += (walkbuf - (ZSTR_VAL(result) + result_len));
			}

			limit--;

			/* Advance to the next piece. */
			start_offset = last_end_offset = offsets[1];

			/* If we have matched an empty string, mimic what Perl's /g options does.
			   This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
			   the match again at the same point. If this fails (picked up above) we
			   advance to the next character. */
			if (start_offset == offsets[0]) {
				count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
					PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);

				piece = subject + start_offset;
				if (count >= 0 && limit > 0) {
					goto matched;
				} else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
					/* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
					   this is not necessarily the end. We need to advance
					   the start offset, and continue. Fudge the offset values
					   to achieve this, unless we're already at the end of the string. */
					if (start_offset < subject_len) {
						size_t unit_len = calculate_unit_length(pce, piece);
						start_offset += unit_len;
					} else {
						goto not_matched;
					}
				} else {
					goto error;
				}
			}

		} else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
not_matched:
			/* Nothing was replaced and the caller supplied the subject as a
			 * zend_string: return that string itself. */
			if (!result && subject_str) {
				result = zend_string_copy(subject_str);
				break;
			}
			/* now we know exactly how long it is */
			alloc_len = result_len + subject_len - last_end_offset;
			if (NULL != result) {
				result = zend_string_realloc(result, alloc_len, 0);
			} else {
				result = zend_string_alloc(alloc_len, 0);
			}
			/* stick that last bit of string on our output */
			memcpy(ZSTR_VAL(result) + result_len, piece, subject_len - last_end_offset);
			result_len += subject_len - last_end_offset;
			ZSTR_VAL(result)[result_len] = '\0';
			ZSTR_LEN(result) = result_len;
			break;
		} else {
error:
			/* PCRE2 reported an execution error: record it and drop any
			 * partial result. */
			pcre_handle_exec_error(count);
			if (result) {
				zend_string_release_ex(result, 0);
				result = NULL;
			}
			break;
		}

		/* Look for the next match from the updated start offset. */
#ifdef HAVE_PCRE_JIT_SUPPORT
		if (pce->preg_options & PREG_JIT) {
			count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
					PCRE2_NO_UTF_CHECK, match_data, mctx);
		} else
#endif
		count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
					PCRE2_NO_UTF_CHECK, match_data, mctx);
	}
	/* Only match data created for this call is freed; the shared block is
	 * left alone. */
	if (match_data != mdata) {
		pcre2_match_data_free(match_data);
	}

	return result;
}
1852 /* }}} */
1853
1854 /* {{{ php_pcre_replace_func_impl() */
php_pcre_replace_func_impl(pcre_cache_entry * pce,zend_string * subject_str,const char * subject,size_t subject_len,zend_fcall_info * fci,zend_fcall_info_cache * fcc,size_t limit,size_t * replace_count,zend_long flags)1855 static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_string *subject_str, const char *subject, size_t subject_len, zend_fcall_info *fci, zend_fcall_info_cache *fcc, size_t limit, size_t *replace_count, zend_long flags)
1856 {
1857 uint32_t options; /* Execution options */
1858 int count; /* Count of matched subpatterns */
1859 PCRE2_SIZE *offsets; /* Array of subpattern offsets */
1860 zend_string **subpat_names; /* Array for named subpatterns */
1861 uint32_t num_subpats; /* Number of captured subpatterns */
1862 size_t new_len; /* Length of needed storage */
1863 size_t alloc_len; /* Actual allocated length */
1864 PCRE2_SIZE start_offset; /* Where the new search starts */
1865 size_t last_end_offset; /* Where the last search ended */
1866 const char *match, /* The current match */
1867 *piece; /* The current piece of subject */
1868 size_t result_len; /* Length of result */
1869 zend_string *result; /* Result of replacement */
1870 zend_string *eval_result; /* Result of custom function */
1871 pcre2_match_data *match_data;
1872 bool old_mdata_used;
1873
1874 /* Calculate the size of the offsets array, and allocate memory for it. */
1875 num_subpats = pce->capture_count + 1;
1876
1877 /*
1878 * Build a mapping from subpattern numbers to their names. We will
1879 * allocate the table only if there are any named subpatterns.
1880 */
1881 subpat_names = NULL;
1882 if (UNEXPECTED(pce->name_count > 0)) {
1883 subpat_names = make_subpats_table(num_subpats, pce);
1884 if (!subpat_names) {
1885 return NULL;
1886 }
1887 }
1888
1889 alloc_len = 0;
1890 result = NULL;
1891
1892 /* Initialize */
1893 match = NULL;
1894 start_offset = 0;
1895 last_end_offset = 0;
1896 result_len = 0;
1897 PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1898
1899 old_mdata_used = mdata_used;
1900 if (!old_mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
1901 mdata_used = 1;
1902 match_data = mdata;
1903 } else {
1904 match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
1905 if (!match_data) {
1906 PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1907 if (subpat_names) {
1908 free_subpats_table(subpat_names, num_subpats);
1909 }
1910 mdata_used = old_mdata_used;
1911 return NULL;
1912 }
1913 }
1914
1915 options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
1916
1917 /* Execute the regular expression. */
1918 #ifdef HAVE_PCRE_JIT_SUPPORT
1919 if ((pce->preg_options & PREG_JIT) && options) {
1920 count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1921 PCRE2_NO_UTF_CHECK, match_data, mctx);
1922 } else
1923 #endif
1924 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1925 options, match_data, mctx);
1926
1927 while (1) {
1928 piece = subject + last_end_offset;
1929
1930 if (count >= 0 && limit) {
1931 /* Check for too many substrings condition. */
1932 if (UNEXPECTED(count == 0)) {
1933 php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
1934 count = num_subpats;
1935 }
1936
1937 matched:
1938 offsets = pcre2_get_ovector_pointer(match_data);
1939
1940 if (UNEXPECTED(offsets[1] < offsets[0])) {
1941 PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1942 if (result) {
1943 zend_string_release_ex(result, 0);
1944 result = NULL;
1945 }
1946 break;
1947 }
1948
1949 if (replace_count) {
1950 ++*replace_count;
1951 }
1952
1953 /* Set the match location in subject */
1954 match = subject + offsets[0];
1955
1956 new_len = result_len + offsets[0] - last_end_offset; /* part before the match */
1957
1958 /* Use custom function to get replacement string and its length. */
1959 eval_result = preg_do_repl_func(
1960 fci, fcc, subject, offsets, subpat_names, num_subpats, count,
1961 pcre2_get_mark(match_data), flags);
1962
1963 ZEND_ASSERT(eval_result);
1964 new_len = zend_safe_address_guarded(1, ZSTR_LEN(eval_result) + ZSTR_MAX_OVERHEAD, new_len) -ZSTR_MAX_OVERHEAD;
1965 if (new_len >= alloc_len) {
1966 alloc_len = zend_safe_address_guarded(2, new_len, ZSTR_MAX_OVERHEAD) - ZSTR_MAX_OVERHEAD;
1967 if (result == NULL) {
1968 result = zend_string_alloc(alloc_len, 0);
1969 } else {
1970 result = zend_string_extend(result, alloc_len, 0);
1971 }
1972 }
1973
1974 if (match-piece > 0) {
1975 /* copy the part of the string before the match */
1976 memcpy(ZSTR_VAL(result) + result_len, piece, match-piece);
1977 result_len += (match-piece);
1978 }
1979
1980 /* If using custom function, copy result to the buffer and clean up. */
1981 memcpy(ZSTR_VAL(result) + result_len, ZSTR_VAL(eval_result), ZSTR_LEN(eval_result));
1982 result_len += ZSTR_LEN(eval_result);
1983 zend_string_release_ex(eval_result, 0);
1984
1985 limit--;
1986
1987 /* Advance to the next piece. */
1988 start_offset = last_end_offset = offsets[1];
1989
1990 /* If we have matched an empty string, mimic what Perl's /g options does.
1991 This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
1992 the match again at the same point. If this fails (picked up above) we
1993 advance to the next character. */
1994 if (start_offset == offsets[0]) {
1995 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1996 PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
1997
1998 piece = subject + start_offset;
1999 if (count >= 0 && limit) {
2000 goto matched;
2001 } else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
2002 /* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
2003 this is not necessarily the end. We need to advance
2004 the start offset, and continue. Fudge the offset values
2005 to achieve this, unless we're already at the end of the string. */
2006 if (start_offset < subject_len) {
2007 size_t unit_len = calculate_unit_length(pce, piece);
2008 start_offset += unit_len;
2009 } else {
2010 goto not_matched;
2011 }
2012 } else {
2013 goto error;
2014 }
2015 }
2016
2017 } else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
2018 not_matched:
2019 if (!result && subject_str) {
2020 result = zend_string_copy(subject_str);
2021 break;
2022 }
2023 /* now we know exactly how long it is */
2024 alloc_len = result_len + subject_len - last_end_offset;
2025 if (NULL != result) {
2026 result = zend_string_realloc(result, alloc_len, 0);
2027 } else {
2028 result = zend_string_alloc(alloc_len, 0);
2029 }
2030 /* stick that last bit of string on our output */
2031 memcpy(ZSTR_VAL(result) + result_len, piece, subject_len - last_end_offset);
2032 result_len += subject_len - last_end_offset;
2033 ZSTR_VAL(result)[result_len] = '\0';
2034 ZSTR_LEN(result) = result_len;
2035 break;
2036 } else {
2037 error:
2038 pcre_handle_exec_error(count);
2039 if (result) {
2040 zend_string_release_ex(result, 0);
2041 result = NULL;
2042 }
2043 break;
2044 }
2045 #ifdef HAVE_PCRE_JIT_SUPPORT
2046 if ((pce->preg_options & PREG_JIT)) {
2047 count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
2048 PCRE2_NO_UTF_CHECK, match_data, mctx);
2049 } else
2050 #endif
2051 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
2052 PCRE2_NO_UTF_CHECK, match_data, mctx);
2053 }
2054 if (match_data != mdata) {
2055 pcre2_match_data_free(match_data);
2056 }
2057 mdata_used = old_mdata_used;
2058
2059 if (UNEXPECTED(subpat_names)) {
2060 free_subpats_table(subpat_names, num_subpats);
2061 }
2062
2063 return result;
2064 }
2065 /* }}} */
2066
2067 /* {{{ php_pcre_replace_func */
php_pcre_replace_func(zend_string * regex,zend_string * subject_str,zend_fcall_info * fci,zend_fcall_info_cache * fcc,size_t limit,size_t * replace_count,zend_long flags)2068 static zend_always_inline zend_string *php_pcre_replace_func(zend_string *regex,
2069 zend_string *subject_str,
2070 zend_fcall_info *fci, zend_fcall_info_cache *fcc,
2071 size_t limit, size_t *replace_count, zend_long flags)
2072 {
2073 pcre_cache_entry *pce; /* Compiled regular expression */
2074 zend_string *result; /* Function result */
2075
2076 /* Compile regex or get it from cache. */
2077 if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
2078 return NULL;
2079 }
2080 pce->refcount++;
2081 result = php_pcre_replace_func_impl(
2082 pce, subject_str, ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), fci, fcc,
2083 limit, replace_count, flags);
2084 pce->refcount--;
2085
2086 return result;
2087 }
2088 /* }}} */
2089
2090 /* {{{ php_pcre_replace_array */
php_pcre_replace_array(HashTable * regex,zend_string * replace_str,HashTable * replace_ht,zend_string * subject_str,size_t limit,size_t * replace_count)2091 static zend_string *php_pcre_replace_array(HashTable *regex,
2092 zend_string *replace_str, HashTable *replace_ht,
2093 zend_string *subject_str, size_t limit, size_t *replace_count)
2094 {
2095 zval *regex_entry;
2096 zend_string *result;
2097
2098 zend_string_addref(subject_str);
2099
2100 if (replace_ht) {
2101 uint32_t replace_idx = 0;
2102
2103 /* For each entry in the regex array, get the entry */
2104 ZEND_HASH_FOREACH_VAL(regex, regex_entry) {
2105 /* Make sure we're dealing with strings. */
2106 zend_string *tmp_regex_str;
2107 zend_string *regex_str = zval_get_tmp_string(regex_entry, &tmp_regex_str);
2108 zend_string *replace_entry_str, *tmp_replace_entry_str;
2109 zval *zv;
2110
2111 /* Get current entry */
2112 while (1) {
2113 if (replace_idx == replace_ht->nNumUsed) {
2114 replace_entry_str = ZSTR_EMPTY_ALLOC();
2115 tmp_replace_entry_str = NULL;
2116 break;
2117 }
2118 zv = &replace_ht->arData[replace_idx].val;
2119 replace_idx++;
2120 if (Z_TYPE_P(zv) != IS_UNDEF) {
2121 replace_entry_str = zval_get_tmp_string(zv, &tmp_replace_entry_str);
2122 break;
2123 }
2124 }
2125
2126 /* Do the actual replacement and put the result back into subject_str
2127 for further replacements. */
2128 result = php_pcre_replace(regex_str, subject_str, ZSTR_VAL(subject_str),
2129 ZSTR_LEN(subject_str), replace_entry_str, limit, replace_count);
2130 zend_tmp_string_release(tmp_replace_entry_str);
2131 zend_tmp_string_release(tmp_regex_str);
2132 zend_string_release_ex(subject_str, 0);
2133 subject_str = result;
2134 if (UNEXPECTED(result == NULL)) {
2135 break;
2136 }
2137 } ZEND_HASH_FOREACH_END();
2138
2139 } else {
2140 ZEND_ASSERT(replace_str != NULL);
2141
2142 /* For each entry in the regex array, get the entry */
2143 ZEND_HASH_FOREACH_VAL(regex, regex_entry) {
2144 /* Make sure we're dealing with strings. */
2145 zend_string *tmp_regex_str;
2146 zend_string *regex_str = zval_get_tmp_string(regex_entry, &tmp_regex_str);
2147
2148 /* Do the actual replacement and put the result back into subject_str
2149 for further replacements. */
2150 result = php_pcre_replace(regex_str, subject_str, ZSTR_VAL(subject_str),
2151 ZSTR_LEN(subject_str), replace_str, limit, replace_count);
2152 zend_tmp_string_release(tmp_regex_str);
2153 zend_string_release_ex(subject_str, 0);
2154 subject_str = result;
2155
2156 if (UNEXPECTED(result == NULL)) {
2157 break;
2158 }
2159 } ZEND_HASH_FOREACH_END();
2160 }
2161
2162 return subject_str;
2163 }
2164 /* }}} */
2165
2166 /* {{{ php_replace_in_subject */
php_replace_in_subject(zend_string * regex_str,HashTable * regex_ht,zend_string * replace_str,HashTable * replace_ht,zend_string * subject,size_t limit,size_t * replace_count)2167 static zend_always_inline zend_string *php_replace_in_subject(
2168 zend_string *regex_str, HashTable *regex_ht,
2169 zend_string *replace_str, HashTable *replace_ht,
2170 zend_string *subject, size_t limit, size_t *replace_count)
2171 {
2172 zend_string *result;
2173
2174 if (regex_str) {
2175 ZEND_ASSERT(replace_str != NULL);
2176 result = php_pcre_replace(regex_str, subject, ZSTR_VAL(subject), ZSTR_LEN(subject),
2177 replace_str, limit, replace_count);
2178 } else {
2179 ZEND_ASSERT(regex_ht != NULL);
2180 result = php_pcre_replace_array(regex_ht, replace_str, replace_ht, subject,
2181 limit, replace_count);
2182 }
2183 return result;
2184 }
2185 /* }}} */
2186
2187 /* {{{ php_replace_in_subject_func */
php_replace_in_subject_func(zend_string * regex_str,HashTable * regex_ht,zend_fcall_info * fci,zend_fcall_info_cache * fcc,zend_string * subject,size_t limit,size_t * replace_count,zend_long flags)2188 static zend_string *php_replace_in_subject_func(zend_string *regex_str, HashTable *regex_ht,
2189 zend_fcall_info *fci, zend_fcall_info_cache *fcc,
2190 zend_string *subject, size_t limit, size_t *replace_count, zend_long flags)
2191 {
2192 zend_string *result;
2193
2194 if (regex_str) {
2195 result = php_pcre_replace_func(
2196 regex_str, subject, fci, fcc, limit, replace_count, flags);
2197 return result;
2198 } else {
2199 /* If regex is an array */
2200 zval *regex_entry;
2201
2202 ZEND_ASSERT(regex_ht != NULL);
2203
2204 zend_string_addref(subject);
2205
2206 /* For each entry in the regex array, get the entry */
2207 ZEND_HASH_FOREACH_VAL(regex_ht, regex_entry) {
2208 /* Make sure we're dealing with strings. */
2209 zend_string *tmp_regex_entry_str;
2210 zend_string *regex_entry_str = zval_get_tmp_string(regex_entry, &tmp_regex_entry_str);
2211
2212 /* Do the actual replacement and put the result back into subject
2213 for further replacements. */
2214 result = php_pcre_replace_func(
2215 regex_entry_str, subject, fci, fcc, limit, replace_count, flags);
2216 zend_tmp_string_release(tmp_regex_entry_str);
2217 zend_string_release(subject);
2218 subject = result;
2219 if (UNEXPECTED(result == NULL)) {
2220 break;
2221 }
2222 } ZEND_HASH_FOREACH_END();
2223
2224 return subject;
2225 }
2226 }
2227 /* }}} */
2228
2229 /* {{{ preg_replace_func_impl */
preg_replace_func_impl(zval * return_value,zend_string * regex_str,HashTable * regex_ht,zend_fcall_info * fci,zend_fcall_info_cache * fcc,zend_string * subject_str,HashTable * subject_ht,zend_long limit_val,zend_long flags)2230 static size_t preg_replace_func_impl(zval *return_value,
2231 zend_string *regex_str, HashTable *regex_ht,
2232 zend_fcall_info *fci, zend_fcall_info_cache *fcc,
2233 zend_string *subject_str, HashTable *subject_ht, zend_long limit_val, zend_long flags)
2234 {
2235 zend_string *result;
2236 size_t replace_count = 0;
2237
2238 if (subject_str) {
2239 result = php_replace_in_subject_func(
2240 regex_str, regex_ht, fci, fcc, subject_str, limit_val, &replace_count, flags);
2241 if (result != NULL) {
2242 RETVAL_STR(result);
2243 } else {
2244 RETVAL_NULL();
2245 }
2246 } else {
2247 /* if subject is an array */
2248 zval *subject_entry, zv;
2249 zend_string *string_key;
2250 zend_ulong num_key;
2251
2252 ZEND_ASSERT(subject_ht != NULL);
2253
2254 array_init_size(return_value, zend_hash_num_elements(subject_ht));
2255
2256 /* For each subject entry, convert it to string, then perform replacement
2257 and add the result to the return_value array. */
2258 ZEND_HASH_FOREACH_KEY_VAL(subject_ht, num_key, string_key, subject_entry) {
2259 zend_string *tmp_subject_entry_str;
2260 zend_string *subject_entry_str = zval_get_tmp_string(subject_entry, &tmp_subject_entry_str);
2261
2262 result = php_replace_in_subject_func(
2263 regex_str, regex_ht, fci, fcc, subject_entry_str, limit_val, &replace_count, flags);
2264 if (result != NULL) {
2265 /* Add to return array */
2266 ZVAL_STR(&zv, result);
2267 if (string_key) {
2268 zend_hash_add_new(Z_ARRVAL_P(return_value), string_key, &zv);
2269 } else {
2270 zend_hash_index_add_new(Z_ARRVAL_P(return_value), num_key, &zv);
2271 }
2272 }
2273 zend_tmp_string_release(tmp_subject_entry_str);
2274 } ZEND_HASH_FOREACH_END();
2275 }
2276
2277 return replace_count;
2278 }
2279 /* }}} */
2280
2281 /* {{{ preg_replace_common */
preg_replace_common(INTERNAL_FUNCTION_PARAMETERS,bool is_filter)2282 static void preg_replace_common(INTERNAL_FUNCTION_PARAMETERS, bool is_filter)
2283 {
2284 zval *zcount = NULL;
2285 zend_string *regex_str;
2286 HashTable *regex_ht;
2287 zend_string *replace_str;
2288 HashTable *replace_ht;
2289 zend_string *subject_str;
2290 HashTable *subject_ht;
2291 zend_long limit = -1;
2292 size_t replace_count = 0;
2293 zend_string *result;
2294 size_t old_replace_count;
2295
2296 /* Get function parameters and do error-checking. */
2297 ZEND_PARSE_PARAMETERS_START(3, 5)
2298 Z_PARAM_ARRAY_HT_OR_STR(regex_ht, regex_str)
2299 Z_PARAM_ARRAY_HT_OR_STR(replace_ht, replace_str)
2300 Z_PARAM_ARRAY_HT_OR_STR(subject_ht, subject_str)
2301 Z_PARAM_OPTIONAL
2302 Z_PARAM_LONG(limit)
2303 Z_PARAM_ZVAL(zcount)
2304 ZEND_PARSE_PARAMETERS_END();
2305
2306 /* If replace is an array then the regex argument needs to also be an array */
2307 if (replace_ht && !regex_ht) {
2308 zend_argument_type_error(1, "must be of type array when argument #2 ($replacement) is an array, string given");
2309 RETURN_THROWS();
2310 }
2311
2312 if (subject_str) {
2313 old_replace_count = replace_count;
2314 result = php_replace_in_subject(regex_str, regex_ht, replace_str, replace_ht,
2315 subject_str, limit, &replace_count);
2316 if (result != NULL) {
2317 if (!is_filter || replace_count > old_replace_count) {
2318 RETVAL_STR(result);
2319 } else {
2320 zend_string_release_ex(result, 0);
2321 RETVAL_NULL();
2322 }
2323 } else {
2324 RETVAL_NULL();
2325 }
2326 } else {
2327 /* if subject is an array */
2328 zval *subject_entry, zv;
2329 zend_string *string_key;
2330 zend_ulong num_key;
2331
2332 ZEND_ASSERT(subject_ht != NULL);
2333
2334 array_init_size(return_value, zend_hash_num_elements(subject_ht));
2335
2336 /* For each subject entry, convert it to string, then perform replacement
2337 and add the result to the return_value array. */
2338 ZEND_HASH_FOREACH_KEY_VAL(subject_ht, num_key, string_key, subject_entry) {
2339 old_replace_count = replace_count;
2340 zend_string *tmp_subject_entry_str;
2341 zend_string *subject_entry_str = zval_get_tmp_string(subject_entry, &tmp_subject_entry_str);
2342 result = php_replace_in_subject(regex_str, regex_ht, replace_str, replace_ht,
2343 subject_entry_str, limit, &replace_count);
2344
2345 if (result != NULL) {
2346 if (!is_filter || replace_count > old_replace_count) {
2347 /* Add to return array */
2348 ZVAL_STR(&zv, result);
2349 if (string_key) {
2350 zend_hash_add_new(Z_ARRVAL_P(return_value), string_key, &zv);
2351 } else {
2352 zend_hash_index_add_new(Z_ARRVAL_P(return_value), num_key, &zv);
2353 }
2354 } else {
2355 zend_string_release_ex(result, 0);
2356 }
2357 }
2358 zend_tmp_string_release(tmp_subject_entry_str);
2359 } ZEND_HASH_FOREACH_END();
2360 }
2361
2362 if (zcount) {
2363 ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count);
2364 }
2365 }
2366 /* }}} */
2367
/* {{{ Perform Perl-style regular expression replacement. */
/* Thin entry point: forwards its arguments to preg_replace_common() with
 * is_filter = false, so results are kept whether or not anything was
 * replaced. */
PHP_FUNCTION(preg_replace)
{
	preg_replace_common(INTERNAL_FUNCTION_PARAM_PASSTHRU, false);
}
/* }}} */
2374
2375 /* {{{ Perform Perl-style regular expression replacement using replacement callback. */
PHP_FUNCTION(preg_replace_callback)2376 PHP_FUNCTION(preg_replace_callback)
2377 {
2378 zval *zcount = NULL;
2379 zend_string *regex_str;
2380 HashTable *regex_ht;
2381 zend_string *subject_str;
2382 HashTable *subject_ht;
2383 zend_long limit = -1, flags = 0;
2384 size_t replace_count;
2385 zend_fcall_info fci;
2386 zend_fcall_info_cache fcc;
2387
2388 /* Get function parameters and do error-checking. */
2389 ZEND_PARSE_PARAMETERS_START(3, 6)
2390 Z_PARAM_ARRAY_HT_OR_STR(regex_ht, regex_str)
2391 Z_PARAM_FUNC(fci, fcc)
2392 Z_PARAM_ARRAY_HT_OR_STR(subject_ht, subject_str)
2393 Z_PARAM_OPTIONAL
2394 Z_PARAM_LONG(limit)
2395 Z_PARAM_ZVAL(zcount)
2396 Z_PARAM_LONG(flags)
2397 ZEND_PARSE_PARAMETERS_END();
2398
2399 replace_count = preg_replace_func_impl(return_value, regex_str, regex_ht,
2400 &fci, &fcc,
2401 subject_str, subject_ht, limit, flags);
2402 if (zcount) {
2403 ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count);
2404 }
2405 }
2406 /* }}} */
2407
/* {{{ Perform Perl-style regular expression replacement using replacement callback. */
/*
 * Arguments: pattern (array mapping regex string => callback), subject
 * (string|array), optional limit (default -1), optional by-reference count,
 * optional flags (default 0).
 *
 * The pattern => callback pairs are applied one after another; the result of
 * each pair becomes the subject of the next. The function returns the final
 * subject. It returns NULL when a key is not a string or when a replacement
 * step produced NULL. It bails out through the `error` label when a callback
 * is not callable or an exception is pending after a step.
 *
 * The count argument is only written on the success path.
 */
PHP_FUNCTION(preg_replace_callback_array)
{
	zval zv, *replace, *zcount = NULL;
	HashTable *pattern, *subject_ht;
	zend_string *subject_str, *str_idx_regex;
	zend_long limit = -1, flags = 0;
	size_t replace_count = 0;
	zend_fcall_info fci;
	zend_fcall_info_cache fcc;

	/* Get function parameters and do error-checking. */
	ZEND_PARSE_PARAMETERS_START(2, 5)
		Z_PARAM_ARRAY_HT(pattern)
		Z_PARAM_ARRAY_HT_OR_STR(subject_ht, subject_str)
		Z_PARAM_OPTIONAL
		Z_PARAM_LONG(limit)
		Z_PARAM_ZVAL(zcount)
		Z_PARAM_LONG(flags)
	ZEND_PARSE_PARAMETERS_END();

	/* Only these fci fields are set here; function_name is filled in per
	 * pattern below. NOTE(review): the remaining fields (params, retval, ...)
	 * are presumably set by the code that performs the call - confirm. */
	fci.size = sizeof(fci);
	fci.object = NULL;
	fci.named_params = NULL;

	/* Hold our own reference to the current subject. Every loop iteration
	 * releases the current subject and takes over the step's result, so the
	 * original argument and intermediate results are handled the same way. */
	if (subject_ht) {
		GC_TRY_ADDREF(subject_ht);
	} else {
		GC_TRY_ADDREF(subject_str);
	}

	ZEND_HASH_FOREACH_STR_KEY_VAL(pattern, str_idx_regex, replace) {
		/* A non-string (integer) key cannot serve as the regex. */
		if (!str_idx_regex) {
			php_error_docref(NULL, E_WARNING, "Delimiter must not be alphanumeric or backslash");
			RETVAL_NULL();
			goto error;
		}

		/* Validate the callback and fill the call cache for it. */
		if (!zend_is_callable_ex(replace, NULL, 0, NULL, &fcc, NULL)) {
			zend_argument_type_error(1, "must contain only valid callbacks");
			goto error;
		}

		ZVAL_COPY_VALUE(&fci.function_name, replace);

		/* Run this single pattern; the step's result lands in zv. */
		replace_count += preg_replace_func_impl(&zv, str_idx_regex, /* regex_ht */ NULL, &fci, &fcc,
			subject_str, subject_ht, limit, flags);
		switch (Z_TYPE(zv)) {
			case IS_ARRAY:
				/* Array subject: swap in the new array for the next step. */
				ZEND_ASSERT(subject_ht);
				zend_array_release(subject_ht);
				subject_ht = Z_ARR(zv);
				break;
			case IS_STRING:
				/* String subject: swap in the new string for the next step. */
				ZEND_ASSERT(subject_str);
				zend_string_release(subject_str);
				subject_str = Z_STR(zv);
				break;
			case IS_NULL:
				/* The step failed: return NULL and drop the current subject. */
				RETVAL_NULL();
				goto error;
			EMPTY_SWITCH_DEFAULT_CASE()
		}

		/* Stop if an exception is pending after this step. */
		if (EG(exception)) {
			goto error;
		}
	} ZEND_HASH_FOREACH_END();

	if (zcount) {
		ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count);
	}

	/* Success: hand our reference to the final subject over to return_value. */
	if (subject_ht) {
		RETVAL_ARR(subject_ht);
		// Unset the type_flags of immutable arrays to prevent the VM from performing refcounting
		if (GC_FLAGS(subject_ht) & IS_ARRAY_IMMUTABLE) {
			Z_TYPE_FLAGS_P(return_value) = 0;
		}
		return;
	} else {
		RETURN_STR(subject_str);
	}

error:
	/* Failure: drop the reference we hold on the current subject. */
	if (subject_ht) {
		zend_array_release(subject_ht);
	} else {
		zend_string_release(subject_str);
	}
}
/* }}} */
2500
/* {{{ Perform Perl-style regular expression replacement and only return matches. */
/* Thin entry point: forwards its arguments to preg_replace_common() with
 * is_filter = true, so subjects for which nothing was replaced are dropped
 * from the result. */
PHP_FUNCTION(preg_filter)
{
	preg_replace_common(INTERNAL_FUNCTION_PARAM_PASSTHRU, true);
}
/* }}} */
2507
2508 /* {{{ Split string into an array using a perl-style regular expression as a delimiter */
PHP_FUNCTION(preg_split)2509 PHP_FUNCTION(preg_split)
2510 {
2511 zend_string *regex; /* Regular expression */
2512 zend_string *subject; /* String to match against */
2513 zend_long limit_val = -1;/* Integer value of limit */
2514 zend_long flags = 0; /* Match control flags */
2515 pcre_cache_entry *pce; /* Compiled regular expression */
2516
2517 /* Get function parameters and do error checking */
2518 ZEND_PARSE_PARAMETERS_START(2, 4)
2519 Z_PARAM_STR(regex)
2520 Z_PARAM_STR(subject)
2521 Z_PARAM_OPTIONAL
2522 Z_PARAM_LONG(limit_val)
2523 Z_PARAM_LONG(flags)
2524 ZEND_PARSE_PARAMETERS_END();
2525
2526 /* Compile regex or get it from cache. */
2527 if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
2528 RETURN_FALSE;
2529 }
2530
2531 pce->refcount++;
2532 php_pcre_split_impl(pce, subject, return_value, limit_val, flags);
2533 pce->refcount--;
2534 }
2535 /* }}} */
2536
/* {{{ php_pcre_split_impl */
/*
 * Splits `subject_str` at every match of the compiled pattern `pce` and
 * stores the pieces in `return_value` as an array.
 *
 * pce          compiled pattern (cache entry)
 * subject_str  string to split
 * return_value receives the array of pieces, or false on error
 * limit_val    -1 or 0: no limit. Any other value <= 1: no matching is done
 *              and the whole subject is returned as the only piece.
 *              Otherwise matching stops once the remaining limit drops to 1,
 *              and the rest of the subject forms the last piece.
 * flags        bit set of PREG_SPLIT_NO_EMPTY (skip empty pieces),
 *              PREG_SPLIT_DELIM_CAPTURE (also emit captured groups of the
 *              delimiter) and PREG_SPLIT_OFFSET_CAPTURE (emit pieces through
 *              add_offset_pair() instead of as plain strings)
 *
 * PCRE_G(error_code) is reset on entry. If it is set when the match loop
 * ends, or if match data cannot be allocated, the partial array is destroyed
 * and false is returned.
 */
PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str, zval *return_value,
	zend_long limit_val, zend_long flags)
{
	PCRE2_SIZE *offsets; /* Array of subpattern offsets */
	uint32_t options; /* Execution options */
	int count; /* Count of matched subpatterns */
	PCRE2_SIZE start_offset; /* Where the new search starts */
	PCRE2_SIZE last_match_offset; /* Location of last match */
	uint32_t no_empty; /* If NO_EMPTY flag is set */
	uint32_t delim_capture; /* If delimiters should be captured */
	uint32_t offset_capture; /* If offsets should be captured */
	uint32_t num_subpats; /* Number of captured subpatterns */
	zval tmp;
	pcre2_match_data *match_data;
	char *subject = ZSTR_VAL(subject_str);

	no_empty = flags & PREG_SPLIT_NO_EMPTY;
	delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE;
	offset_capture = flags & PREG_SPLIT_OFFSET_CAPTURE;

	/* Initialize return value */
	array_init(return_value);

	/* Number of capture groups plus one for the whole match. */
	num_subpats = pce->capture_count + 1;

	/* Start at the beginning of the string */
	start_offset = 0;
	last_match_offset = 0;
	PCRE_G(error_code) = PHP_PCRE_NO_ERROR;

	/* Normalize the limit: 0 is treated like -1 (no limit); any other value
	   <= 1 leaves nothing to split, so matching is skipped entirely. */
	if (limit_val == -1) {
		/* pass */
	} else if (limit_val == 0) {
		limit_val = -1;
	} else if (limit_val <= 1) {
		/* match_data is never set up on this path; the code after `last`
		   does not use it. */
		goto last;
	}

	/* Use the preallocated match data block when it is not marked as in use
	   and the pattern's group count fits; otherwise create one for this
	   pattern. */
	if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
		match_data = mdata;
	} else {
		match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
		if (!match_data) {
			PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
			zval_ptr_dtor(return_value);
			RETURN_FALSE;
		}
	}

	/* Patterns compiled with PCRE2_UTF pass no option flags to the first
	   match; all other patterns pass PCRE2_NO_UTF_CHECK. */
	options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;

	/* First match. The JIT entry point is only taken when `options` is
	   non-zero, i.e. for patterns not compiled with PCRE2_UTF. */
#ifdef HAVE_PCRE_JIT_SUPPORT
	if ((pce->preg_options & PREG_JIT) && options) {
		count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
			PCRE2_NO_UTF_CHECK, match_data, mctx);
	} else
#endif
	count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
		options, match_data, mctx);

	while (1) {
		/* If something matched */
		if (count >= 0) {
			/* Check for too many substrings condition. */
			if (UNEXPECTED(count == 0)) {
				php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
				count = num_subpats;
			}

matched:
			offsets = pcre2_get_ovector_pointer(match_data);

			/* A match that ends before it starts cannot be split on. */
			if (UNEXPECTED(offsets[1] < offsets[0])) {
				PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
				break;
			}

			/* Emit the piece between the previous match and this one, unless
			   it is empty and empty pieces are suppressed. */
			if (!no_empty || offsets[0] != last_match_offset) {
				if (offset_capture) {
					/* Add (match, offset) pair to the return value */
					add_offset_pair(
						return_value, subject, last_match_offset, offsets[0],
						NULL, 0);
				} else {
					/* Add the piece to the return value */
					populate_match_value_str(&tmp, subject, last_match_offset, offsets[0]);
					zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
				}

				/* One less left to do */
				if (limit_val != -1)
					limit_val--;
			}

			/* Optionally emit the captured groups of the delimiter; these do
			   not count against the limit. */
			if (delim_capture) {
				size_t i;
				for (i = 1; i < count; i++) {
					/* If we have matched a delimiter */
					if (!no_empty || offsets[2*i] != offsets[2*i+1]) {
						if (offset_capture) {
							add_offset_pair(
								return_value, subject, offsets[2*i], offsets[2*i+1], NULL, 0);
						} else {
							populate_match_value_str(&tmp, subject, offsets[2*i], offsets[2*i+1]);
							zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
						}
					}
				}
			}

			/* Advance to the position right after the last full match */
			start_offset = last_match_offset = offsets[1];

			/* If we have matched an empty string, mimic what Perl's /g options does.
			   This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
			   the match again at the same point. If this fails (picked up above) we
			   advance to the next character. */
			if (start_offset == offsets[0]) {
				/* Get next piece if no limit or limit not yet reached and something matched*/
				if (limit_val != -1 && limit_val <= 1) {
					break;
				}
				count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
					PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
				if (count >= 0) {
					goto matched;
				} else if (count == PCRE2_ERROR_NOMATCH) {
					/* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
					   this is not necessarily the end. We need to advance
					   the start offset, and continue. Fudge the offset values
					   to achieve this, unless we're already at the end of the string. */
					if (start_offset < ZSTR_LEN(subject_str)) {
						start_offset += calculate_unit_length(pce, subject + start_offset);
					} else {
						break;
					}
				} else {
					goto error;
				}
			}

		} else if (count == PCRE2_ERROR_NOMATCH) {
			/* No further delimiter: the remainder is added after the loop. */
			break;
		} else {
error:
			pcre_handle_exec_error(count);
			break;
		}

		/* Get next piece if no limit or limit not yet reached and something matched*/
		if (limit_val != -1 && limit_val <= 1) {
			break;
		}

		/* Subsequent matches always pass PCRE2_NO_UTF_CHECK.
		   NOTE(review): presumably safe because the first match already
		   validated the subject - confirm. */
#ifdef HAVE_PCRE_JIT_SUPPORT
		if (pce->preg_options & PREG_JIT) {
			count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
				PCRE2_NO_UTF_CHECK, match_data, mctx);
		} else
#endif
		count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
			PCRE2_NO_UTF_CHECK, match_data, mctx);
	}
	/* Only free match data that was created for this call. */
	if (match_data != mdata) {
		pcre2_match_data_free(match_data);
	}

	/* Any error recorded during matching discards the partial result. */
	if (PCRE_G(error_code) != PHP_PCRE_NO_ERROR) {
		zval_ptr_dtor(return_value);
		RETURN_FALSE;
	}

last:
	start_offset = last_match_offset; /* the offset might have been incremented, but without further successful matches */

	/* Emit whatever follows the last match, unless it is empty and empty
	   pieces are suppressed. */
	if (!no_empty || start_offset < ZSTR_LEN(subject_str)) {
		if (offset_capture) {
			/* Add the last (match, offset) pair to the return value */
			add_offset_pair(return_value, subject, start_offset, ZSTR_LEN(subject_str), NULL, 0);
		} else {
			/* Add the last piece to the return value */
			if (start_offset == 0) {
				/* Nothing was consumed: reuse the subject string itself. */
				ZVAL_STR_COPY(&tmp, subject_str);
			} else {
				populate_match_value_str(&tmp, subject, start_offset, ZSTR_LEN(subject_str));
			}
			zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
		}
	}
}
/* }}} */
2730
2731 /* {{{ Quote regular expression characters plus an optional character */
PHP_FUNCTION(preg_quote)2732 PHP_FUNCTION(preg_quote)
2733 {
2734 zend_string *str; /* Input string argument */
2735 zend_string *delim = NULL; /* Additional delimiter argument */
2736 char *in_str; /* Input string */
2737 char *in_str_end; /* End of the input string */
2738 zend_string *out_str; /* Output string with quoted characters */
2739 size_t extra_len; /* Number of additional characters */
2740 char *p, /* Iterator for input string */
2741 *q, /* Iterator for output string */
2742 delim_char = '\0', /* Delimiter character to be quoted */
2743 c; /* Current character */
2744
2745 /* Get the arguments and check for errors */
2746 ZEND_PARSE_PARAMETERS_START(1, 2)
2747 Z_PARAM_STR(str)
2748 Z_PARAM_OPTIONAL
2749 Z_PARAM_STR_OR_NULL(delim)
2750 ZEND_PARSE_PARAMETERS_END();
2751
2752 /* Nothing to do if we got an empty string */
2753 if (ZSTR_LEN(str) == 0) {
2754 RETURN_EMPTY_STRING();
2755 }
2756
2757 in_str = ZSTR_VAL(str);
2758 in_str_end = in_str + ZSTR_LEN(str);
2759
2760 if (delim) {
2761 delim_char = ZSTR_VAL(delim)[0];
2762 }
2763
2764 /* Go through the string and quote necessary characters */
2765 extra_len = 0;
2766 p = in_str;
2767 do {
2768 c = *p;
2769 switch(c) {
2770 case '.':
2771 case '\\':
2772 case '+':
2773 case '*':
2774 case '?':
2775 case '[':
2776 case '^':
2777 case ']':
2778 case '$':
2779 case '(':
2780 case ')':
2781 case '{':
2782 case '}':
2783 case '=':
2784 case '!':
2785 case '>':
2786 case '<':
2787 case '|':
2788 case ':':
2789 case '-':
2790 case '#':
2791 extra_len++;
2792 break;
2793
2794 case '\0':
2795 extra_len+=3;
2796 break;
2797
2798 default:
2799 if (c == delim_char) {
2800 extra_len++;
2801 }
2802 break;
2803 }
2804 p++;
2805 } while (p != in_str_end);
2806
2807 if (extra_len == 0) {
2808 RETURN_STR_COPY(str);
2809 }
2810
2811 /* Allocate enough memory so that even if each character
2812 is quoted, we won't run out of room */
2813 out_str = zend_string_safe_alloc(1, ZSTR_LEN(str), extra_len, 0);
2814 q = ZSTR_VAL(out_str);
2815 p = in_str;
2816
2817 do {
2818 c = *p;
2819 switch(c) {
2820 case '.':
2821 case '\\':
2822 case '+':
2823 case '*':
2824 case '?':
2825 case '[':
2826 case '^':
2827 case ']':
2828 case '$':
2829 case '(':
2830 case ')':
2831 case '{':
2832 case '}':
2833 case '=':
2834 case '!':
2835 case '>':
2836 case '<':
2837 case '|':
2838 case ':':
2839 case '-':
2840 case '#':
2841 *q++ = '\\';
2842 *q++ = c;
2843 break;
2844
2845 case '\0':
2846 *q++ = '\\';
2847 *q++ = '0';
2848 *q++ = '0';
2849 *q++ = '0';
2850 break;
2851
2852 default:
2853 if (c == delim_char) {
2854 *q++ = '\\';
2855 }
2856 *q++ = c;
2857 break;
2858 }
2859 p++;
2860 } while (p != in_str_end);
2861 *q = '\0';
2862
2863 RETURN_NEW_STR(out_str);
2864 }
2865 /* }}} */
2866
2867 /* {{{ Searches array and returns entries which match regex */
PHP_FUNCTION(preg_grep)2868 PHP_FUNCTION(preg_grep)
2869 {
2870 zend_string *regex; /* Regular expression */
2871 zval *input; /* Input array */
2872 zend_long flags = 0; /* Match control flags */
2873 pcre_cache_entry *pce; /* Compiled regular expression */
2874
2875 /* Get arguments and do error checking */
2876 ZEND_PARSE_PARAMETERS_START(2, 3)
2877 Z_PARAM_STR(regex)
2878 Z_PARAM_ARRAY(input)
2879 Z_PARAM_OPTIONAL
2880 Z_PARAM_LONG(flags)
2881 ZEND_PARSE_PARAMETERS_END();
2882
2883 /* Compile regex or get it from cache. */
2884 if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
2885 RETURN_FALSE;
2886 }
2887
2888 pce->refcount++;
2889 php_pcre_grep_impl(pce, input, return_value, flags);
2890 pce->refcount--;
2891 }
2892 /* }}} */
2893
php_pcre_grep_impl(pcre_cache_entry * pce,zval * input,zval * return_value,zend_long flags)2894 PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return_value, zend_long flags) /* {{{ */
2895 {
2896 zval *entry; /* An entry in the input array */
2897 uint32_t num_subpats; /* Number of captured subpatterns */
2898 int count; /* Count of matched subpatterns */
2899 uint32_t options; /* Execution options */
2900 zend_string *string_key;
2901 zend_ulong num_key;
2902 bool invert; /* Whether to return non-matching
2903 entries */
2904 pcre2_match_data *match_data;
2905 invert = flags & PREG_GREP_INVERT ? 1 : 0;
2906
2907 /* Calculate the size of the offsets array, and allocate memory for it. */
2908 num_subpats = pce->capture_count + 1;
2909
2910 /* Initialize return array */
2911 array_init(return_value);
2912
2913 PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
2914
2915 if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
2916 match_data = mdata;
2917 } else {
2918 match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
2919 if (!match_data) {
2920 PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
2921 return;
2922 }
2923 }
2924
2925 options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
2926
2927 /* Go through the input array */
2928 ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(input), num_key, string_key, entry) {
2929 zend_string *tmp_subject_str;
2930 zend_string *subject_str = zval_get_tmp_string(entry, &tmp_subject_str);
2931
2932 /* Perform the match */
2933 #ifdef HAVE_PCRE_JIT_SUPPORT
2934 if ((pce->preg_options & PREG_JIT) && options) {
2935 count = pcre2_jit_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), 0,
2936 PCRE2_NO_UTF_CHECK, match_data, mctx);
2937 } else
2938 #endif
2939 count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), 0,
2940 options, match_data, mctx);
2941
2942 /* If the entry fits our requirements */
2943 if (count >= 0) {
2944 /* Check for too many substrings condition. */
2945 if (UNEXPECTED(count == 0)) {
2946 php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
2947 }
2948 if (!invert) {
2949 Z_TRY_ADDREF_P(entry);
2950
2951 /* Add to return array */
2952 if (string_key) {
2953 zend_hash_update(Z_ARRVAL_P(return_value), string_key, entry);
2954 } else {
2955 zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, entry);
2956 }
2957 }
2958 } else if (count == PCRE2_ERROR_NOMATCH) {
2959 if (invert) {
2960 Z_TRY_ADDREF_P(entry);
2961
2962 /* Add to return array */
2963 if (string_key) {
2964 zend_hash_update(Z_ARRVAL_P(return_value), string_key, entry);
2965 } else {
2966 zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, entry);
2967 }
2968 }
2969 } else {
2970 pcre_handle_exec_error(count);
2971 zend_tmp_string_release(tmp_subject_str);
2972 break;
2973 }
2974
2975 zend_tmp_string_release(tmp_subject_str);
2976 } ZEND_HASH_FOREACH_END();
2977 if (match_data != mdata) {
2978 pcre2_match_data_free(match_data);
2979 }
2980 }
2981 /* }}} */
2982
2983 /* {{{ Returns the error code of the last regexp execution. */
PHP_FUNCTION(preg_last_error)2984 PHP_FUNCTION(preg_last_error)
2985 {
2986 ZEND_PARSE_PARAMETERS_NONE();
2987
2988 RETURN_LONG(PCRE_G(error_code));
2989 }
2990 /* }}} */
2991
2992 /* {{{ Returns the error message of the last regexp execution. */
PHP_FUNCTION(preg_last_error_msg)2993 PHP_FUNCTION(preg_last_error_msg)
2994 {
2995 ZEND_PARSE_PARAMETERS_NONE();
2996
2997 RETURN_STRING(php_pcre_get_error_msg(PCRE_G(error_code)));
2998 }
2999 /* }}} */
3000
3001 /* {{{ module definition structures */
3002
3003 zend_module_entry pcre_module_entry = {
3004 STANDARD_MODULE_HEADER,
3005 "pcre",
3006 ext_functions,
3007 PHP_MINIT(pcre),
3008 PHP_MSHUTDOWN(pcre),
3009 PHP_RINIT(pcre),
3010 PHP_RSHUTDOWN(pcre),
3011 PHP_MINFO(pcre),
3012 PHP_PCRE_VERSION,
3013 PHP_MODULE_GLOBALS(pcre),
3014 PHP_GINIT(pcre),
3015 PHP_GSHUTDOWN(pcre),
3016 NULL,
3017 STANDARD_MODULE_PROPERTIES_EX
3018 };
3019
3020 #ifdef COMPILE_DL_PCRE
ZEND_GET_MODULE(pcre)3021 ZEND_GET_MODULE(pcre)
3022 #endif
3023
3024 /* }}} */
3025
3026 PHPAPI pcre2_match_context *php_pcre_mctx(void)
3027 {/*{{{*/
3028 return mctx;
3029 }/*}}}*/
3030
php_pcre_gctx(void)3031 PHPAPI pcre2_general_context *php_pcre_gctx(void)
3032 {/*{{{*/
3033 return gctx;
3034 }/*}}}*/
3035
php_pcre_cctx(void)3036 PHPAPI pcre2_compile_context *php_pcre_cctx(void)
3037 {/*{{{*/
3038 return cctx;
3039 }/*}}}*/
3040
php_pcre_pce_incref(pcre_cache_entry * pce)3041 PHPAPI void php_pcre_pce_incref(pcre_cache_entry *pce)
3042 {/*{{{*/
3043 assert(NULL != pce);
3044 pce->refcount++;
3045 }/*}}}*/
3046
php_pcre_pce_decref(pcre_cache_entry * pce)3047 PHPAPI void php_pcre_pce_decref(pcre_cache_entry *pce)
3048 {/*{{{*/
3049 assert(NULL != pce);
3050 assert(0 != pce->refcount);
3051 pce->refcount--;
3052 }/*}}}*/
3053
php_pcre_pce_re(pcre_cache_entry * pce)3054 PHPAPI pcre2_code *php_pcre_pce_re(pcre_cache_entry *pce)
3055 {/*{{{*/
3056 assert(NULL != pce);
3057 return pce->re;
3058 }/*}}}*/
3059