1 /*
2 +----------------------------------------------------------------------+
3 | Copyright (c) The PHP Group |
4 +----------------------------------------------------------------------+
5 | This source file is subject to version 3.01 of the PHP license, |
6 | that is bundled with this package in the file LICENSE, and is |
7 | available through the world-wide-web at the following url: |
8 | https://www.php.net/license/3_01.txt |
9 | If you did not receive a copy of the PHP license and are unable to |
10 | obtain it through the world-wide-web, please send a note to |
11 | license@php.net so we can mail you a copy immediately. |
12 +----------------------------------------------------------------------+
13 | Author: Andrei Zmievski <andrei@php.net> |
14 +----------------------------------------------------------------------+
15 */
16
17 #include "php.h"
18 #include "php_ini.h"
19 #include "php_globals.h"
20 #include "php_pcre.h"
21 #include "php_pcre_arginfo.h"
22 #include "ext/standard/info.h"
23 #include "ext/standard/basic_functions.h"
24 #include "zend_smart_str.h"
25 #include "SAPI.h"
26
27 #include "ext/standard/php_string.h"
28
/* Flags exported to userland under the same names by PHP_MINIT_FUNCTION(pcre). */
#define PREG_PATTERN_ORDER			1
#define PREG_SET_ORDER				2
#define PREG_OFFSET_CAPTURE			(1<<8)
#define PREG_UNMATCHED_AS_NULL		(1<<9)

/* PREG_SPLIT_* flags, likewise exported to userland in MINIT. */
#define PREG_SPLIT_NO_EMPTY			(1<<0)
#define PREG_SPLIT_DELIM_CAPTURE	(1<<1)
#define PREG_SPLIT_OFFSET_CAPTURE	(1<<2)

/* Internal preg option set by the 'e' modifier; patterns carrying it are
   rejected with a warning when they are compiled. */
#define PREG_REPLACE_EVAL			(1<<0)

/* Exported to userland in MINIT. */
#define PREG_GREP_INVERT			(1<<0)

/* Internal preg option recorded when JIT compilation of a pattern produced code. */
#define PREG_JIT					(1<<3)

/* Number of cached patterns at which the cache is trimmed: up to
   PCRE_CACHE_SIZE / 8 entries with a zero refcount are removed before a new
   entry is added. */
#define PCRE_CACHE_SIZE 4096
45
/* One compiled pattern as stored in PCRE_G(pcre_cache). */
struct _pcre_cache_entry {
	pcre2_code *re;            /* compiled pattern; released with pcre2_code_free() by the cache destructors */
	uint32_t preg_options;     /* PREG_* options collected while parsing the modifiers (e.g. PREG_JIT) */
	uint32_t capture_count;    /* PCRE2_INFO_CAPTURECOUNT of re */
	uint32_t name_count;       /* PCRE2_INFO_NAMECOUNT of re */
	uint32_t compile_options;  /* PCRE2_* options passed to pcre2_compile() */
	uint32_t refcount;         /* starts at 0; pcre_clean_cache() only evicts entries whose refcount is 0 */
};
54
PHPAPI ZEND_DECLARE_MODULE_GLOBALS(pcre)

#ifdef HAVE_PCRE_JIT_SUPPORT
/* Start and maximum size handed to pcre2_jit_stack_create() in php_pcre_init_pcre2(). */
#define PCRE_JIT_STACK_MIN_SIZE (32 * 1024)
#define PCRE_JIT_STACK_MAX_SIZE (192 * 1024)
ZEND_TLS pcre2_jit_stack *jit_stack = NULL;
#endif
/* General context using (infallible) system allocator. */
ZEND_TLS pcre2_general_context *gctx = NULL;
/* The compile and match contexts are global per thread for now, though it
   would be possible to use them per pattern: either copy them into the pce,
   or use no global contexts at all and create them for every pce. */
ZEND_TLS pcre2_compile_context *cctx = NULL;
ZEND_TLS pcre2_match_context *mctx = NULL;
/* Preallocated match data block (PHP_PCRE_PREALLOC_MDATA_SIZE pairs) handed
   out by php_pcre_create_match_data() when the pattern fits into it. */
ZEND_TLS pcre2_match_data *mdata = NULL;
/* Non-zero while mdata is handed out; cleared by php_pcre_free_match_data(). */
ZEND_TLS bool mdata_used = 0;
/* 1 once php_pcre_init_pcre2() has created all of the objects above, else 0. */
ZEND_TLS uint8_t pcre2_init_ok = 0;
/*
 * Mutex helpers used around PCRE2 (re)initialisation. They only do real work
 * in thread-safe builds with JIT support; otherwise they expand to no-ops.
 *
 * Every macro expands to a single statement that needs the caller's trailing
 * semicolon, so it is safe inside unbraced if/else bodies.
 */
#if defined(ZTS) && defined(HAVE_PCRE_JIT_SUPPORT)
static MUTEX_T pcre_mt = NULL;
/* The mutex is created and destroyed by the main thread only. */
#define php_pcre_mutex_alloc() \
	do { \
		if (tsrm_is_main_thread() && !pcre_mt) { \
			pcre_mt = tsrm_mutex_alloc(); \
		} \
	} while (0)
#define php_pcre_mutex_free() \
	do { \
		if (tsrm_is_main_thread() && pcre_mt) { \
			tsrm_mutex_free(pcre_mt); \
			pcre_mt = NULL; \
		} \
	} while (0)
#define php_pcre_mutex_lock() tsrm_mutex_lock(pcre_mt)
#define php_pcre_mutex_unlock() tsrm_mutex_unlock(pcre_mt)
#else
#define php_pcre_mutex_alloc() ((void)0)
#define php_pcre_mutex_free() ((void)0)
#define php_pcre_mutex_lock() ((void)0)
#define php_pcre_mutex_unlock() ((void)0)
#endif
86
/* Character tables created by pcre2_maketables(), keyed by the ctype locale
   string; entries are released by php_pcre_free_char_table(). */
ZEND_TLS HashTable char_tables;
88
/* Destructor for char_tables: frees the persistently allocated table stored in the bucket. */
static void php_pcre_free_char_table(zval *data)
{/*{{{*/
	void *table = Z_PTR_P(data);

	pefree(table, 1);
}/*}}}*/
94
pcre_handle_exec_error(int pcre_code)95 static void pcre_handle_exec_error(int pcre_code) /* {{{ */
96 {
97 int preg_code = 0;
98
99 switch (pcre_code) {
100 case PCRE2_ERROR_MATCHLIMIT:
101 preg_code = PHP_PCRE_BACKTRACK_LIMIT_ERROR;
102 break;
103
104 case PCRE2_ERROR_RECURSIONLIMIT:
105 preg_code = PHP_PCRE_RECURSION_LIMIT_ERROR;
106 break;
107
108 case PCRE2_ERROR_BADUTFOFFSET:
109 preg_code = PHP_PCRE_BAD_UTF8_OFFSET_ERROR;
110 break;
111
112 #ifdef HAVE_PCRE_JIT_SUPPORT
113 case PCRE2_ERROR_JIT_STACKLIMIT:
114 preg_code = PHP_PCRE_JIT_STACKLIMIT_ERROR;
115 break;
116 #endif
117
118 default:
119 if (pcre_code <= PCRE2_ERROR_UTF8_ERR1 && pcre_code >= PCRE2_ERROR_UTF8_ERR21) {
120 preg_code = PHP_PCRE_BAD_UTF8_ERROR;
121 } else {
122 preg_code = PHP_PCRE_INTERNAL_ERROR;
123 }
124 break;
125 }
126
127 PCRE_G(error_code) = preg_code;
128 }
129 /* }}} */
130
/* Returns the static, human readable message for a PHP_PCRE_* error code;
 * codes without a dedicated message yield "Unknown error". */
static const char *php_pcre_get_error_msg(php_pcre_error_code error_code) /* {{{ */
{
	const char *msg;

	switch (error_code) {
		case PHP_PCRE_NO_ERROR:
			msg = "No error";
			break;
		case PHP_PCRE_INTERNAL_ERROR:
			msg = "Internal error";
			break;
		case PHP_PCRE_BAD_UTF8_ERROR:
			msg = "Malformed UTF-8 characters, possibly incorrectly encoded";
			break;
		case PHP_PCRE_BAD_UTF8_OFFSET_ERROR:
			msg = "The offset did not correspond to the beginning of a valid UTF-8 code point";
			break;
		case PHP_PCRE_BACKTRACK_LIMIT_ERROR:
			msg = "Backtrack limit exhausted";
			break;
		case PHP_PCRE_RECURSION_LIMIT_ERROR:
			msg = "Recursion limit exhausted";
			break;
#ifdef HAVE_PCRE_JIT_SUPPORT
		case PHP_PCRE_JIT_STACKLIMIT_ERROR:
			msg = "JIT stack limit exhausted";
			break;
#endif
		default:
			msg = "Unknown error";
			break;
	}

	return msg;
}
156 /* }}} */
157
/* Destructor for the persistent pattern cache: frees the compiled code and
 * the entry itself, which was allocated with the system allocator. */
static void php_free_pcre_cache(zval *data) /* {{{ */
{
	pcre_cache_entry *entry = (pcre_cache_entry *) Z_PTR_P(data);

	if (entry != NULL) {
		pcre2_code_free(entry->re);
		free(entry);
	}
}
165 /* }}} */
166
/* Destructor for the per-request pattern cache: frees the compiled code and
 * the entry itself, which was allocated with the request allocator. */
static void php_efree_pcre_cache(zval *data) /* {{{ */
{
	pcre_cache_entry *entry = (pcre_cache_entry *) Z_PTR_P(data);

	if (entry != NULL) {
		pcre2_code_free(entry->re);
		efree(entry);
	}
}
174 /* }}} */
175
/* PCRE2 allocation callback backed by the persistent allocator; `data` is unused. */
static void *php_pcre_malloc(PCRE2_SIZE size, void *data)
{
	void *block = pemalloc(size, 1);

	(void) data;
	return block;
}
180
/* PCRE2 release callback matching php_pcre_malloc(); `data` is unused. */
static void php_pcre_free(void *block, void *data)
{
	(void) data;
	pefree(block, 1);
}
185
/* PCRE2 allocation callback backed by emalloc(); `data` is unused. */
static void *php_pcre_emalloc(PCRE2_SIZE size, void *data)
{
	void *block = emalloc(size);

	(void) data;
	return block;
}
190
/* PCRE2 release callback matching php_pcre_emalloc(); `data` is unused. */
static void php_pcre_efree(void *block, void *data)
{
	(void) data;
	efree(block);
}
195
/* Extra compile options applied to the shared compile context in
   php_pcre_init_pcre2(). */
#ifdef PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK
/* pcre 10.38 needs PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK, disabled by default */
#define PHP_PCRE_DEFAULT_EXTRA_COPTIONS PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK
#else
#define PHP_PCRE_DEFAULT_EXTRA_COPTIONS 0
#endif

/* Size passed to pcre2_match_data_create() for the preallocated mdata block.
   php_pcre_create_match_data() reuses that block whenever
   capture_count + 1 does not exceed this value. */
#define PHP_PCRE_PREALLOC_MDATA_SIZE 32
204
/*
 * Creates whichever of the shared PCRE2 objects (general, compile and match
 * contexts, JIT stack, preallocated match data) do not exist yet.
 *
 * jit: when non-zero and JIT support is compiled in, a JIT stack is created
 *      as well.
 *
 * The outcome is reported through pcre2_init_ok: 1 if every object is
 * available afterwards, 0 as soon as one creation fails. Objects created
 * before a failure are kept, so a later call can resume from there.
 */
static void php_pcre_init_pcre2(uint8_t jit)
{/*{{{*/
	/* Assume failure; only the very last statement reports success. */
	pcre2_init_ok = 0;

	if (gctx == NULL) {
		gctx = pcre2_general_context_create(php_pcre_malloc, php_pcre_free, NULL);
	}
	if (gctx == NULL) {
		return;
	}

	if (cctx == NULL) {
		cctx = pcre2_compile_context_create(gctx);
	}
	if (cctx == NULL) {
		return;
	}

	pcre2_set_compile_extra_options(cctx, PHP_PCRE_DEFAULT_EXTRA_COPTIONS);

	if (mctx == NULL) {
		mctx = pcre2_match_context_create(gctx);
	}
	if (mctx == NULL) {
		return;
	}

#ifdef HAVE_PCRE_JIT_SUPPORT
	if (jit) {
		if (jit_stack == NULL) {
			jit_stack = pcre2_jit_stack_create(PCRE_JIT_STACK_MIN_SIZE, PCRE_JIT_STACK_MAX_SIZE, gctx);
		}
		if (jit_stack == NULL) {
			return;
		}
	}
#endif

	if (mdata == NULL) {
		mdata = pcre2_match_data_create(PHP_PCRE_PREALLOC_MDATA_SIZE, gctx);
	}
	if (mdata == NULL) {
		return;
	}

	pcre2_init_ok = 1;
}/*}}}*/
253
php_pcre_shutdown_pcre2(void)254 static void php_pcre_shutdown_pcre2(void)
255 {/*{{{*/
256 if (gctx) {
257 pcre2_general_context_free(gctx);
258 gctx = NULL;
259 }
260
261 if (cctx) {
262 pcre2_compile_context_free(cctx);
263 cctx = NULL;
264 }
265
266 if (mctx) {
267 pcre2_match_context_free(mctx);
268 mctx = NULL;
269 }
270
271 #ifdef HAVE_PCRE_JIT_SUPPORT
272 /* Stack may only be destroyed when no cached patterns
273 possibly associated with it do exist. */
274 if (jit_stack) {
275 pcre2_jit_stack_free(jit_stack);
276 jit_stack = NULL;
277 }
278 #endif
279
280 if (mdata) {
281 pcre2_match_data_free(mdata);
282 mdata = NULL;
283 }
284
285 pcre2_init_ok = 0;
286 }/*}}}*/
287
PHP_GINIT_FUNCTION(pcre)288 static PHP_GINIT_FUNCTION(pcre) /* {{{ */
289 {
290 php_pcre_mutex_alloc();
291
292 /* If we're on the CLI SAPI, there will only be one request, so we don't need the
293 * cache to survive after RSHUTDOWN. */
294 pcre_globals->per_request_cache = strcmp(sapi_module.name, "cli") == 0;
295 if (!pcre_globals->per_request_cache) {
296 zend_hash_init(&pcre_globals->pcre_cache, 0, NULL, php_free_pcre_cache, 1);
297 }
298
299 pcre_globals->backtrack_limit = 0;
300 pcre_globals->recursion_limit = 0;
301 pcre_globals->error_code = PHP_PCRE_NO_ERROR;
302 ZVAL_UNDEF(&pcre_globals->unmatched_null_pair);
303 ZVAL_UNDEF(&pcre_globals->unmatched_empty_pair);
304 #ifdef HAVE_PCRE_JIT_SUPPORT
305 pcre_globals->jit = 1;
306 #endif
307
308 php_pcre_init_pcre2(1);
309 zend_hash_init(&char_tables, 1, NULL, php_pcre_free_char_table, 1);
310 }
311 /* }}} */
312
PHP_GSHUTDOWN_FUNCTION(pcre)313 static PHP_GSHUTDOWN_FUNCTION(pcre) /* {{{ */
314 {
315 if (!pcre_globals->per_request_cache) {
316 zend_hash_destroy(&pcre_globals->pcre_cache);
317 }
318
319 php_pcre_shutdown_pcre2();
320 zend_hash_destroy(&char_tables);
321 php_pcre_mutex_free();
322 }
323 /* }}} */
324
PHP_INI_MH(OnUpdateBacktrackLimit)325 static PHP_INI_MH(OnUpdateBacktrackLimit)
326 {/*{{{*/
327 OnUpdateLong(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
328 if (mctx) {
329 pcre2_set_match_limit(mctx, (uint32_t)PCRE_G(backtrack_limit));
330 }
331
332 return SUCCESS;
333 }/*}}}*/
334
PHP_INI_MH(OnUpdateRecursionLimit)335 static PHP_INI_MH(OnUpdateRecursionLimit)
336 {/*{{{*/
337 OnUpdateLong(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
338 if (mctx) {
339 pcre2_set_depth_limit(mctx, (uint32_t)PCRE_G(recursion_limit));
340 }
341
342 return SUCCESS;
343 }/*}}}*/
344
345 #ifdef HAVE_PCRE_JIT_SUPPORT
PHP_INI_MH(OnUpdateJit)346 static PHP_INI_MH(OnUpdateJit)
347 {/*{{{*/
348 OnUpdateBool(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
349 if (PCRE_G(jit) && jit_stack) {
350 pcre2_jit_stack_assign(mctx, NULL, jit_stack);
351 } else {
352 pcre2_jit_stack_assign(mctx, NULL, NULL);
353 }
354
355 return SUCCESS;
356 }/*}}}*/
357 #endif
358
/* INI directives of the extension. Each entry names its default value, the
   update handler defined above and the zend_pcre_globals field it is bound
   to; all of them are registered with PHP_INI_ALL. */
PHP_INI_BEGIN()
	STD_PHP_INI_ENTRY("pcre.backtrack_limit", "1000000", PHP_INI_ALL, OnUpdateBacktrackLimit, backtrack_limit, zend_pcre_globals, pcre_globals)
	STD_PHP_INI_ENTRY("pcre.recursion_limit", "100000", PHP_INI_ALL, OnUpdateRecursionLimit, recursion_limit, zend_pcre_globals, pcre_globals)
#ifdef HAVE_PCRE_JIT_SUPPORT
	/* Only available when the extension is built with JIT support. */
	STD_PHP_INI_ENTRY("pcre.jit", "1", PHP_INI_ALL, OnUpdateJit, jit, zend_pcre_globals, pcre_globals)
#endif
PHP_INI_END()
366
/*
 * Fetches a string-valued PCRE2 build-time setting.
 *
 * what: a PCRE2_CONFIG_* selector whose value is a string.
 *
 * Returns a malloc()ed, NUL-terminated copy that the caller must free(), or
 * NULL if the setting is unavailable (pcre2_config() reports an error or an
 * empty result) or memory could not be allocated.
 */
static char *_pcre2_config_str(uint32_t what)
{/*{{{*/
	char *ret;
	/* With a NULL buffer pcre2_config() only reports the required length,
	   or a negative PCRE2_ERROR_* code when the request is not supported. */
	int len = pcre2_config(what, NULL);

	/* A negative code must not reach malloc(), where it would turn into a
	   huge size_t. */
	if (len <= 0) {
		return NULL;
	}

	ret = malloc((size_t) len + 1);
	if (ret == NULL) {
		return NULL;
	}

	if (pcre2_config(what, ret) <= 0) {
		free(ret);
		return NULL;
	}

	return ret;
}/*}}}*/
380
381 /* {{{ PHP_MINFO_FUNCTION(pcre) */
PHP_MINFO_FUNCTION(pcre)382 static PHP_MINFO_FUNCTION(pcre)
383 {
384 #ifdef HAVE_PCRE_JIT_SUPPORT
385 uint32_t flag = 0;
386 char *jit_target = _pcre2_config_str(PCRE2_CONFIG_JITTARGET);
387 #endif
388 char *version = _pcre2_config_str(PCRE2_CONFIG_VERSION);
389 char *unicode = _pcre2_config_str(PCRE2_CONFIG_UNICODE_VERSION);
390
391 php_info_print_table_start();
392 php_info_print_table_row(2, "PCRE (Perl Compatible Regular Expressions) Support", "enabled" );
393 php_info_print_table_row(2, "PCRE Library Version", version);
394 free(version);
395 php_info_print_table_row(2, "PCRE Unicode Version", unicode);
396 free(unicode);
397
398 #ifdef HAVE_PCRE_JIT_SUPPORT
399 if (!pcre2_config(PCRE2_CONFIG_JIT, &flag)) {
400 php_info_print_table_row(2, "PCRE JIT Support", flag ? "enabled" : "disabled");
401 } else {
402 php_info_print_table_row(2, "PCRE JIT Support", "unknown" );
403 }
404 if (jit_target) {
405 php_info_print_table_row(2, "PCRE JIT Target", jit_target);
406 }
407 free(jit_target);
408 #else
409 php_info_print_table_row(2, "PCRE JIT Support", "not compiled in" );
410 #endif
411
412 #ifdef HAVE_PCRE_VALGRIND_SUPPORT
413 php_info_print_table_row(2, "PCRE Valgrind Support", "enabled" );
414 #endif
415
416 php_info_print_table_end();
417
418 DISPLAY_INI_ENTRIES();
419 }
420 /* }}} */
421
422 /* {{{ PHP_MINIT_FUNCTION(pcre) */
/* Module startup: makes sure the shared PCRE2 objects exist, registers the
 * INI entries and exports the PREG_* / PCRE_* userland constants.
 * Returns FAILURE only if the PCRE2 objects still cannot be created. */
static PHP_MINIT_FUNCTION(pcre)
{
	char *version;

#ifdef HAVE_PCRE_JIT_SUPPORT
	/* GINIT already tried to set PCRE2 up; give it a second chance here. */
	if (UNEXPECTED(!pcre2_init_ok)) {
		/* Retry. */
		php_pcre_init_pcre2(PCRE_G(jit));
		if (!pcre2_init_ok) {
			return FAILURE;
		}
	}
#endif

	REGISTER_INI_ENTRIES();

	/* Flag constants. */
	REGISTER_LONG_CONSTANT("PREG_PATTERN_ORDER", PREG_PATTERN_ORDER, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("PREG_SET_ORDER", PREG_SET_ORDER, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("PREG_OFFSET_CAPTURE", PREG_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("PREG_UNMATCHED_AS_NULL", PREG_UNMATCHED_AS_NULL, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("PREG_SPLIT_NO_EMPTY", PREG_SPLIT_NO_EMPTY, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("PREG_SPLIT_DELIM_CAPTURE", PREG_SPLIT_DELIM_CAPTURE, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("PREG_SPLIT_OFFSET_CAPTURE", PREG_SPLIT_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("PREG_GREP_INVERT", PREG_GREP_INVERT, CONST_CS | CONST_PERSISTENT);

	/* Error codes as stored in PCRE_G(error_code). */
	REGISTER_LONG_CONSTANT("PREG_NO_ERROR", PHP_PCRE_NO_ERROR, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("PREG_INTERNAL_ERROR", PHP_PCRE_INTERNAL_ERROR, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("PREG_BACKTRACK_LIMIT_ERROR", PHP_PCRE_BACKTRACK_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("PREG_RECURSION_LIMIT_ERROR", PHP_PCRE_RECURSION_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_ERROR", PHP_PCRE_BAD_UTF8_ERROR, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_OFFSET_ERROR", PHP_PCRE_BAD_UTF8_OFFSET_ERROR, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("PREG_JIT_STACKLIMIT_ERROR", PHP_PCRE_JIT_STACKLIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
	/* Library version: the string comes from pcre2_config() and is freed
	 * right after registration; major/minor are taken from the PCRE2 header
	 * macros. */
	version = _pcre2_config_str(PCRE2_CONFIG_VERSION);
	REGISTER_STRING_CONSTANT("PCRE_VERSION", version, CONST_CS | CONST_PERSISTENT);
	free(version);
	REGISTER_LONG_CONSTANT("PCRE_VERSION_MAJOR", PCRE2_MAJOR, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("PCRE_VERSION_MINOR", PCRE2_MINOR, CONST_CS | CONST_PERSISTENT);

	/* Tells userland whether the extension was built with JIT support. */
#ifdef HAVE_PCRE_JIT_SUPPORT
	REGISTER_BOOL_CONSTANT("PCRE_JIT_SUPPORT", 1, CONST_CS | CONST_PERSISTENT);
#else
	REGISTER_BOOL_CONSTANT("PCRE_JIT_SUPPORT", 0, CONST_CS | CONST_PERSISTENT);
#endif

	return SUCCESS;
}
469 /* }}} */
470
471 /* {{{ PHP_MSHUTDOWN_FUNCTION(pcre) */
PHP_MSHUTDOWN_FUNCTION(pcre)472 static PHP_MSHUTDOWN_FUNCTION(pcre)
473 {
474 UNREGISTER_INI_ENTRIES();
475
476 return SUCCESS;
477 }
478 /* }}} */
479
480 /* {{{ PHP_RINIT_FUNCTION(pcre) */
PHP_RINIT_FUNCTION(pcre)481 static PHP_RINIT_FUNCTION(pcre)
482 {
483 #ifdef HAVE_PCRE_JIT_SUPPORT
484 if (UNEXPECTED(!pcre2_init_ok)) {
485 /* Retry. */
486 php_pcre_mutex_lock();
487 php_pcre_init_pcre2(PCRE_G(jit));
488 if (!pcre2_init_ok) {
489 php_pcre_mutex_unlock();
490 return FAILURE;
491 }
492 php_pcre_mutex_unlock();
493 }
494
495 mdata_used = 0;
496 #endif
497
498 PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
499 PCRE_G(gctx_zmm) = pcre2_general_context_create(php_pcre_emalloc, php_pcre_efree, NULL);
500 if (!PCRE_G(gctx_zmm)) {
501 return FAILURE;
502 }
503
504 if (PCRE_G(per_request_cache)) {
505 zend_hash_init(&PCRE_G(pcre_cache), 0, NULL, php_efree_pcre_cache, 0);
506 }
507
508 return SUCCESS;
509 }
510 /* }}} */
511
PHP_RSHUTDOWN_FUNCTION(pcre)512 static PHP_RSHUTDOWN_FUNCTION(pcre)
513 {
514 pcre2_general_context_free(PCRE_G(gctx_zmm));
515 PCRE_G(gctx_zmm) = NULL;
516
517 if (PCRE_G(per_request_cache)) {
518 zend_hash_destroy(&PCRE_G(pcre_cache));
519 }
520
521 zval_ptr_dtor(&PCRE_G(unmatched_null_pair));
522 zval_ptr_dtor(&PCRE_G(unmatched_empty_pair));
523 ZVAL_UNDEF(&PCRE_G(unmatched_null_pair));
524 ZVAL_UNDEF(&PCRE_G(unmatched_empty_pair));
525 return SUCCESS;
526 }
527
528 /* {{{ static pcre_clean_cache */
/* zend_hash_apply_with_argument() callback used to trim the pattern cache.
 * `arg` points to an int holding how many entries may still be removed; an
 * entry is removed (and the counter decremented) only while that budget is
 * positive and the entry's refcount is zero. */
static int pcre_clean_cache(zval *data, void *arg)
{
	int *remaining = (int *)arg;
	pcre_cache_entry *entry = (pcre_cache_entry *) Z_PTR_P(data);

	if (*remaining <= 0 || entry->refcount) {
		return ZEND_HASH_APPLY_KEEP;
	}

	--*remaining;
	return ZEND_HASH_APPLY_REMOVE;
}
541 /* }}} */
542
/* Releases every name stored in a table built by make_subpats_table() and
 * then the table itself. Empty (NULL) slots are skipped. */
static void free_subpats_table(zend_string **subpat_names, uint32_t num_subpats) {
	uint32_t slot = 0;

	while (slot < num_subpats) {
		zend_string *name = subpat_names[slot++];

		if (name != NULL) {
			zend_string_release(name);
		}
	}

	efree(subpat_names);
}
552
553 /* {{{ static make_subpats_table */
make_subpats_table(uint32_t num_subpats,pcre_cache_entry * pce)554 static zend_string **make_subpats_table(uint32_t num_subpats, pcre_cache_entry *pce)
555 {
556 uint32_t name_cnt = pce->name_count, name_size, ni = 0;
557 char *name_table;
558 zend_string **subpat_names;
559 int rc1, rc2;
560
561 rc1 = pcre2_pattern_info(pce->re, PCRE2_INFO_NAMETABLE, &name_table);
562 rc2 = pcre2_pattern_info(pce->re, PCRE2_INFO_NAMEENTRYSIZE, &name_size);
563 if (rc1 < 0 || rc2 < 0) {
564 php_error_docref(NULL, E_WARNING, "Internal pcre2_pattern_info() error %d", rc1 < 0 ? rc1 : rc2);
565 return NULL;
566 }
567
568 subpat_names = ecalloc(num_subpats, sizeof(zend_string *));
569 while (ni++ < name_cnt) {
570 unsigned short name_idx = 0x100 * (unsigned char)name_table[0] + (unsigned char)name_table[1];
571 const char *name = name_table + 2;
572 subpat_names[name_idx] = zend_string_init(name, strlen(name), 0);
573 if (is_numeric_string(ZSTR_VAL(subpat_names[name_idx]), ZSTR_LEN(subpat_names[name_idx]), NULL, NULL, 0) > 0) {
574 php_error_docref(NULL, E_WARNING, "Numeric named subpatterns are not allowed");
575 free_subpats_table(subpat_names, num_subpats);
576 return NULL;
577 }
578 name_table += name_size;
579 }
580 return subpat_names;
581 }
582 /* }}} */
583
584 /* {{{ static calculate_unit_length */
585 /* Calculates the byte length of the next character. Assumes valid UTF-8 for PCRE2_UTF. */
calculate_unit_length(pcre_cache_entry * pce,const char * start)586 static zend_always_inline size_t calculate_unit_length(pcre_cache_entry *pce, const char *start)
587 {
588 size_t unit_len;
589
590 if (pce->compile_options & PCRE2_UTF) {
591 const char *end = start;
592
593 /* skip continuation bytes */
594 while ((*++end & 0xC0) == 0x80);
595 unit_len = end - start;
596 } else {
597 unit_len = 1;
598 }
599 return unit_len;
600 }
601 /* }}} */
602
603 /* {{{ pcre_get_compiled_regex_cache */
pcre_get_compiled_regex_cache_ex(zend_string * regex,int locale_aware)604 PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, int locale_aware)
605 {
606 pcre2_code *re = NULL;
607 #if 10 == PCRE2_MAJOR && 37 == PCRE2_MINOR && !HAVE_BUNDLED_PCRE
608 uint32_t coptions = PCRE2_NO_START_OPTIMIZE;
609 #else
610 uint32_t coptions = 0;
611 #endif
612 PCRE2_UCHAR error[128];
613 PCRE2_SIZE erroffset;
614 int errnumber;
615 char delimiter;
616 char start_delimiter;
617 char end_delimiter;
618 char *p, *pp;
619 char *pattern;
620 size_t pattern_len;
621 uint32_t poptions = 0;
622 const uint8_t *tables = NULL;
623 zval *zv;
624 pcre_cache_entry new_entry;
625 int rc;
626 zend_string *key;
627 pcre_cache_entry *ret;
628
629 if (locale_aware && BG(ctype_string)) {
630 key = zend_string_concat2(
631 ZSTR_VAL(BG(ctype_string)), ZSTR_LEN(BG(ctype_string)),
632 ZSTR_VAL(regex), ZSTR_LEN(regex));
633 } else {
634 key = regex;
635 }
636
637 /* Try to lookup the cached regex entry, and if successful, just pass
638 back the compiled pattern, otherwise go on and compile it. */
639 zv = zend_hash_find(&PCRE_G(pcre_cache), key);
640 if (zv) {
641 if (key != regex) {
642 zend_string_release_ex(key, 0);
643 }
644 return (pcre_cache_entry*)Z_PTR_P(zv);
645 }
646
647 p = ZSTR_VAL(regex);
648
649 /* Parse through the leading whitespace, and display a warning if we
650 get to the end without encountering a delimiter. */
651 while (isspace((int)*(unsigned char *)p)) p++;
652 if (*p == 0) {
653 if (key != regex) {
654 zend_string_release_ex(key, 0);
655 }
656 php_error_docref(NULL, E_WARNING,
657 p < ZSTR_VAL(regex) + ZSTR_LEN(regex) ? "Null byte in regex" : "Empty regular expression");
658 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
659 return NULL;
660 }
661
662 /* Get the delimiter and display a warning if it is alphanumeric
663 or a backslash. */
664 delimiter = *p++;
665 if (isalnum((int)*(unsigned char *)&delimiter) || delimiter == '\\') {
666 if (key != regex) {
667 zend_string_release_ex(key, 0);
668 }
669 php_error_docref(NULL,E_WARNING, "Delimiter must not be alphanumeric or backslash");
670 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
671 return NULL;
672 }
673
674 start_delimiter = delimiter;
675 if ((pp = strchr("([{< )]}> )]}>", delimiter)))
676 delimiter = pp[5];
677 end_delimiter = delimiter;
678
679 pp = p;
680
681 if (start_delimiter == end_delimiter) {
682 /* We need to iterate through the pattern, searching for the ending delimiter,
683 but skipping the backslashed delimiters. If the ending delimiter is not
684 found, display a warning. */
685 while (*pp != 0) {
686 if (*pp == '\\' && pp[1] != 0) pp++;
687 else if (*pp == delimiter)
688 break;
689 pp++;
690 }
691 } else {
692 /* We iterate through the pattern, searching for the matching ending
693 * delimiter. For each matching starting delimiter, we increment nesting
694 * level, and decrement it for each matching ending delimiter. If we
695 * reach the end of the pattern without matching, display a warning.
696 */
697 int brackets = 1; /* brackets nesting level */
698 while (*pp != 0) {
699 if (*pp == '\\' && pp[1] != 0) pp++;
700 else if (*pp == end_delimiter && --brackets <= 0)
701 break;
702 else if (*pp == start_delimiter)
703 brackets++;
704 pp++;
705 }
706 }
707
708 if (*pp == 0) {
709 if (key != regex) {
710 zend_string_release_ex(key, 0);
711 }
712 if (pp < ZSTR_VAL(regex) + ZSTR_LEN(regex)) {
713 php_error_docref(NULL,E_WARNING, "Null byte in regex");
714 } else if (start_delimiter == end_delimiter) {
715 php_error_docref(NULL,E_WARNING, "No ending delimiter '%c' found", delimiter);
716 } else {
717 php_error_docref(NULL,E_WARNING, "No ending matching delimiter '%c' found", delimiter);
718 }
719 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
720 return NULL;
721 }
722
723 /* Make a copy of the actual pattern. */
724 pattern_len = pp - p;
725 pattern = estrndup(p, pattern_len);
726
727 /* Move on to the options */
728 pp++;
729
730 /* Parse through the options, setting appropriate flags. Display
731 a warning if we encounter an unknown modifier. */
732 while (pp < ZSTR_VAL(regex) + ZSTR_LEN(regex)) {
733 switch (*pp++) {
734 /* Perl compatible options */
735 case 'i': coptions |= PCRE2_CASELESS; break;
736 case 'm': coptions |= PCRE2_MULTILINE; break;
737 case 'n': coptions |= PCRE2_NO_AUTO_CAPTURE; break;
738 case 's': coptions |= PCRE2_DOTALL; break;
739 case 'x': coptions |= PCRE2_EXTENDED; break;
740
741 /* PCRE specific options */
742 case 'A': coptions |= PCRE2_ANCHORED; break;
743 case 'D': coptions |= PCRE2_DOLLAR_ENDONLY;break;
744 case 'S': /* Pass. */ break;
745 case 'X': /* Pass. */ break;
746 case 'U': coptions |= PCRE2_UNGREEDY; break;
747 case 'u': coptions |= PCRE2_UTF;
748 /* In PCRE, by default, \d, \D, \s, \S, \w, and \W recognize only ASCII
749 characters, even in UTF-8 mode. However, this can be changed by setting
750 the PCRE2_UCP option. */
751 #ifdef PCRE2_UCP
752 coptions |= PCRE2_UCP;
753 #endif
754 break;
755 case 'J': coptions |= PCRE2_DUPNAMES; break;
756
757 /* Custom preg options */
758 case 'e': poptions |= PREG_REPLACE_EVAL; break;
759
760 case ' ':
761 case '\n':
762 case '\r':
763 break;
764
765 default:
766 if (pp[-1]) {
767 php_error_docref(NULL,E_WARNING, "Unknown modifier '%c'", pp[-1]);
768 } else {
769 php_error_docref(NULL,E_WARNING, "Null byte in regex");
770 }
771 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
772 efree(pattern);
773 if (key != regex) {
774 zend_string_release_ex(key, 0);
775 }
776 return NULL;
777 }
778 }
779
780 if (poptions & PREG_REPLACE_EVAL) {
781 php_error_docref(NULL, E_WARNING, "The /e modifier is no longer supported, use preg_replace_callback instead");
782 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
783 efree(pattern);
784 if (key != regex) {
785 zend_string_release_ex(key, 0);
786 }
787 return NULL;
788 }
789
790 if (key != regex) {
791 tables = (uint8_t *)zend_hash_find_ptr(&char_tables, BG(ctype_string));
792 if (!tables) {
793 zend_string *_k;
794 tables = pcre2_maketables(gctx);
795 if (UNEXPECTED(!tables)) {
796 php_error_docref(NULL,E_WARNING, "Failed to generate locale character tables");
797 pcre_handle_exec_error(PCRE2_ERROR_NOMEMORY);
798 zend_string_release_ex(key, 0);
799 efree(pattern);
800 return NULL;
801 }
802 _k = zend_string_init(ZSTR_VAL(BG(ctype_string)), ZSTR_LEN(BG(ctype_string)), 1);
803 GC_MAKE_PERSISTENT_LOCAL(_k);
804 zend_hash_add_ptr(&char_tables, _k, (void *)tables);
805 zend_string_release(_k);
806 }
807 }
808 pcre2_set_character_tables(cctx, tables);
809
810 /* Compile pattern and display a warning if compilation failed. */
811 re = pcre2_compile((PCRE2_SPTR)pattern, pattern_len, coptions, &errnumber, &erroffset, cctx);
812
813 if (re == NULL) {
814 if (key != regex) {
815 zend_string_release_ex(key, 0);
816 }
817 pcre2_get_error_message(errnumber, error, sizeof(error));
818 php_error_docref(NULL,E_WARNING, "Compilation failed: %s at offset %zu", error, erroffset);
819 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
820 efree(pattern);
821 return NULL;
822 }
823
824 #ifdef HAVE_PCRE_JIT_SUPPORT
825 if (PCRE_G(jit)) {
826 /* Enable PCRE JIT compiler */
827 rc = pcre2_jit_compile(re, PCRE2_JIT_COMPLETE);
828 if (EXPECTED(rc >= 0)) {
829 size_t jit_size = 0;
830 if (!pcre2_pattern_info(re, PCRE2_INFO_JITSIZE, &jit_size) && jit_size > 0) {
831 poptions |= PREG_JIT;
832 }
833 } else if (rc == PCRE2_ERROR_NOMEMORY) {
834 php_error_docref(NULL, E_WARNING,
835 "Allocation of JIT memory failed, PCRE JIT will be disabled. "
836 "This is likely caused by security restrictions. "
837 "Either grant PHP permission to allocate executable memory, or set pcre.jit=0");
838 PCRE_G(jit) = 0;
839 } else {
840 pcre2_get_error_message(rc, error, sizeof(error));
841 php_error_docref(NULL, E_WARNING, "JIT compilation failed: %s", error);
842 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
843 }
844 }
845 #endif
846 efree(pattern);
847
848 /*
849 * If we reached cache limit, clean out the items from the head of the list;
850 * these are supposedly the oldest ones (but not necessarily the least used
851 * ones).
852 */
853 if (zend_hash_num_elements(&PCRE_G(pcre_cache)) == PCRE_CACHE_SIZE) {
854 int num_clean = PCRE_CACHE_SIZE / 8;
855 zend_hash_apply_with_argument(&PCRE_G(pcre_cache), pcre_clean_cache, &num_clean);
856 }
857
858 /* Store the compiled pattern and extra info in the cache. */
859 new_entry.re = re;
860 new_entry.preg_options = poptions;
861 new_entry.compile_options = coptions;
862 new_entry.refcount = 0;
863
864 rc = pcre2_pattern_info(re, PCRE2_INFO_CAPTURECOUNT, &new_entry.capture_count);
865 if (rc < 0) {
866 if (key != regex) {
867 zend_string_release_ex(key, 0);
868 }
869 php_error_docref(NULL, E_WARNING, "Internal pcre2_pattern_info() error %d", rc);
870 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
871 return NULL;
872 }
873
874 rc = pcre2_pattern_info(re, PCRE2_INFO_NAMECOUNT, &new_entry.name_count);
875 if (rc < 0) {
876 if (key != regex) {
877 zend_string_release_ex(key, 0);
878 }
879 php_error_docref(NULL, E_WARNING, "Internal pcre_pattern_info() error %d", rc);
880 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
881 return NULL;
882 }
883
884 /*
885 * Interned strings are not duplicated when stored in HashTable,
886 * but all the interned strings created during HTTP request are removed
887 * at end of request. However PCRE_G(pcre_cache) must be consistent
888 * on the next request as well. So we disable usage of interned strings
889 * as hash keys especually for this table.
890 * See bug #63180
891 */
892 if (!(GC_FLAGS(key) & IS_STR_PERMANENT) && !PCRE_G(per_request_cache)) {
893 zend_string *str = zend_string_init(ZSTR_VAL(key), ZSTR_LEN(key), 1);
894 GC_MAKE_PERSISTENT_LOCAL(str);
895
896 ret = zend_hash_add_new_mem(&PCRE_G(pcre_cache), str, &new_entry, sizeof(pcre_cache_entry));
897 zend_string_release(str);
898 } else {
899 ret = zend_hash_add_new_mem(&PCRE_G(pcre_cache), key, &new_entry, sizeof(pcre_cache_entry));
900 }
901
902 if (key != regex) {
903 zend_string_release_ex(key, 0);
904 }
905
906 return ret;
907 }
908 /* }}} */
909
910 /* {{{ pcre_get_compiled_regex_cache */
pcre_get_compiled_regex_cache(zend_string * regex)911 PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex)
912 {
913 return pcre_get_compiled_regex_cache_ex(regex, 1);
914 }
915 /* }}} */
916
917 /* {{{ pcre_get_compiled_regex */
pcre_get_compiled_regex(zend_string * regex,uint32_t * capture_count)918 PHPAPI pcre2_code *pcre_get_compiled_regex(zend_string *regex, uint32_t *capture_count)
919 {
920 pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex);
921
922 if (capture_count) {
923 *capture_count = pce ? pce->capture_count : 0;
924 }
925
926 return pce ? pce->re : NULL;
927 }
928 /* }}} */
929
930 /* {{{ pcre_get_compiled_regex_ex */
pcre_get_compiled_regex_ex(zend_string * regex,uint32_t * capture_count,uint32_t * preg_options,uint32_t * compile_options)931 PHPAPI pcre2_code* pcre_get_compiled_regex_ex(zend_string *regex, uint32_t *capture_count, uint32_t *preg_options, uint32_t *compile_options)
932 {
933 pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex);
934
935 if (preg_options) {
936 *preg_options = pce ? pce->preg_options : 0;
937 }
938 if (compile_options) {
939 *compile_options = pce ? pce->compile_options : 0;
940 }
941 if (capture_count) {
942 *capture_count = pce ? pce->capture_count : 0;
943 }
944
945 return pce ? pce->re : NULL;
946 }
947 /* }}} */
948
949 /* XXX For the cases where it's only about match yes/no and no capture
950 required, perhaps just a minimum sized data would suffice. */
php_pcre_create_match_data(uint32_t capture_count,pcre2_code * re)951 PHPAPI pcre2_match_data *php_pcre_create_match_data(uint32_t capture_count, pcre2_code *re)
952 {/*{{{*/
953
954 assert(NULL != re);
955
956 if (EXPECTED(!mdata_used)) {
957 int rc = 0;
958
959 if (!capture_count) {
960 /* As we deal with a non cached pattern, no other way to gather this info. */
961 rc = pcre2_pattern_info(re, PCRE2_INFO_CAPTURECOUNT, &capture_count);
962 }
963
964 if (rc >= 0 && capture_count + 1 <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
965 mdata_used = 1;
966 return mdata;
967 }
968 }
969
970 return pcre2_match_data_create_from_pattern(re, gctx);
971 }/*}}}*/
972
php_pcre_free_match_data(pcre2_match_data * match_data)973 PHPAPI void php_pcre_free_match_data(pcre2_match_data *match_data)
974 {/*{{{*/
975 if (UNEXPECTED(match_data != mdata)) {
976 pcre2_match_data_free(match_data);
977 } else {
978 mdata_used = 0;
979 }
980 }/*}}}*/
981
init_unmatched_null_pair(void)982 static void init_unmatched_null_pair(void) {
983 zval val1, val2;
984 ZVAL_NULL(&val1);
985 ZVAL_LONG(&val2, -1);
986 ZVAL_ARR(&PCRE_G(unmatched_null_pair), zend_new_pair(&val1, &val2));
987 }
988
init_unmatched_empty_pair(void)989 static void init_unmatched_empty_pair(void) {
990 zval val1, val2;
991 ZVAL_EMPTY_STRING(&val1);
992 ZVAL_LONG(&val2, -1);
993 ZVAL_ARR(&PCRE_G(unmatched_empty_pair), zend_new_pair(&val1, &val2));
994 }
995
populate_match_value_str(zval * val,const char * subject,PCRE2_SIZE start_offset,PCRE2_SIZE end_offset)996 static zend_always_inline void populate_match_value_str(
997 zval *val, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset) {
998 ZVAL_STRINGL_FAST(val, subject + start_offset, end_offset - start_offset);
999 }
1000
/*
 * Set `val` to the value of one capture group.
 *
 * A group that took part in the match becomes the matched substring. A group
 * whose start offset is PCRE2_UNSET becomes NULL when `unmatched_as_null` is
 * non-zero, and the empty string otherwise.
 */
static inline void populate_match_value(
		zval *val, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset,
		uint32_t unmatched_as_null) {
	if (start_offset != PCRE2_UNSET) {
		populate_match_value_str(val, subject, start_offset, end_offset);
		return;
	}

	if (!unmatched_as_null) {
		ZVAL_EMPTY_STRING(val);
	} else {
		ZVAL_NULL(val);
	}
}
1014
/*
 * Store `val` in `subpats` under the key `name`.
 *
 * With DUPNAMES several groups can share one name, and the entry that actually
 * matched must win: a matched value always overwrites, while an unmatched value
 * is only inserted when the name is not present yet. A reference to `val` is
 * added only if the value ended up in the table.
 */
static inline void add_named(
		zval *subpats, zend_string *name, zval *val, bool unmatched) {
	HashTable *table = Z_ARRVAL_P(subpats);

	if (unmatched) {
		if (zend_hash_add(table, name, val) == NULL) {
			/* Name already present: keep the existing entry untouched. */
			return;
		}
	} else {
		zend_hash_update(table, name, val);
	}

	Z_TRY_ADDREF_P(val);
}
1028
1029 /* {{{ add_offset_pair */
/*
 * Append a (match, offset) pair for one capture group to `result`, and also
 * store it under `name` when a name is given.
 *
 * For an unset group (start_offset == PCRE2_UNSET) a shared pair from the
 * module globals is used, created on first use: (NULL, -1) when
 * `unmatched_as_null` is non-zero, ("", -1) otherwise.
 */
static inline void add_offset_pair(
		zval *result, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset,
		zend_string *name, uint32_t unmatched_as_null)
{
	const bool group_unset = (PCRE2_UNSET == start_offset);
	zval pair;

	if (!group_unset) {
		zval matched_text, matched_at;

		populate_match_value_str(&matched_text, subject, start_offset, end_offset);
		ZVAL_LONG(&matched_at, start_offset);
		ZVAL_ARR(&pair, zend_new_pair(&matched_text, &matched_at));
	} else if (unmatched_as_null) {
		if (Z_ISUNDEF(PCRE_G(unmatched_null_pair))) {
			init_unmatched_null_pair();
		}
		ZVAL_COPY(&pair, &PCRE_G(unmatched_null_pair));
	} else {
		if (Z_ISUNDEF(PCRE_G(unmatched_empty_pair))) {
			init_unmatched_empty_pair();
		}
		ZVAL_COPY(&pair, &PCRE_G(unmatched_empty_pair));
	}

	/* The named entry goes in first, then the positional one. */
	if (name) {
		add_named(result, name, &pair, group_unset);
	}
	zend_hash_next_index_insert(Z_ARRVAL_P(result), &pair);
}
1061 /* }}} */
1062
/*
 * Fill the array `subpats` with the capture groups of one match.
 *
 * subject:      subject buffer the offsets refer to.
 * offsets:      ovector; group g occupies offsets[2*g] and offsets[2*g+1].
 * subpat_names: per-group names (NULL entries for unnamed groups), or NULL
 *               when the pattern has no named groups to report.
 * num_subpats:  total number of groups including group 0.
 * count:        number of groups the match actually reported.
 * mark:         MARK name to store under the "MARK" key, or NULL.
 * flags:        PREG_OFFSET_CAPTURE selects (match, offset) pairs;
 *               PREG_UNMATCHED_AS_NULL reports unset groups as NULL and pads
 *               the groups in [count, num_subpats) with NULL entries.
 *
 * The four flag/name combinations are kept as separate loops so the tests are
 * done once per call rather than once per group.
 */
static void populate_subpat_array(
		zval *subpats, const char *subject, PCRE2_SIZE *offsets, zend_string **subpat_names,
		uint32_t num_subpats, int count, const PCRE2_SPTR mark, zend_long flags) {
	const bool want_offsets = (flags & PREG_OFFSET_CAPTURE) != 0;
	const bool null_unmatched = (flags & PREG_UNMATCHED_AS_NULL) != 0;
	zval entry;
	int g;

	if (!subpat_names) {
		/* Pattern without named groups: positional entries only. */
		if (!want_offsets) {
			for (g = 0; g < count; g++) {
				populate_match_value(
					&entry, subject, offsets[2*g], offsets[2*g+1], null_unmatched);
				zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &entry);
			}
			if (null_unmatched) {
				for (g = count; g < num_subpats; g++) {
					add_next_index_null(subpats);
				}
			}
		} else {
			for (g = 0; g < count; g++) {
				add_offset_pair(
					subpats, subject, offsets[2*g], offsets[2*g+1], NULL, null_unmatched);
			}
			if (null_unmatched) {
				for (g = count; g < num_subpats; g++) {
					add_offset_pair(subpats, NULL, PCRE2_UNSET, PCRE2_UNSET, NULL, 1);
				}
			}
		}
	} else if (!want_offsets) {
		/* Named groups, plain values: named key first, then the index. */
		for (g = 0; g < count; g++) {
			populate_match_value(
				&entry, subject, offsets[2*g], offsets[2*g+1], null_unmatched);
			if (subpat_names[g]) {
				add_named(subpats, subpat_names[g], &entry, offsets[2*g] == PCRE2_UNSET);
			}
			zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &entry);
		}
		if (null_unmatched) {
			for (g = count; g < num_subpats; g++) {
				ZVAL_NULL(&entry);
				if (subpat_names[g]) {
					/* Add-only: never clobber a value stored by a same-named group. */
					zend_hash_add(Z_ARRVAL_P(subpats), subpat_names[g], &entry);
				}
				zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &entry);
			}
		}
	} else {
		/* Named groups, (match, offset) pairs. */
		for (g = 0; g < count; g++) {
			add_offset_pair(
				subpats, subject, offsets[2*g], offsets[2*g+1],
				subpat_names[g], null_unmatched);
		}
		if (null_unmatched) {
			for (g = count; g < num_subpats; g++) {
				add_offset_pair(subpats, NULL, PCRE2_UNSET, PCRE2_UNSET, subpat_names[g], 1);
			}
		}
	}

	/* Add MARK, if available */
	if (mark) {
		add_assoc_string_ex(subpats, "MARK", sizeof("MARK") - 1, (char *)mark);
	}
}
1130
/*
 * Shared entry point for preg_match() (global == 0) and preg_match_all()
 * (global != 0): parses the userland arguments, obtains the compiled pattern
 * and delegates to php_pcre_match_impl(). Returns false to userland when the
 * pattern could not be obtained.
 */
static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) /* {{{ */
{
	zend_string *regex;            /* Pattern source */
	zend_string *subject;          /* Text to search */
	zval *subpats = NULL;          /* Optional by-reference result array */
	zend_long flags = 0;           /* PREG_* flags */
	zend_long start_offset = 0;    /* Byte offset at which to start */
	pcre_cache_entry *pce;
	int use_flags;

	ZEND_PARSE_PARAMETERS_START(2, 5)
		Z_PARAM_STR(regex)
		Z_PARAM_STR(subject)
		Z_PARAM_OPTIONAL
		Z_PARAM_ZVAL(subpats)
		Z_PARAM_LONG(flags)
		Z_PARAM_LONG(start_offset)
	ZEND_PARSE_PARAMETERS_END();

	pce = pcre_get_compiled_regex_cache(regex);
	if (pce == NULL) {
		RETURN_FALSE;
	}

	/* Flags are only honoured when the caller actually passed a 4th argument. */
	use_flags = ZEND_NUM_ARGS() >= 4;

	/* Hold a reference on the cache entry while it is in use
	 * (presumably guards against eviction; confirm against the cache code). */
	pce->refcount++;
	php_pcre_match_impl(pce, subject, return_value, subpats,
		global, use_flags, flags, start_offset);
	pce->refcount--;
}
1160 /* }}} */
1161
is_known_valid_utf8(zend_string * subject_str,PCRE2_SIZE start_offset)1162 static zend_always_inline bool is_known_valid_utf8(
1163 zend_string *subject_str, PCRE2_SIZE start_offset) {
1164 if (!(GC_FLAGS(subject_str) & IS_STR_VALID_UTF8)) {
1165 /* We don't know whether the string is valid UTF-8 or not. */
1166 return 0;
1167 }
1168
1169 if (start_offset == ZSTR_LEN(subject_str)) {
1170 /* Degenerate case: Offset points to end of string. */
1171 return 1;
1172 }
1173
1174 /* Check that the offset does not point to an UTF-8 continuation byte. */
1175 return (ZSTR_VAL(subject_str)[start_offset] & 0xc0) != 0x80;
1176 }
1177
1178 /* {{{ php_pcre_match_impl() */
php_pcre_match_impl(pcre_cache_entry * pce,zend_string * subject_str,zval * return_value,zval * subpats,int global,int use_flags,zend_long flags,zend_off_t start_offset)1179 PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str, zval *return_value,
1180 zval *subpats, int global, int use_flags, zend_long flags, zend_off_t start_offset)
1181 {
1182 zval result_set, /* Holds a set of subpatterns after
1183 a global match */
1184 *match_sets = NULL; /* An array of sets of matches for each
1185 subpattern after a global match */
1186 uint32_t options; /* Execution options */
1187 int count; /* Count of matched subpatterns */
1188 PCRE2_SIZE *offsets; /* Array of subpattern offsets */
1189 uint32_t num_subpats; /* Number of captured subpatterns */
1190 int matched; /* Has anything matched */
1191 zend_string **subpat_names; /* Array for named subpatterns */
1192 size_t i;
1193 uint32_t subpats_order; /* Order of subpattern matches */
1194 uint32_t offset_capture; /* Capture match offsets: yes/no */
1195 uint32_t unmatched_as_null; /* Null non-matches: yes/no */
1196 PCRE2_SPTR mark = NULL; /* Target for MARK name */
1197 zval marks; /* Array of marks for PREG_PATTERN_ORDER */
1198 pcre2_match_data *match_data;
1199 PCRE2_SIZE start_offset2, orig_start_offset;
1200
1201 char *subject = ZSTR_VAL(subject_str);
1202 size_t subject_len = ZSTR_LEN(subject_str);
1203
1204 ZVAL_UNDEF(&marks);
1205
1206 /* Overwrite the passed-in value for subpatterns with an empty array. */
1207 if (subpats != NULL) {
1208 subpats = zend_try_array_init(subpats);
1209 if (!subpats) {
1210 RETURN_THROWS();
1211 }
1212 }
1213
1214 subpats_order = global ? PREG_PATTERN_ORDER : 0;
1215
1216 if (use_flags) {
1217 offset_capture = flags & PREG_OFFSET_CAPTURE;
1218 unmatched_as_null = flags & PREG_UNMATCHED_AS_NULL;
1219
1220 /*
1221 * subpats_order is pre-set to pattern mode so we change it only if
1222 * necessary.
1223 */
1224 if (flags & 0xff) {
1225 subpats_order = flags & 0xff;
1226 }
1227 if ((global && (subpats_order < PREG_PATTERN_ORDER || subpats_order > PREG_SET_ORDER)) ||
1228 (!global && subpats_order != 0)) {
1229 zend_argument_value_error(4, "must be a PREG_* constant");
1230 RETURN_THROWS();
1231 }
1232 } else {
1233 offset_capture = 0;
1234 unmatched_as_null = 0;
1235 }
1236
1237 /* Negative offset counts from the end of the string. */
1238 if (start_offset < 0) {
1239 if ((PCRE2_SIZE)-start_offset <= subject_len) {
1240 start_offset2 = subject_len + start_offset;
1241 } else {
1242 start_offset2 = 0;
1243 }
1244 } else {
1245 start_offset2 = (PCRE2_SIZE)start_offset;
1246 }
1247
1248 if (start_offset2 > subject_len) {
1249 pcre_handle_exec_error(PCRE2_ERROR_BADOFFSET);
1250 RETURN_FALSE;
1251 }
1252
1253 /* Calculate the size of the offsets array, and allocate memory for it. */
1254 num_subpats = pce->capture_count + 1;
1255
1256 /*
1257 * Build a mapping from subpattern numbers to their names. We will
1258 * allocate the table only if there are any named subpatterns.
1259 */
1260 subpat_names = NULL;
1261 if (subpats && pce->name_count > 0) {
1262 subpat_names = make_subpats_table(num_subpats, pce);
1263 if (!subpat_names) {
1264 RETURN_FALSE;
1265 }
1266 }
1267
1268 /* Allocate match sets array and initialize the values. */
1269 if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
1270 match_sets = (zval *)safe_emalloc(num_subpats, sizeof(zval), 0);
1271 for (i=0; i<num_subpats; i++) {
1272 array_init(&match_sets[i]);
1273 }
1274 }
1275
1276 matched = 0;
1277 PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1278
1279 if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
1280 match_data = mdata;
1281 } else {
1282 match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
1283 if (!match_data) {
1284 PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1285 if (subpat_names) {
1286 free_subpats_table(subpat_names, num_subpats);
1287 }
1288 if (match_sets) {
1289 efree(match_sets);
1290 }
1291 RETURN_FALSE;
1292 }
1293 }
1294
1295 orig_start_offset = start_offset2;
1296 options =
1297 (pce->compile_options & PCRE2_UTF) && !is_known_valid_utf8(subject_str, orig_start_offset)
1298 ? 0 : PCRE2_NO_UTF_CHECK;
1299
1300 /* Execute the regular expression. */
1301 #ifdef HAVE_PCRE_JIT_SUPPORT
1302 if ((pce->preg_options & PREG_JIT) && options) {
1303 count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1304 PCRE2_NO_UTF_CHECK, match_data, mctx);
1305 } else
1306 #endif
1307 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1308 options, match_data, mctx);
1309
1310 while (1) {
1311 /* If something has matched */
1312 if (count >= 0) {
1313 /* Check for too many substrings condition. */
1314 if (UNEXPECTED(count == 0)) {
1315 php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
1316 count = num_subpats;
1317 }
1318
1319 matched:
1320 matched++;
1321
1322 offsets = pcre2_get_ovector_pointer(match_data);
1323
1324 /* If subpatterns array has been passed, fill it in with values. */
1325 if (subpats != NULL) {
1326 /* Try to get the list of substrings and display a warning if failed. */
1327 if (offsets[1] < offsets[0]) {
1328 if (subpat_names) {
1329 free_subpats_table(subpat_names, num_subpats);
1330 }
1331 if (match_sets) efree(match_sets);
1332 php_error_docref(NULL, E_WARNING, "Get subpatterns list failed");
1333 RETURN_FALSE;
1334 }
1335
1336 if (global) { /* global pattern matching */
1337 if (subpats && subpats_order == PREG_PATTERN_ORDER) {
1338 /* For each subpattern, insert it into the appropriate array. */
1339 if (offset_capture) {
1340 for (i = 0; i < count; i++) {
1341 add_offset_pair(
1342 &match_sets[i], subject, offsets[2*i], offsets[2*i+1],
1343 NULL, unmatched_as_null);
1344 }
1345 } else {
1346 for (i = 0; i < count; i++) {
1347 zval val;
1348 populate_match_value(
1349 &val, subject, offsets[2*i], offsets[2*i+1], unmatched_as_null);
1350 zend_hash_next_index_insert_new(Z_ARRVAL(match_sets[i]), &val);
1351 }
1352 }
1353 mark = pcre2_get_mark(match_data);
1354 /* Add MARK, if available */
1355 if (mark) {
1356 if (Z_TYPE(marks) == IS_UNDEF) {
1357 array_init(&marks);
1358 }
1359 add_index_string(&marks, matched - 1, (char *) mark);
1360 }
1361 /*
1362 * If the number of captured subpatterns on this run is
1363 * less than the total possible number, pad the result
1364 * arrays with NULLs or empty strings.
1365 */
1366 if (count < num_subpats) {
1367 for (; i < num_subpats; i++) {
1368 if (offset_capture) {
1369 add_offset_pair(
1370 &match_sets[i], NULL, PCRE2_UNSET, PCRE2_UNSET,
1371 NULL, unmatched_as_null);
1372 } else if (unmatched_as_null) {
1373 add_next_index_null(&match_sets[i]);
1374 } else {
1375 add_next_index_str(&match_sets[i], ZSTR_EMPTY_ALLOC());
1376 }
1377 }
1378 }
1379 } else {
1380 /* Allocate and populate the result set array */
1381 array_init_size(&result_set, count + (mark ? 1 : 0));
1382 mark = pcre2_get_mark(match_data);
1383 populate_subpat_array(
1384 &result_set, subject, offsets, subpat_names,
1385 num_subpats, count, mark, flags);
1386 /* And add it to the output array */
1387 zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &result_set);
1388 }
1389 } else { /* single pattern matching */
1390 /* For each subpattern, insert it into the subpatterns array. */
1391 mark = pcre2_get_mark(match_data);
1392 populate_subpat_array(
1393 subpats, subject, offsets, subpat_names, num_subpats, count, mark, flags);
1394 break;
1395 }
1396 }
1397
1398 /* Advance to the next piece. */
1399 start_offset2 = offsets[1];
1400
1401 /* If we have matched an empty string, mimic what Perl's /g options does.
1402 This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
1403 the match again at the same point. If this fails (picked up above) we
1404 advance to the next character. */
1405 if (start_offset2 == offsets[0]) {
1406 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1407 PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
1408 if (count >= 0) {
1409 if (global) {
1410 goto matched;
1411 } else {
1412 break;
1413 }
1414 } else if (count == PCRE2_ERROR_NOMATCH) {
1415 /* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
1416 this is not necessarily the end. We need to advance
1417 the start offset, and continue. Fudge the offset values
1418 to achieve this, unless we're already at the end of the string. */
1419 if (start_offset2 < subject_len) {
1420 size_t unit_len = calculate_unit_length(pce, subject + start_offset2);
1421
1422 start_offset2 += unit_len;
1423 } else {
1424 break;
1425 }
1426 } else {
1427 goto error;
1428 }
1429 }
1430 } else if (count == PCRE2_ERROR_NOMATCH) {
1431 break;
1432 } else {
1433 error:
1434 pcre_handle_exec_error(count);
1435 break;
1436 }
1437
1438 if (!global) {
1439 break;
1440 }
1441
1442 /* Execute the regular expression. */
1443 #ifdef HAVE_PCRE_JIT_SUPPORT
1444 if ((pce->preg_options & PREG_JIT)) {
1445 if (PCRE2_UNSET == start_offset2 || start_offset2 > subject_len) {
1446 pcre_handle_exec_error(PCRE2_ERROR_BADOFFSET);
1447 break;
1448 }
1449 count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1450 PCRE2_NO_UTF_CHECK, match_data, mctx);
1451 } else
1452 #endif
1453 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1454 PCRE2_NO_UTF_CHECK, match_data, mctx);
1455 }
1456 if (match_data != mdata) {
1457 pcre2_match_data_free(match_data);
1458 }
1459
1460 /* Add the match sets to the output array and clean up */
1461 if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
1462 if (subpat_names) {
1463 for (i = 0; i < num_subpats; i++) {
1464 if (subpat_names[i]) {
1465 zend_hash_update(Z_ARRVAL_P(subpats), subpat_names[i], &match_sets[i]);
1466 Z_ADDREF(match_sets[i]);
1467 }
1468 zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i]);
1469 }
1470 } else {
1471 for (i = 0; i < num_subpats; i++) {
1472 zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i]);
1473 }
1474 }
1475 efree(match_sets);
1476
1477 if (Z_TYPE(marks) != IS_UNDEF) {
1478 add_assoc_zval(subpats, "MARK", &marks);
1479 }
1480 }
1481
1482 if (subpat_names) {
1483 free_subpats_table(subpat_names, num_subpats);
1484 }
1485
1486 if (PCRE_G(error_code) == PHP_PCRE_NO_ERROR) {
1487 /* If there was no error and we're in /u mode, remember that the string is valid UTF-8. */
1488 if ((pce->compile_options & PCRE2_UTF)
1489 && !ZSTR_IS_INTERNED(subject_str) && orig_start_offset == 0) {
1490 GC_ADD_FLAGS(subject_str, IS_STR_VALID_UTF8);
1491 }
1492
1493 RETVAL_LONG(matched);
1494 } else {
1495 RETVAL_FALSE;
1496 }
1497 }
1498 /* }}} */
1499
1500 /* {{{ Perform a Perl-style regular expression match */
PHP_FUNCTION(preg_match)
{
	/* Delegate to the shared matcher with global = 0 (single-match mode). */
	php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
}
1505 /* }}} */
1506
1507 /* {{{ Perform a Perl-style global regular expression match */
PHP_FUNCTION(preg_match_all)
{
	/* Delegate to the shared matcher with global = 1 (find every match). */
	php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
}
1512 /* }}} */
1513
1514 /* {{{ preg_get_backref */
/*
 * Try to parse a back-reference at *str, which points at its introducer
 * character. Accepted shapes are <introducer>N, <introducer>NN and ${N} / ${NN}
 * where N is a decimal digit.
 *
 * On success the number is stored in *backref, *str is advanced past the
 * reference and 1 is returned. On failure 0 is returned and *str is left
 * untouched; *backref may already have been written if digits were seen
 * before a missing closing brace.
 */
static int preg_get_backref(char **str, int *backref)
{
	char *cursor = *str;
	int braced;

	/* Nothing follows the introducer: cannot be a reference. */
	if (cursor[1] == 0) {
		return 0;
	}

	braced = (cursor[0] == '$' && cursor[1] == '{');
	cursor += braced ? 2 : 1;

	/* At least one digit is mandatory. */
	if (*cursor < '0' || *cursor > '9') {
		return 0;
	}
	*backref = *cursor - '0';
	cursor++;

	/* An optional second digit. */
	if (*cursor >= '0' && *cursor <= '9') {
		*backref = *backref * 10 + *cursor - '0';
		cursor++;
	}

	if (braced) {
		if (*cursor != '}') {
			return 0;
		}
		cursor++;
	}

	*str = cursor;
	return 1;
}
1550 /* }}} */
1551
1552 /* {{{ preg_do_repl_func */
/*
 * Invoke the user callback for one match and return the replacement string.
 *
 * The callback receives a single array argument built by
 * populate_subpat_array() from the current match. A string result is returned
 * as is; any other defined result is converted with zval_get_string_func().
 * If the call fails or yields no value, a warning is raised (unless an
 * exception is pending) and the matched text itself is returned.
 *
 * The caller owns the returned string.
 */
static zend_string *preg_do_repl_func(zend_fcall_info *fci, zend_fcall_info_cache *fcc, const char *subject, PCRE2_SIZE *offsets, zend_string **subpat_names, uint32_t num_subpats, int count, const PCRE2_SPTR mark, zend_long flags)
{
	zval callback_arg;         /* Array of groups handed to the callback */
	zval callback_ret;         /* Value the callback returned */
	zend_string *replacement;

	array_init_size(&callback_arg, count + (mark ? 1 : 0));
	populate_subpat_array(&callback_arg, subject, offsets, subpat_names, num_subpats, count, mark, flags);

	fci->params = &callback_arg;
	fci->param_count = 1;
	fci->retval = &callback_ret;

	if (zend_call_function(fci, fcc) != SUCCESS || Z_TYPE(callback_ret) == IS_UNDEF) {
		if (!EG(exception)) {
			php_error_docref(NULL, E_WARNING, "Unable to call custom replacement function");
		}

		/* Fall back to the text of the whole match. */
		replacement = zend_string_init(&subject[offsets[0]], offsets[1] - offsets[0], 0);
	} else if (EXPECTED(Z_TYPE(callback_ret) == IS_STRING)) {
		/* Take over the returned string without copying. */
		replacement = Z_STR(callback_ret);
	} else {
		replacement = zval_get_string_func(&callback_ret);
		zval_ptr_dtor(&callback_ret);
	}

	zval_ptr_dtor(&callback_arg);

	return replacement;
}
1585 /* }}} */
1586
1587 /* {{{ php_pcre_replace */
php_pcre_replace(zend_string * regex,zend_string * subject_str,const char * subject,size_t subject_len,zend_string * replace_str,size_t limit,size_t * replace_count)1588 PHPAPI zend_string *php_pcre_replace(zend_string *regex,
1589 zend_string *subject_str,
1590 const char *subject, size_t subject_len,
1591 zend_string *replace_str,
1592 size_t limit, size_t *replace_count)
1593 {
1594 pcre_cache_entry *pce; /* Compiled regular expression */
1595 zend_string *result; /* Function result */
1596
1597 /* Abort on pending exception, e.g. thrown from __toString(). */
1598 if (UNEXPECTED(EG(exception))) {
1599 return NULL;
1600 }
1601
1602 /* Compile regex or get it from cache. */
1603 if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
1604 return NULL;
1605 }
1606 pce->refcount++;
1607 result = php_pcre_replace_impl(pce, subject_str, subject, subject_len, replace_str,
1608 limit, replace_count);
1609 pce->refcount--;
1610
1611 return result;
1612 }
1613 /* }}} */
1614
1615 /* {{{ php_pcre_replace_impl() */
/*
 * Replace up to `limit` matches of `pce` in `subject` with `replace_str`,
 * expanding back-references recognised by preg_get_backref().
 *
 * pce:           compiled pattern cache entry.
 * subject_str:   optional zend_string form of the subject; when non-NULL and
 *                no replacement was made, it is returned via zend_string_copy().
 * subject:       subject bytes.
 * subject_len:   length of `subject`.
 * replace_str:   replacement template.
 * limit:         maximum number of replacements to perform.
 * replace_count: optional counter, incremented once per replacement.
 *
 * Returns the resulting string, or NULL on error. Errors are recorded in
 * PCRE_G(error_code), either directly or through pcre_handle_exec_error().
 */
PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *subject_str, const char *subject, size_t subject_len, zend_string *replace_str, size_t limit, size_t *replace_count)
{
	uint32_t		 options;			/* Execution options */
	int				 count;				/* Count of matched subpatterns */
	PCRE2_SIZE		*offsets;			/* Array of subpattern offsets */
	uint32_t		 num_subpats;		/* Number of captured subpatterns */
	size_t			 new_len;			/* Length of needed storage */
	size_t			 alloc_len;			/* Actual allocated length */
	size_t			 match_len;			/* Length of the current match */
	int				 backref;			/* Backreference number */
	PCRE2_SIZE		 start_offset;		/* Where the new search starts */
	size_t			 last_end_offset;	/* Where the last search ended */
	char			*walkbuf,			/* Location of current replacement in the result */
					*walk,				/* Used to walk the replacement string */
					 walk_last;			/* Last walked character */
	const char		*match,				/* The current match */
					*piece,				/* The current piece of subject */
					*replace_end;		/* End of replacement string */
	size_t			result_len;			/* Length of result */
	zend_string		*result;			/* Result of replacement */
	pcre2_match_data *match_data;

	/* Number of ovector pairs: every capture group plus the whole match. */
	num_subpats = pce->capture_count + 1;
	alloc_len = 0;
	result = NULL;

	/* Initialize */
	match = NULL;
	start_offset = 0;
	last_end_offset = 0;
	result_len = 0;
	PCRE_G(error_code) = PHP_PCRE_NO_ERROR;

	/* Use the shared match data block when it is free and large enough,
	 * otherwise create a dedicated one that is freed before returning. */
	if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
		match_data = mdata;
	} else {
		match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
		if (!match_data) {
			PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
			return NULL;
		}
	}

	/* For a UTF pattern the first match runs with the UTF check enabled
	 * (options == 0); every later match passes PCRE2_NO_UTF_CHECK. */
	options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;

	/* Execute the regular expression. */
#ifdef HAVE_PCRE_JIT_SUPPORT
	/* The JIT entry point is used only when no UTF check is required. */
	if ((pce->preg_options & PREG_JIT) && options) {
		count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
				PCRE2_NO_UTF_CHECK, match_data, mctx);
	} else
#endif
	count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
			options, match_data, mctx);

	while (1) {
		/* Unconsumed part of the subject starts where the last match ended. */
		piece = subject + last_end_offset;

		if (count >= 0 && limit > 0) {
			bool simple_string;

			/* Check for too many substrings condition. */
			if (UNEXPECTED(count == 0)) {
				php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
				count = num_subpats;
			}

matched:
			offsets = pcre2_get_ovector_pointer(match_data);

			/* A match that ends before it starts is treated as an internal error. */
			if (UNEXPECTED(offsets[1] < offsets[0])) {
				PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
				if (result) {
					zend_string_release_ex(result, 0);
					result = NULL;
				}
				break;
			}

			if (replace_count) {
				++*replace_count;
			}

			/* Set the match location in subject */
			match = subject + offsets[0];

			new_len = result_len + offsets[0] - last_end_offset; /* part before the match */

			/* First pass over the template: compute the expanded length and
			 * detect whether it contains any '\\' or '$' at all. */
			walk = ZSTR_VAL(replace_str);
			replace_end = walk + ZSTR_LEN(replace_str);
			walk_last = 0;
			simple_string = 1;
			while (walk < replace_end) {
				if ('\\' == *walk || '$' == *walk) {
					simple_string = 0;
					if (walk_last == '\\') {
						/* Escaped character: it takes the slot already counted
						 * for the preceding backslash. */
						walk++;
						walk_last = 0;
						continue;
					}
					if (preg_get_backref(&walk, &backref)) {
						/* References to groups beyond `count` expand to nothing. */
						if (backref < count)
							new_len += offsets[(backref<<1)+1] - offsets[backref<<1];
						continue;
					}
				}
				new_len++;
				walk++;
				walk_last = walk[-1];
			}

			/* Grow the result buffer to twice the needed length when it is too small. */
			if (new_len >= alloc_len) {
				alloc_len = zend_safe_address_guarded(2, new_len, ZSTR_MAX_OVERHEAD) - ZSTR_MAX_OVERHEAD;
				if (result == NULL) {
					result = zend_string_alloc(alloc_len, 0);
				} else {
					result = zend_string_extend(result, alloc_len, 0);
				}
			}

			if (match-piece > 0) {
				/* copy the part of the string before the match */
				memcpy(&ZSTR_VAL(result)[result_len], piece, match-piece);
				result_len += (match-piece);
			}

			if (simple_string) {
				/* copy replacement (the +1 also copies its terminating NUL) */
				memcpy(&ZSTR_VAL(result)[result_len], ZSTR_VAL(replace_str), ZSTR_LEN(replace_str)+1);
				result_len += ZSTR_LEN(replace_str);
			} else {
				/* copy replacement and backrefs */
				walkbuf = ZSTR_VAL(result) + result_len;

				/* Second pass over the template, mirroring the sizing pass above. */
				walk = ZSTR_VAL(replace_str);
				walk_last = 0;
				while (walk < replace_end) {
					if ('\\' == *walk || '$' == *walk) {
						if (walk_last == '\\') {
							/* Overwrite the backslash copied on the previous
							 * step with the character it escapes. */
							*(walkbuf-1) = *walk++;
							walk_last = 0;
							continue;
						}
						if (preg_get_backref(&walk, &backref)) {
							if (backref < count) {
								match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
								memcpy(walkbuf, subject + offsets[backref<<1], match_len);
								walkbuf += match_len;
							}
							continue;
						}
					}
					*walkbuf++ = *walk++;
					walk_last = walk[-1];
				}
				*walkbuf = '\0';
				/* increment the result length by how much we've added to the string */
				result_len += (walkbuf - (ZSTR_VAL(result) + result_len));
			}

			limit--;

			/* Advance to the next piece. */
			start_offset = last_end_offset = offsets[1];

			/* If we have matched an empty string, mimic what Perl's /g options does.
			   This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
			   the match again at the same point. If this fails (picked up above) we
			   advance to the next character. */
			if (start_offset == offsets[0]) {
				count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
					PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);

				piece = subject + start_offset;
				if (count >= 0 && limit > 0) {
					goto matched;
				} else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
					/* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
					   this is not necessarily the end. We need to advance
					   the start offset, and continue. Fudge the offset values
					   to achieve this, unless we're already at the end of the string. */
					if (start_offset < subject_len) {
						size_t unit_len = calculate_unit_length(pce, piece);
						start_offset += unit_len;
					} else {
						goto not_matched;
					}
				} else {
					goto error;
				}
			}

		} else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
not_matched:
			/* Nothing was replaced and the caller supplied the subject as a
			 * zend_string: return that string instead of building a copy. */
			if (!result && subject_str) {
				result = zend_string_copy(subject_str);
				break;
			}
			/* now we know exactly how long it is */
			alloc_len = result_len + subject_len - last_end_offset;
			if (NULL != result) {
				result = zend_string_realloc(result, alloc_len, 0);
			} else {
				result = zend_string_alloc(alloc_len, 0);
			}
			/* stick that last bit of string on our output */
			memcpy(ZSTR_VAL(result) + result_len, piece, subject_len - last_end_offset);
			result_len += subject_len - last_end_offset;
			ZSTR_VAL(result)[result_len] = '\0';
			ZSTR_LEN(result) = result_len;
			break;
		} else {
error:
			/* Matching failed: record the error and discard any partial result. */
			pcre_handle_exec_error(count);
			if (result) {
				zend_string_release_ex(result, 0);
				result = NULL;
			}
			break;
		}

		/* Look for the next match from the updated start offset. */
#ifdef HAVE_PCRE_JIT_SUPPORT
		if (pce->preg_options & PREG_JIT) {
			count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
					PCRE2_NO_UTF_CHECK, match_data, mctx);
		} else
#endif
		count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
					PCRE2_NO_UTF_CHECK, match_data, mctx);
	}
	/* Only a dedicated match data block is freed; the shared one is kept. */
	if (match_data != mdata) {
		pcre2_match_data_free(match_data);
	}

	return result;
}
1853 /* }}} */
1854
1855 /* {{{ php_pcre_replace_func_impl() */
php_pcre_replace_func_impl(pcre_cache_entry * pce,zend_string * subject_str,const char * subject,size_t subject_len,zend_fcall_info * fci,zend_fcall_info_cache * fcc,size_t limit,size_t * replace_count,zend_long flags)1856 static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_string *subject_str, const char *subject, size_t subject_len, zend_fcall_info *fci, zend_fcall_info_cache *fcc, size_t limit, size_t *replace_count, zend_long flags)
1857 {
1858 uint32_t options; /* Execution options */
1859 int count; /* Count of matched subpatterns */
1860 PCRE2_SIZE *offsets; /* Array of subpattern offsets */
1861 zend_string **subpat_names; /* Array for named subpatterns */
1862 uint32_t num_subpats; /* Number of captured subpatterns */
1863 size_t new_len; /* Length of needed storage */
1864 size_t alloc_len; /* Actual allocated length */
1865 PCRE2_SIZE start_offset; /* Where the new search starts */
1866 size_t last_end_offset; /* Where the last search ended */
1867 const char *match, /* The current match */
1868 *piece; /* The current piece of subject */
1869 size_t result_len; /* Length of result */
1870 zend_string *result; /* Result of replacement */
1871 zend_string *eval_result; /* Result of custom function */
1872 pcre2_match_data *match_data;
1873 bool old_mdata_used;
1874
1875 /* Calculate the size of the offsets array, and allocate memory for it. */
1876 num_subpats = pce->capture_count + 1;
1877
1878 /*
1879 * Build a mapping from subpattern numbers to their names. We will
1880 * allocate the table only if there are any named subpatterns.
1881 */
1882 subpat_names = NULL;
1883 if (UNEXPECTED(pce->name_count > 0)) {
1884 subpat_names = make_subpats_table(num_subpats, pce);
1885 if (!subpat_names) {
1886 return NULL;
1887 }
1888 }
1889
1890 alloc_len = 0;
1891 result = NULL;
1892
1893 /* Initialize */
1894 match = NULL;
1895 start_offset = 0;
1896 last_end_offset = 0;
1897 result_len = 0;
1898 PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1899
1900 old_mdata_used = mdata_used;
1901 if (!old_mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
1902 mdata_used = 1;
1903 match_data = mdata;
1904 } else {
1905 match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
1906 if (!match_data) {
1907 PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1908 if (subpat_names) {
1909 free_subpats_table(subpat_names, num_subpats);
1910 }
1911 mdata_used = old_mdata_used;
1912 return NULL;
1913 }
1914 }
1915
1916 options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
1917
1918 /* Execute the regular expression. */
1919 #ifdef HAVE_PCRE_JIT_SUPPORT
1920 if ((pce->preg_options & PREG_JIT) && options) {
1921 count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1922 PCRE2_NO_UTF_CHECK, match_data, mctx);
1923 } else
1924 #endif
1925 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1926 options, match_data, mctx);
1927
1928 while (1) {
1929 piece = subject + last_end_offset;
1930
1931 if (count >= 0 && limit) {
1932 /* Check for too many substrings condition. */
1933 if (UNEXPECTED(count == 0)) {
1934 php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
1935 count = num_subpats;
1936 }
1937
1938 matched:
1939 offsets = pcre2_get_ovector_pointer(match_data);
1940
1941 if (UNEXPECTED(offsets[1] < offsets[0])) {
1942 PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1943 if (result) {
1944 zend_string_release_ex(result, 0);
1945 result = NULL;
1946 }
1947 break;
1948 }
1949
1950 if (replace_count) {
1951 ++*replace_count;
1952 }
1953
1954 /* Set the match location in subject */
1955 match = subject + offsets[0];
1956
1957 new_len = result_len + offsets[0] - last_end_offset; /* part before the match */
1958
1959 /* Use custom function to get replacement string and its length. */
1960 eval_result = preg_do_repl_func(
1961 fci, fcc, subject, offsets, subpat_names, num_subpats, count,
1962 pcre2_get_mark(match_data), flags);
1963
1964 ZEND_ASSERT(eval_result);
1965 new_len = zend_safe_address_guarded(1, ZSTR_LEN(eval_result) + ZSTR_MAX_OVERHEAD, new_len) -ZSTR_MAX_OVERHEAD;
1966 if (new_len >= alloc_len) {
1967 alloc_len = zend_safe_address_guarded(2, new_len, ZSTR_MAX_OVERHEAD) - ZSTR_MAX_OVERHEAD;
1968 if (result == NULL) {
1969 result = zend_string_alloc(alloc_len, 0);
1970 } else {
1971 result = zend_string_extend(result, alloc_len, 0);
1972 }
1973 }
1974
1975 if (match-piece > 0) {
1976 /* copy the part of the string before the match */
1977 memcpy(ZSTR_VAL(result) + result_len, piece, match-piece);
1978 result_len += (match-piece);
1979 }
1980
1981 /* If using custom function, copy result to the buffer and clean up. */
1982 memcpy(ZSTR_VAL(result) + result_len, ZSTR_VAL(eval_result), ZSTR_LEN(eval_result));
1983 result_len += ZSTR_LEN(eval_result);
1984 zend_string_release_ex(eval_result, 0);
1985
1986 limit--;
1987
1988 /* Advance to the next piece. */
1989 start_offset = last_end_offset = offsets[1];
1990
1991 /* If we have matched an empty string, mimic what Perl's /g options does.
1992 This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
1993 the match again at the same point. If this fails (picked up above) we
1994 advance to the next character. */
1995 if (start_offset == offsets[0]) {
1996 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1997 PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
1998
1999 piece = subject + start_offset;
2000 if (count >= 0 && limit) {
2001 goto matched;
2002 } else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
2003 /* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
2004 this is not necessarily the end. We need to advance
2005 the start offset, and continue. Fudge the offset values
2006 to achieve this, unless we're already at the end of the string. */
2007 if (start_offset < subject_len) {
2008 size_t unit_len = calculate_unit_length(pce, piece);
2009 start_offset += unit_len;
2010 } else {
2011 goto not_matched;
2012 }
2013 } else {
2014 goto error;
2015 }
2016 }
2017
2018 } else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
2019 not_matched:
2020 if (!result && subject_str) {
2021 result = zend_string_copy(subject_str);
2022 break;
2023 }
2024 /* now we know exactly how long it is */
2025 alloc_len = result_len + subject_len - last_end_offset;
2026 if (NULL != result) {
2027 result = zend_string_realloc(result, alloc_len, 0);
2028 } else {
2029 result = zend_string_alloc(alloc_len, 0);
2030 }
2031 /* stick that last bit of string on our output */
2032 memcpy(ZSTR_VAL(result) + result_len, piece, subject_len - last_end_offset);
2033 result_len += subject_len - last_end_offset;
2034 ZSTR_VAL(result)[result_len] = '\0';
2035 ZSTR_LEN(result) = result_len;
2036 break;
2037 } else {
2038 error:
2039 pcre_handle_exec_error(count);
2040 if (result) {
2041 zend_string_release_ex(result, 0);
2042 result = NULL;
2043 }
2044 break;
2045 }
2046 #ifdef HAVE_PCRE_JIT_SUPPORT
2047 if ((pce->preg_options & PREG_JIT)) {
2048 count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
2049 PCRE2_NO_UTF_CHECK, match_data, mctx);
2050 } else
2051 #endif
2052 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
2053 PCRE2_NO_UTF_CHECK, match_data, mctx);
2054 }
2055 if (match_data != mdata) {
2056 pcre2_match_data_free(match_data);
2057 }
2058 mdata_used = old_mdata_used;
2059
2060 if (UNEXPECTED(subpat_names)) {
2061 free_subpats_table(subpat_names, num_subpats);
2062 }
2063
2064 return result;
2065 }
2066 /* }}} */
2067
2068 /* {{{ php_pcre_replace_func */
php_pcre_replace_func(zend_string * regex,zend_string * subject_str,zend_fcall_info * fci,zend_fcall_info_cache * fcc,size_t limit,size_t * replace_count,zend_long flags)2069 static zend_always_inline zend_string *php_pcre_replace_func(zend_string *regex,
2070 zend_string *subject_str,
2071 zend_fcall_info *fci, zend_fcall_info_cache *fcc,
2072 size_t limit, size_t *replace_count, zend_long flags)
2073 {
2074 pcre_cache_entry *pce; /* Compiled regular expression */
2075 zend_string *result; /* Function result */
2076
2077 /* Compile regex or get it from cache. */
2078 if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
2079 return NULL;
2080 }
2081 pce->refcount++;
2082 result = php_pcre_replace_func_impl(
2083 pce, subject_str, ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), fci, fcc,
2084 limit, replace_count, flags);
2085 pce->refcount--;
2086
2087 return result;
2088 }
2089 /* }}} */
2090
2091 /* {{{ php_pcre_replace_array */
/*
 * Apply every pattern in the `regex` array to `subject_str`, feeding the
 * output of one replacement in as the subject of the next.
 *
 * When `replace_ht` is given, patterns are paired positionally with its
 * entries (unused slots are skipped); once the replacements run out, the
 * empty string is used. Otherwise `replace_str` is used for every pattern.
 *
 * Returns the final string (the caller owns one reference), or NULL as soon
 * as a single replacement fails.
 */
static zend_string *php_pcre_replace_array(HashTable *regex,
	zend_string *replace_str, HashTable *replace_ht,
	zend_string *subject_str, size_t limit, size_t *replace_count)
{
	zval *pattern_zv;
	uint32_t next_slot = 0; /* Next slot of replace_ht to look at */

	/* We release the current subject after every step, so take our own
	   reference on the caller's string first. */
	zend_string_addref(subject_str);

	if (!replace_ht) {
		ZEND_ASSERT(replace_str != NULL);
	}

	ZEND_HASH_FOREACH_VAL(regex, pattern_zv) {
		/* Patterns are coerced to strings. */
		zend_string *pattern_tmp;
		zend_string *pattern = zval_get_tmp_string(pattern_zv, &pattern_tmp);
		zend_string *replacement = replace_str;
		zend_string *replacement_tmp = NULL;
		zend_string *replaced;

		if (replace_ht) {
			/* Default for when the replacement array is exhausted. */
			replacement = ZSTR_EMPTY_ALLOC();

			/* Consume slots until a used one is found. */
			while (next_slot != replace_ht->nNumUsed) {
				zval *slot = ZEND_HASH_ELEMENT(replace_ht, next_slot);

				next_slot++;
				if (Z_TYPE_P(slot) != IS_UNDEF) {
					replacement = zval_get_tmp_string(slot, &replacement_tmp);
					break;
				}
			}
		}

		/* Replace, then make the result the subject of the next round. */
		replaced = php_pcre_replace(pattern, subject_str, ZSTR_VAL(subject_str),
			ZSTR_LEN(subject_str), replacement, limit, replace_count);
		zend_tmp_string_release(replacement_tmp);
		zend_tmp_string_release(pattern_tmp);
		zend_string_release_ex(subject_str, 0);
		subject_str = replaced;

		if (UNEXPECTED(subject_str == NULL)) {
			break;
		}
	} ZEND_HASH_FOREACH_END();

	return subject_str;
}
2165 /* }}} */
2166
2167 /* {{{ php_replace_in_subject */
php_replace_in_subject(zend_string * regex_str,HashTable * regex_ht,zend_string * replace_str,HashTable * replace_ht,zend_string * subject,size_t limit,size_t * replace_count)2168 static zend_always_inline zend_string *php_replace_in_subject(
2169 zend_string *regex_str, HashTable *regex_ht,
2170 zend_string *replace_str, HashTable *replace_ht,
2171 zend_string *subject, size_t limit, size_t *replace_count)
2172 {
2173 zend_string *result;
2174
2175 if (regex_str) {
2176 ZEND_ASSERT(replace_str != NULL);
2177 result = php_pcre_replace(regex_str, subject, ZSTR_VAL(subject), ZSTR_LEN(subject),
2178 replace_str, limit, replace_count);
2179 } else {
2180 ZEND_ASSERT(regex_ht != NULL);
2181 result = php_pcre_replace_array(regex_ht, replace_str, replace_ht, subject,
2182 limit, replace_count);
2183 }
2184 return result;
2185 }
2186 /* }}} */
2187
2188 /* {{{ php_replace_in_subject_func */
/*
 * Callback-based counterpart of php_replace_in_subject().
 *
 * With a single pattern the call is forwarded directly. With an array of
 * patterns each one is applied in turn, the result of one step becoming the
 * subject of the next. Returns the final string, or NULL as soon as one
 * replacement fails.
 */
static zend_string *php_replace_in_subject_func(zend_string *regex_str, HashTable *regex_ht,
	zend_fcall_info *fci, zend_fcall_info_cache *fcc,
	zend_string *subject, size_t limit, size_t *replace_count, zend_long flags)
{
	zval *pattern_zv;

	if (regex_str) {
		return php_pcre_replace_func(
			regex_str, subject, fci, fcc, limit, replace_count, flags);
	}

	/* Pattern is an array. */
	ZEND_ASSERT(regex_ht != NULL);

	/* Each iteration drops one reference on the current subject, so take
	   our own reference on the caller's string first. */
	zend_string_addref(subject);

	ZEND_HASH_FOREACH_VAL(regex_ht, pattern_zv) {
		/* Patterns are coerced to strings. */
		zend_string *pattern_tmp;
		zend_string *pattern = zval_get_tmp_string(pattern_zv, &pattern_tmp);
		zend_string *replaced = php_pcre_replace_func(
			pattern, subject, fci, fcc, limit, replace_count, flags);

		zend_tmp_string_release(pattern_tmp);
		zend_string_release(subject);

		/* The output feeds the next pattern. */
		subject = replaced;
		if (UNEXPECTED(subject == NULL)) {
			break;
		}
	} ZEND_HASH_FOREACH_END();

	return subject;
}
2228 /* }}} */
2229
2230 /* {{{ preg_replace_func_impl */
preg_replace_func_impl(zval * return_value,zend_string * regex_str,HashTable * regex_ht,zend_fcall_info * fci,zend_fcall_info_cache * fcc,zend_string * subject_str,HashTable * subject_ht,zend_long limit_val,zend_long flags)2231 static size_t preg_replace_func_impl(zval *return_value,
2232 zend_string *regex_str, HashTable *regex_ht,
2233 zend_fcall_info *fci, zend_fcall_info_cache *fcc,
2234 zend_string *subject_str, HashTable *subject_ht, zend_long limit_val, zend_long flags)
2235 {
2236 zend_string *result;
2237 size_t replace_count = 0;
2238
2239 if (subject_str) {
2240 result = php_replace_in_subject_func(
2241 regex_str, regex_ht, fci, fcc, subject_str, limit_val, &replace_count, flags);
2242 if (result != NULL) {
2243 RETVAL_STR(result);
2244 } else {
2245 RETVAL_NULL();
2246 }
2247 } else {
2248 /* if subject is an array */
2249 zval *subject_entry, zv;
2250 zend_string *string_key;
2251 zend_ulong num_key;
2252
2253 ZEND_ASSERT(subject_ht != NULL);
2254
2255 array_init_size(return_value, zend_hash_num_elements(subject_ht));
2256
2257 /* For each subject entry, convert it to string, then perform replacement
2258 and add the result to the return_value array. */
2259 ZEND_HASH_FOREACH_KEY_VAL(subject_ht, num_key, string_key, subject_entry) {
2260 zend_string *tmp_subject_entry_str;
2261 zend_string *subject_entry_str = zval_get_tmp_string(subject_entry, &tmp_subject_entry_str);
2262
2263 result = php_replace_in_subject_func(
2264 regex_str, regex_ht, fci, fcc, subject_entry_str, limit_val, &replace_count, flags);
2265 if (result != NULL) {
2266 /* Add to return array */
2267 ZVAL_STR(&zv, result);
2268 if (string_key) {
2269 zend_hash_add_new(Z_ARRVAL_P(return_value), string_key, &zv);
2270 } else {
2271 zend_hash_index_add_new(Z_ARRVAL_P(return_value), num_key, &zv);
2272 }
2273 }
2274 zend_tmp_string_release(tmp_subject_entry_str);
2275 } ZEND_HASH_FOREACH_END();
2276 }
2277
2278 return replace_count;
2279 }
2280 /* }}} */
2281
2282 /* {{{ preg_replace_common */
/*
 * Common body of preg_replace() and preg_filter().
 *
 * With is_filter set, a subject only makes it into the result if at least
 * one replacement was performed on it: a string subject yields null instead,
 * an array element is simply left out. The optional by-reference count
 * argument receives the total number of replacements.
 */
static void preg_replace_common(INTERNAL_FUNCTION_PARAMETERS, bool is_filter)
{
	zend_string *regex_str, *replace_str, *subject_str;
	HashTable *regex_ht, *replace_ht, *subject_ht;
	zend_long limit = -1;
	zval *zcount = NULL;
	size_t replace_count = 0;

	/* Fetch and validate the arguments. */
	ZEND_PARSE_PARAMETERS_START(3, 5)
		Z_PARAM_ARRAY_HT_OR_STR(regex_ht, regex_str)
		Z_PARAM_ARRAY_HT_OR_STR(replace_ht, replace_str)
		Z_PARAM_ARRAY_HT_OR_STR(subject_ht, subject_str)
		Z_PARAM_OPTIONAL
		Z_PARAM_LONG(limit)
		Z_PARAM_ZVAL(zcount)
	ZEND_PARSE_PARAMETERS_END();

	/* An array of replacements only makes sense with an array of patterns. */
	if (replace_ht && !regex_ht) {
		zend_argument_type_error(1, "must be of type array when argument #2 ($replacement) is an array, string given");
		RETURN_THROWS();
	}

	if (subject_str) {
		size_t count_before = replace_count;
		zend_string *replaced = php_replace_in_subject(regex_str, regex_ht, replace_str, replace_ht,
			subject_str, limit, &replace_count);

		if (replaced == NULL) {
			RETVAL_NULL();
		} else if (is_filter && replace_count <= count_before) {
			/* Filtering and nothing was replaced: discard the result. */
			zend_string_release_ex(replaced, 0);
			RETVAL_NULL();
		} else {
			RETVAL_STR(replaced);
		}
	} else {
		/* Subject is an array. */
		zval *elem;
		zend_string *str_key;
		zend_ulong int_key;

		ZEND_ASSERT(subject_ht != NULL);

		array_init_size(return_value, zend_hash_num_elements(subject_ht));

		/* Coerce each element to a string, run the replacement on it and
		   store the outcome under the same key. */
		ZEND_HASH_FOREACH_KEY_VAL(subject_ht, int_key, str_key, elem) {
			size_t count_before = replace_count;
			zend_string *elem_tmp;
			zend_string *elem_str = zval_get_tmp_string(elem, &elem_tmp);
			zend_string *replaced = php_replace_in_subject(regex_str, regex_ht, replace_str, replace_ht,
				elem_str, limit, &replace_count);

			if (replaced != NULL) {
				if (is_filter && replace_count <= count_before) {
					/* Filtering and this element was left untouched. */
					zend_string_release_ex(replaced, 0);
				} else {
					zval entry;

					ZVAL_STR(&entry, replaced);
					if (str_key) {
						zend_hash_add_new(Z_ARRVAL_P(return_value), str_key, &entry);
					} else {
						zend_hash_index_add_new(Z_ARRVAL_P(return_value), int_key, &entry);
					}
				}
			}
			zend_tmp_string_release(elem_tmp);
		} ZEND_HASH_FOREACH_END();
	}

	if (zcount) {
		ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count);
	}
}
2367 /* }}} */
2368
2369 /* {{{ Perform Perl-style regular expression replacement. */
PHP_FUNCTION(preg_replace)
{
	/* Thin entry point: everything is delegated to preg_replace_common().
	   is_filter = false, so each subject's result is returned whether or
	   not a replacement actually took place. */
	preg_replace_common(INTERNAL_FUNCTION_PARAM_PASSTHRU, false);
}
2374 /* }}} */
2375
2376 /* {{{ Perform Perl-style regular expression replacement using replacement callback. */
PHP_FUNCTION(preg_replace_callback)2377 PHP_FUNCTION(preg_replace_callback)
2378 {
2379 zval *zcount = NULL;
2380 zend_string *regex_str;
2381 HashTable *regex_ht;
2382 zend_string *subject_str;
2383 HashTable *subject_ht;
2384 zend_long limit = -1, flags = 0;
2385 size_t replace_count;
2386 zend_fcall_info fci;
2387 zend_fcall_info_cache fcc;
2388
2389 /* Get function parameters and do error-checking. */
2390 ZEND_PARSE_PARAMETERS_START(3, 6)
2391 Z_PARAM_ARRAY_HT_OR_STR(regex_ht, regex_str)
2392 Z_PARAM_FUNC(fci, fcc)
2393 Z_PARAM_ARRAY_HT_OR_STR(subject_ht, subject_str)
2394 Z_PARAM_OPTIONAL
2395 Z_PARAM_LONG(limit)
2396 Z_PARAM_ZVAL(zcount)
2397 Z_PARAM_LONG(flags)
2398 ZEND_PARSE_PARAMETERS_END();
2399
2400 replace_count = preg_replace_func_impl(return_value, regex_str, regex_ht,
2401 &fci, &fcc,
2402 subject_str, subject_ht, limit, flags);
2403 if (zcount) {
2404 ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count);
2405 }
2406 }
2407 /* }}} */
2408
/* {{{ Perform Perl-style regular expression replacements using an array of pattern => callback pairs. */
/*
 * preg_replace_callback_array(): apply a list of (pattern => callback) pairs
 * to the subject(s) in order. The string key of each `pattern` entry is the
 * regex, the value is its callback; the output of one pair is the input of
 * the next. Returns the final string/array, or null on failure.
 */
PHP_FUNCTION(preg_replace_callback_array)
{
	zval zv, *replace, *zcount = NULL;
	HashTable *pattern, *subject_ht;
	zend_string *subject_str, *str_idx_regex;
	zend_long limit = -1, flags = 0;
	size_t replace_count = 0;
	zend_fcall_info fci;
	zend_fcall_info_cache fcc;

	/* Get function parameters and do error-checking. */
	ZEND_PARSE_PARAMETERS_START(2, 5)
		Z_PARAM_ARRAY_HT(pattern)
		Z_PARAM_ARRAY_HT_OR_STR(subject_ht, subject_str)
		Z_PARAM_OPTIONAL
		Z_PARAM_LONG(limit)
		Z_PARAM_ZVAL(zcount)
		Z_PARAM_LONG(flags)
	ZEND_PARSE_PARAMETERS_END();

	/* fci is filled in by hand here rather than by parameter parsing; its
	   function_name is set per pattern inside the loop below. */
	fci.size = sizeof(fci);
	fci.object = NULL;
	fci.named_params = NULL;

	/* Take our own reference on the subject: from here on the current
	   subject is always released by this function, either when it is
	   superseded by an intermediate result or on the error path. */
	if (subject_ht) {
		GC_TRY_ADDREF(subject_ht);
	} else {
		GC_TRY_ADDREF(subject_str);
	}

	ZEND_HASH_FOREACH_STR_KEY_VAL(pattern, str_idx_regex, replace) {
		/* The key is used as the regex, so an integer key (no string key)
		   cannot be a valid pattern. */
		if (!str_idx_regex) {
			php_error_docref(NULL, E_WARNING, "Delimiter must not be alphanumeric or backslash");
			RETVAL_NULL();
			goto error;
		}

		/* Resolve the callback for this pattern into fcc. */
		if (!zend_is_callable_ex(replace, NULL, 0, NULL, &fcc, NULL)) {
			zend_argument_type_error(1, "must contain only valid callbacks");
			goto error;
		}

		ZVAL_COPY_VALUE(&fci.function_name, replace);

		/* Run this pattern over the current subject; the result lands in zv. */
		replace_count += preg_replace_func_impl(&zv, str_idx_regex, /* regex_ht */ NULL, &fci, &fcc,
			subject_str, subject_ht, limit, flags);
		/* The result replaces the current subject (dropping our reference
		   to the old one) so that the next pattern operates on it. */
		switch (Z_TYPE(zv)) {
			case IS_ARRAY:
				ZEND_ASSERT(subject_ht);
				zend_array_release(subject_ht);
				subject_ht = Z_ARR(zv);
				break;
			case IS_STRING:
				ZEND_ASSERT(subject_str);
				zend_string_release(subject_str);
				subject_str = Z_STR(zv);
				break;
			case IS_NULL:
				/* The replacement failed for a string subject. */
				RETVAL_NULL();
				goto error;
			EMPTY_SWITCH_DEFAULT_CASE()
		}

		/* Stop at the first pending exception. */
		if (EG(exception)) {
			goto error;
		}
	} ZEND_HASH_FOREACH_END();

	if (zcount) {
		ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count);
	}

	/* Success: hand our reference on the final subject to the return value. */
	if (subject_ht) {
		RETURN_ARR(subject_ht);
	} else {
		RETURN_STR(subject_str);
	}

error:
	/* Failure: drop our reference on whatever the current subject is. */
	if (subject_ht) {
		zend_array_release(subject_ht);
	} else {
		zend_string_release(subject_str);
	}
}
2495 /* }}} */
2496
2497 /* {{{ Perform Perl-style regular expression replacement and only return matches. */
PHP_FUNCTION(preg_filter)
{
	/* Same as preg_replace(), but with is_filter = true: subjects on which
	   no replacement was performed are dropped from the result (null for a
	   string subject, omitted for an array element). */
	preg_replace_common(INTERNAL_FUNCTION_PARAM_PASSTHRU, true);
}
2502 /* }}} */
2503
2504 /* {{{ Split string into an array using a perl-style regular expression as a delimiter */
PHP_FUNCTION(preg_split)2505 PHP_FUNCTION(preg_split)
2506 {
2507 zend_string *regex; /* Regular expression */
2508 zend_string *subject; /* String to match against */
2509 zend_long limit_val = -1;/* Integer value of limit */
2510 zend_long flags = 0; /* Match control flags */
2511 pcre_cache_entry *pce; /* Compiled regular expression */
2512
2513 /* Get function parameters and do error checking */
2514 ZEND_PARSE_PARAMETERS_START(2, 4)
2515 Z_PARAM_STR(regex)
2516 Z_PARAM_STR(subject)
2517 Z_PARAM_OPTIONAL
2518 Z_PARAM_LONG(limit_val)
2519 Z_PARAM_LONG(flags)
2520 ZEND_PARSE_PARAMETERS_END();
2521
2522 /* Compile regex or get it from cache. */
2523 if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
2524 RETURN_FALSE;
2525 }
2526
2527 pce->refcount++;
2528 php_pcre_split_impl(pce, subject, return_value, limit_val, flags);
2529 pce->refcount--;
2530 }
2531 /* }}} */
2532
/* {{{ php_pcre_split_impl */
/*
 * Split subject_str around the matches of the compiled pattern `pce` and
 * store the pieces in return_value.
 *
 * return_value is initialised as an array up front. If the match data cannot
 * be allocated, or PCRE_G(error_code) is not PHP_PCRE_NO_ERROR once matching
 * has finished, that array is destroyed and return_value is set to false.
 *
 * limit_val: -1 and 0 both mean "no limit"; any other value <= 1 skips
 *            matching altogether, so the whole subject becomes the only
 *            piece (subject to PREG_SPLIT_NO_EMPTY).
 * flags:     bitmask of PREG_SPLIT_NO_EMPTY, PREG_SPLIT_DELIM_CAPTURE and
 *            PREG_SPLIT_OFFSET_CAPTURE.
 */
PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str, zval *return_value,
	zend_long limit_val, zend_long flags)
{
	PCRE2_SIZE		*offsets;			/* Array of subpattern offsets */
	uint32_t		 options;			/* Execution options */
	int				 count;				/* Count of matched subpatterns */
	PCRE2_SIZE		 start_offset;		/* Where the new search starts */
	PCRE2_SIZE		 last_match_offset;	/* Location of last match */
	uint32_t		 no_empty;			/* If NO_EMPTY flag is set */
	uint32_t		 delim_capture;		/* If delimiters should be captured */
	uint32_t		 offset_capture;	/* If offsets should be captured */
	uint32_t		 num_subpats;		/* Number of captured subpatterns */
	zval			 tmp;
	pcre2_match_data *match_data;
	char *subject = ZSTR_VAL(subject_str);

	no_empty = flags & PREG_SPLIT_NO_EMPTY;
	delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE;
	offset_capture = flags & PREG_SPLIT_OFFSET_CAPTURE;

	/* Initialize return value */
	array_init(return_value);

	/* Capture group count plus one. Used to decide whether the preallocated
	   match data is large enough, and as the substring count when the
	   matcher reports 0 ("too many substrings"). */
	num_subpats = pce->capture_count + 1;

	/* Start at the beginning of the string */
	start_offset = 0;
	last_match_offset = 0;
	PCRE_G(error_code) = PHP_PCRE_NO_ERROR;

	/* Normalise the limit: 0 is treated like -1 (unlimited); any other
	   value <= 1 means no split can happen, so go straight to emitting the
	   whole subject. */
	if (limit_val == -1) {
		/* pass */
	} else if (limit_val == 0) {
		limit_val = -1;
	} else if (limit_val <= 1) {
		goto last;
	}

	/* Use the preallocated match data when it is free and big enough,
	   otherwise allocate one sized for this pattern. */
	if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
		match_data = mdata;
	} else {
		match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
		if (!match_data) {
			PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
			zval_ptr_dtor(return_value);
			RETURN_FALSE;
		}
	}

	/* Only the first match of a UTF pattern is run without
	   PCRE2_NO_UTF_CHECK; every later call in this function passes it. */
	options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;

	/* First match. The JIT entry point is only taken here when `options`
	   is non-zero, i.e. for non-UTF patterns. */
#ifdef HAVE_PCRE_JIT_SUPPORT
	if ((pce->preg_options & PREG_JIT) && options) {
		count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
				PCRE2_NO_UTF_CHECK, match_data, mctx);
	} else
#endif
	count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
			options, match_data, mctx);

	while (1) {
		/* If something matched */
		if (count >= 0) {
			/* Check for too many substrings condition. */
			if (UNEXPECTED(count == 0)) {
				php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
				count = num_subpats;
			}

matched:
			offsets = pcre2_get_ovector_pointer(match_data);

			/* A match that ends before it starts is treated as an
			   internal error. */
			if (UNEXPECTED(offsets[1] < offsets[0])) {
				PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
				break;
			}

			/* Emit the piece between the previous match and this one,
			   unless it is empty and NO_EMPTY was requested. */
			if (!no_empty || offsets[0] != last_match_offset) {
				if (offset_capture) {
					/* Add (match, offset) pair to the return value */
					add_offset_pair(
						return_value, subject, last_match_offset, offsets[0],
						NULL, 0);
				} else {
					/* Add the piece to the return value */
					populate_match_value_str(&tmp, subject, last_match_offset, offsets[0]);
					zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
				}

				/* One less left to do */
				if (limit_val != -1)
					limit_val--;
			}

			/* With DELIM_CAPTURE, also emit every captured group
			   (1 .. count-1) of the delimiter; these do not count
			   against the limit. */
			if (delim_capture) {
				size_t i;
				for (i = 1; i < count; i++) {
					/* If we have matched a delimiter */
					if (!no_empty || offsets[2*i] != offsets[2*i+1]) {
						if (offset_capture) {
							add_offset_pair(
								return_value, subject, offsets[2*i], offsets[2*i+1], NULL, 0);
						} else {
							populate_match_value_str(&tmp, subject, offsets[2*i], offsets[2*i+1]);
							zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
						}
					}
				}
			}

			/* Advance to the position right after the last full match */
			start_offset = last_match_offset = offsets[1];

			/* If we have matched an empty string, mimic what Perl's /g options does.
			   This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
			   the match again at the same point. If this fails (picked up above) we
			   advance to the next character. */
			if (start_offset == offsets[0]) {
				/* Get next piece if no limit or limit not yet reached and something matched*/
				if (limit_val != -1 && limit_val <= 1) {
					break;
				}
				count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
					PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
				if (count >= 0) {
					goto matched;
				} else if (count == PCRE2_ERROR_NOMATCH) {
					/* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
					   this is not necessarily the end. We need to advance
					   the start offset, and continue. Fudge the offset values
					   to achieve this, unless we're already at the end of the string. */
					if (start_offset < ZSTR_LEN(subject_str)) {
						start_offset += calculate_unit_length(pce, subject + start_offset);
					} else {
						break;
					}
				} else {
					goto error;
				}
			}

		} else if (count == PCRE2_ERROR_NOMATCH) {
			/* No further delimiter: the rest of the subject is the final piece. */
			break;
		} else {
error:
			pcre_handle_exec_error(count);
			break;
		}

		/* Get next piece if no limit or limit not yet reached and something matched*/
		if (limit_val != -1 && limit_val <= 1) {
			break;
		}

		/* Search for the next delimiter from start_offset. */
#ifdef HAVE_PCRE_JIT_SUPPORT
		if (pce->preg_options & PREG_JIT) {
			count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
					PCRE2_NO_UTF_CHECK, match_data, mctx);
		} else
#endif
		count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
				PCRE2_NO_UTF_CHECK, match_data, mctx);
	}
	/* Only free match data that was allocated above, never the shared one. */
	if (match_data != mdata) {
		pcre2_match_data_free(match_data);
	}

	/* Any error recorded during matching discards the partial result. */
	if (PCRE_G(error_code) != PHP_PCRE_NO_ERROR) {
		zval_ptr_dtor(return_value);
		RETURN_FALSE;
	}

last:
	start_offset = last_match_offset; /* the offset might have been incremented, but without further successful matches */

	/* Emit whatever follows the last match, unless it is empty and
	   NO_EMPTY was requested. */
	if (!no_empty || start_offset < ZSTR_LEN(subject_str)) {
		if (offset_capture) {
			/* Add the last (match, offset) pair to the return value */
			add_offset_pair(return_value, subject, start_offset, ZSTR_LEN(subject_str), NULL, 0);
		} else {
			/* Add the last piece to the return value */
			if (start_offset == 0) {
				/* The piece is the entire subject: share the existing
				   string instead of copying it. */
				ZVAL_STR_COPY(&tmp, subject_str);
			} else {
				populate_match_value_str(&tmp, subject, start_offset, ZSTR_LEN(subject_str));
			}
			zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
		}
	}
}
2725 /* }}} */
2726
2727 /* {{{ Quote regular expression characters plus an optional character */
PHP_FUNCTION(preg_quote)2728 PHP_FUNCTION(preg_quote)
2729 {
2730 zend_string *str; /* Input string argument */
2731 zend_string *delim = NULL; /* Additional delimiter argument */
2732 char *in_str; /* Input string */
2733 char *in_str_end; /* End of the input string */
2734 zend_string *out_str; /* Output string with quoted characters */
2735 size_t extra_len; /* Number of additional characters */
2736 char *p, /* Iterator for input string */
2737 *q, /* Iterator for output string */
2738 delim_char = '\0', /* Delimiter character to be quoted */
2739 c; /* Current character */
2740
2741 /* Get the arguments and check for errors */
2742 ZEND_PARSE_PARAMETERS_START(1, 2)
2743 Z_PARAM_STR(str)
2744 Z_PARAM_OPTIONAL
2745 Z_PARAM_STR_OR_NULL(delim)
2746 ZEND_PARSE_PARAMETERS_END();
2747
2748 /* Nothing to do if we got an empty string */
2749 if (ZSTR_LEN(str) == 0) {
2750 RETURN_EMPTY_STRING();
2751 }
2752
2753 in_str = ZSTR_VAL(str);
2754 in_str_end = in_str + ZSTR_LEN(str);
2755
2756 if (delim) {
2757 delim_char = ZSTR_VAL(delim)[0];
2758 }
2759
2760 /* Go through the string and quote necessary characters */
2761 extra_len = 0;
2762 p = in_str;
2763 do {
2764 c = *p;
2765 switch(c) {
2766 case '.':
2767 case '\\':
2768 case '+':
2769 case '*':
2770 case '?':
2771 case '[':
2772 case '^':
2773 case ']':
2774 case '$':
2775 case '(':
2776 case ')':
2777 case '{':
2778 case '}':
2779 case '=':
2780 case '!':
2781 case '>':
2782 case '<':
2783 case '|':
2784 case ':':
2785 case '-':
2786 case '#':
2787 extra_len++;
2788 break;
2789
2790 case '\0':
2791 extra_len+=3;
2792 break;
2793
2794 default:
2795 if (c == delim_char) {
2796 extra_len++;
2797 }
2798 break;
2799 }
2800 p++;
2801 } while (p != in_str_end);
2802
2803 if (extra_len == 0) {
2804 RETURN_STR_COPY(str);
2805 }
2806
2807 /* Allocate enough memory so that even if each character
2808 is quoted, we won't run out of room */
2809 out_str = zend_string_safe_alloc(1, ZSTR_LEN(str), extra_len, 0);
2810 q = ZSTR_VAL(out_str);
2811 p = in_str;
2812
2813 do {
2814 c = *p;
2815 switch(c) {
2816 case '.':
2817 case '\\':
2818 case '+':
2819 case '*':
2820 case '?':
2821 case '[':
2822 case '^':
2823 case ']':
2824 case '$':
2825 case '(':
2826 case ')':
2827 case '{':
2828 case '}':
2829 case '=':
2830 case '!':
2831 case '>':
2832 case '<':
2833 case '|':
2834 case ':':
2835 case '-':
2836 case '#':
2837 *q++ = '\\';
2838 *q++ = c;
2839 break;
2840
2841 case '\0':
2842 *q++ = '\\';
2843 *q++ = '0';
2844 *q++ = '0';
2845 *q++ = '0';
2846 break;
2847
2848 default:
2849 if (c == delim_char) {
2850 *q++ = '\\';
2851 }
2852 *q++ = c;
2853 break;
2854 }
2855 p++;
2856 } while (p != in_str_end);
2857 *q = '\0';
2858
2859 RETURN_NEW_STR(out_str);
2860 }
2861 /* }}} */
2862
2863 /* {{{ Searches array and returns entries which match regex */
PHP_FUNCTION(preg_grep)2864 PHP_FUNCTION(preg_grep)
2865 {
2866 zend_string *regex; /* Regular expression */
2867 zval *input; /* Input array */
2868 zend_long flags = 0; /* Match control flags */
2869 pcre_cache_entry *pce; /* Compiled regular expression */
2870
2871 /* Get arguments and do error checking */
2872 ZEND_PARSE_PARAMETERS_START(2, 3)
2873 Z_PARAM_STR(regex)
2874 Z_PARAM_ARRAY(input)
2875 Z_PARAM_OPTIONAL
2876 Z_PARAM_LONG(flags)
2877 ZEND_PARSE_PARAMETERS_END();
2878
2879 /* Compile regex or get it from cache. */
2880 if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
2881 RETURN_FALSE;
2882 }
2883
2884 pce->refcount++;
2885 php_pcre_grep_impl(pce, input, return_value, flags);
2886 pce->refcount--;
2887 }
2888 /* }}} */
2889
php_pcre_grep_impl(pcre_cache_entry * pce,zval * input,zval * return_value,zend_long flags)2890 PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return_value, zend_long flags) /* {{{ */
2891 {
2892 zval *entry; /* An entry in the input array */
2893 uint32_t num_subpats; /* Number of captured subpatterns */
2894 int count; /* Count of matched subpatterns */
2895 uint32_t options; /* Execution options */
2896 zend_string *string_key;
2897 zend_ulong num_key;
2898 bool invert; /* Whether to return non-matching
2899 entries */
2900 pcre2_match_data *match_data;
2901 invert = flags & PREG_GREP_INVERT ? 1 : 0;
2902
2903 /* Calculate the size of the offsets array, and allocate memory for it. */
2904 num_subpats = pce->capture_count + 1;
2905
2906 /* Initialize return array */
2907 array_init(return_value);
2908
2909 PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
2910
2911 if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
2912 match_data = mdata;
2913 } else {
2914 match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
2915 if (!match_data) {
2916 PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
2917 return;
2918 }
2919 }
2920
2921 options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
2922
2923 /* Go through the input array */
2924 ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(input), num_key, string_key, entry) {
2925 zend_string *tmp_subject_str;
2926 zend_string *subject_str = zval_get_tmp_string(entry, &tmp_subject_str);
2927
2928 /* Perform the match */
2929 #ifdef HAVE_PCRE_JIT_SUPPORT
2930 if ((pce->preg_options & PREG_JIT) && options) {
2931 count = pcre2_jit_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), 0,
2932 PCRE2_NO_UTF_CHECK, match_data, mctx);
2933 } else
2934 #endif
2935 count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), 0,
2936 options, match_data, mctx);
2937
2938 /* If the entry fits our requirements */
2939 if (count >= 0) {
2940 /* Check for too many substrings condition. */
2941 if (UNEXPECTED(count == 0)) {
2942 php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
2943 }
2944 if (!invert) {
2945 Z_TRY_ADDREF_P(entry);
2946
2947 /* Add to return array */
2948 if (string_key) {
2949 zend_hash_update(Z_ARRVAL_P(return_value), string_key, entry);
2950 } else {
2951 zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, entry);
2952 }
2953 }
2954 } else if (count == PCRE2_ERROR_NOMATCH) {
2955 if (invert) {
2956 Z_TRY_ADDREF_P(entry);
2957
2958 /* Add to return array */
2959 if (string_key) {
2960 zend_hash_update(Z_ARRVAL_P(return_value), string_key, entry);
2961 } else {
2962 zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, entry);
2963 }
2964 }
2965 } else {
2966 pcre_handle_exec_error(count);
2967 zend_tmp_string_release(tmp_subject_str);
2968 break;
2969 }
2970
2971 zend_tmp_string_release(tmp_subject_str);
2972 } ZEND_HASH_FOREACH_END();
2973 if (match_data != mdata) {
2974 pcre2_match_data_free(match_data);
2975 }
2976 }
2977 /* }}} */
2978
2979 /* {{{ Returns the error code of the last regexp execution. */
PHP_FUNCTION(preg_last_error)2980 PHP_FUNCTION(preg_last_error)
2981 {
2982 ZEND_PARSE_PARAMETERS_NONE();
2983
2984 RETURN_LONG(PCRE_G(error_code));
2985 }
2986 /* }}} */
2987
2988 /* {{{ Returns the error message of the last regexp execution. */
PHP_FUNCTION(preg_last_error_msg)2989 PHP_FUNCTION(preg_last_error_msg)
2990 {
2991 ZEND_PARSE_PARAMETERS_NONE();
2992
2993 RETURN_STRING(php_pcre_get_error_msg(PCRE_G(error_code)));
2994 }
2995 /* }}} */
2996
2997 /* {{{ module definition structures */
2998
/* Module descriptor for the "pcre" extension. The initializer is positional,
 * so the order of the entries below must not be changed. */
zend_module_entry pcre_module_entry = {
	STANDARD_MODULE_HEADER,
	"pcre",                      /* extension name */
	ext_functions,               /* function table */
	PHP_MINIT(pcre),             /* module startup handler */
	PHP_MSHUTDOWN(pcre),         /* module shutdown handler */
	PHP_RINIT(pcre),             /* request startup handler */
	PHP_RSHUTDOWN(pcre),         /* request shutdown handler */
	PHP_MINFO(pcre),             /* module info handler */
	PHP_PCRE_VERSION,            /* version string */
	PHP_MODULE_GLOBALS(pcre),    /* module globals descriptor */
	PHP_GINIT(pcre),             /* globals constructor */
	PHP_GSHUTDOWN(pcre),         /* globals destructor */
	NULL,                        /* NOTE(review): presumably the unused post-deactivate hook slot -- confirm against zend_module_entry */
	STANDARD_MODULE_PROPERTIES_EX
};
3015
/* Only compiled when COMPILE_DL_PCRE is defined. NOTE(review): ZEND_GET_MODULE
 * presumably emits the get_module() entry point used when the extension is
 * loaded as a shared object -- confirm against the Zend API headers. */
#ifdef COMPILE_DL_PCRE
ZEND_GET_MODULE(pcre)
#endif
3019
3020 /* }}} */
3021
3022 PHPAPI pcre2_match_context *php_pcre_mctx(void)
3023 {/*{{{*/
3024 return mctx;
3025 }/*}}}*/
3026
php_pcre_gctx(void)3027 PHPAPI pcre2_general_context *php_pcre_gctx(void)
3028 {/*{{{*/
3029 return gctx;
3030 }/*}}}*/
3031
php_pcre_cctx(void)3032 PHPAPI pcre2_compile_context *php_pcre_cctx(void)
3033 {/*{{{*/
3034 return cctx;
3035 }/*}}}*/
3036
php_pcre_pce_incref(pcre_cache_entry * pce)3037 PHPAPI void php_pcre_pce_incref(pcre_cache_entry *pce)
3038 {/*{{{*/
3039 assert(NULL != pce);
3040 pce->refcount++;
3041 }/*}}}*/
3042
php_pcre_pce_decref(pcre_cache_entry * pce)3043 PHPAPI void php_pcre_pce_decref(pcre_cache_entry *pce)
3044 {/*{{{*/
3045 assert(NULL != pce);
3046 assert(0 != pce->refcount);
3047 pce->refcount--;
3048 }/*}}}*/
3049
php_pcre_pce_re(pcre_cache_entry * pce)3050 PHPAPI pcre2_code *php_pcre_pce_re(pcre_cache_entry *pce)
3051 {/*{{{*/
3052 assert(NULL != pce);
3053 return pce->re;
3054 }/*}}}*/
3055