1 /*
2 +----------------------------------------------------------------------+
3 | PHP Version 7 |
4 +----------------------------------------------------------------------+
5 | Copyright (c) The PHP Group |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
15 | Author: Andrei Zmievski <andrei@php.net> |
16 +----------------------------------------------------------------------+
17 */
18
19 #include "php.h"
20 #include "php_ini.h"
21 #include "php_globals.h"
22 #include "php_pcre.h"
23 #include "ext/standard/info.h"
24 #include "ext/standard/basic_functions.h"
25 #include "zend_smart_str.h"
26 #include "SAPI.h"
27
28 #include "ext/standard/php_string.h"
29
/* Exported to userland in MINIT as PHP constants of the same name. */
#define PREG_PATTERN_ORDER 1
#define PREG_SET_ORDER 2
#define PREG_OFFSET_CAPTURE (1<<8)
#define PREG_UNMATCHED_AS_NULL (1<<9)

/* Exported to userland in MINIT as PHP constants of the same name. */
#define PREG_SPLIT_NO_EMPTY (1<<0)
#define PREG_SPLIT_DELIM_CAPTURE (1<<1)
#define PREG_SPLIT_OFFSET_CAPTURE (1<<2)

/* Set in the preg options by the 'e' modifier; the pattern is then rejected with a warning. */
#define PREG_REPLACE_EVAL (1<<0)

/* Exported to userland in MINIT as the PHP constant PREG_GREP_INVERT. */
#define PREG_GREP_INVERT (1<<0)

/* Set in a cache entry's preg_options when JIT compilation produced code for the pattern. */
#define PREG_JIT (1<<3)

/* Number of cached patterns at which the cache is pruned before a new entry is added. */
#define PCRE_CACHE_SIZE 4096
46
/* One compiled pattern as stored in PCRE_G(pcre_cache). */
struct _pcre_cache_entry {
	pcre2_code *re;                 /* compiled pattern; freed with pcre2_code_free() by the cache destructors */
	uint32_t preg_options;          /* PREG_* flags (e.g. PREG_JIT) */
	uint32_t capture_count;         /* value of PCRE2_INFO_CAPTURECOUNT for re */
	uint32_t name_count;            /* value of PCRE2_INFO_NAMECOUNT for re */
	uint32_t compile_options;       /* options that were passed to pcre2_compile() */
	uint32_t extra_compile_options; /* extra options set on the compile context for this pattern */
	uint32_t refcount;              /* entries with a non-zero refcount are kept by pcre_clean_cache() */
};
56
/* Values stored in PCRE_G(error_code); exported in MINIT as the PREG_*_ERROR constants. */
enum {
	PHP_PCRE_NO_ERROR = 0,
	PHP_PCRE_INTERNAL_ERROR,
	PHP_PCRE_BACKTRACK_LIMIT_ERROR,
	PHP_PCRE_RECURSION_LIMIT_ERROR,
	PHP_PCRE_BAD_UTF8_ERROR,
	PHP_PCRE_BAD_UTF8_OFFSET_ERROR,
	PHP_PCRE_JIT_STACKLIMIT_ERROR
};
66
67
PHPAPI ZEND_DECLARE_MODULE_GLOBALS(pcre)

#ifdef HAVE_PCRE_JIT_SUPPORT
/* Start and maximum sizes passed to pcre2_jit_stack_create(). */
#define PCRE_JIT_STACK_MIN_SIZE (32 * 1024)
#define PCRE_JIT_STACK_MAX_SIZE (192 * 1024)
ZEND_TLS pcre2_jit_stack *jit_stack = NULL;
#endif
/* General context carrying the persistent allocator callbacks. */
ZEND_TLS pcre2_general_context *gctx = NULL;
/* These two are global per thread for now, though it would be possible to use
   them per pattern: either copy them into each pce, or drop the global
   contexts altogether and create a pair for every pce. */
ZEND_TLS pcre2_compile_context *cctx = NULL;
ZEND_TLS pcre2_match_context *mctx = NULL;
/* Preallocated match data block and the flag telling whether it is handed out. */
ZEND_TLS pcre2_match_data *mdata = NULL;
ZEND_TLS zend_bool mdata_used = 0;
/* Set to 1 once php_pcre_init_pcre2() has created every object it needs. */
ZEND_TLS uint8_t pcre2_init_ok = 0;
#if defined(ZTS) && defined(HAVE_PCRE_JIT_SUPPORT)
/* Process-wide mutex; allocated and freed by the main thread only. */
static MUTEX_T pcre_mt = NULL;
#define php_pcre_mutex_alloc() \
	if (tsrm_is_main_thread() && !pcre_mt) pcre_mt = tsrm_mutex_alloc();
#define php_pcre_mutex_free() \
	if (tsrm_is_main_thread() && pcre_mt) { tsrm_mutex_free(pcre_mt); pcre_mt = NULL; }
#define php_pcre_mutex_lock() tsrm_mutex_lock(pcre_mt);
#define php_pcre_mutex_unlock() tsrm_mutex_unlock(pcre_mt);
#else
/* Without ZTS or without JIT support the mutex helpers expand to nothing. */
#define php_pcre_mutex_alloc()
#define php_pcre_mutex_free()
#define php_pcre_mutex_lock()
#define php_pcre_mutex_unlock()
#endif

/* Character tables created by pcre2_maketables(), keyed by locale name. */
ZEND_TLS HashTable char_tables;
100
/* Destructor of the char_tables hash: frees the persistently allocated
 * block the element points to. */
static void php_pcre_free_char_table(zval *data)
{/*{{{*/
	pefree(Z_PTR_P(data), 1);
}/*}}}*/
106
pcre_handle_exec_error(int pcre_code)107 static void pcre_handle_exec_error(int pcre_code) /* {{{ */
108 {
109 int preg_code = 0;
110
111 switch (pcre_code) {
112 case PCRE2_ERROR_MATCHLIMIT:
113 preg_code = PHP_PCRE_BACKTRACK_LIMIT_ERROR;
114 break;
115
116 case PCRE2_ERROR_RECURSIONLIMIT:
117 preg_code = PHP_PCRE_RECURSION_LIMIT_ERROR;
118 break;
119
120 case PCRE2_ERROR_BADUTFOFFSET:
121 preg_code = PHP_PCRE_BAD_UTF8_OFFSET_ERROR;
122 break;
123
124 #ifdef HAVE_PCRE_JIT_SUPPORT
125 case PCRE2_ERROR_JIT_STACKLIMIT:
126 preg_code = PHP_PCRE_JIT_STACKLIMIT_ERROR;
127 break;
128 #endif
129
130 default:
131 if (pcre_code <= PCRE2_ERROR_UTF8_ERR1 && pcre_code >= PCRE2_ERROR_UTF8_ERR21) {
132 preg_code = PHP_PCRE_BAD_UTF8_ERROR;
133 } else {
134 preg_code = PHP_PCRE_INTERNAL_ERROR;
135 }
136 break;
137 }
138
139 PCRE_G(error_code) = preg_code;
140 }
141 /* }}} */
142
/* Destructor for cache entries that were allocated with malloc(): releases
 * the compiled pattern, then the entry itself. */
static void php_free_pcre_cache(zval *data) /* {{{ */
{
	pcre_cache_entry *entry = (pcre_cache_entry *) Z_PTR_P(data);

	if (entry != NULL) {
		pcre2_code_free(entry->re);
		free(entry);
	}
}
150 /* }}} */
151
/* Destructor for cache entries that were allocated with the request
 * allocator: releases the compiled pattern, then efree()s the entry. */
static void php_efree_pcre_cache(zval *data) /* {{{ */
{
	pcre_cache_entry *entry = (pcre_cache_entry *) Z_PTR_P(data);

	if (entry != NULL) {
		pcre2_code_free(entry->re);
		efree(entry);
	}
}
159 /* }}} */
160
/* Allocation callback handed to pcre2_general_context_create(); allocates
 * persistent memory. The user-data pointer is not used. */
static void *php_pcre_malloc(PCRE2_SIZE size, void *data)
{/*{{{*/
	(void)data;
	return pemalloc(size, 1);
}/*}}}*/
166
/* Release callback handed to pcre2_general_context_create(); counterpart of
 * php_pcre_malloc(). The user-data pointer is not used. */
static void php_pcre_free(void *block, void *data)
{/*{{{*/
	(void)data;
	pefree(block, 1);
}/*}}}*/
171
/* Extra compile options applied to the shared compile context by default.
 * The 'X' pattern modifier removes PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL for a
 * single compilation. */
#ifdef PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK
/* pcre 10.38 needs PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK, disabled by default */
#define PHP_PCRE_DEFAULT_EXTRA_COPTIONS (PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL|PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK)
#else
#define PHP_PCRE_DEFAULT_EXTRA_COPTIONS PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL
#endif

/* Size passed to pcre2_match_data_create() for the preallocated block; patterns
 * with capture_count + 1 above this get their own match data. */
#define PHP_PCRE_PREALLOC_MDATA_SIZE 32
180
/* Creates whichever of the per-thread PCRE2 objects (general, compile and
 * match contexts, JIT stack, preallocated match data) do not exist yet.
 * Objects that already exist are kept, so the function can be called again
 * after a partial failure.
 *
 * jit  non-zero to also create the JIT stack (only with JIT support built in)
 *
 * Sets pcre2_init_ok to 1 on success and to 0 as soon as a creation fails. */
static void php_pcre_init_pcre2(uint8_t jit)
{/*{{{*/
	if (gctx == NULL) {
		gctx = pcre2_general_context_create(php_pcre_malloc, php_pcre_free, NULL);
	}
	if (gctx == NULL) {
		goto failed;
	}

	if (cctx == NULL) {
		cctx = pcre2_compile_context_create(gctx);
	}
	if (cctx == NULL) {
		goto failed;
	}

	/* XXX The 'X' modifier is the default behavior in PCRE2. This option is
		called dangerous in the manual, as typos in patterns can cause
		unexpected results. We might want to switch to the default PCRE2
		behavior, too, thus causing a certain BC break. */
	pcre2_set_compile_extra_options(cctx, PHP_PCRE_DEFAULT_EXTRA_COPTIONS);

	if (mctx == NULL) {
		mctx = pcre2_match_context_create(gctx);
	}
	if (mctx == NULL) {
		goto failed;
	}

#ifdef HAVE_PCRE_JIT_SUPPORT
	if (jit && jit_stack == NULL) {
		jit_stack = pcre2_jit_stack_create(PCRE_JIT_STACK_MIN_SIZE, PCRE_JIT_STACK_MAX_SIZE, gctx);
		if (jit_stack == NULL) {
			goto failed;
		}
	}
#endif

	if (mdata == NULL) {
		mdata = pcre2_match_data_create(PHP_PCRE_PREALLOC_MDATA_SIZE, gctx);
	}
	if (mdata == NULL) {
		goto failed;
	}

	pcre2_init_ok = 1;
	return;

failed:
	pcre2_init_ok = 0;
}/*}}}*/
233
php_pcre_shutdown_pcre2(void)234 static void php_pcre_shutdown_pcre2(void)
235 {/*{{{*/
236 if (gctx) {
237 pcre2_general_context_free(gctx);
238 gctx = NULL;
239 }
240
241 if (cctx) {
242 pcre2_compile_context_free(cctx);
243 cctx = NULL;
244 }
245
246 if (mctx) {
247 pcre2_match_context_free(mctx);
248 mctx = NULL;
249 }
250
251 #ifdef HAVE_PCRE_JIT_SUPPORT
252 /* Stack may only be destroyed when no cached patterns
253 possibly associated with it do exist. */
254 if (jit_stack) {
255 pcre2_jit_stack_free(jit_stack);
256 jit_stack = NULL;
257 }
258 #endif
259
260 if (mdata) {
261 pcre2_match_data_free(mdata);
262 mdata = NULL;
263 }
264
265 pcre2_init_ok = 0;
266 }/*}}}*/
267
PHP_GINIT_FUNCTION(pcre)268 static PHP_GINIT_FUNCTION(pcre) /* {{{ */
269 {
270 php_pcre_mutex_alloc();
271
272 /* If we're on the CLI SAPI, there will only be one request, so we don't need the
273 * cache to survive after RSHUTDOWN. */
274 pcre_globals->per_request_cache = strcmp(sapi_module.name, "cli") == 0;
275 if (!pcre_globals->per_request_cache) {
276 zend_hash_init(&pcre_globals->pcre_cache, 0, NULL, php_free_pcre_cache, 1);
277 }
278
279 pcre_globals->backtrack_limit = 0;
280 pcre_globals->recursion_limit = 0;
281 pcre_globals->error_code = PHP_PCRE_NO_ERROR;
282 ZVAL_UNDEF(&pcre_globals->unmatched_null_pair);
283 ZVAL_UNDEF(&pcre_globals->unmatched_empty_pair);
284 #ifdef HAVE_PCRE_JIT_SUPPORT
285 pcre_globals->jit = 1;
286 #endif
287
288 php_pcre_init_pcre2(1);
289 zend_hash_init(&char_tables, 1, NULL, php_pcre_free_char_table, 1);
290 }
291 /* }}} */
292
PHP_GSHUTDOWN_FUNCTION(pcre)293 static PHP_GSHUTDOWN_FUNCTION(pcre) /* {{{ */
294 {
295 if (!pcre_globals->per_request_cache) {
296 zend_hash_destroy(&pcre_globals->pcre_cache);
297 }
298
299 php_pcre_shutdown_pcre2();
300 zend_hash_destroy(&char_tables);
301 php_pcre_mutex_free();
302 }
303 /* }}} */
304
PHP_INI_MH(OnUpdateBacktrackLimit)305 static PHP_INI_MH(OnUpdateBacktrackLimit)
306 {/*{{{*/
307 OnUpdateLong(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
308 if (mctx) {
309 pcre2_set_match_limit(mctx, (uint32_t)PCRE_G(backtrack_limit));
310 }
311
312 return SUCCESS;
313 }/*}}}*/
314
PHP_INI_MH(OnUpdateRecursionLimit)315 static PHP_INI_MH(OnUpdateRecursionLimit)
316 {/*{{{*/
317 OnUpdateLong(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
318 if (mctx) {
319 pcre2_set_depth_limit(mctx, (uint32_t)PCRE_G(recursion_limit));
320 }
321
322 return SUCCESS;
323 }/*}}}*/
324
325 #ifdef HAVE_PCRE_JIT_SUPPORT
PHP_INI_MH(OnUpdateJit)326 static PHP_INI_MH(OnUpdateJit)
327 {/*{{{*/
328 OnUpdateBool(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
329 if (PCRE_G(jit) && jit_stack) {
330 pcre2_jit_stack_assign(mctx, NULL, jit_stack);
331 } else {
332 pcre2_jit_stack_assign(mctx, NULL, NULL);
333 }
334
335 return SUCCESS;
336 }/*}}}*/
337 #endif
338
/* INI settings of the extension. Each entry is bound to the field of the same
 * name in the module globals; the handlers also push the new value into the
 * PCRE2 match context. */
PHP_INI_BEGIN()
	STD_PHP_INI_ENTRY("pcre.backtrack_limit", "1000000", PHP_INI_ALL, OnUpdateBacktrackLimit, backtrack_limit, zend_pcre_globals, pcre_globals)
	STD_PHP_INI_ENTRY("pcre.recursion_limit", "100000", PHP_INI_ALL, OnUpdateRecursionLimit, recursion_limit, zend_pcre_globals, pcre_globals)
#ifdef HAVE_PCRE_JIT_SUPPORT
	STD_PHP_INI_ENTRY("pcre.jit", "1", PHP_INI_ALL, OnUpdateJit, jit, zend_pcre_globals, pcre_globals)
#endif
PHP_INI_END()
346
/* Fetches a string-valued pcre2_config() item.
 *
 * what  one of the PCRE2_CONFIG_* selectors that yield a string
 *
 * Returns a malloc()ed, NUL-terminated string that the caller must free(),
 * or NULL when the item is unavailable, pcre2_config() reports an error
 * (negative return), or the allocation fails. */
static char *_pcre2_config_str(uint32_t what)
{/*{{{*/
	char *ret;
	/* With a NULL buffer pcre2_config() reports the required length, or a
	 * negative error code for an unknown/unsupported item. */
	int len = pcre2_config(what, NULL);

	if (len <= 0) {
		return NULL;
	}

	ret = malloc((size_t)len + 1);
	if (!ret) {
		return NULL;
	}

	len = pcre2_config(what, ret);
	if (len <= 0) {
		free(ret);
		return NULL;
	}

	return ret;
}/*}}}*/
360
361 /* {{{ PHP_MINFO_FUNCTION(pcre) */
PHP_MINFO_FUNCTION(pcre)362 static PHP_MINFO_FUNCTION(pcre)
363 {
364 #ifdef HAVE_PCRE_JIT_SUPPORT
365 uint32_t flag = 0;
366 char *jit_target = _pcre2_config_str(PCRE2_CONFIG_JITTARGET);
367 #endif
368 char *version = _pcre2_config_str(PCRE2_CONFIG_VERSION);
369 char *unicode = _pcre2_config_str(PCRE2_CONFIG_UNICODE_VERSION);
370
371 php_info_print_table_start();
372 php_info_print_table_row(2, "PCRE (Perl Compatible Regular Expressions) Support", "enabled" );
373 php_info_print_table_row(2, "PCRE Library Version", version);
374 free(version);
375 php_info_print_table_row(2, "PCRE Unicode Version", unicode);
376 free(unicode);
377
378 #ifdef HAVE_PCRE_JIT_SUPPORT
379 if (!pcre2_config(PCRE2_CONFIG_JIT, &flag)) {
380 php_info_print_table_row(2, "PCRE JIT Support", flag ? "enabled" : "disabled");
381 } else {
382 php_info_print_table_row(2, "PCRE JIT Support", "unknown" );
383 }
384 if (jit_target) {
385 php_info_print_table_row(2, "PCRE JIT Target", jit_target);
386 }
387 free(jit_target);
388 #else
389 php_info_print_table_row(2, "PCRE JIT Support", "not compiled in" );
390 #endif
391
392 #ifdef HAVE_PCRE_VALGRIND_SUPPORT
393 php_info_print_table_row(2, "PCRE Valgrind Support", "enabled" );
394 #endif
395
396 php_info_print_table_end();
397
398 DISPLAY_INI_ENTRIES();
399 }
400 /* }}} */
401
402 /* {{{ PHP_MINIT_FUNCTION(pcre) */
PHP_MINIT_FUNCTION(pcre)403 static PHP_MINIT_FUNCTION(pcre)
404 {
405 char *version;
406
407 #ifdef HAVE_PCRE_JIT_SUPPORT
408 if (UNEXPECTED(!pcre2_init_ok)) {
409 /* Retry. */
410 php_pcre_init_pcre2(PCRE_G(jit));
411 if (!pcre2_init_ok) {
412 return FAILURE;
413 }
414 }
415 #endif
416
417 REGISTER_INI_ENTRIES();
418
419 REGISTER_LONG_CONSTANT("PREG_PATTERN_ORDER", PREG_PATTERN_ORDER, CONST_CS | CONST_PERSISTENT);
420 REGISTER_LONG_CONSTANT("PREG_SET_ORDER", PREG_SET_ORDER, CONST_CS | CONST_PERSISTENT);
421 REGISTER_LONG_CONSTANT("PREG_OFFSET_CAPTURE", PREG_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
422 REGISTER_LONG_CONSTANT("PREG_UNMATCHED_AS_NULL", PREG_UNMATCHED_AS_NULL, CONST_CS | CONST_PERSISTENT);
423 REGISTER_LONG_CONSTANT("PREG_SPLIT_NO_EMPTY", PREG_SPLIT_NO_EMPTY, CONST_CS | CONST_PERSISTENT);
424 REGISTER_LONG_CONSTANT("PREG_SPLIT_DELIM_CAPTURE", PREG_SPLIT_DELIM_CAPTURE, CONST_CS | CONST_PERSISTENT);
425 REGISTER_LONG_CONSTANT("PREG_SPLIT_OFFSET_CAPTURE", PREG_SPLIT_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
426 REGISTER_LONG_CONSTANT("PREG_GREP_INVERT", PREG_GREP_INVERT, CONST_CS | CONST_PERSISTENT);
427
428 REGISTER_LONG_CONSTANT("PREG_NO_ERROR", PHP_PCRE_NO_ERROR, CONST_CS | CONST_PERSISTENT);
429 REGISTER_LONG_CONSTANT("PREG_INTERNAL_ERROR", PHP_PCRE_INTERNAL_ERROR, CONST_CS | CONST_PERSISTENT);
430 REGISTER_LONG_CONSTANT("PREG_BACKTRACK_LIMIT_ERROR", PHP_PCRE_BACKTRACK_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
431 REGISTER_LONG_CONSTANT("PREG_RECURSION_LIMIT_ERROR", PHP_PCRE_RECURSION_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
432 REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_ERROR", PHP_PCRE_BAD_UTF8_ERROR, CONST_CS | CONST_PERSISTENT);
433 REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_OFFSET_ERROR", PHP_PCRE_BAD_UTF8_OFFSET_ERROR, CONST_CS | CONST_PERSISTENT);
434 REGISTER_LONG_CONSTANT("PREG_JIT_STACKLIMIT_ERROR", PHP_PCRE_JIT_STACKLIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
435 version = _pcre2_config_str(PCRE2_CONFIG_VERSION);
436 REGISTER_STRING_CONSTANT("PCRE_VERSION", version, CONST_CS | CONST_PERSISTENT);
437 free(version);
438 REGISTER_LONG_CONSTANT("PCRE_VERSION_MAJOR", PCRE2_MAJOR, CONST_CS | CONST_PERSISTENT);
439 REGISTER_LONG_CONSTANT("PCRE_VERSION_MINOR", PCRE2_MINOR, CONST_CS | CONST_PERSISTENT);
440
441 #ifdef HAVE_PCRE_JIT_SUPPORT
442 REGISTER_BOOL_CONSTANT("PCRE_JIT_SUPPORT", 1, CONST_CS | CONST_PERSISTENT);
443 #else
444 REGISTER_BOOL_CONSTANT("PCRE_JIT_SUPPORT", 0, CONST_CS | CONST_PERSISTENT);
445 #endif
446
447 return SUCCESS;
448 }
449 /* }}} */
450
451 /* {{{ PHP_MSHUTDOWN_FUNCTION(pcre) */
/* Module shutdown: unregisters the INI entries registered in MINIT.
 * Always returns SUCCESS. */
static PHP_MSHUTDOWN_FUNCTION(pcre)
{
	UNREGISTER_INI_ENTRIES();

	return SUCCESS;
}
458 /* }}} */
459
460 /* {{{ PHP_RINIT_FUNCTION(pcre) */
PHP_RINIT_FUNCTION(pcre)461 static PHP_RINIT_FUNCTION(pcre)
462 {
463 #ifdef HAVE_PCRE_JIT_SUPPORT
464 if (UNEXPECTED(!pcre2_init_ok)) {
465 /* Retry. */
466 php_pcre_mutex_lock();
467 php_pcre_init_pcre2(PCRE_G(jit));
468 if (!pcre2_init_ok) {
469 php_pcre_mutex_unlock();
470 return FAILURE;
471 }
472 php_pcre_mutex_unlock();
473 }
474
475 mdata_used = 0;
476 #endif
477
478 if (PCRE_G(per_request_cache)) {
479 zend_hash_init(&PCRE_G(pcre_cache), 0, NULL, php_efree_pcre_cache, 0);
480 }
481
482 return SUCCESS;
483 }
484 /* }}} */
485
PHP_RSHUTDOWN_FUNCTION(pcre)486 static PHP_RSHUTDOWN_FUNCTION(pcre)
487 {
488 if (PCRE_G(per_request_cache)) {
489 zend_hash_destroy(&PCRE_G(pcre_cache));
490 }
491
492 zval_ptr_dtor(&PCRE_G(unmatched_null_pair));
493 zval_ptr_dtor(&PCRE_G(unmatched_empty_pair));
494 ZVAL_UNDEF(&PCRE_G(unmatched_null_pair));
495 ZVAL_UNDEF(&PCRE_G(unmatched_empty_pair));
496 return SUCCESS;
497 }
498
499 /* {{{ static pcre_clean_cache */
/* Hash apply callback used to prune the pattern cache.
 *
 * data  hash element pointing to a pcre_cache_entry
 * arg   pointer to an int holding how many entries may still be removed
 *
 * Removes the entry (and decrements the budget) when budget is left and the
 * entry's refcount is zero; keeps it otherwise. */
static int pcre_clean_cache(zval *data, void *arg)
{
	int *budget = (int *)arg;
	pcre_cache_entry *entry = (pcre_cache_entry *) Z_PTR_P(data);

	if (*budget <= 0 || entry->refcount) {
		return ZEND_HASH_APPLY_KEEP;
	}

	(*budget)--;
	return ZEND_HASH_APPLY_REMOVE;
}
512 /* }}} */
513
/* Releases every non-NULL name in a table built by make_subpats_table()
 * and then frees the table itself. */
static void free_subpats_table(zend_string **subpat_names, uint32_t num_subpats) {
	zend_string **slot = subpat_names;
	zend_string **end = subpat_names + num_subpats;

	for (; slot < end; slot++) {
		if (*slot != NULL) {
			zend_string_release(*slot);
		}
	}
	efree(subpat_names);
}
523
524 /* {{{ static make_subpats_table */
make_subpats_table(uint32_t num_subpats,pcre_cache_entry * pce)525 static zend_string **make_subpats_table(uint32_t num_subpats, pcre_cache_entry *pce)
526 {
527 uint32_t name_cnt = pce->name_count, name_size, ni = 0;
528 char *name_table;
529 zend_string **subpat_names;
530 int rc1, rc2;
531
532 rc1 = pcre2_pattern_info(pce->re, PCRE2_INFO_NAMETABLE, &name_table);
533 rc2 = pcre2_pattern_info(pce->re, PCRE2_INFO_NAMEENTRYSIZE, &name_size);
534 if (rc1 < 0 || rc2 < 0) {
535 php_error_docref(NULL, E_WARNING, "Internal pcre2_pattern_info() error %d", rc1 < 0 ? rc1 : rc2);
536 return NULL;
537 }
538
539 subpat_names = ecalloc(num_subpats, sizeof(zend_string *));
540 while (ni++ < name_cnt) {
541 unsigned short name_idx = 0x100 * (unsigned char)name_table[0] + (unsigned char)name_table[1];
542 const char *name = name_table + 2;
543 subpat_names[name_idx] = zend_string_init(name, strlen(name), 0);
544 if (is_numeric_string(ZSTR_VAL(subpat_names[name_idx]), ZSTR_LEN(subpat_names[name_idx]), NULL, NULL, 0) > 0) {
545 php_error_docref(NULL, E_WARNING, "Numeric named subpatterns are not allowed");
546 free_subpats_table(subpat_names, num_subpats);
547 return NULL;
548 }
549 name_table += name_size;
550 }
551 return subpat_names;
552 }
553 /* }}} */
554
555 /* {{{ static calculate_unit_length */
556 /* Calculates the byte length of the next character. Assumes valid UTF-8 for PCRE2_UTF. */
calculate_unit_length(pcre_cache_entry * pce,char * start)557 static zend_always_inline size_t calculate_unit_length(pcre_cache_entry *pce, char *start)
558 {
559 size_t unit_len;
560
561 if (pce->compile_options & PCRE2_UTF) {
562 char *end = start;
563
564 /* skip continuation bytes */
565 while ((*++end & 0xC0) == 0x80);
566 unit_len = end - start;
567 } else {
568 unit_len = 1;
569 }
570 return unit_len;
571 }
572 /* }}} */
573
574 /* {{{ pcre_get_compiled_regex_cache
575 */
pcre_get_compiled_regex_cache_ex(zend_string * regex,int locale_aware)576 PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, int locale_aware)
577 {
578 pcre2_code *re = NULL;
579 #if 10 == PCRE2_MAJOR && 37 == PCRE2_MINOR
580 uint32_t coptions = PCRE2_NO_START_OPTIMIZE;
581 #else
582 uint32_t coptions = 0;
583 #endif
584 uint32_t extra_coptions = PHP_PCRE_DEFAULT_EXTRA_COPTIONS;
585 PCRE2_UCHAR error[128];
586 PCRE2_SIZE erroffset;
587 int errnumber;
588 char delimiter;
589 char start_delimiter;
590 char end_delimiter;
591 char *p, *pp;
592 char *pattern;
593 size_t pattern_len;
594 uint32_t poptions = 0;
595 const uint8_t *tables = NULL;
596 zval *zv;
597 pcre_cache_entry new_entry;
598 int rc;
599 zend_string *key;
600 pcre_cache_entry *ret;
601
602 if (locale_aware && BG(locale_string) &&
603 (ZSTR_LEN(BG(locale_string)) != 1 && ZSTR_VAL(BG(locale_string))[0] != 'C')) {
604 key = zend_string_alloc(ZSTR_LEN(regex) + ZSTR_LEN(BG(locale_string)) + 1, 0);
605 memcpy(ZSTR_VAL(key), ZSTR_VAL(BG(locale_string)), ZSTR_LEN(BG(locale_string)) + 1);
606 memcpy(ZSTR_VAL(key) + ZSTR_LEN(BG(locale_string)), ZSTR_VAL(regex), ZSTR_LEN(regex) + 1);
607 } else {
608 key = regex;
609 }
610
611 /* Try to lookup the cached regex entry, and if successful, just pass
612 back the compiled pattern, otherwise go on and compile it. */
613 zv = zend_hash_find(&PCRE_G(pcre_cache), key);
614 if (zv) {
615 if (key != regex) {
616 zend_string_release_ex(key, 0);
617 }
618 return (pcre_cache_entry*)Z_PTR_P(zv);
619 }
620
621 p = ZSTR_VAL(regex);
622
623 /* Parse through the leading whitespace, and display a warning if we
624 get to the end without encountering a delimiter. */
625 while (isspace((int)*(unsigned char *)p)) p++;
626 if (*p == 0) {
627 if (key != regex) {
628 zend_string_release_ex(key, 0);
629 }
630 php_error_docref(NULL, E_WARNING,
631 p < ZSTR_VAL(regex) + ZSTR_LEN(regex) ? "Null byte in regex" : "Empty regular expression");
632 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
633 return NULL;
634 }
635
636 /* Get the delimiter and display a warning if it is alphanumeric
637 or a backslash. */
638 delimiter = *p++;
639 if (isalnum((int)*(unsigned char *)&delimiter) || delimiter == '\\') {
640 if (key != regex) {
641 zend_string_release_ex(key, 0);
642 }
643 php_error_docref(NULL,E_WARNING, "Delimiter must not be alphanumeric or backslash");
644 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
645 return NULL;
646 }
647
648 start_delimiter = delimiter;
649 if ((pp = strchr("([{< )]}> )]}>", delimiter)))
650 delimiter = pp[5];
651 end_delimiter = delimiter;
652
653 pp = p;
654
655 if (start_delimiter == end_delimiter) {
656 /* We need to iterate through the pattern, searching for the ending delimiter,
657 but skipping the backslashed delimiters. If the ending delimiter is not
658 found, display a warning. */
659 while (*pp != 0) {
660 if (*pp == '\\' && pp[1] != 0) pp++;
661 else if (*pp == delimiter)
662 break;
663 pp++;
664 }
665 } else {
666 /* We iterate through the pattern, searching for the matching ending
667 * delimiter. For each matching starting delimiter, we increment nesting
668 * level, and decrement it for each matching ending delimiter. If we
669 * reach the end of the pattern without matching, display a warning.
670 */
671 int brackets = 1; /* brackets nesting level */
672 while (*pp != 0) {
673 if (*pp == '\\' && pp[1] != 0) pp++;
674 else if (*pp == end_delimiter && --brackets <= 0)
675 break;
676 else if (*pp == start_delimiter)
677 brackets++;
678 pp++;
679 }
680 }
681
682 if (*pp == 0) {
683 if (key != regex) {
684 zend_string_release_ex(key, 0);
685 }
686 if (pp < ZSTR_VAL(regex) + ZSTR_LEN(regex)) {
687 php_error_docref(NULL,E_WARNING, "Null byte in regex");
688 } else if (start_delimiter == end_delimiter) {
689 php_error_docref(NULL,E_WARNING, "No ending delimiter '%c' found", delimiter);
690 } else {
691 php_error_docref(NULL,E_WARNING, "No ending matching delimiter '%c' found", delimiter);
692 }
693 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
694 return NULL;
695 }
696
697 /* Make a copy of the actual pattern. */
698 pattern_len = pp - p;
699 pattern = estrndup(p, pattern_len);
700
701 /* Move on to the options */
702 pp++;
703
704 /* Parse through the options, setting appropriate flags. Display
705 a warning if we encounter an unknown modifier. */
706 while (pp < ZSTR_VAL(regex) + ZSTR_LEN(regex)) {
707 switch (*pp++) {
708 /* Perl compatible options */
709 case 'i': coptions |= PCRE2_CASELESS; break;
710 case 'm': coptions |= PCRE2_MULTILINE; break;
711 case 's': coptions |= PCRE2_DOTALL; break;
712 case 'x': coptions |= PCRE2_EXTENDED; break;
713
714 /* PCRE specific options */
715 case 'A': coptions |= PCRE2_ANCHORED; break;
716 case 'D': coptions |= PCRE2_DOLLAR_ENDONLY;break;
717 case 'S': /* Pass. */ break;
718 case 'U': coptions |= PCRE2_UNGREEDY; break;
719 case 'X': extra_coptions &= ~PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL; break;
720 case 'u': coptions |= PCRE2_UTF;
721 /* In PCRE, by default, \d, \D, \s, \S, \w, and \W recognize only ASCII
722 characters, even in UTF-8 mode. However, this can be changed by setting
723 the PCRE2_UCP option. */
724 #ifdef PCRE2_UCP
725 coptions |= PCRE2_UCP;
726 #endif
727 break;
728 case 'J': coptions |= PCRE2_DUPNAMES; break;
729
730 /* Custom preg options */
731 case 'e': poptions |= PREG_REPLACE_EVAL; break;
732
733 case ' ':
734 case '\n':
735 case '\r':
736 break;
737
738 default:
739 if (pp[-1]) {
740 php_error_docref(NULL,E_WARNING, "Unknown modifier '%c'", pp[-1]);
741 } else {
742 php_error_docref(NULL,E_WARNING, "Null byte in regex");
743 }
744 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
745 efree(pattern);
746 if (key != regex) {
747 zend_string_release_ex(key, 0);
748 }
749 return NULL;
750 }
751 }
752
753 if (poptions & PREG_REPLACE_EVAL) {
754 php_error_docref(NULL, E_WARNING, "The /e modifier is no longer supported, use preg_replace_callback instead");
755 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
756 efree(pattern);
757 if (key != regex) {
758 zend_string_release_ex(key, 0);
759 }
760 return NULL;
761 }
762
763 if (key != regex) {
764 tables = (uint8_t *)zend_hash_find_ptr(&char_tables, BG(locale_string));
765 if (!tables) {
766 zend_string *_k;
767 tables = pcre2_maketables(gctx);
768 if (UNEXPECTED(!tables)) {
769 php_error_docref(NULL,E_WARNING, "Failed to generate locale character tables");
770 pcre_handle_exec_error(PCRE2_ERROR_NOMEMORY);
771 zend_string_release_ex(key, 0);
772 efree(pattern);
773 return NULL;
774 }
775 _k = zend_string_init(ZSTR_VAL(BG(locale_string)), ZSTR_LEN(BG(locale_string)), 1);
776 GC_MAKE_PERSISTENT_LOCAL(_k);
777 zend_hash_add_ptr(&char_tables, _k, (void *)tables);
778 zend_string_release(_k);
779 }
780 }
781 pcre2_set_character_tables(cctx, tables);
782
783 /* Set extra options for the compile context. */
784 if (PHP_PCRE_DEFAULT_EXTRA_COPTIONS != extra_coptions) {
785 pcre2_set_compile_extra_options(cctx, extra_coptions);
786 }
787
788 /* Compile pattern and display a warning if compilation failed. */
789 re = pcre2_compile((PCRE2_SPTR)pattern, pattern_len, coptions, &errnumber, &erroffset, cctx);
790
791 /* Reset the compile context extra options to default. */
792 if (PHP_PCRE_DEFAULT_EXTRA_COPTIONS != extra_coptions) {
793 pcre2_set_compile_extra_options(cctx, PHP_PCRE_DEFAULT_EXTRA_COPTIONS);
794 }
795
796 if (re == NULL) {
797 if (key != regex) {
798 zend_string_release_ex(key, 0);
799 }
800 pcre2_get_error_message(errnumber, error, sizeof(error));
801 php_error_docref(NULL,E_WARNING, "Compilation failed: %s at offset %zu", error, erroffset);
802 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
803 efree(pattern);
804 return NULL;
805 }
806
807 #ifdef HAVE_PCRE_JIT_SUPPORT
808 if (PCRE_G(jit)) {
809 /* Enable PCRE JIT compiler */
810 rc = pcre2_jit_compile(re, PCRE2_JIT_COMPLETE);
811 if (EXPECTED(rc >= 0)) {
812 size_t jit_size = 0;
813 if (!pcre2_pattern_info(re, PCRE2_INFO_JITSIZE, &jit_size) && jit_size > 0) {
814 poptions |= PREG_JIT;
815 }
816 } else if (rc == PCRE2_ERROR_NOMEMORY) {
817 php_error_docref(NULL, E_WARNING,
818 "Allocation of JIT memory failed, PCRE JIT will be disabled. "
819 "This is likely caused by security restrictions. "
820 "Either grant PHP permission to allocate executable memory, or set pcre.jit=0");
821 PCRE_G(jit) = 0;
822 } else {
823 pcre2_get_error_message(rc, error, sizeof(error));
824 php_error_docref(NULL, E_WARNING, "JIT compilation failed: %s", error);
825 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
826 }
827 }
828 #endif
829 efree(pattern);
830
831 /*
832 * If we reached cache limit, clean out the items from the head of the list;
833 * these are supposedly the oldest ones (but not necessarily the least used
834 * ones).
835 */
836 if (zend_hash_num_elements(&PCRE_G(pcre_cache)) == PCRE_CACHE_SIZE) {
837 int num_clean = PCRE_CACHE_SIZE / 8;
838 zend_hash_apply_with_argument(&PCRE_G(pcre_cache), pcre_clean_cache, &num_clean);
839 }
840
841 /* Store the compiled pattern and extra info in the cache. */
842 new_entry.re = re;
843 new_entry.preg_options = poptions;
844 new_entry.compile_options = coptions;
845 new_entry.extra_compile_options = extra_coptions;
846 new_entry.refcount = 0;
847
848 rc = pcre2_pattern_info(re, PCRE2_INFO_CAPTURECOUNT, &new_entry.capture_count);
849 if (rc < 0) {
850 if (key != regex) {
851 zend_string_release_ex(key, 0);
852 }
853 php_error_docref(NULL, E_WARNING, "Internal pcre2_pattern_info() error %d", rc);
854 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
855 return NULL;
856 }
857
858 rc = pcre2_pattern_info(re, PCRE2_INFO_NAMECOUNT, &new_entry.name_count);
859 if (rc < 0) {
860 if (key != regex) {
861 zend_string_release_ex(key, 0);
862 }
863 php_error_docref(NULL, E_WARNING, "Internal pcre_pattern_info() error %d", rc);
864 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
865 return NULL;
866 }
867
868 /*
869 * Interned strings are not duplicated when stored in HashTable,
870 * but all the interned strings created during HTTP request are removed
871 * at end of request. However PCRE_G(pcre_cache) must be consistent
872 * on the next request as well. So we disable usage of interned strings
873 * as hash keys especually for this table.
874 * See bug #63180
875 */
876 if (!(GC_FLAGS(key) & IS_STR_PERMANENT) && !PCRE_G(per_request_cache)) {
877 zend_string *str = zend_string_init(ZSTR_VAL(key), ZSTR_LEN(key), 1);
878 GC_MAKE_PERSISTENT_LOCAL(str);
879
880 ret = zend_hash_add_new_mem(&PCRE_G(pcre_cache), str, &new_entry, sizeof(pcre_cache_entry));
881 zend_string_release(str);
882 } else {
883 ret = zend_hash_add_new_mem(&PCRE_G(pcre_cache), key, &new_entry, sizeof(pcre_cache_entry));
884 }
885
886 if (key != regex) {
887 zend_string_release_ex(key, 0);
888 }
889
890 return ret;
891 }
892 /* }}} */
893
894 /* {{{ pcre_get_compiled_regex_cache
895 */
pcre_get_compiled_regex_cache(zend_string * regex)896 PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex)
897 {
898 return pcre_get_compiled_regex_cache_ex(regex, 1);
899 }
900 /* }}} */
901
902 /* {{{ pcre_get_compiled_regex
903 */
pcre_get_compiled_regex(zend_string * regex,uint32_t * capture_count)904 PHPAPI pcre2_code *pcre_get_compiled_regex(zend_string *regex, uint32_t *capture_count)
905 {
906 pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex);
907
908 if (capture_count) {
909 *capture_count = pce ? pce->capture_count : 0;
910 }
911
912 return pce ? pce->re : NULL;
913 }
914 /* }}} */
915
916 /* {{{ pcre_get_compiled_regex_ex
917 */
pcre_get_compiled_regex_ex(zend_string * regex,uint32_t * capture_count,uint32_t * preg_options,uint32_t * compile_options)918 PHPAPI pcre2_code* pcre_get_compiled_regex_ex(zend_string *regex, uint32_t *capture_count, uint32_t *preg_options, uint32_t *compile_options)
919 {
920 pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex);
921
922 if (preg_options) {
923 *preg_options = pce ? pce->preg_options : 0;
924 }
925 if (compile_options) {
926 *compile_options = pce ? pce->compile_options : 0;
927 }
928 if (capture_count) {
929 *capture_count = pce ? pce->capture_count : 0;
930 }
931
932 return pce ? pce->re : NULL;
933 }
934 /* }}} */
935
936 /* XXX For the cases where it's only about match yes/no and no capture
937 required, perhaps just a minimum sized data would suffice. */
php_pcre_create_match_data(uint32_t capture_count,pcre2_code * re)938 PHPAPI pcre2_match_data *php_pcre_create_match_data(uint32_t capture_count, pcre2_code *re)
939 {/*{{{*/
940
941 assert(NULL != re);
942
943 if (EXPECTED(!mdata_used)) {
944 int rc = 0;
945
946 if (!capture_count) {
947 /* As we deal with a non cached pattern, no other way to gather this info. */
948 rc = pcre2_pattern_info(re, PCRE2_INFO_CAPTURECOUNT, &capture_count);
949 }
950
951 if (rc >= 0 && capture_count + 1 <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
952 mdata_used = 1;
953 return mdata;
954 }
955 }
956
957 return pcre2_match_data_create_from_pattern(re, gctx);
958 }/*}}}*/
959
php_pcre_free_match_data(pcre2_match_data * match_data)960 PHPAPI void php_pcre_free_match_data(pcre2_match_data *match_data)
961 {/*{{{*/
962 if (UNEXPECTED(match_data != mdata)) {
963 pcre2_match_data_free(match_data);
964 } else {
965 mdata_used = 0;
966 }
967 }/*}}}*/
968
init_unmatched_null_pair()969 static void init_unmatched_null_pair() {
970 zval val1, val2;
971 ZVAL_NULL(&val1);
972 ZVAL_LONG(&val2, -1);
973 ZVAL_ARR(&PCRE_G(unmatched_null_pair), zend_new_pair(&val1, &val2));
974 }
975
init_unmatched_empty_pair()976 static void init_unmatched_empty_pair() {
977 zval val1, val2;
978 ZVAL_EMPTY_STRING(&val1);
979 ZVAL_LONG(&val2, -1);
980 ZVAL_ARR(&PCRE_G(unmatched_empty_pair), zend_new_pair(&val1, &val2));
981 }
982
populate_match_value_str(zval * val,const char * subject,PCRE2_SIZE start_offset,PCRE2_SIZE end_offset)983 static zend_always_inline void populate_match_value_str(
984 zval *val, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset) {
985 if (start_offset == end_offset) {
986 ZVAL_EMPTY_STRING(val);
987 } else if (start_offset + 1 == end_offset) {
988 ZVAL_INTERNED_STR(val, ZSTR_CHAR((unsigned char) subject[start_offset]));
989 } else {
990 ZVAL_STRINGL(val, subject + start_offset, end_offset - start_offset);
991 }
992 }
993
/*
 * Fill `val` with the text of one capture group.  A group whose start offset
 * is PCRE2_UNSET becomes NULL when unmatched_as_null is non-zero and an empty
 * string otherwise.
 */
static inline void populate_match_value(
		zval *val, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset,
		uint32_t unmatched_as_null) {
	if (start_offset != PCRE2_UNSET) {
		populate_match_value_str(val, subject, start_offset, end_offset);
		return;
	}

	if (unmatched_as_null) {
		ZVAL_NULL(val);
	} else {
		ZVAL_EMPTY_STRING(val);
	}
}
1007
/*
 * Store `val` under `name` in the `subpats` array and take a reference on it
 * when it was actually stored.
 *
 * If the DUPNAMES option is used, multiple subpatterns might have the same
 * name.  A matched group therefore overwrites an existing entry, while an
 * unmatched one is only added when the name is not present yet.
 */
static inline void add_named(
		zval *subpats, zend_string *name, zval *val, zend_bool unmatched) {
	if (unmatched) {
		if (zend_hash_add(Z_ARRVAL_P(subpats), name, val) == NULL) {
			/* Name already present: keep the existing value. */
			return;
		}
	} else {
		zend_hash_update(Z_ARRVAL_P(subpats), name, val);
	}

	Z_TRY_ADDREF_P(val);
}
1021
1022 /* {{{ add_offset_pair */
/*
 * Append a (match, offset) pair for one capture group to `result`, and also
 * store it under `name` when a name is given.
 *
 * For an unset group (start_offset == PCRE2_UNSET) a shared pair is copied:
 * [NULL, -1] when unmatched_as_null is set, ["", -1] otherwise.  Both shared
 * pairs are created on first use.
 */
static inline void add_offset_pair(
		zval *result, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset,
		zend_string *name, uint32_t unmatched_as_null)
{
	const zend_bool is_unset = (start_offset == PCRE2_UNSET);
	zval match_pair;

	if (!is_unset) {
		zval text, position;

		populate_match_value_str(&text, subject, start_offset, end_offset);
		ZVAL_LONG(&position, start_offset);
		ZVAL_ARR(&match_pair, zend_new_pair(&text, &position));
	} else if (unmatched_as_null) {
		if (Z_ISUNDEF(PCRE_G(unmatched_null_pair))) {
			init_unmatched_null_pair();
		}
		ZVAL_COPY(&match_pair, &PCRE_G(unmatched_null_pair));
	} else {
		if (Z_ISUNDEF(PCRE_G(unmatched_empty_pair))) {
			init_unmatched_empty_pair();
		}
		ZVAL_COPY(&match_pair, &PCRE_G(unmatched_empty_pair));
	}

	/* Named slot first, numeric slot second. */
	if (name != NULL) {
		add_named(result, name, &match_pair, is_unset);
	}
	zend_hash_next_index_insert(Z_ARRVAL_P(result), &match_pair);
}
1054 /* }}} */
1055
/*
 * Fill `subpats` with the capture groups of a single match.
 *
 * subject      - the matched subject buffer
 * offsets      - start/end offset pairs, two entries per group
 * subpat_names - group-number -> name table, or NULL when nothing is named
 * num_subpats  - total number of groups, including group 0
 * count        - number of groups reported for this match
 * mark         - (*MARK) name to store under "MARK", or NULL
 * flags        - PREG_OFFSET_CAPTURE stores (text, offset) pairs instead of
 *                plain strings; PREG_UNMATCHED_AS_NULL reports unset groups
 *                as NULL and additionally pads groups count..num_subpats-1.
 *
 * A named group is stored under its name first, then under its index.
 */
static void populate_subpat_array(
		zval *subpats, char *subject, PCRE2_SIZE *offsets, zend_string **subpat_names,
		uint32_t num_subpats, int count, const PCRE2_SPTR mark, zend_long flags) {
	const zend_bool with_offsets = (flags & PREG_OFFSET_CAPTURE) != 0;
	const zend_bool null_for_unset = (flags & PREG_UNMATCHED_AS_NULL) != 0;
	int group;

	/* Groups reported by the match. */
	for (group = 0; group < count; group++) {
		zend_string *group_name = subpat_names ? subpat_names[group] : NULL;
		const PCRE2_SIZE from = offsets[2*group];
		const PCRE2_SIZE to = offsets[2*group+1];

		if (with_offsets) {
			add_offset_pair(subpats, subject, from, to, group_name, null_for_unset);
		} else {
			zval entry;

			populate_match_value(&entry, subject, from, to, null_for_unset);
			if (group_name) {
				add_named(subpats, group_name, &entry, from == PCRE2_UNSET);
			}
			zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &entry);
		}
	}

	/* Trailing groups that were not reported are only padded in NULL mode. */
	if (null_for_unset) {
		for (group = count; group < num_subpats; group++) {
			zend_string *group_name = subpat_names ? subpat_names[group] : NULL;

			if (with_offsets) {
				add_offset_pair(subpats, NULL, PCRE2_UNSET, PCRE2_UNSET, group_name, 1);
			} else {
				zval entry;

				ZVAL_NULL(&entry);
				if (group_name) {
					zend_hash_add(Z_ARRVAL_P(subpats), group_name, &entry);
				}
				zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &entry);
			}
		}
	}

	/* Add MARK, if available */
	if (mark) {
		add_assoc_string_ex(subpats, "MARK", sizeof("MARK") - 1, (char *)mark);
	}
}
1123
/*
 * Shared body of preg_match() and preg_match_all(): parse the PHP-level
 * arguments, fetch the compiled pattern and delegate to
 * php_pcre_match_impl().  `global` selects match-all behaviour.
 *
 * Returns FALSE to PHP when argument parsing fails or when no compiled
 * pattern could be obtained.
 */
static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) /* {{{ */
{
	zend_string      *regex;             /* Regular expression */
	zend_string      *subject;           /* String to match against */
	zval             *subpats = NULL;    /* Array for subpatterns */
	zend_long         flags = 0;         /* Match control flags */
	zend_long         start_offset = 0;  /* Where the new search starts */
	pcre_cache_entry *pce;               /* Compiled regular expression */

	ZEND_PARSE_PARAMETERS_START(2, 5)
		Z_PARAM_STR(regex)
		Z_PARAM_STR(subject)
		Z_PARAM_OPTIONAL
		Z_PARAM_ZVAL(subpats)
		Z_PARAM_LONG(flags)
		Z_PARAM_LONG(start_offset)
	ZEND_PARSE_PARAMETERS_END_EX(RETURN_FALSE);

	/* Compile regex or get it from cache. */
	pce = pcre_get_compiled_regex_cache(regex);
	if (pce == NULL) {
		RETURN_FALSE;
	}

	/* The refcount is raised for the duration of the call.  The flags
	 * argument is only honoured when it was actually passed. */
	pce->refcount++;
	php_pcre_match_impl(pce, subject, return_value, subpats,
		global, ZEND_NUM_ARGS() >= 4, flags, start_offset);
	pce->refcount--;
}
1153 /* }}} */
1154
is_known_valid_utf8(zend_string * subject_str,PCRE2_SIZE start_offset)1155 static zend_always_inline zend_bool is_known_valid_utf8(
1156 zend_string *subject_str, PCRE2_SIZE start_offset) {
1157 if (!(GC_FLAGS(subject_str) & IS_STR_VALID_UTF8)) {
1158 /* We don't know whether the string is valid UTF-8 or not. */
1159 return 0;
1160 }
1161
1162 if (start_offset == ZSTR_LEN(subject_str)) {
1163 /* Degenerate case: Offset points to end of string. */
1164 return 1;
1165 }
1166
1167 /* Check that the offset does not point to an UTF-8 continuation byte. */
1168 return (ZSTR_VAL(subject_str)[start_offset] & 0xc0) != 0x80;
1169 }
1170
1171 /* {{{ php_pcre_match_impl() */
php_pcre_match_impl(pcre_cache_entry * pce,zend_string * subject_str,zval * return_value,zval * subpats,int global,int use_flags,zend_long flags,zend_off_t start_offset)1172 PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str, zval *return_value,
1173 zval *subpats, int global, int use_flags, zend_long flags, zend_off_t start_offset)
1174 {
1175 zval result_set, /* Holds a set of subpatterns after
1176 a global match */
1177 *match_sets = NULL; /* An array of sets of matches for each
1178 subpattern after a global match */
1179 uint32_t options; /* Execution options */
1180 int count; /* Count of matched subpatterns */
1181 PCRE2_SIZE *offsets; /* Array of subpattern offsets */
1182 uint32_t num_subpats; /* Number of captured subpatterns */
1183 int matched; /* Has anything matched */
1184 zend_string **subpat_names; /* Array for named subpatterns */
1185 size_t i;
1186 uint32_t subpats_order; /* Order of subpattern matches */
1187 uint32_t offset_capture; /* Capture match offsets: yes/no */
1188 uint32_t unmatched_as_null; /* Null non-matches: yes/no */
1189 PCRE2_SPTR mark = NULL; /* Target for MARK name */
1190 zval marks; /* Array of marks for PREG_PATTERN_ORDER */
1191 pcre2_match_data *match_data;
1192 PCRE2_SIZE start_offset2, orig_start_offset;
1193
1194 char *subject = ZSTR_VAL(subject_str);
1195 size_t subject_len = ZSTR_LEN(subject_str);
1196
1197 ZVAL_UNDEF(&marks);
1198
1199 /* Overwrite the passed-in value for subpatterns with an empty array. */
1200 if (subpats != NULL) {
1201 subpats = zend_try_array_init(subpats);
1202 if (!subpats) {
1203 return;
1204 }
1205 }
1206
1207 subpats_order = global ? PREG_PATTERN_ORDER : 0;
1208
1209 if (use_flags) {
1210 offset_capture = flags & PREG_OFFSET_CAPTURE;
1211 unmatched_as_null = flags & PREG_UNMATCHED_AS_NULL;
1212
1213 /*
1214 * subpats_order is pre-set to pattern mode so we change it only if
1215 * necessary.
1216 */
1217 if (flags & 0xff) {
1218 subpats_order = flags & 0xff;
1219 }
1220 if ((global && (subpats_order < PREG_PATTERN_ORDER || subpats_order > PREG_SET_ORDER)) ||
1221 (!global && subpats_order != 0)) {
1222 php_error_docref(NULL, E_WARNING, "Invalid flags specified");
1223 return;
1224 }
1225 } else {
1226 offset_capture = 0;
1227 unmatched_as_null = 0;
1228 }
1229
1230 /* Negative offset counts from the end of the string. */
1231 if (start_offset < 0) {
1232 if ((PCRE2_SIZE)-start_offset <= subject_len) {
1233 start_offset2 = subject_len + start_offset;
1234 } else {
1235 start_offset2 = 0;
1236 }
1237 } else {
1238 start_offset2 = (PCRE2_SIZE)start_offset;
1239 }
1240
1241 if (start_offset2 > subject_len) {
1242 pcre_handle_exec_error(PCRE2_ERROR_BADOFFSET);
1243 RETURN_FALSE;
1244 }
1245
1246 /* Calculate the size of the offsets array, and allocate memory for it. */
1247 num_subpats = pce->capture_count + 1;
1248
1249 /*
1250 * Build a mapping from subpattern numbers to their names. We will
1251 * allocate the table only if there are any named subpatterns.
1252 */
1253 subpat_names = NULL;
1254 if (subpats && pce->name_count > 0) {
1255 subpat_names = make_subpats_table(num_subpats, pce);
1256 if (!subpat_names) {
1257 RETURN_FALSE;
1258 }
1259 }
1260
1261 /* Allocate match sets array and initialize the values. */
1262 if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
1263 match_sets = (zval *)safe_emalloc(num_subpats, sizeof(zval), 0);
1264 for (i=0; i<num_subpats; i++) {
1265 array_init(&match_sets[i]);
1266 }
1267 }
1268
1269 matched = 0;
1270 PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1271
1272 if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
1273 match_data = mdata;
1274 } else {
1275 match_data = pcre2_match_data_create_from_pattern(pce->re, gctx);
1276 if (!match_data) {
1277 PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1278 if (subpat_names) {
1279 free_subpats_table(subpat_names, num_subpats);
1280 }
1281 if (match_sets) {
1282 efree(match_sets);
1283 }
1284 RETURN_FALSE;
1285 }
1286 }
1287
1288 orig_start_offset = start_offset2;
1289 options =
1290 (pce->compile_options & PCRE2_UTF) && !is_known_valid_utf8(subject_str, orig_start_offset)
1291 ? 0 : PCRE2_NO_UTF_CHECK;
1292
1293 /* Execute the regular expression. */
1294 #ifdef HAVE_PCRE_JIT_SUPPORT
1295 if ((pce->preg_options & PREG_JIT) && options) {
1296 count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1297 PCRE2_NO_UTF_CHECK, match_data, mctx);
1298 } else
1299 #endif
1300 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1301 options, match_data, mctx);
1302
1303 while (1) {
1304 /* If something has matched */
1305 if (count >= 0) {
1306 /* Check for too many substrings condition. */
1307 if (UNEXPECTED(count == 0)) {
1308 php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
1309 count = num_subpats;
1310 }
1311
1312 matched:
1313 matched++;
1314
1315 offsets = pcre2_get_ovector_pointer(match_data);
1316
1317 /* If subpatterns array has been passed, fill it in with values. */
1318 if (subpats != NULL) {
1319 /* Try to get the list of substrings and display a warning if failed. */
1320 if (offsets[1] < offsets[0]) {
1321 if (subpat_names) {
1322 free_subpats_table(subpat_names, num_subpats);
1323 }
1324 if (match_sets) efree(match_sets);
1325 php_error_docref(NULL, E_WARNING, "Get subpatterns list failed");
1326 RETURN_FALSE;
1327 }
1328
1329 if (global) { /* global pattern matching */
1330 if (subpats && subpats_order == PREG_PATTERN_ORDER) {
1331 /* For each subpattern, insert it into the appropriate array. */
1332 if (offset_capture) {
1333 for (i = 0; i < count; i++) {
1334 add_offset_pair(
1335 &match_sets[i], subject, offsets[2*i], offsets[2*i+1],
1336 NULL, unmatched_as_null);
1337 }
1338 } else {
1339 for (i = 0; i < count; i++) {
1340 zval val;
1341 populate_match_value(
1342 &val, subject, offsets[2*i], offsets[2*i+1], unmatched_as_null);
1343 zend_hash_next_index_insert_new(Z_ARRVAL(match_sets[i]), &val);
1344 }
1345 }
1346 mark = pcre2_get_mark(match_data);
1347 /* Add MARK, if available */
1348 if (mark) {
1349 if (Z_TYPE(marks) == IS_UNDEF) {
1350 array_init(&marks);
1351 }
1352 add_index_string(&marks, matched - 1, (char *) mark);
1353 }
1354 /*
1355 * If the number of captured subpatterns on this run is
1356 * less than the total possible number, pad the result
1357 * arrays with NULLs or empty strings.
1358 */
1359 if (count < num_subpats) {
1360 for (; i < num_subpats; i++) {
1361 if (offset_capture) {
1362 add_offset_pair(
1363 &match_sets[i], NULL, PCRE2_UNSET, PCRE2_UNSET,
1364 NULL, unmatched_as_null);
1365 } else if (unmatched_as_null) {
1366 add_next_index_null(&match_sets[i]);
1367 } else {
1368 add_next_index_str(&match_sets[i], ZSTR_EMPTY_ALLOC());
1369 }
1370 }
1371 }
1372 } else {
1373 /* Allocate and populate the result set array */
1374 array_init_size(&result_set, count + (mark ? 1 : 0));
1375 mark = pcre2_get_mark(match_data);
1376 populate_subpat_array(
1377 &result_set, subject, offsets, subpat_names,
1378 num_subpats, count, mark, flags);
1379 /* And add it to the output array */
1380 zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &result_set);
1381 }
1382 } else { /* single pattern matching */
1383 /* For each subpattern, insert it into the subpatterns array. */
1384 mark = pcre2_get_mark(match_data);
1385 populate_subpat_array(
1386 subpats, subject, offsets, subpat_names, num_subpats, count, mark, flags);
1387 break;
1388 }
1389 }
1390
1391 /* Advance to the next piece. */
1392 start_offset2 = offsets[1];
1393
1394 /* If we have matched an empty string, mimic what Perl's /g options does.
1395 This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
1396 the match again at the same point. If this fails (picked up above) we
1397 advance to the next character. */
1398 if (start_offset2 == offsets[0]) {
1399 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1400 PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
1401 if (count >= 0) {
1402 if (global) {
1403 goto matched;
1404 } else {
1405 break;
1406 }
1407 } else if (count == PCRE2_ERROR_NOMATCH) {
1408 /* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
1409 this is not necessarily the end. We need to advance
1410 the start offset, and continue. Fudge the offset values
1411 to achieve this, unless we're already at the end of the string. */
1412 if (start_offset2 < subject_len) {
1413 size_t unit_len = calculate_unit_length(pce, subject + start_offset2);
1414
1415 start_offset2 += unit_len;
1416 } else {
1417 break;
1418 }
1419 } else {
1420 goto error;
1421 }
1422 }
1423 } else if (count == PCRE2_ERROR_NOMATCH) {
1424 break;
1425 } else {
1426 error:
1427 pcre_handle_exec_error(count);
1428 break;
1429 }
1430
1431 if (!global) {
1432 break;
1433 }
1434
1435 /* Execute the regular expression. */
1436 #ifdef HAVE_PCRE_JIT_SUPPORT
1437 if ((pce->preg_options & PREG_JIT)) {
1438 if (PCRE2_UNSET == start_offset2 || start_offset2 > subject_len) {
1439 pcre_handle_exec_error(PCRE2_ERROR_BADOFFSET);
1440 break;
1441 }
1442 count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1443 PCRE2_NO_UTF_CHECK, match_data, mctx);
1444 } else
1445 #endif
1446 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1447 PCRE2_NO_UTF_CHECK, match_data, mctx);
1448 }
1449 if (match_data != mdata) {
1450 pcre2_match_data_free(match_data);
1451 }
1452
1453 /* Add the match sets to the output array and clean up */
1454 if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
1455 if (subpat_names) {
1456 for (i = 0; i < num_subpats; i++) {
1457 if (subpat_names[i]) {
1458 zend_hash_update(Z_ARRVAL_P(subpats), subpat_names[i], &match_sets[i]);
1459 Z_ADDREF(match_sets[i]);
1460 }
1461 zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i]);
1462 }
1463 } else {
1464 for (i = 0; i < num_subpats; i++) {
1465 zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i]);
1466 }
1467 }
1468 efree(match_sets);
1469
1470 if (Z_TYPE(marks) != IS_UNDEF) {
1471 add_assoc_zval(subpats, "MARK", &marks);
1472 }
1473 }
1474
1475 if (subpat_names) {
1476 free_subpats_table(subpat_names, num_subpats);
1477 }
1478
1479 if (PCRE_G(error_code) == PHP_PCRE_NO_ERROR) {
1480 /* If there was no error and we're in /u mode, remember that the string is valid UTF-8. */
1481 if ((pce->compile_options & PCRE2_UTF)
1482 && !ZSTR_IS_INTERNED(subject_str) && orig_start_offset == 0) {
1483 GC_ADD_FLAGS(subject_str, IS_STR_VALID_UTF8);
1484 }
1485
1486 RETVAL_LONG(matched);
1487 } else {
1488 RETVAL_FALSE;
1489 }
1490 }
1491 /* }}} */
1492
1493 /* {{{ proto int preg_match(string pattern, string subject [, array &subpatterns [, int flags [, int offset]]])
1494 Perform a Perl-style regular expression match */
PHP_FUNCTION(preg_match)1495 static PHP_FUNCTION(preg_match)
1496 {
1497 php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
1498 }
1499 /* }}} */
1500
1501 /* {{{ proto int preg_match_all(string pattern, string subject [, array &subpatterns [, int flags [, int offset]]])
1502 Perform a Perl-style global regular expression match */
PHP_FUNCTION(preg_match_all)1503 static PHP_FUNCTION(preg_match_all)
1504 {
1505 php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
1506 }
1507 /* }}} */
1508
1509 /* {{{ preg_get_backref
1510 */
/*
 * Parse a back-reference at *str.  The caller has already established that
 * **str is '\\' or '$'.  Accepted forms are an introducer followed by one or
 * two decimal digits, or "${" + one or two digits + "}".
 *
 * On success returns 1, stores the group number in *backref and moves *str
 * past the reference.  On failure returns 0 and leaves *str alone; *backref
 * may already hold the digits parsed so far.
 */
static int preg_get_backref(char **str, int *backref)
{
	char *base = *str;
	int braced;
	int pos;

	/* An introducer at the very end of the string references nothing. */
	if (base[1] == '\0') {
		return 0;
	}

	braced = (base[0] == '$' && base[1] == '{');
	pos = braced ? 2 : 1;

	/* First digit is mandatory. */
	if (base[pos] < '0' || base[pos] > '9') {
		return 0;
	}
	*backref = base[pos] - '0';
	pos++;

	/* Second digit is optional. */
	if (base[pos] >= '0' && base[pos] <= '9') {
		*backref = *backref * 10 + base[pos] - '0';
		pos++;
	}

	/* The braced form must be closed. */
	if (braced) {
		if (base[pos] != '}') {
			return 0;
		}
		pos++;
	}

	*str = base + pos;
	return 1;
}
1546 /* }}} */
1547
1548 /* {{{ preg_do_repl_func
1549 */
/*
 * Call the user replacement callback for one match and return the
 * replacement text as a string the caller owns.
 *
 * The callback receives a single array argument built by
 * populate_subpat_array().  A string return value is taken over directly;
 * any other return value is converted to a string.  When the call fails or
 * yields no value, the matched text itself is returned, and a warning is
 * raised unless an exception is pending.
 */
static zend_string *preg_do_repl_func(zend_fcall_info *fci, zend_fcall_info_cache *fcc, char *subject, PCRE2_SIZE *offsets, zend_string **subpat_names, uint32_t num_subpats, int count, const PCRE2_SPTR mark, zend_long flags)
{
	zval         callback_result;  /* Function return value */
	zval         match_array;      /* Argument to pass to function */
	zend_string *replacement;
	int          have_result;

	array_init_size(&match_array, count + (mark ? 1 : 0));
	populate_subpat_array(&match_array, subject, offsets, subpat_names, num_subpats, count, mark, flags);

	fci->retval = &callback_result;
	fci->param_count = 1;
	fci->params = &match_array;
	fci->no_separation = 0;

	/* The result is only inspected after a successful call. */
	have_result = zend_call_function(fci, fcc) == SUCCESS
		&& Z_TYPE(callback_result) != IS_UNDEF;

	if (!have_result) {
		if (!EG(exception)) {
			php_error_docref(NULL, E_WARNING, "Unable to call custom replacement function");
		}

		/* Fall back to the text of the whole match. */
		replacement = zend_string_init(&subject[offsets[0]], offsets[1] - offsets[0], 0);
	} else if (EXPECTED(Z_TYPE(callback_result) == IS_STRING)) {
		/* Take over the string held by the return value. */
		replacement = Z_STR(callback_result);
	} else {
		replacement = zval_get_string_func(&callback_result);
		zval_ptr_dtor(&callback_result);
	}

	zval_ptr_dtor(&match_array);

	return replacement;
}
1583 /* }}} */
1584
1585 /* {{{ php_pcre_replace
1586 */
php_pcre_replace(zend_string * regex,zend_string * subject_str,char * subject,size_t subject_len,zend_string * replace_str,size_t limit,size_t * replace_count)1587 PHPAPI zend_string *php_pcre_replace(zend_string *regex,
1588 zend_string *subject_str,
1589 char *subject, size_t subject_len,
1590 zend_string *replace_str,
1591 size_t limit, size_t *replace_count)
1592 {
1593 pcre_cache_entry *pce; /* Compiled regular expression */
1594 zend_string *result; /* Function result */
1595
1596 /* Abort on pending exception, e.g. thrown from __toString(). */
1597 if (UNEXPECTED(EG(exception))) {
1598 return NULL;
1599 }
1600
1601 /* Compile regex or get it from cache. */
1602 if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
1603 return NULL;
1604 }
1605 pce->refcount++;
1606 result = php_pcre_replace_impl(pce, subject_str, subject, subject_len, replace_str,
1607 limit, replace_count);
1608 pce->refcount--;
1609
1610 return result;
1611 }
1612 /* }}} */
1613
1614 /* {{{ php_pcre_replace_impl() */
php_pcre_replace_impl(pcre_cache_entry * pce,zend_string * subject_str,char * subject,size_t subject_len,zend_string * replace_str,size_t limit,size_t * replace_count)1615 PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *subject_str, char *subject, size_t subject_len, zend_string *replace_str, size_t limit, size_t *replace_count)
1616 {
1617 uint32_t options; /* Execution options */
1618 int count; /* Count of matched subpatterns */
1619 PCRE2_SIZE *offsets; /* Array of subpattern offsets */
1620 uint32_t num_subpats; /* Number of captured subpatterns */
1621 size_t new_len; /* Length of needed storage */
1622 size_t alloc_len; /* Actual allocated length */
1623 size_t match_len; /* Length of the current match */
1624 int backref; /* Backreference number */
1625 PCRE2_SIZE start_offset; /* Where the new search starts */
1626 size_t last_end_offset; /* Where the last search ended */
1627 char *walkbuf, /* Location of current replacement in the result */
1628 *walk, /* Used to walk the replacement string */
1629 *match, /* The current match */
1630 *piece, /* The current piece of subject */
1631 *replace_end, /* End of replacement string */
1632 walk_last; /* Last walked character */
1633 size_t result_len; /* Length of result */
1634 zend_string *result; /* Result of replacement */
1635 pcre2_match_data *match_data;
1636
1637 /* Calculate the size of the offsets array, and allocate memory for it. */
1638 num_subpats = pce->capture_count + 1;
1639 alloc_len = 0;
1640 result = NULL;
1641
1642 /* Initialize */
1643 match = NULL;
1644 start_offset = 0;
1645 last_end_offset = 0;
1646 result_len = 0;
1647 PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1648
1649 if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
1650 match_data = mdata;
1651 } else {
1652 match_data = pcre2_match_data_create_from_pattern(pce->re, gctx);
1653 if (!match_data) {
1654 PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1655 return NULL;
1656 }
1657 }
1658
1659 options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
1660
1661 /* Execute the regular expression. */
1662 #ifdef HAVE_PCRE_JIT_SUPPORT
1663 if ((pce->preg_options & PREG_JIT) && options) {
1664 count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1665 PCRE2_NO_UTF_CHECK, match_data, mctx);
1666 } else
1667 #endif
1668 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1669 options, match_data, mctx);
1670
1671 while (1) {
1672 piece = subject + last_end_offset;
1673
1674 if (count >= 0 && limit > 0) {
1675 zend_bool simple_string;
1676
1677 /* Check for too many substrings condition. */
1678 if (UNEXPECTED(count == 0)) {
1679 php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
1680 count = num_subpats;
1681 }
1682
1683 matched:
1684 offsets = pcre2_get_ovector_pointer(match_data);
1685
1686 if (UNEXPECTED(offsets[1] < offsets[0])) {
1687 PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1688 if (result) {
1689 zend_string_release_ex(result, 0);
1690 result = NULL;
1691 }
1692 break;
1693 }
1694
1695 if (replace_count) {
1696 ++*replace_count;
1697 }
1698
1699 /* Set the match location in subject */
1700 match = subject + offsets[0];
1701
1702 new_len = result_len + offsets[0] - last_end_offset; /* part before the match */
1703
1704 walk = ZSTR_VAL(replace_str);
1705 replace_end = walk + ZSTR_LEN(replace_str);
1706 walk_last = 0;
1707 simple_string = 1;
1708 while (walk < replace_end) {
1709 if ('\\' == *walk || '$' == *walk) {
1710 simple_string = 0;
1711 if (walk_last == '\\') {
1712 walk++;
1713 walk_last = 0;
1714 continue;
1715 }
1716 if (preg_get_backref(&walk, &backref)) {
1717 if (backref < count)
1718 new_len += offsets[(backref<<1)+1] - offsets[backref<<1];
1719 continue;
1720 }
1721 }
1722 new_len++;
1723 walk++;
1724 walk_last = walk[-1];
1725 }
1726
1727 if (new_len >= alloc_len) {
1728 alloc_len = zend_safe_address_guarded(2, new_len, ZSTR_MAX_OVERHEAD) - ZSTR_MAX_OVERHEAD;
1729 if (result == NULL) {
1730 result = zend_string_alloc(alloc_len, 0);
1731 } else {
1732 result = zend_string_extend(result, alloc_len, 0);
1733 }
1734 }
1735
1736 if (match-piece > 0) {
1737 /* copy the part of the string before the match */
1738 memcpy(&ZSTR_VAL(result)[result_len], piece, match-piece);
1739 result_len += (match-piece);
1740 }
1741
1742 if (simple_string) {
1743 /* copy replacement */
1744 memcpy(&ZSTR_VAL(result)[result_len], ZSTR_VAL(replace_str), ZSTR_LEN(replace_str)+1);
1745 result_len += ZSTR_LEN(replace_str);
1746 } else {
1747 /* copy replacement and backrefs */
1748 walkbuf = ZSTR_VAL(result) + result_len;
1749
1750 walk = ZSTR_VAL(replace_str);
1751 walk_last = 0;
1752 while (walk < replace_end) {
1753 if ('\\' == *walk || '$' == *walk) {
1754 if (walk_last == '\\') {
1755 *(walkbuf-1) = *walk++;
1756 walk_last = 0;
1757 continue;
1758 }
1759 if (preg_get_backref(&walk, &backref)) {
1760 if (backref < count) {
1761 match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
1762 memcpy(walkbuf, subject + offsets[backref<<1], match_len);
1763 walkbuf += match_len;
1764 }
1765 continue;
1766 }
1767 }
1768 *walkbuf++ = *walk++;
1769 walk_last = walk[-1];
1770 }
1771 *walkbuf = '\0';
1772 /* increment the result length by how much we've added to the string */
1773 result_len += (walkbuf - (ZSTR_VAL(result) + result_len));
1774 }
1775
1776 limit--;
1777
1778 /* Advance to the next piece. */
1779 start_offset = last_end_offset = offsets[1];
1780
1781 /* If we have matched an empty string, mimic what Perl's /g options does.
1782 This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
1783 the match again at the same point. If this fails (picked up above) we
1784 advance to the next character. */
1785 if (start_offset == offsets[0]) {
1786 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1787 PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
1788
1789 piece = subject + start_offset;
1790 if (count >= 0 && limit > 0) {
1791 goto matched;
1792 } else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
1793 /* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
1794 this is not necessarily the end. We need to advance
1795 the start offset, and continue. Fudge the offset values
1796 to achieve this, unless we're already at the end of the string. */
1797 if (start_offset < subject_len) {
1798 size_t unit_len = calculate_unit_length(pce, piece);
1799 start_offset += unit_len;
1800 } else {
1801 goto not_matched;
1802 }
1803 } else {
1804 goto error;
1805 }
1806 }
1807
1808 } else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
1809 not_matched:
1810 if (!result && subject_str) {
1811 result = zend_string_copy(subject_str);
1812 break;
1813 }
1814 /* now we know exactly how long it is */
1815 alloc_len = result_len + subject_len - last_end_offset;
1816 if (NULL != result) {
1817 result = zend_string_realloc(result, alloc_len, 0);
1818 } else {
1819 result = zend_string_alloc(alloc_len, 0);
1820 }
1821 /* stick that last bit of string on our output */
1822 memcpy(ZSTR_VAL(result) + result_len, piece, subject_len - last_end_offset);
1823 result_len += subject_len - last_end_offset;
1824 ZSTR_VAL(result)[result_len] = '\0';
1825 ZSTR_LEN(result) = result_len;
1826 break;
1827 } else {
1828 error:
1829 pcre_handle_exec_error(count);
1830 if (result) {
1831 zend_string_release_ex(result, 0);
1832 result = NULL;
1833 }
1834 break;
1835 }
1836
1837 #ifdef HAVE_PCRE_JIT_SUPPORT
1838 if (pce->preg_options & PREG_JIT) {
1839 count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1840 PCRE2_NO_UTF_CHECK, match_data, mctx);
1841 } else
1842 #endif
1843 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1844 PCRE2_NO_UTF_CHECK, match_data, mctx);
1845 }
1846 if (match_data != mdata) {
1847 pcre2_match_data_free(match_data);
1848 }
1849
1850 return result;
1851 }
1852 /* }}} */
1853
1854 /* {{{ php_pcre_replace_func_impl() */
/*
 * Replace every match of the compiled pattern `pce` in `subject` with the
 * string produced by the user callback, which is invoked once per match
 * through preg_do_repl_func().
 *
 *  pce           - compiled pattern (cache entry)
 *  subject_str   - the subject as a zend_string, or NULL; when non-NULL and no
 *                  replacement was made, a new reference to it is returned
 *                  instead of a fresh copy
 *  subject       - subject bytes
 *  subject_len   - length of subject in bytes
 *  fci, fcc      - callback descriptor, forwarded to preg_do_repl_func()
 *  limit         - maximum number of replacements; matching stops once it
 *                  reaches zero
 *  replace_count - if non-NULL, incremented once per replacement
 *  flags         - forwarded to preg_do_repl_func()
 *
 * Returns the resulting string, or NULL when the subpattern name table or the
 * match data cannot be created, or when matching fails with an error (in which
 * case any partial result is released).
 */
static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_string *subject_str, char *subject, size_t subject_len, zend_fcall_info *fci, zend_fcall_info_cache *fcc, size_t limit, size_t *replace_count, zend_long flags)
{
	uint32_t options;           /* Execution options */
	int count;                  /* Count of matched subpatterns */
	PCRE2_SIZE *offsets;        /* Array of subpattern offsets */
	zend_string **subpat_names; /* Array for named subpatterns */
	uint32_t num_subpats;       /* Number of captured subpatterns */
	size_t new_len;             /* Length of needed storage */
	size_t alloc_len;           /* Actual allocated length */
	PCRE2_SIZE start_offset;    /* Where the new search starts */
	size_t last_end_offset;     /* Where the last search ended */
	char *match,                /* The current match */
	     *piece;                /* The current piece of subject */
	size_t result_len;          /* Length of result */
	zend_string *result;        /* Result of replacement */
	zend_string *eval_result;   /* Result of custom function */
	pcre2_match_data *match_data;
	zend_bool old_mdata_used;

	/* One slot per capture group, plus one for the whole match. */
	num_subpats = pce->capture_count + 1;

	/*
	 * Build a mapping from subpattern numbers to their names. We will
	 * allocate the table only if there are any named subpatterns.
	 */
	subpat_names = NULL;
	if (UNEXPECTED(pce->name_count > 0)) {
		subpat_names = make_subpats_table(num_subpats, pce);
		if (!subpat_names) {
			return NULL;
		}
	}

	alloc_len = 0;
	result = NULL;

	/* Initialize */
	match = NULL;
	start_offset = 0;
	last_end_offset = 0;
	result_len = 0;
	PCRE_G(error_code) = PHP_PCRE_NO_ERROR;

	/* Claim the shared match data block when it is free and large enough;
	   otherwise create a private one. The previous mdata_used value is
	   restored on every exit path. NOTE(review): presumably the flag exists
	   so that a callback re-entering the preg functions does not overwrite
	   our offsets - confirm against the definition of mdata_used. */
	old_mdata_used = mdata_used;
	if (!old_mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
		mdata_used = 1;
		match_data = mdata;
	} else {
		match_data = pcre2_match_data_create_from_pattern(pce->re, gctx);
		if (!match_data) {
			PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
			if (subpat_names) {
				free_subpats_table(subpat_names, num_subpats);
			}
			mdata_used = old_mdata_used;
			return NULL;
		}
	}

	/* UTF patterns get the UTF validity check on the first match only. */
	options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;

	/* Execute the regular expression. The JIT entry point is used for the
	   first match only when no UTF check is required (options != 0). */
#ifdef HAVE_PCRE_JIT_SUPPORT
	if ((pce->preg_options & PREG_JIT) && options) {
		count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
				PCRE2_NO_UTF_CHECK, match_data, mctx);
	} else
#endif
	count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
			options, match_data, mctx);

	while (1) {
		piece = subject + last_end_offset;

		if (count >= 0 && limit) {
			/* Check for too many substrings condition. */
			if (UNEXPECTED(count == 0)) {
				php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
				count = num_subpats;
			}

matched:
			offsets = pcre2_get_ovector_pointer(match_data);

			/* A match that ends before it starts cannot be spliced. */
			if (UNEXPECTED(offsets[1] < offsets[0])) {
				PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
				if (result) {
					zend_string_release_ex(result, 0);
					result = NULL;
				}
				break;
			}

			if (replace_count) {
				++*replace_count;
			}

			/* Set the match location in subject */
			match = subject + offsets[0];

			new_len = result_len + offsets[0] - last_end_offset; /* part before the match */

			/* Use custom function to get replacement string and its length. */
			eval_result = preg_do_repl_func(
				fci, fcc, subject, offsets, subpat_names, num_subpats, count,
				pcre2_get_mark(match_data), flags);

			ZEND_ASSERT(eval_result);
			new_len = zend_safe_address_guarded(1, ZSTR_LEN(eval_result) + ZSTR_MAX_OVERHEAD, new_len) -ZSTR_MAX_OVERHEAD;
			if (new_len >= alloc_len) {
				/* Grow to twice the needed length so that repeated
				   replacements do not reallocate every time. */
				alloc_len = zend_safe_address_guarded(2, new_len, ZSTR_MAX_OVERHEAD) - ZSTR_MAX_OVERHEAD;
				if (result == NULL) {
					result = zend_string_alloc(alloc_len, 0);
				} else {
					result = zend_string_extend(result, alloc_len, 0);
				}
			}

			if (match-piece > 0) {
				/* copy the part of the string before the match */
				memcpy(ZSTR_VAL(result) + result_len, piece, match-piece);
				result_len += (match-piece);
			}

			/* Copy the callback's result to the buffer and clean up. */
			memcpy(ZSTR_VAL(result) + result_len, ZSTR_VAL(eval_result), ZSTR_LEN(eval_result));
			result_len += ZSTR_LEN(eval_result);
			zend_string_release_ex(eval_result, 0);

			limit--;

			/* Advance to the next piece. */
			start_offset = last_end_offset = offsets[1];

			/* If we have matched an empty string, mimic what Perl's /g options does.
			   This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
			   the match again at the same point. If this fails (picked up above) we
			   advance to the next character. */
			if (start_offset == offsets[0]) {
				count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
					PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);

				piece = subject + start_offset;
				if (count >= 0 && limit) {
					goto matched;
				} else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
					/* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
					   this is not necessarily the end. We need to advance
					   the start offset, and continue. Fudge the offset values
					   to achieve this, unless we're already at the end of the string. */
					if (start_offset < subject_len) {
						size_t unit_len = calculate_unit_length(pce, piece);
						start_offset += unit_len;
					} else {
						goto not_matched;
					}
				} else {
					goto error;
				}
			}

		} else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
not_matched:
			/* Nothing was replaced: hand back the caller's string as is. */
			if (!result && subject_str) {
				result = zend_string_copy(subject_str);
				break;
			}
			/* now we know exactly how long it is */
			alloc_len = result_len + subject_len - last_end_offset;
			if (NULL != result) {
				result = zend_string_realloc(result, alloc_len, 0);
			} else {
				result = zend_string_alloc(alloc_len, 0);
			}
			/* stick that last bit of string on our output */
			memcpy(ZSTR_VAL(result) + result_len, piece, subject_len - last_end_offset);
			result_len += subject_len - last_end_offset;
			ZSTR_VAL(result)[result_len] = '\0';
			ZSTR_LEN(result) = result_len;
			break;
		} else {
error:
			pcre_handle_exec_error(count);
			if (result) {
				zend_string_release_ex(result, 0);
				result = NULL;
			}
			break;
		}
		/* Look for the next match, starting at start_offset. */
#ifdef HAVE_PCRE_JIT_SUPPORT
		if ((pce->preg_options & PREG_JIT)) {
			count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
					PCRE2_NO_UTF_CHECK, match_data, mctx);
		} else
#endif
		count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
					PCRE2_NO_UTF_CHECK, match_data, mctx);
	}
	/* Only a privately created match data block is freed. */
	if (match_data != mdata) {
		pcre2_match_data_free(match_data);
	}
	mdata_used = old_mdata_used;

	if (UNEXPECTED(subpat_names)) {
		free_subpats_table(subpat_names, num_subpats);
	}

	return result;
}
/* }}} */

2067 /* {{{ php_pcre_replace_func
2068 */
php_pcre_replace_func(zend_string * regex,zend_string * subject_str,zend_fcall_info * fci,zend_fcall_info_cache * fcc,size_t limit,size_t * replace_count,zend_long flags)2069 static zend_always_inline zend_string *php_pcre_replace_func(zend_string *regex,
2070 zend_string *subject_str,
2071 zend_fcall_info *fci, zend_fcall_info_cache *fcc,
2072 size_t limit, size_t *replace_count, zend_long flags)
2073 {
2074 pcre_cache_entry *pce; /* Compiled regular expression */
2075 zend_string *result; /* Function result */
2076
2077 /* Compile regex or get it from cache. */
2078 if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
2079 return NULL;
2080 }
2081 pce->refcount++;
2082 result = php_pcre_replace_func_impl(
2083 pce, subject_str, ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), fci, fcc,
2084 limit, replace_count, flags);
2085 pce->refcount--;
2086
2087 return result;
2088 }
2089 /* }}} */
2090
2091 /* {{{ php_pcre_replace_array
2092 */
php_pcre_replace_array(HashTable * regex,zval * replace,zend_string * subject_str,size_t limit,size_t * replace_count)2093 static zend_string *php_pcre_replace_array(HashTable *regex, zval *replace, zend_string *subject_str, size_t limit, size_t *replace_count)
2094 {
2095 zval *regex_entry;
2096 zend_string *result;
2097 zend_string *replace_str, *tmp_replace_str;
2098
2099 if (Z_TYPE_P(replace) == IS_ARRAY) {
2100 uint32_t replace_idx = 0;
2101 HashTable *replace_ht = Z_ARRVAL_P(replace);
2102
2103 /* For each entry in the regex array, get the entry */
2104 ZEND_HASH_FOREACH_VAL(regex, regex_entry) {
2105 /* Make sure we're dealing with strings. */
2106 zend_string *tmp_regex_str;
2107 zend_string *regex_str = zval_get_tmp_string(regex_entry, &tmp_regex_str);
2108 zval *zv;
2109
2110 /* Get current entry */
2111 while (1) {
2112 if (replace_idx == replace_ht->nNumUsed) {
2113 replace_str = ZSTR_EMPTY_ALLOC();
2114 tmp_replace_str = NULL;
2115 break;
2116 }
2117 zv = &replace_ht->arData[replace_idx].val;
2118 replace_idx++;
2119 if (Z_TYPE_P(zv) != IS_UNDEF) {
2120 replace_str = zval_get_tmp_string(zv, &tmp_replace_str);
2121 break;
2122 }
2123 }
2124
2125 /* Do the actual replacement and put the result back into subject_str
2126 for further replacements. */
2127 result = php_pcre_replace(regex_str,
2128 subject_str,
2129 ZSTR_VAL(subject_str),
2130 ZSTR_LEN(subject_str),
2131 replace_str,
2132 limit,
2133 replace_count);
2134 zend_tmp_string_release(tmp_replace_str);
2135 zend_tmp_string_release(tmp_regex_str);
2136 zend_string_release_ex(subject_str, 0);
2137 subject_str = result;
2138 if (UNEXPECTED(result == NULL)) {
2139 break;
2140 }
2141 } ZEND_HASH_FOREACH_END();
2142
2143 } else {
2144 replace_str = Z_STR_P(replace);
2145
2146 /* For each entry in the regex array, get the entry */
2147 ZEND_HASH_FOREACH_VAL(regex, regex_entry) {
2148 /* Make sure we're dealing with strings. */
2149 zend_string *tmp_regex_str;
2150 zend_string *regex_str = zval_get_tmp_string(regex_entry, &tmp_regex_str);
2151
2152 /* Do the actual replacement and put the result back into subject_str
2153 for further replacements. */
2154 result = php_pcre_replace(regex_str,
2155 subject_str,
2156 ZSTR_VAL(subject_str),
2157 ZSTR_LEN(subject_str),
2158 replace_str,
2159 limit,
2160 replace_count);
2161 zend_tmp_string_release(tmp_regex_str);
2162 zend_string_release_ex(subject_str, 0);
2163 subject_str = result;
2164
2165 if (UNEXPECTED(result == NULL)) {
2166 break;
2167 }
2168 } ZEND_HASH_FOREACH_END();
2169 }
2170
2171 return subject_str;
2172 }
2173 /* }}} */
2174
2175 /* {{{ php_replace_in_subject
2176 */
php_replace_in_subject(zval * regex,zval * replace,zval * subject,size_t limit,size_t * replace_count)2177 static zend_always_inline zend_string *php_replace_in_subject(zval *regex, zval *replace, zval *subject, size_t limit, size_t *replace_count)
2178 {
2179 zend_string *result;
2180 zend_string *subject_str = zval_get_string(subject);
2181
2182 if (Z_TYPE_P(regex) != IS_ARRAY) {
2183 result = php_pcre_replace(Z_STR_P(regex),
2184 subject_str,
2185 ZSTR_VAL(subject_str),
2186 ZSTR_LEN(subject_str),
2187 Z_STR_P(replace),
2188 limit,
2189 replace_count);
2190 zend_string_release_ex(subject_str, 0);
2191 } else {
2192 result = php_pcre_replace_array(Z_ARRVAL_P(regex),
2193 replace,
2194 subject_str,
2195 limit,
2196 replace_count);
2197 }
2198 return result;
2199 }
2200 /* }}} */
2201
2202 /* {{{ php_replace_in_subject_func
2203 */
php_replace_in_subject_func(zval * regex,zend_fcall_info * fci,zend_fcall_info_cache * fcc,zval * subject,size_t limit,size_t * replace_count,zend_long flags)2204 static zend_string *php_replace_in_subject_func(zval *regex, zend_fcall_info *fci, zend_fcall_info_cache *fcc, zval *subject, size_t limit, size_t *replace_count, zend_long flags)
2205 {
2206 zend_string *result;
2207 zend_string *subject_str = zval_get_string(subject);
2208
2209 if (Z_TYPE_P(regex) != IS_ARRAY) {
2210 result = php_pcre_replace_func(
2211 Z_STR_P(regex), subject_str, fci, fcc, limit, replace_count, flags);
2212 zend_string_release_ex(subject_str, 0);
2213 return result;
2214 } else {
2215 zval *regex_entry;
2216
2217 /* If regex is an array */
2218
2219 /* For each entry in the regex array, get the entry */
2220 ZEND_HASH_FOREACH_VAL(Z_ARRVAL_P(regex), regex_entry) {
2221 /* Make sure we're dealing with strings. */
2222 zend_string *tmp_regex_str;
2223 zend_string *regex_str = zval_get_tmp_string(regex_entry, &tmp_regex_str);
2224
2225 /* Do the actual replacement and put the result back into subject_str
2226 for further replacements. */
2227 result = php_pcre_replace_func(
2228 regex_str, subject_str, fci, fcc, limit, replace_count, flags);
2229 zend_tmp_string_release(tmp_regex_str);
2230 zend_string_release_ex(subject_str, 0);
2231 subject_str = result;
2232 if (UNEXPECTED(result == NULL)) {
2233 break;
2234 }
2235 } ZEND_HASH_FOREACH_END();
2236
2237 return subject_str;
2238 }
2239 }
2240 /* }}} */
2241
2242 /* {{{ preg_replace_func_impl
2243 */
preg_replace_func_impl(zval * return_value,zval * regex,zend_fcall_info * fci,zend_fcall_info_cache * fcc,zval * subject,zend_long limit_val,zend_long flags)2244 static size_t preg_replace_func_impl(zval *return_value, zval *regex, zend_fcall_info *fci, zend_fcall_info_cache *fcc, zval *subject, zend_long limit_val, zend_long flags)
2245 {
2246 zend_string *result;
2247 size_t replace_count = 0;
2248
2249 if (Z_TYPE_P(regex) != IS_ARRAY) {
2250 convert_to_string_ex(regex);
2251 }
2252
2253 if (Z_TYPE_P(subject) != IS_ARRAY) {
2254 result = php_replace_in_subject_func(
2255 regex, fci, fcc, subject, limit_val, &replace_count, flags);
2256 if (result != NULL) {
2257 RETVAL_STR(result);
2258 } else {
2259 RETVAL_NULL();
2260 }
2261 } else {
2262 /* if subject is an array */
2263 zval *subject_entry, zv;
2264 zend_string *string_key;
2265 zend_ulong num_key;
2266
2267 array_init_size(return_value, zend_hash_num_elements(Z_ARRVAL_P(subject)));
2268
2269 /* For each subject entry, convert it to string, then perform replacement
2270 and add the result to the return_value array. */
2271 ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(subject), num_key, string_key, subject_entry) {
2272 result = php_replace_in_subject_func(
2273 regex, fci, fcc, subject_entry, limit_val, &replace_count, flags);
2274 if (result != NULL) {
2275 /* Add to return array */
2276 ZVAL_STR(&zv, result);
2277 if (string_key) {
2278 zend_hash_add_new(Z_ARRVAL_P(return_value), string_key, &zv);
2279 } else {
2280 zend_hash_index_add_new(Z_ARRVAL_P(return_value), num_key, &zv);
2281 }
2282 }
2283 } ZEND_HASH_FOREACH_END();
2284 }
2285
2286 return replace_count;
2287 }
2288 /* }}} */
2289
2290 /* {{{ preg_replace_common
2291 */
preg_replace_common(INTERNAL_FUNCTION_PARAMETERS,int is_filter)2292 static void preg_replace_common(INTERNAL_FUNCTION_PARAMETERS, int is_filter)
2293 {
2294 zval *regex, *replace, *subject, *zcount = NULL;
2295 zend_long limit = -1;
2296 size_t replace_count = 0;
2297 zend_string *result;
2298 size_t old_replace_count;
2299
2300 /* Get function parameters and do error-checking. */
2301 ZEND_PARSE_PARAMETERS_START(3, 5)
2302 Z_PARAM_ZVAL(regex)
2303 Z_PARAM_ZVAL(replace)
2304 Z_PARAM_ZVAL(subject)
2305 Z_PARAM_OPTIONAL
2306 Z_PARAM_LONG(limit)
2307 Z_PARAM_ZVAL(zcount)
2308 ZEND_PARSE_PARAMETERS_END();
2309
2310 if (Z_TYPE_P(replace) != IS_ARRAY) {
2311 convert_to_string_ex(replace);
2312 if (Z_TYPE_P(regex) != IS_ARRAY) {
2313 convert_to_string_ex(regex);
2314 }
2315 } else {
2316 if (Z_TYPE_P(regex) != IS_ARRAY) {
2317 php_error_docref(NULL, E_WARNING, "Parameter mismatch, pattern is a string while replacement is an array");
2318 RETURN_FALSE;
2319 }
2320 }
2321
2322 if (Z_TYPE_P(subject) != IS_ARRAY) {
2323 old_replace_count = replace_count;
2324 result = php_replace_in_subject(regex,
2325 replace,
2326 subject,
2327 limit,
2328 &replace_count);
2329 if (result != NULL) {
2330 if (!is_filter || replace_count > old_replace_count) {
2331 RETVAL_STR(result);
2332 } else {
2333 zend_string_release_ex(result, 0);
2334 RETVAL_NULL();
2335 }
2336 } else {
2337 RETVAL_NULL();
2338 }
2339 } else {
2340 /* if subject is an array */
2341 zval *subject_entry, zv;
2342 zend_string *string_key;
2343 zend_ulong num_key;
2344
2345 array_init_size(return_value, zend_hash_num_elements(Z_ARRVAL_P(subject)));
2346
2347 /* For each subject entry, convert it to string, then perform replacement
2348 and add the result to the return_value array. */
2349 ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(subject), num_key, string_key, subject_entry) {
2350 old_replace_count = replace_count;
2351 result = php_replace_in_subject(regex,
2352 replace,
2353 subject_entry,
2354 limit,
2355 &replace_count);
2356 if (result != NULL) {
2357 if (!is_filter || replace_count > old_replace_count) {
2358 /* Add to return array */
2359 ZVAL_STR(&zv, result);
2360 if (string_key) {
2361 zend_hash_add_new(Z_ARRVAL_P(return_value), string_key, &zv);
2362 } else {
2363 zend_hash_index_add_new(Z_ARRVAL_P(return_value), num_key, &zv);
2364 }
2365 } else {
2366 zend_string_release_ex(result, 0);
2367 }
2368 }
2369 } ZEND_HASH_FOREACH_END();
2370 }
2371
2372 if (zcount) {
2373 ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count);
2374 }
2375 }
2376 /* }}} */
2377
2378 /* {{{ proto mixed preg_replace(mixed regex, mixed replace, mixed subject [, int limit [, int &count]])
2379 Perform Perl-style regular expression replacement. */
PHP_FUNCTION(preg_replace)2380 static PHP_FUNCTION(preg_replace)
2381 {
2382 preg_replace_common(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
2383 }
2384 /* }}} */
2385
2386 /* {{{ proto mixed preg_replace_callback(mixed regex, mixed callback, mixed subject [, int limit [, int &count]])
2387 Perform Perl-style regular expression replacement using replacement callback. */
PHP_FUNCTION(preg_replace_callback)2388 static PHP_FUNCTION(preg_replace_callback)
2389 {
2390 zval *regex, *replace, *subject, *zcount = NULL;
2391 zend_long limit = -1, flags = 0;
2392 size_t replace_count;
2393 zend_fcall_info fci;
2394 zend_fcall_info_cache fcc;
2395
2396 /* Get function parameters and do error-checking. */
2397 ZEND_PARSE_PARAMETERS_START(3, 6)
2398 Z_PARAM_ZVAL(regex)
2399 Z_PARAM_ZVAL(replace)
2400 Z_PARAM_ZVAL(subject)
2401 Z_PARAM_OPTIONAL
2402 Z_PARAM_LONG(limit)
2403 Z_PARAM_ZVAL(zcount)
2404 Z_PARAM_LONG(flags)
2405 ZEND_PARSE_PARAMETERS_END();
2406
2407 if (!zend_is_callable_ex(replace, NULL, 0, NULL, &fcc, NULL)) {
2408 zend_string *callback_name = zend_get_callable_name(replace);
2409 php_error_docref(NULL, E_WARNING, "Requires argument 2, '%s', to be a valid callback", ZSTR_VAL(callback_name));
2410 zend_string_release_ex(callback_name, 0);
2411 ZVAL_STR(return_value, zval_get_string(subject));
2412 return;
2413 }
2414
2415 fci.size = sizeof(fci);
2416 fci.object = NULL;
2417 ZVAL_COPY_VALUE(&fci.function_name, replace);
2418
2419 replace_count = preg_replace_func_impl(return_value, regex, &fci, &fcc, subject, limit, flags);
2420 if (zcount) {
2421 ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count);
2422 }
2423 }
2424 /* }}} */
2425
2426 /* {{{ proto mixed preg_replace_callback_array(array pattern, mixed subject [, int limit [, int &count]])
2427 Perform Perl-style regular expression replacement using replacement callback. */
PHP_FUNCTION(preg_replace_callback_array)2428 static PHP_FUNCTION(preg_replace_callback_array)
2429 {
2430 zval regex, zv, *replace, *subject, *pattern, *zcount = NULL;
2431 zend_long limit = -1, flags = 0;
2432 zend_string *str_idx;
2433 size_t replace_count = 0;
2434 zend_fcall_info fci;
2435 zend_fcall_info_cache fcc;
2436
2437 /* Get function parameters and do error-checking. */
2438 ZEND_PARSE_PARAMETERS_START(2, 5)
2439 Z_PARAM_ARRAY(pattern)
2440 Z_PARAM_ZVAL(subject)
2441 Z_PARAM_OPTIONAL
2442 Z_PARAM_LONG(limit)
2443 Z_PARAM_ZVAL(zcount)
2444 Z_PARAM_LONG(flags)
2445 ZEND_PARSE_PARAMETERS_END();
2446
2447 fci.size = sizeof(fci);
2448 fci.object = NULL;
2449
2450 ZEND_HASH_FOREACH_STR_KEY_VAL(Z_ARRVAL_P(pattern), str_idx, replace) {
2451 if (str_idx) {
2452 ZVAL_STR_COPY(®ex, str_idx);
2453 } else {
2454 php_error_docref(NULL, E_WARNING, "Delimiter must not be alphanumeric or backslash");
2455 zval_ptr_dtor(return_value);
2456 RETURN_NULL();
2457 }
2458
2459 if (!zend_is_callable_ex(replace, NULL, 0, NULL, &fcc, NULL)) {
2460 zend_string *callback_name = zend_get_callable_name(replace);
2461 php_error_docref(NULL, E_WARNING, "'%s' is not a valid callback", ZSTR_VAL(callback_name));
2462 zend_string_release_ex(callback_name, 0);
2463 zval_ptr_dtor(®ex);
2464 zval_ptr_dtor(return_value);
2465 ZVAL_COPY(return_value, subject);
2466 return;
2467 }
2468
2469 ZVAL_COPY_VALUE(&fci.function_name, replace);
2470
2471 replace_count += preg_replace_func_impl(&zv, ®ex, &fci, &fcc, subject, limit, flags);
2472 if (subject != return_value) {
2473 subject = return_value;
2474 } else {
2475 zval_ptr_dtor(return_value);
2476 }
2477
2478 zval_ptr_dtor(®ex);
2479
2480 ZVAL_COPY_VALUE(return_value, &zv);
2481
2482 if (UNEXPECTED(EG(exception))) {
2483 zval_ptr_dtor(return_value);
2484 RETURN_NULL();
2485 }
2486 } ZEND_HASH_FOREACH_END();
2487
2488 if (zcount) {
2489 ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count);
2490 }
2491 }
2492 /* }}} */
2493
2494 /* {{{ proto mixed preg_filter(mixed regex, mixed replace, mixed subject [, int limit [, int &count]])
2495 Perform Perl-style regular expression replacement and only return matches. */
PHP_FUNCTION(preg_filter)2496 static PHP_FUNCTION(preg_filter)
2497 {
2498 preg_replace_common(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
2499 }
2500 /* }}} */
2501
2502 /* {{{ proto array preg_split(string pattern, string subject [, int limit [, int flags]])
2503 Split string into an array using a perl-style regular expression as a delimiter */
PHP_FUNCTION(preg_split)2504 static PHP_FUNCTION(preg_split)
2505 {
2506 zend_string *regex; /* Regular expression */
2507 zend_string *subject; /* String to match against */
2508 zend_long limit_val = -1;/* Integer value of limit */
2509 zend_long flags = 0; /* Match control flags */
2510 pcre_cache_entry *pce; /* Compiled regular expression */
2511
2512 /* Get function parameters and do error checking */
2513 ZEND_PARSE_PARAMETERS_START(2, 4)
2514 Z_PARAM_STR(regex)
2515 Z_PARAM_STR(subject)
2516 Z_PARAM_OPTIONAL
2517 Z_PARAM_LONG(limit_val)
2518 Z_PARAM_LONG(flags)
2519 ZEND_PARSE_PARAMETERS_END_EX(RETURN_FALSE);
2520
2521 /* Compile regex or get it from cache. */
2522 if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
2523 RETURN_FALSE;
2524 }
2525
2526 pce->refcount++;
2527 php_pcre_split_impl(pce, subject, return_value, limit_val, flags);
2528 pce->refcount--;
2529 }
2530 /* }}} */
2531
2532 /* {{{ php_pcre_split
2533 */
php_pcre_split_impl(pcre_cache_entry * pce,zend_string * subject_str,zval * return_value,zend_long limit_val,zend_long flags)2534 PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str, zval *return_value,
2535 zend_long limit_val, zend_long flags)
2536 {
2537 PCRE2_SIZE *offsets; /* Array of subpattern offsets */
2538 uint32_t options; /* Execution options */
2539 int count; /* Count of matched subpatterns */
2540 PCRE2_SIZE start_offset; /* Where the new search starts */
2541 PCRE2_SIZE last_match_offset; /* Location of last match */
2542 uint32_t no_empty; /* If NO_EMPTY flag is set */
2543 uint32_t delim_capture; /* If delimiters should be captured */
2544 uint32_t offset_capture; /* If offsets should be captured */
2545 uint32_t num_subpats; /* Number of captured subpatterns */
2546 zval tmp;
2547 pcre2_match_data *match_data;
2548 char *subject = ZSTR_VAL(subject_str);
2549
2550 no_empty = flags & PREG_SPLIT_NO_EMPTY;
2551 delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE;
2552 offset_capture = flags & PREG_SPLIT_OFFSET_CAPTURE;
2553
2554 /* Initialize return value */
2555 array_init(return_value);
2556
2557 /* Calculate the size of the offsets array, and allocate memory for it. */
2558 num_subpats = pce->capture_count + 1;
2559
2560 /* Start at the beginning of the string */
2561 start_offset = 0;
2562 last_match_offset = 0;
2563 PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
2564
2565 if (limit_val == -1) {
2566 /* pass */
2567 } else if (limit_val == 0) {
2568 limit_val = -1;
2569 } else if (limit_val <= 1) {
2570 goto last;
2571 }
2572
2573 if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
2574 match_data = mdata;
2575 } else {
2576 match_data = pcre2_match_data_create_from_pattern(pce->re, gctx);
2577 if (!match_data) {
2578 PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
2579 zval_ptr_dtor(return_value);
2580 RETURN_FALSE;
2581 }
2582 }
2583
2584 options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
2585
2586 #ifdef HAVE_PCRE_JIT_SUPPORT
2587 if ((pce->preg_options & PREG_JIT) && options) {
2588 count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
2589 PCRE2_NO_UTF_CHECK, match_data, mctx);
2590 } else
2591 #endif
2592 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
2593 options, match_data, mctx);
2594
2595 while (1) {
2596 /* If something matched */
2597 if (count >= 0) {
2598 /* Check for too many substrings condition. */
2599 if (UNEXPECTED(count == 0)) {
2600 php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
2601 count = num_subpats;
2602 }
2603
2604 matched:
2605 offsets = pcre2_get_ovector_pointer(match_data);
2606
2607 if (UNEXPECTED(offsets[1] < offsets[0])) {
2608 PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
2609 break;
2610 }
2611
2612 if (!no_empty || offsets[0] != last_match_offset) {
2613 if (offset_capture) {
2614 /* Add (match, offset) pair to the return value */
2615 add_offset_pair(
2616 return_value, subject, last_match_offset, offsets[0],
2617 NULL, 0);
2618 } else {
2619 /* Add the piece to the return value */
2620 populate_match_value_str(&tmp, subject, last_match_offset, offsets[0]);
2621 zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
2622 }
2623
2624 /* One less left to do */
2625 if (limit_val != -1)
2626 limit_val--;
2627 }
2628
2629 if (delim_capture) {
2630 size_t i;
2631 for (i = 1; i < count; i++) {
2632 /* If we have matched a delimiter */
2633 if (!no_empty || offsets[2*i] != offsets[2*i+1]) {
2634 if (offset_capture) {
2635 add_offset_pair(
2636 return_value, subject, offsets[2*i], offsets[2*i+1], NULL, 0);
2637 } else {
2638 populate_match_value_str(&tmp, subject, offsets[2*i], offsets[2*i+1]);
2639 zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
2640 }
2641 }
2642 }
2643 }
2644
2645 /* Advance to the position right after the last full match */
2646 start_offset = last_match_offset = offsets[1];
2647
2648 /* If we have matched an empty string, mimic what Perl's /g options does.
2649 This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
2650 the match again at the same point. If this fails (picked up above) we
2651 advance to the next character. */
2652 if (start_offset == offsets[0]) {
2653 /* Get next piece if no limit or limit not yet reached and something matched*/
2654 if (limit_val != -1 && limit_val <= 1) {
2655 break;
2656 }
2657 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
2658 PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
2659 if (count >= 0) {
2660 goto matched;
2661 } else if (count == PCRE2_ERROR_NOMATCH) {
2662 /* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
2663 this is not necessarily the end. We need to advance
2664 the start offset, and continue. Fudge the offset values
2665 to achieve this, unless we're already at the end of the string. */
2666 if (start_offset < ZSTR_LEN(subject_str)) {
2667 start_offset += calculate_unit_length(pce, subject + start_offset);
2668 } else {
2669 break;
2670 }
2671 } else {
2672 goto error;
2673 }
2674 }
2675
2676 } else if (count == PCRE2_ERROR_NOMATCH) {
2677 break;
2678 } else {
2679 error:
2680 pcre_handle_exec_error(count);
2681 break;
2682 }
2683
2684 /* Get next piece if no limit or limit not yet reached and something matched*/
2685 if (limit_val != -1 && limit_val <= 1) {
2686 break;
2687 }
2688
2689 #ifdef HAVE_PCRE_JIT_SUPPORT
2690 if (pce->preg_options & PREG_JIT) {
2691 count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
2692 PCRE2_NO_UTF_CHECK, match_data, mctx);
2693 } else
2694 #endif
2695 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
2696 PCRE2_NO_UTF_CHECK, match_data, mctx);
2697 }
2698 if (match_data != mdata) {
2699 pcre2_match_data_free(match_data);
2700 }
2701
2702 if (PCRE_G(error_code) != PHP_PCRE_NO_ERROR) {
2703 zval_ptr_dtor(return_value);
2704 RETURN_FALSE;
2705 }
2706
2707 last:
2708 start_offset = last_match_offset; /* the offset might have been incremented, but without further successful matches */
2709
2710 if (!no_empty || start_offset < ZSTR_LEN(subject_str)) {
2711 if (offset_capture) {
2712 /* Add the last (match, offset) pair to the return value */
2713 add_offset_pair(return_value, subject, start_offset, ZSTR_LEN(subject_str), NULL, 0);
2714 } else {
2715 /* Add the last piece to the return value */
2716 if (start_offset == 0) {
2717 ZVAL_STR_COPY(&tmp, subject_str);
2718 } else {
2719 populate_match_value_str(&tmp, subject, start_offset, ZSTR_LEN(subject_str));
2720 }
2721 zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
2722 }
2723 }
2724 }
2725 /* }}} */
2726
2727 /* {{{ proto string preg_quote(string str [, string delim_char])
2728 Quote regular expression characters plus an optional character */
PHP_FUNCTION(preg_quote)2729 static PHP_FUNCTION(preg_quote)
2730 {
2731 zend_string *str; /* Input string argument */
2732 zend_string *delim = NULL; /* Additional delimiter argument */
2733 char *in_str; /* Input string */
2734 char *in_str_end; /* End of the input string */
2735 zend_string *out_str; /* Output string with quoted characters */
2736 size_t extra_len; /* Number of additional characters */
2737 char *p, /* Iterator for input string */
2738 *q, /* Iterator for output string */
2739 delim_char = '\0', /* Delimiter character to be quoted */
2740 c; /* Current character */
2741
2742 /* Get the arguments and check for errors */
2743 ZEND_PARSE_PARAMETERS_START(1, 2)
2744 Z_PARAM_STR(str)
2745 Z_PARAM_OPTIONAL
2746 Z_PARAM_STR_EX(delim, 1, 0)
2747 ZEND_PARSE_PARAMETERS_END();
2748
2749 /* Nothing to do if we got an empty string */
2750 if (ZSTR_LEN(str) == 0) {
2751 RETURN_EMPTY_STRING();
2752 }
2753
2754 in_str = ZSTR_VAL(str);
2755 in_str_end = in_str + ZSTR_LEN(str);
2756
2757 if (delim) {
2758 delim_char = ZSTR_VAL(delim)[0];
2759 }
2760
2761 /* Go through the string and quote necessary characters */
2762 extra_len = 0;
2763 p = in_str;
2764 do {
2765 c = *p;
2766 switch(c) {
2767 case '.':
2768 case '\\':
2769 case '+':
2770 case '*':
2771 case '?':
2772 case '[':
2773 case '^':
2774 case ']':
2775 case '$':
2776 case '(':
2777 case ')':
2778 case '{':
2779 case '}':
2780 case '=':
2781 case '!':
2782 case '>':
2783 case '<':
2784 case '|':
2785 case ':':
2786 case '-':
2787 case '#':
2788 extra_len++;
2789 break;
2790
2791 case '\0':
2792 extra_len+=3;
2793 break;
2794
2795 default:
2796 if (c == delim_char) {
2797 extra_len++;
2798 }
2799 break;
2800 }
2801 p++;
2802 } while (p != in_str_end);
2803
2804 if (extra_len == 0) {
2805 RETURN_STR_COPY(str);
2806 }
2807
2808 /* Allocate enough memory so that even if each character
2809 is quoted, we won't run out of room */
2810 out_str = zend_string_safe_alloc(1, ZSTR_LEN(str), extra_len, 0);
2811 q = ZSTR_VAL(out_str);
2812 p = in_str;
2813
2814 do {
2815 c = *p;
2816 switch(c) {
2817 case '.':
2818 case '\\':
2819 case '+':
2820 case '*':
2821 case '?':
2822 case '[':
2823 case '^':
2824 case ']':
2825 case '$':
2826 case '(':
2827 case ')':
2828 case '{':
2829 case '}':
2830 case '=':
2831 case '!':
2832 case '>':
2833 case '<':
2834 case '|':
2835 case ':':
2836 case '-':
2837 case '#':
2838 *q++ = '\\';
2839 *q++ = c;
2840 break;
2841
2842 case '\0':
2843 *q++ = '\\';
2844 *q++ = '0';
2845 *q++ = '0';
2846 *q++ = '0';
2847 break;
2848
2849 default:
2850 if (c == delim_char) {
2851 *q++ = '\\';
2852 }
2853 *q++ = c;
2854 break;
2855 }
2856 p++;
2857 } while (p != in_str_end);
2858 *q = '\0';
2859
2860 RETURN_NEW_STR(out_str);
2861 }
2862 /* }}} */
2863
2864 /* {{{ proto array preg_grep(string regex, array input [, int flags])
2865 Searches array and returns entries which match regex */
PHP_FUNCTION(preg_grep)2866 static PHP_FUNCTION(preg_grep)
2867 {
2868 zend_string *regex; /* Regular expression */
2869 zval *input; /* Input array */
2870 zend_long flags = 0; /* Match control flags */
2871 pcre_cache_entry *pce; /* Compiled regular expression */
2872
2873 /* Get arguments and do error checking */
2874 ZEND_PARSE_PARAMETERS_START(2, 3)
2875 Z_PARAM_STR(regex)
2876 Z_PARAM_ARRAY(input)
2877 Z_PARAM_OPTIONAL
2878 Z_PARAM_LONG(flags)
2879 ZEND_PARSE_PARAMETERS_END();
2880
2881 /* Compile regex or get it from cache. */
2882 if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
2883 RETURN_FALSE;
2884 }
2885
2886 pce->refcount++;
2887 php_pcre_grep_impl(pce, input, return_value, flags);
2888 pce->refcount--;
2889 }
2890 /* }}} */
2891
php_pcre_grep_impl(pcre_cache_entry * pce,zval * input,zval * return_value,zend_long flags)2892 PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return_value, zend_long flags) /* {{{ */
2893 {
2894 zval *entry; /* An entry in the input array */
2895 uint32_t num_subpats; /* Number of captured subpatterns */
2896 int count; /* Count of matched subpatterns */
2897 uint32_t options; /* Execution options */
2898 zend_string *string_key;
2899 zend_ulong num_key;
2900 zend_bool invert; /* Whether to return non-matching
2901 entries */
2902 pcre2_match_data *match_data;
2903 invert = flags & PREG_GREP_INVERT ? 1 : 0;
2904
2905 /* Calculate the size of the offsets array, and allocate memory for it. */
2906 num_subpats = pce->capture_count + 1;
2907
2908 /* Initialize return array */
2909 array_init(return_value);
2910
2911 PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
2912
2913 if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
2914 match_data = mdata;
2915 } else {
2916 match_data = pcre2_match_data_create_from_pattern(pce->re, gctx);
2917 if (!match_data) {
2918 PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
2919 return;
2920 }
2921 }
2922
2923 options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
2924
2925 /* Go through the input array */
2926 ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(input), num_key, string_key, entry) {
2927 zend_string *tmp_subject_str;
2928 zend_string *subject_str = zval_get_tmp_string(entry, &tmp_subject_str);
2929
2930 /* Perform the match */
2931 #ifdef HAVE_PCRE_JIT_SUPPORT
2932 if ((pce->preg_options & PREG_JIT) && options) {
2933 count = pcre2_jit_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), 0,
2934 PCRE2_NO_UTF_CHECK, match_data, mctx);
2935 } else
2936 #endif
2937 count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), 0,
2938 options, match_data, mctx);
2939
2940 /* If the entry fits our requirements */
2941 if (count >= 0) {
2942 /* Check for too many substrings condition. */
2943 if (UNEXPECTED(count == 0)) {
2944 php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
2945 }
2946 if (!invert) {
2947 Z_TRY_ADDREF_P(entry);
2948
2949 /* Add to return array */
2950 if (string_key) {
2951 zend_hash_update(Z_ARRVAL_P(return_value), string_key, entry);
2952 } else {
2953 zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, entry);
2954 }
2955 }
2956 } else if (count == PCRE2_ERROR_NOMATCH) {
2957 if (invert) {
2958 Z_TRY_ADDREF_P(entry);
2959
2960 /* Add to return array */
2961 if (string_key) {
2962 zend_hash_update(Z_ARRVAL_P(return_value), string_key, entry);
2963 } else {
2964 zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, entry);
2965 }
2966 }
2967 } else {
2968 pcre_handle_exec_error(count);
2969 zend_tmp_string_release(tmp_subject_str);
2970 break;
2971 }
2972
2973 zend_tmp_string_release(tmp_subject_str);
2974 } ZEND_HASH_FOREACH_END();
2975 if (match_data != mdata) {
2976 pcre2_match_data_free(match_data);
2977 }
2978 }
2979 /* }}} */
2980
2981 /* {{{ proto int preg_last_error()
2982 Returns the error code of the last regexp execution. */
PHP_FUNCTION(preg_last_error)2983 static PHP_FUNCTION(preg_last_error)
2984 {
2985 ZEND_PARSE_PARAMETERS_NONE();
2986
2987 RETURN_LONG(PCRE_G(error_code));
2988 }
2989 /* }}} */
2990
2991 /* {{{ module definition structures */
2992
2993 /* {{{ arginfo */
2994 ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match, 0, 0, 2)
2995 ZEND_ARG_INFO(0, pattern)
2996 ZEND_ARG_INFO(0, subject)
2997 ZEND_ARG_INFO(1, subpatterns) /* array */
2998 ZEND_ARG_INFO(0, flags)
2999 ZEND_ARG_INFO(0, offset)
3000 ZEND_END_ARG_INFO()
3001
3002 ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match_all, 0, 0, 2)
3003 ZEND_ARG_INFO(0, pattern)
3004 ZEND_ARG_INFO(0, subject)
3005 ZEND_ARG_INFO(1, subpatterns) /* array */
3006 ZEND_ARG_INFO(0, flags)
3007 ZEND_ARG_INFO(0, offset)
3008 ZEND_END_ARG_INFO()
3009
3010 ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace, 0, 0, 3)
3011 ZEND_ARG_INFO(0, regex)
3012 ZEND_ARG_INFO(0, replace)
3013 ZEND_ARG_INFO(0, subject)
3014 ZEND_ARG_INFO(0, limit)
3015 ZEND_ARG_INFO(1, count)
3016 ZEND_END_ARG_INFO()
3017
3018 ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace_callback, 0, 0, 3)
3019 ZEND_ARG_INFO(0, regex)
3020 ZEND_ARG_INFO(0, callback)
3021 ZEND_ARG_INFO(0, subject)
3022 ZEND_ARG_INFO(0, limit)
3023 ZEND_ARG_INFO(1, count)
3024 ZEND_ARG_INFO(0, flags)
3025 ZEND_END_ARG_INFO()
3026
3027 ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace_callback_array, 0, 0, 2)
3028 ZEND_ARG_INFO(0, pattern)
3029 ZEND_ARG_INFO(0, subject)
3030 ZEND_ARG_INFO(0, limit)
3031 ZEND_ARG_INFO(1, count)
3032 ZEND_ARG_INFO(0, flags)
3033 ZEND_END_ARG_INFO()
3034
3035 ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_split, 0, 0, 2)
3036 ZEND_ARG_INFO(0, pattern)
3037 ZEND_ARG_INFO(0, subject)
3038 ZEND_ARG_INFO(0, limit)
3039 ZEND_ARG_INFO(0, flags)
3040 ZEND_END_ARG_INFO()
3041
3042 ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_quote, 0, 0, 1)
3043 ZEND_ARG_INFO(0, str)
3044 ZEND_ARG_INFO(0, delim_char)
3045 ZEND_END_ARG_INFO()
3046
3047 ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_grep, 0, 0, 2)
3048 ZEND_ARG_INFO(0, regex)
3049 ZEND_ARG_INFO(0, input) /* array */
3050 ZEND_ARG_INFO(0, flags)
3051 ZEND_END_ARG_INFO()
3052
3053 ZEND_BEGIN_ARG_INFO(arginfo_preg_last_error, 0)
3054 ZEND_END_ARG_INFO()
3055 /* }}} */
3056
3057 static const zend_function_entry pcre_functions[] = {
3058 PHP_FE(preg_match, arginfo_preg_match)
3059 PHP_FE(preg_match_all, arginfo_preg_match_all)
3060 PHP_FE(preg_replace, arginfo_preg_replace)
3061 PHP_FE(preg_replace_callback, arginfo_preg_replace_callback)
3062 PHP_FE(preg_replace_callback_array, arginfo_preg_replace_callback_array)
3063 PHP_FE(preg_filter, arginfo_preg_replace)
3064 PHP_FE(preg_split, arginfo_preg_split)
3065 PHP_FE(preg_quote, arginfo_preg_quote)
3066 PHP_FE(preg_grep, arginfo_preg_grep)
3067 PHP_FE(preg_last_error, arginfo_preg_last_error)
3068 PHP_FE_END
3069 };
3070
3071 zend_module_entry pcre_module_entry = {
3072 STANDARD_MODULE_HEADER,
3073 "pcre",
3074 pcre_functions,
3075 PHP_MINIT(pcre),
3076 PHP_MSHUTDOWN(pcre),
3077 PHP_RINIT(pcre),
3078 PHP_RSHUTDOWN(pcre),
3079 PHP_MINFO(pcre),
3080 PHP_PCRE_VERSION,
3081 PHP_MODULE_GLOBALS(pcre),
3082 PHP_GINIT(pcre),
3083 PHP_GSHUTDOWN(pcre),
3084 NULL,
3085 STANDARD_MODULE_PROPERTIES_EX
3086 };
3087
/* Emitted only when COMPILE_DL_PCRE is defined. */
#if defined(COMPILE_DL_PCRE)
ZEND_GET_MODULE(pcre)
#endif
3091
3092 /* }}} */
3093
3094 PHPAPI pcre2_match_context *php_pcre_mctx(void)
3095 {/*{{{*/
3096 return mctx;
3097 }/*}}}*/
3098
php_pcre_gctx(void)3099 PHPAPI pcre2_general_context *php_pcre_gctx(void)
3100 {/*{{{*/
3101 return gctx;
3102 }/*}}}*/
3103
php_pcre_cctx(void)3104 PHPAPI pcre2_compile_context *php_pcre_cctx(void)
3105 {/*{{{*/
3106 return cctx;
3107 }/*}}}*/
3108
php_pcre_pce_incref(pcre_cache_entry * pce)3109 PHPAPI void php_pcre_pce_incref(pcre_cache_entry *pce)
3110 {/*{{{*/
3111 assert(NULL != pce);
3112 pce->refcount++;
3113 }/*}}}*/
3114
php_pcre_pce_decref(pcre_cache_entry * pce)3115 PHPAPI void php_pcre_pce_decref(pcre_cache_entry *pce)
3116 {/*{{{*/
3117 assert(NULL != pce);
3118 assert(0 != pce->refcount);
3119 pce->refcount--;
3120 }/*}}}*/
3121
php_pcre_pce_re(pcre_cache_entry * pce)3122 PHPAPI pcre2_code *php_pcre_pce_re(pcre_cache_entry *pce)
3123 {/*{{{*/
3124 assert(NULL != pce);
3125 return pce->re;
3126 }/*}}}*/
3127