1 /*
2 +----------------------------------------------------------------------+
3 | PHP Version 7 |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 1997-2018 The PHP Group |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
15 | Author: Andrei Zmievski <andrei@php.net> |
16 +----------------------------------------------------------------------+
17 */
18
19 #include "php.h"
20 #include "php_ini.h"
21 #include "php_globals.h"
22 #include "php_pcre.h"
23 #include "ext/standard/info.h"
24 #include "ext/standard/basic_functions.h"
25 #include "zend_smart_str.h"
26
27 #if HAVE_PCRE || HAVE_BUNDLED_PCRE
28
29 #include "ext/standard/php_string.h"
30
/* Result ordering for global matches; PREG_PATTERN_ORDER is the default
   chosen for a global match. */
#define PREG_PATTERN_ORDER 1
#define PREG_SET_ORDER 2
/* Match flags tested against the user-supplied flags argument. */
#define PREG_OFFSET_CAPTURE (1<<8)
#define PREG_UNMATCHED_AS_NULL (1<<9)

/* Split flags; exported to userland under the same names. */
#define PREG_SPLIT_NO_EMPTY (1<<0)
#define PREG_SPLIT_DELIM_CAPTURE (1<<1)
#define PREG_SPLIT_OFFSET_CAPTURE (1<<2)

/* Set by the 'e' pattern modifier, which is then rejected with a warning. */
#define PREG_REPLACE_EVAL (1<<0)

/* Grep flag; exported to userland under the same name. */
#define PREG_GREP_INVERT (1<<0)

/* Internal preg_options bit: set when JIT compilation produced code for the
   pattern. */
#define PREG_JIT (1<<3)

/* Number of cached patterns at which an eighth of the cache is evicted. */
#define PCRE_CACHE_SIZE 4096
47
/* One compiled pattern as stored in PCRE_G(pcre_cache). */
struct _pcre_cache_entry {
	pcre2_code *re;                 /* compiled pattern; freed with pcre2_code_free() on eviction */
	uint32_t preg_options;          /* PHP-level option bits (e.g. PREG_JIT) */
	uint32_t capture_count;         /* result of PCRE2_INFO_CAPTURECOUNT */
	uint32_t name_count;            /* result of PCRE2_INFO_NAMECOUNT */
	uint32_t compile_options;       /* PCRE2 option bits the pattern was compiled with */
	uint32_t extra_compile_options; /* PCRE2 extra option bits used for the compilation */
	uint32_t refcount;              /* non-zero while in use; such entries are not evicted */
};
57
/* Values stored in PCRE_G(error_code) and exported to userland as the
   PREG_*_ERROR constants. The numeric values are part of the public
   contract, so they are spelled out explicitly. */
enum {
	PHP_PCRE_NO_ERROR              = 0,
	PHP_PCRE_INTERNAL_ERROR        = 1,
	PHP_PCRE_BACKTRACK_LIMIT_ERROR = 2,
	PHP_PCRE_RECURSION_LIMIT_ERROR = 3,
	PHP_PCRE_BAD_UTF8_ERROR        = 4,
	PHP_PCRE_BAD_UTF8_OFFSET_ERROR = 5,
	PHP_PCRE_JIT_STACKLIMIT_ERROR  = 6
};
67
68
69 PHPAPI ZEND_DECLARE_MODULE_GLOBALS(pcre)
70
#ifdef HAVE_PCRE_JIT_SUPPORT
/* Start and maximum sizes handed to pcre2_jit_stack_create(). */
#define PCRE_JIT_STACK_MIN_SIZE (32 * 1024)
#define PCRE_JIT_STACK_MAX_SIZE (192 * 1024)
/* JIT stack created by php_pcre_init_pcre2() when JIT is requested. */
ZEND_TLS pcre2_jit_stack *jit_stack = NULL;
#endif
/* General context carrying the persistent allocator callbacks; every other
   PCRE2 object below is created from it. */
ZEND_TLS pcre2_general_context *gctx = NULL;
/* These two are global per thread for now, though it is possible to use them
   per pattern: either copy them into the pce, or use no global contexts at
   all and create a pair for every pce. */
ZEND_TLS pcre2_compile_context *cctx = NULL;
ZEND_TLS pcre2_match_context *mctx = NULL;
/* Preallocated match data, handed out by php_pcre_create_match_data() while
   mdata_used is 0. */
ZEND_TLS pcre2_match_data *mdata = NULL;
ZEND_TLS zend_bool mdata_used = 0;
/* 1 once php_pcre_init_pcre2() has created every object above, else 0. */
ZEND_TLS uint8_t pcre2_init_ok = 0;
#if defined(ZTS) && defined(HAVE_PCRE_JIT_SUPPORT)
/* Process-wide mutex, allocated and freed on the main thread only.
   NOTE: the helper macros below expand to complete statements (an unbraced
   `if`, or a call with its own trailing `;`), so use them only as standalone
   statements and never as the sole body of an if/else. */
static MUTEX_T pcre_mt = NULL;
#define php_pcre_mutex_alloc() \
	if (tsrm_is_main_thread() && !pcre_mt) pcre_mt = tsrm_mutex_alloc();
#define php_pcre_mutex_free() \
	if (tsrm_is_main_thread() && pcre_mt) { tsrm_mutex_free(pcre_mt); pcre_mt = NULL; }
#define php_pcre_mutex_lock() tsrm_mutex_lock(pcre_mt);
#define php_pcre_mutex_unlock() tsrm_mutex_unlock(pcre_mt);
#else
/* Without ZTS and JIT support the mutex helpers compile to nothing. */
#define php_pcre_mutex_alloc()
#define php_pcre_mutex_free()
#define php_pcre_mutex_lock()
#define php_pcre_mutex_unlock()
#endif
99
100 #if HAVE_SETLOCALE
101 ZEND_TLS HashTable char_tables;
102
/* Destructor for char_tables entries: frees the persistent character table
   held in the zval's pointer slot. */
static void php_pcre_free_char_table(zval *data)
{/*{{{*/
	pefree(Z_PTR_P(data), 1);
}/*}}}*/
108 #endif
109
pcre_handle_exec_error(int pcre_code)110 static void pcre_handle_exec_error(int pcre_code) /* {{{ */
111 {
112 int preg_code = 0;
113
114 switch (pcre_code) {
115 case PCRE2_ERROR_MATCHLIMIT:
116 preg_code = PHP_PCRE_BACKTRACK_LIMIT_ERROR;
117 break;
118
119 case PCRE2_ERROR_RECURSIONLIMIT:
120 preg_code = PHP_PCRE_RECURSION_LIMIT_ERROR;
121 break;
122
123 case PCRE2_ERROR_BADUTFOFFSET:
124 preg_code = PHP_PCRE_BAD_UTF8_OFFSET_ERROR;
125 break;
126
127 #ifdef HAVE_PCRE_JIT_SUPPORT
128 case PCRE2_ERROR_JIT_STACKLIMIT:
129 preg_code = PHP_PCRE_JIT_STACKLIMIT_ERROR;
130 break;
131 #endif
132
133 default:
134 if (pcre_code <= PCRE2_ERROR_UTF8_ERR1 && pcre_code >= PCRE2_ERROR_UTF8_ERR21) {
135 preg_code = PHP_PCRE_BAD_UTF8_ERROR;
136 } else {
137 preg_code = PHP_PCRE_INTERNAL_ERROR;
138 }
139 break;
140 }
141
142 PCRE_G(error_code) = preg_code;
143 }
144 /* }}} */
145
/* Destructor for pcre_cache entries: releases the compiled pattern and then
   the persistent cache entry itself. A NULL entry is ignored. */
static void php_free_pcre_cache(zval *data) /* {{{ */
{
	pcre_cache_entry *entry = (pcre_cache_entry *) Z_PTR_P(data);

	if (entry) {
		pcre2_code_free(entry->re);
		pefree(entry, 1);
	}
}
153 /* }}} */
154
/* Allocation callback installed in the PCRE2 general context: returns `size`
   bytes of persistent memory. The `data` argument is not used. */
static void *php_pcre_malloc(PCRE2_SIZE size, void *data)
{/*{{{*/
	return pemalloc(size, 1);
}/*}}}*/
160
/* Deallocation callback installed in the PCRE2 general context: releases a
   block of persistent memory. The `data` argument is not used. */
static void php_pcre_free(void *block, void *data)
{/*{{{*/
	pefree(block, 1);
}/*}}}*/
165
/* Extra compile options installed on the shared compile context by default;
   the 'X' pattern modifier clears this bit for a single compilation. */
#define PHP_PCRE_DEFAULT_EXTRA_COPTIONS PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL
/* Size passed to pcre2_match_data_create() for the preallocated match data;
   it is reused only when capture_count + 1 does not exceed this value. */
#define PHP_PCRE_PREALLOC_MDATA_SIZE 32
168
/* Creates whichever of the shared PCRE2 objects (general, compile and match
   contexts, optional JIT stack, preallocated match data) do not exist yet.
   Objects that already exist are reused, so the function may be called again
   after a failed attempt.

   jit: non-zero to also create the JIT stack (only with JIT support built in).

   Sets pcre2_init_ok to 1 when everything is available, 0 otherwise. */
static void php_pcre_init_pcre2(uint8_t jit)
{/*{{{*/
	if (!gctx && !(gctx = pcre2_general_context_create(php_pcre_malloc, php_pcre_free, NULL))) {
		goto init_failed;
	}

	if (!cctx && !(cctx = pcre2_compile_context_create(gctx))) {
		goto init_failed;
	}

	/* XXX The 'X' modifier is the default behavior in PCRE2. This option is
		called dangerous in the manual, as typos in patterns can cause
		unexpected results. We might want to switch to the default PCRE2
		behavior, too, thus causing a certain BC break. */
	pcre2_set_compile_extra_options(cctx, PHP_PCRE_DEFAULT_EXTRA_COPTIONS);

	if (!mctx && !(mctx = pcre2_match_context_create(gctx))) {
		goto init_failed;
	}

#ifdef HAVE_PCRE_JIT_SUPPORT
	if (jit && !jit_stack
			&& !(jit_stack = pcre2_jit_stack_create(PCRE_JIT_STACK_MIN_SIZE, PCRE_JIT_STACK_MAX_SIZE, gctx))) {
		goto init_failed;
	}
#endif

	if (!mdata && !(mdata = pcre2_match_data_create(PHP_PCRE_PREALLOC_MDATA_SIZE, gctx))) {
		goto init_failed;
	}

	pcre2_init_ok = 1;
	return;

init_failed:
	/* Objects created so far are kept for a later retry. */
	pcre2_init_ok = 0;
}/*}}}*/
221
php_pcre_shutdown_pcre2(void)222 static void php_pcre_shutdown_pcre2(void)
223 {/*{{{*/
224 if (gctx) {
225 pcre2_general_context_free(gctx);
226 gctx = NULL;
227 }
228
229 if (cctx) {
230 pcre2_compile_context_free(cctx);
231 cctx = NULL;
232 }
233
234 if (mctx) {
235 pcre2_match_context_free(mctx);
236 mctx = NULL;
237 }
238
239 #ifdef HAVE_PCRE_JIT_SUPPORT
240 /* Stack may only be destroyed when no cached patterns
241 possibly associated with it do exist. */
242 if (jit_stack) {
243 pcre2_jit_stack_free(jit_stack);
244 jit_stack = NULL;
245 }
246 #endif
247
248 if (mdata) {
249 pcre2_match_data_free(mdata);
250 mdata = NULL;
251 }
252
253 pcre2_init_ok = 0;
254 }/*}}}*/
255
PHP_GINIT_FUNCTION(pcre)256 static PHP_GINIT_FUNCTION(pcre) /* {{{ */
257 {
258 php_pcre_mutex_alloc();
259
260 zend_hash_init(&pcre_globals->pcre_cache, 0, NULL, php_free_pcre_cache, 1);
261 pcre_globals->backtrack_limit = 0;
262 pcre_globals->recursion_limit = 0;
263 pcre_globals->error_code = PHP_PCRE_NO_ERROR;
264 #ifdef HAVE_PCRE_JIT_SUPPORT
265 pcre_globals->jit = 1;
266 #endif
267
268 php_pcre_init_pcre2(1);
269 #if HAVE_SETLOCALE
270 zend_hash_init(&char_tables, 1, NULL, php_pcre_free_char_table, 1);
271 #endif
272 }
273 /* }}} */
274
/* Globals destructor. The pattern cache is destroyed before the shared PCRE2
   objects are freed, so no cached pattern outlives the JIT stack. */
static PHP_GSHUTDOWN_FUNCTION(pcre) /* {{{ */
{
	/* Runs php_free_pcre_cache() on every entry. */
	zend_hash_destroy(&pcre_globals->pcre_cache);

	php_pcre_shutdown_pcre2();
#if HAVE_SETLOCALE
	/* Runs php_pcre_free_char_table() on every cached table. */
	zend_hash_destroy(&char_tables);
#endif

	php_pcre_mutex_free();
}
286 /* }}} */
287
PHP_INI_MH(OnUpdateBacktrackLimit)288 static PHP_INI_MH(OnUpdateBacktrackLimit)
289 {/*{{{*/
290 OnUpdateLong(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
291 if (mctx) {
292 pcre2_set_match_limit(mctx, (uint32_t)PCRE_G(backtrack_limit));
293 }
294
295 return SUCCESS;
296 }/*}}}*/
297
PHP_INI_MH(OnUpdateRecursionLimit)298 static PHP_INI_MH(OnUpdateRecursionLimit)
299 {/*{{{*/
300 OnUpdateLong(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
301 if (mctx) {
302 pcre2_set_depth_limit(mctx, (uint32_t)PCRE_G(recursion_limit));
303 }
304
305 return SUCCESS;
306 }/*}}}*/
307
308 #ifdef HAVE_PCRE_JIT_SUPPORT
PHP_INI_MH(OnUpdateJit)309 static PHP_INI_MH(OnUpdateJit)
310 {/*{{{*/
311 OnUpdateBool(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
312 if (PCRE_G(jit) && jit_stack) {
313 pcre2_jit_stack_assign(mctx, NULL, jit_stack);
314 } else {
315 pcre2_jit_stack_assign(mctx, NULL, NULL);
316 }
317
318 return SUCCESS;
319 }/*}}}*/
320 #endif
321
/* INI settings of the extension. Each entry is changeable at any level
   (PHP_INI_ALL) and is pushed into the PCRE2 match context by its handler. */
PHP_INI_BEGIN()
	STD_PHP_INI_ENTRY("pcre.backtrack_limit", "1000000", PHP_INI_ALL, OnUpdateBacktrackLimit, backtrack_limit, zend_pcre_globals, pcre_globals)
	STD_PHP_INI_ENTRY("pcre.recursion_limit", "100000", PHP_INI_ALL, OnUpdateRecursionLimit, recursion_limit, zend_pcre_globals, pcre_globals)
#ifdef HAVE_PCRE_JIT_SUPPORT
	STD_PHP_INI_ENTRY("pcre.jit", "1", PHP_INI_ALL, OnUpdateJit, jit, zend_pcre_globals, pcre_globals)
#endif
PHP_INI_END()
329
/* Fetches a string-valued pcre2_config() item.

   what: one of the PCRE2_CONFIG_* selectors that yields a string.

   Returns a malloc()ed, NUL-terminated copy that the caller must free(), or
   NULL when the item is unavailable (pcre2_config() reports an error or an
   empty result) or memory could not be allocated. */
static char *_pcre2_config_str(uint32_t what)
{/*{{{*/
	char *ret;
	/* With a NULL buffer pcre2_config() only reports the required size, or a
	   negative error code, e.g. for an option this build does not support. */
	int len = pcre2_config(what, NULL);

	if (len <= 0) {
		/* Never feed a negative value into the allocation size. */
		return NULL;
	}

	ret = (char *) malloc((size_t)len + 1);
	if (!ret) {
		return NULL;
	}

	len = pcre2_config(what, ret);
	if (len <= 0) {
		/* The buffer contents are undefined on error; do not hand them out. */
		free(ret);
		return NULL;
	}

	return ret;
}/*}}}*/
343
344 /* {{{ PHP_MINFO_FUNCTION(pcre) */
PHP_MINFO_FUNCTION(pcre)345 static PHP_MINFO_FUNCTION(pcre)
346 {
347 #ifdef HAVE_PCRE_JIT_SUPPORT
348 uint32_t flag = 0;
349 char *jit_target = _pcre2_config_str(PCRE2_CONFIG_JITTARGET);
350 #endif
351 char *version = _pcre2_config_str(PCRE2_CONFIG_VERSION);
352 char *unicode = _pcre2_config_str(PCRE2_CONFIG_UNICODE_VERSION);
353
354 php_info_print_table_start();
355 php_info_print_table_row(2, "PCRE (Perl Compatible Regular Expressions) Support", "enabled" );
356 php_info_print_table_row(2, "PCRE Library Version", version);
357 free(version);
358 php_info_print_table_row(2, "PCRE Unicode Version", unicode);
359 free(unicode);
360
361 #ifdef HAVE_PCRE_JIT_SUPPORT
362 if (!pcre2_config(PCRE2_CONFIG_JIT, &flag)) {
363 php_info_print_table_row(2, "PCRE JIT Support", flag ? "enabled" : "disabled");
364 } else {
365 php_info_print_table_row(2, "PCRE JIT Support", "unknown" );
366 }
367 if (jit_target) {
368 php_info_print_table_row(2, "PCRE JIT Target", jit_target);
369 }
370 free(jit_target);
371 #else
372 php_info_print_table_row(2, "PCRE JIT Support", "not compiled in" );
373 #endif
374
375 #ifdef HAVE_PCRE_VALGRIND_SUPPORT
376 php_info_print_table_row(2, "PCRE Valgrind Support", "enabled" );
377 #endif
378
379 php_info_print_table_end();
380
381 DISPLAY_INI_ENTRIES();
382 }
383 /* }}} */
384
385 /* {{{ PHP_MINIT_FUNCTION(pcre) */
PHP_MINIT_FUNCTION(pcre)386 static PHP_MINIT_FUNCTION(pcre)
387 {
388 char *version;
389
390 #ifdef HAVE_PCRE_JIT_SUPPORT
391 if (UNEXPECTED(!pcre2_init_ok)) {
392 /* Retry. */
393 php_pcre_init_pcre2(PCRE_G(jit));
394 if (!pcre2_init_ok) {
395 return FAILURE;
396 }
397 }
398 #endif
399
400 REGISTER_INI_ENTRIES();
401
402 REGISTER_LONG_CONSTANT("PREG_PATTERN_ORDER", PREG_PATTERN_ORDER, CONST_CS | CONST_PERSISTENT);
403 REGISTER_LONG_CONSTANT("PREG_SET_ORDER", PREG_SET_ORDER, CONST_CS | CONST_PERSISTENT);
404 REGISTER_LONG_CONSTANT("PREG_OFFSET_CAPTURE", PREG_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
405 REGISTER_LONG_CONSTANT("PREG_UNMATCHED_AS_NULL", PREG_UNMATCHED_AS_NULL, CONST_CS | CONST_PERSISTENT);
406 REGISTER_LONG_CONSTANT("PREG_SPLIT_NO_EMPTY", PREG_SPLIT_NO_EMPTY, CONST_CS | CONST_PERSISTENT);
407 REGISTER_LONG_CONSTANT("PREG_SPLIT_DELIM_CAPTURE", PREG_SPLIT_DELIM_CAPTURE, CONST_CS | CONST_PERSISTENT);
408 REGISTER_LONG_CONSTANT("PREG_SPLIT_OFFSET_CAPTURE", PREG_SPLIT_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
409 REGISTER_LONG_CONSTANT("PREG_GREP_INVERT", PREG_GREP_INVERT, CONST_CS | CONST_PERSISTENT);
410
411 REGISTER_LONG_CONSTANT("PREG_NO_ERROR", PHP_PCRE_NO_ERROR, CONST_CS | CONST_PERSISTENT);
412 REGISTER_LONG_CONSTANT("PREG_INTERNAL_ERROR", PHP_PCRE_INTERNAL_ERROR, CONST_CS | CONST_PERSISTENT);
413 REGISTER_LONG_CONSTANT("PREG_BACKTRACK_LIMIT_ERROR", PHP_PCRE_BACKTRACK_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
414 REGISTER_LONG_CONSTANT("PREG_RECURSION_LIMIT_ERROR", PHP_PCRE_RECURSION_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
415 REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_ERROR", PHP_PCRE_BAD_UTF8_ERROR, CONST_CS | CONST_PERSISTENT);
416 REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_OFFSET_ERROR", PHP_PCRE_BAD_UTF8_OFFSET_ERROR, CONST_CS | CONST_PERSISTENT);
417 REGISTER_LONG_CONSTANT("PREG_JIT_STACKLIMIT_ERROR", PHP_PCRE_JIT_STACKLIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
418 version = _pcre2_config_str(PCRE2_CONFIG_VERSION);
419 REGISTER_STRING_CONSTANT("PCRE_VERSION", version, CONST_CS | CONST_PERSISTENT);
420 free(version);
421 REGISTER_LONG_CONSTANT("PCRE_VERSION_MAJOR", PCRE2_MAJOR, CONST_CS | CONST_PERSISTENT);
422 REGISTER_LONG_CONSTANT("PCRE_VERSION_MINOR", PCRE2_MINOR, CONST_CS | CONST_PERSISTENT);
423
424 #ifdef HAVE_PCRE_JIT_SUPPORT
425 REGISTER_BOOL_CONSTANT("PCRE_JIT_SUPPORT", 1, CONST_CS | CONST_PERSISTENT);
426 #else
427 REGISTER_BOOL_CONSTANT("PCRE_JIT_SUPPORT", 0, CONST_CS | CONST_PERSISTENT);
428 #endif
429
430 return SUCCESS;
431 }
432 /* }}} */
433
434 /* {{{ PHP_MSHUTDOWN_FUNCTION(pcre) */
/* Module shutdown: removes the INI entries registered at startup. Always
   returns SUCCESS. */
static PHP_MSHUTDOWN_FUNCTION(pcre)
{
	UNREGISTER_INI_ENTRIES();

	return SUCCESS;
}
441 /* }}} */
442
443 #ifdef HAVE_PCRE_JIT_SUPPORT
444 /* {{{ PHP_RINIT_FUNCTION(pcre) */
PHP_RINIT_FUNCTION(pcre)445 static PHP_RINIT_FUNCTION(pcre)
446 {
447 if (UNEXPECTED(!pcre2_init_ok)) {
448 /* Retry. */
449 php_pcre_mutex_lock();
450 php_pcre_init_pcre2(PCRE_G(jit));
451 if (!pcre2_init_ok) {
452 php_pcre_mutex_unlock();
453 return FAILURE;
454 }
455 php_pcre_mutex_unlock();
456 }
457
458 mdata_used = 0;
459
460 return SUCCESS;
461 }
462 /* }}} */
463 #endif
464
465 /* {{{ static pcre_clean_cache */
/* Hash apply callback used to evict cache entries.
   arg points to an int holding how many entries may still be removed; it is
   decremented for every entry that is removed. Entries that are currently
   referenced (refcount != 0) are always kept. */
static int pcre_clean_cache(zval *data, void *arg)
{
	int *remaining = (int *)arg;
	pcre_cache_entry *entry = (pcre_cache_entry *) Z_PTR_P(data);

	if (*remaining <= 0 || entry->refcount) {
		return ZEND_HASH_APPLY_KEEP;
	}

	(*remaining)--;
	return ZEND_HASH_APPLY_REMOVE;
}
478 /* }}} */
479
480 /* {{{ static make_subpats_table */
/* Builds a lookup table from capture-group number to group name.

   num_subpats: number of slots to allocate; every group number found in the
                pattern's name table is used as an index into the result.
   pce:         cache entry of the compiled pattern.

   Returns an ecalloc()ed array whose slot N holds the name of group N (NULL
   for unnamed groups). The name pointers refer into the pattern's own name
   table and are not copied. Returns NULL, after emitting a warning, when the
   name table cannot be queried or a group name is a numeric string. */
static char **make_subpats_table(uint32_t num_subpats, pcre_cache_entry *pce)
{
	uint32_t name_cnt = pce->name_count, name_size, ni;
	char *name_table;
	char **subpat_names;
	int rc1, rc2;

	rc1 = pcre2_pattern_info(pce->re, PCRE2_INFO_NAMETABLE, &name_table);
	rc2 = pcre2_pattern_info(pce->re, PCRE2_INFO_NAMEENTRYSIZE, &name_size);
	if (rc1 < 0 || rc2 < 0) {
		/* The message names the function that is actually called here. */
		php_error_docref(NULL, E_WARNING, "Internal pcre2_pattern_info() error %d", rc1 < 0 ? rc1 : rc2);
		return NULL;
	}

	subpat_names = (char **)ecalloc(num_subpats, sizeof(char *));
	for (ni = 0; ni < name_cnt; ni++, name_table += name_size) {
		/* Each entry starts with the group number as two bytes, most
		   significant first, followed by the group name. */
		unsigned short name_idx = 0x100 * (unsigned char)name_table[0] + (unsigned char)name_table[1];
		char *name = name_table + 2;

		subpat_names[name_idx] = name;
		if (is_numeric_string(name, strlen(name), NULL, NULL, 0) > 0) {
			php_error_docref(NULL, E_WARNING, "Numeric named subpatterns are not allowed");
			efree(subpat_names);
			return NULL;
		}
	}
	return subpat_names;
}
509 /* }}} */
510
511 /* {{{ static calculate_unit_length */
512 /* Calculates the byte length of the next character. Assumes valid UTF-8 for PCRE2_UTF. */
calculate_unit_length(pcre_cache_entry * pce,char * start)513 static zend_always_inline size_t calculate_unit_length(pcre_cache_entry *pce, char *start)
514 {
515 size_t unit_len;
516
517 if (pce->compile_options & PCRE2_UTF) {
518 char *end = start;
519
520 /* skip continuation bytes */
521 while ((*++end & 0xC0) == 0x80);
522 unit_len = end - start;
523 } else {
524 unit_len = 1;
525 }
526 return unit_len;
527 }
528 /* }}} */
529
/* {{{ pcre_get_compiled_regex_cache_ex
 */
pcre_get_compiled_regex_cache_ex(zend_string * regex,int locale_aware)532 PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, int locale_aware)
533 {
534 pcre2_code *re = NULL;
535 uint32_t coptions = 0;
536 uint32_t extra_coptions = PHP_PCRE_DEFAULT_EXTRA_COPTIONS;
537 PCRE2_UCHAR error[128];
538 PCRE2_SIZE erroffset;
539 int errnumber;
540 char delimiter;
541 char start_delimiter;
542 char end_delimiter;
543 char *p, *pp;
544 char *pattern;
545 size_t pattern_len;
546 uint32_t poptions = 0;
547 #if HAVE_SETLOCALE
548 const uint8_t *tables = NULL;
549 #endif
550 zval *zv;
551 pcre_cache_entry new_entry;
552 int rc;
553 zend_string *key;
554 pcre_cache_entry *ret;
555
556 #if HAVE_SETLOCALE
557 if (locale_aware && BG(locale_string) &&
558 (ZSTR_LEN(BG(locale_string)) != 1 && ZSTR_VAL(BG(locale_string))[0] != 'C')) {
559 key = zend_string_alloc(ZSTR_LEN(regex) + ZSTR_LEN(BG(locale_string)) + 1, 0);
560 memcpy(ZSTR_VAL(key), ZSTR_VAL(BG(locale_string)), ZSTR_LEN(BG(locale_string)) + 1);
561 memcpy(ZSTR_VAL(key) + ZSTR_LEN(BG(locale_string)), ZSTR_VAL(regex), ZSTR_LEN(regex) + 1);
562 } else
563 #endif
564 {
565 key = regex;
566 }
567
568 /* Try to lookup the cached regex entry, and if successful, just pass
569 back the compiled pattern, otherwise go on and compile it. */
570 zv = zend_hash_find(&PCRE_G(pcre_cache), key);
571 if (zv) {
572 #if HAVE_SETLOCALE
573 if (key != regex) {
574 zend_string_release_ex(key, 0);
575 }
576 #endif
577 return (pcre_cache_entry*)Z_PTR_P(zv);
578 }
579
580 p = ZSTR_VAL(regex);
581
582 /* Parse through the leading whitespace, and display a warning if we
583 get to the end without encountering a delimiter. */
584 while (isspace((int)*(unsigned char *)p)) p++;
585 if (*p == 0) {
586 #if HAVE_SETLOCALE
587 if (key != regex) {
588 zend_string_release_ex(key, 0);
589 }
590 #endif
591 php_error_docref(NULL, E_WARNING,
592 p < ZSTR_VAL(regex) + ZSTR_LEN(regex) ? "Null byte in regex" : "Empty regular expression");
593 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
594 return NULL;
595 }
596
597 /* Get the delimiter and display a warning if it is alphanumeric
598 or a backslash. */
599 delimiter = *p++;
600 if (isalnum((int)*(unsigned char *)&delimiter) || delimiter == '\\') {
601 #if HAVE_SETLOCALE
602 if (key != regex) {
603 zend_string_release_ex(key, 0);
604 }
605 #endif
606 php_error_docref(NULL,E_WARNING, "Delimiter must not be alphanumeric or backslash");
607 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
608 return NULL;
609 }
610
611 start_delimiter = delimiter;
612 if ((pp = strchr("([{< )]}> )]}>", delimiter)))
613 delimiter = pp[5];
614 end_delimiter = delimiter;
615
616 pp = p;
617
618 if (start_delimiter == end_delimiter) {
619 /* We need to iterate through the pattern, searching for the ending delimiter,
620 but skipping the backslashed delimiters. If the ending delimiter is not
621 found, display a warning. */
622 while (*pp != 0) {
623 if (*pp == '\\' && pp[1] != 0) pp++;
624 else if (*pp == delimiter)
625 break;
626 pp++;
627 }
628 } else {
629 /* We iterate through the pattern, searching for the matching ending
630 * delimiter. For each matching starting delimiter, we increment nesting
631 * level, and decrement it for each matching ending delimiter. If we
632 * reach the end of the pattern without matching, display a warning.
633 */
634 int brackets = 1; /* brackets nesting level */
635 while (*pp != 0) {
636 if (*pp == '\\' && pp[1] != 0) pp++;
637 else if (*pp == end_delimiter && --brackets <= 0)
638 break;
639 else if (*pp == start_delimiter)
640 brackets++;
641 pp++;
642 }
643 }
644
645 if (*pp == 0) {
646 #if HAVE_SETLOCALE
647 if (key != regex) {
648 zend_string_release_ex(key, 0);
649 }
650 #endif
651 if (pp < ZSTR_VAL(regex) + ZSTR_LEN(regex)) {
652 php_error_docref(NULL,E_WARNING, "Null byte in regex");
653 } else if (start_delimiter == end_delimiter) {
654 php_error_docref(NULL,E_WARNING, "No ending delimiter '%c' found", delimiter);
655 } else {
656 php_error_docref(NULL,E_WARNING, "No ending matching delimiter '%c' found", delimiter);
657 }
658 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
659 return NULL;
660 }
661
662 /* Make a copy of the actual pattern. */
663 pattern_len = pp - p;
664 pattern = estrndup(p, pattern_len);
665
666 /* Move on to the options */
667 pp++;
668
669 /* Parse through the options, setting appropriate flags. Display
670 a warning if we encounter an unknown modifier. */
671 while (pp < ZSTR_VAL(regex) + ZSTR_LEN(regex)) {
672 switch (*pp++) {
673 /* Perl compatible options */
674 case 'i': coptions |= PCRE2_CASELESS; break;
675 case 'm': coptions |= PCRE2_MULTILINE; break;
676 case 's': coptions |= PCRE2_DOTALL; break;
677 case 'x': coptions |= PCRE2_EXTENDED; break;
678
679 /* PCRE specific options */
680 case 'A': coptions |= PCRE2_ANCHORED; break;
681 case 'D': coptions |= PCRE2_DOLLAR_ENDONLY;break;
682 case 'S': /* Pass. */ break;
683 case 'U': coptions |= PCRE2_UNGREEDY; break;
684 case 'X': extra_coptions &= ~PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL; break;
685 case 'u': coptions |= PCRE2_UTF;
686 /* In PCRE, by default, \d, \D, \s, \S, \w, and \W recognize only ASCII
687 characters, even in UTF-8 mode. However, this can be changed by setting
688 the PCRE2_UCP option. */
689 #ifdef PCRE2_UCP
690 coptions |= PCRE2_UCP;
691 #endif
692 break;
693 case 'J': coptions |= PCRE2_DUPNAMES; break;
694
695 /* Custom preg options */
696 case 'e': poptions |= PREG_REPLACE_EVAL; break;
697
698 case ' ':
699 case '\n':
700 case '\r':
701 break;
702
703 default:
704 if (pp[-1]) {
705 php_error_docref(NULL,E_WARNING, "Unknown modifier '%c'", pp[-1]);
706 } else {
707 php_error_docref(NULL,E_WARNING, "Null byte in regex");
708 }
709 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
710 efree(pattern);
711 #if HAVE_SETLOCALE
712 if (key != regex) {
713 zend_string_release_ex(key, 0);
714 }
715 #endif
716 return NULL;
717 }
718 }
719
720 if (poptions & PREG_REPLACE_EVAL) {
721 php_error_docref(NULL, E_WARNING, "The /e modifier is no longer supported, use preg_replace_callback instead");
722 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
723 efree(pattern);
724 #if HAVE_SETLOCALE
725 if (key != regex) {
726 zend_string_release_ex(key, 0);
727 }
728 #endif
729 return NULL;
730 }
731
732 #if HAVE_SETLOCALE
733 if (key != regex) {
734 tables = (uint8_t *)zend_hash_find_ptr(&char_tables, BG(locale_string));
735 if (!tables) {
736 zend_string *_k;
737 tables = pcre2_maketables(gctx);
738 if (UNEXPECTED(!tables)) {
739 php_error_docref(NULL,E_WARNING, "Failed to generate locale character tables");
740 pcre_handle_exec_error(PCRE2_ERROR_NOMEMORY);
741 zend_string_release_ex(key, 0);
742 efree(pattern);
743 return NULL;
744 }
745 _k = zend_string_init(ZSTR_VAL(BG(locale_string)), ZSTR_LEN(BG(locale_string)), 1);
746 zend_hash_add_ptr(&char_tables, _k, (void *)tables);
747 zend_string_release(_k);
748 }
749 pcre2_set_character_tables(cctx, tables);
750 }
751 #endif
752
753 /* Set extra options for the compile context. */
754 if (PHP_PCRE_DEFAULT_EXTRA_COPTIONS != extra_coptions) {
755 pcre2_set_compile_extra_options(cctx, extra_coptions);
756 }
757
758 /* Compile pattern and display a warning if compilation failed. */
759 re = pcre2_compile((PCRE2_SPTR)pattern, pattern_len, coptions, &errnumber, &erroffset, cctx);
760
761 /* Reset the compile context extra options to default. */
762 if (PHP_PCRE_DEFAULT_EXTRA_COPTIONS != extra_coptions) {
763 pcre2_set_compile_extra_options(cctx, PHP_PCRE_DEFAULT_EXTRA_COPTIONS);
764 }
765
766 if (re == NULL) {
767 #if HAVE_SETLOCALE
768 if (key != regex) {
769 zend_string_release_ex(key, 0);
770 }
771 #endif
772 pcre2_get_error_message(errnumber, error, sizeof(error));
773 php_error_docref(NULL,E_WARNING, "Compilation failed: %s at offset %zu", error, erroffset);
774 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
775 efree(pattern);
776 return NULL;
777 }
778
779 #ifdef HAVE_PCRE_JIT_SUPPORT
780 if (PCRE_G(jit)) {
781 /* Enable PCRE JIT compiler */
782 rc = pcre2_jit_compile(re, PCRE2_JIT_COMPLETE);
783 if (EXPECTED(rc >= 0)) {
784 size_t jit_size = 0;
785 if (!pcre2_pattern_info(re, PCRE2_INFO_JITSIZE, &jit_size) && jit_size > 0) {
786 poptions |= PREG_JIT;
787 }
788 } else if (rc == PCRE2_ERROR_NOMEMORY) {
789 php_error_docref(NULL, E_WARNING,
790 "Allocation of JIT memory failed, PCRE JIT will be disabled. "
791 "This is likely caused by security restrictions. "
792 "Either grant PHP permission to allocate executable memory, or set pcre.jit=0");
793 PCRE_G(jit) = 0;
794 } else {
795 pcre2_get_error_message(rc, error, sizeof(error));
796 php_error_docref(NULL, E_WARNING, "JIT compilation failed: %s", error);
797 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
798 }
799 }
800 #endif
801 efree(pattern);
802
803 /*
804 * If we reached cache limit, clean out the items from the head of the list;
805 * these are supposedly the oldest ones (but not necessarily the least used
806 * ones).
807 */
808 if (zend_hash_num_elements(&PCRE_G(pcre_cache)) == PCRE_CACHE_SIZE) {
809 int num_clean = PCRE_CACHE_SIZE / 8;
810 zend_hash_apply_with_argument(&PCRE_G(pcre_cache), pcre_clean_cache, &num_clean);
811 }
812
813 /* Store the compiled pattern and extra info in the cache. */
814 new_entry.re = re;
815 new_entry.preg_options = poptions;
816 new_entry.compile_options = coptions;
817 new_entry.extra_compile_options = extra_coptions;
818 new_entry.refcount = 0;
819
820 rc = pcre2_pattern_info(re, PCRE2_INFO_CAPTURECOUNT, &new_entry.capture_count);
821 if (rc < 0) {
822 #if HAVE_SETLOCALE
823 if (key != regex) {
824 zend_string_release_ex(key, 0);
825 }
826 #endif
827 php_error_docref(NULL, E_WARNING, "Internal pcre2_pattern_info() error %d", rc);
828 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
829 return NULL;
830 }
831
832 rc = pcre2_pattern_info(re, PCRE2_INFO_NAMECOUNT, &new_entry.name_count);
833 if (rc < 0) {
834 #if HAVE_SETLOCALE
835 if (key != regex) {
836 zend_string_release_ex(key, 0);
837 }
838 #endif
839 php_error_docref(NULL, E_WARNING, "Internal pcre_pattern_info() error %d", rc);
840 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
841 return NULL;
842 }
843
844 /*
845 * Interned strings are not duplicated when stored in HashTable,
846 * but all the interned strings created during HTTP request are removed
847 * at end of request. However PCRE_G(pcre_cache) must be consistent
848 * on the next request as well. So we disable usage of interned strings
849 * as hash keys especually for this table.
850 * See bug #63180
851 */
852 if (!(GC_FLAGS(key) & IS_STR_PERMANENT)) {
853 zend_string *str = zend_string_init(ZSTR_VAL(key), ZSTR_LEN(key), 1);
854
855 GC_MAKE_PERSISTENT_LOCAL(str);
856
857 #if HAVE_SETLOCALE
858 if (key != regex) {
859 zend_string_release_ex(key, 0);
860 }
861 #endif
862 ret = zend_hash_add_new_mem(&PCRE_G(pcre_cache), str, &new_entry, sizeof(pcre_cache_entry));
863 zend_string_release(str);
864 } else {
865 ret = zend_hash_add_new_mem(&PCRE_G(pcre_cache), key, &new_entry, sizeof(pcre_cache_entry));
866 }
867
868 return ret;
869 }
870 /* }}} */
871
872 /* {{{ pcre_get_compiled_regex_cache
873 */
/* Convenience wrapper: looks up (or compiles) the pattern with locale
   awareness enabled. Returns whatever pcre_get_compiled_regex_cache_ex()
   returns. */
PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex)
{
	return pcre_get_compiled_regex_cache_ex(regex, 1);
}
878 /* }}} */
879
880 /* {{{ pcre_get_compiled_regex
881 */
pcre_get_compiled_regex(zend_string * regex,uint32_t * capture_count,uint32_t * preg_options)882 PHPAPI pcre2_code *pcre_get_compiled_regex(zend_string *regex, uint32_t *capture_count, uint32_t *preg_options)
883 {
884 pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex);
885
886 if (preg_options) {
887 *preg_options = 0;
888 }
889 if (capture_count) {
890 *capture_count = pce ? pce->capture_count : 0;
891 }
892
893 return pce ? pce->re : NULL;
894 }
895 /* }}} */
896
897 /* {{{ pcre_get_compiled_regex_ex
898 */
pcre_get_compiled_regex_ex(zend_string * regex,uint32_t * capture_count,uint32_t * preg_options,uint32_t * compile_options)899 PHPAPI pcre2_code* pcre_get_compiled_regex_ex(zend_string *regex, uint32_t *capture_count, uint32_t *preg_options, uint32_t *compile_options)
900 {
901 pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex);
902
903 if (preg_options) {
904 *preg_options = 0;
905 }
906 if (compile_options) {
907 *compile_options = pce ? pce->compile_options : 0;
908 }
909 if (capture_count) {
910 *capture_count = pce ? pce->capture_count : 0;
911 }
912
913 return pce ? pce->re : NULL;
914 }
915 /* }}} */
916
917 /* XXX For the cases where it's only about match yes/no and no capture
918 required, perhaps just a minimum sized data would suffice. */
php_pcre_create_match_data(uint32_t capture_count,pcre2_code * re)919 PHPAPI pcre2_match_data *php_pcre_create_match_data(uint32_t capture_count, pcre2_code *re)
920 {/*{{{*/
921
922 assert(NULL != re);
923
924 if (EXPECTED(!mdata_used)) {
925 int rc = 0;
926
927 if (!capture_count) {
928 /* As we deal with a non cached pattern, no other way to gather this info. */
929 rc = pcre2_pattern_info(re, PCRE2_INFO_CAPTURECOUNT, &capture_count);
930 }
931
932 if (rc >= 0 && capture_count + 1 <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
933 mdata_used = 1;
934 return mdata;
935 }
936 }
937
938 return pcre2_match_data_create_from_pattern(re, gctx);
939 }/*}}}*/
940
php_pcre_free_match_data(pcre2_match_data * match_data)941 PHPAPI void php_pcre_free_match_data(pcre2_match_data *match_data)
942 {/*{{{*/
943 if (UNEXPECTED(match_data != mdata)) {
944 pcre2_match_data_free(match_data);
945 } else {
946 mdata_used = 0;
947 }
948 }/*}}}*/
949
950 /* {{{ add_offset_pair */
/* Appends a two-element array [match, offset] to `result`, and additionally
   stores it under `name` when a name is given.

   For an unset group (offset == PCRE2_UNSET) the match element is NULL when
   unmatched_as_null is set and an empty string otherwise; in every other case
   it is a copy of the `len` bytes at `str`. */
static inline void add_offset_pair(zval *result, char *str, size_t len, PCRE2_SIZE offset, char *name, uint32_t unmatched_as_null)
{
	zval pair, element;

	array_init_size(&pair, 2);

	/* First element: the matched text (or its placeholder). */
	if (PCRE2_UNSET != offset) {
		ZVAL_STRINGL(&element, str, len);
	} else if (unmatched_as_null) {
		ZVAL_NULL(&element);
	} else {
		ZVAL_EMPTY_STRING(&element);
	}
	zend_hash_next_index_insert_new(Z_ARRVAL(pair), &element);

	/* Second element: the offset. */
	ZVAL_LONG(&element, offset);
	zend_hash_next_index_insert_new(Z_ARRVAL(pair), &element);

	if (name) {
		/* The pair is stored twice, so it needs a second reference. */
		Z_ADDREF(pair);
		zend_hash_str_update(Z_ARRVAL_P(result), name, strlen(name), &pair);
	}
	zend_hash_next_index_insert(Z_ARRVAL_P(result), &pair);
}
977 /* }}} */
978
/* Shared implementation of the userland match functions: parses
   (regex, subject[, &subpats[, flags[, offset]]]), fetches the compiled
   pattern and delegates to php_pcre_match_impl(). `global` selects global
   matching. Returns FALSE to userland when the arguments cannot be parsed or
   the pattern cannot be compiled. */
static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) /* {{{ */
{
	zend_string		 *regex;				/* Regular expression */
	zend_string		 *subject;				/* String to match against */
	zval			 *subpats = NULL;		/* Array for subpatterns */
	zend_long		  flags = 0;			/* Match control flags */
	zend_long		  start_offset = 0;		/* Where the new search starts */
	pcre_cache_entry *pce;					/* Compiled regular expression */
	int				  use_flags;			/* Was a flags argument passed? */

	ZEND_PARSE_PARAMETERS_START(2, 5)
		Z_PARAM_STR(regex)
		Z_PARAM_STR(subject)
		Z_PARAM_OPTIONAL
		Z_PARAM_ZVAL_DEREF(subpats)
		Z_PARAM_LONG(flags)
		Z_PARAM_LONG(start_offset)
	ZEND_PARSE_PARAMETERS_END_EX(RETURN_FALSE);

	/* Compile regex or get it from cache. */
	pce = pcre_get_compiled_regex_cache(regex);
	if (pce == NULL) {
		RETURN_FALSE;
	}

	use_flags = ZEND_NUM_ARGS() >= 4;

	/* Keep the entry from being evicted from the cache while it is in use. */
	pce->refcount++;
	php_pcre_match_impl(pce, ZSTR_VAL(subject), ZSTR_LEN(subject), return_value, subpats,
		global, use_flags, flags, start_offset);
	pce->refcount--;
}
1008 /* }}} */
1009
1010 /* {{{ php_pcre_match_impl() */
php_pcre_match_impl(pcre_cache_entry * pce,char * subject,size_t subject_len,zval * return_value,zval * subpats,int global,int use_flags,zend_long flags,zend_off_t start_offset)1011 PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, size_t subject_len, zval *return_value,
1012 zval *subpats, int global, int use_flags, zend_long flags, zend_off_t start_offset)
1013 {
1014 zval result_set, /* Holds a set of subpatterns after
1015 a global match */
1016 *match_sets = NULL; /* An array of sets of matches for each
1017 subpattern after a global match */
1018 uint32_t options; /* Execution options */
1019 int count; /* Count of matched subpatterns */
1020 PCRE2_SIZE *offsets; /* Array of subpattern offsets */
1021 uint32_t num_subpats; /* Number of captured subpatterns */
1022 int matched; /* Has anything matched */
1023 char **subpat_names; /* Array for named subpatterns */
1024 size_t i;
1025 uint32_t subpats_order; /* Order of subpattern matches */
1026 uint32_t offset_capture; /* Capture match offsets: yes/no */
1027 uint32_t unmatched_as_null; /* Null non-matches: yes/no */
1028 PCRE2_SPTR mark = NULL; /* Target for MARK name */
1029 zval marks; /* Array of marks for PREG_PATTERN_ORDER */
1030 pcre2_match_data *match_data;
1031 PCRE2_SIZE start_offset2;
1032
1033 ZVAL_UNDEF(&marks);
1034
1035 /* Overwrite the passed-in value for subpatterns with an empty array. */
1036 if (subpats != NULL) {
1037 zval_ptr_dtor(subpats);
1038 array_init(subpats);
1039 }
1040
1041 subpats_order = global ? PREG_PATTERN_ORDER : 0;
1042
1043 if (use_flags) {
1044 offset_capture = flags & PREG_OFFSET_CAPTURE;
1045 unmatched_as_null = flags & PREG_UNMATCHED_AS_NULL;
1046
1047 /*
1048 * subpats_order is pre-set to pattern mode so we change it only if
1049 * necessary.
1050 */
1051 if (flags & 0xff) {
1052 subpats_order = flags & 0xff;
1053 }
1054 if ((global && (subpats_order < PREG_PATTERN_ORDER || subpats_order > PREG_SET_ORDER)) ||
1055 (!global && subpats_order != 0)) {
1056 php_error_docref(NULL, E_WARNING, "Invalid flags specified");
1057 return;
1058 }
1059 } else {
1060 offset_capture = 0;
1061 unmatched_as_null = 0;
1062 }
1063
1064 /* Negative offset counts from the end of the string. */
1065 if (start_offset < 0) {
1066 if ((PCRE2_SIZE)-start_offset <= subject_len) {
1067 start_offset2 = subject_len + start_offset;
1068 } else {
1069 start_offset2 = 0;
1070 }
1071 } else {
1072 start_offset2 = (PCRE2_SIZE)start_offset;
1073 }
1074
1075 if (start_offset2 > subject_len) {
1076 pcre_handle_exec_error(PCRE2_ERROR_BADOFFSET);
1077 RETURN_FALSE;
1078 }
1079
1080 /* Calculate the size of the offsets array, and allocate memory for it. */
1081 num_subpats = pce->capture_count + 1;
1082
1083 /*
1084 * Build a mapping from subpattern numbers to their names. We will
1085 * allocate the table only if there are any named subpatterns.
1086 */
1087 subpat_names = NULL;
1088 if (pce->name_count > 0) {
1089 subpat_names = make_subpats_table(num_subpats, pce);
1090 if (!subpat_names) {
1091 RETURN_FALSE;
1092 }
1093 }
1094
1095 /* Allocate match sets array and initialize the values. */
1096 if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
1097 match_sets = (zval *)safe_emalloc(num_subpats, sizeof(zval), 0);
1098 for (i=0; i<num_subpats; i++) {
1099 array_init(&match_sets[i]);
1100 }
1101 }
1102
1103 matched = 0;
1104 PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1105
1106 if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
1107 match_data = mdata;
1108 } else {
1109 match_data = pcre2_match_data_create_from_pattern(pce->re, gctx);
1110 if (!match_data) {
1111 PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1112 if (subpat_names) {
1113 efree(subpat_names);
1114 }
1115 if (match_sets) {
1116 efree(match_sets);
1117 }
1118 RETURN_FALSE;
1119 }
1120 }
1121
1122 options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
1123
1124 /* Execute the regular expression. */
1125 #ifdef HAVE_PCRE_JIT_SUPPORT
1126 if ((pce->preg_options & PREG_JIT) && options) {
1127 count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1128 PCRE2_NO_UTF_CHECK, match_data, mctx);
1129 } else
1130 #endif
1131 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1132 options, match_data, mctx);
1133
1134 while (1) {
1135 /* If something has matched */
1136 if (count >= 0) {
1137 /* Check for too many substrings condition. */
1138 if (UNEXPECTED(count == 0)) {
1139 php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
1140 count = num_subpats;
1141 }
1142
1143 matched:
1144 matched++;
1145
1146 offsets = pcre2_get_ovector_pointer(match_data);
1147
1148 /* If subpatterns array has been passed, fill it in with values. */
1149 if (subpats != NULL) {
1150 /* Try to get the list of substrings and display a warning if failed. */
1151 if (offsets[1] < offsets[0]) {
1152 if (subpat_names) {
1153 efree(subpat_names);
1154 }
1155 if (match_sets) efree(match_sets);
1156 php_error_docref(NULL, E_WARNING, "Get subpatterns list failed");
1157 RETURN_FALSE;
1158 }
1159
1160 if (global) { /* global pattern matching */
1161 if (subpats && subpats_order == PREG_PATTERN_ORDER) {
1162 /* For each subpattern, insert it into the appropriate array. */
1163 if (offset_capture) {
1164 for (i = 0; i < count; i++) {
1165 add_offset_pair(&match_sets[i], subject + offsets[i<<1],
1166 offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL, unmatched_as_null);
1167 }
1168 } else {
1169 for (i = 0; i < count; i++) {
1170 if (PCRE2_UNSET == offsets[i<<1]) {
1171 if (unmatched_as_null) {
1172 add_next_index_null(&match_sets[i]);
1173 } else {
1174 add_next_index_str(&match_sets[i], ZSTR_EMPTY_ALLOC());
1175 }
1176 } else {
1177 add_next_index_stringl(&match_sets[i], subject + offsets[i<<1],
1178 offsets[(i<<1)+1] - offsets[i<<1]);
1179 }
1180 }
1181 }
1182 mark = pcre2_get_mark(match_data);
1183 /* Add MARK, if available */
1184 if (mark) {
1185 if (Z_TYPE(marks) == IS_UNDEF) {
1186 array_init(&marks);
1187 }
1188 add_index_string(&marks, matched - 1, (char *) mark);
1189 }
1190 /*
1191 * If the number of captured subpatterns on this run is
1192 * less than the total possible number, pad the result
1193 * arrays with NULLs or empty strings.
1194 */
1195 if (count < num_subpats) {
1196 for (; i < num_subpats; i++) {
1197 if (unmatched_as_null) {
1198 add_next_index_null(&match_sets[i]);
1199 } else {
1200 add_next_index_str(&match_sets[i], ZSTR_EMPTY_ALLOC());
1201 }
1202 }
1203 }
1204 } else {
1205 /* Allocate the result set array */
1206 array_init_size(&result_set, count + (mark ? 1 : 0));
1207
1208 /* Add all the subpatterns to it */
1209 if (subpat_names) {
1210 if (offset_capture) {
1211 for (i = 0; i < count; i++) {
1212 add_offset_pair(&result_set, subject + offsets[i<<1],
1213 offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], subpat_names[i], unmatched_as_null);
1214 }
1215 } else {
1216 for (i = 0; i < count; i++) {
1217 if (subpat_names[i]) {
1218 if (PCRE2_UNSET == offsets[i<<1]) {
1219 if (unmatched_as_null) {
1220 add_assoc_null(&result_set, subpat_names[i]);
1221 } else {
1222 add_assoc_str(&result_set, subpat_names[i], ZSTR_EMPTY_ALLOC());
1223 }
1224 } else {
1225 add_assoc_stringl(&result_set, subpat_names[i], subject + offsets[i<<1],
1226 offsets[(i<<1)+1] - offsets[i<<1]);
1227 }
1228 }
1229 if (PCRE2_UNSET == offsets[i<<1]) {
1230 if (unmatched_as_null) {
1231 add_next_index_null(&result_set);
1232 } else {
1233 add_next_index_str(&result_set, ZSTR_EMPTY_ALLOC());
1234 }
1235 } else {
1236 add_next_index_stringl(&result_set, subject + offsets[i<<1],
1237 offsets[(i<<1)+1] - offsets[i<<1]);
1238 }
1239 }
1240 }
1241 } else {
1242 if (offset_capture) {
1243 for (i = 0; i < count; i++) {
1244 add_offset_pair(&result_set, subject + offsets[i<<1],
1245 offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL, unmatched_as_null);
1246 }
1247 } else {
1248 for (i = 0; i < count; i++) {
1249 if (PCRE2_UNSET == offsets[i<<1]) {
1250 if (unmatched_as_null) {
1251 add_next_index_null(&result_set);
1252 } else {
1253 add_next_index_str(&result_set, ZSTR_EMPTY_ALLOC());
1254 }
1255 } else {
1256 add_next_index_stringl(&result_set, subject + offsets[i<<1],
1257 offsets[(i<<1)+1] - offsets[i<<1]);
1258 }
1259 }
1260 }
1261 }
1262 /* Add MARK, if available */
1263 mark = pcre2_get_mark(match_data);
1264 if (mark) {
1265 add_assoc_string_ex(&result_set, "MARK", sizeof("MARK") - 1, (char *)mark);
1266 }
1267 /* And add it to the output array */
1268 zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &result_set);
1269 }
1270 } else { /* single pattern matching */
1271 /* For each subpattern, insert it into the subpatterns array. */
1272 if (subpat_names) {
1273 if (offset_capture) {
1274 for (i = 0; i < count; i++) {
1275 add_offset_pair(subpats, subject + offsets[i<<1],
1276 offsets[(i<<1)+1] - offsets[i<<1],
1277 offsets[i<<1], subpat_names[i], unmatched_as_null);
1278 }
1279 } else {
1280 for (i = 0; i < count; i++) {
1281 if (subpat_names[i]) {
1282 if (PCRE2_UNSET == offsets[i<<1]) {
1283 if (unmatched_as_null) {
1284 add_assoc_null(subpats, subpat_names[i]);
1285 } else {
1286 add_assoc_str(subpats, subpat_names[i], ZSTR_EMPTY_ALLOC());
1287 }
1288 } else {
1289 add_assoc_stringl(subpats, subpat_names[i], subject + offsets[i<<1],
1290 offsets[(i<<1)+1] - offsets[i<<1]);
1291 }
1292 }
1293 if (PCRE2_UNSET == offsets[i<<1]) {
1294 if (unmatched_as_null) {
1295 add_next_index_null(subpats);
1296 } else {
1297 add_next_index_str(subpats, ZSTR_EMPTY_ALLOC());
1298 }
1299 } else {
1300 add_next_index_stringl(subpats, subject + offsets[i<<1],
1301 offsets[(i<<1)+1] - offsets[i<<1]);
1302 }
1303 }
1304 }
1305 } else {
1306 if (offset_capture) {
1307 for (i = 0; i < count; i++) {
1308 add_offset_pair(subpats, subject + offsets[i<<1],
1309 offsets[(i<<1)+1] - offsets[i<<1],
1310 offsets[i<<1], NULL, unmatched_as_null);
1311 }
1312 } else {
1313 for (i = 0; i < count; i++) {
1314 if (PCRE2_UNSET == offsets[i<<1]) {
1315 if (unmatched_as_null) {
1316 add_next_index_null(subpats);
1317 } else {
1318 add_next_index_str(subpats, ZSTR_EMPTY_ALLOC());
1319 }
1320 } else {
1321 add_next_index_stringl(subpats, subject + offsets[i<<1],
1322 offsets[(i<<1)+1] - offsets[i<<1]);
1323 }
1324 }
1325 }
1326 }
1327 /* Add MARK, if available */
1328 mark = pcre2_get_mark(match_data);
1329 if (mark) {
1330 add_assoc_string_ex(subpats, "MARK", sizeof("MARK") - 1, (char *)mark);
1331 }
1332 break;
1333 }
1334 }
1335
1336 /* Advance to the next piece. */
1337 start_offset2 = offsets[1];
1338
1339 /* If we have matched an empty string, mimic what Perl's /g options does.
1340 This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
1341 the match again at the same point. If this fails (picked up above) we
1342 advance to the next character. */
1343 if (start_offset2 == offsets[0]) {
1344 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1345 PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
1346 if (count >= 0) {
1347 if (global) {
1348 goto matched;
1349 } else {
1350 break;
1351 }
1352 } else if (count == PCRE2_ERROR_NOMATCH) {
1353 /* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
1354 this is not necessarily the end. We need to advance
1355 the start offset, and continue. Fudge the offset values
1356 to achieve this, unless we're already at the end of the string. */
1357 if (start_offset2 < subject_len) {
1358 size_t unit_len = calculate_unit_length(pce, subject + start_offset2);
1359
1360 start_offset2 += unit_len;
1361 } else {
1362 break;
1363 }
1364 } else {
1365 goto error;
1366 }
1367 }
1368 } else if (count == PCRE2_ERROR_NOMATCH) {
1369 break;
1370 } else {
1371 error:
1372 pcre_handle_exec_error(count);
1373 break;
1374 }
1375
1376 if (!global) {
1377 break;
1378 }
1379
1380 /* Execute the regular expression. */
1381 #ifdef HAVE_PCRE_JIT_SUPPORT
1382 if ((pce->preg_options & PREG_JIT)) {
1383 if (PCRE2_UNSET == start_offset2 || start_offset2 > subject_len) {
1384 pcre_handle_exec_error(PCRE2_ERROR_BADOFFSET);
1385 break;
1386 }
1387 count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1388 PCRE2_NO_UTF_CHECK, match_data, mctx);
1389 } else
1390 #endif
1391 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1392 PCRE2_NO_UTF_CHECK, match_data, mctx);
1393 }
1394 if (match_data != mdata) {
1395 pcre2_match_data_free(match_data);
1396 }
1397
1398 /* Add the match sets to the output array and clean up */
1399 if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
1400 if (subpat_names) {
1401 for (i = 0; i < num_subpats; i++) {
1402 if (subpat_names[i]) {
1403 zend_hash_str_update(Z_ARRVAL_P(subpats), subpat_names[i],
1404 strlen(subpat_names[i]), &match_sets[i]);
1405 Z_ADDREF(match_sets[i]);
1406 }
1407 zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i]);
1408 }
1409 } else {
1410 for (i = 0; i < num_subpats; i++) {
1411 zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i]);
1412 }
1413 }
1414 efree(match_sets);
1415
1416 if (Z_TYPE(marks) != IS_UNDEF) {
1417 add_assoc_zval(subpats, "MARK", &marks);
1418 }
1419 }
1420
1421 if (subpat_names) {
1422 efree(subpat_names);
1423 }
1424
1425 /* Did we encounter an error? */
1426 if (PCRE_G(error_code) == PHP_PCRE_NO_ERROR) {
1427 RETVAL_LONG(matched);
1428 } else {
1429 RETVAL_FALSE;
1430 }
1431 }
1432 /* }}} */
1433
1434 /* {{{ proto int preg_match(string pattern, string subject [, array &subpatterns [, int flags [, int offset]]])
1435 Perform a Perl-style regular expression match */
PHP_FUNCTION(preg_match)1436 static PHP_FUNCTION(preg_match)
1437 {
1438 php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
1439 }
1440 /* }}} */
1441
1442 /* {{{ proto int preg_match_all(string pattern, string subject [, array &subpatterns [, int flags [, int offset]]])
1443 Perform a Perl-style global regular expression match */
PHP_FUNCTION(preg_match_all)1444 static PHP_FUNCTION(preg_match_all)
1445 {
1446 php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
1447 }
1448 /* }}} */
1449
1450 /* {{{ preg_get_backref
1451 */
/*
 * Try to parse a back-reference at *str.
 *
 * *str points at the introducer character (the callers in this file pass a
 * position holding '\\' or '$'). Accepted forms are the introducer followed
 * by one or two decimal digits, or "${" + one or two digits + "}".
 *
 * On success the number is stored in *backref, *str is advanced past the
 * reference and 1 is returned. On failure 0 is returned and *str is left
 * unchanged (*backref may already have been written).
 */
static int preg_get_backref(char **str, int *backref)
{
	char *cursor = *str;
	int braced;

	/* An introducer at the very end of the string cannot start a reference. */
	if (cursor[1] == '\0') {
		return 0;
	}

	/* Only '$' may be followed by an opening brace. */
	braced = (cursor[0] == '$' && cursor[1] == '{');
	cursor += braced ? 2 : 1;

	/* First digit is mandatory. */
	if (*cursor < '0' || *cursor > '9') {
		return 0;
	}
	*backref = *cursor - '0';
	cursor++;

	/* Second digit is optional. */
	if (*cursor >= '0' && *cursor <= '9') {
		*backref = *backref * 10 + *cursor - '0';
		cursor++;
	}

	/* A braced reference must be closed. */
	if (braced) {
		if (*cursor != '}') {
			return 0;
		}
		cursor++;
	}

	*str = cursor;
	return 1;
}
1487 /* }}} */
1488
1489 /* {{{ preg_do_repl_func
1490 */
preg_do_repl_func(zend_fcall_info * fci,zend_fcall_info_cache * fcc,char * subject,PCRE2_SIZE * offsets,char ** subpat_names,int count,const PCRE2_SPTR mark)1491 static zend_string *preg_do_repl_func(zend_fcall_info *fci, zend_fcall_info_cache *fcc, char *subject, PCRE2_SIZE *offsets, char **subpat_names, int count, const PCRE2_SPTR mark)
1492 {
1493 zend_string *result_str;
1494 zval retval; /* Function return value */
1495 zval arg; /* Argument to pass to function */
1496 int i;
1497
1498 array_init_size(&arg, count + (mark ? 1 : 0));
1499 if (subpat_names) {
1500 for (i = 0; i < count; i++) {
1501 if (subpat_names[i]) {
1502 add_assoc_stringl(&arg, subpat_names[i], &subject[offsets[i<<1]] , offsets[(i<<1)+1] - offsets[i<<1]);
1503 }
1504 add_next_index_stringl(&arg, &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1]);
1505 }
1506 } else {
1507 for (i = 0; i < count; i++) {
1508 add_next_index_stringl(&arg, &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1]);
1509 }
1510 }
1511 if (mark) {
1512 add_assoc_string(&arg, "MARK", (char *) mark);
1513 }
1514
1515 fci->retval = &retval;
1516 fci->param_count = 1;
1517 fci->params = &arg;
1518 fci->no_separation = 0;
1519
1520 if (zend_call_function(fci, fcc) == SUCCESS && Z_TYPE(retval) != IS_UNDEF) {
1521 if (EXPECTED(Z_TYPE(retval) == IS_STRING)) {
1522 result_str = Z_STR(retval);
1523 } else {
1524 result_str = zval_get_string_func(&retval);
1525 zval_ptr_dtor(&retval);
1526 }
1527 } else {
1528 if (!EG(exception)) {
1529 php_error_docref(NULL, E_WARNING, "Unable to call custom replacement function");
1530 }
1531
1532 result_str = zend_string_init(&subject[offsets[0]], offsets[1] - offsets[0], 0);
1533 }
1534
1535 zval_ptr_dtor(&arg);
1536
1537 return result_str;
1538 }
1539 /* }}} */
1540
1541 /* {{{ php_pcre_replace
1542 */
php_pcre_replace(zend_string * regex,zend_string * subject_str,char * subject,size_t subject_len,zend_string * replace_str,size_t limit,size_t * replace_count)1543 PHPAPI zend_string *php_pcre_replace(zend_string *regex,
1544 zend_string *subject_str,
1545 char *subject, size_t subject_len,
1546 zend_string *replace_str,
1547 size_t limit, size_t *replace_count)
1548 {
1549 pcre_cache_entry *pce; /* Compiled regular expression */
1550 zend_string *result; /* Function result */
1551
1552 /* Compile regex or get it from cache. */
1553 if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
1554 return NULL;
1555 }
1556 pce->refcount++;
1557 result = php_pcre_replace_impl(pce, subject_str, subject, subject_len, replace_str,
1558 limit, replace_count);
1559 pce->refcount--;
1560
1561 return result;
1562 }
1563 /* }}} */
1564
1565 /* {{{ php_pcre_replace_impl() */
php_pcre_replace_impl(pcre_cache_entry * pce,zend_string * subject_str,char * subject,size_t subject_len,zend_string * replace_str,size_t limit,size_t * replace_count)1566 PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *subject_str, char *subject, size_t subject_len, zend_string *replace_str, size_t limit, size_t *replace_count)
1567 {
1568 uint32_t options; /* Execution options */
1569 int count; /* Count of matched subpatterns */
1570 PCRE2_SIZE *offsets; /* Array of subpattern offsets */
1571 uint32_t num_subpats; /* Number of captured subpatterns */
1572 size_t new_len; /* Length of needed storage */
1573 size_t alloc_len; /* Actual allocated length */
1574 size_t match_len; /* Length of the current match */
1575 int backref; /* Backreference number */
1576 PCRE2_SIZE start_offset; /* Where the new search starts */
1577 size_t last_end_offset; /* Where the last search ended */
1578 char *walkbuf, /* Location of current replacement in the result */
1579 *walk, /* Used to walk the replacement string */
1580 *match, /* The current match */
1581 *piece, /* The current piece of subject */
1582 *replace_end, /* End of replacement string */
1583 walk_last; /* Last walked character */
1584 size_t result_len; /* Length of result */
1585 zend_string *result; /* Result of replacement */
1586 pcre2_match_data *match_data;
1587
1588 /* Calculate the size of the offsets array, and allocate memory for it. */
1589 num_subpats = pce->capture_count + 1;
1590 alloc_len = 0;
1591 result = NULL;
1592
1593 /* Initialize */
1594 match = NULL;
1595 start_offset = 0;
1596 last_end_offset = 0;
1597 result_len = 0;
1598 PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1599
1600 if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
1601 match_data = mdata;
1602 } else {
1603 match_data = pcre2_match_data_create_from_pattern(pce->re, gctx);
1604 if (!match_data) {
1605 PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1606 return NULL;
1607 }
1608 }
1609
1610 options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
1611
1612 /* Execute the regular expression. */
1613 #ifdef HAVE_PCRE_JIT_SUPPORT
1614 if ((pce->preg_options & PREG_JIT) && options) {
1615 count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1616 PCRE2_NO_UTF_CHECK, match_data, mctx);
1617 } else
1618 #endif
1619 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1620 options, match_data, mctx);
1621
1622 while (1) {
1623 piece = subject + last_end_offset;
1624
1625 if (count >= 0 && limit > 0) {
1626 zend_bool simple_string;
1627
1628 /* Check for too many substrings condition. */
1629 if (UNEXPECTED(count == 0)) {
1630 php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
1631 count = num_subpats;
1632 }
1633
1634 matched:
1635 offsets = pcre2_get_ovector_pointer(match_data);
1636
1637 if (UNEXPECTED(offsets[1] < offsets[0])) {
1638 PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1639 if (result) {
1640 zend_string_release_ex(result, 0);
1641 result = NULL;
1642 }
1643 break;
1644 }
1645
1646 if (replace_count) {
1647 ++*replace_count;
1648 }
1649
1650 /* Set the match location in subject */
1651 match = subject + offsets[0];
1652
1653 new_len = result_len + offsets[0] - last_end_offset; /* part before the match */
1654
1655 walk = ZSTR_VAL(replace_str);
1656 replace_end = walk + ZSTR_LEN(replace_str);
1657 walk_last = 0;
1658 simple_string = 1;
1659 while (walk < replace_end) {
1660 if ('\\' == *walk || '$' == *walk) {
1661 simple_string = 0;
1662 if (walk_last == '\\') {
1663 walk++;
1664 walk_last = 0;
1665 continue;
1666 }
1667 if (preg_get_backref(&walk, &backref)) {
1668 if (backref < count)
1669 new_len += offsets[(backref<<1)+1] - offsets[backref<<1];
1670 continue;
1671 }
1672 }
1673 new_len++;
1674 walk++;
1675 walk_last = walk[-1];
1676 }
1677
1678 if (new_len >= alloc_len) {
1679 alloc_len = zend_safe_address_guarded(2, new_len, alloc_len);
1680 if (result == NULL) {
1681 result = zend_string_alloc(alloc_len, 0);
1682 } else {
1683 result = zend_string_extend(result, alloc_len, 0);
1684 }
1685 }
1686
1687 if (match-piece > 0) {
1688 /* copy the part of the string before the match */
1689 memcpy(&ZSTR_VAL(result)[result_len], piece, match-piece);
1690 result_len += (match-piece);
1691 }
1692
1693 if (simple_string) {
1694 /* copy replacement */
1695 memcpy(&ZSTR_VAL(result)[result_len], ZSTR_VAL(replace_str), ZSTR_LEN(replace_str)+1);
1696 result_len += ZSTR_LEN(replace_str);
1697 } else {
1698 /* copy replacement and backrefs */
1699 walkbuf = ZSTR_VAL(result) + result_len;
1700
1701 walk = ZSTR_VAL(replace_str);
1702 walk_last = 0;
1703 while (walk < replace_end) {
1704 if ('\\' == *walk || '$' == *walk) {
1705 if (walk_last == '\\') {
1706 *(walkbuf-1) = *walk++;
1707 walk_last = 0;
1708 continue;
1709 }
1710 if (preg_get_backref(&walk, &backref)) {
1711 if (backref < count) {
1712 match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
1713 memcpy(walkbuf, subject + offsets[backref<<1], match_len);
1714 walkbuf += match_len;
1715 }
1716 continue;
1717 }
1718 }
1719 *walkbuf++ = *walk++;
1720 walk_last = walk[-1];
1721 }
1722 *walkbuf = '\0';
1723 /* increment the result length by how much we've added to the string */
1724 result_len += (walkbuf - (ZSTR_VAL(result) + result_len));
1725 }
1726
1727 limit--;
1728
1729 /* Advance to the next piece. */
1730 start_offset = last_end_offset = offsets[1];
1731
1732 /* If we have matched an empty string, mimic what Perl's /g options does.
1733 This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
1734 the match again at the same point. If this fails (picked up above) we
1735 advance to the next character. */
1736 if (start_offset == offsets[0]) {
1737 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1738 PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
1739
1740 piece = subject + start_offset;
1741 if (count >= 0 && limit > 0) {
1742 goto matched;
1743 } else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
1744 /* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
1745 this is not necessarily the end. We need to advance
1746 the start offset, and continue. Fudge the offset values
1747 to achieve this, unless we're already at the end of the string. */
1748 if (start_offset < subject_len) {
1749 size_t unit_len = calculate_unit_length(pce, piece);
1750 start_offset += unit_len;
1751 } else {
1752 goto not_matched;
1753 }
1754 } else {
1755 goto error;
1756 }
1757 }
1758
1759 } else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
1760 not_matched:
1761 if (!result && subject_str) {
1762 result = zend_string_copy(subject_str);
1763 break;
1764 }
1765 new_len = result_len + subject_len - last_end_offset;
1766 if (new_len >= alloc_len) {
1767 alloc_len = new_len; /* now we know exactly how long it is */
1768 if (NULL != result) {
1769 result = zend_string_realloc(result, alloc_len, 0);
1770 } else {
1771 result = zend_string_alloc(alloc_len, 0);
1772 }
1773 }
1774 /* stick that last bit of string on our output */
1775 memcpy(ZSTR_VAL(result) + result_len, piece, subject_len - last_end_offset);
1776 result_len += subject_len - last_end_offset;
1777 ZSTR_VAL(result)[result_len] = '\0';
1778 ZSTR_LEN(result) = result_len;
1779 break;
1780 } else {
1781 error:
1782 pcre_handle_exec_error(count);
1783 if (result) {
1784 zend_string_release_ex(result, 0);
1785 result = NULL;
1786 }
1787 break;
1788 }
1789
1790 #ifdef HAVE_PCRE_JIT_SUPPORT
1791 if (pce->preg_options & PREG_JIT) {
1792 count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1793 PCRE2_NO_UTF_CHECK, match_data, mctx);
1794 } else
1795 #endif
1796 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1797 PCRE2_NO_UTF_CHECK, match_data, mctx);
1798 }
1799 if (match_data != mdata) {
1800 pcre2_match_data_free(match_data);
1801 }
1802
1803 return result;
1804 }
1805 /* }}} */
1806
1807 /* {{{ php_pcre_replace_func_impl() */
/*
 * Replace matches of a compiled pattern in `subject` with the result of a
 * user callback.
 *
 * pce           - compiled pattern (cache entry)
 * subject_str   - optional zend_string form of the subject; when given and
 *                 nothing was replaced, a new reference to it is returned
 * subject/len   - subject bytes and length
 * fci, fcc      - callback invoked once per match through preg_do_repl_func()
 * limit         - maximum number of replacements
 * replace_count - optional counter, incremented once per replacement
 *
 * Returns the resulting string, or NULL on error (name table or match data
 * allocation failure, inconsistent offsets, or a PCRE execution error).
 */
static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_string *subject_str, char *subject, size_t subject_len, zend_fcall_info *fci, zend_fcall_info_cache *fcc, size_t limit, size_t *replace_count)
{
	uint32_t		 options;			/* Execution options */
	int				 count;				/* Count of matched subpatterns */
	PCRE2_SIZE		*offsets;			/* Array of subpattern offsets */
	char			**subpat_names;		/* Array for named subpatterns */
	uint32_t		 num_subpats;		/* Number of captured subpatterns */
	size_t			 new_len;			/* Length of needed storage */
	size_t			 alloc_len;			/* Actual allocated length */
	PCRE2_SIZE		 start_offset;		/* Where the new search starts */
	size_t			 last_end_offset;	/* Where the last search ended */
	char			*match,				/* The current match */
					*piece;				/* The current piece of subject */
	size_t			result_len; 		/* Length of result */
	zend_string		*result;			/* Result of replacement */
	zend_string     *eval_result;		/* Result of custom function */
	pcre2_match_data *match_data;
	zend_bool old_mdata_used;

	/* Calculate the size of the offsets array, and allocate memory for it. */
	num_subpats = pce->capture_count + 1;

	/*
	 * Build a mapping from subpattern numbers to their names. We will
	 * allocate the table only if there are any named subpatterns.
	 */
	subpat_names = NULL;
	if (UNEXPECTED(pce->name_count > 0)) {
		subpat_names = make_subpats_table(num_subpats, pce);
		if (!subpat_names) {
			return NULL;
		}
	}

	alloc_len = 0;
	result = NULL;

	/* Initialize */
	match = NULL;
	start_offset = 0;
	last_end_offset = 0;
	result_len = 0;
	PCRE_G(error_code) = PHP_PCRE_NO_ERROR;

	/* Claim the shared match data block for the whole call by setting
	   mdata_used; the previous flag value is restored on every exit path.
	   NOTE(review): presumably this keeps preg_* calls made from inside the
	   callback off the shared block - confirm. */
	old_mdata_used = mdata_used;
	if (!old_mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
		mdata_used = 1;
		match_data = mdata;
	} else {
		match_data = pcre2_match_data_create_from_pattern(pce->re, gctx);
		if (!match_data) {
			PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
			if (subpat_names) {
				efree(subpat_names);
			}
			mdata_used = old_mdata_used;
			return NULL;
		}
	}

	/* Non-UTF patterns skip the UTF validity check. */
	options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;

	/* Execute the regular expression. */
#ifdef HAVE_PCRE_JIT_SUPPORT
	if ((pce->preg_options & PREG_JIT) && options) {
		count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
				PCRE2_NO_UTF_CHECK, match_data, mctx);
	} else
#endif
	count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
			options, match_data, mctx);

	while (1) {
		piece = subject + last_end_offset;

		if (count >= 0 && limit) {
			/* Check for too many substrings condition. */
			if (UNEXPECTED(count == 0)) {
				php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
				count = num_subpats;
			}

matched:
			offsets = pcre2_get_ovector_pointer(match_data);

			/* A match that ends before it starts is treated as an internal error. */
			if (UNEXPECTED(offsets[1] < offsets[0])) {
				PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
				if (result) {
					zend_string_release_ex(result, 0);
					result = NULL;
				}
				break;
			}

			if (replace_count) {
				++*replace_count;
			}

			/* Set the match location in subject */
			match = subject + offsets[0];

			new_len = result_len + offsets[0] - last_end_offset; /* part before the match */

			/* Use custom function to get replacement string and its length. */
			eval_result = preg_do_repl_func(fci, fcc, subject, offsets, subpat_names, count,
				pcre2_get_mark(match_data));

			ZEND_ASSERT(eval_result);
			/* Overflow-checked: new_len += length of the callback result. */
			new_len = zend_safe_address_guarded(1, ZSTR_LEN(eval_result), new_len);
			if (new_len >= alloc_len) {
				alloc_len = zend_safe_address_guarded(2, new_len, alloc_len);
				if (result == NULL) {
					result = zend_string_alloc(alloc_len, 0);
				} else {
					result = zend_string_extend(result, alloc_len, 0);
				}
			}

			if (match-piece > 0) {
				/* copy the part of the string before the match */
				memcpy(ZSTR_VAL(result) + result_len, piece, match-piece);
				result_len += (match-piece);
			}

			/* If using custom function, copy result to the buffer and clean up. */
			memcpy(ZSTR_VAL(result) + result_len, ZSTR_VAL(eval_result), ZSTR_LEN(eval_result));
			result_len += ZSTR_LEN(eval_result);
			zend_string_release_ex(eval_result, 0);

			limit--;

			/* Advance to the next piece. */
			start_offset = last_end_offset = offsets[1];

			/* If we have matched an empty string, mimic what Perl's /g options does.
			   This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
			   the match again at the same point. If this fails (picked up above) we
			   advance to the next character. */
			if (start_offset == offsets[0]) {
				count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
					PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);

				piece = subject + start_offset;
				if (count >= 0 && limit) {
					goto matched;
				} else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
					/* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
					   this is not necessarily the end. We need to advance
					   the start offset, and continue. Fudge the offset values
					   to achieve this, unless we're already at the end of the string. */
					if (start_offset < subject_len) {
						size_t unit_len = calculate_unit_length(pce, piece);
						start_offset += unit_len;
					} else {
						goto not_matched;
					}
				} else {
					goto error;
				}
			}

		} else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
not_matched:
			/* Nothing was replaced: hand back the original string when we have it. */
			if (!result && subject_str) {
				result = zend_string_copy(subject_str);
				break;
			}
			new_len = result_len + subject_len - last_end_offset;
			if (new_len >= alloc_len) {
				alloc_len = new_len; /* now we know exactly how long it is */
				if (NULL != result) {
					result = zend_string_realloc(result, alloc_len, 0);
				} else {
					result = zend_string_alloc(alloc_len, 0);
				}
			}
			/* stick that last bit of string on our output */
			memcpy(ZSTR_VAL(result) + result_len, piece, subject_len - last_end_offset);
			result_len += subject_len - last_end_offset;
			ZSTR_VAL(result)[result_len] = '\0';
			ZSTR_LEN(result) = result_len;
			break;
		} else {
error:
			/* Any other negative count is a PCRE error: report it and drop the partial result. */
			pcre_handle_exec_error(count);
			if (result) {
				zend_string_release_ex(result, 0);
				result = NULL;
			}
			break;
		}
		/* Look for the next match starting at start_offset. */
#ifdef HAVE_PCRE_JIT_SUPPORT
		if ((pce->preg_options & PREG_JIT)) {
			count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
					PCRE2_NO_UTF_CHECK, match_data, mctx);
		} else
#endif
		count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
				PCRE2_NO_UTF_CHECK, match_data, mctx);
	}
	/* Only a privately allocated block is freed; the shared one is released
	   by restoring the saved flag below. */
	if (match_data != mdata) {
		pcre2_match_data_free(match_data);
	}
	mdata_used = old_mdata_used;

	if (UNEXPECTED(subpat_names)) {
		efree(subpat_names);
	}

	return result;
}
2019 /* }}} */
2020
2021 /* {{{ php_pcre_replace_func
2022 */
php_pcre_replace_func(zend_string * regex,zend_string * subject_str,zend_fcall_info * fci,zend_fcall_info_cache * fcc,size_t limit,size_t * replace_count)2023 static zend_always_inline zend_string *php_pcre_replace_func(zend_string *regex,
2024 zend_string *subject_str,
2025 zend_fcall_info *fci, zend_fcall_info_cache *fcc,
2026 size_t limit, size_t *replace_count)
2027 {
2028 pcre_cache_entry *pce; /* Compiled regular expression */
2029 zend_string *result; /* Function result */
2030
2031 /* Compile regex or get it from cache. */
2032 if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
2033 return NULL;
2034 }
2035 pce->refcount++;
2036 result = php_pcre_replace_func_impl(pce, subject_str, ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), fci, fcc,
2037 limit, replace_count);
2038 pce->refcount--;
2039
2040 return result;
2041 }
2042 /* }}} */
2043
2044 /* {{{ php_pcre_replace_array
2045 */
/* Applies every pattern in `regex` to the subject in turn, feeding the output
   of one replacement into the next. `replace` is either a single string used
   for all patterns, or an array whose used slots are paired with the patterns
   in order (patterns left without a partner get an empty replacement).
   The incoming `subject_str` reference is consumed: it is released after the
   first replacement, or returned unchanged when `regex` has no entries.
   Returns NULL as soon as one replacement fails. */
static zend_string *php_pcre_replace_array(HashTable *regex, zval *replace, zend_string *subject_str, size_t limit, size_t *replace_count)
{
	zval *pattern_zv;
	zend_string *replaced;

	if (Z_TYPE_P(replace) != IS_ARRAY) {
		/* One replacement string shared by all patterns. */
		zend_string *shared_replacement = Z_STR_P(replace);

		ZEND_HASH_FOREACH_VAL(regex, pattern_zv) {
			zend_string *pattern_tmp;
			zend_string *pattern = zval_get_tmp_string(pattern_zv, &pattern_tmp);

			replaced = php_pcre_replace(pattern,
										subject_str,
										ZSTR_VAL(subject_str),
										ZSTR_LEN(subject_str),
										shared_replacement,
										limit,
										replace_count);
			zend_tmp_string_release(pattern_tmp);

			/* The result becomes the subject of the next pattern. */
			zend_string_release_ex(subject_str, 0);
			subject_str = replaced;
			if (UNEXPECTED(replaced == NULL)) {
				break;
			}
		} ZEND_HASH_FOREACH_END();

		return subject_str;
	}

	{
		HashTable *replacements = Z_ARRVAL_P(replace);
		uint32_t next_slot = 0;

		ZEND_HASH_FOREACH_VAL(regex, pattern_zv) {
			zend_string *pattern_tmp;
			zend_string *pattern = zval_get_tmp_string(pattern_zv, &pattern_tmp);
			zend_string *replacement = NULL;
			zend_string *replacement_tmp = NULL;
			zend_bool have_replacement = 0;

			/* Take the next used slot of the replacement array, stepping
			   over IS_UNDEF holes. */
			while (next_slot != replacements->nNumUsed) {
				zval *slot = &replacements->arData[next_slot].val;

				next_slot++;
				if (Z_TYPE_P(slot) != IS_UNDEF) {
					replacement = zval_get_tmp_string(slot, &replacement_tmp);
					have_replacement = 1;
					break;
				}
			}
			if (!have_replacement) {
				/* Replacement array exhausted: substitute the empty string. */
				replacement = ZSTR_EMPTY_ALLOC();
				replacement_tmp = NULL;
			}

			replaced = php_pcre_replace(pattern,
										subject_str,
										ZSTR_VAL(subject_str),
										ZSTR_LEN(subject_str),
										replacement,
										limit,
										replace_count);
			zend_tmp_string_release(replacement_tmp);
			zend_tmp_string_release(pattern_tmp);

			/* The result becomes the subject of the next pattern. */
			zend_string_release_ex(subject_str, 0);
			subject_str = replaced;
			if (UNEXPECTED(replaced == NULL)) {
				break;
			}
		} ZEND_HASH_FOREACH_END();
	}

	return subject_str;
}
2126 /* }}} */
2127
2128 /* {{{ php_replace_in_subject
2129 */
php_replace_in_subject(zval * regex,zval * replace,zval * subject,size_t limit,size_t * replace_count)2130 static zend_always_inline zend_string *php_replace_in_subject(zval *regex, zval *replace, zval *subject, size_t limit, size_t *replace_count)
2131 {
2132 zend_string *result;
2133 zend_string *subject_str = zval_get_string(subject);
2134
2135 if (Z_TYPE_P(regex) != IS_ARRAY) {
2136 result = php_pcre_replace(Z_STR_P(regex),
2137 subject_str,
2138 ZSTR_VAL(subject_str),
2139 ZSTR_LEN(subject_str),
2140 Z_STR_P(replace),
2141 limit,
2142 replace_count);
2143 zend_string_release_ex(subject_str, 0);
2144 } else {
2145 result = php_pcre_replace_array(Z_ARRVAL_P(regex),
2146 replace,
2147 subject_str,
2148 limit,
2149 replace_count);
2150 }
2151 return result;
2152 }
2153 /* }}} */
2154
2155 /* {{{ php_replace_in_subject_func
2156 */
php_replace_in_subject_func(zval * regex,zend_fcall_info * fci,zend_fcall_info_cache * fcc,zval * subject,size_t limit,size_t * replace_count)2157 static zend_string *php_replace_in_subject_func(zval *regex, zend_fcall_info *fci, zend_fcall_info_cache *fcc, zval *subject, size_t limit, size_t *replace_count)
2158 {
2159 zend_string *result;
2160 zend_string *subject_str = zval_get_string(subject);
2161
2162 if (Z_TYPE_P(regex) != IS_ARRAY) {
2163 result = php_pcre_replace_func(Z_STR_P(regex),
2164 subject_str,
2165 fci, fcc,
2166 limit,
2167 replace_count);
2168 zend_string_release_ex(subject_str, 0);
2169 return result;
2170 } else {
2171 zval *regex_entry;
2172
2173 /* If regex is an array */
2174
2175 /* For each entry in the regex array, get the entry */
2176 ZEND_HASH_FOREACH_VAL(Z_ARRVAL_P(regex), regex_entry) {
2177 /* Make sure we're dealing with strings. */
2178 zend_string *tmp_regex_str;
2179 zend_string *regex_str = zval_get_tmp_string(regex_entry, &tmp_regex_str);
2180
2181 /* Do the actual replacement and put the result back into subject_str
2182 for further replacements. */
2183 result = php_pcre_replace_func(regex_str,
2184 subject_str,
2185 fci, fcc,
2186 limit,
2187 replace_count);
2188 zend_tmp_string_release(tmp_regex_str);
2189 zend_string_release_ex(subject_str, 0);
2190 subject_str = result;
2191 if (UNEXPECTED(result == NULL)) {
2192 break;
2193 }
2194 } ZEND_HASH_FOREACH_END();
2195
2196 return subject_str;
2197 }
2198 }
2199 /* }}} */
2200
2201 /* {{{ preg_replace_func_impl
2202 */
preg_replace_func_impl(zval * return_value,zval * regex,zend_fcall_info * fci,zend_fcall_info_cache * fcc,zval * subject,zend_long limit_val)2203 static size_t preg_replace_func_impl(zval *return_value, zval *regex, zend_fcall_info *fci, zend_fcall_info_cache *fcc, zval *subject, zend_long limit_val)
2204 {
2205 zend_string *result;
2206 size_t replace_count = 0;
2207
2208 if (Z_TYPE_P(regex) != IS_ARRAY) {
2209 convert_to_string_ex(regex);
2210 }
2211
2212 if (Z_TYPE_P(subject) != IS_ARRAY) {
2213 result = php_replace_in_subject_func(regex, fci, fcc, subject, limit_val, &replace_count);
2214 if (result != NULL) {
2215 RETVAL_STR(result);
2216 } else {
2217 RETVAL_NULL();
2218 }
2219 } else {
2220 /* if subject is an array */
2221 zval *subject_entry, zv;
2222 zend_string *string_key;
2223 zend_ulong num_key;
2224
2225 array_init_size(return_value, zend_hash_num_elements(Z_ARRVAL_P(subject)));
2226
2227 /* For each subject entry, convert it to string, then perform replacement
2228 and add the result to the return_value array. */
2229 ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(subject), num_key, string_key, subject_entry) {
2230 result = php_replace_in_subject_func(regex, fci, fcc, subject_entry, limit_val, &replace_count);
2231 if (result != NULL) {
2232 /* Add to return array */
2233 ZVAL_STR(&zv, result);
2234 if (string_key) {
2235 zend_hash_add_new(Z_ARRVAL_P(return_value), string_key, &zv);
2236 } else {
2237 zend_hash_index_add_new(Z_ARRVAL_P(return_value), num_key, &zv);
2238 }
2239 }
2240 } ZEND_HASH_FOREACH_END();
2241 }
2242
2243 return replace_count;
2244 }
2245 /* }}} */
2246
2247 /* {{{ preg_replace_common
2248 */
/* Shared implementation of preg_replace() and preg_filter().
   With `is_filter` set, only subjects in which at least one replacement took
   place are returned (NULL for a scalar subject, omitted for array entries).
   The optional by-reference count receives the total number of replacements. */
static void preg_replace_common(INTERNAL_FUNCTION_PARAMETERS, int is_filter)
{
	zval *regex, *replace, *subject, *zcount = NULL;
	zend_long limit = -1;
	size_t replace_count = 0;
	size_t count_before;
	zend_string *replaced;

	/* Get function parameters and do error-checking. */
	ZEND_PARSE_PARAMETERS_START(3, 5)
		Z_PARAM_ZVAL(regex)
		Z_PARAM_ZVAL(replace)
		Z_PARAM_ZVAL(subject)
		Z_PARAM_OPTIONAL
		Z_PARAM_LONG(limit)
		Z_PARAM_ZVAL_DEREF(zcount)
	ZEND_PARSE_PARAMETERS_END();

	if (Z_TYPE_P(replace) == IS_ARRAY) {
		/* An array of replacements needs an array of patterns. */
		if (Z_TYPE_P(regex) != IS_ARRAY) {
			php_error_docref(NULL, E_WARNING, "Parameter mismatch, pattern is a string while replacement is an array");
			RETURN_FALSE;
		}
	} else {
		convert_to_string_ex(replace);
		if (Z_TYPE_P(regex) != IS_ARRAY) {
			convert_to_string_ex(regex);
		}
	}

	if (Z_TYPE_P(subject) == IS_ARRAY) {
		zval *subject_entry, zv;
		zend_string *string_key;
		zend_ulong num_key;

		array_init_size(return_value, zend_hash_num_elements(Z_ARRVAL_P(subject)));

		/* Replace in each entry and keep the result under the original key. */
		ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(subject), num_key, string_key, subject_entry) {
			count_before = replace_count;
			replaced = php_replace_in_subject(regex,
											  replace,
											  subject_entry,
											  limit,
											  &replace_count);
			if (replaced == NULL) {
				continue;
			}
			if (is_filter && replace_count <= count_before) {
				/* Filter mode: nothing was replaced in this entry, drop it. */
				zend_string_release_ex(replaced, 0);
				continue;
			}

			ZVAL_STR(&zv, replaced);
			if (string_key) {
				zend_hash_add_new(Z_ARRVAL_P(return_value), string_key, &zv);
			} else {
				zend_hash_index_add_new(Z_ARRVAL_P(return_value), num_key, &zv);
			}
		} ZEND_HASH_FOREACH_END();
	} else {
		count_before = replace_count;
		replaced = php_replace_in_subject(regex,
										  replace,
										  subject,
										  limit,
										  &replace_count);
		if (replaced == NULL) {
			RETVAL_NULL();
		} else if (is_filter && replace_count <= count_before) {
			/* Filter mode: nothing was replaced, so the subject is dropped. */
			zend_string_release_ex(replaced, 0);
			RETVAL_NULL();
		} else {
			RETVAL_STR(replaced);
		}
	}

	if (zcount) {
		zval_ptr_dtor(zcount);
		ZVAL_LONG(zcount, replace_count);
	}
}
2334 /* }}} */
2335
2336 /* {{{ proto mixed preg_replace(mixed regex, mixed replace, mixed subject [, int limit [, int &count]])
2337 Perform Perl-style regular expression replacement. */
static PHP_FUNCTION(preg_replace)
{
	/* is_filter = 0: results are returned whether or not anything was replaced. */
	preg_replace_common(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
}
2342 /* }}} */
2343
2344 /* {{{ proto mixed preg_replace_callback(mixed regex, mixed callback, mixed subject [, int limit [, int &count]])
2345 Perform Perl-style regular expression replacement using replacement callback. */
PHP_FUNCTION(preg_replace_callback)2346 static PHP_FUNCTION(preg_replace_callback)
2347 {
2348 zval *regex, *replace, *subject, *zcount = NULL;
2349 zend_long limit = -1;
2350 size_t replace_count;
2351 zend_fcall_info fci;
2352 zend_fcall_info_cache fcc;
2353
2354 /* Get function parameters and do error-checking. */
2355 ZEND_PARSE_PARAMETERS_START(3, 5)
2356 Z_PARAM_ZVAL(regex)
2357 Z_PARAM_ZVAL(replace)
2358 Z_PARAM_ZVAL(subject)
2359 Z_PARAM_OPTIONAL
2360 Z_PARAM_LONG(limit)
2361 Z_PARAM_ZVAL_DEREF(zcount)
2362 ZEND_PARSE_PARAMETERS_END();
2363
2364 if (!zend_is_callable_ex(replace, NULL, 0, NULL, &fcc, NULL)) {
2365 zend_string *callback_name = zend_get_callable_name(replace);
2366 php_error_docref(NULL, E_WARNING, "Requires argument 2, '%s', to be a valid callback", ZSTR_VAL(callback_name));
2367 zend_string_release_ex(callback_name, 0);
2368 ZVAL_STR(return_value, zval_get_string(subject));
2369 return;
2370 }
2371
2372 fci.size = sizeof(fci);
2373 fci.object = NULL;
2374 ZVAL_COPY_VALUE(&fci.function_name, replace);
2375
2376 replace_count = preg_replace_func_impl(return_value, regex, &fci, &fcc, subject, limit);
2377 if (zcount) {
2378 zval_ptr_dtor(zcount);
2379 ZVAL_LONG(zcount, replace_count);
2380 }
2381 }
2382 /* }}} */
2383
2384 /* {{{ proto mixed preg_replace_callback_array(array pattern, mixed subject [, int limit [, int &count]])
2385 Perform Perl-style regular expression replacement using replacement callback. */
PHP_FUNCTION(preg_replace_callback_array)2386 static PHP_FUNCTION(preg_replace_callback_array)
2387 {
2388 zval regex, zv, *replace, *subject, *pattern, *zcount = NULL;
2389 zend_long limit = -1;
2390 zend_string *str_idx;
2391 size_t replace_count = 0;
2392 zend_fcall_info fci;
2393 zend_fcall_info_cache fcc;
2394
2395 /* Get function parameters and do error-checking. */
2396 ZEND_PARSE_PARAMETERS_START(2, 4)
2397 Z_PARAM_ARRAY(pattern)
2398 Z_PARAM_ZVAL(subject)
2399 Z_PARAM_OPTIONAL
2400 Z_PARAM_LONG(limit)
2401 Z_PARAM_ZVAL_DEREF(zcount)
2402 ZEND_PARSE_PARAMETERS_END();
2403
2404 fci.size = sizeof(fci);
2405 fci.object = NULL;
2406
2407 ZEND_HASH_FOREACH_STR_KEY_VAL(Z_ARRVAL_P(pattern), str_idx, replace) {
2408 if (str_idx) {
2409 ZVAL_STR_COPY(®ex, str_idx);
2410 } else {
2411 php_error_docref(NULL, E_WARNING, "Delimiter must not be alphanumeric or backslash");
2412 zval_ptr_dtor(return_value);
2413 RETURN_NULL();
2414 }
2415
2416 if (!zend_is_callable_ex(replace, NULL, 0, NULL, &fcc, NULL)) {
2417 zend_string *callback_name = zend_get_callable_name(replace);
2418 php_error_docref(NULL, E_WARNING, "'%s' is not a valid callback", ZSTR_VAL(callback_name));
2419 zend_string_release_ex(callback_name, 0);
2420 zval_ptr_dtor(®ex);
2421 zval_ptr_dtor(return_value);
2422 ZVAL_COPY(return_value, subject);
2423 return;
2424 }
2425
2426 ZVAL_COPY_VALUE(&fci.function_name, replace);
2427
2428 replace_count += preg_replace_func_impl(&zv, ®ex, &fci, &fcc, subject, limit);
2429 if (subject != return_value) {
2430 subject = return_value;
2431 } else {
2432 zval_ptr_dtor(return_value);
2433 }
2434
2435 zval_ptr_dtor(®ex);
2436
2437 ZVAL_COPY_VALUE(return_value, &zv);
2438
2439 if (UNEXPECTED(EG(exception))) {
2440 zval_ptr_dtor(return_value);
2441 RETURN_NULL();
2442 }
2443 } ZEND_HASH_FOREACH_END();
2444
2445 if (zcount) {
2446 zval_ptr_dtor(zcount);
2447 ZVAL_LONG(zcount, replace_count);
2448 }
2449 }
2450 /* }}} */
2451
2452 /* {{{ proto mixed preg_filter(mixed regex, mixed replace, mixed subject [, int limit [, int &count]])
2453 Perform Perl-style regular expression replacement and only return matches. */
static PHP_FUNCTION(preg_filter)
{
	/* is_filter = 1: only subjects with at least one replacement are returned. */
	preg_replace_common(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
}
2458 /* }}} */
2459
2460 /* {{{ proto array preg_split(string pattern, string subject [, int limit [, int flags]])
2461 Split string into an array using a perl-style regular expression as a delimiter */
PHP_FUNCTION(preg_split)2462 static PHP_FUNCTION(preg_split)
2463 {
2464 zend_string *regex; /* Regular expression */
2465 zend_string *subject; /* String to match against */
2466 zend_long limit_val = -1;/* Integer value of limit */
2467 zend_long flags = 0; /* Match control flags */
2468 pcre_cache_entry *pce; /* Compiled regular expression */
2469
2470 /* Get function parameters and do error checking */
2471 ZEND_PARSE_PARAMETERS_START(2, 4)
2472 Z_PARAM_STR(regex)
2473 Z_PARAM_STR(subject)
2474 Z_PARAM_OPTIONAL
2475 Z_PARAM_LONG(limit_val)
2476 Z_PARAM_LONG(flags)
2477 ZEND_PARSE_PARAMETERS_END_EX(RETURN_FALSE);
2478
2479 /* Compile regex or get it from cache. */
2480 if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
2481 RETURN_FALSE;
2482 }
2483
2484 pce->refcount++;
2485 php_pcre_split_impl(pce, subject, return_value, limit_val, flags);
2486 pce->refcount--;
2487 }
2488 /* }}} */
2489
/* {{{ php_pcre_split_impl
 */
/* Splits `subject_str` at every match of the compiled pattern `pce` and
   appends the pieces to `return_value`, which is initialised as an array here.
   `limit_val`: -1 or 0 mean "no limit"; any other value <= 1 skips matching
   altogether and yields the whole subject as a single piece.
   `flags` is a mask of PREG_SPLIT_NO_EMPTY, PREG_SPLIT_DELIM_CAPTURE and
   PREG_SPLIT_OFFSET_CAPTURE.
   If match data cannot be allocated, or PCRE_G(error_code) is set once the
   loop ends, the array is destroyed and `return_value` becomes FALSE. */
PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str, zval *return_value,
	zend_long limit_val, zend_long flags)
{
	PCRE2_SIZE		*offsets;			/* Array of subpattern offsets */
	uint32_t		 options;			/* Execution options */
	int				 count;				/* Count of matched subpatterns */
	PCRE2_SIZE		 start_offset;		/* Where the new search starts */
	PCRE2_SIZE		 next_offset;		/* End of the last delimiter match + 1 */
	char			*last_match;		/* Location of last match */
	uint32_t		 no_empty;			/* If NO_EMPTY flag is set */
	uint32_t		 delim_capture; 	/* If delimiters should be captured */
	uint32_t		 offset_capture;	/* If offsets should be captured */
	uint32_t		 num_subpats;		/* Number of captured subpatterns */
	zval			 tmp;
	pcre2_match_data *match_data;

	no_empty = flags & PREG_SPLIT_NO_EMPTY;
	delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE;
	offset_capture = flags & PREG_SPLIT_OFFSET_CAPTURE;

	/* Initialize return value */
	array_init(return_value);

	/* Number of ovector pairs needed: one per capture group plus the whole match. */
	num_subpats = pce->capture_count + 1;

	/* Start at the beginning of the string */
	start_offset = 0;
	next_offset = 0;
	last_match = ZSTR_VAL(subject_str);
	PCRE_G(error_code) = PHP_PCRE_NO_ERROR;


	/* Normalise the limit: 0 is treated like -1 (unlimited); any other value
	   <= 1 needs no matching at all, so jump straight to emitting the whole
	   subject as the only piece. */
	if (limit_val == -1) {
		/* pass */
	} else if (limit_val == 0) {
		limit_val = -1;
	} else if (limit_val <= 1) {
		goto last;
	}

	/* Use the preallocated match data block when it is free and large enough,
	   otherwise create one sized for this pattern (freed after the loop). */
	if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
		match_data = mdata;
	} else {
		match_data = pcre2_match_data_create_from_pattern(pce->re, gctx);
		if (!match_data) {
			PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
			zval_ptr_dtor(return_value);
			RETURN_FALSE;
		}
	}

	/* Only the first match lets PCRE validate the subject (UTF patterns);
	   every later call passes PCRE2_NO_UTF_CHECK. */
	options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;

#ifdef HAVE_PCRE_JIT_SUPPORT
	/* The JIT entry point is used for the first match only when no UTF check
	   is required (options != 0). */
	if ((pce->preg_options & PREG_JIT) && options) {
		count = pcre2_jit_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset,
				PCRE2_NO_UTF_CHECK, match_data, mctx);
	} else
#endif
	count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset,
			options, match_data, mctx);

	while (1) {
		/* If something matched */
		if (count >= 0) {
			/* Check for too many substrings condition. */
			if (UNEXPECTED(count == 0)) {
				php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
				count = num_subpats;
			}

matched:
			offsets = pcre2_get_ovector_pointer(match_data);

			/* A match that ends before it starts is reported as an internal error. */
			if (UNEXPECTED(offsets[1] < offsets[0])) {
				PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
				break;
			}

			/* Emit the piece between the previous delimiter and this one,
			   unless it is empty and NO_EMPTY was requested. */
			if (!no_empty || &ZSTR_VAL(subject_str)[offsets[0]] != last_match) {

				if (offset_capture) {
					/* Add (match, offset) pair to the return value */
					add_offset_pair(return_value, last_match, (&ZSTR_VAL(subject_str)[offsets[0]]-last_match), next_offset, NULL, 0);
				} else {
					/* Add the piece to the return value */
					ZVAL_STRINGL(&tmp, last_match, &ZSTR_VAL(subject_str)[offsets[0]]-last_match);
					zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
				}

				/* One less left to do */
				if (limit_val != -1)
					limit_val--;
			}

			last_match = &ZSTR_VAL(subject_str)[offsets[1]];
			next_offset = offsets[1];

			/* With DELIM_CAPTURE, also emit each captured group of the
			   delimiter (group 0, the whole match, is not emitted). */
			if (delim_capture) {
				size_t i, match_len;
				for (i = 1; i < count; i++) {
					match_len = offsets[(i<<1)+1] - offsets[i<<1];
					/* If we have matched a delimiter */
					if (!no_empty || match_len > 0) {
						if (offset_capture) {
							add_offset_pair(return_value, &ZSTR_VAL(subject_str)[offsets[i<<1]], match_len, offsets[i<<1], NULL, 0);
						} else {
							ZVAL_STRINGL(&tmp, &ZSTR_VAL(subject_str)[offsets[i<<1]], match_len);
							zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
						}
					}
				}
			}

			/* Advance to the position right after the last full match */
			start_offset = offsets[1];

			/* If we have matched an empty string, mimic what Perl's /g options does.
			   This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
			   the match again at the same point. If this fails (picked up above) we
			   advance to the next character. */
			if (start_offset == offsets[0]) {
				count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset,
					PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
				if (count >= 0) {
					goto matched;
				} else if (count == PCRE2_ERROR_NOMATCH) {
					/* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
					   this is not necessarily the end. We need to advance
					   the start offset, and continue. Fudge the offset values
					   to achieve this, unless we're already at the end of the string. */
					if (start_offset < ZSTR_LEN(subject_str)) {
						start_offset += calculate_unit_length(pce, ZSTR_VAL(subject_str) + start_offset);
					} else {
						break;
					}
				} else {
					goto error;
				}
			}

		} else if (count == PCRE2_ERROR_NOMATCH) {
			/* No further delimiter: the remainder is emitted after the loop. */
			break;
		} else {
error:
			/* NOTE(review): pcre_handle_exec_error() is presumably what records the
			   failure in PCRE_G(error_code), which is checked after the loop - confirm. */
			pcre_handle_exec_error(count);
			break;
		}

		/* Get next piece if no limit or limit not yet reached and something matched*/
		if (limit_val != -1 && limit_val <= 1) {
			break;
		}

#ifdef HAVE_PCRE_JIT_SUPPORT
		if (pce->preg_options & PREG_JIT) {
			count = pcre2_jit_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset,
					PCRE2_NO_UTF_CHECK, match_data, mctx);
		} else
#endif
		count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset,
				PCRE2_NO_UTF_CHECK, match_data, mctx);
	}
	/* Release the match data only if it was allocated here. */
	if (match_data != mdata) {
		pcre2_match_data_free(match_data);
	}

	/* Any recorded error discards the partial result. */
	if (PCRE_G(error_code) != PHP_PCRE_NO_ERROR) {
		zval_ptr_dtor(return_value);
		RETURN_FALSE;
	}

last:
	start_offset = (last_match - ZSTR_VAL(subject_str)); /* the offset might have been incremented, but without further successful matches */

	/* Emit whatever follows the last delimiter, unless it is empty and
	   NO_EMPTY was requested. */
	if (!no_empty || start_offset < ZSTR_LEN(subject_str)) {
		if (offset_capture) {
			/* Add the last (match, offset) pair to the return value */
			add_offset_pair(return_value, &ZSTR_VAL(subject_str)[start_offset], ZSTR_LEN(subject_str) - start_offset, start_offset, NULL, 0);
		} else {
			/* Add the last piece to the return value */
			if (last_match == ZSTR_VAL(subject_str)) {
				/* Nothing was split off: share the subject string instead of copying it. */
				ZVAL_STR_COPY(&tmp, subject_str);
			} else {
				ZVAL_STRINGL(&tmp, last_match, ZSTR_VAL(subject_str) + ZSTR_LEN(subject_str) - last_match);
			}
			zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
		}
	}
}
2683 /* }}} */
2684
2685 /* {{{ proto string preg_quote(string str [, string delim_char])
2686 Quote regular expression characters plus an optional character */
PHP_FUNCTION(preg_quote)2687 static PHP_FUNCTION(preg_quote)
2688 {
2689 zend_string *str; /* Input string argument */
2690 zend_string *delim = NULL; /* Additional delimiter argument */
2691 char *in_str; /* Input string */
2692 char *in_str_end; /* End of the input string */
2693 zend_string *out_str; /* Output string with quoted characters */
2694 size_t extra_len; /* Number of additional characters */
2695 char *p, /* Iterator for input string */
2696 *q, /* Iterator for output string */
2697 delim_char = '\0', /* Delimiter character to be quoted */
2698 c; /* Current character */
2699
2700 /* Get the arguments and check for errors */
2701 ZEND_PARSE_PARAMETERS_START(1, 2)
2702 Z_PARAM_STR(str)
2703 Z_PARAM_OPTIONAL
2704 Z_PARAM_STR_EX(delim, 1, 0)
2705 ZEND_PARSE_PARAMETERS_END();
2706
2707 /* Nothing to do if we got an empty string */
2708 if (ZSTR_LEN(str) == 0) {
2709 RETURN_EMPTY_STRING();
2710 }
2711
2712 in_str = ZSTR_VAL(str);
2713 in_str_end = in_str + ZSTR_LEN(str);
2714
2715 if (delim) {
2716 delim_char = ZSTR_VAL(delim)[0];
2717 }
2718
2719 /* Go through the string and quote necessary characters */
2720 extra_len = 0;
2721 p = in_str;
2722 do {
2723 c = *p;
2724 switch(c) {
2725 case '.':
2726 case '\\':
2727 case '+':
2728 case '*':
2729 case '?':
2730 case '[':
2731 case '^':
2732 case ']':
2733 case '$':
2734 case '(':
2735 case ')':
2736 case '{':
2737 case '}':
2738 case '=':
2739 case '!':
2740 case '>':
2741 case '<':
2742 case '|':
2743 case ':':
2744 case '-':
2745 case '#':
2746 extra_len++;
2747 break;
2748
2749 case '\0':
2750 extra_len+=3;
2751 break;
2752
2753 default:
2754 if (c == delim_char) {
2755 extra_len++;
2756 }
2757 break;
2758 }
2759 p++;
2760 } while (p != in_str_end);
2761
2762 if (extra_len == 0) {
2763 RETURN_STR_COPY(str);
2764 }
2765
2766 /* Allocate enough memory so that even if each character
2767 is quoted, we won't run out of room */
2768 out_str = zend_string_safe_alloc(1, ZSTR_LEN(str), extra_len, 0);
2769 q = ZSTR_VAL(out_str);
2770 p = in_str;
2771
2772 do {
2773 c = *p;
2774 switch(c) {
2775 case '.':
2776 case '\\':
2777 case '+':
2778 case '*':
2779 case '?':
2780 case '[':
2781 case '^':
2782 case ']':
2783 case '$':
2784 case '(':
2785 case ')':
2786 case '{':
2787 case '}':
2788 case '=':
2789 case '!':
2790 case '>':
2791 case '<':
2792 case '|':
2793 case ':':
2794 case '-':
2795 case '#':
2796 *q++ = '\\';
2797 *q++ = c;
2798 break;
2799
2800 case '\0':
2801 *q++ = '\\';
2802 *q++ = '0';
2803 *q++ = '0';
2804 *q++ = '0';
2805 break;
2806
2807 default:
2808 if (c == delim_char) {
2809 *q++ = '\\';
2810 }
2811 *q++ = c;
2812 break;
2813 }
2814 p++;
2815 } while (p != in_str_end);
2816 *q = '\0';
2817
2818 RETURN_NEW_STR(out_str);
2819 }
2820 /* }}} */
2821
2822 /* {{{ proto array preg_grep(string regex, array input [, int flags])
2823 Searches array and returns entries which match regex */
PHP_FUNCTION(preg_grep)2824 static PHP_FUNCTION(preg_grep)
2825 {
2826 zend_string *regex; /* Regular expression */
2827 zval *input; /* Input array */
2828 zend_long flags = 0; /* Match control flags */
2829 pcre_cache_entry *pce; /* Compiled regular expression */
2830
2831 /* Get arguments and do error checking */
2832 ZEND_PARSE_PARAMETERS_START(2, 3)
2833 Z_PARAM_STR(regex)
2834 Z_PARAM_ARRAY(input)
2835 Z_PARAM_OPTIONAL
2836 Z_PARAM_LONG(flags)
2837 ZEND_PARSE_PARAMETERS_END();
2838
2839 /* Compile regex or get it from cache. */
2840 if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
2841 RETURN_FALSE;
2842 }
2843
2844 pce->refcount++;
2845 php_pcre_grep_impl(pce, input, return_value, flags);
2846 pce->refcount--;
2847 }
2848 /* }}} */
2849
php_pcre_grep_impl(pcre_cache_entry * pce,zval * input,zval * return_value,zend_long flags)2850 PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return_value, zend_long flags) /* {{{ */
2851 {
2852 zval *entry; /* An entry in the input array */
2853 uint32_t num_subpats; /* Number of captured subpatterns */
2854 int count; /* Count of matched subpatterns */
2855 uint32_t options; /* Execution options */
2856 zend_string *string_key;
2857 zend_ulong num_key;
2858 zend_bool invert; /* Whether to return non-matching
2859 entries */
2860 pcre2_match_data *match_data;
2861 invert = flags & PREG_GREP_INVERT ? 1 : 0;
2862
2863 /* Calculate the size of the offsets array, and allocate memory for it. */
2864 num_subpats = pce->capture_count + 1;
2865
2866 /* Initialize return array */
2867 array_init(return_value);
2868
2869 PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
2870
2871 if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
2872 match_data = mdata;
2873 } else {
2874 match_data = pcre2_match_data_create_from_pattern(pce->re, gctx);
2875 if (!match_data) {
2876 PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
2877 return;
2878 }
2879 }
2880
2881 options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
2882
2883 /* Go through the input array */
2884 ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(input), num_key, string_key, entry) {
2885 zend_string *tmp_subject_str;
2886 zend_string *subject_str = zval_get_tmp_string(entry, &tmp_subject_str);
2887
2888 /* Perform the match */
2889 #ifdef HAVE_PCRE_JIT_SUPPORT
2890 if ((pce->preg_options & PREG_JIT) && options) {
2891 count = pcre2_jit_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), 0,
2892 PCRE2_NO_UTF_CHECK, match_data, mctx);
2893 } else
2894 #endif
2895 count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), 0,
2896 options, match_data, mctx);
2897
2898 /* If the entry fits our requirements */
2899 if (count >= 0) {
2900 /* Check for too many substrings condition. */
2901 if (UNEXPECTED(count == 0)) {
2902 php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
2903 }
2904 if (!invert) {
2905 Z_TRY_ADDREF_P(entry);
2906
2907 /* Add to return array */
2908 if (string_key) {
2909 zend_hash_update(Z_ARRVAL_P(return_value), string_key, entry);
2910 } else {
2911 zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, entry);
2912 }
2913 }
2914 } else if (count == PCRE2_ERROR_NOMATCH) {
2915 if (invert) {
2916 Z_TRY_ADDREF_P(entry);
2917
2918 /* Add to return array */
2919 if (string_key) {
2920 zend_hash_update(Z_ARRVAL_P(return_value), string_key, entry);
2921 } else {
2922 zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, entry);
2923 }
2924 }
2925 } else {
2926 pcre_handle_exec_error(count);
2927 zend_tmp_string_release(tmp_subject_str);
2928 break;
2929 }
2930
2931 zend_tmp_string_release(tmp_subject_str);
2932 } ZEND_HASH_FOREACH_END();
2933 if (match_data != mdata) {
2934 pcre2_match_data_free(match_data);
2935 }
2936 }
2937 /* }}} */
2938
2939 /* {{{ proto int preg_last_error()
2940 Returns the error code of the last regexp execution. */
static PHP_FUNCTION(preg_last_error)
{
	/* Takes no arguments. */
	ZEND_PARSE_PARAMETERS_START(0, 0)
	ZEND_PARSE_PARAMETERS_END();

	/* Report the error code currently stored in the PCRE globals. */
	RETURN_LONG(PCRE_G(error_code));
}
2948 /* }}} */
2949
2950 /* {{{ module definition structures */
2951
2952 /* {{{ arginfo */
/* Argument descriptors for the preg_* functions.
   NOTE(review): per the Zend arginfo macros, the first argument of
   ZEND_ARG_INFO is the pass-by-reference flag and the last argument of
   ZEND_BEGIN_ARG_INFO_EX is the number of required parameters - confirm
   against Zend/zend_API.h. */

/* preg_match(pattern, subject [, &subpatterns [, flags [, offset]]]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match, 0, 0, 2)
    ZEND_ARG_INFO(0, pattern)
    ZEND_ARG_INFO(0, subject)
    ZEND_ARG_INFO(1, subpatterns) /* array */
    ZEND_ARG_INFO(0, flags)
    ZEND_ARG_INFO(0, offset)
ZEND_END_ARG_INFO()

/* preg_match_all(pattern, subject [, &subpatterns [, flags [, offset]]]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match_all, 0, 0, 2)
    ZEND_ARG_INFO(0, pattern)
    ZEND_ARG_INFO(0, subject)
    ZEND_ARG_INFO(1, subpatterns) /* array */
    ZEND_ARG_INFO(0, flags)
    ZEND_ARG_INFO(0, offset)
ZEND_END_ARG_INFO()

/* preg_replace(regex, replace, subject [, limit [, &count]]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace, 0, 0, 3)
    ZEND_ARG_INFO(0, regex)
    ZEND_ARG_INFO(0, replace)
    ZEND_ARG_INFO(0, subject)
    ZEND_ARG_INFO(0, limit)
    ZEND_ARG_INFO(1, count)
ZEND_END_ARG_INFO()

/* preg_replace_callback(regex, callback, subject [, limit [, &count]]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace_callback, 0, 0, 3)
    ZEND_ARG_INFO(0, regex)
    ZEND_ARG_INFO(0, callback)
    ZEND_ARG_INFO(0, subject)
    ZEND_ARG_INFO(0, limit)
    ZEND_ARG_INFO(1, count)
ZEND_END_ARG_INFO()

/* preg_replace_callback_array(pattern, subject [, limit [, &count]]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace_callback_array, 0, 0, 2)
    ZEND_ARG_INFO(0, pattern)
    ZEND_ARG_INFO(0, subject)
    ZEND_ARG_INFO(0, limit)
    ZEND_ARG_INFO(1, count)
ZEND_END_ARG_INFO()

/* preg_split(pattern, subject [, limit [, flags]]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_split, 0, 0, 2)
    ZEND_ARG_INFO(0, pattern)
    ZEND_ARG_INFO(0, subject)
    ZEND_ARG_INFO(0, limit)
    ZEND_ARG_INFO(0, flags)
ZEND_END_ARG_INFO()

/* preg_quote(str [, delim_char]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_quote, 0, 0, 1)
    ZEND_ARG_INFO(0, str)
    ZEND_ARG_INFO(0, delim_char)
ZEND_END_ARG_INFO()

/* preg_grep(regex, input [, flags]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_grep, 0, 0, 2)
    ZEND_ARG_INFO(0, regex)
    ZEND_ARG_INFO(0, input) /* array */
    ZEND_ARG_INFO(0, flags)
ZEND_END_ARG_INFO()

/* preg_last_error() takes no arguments */
ZEND_BEGIN_ARG_INFO(arginfo_preg_last_error, 0)
ZEND_END_ARG_INFO()
3012 /* }}} */
3013
3014 static const zend_function_entry pcre_functions[] = {
3015 PHP_FE(preg_match, arginfo_preg_match)
3016 PHP_FE(preg_match_all, arginfo_preg_match_all)
3017 PHP_FE(preg_replace, arginfo_preg_replace)
3018 PHP_FE(preg_replace_callback, arginfo_preg_replace_callback)
3019 PHP_FE(preg_replace_callback_array, arginfo_preg_replace_callback_array)
3020 PHP_FE(preg_filter, arginfo_preg_replace)
3021 PHP_FE(preg_split, arginfo_preg_split)
3022 PHP_FE(preg_quote, arginfo_preg_quote)
3023 PHP_FE(preg_grep, arginfo_preg_grep)
3024 PHP_FE(preg_last_error, arginfo_preg_last_error)
3025 PHP_FE_END
3026 };
3027
3028 zend_module_entry pcre_module_entry = {
3029 STANDARD_MODULE_HEADER,
3030 "pcre",
3031 pcre_functions,
3032 PHP_MINIT(pcre),
3033 PHP_MSHUTDOWN(pcre),
3034 #ifdef HAVE_PCRE_JIT_SUPPORT
3035 PHP_RINIT(pcre),
3036 #else
3037 NULL,
3038 #endif
3039 NULL,
3040 PHP_MINFO(pcre),
3041 PHP_PCRE_VERSION,
3042 PHP_MODULE_GLOBALS(pcre),
3043 PHP_GINIT(pcre),
3044 PHP_GSHUTDOWN(pcre),
3045 NULL,
3046 STANDARD_MODULE_PROPERTIES_EX
3047 };
3048
/* Expanded only when COMPILE_DL_PCRE is defined (presumably the
 * dynamically loadable build -- confirm against the build configuration). */
#if defined(COMPILE_DL_PCRE)
ZEND_GET_MODULE(pcre)
#endif
3052
3053 /* }}} */
3054
3055 PHPAPI pcre2_match_context *php_pcre_mctx(void)
3056 {/*{{{*/
3057 return mctx;
3058 }/*}}}*/
3059
php_pcre_gctx(void)3060 PHPAPI pcre2_general_context *php_pcre_gctx(void)
3061 {/*{{{*/
3062 return gctx;
3063 }/*}}}*/
3064
php_pcre_cctx(void)3065 PHPAPI pcre2_compile_context *php_pcre_cctx(void)
3066 {/*{{{*/
3067 return cctx;
3068 }/*}}}*/
3069
php_pcre_pce_incref(pcre_cache_entry * pce)3070 PHPAPI void php_pcre_pce_incref(pcre_cache_entry *pce)
3071 {/*{{{*/
3072 assert(NULL != pce);
3073 pce->refcount++;
3074 }/*}}}*/
3075
php_pcre_pce_decref(pcre_cache_entry * pce)3076 PHPAPI void php_pcre_pce_decref(pcre_cache_entry *pce)
3077 {/*{{{*/
3078 assert(NULL != pce);
3079 assert(0 != pce->refcount);
3080 pce->refcount--;
3081 }/*}}}*/
3082
php_pcre_pce_re(pcre_cache_entry * pce)3083 PHPAPI pcre2_code *php_pcre_pce_re(pcre_cache_entry *pce)
3084 {/*{{{*/
3085 assert(NULL != pce);
3086 return pce->re;
3087 }/*}}}*/
3088
3089 #endif /* HAVE_PCRE || HAVE_BUNDLED_PCRE */
3090
3091 /*
3092 * Local variables:
3093 * tab-width: 4
3094 * c-basic-offset: 4
3095 * End:
3096 * vim600: sw=4 ts=4 fdm=marker
3097 * vim<600: sw=4 ts=4
3098 */
3099