1 /*
2 +----------------------------------------------------------------------+
3 | PHP Version 7 |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 1997-2017 The PHP Group |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
15 | Author: Tsukada Takuya <tsukada@fminn.nagano.nagano.jp> |
16 +----------------------------------------------------------------------+
17 */
18
19 /* $Id$ */
20
21
22 #ifdef HAVE_CONFIG_H
23 #include "config.h"
24 #endif
25
26 #include "php.h"
27 #include "php_ini.h"
28
29 #if HAVE_MBREGEX
30
31 #include "zend_smart_str.h"
32 #include "ext/standard/info.h"
33 #include "php_mbregex.h"
34 #include "mbstring.h"
35
36 #include "php_onig_compat.h" /* must come prior to the oniguruma header */
37 #include <oniguruma.h>
38 #undef UChar
39
40 ZEND_EXTERN_MODULE_GLOBALS(mbstring)
41
/*
 * State shared by the mbregex functions in this file. An instance is created
 * by php_mb_regex_globals_alloc() and accessed through the MBREX() macro.
 */
struct _zend_mb_regex_globals {
	/* encoding copied back into current_mbctype at request shutdown */
	OnigEncoding default_mbctype;
	/* encoding passed to the pattern compiler by the PHP functions below */
	OnigEncoding current_mbctype;
	/* cache of compiled regexes keyed by pattern text; entries are released with onig_free() */
	HashTable ht_rc;
	/* subject string kept for the mb_ereg_search family; released at request shutdown */
	zval search_str;
	zval *search_str_val;
	/* position at which the next stateful search starts */
	unsigned int search_pos;
	/* regex used by the stateful search; points at an entry owned by ht_rc */
	php_mb_regex_t *search_re;
	/* match registers of the stateful search; freed at request shutdown */
	OnigRegion *search_regs;
	/* options applied when a caller passes no option string */
	OnigOptionType regex_default_options;
	/* syntax applied when a caller passes no option string */
	OnigSyntaxType *regex_default_syntax;
};

/* Access field `g` of the regex globals attached to the mbstring module globals. */
#define MBREX(g) (MBSTRG(mb_regex_globals)->g)
56
57 /* {{{ static void php_mb_regex_free_cache() */
/* Destructor for regex-cache slots: frees the compiled regex stored as the slot's pointer. */
static void php_mb_regex_free_cache(zval *el)
{
	php_mb_regex_t *compiled = (php_mb_regex_t *)Z_PTR_P(el);

	onig_free(compiled);
}
61 /* }}} */
62
63 /* {{{ _php_mb_regex_globals_ctor */
/*
 * Put a freshly allocated globals structure into its initial state:
 * UTF-8 as default and current encoding, an empty persistent regex cache,
 * no stateful search in progress, and Ruby syntax with the
 * MULTILINE|SINGLELINE options as defaults.
 *
 * Always returns SUCCESS.
 */
static int _php_mb_regex_globals_ctor(zend_mb_regex_globals *pglobals)
{
	pglobals->default_mbctype = ONIG_ENCODING_UTF8;
	pglobals->current_mbctype = ONIG_ENCODING_UTF8;
	/* persistent table; php_mb_regex_free_cache releases each compiled regex */
	zend_hash_init(&(pglobals->ht_rc), 0, NULL, php_mb_regex_free_cache, 1);
	ZVAL_UNDEF(&pglobals->search_str);
	/* the structure comes from pemalloc(), so every pointer must be set explicitly */
	pglobals->search_str_val = NULL;
	pglobals->search_re = (php_mb_regex_t*)NULL;
	pglobals->search_pos = 0;
	pglobals->search_regs = (OnigRegion*)NULL;
	pglobals->regex_default_options = ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE;
	pglobals->regex_default_syntax = ONIG_SYNTAX_RUBY;
	return SUCCESS;
}
77 /* }}} */
78
79 /* {{{ _php_mb_regex_globals_dtor */
/* Tear down the regex cache owned by the globals structure. */
static void _php_mb_regex_globals_dtor(zend_mb_regex_globals *pglobals)
{
	HashTable *regex_cache = &pglobals->ht_rc;

	zend_hash_destroy(regex_cache);
}
84 /* }}} */
85
86 /* {{{ php_mb_regex_globals_alloc */
php_mb_regex_globals_alloc(void)87 zend_mb_regex_globals *php_mb_regex_globals_alloc(void)
88 {
89 zend_mb_regex_globals *pglobals = pemalloc(
90 sizeof(zend_mb_regex_globals), 1);
91 if (!pglobals) {
92 return NULL;
93 }
94 if (SUCCESS != _php_mb_regex_globals_ctor(pglobals)) {
95 pefree(pglobals, 1);
96 return NULL;
97 }
98 return pglobals;
99 }
100 /* }}} */
101
102 /* {{{ php_mb_regex_globals_free */
/* Destroy and release a structure obtained from php_mb_regex_globals_alloc(); NULL is ignored. */
void php_mb_regex_globals_free(zend_mb_regex_globals *pglobals)
{
	if (pglobals != NULL) {
		_php_mb_regex_globals_dtor(pglobals);
		pefree(pglobals, 1);
	}
}
111 /* }}} */
112
113 /* {{{ PHP_MINIT_FUNCTION(mb_regex) */
/* Module startup hook: initialises the Oniguruma library and reports SUCCESS. */
PHP_MINIT_FUNCTION(mb_regex)
{
	onig_init();
	return SUCCESS;
}
119 /* }}} */
120
121 /* {{{ PHP_MSHUTDOWN_FUNCTION(mb_regex) */
/* Module shutdown hook: shuts the Oniguruma library down and reports SUCCESS. */
PHP_MSHUTDOWN_FUNCTION(mb_regex)
{
	onig_end();
	return SUCCESS;
}
127 /* }}} */
128
129 /* {{{ PHP_RINIT_FUNCTION(mb_regex) */
PHP_RINIT_FUNCTION(mb_regex)130 PHP_RINIT_FUNCTION(mb_regex)
131 {
132 return MBSTRG(mb_regex_globals) ? SUCCESS: FAILURE;
133 }
134 /* }}} */
135
136 /* {{{ PHP_RSHUTDOWN_FUNCTION(mb_regex) */
PHP_RSHUTDOWN_FUNCTION(mb_regex)137 PHP_RSHUTDOWN_FUNCTION(mb_regex)
138 {
139 MBREX(current_mbctype) = MBREX(default_mbctype);
140
141 if (!Z_ISUNDEF(MBREX(search_str))) {
142 zval_ptr_dtor(&MBREX(search_str));
143 ZVAL_UNDEF(&MBREX(search_str));
144 }
145 MBREX(search_pos) = 0;
146
147 if (MBREX(search_regs) != NULL) {
148 onig_region_free(MBREX(search_regs), 1);
149 MBREX(search_regs) = (OnigRegion *)NULL;
150 }
151 zend_hash_clean(&MBREX(ht_rc));
152
153 return SUCCESS;
154 }
155 /* }}} */
156
157 /* {{{ PHP_MINFO_FUNCTION(mb_regex) */
PHP_MINFO_FUNCTION(mb_regex)158 PHP_MINFO_FUNCTION(mb_regex)
159 {
160 char buf[32];
161 php_info_print_table_start();
162 php_info_print_table_row(2, "Multibyte (japanese) regex support", "enabled");
163 snprintf(buf, sizeof(buf), "%d.%d.%d",
164 ONIGURUMA_VERSION_MAJOR,
165 ONIGURUMA_VERSION_MINOR,
166 ONIGURUMA_VERSION_TEENY);
167 #ifdef PHP_ONIG_BUNDLED
168 #ifdef USE_COMBINATION_EXPLOSION_CHECK
169 php_info_print_table_row(2, "Multibyte regex (oniguruma) backtrack check", "On");
170 #else /* USE_COMBINATION_EXPLOSION_CHECK */
171 php_info_print_table_row(2, "Multibyte regex (oniguruma) backtrack check", "Off");
172 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
173 #endif /* PHP_BUNDLED_ONIG */
174 php_info_print_table_row(2, "Multibyte regex (oniguruma) version", buf);
175 php_info_print_table_end();
176 }
177 /* }}} */
178
179 /*
180 * encoding name resolver
181 */
182
183 /* {{{ encoding name map */
/* One row of the encoding table: a packed list of names and the encoding they denote. */
typedef struct _php_mb_regex_enc_name_map_t {
	const char *names;	/* NUL-separated names, closed by an empty string; the first one is what mbctype2name reports */
	OnigEncoding code;	/* Oniguruma encoding the names resolve to */
} php_mb_regex_enc_name_map_t;
188
/*
 * Table of supported encodings. Each entry is compiled in only when the
 * matching ONIG_ENCODING_* macro is defined. Names inside an entry are
 * separated by '\0'; the explicit trailing '\0' plus the literal's own
 * terminator form the empty string that ends the list. The table itself
 * ends with an entry whose names pointer is NULL.
 */
php_mb_regex_enc_name_map_t enc_name_map[] = {
#ifdef ONIG_ENCODING_EUC_JP
	{
		"EUC-JP\0EUCJP\0X-EUC-JP\0UJIS\0EUCJP\0EUCJP-WIN\0",
		ONIG_ENCODING_EUC_JP
	},
#endif
#ifdef ONIG_ENCODING_UTF8
	{
		"UTF-8\0UTF8\0",
		ONIG_ENCODING_UTF8
	},
#endif
#ifdef ONIG_ENCODING_UTF16_BE
	{
		"UTF-16\0UTF-16BE\0",
		ONIG_ENCODING_UTF16_BE
	},
#endif
#ifdef ONIG_ENCODING_UTF16_LE
	{
		"UTF-16LE\0",
		ONIG_ENCODING_UTF16_LE
	},
#endif
#ifdef ONIG_ENCODING_UTF32_BE
	{
		"UCS-4\0UTF-32\0UTF-32BE\0",
		ONIG_ENCODING_UTF32_BE
	},
#endif
#ifdef ONIG_ENCODING_UTF32_LE
	{
		"UCS-4LE\0UTF-32LE\0",
		ONIG_ENCODING_UTF32_LE
	},
#endif
#ifdef ONIG_ENCODING_SJIS
	{
		"SJIS\0CP932\0MS932\0SHIFT_JIS\0SJIS-WIN\0WINDOWS-31J\0",
		ONIG_ENCODING_SJIS
	},
#endif
#ifdef ONIG_ENCODING_BIG5
	{
		"BIG5\0BIG-5\0BIGFIVE\0CN-BIG5\0BIG-FIVE\0",
		ONIG_ENCODING_BIG5
	},
#endif
#ifdef ONIG_ENCODING_EUC_CN
	{
		"EUC-CN\0EUCCN\0EUC_CN\0GB-2312\0GB2312\0",
		ONIG_ENCODING_EUC_CN
	},
#endif
#ifdef ONIG_ENCODING_EUC_TW
	{
		"EUC-TW\0EUCTW\0EUC_TW\0",
		ONIG_ENCODING_EUC_TW
	},
#endif
#ifdef ONIG_ENCODING_EUC_KR
	{
		"EUC-KR\0EUCKR\0EUC_KR\0",
		ONIG_ENCODING_EUC_KR
	},
#endif
#if defined(ONIG_ENCODING_KOI8) && !PHP_ONIG_BAD_KOI8_ENTRY
	{
		"KOI8\0KOI-8\0",
		ONIG_ENCODING_KOI8
	},
#endif
#ifdef ONIG_ENCODING_KOI8_R
	{
		"KOI8R\0KOI8-R\0KOI-8R\0",
		ONIG_ENCODING_KOI8_R
	},
#endif
#ifdef ONIG_ENCODING_ISO_8859_1
	{
		"ISO-8859-1\0ISO8859-1\0ISO_8859_1\0ISO8859_1\0",
		ONIG_ENCODING_ISO_8859_1
	},
#endif
#ifdef ONIG_ENCODING_ISO_8859_2
	{
		"ISO-8859-2\0ISO8859-2\0ISO_8859_2\0ISO8859_2\0",
		ONIG_ENCODING_ISO_8859_2
	},
#endif
#ifdef ONIG_ENCODING_ISO_8859_3
	{
		"ISO-8859-3\0ISO8859-3\0ISO_8859_3\0ISO8859_3\0",
		ONIG_ENCODING_ISO_8859_3
	},
#endif
#ifdef ONIG_ENCODING_ISO_8859_4
	{
		"ISO-8859-4\0ISO8859-4\0ISO_8859_4\0ISO8859_4\0",
		ONIG_ENCODING_ISO_8859_4
	},
#endif
#ifdef ONIG_ENCODING_ISO_8859_5
	{
		"ISO-8859-5\0ISO8859-5\0ISO_8859_5\0ISO8859_5\0",
		ONIG_ENCODING_ISO_8859_5
	},
#endif
#ifdef ONIG_ENCODING_ISO_8859_6
	{
		"ISO-8859-6\0ISO8859-6\0ISO_8859_6\0ISO8859_6\0",
		ONIG_ENCODING_ISO_8859_6
	},
#endif
#ifdef ONIG_ENCODING_ISO_8859_7
	{
		"ISO-8859-7\0ISO8859-7\0ISO_8859_7\0ISO8859_7\0",
		ONIG_ENCODING_ISO_8859_7
	},
#endif
#ifdef ONIG_ENCODING_ISO_8859_8
	{
		"ISO-8859-8\0ISO8859-8\0ISO_8859_8\0ISO8859_8\0",
		ONIG_ENCODING_ISO_8859_8
	},
#endif
#ifdef ONIG_ENCODING_ISO_8859_9
	{
		"ISO-8859-9\0ISO8859-9\0ISO_8859_9\0ISO8859_9\0",
		ONIG_ENCODING_ISO_8859_9
	},
#endif
#ifdef ONIG_ENCODING_ISO_8859_10
	{
		"ISO-8859-10\0ISO8859-10\0ISO_8859_10\0ISO8859_10\0",
		ONIG_ENCODING_ISO_8859_10
	},
#endif
#ifdef ONIG_ENCODING_ISO_8859_11
	{
		"ISO-8859-11\0ISO8859-11\0ISO_8859_11\0ISO8859_11\0",
		ONIG_ENCODING_ISO_8859_11
	},
#endif
#ifdef ONIG_ENCODING_ISO_8859_13
	{
		"ISO-8859-13\0ISO8859-13\0ISO_8859_13\0ISO8859_13\0",
		ONIG_ENCODING_ISO_8859_13
	},
#endif
#ifdef ONIG_ENCODING_ISO_8859_14
	{
		"ISO-8859-14\0ISO8859-14\0ISO_8859_14\0ISO8859_14\0",
		ONIG_ENCODING_ISO_8859_14
	},
#endif
#ifdef ONIG_ENCODING_ISO_8859_15
	{
		"ISO-8859-15\0ISO8859-15\0ISO_8859_15\0ISO8859_15\0",
		ONIG_ENCODING_ISO_8859_15
	},
#endif
#ifdef ONIG_ENCODING_ISO_8859_16
	{
		"ISO-8859-16\0ISO8859-16\0ISO_8859_16\0ISO8859_16\0",
		ONIG_ENCODING_ISO_8859_16
	},
#endif
#ifdef ONIG_ENCODING_ASCII
	{
		"ASCII\0US-ASCII\0US_ASCII\0ISO646\0",
		ONIG_ENCODING_ASCII
	},
#endif
	/* sentinel: terminates every walk over the table */
	{ NULL, ONIG_ENCODING_UNDEF }
};
366 /* }}} */
367
/* {{{ _php_mb_regex_name2mbctype */
_php_mb_regex_name2mbctype(const char * pname)369 static OnigEncoding _php_mb_regex_name2mbctype(const char *pname)
370 {
371 const char *p;
372 php_mb_regex_enc_name_map_t *mapping;
373
374 if (pname == NULL || !*pname) {
375 return ONIG_ENCODING_UNDEF;
376 }
377
378 for (mapping = enc_name_map; mapping->names != NULL; mapping++) {
379 for (p = mapping->names; *p != '\0'; p += (strlen(p) + 1)) {
380 if (strcasecmp(p, pname) == 0) {
381 return mapping->code;
382 }
383 }
384 }
385
386 return ONIG_ENCODING_UNDEF;
387 }
388 /* }}} */
389
/* {{{ _php_mb_regex_mbctype2name */
/*
 * Reverse lookup: return the name list of the first table entry whose
 * encoding equals mbctype (read as a C string this is the entry's first
 * name), or NULL when the encoding is not in the table.
 */
static const char *_php_mb_regex_mbctype2name(OnigEncoding mbctype)
{
	php_mb_regex_enc_name_map_t *entry = enc_name_map;

	while (entry->names != NULL) {
		if (entry->code == mbctype) {
			return entry->names;
		}
		entry++;
	}

	return NULL;
}
403 /* }}} */
404
405 /* {{{ php_mb_regex_set_mbctype */
php_mb_regex_set_mbctype(const char * encname)406 int php_mb_regex_set_mbctype(const char *encname)
407 {
408 OnigEncoding mbctype = _php_mb_regex_name2mbctype(encname);
409 if (mbctype == ONIG_ENCODING_UNDEF) {
410 return FAILURE;
411 }
412 MBREX(current_mbctype) = mbctype;
413 return SUCCESS;
414 }
415 /* }}} */
416
417 /* {{{ php_mb_regex_set_default_mbctype */
php_mb_regex_set_default_mbctype(const char * encname)418 int php_mb_regex_set_default_mbctype(const char *encname)
419 {
420 OnigEncoding mbctype = _php_mb_regex_name2mbctype(encname);
421 if (mbctype == ONIG_ENCODING_UNDEF) {
422 return FAILURE;
423 }
424 MBREX(default_mbctype) = mbctype;
425 return SUCCESS;
426 }
427 /* }}} */
428
429 /* {{{ php_mb_regex_get_mbctype */
php_mb_regex_get_mbctype(void)430 const char *php_mb_regex_get_mbctype(void)
431 {
432 return _php_mb_regex_mbctype2name(MBREX(current_mbctype));
433 }
434 /* }}} */
435
436 /* {{{ php_mb_regex_get_default_mbctype */
php_mb_regex_get_default_mbctype(void)437 const char *php_mb_regex_get_default_mbctype(void)
438 {
439 return _php_mb_regex_mbctype2name(MBREX(default_mbctype));
440 }
441 /* }}} */
442
443 /*
444 * regex cache
445 */
446 /* {{{ php_mbregex_compile_pattern */
php_mbregex_compile_pattern(const char * pattern,int patlen,OnigOptionType options,OnigEncoding enc,OnigSyntaxType * syntax)447 static php_mb_regex_t *php_mbregex_compile_pattern(const char *pattern, int patlen, OnigOptionType options, OnigEncoding enc, OnigSyntaxType *syntax)
448 {
449 int err_code = 0;
450 php_mb_regex_t *retval = NULL, *rc = NULL;
451 OnigErrorInfo err_info;
452 OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
453
454 rc = zend_hash_str_find_ptr(&MBREX(ht_rc), (char *)pattern, patlen);
455 if (!rc || rc->options != options || rc->enc != enc || rc->syntax != syntax) {
456 if ((err_code = onig_new(&retval, (OnigUChar *)pattern, (OnigUChar *)(pattern + patlen), options, enc, syntax, &err_info)) != ONIG_NORMAL) {
457 onig_error_code_to_str(err_str, err_code, &err_info);
458 php_error_docref(NULL, E_WARNING, "mbregex compile err: %s", err_str);
459 retval = NULL;
460 goto out;
461 }
462 if (rc == MBREX(search_re)) {
463 /* reuse the new rc? see bug #72399 */
464 MBREX(search_re) = NULL;
465 }
466 zend_hash_str_update_ptr(&MBREX(ht_rc), (char *)pattern, patlen, retval);
467 } else {
468 retval = rc;
469 }
470 out:
471 return retval;
472 }
473 /* }}} */
474
475 /* {{{ _php_mb_regex_get_option_string */
/*
 * Helper for _php_mb_regex_get_option_string(): store `ch` at *cursor if
 * room is left, and always count it towards the required length.
 */
static void _php_mb_regex_option_emit(char **cursor, size_t *room, size_t *needed, char ch)
{
	if (*room > 0) {
		--(*room);
		*((*cursor)++) = ch;
	}
	++(*needed);
}

/*
 * Render an option mask and syntax as the option-letter string understood by
 * _php_mb_regex_init_options(), writing at most `len` bytes (terminator
 * included) to `str`.
 *
 * Returns 0 when everything fitted, otherwise the number of bytes that would
 * have been required.
 */
static size_t _php_mb_regex_get_option_string(char *str, size_t len, OnigOptionType option, OnigSyntaxType *syntax)
{
	const OnigOptionType both_lines = ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE;
	size_t room = len;
	size_t needed = 0;
	char *cursor = str;
	char syntax_ch = 0;

	if ((option & ONIG_OPTION_IGNORECASE) != 0) {
		_php_mb_regex_option_emit(&cursor, &room, &needed, 'i');
	}
	if ((option & ONIG_OPTION_EXTEND) != 0) {
		_php_mb_regex_option_emit(&cursor, &room, &needed, 'x');
	}

	/* 'p' stands for multiline and singleline together */
	if ((option & both_lines) == both_lines) {
		_php_mb_regex_option_emit(&cursor, &room, &needed, 'p');
	} else {
		if ((option & ONIG_OPTION_MULTILINE) != 0) {
			_php_mb_regex_option_emit(&cursor, &room, &needed, 'm');
		}
		if ((option & ONIG_OPTION_SINGLELINE) != 0) {
			_php_mb_regex_option_emit(&cursor, &room, &needed, 's');
		}
	}

	if ((option & ONIG_OPTION_FIND_LONGEST) != 0) {
		_php_mb_regex_option_emit(&cursor, &room, &needed, 'l');
	}
	if ((option & ONIG_OPTION_FIND_NOT_EMPTY) != 0) {
		_php_mb_regex_option_emit(&cursor, &room, &needed, 'n');
	}

	/* at most one syntax letter; unknown syntaxes contribute nothing */
	if (syntax == ONIG_SYNTAX_JAVA) {
		syntax_ch = 'j';
	} else if (syntax == ONIG_SYNTAX_GNU_REGEX) {
		syntax_ch = 'u';
	} else if (syntax == ONIG_SYNTAX_GREP) {
		syntax_ch = 'g';
	} else if (syntax == ONIG_SYNTAX_EMACS) {
		syntax_ch = 'c';
	} else if (syntax == ONIG_SYNTAX_RUBY) {
		syntax_ch = 'r';
	} else if (syntax == ONIG_SYNTAX_PERL) {
		syntax_ch = 'z';
	} else if (syntax == ONIG_SYNTAX_POSIX_BASIC) {
		syntax_ch = 'b';
	} else if (syntax == ONIG_SYNTAX_POSIX_EXTENDED) {
		syntax_ch = 'd';
	}
	if (syntax_ch != 0) {
		_php_mb_regex_option_emit(&cursor, &room, &needed, syntax_ch);
	}

	/* terminator is counted like any other byte */
	_php_mb_regex_option_emit(&cursor, &room, &needed, '\0');

	return (len < needed) ? needed : 0;
}
578 /* }}} */
579
580 /* {{{ _php_mb_regex_init_options */
581 static void
_php_mb_regex_init_options(const char * parg,int narg,OnigOptionType * option,OnigSyntaxType ** syntax,int * eval)582 _php_mb_regex_init_options(const char *parg, int narg, OnigOptionType *option, OnigSyntaxType **syntax, int *eval)
583 {
584 int n;
585 char c;
586 int optm = 0;
587
588 *syntax = ONIG_SYNTAX_RUBY;
589
590 if (parg != NULL) {
591 n = 0;
592 while(n < narg) {
593 c = parg[n++];
594 switch (c) {
595 case 'i':
596 optm |= ONIG_OPTION_IGNORECASE;
597 break;
598 case 'x':
599 optm |= ONIG_OPTION_EXTEND;
600 break;
601 case 'm':
602 optm |= ONIG_OPTION_MULTILINE;
603 break;
604 case 's':
605 optm |= ONIG_OPTION_SINGLELINE;
606 break;
607 case 'p':
608 optm |= ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE;
609 break;
610 case 'l':
611 optm |= ONIG_OPTION_FIND_LONGEST;
612 break;
613 case 'n':
614 optm |= ONIG_OPTION_FIND_NOT_EMPTY;
615 break;
616 case 'j':
617 *syntax = ONIG_SYNTAX_JAVA;
618 break;
619 case 'u':
620 *syntax = ONIG_SYNTAX_GNU_REGEX;
621 break;
622 case 'g':
623 *syntax = ONIG_SYNTAX_GREP;
624 break;
625 case 'c':
626 *syntax = ONIG_SYNTAX_EMACS;
627 break;
628 case 'r':
629 *syntax = ONIG_SYNTAX_RUBY;
630 break;
631 case 'z':
632 *syntax = ONIG_SYNTAX_PERL;
633 break;
634 case 'b':
635 *syntax = ONIG_SYNTAX_POSIX_BASIC;
636 break;
637 case 'd':
638 *syntax = ONIG_SYNTAX_POSIX_EXTENDED;
639 break;
640 case 'e':
641 if (eval != NULL) *eval = 1;
642 break;
643 default:
644 break;
645 }
646 }
647 if (option != NULL) *option|=optm;
648 }
649 }
650 /* }}} */
651
652 /*
653 * php functions
654 */
655
656 /* {{{ proto string mb_regex_encoding([string encoding])
657 Returns the current encoding for regex as a string. */
PHP_FUNCTION(mb_regex_encoding)658 PHP_FUNCTION(mb_regex_encoding)
659 {
660 char *encoding = NULL;
661 size_t encoding_len;
662 OnigEncoding mbctype;
663
664 if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s", &encoding, &encoding_len) == FAILURE) {
665 return;
666 }
667
668 if (!encoding) {
669 const char *retval = _php_mb_regex_mbctype2name(MBREX(current_mbctype));
670
671 if (retval == NULL) {
672 RETURN_FALSE;
673 }
674
675 RETURN_STRING((char *)retval);
676 } else {
677 mbctype = _php_mb_regex_name2mbctype(encoding);
678
679 if (mbctype == ONIG_ENCODING_UNDEF) {
680 php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", encoding);
681 RETURN_FALSE;
682 }
683
684 MBREX(current_mbctype) = mbctype;
685 RETURN_TRUE;
686 }
687 }
688 /* }}} */
689
690 /* {{{ _php_mb_regex_ereg_exec */
/*
 * Shared implementation of mb_ereg() and mb_eregi().
 *
 * Searches `string` for the pattern (case-insensitively when `icase` is
 * non-zero). On a match the optional third argument is replaced by an array
 * of the matched groups (false for groups that are unmatched or empty) and
 * the byte length of the whole match is returned, with 1 standing in for a
 * zero-length match or when no array was requested. Returns false on an
 * empty pattern, a compile error or when nothing matches.
 */
static void _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase)
{
	zval *arg_pattern, *array = NULL;
	char *string;
	size_t string_len;
	php_mb_regex_t *re;
	OnigRegion *regs = NULL;
	OnigOptionType options;
	OnigUChar *subject, *subject_end;
	int idx, match_len;

	if (zend_parse_parameters(ZEND_NUM_ARGS(), "zs|z/", &arg_pattern, &string, &string_len, &array) == FAILURE) {
		RETURN_FALSE;
	}

	options = MBREX(regex_default_options);
	if (icase) {
		options |= ONIG_OPTION_IGNORECASE;
	}

	/* non-string patterns: doubles lose their fraction, then everything becomes a string */
	if (Z_TYPE_P(arg_pattern) != IS_STRING) {
		if (Z_TYPE_P(arg_pattern) == IS_DOUBLE) {
			convert_to_long_ex(arg_pattern);
		}
		convert_to_string_ex(arg_pattern);
	}

	if (Z_STRLEN_P(arg_pattern) == 0) {
		php_error_docref(NULL, E_WARNING, "empty pattern");
		RETURN_FALSE;
	}

	re = php_mbregex_compile_pattern(Z_STRVAL_P(arg_pattern), Z_STRLEN_P(arg_pattern), options, MBREX(current_mbctype), MBREX(regex_default_syntax));
	if (re == NULL) {
		RETURN_FALSE;
	}

	subject = (OnigUChar *)string;
	subject_end = (OnigUChar *)(string + string_len);
	regs = onig_region_new();

	if (onig_search(re, subject, subject_end, subject, subject_end, regs, 0) < 0) {
		RETVAL_FALSE;
		goto done;
	}

	match_len = 1;
	if (array != NULL) {
		/* replace whatever the caller passed with a fresh result array */
		zval_dtor(array);
		array_init(array);

		match_len = regs->end[0] - regs->beg[0];
		for (idx = 0; idx < regs->num_regs; idx++) {
			int from = regs->beg[idx];
			int to = regs->end[idx];

			if (from < 0 || from >= to || to > string_len) {
				add_index_bool(array, idx, 0);
			} else {
				add_index_stringl(array, idx, string + from, to - from);
			}
		}
	}

	/* never report 0 for a successful match */
	RETVAL_LONG(match_len == 0 ? 1 : match_len);

done:
	if (regs != NULL) {
		onig_region_free(regs, 1);
	}
}
768 /* }}} */
769
770 /* {{{ proto int mb_ereg(string pattern, string string [, array registers])
771 Regular expression match for multibyte string */
/* Case-sensitive entry point: forwards to the shared implementation with icase = 0. */
PHP_FUNCTION(mb_ereg)
{
	_php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
}
776 /* }}} */
777
778 /* {{{ proto int mb_eregi(string pattern, string string [, array registers])
779 Case-insensitive regular expression match for multibyte string */
/* Case-insensitive entry point: forwards to the shared implementation with icase = 1. */
PHP_FUNCTION(mb_eregi)
{
	_php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
}
784 /* }}} */
785
786 /* {{{ _php_mb_regex_ereg_replace_exec */
/*
 * Shared implementation of mb_ereg_replace(), mb_eregi_replace() and
 * mb_ereg_replace_callback().
 *
 * options      initial Oniguruma options; letters from the optional option
 *              string (or the module defaults when it is absent) are OR-ed in.
 * is_callable  non-zero when the second PHP argument is a callback that
 *              receives the array of matched groups and returns the
 *              replacement; zero when it is a replacement string in which
 *              "\0".."\9" refer to matched groups.
 *
 * The PHP return value is the subject with every match replaced, or false
 * on argument errors, a pattern compile error, a search error, or when
 * option 'e' is combined with a callback.
 */
static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOptionType options, int is_callable)
{
	zval *arg_pattern_zval;

	char *arg_pattern;
	size_t arg_pattern_len;

	/* only filled in by the parser in the non-callback variant */
	char *replace = NULL;
	size_t replace_len = 0;

	zend_fcall_info arg_replace_fci;
	zend_fcall_info_cache arg_replace_fci_cache;

	char *string;
	size_t string_len;

	char *p;
	php_mb_regex_t *re;
	OnigSyntaxType *syntax;
	OnigRegion *regs = NULL;
	smart_str out_buf = {0};
	smart_str eval_buf = {0};
	smart_str *pbuf;
	int i, err, eval, n;
	OnigUChar *pos;
	OnigUChar *string_lim;
	char *description = NULL;
	char pat_buf[6];

	const mbfl_encoding *enc;

	{
		const char *current_enc_name;
		current_enc_name = _php_mb_regex_mbctype2name(MBREX(current_mbctype));
		if (current_enc_name == NULL ||
			(enc = mbfl_name2encoding(current_enc_name)) == NULL) {
			php_error_docref(NULL, E_WARNING, "Unknown error");
			RETURN_FALSE;
		}
	}
	eval = 0;
	{
		char *option_str = NULL;
		size_t option_str_len = 0;

		if (!is_callable) {
			if (zend_parse_parameters(ZEND_NUM_ARGS(), "zss|s",
						&arg_pattern_zval,
						&replace, &replace_len,
						&string, &string_len,
						&option_str, &option_str_len) == FAILURE) {
				RETURN_FALSE;
			}
		} else {
			if (zend_parse_parameters(ZEND_NUM_ARGS(), "zfs|s",
						&arg_pattern_zval,
						&arg_replace_fci, &arg_replace_fci_cache,
						&string, &string_len,
						&option_str, &option_str_len) == FAILURE) {
				RETURN_FALSE;
			}
		}

		if (option_str != NULL) {
			_php_mb_regex_init_options(option_str, option_str_len, &options, &syntax, &eval);
		} else {
			options |= MBREX(regex_default_options);
			syntax = MBREX(regex_default_syntax);
		}
	}
	if (Z_TYPE_P(arg_pattern_zval) == IS_STRING) {
		arg_pattern = Z_STRVAL_P(arg_pattern_zval);
		arg_pattern_len = Z_STRLEN_P(arg_pattern_zval);
	} else {
		/* FIXME: this code is not multibyte aware! */
		/* a non-string pattern is taken as the code of a single byte to search for */
		convert_to_long_ex(arg_pattern_zval);
		pat_buf[0] = (char)Z_LVAL_P(arg_pattern_zval);
		pat_buf[1] = '\0';
		pat_buf[2] = '\0';
		pat_buf[3] = '\0';
		pat_buf[4] = '\0';
		pat_buf[5] = '\0';

		arg_pattern = pat_buf;
		arg_pattern_len = 1;
	}
	/* create regex pattern buffer */
	re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, options, MBREX(current_mbctype), syntax);
	if (re == NULL) {
		RETURN_FALSE;
	}

	/*
	 * Reject 'e' + callback before anything is allocated for this call, so
	 * the early return cannot leak the description string created below.
	 */
	if (is_callable && eval) {
		php_error_docref(NULL, E_WARNING, "Option 'e' cannot be used with replacement callback");
		RETURN_FALSE;
	}

	if (eval || is_callable) {
		pbuf = &eval_buf;
		description = zend_make_compiled_string_description("mbregex replace");
	} else {
		pbuf = &out_buf;
		description = NULL;
	}

	/* do the actual work */
	err = 0;
	pos = (OnigUChar *)string;
	string_lim = (OnigUChar*)(string + string_len);
	regs = onig_region_new();
	while (err >= 0) {
		err = onig_search(re, (OnigUChar *)string, (OnigUChar *)string_lim, pos, (OnigUChar *)string_lim, regs, 0);
		if (err <= -2) {
			OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
			onig_error_code_to_str(err_str, err);
			php_error_docref(NULL, E_WARNING, "mbregex search failure in php_mbereg_replace_exec(): %s", err_str);
			break;
		}
		if (err >= 0) {
#if moriyoshi_0
			if (regs->beg[0] == regs->end[0]) {
				php_error_docref(NULL, E_WARNING, "Empty regular expression");
				break;
			}
#endif
			/* copy the part of the string before the match */
			smart_str_appendl(&out_buf, (char *)pos, (size_t)((OnigUChar *)(string + regs->beg[0]) - pos));

			if (!is_callable) {
				/* copy replacement and backrefs */
				i = 0;
				p = replace;
				while (i < replace_len) {
					/* byte width of the character at p, so "\" inside a multibyte character is not taken for a backref */
					int fwd = (int) php_mb_mbchar_bytes_ex(p, enc);
					n = -1;
					if ((replace_len - i) >= 2 && fwd == 1 &&
					p[0] == '\\' && p[1] >= '0' && p[1] <= '9') {
						n = p[1] - '0';
					}
					if (n >= 0 && n < regs->num_regs) {
						if (regs->beg[n] >= 0 && regs->beg[n] < regs->end[n] && regs->end[n] <= string_len) {
							smart_str_appendl(pbuf, string + regs->beg[n], regs->end[n] - regs->beg[n]);
						}
						p += 2;
						i += 2;
					} else {
						smart_str_appendl(pbuf, p, fwd);
						p += fwd;
						i += fwd;
					}
				}
			}

			if (eval) {
				zval v;
				zend_string *eval_str;
				/* null terminate buffer */
				smart_str_0(&eval_buf);

				if (eval_buf.s) {
					eval_str = eval_buf.s;
				} else {
					eval_str = ZSTR_EMPTY_ALLOC();
				}

				/*
				 * NOTE(review): the expanded replacement, including text
				 * captured from the subject, is executed as PHP code.
				 * Subjects from untrusted sources make this a code-injection
				 * vector; behaviour intentionally left unchanged here.
				 */
				/* do eval */
				if (zend_eval_stringl(ZSTR_VAL(eval_str), ZSTR_LEN(eval_str), &v, description) == FAILURE) {
					efree(description);
					php_error_docref(NULL,E_ERROR, "Failed evaluating code: %s%s", PHP_EOL, ZSTR_VAL(eval_str));
					/* zend_error() does not return in this case */
				}

				/* result of eval */
				convert_to_string(&v);
				smart_str_appendl(&out_buf, Z_STRVAL(v), Z_STRLEN(v));
				/* Clean up */
				smart_str_free(&eval_buf);
				zval_dtor(&v);
			} else if (is_callable) {
				zval args[1];
				zval subpats, retval;
				int g;

				array_init(&subpats);
				for (g = 0; g < regs->num_regs; g++) {
					int gbeg = regs->beg[g];
					int gend = regs->end[g];

					if (gbeg >= 0 && gbeg <= gend && (size_t)gend <= string_len) {
						add_next_index_stringl(&subpats, string + gbeg, gend - gbeg);
					} else {
						/* unmatched group: same empty entry as before, without forming a pointer in front of the subject */
						add_next_index_stringl(&subpats, "", 0);
					}
				}

				ZVAL_COPY_VALUE(&args[0], &subpats);
				/* null terminate buffer */
				smart_str_0(&eval_buf);

				arg_replace_fci.param_count = 1;
				arg_replace_fci.params = args;
				arg_replace_fci.retval = &retval;
				if (zend_call_function(&arg_replace_fci, &arg_replace_fci_cache) == SUCCESS &&
						!Z_ISUNDEF(retval)) {
					convert_to_string_ex(&retval);
					smart_str_appendl(&out_buf, Z_STRVAL(retval), Z_STRLEN(retval));
					smart_str_free(&eval_buf);
					zval_ptr_dtor(&retval);
				} else {
					if (!EG(exception)) {
						php_error_docref(NULL, E_WARNING, "Unable to call custom replacement function");
					}
				}
				zval_ptr_dtor(&subpats);
			}

			n = regs->end[0];
			if ((pos - (OnigUChar *)string) < n) {
				pos = (OnigUChar *)string + n;
			} else {
				/* zero-length match at pos: copy one byte and step over it so the loop advances */
				if (pos < string_lim) {
					smart_str_appendl(&out_buf, (char *)pos, 1);
				}
				pos++;
			}
		} else { /* nomatch */
			/* stick that last bit of string on our output */
			if (string_lim - pos > 0) {
				smart_str_appendl(&out_buf, (char *)pos, string_lim - pos);
			}
		}
		onig_region_free(regs, 0);
	}

	if (description) {
		efree(description);
	}
	if (regs != NULL) {
		onig_region_free(regs, 1);
	}
	smart_str_free(&eval_buf);

	if (err <= -2) {
		smart_str_free(&out_buf);
		RETVAL_FALSE;
	} else if (out_buf.s) {
		smart_str_0(&out_buf);
		RETVAL_STR(out_buf.s);
	} else {
		RETVAL_EMPTY_STRING();
	}
}
1034 /* }}} */
1035
1036 /* {{{ proto string mb_ereg_replace(string pattern, string replacement, string string [, string option])
1037 Replace regular expression for multibyte string */
/* String-replacement entry point: no preset options, replacement is a string. */
PHP_FUNCTION(mb_ereg_replace)
{
	_php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 0);
}
1042 /* }}} */
1043
/* {{{ proto string mb_eregi_replace(string pattern, string replacement, string string [, string option])
   Case-insensitive regular expression replace for multibyte string */
/* Same as mb_ereg_replace but with ONIG_OPTION_IGNORECASE preset in the options. */
PHP_FUNCTION(mb_eregi_replace)
{
	_php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, ONIG_OPTION_IGNORECASE, 0);
}
1050 /* }}} */
1051
1052 /* {{{ proto string mb_ereg_replace_callback(string pattern, string callback, string string [, string option])
1053 regular expression for multibyte string using replacement callback */
/* Callback entry point: no preset options, second argument is parsed as a callable. */
PHP_FUNCTION(mb_ereg_replace_callback)
{
	_php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 1);
}
1058 /* }}} */
1059
1060 /* {{{ proto array mb_split(string pattern, string string [, int limit])
1061 split multibyte string into array by regular expression */
PHP_FUNCTION(mb_split)1062 PHP_FUNCTION(mb_split)
1063 {
1064 char *arg_pattern;
1065 size_t arg_pattern_len;
1066 php_mb_regex_t *re;
1067 OnigRegion *regs = NULL;
1068 char *string;
1069 OnigUChar *pos, *chunk_pos;
1070 size_t string_len;
1071
1072 int n, err;
1073 zend_long count = -1;
1074
1075 if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|l", &arg_pattern, &arg_pattern_len, &string, &string_len, &count) == FAILURE) {
1076 RETURN_FALSE;
1077 }
1078
1079 if (count > 0) {
1080 count--;
1081 }
1082
1083 /* create regex pattern buffer */
1084 if ((re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, MBREX(regex_default_options), MBREX(current_mbctype), MBREX(regex_default_syntax))) == NULL) {
1085 RETURN_FALSE;
1086 }
1087
1088 array_init(return_value);
1089
1090 chunk_pos = pos = (OnigUChar *)string;
1091 err = 0;
1092 regs = onig_region_new();
1093 /* churn through str, generating array entries as we go */
1094 while (count != 0 && (pos - (OnigUChar *)string) < (ptrdiff_t)string_len) {
1095 int beg, end;
1096 err = onig_search(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), pos, (OnigUChar *)(string + string_len), regs, 0);
1097 if (err < 0) {
1098 break;
1099 }
1100 beg = regs->beg[0], end = regs->end[0];
1101 /* add it to the array */
1102 if ((pos - (OnigUChar *)string) < end) {
1103 if (beg < string_len && beg >= (chunk_pos - (OnigUChar *)string)) {
1104 add_next_index_stringl(return_value, (char *)chunk_pos, ((OnigUChar *)(string + beg) - chunk_pos));
1105 --count;
1106 } else {
1107 err = -2;
1108 break;
1109 }
1110 /* point at our new starting point */
1111 chunk_pos = pos = (OnigUChar *)string + end;
1112 } else {
1113 pos++;
1114 }
1115 onig_region_free(regs, 0);
1116 }
1117
1118 onig_region_free(regs, 1);
1119
1120 /* see if we encountered an error */
1121 if (err <= -2) {
1122 OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
1123 onig_error_code_to_str(err_str, err);
1124 php_error_docref(NULL, E_WARNING, "mbregex search failure in mbsplit(): %s", err_str);
1125 zval_dtor(return_value);
1126 RETURN_FALSE;
1127 }
1128
1129 /* otherwise we just have one last element to add to the array */
1130 n = ((OnigUChar *)(string + string_len) - chunk_pos);
1131 if (n > 0) {
1132 add_next_index_stringl(return_value, (char *)chunk_pos, n);
1133 } else {
1134 add_next_index_stringl(return_value, "", 0);
1135 }
1136 }
1137 /* }}} */
1138
1139 /* {{{ proto bool mb_ereg_match(string pattern, string string [,string option])
1140 Regular expression match for multibyte string */
PHP_FUNCTION(mb_ereg_match)1141 PHP_FUNCTION(mb_ereg_match)
1142 {
1143 char *arg_pattern;
1144 size_t arg_pattern_len;
1145
1146 char *string;
1147 size_t string_len;
1148
1149 php_mb_regex_t *re;
1150 OnigSyntaxType *syntax;
1151 OnigOptionType option = 0;
1152 int err;
1153
1154 {
1155 char *option_str = NULL;
1156 size_t option_str_len = 0;
1157
1158 if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|s",
1159 &arg_pattern, &arg_pattern_len, &string, &string_len,
1160 &option_str, &option_str_len)==FAILURE) {
1161 RETURN_FALSE;
1162 }
1163
1164 if (option_str != NULL) {
1165 _php_mb_regex_init_options(option_str, option_str_len, &option, &syntax, NULL);
1166 } else {
1167 option |= MBREX(regex_default_options);
1168 syntax = MBREX(regex_default_syntax);
1169 }
1170 }
1171
1172 if ((re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), syntax)) == NULL) {
1173 RETURN_FALSE;
1174 }
1175
1176 /* match */
1177 err = onig_match(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), (OnigUChar *)string, NULL, 0);
1178 if (err >= 0) {
1179 RETVAL_TRUE;
1180 } else {
1181 RETVAL_FALSE;
1182 }
1183 }
1184 /* }}} */
1185
1186 /* regex search */
1187 /* {{{ _php_mb_regex_ereg_search_exec */
1188 static void
_php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS,int mode)1189 _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mode)
1190 {
1191 char *arg_pattern = NULL, *arg_options = NULL;
1192 size_t arg_pattern_len, arg_options_len;
1193 int n, i, err, pos, len, beg, end;
1194 OnigOptionType option;
1195 OnigUChar *str;
1196 OnigSyntaxType *syntax;
1197
1198 if (zend_parse_parameters(ZEND_NUM_ARGS(), "|ss", &arg_pattern, &arg_pattern_len, &arg_options, &arg_options_len) == FAILURE) {
1199 return;
1200 }
1201
1202 option = MBREX(regex_default_options);
1203
1204 if (arg_options) {
1205 option = 0;
1206 _php_mb_regex_init_options(arg_options, arg_options_len, &option, &syntax, NULL);
1207 }
1208
1209 if (arg_pattern) {
1210 /* create regex pattern buffer */
1211 if ((MBREX(search_re) = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), MBREX(regex_default_syntax))) == NULL) {
1212 RETURN_FALSE;
1213 }
1214 }
1215
1216 pos = MBREX(search_pos);
1217 str = NULL;
1218 len = 0;
1219 if (!Z_ISUNDEF(MBREX(search_str)) && Z_TYPE(MBREX(search_str)) == IS_STRING){
1220 str = (OnigUChar *)Z_STRVAL(MBREX(search_str));
1221 len = Z_STRLEN(MBREX(search_str));
1222 }
1223
1224 if (MBREX(search_re) == NULL) {
1225 php_error_docref(NULL, E_WARNING, "No regex given");
1226 RETURN_FALSE;
1227 }
1228
1229 if (str == NULL) {
1230 php_error_docref(NULL, E_WARNING, "No string given");
1231 RETURN_FALSE;
1232 }
1233
1234 if (MBREX(search_regs)) {
1235 onig_region_free(MBREX(search_regs), 1);
1236 }
1237 MBREX(search_regs) = onig_region_new();
1238
1239 err = onig_search(MBREX(search_re), str, str + len, str + pos, str + len, MBREX(search_regs), 0);
1240 if (err == ONIG_MISMATCH) {
1241 MBREX(search_pos) = len;
1242 RETVAL_FALSE;
1243 } else if (err <= -2) {
1244 OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
1245 onig_error_code_to_str(err_str, err);
1246 php_error_docref(NULL, E_WARNING, "mbregex search failure in mbregex_search(): %s", err_str);
1247 RETVAL_FALSE;
1248 } else {
1249 switch (mode) {
1250 case 1:
1251 array_init(return_value);
1252 beg = MBREX(search_regs)->beg[0];
1253 end = MBREX(search_regs)->end[0];
1254 add_next_index_long(return_value, beg);
1255 add_next_index_long(return_value, end - beg);
1256 break;
1257 case 2:
1258 array_init(return_value);
1259 n = MBREX(search_regs)->num_regs;
1260 for (i = 0; i < n; i++) {
1261 beg = MBREX(search_regs)->beg[i];
1262 end = MBREX(search_regs)->end[i];
1263 if (beg >= 0 && beg <= end && end <= len) {
1264 add_index_stringl(return_value, i, (char *)&str[beg], end - beg);
1265 } else {
1266 add_index_bool(return_value, i, 0);
1267 }
1268 }
1269 break;
1270 default:
1271 RETVAL_TRUE;
1272 break;
1273 }
1274 end = MBREX(search_regs)->end[0];
1275 if (pos <= end) {
1276 MBREX(search_pos) = end;
1277 } else {
1278 MBREX(search_pos) = pos + 1;
1279 }
1280 }
1281
1282 if (err < 0) {
1283 onig_region_free(MBREX(search_regs), 1);
1284 MBREX(search_regs) = (OnigRegion *)NULL;
1285 }
1286 }
1287 /* }}} */
1288
1289 /* {{{ proto bool mb_ereg_search([string pattern[, string option]])
1290 Regular expression search for multibyte string */
PHP_FUNCTION(mb_ereg_search)1291 PHP_FUNCTION(mb_ereg_search)
1292 {
1293 _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
1294 }
1295 /* }}} */
1296
/* {{{ proto array mb_ereg_search_pos([string pattern[, string option]])
   Regular expression search for multibyte string, returning the offset and length of the match */
PHP_FUNCTION(mb_ereg_search_pos)1299 PHP_FUNCTION(mb_ereg_search_pos)
1300 {
1301 _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
1302 }
1303 /* }}} */
1304
/* {{{ proto array mb_ereg_search_regs([string pattern[, string option]])
   Regular expression search for multibyte string, returning the matched substrings */
PHP_FUNCTION(mb_ereg_search_regs)1307 PHP_FUNCTION(mb_ereg_search_regs)
1308 {
1309 _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 2);
1310 }
1311 /* }}} */
1312
1313 /* {{{ proto bool mb_ereg_search_init(string string [, string pattern[, string option]])
1314 Initialize string and regular expression for search. */
PHP_FUNCTION(mb_ereg_search_init)1315 PHP_FUNCTION(mb_ereg_search_init)
1316 {
1317 size_t argc = ZEND_NUM_ARGS();
1318 zval *arg_str;
1319 char *arg_pattern = NULL, *arg_options = NULL;
1320 size_t arg_pattern_len = 0, arg_options_len = 0;
1321 OnigSyntaxType *syntax = NULL;
1322 OnigOptionType option;
1323
1324 if (zend_parse_parameters(argc, "z|ss", &arg_str, &arg_pattern, &arg_pattern_len, &arg_options, &arg_options_len) == FAILURE) {
1325 return;
1326 }
1327
1328 if (argc > 1 && arg_pattern_len == 0) {
1329 php_error_docref(NULL, E_WARNING, "Empty pattern");
1330 RETURN_FALSE;
1331 }
1332
1333 option = MBREX(regex_default_options);
1334 syntax = MBREX(regex_default_syntax);
1335
1336 if (argc == 3) {
1337 option = 0;
1338 _php_mb_regex_init_options(arg_options, arg_options_len, &option, &syntax, NULL);
1339 }
1340
1341 if (argc > 1) {
1342 /* create regex pattern buffer */
1343 if ((MBREX(search_re) = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), syntax)) == NULL) {
1344 RETURN_FALSE;
1345 }
1346 }
1347
1348 if (!Z_ISNULL(MBREX(search_str))) {
1349 zval_ptr_dtor(&MBREX(search_str));
1350 }
1351
1352 ZVAL_DUP(&MBREX(search_str), arg_str);
1353
1354 MBREX(search_pos) = 0;
1355
1356 if (MBREX(search_regs) != NULL) {
1357 onig_region_free(MBREX(search_regs), 1);
1358 MBREX(search_regs) = NULL;
1359 }
1360
1361 RETURN_TRUE;
1362 }
1363 /* }}} */
1364
/* {{{ proto array mb_ereg_search_getregs(void)
   Get the matched substrings of the last search */
PHP_FUNCTION(mb_ereg_search_getregs)1367 PHP_FUNCTION(mb_ereg_search_getregs)
1368 {
1369 int n, i, len, beg, end;
1370 OnigUChar *str;
1371
1372 if (MBREX(search_regs) != NULL && Z_TYPE(MBREX(search_str)) == IS_STRING) {
1373 array_init(return_value);
1374
1375 str = (OnigUChar *)Z_STRVAL(MBREX(search_str));
1376 len = Z_STRLEN(MBREX(search_str));
1377 n = MBREX(search_regs)->num_regs;
1378 for (i = 0; i < n; i++) {
1379 beg = MBREX(search_regs)->beg[i];
1380 end = MBREX(search_regs)->end[i];
1381 if (beg >= 0 && beg <= end && end <= len) {
1382 add_index_stringl(return_value, i, (char *)&str[beg], end - beg);
1383 } else {
1384 add_index_bool(return_value, i, 0);
1385 }
1386 }
1387 } else {
1388 RETVAL_FALSE;
1389 }
1390 }
1391 /* }}} */
1392
1393 /* {{{ proto int mb_ereg_search_getpos(void)
1394 Get search start position */
PHP_FUNCTION(mb_ereg_search_getpos)1395 PHP_FUNCTION(mb_ereg_search_getpos)
1396 {
1397 RETVAL_LONG(MBREX(search_pos));
1398 }
1399 /* }}} */
1400
1401 /* {{{ proto bool mb_ereg_search_setpos(int position)
1402 Set search start position */
PHP_FUNCTION(mb_ereg_search_setpos)1403 PHP_FUNCTION(mb_ereg_search_setpos)
1404 {
1405 zend_long position;
1406
1407 if (zend_parse_parameters(ZEND_NUM_ARGS(), "l", &position) == FAILURE) {
1408 return;
1409 }
1410
1411 if (position < 0 || (!Z_ISUNDEF(MBREX(search_str)) && Z_TYPE(MBREX(search_str)) == IS_STRING && (size_t)position > Z_STRLEN(MBREX(search_str)))) {
1412 php_error_docref(NULL, E_WARNING, "Position is out of range");
1413 MBREX(search_pos) = 0;
1414 RETURN_FALSE;
1415 }
1416
1417 MBREX(search_pos) = position;
1418 RETURN_TRUE;
1419 }
1420 /* }}} */
1421
/* {{{ _php_mb_regex_set_options */
static void _php_mb_regex_set_options(OnigOptionType options, OnigSyntaxType *syntax, OnigOptionType *prev_options, OnigSyntaxType **prev_syntax)
{
	/*
	 * Installs `options` / `syntax` as the module-wide regex defaults.
	 * The values being replaced are handed back through prev_options and
	 * prev_syntax; either pointer may be NULL when the caller does not
	 * need the old value.
	 */
	OnigOptionType old_options = MBREX(regex_default_options);
	OnigSyntaxType *old_syntax = MBREX(regex_default_syntax);

	if (prev_options) {
		*prev_options = old_options;
	}
	if (prev_syntax) {
		*prev_syntax = old_syntax;
	}

	MBREX(regex_default_options) = options;
	MBREX(regex_default_syntax) = syntax;
}
1434 /* }}} */
1435
1436 /* {{{ proto string mb_regex_set_options([string options])
1437 Set or get the default options for mbregex functions */
PHP_FUNCTION(mb_regex_set_options)1438 PHP_FUNCTION(mb_regex_set_options)
1439 {
1440 OnigOptionType opt;
1441 OnigSyntaxType *syntax;
1442 char *string = NULL;
1443 size_t string_len;
1444 char buf[16];
1445
1446 if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s",
1447 &string, &string_len) == FAILURE) {
1448 RETURN_FALSE;
1449 }
1450 if (string != NULL) {
1451 opt = 0;
1452 syntax = NULL;
1453 _php_mb_regex_init_options(string, string_len, &opt, &syntax, NULL);
1454 _php_mb_regex_set_options(opt, syntax, NULL, NULL);
1455 } else {
1456 opt = MBREX(regex_default_options);
1457 syntax = MBREX(regex_default_syntax);
1458 }
1459 _php_mb_regex_get_option_string(buf, sizeof(buf), opt, syntax);
1460
1461 RETVAL_STRING(buf);
1462 }
1463 /* }}} */
1464
1465 #endif /* HAVE_MBREGEX */
1466
1467 /*
1468 * Local variables:
1469 * tab-width: 4
1470 * c-basic-offset: 4
1471 * End:
1472 * vim600: fdm=marker
1473 * vim: noet sw=4 ts=4
1474 */
1475