1 /*
2 +----------------------------------------------------------------------+
3 | PHP Version 7 |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 1997-2018 The PHP Group |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
15 | Author: Tsukada Takuya <tsukada@fminn.nagano.nagano.jp> |
16 +----------------------------------------------------------------------+
17 */
18
19 /* $Id$ */
20
21
22 #ifdef HAVE_CONFIG_H
23 #include "config.h"
24 #endif
25
26 #include "php.h"
27 #include "php_ini.h"
28
29 #if HAVE_MBREGEX
30
31 #include "zend_smart_str.h"
32 #include "ext/standard/info.h"
33 #include "php_mbregex.h"
34 #include "mbstring.h"
35
36 #include "php_onig_compat.h" /* must come prior to the oniguruma header */
37 #include <oniguruma.h>
38 #undef UChar
39
40 ZEND_EXTERN_MODULE_GLOBALS(mbstring)
41
42 struct _zend_mb_regex_globals {
43 OnigEncoding default_mbctype;
44 OnigEncoding current_mbctype;
45 HashTable ht_rc;
46 zval search_str;
47 zval *search_str_val;
48 unsigned int search_pos;
49 php_mb_regex_t *search_re;
50 OnigRegion *search_regs;
51 OnigOptionType regex_default_options;
52 OnigSyntaxType *regex_default_syntax;
53 };
54
55 #define MBREX(g) (MBSTRG(mb_regex_globals)->g)
56
57 /* {{{ static void php_mb_regex_free_cache() */
php_mb_regex_free_cache(zval * el)58 static void php_mb_regex_free_cache(zval *el) {
59 onig_free((php_mb_regex_t *)Z_PTR_P(el));
60 }
61 /* }}} */
62
63 /* {{{ _php_mb_regex_globals_ctor */
_php_mb_regex_globals_ctor(zend_mb_regex_globals * pglobals)64 static int _php_mb_regex_globals_ctor(zend_mb_regex_globals *pglobals)
65 {
66 pglobals->default_mbctype = ONIG_ENCODING_UTF8;
67 pglobals->current_mbctype = ONIG_ENCODING_UTF8;
68 zend_hash_init(&(pglobals->ht_rc), 0, NULL, php_mb_regex_free_cache, 1);
69 ZVAL_UNDEF(&pglobals->search_str);
70 pglobals->search_re = (php_mb_regex_t*)NULL;
71 pglobals->search_pos = 0;
72 pglobals->search_regs = (OnigRegion*)NULL;
73 pglobals->regex_default_options = ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE;
74 pglobals->regex_default_syntax = ONIG_SYNTAX_RUBY;
75 return SUCCESS;
76 }
77 /* }}} */
78
79 /* {{{ _php_mb_regex_globals_dtor */
_php_mb_regex_globals_dtor(zend_mb_regex_globals * pglobals)80 static void _php_mb_regex_globals_dtor(zend_mb_regex_globals *pglobals)
81 {
82 zend_hash_destroy(&pglobals->ht_rc);
83 }
84 /* }}} */
85
86 /* {{{ php_mb_regex_globals_alloc */
php_mb_regex_globals_alloc(void)87 zend_mb_regex_globals *php_mb_regex_globals_alloc(void)
88 {
89 zend_mb_regex_globals *pglobals = pemalloc(
90 sizeof(zend_mb_regex_globals), 1);
91 if (!pglobals) {
92 return NULL;
93 }
94 if (SUCCESS != _php_mb_regex_globals_ctor(pglobals)) {
95 pefree(pglobals, 1);
96 return NULL;
97 }
98 return pglobals;
99 }
100 /* }}} */
101
102 /* {{{ php_mb_regex_globals_free */
php_mb_regex_globals_free(zend_mb_regex_globals * pglobals)103 void php_mb_regex_globals_free(zend_mb_regex_globals *pglobals)
104 {
105 if (!pglobals) {
106 return;
107 }
108 _php_mb_regex_globals_dtor(pglobals);
109 pefree(pglobals, 1);
110 }
111 /* }}} */
112
113 /* {{{ PHP_MINIT_FUNCTION(mb_regex) */
PHP_MINIT_FUNCTION(mb_regex)114 PHP_MINIT_FUNCTION(mb_regex)
115 {
116 onig_init();
117 return SUCCESS;
118 }
119 /* }}} */
120
121 /* {{{ PHP_MSHUTDOWN_FUNCTION(mb_regex) */
PHP_MSHUTDOWN_FUNCTION(mb_regex)122 PHP_MSHUTDOWN_FUNCTION(mb_regex)
123 {
124 onig_end();
125 return SUCCESS;
126 }
127 /* }}} */
128
129 /* {{{ PHP_RINIT_FUNCTION(mb_regex) */
PHP_RINIT_FUNCTION(mb_regex)130 PHP_RINIT_FUNCTION(mb_regex)
131 {
132 return MBSTRG(mb_regex_globals) ? SUCCESS: FAILURE;
133 }
134 /* }}} */
135
136 /* {{{ PHP_RSHUTDOWN_FUNCTION(mb_regex) */
PHP_RSHUTDOWN_FUNCTION(mb_regex)137 PHP_RSHUTDOWN_FUNCTION(mb_regex)
138 {
139 MBREX(current_mbctype) = MBREX(default_mbctype);
140
141 if (!Z_ISUNDEF(MBREX(search_str))) {
142 zval_ptr_dtor(&MBREX(search_str));
143 ZVAL_UNDEF(&MBREX(search_str));
144 }
145 MBREX(search_pos) = 0;
146
147 if (MBREX(search_regs) != NULL) {
148 onig_region_free(MBREX(search_regs), 1);
149 MBREX(search_regs) = (OnigRegion *)NULL;
150 }
151 zend_hash_clean(&MBREX(ht_rc));
152
153 return SUCCESS;
154 }
155 /* }}} */
156
157 /* {{{ PHP_MINFO_FUNCTION(mb_regex) */
PHP_MINFO_FUNCTION(mb_regex)158 PHP_MINFO_FUNCTION(mb_regex)
159 {
160 char buf[32];
161 php_info_print_table_start();
162 php_info_print_table_row(2, "Multibyte (japanese) regex support", "enabled");
163 snprintf(buf, sizeof(buf), "%d.%d.%d",
164 ONIGURUMA_VERSION_MAJOR,
165 ONIGURUMA_VERSION_MINOR,
166 ONIGURUMA_VERSION_TEENY);
167 #ifdef PHP_ONIG_BUNDLED
168 #ifdef USE_COMBINATION_EXPLOSION_CHECK
169 php_info_print_table_row(2, "Multibyte regex (oniguruma) backtrack check", "On");
170 #else /* USE_COMBINATION_EXPLOSION_CHECK */
171 php_info_print_table_row(2, "Multibyte regex (oniguruma) backtrack check", "Off");
172 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
173 #endif /* PHP_BUNDLED_ONIG */
174 php_info_print_table_row(2, "Multibyte regex (oniguruma) version", buf);
175 php_info_print_table_end();
176 }
177 /* }}} */
178
179 /*
180 * encoding name resolver
181 */
182
183 /* {{{ encoding name map */
184 typedef struct _php_mb_regex_enc_name_map_t {
185 const char *names;
186 OnigEncoding code;
187 } php_mb_regex_enc_name_map_t;
188
189 php_mb_regex_enc_name_map_t enc_name_map[] = {
190 #ifdef ONIG_ENCODING_EUC_JP
191 {
192 "EUC-JP\0EUCJP\0X-EUC-JP\0UJIS\0EUCJP\0EUCJP-WIN\0",
193 ONIG_ENCODING_EUC_JP
194 },
195 #endif
196 #ifdef ONIG_ENCODING_UTF8
197 {
198 "UTF-8\0UTF8\0",
199 ONIG_ENCODING_UTF8
200 },
201 #endif
202 #ifdef ONIG_ENCODING_UTF16_BE
203 {
204 "UTF-16\0UTF-16BE\0",
205 ONIG_ENCODING_UTF16_BE
206 },
207 #endif
208 #ifdef ONIG_ENCODING_UTF16_LE
209 {
210 "UTF-16LE\0",
211 ONIG_ENCODING_UTF16_LE
212 },
213 #endif
214 #ifdef ONIG_ENCODING_UTF32_BE
215 {
216 "UCS-4\0UTF-32\0UTF-32BE\0",
217 ONIG_ENCODING_UTF32_BE
218 },
219 #endif
220 #ifdef ONIG_ENCODING_UTF32_LE
221 {
222 "UCS-4LE\0UTF-32LE\0",
223 ONIG_ENCODING_UTF32_LE
224 },
225 #endif
226 #ifdef ONIG_ENCODING_SJIS
227 {
228 "SJIS\0CP932\0MS932\0SHIFT_JIS\0SJIS-WIN\0WINDOWS-31J\0",
229 ONIG_ENCODING_SJIS
230 },
231 #endif
232 #ifdef ONIG_ENCODING_BIG5
233 {
234 "BIG5\0BIG-5\0BIGFIVE\0CN-BIG5\0BIG-FIVE\0",
235 ONIG_ENCODING_BIG5
236 },
237 #endif
238 #ifdef ONIG_ENCODING_EUC_CN
239 {
240 "EUC-CN\0EUCCN\0EUC_CN\0GB-2312\0GB2312\0",
241 ONIG_ENCODING_EUC_CN
242 },
243 #endif
244 #ifdef ONIG_ENCODING_EUC_TW
245 {
246 "EUC-TW\0EUCTW\0EUC_TW\0",
247 ONIG_ENCODING_EUC_TW
248 },
249 #endif
250 #ifdef ONIG_ENCODING_EUC_KR
251 {
252 "EUC-KR\0EUCKR\0EUC_KR\0",
253 ONIG_ENCODING_EUC_KR
254 },
255 #endif
256 #if defined(ONIG_ENCODING_KOI8) && !PHP_ONIG_BAD_KOI8_ENTRY
257 {
258 "KOI8\0KOI-8\0",
259 ONIG_ENCODING_KOI8
260 },
261 #endif
262 #ifdef ONIG_ENCODING_KOI8_R
263 {
264 "KOI8R\0KOI8-R\0KOI-8R\0",
265 ONIG_ENCODING_KOI8_R
266 },
267 #endif
268 #ifdef ONIG_ENCODING_ISO_8859_1
269 {
270 "ISO-8859-1\0ISO8859-1\0ISO_8859_1\0ISO8859_1\0",
271 ONIG_ENCODING_ISO_8859_1
272 },
273 #endif
274 #ifdef ONIG_ENCODING_ISO_8859_2
275 {
276 "ISO-8859-2\0ISO8859-2\0ISO_8859_2\0ISO8859_2\0",
277 ONIG_ENCODING_ISO_8859_2
278 },
279 #endif
280 #ifdef ONIG_ENCODING_ISO_8859_3
281 {
282 "ISO-8859-3\0ISO8859-3\0ISO_8859_3\0ISO8859_3\0",
283 ONIG_ENCODING_ISO_8859_3
284 },
285 #endif
286 #ifdef ONIG_ENCODING_ISO_8859_4
287 {
288 "ISO-8859-4\0ISO8859-4\0ISO_8859_4\0ISO8859_4\0",
289 ONIG_ENCODING_ISO_8859_4
290 },
291 #endif
292 #ifdef ONIG_ENCODING_ISO_8859_5
293 {
294 "ISO-8859-5\0ISO8859-5\0ISO_8859_5\0ISO8859_5\0",
295 ONIG_ENCODING_ISO_8859_5
296 },
297 #endif
298 #ifdef ONIG_ENCODING_ISO_8859_6
299 {
300 "ISO-8859-6\0ISO8859-6\0ISO_8859_6\0ISO8859_6\0",
301 ONIG_ENCODING_ISO_8859_6
302 },
303 #endif
304 #ifdef ONIG_ENCODING_ISO_8859_7
305 {
306 "ISO-8859-7\0ISO8859-7\0ISO_8859_7\0ISO8859_7\0",
307 ONIG_ENCODING_ISO_8859_7
308 },
309 #endif
310 #ifdef ONIG_ENCODING_ISO_8859_8
311 {
312 "ISO-8859-8\0ISO8859-8\0ISO_8859_8\0ISO8859_8\0",
313 ONIG_ENCODING_ISO_8859_8
314 },
315 #endif
316 #ifdef ONIG_ENCODING_ISO_8859_9
317 {
318 "ISO-8859-9\0ISO8859-9\0ISO_8859_9\0ISO8859_9\0",
319 ONIG_ENCODING_ISO_8859_9
320 },
321 #endif
322 #ifdef ONIG_ENCODING_ISO_8859_10
323 {
324 "ISO-8859-10\0ISO8859-10\0ISO_8859_10\0ISO8859_10\0",
325 ONIG_ENCODING_ISO_8859_10
326 },
327 #endif
328 #ifdef ONIG_ENCODING_ISO_8859_11
329 {
330 "ISO-8859-11\0ISO8859-11\0ISO_8859_11\0ISO8859_11\0",
331 ONIG_ENCODING_ISO_8859_11
332 },
333 #endif
334 #ifdef ONIG_ENCODING_ISO_8859_13
335 {
336 "ISO-8859-13\0ISO8859-13\0ISO_8859_13\0ISO8859_13\0",
337 ONIG_ENCODING_ISO_8859_13
338 },
339 #endif
340 #ifdef ONIG_ENCODING_ISO_8859_14
341 {
342 "ISO-8859-14\0ISO8859-14\0ISO_8859_14\0ISO8859_14\0",
343 ONIG_ENCODING_ISO_8859_14
344 },
345 #endif
346 #ifdef ONIG_ENCODING_ISO_8859_15
347 {
348 "ISO-8859-15\0ISO8859-15\0ISO_8859_15\0ISO8859_15\0",
349 ONIG_ENCODING_ISO_8859_15
350 },
351 #endif
352 #ifdef ONIG_ENCODING_ISO_8859_16
353 {
354 "ISO-8859-16\0ISO8859-16\0ISO_8859_16\0ISO8859_16\0",
355 ONIG_ENCODING_ISO_8859_16
356 },
357 #endif
358 #ifdef ONIG_ENCODING_ASCII
359 {
360 "ASCII\0US-ASCII\0US_ASCII\0ISO646\0",
361 ONIG_ENCODING_ASCII
362 },
363 #endif
364 { NULL, ONIG_ENCODING_UNDEF }
365 };
366 /* }}} */
367
368 /* {{{ php_mb_regex_name2mbctype */
_php_mb_regex_name2mbctype(const char * pname)369 static OnigEncoding _php_mb_regex_name2mbctype(const char *pname)
370 {
371 const char *p;
372 php_mb_regex_enc_name_map_t *mapping;
373
374 if (pname == NULL || !*pname) {
375 return ONIG_ENCODING_UNDEF;
376 }
377
378 for (mapping = enc_name_map; mapping->names != NULL; mapping++) {
379 for (p = mapping->names; *p != '\0'; p += (strlen(p) + 1)) {
380 if (strcasecmp(p, pname) == 0) {
381 return mapping->code;
382 }
383 }
384 }
385
386 return ONIG_ENCODING_UNDEF;
387 }
388 /* }}} */
389
390 /* {{{ php_mb_regex_mbctype2name */
_php_mb_regex_mbctype2name(OnigEncoding mbctype)391 static const char *_php_mb_regex_mbctype2name(OnigEncoding mbctype)
392 {
393 php_mb_regex_enc_name_map_t *mapping;
394
395 for (mapping = enc_name_map; mapping->names != NULL; mapping++) {
396 if (mapping->code == mbctype) {
397 return mapping->names;
398 }
399 }
400
401 return NULL;
402 }
403 /* }}} */
404
405 /* {{{ php_mb_regex_set_mbctype */
php_mb_regex_set_mbctype(const char * encname)406 int php_mb_regex_set_mbctype(const char *encname)
407 {
408 OnigEncoding mbctype = _php_mb_regex_name2mbctype(encname);
409 if (mbctype == ONIG_ENCODING_UNDEF) {
410 return FAILURE;
411 }
412 MBREX(current_mbctype) = mbctype;
413 return SUCCESS;
414 }
415 /* }}} */
416
417 /* {{{ php_mb_regex_set_default_mbctype */
php_mb_regex_set_default_mbctype(const char * encname)418 int php_mb_regex_set_default_mbctype(const char *encname)
419 {
420 OnigEncoding mbctype = _php_mb_regex_name2mbctype(encname);
421 if (mbctype == ONIG_ENCODING_UNDEF) {
422 return FAILURE;
423 }
424 MBREX(default_mbctype) = mbctype;
425 return SUCCESS;
426 }
427 /* }}} */
428
429 /* {{{ php_mb_regex_get_mbctype */
php_mb_regex_get_mbctype(void)430 const char *php_mb_regex_get_mbctype(void)
431 {
432 return _php_mb_regex_mbctype2name(MBREX(current_mbctype));
433 }
434 /* }}} */
435
436 /* {{{ php_mb_regex_get_default_mbctype */
php_mb_regex_get_default_mbctype(void)437 const char *php_mb_regex_get_default_mbctype(void)
438 {
439 return _php_mb_regex_mbctype2name(MBREX(default_mbctype));
440 }
441 /* }}} */
442
443 /*
444 * regex cache
445 */
446 /* {{{ php_mbregex_compile_pattern */
php_mbregex_compile_pattern(const char * pattern,int patlen,OnigOptionType options,OnigEncoding enc,OnigSyntaxType * syntax)447 static php_mb_regex_t *php_mbregex_compile_pattern(const char *pattern, int patlen, OnigOptionType options, OnigEncoding enc, OnigSyntaxType *syntax)
448 {
449 int err_code = 0;
450 php_mb_regex_t *retval = NULL, *rc = NULL;
451 OnigErrorInfo err_info;
452 OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
453
454 if (!php_mb_check_encoding(pattern, patlen, _php_mb_regex_mbctype2name(enc))) {
455 php_error_docref(NULL, E_WARNING,
456 "Pattern is not valid under %s encoding", _php_mb_regex_mbctype2name(enc));
457 return NULL;
458 }
459
460 rc = zend_hash_str_find_ptr(&MBREX(ht_rc), (char *)pattern, patlen);
461 if (!rc || onig_get_options(rc) != options || onig_get_encoding(rc) != enc || onig_get_syntax(rc) != syntax) {
462 if ((err_code = onig_new(&retval, (OnigUChar *)pattern, (OnigUChar *)(pattern + patlen), options, enc, syntax, &err_info)) != ONIG_NORMAL) {
463 onig_error_code_to_str(err_str, err_code, &err_info);
464 php_error_docref(NULL, E_WARNING, "mbregex compile err: %s", err_str);
465 return NULL;
466 }
467 if (rc == MBREX(search_re)) {
468 /* reuse the new rc? see bug #72399 */
469 MBREX(search_re) = NULL;
470 }
471 zend_hash_str_update_ptr(&MBREX(ht_rc), (char *)pattern, patlen, retval);
472 } else {
473 retval = rc;
474 }
475 return retval;
476 }
477 /* }}} */
478
479 /* {{{ _php_mb_regex_get_option_string */
_php_mb_regex_get_option_string(char * str,size_t len,OnigOptionType option,OnigSyntaxType * syntax)480 static size_t _php_mb_regex_get_option_string(char *str, size_t len, OnigOptionType option, OnigSyntaxType *syntax)
481 {
482 size_t len_left = len;
483 size_t len_req = 0;
484 char *p = str;
485 char c;
486
487 if ((option & ONIG_OPTION_IGNORECASE) != 0) {
488 if (len_left > 0) {
489 --len_left;
490 *(p++) = 'i';
491 }
492 ++len_req;
493 }
494
495 if ((option & ONIG_OPTION_EXTEND) != 0) {
496 if (len_left > 0) {
497 --len_left;
498 *(p++) = 'x';
499 }
500 ++len_req;
501 }
502
503 if ((option & (ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE)) ==
504 (ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE)) {
505 if (len_left > 0) {
506 --len_left;
507 *(p++) = 'p';
508 }
509 ++len_req;
510 } else {
511 if ((option & ONIG_OPTION_MULTILINE) != 0) {
512 if (len_left > 0) {
513 --len_left;
514 *(p++) = 'm';
515 }
516 ++len_req;
517 }
518
519 if ((option & ONIG_OPTION_SINGLELINE) != 0) {
520 if (len_left > 0) {
521 --len_left;
522 *(p++) = 's';
523 }
524 ++len_req;
525 }
526 }
527 if ((option & ONIG_OPTION_FIND_LONGEST) != 0) {
528 if (len_left > 0) {
529 --len_left;
530 *(p++) = 'l';
531 }
532 ++len_req;
533 }
534 if ((option & ONIG_OPTION_FIND_NOT_EMPTY) != 0) {
535 if (len_left > 0) {
536 --len_left;
537 *(p++) = 'n';
538 }
539 ++len_req;
540 }
541
542 c = 0;
543
544 if (syntax == ONIG_SYNTAX_JAVA) {
545 c = 'j';
546 } else if (syntax == ONIG_SYNTAX_GNU_REGEX) {
547 c = 'u';
548 } else if (syntax == ONIG_SYNTAX_GREP) {
549 c = 'g';
550 } else if (syntax == ONIG_SYNTAX_EMACS) {
551 c = 'c';
552 } else if (syntax == ONIG_SYNTAX_RUBY) {
553 c = 'r';
554 } else if (syntax == ONIG_SYNTAX_PERL) {
555 c = 'z';
556 } else if (syntax == ONIG_SYNTAX_POSIX_BASIC) {
557 c = 'b';
558 } else if (syntax == ONIG_SYNTAX_POSIX_EXTENDED) {
559 c = 'd';
560 }
561
562 if (c != 0) {
563 if (len_left > 0) {
564 --len_left;
565 *(p++) = c;
566 }
567 ++len_req;
568 }
569
570
571 if (len_left > 0) {
572 --len_left;
573 *(p++) = '\0';
574 }
575 ++len_req;
576 if (len < len_req) {
577 return len_req;
578 }
579
580 return 0;
581 }
582 /* }}} */
583
584 /* {{{ _php_mb_regex_init_options */
585 static void
_php_mb_regex_init_options(const char * parg,int narg,OnigOptionType * option,OnigSyntaxType ** syntax,int * eval)586 _php_mb_regex_init_options(const char *parg, int narg, OnigOptionType *option, OnigSyntaxType **syntax, int *eval)
587 {
588 int n;
589 char c;
590 int optm = 0;
591
592 *syntax = ONIG_SYNTAX_RUBY;
593
594 if (parg != NULL) {
595 n = 0;
596 while(n < narg) {
597 c = parg[n++];
598 switch (c) {
599 case 'i':
600 optm |= ONIG_OPTION_IGNORECASE;
601 break;
602 case 'x':
603 optm |= ONIG_OPTION_EXTEND;
604 break;
605 case 'm':
606 optm |= ONIG_OPTION_MULTILINE;
607 break;
608 case 's':
609 optm |= ONIG_OPTION_SINGLELINE;
610 break;
611 case 'p':
612 optm |= ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE;
613 break;
614 case 'l':
615 optm |= ONIG_OPTION_FIND_LONGEST;
616 break;
617 case 'n':
618 optm |= ONIG_OPTION_FIND_NOT_EMPTY;
619 break;
620 case 'j':
621 *syntax = ONIG_SYNTAX_JAVA;
622 break;
623 case 'u':
624 *syntax = ONIG_SYNTAX_GNU_REGEX;
625 break;
626 case 'g':
627 *syntax = ONIG_SYNTAX_GREP;
628 break;
629 case 'c':
630 *syntax = ONIG_SYNTAX_EMACS;
631 break;
632 case 'r':
633 *syntax = ONIG_SYNTAX_RUBY;
634 break;
635 case 'z':
636 *syntax = ONIG_SYNTAX_PERL;
637 break;
638 case 'b':
639 *syntax = ONIG_SYNTAX_POSIX_BASIC;
640 break;
641 case 'd':
642 *syntax = ONIG_SYNTAX_POSIX_EXTENDED;
643 break;
644 case 'e':
645 if (eval != NULL) *eval = 1;
646 break;
647 default:
648 break;
649 }
650 }
651 if (option != NULL) *option|=optm;
652 }
653 }
654 /* }}} */
655
656 /*
657 * php functions
658 */
659
660 /* {{{ proto string mb_regex_encoding([string encoding])
661 Returns the current encoding for regex as a string. */
PHP_FUNCTION(mb_regex_encoding)662 PHP_FUNCTION(mb_regex_encoding)
663 {
664 char *encoding = NULL;
665 size_t encoding_len;
666 OnigEncoding mbctype;
667
668 if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s", &encoding, &encoding_len) == FAILURE) {
669 return;
670 }
671
672 if (!encoding) {
673 const char *retval = _php_mb_regex_mbctype2name(MBREX(current_mbctype));
674
675 if (retval == NULL) {
676 RETURN_FALSE;
677 }
678
679 RETURN_STRING((char *)retval);
680 } else {
681 mbctype = _php_mb_regex_name2mbctype(encoding);
682
683 if (mbctype == ONIG_ENCODING_UNDEF) {
684 php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", encoding);
685 RETURN_FALSE;
686 }
687
688 MBREX(current_mbctype) = mbctype;
689 RETURN_TRUE;
690 }
691 }
692 /* }}} */
693
694 /* {{{ _php_mb_regex_ereg_exec */
_php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS,int icase)695 static void _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase)
696 {
697 zval *arg_pattern, *array = NULL;
698 char *string;
699 size_t string_len;
700 php_mb_regex_t *re;
701 OnigRegion *regs = NULL;
702 int i, match_len, beg, end;
703 OnigOptionType options;
704 char *str;
705
706 if (zend_parse_parameters(ZEND_NUM_ARGS(), "zs|z/", &arg_pattern, &string, &string_len, &array) == FAILURE) {
707 RETURN_FALSE;
708 }
709
710 if (!php_mb_check_encoding(
711 string,
712 string_len,
713 _php_mb_regex_mbctype2name(MBREX(current_mbctype))
714 )) {
715 if (array != NULL) {
716 zval_dtor(array);
717 array_init(array);
718 }
719 RETURN_FALSE;
720 }
721
722 if (array != NULL) {
723 zval_dtor(array);
724 array_init(array);
725 }
726
727 options = MBREX(regex_default_options);
728 if (icase) {
729 options |= ONIG_OPTION_IGNORECASE;
730 }
731
732 /* compile the regular expression from the supplied regex */
733 if (Z_TYPE_P(arg_pattern) != IS_STRING) {
734 /* we convert numbers to integers and treat them as a string */
735 if (Z_TYPE_P(arg_pattern) == IS_DOUBLE) {
736 convert_to_long_ex(arg_pattern); /* get rid of decimal places */
737 }
738 convert_to_string_ex(arg_pattern);
739 /* don't bother doing an extended regex with just a number */
740 }
741
742 if (Z_STRLEN_P(arg_pattern) == 0) {
743 php_error_docref(NULL, E_WARNING, "empty pattern");
744 RETVAL_FALSE;
745 goto out;
746 }
747
748 re = php_mbregex_compile_pattern(Z_STRVAL_P(arg_pattern), Z_STRLEN_P(arg_pattern), options, MBREX(current_mbctype), MBREX(regex_default_syntax));
749 if (re == NULL) {
750 RETVAL_FALSE;
751 goto out;
752 }
753
754 regs = onig_region_new();
755
756 /* actually execute the regular expression */
757 if (onig_search(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), (OnigUChar *)string, (OnigUChar *)(string + string_len), regs, 0) < 0) {
758 RETVAL_FALSE;
759 goto out;
760 }
761
762 match_len = 1;
763 str = string;
764 if (array != NULL) {
765
766 match_len = regs->end[0] - regs->beg[0];
767 for (i = 0; i < regs->num_regs; i++) {
768 beg = regs->beg[i];
769 end = regs->end[i];
770 if (beg >= 0 && beg < end && (size_t)end <= string_len) {
771 add_index_stringl(array, i, (char *)&str[beg], end - beg);
772 } else {
773 add_index_bool(array, i, 0);
774 }
775 }
776 }
777
778 if (match_len == 0) {
779 match_len = 1;
780 }
781 RETVAL_LONG(match_len);
782 out:
783 if (regs != NULL) {
784 onig_region_free(regs, 1);
785 }
786 }
787 /* }}} */
788
789 /* {{{ proto int mb_ereg(string pattern, string string [, array registers])
790 Regular expression match for multibyte string */
PHP_FUNCTION(mb_ereg)791 PHP_FUNCTION(mb_ereg)
792 {
793 _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
794 }
795 /* }}} */
796
797 /* {{{ proto int mb_eregi(string pattern, string string [, array registers])
798 Case-insensitive regular expression match for multibyte string */
PHP_FUNCTION(mb_eregi)799 PHP_FUNCTION(mb_eregi)
800 {
801 _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
802 }
803 /* }}} */
804
805 /* {{{ _php_mb_regex_ereg_replace_exec */
_php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS,OnigOptionType options,int is_callable)806 static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOptionType options, int is_callable)
807 {
808 zval *arg_pattern_zval;
809
810 char *arg_pattern;
811 size_t arg_pattern_len;
812
813 char *replace;
814 size_t replace_len;
815
816 zend_fcall_info arg_replace_fci;
817 zend_fcall_info_cache arg_replace_fci_cache;
818
819 char *string;
820 size_t string_len;
821
822 char *p;
823 php_mb_regex_t *re;
824 OnigSyntaxType *syntax;
825 OnigRegion *regs = NULL;
826 smart_str out_buf = {0};
827 smart_str eval_buf = {0};
828 smart_str *pbuf;
829 size_t i;
830 int err, eval, n;
831 OnigUChar *pos;
832 OnigUChar *string_lim;
833 char *description = NULL;
834 char pat_buf[6];
835
836 const mbfl_encoding *enc;
837
838 {
839 const char *current_enc_name;
840 current_enc_name = _php_mb_regex_mbctype2name(MBREX(current_mbctype));
841 if (current_enc_name == NULL ||
842 (enc = mbfl_name2encoding(current_enc_name)) == NULL) {
843 php_error_docref(NULL, E_WARNING, "Unknown error");
844 RETURN_FALSE;
845 }
846 }
847 eval = 0;
848 {
849 char *option_str = NULL;
850 size_t option_str_len = 0;
851
852 if (!is_callable) {
853 if (zend_parse_parameters(ZEND_NUM_ARGS(), "zss|s",
854 &arg_pattern_zval,
855 &replace, &replace_len,
856 &string, &string_len,
857 &option_str, &option_str_len) == FAILURE) {
858 RETURN_FALSE;
859 }
860 } else {
861 if (zend_parse_parameters(ZEND_NUM_ARGS(), "zfs|s",
862 &arg_pattern_zval,
863 &arg_replace_fci, &arg_replace_fci_cache,
864 &string, &string_len,
865 &option_str, &option_str_len) == FAILURE) {
866 RETURN_FALSE;
867 }
868 }
869
870 if (!php_mb_check_encoding(
871 string,
872 string_len,
873 _php_mb_regex_mbctype2name(MBREX(current_mbctype))
874 )) {
875 RETURN_NULL();
876 }
877
878 if (option_str != NULL) {
879 _php_mb_regex_init_options(option_str, option_str_len, &options, &syntax, &eval);
880 } else {
881 options |= MBREX(regex_default_options);
882 syntax = MBREX(regex_default_syntax);
883 }
884 }
885 if (eval && !is_callable) {
886 php_error_docref(NULL, E_DEPRECATED, "The 'e' option is deprecated, use mb_ereg_replace_callback instead");
887 }
888 if (Z_TYPE_P(arg_pattern_zval) == IS_STRING) {
889 arg_pattern = Z_STRVAL_P(arg_pattern_zval);
890 arg_pattern_len = Z_STRLEN_P(arg_pattern_zval);
891 } else {
892 /* FIXME: this code is not multibyte aware! */
893 convert_to_long_ex(arg_pattern_zval);
894 pat_buf[0] = (char)Z_LVAL_P(arg_pattern_zval);
895 pat_buf[1] = '\0';
896 pat_buf[2] = '\0';
897 pat_buf[3] = '\0';
898 pat_buf[4] = '\0';
899 pat_buf[5] = '\0';
900
901 arg_pattern = pat_buf;
902 arg_pattern_len = 1;
903 }
904 /* create regex pattern buffer */
905 re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, options, MBREX(current_mbctype), syntax);
906 if (re == NULL) {
907 RETURN_FALSE;
908 }
909
910 if (eval || is_callable) {
911 pbuf = &eval_buf;
912 description = zend_make_compiled_string_description("mbregex replace");
913 } else {
914 pbuf = &out_buf;
915 description = NULL;
916 }
917
918 if (is_callable) {
919 if (eval) {
920 php_error_docref(NULL, E_WARNING, "Option 'e' cannot be used with replacement callback");
921 RETURN_FALSE;
922 }
923 }
924
925 /* do the actual work */
926 err = 0;
927 pos = (OnigUChar *)string;
928 string_lim = (OnigUChar*)(string + string_len);
929 regs = onig_region_new();
930 while (err >= 0) {
931 err = onig_search(re, (OnigUChar *)string, (OnigUChar *)string_lim, pos, (OnigUChar *)string_lim, regs, 0);
932 if (err <= -2) {
933 OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
934 onig_error_code_to_str(err_str, err);
935 php_error_docref(NULL, E_WARNING, "mbregex search failure in php_mbereg_replace_exec(): %s", err_str);
936 break;
937 }
938 if (err >= 0) {
939 #if moriyoshi_0
940 if (regs->beg[0] == regs->end[0]) {
941 php_error_docref(NULL, E_WARNING, "Empty regular expression");
942 break;
943 }
944 #endif
945 /* copy the part of the string before the match */
946 smart_str_appendl(&out_buf, (char *)pos, (size_t)((OnigUChar *)(string + regs->beg[0]) - pos));
947
948 if (!is_callable) {
949 /* copy replacement and backrefs */
950 i = 0;
951 p = replace;
952 while (i < replace_len) {
953 int fwd = (int) php_mb_mbchar_bytes_ex(p, enc);
954 n = -1;
955 if ((replace_len - i) >= 2 && fwd == 1 &&
956 p[0] == '\\' && p[1] >= '0' && p[1] <= '9') {
957 n = p[1] - '0';
958 }
959 if (n >= 0 && n < regs->num_regs) {
960 if (regs->beg[n] >= 0 && regs->beg[n] < regs->end[n] && (size_t)regs->end[n] <= string_len) {
961 smart_str_appendl(pbuf, string + regs->beg[n], regs->end[n] - regs->beg[n]);
962 }
963 p += 2;
964 i += 2;
965 } else {
966 smart_str_appendl(pbuf, p, fwd);
967 p += fwd;
968 i += fwd;
969 }
970 }
971 }
972
973 if (eval) {
974 zval v;
975 zend_string *eval_str;
976 /* null terminate buffer */
977 smart_str_0(&eval_buf);
978
979 if (eval_buf.s) {
980 eval_str = eval_buf.s;
981 } else {
982 eval_str = ZSTR_EMPTY_ALLOC();
983 }
984
985 /* do eval */
986 if (zend_eval_stringl(ZSTR_VAL(eval_str), ZSTR_LEN(eval_str), &v, description) == FAILURE) {
987 efree(description);
988 zend_throw_error(NULL, "Failed evaluating code: %s%s", PHP_EOL, ZSTR_VAL(eval_str));
989 onig_region_free(regs, 0);
990 smart_str_free(&out_buf);
991 smart_str_free(&eval_buf);
992 RETURN_FALSE;
993 }
994
995 /* result of eval */
996 convert_to_string(&v);
997 smart_str_appendl(&out_buf, Z_STRVAL(v), Z_STRLEN(v));
998 /* Clean up */
999 smart_str_free(&eval_buf);
1000 zval_dtor(&v);
1001 } else if (is_callable) {
1002 zval args[1];
1003 zval subpats, retval;
1004 int i;
1005
1006 array_init(&subpats);
1007 for (i = 0; i < regs->num_regs; i++) {
1008 add_next_index_stringl(&subpats, string + regs->beg[i], regs->end[i] - regs->beg[i]);
1009 }
1010
1011 ZVAL_COPY_VALUE(&args[0], &subpats);
1012 /* null terminate buffer */
1013 smart_str_0(&eval_buf);
1014
1015 arg_replace_fci.param_count = 1;
1016 arg_replace_fci.params = args;
1017 arg_replace_fci.retval = &retval;
1018 if (zend_call_function(&arg_replace_fci, &arg_replace_fci_cache) == SUCCESS &&
1019 !Z_ISUNDEF(retval)) {
1020 convert_to_string_ex(&retval);
1021 smart_str_appendl(&out_buf, Z_STRVAL(retval), Z_STRLEN(retval));
1022 smart_str_free(&eval_buf);
1023 zval_ptr_dtor(&retval);
1024 } else {
1025 if (!EG(exception)) {
1026 php_error_docref(NULL, E_WARNING, "Unable to call custom replacement function");
1027 }
1028 }
1029 zval_ptr_dtor(&subpats);
1030 }
1031
1032 n = regs->end[0];
1033 if ((pos - (OnigUChar *)string) < n) {
1034 pos = (OnigUChar *)string + n;
1035 } else {
1036 if (pos < string_lim) {
1037 smart_str_appendl(&out_buf, (char *)pos, 1);
1038 }
1039 pos++;
1040 }
1041 } else { /* nomatch */
1042 /* stick that last bit of string on our output */
1043 if (string_lim - pos > 0) {
1044 smart_str_appendl(&out_buf, (char *)pos, string_lim - pos);
1045 }
1046 }
1047 onig_region_free(regs, 0);
1048 }
1049
1050 if (description) {
1051 efree(description);
1052 }
1053 if (regs != NULL) {
1054 onig_region_free(regs, 1);
1055 }
1056 smart_str_free(&eval_buf);
1057
1058 if (err <= -2) {
1059 smart_str_free(&out_buf);
1060 RETVAL_FALSE;
1061 } else if (out_buf.s) {
1062 smart_str_0(&out_buf);
1063 RETVAL_STR(out_buf.s);
1064 } else {
1065 RETVAL_EMPTY_STRING();
1066 }
1067 }
1068 /* }}} */
1069
1070 /* {{{ proto string mb_ereg_replace(string pattern, string replacement, string string [, string option])
1071 Replace regular expression for multibyte string */
PHP_FUNCTION(mb_ereg_replace)1072 PHP_FUNCTION(mb_ereg_replace)
1073 {
1074 _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 0);
1075 }
1076 /* }}} */
1077
1078 /* {{{ proto string mb_eregi_replace(string pattern, string replacement, string string)
1079 Case insensitive replace regular expression for multibyte string */
PHP_FUNCTION(mb_eregi_replace)1080 PHP_FUNCTION(mb_eregi_replace)
1081 {
1082 _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, ONIG_OPTION_IGNORECASE, 0);
1083 }
1084 /* }}} */
1085
1086 /* {{{ proto string mb_ereg_replace_callback(string pattern, string callback, string string [, string option])
1087 regular expression for multibyte string using replacement callback */
PHP_FUNCTION(mb_ereg_replace_callback)1088 PHP_FUNCTION(mb_ereg_replace_callback)
1089 {
1090 _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 1);
1091 }
1092 /* }}} */
1093
1094 /* {{{ proto array mb_split(string pattern, string string [, int limit])
1095 split multibyte string into array by regular expression */
PHP_FUNCTION(mb_split)1096 PHP_FUNCTION(mb_split)
1097 {
1098 char *arg_pattern;
1099 size_t arg_pattern_len;
1100 php_mb_regex_t *re;
1101 OnigRegion *regs = NULL;
1102 char *string;
1103 OnigUChar *pos, *chunk_pos;
1104 size_t string_len;
1105
1106 int n, err;
1107 zend_long count = -1;
1108
1109 if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|l", &arg_pattern, &arg_pattern_len, &string, &string_len, &count) == FAILURE) {
1110 RETURN_FALSE;
1111 }
1112
1113 if (count > 0) {
1114 count--;
1115 }
1116
1117 if (!php_mb_check_encoding(string, string_len,
1118 _php_mb_regex_mbctype2name(MBREX(current_mbctype)))) {
1119 RETURN_FALSE;
1120 }
1121
1122 /* create regex pattern buffer */
1123 if ((re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, MBREX(regex_default_options), MBREX(current_mbctype), MBREX(regex_default_syntax))) == NULL) {
1124 RETURN_FALSE;
1125 }
1126
1127 array_init(return_value);
1128
1129 chunk_pos = pos = (OnigUChar *)string;
1130 err = 0;
1131 regs = onig_region_new();
1132 /* churn through str, generating array entries as we go */
1133 while (count != 0 && (pos - (OnigUChar *)string) < (ptrdiff_t)string_len) {
1134 int beg, end;
1135 err = onig_search(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), pos, (OnigUChar *)(string + string_len), regs, 0);
1136 if (err < 0) {
1137 break;
1138 }
1139 beg = regs->beg[0], end = regs->end[0];
1140 /* add it to the array */
1141 if ((pos - (OnigUChar *)string) < end) {
1142 if ((size_t)beg < string_len && beg >= (chunk_pos - (OnigUChar *)string)) {
1143 add_next_index_stringl(return_value, (char *)chunk_pos, ((OnigUChar *)(string + beg) - chunk_pos));
1144 --count;
1145 } else {
1146 err = -2;
1147 break;
1148 }
1149 /* point at our new starting point */
1150 chunk_pos = pos = (OnigUChar *)string + end;
1151 } else {
1152 pos++;
1153 }
1154 onig_region_free(regs, 0);
1155 }
1156
1157 onig_region_free(regs, 1);
1158
1159 /* see if we encountered an error */
1160 if (err <= -2) {
1161 OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
1162 onig_error_code_to_str(err_str, err);
1163 php_error_docref(NULL, E_WARNING, "mbregex search failure in mbsplit(): %s", err_str);
1164 zval_dtor(return_value);
1165 RETURN_FALSE;
1166 }
1167
1168 /* otherwise we just have one last element to add to the array */
1169 n = ((OnigUChar *)(string + string_len) - chunk_pos);
1170 if (n > 0) {
1171 add_next_index_stringl(return_value, (char *)chunk_pos, n);
1172 } else {
1173 add_next_index_stringl(return_value, "", 0);
1174 }
1175 }
1176 /* }}} */
1177
1178 /* {{{ proto bool mb_ereg_match(string pattern, string string [,string option])
1179 Regular expression match for multibyte string */
PHP_FUNCTION(mb_ereg_match)1180 PHP_FUNCTION(mb_ereg_match)
1181 {
1182 char *arg_pattern;
1183 size_t arg_pattern_len;
1184
1185 char *string;
1186 size_t string_len;
1187
1188 php_mb_regex_t *re;
1189 OnigSyntaxType *syntax;
1190 OnigOptionType option = 0;
1191 int err;
1192
1193 {
1194 char *option_str = NULL;
1195 size_t option_str_len = 0;
1196
1197 if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|s",
1198 &arg_pattern, &arg_pattern_len, &string, &string_len,
1199 &option_str, &option_str_len)==FAILURE) {
1200 RETURN_FALSE;
1201 }
1202
1203 if (option_str != NULL) {
1204 _php_mb_regex_init_options(option_str, option_str_len, &option, &syntax, NULL);
1205 } else {
1206 option |= MBREX(regex_default_options);
1207 syntax = MBREX(regex_default_syntax);
1208 }
1209 }
1210
1211 if (!php_mb_check_encoding(string, string_len,
1212 _php_mb_regex_mbctype2name(MBREX(current_mbctype)))) {
1213 RETURN_FALSE;
1214 }
1215
1216 if ((re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), syntax)) == NULL) {
1217 RETURN_FALSE;
1218 }
1219
1220 /* match */
1221 err = onig_match(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), (OnigUChar *)string, NULL, 0);
1222 if (err >= 0) {
1223 RETVAL_TRUE;
1224 } else {
1225 RETVAL_FALSE;
1226 }
1227 }
1228 /* }}} */
1229
1230 /* regex search */
1231 /* {{{ _php_mb_regex_ereg_search_exec */
1232 static void
_php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS,int mode)1233 _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mode)
1234 {
1235 char *arg_pattern = NULL, *arg_options = NULL;
1236 size_t arg_pattern_len, arg_options_len;
1237 int n, i, err, pos, len, beg, end;
1238 OnigOptionType option;
1239 OnigUChar *str;
1240 OnigSyntaxType *syntax;
1241
1242 if (zend_parse_parameters(ZEND_NUM_ARGS(), "|ss", &arg_pattern, &arg_pattern_len, &arg_options, &arg_options_len) == FAILURE) {
1243 return;
1244 }
1245
1246 option = MBREX(regex_default_options);
1247
1248 if (arg_options) {
1249 option = 0;
1250 _php_mb_regex_init_options(arg_options, arg_options_len, &option, &syntax, NULL);
1251 }
1252
1253 if (arg_pattern) {
1254 /* create regex pattern buffer */
1255 if ((MBREX(search_re) = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), MBREX(regex_default_syntax))) == NULL) {
1256 RETURN_FALSE;
1257 }
1258 }
1259
1260 pos = MBREX(search_pos);
1261 str = NULL;
1262 len = 0;
1263 if (!Z_ISUNDEF(MBREX(search_str)) && Z_TYPE(MBREX(search_str)) == IS_STRING){
1264 str = (OnigUChar *)Z_STRVAL(MBREX(search_str));
1265 len = Z_STRLEN(MBREX(search_str));
1266 }
1267
1268 if (MBREX(search_re) == NULL) {
1269 php_error_docref(NULL, E_WARNING, "No regex given");
1270 RETURN_FALSE;
1271 }
1272
1273 if (str == NULL) {
1274 php_error_docref(NULL, E_WARNING, "No string given");
1275 RETURN_FALSE;
1276 }
1277
1278 if (MBREX(search_regs)) {
1279 onig_region_free(MBREX(search_regs), 1);
1280 }
1281 MBREX(search_regs) = onig_region_new();
1282
1283 err = onig_search(MBREX(search_re), str, str + len, str + pos, str + len, MBREX(search_regs), 0);
1284 if (err == ONIG_MISMATCH) {
1285 MBREX(search_pos) = len;
1286 RETVAL_FALSE;
1287 } else if (err <= -2) {
1288 OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
1289 onig_error_code_to_str(err_str, err);
1290 php_error_docref(NULL, E_WARNING, "mbregex search failure in mbregex_search(): %s", err_str);
1291 RETVAL_FALSE;
1292 } else {
1293 switch (mode) {
1294 case 1:
1295 array_init(return_value);
1296 beg = MBREX(search_regs)->beg[0];
1297 end = MBREX(search_regs)->end[0];
1298 add_next_index_long(return_value, beg);
1299 add_next_index_long(return_value, end - beg);
1300 break;
1301 case 2:
1302 array_init(return_value);
1303 n = MBREX(search_regs)->num_regs;
1304 for (i = 0; i < n; i++) {
1305 beg = MBREX(search_regs)->beg[i];
1306 end = MBREX(search_regs)->end[i];
1307 if (beg >= 0 && beg <= end && end <= len) {
1308 add_index_stringl(return_value, i, (char *)&str[beg], end - beg);
1309 } else {
1310 add_index_bool(return_value, i, 0);
1311 }
1312 }
1313 break;
1314 default:
1315 RETVAL_TRUE;
1316 break;
1317 }
1318 end = MBREX(search_regs)->end[0];
1319 if (pos <= end) {
1320 MBREX(search_pos) = end;
1321 } else {
1322 MBREX(search_pos) = pos + 1;
1323 }
1324 }
1325
1326 if (err < 0) {
1327 onig_region_free(MBREX(search_regs), 1);
1328 MBREX(search_regs) = (OnigRegion *)NULL;
1329 }
1330 }
1331 /* }}} */
1332
1333 /* {{{ proto bool mb_ereg_search([string pattern[, string option]])
1334 Regular expression search for multibyte string */
PHP_FUNCTION(mb_ereg_search)1335 PHP_FUNCTION(mb_ereg_search)
1336 {
1337 _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
1338 }
1339 /* }}} */
1340
1341 /* {{{ proto array mb_ereg_search_pos([string pattern[, string option]])
1342 Regular expression search for multibyte string */
PHP_FUNCTION(mb_ereg_search_pos)1343 PHP_FUNCTION(mb_ereg_search_pos)
1344 {
1345 _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
1346 }
1347 /* }}} */
1348
1349 /* {{{ proto array mb_ereg_search_regs([string pattern[, string option]])
1350 Regular expression search for multibyte string */
PHP_FUNCTION(mb_ereg_search_regs)1351 PHP_FUNCTION(mb_ereg_search_regs)
1352 {
1353 _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 2);
1354 }
1355 /* }}} */
1356
1357 /* {{{ proto bool mb_ereg_search_init(string string [, string pattern[, string option]])
1358 Initialize string and regular expression for search. */
PHP_FUNCTION(mb_ereg_search_init)1359 PHP_FUNCTION(mb_ereg_search_init)
1360 {
1361 size_t argc = ZEND_NUM_ARGS();
1362 zend_string *arg_str;
1363 char *arg_pattern = NULL, *arg_options = NULL;
1364 size_t arg_pattern_len = 0, arg_options_len = 0;
1365 OnigSyntaxType *syntax = NULL;
1366 OnigOptionType option;
1367
1368 if (zend_parse_parameters(argc, "S|ss", &arg_str, &arg_pattern, &arg_pattern_len, &arg_options, &arg_options_len) == FAILURE) {
1369 return;
1370 }
1371
1372 if (argc > 1 && arg_pattern_len == 0) {
1373 php_error_docref(NULL, E_WARNING, "Empty pattern");
1374 RETURN_FALSE;
1375 }
1376
1377 option = MBREX(regex_default_options);
1378 syntax = MBREX(regex_default_syntax);
1379
1380 if (argc == 3) {
1381 option = 0;
1382 _php_mb_regex_init_options(arg_options, arg_options_len, &option, &syntax, NULL);
1383 }
1384
1385 if (argc > 1) {
1386 /* create regex pattern buffer */
1387 if ((MBREX(search_re) = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), syntax)) == NULL) {
1388 RETURN_FALSE;
1389 }
1390 }
1391
1392 if (!Z_ISNULL(MBREX(search_str))) {
1393 zval_ptr_dtor(&MBREX(search_str));
1394 }
1395
1396 ZVAL_STR_COPY(&MBREX(search_str), arg_str);
1397
1398 if (php_mb_check_encoding(
1399 ZSTR_VAL(arg_str),
1400 ZSTR_LEN(arg_str),
1401 _php_mb_regex_mbctype2name(MBREX(current_mbctype))
1402 )) {
1403 MBREX(search_pos) = 0;
1404 RETVAL_TRUE;
1405 } else {
1406 MBREX(search_pos) = ZSTR_LEN(arg_str);
1407 RETVAL_FALSE;
1408 }
1409
1410 if (MBREX(search_regs) != NULL) {
1411 onig_region_free(MBREX(search_regs), 1);
1412 MBREX(search_regs) = NULL;
1413 }
1414 }
1415 /* }}} */
1416
1417 /* {{{ proto array mb_ereg_search_getregs(void)
1418 Get matched substring of the last time */
PHP_FUNCTION(mb_ereg_search_getregs)1419 PHP_FUNCTION(mb_ereg_search_getregs)
1420 {
1421 int n, i, len, beg, end;
1422 OnigUChar *str;
1423
1424 if (MBREX(search_regs) != NULL && Z_TYPE(MBREX(search_str)) == IS_STRING) {
1425 array_init(return_value);
1426
1427 str = (OnigUChar *)Z_STRVAL(MBREX(search_str));
1428 len = Z_STRLEN(MBREX(search_str));
1429 n = MBREX(search_regs)->num_regs;
1430 for (i = 0; i < n; i++) {
1431 beg = MBREX(search_regs)->beg[i];
1432 end = MBREX(search_regs)->end[i];
1433 if (beg >= 0 && beg <= end && end <= len) {
1434 add_index_stringl(return_value, i, (char *)&str[beg], end - beg);
1435 } else {
1436 add_index_bool(return_value, i, 0);
1437 }
1438 }
1439 } else {
1440 RETVAL_FALSE;
1441 }
1442 }
1443 /* }}} */
1444
1445 /* {{{ proto int mb_ereg_search_getpos(void)
1446 Get search start position */
PHP_FUNCTION(mb_ereg_search_getpos)1447 PHP_FUNCTION(mb_ereg_search_getpos)
1448 {
1449 RETVAL_LONG(MBREX(search_pos));
1450 }
1451 /* }}} */
1452
1453 /* {{{ proto bool mb_ereg_search_setpos(int position)
1454 Set search start position */
PHP_FUNCTION(mb_ereg_search_setpos)1455 PHP_FUNCTION(mb_ereg_search_setpos)
1456 {
1457 zend_long position;
1458
1459 if (zend_parse_parameters(ZEND_NUM_ARGS(), "l", &position) == FAILURE) {
1460 return;
1461 }
1462
1463 /* Accept negative position if length of search string can be determined */
1464 if ((position < 0) && (!Z_ISUNDEF(MBREX(search_str))) && (Z_TYPE(MBREX(search_str)) == IS_STRING)) {
1465 position += Z_STRLEN(MBREX(search_str));
1466 }
1467
1468 if (position < 0 || (!Z_ISUNDEF(MBREX(search_str)) && Z_TYPE(MBREX(search_str)) == IS_STRING && (size_t)position > Z_STRLEN(MBREX(search_str)))) {
1469 php_error_docref(NULL, E_WARNING, "Position is out of range");
1470 MBREX(search_pos) = 0;
1471 RETURN_FALSE;
1472 }
1473
1474 MBREX(search_pos) = position;
1475 RETURN_TRUE;
1476 }
1477 /* }}} */
1478
1479 /* {{{ php_mb_regex_set_options */
_php_mb_regex_set_options(OnigOptionType options,OnigSyntaxType * syntax,OnigOptionType * prev_options,OnigSyntaxType ** prev_syntax)1480 static void _php_mb_regex_set_options(OnigOptionType options, OnigSyntaxType *syntax, OnigOptionType *prev_options, OnigSyntaxType **prev_syntax)
1481 {
1482 if (prev_options != NULL) {
1483 *prev_options = MBREX(regex_default_options);
1484 }
1485 if (prev_syntax != NULL) {
1486 *prev_syntax = MBREX(regex_default_syntax);
1487 }
1488 MBREX(regex_default_options) = options;
1489 MBREX(regex_default_syntax) = syntax;
1490 }
1491 /* }}} */
1492
1493 /* {{{ proto string mb_regex_set_options([string options])
1494 Set or get the default options for mbregex functions */
PHP_FUNCTION(mb_regex_set_options)1495 PHP_FUNCTION(mb_regex_set_options)
1496 {
1497 OnigOptionType opt;
1498 OnigSyntaxType *syntax;
1499 char *string = NULL;
1500 size_t string_len;
1501 char buf[16];
1502
1503 if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s",
1504 &string, &string_len) == FAILURE) {
1505 RETURN_FALSE;
1506 }
1507 if (string != NULL) {
1508 opt = 0;
1509 syntax = NULL;
1510 _php_mb_regex_init_options(string, string_len, &opt, &syntax, NULL);
1511 _php_mb_regex_set_options(opt, syntax, NULL, NULL);
1512 } else {
1513 opt = MBREX(regex_default_options);
1514 syntax = MBREX(regex_default_syntax);
1515 }
1516 _php_mb_regex_get_option_string(buf, sizeof(buf), opt, syntax);
1517
1518 RETVAL_STRING(buf);
1519 }
1520 /* }}} */
1521
1522 #endif /* HAVE_MBREGEX */
1523
1524 /*
1525 * Local variables:
1526 * tab-width: 4
1527 * c-basic-offset: 4
1528 * End:
1529 * vim600: fdm=marker
1530 * vim: noet sw=4 ts=4
1531 */
1532