1 /*
2 +----------------------------------------------------------------------+
3 | PHP Version 7 |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 1997-2018 The PHP Group |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
15 | Author: Tsukada Takuya <tsukada@fminn.nagano.nagano.jp> |
16 +----------------------------------------------------------------------+
17 */
18
19 /* $Id$ */
20
21
22 #ifdef HAVE_CONFIG_H
23 #include "config.h"
24 #endif
25
26 #include "php.h"
27 #include "php_ini.h"
28
29 #if HAVE_MBREGEX
30
31 #include "zend_smart_str.h"
32 #include "ext/standard/info.h"
33 #include "php_mbregex.h"
34 #include "mbstring.h"
35
36 #include "php_onig_compat.h" /* must come prior to the oniguruma header */
37 #include <oniguruma.h>
38 #undef UChar
39
40 ZEND_EXTERN_MODULE_GLOBALS(mbstring)
41
42 struct _zend_mb_regex_globals {
43 OnigEncoding default_mbctype;
44 OnigEncoding current_mbctype;
45 HashTable ht_rc;
46 zval search_str;
47 zval *search_str_val;
48 unsigned int search_pos;
49 php_mb_regex_t *search_re;
50 OnigRegion *search_regs;
51 OnigOptionType regex_default_options;
52 OnigSyntaxType *regex_default_syntax;
53 };
54
55 #define MBREX(g) (MBSTRG(mb_regex_globals)->g)
56
57 /* {{{ static void php_mb_regex_free_cache() */
/* Destructor installed on the regex cache: frees the compiled regex held by a bucket. */
static void php_mb_regex_free_cache(zval *el)
{
	php_mb_regex_t *cached_re = (php_mb_regex_t *) Z_PTR_P(el);

	onig_free(cached_re);
}
61 /* }}} */
62
63 /* {{{ _php_mb_regex_globals_ctor */
/*
 * Puts a freshly allocated globals structure into its initial state:
 * UTF-8 encodings, an empty persistent regex cache, no search in progress,
 * and Ruby syntax with the multiline|singleline options as defaults.
 * Always returns SUCCESS.
 */
static int _php_mb_regex_globals_ctor(zend_mb_regex_globals *pglobals)
{
	/* encodings */
	pglobals->default_mbctype = ONIG_ENCODING_UTF8;
	pglobals->current_mbctype = ONIG_ENCODING_UTF8;

	/* compiled-regex cache (persistent allocation, entries freed via onig_free) */
	zend_hash_init(&pglobals->ht_rc, 0, NULL, php_mb_regex_free_cache, 1);

	/* search state */
	ZVAL_UNDEF(&pglobals->search_str);
	pglobals->search_re = NULL;
	pglobals->search_pos = 0;
	pglobals->search_regs = NULL;

	/* defaults applied when no option string is given */
	pglobals->regex_default_options = ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE;
	pglobals->regex_default_syntax = ONIG_SYNTAX_RUBY;

	return SUCCESS;
}
77 /* }}} */
78
79 /* {{{ _php_mb_regex_globals_dtor */
/* Tears down the regex cache owned by the globals; the structure itself is not freed here. */
static void _php_mb_regex_globals_dtor(zend_mb_regex_globals *pglobals)
{
	HashTable *regex_cache = &pglobals->ht_rc;

	zend_hash_destroy(regex_cache);
}
84 /* }}} */
85
86 /* {{{ php_mb_regex_globals_alloc */
php_mb_regex_globals_alloc(void)87 zend_mb_regex_globals *php_mb_regex_globals_alloc(void)
88 {
89 zend_mb_regex_globals *pglobals = pemalloc(
90 sizeof(zend_mb_regex_globals), 1);
91 if (SUCCESS != _php_mb_regex_globals_ctor(pglobals)) {
92 pefree(pglobals, 1);
93 return NULL;
94 }
95 return pglobals;
96 }
97 /* }}} */
98
99 /* {{{ php_mb_regex_globals_free */
/* Destroys and frees a globals structure; a NULL argument is ignored. */
void php_mb_regex_globals_free(zend_mb_regex_globals *pglobals)
{
	if (pglobals != NULL) {
		_php_mb_regex_globals_dtor(pglobals);
		pefree(pglobals, 1);
	}
}
108 /* }}} */
109
110 /* {{{ PHP_MINIT_FUNCTION(mb_regex) */
PHP_MINIT_FUNCTION(mb_regex)111 PHP_MINIT_FUNCTION(mb_regex)
112 {
113 onig_init();
114 return SUCCESS;
115 }
116 /* }}} */
117
118 /* {{{ PHP_MSHUTDOWN_FUNCTION(mb_regex) */
PHP_MSHUTDOWN_FUNCTION(mb_regex)119 PHP_MSHUTDOWN_FUNCTION(mb_regex)
120 {
121 onig_end();
122 return SUCCESS;
123 }
124 /* }}} */
125
126 /* {{{ PHP_RINIT_FUNCTION(mb_regex) */
PHP_RINIT_FUNCTION(mb_regex)127 PHP_RINIT_FUNCTION(mb_regex)
128 {
129 return MBSTRG(mb_regex_globals) ? SUCCESS: FAILURE;
130 }
131 /* }}} */
132
133 /* {{{ PHP_RSHUTDOWN_FUNCTION(mb_regex) */
PHP_RSHUTDOWN_FUNCTION(mb_regex)134 PHP_RSHUTDOWN_FUNCTION(mb_regex)
135 {
136 MBREX(current_mbctype) = MBREX(default_mbctype);
137
138 if (!Z_ISUNDEF(MBREX(search_str))) {
139 zval_ptr_dtor(&MBREX(search_str));
140 ZVAL_UNDEF(&MBREX(search_str));
141 }
142 MBREX(search_pos) = 0;
143
144 if (MBREX(search_regs) != NULL) {
145 onig_region_free(MBREX(search_regs), 1);
146 MBREX(search_regs) = (OnigRegion *)NULL;
147 }
148 zend_hash_clean(&MBREX(ht_rc));
149
150 return SUCCESS;
151 }
152 /* }}} */
153
154 /* {{{ PHP_MINFO_FUNCTION(mb_regex) */
PHP_MINFO_FUNCTION(mb_regex)155 PHP_MINFO_FUNCTION(mb_regex)
156 {
157 char buf[32];
158 php_info_print_table_start();
159 php_info_print_table_row(2, "Multibyte (japanese) regex support", "enabled");
160 snprintf(buf, sizeof(buf), "%d.%d.%d",
161 ONIGURUMA_VERSION_MAJOR,
162 ONIGURUMA_VERSION_MINOR,
163 ONIGURUMA_VERSION_TEENY);
164 #ifdef PHP_ONIG_BUNDLED
165 #ifdef USE_COMBINATION_EXPLOSION_CHECK
166 php_info_print_table_row(2, "Multibyte regex (oniguruma) backtrack check", "On");
167 #else /* USE_COMBINATION_EXPLOSION_CHECK */
168 php_info_print_table_row(2, "Multibyte regex (oniguruma) backtrack check", "Off");
169 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
170 #endif /* PHP_BUNDLED_ONIG */
171 php_info_print_table_row(2, "Multibyte regex (oniguruma) version", buf);
172 php_info_print_table_end();
173 }
174 /* }}} */
175
176 /*
177 * encoding name resolver
178 */
179
180 /* {{{ encoding name map */
181 typedef struct _php_mb_regex_enc_name_map_t {
182 const char *names;
183 OnigEncoding code;
184 } php_mb_regex_enc_name_map_t;
185
186 php_mb_regex_enc_name_map_t enc_name_map[] = {
187 #ifdef ONIG_ENCODING_EUC_JP
188 {
189 "EUC-JP\0EUCJP\0X-EUC-JP\0UJIS\0EUCJP\0EUCJP-WIN\0",
190 ONIG_ENCODING_EUC_JP
191 },
192 #endif
193 #ifdef ONIG_ENCODING_UTF8
194 {
195 "UTF-8\0UTF8\0",
196 ONIG_ENCODING_UTF8
197 },
198 #endif
199 #ifdef ONIG_ENCODING_UTF16_BE
200 {
201 "UTF-16\0UTF-16BE\0",
202 ONIG_ENCODING_UTF16_BE
203 },
204 #endif
205 #ifdef ONIG_ENCODING_UTF16_LE
206 {
207 "UTF-16LE\0",
208 ONIG_ENCODING_UTF16_LE
209 },
210 #endif
211 #ifdef ONIG_ENCODING_UTF32_BE
212 {
213 "UCS-4\0UTF-32\0UTF-32BE\0",
214 ONIG_ENCODING_UTF32_BE
215 },
216 #endif
217 #ifdef ONIG_ENCODING_UTF32_LE
218 {
219 "UCS-4LE\0UTF-32LE\0",
220 ONIG_ENCODING_UTF32_LE
221 },
222 #endif
223 #ifdef ONIG_ENCODING_SJIS
224 {
225 "SJIS\0CP932\0MS932\0SHIFT_JIS\0SJIS-WIN\0WINDOWS-31J\0",
226 ONIG_ENCODING_SJIS
227 },
228 #endif
229 #ifdef ONIG_ENCODING_BIG5
230 {
231 "BIG5\0BIG-5\0BIGFIVE\0CN-BIG5\0BIG-FIVE\0",
232 ONIG_ENCODING_BIG5
233 },
234 #endif
235 #ifdef ONIG_ENCODING_EUC_CN
236 {
237 "EUC-CN\0EUCCN\0EUC_CN\0GB-2312\0GB2312\0",
238 ONIG_ENCODING_EUC_CN
239 },
240 #endif
241 #ifdef ONIG_ENCODING_EUC_TW
242 {
243 "EUC-TW\0EUCTW\0EUC_TW\0",
244 ONIG_ENCODING_EUC_TW
245 },
246 #endif
247 #ifdef ONIG_ENCODING_EUC_KR
248 {
249 "EUC-KR\0EUCKR\0EUC_KR\0",
250 ONIG_ENCODING_EUC_KR
251 },
252 #endif
253 #if defined(ONIG_ENCODING_KOI8) && !PHP_ONIG_BAD_KOI8_ENTRY
254 {
255 "KOI8\0KOI-8\0",
256 ONIG_ENCODING_KOI8
257 },
258 #endif
259 #ifdef ONIG_ENCODING_KOI8_R
260 {
261 "KOI8R\0KOI8-R\0KOI-8R\0",
262 ONIG_ENCODING_KOI8_R
263 },
264 #endif
265 #ifdef ONIG_ENCODING_ISO_8859_1
266 {
267 "ISO-8859-1\0ISO8859-1\0ISO_8859_1\0ISO8859_1\0",
268 ONIG_ENCODING_ISO_8859_1
269 },
270 #endif
271 #ifdef ONIG_ENCODING_ISO_8859_2
272 {
273 "ISO-8859-2\0ISO8859-2\0ISO_8859_2\0ISO8859_2\0",
274 ONIG_ENCODING_ISO_8859_2
275 },
276 #endif
277 #ifdef ONIG_ENCODING_ISO_8859_3
278 {
279 "ISO-8859-3\0ISO8859-3\0ISO_8859_3\0ISO8859_3\0",
280 ONIG_ENCODING_ISO_8859_3
281 },
282 #endif
283 #ifdef ONIG_ENCODING_ISO_8859_4
284 {
285 "ISO-8859-4\0ISO8859-4\0ISO_8859_4\0ISO8859_4\0",
286 ONIG_ENCODING_ISO_8859_4
287 },
288 #endif
289 #ifdef ONIG_ENCODING_ISO_8859_5
290 {
291 "ISO-8859-5\0ISO8859-5\0ISO_8859_5\0ISO8859_5\0",
292 ONIG_ENCODING_ISO_8859_5
293 },
294 #endif
295 #ifdef ONIG_ENCODING_ISO_8859_6
296 {
297 "ISO-8859-6\0ISO8859-6\0ISO_8859_6\0ISO8859_6\0",
298 ONIG_ENCODING_ISO_8859_6
299 },
300 #endif
301 #ifdef ONIG_ENCODING_ISO_8859_7
302 {
303 "ISO-8859-7\0ISO8859-7\0ISO_8859_7\0ISO8859_7\0",
304 ONIG_ENCODING_ISO_8859_7
305 },
306 #endif
307 #ifdef ONIG_ENCODING_ISO_8859_8
308 {
309 "ISO-8859-8\0ISO8859-8\0ISO_8859_8\0ISO8859_8\0",
310 ONIG_ENCODING_ISO_8859_8
311 },
312 #endif
313 #ifdef ONIG_ENCODING_ISO_8859_9
314 {
315 "ISO-8859-9\0ISO8859-9\0ISO_8859_9\0ISO8859_9\0",
316 ONIG_ENCODING_ISO_8859_9
317 },
318 #endif
319 #ifdef ONIG_ENCODING_ISO_8859_10
320 {
321 "ISO-8859-10\0ISO8859-10\0ISO_8859_10\0ISO8859_10\0",
322 ONIG_ENCODING_ISO_8859_10
323 },
324 #endif
325 #ifdef ONIG_ENCODING_ISO_8859_11
326 {
327 "ISO-8859-11\0ISO8859-11\0ISO_8859_11\0ISO8859_11\0",
328 ONIG_ENCODING_ISO_8859_11
329 },
330 #endif
331 #ifdef ONIG_ENCODING_ISO_8859_13
332 {
333 "ISO-8859-13\0ISO8859-13\0ISO_8859_13\0ISO8859_13\0",
334 ONIG_ENCODING_ISO_8859_13
335 },
336 #endif
337 #ifdef ONIG_ENCODING_ISO_8859_14
338 {
339 "ISO-8859-14\0ISO8859-14\0ISO_8859_14\0ISO8859_14\0",
340 ONIG_ENCODING_ISO_8859_14
341 },
342 #endif
343 #ifdef ONIG_ENCODING_ISO_8859_15
344 {
345 "ISO-8859-15\0ISO8859-15\0ISO_8859_15\0ISO8859_15\0",
346 ONIG_ENCODING_ISO_8859_15
347 },
348 #endif
349 #ifdef ONIG_ENCODING_ISO_8859_16
350 {
351 "ISO-8859-16\0ISO8859-16\0ISO_8859_16\0ISO8859_16\0",
352 ONIG_ENCODING_ISO_8859_16
353 },
354 #endif
355 #ifdef ONIG_ENCODING_ASCII
356 {
357 "ASCII\0US-ASCII\0US_ASCII\0ISO646\0",
358 ONIG_ENCODING_ASCII
359 },
360 #endif
361 { NULL, ONIG_ENCODING_UNDEF }
362 };
363 /* }}} */
364
365 /* {{{ php_mb_regex_name2mbctype */
_php_mb_regex_name2mbctype(const char * pname)366 static OnigEncoding _php_mb_regex_name2mbctype(const char *pname)
367 {
368 const char *p;
369 php_mb_regex_enc_name_map_t *mapping;
370
371 if (pname == NULL || !*pname) {
372 return ONIG_ENCODING_UNDEF;
373 }
374
375 for (mapping = enc_name_map; mapping->names != NULL; mapping++) {
376 for (p = mapping->names; *p != '\0'; p += (strlen(p) + 1)) {
377 if (strcasecmp(p, pname) == 0) {
378 return mapping->code;
379 }
380 }
381 }
382
383 return ONIG_ENCODING_UNDEF;
384 }
385 /* }}} */
386
387 /* {{{ php_mb_regex_mbctype2name */
/*
 * Reverse lookup: returns the name list of the first table row whose
 * encoding equals mbctype (read as a C string this is the row's first
 * spelling), or NULL when no row matches.
 */
static const char *_php_mb_regex_mbctype2name(OnigEncoding mbctype)
{
	const php_mb_regex_enc_name_map_t *entry = enc_name_map;

	while (entry->names != NULL && entry->code != mbctype) {
		entry++;
	}

	/* on the sentinel row `names` is NULL, which is the not-found result */
	return entry->names;
}
400 /* }}} */
401
402 /* {{{ php_mb_regex_set_mbctype */
php_mb_regex_set_mbctype(const char * encname)403 int php_mb_regex_set_mbctype(const char *encname)
404 {
405 OnigEncoding mbctype = _php_mb_regex_name2mbctype(encname);
406 if (mbctype == ONIG_ENCODING_UNDEF) {
407 return FAILURE;
408 }
409 MBREX(current_mbctype) = mbctype;
410 return SUCCESS;
411 }
412 /* }}} */
413
414 /* {{{ php_mb_regex_set_default_mbctype */
php_mb_regex_set_default_mbctype(const char * encname)415 int php_mb_regex_set_default_mbctype(const char *encname)
416 {
417 OnigEncoding mbctype = _php_mb_regex_name2mbctype(encname);
418 if (mbctype == ONIG_ENCODING_UNDEF) {
419 return FAILURE;
420 }
421 MBREX(default_mbctype) = mbctype;
422 return SUCCESS;
423 }
424 /* }}} */
425
426 /* {{{ php_mb_regex_get_mbctype */
php_mb_regex_get_mbctype(void)427 const char *php_mb_regex_get_mbctype(void)
428 {
429 return _php_mb_regex_mbctype2name(MBREX(current_mbctype));
430 }
431 /* }}} */
432
433 /* {{{ php_mb_regex_get_default_mbctype */
php_mb_regex_get_default_mbctype(void)434 const char *php_mb_regex_get_default_mbctype(void)
435 {
436 return _php_mb_regex_mbctype2name(MBREX(default_mbctype));
437 }
438 /* }}} */
439
440 /*
441 * regex cache
442 */
443 /* {{{ php_mbregex_compile_pattern */
php_mbregex_compile_pattern(const char * pattern,int patlen,OnigOptionType options,OnigEncoding enc,OnigSyntaxType * syntax)444 static php_mb_regex_t *php_mbregex_compile_pattern(const char *pattern, int patlen, OnigOptionType options, OnigEncoding enc, OnigSyntaxType *syntax)
445 {
446 int err_code = 0;
447 php_mb_regex_t *retval = NULL, *rc = NULL;
448 OnigErrorInfo err_info;
449 OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
450
451 if (!php_mb_check_encoding(pattern, patlen, _php_mb_regex_mbctype2name(enc))) {
452 php_error_docref(NULL, E_WARNING,
453 "Pattern is not valid under %s encoding", _php_mb_regex_mbctype2name(enc));
454 return NULL;
455 }
456
457 rc = zend_hash_str_find_ptr(&MBREX(ht_rc), (char *)pattern, patlen);
458 if (!rc || onig_get_options(rc) != options || onig_get_encoding(rc) != enc || onig_get_syntax(rc) != syntax) {
459 if ((err_code = onig_new(&retval, (OnigUChar *)pattern, (OnigUChar *)(pattern + patlen), options, enc, syntax, &err_info)) != ONIG_NORMAL) {
460 onig_error_code_to_str(err_str, err_code, &err_info);
461 php_error_docref(NULL, E_WARNING, "mbregex compile err: %s", err_str);
462 return NULL;
463 }
464 if (rc == MBREX(search_re)) {
465 /* reuse the new rc? see bug #72399 */
466 MBREX(search_re) = NULL;
467 }
468 zend_hash_str_update_ptr(&MBREX(ht_rc), (char *)pattern, patlen, retval);
469 } else {
470 retval = rc;
471 }
472 return retval;
473 }
474 /* }}} */
475
476 /* {{{ _php_mb_regex_get_option_string */
/*
 * Renders an option mask plus syntax as the option-letter string understood
 * by _php_mb_regex_init_options(): i, x, p (or m / s), l, n, followed by one
 * syntax letter, then a terminating NUL.
 *
 * At most `len` bytes are stored in `str`. Returns 0 when the whole string
 * (terminator included) fitted, otherwise the number of bytes required.
 */
static size_t _php_mb_regex_get_option_string(char *str, size_t len, OnigOptionType option, OnigSyntaxType *syntax)
{
	/* longest possible output is six letters plus the terminator */
	char flags[8];
	size_t needed = 0;
	size_t idx;
	char syntax_flag = 0;

	if ((option & ONIG_OPTION_IGNORECASE) != 0) {
		flags[needed++] = 'i';
	}
	if ((option & ONIG_OPTION_EXTEND) != 0) {
		flags[needed++] = 'x';
	}

	if ((option & (ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE)) ==
			(ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE)) {
		/* both line modes together are spelled 'p' */
		flags[needed++] = 'p';
	} else {
		if ((option & ONIG_OPTION_MULTILINE) != 0) {
			flags[needed++] = 'm';
		}
		if ((option & ONIG_OPTION_SINGLELINE) != 0) {
			flags[needed++] = 's';
		}
	}

	if ((option & ONIG_OPTION_FIND_LONGEST) != 0) {
		flags[needed++] = 'l';
	}
	if ((option & ONIG_OPTION_FIND_NOT_EMPTY) != 0) {
		flags[needed++] = 'n';
	}

	if (syntax == ONIG_SYNTAX_JAVA) {
		syntax_flag = 'j';
	} else if (syntax == ONIG_SYNTAX_GNU_REGEX) {
		syntax_flag = 'u';
	} else if (syntax == ONIG_SYNTAX_GREP) {
		syntax_flag = 'g';
	} else if (syntax == ONIG_SYNTAX_EMACS) {
		syntax_flag = 'c';
	} else if (syntax == ONIG_SYNTAX_RUBY) {
		syntax_flag = 'r';
	} else if (syntax == ONIG_SYNTAX_PERL) {
		syntax_flag = 'z';
	} else if (syntax == ONIG_SYNTAX_POSIX_BASIC) {
		syntax_flag = 'b';
	} else if (syntax == ONIG_SYNTAX_POSIX_EXTENDED) {
		syntax_flag = 'd';
	}
	if (syntax_flag != 0) {
		flags[needed++] = syntax_flag;
	}

	flags[needed++] = '\0';

	/* store as much as the caller's buffer can take */
	for (idx = 0; idx < needed && idx < len; idx++) {
		str[idx] = flags[idx];
	}

	return (len < needed) ? needed : 0;
}
579 /* }}} */
580
581 /* {{{ _php_mb_regex_init_options */
582 static void
_php_mb_regex_init_options(const char * parg,int narg,OnigOptionType * option,OnigSyntaxType ** syntax,int * eval)583 _php_mb_regex_init_options(const char *parg, int narg, OnigOptionType *option, OnigSyntaxType **syntax, int *eval)
584 {
585 int n;
586 char c;
587 int optm = 0;
588
589 *syntax = ONIG_SYNTAX_RUBY;
590
591 if (parg != NULL) {
592 n = 0;
593 while(n < narg) {
594 c = parg[n++];
595 switch (c) {
596 case 'i':
597 optm |= ONIG_OPTION_IGNORECASE;
598 break;
599 case 'x':
600 optm |= ONIG_OPTION_EXTEND;
601 break;
602 case 'm':
603 optm |= ONIG_OPTION_MULTILINE;
604 break;
605 case 's':
606 optm |= ONIG_OPTION_SINGLELINE;
607 break;
608 case 'p':
609 optm |= ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE;
610 break;
611 case 'l':
612 optm |= ONIG_OPTION_FIND_LONGEST;
613 break;
614 case 'n':
615 optm |= ONIG_OPTION_FIND_NOT_EMPTY;
616 break;
617 case 'j':
618 *syntax = ONIG_SYNTAX_JAVA;
619 break;
620 case 'u':
621 *syntax = ONIG_SYNTAX_GNU_REGEX;
622 break;
623 case 'g':
624 *syntax = ONIG_SYNTAX_GREP;
625 break;
626 case 'c':
627 *syntax = ONIG_SYNTAX_EMACS;
628 break;
629 case 'r':
630 *syntax = ONIG_SYNTAX_RUBY;
631 break;
632 case 'z':
633 *syntax = ONIG_SYNTAX_PERL;
634 break;
635 case 'b':
636 *syntax = ONIG_SYNTAX_POSIX_BASIC;
637 break;
638 case 'd':
639 *syntax = ONIG_SYNTAX_POSIX_EXTENDED;
640 break;
641 case 'e':
642 if (eval != NULL) *eval = 1;
643 break;
644 default:
645 break;
646 }
647 }
648 if (option != NULL) *option|=optm;
649 }
650 }
651 /* }}} */
652
653 /*
654 * php functions
655 */
656
657 /* {{{ proto string mb_regex_encoding([string encoding])
658 Returns the current encoding for regex as a string. */
PHP_FUNCTION(mb_regex_encoding)659 PHP_FUNCTION(mb_regex_encoding)
660 {
661 char *encoding = NULL;
662 size_t encoding_len;
663 OnigEncoding mbctype;
664
665 if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s", &encoding, &encoding_len) == FAILURE) {
666 return;
667 }
668
669 if (!encoding) {
670 const char *retval = _php_mb_regex_mbctype2name(MBREX(current_mbctype));
671
672 if (retval == NULL) {
673 RETURN_FALSE;
674 }
675
676 RETURN_STRING((char *)retval);
677 } else {
678 mbctype = _php_mb_regex_name2mbctype(encoding);
679
680 if (mbctype == ONIG_ENCODING_UNDEF) {
681 php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", encoding);
682 RETURN_FALSE;
683 }
684
685 MBREX(current_mbctype) = mbctype;
686 RETURN_TRUE;
687 }
688 }
689 /* }}} */
690
691 /* {{{ _php_mb_regex_ereg_exec */
/*
 * Shared implementation of mb_ereg() and mb_eregi().
 *
 * Arguments (userland): pattern, string [, registers]. `icase` non-zero adds
 * case-insensitive matching to the default options.
 *
 * Return value (userland): FALSE on argument/encoding/compile failure or
 * when nothing matches. On a match: 1 when no registers array was passed;
 * otherwise the byte length of the whole match (1 if that length is 0), with
 * the registers array filled with one entry per group (the matched text, or
 * FALSE for a group that is empty or did not participate).
 */
static void _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase)
{
	zval *arg_pattern, *array = NULL;
	char *string;
	size_t string_len;
	php_mb_regex_t *re;
	OnigRegion *regs = NULL;
	OnigUChar *subject, *subject_end;
	OnigOptionType options;
	int subject_is_valid;
	int match_len = 1;

	if (zend_parse_parameters(ZEND_NUM_ARGS(), "zs|z/", &arg_pattern, &string, &string_len, &array) == FAILURE) {
		RETURN_FALSE;
	}

	subject_is_valid = php_mb_check_encoding(
		string,
		string_len,
		_php_mb_regex_mbctype2name(MBREX(current_mbctype))
	);

	/* the registers argument always comes back as a fresh array, even on failure */
	if (array != NULL) {
		zval_dtor(array);
		array_init(array);
	}

	if (!subject_is_valid) {
		RETURN_FALSE;
	}

	options = MBREX(regex_default_options);
	if (icase) {
		options |= ONIG_OPTION_IGNORECASE;
	}

	/* a non-string pattern is matched through its string form; floats are
	 * cut down to integers first to get rid of the decimal places */
	if (Z_TYPE_P(arg_pattern) != IS_STRING) {
		if (Z_TYPE_P(arg_pattern) == IS_DOUBLE) {
			convert_to_long_ex(arg_pattern);
		}
		convert_to_string_ex(arg_pattern);
	}

	if (Z_STRLEN_P(arg_pattern) == 0) {
		php_error_docref(NULL, E_WARNING, "empty pattern");
		RETURN_FALSE;
	}

	re = php_mbregex_compile_pattern(Z_STRVAL_P(arg_pattern), Z_STRLEN_P(arg_pattern), options, MBREX(current_mbctype), MBREX(regex_default_syntax));
	if (re == NULL) {
		RETURN_FALSE;
	}

	subject = (OnigUChar *)string;
	subject_end = (OnigUChar *)(string + string_len);
	regs = onig_region_new();

	if (onig_search(re, subject, subject_end, subject, subject_end, regs, 0) < 0) {
		RETVAL_FALSE;
	} else {
		if (array != NULL) {
			int group;

			match_len = regs->end[0] - regs->beg[0];
			for (group = 0; group < regs->num_regs; group++) {
				int beg = regs->beg[group];
				int end = regs->end[group];

				if (beg >= 0 && beg < end && (size_t)end <= string_len) {
					add_index_stringl(array, group, string + beg, end - beg);
				} else {
					add_index_bool(array, group, 0);
				}
			}
		}

		/* an empty match is still reported as 1 */
		if (match_len == 0) {
			match_len = 1;
		}
		RETVAL_LONG(match_len);
	}

	if (regs != NULL) {
		onig_region_free(regs, 1);
	}
}
784 /* }}} */
785
786 /* {{{ proto int mb_ereg(string pattern, string string [, array registers])
787 Regular expression match for multibyte string */
PHP_FUNCTION(mb_ereg)788 PHP_FUNCTION(mb_ereg)
789 {
790 _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
791 }
792 /* }}} */
793
794 /* {{{ proto int mb_eregi(string pattern, string string [, array registers])
795 Case-insensitive regular expression match for multibyte string */
PHP_FUNCTION(mb_eregi)796 PHP_FUNCTION(mb_eregi)
797 {
798 _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
799 }
800 /* }}} */
801
802 /* {{{ _php_mb_regex_ereg_replace_exec */
/*
 * Shared implementation of mb_ereg_replace(), mb_eregi_replace() and
 * mb_ereg_replace_callback().
 *
 *   options      option bits supplied by the wrapper; bits parsed from the
 *                userland option string (or, without one, the default
 *                options) are OR'ed on top
 *   is_callable  0: second userland argument is a replacement string in
 *                   which \0..\9 refer to match groups;
 *                1: it is a callback that receives the array of groups and
 *                   returns the replacement
 *
 * Return value (userland): the rewritten string; NULL when the subject is
 * not valid in the current regex encoding; FALSE on argument, compile,
 * search or eval errors.
 */
static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOptionType options, int is_callable)
{
	zval *arg_pattern_zval;

	char *arg_pattern;
	size_t arg_pattern_len;

	char *replace = NULL;
	size_t replace_len = 0;

	zend_fcall_info arg_replace_fci;
	zend_fcall_info_cache arg_replace_fci_cache;

	char *string;
	size_t string_len;

	char *p;
	php_mb_regex_t *re;
	OnigSyntaxType *syntax;
	OnigRegion *regs = NULL;
	smart_str out_buf = {0};
	smart_str eval_buf = {0};
	smart_str *pbuf;
	size_t i;
	int err, eval, n;
	OnigUChar *pos;
	OnigUChar *string_lim;
	char *description = NULL;
	char pat_buf[6];

	const mbfl_encoding *enc;

	{
		const char *current_enc_name;
		current_enc_name = _php_mb_regex_mbctype2name(MBREX(current_mbctype));
		if (current_enc_name == NULL ||
			(enc = mbfl_name2encoding(current_enc_name)) == NULL) {
			php_error_docref(NULL, E_WARNING, "Unknown error");
			RETURN_FALSE;
		}
	}
	eval = 0;
	{
		char *option_str = NULL;
		size_t option_str_len = 0;

		if (!is_callable) {
			if (zend_parse_parameters(ZEND_NUM_ARGS(), "zss|s",
						&arg_pattern_zval,
						&replace, &replace_len,
						&string, &string_len,
						&option_str, &option_str_len) == FAILURE) {
				RETURN_FALSE;
			}
		} else {
			if (zend_parse_parameters(ZEND_NUM_ARGS(), "zfs|s",
						&arg_pattern_zval,
						&arg_replace_fci, &arg_replace_fci_cache,
						&string, &string_len,
						&option_str, &option_str_len) == FAILURE) {
				RETURN_FALSE;
			}
		}

		if (!php_mb_check_encoding(
				string,
				string_len,
				_php_mb_regex_mbctype2name(MBREX(current_mbctype))
			)) {
			RETURN_NULL();
		}

		if (option_str != NULL) {
			_php_mb_regex_init_options(option_str, option_str_len, &options, &syntax, &eval);
		} else {
			options |= MBREX(regex_default_options);
			syntax = MBREX(regex_default_syntax);
		}
	}
	if (eval && !is_callable) {
		php_error_docref(NULL, E_DEPRECATED, "The 'e' option is deprecated, use mb_ereg_replace_callback instead");
	}
	if (Z_TYPE_P(arg_pattern_zval) == IS_STRING) {
		arg_pattern = Z_STRVAL_P(arg_pattern_zval);
		arg_pattern_len = Z_STRLEN_P(arg_pattern_zval);
	} else {
		/* FIXME: this code is not multibyte aware! */
		/* a non-string pattern is taken as a single byte value */
		convert_to_long_ex(arg_pattern_zval);
		pat_buf[0] = (char)Z_LVAL_P(arg_pattern_zval);
		pat_buf[1] = '\0';
		pat_buf[2] = '\0';
		pat_buf[3] = '\0';
		pat_buf[4] = '\0';
		pat_buf[5] = '\0';

		arg_pattern = pat_buf;
		arg_pattern_len = 1;
	}
	/* create regex pattern buffer */
	re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, options, MBREX(current_mbctype), syntax);
	if (re == NULL) {
		RETURN_FALSE;
	}

	/* Reject callback + 'e' before anything is allocated, so this early
	 * return has nothing to release. */
	if (is_callable && eval) {
		php_error_docref(NULL, E_WARNING, "Option 'e' cannot be used with replacement callback");
		RETURN_FALSE;
	}

	if (eval) {
		/* the expanded replacement is collected separately and evaluated */
		pbuf = &eval_buf;
		description = zend_make_compiled_string_description("mbregex replace");
	} else {
		/* the expanded replacement goes straight to the output
		 * (pbuf is not used at all in callback mode) */
		pbuf = &out_buf;
	}

	/* do the actual work */
	err = 0;
	pos = (OnigUChar *)string;
	string_lim = (OnigUChar*)(string + string_len);
	regs = onig_region_new();
	while (err >= 0) {
		err = onig_search(re, (OnigUChar *)string, (OnigUChar *)string_lim, pos, (OnigUChar *)string_lim, regs, 0);
		if (err <= -2) {
			OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
			onig_error_code_to_str(err_str, err);
			php_error_docref(NULL, E_WARNING, "mbregex search failure in php_mbereg_replace_exec(): %s", err_str);
			break;
		}
		if (err >= 0) {
			/* copy the part of the string before the match */
			smart_str_appendl(&out_buf, (char *)pos, (size_t)((OnigUChar *)(string + regs->beg[0]) - pos));

			if (!is_callable) {
				/* copy replacement and backrefs */
				i = 0;
				p = replace;
				while (i < replace_len) {
					int fwd = (int) php_mb_mbchar_bytes_ex(p, enc);

					/* Never step by less than one byte, and never past
					 * the end of the replacement when its last character
					 * is a truncated multibyte sequence. */
					if (fwd < 1) {
						fwd = 1;
					}
					if ((size_t)fwd > replace_len - i) {
						fwd = (int)(replace_len - i);
					}

					n = -1;
					if ((replace_len - i) >= 2 && fwd == 1 &&
						p[0] == '\\' && p[1] >= '0' && p[1] <= '9') {
						n = p[1] - '0';
					}
					if (n >= 0 && n < regs->num_regs) {
						/* backreference: empty/unset groups expand to nothing */
						if (regs->beg[n] >= 0 && regs->beg[n] < regs->end[n] && (size_t)regs->end[n] <= string_len) {
							smart_str_appendl(pbuf, string + regs->beg[n], regs->end[n] - regs->beg[n]);
						}
						p += 2;
						i += 2;
					} else {
						smart_str_appendl(pbuf, p, fwd);
						p += fwd;
						i += fwd;
					}
				}
			}

			if (eval) {
				zval v;
				zend_string *eval_str;
				/* null terminate buffer */
				smart_str_0(&eval_buf);

				if (eval_buf.s) {
					eval_str = eval_buf.s;
				} else {
					eval_str = ZSTR_EMPTY_ALLOC();
				}

				/* do eval */
				if (zend_eval_stringl(ZSTR_VAL(eval_str), ZSTR_LEN(eval_str), &v, description) == FAILURE) {
					efree(description);
					zend_throw_error(NULL, "Failed evaluating code: %s%s", PHP_EOL, ZSTR_VAL(eval_str));
					/* free the region structure itself too; we are leaving */
					onig_region_free(regs, 1);
					smart_str_free(&out_buf);
					smart_str_free(&eval_buf);
					RETURN_FALSE;
				}

				/* result of eval */
				convert_to_string(&v);
				smart_str_appendl(&out_buf, Z_STRVAL(v), Z_STRLEN(v));
				/* Clean up */
				smart_str_free(&eval_buf);
				zval_dtor(&v);
			} else if (is_callable) {
				zval args[1];
				zval subpats, retval;
				int grp;

				array_init(&subpats);
				for (grp = 0; grp < regs->num_regs; grp++) {
					int grp_beg = regs->beg[grp];
					int grp_end = regs->end[grp];

					if (grp_beg >= 0 && grp_beg <= grp_end && (size_t)grp_end <= string_len) {
						add_next_index_stringl(&subpats, string + grp_beg, grp_end - grp_beg);
					} else {
						/* group did not take part in the match: pass an
						 * empty string without forming a pointer in front
						 * of the subject */
						add_next_index_stringl(&subpats, "", 0);
					}
				}

				ZVAL_COPY_VALUE(&args[0], &subpats);
				ZVAL_UNDEF(&retval);

				arg_replace_fci.param_count = 1;
				arg_replace_fci.params = args;
				arg_replace_fci.retval = &retval;
				if (zend_call_function(&arg_replace_fci, &arg_replace_fci_cache) == SUCCESS &&
						!Z_ISUNDEF(retval)) {
					convert_to_string_ex(&retval);
					smart_str_appendl(&out_buf, Z_STRVAL(retval), Z_STRLEN(retval));
					zval_ptr_dtor(&retval);
				} else {
					if (!EG(exception)) {
						php_error_docref(NULL, E_WARNING, "Unable to call custom replacement function");
					}
				}
				zval_ptr_dtor(&subpats);
			}

			n = regs->end[0];
			if ((pos - (OnigUChar *)string) < n) {
				/* continue right after the match */
				pos = (OnigUChar *)string + n;
			} else {
				/* the match did not advance: copy one byte and step over
				 * it so the loop makes progress */
				if (pos < string_lim) {
					smart_str_appendl(&out_buf, (char *)pos, 1);
				}
				pos++;
			}
		} else { /* nomatch */
			/* stick that last bit of string on our output */
			if (string_lim - pos > 0) {
				smart_str_appendl(&out_buf, (char *)pos, string_lim - pos);
			}
		}
		/* reset the region contents for the next search, keep the structure */
		onig_region_free(regs, 0);
	}

	if (description) {
		efree(description);
	}
	if (regs != NULL) {
		onig_region_free(regs, 1);
	}
	smart_str_free(&eval_buf);

	if (err <= -2) {
		smart_str_free(&out_buf);
		RETVAL_FALSE;
	} else if (out_buf.s) {
		smart_str_0(&out_buf);
		RETVAL_STR(out_buf.s);
	} else {
		RETVAL_EMPTY_STRING();
	}
}
1065 /* }}} */
1066
1067 /* {{{ proto string mb_ereg_replace(string pattern, string replacement, string string [, string option])
1068 Replace regular expression for multibyte string */
PHP_FUNCTION(mb_ereg_replace)1069 PHP_FUNCTION(mb_ereg_replace)
1070 {
1071 _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 0);
1072 }
1073 /* }}} */
1074
/* {{{ proto string mb_eregi_replace(string pattern, string replacement, string string [, string option])
   Case-insensitive regular expression replace for multibyte string */
PHP_FUNCTION(mb_eregi_replace)1077 PHP_FUNCTION(mb_eregi_replace)
1078 {
1079 _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, ONIG_OPTION_IGNORECASE, 0);
1080 }
1081 /* }}} */
1082
1083 /* {{{ proto string mb_ereg_replace_callback(string pattern, string callback, string string [, string option])
1084 regular expression for multibyte string using replacement callback */
PHP_FUNCTION(mb_ereg_replace_callback)1085 PHP_FUNCTION(mb_ereg_replace_callback)
1086 {
1087 _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 1);
1088 }
1089 /* }}} */
1090
1091 /* {{{ proto array mb_split(string pattern, string string [, int limit])
1092 split multibyte string into array by regular expression */
PHP_FUNCTION(mb_split)1093 PHP_FUNCTION(mb_split)
1094 {
1095 char *arg_pattern;
1096 size_t arg_pattern_len;
1097 php_mb_regex_t *re;
1098 OnigRegion *regs = NULL;
1099 char *string;
1100 OnigUChar *pos, *chunk_pos;
1101 size_t string_len;
1102
1103 int n, err;
1104 zend_long count = -1;
1105
1106 if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|l", &arg_pattern, &arg_pattern_len, &string, &string_len, &count) == FAILURE) {
1107 RETURN_FALSE;
1108 }
1109
1110 if (count > 0) {
1111 count--;
1112 }
1113
1114 if (!php_mb_check_encoding(string, string_len,
1115 _php_mb_regex_mbctype2name(MBREX(current_mbctype)))) {
1116 RETURN_FALSE;
1117 }
1118
1119 /* create regex pattern buffer */
1120 if ((re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, MBREX(regex_default_options), MBREX(current_mbctype), MBREX(regex_default_syntax))) == NULL) {
1121 RETURN_FALSE;
1122 }
1123
1124 array_init(return_value);
1125
1126 chunk_pos = pos = (OnigUChar *)string;
1127 err = 0;
1128 regs = onig_region_new();
1129 /* churn through str, generating array entries as we go */
1130 while (count != 0 && (pos - (OnigUChar *)string) < (ptrdiff_t)string_len) {
1131 int beg, end;
1132 err = onig_search(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), pos, (OnigUChar *)(string + string_len), regs, 0);
1133 if (err < 0) {
1134 break;
1135 }
1136 beg = regs->beg[0], end = regs->end[0];
1137 /* add it to the array */
1138 if ((pos - (OnigUChar *)string) < end) {
1139 if ((size_t)beg < string_len && beg >= (chunk_pos - (OnigUChar *)string)) {
1140 add_next_index_stringl(return_value, (char *)chunk_pos, ((OnigUChar *)(string + beg) - chunk_pos));
1141 --count;
1142 } else {
1143 err = -2;
1144 break;
1145 }
1146 /* point at our new starting point */
1147 chunk_pos = pos = (OnigUChar *)string + end;
1148 } else {
1149 pos++;
1150 }
1151 onig_region_free(regs, 0);
1152 }
1153
1154 onig_region_free(regs, 1);
1155
1156 /* see if we encountered an error */
1157 if (err <= -2) {
1158 OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
1159 onig_error_code_to_str(err_str, err);
1160 php_error_docref(NULL, E_WARNING, "mbregex search failure in mbsplit(): %s", err_str);
1161 zval_dtor(return_value);
1162 RETURN_FALSE;
1163 }
1164
1165 /* otherwise we just have one last element to add to the array */
1166 n = ((OnigUChar *)(string + string_len) - chunk_pos);
1167 if (n > 0) {
1168 add_next_index_stringl(return_value, (char *)chunk_pos, n);
1169 } else {
1170 add_next_index_stringl(return_value, "", 0);
1171 }
1172 }
1173 /* }}} */
1174
1175 /* {{{ proto bool mb_ereg_match(string pattern, string string [,string option])
1176 Regular expression match for multibyte string */
PHP_FUNCTION(mb_ereg_match)1177 PHP_FUNCTION(mb_ereg_match)
1178 {
1179 char *arg_pattern;
1180 size_t arg_pattern_len;
1181
1182 char *string;
1183 size_t string_len;
1184
1185 php_mb_regex_t *re;
1186 OnigSyntaxType *syntax;
1187 OnigOptionType option = 0;
1188 int err;
1189
1190 {
1191 char *option_str = NULL;
1192 size_t option_str_len = 0;
1193
1194 if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|s",
1195 &arg_pattern, &arg_pattern_len, &string, &string_len,
1196 &option_str, &option_str_len)==FAILURE) {
1197 RETURN_FALSE;
1198 }
1199
1200 if (option_str != NULL) {
1201 _php_mb_regex_init_options(option_str, option_str_len, &option, &syntax, NULL);
1202 } else {
1203 option |= MBREX(regex_default_options);
1204 syntax = MBREX(regex_default_syntax);
1205 }
1206 }
1207
1208 if (!php_mb_check_encoding(string, string_len,
1209 _php_mb_regex_mbctype2name(MBREX(current_mbctype)))) {
1210 RETURN_FALSE;
1211 }
1212
1213 if ((re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), syntax)) == NULL) {
1214 RETURN_FALSE;
1215 }
1216
1217 /* match */
1218 err = onig_match(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), (OnigUChar *)string, NULL, 0);
1219 if (err >= 0) {
1220 RETVAL_TRUE;
1221 } else {
1222 RETVAL_FALSE;
1223 }
1224 }
1225 /* }}} */
1226
1227 /* regex search */
1228 /* {{{ _php_mb_regex_ereg_search_exec */
1229 static void
_php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS,int mode)1230 _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mode)
1231 {
1232 char *arg_pattern = NULL, *arg_options = NULL;
1233 size_t arg_pattern_len, arg_options_len;
1234 int n, i, err, pos, len, beg, end;
1235 OnigOptionType option;
1236 OnigUChar *str;
1237 OnigSyntaxType *syntax;
1238
1239 if (zend_parse_parameters(ZEND_NUM_ARGS(), "|ss", &arg_pattern, &arg_pattern_len, &arg_options, &arg_options_len) == FAILURE) {
1240 return;
1241 }
1242
1243 option = MBREX(regex_default_options);
1244
1245 if (arg_options) {
1246 option = 0;
1247 _php_mb_regex_init_options(arg_options, arg_options_len, &option, &syntax, NULL);
1248 }
1249
1250 if (arg_pattern) {
1251 /* create regex pattern buffer */
1252 if ((MBREX(search_re) = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), MBREX(regex_default_syntax))) == NULL) {
1253 RETURN_FALSE;
1254 }
1255 }
1256
1257 pos = MBREX(search_pos);
1258 str = NULL;
1259 len = 0;
1260 if (!Z_ISUNDEF(MBREX(search_str)) && Z_TYPE(MBREX(search_str)) == IS_STRING){
1261 str = (OnigUChar *)Z_STRVAL(MBREX(search_str));
1262 len = Z_STRLEN(MBREX(search_str));
1263 }
1264
1265 if (MBREX(search_re) == NULL) {
1266 php_error_docref(NULL, E_WARNING, "No regex given");
1267 RETURN_FALSE;
1268 }
1269
1270 if (str == NULL) {
1271 php_error_docref(NULL, E_WARNING, "No string given");
1272 RETURN_FALSE;
1273 }
1274
1275 if (MBREX(search_regs)) {
1276 onig_region_free(MBREX(search_regs), 1);
1277 }
1278 MBREX(search_regs) = onig_region_new();
1279
1280 err = onig_search(MBREX(search_re), str, str + len, str + pos, str + len, MBREX(search_regs), 0);
1281 if (err == ONIG_MISMATCH) {
1282 MBREX(search_pos) = len;
1283 RETVAL_FALSE;
1284 } else if (err <= -2) {
1285 OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
1286 onig_error_code_to_str(err_str, err);
1287 php_error_docref(NULL, E_WARNING, "mbregex search failure in mbregex_search(): %s", err_str);
1288 RETVAL_FALSE;
1289 } else {
1290 switch (mode) {
1291 case 1:
1292 array_init(return_value);
1293 beg = MBREX(search_regs)->beg[0];
1294 end = MBREX(search_regs)->end[0];
1295 add_next_index_long(return_value, beg);
1296 add_next_index_long(return_value, end - beg);
1297 break;
1298 case 2:
1299 array_init(return_value);
1300 n = MBREX(search_regs)->num_regs;
1301 for (i = 0; i < n; i++) {
1302 beg = MBREX(search_regs)->beg[i];
1303 end = MBREX(search_regs)->end[i];
1304 if (beg >= 0 && beg <= end && end <= len) {
1305 add_index_stringl(return_value, i, (char *)&str[beg], end - beg);
1306 } else {
1307 add_index_bool(return_value, i, 0);
1308 }
1309 }
1310 break;
1311 default:
1312 RETVAL_TRUE;
1313 break;
1314 }
1315 end = MBREX(search_regs)->end[0];
1316 if (pos <= end) {
1317 MBREX(search_pos) = end;
1318 } else {
1319 MBREX(search_pos) = pos + 1;
1320 }
1321 }
1322
1323 if (err < 0) {
1324 onig_region_free(MBREX(search_regs), 1);
1325 MBREX(search_regs) = (OnigRegion *)NULL;
1326 }
1327 }
1328 /* }}} */
1329
1330 /* {{{ proto bool mb_ereg_search([string pattern[, string option]])
1331 Regular expression search for multibyte string */
PHP_FUNCTION(mb_ereg_search)1332 PHP_FUNCTION(mb_ereg_search)
1333 {
1334 _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
1335 }
1336 /* }}} */
1337
1338 /* {{{ proto array mb_ereg_search_pos([string pattern[, string option]])
1339 Regular expression search for multibyte string */
PHP_FUNCTION(mb_ereg_search_pos)1340 PHP_FUNCTION(mb_ereg_search_pos)
1341 {
1342 _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
1343 }
1344 /* }}} */
1345
1346 /* {{{ proto array mb_ereg_search_regs([string pattern[, string option]])
1347 Regular expression search for multibyte string */
PHP_FUNCTION(mb_ereg_search_regs)1348 PHP_FUNCTION(mb_ereg_search_regs)
1349 {
1350 _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 2);
1351 }
1352 /* }}} */
1353
1354 /* {{{ proto bool mb_ereg_search_init(string string [, string pattern[, string option]])
1355 Initialize string and regular expression for search. */
PHP_FUNCTION(mb_ereg_search_init)1356 PHP_FUNCTION(mb_ereg_search_init)
1357 {
1358 size_t argc = ZEND_NUM_ARGS();
1359 zend_string *arg_str;
1360 char *arg_pattern = NULL, *arg_options = NULL;
1361 size_t arg_pattern_len = 0, arg_options_len = 0;
1362 OnigSyntaxType *syntax = NULL;
1363 OnigOptionType option;
1364
1365 if (zend_parse_parameters(argc, "S|ss", &arg_str, &arg_pattern, &arg_pattern_len, &arg_options, &arg_options_len) == FAILURE) {
1366 return;
1367 }
1368
1369 if (argc > 1 && arg_pattern_len == 0) {
1370 php_error_docref(NULL, E_WARNING, "Empty pattern");
1371 RETURN_FALSE;
1372 }
1373
1374 option = MBREX(regex_default_options);
1375 syntax = MBREX(regex_default_syntax);
1376
1377 if (argc == 3) {
1378 option = 0;
1379 _php_mb_regex_init_options(arg_options, arg_options_len, &option, &syntax, NULL);
1380 }
1381
1382 if (argc > 1) {
1383 /* create regex pattern buffer */
1384 if ((MBREX(search_re) = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), syntax)) == NULL) {
1385 RETURN_FALSE;
1386 }
1387 }
1388
1389 if (!Z_ISNULL(MBREX(search_str))) {
1390 zval_ptr_dtor(&MBREX(search_str));
1391 }
1392
1393 ZVAL_STR_COPY(&MBREX(search_str), arg_str);
1394
1395 if (php_mb_check_encoding(
1396 ZSTR_VAL(arg_str),
1397 ZSTR_LEN(arg_str),
1398 _php_mb_regex_mbctype2name(MBREX(current_mbctype))
1399 )) {
1400 MBREX(search_pos) = 0;
1401 RETVAL_TRUE;
1402 } else {
1403 MBREX(search_pos) = ZSTR_LEN(arg_str);
1404 RETVAL_FALSE;
1405 }
1406
1407 if (MBREX(search_regs) != NULL) {
1408 onig_region_free(MBREX(search_regs), 1);
1409 MBREX(search_regs) = NULL;
1410 }
1411 }
1412 /* }}} */
1413
1414 /* {{{ proto array mb_ereg_search_getregs(void)
1415 Get matched substring of the last time */
PHP_FUNCTION(mb_ereg_search_getregs)1416 PHP_FUNCTION(mb_ereg_search_getregs)
1417 {
1418 int n, i, len, beg, end;
1419 OnigUChar *str;
1420
1421 if (MBREX(search_regs) != NULL && Z_TYPE(MBREX(search_str)) == IS_STRING) {
1422 array_init(return_value);
1423
1424 str = (OnigUChar *)Z_STRVAL(MBREX(search_str));
1425 len = Z_STRLEN(MBREX(search_str));
1426 n = MBREX(search_regs)->num_regs;
1427 for (i = 0; i < n; i++) {
1428 beg = MBREX(search_regs)->beg[i];
1429 end = MBREX(search_regs)->end[i];
1430 if (beg >= 0 && beg <= end && end <= len) {
1431 add_index_stringl(return_value, i, (char *)&str[beg], end - beg);
1432 } else {
1433 add_index_bool(return_value, i, 0);
1434 }
1435 }
1436 } else {
1437 RETVAL_FALSE;
1438 }
1439 }
1440 /* }}} */
1441
1442 /* {{{ proto int mb_ereg_search_getpos(void)
1443 Get search start position */
PHP_FUNCTION(mb_ereg_search_getpos)1444 PHP_FUNCTION(mb_ereg_search_getpos)
1445 {
1446 RETVAL_LONG(MBREX(search_pos));
1447 }
1448 /* }}} */
1449
1450 /* {{{ proto bool mb_ereg_search_setpos(int position)
1451 Set search start position */
PHP_FUNCTION(mb_ereg_search_setpos)1452 PHP_FUNCTION(mb_ereg_search_setpos)
1453 {
1454 zend_long position;
1455
1456 if (zend_parse_parameters(ZEND_NUM_ARGS(), "l", &position) == FAILURE) {
1457 return;
1458 }
1459
1460 /* Accept negative position if length of search string can be determined */
1461 if ((position < 0) && (!Z_ISUNDEF(MBREX(search_str))) && (Z_TYPE(MBREX(search_str)) == IS_STRING)) {
1462 position += Z_STRLEN(MBREX(search_str));
1463 }
1464
1465 if (position < 0 || (!Z_ISUNDEF(MBREX(search_str)) && Z_TYPE(MBREX(search_str)) == IS_STRING && (size_t)position > Z_STRLEN(MBREX(search_str)))) {
1466 php_error_docref(NULL, E_WARNING, "Position is out of range");
1467 MBREX(search_pos) = 0;
1468 RETURN_FALSE;
1469 }
1470
1471 MBREX(search_pos) = position;
1472 RETURN_TRUE;
1473 }
1474 /* }}} */
1475
1476 /* {{{ php_mb_regex_set_options */
_php_mb_regex_set_options(OnigOptionType options,OnigSyntaxType * syntax,OnigOptionType * prev_options,OnigSyntaxType ** prev_syntax)1477 static void _php_mb_regex_set_options(OnigOptionType options, OnigSyntaxType *syntax, OnigOptionType *prev_options, OnigSyntaxType **prev_syntax)
1478 {
1479 if (prev_options != NULL) {
1480 *prev_options = MBREX(regex_default_options);
1481 }
1482 if (prev_syntax != NULL) {
1483 *prev_syntax = MBREX(regex_default_syntax);
1484 }
1485 MBREX(regex_default_options) = options;
1486 MBREX(regex_default_syntax) = syntax;
1487 }
1488 /* }}} */
1489
1490 /* {{{ proto string mb_regex_set_options([string options])
1491 Set or get the default options for mbregex functions */
PHP_FUNCTION(mb_regex_set_options)1492 PHP_FUNCTION(mb_regex_set_options)
1493 {
1494 OnigOptionType opt;
1495 OnigSyntaxType *syntax;
1496 char *string = NULL;
1497 size_t string_len;
1498 char buf[16];
1499
1500 if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s",
1501 &string, &string_len) == FAILURE) {
1502 RETURN_FALSE;
1503 }
1504 if (string != NULL) {
1505 opt = 0;
1506 syntax = NULL;
1507 _php_mb_regex_init_options(string, string_len, &opt, &syntax, NULL);
1508 _php_mb_regex_set_options(opt, syntax, NULL, NULL);
1509 } else {
1510 opt = MBREX(regex_default_options);
1511 syntax = MBREX(regex_default_syntax);
1512 }
1513 _php_mb_regex_get_option_string(buf, sizeof(buf), opt, syntax);
1514
1515 RETVAL_STRING(buf);
1516 }
1517 /* }}} */
1518
1519 #endif /* HAVE_MBREGEX */
1520
1521 /*
1522 * Local variables:
1523 * tab-width: 4
1524 * c-basic-offset: 4
1525 * End:
1526 * vim600: fdm=marker
1527 * vim: noet sw=4 ts=4
1528 */
1529