1 /*
2 +----------------------------------------------------------------------+
3 | PHP Version 5 |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 1997-2014 The PHP Group |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
15 | Author: Tsukada Takuya <tsukada@fminn.nagano.nagano.jp> |
16 +----------------------------------------------------------------------+
17 */
18
19 /* $Id$ */
20
21
22 #ifdef HAVE_CONFIG_H
23 #include "config.h"
24 #endif
25
26 #include "php.h"
27 #include "php_ini.h"
28
29 #if HAVE_MBREGEX
30
31 #include "ext/standard/php_smart_str.h"
32 #include "ext/standard/info.h"
33 #include "php_mbregex.h"
34 #include "mbstring.h"
35
36 #include "php_onig_compat.h" /* must come prior to the oniguruma header */
37 #include <oniguruma.h>
38 #undef UChar
39
40 ZEND_EXTERN_MODULE_GLOBALS(mbstring)
41
/* State shared by the multibyte regex functions in this file; accessed via MBREX(). */
struct _zend_mb_regex_globals {
	OnigEncoding default_mbctype;	/* copied back into current_mbctype at request shutdown */
	OnigEncoding current_mbctype;	/* encoding handed to the pattern compiler by the mb_ereg* functions */
	HashTable ht_rc;	/* cache of compiled patterns, keyed by the pattern text */
	zval *search_str;	/* released with zval_ptr_dtor() at request shutdown */
	zval *search_str_val;	/* usage is not visible in this part of the file */
	unsigned int search_pos;	/* reset to 0 at request shutdown */
	php_mb_regex_t *search_re;	/* pattern compiled by the search helper; points into ht_rc */
	OnigRegion *search_regs;	/* released with onig_region_free() at request shutdown */
	OnigOptionType regex_default_options;	/* options used when the caller passes no option string */
	OnigSyntaxType *regex_default_syntax;	/* syntax used when the caller passes no option string */
};
54
/* Accesses field g of the regex globals reached through MBSTRG(mb_regex_globals). */
#define MBREX(g) (MBSTRG(mb_regex_globals)->g)
56
57 /* {{{ static void php_mb_regex_free_cache() */
php_mb_regex_free_cache(php_mb_regex_t ** pre)58 static void php_mb_regex_free_cache(php_mb_regex_t **pre)
59 {
60 onig_free(*pre);
61 }
62 /* }}} */
63
64 /* {{{ _php_mb_regex_globals_ctor */
_php_mb_regex_globals_ctor(zend_mb_regex_globals * pglobals TSRMLS_DC)65 static int _php_mb_regex_globals_ctor(zend_mb_regex_globals *pglobals TSRMLS_DC)
66 {
67 pglobals->default_mbctype = ONIG_ENCODING_EUC_JP;
68 pglobals->current_mbctype = ONIG_ENCODING_EUC_JP;
69 zend_hash_init(&(pglobals->ht_rc), 0, NULL, (void (*)(void *)) php_mb_regex_free_cache, 1);
70 pglobals->search_str = (zval*) NULL;
71 pglobals->search_re = (php_mb_regex_t*)NULL;
72 pglobals->search_pos = 0;
73 pglobals->search_regs = (OnigRegion*)NULL;
74 pglobals->regex_default_options = ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE;
75 pglobals->regex_default_syntax = ONIG_SYNTAX_RUBY;
76 return SUCCESS;
77 }
78 /* }}} */
79
80 /* {{{ _php_mb_regex_globals_dtor */
_php_mb_regex_globals_dtor(zend_mb_regex_globals * pglobals TSRMLS_DC)81 static void _php_mb_regex_globals_dtor(zend_mb_regex_globals *pglobals TSRMLS_DC)
82 {
83 zend_hash_destroy(&pglobals->ht_rc);
84 }
85 /* }}} */
86
87 /* {{{ php_mb_regex_globals_alloc */
php_mb_regex_globals_alloc(TSRMLS_D)88 zend_mb_regex_globals *php_mb_regex_globals_alloc(TSRMLS_D)
89 {
90 zend_mb_regex_globals *pglobals = pemalloc(
91 sizeof(zend_mb_regex_globals), 1);
92 if (!pglobals) {
93 return NULL;
94 }
95 if (SUCCESS != _php_mb_regex_globals_ctor(pglobals TSRMLS_CC)) {
96 pefree(pglobals, 1);
97 return NULL;
98 }
99 return pglobals;
100 }
101 /* }}} */
102
103 /* {{{ php_mb_regex_globals_free */
php_mb_regex_globals_free(zend_mb_regex_globals * pglobals TSRMLS_DC)104 void php_mb_regex_globals_free(zend_mb_regex_globals *pglobals TSRMLS_DC)
105 {
106 if (!pglobals) {
107 return;
108 }
109 _php_mb_regex_globals_dtor(pglobals TSRMLS_CC);
110 pefree(pglobals, 1);
111 }
112 /* }}} */
113
/* {{{ PHP_MINIT_FUNCTION(mb_regex) */
/* Module startup hook: calls onig_init() and reports SUCCESS. */
PHP_MINIT_FUNCTION(mb_regex)
{
	onig_init();
	return SUCCESS;
}
/* }}} */
121
/* {{{ PHP_MSHUTDOWN_FUNCTION(mb_regex) */
/* Module shutdown hook: calls onig_end() and reports SUCCESS. */
PHP_MSHUTDOWN_FUNCTION(mb_regex)
{
	onig_end();
	return SUCCESS;
}
/* }}} */
129
130 /* {{{ PHP_RINIT_FUNCTION(mb_regex) */
PHP_RINIT_FUNCTION(mb_regex)131 PHP_RINIT_FUNCTION(mb_regex)
132 {
133 return MBSTRG(mb_regex_globals) ? SUCCESS: FAILURE;
134 }
135 /* }}} */
136
137 /* {{{ PHP_RSHUTDOWN_FUNCTION(mb_regex) */
PHP_RSHUTDOWN_FUNCTION(mb_regex)138 PHP_RSHUTDOWN_FUNCTION(mb_regex)
139 {
140 MBREX(current_mbctype) = MBREX(default_mbctype);
141
142 if (MBREX(search_str) != NULL) {
143 zval_ptr_dtor(&MBREX(search_str));
144 MBREX(search_str) = (zval *)NULL;
145 }
146 MBREX(search_pos) = 0;
147
148 if (MBREX(search_regs) != NULL) {
149 onig_region_free(MBREX(search_regs), 1);
150 MBREX(search_regs) = (OnigRegion *)NULL;
151 }
152 zend_hash_clean(&MBREX(ht_rc));
153
154 return SUCCESS;
155 }
156 /* }}} */
157
158 /* {{{ PHP_MINFO_FUNCTION(mb_regex) */
PHP_MINFO_FUNCTION(mb_regex)159 PHP_MINFO_FUNCTION(mb_regex)
160 {
161 char buf[32];
162 php_info_print_table_start();
163 php_info_print_table_row(2, "Multibyte (japanese) regex support", "enabled");
164 snprintf(buf, sizeof(buf), "%d.%d.%d",
165 ONIGURUMA_VERSION_MAJOR,
166 ONIGURUMA_VERSION_MINOR,
167 ONIGURUMA_VERSION_TEENY);
168 #ifdef PHP_ONIG_BUNDLED
169 #ifdef USE_COMBINATION_EXPLOSION_CHECK
170 php_info_print_table_row(2, "Multibyte regex (oniguruma) backtrack check", "On");
171 #else /* USE_COMBINATION_EXPLOSION_CHECK */
172 php_info_print_table_row(2, "Multibyte regex (oniguruma) backtrack check", "Off");
173 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
174 #endif /* PHP_BUNDLED_ONIG */
175 php_info_print_table_row(2, "Multibyte regex (oniguruma) version", buf);
176 php_info_print_table_end();
177 }
178 /* }}} */
179
180 /*
181 * encoding name resolver
182 */
183
184 /* {{{ encoding name map */
/* One row of the encoding alias table. */
typedef struct _php_mb_regex_enc_name_map_t {
	const char *names;	/* NUL-separated aliases, ended by an empty string; first alias is the reported name */
	OnigEncoding code;	/* Oniguruma encoding selected by any of the aliases */
} php_mb_regex_enc_name_map_t;
189
/*
 * Alias table used to translate between encoding names and Oniguruma
 * encodings. Each "names" literal packs several aliases separated by "\0";
 * the literal's own terminating NUL then forms the empty string that ends the
 * list. Rows are compiled in only when the matching ONIG_ENCODING_* macro is
 * defined. The table ends with a row whose names pointer is NULL.
 *
 * NOTE(review): the EUC-JP row lists "EUCJP" twice; the second occurrence is
 * redundant for lookups.
 */
php_mb_regex_enc_name_map_t enc_name_map[] = {
#ifdef ONIG_ENCODING_EUC_JP
	{
		"EUC-JP\0EUCJP\0X-EUC-JP\0UJIS\0EUCJP\0EUCJP-WIN\0",
		ONIG_ENCODING_EUC_JP
	},
#endif
#ifdef ONIG_ENCODING_UTF8
	{
		"UTF-8\0UTF8\0",
		ONIG_ENCODING_UTF8
	},
#endif
#ifdef ONIG_ENCODING_UTF16_BE
	{
		"UTF-16\0UTF-16BE\0",
		ONIG_ENCODING_UTF16_BE
	},
#endif
#ifdef ONIG_ENCODING_UTF16_LE
	{
		"UTF-16LE\0",
		ONIG_ENCODING_UTF16_LE
	},
#endif
#ifdef ONIG_ENCODING_UTF32_BE
	{
		"UCS-4\0UTF-32\0UTF-32BE\0",
		ONIG_ENCODING_UTF32_BE
	},
#endif
#ifdef ONIG_ENCODING_UTF32_LE
	{
		"UCS-4LE\0UTF-32LE\0",
		ONIG_ENCODING_UTF32_LE
	},
#endif
#ifdef ONIG_ENCODING_SJIS
	{
		"SJIS\0CP932\0MS932\0SHIFT_JIS\0SJIS-WIN\0WINDOWS-31J\0",
		ONIG_ENCODING_SJIS
	},
#endif
#ifdef ONIG_ENCODING_BIG5
	{
		"BIG5\0BIG-5\0BIGFIVE\0CN-BIG5\0BIG-FIVE\0",
		ONIG_ENCODING_BIG5
	},
#endif
#ifdef ONIG_ENCODING_EUC_CN
	{
		"EUC-CN\0EUCCN\0EUC_CN\0GB-2312\0GB2312\0",
		ONIG_ENCODING_EUC_CN
	},
#endif
#ifdef ONIG_ENCODING_EUC_TW
	{
		"EUC-TW\0EUCTW\0EUC_TW\0",
		ONIG_ENCODING_EUC_TW
	},
#endif
#ifdef ONIG_ENCODING_EUC_KR
	{
		"EUC-KR\0EUCKR\0EUC_KR\0",
		ONIG_ENCODING_EUC_KR
	},
#endif
#if defined(ONIG_ENCODING_KOI8) && !PHP_ONIG_BAD_KOI8_ENTRY
	{
		"KOI8\0KOI-8\0",
		ONIG_ENCODING_KOI8
	},
#endif
#ifdef ONIG_ENCODING_KOI8_R
	{
		"KOI8R\0KOI8-R\0KOI-8R\0",
		ONIG_ENCODING_KOI8_R
	},
#endif
#ifdef ONIG_ENCODING_ISO_8859_1
	{
		"ISO-8859-1\0ISO8859-1\0ISO_8859_1\0ISO8859_1\0",
		ONIG_ENCODING_ISO_8859_1
	},
#endif
#ifdef ONIG_ENCODING_ISO_8859_2
	{
		"ISO-8859-2\0ISO8859-2\0ISO_8859_2\0ISO8859_2\0",
		ONIG_ENCODING_ISO_8859_2
	},
#endif
#ifdef ONIG_ENCODING_ISO_8859_3
	{
		"ISO-8859-3\0ISO8859-3\0ISO_8859_3\0ISO8859_3\0",
		ONIG_ENCODING_ISO_8859_3
	},
#endif
#ifdef ONIG_ENCODING_ISO_8859_4
	{
		"ISO-8859-4\0ISO8859-4\0ISO_8859_4\0ISO8859_4\0",
		ONIG_ENCODING_ISO_8859_4
	},
#endif
#ifdef ONIG_ENCODING_ISO_8859_5
	{
		"ISO-8859-5\0ISO8859-5\0ISO_8859_5\0ISO8859_5\0",
		ONIG_ENCODING_ISO_8859_5
	},
#endif
#ifdef ONIG_ENCODING_ISO_8859_6
	{
		"ISO-8859-6\0ISO8859-6\0ISO_8859_6\0ISO8859_6\0",
		ONIG_ENCODING_ISO_8859_6
	},
#endif
#ifdef ONIG_ENCODING_ISO_8859_7
	{
		"ISO-8859-7\0ISO8859-7\0ISO_8859_7\0ISO8859_7\0",
		ONIG_ENCODING_ISO_8859_7
	},
#endif
#ifdef ONIG_ENCODING_ISO_8859_8
	{
		"ISO-8859-8\0ISO8859-8\0ISO_8859_8\0ISO8859_8\0",
		ONIG_ENCODING_ISO_8859_8
	},
#endif
#ifdef ONIG_ENCODING_ISO_8859_9
	{
		"ISO-8859-9\0ISO8859-9\0ISO_8859_9\0ISO8859_9\0",
		ONIG_ENCODING_ISO_8859_9
	},
#endif
#ifdef ONIG_ENCODING_ISO_8859_10
	{
		"ISO-8859-10\0ISO8859-10\0ISO_8859_10\0ISO8859_10\0",
		ONIG_ENCODING_ISO_8859_10
	},
#endif
#ifdef ONIG_ENCODING_ISO_8859_11
	{
		"ISO-8859-11\0ISO8859-11\0ISO_8859_11\0ISO8859_11\0",
		ONIG_ENCODING_ISO_8859_11
	},
#endif
#ifdef ONIG_ENCODING_ISO_8859_13
	{
		"ISO-8859-13\0ISO8859-13\0ISO_8859_13\0ISO8859_13\0",
		ONIG_ENCODING_ISO_8859_13
	},
#endif
#ifdef ONIG_ENCODING_ISO_8859_14
	{
		"ISO-8859-14\0ISO8859-14\0ISO_8859_14\0ISO8859_14\0",
		ONIG_ENCODING_ISO_8859_14
	},
#endif
#ifdef ONIG_ENCODING_ISO_8859_15
	{
		"ISO-8859-15\0ISO8859-15\0ISO_8859_15\0ISO8859_15\0",
		ONIG_ENCODING_ISO_8859_15
	},
#endif
#ifdef ONIG_ENCODING_ISO_8859_16
	{
		"ISO-8859-16\0ISO8859-16\0ISO_8859_16\0ISO8859_16\0",
		ONIG_ENCODING_ISO_8859_16
	},
#endif
#ifdef ONIG_ENCODING_ASCII
	{
		"ASCII\0US-ASCII\0US_ASCII\0ISO646\0",
		ONIG_ENCODING_ASCII
	},
#endif
	/* sentinel row: a NULL names pointer stops the lookup loops */
	{ NULL, ONIG_ENCODING_UNDEF }
};
367 /* }}} */
368
369 /* {{{ php_mb_regex_name2mbctype */
_php_mb_regex_name2mbctype(const char * pname)370 static OnigEncoding _php_mb_regex_name2mbctype(const char *pname)
371 {
372 const char *p;
373 php_mb_regex_enc_name_map_t *mapping;
374
375 if (pname == NULL || !*pname) {
376 return ONIG_ENCODING_UNDEF;
377 }
378
379 for (mapping = enc_name_map; mapping->names != NULL; mapping++) {
380 for (p = mapping->names; *p != '\0'; p += (strlen(p) + 1)) {
381 if (strcasecmp(p, pname) == 0) {
382 return mapping->code;
383 }
384 }
385 }
386
387 return ONIG_ENCODING_UNDEF;
388 }
389 /* }}} */
390
391 /* {{{ php_mb_regex_mbctype2name */
_php_mb_regex_mbctype2name(OnigEncoding mbctype)392 static const char *_php_mb_regex_mbctype2name(OnigEncoding mbctype)
393 {
394 php_mb_regex_enc_name_map_t *mapping;
395
396 for (mapping = enc_name_map; mapping->names != NULL; mapping++) {
397 if (mapping->code == mbctype) {
398 return mapping->names;
399 }
400 }
401
402 return NULL;
403 }
404 /* }}} */
405
406 /* {{{ php_mb_regex_set_mbctype */
php_mb_regex_set_mbctype(const char * encname TSRMLS_DC)407 int php_mb_regex_set_mbctype(const char *encname TSRMLS_DC)
408 {
409 OnigEncoding mbctype = _php_mb_regex_name2mbctype(encname);
410 if (mbctype == ONIG_ENCODING_UNDEF) {
411 return FAILURE;
412 }
413 MBREX(current_mbctype) = mbctype;
414 return SUCCESS;
415 }
416 /* }}} */
417
418 /* {{{ php_mb_regex_set_default_mbctype */
php_mb_regex_set_default_mbctype(const char * encname TSRMLS_DC)419 int php_mb_regex_set_default_mbctype(const char *encname TSRMLS_DC)
420 {
421 OnigEncoding mbctype = _php_mb_regex_name2mbctype(encname);
422 if (mbctype == ONIG_ENCODING_UNDEF) {
423 return FAILURE;
424 }
425 MBREX(default_mbctype) = mbctype;
426 return SUCCESS;
427 }
428 /* }}} */
429
/* {{{ php_mb_regex_get_mbctype */
/* Returns the table name of the current regex encoding, or NULL if it has no entry in enc_name_map. */
const char *php_mb_regex_get_mbctype(TSRMLS_D)
{
	return _php_mb_regex_mbctype2name(MBREX(current_mbctype));
}
/* }}} */
436
/* {{{ php_mb_regex_get_default_mbctype */
/* Returns the table name of the default regex encoding, or NULL if it has no entry in enc_name_map. */
const char *php_mb_regex_get_default_mbctype(TSRMLS_D)
{
	return _php_mb_regex_mbctype2name(MBREX(default_mbctype));
}
/* }}} */
443
444 /*
445 * regex cache
446 */
447 /* {{{ php_mbregex_compile_pattern */
php_mbregex_compile_pattern(const char * pattern,int patlen,OnigOptionType options,OnigEncoding enc,OnigSyntaxType * syntax TSRMLS_DC)448 static php_mb_regex_t *php_mbregex_compile_pattern(const char *pattern, int patlen, OnigOptionType options, OnigEncoding enc, OnigSyntaxType *syntax TSRMLS_DC)
449 {
450 int err_code = 0;
451 int found = 0;
452 php_mb_regex_t *retval = NULL, **rc = NULL;
453 OnigErrorInfo err_info;
454 OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
455
456 found = zend_hash_find(&MBREX(ht_rc), (char *)pattern, patlen+1, (void **) &rc);
457 if (found == FAILURE || (*rc)->options != options || (*rc)->enc != enc || (*rc)->syntax != syntax) {
458 if ((err_code = onig_new(&retval, (OnigUChar *)pattern, (OnigUChar *)(pattern + patlen), options, enc, syntax, &err_info)) != ONIG_NORMAL) {
459 onig_error_code_to_str(err_str, err_code, err_info);
460 php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex compile err: %s", err_str);
461 retval = NULL;
462 goto out;
463 }
464 zend_hash_update(&MBREX(ht_rc), (char *) pattern, patlen + 1, (void *) &retval, sizeof(retval), NULL);
465 } else if (found == SUCCESS) {
466 retval = *rc;
467 }
468 out:
469 return retval;
470 }
471 /* }}} */
472
473 /* {{{ _php_mb_regex_get_option_string */
_php_mb_regex_get_option_string(char * str,size_t len,OnigOptionType option,OnigSyntaxType * syntax)474 static size_t _php_mb_regex_get_option_string(char *str, size_t len, OnigOptionType option, OnigSyntaxType *syntax)
475 {
476 size_t len_left = len;
477 size_t len_req = 0;
478 char *p = str;
479 char c;
480
481 if ((option & ONIG_OPTION_IGNORECASE) != 0) {
482 if (len_left > 0) {
483 --len_left;
484 *(p++) = 'i';
485 }
486 ++len_req;
487 }
488
489 if ((option & ONIG_OPTION_EXTEND) != 0) {
490 if (len_left > 0) {
491 --len_left;
492 *(p++) = 'x';
493 }
494 ++len_req;
495 }
496
497 if ((option & (ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE)) ==
498 (ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE)) {
499 if (len_left > 0) {
500 --len_left;
501 *(p++) = 'p';
502 }
503 ++len_req;
504 } else {
505 if ((option & ONIG_OPTION_MULTILINE) != 0) {
506 if (len_left > 0) {
507 --len_left;
508 *(p++) = 'm';
509 }
510 ++len_req;
511 }
512
513 if ((option & ONIG_OPTION_SINGLELINE) != 0) {
514 if (len_left > 0) {
515 --len_left;
516 *(p++) = 's';
517 }
518 ++len_req;
519 }
520 }
521 if ((option & ONIG_OPTION_FIND_LONGEST) != 0) {
522 if (len_left > 0) {
523 --len_left;
524 *(p++) = 'l';
525 }
526 ++len_req;
527 }
528 if ((option & ONIG_OPTION_FIND_NOT_EMPTY) != 0) {
529 if (len_left > 0) {
530 --len_left;
531 *(p++) = 'n';
532 }
533 ++len_req;
534 }
535
536 c = 0;
537
538 if (syntax == ONIG_SYNTAX_JAVA) {
539 c = 'j';
540 } else if (syntax == ONIG_SYNTAX_GNU_REGEX) {
541 c = 'u';
542 } else if (syntax == ONIG_SYNTAX_GREP) {
543 c = 'g';
544 } else if (syntax == ONIG_SYNTAX_EMACS) {
545 c = 'c';
546 } else if (syntax == ONIG_SYNTAX_RUBY) {
547 c = 'r';
548 } else if (syntax == ONIG_SYNTAX_PERL) {
549 c = 'z';
550 } else if (syntax == ONIG_SYNTAX_POSIX_BASIC) {
551 c = 'b';
552 } else if (syntax == ONIG_SYNTAX_POSIX_EXTENDED) {
553 c = 'd';
554 }
555
556 if (c != 0) {
557 if (len_left > 0) {
558 --len_left;
559 *(p++) = c;
560 }
561 ++len_req;
562 }
563
564
565 if (len_left > 0) {
566 --len_left;
567 *(p++) = '\0';
568 }
569 ++len_req;
570 if (len < len_req) {
571 return len_req;
572 }
573
574 return 0;
575 }
576 /* }}} */
577
578 /* {{{ _php_mb_regex_init_options */
579 static void
_php_mb_regex_init_options(const char * parg,int narg,OnigOptionType * option,OnigSyntaxType ** syntax,int * eval)580 _php_mb_regex_init_options(const char *parg, int narg, OnigOptionType *option, OnigSyntaxType **syntax, int *eval)
581 {
582 int n;
583 char c;
584 int optm = 0;
585
586 *syntax = ONIG_SYNTAX_RUBY;
587
588 if (parg != NULL) {
589 n = 0;
590 while(n < narg) {
591 c = parg[n++];
592 switch (c) {
593 case 'i':
594 optm |= ONIG_OPTION_IGNORECASE;
595 break;
596 case 'x':
597 optm |= ONIG_OPTION_EXTEND;
598 break;
599 case 'm':
600 optm |= ONIG_OPTION_MULTILINE;
601 break;
602 case 's':
603 optm |= ONIG_OPTION_SINGLELINE;
604 break;
605 case 'p':
606 optm |= ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE;
607 break;
608 case 'l':
609 optm |= ONIG_OPTION_FIND_LONGEST;
610 break;
611 case 'n':
612 optm |= ONIG_OPTION_FIND_NOT_EMPTY;
613 break;
614 case 'j':
615 *syntax = ONIG_SYNTAX_JAVA;
616 break;
617 case 'u':
618 *syntax = ONIG_SYNTAX_GNU_REGEX;
619 break;
620 case 'g':
621 *syntax = ONIG_SYNTAX_GREP;
622 break;
623 case 'c':
624 *syntax = ONIG_SYNTAX_EMACS;
625 break;
626 case 'r':
627 *syntax = ONIG_SYNTAX_RUBY;
628 break;
629 case 'z':
630 *syntax = ONIG_SYNTAX_PERL;
631 break;
632 case 'b':
633 *syntax = ONIG_SYNTAX_POSIX_BASIC;
634 break;
635 case 'd':
636 *syntax = ONIG_SYNTAX_POSIX_EXTENDED;
637 break;
638 case 'e':
639 if (eval != NULL) *eval = 1;
640 break;
641 default:
642 break;
643 }
644 }
645 if (option != NULL) *option|=optm;
646 }
647 }
648 /* }}} */
649
650 /*
651 * php functions
652 */
653
654 /* {{{ proto string mb_regex_encoding([string encoding])
655 Returns the current encoding for regex as a string. */
PHP_FUNCTION(mb_regex_encoding)656 PHP_FUNCTION(mb_regex_encoding)
657 {
658 size_t argc = ZEND_NUM_ARGS();
659 char *encoding;
660 int encoding_len;
661 OnigEncoding mbctype;
662
663 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &encoding, &encoding_len) == FAILURE) {
664 return;
665 }
666
667 if (argc == 0) {
668 const char *retval = _php_mb_regex_mbctype2name(MBREX(current_mbctype));
669
670 if (retval == NULL) {
671 RETURN_FALSE;
672 }
673
674 RETURN_STRING((char *)retval, 1);
675 } else if (argc == 1) {
676 mbctype = _php_mb_regex_name2mbctype(encoding);
677
678 if (mbctype == ONIG_ENCODING_UNDEF) {
679 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
680 RETURN_FALSE;
681 }
682
683 MBREX(current_mbctype) = mbctype;
684 RETURN_TRUE;
685 }
686 }
687 /* }}} */
688
689 /* {{{ _php_mb_regex_ereg_exec */
_php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS,int icase)690 static void _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase)
691 {
692 zval **arg_pattern, *array;
693 char *string;
694 int string_len;
695 php_mb_regex_t *re;
696 OnigRegion *regs = NULL;
697 int i, match_len, beg, end;
698 OnigOptionType options;
699 char *str;
700
701 array = NULL;
702
703 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "Zs|z", &arg_pattern, &string, &string_len, &array) == FAILURE) {
704 RETURN_FALSE;
705 }
706
707 options = MBREX(regex_default_options);
708 if (icase) {
709 options |= ONIG_OPTION_IGNORECASE;
710 }
711
712 /* compile the regular expression from the supplied regex */
713 if (Z_TYPE_PP(arg_pattern) != IS_STRING) {
714 /* we convert numbers to integers and treat them as a string */
715 if (Z_TYPE_PP(arg_pattern) == IS_DOUBLE) {
716 convert_to_long_ex(arg_pattern); /* get rid of decimal places */
717 }
718 convert_to_string_ex(arg_pattern);
719 /* don't bother doing an extended regex with just a number */
720 }
721
722 if (!Z_STRVAL_PP(arg_pattern) || Z_STRLEN_PP(arg_pattern) == 0) {
723 php_error_docref(NULL TSRMLS_CC, E_WARNING, "empty pattern");
724 RETVAL_FALSE;
725 goto out;
726 }
727
728 re = php_mbregex_compile_pattern(Z_STRVAL_PP(arg_pattern), Z_STRLEN_PP(arg_pattern), options, MBREX(current_mbctype), MBREX(regex_default_syntax) TSRMLS_CC);
729 if (re == NULL) {
730 RETVAL_FALSE;
731 goto out;
732 }
733
734 regs = onig_region_new();
735
736 /* actually execute the regular expression */
737 if (onig_search(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), (OnigUChar *)string, (OnigUChar *)(string + string_len), regs, 0) < 0) {
738 RETVAL_FALSE;
739 goto out;
740 }
741
742 match_len = 1;
743 str = string;
744 if (array != NULL) {
745 match_len = regs->end[0] - regs->beg[0];
746 zval_dtor(array);
747 array_init(array);
748 for (i = 0; i < regs->num_regs; i++) {
749 beg = regs->beg[i];
750 end = regs->end[i];
751 if (beg >= 0 && beg < end && end <= string_len) {
752 add_index_stringl(array, i, (char *)&str[beg], end - beg, 1);
753 } else {
754 add_index_bool(array, i, 0);
755 }
756 }
757 }
758
759 if (match_len == 0) {
760 match_len = 1;
761 }
762 RETVAL_LONG(match_len);
763 out:
764 if (regs != NULL) {
765 onig_region_free(regs, 1);
766 }
767 }
768 /* }}} */
769
/* {{{ proto int mb_ereg(string pattern, string string [, array registers])
   Regular expression match for multibyte string */
/* Forwards to _php_mb_regex_ereg_exec() with icase = 0 (no ignore-case option added). */
PHP_FUNCTION(mb_ereg)
{
	_php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
}
/* }}} */
777
/* {{{ proto int mb_eregi(string pattern, string string [, array registers])
   Case-insensitive regular expression match for multibyte string */
/* Forwards to _php_mb_regex_ereg_exec() with icase = 1 (ignore-case option added). */
PHP_FUNCTION(mb_eregi)
{
	_php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
}
/* }}} */
785
786 /* {{{ _php_mb_regex_ereg_replace_exec */
_php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS,OnigOptionType options,int is_callable)787 static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOptionType options, int is_callable)
788 {
789 zval **arg_pattern_zval;
790
791 char *arg_pattern;
792 int arg_pattern_len;
793
794 char *replace;
795 int replace_len;
796
797 zend_fcall_info arg_replace_fci;
798 zend_fcall_info_cache arg_replace_fci_cache;
799
800 char *string;
801 int string_len;
802
803 char *p;
804 php_mb_regex_t *re;
805 OnigSyntaxType *syntax;
806 OnigRegion *regs = NULL;
807 smart_str out_buf = { 0 };
808 smart_str eval_buf = { 0 };
809 smart_str *pbuf;
810 int i, err, eval, n;
811 OnigUChar *pos;
812 OnigUChar *string_lim;
813 char *description = NULL;
814 char pat_buf[2];
815
816 const mbfl_encoding *enc;
817
818 {
819 const char *current_enc_name;
820 current_enc_name = _php_mb_regex_mbctype2name(MBREX(current_mbctype));
821 if (current_enc_name == NULL ||
822 (enc = mbfl_name2encoding(current_enc_name)) == NULL) {
823 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown error");
824 RETURN_FALSE;
825 }
826 }
827 eval = 0;
828 {
829 char *option_str = NULL;
830 int option_str_len = 0;
831
832 if (!is_callable) {
833 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "Zss|s",
834 &arg_pattern_zval,
835 &replace, &replace_len,
836 &string, &string_len,
837 &option_str, &option_str_len) == FAILURE) {
838 RETURN_FALSE;
839 }
840 } else {
841 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "Zfs|s",
842 &arg_pattern_zval,
843 &arg_replace_fci, &arg_replace_fci_cache,
844 &string, &string_len,
845 &option_str, &option_str_len) == FAILURE) {
846 RETURN_FALSE;
847 }
848 }
849
850 if (option_str != NULL) {
851 _php_mb_regex_init_options(option_str, option_str_len, &options, &syntax, &eval);
852 } else {
853 options |= MBREX(regex_default_options);
854 syntax = MBREX(regex_default_syntax);
855 }
856 }
857 if (Z_TYPE_PP(arg_pattern_zval) == IS_STRING) {
858 arg_pattern = Z_STRVAL_PP(arg_pattern_zval);
859 arg_pattern_len = Z_STRLEN_PP(arg_pattern_zval);
860 } else {
861 /* FIXME: this code is not multibyte aware! */
862 convert_to_long_ex(arg_pattern_zval);
863 pat_buf[0] = (char)Z_LVAL_PP(arg_pattern_zval);
864 pat_buf[1] = '\0';
865
866 arg_pattern = pat_buf;
867 arg_pattern_len = 1;
868 }
869 /* create regex pattern buffer */
870 re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, options, MBREX(current_mbctype), syntax TSRMLS_CC);
871 if (re == NULL) {
872 RETURN_FALSE;
873 }
874
875 if (eval || is_callable) {
876 pbuf = &eval_buf;
877 description = zend_make_compiled_string_description("mbregex replace" TSRMLS_CC);
878 } else {
879 pbuf = &out_buf;
880 description = NULL;
881 }
882
883 if (is_callable) {
884 if (eval) {
885 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Option 'e' cannot be used with replacement callback");
886 RETURN_FALSE;
887 }
888 }
889
890 /* do the actual work */
891 err = 0;
892 pos = (OnigUChar *)string;
893 string_lim = (OnigUChar*)(string + string_len);
894 regs = onig_region_new();
895 while (err >= 0) {
896 err = onig_search(re, (OnigUChar *)string, (OnigUChar *)string_lim, pos, (OnigUChar *)string_lim, regs, 0);
897 if (err <= -2) {
898 OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
899 onig_error_code_to_str(err_str, err);
900 php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex search failure in php_mbereg_replace_exec(): %s", err_str);
901 break;
902 }
903 if (err >= 0) {
904 #if moriyoshi_0
905 if (regs->beg[0] == regs->end[0]) {
906 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty regular expression");
907 break;
908 }
909 #endif
910 /* copy the part of the string before the match */
911 smart_str_appendl(&out_buf, pos, (size_t)((OnigUChar *)(string + regs->beg[0]) - pos));
912
913 if (!is_callable) {
914 /* copy replacement and backrefs */
915 i = 0;
916 p = replace;
917 while (i < replace_len) {
918 int fwd = (int) php_mb_mbchar_bytes_ex(p, enc);
919 n = -1;
920 if ((replace_len - i) >= 2 && fwd == 1 &&
921 p[0] == '\\' && p[1] >= '0' && p[1] <= '9') {
922 n = p[1] - '0';
923 }
924 if (n >= 0 && n < regs->num_regs) {
925 if (regs->beg[n] >= 0 && regs->beg[n] < regs->end[n] && regs->end[n] <= string_len) {
926 smart_str_appendl(pbuf, string + regs->beg[n], regs->end[n] - regs->beg[n]);
927 }
928 p += 2;
929 i += 2;
930 } else {
931 smart_str_appendl(pbuf, p, fwd);
932 p += fwd;
933 i += fwd;
934 }
935 }
936 }
937
938 if (eval) {
939 zval v;
940 /* null terminate buffer */
941 smart_str_0(&eval_buf);
942 /* do eval */
943 if (zend_eval_stringl(eval_buf.c, eval_buf.len, &v, description TSRMLS_CC) == FAILURE) {
944 efree(description);
945 php_error_docref(NULL TSRMLS_CC,E_ERROR, "Failed evaluating code: %s%s", PHP_EOL, eval_buf.c);
946 /* zend_error() does not return in this case */
947 }
948
949 /* result of eval */
950 convert_to_string(&v);
951 smart_str_appendl(&out_buf, Z_STRVAL(v), Z_STRLEN(v));
952 /* Clean up */
953 eval_buf.len = 0;
954 zval_dtor(&v);
955 } else if (is_callable) {
956 zval *retval_ptr;
957 zval **args[1];
958 zval *subpats;
959 int i;
960
961 MAKE_STD_ZVAL(subpats);
962 array_init(subpats);
963
964 for (i = 0; i < regs->num_regs; i++) {
965 add_next_index_stringl(subpats, string + regs->beg[i], regs->end[i] - regs->beg[i], 1);
966 }
967
968 args[0] = &subpats;
969 /* null terminate buffer */
970 smart_str_0(&eval_buf);
971
972 arg_replace_fci.param_count = 1;
973 arg_replace_fci.params = args;
974 arg_replace_fci.retval_ptr_ptr = &retval_ptr;
975 if (zend_call_function(&arg_replace_fci, &arg_replace_fci_cache TSRMLS_CC) == SUCCESS && arg_replace_fci.retval_ptr_ptr) {
976 convert_to_string_ex(&retval_ptr);
977 smart_str_appendl(&out_buf, Z_STRVAL_P(retval_ptr), Z_STRLEN_P(retval_ptr));
978 eval_buf.len = 0;
979 zval_ptr_dtor(&retval_ptr);
980 } else {
981 efree(description);
982 if (!EG(exception)) {
983 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to call custom replacement function");
984 }
985 }
986 zval_ptr_dtor(&subpats);
987 }
988
989 n = regs->end[0];
990 if ((pos - (OnigUChar *)string) < n) {
991 pos = (OnigUChar *)string + n;
992 } else {
993 if (pos < string_lim) {
994 smart_str_appendl(&out_buf, pos, 1);
995 }
996 pos++;
997 }
998 } else { /* nomatch */
999 /* stick that last bit of string on our output */
1000 if (string_lim - pos > 0) {
1001 smart_str_appendl(&out_buf, pos, string_lim - pos);
1002 }
1003 }
1004 onig_region_free(regs, 0);
1005 }
1006
1007 if (description) {
1008 efree(description);
1009 }
1010 if (regs != NULL) {
1011 onig_region_free(regs, 1);
1012 }
1013 smart_str_free(&eval_buf);
1014
1015 if (err <= -2) {
1016 smart_str_free(&out_buf);
1017 RETVAL_FALSE;
1018 } else {
1019 smart_str_appendc(&out_buf, '\0');
1020 RETVAL_STRINGL((char *)out_buf.c, out_buf.len - 1, 0);
1021 }
1022 }
1023 /* }}} */
1024
/* {{{ proto string mb_ereg_replace(string pattern, string replacement, string string [, string option])
   Replace regular expression for multibyte string */
/* Forwards to _php_mb_regex_ereg_replace_exec() with no forced options and a string replacement. */
PHP_FUNCTION(mb_ereg_replace)
{
	_php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 0);
}
/* }}} */
1032
/* {{{ proto string mb_eregi_replace(string pattern, string replacement, string string [, string option])
   Case insensitive replace regular expression for multibyte string */
/* Forwards to _php_mb_regex_ereg_replace_exec() with ONIG_OPTION_IGNORECASE forced and a string replacement. */
PHP_FUNCTION(mb_eregi_replace)
{
	_php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, ONIG_OPTION_IGNORECASE, 0);
}
/* }}} */
1040
/* {{{ proto string mb_ereg_replace_callback(string pattern, string callback, string string [, string option])
   regular expression for multibyte string using replacement callback */
/* Forwards to _php_mb_regex_ereg_replace_exec() with no forced options and is_callable = 1. */
PHP_FUNCTION(mb_ereg_replace_callback)
{
	_php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 1);
}
/* }}} */
1048
1049 /* {{{ proto array mb_split(string pattern, string string [, int limit])
1050 split multibyte string into array by regular expression */
PHP_FUNCTION(mb_split)1051 PHP_FUNCTION(mb_split)
1052 {
1053 char *arg_pattern;
1054 int arg_pattern_len;
1055 php_mb_regex_t *re;
1056 OnigRegion *regs = NULL;
1057 char *string;
1058 OnigUChar *pos, *chunk_pos;
1059 int string_len;
1060
1061 int n, err;
1062 long count = -1;
1063
1064 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|l", &arg_pattern, &arg_pattern_len, &string, &string_len, &count) == FAILURE) {
1065 RETURN_FALSE;
1066 }
1067
1068 if (count > 0) {
1069 count--;
1070 }
1071
1072 /* create regex pattern buffer */
1073 if ((re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, MBREX(regex_default_options), MBREX(current_mbctype), MBREX(regex_default_syntax) TSRMLS_CC)) == NULL) {
1074 RETURN_FALSE;
1075 }
1076
1077 array_init(return_value);
1078
1079 chunk_pos = pos = (OnigUChar *)string;
1080 err = 0;
1081 regs = onig_region_new();
1082 /* churn through str, generating array entries as we go */
1083 while (count != 0 && (pos - (OnigUChar *)string) < string_len) {
1084 int beg, end;
1085 err = onig_search(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), pos, (OnigUChar *)(string + string_len), regs, 0);
1086 if (err < 0) {
1087 break;
1088 }
1089 beg = regs->beg[0], end = regs->end[0];
1090 /* add it to the array */
1091 if ((pos - (OnigUChar *)string) < end) {
1092 if (beg < string_len && beg >= (chunk_pos - (OnigUChar *)string)) {
1093 add_next_index_stringl(return_value, (char *)chunk_pos, ((OnigUChar *)(string + beg) - chunk_pos), 1);
1094 --count;
1095 } else {
1096 err = -2;
1097 break;
1098 }
1099 /* point at our new starting point */
1100 chunk_pos = pos = (OnigUChar *)string + end;
1101 } else {
1102 pos++;
1103 }
1104 onig_region_free(regs, 0);
1105 }
1106
1107 onig_region_free(regs, 1);
1108
1109 /* see if we encountered an error */
1110 if (err <= -2) {
1111 OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
1112 onig_error_code_to_str(err_str, err);
1113 php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex search failure in mbsplit(): %s", err_str);
1114 zval_dtor(return_value);
1115 RETURN_FALSE;
1116 }
1117
1118 /* otherwise we just have one last element to add to the array */
1119 n = ((OnigUChar *)(string + string_len) - chunk_pos);
1120 if (n > 0) {
1121 add_next_index_stringl(return_value, (char *)chunk_pos, n, 1);
1122 } else {
1123 add_next_index_stringl(return_value, "", 0, 1);
1124 }
1125 }
1126 /* }}} */
1127
1128 /* {{{ proto bool mb_ereg_match(string pattern, string string [,string option])
1129 Regular expression match for multibyte string */
PHP_FUNCTION(mb_ereg_match)1130 PHP_FUNCTION(mb_ereg_match)
1131 {
1132 char *arg_pattern;
1133 int arg_pattern_len;
1134
1135 char *string;
1136 int string_len;
1137
1138 php_mb_regex_t *re;
1139 OnigSyntaxType *syntax;
1140 OnigOptionType option = 0;
1141 int err;
1142
1143 {
1144 char *option_str = NULL;
1145 int option_str_len = 0;
1146
1147 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|s",
1148 &arg_pattern, &arg_pattern_len, &string, &string_len,
1149 &option_str, &option_str_len)==FAILURE) {
1150 RETURN_FALSE;
1151 }
1152
1153 if (option_str != NULL) {
1154 _php_mb_regex_init_options(option_str, option_str_len, &option, &syntax, NULL);
1155 } else {
1156 option |= MBREX(regex_default_options);
1157 syntax = MBREX(regex_default_syntax);
1158 }
1159 }
1160
1161 if ((re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), syntax TSRMLS_CC)) == NULL) {
1162 RETURN_FALSE;
1163 }
1164
1165 /* match */
1166 err = onig_match(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), (OnigUChar *)string, NULL, 0);
1167 if (err >= 0) {
1168 RETVAL_TRUE;
1169 } else {
1170 RETVAL_FALSE;
1171 }
1172 }
1173 /* }}} */
1174
1175 /* regex search */
1176 /* {{{ _php_mb_regex_ereg_search_exec */
1177 static void
_php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS,int mode)1178 _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mode)
1179 {
1180 size_t argc = ZEND_NUM_ARGS();
1181 char *arg_pattern, *arg_options;
1182 int arg_pattern_len, arg_options_len;
1183 int n, i, err, pos, len, beg, end;
1184 OnigOptionType option;
1185 OnigUChar *str;
1186 OnigSyntaxType *syntax;
1187
1188 if (zend_parse_parameters(argc TSRMLS_CC, "|ss", &arg_pattern, &arg_pattern_len, &arg_options, &arg_options_len) == FAILURE) {
1189 return;
1190 }
1191
1192 option = MBREX(regex_default_options);
1193
1194 if (argc == 2) {
1195 option = 0;
1196 _php_mb_regex_init_options(arg_options, arg_options_len, &option, &syntax, NULL);
1197 }
1198
1199 if (argc > 0) {
1200 /* create regex pattern buffer */
1201 if ((MBREX(search_re) = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), MBREX(regex_default_syntax) TSRMLS_CC)) == NULL) {
1202 RETURN_FALSE;
1203 }
1204 }
1205
1206 pos = MBREX(search_pos);
1207 str = NULL;
1208 len = 0;
1209 if (MBREX(search_str) != NULL && Z_TYPE_P(MBREX(search_str)) == IS_STRING){
1210 str = (OnigUChar *)Z_STRVAL_P(MBREX(search_str));
1211 len = Z_STRLEN_P(MBREX(search_str));
1212 }
1213
1214 if (MBREX(search_re) == NULL) {
1215 php_error_docref(NULL TSRMLS_CC, E_WARNING, "No regex given");
1216 RETURN_FALSE;
1217 }
1218
1219 if (str == NULL) {
1220 php_error_docref(NULL TSRMLS_CC, E_WARNING, "No string given");
1221 RETURN_FALSE;
1222 }
1223
1224 if (MBREX(search_regs)) {
1225 onig_region_free(MBREX(search_regs), 1);
1226 }
1227 MBREX(search_regs) = onig_region_new();
1228
1229 err = onig_search(MBREX(search_re), str, str + len, str + pos, str + len, MBREX(search_regs), 0);
1230 if (err == ONIG_MISMATCH) {
1231 MBREX(search_pos) = len;
1232 RETVAL_FALSE;
1233 } else if (err <= -2) {
1234 OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
1235 onig_error_code_to_str(err_str, err);
1236 php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex search failure in mbregex_search(): %s", err_str);
1237 RETVAL_FALSE;
1238 } else {
1239 if (MBREX(search_regs)->beg[0] == MBREX(search_regs)->end[0]) {
1240 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty regular expression");
1241 }
1242 switch (mode) {
1243 case 1:
1244 array_init(return_value);
1245 beg = MBREX(search_regs)->beg[0];
1246 end = MBREX(search_regs)->end[0];
1247 add_next_index_long(return_value, beg);
1248 add_next_index_long(return_value, end - beg);
1249 break;
1250 case 2:
1251 array_init(return_value);
1252 n = MBREX(search_regs)->num_regs;
1253 for (i = 0; i < n; i++) {
1254 beg = MBREX(search_regs)->beg[i];
1255 end = MBREX(search_regs)->end[i];
1256 if (beg >= 0 && beg <= end && end <= len) {
1257 add_index_stringl(return_value, i, (char *)&str[beg], end - beg, 1);
1258 } else {
1259 add_index_bool(return_value, i, 0);
1260 }
1261 }
1262 break;
1263 default:
1264 RETVAL_TRUE;
1265 break;
1266 }
1267 end = MBREX(search_regs)->end[0];
1268 if (pos < end) {
1269 MBREX(search_pos) = end;
1270 } else {
1271 MBREX(search_pos) = pos + 1;
1272 }
1273 }
1274
1275 if (err < 0) {
1276 onig_region_free(MBREX(search_regs), 1);
1277 MBREX(search_regs) = (OnigRegion *)NULL;
1278 }
1279 }
1280 /* }}} */
1281
/* {{{ proto bool mb_ereg_search([string pattern[, string option]])
   Regular expression search for multibyte string.
   Delegates to _php_mb_regex_ereg_search_exec() with mode 0, which yields
   TRUE on a match and FALSE otherwise. */
PHP_FUNCTION(mb_ereg_search)
{
	_php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
}
1288 /* }}} */
1289
/* {{{ proto array mb_ereg_search_pos([string pattern[, string option]])
   Regular expression search for multibyte string, reporting the match position.
   Delegates to _php_mb_regex_ereg_search_exec() with mode 1, which yields
   array(offset, length) of the whole match on success and FALSE otherwise. */
PHP_FUNCTION(mb_ereg_search_pos)
{
	_php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
}
1296 /* }}} */
1297
/* {{{ proto array mb_ereg_search_regs([string pattern[, string option]])
   Regular expression search for multibyte string, reporting the matched parts.
   Delegates to _php_mb_regex_ereg_search_exec() with mode 2, which yields an
   array of matched substrings (one per region) on success and FALSE otherwise. */
PHP_FUNCTION(mb_ereg_search_regs)
{
	_php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 2);
}
1304 /* }}} */
1305
1306 /* {{{ proto bool mb_ereg_search_init(string string [, string pattern[, string option]])
1307 Initialize string and regular expression for search. */
PHP_FUNCTION(mb_ereg_search_init)1308 PHP_FUNCTION(mb_ereg_search_init)
1309 {
1310 size_t argc = ZEND_NUM_ARGS();
1311 zval *arg_str;
1312 char *arg_pattern = NULL, *arg_options = NULL;
1313 int arg_pattern_len = 0, arg_options_len = 0;
1314 OnigSyntaxType *syntax = NULL;
1315 OnigOptionType option;
1316
1317 if (zend_parse_parameters(argc TSRMLS_CC, "z|ss", &arg_str, &arg_pattern, &arg_pattern_len, &arg_options, &arg_options_len) == FAILURE) {
1318 return;
1319 }
1320
1321 if (argc > 1 && arg_pattern_len == 0) {
1322 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty pattern");
1323 RETURN_FALSE;
1324 }
1325
1326 option = MBREX(regex_default_options);
1327 syntax = MBREX(regex_default_syntax);
1328
1329 if (argc == 3) {
1330 option = 0;
1331 _php_mb_regex_init_options(arg_options, arg_options_len, &option, &syntax, NULL);
1332 }
1333
1334 if (argc > 1) {
1335 /* create regex pattern buffer */
1336 if ((MBREX(search_re) = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), syntax TSRMLS_CC)) == NULL) {
1337 RETURN_FALSE;
1338 }
1339 }
1340
1341 if (MBREX(search_str) != NULL) {
1342 zval_ptr_dtor(&MBREX(search_str));
1343 MBREX(search_str) = (zval *)NULL;
1344 }
1345
1346 MBREX(search_str) = arg_str;
1347 Z_ADDREF_P(MBREX(search_str));
1348 SEPARATE_ZVAL_IF_NOT_REF(&MBREX(search_str));
1349
1350 MBREX(search_pos) = 0;
1351
1352 if (MBREX(search_regs) != NULL) {
1353 onig_region_free(MBREX(search_regs), 1);
1354 MBREX(search_regs) = (OnigRegion *) NULL;
1355 }
1356
1357 RETURN_TRUE;
1358 }
1359 /* }}} */
1360
1361 /* {{{ proto array mb_ereg_search_getregs(void)
1362 Get matched substring of the last time */
PHP_FUNCTION(mb_ereg_search_getregs)1363 PHP_FUNCTION(mb_ereg_search_getregs)
1364 {
1365 int n, i, len, beg, end;
1366 OnigUChar *str;
1367
1368 if (MBREX(search_regs) != NULL && Z_TYPE_P(MBREX(search_str)) == IS_STRING && Z_STRVAL_P(MBREX(search_str)) != NULL) {
1369 array_init(return_value);
1370
1371 str = (OnigUChar *)Z_STRVAL_P(MBREX(search_str));
1372 len = Z_STRLEN_P(MBREX(search_str));
1373 n = MBREX(search_regs)->num_regs;
1374 for (i = 0; i < n; i++) {
1375 beg = MBREX(search_regs)->beg[i];
1376 end = MBREX(search_regs)->end[i];
1377 if (beg >= 0 && beg <= end && end <= len) {
1378 add_index_stringl(return_value, i, (char *)&str[beg], end - beg, 1);
1379 } else {
1380 add_index_bool(return_value, i, 0);
1381 }
1382 }
1383 } else {
1384 RETVAL_FALSE;
1385 }
1386 }
1387 /* }}} */
1388
/* {{{ proto int mb_ereg_search_getpos(void)
   Get search start position.
   Returns the current value of MBREX(search_pos), i.e. the offset at which
   the next mb_ereg_search*() call will start scanning the subject. */
PHP_FUNCTION(mb_ereg_search_getpos)
{
	RETVAL_LONG(MBREX(search_pos));
}
1395 /* }}} */
1396
1397 /* {{{ proto bool mb_ereg_search_setpos(int position)
1398 Set search start position */
PHP_FUNCTION(mb_ereg_search_setpos)1399 PHP_FUNCTION(mb_ereg_search_setpos)
1400 {
1401 long position;
1402
1403 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "l", &position) == FAILURE) {
1404 return;
1405 }
1406
1407 if (position < 0 || (MBREX(search_str) != NULL && Z_TYPE_P(MBREX(search_str)) == IS_STRING && position >= Z_STRLEN_P(MBREX(search_str)))) {
1408 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Position is out of range");
1409 MBREX(search_pos) = 0;
1410 RETURN_FALSE;
1411 }
1412
1413 MBREX(search_pos) = position;
1414 RETURN_TRUE;
1415 }
1416 /* }}} */
1417
1418 /* {{{ php_mb_regex_set_options */
_php_mb_regex_set_options(OnigOptionType options,OnigSyntaxType * syntax,OnigOptionType * prev_options,OnigSyntaxType ** prev_syntax TSRMLS_DC)1419 static void _php_mb_regex_set_options(OnigOptionType options, OnigSyntaxType *syntax, OnigOptionType *prev_options, OnigSyntaxType **prev_syntax TSRMLS_DC)
1420 {
1421 if (prev_options != NULL) {
1422 *prev_options = MBREX(regex_default_options);
1423 }
1424 if (prev_syntax != NULL) {
1425 *prev_syntax = MBREX(regex_default_syntax);
1426 }
1427 MBREX(regex_default_options) = options;
1428 MBREX(regex_default_syntax) = syntax;
1429 }
1430 /* }}} */
1431
1432 /* {{{ proto string mb_regex_set_options([string options])
1433 Set or get the default options for mbregex functions */
PHP_FUNCTION(mb_regex_set_options)1434 PHP_FUNCTION(mb_regex_set_options)
1435 {
1436 OnigOptionType opt;
1437 OnigSyntaxType *syntax;
1438 char *string = NULL;
1439 int string_len;
1440 char buf[16];
1441
1442 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s",
1443 &string, &string_len) == FAILURE) {
1444 RETURN_FALSE;
1445 }
1446 if (string != NULL) {
1447 opt = 0;
1448 syntax = NULL;
1449 _php_mb_regex_init_options(string, string_len, &opt, &syntax, NULL);
1450 _php_mb_regex_set_options(opt, syntax, NULL, NULL TSRMLS_CC);
1451 } else {
1452 opt = MBREX(regex_default_options);
1453 syntax = MBREX(regex_default_syntax);
1454 }
1455 _php_mb_regex_get_option_string(buf, sizeof(buf), opt, syntax);
1456
1457 RETVAL_STRING(buf, 1);
1458 }
1459 /* }}} */
1460
1461 #endif /* HAVE_MBREGEX */
1462
1463 /*
1464 * Local variables:
1465 * tab-width: 4
1466 * c-basic-offset: 4
1467 * End:
1468 * vim600: fdm=marker
1469 * vim: noet sw=4 ts=4
1470 */
1471