xref: /PHP-7.0/ext/mbstring/php_mbregex.c (revision 478f119a)
1 /*
2    +----------------------------------------------------------------------+
3    | PHP Version 7                                                        |
4    +----------------------------------------------------------------------+
5    | Copyright (c) 1997-2017 The PHP Group                                |
6    +----------------------------------------------------------------------+
7    | This source file is subject to version 3.01 of the PHP license,      |
8    | that is bundled with this package in the file LICENSE, and is        |
9    | available through the world-wide-web at the following url:           |
10    | http://www.php.net/license/3_01.txt                                  |
11    | If you did not receive a copy of the PHP license and are unable to   |
12    | obtain it through the world-wide-web, please send a note to          |
13    | license@php.net so we can mail you a copy immediately.               |
14    +----------------------------------------------------------------------+
15    | Author: Tsukada Takuya <tsukada@fminn.nagano.nagano.jp>              |
16    +----------------------------------------------------------------------+
17  */
18 
19 /* $Id$ */
20 
21 
22 #ifdef HAVE_CONFIG_H
23 #include "config.h"
24 #endif
25 
26 #include "php.h"
27 #include "php_ini.h"
28 
29 #if HAVE_MBREGEX
30 
31 #include "zend_smart_str.h"
32 #include "ext/standard/info.h"
33 #include "php_mbregex.h"
34 #include "mbstring.h"
35 
36 #include "php_onig_compat.h" /* must come prior to the oniguruma header */
37 #include <oniguruma.h>
38 #undef UChar
39 
40 ZEND_EXTERN_MODULE_GLOBALS(mbstring)
41 
/* State of the mbregex sub-module; always reached through MBREX(). */
struct _zend_mb_regex_globals {
	OnigEncoding default_mbctype;	/* value current_mbctype is reset to at request shutdown */
	OnigEncoding current_mbctype;	/* encoding handed to the pattern compiler */
	HashTable ht_rc;	/* compiled-regex cache keyed by pattern text; emptied at request shutdown */
	zval search_str;	/* UNDEF when unset; released at request shutdown */
	zval *search_str_val;
	unsigned int search_pos;	/* reset to 0 at request shutdown */
	php_mb_regex_t *search_re;	/* may alias an entry of ht_rc (see php_mbregex_compile_pattern) */
	OnigRegion *search_regs;	/* freed at request shutdown when non-NULL */
	OnigOptionType regex_default_options;
	OnigSyntaxType *regex_default_syntax;
};

/* Access member `g` of the mbregex globals hung off the mbstring module globals. */
#define MBREX(g) (MBSTRG(mb_regex_globals)->g)
56 
57 /* {{{ static void php_mb_regex_free_cache() */
/* Hash-table destructor for the regex cache: releases the compiled regex
 * stored as the pointer payload of the element. */
static void php_mb_regex_free_cache(zval *el)
{
	php_mb_regex_t *cached_re = (php_mb_regex_t *)Z_PTR_P(el);

	onig_free(cached_re);
}
61 /* }}} */
62 
63 /* {{{ _php_mb_regex_globals_ctor */
/*
 * Put a freshly allocated globals structure into its initial state:
 * UTF-8 as default and current encoding, an empty persistent regex cache,
 * no pending search, and Ruby syntax with the multiline+singleline options.
 *
 * Always returns SUCCESS.
 */
static int _php_mb_regex_globals_ctor(zend_mb_regex_globals *pglobals)
{
	pglobals->default_mbctype = ONIG_ENCODING_UTF8;
	pglobals->current_mbctype = ONIG_ENCODING_UTF8;
	/* persistent table; cached regexes are released by php_mb_regex_free_cache */
	zend_hash_init(&(pglobals->ht_rc), 0, NULL, php_mb_regex_free_cache, 1);
	ZVAL_UNDEF(&pglobals->search_str);
	/* the structure is handed over unzeroed, so no member may be left unset */
	pglobals->search_str_val = NULL;
	pglobals->search_re = (php_mb_regex_t*)NULL;
	pglobals->search_pos = 0;
	pglobals->search_regs = (OnigRegion*)NULL;
	pglobals->regex_default_options = ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE;
	pglobals->regex_default_syntax = ONIG_SYNTAX_RUBY;
	return SUCCESS;
}
77 /* }}} */
78 
79 /* {{{ _php_mb_regex_globals_dtor */
/* Tear down the members of the globals structure that own resources;
 * currently that is only the compiled-regex cache. */
static void _php_mb_regex_globals_dtor(zend_mb_regex_globals *pglobals)
{
	HashTable *regex_cache = &pglobals->ht_rc;

	zend_hash_destroy(regex_cache);
}
84 /* }}} */
85 
86 /* {{{ php_mb_regex_globals_alloc */
php_mb_regex_globals_alloc(void)87 zend_mb_regex_globals *php_mb_regex_globals_alloc(void)
88 {
89 	zend_mb_regex_globals *pglobals = pemalloc(
90 			sizeof(zend_mb_regex_globals), 1);
91 	if (!pglobals) {
92 		return NULL;
93 	}
94 	if (SUCCESS != _php_mb_regex_globals_ctor(pglobals)) {
95 		pefree(pglobals, 1);
96 		return NULL;
97 	}
98 	return pglobals;
99 }
100 /* }}} */
101 
102 /* {{{ php_mb_regex_globals_free */
/* Destroy and release a structure obtained from php_mb_regex_globals_alloc().
 * Passing NULL is allowed and does nothing. */
void php_mb_regex_globals_free(zend_mb_regex_globals *pglobals)
{
	if (pglobals != NULL) {
		_php_mb_regex_globals_dtor(pglobals);
		pefree(pglobals, 1);
	}
}
111 /* }}} */
112 
113 /* {{{ PHP_MINIT_FUNCTION(mb_regex) */
PHP_MINIT_FUNCTION(mb_regex)114 PHP_MINIT_FUNCTION(mb_regex)
115 {
116 	onig_init();
117 	return SUCCESS;
118 }
119 /* }}} */
120 
121 /* {{{ PHP_MSHUTDOWN_FUNCTION(mb_regex) */
PHP_MSHUTDOWN_FUNCTION(mb_regex)122 PHP_MSHUTDOWN_FUNCTION(mb_regex)
123 {
124 	onig_end();
125 	return SUCCESS;
126 }
127 /* }}} */
128 
129 /* {{{ PHP_RINIT_FUNCTION(mb_regex) */
PHP_RINIT_FUNCTION(mb_regex)130 PHP_RINIT_FUNCTION(mb_regex)
131 {
132 	return MBSTRG(mb_regex_globals) ? SUCCESS: FAILURE;
133 }
134 /* }}} */
135 
136 /* {{{ PHP_RSHUTDOWN_FUNCTION(mb_regex) */
PHP_RSHUTDOWN_FUNCTION(mb_regex)137 PHP_RSHUTDOWN_FUNCTION(mb_regex)
138 {
139 	MBREX(current_mbctype) = MBREX(default_mbctype);
140 
141 	if (!Z_ISUNDEF(MBREX(search_str))) {
142 		zval_ptr_dtor(&MBREX(search_str));
143 		ZVAL_UNDEF(&MBREX(search_str));
144 	}
145 	MBREX(search_pos) = 0;
146 
147 	if (MBREX(search_regs) != NULL) {
148 		onig_region_free(MBREX(search_regs), 1);
149 		MBREX(search_regs) = (OnigRegion *)NULL;
150 	}
151 	zend_hash_clean(&MBREX(ht_rc));
152 
153 	return SUCCESS;
154 }
155 /* }}} */
156 
157 /* {{{ PHP_MINFO_FUNCTION(mb_regex) */
PHP_MINFO_FUNCTION(mb_regex)158 PHP_MINFO_FUNCTION(mb_regex)
159 {
160 	char buf[32];
161 	php_info_print_table_start();
162 	php_info_print_table_row(2, "Multibyte (japanese) regex support", "enabled");
163 	snprintf(buf, sizeof(buf), "%d.%d.%d",
164 			ONIGURUMA_VERSION_MAJOR,
165 			ONIGURUMA_VERSION_MINOR,
166 			ONIGURUMA_VERSION_TEENY);
167 #ifdef PHP_ONIG_BUNDLED
168 #ifdef USE_COMBINATION_EXPLOSION_CHECK
169 	php_info_print_table_row(2, "Multibyte regex (oniguruma) backtrack check", "On");
170 #else	/* USE_COMBINATION_EXPLOSION_CHECK */
171 	php_info_print_table_row(2, "Multibyte regex (oniguruma) backtrack check", "Off");
172 #endif	/* USE_COMBINATION_EXPLOSION_CHECK */
173 #endif /* PHP_BUNDLED_ONIG */
174 	php_info_print_table_row(2, "Multibyte regex (oniguruma) version", buf);
175 	php_info_print_table_end();
176 }
177 /* }}} */
178 
179 /*
180  * encoding name resolver
181  */
182 
183 /* {{{ encoding name map */
184 typedef struct _php_mb_regex_enc_name_map_t {
185 	const char *names;
186 	OnigEncoding code;
187 } php_mb_regex_enc_name_map_t;
188 
189 php_mb_regex_enc_name_map_t enc_name_map[] = {
190 #ifdef ONIG_ENCODING_EUC_JP
191 	{
192 		"EUC-JP\0EUCJP\0X-EUC-JP\0UJIS\0EUCJP\0EUCJP-WIN\0",
193 		ONIG_ENCODING_EUC_JP
194 	},
195 #endif
196 #ifdef ONIG_ENCODING_UTF8
197 	{
198 		"UTF-8\0UTF8\0",
199 		ONIG_ENCODING_UTF8
200 	},
201 #endif
202 #ifdef ONIG_ENCODING_UTF16_BE
203 	{
204 		"UTF-16\0UTF-16BE\0",
205 		ONIG_ENCODING_UTF16_BE
206 	},
207 #endif
208 #ifdef ONIG_ENCODING_UTF16_LE
209 	{
210 		"UTF-16LE\0",
211 		ONIG_ENCODING_UTF16_LE
212 	},
213 #endif
214 #ifdef ONIG_ENCODING_UTF32_BE
215 	{
216 		"UCS-4\0UTF-32\0UTF-32BE\0",
217 		ONIG_ENCODING_UTF32_BE
218 	},
219 #endif
220 #ifdef ONIG_ENCODING_UTF32_LE
221 	{
222 		"UCS-4LE\0UTF-32LE\0",
223 		ONIG_ENCODING_UTF32_LE
224 	},
225 #endif
226 #ifdef ONIG_ENCODING_SJIS
227 	{
228 		"SJIS\0CP932\0MS932\0SHIFT_JIS\0SJIS-WIN\0WINDOWS-31J\0",
229 		ONIG_ENCODING_SJIS
230 	},
231 #endif
232 #ifdef ONIG_ENCODING_BIG5
233 	{
234 		"BIG5\0BIG-5\0BIGFIVE\0CN-BIG5\0BIG-FIVE\0",
235 		ONIG_ENCODING_BIG5
236 	},
237 #endif
238 #ifdef ONIG_ENCODING_EUC_CN
239 	{
240 		"EUC-CN\0EUCCN\0EUC_CN\0GB-2312\0GB2312\0",
241 		ONIG_ENCODING_EUC_CN
242 	},
243 #endif
244 #ifdef ONIG_ENCODING_EUC_TW
245 	{
246 		"EUC-TW\0EUCTW\0EUC_TW\0",
247 		ONIG_ENCODING_EUC_TW
248 	},
249 #endif
250 #ifdef ONIG_ENCODING_EUC_KR
251 	{
252 		"EUC-KR\0EUCKR\0EUC_KR\0",
253 		ONIG_ENCODING_EUC_KR
254 	},
255 #endif
256 #if defined(ONIG_ENCODING_KOI8) && !PHP_ONIG_BAD_KOI8_ENTRY
257 	{
258 		"KOI8\0KOI-8\0",
259 		ONIG_ENCODING_KOI8
260 	},
261 #endif
262 #ifdef ONIG_ENCODING_KOI8_R
263 	{
264 		"KOI8R\0KOI8-R\0KOI-8R\0",
265 		ONIG_ENCODING_KOI8_R
266 	},
267 #endif
268 #ifdef ONIG_ENCODING_ISO_8859_1
269 	{
270 		"ISO-8859-1\0ISO8859-1\0ISO_8859_1\0ISO8859_1\0",
271 		ONIG_ENCODING_ISO_8859_1
272 	},
273 #endif
274 #ifdef ONIG_ENCODING_ISO_8859_2
275 	{
276 		"ISO-8859-2\0ISO8859-2\0ISO_8859_2\0ISO8859_2\0",
277 		ONIG_ENCODING_ISO_8859_2
278 	},
279 #endif
280 #ifdef ONIG_ENCODING_ISO_8859_3
281 	{
282 		"ISO-8859-3\0ISO8859-3\0ISO_8859_3\0ISO8859_3\0",
283 		ONIG_ENCODING_ISO_8859_3
284 	},
285 #endif
286 #ifdef ONIG_ENCODING_ISO_8859_4
287 	{
288 		"ISO-8859-4\0ISO8859-4\0ISO_8859_4\0ISO8859_4\0",
289 		ONIG_ENCODING_ISO_8859_4
290 	},
291 #endif
292 #ifdef ONIG_ENCODING_ISO_8859_5
293 	{
294 		"ISO-8859-5\0ISO8859-5\0ISO_8859_5\0ISO8859_5\0",
295 		ONIG_ENCODING_ISO_8859_5
296 	},
297 #endif
298 #ifdef ONIG_ENCODING_ISO_8859_6
299 	{
300 		"ISO-8859-6\0ISO8859-6\0ISO_8859_6\0ISO8859_6\0",
301 		ONIG_ENCODING_ISO_8859_6
302 	},
303 #endif
304 #ifdef ONIG_ENCODING_ISO_8859_7
305 	{
306 		"ISO-8859-7\0ISO8859-7\0ISO_8859_7\0ISO8859_7\0",
307 		ONIG_ENCODING_ISO_8859_7
308 	},
309 #endif
310 #ifdef ONIG_ENCODING_ISO_8859_8
311 	{
312 		"ISO-8859-8\0ISO8859-8\0ISO_8859_8\0ISO8859_8\0",
313 		ONIG_ENCODING_ISO_8859_8
314 	},
315 #endif
316 #ifdef ONIG_ENCODING_ISO_8859_9
317 	{
318 		"ISO-8859-9\0ISO8859-9\0ISO_8859_9\0ISO8859_9\0",
319 		ONIG_ENCODING_ISO_8859_9
320 	},
321 #endif
322 #ifdef ONIG_ENCODING_ISO_8859_10
323 	{
324 		"ISO-8859-10\0ISO8859-10\0ISO_8859_10\0ISO8859_10\0",
325 		ONIG_ENCODING_ISO_8859_10
326 	},
327 #endif
328 #ifdef ONIG_ENCODING_ISO_8859_11
329 	{
330 		"ISO-8859-11\0ISO8859-11\0ISO_8859_11\0ISO8859_11\0",
331 		ONIG_ENCODING_ISO_8859_11
332 	},
333 #endif
334 #ifdef ONIG_ENCODING_ISO_8859_13
335 	{
336 		"ISO-8859-13\0ISO8859-13\0ISO_8859_13\0ISO8859_13\0",
337 		ONIG_ENCODING_ISO_8859_13
338 	},
339 #endif
340 #ifdef ONIG_ENCODING_ISO_8859_14
341 	{
342 		"ISO-8859-14\0ISO8859-14\0ISO_8859_14\0ISO8859_14\0",
343 		ONIG_ENCODING_ISO_8859_14
344 	},
345 #endif
346 #ifdef ONIG_ENCODING_ISO_8859_15
347 	{
348 		"ISO-8859-15\0ISO8859-15\0ISO_8859_15\0ISO8859_15\0",
349 		ONIG_ENCODING_ISO_8859_15
350 	},
351 #endif
352 #ifdef ONIG_ENCODING_ISO_8859_16
353 	{
354 		"ISO-8859-16\0ISO8859-16\0ISO_8859_16\0ISO8859_16\0",
355 		ONIG_ENCODING_ISO_8859_16
356 	},
357 #endif
358 #ifdef ONIG_ENCODING_ASCII
359 	{
360 		"ASCII\0US-ASCII\0US_ASCII\0ISO646\0",
361 		ONIG_ENCODING_ASCII
362 	},
363 #endif
364 	{ NULL, ONIG_ENCODING_UNDEF }
365 };
366 /* }}} */
367 
/* {{{ _php_mb_regex_name2mbctype */
_php_mb_regex_name2mbctype(const char * pname)369 static OnigEncoding _php_mb_regex_name2mbctype(const char *pname)
370 {
371 	const char *p;
372 	php_mb_regex_enc_name_map_t *mapping;
373 
374 	if (pname == NULL || !*pname) {
375 		return ONIG_ENCODING_UNDEF;
376 	}
377 
378 	for (mapping = enc_name_map; mapping->names != NULL; mapping++) {
379 		for (p = mapping->names; *p != '\0'; p += (strlen(p) + 1)) {
380 			if (strcasecmp(p, pname) == 0) {
381 				return mapping->code;
382 			}
383 		}
384 	}
385 
386 	return ONIG_ENCODING_UNDEF;
387 }
388 /* }}} */
389 
/* {{{ _php_mb_regex_mbctype2name */
/* Reverse lookup: the first alias of the table row whose encoding equals
 * mbctype, or NULL when no row matches. */
static const char *_php_mb_regex_mbctype2name(OnigEncoding mbctype)
{
	php_mb_regex_enc_name_map_t *entry = enc_name_map;

	while (entry->names != NULL) {
		if (entry->code == mbctype) {
			/* read as a C string, the alias list is just its first alias */
			return entry->names;
		}
		entry++;
	}

	return NULL;
}
403 /* }}} */
404 
405 /* {{{ php_mb_regex_set_mbctype */
php_mb_regex_set_mbctype(const char * encname)406 int php_mb_regex_set_mbctype(const char *encname)
407 {
408 	OnigEncoding mbctype = _php_mb_regex_name2mbctype(encname);
409 	if (mbctype == ONIG_ENCODING_UNDEF) {
410 		return FAILURE;
411 	}
412 	MBREX(current_mbctype) = mbctype;
413 	return SUCCESS;
414 }
415 /* }}} */
416 
417 /* {{{ php_mb_regex_set_default_mbctype */
php_mb_regex_set_default_mbctype(const char * encname)418 int php_mb_regex_set_default_mbctype(const char *encname)
419 {
420 	OnigEncoding mbctype = _php_mb_regex_name2mbctype(encname);
421 	if (mbctype == ONIG_ENCODING_UNDEF) {
422 		return FAILURE;
423 	}
424 	MBREX(default_mbctype) = mbctype;
425 	return SUCCESS;
426 }
427 /* }}} */
428 
429 /* {{{ php_mb_regex_get_mbctype */
php_mb_regex_get_mbctype(void)430 const char *php_mb_regex_get_mbctype(void)
431 {
432 	return _php_mb_regex_mbctype2name(MBREX(current_mbctype));
433 }
434 /* }}} */
435 
436 /* {{{ php_mb_regex_get_default_mbctype */
php_mb_regex_get_default_mbctype(void)437 const char *php_mb_regex_get_default_mbctype(void)
438 {
439 	return _php_mb_regex_mbctype2name(MBREX(default_mbctype));
440 }
441 /* }}} */
442 
443 /*
444  * regex cache
445  */
446 /* {{{ php_mbregex_compile_pattern */
php_mbregex_compile_pattern(const char * pattern,int patlen,OnigOptionType options,OnigEncoding enc,OnigSyntaxType * syntax)447 static php_mb_regex_t *php_mbregex_compile_pattern(const char *pattern, int patlen, OnigOptionType options, OnigEncoding enc, OnigSyntaxType *syntax)
448 {
449 	int err_code = 0;
450 	php_mb_regex_t *retval = NULL, *rc = NULL;
451 	OnigErrorInfo err_info;
452 	OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
453 
454 	rc = zend_hash_str_find_ptr(&MBREX(ht_rc), (char *)pattern, patlen);
455 	if (!rc || rc->options != options || rc->enc != enc || rc->syntax != syntax) {
456 		if ((err_code = onig_new(&retval, (OnigUChar *)pattern, (OnigUChar *)(pattern + patlen), options, enc, syntax, &err_info)) != ONIG_NORMAL) {
457 			onig_error_code_to_str(err_str, err_code, &err_info);
458 			php_error_docref(NULL, E_WARNING, "mbregex compile err: %s", err_str);
459 			retval = NULL;
460 			goto out;
461 		}
462 		if (rc == MBREX(search_re)) {
463 			/* reuse the new rc? see bug #72399 */
464 			MBREX(search_re) = NULL;
465 		}
466 		zend_hash_str_update_ptr(&MBREX(ht_rc), (char *)pattern, patlen, retval);
467 	} else {
468 		retval = rc;
469 	}
470 out:
471 	return retval;
472 }
473 /* }}} */
474 
475 /* {{{ _php_mb_regex_get_option_string */
/* Store `ch` at *cursor when *room allows it, and count it in *needed
 * whether or not it was stored. */
static void _php_mb_regex_emit_option_char(char **cursor, size_t *room, size_t *needed, char ch)
{
	if (*room > 0) {
		--(*room);
		*((*cursor)++) = ch;
	}
	++(*needed);
}

/*
 * Render an option mask and a syntax as the option-letter string understood
 * by _php_mb_regex_init_options(), NUL terminator included.
 *
 * At most `len` bytes are written to `str`. Returns 0 when everything fitted,
 * otherwise the number of bytes (terminator included) that would have been
 * needed.
 */
static size_t _php_mb_regex_get_option_string(char *str, size_t len, OnigOptionType option, OnigSyntaxType *syntax)
{
	const OnigOptionType both_lines = ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE;
	size_t room = len;
	size_t needed = 0;
	char *cursor = str;
	char syntax_letter = 0;

	if ((option & ONIG_OPTION_IGNORECASE) != 0) {
		_php_mb_regex_emit_option_char(&cursor, &room, &needed, 'i');
	}

	if ((option & ONIG_OPTION_EXTEND) != 0) {
		_php_mb_regex_emit_option_char(&cursor, &room, &needed, 'x');
	}

	if ((option & both_lines) == both_lines) {
		/* both line modes together collapse into the single letter 'p' */
		_php_mb_regex_emit_option_char(&cursor, &room, &needed, 'p');
	} else {
		if ((option & ONIG_OPTION_MULTILINE) != 0) {
			_php_mb_regex_emit_option_char(&cursor, &room, &needed, 'm');
		}
		if ((option & ONIG_OPTION_SINGLELINE) != 0) {
			_php_mb_regex_emit_option_char(&cursor, &room, &needed, 's');
		}
	}

	if ((option & ONIG_OPTION_FIND_LONGEST) != 0) {
		_php_mb_regex_emit_option_char(&cursor, &room, &needed, 'l');
	}

	if ((option & ONIG_OPTION_FIND_NOT_EMPTY) != 0) {
		_php_mb_regex_emit_option_char(&cursor, &room, &needed, 'n');
	}

	if (syntax == ONIG_SYNTAX_JAVA) {
		syntax_letter = 'j';
	} else if (syntax == ONIG_SYNTAX_GNU_REGEX) {
		syntax_letter = 'u';
	} else if (syntax == ONIG_SYNTAX_GREP) {
		syntax_letter = 'g';
	} else if (syntax == ONIG_SYNTAX_EMACS) {
		syntax_letter = 'c';
	} else if (syntax == ONIG_SYNTAX_RUBY) {
		syntax_letter = 'r';
	} else if (syntax == ONIG_SYNTAX_PERL) {
		syntax_letter = 'z';
	} else if (syntax == ONIG_SYNTAX_POSIX_BASIC) {
		syntax_letter = 'b';
	} else if (syntax == ONIG_SYNTAX_POSIX_EXTENDED) {
		syntax_letter = 'd';
	}

	/* an unrecognised syntax contributes no letter at all */
	if (syntax_letter != 0) {
		_php_mb_regex_emit_option_char(&cursor, &room, &needed, syntax_letter);
	}

	/* the terminator is stored and counted like any other character */
	_php_mb_regex_emit_option_char(&cursor, &room, &needed, '\0');

	return (len < needed) ? needed : 0;
}
578 /* }}} */
579 
580 /* {{{ _php_mb_regex_init_options */
581 static void
_php_mb_regex_init_options(const char * parg,int narg,OnigOptionType * option,OnigSyntaxType ** syntax,int * eval)582 _php_mb_regex_init_options(const char *parg, int narg, OnigOptionType *option, OnigSyntaxType **syntax, int *eval)
583 {
584 	int n;
585 	char c;
586 	int optm = 0;
587 
588 	*syntax = ONIG_SYNTAX_RUBY;
589 
590 	if (parg != NULL) {
591 		n = 0;
592 		while(n < narg) {
593 			c = parg[n++];
594 			switch (c) {
595 				case 'i':
596 					optm |= ONIG_OPTION_IGNORECASE;
597 					break;
598 				case 'x':
599 					optm |= ONIG_OPTION_EXTEND;
600 					break;
601 				case 'm':
602 					optm |= ONIG_OPTION_MULTILINE;
603 					break;
604 				case 's':
605 					optm |= ONIG_OPTION_SINGLELINE;
606 					break;
607 				case 'p':
608 					optm |= ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE;
609 					break;
610 				case 'l':
611 					optm |= ONIG_OPTION_FIND_LONGEST;
612 					break;
613 				case 'n':
614 					optm |= ONIG_OPTION_FIND_NOT_EMPTY;
615 					break;
616 				case 'j':
617 					*syntax = ONIG_SYNTAX_JAVA;
618 					break;
619 				case 'u':
620 					*syntax = ONIG_SYNTAX_GNU_REGEX;
621 					break;
622 				case 'g':
623 					*syntax = ONIG_SYNTAX_GREP;
624 					break;
625 				case 'c':
626 					*syntax = ONIG_SYNTAX_EMACS;
627 					break;
628 				case 'r':
629 					*syntax = ONIG_SYNTAX_RUBY;
630 					break;
631 				case 'z':
632 					*syntax = ONIG_SYNTAX_PERL;
633 					break;
634 				case 'b':
635 					*syntax = ONIG_SYNTAX_POSIX_BASIC;
636 					break;
637 				case 'd':
638 					*syntax = ONIG_SYNTAX_POSIX_EXTENDED;
639 					break;
640 				case 'e':
641 					if (eval != NULL) *eval = 1;
642 					break;
643 				default:
644 					break;
645 			}
646 		}
647 		if (option != NULL) *option|=optm;
648 	}
649 }
650 /* }}} */
651 
652 /*
653  * php functions
654  */
655 
656 /* {{{ proto string mb_regex_encoding([string encoding])
657    Returns the current encoding for regex as a string. */
PHP_FUNCTION(mb_regex_encoding)658 PHP_FUNCTION(mb_regex_encoding)
659 {
660 	char *encoding = NULL;
661 	size_t encoding_len;
662 	OnigEncoding mbctype;
663 
664 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s", &encoding, &encoding_len) == FAILURE) {
665 		return;
666 	}
667 
668 	if (!encoding) {
669 		const char *retval = _php_mb_regex_mbctype2name(MBREX(current_mbctype));
670 
671 		if (retval == NULL) {
672 			RETURN_FALSE;
673 		}
674 
675 		RETURN_STRING((char *)retval);
676 	} else {
677 		mbctype = _php_mb_regex_name2mbctype(encoding);
678 
679 		if (mbctype == ONIG_ENCODING_UNDEF) {
680 			php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", encoding);
681 			RETURN_FALSE;
682 		}
683 
684 		MBREX(current_mbctype) = mbctype;
685 		RETURN_TRUE;
686 	}
687 }
688 /* }}} */
689 
690 /* {{{ _php_mb_regex_ereg_exec */
/*
 * Shared body of mb_ereg() and mb_eregi(); `icase` adds case-insensitive
 * matching to the default options.
 *
 * Returns false on a parameter error, an empty pattern, a compile failure or
 * when the search does not succeed. Otherwise returns an integer: 1 when no
 * registers array was passed, else the byte length of the whole match (with
 * 0 reported as 1). On a match the registers array is replaced by one entry
 * per group: the captured text, or false for an empty or unset group.
 */
static void _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase)
{
	zval *arg_pattern, *array = NULL;
	char *string;
	size_t string_len;
	php_mb_regex_t *re;
	OnigRegion *regs = NULL;
	OnigOptionType options;
	int match_len;

	if (zend_parse_parameters(ZEND_NUM_ARGS(), "zs|z/", &arg_pattern, &string, &string_len, &array) == FAILURE) {
		RETURN_FALSE;
	}

	options = MBREX(regex_default_options);
	if (icase) {
		options |= ONIG_OPTION_IGNORECASE;
	}

	/* a non-string pattern is matched as text; doubles lose their fraction first */
	if (Z_TYPE_P(arg_pattern) != IS_STRING) {
		if (Z_TYPE_P(arg_pattern) == IS_DOUBLE) {
			convert_to_long_ex(arg_pattern);
		}
		convert_to_string_ex(arg_pattern);
	}

	if (Z_STRLEN_P(arg_pattern) == 0) {
		php_error_docref(NULL, E_WARNING, "empty pattern");
		RETURN_FALSE;
	}

	re = php_mbregex_compile_pattern(Z_STRVAL_P(arg_pattern), Z_STRLEN_P(arg_pattern), options, MBREX(current_mbctype), MBREX(regex_default_syntax));
	if (re == NULL) {
		RETURN_FALSE;
	}

	/* from here on the region must be released on every path */
	regs = onig_region_new();

	if (onig_search(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), (OnigUChar *)string, (OnigUChar *)(string + string_len), regs, 0) < 0) {
		RETVAL_FALSE;
		goto out;
	}

	match_len = 1;
	if (array != NULL) {
		int grp;

		zval_dtor(array);
		array_init(array);

		match_len = regs->end[0] - regs->beg[0];
		for (grp = 0; grp < regs->num_regs; grp++) {
			int beg = regs->beg[grp];
			int end = regs->end[grp];

			if (beg >= 0 && beg < end && end <= string_len) {
				add_index_stringl(array, grp, (char *)&string[beg], end - beg);
			} else {
				add_index_bool(array, grp, 0);
			}
		}
	}

	/* an empty match is still reported as 1 */
	if (match_len == 0) {
		match_len = 1;
	}
	RETVAL_LONG(match_len);

out:
	if (regs != NULL) {
		onig_region_free(regs, 1);
	}
}
768 /* }}} */
769 
770 /* {{{ proto int mb_ereg(string pattern, string string [, array registers])
771    Regular expression match for multibyte string */
PHP_FUNCTION(mb_ereg)772 PHP_FUNCTION(mb_ereg)
773 {
774 	_php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
775 }
776 /* }}} */
777 
778 /* {{{ proto int mb_eregi(string pattern, string string [, array registers])
779    Case-insensitive regular expression match for multibyte string */
PHP_FUNCTION(mb_eregi)780 PHP_FUNCTION(mb_eregi)
781 {
782 	_php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
783 }
784 /* }}} */
785 
786 /* {{{ _php_mb_regex_ereg_replace_exec */
/*
 * Shared body of mb_ereg_replace(), mb_eregi_replace() and
 * mb_ereg_replace_callback().
 *
 * `options` holds option bits forced by the entry point; an option string
 * from the caller adds to them, otherwise the default options and syntax
 * apply. `is_callable` selects the argument layout: 0 takes a replacement
 * string (in which a backslash followed by one digit refers to that capture
 * group), 1 takes a callback that receives the array of captured groups.
 *
 * Returns the rewritten string, or false on parameter errors, compile
 * failure, a search error, or when the 'e' option is combined with a
 * callback.
 */
static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOptionType options, int is_callable)
{
	zval *arg_pattern_zval;

	char *arg_pattern;
	size_t arg_pattern_len;

	char *replace = NULL;
	size_t replace_len = 0;

	zend_fcall_info arg_replace_fci;
	zend_fcall_info_cache arg_replace_fci_cache;

	char *string;
	size_t string_len;

	char *p;
	php_mb_regex_t *re;
	OnigSyntaxType *syntax;
	OnigRegion *regs = NULL;
	smart_str out_buf = {0};
	smart_str eval_buf = {0};
	smart_str *pbuf;
	int err, eval, n;
	OnigUChar *pos;
	OnigUChar *string_lim;
	char *description = NULL;
	char pat_buf[6];

	const mbfl_encoding *enc;

	{
		const char *current_enc_name;
		current_enc_name = _php_mb_regex_mbctype2name(MBREX(current_mbctype));
		if (current_enc_name == NULL ||
			(enc = mbfl_name2encoding(current_enc_name)) == NULL) {
			php_error_docref(NULL, E_WARNING, "Unknown error");
			RETURN_FALSE;
		}
	}
	eval = 0;
	{
		char *option_str = NULL;
		size_t option_str_len = 0;

		if (!is_callable) {
			if (zend_parse_parameters(ZEND_NUM_ARGS(), "zss|s",
						&arg_pattern_zval,
						&replace, &replace_len,
						&string, &string_len,
						&option_str, &option_str_len) == FAILURE) {
				RETURN_FALSE;
			}
		} else {
			if (zend_parse_parameters(ZEND_NUM_ARGS(), "zfs|s",
						&arg_pattern_zval,
						&arg_replace_fci, &arg_replace_fci_cache,
						&string, &string_len,
						&option_str, &option_str_len) == FAILURE) {
				RETURN_FALSE;
			}
		}

		if (option_str != NULL) {
			_php_mb_regex_init_options(option_str, option_str_len, &options, &syntax, &eval);
		} else {
			options |= MBREX(regex_default_options);
			syntax = MBREX(regex_default_syntax);
		}
	}
	if (Z_TYPE_P(arg_pattern_zval) == IS_STRING) {
		arg_pattern = Z_STRVAL_P(arg_pattern_zval);
		arg_pattern_len = Z_STRLEN_P(arg_pattern_zval);
	} else {
		/* FIXME: this code is not multibyte aware! */
		/* a non-string pattern is taken as the code of one single-byte character */
		convert_to_long_ex(arg_pattern_zval);
		pat_buf[0] = (char)Z_LVAL_P(arg_pattern_zval);
		pat_buf[1] = '\0';
		pat_buf[2] = '\0';
		pat_buf[3] = '\0';
		pat_buf[4] = '\0';
		pat_buf[5] = '\0';

		arg_pattern = pat_buf;
		arg_pattern_len = 1;
	}
	/* create regex pattern buffer */
	re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, options, MBREX(current_mbctype), syntax);
	if (re == NULL) {
		RETURN_FALSE;
	}

	/*
	 * Reject 'e' together with a callback before anything is allocated:
	 * bailing out after `description` exists would leak it.
	 */
	if (is_callable && eval) {
		php_error_docref(NULL, E_WARNING, "Option 'e' cannot be used with replacement callback");
		RETURN_FALSE;
	}

	if (eval || is_callable) {
		pbuf = &eval_buf;
		description = zend_make_compiled_string_description("mbregex replace");
	} else {
		pbuf = &out_buf;
		description = NULL;
	}

	/* do the actual work */
	err = 0;
	pos = (OnigUChar *)string;
	string_lim = (OnigUChar*)(string + string_len);
	regs = onig_region_new();
	while (err >= 0) {
		err = onig_search(re, (OnigUChar *)string, (OnigUChar *)string_lim, pos, (OnigUChar *)string_lim, regs, 0);
		if (err <= -2) {
			OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
			onig_error_code_to_str(err_str, err);
			php_error_docref(NULL, E_WARNING, "mbregex search failure in php_mbereg_replace_exec(): %s", err_str);
			break;
		}
		if (err >= 0) {
#if moriyoshi_0
			if (regs->beg[0] == regs->end[0]) {
				php_error_docref(NULL, E_WARNING, "Empty regular expression");
				break;
			}
#endif
			/* copy the part of the string before the match */
			smart_str_appendl(&out_buf, (char *)pos, (size_t)((OnigUChar *)(string + regs->beg[0]) - pos));

			if (!is_callable) {
				/* copy replacement and backrefs */
				size_t idx = 0;

				p = replace;
				while (idx < replace_len) {
					size_t remaining = replace_len - idx;
					int fwd = (int) php_mb_mbchar_bytes_ex(p, enc);

					/*
					 * The width comes from the lead byte alone. Keep it inside
					 * the replacement string (a truncated trailing character
					 * must not cause an over-read) and make sure the scan
					 * always advances.
					 */
					if (fwd < 1) {
						fwd = 1;
					}
					if ((size_t)fwd > remaining) {
						fwd = (int)remaining;
					}

					n = -1;
					if (remaining >= 2 && fwd == 1 &&
					p[0] == '\\' && p[1] >= '0' && p[1] <= '9') {
						n = p[1] - '0';
					}
					if (n >= 0 && n < regs->num_regs) {
						if (regs->beg[n] >= 0 && regs->beg[n] < regs->end[n] && regs->end[n] <= string_len) {
							smart_str_appendl(pbuf, string + regs->beg[n], regs->end[n] - regs->beg[n]);
						}
						p += 2;
						idx += 2;
					} else {
						smart_str_appendl(pbuf, p, fwd);
						p += fwd;
						idx += fwd;
					}
				}
			}

			if (eval) {
				zval v;
				zend_string *eval_str;
				/* null terminate buffer */
				smart_str_0(&eval_buf);

				if (eval_buf.s) {
					eval_str = eval_buf.s;
				} else {
					eval_str = ZSTR_EMPTY_ALLOC();
				}

				/* do eval */
				if (zend_eval_stringl(ZSTR_VAL(eval_str), ZSTR_LEN(eval_str), &v, description) == FAILURE) {
					efree(description);
					php_error_docref(NULL,E_ERROR, "Failed evaluating code: %s%s", PHP_EOL, ZSTR_VAL(eval_str));
					/* zend_error() does not return in this case */
				}

				/* result of eval */
				convert_to_string(&v);
				smart_str_appendl(&out_buf, Z_STRVAL(v), Z_STRLEN(v));
				/* Clean up */
				smart_str_free(&eval_buf);
				zval_dtor(&v);
			} else if (is_callable) {
				zval args[1];
				zval subpats, retval;
				int grp;

				array_init(&subpats);
				for (grp = 0; grp < regs->num_regs; grp++) {
					if (regs->beg[grp] >= 0 && regs->end[grp] >= regs->beg[grp]) {
						add_next_index_stringl(&subpats, string + regs->beg[grp], regs->end[grp] - regs->beg[grp]);
					} else {
						/* group took no part in the match: hand over "" without
						 * forming a pointer in front of the subject */
						add_next_index_stringl(&subpats, "", 0);
					}
				}

				ZVAL_COPY_VALUE(&args[0], &subpats);
				/* null terminate buffer */
				smart_str_0(&eval_buf);

				arg_replace_fci.param_count = 1;
				arg_replace_fci.params = args;
				arg_replace_fci.retval = &retval;
				if (zend_call_function(&arg_replace_fci, &arg_replace_fci_cache) == SUCCESS &&
						!Z_ISUNDEF(retval)) {
					convert_to_string_ex(&retval);
					smart_str_appendl(&out_buf, Z_STRVAL(retval), Z_STRLEN(retval));
					smart_str_free(&eval_buf);
					zval_ptr_dtor(&retval);
				} else {
					if (!EG(exception)) {
						php_error_docref(NULL, E_WARNING, "Unable to call custom replacement function");
					}
				}
				zval_ptr_dtor(&subpats);
			}

			n = regs->end[0];
			if ((pos - (OnigUChar *)string) < n) {
				pos = (OnigUChar *)string + n;
			} else {
				/* the match did not move past pos: pass one byte through so the scan advances */
				if (pos < string_lim) {
					smart_str_appendl(&out_buf, (char *)pos, 1);
				}
				pos++;
			}
		} else { /* nomatch */
			/* stick that last bit of string on our output */
			if (string_lim - pos > 0) {
				smart_str_appendl(&out_buf, (char *)pos, string_lim - pos);
			}
		}
		onig_region_free(regs, 0);
	}

	if (description) {
		efree(description);
	}
	if (regs != NULL) {
		onig_region_free(regs, 1);
	}
	smart_str_free(&eval_buf);

	if (err <= -2) {
		smart_str_free(&out_buf);
		RETVAL_FALSE;
	} else if (out_buf.s) {
		smart_str_0(&out_buf);
		RETVAL_STR(out_buf.s);
	} else {
		RETVAL_EMPTY_STRING();
	}
}
1033 }
1034 /* }}} */
1035 
1036 /* {{{ proto string mb_ereg_replace(string pattern, string replacement, string string [, string option])
1037    Replace regular expression for multibyte string */
PHP_FUNCTION(mb_ereg_replace)1038 PHP_FUNCTION(mb_ereg_replace)
1039 {
1040 	_php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 0);
1041 }
1042 /* }}} */
1043 
/* {{{ proto string mb_eregi_replace(string pattern, string replacement, string string [, string option])
   Case insensitive replace regular expression for multibyte string */
PHP_FUNCTION(mb_eregi_replace)1046 PHP_FUNCTION(mb_eregi_replace)
1047 {
1048 	_php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, ONIG_OPTION_IGNORECASE, 0);
1049 }
1050 /* }}} */
1051 
/* {{{ proto string mb_ereg_replace_callback(string pattern, string callback, string string [, string option])
   Replace regular expression for multibyte string using replacement callback */
PHP_FUNCTION(mb_ereg_replace_callback)1054 PHP_FUNCTION(mb_ereg_replace_callback)
1055 {
1056 	_php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 1);
1057 }
1058 /* }}} */
1059 
1060 /* {{{ proto array mb_split(string pattern, string string [, int limit])
1061    split multibyte string into array by regular expression */
PHP_FUNCTION(mb_split)1062 PHP_FUNCTION(mb_split)
1063 {
1064 	char *arg_pattern;
1065 	size_t arg_pattern_len;
1066 	php_mb_regex_t *re;
1067 	OnigRegion *regs = NULL;
1068 	char *string;
1069 	OnigUChar *pos, *chunk_pos;
1070 	size_t string_len;
1071 
1072 	int n, err;
1073 	zend_long count = -1;
1074 
1075 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|l", &arg_pattern, &arg_pattern_len, &string, &string_len, &count) == FAILURE) {
1076 		RETURN_FALSE;
1077 	}
1078 
1079 	if (count > 0) {
1080 		count--;
1081 	}
1082 
1083 	/* create regex pattern buffer */
1084 	if ((re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, MBREX(regex_default_options), MBREX(current_mbctype), MBREX(regex_default_syntax))) == NULL) {
1085 		RETURN_FALSE;
1086 	}
1087 
1088 	array_init(return_value);
1089 
1090 	chunk_pos = pos = (OnigUChar *)string;
1091 	err = 0;
1092 	regs = onig_region_new();
1093 	/* churn through str, generating array entries as we go */
1094 	while (count != 0 && (pos - (OnigUChar *)string) < (ptrdiff_t)string_len) {
1095 		int beg, end;
1096 		err = onig_search(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), pos, (OnigUChar *)(string + string_len), regs, 0);
1097 		if (err < 0) {
1098 			break;
1099 		}
1100 		beg = regs->beg[0], end = regs->end[0];
1101 		/* add it to the array */
1102 		if ((pos - (OnigUChar *)string) < end) {
1103 			if (beg < string_len && beg >= (chunk_pos - (OnigUChar *)string)) {
1104 				add_next_index_stringl(return_value, (char *)chunk_pos, ((OnigUChar *)(string + beg) - chunk_pos));
1105 				--count;
1106 			} else {
1107 				err = -2;
1108 				break;
1109 			}
1110 			/* point at our new starting point */
1111 			chunk_pos = pos = (OnigUChar *)string + end;
1112 		} else {
1113 			pos++;
1114 		}
1115 		onig_region_free(regs, 0);
1116 	}
1117 
1118 	onig_region_free(regs, 1);
1119 
1120 	/* see if we encountered an error */
1121 	if (err <= -2) {
1122 		OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
1123 		onig_error_code_to_str(err_str, err);
1124 		php_error_docref(NULL, E_WARNING, "mbregex search failure in mbsplit(): %s", err_str);
1125 		zval_dtor(return_value);
1126 		RETURN_FALSE;
1127 	}
1128 
1129 	/* otherwise we just have one last element to add to the array */
1130 	n = ((OnigUChar *)(string + string_len) - chunk_pos);
1131 	if (n > 0) {
1132 		add_next_index_stringl(return_value, (char *)chunk_pos, n);
1133 	} else {
1134 		add_next_index_stringl(return_value, "", 0);
1135 	}
1136 }
1137 /* }}} */
1138 
1139 /* {{{ proto bool mb_ereg_match(string pattern, string string [,string option])
1140    Regular expression match for multibyte string */
PHP_FUNCTION(mb_ereg_match)1141 PHP_FUNCTION(mb_ereg_match)
1142 {
1143 	char *arg_pattern;
1144 	size_t arg_pattern_len;
1145 
1146 	char *string;
1147 	size_t string_len;
1148 
1149 	php_mb_regex_t *re;
1150 	OnigSyntaxType *syntax;
1151 	OnigOptionType option = 0;
1152 	int err;
1153 
1154 	{
1155 		char *option_str = NULL;
1156 		size_t option_str_len = 0;
1157 
1158 		if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|s",
1159 		                          &arg_pattern, &arg_pattern_len, &string, &string_len,
1160 		                          &option_str, &option_str_len)==FAILURE) {
1161 			RETURN_FALSE;
1162 		}
1163 
1164 		if (option_str != NULL) {
1165 			_php_mb_regex_init_options(option_str, option_str_len, &option, &syntax, NULL);
1166 		} else {
1167 			option |= MBREX(regex_default_options);
1168 			syntax = MBREX(regex_default_syntax);
1169 		}
1170 	}
1171 
1172 	if ((re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), syntax)) == NULL) {
1173 		RETURN_FALSE;
1174 	}
1175 
1176 	/* match */
1177 	err = onig_match(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), (OnigUChar *)string, NULL, 0);
1178 	if (err >= 0) {
1179 		RETVAL_TRUE;
1180 	} else {
1181 		RETVAL_FALSE;
1182 	}
1183 }
1184 /* }}} */
1185 
1186 /* regex search */
1187 /* {{{ _php_mb_regex_ereg_search_exec */
1188 static void
_php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS,int mode)1189 _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mode)
1190 {
1191 	char *arg_pattern = NULL, *arg_options = NULL;
1192 	size_t arg_pattern_len, arg_options_len;
1193 	int n, i, err, pos, len, beg, end;
1194 	OnigOptionType option;
1195 	OnigUChar *str;
1196 	OnigSyntaxType *syntax;
1197 
1198 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "|ss", &arg_pattern, &arg_pattern_len, &arg_options, &arg_options_len) == FAILURE) {
1199 		return;
1200 	}
1201 
1202 	option = MBREX(regex_default_options);
1203 
1204 	if (arg_options) {
1205 		option = 0;
1206 		_php_mb_regex_init_options(arg_options, arg_options_len, &option, &syntax, NULL);
1207 	}
1208 
1209 	if (arg_pattern) {
1210 		/* create regex pattern buffer */
1211 		if ((MBREX(search_re) = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), MBREX(regex_default_syntax))) == NULL) {
1212 			RETURN_FALSE;
1213 		}
1214 	}
1215 
1216 	pos = MBREX(search_pos);
1217 	str = NULL;
1218 	len = 0;
1219 	if (!Z_ISUNDEF(MBREX(search_str)) && Z_TYPE(MBREX(search_str)) == IS_STRING){
1220 		str = (OnigUChar *)Z_STRVAL(MBREX(search_str));
1221 		len = Z_STRLEN(MBREX(search_str));
1222 	}
1223 
1224 	if (MBREX(search_re) == NULL) {
1225 		php_error_docref(NULL, E_WARNING, "No regex given");
1226 		RETURN_FALSE;
1227 	}
1228 
1229 	if (str == NULL) {
1230 		php_error_docref(NULL, E_WARNING, "No string given");
1231 		RETURN_FALSE;
1232 	}
1233 
1234 	if (MBREX(search_regs)) {
1235 		onig_region_free(MBREX(search_regs), 1);
1236 	}
1237 	MBREX(search_regs) = onig_region_new();
1238 
1239 	err = onig_search(MBREX(search_re), str, str + len, str + pos, str  + len, MBREX(search_regs), 0);
1240 	if (err == ONIG_MISMATCH) {
1241 		MBREX(search_pos) = len;
1242 		RETVAL_FALSE;
1243 	} else if (err <= -2) {
1244 		OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
1245 		onig_error_code_to_str(err_str, err);
1246 		php_error_docref(NULL, E_WARNING, "mbregex search failure in mbregex_search(): %s", err_str);
1247 		RETVAL_FALSE;
1248 	} else {
1249 		switch (mode) {
1250 		case 1:
1251 			array_init(return_value);
1252 			beg = MBREX(search_regs)->beg[0];
1253 			end = MBREX(search_regs)->end[0];
1254 			add_next_index_long(return_value, beg);
1255 			add_next_index_long(return_value, end - beg);
1256 			break;
1257 		case 2:
1258 			array_init(return_value);
1259 			n = MBREX(search_regs)->num_regs;
1260 			for (i = 0; i < n; i++) {
1261 				beg = MBREX(search_regs)->beg[i];
1262 				end = MBREX(search_regs)->end[i];
1263 				if (beg >= 0 && beg <= end && end <= len) {
1264 					add_index_stringl(return_value, i, (char *)&str[beg], end - beg);
1265 				} else {
1266 					add_index_bool(return_value, i, 0);
1267 				}
1268 			}
1269 			break;
1270 		default:
1271 			RETVAL_TRUE;
1272 			break;
1273 		}
1274 		end = MBREX(search_regs)->end[0];
1275 		if (pos <= end) {
1276 			MBREX(search_pos) = end;
1277 		} else {
1278 			MBREX(search_pos) = pos + 1;
1279 		}
1280 	}
1281 
1282 	if (err < 0) {
1283 		onig_region_free(MBREX(search_regs), 1);
1284 		MBREX(search_regs) = (OnigRegion *)NULL;
1285 	}
1286 }
1287 /* }}} */
1288 
1289 /* {{{ proto bool mb_ereg_search([string pattern[, string option]])
1290    Regular expression search for multibyte string */
PHP_FUNCTION(mb_ereg_search)1291 PHP_FUNCTION(mb_ereg_search)
1292 {
1293 	_php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
1294 }
1295 /* }}} */
1296 
1297 /* {{{ proto array mb_ereg_search_pos([string pattern[, string option]])
1298    Regular expression search for multibyte string */
PHP_FUNCTION(mb_ereg_search_pos)1299 PHP_FUNCTION(mb_ereg_search_pos)
1300 {
1301 	_php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
1302 }
1303 /* }}} */
1304 
1305 /* {{{ proto array mb_ereg_search_regs([string pattern[, string option]])
1306    Regular expression search for multibyte string */
PHP_FUNCTION(mb_ereg_search_regs)1307 PHP_FUNCTION(mb_ereg_search_regs)
1308 {
1309 	_php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 2);
1310 }
1311 /* }}} */
1312 
1313 /* {{{ proto bool mb_ereg_search_init(string string [, string pattern[, string option]])
1314    Initialize string and regular expression for search. */
PHP_FUNCTION(mb_ereg_search_init)1315 PHP_FUNCTION(mb_ereg_search_init)
1316 {
1317 	size_t argc = ZEND_NUM_ARGS();
1318 	zval *arg_str;
1319 	char *arg_pattern = NULL, *arg_options = NULL;
1320 	size_t arg_pattern_len = 0, arg_options_len = 0;
1321 	OnigSyntaxType *syntax = NULL;
1322 	OnigOptionType option;
1323 
1324 	if (zend_parse_parameters(argc, "z|ss", &arg_str, &arg_pattern, &arg_pattern_len, &arg_options, &arg_options_len) == FAILURE) {
1325 		return;
1326 	}
1327 
1328 	if (argc > 1 && arg_pattern_len == 0) {
1329 		php_error_docref(NULL, E_WARNING, "Empty pattern");
1330 		RETURN_FALSE;
1331 	}
1332 
1333 	option = MBREX(regex_default_options);
1334 	syntax = MBREX(regex_default_syntax);
1335 
1336 	if (argc == 3) {
1337 		option = 0;
1338 		_php_mb_regex_init_options(arg_options, arg_options_len, &option, &syntax, NULL);
1339 	}
1340 
1341 	if (argc > 1) {
1342 		/* create regex pattern buffer */
1343 		if ((MBREX(search_re) = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), syntax)) == NULL) {
1344 			RETURN_FALSE;
1345 		}
1346 	}
1347 
1348 	if (!Z_ISNULL(MBREX(search_str))) {
1349 		zval_ptr_dtor(&MBREX(search_str));
1350 	}
1351 
1352 	ZVAL_DUP(&MBREX(search_str), arg_str);
1353 
1354 	MBREX(search_pos) = 0;
1355 
1356 	if (MBREX(search_regs) != NULL) {
1357 		onig_region_free(MBREX(search_regs), 1);
1358 		MBREX(search_regs) = NULL;
1359 	}
1360 
1361 	RETURN_TRUE;
1362 }
1363 /* }}} */
1364 
1365 /* {{{ proto array mb_ereg_search_getregs(void)
1366    Get matched substring of the last time */
PHP_FUNCTION(mb_ereg_search_getregs)1367 PHP_FUNCTION(mb_ereg_search_getregs)
1368 {
1369 	int n, i, len, beg, end;
1370 	OnigUChar *str;
1371 
1372 	if (MBREX(search_regs) != NULL && Z_TYPE(MBREX(search_str)) == IS_STRING) {
1373 		array_init(return_value);
1374 
1375 		str = (OnigUChar *)Z_STRVAL(MBREX(search_str));
1376 		len = Z_STRLEN(MBREX(search_str));
1377 		n = MBREX(search_regs)->num_regs;
1378 		for (i = 0; i < n; i++) {
1379 			beg = MBREX(search_regs)->beg[i];
1380 			end = MBREX(search_regs)->end[i];
1381 			if (beg >= 0 && beg <= end && end <= len) {
1382 				add_index_stringl(return_value, i, (char *)&str[beg], end - beg);
1383 			} else {
1384 				add_index_bool(return_value, i, 0);
1385 			}
1386 		}
1387 	} else {
1388 		RETVAL_FALSE;
1389 	}
1390 }
1391 /* }}} */
1392 
1393 /* {{{ proto int mb_ereg_search_getpos(void)
1394    Get search start position */
PHP_FUNCTION(mb_ereg_search_getpos)1395 PHP_FUNCTION(mb_ereg_search_getpos)
1396 {
1397 	RETVAL_LONG(MBREX(search_pos));
1398 }
1399 /* }}} */
1400 
1401 /* {{{ proto bool mb_ereg_search_setpos(int position)
1402    Set search start position */
PHP_FUNCTION(mb_ereg_search_setpos)1403 PHP_FUNCTION(mb_ereg_search_setpos)
1404 {
1405 	zend_long position;
1406 
1407 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "l", &position) == FAILURE) {
1408 		return;
1409 	}
1410 
1411 	if (position < 0 || (!Z_ISUNDEF(MBREX(search_str)) && Z_TYPE(MBREX(search_str)) == IS_STRING && (size_t)position > Z_STRLEN(MBREX(search_str)))) {
1412 		php_error_docref(NULL, E_WARNING, "Position is out of range");
1413 		MBREX(search_pos) = 0;
1414 		RETURN_FALSE;
1415 	}
1416 
1417 	MBREX(search_pos) = position;
1418 	RETURN_TRUE;
1419 }
1420 /* }}} */
1421 
1422 /* {{{ php_mb_regex_set_options */
_php_mb_regex_set_options(OnigOptionType options,OnigSyntaxType * syntax,OnigOptionType * prev_options,OnigSyntaxType ** prev_syntax)1423 static void _php_mb_regex_set_options(OnigOptionType options, OnigSyntaxType *syntax, OnigOptionType *prev_options, OnigSyntaxType **prev_syntax)
1424 {
1425 	if (prev_options != NULL) {
1426 		*prev_options = MBREX(regex_default_options);
1427 	}
1428 	if (prev_syntax != NULL) {
1429 		*prev_syntax = MBREX(regex_default_syntax);
1430 	}
1431 	MBREX(regex_default_options) = options;
1432 	MBREX(regex_default_syntax) = syntax;
1433 }
1434 /* }}} */
1435 
1436 /* {{{ proto string mb_regex_set_options([string options])
1437    Set or get the default options for mbregex functions */
PHP_FUNCTION(mb_regex_set_options)1438 PHP_FUNCTION(mb_regex_set_options)
1439 {
1440 	OnigOptionType opt;
1441 	OnigSyntaxType *syntax;
1442 	char *string = NULL;
1443 	size_t string_len;
1444 	char buf[16];
1445 
1446 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s",
1447 	                          &string, &string_len) == FAILURE) {
1448 		RETURN_FALSE;
1449 	}
1450 	if (string != NULL) {
1451 		opt = 0;
1452 		syntax = NULL;
1453 		_php_mb_regex_init_options(string, string_len, &opt, &syntax, NULL);
1454 		_php_mb_regex_set_options(opt, syntax, NULL, NULL);
1455 	} else {
1456 		opt = MBREX(regex_default_options);
1457 		syntax = MBREX(regex_default_syntax);
1458 	}
1459 	_php_mb_regex_get_option_string(buf, sizeof(buf), opt, syntax);
1460 
1461 	RETVAL_STRING(buf);
1462 }
1463 /* }}} */
1464 
1465 #endif	/* HAVE_MBREGEX */
1466 
1467 /*
1468  * Local variables:
1469  * tab-width: 4
1470  * c-basic-offset: 4
1471  * End:
1472  * vim600: fdm=marker
1473  * vim: noet sw=4 ts=4
1474  */
1475