xref: /PHP-7.2/ext/mbstring/php_mbregex.c (revision 0ecac37c)
1 /*
2    +----------------------------------------------------------------------+
3    | PHP Version 7                                                        |
4    +----------------------------------------------------------------------+
5    | Copyright (c) 1997-2018 The PHP Group                                |
6    +----------------------------------------------------------------------+
7    | This source file is subject to version 3.01 of the PHP license,      |
8    | that is bundled with this package in the file LICENSE, and is        |
9    | available through the world-wide-web at the following url:           |
10    | http://www.php.net/license/3_01.txt                                  |
11    | If you did not receive a copy of the PHP license and are unable to   |
12    | obtain it through the world-wide-web, please send a note to          |
13    | license@php.net so we can mail you a copy immediately.               |
14    +----------------------------------------------------------------------+
15    | Author: Tsukada Takuya <tsukada@fminn.nagano.nagano.jp>              |
16    +----------------------------------------------------------------------+
17  */
18 
19 /* $Id$ */
20 
21 
22 #ifdef HAVE_CONFIG_H
23 #include "config.h"
24 #endif
25 
26 #include "php.h"
27 #include "php_ini.h"
28 
29 #if HAVE_MBREGEX
30 
31 #include "zend_smart_str.h"
32 #include "ext/standard/info.h"
33 #include "php_mbregex.h"
34 #include "mbstring.h"
35 
36 #include "php_onig_compat.h" /* must come prior to the oniguruma header */
37 #include <oniguruma.h>
38 #undef UChar
39 
40 ZEND_EXTERN_MODULE_GLOBALS(mbstring)
41 
42 struct _zend_mb_regex_globals {
43 	OnigEncoding default_mbctype;
44 	OnigEncoding current_mbctype;
45 	HashTable ht_rc;
46 	zval search_str;
47 	zval *search_str_val;
48 	unsigned int search_pos;
49 	php_mb_regex_t *search_re;
50 	OnigRegion *search_regs;
51 	OnigOptionType regex_default_options;
52 	OnigSyntaxType *regex_default_syntax;
53 };
54 
55 #define MBREX(g) (MBSTRG(mb_regex_globals)->g)
56 
57 /* {{{ static void php_mb_regex_free_cache() */
php_mb_regex_free_cache(zval * el)58 static void php_mb_regex_free_cache(zval *el) {
59 	onig_free((php_mb_regex_t *)Z_PTR_P(el));
60 }
61 /* }}} */
62 
63 /* {{{ _php_mb_regex_globals_ctor */
_php_mb_regex_globals_ctor(zend_mb_regex_globals * pglobals)64 static int _php_mb_regex_globals_ctor(zend_mb_regex_globals *pglobals)
65 {
66 	pglobals->default_mbctype = ONIG_ENCODING_UTF8;
67 	pglobals->current_mbctype = ONIG_ENCODING_UTF8;
68 	zend_hash_init(&(pglobals->ht_rc), 0, NULL, php_mb_regex_free_cache, 1);
69 	ZVAL_UNDEF(&pglobals->search_str);
70 	pglobals->search_re = (php_mb_regex_t*)NULL;
71 	pglobals->search_pos = 0;
72 	pglobals->search_regs = (OnigRegion*)NULL;
73 	pglobals->regex_default_options = ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE;
74 	pglobals->regex_default_syntax = ONIG_SYNTAX_RUBY;
75 	return SUCCESS;
76 }
77 /* }}} */
78 
79 /* {{{ _php_mb_regex_globals_dtor */
_php_mb_regex_globals_dtor(zend_mb_regex_globals * pglobals)80 static void _php_mb_regex_globals_dtor(zend_mb_regex_globals *pglobals)
81 {
82 	zend_hash_destroy(&pglobals->ht_rc);
83 }
84 /* }}} */
85 
86 /* {{{ php_mb_regex_globals_alloc */
php_mb_regex_globals_alloc(void)87 zend_mb_regex_globals *php_mb_regex_globals_alloc(void)
88 {
89 	zend_mb_regex_globals *pglobals = pemalloc(
90 			sizeof(zend_mb_regex_globals), 1);
91 	if (SUCCESS != _php_mb_regex_globals_ctor(pglobals)) {
92 		pefree(pglobals, 1);
93 		return NULL;
94 	}
95 	return pglobals;
96 }
97 /* }}} */
98 
99 /* {{{ php_mb_regex_globals_free */
php_mb_regex_globals_free(zend_mb_regex_globals * pglobals)100 void php_mb_regex_globals_free(zend_mb_regex_globals *pglobals)
101 {
102 	if (!pglobals) {
103 		return;
104 	}
105 	_php_mb_regex_globals_dtor(pglobals);
106 	pefree(pglobals, 1);
107 }
108 /* }}} */
109 
110 /* {{{ PHP_MINIT_FUNCTION(mb_regex) */
PHP_MINIT_FUNCTION(mb_regex)111 PHP_MINIT_FUNCTION(mb_regex)
112 {
113 	onig_init();
114 	return SUCCESS;
115 }
116 /* }}} */
117 
118 /* {{{ PHP_MSHUTDOWN_FUNCTION(mb_regex) */
PHP_MSHUTDOWN_FUNCTION(mb_regex)119 PHP_MSHUTDOWN_FUNCTION(mb_regex)
120 {
121 	onig_end();
122 	return SUCCESS;
123 }
124 /* }}} */
125 
126 /* {{{ PHP_RINIT_FUNCTION(mb_regex) */
PHP_RINIT_FUNCTION(mb_regex)127 PHP_RINIT_FUNCTION(mb_regex)
128 {
129 	return MBSTRG(mb_regex_globals) ? SUCCESS: FAILURE;
130 }
131 /* }}} */
132 
133 /* {{{ PHP_RSHUTDOWN_FUNCTION(mb_regex) */
PHP_RSHUTDOWN_FUNCTION(mb_regex)134 PHP_RSHUTDOWN_FUNCTION(mb_regex)
135 {
136 	MBREX(current_mbctype) = MBREX(default_mbctype);
137 
138 	if (!Z_ISUNDEF(MBREX(search_str))) {
139 		zval_ptr_dtor(&MBREX(search_str));
140 		ZVAL_UNDEF(&MBREX(search_str));
141 	}
142 	MBREX(search_pos) = 0;
143 
144 	if (MBREX(search_regs) != NULL) {
145 		onig_region_free(MBREX(search_regs), 1);
146 		MBREX(search_regs) = (OnigRegion *)NULL;
147 	}
148 	zend_hash_clean(&MBREX(ht_rc));
149 
150 	return SUCCESS;
151 }
152 /* }}} */
153 
154 /* {{{ PHP_MINFO_FUNCTION(mb_regex) */
PHP_MINFO_FUNCTION(mb_regex)155 PHP_MINFO_FUNCTION(mb_regex)
156 {
157 	char buf[32];
158 	php_info_print_table_start();
159 	php_info_print_table_row(2, "Multibyte (japanese) regex support", "enabled");
160 	snprintf(buf, sizeof(buf), "%d.%d.%d",
161 			ONIGURUMA_VERSION_MAJOR,
162 			ONIGURUMA_VERSION_MINOR,
163 			ONIGURUMA_VERSION_TEENY);
164 #ifdef PHP_ONIG_BUNDLED
165 #ifdef USE_COMBINATION_EXPLOSION_CHECK
166 	php_info_print_table_row(2, "Multibyte regex (oniguruma) backtrack check", "On");
167 #else	/* USE_COMBINATION_EXPLOSION_CHECK */
168 	php_info_print_table_row(2, "Multibyte regex (oniguruma) backtrack check", "Off");
169 #endif	/* USE_COMBINATION_EXPLOSION_CHECK */
170 #endif /* PHP_BUNDLED_ONIG */
171 	php_info_print_table_row(2, "Multibyte regex (oniguruma) version", buf);
172 	php_info_print_table_end();
173 }
174 /* }}} */
175 
176 /*
177  * encoding name resolver
178  */
179 
180 /* {{{ encoding name map */
181 typedef struct _php_mb_regex_enc_name_map_t {
182 	const char *names;
183 	OnigEncoding code;
184 } php_mb_regex_enc_name_map_t;
185 
186 php_mb_regex_enc_name_map_t enc_name_map[] = {
187 #ifdef ONIG_ENCODING_EUC_JP
188 	{
189 		"EUC-JP\0EUCJP\0X-EUC-JP\0UJIS\0EUCJP\0EUCJP-WIN\0",
190 		ONIG_ENCODING_EUC_JP
191 	},
192 #endif
193 #ifdef ONIG_ENCODING_UTF8
194 	{
195 		"UTF-8\0UTF8\0",
196 		ONIG_ENCODING_UTF8
197 	},
198 #endif
199 #ifdef ONIG_ENCODING_UTF16_BE
200 	{
201 		"UTF-16\0UTF-16BE\0",
202 		ONIG_ENCODING_UTF16_BE
203 	},
204 #endif
205 #ifdef ONIG_ENCODING_UTF16_LE
206 	{
207 		"UTF-16LE\0",
208 		ONIG_ENCODING_UTF16_LE
209 	},
210 #endif
211 #ifdef ONIG_ENCODING_UTF32_BE
212 	{
213 		"UCS-4\0UTF-32\0UTF-32BE\0",
214 		ONIG_ENCODING_UTF32_BE
215 	},
216 #endif
217 #ifdef ONIG_ENCODING_UTF32_LE
218 	{
219 		"UCS-4LE\0UTF-32LE\0",
220 		ONIG_ENCODING_UTF32_LE
221 	},
222 #endif
223 #ifdef ONIG_ENCODING_SJIS
224 	{
225 		"SJIS\0CP932\0MS932\0SHIFT_JIS\0SJIS-WIN\0WINDOWS-31J\0",
226 		ONIG_ENCODING_SJIS
227 	},
228 #endif
229 #ifdef ONIG_ENCODING_BIG5
230 	{
231 		"BIG5\0BIG-5\0BIGFIVE\0CN-BIG5\0BIG-FIVE\0",
232 		ONIG_ENCODING_BIG5
233 	},
234 #endif
235 #ifdef ONIG_ENCODING_EUC_CN
236 	{
237 		"EUC-CN\0EUCCN\0EUC_CN\0GB-2312\0GB2312\0",
238 		ONIG_ENCODING_EUC_CN
239 	},
240 #endif
241 #ifdef ONIG_ENCODING_EUC_TW
242 	{
243 		"EUC-TW\0EUCTW\0EUC_TW\0",
244 		ONIG_ENCODING_EUC_TW
245 	},
246 #endif
247 #ifdef ONIG_ENCODING_EUC_KR
248 	{
249 		"EUC-KR\0EUCKR\0EUC_KR\0",
250 		ONIG_ENCODING_EUC_KR
251 	},
252 #endif
253 #if defined(ONIG_ENCODING_KOI8) && !PHP_ONIG_BAD_KOI8_ENTRY
254 	{
255 		"KOI8\0KOI-8\0",
256 		ONIG_ENCODING_KOI8
257 	},
258 #endif
259 #ifdef ONIG_ENCODING_KOI8_R
260 	{
261 		"KOI8R\0KOI8-R\0KOI-8R\0",
262 		ONIG_ENCODING_KOI8_R
263 	},
264 #endif
265 #ifdef ONIG_ENCODING_ISO_8859_1
266 	{
267 		"ISO-8859-1\0ISO8859-1\0ISO_8859_1\0ISO8859_1\0",
268 		ONIG_ENCODING_ISO_8859_1
269 	},
270 #endif
271 #ifdef ONIG_ENCODING_ISO_8859_2
272 	{
273 		"ISO-8859-2\0ISO8859-2\0ISO_8859_2\0ISO8859_2\0",
274 		ONIG_ENCODING_ISO_8859_2
275 	},
276 #endif
277 #ifdef ONIG_ENCODING_ISO_8859_3
278 	{
279 		"ISO-8859-3\0ISO8859-3\0ISO_8859_3\0ISO8859_3\0",
280 		ONIG_ENCODING_ISO_8859_3
281 	},
282 #endif
283 #ifdef ONIG_ENCODING_ISO_8859_4
284 	{
285 		"ISO-8859-4\0ISO8859-4\0ISO_8859_4\0ISO8859_4\0",
286 		ONIG_ENCODING_ISO_8859_4
287 	},
288 #endif
289 #ifdef ONIG_ENCODING_ISO_8859_5
290 	{
291 		"ISO-8859-5\0ISO8859-5\0ISO_8859_5\0ISO8859_5\0",
292 		ONIG_ENCODING_ISO_8859_5
293 	},
294 #endif
295 #ifdef ONIG_ENCODING_ISO_8859_6
296 	{
297 		"ISO-8859-6\0ISO8859-6\0ISO_8859_6\0ISO8859_6\0",
298 		ONIG_ENCODING_ISO_8859_6
299 	},
300 #endif
301 #ifdef ONIG_ENCODING_ISO_8859_7
302 	{
303 		"ISO-8859-7\0ISO8859-7\0ISO_8859_7\0ISO8859_7\0",
304 		ONIG_ENCODING_ISO_8859_7
305 	},
306 #endif
307 #ifdef ONIG_ENCODING_ISO_8859_8
308 	{
309 		"ISO-8859-8\0ISO8859-8\0ISO_8859_8\0ISO8859_8\0",
310 		ONIG_ENCODING_ISO_8859_8
311 	},
312 #endif
313 #ifdef ONIG_ENCODING_ISO_8859_9
314 	{
315 		"ISO-8859-9\0ISO8859-9\0ISO_8859_9\0ISO8859_9\0",
316 		ONIG_ENCODING_ISO_8859_9
317 	},
318 #endif
319 #ifdef ONIG_ENCODING_ISO_8859_10
320 	{
321 		"ISO-8859-10\0ISO8859-10\0ISO_8859_10\0ISO8859_10\0",
322 		ONIG_ENCODING_ISO_8859_10
323 	},
324 #endif
325 #ifdef ONIG_ENCODING_ISO_8859_11
326 	{
327 		"ISO-8859-11\0ISO8859-11\0ISO_8859_11\0ISO8859_11\0",
328 		ONIG_ENCODING_ISO_8859_11
329 	},
330 #endif
331 #ifdef ONIG_ENCODING_ISO_8859_13
332 	{
333 		"ISO-8859-13\0ISO8859-13\0ISO_8859_13\0ISO8859_13\0",
334 		ONIG_ENCODING_ISO_8859_13
335 	},
336 #endif
337 #ifdef ONIG_ENCODING_ISO_8859_14
338 	{
339 		"ISO-8859-14\0ISO8859-14\0ISO_8859_14\0ISO8859_14\0",
340 		ONIG_ENCODING_ISO_8859_14
341 	},
342 #endif
343 #ifdef ONIG_ENCODING_ISO_8859_15
344 	{
345 		"ISO-8859-15\0ISO8859-15\0ISO_8859_15\0ISO8859_15\0",
346 		ONIG_ENCODING_ISO_8859_15
347 	},
348 #endif
349 #ifdef ONIG_ENCODING_ISO_8859_16
350 	{
351 		"ISO-8859-16\0ISO8859-16\0ISO_8859_16\0ISO8859_16\0",
352 		ONIG_ENCODING_ISO_8859_16
353 	},
354 #endif
355 #ifdef ONIG_ENCODING_ASCII
356 	{
357 		"ASCII\0US-ASCII\0US_ASCII\0ISO646\0",
358 		ONIG_ENCODING_ASCII
359 	},
360 #endif
361 	{ NULL, ONIG_ENCODING_UNDEF }
362 };
363 /* }}} */
364 
365 /* {{{ php_mb_regex_name2mbctype */
_php_mb_regex_name2mbctype(const char * pname)366 static OnigEncoding _php_mb_regex_name2mbctype(const char *pname)
367 {
368 	const char *p;
369 	php_mb_regex_enc_name_map_t *mapping;
370 
371 	if (pname == NULL || !*pname) {
372 		return ONIG_ENCODING_UNDEF;
373 	}
374 
375 	for (mapping = enc_name_map; mapping->names != NULL; mapping++) {
376 		for (p = mapping->names; *p != '\0'; p += (strlen(p) + 1)) {
377 			if (strcasecmp(p, pname) == 0) {
378 				return mapping->code;
379 			}
380 		}
381 	}
382 
383 	return ONIG_ENCODING_UNDEF;
384 }
385 /* }}} */
386 
387 /* {{{ php_mb_regex_mbctype2name */
_php_mb_regex_mbctype2name(OnigEncoding mbctype)388 static const char *_php_mb_regex_mbctype2name(OnigEncoding mbctype)
389 {
390 	php_mb_regex_enc_name_map_t *mapping;
391 
392 	for (mapping = enc_name_map; mapping->names != NULL; mapping++) {
393 		if (mapping->code == mbctype) {
394 			return mapping->names;
395 		}
396 	}
397 
398 	return NULL;
399 }
400 /* }}} */
401 
402 /* {{{ php_mb_regex_set_mbctype */
php_mb_regex_set_mbctype(const char * encname)403 int php_mb_regex_set_mbctype(const char *encname)
404 {
405 	OnigEncoding mbctype = _php_mb_regex_name2mbctype(encname);
406 	if (mbctype == ONIG_ENCODING_UNDEF) {
407 		return FAILURE;
408 	}
409 	MBREX(current_mbctype) = mbctype;
410 	return SUCCESS;
411 }
412 /* }}} */
413 
414 /* {{{ php_mb_regex_set_default_mbctype */
php_mb_regex_set_default_mbctype(const char * encname)415 int php_mb_regex_set_default_mbctype(const char *encname)
416 {
417 	OnigEncoding mbctype = _php_mb_regex_name2mbctype(encname);
418 	if (mbctype == ONIG_ENCODING_UNDEF) {
419 		return FAILURE;
420 	}
421 	MBREX(default_mbctype) = mbctype;
422 	return SUCCESS;
423 }
424 /* }}} */
425 
426 /* {{{ php_mb_regex_get_mbctype */
php_mb_regex_get_mbctype(void)427 const char *php_mb_regex_get_mbctype(void)
428 {
429 	return _php_mb_regex_mbctype2name(MBREX(current_mbctype));
430 }
431 /* }}} */
432 
433 /* {{{ php_mb_regex_get_default_mbctype */
php_mb_regex_get_default_mbctype(void)434 const char *php_mb_regex_get_default_mbctype(void)
435 {
436 	return _php_mb_regex_mbctype2name(MBREX(default_mbctype));
437 }
438 /* }}} */
439 
440 /*
441  * regex cache
442  */
443 /* {{{ php_mbregex_compile_pattern */
php_mbregex_compile_pattern(const char * pattern,int patlen,OnigOptionType options,OnigEncoding enc,OnigSyntaxType * syntax)444 static php_mb_regex_t *php_mbregex_compile_pattern(const char *pattern, int patlen, OnigOptionType options, OnigEncoding enc, OnigSyntaxType *syntax)
445 {
446 	int err_code = 0;
447 	php_mb_regex_t *retval = NULL, *rc = NULL;
448 	OnigErrorInfo err_info;
449 	OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
450 
451 	if (!php_mb_check_encoding(pattern, patlen, _php_mb_regex_mbctype2name(enc))) {
452 		php_error_docref(NULL, E_WARNING,
453 			"Pattern is not valid under %s encoding", _php_mb_regex_mbctype2name(enc));
454 		return NULL;
455 	}
456 
457 	rc = zend_hash_str_find_ptr(&MBREX(ht_rc), (char *)pattern, patlen);
458 	if (!rc || onig_get_options(rc) != options || onig_get_encoding(rc) != enc || onig_get_syntax(rc) != syntax) {
459 		if ((err_code = onig_new(&retval, (OnigUChar *)pattern, (OnigUChar *)(pattern + patlen), options, enc, syntax, &err_info)) != ONIG_NORMAL) {
460 			onig_error_code_to_str(err_str, err_code, &err_info);
461 			php_error_docref(NULL, E_WARNING, "mbregex compile err: %s", err_str);
462 			return NULL;
463 		}
464 		if (rc == MBREX(search_re)) {
465 			/* reuse the new rc? see bug #72399 */
466 			MBREX(search_re) = NULL;
467 		}
468 		zend_hash_str_update_ptr(&MBREX(ht_rc), (char *)pattern, patlen, retval);
469 	} else {
470 		retval = rc;
471 	}
472 	return retval;
473 }
474 /* }}} */
475 
476 /* {{{ _php_mb_regex_get_option_string */
_php_mb_regex_get_option_string(char * str,size_t len,OnigOptionType option,OnigSyntaxType * syntax)477 static size_t _php_mb_regex_get_option_string(char *str, size_t len, OnigOptionType option, OnigSyntaxType *syntax)
478 {
479 	size_t len_left = len;
480 	size_t len_req = 0;
481 	char *p = str;
482 	char c;
483 
484 	if ((option & ONIG_OPTION_IGNORECASE) != 0) {
485 		if (len_left > 0) {
486 			--len_left;
487 			*(p++) = 'i';
488 		}
489 		++len_req;
490 	}
491 
492 	if ((option & ONIG_OPTION_EXTEND) != 0) {
493 		if (len_left > 0) {
494 			--len_left;
495 			*(p++) = 'x';
496 		}
497 		++len_req;
498 	}
499 
500 	if ((option & (ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE)) ==
501 			(ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE)) {
502 		if (len_left > 0) {
503 			--len_left;
504 			*(p++) = 'p';
505 		}
506 		++len_req;
507 	} else {
508 		if ((option & ONIG_OPTION_MULTILINE) != 0) {
509 			if (len_left > 0) {
510 				--len_left;
511 				*(p++) = 'm';
512 			}
513 			++len_req;
514 		}
515 
516 		if ((option & ONIG_OPTION_SINGLELINE) != 0) {
517 			if (len_left > 0) {
518 				--len_left;
519 				*(p++) = 's';
520 			}
521 			++len_req;
522 		}
523 	}
524 	if ((option & ONIG_OPTION_FIND_LONGEST) != 0) {
525 		if (len_left > 0) {
526 			--len_left;
527 			*(p++) = 'l';
528 		}
529 		++len_req;
530 	}
531 	if ((option & ONIG_OPTION_FIND_NOT_EMPTY) != 0) {
532 		if (len_left > 0) {
533 			--len_left;
534 			*(p++) = 'n';
535 		}
536 		++len_req;
537 	}
538 
539 	c = 0;
540 
541 	if (syntax == ONIG_SYNTAX_JAVA) {
542 		c = 'j';
543 	} else if (syntax == ONIG_SYNTAX_GNU_REGEX) {
544 		c = 'u';
545 	} else if (syntax == ONIG_SYNTAX_GREP) {
546 		c = 'g';
547 	} else if (syntax == ONIG_SYNTAX_EMACS) {
548 		c = 'c';
549 	} else if (syntax == ONIG_SYNTAX_RUBY) {
550 		c = 'r';
551 	} else if (syntax == ONIG_SYNTAX_PERL) {
552 		c = 'z';
553 	} else if (syntax == ONIG_SYNTAX_POSIX_BASIC) {
554 		c = 'b';
555 	} else if (syntax == ONIG_SYNTAX_POSIX_EXTENDED) {
556 		c = 'd';
557 	}
558 
559 	if (c != 0) {
560 		if (len_left > 0) {
561 			--len_left;
562 			*(p++) = c;
563 		}
564 		++len_req;
565 	}
566 
567 
568 	if (len_left > 0) {
569 		--len_left;
570 		*(p++) = '\0';
571 	}
572 	++len_req;
573 	if (len < len_req) {
574 		return len_req;
575 	}
576 
577 	return 0;
578 }
579 /* }}} */
580 
581 /* {{{ _php_mb_regex_init_options */
582 static void
_php_mb_regex_init_options(const char * parg,int narg,OnigOptionType * option,OnigSyntaxType ** syntax,int * eval)583 _php_mb_regex_init_options(const char *parg, int narg, OnigOptionType *option, OnigSyntaxType **syntax, int *eval)
584 {
585 	int n;
586 	char c;
587 	int optm = 0;
588 
589 	*syntax = ONIG_SYNTAX_RUBY;
590 
591 	if (parg != NULL) {
592 		n = 0;
593 		while(n < narg) {
594 			c = parg[n++];
595 			switch (c) {
596 				case 'i':
597 					optm |= ONIG_OPTION_IGNORECASE;
598 					break;
599 				case 'x':
600 					optm |= ONIG_OPTION_EXTEND;
601 					break;
602 				case 'm':
603 					optm |= ONIG_OPTION_MULTILINE;
604 					break;
605 				case 's':
606 					optm |= ONIG_OPTION_SINGLELINE;
607 					break;
608 				case 'p':
609 					optm |= ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE;
610 					break;
611 				case 'l':
612 					optm |= ONIG_OPTION_FIND_LONGEST;
613 					break;
614 				case 'n':
615 					optm |= ONIG_OPTION_FIND_NOT_EMPTY;
616 					break;
617 				case 'j':
618 					*syntax = ONIG_SYNTAX_JAVA;
619 					break;
620 				case 'u':
621 					*syntax = ONIG_SYNTAX_GNU_REGEX;
622 					break;
623 				case 'g':
624 					*syntax = ONIG_SYNTAX_GREP;
625 					break;
626 				case 'c':
627 					*syntax = ONIG_SYNTAX_EMACS;
628 					break;
629 				case 'r':
630 					*syntax = ONIG_SYNTAX_RUBY;
631 					break;
632 				case 'z':
633 					*syntax = ONIG_SYNTAX_PERL;
634 					break;
635 				case 'b':
636 					*syntax = ONIG_SYNTAX_POSIX_BASIC;
637 					break;
638 				case 'd':
639 					*syntax = ONIG_SYNTAX_POSIX_EXTENDED;
640 					break;
641 				case 'e':
642 					if (eval != NULL) *eval = 1;
643 					break;
644 				default:
645 					break;
646 			}
647 		}
648 		if (option != NULL) *option|=optm;
649 	}
650 }
651 /* }}} */
652 
653 /*
654  * php functions
655  */
656 
657 /* {{{ proto string mb_regex_encoding([string encoding])
658    Returns the current encoding for regex as a string. */
PHP_FUNCTION(mb_regex_encoding)659 PHP_FUNCTION(mb_regex_encoding)
660 {
661 	char *encoding = NULL;
662 	size_t encoding_len;
663 	OnigEncoding mbctype;
664 
665 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s", &encoding, &encoding_len) == FAILURE) {
666 		return;
667 	}
668 
669 	if (!encoding) {
670 		const char *retval = _php_mb_regex_mbctype2name(MBREX(current_mbctype));
671 
672 		if (retval == NULL) {
673 			RETURN_FALSE;
674 		}
675 
676 		RETURN_STRING((char *)retval);
677 	} else {
678 		mbctype = _php_mb_regex_name2mbctype(encoding);
679 
680 		if (mbctype == ONIG_ENCODING_UNDEF) {
681 			php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", encoding);
682 			RETURN_FALSE;
683 		}
684 
685 		MBREX(current_mbctype) = mbctype;
686 		RETURN_TRUE;
687 	}
688 }
689 /* }}} */
690 
691 /* {{{ _php_mb_regex_ereg_exec */
_php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS,int icase)692 static void _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase)
693 {
694 	zval *arg_pattern, *array = NULL;
695 	char *string;
696 	size_t string_len;
697 	php_mb_regex_t *re;
698 	OnigRegion *regs = NULL;
699 	int i, match_len, beg, end;
700 	OnigOptionType options;
701 	char *str;
702 
703 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "zs|z/", &arg_pattern, &string, &string_len, &array) == FAILURE) {
704 		RETURN_FALSE;
705 	}
706 
707 	if (!php_mb_check_encoding(
708 	string,
709 	string_len,
710 	_php_mb_regex_mbctype2name(MBREX(current_mbctype))
711 	)) {
712 		if (array != NULL) {
713 			zval_dtor(array);
714 			array_init(array);
715 		}
716 		RETURN_FALSE;
717 	}
718 
719 	if (array != NULL) {
720 		zval_dtor(array);
721 		array_init(array);
722 	}
723 
724 	options = MBREX(regex_default_options);
725 	if (icase) {
726 		options |= ONIG_OPTION_IGNORECASE;
727 	}
728 
729 	/* compile the regular expression from the supplied regex */
730 	if (Z_TYPE_P(arg_pattern) != IS_STRING) {
731 		/* we convert numbers to integers and treat them as a string */
732 		if (Z_TYPE_P(arg_pattern) == IS_DOUBLE) {
733 			convert_to_long_ex(arg_pattern);	/* get rid of decimal places */
734 		}
735 		convert_to_string_ex(arg_pattern);
736 		/* don't bother doing an extended regex with just a number */
737 	}
738 
739 	if (Z_STRLEN_P(arg_pattern) == 0) {
740 		php_error_docref(NULL, E_WARNING, "empty pattern");
741 		RETVAL_FALSE;
742 		goto out;
743 	}
744 
745 	re = php_mbregex_compile_pattern(Z_STRVAL_P(arg_pattern), Z_STRLEN_P(arg_pattern), options, MBREX(current_mbctype), MBREX(regex_default_syntax));
746 	if (re == NULL) {
747 		RETVAL_FALSE;
748 		goto out;
749 	}
750 
751 	regs = onig_region_new();
752 
753 	/* actually execute the regular expression */
754 	if (onig_search(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), (OnigUChar *)string, (OnigUChar *)(string + string_len), regs, 0) < 0) {
755 		RETVAL_FALSE;
756 		goto out;
757 	}
758 
759 	match_len = 1;
760 	str = string;
761 	if (array != NULL) {
762 
763 		match_len = regs->end[0] - regs->beg[0];
764 		for (i = 0; i < regs->num_regs; i++) {
765 			beg = regs->beg[i];
766 			end = regs->end[i];
767 			if (beg >= 0 && beg < end && (size_t)end <= string_len) {
768 				add_index_stringl(array, i, (char *)&str[beg], end - beg);
769 			} else {
770 				add_index_bool(array, i, 0);
771 			}
772 		}
773 	}
774 
775 	if (match_len == 0) {
776 		match_len = 1;
777 	}
778 	RETVAL_LONG(match_len);
779 out:
780 	if (regs != NULL) {
781 		onig_region_free(regs, 1);
782 	}
783 }
784 /* }}} */
785 
786 /* {{{ proto int mb_ereg(string pattern, string string [, array registers])
787    Regular expression match for multibyte string */
PHP_FUNCTION(mb_ereg)788 PHP_FUNCTION(mb_ereg)
789 {
790 	_php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
791 }
792 /* }}} */
793 
794 /* {{{ proto int mb_eregi(string pattern, string string [, array registers])
795    Case-insensitive regular expression match for multibyte string */
PHP_FUNCTION(mb_eregi)796 PHP_FUNCTION(mb_eregi)
797 {
798 	_php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
799 }
800 /* }}} */
801 
802 /* {{{ _php_mb_regex_ereg_replace_exec */
_php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS,OnigOptionType options,int is_callable)803 static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOptionType options, int is_callable)
804 {
805 	zval *arg_pattern_zval;
806 
807 	char *arg_pattern;
808 	size_t arg_pattern_len;
809 
810 	char *replace;
811 	size_t replace_len;
812 
813 	zend_fcall_info arg_replace_fci;
814 	zend_fcall_info_cache arg_replace_fci_cache;
815 
816 	char *string;
817 	size_t string_len;
818 
819 	char *p;
820 	php_mb_regex_t *re;
821 	OnigSyntaxType *syntax;
822 	OnigRegion *regs = NULL;
823 	smart_str out_buf = {0};
824 	smart_str eval_buf = {0};
825 	smart_str *pbuf;
826 	size_t i;
827 	int err, eval, n;
828 	OnigUChar *pos;
829 	OnigUChar *string_lim;
830 	char *description = NULL;
831 	char pat_buf[6];
832 
833 	const mbfl_encoding *enc;
834 
835 	{
836 		const char *current_enc_name;
837 		current_enc_name = _php_mb_regex_mbctype2name(MBREX(current_mbctype));
838 		if (current_enc_name == NULL ||
839 			(enc = mbfl_name2encoding(current_enc_name)) == NULL) {
840 			php_error_docref(NULL, E_WARNING, "Unknown error");
841 			RETURN_FALSE;
842 		}
843 	}
844 	eval = 0;
845 	{
846 		char *option_str = NULL;
847 		size_t option_str_len = 0;
848 
849 		if (!is_callable) {
850 			if (zend_parse_parameters(ZEND_NUM_ARGS(), "zss|s",
851 						&arg_pattern_zval,
852 						&replace, &replace_len,
853 						&string, &string_len,
854 						&option_str, &option_str_len) == FAILURE) {
855 				RETURN_FALSE;
856 			}
857 		} else {
858 			if (zend_parse_parameters(ZEND_NUM_ARGS(), "zfs|s",
859 						&arg_pattern_zval,
860 						&arg_replace_fci, &arg_replace_fci_cache,
861 						&string, &string_len,
862 						&option_str, &option_str_len) == FAILURE) {
863 				RETURN_FALSE;
864 			}
865 		}
866 
867 		if (!php_mb_check_encoding(
868 		string,
869 		string_len,
870 		_php_mb_regex_mbctype2name(MBREX(current_mbctype))
871 		)) {
872 			RETURN_NULL();
873 		}
874 
875 		if (option_str != NULL) {
876 			_php_mb_regex_init_options(option_str, option_str_len, &options, &syntax, &eval);
877 		} else {
878 			options |= MBREX(regex_default_options);
879 			syntax = MBREX(regex_default_syntax);
880 		}
881 	}
882 	if (eval && !is_callable) {
883 		php_error_docref(NULL, E_DEPRECATED, "The 'e' option is deprecated, use mb_ereg_replace_callback instead");
884 	}
885 	if (Z_TYPE_P(arg_pattern_zval) == IS_STRING) {
886 		arg_pattern = Z_STRVAL_P(arg_pattern_zval);
887 		arg_pattern_len = Z_STRLEN_P(arg_pattern_zval);
888 	} else {
889 		/* FIXME: this code is not multibyte aware! */
890 		convert_to_long_ex(arg_pattern_zval);
891 		pat_buf[0] = (char)Z_LVAL_P(arg_pattern_zval);
892 		pat_buf[1] = '\0';
893 		pat_buf[2] = '\0';
894 		pat_buf[3] = '\0';
895 		pat_buf[4] = '\0';
896 		pat_buf[5] = '\0';
897 
898 		arg_pattern = pat_buf;
899 		arg_pattern_len = 1;
900 	}
901 	/* create regex pattern buffer */
902 	re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, options, MBREX(current_mbctype), syntax);
903 	if (re == NULL) {
904 		RETURN_FALSE;
905 	}
906 
907 	if (eval || is_callable) {
908 		pbuf = &eval_buf;
909 		description = zend_make_compiled_string_description("mbregex replace");
910 	} else {
911 		pbuf = &out_buf;
912 		description = NULL;
913 	}
914 
915 	if (is_callable) {
916 		if (eval) {
917 			php_error_docref(NULL, E_WARNING, "Option 'e' cannot be used with replacement callback");
918 			RETURN_FALSE;
919 		}
920 	}
921 
922 	/* do the actual work */
923 	err = 0;
924 	pos = (OnigUChar *)string;
925 	string_lim = (OnigUChar*)(string + string_len);
926 	regs = onig_region_new();
927 	while (err >= 0) {
928 		err = onig_search(re, (OnigUChar *)string, (OnigUChar *)string_lim, pos, (OnigUChar *)string_lim, regs, 0);
929 		if (err <= -2) {
930 			OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
931 			onig_error_code_to_str(err_str, err);
932 			php_error_docref(NULL, E_WARNING, "mbregex search failure in php_mbereg_replace_exec(): %s", err_str);
933 			break;
934 		}
935 		if (err >= 0) {
936 #if moriyoshi_0
937 			if (regs->beg[0] == regs->end[0]) {
938 				php_error_docref(NULL, E_WARNING, "Empty regular expression");
939 				break;
940 			}
941 #endif
942 			/* copy the part of the string before the match */
943 			smart_str_appendl(&out_buf, (char *)pos, (size_t)((OnigUChar *)(string + regs->beg[0]) - pos));
944 
945 			if (!is_callable) {
946 				/* copy replacement and backrefs */
947 				i = 0;
948 				p = replace;
949 				while (i < replace_len) {
950 					int fwd = (int) php_mb_mbchar_bytes_ex(p, enc);
951 					n = -1;
952 					if ((replace_len - i) >= 2 && fwd == 1 &&
953 					p[0] == '\\' && p[1] >= '0' && p[1] <= '9') {
954 						n = p[1] - '0';
955 					}
956 					if (n >= 0 && n < regs->num_regs) {
957 						if (regs->beg[n] >= 0 && regs->beg[n] < regs->end[n] && (size_t)regs->end[n] <= string_len) {
958 							smart_str_appendl(pbuf, string + regs->beg[n], regs->end[n] - regs->beg[n]);
959 						}
960 						p += 2;
961 						i += 2;
962 					} else {
963 						smart_str_appendl(pbuf, p, fwd);
964 						p += fwd;
965 						i += fwd;
966 					}
967 				}
968 			}
969 
970 			if (eval) {
971 				zval v;
972 				zend_string *eval_str;
973 				/* null terminate buffer */
974 				smart_str_0(&eval_buf);
975 
976 				if (eval_buf.s) {
977 					eval_str = eval_buf.s;
978 				} else {
979 					eval_str = ZSTR_EMPTY_ALLOC();
980 				}
981 
982 				/* do eval */
983 				if (zend_eval_stringl(ZSTR_VAL(eval_str), ZSTR_LEN(eval_str), &v, description) == FAILURE) {
984 					efree(description);
985 					zend_throw_error(NULL, "Failed evaluating code: %s%s", PHP_EOL, ZSTR_VAL(eval_str));
986 					onig_region_free(regs, 0);
987 					smart_str_free(&out_buf);
988 					smart_str_free(&eval_buf);
989 					RETURN_FALSE;
990 				}
991 
992 				/* result of eval */
993 				convert_to_string(&v);
994 				smart_str_appendl(&out_buf, Z_STRVAL(v), Z_STRLEN(v));
995 				/* Clean up */
996 				smart_str_free(&eval_buf);
997 				zval_dtor(&v);
998 			} else if (is_callable) {
999 				zval args[1];
1000 				zval subpats, retval;
1001 				int i;
1002 
1003 				array_init(&subpats);
1004 				for (i = 0; i < regs->num_regs; i++) {
1005 					add_next_index_stringl(&subpats, string + regs->beg[i], regs->end[i] - regs->beg[i]);
1006 				}
1007 
1008 				ZVAL_COPY_VALUE(&args[0], &subpats);
1009 				/* null terminate buffer */
1010 				smart_str_0(&eval_buf);
1011 
1012 				arg_replace_fci.param_count = 1;
1013 				arg_replace_fci.params = args;
1014 				arg_replace_fci.retval = &retval;
1015 				if (zend_call_function(&arg_replace_fci, &arg_replace_fci_cache) == SUCCESS &&
1016 						!Z_ISUNDEF(retval)) {
1017 					convert_to_string_ex(&retval);
1018 					smart_str_appendl(&out_buf, Z_STRVAL(retval), Z_STRLEN(retval));
1019 					smart_str_free(&eval_buf);
1020 					zval_ptr_dtor(&retval);
1021 				} else {
1022 					if (!EG(exception)) {
1023 						php_error_docref(NULL, E_WARNING, "Unable to call custom replacement function");
1024 					}
1025 				}
1026 				zval_ptr_dtor(&subpats);
1027 			}
1028 
1029 			n = regs->end[0];
1030 			if ((pos - (OnigUChar *)string) < n) {
1031 				pos = (OnigUChar *)string + n;
1032 			} else {
1033 				if (pos < string_lim) {
1034 					smart_str_appendl(&out_buf, (char *)pos, 1);
1035 				}
1036 				pos++;
1037 			}
1038 		} else { /* nomatch */
1039 			/* stick that last bit of string on our output */
1040 			if (string_lim - pos > 0) {
1041 				smart_str_appendl(&out_buf, (char *)pos, string_lim - pos);
1042 			}
1043 		}
1044 		onig_region_free(regs, 0);
1045 	}
1046 
1047 	if (description) {
1048 		efree(description);
1049 	}
1050 	if (regs != NULL) {
1051 		onig_region_free(regs, 1);
1052 	}
1053 	smart_str_free(&eval_buf);
1054 
1055 	if (err <= -2) {
1056 		smart_str_free(&out_buf);
1057 		RETVAL_FALSE;
1058 	} else if (out_buf.s) {
1059 		smart_str_0(&out_buf);
1060 		RETVAL_STR(out_buf.s);
1061 	} else {
1062 		RETVAL_EMPTY_STRING();
1063 	}
1064 }
1065 /* }}} */
1066 
1067 /* {{{ proto string mb_ereg_replace(string pattern, string replacement, string string [, string option])
1068    Replace regular expression for multibyte string */
PHP_FUNCTION(mb_ereg_replace)1069 PHP_FUNCTION(mb_ereg_replace)
1070 {
1071 	_php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 0);
1072 }
1073 /* }}} */
1074 
1075 /* {{{ proto string mb_eregi_replace(string pattern, string replacement, string string)
1076    Case insensitive replace regular expression for multibyte string */
PHP_FUNCTION(mb_eregi_replace)1077 PHP_FUNCTION(mb_eregi_replace)
1078 {
1079 	_php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, ONIG_OPTION_IGNORECASE, 0);
1080 }
1081 /* }}} */
1082 
1083 /* {{{ proto string mb_ereg_replace_callback(string pattern, string callback, string string [, string option])
1084     regular expression for multibyte string using replacement callback */
PHP_FUNCTION(mb_ereg_replace_callback)1085 PHP_FUNCTION(mb_ereg_replace_callback)
1086 {
1087 	_php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 1);
1088 }
1089 /* }}} */
1090 
1091 /* {{{ proto array mb_split(string pattern, string string [, int limit])
1092    split multibyte string into array by regular expression */
PHP_FUNCTION(mb_split)1093 PHP_FUNCTION(mb_split)
1094 {
1095 	char *arg_pattern;
1096 	size_t arg_pattern_len;
1097 	php_mb_regex_t *re;
1098 	OnigRegion *regs = NULL;
1099 	char *string;
1100 	OnigUChar *pos, *chunk_pos;
1101 	size_t string_len;
1102 
1103 	int n, err;
1104 	zend_long count = -1;
1105 
1106 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|l", &arg_pattern, &arg_pattern_len, &string, &string_len, &count) == FAILURE) {
1107 		RETURN_FALSE;
1108 	}
1109 
1110 	if (count > 0) {
1111 		count--;
1112 	}
1113 
1114 	if (!php_mb_check_encoding(string, string_len,
1115 			_php_mb_regex_mbctype2name(MBREX(current_mbctype)))) {
1116 		RETURN_FALSE;
1117 	}
1118 
1119 	/* create regex pattern buffer */
1120 	if ((re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, MBREX(regex_default_options), MBREX(current_mbctype), MBREX(regex_default_syntax))) == NULL) {
1121 		RETURN_FALSE;
1122 	}
1123 
1124 	array_init(return_value);
1125 
1126 	chunk_pos = pos = (OnigUChar *)string;
1127 	err = 0;
1128 	regs = onig_region_new();
1129 	/* churn through str, generating array entries as we go */
1130 	while (count != 0 && (pos - (OnigUChar *)string) < (ptrdiff_t)string_len) {
1131 		int beg, end;
1132 		err = onig_search(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), pos, (OnigUChar *)(string + string_len), regs, 0);
1133 		if (err < 0) {
1134 			break;
1135 		}
1136 		beg = regs->beg[0], end = regs->end[0];
1137 		/* add it to the array */
1138 		if ((pos - (OnigUChar *)string) < end) {
1139 			if ((size_t)beg < string_len && beg >= (chunk_pos - (OnigUChar *)string)) {
1140 				add_next_index_stringl(return_value, (char *)chunk_pos, ((OnigUChar *)(string + beg) - chunk_pos));
1141 				--count;
1142 			} else {
1143 				err = -2;
1144 				break;
1145 			}
1146 			/* point at our new starting point */
1147 			chunk_pos = pos = (OnigUChar *)string + end;
1148 		} else {
1149 			pos++;
1150 		}
1151 		onig_region_free(regs, 0);
1152 	}
1153 
1154 	onig_region_free(regs, 1);
1155 
1156 	/* see if we encountered an error */
1157 	if (err <= -2) {
1158 		OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
1159 		onig_error_code_to_str(err_str, err);
1160 		php_error_docref(NULL, E_WARNING, "mbregex search failure in mbsplit(): %s", err_str);
1161 		zval_dtor(return_value);
1162 		RETURN_FALSE;
1163 	}
1164 
1165 	/* otherwise we just have one last element to add to the array */
1166 	n = ((OnigUChar *)(string + string_len) - chunk_pos);
1167 	if (n > 0) {
1168 		add_next_index_stringl(return_value, (char *)chunk_pos, n);
1169 	} else {
1170 		add_next_index_stringl(return_value, "", 0);
1171 	}
1172 }
1173 /* }}} */
1174 
1175 /* {{{ proto bool mb_ereg_match(string pattern, string string [,string option])
1176    Regular expression match for multibyte string */
PHP_FUNCTION(mb_ereg_match)1177 PHP_FUNCTION(mb_ereg_match)
1178 {
1179 	char *arg_pattern;
1180 	size_t arg_pattern_len;
1181 
1182 	char *string;
1183 	size_t string_len;
1184 
1185 	php_mb_regex_t *re;
1186 	OnigSyntaxType *syntax;
1187 	OnigOptionType option = 0;
1188 	int err;
1189 
1190 	{
1191 		char *option_str = NULL;
1192 		size_t option_str_len = 0;
1193 
1194 		if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|s",
1195 		                          &arg_pattern, &arg_pattern_len, &string, &string_len,
1196 		                          &option_str, &option_str_len)==FAILURE) {
1197 			RETURN_FALSE;
1198 		}
1199 
1200 		if (option_str != NULL) {
1201 			_php_mb_regex_init_options(option_str, option_str_len, &option, &syntax, NULL);
1202 		} else {
1203 			option |= MBREX(regex_default_options);
1204 			syntax = MBREX(regex_default_syntax);
1205 		}
1206 	}
1207 
1208 	if (!php_mb_check_encoding(string, string_len,
1209 			_php_mb_regex_mbctype2name(MBREX(current_mbctype)))) {
1210 		RETURN_FALSE;
1211 	}
1212 
1213 	if ((re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), syntax)) == NULL) {
1214 		RETURN_FALSE;
1215 	}
1216 
1217 	/* match */
1218 	err = onig_match(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), (OnigUChar *)string, NULL, 0);
1219 	if (err >= 0) {
1220 		RETVAL_TRUE;
1221 	} else {
1222 		RETVAL_FALSE;
1223 	}
1224 }
1225 /* }}} */
1226 
1227 /* regex search */
1228 /* {{{ _php_mb_regex_ereg_search_exec */
1229 static void
_php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS,int mode)1230 _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mode)
1231 {
1232 	char *arg_pattern = NULL, *arg_options = NULL;
1233 	size_t arg_pattern_len, arg_options_len;
1234 	int n, i, err, pos, len, beg, end;
1235 	OnigOptionType option;
1236 	OnigUChar *str;
1237 	OnigSyntaxType *syntax;
1238 
1239 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "|ss", &arg_pattern, &arg_pattern_len, &arg_options, &arg_options_len) == FAILURE) {
1240 		return;
1241 	}
1242 
1243 	option = MBREX(regex_default_options);
1244 
1245 	if (arg_options) {
1246 		option = 0;
1247 		_php_mb_regex_init_options(arg_options, arg_options_len, &option, &syntax, NULL);
1248 	}
1249 
1250 	if (arg_pattern) {
1251 		/* create regex pattern buffer */
1252 		if ((MBREX(search_re) = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), MBREX(regex_default_syntax))) == NULL) {
1253 			RETURN_FALSE;
1254 		}
1255 	}
1256 
1257 	pos = MBREX(search_pos);
1258 	str = NULL;
1259 	len = 0;
1260 	if (!Z_ISUNDEF(MBREX(search_str)) && Z_TYPE(MBREX(search_str)) == IS_STRING){
1261 		str = (OnigUChar *)Z_STRVAL(MBREX(search_str));
1262 		len = Z_STRLEN(MBREX(search_str));
1263 	}
1264 
1265 	if (MBREX(search_re) == NULL) {
1266 		php_error_docref(NULL, E_WARNING, "No regex given");
1267 		RETURN_FALSE;
1268 	}
1269 
1270 	if (str == NULL) {
1271 		php_error_docref(NULL, E_WARNING, "No string given");
1272 		RETURN_FALSE;
1273 	}
1274 
1275 	if (MBREX(search_regs)) {
1276 		onig_region_free(MBREX(search_regs), 1);
1277 	}
1278 	MBREX(search_regs) = onig_region_new();
1279 
1280 	err = onig_search(MBREX(search_re), str, str + len, str + pos, str  + len, MBREX(search_regs), 0);
1281 	if (err == ONIG_MISMATCH) {
1282 		MBREX(search_pos) = len;
1283 		RETVAL_FALSE;
1284 	} else if (err <= -2) {
1285 		OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
1286 		onig_error_code_to_str(err_str, err);
1287 		php_error_docref(NULL, E_WARNING, "mbregex search failure in mbregex_search(): %s", err_str);
1288 		RETVAL_FALSE;
1289 	} else {
1290 		switch (mode) {
1291 		case 1:
1292 			array_init(return_value);
1293 			beg = MBREX(search_regs)->beg[0];
1294 			end = MBREX(search_regs)->end[0];
1295 			add_next_index_long(return_value, beg);
1296 			add_next_index_long(return_value, end - beg);
1297 			break;
1298 		case 2:
1299 			array_init(return_value);
1300 			n = MBREX(search_regs)->num_regs;
1301 			for (i = 0; i < n; i++) {
1302 				beg = MBREX(search_regs)->beg[i];
1303 				end = MBREX(search_regs)->end[i];
1304 				if (beg >= 0 && beg <= end && end <= len) {
1305 					add_index_stringl(return_value, i, (char *)&str[beg], end - beg);
1306 				} else {
1307 					add_index_bool(return_value, i, 0);
1308 				}
1309 			}
1310 			break;
1311 		default:
1312 			RETVAL_TRUE;
1313 			break;
1314 		}
1315 		end = MBREX(search_regs)->end[0];
1316 		if (pos <= end) {
1317 			MBREX(search_pos) = end;
1318 		} else {
1319 			MBREX(search_pos) = pos + 1;
1320 		}
1321 	}
1322 
1323 	if (err < 0) {
1324 		onig_region_free(MBREX(search_regs), 1);
1325 		MBREX(search_regs) = (OnigRegion *)NULL;
1326 	}
1327 }
1328 /* }}} */
1329 
1330 /* {{{ proto bool mb_ereg_search([string pattern[, string option]])
1331    Regular expression search for multibyte string */
PHP_FUNCTION(mb_ereg_search)1332 PHP_FUNCTION(mb_ereg_search)
1333 {
1334 	_php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
1335 }
1336 /* }}} */
1337 
1338 /* {{{ proto array mb_ereg_search_pos([string pattern[, string option]])
1339    Regular expression search for multibyte string */
PHP_FUNCTION(mb_ereg_search_pos)1340 PHP_FUNCTION(mb_ereg_search_pos)
1341 {
1342 	_php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
1343 }
1344 /* }}} */
1345 
1346 /* {{{ proto array mb_ereg_search_regs([string pattern[, string option]])
1347    Regular expression search for multibyte string */
PHP_FUNCTION(mb_ereg_search_regs)1348 PHP_FUNCTION(mb_ereg_search_regs)
1349 {
1350 	_php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 2);
1351 }
1352 /* }}} */
1353 
1354 /* {{{ proto bool mb_ereg_search_init(string string [, string pattern[, string option]])
1355    Initialize string and regular expression for search. */
PHP_FUNCTION(mb_ereg_search_init)1356 PHP_FUNCTION(mb_ereg_search_init)
1357 {
1358 	size_t argc = ZEND_NUM_ARGS();
1359 	zend_string *arg_str;
1360 	char *arg_pattern = NULL, *arg_options = NULL;
1361 	size_t arg_pattern_len = 0, arg_options_len = 0;
1362 	OnigSyntaxType *syntax = NULL;
1363 	OnigOptionType option;
1364 
1365 	if (zend_parse_parameters(argc, "S|ss", &arg_str, &arg_pattern, &arg_pattern_len, &arg_options, &arg_options_len) == FAILURE) {
1366 		return;
1367 	}
1368 
1369 	if (argc > 1 && arg_pattern_len == 0) {
1370 		php_error_docref(NULL, E_WARNING, "Empty pattern");
1371 		RETURN_FALSE;
1372 	}
1373 
1374 	option = MBREX(regex_default_options);
1375 	syntax = MBREX(regex_default_syntax);
1376 
1377 	if (argc == 3) {
1378 		option = 0;
1379 		_php_mb_regex_init_options(arg_options, arg_options_len, &option, &syntax, NULL);
1380 	}
1381 
1382 	if (argc > 1) {
1383 		/* create regex pattern buffer */
1384 		if ((MBREX(search_re) = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), syntax)) == NULL) {
1385 			RETURN_FALSE;
1386 		}
1387 	}
1388 
1389 	if (!Z_ISNULL(MBREX(search_str))) {
1390 		zval_ptr_dtor(&MBREX(search_str));
1391 	}
1392 
1393 	ZVAL_STR_COPY(&MBREX(search_str), arg_str);
1394 
1395 	if (php_mb_check_encoding(
1396 	ZSTR_VAL(arg_str),
1397 	ZSTR_LEN(arg_str),
1398 	_php_mb_regex_mbctype2name(MBREX(current_mbctype))
1399 	)) {
1400 		MBREX(search_pos) = 0;
1401 		RETVAL_TRUE;
1402 	} else {
1403 		MBREX(search_pos) = ZSTR_LEN(arg_str);
1404 		RETVAL_FALSE;
1405 	}
1406 
1407 	if (MBREX(search_regs) != NULL) {
1408 		onig_region_free(MBREX(search_regs), 1);
1409 		MBREX(search_regs) = NULL;
1410 	}
1411 }
1412 /* }}} */
1413 
1414 /* {{{ proto array mb_ereg_search_getregs(void)
1415    Get matched substring of the last time */
PHP_FUNCTION(mb_ereg_search_getregs)1416 PHP_FUNCTION(mb_ereg_search_getregs)
1417 {
1418 	int n, i, len, beg, end;
1419 	OnigUChar *str;
1420 
1421 	if (MBREX(search_regs) != NULL && Z_TYPE(MBREX(search_str)) == IS_STRING) {
1422 		array_init(return_value);
1423 
1424 		str = (OnigUChar *)Z_STRVAL(MBREX(search_str));
1425 		len = Z_STRLEN(MBREX(search_str));
1426 		n = MBREX(search_regs)->num_regs;
1427 		for (i = 0; i < n; i++) {
1428 			beg = MBREX(search_regs)->beg[i];
1429 			end = MBREX(search_regs)->end[i];
1430 			if (beg >= 0 && beg <= end && end <= len) {
1431 				add_index_stringl(return_value, i, (char *)&str[beg], end - beg);
1432 			} else {
1433 				add_index_bool(return_value, i, 0);
1434 			}
1435 		}
1436 	} else {
1437 		RETVAL_FALSE;
1438 	}
1439 }
1440 /* }}} */
1441 
1442 /* {{{ proto int mb_ereg_search_getpos(void)
1443    Get search start position */
PHP_FUNCTION(mb_ereg_search_getpos)1444 PHP_FUNCTION(mb_ereg_search_getpos)
1445 {
1446 	RETVAL_LONG(MBREX(search_pos));
1447 }
1448 /* }}} */
1449 
1450 /* {{{ proto bool mb_ereg_search_setpos(int position)
1451    Set search start position */
PHP_FUNCTION(mb_ereg_search_setpos)1452 PHP_FUNCTION(mb_ereg_search_setpos)
1453 {
1454 	zend_long position;
1455 
1456 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "l", &position) == FAILURE) {
1457 		return;
1458 	}
1459 
1460 	/* Accept negative position if length of search string can be determined */
1461 	if ((position < 0) && (!Z_ISUNDEF(MBREX(search_str))) && (Z_TYPE(MBREX(search_str)) == IS_STRING)) {
1462 		position += Z_STRLEN(MBREX(search_str));
1463 	}
1464 
1465 	if (position < 0 || (!Z_ISUNDEF(MBREX(search_str)) && Z_TYPE(MBREX(search_str)) == IS_STRING && (size_t)position > Z_STRLEN(MBREX(search_str)))) {
1466 		php_error_docref(NULL, E_WARNING, "Position is out of range");
1467 		MBREX(search_pos) = 0;
1468 		RETURN_FALSE;
1469 	}
1470 
1471 	MBREX(search_pos) = position;
1472 	RETURN_TRUE;
1473 }
1474 /* }}} */
1475 
1476 /* {{{ php_mb_regex_set_options */
_php_mb_regex_set_options(OnigOptionType options,OnigSyntaxType * syntax,OnigOptionType * prev_options,OnigSyntaxType ** prev_syntax)1477 static void _php_mb_regex_set_options(OnigOptionType options, OnigSyntaxType *syntax, OnigOptionType *prev_options, OnigSyntaxType **prev_syntax)
1478 {
1479 	if (prev_options != NULL) {
1480 		*prev_options = MBREX(regex_default_options);
1481 	}
1482 	if (prev_syntax != NULL) {
1483 		*prev_syntax = MBREX(regex_default_syntax);
1484 	}
1485 	MBREX(regex_default_options) = options;
1486 	MBREX(regex_default_syntax) = syntax;
1487 }
1488 /* }}} */
1489 
1490 /* {{{ proto string mb_regex_set_options([string options])
1491    Set or get the default options for mbregex functions */
PHP_FUNCTION(mb_regex_set_options)1492 PHP_FUNCTION(mb_regex_set_options)
1493 {
1494 	OnigOptionType opt;
1495 	OnigSyntaxType *syntax;
1496 	char *string = NULL;
1497 	size_t string_len;
1498 	char buf[16];
1499 
1500 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s",
1501 	                          &string, &string_len) == FAILURE) {
1502 		RETURN_FALSE;
1503 	}
1504 	if (string != NULL) {
1505 		opt = 0;
1506 		syntax = NULL;
1507 		_php_mb_regex_init_options(string, string_len, &opt, &syntax, NULL);
1508 		_php_mb_regex_set_options(opt, syntax, NULL, NULL);
1509 	} else {
1510 		opt = MBREX(regex_default_options);
1511 		syntax = MBREX(regex_default_syntax);
1512 	}
1513 	_php_mb_regex_get_option_string(buf, sizeof(buf), opt, syntax);
1514 
1515 	RETVAL_STRING(buf);
1516 }
1517 /* }}} */
1518 
1519 #endif	/* HAVE_MBREGEX */
1520 
1521 /*
1522  * Local variables:
1523  * tab-width: 4
1524  * c-basic-offset: 4
1525  * End:
1526  * vim600: fdm=marker
1527  * vim: noet sw=4 ts=4
1528  */
1529