xref: /PHP-7.1/ext/mbstring/php_mbregex.c (revision 0ecac37c)
1 /*
2    +----------------------------------------------------------------------+
3    | PHP Version 7                                                        |
4    +----------------------------------------------------------------------+
5    | Copyright (c) 1997-2018 The PHP Group                                |
6    +----------------------------------------------------------------------+
7    | This source file is subject to version 3.01 of the PHP license,      |
8    | that is bundled with this package in the file LICENSE, and is        |
9    | available through the world-wide-web at the following url:           |
10    | http://www.php.net/license/3_01.txt                                  |
11    | If you did not receive a copy of the PHP license and are unable to   |
12    | obtain it through the world-wide-web, please send a note to          |
13    | license@php.net so we can mail you a copy immediately.               |
14    +----------------------------------------------------------------------+
15    | Author: Tsukada Takuya <tsukada@fminn.nagano.nagano.jp>              |
16    +----------------------------------------------------------------------+
17  */
18 
19 /* $Id$ */
20 
21 
22 #ifdef HAVE_CONFIG_H
23 #include "config.h"
24 #endif
25 
26 #include "php.h"
27 #include "php_ini.h"
28 
29 #if HAVE_MBREGEX
30 
31 #include "zend_smart_str.h"
32 #include "ext/standard/info.h"
33 #include "php_mbregex.h"
34 #include "mbstring.h"
35 
36 #include "php_onig_compat.h" /* must come prior to the oniguruma header */
37 #include <oniguruma.h>
38 #undef UChar
39 
40 ZEND_EXTERN_MODULE_GLOBALS(mbstring)
41 
42 struct _zend_mb_regex_globals {
43 	OnigEncoding default_mbctype;
44 	OnigEncoding current_mbctype;
45 	HashTable ht_rc;
46 	zval search_str;
47 	zval *search_str_val;
48 	unsigned int search_pos;
49 	php_mb_regex_t *search_re;
50 	OnigRegion *search_regs;
51 	OnigOptionType regex_default_options;
52 	OnigSyntaxType *regex_default_syntax;
53 };
54 
55 #define MBREX(g) (MBSTRG(mb_regex_globals)->g)
56 
57 /* {{{ static void php_mb_regex_free_cache() */
/* Destructor registered on the regex cache: frees the Oniguruma regex stored in the slot. */
static void php_mb_regex_free_cache(zval *el)
{
	php_mb_regex_t *cached_re = (php_mb_regex_t *)Z_PTR_P(el);

	onig_free(cached_re);
}
61 /* }}} */
62 
63 /* {{{ _php_mb_regex_globals_ctor */
/*
 * Puts a freshly allocated globals structure into its initial state:
 * UTF-8 as default and current encoding, an empty persistent regex cache
 * whose destructor frees the cached regexes, no search state, and Ruby
 * syntax with the multiline + singleline options as defaults.
 *
 * Always returns SUCCESS.
 */
static int _php_mb_regex_globals_ctor(zend_mb_regex_globals *pglobals)
{
	pglobals->default_mbctype = ONIG_ENCODING_UTF8;
	pglobals->current_mbctype = ONIG_ENCODING_UTF8;
	zend_hash_init(&(pglobals->ht_rc), 0, NULL, php_mb_regex_free_cache, 1);
	ZVAL_UNDEF(&pglobals->search_str);
	/* the structure is not zeroed by its allocator, so every pointer is set explicitly */
	pglobals->search_str_val = NULL;
	pglobals->search_re = (php_mb_regex_t*)NULL;
	pglobals->search_pos = 0;
	pglobals->search_regs = (OnigRegion*)NULL;
	pglobals->regex_default_options = ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE;
	pglobals->regex_default_syntax = ONIG_SYNTAX_RUBY;
	return SUCCESS;
}
77 /* }}} */
78 
79 /* {{{ _php_mb_regex_globals_dtor */
/* Tears down the regex cache owned by the globals; the structure itself is not freed here. */
static void _php_mb_regex_globals_dtor(zend_mb_regex_globals *pglobals)
{
	HashTable *regex_cache = &pglobals->ht_rc;

	zend_hash_destroy(regex_cache);
}
84 /* }}} */
85 
86 /* {{{ php_mb_regex_globals_alloc */
php_mb_regex_globals_alloc(void)87 zend_mb_regex_globals *php_mb_regex_globals_alloc(void)
88 {
89 	zend_mb_regex_globals *pglobals = pemalloc(
90 			sizeof(zend_mb_regex_globals), 1);
91 	if (!pglobals) {
92 		return NULL;
93 	}
94 	if (SUCCESS != _php_mb_regex_globals_ctor(pglobals)) {
95 		pefree(pglobals, 1);
96 		return NULL;
97 	}
98 	return pglobals;
99 }
100 /* }}} */
101 
102 /* {{{ php_mb_regex_globals_free */
/* Destroys and releases globals obtained from php_mb_regex_globals_alloc(); NULL is ignored. */
void php_mb_regex_globals_free(zend_mb_regex_globals *pglobals)
{
	if (pglobals != NULL) {
		_php_mb_regex_globals_dtor(pglobals);
		pefree(pglobals, 1);
	}
}
111 /* }}} */
112 
113 /* {{{ PHP_MINIT_FUNCTION(mb_regex) */
PHP_MINIT_FUNCTION(mb_regex)114 PHP_MINIT_FUNCTION(mb_regex)
115 {
116 	onig_init();
117 	return SUCCESS;
118 }
119 /* }}} */
120 
121 /* {{{ PHP_MSHUTDOWN_FUNCTION(mb_regex) */
PHP_MSHUTDOWN_FUNCTION(mb_regex)122 PHP_MSHUTDOWN_FUNCTION(mb_regex)
123 {
124 	onig_end();
125 	return SUCCESS;
126 }
127 /* }}} */
128 
129 /* {{{ PHP_RINIT_FUNCTION(mb_regex) */
PHP_RINIT_FUNCTION(mb_regex)130 PHP_RINIT_FUNCTION(mb_regex)
131 {
132 	return MBSTRG(mb_regex_globals) ? SUCCESS: FAILURE;
133 }
134 /* }}} */
135 
136 /* {{{ PHP_RSHUTDOWN_FUNCTION(mb_regex) */
PHP_RSHUTDOWN_FUNCTION(mb_regex)137 PHP_RSHUTDOWN_FUNCTION(mb_regex)
138 {
139 	MBREX(current_mbctype) = MBREX(default_mbctype);
140 
141 	if (!Z_ISUNDEF(MBREX(search_str))) {
142 		zval_ptr_dtor(&MBREX(search_str));
143 		ZVAL_UNDEF(&MBREX(search_str));
144 	}
145 	MBREX(search_pos) = 0;
146 
147 	if (MBREX(search_regs) != NULL) {
148 		onig_region_free(MBREX(search_regs), 1);
149 		MBREX(search_regs) = (OnigRegion *)NULL;
150 	}
151 	zend_hash_clean(&MBREX(ht_rc));
152 
153 	return SUCCESS;
154 }
155 /* }}} */
156 
157 /* {{{ PHP_MINFO_FUNCTION(mb_regex) */
PHP_MINFO_FUNCTION(mb_regex)158 PHP_MINFO_FUNCTION(mb_regex)
159 {
160 	char buf[32];
161 	php_info_print_table_start();
162 	php_info_print_table_row(2, "Multibyte (japanese) regex support", "enabled");
163 	snprintf(buf, sizeof(buf), "%d.%d.%d",
164 			ONIGURUMA_VERSION_MAJOR,
165 			ONIGURUMA_VERSION_MINOR,
166 			ONIGURUMA_VERSION_TEENY);
167 #ifdef PHP_ONIG_BUNDLED
168 #ifdef USE_COMBINATION_EXPLOSION_CHECK
169 	php_info_print_table_row(2, "Multibyte regex (oniguruma) backtrack check", "On");
170 #else	/* USE_COMBINATION_EXPLOSION_CHECK */
171 	php_info_print_table_row(2, "Multibyte regex (oniguruma) backtrack check", "Off");
172 #endif	/* USE_COMBINATION_EXPLOSION_CHECK */
173 #endif /* PHP_BUNDLED_ONIG */
174 	php_info_print_table_row(2, "Multibyte regex (oniguruma) version", buf);
175 	php_info_print_table_end();
176 }
177 /* }}} */
178 
179 /*
180  * encoding name resolver
181  */
182 
183 /* {{{ encoding name map */
184 typedef struct _php_mb_regex_enc_name_map_t {
185 	const char *names;
186 	OnigEncoding code;
187 } php_mb_regex_enc_name_map_t;
188 
189 php_mb_regex_enc_name_map_t enc_name_map[] = {
190 #ifdef ONIG_ENCODING_EUC_JP
191 	{
192 		"EUC-JP\0EUCJP\0X-EUC-JP\0UJIS\0EUCJP\0EUCJP-WIN\0",
193 		ONIG_ENCODING_EUC_JP
194 	},
195 #endif
196 #ifdef ONIG_ENCODING_UTF8
197 	{
198 		"UTF-8\0UTF8\0",
199 		ONIG_ENCODING_UTF8
200 	},
201 #endif
202 #ifdef ONIG_ENCODING_UTF16_BE
203 	{
204 		"UTF-16\0UTF-16BE\0",
205 		ONIG_ENCODING_UTF16_BE
206 	},
207 #endif
208 #ifdef ONIG_ENCODING_UTF16_LE
209 	{
210 		"UTF-16LE\0",
211 		ONIG_ENCODING_UTF16_LE
212 	},
213 #endif
214 #ifdef ONIG_ENCODING_UTF32_BE
215 	{
216 		"UCS-4\0UTF-32\0UTF-32BE\0",
217 		ONIG_ENCODING_UTF32_BE
218 	},
219 #endif
220 #ifdef ONIG_ENCODING_UTF32_LE
221 	{
222 		"UCS-4LE\0UTF-32LE\0",
223 		ONIG_ENCODING_UTF32_LE
224 	},
225 #endif
226 #ifdef ONIG_ENCODING_SJIS
227 	{
228 		"SJIS\0CP932\0MS932\0SHIFT_JIS\0SJIS-WIN\0WINDOWS-31J\0",
229 		ONIG_ENCODING_SJIS
230 	},
231 #endif
232 #ifdef ONIG_ENCODING_BIG5
233 	{
234 		"BIG5\0BIG-5\0BIGFIVE\0CN-BIG5\0BIG-FIVE\0",
235 		ONIG_ENCODING_BIG5
236 	},
237 #endif
238 #ifdef ONIG_ENCODING_EUC_CN
239 	{
240 		"EUC-CN\0EUCCN\0EUC_CN\0GB-2312\0GB2312\0",
241 		ONIG_ENCODING_EUC_CN
242 	},
243 #endif
244 #ifdef ONIG_ENCODING_EUC_TW
245 	{
246 		"EUC-TW\0EUCTW\0EUC_TW\0",
247 		ONIG_ENCODING_EUC_TW
248 	},
249 #endif
250 #ifdef ONIG_ENCODING_EUC_KR
251 	{
252 		"EUC-KR\0EUCKR\0EUC_KR\0",
253 		ONIG_ENCODING_EUC_KR
254 	},
255 #endif
256 #if defined(ONIG_ENCODING_KOI8) && !PHP_ONIG_BAD_KOI8_ENTRY
257 	{
258 		"KOI8\0KOI-8\0",
259 		ONIG_ENCODING_KOI8
260 	},
261 #endif
262 #ifdef ONIG_ENCODING_KOI8_R
263 	{
264 		"KOI8R\0KOI8-R\0KOI-8R\0",
265 		ONIG_ENCODING_KOI8_R
266 	},
267 #endif
268 #ifdef ONIG_ENCODING_ISO_8859_1
269 	{
270 		"ISO-8859-1\0ISO8859-1\0ISO_8859_1\0ISO8859_1\0",
271 		ONIG_ENCODING_ISO_8859_1
272 	},
273 #endif
274 #ifdef ONIG_ENCODING_ISO_8859_2
275 	{
276 		"ISO-8859-2\0ISO8859-2\0ISO_8859_2\0ISO8859_2\0",
277 		ONIG_ENCODING_ISO_8859_2
278 	},
279 #endif
280 #ifdef ONIG_ENCODING_ISO_8859_3
281 	{
282 		"ISO-8859-3\0ISO8859-3\0ISO_8859_3\0ISO8859_3\0",
283 		ONIG_ENCODING_ISO_8859_3
284 	},
285 #endif
286 #ifdef ONIG_ENCODING_ISO_8859_4
287 	{
288 		"ISO-8859-4\0ISO8859-4\0ISO_8859_4\0ISO8859_4\0",
289 		ONIG_ENCODING_ISO_8859_4
290 	},
291 #endif
292 #ifdef ONIG_ENCODING_ISO_8859_5
293 	{
294 		"ISO-8859-5\0ISO8859-5\0ISO_8859_5\0ISO8859_5\0",
295 		ONIG_ENCODING_ISO_8859_5
296 	},
297 #endif
298 #ifdef ONIG_ENCODING_ISO_8859_6
299 	{
300 		"ISO-8859-6\0ISO8859-6\0ISO_8859_6\0ISO8859_6\0",
301 		ONIG_ENCODING_ISO_8859_6
302 	},
303 #endif
304 #ifdef ONIG_ENCODING_ISO_8859_7
305 	{
306 		"ISO-8859-7\0ISO8859-7\0ISO_8859_7\0ISO8859_7\0",
307 		ONIG_ENCODING_ISO_8859_7
308 	},
309 #endif
310 #ifdef ONIG_ENCODING_ISO_8859_8
311 	{
312 		"ISO-8859-8\0ISO8859-8\0ISO_8859_8\0ISO8859_8\0",
313 		ONIG_ENCODING_ISO_8859_8
314 	},
315 #endif
316 #ifdef ONIG_ENCODING_ISO_8859_9
317 	{
318 		"ISO-8859-9\0ISO8859-9\0ISO_8859_9\0ISO8859_9\0",
319 		ONIG_ENCODING_ISO_8859_9
320 	},
321 #endif
322 #ifdef ONIG_ENCODING_ISO_8859_10
323 	{
324 		"ISO-8859-10\0ISO8859-10\0ISO_8859_10\0ISO8859_10\0",
325 		ONIG_ENCODING_ISO_8859_10
326 	},
327 #endif
328 #ifdef ONIG_ENCODING_ISO_8859_11
329 	{
330 		"ISO-8859-11\0ISO8859-11\0ISO_8859_11\0ISO8859_11\0",
331 		ONIG_ENCODING_ISO_8859_11
332 	},
333 #endif
334 #ifdef ONIG_ENCODING_ISO_8859_13
335 	{
336 		"ISO-8859-13\0ISO8859-13\0ISO_8859_13\0ISO8859_13\0",
337 		ONIG_ENCODING_ISO_8859_13
338 	},
339 #endif
340 #ifdef ONIG_ENCODING_ISO_8859_14
341 	{
342 		"ISO-8859-14\0ISO8859-14\0ISO_8859_14\0ISO8859_14\0",
343 		ONIG_ENCODING_ISO_8859_14
344 	},
345 #endif
346 #ifdef ONIG_ENCODING_ISO_8859_15
347 	{
348 		"ISO-8859-15\0ISO8859-15\0ISO_8859_15\0ISO8859_15\0",
349 		ONIG_ENCODING_ISO_8859_15
350 	},
351 #endif
352 #ifdef ONIG_ENCODING_ISO_8859_16
353 	{
354 		"ISO-8859-16\0ISO8859-16\0ISO_8859_16\0ISO8859_16\0",
355 		ONIG_ENCODING_ISO_8859_16
356 	},
357 #endif
358 #ifdef ONIG_ENCODING_ASCII
359 	{
360 		"ASCII\0US-ASCII\0US_ASCII\0ISO646\0",
361 		ONIG_ENCODING_ASCII
362 	},
363 #endif
364 	{ NULL, ONIG_ENCODING_UNDEF }
365 };
366 /* }}} */
367 
368 /* {{{ php_mb_regex_name2mbctype */
_php_mb_regex_name2mbctype(const char * pname)369 static OnigEncoding _php_mb_regex_name2mbctype(const char *pname)
370 {
371 	const char *p;
372 	php_mb_regex_enc_name_map_t *mapping;
373 
374 	if (pname == NULL || !*pname) {
375 		return ONIG_ENCODING_UNDEF;
376 	}
377 
378 	for (mapping = enc_name_map; mapping->names != NULL; mapping++) {
379 		for (p = mapping->names; *p != '\0'; p += (strlen(p) + 1)) {
380 			if (strcasecmp(p, pname) == 0) {
381 				return mapping->code;
382 			}
383 		}
384 	}
385 
386 	return ONIG_ENCODING_UNDEF;
387 }
388 /* }}} */
389 
390 /* {{{ php_mb_regex_mbctype2name */
/*
 * Reverse lookup: returns the alias list of the first table row whose
 * encoding equals mbctype. Read as a C string, the result is the first
 * alias of that row. Returns NULL when no row matches.
 */
static const char *_php_mb_regex_mbctype2name(OnigEncoding mbctype)
{
	php_mb_regex_enc_name_map_t *entry = enc_name_map;

	while (entry->names != NULL) {
		if (entry->code == mbctype) {
			return entry->names;
		}
		entry++;
	}

	return NULL;
}
403 /* }}} */
404 
405 /* {{{ php_mb_regex_set_mbctype */
php_mb_regex_set_mbctype(const char * encname)406 int php_mb_regex_set_mbctype(const char *encname)
407 {
408 	OnigEncoding mbctype = _php_mb_regex_name2mbctype(encname);
409 	if (mbctype == ONIG_ENCODING_UNDEF) {
410 		return FAILURE;
411 	}
412 	MBREX(current_mbctype) = mbctype;
413 	return SUCCESS;
414 }
415 /* }}} */
416 
417 /* {{{ php_mb_regex_set_default_mbctype */
php_mb_regex_set_default_mbctype(const char * encname)418 int php_mb_regex_set_default_mbctype(const char *encname)
419 {
420 	OnigEncoding mbctype = _php_mb_regex_name2mbctype(encname);
421 	if (mbctype == ONIG_ENCODING_UNDEF) {
422 		return FAILURE;
423 	}
424 	MBREX(default_mbctype) = mbctype;
425 	return SUCCESS;
426 }
427 /* }}} */
428 
429 /* {{{ php_mb_regex_get_mbctype */
php_mb_regex_get_mbctype(void)430 const char *php_mb_regex_get_mbctype(void)
431 {
432 	return _php_mb_regex_mbctype2name(MBREX(current_mbctype));
433 }
434 /* }}} */
435 
436 /* {{{ php_mb_regex_get_default_mbctype */
php_mb_regex_get_default_mbctype(void)437 const char *php_mb_regex_get_default_mbctype(void)
438 {
439 	return _php_mb_regex_mbctype2name(MBREX(default_mbctype));
440 }
441 /* }}} */
442 
443 /*
444  * regex cache
445  */
446 /* {{{ php_mbregex_compile_pattern */
php_mbregex_compile_pattern(const char * pattern,int patlen,OnigOptionType options,OnigEncoding enc,OnigSyntaxType * syntax)447 static php_mb_regex_t *php_mbregex_compile_pattern(const char *pattern, int patlen, OnigOptionType options, OnigEncoding enc, OnigSyntaxType *syntax)
448 {
449 	int err_code = 0;
450 	php_mb_regex_t *retval = NULL, *rc = NULL;
451 	OnigErrorInfo err_info;
452 	OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
453 
454 	if (!php_mb_check_encoding(pattern, patlen, _php_mb_regex_mbctype2name(enc))) {
455 		php_error_docref(NULL, E_WARNING,
456 			"Pattern is not valid under %s encoding", _php_mb_regex_mbctype2name(enc));
457 		return NULL;
458 	}
459 
460 	rc = zend_hash_str_find_ptr(&MBREX(ht_rc), (char *)pattern, patlen);
461 	if (!rc || onig_get_options(rc) != options || onig_get_encoding(rc) != enc || onig_get_syntax(rc) != syntax) {
462 		if ((err_code = onig_new(&retval, (OnigUChar *)pattern, (OnigUChar *)(pattern + patlen), options, enc, syntax, &err_info)) != ONIG_NORMAL) {
463 			onig_error_code_to_str(err_str, err_code, &err_info);
464 			php_error_docref(NULL, E_WARNING, "mbregex compile err: %s", err_str);
465 			return NULL;
466 		}
467 		if (rc == MBREX(search_re)) {
468 			/* reuse the new rc? see bug #72399 */
469 			MBREX(search_re) = NULL;
470 		}
471 		zend_hash_str_update_ptr(&MBREX(ht_rc), (char *)pattern, patlen, retval);
472 	} else {
473 		retval = rc;
474 	}
475 	return retval;
476 }
477 /* }}} */
478 
479 /* {{{ _php_mb_regex_get_option_string */
/*
 * Renders an option mask and a syntax as the option-letter string accepted
 * by _php_mb_regex_init_options(), including the terminating NUL.
 *
 * As many characters as fit are stored into str (capacity len). Returns 0
 * when the whole string, terminator included, fitted; otherwise returns the
 * number of bytes that would have been required.
 */
static size_t _php_mb_regex_get_option_string(char *str, size_t len, OnigOptionType option, OnigSyntaxType *syntax)
{
	/* worst case: i, x, one of p/m/s, l, n, one syntax letter, NUL */
	char letters[8];
	size_t needed = 0;
	size_t k;
	char syntax_letter = 0;

	if ((option & ONIG_OPTION_IGNORECASE) != 0) {
		letters[needed++] = 'i';
	}

	if ((option & ONIG_OPTION_EXTEND) != 0) {
		letters[needed++] = 'x';
	}

	/* multiline and singleline together collapse into the single letter 'p' */
	if ((option & (ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE)) ==
			(ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE)) {
		letters[needed++] = 'p';
	} else if ((option & ONIG_OPTION_MULTILINE) != 0) {
		letters[needed++] = 'm';
	} else if ((option & ONIG_OPTION_SINGLELINE) != 0) {
		letters[needed++] = 's';
	}

	if ((option & ONIG_OPTION_FIND_LONGEST) != 0) {
		letters[needed++] = 'l';
	}

	if ((option & ONIG_OPTION_FIND_NOT_EMPTY) != 0) {
		letters[needed++] = 'n';
	}

	if (syntax == ONIG_SYNTAX_JAVA) {
		syntax_letter = 'j';
	} else if (syntax == ONIG_SYNTAX_GNU_REGEX) {
		syntax_letter = 'u';
	} else if (syntax == ONIG_SYNTAX_GREP) {
		syntax_letter = 'g';
	} else if (syntax == ONIG_SYNTAX_EMACS) {
		syntax_letter = 'c';
	} else if (syntax == ONIG_SYNTAX_RUBY) {
		syntax_letter = 'r';
	} else if (syntax == ONIG_SYNTAX_PERL) {
		syntax_letter = 'z';
	} else if (syntax == ONIG_SYNTAX_POSIX_BASIC) {
		syntax_letter = 'b';
	} else if (syntax == ONIG_SYNTAX_POSIX_EXTENDED) {
		syntax_letter = 'd';
	}

	if (syntax_letter != 0) {
		letters[needed++] = syntax_letter;
	}

	letters[needed++] = '\0';

	/* hand over whatever fits; a short buffer receives a truncated prefix */
	for (k = 0; k < needed && k < len; k++) {
		str[k] = letters[k];
	}

	if (len < needed) {
		return needed;
	}

	return 0;
}
582 /* }}} */
583 
584 /* {{{ _php_mb_regex_init_options */
585 static void
_php_mb_regex_init_options(const char * parg,int narg,OnigOptionType * option,OnigSyntaxType ** syntax,int * eval)586 _php_mb_regex_init_options(const char *parg, int narg, OnigOptionType *option, OnigSyntaxType **syntax, int *eval)
587 {
588 	int n;
589 	char c;
590 	int optm = 0;
591 
592 	*syntax = ONIG_SYNTAX_RUBY;
593 
594 	if (parg != NULL) {
595 		n = 0;
596 		while(n < narg) {
597 			c = parg[n++];
598 			switch (c) {
599 				case 'i':
600 					optm |= ONIG_OPTION_IGNORECASE;
601 					break;
602 				case 'x':
603 					optm |= ONIG_OPTION_EXTEND;
604 					break;
605 				case 'm':
606 					optm |= ONIG_OPTION_MULTILINE;
607 					break;
608 				case 's':
609 					optm |= ONIG_OPTION_SINGLELINE;
610 					break;
611 				case 'p':
612 					optm |= ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE;
613 					break;
614 				case 'l':
615 					optm |= ONIG_OPTION_FIND_LONGEST;
616 					break;
617 				case 'n':
618 					optm |= ONIG_OPTION_FIND_NOT_EMPTY;
619 					break;
620 				case 'j':
621 					*syntax = ONIG_SYNTAX_JAVA;
622 					break;
623 				case 'u':
624 					*syntax = ONIG_SYNTAX_GNU_REGEX;
625 					break;
626 				case 'g':
627 					*syntax = ONIG_SYNTAX_GREP;
628 					break;
629 				case 'c':
630 					*syntax = ONIG_SYNTAX_EMACS;
631 					break;
632 				case 'r':
633 					*syntax = ONIG_SYNTAX_RUBY;
634 					break;
635 				case 'z':
636 					*syntax = ONIG_SYNTAX_PERL;
637 					break;
638 				case 'b':
639 					*syntax = ONIG_SYNTAX_POSIX_BASIC;
640 					break;
641 				case 'd':
642 					*syntax = ONIG_SYNTAX_POSIX_EXTENDED;
643 					break;
644 				case 'e':
645 					if (eval != NULL) *eval = 1;
646 					break;
647 				default:
648 					break;
649 			}
650 		}
651 		if (option != NULL) *option|=optm;
652 	}
653 }
654 /* }}} */
655 
656 /*
657  * php functions
658  */
659 
660 /* {{{ proto string mb_regex_encoding([string encoding])
661    Returns the current encoding for regex as a string. */
PHP_FUNCTION(mb_regex_encoding)662 PHP_FUNCTION(mb_regex_encoding)
663 {
664 	char *encoding = NULL;
665 	size_t encoding_len;
666 	OnigEncoding mbctype;
667 
668 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s", &encoding, &encoding_len) == FAILURE) {
669 		return;
670 	}
671 
672 	if (!encoding) {
673 		const char *retval = _php_mb_regex_mbctype2name(MBREX(current_mbctype));
674 
675 		if (retval == NULL) {
676 			RETURN_FALSE;
677 		}
678 
679 		RETURN_STRING((char *)retval);
680 	} else {
681 		mbctype = _php_mb_regex_name2mbctype(encoding);
682 
683 		if (mbctype == ONIG_ENCODING_UNDEF) {
684 			php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", encoding);
685 			RETURN_FALSE;
686 		}
687 
688 		MBREX(current_mbctype) = mbctype;
689 		RETURN_TRUE;
690 	}
691 }
692 /* }}} */
693 
694 /* {{{ _php_mb_regex_ereg_exec */
/*
 * Shared implementation of mb_ereg() and mb_eregi().
 *
 * icase  non-zero adds ONIG_OPTION_IGNORECASE to the default options
 *
 * PHP-level result:
 *  - false when the arguments cannot be parsed, the subject is not valid in
 *    the current encoding, the pattern is empty or does not compile, or
 *    nothing matches;
 *  - otherwise an integer: 1 when no registers argument was given; with a
 *    registers argument, the byte length of the whole match (1 when that
 *    length is 0).
 *
 * The registers argument, when given, is reset to an empty array on every
 * call and on a match receives one entry per group: the matched text, or
 * false for a group that is empty or did not participate.
 */
static void _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase)
{
	zval *arg_pattern, *array = NULL;
	char *string;
	size_t string_len;
	php_mb_regex_t *re;
	OnigRegion *regs = NULL;
	OnigOptionType options;
	int grp, match_len, grp_beg, grp_end;
	int subject_ok;

	if (zend_parse_parameters(ZEND_NUM_ARGS(), "zs|z/", &arg_pattern, &string, &string_len, &array) == FAILURE) {
		RETURN_FALSE;
	}

	subject_ok = php_mb_check_encoding(
		string,
		string_len,
		_php_mb_regex_mbctype2name(MBREX(current_mbctype))
	) ? 1 : 0;

	/* the registers argument is emptied whether or not the call goes on */
	if (array != NULL) {
		zval_dtor(array);
		array_init(array);
	}

	if (!subject_ok) {
		RETURN_FALSE;
	}

	options = MBREX(regex_default_options);
	if (icase) {
		options |= ONIG_OPTION_IGNORECASE;
	}

	/* a non-string pattern is used as text; a float loses its decimals first */
	if (Z_TYPE_P(arg_pattern) != IS_STRING) {
		if (Z_TYPE_P(arg_pattern) == IS_DOUBLE) {
			convert_to_long_ex(arg_pattern);
		}
		convert_to_string_ex(arg_pattern);
	}

	if (Z_STRLEN_P(arg_pattern) == 0) {
		php_error_docref(NULL, E_WARNING, "empty pattern");
		RETURN_FALSE;
	}

	re = php_mbregex_compile_pattern(Z_STRVAL_P(arg_pattern), Z_STRLEN_P(arg_pattern), options, MBREX(current_mbctype), MBREX(regex_default_syntax));
	if (re == NULL) {
		RETURN_FALSE;
	}

	regs = onig_region_new();

	if (onig_search(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), (OnigUChar *)string, (OnigUChar *)(string + string_len), regs, 0) < 0) {
		RETVAL_FALSE;
	} else {
		match_len = 1;

		if (array != NULL) {
			match_len = regs->end[0] - regs->beg[0];

			for (grp = 0; grp < regs->num_regs; grp++) {
				grp_beg = regs->beg[grp];
				grp_end = regs->end[grp];
				if (grp_beg >= 0 && grp_beg < grp_end && (size_t)grp_end <= string_len) {
					add_index_stringl(array, grp, string + grp_beg, grp_end - grp_beg);
				} else {
					add_index_bool(array, grp, 0);
				}
			}
		}

		/* an empty match is still reported as 1 */
		if (match_len == 0) {
			match_len = 1;
		}
		RETVAL_LONG(match_len);
	}

	if (regs != NULL) {
		onig_region_free(regs, 1);
	}
}
787 /* }}} */
788 
789 /* {{{ proto int mb_ereg(string pattern, string string [, array registers])
790    Regular expression match for multibyte string */
PHP_FUNCTION(mb_ereg)791 PHP_FUNCTION(mb_ereg)
792 {
793 	_php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
794 }
795 /* }}} */
796 
797 /* {{{ proto int mb_eregi(string pattern, string string [, array registers])
798    Case-insensitive regular expression match for multibyte string */
PHP_FUNCTION(mb_eregi)799 PHP_FUNCTION(mb_eregi)
800 {
801 	_php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
802 }
803 /* }}} */
804 
805 /* {{{ _php_mb_regex_ereg_replace_exec */
/*
 * Shared implementation of mb_ereg_replace(), mb_eregi_replace() and
 * mb_ereg_replace_callback().
 *
 * options      option bits preset by the entry point
 * is_callable  non-zero when the second PHP argument is a callback rather
 *              than a replacement string
 *
 * Every match in the subject is replaced in turn:
 *  - string mode: the replacement text, with \0..\9 expanded to the
 *    corresponding group;
 *  - 'e' option (deprecated): the expanded replacement is evaluated as PHP
 *    code and its result, converted to string, is inserted;
 *  - callback mode: the callback receives the array of groups and its
 *    return value, converted to string, is inserted.
 *
 * PHP-level result: the rewritten string; false on argument, pattern,
 * search or eval errors; NULL when the subject is not valid in the current
 * encoding.
 *
 * SECURITY NOTE: the 'e' option runs text derived from the subject through
 * the PHP evaluator. It is kept for compatibility only and must not be used
 * on untrusted input.
 */
static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOptionType options, int is_callable)
{
	zval *arg_pattern_zval;

	char *arg_pattern;
	size_t arg_pattern_len;

	/* only filled in (and only read) in string mode */
	char *replace = NULL;
	size_t replace_len = 0;

	zend_fcall_info arg_replace_fci;
	zend_fcall_info_cache arg_replace_fci_cache;

	char *string;
	size_t string_len;

	char *p;
	php_mb_regex_t *re;
	OnigSyntaxType *syntax;
	OnigRegion *regs = NULL;
	smart_str out_buf = {0};
	smart_str eval_buf = {0};
	smart_str *pbuf;
	size_t i;
	int err, eval, n;
	OnigUChar *pos;
	OnigUChar *string_lim;
	char *description = NULL;
	char pat_buf[6];

	const mbfl_encoding *enc;

	{
		const char *current_enc_name;
		current_enc_name = _php_mb_regex_mbctype2name(MBREX(current_mbctype));
		if (current_enc_name == NULL ||
			(enc = mbfl_name2encoding(current_enc_name)) == NULL) {
			php_error_docref(NULL, E_WARNING, "Unknown error");
			RETURN_FALSE;
		}
	}
	eval = 0;
	{
		char *option_str = NULL;
		size_t option_str_len = 0;

		if (!is_callable) {
			if (zend_parse_parameters(ZEND_NUM_ARGS(), "zss|s",
						&arg_pattern_zval,
						&replace, &replace_len,
						&string, &string_len,
						&option_str, &option_str_len) == FAILURE) {
				RETURN_FALSE;
			}
		} else {
			if (zend_parse_parameters(ZEND_NUM_ARGS(), "zfs|s",
						&arg_pattern_zval,
						&arg_replace_fci, &arg_replace_fci_cache,
						&string, &string_len,
						&option_str, &option_str_len) == FAILURE) {
				RETURN_FALSE;
			}
		}

		if (!php_mb_check_encoding(
		string,
		string_len,
		_php_mb_regex_mbctype2name(MBREX(current_mbctype))
		)) {
			RETURN_NULL();
		}

		/* an explicit option string replaces the configured defaults */
		if (option_str != NULL) {
			_php_mb_regex_init_options(option_str, option_str_len, &options, &syntax, &eval);
		} else {
			options |= MBREX(regex_default_options);
			syntax = MBREX(regex_default_syntax);
		}
	}
	if (eval && !is_callable) {
		php_error_docref(NULL, E_DEPRECATED, "The 'e' option is deprecated, use mb_ereg_replace_callback instead");
	}
	if (Z_TYPE_P(arg_pattern_zval) == IS_STRING) {
		arg_pattern = Z_STRVAL_P(arg_pattern_zval);
		arg_pattern_len = Z_STRLEN_P(arg_pattern_zval);
	} else {
		/* a non-string pattern is taken as a single byte value */
		/* FIXME: this code is not multibyte aware! */
		convert_to_long_ex(arg_pattern_zval);
		pat_buf[0] = (char)Z_LVAL_P(arg_pattern_zval);
		pat_buf[1] = '\0';
		pat_buf[2] = '\0';
		pat_buf[3] = '\0';
		pat_buf[4] = '\0';
		pat_buf[5] = '\0';

		arg_pattern = pat_buf;
		arg_pattern_len = 1;
	}
	/* create regex pattern buffer */
	re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, options, MBREX(current_mbctype), syntax);
	if (re == NULL) {
		RETURN_FALSE;
	}

	/* reject the invalid combination before anything is allocated, so the
	 * early return has nothing to release */
	if (is_callable && eval) {
		php_error_docref(NULL, E_WARNING, "Option 'e' cannot be used with replacement callback");
		RETURN_FALSE;
	}

	if (eval || is_callable) {
		pbuf = &eval_buf;
		description = zend_make_compiled_string_description("mbregex replace");
	} else {
		pbuf = &out_buf;
		description = NULL;
	}

	/* do the actual work */
	err = 0;
	pos = (OnigUChar *)string;
	string_lim = (OnigUChar*)(string + string_len);
	regs = onig_region_new();
	while (err >= 0) {
		err = onig_search(re, (OnigUChar *)string, (OnigUChar *)string_lim, pos, (OnigUChar *)string_lim, regs, 0);
		if (err <= -2) {
			OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
			onig_error_code_to_str(err_str, err);
			php_error_docref(NULL, E_WARNING, "mbregex search failure in php_mbereg_replace_exec(): %s", err_str);
			break;
		}
		if (err >= 0) {
#if moriyoshi_0
			if (regs->beg[0] == regs->end[0]) {
				php_error_docref(NULL, E_WARNING, "Empty regular expression");
				break;
			}
#endif
			/* copy the part of the string before the match */
			smart_str_appendl(&out_buf, (char *)pos, (size_t)((OnigUChar *)(string + regs->beg[0]) - pos));

			if (!is_callable) {
				/* copy replacement and backrefs */
				i = 0;
				p = replace;
				while (i < replace_len) {
					int fwd = (int) php_mb_mbchar_bytes_ex(p, enc);

					/* never step past the end of the replacement, e.g. when it
					 * ends in a truncated multibyte character */
					if ((size_t)fwd > replace_len - i) {
						fwd = (int)(replace_len - i);
					}

					n = -1;
					if ((replace_len - i) >= 2 && fwd == 1 &&
					p[0] == '\\' && p[1] >= '0' && p[1] <= '9') {
						n = p[1] - '0';
					}
					if (n >= 0 && n < regs->num_regs) {
						/* back-reference to an existing group; an empty or
						 * non-participating group contributes nothing */
						if (regs->beg[n] >= 0 && regs->beg[n] < regs->end[n] && (size_t)regs->end[n] <= string_len) {
							smart_str_appendl(pbuf, string + regs->beg[n], regs->end[n] - regs->beg[n]);
						}
						p += 2;
						i += 2;
					} else {
						smart_str_appendl(pbuf, p, fwd);
						p += fwd;
						i += fwd;
					}
				}
			}

			if (eval) {
				zval v;
				zend_string *eval_str;
				/* null terminate buffer */
				smart_str_0(&eval_buf);

				if (eval_buf.s) {
					eval_str = eval_buf.s;
				} else {
					eval_str = ZSTR_EMPTY_ALLOC();
				}

				/* do eval */
				if (zend_eval_stringl(ZSTR_VAL(eval_str), ZSTR_LEN(eval_str), &v, description) == FAILURE) {
					efree(description);
					zend_throw_error(NULL, "Failed evaluating code: %s%s", PHP_EOL, ZSTR_VAL(eval_str));
					/* leaving for good: release the region structure itself too */
					onig_region_free(regs, 1);
					smart_str_free(&out_buf);
					smart_str_free(&eval_buf);
					RETURN_FALSE;
				}

				/* result of eval */
				convert_to_string(&v);
				smart_str_appendl(&out_buf, Z_STRVAL(v), Z_STRLEN(v));
				/* Clean up */
				smart_str_free(&eval_buf);
				zval_dtor(&v);
			} else if (is_callable) {
				zval args[1];
				zval subpats, retval;
				int grp;

				array_init(&subpats);
				for (grp = 0; grp < regs->num_regs; grp++) {
					if (regs->beg[grp] >= 0 && regs->beg[grp] <= regs->end[grp] && (size_t)regs->end[grp] <= string_len) {
						add_next_index_stringl(&subpats, string + regs->beg[grp], regs->end[grp] - regs->beg[grp]);
					} else {
						/* group did not take part in the match: pass an empty
						 * string without forming a pointer before the subject */
						add_next_index_stringl(&subpats, "", 0);
					}
				}

				ZVAL_COPY_VALUE(&args[0], &subpats);

				arg_replace_fci.param_count = 1;
				arg_replace_fci.params = args;
				arg_replace_fci.retval = &retval;
				if (zend_call_function(&arg_replace_fci, &arg_replace_fci_cache) == SUCCESS &&
						!Z_ISUNDEF(retval)) {
					convert_to_string_ex(&retval);
					smart_str_appendl(&out_buf, Z_STRVAL(retval), Z_STRLEN(retval));
					zval_ptr_dtor(&retval);
				} else {
					if (!EG(exception)) {
						php_error_docref(NULL, E_WARNING, "Unable to call custom replacement function");
					}
				}
				zval_ptr_dtor(&subpats);
			}

			n = regs->end[0];
			if ((pos - (OnigUChar *)string) < n) {
				/* resume right after the match */
				pos = (OnigUChar *)string + n;
			} else {
				/* the match did not move us forward: copy one byte through and
				 * step over it so the loop makes progress */
				if (pos < string_lim) {
					smart_str_appendl(&out_buf, (char *)pos, 1);
				}
				pos++;
			}
		} else { /* nomatch */
			/* stick that last bit of string on our output */
			if (string_lim - pos > 0) {
				smart_str_appendl(&out_buf, (char *)pos, string_lim - pos);
			}
		}
		/* reset the region contents for the next search; the structure is kept */
		onig_region_free(regs, 0);
	}

	if (description) {
		efree(description);
	}
	if (regs != NULL) {
		onig_region_free(regs, 1);
	}
	smart_str_free(&eval_buf);

	if (err <= -2) {
		smart_str_free(&out_buf);
		RETVAL_FALSE;
	} else if (out_buf.s) {
		smart_str_0(&out_buf);
		RETVAL_STR(out_buf.s);
	} else {
		RETVAL_EMPTY_STRING();
	}
}
1068 /* }}} */
1069 
1070 /* {{{ proto string mb_ereg_replace(string pattern, string replacement, string string [, string option])
1071    Replace regular expression for multibyte string */
PHP_FUNCTION(mb_ereg_replace)1072 PHP_FUNCTION(mb_ereg_replace)
1073 {
1074 	_php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 0);
1075 }
1076 /* }}} */
1077 
1078 /* {{{ proto string mb_eregi_replace(string pattern, string replacement, string string)
1079    Case insensitive replace regular expression for multibyte string */
PHP_FUNCTION(mb_eregi_replace)1080 PHP_FUNCTION(mb_eregi_replace)
1081 {
1082 	_php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, ONIG_OPTION_IGNORECASE, 0);
1083 }
1084 /* }}} */
1085 
1086 /* {{{ proto string mb_ereg_replace_callback(string pattern, string callback, string string [, string option])
1087     regular expression for multibyte string using replacement callback */
PHP_FUNCTION(mb_ereg_replace_callback)1088 PHP_FUNCTION(mb_ereg_replace_callback)
1089 {
1090 	_php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 1);
1091 }
1092 /* }}} */
1093 
1094 /* {{{ proto array mb_split(string pattern, string string [, int limit])
1095    split multibyte string into array by regular expression */
PHP_FUNCTION(mb_split)1096 PHP_FUNCTION(mb_split)
1097 {
1098 	char *arg_pattern;
1099 	size_t arg_pattern_len;
1100 	php_mb_regex_t *re;
1101 	OnigRegion *regs = NULL;
1102 	char *string;
1103 	OnigUChar *pos, *chunk_pos;
1104 	size_t string_len;
1105 
1106 	int n, err;
1107 	zend_long count = -1;
1108 
1109 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|l", &arg_pattern, &arg_pattern_len, &string, &string_len, &count) == FAILURE) {
1110 		RETURN_FALSE;
1111 	}
1112 
1113 	if (count > 0) {
1114 		count--;
1115 	}
1116 
1117 	if (!php_mb_check_encoding(string, string_len,
1118 			_php_mb_regex_mbctype2name(MBREX(current_mbctype)))) {
1119 		RETURN_FALSE;
1120 	}
1121 
1122 	/* create regex pattern buffer */
1123 	if ((re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, MBREX(regex_default_options), MBREX(current_mbctype), MBREX(regex_default_syntax))) == NULL) {
1124 		RETURN_FALSE;
1125 	}
1126 
1127 	array_init(return_value);
1128 
1129 	chunk_pos = pos = (OnigUChar *)string;
1130 	err = 0;
1131 	regs = onig_region_new();
1132 	/* churn through str, generating array entries as we go */
1133 	while (count != 0 && (pos - (OnigUChar *)string) < (ptrdiff_t)string_len) {
1134 		int beg, end;
1135 		err = onig_search(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), pos, (OnigUChar *)(string + string_len), regs, 0);
1136 		if (err < 0) {
1137 			break;
1138 		}
1139 		beg = regs->beg[0], end = regs->end[0];
1140 		/* add it to the array */
1141 		if ((pos - (OnigUChar *)string) < end) {
1142 			if ((size_t)beg < string_len && beg >= (chunk_pos - (OnigUChar *)string)) {
1143 				add_next_index_stringl(return_value, (char *)chunk_pos, ((OnigUChar *)(string + beg) - chunk_pos));
1144 				--count;
1145 			} else {
1146 				err = -2;
1147 				break;
1148 			}
1149 			/* point at our new starting point */
1150 			chunk_pos = pos = (OnigUChar *)string + end;
1151 		} else {
1152 			pos++;
1153 		}
1154 		onig_region_free(regs, 0);
1155 	}
1156 
1157 	onig_region_free(regs, 1);
1158 
1159 	/* see if we encountered an error */
1160 	if (err <= -2) {
1161 		OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
1162 		onig_error_code_to_str(err_str, err);
1163 		php_error_docref(NULL, E_WARNING, "mbregex search failure in mbsplit(): %s", err_str);
1164 		zval_dtor(return_value);
1165 		RETURN_FALSE;
1166 	}
1167 
1168 	/* otherwise we just have one last element to add to the array */
1169 	n = ((OnigUChar *)(string + string_len) - chunk_pos);
1170 	if (n > 0) {
1171 		add_next_index_stringl(return_value, (char *)chunk_pos, n);
1172 	} else {
1173 		add_next_index_stringl(return_value, "", 0);
1174 	}
1175 }
1176 /* }}} */
1177 
1178 /* {{{ proto bool mb_ereg_match(string pattern, string string [,string option])
1179    Regular expression match for multibyte string */
PHP_FUNCTION(mb_ereg_match)1180 PHP_FUNCTION(mb_ereg_match)
1181 {
1182 	char *arg_pattern;
1183 	size_t arg_pattern_len;
1184 
1185 	char *string;
1186 	size_t string_len;
1187 
1188 	php_mb_regex_t *re;
1189 	OnigSyntaxType *syntax;
1190 	OnigOptionType option = 0;
1191 	int err;
1192 
1193 	{
1194 		char *option_str = NULL;
1195 		size_t option_str_len = 0;
1196 
1197 		if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|s",
1198 		                          &arg_pattern, &arg_pattern_len, &string, &string_len,
1199 		                          &option_str, &option_str_len)==FAILURE) {
1200 			RETURN_FALSE;
1201 		}
1202 
1203 		if (option_str != NULL) {
1204 			_php_mb_regex_init_options(option_str, option_str_len, &option, &syntax, NULL);
1205 		} else {
1206 			option |= MBREX(regex_default_options);
1207 			syntax = MBREX(regex_default_syntax);
1208 		}
1209 	}
1210 
1211 	if (!php_mb_check_encoding(string, string_len,
1212 			_php_mb_regex_mbctype2name(MBREX(current_mbctype)))) {
1213 		RETURN_FALSE;
1214 	}
1215 
1216 	if ((re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), syntax)) == NULL) {
1217 		RETURN_FALSE;
1218 	}
1219 
1220 	/* match */
1221 	err = onig_match(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), (OnigUChar *)string, NULL, 0);
1222 	if (err >= 0) {
1223 		RETVAL_TRUE;
1224 	} else {
1225 		RETVAL_FALSE;
1226 	}
1227 }
1228 /* }}} */
1229 
1230 /* regex search */
1231 /* {{{ _php_mb_regex_ereg_search_exec */
1232 static void
_php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS,int mode)1233 _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mode)
1234 {
1235 	char *arg_pattern = NULL, *arg_options = NULL;
1236 	size_t arg_pattern_len, arg_options_len;
1237 	int n, i, err, pos, len, beg, end;
1238 	OnigOptionType option;
1239 	OnigUChar *str;
1240 	OnigSyntaxType *syntax;
1241 
1242 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "|ss", &arg_pattern, &arg_pattern_len, &arg_options, &arg_options_len) == FAILURE) {
1243 		return;
1244 	}
1245 
1246 	option = MBREX(regex_default_options);
1247 
1248 	if (arg_options) {
1249 		option = 0;
1250 		_php_mb_regex_init_options(arg_options, arg_options_len, &option, &syntax, NULL);
1251 	}
1252 
1253 	if (arg_pattern) {
1254 		/* create regex pattern buffer */
1255 		if ((MBREX(search_re) = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), MBREX(regex_default_syntax))) == NULL) {
1256 			RETURN_FALSE;
1257 		}
1258 	}
1259 
1260 	pos = MBREX(search_pos);
1261 	str = NULL;
1262 	len = 0;
1263 	if (!Z_ISUNDEF(MBREX(search_str)) && Z_TYPE(MBREX(search_str)) == IS_STRING){
1264 		str = (OnigUChar *)Z_STRVAL(MBREX(search_str));
1265 		len = Z_STRLEN(MBREX(search_str));
1266 	}
1267 
1268 	if (MBREX(search_re) == NULL) {
1269 		php_error_docref(NULL, E_WARNING, "No regex given");
1270 		RETURN_FALSE;
1271 	}
1272 
1273 	if (str == NULL) {
1274 		php_error_docref(NULL, E_WARNING, "No string given");
1275 		RETURN_FALSE;
1276 	}
1277 
1278 	if (MBREX(search_regs)) {
1279 		onig_region_free(MBREX(search_regs), 1);
1280 	}
1281 	MBREX(search_regs) = onig_region_new();
1282 
1283 	err = onig_search(MBREX(search_re), str, str + len, str + pos, str  + len, MBREX(search_regs), 0);
1284 	if (err == ONIG_MISMATCH) {
1285 		MBREX(search_pos) = len;
1286 		RETVAL_FALSE;
1287 	} else if (err <= -2) {
1288 		OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
1289 		onig_error_code_to_str(err_str, err);
1290 		php_error_docref(NULL, E_WARNING, "mbregex search failure in mbregex_search(): %s", err_str);
1291 		RETVAL_FALSE;
1292 	} else {
1293 		switch (mode) {
1294 		case 1:
1295 			array_init(return_value);
1296 			beg = MBREX(search_regs)->beg[0];
1297 			end = MBREX(search_regs)->end[0];
1298 			add_next_index_long(return_value, beg);
1299 			add_next_index_long(return_value, end - beg);
1300 			break;
1301 		case 2:
1302 			array_init(return_value);
1303 			n = MBREX(search_regs)->num_regs;
1304 			for (i = 0; i < n; i++) {
1305 				beg = MBREX(search_regs)->beg[i];
1306 				end = MBREX(search_regs)->end[i];
1307 				if (beg >= 0 && beg <= end && end <= len) {
1308 					add_index_stringl(return_value, i, (char *)&str[beg], end - beg);
1309 				} else {
1310 					add_index_bool(return_value, i, 0);
1311 				}
1312 			}
1313 			break;
1314 		default:
1315 			RETVAL_TRUE;
1316 			break;
1317 		}
1318 		end = MBREX(search_regs)->end[0];
1319 		if (pos <= end) {
1320 			MBREX(search_pos) = end;
1321 		} else {
1322 			MBREX(search_pos) = pos + 1;
1323 		}
1324 	}
1325 
1326 	if (err < 0) {
1327 		onig_region_free(MBREX(search_regs), 1);
1328 		MBREX(search_regs) = (OnigRegion *)NULL;
1329 	}
1330 }
1331 /* }}} */
1332 
1333 /* {{{ proto bool mb_ereg_search([string pattern[, string option]])
1334    Regular expression search for multibyte string */
PHP_FUNCTION(mb_ereg_search)1335 PHP_FUNCTION(mb_ereg_search)
1336 {
1337 	_php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
1338 }
1339 /* }}} */
1340 
1341 /* {{{ proto array mb_ereg_search_pos([string pattern[, string option]])
1342    Regular expression search for multibyte string */
PHP_FUNCTION(mb_ereg_search_pos)1343 PHP_FUNCTION(mb_ereg_search_pos)
1344 {
1345 	_php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
1346 }
1347 /* }}} */
1348 
1349 /* {{{ proto array mb_ereg_search_regs([string pattern[, string option]])
1350    Regular expression search for multibyte string */
PHP_FUNCTION(mb_ereg_search_regs)1351 PHP_FUNCTION(mb_ereg_search_regs)
1352 {
1353 	_php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 2);
1354 }
1355 /* }}} */
1356 
1357 /* {{{ proto bool mb_ereg_search_init(string string [, string pattern[, string option]])
1358    Initialize string and regular expression for search. */
PHP_FUNCTION(mb_ereg_search_init)1359 PHP_FUNCTION(mb_ereg_search_init)
1360 {
1361 	size_t argc = ZEND_NUM_ARGS();
1362 	zend_string *arg_str;
1363 	char *arg_pattern = NULL, *arg_options = NULL;
1364 	size_t arg_pattern_len = 0, arg_options_len = 0;
1365 	OnigSyntaxType *syntax = NULL;
1366 	OnigOptionType option;
1367 
1368 	if (zend_parse_parameters(argc, "S|ss", &arg_str, &arg_pattern, &arg_pattern_len, &arg_options, &arg_options_len) == FAILURE) {
1369 		return;
1370 	}
1371 
1372 	if (argc > 1 && arg_pattern_len == 0) {
1373 		php_error_docref(NULL, E_WARNING, "Empty pattern");
1374 		RETURN_FALSE;
1375 	}
1376 
1377 	option = MBREX(regex_default_options);
1378 	syntax = MBREX(regex_default_syntax);
1379 
1380 	if (argc == 3) {
1381 		option = 0;
1382 		_php_mb_regex_init_options(arg_options, arg_options_len, &option, &syntax, NULL);
1383 	}
1384 
1385 	if (argc > 1) {
1386 		/* create regex pattern buffer */
1387 		if ((MBREX(search_re) = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), syntax)) == NULL) {
1388 			RETURN_FALSE;
1389 		}
1390 	}
1391 
1392 	if (!Z_ISNULL(MBREX(search_str))) {
1393 		zval_ptr_dtor(&MBREX(search_str));
1394 	}
1395 
1396 	ZVAL_STR_COPY(&MBREX(search_str), arg_str);
1397 
1398 	if (php_mb_check_encoding(
1399 	ZSTR_VAL(arg_str),
1400 	ZSTR_LEN(arg_str),
1401 	_php_mb_regex_mbctype2name(MBREX(current_mbctype))
1402 	)) {
1403 		MBREX(search_pos) = 0;
1404 		RETVAL_TRUE;
1405 	} else {
1406 		MBREX(search_pos) = ZSTR_LEN(arg_str);
1407 		RETVAL_FALSE;
1408 	}
1409 
1410 	if (MBREX(search_regs) != NULL) {
1411 		onig_region_free(MBREX(search_regs), 1);
1412 		MBREX(search_regs) = NULL;
1413 	}
1414 }
1415 /* }}} */
1416 
1417 /* {{{ proto array mb_ereg_search_getregs(void)
1418    Get matched substring of the last time */
PHP_FUNCTION(mb_ereg_search_getregs)1419 PHP_FUNCTION(mb_ereg_search_getregs)
1420 {
1421 	int n, i, len, beg, end;
1422 	OnigUChar *str;
1423 
1424 	if (MBREX(search_regs) != NULL && Z_TYPE(MBREX(search_str)) == IS_STRING) {
1425 		array_init(return_value);
1426 
1427 		str = (OnigUChar *)Z_STRVAL(MBREX(search_str));
1428 		len = Z_STRLEN(MBREX(search_str));
1429 		n = MBREX(search_regs)->num_regs;
1430 		for (i = 0; i < n; i++) {
1431 			beg = MBREX(search_regs)->beg[i];
1432 			end = MBREX(search_regs)->end[i];
1433 			if (beg >= 0 && beg <= end && end <= len) {
1434 				add_index_stringl(return_value, i, (char *)&str[beg], end - beg);
1435 			} else {
1436 				add_index_bool(return_value, i, 0);
1437 			}
1438 		}
1439 	} else {
1440 		RETVAL_FALSE;
1441 	}
1442 }
1443 /* }}} */
1444 
1445 /* {{{ proto int mb_ereg_search_getpos(void)
1446    Get search start position */
PHP_FUNCTION(mb_ereg_search_getpos)1447 PHP_FUNCTION(mb_ereg_search_getpos)
1448 {
1449 	RETVAL_LONG(MBREX(search_pos));
1450 }
1451 /* }}} */
1452 
1453 /* {{{ proto bool mb_ereg_search_setpos(int position)
1454    Set search start position */
PHP_FUNCTION(mb_ereg_search_setpos)1455 PHP_FUNCTION(mb_ereg_search_setpos)
1456 {
1457 	zend_long position;
1458 
1459 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "l", &position) == FAILURE) {
1460 		return;
1461 	}
1462 
1463 	/* Accept negative position if length of search string can be determined */
1464 	if ((position < 0) && (!Z_ISUNDEF(MBREX(search_str))) && (Z_TYPE(MBREX(search_str)) == IS_STRING)) {
1465 		position += Z_STRLEN(MBREX(search_str));
1466 	}
1467 
1468 	if (position < 0 || (!Z_ISUNDEF(MBREX(search_str)) && Z_TYPE(MBREX(search_str)) == IS_STRING && (size_t)position > Z_STRLEN(MBREX(search_str)))) {
1469 		php_error_docref(NULL, E_WARNING, "Position is out of range");
1470 		MBREX(search_pos) = 0;
1471 		RETURN_FALSE;
1472 	}
1473 
1474 	MBREX(search_pos) = position;
1475 	RETURN_TRUE;
1476 }
1477 /* }}} */
1478 
1479 /* {{{ php_mb_regex_set_options */
_php_mb_regex_set_options(OnigOptionType options,OnigSyntaxType * syntax,OnigOptionType * prev_options,OnigSyntaxType ** prev_syntax)1480 static void _php_mb_regex_set_options(OnigOptionType options, OnigSyntaxType *syntax, OnigOptionType *prev_options, OnigSyntaxType **prev_syntax)
1481 {
1482 	if (prev_options != NULL) {
1483 		*prev_options = MBREX(regex_default_options);
1484 	}
1485 	if (prev_syntax != NULL) {
1486 		*prev_syntax = MBREX(regex_default_syntax);
1487 	}
1488 	MBREX(regex_default_options) = options;
1489 	MBREX(regex_default_syntax) = syntax;
1490 }
1491 /* }}} */
1492 
1493 /* {{{ proto string mb_regex_set_options([string options])
1494    Set or get the default options for mbregex functions */
PHP_FUNCTION(mb_regex_set_options)1495 PHP_FUNCTION(mb_regex_set_options)
1496 {
1497 	OnigOptionType opt;
1498 	OnigSyntaxType *syntax;
1499 	char *string = NULL;
1500 	size_t string_len;
1501 	char buf[16];
1502 
1503 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s",
1504 	                          &string, &string_len) == FAILURE) {
1505 		RETURN_FALSE;
1506 	}
1507 	if (string != NULL) {
1508 		opt = 0;
1509 		syntax = NULL;
1510 		_php_mb_regex_init_options(string, string_len, &opt, &syntax, NULL);
1511 		_php_mb_regex_set_options(opt, syntax, NULL, NULL);
1512 	} else {
1513 		opt = MBREX(regex_default_options);
1514 		syntax = MBREX(regex_default_syntax);
1515 	}
1516 	_php_mb_regex_get_option_string(buf, sizeof(buf), opt, syntax);
1517 
1518 	RETVAL_STRING(buf);
1519 }
1520 /* }}} */
1521 
1522 #endif	/* HAVE_MBREGEX */
1523 
1524 /*
1525  * Local variables:
1526  * tab-width: 4
1527  * c-basic-offset: 4
1528  * End:
1529  * vim600: fdm=marker
1530  * vim: noet sw=4 ts=4
1531  */
1532