xref: /PHP-5.5/ext/mbstring/php_mbregex.c (revision 5b597a2e)
1 /*
2    +----------------------------------------------------------------------+
3    | PHP Version 5                                                        |
4    +----------------------------------------------------------------------+
5    | Copyright (c) 1997-2015 The PHP Group                                |
6    +----------------------------------------------------------------------+
7    | This source file is subject to version 3.01 of the PHP license,      |
8    | that is bundled with this package in the file LICENSE, and is        |
9    | available through the world-wide-web at the following url:           |
10    | http://www.php.net/license/3_01.txt                                  |
11    | If you did not receive a copy of the PHP license and are unable to   |
12    | obtain it through the world-wide-web, please send a note to          |
13    | license@php.net so we can mail you a copy immediately.               |
14    +----------------------------------------------------------------------+
15    | Author: Tsukada Takuya <tsukada@fminn.nagano.nagano.jp>              |
16    +----------------------------------------------------------------------+
17  */
18 
19 /* $Id$ */
20 
21 
22 #ifdef HAVE_CONFIG_H
23 #include "config.h"
24 #endif
25 
26 #include "php.h"
27 #include "php_ini.h"
28 
29 #if HAVE_MBREGEX
30 
31 #include "ext/standard/php_smart_str.h"
32 #include "ext/standard/info.h"
33 #include "php_mbregex.h"
34 #include "mbstring.h"
35 
36 #include "php_onig_compat.h" /* must come prior to the oniguruma header */
37 #include <oniguruma.h>
38 #undef UChar
39 
40 ZEND_EXTERN_MODULE_GLOBALS(mbstring)
41 
42 struct _zend_mb_regex_globals {
43 	OnigEncoding default_mbctype;
44 	OnigEncoding current_mbctype;
45 	HashTable ht_rc;
46 	zval *search_str;
47 	zval *search_str_val;
48 	unsigned int search_pos;
49 	php_mb_regex_t *search_re;
50 	OnigRegion *search_regs;
51 	OnigOptionType regex_default_options;
52 	OnigSyntaxType *regex_default_syntax;
53 };
54 
55 #define MBREX(g) (MBSTRG(mb_regex_globals)->g)
56 
57 /* {{{ static void php_mb_regex_free_cache() */
/*
 * Element destructor installed on the regex cache (see the zend_hash_init()
 * call in the globals constructor): releases one compiled regex.
 *
 * pre - address of the cache slot holding the compiled regex pointer.
 */
static void php_mb_regex_free_cache(php_mb_regex_t **pre)
{
	php_mb_regex_t *cached = *pre;

	onig_free(cached);
}
62 /* }}} */
63 
64 /* {{{ _php_mb_regex_globals_ctor */
/*
 * Bring a freshly allocated globals block into its start-up state.
 *
 * The caller obtains the block with pemalloc(), so nothing in it is
 * pre-initialised: every member has to be assigned here, including
 * search_str_val, which used to be left indeterminate.
 *
 * pglobals - block to initialise (must not be NULL).
 * Returns SUCCESS (there is currently no failure path).
 */
static int _php_mb_regex_globals_ctor(zend_mb_regex_globals *pglobals TSRMLS_DC)
{
	pglobals->default_mbctype = ONIG_ENCODING_EUC_JP;
	pglobals->current_mbctype = ONIG_ENCODING_EUC_JP;
	/* persistent table; php_mb_regex_free_cache() releases each cached regex */
	zend_hash_init(&(pglobals->ht_rc), 0, NULL, (void (*)(void *)) php_mb_regex_free_cache, 1);
	pglobals->search_str = (zval*) NULL;
	pglobals->search_str_val = (zval*) NULL;
	pglobals->search_re = (php_mb_regex_t*)NULL;
	pglobals->search_pos = 0;
	pglobals->search_regs = (OnigRegion*)NULL;
	pglobals->regex_default_options = ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE;
	pglobals->regex_default_syntax = ONIG_SYNTAX_RUBY;
	return SUCCESS;
}
78 /* }}} */
79 
80 /* {{{ _php_mb_regex_globals_dtor */
/*
 * Tear down what the constructor built: destroys the regex cache, which in
 * turn runs the cache's element destructor on every entry.
 *
 * pglobals - globals block being destroyed (must not be NULL).
 */
static void _php_mb_regex_globals_dtor(zend_mb_regex_globals *pglobals TSRMLS_DC)
{
	HashTable *regex_cache = &pglobals->ht_rc;

	zend_hash_destroy(regex_cache);
}
85 /* }}} */
86 
87 /* {{{ php_mb_regex_globals_alloc */
php_mb_regex_globals_alloc(TSRMLS_D)88 zend_mb_regex_globals *php_mb_regex_globals_alloc(TSRMLS_D)
89 {
90 	zend_mb_regex_globals *pglobals = pemalloc(
91 			sizeof(zend_mb_regex_globals), 1);
92 	if (!pglobals) {
93 		return NULL;
94 	}
95 	if (SUCCESS != _php_mb_regex_globals_ctor(pglobals TSRMLS_CC)) {
96 		pefree(pglobals, 1);
97 		return NULL;
98 	}
99 	return pglobals;
100 }
101 /* }}} */
102 
103 /* {{{ php_mb_regex_globals_free */
/*
 * Destroy and release a globals block obtained from
 * php_mb_regex_globals_alloc().  A NULL argument is ignored.
 */
void php_mb_regex_globals_free(zend_mb_regex_globals *pglobals TSRMLS_DC)
{
	if (pglobals != NULL) {
		_php_mb_regex_globals_dtor(pglobals TSRMLS_CC);
		pefree(pglobals, 1);
	}
}
112 /* }}} */
113 
114 /* {{{ PHP_MINIT_FUNCTION(mb_regex) */
PHP_MINIT_FUNCTION(mb_regex)115 PHP_MINIT_FUNCTION(mb_regex)
116 {
117 	onig_init();
118 	return SUCCESS;
119 }
120 /* }}} */
121 
122 /* {{{ PHP_MSHUTDOWN_FUNCTION(mb_regex) */
PHP_MSHUTDOWN_FUNCTION(mb_regex)123 PHP_MSHUTDOWN_FUNCTION(mb_regex)
124 {
125 	onig_end();
126 	return SUCCESS;
127 }
128 /* }}} */
129 
130 /* {{{ PHP_RINIT_FUNCTION(mb_regex) */
PHP_RINIT_FUNCTION(mb_regex)131 PHP_RINIT_FUNCTION(mb_regex)
132 {
133 	return MBSTRG(mb_regex_globals) ? SUCCESS: FAILURE;
134 }
135 /* }}} */
136 
137 /* {{{ PHP_RSHUTDOWN_FUNCTION(mb_regex) */
PHP_RSHUTDOWN_FUNCTION(mb_regex)138 PHP_RSHUTDOWN_FUNCTION(mb_regex)
139 {
140 	MBREX(current_mbctype) = MBREX(default_mbctype);
141 
142 	if (MBREX(search_str) != NULL) {
143 		zval_ptr_dtor(&MBREX(search_str));
144 		MBREX(search_str) = (zval *)NULL;
145 	}
146 	MBREX(search_pos) = 0;
147 
148 	if (MBREX(search_regs) != NULL) {
149 		onig_region_free(MBREX(search_regs), 1);
150 		MBREX(search_regs) = (OnigRegion *)NULL;
151 	}
152 	zend_hash_clean(&MBREX(ht_rc));
153 
154 	return SUCCESS;
155 }
156 /* }}} */
157 
158 /* {{{ PHP_MINFO_FUNCTION(mb_regex) */
PHP_MINFO_FUNCTION(mb_regex)159 PHP_MINFO_FUNCTION(mb_regex)
160 {
161 	char buf[32];
162 	php_info_print_table_start();
163 	php_info_print_table_row(2, "Multibyte (japanese) regex support", "enabled");
164 	snprintf(buf, sizeof(buf), "%d.%d.%d",
165 			ONIGURUMA_VERSION_MAJOR,
166 			ONIGURUMA_VERSION_MINOR,
167 			ONIGURUMA_VERSION_TEENY);
168 #ifdef PHP_ONIG_BUNDLED
169 #ifdef USE_COMBINATION_EXPLOSION_CHECK
170 	php_info_print_table_row(2, "Multibyte regex (oniguruma) backtrack check", "On");
171 #else	/* USE_COMBINATION_EXPLOSION_CHECK */
172 	php_info_print_table_row(2, "Multibyte regex (oniguruma) backtrack check", "Off");
173 #endif	/* USE_COMBINATION_EXPLOSION_CHECK */
174 #endif /* PHP_BUNDLED_ONIG */
175 	php_info_print_table_row(2, "Multibyte regex (oniguruma) version", buf);
176 	php_info_print_table_end();
177 }
178 /* }}} */
179 
180 /*
181  * encoding name resolver
182  */
183 
184 /* {{{ encoding name map */
185 typedef struct _php_mb_regex_enc_name_map_t {
186 	const char *names;
187 	OnigEncoding code;
188 } php_mb_regex_enc_name_map_t;
189 
190 php_mb_regex_enc_name_map_t enc_name_map[] = {
191 #ifdef ONIG_ENCODING_EUC_JP
192 	{
193 		"EUC-JP\0EUCJP\0X-EUC-JP\0UJIS\0EUCJP\0EUCJP-WIN\0",
194 		ONIG_ENCODING_EUC_JP
195 	},
196 #endif
197 #ifdef ONIG_ENCODING_UTF8
198 	{
199 		"UTF-8\0UTF8\0",
200 		ONIG_ENCODING_UTF8
201 	},
202 #endif
203 #ifdef ONIG_ENCODING_UTF16_BE
204 	{
205 		"UTF-16\0UTF-16BE\0",
206 		ONIG_ENCODING_UTF16_BE
207 	},
208 #endif
209 #ifdef ONIG_ENCODING_UTF16_LE
210 	{
211 		"UTF-16LE\0",
212 		ONIG_ENCODING_UTF16_LE
213 	},
214 #endif
215 #ifdef ONIG_ENCODING_UTF32_BE
216 	{
217 		"UCS-4\0UTF-32\0UTF-32BE\0",
218 		ONIG_ENCODING_UTF32_BE
219 	},
220 #endif
221 #ifdef ONIG_ENCODING_UTF32_LE
222 	{
223 		"UCS-4LE\0UTF-32LE\0",
224 		ONIG_ENCODING_UTF32_LE
225 	},
226 #endif
227 #ifdef ONIG_ENCODING_SJIS
228 	{
229 		"SJIS\0CP932\0MS932\0SHIFT_JIS\0SJIS-WIN\0WINDOWS-31J\0",
230 		ONIG_ENCODING_SJIS
231 	},
232 #endif
233 #ifdef ONIG_ENCODING_BIG5
234 	{
235 		"BIG5\0BIG-5\0BIGFIVE\0CN-BIG5\0BIG-FIVE\0",
236 		ONIG_ENCODING_BIG5
237 	},
238 #endif
239 #ifdef ONIG_ENCODING_EUC_CN
240 	{
241 		"EUC-CN\0EUCCN\0EUC_CN\0GB-2312\0GB2312\0",
242 		ONIG_ENCODING_EUC_CN
243 	},
244 #endif
245 #ifdef ONIG_ENCODING_EUC_TW
246 	{
247 		"EUC-TW\0EUCTW\0EUC_TW\0",
248 		ONIG_ENCODING_EUC_TW
249 	},
250 #endif
251 #ifdef ONIG_ENCODING_EUC_KR
252 	{
253 		"EUC-KR\0EUCKR\0EUC_KR\0",
254 		ONIG_ENCODING_EUC_KR
255 	},
256 #endif
257 #if defined(ONIG_ENCODING_KOI8) && !PHP_ONIG_BAD_KOI8_ENTRY
258 	{
259 		"KOI8\0KOI-8\0",
260 		ONIG_ENCODING_KOI8
261 	},
262 #endif
263 #ifdef ONIG_ENCODING_KOI8_R
264 	{
265 		"KOI8R\0KOI8-R\0KOI-8R\0",
266 		ONIG_ENCODING_KOI8_R
267 	},
268 #endif
269 #ifdef ONIG_ENCODING_ISO_8859_1
270 	{
271 		"ISO-8859-1\0ISO8859-1\0ISO_8859_1\0ISO8859_1\0",
272 		ONIG_ENCODING_ISO_8859_1
273 	},
274 #endif
275 #ifdef ONIG_ENCODING_ISO_8859_2
276 	{
277 		"ISO-8859-2\0ISO8859-2\0ISO_8859_2\0ISO8859_2\0",
278 		ONIG_ENCODING_ISO_8859_2
279 	},
280 #endif
281 #ifdef ONIG_ENCODING_ISO_8859_3
282 	{
283 		"ISO-8859-3\0ISO8859-3\0ISO_8859_3\0ISO8859_3\0",
284 		ONIG_ENCODING_ISO_8859_3
285 	},
286 #endif
287 #ifdef ONIG_ENCODING_ISO_8859_4
288 	{
289 		"ISO-8859-4\0ISO8859-4\0ISO_8859_4\0ISO8859_4\0",
290 		ONIG_ENCODING_ISO_8859_4
291 	},
292 #endif
293 #ifdef ONIG_ENCODING_ISO_8859_5
294 	{
295 		"ISO-8859-5\0ISO8859-5\0ISO_8859_5\0ISO8859_5\0",
296 		ONIG_ENCODING_ISO_8859_5
297 	},
298 #endif
299 #ifdef ONIG_ENCODING_ISO_8859_6
300 	{
301 		"ISO-8859-6\0ISO8859-6\0ISO_8859_6\0ISO8859_6\0",
302 		ONIG_ENCODING_ISO_8859_6
303 	},
304 #endif
305 #ifdef ONIG_ENCODING_ISO_8859_7
306 	{
307 		"ISO-8859-7\0ISO8859-7\0ISO_8859_7\0ISO8859_7\0",
308 		ONIG_ENCODING_ISO_8859_7
309 	},
310 #endif
311 #ifdef ONIG_ENCODING_ISO_8859_8
312 	{
313 		"ISO-8859-8\0ISO8859-8\0ISO_8859_8\0ISO8859_8\0",
314 		ONIG_ENCODING_ISO_8859_8
315 	},
316 #endif
317 #ifdef ONIG_ENCODING_ISO_8859_9
318 	{
319 		"ISO-8859-9\0ISO8859-9\0ISO_8859_9\0ISO8859_9\0",
320 		ONIG_ENCODING_ISO_8859_9
321 	},
322 #endif
323 #ifdef ONIG_ENCODING_ISO_8859_10
324 	{
325 		"ISO-8859-10\0ISO8859-10\0ISO_8859_10\0ISO8859_10\0",
326 		ONIG_ENCODING_ISO_8859_10
327 	},
328 #endif
329 #ifdef ONIG_ENCODING_ISO_8859_11
330 	{
331 		"ISO-8859-11\0ISO8859-11\0ISO_8859_11\0ISO8859_11\0",
332 		ONIG_ENCODING_ISO_8859_11
333 	},
334 #endif
335 #ifdef ONIG_ENCODING_ISO_8859_13
336 	{
337 		"ISO-8859-13\0ISO8859-13\0ISO_8859_13\0ISO8859_13\0",
338 		ONIG_ENCODING_ISO_8859_13
339 	},
340 #endif
341 #ifdef ONIG_ENCODING_ISO_8859_14
342 	{
343 		"ISO-8859-14\0ISO8859-14\0ISO_8859_14\0ISO8859_14\0",
344 		ONIG_ENCODING_ISO_8859_14
345 	},
346 #endif
347 #ifdef ONIG_ENCODING_ISO_8859_15
348 	{
349 		"ISO-8859-15\0ISO8859-15\0ISO_8859_15\0ISO8859_15\0",
350 		ONIG_ENCODING_ISO_8859_15
351 	},
352 #endif
353 #ifdef ONIG_ENCODING_ISO_8859_16
354 	{
355 		"ISO-8859-16\0ISO8859-16\0ISO_8859_16\0ISO8859_16\0",
356 		ONIG_ENCODING_ISO_8859_16
357 	},
358 #endif
359 #ifdef ONIG_ENCODING_ASCII
360 	{
361 		"ASCII\0US-ASCII\0US_ASCII\0ISO646\0",
362 		ONIG_ENCODING_ASCII
363 	},
364 #endif
365 	{ NULL, ONIG_ENCODING_UNDEF }
366 };
367 /* }}} */
368 
369 /* {{{ php_mb_regex_name2mbctype */
_php_mb_regex_name2mbctype(const char * pname)370 static OnigEncoding _php_mb_regex_name2mbctype(const char *pname)
371 {
372 	const char *p;
373 	php_mb_regex_enc_name_map_t *mapping;
374 
375 	if (pname == NULL || !*pname) {
376 		return ONIG_ENCODING_UNDEF;
377 	}
378 
379 	for (mapping = enc_name_map; mapping->names != NULL; mapping++) {
380 		for (p = mapping->names; *p != '\0'; p += (strlen(p) + 1)) {
381 			if (strcasecmp(p, pname) == 0) {
382 				return mapping->code;
383 			}
384 		}
385 	}
386 
387 	return ONIG_ENCODING_UNDEF;
388 }
389 /* }}} */
390 
391 /* {{{ php_mb_regex_mbctype2name */
/*
 * Reverse lookup: find the name of an Oniguruma encoding.
 *
 * Returns the "names" pointer of the first enc_name_map row whose code equals
 * mbctype - read as a C string this is the row's first alias - or NULL when
 * no row matches.
 */
static const char *_php_mb_regex_mbctype2name(OnigEncoding mbctype)
{
	php_mb_regex_enc_name_map_t *entry = enc_name_map;

	while (entry->names != NULL) {
		if (entry->code == mbctype) {
			return entry->names;
		}
		entry++;
	}

	return NULL;
}
404 /* }}} */
405 
406 /* {{{ php_mb_regex_set_mbctype */
/*
 * Select the current regex encoding by name.
 *
 * Returns SUCCESS when encname is known, FAILURE otherwise (the current
 * encoding is left untouched in that case).
 */
int php_mb_regex_set_mbctype(const char *encname TSRMLS_DC)
{
	OnigEncoding resolved = _php_mb_regex_name2mbctype(encname);

	if (resolved != ONIG_ENCODING_UNDEF) {
		MBREX(current_mbctype) = resolved;
		return SUCCESS;
	}
	return FAILURE;
}
416 /* }}} */
417 
418 /* {{{ php_mb_regex_set_default_mbctype */
/*
 * Select the default regex encoding by name.
 *
 * Returns SUCCESS when encname is known, FAILURE otherwise (the default
 * encoding is left untouched in that case).
 */
int php_mb_regex_set_default_mbctype(const char *encname TSRMLS_DC)
{
	OnigEncoding resolved = _php_mb_regex_name2mbctype(encname);

	if (resolved != ONIG_ENCODING_UNDEF) {
		MBREX(default_mbctype) = resolved;
		return SUCCESS;
	}
	return FAILURE;
}
428 /* }}} */
429 
430 /* {{{ php_mb_regex_get_mbctype */
php_mb_regex_get_mbctype(TSRMLS_D)431 const char *php_mb_regex_get_mbctype(TSRMLS_D)
432 {
433 	return _php_mb_regex_mbctype2name(MBREX(current_mbctype));
434 }
435 /* }}} */
436 
437 /* {{{ php_mb_regex_get_default_mbctype */
php_mb_regex_get_default_mbctype(TSRMLS_D)438 const char *php_mb_regex_get_default_mbctype(TSRMLS_D)
439 {
440 	return _php_mb_regex_mbctype2name(MBREX(default_mbctype));
441 }
442 /* }}} */
443 
444 /*
445  * regex cache
446  */
447 /* {{{ php_mbregex_compile_pattern */
php_mbregex_compile_pattern(const char * pattern,int patlen,OnigOptionType options,OnigEncoding enc,OnigSyntaxType * syntax TSRMLS_DC)448 static php_mb_regex_t *php_mbregex_compile_pattern(const char *pattern, int patlen, OnigOptionType options, OnigEncoding enc, OnigSyntaxType *syntax TSRMLS_DC)
449 {
450 	int err_code = 0;
451 	int found = 0;
452 	php_mb_regex_t *retval = NULL, **rc = NULL;
453 	OnigErrorInfo err_info;
454 	OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
455 
456 	found = zend_hash_find(&MBREX(ht_rc), (char *)pattern, patlen+1, (void **) &rc);
457 	if (found == FAILURE || (*rc)->options != options || (*rc)->enc != enc || (*rc)->syntax != syntax) {
458 		if ((err_code = onig_new(&retval, (OnigUChar *)pattern, (OnigUChar *)(pattern + patlen), options, enc, syntax, &err_info)) != ONIG_NORMAL) {
459 			onig_error_code_to_str(err_str, err_code, err_info);
460 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex compile err: %s", err_str);
461 			retval = NULL;
462 			goto out;
463 		}
464 		zend_hash_update(&MBREX(ht_rc), (char *) pattern, patlen + 1, (void *) &retval, sizeof(retval), NULL);
465 	} else if (found == SUCCESS) {
466 		retval = *rc;
467 	}
468 out:
469 	return retval;
470 }
471 /* }}} */
472 
473 /* {{{ _php_mb_regex_get_option_string */
/*
 * Append one character to the option string being built.  The character is
 * stored only while room is left in the buffer; the required size is counted
 * regardless.
 */
static void php_mb_regex_option_putc(char **dst, size_t *room, size_t *needed, char ch)
{
	if (*room > 0) {
		--(*room);
		*(*dst)++ = ch;
	}
	++(*needed);
}

/*
 * Render option flags and a syntax as the option-letter string understood by
 * _php_mb_regex_init_options().
 *
 * str/len - destination buffer and its capacity in bytes.
 * option  - ONIG_OPTION_* flags; MULTILINE and SINGLELINE together are
 *           written as 'p', otherwise individually as 'm' / 's'.
 * syntax  - syntax to encode as a single letter; unknown syntaxes add nothing.
 *
 * Output is silently truncated when the buffer is too small.  Returns 0 when
 * everything (including the terminating NUL) fitted, otherwise the number of
 * bytes that would have been required.
 */
static size_t _php_mb_regex_get_option_string(char *str, size_t len, OnigOptionType option, OnigSyntaxType *syntax)
{
	const OnigOptionType both_lines = ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE;
	size_t room = len;
	size_t needed = 0;
	char *out = str;
	char syntax_ch = 0;

	if ((option & ONIG_OPTION_IGNORECASE) != 0) {
		php_mb_regex_option_putc(&out, &room, &needed, 'i');
	}

	if ((option & ONIG_OPTION_EXTEND) != 0) {
		php_mb_regex_option_putc(&out, &room, &needed, 'x');
	}

	if ((option & both_lines) == both_lines) {
		php_mb_regex_option_putc(&out, &room, &needed, 'p');
	} else {
		if ((option & ONIG_OPTION_MULTILINE) != 0) {
			php_mb_regex_option_putc(&out, &room, &needed, 'm');
		}
		if ((option & ONIG_OPTION_SINGLELINE) != 0) {
			php_mb_regex_option_putc(&out, &room, &needed, 's');
		}
	}

	if ((option & ONIG_OPTION_FIND_LONGEST) != 0) {
		php_mb_regex_option_putc(&out, &room, &needed, 'l');
	}

	if ((option & ONIG_OPTION_FIND_NOT_EMPTY) != 0) {
		php_mb_regex_option_putc(&out, &room, &needed, 'n');
	}

	/* the syntax letter comes last; the first matching syntax wins */
	if (syntax == ONIG_SYNTAX_JAVA) {
		syntax_ch = 'j';
	} else if (syntax == ONIG_SYNTAX_GNU_REGEX) {
		syntax_ch = 'u';
	} else if (syntax == ONIG_SYNTAX_GREP) {
		syntax_ch = 'g';
	} else if (syntax == ONIG_SYNTAX_EMACS) {
		syntax_ch = 'c';
	} else if (syntax == ONIG_SYNTAX_RUBY) {
		syntax_ch = 'r';
	} else if (syntax == ONIG_SYNTAX_PERL) {
		syntax_ch = 'z';
	} else if (syntax == ONIG_SYNTAX_POSIX_BASIC) {
		syntax_ch = 'b';
	} else if (syntax == ONIG_SYNTAX_POSIX_EXTENDED) {
		syntax_ch = 'd';
	}

	if (syntax_ch != 0) {
		php_mb_regex_option_putc(&out, &room, &needed, syntax_ch);
	}

	/* terminator is counted like any other character */
	php_mb_regex_option_putc(&out, &room, &needed, '\0');

	return (len < needed) ? needed : 0;
}
576 /* }}} */
577 
578 /* {{{ _php_mb_regex_init_options */
579 static void
_php_mb_regex_init_options(const char * parg,int narg,OnigOptionType * option,OnigSyntaxType ** syntax,int * eval)580 _php_mb_regex_init_options(const char *parg, int narg, OnigOptionType *option, OnigSyntaxType **syntax, int *eval)
581 {
582 	int n;
583 	char c;
584 	int optm = 0;
585 
586 	*syntax = ONIG_SYNTAX_RUBY;
587 
588 	if (parg != NULL) {
589 		n = 0;
590 		while(n < narg) {
591 			c = parg[n++];
592 			switch (c) {
593 				case 'i':
594 					optm |= ONIG_OPTION_IGNORECASE;
595 					break;
596 				case 'x':
597 					optm |= ONIG_OPTION_EXTEND;
598 					break;
599 				case 'm':
600 					optm |= ONIG_OPTION_MULTILINE;
601 					break;
602 				case 's':
603 					optm |= ONIG_OPTION_SINGLELINE;
604 					break;
605 				case 'p':
606 					optm |= ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE;
607 					break;
608 				case 'l':
609 					optm |= ONIG_OPTION_FIND_LONGEST;
610 					break;
611 				case 'n':
612 					optm |= ONIG_OPTION_FIND_NOT_EMPTY;
613 					break;
614 				case 'j':
615 					*syntax = ONIG_SYNTAX_JAVA;
616 					break;
617 				case 'u':
618 					*syntax = ONIG_SYNTAX_GNU_REGEX;
619 					break;
620 				case 'g':
621 					*syntax = ONIG_SYNTAX_GREP;
622 					break;
623 				case 'c':
624 					*syntax = ONIG_SYNTAX_EMACS;
625 					break;
626 				case 'r':
627 					*syntax = ONIG_SYNTAX_RUBY;
628 					break;
629 				case 'z':
630 					*syntax = ONIG_SYNTAX_PERL;
631 					break;
632 				case 'b':
633 					*syntax = ONIG_SYNTAX_POSIX_BASIC;
634 					break;
635 				case 'd':
636 					*syntax = ONIG_SYNTAX_POSIX_EXTENDED;
637 					break;
638 				case 'e':
639 					if (eval != NULL) *eval = 1;
640 					break;
641 				default:
642 					break;
643 			}
644 		}
645 		if (option != NULL) *option|=optm;
646 	}
647 }
648 /* }}} */
649 
650 /*
651  * php functions
652  */
653 
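/* {{{ proto mixed mb_regex_encoding([string encoding])
   Without an argument, returns the current regex encoding as a string (FALSE if it has no known name).
   With an argument, sets the current regex encoding and returns TRUE, or FALSE if the encoding is unknown. */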
PHP_FUNCTION(mb_regex_encoding)656 PHP_FUNCTION(mb_regex_encoding)
657 {
658 	size_t argc = ZEND_NUM_ARGS();
659 	char *encoding;
660 	int encoding_len;
661 	OnigEncoding mbctype;
662 
663 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &encoding, &encoding_len) == FAILURE) {
664 		return;
665 	}
666 
667 	if (argc == 0) {
668 		const char *retval = _php_mb_regex_mbctype2name(MBREX(current_mbctype));
669 
670 		if (retval == NULL) {
671 			RETURN_FALSE;
672 		}
673 
674 		RETURN_STRING((char *)retval, 1);
675 	} else if (argc == 1) {
676 		mbctype = _php_mb_regex_name2mbctype(encoding);
677 
678 		if (mbctype == ONIG_ENCODING_UNDEF) {
679 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
680 			RETURN_FALSE;
681 		}
682 
683 		MBREX(current_mbctype) = mbctype;
684 		RETURN_TRUE;
685 	}
686 }
687 /* }}} */
688 
689 /* {{{ _php_mb_regex_ereg_exec */
/*
 * Shared implementation of mb_ereg() and mb_eregi().
 *
 * PHP arguments: pattern, string [, registers].
 * icase - non-zero adds ONIG_OPTION_IGNORECASE to the default options.
 *
 * Return value (through return_value):
 *   FALSE  - bad arguments, empty pattern, compile failure or no match;
 *   1      - match and no registers argument was passed;
 *   length - byte length of the whole match when registers was passed
 *            (1 is reported for an empty match).
 * On a match the registers argument is replaced by an array holding one
 * entry per group: the matched bytes, or FALSE for an empty/unset group.
 * It is left untouched when there is no match.
 */
static void _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase)
{
	zval **arg_pattern, *array = NULL;
	char *string;
	int string_len;
	php_mb_regex_t *re;
	OnigRegion *regs = NULL;
	int group, match_len, beg, end;
	OnigOptionType options;
	OnigUChar *subject, *subject_end;

	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "Zs|z", &arg_pattern, &string, &string_len, &array) == FAILURE) {
		RETURN_FALSE;
	}

	options = MBREX(regex_default_options);
	if (icase) {
		options |= ONIG_OPTION_IGNORECASE;
	}

	/* a non-string pattern is used in its string form; doubles lose their
	 * fractional part first */
	if (Z_TYPE_PP(arg_pattern) != IS_STRING) {
		if (Z_TYPE_PP(arg_pattern) == IS_DOUBLE) {
			convert_to_long_ex(arg_pattern);
		}
		convert_to_string_ex(arg_pattern);
	}

	if (!Z_STRVAL_PP(arg_pattern) || Z_STRLEN_PP(arg_pattern) == 0) {
		php_error_docref(NULL TSRMLS_CC, E_WARNING, "empty pattern");
		RETVAL_FALSE;
		goto out;
	}

	re = php_mbregex_compile_pattern(Z_STRVAL_PP(arg_pattern), Z_STRLEN_PP(arg_pattern), options, MBREX(current_mbctype), MBREX(regex_default_syntax) TSRMLS_CC);
	if (re == NULL) {
		RETVAL_FALSE;
		goto out;
	}

	regs = onig_region_new();
	subject = (OnigUChar *)string;
	subject_end = (OnigUChar *)(string + string_len);

	/* run the search over the whole subject */
	if (onig_search(re, subject, subject_end, subject, subject_end, regs, 0) < 0) {
		RETVAL_FALSE;
		goto out;
	}

	if (array == NULL) {
		match_len = 1;
	} else {
		match_len = regs->end[0] - regs->beg[0];

		/* rebuild the caller's variable as the list of captured groups */
		zval_dtor(array);
		array_init(array);
		for (group = 0; group < regs->num_regs; group++) {
			beg = regs->beg[group];
			end = regs->end[group];
			if (beg >= 0 && beg < end && end <= string_len) {
				add_index_stringl(array, group, (char *)&string[beg], end - beg, 1);
			} else {
				add_index_bool(array, group, 0);
			}
		}

		/* an empty match is still reported as a truthy value */
		if (match_len == 0) {
			match_len = 1;
		}
	}

	RETVAL_LONG(match_len);
out:
	if (regs != NULL) {
		onig_region_free(regs, 1);
	}
}
768 /* }}} */
769 
770 /* {{{ proto int mb_ereg(string pattern, string string [, array registers])
771    Regular expression match for multibyte string */
PHP_FUNCTION(mb_ereg)772 PHP_FUNCTION(mb_ereg)
773 {
774 	_php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
775 }
776 /* }}} */
777 
778 /* {{{ proto int mb_eregi(string pattern, string string [, array registers])
779    Case-insensitive regular expression match for multibyte string */
PHP_FUNCTION(mb_eregi)780 PHP_FUNCTION(mb_eregi)
781 {
782 	_php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
783 }
784 /* }}} */
785 
786 /* {{{ _php_mb_regex_ereg_replace_exec */
/*
 * Shared implementation of mb_ereg_replace(), mb_eregi_replace() and
 * mb_ereg_replace_callback().
 *
 * PHP arguments: pattern, replacement (string or callback), string
 * [, option letters].
 *
 * options     - base ONIG_OPTION_* flags supplied by the wrapper.  Letters
 *               from the option string are OR-ed into them; without an
 *               option string the default options are OR-ed in and the
 *               default syntax is used.
 * is_callable - non-zero when the second PHP argument is a callback that
 *               receives the array of matched groups and returns the
 *               replacement text.
 *
 * In a replacement string, "\0".."\9" insert the corresponding group.  With
 * option 'e' the expanded replacement is evaluated as PHP code and the
 * result is inserted instead.
 * NOTE(review): option 'e' feeds the expanded replacement to
 * zend_eval_stringl(); it must never be built from untrusted input.
 *
 * Sets return_value to the resulting string, or to FALSE on argument,
 * compile or search errors and when 'e' is combined with a callback.
 */
static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOptionType options, int is_callable)
{
	zval **arg_pattern_zval;

	char *arg_pattern;
	int arg_pattern_len;

	char *replace;
	int replace_len;

	zend_fcall_info arg_replace_fci;
	zend_fcall_info_cache arg_replace_fci_cache;

	char *string;
	int string_len;

	char *p;
	php_mb_regex_t *re;
	OnigSyntaxType *syntax;
	OnigRegion *regs = NULL;
	smart_str out_buf = { 0 };
	smart_str eval_buf = { 0 };
	smart_str *pbuf;
	int i, err, eval, n;
	OnigUChar *pos;
	OnigUChar *string_lim;
	char *description = NULL;
	char pat_buf[2];

	const mbfl_encoding *enc;

	/* the replacement string is walked character by character, which needs
	 * the libmbfl view of the current regex encoding */
	{
		const char *current_enc_name;
		current_enc_name = _php_mb_regex_mbctype2name(MBREX(current_mbctype));
		if (current_enc_name == NULL ||
			(enc = mbfl_name2encoding(current_enc_name)) == NULL) {
			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown error");
			RETURN_FALSE;
		}
	}
	eval = 0;
	{
		char *option_str = NULL;
		int option_str_len = 0;

		if (!is_callable) {
			if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "Zss|s",
						&arg_pattern_zval,
						&replace, &replace_len,
						&string, &string_len,
						&option_str, &option_str_len) == FAILURE) {
				RETURN_FALSE;
			}
		} else {
			if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "Zfs|s",
						&arg_pattern_zval,
						&arg_replace_fci, &arg_replace_fci_cache,
						&string, &string_len,
						&option_str, &option_str_len) == FAILURE) {
				RETURN_FALSE;
			}
		}

		if (option_str != NULL) {
			_php_mb_regex_init_options(option_str, option_str_len, &options, &syntax, &eval);
		} else {
			options |= MBREX(regex_default_options);
			syntax = MBREX(regex_default_syntax);
		}
	}
	if (Z_TYPE_PP(arg_pattern_zval) == IS_STRING) {
		arg_pattern = Z_STRVAL_PP(arg_pattern_zval);
		arg_pattern_len = Z_STRLEN_PP(arg_pattern_zval);
	} else {
		/* a non-string pattern is taken as the code of a single byte */
		/* FIXME: this code is not multibyte aware! */
		convert_to_long_ex(arg_pattern_zval);
		pat_buf[0] = (char)Z_LVAL_PP(arg_pattern_zval);
		pat_buf[1] = '\0';

		arg_pattern = pat_buf;
		arg_pattern_len = 1;
	}
	/* create regex pattern buffer */
	re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, options, MBREX(current_mbctype), syntax TSRMLS_CC);
	if (re == NULL) {
		RETURN_FALSE;
	}

	/* reject the invalid combination BEFORE anything is allocated below;
	 * returning after the allocation used to leak "description" */
	if (is_callable && eval) {
		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Option 'e' cannot be used with replacement callback");
		RETURN_FALSE;
	}

	if (eval || is_callable) {
		pbuf = &eval_buf;
		description = zend_make_compiled_string_description("mbregex replace" TSRMLS_CC);
	} else {
		pbuf = &out_buf;
		description = NULL;
	}

	/* do the actual work */
	err = 0;
	pos = (OnigUChar *)string;
	string_lim = (OnigUChar*)(string + string_len);
	regs = onig_region_new();
	while (err >= 0) {
		err = onig_search(re, (OnigUChar *)string, (OnigUChar *)string_lim, pos, (OnigUChar *)string_lim, regs, 0);
		if (err <= -2) {
			OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
			onig_error_code_to_str(err_str, err);
			php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex search failure in php_mbereg_replace_exec(): %s", err_str);
			break;
		}
		if (err >= 0) {
#if moriyoshi_0
			if (regs->beg[0] == regs->end[0]) {
				php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty regular expression");
				break;
			}
#endif
			/* copy the part of the string before the match */
			smart_str_appendl(&out_buf, pos, (size_t)((OnigUChar *)(string + regs->beg[0]) - pos));

			if (!is_callable) {
				/* copy replacement and backrefs */
				i = 0;
				p = replace;
				while (i < replace_len) {
					int fwd = (int) php_mb_mbchar_bytes_ex(p, enc);
					n = -1;
					/* "\<digit>" counts as a back-reference only when the
					 * backslash is a single-byte character */
					if ((replace_len - i) >= 2 && fwd == 1 &&
					p[0] == '\\' && p[1] >= '0' && p[1] <= '9') {
						n = p[1] - '0';
					}
					if (n >= 0 && n < regs->num_regs) {
						if (regs->beg[n] >= 0 && regs->beg[n] < regs->end[n] && regs->end[n] <= string_len) {
							smart_str_appendl(pbuf, string + regs->beg[n], regs->end[n] - regs->beg[n]);
						}
						p += 2;
						i += 2;
					} else {
						smart_str_appendl(pbuf, p, fwd);
						p += fwd;
						i += fwd;
					}
				}
			}

			if (eval) {
				zval v;
				/* null terminate buffer */
				smart_str_0(&eval_buf);
				/* do eval */
				if (zend_eval_stringl(eval_buf.c, eval_buf.len, &v, description TSRMLS_CC) == FAILURE) {
					efree(description);
					php_error_docref(NULL TSRMLS_CC,E_ERROR, "Failed evaluating code: %s%s", PHP_EOL, eval_buf.c);
					/* zend_error() does not return in this case */
				}

				/* result of eval */
				convert_to_string(&v);
				smart_str_appendl(&out_buf, Z_STRVAL(v), Z_STRLEN(v));
				/* Clean up */
				eval_buf.len = 0;
				zval_dtor(&v);
			} else if (is_callable) {
				zval *retval_ptr = NULL;
				zval **args[1];
				zval *subpats;
				int grp;

				/* hand the callback an array of all groups of this match */
				MAKE_STD_ZVAL(subpats);
				array_init(subpats);

				for (grp = 0; grp < regs->num_regs; grp++) {
					add_next_index_stringl(subpats, string + regs->beg[grp], regs->end[grp] - regs->beg[grp], 1);
				}

				args[0] = &subpats;
				/* null terminate buffer */
				smart_str_0(&eval_buf);

				arg_replace_fci.param_count = 1;
				arg_replace_fci.params = args;
				arg_replace_fci.retval_ptr_ptr = &retval_ptr;
				if (zend_call_function(&arg_replace_fci, &arg_replace_fci_cache TSRMLS_CC) == SUCCESS && arg_replace_fci.retval_ptr_ptr && retval_ptr) {
					convert_to_string_ex(&retval_ptr);
					smart_str_appendl(&out_buf, Z_STRVAL_P(retval_ptr), Z_STRLEN_P(retval_ptr));
					eval_buf.len = 0;
					zval_ptr_dtor(&retval_ptr);
				} else {
					if (!EG(exception)) {
						php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to call custom replacement function");
					}
				}
				zval_ptr_dtor(&subpats);
			}

			/* continue after the match; after an empty match copy one byte
			 * and step over it so the loop always makes progress */
			n = regs->end[0];
			if ((pos - (OnigUChar *)string) < n) {
				pos = (OnigUChar *)string + n;
			} else {
				if (pos < string_lim) {
					smart_str_appendl(&out_buf, pos, 1);
				}
				pos++;
			}
		} else { /* nomatch */
			/* stick that last bit of string on our output */
			if (string_lim - pos > 0) {
				smart_str_appendl(&out_buf, pos, string_lim - pos);
			}
		}
		onig_region_free(regs, 0);
	}

	if (description) {
		efree(description);
	}
	if (regs != NULL) {
		onig_region_free(regs, 1);
	}
	smart_str_free(&eval_buf);

	if (err <= -2) {
		smart_str_free(&out_buf);
		RETVAL_FALSE;
	} else {
		/* terminate the buffer and hand it over without copying */
		smart_str_appendc(&out_buf, '\0');
		RETVAL_STRINGL((char *)out_buf.c, out_buf.len - 1, 0);
	}
}
1022 /* }}} */
1023 
1024 /* {{{ proto string mb_ereg_replace(string pattern, string replacement, string string [, string option])
1025    Replace regular expression for multibyte string */
PHP_FUNCTION(mb_ereg_replace)1026 PHP_FUNCTION(mb_ereg_replace)
1027 {
1028 	_php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 0);
1029 }
1030 /* }}} */
1031 
/* {{{ proto string mb_eregi_replace(string pattern, string replacement, string string [, string option])
   Case-insensitive regular expression replace for multibyte string */
PHP_FUNCTION(mb_eregi_replace)1034 PHP_FUNCTION(mb_eregi_replace)
1035 {
1036 	_php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, ONIG_OPTION_IGNORECASE, 0);
1037 }
1038 /* }}} */
1039 
1040 /* {{{ proto string mb_ereg_replace_callback(string pattern, string callback, string string [, string option])
1041     regular expression for multibyte string using replacement callback */
PHP_FUNCTION(mb_ereg_replace_callback)1042 PHP_FUNCTION(mb_ereg_replace_callback)
1043 {
1044 	_php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 1);
1045 }
1046 /* }}} */
1047 
1048 /* {{{ proto array mb_split(string pattern, string string [, int limit])
1049    split multibyte string into array by regular expression */
PHP_FUNCTION(mb_split)1050 PHP_FUNCTION(mb_split)
1051 {
1052 	char *arg_pattern;
1053 	int arg_pattern_len;
1054 	php_mb_regex_t *re;
1055 	OnigRegion *regs = NULL;
1056 	char *string;
1057 	OnigUChar *pos, *chunk_pos;
1058 	int string_len;
1059 
1060 	int n, err;
1061 	long count = -1;
1062 
1063 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|l", &arg_pattern, &arg_pattern_len, &string, &string_len, &count) == FAILURE) {
1064 		RETURN_FALSE;
1065 	}
1066 
1067 	if (count > 0) {
1068 		count--;
1069 	}
1070 
1071 	/* create regex pattern buffer */
1072 	if ((re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, MBREX(regex_default_options), MBREX(current_mbctype), MBREX(regex_default_syntax) TSRMLS_CC)) == NULL) {
1073 		RETURN_FALSE;
1074 	}
1075 
1076 	array_init(return_value);
1077 
1078 	chunk_pos = pos = (OnigUChar *)string;
1079 	err = 0;
1080 	regs = onig_region_new();
1081 	/* churn through str, generating array entries as we go */
1082 	while (count != 0 && (pos - (OnigUChar *)string) < string_len) {
1083 		int beg, end;
1084 		err = onig_search(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), pos, (OnigUChar *)(string + string_len), regs, 0);
1085 		if (err < 0) {
1086 			break;
1087 		}
1088 		beg = regs->beg[0], end = regs->end[0];
1089 		/* add it to the array */
1090 		if ((pos - (OnigUChar *)string) < end) {
1091 			if (beg < string_len && beg >= (chunk_pos - (OnigUChar *)string)) {
1092 				add_next_index_stringl(return_value, (char *)chunk_pos, ((OnigUChar *)(string + beg) - chunk_pos), 1);
1093 				--count;
1094 			} else {
1095 				err = -2;
1096 				break;
1097 			}
1098 			/* point at our new starting point */
1099 			chunk_pos = pos = (OnigUChar *)string + end;
1100 		} else {
1101 			pos++;
1102 		}
1103 		onig_region_free(regs, 0);
1104 	}
1105 
1106 	onig_region_free(regs, 1);
1107 
1108 	/* see if we encountered an error */
1109 	if (err <= -2) {
1110 		OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
1111 		onig_error_code_to_str(err_str, err);
1112 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex search failure in mbsplit(): %s", err_str);
1113 		zval_dtor(return_value);
1114 		RETURN_FALSE;
1115 	}
1116 
1117 	/* otherwise we just have one last element to add to the array */
1118 	n = ((OnigUChar *)(string + string_len) - chunk_pos);
1119 	if (n > 0) {
1120 		add_next_index_stringl(return_value, (char *)chunk_pos, n, 1);
1121 	} else {
1122 		add_next_index_stringl(return_value, "", 0, 1);
1123 	}
1124 }
1125 /* }}} */
1126 
1127 /* {{{ proto bool mb_ereg_match(string pattern, string string [,string option])
1128    Regular expression match for multibyte string */
PHP_FUNCTION(mb_ereg_match)1129 PHP_FUNCTION(mb_ereg_match)
1130 {
1131 	char *arg_pattern;
1132 	int arg_pattern_len;
1133 
1134 	char *string;
1135 	int string_len;
1136 
1137 	php_mb_regex_t *re;
1138 	OnigSyntaxType *syntax;
1139 	OnigOptionType option = 0;
1140 	int err;
1141 
1142 	{
1143 		char *option_str = NULL;
1144 		int option_str_len = 0;
1145 
1146 		if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|s",
1147 		                          &arg_pattern, &arg_pattern_len, &string, &string_len,
1148 		                          &option_str, &option_str_len)==FAILURE) {
1149 			RETURN_FALSE;
1150 		}
1151 
1152 		if (option_str != NULL) {
1153 			_php_mb_regex_init_options(option_str, option_str_len, &option, &syntax, NULL);
1154 		} else {
1155 			option |= MBREX(regex_default_options);
1156 			syntax = MBREX(regex_default_syntax);
1157 		}
1158 	}
1159 
1160 	if ((re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), syntax TSRMLS_CC)) == NULL) {
1161 		RETURN_FALSE;
1162 	}
1163 
1164 	/* match */
1165 	err = onig_match(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), (OnigUChar *)string, NULL, 0);
1166 	if (err >= 0) {
1167 		RETVAL_TRUE;
1168 	} else {
1169 		RETVAL_FALSE;
1170 	}
1171 }
1172 /* }}} */
1173 
1174 /* regex search */
1175 /* {{{ _php_mb_regex_ereg_search_exec */
1176 static void
_php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS,int mode)1177 _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mode)
1178 {
1179 	size_t argc = ZEND_NUM_ARGS();
1180 	char *arg_pattern, *arg_options;
1181 	int arg_pattern_len, arg_options_len;
1182 	int n, i, err, pos, len, beg, end;
1183 	OnigOptionType option;
1184 	OnigUChar *str;
1185 	OnigSyntaxType *syntax;
1186 
1187 	if (zend_parse_parameters(argc TSRMLS_CC, "|ss", &arg_pattern, &arg_pattern_len, &arg_options, &arg_options_len) == FAILURE) {
1188 		return;
1189 	}
1190 
1191 	option = MBREX(regex_default_options);
1192 
1193 	if (argc == 2) {
1194 		option = 0;
1195 		_php_mb_regex_init_options(arg_options, arg_options_len, &option, &syntax, NULL);
1196 	}
1197 
1198 	if (argc > 0) {
1199 		/* create regex pattern buffer */
1200 		if ((MBREX(search_re) = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), MBREX(regex_default_syntax) TSRMLS_CC)) == NULL) {
1201 			RETURN_FALSE;
1202 		}
1203 	}
1204 
1205 	pos = MBREX(search_pos);
1206 	str = NULL;
1207 	len = 0;
1208 	if (MBREX(search_str) != NULL && Z_TYPE_P(MBREX(search_str)) == IS_STRING){
1209 		str = (OnigUChar *)Z_STRVAL_P(MBREX(search_str));
1210 		len = Z_STRLEN_P(MBREX(search_str));
1211 	}
1212 
1213 	if (MBREX(search_re) == NULL) {
1214 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "No regex given");
1215 		RETURN_FALSE;
1216 	}
1217 
1218 	if (str == NULL) {
1219 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "No string given");
1220 		RETURN_FALSE;
1221 	}
1222 
1223 	if (MBREX(search_regs)) {
1224 		onig_region_free(MBREX(search_regs), 1);
1225 	}
1226 	MBREX(search_regs) = onig_region_new();
1227 
1228 	err = onig_search(MBREX(search_re), str, str + len, str + pos, str  + len, MBREX(search_regs), 0);
1229 	if (err == ONIG_MISMATCH) {
1230 		MBREX(search_pos) = len;
1231 		RETVAL_FALSE;
1232 	} else if (err <= -2) {
1233 		OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
1234 		onig_error_code_to_str(err_str, err);
1235 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex search failure in mbregex_search(): %s", err_str);
1236 		RETVAL_FALSE;
1237 	} else {
1238 		if (MBREX(search_regs)->beg[0] == MBREX(search_regs)->end[0]) {
1239 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty regular expression");
1240 		}
1241 		switch (mode) {
1242 		case 1:
1243 			array_init(return_value);
1244 			beg = MBREX(search_regs)->beg[0];
1245 			end = MBREX(search_regs)->end[0];
1246 			add_next_index_long(return_value, beg);
1247 			add_next_index_long(return_value, end - beg);
1248 			break;
1249 		case 2:
1250 			array_init(return_value);
1251 			n = MBREX(search_regs)->num_regs;
1252 			for (i = 0; i < n; i++) {
1253 				beg = MBREX(search_regs)->beg[i];
1254 				end = MBREX(search_regs)->end[i];
1255 				if (beg >= 0 && beg <= end && end <= len) {
1256 					add_index_stringl(return_value, i, (char *)&str[beg], end - beg, 1);
1257 				} else {
1258 					add_index_bool(return_value, i, 0);
1259 				}
1260 			}
1261 			break;
1262 		default:
1263 			RETVAL_TRUE;
1264 			break;
1265 		}
1266 		end = MBREX(search_regs)->end[0];
1267 		if (pos < end) {
1268 			MBREX(search_pos) = end;
1269 		} else {
1270 			MBREX(search_pos) = pos + 1;
1271 		}
1272 	}
1273 
1274 	if (err < 0) {
1275 		onig_region_free(MBREX(search_regs), 1);
1276 		MBREX(search_regs) = (OnigRegion *)NULL;
1277 	}
1278 }
1279 /* }}} */
1280 
1281 /* {{{ proto bool mb_ereg_search([string pattern[, string option]])
1282    Regular expression search for multibyte string */
PHP_FUNCTION(mb_ereg_search)1283 PHP_FUNCTION(mb_ereg_search)
1284 {
1285 	_php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
1286 }
1287 /* }}} */
1288 
1289 /* {{{ proto array mb_ereg_search_pos([string pattern[, string option]])
1290    Regular expression search for multibyte string */
PHP_FUNCTION(mb_ereg_search_pos)1291 PHP_FUNCTION(mb_ereg_search_pos)
1292 {
1293 	_php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
1294 }
1295 /* }}} */
1296 
1297 /* {{{ proto array mb_ereg_search_regs([string pattern[, string option]])
1298    Regular expression search for multibyte string */
PHP_FUNCTION(mb_ereg_search_regs)1299 PHP_FUNCTION(mb_ereg_search_regs)
1300 {
1301 	_php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 2);
1302 }
1303 /* }}} */
1304 
1305 /* {{{ proto bool mb_ereg_search_init(string string [, string pattern[, string option]])
1306    Initialize string and regular expression for search. */
PHP_FUNCTION(mb_ereg_search_init)1307 PHP_FUNCTION(mb_ereg_search_init)
1308 {
1309 	size_t argc = ZEND_NUM_ARGS();
1310 	zval *arg_str;
1311 	char *arg_pattern = NULL, *arg_options = NULL;
1312 	int arg_pattern_len = 0, arg_options_len = 0;
1313 	OnigSyntaxType *syntax = NULL;
1314 	OnigOptionType option;
1315 
1316 	if (zend_parse_parameters(argc TSRMLS_CC, "z|ss", &arg_str, &arg_pattern, &arg_pattern_len, &arg_options, &arg_options_len) == FAILURE) {
1317 		return;
1318 	}
1319 
1320 	if (argc > 1 && arg_pattern_len == 0) {
1321 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty pattern");
1322 		RETURN_FALSE;
1323 	}
1324 
1325 	option = MBREX(regex_default_options);
1326 	syntax = MBREX(regex_default_syntax);
1327 
1328 	if (argc == 3) {
1329 		option = 0;
1330 		_php_mb_regex_init_options(arg_options, arg_options_len, &option, &syntax, NULL);
1331 	}
1332 
1333 	if (argc > 1) {
1334 		/* create regex pattern buffer */
1335 		if ((MBREX(search_re) = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), syntax TSRMLS_CC)) == NULL) {
1336 			RETURN_FALSE;
1337 		}
1338 	}
1339 
1340 	if (MBREX(search_str) != NULL) {
1341 		zval_ptr_dtor(&MBREX(search_str));
1342 		MBREX(search_str) = (zval *)NULL;
1343 	}
1344 
1345 	MBREX(search_str) = arg_str;
1346 	Z_ADDREF_P(MBREX(search_str));
1347 	SEPARATE_ZVAL_IF_NOT_REF(&MBREX(search_str));
1348 
1349 	MBREX(search_pos) = 0;
1350 
1351 	if (MBREX(search_regs) != NULL) {
1352 		onig_region_free(MBREX(search_regs), 1);
1353 		MBREX(search_regs) = (OnigRegion *) NULL;
1354 	}
1355 
1356 	RETURN_TRUE;
1357 }
1358 /* }}} */
1359 
1360 /* {{{ proto array mb_ereg_search_getregs(void)
1361    Get matched substring of the last time */
PHP_FUNCTION(mb_ereg_search_getregs)1362 PHP_FUNCTION(mb_ereg_search_getregs)
1363 {
1364 	int n, i, len, beg, end;
1365 	OnigUChar *str;
1366 
1367 	if (MBREX(search_regs) != NULL && Z_TYPE_P(MBREX(search_str)) == IS_STRING && Z_STRVAL_P(MBREX(search_str)) != NULL) {
1368 		array_init(return_value);
1369 
1370 		str = (OnigUChar *)Z_STRVAL_P(MBREX(search_str));
1371 		len = Z_STRLEN_P(MBREX(search_str));
1372 		n = MBREX(search_regs)->num_regs;
1373 		for (i = 0; i < n; i++) {
1374 			beg = MBREX(search_regs)->beg[i];
1375 			end = MBREX(search_regs)->end[i];
1376 			if (beg >= 0 && beg <= end && end <= len) {
1377 				add_index_stringl(return_value, i, (char *)&str[beg], end - beg, 1);
1378 			} else {
1379 				add_index_bool(return_value, i, 0);
1380 			}
1381 		}
1382 	} else {
1383 		RETVAL_FALSE;
1384 	}
1385 }
1386 /* }}} */
1387 
1388 /* {{{ proto int mb_ereg_search_getpos(void)
1389    Get search start position */
PHP_FUNCTION(mb_ereg_search_getpos)1390 PHP_FUNCTION(mb_ereg_search_getpos)
1391 {
1392 	RETVAL_LONG(MBREX(search_pos));
1393 }
1394 /* }}} */
1395 
1396 /* {{{ proto bool mb_ereg_search_setpos(int position)
1397    Set search start position */
PHP_FUNCTION(mb_ereg_search_setpos)1398 PHP_FUNCTION(mb_ereg_search_setpos)
1399 {
1400 	long position;
1401 
1402 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "l", &position) == FAILURE) {
1403 		return;
1404 	}
1405 
1406 	if (position < 0 || (MBREX(search_str) != NULL && Z_TYPE_P(MBREX(search_str)) == IS_STRING && position >= Z_STRLEN_P(MBREX(search_str)))) {
1407 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Position is out of range");
1408 		MBREX(search_pos) = 0;
1409 		RETURN_FALSE;
1410 	}
1411 
1412 	MBREX(search_pos) = position;
1413 	RETURN_TRUE;
1414 }
1415 /* }}} */
1416 
1417 /* {{{ php_mb_regex_set_options */
/*
 * Install new default regex options and syntax.
 *
 * options / syntax          - values stored as the new defaults
 * prev_options / prev_syntax - optional out-parameters; when non-NULL they
 *                              receive the defaults that were in effect
 *                              before this call
 */
static void _php_mb_regex_set_options(OnigOptionType options, OnigSyntaxType *syntax, OnigOptionType *prev_options, OnigSyntaxType **prev_syntax TSRMLS_DC)
{
	/* hand back the old values first, then overwrite them */
	if (prev_options) {
		*prev_options = MBREX(regex_default_options);
	}
	MBREX(regex_default_options) = options;

	if (prev_syntax) {
		*prev_syntax = MBREX(regex_default_syntax);
	}
	MBREX(regex_default_syntax) = syntax;
}
1429 /* }}} */
1430 
1431 /* {{{ proto string mb_regex_set_options([string options])
1432    Set or get the default options for mbregex functions */
PHP_FUNCTION(mb_regex_set_options)1433 PHP_FUNCTION(mb_regex_set_options)
1434 {
1435 	OnigOptionType opt;
1436 	OnigSyntaxType *syntax;
1437 	char *string = NULL;
1438 	int string_len;
1439 	char buf[16];
1440 
1441 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s",
1442 	                          &string, &string_len) == FAILURE) {
1443 		RETURN_FALSE;
1444 	}
1445 	if (string != NULL) {
1446 		opt = 0;
1447 		syntax = NULL;
1448 		_php_mb_regex_init_options(string, string_len, &opt, &syntax, NULL);
1449 		_php_mb_regex_set_options(opt, syntax, NULL, NULL TSRMLS_CC);
1450 	} else {
1451 		opt = MBREX(regex_default_options);
1452 		syntax = MBREX(regex_default_syntax);
1453 	}
1454 	_php_mb_regex_get_option_string(buf, sizeof(buf), opt, syntax);
1455 
1456 	RETVAL_STRING(buf, 1);
1457 }
1458 /* }}} */
1459 
1460 #endif	/* HAVE_MBREGEX */
1461 
1462 /*
1463  * Local variables:
1464  * tab-width: 4
1465  * c-basic-offset: 4
1466  * End:
1467  * vim600: fdm=marker
1468  * vim: noet sw=4 ts=4
1469  */
1470