xref: /PHP-5.3/ext/mbstring/php_mbregex.c (revision a2045ff3)
1 /*
2    +----------------------------------------------------------------------+
3    | PHP Version 5                                                        |
4    +----------------------------------------------------------------------+
5    | Copyright (c) 1997-2013 The PHP Group                                |
6    +----------------------------------------------------------------------+
7    | This source file is subject to version 3.01 of the PHP license,      |
8    | that is bundled with this package in the file LICENSE, and is        |
9    | available through the world-wide-web at the following url:           |
10    | http://www.php.net/license/3_01.txt                                  |
11    | If you did not receive a copy of the PHP license and are unable to   |
12    | obtain it through the world-wide-web, please send a note to          |
13    | license@php.net so we can mail you a copy immediately.               |
14    +----------------------------------------------------------------------+
15    | Author: Tsukada Takuya <tsukada@fminn.nagano.nagano.jp>              |
16    +----------------------------------------------------------------------+
17  */
18 
19 /* $Id$ */
20 
21 
22 #ifdef HAVE_CONFIG_H
23 #include "config.h"
24 #endif
25 
26 #include "php.h"
27 #include "php_ini.h"
28 
29 #if HAVE_MBREGEX
30 
31 #include "ext/standard/php_smart_str.h"
32 #include "ext/standard/info.h"
33 #include "php_mbregex.h"
34 #include "mbstring.h"
35 
36 #include "php_onig_compat.h" /* must come prior to the oniguruma header */
37 #include <oniguruma.h>
38 #undef UChar
39 
40 ZEND_EXTERN_MODULE_GLOBALS(mbstring)
41 
/* State shared by the mbregex functions; fields are reached through MBREX(). */
struct _zend_mb_regex_globals {
	OnigEncoding default_mbctype;	/* encoding copied back into current_mbctype at request shutdown */
	OnigEncoding current_mbctype;	/* encoding passed to the pattern compiler */
	HashTable ht_rc;	/* cache of compiled regexes, keyed by the pattern string */
	zval *search_str;	/* subject string read by the search functions; released at request shutdown */
	zval *search_str_val;	/* NOTE(review): no use is visible in this part of the file */
	unsigned int search_pos;	/* search offset; reset to 0 at request shutdown */
	php_mb_regex_t *search_re;	/* regex most recently compiled by the search functions */
	OnigRegion *search_regs;	/* match regions owned by the globals; freed at request shutdown */
	OnigOptionType regex_default_options;	/* options used when no option string is supplied */
	OnigSyntaxType *regex_default_syntax;	/* syntax used when no option string is supplied */
};
54 
/* Access field g of the mbregex globals referenced from the mbstring module globals. */
#define MBREX(g) (MBSTRG(mb_regex_globals)->g)
56 
57 /* {{{ static void php_mb_regex_free_cache() */
/*
 * Element destructor registered for the regex cache hash table.
 * The stored element is a pointer to a compiled regex; release that regex.
 */
static void php_mb_regex_free_cache(php_mb_regex_t **pre)
{
	php_mb_regex_t *compiled = *pre;

	onig_free(compiled);
}
62 /* }}} */
63 
64 /* {{{ _php_mb_regex_globals_ctor */
/*
 * Initialise a freshly allocated globals structure.
 *
 * Both encodings start as EUC-JP, the regex cache is created as a persistent
 * hash table whose destructor frees the compiled regexes, and every search
 * related pointer starts out NULL.  Always returns SUCCESS.
 */
static int _php_mb_regex_globals_ctor(zend_mb_regex_globals *pglobals TSRMLS_DC)
{
	pglobals->default_mbctype = ONIG_ENCODING_EUC_JP;
	pglobals->current_mbctype = ONIG_ENCODING_EUC_JP;
	zend_hash_init(&(pglobals->ht_rc), 0, NULL, (void (*)(void *)) php_mb_regex_free_cache, 1);
	pglobals->search_str = (zval*) NULL;
	/* the structure is not zero-filled by its allocator, so every pointer
	 * field must be given a defined value here */
	pglobals->search_str_val = (zval*) NULL;
	pglobals->search_re = (php_mb_regex_t*)NULL;
	pglobals->search_pos = 0;
	pglobals->search_regs = (OnigRegion*)NULL;
	pglobals->regex_default_options = ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE;
	pglobals->regex_default_syntax = ONIG_SYNTAX_RUBY;
	return SUCCESS;
}
78 /* }}} */
79 
80 /* {{{ _php_mb_regex_globals_dtor */
/* Tear down the globals: destroys the regex cache hash table. */
static void _php_mb_regex_globals_dtor(zend_mb_regex_globals *pglobals TSRMLS_DC)
{
	HashTable *regex_cache = &pglobals->ht_rc;

	zend_hash_destroy(regex_cache);
}
85 /* }}} */
86 
87 /* {{{ php_mb_regex_globals_alloc */
php_mb_regex_globals_alloc(TSRMLS_D)88 zend_mb_regex_globals *php_mb_regex_globals_alloc(TSRMLS_D)
89 {
90 	zend_mb_regex_globals *pglobals = pemalloc(
91 			sizeof(zend_mb_regex_globals), 1);
92 	if (!pglobals) {
93 		return NULL;
94 	}
95 	if (SUCCESS != _php_mb_regex_globals_ctor(pglobals TSRMLS_CC)) {
96 		pefree(pglobals, 1);
97 		return NULL;
98 	}
99 	return pglobals;
100 }
101 /* }}} */
102 
103 /* {{{ php_mb_regex_globals_free */
/* Destroy and release globals obtained from php_mb_regex_globals_alloc(); NULL is ignored. */
void php_mb_regex_globals_free(zend_mb_regex_globals *pglobals TSRMLS_DC)
{
	if (pglobals != NULL) {
		_php_mb_regex_globals_dtor(pglobals TSRMLS_CC);
		pefree(pglobals, 1);
	}
}
112 /* }}} */
113 
114 /* {{{ PHP_MINIT_FUNCTION(mb_regex) */
/* Module startup: initialise the Oniguruma library. Always reports SUCCESS. */
PHP_MINIT_FUNCTION(mb_regex)
{
	onig_init();
	return SUCCESS;
}
120 /* }}} */
121 
122 /* {{{ PHP_MSHUTDOWN_FUNCTION(mb_regex) */
/* Module shutdown: shut the Oniguruma library down. Always reports SUCCESS. */
PHP_MSHUTDOWN_FUNCTION(mb_regex)
{
	onig_end();
	return SUCCESS;
}
128 /* }}} */
129 
130 /* {{{ PHP_RINIT_FUNCTION(mb_regex) */
PHP_RINIT_FUNCTION(mb_regex)131 PHP_RINIT_FUNCTION(mb_regex)
132 {
133 	return MBSTRG(mb_regex_globals) ? SUCCESS: FAILURE;
134 }
135 /* }}} */
136 
137 /* {{{ PHP_RSHUTDOWN_FUNCTION(mb_regex) */
PHP_RSHUTDOWN_FUNCTION(mb_regex)138 PHP_RSHUTDOWN_FUNCTION(mb_regex)
139 {
140 	MBREX(current_mbctype) = MBREX(default_mbctype);
141 
142 	if (MBREX(search_str) != NULL) {
143 		zval_ptr_dtor(&MBREX(search_str));
144 		MBREX(search_str) = (zval *)NULL;
145 	}
146 	MBREX(search_pos) = 0;
147 
148 	if (MBREX(search_regs) != NULL) {
149 		onig_region_free(MBREX(search_regs), 1);
150 		MBREX(search_regs) = (OnigRegion *)NULL;
151 	}
152 	zend_hash_clean(&MBREX(ht_rc));
153 
154 	return SUCCESS;
155 }
156 /* }}} */
157 
158 /* {{{ PHP_MINFO_FUNCTION(mb_regex) */
PHP_MINFO_FUNCTION(mb_regex)159 PHP_MINFO_FUNCTION(mb_regex)
160 {
161 	char buf[32];
162 	php_info_print_table_start();
163 	php_info_print_table_row(2, "Multibyte (japanese) regex support", "enabled");
164 	snprintf(buf, sizeof(buf), "%d.%d.%d",
165 			ONIGURUMA_VERSION_MAJOR,
166 			ONIGURUMA_VERSION_MINOR,
167 			ONIGURUMA_VERSION_TEENY);
168 #ifdef PHP_ONIG_BUNDLED
169 #ifdef USE_COMBINATION_EXPLOSION_CHECK
170 	php_info_print_table_row(2, "Multibyte regex (oniguruma) backtrack check", "On");
171 #else	/* USE_COMBINATION_EXPLOSION_CHECK */
172 	php_info_print_table_row(2, "Multibyte regex (oniguruma) backtrack check", "Off");
173 #endif	/* USE_COMBINATION_EXPLOSION_CHECK */
174 #endif /* PHP_BUNDLED_ONIG */
175 	php_info_print_table_row(2, "Multibyte regex (oniguruma) version", buf);
176 	php_info_print_table_end();
177 }
178 /* }}} */
179 
180 /*
181  * encoding name resolver
182  */
183 
184 /* {{{ encoding name map */
/* One row of the encoding table: a packed alias list and the Oniguruma encoding it names. */
typedef struct _php_mb_regex_enc_name_map_t {
	const char *names;	/* consecutive NUL-terminated aliases, ended by an empty string */
	OnigEncoding code;	/* encoding returned when any alias matches */
} php_mb_regex_enc_name_map_t;
189 
190 php_mb_regex_enc_name_map_t enc_name_map[] = {
191 #ifdef ONIG_ENCODING_EUC_JP
192 	{
193 		"EUC-JP\0EUCJP\0X-EUC-JP\0UJIS\0EUCJP\0EUCJP-WIN\0",
194 		ONIG_ENCODING_EUC_JP
195 	},
196 #endif
197 #ifdef ONIG_ENCODING_UTF8
198 	{
199 		"UTF-8\0UTF8\0",
200 		ONIG_ENCODING_UTF8
201 	},
202 #endif
203 #ifdef ONIG_ENCODING_UTF16_BE
204 	{
205 		"UTF-16\0UTF-16BE\0",
206 		ONIG_ENCODING_UTF16_BE
207 	},
208 #endif
209 #ifdef ONIG_ENCODING_UTF16_LE
210 	{
211 		"UTF-16LE\0",
212 		ONIG_ENCODING_UTF16_LE
213 	},
214 #endif
215 #ifdef ONIG_ENCODING_UTF32_BE
216 	{
217 		"UCS-4\0UTF-32\0UTF-32BE\0",
218 		ONIG_ENCODING_UTF32_BE
219 	},
220 #endif
221 #ifdef ONIG_ENCODING_UTF32_LE
222 	{
223 		"UCS-4LE\0UTF-32LE\0",
224 		ONIG_ENCODING_UTF32_LE
225 	},
226 #endif
227 #ifdef ONIG_ENCODING_SJIS
228 	{
229 		"SJIS\0CP932\0MS932\0SHIFT_JIS\0SJIS-WIN\0WINDOWS-31J\0",
230 		ONIG_ENCODING_SJIS
231 	},
232 #endif
233 #ifdef ONIG_ENCODING_BIG5
234 	{
235 		"BIG5\0BIG-5\0BIGFIVE\0CN-BIG5\0BIG-FIVE\0",
236 		ONIG_ENCODING_BIG5
237 	},
238 #endif
239 #ifdef ONIG_ENCODING_EUC_CN
240 	{
241 		"EUC-CN\0EUCCN\0EUC_CN\0GB-2312\0GB2312\0",
242 		ONIG_ENCODING_EUC_CN
243 	},
244 #endif
245 #ifdef ONIG_ENCODING_EUC_TW
246 	{
247 		"EUC-TW\0EUCTW\0EUC_TW\0",
248 		ONIG_ENCODING_EUC_TW
249 	},
250 #endif
251 #ifdef ONIG_ENCODING_EUC_KR
252 	{
253 		"EUC-KR\0EUCKR\0EUC_KR\0",
254 		ONIG_ENCODING_EUC_KR
255 	},
256 #endif
257 #if defined(ONIG_ENCODING_KOI8) && !PHP_ONIG_BAD_KOI8_ENTRY
258 	{
259 		"KOI8\0KOI-8\0",
260 		ONIG_ENCODING_KOI8
261 	},
262 #endif
263 #ifdef ONIG_ENCODING_KOI8_R
264 	{
265 		"KOI8R\0KOI8-R\0KOI-8R\0",
266 		ONIG_ENCODING_KOI8_R
267 	},
268 #endif
269 #ifdef ONIG_ENCODING_ISO_8859_1
270 	{
271 		"ISO-8859-1\0ISO8859-1\0ISO_8859_1\0ISO8859_1\0",
272 		ONIG_ENCODING_ISO_8859_1
273 	},
274 #endif
275 #ifdef ONIG_ENCODING_ISO_8859_2
276 	{
277 		"ISO-8859-2\0ISO8859-2\0ISO_8859_2\0ISO8859_2\0",
278 		ONIG_ENCODING_ISO_8859_2
279 	},
280 #endif
281 #ifdef ONIG_ENCODING_ISO_8859_3
282 	{
283 		"ISO-8859-3\0ISO8859-3\0ISO_8859_3\0ISO8859_3\0",
284 		ONIG_ENCODING_ISO_8859_3
285 	},
286 #endif
287 #ifdef ONIG_ENCODING_ISO_8859_4
288 	{
289 		"ISO-8859-4\0ISO8859-4\0ISO_8859_4\0ISO8859_4\0",
290 		ONIG_ENCODING_ISO_8859_4
291 	},
292 #endif
293 #ifdef ONIG_ENCODING_ISO_8859_5
294 	{
295 		"ISO-8859-5\0ISO8859-5\0ISO_8859_5\0ISO8859_5\0",
296 		ONIG_ENCODING_ISO_8859_5
297 	},
298 #endif
299 #ifdef ONIG_ENCODING_ISO_8859_6
300 	{
301 		"ISO-8859-6\0ISO8859-6\0ISO_8859_6\0ISO8859_6\0",
302 		ONIG_ENCODING_ISO_8859_6
303 	},
304 #endif
305 #ifdef ONIG_ENCODING_ISO_8859_7
306 	{
307 		"ISO-8859-7\0ISO8859-7\0ISO_8859_7\0ISO8859_7\0",
308 		ONIG_ENCODING_ISO_8859_7
309 	},
310 #endif
311 #ifdef ONIG_ENCODING_ISO_8859_8
312 	{
313 		"ISO-8859-8\0ISO8859-8\0ISO_8859_8\0ISO8859_8\0",
314 		ONIG_ENCODING_ISO_8859_8
315 	},
316 #endif
317 #ifdef ONIG_ENCODING_ISO_8859_9
318 	{
319 		"ISO-8859-9\0ISO8859-9\0ISO_8859_9\0ISO8859_9\0",
320 		ONIG_ENCODING_ISO_8859_9
321 	},
322 #endif
323 #ifdef ONIG_ENCODING_ISO_8859_10
324 	{
325 		"ISO-8859-10\0ISO8859-10\0ISO_8859_10\0ISO8859_10\0",
326 		ONIG_ENCODING_ISO_8859_10
327 	},
328 #endif
329 #ifdef ONIG_ENCODING_ISO_8859_11
330 	{
331 		"ISO-8859-11\0ISO8859-11\0ISO_8859_11\0ISO8859_11\0",
332 		ONIG_ENCODING_ISO_8859_11
333 	},
334 #endif
335 #ifdef ONIG_ENCODING_ISO_8859_13
336 	{
337 		"ISO-8859-13\0ISO8859-13\0ISO_8859_13\0ISO8859_13\0",
338 		ONIG_ENCODING_ISO_8859_13
339 	},
340 #endif
341 #ifdef ONIG_ENCODING_ISO_8859_14
342 	{
343 		"ISO-8859-14\0ISO8859-14\0ISO_8859_14\0ISO8859_14\0",
344 		ONIG_ENCODING_ISO_8859_14
345 	},
346 #endif
347 #ifdef ONIG_ENCODING_ISO_8859_15
348 	{
349 		"ISO-8859-15\0ISO8859-15\0ISO_8859_15\0ISO8859_15\0",
350 		ONIG_ENCODING_ISO_8859_15
351 	},
352 #endif
353 #ifdef ONIG_ENCODING_ISO_8859_16
354 	{
355 		"ISO-8859-16\0ISO8859-16\0ISO_8859_16\0ISO8859_16\0",
356 		ONIG_ENCODING_ISO_8859_16
357 	},
358 #endif
359 #ifdef ONIG_ENCODING_ASCII
360 	{
361 		"ASCII\0US-ASCII\0US_ASCII\0ISO646\0",
362 		ONIG_ENCODING_ASCII
363 	},
364 #endif
365 	{ NULL, ONIG_ENCODING_UNDEF }
366 };
367 /* }}} */
368 
369 /* {{{ php_mb_regex_name2mbctype */
_php_mb_regex_name2mbctype(const char * pname)370 static OnigEncoding _php_mb_regex_name2mbctype(const char *pname)
371 {
372 	const char *p;
373 	php_mb_regex_enc_name_map_t *mapping;
374 
375 	if (pname == NULL) {
376 		return ONIG_ENCODING_UNDEF;
377 	}
378 
379 	for (mapping = enc_name_map; mapping->names != NULL; mapping++) {
380 		for (p = mapping->names; *p != '\0'; p += (strlen(p) + 1)) {
381 			if (strcasecmp(p, pname) == 0) {
382 				return mapping->code;
383 			}
384 		}
385 	}
386 
387 	return ONIG_ENCODING_UNDEF;
388 }
389 /* }}} */
390 
391 /* {{{ php_mb_regex_mbctype2name */
/*
 * Map an Oniguruma encoding back to a name.
 *
 * Returns the alias string of the first enc_name_map row with that encoding
 * (read as a C string this is the row's first alias), or NULL if no row matches.
 */
static const char *_php_mb_regex_mbctype2name(OnigEncoding mbctype)
{
	php_mb_regex_enc_name_map_t *entry = enc_name_map;

	while (entry->names != NULL) {
		if (entry->code == mbctype) {
			return entry->names;
		}
		entry++;
	}

	return NULL;
}
404 /* }}} */
405 
406 /* {{{ php_mb_regex_set_mbctype */
/*
 * Set the current regex encoding by name.
 * Returns SUCCESS, or FAILURE (leaving the encoding untouched) for an unknown name.
 */
int php_mb_regex_set_mbctype(const char *encname TSRMLS_DC)
{
	OnigEncoding resolved = _php_mb_regex_name2mbctype(encname);

	if (resolved != ONIG_ENCODING_UNDEF) {
		MBREX(current_mbctype) = resolved;
		return SUCCESS;
	}
	return FAILURE;
}
416 /* }}} */
417 
418 /* {{{ php_mb_regex_set_default_mbctype */
/*
 * Set the default regex encoding by name.
 * Returns SUCCESS, or FAILURE (leaving the default untouched) for an unknown name.
 */
int php_mb_regex_set_default_mbctype(const char *encname TSRMLS_DC)
{
	OnigEncoding resolved = _php_mb_regex_name2mbctype(encname);

	if (resolved != ONIG_ENCODING_UNDEF) {
		MBREX(default_mbctype) = resolved;
		return SUCCESS;
	}
	return FAILURE;
}
428 /* }}} */
429 
430 /* {{{ php_mb_regex_get_mbctype */
php_mb_regex_get_mbctype(TSRMLS_D)431 const char *php_mb_regex_get_mbctype(TSRMLS_D)
432 {
433 	return _php_mb_regex_mbctype2name(MBREX(current_mbctype));
434 }
435 /* }}} */
436 
437 /* {{{ php_mb_regex_get_default_mbctype */
php_mb_regex_get_default_mbctype(TSRMLS_D)438 const char *php_mb_regex_get_default_mbctype(TSRMLS_D)
439 {
440 	return _php_mb_regex_mbctype2name(MBREX(default_mbctype));
441 }
442 /* }}} */
443 
444 /*
445  * regex cache
446  */
447 /* {{{ php_mbregex_compile_pattern */
php_mbregex_compile_pattern(const char * pattern,int patlen,OnigOptionType options,OnigEncoding enc,OnigSyntaxType * syntax TSRMLS_DC)448 static php_mb_regex_t *php_mbregex_compile_pattern(const char *pattern, int patlen, OnigOptionType options, OnigEncoding enc, OnigSyntaxType *syntax TSRMLS_DC)
449 {
450 	int err_code = 0;
451 	int found = 0;
452 	php_mb_regex_t *retval = NULL, **rc = NULL;
453 	OnigErrorInfo err_info;
454 	OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
455 
456 	found = zend_hash_find(&MBREX(ht_rc), (char *)pattern, patlen+1, (void **) &rc);
457 	if (found == FAILURE || (*rc)->options != options || (*rc)->enc != enc || (*rc)->syntax != syntax) {
458 		if ((err_code = onig_new(&retval, (OnigUChar *)pattern, (OnigUChar *)(pattern + patlen), options, enc, syntax, &err_info)) != ONIG_NORMAL) {
459 			onig_error_code_to_str(err_str, err_code, err_info);
460 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex compile err: %s", err_str);
461 			retval = NULL;
462 			goto out;
463 		}
464 		zend_hash_update(&MBREX(ht_rc), (char *) pattern, patlen + 1, (void *) &retval, sizeof(retval), NULL);
465 	} else if (found == SUCCESS) {
466 		retval = *rc;
467 	}
468 out:
469 	return retval;
470 }
471 /* }}} */
472 
473 /* {{{ _php_mb_regex_get_option_string */
/*
 * Render an option mask and syntax as the option-letter string understood by
 * _php_mb_regex_init_options().
 *
 * Letters are produced in this order: i, x, then p (both MULTILINE and
 * SINGLELINE) or m / s (only one of them), l, n, and finally one syntax
 * letter if the syntax is a known one.  A terminating NUL follows.
 *
 * str/len  output buffer and its capacity; at most len bytes are written
 *
 * Returns 0 when the whole string including its NUL fitted, otherwise the
 * number of bytes that would have been required (the output is truncated
 * and may lack a NUL in that case).
 */
static size_t _php_mb_regex_get_option_string(char *str, size_t len, OnigOptionType option, OnigSyntaxType *syntax)
{
	const OnigOptionType both_lines = ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE;
	char flags[8];	/* at most 6 letters plus the NUL */
	size_t nflags = 0;
	size_t ncopy, k;
	char syntax_flag = 0;

	if (option & ONIG_OPTION_IGNORECASE) {
		flags[nflags++] = 'i';
	}
	if (option & ONIG_OPTION_EXTEND) {
		flags[nflags++] = 'x';
	}

	if ((option & both_lines) == both_lines) {
		flags[nflags++] = 'p';
	} else {
		if (option & ONIG_OPTION_MULTILINE) {
			flags[nflags++] = 'm';
		}
		if (option & ONIG_OPTION_SINGLELINE) {
			flags[nflags++] = 's';
		}
	}

	if (option & ONIG_OPTION_FIND_LONGEST) {
		flags[nflags++] = 'l';
	}
	if (option & ONIG_OPTION_FIND_NOT_EMPTY) {
		flags[nflags++] = 'n';
	}

	if (syntax == ONIG_SYNTAX_JAVA) {
		syntax_flag = 'j';
	} else if (syntax == ONIG_SYNTAX_GNU_REGEX) {
		syntax_flag = 'u';
	} else if (syntax == ONIG_SYNTAX_GREP) {
		syntax_flag = 'g';
	} else if (syntax == ONIG_SYNTAX_EMACS) {
		syntax_flag = 'c';
	} else if (syntax == ONIG_SYNTAX_RUBY) {
		syntax_flag = 'r';
	} else if (syntax == ONIG_SYNTAX_PERL) {
		syntax_flag = 'z';
	} else if (syntax == ONIG_SYNTAX_POSIX_BASIC) {
		syntax_flag = 'b';
	} else if (syntax == ONIG_SYNTAX_POSIX_EXTENDED) {
		syntax_flag = 'd';
	}
	if (syntax_flag != 0) {
		flags[nflags++] = syntax_flag;
	}

	flags[nflags++] = '\0';

	/* hand over as much of the rendered string as the caller's buffer holds */
	ncopy = (len < nflags) ? len : nflags;
	for (k = 0; k < ncopy; k++) {
		str[k] = flags[k];
	}

	return (len < nflags) ? nflags : 0;
}
576 /* }}} */
577 
578 /* {{{ _php_mb_regex_init_options */
579 static void
_php_mb_regex_init_options(const char * parg,int narg,OnigOptionType * option,OnigSyntaxType ** syntax,int * eval)580 _php_mb_regex_init_options(const char *parg, int narg, OnigOptionType *option, OnigSyntaxType **syntax, int *eval)
581 {
582 	int n;
583 	char c;
584 	int optm = 0;
585 
586 	*syntax = ONIG_SYNTAX_RUBY;
587 
588 	if (parg != NULL) {
589 		n = 0;
590 		while(n < narg) {
591 			c = parg[n++];
592 			switch (c) {
593 				case 'i':
594 					optm |= ONIG_OPTION_IGNORECASE;
595 					break;
596 				case 'x':
597 					optm |= ONIG_OPTION_EXTEND;
598 					break;
599 				case 'm':
600 					optm |= ONIG_OPTION_MULTILINE;
601 					break;
602 				case 's':
603 					optm |= ONIG_OPTION_SINGLELINE;
604 					break;
605 				case 'p':
606 					optm |= ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE;
607 					break;
608 				case 'l':
609 					optm |= ONIG_OPTION_FIND_LONGEST;
610 					break;
611 				case 'n':
612 					optm |= ONIG_OPTION_FIND_NOT_EMPTY;
613 					break;
614 				case 'j':
615 					*syntax = ONIG_SYNTAX_JAVA;
616 					break;
617 				case 'u':
618 					*syntax = ONIG_SYNTAX_GNU_REGEX;
619 					break;
620 				case 'g':
621 					*syntax = ONIG_SYNTAX_GREP;
622 					break;
623 				case 'c':
624 					*syntax = ONIG_SYNTAX_EMACS;
625 					break;
626 				case 'r':
627 					*syntax = ONIG_SYNTAX_RUBY;
628 					break;
629 				case 'z':
630 					*syntax = ONIG_SYNTAX_PERL;
631 					break;
632 				case 'b':
633 					*syntax = ONIG_SYNTAX_POSIX_BASIC;
634 					break;
635 				case 'd':
636 					*syntax = ONIG_SYNTAX_POSIX_EXTENDED;
637 					break;
638 				case 'e':
639 					if (eval != NULL) *eval = 1;
640 					break;
641 				default:
642 					break;
643 			}
644 		}
645 		if (option != NULL) *option|=optm;
646 	}
647 }
648 /* }}} */
649 
650 /*
 * php functions
652  */
653 
654 /* {{{ proto string mb_regex_encoding([string encoding])
655    Returns the current encoding for regex as a string. */
PHP_FUNCTION(mb_regex_encoding)656 PHP_FUNCTION(mb_regex_encoding)
657 {
658 	size_t argc = ZEND_NUM_ARGS();
659 	char *encoding;
660 	int encoding_len;
661 	OnigEncoding mbctype;
662 
663 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &encoding, &encoding_len) == FAILURE) {
664 		return;
665 	}
666 
667 	if (argc == 0) {
668 		const char *retval = _php_mb_regex_mbctype2name(MBREX(current_mbctype));
669 
670 		if (retval == NULL) {
671 			RETURN_FALSE;
672 		}
673 
674 		RETURN_STRING((char *)retval, 1);
675 	} else if (argc == 1) {
676 		mbctype = _php_mb_regex_name2mbctype(encoding);
677 
678 		if (mbctype == ONIG_ENCODING_UNDEF) {
679 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
680 			RETURN_FALSE;
681 		}
682 
683 		MBREX(current_mbctype) = mbctype;
684 		RETURN_TRUE;
685 	}
686 }
687 /* }}} */
688 
689 /* {{{ _php_mb_regex_ereg_exec */
/*
 * Shared implementation of mb_ereg() and mb_eregi().
 *
 * Arguments (PHP level): pattern, string [, registers].
 * icase  non-zero adds ONIG_OPTION_IGNORECASE to the default options.
 *
 * A non-string pattern is converted to a string first (doubles are
 * truncated to an integer beforehand).  An empty pattern raises an
 * E_WARNING and returns false.
 *
 * Returns false when the pattern fails to compile or the search finds
 * nothing.  On a match, returns 1 when no registers argument was given;
 * otherwise the byte length of the whole match (1 if that length is 0), and
 * registers is replaced by an array holding each group's text, or false for
 * a group that is empty or did not take part in the match.
 */
static void _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase)
{
	zval **arg_pattern;
	zval *array = NULL;
	char *string;
	int string_len;
	php_mb_regex_t *re;
	OnigRegion *regs = NULL;
	OnigOptionType options;
	OnigUChar *subject, *subject_end;
	int grp, match_len, from, to;

	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "Zs|z", &arg_pattern, &string, &string_len, &array) == FAILURE) {
		RETURN_FALSE;
	}

	options = MBREX(regex_default_options);
	if (icase) {
		options |= ONIG_OPTION_IGNORECASE;
	}

	/* numbers are accepted as patterns: integers as-is, doubles truncated */
	if (Z_TYPE_PP(arg_pattern) != IS_STRING) {
		if (Z_TYPE_PP(arg_pattern) == IS_DOUBLE) {
			convert_to_long_ex(arg_pattern);
		}
		convert_to_string_ex(arg_pattern);
	}

	if (Z_STRVAL_PP(arg_pattern) == NULL || Z_STRLEN_PP(arg_pattern) == 0) {
		php_error_docref(NULL TSRMLS_CC, E_WARNING, "empty pattern");
		RETURN_FALSE;
	}

	re = php_mbregex_compile_pattern(Z_STRVAL_PP(arg_pattern), Z_STRLEN_PP(arg_pattern), options, MBREX(current_mbctype), MBREX(regex_default_syntax) TSRMLS_CC);
	if (re == NULL) {
		RETURN_FALSE;
	}

	subject = (OnigUChar *)string;
	subject_end = (OnigUChar *)(string + string_len);
	regs = onig_region_new();

	if (onig_search(re, subject, subject_end, subject, subject_end, regs, 0) >= 0) {
		match_len = 1;
		if (array != NULL) {
			match_len = regs->end[0] - regs->beg[0];

			/* replace whatever the caller passed with a fresh array */
			zval_dtor(array);
			array_init(array);
			for (grp = 0; grp < regs->num_regs; grp++) {
				from = regs->beg[grp];
				to = regs->end[grp];
				if (from < 0 || from >= to || to > string_len) {
					add_index_bool(array, grp, 0);
				} else {
					add_index_stringl(array, grp, (char *)&string[from], to - from, 1);
				}
			}
		}

		/* an empty match is still reported as a truthy value */
		if (match_len == 0) {
			match_len = 1;
		}
		RETVAL_LONG(match_len);
	} else {
		RETVAL_FALSE;
	}

	if (regs != NULL) {
		onig_region_free(regs, 1);
	}
}
768 /* }}} */
769 
770 /* {{{ proto int mb_ereg(string pattern, string string [, array registers])
771    Regular expression match for multibyte string */
/* Case-sensitive entry point: forwards to _php_mb_regex_ereg_exec() with icase = 0. */
PHP_FUNCTION(mb_ereg)
{
	_php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
}
776 /* }}} */
777 
778 /* {{{ proto int mb_eregi(string pattern, string string [, array registers])
779    Case-insensitive regular expression match for multibyte string */
/* Case-insensitive entry point: forwards to _php_mb_regex_ereg_exec() with icase = 1. */
PHP_FUNCTION(mb_eregi)
{
	_php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
}
784 /* }}} */
785 
786 /* {{{ _php_mb_regex_ereg_replace_exec */
/*
 * Shared implementation of mb_ereg_replace() and mb_eregi_replace().
 *
 * Arguments (PHP level): pattern, replacement, string [, option].
 * options  option bits forced by the caller (0 or ONIG_OPTION_IGNORECASE).
 *
 * When an option string is given it is parsed by
 * _php_mb_regex_init_options(); otherwise the module's default options and
 * syntax are used.  A non-string pattern is converted to an integer and its
 * low byte is used as a one-byte pattern.
 *
 * Every match in string is replaced.  In the replacement, a backslash
 * followed by a single digit inserts the text of that group (nothing if the
 * group is empty or unset).  With the 'e' option the expanded replacement is
 * evaluated as PHP code and the result of the evaluation is inserted.
 *
 * Returns the resulting string, or false if the current encoding cannot be
 * resolved, the pattern does not compile, or the search reports an error.
 */
static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOptionType options)
{
	zval **arg_pattern_zval;

	char *arg_pattern;
	int arg_pattern_len;

	char *replace;
	int replace_len;

	char *string;
	int string_len;

	char *p;
	php_mb_regex_t *re;
	OnigSyntaxType *syntax;
	OnigRegion *regs = NULL;
	smart_str out_buf = { 0 };
	smart_str eval_buf = { 0 };
	smart_str *pbuf;
	int i, err, eval, n;
	OnigUChar *pos;
	OnigUChar *string_lim;
	char *description = NULL;
	char pat_buf[2];

	const mbfl_encoding *enc;

	/* the replacement is walked character by character, which needs the
	 * mbfl view of the current regex encoding */
	{
		const char *current_enc_name;
		current_enc_name = _php_mb_regex_mbctype2name(MBREX(current_mbctype));
		if (current_enc_name == NULL ||
			(enc = mbfl_name2encoding(current_enc_name)) == NULL) {
			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown error");
			RETURN_FALSE;
		}
	}
	eval = 0;
	{
		char *option_str = NULL;
		int option_str_len = 0;

		if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "Zss|s",
									&arg_pattern_zval,
									&replace, &replace_len,
									&string, &string_len,
									&option_str, &option_str_len) == FAILURE) {
			RETURN_FALSE;
		}

		if (option_str != NULL) {
			_php_mb_regex_init_options(option_str, option_str_len, &options, &syntax, &eval);
		} else {
			options |= MBREX(regex_default_options);
			syntax = MBREX(regex_default_syntax);
		}
	}
	if (Z_TYPE_PP(arg_pattern_zval) == IS_STRING) {
		arg_pattern = Z_STRVAL_PP(arg_pattern_zval);
		arg_pattern_len = Z_STRLEN_PP(arg_pattern_zval);
	} else {
		/* FIXME: this code is not multibyte aware! */
		convert_to_long_ex(arg_pattern_zval);
		pat_buf[0] = (char)Z_LVAL_PP(arg_pattern_zval);
		pat_buf[1] = '\0';

		arg_pattern = pat_buf;
		arg_pattern_len = 1;
	}
	/* create regex pattern buffer */
	re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, options, MBREX(current_mbctype), syntax TSRMLS_CC);
	if (re == NULL) {
		RETURN_FALSE;
	}

	/* with 'e' the expanded replacement is collected separately so it can
	 * be evaluated before its result is appended to the output */
	if (eval) {
		pbuf = &eval_buf;
		description = zend_make_compiled_string_description("mbregex replace" TSRMLS_CC);
	} else {
		pbuf = &out_buf;
		description = NULL;
	}

	/* do the actual work */
	err = 0;
	pos = (OnigUChar *)string;
	string_lim = (OnigUChar*)(string + string_len);
	regs = onig_region_new();
	while (err >= 0) {
		err = onig_search(re, (OnigUChar *)string, (OnigUChar *)string_lim, pos, (OnigUChar *)string_lim, regs, 0);
		if (err <= -2) {
			OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
			onig_error_code_to_str(err_str, err);
			php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex search failure in php_mbereg_replace_exec(): %s", err_str);
			break;
		}
		if (err >= 0) {
#if moriyoshi_0
			if (regs->beg[0] == regs->end[0]) {
				php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty regular expression");
				break;
			}
#endif
			/* copy the part of the string before the match */
			smart_str_appendl(&out_buf, pos, (size_t)((OnigUChar *)(string + regs->beg[0]) - pos));
			/* copy replacement and backrefs */
			i = 0;
			p = replace;
			while (i < replace_len) {
				int fwd = (int) php_mb_mbchar_bytes_ex(p, enc);
				int remaining = replace_len - i;

				/* the character width is derived without any knowledge of
				 * how many bytes are left (no length is passed), so keep
				 * it inside the replacement: at least one byte to
				 * guarantee progress, at most what remains so a truncated
				 * trailing character is not read past the buffer */
				if (fwd < 1) {
					fwd = 1;
				}
				if (fwd > remaining) {
					fwd = remaining;
				}

				n = -1;
				if (remaining >= 2 && fwd == 1 &&
					p[0] == '\\' && p[1] >= '0' && p[1] <= '9') {
					n = p[1] - '0';
				}
				if (n >= 0 && n < regs->num_regs) {
					if (regs->beg[n] >= 0 && regs->beg[n] < regs->end[n] && regs->end[n] <= string_len) {
						smart_str_appendl(pbuf, string + regs->beg[n], regs->end[n] - regs->beg[n]);
					}
					p += 2;
					i += 2;
				} else {
					smart_str_appendl(pbuf, p, fwd);
					p += fwd;
					i += fwd;
				}
			}
			if (eval) {
				zval v;
				/* null terminate buffer */
				smart_str_0(&eval_buf);
				/* do eval */
				if (zend_eval_stringl(eval_buf.c, eval_buf.len, &v, description TSRMLS_CC) == FAILURE) {
					efree(description);
					php_error_docref(NULL TSRMLS_CC,E_ERROR, "Failed evaluating code: %s%s", PHP_EOL, eval_buf.c);
					/* zend_error() does not return in this case */
				}

				/* result of eval */
				convert_to_string(&v);
				smart_str_appendl(&out_buf, Z_STRVAL(v), Z_STRLEN(v));
				/* Clean up */
				eval_buf.len = 0;
				zval_dtor(&v);
			}
			/* continue after the match; if the match did not advance past
			 * pos, copy one byte through and step over it instead */
			n = regs->end[0];
			if ((pos - (OnigUChar *)string) < n) {
				pos = (OnigUChar *)string + n;
			} else {
				if (pos < string_lim) {
					smart_str_appendl(&out_buf, pos, 1);
				}
				pos++;
			}
		} else { /* nomatch */
			/* stick that last bit of string on our output */
			if (string_lim - pos > 0) {
				smart_str_appendl(&out_buf, pos, string_lim - pos);
			}
		}
		onig_region_free(regs, 0);
	}

	if (description) {
		efree(description);
	}
	if (regs != NULL) {
		onig_region_free(regs, 1);
	}
	smart_str_free(&eval_buf);

	if (err <= -2) {
		smart_str_free(&out_buf);
		RETVAL_FALSE;
	} else {
		/* the appended NUL terminates the buffer but is not part of the value */
		smart_str_appendc(&out_buf, '\0');
		RETVAL_STRINGL((char *)out_buf.c, out_buf.len - 1, 0);
	}
}
966 /* }}} */
967 
968 /* {{{ proto string mb_ereg_replace(string pattern, string replacement, string string [, string option])
969    Replace regular expression for multibyte string */
/* Forwards to _php_mb_regex_ereg_replace_exec() with no forced option bits. */
PHP_FUNCTION(mb_ereg_replace)
{
	_php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
}
974 /* }}} */
975 
976 /* {{{ proto string mb_eregi_replace(string pattern, string replacement, string string)
977    Case insensitive replace regular expression for multibyte string */
/* Forwards to _php_mb_regex_ereg_replace_exec() with ONIG_OPTION_IGNORECASE forced on. */
PHP_FUNCTION(mb_eregi_replace)
{
	_php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, ONIG_OPTION_IGNORECASE);
}
982 /* }}} */
983 
984 /* {{{ proto array mb_split(string pattern, string string [, int limit])
985    split multibyte string into array by regular expression */
PHP_FUNCTION(mb_split)986 PHP_FUNCTION(mb_split)
987 {
988 	char *arg_pattern;
989 	int arg_pattern_len;
990 	php_mb_regex_t *re;
991 	OnigRegion *regs = NULL;
992 	char *string;
993 	OnigUChar *pos;
994 	int string_len;
995 
996 	int n, err;
997 	long count = -1;
998 
999 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|l", &arg_pattern, &arg_pattern_len, &string, &string_len, &count) == FAILURE) {
1000 		RETURN_FALSE;
1001 	}
1002 
1003 	if (count == 0) {
1004 		count = 1;
1005 	}
1006 
1007 	/* create regex pattern buffer */
1008 	if ((re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, MBREX(regex_default_options), MBREX(current_mbctype), MBREX(regex_default_syntax) TSRMLS_CC)) == NULL) {
1009 		RETURN_FALSE;
1010 	}
1011 
1012 	array_init(return_value);
1013 
1014 	pos = (OnigUChar *)string;
1015 	err = 0;
1016 	regs = onig_region_new();
1017 	/* churn through str, generating array entries as we go */
1018 	while ((--count != 0) &&
1019 		   (err = onig_search(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), pos, (OnigUChar *)(string + string_len), regs, 0)) >= 0) {
1020 		if (regs->beg[0] == regs->end[0]) {
1021 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty regular expression");
1022 			break;
1023 		}
1024 
1025 		/* add it to the array */
1026 		if (regs->beg[0] < string_len && regs->beg[0] >= (pos - (OnigUChar *)string)) {
1027 			add_next_index_stringl(return_value, (char *)pos, ((OnigUChar *)(string + regs->beg[0]) - pos), 1);
1028 		} else {
1029 			err = -2;
1030 			break;
1031 		}
1032 		/* point at our new starting point */
1033 		n = regs->end[0];
1034 		if ((pos - (OnigUChar *)string) < n) {
1035 			pos = (OnigUChar *)string + n;
1036 		}
1037 		if (count < 0) {
1038 			count = 0;
1039 		}
1040 		onig_region_free(regs, 0);
1041 	}
1042 
1043 	onig_region_free(regs, 1);
1044 
1045 	/* see if we encountered an error */
1046 	if (err <= -2) {
1047 		OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
1048 		onig_error_code_to_str(err_str, err);
1049 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex search failure in mbsplit(): %s", err_str);
1050 		zval_dtor(return_value);
1051 		RETURN_FALSE;
1052 	}
1053 
1054 	/* otherwise we just have one last element to add to the array */
1055 	n = ((OnigUChar *)(string + string_len) - pos);
1056 	if (n > 0) {
1057 		add_next_index_stringl(return_value, (char *)pos, n, 1);
1058 	} else {
1059 		add_next_index_stringl(return_value, "", 0, 1);
1060 	}
1061 }
1062 /* }}} */
1063 
1064 /* {{{ proto bool mb_ereg_match(string pattern, string string [,string option])
1065    Regular expression match for multibyte string */
PHP_FUNCTION(mb_ereg_match)1066 PHP_FUNCTION(mb_ereg_match)
1067 {
1068 	char *arg_pattern;
1069 	int arg_pattern_len;
1070 
1071 	char *string;
1072 	int string_len;
1073 
1074 	php_mb_regex_t *re;
1075 	OnigSyntaxType *syntax;
1076 	OnigOptionType option = 0;
1077 	int err;
1078 
1079 	{
1080 		char *option_str = NULL;
1081 		int option_str_len = 0;
1082 
1083 		if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|s",
1084 		                          &arg_pattern, &arg_pattern_len, &string, &string_len,
1085 		                          &option_str, &option_str_len)==FAILURE) {
1086 			RETURN_FALSE;
1087 		}
1088 
1089 		if (option_str != NULL) {
1090 			_php_mb_regex_init_options(option_str, option_str_len, &option, &syntax, NULL);
1091 		} else {
1092 			option |= MBREX(regex_default_options);
1093 			syntax = MBREX(regex_default_syntax);
1094 		}
1095 	}
1096 
1097 	if ((re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), syntax TSRMLS_CC)) == NULL) {
1098 		RETURN_FALSE;
1099 	}
1100 
1101 	/* match */
1102 	err = onig_match(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), (OnigUChar *)string, NULL, 0);
1103 	if (err >= 0) {
1104 		RETVAL_TRUE;
1105 	} else {
1106 		RETVAL_FALSE;
1107 	}
1108 }
1109 /* }}} */
1110 
1111 /* regex search */
1112 /* {{{ _php_mb_regex_ereg_search_exec */
1113 static void
_php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS,int mode)1114 _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mode)
1115 {
1116 	size_t argc = ZEND_NUM_ARGS();
1117 	char *arg_pattern, *arg_options;
1118 	int arg_pattern_len, arg_options_len;
1119 	int n, i, err, pos, len, beg, end;
1120 	OnigOptionType option;
1121 	OnigUChar *str;
1122 	OnigSyntaxType *syntax;
1123 
1124 	if (zend_parse_parameters(argc TSRMLS_CC, "|ss", &arg_pattern, &arg_pattern_len, &arg_options, &arg_options_len) == FAILURE) {
1125 		return;
1126 	}
1127 
1128 	option = MBREX(regex_default_options);
1129 
1130 	if (argc == 2) {
1131 		option = 0;
1132 		_php_mb_regex_init_options(arg_options, arg_options_len, &option, &syntax, NULL);
1133 	}
1134 
1135 	if (argc > 0) {
1136 		/* create regex pattern buffer */
1137 		if ((MBREX(search_re) = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), MBREX(regex_default_syntax) TSRMLS_CC)) == NULL) {
1138 			RETURN_FALSE;
1139 		}
1140 	}
1141 
1142 	pos = MBREX(search_pos);
1143 	str = NULL;
1144 	len = 0;
1145 	if (MBREX(search_str) != NULL && Z_TYPE_P(MBREX(search_str)) == IS_STRING){
1146 		str = (OnigUChar *)Z_STRVAL_P(MBREX(search_str));
1147 		len = Z_STRLEN_P(MBREX(search_str));
1148 	}
1149 
1150 	if (MBREX(search_re) == NULL) {
1151 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "No regex given");
1152 		RETURN_FALSE;
1153 	}
1154 
1155 	if (str == NULL) {
1156 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "No string given");
1157 		RETURN_FALSE;
1158 	}
1159 
1160 	if (MBREX(search_regs)) {
1161 		onig_region_free(MBREX(search_regs), 1);
1162 	}
1163 	MBREX(search_regs) = onig_region_new();
1164 
1165 	err = onig_search(MBREX(search_re), str, str + len, str + pos, str  + len, MBREX(search_regs), 0);
1166 	if (err == ONIG_MISMATCH) {
1167 		MBREX(search_pos) = len;
1168 		RETVAL_FALSE;
1169 	} else if (err <= -2) {
1170 		OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
1171 		onig_error_code_to_str(err_str, err);
1172 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex search failure in mbregex_search(): %s", err_str);
1173 		RETVAL_FALSE;
1174 	} else {
1175 		if (MBREX(search_regs)->beg[0] == MBREX(search_regs)->end[0]) {
1176 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty regular expression");
1177 		}
1178 		switch (mode) {
1179 		case 1:
1180 			array_init(return_value);
1181 			beg = MBREX(search_regs)->beg[0];
1182 			end = MBREX(search_regs)->end[0];
1183 			add_next_index_long(return_value, beg);
1184 			add_next_index_long(return_value, end - beg);
1185 			break;
1186 		case 2:
1187 			array_init(return_value);
1188 			n = MBREX(search_regs)->num_regs;
1189 			for (i = 0; i < n; i++) {
1190 				beg = MBREX(search_regs)->beg[i];
1191 				end = MBREX(search_regs)->end[i];
1192 				if (beg >= 0 && beg <= end && end <= len) {
1193 					add_index_stringl(return_value, i, (char *)&str[beg], end - beg, 1);
1194 				} else {
1195 					add_index_bool(return_value, i, 0);
1196 				}
1197 			}
1198 			break;
1199 		default:
1200 			RETVAL_TRUE;
1201 			break;
1202 		}
1203 		end = MBREX(search_regs)->end[0];
1204 		if (pos < end) {
1205 			MBREX(search_pos) = end;
1206 		} else {
1207 			MBREX(search_pos) = pos + 1;
1208 		}
1209 	}
1210 
1211 	if (err < 0) {
1212 		onig_region_free(MBREX(search_regs), 1);
1213 		MBREX(search_regs) = (OnigRegion *)NULL;
1214 	}
1215 }
1216 /* }}} */
1217 
1218 /* {{{ proto bool mb_ereg_search([string pattern[, string option]])
1219    Regular expression search for multibyte string */
PHP_FUNCTION(mb_ereg_search)1220 PHP_FUNCTION(mb_ereg_search)
1221 {
1222 	_php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
1223 }
1224 /* }}} */
1225 
1226 /* {{{ proto array mb_ereg_search_pos([string pattern[, string option]])
1227    Regular expression search for multibyte string */
PHP_FUNCTION(mb_ereg_search_pos)1228 PHP_FUNCTION(mb_ereg_search_pos)
1229 {
1230 	_php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
1231 }
1232 /* }}} */
1233 
1234 /* {{{ proto array mb_ereg_search_regs([string pattern[, string option]])
1235    Regular expression search for multibyte string */
PHP_FUNCTION(mb_ereg_search_regs)1236 PHP_FUNCTION(mb_ereg_search_regs)
1237 {
1238 	_php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 2);
1239 }
1240 /* }}} */
1241 
1242 /* {{{ proto bool mb_ereg_search_init(string string [, string pattern[, string option]])
1243    Initialize string and regular expression for search. */
PHP_FUNCTION(mb_ereg_search_init)1244 PHP_FUNCTION(mb_ereg_search_init)
1245 {
1246 	size_t argc = ZEND_NUM_ARGS();
1247 	zval *arg_str;
1248 	char *arg_pattern = NULL, *arg_options = NULL;
1249 	int arg_pattern_len = 0, arg_options_len = 0;
1250 	OnigSyntaxType *syntax = NULL;
1251 	OnigOptionType option;
1252 
1253 	if (zend_parse_parameters(argc TSRMLS_CC, "z|ss", &arg_str, &arg_pattern, &arg_pattern_len, &arg_options, &arg_options_len) == FAILURE) {
1254 		return;
1255 	}
1256 
1257 	if (argc > 1 && arg_pattern_len == 0) {
1258 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty pattern");
1259 		RETURN_FALSE;
1260 	}
1261 
1262 	option = MBREX(regex_default_options);
1263 	syntax = MBREX(regex_default_syntax);
1264 
1265 	if (argc == 3) {
1266 		option = 0;
1267 		_php_mb_regex_init_options(arg_options, arg_options_len, &option, &syntax, NULL);
1268 	}
1269 
1270 	if (argc > 1) {
1271 		/* create regex pattern buffer */
1272 		if ((MBREX(search_re) = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), syntax TSRMLS_CC)) == NULL) {
1273 			RETURN_FALSE;
1274 		}
1275 	}
1276 
1277 	if (MBREX(search_str) != NULL) {
1278 		zval_ptr_dtor(&MBREX(search_str));
1279 		MBREX(search_str) = (zval *)NULL;
1280 	}
1281 
1282 	MBREX(search_str) = arg_str;
1283 	Z_ADDREF_P(MBREX(search_str));
1284 	SEPARATE_ZVAL_IF_NOT_REF(&MBREX(search_str));
1285 
1286 	MBREX(search_pos) = 0;
1287 
1288 	if (MBREX(search_regs) != NULL) {
1289 		onig_region_free(MBREX(search_regs), 1);
1290 		MBREX(search_regs) = (OnigRegion *) NULL;
1291 	}
1292 
1293 	RETURN_TRUE;
1294 }
1295 /* }}} */
1296 
1297 /* {{{ proto array mb_ereg_search_getregs(void)
1298    Get matched substring of the last time */
PHP_FUNCTION(mb_ereg_search_getregs)1299 PHP_FUNCTION(mb_ereg_search_getregs)
1300 {
1301 	int n, i, len, beg, end;
1302 	OnigUChar *str;
1303 
1304 	if (MBREX(search_regs) != NULL && Z_TYPE_P(MBREX(search_str)) == IS_STRING && Z_STRVAL_P(MBREX(search_str)) != NULL) {
1305 		array_init(return_value);
1306 
1307 		str = (OnigUChar *)Z_STRVAL_P(MBREX(search_str));
1308 		len = Z_STRLEN_P(MBREX(search_str));
1309 		n = MBREX(search_regs)->num_regs;
1310 		for (i = 0; i < n; i++) {
1311 			beg = MBREX(search_regs)->beg[i];
1312 			end = MBREX(search_regs)->end[i];
1313 			if (beg >= 0 && beg <= end && end <= len) {
1314 				add_index_stringl(return_value, i, (char *)&str[beg], end - beg, 1);
1315 			} else {
1316 				add_index_bool(return_value, i, 0);
1317 			}
1318 		}
1319 	} else {
1320 		RETVAL_FALSE;
1321 	}
1322 }
1323 /* }}} */
1324 
1325 /* {{{ proto int mb_ereg_search_getpos(void)
1326    Get search start position */
PHP_FUNCTION(mb_ereg_search_getpos)1327 PHP_FUNCTION(mb_ereg_search_getpos)
1328 {
1329 	RETVAL_LONG(MBREX(search_pos));
1330 }
1331 /* }}} */
1332 
1333 /* {{{ proto bool mb_ereg_search_setpos(int position)
1334    Set search start position */
PHP_FUNCTION(mb_ereg_search_setpos)1335 PHP_FUNCTION(mb_ereg_search_setpos)
1336 {
1337 	long position;
1338 
1339 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "l", &position) == FAILURE) {
1340 		return;
1341 	}
1342 
1343 	if (position < 0 || (MBREX(search_str) != NULL && Z_TYPE_P(MBREX(search_str)) == IS_STRING && position >= Z_STRLEN_P(MBREX(search_str)))) {
1344 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Position is out of range");
1345 		MBREX(search_pos) = 0;
1346 		RETURN_FALSE;
1347 	}
1348 
1349 	MBREX(search_pos) = position;
1350 	RETURN_TRUE;
1351 }
1352 /* }}} */
1353 
1354 /* {{{ php_mb_regex_set_options */
_php_mb_regex_set_options(OnigOptionType options,OnigSyntaxType * syntax,OnigOptionType * prev_options,OnigSyntaxType ** prev_syntax TSRMLS_DC)1355 static void _php_mb_regex_set_options(OnigOptionType options, OnigSyntaxType *syntax, OnigOptionType *prev_options, OnigSyntaxType **prev_syntax TSRMLS_DC)
1356 {
1357 	if (prev_options != NULL) {
1358 		*prev_options = MBREX(regex_default_options);
1359 	}
1360 	if (prev_syntax != NULL) {
1361 		*prev_syntax = MBREX(regex_default_syntax);
1362 	}
1363 	MBREX(regex_default_options) = options;
1364 	MBREX(regex_default_syntax) = syntax;
1365 }
1366 /* }}} */
1367 
1368 /* {{{ proto string mb_regex_set_options([string options])
1369    Set or get the default options for mbregex functions */
PHP_FUNCTION(mb_regex_set_options)1370 PHP_FUNCTION(mb_regex_set_options)
1371 {
1372 	OnigOptionType opt;
1373 	OnigSyntaxType *syntax;
1374 	char *string = NULL;
1375 	int string_len;
1376 	char buf[16];
1377 
1378 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s",
1379 	                          &string, &string_len) == FAILURE) {
1380 		RETURN_FALSE;
1381 	}
1382 	if (string != NULL) {
1383 		opt = 0;
1384 		syntax = NULL;
1385 		_php_mb_regex_init_options(string, string_len, &opt, &syntax, NULL);
1386 		_php_mb_regex_set_options(opt, syntax, NULL, NULL TSRMLS_CC);
1387 	} else {
1388 		opt = MBREX(regex_default_options);
1389 		syntax = MBREX(regex_default_syntax);
1390 	}
1391 	_php_mb_regex_get_option_string(buf, sizeof(buf), opt, syntax);
1392 
1393 	RETVAL_STRING(buf, 1);
1394 }
1395 /* }}} */
1396 
1397 #endif	/* HAVE_MBREGEX */
1398 
1399 /*
1400  * Local variables:
1401  * tab-width: 4
1402  * c-basic-offset: 4
1403  * End:
1404  * vim600: fdm=marker
1405  * vim: noet sw=4 ts=4
1406  */
1407