xref: /PHP-8.0/ext/mbstring/mbstring.c (revision 2eb2f9d7)
1 /*
2    +----------------------------------------------------------------------+
3    | Copyright (c) The PHP Group                                          |
4    +----------------------------------------------------------------------+
5    | This source file is subject to version 3.01 of the PHP license,      |
6    | that is bundled with this package in the file LICENSE, and is        |
7    | available through the world-wide-web at the following url:           |
8    | http://www.php.net/license/3_01.txt                                  |
9    | If you did not receive a copy of the PHP license and are unable to   |
10    | obtain it through the world-wide-web, please send a note to          |
11    | license@php.net so we can mail you a copy immediately.               |
12    +----------------------------------------------------------------------+
13    | Author: Tsukada Takuya <tsukada@fminn.nagano.nagano.jp>              |
14    |         Rui Hirokawa <hirokawa@php.net>                              |
15    |         Hironori Sato <satoh@jpnnet.com>                             |
16    |         Shigeru Kanemoto <sgk@happysize.co.jp>                       |
17    +----------------------------------------------------------------------+
18 */
19 
20 /* {{{ includes */
21 #include "libmbfl/config.h"
22 #include "php.h"
23 #include "php_ini.h"
24 #include "php_variables.h"
25 #include "mbstring.h"
26 #include "ext/standard/php_string.h"
27 #include "ext/standard/php_mail.h"
28 #include "ext/standard/exec.h"
29 #include "ext/standard/url.h"
30 #include "main/php_output.h"
31 #include "ext/standard/info.h"
32 #include "ext/pcre/php_pcre.h"
33 
34 #include "libmbfl/mbfl/mbfilter_8bit.h"
35 #include "libmbfl/mbfl/mbfilter_pass.h"
36 #include "libmbfl/mbfl/mbfilter_wchar.h"
37 #include "libmbfl/filters/mbfilter_ascii.h"
38 #include "libmbfl/filters/mbfilter_base64.h"
39 #include "libmbfl/filters/mbfilter_qprint.h"
40 #include "libmbfl/filters/mbfilter_ucs4.h"
41 #include "libmbfl/filters/mbfilter_utf8.h"
42 #include "libmbfl/filters/mbfilter_tl_jisx0201_jisx0208.h"
43 
44 #include "php_variables.h"
45 #include "php_globals.h"
46 #include "rfc1867.h"
47 #include "php_content_types.h"
48 #include "SAPI.h"
49 #include "php_unicode.h"
50 #include "TSRM.h"
51 
52 #include "mb_gpc.h"
53 
54 #ifdef HAVE_MBREGEX
55 # include "php_mbregex.h"
56 #endif
57 
58 #include "zend_multibyte.h"
59 #include "mbstring_arginfo.h"
60 /* }}} */
61 
62 /* {{{ prototypes */
63 ZEND_DECLARE_MODULE_GLOBALS(mbstring)
64 
65 static PHP_GINIT_FUNCTION(mbstring);
66 static PHP_GSHUTDOWN_FUNCTION(mbstring);
67 
68 static void php_mb_populate_current_detect_order_list(void);
69 
70 static int php_mb_encoding_translation(void);
71 
72 static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *list_size);
73 
74 static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding);
75 
76 static inline zend_bool php_mb_is_unsupported_no_encoding(enum mbfl_no_encoding no_enc);
77 
78 static inline zend_bool php_mb_is_no_encoding_utf8(enum mbfl_no_encoding no_enc);
79 /* }}} */
80 
81 /* {{{ php_mb_default_identify_list */
82 typedef struct _php_mb_nls_ident_list {
83 	enum mbfl_no_language lang;
84 	const enum mbfl_no_encoding *list;
85 	size_t list_size;
86 } php_mb_nls_ident_list;
87 
88 static const enum mbfl_no_encoding php_mb_default_identify_list_ja[] = {
89 	mbfl_no_encoding_ascii,
90 	mbfl_no_encoding_jis,
91 	mbfl_no_encoding_utf8,
92 	mbfl_no_encoding_euc_jp,
93 	mbfl_no_encoding_sjis
94 };
95 
96 static const enum mbfl_no_encoding php_mb_default_identify_list_cn[] = {
97 	mbfl_no_encoding_ascii,
98 	mbfl_no_encoding_utf8,
99 	mbfl_no_encoding_euc_cn,
100 	mbfl_no_encoding_cp936
101 };
102 
103 static const enum mbfl_no_encoding php_mb_default_identify_list_tw_hk[] = {
104 	mbfl_no_encoding_ascii,
105 	mbfl_no_encoding_utf8,
106 	mbfl_no_encoding_euc_tw,
107 	mbfl_no_encoding_big5
108 };
109 
110 static const enum mbfl_no_encoding php_mb_default_identify_list_kr[] = {
111 	mbfl_no_encoding_ascii,
112 	mbfl_no_encoding_utf8,
113 	mbfl_no_encoding_euc_kr,
114 	mbfl_no_encoding_uhc
115 };
116 
117 static const enum mbfl_no_encoding php_mb_default_identify_list_ru[] = {
118 	mbfl_no_encoding_ascii,
119 	mbfl_no_encoding_utf8,
120 	mbfl_no_encoding_koi8r,
121 	mbfl_no_encoding_cp1251,
122 	mbfl_no_encoding_cp866
123 };
124 
125 static const enum mbfl_no_encoding php_mb_default_identify_list_hy[] = {
126 	mbfl_no_encoding_ascii,
127 	mbfl_no_encoding_utf8,
128 	mbfl_no_encoding_armscii8
129 };
130 
131 static const enum mbfl_no_encoding php_mb_default_identify_list_tr[] = {
132 	mbfl_no_encoding_ascii,
133 	mbfl_no_encoding_utf8,
134 	mbfl_no_encoding_cp1254,
135 	mbfl_no_encoding_8859_9
136 };
137 
138 static const enum mbfl_no_encoding php_mb_default_identify_list_ua[] = {
139 	mbfl_no_encoding_ascii,
140 	mbfl_no_encoding_utf8,
141 	mbfl_no_encoding_koi8u
142 };
143 
144 static const enum mbfl_no_encoding php_mb_default_identify_list_neut[] = {
145 	mbfl_no_encoding_ascii,
146 	mbfl_no_encoding_utf8
147 };
148 
149 
150 static const php_mb_nls_ident_list php_mb_default_identify_list[] = {
151 	{ mbfl_no_language_japanese, php_mb_default_identify_list_ja, sizeof(php_mb_default_identify_list_ja) / sizeof(php_mb_default_identify_list_ja[0]) },
152 	{ mbfl_no_language_korean, php_mb_default_identify_list_kr, sizeof(php_mb_default_identify_list_kr) / sizeof(php_mb_default_identify_list_kr[0]) },
153 	{ mbfl_no_language_traditional_chinese, php_mb_default_identify_list_tw_hk, sizeof(php_mb_default_identify_list_tw_hk) / sizeof(php_mb_default_identify_list_tw_hk[0]) },
154 	{ mbfl_no_language_simplified_chinese, php_mb_default_identify_list_cn, sizeof(php_mb_default_identify_list_cn) / sizeof(php_mb_default_identify_list_cn[0]) },
155 	{ mbfl_no_language_russian, php_mb_default_identify_list_ru, sizeof(php_mb_default_identify_list_ru) / sizeof(php_mb_default_identify_list_ru[0]) },
156 	{ mbfl_no_language_armenian, php_mb_default_identify_list_hy, sizeof(php_mb_default_identify_list_hy) / sizeof(php_mb_default_identify_list_hy[0]) },
157 	{ mbfl_no_language_turkish, php_mb_default_identify_list_tr, sizeof(php_mb_default_identify_list_tr) / sizeof(php_mb_default_identify_list_tr[0]) },
158 	{ mbfl_no_language_ukrainian, php_mb_default_identify_list_ua, sizeof(php_mb_default_identify_list_ua) / sizeof(php_mb_default_identify_list_ua[0]) },
159 	{ mbfl_no_language_neutral, php_mb_default_identify_list_neut, sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]) }
160 };
161 
162 /* }}} */
163 
164 /* {{{ mbstring_deps[] */
165 static const zend_module_dep mbstring_deps[] = {
166 	ZEND_MOD_REQUIRED("pcre")
167 	ZEND_MOD_END
168 };
169 /* }}} */
170 
171 /* {{{ zend_module_entry mbstring_module_entry */
172 zend_module_entry mbstring_module_entry = {
173 	STANDARD_MODULE_HEADER_EX,
174 	NULL,
175 	mbstring_deps,
176 	"mbstring",
177 	ext_functions,
178 	PHP_MINIT(mbstring),
179 	PHP_MSHUTDOWN(mbstring),
180 	PHP_RINIT(mbstring),
181 	PHP_RSHUTDOWN(mbstring),
182 	PHP_MINFO(mbstring),
183 	PHP_MBSTRING_VERSION,
184 	PHP_MODULE_GLOBALS(mbstring),
185 	PHP_GINIT(mbstring),
186 	PHP_GSHUTDOWN(mbstring),
187 	NULL,
188 	STANDARD_MODULE_PROPERTIES_EX
189 };
190 /* }}} */
191 
192 /* {{{ static sapi_post_entry php_post_entries[] */
193 static const sapi_post_entry php_post_entries[] = {
194 	{ DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data,	php_std_post_handler },
195 	{ MULTIPART_CONTENT_TYPE,    sizeof(MULTIPART_CONTENT_TYPE)-1,    NULL,                         rfc1867_post_handler },
196 	{ NULL, 0, NULL, NULL }
197 };
198 /* }}} */
199 
/* Only emitted when mbstring is built as a loadable module. */
#if defined(COMPILE_DL_MBSTRING)
# if defined(ZTS)
ZEND_TSRMLS_CACHE_DEFINE()
# endif /* ZTS */
ZEND_GET_MODULE(mbstring)
#endif /* COMPILE_DL_MBSTRING */
206 
207 /* {{{ static sapi_post_entry mbstr_post_entries[] */
208 static const sapi_post_entry mbstr_post_entries[] = {
209 	{ DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data, php_mb_post_handler },
210 	{ MULTIPART_CONTENT_TYPE,    sizeof(MULTIPART_CONTENT_TYPE)-1,    NULL,                         rfc1867_post_handler },
211 	{ NULL, 0, NULL, NULL }
212 };
213 /* }}} */
214 
php_mb_get_encoding(zend_string * encoding_name,uint32_t arg_num)215 static const mbfl_encoding *php_mb_get_encoding(zend_string *encoding_name, uint32_t arg_num) {
216 	if (encoding_name) {
217 		const mbfl_encoding *encoding;
218 		zend_string *last_encoding_name = MBSTRG(last_used_encoding_name);
219 		if (last_encoding_name && (last_encoding_name == encoding_name
220 				|| !strcasecmp(ZSTR_VAL(encoding_name), ZSTR_VAL(last_encoding_name)))) {
221 			return MBSTRG(last_used_encoding);
222 		}
223 
224 		encoding = mbfl_name2encoding(ZSTR_VAL(encoding_name));
225 		if (!encoding) {
226 			zend_argument_value_error(arg_num, "must be a valid encoding, \"%s\" given", ZSTR_VAL(encoding_name));
227 			return NULL;
228 		}
229 
230 		if (last_encoding_name) {
231 			zend_string_release(last_encoding_name);
232 		}
233 		MBSTRG(last_used_encoding_name) = zend_string_copy(encoding_name);
234 		MBSTRG(last_used_encoding) = encoding;
235 		return encoding;
236 	} else {
237 		return MBSTRG(current_internal_encoding);
238 	}
239 }
240 
php_mb_get_encoding_or_pass(const char * encoding_name)241 static const mbfl_encoding *php_mb_get_encoding_or_pass(const char *encoding_name) {
242 	if (strcmp(encoding_name, "pass") == 0) {
243 		return &mbfl_encoding_pass;
244 	}
245 
246 	return mbfl_name2encoding(encoding_name);
247 }
248 
/* Count the ',' bytes in the half-open range [p, end). */
static size_t count_commas(const char *p, const char *end) {
	size_t total = 0;
	const char *hit;

	for (hit = memchr(p, ',', end - p); hit != NULL; hit = memchr(hit + 1, ',', end - (hit + 1))) {
		total++;
	}
	return total;
}
257 
258 /* {{{ static zend_result php_mb_parse_encoding_list()
259  *  Return FAILURE if input contains any illegal encoding, otherwise SUCCESS.
260  * 	Emits a ValueError in function context and a warning in INI context, in INI context arg_num must be 0.
261  */
php_mb_parse_encoding_list(const char * value,size_t value_length,const mbfl_encoding *** return_list,size_t * return_size,bool persistent,uint32_t arg_num,zend_bool allow_pass_encoding)262 static zend_result php_mb_parse_encoding_list(const char *value, size_t value_length,
263 	const mbfl_encoding ***return_list, size_t *return_size, bool persistent, uint32_t arg_num,
264 	zend_bool allow_pass_encoding)
265 {
266 	if (value == NULL || value_length == 0) {
267 		*return_list = NULL;
268 		*return_size = 0;
269 		return SUCCESS;
270 	} else {
271 		zend_bool included_auto;
272 		size_t n, size;
273 		char *p1, *endp, *tmpstr;
274 		const mbfl_encoding **entry, **list;
275 
276 		/* copy the value string for work */
277 		if (value[0]=='"' && value[value_length-1]=='"' && value_length>2) {
278 			tmpstr = (char *)estrndup(value+1, value_length-2);
279 			value_length -= 2;
280 		} else {
281 			tmpstr = (char *)estrndup(value, value_length);
282 		}
283 
284 		endp = tmpstr + value_length;
285 		size = 1 + count_commas(tmpstr, endp) + MBSTRG(default_detect_order_list_size);
286 		list = (const mbfl_encoding **)pecalloc(size, sizeof(mbfl_encoding*), persistent);
287 		entry = list;
288 		n = 0;
289 		included_auto = 0;
290 		p1 = tmpstr;
291 		while (1) {
292 			char *comma = (char *) php_memnstr(p1, ",", 1, endp);
293 			char *p = comma ? comma : endp;
294 			*p = '\0';
295 			/* trim spaces */
296 			while (p1 < p && (*p1 == ' ' || *p1 == '\t')) {
297 				p1++;
298 			}
299 			p--;
300 			while (p > p1 && (*p == ' ' || *p == '\t')) {
301 				*p = '\0';
302 				p--;
303 			}
304 			/* convert to the encoding number and check encoding */
305 			if (strcasecmp(p1, "auto") == 0) {
306 				if (!included_auto) {
307 					const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
308 					const size_t identify_list_size = MBSTRG(default_detect_order_list_size);
309 					size_t i;
310 					included_auto = 1;
311 					for (i = 0; i < identify_list_size; i++) {
312 						*entry++ = mbfl_no2encoding(*src++);
313 						n++;
314 					}
315 				}
316 			} else {
317 				const mbfl_encoding *encoding =
318 					allow_pass_encoding ? php_mb_get_encoding_or_pass(p1) : mbfl_name2encoding(p1);
319 				if (!encoding) {
320 					/* Called from an INI setting modification */
321 					if (arg_num == 0) {
322 						php_error_docref("ref.mbstring", E_WARNING, "INI setting contains invalid encoding \"%s\"", p1);
323 					} else {
324 						zend_argument_value_error(arg_num, "contains invalid encoding \"%s\"", p1);
325 					}
326 					efree(tmpstr);
327 					pefree(ZEND_VOIDP(list), persistent);
328 					return FAILURE;
329 				}
330 
331 				*entry++ = encoding;
332 				n++;
333 			}
334 			if (n >= size || comma == NULL) {
335 				break;
336 			}
337 			p1 = comma + 1;
338 		}
339 		*return_list = list;
340 		*return_size = n;
341 		efree(tmpstr);
342 	}
343 
344 	return SUCCESS;
345 }
346 /* }}} */
347 
348 /* {{{ static int php_mb_parse_encoding_array()
349  *  Return FAILURE if input contains any illegal encoding, otherwise SUCCESS.
350  * 	Emits a ValueError in function context and a warning in INI context, in INI context arg_num must be 0.
351  */
php_mb_parse_encoding_array(HashTable * target_hash,const mbfl_encoding *** return_list,size_t * return_size,uint32_t arg_num)352 static int php_mb_parse_encoding_array(HashTable *target_hash, const mbfl_encoding ***return_list,
353 	size_t *return_size, uint32_t arg_num)
354 {
355 	/* Allocate enough space to include the default detect order if "auto" is used. */
356 	size_t size = zend_hash_num_elements(target_hash) + MBSTRG(default_detect_order_list_size);
357 	const mbfl_encoding **list = ecalloc(size, sizeof(mbfl_encoding*));
358 	const mbfl_encoding **entry = list;
359 	zend_bool included_auto = 0;
360 	size_t n = 0;
361 	zval *hash_entry;
362 	ZEND_HASH_FOREACH_VAL(target_hash, hash_entry) {
363 		zend_string *encoding_str = zval_try_get_string(hash_entry);
364 		if (UNEXPECTED(!encoding_str)) {
365 			efree(ZEND_VOIDP(list));
366 			return FAILURE;
367 		}
368 
369 		if (strcasecmp(ZSTR_VAL(encoding_str), "auto") == 0) {
370 			if (!included_auto) {
371 				const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
372 				const size_t identify_list_size = MBSTRG(default_detect_order_list_size);
373 				size_t j;
374 
375 				included_auto = 1;
376 				for (j = 0; j < identify_list_size; j++) {
377 					*entry++ = mbfl_no2encoding(*src++);
378 					n++;
379 				}
380 			}
381 		} else {
382 			const mbfl_encoding *encoding = mbfl_name2encoding(ZSTR_VAL(encoding_str));
383 			if (encoding) {
384 				*entry++ = encoding;
385 				n++;
386 			} else {
387 				zend_argument_value_error(arg_num, "contains invalid encoding \"%s\"", ZSTR_VAL(encoding_str));
388 				zend_string_release(encoding_str);
389 				efree(ZEND_VOIDP(list));
390 				return FAILURE;
391 			}
392 		}
393 		zend_string_release(encoding_str);
394 	} ZEND_HASH_FOREACH_END();
395 	*return_list = list;
396 	*return_size = n;
397 	return SUCCESS;
398 }
399 /* }}} */
400 
401 /* {{{ zend_multibyte interface */
php_mb_zend_encoding_fetcher(const char * encoding_name)402 static const zend_encoding* php_mb_zend_encoding_fetcher(const char *encoding_name)
403 {
404 	return (const zend_encoding*)mbfl_name2encoding(encoding_name);
405 }
406 
php_mb_zend_encoding_name_getter(const zend_encoding * encoding)407 static const char *php_mb_zend_encoding_name_getter(const zend_encoding *encoding)
408 {
409 	return ((const mbfl_encoding *)encoding)->name;
410 }
411 
php_mb_zend_encoding_lexer_compatibility_checker(const zend_encoding * _encoding)412 static bool php_mb_zend_encoding_lexer_compatibility_checker(const zend_encoding *_encoding)
413 {
414 	const mbfl_encoding *encoding = (const mbfl_encoding*)_encoding;
415 	if (encoding->flag & MBFL_ENCTYPE_SBCS) {
416 		return 1;
417 	}
418 	if ((encoding->flag & (MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE)) == MBFL_ENCTYPE_MBCS) {
419 		return 1;
420 	}
421 	return 0;
422 }
423 
php_mb_zend_encoding_detector(const unsigned char * arg_string,size_t arg_length,const zend_encoding ** list,size_t list_size)424 static const zend_encoding *php_mb_zend_encoding_detector(const unsigned char *arg_string, size_t arg_length, const zend_encoding **list, size_t list_size)
425 {
426 	mbfl_string string;
427 
428 	if (!list) {
429 		list = (const zend_encoding **)MBSTRG(current_detect_order_list);
430 		list_size = MBSTRG(current_detect_order_list_size);
431 	}
432 
433 	mbfl_string_init(&string);
434 	string.val = (unsigned char *)arg_string;
435 	string.len = arg_length;
436 	return (const zend_encoding *) mbfl_identify_encoding(&string, (const mbfl_encoding **)list, list_size, 0);
437 }
438 
php_mb_zend_encoding_converter(unsigned char ** to,size_t * to_length,const unsigned char * from,size_t from_length,const zend_encoding * encoding_to,const zend_encoding * encoding_from)439 static size_t php_mb_zend_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const zend_encoding *encoding_to, const zend_encoding *encoding_from)
440 {
441 	mbfl_string string, result;
442 	mbfl_buffer_converter *convd;
443 
444 	/* new encoding */
445 	/* initialize string */
446 	string.encoding = (const mbfl_encoding*)encoding_from;
447 	string.val = (unsigned char*)from;
448 	string.len = from_length;
449 
450 	/* initialize converter */
451 	convd = mbfl_buffer_converter_new((const mbfl_encoding *)encoding_from, (const mbfl_encoding *)encoding_to, string.len);
452 	if (convd == NULL) {
453 		return (size_t) -1;
454 	}
455 
456 	mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
457 	mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
458 
459 	/* do it */
460 	size_t loc = mbfl_buffer_converter_feed(convd, &string);
461 
462 	mbfl_buffer_converter_flush(convd);
463 	mbfl_string_init(&result);
464 	if (!mbfl_buffer_converter_result(convd, &result)) {
465 		mbfl_buffer_converter_delete(convd);
466 		return (size_t)-1;
467 	}
468 
469 	*to = result.val;
470 	*to_length = result.len;
471 
472 	mbfl_buffer_converter_delete(convd);
473 
474 	return loc;
475 }
476 
php_mb_zend_encoding_list_parser(const char * encoding_list,size_t encoding_list_len,const zend_encoding *** return_list,size_t * return_size,bool persistent)477 static zend_result php_mb_zend_encoding_list_parser(const char *encoding_list, size_t encoding_list_len, const zend_encoding ***return_list, size_t *return_size, bool persistent)
478 {
479 	return php_mb_parse_encoding_list(
480 		encoding_list, encoding_list_len,
481 		(const mbfl_encoding ***)return_list, return_size,
482 		persistent, /* arg_num */ 0, /* allow_pass_encoding */ 1);
483 }
484 
php_mb_zend_internal_encoding_getter(void)485 static const zend_encoding *php_mb_zend_internal_encoding_getter(void)
486 {
487 	return (const zend_encoding *)MBSTRG(internal_encoding);
488 }
489 
php_mb_zend_internal_encoding_setter(const zend_encoding * encoding)490 static zend_result php_mb_zend_internal_encoding_setter(const zend_encoding *encoding)
491 {
492 	MBSTRG(internal_encoding) = (const mbfl_encoding *)encoding;
493 	return SUCCESS;
494 }
495 
496 static zend_multibyte_functions php_mb_zend_multibyte_functions = {
497 	"mbstring",
498 	php_mb_zend_encoding_fetcher,
499 	php_mb_zend_encoding_name_getter,
500 	php_mb_zend_encoding_lexer_compatibility_checker,
501 	php_mb_zend_encoding_detector,
502 	php_mb_zend_encoding_converter,
503 	php_mb_zend_encoding_list_parser,
504 	php_mb_zend_internal_encoding_getter,
505 	php_mb_zend_internal_encoding_setter
506 };
507 /* }}} */
508 
509 /* {{{ _php_mb_compile_regex */
_php_mb_compile_regex(const char * pattern)510 static void *_php_mb_compile_regex(const char *pattern)
511 {
512 	pcre2_code *retval;
513 	PCRE2_SIZE err_offset;
514 	int errnum;
515 
516 	if (!(retval = pcre2_compile((PCRE2_SPTR)pattern, PCRE2_ZERO_TERMINATED,
517 			PCRE2_CASELESS, &errnum, &err_offset, php_pcre_cctx()))) {
518 		PCRE2_UCHAR err_str[128];
519 		pcre2_get_error_message(errnum, err_str, sizeof(err_str));
520 		php_error_docref(NULL, E_WARNING, "%s (offset=%zu): %s", pattern, err_offset, err_str);
521 	}
522 	return retval;
523 }
524 /* }}} */
525 
526 /* {{{ _php_mb_match_regex */
_php_mb_match_regex(void * opaque,const char * str,size_t str_len)527 static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len)
528 {
529 	int res;
530 
531 	pcre2_match_data *match_data = php_pcre_create_match_data(0, opaque);
532 	if (NULL == match_data) {
533 		pcre2_code_free(opaque);
534 		php_error_docref(NULL, E_WARNING, "Cannot allocate match data");
535 		return FAILURE;
536 	}
537 	res = pcre2_match(opaque, (PCRE2_SPTR)str, str_len, 0, 0, match_data, php_pcre_mctx()) >= 0;
538 	php_pcre_free_match_data(match_data);
539 
540 	return res;
541 }
542 /* }}} */
543 
544 /* {{{ _php_mb_free_regex */
_php_mb_free_regex(void * opaque)545 static void _php_mb_free_regex(void *opaque)
546 {
547 	pcre2_code_free(opaque);
548 }
549 /* }}} */
550 
551 /* {{{ php_mb_nls_get_default_detect_order_list */
php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang,enum mbfl_no_encoding ** plist,size_t * plist_size)552 static int php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang, enum mbfl_no_encoding **plist, size_t *plist_size)
553 {
554 	size_t i;
555 
556 	*plist = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut;
557 	*plist_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
558 
559 	for (i = 0; i < sizeof(php_mb_default_identify_list) / sizeof(php_mb_default_identify_list[0]); i++) {
560 		if (php_mb_default_identify_list[i].lang == lang) {
561 			*plist = (enum mbfl_no_encoding *)php_mb_default_identify_list[i].list;
562 			*plist_size = php_mb_default_identify_list[i].list_size;
563 			return 1;
564 		}
565 	}
566 	return 0;
567 }
568 /* }}} */
569 
php_mb_rfc1867_substring_conf(const zend_encoding * encoding,char * start,size_t len,char quote)570 static char *php_mb_rfc1867_substring_conf(const zend_encoding *encoding, char *start, size_t len, char quote)
571 {
572 	char *result = emalloc(len + 2);
573 	char *resp = result;
574 	size_t i;
575 
576 	for (i = 0; i < len && start[i] != quote; ++i) {
577 		if (start[i] == '\\' && (start[i + 1] == '\\' || (quote && start[i + 1] == quote))) {
578 			*resp++ = start[++i];
579 		} else {
580 			size_t j = php_mb_mbchar_bytes_ex(start+i, (const mbfl_encoding *)encoding);
581 
582 			while (j-- > 0 && i < len) {
583 				*resp++ = start[i++];
584 			}
585 			--i;
586 		}
587 	}
588 
589 	*resp = '\0';
590 	return result;
591 }
592 
php_mb_rfc1867_getword(const zend_encoding * encoding,char ** line,char stop)593 static char *php_mb_rfc1867_getword(const zend_encoding *encoding, char **line, char stop) /* {{{ */
594 {
595 	char *pos = *line, quote;
596 	char *res;
597 
598 	while (*pos && *pos != stop) {
599 		if ((quote = *pos) == '"' || quote == '\'') {
600 			++pos;
601 			while (*pos && *pos != quote) {
602 				if (*pos == '\\' && pos[1] && pos[1] == quote) {
603 					pos += 2;
604 				} else {
605 					++pos;
606 				}
607 			}
608 			if (*pos) {
609 				++pos;
610 			}
611 		} else {
612 			pos += php_mb_mbchar_bytes_ex(pos, (const mbfl_encoding *)encoding);
613 
614 		}
615 	}
616 	if (*pos == '\0') {
617 		res = estrdup(*line);
618 		*line += strlen(*line);
619 		return res;
620 	}
621 
622 	res = estrndup(*line, pos - *line);
623 
624 	while (*pos == stop) {
625 		pos += php_mb_mbchar_bytes_ex(pos, (const mbfl_encoding *)encoding);
626 	}
627 
628 	*line = pos;
629 	return res;
630 }
631 /* }}} */
632 
/* Extract one configuration word from str into a newly allocated string.
 * Leading whitespace is skipped. A word that starts with ' or " runs up to
 * the matching quote; otherwise it runs up to the next whitespace byte.
 * Unescaping is done by php_mb_rfc1867_substring_conf(). An input that holds
 * only whitespace yields an empty string. */
static char *php_mb_rfc1867_getword_conf(const zend_encoding *encoding, char *str) /* {{{ */
{
	for (; *str && isspace(*(unsigned char *)str); ++str) {
		/* skip leading whitespace */
	}

	if (*str == '\0') {
		return estrdup("");
	}

	if (*str != '"' && *str != '\'') {
		/* Bare word: measure up to the next whitespace byte. */
		size_t word_len = 0;

		while (str[word_len] && !isspace((unsigned char)str[word_len])) {
			++word_len;
		}
		return php_mb_rfc1867_substring_conf(encoding, str, word_len, 0);
	}

	/* Quoted word: pass everything after the opening quote. */
	const char quote = *str;
	char *body = str + 1;

	return php_mb_rfc1867_substring_conf(encoding, body, strlen(body), quote);
}
657 /* }}} */
658 
php_mb_rfc1867_basename(const zend_encoding * encoding,char * filename)659 static char *php_mb_rfc1867_basename(const zend_encoding *encoding, char *filename) /* {{{ */
660 {
661 	char *s, *s2;
662 	const size_t filename_len = strlen(filename);
663 
664 	/* The \ check should technically be needed for win32 systems only where
665 	 * it is a valid path separator. However, IE in all it's wisdom always sends
666 	 * the full path of the file on the user's filesystem, which means that unless
667 	 * the user does basename() they get a bogus file name. Until IE's user base drops
668 	 * to nill or problem is fixed this code must remain enabled for all systems. */
669 	s = php_mb_safe_strrchr_ex(filename, '\\', filename_len, (const mbfl_encoding *)encoding);
670 	s2 = php_mb_safe_strrchr_ex(filename, '/', filename_len, (const mbfl_encoding *)encoding);
671 
672 	if (s && s2) {
673 		if (s > s2) {
674 			return ++s;
675 		} else {
676 			return ++s2;
677 		}
678 	} else if (s) {
679 		return ++s;
680 	} else if (s2) {
681 		return ++s2;
682 	} else {
683 		return filename;
684 	}
685 }
686 /* }}} */
687 
688 /* {{{ php.ini directive handler */
689 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_language) */
PHP_INI_MH(OnUpdate_mbstring_language)690 static PHP_INI_MH(OnUpdate_mbstring_language)
691 {
692 	enum mbfl_no_language no_language;
693 
694 	no_language = mbfl_name2no_language(ZSTR_VAL(new_value));
695 	if (no_language == mbfl_no_language_invalid) {
696 		MBSTRG(language) = mbfl_no_language_neutral;
697 		return FAILURE;
698 	}
699 	MBSTRG(language) = no_language;
700 	php_mb_nls_get_default_detect_order_list(no_language, &MBSTRG(default_detect_order_list), &MBSTRG(default_detect_order_list_size));
701 	return SUCCESS;
702 }
703 /* }}} */
704 
705 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_detect_order) */
PHP_INI_MH(OnUpdate_mbstring_detect_order)706 static PHP_INI_MH(OnUpdate_mbstring_detect_order)
707 {
708 	const mbfl_encoding **list;
709 	size_t size;
710 
711 	if (!new_value) {
712 		if (MBSTRG(detect_order_list)) {
713 			pefree(ZEND_VOIDP(MBSTRG(detect_order_list)), 1);
714 		}
715 		MBSTRG(detect_order_list) = NULL;
716 		MBSTRG(detect_order_list_size) = 0;
717 		return SUCCESS;
718 	}
719 
720 	if (FAILURE == php_mb_parse_encoding_list(ZSTR_VAL(new_value), ZSTR_LEN(new_value), &list, &size, /* persistent */ 1, /* arg_num */ 0, /* allow_pass_encoding */ 0) || size == 0) {
721 		return FAILURE;
722 	}
723 
724 	if (MBSTRG(detect_order_list)) {
725 		pefree(ZEND_VOIDP(MBSTRG(detect_order_list)), 1);
726 	}
727 	MBSTRG(detect_order_list) = list;
728 	MBSTRG(detect_order_list_size) = size;
729 	return SUCCESS;
730 }
731 /* }}} */
732 
_php_mb_ini_mbstring_http_input_set(const char * new_value,size_t new_value_length)733 static int _php_mb_ini_mbstring_http_input_set(const char *new_value, size_t new_value_length) {
734 	const mbfl_encoding **list;
735 	size_t size;
736 	if (FAILURE == php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, /* persistent */ 1, /* arg_num */ 0, /* allow_pass_encoding */ 1) || size == 0) {
737 		return FAILURE;
738 	}
739 	if (MBSTRG(http_input_list)) {
740 		pefree(ZEND_VOIDP(MBSTRG(http_input_list)), 1);
741 	}
742 	MBSTRG(http_input_list) = list;
743 	MBSTRG(http_input_list_size) = size;
744 	return SUCCESS;
745 }
746 
747 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_input) */
PHP_INI_MH(OnUpdate_mbstring_http_input)748 static PHP_INI_MH(OnUpdate_mbstring_http_input)
749 {
750 	if (new_value) {
751 		php_error_docref("ref.mbstring", E_DEPRECATED, "Use of mbstring.http_input is deprecated");
752 	}
753 
754 	if (!new_value || !ZSTR_VAL(new_value)) {
755 		const char *encoding = php_get_input_encoding();
756 		MBSTRG(http_input_set) = 0;
757 		_php_mb_ini_mbstring_http_input_set(encoding, strlen(encoding));
758 		return SUCCESS;
759 	}
760 
761 	MBSTRG(http_input_set) = 1;
762 	return _php_mb_ini_mbstring_http_input_set(ZSTR_VAL(new_value), ZSTR_LEN(new_value));
763 }
764 /* }}} */
765 
_php_mb_ini_mbstring_http_output_set(const char * new_value)766 static int _php_mb_ini_mbstring_http_output_set(const char *new_value) {
767 	const mbfl_encoding *encoding = php_mb_get_encoding_or_pass(new_value);
768 	if (!encoding) {
769 		return FAILURE;
770 	}
771 
772 	MBSTRG(http_output_encoding) = encoding;
773 	MBSTRG(current_http_output_encoding) = encoding;
774 	return SUCCESS;
775 }
776 
777 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output) */
PHP_INI_MH(OnUpdate_mbstring_http_output)778 static PHP_INI_MH(OnUpdate_mbstring_http_output)
779 {
780 	if (new_value) {
781 		php_error_docref("ref.mbstring", E_DEPRECATED, "Use of mbstring.http_output is deprecated");
782 	}
783 
784 	if (new_value == NULL || ZSTR_LEN(new_value) == 0) {
785 		MBSTRG(http_output_set) = 0;
786 		_php_mb_ini_mbstring_http_output_set(php_get_output_encoding());
787 		return SUCCESS;
788 	}
789 
790 	MBSTRG(http_output_set) = 1;
791 	return _php_mb_ini_mbstring_http_output_set(ZSTR_VAL(new_value));
792 }
793 /* }}} */
794 
795 /* {{{ static _php_mb_ini_mbstring_internal_encoding_set */
_php_mb_ini_mbstring_internal_encoding_set(const char * new_value,size_t new_value_length)796 static int _php_mb_ini_mbstring_internal_encoding_set(const char *new_value, size_t new_value_length)
797 {
798 	const mbfl_encoding *encoding;
799 
800 	if (!new_value || !new_value_length || !(encoding = mbfl_name2encoding(new_value))) {
801 		/* falls back to UTF-8 if an unknown encoding name is given */
802 		if (new_value) {
803 			php_error_docref("ref.mbstring", E_WARNING, "Unknown encoding \"%s\" in ini setting", new_value);
804 		}
805 		encoding = &mbfl_encoding_utf8;
806 	}
807 	MBSTRG(internal_encoding) = encoding;
808 	MBSTRG(current_internal_encoding) = encoding;
809 #ifdef HAVE_MBREGEX
810 	{
811 		const char *enc_name = new_value;
812 		if (FAILURE == php_mb_regex_set_default_mbctype(enc_name)) {
813 			/* falls back to UTF-8 if an unknown encoding name is given */
814 			enc_name = "UTF-8";
815 			php_mb_regex_set_default_mbctype(enc_name);
816 		}
817 		php_mb_regex_set_mbctype(new_value);
818 	}
819 #endif
820 	return SUCCESS;
821 }
822 /* }}} */
823 
824 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_internal_encoding) */
PHP_INI_MH(OnUpdate_mbstring_internal_encoding)825 static PHP_INI_MH(OnUpdate_mbstring_internal_encoding)
826 {
827 	if (new_value) {
828 		php_error_docref("ref.mbstring", E_DEPRECATED, "Use of mbstring.internal_encoding is deprecated");
829 	}
830 
831 	if (OnUpdateString(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage) == FAILURE) {
832 		return FAILURE;
833 	}
834 
835 	if (new_value && ZSTR_LEN(new_value)) {
836 		MBSTRG(internal_encoding_set) = 1;
837 		return _php_mb_ini_mbstring_internal_encoding_set(ZSTR_VAL(new_value), ZSTR_LEN(new_value));
838 	} else {
839 		const char *encoding = php_get_internal_encoding();
840 		MBSTRG(internal_encoding_set) = 0;
841 		return _php_mb_ini_mbstring_internal_encoding_set(encoding, strlen(encoding));
842 	}
843 }
844 /* }}} */
845 
846 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_substitute_character) */
PHP_INI_MH(OnUpdate_mbstring_substitute_character)847 static PHP_INI_MH(OnUpdate_mbstring_substitute_character)
848 {
849 	int c;
850 	char *endptr = NULL;
851 
852 	if (new_value != NULL) {
853 		if (strcasecmp("none", ZSTR_VAL(new_value)) == 0) {
854 			MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
855 			MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
856 		} else if (strcasecmp("long", ZSTR_VAL(new_value)) == 0) {
857 			MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
858 			MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
859 		} else if (strcasecmp("entity", ZSTR_VAL(new_value)) == 0) {
860 			MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
861 			MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
862 		} else {
863 			MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
864 			MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
865 			if (ZSTR_LEN(new_value) > 0) {
866 				c = strtol(ZSTR_VAL(new_value), &endptr, 0);
867 				if (*endptr == '\0') {
868 					MBSTRG(filter_illegal_substchar) = c;
869 					MBSTRG(current_filter_illegal_substchar) = c;
870 				}
871 			}
872 		}
873 	} else {
874 		MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
875 		MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
876 		MBSTRG(filter_illegal_substchar) = 0x3f;	/* '?' */
877 		MBSTRG(current_filter_illegal_substchar) = 0x3f;	/* '?' */
878 	}
879 
880 	return SUCCESS;
881 }
882 /* }}} */
883 
884 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_encoding_translation) */
PHP_INI_MH(OnUpdate_mbstring_encoding_translation)885 static PHP_INI_MH(OnUpdate_mbstring_encoding_translation)
886 {
887 	if (new_value == NULL) {
888 		return FAILURE;
889 	}
890 
891 	OnUpdateBool(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
892 
893 	if (MBSTRG(encoding_translation)) {
894 		sapi_unregister_post_entry(php_post_entries);
895 		sapi_register_post_entries(mbstr_post_entries);
896 	} else {
897 		sapi_unregister_post_entry(mbstr_post_entries);
898 		sapi_register_post_entries(php_post_entries);
899 	}
900 
901 	return SUCCESS;
902 }
903 /* }}} */
904 
/* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes) */
PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes)906 static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes)
907 {
908 	zend_string *tmp;
909 	void *re = NULL;
910 
911 	if (!new_value) {
912 		new_value = entry->orig_value;
913 	}
914 	tmp = php_trim(new_value, NULL, 0, 3);
915 
916 	if (ZSTR_LEN(tmp) > 0) {
917 		if (!(re = _php_mb_compile_regex(ZSTR_VAL(tmp)))) {
918 			zend_string_release_ex(tmp, 0);
919 			return FAILURE;
920 		}
921 	}
922 
923 	if (MBSTRG(http_output_conv_mimetypes)) {
924 		_php_mb_free_regex(MBSTRG(http_output_conv_mimetypes));
925 	}
926 
927 	MBSTRG(http_output_conv_mimetypes) = re;
928 
929 	zend_string_release_ex(tmp, 0);
930 	return SUCCESS;
931 }
932 /* }}} */
933 /* }}} */
934 
935 /* {{{ php.ini directive registration */
936 PHP_INI_BEGIN()
937 	PHP_INI_ENTRY("mbstring.language", "neutral", PHP_INI_ALL, OnUpdate_mbstring_language)
938 	PHP_INI_ENTRY("mbstring.detect_order", NULL, PHP_INI_ALL, OnUpdate_mbstring_detect_order)
939 	PHP_INI_ENTRY("mbstring.http_input", NULL, PHP_INI_ALL, OnUpdate_mbstring_http_input)
940 	PHP_INI_ENTRY("mbstring.http_output", NULL, PHP_INI_ALL, OnUpdate_mbstring_http_output)
941 	STD_PHP_INI_ENTRY("mbstring.internal_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_internal_encoding, internal_encoding_name, zend_mbstring_globals, mbstring_globals)
942 	PHP_INI_ENTRY("mbstring.substitute_character", NULL, PHP_INI_ALL, OnUpdate_mbstring_substitute_character)
943 
944 	STD_PHP_INI_BOOLEAN("mbstring.encoding_translation", "0",
945 		PHP_INI_SYSTEM | PHP_INI_PERDIR,
946 		OnUpdate_mbstring_encoding_translation,
947 		encoding_translation, zend_mbstring_globals, mbstring_globals)
948 	PHP_INI_ENTRY("mbstring.http_output_conv_mimetypes",
949 		"^(text/|application/xhtml\\+xml)",
950 		PHP_INI_ALL,
951 		OnUpdate_mbstring_http_output_conv_mimetypes)
952 
953 	STD_PHP_INI_BOOLEAN("mbstring.strict_detection", "0",
954 		PHP_INI_ALL,
955 		OnUpdateBool,
956 		strict_detection, zend_mbstring_globals, mbstring_globals)
957 #ifdef HAVE_MBREGEX
958 	STD_PHP_INI_ENTRY("mbstring.regex_stack_limit", "100000",PHP_INI_ALL, OnUpdateLong, regex_stack_limit, zend_mbstring_globals, mbstring_globals)
959 	STD_PHP_INI_ENTRY("mbstring.regex_retry_limit", "1000000",PHP_INI_ALL, OnUpdateLong, regex_retry_limit, zend_mbstring_globals, mbstring_globals)
960 #endif
PHP_INI_END()961 PHP_INI_END()
962 /* }}} */
963 
964 static void mbstring_internal_encoding_changed_hook(void) {
965 	/* One of the internal_encoding / input_encoding / output_encoding ini settings changed. */
966 	if (!MBSTRG(internal_encoding_set)) {
967 		const char *encoding = php_get_internal_encoding();
968 		_php_mb_ini_mbstring_internal_encoding_set(encoding, strlen(encoding));
969 	}
970 
971 	if (!MBSTRG(http_output_set)) {
972 		const char *encoding = php_get_output_encoding();
973 		_php_mb_ini_mbstring_http_output_set(encoding);
974 	}
975 
976 	if (!MBSTRG(http_input_set)) {
977 		const char *encoding = php_get_input_encoding();
978 		_php_mb_ini_mbstring_http_input_set(encoding, strlen(encoding));
979 	}
980 }
981 
982 /* {{{ module global initialize handler */
PHP_GINIT_FUNCTION(mbstring)983 static PHP_GINIT_FUNCTION(mbstring)
984 {
985 #if defined(COMPILE_DL_MBSTRING) && defined(ZTS)
986 ZEND_TSRMLS_CACHE_UPDATE();
987 #endif
988 
989 	mbstring_globals->language = mbfl_no_language_uni;
990 	mbstring_globals->internal_encoding = NULL;
991 	mbstring_globals->current_internal_encoding = mbstring_globals->internal_encoding;
992 	mbstring_globals->http_output_encoding = &mbfl_encoding_pass;
993 	mbstring_globals->current_http_output_encoding = &mbfl_encoding_pass;
994 	mbstring_globals->http_input_identify = NULL;
995 	mbstring_globals->http_input_identify_get = NULL;
996 	mbstring_globals->http_input_identify_post = NULL;
997 	mbstring_globals->http_input_identify_cookie = NULL;
998 	mbstring_globals->http_input_identify_string = NULL;
999 	mbstring_globals->http_input_list = NULL;
1000 	mbstring_globals->http_input_list_size = 0;
1001 	mbstring_globals->detect_order_list = NULL;
1002 	mbstring_globals->detect_order_list_size = 0;
1003 	mbstring_globals->current_detect_order_list = NULL;
1004 	mbstring_globals->current_detect_order_list_size = 0;
1005 	mbstring_globals->default_detect_order_list = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut;
1006 	mbstring_globals->default_detect_order_list_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
1007 	mbstring_globals->filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1008 	mbstring_globals->filter_illegal_substchar = 0x3f;	/* '?' */
1009 	mbstring_globals->current_filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1010 	mbstring_globals->current_filter_illegal_substchar = 0x3f;	/* '?' */
1011 	mbstring_globals->illegalchars = 0;
1012 	mbstring_globals->encoding_translation = 0;
1013 	mbstring_globals->strict_detection = 0;
1014 	mbstring_globals->outconv = NULL;
1015 	mbstring_globals->http_output_conv_mimetypes = NULL;
1016 #ifdef HAVE_MBREGEX
1017 	mbstring_globals->mb_regex_globals = php_mb_regex_globals_alloc();
1018 #endif
1019 	mbstring_globals->last_used_encoding_name = NULL;
1020 	mbstring_globals->last_used_encoding = NULL;
1021 	mbstring_globals->internal_encoding_set = 0;
1022 	mbstring_globals->http_output_set = 0;
1023 	mbstring_globals->http_input_set = 0;
1024 }
1025 /* }}} */
1026 
1027 /* {{{ PHP_GSHUTDOWN_FUNCTION */
PHP_GSHUTDOWN_FUNCTION(mbstring)1028 static PHP_GSHUTDOWN_FUNCTION(mbstring)
1029 {
1030 	if (mbstring_globals->http_input_list) {
1031 		free(ZEND_VOIDP(mbstring_globals->http_input_list));
1032 	}
1033 	if (mbstring_globals->detect_order_list) {
1034 		free(ZEND_VOIDP(mbstring_globals->detect_order_list));
1035 	}
1036 	if (mbstring_globals->http_output_conv_mimetypes) {
1037 		_php_mb_free_regex(mbstring_globals->http_output_conv_mimetypes);
1038 	}
1039 #ifdef HAVE_MBREGEX
1040 	php_mb_regex_globals_free(mbstring_globals->mb_regex_globals);
1041 #endif
1042 }
1043 /* }}} */
1044 
1045 /* {{{ PHP_MINIT_FUNCTION(mbstring) */
PHP_MINIT_FUNCTION(mbstring)1046 PHP_MINIT_FUNCTION(mbstring)
1047 {
1048 #if defined(COMPILE_DL_MBSTRING) && defined(ZTS)
1049 ZEND_TSRMLS_CACHE_UPDATE();
1050 #endif
1051 
1052 	REGISTER_INI_ENTRIES();
1053 
1054 	/* We assume that we're the only user of the hook. */
1055 	ZEND_ASSERT(php_internal_encoding_changed == NULL);
1056 	php_internal_encoding_changed = mbstring_internal_encoding_changed_hook;
1057 	mbstring_internal_encoding_changed_hook();
1058 
1059 	/* This is a global handler. Should not be set in a per-request handler. */
1060 	sapi_register_treat_data(mbstr_treat_data);
1061 
1062 	/* Post handlers are stored in the thread-local context. */
1063 	if (MBSTRG(encoding_translation)) {
1064 		sapi_register_post_entries(mbstr_post_entries);
1065 	}
1066 
1067 	REGISTER_LONG_CONSTANT("MB_CASE_UPPER", PHP_UNICODE_CASE_UPPER, CONST_CS | CONST_PERSISTENT);
1068 	REGISTER_LONG_CONSTANT("MB_CASE_LOWER", PHP_UNICODE_CASE_LOWER, CONST_CS | CONST_PERSISTENT);
1069 	REGISTER_LONG_CONSTANT("MB_CASE_TITLE", PHP_UNICODE_CASE_TITLE, CONST_CS | CONST_PERSISTENT);
1070 	REGISTER_LONG_CONSTANT("MB_CASE_FOLD", PHP_UNICODE_CASE_FOLD, CONST_CS | CONST_PERSISTENT);
1071 	REGISTER_LONG_CONSTANT("MB_CASE_UPPER_SIMPLE", PHP_UNICODE_CASE_UPPER_SIMPLE, CONST_CS | CONST_PERSISTENT);
1072 	REGISTER_LONG_CONSTANT("MB_CASE_LOWER_SIMPLE", PHP_UNICODE_CASE_LOWER_SIMPLE, CONST_CS | CONST_PERSISTENT);
1073 	REGISTER_LONG_CONSTANT("MB_CASE_TITLE_SIMPLE", PHP_UNICODE_CASE_TITLE_SIMPLE, CONST_CS | CONST_PERSISTENT);
1074 	REGISTER_LONG_CONSTANT("MB_CASE_FOLD_SIMPLE", PHP_UNICODE_CASE_FOLD_SIMPLE, CONST_CS | CONST_PERSISTENT);
1075 
1076 #ifdef HAVE_MBREGEX
1077 	PHP_MINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1078 #endif
1079 
1080 	if (FAILURE == zend_multibyte_set_functions(&php_mb_zend_multibyte_functions)) {
1081 		return FAILURE;
1082 	}
1083 
1084 	php_rfc1867_set_multibyte_callbacks(
1085 		php_mb_encoding_translation,
1086 		php_mb_gpc_get_detect_order,
1087 		php_mb_gpc_set_input_encoding,
1088 		php_mb_rfc1867_getword,
1089 		php_mb_rfc1867_getword_conf,
1090 		php_mb_rfc1867_basename);
1091 
1092 	return SUCCESS;
1093 }
1094 /* }}} */
1095 
1096 /* {{{ PHP_MSHUTDOWN_FUNCTION(mbstring) */
PHP_MSHUTDOWN_FUNCTION(mbstring)1097 PHP_MSHUTDOWN_FUNCTION(mbstring)
1098 {
1099 	UNREGISTER_INI_ENTRIES();
1100 
1101 	zend_multibyte_restore_functions();
1102 
1103 #ifdef HAVE_MBREGEX
1104 	PHP_MSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1105 #endif
1106 
1107 	php_internal_encoding_changed = NULL;
1108 
1109 	return SUCCESS;
1110 }
1111 /* }}} */
1112 
1113 /* {{{ PHP_RINIT_FUNCTION(mbstring) */
PHP_RINIT_FUNCTION(mbstring)1114 PHP_RINIT_FUNCTION(mbstring)
1115 {
1116 	MBSTRG(current_internal_encoding) = MBSTRG(internal_encoding);
1117 	MBSTRG(current_http_output_encoding) = MBSTRG(http_output_encoding);
1118 	MBSTRG(current_filter_illegal_mode) = MBSTRG(filter_illegal_mode);
1119 	MBSTRG(current_filter_illegal_substchar) = MBSTRG(filter_illegal_substchar);
1120 
1121 	MBSTRG(illegalchars) = 0;
1122 
1123 	php_mb_populate_current_detect_order_list();
1124 
1125 #ifdef HAVE_MBREGEX
1126 	PHP_RINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1127 #endif
1128 	zend_multibyte_set_internal_encoding((const zend_encoding *)MBSTRG(internal_encoding));
1129 
1130 	return SUCCESS;
1131 }
1132 /* }}} */
1133 
1134 /* {{{ PHP_RSHUTDOWN_FUNCTION(mbstring) */
PHP_RSHUTDOWN_FUNCTION(mbstring)1135 PHP_RSHUTDOWN_FUNCTION(mbstring)
1136 {
1137 	if (MBSTRG(current_detect_order_list) != NULL) {
1138 		efree(ZEND_VOIDP(MBSTRG(current_detect_order_list)));
1139 		MBSTRG(current_detect_order_list) = NULL;
1140 		MBSTRG(current_detect_order_list_size) = 0;
1141 	}
1142 	if (MBSTRG(outconv) != NULL) {
1143 		MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
1144 		mbfl_buffer_converter_delete(MBSTRG(outconv));
1145 		MBSTRG(outconv) = NULL;
1146 	}
1147 
1148 	/* clear http input identification. */
1149 	MBSTRG(http_input_identify) = NULL;
1150 	MBSTRG(http_input_identify_post) = NULL;
1151 	MBSTRG(http_input_identify_get) = NULL;
1152 	MBSTRG(http_input_identify_cookie) = NULL;
1153 	MBSTRG(http_input_identify_string) = NULL;
1154 
1155 	if (MBSTRG(last_used_encoding_name)) {
1156 		zend_string_release(MBSTRG(last_used_encoding_name));
1157 		MBSTRG(last_used_encoding_name) = NULL;
1158 	}
1159 
1160 	MBSTRG(internal_encoding_set) = 0;
1161 	MBSTRG(http_output_set) = 0;
1162 	MBSTRG(http_input_set) = 0;
1163 
1164 #ifdef HAVE_MBREGEX
1165 	PHP_RSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1166 #endif
1167 
1168 	return SUCCESS;
1169 }
1170 /* }}} */
1171 
1172 /* {{{ PHP_MINFO_FUNCTION(mbstring) */
PHP_MINFO_FUNCTION(mbstring)1173 PHP_MINFO_FUNCTION(mbstring)
1174 {
1175 	php_info_print_table_start();
1176 	php_info_print_table_row(2, "Multibyte Support", "enabled");
1177 	php_info_print_table_row(2, "Multibyte string engine", "libmbfl");
1178 	php_info_print_table_row(2, "HTTP input encoding translation", MBSTRG(encoding_translation) ? "enabled": "disabled");
1179 	{
1180 		char tmp[256];
1181 		snprintf(tmp, sizeof(tmp), "%d.%d.%d", MBFL_VERSION_MAJOR, MBFL_VERSION_MINOR, MBFL_VERSION_TEENY);
1182 		php_info_print_table_row(2, "libmbfl version", tmp);
1183 	}
1184 	php_info_print_table_end();
1185 
1186 	php_info_print_table_start();
1187 	php_info_print_table_header(1, "mbstring extension makes use of \"streamable kanji code filter and converter\", which is distributed under the GNU Lesser General Public License version 2.1.");
1188 	php_info_print_table_end();
1189 
1190 #ifdef HAVE_MBREGEX
1191 	PHP_MINFO(mb_regex)(ZEND_MODULE_INFO_FUNC_ARGS_PASSTHRU);
1192 #endif
1193 
1194 	DISPLAY_INI_ENTRIES();
1195 }
1196 /* }}} */
1197 
1198 /* {{{ Sets the current language or Returns the current language as a string */
PHP_FUNCTION(mb_language)1199 PHP_FUNCTION(mb_language)
1200 {
1201 	zend_string *name = NULL;
1202 
1203 	ZEND_PARSE_PARAMETERS_START(0, 1)
1204 		Z_PARAM_OPTIONAL
1205 		Z_PARAM_STR_OR_NULL(name)
1206 	ZEND_PARSE_PARAMETERS_END();
1207 
1208 	if (name == NULL) {
1209 		RETVAL_STRING((char *)mbfl_no_language2name(MBSTRG(language)));
1210 	} else {
1211 		zend_string *ini_name = zend_string_init("mbstring.language", sizeof("mbstring.language") - 1, 0);
1212 		if (FAILURE == zend_alter_ini_entry(ini_name, name, PHP_INI_USER, PHP_INI_STAGE_RUNTIME)) {
1213 			zend_argument_value_error(1, "must be a valid language, \"%s\" given", ZSTR_VAL(name));
1214 			zend_string_release_ex(ini_name, 0);
1215 			RETURN_THROWS();
1216 		}
1217 		// TODO Make return void
1218 		RETVAL_TRUE;
1219 		zend_string_release_ex(ini_name, 0);
1220 	}
1221 }
1222 /* }}} */
1223 
1224 /* {{{ Sets the current internal encoding or Returns the current internal encoding as a string */
PHP_FUNCTION(mb_internal_encoding)1225 PHP_FUNCTION(mb_internal_encoding)
1226 {
1227 	char *name = NULL;
1228 	size_t name_len;
1229 	const mbfl_encoding *encoding;
1230 
1231 	ZEND_PARSE_PARAMETERS_START(0, 1)
1232 		Z_PARAM_OPTIONAL
1233 		Z_PARAM_STRING_OR_NULL(name, name_len)
1234 	ZEND_PARSE_PARAMETERS_END();
1235 
1236 	if (name == NULL) {
1237 		ZEND_ASSERT(MBSTRG(current_internal_encoding));
1238 		RETURN_STRING(MBSTRG(current_internal_encoding)->name);
1239 	} else {
1240 		encoding = mbfl_name2encoding(name);
1241 		if (!encoding) {
1242 			zend_argument_value_error(1, "must be a valid encoding, \"%s\" given", name);
1243 			RETURN_THROWS();
1244 		} else {
1245 			MBSTRG(current_internal_encoding) = encoding;
1246 			MBSTRG(internal_encoding_set) = 1;
1247 			/* TODO Return old encoding */
1248 			RETURN_TRUE;
1249 		}
1250 	}
1251 }
1252 /* }}} */
1253 
1254 /* {{{ Returns the input encoding */
PHP_FUNCTION(mb_http_input)1255 PHP_FUNCTION(mb_http_input)
1256 {
1257 	char *type = NULL;
1258 	size_t type_len = 0, n;
1259 	const mbfl_encoding **entry;
1260 	const mbfl_encoding *encoding;
1261 
1262 	ZEND_PARSE_PARAMETERS_START(0, 1)
1263 		Z_PARAM_OPTIONAL
1264 		Z_PARAM_STRING_OR_NULL(type, type_len)
1265 	ZEND_PARSE_PARAMETERS_END();
1266 
1267 	if (type == NULL) {
1268 		encoding = MBSTRG(http_input_identify);
1269 	} else {
1270 		switch (*type) {
1271 		case 'G':
1272 		case 'g':
1273 			encoding = MBSTRG(http_input_identify_get);
1274 			break;
1275 		case 'P':
1276 		case 'p':
1277 			encoding = MBSTRG(http_input_identify_post);
1278 			break;
1279 		case 'C':
1280 		case 'c':
1281 			encoding = MBSTRG(http_input_identify_cookie);
1282 			break;
1283 		case 'S':
1284 		case 's':
1285 			encoding = MBSTRG(http_input_identify_string);
1286 			break;
1287 		case 'I':
1288 		case 'i':
1289 			entry = MBSTRG(http_input_list);
1290 			n = MBSTRG(http_input_list_size);
1291 			array_init(return_value);
1292 			for (size_t i = 0; i < n; i++, entry++) {
1293 				add_next_index_string(return_value, (*entry)->name);
1294 			}
1295 			return;
1296 		case 'L':
1297 		case 'l':
1298 			entry = MBSTRG(http_input_list);
1299 			n = MBSTRG(http_input_list_size);
1300 			if (n == 0) {
1301 				// TODO should return empty string?
1302 				RETURN_FALSE;
1303 			}
1304 			// TODO Use smart_str instead.
1305 			mbfl_string result;
1306 			mbfl_memory_device device;
1307 			mbfl_memory_device_init(&device, n * 12, 0);
1308 			for (size_t i = 0; i < n; i++, entry++) {
1309 				mbfl_memory_device_strcat(&device, (*entry)->name);
1310 				mbfl_memory_device_output(',', &device);
1311 			}
1312 			mbfl_memory_device_unput(&device); /* Remove trailing comma */
1313 			mbfl_memory_device_result(&device, &result);
1314 			RETVAL_STRINGL((const char*)result.val, result.len);
1315 			mbfl_string_clear(&result);
1316 			return;
1317 		default:
1318 			zend_argument_value_error(1,
1319 				"must be one of \"G\", \"P\", \"C\", \"S\", \"I\", or \"L\"");
1320 			RETURN_THROWS();
1321 		}
1322 	}
1323 
1324 	if (encoding) {
1325 		RETURN_STRING(encoding->name);
1326 	} else {
1327 		RETURN_FALSE;
1328 	}
1329 }
1330 /* }}} */
1331 
1332 /* {{{ Sets the current output_encoding or returns the current output_encoding as a string */
PHP_FUNCTION(mb_http_output)1333 PHP_FUNCTION(mb_http_output)
1334 {
1335 	char *name = NULL;
1336 	size_t name_len;
1337 
1338 	ZEND_PARSE_PARAMETERS_START(0, 1)
1339 		Z_PARAM_OPTIONAL
1340 		Z_PARAM_STRING_OR_NULL(name, name_len)
1341 	ZEND_PARSE_PARAMETERS_END();
1342 
1343 	if (name == NULL) {
1344 		ZEND_ASSERT(MBSTRG(current_http_output_encoding));
1345 		RETURN_STRING(MBSTRG(current_http_output_encoding)->name);
1346 	} else {
1347 		const mbfl_encoding *encoding = php_mb_get_encoding_or_pass(name);
1348 		if (!encoding) {
1349 			zend_argument_value_error(1, "must be a valid encoding, \"%s\" given", name);
1350 			RETURN_THROWS();
1351 		} else {
1352 			MBSTRG(http_output_set) = 1;
1353 			MBSTRG(current_http_output_encoding) = encoding;
1354 			/* TODO Return previous encoding? */
1355 			RETURN_TRUE;
1356 		}
1357 	}
1358 }
1359 /* }}} */
1360 
/* {{{ Sets the current detect_order or returns the current detect_order as an array */
PHP_FUNCTION(mb_detect_order)1362 PHP_FUNCTION(mb_detect_order)
1363 {
1364 	zend_string *order_str = NULL;
1365 	HashTable *order_ht = NULL;
1366 
1367 	ZEND_PARSE_PARAMETERS_START(0, 1)
1368 		Z_PARAM_OPTIONAL
1369 		Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(order_ht, order_str)
1370 	ZEND_PARSE_PARAMETERS_END();
1371 
1372 	if (!order_str && !order_ht) {
1373 		size_t n = MBSTRG(current_detect_order_list_size);
1374 		const mbfl_encoding **entry = MBSTRG(current_detect_order_list);
1375 		array_init(return_value);
1376 		for (size_t i = 0; i < n; i++) {
1377 			add_next_index_string(return_value, (*entry)->name);
1378 			entry++;
1379 		}
1380 	} else {
1381 		const mbfl_encoding **list;
1382 		size_t size;
1383 		if (order_ht) {
1384 			if (FAILURE == php_mb_parse_encoding_array(order_ht, &list, &size, 1)) {
1385 				RETURN_THROWS();
1386 			}
1387 		} else {
1388 			if (FAILURE == php_mb_parse_encoding_list(ZSTR_VAL(order_str), ZSTR_LEN(order_str), &list, &size, /* persistent */ 0, /* arg_num */ 1, /* allow_pass_encoding */ 0)) {
1389 				RETURN_THROWS();
1390 			}
1391 		}
1392 
1393 		if (size == 0) {
1394 			efree(ZEND_VOIDP(list));
1395 			zend_argument_value_error(1, "must specify at least one encoding");
1396 			RETURN_THROWS();
1397 		}
1398 
1399 		if (MBSTRG(current_detect_order_list)) {
1400 			efree(ZEND_VOIDP(MBSTRG(current_detect_order_list)));
1401 		}
1402 		MBSTRG(current_detect_order_list) = list;
1403 		MBSTRG(current_detect_order_list_size) = size;
1404 		RETURN_TRUE;
1405 	}
1406 }
1407 /* }}} */
1408 
/*
 * Checks whether cp may be used as a substitute character.
 *
 * Returns 1 when cp lies in the Unicode range [0, 0x10FFFF] and is not a
 * surrogate (0xD800..0xDFFF), 0 otherwise.
 *
 * The target encoding of the conversion that will eventually use the
 * substitute character is not known here, so it cannot be checked whether
 * the code point is actually representable in it; everything else is
 * accepted.
 */
static inline int php_mb_check_code_point(zend_long cp)
{
	/* Outside of the Unicode code space. */
	const int in_unicode_range = (cp >= 0 && cp < 0x110000);
	/* Surrogates are never valid on their own, and only a single substitute
	 * character is allowed. */
	const int is_surrogate = (cp >= 0xd800 && cp <= 0xdfff);

	return in_unicode_range && !is_surrogate;
}
1427 
1428 /* {{{ Sets the current substitute_character or returns the current substitute_character */
PHP_FUNCTION(mb_substitute_character)1429 PHP_FUNCTION(mb_substitute_character)
1430 {
1431 	zend_string *substitute_character = NULL;
1432 	zend_long substitute_codepoint;
1433 	zend_bool substitute_is_null = 1;
1434 
1435 	ZEND_PARSE_PARAMETERS_START(0, 1)
1436 		Z_PARAM_OPTIONAL
1437 		Z_PARAM_STR_OR_LONG_OR_NULL(substitute_character, substitute_codepoint, substitute_is_null)
1438 	ZEND_PARSE_PARAMETERS_END();
1439 
1440 	if (substitute_is_null) {
1441 		if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
1442 			RETURN_STRING("none");
1443 		}
1444 		if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
1445 			RETURN_STRING("long");
1446 		}
1447 		if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
1448 			RETURN_STRING("entity");
1449 		}
1450 		RETURN_LONG(MBSTRG(current_filter_illegal_substchar));
1451 	}
1452 
1453 	if (substitute_character != NULL) {
1454 		if (zend_string_equals_literal_ci(substitute_character, "none")) {
1455 			MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
1456 			RETURN_TRUE;
1457 		}
1458 		if (zend_string_equals_literal_ci(substitute_character, "long")) {
1459 			MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
1460 			RETURN_TRUE;
1461 		}
1462 		if (zend_string_equals_literal_ci(substitute_character, "entity")) {
1463 			MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
1464 			RETURN_TRUE;
1465 		}
1466 		/* Invalid string value */
1467 		zend_argument_value_error(1, "must be \"none\", \"long\", \"entity\" or a valid codepoint");
1468 		RETURN_THROWS();
1469 	}
1470 	/* Integer codepoint passed */
1471 	if (!php_mb_check_code_point(substitute_codepoint)) {
1472 		zend_argument_value_error(1, "is not a valid codepoint");
1473 		RETURN_THROWS();
1474 	}
1475 
1476 	MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1477 	MBSTRG(current_filter_illegal_substchar) = substitute_codepoint;
1478 	RETURN_TRUE;
1479 }
1480 /* }}} */
1481 
1482 /* {{{ Return the preferred MIME name (charset) as a string */
PHP_FUNCTION(mb_preferred_mime_name)1483 PHP_FUNCTION(mb_preferred_mime_name)
1484 {
1485 	enum mbfl_no_encoding no_encoding;
1486 	char *name = NULL;
1487 	size_t name_len;
1488 
1489 	ZEND_PARSE_PARAMETERS_START(1, 1)
1490 		Z_PARAM_STRING(name, name_len)
1491 	ZEND_PARSE_PARAMETERS_END();
1492 
1493 	no_encoding = mbfl_name2no_encoding(name);
1494 	if (no_encoding == mbfl_no_encoding_invalid) {
1495 		zend_argument_value_error(1, "must be a valid encoding, \"%s\" given", name);
1496 		RETURN_THROWS();
1497 	}
1498 
1499 	const char *preferred_name = mbfl_no2preferred_mime_name(no_encoding);
1500 	if (preferred_name == NULL || *preferred_name == '\0') {
1501 		php_error_docref(NULL, E_WARNING, "No MIME preferred name corresponding to \"%s\"", name);
1502 		RETVAL_FALSE;
1503 	} else {
1504 		RETVAL_STRING((char *)preferred_name);
1505 	}
1506 }
1507 /* }}} */
1508 
/* Byte-range predicates; each evaluates to 1 or 0.
 * Note that the argument is evaluated more than once. */
/* 1 when c is in 0x81..0x9f or 0xe0..0xf5 */
#define IS_SJIS1(c) (((c) >= 0x81 && (c) <= 0x9f) || ((c) >= 0xe0 && (c) <= 0xf5))
/* 1 when c is in 0x40..0x7e or 0x80..0xfc */
#define IS_SJIS2(c) (((c) >= 0x40 && (c) <= 0x7e) || ((c) >= 0x80 && (c) <= 0xfc))
1511 
1512 /* {{{ Parses GET/POST/COOKIE data and sets global variables */
PHP_FUNCTION(mb_parse_str)1513 PHP_FUNCTION(mb_parse_str)
1514 {
1515 	zval *track_vars_array = NULL;
1516 	char *encstr;
1517 	size_t encstr_len;
1518 	php_mb_encoding_handler_info_t info;
1519 	const mbfl_encoding *detected;
1520 
1521 	ZEND_PARSE_PARAMETERS_START(2, 2)
1522 		Z_PARAM_STRING(encstr, encstr_len)
1523 		Z_PARAM_ZVAL(track_vars_array)
1524 	ZEND_PARSE_PARAMETERS_END();
1525 
1526 	track_vars_array = zend_try_array_init(track_vars_array);
1527 	if (!track_vars_array) {
1528 		RETURN_THROWS();
1529 	}
1530 
1531 	encstr = estrndup(encstr, encstr_len);
1532 
1533 	info.data_type              = PARSE_STRING;
1534 	info.separator              = PG(arg_separator).input;
1535 	info.report_errors          = 1;
1536 	info.to_encoding            = MBSTRG(current_internal_encoding);
1537 	info.to_language            = MBSTRG(language);
1538 	info.from_encodings         = MBSTRG(http_input_list);
1539 	info.num_from_encodings     = MBSTRG(http_input_list_size);
1540 	info.from_language          = MBSTRG(language);
1541 
1542 	detected = _php_mb_encoding_handler_ex(&info, track_vars_array, encstr);
1543 
1544 	MBSTRG(http_input_identify) = detected;
1545 
1546 	RETVAL_BOOL(detected);
1547 
1548 	if (encstr != NULL) efree(encstr);
1549 }
1550 /* }}} */
1551 
1552 /* {{{ Returns string in output buffer converted to the http_output encoding */
PHP_FUNCTION(mb_output_handler)1553 PHP_FUNCTION(mb_output_handler)
1554 {
1555 	char *arg_string;
1556 	size_t arg_string_len;
1557 	zend_long arg_status;
1558 	mbfl_string string, result;
1559 	const char *charset;
1560 	char *p;
1561 	const mbfl_encoding *encoding;
1562 	int last_feed;
1563 	size_t len;
1564 	unsigned char send_text_mimetype = 0;
1565 	char *s, *mimetype = NULL;
1566 
1567 	ZEND_PARSE_PARAMETERS_START(2, 2)
1568 		Z_PARAM_STRING(arg_string, arg_string_len)
1569 		Z_PARAM_LONG(arg_status)
1570 	ZEND_PARSE_PARAMETERS_END();
1571 
1572 	encoding = MBSTRG(current_http_output_encoding);
1573 
1574 	/* start phase only */
1575 	if ((arg_status & PHP_OUTPUT_HANDLER_START) != 0) {
1576 		/* delete the converter just in case. */
1577 		if (MBSTRG(outconv)) {
1578 			MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
1579 			mbfl_buffer_converter_delete(MBSTRG(outconv));
1580 			MBSTRG(outconv) = NULL;
1581 		}
1582 
1583 		if (encoding == &mbfl_encoding_pass) {
1584 			RETURN_STRINGL(arg_string, arg_string_len);
1585 		}
1586 
1587 		/* analyze mime type */
1588 		if (SG(sapi_headers).mimetype &&
1589 			_php_mb_match_regex(
1590 				MBSTRG(http_output_conv_mimetypes),
1591 				SG(sapi_headers).mimetype,
1592 				strlen(SG(sapi_headers).mimetype))) {
1593 			if ((s = strchr(SG(sapi_headers).mimetype,';')) == NULL) {
1594 				mimetype = estrdup(SG(sapi_headers).mimetype);
1595 			} else {
1596 				mimetype = estrndup(SG(sapi_headers).mimetype,s-SG(sapi_headers).mimetype);
1597 			}
1598 			send_text_mimetype = 1;
1599 		} else if (SG(sapi_headers).send_default_content_type) {
1600 			mimetype = SG(default_mimetype) ? SG(default_mimetype) : SAPI_DEFAULT_MIMETYPE;
1601 		}
1602 
1603 		/* if content-type is not yet set, set it and activate the converter */
1604 		if (SG(sapi_headers).send_default_content_type || send_text_mimetype) {
1605 			charset = encoding->mime_name;
1606 			if (charset) {
1607 				len = spprintf( &p, 0, "Content-Type: %s; charset=%s",  mimetype, charset );
1608 				if (sapi_add_header(p, len, 0) != FAILURE) {
1609 					SG(sapi_headers).send_default_content_type = 0;
1610 				}
1611 			}
1612 			/* activate the converter */
1613 			MBSTRG(outconv) = mbfl_buffer_converter_new(MBSTRG(current_internal_encoding), encoding, 0);
1614 			if (send_text_mimetype){
1615 				efree(mimetype);
1616 			}
1617 		}
1618 	}
1619 
1620 	/* just return if the converter is not activated. */
1621 	if (MBSTRG(outconv) == NULL) {
1622 		RETURN_STRINGL(arg_string, arg_string_len);
1623 	}
1624 
1625 	/* flag */
1626 	last_feed = ((arg_status & PHP_OUTPUT_HANDLER_END) != 0);
1627 	/* mode */
1628 	mbfl_buffer_converter_illegal_mode(MBSTRG(outconv), MBSTRG(current_filter_illegal_mode));
1629 	mbfl_buffer_converter_illegal_substchar(MBSTRG(outconv), MBSTRG(current_filter_illegal_substchar));
1630 
1631 	/* feed the string */
1632 	mbfl_string_init(&string);
1633 	/* these are not needed. convd has encoding info.
1634 	string.encoding = MBSTRG(current_internal_encoding);
1635 	*/
1636 	string.val = (unsigned char *)arg_string;
1637 	string.len = arg_string_len;
1638 
1639 	mbfl_buffer_converter_feed(MBSTRG(outconv), &string);
1640 	if (last_feed) {
1641 		mbfl_buffer_converter_flush(MBSTRG(outconv));
1642 	}
1643 	/* get the converter output, and return it */
1644 	mbfl_buffer_converter_result(MBSTRG(outconv), &result);
1645 
1646 	// TODO: avoid reallocation ???
1647 	RETVAL_STRINGL((char *)result.val, result.len);		/* the string is already strdup()'ed */
1648 	efree(result.val);
1649 
1650 	/* delete the converter if it is the last feed. */
1651 	if (last_feed) {
1652 		MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
1653 		mbfl_buffer_converter_delete(MBSTRG(outconv));
1654 		MBSTRG(outconv) = NULL;
1655 	}
1656 }
1657 /* }}} */
1658 
1659 /* {{{ Convert a multibyte string to an array. If split_length is specified,
1660  break the string down into chunks each split_length characters long. */
1661 
1662 /* structure to pass split params to the callback */
1663 struct mbfl_split_params {
1664 	zval *return_value; /* php function return value structure pointer */
1665 	mbfl_string *result_string; /* string to store result chunk */
1666 	size_t mb_chunk_length; /* actual chunk length in chars */
1667 	size_t split_length; /* split length in chars */
1668 	mbfl_convert_filter *next_filter; /* widechar to encoding converter */
1669 };
1670 
1671 /* callback function to fill split array */
mbfl_split_output(int c,void * data)1672 static int mbfl_split_output(int c, void *data)
1673 {
1674 	struct mbfl_split_params *params = (struct mbfl_split_params *)data; /* cast passed data */
1675 
1676 	(*params->next_filter->filter_function)(c, params->next_filter); /* decoder filter */
1677 
1678 	if (params->split_length == ++params->mb_chunk_length) { /* if current chunk size reached defined chunk size or last char reached */
1679 		mbfl_convert_filter_flush(params->next_filter);/* concatenate separate decoded chars to the solid string */
1680 		mbfl_memory_device *device = (mbfl_memory_device *)params->next_filter->data; /* chars container */
1681 		mbfl_string *chunk = params->result_string;
1682 		mbfl_memory_device_result(device, chunk); /* make chunk */
1683 		add_next_index_stringl(params->return_value, (const char *)chunk->val, chunk->len); /* add chunk to the array */
1684 		efree(chunk->val);
1685 		params->mb_chunk_length = 0; /* reset mb_chunk size */
1686 	}
1687 
1688 	return 0;
1689 }
1690 
PHP_FUNCTION(mb_str_split)1691 PHP_FUNCTION(mb_str_split)
1692 {
1693 	zend_string *str, *encoding = NULL;
1694 	size_t mb_len, chunks, chunk_len;
1695 	const char *p, *last; /* pointer for the string cursor and last string char */
1696 	mbfl_string string, result_string;
1697 	const mbfl_encoding *mbfl_encoding;
1698 	zend_long split_length = 1;
1699 
1700 	ZEND_PARSE_PARAMETERS_START(1, 3)
1701 		Z_PARAM_STR(str)
1702 		Z_PARAM_OPTIONAL
1703 		Z_PARAM_LONG(split_length)
1704 		Z_PARAM_STR_OR_NULL(encoding)
1705 	ZEND_PARSE_PARAMETERS_END();
1706 
1707 	if (split_length <= 0) {
1708 		zend_argument_value_error(2, "must be greater than 0");
1709 		RETURN_THROWS();
1710 	}
1711 
1712 	/* fill mbfl_string structure */
1713 	string.val = (unsigned char *) ZSTR_VAL(str);
1714 	string.len = ZSTR_LEN(str);
1715 	string.encoding = php_mb_get_encoding(encoding, 3);
1716 	if (!string.encoding) {
1717 		RETURN_THROWS();
1718 	}
1719 
1720 	p = ZSTR_VAL(str); /* string cursor pointer */
1721 	last = ZSTR_VAL(str) + ZSTR_LEN(str); /* last string char pointer */
1722 
1723 	mbfl_encoding = string.encoding;
1724 
1725 	/* first scenario: 1,2,4-bytes fixed width encodings (head part) */
1726 	if (mbfl_encoding->flag & MBFL_ENCTYPE_SBCS) { /* 1 byte */
1727 		mb_len = string.len;
1728 		chunk_len = (size_t)split_length; /* chunk length in bytes */
1729 	} else if (mbfl_encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) { /* 2 bytes */
1730 		mb_len = string.len / 2;
1731 		chunk_len = split_length * 2;
1732 	} else if (mbfl_encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) { /* 4 bytes */
1733 		mb_len = string.len / 4;
1734 		chunk_len = split_length * 4;
1735 	} else if (mbfl_encoding->mblen_table != NULL) {
1736 		/* second scenario: variable width encodings with length table */
1737 		char unsigned const *mbtab = mbfl_encoding->mblen_table;
1738 
1739 		/* assume that we have 1-bytes characters */
1740 		array_init_size(return_value, (string.len + split_length) / split_length); /* round up */
1741 
1742 		while (p < last) { /* split cycle work until the cursor has reached the last byte */
1743 			char const *chunk_p = p; /* chunk first byte pointer */
1744 			chunk_len = 0; /* chunk length in bytes */
1745 			zend_long char_count;
1746 
1747 			for (char_count = 0; char_count < split_length && p < last; ++char_count) {
1748 				char unsigned const m = mbtab[*(const unsigned char *)p]; /* single character length table */
1749 				chunk_len += m;
1750 				p += m;
1751 			}
1752 			if (p >= last) chunk_len -= p - last; /* check if chunk is in bounds */
1753 			add_next_index_stringl(return_value, chunk_p, chunk_len);
1754 		}
1755 		return;
1756 	} else {
1757 		/* third scenario: other multibyte encodings */
1758 		mbfl_convert_filter *filter, *decoder;
1759 
1760 		/* assume that we have 1-bytes characters */
1761 		array_init_size(return_value, (string.len + split_length) / split_length); /* round up */
1762 
1763 		/* decoder filter to decode wchar to encoding */
1764 		mbfl_memory_device device;
1765 		mbfl_memory_device_init(&device, split_length + 1, 0);
1766 
1767 		decoder = mbfl_convert_filter_new(
1768 				&mbfl_encoding_wchar,
1769 				string.encoding,
1770 				mbfl_memory_device_output,
1771 				NULL,
1772 				&device);
1773 		/* assert that nothing is wrong with the decoder */
1774 		ZEND_ASSERT(decoder != NULL);
1775 
1776 		/* wchar filter */
1777 		mbfl_string_init(&result_string); /* mbfl_string to store chunk in the callback */
1778 		struct mbfl_split_params params = { /* init callback function params structure */
1779 			.return_value = return_value,
1780 			.result_string = &result_string,
1781 			.mb_chunk_length = 0,
1782 			.split_length = (size_t)split_length,
1783 			.next_filter = decoder,
1784 		};
1785 
1786 		filter = mbfl_convert_filter_new(
1787 				string.encoding,
1788 				&mbfl_encoding_wchar,
1789 				mbfl_split_output,
1790 				NULL,
1791 				&params);
1792 		/* assert that nothing is wrong with the filter */
1793 		ZEND_ASSERT(filter != NULL);
1794 
1795 		while (p < last - 1) { /* cycle each byte except last with callback function */
1796 			(*filter->filter_function)(*p++, filter);
1797 		}
1798 		params.mb_chunk_length = split_length - 1; /* force to finish current chunk */
1799 		(*filter->filter_function)(*p++, filter); /* process last char */
1800 
1801 		mbfl_convert_filter_delete(decoder);
1802 		mbfl_convert_filter_delete(filter);
1803 		mbfl_memory_device_clear(&device);
1804 		return;
1805 	}
1806 
1807 	/* first scenario: 1,2,4-bytes fixed width encodings (tail part) */
1808 	chunks = (mb_len + split_length - 1) / split_length; /* (round up idiom) */
1809 	array_init_size(return_value, chunks);
1810 	if (chunks != 0) {
1811 		zend_long i;
1812 
1813 		for (i = 0; i < chunks - 1; p += chunk_len, ++i) {
1814 			add_next_index_stringl(return_value, p, chunk_len);
1815 		}
1816 		add_next_index_stringl(return_value, p, last - p);
1817 	}
1818 }
1819 /* }}} */
1820 
1821 /* {{{ Get character numbers of a string */
PHP_FUNCTION(mb_strlen)1822 PHP_FUNCTION(mb_strlen)
1823 {
1824 	mbfl_string string;
1825 	char *str;
1826 	zend_string *enc_name = NULL;
1827 
1828 	ZEND_PARSE_PARAMETERS_START(1, 2)
1829 		Z_PARAM_STRING(str, string.len)
1830 		Z_PARAM_OPTIONAL
1831 		Z_PARAM_STR_OR_NULL(enc_name)
1832 	ZEND_PARSE_PARAMETERS_END();
1833 
1834 	string.val = (unsigned char*)str;
1835 	string.encoding = php_mb_get_encoding(enc_name, 2);
1836 	if (!string.encoding) {
1837 		RETURN_THROWS();
1838 	}
1839 
1840 	size_t n = mbfl_strlen(&string);
1841 	/* Only way this can fail is if the conversion creation fails
1842 	 * this would imply some sort of memory allocation failure which is a bug */
1843 	ZEND_ASSERT(!mbfl_is_error(n));
1844 	RETVAL_LONG(n);
1845 }
1846 /* }}} */
1847 
handle_strpos_error(size_t error)1848 static void handle_strpos_error(size_t error) {
1849 	switch (error) {
1850 	case MBFL_ERROR_NOT_FOUND:
1851 		break;
1852 	case MBFL_ERROR_ENCODING:
1853 		php_error_docref(NULL, E_WARNING, "Conversion error");
1854 		break;
1855 	case MBFL_ERROR_OFFSET:
1856 		zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
1857 		break;
1858 	default:
1859 		zend_value_error("mb_strpos(): Unknown error");
1860 		break;
1861 	}
1862 }
1863 
1864 /* {{{ Find position of first occurrence of a string within another */
PHP_FUNCTION(mb_strpos)1865 PHP_FUNCTION(mb_strpos)
1866 {
1867 	int reverse = 0;
1868 	zend_long offset = 0;
1869 	char *haystack_val, *needle_val;
1870 	mbfl_string haystack, needle;
1871 	zend_string *enc_name = NULL;
1872 
1873 	ZEND_PARSE_PARAMETERS_START(2, 4)
1874 		Z_PARAM_STRING(haystack_val, haystack.len)
1875 		Z_PARAM_STRING(needle_val, needle.len)
1876 		Z_PARAM_OPTIONAL
1877 		Z_PARAM_LONG(offset)
1878 		Z_PARAM_STR_OR_NULL(enc_name)
1879 	ZEND_PARSE_PARAMETERS_END();
1880 
1881 	haystack.val = (unsigned char*)haystack_val;
1882 	needle.val = (unsigned char*)needle_val;
1883 
1884 	haystack.encoding = needle.encoding = php_mb_get_encoding(enc_name, 4);
1885 	if (!haystack.encoding) {
1886 		RETURN_THROWS();
1887 	}
1888 
1889 	size_t n = mbfl_strpos(&haystack, &needle, offset, reverse);
1890 	if (!mbfl_is_error(n)) {
1891 		RETVAL_LONG(n);
1892 	} else {
1893 		handle_strpos_error(n);
1894 		RETVAL_FALSE;
1895 	}
1896 }
1897 /* }}} */
1898 
1899 /* {{{ Find position of last occurrence of a string within another */
PHP_FUNCTION(mb_strrpos)1900 PHP_FUNCTION(mb_strrpos)
1901 {
1902 	mbfl_string haystack, needle;
1903 	char *haystack_val, *needle_val;
1904 	zend_string *enc_name = NULL;
1905 	zend_long offset = 0;
1906 
1907 	ZEND_PARSE_PARAMETERS_START(2, 4)
1908 		Z_PARAM_STRING(haystack_val, haystack.len)
1909 		Z_PARAM_STRING(needle_val, needle.len)
1910 		Z_PARAM_OPTIONAL
1911 		Z_PARAM_LONG(offset)
1912 		Z_PARAM_STR_OR_NULL(enc_name)
1913 	ZEND_PARSE_PARAMETERS_END();
1914 
1915 	haystack.val = (unsigned char*)haystack_val;
1916 	needle.val = (unsigned char*)needle_val;
1917 
1918 	haystack.encoding = needle.encoding = php_mb_get_encoding(enc_name, 4);
1919 	if (!haystack.encoding) {
1920 		RETURN_THROWS();
1921 	}
1922 
1923 	size_t n = mbfl_strpos(&haystack, &needle, offset, 1);
1924 	if (!mbfl_is_error(n)) {
1925 		RETVAL_LONG(n);
1926 	} else {
1927 		handle_strpos_error(n);
1928 		RETVAL_FALSE;
1929 	}
1930 }
1931 /* }}} */
1932 
1933 /* {{{ Finds position of first occurrence of a string within another, case insensitive */
PHP_FUNCTION(mb_stripos)1934 PHP_FUNCTION(mb_stripos)
1935 {
1936 	zend_long offset = 0;
1937 	mbfl_string haystack, needle;
1938 	char *haystack_val, *needle_val;
1939 	zend_string *from_encoding = NULL;
1940 
1941 	ZEND_PARSE_PARAMETERS_START(2, 4)
1942 		Z_PARAM_STRING(haystack_val, haystack.len)
1943 		Z_PARAM_STRING(needle_val, needle.len)
1944 		Z_PARAM_OPTIONAL
1945 		Z_PARAM_LONG(offset)
1946 		Z_PARAM_STR_OR_NULL(from_encoding)
1947 	ZEND_PARSE_PARAMETERS_END();
1948 
1949 	haystack.val = (unsigned char*)haystack_val;
1950 	needle.val = (unsigned char*)needle_val;
1951 
1952 	const mbfl_encoding *enc = php_mb_get_encoding(from_encoding, 4);
1953 	if (!enc) {
1954 		RETURN_THROWS();
1955 	}
1956 
1957 	size_t n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, enc);
1958 
1959 	if (!mbfl_is_error(n)) {
1960 		RETVAL_LONG(n);
1961 	} else {
1962 		handle_strpos_error(n);
1963 		RETVAL_FALSE;
1964 	}
1965 }
1966 /* }}} */
1967 
1968 /* {{{ Finds position of last occurrence of a string within another, case insensitive */
PHP_FUNCTION(mb_strripos)1969 PHP_FUNCTION(mb_strripos)
1970 {
1971 	zend_long offset = 0;
1972 	mbfl_string haystack, needle;
1973 	char *haystack_val, *needle_val;
1974 	zend_string *from_encoding = NULL;
1975 
1976 	ZEND_PARSE_PARAMETERS_START(2, 4)
1977 		Z_PARAM_STRING(haystack_val, haystack.len)
1978 		Z_PARAM_STRING(needle_val, needle.len)
1979 		Z_PARAM_OPTIONAL
1980 		Z_PARAM_LONG(offset)
1981 		Z_PARAM_STR_OR_NULL(from_encoding)
1982 	ZEND_PARSE_PARAMETERS_END();
1983 
1984 	haystack.val = (unsigned char*)haystack_val;
1985 	needle.val = (unsigned char*)needle_val;
1986 
1987 	const mbfl_encoding *enc = php_mb_get_encoding(from_encoding, 4);
1988 	if (!enc) {
1989 		RETURN_THROWS();
1990 	}
1991 
1992 	size_t n = php_mb_stripos(1, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, enc);
1993 
1994 	if (!mbfl_is_error(n)) {
1995 		RETVAL_LONG(n);
1996 	} else {
1997 		handle_strpos_error(n);
1998 		RETVAL_FALSE;
1999 	}
2000 }
2001 /* }}} */
2002 
2003 #define MB_STRSTR 1
2004 #define MB_STRRCHR 2
2005 #define MB_STRISTR 3
2006 #define MB_STRRICHR 4
2007 /* {{{ php_mb_strstr_variants */
php_mb_strstr_variants(INTERNAL_FUNCTION_PARAMETERS,unsigned int variant)2008 static void php_mb_strstr_variants(INTERNAL_FUNCTION_PARAMETERS, unsigned int variant)
2009 {
2010 	int reverse_mode = 0;
2011 	size_t n;
2012 	char *haystack_val, *needle_val;
2013 	mbfl_string haystack, needle, result, *ret = NULL;
2014 	zend_string *encoding_name = NULL;
2015 	zend_bool part = 0;
2016 
2017 	ZEND_PARSE_PARAMETERS_START(2, 4)
2018 		Z_PARAM_STRING(haystack_val, haystack.len)
2019 		Z_PARAM_STRING(needle_val, needle.len)
2020 		Z_PARAM_OPTIONAL
2021 		Z_PARAM_BOOL(part)
2022 		Z_PARAM_STR_OR_NULL(encoding_name)
2023 	ZEND_PARSE_PARAMETERS_END();
2024 
2025 	haystack.val = (unsigned char*)haystack_val;
2026 	needle.val = (unsigned char*)needle_val;
2027 	haystack.encoding = needle.encoding = php_mb_get_encoding(encoding_name, 4);
2028 	if (!haystack.encoding) {
2029 		RETURN_THROWS();
2030 	}
2031 
2032 	if (variant == MB_STRRCHR || variant == MB_STRRICHR) { reverse_mode = 1; }
2033 
2034 	if (variant == MB_STRISTR || variant == MB_STRRICHR) {
2035 		n = php_mb_stripos(reverse_mode, (char *)haystack.val, haystack.len, (char *)needle.val,
2036 			needle.len, 0, needle.encoding);
2037 	} else {
2038 		n = mbfl_strpos(&haystack, &needle, 0, reverse_mode);
2039 	}
2040 
2041 	if (!mbfl_is_error(n)) {
2042 		if (part) {
2043 			ret = mbfl_substr(&haystack, &result, 0, n);
2044 			ZEND_ASSERT(ret != NULL);
2045 			// TODO: avoid reallocation ???
2046 			RETVAL_STRINGL((char *)ret->val, ret->len);
2047 			efree(ret->val);
2048 		} else {
2049 			ret = mbfl_substr(&haystack, &result, n, MBFL_SUBSTR_UNTIL_END);
2050 			ZEND_ASSERT(ret != NULL);
2051 			// TODO: avoid reallocation ???
2052 			RETVAL_STRINGL((char *)ret->val, ret->len);
2053 			efree(ret->val);
2054 		}
2055 	} else {
2056 		// FIXME use handle_strpos_error(n)
2057 		RETVAL_FALSE;
2058 	}
2059 }
2060 
2061 /* {{{ Finds first occurrence of a string within another */
PHP_FUNCTION(mb_strstr)2062 PHP_FUNCTION(mb_strstr)
2063 {
2064 	php_mb_strstr_variants(INTERNAL_FUNCTION_PARAM_PASSTHRU, MB_STRSTR);
2065 }
2066 /* }}} */
2067 
2068 /* {{{ Finds the last occurrence of a character in a string within another */
PHP_FUNCTION(mb_strrchr)2069 PHP_FUNCTION(mb_strrchr)
2070 {
2071 	php_mb_strstr_variants(INTERNAL_FUNCTION_PARAM_PASSTHRU, MB_STRRCHR);
2072 }
2073 /* }}} */
2074 
2075 /* {{{ Finds first occurrence of a string within another, case insensitive */
PHP_FUNCTION(mb_stristr)2076 PHP_FUNCTION(mb_stristr)
2077 {
2078 	php_mb_strstr_variants(INTERNAL_FUNCTION_PARAM_PASSTHRU, MB_STRISTR);
2079 }
2080 /* }}} */
2081 
2082 /* {{{ Finds the last occurrence of a character in a string within another, case insensitive */
PHP_FUNCTION(mb_strrichr)2083 PHP_FUNCTION(mb_strrichr)
2084 {
2085 	php_mb_strstr_variants(INTERNAL_FUNCTION_PARAM_PASSTHRU, MB_STRRICHR);
2086 }
2087 /* }}} */
2088 
/* The variant selectors are only needed by the strstr-family functions above */
#undef MB_STRSTR
#undef MB_STRRCHR
#undef MB_STRISTR
#undef MB_STRRICHR
2093 
2094 /* {{{ Count the number of substring occurrences */
PHP_FUNCTION(mb_substr_count)2095 PHP_FUNCTION(mb_substr_count)
2096 {
2097 	mbfl_string haystack, needle;
2098 	char *haystack_val, *needle_val;
2099 	zend_string *enc_name = NULL;
2100 
2101 	ZEND_PARSE_PARAMETERS_START(2, 3)
2102 		Z_PARAM_STRING(haystack_val, haystack.len)
2103 		Z_PARAM_STRING(needle_val, needle.len)
2104 		Z_PARAM_OPTIONAL
2105 		Z_PARAM_STR_OR_NULL(enc_name)
2106 	ZEND_PARSE_PARAMETERS_END();
2107 
2108 	haystack.val = (unsigned char*)haystack_val;
2109 	needle.val = (unsigned char*)needle_val;
2110 
2111 	if (needle.len == 0) {
2112 		zend_argument_value_error(2, "must not be empty");
2113 		RETURN_THROWS();
2114 	}
2115 
2116 	haystack.encoding = needle.encoding = php_mb_get_encoding(enc_name, 3);
2117 	if (!haystack.encoding) {
2118 		RETURN_THROWS();
2119 	}
2120 
2121 	size_t n = mbfl_substr_count(&haystack, &needle);
2122 	/* An error can only occur if needle is empty,
2123 	 * an encoding error happens (which should not happen at this stage and is a bug)
2124 	 * or the haystack is more than sizeof(size_t) bytes
2125 	 * If one of these things occur this is a bug and should be flagged as such */
2126 	ZEND_ASSERT(!mbfl_is_error(n));
2127 	RETVAL_LONG(n);
2128 }
2129 /* }}} */
2130 
2131 /* {{{ Returns part of a string */
PHP_FUNCTION(mb_substr)2132 PHP_FUNCTION(mb_substr)
2133 {
2134 	char *str;
2135 	zend_string *encoding = NULL;
2136 	zend_long from, len;
2137 	size_t real_from, real_len;
2138 	size_t str_len;
2139 	zend_bool len_is_null = 1;
2140 	mbfl_string string, result, *ret;
2141 
2142 	ZEND_PARSE_PARAMETERS_START(2, 4)
2143 		Z_PARAM_STRING(str, str_len)
2144 		Z_PARAM_LONG(from)
2145 		Z_PARAM_OPTIONAL
2146 		Z_PARAM_LONG_OR_NULL(len, len_is_null)
2147 		Z_PARAM_STR_OR_NULL(encoding)
2148 	ZEND_PARSE_PARAMETERS_END();
2149 
2150 	string.encoding = php_mb_get_encoding(encoding, 4);
2151 	if (!string.encoding) {
2152 		RETURN_THROWS();
2153 	}
2154 
2155 	string.val = (unsigned char *)str;
2156 	string.len = str_len;
2157 
2158 	/* measures length */
2159 	size_t mblen = 0;
2160 	if (from < 0 || (!len_is_null && len < 0)) {
2161 		mblen = mbfl_strlen(&string);
2162 	}
2163 
2164 	/* if "from" position is negative, count start position from the end
2165 	 * of the string
2166 	 */
2167 	if (from >= 0) {
2168 		real_from = (size_t) from;
2169 	} else if (-from < mblen) {
2170 		real_from = mblen + from;
2171 	} else {
2172 		real_from = 0;
2173 	}
2174 
2175 	/* if "length" position is negative, set it to the length
2176 	 * needed to stop that many chars from the end of the string
2177 	 */
2178 	if (len_is_null) {
2179 		real_len = MBFL_SUBSTR_UNTIL_END;
2180 	} else if (len >= 0) {
2181 		real_len = (size_t) len;
2182 	} else if (real_from < mblen && -len < mblen - real_from) {
2183 		real_len = (mblen - real_from) + len;
2184 	} else {
2185 		real_len = 0;
2186 	}
2187 
2188 	ret = mbfl_substr(&string, &result, real_from, real_len);
2189 	ZEND_ASSERT(ret != NULL);
2190 
2191 	// TODO: avoid reallocation ???
2192 	RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
2193 	efree(ret->val);
2194 }
2195 /* }}} */
2196 
2197 /* {{{ Returns part of a string */
PHP_FUNCTION(mb_strcut)2198 PHP_FUNCTION(mb_strcut)
2199 {
2200 	zend_string *encoding = NULL;
2201 	char *string_val;
2202 	zend_long from, len;
2203 	zend_bool len_is_null = 1;
2204 	mbfl_string string, result, *ret;
2205 
2206 	ZEND_PARSE_PARAMETERS_START(2, 4)
2207 		Z_PARAM_STRING(string_val, string.len)
2208 		Z_PARAM_LONG(from)
2209 		Z_PARAM_OPTIONAL
2210 		Z_PARAM_LONG_OR_NULL(len, len_is_null)
2211 		Z_PARAM_STR_OR_NULL(encoding)
2212 	ZEND_PARSE_PARAMETERS_END();
2213 
2214 	string.val = (unsigned char*)string_val;
2215 	string.encoding = php_mb_get_encoding(encoding, 4);
2216 	if (!string.encoding) {
2217 		RETURN_THROWS();
2218 	}
2219 
2220 	if (len_is_null) {
2221 		len = string.len;
2222 	}
2223 
2224 	/* if "from" position is negative, count start position from the end
2225 	 * of the string
2226 	 */
2227 	if (from < 0) {
2228 		from = string.len + from;
2229 		if (from < 0) {
2230 			from = 0;
2231 		}
2232 	}
2233 
2234 	/* if "length" position is negative, set it to the length
2235 	 * needed to stop that many chars from the end of the string
2236 	 */
2237 	if (len < 0) {
2238 		len = (string.len - from) + len;
2239 		if (len < 0) {
2240 			len = 0;
2241 		}
2242 	}
2243 
2244 	if (from > string.len) {
2245 		RETURN_EMPTY_STRING();
2246 	}
2247 
2248 	ret = mbfl_strcut(&string, &result, from, len);
2249 	ZEND_ASSERT(ret != NULL);
2250 
2251 	// TODO: avoid reallocation ???
2252 	RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
2253 	efree(ret->val);
2254 }
2255 /* }}} */
2256 
2257 /* {{{ Gets terminal width of a string */
PHP_FUNCTION(mb_strwidth)2258 PHP_FUNCTION(mb_strwidth)
2259 {
2260 	char *string_val;
2261 	mbfl_string string;
2262 	zend_string *enc_name = NULL;
2263 
2264 	ZEND_PARSE_PARAMETERS_START(1, 2)
2265 		Z_PARAM_STRING(string_val, string.len)
2266 		Z_PARAM_OPTIONAL
2267 		Z_PARAM_STR_OR_NULL(enc_name)
2268 	ZEND_PARSE_PARAMETERS_END();
2269 
2270 	string.val = (unsigned char*)string_val;
2271 	string.encoding = php_mb_get_encoding(enc_name, 2);
2272 	if (!string.encoding) {
2273 		RETURN_THROWS();
2274 	}
2275 
2276 	size_t n = mbfl_strwidth(&string);
2277 	ZEND_ASSERT(n != (size_t) -1);
2278 	RETVAL_LONG(n);
2279 }
2280 /* }}} */
2281 
2282 /* {{{ Trim the string in terminal width */
PHP_FUNCTION(mb_strimwidth)2283 PHP_FUNCTION(mb_strimwidth)
2284 {
2285 	char *str, *trimmarker = NULL;
2286 	zend_string *encoding = NULL;
2287 	zend_long from, width, swidth = 0;
2288 	size_t str_len, trimmarker_len;
2289 	mbfl_string string, result, marker, *ret;
2290 
2291 	ZEND_PARSE_PARAMETERS_START(3, 5)
2292 		Z_PARAM_STRING(str, str_len)
2293 		Z_PARAM_LONG(from)
2294 		Z_PARAM_LONG(width)
2295 		Z_PARAM_OPTIONAL
2296 		Z_PARAM_STRING(trimmarker, trimmarker_len)
2297 		Z_PARAM_STR_OR_NULL(encoding)
2298 	ZEND_PARSE_PARAMETERS_END();
2299 
2300 	string.encoding = marker.encoding = php_mb_get_encoding(encoding, 5);
2301 	if (!string.encoding) {
2302 		RETURN_THROWS();
2303 	}
2304 
2305 	string.val = (unsigned char *)str;
2306 	string.len = str_len;
2307 	marker.val = NULL;
2308 	marker.len = 0;
2309 
2310 	if ((from < 0) || (width < 0)) {
2311 		swidth = mbfl_strwidth(&string);
2312 	}
2313 
2314 	if (from < 0) {
2315 		from += swidth;
2316 	}
2317 
2318 	if (from < 0 || (size_t)from > str_len) {
2319 		zend_argument_value_error(2, "is out of range");
2320 		RETURN_THROWS();
2321 	}
2322 
2323 	if (width < 0) {
2324 		width = swidth + width - from;
2325 	}
2326 
2327 	if (width < 0) {
2328 		zend_argument_value_error(3, "is out of range");
2329 		RETURN_THROWS();
2330 	}
2331 
2332 	if (trimmarker) {
2333 		marker.val = (unsigned char *)trimmarker;
2334 		marker.len = trimmarker_len;
2335 	}
2336 
2337 	ret = mbfl_strimwidth(&string, &marker, &result, from, width);
2338 	ZEND_ASSERT(ret != NULL);
2339 	// TODO: avoid reallocation ???
2340 	RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
2341 	efree(ret->val);
2342 }
2343 /* }}} */
2344 
2345 
2346 /* See mbfl_no_encoding definition for list of unsupported encodings */
php_mb_is_unsupported_no_encoding(enum mbfl_no_encoding no_enc)2347 static inline zend_bool php_mb_is_unsupported_no_encoding(enum mbfl_no_encoding no_enc)
2348 {
2349 	return ((no_enc >= mbfl_no_encoding_invalid && no_enc <= mbfl_no_encoding_qprint)
2350 			|| (no_enc >= mbfl_no_encoding_utf7 && no_enc <= mbfl_no_encoding_utf7imap)
2351 			|| (no_enc >= mbfl_no_encoding_jis && no_enc <= mbfl_no_encoding_2022jpms)
2352 			|| (no_enc >= mbfl_no_encoding_cp50220 && no_enc <= mbfl_no_encoding_cp50222));
2353 }
2354 
2355 
2356 /* See mbfl_no_encoding definition for list of UTF-8 encodings */
php_mb_is_no_encoding_utf8(enum mbfl_no_encoding no_enc)2357 static inline zend_bool php_mb_is_no_encoding_utf8(enum mbfl_no_encoding no_enc)
2358 {
2359 	return (no_enc >= mbfl_no_encoding_utf8 && no_enc <= mbfl_no_encoding_utf8_sb);
2360 }
2361 
php_mb_convert_encoding_ex(const char * input,size_t length,const mbfl_encoding * to_encoding,const mbfl_encoding * from_encoding,size_t * output_len)2362 MBSTRING_API char *php_mb_convert_encoding_ex(const char *input, size_t length, const mbfl_encoding *to_encoding, const mbfl_encoding *from_encoding, size_t *output_len)
2363 {
2364 	mbfl_string string, result, *ret;
2365 	mbfl_buffer_converter *convd;
2366 	char *output = NULL;
2367 
2368 	if (output_len) {
2369 		*output_len = 0;
2370 	}
2371 
2372 	/* initialize string */
2373 	string.encoding = from_encoding;
2374 	string.val = (unsigned char *)input;
2375 	string.len = length;
2376 
2377 	/* initialize converter */
2378 	convd = mbfl_buffer_converter_new(from_encoding, to_encoding, string.len);
2379 	/* If this assertion fails this means some memory allocation failure which is a bug */
2380 	ZEND_ASSERT(convd != NULL);
2381 
2382 	mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
2383 	mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
2384 
2385 	/* do it */
2386 	mbfl_string_init(&result);
2387 	ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
2388 	if (ret) {
2389 		if (output_len) {
2390 			*output_len = ret->len;
2391 		}
2392 		output = (char *)ret->val;
2393 	}
2394 
2395 	MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
2396 	mbfl_buffer_converter_delete(convd);
2397 	return output;
2398 }
2399 /* }}} */
2400 
2401 /* {{{ MBSTRING_API char *php_mb_convert_encoding() */
php_mb_convert_encoding(const char * input,size_t length,const mbfl_encoding * to_encoding,const mbfl_encoding ** from_encodings,size_t num_from_encodings,size_t * output_len)2402 MBSTRING_API char *php_mb_convert_encoding(const char *input, size_t length, const mbfl_encoding *to_encoding, const mbfl_encoding **from_encodings, size_t num_from_encodings, size_t *output_len)
2403 {
2404 	const mbfl_encoding *from_encoding;
2405 
2406 	if (output_len) {
2407 		*output_len = 0;
2408 	}
2409 
2410 	/* pre-conversion encoding */
2411 	ZEND_ASSERT(num_from_encodings >= 1);
2412 	if (num_from_encodings == 1) {
2413 		from_encoding = *from_encodings;
2414 	} else {
2415 		/* auto detect */
2416 		mbfl_string string;
2417 		mbfl_string_init(&string);
2418 		string.val = (unsigned char *)input;
2419 		string.len = length;
2420 		from_encoding = mbfl_identify_encoding(
2421 			&string, from_encodings, num_from_encodings, MBSTRG(strict_detection));
2422 		if (!from_encoding) {
2423 			php_error_docref(NULL, E_WARNING, "Unable to detect character encoding");
2424 			return NULL;
2425 		}
2426 	}
2427 
2428 	return php_mb_convert_encoding_ex(input, length, to_encoding, from_encoding, output_len);
2429 }
2430 /* }}} */
2431 
php_mb_convert_encoding_recursive(HashTable * input,const mbfl_encoding * to_encoding,const mbfl_encoding ** from_encodings,size_t num_from_encodings)2432 MBSTRING_API HashTable *php_mb_convert_encoding_recursive(HashTable *input, const mbfl_encoding *to_encoding, const mbfl_encoding **from_encodings, size_t num_from_encodings)
2433 {
2434 	HashTable *output, *chash;
2435 	zend_long idx;
2436 	zend_string *key;
2437 	zval *entry, entry_tmp;
2438 	size_t ckey_len, cval_len;
2439 	char *ckey, *cval;
2440 
2441 	if (!input) {
2442 		return NULL;
2443 	}
2444 
2445 	if (GC_IS_RECURSIVE(input)) {
2446 		GC_UNPROTECT_RECURSION(input);
2447 		php_error_docref(NULL, E_WARNING, "Cannot convert recursively referenced values");
2448 		return NULL;
2449 	}
2450 	GC_TRY_PROTECT_RECURSION(input);
2451 	output = zend_new_array(zend_hash_num_elements(input));
2452 	ZEND_HASH_FOREACH_KEY_VAL(input, idx, key, entry) {
2453 		/* convert key */
2454 		if (key) {
2455 			ckey = php_mb_convert_encoding(
2456 				ZSTR_VAL(key), ZSTR_LEN(key),
2457 				to_encoding, from_encodings, num_from_encodings, &ckey_len);
2458 			key = zend_string_init(ckey, ckey_len, 0);
2459 			efree(ckey);
2460 		}
2461 		/* convert value */
2462 		ZEND_ASSERT(entry);
2463 try_again:
2464 		switch(Z_TYPE_P(entry)) {
2465 			case IS_STRING:
2466 				cval = php_mb_convert_encoding(
2467 					Z_STRVAL_P(entry), Z_STRLEN_P(entry),
2468 					to_encoding, from_encodings, num_from_encodings, &cval_len);
2469 				ZVAL_STRINGL(&entry_tmp, cval, cval_len);
2470 				efree(cval);
2471 				break;
2472 			case IS_NULL:
2473 			case IS_TRUE:
2474 			case IS_FALSE:
2475 			case IS_LONG:
2476 			case IS_DOUBLE:
2477 				ZVAL_COPY(&entry_tmp, entry);
2478 				break;
2479 			case IS_ARRAY:
2480 				chash = php_mb_convert_encoding_recursive(
2481 					Z_ARRVAL_P(entry), to_encoding, from_encodings, num_from_encodings);
2482 				if (chash) {
2483 					ZVAL_ARR(&entry_tmp, chash);
2484 				} else {
2485 					ZVAL_EMPTY_ARRAY(&entry_tmp);
2486 				}
2487 				break;
2488 			case IS_REFERENCE:
2489 				entry = Z_REFVAL_P(entry);
2490 				goto try_again;
2491 			case IS_OBJECT:
2492 			default:
2493 				if (key) {
2494 					zend_string_release(key);
2495 				}
2496 				php_error_docref(NULL, E_WARNING, "Object is not supported");
2497 				continue;
2498 		}
2499 		if (key) {
2500 			zend_hash_add(output, key, &entry_tmp);
2501 			zend_string_release(key);
2502 		} else {
2503 			zend_hash_index_add(output, idx, &entry_tmp);
2504 		}
2505 	} ZEND_HASH_FOREACH_END();
2506 	GC_TRY_UNPROTECT_RECURSION(input);
2507 
2508 	return output;
2509 }
2510 /* }}} */
2511 
2512 /* {{{ Returns converted string in desired encoding */
PHP_FUNCTION(mb_convert_encoding)2513 PHP_FUNCTION(mb_convert_encoding)
2514 {
2515 	zend_string *to_encoding_name;
2516 	zend_string *input_str, *from_encodings_str = NULL;
2517 	HashTable *input_ht, *from_encodings_ht = NULL;
2518 	const mbfl_encoding **from_encodings;
2519 	size_t num_from_encodings;
2520 	zend_bool free_from_encodings;
2521 
2522 	ZEND_PARSE_PARAMETERS_START(2, 3)
2523 		Z_PARAM_ARRAY_HT_OR_STR(input_ht, input_str)
2524 		Z_PARAM_STR(to_encoding_name)
2525 		Z_PARAM_OPTIONAL
2526 		Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(from_encodings_ht, from_encodings_str)
2527 	ZEND_PARSE_PARAMETERS_END();
2528 
2529 	const mbfl_encoding *to_encoding = php_mb_get_encoding(to_encoding_name, 2);
2530 	if (!to_encoding) {
2531 		RETURN_THROWS();
2532 	}
2533 
2534 	if (from_encodings_ht) {
2535 		if (php_mb_parse_encoding_array(from_encodings_ht, &from_encodings, &num_from_encodings, 3) == FAILURE) {
2536 			RETURN_THROWS();
2537 		}
2538 		free_from_encodings = 1;
2539 	} else if (from_encodings_str) {
2540 		if (php_mb_parse_encoding_list(ZSTR_VAL(from_encodings_str), ZSTR_LEN(from_encodings_str),
2541 				&from_encodings, &num_from_encodings,
2542 				/* persistent */ 0, /* arg_num */ 3, /* allow_pass_encoding */ 0) == FAILURE) {
2543 			RETURN_THROWS();
2544 		}
2545 		free_from_encodings = 1;
2546 	} else {
2547 		from_encodings = &MBSTRG(current_internal_encoding);
2548 		num_from_encodings = 1;
2549 		free_from_encodings = 0;
2550 	}
2551 
2552 	if (!num_from_encodings) {
2553 		efree(ZEND_VOIDP(from_encodings));
2554 		zend_argument_value_error(3, "must specify at least one encoding");
2555 		RETURN_THROWS();
2556 	}
2557 
2558 	if (input_str) {
2559 		/* new encoding */
2560 		size_t size;
2561 		char *ret = php_mb_convert_encoding(ZSTR_VAL(input_str), ZSTR_LEN(input_str),
2562 			to_encoding, from_encodings, num_from_encodings, &size);
2563 		if (ret != NULL) {
2564 			// TODO: avoid reallocation ???
2565 			RETVAL_STRINGL(ret, size);		/* the string is already strdup()'ed */
2566 			efree(ret);
2567 		} else {
2568 			RETVAL_FALSE;
2569 		}
2570 	} else {
2571 		HashTable *tmp;
2572 		tmp = php_mb_convert_encoding_recursive(
2573 			input_ht, to_encoding, from_encodings, num_from_encodings);
2574 		RETVAL_ARR(tmp);
2575 	}
2576 
2577 	if (free_from_encodings) {
2578 		efree(ZEND_VOIDP(from_encodings));
2579 	}
2580 }
2581 /* }}} */
2582 
mbstring_convert_case(int case_mode,const char * str,size_t str_len,size_t * ret_len,const mbfl_encoding * enc)2583 static char *mbstring_convert_case(
2584 		int case_mode, const char *str, size_t str_len, size_t *ret_len,
2585 		const mbfl_encoding *enc) {
2586 	return php_unicode_convert_case(
2587 		case_mode, str, str_len, ret_len, enc,
2588 		MBSTRG(current_filter_illegal_mode), MBSTRG(current_filter_illegal_substchar));
2589 }
2590 
2591 /* {{{ Returns a case-folded version of source_string */
PHP_FUNCTION(mb_convert_case)2592 PHP_FUNCTION(mb_convert_case)
2593 {
2594 	zend_string *from_encoding = NULL;
2595 	char *str;
2596 	size_t str_len, ret_len;
2597 	zend_long case_mode = 0;
2598 
2599 	ZEND_PARSE_PARAMETERS_START(2, 3)
2600 		Z_PARAM_STRING(str, str_len)
2601 		Z_PARAM_LONG(case_mode)
2602 		Z_PARAM_OPTIONAL
2603 		Z_PARAM_STR_OR_NULL(from_encoding)
2604 	ZEND_PARSE_PARAMETERS_END();
2605 
2606 	const mbfl_encoding *enc = php_mb_get_encoding(from_encoding, 3);
2607 	if (!enc) {
2608 		RETURN_THROWS();
2609 	}
2610 
2611 	if (case_mode < 0 || case_mode > PHP_UNICODE_CASE_MODE_MAX) {
2612 		zend_argument_value_error(2, "must be one of the MB_CASE_* constants");
2613 		RETURN_THROWS();
2614 	}
2615 
2616 	char *newstr = mbstring_convert_case(case_mode, str, str_len, &ret_len, enc);
2617 	/* If newstr is NULL something went wrong in mbfl and this is a bug */
2618 	ZEND_ASSERT(newstr != NULL);
2619 
2620 	// TODO: avoid reallocation ???
2621 	RETVAL_STRINGL(newstr, ret_len);
2622 	efree(newstr);
2623 }
2624 /* }}} */
2625 
2626 /* {{{ Returns a upper cased version of source_string */
PHP_FUNCTION(mb_strtoupper)2627 PHP_FUNCTION(mb_strtoupper)
2628 {
2629 	zend_string *from_encoding = NULL;
2630 	char *str;
2631 	size_t str_len, ret_len;
2632 
2633 	ZEND_PARSE_PARAMETERS_START(1, 2)
2634 		Z_PARAM_STRING(str, str_len)
2635 		Z_PARAM_OPTIONAL
2636 		Z_PARAM_STR_OR_NULL(from_encoding)
2637 	ZEND_PARSE_PARAMETERS_END();
2638 
2639 	const mbfl_encoding *enc = php_mb_get_encoding(from_encoding, 2);
2640 	if (!enc) {
2641 		RETURN_THROWS();
2642 	}
2643 
2644 	char *newstr = mbstring_convert_case(PHP_UNICODE_CASE_UPPER, str, str_len, &ret_len, enc);
2645 	/* If newstr is NULL something went wrong in mbfl and this is a bug */
2646 	ZEND_ASSERT(newstr != NULL);
2647 
2648 	// TODO: avoid reallocation ???
2649 	RETVAL_STRINGL(newstr, ret_len);
2650 	efree(newstr);
2651 }
2652 /* }}} */
2653 
2654 /* {{{ Returns a lower cased version of source_string */
PHP_FUNCTION(mb_strtolower)2655 PHP_FUNCTION(mb_strtolower)
2656 {
2657 	zend_string *from_encoding = NULL;
2658 	char *str;
2659 	size_t str_len;
2660 	char *newstr;
2661 	size_t ret_len;
2662 	const mbfl_encoding *enc;
2663 
2664 	ZEND_PARSE_PARAMETERS_START(1, 2)
2665 		Z_PARAM_STRING(str, str_len)
2666 		Z_PARAM_OPTIONAL
2667 		Z_PARAM_STR_OR_NULL(from_encoding)
2668 	ZEND_PARSE_PARAMETERS_END();
2669 
2670 	enc = php_mb_get_encoding(from_encoding, 2);
2671 	if (!enc) {
2672 		RETURN_THROWS();
2673 	}
2674 
2675 	newstr = mbstring_convert_case(PHP_UNICODE_CASE_LOWER, str, str_len, &ret_len, enc);
2676 	/* If newstr is NULL something went wrong in mbfl and this is a bug */
2677 	ZEND_ASSERT(newstr != NULL);
2678 
2679 	// TODO: avoid reallocation ???
2680 	RETVAL_STRINGL(newstr, ret_len);
2681 	efree(newstr);
2682 }
2683 /* }}} */
2684 
/* {{{ Returns the name of the detected encoding of the given string, or false if none matches */
PHP_FUNCTION(mb_detect_encoding)2686 PHP_FUNCTION(mb_detect_encoding)
2687 {
2688 	char *str;
2689 	size_t str_len;
2690 	zend_string *encoding_str = NULL;
2691 	HashTable *encoding_ht = NULL;
2692 	zend_bool strict = 0;
2693 
2694 	mbfl_string string;
2695 	const mbfl_encoding *ret;
2696 	const mbfl_encoding **elist;
2697 	size_t size;
2698 	zend_bool free_elist;
2699 
2700 	ZEND_PARSE_PARAMETERS_START(1, 3)
2701 		Z_PARAM_STRING(str, str_len)
2702 		Z_PARAM_OPTIONAL
2703 		Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(encoding_ht, encoding_str)
2704 		Z_PARAM_BOOL(strict)
2705 	ZEND_PARSE_PARAMETERS_END();
2706 
2707 	/* make encoding list */
2708 	if (encoding_ht) {
2709 		if (FAILURE == php_mb_parse_encoding_array(encoding_ht, &elist, &size, 2)) {
2710 			RETURN_THROWS();
2711 		}
2712 		free_elist = 1;
2713 	} else if (encoding_str) {
2714 		if (FAILURE == php_mb_parse_encoding_list(ZSTR_VAL(encoding_str), ZSTR_LEN(encoding_str), &elist, &size, /* persistent */ 0, /* arg_num */ 2, /* allow_pass_encoding */ 0)) {
2715 			RETURN_THROWS();
2716 		}
2717 		free_elist = 1;
2718 	} else {
2719 		elist = MBSTRG(current_detect_order_list);
2720 		size = MBSTRG(current_detect_order_list_size);
2721 		free_elist = 0;
2722 	}
2723 
2724 	if (size == 0) {
2725 		efree(ZEND_VOIDP(elist));
2726 		zend_argument_value_error(2, "must specify at least one encoding");
2727 		RETURN_THROWS();
2728 	}
2729 
2730 	if (ZEND_NUM_ARGS() < 3) {
2731 		strict = MBSTRG(strict_detection);
2732 	}
2733 
2734 	mbfl_string_init(&string);
2735 	string.val = (unsigned char *)str;
2736 	string.len = str_len;
2737 	ret = mbfl_identify_encoding(&string, elist, size, strict);
2738 
2739 	if (free_elist) {
2740 		efree(ZEND_VOIDP(elist));
2741 	}
2742 
2743 	if (ret == NULL) {
2744 		RETURN_FALSE;
2745 	}
2746 
2747 	RETVAL_STRING((char *)ret->name);
2748 }
2749 /* }}} */
2750 
2751 /* {{{ Returns an array of all supported entity encodings */
PHP_FUNCTION(mb_list_encodings)2752 PHP_FUNCTION(mb_list_encodings)
2753 {
2754 	ZEND_PARSE_PARAMETERS_NONE();
2755 
2756 	array_init(return_value);
2757 	for (const mbfl_encoding **encodings = mbfl_get_supported_encodings(); *encodings; encodings++) {
2758 		add_next_index_string(return_value, (*encodings)->name);
2759 	}
2760 }
2761 /* }}} */
2762 
2763 /* {{{ Returns an array of the aliases of a given encoding name */
PHP_FUNCTION(mb_encoding_aliases)2764 PHP_FUNCTION(mb_encoding_aliases)
2765 {
2766 	const mbfl_encoding *encoding;
2767 	zend_string *encoding_name = NULL;
2768 
2769 	ZEND_PARSE_PARAMETERS_START(1, 1)
2770 		Z_PARAM_STR(encoding_name)
2771 	ZEND_PARSE_PARAMETERS_END();
2772 
2773 	encoding = php_mb_get_encoding(encoding_name, 1);
2774 	if (!encoding) {
2775 		RETURN_THROWS();
2776 	}
2777 
2778 	array_init(return_value);
2779 	if (encoding->aliases != NULL) {
2780 		const char **alias;
2781 		for (alias = *encoding->aliases; *alias; ++alias) {
2782 			add_next_index_string(return_value, (char *)*alias);
2783 		}
2784 	}
2785 }
2786 /* }}} */
2787 
2788 /* {{{ Converts the string to MIME "encoded-word" in the format of =?charset?(B|Q)?encoded_string?= */
PHP_FUNCTION(mb_encode_mimeheader)2789 PHP_FUNCTION(mb_encode_mimeheader)
2790 {
2791 	const mbfl_encoding *charset, *transenc;
2792 	mbfl_string  string, result, *ret;
2793 	zend_string *charset_name = NULL;
2794 	char *trans_enc_name = NULL, *string_val;
2795 	size_t trans_enc_name_len;
2796 	char *linefeed = "\r\n";
2797 	size_t linefeed_len;
2798 	zend_long indent = 0;
2799 
2800 	string.encoding = MBSTRG(current_internal_encoding);
2801 
2802 	ZEND_PARSE_PARAMETERS_START(1, 5)
2803 		Z_PARAM_STRING(string_val, string.len)
2804 		Z_PARAM_OPTIONAL
2805 		Z_PARAM_STR(charset_name)
2806 		Z_PARAM_STRING(trans_enc_name, trans_enc_name_len)
2807 		Z_PARAM_STRING(linefeed, linefeed_len)
2808 		Z_PARAM_LONG(indent)
2809 	ZEND_PARSE_PARAMETERS_END();
2810 
2811 	string.val = (unsigned char*)string_val;
2812 	charset = &mbfl_encoding_pass;
2813 	transenc = &mbfl_encoding_base64;
2814 
2815 	if (charset_name != NULL) {
2816 		charset = php_mb_get_encoding(charset_name, 2);
2817 		if (!charset) {
2818 			RETURN_THROWS();
2819 		}
2820 	} else {
2821 		const mbfl_language *lang = mbfl_no2language(MBSTRG(language));
2822 		if (lang != NULL) {
2823 			charset = mbfl_no2encoding(lang->mail_charset);
2824 			transenc = mbfl_no2encoding(lang->mail_header_encoding);
2825 		}
2826 	}
2827 
2828 	if (trans_enc_name != NULL) {
2829 		if (*trans_enc_name == 'B' || *trans_enc_name == 'b') {
2830 			transenc = &mbfl_encoding_base64;
2831 		} else if (*trans_enc_name == 'Q' || *trans_enc_name == 'q') {
2832 			transenc = &mbfl_encoding_qprint;
2833 		}
2834 	}
2835 
2836 	mbfl_string_init(&result);
2837 	ret = mbfl_mime_header_encode(&string, &result, charset, transenc, linefeed, indent);
2838 	ZEND_ASSERT(ret != NULL);
2839 	// TODO: avoid reallocation ???
2840 	RETVAL_STRINGL((char *)ret->val, ret->len);	/* the string is already strdup()'ed */
2841 	efree(ret->val);
2842 }
2843 /* }}} */
2844 
2845 /* {{{ Decodes the MIME "encoded-word" in the string */
PHP_FUNCTION(mb_decode_mimeheader)2846 PHP_FUNCTION(mb_decode_mimeheader)
2847 {
2848 	char *string_val;
2849 	mbfl_string string, result, *ret;
2850 
2851 	string.encoding = MBSTRG(current_internal_encoding);
2852 
2853 	ZEND_PARSE_PARAMETERS_START(1, 1)
2854 		Z_PARAM_STRING(string_val, string.len)
2855 	ZEND_PARSE_PARAMETERS_END();
2856 
2857 	string.val = (unsigned char*)string_val;
2858 	mbfl_string_init(&result);
2859 	ret = mbfl_mime_header_decode(&string, &result, MBSTRG(current_internal_encoding));
2860 	ZEND_ASSERT(ret != NULL);
2861 	// TODO: avoid reallocation ???
2862 	RETVAL_STRINGL((char *)ret->val, ret->len);	/* the string is already strdup()'ed */
2863 	efree(ret->val);
2864 }
2865 /* }}} */
2866 
2867 /* {{{ Conversion between full-width character and half-width character (Japanese) */
PHP_FUNCTION(mb_convert_kana)2868 PHP_FUNCTION(mb_convert_kana)
2869 {
2870 	int opt;
2871 	mbfl_string string, result, *ret;
2872 	char *optstr = NULL, *string_val;
2873 	size_t optstr_len;
2874 	zend_string *encname = NULL;
2875 
2876 	ZEND_PARSE_PARAMETERS_START(1, 3)
2877 		Z_PARAM_STRING(string_val, string.len)
2878 		Z_PARAM_OPTIONAL
2879 		Z_PARAM_STRING(optstr, optstr_len)
2880 		Z_PARAM_STR_OR_NULL(encname)
2881 	ZEND_PARSE_PARAMETERS_END();
2882 
2883 	string.val = (unsigned char*)string_val;
2884 
2885 	/* "Zen" is 全, or "full"; "Han" is 半, or "half"
2886 	 * This refers to "fullwidth" or "halfwidth" variants of characters used for writing Japanese */
2887 	if (optstr != NULL) {
2888 		char *p = optstr, *e = p + optstr_len;
2889 		opt = 0;
2890 		while (p < e) {
2891 			switch (*p++) {
2892 			case 'A':
2893 				opt |= MBFL_FILT_TL_HAN2ZEN_ALL;
2894 				break;
2895 			case 'a':
2896 				opt |= MBFL_FILT_TL_ZEN2HAN_ALL;
2897 				break;
2898 			case 'R':
2899 				opt |= MBFL_FILT_TL_HAN2ZEN_ALPHA;
2900 				break;
2901 			case 'r':
2902 				opt |= MBFL_FILT_TL_ZEN2HAN_ALPHA;
2903 				break;
2904 			case 'N':
2905 				opt |= MBFL_FILT_TL_HAN2ZEN_NUMERIC;
2906 				break;
2907 			case 'n':
2908 				opt |= MBFL_FILT_TL_ZEN2HAN_NUMERIC;
2909 				break;
2910 			case 'S':
2911 				opt |= MBFL_FILT_TL_HAN2ZEN_SPACE;
2912 				break;
2913 			case 's':
2914 				opt |= MBFL_FILT_TL_ZEN2HAN_SPACE;
2915 				break;
2916 			case 'K':
2917 				opt |= MBFL_FILT_TL_HAN2ZEN_KATAKANA;
2918 				break;
2919 			case 'k':
2920 				opt |= MBFL_FILT_TL_ZEN2HAN_KATAKANA;
2921 				break;
2922 			case 'H':
2923 				opt |= MBFL_FILT_TL_HAN2ZEN_HIRAGANA;
2924 				break;
2925 			case 'h':
2926 				opt |= MBFL_FILT_TL_ZEN2HAN_HIRAGANA;
2927 				break;
2928 			case 'V':
2929 				opt |= MBFL_FILT_TL_HAN2ZEN_GLUE;
2930 				break;
2931 			case 'C':
2932 				opt |= MBFL_FILT_TL_ZEN2HAN_HIRA2KANA;
2933 				break;
2934 			case 'c':
2935 				opt |= MBFL_FILT_TL_ZEN2HAN_KANA2HIRA;
2936 				break;
2937 			case 'M':
2938 				/* TODO: figure out what 'M' and 'm' are for, and rename the constant
2939 				 * to something meaningful */
2940 				opt |= MBFL_FILT_TL_HAN2ZEN_COMPAT1;
2941 				break;
2942 			case 'm':
2943 				opt |= MBFL_FILT_TL_ZEN2HAN_COMPAT1;
2944 				break;
2945 			}
2946 		}
2947 	} else {
2948 		opt = MBFL_FILT_TL_HAN2ZEN_KATAKANA | MBFL_FILT_TL_HAN2ZEN_GLUE;
2949 	}
2950 
2951 	/* encoding */
2952 	string.encoding = php_mb_get_encoding(encname, 3);
2953 	if (!string.encoding) {
2954 		RETURN_THROWS();
2955 	}
2956 
2957 	ret = mbfl_ja_jp_hantozen(&string, &result, opt);
2958 	ZEND_ASSERT(ret != NULL);
2959 	// TODO: avoid reallocation ???
2960 	RETVAL_STRINGL((char *)ret->val, ret->len);		/* the string is already strdup()'ed */
2961 	efree(ret->val);
2962 }
2963 /* }}} */
2964 
/* Feeds every string reachable from `var` into the encoding detector.
 *
 * Strings are passed to mbfl_encoding_detector_feed(); arrays and objects
 * are walked recursively through their hash table. Other types are ignored.
 *
 * Returns 1 as soon as the detector reports that detection is complete,
 * otherwise 0. If a container that is already being visited is encountered,
 * *recursion_error is set to 1 and 0 is returned. */
static int mb_recursive_encoder_detector_feed(mbfl_encoding_detector *identd, zval *var, int *recursion_error) /* {{{ */
{
	ZVAL_DEREF(var);

	if (Z_TYPE_P(var) == IS_STRING) {
		mbfl_string chunk;

		chunk.val = (unsigned char *)Z_STRVAL_P(var);
		chunk.len = Z_STRLEN_P(var);
		/* non-zero from the detector means detection is complete */
		return mbfl_encoding_detector_feed(identd, &chunk) ? 1 : 0;
	}

	if (Z_TYPE_P(var) != IS_ARRAY && Z_TYPE_P(var) != IS_OBJECT) {
		return 0;
	}

	/* Mark refcounted containers while they are being walked so that a
	 * cycle is noticed instead of recursing forever */
	if (Z_REFCOUNTED_P(var)) {
		if (Z_IS_RECURSIVE_P(var)) {
			*recursion_error = 1;
			return 0;
		}
		Z_PROTECT_RECURSION_P(var);
	}

	int complete = 0;
	HashTable *members = HASH_OF(var);
	if (members != NULL) {
		zval *member;

		ZEND_HASH_FOREACH_VAL_IND(members, member) {
			if (mb_recursive_encoder_detector_feed(identd, member, recursion_error)) {
				complete = 1;
				break;
			}
			if (*recursion_error) {
				break;
			}
		} ZEND_HASH_FOREACH_END();
	}

	if (Z_REFCOUNTED_P(var)) {
		Z_UNPROTECT_RECURSION_P(var);
	}
	return complete;
} /* }}} */
3010 
/* Converts, in place, every string reachable from `var` using `convd`.
 *
 * A string is run through mbfl_buffer_converter_feed_result(); on success
 * the zval that was passed in (before dereferencing) is destroyed and
 * overwritten with the converted string. Arrays are separated before being
 * modified; arrays and objects are walked recursively through their hash
 * table. Other types are left alone.
 *
 * Returns 1 when a container that is already being visited is encountered
 * (recursive structure), otherwise 0. */
static int mb_recursive_convert_variable(mbfl_buffer_converter *convd, zval *var) /* {{{ */
{
	zval *slot = var; /* the zval as passed in, before dereferencing */

	ZVAL_DEREF(var);

	if (Z_TYPE_P(var) == IS_STRING) {
		mbfl_string input, output;

		input.val = (unsigned char *)Z_STRVAL_P(var);
		input.len = Z_STRLEN_P(var);
		mbfl_string *converted = mbfl_buffer_converter_feed_result(convd, &input, &output);
		if (converted != NULL) {
			zval_ptr_dtor(slot);
			// TODO: avoid reallocation ???
			ZVAL_STRINGL(slot, (char *)converted->val, converted->len);
			efree(converted->val);
		}
		return 0;
	}

	if (Z_TYPE_P(var) != IS_ARRAY && Z_TYPE_P(var) != IS_OBJECT) {
		return 0;
	}

	if (Z_TYPE_P(var) == IS_ARRAY) {
		SEPARATE_ARRAY(var);
	}

	/* Mark refcounted containers while they are being walked so that a
	 * cycle is reported instead of recursing forever */
	if (Z_REFCOUNTED_P(var)) {
		if (Z_IS_RECURSIVE_P(var)) {
			return 1;
		}
		Z_PROTECT_RECURSION_P(var);
	}

	int recursive = 0;
	HashTable *members = HASH_OF(var);
	if (members != NULL) {
		zval *member;

		ZEND_HASH_FOREACH_VAL_IND(members, member) {
			if (mb_recursive_convert_variable(convd, member)) {
				recursive = 1;
				break;
			}
		} ZEND_HASH_FOREACH_END();
	}

	if (Z_REFCOUNTED_P(var)) {
		Z_UNPROTECT_RECURSION_P(var);
	}
	return recursive;
} /* }}} */
3058 
/* {{{ Converts the strings held in the given variables to the desired encoding */
PHP_FUNCTION(mb_convert_variables)3060 PHP_FUNCTION(mb_convert_variables)
3061 {
3062 	zval *args;
3063 	zend_string *to_enc_str;
3064 	zend_string *from_enc_str;
3065 	HashTable *from_enc_ht;
3066 	mbfl_string string, result;
3067 	const mbfl_encoding *from_encoding, *to_encoding;
3068 	mbfl_encoding_detector *identd;
3069 	mbfl_buffer_converter *convd;
3070 	int n, argc;
3071 	size_t elistsz;
3072 	const mbfl_encoding **elist;
3073 	int recursion_error = 0;
3074 
3075 	ZEND_PARSE_PARAMETERS_START(3, -1)
3076 		Z_PARAM_STR(to_enc_str)
3077 		Z_PARAM_ARRAY_HT_OR_STR(from_enc_ht, from_enc_str)
3078 		Z_PARAM_VARIADIC('+', args, argc)
3079 	ZEND_PARSE_PARAMETERS_END();
3080 
3081 	/* new encoding */
3082 	to_encoding = php_mb_get_encoding(to_enc_str, 1);
3083 	if (!to_encoding) {
3084 		RETURN_THROWS();
3085 	}
3086 
3087 	/* initialize string */
3088 	from_encoding = MBSTRG(current_internal_encoding);
3089 	mbfl_string_init_set(&string, from_encoding);
3090 	mbfl_string_init(&result);
3091 
3092 	/* pre-conversion encoding */
3093 	if (from_enc_ht) {
3094 		if (php_mb_parse_encoding_array(from_enc_ht, &elist, &elistsz, 2) == FAILURE) {
3095 			RETURN_THROWS();
3096 		}
3097 	} else {
3098 		if (php_mb_parse_encoding_list(ZSTR_VAL(from_enc_str), ZSTR_LEN(from_enc_str), &elist, &elistsz, /* persistent */ 0, /* arg_num */ 2, /* allow_pass_encoding */ 0) == FAILURE) {
3099 			RETURN_THROWS();
3100 		}
3101 	}
3102 
3103 	if (elistsz == 0) {
3104 		efree(ZEND_VOIDP(elist));
3105 		zend_argument_value_error(2, "must specify at least one encoding");
3106 		RETURN_THROWS();
3107 	}
3108 
3109 	if (elistsz == 1) {
3110 		from_encoding = *elist;
3111 	} else {
3112 		/* auto detect */
3113 		from_encoding = NULL;
3114 		identd = mbfl_encoding_detector_new(elist, elistsz, MBSTRG(strict_detection));
3115 		if (identd != NULL) {
3116 			n = 0;
3117 			while (n < argc) {
3118 				if (mb_recursive_encoder_detector_feed(identd, &args[n], &recursion_error)) {
3119 					break;
3120 				}
3121 				n++;
3122 			}
3123 			from_encoding = mbfl_encoding_detector_judge(identd);
3124 			mbfl_encoding_detector_delete(identd);
3125 			if (recursion_error) {
3126 				efree(ZEND_VOIDP(elist));
3127 				php_error_docref(NULL, E_WARNING, "Cannot handle recursive references");
3128 				RETURN_FALSE;
3129 			}
3130 		}
3131 
3132 		if (!from_encoding) {
3133 			php_error_docref(NULL, E_WARNING, "Unable to detect encoding");
3134 			efree(ZEND_VOIDP(elist));
3135 			RETURN_FALSE;
3136 		}
3137 	}
3138 
3139 	efree(ZEND_VOIDP(elist));
3140 
3141 	convd = mbfl_buffer_converter_new(from_encoding, to_encoding, 0);
3142 	/* If this assertion fails this means some memory allocation failure which is a bug */
3143 	ZEND_ASSERT(convd != NULL);
3144 
3145 	mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
3146 	mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
3147 
3148 	/* convert */
3149 	n = 0;
3150 	while (n < argc) {
3151 		zval *zv = &args[n];
3152 
3153 		ZVAL_DEREF(zv);
3154 		recursion_error = mb_recursive_convert_variable(convd, zv);
3155 		if (recursion_error) {
3156 			break;
3157 		}
3158 		n++;
3159 	}
3160 
3161 	MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
3162 	mbfl_buffer_converter_delete(convd);
3163 
3164 	if (recursion_error) {
3165 		php_error_docref(NULL, E_WARNING, "Cannot handle recursive references");
3166 		RETURN_FALSE;
3167 	}
3168 
3169 	RETURN_STRING(from_encoding->name);
3170 }
3171 /* }}} */
3172 
3173 /* HTML numeric entities */
3174 
3175 /* Convert PHP array to data structure required by mbfl_html_numeric_entity */
/* Builds the flat int array expected by mbfl_html_numeric_entity() from a
 * PHP array.
 *
 * The array must hold a multiple of 4 elements; otherwise an argument value
 * error is raised for argument 2 and NULL is returned. Each element is
 * converted with zval_get_long() and stored in iteration order.
 *
 * On success the emalloc'ed array is returned (the caller frees it) and
 * *convmap_size receives the number of 4-element groups. */
static int *make_conversion_map(HashTable *target_hash, int *convmap_size)
{
	int count = zend_hash_num_elements(target_hash);

	if ((count % 4) != 0) {
		zend_argument_value_error(2, "must have a multiple of 4 elements");
		return NULL;
	}

	int *map = (int *)safe_emalloc(count, sizeof(int), 0);
	int next = 0;
	zval *item;

	ZEND_HASH_FOREACH_VAL(target_hash, item) {
		map[next++] = zval_get_long(item);
	} ZEND_HASH_FOREACH_END();

	*convmap_size = count / 4;
	return map;
}
3196 
3197 /* {{{ Converts specified characters to HTML numeric entities */
PHP_FUNCTION(mb_encode_numericentity)3198 PHP_FUNCTION(mb_encode_numericentity)
3199 {
3200 	char *str = NULL;
3201 	zend_string *encoding = NULL;
3202 	int mapsize;
3203 	HashTable *target_hash;
3204 	zend_bool is_hex = 0;
3205 	mbfl_string string, result, *ret;
3206 
3207 	ZEND_PARSE_PARAMETERS_START(2, 4)
3208 		Z_PARAM_STRING(str, string.len)
3209 		Z_PARAM_ARRAY_HT(target_hash)
3210 		Z_PARAM_OPTIONAL
3211 		Z_PARAM_STR_OR_NULL(encoding)
3212 		Z_PARAM_BOOL(is_hex)
3213 	ZEND_PARSE_PARAMETERS_END();
3214 
3215 	string.val = (unsigned char *)str;
3216 	string.encoding = php_mb_get_encoding(encoding, 3);
3217 	if (!string.encoding) {
3218 		RETURN_THROWS();
3219 	}
3220 
3221 	int *convmap = make_conversion_map(target_hash, &mapsize);
3222 	if (convmap == NULL) {
3223 		RETURN_THROWS();
3224 	}
3225 
3226 	ret = mbfl_html_numeric_entity(&string, &result, convmap, mapsize, is_hex ? 2 : 0);
3227 	ZEND_ASSERT(ret != NULL);
3228 	// TODO: avoid reallocation ???
3229 	RETVAL_STRINGL((char *)ret->val, ret->len);
3230 	efree(ret->val);
3231 	efree(convmap);
3232 }
3233 /* }}} */
3234 
3235 /* {{{ Converts HTML numeric entities to character code */
PHP_FUNCTION(mb_decode_numericentity)3236 PHP_FUNCTION(mb_decode_numericentity)
3237 {
3238 	char *str = NULL;
3239 	zend_string *encoding = NULL;
3240 	int mapsize;
3241 	HashTable *target_hash;
3242 	mbfl_string string, result, *ret;
3243 
3244 	ZEND_PARSE_PARAMETERS_START(2, 3)
3245 		Z_PARAM_STRING(str, string.len)
3246 		Z_PARAM_ARRAY_HT(target_hash)
3247 		Z_PARAM_OPTIONAL
3248 		Z_PARAM_STR_OR_NULL(encoding)
3249 	ZEND_PARSE_PARAMETERS_END();
3250 
3251 	string.val = (unsigned char *)str;
3252 	string.encoding = php_mb_get_encoding(encoding, 3);
3253 	if (!string.encoding) {
3254 		RETURN_THROWS();
3255 	}
3256 
3257 	int *convmap = make_conversion_map(target_hash, &mapsize);
3258 	if (convmap == NULL) {
3259 		RETURN_THROWS();
3260 	}
3261 
3262 	ret = mbfl_html_numeric_entity(&string, &result, convmap, mapsize, 1);
3263 	ZEND_ASSERT(ret != NULL);
3264 	// TODO: avoid reallocation ???
3265 	RETVAL_STRINGL((char *)ret->val, ret->len);
3266 	efree(ret->val);
3267 	efree((void *)convmap);
3268 }
3269 /* }}} */
3270 
3271 /* {{{ Sends an email message with MIME scheme */
3272 
/* Skips a folded-header separator (CRLF followed by a space or tab) located
 * at str[pos], advancing pos over the CRLF and all but the last character of
 * the whitespace run, then `continue`s the enclosing loop.
 *
 * This must stay a bare `if` rather than a do { } while (0) wrapper: the
 * `continue` has to apply to the caller's loop. The checks rely on
 * short-circuit evaluation so that str[pos + 2] is only read after
 * str[pos + 1] was found to be '\n'. */
#define SKIP_LONG_HEADER_SEP_MBSTRING(str, pos) \
	if ((str)[(pos)] == '\r' && (str)[(pos) + 1] == '\n' \
			&& ((str)[(pos) + 2] == ' ' || (str)[(pos) + 2] == '\t')) { \
		(pos) += 2; \
		while ((str)[(pos) + 1] == ' ' || (str)[(pos) + 1] == '\t') { \
			(pos)++; \
		} \
		continue; \
	}

/* Line terminator used when assembling mail headers */
#define CRLF "\r\n"
3283 
_php_mbstr_parse_mail_headers(HashTable * ht,const char * str,size_t str_len)3284 static int _php_mbstr_parse_mail_headers(HashTable *ht, const char *str, size_t str_len)
3285 {
3286 	const char *ps;
3287 	size_t icnt;
3288 	int state = 0;
3289 	int crlf_state = -1;
3290 	char *token = NULL;
3291 	size_t token_pos = 0;
3292 	zend_string *fld_name, *fld_val;
3293 
3294 	ps = str;
3295 	icnt = str_len;
3296 	fld_name = fld_val = NULL;
3297 
3298 	/*
3299 	 *             C o n t e n t - T y p e :   t e x t / h t m l \r\n
3300 	 *             ^ ^^^^^^^^^^^^^^^^^^^^^ ^^^ ^^^^^^^^^^^^^^^^^ ^^^^
3301 	 *      state  0            1           2          3
3302 	 *
3303 	 *             C o n t e n t - T y p e :   t e x t / h t m l \r\n
3304 	 *             ^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^
3305 	 * crlf_state -1                       0                     1 -1
3306 	 *
3307 	 */
3308 
3309 	while (icnt > 0) {
3310 		switch (*ps) {
3311 			case ':':
3312 				if (crlf_state == 1) {
3313 					token_pos++;
3314 				}
3315 
3316 				if (state == 0 || state == 1) {
3317 					if(token && token_pos > 0) {
3318 						fld_name = zend_string_init(token, token_pos, 0);
3319 					}
3320 					state = 2;
3321 				} else {
3322 					token_pos++;
3323 				}
3324 
3325 				crlf_state = 0;
3326 				break;
3327 
3328 			case '\n':
3329 				if (crlf_state == -1) {
3330 					goto out;
3331 				}
3332 				crlf_state = -1;
3333 				break;
3334 
3335 			case '\r':
3336 				if (crlf_state == 1) {
3337 					token_pos++;
3338 				} else {
3339 					crlf_state = 1;
3340 				}
3341 				break;
3342 
3343 			case ' ': case '\t':
3344 				if (crlf_state == -1) {
3345 					if (state == 3) {
3346 						/* continuing from the previous line */
3347 						state = 4;
3348 					} else {
3349 						/* simply skipping this new line */
3350 						state = 5;
3351 					}
3352 				} else {
3353 					if (crlf_state == 1) {
3354 						token_pos++;
3355 					}
3356 					if (state == 1 || state == 3) {
3357 						token_pos++;
3358 					}
3359 				}
3360 				crlf_state = 0;
3361 				break;
3362 
3363 			default:
3364 				switch (state) {
3365 					case 0:
3366 						token = (char*)ps;
3367 						token_pos = 0;
3368 						state = 1;
3369 						break;
3370 
3371 					case 2:
3372 						if (crlf_state != -1) {
3373 							token = (char*)ps;
3374 							token_pos = 0;
3375 
3376 							state = 3;
3377 							break;
3378 						}
3379 						/* break is missing intentionally */
3380 
3381 					case 3:
3382 						if (crlf_state == -1) {
3383 							if(token && token_pos > 0) {
3384 								fld_val = zend_string_init(token, token_pos, 0);
3385 							}
3386 
3387 							if (fld_name != NULL && fld_val != NULL) {
3388 								zval val;
3389 								/* FIXME: some locale free implementation is
3390 								 * really required here,,, */
3391 								php_strtoupper(ZSTR_VAL(fld_name), ZSTR_LEN(fld_name));
3392 								ZVAL_STR(&val, fld_val);
3393 
3394 								zend_hash_update(ht, fld_name, &val);
3395 
3396 								zend_string_release_ex(fld_name, 0);
3397 							}
3398 
3399 							fld_name = fld_val = NULL;
3400 							token = (char*)ps;
3401 							token_pos = 0;
3402 
3403 							state = 1;
3404 						}
3405 						break;
3406 
3407 					case 4:
3408 						token_pos++;
3409 						state = 3;
3410 						break;
3411 				}
3412 
3413 				if (crlf_state == 1) {
3414 					token_pos++;
3415 				}
3416 
3417 				token_pos++;
3418 
3419 				crlf_state = 0;
3420 				break;
3421 		}
3422 		ps++, icnt--;
3423 	}
3424 out:
3425 	if (state == 2) {
3426 		token = "";
3427 		token_pos = 0;
3428 
3429 		state = 3;
3430 	}
3431 	if (state == 3) {
3432 		if(token && token_pos > 0) {
3433 			fld_val = zend_string_init(token, token_pos, 0);
3434 		}
3435 		if (fld_name != NULL && fld_val != NULL) {
3436 			zval val;
3437 			/* FIXME: some locale free implementation is
3438 			 * really required here,,, */
3439 			php_strtoupper(ZSTR_VAL(fld_name), ZSTR_LEN(fld_name));
3440 			ZVAL_STR(&val, fld_val);
3441 
3442 			zend_hash_update(ht, fld_name, &val);
3443 
3444 			zend_string_release_ex(fld_name, 0);
3445 		}
3446 	}
3447 	return state;
3448 }
3449 
PHP_FUNCTION(mb_send_mail)3450 PHP_FUNCTION(mb_send_mail)
3451 {
3452 	char *to;
3453 	size_t to_len;
3454 	char *message;
3455 	size_t message_len;
3456 	char *subject;
3457 	size_t subject_len;
3458 	zend_string *extra_cmd = NULL;
3459 	HashTable *headers_ht = NULL;
3460 	zend_string *str_headers = NULL;
3461 	size_t n, i;
3462 	char *to_r = NULL;
3463 	char *force_extra_parameters = INI_STR("mail.force_extra_parameters");
3464 	struct {
3465 		int cnt_type:1;
3466 		int cnt_trans_enc:1;
3467 	} suppressed_hdrs = { 0, 0 };
3468 
3469 	char *message_buf = NULL, *subject_buf = NULL, *p;
3470 	mbfl_string orig_str, conv_str;
3471 	mbfl_string *pstr;	/* pointer to mbfl string for return value */
3472 	enum mbfl_no_encoding;
3473 	const mbfl_encoding *tran_cs,	/* transfer text charset */
3474 						*head_enc,	/* header transfer encoding */
3475 						*body_enc;	/* body transfer encoding */
3476 	mbfl_memory_device device;	/* automatic allocateable buffer for additional header */
3477 	const mbfl_language *lang;
3478 	int err = 0;
3479 	HashTable ht_headers;
3480 	zval *s;
3481 	extern void mbfl_memory_device_unput(mbfl_memory_device *device);
3482 
3483 	/* initialize */
3484 	mbfl_memory_device_init(&device, 0, 0);
3485 	mbfl_string_init(&orig_str);
3486 	mbfl_string_init(&conv_str);
3487 
3488 	/* character-set, transfer-encoding */
3489 	tran_cs = &mbfl_encoding_utf8;
3490 	head_enc = &mbfl_encoding_base64;
3491 	body_enc = &mbfl_encoding_base64;
3492 	lang = mbfl_no2language(MBSTRG(language));
3493 	if (lang != NULL) {
3494 		tran_cs = mbfl_no2encoding(lang->mail_charset);
3495 		head_enc = mbfl_no2encoding(lang->mail_header_encoding);
3496 		body_enc = mbfl_no2encoding(lang->mail_body_encoding);
3497 	}
3498 
3499 	ZEND_PARSE_PARAMETERS_START(3, 5)
3500 		Z_PARAM_PATH(to, to_len)
3501 		Z_PARAM_PATH(subject, subject_len)
3502 		Z_PARAM_PATH(message, message_len)
3503 		Z_PARAM_OPTIONAL
3504 		Z_PARAM_ARRAY_HT_OR_STR(headers_ht, str_headers)
3505 		Z_PARAM_PATH_STR_OR_NULL(extra_cmd)
3506 	ZEND_PARSE_PARAMETERS_END();
3507 
3508 	if (str_headers) {
3509 		if (strlen(ZSTR_VAL(str_headers)) != ZSTR_LEN(str_headers)) {
3510 			zend_argument_value_error(4, "must not contain any null bytes");
3511 			RETURN_THROWS();
3512 		}
3513 		str_headers = php_trim(str_headers, NULL, 0, 2);
3514 	} else if (headers_ht) {
3515 		str_headers = php_mail_build_headers(headers_ht);
3516 		if (EG(exception)) {
3517 			RETURN_THROWS();
3518 		}
3519 	}
3520 
3521 	zend_hash_init(&ht_headers, 0, NULL, ZVAL_PTR_DTOR, 0);
3522 
3523 	if (str_headers != NULL) {
3524 		_php_mbstr_parse_mail_headers(&ht_headers, ZSTR_VAL(str_headers), ZSTR_LEN(str_headers));
3525 	}
3526 
3527 	if ((s = zend_hash_str_find(&ht_headers, "CONTENT-TYPE", sizeof("CONTENT-TYPE") - 1))) {
3528 		char *tmp;
3529 		char *param_name;
3530 		char *charset = NULL;
3531 
3532 		ZEND_ASSERT(Z_TYPE_P(s) == IS_STRING);
3533 		p = strchr(Z_STRVAL_P(s), ';');
3534 
3535 		if (p != NULL) {
3536 			/* skipping the padded spaces */
3537 			do {
3538 				++p;
3539 			} while (*p == ' ' || *p == '\t');
3540 
3541 			if (*p != '\0') {
3542 				if ((param_name = php_strtok_r(p, "= ", &tmp)) != NULL) {
3543 					if (strcasecmp(param_name, "charset") == 0) {
3544 						const mbfl_encoding *_tran_cs = tran_cs;
3545 
3546 						charset = php_strtok_r(NULL, "= \"", &tmp);
3547 						if (charset != NULL) {
3548 							_tran_cs = mbfl_name2encoding(charset);
3549 						}
3550 
3551 						if (!_tran_cs) {
3552 							php_error_docref(NULL, E_WARNING, "Unsupported charset \"%s\" - will be regarded as ascii", charset);
3553 							_tran_cs = &mbfl_encoding_ascii;
3554 						}
3555 						tran_cs = _tran_cs;
3556 					}
3557 				}
3558 			}
3559 		}
3560 		suppressed_hdrs.cnt_type = 1;
3561 	}
3562 
3563 	if ((s = zend_hash_str_find(&ht_headers, "CONTENT-TRANSFER-ENCODING", sizeof("CONTENT-TRANSFER-ENCODING") - 1))) {
3564 		const mbfl_encoding *_body_enc;
3565 
3566 		ZEND_ASSERT(Z_TYPE_P(s) == IS_STRING);
3567 		_body_enc = mbfl_name2encoding(Z_STRVAL_P(s));
3568 		switch (_body_enc ? _body_enc->no_encoding : mbfl_no_encoding_invalid) {
3569 			case mbfl_no_encoding_base64:
3570 			case mbfl_no_encoding_7bit:
3571 			case mbfl_no_encoding_8bit:
3572 				body_enc = _body_enc;
3573 				break;
3574 
3575 			default:
3576 				php_error_docref(NULL, E_WARNING, "Unsupported transfer encoding \"%s\" - will be regarded as 8bit", Z_STRVAL_P(s));
3577 				body_enc =	&mbfl_encoding_8bit;
3578 				break;
3579 		}
3580 		suppressed_hdrs.cnt_trans_enc = 1;
3581 	}
3582 
3583 	/* To: */
3584 	if (to_len > 0) {
3585 		to_r = estrndup(to, to_len);
3586 		for (; to_len; to_len--) {
3587 			if (!isspace((unsigned char) to_r[to_len - 1])) {
3588 				break;
3589 			}
3590 			to_r[to_len - 1] = '\0';
3591 		}
3592 		for (i = 0; to_r[i]; i++) {
3593 		if (iscntrl((unsigned char) to_r[i])) {
3594 			/* According to RFC 822, section 3.1.1 long headers may be separated into
3595 			 * parts using CRLF followed at least one linear-white-space character ('\t' or ' ').
3596 			 * To prevent these separators from being replaced with a space, we use the
3597 			 * SKIP_LONG_HEADER_SEP_MBSTRING to skip over them.
3598 			 */
3599 			SKIP_LONG_HEADER_SEP_MBSTRING(to_r, i);
3600 			to_r[i] = ' ';
3601 		}
3602 		}
3603 	} else {
3604 		to_r = to;
3605 	}
3606 
3607 	/* Subject: */
3608 	orig_str.val = (unsigned char *)subject;
3609 	orig_str.len = subject_len;
3610 	orig_str.encoding = MBSTRG(current_internal_encoding);
3611 	if (orig_str.encoding->no_encoding == mbfl_no_encoding_invalid
3612 			|| orig_str.encoding->no_encoding == mbfl_no_encoding_pass) {
3613 		orig_str.encoding = mbfl_identify_encoding(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
3614 	}
3615 	pstr = mbfl_mime_header_encode(&orig_str, &conv_str, tran_cs, head_enc, CRLF, sizeof("Subject: [PHP-jp nnnnnnnn]" CRLF) - 1);
3616 	if (pstr != NULL) {
3617 		subject_buf = subject = (char *)pstr->val;
3618 	}
3619 
3620 	/* message body */
3621 	orig_str.val = (unsigned char *)message;
3622 	orig_str.len = message_len;
3623 	orig_str.encoding = MBSTRG(current_internal_encoding);
3624 
3625 	if (orig_str.encoding->no_encoding == mbfl_no_encoding_invalid
3626 			|| orig_str.encoding->no_encoding == mbfl_no_encoding_pass) {
3627 		orig_str.encoding = mbfl_identify_encoding(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
3628 	}
3629 
3630 	pstr = NULL;
3631 	{
3632 		mbfl_string tmpstr;
3633 
3634 		if (mbfl_convert_encoding(&orig_str, &tmpstr, tran_cs) != NULL) {
3635 			tmpstr.encoding = &mbfl_encoding_8bit;
3636 			pstr = mbfl_convert_encoding(&tmpstr, &conv_str, body_enc);
3637 			efree(tmpstr.val);
3638 		}
3639 	}
3640 	if (pstr != NULL) {
3641 		message_buf = message = (char *)pstr->val;
3642 	}
3643 
3644 	/* other headers */
3645 #define PHP_MBSTR_MAIL_MIME_HEADER1 "MIME-Version: 1.0"
3646 #define PHP_MBSTR_MAIL_MIME_HEADER2 "Content-Type: text/plain"
3647 #define PHP_MBSTR_MAIL_MIME_HEADER3 "; charset="
3648 #define PHP_MBSTR_MAIL_MIME_HEADER4 "Content-Transfer-Encoding: "
3649 	if (str_headers != NULL) {
3650 		p = ZSTR_VAL(str_headers);
3651 		n = ZSTR_LEN(str_headers);
3652 		mbfl_memory_device_strncat(&device, p, n);
3653 		if (n > 0 && p[n - 1] != '\n') {
3654 			mbfl_memory_device_strncat(&device, CRLF, sizeof(CRLF)-1);
3655 		}
3656 		zend_string_release_ex(str_headers, 0);
3657 	}
3658 
3659 	if (!zend_hash_str_exists(&ht_headers, "MIME-VERSION", sizeof("MIME-VERSION") - 1)) {
3660 		mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER1, sizeof(PHP_MBSTR_MAIL_MIME_HEADER1) - 1);
3661 		mbfl_memory_device_strncat(&device, CRLF, sizeof(CRLF)-1);
3662 	}
3663 
3664 	if (!suppressed_hdrs.cnt_type) {
3665 		mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER2, sizeof(PHP_MBSTR_MAIL_MIME_HEADER2) - 1);
3666 
3667 		p = (char *)mbfl_no2preferred_mime_name(tran_cs->no_encoding);
3668 		if (p != NULL) {
3669 			mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER3, sizeof(PHP_MBSTR_MAIL_MIME_HEADER3) - 1);
3670 			mbfl_memory_device_strcat(&device, p);
3671 		}
3672 		mbfl_memory_device_strncat(&device, CRLF, sizeof(CRLF)-1);
3673 	}
3674 	if (!suppressed_hdrs.cnt_trans_enc) {
3675 		mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER4, sizeof(PHP_MBSTR_MAIL_MIME_HEADER4) - 1);
3676 		p = (char *)mbfl_no2preferred_mime_name(body_enc->no_encoding);
3677 		if (p == NULL) {
3678 			p = "7bit";
3679 		}
3680 		mbfl_memory_device_strcat(&device, p);
3681 		mbfl_memory_device_strncat(&device, CRLF, sizeof(CRLF)-1);
3682 	}
3683 
3684 	mbfl_memory_device_unput(&device);
3685 	mbfl_memory_device_unput(&device);
3686 	mbfl_memory_device_output('\0', &device);
3687 	str_headers = zend_string_init((char *)device.buffer, strlen((char *)device.buffer), 0);
3688 
3689 	if (force_extra_parameters) {
3690 		extra_cmd = php_escape_shell_cmd(force_extra_parameters);
3691 	} else if (extra_cmd) {
3692 		extra_cmd = php_escape_shell_cmd(ZSTR_VAL(extra_cmd));
3693 	}
3694 
3695 	if (!err && php_mail(to_r, subject, message, ZSTR_VAL(str_headers), extra_cmd ? ZSTR_VAL(extra_cmd) : NULL)) {
3696 		RETVAL_TRUE;
3697 	} else {
3698 		RETVAL_FALSE;
3699 	}
3700 
3701 	if (extra_cmd) {
3702 		zend_string_release_ex(extra_cmd, 0);
3703 	}
3704 
3705 	if (to_r != to) {
3706 		efree(to_r);
3707 	}
3708 	if (subject_buf) {
3709 		efree((void *)subject_buf);
3710 	}
3711 	if (message_buf) {
3712 		efree((void *)message_buf);
3713 	}
3714 	mbfl_memory_device_clear(&device);
3715 	zend_hash_destroy(&ht_headers);
3716 	if (str_headers) {
3717 		zend_string_release_ex(str_headers, 0);
3718 	}
3719 }
3720 
3721 #undef SKIP_LONG_HEADER_SEP_MBSTRING
3722 #undef CRLF
3723 #undef MAIL_ASCIIZ_CHECK_MBSTRING
3724 #undef PHP_MBSTR_MAIL_MIME_HEADER1
3725 #undef PHP_MBSTR_MAIL_MIME_HEADER2
3726 #undef PHP_MBSTR_MAIL_MIME_HEADER3
3727 #undef PHP_MBSTR_MAIL_MIME_HEADER4
3728 /* }}} */
3729 
3730 /* {{{ Returns the current settings of mbstring */
PHP_FUNCTION(mb_get_info)3731 PHP_FUNCTION(mb_get_info)
3732 {
3733 	char *typ = NULL;
3734 	size_t typ_len;
3735 	size_t n;
3736 	char *name;
3737 	zval row;
3738 	const mbfl_language *lang = mbfl_no2language(MBSTRG(language));
3739 	const mbfl_encoding **entry;
3740 
3741 	ZEND_PARSE_PARAMETERS_START(0, 1)
3742 		Z_PARAM_OPTIONAL
3743 		Z_PARAM_STRING(typ, typ_len)
3744 	ZEND_PARSE_PARAMETERS_END();
3745 
3746 	if (!typ || !strcasecmp("all", typ)) {
3747 		array_init(return_value);
3748 		if (MBSTRG(current_internal_encoding)) {
3749 			add_assoc_string(return_value, "internal_encoding", (char *)MBSTRG(current_internal_encoding)->name);
3750 		}
3751 		if (MBSTRG(http_input_identify)) {
3752 			add_assoc_string(return_value, "http_input", (char *)MBSTRG(http_input_identify)->name);
3753 		}
3754 		if (MBSTRG(current_http_output_encoding)) {
3755 			add_assoc_string(return_value, "http_output", (char *)MBSTRG(current_http_output_encoding)->name);
3756 		}
3757 		if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes") - 1, 0)) != NULL) {
3758 			add_assoc_string(return_value, "http_output_conv_mimetypes", name);
3759 		}
3760 		if (lang != NULL) {
3761 			if ((name = (char *)mbfl_no_encoding2name(lang->mail_charset)) != NULL) {
3762 				add_assoc_string(return_value, "mail_charset", name);
3763 			}
3764 			if ((name = (char *)mbfl_no_encoding2name(lang->mail_header_encoding)) != NULL) {
3765 				add_assoc_string(return_value, "mail_header_encoding", name);
3766 			}
3767 			if ((name = (char *)mbfl_no_encoding2name(lang->mail_body_encoding)) != NULL) {
3768 				add_assoc_string(return_value, "mail_body_encoding", name);
3769 			}
3770 		}
3771 		add_assoc_long(return_value, "illegal_chars", MBSTRG(illegalchars));
3772 		if (MBSTRG(encoding_translation)) {
3773 			add_assoc_string(return_value, "encoding_translation", "On");
3774 		} else {
3775 			add_assoc_string(return_value, "encoding_translation", "Off");
3776 		}
3777 		if ((name = (char *)mbfl_no_language2name(MBSTRG(language))) != NULL) {
3778 			add_assoc_string(return_value, "language", name);
3779 		}
3780 		n = MBSTRG(current_detect_order_list_size);
3781 		entry = MBSTRG(current_detect_order_list);
3782 		if (n > 0) {
3783 			size_t i;
3784 			array_init(&row);
3785 			for (i = 0; i < n; i++) {
3786 				add_next_index_string(&row, (*entry)->name);
3787 				entry++;
3788 			}
3789 			add_assoc_zval(return_value, "detect_order", &row);
3790 		}
3791 		if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
3792 			add_assoc_string(return_value, "substitute_character", "none");
3793 		} else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
3794 			add_assoc_string(return_value, "substitute_character", "long");
3795 		} else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
3796 			add_assoc_string(return_value, "substitute_character", "entity");
3797 		} else {
3798 			add_assoc_long(return_value, "substitute_character", MBSTRG(current_filter_illegal_substchar));
3799 		}
3800 		if (MBSTRG(strict_detection)) {
3801 			add_assoc_string(return_value, "strict_detection", "On");
3802 		} else {
3803 			add_assoc_string(return_value, "strict_detection", "Off");
3804 		}
3805 	} else if (!strcasecmp("internal_encoding", typ)) {
3806 		if (MBSTRG(current_internal_encoding)) {
3807 			RETVAL_STRING((char *)MBSTRG(current_internal_encoding)->name);
3808 		}
3809 	} else if (!strcasecmp("http_input", typ)) {
3810 		if (MBSTRG(http_input_identify)) {
3811 			RETVAL_STRING((char *)MBSTRG(http_input_identify)->name);
3812 		}
3813 	} else if (!strcasecmp("http_output", typ)) {
3814 		if (MBSTRG(current_http_output_encoding)) {
3815 			RETVAL_STRING((char *)MBSTRG(current_http_output_encoding)->name);
3816 		}
3817 	} else if (!strcasecmp("http_output_conv_mimetypes", typ)) {
3818 		if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes") - 1, 0)) != NULL) {
3819 			RETVAL_STRING(name);
3820 		}
3821 	} else if (!strcasecmp("mail_charset", typ)) {
3822 		if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_charset)) != NULL) {
3823 			RETVAL_STRING(name);
3824 		}
3825 	} else if (!strcasecmp("mail_header_encoding", typ)) {
3826 		if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_header_encoding)) != NULL) {
3827 			RETVAL_STRING(name);
3828 		}
3829 	} else if (!strcasecmp("mail_body_encoding", typ)) {
3830 		if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_body_encoding)) != NULL) {
3831 			RETVAL_STRING(name);
3832 		}
3833 	} else if (!strcasecmp("illegal_chars", typ)) {
3834 		RETVAL_LONG(MBSTRG(illegalchars));
3835 	} else if (!strcasecmp("encoding_translation", typ)) {
3836 		if (MBSTRG(encoding_translation)) {
3837 			RETVAL_STRING("On");
3838 		} else {
3839 			RETVAL_STRING("Off");
3840 		}
3841 	} else if (!strcasecmp("language", typ)) {
3842 		if ((name = (char *)mbfl_no_language2name(MBSTRG(language))) != NULL) {
3843 			RETVAL_STRING(name);
3844 		}
3845 	} else if (!strcasecmp("detect_order", typ)) {
3846 		n = MBSTRG(current_detect_order_list_size);
3847 		entry = MBSTRG(current_detect_order_list);
3848 		if (n > 0) {
3849 			size_t i;
3850 			array_init(return_value);
3851 			for (i = 0; i < n; i++) {
3852 				add_next_index_string(return_value, (*entry)->name);
3853 				entry++;
3854 			}
3855 		}
3856 	} else if (!strcasecmp("substitute_character", typ)) {
3857 		if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
3858 			RETVAL_STRING("none");
3859 		} else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
3860 			RETVAL_STRING("long");
3861 		} else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
3862 			RETVAL_STRING("entity");
3863 		} else {
3864 			RETVAL_LONG(MBSTRG(current_filter_illegal_substchar));
3865 		}
3866 	} else if (!strcasecmp("strict_detection", typ)) {
3867 		if (MBSTRG(strict_detection)) {
3868 			RETVAL_STRING("On");
3869 		} else {
3870 			RETVAL_STRING("Off");
3871 		}
3872 	} else {
3873 		// TODO Convert to ValueError
3874 		RETURN_FALSE;
3875 	}
3876 }
3877 /* }}} */
3878 
3879 
php_mb_init_convd(const mbfl_encoding * encoding)3880 static inline mbfl_buffer_converter *php_mb_init_convd(const mbfl_encoding *encoding)
3881 {
3882 	mbfl_buffer_converter *convd;
3883 
3884 	convd = mbfl_buffer_converter_new(encoding, encoding, 0);
3885 	if (convd == NULL) {
3886 		return NULL;
3887 	}
3888 	mbfl_buffer_converter_illegal_mode(convd, MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE);
3889 	mbfl_buffer_converter_illegal_substchar(convd, 0);
3890 	return convd;
3891 }
3892 
3893 
/* Feeds `input` through `convd` and reports whether it came out unchanged.
 * Returns 1 only when the conversion produced a result, no illegal characters
 * were counted, and the output is byte-for-byte equal to the input;
 * returns 0 otherwise. */
static inline int php_mb_check_encoding_impl(mbfl_buffer_converter *convd, const char *input, size_t length, const mbfl_encoding *encoding)
{
	mbfl_string src, dst;
	int is_valid;

	mbfl_string_init_set(&src, encoding);
	mbfl_string_init(&dst);
	src.val = (unsigned char *) input;
	src.len = length;

	mbfl_string *converted = mbfl_buffer_converter_feed_result(convd, &src, &dst);
	size_t num_illegal = mbfl_buffer_illegalchars(convd);

	if (converted == NULL) {
		/* No result was produced, so there is nothing to release. */
		return 0;
	}

	is_valid = num_illegal == 0
		&& src.len == dst.len
		&& memcmp(src.val, dst.val, src.len) == 0;
	mbfl_string_clear(&dst);
	return is_valid;
}
3915 
php_mb_check_encoding(const char * input,size_t length,const mbfl_encoding * encoding)3916 MBSTRING_API int php_mb_check_encoding(const char *input, size_t length, const mbfl_encoding *encoding)
3917 {
3918 	mbfl_buffer_converter *convd = php_mb_init_convd(encoding);
3919 	/* If this assertion fails this means some memory allocation failure which is a bug */
3920 	ZEND_ASSERT(convd != NULL);
3921 
3922 	int result = php_mb_check_encoding_impl(convd, input, length, encoding);
3923 	mbfl_buffer_converter_delete(convd);
3924 	return result;
3925 }
3926 
/* Validates every string key and every value of `vars` against `encoding`,
 * descending into nested arrays.
 *
 * Strings and arrays are checked; int, float, null and bool values are
 * accepted as-is; any other value type makes the array invalid. Iteration
 * stops at the first invalid key or value.
 *
 * Returns 1 when everything is valid, 0 otherwise. An array that is already
 * being visited (circular reference) emits a warning and returns 0. */
static int php_mb_check_encoding_recursive(HashTable *vars, const mbfl_encoding *encoding)
{
	mbfl_buffer_converter *convd;
	zend_string *key;
	zval *entry;
	int valid = 1;

	/* Test the recursion guard first so no converter is allocated just to be
	 * thrown away.
	 * NOTE(review): the message contains a doubled "not"; it is user-visible
	 * output and is therefore left unchanged here. */
	if (GC_IS_RECURSIVE(vars)) {
		php_error_docref(NULL, E_WARNING, "Cannot not handle circular references");
		return 0;
	}

	convd = php_mb_init_convd(encoding);
	/* If this assertion fails this means some memory allocation failure which is a bug */
	ZEND_ASSERT(convd != NULL);

	GC_TRY_PROTECT_RECURSION(vars);
	ZEND_HASH_FOREACH_STR_KEY_VAL(vars, key, entry) {
		ZVAL_DEREF(entry);
		if (key && !php_mb_check_encoding_impl(convd, ZSTR_VAL(key), ZSTR_LEN(key), encoding)) {
			valid = 0;
			break;
		}
		switch (Z_TYPE_P(entry)) {
			case IS_STRING:
				valid = php_mb_check_encoding_impl(convd, Z_STRVAL_P(entry), Z_STRLEN_P(entry), encoding);
				break;
			case IS_ARRAY:
				valid = php_mb_check_encoding_recursive(Z_ARRVAL_P(entry), encoding);
				break;
			case IS_LONG:
			case IS_DOUBLE:
			case IS_NULL:
			case IS_TRUE:
			case IS_FALSE:
				break;
			default:
				/* Other types are error. */
				valid = 0;
				break;
		}
		/* A `break` inside the switch only leaves the switch, so the loop
		 * has to be left explicitly once an entry is found invalid. */
		if (!valid) {
			break;
		}
	} ZEND_HASH_FOREACH_END();
	GC_TRY_UNPROTECT_RECURSION(vars);

	mbfl_buffer_converter_delete(convd);
	return valid;
}
3984 
3985 
3986 /* {{{ Check if the string is valid for the specified encoding */
PHP_FUNCTION(mb_check_encoding)3987 PHP_FUNCTION(mb_check_encoding)
3988 {
3989 	zend_string *input_str = NULL, *enc = NULL;
3990 	HashTable *input_ht = NULL;
3991 	const mbfl_encoding *encoding;
3992 
3993 	ZEND_PARSE_PARAMETERS_START(0, 2)
3994 		Z_PARAM_OPTIONAL
3995 		Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(input_ht, input_str)
3996 		Z_PARAM_STR_OR_NULL(enc)
3997 	ZEND_PARSE_PARAMETERS_END();
3998 
3999 	encoding = php_mb_get_encoding(enc, 2);
4000 	if (!encoding) {
4001 		RETURN_THROWS();
4002 	}
4003 
4004 	if (input_ht) {
4005 		RETURN_BOOL(php_mb_check_encoding_recursive(input_ht, encoding));
4006 	} else if (input_str) {
4007 		RETURN_BOOL(php_mb_check_encoding(ZSTR_VAL(input_str), ZSTR_LEN(input_str), encoding));
4008 	} else {
4009 		/* FIXME: Actually check all inputs, except $_FILES file content. */
4010 		RETURN_BOOL(MBSTRG(illegalchars) == 0);
4011 	}
4012 }
4013 /* }}} */
4014 
4015 
php_mb_ord(const char * str,size_t str_len,zend_string * enc_name,const uint32_t enc_name_arg_num)4016 static inline zend_long php_mb_ord(const char *str, size_t str_len, zend_string *enc_name,
4017 	const uint32_t enc_name_arg_num)
4018 {
4019 	const mbfl_encoding *enc;
4020 	enum mbfl_no_encoding no_enc;
4021 
4022 	ZEND_ASSERT(str_len > 0);
4023 
4024 	enc = php_mb_get_encoding(enc_name, enc_name_arg_num);
4025 	if (!enc) {
4026 		return -2;
4027 	}
4028 
4029 	no_enc = enc->no_encoding;
4030 	if (php_mb_is_unsupported_no_encoding(no_enc)) {
4031 		zend_value_error("mb_ord() does not support the \"%s\" encoding", enc->name);
4032 		return -2;
4033 	}
4034 
4035 	{
4036 		mbfl_wchar_device dev;
4037 		mbfl_convert_filter *filter;
4038 		zend_long cp;
4039 
4040 		mbfl_wchar_device_init(&dev);
4041 		filter = mbfl_convert_filter_new(enc, &mbfl_encoding_wchar, mbfl_wchar_device_output, 0, &dev);
4042 		/* If this assertion fails this means some memory allocation failure which is a bug */
4043 		ZEND_ASSERT(filter != NULL);
4044 
4045 		mbfl_convert_filter_feed_string(filter, (unsigned char*)str, str_len);
4046 		mbfl_convert_filter_flush(filter);
4047 
4048 		if (dev.pos < 1 || filter->num_illegalchar || dev.buffer[0] >= MBFL_WCSGROUP_UCS4MAX) {
4049 			mbfl_convert_filter_delete(filter);
4050 			mbfl_wchar_device_clear(&dev);
4051 			return -1;
4052 		}
4053 
4054 		cp = dev.buffer[0];
4055 		mbfl_convert_filter_delete(filter);
4056 		mbfl_wchar_device_clear(&dev);
4057 		return cp;
4058 	}
4059 }
4060 
4061 
4062 /* {{{ */
PHP_FUNCTION(mb_ord)4063 PHP_FUNCTION(mb_ord)
4064 {
4065 	char *str;
4066 	size_t str_len;
4067 	zend_string *enc = NULL;
4068 	zend_long cp;
4069 
4070 	ZEND_PARSE_PARAMETERS_START(1, 2)
4071 		Z_PARAM_STRING(str, str_len)
4072 		Z_PARAM_OPTIONAL
4073 		Z_PARAM_STR_OR_NULL(enc)
4074 	ZEND_PARSE_PARAMETERS_END();
4075 
4076 	if (str_len == 0) {
4077 		zend_argument_value_error(1, "must not be empty");
4078 		RETURN_THROWS();
4079 	}
4080 
4081 	cp = php_mb_ord(str, str_len, enc, 2);
4082 
4083 	if (0 > cp) {
4084 		if (cp == -2) {
4085 			RETURN_THROWS();
4086 		}
4087 		RETURN_FALSE;
4088 	}
4089 
4090 	RETURN_LONG(cp);
4091 }
4092 /* }}} */
4093 
4094 
/* Encodes code point `cp` as a string in the encoding named by `enc_name`.
 *
 * Returns a new zend_string on success. Returns NULL when the encoding lookup
 * fails, the encoding is unsupported (a value error is raised), `cp` is
 * outside 0..0x10ffff, `cp` is a surrogate and the target is UTF-8, or the
 * conversion yields no output or flags an illegal character.
 *
 * UTF-8 is encoded directly; every other encoding goes through a conversion
 * from a 4-byte big-endian representation of the code point. The global
 * illegal-character counter is zeroed for that conversion and restored
 * afterwards. */
static inline zend_string *php_mb_chr(zend_long cp, zend_string *enc_name, uint32_t enc_name_arg_num)
{
	const mbfl_encoding *enc;
	enum mbfl_no_encoding no_enc;
	zend_string *ret = NULL;

	enc = php_mb_get_encoding(enc_name, enc_name_arg_num);
	if (!enc) {
		return NULL;
	}

	no_enc = enc->no_encoding;
	if (php_mb_is_unsupported_no_encoding(no_enc)) {
		zend_value_error("mb_chr() does not support the \"%s\" encoding", enc->name);
		return NULL;
	}

	if (cp < 0 || cp > 0x10ffff) {
		return NULL;
	}

	if (php_mb_is_no_encoding_utf8(no_enc)) {
		/* Surrogate code points have no UTF-8 representation. */
		if (cp > 0xd7ff && 0xe000 > cp) {
			return NULL;
		}

		if (cp < 0x80) {
			ret = ZSTR_CHAR(cp);
		} else if (cp < 0x800) {
			ret = zend_string_alloc(2, 0);
			ZSTR_VAL(ret)[0] = 0xc0 | (cp >> 6);
			ZSTR_VAL(ret)[1] = 0x80 | (cp & 0x3f);
			ZSTR_VAL(ret)[2] = 0;
		} else if (cp < 0x10000) {
			ret = zend_string_alloc(3, 0);
			ZSTR_VAL(ret)[0] = 0xe0 | (cp >> 12);
			ZSTR_VAL(ret)[1] = 0x80 | ((cp >> 6) & 0x3f);
			ZSTR_VAL(ret)[2] = 0x80 | (cp & 0x3f);
			ZSTR_VAL(ret)[3] = 0;
		} else {
			ret = zend_string_alloc(4, 0);
			ZSTR_VAL(ret)[0] = 0xf0 | (cp >> 18);
			ZSTR_VAL(ret)[1] = 0x80 | ((cp >> 12) & 0x3f);
			ZSTR_VAL(ret)[2] = 0x80 | ((cp >> 6) & 0x3f);
			ZSTR_VAL(ret)[3] = 0x80 | (cp & 0x3f);
			ZSTR_VAL(ret)[4] = 0;
		}

		return ret;
	}

	/* The scratch buffer has a fixed size, so it lives on the stack; the
	 * trailing NUL mirrors the terminator the heap buffer used to carry. */
	char buf[5];
	const size_t buf_len = sizeof(buf) - 1;
	buf[0] = (cp >> 24) & 0xff;
	buf[1] = (cp >> 16) & 0xff;
	buf[2] = (cp >>  8) & 0xff;
	buf[3] = cp & 0xff;
	buf[4] = 0;

	char *ret_str;
	size_t ret_len;
	long orig_illegalchars = MBSTRG(illegalchars);
	MBSTRG(illegalchars) = 0;
	ret_str = php_mb_convert_encoding_ex(buf, buf_len, enc, &mbfl_encoding_ucs4be, &ret_len);
	if (ret_str != NULL) {
		/* Only accept the output when the conversion flagged nothing. */
		if (MBSTRG(illegalchars) == 0) {
			ret = zend_string_init(ret_str, ret_len, 0);
		}
		efree(ret_str);
	}
	MBSTRG(illegalchars) = orig_illegalchars;

	return ret;
}
4175 
4176 
4177 /* {{{ */
PHP_FUNCTION(mb_chr)4178 PHP_FUNCTION(mb_chr)
4179 {
4180 	zend_long cp;
4181 	zend_string *enc = NULL;
4182 
4183 	ZEND_PARSE_PARAMETERS_START(1, 2)
4184 		Z_PARAM_LONG(cp)
4185 		Z_PARAM_OPTIONAL
4186 		Z_PARAM_STR_OR_NULL(enc)
4187 	ZEND_PARSE_PARAMETERS_END();
4188 
4189 	zend_string* ret = php_mb_chr(cp, enc, 2);
4190 	if (ret == NULL) {
4191 		RETURN_FALSE;
4192 	}
4193 
4194 	RETURN_STR(ret);
4195 }
4196 /* }}} */
4197 
4198 /* {{{ */
PHP_FUNCTION(mb_scrub)4199 PHP_FUNCTION(mb_scrub)
4200 {
4201 	char* str;
4202 	size_t str_len;
4203 	zend_string *enc_name = NULL;
4204 
4205 	ZEND_PARSE_PARAMETERS_START(1, 2)
4206 		Z_PARAM_STRING(str, str_len)
4207 		Z_PARAM_OPTIONAL
4208 		Z_PARAM_STR_OR_NULL(enc_name)
4209 	ZEND_PARSE_PARAMETERS_END();
4210 
4211 	const mbfl_encoding *enc = php_mb_get_encoding(enc_name, 2);
4212 	if (!enc) {
4213 		RETURN_THROWS();
4214 	}
4215 
4216 	size_t ret_len;
4217 	char *ret = php_mb_convert_encoding_ex(str, str_len, enc, enc, &ret_len);
4218 
4219 	RETVAL_STRINGL(ret, ret_len);
4220 	efree(ret);
4221 }
4222 /* }}} */
4223 
4224 
4225 /* {{{ php_mb_populate_current_detect_order_list */
php_mb_populate_current_detect_order_list(void)4226 static void php_mb_populate_current_detect_order_list(void)
4227 {
4228 	const mbfl_encoding **entry = 0;
4229 	size_t nentries;
4230 
4231 	if (MBSTRG(detect_order_list) && MBSTRG(detect_order_list_size)) {
4232 		nentries = MBSTRG(detect_order_list_size);
4233 		entry = (const mbfl_encoding **)safe_emalloc(nentries, sizeof(mbfl_encoding*), 0);
4234 		memcpy(ZEND_VOIDP(entry), MBSTRG(detect_order_list), sizeof(mbfl_encoding*) * nentries);
4235 	} else {
4236 		const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
4237 		size_t i;
4238 		nentries = MBSTRG(default_detect_order_list_size);
4239 		entry = (const mbfl_encoding **)safe_emalloc(nentries, sizeof(mbfl_encoding*), 0);
4240 		for (i = 0; i < nentries; i++) {
4241 			entry[i] = mbfl_no2encoding(src[i]);
4242 		}
4243 	}
4244 	MBSTRG(current_detect_order_list) = entry;
4245 	MBSTRG(current_detect_order_list_size) = nentries;
4246 }
4247 /* }}} */
4248 
4249 /* {{{ static int php_mb_encoding_translation() */
php_mb_encoding_translation(void)4250 static int php_mb_encoding_translation(void)
4251 {
4252 	return MBSTRG(encoding_translation);
4253 }
4254 /* }}} */
4255 
4256 /* {{{ MBSTRING_API size_t php_mb_mbchar_bytes_ex() */
php_mb_mbchar_bytes_ex(const char * s,const mbfl_encoding * enc)4257 MBSTRING_API size_t php_mb_mbchar_bytes_ex(const char *s, const mbfl_encoding *enc)
4258 {
4259 	if (enc != NULL) {
4260 		if (enc->flag & MBFL_ENCTYPE_MBCS) {
4261 			if (enc->mblen_table != NULL) {
4262 				if (s != NULL) return enc->mblen_table[*(unsigned char *)s];
4263 			}
4264 		} else if (enc->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
4265 			return 2;
4266 		} else if (enc->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
4267 			return 4;
4268 		}
4269 	}
4270 	return 1;
4271 }
4272 /* }}} */
4273 
4274 /* {{{ MBSTRING_API size_t php_mb_mbchar_bytes() */
php_mb_mbchar_bytes(const char * s)4275 MBSTRING_API size_t php_mb_mbchar_bytes(const char *s)
4276 {
4277 	return php_mb_mbchar_bytes_ex(s, MBSTRG(internal_encoding));
4278 }
4279 /* }}} */
4280 
4281 /* {{{ MBSTRING_API char *php_mb_safe_strrchr_ex() */
php_mb_safe_strrchr_ex(const char * s,unsigned int c,size_t nbytes,const mbfl_encoding * enc)4282 MBSTRING_API char *php_mb_safe_strrchr_ex(const char *s, unsigned int c, size_t nbytes, const mbfl_encoding *enc)
4283 {
4284 	register const char *p = s;
4285 	char *last=NULL;
4286 
4287 	if (nbytes == (size_t)-1) {
4288 		size_t nb = 0;
4289 
4290 		while (*p != '\0') {
4291 			if (nb == 0) {
4292 				if ((unsigned char)*p == (unsigned char)c) {
4293 					last = (char *)p;
4294 				}
4295 				nb = php_mb_mbchar_bytes_ex(p, enc);
4296 				if (nb == 0) {
4297 					return NULL; /* something is going wrong! */
4298 				}
4299 			}
4300 			--nb;
4301 			++p;
4302 		}
4303 	} else {
4304 		register size_t bcnt = nbytes;
4305 		register size_t nbytes_char;
4306 		while (bcnt > 0) {
4307 			if ((unsigned char)*p == (unsigned char)c) {
4308 				last = (char *)p;
4309 			}
4310 			nbytes_char = php_mb_mbchar_bytes_ex(p, enc);
4311 			if (bcnt < nbytes_char) {
4312 				return NULL;
4313 			}
4314 			p += nbytes_char;
4315 			bcnt -= nbytes_char;
4316 		}
4317 	}
4318 	return last;
4319 }
4320 /* }}} */
4321 
4322 /* {{{ MBSTRING_API char *php_mb_safe_strrchr() */
php_mb_safe_strrchr(const char * s,unsigned int c,size_t nbytes)4323 MBSTRING_API char *php_mb_safe_strrchr(const char *s, unsigned int c, size_t nbytes)
4324 {
4325 	return php_mb_safe_strrchr_ex(s, c, nbytes, MBSTRG(internal_encoding));
4326 }
4327 /* }}} */
4328 
4329 /* {{{ MBSTRING_API int php_mb_stripos() */
php_mb_stripos(int mode,const char * old_haystack,size_t old_haystack_len,const char * old_needle,size_t old_needle_len,zend_long offset,const mbfl_encoding * enc)4330 MBSTRING_API size_t php_mb_stripos(int mode, const char *old_haystack, size_t old_haystack_len, const char *old_needle, size_t old_needle_len, zend_long offset, const mbfl_encoding *enc)
4331 {
4332 	size_t n = (size_t) -1;
4333 	mbfl_string haystack, needle;
4334 
4335 	mbfl_string_init_set(&haystack, enc);
4336 	mbfl_string_init_set(&needle, enc);
4337 
4338 	do {
4339 		/* We're using simple case-folding here, because we'd have to deal with remapping of
4340 		 * offsets otherwise. */
4341 
4342 		size_t len = 0;
4343 		haystack.val = (unsigned char *)mbstring_convert_case(PHP_UNICODE_CASE_FOLD_SIMPLE, (char *)old_haystack, old_haystack_len, &len, enc);
4344 		haystack.len = len;
4345 
4346 		if (!haystack.val) {
4347 			break;
4348 		}
4349 
4350 		if (haystack.len == 0) {
4351 			break;
4352 		}
4353 
4354 		needle.val = (unsigned char *)mbstring_convert_case(PHP_UNICODE_CASE_FOLD_SIMPLE, (char *)old_needle, old_needle_len, &len, enc);
4355 		needle.len = len;
4356 
4357 		if (!needle.val) {
4358 			break;
4359 		}
4360 
4361 		n = mbfl_strpos(&haystack, &needle, offset, mode);
4362 	} while(0);
4363 
4364 	if (haystack.val) {
4365 		efree(haystack.val);
4366 	}
4367 
4368 	if (needle.val) {
4369 		efree(needle.val);
4370 	}
4371 
4372 	return n;
4373 }
4374 /* }}} */
4375 
php_mb_gpc_get_detect_order(const zend_encoding *** list,size_t * list_size)4376 static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *list_size) /* {{{ */
4377 {
4378 	*list = (const zend_encoding **)MBSTRG(http_input_list);
4379 	*list_size = MBSTRG(http_input_list_size);
4380 }
4381 /* }}} */
4382 
php_mb_gpc_set_input_encoding(const zend_encoding * encoding)4383 static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding) /* {{{ */
4384 {
4385 	MBSTRG(http_input_identify) = (const mbfl_encoding*)encoding;
4386 }
4387 /* }}} */
4388