xref: /PHP-8.1/ext/mbstring/mbstring.c (revision b721d0f7)
1 /*
2    +----------------------------------------------------------------------+
3    | Copyright (c) The PHP Group                                          |
4    +----------------------------------------------------------------------+
5    | This source file is subject to version 3.01 of the PHP license,      |
6    | that is bundled with this package in the file LICENSE, and is        |
7    | available through the world-wide-web at the following url:           |
8    | https://www.php.net/license/3_01.txt                                 |
9    | If you did not receive a copy of the PHP license and are unable to   |
10    | obtain it through the world-wide-web, please send a note to          |
11    | license@php.net so we can mail you a copy immediately.               |
12    +----------------------------------------------------------------------+
13    | Author: Tsukada Takuya <tsukada@fminn.nagano.nagano.jp>              |
14    |         Rui Hirokawa <hirokawa@php.net>                              |
15    |         Hironori Sato <satoh@jpnnet.com>                             |
16    |         Shigeru Kanemoto <sgk@happysize.co.jp>                       |
17    +----------------------------------------------------------------------+
18 */
19 
20 /* {{{ includes */
21 #include "libmbfl/config.h"
22 #include "php.h"
23 #include "php_ini.h"
24 #include "php_variables.h"
25 #include "mbstring.h"
26 #include "ext/standard/php_string.h"
27 #include "ext/standard/php_mail.h"
28 #include "ext/standard/exec.h"
29 #include "ext/standard/url.h"
30 #include "main/php_output.h"
31 #include "ext/standard/info.h"
32 #include "ext/pcre/php_pcre.h"
33 
34 #include "libmbfl/mbfl/mbfilter_8bit.h"
35 #include "libmbfl/mbfl/mbfilter_pass.h"
36 #include "libmbfl/mbfl/mbfilter_wchar.h"
37 #include "libmbfl/filters/mbfilter_base64.h"
38 #include "libmbfl/filters/mbfilter_qprint.h"
39 #include "libmbfl/filters/mbfilter_ucs4.h"
40 #include "libmbfl/filters/mbfilter_utf8.h"
41 #include "libmbfl/filters/mbfilter_tl_jisx0201_jisx0208.h"
42 #include "libmbfl/filters/mbfilter_singlebyte.h"
43 
44 #include "php_variables.h"
45 #include "php_globals.h"
46 #include "rfc1867.h"
47 #include "php_content_types.h"
48 #include "SAPI.h"
49 #include "php_unicode.h"
50 #include "TSRM.h"
51 
52 #include "mb_gpc.h"
53 
54 #ifdef HAVE_MBREGEX
55 # include "php_mbregex.h"
56 #endif
57 
58 #include "zend_multibyte.h"
59 #include "mbstring_arginfo.h"
60 /* }}} */
61 
/* {{{ prototypes */
/* Declares the storage for this extension's module globals; the MBSTRG()
 * accessor used throughout this file presumably resolves to it — TODO confirm
 * against mbstring.h. */
ZEND_DECLARE_MODULE_GLOBALS(mbstring)

/* Globals constructor and destructor; both are referenced from
 * mbstring_module_entry below. */
static PHP_GINIT_FUNCTION(mbstring);
static PHP_GSHUTDOWN_FUNCTION(mbstring);

/* Forward declarations of file-local helpers. Their definitions are not part
 * of this section of the file. */
static void php_mb_populate_current_detect_order_list(void);

static int php_mb_encoding_translation(void);

static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *list_size);

static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding);

static inline bool php_mb_is_unsupported_no_encoding(enum mbfl_no_encoding no_enc);

static inline bool php_mb_is_no_encoding_utf8(enum mbfl_no_encoding no_enc);
/* }}} */
80 
81 /* {{{ php_mb_default_identify_list */
/* One row of the per-language default identify table: a language paired with
 * the array of encodings used as that language's default candidate list. */
typedef struct _php_mb_nls_ident_list {
	enum mbfl_no_language lang;          /* language this row applies to */
	const enum mbfl_no_encoding *list;   /* candidate encodings, in table order */
	size_t list_size;                    /* number of elements in list */
} php_mb_nls_ident_list;
87 
/* Per-language default encoding candidate lists. Each array is referenced by
 * one row of php_mb_default_identify_list[] and is copied element by element,
 * in this order, whenever an encoding list contains "auto".
 * NOTE(review): the order presumably expresses detection priority — confirm
 * against mbfl_identify_encoding before reordering anything. */

/* Japanese */
static const enum mbfl_no_encoding php_mb_default_identify_list_ja[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_jis,
	mbfl_no_encoding_utf8,
	mbfl_no_encoding_euc_jp,
	mbfl_no_encoding_sjis
};

/* Simplified Chinese */
static const enum mbfl_no_encoding php_mb_default_identify_list_cn[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_utf8,
	mbfl_no_encoding_euc_cn,
	mbfl_no_encoding_cp936
};

/* Traditional Chinese */
static const enum mbfl_no_encoding php_mb_default_identify_list_tw_hk[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_utf8,
	mbfl_no_encoding_euc_tw,
	mbfl_no_encoding_big5
};

/* Korean */
static const enum mbfl_no_encoding php_mb_default_identify_list_kr[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_utf8,
	mbfl_no_encoding_euc_kr,
	mbfl_no_encoding_uhc
};

/* Russian */
static const enum mbfl_no_encoding php_mb_default_identify_list_ru[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_utf8,
	mbfl_no_encoding_koi8r,
	mbfl_no_encoding_cp1251,
	mbfl_no_encoding_cp866
};

/* Armenian */
static const enum mbfl_no_encoding php_mb_default_identify_list_hy[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_utf8,
	mbfl_no_encoding_armscii8
};

/* Turkish */
static const enum mbfl_no_encoding php_mb_default_identify_list_tr[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_utf8,
	mbfl_no_encoding_cp1254,
	mbfl_no_encoding_8859_9
};

/* Ukrainian */
static const enum mbfl_no_encoding php_mb_default_identify_list_ua[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_utf8,
	mbfl_no_encoding_koi8u
};

/* Neutral; also the fallback handed out by
 * php_mb_nls_get_default_detect_order_list() when no row matches. */
static const enum mbfl_no_encoding php_mb_default_identify_list_neut[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_utf8
};
148 
149 
150 static const php_mb_nls_ident_list php_mb_default_identify_list[] = {
151 	{ mbfl_no_language_japanese, php_mb_default_identify_list_ja, sizeof(php_mb_default_identify_list_ja) / sizeof(php_mb_default_identify_list_ja[0]) },
152 	{ mbfl_no_language_korean, php_mb_default_identify_list_kr, sizeof(php_mb_default_identify_list_kr) / sizeof(php_mb_default_identify_list_kr[0]) },
153 	{ mbfl_no_language_traditional_chinese, php_mb_default_identify_list_tw_hk, sizeof(php_mb_default_identify_list_tw_hk) / sizeof(php_mb_default_identify_list_tw_hk[0]) },
154 	{ mbfl_no_language_simplified_chinese, php_mb_default_identify_list_cn, sizeof(php_mb_default_identify_list_cn) / sizeof(php_mb_default_identify_list_cn[0]) },
155 	{ mbfl_no_language_russian, php_mb_default_identify_list_ru, sizeof(php_mb_default_identify_list_ru) / sizeof(php_mb_default_identify_list_ru[0]) },
156 	{ mbfl_no_language_armenian, php_mb_default_identify_list_hy, sizeof(php_mb_default_identify_list_hy) / sizeof(php_mb_default_identify_list_hy[0]) },
157 	{ mbfl_no_language_turkish, php_mb_default_identify_list_tr, sizeof(php_mb_default_identify_list_tr) / sizeof(php_mb_default_identify_list_tr[0]) },
158 	{ mbfl_no_language_ukrainian, php_mb_default_identify_list_ua, sizeof(php_mb_default_identify_list_ua) / sizeof(php_mb_default_identify_list_ua[0]) },
159 	{ mbfl_no_language_neutral, php_mb_default_identify_list_neut, sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]) }
160 };
161 
162 /* }}} */
163 
164 /* {{{ mbstring_deps[] */
/* Module dependency table: mbstring declares "pcre" as a required module.
 * The PCRE2 API is used by the _php_mb_*_regex helpers in this file. */
static const zend_module_dep mbstring_deps[] = {
	ZEND_MOD_REQUIRED("pcre")
	ZEND_MOD_END
};
169 /* }}} */
170 
171 /* {{{ zend_module_entry mbstring_module_entry */
/* Module descriptor for the mbstring extension. It names the extension, lists
 * its dependencies and function table, and wires in the startup, shutdown and
 * info hooks together with the module-globals constructor/destructor.
 * The initializer is positional, so the order of entries must not change. */
zend_module_entry mbstring_module_entry = {
	STANDARD_MODULE_HEADER_EX,
	NULL,
	mbstring_deps,                 /* dependency table defined above ("pcre") */
	"mbstring",                    /* extension name */
	ext_functions,                 /* function table; declared outside this section, presumably in mbstring_arginfo.h — TODO confirm */
	PHP_MINIT(mbstring),
	PHP_MSHUTDOWN(mbstring),
	PHP_RINIT(mbstring),
	PHP_RSHUTDOWN(mbstring),
	PHP_MINFO(mbstring),
	PHP_MBSTRING_VERSION,
	PHP_MODULE_GLOBALS(mbstring),
	PHP_GINIT(mbstring),           /* prototype declared near the top of this file */
	PHP_GSHUTDOWN(mbstring),       /* prototype declared near the top of this file */
	NULL,
	STANDARD_MODULE_PROPERTIES_EX
};
190 /* }}} */
191 
192 /* {{{ static sapi_post_entry php_post_entries[] */
/* POST content-type handlers that use php_std_post_handler for the default
 * form content type. OnUpdate_mbstring_encoding_translation() registers this
 * table when mbstring.encoding_translation is off and unregisters it when the
 * setting is on. The all-NULL row terminates the table. */
static const sapi_post_entry php_post_entries[] = {
	{ DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data,	php_std_post_handler },
	{ MULTIPART_CONTENT_TYPE,    sizeof(MULTIPART_CONTENT_TYPE)-1,    NULL,                         rfc1867_post_handler },
	{ NULL, 0, NULL, NULL }
};
198 /* }}} */
199 
/* Only compiled when COMPILE_DL_MBSTRING is defined (presumably the
 * shared-object build of the extension — TODO confirm against the build
 * configuration). Under ZTS the TSRM cache is defined here as well. */
#ifdef COMPILE_DL_MBSTRING
#ifdef ZTS
ZEND_TSRMLS_CACHE_DEFINE()
#endif
ZEND_GET_MODULE(mbstring)
#endif
206 
207 /* {{{ static sapi_post_entry mbstr_post_entries[] */
/* mbstring's replacement POST content-type handlers. This table differs from
 * php_post_entries only in using php_mb_post_handler for the default form
 * content type. OnUpdate_mbstring_encoding_translation() registers it when
 * mbstring.encoding_translation is on and unregisters it when the setting is
 * off. The all-NULL row terminates the table. */
static const sapi_post_entry mbstr_post_entries[] = {
	{ DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data, php_mb_post_handler },
	{ MULTIPART_CONTENT_TYPE,    sizeof(MULTIPART_CONTENT_TYPE)-1,    NULL,                         rfc1867_post_handler },
	{ NULL, 0, NULL, NULL }
};
213 /* }}} */
214 
php_mb_get_encoding(zend_string * encoding_name,uint32_t arg_num)215 static const mbfl_encoding *php_mb_get_encoding(zend_string *encoding_name, uint32_t arg_num) {
216 	if (encoding_name) {
217 		const mbfl_encoding *encoding;
218 		zend_string *last_encoding_name = MBSTRG(last_used_encoding_name);
219 		if (last_encoding_name && (last_encoding_name == encoding_name
220 				|| zend_string_equals_ci(encoding_name, last_encoding_name))) {
221 			return MBSTRG(last_used_encoding);
222 		}
223 
224 		encoding = mbfl_name2encoding(ZSTR_VAL(encoding_name));
225 		if (!encoding) {
226 			zend_argument_value_error(arg_num, "must be a valid encoding, \"%s\" given", ZSTR_VAL(encoding_name));
227 			return NULL;
228 		}
229 
230 		if (last_encoding_name) {
231 			zend_string_release(last_encoding_name);
232 		}
233 		MBSTRG(last_used_encoding_name) = zend_string_copy(encoding_name);
234 		MBSTRG(last_used_encoding) = encoding;
235 		return encoding;
236 	} else {
237 		return MBSTRG(current_internal_encoding);
238 	}
239 }
240 
php_mb_get_encoding_or_pass(const char * encoding_name)241 static const mbfl_encoding *php_mb_get_encoding_or_pass(const char *encoding_name) {
242 	if (strcmp(encoding_name, "pass") == 0) {
243 		return &mbfl_encoding_pass;
244 	}
245 
246 	return mbfl_name2encoding(encoding_name);
247 }
248 
/* Count the ',' bytes in the half-open range [p, end). */
static size_t count_commas(const char *p, const char *end) {
	size_t total = 0;
	const char *hit = memchr(p, ',', end - p);

	while (hit != NULL) {
		total++;
		/* Resume the scan just past the comma that was found. */
		hit++;
		hit = memchr(hit, ',', end - hit);
	}
	return total;
}
257 
/* {{{ static zend_result php_mb_parse_encoding_list()
 *  Parse a comma-separated list of encoding names into a newly allocated array.
 *
 *  value / value_length  the list text; one pair of surrounding double quotes is stripped
 *  return_list           receives the array (NULL when value is NULL or empty)
 *  return_size           receives the number of entries stored
 *  persistent            selects the allocator for the returned array (pecalloc/pefree)
 *  arg_num               argument position for the ValueError; 0 means INI context
 *  allow_pass_encoding   when true, the name "pass" is accepted as well
 *
 *  The entry "auto" (case-insensitive) expands, at most once per list, to the
 *  current default detect order list.
 *
 *  Return FAILURE if input contains any illegal encoding, otherwise SUCCESS.
 *  Emits a ValueError in function context and a warning in INI context, in INI context arg_num must be 0.
 *  On FAILURE the output parameters are left untouched and nothing remains allocated.
 */
static zend_result php_mb_parse_encoding_list(const char *value, size_t value_length,
	const mbfl_encoding ***return_list, size_t *return_size, bool persistent, uint32_t arg_num,
	bool allow_pass_encoding)
{
	if (value == NULL || value_length == 0) {
		*return_list = NULL;
		*return_size = 0;
		return SUCCESS;
	} else {
		bool included_auto;
		size_t n, size;
		char *p1, *endp, *tmpstr;
		const mbfl_encoding **entry, **list;

		/* copy the value string for work */
		/* The copy is modified in place below (separators are overwritten with NUL). */
		if (value[0]=='"' && value[value_length-1]=='"' && value_length>2) {
			tmpstr = (char *)estrndup(value+1, value_length-2);
			value_length -= 2;
		} else {
			tmpstr = (char *)estrndup(value, value_length);
		}

		endp = tmpstr + value_length;
		/* Capacity: one slot per comma-separated item, plus room for a single "auto" expansion. */
		size = 1 + count_commas(tmpstr, endp) + MBSTRG(default_detect_order_list_size);
		list = (const mbfl_encoding **)pecalloc(size, sizeof(mbfl_encoding*), persistent);
		entry = list;
		n = 0;
		included_auto = 0;
		p1 = tmpstr;
		while (1) {
			/* p1 is the start of the current item; p is its end (next comma or end of buffer). */
			char *comma = (char *) php_memnstr(p1, ",", 1, endp);
			char *p = comma ? comma : endp;
			/* Terminate the item so it can be used as a C string. */
			*p = '\0';
			/* trim spaces */
			while (p1 < p && (*p1 == ' ' || *p1 == '\t')) {
				p1++;
			}
			p--;
			while (p > p1 && (*p == ' ' || *p == '\t')) {
				*p = '\0';
				p--;
			}
			/* convert to the encoding number and check encoding */
			if (strcasecmp(p1, "auto") == 0) {
				/* Repeated "auto" items are ignored after the first expansion. */
				if (!included_auto) {
					const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
					const size_t identify_list_size = MBSTRG(default_detect_order_list_size);
					size_t i;
					included_auto = 1;
					for (i = 0; i < identify_list_size; i++) {
						*entry++ = mbfl_no2encoding(*src++);
						n++;
					}
				}
			} else {
				const mbfl_encoding *encoding =
					allow_pass_encoding ? php_mb_get_encoding_or_pass(p1) : mbfl_name2encoding(p1);
				if (!encoding) {
					/* Called from an INI setting modification */
					if (arg_num == 0) {
						php_error_docref("ref.mbstring", E_WARNING, "INI setting contains invalid encoding \"%s\"", p1);
					} else {
						zend_argument_value_error(arg_num, "contains invalid encoding \"%s\"", p1);
					}
					/* Release both the work copy and the partially filled list. */
					efree(tmpstr);
					pefree(ZEND_VOIDP(list), persistent);
					return FAILURE;
				}

				*entry++ = encoding;
				n++;
			}
			/* Stop once the last item was handled (or, defensively, when the list is full). */
			if (n >= size || comma == NULL) {
				break;
			}
			p1 = comma + 1;
		}
		*return_list = list;
		*return_size = n;
		efree(tmpstr);
	}

	return SUCCESS;
}
346 /* }}} */
347 
348 /* {{{ static int php_mb_parse_encoding_array()
349  *  Return FAILURE if input contains any illegal encoding, otherwise SUCCESS.
350  * 	Emits a ValueError in function context and a warning in INI context, in INI context arg_num must be 0.
351  */
php_mb_parse_encoding_array(HashTable * target_hash,const mbfl_encoding *** return_list,size_t * return_size,uint32_t arg_num)352 static int php_mb_parse_encoding_array(HashTable *target_hash, const mbfl_encoding ***return_list,
353 	size_t *return_size, uint32_t arg_num)
354 {
355 	/* Allocate enough space to include the default detect order if "auto" is used. */
356 	size_t size = zend_hash_num_elements(target_hash) + MBSTRG(default_detect_order_list_size);
357 	const mbfl_encoding **list = ecalloc(size, sizeof(mbfl_encoding*));
358 	const mbfl_encoding **entry = list;
359 	bool included_auto = 0;
360 	size_t n = 0;
361 	zval *hash_entry;
362 	ZEND_HASH_FOREACH_VAL(target_hash, hash_entry) {
363 		zend_string *encoding_str = zval_try_get_string(hash_entry);
364 		if (UNEXPECTED(!encoding_str)) {
365 			efree(ZEND_VOIDP(list));
366 			return FAILURE;
367 		}
368 
369 		if (zend_string_equals_literal_ci(encoding_str, "auto")) {
370 			if (!included_auto) {
371 				const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
372 				const size_t identify_list_size = MBSTRG(default_detect_order_list_size);
373 				size_t j;
374 
375 				included_auto = 1;
376 				for (j = 0; j < identify_list_size; j++) {
377 					*entry++ = mbfl_no2encoding(*src++);
378 					n++;
379 				}
380 			}
381 		} else {
382 			const mbfl_encoding *encoding = mbfl_name2encoding(ZSTR_VAL(encoding_str));
383 			if (encoding) {
384 				*entry++ = encoding;
385 				n++;
386 			} else {
387 				zend_argument_value_error(arg_num, "contains invalid encoding \"%s\"", ZSTR_VAL(encoding_str));
388 				zend_string_release(encoding_str);
389 				efree(ZEND_VOIDP(list));
390 				return FAILURE;
391 			}
392 		}
393 		zend_string_release(encoding_str);
394 	} ZEND_HASH_FOREACH_END();
395 	*return_list = list;
396 	*return_size = n;
397 	return SUCCESS;
398 }
399 /* }}} */
400 
401 /* {{{ zend_multibyte interface */
php_mb_zend_encoding_fetcher(const char * encoding_name)402 static const zend_encoding* php_mb_zend_encoding_fetcher(const char *encoding_name)
403 {
404 	return (const zend_encoding*)mbfl_name2encoding(encoding_name);
405 }
406 
php_mb_zend_encoding_name_getter(const zend_encoding * encoding)407 static const char *php_mb_zend_encoding_name_getter(const zend_encoding *encoding)
408 {
409 	return ((const mbfl_encoding *)encoding)->name;
410 }
411 
php_mb_zend_encoding_lexer_compatibility_checker(const zend_encoding * _encoding)412 static bool php_mb_zend_encoding_lexer_compatibility_checker(const zend_encoding *_encoding)
413 {
414 	const mbfl_encoding *encoding = (const mbfl_encoding*)_encoding;
415 	return !(encoding->flag & MBFL_ENCTYPE_GL_UNSAFE);
416 }
417 
php_mb_zend_encoding_detector(const unsigned char * arg_string,size_t arg_length,const zend_encoding ** list,size_t list_size)418 static const zend_encoding *php_mb_zend_encoding_detector(const unsigned char *arg_string, size_t arg_length, const zend_encoding **list, size_t list_size)
419 {
420 	mbfl_string string;
421 
422 	if (!list) {
423 		list = (const zend_encoding **)MBSTRG(current_detect_order_list);
424 		list_size = MBSTRG(current_detect_order_list_size);
425 	}
426 
427 	mbfl_string_init(&string);
428 	string.val = (unsigned char *)arg_string;
429 	string.len = arg_length;
430 	return (const zend_encoding *) mbfl_identify_encoding(&string, (const mbfl_encoding **)list, list_size, 0);
431 }
432 
php_mb_zend_encoding_converter(unsigned char ** to,size_t * to_length,const unsigned char * from,size_t from_length,const zend_encoding * encoding_to,const zend_encoding * encoding_from)433 static size_t php_mb_zend_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const zend_encoding *encoding_to, const zend_encoding *encoding_from)
434 {
435 	mbfl_string string, result;
436 	mbfl_buffer_converter *convd;
437 
438 	/* new encoding */
439 	/* initialize string */
440 	string.encoding = (const mbfl_encoding*)encoding_from;
441 	string.val = (unsigned char*)from;
442 	string.len = from_length;
443 
444 	/* initialize converter */
445 	convd = mbfl_buffer_converter_new((const mbfl_encoding *)encoding_from, (const mbfl_encoding *)encoding_to, string.len);
446 	if (convd == NULL) {
447 		return (size_t) -1;
448 	}
449 
450 	mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
451 	mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
452 
453 	/* do it */
454 	size_t loc = mbfl_buffer_converter_feed(convd, &string);
455 
456 	mbfl_buffer_converter_flush(convd);
457 	mbfl_string_init(&result);
458 	if (!mbfl_buffer_converter_result(convd, &result)) {
459 		mbfl_buffer_converter_delete(convd);
460 		return (size_t)-1;
461 	}
462 
463 	*to = result.val;
464 	*to_length = result.len;
465 
466 	mbfl_buffer_converter_delete(convd);
467 
468 	return loc;
469 }
470 
php_mb_zend_encoding_list_parser(const char * encoding_list,size_t encoding_list_len,const zend_encoding *** return_list,size_t * return_size,bool persistent)471 static zend_result php_mb_zend_encoding_list_parser(const char *encoding_list, size_t encoding_list_len, const zend_encoding ***return_list, size_t *return_size, bool persistent)
472 {
473 	return php_mb_parse_encoding_list(
474 		encoding_list, encoding_list_len,
475 		(const mbfl_encoding ***)return_list, return_size,
476 		persistent, /* arg_num */ 0, /* allow_pass_encoding */ 1);
477 }
478 
php_mb_zend_internal_encoding_getter(void)479 static const zend_encoding *php_mb_zend_internal_encoding_getter(void)
480 {
481 	return (const zend_encoding *)MBSTRG(internal_encoding);
482 }
483 
php_mb_zend_internal_encoding_setter(const zend_encoding * encoding)484 static zend_result php_mb_zend_internal_encoding_setter(const zend_encoding *encoding)
485 {
486 	MBSTRG(internal_encoding) = (const mbfl_encoding *)encoding;
487 	return SUCCESS;
488 }
489 
/* Bundles the php_mb_zend_* callbacks defined above under the provider name
 * "mbstring". The initializer is positional, so the order of entries must match
 * the zend_multibyte_functions layout. Where this table is registered is
 * outside this section of the file. */
static zend_multibyte_functions php_mb_zend_multibyte_functions = {
	"mbstring",                                        /* provider name */
	php_mb_zend_encoding_fetcher,                      /* name -> encoding */
	php_mb_zend_encoding_name_getter,                  /* encoding -> name */
	php_mb_zend_encoding_lexer_compatibility_checker,  /* rejects MBFL_ENCTYPE_GL_UNSAFE encodings */
	php_mb_zend_encoding_detector,                     /* identify encoding of a byte string */
	php_mb_zend_encoding_converter,                    /* convert between two encodings */
	php_mb_zend_encoding_list_parser,                  /* parse a comma-separated encoding list */
	php_mb_zend_internal_encoding_getter,              /* read MBSTRG(internal_encoding) */
	php_mb_zend_internal_encoding_setter               /* write MBSTRG(internal_encoding) */
};
501 /* }}} */
502 
503 /* {{{ _php_mb_compile_regex */
_php_mb_compile_regex(const char * pattern)504 static void *_php_mb_compile_regex(const char *pattern)
505 {
506 	pcre2_code *retval;
507 	PCRE2_SIZE err_offset;
508 	int errnum;
509 
510 	if (!(retval = pcre2_compile((PCRE2_SPTR)pattern, PCRE2_ZERO_TERMINATED,
511 			PCRE2_CASELESS, &errnum, &err_offset, php_pcre_cctx()))) {
512 		PCRE2_UCHAR err_str[128];
513 		pcre2_get_error_message(errnum, err_str, sizeof(err_str));
514 		php_error_docref(NULL, E_WARNING, "%s (offset=%zu): %s", pattern, err_offset, err_str);
515 	}
516 	return retval;
517 }
518 /* }}} */
519 
520 /* {{{ _php_mb_match_regex */
_php_mb_match_regex(void * opaque,const char * str,size_t str_len)521 static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len)
522 {
523 	int res;
524 
525 	pcre2_match_data *match_data = php_pcre_create_match_data(0, opaque);
526 	if (NULL == match_data) {
527 		pcre2_code_free(opaque);
528 		php_error_docref(NULL, E_WARNING, "Cannot allocate match data");
529 		return FAILURE;
530 	}
531 	res = pcre2_match(opaque, (PCRE2_SPTR)str, str_len, 0, 0, match_data, php_pcre_mctx()) >= 0;
532 	php_pcre_free_match_data(match_data);
533 
534 	return res;
535 }
536 /* }}} */
537 
538 /* {{{ _php_mb_free_regex */
_php_mb_free_regex(void * opaque)539 static void _php_mb_free_regex(void *opaque)
540 {
541 	pcre2_code_free(opaque);
542 }
543 /* }}} */
544 
545 /* {{{ php_mb_nls_get_default_detect_order_list */
php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang,enum mbfl_no_encoding ** plist,size_t * plist_size)546 static int php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang, enum mbfl_no_encoding **plist, size_t *plist_size)
547 {
548 	size_t i;
549 
550 	*plist = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut;
551 	*plist_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
552 
553 	for (i = 0; i < sizeof(php_mb_default_identify_list) / sizeof(php_mb_default_identify_list[0]); i++) {
554 		if (php_mb_default_identify_list[i].lang == lang) {
555 			*plist = (enum mbfl_no_encoding *)php_mb_default_identify_list[i].list;
556 			*plist_size = php_mb_default_identify_list[i].list_size;
557 			return 1;
558 		}
559 	}
560 	return 0;
561 }
562 /* }}} */
563 
php_mb_rfc1867_substring_conf(const zend_encoding * encoding,char * start,size_t len,char quote)564 static char *php_mb_rfc1867_substring_conf(const zend_encoding *encoding, char *start, size_t len, char quote)
565 {
566 	char *result = emalloc(len + 2);
567 	char *resp = result;
568 	size_t i;
569 
570 	for (i = 0; i < len && start[i] != quote; ++i) {
571 		if (start[i] == '\\' && (start[i + 1] == '\\' || (quote && start[i + 1] == quote))) {
572 			*resp++ = start[++i];
573 		} else {
574 			size_t j = php_mb_mbchar_bytes_ex(start+i, (const mbfl_encoding *)encoding);
575 
576 			while (j-- > 0 && i < len) {
577 				*resp++ = start[i++];
578 			}
579 			--i;
580 		}
581 	}
582 
583 	*resp = '\0';
584 	return result;
585 }
586 
php_mb_rfc1867_getword(const zend_encoding * encoding,char ** line,char stop)587 static char *php_mb_rfc1867_getword(const zend_encoding *encoding, char **line, char stop) /* {{{ */
588 {
589 	char *pos = *line, quote;
590 	char *res;
591 
592 	while (*pos && *pos != stop) {
593 		if ((quote = *pos) == '"' || quote == '\'') {
594 			++pos;
595 			while (*pos && *pos != quote) {
596 				if (*pos == '\\' && pos[1] && pos[1] == quote) {
597 					pos += 2;
598 				} else {
599 					++pos;
600 				}
601 			}
602 			if (*pos) {
603 				++pos;
604 			}
605 		} else {
606 			pos += php_mb_mbchar_bytes_ex(pos, (const mbfl_encoding *)encoding);
607 
608 		}
609 	}
610 	if (*pos == '\0') {
611 		res = estrdup(*line);
612 		*line += strlen(*line);
613 		return res;
614 	}
615 
616 	res = estrndup(*line, pos - *line);
617 
618 	while (*pos == stop) {
619 		pos += php_mb_mbchar_bytes_ex(pos, (const mbfl_encoding *)encoding);
620 	}
621 
622 	*line = pos;
623 	return res;
624 }
625 /* }}} */
626 
/* Extract one configuration-style value from str.
 *
 * Leading whitespace is skipped. A value that starts with a single or double
 * quote runs to the matching unescaped quote; otherwise it runs to the next
 * whitespace byte. The text is unescaped by php_mb_rfc1867_substring_conf().
 * Returns a new emalloc'ed string, which is empty when str holds only
 * whitespace. */
static char *php_mb_rfc1867_getword_conf(const zend_encoding *encoding, char *str) /* {{{ */
{
	for (; *str && isspace(*(unsigned char *)str); ++str) {
		/* skip leading whitespace */
	}

	if (*str == '\0') {
		return estrdup("");
	}

	const char first = *str;
	if (first == '"' || first == '\'') {
		char *quoted_body = str + 1;

		return php_mb_rfc1867_substring_conf(encoding, quoted_body, strlen(quoted_body), first);
	}

	/* Unquoted value: ends at the first whitespace byte or the end of the string. */
	char *token_end = str;
	while (*token_end && !isspace(*(unsigned char *)token_end)) {
		++token_end;
	}
	return php_mb_rfc1867_substring_conf(encoding, str, token_end - str, 0);
}
651 /* }}} */
652 
php_mb_rfc1867_basename(const zend_encoding * encoding,char * filename)653 static char *php_mb_rfc1867_basename(const zend_encoding *encoding, char *filename) /* {{{ */
654 {
655 	char *s, *s2;
656 	const size_t filename_len = strlen(filename);
657 
658 	/* The \ check should technically be needed for win32 systems only where
659 	 * it is a valid path separator. However, IE in all it's wisdom always sends
660 	 * the full path of the file on the user's filesystem, which means that unless
661 	 * the user does basename() they get a bogus file name. Until IE's user base drops
662 	 * to nill or problem is fixed this code must remain enabled for all systems. */
663 	s = php_mb_safe_strrchr_ex(filename, '\\', filename_len, (const mbfl_encoding *)encoding);
664 	s2 = php_mb_safe_strrchr_ex(filename, '/', filename_len, (const mbfl_encoding *)encoding);
665 
666 	if (s && s2) {
667 		if (s > s2) {
668 			return ++s;
669 		} else {
670 			return ++s2;
671 		}
672 	} else if (s) {
673 		return ++s;
674 	} else if (s2) {
675 		return ++s2;
676 	} else {
677 		return filename;
678 	}
679 }
680 /* }}} */
681 
682 /* {{{ php.ini directive handler */
683 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_language) */
PHP_INI_MH(OnUpdate_mbstring_language)684 static PHP_INI_MH(OnUpdate_mbstring_language)
685 {
686 	enum mbfl_no_language no_language;
687 
688 	no_language = mbfl_name2no_language(ZSTR_VAL(new_value));
689 	if (no_language == mbfl_no_language_invalid) {
690 		MBSTRG(language) = mbfl_no_language_neutral;
691 		return FAILURE;
692 	}
693 	MBSTRG(language) = no_language;
694 	php_mb_nls_get_default_detect_order_list(no_language, &MBSTRG(default_detect_order_list), &MBSTRG(default_detect_order_list_size));
695 	return SUCCESS;
696 }
697 /* }}} */
698 
699 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_detect_order) */
PHP_INI_MH(OnUpdate_mbstring_detect_order)700 static PHP_INI_MH(OnUpdate_mbstring_detect_order)
701 {
702 	const mbfl_encoding **list;
703 	size_t size;
704 
705 	if (!new_value) {
706 		if (MBSTRG(detect_order_list)) {
707 			pefree(ZEND_VOIDP(MBSTRG(detect_order_list)), 1);
708 		}
709 		MBSTRG(detect_order_list) = NULL;
710 		MBSTRG(detect_order_list_size) = 0;
711 		return SUCCESS;
712 	}
713 
714 	if (FAILURE == php_mb_parse_encoding_list(ZSTR_VAL(new_value), ZSTR_LEN(new_value), &list, &size, /* persistent */ 1, /* arg_num */ 0, /* allow_pass_encoding */ 0) || size == 0) {
715 		return FAILURE;
716 	}
717 
718 	if (MBSTRG(detect_order_list)) {
719 		pefree(ZEND_VOIDP(MBSTRG(detect_order_list)), 1);
720 	}
721 	MBSTRG(detect_order_list) = list;
722 	MBSTRG(detect_order_list_size) = size;
723 	return SUCCESS;
724 }
725 /* }}} */
726 
_php_mb_ini_mbstring_http_input_set(const char * new_value,size_t new_value_length)727 static int _php_mb_ini_mbstring_http_input_set(const char *new_value, size_t new_value_length) {
728 	const mbfl_encoding **list;
729 	size_t size;
730 	if (FAILURE == php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, /* persistent */ 1, /* arg_num */ 0, /* allow_pass_encoding */ 1) || size == 0) {
731 		return FAILURE;
732 	}
733 	if (MBSTRG(http_input_list)) {
734 		pefree(ZEND_VOIDP(MBSTRG(http_input_list)), 1);
735 	}
736 	MBSTRG(http_input_list) = list;
737 	MBSTRG(http_input_list_size) = size;
738 	return SUCCESS;
739 }
740 
741 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_input) */
PHP_INI_MH(OnUpdate_mbstring_http_input)742 static PHP_INI_MH(OnUpdate_mbstring_http_input)
743 {
744 	if (new_value) {
745 		php_error_docref("ref.mbstring", E_DEPRECATED, "Use of mbstring.http_input is deprecated");
746 	}
747 
748 	if (!new_value || !ZSTR_LEN(new_value)) {
749 		const char *encoding = php_get_input_encoding();
750 		MBSTRG(http_input_set) = 0;
751 		_php_mb_ini_mbstring_http_input_set(encoding, strlen(encoding));
752 		return SUCCESS;
753 	}
754 
755 	MBSTRG(http_input_set) = 1;
756 	return _php_mb_ini_mbstring_http_input_set(ZSTR_VAL(new_value), ZSTR_LEN(new_value));
757 }
758 /* }}} */
759 
_php_mb_ini_mbstring_http_output_set(const char * new_value)760 static int _php_mb_ini_mbstring_http_output_set(const char *new_value) {
761 	const mbfl_encoding *encoding = php_mb_get_encoding_or_pass(new_value);
762 	if (!encoding) {
763 		return FAILURE;
764 	}
765 
766 	MBSTRG(http_output_encoding) = encoding;
767 	MBSTRG(current_http_output_encoding) = encoding;
768 	return SUCCESS;
769 }
770 
771 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output) */
PHP_INI_MH(OnUpdate_mbstring_http_output)772 static PHP_INI_MH(OnUpdate_mbstring_http_output)
773 {
774 	if (new_value) {
775 		php_error_docref("ref.mbstring", E_DEPRECATED, "Use of mbstring.http_output is deprecated");
776 	}
777 
778 	if (new_value == NULL || ZSTR_LEN(new_value) == 0) {
779 		MBSTRG(http_output_set) = 0;
780 		_php_mb_ini_mbstring_http_output_set(php_get_output_encoding());
781 		return SUCCESS;
782 	}
783 
784 	MBSTRG(http_output_set) = 1;
785 	return _php_mb_ini_mbstring_http_output_set(ZSTR_VAL(new_value));
786 }
787 /* }}} */
788 
789 /* {{{ static _php_mb_ini_mbstring_internal_encoding_set */
_php_mb_ini_mbstring_internal_encoding_set(const char * new_value,size_t new_value_length)790 static int _php_mb_ini_mbstring_internal_encoding_set(const char *new_value, size_t new_value_length)
791 {
792 	const mbfl_encoding *encoding;
793 
794 	if (!new_value || !new_value_length || !(encoding = mbfl_name2encoding(new_value))) {
795 		/* falls back to UTF-8 if an unknown encoding name is given */
796 		if (new_value) {
797 			php_error_docref("ref.mbstring", E_WARNING, "Unknown encoding \"%s\" in ini setting", new_value);
798 		}
799 		encoding = &mbfl_encoding_utf8;
800 	}
801 	MBSTRG(internal_encoding) = encoding;
802 	MBSTRG(current_internal_encoding) = encoding;
803 #ifdef HAVE_MBREGEX
804 	{
805 		const char *enc_name = new_value;
806 		if (FAILURE == php_mb_regex_set_default_mbctype(enc_name)) {
807 			/* falls back to UTF-8 if an unknown encoding name is given */
808 			enc_name = "UTF-8";
809 			php_mb_regex_set_default_mbctype(enc_name);
810 		}
811 		php_mb_regex_set_mbctype(new_value);
812 	}
813 #endif
814 	return SUCCESS;
815 }
816 /* }}} */
817 
818 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_internal_encoding) */
PHP_INI_MH(OnUpdate_mbstring_internal_encoding)819 static PHP_INI_MH(OnUpdate_mbstring_internal_encoding)
820 {
821 	if (new_value) {
822 		php_error_docref("ref.mbstring", E_DEPRECATED, "Use of mbstring.internal_encoding is deprecated");
823 	}
824 
825 	if (OnUpdateString(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage) == FAILURE) {
826 		return FAILURE;
827 	}
828 
829 	if (new_value && ZSTR_LEN(new_value)) {
830 		MBSTRG(internal_encoding_set) = 1;
831 		return _php_mb_ini_mbstring_internal_encoding_set(ZSTR_VAL(new_value), ZSTR_LEN(new_value));
832 	} else {
833 		const char *encoding = php_get_internal_encoding();
834 		MBSTRG(internal_encoding_set) = 0;
835 		return _php_mb_ini_mbstring_internal_encoding_set(encoding, strlen(encoding));
836 	}
837 }
838 /* }}} */
839 
840 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_substitute_character) */
PHP_INI_MH(OnUpdate_mbstring_substitute_character)841 static PHP_INI_MH(OnUpdate_mbstring_substitute_character)
842 {
843 	int c;
844 	char *endptr = NULL;
845 
846 	if (new_value != NULL) {
847 		if (zend_string_equals_literal_ci(new_value, "none")) {
848 			MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
849 			MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
850 		} else if (zend_string_equals_literal_ci(new_value, "long")) {
851 			MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
852 			MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
853 		} else if (zend_string_equals_literal_ci(new_value, "entity")) {
854 			MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
855 			MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
856 		} else {
857 			MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
858 			MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
859 			if (ZSTR_LEN(new_value) > 0) {
860 				c = strtol(ZSTR_VAL(new_value), &endptr, 0);
861 				if (*endptr == '\0') {
862 					MBSTRG(filter_illegal_substchar) = c;
863 					MBSTRG(current_filter_illegal_substchar) = c;
864 				}
865 			}
866 		}
867 	} else {
868 		MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
869 		MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
870 		MBSTRG(filter_illegal_substchar) = 0x3f;	/* '?' */
871 		MBSTRG(current_filter_illegal_substchar) = 0x3f;	/* '?' */
872 	}
873 
874 	return SUCCESS;
875 }
876 /* }}} */
877 
878 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_encoding_translation) */
PHP_INI_MH(OnUpdate_mbstring_encoding_translation)879 static PHP_INI_MH(OnUpdate_mbstring_encoding_translation)
880 {
881 	if (new_value == NULL) {
882 		return FAILURE;
883 	}
884 
885 	OnUpdateBool(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
886 
887 	if (MBSTRG(encoding_translation)) {
888 		sapi_unregister_post_entry(php_post_entries);
889 		sapi_register_post_entries(mbstr_post_entries);
890 	} else {
891 		sapi_unregister_post_entry(mbstr_post_entries);
892 		sapi_register_post_entries(php_post_entries);
893 	}
894 
895 	return SUCCESS;
896 }
897 /* }}} */
898 
899 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes) */
PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes)900 static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes)
901 {
902 	zend_string *tmp;
903 	void *re = NULL;
904 
905 	if (!new_value) {
906 		new_value = entry->orig_value;
907 	}
908 	tmp = php_trim(new_value, NULL, 0, 3);
909 
910 	if (ZSTR_LEN(tmp) > 0) {
911 		if (!(re = _php_mb_compile_regex(ZSTR_VAL(tmp)))) {
912 			zend_string_release_ex(tmp, 0);
913 			return FAILURE;
914 		}
915 	}
916 
917 	if (MBSTRG(http_output_conv_mimetypes)) {
918 		_php_mb_free_regex(MBSTRG(http_output_conv_mimetypes));
919 	}
920 
921 	MBSTRG(http_output_conv_mimetypes) = re;
922 
923 	zend_string_release_ex(tmp, 0);
924 	return SUCCESS;
925 }
926 /* }}} */
927 /* }}} */
928 
929 /* {{{ php.ini directive registration */
930 PHP_INI_BEGIN()
931 	PHP_INI_ENTRY("mbstring.language", "neutral", PHP_INI_ALL, OnUpdate_mbstring_language)
932 	PHP_INI_ENTRY("mbstring.detect_order", NULL, PHP_INI_ALL, OnUpdate_mbstring_detect_order)
933 	PHP_INI_ENTRY("mbstring.http_input", NULL, PHP_INI_ALL, OnUpdate_mbstring_http_input)
934 	PHP_INI_ENTRY("mbstring.http_output", NULL, PHP_INI_ALL, OnUpdate_mbstring_http_output)
935 	STD_PHP_INI_ENTRY("mbstring.internal_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_internal_encoding, internal_encoding_name, zend_mbstring_globals, mbstring_globals)
936 	PHP_INI_ENTRY("mbstring.substitute_character", NULL, PHP_INI_ALL, OnUpdate_mbstring_substitute_character)
937 
938 	STD_PHP_INI_BOOLEAN("mbstring.encoding_translation", "0",
939 		PHP_INI_SYSTEM | PHP_INI_PERDIR,
940 		OnUpdate_mbstring_encoding_translation,
941 		encoding_translation, zend_mbstring_globals, mbstring_globals)
942 	PHP_INI_ENTRY("mbstring.http_output_conv_mimetypes",
943 		"^(text/|application/xhtml\\+xml)",
944 		PHP_INI_ALL,
945 		OnUpdate_mbstring_http_output_conv_mimetypes)
946 
947 	STD_PHP_INI_BOOLEAN("mbstring.strict_detection", "0",
948 		PHP_INI_ALL,
949 		OnUpdateBool,
950 		strict_detection, zend_mbstring_globals, mbstring_globals)
951 #ifdef HAVE_MBREGEX
952 	STD_PHP_INI_ENTRY("mbstring.regex_stack_limit", "100000",PHP_INI_ALL, OnUpdateLong, regex_stack_limit, zend_mbstring_globals, mbstring_globals)
953 	STD_PHP_INI_ENTRY("mbstring.regex_retry_limit", "1000000",PHP_INI_ALL, OnUpdateLong, regex_retry_limit, zend_mbstring_globals, mbstring_globals)
954 #endif
PHP_INI_END()955 PHP_INI_END()
956 /* }}} */
957 
958 static void mbstring_internal_encoding_changed_hook(void) {
959 	/* One of the internal_encoding / input_encoding / output_encoding ini settings changed. */
960 	if (!MBSTRG(internal_encoding_set)) {
961 		const char *encoding = php_get_internal_encoding();
962 		_php_mb_ini_mbstring_internal_encoding_set(encoding, strlen(encoding));
963 	}
964 
965 	if (!MBSTRG(http_output_set)) {
966 		const char *encoding = php_get_output_encoding();
967 		_php_mb_ini_mbstring_http_output_set(encoding);
968 	}
969 
970 	if (!MBSTRG(http_input_set)) {
971 		const char *encoding = php_get_input_encoding();
972 		_php_mb_ini_mbstring_http_input_set(encoding, strlen(encoding));
973 	}
974 }
975 
976 /* {{{ module global initialize handler */
PHP_GINIT_FUNCTION(mbstring)977 static PHP_GINIT_FUNCTION(mbstring)
978 {
979 #if defined(COMPILE_DL_MBSTRING) && defined(ZTS)
980 ZEND_TSRMLS_CACHE_UPDATE();
981 #endif
982 
983 	mbstring_globals->language = mbfl_no_language_uni;
984 	mbstring_globals->internal_encoding = NULL;
985 	mbstring_globals->current_internal_encoding = mbstring_globals->internal_encoding;
986 	mbstring_globals->http_output_encoding = &mbfl_encoding_pass;
987 	mbstring_globals->current_http_output_encoding = &mbfl_encoding_pass;
988 	mbstring_globals->http_input_identify = NULL;
989 	mbstring_globals->http_input_identify_get = NULL;
990 	mbstring_globals->http_input_identify_post = NULL;
991 	mbstring_globals->http_input_identify_cookie = NULL;
992 	mbstring_globals->http_input_identify_string = NULL;
993 	mbstring_globals->http_input_list = NULL;
994 	mbstring_globals->http_input_list_size = 0;
995 	mbstring_globals->detect_order_list = NULL;
996 	mbstring_globals->detect_order_list_size = 0;
997 	mbstring_globals->current_detect_order_list = NULL;
998 	mbstring_globals->current_detect_order_list_size = 0;
999 	mbstring_globals->default_detect_order_list = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut;
1000 	mbstring_globals->default_detect_order_list_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
1001 	mbstring_globals->filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1002 	mbstring_globals->filter_illegal_substchar = 0x3f;	/* '?' */
1003 	mbstring_globals->current_filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1004 	mbstring_globals->current_filter_illegal_substchar = 0x3f;	/* '?' */
1005 	mbstring_globals->illegalchars = 0;
1006 	mbstring_globals->encoding_translation = 0;
1007 	mbstring_globals->strict_detection = 0;
1008 	mbstring_globals->outconv = NULL;
1009 	mbstring_globals->http_output_conv_mimetypes = NULL;
1010 #ifdef HAVE_MBREGEX
1011 	mbstring_globals->mb_regex_globals = php_mb_regex_globals_alloc();
1012 #endif
1013 	mbstring_globals->last_used_encoding_name = NULL;
1014 	mbstring_globals->last_used_encoding = NULL;
1015 	mbstring_globals->internal_encoding_set = 0;
1016 	mbstring_globals->http_output_set = 0;
1017 	mbstring_globals->http_input_set = 0;
1018 }
1019 /* }}} */
1020 
1021 /* {{{ PHP_GSHUTDOWN_FUNCTION */
PHP_GSHUTDOWN_FUNCTION(mbstring)1022 static PHP_GSHUTDOWN_FUNCTION(mbstring)
1023 {
1024 	if (mbstring_globals->http_input_list) {
1025 		free(ZEND_VOIDP(mbstring_globals->http_input_list));
1026 	}
1027 	if (mbstring_globals->detect_order_list) {
1028 		free(ZEND_VOIDP(mbstring_globals->detect_order_list));
1029 	}
1030 	if (mbstring_globals->http_output_conv_mimetypes) {
1031 		_php_mb_free_regex(mbstring_globals->http_output_conv_mimetypes);
1032 	}
1033 #ifdef HAVE_MBREGEX
1034 	php_mb_regex_globals_free(mbstring_globals->mb_regex_globals);
1035 #endif
1036 }
1037 /* }}} */
1038 
1039 /* {{{ PHP_MINIT_FUNCTION(mbstring) */
PHP_MINIT_FUNCTION(mbstring)1040 PHP_MINIT_FUNCTION(mbstring)
1041 {
1042 #if defined(COMPILE_DL_MBSTRING) && defined(ZTS)
1043 ZEND_TSRMLS_CACHE_UPDATE();
1044 #endif
1045 
1046 	REGISTER_INI_ENTRIES();
1047 
1048 	/* We assume that we're the only user of the hook. */
1049 	ZEND_ASSERT(php_internal_encoding_changed == NULL);
1050 	php_internal_encoding_changed = mbstring_internal_encoding_changed_hook;
1051 	mbstring_internal_encoding_changed_hook();
1052 
1053 	/* This is a global handler. Should not be set in a per-request handler. */
1054 	sapi_register_treat_data(mbstr_treat_data);
1055 
1056 	/* Post handlers are stored in the thread-local context. */
1057 	if (MBSTRG(encoding_translation)) {
1058 		sapi_register_post_entries(mbstr_post_entries);
1059 	}
1060 
1061 	REGISTER_LONG_CONSTANT("MB_CASE_UPPER", PHP_UNICODE_CASE_UPPER, CONST_CS | CONST_PERSISTENT);
1062 	REGISTER_LONG_CONSTANT("MB_CASE_LOWER", PHP_UNICODE_CASE_LOWER, CONST_CS | CONST_PERSISTENT);
1063 	REGISTER_LONG_CONSTANT("MB_CASE_TITLE", PHP_UNICODE_CASE_TITLE, CONST_CS | CONST_PERSISTENT);
1064 	REGISTER_LONG_CONSTANT("MB_CASE_FOLD", PHP_UNICODE_CASE_FOLD, CONST_CS | CONST_PERSISTENT);
1065 	REGISTER_LONG_CONSTANT("MB_CASE_UPPER_SIMPLE", PHP_UNICODE_CASE_UPPER_SIMPLE, CONST_CS | CONST_PERSISTENT);
1066 	REGISTER_LONG_CONSTANT("MB_CASE_LOWER_SIMPLE", PHP_UNICODE_CASE_LOWER_SIMPLE, CONST_CS | CONST_PERSISTENT);
1067 	REGISTER_LONG_CONSTANT("MB_CASE_TITLE_SIMPLE", PHP_UNICODE_CASE_TITLE_SIMPLE, CONST_CS | CONST_PERSISTENT);
1068 	REGISTER_LONG_CONSTANT("MB_CASE_FOLD_SIMPLE", PHP_UNICODE_CASE_FOLD_SIMPLE, CONST_CS | CONST_PERSISTENT);
1069 
1070 #ifdef HAVE_MBREGEX
1071 	PHP_MINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1072 #endif
1073 
1074 	if (FAILURE == zend_multibyte_set_functions(&php_mb_zend_multibyte_functions)) {
1075 		return FAILURE;
1076 	}
1077 
1078 	php_rfc1867_set_multibyte_callbacks(
1079 		php_mb_encoding_translation,
1080 		php_mb_gpc_get_detect_order,
1081 		php_mb_gpc_set_input_encoding,
1082 		php_mb_rfc1867_getword,
1083 		php_mb_rfc1867_getword_conf,
1084 		php_mb_rfc1867_basename);
1085 
1086 	return SUCCESS;
1087 }
1088 /* }}} */
1089 
1090 /* {{{ PHP_MSHUTDOWN_FUNCTION(mbstring) */
PHP_MSHUTDOWN_FUNCTION(mbstring)1091 PHP_MSHUTDOWN_FUNCTION(mbstring)
1092 {
1093 	UNREGISTER_INI_ENTRIES();
1094 
1095 	zend_multibyte_restore_functions();
1096 
1097 #ifdef HAVE_MBREGEX
1098 	PHP_MSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1099 #endif
1100 
1101 	php_internal_encoding_changed = NULL;
1102 
1103 	return SUCCESS;
1104 }
1105 /* }}} */
1106 
1107 /* {{{ PHP_RINIT_FUNCTION(mbstring) */
PHP_RINIT_FUNCTION(mbstring)1108 PHP_RINIT_FUNCTION(mbstring)
1109 {
1110 	MBSTRG(current_internal_encoding) = MBSTRG(internal_encoding);
1111 	MBSTRG(current_http_output_encoding) = MBSTRG(http_output_encoding);
1112 	MBSTRG(current_filter_illegal_mode) = MBSTRG(filter_illegal_mode);
1113 	MBSTRG(current_filter_illegal_substchar) = MBSTRG(filter_illegal_substchar);
1114 
1115 	MBSTRG(illegalchars) = 0;
1116 
1117 	php_mb_populate_current_detect_order_list();
1118 
1119 #ifdef HAVE_MBREGEX
1120 	PHP_RINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1121 #endif
1122 	zend_multibyte_set_internal_encoding((const zend_encoding *)MBSTRG(internal_encoding));
1123 
1124 	return SUCCESS;
1125 }
1126 /* }}} */
1127 
1128 /* {{{ PHP_RSHUTDOWN_FUNCTION(mbstring) */
PHP_RSHUTDOWN_FUNCTION(mbstring)1129 PHP_RSHUTDOWN_FUNCTION(mbstring)
1130 {
1131 	if (MBSTRG(current_detect_order_list) != NULL) {
1132 		efree(ZEND_VOIDP(MBSTRG(current_detect_order_list)));
1133 		MBSTRG(current_detect_order_list) = NULL;
1134 		MBSTRG(current_detect_order_list_size) = 0;
1135 	}
1136 	if (MBSTRG(outconv) != NULL) {
1137 		MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
1138 		mbfl_buffer_converter_delete(MBSTRG(outconv));
1139 		MBSTRG(outconv) = NULL;
1140 	}
1141 
1142 	/* clear http input identification. */
1143 	MBSTRG(http_input_identify) = NULL;
1144 	MBSTRG(http_input_identify_post) = NULL;
1145 	MBSTRG(http_input_identify_get) = NULL;
1146 	MBSTRG(http_input_identify_cookie) = NULL;
1147 	MBSTRG(http_input_identify_string) = NULL;
1148 
1149 	if (MBSTRG(last_used_encoding_name)) {
1150 		zend_string_release(MBSTRG(last_used_encoding_name));
1151 		MBSTRG(last_used_encoding_name) = NULL;
1152 	}
1153 
1154 	MBSTRG(internal_encoding_set) = 0;
1155 	MBSTRG(http_output_set) = 0;
1156 	MBSTRG(http_input_set) = 0;
1157 
1158 #ifdef HAVE_MBREGEX
1159 	PHP_RSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1160 #endif
1161 
1162 	return SUCCESS;
1163 }
1164 /* }}} */
1165 
1166 /* {{{ PHP_MINFO_FUNCTION(mbstring) */
PHP_MINFO_FUNCTION(mbstring)1167 PHP_MINFO_FUNCTION(mbstring)
1168 {
1169 	php_info_print_table_start();
1170 	php_info_print_table_row(2, "Multibyte Support", "enabled");
1171 	php_info_print_table_row(2, "Multibyte string engine", "libmbfl");
1172 	php_info_print_table_row(2, "HTTP input encoding translation", MBSTRG(encoding_translation) ? "enabled": "disabled");
1173 	{
1174 		char tmp[256];
1175 		snprintf(tmp, sizeof(tmp), "%d.%d.%d", MBFL_VERSION_MAJOR, MBFL_VERSION_MINOR, MBFL_VERSION_TEENY);
1176 		php_info_print_table_row(2, "libmbfl version", tmp);
1177 	}
1178 	php_info_print_table_end();
1179 
1180 	php_info_print_table_start();
1181 	php_info_print_table_header(1, "mbstring extension makes use of \"streamable kanji code filter and converter\", which is distributed under the GNU Lesser General Public License version 2.1.");
1182 	php_info_print_table_end();
1183 
1184 #ifdef HAVE_MBREGEX
1185 	PHP_MINFO(mb_regex)(ZEND_MODULE_INFO_FUNC_ARGS_PASSTHRU);
1186 #endif
1187 
1188 	DISPLAY_INI_ENTRIES();
1189 }
1190 /* }}} */
1191 
1192 /* {{{ Sets the current language or Returns the current language as a string */
PHP_FUNCTION(mb_language)1193 PHP_FUNCTION(mb_language)
1194 {
1195 	zend_string *name = NULL;
1196 
1197 	ZEND_PARSE_PARAMETERS_START(0, 1)
1198 		Z_PARAM_OPTIONAL
1199 		Z_PARAM_STR_OR_NULL(name)
1200 	ZEND_PARSE_PARAMETERS_END();
1201 
1202 	if (name == NULL) {
1203 		RETVAL_STRING((char *)mbfl_no_language2name(MBSTRG(language)));
1204 	} else {
1205 		zend_string *ini_name = zend_string_init("mbstring.language", sizeof("mbstring.language") - 1, 0);
1206 		if (FAILURE == zend_alter_ini_entry(ini_name, name, PHP_INI_USER, PHP_INI_STAGE_RUNTIME)) {
1207 			zend_argument_value_error(1, "must be a valid language, \"%s\" given", ZSTR_VAL(name));
1208 			zend_string_release_ex(ini_name, 0);
1209 			RETURN_THROWS();
1210 		}
1211 		// TODO Make return void
1212 		RETVAL_TRUE;
1213 		zend_string_release_ex(ini_name, 0);
1214 	}
1215 }
1216 /* }}} */
1217 
1218 /* {{{ Sets the current internal encoding or Returns the current internal encoding as a string */
PHP_FUNCTION(mb_internal_encoding)1219 PHP_FUNCTION(mb_internal_encoding)
1220 {
1221 	char *name = NULL;
1222 	size_t name_len;
1223 	const mbfl_encoding *encoding;
1224 
1225 	ZEND_PARSE_PARAMETERS_START(0, 1)
1226 		Z_PARAM_OPTIONAL
1227 		Z_PARAM_STRING_OR_NULL(name, name_len)
1228 	ZEND_PARSE_PARAMETERS_END();
1229 
1230 	if (name == NULL) {
1231 		ZEND_ASSERT(MBSTRG(current_internal_encoding));
1232 		RETURN_STRING(MBSTRG(current_internal_encoding)->name);
1233 	} else {
1234 		encoding = mbfl_name2encoding(name);
1235 		if (!encoding) {
1236 			zend_argument_value_error(1, "must be a valid encoding, \"%s\" given", name);
1237 			RETURN_THROWS();
1238 		} else {
1239 			MBSTRG(current_internal_encoding) = encoding;
1240 			MBSTRG(internal_encoding_set) = 1;
1241 			/* TODO Return old encoding */
1242 			RETURN_TRUE;
1243 		}
1244 	}
1245 }
1246 /* }}} */
1247 
1248 /* {{{ Returns the input encoding */
PHP_FUNCTION(mb_http_input)1249 PHP_FUNCTION(mb_http_input)
1250 {
1251 	char *type = NULL;
1252 	size_t type_len = 0, n;
1253 	const mbfl_encoding **entry;
1254 	const mbfl_encoding *encoding;
1255 
1256 	ZEND_PARSE_PARAMETERS_START(0, 1)
1257 		Z_PARAM_OPTIONAL
1258 		Z_PARAM_STRING_OR_NULL(type, type_len)
1259 	ZEND_PARSE_PARAMETERS_END();
1260 
1261 	if (type == NULL) {
1262 		encoding = MBSTRG(http_input_identify);
1263 	} else {
1264 		switch (*type) {
1265 		case 'G':
1266 		case 'g':
1267 			encoding = MBSTRG(http_input_identify_get);
1268 			break;
1269 		case 'P':
1270 		case 'p':
1271 			encoding = MBSTRG(http_input_identify_post);
1272 			break;
1273 		case 'C':
1274 		case 'c':
1275 			encoding = MBSTRG(http_input_identify_cookie);
1276 			break;
1277 		case 'S':
1278 		case 's':
1279 			encoding = MBSTRG(http_input_identify_string);
1280 			break;
1281 		case 'I':
1282 		case 'i':
1283 			entry = MBSTRG(http_input_list);
1284 			n = MBSTRG(http_input_list_size);
1285 			array_init(return_value);
1286 			for (size_t i = 0; i < n; i++, entry++) {
1287 				add_next_index_string(return_value, (*entry)->name);
1288 			}
1289 			return;
1290 		case 'L':
1291 		case 'l':
1292 			entry = MBSTRG(http_input_list);
1293 			n = MBSTRG(http_input_list_size);
1294 			if (n == 0) {
1295 				// TODO should return empty string?
1296 				RETURN_FALSE;
1297 			}
1298 			// TODO Use smart_str instead.
1299 			mbfl_string result;
1300 			mbfl_memory_device device;
1301 			mbfl_memory_device_init(&device, n * 12, 0);
1302 			for (size_t i = 0; i < n; i++, entry++) {
1303 				mbfl_memory_device_strcat(&device, (*entry)->name);
1304 				mbfl_memory_device_output(',', &device);
1305 			}
1306 			mbfl_memory_device_unput(&device); /* Remove trailing comma */
1307 			mbfl_memory_device_result(&device, &result);
1308 			RETVAL_STRINGL((const char*)result.val, result.len);
1309 			mbfl_string_clear(&result);
1310 			return;
1311 		default:
1312 			zend_argument_value_error(1,
1313 				"must be one of \"G\", \"P\", \"C\", \"S\", \"I\", or \"L\"");
1314 			RETURN_THROWS();
1315 		}
1316 	}
1317 
1318 	if (encoding) {
1319 		RETURN_STRING(encoding->name);
1320 	} else {
1321 		RETURN_FALSE;
1322 	}
1323 }
1324 /* }}} */
1325 
1326 /* {{{ Sets the current output_encoding or returns the current output_encoding as a string */
PHP_FUNCTION(mb_http_output)1327 PHP_FUNCTION(mb_http_output)
1328 {
1329 	char *name = NULL;
1330 	size_t name_len;
1331 
1332 	ZEND_PARSE_PARAMETERS_START(0, 1)
1333 		Z_PARAM_OPTIONAL
1334 		Z_PARAM_STRING_OR_NULL(name, name_len)
1335 	ZEND_PARSE_PARAMETERS_END();
1336 
1337 	if (name == NULL) {
1338 		ZEND_ASSERT(MBSTRG(current_http_output_encoding));
1339 		RETURN_STRING(MBSTRG(current_http_output_encoding)->name);
1340 	} else {
1341 		const mbfl_encoding *encoding = php_mb_get_encoding_or_pass(name);
1342 		if (!encoding) {
1343 			zend_argument_value_error(1, "must be a valid encoding, \"%s\" given", name);
1344 			RETURN_THROWS();
1345 		} else {
1346 			MBSTRG(http_output_set) = 1;
1347 			MBSTRG(current_http_output_encoding) = encoding;
1348 			/* TODO Return previous encoding? */
1349 			RETURN_TRUE;
1350 		}
1351 	}
1352 }
1353 /* }}} */
1354 
1355 /* {{{ Sets the current detect_order or returns the current detect_order as an array */
PHP_FUNCTION(mb_detect_order)1356 PHP_FUNCTION(mb_detect_order)
1357 {
1358 	zend_string *order_str = NULL;
1359 	HashTable *order_ht = NULL;
1360 
1361 	ZEND_PARSE_PARAMETERS_START(0, 1)
1362 		Z_PARAM_OPTIONAL
1363 		Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(order_ht, order_str)
1364 	ZEND_PARSE_PARAMETERS_END();
1365 
1366 	if (!order_str && !order_ht) {
1367 		size_t n = MBSTRG(current_detect_order_list_size);
1368 		const mbfl_encoding **entry = MBSTRG(current_detect_order_list);
1369 		array_init(return_value);
1370 		for (size_t i = 0; i < n; i++) {
1371 			add_next_index_string(return_value, (*entry)->name);
1372 			entry++;
1373 		}
1374 	} else {
1375 		const mbfl_encoding **list;
1376 		size_t size;
1377 		if (order_ht) {
1378 			if (FAILURE == php_mb_parse_encoding_array(order_ht, &list, &size, 1)) {
1379 				RETURN_THROWS();
1380 			}
1381 		} else {
1382 			if (FAILURE == php_mb_parse_encoding_list(ZSTR_VAL(order_str), ZSTR_LEN(order_str), &list, &size, /* persistent */ 0, /* arg_num */ 1, /* allow_pass_encoding */ 0)) {
1383 				RETURN_THROWS();
1384 			}
1385 		}
1386 
1387 		if (size == 0) {
1388 			efree(ZEND_VOIDP(list));
1389 			zend_argument_value_error(1, "must specify at least one encoding");
1390 			RETURN_THROWS();
1391 		}
1392 
1393 		if (MBSTRG(current_detect_order_list)) {
1394 			efree(ZEND_VOIDP(MBSTRG(current_detect_order_list)));
1395 		}
1396 		MBSTRG(current_detect_order_list) = list;
1397 		MBSTRG(current_detect_order_list_size) = size;
1398 		RETURN_TRUE;
1399 	}
1400 }
1401 /* }}} */
1402 
/* Returns 1 when cp may be used as a substitute character, 0 otherwise.
 *
 * Rejected: anything outside 0..0x10FFFF, and the surrogate range
 * 0xD800..0xDFFF (surrogates are never valid on their own, and only a single
 * substitute character is allowed).
 *
 * Whether the code point is actually mapped in the eventual target encoding
 * cannot be checked here, because that encoding is not known yet; everything
 * else is therefore accepted. */
static inline int php_mb_check_code_point(zend_long cp)
{
	const int in_unicode_range = (cp >= 0 && cp < 0x110000);
	const int is_surrogate = (cp >= 0xd800 && cp <= 0xdfff);

	return in_unicode_range && !is_surrogate;
}
1421 
1422 /* {{{ Sets the current substitute_character or returns the current substitute_character */
PHP_FUNCTION(mb_substitute_character)1423 PHP_FUNCTION(mb_substitute_character)
1424 {
1425 	zend_string *substitute_character = NULL;
1426 	zend_long substitute_codepoint;
1427 	bool substitute_is_null = 1;
1428 
1429 	ZEND_PARSE_PARAMETERS_START(0, 1)
1430 		Z_PARAM_OPTIONAL
1431 		Z_PARAM_STR_OR_LONG_OR_NULL(substitute_character, substitute_codepoint, substitute_is_null)
1432 	ZEND_PARSE_PARAMETERS_END();
1433 
1434 	if (substitute_is_null) {
1435 		if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
1436 			RETURN_STRING("none");
1437 		}
1438 		if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
1439 			RETURN_STRING("long");
1440 		}
1441 		if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
1442 			RETURN_STRING("entity");
1443 		}
1444 		RETURN_LONG(MBSTRG(current_filter_illegal_substchar));
1445 	}
1446 
1447 	if (substitute_character != NULL) {
1448 		if (zend_string_equals_literal_ci(substitute_character, "none")) {
1449 			MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
1450 			RETURN_TRUE;
1451 		}
1452 		if (zend_string_equals_literal_ci(substitute_character, "long")) {
1453 			MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
1454 			RETURN_TRUE;
1455 		}
1456 		if (zend_string_equals_literal_ci(substitute_character, "entity")) {
1457 			MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
1458 			RETURN_TRUE;
1459 		}
1460 		/* Invalid string value */
1461 		zend_argument_value_error(1, "must be \"none\", \"long\", \"entity\" or a valid codepoint");
1462 		RETURN_THROWS();
1463 	}
1464 	/* Integer codepoint passed */
1465 	if (!php_mb_check_code_point(substitute_codepoint)) {
1466 		zend_argument_value_error(1, "is not a valid codepoint");
1467 		RETURN_THROWS();
1468 	}
1469 
1470 	MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1471 	MBSTRG(current_filter_illegal_substchar) = substitute_codepoint;
1472 	RETURN_TRUE;
1473 }
1474 /* }}} */
1475 
1476 /* {{{ Return the preferred MIME name (charset) as a string */
PHP_FUNCTION(mb_preferred_mime_name)1477 PHP_FUNCTION(mb_preferred_mime_name)
1478 {
1479 	enum mbfl_no_encoding no_encoding;
1480 	char *name = NULL;
1481 	size_t name_len;
1482 
1483 	ZEND_PARSE_PARAMETERS_START(1, 1)
1484 		Z_PARAM_STRING(name, name_len)
1485 	ZEND_PARSE_PARAMETERS_END();
1486 
1487 	no_encoding = mbfl_name2no_encoding(name);
1488 	if (no_encoding == mbfl_no_encoding_invalid) {
1489 		zend_argument_value_error(1, "must be a valid encoding, \"%s\" given", name);
1490 		RETURN_THROWS();
1491 	}
1492 
1493 	const char *preferred_name = mbfl_no2preferred_mime_name(no_encoding);
1494 	if (preferred_name == NULL || *preferred_name == '\0') {
1495 		php_error_docref(NULL, E_WARNING, "No MIME preferred name corresponding to \"%s\"", name);
1496 		RETVAL_FALSE;
1497 	} else {
1498 		RETVAL_STRING((char *)preferred_name);
1499 	}
1500 }
1501 /* }}} */
1502 
1503 /* {{{ Parses GET/POST/COOKIE data and sets global variables */
PHP_FUNCTION(mb_parse_str)1504 PHP_FUNCTION(mb_parse_str)
1505 {
1506 	zval *track_vars_array = NULL;
1507 	char *encstr;
1508 	size_t encstr_len;
1509 	php_mb_encoding_handler_info_t info;
1510 	const mbfl_encoding *detected;
1511 
1512 	ZEND_PARSE_PARAMETERS_START(2, 2)
1513 		Z_PARAM_STRING(encstr, encstr_len)
1514 		Z_PARAM_ZVAL(track_vars_array)
1515 	ZEND_PARSE_PARAMETERS_END();
1516 
1517 	track_vars_array = zend_try_array_init(track_vars_array);
1518 	if (!track_vars_array) {
1519 		RETURN_THROWS();
1520 	}
1521 
1522 	encstr = estrndup(encstr, encstr_len);
1523 
1524 	info.data_type              = PARSE_STRING;
1525 	info.separator              = PG(arg_separator).input;
1526 	info.report_errors          = 1;
1527 	info.to_encoding            = MBSTRG(current_internal_encoding);
1528 	info.to_language            = MBSTRG(language);
1529 	info.from_encodings         = MBSTRG(http_input_list);
1530 	info.num_from_encodings     = MBSTRG(http_input_list_size);
1531 	info.from_language          = MBSTRG(language);
1532 
1533 	detected = _php_mb_encoding_handler_ex(&info, track_vars_array, encstr);
1534 
1535 	MBSTRG(http_input_identify) = detected;
1536 
1537 	RETVAL_BOOL(detected);
1538 
1539 	if (encstr != NULL) efree(encstr);
1540 }
1541 /* }}} */
1542 
1543 /* {{{ Returns string in output buffer converted to the http_output encoding */
PHP_FUNCTION(mb_output_handler)1544 PHP_FUNCTION(mb_output_handler)
1545 {
1546 	char *arg_string;
1547 	size_t arg_string_len;
1548 	zend_long arg_status;
1549 	mbfl_string string, result;
1550 	const char *charset;
1551 	char *p;
1552 	const mbfl_encoding *encoding;
1553 	int last_feed;
1554 	size_t len;
1555 	unsigned char send_text_mimetype = 0;
1556 	char *s, *mimetype = NULL;
1557 
1558 	ZEND_PARSE_PARAMETERS_START(2, 2)
1559 		Z_PARAM_STRING(arg_string, arg_string_len)
1560 		Z_PARAM_LONG(arg_status)
1561 	ZEND_PARSE_PARAMETERS_END();
1562 
1563 	encoding = MBSTRG(current_http_output_encoding);
1564 
1565 	/* start phase only */
1566 	if ((arg_status & PHP_OUTPUT_HANDLER_START) != 0) {
1567 		/* delete the converter just in case. */
1568 		if (MBSTRG(outconv)) {
1569 			MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
1570 			mbfl_buffer_converter_delete(MBSTRG(outconv));
1571 			MBSTRG(outconv) = NULL;
1572 		}
1573 
1574 		if (encoding == &mbfl_encoding_pass) {
1575 			RETURN_STRINGL(arg_string, arg_string_len);
1576 		}
1577 
1578 		/* analyze mime type */
1579 		if (SG(sapi_headers).mimetype &&
1580 			_php_mb_match_regex(
1581 				MBSTRG(http_output_conv_mimetypes),
1582 				SG(sapi_headers).mimetype,
1583 				strlen(SG(sapi_headers).mimetype))) {
1584 			if ((s = strchr(SG(sapi_headers).mimetype,';')) == NULL) {
1585 				mimetype = estrdup(SG(sapi_headers).mimetype);
1586 			} else {
1587 				mimetype = estrndup(SG(sapi_headers).mimetype,s-SG(sapi_headers).mimetype);
1588 			}
1589 			send_text_mimetype = 1;
1590 		} else if (SG(sapi_headers).send_default_content_type) {
1591 			mimetype = SG(default_mimetype) ? SG(default_mimetype) : SAPI_DEFAULT_MIMETYPE;
1592 		}
1593 
1594 		/* if content-type is not yet set, set it and activate the converter */
1595 		if (SG(sapi_headers).send_default_content_type || send_text_mimetype) {
1596 			charset = encoding->mime_name;
1597 			if (charset) {
1598 				len = spprintf( &p, 0, "Content-Type: %s; charset=%s",  mimetype, charset );
1599 				if (sapi_add_header(p, len, 0) != FAILURE) {
1600 					SG(sapi_headers).send_default_content_type = 0;
1601 				}
1602 			}
1603 			/* activate the converter */
1604 			MBSTRG(outconv) = mbfl_buffer_converter_new(MBSTRG(current_internal_encoding), encoding, 0);
1605 			if (send_text_mimetype){
1606 				efree(mimetype);
1607 			}
1608 		}
1609 	}
1610 
1611 	/* just return if the converter is not activated. */
1612 	if (MBSTRG(outconv) == NULL) {
1613 		RETURN_STRINGL(arg_string, arg_string_len);
1614 	}
1615 
1616 	/* flag */
1617 	last_feed = ((arg_status & PHP_OUTPUT_HANDLER_END) != 0);
1618 	/* mode */
1619 	mbfl_buffer_converter_illegal_mode(MBSTRG(outconv), MBSTRG(current_filter_illegal_mode));
1620 	mbfl_buffer_converter_illegal_substchar(MBSTRG(outconv), MBSTRG(current_filter_illegal_substchar));
1621 
1622 	/* feed the string */
1623 	mbfl_string_init(&string);
1624 	/* these are not needed. convd has encoding info.
1625 	string.encoding = MBSTRG(current_internal_encoding);
1626 	*/
1627 	string.val = (unsigned char *)arg_string;
1628 	string.len = arg_string_len;
1629 
1630 	mbfl_buffer_converter_feed(MBSTRG(outconv), &string);
1631 	if (last_feed) {
1632 		mbfl_buffer_converter_flush(MBSTRG(outconv));
1633 	}
1634 	/* get the converter output, and return it */
1635 	mbfl_buffer_converter_result(MBSTRG(outconv), &result);
1636 
1637 	// TODO: avoid reallocation ???
1638 	RETVAL_STRINGL((char *)result.val, result.len);		/* the string is already strdup()'ed */
1639 	efree(result.val);
1640 
1641 	/* delete the converter if it is the last feed. */
1642 	if (last_feed) {
1643 		MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
1644 		mbfl_buffer_converter_delete(MBSTRG(outconv));
1645 		MBSTRG(outconv) = NULL;
1646 	}
1647 }
1648 /* }}} */
1649 
1650 /* {{{ Convert a multibyte string to an array. If split_length is specified,
1651  break the string down into chunks each split_length characters long. */
1652 
1653 /* structure to pass split params to the callback */
1654 struct mbfl_split_params {
1655 	zval *return_value; /* php function return value structure pointer */
1656 	mbfl_string *result_string; /* string to store result chunk */
1657 	size_t mb_chunk_length; /* actual chunk length in chars */
1658 	size_t split_length; /* split length in chars */
1659 	mbfl_convert_filter *next_filter; /* widechar to encoding converter */
1660 };
1661 
1662 /* callback function to fill split array */
mbfl_split_output(int c,void * data)1663 static int mbfl_split_output(int c, void *data)
1664 {
1665 	struct mbfl_split_params *params = (struct mbfl_split_params *)data; /* cast passed data */
1666 
1667 	(*params->next_filter->filter_function)(c, params->next_filter); /* decoder filter */
1668 
1669 	if (params->split_length == ++params->mb_chunk_length) { /* if current chunk size reached defined chunk size or last char reached */
1670 		mbfl_convert_filter_flush(params->next_filter);/* concatenate separate decoded chars to the solid string */
1671 		mbfl_memory_device *device = (mbfl_memory_device *)params->next_filter->data; /* chars container */
1672 		mbfl_string *chunk = params->result_string;
1673 		mbfl_memory_device_result(device, chunk); /* make chunk */
1674 		add_next_index_stringl(params->return_value, (const char *)chunk->val, chunk->len); /* add chunk to the array */
1675 		efree(chunk->val);
1676 		params->mb_chunk_length = 0; /* reset mb_chunk size */
1677 	}
1678 
1679 	return 0;
1680 }
1681 
PHP_FUNCTION(mb_str_split)1682 PHP_FUNCTION(mb_str_split)
1683 {
1684 	zend_string *str, *encoding = NULL;
1685 	size_t mb_len, chunks, chunk_len;
1686 	const char *p, *last; /* pointer for the string cursor and last string char */
1687 	mbfl_string string, result_string;
1688 	const mbfl_encoding *mbfl_encoding;
1689 	zend_long split_length = 1;
1690 
1691 	ZEND_PARSE_PARAMETERS_START(1, 3)
1692 		Z_PARAM_STR(str)
1693 		Z_PARAM_OPTIONAL
1694 		Z_PARAM_LONG(split_length)
1695 		Z_PARAM_STR_OR_NULL(encoding)
1696 	ZEND_PARSE_PARAMETERS_END();
1697 
1698 	if (split_length <= 0) {
1699 		zend_argument_value_error(2, "must be greater than 0");
1700 		RETURN_THROWS();
1701 	}
1702 
1703 	/* fill mbfl_string structure */
1704 	string.val = (unsigned char *) ZSTR_VAL(str);
1705 	string.len = ZSTR_LEN(str);
1706 	string.encoding = php_mb_get_encoding(encoding, 3);
1707 	if (!string.encoding) {
1708 		RETURN_THROWS();
1709 	}
1710 
1711 	p = ZSTR_VAL(str); /* string cursor pointer */
1712 	last = ZSTR_VAL(str) + ZSTR_LEN(str); /* last string char pointer */
1713 
1714 	mbfl_encoding = string.encoding;
1715 
1716 	/* first scenario: 1,2,4-bytes fixed width encodings (head part) */
1717 	if (mbfl_encoding->flag & MBFL_ENCTYPE_SBCS) { /* 1 byte */
1718 		mb_len = string.len;
1719 		chunk_len = (size_t)split_length; /* chunk length in bytes */
1720 	} else if (mbfl_encoding->flag & MBFL_ENCTYPE_WCS2) { /* 2 bytes */
1721 		mb_len = string.len / 2;
1722 		chunk_len = split_length * 2;
1723 	} else if (mbfl_encoding->flag & MBFL_ENCTYPE_WCS4) { /* 4 bytes */
1724 		mb_len = string.len / 4;
1725 		chunk_len = split_length * 4;
1726 	} else if (mbfl_encoding->mblen_table != NULL) {
1727 		/* second scenario: variable width encodings with length table */
1728 		char unsigned const *mbtab = mbfl_encoding->mblen_table;
1729 
1730 		/* assume that we have 1-bytes characters */
1731 		array_init_size(return_value, (string.len + split_length) / split_length); /* round up */
1732 
1733 		while (p < last) { /* split cycle work until the cursor has reached the last byte */
1734 			char const *chunk_p = p; /* chunk first byte pointer */
1735 			chunk_len = 0; /* chunk length in bytes */
1736 			zend_long char_count;
1737 
1738 			for (char_count = 0; char_count < split_length && p < last; ++char_count) {
1739 				char unsigned const m = mbtab[*(const unsigned char *)p]; /* single character length table */
1740 				chunk_len += m;
1741 				p += m;
1742 			}
1743 			if (p >= last) chunk_len -= p - last; /* check if chunk is in bounds */
1744 			add_next_index_stringl(return_value, chunk_p, chunk_len);
1745 		}
1746 		return;
1747 	} else {
1748 		/* third scenario: other multibyte encodings */
1749 		mbfl_convert_filter *filter, *decoder;
1750 
1751 		/* assume that we have 1-bytes characters */
1752 		array_init_size(return_value, (string.len + split_length) / split_length); /* round up */
1753 
1754 		/* decoder filter to decode wchar to encoding */
1755 		mbfl_memory_device device;
1756 		mbfl_memory_device_init(&device, split_length + 1, 0);
1757 
1758 		decoder = mbfl_convert_filter_new(
1759 				&mbfl_encoding_wchar,
1760 				string.encoding,
1761 				mbfl_memory_device_output,
1762 				NULL,
1763 				&device);
1764 		/* assert that nothing is wrong with the decoder */
1765 		ZEND_ASSERT(decoder != NULL);
1766 
1767 		/* wchar filter */
1768 		mbfl_string_init(&result_string); /* mbfl_string to store chunk in the callback */
1769 		struct mbfl_split_params params = { /* init callback function params structure */
1770 			.return_value = return_value,
1771 			.result_string = &result_string,
1772 			.mb_chunk_length = 0,
1773 			.split_length = (size_t)split_length,
1774 			.next_filter = decoder,
1775 		};
1776 
1777 		filter = mbfl_convert_filter_new(
1778 				string.encoding,
1779 				&mbfl_encoding_wchar,
1780 				mbfl_split_output,
1781 				NULL,
1782 				&params);
1783 		/* assert that nothing is wrong with the filter */
1784 		ZEND_ASSERT(filter != NULL);
1785 
1786 		while (p < last - 1) { /* cycle each byte except last with callback function */
1787 			(*filter->filter_function)(*p++, filter);
1788 		}
1789 		params.mb_chunk_length = split_length - 1; /* force to finish current chunk */
1790 		(*filter->filter_function)(*p++, filter); /* process last char */
1791 
1792 		mbfl_convert_filter_delete(decoder);
1793 		mbfl_convert_filter_delete(filter);
1794 		mbfl_memory_device_clear(&device);
1795 		return;
1796 	}
1797 
1798 	/* first scenario: 1,2,4-bytes fixed width encodings (tail part) */
1799 	chunks = (mb_len + split_length - 1) / split_length; /* (round up idiom) */
1800 	array_init_size(return_value, chunks);
1801 	if (chunks != 0) {
1802 		zend_long i;
1803 
1804 		for (i = 0; i < chunks - 1; p += chunk_len, ++i) {
1805 			add_next_index_stringl(return_value, p, chunk_len);
1806 		}
1807 		add_next_index_stringl(return_value, p, last - p);
1808 	}
1809 }
1810 /* }}} */
1811 
1812 /* {{{ Get character numbers of a string */
PHP_FUNCTION(mb_strlen)1813 PHP_FUNCTION(mb_strlen)
1814 {
1815 	mbfl_string string;
1816 	char *str;
1817 	zend_string *enc_name = NULL;
1818 
1819 	ZEND_PARSE_PARAMETERS_START(1, 2)
1820 		Z_PARAM_STRING(str, string.len)
1821 		Z_PARAM_OPTIONAL
1822 		Z_PARAM_STR_OR_NULL(enc_name)
1823 	ZEND_PARSE_PARAMETERS_END();
1824 
1825 	string.val = (unsigned char*)str;
1826 	string.encoding = php_mb_get_encoding(enc_name, 2);
1827 	if (!string.encoding) {
1828 		RETURN_THROWS();
1829 	}
1830 
1831 	size_t n = mbfl_strlen(&string);
1832 	/* Only way this can fail is if the conversion creation fails
1833 	 * this would imply some sort of memory allocation failure which is a bug */
1834 	ZEND_ASSERT(!mbfl_is_error(n));
1835 	RETVAL_LONG(n);
1836 }
1837 /* }}} */
1838 
handle_strpos_error(size_t error)1839 static void handle_strpos_error(size_t error) {
1840 	switch (error) {
1841 	case MBFL_ERROR_NOT_FOUND:
1842 		break;
1843 	case MBFL_ERROR_ENCODING:
1844 		php_error_docref(NULL, E_WARNING, "Conversion error");
1845 		break;
1846 	case MBFL_ERROR_OFFSET:
1847 		zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
1848 		break;
1849 	default:
1850 		zend_value_error("mb_strpos(): Unknown error");
1851 		break;
1852 	}
1853 }
1854 
1855 /* {{{ Find position of first occurrence of a string within another */
PHP_FUNCTION(mb_strpos)1856 PHP_FUNCTION(mb_strpos)
1857 {
1858 	int reverse = 0;
1859 	zend_long offset = 0;
1860 	char *haystack_val, *needle_val;
1861 	mbfl_string haystack, needle;
1862 	zend_string *enc_name = NULL;
1863 
1864 	ZEND_PARSE_PARAMETERS_START(2, 4)
1865 		Z_PARAM_STRING(haystack_val, haystack.len)
1866 		Z_PARAM_STRING(needle_val, needle.len)
1867 		Z_PARAM_OPTIONAL
1868 		Z_PARAM_LONG(offset)
1869 		Z_PARAM_STR_OR_NULL(enc_name)
1870 	ZEND_PARSE_PARAMETERS_END();
1871 
1872 	haystack.val = (unsigned char*)haystack_val;
1873 	needle.val = (unsigned char*)needle_val;
1874 
1875 	haystack.encoding = needle.encoding = php_mb_get_encoding(enc_name, 4);
1876 	if (!haystack.encoding) {
1877 		RETURN_THROWS();
1878 	}
1879 
1880 	size_t n = mbfl_strpos(&haystack, &needle, offset, reverse);
1881 	if (!mbfl_is_error(n)) {
1882 		RETVAL_LONG(n);
1883 	} else {
1884 		handle_strpos_error(n);
1885 		RETVAL_FALSE;
1886 	}
1887 }
1888 /* }}} */
1889 
1890 /* {{{ Find position of last occurrence of a string within another */
PHP_FUNCTION(mb_strrpos)1891 PHP_FUNCTION(mb_strrpos)
1892 {
1893 	mbfl_string haystack, needle;
1894 	char *haystack_val, *needle_val;
1895 	zend_string *enc_name = NULL;
1896 	zend_long offset = 0;
1897 
1898 	ZEND_PARSE_PARAMETERS_START(2, 4)
1899 		Z_PARAM_STRING(haystack_val, haystack.len)
1900 		Z_PARAM_STRING(needle_val, needle.len)
1901 		Z_PARAM_OPTIONAL
1902 		Z_PARAM_LONG(offset)
1903 		Z_PARAM_STR_OR_NULL(enc_name)
1904 	ZEND_PARSE_PARAMETERS_END();
1905 
1906 	haystack.val = (unsigned char*)haystack_val;
1907 	needle.val = (unsigned char*)needle_val;
1908 
1909 	haystack.encoding = needle.encoding = php_mb_get_encoding(enc_name, 4);
1910 	if (!haystack.encoding) {
1911 		RETURN_THROWS();
1912 	}
1913 
1914 	size_t n = mbfl_strpos(&haystack, &needle, offset, 1);
1915 	if (!mbfl_is_error(n)) {
1916 		RETVAL_LONG(n);
1917 	} else {
1918 		handle_strpos_error(n);
1919 		RETVAL_FALSE;
1920 	}
1921 }
1922 /* }}} */
1923 
1924 /* {{{ Finds position of first occurrence of a string within another, case insensitive */
PHP_FUNCTION(mb_stripos)1925 PHP_FUNCTION(mb_stripos)
1926 {
1927 	zend_long offset = 0;
1928 	mbfl_string haystack, needle;
1929 	char *haystack_val, *needle_val;
1930 	zend_string *from_encoding = NULL;
1931 
1932 	ZEND_PARSE_PARAMETERS_START(2, 4)
1933 		Z_PARAM_STRING(haystack_val, haystack.len)
1934 		Z_PARAM_STRING(needle_val, needle.len)
1935 		Z_PARAM_OPTIONAL
1936 		Z_PARAM_LONG(offset)
1937 		Z_PARAM_STR_OR_NULL(from_encoding)
1938 	ZEND_PARSE_PARAMETERS_END();
1939 
1940 	haystack.val = (unsigned char*)haystack_val;
1941 	needle.val = (unsigned char*)needle_val;
1942 
1943 	const mbfl_encoding *enc = php_mb_get_encoding(from_encoding, 4);
1944 	if (!enc) {
1945 		RETURN_THROWS();
1946 	}
1947 
1948 	size_t n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, enc);
1949 
1950 	if (!mbfl_is_error(n)) {
1951 		RETVAL_LONG(n);
1952 	} else {
1953 		handle_strpos_error(n);
1954 		RETVAL_FALSE;
1955 	}
1956 }
1957 /* }}} */
1958 
1959 /* {{{ Finds position of last occurrence of a string within another, case insensitive */
PHP_FUNCTION(mb_strripos)1960 PHP_FUNCTION(mb_strripos)
1961 {
1962 	zend_long offset = 0;
1963 	mbfl_string haystack, needle;
1964 	char *haystack_val, *needle_val;
1965 	zend_string *from_encoding = NULL;
1966 
1967 	ZEND_PARSE_PARAMETERS_START(2, 4)
1968 		Z_PARAM_STRING(haystack_val, haystack.len)
1969 		Z_PARAM_STRING(needle_val, needle.len)
1970 		Z_PARAM_OPTIONAL
1971 		Z_PARAM_LONG(offset)
1972 		Z_PARAM_STR_OR_NULL(from_encoding)
1973 	ZEND_PARSE_PARAMETERS_END();
1974 
1975 	haystack.val = (unsigned char*)haystack_val;
1976 	needle.val = (unsigned char*)needle_val;
1977 
1978 	const mbfl_encoding *enc = php_mb_get_encoding(from_encoding, 4);
1979 	if (!enc) {
1980 		RETURN_THROWS();
1981 	}
1982 
1983 	size_t n = php_mb_stripos(1, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, enc);
1984 
1985 	if (!mbfl_is_error(n)) {
1986 		RETVAL_LONG(n);
1987 	} else {
1988 		handle_strpos_error(n);
1989 		RETVAL_FALSE;
1990 	}
1991 }
1992 /* }}} */
1993 
1994 #define MB_STRSTR 1
1995 #define MB_STRRCHR 2
1996 #define MB_STRISTR 3
1997 #define MB_STRRICHR 4
1998 /* {{{ php_mb_strstr_variants */
php_mb_strstr_variants(INTERNAL_FUNCTION_PARAMETERS,unsigned int variant)1999 static void php_mb_strstr_variants(INTERNAL_FUNCTION_PARAMETERS, unsigned int variant)
2000 {
2001 	int reverse_mode = 0;
2002 	size_t n;
2003 	char *haystack_val, *needle_val;
2004 	mbfl_string haystack, needle, result, *ret = NULL;
2005 	zend_string *encoding_name = NULL;
2006 	bool part = 0;
2007 
2008 	ZEND_PARSE_PARAMETERS_START(2, 4)
2009 		Z_PARAM_STRING(haystack_val, haystack.len)
2010 		Z_PARAM_STRING(needle_val, needle.len)
2011 		Z_PARAM_OPTIONAL
2012 		Z_PARAM_BOOL(part)
2013 		Z_PARAM_STR_OR_NULL(encoding_name)
2014 	ZEND_PARSE_PARAMETERS_END();
2015 
2016 	haystack.val = (unsigned char*)haystack_val;
2017 	needle.val = (unsigned char*)needle_val;
2018 	haystack.encoding = needle.encoding = php_mb_get_encoding(encoding_name, 4);
2019 	if (!haystack.encoding) {
2020 		RETURN_THROWS();
2021 	}
2022 
2023 	if (variant == MB_STRRCHR || variant == MB_STRRICHR) { reverse_mode = 1; }
2024 
2025 	if (variant == MB_STRISTR || variant == MB_STRRICHR) {
2026 		n = php_mb_stripos(reverse_mode, (char *)haystack.val, haystack.len, (char *)needle.val,
2027 			needle.len, 0, needle.encoding);
2028 	} else {
2029 		n = mbfl_strpos(&haystack, &needle, 0, reverse_mode);
2030 	}
2031 
2032 	if (!mbfl_is_error(n)) {
2033 		if (part) {
2034 			ret = mbfl_substr(&haystack, &result, 0, n);
2035 			ZEND_ASSERT(ret != NULL);
2036 			// TODO: avoid reallocation ???
2037 			RETVAL_STRINGL((char *)ret->val, ret->len);
2038 			efree(ret->val);
2039 		} else {
2040 			ret = mbfl_substr(&haystack, &result, n, MBFL_SUBSTR_UNTIL_END);
2041 			ZEND_ASSERT(ret != NULL);
2042 			// TODO: avoid reallocation ???
2043 			RETVAL_STRINGL((char *)ret->val, ret->len);
2044 			efree(ret->val);
2045 		}
2046 	} else {
2047 		// FIXME use handle_strpos_error(n)
2048 		RETVAL_FALSE;
2049 	}
2050 }
2051 
2052 /* {{{ Finds first occurrence of a string within another */
PHP_FUNCTION(mb_strstr)2053 PHP_FUNCTION(mb_strstr)
2054 {
2055 	php_mb_strstr_variants(INTERNAL_FUNCTION_PARAM_PASSTHRU, MB_STRSTR);
2056 }
2057 /* }}} */
2058 
2059 /* {{{ Finds the last occurrence of a character in a string within another */
PHP_FUNCTION(mb_strrchr)2060 PHP_FUNCTION(mb_strrchr)
2061 {
2062 	php_mb_strstr_variants(INTERNAL_FUNCTION_PARAM_PASSTHRU, MB_STRRCHR);
2063 }
2064 /* }}} */
2065 
2066 /* {{{ Finds first occurrence of a string within another, case insensitive */
PHP_FUNCTION(mb_stristr)2067 PHP_FUNCTION(mb_stristr)
2068 {
2069 	php_mb_strstr_variants(INTERNAL_FUNCTION_PARAM_PASSTHRU, MB_STRISTR);
2070 }
2071 /* }}} */
2072 
2073 /* {{{ Finds the last occurrence of a character in a string within another, case insensitive */
PHP_FUNCTION(mb_strrichr)2074 PHP_FUNCTION(mb_strrichr)
2075 {
2076 	php_mb_strstr_variants(INTERNAL_FUNCTION_PARAM_PASSTHRU, MB_STRRICHR);
2077 }
2078 /* }}} */
2079 
2080 #undef MB_STRSTR
2081 #undef MB_STRRCHR
2082 #undef MB_STRISTR
2083 #undef MB_STRRICHR
2084 
2085 /* {{{ Count the number of substring occurrences */
PHP_FUNCTION(mb_substr_count)2086 PHP_FUNCTION(mb_substr_count)
2087 {
2088 	mbfl_string haystack, needle;
2089 	char *haystack_val, *needle_val;
2090 	zend_string *enc_name = NULL;
2091 
2092 	ZEND_PARSE_PARAMETERS_START(2, 3)
2093 		Z_PARAM_STRING(haystack_val, haystack.len)
2094 		Z_PARAM_STRING(needle_val, needle.len)
2095 		Z_PARAM_OPTIONAL
2096 		Z_PARAM_STR_OR_NULL(enc_name)
2097 	ZEND_PARSE_PARAMETERS_END();
2098 
2099 	haystack.val = (unsigned char*)haystack_val;
2100 	needle.val = (unsigned char*)needle_val;
2101 
2102 	if (needle.len == 0) {
2103 		zend_argument_value_error(2, "must not be empty");
2104 		RETURN_THROWS();
2105 	}
2106 
2107 	haystack.encoding = needle.encoding = php_mb_get_encoding(enc_name, 3);
2108 	if (!haystack.encoding) {
2109 		RETURN_THROWS();
2110 	}
2111 
2112 	size_t n = mbfl_substr_count(&haystack, &needle);
2113 	/* An error can only occur if needle is empty,
2114 	 * an encoding error happens (which should not happen at this stage and is a bug)
2115 	 * or the haystack is more than sizeof(size_t) bytes
2116 	 * If one of these things occur this is a bug and should be flagged as such */
2117 	ZEND_ASSERT(!mbfl_is_error(n));
2118 	RETVAL_LONG(n);
2119 }
2120 /* }}} */
2121 
2122 /* {{{ Returns part of a string */
PHP_FUNCTION(mb_substr)2123 PHP_FUNCTION(mb_substr)
2124 {
2125 	char *str;
2126 	zend_string *encoding = NULL;
2127 	zend_long from, len;
2128 	size_t real_from, real_len;
2129 	size_t str_len;
2130 	bool len_is_null = 1;
2131 	mbfl_string string, result, *ret;
2132 
2133 	ZEND_PARSE_PARAMETERS_START(2, 4)
2134 		Z_PARAM_STRING(str, str_len)
2135 		Z_PARAM_LONG(from)
2136 		Z_PARAM_OPTIONAL
2137 		Z_PARAM_LONG_OR_NULL(len, len_is_null)
2138 		Z_PARAM_STR_OR_NULL(encoding)
2139 	ZEND_PARSE_PARAMETERS_END();
2140 
2141 	string.encoding = php_mb_get_encoding(encoding, 4);
2142 	if (!string.encoding) {
2143 		RETURN_THROWS();
2144 	}
2145 
2146 	string.val = (unsigned char *)str;
2147 	string.len = str_len;
2148 
2149 	/* measures length */
2150 	size_t mblen = 0;
2151 	if (from < 0 || (!len_is_null && len < 0)) {
2152 		mblen = mbfl_strlen(&string);
2153 	}
2154 
2155 	/* if "from" position is negative, count start position from the end
2156 	 * of the string
2157 	 */
2158 	if (from >= 0) {
2159 		real_from = (size_t) from;
2160 	} else if (-from < mblen) {
2161 		real_from = mblen + from;
2162 	} else {
2163 		real_from = 0;
2164 	}
2165 
2166 	/* if "length" position is negative, set it to the length
2167 	 * needed to stop that many chars from the end of the string
2168 	 */
2169 	if (len_is_null) {
2170 		real_len = MBFL_SUBSTR_UNTIL_END;
2171 	} else if (len >= 0) {
2172 		real_len = (size_t) len;
2173 	} else if (real_from < mblen && -len < mblen - real_from) {
2174 		real_len = (mblen - real_from) + len;
2175 	} else {
2176 		real_len = 0;
2177 	}
2178 
2179 	ret = mbfl_substr(&string, &result, real_from, real_len);
2180 	ZEND_ASSERT(ret != NULL);
2181 
2182 	// TODO: avoid reallocation ???
2183 	RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
2184 	efree(ret->val);
2185 }
2186 /* }}} */
2187 
2188 /* {{{ Returns part of a string */
PHP_FUNCTION(mb_strcut)2189 PHP_FUNCTION(mb_strcut)
2190 {
2191 	zend_string *encoding = NULL;
2192 	char *string_val;
2193 	zend_long from, len;
2194 	bool len_is_null = 1;
2195 	mbfl_string string, result, *ret;
2196 
2197 	ZEND_PARSE_PARAMETERS_START(2, 4)
2198 		Z_PARAM_STRING(string_val, string.len)
2199 		Z_PARAM_LONG(from)
2200 		Z_PARAM_OPTIONAL
2201 		Z_PARAM_LONG_OR_NULL(len, len_is_null)
2202 		Z_PARAM_STR_OR_NULL(encoding)
2203 	ZEND_PARSE_PARAMETERS_END();
2204 
2205 	string.val = (unsigned char*)string_val;
2206 	string.encoding = php_mb_get_encoding(encoding, 4);
2207 	if (!string.encoding) {
2208 		RETURN_THROWS();
2209 	}
2210 
2211 	if (len_is_null) {
2212 		len = string.len;
2213 	}
2214 
2215 	/* if "from" position is negative, count start position from the end
2216 	 * of the string
2217 	 */
2218 	if (from < 0) {
2219 		from = string.len + from;
2220 		if (from < 0) {
2221 			from = 0;
2222 		}
2223 	}
2224 
2225 	/* if "length" position is negative, set it to the length
2226 	 * needed to stop that many chars from the end of the string
2227 	 */
2228 	if (len < 0) {
2229 		len = (string.len - from) + len;
2230 		if (len < 0) {
2231 			len = 0;
2232 		}
2233 	}
2234 
2235 	if (from > string.len) {
2236 		RETURN_EMPTY_STRING();
2237 	}
2238 
2239 	ret = mbfl_strcut(&string, &result, from, len);
2240 	ZEND_ASSERT(ret != NULL);
2241 
2242 	// TODO: avoid reallocation ???
2243 	RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
2244 	efree(ret->val);
2245 }
2246 /* }}} */
2247 
2248 /* {{{ Gets terminal width of a string */
PHP_FUNCTION(mb_strwidth)2249 PHP_FUNCTION(mb_strwidth)
2250 {
2251 	char *string_val;
2252 	mbfl_string string;
2253 	zend_string *enc_name = NULL;
2254 
2255 	ZEND_PARSE_PARAMETERS_START(1, 2)
2256 		Z_PARAM_STRING(string_val, string.len)
2257 		Z_PARAM_OPTIONAL
2258 		Z_PARAM_STR_OR_NULL(enc_name)
2259 	ZEND_PARSE_PARAMETERS_END();
2260 
2261 	string.val = (unsigned char*)string_val;
2262 	string.encoding = php_mb_get_encoding(enc_name, 2);
2263 	if (!string.encoding) {
2264 		RETURN_THROWS();
2265 	}
2266 
2267 	size_t n = mbfl_strwidth(&string);
2268 	ZEND_ASSERT(n != (size_t) -1);
2269 	RETVAL_LONG(n);
2270 }
2271 /* }}} */
2272 
2273 /* {{{ Trim the string in terminal width */
PHP_FUNCTION(mb_strimwidth)2274 PHP_FUNCTION(mb_strimwidth)
2275 {
2276 	char *str, *trimmarker = NULL;
2277 	zend_string *encoding = NULL;
2278 	zend_long from, width, swidth = 0;
2279 	size_t str_len, trimmarker_len;
2280 	mbfl_string string, result, marker, *ret;
2281 
2282 	ZEND_PARSE_PARAMETERS_START(3, 5)
2283 		Z_PARAM_STRING(str, str_len)
2284 		Z_PARAM_LONG(from)
2285 		Z_PARAM_LONG(width)
2286 		Z_PARAM_OPTIONAL
2287 		Z_PARAM_STRING(trimmarker, trimmarker_len)
2288 		Z_PARAM_STR_OR_NULL(encoding)
2289 	ZEND_PARSE_PARAMETERS_END();
2290 
2291 	string.encoding = marker.encoding = php_mb_get_encoding(encoding, 5);
2292 	if (!string.encoding) {
2293 		RETURN_THROWS();
2294 	}
2295 
2296 	string.val = (unsigned char *)str;
2297 	string.len = str_len;
2298 	marker.val = NULL;
2299 	marker.len = 0;
2300 
2301 	if ((from < 0) || (width < 0)) {
2302 		swidth = mbfl_strwidth(&string);
2303 	}
2304 
2305 	if (from < 0) {
2306 		from += swidth;
2307 	}
2308 
2309 	if (from < 0 || (size_t)from > str_len) {
2310 		zend_argument_value_error(2, "is out of range");
2311 		RETURN_THROWS();
2312 	}
2313 
2314 	if (width < 0) {
2315 		width = swidth + width - from;
2316 	}
2317 
2318 	if (width < 0) {
2319 		zend_argument_value_error(3, "is out of range");
2320 		RETURN_THROWS();
2321 	}
2322 
2323 	if (trimmarker) {
2324 		marker.val = (unsigned char *)trimmarker;
2325 		marker.len = trimmarker_len;
2326 	}
2327 
2328 	ret = mbfl_strimwidth(&string, &marker, &result, from, width);
2329 	ZEND_ASSERT(ret != NULL);
2330 	// TODO: avoid reallocation ???
2331 	RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
2332 	efree(ret->val);
2333 }
2334 /* }}} */
2335 
2336 
2337 /* See mbfl_no_encoding definition for list of unsupported encodings */
php_mb_is_unsupported_no_encoding(enum mbfl_no_encoding no_enc)2338 static inline bool php_mb_is_unsupported_no_encoding(enum mbfl_no_encoding no_enc)
2339 {
2340 	return ((no_enc >= mbfl_no_encoding_invalid && no_enc <= mbfl_no_encoding_qprint)
2341 			|| (no_enc >= mbfl_no_encoding_utf7 && no_enc <= mbfl_no_encoding_utf7imap)
2342 			|| (no_enc >= mbfl_no_encoding_jis && no_enc <= mbfl_no_encoding_2022jpms)
2343 			|| (no_enc >= mbfl_no_encoding_cp50220 && no_enc <= mbfl_no_encoding_cp50222));
2344 }
2345 
2346 
2347 /* See mbfl_no_encoding definition for list of UTF-8 encodings */
php_mb_is_no_encoding_utf8(enum mbfl_no_encoding no_enc)2348 static inline bool php_mb_is_no_encoding_utf8(enum mbfl_no_encoding no_enc)
2349 {
2350 	return (no_enc >= mbfl_no_encoding_utf8 && no_enc <= mbfl_no_encoding_utf8_sb);
2351 }
2352 
php_mb_convert_encoding_ex(const char * input,size_t length,const mbfl_encoding * to_encoding,const mbfl_encoding * from_encoding,size_t * output_len)2353 MBSTRING_API char *php_mb_convert_encoding_ex(const char *input, size_t length, const mbfl_encoding *to_encoding, const mbfl_encoding *from_encoding, size_t *output_len)
2354 {
2355 	mbfl_string string, result, *ret;
2356 	mbfl_buffer_converter *convd;
2357 	char *output = NULL;
2358 
2359 	if (output_len) {
2360 		*output_len = 0;
2361 	}
2362 
2363 	/* initialize string */
2364 	string.encoding = from_encoding;
2365 	string.val = (unsigned char *)input;
2366 	string.len = length;
2367 
2368 	/* initialize converter */
2369 	convd = mbfl_buffer_converter_new(from_encoding, to_encoding, string.len);
2370 	/* If this assertion fails this means some memory allocation failure which is a bug */
2371 	ZEND_ASSERT(convd != NULL);
2372 
2373 	mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
2374 	mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
2375 
2376 	/* do it */
2377 	mbfl_string_init(&result);
2378 	ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
2379 	if (ret) {
2380 		if (output_len) {
2381 			*output_len = ret->len;
2382 		}
2383 		output = (char *)ret->val;
2384 	}
2385 
2386 	MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
2387 	mbfl_buffer_converter_delete(convd);
2388 	return output;
2389 }
2390 /* }}} */
2391 
2392 /* {{{ MBSTRING_API char *php_mb_convert_encoding() */
php_mb_convert_encoding(const char * input,size_t length,const mbfl_encoding * to_encoding,const mbfl_encoding ** from_encodings,size_t num_from_encodings,size_t * output_len)2393 MBSTRING_API char *php_mb_convert_encoding(const char *input, size_t length, const mbfl_encoding *to_encoding, const mbfl_encoding **from_encodings, size_t num_from_encodings, size_t *output_len)
2394 {
2395 	const mbfl_encoding *from_encoding;
2396 
2397 	if (output_len) {
2398 		*output_len = 0;
2399 	}
2400 
2401 	/* pre-conversion encoding */
2402 	ZEND_ASSERT(num_from_encodings >= 1);
2403 	if (num_from_encodings == 1) {
2404 		from_encoding = *from_encodings;
2405 	} else {
2406 		/* auto detect */
2407 		mbfl_string string;
2408 		mbfl_string_init(&string);
2409 		string.val = (unsigned char *)input;
2410 		string.len = length;
2411 		from_encoding = mbfl_identify_encoding(
2412 			&string, from_encodings, num_from_encodings, MBSTRG(strict_detection));
2413 		if (!from_encoding) {
2414 			php_error_docref(NULL, E_WARNING, "Unable to detect character encoding");
2415 			return NULL;
2416 		}
2417 	}
2418 
2419 	return php_mb_convert_encoding_ex(input, length, to_encoding, from_encoding, output_len);
2420 }
2421 /* }}} */
2422 
php_mb_convert_encoding_recursive(HashTable * input,const mbfl_encoding * to_encoding,const mbfl_encoding ** from_encodings,size_t num_from_encodings)2423 MBSTRING_API HashTable *php_mb_convert_encoding_recursive(HashTable *input, const mbfl_encoding *to_encoding, const mbfl_encoding **from_encodings, size_t num_from_encodings)
2424 {
2425 	HashTable *output, *chash;
2426 	zend_long idx;
2427 	zend_string *key;
2428 	zval *entry, entry_tmp;
2429 	size_t ckey_len, cval_len;
2430 	char *ckey, *cval;
2431 
2432 	if (!input) {
2433 		return NULL;
2434 	}
2435 
2436 	if (GC_IS_RECURSIVE(input)) {
2437 		GC_UNPROTECT_RECURSION(input);
2438 		php_error_docref(NULL, E_WARNING, "Cannot convert recursively referenced values");
2439 		return NULL;
2440 	}
2441 	GC_TRY_PROTECT_RECURSION(input);
2442 	output = zend_new_array(zend_hash_num_elements(input));
2443 	ZEND_HASH_FOREACH_KEY_VAL(input, idx, key, entry) {
2444 		/* convert key */
2445 		if (key) {
2446 			ckey = php_mb_convert_encoding(
2447 				ZSTR_VAL(key), ZSTR_LEN(key),
2448 				to_encoding, from_encodings, num_from_encodings, &ckey_len);
2449 			if (!ckey) {
2450 				continue;
2451 			}
2452 			key = zend_string_init(ckey, ckey_len, 0);
2453 			efree(ckey);
2454 		}
2455 		/* convert value */
2456 		ZEND_ASSERT(entry);
2457 try_again:
2458 		switch(Z_TYPE_P(entry)) {
2459 			case IS_STRING:
2460 				cval = php_mb_convert_encoding(
2461 					Z_STRVAL_P(entry), Z_STRLEN_P(entry),
2462 					to_encoding, from_encodings, num_from_encodings, &cval_len);
2463 				if (!cval) {
2464 					if (key) {
2465 						zend_string_release(key);
2466 					}
2467 					continue;
2468 				}
2469 				ZVAL_STRINGL(&entry_tmp, cval, cval_len);
2470 				efree(cval);
2471 				break;
2472 			case IS_NULL:
2473 			case IS_TRUE:
2474 			case IS_FALSE:
2475 			case IS_LONG:
2476 			case IS_DOUBLE:
2477 				ZVAL_COPY(&entry_tmp, entry);
2478 				break;
2479 			case IS_ARRAY:
2480 				chash = php_mb_convert_encoding_recursive(
2481 					Z_ARRVAL_P(entry), to_encoding, from_encodings, num_from_encodings);
2482 				if (chash) {
2483 					ZVAL_ARR(&entry_tmp, chash);
2484 				} else {
2485 					ZVAL_EMPTY_ARRAY(&entry_tmp);
2486 				}
2487 				break;
2488 			case IS_REFERENCE:
2489 				entry = Z_REFVAL_P(entry);
2490 				goto try_again;
2491 			case IS_OBJECT:
2492 			default:
2493 				if (key) {
2494 					zend_string_release(key);
2495 				}
2496 				php_error_docref(NULL, E_WARNING, "Object is not supported");
2497 				continue;
2498 		}
2499 		if (key) {
2500 			zend_hash_add(output, key, &entry_tmp);
2501 			zend_string_release(key);
2502 		} else {
2503 			zend_hash_index_add(output, idx, &entry_tmp);
2504 		}
2505 	} ZEND_HASH_FOREACH_END();
2506 	GC_TRY_UNPROTECT_RECURSION(input);
2507 
2508 	return output;
2509 }
2510 /* }}} */
2511 
remove_non_encodings_from_elist(const mbfl_encoding ** elist,size_t * size)2512 static void remove_non_encodings_from_elist(const mbfl_encoding **elist, size_t *size)
2513 {
2514 	/* mbstring supports some 'text encodings' which aren't really text encodings
2515 	 * at all, but really 'byte encodings', like Base64, QPrint, and so on.
2516 	 * These should never be returned by `mb_detect_encoding`. */
2517 	int shift = 0;
2518 	for (int i = 0; i < *size; i++) {
2519 		const mbfl_encoding *encoding = elist[i];
2520 		if (encoding->no_encoding <= mbfl_no_encoding_charset_min) {
2521 			shift++; /* Remove this encoding from the list */
2522 		} else if (shift) {
2523 			elist[i - shift] = encoding;
2524 		}
2525 	}
2526 	*size -= shift;
2527 }
2528 
2529 /* {{{ Returns converted string in desired encoding */
PHP_FUNCTION(mb_convert_encoding)2530 PHP_FUNCTION(mb_convert_encoding)
2531 {
2532 	zend_string *to_encoding_name;
2533 	zend_string *input_str, *from_encodings_str = NULL;
2534 	HashTable *input_ht, *from_encodings_ht = NULL;
2535 	const mbfl_encoding **from_encodings;
2536 	size_t num_from_encodings;
2537 	bool free_from_encodings;
2538 
2539 	ZEND_PARSE_PARAMETERS_START(2, 3)
2540 		Z_PARAM_ARRAY_HT_OR_STR(input_ht, input_str)
2541 		Z_PARAM_STR(to_encoding_name)
2542 		Z_PARAM_OPTIONAL
2543 		Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(from_encodings_ht, from_encodings_str)
2544 	ZEND_PARSE_PARAMETERS_END();
2545 
2546 	const mbfl_encoding *to_encoding = php_mb_get_encoding(to_encoding_name, 2);
2547 	if (!to_encoding) {
2548 		RETURN_THROWS();
2549 	}
2550 
2551 	if (from_encodings_ht) {
2552 		if (php_mb_parse_encoding_array(from_encodings_ht, &from_encodings, &num_from_encodings, 3) == FAILURE) {
2553 			RETURN_THROWS();
2554 		}
2555 		free_from_encodings = 1;
2556 	} else if (from_encodings_str) {
2557 		if (php_mb_parse_encoding_list(ZSTR_VAL(from_encodings_str), ZSTR_LEN(from_encodings_str),
2558 				&from_encodings, &num_from_encodings,
2559 				/* persistent */ 0, /* arg_num */ 3, /* allow_pass_encoding */ 0) == FAILURE) {
2560 			RETURN_THROWS();
2561 		}
2562 		free_from_encodings = 1;
2563 	} else {
2564 		from_encodings = &MBSTRG(current_internal_encoding);
2565 		num_from_encodings = 1;
2566 		free_from_encodings = 0;
2567 	}
2568 
2569 	if (num_from_encodings > 1) {
2570 		remove_non_encodings_from_elist(from_encodings, &num_from_encodings);
2571 	}
2572 
2573 	if (!num_from_encodings) {
2574 		efree(ZEND_VOIDP(from_encodings));
2575 		zend_argument_value_error(3, "must specify at least one encoding");
2576 		RETURN_THROWS();
2577 	}
2578 
2579 	if (input_str) {
2580 		/* new encoding */
2581 		size_t size;
2582 		char *ret = php_mb_convert_encoding(ZSTR_VAL(input_str), ZSTR_LEN(input_str),
2583 			to_encoding, from_encodings, num_from_encodings, &size);
2584 		if (ret != NULL) {
2585 			// TODO: avoid reallocation ???
2586 			RETVAL_STRINGL(ret, size);		/* the string is already strdup()'ed */
2587 			efree(ret);
2588 		} else {
2589 			RETVAL_FALSE;
2590 		}
2591 	} else {
2592 		HashTable *tmp;
2593 		tmp = php_mb_convert_encoding_recursive(
2594 			input_ht, to_encoding, from_encodings, num_from_encodings);
2595 		RETVAL_ARR(tmp);
2596 	}
2597 
2598 	if (free_from_encodings) {
2599 		efree(ZEND_VOIDP(from_encodings));
2600 	}
2601 }
2602 /* }}} */
2603 
mbstring_convert_case(int case_mode,const char * str,size_t str_len,size_t * ret_len,const mbfl_encoding * enc)2604 static char *mbstring_convert_case(
2605 		int case_mode, const char *str, size_t str_len, size_t *ret_len,
2606 		const mbfl_encoding *enc) {
2607 	return php_unicode_convert_case(
2608 		case_mode, str, str_len, ret_len, enc,
2609 		MBSTRG(current_filter_illegal_mode), MBSTRG(current_filter_illegal_substchar));
2610 }
2611 
2612 /* {{{ Returns a case-folded version of source_string */
PHP_FUNCTION(mb_convert_case)2613 PHP_FUNCTION(mb_convert_case)
2614 {
2615 	zend_string *from_encoding = NULL;
2616 	char *str;
2617 	size_t str_len, ret_len;
2618 	zend_long case_mode = 0;
2619 
2620 	ZEND_PARSE_PARAMETERS_START(2, 3)
2621 		Z_PARAM_STRING(str, str_len)
2622 		Z_PARAM_LONG(case_mode)
2623 		Z_PARAM_OPTIONAL
2624 		Z_PARAM_STR_OR_NULL(from_encoding)
2625 	ZEND_PARSE_PARAMETERS_END();
2626 
2627 	const mbfl_encoding *enc = php_mb_get_encoding(from_encoding, 3);
2628 	if (!enc) {
2629 		RETURN_THROWS();
2630 	}
2631 
2632 	if (case_mode < 0 || case_mode > PHP_UNICODE_CASE_MODE_MAX) {
2633 		zend_argument_value_error(2, "must be one of the MB_CASE_* constants");
2634 		RETURN_THROWS();
2635 	}
2636 
2637 	char *newstr = mbstring_convert_case(case_mode, str, str_len, &ret_len, enc);
2638 	/* If newstr is NULL something went wrong in mbfl and this is a bug */
2639 	ZEND_ASSERT(newstr != NULL);
2640 
2641 	// TODO: avoid reallocation ???
2642 	RETVAL_STRINGL(newstr, ret_len);
2643 	efree(newstr);
2644 }
2645 /* }}} */
2646 
2647 /* {{{ Returns a upper cased version of source_string */
PHP_FUNCTION(mb_strtoupper)2648 PHP_FUNCTION(mb_strtoupper)
2649 {
2650 	zend_string *from_encoding = NULL;
2651 	char *str;
2652 	size_t str_len, ret_len;
2653 
2654 	ZEND_PARSE_PARAMETERS_START(1, 2)
2655 		Z_PARAM_STRING(str, str_len)
2656 		Z_PARAM_OPTIONAL
2657 		Z_PARAM_STR_OR_NULL(from_encoding)
2658 	ZEND_PARSE_PARAMETERS_END();
2659 
2660 	const mbfl_encoding *enc = php_mb_get_encoding(from_encoding, 2);
2661 	if (!enc) {
2662 		RETURN_THROWS();
2663 	}
2664 
2665 	char *newstr = mbstring_convert_case(PHP_UNICODE_CASE_UPPER, str, str_len, &ret_len, enc);
2666 	/* If newstr is NULL something went wrong in mbfl and this is a bug */
2667 	ZEND_ASSERT(newstr != NULL);
2668 
2669 	// TODO: avoid reallocation ???
2670 	RETVAL_STRINGL(newstr, ret_len);
2671 	efree(newstr);
2672 }
2673 /* }}} */
2674 
2675 /* {{{ Returns a lower cased version of source_string */
PHP_FUNCTION(mb_strtolower)2676 PHP_FUNCTION(mb_strtolower)
2677 {
2678 	zend_string *from_encoding = NULL;
2679 	char *str;
2680 	size_t str_len;
2681 	char *newstr;
2682 	size_t ret_len;
2683 	const mbfl_encoding *enc;
2684 
2685 	ZEND_PARSE_PARAMETERS_START(1, 2)
2686 		Z_PARAM_STRING(str, str_len)
2687 		Z_PARAM_OPTIONAL
2688 		Z_PARAM_STR_OR_NULL(from_encoding)
2689 	ZEND_PARSE_PARAMETERS_END();
2690 
2691 	enc = php_mb_get_encoding(from_encoding, 2);
2692 	if (!enc) {
2693 		RETURN_THROWS();
2694 	}
2695 
2696 	newstr = mbstring_convert_case(PHP_UNICODE_CASE_LOWER, str, str_len, &ret_len, enc);
2697 	/* If newstr is NULL something went wrong in mbfl and this is a bug */
2698 	ZEND_ASSERT(newstr != NULL);
2699 
2700 	// TODO: avoid reallocation ???
2701 	RETVAL_STRINGL(newstr, ret_len);
2702 	efree(newstr);
2703 }
2704 /* }}} */
2705 
duplicate_elist(const mbfl_encoding ** elist,size_t size)2706 static const mbfl_encoding **duplicate_elist(const mbfl_encoding **elist, size_t size)
2707 {
2708 	const mbfl_encoding **new_elist = safe_emalloc(size, sizeof(mbfl_encoding*), 0);
2709 	memcpy(ZEND_VOIDP(new_elist), elist, size * sizeof(mbfl_encoding*));
2710 	return new_elist;
2711 }
2712 
2713 /* {{{ Encodings of the given string is returned (as a string) */
PHP_FUNCTION(mb_detect_encoding)2714 PHP_FUNCTION(mb_detect_encoding)
2715 {
2716 	char *str;
2717 	size_t str_len;
2718 	zend_string *encoding_str = NULL;
2719 	HashTable *encoding_ht = NULL;
2720 	bool strict = 0;
2721 
2722 	mbfl_string string;
2723 	const mbfl_encoding *ret;
2724 	const mbfl_encoding **elist;
2725 	size_t size;
2726 
2727 	ZEND_PARSE_PARAMETERS_START(1, 3)
2728 		Z_PARAM_STRING(str, str_len)
2729 		Z_PARAM_OPTIONAL
2730 		Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(encoding_ht, encoding_str)
2731 		Z_PARAM_BOOL(strict)
2732 	ZEND_PARSE_PARAMETERS_END();
2733 
2734 	/* make encoding list */
2735 	if (encoding_ht) {
2736 		if (FAILURE == php_mb_parse_encoding_array(encoding_ht, &elist, &size, 2)) {
2737 			RETURN_THROWS();
2738 		}
2739 	} else if (encoding_str) {
2740 		if (FAILURE == php_mb_parse_encoding_list(ZSTR_VAL(encoding_str), ZSTR_LEN(encoding_str), &elist, &size, /* persistent */ 0, /* arg_num */ 2, /* allow_pass_encoding */ 0)) {
2741 			RETURN_THROWS();
2742 		}
2743 	} else {
2744 		elist = duplicate_elist(MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size));
2745 		size = MBSTRG(current_detect_order_list_size);
2746 	}
2747 
2748 	if (size == 0) {
2749 		efree(ZEND_VOIDP(elist));
2750 		zend_argument_value_error(2, "must specify at least one encoding");
2751 		RETURN_THROWS();
2752 	}
2753 
2754 	remove_non_encodings_from_elist(elist, &size);
2755 	if (size == 0) {
2756 		efree(ZEND_VOIDP(elist));
2757 		RETURN_FALSE;
2758 	}
2759 
2760 	if (ZEND_NUM_ARGS() < 3) {
2761 		strict = MBSTRG(strict_detection);
2762 	}
2763 
2764 	if (strict && size == 1) {
2765 		/* If there is only a single candidate encoding, mb_check_encoding is faster */
2766 		ret = (php_mb_check_encoding(str, str_len, *elist)) ? *elist : NULL;
2767 	} else {
2768 		mbfl_string_init(&string);
2769 		string.val = (unsigned char *)str;
2770 		string.len = str_len;
2771 		ret = mbfl_identify_encoding(&string, elist, size, strict);
2772 	}
2773 
2774 	efree(ZEND_VOIDP(elist));
2775 
2776 	if (ret == NULL) {
2777 		RETURN_FALSE;
2778 	}
2779 
2780 	RETVAL_STRING((char *)ret->name);
2781 }
2782 /* }}} */
2783 
2784 /* {{{ Returns an array of all supported entity encodings */
PHP_FUNCTION(mb_list_encodings)2785 PHP_FUNCTION(mb_list_encodings)
2786 {
2787 	ZEND_PARSE_PARAMETERS_NONE();
2788 
2789 	array_init(return_value);
2790 	for (const mbfl_encoding **encodings = mbfl_get_supported_encodings(); *encodings; encodings++) {
2791 		add_next_index_string(return_value, (*encodings)->name);
2792 	}
2793 }
2794 /* }}} */
2795 
2796 /* {{{ Returns an array of the aliases of a given encoding name */
PHP_FUNCTION(mb_encoding_aliases)2797 PHP_FUNCTION(mb_encoding_aliases)
2798 {
2799 	const mbfl_encoding *encoding;
2800 	zend_string *encoding_name = NULL;
2801 
2802 	ZEND_PARSE_PARAMETERS_START(1, 1)
2803 		Z_PARAM_STR(encoding_name)
2804 	ZEND_PARSE_PARAMETERS_END();
2805 
2806 	encoding = php_mb_get_encoding(encoding_name, 1);
2807 	if (!encoding) {
2808 		RETURN_THROWS();
2809 	}
2810 
2811 	array_init(return_value);
2812 	if (encoding->aliases != NULL) {
2813 		for (const char **alias = encoding->aliases; *alias; ++alias) {
2814 			add_next_index_string(return_value, (char *)*alias);
2815 		}
2816 	}
2817 }
2818 /* }}} */
2819 
2820 /* {{{ Converts the string to MIME "encoded-word" in the format of =?charset?(B|Q)?encoded_string?= */
PHP_FUNCTION(mb_encode_mimeheader)2821 PHP_FUNCTION(mb_encode_mimeheader)
2822 {
2823 	const mbfl_encoding *charset, *transenc;
2824 	mbfl_string  string, result, *ret;
2825 	zend_string *charset_name = NULL;
2826 	char *trans_enc_name = NULL, *string_val;
2827 	size_t trans_enc_name_len;
2828 	char *linefeed = "\r\n";
2829 	size_t linefeed_len;
2830 	zend_long indent = 0;
2831 
2832 	string.encoding = MBSTRG(current_internal_encoding);
2833 
2834 	ZEND_PARSE_PARAMETERS_START(1, 5)
2835 		Z_PARAM_STRING(string_val, string.len)
2836 		Z_PARAM_OPTIONAL
2837 		Z_PARAM_STR(charset_name)
2838 		Z_PARAM_STRING(trans_enc_name, trans_enc_name_len)
2839 		Z_PARAM_STRING(linefeed, linefeed_len)
2840 		Z_PARAM_LONG(indent)
2841 	ZEND_PARSE_PARAMETERS_END();
2842 
2843 	string.val = (unsigned char*)string_val;
2844 	charset = &mbfl_encoding_pass;
2845 	transenc = &mbfl_encoding_base64;
2846 
2847 	if (charset_name != NULL) {
2848 		charset = php_mb_get_encoding(charset_name, 2);
2849 		if (!charset) {
2850 			RETURN_THROWS();
2851 		} else if (charset->mime_name == NULL || charset->mime_name[0] == '\0') {
2852 			zend_argument_value_error(2, "\"%s\" cannot be used for MIME header encoding", ZSTR_VAL(charset_name));
2853 			RETURN_THROWS();
2854 		}
2855 	} else {
2856 		const mbfl_language *lang = mbfl_no2language(MBSTRG(language));
2857 		if (lang != NULL) {
2858 			charset = mbfl_no2encoding(lang->mail_charset);
2859 			transenc = mbfl_no2encoding(lang->mail_header_encoding);
2860 		}
2861 	}
2862 
2863 	if (trans_enc_name != NULL) {
2864 		if (*trans_enc_name == 'B' || *trans_enc_name == 'b') {
2865 			transenc = &mbfl_encoding_base64;
2866 		} else if (*trans_enc_name == 'Q' || *trans_enc_name == 'q') {
2867 			transenc = &mbfl_encoding_qprint;
2868 		}
2869 	}
2870 
2871 	mbfl_string_init(&result);
2872 	ret = mbfl_mime_header_encode(&string, &result, charset, transenc, linefeed, indent);
2873 	ZEND_ASSERT(ret != NULL);
2874 	// TODO: avoid reallocation ???
2875 	RETVAL_STRINGL((char *)ret->val, ret->len);	/* the string is already strdup()'ed */
2876 	efree(ret->val);
2877 }
2878 /* }}} */
2879 
2880 /* {{{ Decodes the MIME "encoded-word" in the string */
PHP_FUNCTION(mb_decode_mimeheader)2881 PHP_FUNCTION(mb_decode_mimeheader)
2882 {
2883 	char *string_val;
2884 	mbfl_string string, result, *ret;
2885 
2886 	string.encoding = MBSTRG(current_internal_encoding);
2887 
2888 	ZEND_PARSE_PARAMETERS_START(1, 1)
2889 		Z_PARAM_STRING(string_val, string.len)
2890 	ZEND_PARSE_PARAMETERS_END();
2891 
2892 	string.val = (unsigned char*)string_val;
2893 	mbfl_string_init(&result);
2894 	ret = mbfl_mime_header_decode(&string, &result, MBSTRG(current_internal_encoding));
2895 	ZEND_ASSERT(ret != NULL);
2896 	// TODO: avoid reallocation ???
2897 	RETVAL_STRINGL((char *)ret->val, ret->len);	/* the string is already strdup()'ed */
2898 	efree(ret->val);
2899 }
2900 /* }}} */
2901 
2902 /* {{{ Conversion between full-width character and half-width character (Japanese) */
PHP_FUNCTION(mb_convert_kana)2903 PHP_FUNCTION(mb_convert_kana)
2904 {
2905 	int opt;
2906 	mbfl_string string, result, *ret;
2907 	char *optstr = NULL, *string_val;
2908 	size_t optstr_len;
2909 	zend_string *encname = NULL;
2910 
2911 	ZEND_PARSE_PARAMETERS_START(1, 3)
2912 		Z_PARAM_STRING(string_val, string.len)
2913 		Z_PARAM_OPTIONAL
2914 		Z_PARAM_STRING(optstr, optstr_len)
2915 		Z_PARAM_STR_OR_NULL(encname)
2916 	ZEND_PARSE_PARAMETERS_END();
2917 
2918 	string.val = (unsigned char*)string_val;
2919 
2920 	/* "Zen" is 全, or "full"; "Han" is 半, or "half"
2921 	 * This refers to "fullwidth" or "halfwidth" variants of characters used for writing Japanese */
2922 	if (optstr != NULL) {
2923 		char *p = optstr, *e = p + optstr_len;
2924 		opt = 0;
2925 		while (p < e) {
2926 			switch (*p++) {
2927 			case 'A':
2928 				opt |= MBFL_FILT_TL_HAN2ZEN_ALL;
2929 				break;
2930 			case 'a':
2931 				opt |= MBFL_FILT_TL_ZEN2HAN_ALL;
2932 				break;
2933 			case 'R':
2934 				opt |= MBFL_FILT_TL_HAN2ZEN_ALPHA;
2935 				break;
2936 			case 'r':
2937 				opt |= MBFL_FILT_TL_ZEN2HAN_ALPHA;
2938 				break;
2939 			case 'N':
2940 				opt |= MBFL_FILT_TL_HAN2ZEN_NUMERIC;
2941 				break;
2942 			case 'n':
2943 				opt |= MBFL_FILT_TL_ZEN2HAN_NUMERIC;
2944 				break;
2945 			case 'S':
2946 				opt |= MBFL_FILT_TL_HAN2ZEN_SPACE;
2947 				break;
2948 			case 's':
2949 				opt |= MBFL_FILT_TL_ZEN2HAN_SPACE;
2950 				break;
2951 			case 'K':
2952 				opt |= MBFL_FILT_TL_HAN2ZEN_KATAKANA;
2953 				break;
2954 			case 'k':
2955 				opt |= MBFL_FILT_TL_ZEN2HAN_KATAKANA;
2956 				break;
2957 			case 'H':
2958 				opt |= MBFL_FILT_TL_HAN2ZEN_HIRAGANA;
2959 				break;
2960 			case 'h':
2961 				opt |= MBFL_FILT_TL_ZEN2HAN_HIRAGANA;
2962 				break;
2963 			case 'V':
2964 				opt |= MBFL_FILT_TL_HAN2ZEN_GLUE;
2965 				break;
2966 			case 'C':
2967 				opt |= MBFL_FILT_TL_ZEN2HAN_HIRA2KANA;
2968 				break;
2969 			case 'c':
2970 				opt |= MBFL_FILT_TL_ZEN2HAN_KANA2HIRA;
2971 				break;
2972 			case 'M':
2973 				/* TODO: figure out what 'M' and 'm' are for, and rename the constant
2974 				 * to something meaningful */
2975 				opt |= MBFL_FILT_TL_HAN2ZEN_COMPAT1;
2976 				break;
2977 			case 'm':
2978 				opt |= MBFL_FILT_TL_ZEN2HAN_COMPAT1;
2979 				break;
2980 			}
2981 		}
2982 	} else {
2983 		opt = MBFL_FILT_TL_HAN2ZEN_KATAKANA | MBFL_FILT_TL_HAN2ZEN_GLUE;
2984 	}
2985 
2986 	/* encoding */
2987 	string.encoding = php_mb_get_encoding(encname, 3);
2988 	if (!string.encoding) {
2989 		RETURN_THROWS();
2990 	}
2991 
2992 	ret = mbfl_ja_jp_hantozen(&string, &result, opt);
2993 	ZEND_ASSERT(ret != NULL);
2994 	// TODO: avoid reallocation ???
2995 	RETVAL_STRINGL((char *)ret->val, ret->len);		/* the string is already strdup()'ed */
2996 	efree(ret->val);
2997 }
2998 /* }}} */
2999 
/* Feed every string reachable from `var` (descending into arrays and object
 * property tables) to the encoding detector.
 * Returns 1 as soon as the detector reports that it is done, 0 otherwise.
 * `*recursion_error` is set to 1 when a container that is already being
 * visited is encountered again. */
static int mb_recursive_encoder_detector_feed(mbfl_encoding_detector *identd, zval *var, int *recursion_error) /* {{{ */
{
	int complete = 0;

	ZVAL_DEREF(var);

	if (Z_TYPE_P(var) == IS_STRING) {
		mbfl_string chunk;

		chunk.val = (unsigned char *)Z_STRVAL_P(var);
		chunk.len = Z_STRLEN_P(var);
		if (mbfl_encoding_detector_feed(identd, &chunk)) {
			return 1; /* complete detecting */
		}
		return 0;
	}

	if (Z_TYPE_P(var) != IS_ARRAY && Z_TYPE_P(var) != IS_OBJECT) {
		/* Other types carry nothing to feed */
		return 0;
	}

	if (Z_REFCOUNTED_P(var)) {
		if (Z_IS_RECURSIVE_P(var)) {
			*recursion_error = 1;
			return 0;
		}
		Z_PROTECT_RECURSION_P(var);
	}

	HashTable *ht = HASH_OF(var);
	if (ht != NULL) {
		zval *entry;

		ZEND_HASH_FOREACH_VAL_IND(ht, entry) {
			if (mb_recursive_encoder_detector_feed(identd, entry, recursion_error)) {
				complete = 1;
				break;
			}
			if (*recursion_error) {
				break;
			}
		} ZEND_HASH_FOREACH_END();
	}

	/* Single exit point, so the recursion guard is always lifted again */
	if (Z_REFCOUNTED_P(var)) {
		Z_UNPROTECT_RECURSION_P(var);
	}
	return complete;
} /* }}} */
3045 
/* Convert, in place, every string reachable from `var` (descending into
 * arrays and object property tables) using `convd`.
 * Returns 1 when a container that is already being visited is encountered
 * again, 0 otherwise. */
static int mb_recursive_convert_variable(mbfl_buffer_converter *convd, zval *var) /* {{{ */
{
	/* Remember the zval as handed in: for a string, this slot (not the
	 * dereferenced value) is what gets destroyed and overwritten */
	zval *slot = var;
	int recursion_found = 0;

	ZVAL_DEREF(var);

	if (Z_TYPE_P(var) == IS_STRING) {
		mbfl_string input, output;

		input.val = (unsigned char *)Z_STRVAL_P(var);
		input.len = Z_STRLEN_P(var);

		mbfl_string *converted = mbfl_buffer_converter_feed_result(convd, &input, &output);
		if (converted != NULL) {
			zval_ptr_dtor(slot);
			/* TODO: hand the buffer over instead of copying it and freeing ours */
			ZVAL_STRINGL(slot, (char *)converted->val, converted->len);
			efree(converted->val);
		}
		return 0;
	}

	if (Z_TYPE_P(var) != IS_ARRAY && Z_TYPE_P(var) != IS_OBJECT) {
		/* Other types are left untouched */
		return 0;
	}

	if (Z_TYPE_P(var) == IS_ARRAY) {
		SEPARATE_ARRAY(var);
	}
	if (Z_REFCOUNTED_P(var)) {
		if (Z_IS_RECURSIVE_P(var)) {
			return 1;
		}
		Z_PROTECT_RECURSION_P(var);
	}

	HashTable *ht = HASH_OF(var);
	if (ht != NULL) {
		zval *entry;

		ZEND_HASH_FOREACH_VAL_IND(ht, entry) {
			if (mb_recursive_convert_variable(convd, entry)) {
				recursion_found = 1;
				break;
			}
		} ZEND_HASH_FOREACH_END();
	}

	/* Single exit point, so the recursion guard is always lifted again */
	if (Z_REFCOUNTED_P(var)) {
		Z_UNPROTECT_RECURSION_P(var);
	}
	return recursion_found;
} /* }}} */
3093 
3094 /* {{{ Converts the string resource in variables to desired encoding */
PHP_FUNCTION(mb_convert_variables)3095 PHP_FUNCTION(mb_convert_variables)
3096 {
3097 	zval *args;
3098 	zend_string *to_enc_str;
3099 	zend_string *from_enc_str;
3100 	HashTable *from_enc_ht;
3101 	mbfl_string string, result;
3102 	const mbfl_encoding *from_encoding, *to_encoding;
3103 	mbfl_encoding_detector *identd;
3104 	mbfl_buffer_converter *convd;
3105 	int n, argc;
3106 	size_t elistsz;
3107 	const mbfl_encoding **elist;
3108 	int recursion_error = 0;
3109 
3110 	ZEND_PARSE_PARAMETERS_START(3, -1)
3111 		Z_PARAM_STR(to_enc_str)
3112 		Z_PARAM_ARRAY_HT_OR_STR(from_enc_ht, from_enc_str)
3113 		Z_PARAM_VARIADIC('+', args, argc)
3114 	ZEND_PARSE_PARAMETERS_END();
3115 
3116 	/* new encoding */
3117 	to_encoding = php_mb_get_encoding(to_enc_str, 1);
3118 	if (!to_encoding) {
3119 		RETURN_THROWS();
3120 	}
3121 
3122 	/* initialize string */
3123 	from_encoding = MBSTRG(current_internal_encoding);
3124 	mbfl_string_init_set(&string, from_encoding);
3125 	mbfl_string_init(&result);
3126 
3127 	/* pre-conversion encoding */
3128 	if (from_enc_ht) {
3129 		if (php_mb_parse_encoding_array(from_enc_ht, &elist, &elistsz, 2) == FAILURE) {
3130 			RETURN_THROWS();
3131 		}
3132 	} else {
3133 		if (php_mb_parse_encoding_list(ZSTR_VAL(from_enc_str), ZSTR_LEN(from_enc_str), &elist, &elistsz, /* persistent */ 0, /* arg_num */ 2, /* allow_pass_encoding */ 0) == FAILURE) {
3134 			RETURN_THROWS();
3135 		}
3136 	}
3137 
3138 	if (elistsz == 0) {
3139 		efree(ZEND_VOIDP(elist));
3140 		zend_argument_value_error(2, "must specify at least one encoding");
3141 		RETURN_THROWS();
3142 	}
3143 
3144 	if (elistsz == 1) {
3145 		from_encoding = *elist;
3146 	} else {
3147 		/* auto detect */
3148 		from_encoding = NULL;
3149 		identd = mbfl_encoding_detector_new(elist, elistsz, MBSTRG(strict_detection));
3150 		if (identd != NULL) {
3151 			n = 0;
3152 			while (n < argc) {
3153 				if (mb_recursive_encoder_detector_feed(identd, &args[n], &recursion_error)) {
3154 					break;
3155 				}
3156 				n++;
3157 			}
3158 			from_encoding = mbfl_encoding_detector_judge(identd);
3159 			mbfl_encoding_detector_delete(identd);
3160 			if (recursion_error) {
3161 				efree(ZEND_VOIDP(elist));
3162 				php_error_docref(NULL, E_WARNING, "Cannot handle recursive references");
3163 				RETURN_FALSE;
3164 			}
3165 		}
3166 
3167 		if (!from_encoding) {
3168 			php_error_docref(NULL, E_WARNING, "Unable to detect encoding");
3169 			efree(ZEND_VOIDP(elist));
3170 			RETURN_FALSE;
3171 		}
3172 	}
3173 
3174 	efree(ZEND_VOIDP(elist));
3175 
3176 	convd = mbfl_buffer_converter_new(from_encoding, to_encoding, 0);
3177 	/* If this assertion fails this means some memory allocation failure which is a bug */
3178 	ZEND_ASSERT(convd != NULL);
3179 
3180 	mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
3181 	mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
3182 
3183 	/* convert */
3184 	n = 0;
3185 	while (n < argc) {
3186 		zval *zv = &args[n];
3187 
3188 		ZVAL_DEREF(zv);
3189 		recursion_error = mb_recursive_convert_variable(convd, zv);
3190 		if (recursion_error) {
3191 			break;
3192 		}
3193 		n++;
3194 	}
3195 
3196 	MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
3197 	mbfl_buffer_converter_delete(convd);
3198 
3199 	if (recursion_error) {
3200 		php_error_docref(NULL, E_WARNING, "Cannot handle recursive references");
3201 		RETURN_FALSE;
3202 	}
3203 
3204 	RETURN_STRING(from_encoding->name);
3205 }
3206 /* }}} */
3207 
3208 /* HTML numeric entities */
3209 
3210 /* Convert PHP array to data structure required by mbfl_html_numeric_entity */
make_conversion_map(HashTable * target_hash,int * convmap_size)3211 static int *make_conversion_map(HashTable *target_hash, int *convmap_size)
3212 {
3213 	zval *hash_entry;
3214 
3215 	int n_elems = zend_hash_num_elements(target_hash);
3216 	if (n_elems % 4 != 0) {
3217 		zend_argument_value_error(2, "must have a multiple of 4 elements");
3218 		return NULL;
3219 	}
3220 
3221 	int *convmap = (int *)safe_emalloc(n_elems, sizeof(int), 0);
3222 	int *mapelm = convmap;
3223 
3224 	ZEND_HASH_FOREACH_VAL(target_hash, hash_entry) {
3225 		*mapelm++ = zval_get_long(hash_entry);
3226 	} ZEND_HASH_FOREACH_END();
3227 
3228 	*convmap_size = n_elems / 4;
3229 	return convmap;
3230 }
3231 
3232 /* {{{ Converts specified characters to HTML numeric entities */
PHP_FUNCTION(mb_encode_numericentity)3233 PHP_FUNCTION(mb_encode_numericentity)
3234 {
3235 	char *str = NULL;
3236 	zend_string *encoding = NULL;
3237 	int mapsize;
3238 	HashTable *target_hash;
3239 	bool is_hex = 0;
3240 	mbfl_string string, result, *ret;
3241 
3242 	ZEND_PARSE_PARAMETERS_START(2, 4)
3243 		Z_PARAM_STRING(str, string.len)
3244 		Z_PARAM_ARRAY_HT(target_hash)
3245 		Z_PARAM_OPTIONAL
3246 		Z_PARAM_STR_OR_NULL(encoding)
3247 		Z_PARAM_BOOL(is_hex)
3248 	ZEND_PARSE_PARAMETERS_END();
3249 
3250 	string.val = (unsigned char *)str;
3251 	string.encoding = php_mb_get_encoding(encoding, 3);
3252 	if (!string.encoding) {
3253 		RETURN_THROWS();
3254 	}
3255 
3256 	int *convmap = make_conversion_map(target_hash, &mapsize);
3257 	if (convmap == NULL) {
3258 		RETURN_THROWS();
3259 	}
3260 
3261 	ret = mbfl_html_numeric_entity(&string, &result, convmap, mapsize, is_hex ? 2 : 0);
3262 	ZEND_ASSERT(ret != NULL);
3263 	// TODO: avoid reallocation ???
3264 	RETVAL_STRINGL((char *)ret->val, ret->len);
3265 	efree(ret->val);
3266 	efree(convmap);
3267 }
3268 /* }}} */
3269 
3270 /* {{{ Converts HTML numeric entities to character code */
PHP_FUNCTION(mb_decode_numericentity)3271 PHP_FUNCTION(mb_decode_numericentity)
3272 {
3273 	char *str = NULL;
3274 	zend_string *encoding = NULL;
3275 	int mapsize;
3276 	HashTable *target_hash;
3277 	mbfl_string string, result, *ret;
3278 
3279 	ZEND_PARSE_PARAMETERS_START(2, 3)
3280 		Z_PARAM_STRING(str, string.len)
3281 		Z_PARAM_ARRAY_HT(target_hash)
3282 		Z_PARAM_OPTIONAL
3283 		Z_PARAM_STR_OR_NULL(encoding)
3284 	ZEND_PARSE_PARAMETERS_END();
3285 
3286 	string.val = (unsigned char *)str;
3287 	string.encoding = php_mb_get_encoding(encoding, 3);
3288 	if (!string.encoding) {
3289 		RETURN_THROWS();
3290 	}
3291 
3292 	int *convmap = make_conversion_map(target_hash, &mapsize);
3293 	if (convmap == NULL) {
3294 		RETURN_THROWS();
3295 	}
3296 
3297 	ret = mbfl_html_numeric_entity(&string, &result, convmap, mapsize, 1);
3298 	ZEND_ASSERT(ret != NULL);
3299 	// TODO: avoid reallocation ???
3300 	RETVAL_STRINGL((char *)ret->val, ret->len);
3301 	efree(ret->val);
3302 	efree((void *)convmap);
3303 }
3304 /* }}} */
3305 
3306 /* {{{ Sends an email message with MIME scheme */
3307 
/* For use inside a loop that walks `str` with index `pos`.
 * When str[pos] starts a CRLF that is followed by a space or tab, `pos` is
 * moved to the last character of that whitespace run and the enclosing loop
 * is `continue`d; otherwise nothing happens. Deliberately a bare `if` rather
 * than do { } while (0), because the `continue` must reach the caller's loop. */
#define SKIP_LONG_HEADER_SEP_MBSTRING(str, pos) \
	if (str[pos] == '\r' && str[pos + 1] == '\n' && (str[pos + 2] == ' ' || str[pos + 2] == '\t')) { \
		for (pos += 2; str[pos + 1] == ' ' || str[pos + 1] == '\t'; pos++) { \
		} \
		continue; \
	}

/* Line terminator used when assembling mail headers */
#define CRLF "\r\n"
3318 
_php_mbstr_parse_mail_headers(HashTable * ht,const char * str,size_t str_len)3319 static int _php_mbstr_parse_mail_headers(HashTable *ht, const char *str, size_t str_len)
3320 {
3321 	const char *ps;
3322 	size_t icnt;
3323 	int state = 0;
3324 	int crlf_state = -1;
3325 	char *token = NULL;
3326 	size_t token_pos = 0;
3327 	zend_string *fld_name, *fld_val;
3328 
3329 	ps = str;
3330 	icnt = str_len;
3331 	fld_name = fld_val = NULL;
3332 
3333 	/*
3334 	 *             C o n t e n t - T y p e :   t e x t / h t m l \r\n
3335 	 *             ^ ^^^^^^^^^^^^^^^^^^^^^ ^^^ ^^^^^^^^^^^^^^^^^ ^^^^
3336 	 *      state  0            1           2          3
3337 	 *
3338 	 *             C o n t e n t - T y p e :   t e x t / h t m l \r\n
3339 	 *             ^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^
3340 	 * crlf_state -1                       0                     1 -1
3341 	 *
3342 	 */
3343 
3344 	while (icnt > 0) {
3345 		switch (*ps) {
3346 			case ':':
3347 				if (crlf_state == 1) {
3348 					token_pos++;
3349 				}
3350 
3351 				if (state == 0 || state == 1) {
3352 					if(token && token_pos > 0) {
3353 						fld_name = zend_string_init(token, token_pos, 0);
3354 					}
3355 					state = 2;
3356 				} else {
3357 					token_pos++;
3358 				}
3359 
3360 				crlf_state = 0;
3361 				break;
3362 
3363 			case '\n':
3364 				if (crlf_state == -1) {
3365 					goto out;
3366 				}
3367 				crlf_state = -1;
3368 				break;
3369 
3370 			case '\r':
3371 				if (crlf_state == 1) {
3372 					token_pos++;
3373 				} else {
3374 					crlf_state = 1;
3375 				}
3376 				break;
3377 
3378 			case ' ': case '\t':
3379 				if (crlf_state == -1) {
3380 					if (state == 3) {
3381 						/* continuing from the previous line */
3382 						state = 4;
3383 					} else {
3384 						/* simply skipping this new line */
3385 						state = 5;
3386 					}
3387 				} else {
3388 					if (crlf_state == 1) {
3389 						token_pos++;
3390 					}
3391 					if (state == 1 || state == 3) {
3392 						token_pos++;
3393 					}
3394 				}
3395 				crlf_state = 0;
3396 				break;
3397 
3398 			default:
3399 				switch (state) {
3400 					case 0:
3401 						token = (char*)ps;
3402 						token_pos = 0;
3403 						state = 1;
3404 						break;
3405 
3406 					case 2:
3407 						if (crlf_state != -1) {
3408 							token = (char*)ps;
3409 							token_pos = 0;
3410 
3411 							state = 3;
3412 							break;
3413 						}
3414 						ZEND_FALLTHROUGH;
3415 
3416 					case 3:
3417 						if (crlf_state == -1) {
3418 							if(token && token_pos > 0) {
3419 								fld_val = zend_string_init(token, token_pos, 0);
3420 							}
3421 
3422 							if (fld_name != NULL && fld_val != NULL) {
3423 								zval val;
3424 								zend_str_tolower(ZSTR_VAL(fld_name), ZSTR_LEN(fld_name));
3425 								ZVAL_STR(&val, fld_val);
3426 
3427 								zend_hash_update(ht, fld_name, &val);
3428 
3429 								zend_string_release_ex(fld_name, 0);
3430 							}
3431 
3432 							fld_name = fld_val = NULL;
3433 							token = (char*)ps;
3434 							token_pos = 0;
3435 
3436 							state = 1;
3437 						}
3438 						break;
3439 
3440 					case 4:
3441 						token_pos++;
3442 						state = 3;
3443 						break;
3444 				}
3445 
3446 				if (crlf_state == 1) {
3447 					token_pos++;
3448 				}
3449 
3450 				token_pos++;
3451 
3452 				crlf_state = 0;
3453 				break;
3454 		}
3455 		ps++, icnt--;
3456 	}
3457 out:
3458 	if (state == 2) {
3459 		token = "";
3460 		token_pos = 0;
3461 
3462 		state = 3;
3463 	}
3464 	if (state == 3) {
3465 		if(token && token_pos > 0) {
3466 			fld_val = zend_string_init(token, token_pos, 0);
3467 		}
3468 		if (fld_name != NULL && fld_val != NULL) {
3469 			zval val;
3470 			zend_str_tolower(ZSTR_VAL(fld_name), ZSTR_LEN(fld_name));
3471 			ZVAL_STR(&val, fld_val);
3472 			zend_hash_update(ht, fld_name, &val);
3473 
3474 			zend_string_release_ex(fld_name, 0);
3475 		}
3476 	}
3477 	return state;
3478 }
3479 
PHP_FUNCTION(mb_send_mail)3480 PHP_FUNCTION(mb_send_mail)
3481 {
3482 	char *to;
3483 	size_t to_len;
3484 	char *message;
3485 	size_t message_len;
3486 	char *subject;
3487 	size_t subject_len;
3488 	zend_string *extra_cmd = NULL;
3489 	HashTable *headers_ht = NULL;
3490 	zend_string *str_headers = NULL;
3491 	size_t n, i;
3492 	char *to_r = NULL;
3493 	char *force_extra_parameters = INI_STR("mail.force_extra_parameters");
3494 	struct {
3495 		int cnt_type:1;
3496 		int cnt_trans_enc:1;
3497 	} suppressed_hdrs = { 0, 0 };
3498 
3499 	char *message_buf = NULL, *subject_buf = NULL, *p;
3500 	mbfl_string orig_str, conv_str;
3501 	mbfl_string *pstr;	/* pointer to mbfl string for return value */
3502 	enum mbfl_no_encoding;
3503 	const mbfl_encoding *tran_cs,	/* transfer text charset */
3504 						*head_enc,	/* header transfer encoding */
3505 						*body_enc;	/* body transfer encoding */
3506 	mbfl_memory_device device;	/* automatic allocateable buffer for additional header */
3507 	const mbfl_language *lang;
3508 	int err = 0;
3509 	HashTable ht_headers;
3510 	zval *s;
3511 	extern void mbfl_memory_device_unput(mbfl_memory_device *device);
3512 
3513 	/* initialize */
3514 	mbfl_memory_device_init(&device, 0, 0);
3515 	mbfl_string_init(&orig_str);
3516 	mbfl_string_init(&conv_str);
3517 
3518 	/* character-set, transfer-encoding */
3519 	tran_cs = &mbfl_encoding_utf8;
3520 	head_enc = &mbfl_encoding_base64;
3521 	body_enc = &mbfl_encoding_base64;
3522 	lang = mbfl_no2language(MBSTRG(language));
3523 	if (lang != NULL) {
3524 		tran_cs = mbfl_no2encoding(lang->mail_charset);
3525 		head_enc = mbfl_no2encoding(lang->mail_header_encoding);
3526 		body_enc = mbfl_no2encoding(lang->mail_body_encoding);
3527 	}
3528 
3529 	ZEND_PARSE_PARAMETERS_START(3, 5)
3530 		Z_PARAM_PATH(to, to_len)
3531 		Z_PARAM_PATH(subject, subject_len)
3532 		Z_PARAM_PATH(message, message_len)
3533 		Z_PARAM_OPTIONAL
3534 		Z_PARAM_ARRAY_HT_OR_STR(headers_ht, str_headers)
3535 		Z_PARAM_PATH_STR_OR_NULL(extra_cmd)
3536 	ZEND_PARSE_PARAMETERS_END();
3537 
3538 	if (str_headers) {
3539 		if (strlen(ZSTR_VAL(str_headers)) != ZSTR_LEN(str_headers)) {
3540 			zend_argument_value_error(4, "must not contain any null bytes");
3541 			RETURN_THROWS();
3542 		}
3543 		str_headers = php_trim(str_headers, NULL, 0, 2);
3544 	} else if (headers_ht) {
3545 		str_headers = php_mail_build_headers(headers_ht);
3546 		if (EG(exception)) {
3547 			RETURN_THROWS();
3548 		}
3549 	}
3550 
3551 	zend_hash_init(&ht_headers, 0, NULL, ZVAL_PTR_DTOR, 0);
3552 
3553 	if (str_headers != NULL) {
3554 		_php_mbstr_parse_mail_headers(&ht_headers, ZSTR_VAL(str_headers), ZSTR_LEN(str_headers));
3555 	}
3556 
3557 	if ((s = zend_hash_str_find(&ht_headers, "content-type", sizeof("content-type") - 1))) {
3558 		char *tmp;
3559 		char *param_name;
3560 		char *charset = NULL;
3561 
3562 		ZEND_ASSERT(Z_TYPE_P(s) == IS_STRING);
3563 		p = strchr(Z_STRVAL_P(s), ';');
3564 
3565 		if (p != NULL) {
3566 			/* skipping the padded spaces */
3567 			do {
3568 				++p;
3569 			} while (*p == ' ' || *p == '\t');
3570 
3571 			if (*p != '\0') {
3572 				if ((param_name = php_strtok_r(p, "= ", &tmp)) != NULL) {
3573 					if (strcasecmp(param_name, "charset") == 0) {
3574 						const mbfl_encoding *_tran_cs = tran_cs;
3575 
3576 						charset = php_strtok_r(NULL, "= \"", &tmp);
3577 						if (charset != NULL) {
3578 							_tran_cs = mbfl_name2encoding(charset);
3579 						}
3580 
3581 						if (!_tran_cs) {
3582 							php_error_docref(NULL, E_WARNING, "Unsupported charset \"%s\" - will be regarded as ascii", charset);
3583 							_tran_cs = &mbfl_encoding_ascii;
3584 						}
3585 						tran_cs = _tran_cs;
3586 					}
3587 				}
3588 			}
3589 		}
3590 		suppressed_hdrs.cnt_type = 1;
3591 	}
3592 
3593 	if ((s = zend_hash_str_find(&ht_headers, "content-transfer-encoding", sizeof("content-transfer-encoding") - 1))) {
3594 		const mbfl_encoding *_body_enc;
3595 
3596 		ZEND_ASSERT(Z_TYPE_P(s) == IS_STRING);
3597 		_body_enc = mbfl_name2encoding(Z_STRVAL_P(s));
3598 		switch (_body_enc ? _body_enc->no_encoding : mbfl_no_encoding_invalid) {
3599 			case mbfl_no_encoding_base64:
3600 			case mbfl_no_encoding_7bit:
3601 			case mbfl_no_encoding_8bit:
3602 				body_enc = _body_enc;
3603 				break;
3604 
3605 			default:
3606 				php_error_docref(NULL, E_WARNING, "Unsupported transfer encoding \"%s\" - will be regarded as 8bit", Z_STRVAL_P(s));
3607 				body_enc =	&mbfl_encoding_8bit;
3608 				break;
3609 		}
3610 		suppressed_hdrs.cnt_trans_enc = 1;
3611 	}
3612 
3613 	/* To: */
3614 	if (to_len > 0) {
3615 		to_r = estrndup(to, to_len);
3616 		for (; to_len; to_len--) {
3617 			if (!isspace((unsigned char) to_r[to_len - 1])) {
3618 				break;
3619 			}
3620 			to_r[to_len - 1] = '\0';
3621 		}
3622 		for (i = 0; to_r[i]; i++) {
3623 		if (iscntrl((unsigned char) to_r[i])) {
3624 			/* According to RFC 822, section 3.1.1 long headers may be separated into
3625 			 * parts using CRLF followed at least one linear-white-space character ('\t' or ' ').
3626 			 * To prevent these separators from being replaced with a space, we use the
3627 			 * SKIP_LONG_HEADER_SEP_MBSTRING to skip over them.
3628 			 */
3629 			SKIP_LONG_HEADER_SEP_MBSTRING(to_r, i);
3630 			to_r[i] = ' ';
3631 		}
3632 		}
3633 	} else {
3634 		to_r = to;
3635 	}
3636 
3637 	/* Subject: */
3638 	orig_str.val = (unsigned char *)subject;
3639 	orig_str.len = subject_len;
3640 	orig_str.encoding = MBSTRG(current_internal_encoding);
3641 	if (orig_str.encoding->no_encoding == mbfl_no_encoding_invalid
3642 			|| orig_str.encoding->no_encoding == mbfl_no_encoding_pass) {
3643 		orig_str.encoding = mbfl_identify_encoding(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
3644 	}
3645 	pstr = mbfl_mime_header_encode(&orig_str, &conv_str, tran_cs, head_enc, CRLF, sizeof("Subject: [PHP-jp nnnnnnnn]" CRLF) - 1);
3646 	if (pstr != NULL) {
3647 		subject_buf = subject = (char *)pstr->val;
3648 	}
3649 
3650 	/* message body */
3651 	orig_str.val = (unsigned char *)message;
3652 	orig_str.len = message_len;
3653 	orig_str.encoding = MBSTRG(current_internal_encoding);
3654 
3655 	if (orig_str.encoding->no_encoding == mbfl_no_encoding_invalid
3656 			|| orig_str.encoding->no_encoding == mbfl_no_encoding_pass) {
3657 		orig_str.encoding = mbfl_identify_encoding(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
3658 	}
3659 
3660 	pstr = NULL;
3661 	{
3662 		mbfl_string tmpstr;
3663 
3664 		if (mbfl_convert_encoding(&orig_str, &tmpstr, tran_cs) != NULL) {
3665 			tmpstr.encoding = &mbfl_encoding_8bit;
3666 			pstr = mbfl_convert_encoding(&tmpstr, &conv_str, body_enc);
3667 			efree(tmpstr.val);
3668 		}
3669 	}
3670 	if (pstr != NULL) {
3671 		message_buf = message = (char *)pstr->val;
3672 	}
3673 
3674 	/* other headers */
3675 #define PHP_MBSTR_MAIL_MIME_HEADER1 "MIME-Version: 1.0"
3676 #define PHP_MBSTR_MAIL_MIME_HEADER2 "Content-Type: text/plain"
3677 #define PHP_MBSTR_MAIL_MIME_HEADER3 "; charset="
3678 #define PHP_MBSTR_MAIL_MIME_HEADER4 "Content-Transfer-Encoding: "
3679 	if (str_headers != NULL) {
3680 		p = ZSTR_VAL(str_headers);
3681 		n = ZSTR_LEN(str_headers);
3682 		mbfl_memory_device_strncat(&device, p, n);
3683 		if (n > 0 && p[n - 1] != '\n') {
3684 			mbfl_memory_device_strncat(&device, CRLF, sizeof(CRLF)-1);
3685 		}
3686 		zend_string_release_ex(str_headers, 0);
3687 	}
3688 
3689 	if (!zend_hash_str_exists(&ht_headers, "mime-version", sizeof("mime-version") - 1)) {
3690 		mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER1, sizeof(PHP_MBSTR_MAIL_MIME_HEADER1) - 1);
3691 		mbfl_memory_device_strncat(&device, CRLF, sizeof(CRLF)-1);
3692 	}
3693 
3694 	if (!suppressed_hdrs.cnt_type) {
3695 		mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER2, sizeof(PHP_MBSTR_MAIL_MIME_HEADER2) - 1);
3696 
3697 		p = (char *)mbfl_encoding_preferred_mime_name(tran_cs);
3698 		if (p != NULL) {
3699 			mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER3, sizeof(PHP_MBSTR_MAIL_MIME_HEADER3) - 1);
3700 			mbfl_memory_device_strcat(&device, p);
3701 		}
3702 		mbfl_memory_device_strncat(&device, CRLF, sizeof(CRLF)-1);
3703 	}
3704 	if (!suppressed_hdrs.cnt_trans_enc) {
3705 		mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER4, sizeof(PHP_MBSTR_MAIL_MIME_HEADER4) - 1);
3706 		p = (char *)mbfl_encoding_preferred_mime_name(body_enc);
3707 		if (p == NULL) {
3708 			p = "7bit";
3709 		}
3710 		mbfl_memory_device_strcat(&device, p);
3711 		mbfl_memory_device_strncat(&device, CRLF, sizeof(CRLF)-1);
3712 	}
3713 
3714 	mbfl_memory_device_unput(&device);
3715 	mbfl_memory_device_unput(&device);
3716 	mbfl_memory_device_output('\0', &device);
3717 	str_headers = zend_string_init((char *)device.buffer, strlen((char *)device.buffer), 0);
3718 
3719 	if (force_extra_parameters) {
3720 		extra_cmd = php_escape_shell_cmd(force_extra_parameters);
3721 	} else if (extra_cmd) {
3722 		extra_cmd = php_escape_shell_cmd(ZSTR_VAL(extra_cmd));
3723 	}
3724 
3725 	if (!err && php_mail(to_r, subject, message, ZSTR_VAL(str_headers), extra_cmd ? ZSTR_VAL(extra_cmd) : NULL)) {
3726 		RETVAL_TRUE;
3727 	} else {
3728 		RETVAL_FALSE;
3729 	}
3730 
3731 	if (extra_cmd) {
3732 		zend_string_release_ex(extra_cmd, 0);
3733 	}
3734 
3735 	if (to_r != to) {
3736 		efree(to_r);
3737 	}
3738 	if (subject_buf) {
3739 		efree((void *)subject_buf);
3740 	}
3741 	if (message_buf) {
3742 		efree((void *)message_buf);
3743 	}
3744 	mbfl_memory_device_clear(&device);
3745 	zend_hash_destroy(&ht_headers);
3746 	if (str_headers) {
3747 		zend_string_release_ex(str_headers, 0);
3748 	}
3749 }
3750 
3751 #undef SKIP_LONG_HEADER_SEP_MBSTRING
3752 #undef CRLF
3753 #undef MAIL_ASCIIZ_CHECK_MBSTRING
3754 #undef PHP_MBSTR_MAIL_MIME_HEADER1
3755 #undef PHP_MBSTR_MAIL_MIME_HEADER2
3756 #undef PHP_MBSTR_MAIL_MIME_HEADER3
3757 #undef PHP_MBSTR_MAIL_MIME_HEADER4
3758 /* }}} */
3759 
3760 /* {{{ Returns the current settings of mbstring */
PHP_FUNCTION(mb_get_info)3761 PHP_FUNCTION(mb_get_info)
3762 {
3763 	zend_string *type = NULL;
3764 	size_t n;
3765 	char *name;
3766 	zval row;
3767 	const mbfl_language *lang = mbfl_no2language(MBSTRG(language));
3768 	const mbfl_encoding **entry;
3769 
3770 	ZEND_PARSE_PARAMETERS_START(0, 1)
3771 		Z_PARAM_OPTIONAL
3772 		Z_PARAM_STR(type)
3773 	ZEND_PARSE_PARAMETERS_END();
3774 
3775 	if (!type || zend_string_equals_literal_ci(type, "all")) {
3776 		array_init(return_value);
3777 		if (MBSTRG(current_internal_encoding)) {
3778 			add_assoc_string(return_value, "internal_encoding", (char *)MBSTRG(current_internal_encoding)->name);
3779 		}
3780 		if (MBSTRG(http_input_identify)) {
3781 			add_assoc_string(return_value, "http_input", (char *)MBSTRG(http_input_identify)->name);
3782 		}
3783 		if (MBSTRG(current_http_output_encoding)) {
3784 			add_assoc_string(return_value, "http_output", (char *)MBSTRG(current_http_output_encoding)->name);
3785 		}
3786 		if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes") - 1, 0)) != NULL) {
3787 			add_assoc_string(return_value, "http_output_conv_mimetypes", name);
3788 		}
3789 		if (lang != NULL) {
3790 			if ((name = (char *)mbfl_no_encoding2name(lang->mail_charset)) != NULL) {
3791 				add_assoc_string(return_value, "mail_charset", name);
3792 			}
3793 			if ((name = (char *)mbfl_no_encoding2name(lang->mail_header_encoding)) != NULL) {
3794 				add_assoc_string(return_value, "mail_header_encoding", name);
3795 			}
3796 			if ((name = (char *)mbfl_no_encoding2name(lang->mail_body_encoding)) != NULL) {
3797 				add_assoc_string(return_value, "mail_body_encoding", name);
3798 			}
3799 		}
3800 		add_assoc_long(return_value, "illegal_chars", MBSTRG(illegalchars));
3801 		if (MBSTRG(encoding_translation)) {
3802 			add_assoc_string(return_value, "encoding_translation", "On");
3803 		} else {
3804 			add_assoc_string(return_value, "encoding_translation", "Off");
3805 		}
3806 		if ((name = (char *)mbfl_no_language2name(MBSTRG(language))) != NULL) {
3807 			add_assoc_string(return_value, "language", name);
3808 		}
3809 		n = MBSTRG(current_detect_order_list_size);
3810 		entry = MBSTRG(current_detect_order_list);
3811 		if (n > 0) {
3812 			size_t i;
3813 			array_init(&row);
3814 			for (i = 0; i < n; i++) {
3815 				add_next_index_string(&row, (*entry)->name);
3816 				entry++;
3817 			}
3818 			add_assoc_zval(return_value, "detect_order", &row);
3819 		}
3820 		if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
3821 			add_assoc_string(return_value, "substitute_character", "none");
3822 		} else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
3823 			add_assoc_string(return_value, "substitute_character", "long");
3824 		} else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
3825 			add_assoc_string(return_value, "substitute_character", "entity");
3826 		} else {
3827 			add_assoc_long(return_value, "substitute_character", MBSTRG(current_filter_illegal_substchar));
3828 		}
3829 		if (MBSTRG(strict_detection)) {
3830 			add_assoc_string(return_value, "strict_detection", "On");
3831 		} else {
3832 			add_assoc_string(return_value, "strict_detection", "Off");
3833 		}
3834 	} else if (zend_string_equals_literal_ci(type, "internal_encoding")) {
3835 		if (MBSTRG(current_internal_encoding)) {
3836 			RETVAL_STRING((char *)MBSTRG(current_internal_encoding)->name);
3837 		}
3838 	} else if (zend_string_equals_literal_ci(type, "http_input")) {
3839 		if (MBSTRG(http_input_identify)) {
3840 			RETVAL_STRING((char *)MBSTRG(http_input_identify)->name);
3841 		}
3842 	} else if (zend_string_equals_literal_ci(type, "http_output")) {
3843 		if (MBSTRG(current_http_output_encoding)) {
3844 			RETVAL_STRING((char *)MBSTRG(current_http_output_encoding)->name);
3845 		}
3846 	} else if (zend_string_equals_literal_ci(type, "http_output_conv_mimetypes")) {
3847 		if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes") - 1, 0)) != NULL) {
3848 			RETVAL_STRING(name);
3849 		}
3850 	} else if (zend_string_equals_literal_ci(type, "mail_charset")) {
3851 		if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_charset)) != NULL) {
3852 			RETVAL_STRING(name);
3853 		}
3854 	} else if (zend_string_equals_literal_ci(type, "mail_header_encoding")) {
3855 		if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_header_encoding)) != NULL) {
3856 			RETVAL_STRING(name);
3857 		}
3858 	} else if (zend_string_equals_literal_ci(type, "mail_body_encoding")) {
3859 		if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_body_encoding)) != NULL) {
3860 			RETVAL_STRING(name);
3861 		}
3862 	} else if (zend_string_equals_literal_ci(type, "illegal_chars")) {
3863 		RETVAL_LONG(MBSTRG(illegalchars));
3864 	} else if (zend_string_equals_literal_ci(type, "encoding_translation")) {
3865 		if (MBSTRG(encoding_translation)) {
3866 			RETVAL_STRING("On");
3867 		} else {
3868 			RETVAL_STRING("Off");
3869 		}
3870 	} else if (zend_string_equals_literal_ci(type, "language")) {
3871 		if ((name = (char *)mbfl_no_language2name(MBSTRG(language))) != NULL) {
3872 			RETVAL_STRING(name);
3873 		}
3874 	} else if (zend_string_equals_literal_ci(type, "detect_order")) {
3875 		n = MBSTRG(current_detect_order_list_size);
3876 		entry = MBSTRG(current_detect_order_list);
3877 		if (n > 0) {
3878 			size_t i;
3879 			array_init(return_value);
3880 			for (i = 0; i < n; i++) {
3881 				add_next_index_string(return_value, (*entry)->name);
3882 				entry++;
3883 			}
3884 		}
3885 	} else if (zend_string_equals_literal_ci(type, "substitute_character")) {
3886 		if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
3887 			RETVAL_STRING("none");
3888 		} else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
3889 			RETVAL_STRING("long");
3890 		} else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
3891 			RETVAL_STRING("entity");
3892 		} else {
3893 			RETVAL_LONG(MBSTRG(current_filter_illegal_substchar));
3894 		}
3895 	} else if (zend_string_equals_literal_ci(type, "strict_detection")) {
3896 		if (MBSTRG(strict_detection)) {
3897 			RETVAL_STRING("On");
3898 		} else {
3899 			RETVAL_STRING("Off");
3900 		}
3901 	} else {
3902 		// TODO Convert to ValueError
3903 		RETURN_FALSE;
3904 	}
3905 }
3906 /* }}} */
3907 
mbfl_filt_check_errors(int c,void * data)3908 static int mbfl_filt_check_errors(int c, void* data)
3909 {
3910 	if (c == MBFL_BAD_INPUT) {
3911 		(*((mbfl_convert_filter**)data))->num_illegalchar++;
3912 	}
3913 	return 0;
3914 }
3915 
php_mb_check_encoding(const char * input,size_t length,const mbfl_encoding * encoding)3916 MBSTRING_API int php_mb_check_encoding(const char *input, size_t length, const mbfl_encoding *encoding)
3917 {
3918 	mbfl_convert_filter *filter = mbfl_convert_filter_new(encoding, &mbfl_encoding_wchar, mbfl_filt_check_errors, NULL, &filter);
3919 
3920 	if (encoding->check != NULL) {
3921 		mbfl_convert_filter_delete(filter);
3922 		return encoding->check((unsigned char*)input, length);
3923 	}
3924 
3925 	while (length--) {
3926 		unsigned char c = *input++;
3927 		(filter->filter_function)(c, filter);
3928 		if (filter->num_illegalchar) {
3929 			mbfl_convert_filter_delete(filter);
3930 			return 0;
3931 		}
3932 	}
3933 
3934 	(filter->filter_flush)(filter);
3935 	int result = !filter->num_illegalchar;
3936 	mbfl_convert_filter_delete(filter);
3937 	return result;
3938 }
3939 
/*
 * Validate every string key and every value of `vars` against `encoding`,
 * descending into nested arrays.
 *
 * Returns 1 when everything is valid, 0 otherwise. Values of type int, float,
 * null and bool are accepted as-is; any type other than those, string or array
 * makes the whole check fail. If `vars` is already being visited (circular
 * reference) a warning is raised and 0 is returned.
 *
 * Traversal stops at the first invalid element.
 */
static int php_mb_check_encoding_recursive(HashTable *vars, const mbfl_encoding *encoding)
{
	zend_long idx;
	zend_string *key;
	zval *entry;
	int valid = 1;

	(void)(idx); /* Suppress spurious compiler warning that `idx` is not used */

	if (GC_IS_RECURSIVE(vars)) {
		php_error_docref(NULL, E_WARNING, "Cannot not handle circular references");
		return 0;
	}
	GC_TRY_PROTECT_RECURSION(vars);
	ZEND_HASH_FOREACH_KEY_VAL(vars, idx, key, entry) {
		ZVAL_DEREF(entry);
		if (key && !php_mb_check_encoding(ZSTR_VAL(key), ZSTR_LEN(key), encoding)) {
			valid = 0;
			break;
		}
		switch (Z_TYPE_P(entry)) {
			case IS_STRING:
				if (!php_mb_check_encoding(Z_STRVAL_P(entry), Z_STRLEN_P(entry), encoding)) {
					valid = 0;
				}
				break;
			case IS_ARRAY:
				if (!php_mb_check_encoding_recursive(Z_ARRVAL_P(entry), encoding)) {
					valid = 0;
				}
				break;
			case IS_LONG:
			case IS_DOUBLE:
			case IS_NULL:
			case IS_TRUE:
			case IS_FALSE:
				break;
			default:
				/* Other types are error. */
				valid = 0;
				break;
		}
		/*
		 * A `break` inside the switch only leaves the switch, so the loop
		 * has to be left explicitly once a failure has been recorded.
		 */
		if (!valid) {
			break;
		}
	} ZEND_HASH_FOREACH_END();
	/* Reached on every exit from the loop, so the protection flag is always cleared */
	GC_TRY_UNPROTECT_RECURSION(vars);
	return valid;
}
3990 
3991 /* {{{ Check if the string is valid for the specified encoding */
PHP_FUNCTION(mb_check_encoding)3992 PHP_FUNCTION(mb_check_encoding)
3993 {
3994 	zend_string *input_str = NULL, *enc = NULL;
3995 	HashTable *input_ht = NULL;
3996 	const mbfl_encoding *encoding;
3997 
3998 	ZEND_PARSE_PARAMETERS_START(0, 2)
3999 		Z_PARAM_OPTIONAL
4000 		Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(input_ht, input_str)
4001 		Z_PARAM_STR_OR_NULL(enc)
4002 	ZEND_PARSE_PARAMETERS_END();
4003 
4004 	encoding = php_mb_get_encoding(enc, 2);
4005 	if (!encoding) {
4006 		RETURN_THROWS();
4007 	}
4008 
4009 	if (input_ht) {
4010 		RETURN_BOOL(php_mb_check_encoding_recursive(input_ht, encoding));
4011 	} else if (input_str) {
4012 		RETURN_BOOL(php_mb_check_encoding(ZSTR_VAL(input_str), ZSTR_LEN(input_str), encoding));
4013 	} else {
4014 		php_error_docref(NULL, E_DEPRECATED,
4015 			"Calling mb_check_encoding() without argument is deprecated");
4016 
4017 		/* FIXME: Actually check all inputs, except $_FILES file content. */
4018 		RETURN_BOOL(MBSTRG(illegalchars) == 0);
4019 	}
4020 }
4021 /* }}} */
4022 
4023 
php_mb_ord(const char * str,size_t str_len,zend_string * enc_name,const uint32_t enc_name_arg_num)4024 static inline zend_long php_mb_ord(const char *str, size_t str_len, zend_string *enc_name,
4025 	const uint32_t enc_name_arg_num)
4026 {
4027 	const mbfl_encoding *enc;
4028 	enum mbfl_no_encoding no_enc;
4029 
4030 	ZEND_ASSERT(str_len > 0);
4031 
4032 	enc = php_mb_get_encoding(enc_name, enc_name_arg_num);
4033 	if (!enc) {
4034 		return -2;
4035 	}
4036 
4037 	no_enc = enc->no_encoding;
4038 	if (php_mb_is_unsupported_no_encoding(no_enc)) {
4039 		zend_value_error("mb_ord() does not support the \"%s\" encoding", enc->name);
4040 		return -2;
4041 	}
4042 
4043 	{
4044 		mbfl_wchar_device dev;
4045 		mbfl_convert_filter *filter;
4046 		zend_long cp;
4047 
4048 		mbfl_wchar_device_init(&dev);
4049 		filter = mbfl_convert_filter_new(enc, &mbfl_encoding_wchar, mbfl_wchar_device_output, 0, &dev);
4050 		/* If this assertion fails this means some memory allocation failure which is a bug */
4051 		ZEND_ASSERT(filter != NULL);
4052 
4053 		mbfl_convert_filter_feed_string(filter, (unsigned char*)str, str_len);
4054 		mbfl_convert_filter_flush(filter);
4055 
4056 		if (dev.pos < 1 || filter->num_illegalchar || dev.buffer[0] == MBFL_BAD_INPUT) {
4057 			cp = -1;
4058 		} else {
4059 			cp = dev.buffer[0];
4060 		}
4061 
4062 		mbfl_convert_filter_delete(filter);
4063 		mbfl_wchar_device_clear(&dev);
4064 		return cp;
4065 	}
4066 }
4067 
4068 
4069 /* {{{ */
PHP_FUNCTION(mb_ord)4070 PHP_FUNCTION(mb_ord)
4071 {
4072 	char *str;
4073 	size_t str_len;
4074 	zend_string *enc = NULL;
4075 	zend_long cp;
4076 
4077 	ZEND_PARSE_PARAMETERS_START(1, 2)
4078 		Z_PARAM_STRING(str, str_len)
4079 		Z_PARAM_OPTIONAL
4080 		Z_PARAM_STR_OR_NULL(enc)
4081 	ZEND_PARSE_PARAMETERS_END();
4082 
4083 	if (str_len == 0) {
4084 		zend_argument_value_error(1, "must not be empty");
4085 		RETURN_THROWS();
4086 	}
4087 
4088 	cp = php_mb_ord(str, str_len, enc, 2);
4089 
4090 	if (0 > cp) {
4091 		if (cp == -2) {
4092 			RETURN_THROWS();
4093 		}
4094 		RETURN_FALSE;
4095 	}
4096 
4097 	RETURN_LONG(cp);
4098 }
4099 /* }}} */
4100 
4101 
/*
 * Encode the code point `cp` in the encoding named by `enc_name`.
 *
 * Returns a new zend_string, or NULL when
 *   - the encoding cannot be resolved by php_mb_get_encoding(),
 *   - the encoding is not supported by mb_chr() (a ValueError is raised here),
 *   - `cp` lies outside 0..0x10ffff,
 *   - the target is UTF-8 and `cp` is a surrogate (0xd800..0xdfff), or
 *   - converting to the target encoding reported illegal characters.
 *
 * UTF-8 output is assembled by hand. Every other encoding goes through
 * php_mb_convert_encoding_ex() from a 4-byte big-endian (UCS-4BE) buffer.
 * MBSTRG(illegalchars) is zeroed around that conversion so failures can be
 * detected, and is restored to its previous value before returning.
 */
static inline zend_string *php_mb_chr(zend_long cp, zend_string *enc_name, uint32_t enc_name_arg_num)
{
	const mbfl_encoding *enc;
	enum mbfl_no_encoding no_enc;
	zend_string *ret;

	enc = php_mb_get_encoding(enc_name, enc_name_arg_num);
	if (enc == NULL) {
		return NULL;
	}

	no_enc = enc->no_encoding;
	if (php_mb_is_unsupported_no_encoding(no_enc)) {
		zend_value_error("mb_chr() does not support the \"%s\" encoding", enc->name);
		return NULL;
	}

	if (cp < 0 || cp > 0x10ffff) {
		return NULL;
	}

	if (php_mb_is_no_encoding_utf8(no_enc)) {
		char *out;

		/* Surrogates are rejected for UTF-8 output */
		if (cp >= 0xd800 && cp <= 0xdfff) {
			return NULL;
		}

		if (cp < 0x80) {
			return ZSTR_CHAR(cp);
		}

		if (cp < 0x800) {
			ret = zend_string_alloc(2, 0);
			out = ZSTR_VAL(ret);
			*out++ = 0xc0 | (cp >> 6);
		} else if (cp < 0x10000) {
			ret = zend_string_alloc(3, 0);
			out = ZSTR_VAL(ret);
			*out++ = 0xe0 | (cp >> 12);
			*out++ = 0x80 | ((cp >> 6) & 0x3f);
		} else {
			ret = zend_string_alloc(4, 0);
			out = ZSTR_VAL(ret);
			*out++ = 0xf0 | (cp >> 18);
			*out++ = 0x80 | ((cp >> 12) & 0x3f);
			*out++ = 0x80 | ((cp >> 6) & 0x3f);
		}
		/* Every multi-byte form ends with the low six bits and a terminator */
		*out++ = 0x80 | (cp & 0x3f);
		*out = 0;

		return ret;
	}

	/* Generic path: serialise as UCS-4BE and let the converter do the work */
	size_t buf_len = 4;
	char *buf = (char *) emalloc(buf_len + 1);
	buf[0] = (cp >> 24) & 0xff;
	buf[1] = (cp >> 16) & 0xff;
	buf[2] = (cp >>  8) & 0xff;
	buf[3] = cp & 0xff;
	buf[4] = 0;

	size_t ret_len;
	long saved_illegalchars = MBSTRG(illegalchars);
	MBSTRG(illegalchars) = 0;
	char *ret_str = php_mb_convert_encoding_ex(buf, buf_len, enc, &mbfl_encoding_ucs4be, &ret_len);

	/* A non-zero counter means the code point is not representable in `enc` */
	ret = NULL;
	if (MBSTRG(illegalchars) == 0) {
		ret = zend_string_init(ret_str, ret_len, 0);
	}

	efree(ret_str);
	efree(buf);
	MBSTRG(illegalchars) = saved_illegalchars;
	return ret;
}
4182 
4183 
4184 /* {{{ */
PHP_FUNCTION(mb_chr)4185 PHP_FUNCTION(mb_chr)
4186 {
4187 	zend_long cp;
4188 	zend_string *enc = NULL;
4189 
4190 	ZEND_PARSE_PARAMETERS_START(1, 2)
4191 		Z_PARAM_LONG(cp)
4192 		Z_PARAM_OPTIONAL
4193 		Z_PARAM_STR_OR_NULL(enc)
4194 	ZEND_PARSE_PARAMETERS_END();
4195 
4196 	zend_string* ret = php_mb_chr(cp, enc, 2);
4197 	if (ret == NULL) {
4198 		RETURN_FALSE;
4199 	}
4200 
4201 	RETURN_STR(ret);
4202 }
4203 /* }}} */
4204 
4205 /* {{{ */
PHP_FUNCTION(mb_scrub)4206 PHP_FUNCTION(mb_scrub)
4207 {
4208 	char* str;
4209 	size_t str_len;
4210 	zend_string *enc_name = NULL;
4211 
4212 	ZEND_PARSE_PARAMETERS_START(1, 2)
4213 		Z_PARAM_STRING(str, str_len)
4214 		Z_PARAM_OPTIONAL
4215 		Z_PARAM_STR_OR_NULL(enc_name)
4216 	ZEND_PARSE_PARAMETERS_END();
4217 
4218 	const mbfl_encoding *enc = php_mb_get_encoding(enc_name, 2);
4219 	if (!enc) {
4220 		RETURN_THROWS();
4221 	}
4222 
4223 	size_t ret_len;
4224 	char *ret = php_mb_convert_encoding_ex(str, str_len, enc, enc, &ret_len);
4225 
4226 	RETVAL_STRINGL(ret, ret_len);
4227 	efree(ret);
4228 }
4229 /* }}} */
4230 
4231 
4232 /* {{{ php_mb_populate_current_detect_order_list */
php_mb_populate_current_detect_order_list(void)4233 static void php_mb_populate_current_detect_order_list(void)
4234 {
4235 	const mbfl_encoding **entry = 0;
4236 	size_t nentries;
4237 
4238 	if (MBSTRG(detect_order_list) && MBSTRG(detect_order_list_size)) {
4239 		nentries = MBSTRG(detect_order_list_size);
4240 		entry = (const mbfl_encoding **)safe_emalloc(nentries, sizeof(mbfl_encoding*), 0);
4241 		memcpy(ZEND_VOIDP(entry), MBSTRG(detect_order_list), sizeof(mbfl_encoding*) * nentries);
4242 	} else {
4243 		const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
4244 		size_t i;
4245 		nentries = MBSTRG(default_detect_order_list_size);
4246 		entry = (const mbfl_encoding **)safe_emalloc(nentries, sizeof(mbfl_encoding*), 0);
4247 		for (i = 0; i < nentries; i++) {
4248 			entry[i] = mbfl_no2encoding(src[i]);
4249 		}
4250 	}
4251 	MBSTRG(current_detect_order_list) = entry;
4252 	MBSTRG(current_detect_order_list_size) = nentries;
4253 }
4254 /* }}} */
4255 
4256 /* {{{ static int php_mb_encoding_translation() */
php_mb_encoding_translation(void)4257 static int php_mb_encoding_translation(void)
4258 {
4259 	return MBSTRG(encoding_translation);
4260 }
4261 /* }}} */
4262 
4263 /* {{{ MBSTRING_API size_t php_mb_mbchar_bytes_ex() */
php_mb_mbchar_bytes_ex(const char * s,const mbfl_encoding * enc)4264 MBSTRING_API size_t php_mb_mbchar_bytes_ex(const char *s, const mbfl_encoding *enc)
4265 {
4266 	if (enc) {
4267 		if (enc->mblen_table) {
4268 			if (s) {
4269 				return enc->mblen_table[*(unsigned char *)s];
4270 			}
4271 		} else if (enc->flag & MBFL_ENCTYPE_WCS2) {
4272 			return 2;
4273 		} else if (enc->flag & MBFL_ENCTYPE_WCS4) {
4274 			return 4;
4275 		}
4276 	}
4277 	return 1;
4278 }
4279 /* }}} */
4280 
4281 /* {{{ MBSTRING_API size_t php_mb_mbchar_bytes() */
php_mb_mbchar_bytes(const char * s)4282 MBSTRING_API size_t php_mb_mbchar_bytes(const char *s)
4283 {
4284 	return php_mb_mbchar_bytes_ex(s, MBSTRG(internal_encoding));
4285 }
4286 /* }}} */
4287 
4288 /* {{{ MBSTRING_API char *php_mb_safe_strrchr_ex() */
php_mb_safe_strrchr_ex(const char * s,unsigned int c,size_t nbytes,const mbfl_encoding * enc)4289 MBSTRING_API char *php_mb_safe_strrchr_ex(const char *s, unsigned int c, size_t nbytes, const mbfl_encoding *enc)
4290 {
4291 	const char *p = s;
4292 	char *last=NULL;
4293 
4294 	if (nbytes == (size_t)-1) {
4295 		size_t nb = 0;
4296 
4297 		while (*p != '\0') {
4298 			if (nb == 0) {
4299 				if ((unsigned char)*p == (unsigned char)c) {
4300 					last = (char *)p;
4301 				}
4302 				nb = php_mb_mbchar_bytes_ex(p, enc);
4303 				if (nb == 0) {
4304 					return NULL; /* something is going wrong! */
4305 				}
4306 			}
4307 			--nb;
4308 			++p;
4309 		}
4310 	} else {
4311 		size_t bcnt = nbytes;
4312 		size_t nbytes_char;
4313 		while (bcnt > 0) {
4314 			if ((unsigned char)*p == (unsigned char)c) {
4315 				last = (char *)p;
4316 			}
4317 			nbytes_char = php_mb_mbchar_bytes_ex(p, enc);
4318 			if (bcnt < nbytes_char) {
4319 				return NULL;
4320 			}
4321 			p += nbytes_char;
4322 			bcnt -= nbytes_char;
4323 		}
4324 	}
4325 	return last;
4326 }
4327 /* }}} */
4328 
4329 /* {{{ MBSTRING_API char *php_mb_safe_strrchr() */
php_mb_safe_strrchr(const char * s,unsigned int c,size_t nbytes)4330 MBSTRING_API char *php_mb_safe_strrchr(const char *s, unsigned int c, size_t nbytes)
4331 {
4332 	return php_mb_safe_strrchr_ex(s, c, nbytes, MBSTRG(internal_encoding));
4333 }
4334 /* }}} */
4335 
4336 /* {{{ MBSTRING_API int php_mb_stripos() */
php_mb_stripos(int mode,const char * old_haystack,size_t old_haystack_len,const char * old_needle,size_t old_needle_len,zend_long offset,const mbfl_encoding * enc)4337 MBSTRING_API size_t php_mb_stripos(int mode, const char *old_haystack, size_t old_haystack_len, const char *old_needle, size_t old_needle_len, zend_long offset, const mbfl_encoding *enc)
4338 {
4339 	size_t n = (size_t) -1;
4340 	mbfl_string haystack, needle;
4341 
4342 	mbfl_string_init_set(&haystack, enc);
4343 	mbfl_string_init_set(&needle, enc);
4344 
4345 	do {
4346 		/* We're using simple case-folding here, because we'd have to deal with remapping of
4347 		 * offsets otherwise. */
4348 
4349 		size_t len = 0;
4350 		haystack.val = (unsigned char *)mbstring_convert_case(PHP_UNICODE_CASE_FOLD_SIMPLE, (char *)old_haystack, old_haystack_len, &len, enc);
4351 		haystack.len = len;
4352 
4353 		if (!haystack.val) {
4354 			break;
4355 		}
4356 
4357 		if (haystack.len == 0) {
4358 			break;
4359 		}
4360 
4361 		needle.val = (unsigned char *)mbstring_convert_case(PHP_UNICODE_CASE_FOLD_SIMPLE, (char *)old_needle, old_needle_len, &len, enc);
4362 		needle.len = len;
4363 
4364 		if (!needle.val) {
4365 			break;
4366 		}
4367 
4368 		n = mbfl_strpos(&haystack, &needle, offset, mode);
4369 	} while(0);
4370 
4371 	if (haystack.val) {
4372 		efree(haystack.val);
4373 	}
4374 
4375 	if (needle.val) {
4376 		efree(needle.val);
4377 	}
4378 
4379 	return n;
4380 }
4381 /* }}} */
4382 
php_mb_gpc_get_detect_order(const zend_encoding *** list,size_t * list_size)4383 static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *list_size) /* {{{ */
4384 {
4385 	*list = (const zend_encoding **)MBSTRG(http_input_list);
4386 	*list_size = MBSTRG(http_input_list_size);
4387 }
4388 /* }}} */
4389 
php_mb_gpc_set_input_encoding(const zend_encoding * encoding)4390 static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding) /* {{{ */
4391 {
4392 	MBSTRG(http_input_identify) = (const mbfl_encoding*)encoding;
4393 }
4394 /* }}} */
4395