xref: /PHP-7.3/ext/mbstring/mbstring.c (revision 6031b082)
1 /*
2    +----------------------------------------------------------------------+
3    | PHP Version 7                                                        |
4    +----------------------------------------------------------------------+
5    | Copyright (c) 1997-2018 The PHP Group                                |
6    +----------------------------------------------------------------------+
7    | This source file is subject to version 3.01 of the PHP license,      |
8    | that is bundled with this package in the file LICENSE, and is        |
9    | available through the world-wide-web at the following url:           |
10    | http://www.php.net/license/3_01.txt                                  |
11    | If you did not receive a copy of the PHP license and are unable to   |
12    | obtain it through the world-wide-web, please send a note to          |
13    | license@php.net so we can mail you a copy immediately.               |
14    +----------------------------------------------------------------------+
15    | Author: Tsukada Takuya <tsukada@fminn.nagano.nagano.jp>              |
16    |         Rui Hirokawa <hirokawa@php.net>                              |
17    |         Hironori Sato <satoh@jpnnet.com>                             |
18    |         Shigeru Kanemoto <sgk@happysize.co.jp>                       |
19    +----------------------------------------------------------------------+
20  */
21 
22 /* {{{ includes */
23 #ifdef HAVE_CONFIG_H
24 #include "config.h"
25 #endif
26 
27 #include "php.h"
28 #include "php_ini.h"
29 #include "php_variables.h"
30 #include "mbstring.h"
31 #include "ext/standard/php_string.h"
32 #include "ext/standard/php_mail.h"
33 #include "ext/standard/exec.h"
34 #include "ext/standard/url.h"
35 #include "main/php_output.h"
36 #include "ext/standard/info.h"
37 
38 #include "libmbfl/mbfl/mbfl_allocators.h"
39 #include "libmbfl/mbfl/mbfilter_8bit.h"
40 #include "libmbfl/mbfl/mbfilter_pass.h"
41 #include "libmbfl/mbfl/mbfilter_wchar.h"
42 #include "libmbfl/filters/mbfilter_ascii.h"
43 #include "libmbfl/filters/mbfilter_base64.h"
44 #include "libmbfl/filters/mbfilter_qprint.h"
45 #include "libmbfl/filters/mbfilter_ucs4.h"
46 #include "libmbfl/filters/mbfilter_utf8.h"
47 
48 #include "php_variables.h"
49 #include "php_globals.h"
50 #include "rfc1867.h"
51 #include "php_content_types.h"
52 #include "SAPI.h"
53 #include "php_unicode.h"
54 #include "TSRM.h"
55 
56 #include "mb_gpc.h"
57 
58 #if HAVE_MBREGEX
59 #include "php_mbregex.h"
60 #endif
61 
62 #include "zend_multibyte.h"
63 
64 #if HAVE_ONIG
65 #include "php_onig_compat.h"
66 #include <oniguruma.h>
67 #undef UChar
68 #if ONIGURUMA_VERSION_INT < 60800
69 typedef void OnigMatchParam;
70 #define onig_new_match_param() (NULL)
71 #define onig_initialize_match_param(x) (void)(x)
72 #define onig_set_match_stack_limit_size_of_match_param(x, y)
73 #define onig_free_match_param(x)
74 #define onig_search_with_param(reg, str, end, start, range, region, option, mp) \
75 		onig_search(reg, str, end, start, range, region, option)
76 #define onig_match_with_param(re, str, end, at, region, option, mp) \
77 		onig_match(re, str, end, at, region, option)
78 #endif
79 #elif HAVE_PCRE || HAVE_BUNDLED_PCRE
80 #include "ext/pcre/php_pcre.h"
81 #endif
82 /* }}} */
83 
84 #if HAVE_MBSTRING
85 
86 /* {{{ prototypes */
87 ZEND_DECLARE_MODULE_GLOBALS(mbstring)
88 
89 static PHP_GINIT_FUNCTION(mbstring);
90 static PHP_GSHUTDOWN_FUNCTION(mbstring);
91 
92 static void php_mb_populate_current_detect_order_list(void);
93 
94 static int php_mb_encoding_translation(void);
95 
96 static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *list_size);
97 
98 static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding);
99 
100 static inline zend_bool php_mb_is_unsupported_no_encoding(enum mbfl_no_encoding no_enc);
101 
102 static inline zend_bool php_mb_is_no_encoding_utf8(enum mbfl_no_encoding no_enc);
103 /* }}} */
104 
105 /* {{{ php_mb_default_identify_list */
106 typedef struct _php_mb_nls_ident_list {
107 	enum mbfl_no_language lang;
108 	const enum mbfl_no_encoding *list;
109 	size_t list_size;
110 } php_mb_nls_ident_list;
111 
112 static const enum mbfl_no_encoding php_mb_default_identify_list_ja[] = {
113 	mbfl_no_encoding_ascii,
114 	mbfl_no_encoding_jis,
115 	mbfl_no_encoding_utf8,
116 	mbfl_no_encoding_euc_jp,
117 	mbfl_no_encoding_sjis
118 };
119 
120 static const enum mbfl_no_encoding php_mb_default_identify_list_cn[] = {
121 	mbfl_no_encoding_ascii,
122 	mbfl_no_encoding_utf8,
123 	mbfl_no_encoding_euc_cn,
124 	mbfl_no_encoding_cp936
125 };
126 
127 static const enum mbfl_no_encoding php_mb_default_identify_list_tw_hk[] = {
128 	mbfl_no_encoding_ascii,
129 	mbfl_no_encoding_utf8,
130 	mbfl_no_encoding_euc_tw,
131 	mbfl_no_encoding_big5
132 };
133 
134 static const enum mbfl_no_encoding php_mb_default_identify_list_kr[] = {
135 	mbfl_no_encoding_ascii,
136 	mbfl_no_encoding_utf8,
137 	mbfl_no_encoding_euc_kr,
138 	mbfl_no_encoding_uhc
139 };
140 
141 static const enum mbfl_no_encoding php_mb_default_identify_list_ru[] = {
142 	mbfl_no_encoding_ascii,
143 	mbfl_no_encoding_utf8,
144 	mbfl_no_encoding_koi8r,
145 	mbfl_no_encoding_cp1251,
146 	mbfl_no_encoding_cp866
147 };
148 
149 static const enum mbfl_no_encoding php_mb_default_identify_list_hy[] = {
150 	mbfl_no_encoding_ascii,
151 	mbfl_no_encoding_utf8,
152 	mbfl_no_encoding_armscii8
153 };
154 
155 static const enum mbfl_no_encoding php_mb_default_identify_list_tr[] = {
156 	mbfl_no_encoding_ascii,
157 	mbfl_no_encoding_utf8,
158 	mbfl_no_encoding_cp1254,
159 	mbfl_no_encoding_8859_9
160 };
161 
162 static const enum mbfl_no_encoding php_mb_default_identify_list_ua[] = {
163 	mbfl_no_encoding_ascii,
164 	mbfl_no_encoding_utf8,
165 	mbfl_no_encoding_koi8u
166 };
167 
168 static const enum mbfl_no_encoding php_mb_default_identify_list_neut[] = {
169 	mbfl_no_encoding_ascii,
170 	mbfl_no_encoding_utf8
171 };
172 
173 
174 static const php_mb_nls_ident_list php_mb_default_identify_list[] = {
175 	{ mbfl_no_language_japanese, php_mb_default_identify_list_ja, sizeof(php_mb_default_identify_list_ja) / sizeof(php_mb_default_identify_list_ja[0]) },
176 	{ mbfl_no_language_korean, php_mb_default_identify_list_kr, sizeof(php_mb_default_identify_list_kr) / sizeof(php_mb_default_identify_list_kr[0]) },
177 	{ mbfl_no_language_traditional_chinese, php_mb_default_identify_list_tw_hk, sizeof(php_mb_default_identify_list_tw_hk) / sizeof(php_mb_default_identify_list_tw_hk[0]) },
178 	{ mbfl_no_language_simplified_chinese, php_mb_default_identify_list_cn, sizeof(php_mb_default_identify_list_cn) / sizeof(php_mb_default_identify_list_cn[0]) },
179 	{ mbfl_no_language_russian, php_mb_default_identify_list_ru, sizeof(php_mb_default_identify_list_ru) / sizeof(php_mb_default_identify_list_ru[0]) },
180 	{ mbfl_no_language_armenian, php_mb_default_identify_list_hy, sizeof(php_mb_default_identify_list_hy) / sizeof(php_mb_default_identify_list_hy[0]) },
181 	{ mbfl_no_language_turkish, php_mb_default_identify_list_tr, sizeof(php_mb_default_identify_list_tr) / sizeof(php_mb_default_identify_list_tr[0]) },
182 	{ mbfl_no_language_ukrainian, php_mb_default_identify_list_ua, sizeof(php_mb_default_identify_list_ua) / sizeof(php_mb_default_identify_list_ua[0]) },
183 	{ mbfl_no_language_neutral, php_mb_default_identify_list_neut, sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]) }
184 };
185 
186 /* }}} */
187 
188 /* {{{ mb_overload_def mb_ovld[] */
189 static const struct mb_overload_def mb_ovld[] = {
190 	{MB_OVERLOAD_MAIL, "mail", "mb_send_mail", "mb_orig_mail"},
191 	{MB_OVERLOAD_STRING, "strlen", "mb_strlen", "mb_orig_strlen"},
192 	{MB_OVERLOAD_STRING, "strpos", "mb_strpos", "mb_orig_strpos"},
193 	{MB_OVERLOAD_STRING, "strrpos", "mb_strrpos", "mb_orig_strrpos"},
194 	{MB_OVERLOAD_STRING, "stripos", "mb_stripos", "mb_orig_stripos"},
195 	{MB_OVERLOAD_STRING, "strripos", "mb_strripos", "mb_orig_strripos"},
196 	{MB_OVERLOAD_STRING, "strstr", "mb_strstr", "mb_orig_strstr"},
197 	{MB_OVERLOAD_STRING, "strrchr", "mb_strrchr", "mb_orig_strrchr"},
198 	{MB_OVERLOAD_STRING, "stristr", "mb_stristr", "mb_orig_stristr"},
199 	{MB_OVERLOAD_STRING, "substr", "mb_substr", "mb_orig_substr"},
200 	{MB_OVERLOAD_STRING, "strtolower", "mb_strtolower", "mb_orig_strtolower"},
201 	{MB_OVERLOAD_STRING, "strtoupper", "mb_strtoupper", "mb_orig_strtoupper"},
202 	{MB_OVERLOAD_STRING, "substr_count", "mb_substr_count", "mb_orig_substr_count"},
203 	{0, NULL, NULL, NULL}
204 };
205 /* }}} */
206 
207 /* {{{ arginfo */
208 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_language, 0, 0, 0)
209 	ZEND_ARG_INFO(0, language)
210 ZEND_END_ARG_INFO()
211 
212 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_internal_encoding, 0, 0, 0)
213 	ZEND_ARG_INFO(0, encoding)
214 ZEND_END_ARG_INFO()
215 
216 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_http_input, 0, 0, 0)
217 	ZEND_ARG_INFO(0, type)
218 ZEND_END_ARG_INFO()
219 
220 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_http_output, 0, 0, 0)
221 	ZEND_ARG_INFO(0, encoding)
222 ZEND_END_ARG_INFO()
223 
224 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_detect_order, 0, 0, 0)
225 	ZEND_ARG_INFO(0, encoding)
226 ZEND_END_ARG_INFO()
227 
228 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substitute_character, 0, 0, 0)
229 	ZEND_ARG_INFO(0, substchar)
230 ZEND_END_ARG_INFO()
231 
232 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_preferred_mime_name, 0, 0, 1)
233 	ZEND_ARG_INFO(0, encoding)
234 ZEND_END_ARG_INFO()
235 
236 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_parse_str, 0, 0, 1)
237 	ZEND_ARG_INFO(0, encoded_string)
238 	ZEND_ARG_INFO(1, result)
239 ZEND_END_ARG_INFO()
240 
241 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_output_handler, 0, 0, 2)
242 	ZEND_ARG_INFO(0, contents)
243 	ZEND_ARG_INFO(0, status)
244 ZEND_END_ARG_INFO()
245 
246 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strlen, 0, 0, 1)
247 	ZEND_ARG_INFO(0, str)
248 	ZEND_ARG_INFO(0, encoding)
249 ZEND_END_ARG_INFO()
250 
251 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strpos, 0, 0, 2)
252 	ZEND_ARG_INFO(0, haystack)
253 	ZEND_ARG_INFO(0, needle)
254 	ZEND_ARG_INFO(0, offset)
255 	ZEND_ARG_INFO(0, encoding)
256 ZEND_END_ARG_INFO()
257 
258 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrpos, 0, 0, 2)
259 	ZEND_ARG_INFO(0, haystack)
260 	ZEND_ARG_INFO(0, needle)
261 	ZEND_ARG_INFO(0, offset)
262 	ZEND_ARG_INFO(0, encoding)
263 ZEND_END_ARG_INFO()
264 
265 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_stripos, 0, 0, 2)
266 	ZEND_ARG_INFO(0, haystack)
267 	ZEND_ARG_INFO(0, needle)
268 	ZEND_ARG_INFO(0, offset)
269 	ZEND_ARG_INFO(0, encoding)
270 ZEND_END_ARG_INFO()
271 
272 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strripos, 0, 0, 2)
273 	ZEND_ARG_INFO(0, haystack)
274 	ZEND_ARG_INFO(0, needle)
275 	ZEND_ARG_INFO(0, offset)
276 	ZEND_ARG_INFO(0, encoding)
277 ZEND_END_ARG_INFO()
278 
279 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strstr, 0, 0, 2)
280 	ZEND_ARG_INFO(0, haystack)
281 	ZEND_ARG_INFO(0, needle)
282 	ZEND_ARG_INFO(0, part)
283 	ZEND_ARG_INFO(0, encoding)
284 ZEND_END_ARG_INFO()
285 
286 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrchr, 0, 0, 2)
287 	ZEND_ARG_INFO(0, haystack)
288 	ZEND_ARG_INFO(0, needle)
289 	ZEND_ARG_INFO(0, part)
290 	ZEND_ARG_INFO(0, encoding)
291 ZEND_END_ARG_INFO()
292 
293 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_stristr, 0, 0, 2)
294 	ZEND_ARG_INFO(0, haystack)
295 	ZEND_ARG_INFO(0, needle)
296 	ZEND_ARG_INFO(0, part)
297 	ZEND_ARG_INFO(0, encoding)
298 ZEND_END_ARG_INFO()
299 
300 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrichr, 0, 0, 2)
301 	ZEND_ARG_INFO(0, haystack)
302 	ZEND_ARG_INFO(0, needle)
303 	ZEND_ARG_INFO(0, part)
304 	ZEND_ARG_INFO(0, encoding)
305 ZEND_END_ARG_INFO()
306 
307 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substr_count, 0, 0, 2)
308 	ZEND_ARG_INFO(0, haystack)
309 	ZEND_ARG_INFO(0, needle)
310 	ZEND_ARG_INFO(0, encoding)
311 ZEND_END_ARG_INFO()
312 
313 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substr, 0, 0, 2)
314 	ZEND_ARG_INFO(0, str)
315 	ZEND_ARG_INFO(0, start)
316 	ZEND_ARG_INFO(0, length)
317 	ZEND_ARG_INFO(0, encoding)
318 ZEND_END_ARG_INFO()
319 
320 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strcut, 0, 0, 2)
321 	ZEND_ARG_INFO(0, str)
322 	ZEND_ARG_INFO(0, start)
323 	ZEND_ARG_INFO(0, length)
324 	ZEND_ARG_INFO(0, encoding)
325 ZEND_END_ARG_INFO()
326 
327 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strwidth, 0, 0, 1)
328 	ZEND_ARG_INFO(0, str)
329 	ZEND_ARG_INFO(0, encoding)
330 ZEND_END_ARG_INFO()
331 
332 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strimwidth, 0, 0, 3)
333 	ZEND_ARG_INFO(0, str)
334 	ZEND_ARG_INFO(0, start)
335 	ZEND_ARG_INFO(0, width)
336 	ZEND_ARG_INFO(0, trimmarker)
337 	ZEND_ARG_INFO(0, encoding)
338 ZEND_END_ARG_INFO()
339 
340 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_encoding, 0, 0, 2)
341 	ZEND_ARG_INFO(0, str)
342 	ZEND_ARG_INFO(0, to)
343 	ZEND_ARG_INFO(0, from)
344 ZEND_END_ARG_INFO()
345 
346 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_case, 0, 0, 2)
347 	ZEND_ARG_INFO(0, sourcestring)
348 	ZEND_ARG_INFO(0, mode)
349 	ZEND_ARG_INFO(0, encoding)
350 ZEND_END_ARG_INFO()
351 
352 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strtoupper, 0, 0, 1)
353 	ZEND_ARG_INFO(0, sourcestring)
354 	ZEND_ARG_INFO(0, encoding)
355 ZEND_END_ARG_INFO()
356 
357 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strtolower, 0, 0, 1)
358 	ZEND_ARG_INFO(0, sourcestring)
359 	ZEND_ARG_INFO(0, encoding)
360 ZEND_END_ARG_INFO()
361 
362 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_detect_encoding, 0, 0, 1)
363 	ZEND_ARG_INFO(0, str)
364 	ZEND_ARG_INFO(0, encoding_list)
365 	ZEND_ARG_INFO(0, strict)
366 ZEND_END_ARG_INFO()
367 
368 ZEND_BEGIN_ARG_INFO(arginfo_mb_list_encodings, 0)
369 ZEND_END_ARG_INFO()
370 
371 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encoding_aliases, 0, 0, 1)
372 	ZEND_ARG_INFO(0, encoding)
373 ZEND_END_ARG_INFO()
374 
375 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encode_mimeheader, 0, 0, 1)
376 	ZEND_ARG_INFO(0, str)
377 	ZEND_ARG_INFO(0, charset)
378 	ZEND_ARG_INFO(0, transfer)
379 	ZEND_ARG_INFO(0, linefeed)
380 	ZEND_ARG_INFO(0, indent)
381 ZEND_END_ARG_INFO()
382 
383 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_decode_mimeheader, 0, 0, 1)
384 	ZEND_ARG_INFO(0, string)
385 ZEND_END_ARG_INFO()
386 
387 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_kana, 0, 0, 1)
388 	ZEND_ARG_INFO(0, str)
389 	ZEND_ARG_INFO(0, option)
390 	ZEND_ARG_INFO(0, encoding)
391 ZEND_END_ARG_INFO()
392 
393 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_variables, 0, 0, 3)
394 	ZEND_ARG_INFO(0, to)
395 	ZEND_ARG_INFO(0, from)
396 	ZEND_ARG_VARIADIC_INFO(1, vars)
397 ZEND_END_ARG_INFO()
398 
399 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encode_numericentity, 0, 0, 2)
400 	ZEND_ARG_INFO(0, string)
401 	ZEND_ARG_INFO(0, convmap)
402 	ZEND_ARG_INFO(0, encoding)
403 	ZEND_ARG_INFO(0, is_hex)
404 ZEND_END_ARG_INFO()
405 
406 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_decode_numericentity, 0, 0, 2)
407 	ZEND_ARG_INFO(0, string)
408 	ZEND_ARG_INFO(0, convmap)
409 	ZEND_ARG_INFO(0, encoding)
410 	ZEND_ARG_INFO(0, is_hex)
411 ZEND_END_ARG_INFO()
412 
413 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_send_mail, 0, 0, 3)
414 	ZEND_ARG_INFO(0, to)
415 	ZEND_ARG_INFO(0, subject)
416 	ZEND_ARG_INFO(0, message)
417 	ZEND_ARG_INFO(0, additional_headers)
418 	ZEND_ARG_INFO(0, additional_parameters)
419 ZEND_END_ARG_INFO()
420 
421 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_get_info, 0, 0, 0)
422 	ZEND_ARG_INFO(0, type)
423 ZEND_END_ARG_INFO()
424 
425 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_check_encoding, 0, 0, 0)
426 	ZEND_ARG_INFO(0, var)
427 	ZEND_ARG_INFO(0, encoding)
428 ZEND_END_ARG_INFO()
429 
430 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_scrub, 0, 0, 1)
431 	ZEND_ARG_INFO(0, str)
432 	ZEND_ARG_INFO(0, encoding)
433 ZEND_END_ARG_INFO()
434 
435 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ord, 0, 0, 1)
436 	ZEND_ARG_INFO(0, str)
437 	ZEND_ARG_INFO(0, encoding)
438 ZEND_END_ARG_INFO()
439 
440 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_chr, 0, 0, 1)
441 	ZEND_ARG_INFO(0, cp)
442 	ZEND_ARG_INFO(0, encoding)
443 ZEND_END_ARG_INFO()
444 
445 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_regex_encoding, 0, 0, 0)
446 	ZEND_ARG_INFO(0, encoding)
447 ZEND_END_ARG_INFO()
448 
449 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg, 0, 0, 2)
450 	ZEND_ARG_INFO(0, pattern)
451 	ZEND_ARG_INFO(0, string)
452 	ZEND_ARG_INFO(1, registers)
453 ZEND_END_ARG_INFO()
454 
455 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_eregi, 0, 0, 2)
456 	ZEND_ARG_INFO(0, pattern)
457 	ZEND_ARG_INFO(0, string)
458 	ZEND_ARG_INFO(1, registers)
459 ZEND_END_ARG_INFO()
460 
461 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_replace, 0, 0, 3)
462 	ZEND_ARG_INFO(0, pattern)
463 	ZEND_ARG_INFO(0, replacement)
464 	ZEND_ARG_INFO(0, string)
465 	ZEND_ARG_INFO(0, option)
466 ZEND_END_ARG_INFO()
467 
468 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_eregi_replace, 0, 0, 3)
469 	ZEND_ARG_INFO(0, pattern)
470 	ZEND_ARG_INFO(0, replacement)
471 	ZEND_ARG_INFO(0, string)
472 	ZEND_ARG_INFO(0, option)
473 ZEND_END_ARG_INFO()
474 
475 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_replace_callback, 0, 0, 3)
476 	ZEND_ARG_INFO(0, pattern)
477 	ZEND_ARG_INFO(0, callback)
478 	ZEND_ARG_INFO(0, string)
479 	ZEND_ARG_INFO(0, option)
480 ZEND_END_ARG_INFO()
481 
482 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_split, 0, 0, 2)
483 	ZEND_ARG_INFO(0, pattern)
484 	ZEND_ARG_INFO(0, string)
485 	ZEND_ARG_INFO(0, limit)
486 ZEND_END_ARG_INFO()
487 
488 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_match, 0, 0, 2)
489 	ZEND_ARG_INFO(0, pattern)
490 	ZEND_ARG_INFO(0, string)
491 	ZEND_ARG_INFO(0, option)
492 ZEND_END_ARG_INFO()
493 
494 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search, 0, 0, 0)
495 	ZEND_ARG_INFO(0, pattern)
496 	ZEND_ARG_INFO(0, option)
497 ZEND_END_ARG_INFO()
498 
499 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_pos, 0, 0, 0)
500 	ZEND_ARG_INFO(0, pattern)
501 	ZEND_ARG_INFO(0, option)
502 ZEND_END_ARG_INFO()
503 
504 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_regs, 0, 0, 0)
505 	ZEND_ARG_INFO(0, pattern)
506 	ZEND_ARG_INFO(0, option)
507 ZEND_END_ARG_INFO()
508 
509 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_init, 0, 0, 1)
510 	ZEND_ARG_INFO(0, string)
511 	ZEND_ARG_INFO(0, pattern)
512 	ZEND_ARG_INFO(0, option)
513 ZEND_END_ARG_INFO()
514 
515 ZEND_BEGIN_ARG_INFO(arginfo_mb_ereg_search_getregs, 0)
516 ZEND_END_ARG_INFO()
517 
518 ZEND_BEGIN_ARG_INFO(arginfo_mb_ereg_search_getpos, 0)
519 ZEND_END_ARG_INFO()
520 
521 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_setpos, 0, 0, 1)
522 	ZEND_ARG_INFO(0, position)
523 ZEND_END_ARG_INFO()
524 
525 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_regex_set_options, 0, 0, 0)
526 	ZEND_ARG_INFO(0, options)
527 ZEND_END_ARG_INFO()
528 /* }}} */
529 
530 /* {{{ zend_function_entry mbstring_functions[] */
531 static const zend_function_entry mbstring_functions[] = {
532 	PHP_FE(mb_convert_case,			arginfo_mb_convert_case)
533 	PHP_FE(mb_strtoupper,			arginfo_mb_strtoupper)
534 	PHP_FE(mb_strtolower,			arginfo_mb_strtolower)
535 	PHP_FE(mb_language,				arginfo_mb_language)
536 	PHP_FE(mb_internal_encoding,	arginfo_mb_internal_encoding)
537 	PHP_FE(mb_http_input,			arginfo_mb_http_input)
538 	PHP_FE(mb_http_output,			arginfo_mb_http_output)
539 	PHP_FE(mb_detect_order,			arginfo_mb_detect_order)
540 	PHP_FE(mb_substitute_character,	arginfo_mb_substitute_character)
541 	PHP_FE(mb_parse_str,			arginfo_mb_parse_str)
542 	PHP_FE(mb_output_handler,		arginfo_mb_output_handler)
543 	PHP_FE(mb_preferred_mime_name,	arginfo_mb_preferred_mime_name)
544 	PHP_FE(mb_strlen,				arginfo_mb_strlen)
545 	PHP_FE(mb_strpos,				arginfo_mb_strpos)
546 	PHP_FE(mb_strrpos,				arginfo_mb_strrpos)
547 	PHP_FE(mb_stripos,				arginfo_mb_stripos)
548 	PHP_FE(mb_strripos,				arginfo_mb_strripos)
549 	PHP_FE(mb_strstr,				arginfo_mb_strstr)
550 	PHP_FE(mb_strrchr,				arginfo_mb_strrchr)
551 	PHP_FE(mb_stristr,				arginfo_mb_stristr)
552 	PHP_FE(mb_strrichr,				arginfo_mb_strrichr)
553 	PHP_FE(mb_substr_count,			arginfo_mb_substr_count)
554 	PHP_FE(mb_substr,				arginfo_mb_substr)
555 	PHP_FE(mb_strcut,				arginfo_mb_strcut)
556 	PHP_FE(mb_strwidth,				arginfo_mb_strwidth)
557 	PHP_FE(mb_strimwidth,			arginfo_mb_strimwidth)
558 	PHP_FE(mb_convert_encoding,		arginfo_mb_convert_encoding)
559 	PHP_FE(mb_detect_encoding,		arginfo_mb_detect_encoding)
560 	PHP_FE(mb_list_encodings,		arginfo_mb_list_encodings)
561 	PHP_FE(mb_encoding_aliases,		arginfo_mb_encoding_aliases)
562 	PHP_FE(mb_convert_kana,			arginfo_mb_convert_kana)
563 	PHP_FE(mb_encode_mimeheader,	arginfo_mb_encode_mimeheader)
564 	PHP_FE(mb_decode_mimeheader,	arginfo_mb_decode_mimeheader)
565 	PHP_FE(mb_convert_variables,	arginfo_mb_convert_variables)
566 	PHP_FE(mb_encode_numericentity,	arginfo_mb_encode_numericentity)
567 	PHP_FE(mb_decode_numericentity,	arginfo_mb_decode_numericentity)
568 	PHP_FE(mb_send_mail,			arginfo_mb_send_mail)
569 	PHP_FE(mb_get_info,				arginfo_mb_get_info)
570 	PHP_FE(mb_check_encoding,		arginfo_mb_check_encoding)
571 	PHP_FE(mb_ord,					arginfo_mb_ord)
572 	PHP_FE(mb_chr,					arginfo_mb_chr)
573 	PHP_FE(mb_scrub,				arginfo_mb_scrub)
574 #if HAVE_MBREGEX
575 	PHP_MBREGEX_FUNCTION_ENTRIES
576 #endif
577 	PHP_FE_END
578 };
579 /* }}} */
580 
581 /* {{{ zend_module_entry mbstring_module_entry */
582 zend_module_entry mbstring_module_entry = {
583 	STANDARD_MODULE_HEADER,
584 	"mbstring",
585 	mbstring_functions,
586 	PHP_MINIT(mbstring),
587 	PHP_MSHUTDOWN(mbstring),
588 	PHP_RINIT(mbstring),
589 	PHP_RSHUTDOWN(mbstring),
590 	PHP_MINFO(mbstring),
591 	PHP_MBSTRING_VERSION,
592 	PHP_MODULE_GLOBALS(mbstring),
593 	PHP_GINIT(mbstring),
594 	PHP_GSHUTDOWN(mbstring),
595 	NULL,
596 	STANDARD_MODULE_PROPERTIES_EX
597 };
598 /* }}} */
599 
600 /* {{{ static sapi_post_entry php_post_entries[] */
601 static const sapi_post_entry php_post_entries[] = {
602 	{ DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data,	php_std_post_handler },
603 	{ MULTIPART_CONTENT_TYPE,    sizeof(MULTIPART_CONTENT_TYPE)-1,    NULL,                         rfc1867_post_handler },
604 	{ NULL, 0, NULL, NULL }
605 };
606 /* }}} */
607 
608 #ifdef COMPILE_DL_MBSTRING
609 #ifdef ZTS
610 ZEND_TSRMLS_CACHE_DEFINE()
611 #endif
ZEND_GET_MODULE(mbstring)612 ZEND_GET_MODULE(mbstring)
613 #endif
614 
615 static char *get_internal_encoding(void) {
616 	if (PG(internal_encoding) && PG(internal_encoding)[0]) {
617 		return PG(internal_encoding);
618 	} else if (SG(default_charset)) {
619 		return SG(default_charset);
620 	}
621 	return "";
622 }
623 
get_input_encoding(void)624 static char *get_input_encoding(void) {
625 	if (PG(input_encoding) && PG(input_encoding)[0]) {
626 		return PG(input_encoding);
627 	} else if (SG(default_charset)) {
628 		return SG(default_charset);
629 	}
630 	return "";
631 }
632 
get_output_encoding(void)633 static char *get_output_encoding(void) {
634 	if (PG(output_encoding) && PG(output_encoding)[0]) {
635 		return PG(output_encoding);
636 	} else if (SG(default_charset)) {
637 		return SG(default_charset);
638 	}
639 	return "";
640 }
641 
642 
643 /* {{{ allocators */
_php_mb_allocators_malloc(size_t sz)644 static void *_php_mb_allocators_malloc(size_t sz)
645 {
646 	return emalloc(sz);
647 }
648 
_php_mb_allocators_realloc(void * ptr,size_t sz)649 static void *_php_mb_allocators_realloc(void *ptr, size_t sz)
650 {
651 	return erealloc(ptr, sz);
652 }
653 
_php_mb_allocators_calloc(size_t nelems,size_t szelem)654 static void *_php_mb_allocators_calloc(size_t nelems, size_t szelem)
655 {
656 	return ecalloc(nelems, szelem);
657 }
658 
_php_mb_allocators_free(void * ptr)659 static void _php_mb_allocators_free(void *ptr)
660 {
661 	efree(ptr);
662 }
663 
_php_mb_allocators_pmalloc(size_t sz)664 static void *_php_mb_allocators_pmalloc(size_t sz)
665 {
666 	return pemalloc(sz, 1);
667 }
668 
_php_mb_allocators_prealloc(void * ptr,size_t sz)669 static void *_php_mb_allocators_prealloc(void *ptr, size_t sz)
670 {
671 	return perealloc(ptr, sz, 1);
672 }
673 
_php_mb_allocators_pfree(void * ptr)674 static void _php_mb_allocators_pfree(void *ptr)
675 {
676 	pefree(ptr, 1);
677 }
678 
679 static const mbfl_allocators _php_mb_allocators = {
680 	_php_mb_allocators_malloc,
681 	_php_mb_allocators_realloc,
682 	_php_mb_allocators_calloc,
683 	_php_mb_allocators_free,
684 	_php_mb_allocators_pmalloc,
685 	_php_mb_allocators_prealloc,
686 	_php_mb_allocators_pfree
687 };
688 /* }}} */
689 
690 /* {{{ static sapi_post_entry mbstr_post_entries[] */
691 static const sapi_post_entry mbstr_post_entries[] = {
692 	{ DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data, php_mb_post_handler },
693 	{ MULTIPART_CONTENT_TYPE,    sizeof(MULTIPART_CONTENT_TYPE)-1,    NULL,                         rfc1867_post_handler },
694 	{ NULL, 0, NULL, NULL }
695 };
696 /* }}} */
697 
php_mb_get_encoding(const char * encoding_name)698 static const mbfl_encoding *php_mb_get_encoding(const char *encoding_name) {
699 	if (encoding_name) {
700 		const mbfl_encoding *encoding;
701 		if (MBSTRG(last_used_encoding_name)
702 				&& !strcasecmp(encoding_name, MBSTRG(last_used_encoding_name))) {
703 			return MBSTRG(last_used_encoding);
704 		}
705 
706 		encoding = mbfl_name2encoding(encoding_name);
707 		if (!encoding) {
708 			php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", encoding_name);
709 			return NULL;
710 		}
711 
712 		if (MBSTRG(last_used_encoding_name)) {
713 			efree(MBSTRG(last_used_encoding_name));
714 		}
715 		MBSTRG(last_used_encoding_name) = estrdup(encoding_name);
716 		MBSTRG(last_used_encoding) = encoding;
717 		return encoding;
718 	} else {
719 		return MBSTRG(current_internal_encoding);
720 	}
721 }
722 
723 /* {{{ static int php_mb_parse_encoding_list()
724  *  Return FAILURE if input contains any illegal encoding, otherwise SUCCESS.
725  *  Even if any illegal encoding is detected the result may contain a list
726  *  of parsed encodings.
727  */
728 static int
php_mb_parse_encoding_list(const char * value,size_t value_length,const mbfl_encoding *** return_list,size_t * return_size,int persistent)729 php_mb_parse_encoding_list(const char *value, size_t value_length, const mbfl_encoding ***return_list, size_t *return_size, int persistent)
730 {
731 	int bauto, ret = SUCCESS;
732 	size_t n, size;
733 	char *p, *p1, *p2, *endp, *tmpstr;
734 	const mbfl_encoding **entry, **list;
735 
736 	list = NULL;
737 	if (value == NULL || value_length == 0) {
738 		if (return_list) {
739 			*return_list = NULL;
740 		}
741 		if (return_size) {
742 			*return_size = 0;
743 		}
744 		return FAILURE;
745 	} else {
746 		/* copy the value string for work */
747 		if (value[0]=='"' && value[value_length-1]=='"' && value_length>2) {
748 			tmpstr = (char *)estrndup(value+1, value_length-2);
749 			value_length -= 2;
750 		}
751 		else
752 			tmpstr = (char *)estrndup(value, value_length);
753 		/* count the number of listed encoding names */
754 		endp = tmpstr + value_length;
755 		n = 1;
756 		p1 = tmpstr;
757 		while ((p2 = (char*)php_memnstr(p1, ",", 1, endp)) != NULL) {
758 			p1 = p2 + 1;
759 			n++;
760 		}
761 		size = n + MBSTRG(default_detect_order_list_size);
762 		/* make list */
763 		list = (const mbfl_encoding **)pecalloc(size, sizeof(mbfl_encoding*), persistent);
764 		entry = list;
765 		n = 0;
766 		bauto = 0;
767 		p1 = tmpstr;
768 		do {
769 			p2 = p = (char*)php_memnstr(p1, ",", 1, endp);
770 			if (p == NULL) {
771 				p = endp;
772 			}
773 			*p = '\0';
774 			/* trim spaces */
775 			while (p1 < p && (*p1 == ' ' || *p1 == '\t')) {
776 				p1++;
777 			}
778 			p--;
779 			while (p > p1 && (*p == ' ' || *p == '\t')) {
780 				*p = '\0';
781 				p--;
782 			}
783 			/* convert to the encoding number and check encoding */
784 			if (strcasecmp(p1, "auto") == 0) {
785 				if (!bauto) {
786 					const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
787 					const size_t identify_list_size = MBSTRG(default_detect_order_list_size);
788 					size_t i;
789 					bauto = 1;
790 					for (i = 0; i < identify_list_size; i++) {
791 						*entry++ = mbfl_no2encoding(*src++);
792 						n++;
793 					}
794 				}
795 			} else {
796 				const mbfl_encoding *encoding = mbfl_name2encoding(p1);
797 				if (encoding) {
798 					*entry++ = encoding;
799 					n++;
800 				} else {
801 					ret = FAILURE;
802 				}
803 			}
804 			p1 = p2 + 1;
805 		} while (n < size && p2 != NULL);
806 		if (n > 0) {
807 			if (return_list) {
808 				*return_list = list;
809 			} else {
810 				pefree(list, persistent);
811 			}
812 		} else {
813 			pefree(list, persistent);
814 			if (return_list) {
815 				*return_list = NULL;
816 			}
817 			ret = FAILURE;
818 		}
819 		if (return_size) {
820 			*return_size = n;
821 		}
822 		efree(tmpstr);
823 	}
824 
825 	return ret;
826 }
827 /* }}} */
828 
829 /* {{{ static int php_mb_parse_encoding_array()
830  *  Return FAILURE if input contains any illegal encoding, otherwise SUCCESS.
831  *  Even if any illegal encoding is detected the result may contain a list
832  *  of parsed encodings.
833  */
834 static int
php_mb_parse_encoding_array(zval * array,const mbfl_encoding *** return_list,size_t * return_size,int persistent)835 php_mb_parse_encoding_array(zval *array, const mbfl_encoding ***return_list, size_t *return_size, int persistent)
836 {
837 	zval *hash_entry;
838 	HashTable *target_hash;
839 	int i, n, bauto, ret = SUCCESS;
840 	const mbfl_encoding **list, **entry;
841 	size_t size;
842 
843 	list = NULL;
844 	if (Z_TYPE_P(array) == IS_ARRAY) {
845 		target_hash = Z_ARRVAL_P(array);
846 		i = zend_hash_num_elements(target_hash);
847 		size = i + MBSTRG(default_detect_order_list_size);
848 		list = (const mbfl_encoding **)pecalloc(size, sizeof(mbfl_encoding*), persistent);
849 		entry = list;
850 		bauto = 0;
851 		n = 0;
852 		ZEND_HASH_FOREACH_VAL(target_hash, hash_entry) {
853 			convert_to_string_ex(hash_entry);
854 			if (strcasecmp(Z_STRVAL_P(hash_entry), "auto") == 0) {
855 				if (!bauto) {
856 					const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
857 					const size_t identify_list_size = MBSTRG(default_detect_order_list_size);
858 					size_t j;
859 
860 					bauto = 1;
861 					for (j = 0; j < identify_list_size; j++) {
862 						*entry++ = mbfl_no2encoding(*src++);
863 						n++;
864 					}
865 				}
866 			} else {
867 				const mbfl_encoding *encoding = mbfl_name2encoding(Z_STRVAL_P(hash_entry));
868 				if (encoding) {
869 					*entry++ = encoding;
870 					n++;
871 				} else {
872 					ret = FAILURE;
873 				}
874 			}
875 			i--;
876 		} ZEND_HASH_FOREACH_END();
877 		if (n > 0) {
878 			if (return_list) {
879 				*return_list = list;
880 			} else {
881 				pefree(list, persistent);
882 			}
883 		} else {
884 			pefree(list, persistent);
885 			if (return_list) {
886 				*return_list = NULL;
887 			}
888 			ret = FAILURE;
889 		}
890 		if (return_size) {
891 			*return_size = n;
892 		}
893 	}
894 
895 	return ret;
896 }
897 /* }}} */
898 
899 /* {{{ zend_multibyte interface */
php_mb_zend_encoding_fetcher(const char * encoding_name)900 static const zend_encoding* php_mb_zend_encoding_fetcher(const char *encoding_name)
901 {
902 	return (const zend_encoding*)mbfl_name2encoding(encoding_name);
903 }
904 
php_mb_zend_encoding_name_getter(const zend_encoding * encoding)905 static const char *php_mb_zend_encoding_name_getter(const zend_encoding *encoding)
906 {
907 	return ((const mbfl_encoding *)encoding)->name;
908 }
909 
php_mb_zend_encoding_lexer_compatibility_checker(const zend_encoding * _encoding)910 static int php_mb_zend_encoding_lexer_compatibility_checker(const zend_encoding *_encoding)
911 {
912 	const mbfl_encoding *encoding = (const mbfl_encoding*)_encoding;
913 	if (encoding->flag & MBFL_ENCTYPE_SBCS) {
914 		return 1;
915 	}
916 	if ((encoding->flag & (MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE)) == MBFL_ENCTYPE_MBCS) {
917 		return 1;
918 	}
919 	return 0;
920 }
921 
/* Identify the encoding of a byte buffer by delegating to
 * mbfl_identify_encoding() (last argument 0).
 *
 * When `list` is NULL the current per-request detect order list (and its
 * size) is used instead of the caller-supplied candidates. */
static const zend_encoding *php_mb_zend_encoding_detector(const unsigned char *arg_string, size_t arg_length, const zend_encoding **list, size_t list_size)
{
	mbfl_string subject;
	const mbfl_encoding **candidates = (const mbfl_encoding **)list;
	size_t candidate_count = list_size;

	if (candidates == NULL) {
		candidates = (const mbfl_encoding **)MBSTRG(current_detect_order_list);
		candidate_count = MBSTRG(current_detect_order_list_size);
	}

	mbfl_string_init(&subject);
	subject.no_language = MBSTRG(language);
	subject.val = (unsigned char *)arg_string;
	subject.len = arg_length;

	return (const zend_encoding *)mbfl_identify_encoding(&subject, candidates, candidate_count, 0);
}
937 
/* Convert `from` (from_length bytes, in encoding_from) to encoding_to.
 *
 * On success the converted buffer and its length are stored in *to and
 * *to_length and the position reported by mbfl_buffer_converter_feed2() is
 * returned. On any failure (converter creation, feeding, or result
 * retrieval) (size_t)-1 is returned and *to / *to_length are left untouched. */
static size_t php_mb_zend_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const zend_encoding *encoding_to, const zend_encoding *encoding_from)
{
	const mbfl_encoding *src_enc = (const mbfl_encoding *)encoding_from;
	const mbfl_encoding *dst_enc = (const mbfl_encoding *)encoding_to;
	mbfl_string input, output;
	mbfl_buffer_converter *converter;
	size_t consumed;
	size_t retval = (size_t)-1;

	/* describe the input string */
	mbfl_string_init(&input);
	mbfl_string_init(&output);
	input.encoding = src_enc;
	input.no_language = MBSTRG(language);
	input.val = (unsigned char *)from;
	input.len = from_length;

	/* set up the converter */
	converter = mbfl_buffer_converter_new(src_enc, dst_enc, input.len);
	if (converter == NULL) {
		return retval;
	}

	mbfl_buffer_converter_illegal_mode(converter, MBSTRG(current_filter_illegal_mode));
	mbfl_buffer_converter_illegal_substchar(converter, MBSTRG(current_filter_illegal_substchar));

	/* feed, flush and collect; a non-zero feed status is an error */
	if (mbfl_buffer_converter_feed2(converter, &input, &consumed) == 0) {
		mbfl_buffer_converter_flush(converter);
		if (mbfl_buffer_converter_result(converter, &output)) {
			*to = output.val;
			*to_length = output.len;
			retval = consumed;
		}
	}

	/* the converter is released on every path once it has been created */
	mbfl_buffer_converter_delete(converter);

	return retval;
}
983 
/* Engine-facing adapter around php_mb_parse_encoding_list(): only the type
 * of the output list pointer is cast; all arguments and the return value
 * are passed through unchanged. */
static int php_mb_zend_encoding_list_parser(const char *encoding_list, size_t encoding_list_len, const zend_encoding ***return_list, size_t *return_size, int persistent)
{
	const mbfl_encoding ***mb_list = (const mbfl_encoding ***)return_list;

	return php_mb_parse_encoding_list(encoding_list, encoding_list_len, mb_list, return_size, persistent);
}
988 
php_mb_zend_internal_encoding_getter(void)989 static const zend_encoding *php_mb_zend_internal_encoding_getter(void)
990 {
991 	return (const zend_encoding *)MBSTRG(internal_encoding);
992 }
993 
php_mb_zend_internal_encoding_setter(const zend_encoding * encoding)994 static int php_mb_zend_internal_encoding_setter(const zend_encoding *encoding)
995 {
996 	MBSTRG(internal_encoding) = (const mbfl_encoding *)encoding;
997 	return SUCCESS;
998 }
999 
/* Callback table that MINIT passes to zend_multibyte_set_functions().
 * The initializer is positional: a provider name string followed by the
 * adapter functions defined above, in the order the engine struct declares
 * them (field names are not visible in this file). */
static zend_multibyte_functions php_mb_zend_multibyte_functions = {
	"mbstring",
	php_mb_zend_encoding_fetcher,                     /* name -> encoding lookup */
	php_mb_zend_encoding_name_getter,                 /* encoding -> name */
	php_mb_zend_encoding_lexer_compatibility_checker, /* 1/0 lexer compatibility */
	php_mb_zend_encoding_detector,                    /* identify encoding of a buffer */
	php_mb_zend_encoding_converter,                   /* convert between two encodings */
	php_mb_zend_encoding_list_parser,                 /* parse an encoding list string */
	php_mb_zend_internal_encoding_getter,             /* read MBSTRG(internal_encoding) */
	php_mb_zend_internal_encoding_setter              /* write MBSTRG(internal_encoding) */
};
1011 /* }}} */
1012 
/* Forward declarations of the small regex abstraction used for the
 * mbstring.http_output_conv_mimetypes pattern. Exactly one implementation
 * (Oniguruma or PCRE2) is selected by the preprocessor below; the compiled
 * regex is passed around as an opaque void pointer. */
static void *_php_mb_compile_regex(const char *pattern);
static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len);
static void _php_mb_free_regex(void *opaque);
1016 
1017 #if HAVE_ONIG
1018 /* {{{ _php_mb_compile_regex */
_php_mb_compile_regex(const char * pattern)1019 static void *_php_mb_compile_regex(const char *pattern)
1020 {
1021 	php_mb_regex_t *retval;
1022 	OnigErrorInfo err_info;
1023 	int err_code;
1024 
1025 	if ((err_code = onig_new(&retval,
1026 			(const OnigUChar *)pattern,
1027 			(const OnigUChar *)pattern + strlen(pattern),
1028 			ONIG_OPTION_IGNORECASE | ONIG_OPTION_DONT_CAPTURE_GROUP,
1029 			ONIG_ENCODING_ASCII, &OnigSyntaxPerl, &err_info))) {
1030 		OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
1031 		onig_error_code_to_str(err_str, err_code, err_info);
1032 		php_error_docref(NULL, E_WARNING, "%s: %s", pattern, err_str);
1033 		retval = NULL;
1034 	}
1035 	return retval;
1036 }
1037 /* }}} */
1038 
1039 /* {{{ _php_mb_match_regex */
_php_mb_match_regex(void * opaque,const char * str,size_t str_len)1040 static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len)
1041 {
1042 	OnigMatchParam *mp = onig_new_match_param();
1043 	int err;
1044 	onig_initialize_match_param(mp);
1045 	if (!ZEND_LONG_UINT_OVFL(MBSTRG(regex_stack_limit))) {
1046 		onig_set_match_stack_limit_size_of_match_param(mp, (unsigned int)MBSTRG(regex_stack_limit));
1047 	}
1048 	/* search */
1049 	err = onig_search_with_param((php_mb_regex_t *)opaque, (const OnigUChar *)str,
1050 		(const OnigUChar*)str + str_len, (const OnigUChar *)str,
1051 		(const OnigUChar*)str + str_len, NULL, ONIG_OPTION_NONE, mp);
1052 	onig_free_match_param(mp);
1053 	return err >= 0;
1054 }
1055 /* }}} */
1056 
1057 /* {{{ _php_mb_free_regex */
_php_mb_free_regex(void * opaque)1058 static void _php_mb_free_regex(void *opaque)
1059 {
1060 	onig_free((php_mb_regex_t *)opaque);
1061 }
1062 /* }}} */
1063 #elif HAVE_PCRE || HAVE_BUNDLED_PCRE
1064 /* {{{ _php_mb_compile_regex */
_php_mb_compile_regex(const char * pattern)1065 static void *_php_mb_compile_regex(const char *pattern)
1066 {
1067 	pcre2_code *retval;
1068 	PCRE2_SIZE err_offset;
1069 	int errnum;
1070 
1071 	if (!(retval = pcre2_compile((PCRE2_SPTR)pattern, PCRE2_ZERO_TERMINATED,
1072 			PCRE2_CASELESS, &errnum, &err_offset, php_pcre_cctx()))) {
1073 		PCRE2_UCHAR err_str[128];
1074 		pcre2_get_error_message(errnum, err_str, sizeof(err_str));
1075 		php_error_docref(NULL, E_WARNING, "%s (offset=%zu): %s", pattern, err_offset, err_str);
1076 	}
1077 	return retval;
1078 }
1079 /* }}} */
1080 
1081 /* {{{ _php_mb_match_regex */
_php_mb_match_regex(void * opaque,const char * str,size_t str_len)1082 static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len)
1083 {
1084 	int res;
1085 
1086 	pcre2_match_data *match_data = php_pcre_create_match_data(0, opaque);
1087 	if (NULL == match_data) {
1088 		pcre2_code_free(opaque);
1089 		php_error_docref(NULL, E_WARNING, "Cannot allocate match data");
1090 		return FAILURE;
1091 	}
1092 	res = pcre2_match(opaque, (PCRE2_SPTR)str, str_len, 0, 0, match_data, php_pcre_mctx()) >= 0;
1093 	php_pcre_free_match_data(match_data);
1094 
1095 	return res;
1096 }
1097 /* }}} */
1098 
1099 /* {{{ _php_mb_free_regex */
_php_mb_free_regex(void * opaque)1100 static void _php_mb_free_regex(void *opaque)
1101 {
1102 	pcre2_code_free(opaque);
1103 }
1104 /* }}} */
1105 #endif
1106 
1107 /* {{{ php_mb_nls_get_default_detect_order_list */
php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang,enum mbfl_no_encoding ** plist,size_t * plist_size)1108 static int php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang, enum mbfl_no_encoding **plist, size_t *plist_size)
1109 {
1110 	size_t i;
1111 
1112 	*plist = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut;
1113 	*plist_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
1114 
1115 	for (i = 0; i < sizeof(php_mb_default_identify_list) / sizeof(php_mb_default_identify_list[0]); i++) {
1116 		if (php_mb_default_identify_list[i].lang == lang) {
1117 			*plist = (enum mbfl_no_encoding *)php_mb_default_identify_list[i].list;
1118 			*plist_size = php_mb_default_identify_list[i].list_size;
1119 			return 1;
1120 		}
1121 	}
1122 	return 0;
1123 }
1124 /* }}} */
1125 
php_mb_rfc1867_substring_conf(const zend_encoding * encoding,char * start,size_t len,char quote)1126 static char *php_mb_rfc1867_substring_conf(const zend_encoding *encoding, char *start, size_t len, char quote)
1127 {
1128 	char *result = emalloc(len + 2);
1129 	char *resp = result;
1130 	size_t i;
1131 
1132 	for (i = 0; i < len && start[i] != quote; ++i) {
1133 		if (start[i] == '\\' && (start[i + 1] == '\\' || (quote && start[i + 1] == quote))) {
1134 			*resp++ = start[++i];
1135 		} else {
1136 			size_t j = php_mb_mbchar_bytes_ex(start+i, (const mbfl_encoding *)encoding);
1137 
1138 			while (j-- > 0 && i < len) {
1139 				*resp++ = start[i++];
1140 			}
1141 			--i;
1142 		}
1143 	}
1144 
1145 	*resp = '\0';
1146 	return result;
1147 }
1148 
php_mb_rfc1867_getword(const zend_encoding * encoding,char ** line,char stop)1149 static char *php_mb_rfc1867_getword(const zend_encoding *encoding, char **line, char stop) /* {{{ */
1150 {
1151 	char *pos = *line, quote;
1152 	char *res;
1153 
1154 	while (*pos && *pos != stop) {
1155 		if ((quote = *pos) == '"' || quote == '\'') {
1156 			++pos;
1157 			while (*pos && *pos != quote) {
1158 				if (*pos == '\\' && pos[1] && pos[1] == quote) {
1159 					pos += 2;
1160 				} else {
1161 					++pos;
1162 				}
1163 			}
1164 			if (*pos) {
1165 				++pos;
1166 			}
1167 		} else {
1168 			pos += php_mb_mbchar_bytes_ex(pos, (const mbfl_encoding *)encoding);
1169 
1170 		}
1171 	}
1172 	if (*pos == '\0') {
1173 		res = estrdup(*line);
1174 		*line += strlen(*line);
1175 		return res;
1176 	}
1177 
1178 	res = estrndup(*line, pos - *line);
1179 
1180 	while (*pos == stop) {
1181 		pos += php_mb_mbchar_bytes_ex(pos, (const mbfl_encoding *)encoding);
1182 	}
1183 
1184 	*line = pos;
1185 	return res;
1186 }
1187 /* }}} */
1188 
/* Extract one configuration word from `str` into a new emalloc'ed string.
 *
 * Leading whitespace is skipped. An empty remainder yields "". If the word
 * starts with ' or ", the text after that quote is handed to
 * php_mb_rfc1867_substring_conf() with the quote as terminator; otherwise
 * the run of non-whitespace bytes is handed over with no terminator. */
static char *php_mb_rfc1867_getword_conf(const zend_encoding *encoding, char *str) /* {{{ */
{
	char *word = str;
	char *word_end;

	for (; *word && isspace(*(unsigned char *)word); ++word) {
		/* skip leading whitespace */
	}

	if (*word == '\0') {
		return estrdup("");
	}

	if (*word == '"' || *word == '\'') {
		const char quote = *word;
		char *quoted = word + 1;

		return php_mb_rfc1867_substring_conf(encoding, quoted, strlen(quoted), quote);
	}

	for (word_end = word; *word_end && !isspace(*(unsigned char *)word_end); ++word_end) {
		/* find the end of the bare word */
	}

	return php_mb_rfc1867_substring_conf(encoding, word, word_end - word, 0);
}
1213 /* }}} */
1214 
php_mb_rfc1867_basename(const zend_encoding * encoding,char * filename)1215 static char *php_mb_rfc1867_basename(const zend_encoding *encoding, char *filename) /* {{{ */
1216 {
1217 	char *s, *s2;
1218 	const size_t filename_len = strlen(filename);
1219 
1220 	/* The \ check should technically be needed for win32 systems only where
1221 	 * it is a valid path separator. However, IE in all it's wisdom always sends
1222 	 * the full path of the file on the user's filesystem, which means that unless
1223 	 * the user does basename() they get a bogus file name. Until IE's user base drops
1224 	 * to nill or problem is fixed this code must remain enabled for all systems. */
1225 	s = php_mb_safe_strrchr_ex(filename, '\\', filename_len, (const mbfl_encoding *)encoding);
1226 	s2 = php_mb_safe_strrchr_ex(filename, '/', filename_len, (const mbfl_encoding *)encoding);
1227 
1228 	if (s && s2) {
1229 		if (s > s2) {
1230 			return ++s;
1231 		} else {
1232 			return ++s2;
1233 		}
1234 	} else if (s) {
1235 		return ++s;
1236 	} else if (s2) {
1237 		return ++s2;
1238 	} else {
1239 		return filename;
1240 	}
1241 }
1242 /* }}} */
1243 
1244 /* {{{ php.ini directive handler */
1245 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_language) */
PHP_INI_MH(OnUpdate_mbstring_language)1246 static PHP_INI_MH(OnUpdate_mbstring_language)
1247 {
1248 	enum mbfl_no_language no_language;
1249 
1250 	no_language = mbfl_name2no_language(ZSTR_VAL(new_value));
1251 	if (no_language == mbfl_no_language_invalid) {
1252 		MBSTRG(language) = mbfl_no_language_neutral;
1253 		return FAILURE;
1254 	}
1255 	MBSTRG(language) = no_language;
1256 	php_mb_nls_get_default_detect_order_list(no_language, &MBSTRG(default_detect_order_list), &MBSTRG(default_detect_order_list_size));
1257 	return SUCCESS;
1258 }
1259 /* }}} */
1260 
1261 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_detect_order) */
PHP_INI_MH(OnUpdate_mbstring_detect_order)1262 static PHP_INI_MH(OnUpdate_mbstring_detect_order)
1263 {
1264 	const mbfl_encoding **list;
1265 	size_t size;
1266 
1267 	if (!new_value) {
1268 		if (MBSTRG(detect_order_list)) {
1269 			pefree(MBSTRG(detect_order_list), 1);
1270 		}
1271 		MBSTRG(detect_order_list) = NULL;
1272 		MBSTRG(detect_order_list_size) = 0;
1273 		return SUCCESS;
1274 	}
1275 
1276 	if (FAILURE == php_mb_parse_encoding_list(ZSTR_VAL(new_value), ZSTR_LEN(new_value), &list, &size, 1)) {
1277 		return FAILURE;
1278 	}
1279 
1280 	if (MBSTRG(detect_order_list)) {
1281 		pefree(MBSTRG(detect_order_list), 1);
1282 	}
1283 	MBSTRG(detect_order_list) = list;
1284 	MBSTRG(detect_order_list_size) = size;
1285 	return SUCCESS;
1286 }
1287 /* }}} */
1288 
1289 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_input) */
PHP_INI_MH(OnUpdate_mbstring_http_input)1290 static PHP_INI_MH(OnUpdate_mbstring_http_input)
1291 {
1292 	const mbfl_encoding **list;
1293 	size_t size;
1294 
1295 	if (!new_value || !ZSTR_VAL(new_value)) {
1296 		if (MBSTRG(http_input_list)) {
1297 			pefree(MBSTRG(http_input_list), 1);
1298 		}
1299 		if (SUCCESS == php_mb_parse_encoding_list(get_input_encoding(), strlen(get_input_encoding())+1, &list, &size, 1)) {
1300 			MBSTRG(http_input_list) = list;
1301 			MBSTRG(http_input_list_size) = size;
1302 			return SUCCESS;
1303 		}
1304 		MBSTRG(http_input_list) = NULL;
1305 		MBSTRG(http_input_list_size) = 0;
1306 		return SUCCESS;
1307 	}
1308 
1309 	if (FAILURE == php_mb_parse_encoding_list(ZSTR_VAL(new_value), ZSTR_LEN(new_value), &list, &size, 1)) {
1310 		return FAILURE;
1311 	}
1312 
1313 	if (MBSTRG(http_input_list)) {
1314 		pefree(MBSTRG(http_input_list), 1);
1315 	}
1316 	MBSTRG(http_input_list) = list;
1317 	MBSTRG(http_input_list_size) = size;
1318 
1319 	if (stage & (PHP_INI_STAGE_ACTIVATE | PHP_INI_STAGE_RUNTIME)) {
1320 		php_error_docref("ref.mbstring", E_DEPRECATED, "Use of mbstring.http_input is deprecated");
1321 	}
1322 
1323 	return SUCCESS;
1324 }
1325 /* }}} */
1326 
1327 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output) */
PHP_INI_MH(OnUpdate_mbstring_http_output)1328 static PHP_INI_MH(OnUpdate_mbstring_http_output)
1329 {
1330 	const mbfl_encoding *encoding;
1331 
1332 	if (new_value == NULL || ZSTR_LEN(new_value) == 0) {
1333 		encoding = mbfl_name2encoding(get_output_encoding());
1334 		if (!encoding) {
1335 			MBSTRG(http_output_encoding) = &mbfl_encoding_pass;
1336 			MBSTRG(current_http_output_encoding) = &mbfl_encoding_pass;
1337 			return SUCCESS;
1338 		}
1339 	} else {
1340 		encoding = mbfl_name2encoding(ZSTR_VAL(new_value));
1341 		if (!encoding) {
1342 			MBSTRG(http_output_encoding) = &mbfl_encoding_pass;
1343 			MBSTRG(current_http_output_encoding) = &mbfl_encoding_pass;
1344 			return FAILURE;
1345 		}
1346 	}
1347 	MBSTRG(http_output_encoding) = encoding;
1348 	MBSTRG(current_http_output_encoding) = encoding;
1349 
1350 	if (stage & (PHP_INI_STAGE_ACTIVATE | PHP_INI_STAGE_RUNTIME)) {
1351 		php_error_docref("ref.mbstring", E_DEPRECATED, "Use of mbstring.http_output is deprecated");
1352 	}
1353 
1354 	return SUCCESS;
1355 }
1356 /* }}} */
1357 
1358 /* {{{ static _php_mb_ini_mbstring_internal_encoding_set */
_php_mb_ini_mbstring_internal_encoding_set(const char * new_value,size_t new_value_length)1359 int _php_mb_ini_mbstring_internal_encoding_set(const char *new_value, size_t new_value_length)
1360 {
1361 	const mbfl_encoding *encoding;
1362 
1363 	if (!new_value || !new_value_length || !(encoding = mbfl_name2encoding(new_value))) {
1364 		/* falls back to UTF-8 if an unknown encoding name is given */
1365 		encoding = mbfl_no2encoding(mbfl_no_encoding_utf8);
1366 	}
1367 	MBSTRG(internal_encoding) = encoding;
1368 	MBSTRG(current_internal_encoding) = encoding;
1369 #if HAVE_MBREGEX
1370 	{
1371 		const char *enc_name = new_value;
1372 		if (FAILURE == php_mb_regex_set_default_mbctype(enc_name)) {
1373 			/* falls back to UTF-8 if an unknown encoding name is given */
1374 			enc_name = "UTF-8";
1375 			php_mb_regex_set_default_mbctype(enc_name);
1376 		}
1377 		php_mb_regex_set_mbctype(new_value);
1378 	}
1379 #endif
1380 	return SUCCESS;
1381 }
1382 /* }}} */
1383 
1384 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_internal_encoding) */
PHP_INI_MH(OnUpdate_mbstring_internal_encoding)1385 static PHP_INI_MH(OnUpdate_mbstring_internal_encoding)
1386 {
1387 	if (stage & (PHP_INI_STAGE_ACTIVATE | PHP_INI_STAGE_RUNTIME)) {
1388 		php_error_docref("ref.mbstring", E_DEPRECATED, "Use of mbstring.internal_encoding is deprecated");
1389 	}
1390 
1391 	if (OnUpdateString(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage) == FAILURE) {
1392 		return FAILURE;
1393 	}
1394 
1395 	if (stage & (PHP_INI_STAGE_STARTUP | PHP_INI_STAGE_SHUTDOWN | PHP_INI_STAGE_RUNTIME)) {
1396 		if (new_value && ZSTR_LEN(new_value)) {
1397 			return _php_mb_ini_mbstring_internal_encoding_set(ZSTR_VAL(new_value), ZSTR_LEN(new_value));
1398 		} else {
1399 			return _php_mb_ini_mbstring_internal_encoding_set(get_internal_encoding(), strlen(get_internal_encoding())+1);
1400 		}
1401 	} else {
1402 		/* the corresponding mbstring globals needs to be set according to the
1403 		 * ini value in the later stage because it never falls back to the
1404 		 * default value if 1. no value for mbstring.internal_encoding is given,
1405 		 * 2. mbstring.language directive is processed in per-dir or runtime
1406 		 * context and 3. call to the handler for mbstring.language is done
1407 		 * after mbstring.internal_encoding is handled. */
1408 		return SUCCESS;
1409 	}
1410 }
1411 /* }}} */
1412 
1413 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_substitute_character) */
PHP_INI_MH(OnUpdate_mbstring_substitute_character)1414 static PHP_INI_MH(OnUpdate_mbstring_substitute_character)
1415 {
1416 	int c;
1417 	char *endptr = NULL;
1418 
1419 	if (new_value != NULL) {
1420 		if (strcasecmp("none", ZSTR_VAL(new_value)) == 0) {
1421 			MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
1422 			MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
1423 		} else if (strcasecmp("long", ZSTR_VAL(new_value)) == 0) {
1424 			MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
1425 			MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
1426 		} else if (strcasecmp("entity", ZSTR_VAL(new_value)) == 0) {
1427 			MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
1428 			MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
1429 		} else {
1430 			MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1431 			MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1432 			if (ZSTR_LEN(new_value) > 0) {
1433 				c = strtol(ZSTR_VAL(new_value), &endptr, 0);
1434 				if (*endptr == '\0') {
1435 					MBSTRG(filter_illegal_substchar) = c;
1436 					MBSTRG(current_filter_illegal_substchar) = c;
1437 				}
1438 			}
1439 		}
1440 	} else {
1441 		MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1442 		MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1443 		MBSTRG(filter_illegal_substchar) = 0x3f;	/* '?' */
1444 		MBSTRG(current_filter_illegal_substchar) = 0x3f;	/* '?' */
1445 	}
1446 
1447 	return SUCCESS;
1448 }
1449 /* }}} */
1450 
1451 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_encoding_translation) */
PHP_INI_MH(OnUpdate_mbstring_encoding_translation)1452 static PHP_INI_MH(OnUpdate_mbstring_encoding_translation)
1453 {
1454 	if (new_value == NULL) {
1455 		return FAILURE;
1456 	}
1457 
1458 	OnUpdateBool(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
1459 
1460 	if (MBSTRG(encoding_translation)) {
1461 		sapi_unregister_post_entry(php_post_entries);
1462 		sapi_register_post_entries(mbstr_post_entries);
1463 	} else {
1464 		sapi_unregister_post_entry(mbstr_post_entries);
1465 		sapi_register_post_entries(php_post_entries);
1466 	}
1467 
1468 	return SUCCESS;
1469 }
1470 /* }}} */
1471 
1472 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes */
PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes)1473 static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes)
1474 {
1475 	zend_string *tmp;
1476 	void *re = NULL;
1477 
1478 	if (!new_value) {
1479 		new_value = entry->orig_value;
1480 	}
1481 	tmp = php_trim(new_value, NULL, 0, 3);
1482 
1483 	if (ZSTR_LEN(tmp) > 0) {
1484 		if (!(re = _php_mb_compile_regex(ZSTR_VAL(tmp)))) {
1485 			zend_string_release_ex(tmp, 0);
1486 			return FAILURE;
1487 		}
1488 	}
1489 
1490 	if (MBSTRG(http_output_conv_mimetypes)) {
1491 		_php_mb_free_regex(MBSTRG(http_output_conv_mimetypes));
1492 	}
1493 
1494 	MBSTRG(http_output_conv_mimetypes) = re;
1495 
1496 	zend_string_release_ex(tmp, 0);
1497 	return SUCCESS;
1498 }
1499 /* }}} */
1500 /* }}} */
1501 
1502 /* {{{ php.ini directive registration */
/* Registration table for the mbstring.* INI directives. Each entry gives
 * the directive name, its default value (NULL = no default string), the
 * contexts in which it may be changed, and the update handler. STD_* entries
 * additionally name the zend_mbstring_globals field the value is stored in. */
PHP_INI_BEGIN()
	PHP_INI_ENTRY("mbstring.language", "neutral", PHP_INI_ALL, OnUpdate_mbstring_language)
	PHP_INI_ENTRY("mbstring.detect_order", NULL, PHP_INI_ALL, OnUpdate_mbstring_detect_order)
	PHP_INI_ENTRY("mbstring.http_input", NULL, PHP_INI_ALL, OnUpdate_mbstring_http_input)
	PHP_INI_ENTRY("mbstring.http_output", NULL, PHP_INI_ALL, OnUpdate_mbstring_http_output)
	STD_PHP_INI_ENTRY("mbstring.internal_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_internal_encoding, internal_encoding_name, zend_mbstring_globals, mbstring_globals)
	PHP_INI_ENTRY("mbstring.substitute_character", NULL, PHP_INI_ALL, OnUpdate_mbstring_substitute_character)
	/* func_overload can only be set at system level */
	STD_PHP_INI_ENTRY("mbstring.func_overload", "0",
	PHP_INI_SYSTEM, OnUpdateLong, func_overload, zend_mbstring_globals, mbstring_globals)

	STD_PHP_INI_BOOLEAN("mbstring.encoding_translation", "0",
		PHP_INI_SYSTEM | PHP_INI_PERDIR,
		OnUpdate_mbstring_encoding_translation,
		encoding_translation, zend_mbstring_globals, mbstring_globals)
	/* default pattern: text/* and application/xhtml+xml */
	PHP_INI_ENTRY("mbstring.http_output_conv_mimetypes",
		"^(text/|application/xhtml\\+xml)",
		PHP_INI_ALL,
		OnUpdate_mbstring_http_output_conv_mimetypes)

	STD_PHP_INI_BOOLEAN("mbstring.strict_detection", "0",
		PHP_INI_ALL,
		OnUpdateBool,
		strict_detection, zend_mbstring_globals, mbstring_globals)
#if HAVE_MBREGEX
	/* only registered when mbregex support is compiled in */
	STD_PHP_INI_ENTRY("mbstring.regex_stack_limit", "100000",PHP_INI_ALL, OnUpdateLong, regex_stack_limit, zend_mbstring_globals, mbstring_globals)
#endif
PHP_INI_END()
1530 /* }}} */
1531 
1532 /* {{{ module global initialize handler */
1533 static PHP_GINIT_FUNCTION(mbstring)
1534 {
1535 #if defined(COMPILE_DL_MBSTRING) && defined(ZTS)
1536 ZEND_TSRMLS_CACHE_UPDATE();
1537 #endif
1538 
1539 	mbstring_globals->language = mbfl_no_language_uni;
1540 	mbstring_globals->internal_encoding = NULL;
1541 	mbstring_globals->current_internal_encoding = mbstring_globals->internal_encoding;
1542 	mbstring_globals->http_output_encoding = &mbfl_encoding_pass;
1543 	mbstring_globals->current_http_output_encoding = &mbfl_encoding_pass;
1544 	mbstring_globals->http_input_identify = NULL;
1545 	mbstring_globals->http_input_identify_get = NULL;
1546 	mbstring_globals->http_input_identify_post = NULL;
1547 	mbstring_globals->http_input_identify_cookie = NULL;
1548 	mbstring_globals->http_input_identify_string = NULL;
1549 	mbstring_globals->http_input_list = NULL;
1550 	mbstring_globals->http_input_list_size = 0;
1551 	mbstring_globals->detect_order_list = NULL;
1552 	mbstring_globals->detect_order_list_size = 0;
1553 	mbstring_globals->current_detect_order_list = NULL;
1554 	mbstring_globals->current_detect_order_list_size = 0;
1555 	mbstring_globals->default_detect_order_list = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut;
1556 	mbstring_globals->default_detect_order_list_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
1557 	mbstring_globals->filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1558 	mbstring_globals->filter_illegal_substchar = 0x3f;	/* '?' */
1559 	mbstring_globals->current_filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1560 	mbstring_globals->current_filter_illegal_substchar = 0x3f;	/* '?' */
1561 	mbstring_globals->illegalchars = 0;
1562 	mbstring_globals->func_overload = 0;
1563 	mbstring_globals->encoding_translation = 0;
1564 	mbstring_globals->strict_detection = 0;
1565 	mbstring_globals->outconv = NULL;
1566 	mbstring_globals->http_output_conv_mimetypes = NULL;
1567 #if HAVE_MBREGEX
1568 	mbstring_globals->mb_regex_globals = php_mb_regex_globals_alloc();
1569 #endif
1570 	mbstring_globals->last_used_encoding_name = NULL;
1571 	mbstring_globals->last_used_encoding = NULL;
1572 }
1573 /* }}} */
1574 
1575 /* {{{ PHP_GSHUTDOWN_FUNCTION */
PHP_GSHUTDOWN_FUNCTION(mbstring)1576 static PHP_GSHUTDOWN_FUNCTION(mbstring)
1577 {
1578 	if (mbstring_globals->http_input_list) {
1579 		free(mbstring_globals->http_input_list);
1580 	}
1581 	if (mbstring_globals->detect_order_list) {
1582 		free(mbstring_globals->detect_order_list);
1583 	}
1584 	if (mbstring_globals->http_output_conv_mimetypes) {
1585 		_php_mb_free_regex(mbstring_globals->http_output_conv_mimetypes);
1586 	}
1587 #if HAVE_MBREGEX
1588 	php_mb_regex_globals_free(mbstring_globals->mb_regex_globals);
1589 #endif
1590 }
1591 /* }}} */
1592 
1593 /* {{{ PHP_MINIT_FUNCTION(mbstring) */
/* Module start-up. In order: installs the libmbfl allocators, registers the
 * INI entries, hooks the SAPI input handlers, registers the MB_* constants,
 * starts mbregex (if compiled in), installs the zend_multibyte and RFC 1867
 * callbacks and finally, when mbstring.func_overload is set, swaps the
 * overloaded standard functions for their mb_* counterparts.
 *
 * Returns FAILURE if the multibyte callbacks cannot be installed or an
 * original function to overload cannot be found; SUCCESS otherwise. */
PHP_MINIT_FUNCTION(mbstring)
{
#if defined(COMPILE_DL_MBSTRING) && defined(ZTS)
ZEND_TSRMLS_CACHE_UPDATE();
#endif
	__mbfl_allocators = (mbfl_allocators*)&_php_mb_allocators;

	REGISTER_INI_ENTRIES();

	/* This is a global handler. Should not be set in a per-request handler. */
	sapi_register_treat_data(mbstr_treat_data);

	/* Post handlers are stored in the thread-local context. */
	if (MBSTRG(encoding_translation)) {
		sapi_register_post_entries(mbstr_post_entries);
	}

	/* bit flags accepted by mbstring.func_overload */
	REGISTER_LONG_CONSTANT("MB_OVERLOAD_MAIL", MB_OVERLOAD_MAIL, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("MB_OVERLOAD_STRING", MB_OVERLOAD_STRING, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("MB_OVERLOAD_REGEX", MB_OVERLOAD_REGEX, CONST_CS | CONST_PERSISTENT);

	/* case-conversion mode constants */
	REGISTER_LONG_CONSTANT("MB_CASE_UPPER", PHP_UNICODE_CASE_UPPER, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("MB_CASE_LOWER", PHP_UNICODE_CASE_LOWER, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("MB_CASE_TITLE", PHP_UNICODE_CASE_TITLE, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("MB_CASE_FOLD", PHP_UNICODE_CASE_FOLD, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("MB_CASE_UPPER_SIMPLE", PHP_UNICODE_CASE_UPPER_SIMPLE, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("MB_CASE_LOWER_SIMPLE", PHP_UNICODE_CASE_LOWER_SIMPLE, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("MB_CASE_TITLE_SIMPLE", PHP_UNICODE_CASE_TITLE_SIMPLE, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("MB_CASE_FOLD_SIMPLE", PHP_UNICODE_CASE_FOLD_SIMPLE, CONST_CS | CONST_PERSISTENT);

#if HAVE_MBREGEX
	PHP_MINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
#endif

	/* make mbstring the engine's multibyte provider */
	if (FAILURE == zend_multibyte_set_functions(&php_mb_zend_multibyte_functions)) {
		return FAILURE;
	}

	/* encoding-aware helpers for multipart/form-data parsing */
	php_rfc1867_set_multibyte_callbacks(
		php_mb_encoding_translation,
		php_mb_gpc_get_detect_order,
		php_mb_gpc_set_input_encoding,
		php_mb_rfc1867_getword,
		php_mb_rfc1867_getword_conf,
		php_mb_rfc1867_basename);

	/* override original function (deprecated). */
	if (MBSTRG(func_overload)){
		zend_function *func, *orig;
		const struct mb_overload_def *p;
		zend_string *str;

		/* walk the overload table until the terminating entry (type <= 0) */
		p = &(mb_ovld[0]);
		while (p->type > 0) {
			/* only entries whose type bits are all enabled, and that have not
			 * already been saved under save_func, are processed */
			if ((MBSTRG(func_overload) & p->type) == p->type &&
				!zend_hash_str_exists(CG(function_table), p->save_func, strlen(p->save_func))
			) {
				func = zend_hash_str_find_ptr(CG(function_table), p->ovld_func, strlen(p->ovld_func));

				if ((orig = zend_hash_str_find_ptr(CG(function_table), p->orig_func, strlen(p->orig_func))) == NULL) {
					php_error_docref("ref.mbstring", E_WARNING, "mbstring couldn't find function %s.", p->orig_func);
					return FAILURE;
				} else {
					ZEND_ASSERT(orig->type == ZEND_INTERNAL_FUNCTION);
					/* keep a copy of the original under save_func ... */
					str = zend_string_init_interned(p->save_func, strlen(p->save_func), 1);
					zend_hash_add_mem(CG(function_table), str, orig, sizeof(zend_internal_function));
					zend_string_release_ex(str, 1);
					function_add_ref(orig);

					/* ... then make orig_func resolve to the mb_* replacement */
					str = zend_string_init_interned(p->orig_func, strlen(p->orig_func), 1);
					zend_hash_update_mem(CG(function_table), str, func, sizeof(zend_internal_function));
					zend_string_release_ex(str, 1);
					function_add_ref(func);
				}
			}
			p++;
		}
	}

	return SUCCESS;
}
1675 /* }}} */
1676 
1677 /* {{{ PHP_MSHUTDOWN_FUNCTION(mbstring) */
PHP_MSHUTDOWN_FUNCTION(mbstring)1678 PHP_MSHUTDOWN_FUNCTION(mbstring)
1679 {
1680 	/*  clear overloaded function. */
1681 	if (MBSTRG(func_overload)){
1682 		const struct mb_overload_def *p;
1683 		zend_function *orig;
1684 
1685 		p = &(mb_ovld[0]);
1686 		while (p->type > 0) {
1687 			if ((MBSTRG(func_overload) & p->type) == p->type &&
1688 				(orig = zend_hash_str_find_ptr(CG(function_table), p->save_func, strlen(p->save_func)))) {
1689 
1690 				zend_hash_str_update_mem(CG(function_table), p->orig_func, strlen(p->orig_func), orig, sizeof(zend_internal_function));
1691 				function_add_ref(orig);
1692 				zend_hash_str_del(CG(function_table), p->save_func, strlen(p->save_func));
1693 			}
1694 			p++;
1695 		}
1696 	}
1697 
1698 	UNREGISTER_INI_ENTRIES();
1699 
1700 	zend_multibyte_restore_functions();
1701 
1702 #if HAVE_MBREGEX
1703 	PHP_MSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1704 #endif
1705 
1706 	return SUCCESS;
1707 }
1708 /* }}} */
1709 
1710 /* {{{ PHP_RINIT_FUNCTION(mbstring) */
PHP_RINIT_FUNCTION(mbstring)1711 PHP_RINIT_FUNCTION(mbstring)
1712 {
1713 	MBSTRG(current_internal_encoding) = MBSTRG(internal_encoding);
1714 	MBSTRG(current_http_output_encoding) = MBSTRG(http_output_encoding);
1715 	MBSTRG(current_filter_illegal_mode) = MBSTRG(filter_illegal_mode);
1716 	MBSTRG(current_filter_illegal_substchar) = MBSTRG(filter_illegal_substchar);
1717 
1718 	MBSTRG(illegalchars) = 0;
1719 
1720 	php_mb_populate_current_detect_order_list();
1721 
1722 	/* override original function. */
1723 	if (MBSTRG(func_overload)){
1724 		zend_error(E_DEPRECATED, "The mbstring.func_overload directive is deprecated");
1725 
1726 		CG(compiler_options) |= ZEND_COMPILE_NO_BUILTIN_STRLEN;
1727 	}
1728 #if HAVE_MBREGEX
1729 	PHP_RINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1730 #endif
1731 	zend_multibyte_set_internal_encoding((const zend_encoding *)MBSTRG(internal_encoding));
1732 
1733 	return SUCCESS;
1734 }
1735 /* }}} */
1736 
1737 /* {{{ PHP_RSHUTDOWN_FUNCTION(mbstring) */
PHP_RSHUTDOWN_FUNCTION(mbstring)1738 PHP_RSHUTDOWN_FUNCTION(mbstring)
1739 {
1740 	if (MBSTRG(current_detect_order_list) != NULL) {
1741 		efree(MBSTRG(current_detect_order_list));
1742 		MBSTRG(current_detect_order_list) = NULL;
1743 		MBSTRG(current_detect_order_list_size) = 0;
1744 	}
1745 	if (MBSTRG(outconv) != NULL) {
1746 		MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
1747 		mbfl_buffer_converter_delete(MBSTRG(outconv));
1748 		MBSTRG(outconv) = NULL;
1749 	}
1750 
1751 	/* clear http input identification. */
1752 	MBSTRG(http_input_identify) = NULL;
1753 	MBSTRG(http_input_identify_post) = NULL;
1754 	MBSTRG(http_input_identify_get) = NULL;
1755 	MBSTRG(http_input_identify_cookie) = NULL;
1756 	MBSTRG(http_input_identify_string) = NULL;
1757 
1758 	if (MBSTRG(last_used_encoding_name)) {
1759 		efree(MBSTRG(last_used_encoding_name));
1760 		MBSTRG(last_used_encoding_name) = NULL;
1761 	}
1762 
1763 #if HAVE_MBREGEX
1764 	PHP_RSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1765 #endif
1766 
1767 	return SUCCESS;
1768 }
1769 /* }}} */
1770 
1771 /* {{{ PHP_MINFO_FUNCTION(mbstring) */
PHP_MINFO_FUNCTION(mbstring)1772 PHP_MINFO_FUNCTION(mbstring)
1773 {
1774 	php_info_print_table_start();
1775 	php_info_print_table_row(2, "Multibyte Support", "enabled");
1776 	php_info_print_table_row(2, "Multibyte string engine", "libmbfl");
1777 	php_info_print_table_row(2, "HTTP input encoding translation", MBSTRG(encoding_translation) ? "enabled": "disabled");
1778 	{
1779 		char tmp[256];
1780 		snprintf(tmp, sizeof(tmp), "%d.%d.%d", MBFL_VERSION_MAJOR, MBFL_VERSION_MINOR, MBFL_VERSION_TEENY);
1781 		php_info_print_table_row(2, "libmbfl version", tmp);
1782 	}
1783 #if HAVE_ONIG
1784 	{
1785 		char tmp[256];
1786 		snprintf(tmp, sizeof(tmp), "%d.%d.%d", ONIGURUMA_VERSION_MAJOR, ONIGURUMA_VERSION_MINOR, ONIGURUMA_VERSION_TEENY);
1787 		php_info_print_table_row(2, "oniguruma version", tmp);
1788 	}
1789 #endif
1790 	php_info_print_table_end();
1791 
1792 	php_info_print_table_start();
1793 	php_info_print_table_header(1, "mbstring extension makes use of \"streamable kanji code filter and converter\", which is distributed under the GNU Lesser General Public License version 2.1.");
1794 	php_info_print_table_end();
1795 
1796 #if HAVE_MBREGEX
1797 	PHP_MINFO(mb_regex)(ZEND_MODULE_INFO_FUNC_ARGS_PASSTHRU);
1798 #endif
1799 
1800 	DISPLAY_INI_ENTRIES();
1801 }
1802 /* }}} */
1803 
1804 /* {{{ proto string mb_language([string language])
1805    Sets the current language or Returns the current language as a string */
PHP_FUNCTION(mb_language)1806 PHP_FUNCTION(mb_language)
1807 {
1808 	zend_string *name = NULL;
1809 
1810 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "|S", &name) == FAILURE) {
1811 		return;
1812 	}
1813 	if (name == NULL) {
1814 		RETVAL_STRING((char *)mbfl_no_language2name(MBSTRG(language)));
1815 	} else {
1816 		zend_string *ini_name = zend_string_init("mbstring.language", sizeof("mbstring.language") - 1, 0);
1817 		if (FAILURE == zend_alter_ini_entry(ini_name, name, PHP_INI_USER, PHP_INI_STAGE_RUNTIME)) {
1818 			php_error_docref(NULL, E_WARNING, "Unknown language \"%s\"", ZSTR_VAL(name));
1819 			RETVAL_FALSE;
1820 		} else {
1821 			RETVAL_TRUE;
1822 		}
1823 		zend_string_release_ex(ini_name, 0);
1824 	}
1825 }
1826 /* }}} */
1827 
1828 /* {{{ proto string mb_internal_encoding([string encoding])
1829    Sets the current internal encoding or Returns the current internal encoding as a string */
PHP_FUNCTION(mb_internal_encoding)1830 PHP_FUNCTION(mb_internal_encoding)
1831 {
1832 	const char *name = NULL;
1833 	size_t name_len;
1834 	const mbfl_encoding *encoding;
1835 
1836 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s", &name, &name_len) == FAILURE) {
1837 		return;
1838 	}
1839 	if (name == NULL) {
1840 		name = MBSTRG(current_internal_encoding) ? MBSTRG(current_internal_encoding)->name: NULL;
1841 		if (name != NULL) {
1842 			RETURN_STRING(name);
1843 		} else {
1844 			RETURN_FALSE;
1845 		}
1846 	} else {
1847 		encoding = mbfl_name2encoding(name);
1848 		if (!encoding) {
1849 			php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", name);
1850 			RETURN_FALSE;
1851 		} else {
1852 			MBSTRG(current_internal_encoding) = encoding;
1853 			RETURN_TRUE;
1854 		}
1855 	}
1856 }
1857 /* }}} */
1858 
1859 /* {{{ proto mixed mb_http_input([string type])
1860    Returns the input encoding */
PHP_FUNCTION(mb_http_input)1861 PHP_FUNCTION(mb_http_input)
1862 {
1863 	char *typ = NULL;
1864 	size_t typ_len;
1865 	int retname;
1866 	char *list, *temp;
1867 	const mbfl_encoding *result = NULL;
1868 
1869 	retname = 1;
1870  	if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s", &typ, &typ_len) == FAILURE) {
1871 		return;
1872  	}
1873  	if (typ == NULL) {
1874  		result = MBSTRG(http_input_identify);
1875  	} else {
1876  		switch (*typ) {
1877 		case 'G':
1878 		case 'g':
1879 			result = MBSTRG(http_input_identify_get);
1880 			break;
1881 		case 'P':
1882 		case 'p':
1883 			result = MBSTRG(http_input_identify_post);
1884 			break;
1885 		case 'C':
1886 		case 'c':
1887 			result = MBSTRG(http_input_identify_cookie);
1888 			break;
1889 		case 'S':
1890 		case 's':
1891 			result = MBSTRG(http_input_identify_string);
1892 			break;
1893 		case 'I':
1894 		case 'i':
1895 			{
1896 				const mbfl_encoding **entry = MBSTRG(http_input_list);
1897 				const size_t n = MBSTRG(http_input_list_size);
1898 				size_t i;
1899 				array_init(return_value);
1900 				for (i = 0; i < n; i++) {
1901 					add_next_index_string(return_value, (*entry)->name);
1902 					entry++;
1903 				}
1904 				retname = 0;
1905 			}
1906 			break;
1907 		case 'L':
1908 		case 'l':
1909 			{
1910 				const mbfl_encoding **entry = MBSTRG(http_input_list);
1911 				const size_t n = MBSTRG(http_input_list_size);
1912 				size_t i;
1913 				list = NULL;
1914 				for (i = 0; i < n; i++) {
1915 					if (list) {
1916 						temp = list;
1917 						spprintf(&list, 0, "%s,%s", temp, (*entry)->name);
1918 						efree(temp);
1919 						if (!list) {
1920 							break;
1921 						}
1922 					} else {
1923 						list = estrdup((*entry)->name);
1924 					}
1925 					entry++;
1926 				}
1927 			}
1928 			if (!list) {
1929 				RETURN_FALSE;
1930 			}
1931 			RETVAL_STRING(list);
1932 			efree(list);
1933 			retname = 0;
1934 			break;
1935 		default:
1936 			result = MBSTRG(http_input_identify);
1937 			break;
1938 		}
1939 	}
1940 
1941 	if (retname) {
1942 		if (result) {
1943 			RETVAL_STRING(result->name);
1944 		} else {
1945 			RETVAL_FALSE;
1946 		}
1947 	}
1948 }
1949 /* }}} */
1950 
1951 /* {{{ proto string mb_http_output([string encoding])
1952    Sets the current output_encoding or returns the current output_encoding as a string */
PHP_FUNCTION(mb_http_output)1953 PHP_FUNCTION(mb_http_output)
1954 {
1955 	const char *name = NULL;
1956 	size_t name_len;
1957 	const mbfl_encoding *encoding;
1958 
1959 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s", &name, &name_len) == FAILURE) {
1960 		return;
1961 	}
1962 
1963 	if (name == NULL) {
1964 		name = MBSTRG(current_http_output_encoding) ? MBSTRG(current_http_output_encoding)->name: NULL;
1965 		if (name != NULL) {
1966 			RETURN_STRING(name);
1967 		} else {
1968 			RETURN_FALSE;
1969 		}
1970 	} else {
1971 		encoding = mbfl_name2encoding(name);
1972 		if (!encoding) {
1973 			php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", name);
1974 			RETURN_FALSE;
1975 		} else {
1976 			MBSTRG(current_http_output_encoding) = encoding;
1977 			RETURN_TRUE;
1978 		}
1979 	}
1980 }
1981 /* }}} */
1982 
1983 /* {{{ proto bool|array mb_detect_order([mixed encoding-list])
1984    Sets the current detect_order or Return the current detect_order as a array */
PHP_FUNCTION(mb_detect_order)1985 PHP_FUNCTION(mb_detect_order)
1986 {
1987 	zval *arg1 = NULL;
1988 
1989 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "|z", &arg1) == FAILURE) {
1990 		return;
1991 	}
1992 
1993 	if (!arg1) {
1994 		size_t i;
1995 		size_t n = MBSTRG(current_detect_order_list_size);
1996 		const mbfl_encoding **entry = MBSTRG(current_detect_order_list);
1997 		array_init(return_value);
1998 		for (i = 0; i < n; i++) {
1999 			add_next_index_string(return_value, (*entry)->name);
2000 			entry++;
2001 		}
2002 	} else {
2003 		const mbfl_encoding **list = NULL;
2004 		size_t size = 0;
2005 		switch (Z_TYPE_P(arg1)) {
2006 			case IS_ARRAY:
2007 				if (FAILURE == php_mb_parse_encoding_array(arg1, &list, &size, 0)) {
2008 					if (list) {
2009 						efree(list);
2010 					}
2011 					RETURN_FALSE;
2012 				}
2013 				break;
2014 			default:
2015 				convert_to_string_ex(arg1);
2016 				if (FAILURE == php_mb_parse_encoding_list(Z_STRVAL_P(arg1), Z_STRLEN_P(arg1), &list, &size, 0)) {
2017 					if (list) {
2018 						efree(list);
2019 					}
2020 					RETURN_FALSE;
2021 				}
2022 				break;
2023 		}
2024 
2025 		if (list == NULL) {
2026 			RETURN_FALSE;
2027 		}
2028 
2029 		if (MBSTRG(current_detect_order_list)) {
2030 			efree(MBSTRG(current_detect_order_list));
2031 		}
2032 		MBSTRG(current_detect_order_list) = list;
2033 		MBSTRG(current_detect_order_list_size) = size;
2034 		RETURN_TRUE;
2035 	}
2036 }
2037 /* }}} */
2038 
/* Decides whether cp may be used as a substitute character.
 * Returns 1 for values in 1..0x10ffff that are not surrogates, 0 otherwise. */
static inline int php_mb_check_code_point(zend_long cp)
{
	/* Anything <= 0 or >= 0x110000 is treated as out of Unicode range. */
	const int in_range = (cp > 0 && cp < 0x110000);

	/* Surrogate code points are never valid on their own and only a single
	 * substitute character is allowed. */
	const int is_surrogate = (cp >= 0xd800 && cp <= 0xdfff);

	/* As we do not know the target encoding of the conversion operation that
	 * is going to use the substitution character, we cannot check whether the
	 * code point is actually mapped in that encoding at this point. Thus
	 * everything else has to be accepted. */
	return in_range && !is_surrogate;
}
2057 
2058 /* {{{ proto mixed mb_substitute_character([mixed substchar])
2059    Sets the current substitute_character or returns the current substitute_character */
PHP_FUNCTION(mb_substitute_character)2060 PHP_FUNCTION(mb_substitute_character)
2061 {
2062 	zval *arg1 = NULL;
2063 
2064 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "|z", &arg1) == FAILURE) {
2065 		return;
2066 	}
2067 
2068 	if (!arg1) {
2069 		if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
2070 			RETURN_STRING("none");
2071 		} else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
2072 			RETURN_STRING("long");
2073 		} else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
2074 			RETURN_STRING("entity");
2075 		} else {
2076 			RETURN_LONG(MBSTRG(current_filter_illegal_substchar));
2077 		}
2078 	} else {
2079 		RETVAL_TRUE;
2080 
2081 		switch (Z_TYPE_P(arg1)) {
2082 			case IS_STRING:
2083 				if (strncasecmp("none", Z_STRVAL_P(arg1), Z_STRLEN_P(arg1)) == 0) {
2084 					MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
2085 				} else if (strncasecmp("long", Z_STRVAL_P(arg1), Z_STRLEN_P(arg1)) == 0) {
2086 					MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
2087 				} else if (strncasecmp("entity", Z_STRVAL_P(arg1), Z_STRLEN_P(arg1)) == 0) {
2088 					MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
2089 				} else {
2090 					convert_to_long_ex(arg1);
2091 
2092 					if (php_mb_check_code_point(Z_LVAL_P(arg1))) {
2093 						MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
2094 						MBSTRG(current_filter_illegal_substchar) = Z_LVAL_P(arg1);
2095 					} else {
2096 						php_error_docref(NULL, E_WARNING, "Unknown character");
2097 						RETURN_FALSE;
2098 					}
2099 				}
2100 				break;
2101 			default:
2102 				convert_to_long_ex(arg1);
2103 				if (php_mb_check_code_point(Z_LVAL_P(arg1))) {
2104 					MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
2105 					MBSTRG(current_filter_illegal_substchar) = Z_LVAL_P(arg1);
2106 				} else {
2107 					php_error_docref(NULL, E_WARNING, "Unknown character");
2108 					RETURN_FALSE;
2109 				}
2110 				break;
2111 		}
2112 	}
2113 }
2114 /* }}} */
2115 
2116 /* {{{ proto string mb_preferred_mime_name(string encoding)
2117    Return the preferred MIME name (charset) as a string */
PHP_FUNCTION(mb_preferred_mime_name)2118 PHP_FUNCTION(mb_preferred_mime_name)
2119 {
2120 	enum mbfl_no_encoding no_encoding;
2121 	char *name = NULL;
2122 	size_t name_len;
2123 
2124 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &name, &name_len) == FAILURE) {
2125 		return;
2126 	} else {
2127 		no_encoding = mbfl_name2no_encoding(name);
2128 		if (no_encoding == mbfl_no_encoding_invalid) {
2129 			php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", name);
2130 			RETVAL_FALSE;
2131 		} else {
2132 			const char *preferred_name = mbfl_no2preferred_mime_name(no_encoding);
2133 			if (preferred_name == NULL || *preferred_name == '\0') {
2134 				php_error_docref(NULL, E_WARNING, "No MIME preferred name corresponding to \"%s\"", name);
2135 				RETVAL_FALSE;
2136 			} else {
2137 				RETVAL_STRING((char *)preferred_name);
2138 			}
2139 		}
2140 	}
2141 }
2142 /* }}} */
2143 
/* Byte range tests, presumably for Shift_JIS first/second bytes (going by the
 * macro names). Each evaluates to 1 when c lies in one of the two listed
 * ranges and to 0 otherwise. Note that c is evaluated more than once. */
#define IS_SJIS1(c) (((c) >= 0x81 && (c) <= 0x9f) || ((c) >= 0xe0 && (c) <= 0xf5))
#define IS_SJIS2(c) (((c) >= 0x40 && (c) <= 0x7e) || ((c) >= 0x80 && (c) <= 0xfc))
2146 
2147 /* {{{ proto bool mb_parse_str(string encoded_string [, array result])
2148    Parses GET/POST/COOKIE data and sets global variables */
PHP_FUNCTION(mb_parse_str)2149 PHP_FUNCTION(mb_parse_str)
2150 {
2151 	zval *track_vars_array = NULL;
2152 	char *encstr = NULL;
2153 	size_t encstr_len;
2154 	php_mb_encoding_handler_info_t info;
2155 	const mbfl_encoding *detected;
2156 
2157 	track_vars_array = NULL;
2158 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|z/", &encstr, &encstr_len, &track_vars_array) == FAILURE) {
2159 		return;
2160 	}
2161 
2162 	if (track_vars_array != NULL) {
2163 		/* Clear out the array */
2164 		zval_ptr_dtor(track_vars_array);
2165 		array_init(track_vars_array);
2166 	}
2167 
2168 	encstr = estrndup(encstr, encstr_len);
2169 
2170 	info.data_type              = PARSE_STRING;
2171 	info.separator              = PG(arg_separator).input;
2172 	info.report_errors          = 1;
2173 	info.to_encoding            = MBSTRG(current_internal_encoding);
2174 	info.to_language            = MBSTRG(language);
2175 	info.from_encodings         = MBSTRG(http_input_list);
2176 	info.num_from_encodings     = MBSTRG(http_input_list_size);
2177 	info.from_language          = MBSTRG(language);
2178 
2179 	if (track_vars_array != NULL) {
2180 		detected = _php_mb_encoding_handler_ex(&info, track_vars_array, encstr);
2181 	} else {
2182 		zval tmp;
2183 		zend_array *symbol_table;
2184 		if (zend_forbid_dynamic_call("mb_parse_str() with a single argument") == FAILURE) {
2185 			efree(encstr);
2186 			return;
2187 		}
2188 
2189 		php_error_docref(NULL, E_DEPRECATED, "Calling mb_parse_str() without the result argument is deprecated");
2190 
2191 		symbol_table = zend_rebuild_symbol_table();
2192 		ZVAL_ARR(&tmp, symbol_table);
2193 		detected = _php_mb_encoding_handler_ex(&info, &tmp, encstr);
2194 	}
2195 
2196 	MBSTRG(http_input_identify) = detected;
2197 
2198 	RETVAL_BOOL(detected);
2199 
2200 	if (encstr != NULL) efree(encstr);
2201 }
2202 /* }}} */
2203 
2204 /* {{{ proto string mb_output_handler(string contents, int status)
2205    Returns string in output buffer converted to the http_output encoding */
PHP_FUNCTION(mb_output_handler)2206 PHP_FUNCTION(mb_output_handler)
2207 {
2208 	char *arg_string;
2209 	size_t arg_string_len;
2210 	zend_long arg_status;
2211 	mbfl_string string, result;
2212 	const char *charset;
2213 	char *p;
2214 	const mbfl_encoding *encoding;
2215 	int last_feed;
2216 	size_t len;
2217 	unsigned char send_text_mimetype = 0;
2218 	char *s, *mimetype = NULL;
2219 
2220 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl", &arg_string, &arg_string_len, &arg_status) == FAILURE) {
2221 		return;
2222 	}
2223 
2224 	encoding = MBSTRG(current_http_output_encoding);
2225 
2226  	/* start phase only */
2227  	if ((arg_status & PHP_OUTPUT_HANDLER_START) != 0) {
2228  		/* delete the converter just in case. */
2229  		if (MBSTRG(outconv)) {
2230 			MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
2231  			mbfl_buffer_converter_delete(MBSTRG(outconv));
2232  			MBSTRG(outconv) = NULL;
2233   		}
2234 		if (encoding == &mbfl_encoding_pass) {
2235 			RETURN_STRINGL(arg_string, arg_string_len);
2236 		}
2237 
2238 		/* analyze mime type */
2239 		if (SG(sapi_headers).mimetype &&
2240 			_php_mb_match_regex(
2241 				MBSTRG(http_output_conv_mimetypes),
2242 				SG(sapi_headers).mimetype,
2243 				strlen(SG(sapi_headers).mimetype))) {
2244 			if ((s = strchr(SG(sapi_headers).mimetype,';')) == NULL){
2245 				mimetype = estrdup(SG(sapi_headers).mimetype);
2246 			} else {
2247 				mimetype = estrndup(SG(sapi_headers).mimetype,s-SG(sapi_headers).mimetype);
2248 			}
2249 			send_text_mimetype = 1;
2250 		} else if (SG(sapi_headers).send_default_content_type) {
2251 			mimetype = SG(default_mimetype) ? SG(default_mimetype) : SAPI_DEFAULT_MIMETYPE;
2252 		}
2253 
2254  		/* if content-type is not yet set, set it and activate the converter */
2255  		if (SG(sapi_headers).send_default_content_type || send_text_mimetype) {
2256 			charset = encoding->mime_name;
2257 			if (charset) {
2258 				len = spprintf( &p, 0, "Content-Type: %s; charset=%s",  mimetype, charset );
2259 				if (sapi_add_header(p, len, 0) != FAILURE) {
2260 					SG(sapi_headers).send_default_content_type = 0;
2261 				}
2262 			}
2263  			/* activate the converter */
2264  			MBSTRG(outconv) = mbfl_buffer_converter_new(MBSTRG(current_internal_encoding), encoding, 0);
2265 			if (send_text_mimetype){
2266 				efree(mimetype);
2267 			}
2268  		}
2269   	}
2270 
2271  	/* just return if the converter is not activated. */
2272  	if (MBSTRG(outconv) == NULL) {
2273 		RETURN_STRINGL(arg_string, arg_string_len);
2274 	}
2275 
2276  	/* flag */
2277  	last_feed = ((arg_status & PHP_OUTPUT_HANDLER_END) != 0);
2278  	/* mode */
2279  	mbfl_buffer_converter_illegal_mode(MBSTRG(outconv), MBSTRG(current_filter_illegal_mode));
2280  	mbfl_buffer_converter_illegal_substchar(MBSTRG(outconv), MBSTRG(current_filter_illegal_substchar));
2281 
2282  	/* feed the string */
2283  	mbfl_string_init(&string);
2284 	/* these are not needed. convd has encoding info.
2285 	string.no_language = MBSTRG(language);
2286 	string.encoding = MBSTRG(current_internal_encoding);
2287 	*/
2288  	string.val = (unsigned char *)arg_string;
2289  	string.len = arg_string_len;
2290  	mbfl_buffer_converter_feed(MBSTRG(outconv), &string);
2291  	if (last_feed) {
2292  		mbfl_buffer_converter_flush(MBSTRG(outconv));
2293 	}
2294  	/* get the converter output, and return it */
2295  	mbfl_buffer_converter_result(MBSTRG(outconv), &result);
2296 	// TODO: avoid reallocation ???
2297  	RETVAL_STRINGL((char *)result.val, result.len);		/* the string is already strdup()'ed */
2298 	efree(result.val);
2299 
2300  	/* delete the converter if it is the last feed. */
2301  	if (last_feed) {
2302 		MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
2303 		mbfl_buffer_converter_delete(MBSTRG(outconv));
2304 		MBSTRG(outconv) = NULL;
2305 	}
2306 }
2307 /* }}} */
2308 
2309 /* {{{ proto int mb_strlen(string str [, string encoding])
2310    Get character numbers of a string */
PHP_FUNCTION(mb_strlen)2311 PHP_FUNCTION(mb_strlen)
2312 {
2313 	size_t n;
2314 	mbfl_string string;
2315 	char *str, *enc_name = NULL;
2316 	size_t str_len, enc_name_len;
2317 
2318 	mbfl_string_init(&string);
2319 
2320 	ZEND_PARSE_PARAMETERS_START(1, 2)
2321 		Z_PARAM_STRING(str, str_len)
2322 		Z_PARAM_OPTIONAL
2323 		Z_PARAM_STRING(enc_name, enc_name_len)
2324 	ZEND_PARSE_PARAMETERS_END();
2325 
2326 	string.val = (unsigned char *) str;
2327 	string.len = str_len;
2328 	string.no_language = MBSTRG(language);
2329 	string.encoding = php_mb_get_encoding(enc_name);
2330 	if (!string.encoding) {
2331 		RETURN_FALSE;
2332 	}
2333 
2334 	n = mbfl_strlen(&string);
2335 	if (!mbfl_is_error(n)) {
2336 		RETVAL_LONG(n);
2337 	} else {
2338 		RETVAL_FALSE;
2339 	}
2340 }
2341 /* }}} */
2342 
2343 /* {{{ proto int mb_strpos(string haystack, string needle [, int offset [, string encoding]])
2344    Find position of first occurrence of a string within another */
PHP_FUNCTION(mb_strpos)2345 PHP_FUNCTION(mb_strpos)
2346 {
2347 	int reverse = 0;
2348 	zend_long offset = 0;
2349 	mbfl_string haystack, needle;
2350 	char *enc_name = NULL;
2351 	size_t enc_name_len, n;
2352 
2353 	mbfl_string_init(&haystack);
2354 	mbfl_string_init(&needle);
2355 
2356 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|ls", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &offset, &enc_name, &enc_name_len) == FAILURE) {
2357 		return;
2358 	}
2359 
2360 	haystack.no_language = needle.no_language = MBSTRG(language);
2361 	haystack.encoding = needle.encoding = php_mb_get_encoding(enc_name);
2362 	if (!haystack.encoding) {
2363 		RETURN_FALSE;
2364 	}
2365 
2366 	if (offset != 0) {
2367 		size_t slen = mbfl_strlen(&haystack);
2368 		if (offset < 0) {
2369 			offset += slen;
2370 		}
2371 		if (offset < 0 || offset > slen) {
2372 			php_error_docref(NULL, E_WARNING, "Offset not contained in string");
2373 			RETURN_FALSE;
2374 		}
2375 	}
2376 
2377 	if (needle.len == 0) {
2378 		php_error_docref(NULL, E_WARNING, "Empty delimiter");
2379 		RETURN_FALSE;
2380 	}
2381 
2382 	n = mbfl_strpos(&haystack, &needle, offset, reverse);
2383 	if (!mbfl_is_error(n)) {
2384 		RETVAL_LONG(n);
2385 	} else {
2386 		switch (-n) {
2387 		case 1:
2388 			break;
2389 		case 2:
2390 			php_error_docref(NULL, E_WARNING, "Needle has not positive length");
2391 			break;
2392 		case 4:
2393 			php_error_docref(NULL, E_WARNING, "Unknown encoding or conversion error");
2394 			break;
2395 		case 8:
2396 			php_error_docref(NULL, E_NOTICE, "Argument is empty");
2397 			break;
2398 		default:
2399 			php_error_docref(NULL, E_WARNING, "Unknown error in mb_strpos");
2400 			break;
2401 		}
2402 		RETVAL_FALSE;
2403 	}
2404 }
2405 /* }}} */
2406 
2407 /* {{{ proto int mb_strrpos(string haystack, string needle [, int offset [, string encoding]])
2408    Find position of last occurrence of a string within another */
PHP_FUNCTION(mb_strrpos)2409 PHP_FUNCTION(mb_strrpos)
2410 {
2411 	mbfl_string haystack, needle;
2412 	char *enc_name = NULL;
2413 	size_t enc_name_len;
2414 	zval *zoffset = NULL;
2415 	zend_long offset = 0, str_flg, n;
2416 	char *enc_name2 = NULL;
2417 	size_t enc_name_len2;
2418 
2419 	mbfl_string_init(&haystack);
2420 	mbfl_string_init(&needle);
2421 
2422 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|zs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &zoffset, &enc_name, &enc_name_len) == FAILURE) {
2423 		return;
2424 	}
2425 
2426 	if (zoffset) {
2427 		if (Z_TYPE_P(zoffset) == IS_STRING) {
2428 			enc_name2     = Z_STRVAL_P(zoffset);
2429 			enc_name_len2 = Z_STRLEN_P(zoffset);
2430 			str_flg       = 1;
2431 
2432 			if (enc_name2 != NULL) {
2433 				switch (*enc_name2) {
2434 					case '0':
2435 					case '1':
2436 					case '2':
2437 					case '3':
2438 					case '4':
2439 					case '5':
2440 					case '6':
2441 					case '7':
2442 					case '8':
2443 					case '9':
2444 					case ' ':
2445 					case '-':
2446 					case '.':
2447 						break;
2448 					default :
2449 						str_flg = 0;
2450 						break;
2451 				}
2452 			}
2453 
2454 			if (str_flg) {
2455 				convert_to_long_ex(zoffset);
2456 				offset   = Z_LVAL_P(zoffset);
2457 			} else {
2458 				enc_name     = enc_name2;
2459 				enc_name_len = enc_name_len2;
2460 			}
2461 		} else {
2462 			convert_to_long_ex(zoffset);
2463 			offset = Z_LVAL_P(zoffset);
2464 		}
2465 	}
2466 
2467 	haystack.no_language = needle.no_language = MBSTRG(language);
2468 	haystack.encoding = needle.encoding = php_mb_get_encoding(enc_name);
2469 	if (!haystack.encoding) {
2470 		RETURN_FALSE;
2471 	}
2472 
2473 	if (offset != 0) {
2474 		size_t haystack_char_len = mbfl_strlen(&haystack);
2475 		if ((offset > 0 && offset > haystack_char_len) ||
2476 			(offset < 0 && -offset > haystack_char_len)) {
2477 			php_error_docref(NULL, E_WARNING, "Offset is greater than the length of haystack string");
2478 			RETURN_FALSE;
2479 		}
2480 	}
2481 
2482 	n = mbfl_strpos(&haystack, &needle, offset, 1);
2483 	if (!mbfl_is_error(n)) {
2484 		RETVAL_LONG(n);
2485 	} else {
2486 		RETVAL_FALSE;
2487 	}
2488 }
2489 /* }}} */
2490 
2491 /* {{{ proto int mb_stripos(string haystack, string needle [, int offset [, string encoding]])
2492    Finds position of first occurrence of a string within another, case insensitive */
PHP_FUNCTION(mb_stripos)2493 PHP_FUNCTION(mb_stripos)
2494 {
2495 	size_t n = (size_t) -1;
2496 	zend_long offset = 0;
2497 	mbfl_string haystack, needle;
2498 	const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
2499 	size_t from_encoding_len;
2500 
2501 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|ls", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &offset, &from_encoding, &from_encoding_len) == FAILURE) {
2502 		return;
2503 	}
2504 
2505 	if (needle.len == 0) {
2506 		php_error_docref(NULL, E_WARNING, "Empty delimiter");
2507 		RETURN_FALSE;
2508 	}
2509 
2510 	n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, from_encoding);
2511 
2512 	if (!mbfl_is_error(n)) {
2513 		RETVAL_LONG(n);
2514 	} else {
2515 		RETVAL_FALSE;
2516 	}
2517 }
2518 /* }}} */
2519 
2520 /* {{{ proto int mb_strripos(string haystack, string needle [, int offset [, string encoding]])
2521    Finds position of last occurrence of a string within another, case insensitive */
PHP_FUNCTION(mb_strripos)2522 PHP_FUNCTION(mb_strripos)
2523 {
2524 	size_t n = (size_t) -1;
2525 	zend_long offset = 0;
2526 	mbfl_string haystack, needle;
2527 	const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
2528 	size_t from_encoding_len;
2529 
2530 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|ls", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &offset, &from_encoding, &from_encoding_len) == FAILURE) {
2531 		return;
2532 	}
2533 
2534 	n = php_mb_stripos(1, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, from_encoding);
2535 
2536 	if (!mbfl_is_error(n)) {
2537 		RETVAL_LONG(n);
2538 	} else {
2539 		RETVAL_FALSE;
2540 	}
2541 }
2542 /* }}} */
2543 
2544 /* {{{ proto string mb_strstr(string haystack, string needle[, bool part[, string encoding]])
2545    Finds first occurrence of a string within another */
PHP_FUNCTION(mb_strstr)2546 PHP_FUNCTION(mb_strstr)
2547 {
2548 	size_t n;
2549 	mbfl_string haystack, needle, result, *ret = NULL;
2550 	char *enc_name = NULL;
2551 	size_t enc_name_len;
2552 	zend_bool part = 0;
2553 
2554 	mbfl_string_init(&haystack);
2555 	mbfl_string_init(&needle);
2556 
2557 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &enc_name, &enc_name_len) == FAILURE) {
2558 		return;
2559 	}
2560 
2561 	haystack.no_language = needle.no_language = MBSTRG(language);
2562 	haystack.encoding = needle.encoding = php_mb_get_encoding(enc_name);
2563 	if (!haystack.encoding) {
2564 		RETURN_FALSE;
2565 	}
2566 
2567 	if (needle.len == 0) {
2568 		php_error_docref(NULL, E_WARNING, "Empty delimiter");
2569 		RETURN_FALSE;
2570 	}
2571 
2572 	n = mbfl_strpos(&haystack, &needle, 0, 0);
2573 	if (!mbfl_is_error(n)) {
2574 		if (part) {
2575 			ret = mbfl_substr(&haystack, &result, 0, n);
2576 			if (ret != NULL) {
2577 				// TODO: avoid reallocation ???
2578 				RETVAL_STRINGL((char *)ret->val, ret->len);
2579 				efree(ret->val);
2580 			} else {
2581 				RETVAL_FALSE;
2582 			}
2583 		} else {
2584 			ret = mbfl_substr(&haystack, &result, n, MBFL_SUBSTR_UNTIL_END);
2585 			if (ret != NULL) {
2586 				// TODO: avoid reallocation ???
2587 				RETVAL_STRINGL((char *)ret->val, ret->len);
2588 				efree(ret->val);
2589 			} else {
2590 				RETVAL_FALSE;
2591 			}
2592 		}
2593 	} else {
2594 		RETVAL_FALSE;
2595 	}
2596 }
2597 /* }}} */
2598 
2599 /* {{{ proto string mb_strrchr(string haystack, string needle[, bool part[, string encoding]])
2600    Finds the last occurrence of a character in a string within another */
PHP_FUNCTION(mb_strrchr)2601 PHP_FUNCTION(mb_strrchr)
2602 {
2603 	size_t n;
2604 	mbfl_string haystack, needle, result, *ret = NULL;
2605 	char *enc_name = NULL;
2606 	size_t enc_name_len;
2607 	zend_bool part = 0;
2608 
2609 	mbfl_string_init(&haystack);
2610 	mbfl_string_init(&needle);
2611 
2612 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &enc_name, &enc_name_len) == FAILURE) {
2613 		return;
2614 	}
2615 
2616 	haystack.no_language = needle.no_language = MBSTRG(language);
2617 	haystack.encoding = needle.encoding = php_mb_get_encoding(enc_name);
2618 	if (!haystack.encoding) {
2619 		RETURN_FALSE;
2620 	}
2621 
2622 	if (haystack.len == 0) {
2623 		RETURN_FALSE;
2624 	}
2625 	if (needle.len == 0) {
2626 		RETURN_FALSE;
2627 	}
2628 
2629 	n = mbfl_strpos(&haystack, &needle, 0, 1);
2630 	if (!mbfl_is_error(n)) {
2631 		if (part) {
2632 			ret = mbfl_substr(&haystack, &result, 0, n);
2633 			if (ret != NULL) {
2634 				// TODO: avoid reallocation ???
2635 				RETVAL_STRINGL((char *)ret->val, ret->len);
2636 				efree(ret->val);
2637 			} else {
2638 				RETVAL_FALSE;
2639 			}
2640 		} else {
2641 			ret = mbfl_substr(&haystack, &result, n, MBFL_SUBSTR_UNTIL_END);
2642 			if (ret != NULL) {
2643 				// TODO: avoid reallocation ???
2644 				RETVAL_STRINGL((char *)ret->val, ret->len);
2645 				efree(ret->val);
2646 			} else {
2647 				RETVAL_FALSE;
2648 			}
2649 		}
2650 	} else {
2651 		RETVAL_FALSE;
2652 	}
2653 }
2654 /* }}} */
2655 
2656 /* {{{ proto string mb_stristr(string haystack, string needle[, bool part[, string encoding]])
2657    Finds first occurrence of a string within another, case insensitive */
PHP_FUNCTION(mb_stristr)2658 PHP_FUNCTION(mb_stristr)
2659 {
2660 	zend_bool part = 0;
2661 	size_t from_encoding_len, n;
2662 	mbfl_string haystack, needle, result, *ret = NULL;
2663 	const char *from_encoding = NULL;
2664 	mbfl_string_init(&haystack);
2665 	mbfl_string_init(&needle);
2666 
2667 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &from_encoding, &from_encoding_len) == FAILURE) {
2668 		return;
2669 	}
2670 
2671 	haystack.no_language = needle.no_language = MBSTRG(language);
2672 	haystack.encoding = needle.encoding = php_mb_get_encoding(from_encoding);
2673 	if (!haystack.encoding) {
2674 		RETURN_FALSE;
2675 	}
2676 
2677 	if (!needle.len) {
2678 		php_error_docref(NULL, E_WARNING, "Empty delimiter");
2679 		RETURN_FALSE;
2680 	}
2681 
2682 	n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, 0, from_encoding);
2683 	if (mbfl_is_error(n)) {
2684 		RETURN_FALSE;
2685 	}
2686 
2687 	if (part) {
2688 		ret = mbfl_substr(&haystack, &result, 0, n);
2689 		if (ret != NULL) {
2690 			// TODO: avoid reallocation ???
2691 			RETVAL_STRINGL((char *)ret->val, ret->len);
2692 			efree(ret->val);
2693 		} else {
2694 			RETVAL_FALSE;
2695 		}
2696 	} else {
2697 		ret = mbfl_substr(&haystack, &result, n, MBFL_SUBSTR_UNTIL_END);
2698 		if (ret != NULL) {
2699 			// TODO: avoid reallocaton ???
2700 			RETVAL_STRINGL((char *)ret->val, ret->len);
2701 			efree(ret->val);
2702 		} else {
2703 			RETVAL_FALSE;
2704 		}
2705 	}
2706 }
2707 /* }}} */
2708 
2709 /* {{{ proto string mb_strrichr(string haystack, string needle[, bool part[, string encoding]])
2710    Finds the last occurrence of a character in a string within another, case insensitive */
PHP_FUNCTION(mb_strrichr)2711 PHP_FUNCTION(mb_strrichr)
2712 {
2713 	zend_bool part = 0;
2714 	size_t n;
2715 	size_t from_encoding_len;
2716 	mbfl_string haystack, needle, result, *ret = NULL;
2717 	const char *from_encoding = NULL;
2718 	mbfl_string_init(&haystack);
2719 	mbfl_string_init(&needle);
2720 
2721 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &from_encoding, &from_encoding_len) == FAILURE) {
2722 		return;
2723 	}
2724 
2725 	haystack.no_language = needle.no_language = MBSTRG(language);
2726 	haystack.encoding = needle.encoding = php_mb_get_encoding(from_encoding);
2727 	if (!haystack.encoding) {
2728 		RETURN_FALSE;
2729 	}
2730 
2731 	n = php_mb_stripos(1, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, 0, from_encoding);
2732 	if (mbfl_is_error(n)) {
2733 		RETURN_FALSE;
2734 	}
2735 
2736 	if (part) {
2737 		ret = mbfl_substr(&haystack, &result, 0, n);
2738 		if (ret != NULL) {
2739 			// TODO: avoid reallocation ???
2740 			RETVAL_STRINGL((char *)ret->val, ret->len);
2741 			efree(ret->val);
2742 		} else {
2743 			RETVAL_FALSE;
2744 		}
2745 	} else {
2746 		ret = mbfl_substr(&haystack, &result, n, MBFL_SUBSTR_UNTIL_END);
2747 		if (ret != NULL) {
2748 			// TODO: avoid reallocation ???
2749 			RETVAL_STRINGL((char *)ret->val, ret->len);
2750 			efree(ret->val);
2751 		} else {
2752 			RETVAL_FALSE;
2753 		}
2754 	}
2755 }
2756 /* }}} */
2757 
2758 /* {{{ proto int mb_substr_count(string haystack, string needle [, string encoding])
2759    Count the number of substring occurrences */
PHP_FUNCTION(mb_substr_count)2760 PHP_FUNCTION(mb_substr_count)
2761 {
2762 	size_t n;
2763 	mbfl_string haystack, needle;
2764 	char *enc_name = NULL;
2765 	size_t enc_name_len;
2766 
2767 	mbfl_string_init(&haystack);
2768 	mbfl_string_init(&needle);
2769 
2770 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|s", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &enc_name, &enc_name_len) == FAILURE) {
2771 		return;
2772 	}
2773 
2774 	haystack.no_language = needle.no_language = MBSTRG(language);
2775 	haystack.encoding = needle.encoding = php_mb_get_encoding(enc_name);
2776 	if (!haystack.encoding) {
2777 		RETURN_FALSE;
2778 	}
2779 
2780 	if (needle.len == 0) {
2781 		php_error_docref(NULL, E_WARNING, "Empty substring");
2782 		RETURN_FALSE;
2783 	}
2784 
2785 	n = mbfl_substr_count(&haystack, &needle);
2786 	if (!mbfl_is_error(n)) {
2787 		RETVAL_LONG(n);
2788 	} else {
2789 		RETVAL_FALSE;
2790 	}
2791 }
2792 /* }}} */
2793 
2794 /* {{{ proto string mb_substr(string str, int start [, int length [, string encoding]])
2795    Returns part of a string */
PHP_FUNCTION(mb_substr)2796 PHP_FUNCTION(mb_substr)
2797 {
2798 	char *str, *encoding = NULL;
2799 	zend_long from, len;
2800 	size_t mblen, real_from, real_len;
2801 	size_t str_len, encoding_len;
2802 	zend_bool len_is_null = 1;
2803 	mbfl_string string, result, *ret;
2804 
2805 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl|l!s", &str, &str_len, &from, &len, &len_is_null, &encoding, &encoding_len) == FAILURE) {
2806 		return;
2807 	}
2808 
2809 	mbfl_string_init(&string);
2810 	string.no_language = MBSTRG(language);
2811 	string.encoding = php_mb_get_encoding(encoding);
2812 	if (!string.encoding) {
2813 		RETURN_FALSE;
2814 	}
2815 
2816 	string.val = (unsigned char *)str;
2817 	string.len = str_len;
2818 
2819 	/* measures length */
2820 	mblen = 0;
2821 	if (from < 0 || (!len_is_null && len < 0)) {
2822 		mblen = mbfl_strlen(&string);
2823 	}
2824 
2825 	/* if "from" position is negative, count start position from the end
2826 	 * of the string
2827 	 */
2828 	if (from >= 0) {
2829 		real_from = (size_t) from;
2830 	} else if (-from < mblen) {
2831 		real_from = mblen + from;
2832 	} else {
2833 		real_from = 0;
2834 	}
2835 
2836 	/* if "length" position is negative, set it to the length
2837 	 * needed to stop that many chars from the end of the string
2838 	 */
2839 	if (len_is_null) {
2840 		real_len = MBFL_SUBSTR_UNTIL_END;
2841 	} else if (len >= 0) {
2842 		real_len = (size_t) len;
2843 	} else if (real_from < mblen && -len < mblen - real_from) {
2844 		real_len = (mblen - real_from) + len;
2845 	} else {
2846 		real_len = 0;
2847 	}
2848 
2849 	if (((MBSTRG(func_overload) & MB_OVERLOAD_STRING) == MB_OVERLOAD_STRING)
2850 		&& (real_from > mbfl_strlen(&string))) {
2851 		RETURN_FALSE;
2852 	}
2853 
2854 	ret = mbfl_substr(&string, &result, real_from, real_len);
2855 	if (NULL == ret) {
2856 		RETURN_FALSE;
2857 	}
2858 
2859 	// TODO: avoid reallocation ???
2860 	RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
2861 	efree(ret->val);
2862 }
2863 /* }}} */
2864 
2865 /* {{{ proto string mb_strcut(string str, int start [, int length [, string encoding]])
2866    Returns part of a string */
PHP_FUNCTION(mb_strcut)2867 PHP_FUNCTION(mb_strcut)
2868 {
2869 	char *encoding = NULL;
2870 	zend_long from, len;
2871 	size_t encoding_len;
2872 	zend_bool len_is_null = 1;
2873 	mbfl_string string, result, *ret;
2874 
2875 	mbfl_string_init(&string);
2876 
2877 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl|l!s", (char **)&string.val, &string.len, &from, &len, &len_is_null, &encoding, &encoding_len) == FAILURE) {
2878 		return;
2879 	}
2880 
2881 	string.no_language = MBSTRG(language);
2882 	string.encoding = php_mb_get_encoding(encoding);
2883 	if (!string.encoding) {
2884 		RETURN_FALSE;
2885 	}
2886 
2887 	if (len_is_null) {
2888 		len = string.len;
2889 	}
2890 
2891 	/* if "from" position is negative, count start position from the end
2892 	 * of the string
2893 	 */
2894 	if (from < 0) {
2895 		from = string.len + from;
2896 		if (from < 0) {
2897 			from = 0;
2898 		}
2899 	}
2900 
2901 	/* if "length" position is negative, set it to the length
2902 	 * needed to stop that many chars from the end of the string
2903 	 */
2904 	if (len < 0) {
2905 		len = (string.len - from) + len;
2906 		if (len < 0) {
2907 			len = 0;
2908 		}
2909 	}
2910 
2911 	if (from > string.len) {
2912 		RETURN_FALSE;
2913 	}
2914 
2915 	ret = mbfl_strcut(&string, &result, from, len);
2916 	if (ret == NULL) {
2917 		RETURN_FALSE;
2918 	}
2919 
2920 	// TODO: avoid reallocation ???
2921 	RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
2922 	efree(ret->val);
2923 }
2924 /* }}} */
2925 
2926 /* {{{ proto int mb_strwidth(string str [, string encoding])
2927    Gets terminal width of a string */
PHP_FUNCTION(mb_strwidth)2928 PHP_FUNCTION(mb_strwidth)
2929 {
2930 	size_t n;
2931 	mbfl_string string;
2932 	char *enc_name = NULL;
2933 	size_t enc_name_len;
2934 
2935 	mbfl_string_init(&string);
2936 
2937 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|s", (char **)&string.val, &string.len, &enc_name, &enc_name_len) == FAILURE) {
2938 		return;
2939 	}
2940 
2941 	string.no_language = MBSTRG(language);
2942 	string.encoding = php_mb_get_encoding(enc_name);
2943 	if (!string.encoding) {
2944 		RETURN_FALSE;
2945 	}
2946 
2947 	n = mbfl_strwidth(&string);
2948 	if (!mbfl_is_error(n)) {
2949 		RETVAL_LONG(n);
2950 	} else {
2951 		RETVAL_FALSE;
2952 	}
2953 }
2954 /* }}} */
2955 
2956 /* {{{ proto string mb_strimwidth(string str, int start, int width [, string trimmarker [, string encoding]])
2957    Trim the string in terminal width */
PHP_FUNCTION(mb_strimwidth)2958 PHP_FUNCTION(mb_strimwidth)
2959 {
2960 	char *str, *trimmarker = NULL, *encoding = NULL;
2961 	zend_long from, width, swidth;
2962 	size_t str_len, trimmarker_len, encoding_len;
2963 	mbfl_string string, result, marker, *ret;
2964 
2965 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "sll|ss", &str, &str_len, &from, &width, &trimmarker, &trimmarker_len, &encoding, &encoding_len) == FAILURE) {
2966 		return;
2967 	}
2968 
2969 	mbfl_string_init(&string);
2970 	mbfl_string_init(&marker);
2971 
2972 	string.no_language = marker.no_language = MBSTRG(language);
2973 	string.encoding = marker.encoding = php_mb_get_encoding(encoding);
2974 	if (!string.encoding) {
2975 		RETURN_FALSE;
2976 	}
2977 
2978 	string.val = (unsigned char *)str;
2979 	string.len = str_len;
2980 	marker.val = NULL;
2981 	marker.len = 0;
2982 
2983 	if ((from < 0) || (width < 0)) {
2984 		swidth = mbfl_strwidth(&string);
2985 	}
2986 
2987 	if (from < 0) {
2988 		from += swidth;
2989 	}
2990 
2991 	if (from < 0 || (size_t)from > str_len) {
2992 		php_error_docref(NULL, E_WARNING, "Start position is out of range");
2993 		RETURN_FALSE;
2994 	}
2995 
2996 	if (width < 0) {
2997 		width = swidth + width - from;
2998 	}
2999 
3000 	if (width < 0) {
3001 		php_error_docref(NULL, E_WARNING, "Width is out of range");
3002 		RETURN_FALSE;
3003 	}
3004 
3005 	if (trimmarker) {
3006 		marker.val = (unsigned char *)trimmarker;
3007 		marker.len = trimmarker_len;
3008 	}
3009 
3010 	ret = mbfl_strimwidth(&string, &marker, &result, from, width);
3011 
3012 	if (ret == NULL) {
3013 		RETURN_FALSE;
3014 	}
3015 	// TODO: avoid reallocation ???
3016 	RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
3017 	efree(ret->val);
3018 }
3019 /* }}} */
3020 
3021 
3022 /* See mbfl_no_encoding definition for list of unsupported encodings */
php_mb_is_unsupported_no_encoding(enum mbfl_no_encoding no_enc)3023 static inline zend_bool php_mb_is_unsupported_no_encoding(enum mbfl_no_encoding no_enc)
3024 {
3025 	return ((no_enc >= mbfl_no_encoding_invalid && no_enc <= mbfl_no_encoding_qprint)
3026 			|| (no_enc >= mbfl_no_encoding_utf7 && no_enc <= mbfl_no_encoding_utf7imap)
3027 			|| (no_enc >= mbfl_no_encoding_jis && no_enc <= mbfl_no_encoding_2022jpms)
3028 			|| (no_enc >= mbfl_no_encoding_cp50220 && no_enc <= mbfl_no_encoding_cp50222));
3029 }
3030 
3031 
3032 /* See mbfl_no_encoding definition for list of UTF-8 encodings */
php_mb_is_no_encoding_utf8(enum mbfl_no_encoding no_enc)3033 static inline zend_bool php_mb_is_no_encoding_utf8(enum mbfl_no_encoding no_enc)
3034 {
3035 	return (no_enc >= mbfl_no_encoding_utf8 && no_enc <= mbfl_no_encoding_utf8_sb);
3036 }
3037 
php_mb_convert_encoding_ex(const char * input,size_t length,const mbfl_encoding * to_encoding,const mbfl_encoding * from_encoding,size_t * output_len)3038 MBSTRING_API char *php_mb_convert_encoding_ex(const char *input, size_t length, const mbfl_encoding *to_encoding, const mbfl_encoding *from_encoding, size_t *output_len)
3039 {
3040 	mbfl_string string, result, *ret;
3041 	mbfl_buffer_converter *convd;
3042 	char *output = NULL;
3043 
3044 	if (output_len) {
3045 		*output_len = 0;
3046 	}
3047 
3048 	/* initialize string */
3049 	mbfl_string_init(&string);
3050 	mbfl_string_init(&result);
3051 	string.encoding = from_encoding;
3052 	string.no_language = MBSTRG(language);
3053 	string.val = (unsigned char *)input;
3054 	string.len = length;
3055 
3056 	/* initialize converter */
3057 	convd = mbfl_buffer_converter_new(from_encoding, to_encoding, string.len);
3058 	if (convd == NULL) {
3059 		php_error_docref(NULL, E_WARNING, "Unable to create character encoding converter");
3060 		return NULL;
3061 	}
3062 
3063 	mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
3064 	mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
3065 
3066 	/* do it */
3067 	ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
3068 	if (ret) {
3069 		if (output_len) {
3070 			*output_len = ret->len;
3071 		}
3072 		output = (char *)ret->val;
3073 	}
3074 
3075 	MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
3076 	mbfl_buffer_converter_delete(convd);
3077 	return output;
3078 }
3079 /* }}} */
3080 
3081 /* {{{ MBSTRING_API char *php_mb_convert_encoding() */
php_mb_convert_encoding(const char * input,size_t length,const char * _to_encoding,const char * _from_encodings,size_t * output_len)3082 MBSTRING_API char *php_mb_convert_encoding(const char *input, size_t length, const char *_to_encoding, const char *_from_encodings, size_t *output_len)
3083 {
3084 	const mbfl_encoding *from_encoding, *to_encoding;
3085 
3086 	if (output_len) {
3087 		*output_len = 0;
3088 	}
3089 	if (!input) {
3090 		return NULL;
3091 	}
3092 	/* new encoding */
3093 	if (_to_encoding && strlen(_to_encoding)) {
3094 		to_encoding = mbfl_name2encoding(_to_encoding);
3095 		if (!to_encoding) {
3096 			php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", _to_encoding);
3097 			return NULL;
3098 		}
3099 	} else {
3100 		to_encoding = MBSTRG(current_internal_encoding);
3101 	}
3102 
3103 	/* pre-conversion encoding */
3104 	from_encoding = MBSTRG(current_internal_encoding);
3105 	if (_from_encodings) {
3106 		const mbfl_encoding **list = NULL;
3107 		size_t size = 0;
3108 		php_mb_parse_encoding_list(_from_encodings, strlen(_from_encodings), &list, &size, 0);
3109 		if (size == 1) {
3110 			from_encoding = *list;
3111 		} else if (size > 1) {
3112 			/* auto detect */
3113 			mbfl_string string;
3114 			mbfl_string_init(&string);
3115 			string.val = (unsigned char *)input;
3116 			string.len = length;
3117 			from_encoding = mbfl_identify_encoding(&string, list, size, MBSTRG(strict_detection));
3118 			if (!from_encoding) {
3119 				php_error_docref(NULL, E_WARNING, "Unable to detect character encoding");
3120 				from_encoding = &mbfl_encoding_pass;
3121 			}
3122 		} else {
3123 			php_error_docref(NULL, E_WARNING, "Illegal character encoding specified");
3124 		}
3125 		if (list != NULL) {
3126 			efree((void *)list);
3127 		}
3128 	}
3129 
3130 	return php_mb_convert_encoding_ex(input, length, to_encoding, from_encoding, output_len);
3131 }
3132 /* }}} */
3133 
php_mb_convert_encoding_recursive(HashTable * input,const char * _to_encoding,const char * _from_encodings)3134 MBSTRING_API HashTable *php_mb_convert_encoding_recursive(HashTable *input, const char *_to_encoding, const char *_from_encodings)
3135 {
3136 	HashTable *output, *chash;
3137 	zend_long idx;
3138 	zend_string *key;
3139 	zval *entry, entry_tmp;
3140 	size_t ckey_len, cval_len;
3141 	char *ckey, *cval;
3142 
3143 	if (!input) {
3144 		return NULL;
3145 	}
3146 
3147 	if (GC_IS_RECURSIVE(input)) {
3148 		GC_UNPROTECT_RECURSION(input);
3149 		php_error_docref(NULL, E_WARNING, "Cannot convert recursively referenced values");
3150 		return NULL;
3151 	}
3152 	GC_TRY_PROTECT_RECURSION(input);
3153 	output = zend_new_array(zend_hash_num_elements(input));
3154 	ZEND_HASH_FOREACH_KEY_VAL(input, idx, key, entry) {
3155 		/* convert key */
3156 		if (key) {
3157 			ckey = php_mb_convert_encoding(ZSTR_VAL(key), ZSTR_LEN(key), _to_encoding, _from_encodings, &ckey_len);
3158 			key = zend_string_init(ckey, ckey_len, 0);
3159 			efree(ckey);
3160 		}
3161 		/* convert value */
3162 		ZEND_ASSERT(entry);
3163 		switch(Z_TYPE_P(entry)) {
3164 			case IS_STRING:
3165 				cval = php_mb_convert_encoding(Z_STRVAL_P(entry), Z_STRLEN_P(entry), _to_encoding, _from_encodings, &cval_len);
3166 				ZVAL_STRINGL(&entry_tmp, cval, cval_len);
3167 				efree(cval);
3168 				break;
3169 			case IS_NULL:
3170 			case IS_TRUE:
3171 			case IS_FALSE:
3172 			case IS_LONG:
3173 			case IS_DOUBLE:
3174 				ZVAL_COPY(&entry_tmp, entry);
3175 				break;
3176 			case IS_ARRAY:
3177 				chash = php_mb_convert_encoding_recursive(HASH_OF(entry), _to_encoding, _from_encodings);
3178 				if (chash) {
3179 					ZVAL_ARR(&entry_tmp, chash);
3180 				} else {
3181 					ZVAL_EMPTY_ARRAY(&entry_tmp);
3182 				}
3183 				break;
3184 			case IS_OBJECT:
3185 			default:
3186 				if (key) {
3187 					zend_string_release(key);
3188 				}
3189 				php_error_docref(NULL, E_WARNING, "Object is not supported");
3190 				continue;
3191 		}
3192 		if (key) {
3193 			zend_hash_add(output, key, &entry_tmp);
3194 			zend_string_release(key);
3195 		} else {
3196 			zend_hash_index_add(output, idx, &entry_tmp);
3197 		}
3198 	} ZEND_HASH_FOREACH_END();
3199 	GC_TRY_UNPROTECT_RECURSION(input);
3200 
3201 	return output;
3202 }
3203 /* }}} */
3204 
3205 
3206 /* {{{ proto string mb_convert_encoding(string str, string to-encoding [, mixed from-encoding])
3207    Returns converted string in desired encoding */
PHP_FUNCTION(mb_convert_encoding)3208 PHP_FUNCTION(mb_convert_encoding)
3209 {
3210 	zval *input;
3211 	char *arg_new;
3212 	size_t new_len;
3213 	zval *arg_old = NULL;
3214 	size_t size, l, n;
3215 	char *_from_encodings = NULL, *ret, *s_free = NULL;
3216 
3217 	zval *hash_entry;
3218 	HashTable *target_hash;
3219 
3220 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "zs|z", &input, &arg_new, &new_len, &arg_old) == FAILURE) {
3221 		return;
3222 	}
3223 
3224 	if (Z_TYPE_P(input) != IS_STRING && Z_TYPE_P(input) != IS_ARRAY) {
3225 		convert_to_string(input);
3226 	}
3227 
3228 	if (arg_old) {
3229 		switch (Z_TYPE_P(arg_old)) {
3230 			case IS_ARRAY:
3231 				target_hash = Z_ARRVAL_P(arg_old);
3232 				_from_encodings = NULL;
3233 
3234 				ZEND_HASH_FOREACH_VAL(target_hash, hash_entry) {
3235 					zend_string *encoding_str = zval_get_string(hash_entry);
3236 
3237 					if ( _from_encodings) {
3238 						l = strlen(_from_encodings);
3239 						n = strlen(ZSTR_VAL(encoding_str));
3240 						_from_encodings = erealloc(_from_encodings, l+n+2);
3241 						memcpy(_from_encodings + l, ",", 1);
3242 						memcpy(_from_encodings + l + 1, ZSTR_VAL(encoding_str), ZSTR_LEN(encoding_str) + 1);
3243 					} else {
3244 						_from_encodings = estrdup(ZSTR_VAL(encoding_str));
3245 					}
3246 					zend_string_release(encoding_str);
3247 				} ZEND_HASH_FOREACH_END();
3248 
3249 				if (_from_encodings != NULL && !strlen(_from_encodings)) {
3250 					efree(_from_encodings);
3251 					_from_encodings = NULL;
3252 				}
3253 				s_free = _from_encodings;
3254 				break;
3255 			default:
3256 				convert_to_string(arg_old);
3257 				_from_encodings = Z_STRVAL_P(arg_old);
3258 				break;
3259 			}
3260 	}
3261 
3262 	if (Z_TYPE_P(input) == IS_STRING) {
3263 		/* new encoding */
3264 		ret = php_mb_convert_encoding(Z_STRVAL_P(input), Z_STRLEN_P(input), arg_new, _from_encodings, &size);
3265 		if (ret != NULL) {
3266 			// TODO: avoid reallocation ???
3267 			RETVAL_STRINGL(ret, size);		/* the string is already strdup()'ed */
3268 			efree(ret);
3269 		} else {
3270 			RETVAL_FALSE;
3271 		}
3272 		if (s_free) {
3273 			efree(s_free);
3274 		}
3275 	} else {
3276 		HashTable *tmp;
3277 		tmp = php_mb_convert_encoding_recursive(HASH_OF(input), arg_new, _from_encodings);
3278 		RETURN_ARR(tmp);
3279 	}
3280 
3281 	return;
3282 }
3283 /* }}} */
3284 
/* Thin wrapper around php_unicode_convert_case() that supplies the currently
 * configured illegal-character mode and substitution character from the module
 * globals. Returns whatever php_unicode_convert_case() returns; *ret_len is
 * passed straight through to it. */
static char *mbstring_convert_case(
		int case_mode, const char *str, size_t str_len, size_t *ret_len,
		const mbfl_encoding *enc)
{
	char *converted;

	converted = php_unicode_convert_case(
		case_mode, str, str_len, ret_len, enc,
		MBSTRG(current_filter_illegal_mode), MBSTRG(current_filter_illegal_substchar));

	return converted;
}
3292 
3293 /* {{{ proto string mb_convert_case(string sourcestring, int mode [, string encoding])
3294    Returns a case-folded version of sourcestring */
PHP_FUNCTION(mb_convert_case)3295 PHP_FUNCTION(mb_convert_case)
3296 {
3297 	const char *from_encoding = NULL;
3298 	char *str;
3299 	size_t str_len, from_encoding_len;
3300 	zend_long case_mode = 0;
3301 	char *newstr;
3302 	size_t ret_len;
3303 	const mbfl_encoding *enc;
3304 
3305 	RETVAL_FALSE;
3306 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl|s!", &str, &str_len,
3307 				&case_mode, &from_encoding, &from_encoding_len) == FAILURE) {
3308 		return;
3309 	}
3310 
3311 	enc = php_mb_get_encoding(from_encoding);
3312 	if (!enc) {
3313 		return;
3314 	}
3315 
3316 	if (case_mode < 0 || case_mode > PHP_UNICODE_CASE_MODE_MAX) {
3317 		php_error_docref(NULL, E_WARNING, "Invalid case mode");
3318 		return;
3319 	}
3320 
3321 	newstr = mbstring_convert_case(case_mode, str, str_len, &ret_len, enc);
3322 
3323 	if (newstr) {
3324 		// TODO: avoid reallocation ???
3325 		RETVAL_STRINGL(newstr, ret_len);
3326 		efree(newstr);
3327 	}
3328 }
3329 /* }}} */
3330 
3331 /* {{{ proto string mb_strtoupper(string sourcestring [, string encoding])
3332  *  Returns a uppercased version of sourcestring
3333  */
PHP_FUNCTION(mb_strtoupper)3334 PHP_FUNCTION(mb_strtoupper)
3335 {
3336 	const char *from_encoding = NULL;
3337 	char *str;
3338 	size_t str_len, from_encoding_len;
3339 	char *newstr;
3340 	size_t ret_len;
3341 	const mbfl_encoding *enc;
3342 
3343 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|s!", &str, &str_len,
3344 				&from_encoding, &from_encoding_len) == FAILURE) {
3345 		return;
3346 	}
3347 
3348 	enc = php_mb_get_encoding(from_encoding);
3349 	if (!enc) {
3350 		RETURN_FALSE;
3351 	}
3352 
3353 	newstr = mbstring_convert_case(PHP_UNICODE_CASE_UPPER, str, str_len, &ret_len, enc);
3354 
3355 	if (newstr) {
3356 		// TODO: avoid reallocation ???
3357 		RETVAL_STRINGL(newstr, ret_len);
3358 		efree(newstr);
3359 		return;
3360 	}
3361 	RETURN_FALSE;
3362 }
3363 /* }}} */
3364 
3365 /* {{{ proto string mb_strtolower(string sourcestring [, string encoding])
3366  *  Returns a lowercased version of sourcestring
3367  */
PHP_FUNCTION(mb_strtolower)3368 PHP_FUNCTION(mb_strtolower)
3369 {
3370 	const char *from_encoding = NULL;
3371 	char *str;
3372 	size_t str_len, from_encoding_len;
3373 	char *newstr;
3374 	size_t ret_len;
3375 	const mbfl_encoding *enc;
3376 
3377 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|s!", &str, &str_len,
3378 				&from_encoding, &from_encoding_len) == FAILURE) {
3379 		return;
3380 	}
3381 
3382 	enc = php_mb_get_encoding(from_encoding);
3383 	if (!enc) {
3384 		RETURN_FALSE;
3385 	}
3386 
3387 	newstr = mbstring_convert_case(PHP_UNICODE_CASE_LOWER, str, str_len, &ret_len, enc);
3388 
3389 	if (newstr) {
3390 		// TODO: avoid reallocation ???
3391 		RETVAL_STRINGL(newstr, ret_len);
3392 		efree(newstr);
3393 		return;
3394 	}
3395 	RETURN_FALSE;
3396 }
3397 /* }}} */
3398 
3399 /* {{{ proto string mb_detect_encoding(string str [, mixed encoding_list [, bool strict]])
3400    Encodings of the given string is returned (as a string) */
PHP_FUNCTION(mb_detect_encoding)3401 PHP_FUNCTION(mb_detect_encoding)
3402 {
3403 	char *str;
3404 	size_t str_len;
3405 	zend_bool strict=0;
3406 	zval *encoding_list = NULL;
3407 
3408 	mbfl_string string;
3409 	const mbfl_encoding *ret;
3410 	const mbfl_encoding **elist, **list;
3411 	size_t size;
3412 
3413 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|z!b", &str, &str_len, &encoding_list, &strict) == FAILURE) {
3414 		return;
3415 	}
3416 
3417 	/* make encoding list */
3418 	list = NULL;
3419 	size = 0;
3420 	if (encoding_list) {
3421 		switch (Z_TYPE_P(encoding_list)) {
3422 		case IS_ARRAY:
3423 			if (FAILURE == php_mb_parse_encoding_array(encoding_list, &list, &size, 0)) {
3424 				if (list) {
3425 					efree(list);
3426 					list = NULL;
3427 					size = 0;
3428 				}
3429 			}
3430 			break;
3431 		default:
3432 			convert_to_string(encoding_list);
3433 			if (FAILURE == php_mb_parse_encoding_list(Z_STRVAL_P(encoding_list), Z_STRLEN_P(encoding_list), &list, &size, 0)) {
3434 				if (list) {
3435 					efree(list);
3436 					list = NULL;
3437 					size = 0;
3438 				}
3439 			}
3440 			break;
3441 		}
3442 		if (size == 0) {
3443 			php_error_docref(NULL, E_WARNING, "Illegal argument");
3444 		}
3445 	}
3446 
3447 	if (ZEND_NUM_ARGS() < 3) {
3448 		strict = MBSTRG(strict_detection);
3449 	}
3450 
3451 	if (size > 0 && list != NULL) {
3452 		elist = list;
3453 	} else {
3454 		elist = MBSTRG(current_detect_order_list);
3455 		size = MBSTRG(current_detect_order_list_size);
3456 	}
3457 
3458 	mbfl_string_init(&string);
3459 	string.no_language = MBSTRG(language);
3460 	string.val = (unsigned char *)str;
3461 	string.len = str_len;
3462 	ret = mbfl_identify_encoding(&string, elist, size, strict);
3463 
3464 	if (list != NULL) {
3465 		efree((void *)list);
3466 	}
3467 
3468 	if (ret == NULL) {
3469 		RETURN_FALSE;
3470 	}
3471 
3472 	RETVAL_STRING((char *)ret->name);
3473 }
3474 /* }}} */
3475 
3476 /* {{{ proto mixed mb_list_encodings()
3477    Returns an array of all supported entity encodings */
PHP_FUNCTION(mb_list_encodings)3478 PHP_FUNCTION(mb_list_encodings)
3479 {
3480 	const mbfl_encoding **encodings;
3481 	const mbfl_encoding *encoding;
3482 	int i;
3483 
3484 	if (zend_parse_parameters_none() == FAILURE) {
3485 		return;
3486 	}
3487 
3488 	array_init(return_value);
3489 	i = 0;
3490 	encodings = mbfl_get_supported_encodings();
3491 	while ((encoding = encodings[i++]) != NULL) {
3492 		add_next_index_string(return_value, (char *) encoding->name);
3493 	}
3494 }
3495 /* }}} */
3496 
3497 /* {{{ proto array mb_encoding_aliases(string encoding)
3498    Returns an array of the aliases of a given encoding name */
PHP_FUNCTION(mb_encoding_aliases)3499 PHP_FUNCTION(mb_encoding_aliases)
3500 {
3501 	const mbfl_encoding *encoding;
3502 	char *name = NULL;
3503 	size_t name_len;
3504 
3505 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &name, &name_len) == FAILURE) {
3506 		return;
3507 	}
3508 
3509 	encoding = mbfl_name2encoding(name);
3510 	if (!encoding) {
3511 		php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", name);
3512 		RETURN_FALSE;
3513 	}
3514 
3515 	array_init(return_value);
3516 	if (encoding->aliases != NULL) {
3517 		const char **alias;
3518 		for (alias = *encoding->aliases; *alias; ++alias) {
3519 			add_next_index_string(return_value, (char *)*alias);
3520 		}
3521 	}
3522 }
3523 /* }}} */
3524 
3525 /* {{{ proto string mb_encode_mimeheader(string str [, string charset [, string transfer-encoding [, string linefeed [, int indent]]]])
3526    Converts the string to MIME "encoded-word" in the format of =?charset?(B|Q)?encoded_string?= */
PHP_FUNCTION(mb_encode_mimeheader)3527 PHP_FUNCTION(mb_encode_mimeheader)
3528 {
3529 	const mbfl_encoding *charset, *transenc;
3530 	mbfl_string  string, result, *ret;
3531 	char *charset_name = NULL;
3532 	size_t charset_name_len;
3533 	char *trans_enc_name = NULL;
3534 	size_t trans_enc_name_len;
3535 	char *linefeed = "\r\n";
3536 	size_t linefeed_len;
3537 	zend_long indent = 0;
3538 
3539 	mbfl_string_init(&string);
3540 	string.no_language = MBSTRG(language);
3541 	string.encoding = MBSTRG(current_internal_encoding);
3542 
3543 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|sssl", (char **)&string.val, &string.len, &charset_name, &charset_name_len, &trans_enc_name, &trans_enc_name_len, &linefeed, &linefeed_len, &indent) == FAILURE) {
3544 		return;
3545 	}
3546 
3547 	charset = &mbfl_encoding_pass;
3548 	transenc = &mbfl_encoding_base64;
3549 
3550 	if (charset_name != NULL) {
3551 		charset = mbfl_name2encoding(charset_name);
3552 		if (!charset) {
3553 			php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", charset_name);
3554 			RETURN_FALSE;
3555 		}
3556 	} else {
3557 		const mbfl_language *lang = mbfl_no2language(MBSTRG(language));
3558 		if (lang != NULL) {
3559 			charset = mbfl_no2encoding(lang->mail_charset);
3560 			transenc = mbfl_no2encoding(lang->mail_header_encoding);
3561 		}
3562 	}
3563 
3564 	if (trans_enc_name != NULL) {
3565 		if (*trans_enc_name == 'B' || *trans_enc_name == 'b') {
3566 			transenc = &mbfl_encoding_base64;
3567 		} else if (*trans_enc_name == 'Q' || *trans_enc_name == 'q') {
3568 			transenc = &mbfl_encoding_qprint;
3569 		}
3570 	}
3571 
3572 	mbfl_string_init(&result);
3573 	ret = mbfl_mime_header_encode(&string, &result, charset, transenc, linefeed, indent);
3574 	if (ret != NULL) {
3575 		// TODO: avoid reallocation ???
3576 		RETVAL_STRINGL((char *)ret->val, ret->len);	/* the string is already strdup()'ed */
3577 		efree(ret->val);
3578 	} else {
3579 		RETVAL_FALSE;
3580 	}
3581 }
3582 /* }}} */
3583 
3584 /* {{{ proto string mb_decode_mimeheader(string string)
3585    Decodes the MIME "encoded-word" in the string */
PHP_FUNCTION(mb_decode_mimeheader)3586 PHP_FUNCTION(mb_decode_mimeheader)
3587 {
3588 	mbfl_string string, result, *ret;
3589 
3590 	mbfl_string_init(&string);
3591 	string.no_language = MBSTRG(language);
3592 	string.encoding = MBSTRG(current_internal_encoding);
3593 
3594 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", (char **)&string.val, &string.len) == FAILURE) {
3595 		return;
3596 	}
3597 
3598 	mbfl_string_init(&result);
3599 	ret = mbfl_mime_header_decode(&string, &result, MBSTRG(current_internal_encoding));
3600 	if (ret != NULL) {
3601 		// TODO: avoid reallocation ???
3602 		RETVAL_STRINGL((char *)ret->val, ret->len);	/* the string is already strdup()'ed */
3603 		efree(ret->val);
3604 	} else {
3605 		RETVAL_FALSE;
3606 	}
3607 }
3608 /* }}} */
3609 
3610 /* {{{ proto string mb_convert_kana(string str [, string option] [, string encoding])
3611    Conversion between full-width character and half-width character (Japanese) */
PHP_FUNCTION(mb_convert_kana)3612 PHP_FUNCTION(mb_convert_kana)
3613 {
3614 	int opt;
3615 	mbfl_string string, result, *ret;
3616 	char *optstr = NULL;
3617 	size_t optstr_len;
3618 	char *encname = NULL;
3619 	size_t encname_len;
3620 
3621 	mbfl_string_init(&string);
3622 
3623 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|ss", (char **)&string.val, &string.len, &optstr, &optstr_len, &encname, &encname_len) == FAILURE) {
3624 		return;
3625 	}
3626 
3627 	/* option */
3628 	if (optstr != NULL) {
3629 		char *p = optstr;
3630 		size_t i = 0, n = optstr_len;
3631 		opt = 0;
3632 		while (i < n) {
3633 			i++;
3634 			switch (*p++) {
3635 			case 'A':
3636 				opt |= 0x1;
3637 				break;
3638 			case 'a':
3639 				opt |= 0x10;
3640 				break;
3641 			case 'R':
3642 				opt |= 0x2;
3643 				break;
3644 			case 'r':
3645 				opt |= 0x20;
3646 				break;
3647 			case 'N':
3648 				opt |= 0x4;
3649 				break;
3650 			case 'n':
3651 				opt |= 0x40;
3652 				break;
3653 			case 'S':
3654 				opt |= 0x8;
3655 				break;
3656 			case 's':
3657 				opt |= 0x80;
3658 				break;
3659 			case 'K':
3660 				opt |= 0x100;
3661 				break;
3662 			case 'k':
3663 				opt |= 0x1000;
3664 				break;
3665 			case 'H':
3666 				opt |= 0x200;
3667 				break;
3668 			case 'h':
3669 				opt |= 0x2000;
3670 				break;
3671 			case 'V':
3672 				opt |= 0x800;
3673 				break;
3674 			case 'C':
3675 				opt |= 0x10000;
3676 				break;
3677 			case 'c':
3678 				opt |= 0x20000;
3679 				break;
3680 			case 'M':
3681 				opt |= 0x100000;
3682 				break;
3683 			case 'm':
3684 				opt |= 0x200000;
3685 				break;
3686 			}
3687 		}
3688 	} else {
3689 		opt = 0x900;
3690 	}
3691 
3692 	/* encoding */
3693 	string.no_language = MBSTRG(language);
3694 	string.encoding = php_mb_get_encoding(encname);
3695 	if (!string.encoding) {
3696 		RETURN_FALSE;
3697 	}
3698 
3699 	ret = mbfl_ja_jp_hantozen(&string, &result, opt);
3700 	if (ret != NULL) {
3701 		// TODO: avoid reallocation ???
3702 		RETVAL_STRINGL((char *)ret->val, ret->len);		/* the string is already strdup()'ed */
3703 		efree(ret->val);
3704 	} else {
3705 		RETVAL_FALSE;
3706 	}
3707 }
3708 /* }}} */
3709 
/* Feeds every string reachable from `var` (following references and descending
 * into arrays and objects) to the encoding detector.
 * Returns 1 as soon as mbfl_encoding_detector_feed() reports that detection is
 * complete, 0 otherwise. When a container that is already being visited is met,
 * *recursion_error is set to 1 and the walk is abandoned with a 0 result. */
static int mb_recursive_encoder_detector_feed(mbfl_encoding_detector *identd, zval *var, int *recursion_error) /* {{{ */
{
	mbfl_string chunk;
	HashTable *members;
	zval *member;
	int finished = 0;

	ZVAL_DEREF(var);

	if (Z_TYPE_P(var) == IS_STRING) {
		chunk.val = (unsigned char *)Z_STRVAL_P(var);
		chunk.len = Z_STRLEN_P(var);
		if (mbfl_encoding_detector_feed(identd, &chunk)) {
			return 1; /* complete detecting */
		}
		return 0;
	}

	/* Anything that is neither a string nor a container is ignored. */
	if (Z_TYPE_P(var) != IS_ARRAY && Z_TYPE_P(var) != IS_OBJECT) {
		return 0;
	}

	/* Mark the container while it is being walked so cycles are noticed. */
	if (Z_REFCOUNTED_P(var)) {
		if (Z_IS_RECURSIVE_P(var)) {
			*recursion_error = 1;
			return 0;
		}
		Z_PROTECT_RECURSION_P(var);
	}

	members = HASH_OF(var);
	if (members != NULL) {
		ZEND_HASH_FOREACH_VAL_IND(members, member) {
			if (mb_recursive_encoder_detector_feed(identd, member, recursion_error)) {
				finished = 1;
				break;
			}
			if (*recursion_error) {
				break;
			}
		} ZEND_HASH_FOREACH_END();
	}

	/* Single exit for all three outcomes: detected, recursion error, exhausted. */
	if (Z_REFCOUNTED_P(var)) {
		Z_UNPROTECT_RECURSION_P(var);
	}
	return finished;
} /* }}} */
3755 
/* Converts, in place, every string reachable from `var` using `convd`.
 *
 * A string is run through mbfl_buffer_converter_feed_result(); on success the
 * zval that was passed in (before dereferencing) is destroyed and overwritten
 * with the converted string. An array is separated first and then, like an
 * object, walked through HASH_OF() with each element converted recursively.
 * Other types are left untouched.
 *
 * Returns 1 when a container already marked as being visited is reached again
 * (recursive structure), 0 otherwise.
 */
static int mb_recursive_convert_variable(mbfl_buffer_converter *convd, zval *var) /* {{{ */
{
	mbfl_string string, result, *ret;
	HashTable *ht;
	zval *entry;
	zval *orig_var = var; /* the slot that receives the converted string */
	int recursive = 0;

	ZVAL_DEREF(var);

	if (Z_TYPE_P(var) == IS_STRING) {
		string.val = (unsigned char *)Z_STRVAL_P(var);
		string.len = Z_STRLEN_P(var);
		ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
		if (ret != NULL) {
			zval_ptr_dtor(orig_var);
			// TODO: avoid reallocation ???
			ZVAL_STRINGL(orig_var, (char *)ret->val, ret->len);
			efree(ret->val);
		}
		return 0;
	}

	if (Z_TYPE_P(var) != IS_ARRAY && Z_TYPE_P(var) != IS_OBJECT) {
		return 0;
	}

	if (Z_TYPE_P(var) == IS_ARRAY) {
		SEPARATE_ARRAY(var);
	}

	if (Z_REFCOUNTED_P(var)) {
		if (Z_IS_RECURSIVE_P(var)) {
			return 1;
		}
		Z_PROTECT_RECURSION_P(var);
	}

	ht = HASH_OF(var);
	if (ht != NULL) {
		ZEND_HASH_FOREACH_VAL_IND(ht, entry) {
			if (mb_recursive_convert_variable(convd, entry)) {
				recursive = 1;
				break;
			}
		} ZEND_HASH_FOREACH_END();
	}

	if (Z_REFCOUNTED_P(var)) {
		Z_UNPROTECT_RECURSION_P(var);
	}
	return recursive;
} /* }}} */
3803 
3804 /* {{{ proto string mb_convert_variables(string to-encoding, mixed from-encoding, mixed vars [, ...])
3805    Converts the string resource in variables to desired encoding */
PHP_FUNCTION(mb_convert_variables)3806 PHP_FUNCTION(mb_convert_variables)
3807 {
3808 	zval *args, *zfrom_enc;
3809 	mbfl_string string, result;
3810 	const mbfl_encoding *from_encoding, *to_encoding;
3811 	mbfl_encoding_detector *identd;
3812 	mbfl_buffer_converter *convd;
3813 	int n, argc;
3814 	size_t to_enc_len;
3815 	size_t elistsz;
3816 	const mbfl_encoding **elist;
3817 	char *to_enc;
3818 	int recursion_error = 0;
3819 
3820 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "sz+", &to_enc, &to_enc_len, &zfrom_enc, &args, &argc) == FAILURE) {
3821 		return;
3822 	}
3823 
3824 	/* new encoding */
3825 	to_encoding = mbfl_name2encoding(to_enc);
3826 	if (!to_encoding) {
3827 		php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", to_enc);
3828 		RETURN_FALSE;
3829 	}
3830 
3831 	/* initialize string */
3832 	mbfl_string_init(&string);
3833 	mbfl_string_init(&result);
3834 	from_encoding = MBSTRG(current_internal_encoding);
3835 	string.encoding = from_encoding;
3836 	string.no_language = MBSTRG(language);
3837 
3838 	/* pre-conversion encoding */
3839 	elist = NULL;
3840 	elistsz = 0;
3841 	switch (Z_TYPE_P(zfrom_enc)) {
3842 		case IS_ARRAY:
3843 			php_mb_parse_encoding_array(zfrom_enc, &elist, &elistsz, 0);
3844 			break;
3845 		default:
3846 			convert_to_string_ex(zfrom_enc);
3847 			php_mb_parse_encoding_list(Z_STRVAL_P(zfrom_enc), Z_STRLEN_P(zfrom_enc), &elist, &elistsz, 0);
3848 			break;
3849 	}
3850 
3851 	if (elistsz == 0) {
3852 		from_encoding = &mbfl_encoding_pass;
3853 	} else if (elistsz == 1) {
3854 		from_encoding = *elist;
3855 	} else {
3856 		/* auto detect */
3857 		from_encoding = NULL;
3858 		identd = mbfl_encoding_detector_new(elist, elistsz, MBSTRG(strict_detection));
3859 		if (identd != NULL) {
3860 			n = 0;
3861 			while (n < argc) {
3862 				if (mb_recursive_encoder_detector_feed(identd, &args[n], &recursion_error)) {
3863 					break;
3864 				}
3865 				n++;
3866 			}
3867 			from_encoding = mbfl_encoding_detector_judge(identd);
3868 			mbfl_encoding_detector_delete(identd);
3869 			if (recursion_error) {
3870 				if (elist != NULL) {
3871 					efree((void *)elist);
3872 				}
3873 				php_error_docref(NULL, E_WARNING, "Cannot handle recursive references");
3874 				RETURN_FALSE;
3875 			}
3876 		}
3877 
3878 		if (!from_encoding) {
3879 			php_error_docref(NULL, E_WARNING, "Unable to detect encoding");
3880 			from_encoding = &mbfl_encoding_pass;
3881 		}
3882 	}
3883 	if (elist != NULL) {
3884 		efree((void *)elist);
3885 	}
3886 	/* create converter */
3887 	convd = NULL;
3888 	if (from_encoding != &mbfl_encoding_pass) {
3889 		convd = mbfl_buffer_converter_new(from_encoding, to_encoding, 0);
3890 		if (convd == NULL) {
3891 			php_error_docref(NULL, E_WARNING, "Unable to create converter");
3892 			RETURN_FALSE;
3893 		}
3894 		mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
3895 		mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
3896 	}
3897 
3898 	/* convert */
3899 	if (convd != NULL) {
3900 		n = 0;
3901 		while (n < argc) {
3902 			zval *zv = &args[n];
3903 
3904 			ZVAL_DEREF(zv);
3905 			recursion_error = mb_recursive_convert_variable(convd, zv);
3906 			if (recursion_error) {
3907 				break;
3908 			}
3909 			n++;
3910 		}
3911 
3912 		MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
3913 		mbfl_buffer_converter_delete(convd);
3914 
3915 		if (recursion_error) {
3916 			php_error_docref(NULL, E_WARNING, "Cannot handle recursive references");
3917 			RETURN_FALSE;
3918 		}
3919 	}
3920 
3921 	if (from_encoding) {
3922 		RETURN_STRING(from_encoding->name);
3923 	} else {
3924 		RETURN_FALSE;
3925 	}
3926 }
3927 /* }}} */
3928 
3929 /* {{{ HTML numeric entity */
3930 /* {{{ static void php_mb_numericentity_exec() */
3931 static void
php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS,int type)3932 php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS, int type)
3933 {
3934 	char *str, *encoding = NULL;
3935 	size_t str_len, encoding_len;
3936 	zval *zconvmap, *hash_entry;
3937 	HashTable *target_hash;
3938 	int i, *convmap, *mapelm, mapsize=0;
3939 	zend_bool is_hex = 0;
3940 	mbfl_string string, result, *ret;
3941 
3942 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "sz|sb", &str, &str_len, &zconvmap, &encoding, &encoding_len, &is_hex) == FAILURE) {
3943 		return;
3944 	}
3945 
3946 	mbfl_string_init(&string);
3947 	string.no_language = MBSTRG(language);
3948 	string.encoding = MBSTRG(current_internal_encoding);
3949 	string.val = (unsigned char *)str;
3950 	string.len = str_len;
3951 
3952 	/* encoding */
3953 	if (encoding && encoding_len > 0) {
3954 		string.encoding = mbfl_name2encoding(encoding);
3955 		if (!string.encoding) {
3956 			php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", encoding);
3957 			RETURN_FALSE;
3958 		}
3959 	}
3960 
3961 	if (type == 0 && is_hex) {
3962 		type = 2; /* output in hex format */
3963 	}
3964 
3965 	/* conversion map */
3966 	convmap = NULL;
3967 	if (Z_TYPE_P(zconvmap) == IS_ARRAY) {
3968 		target_hash = Z_ARRVAL_P(zconvmap);
3969 		i = zend_hash_num_elements(target_hash);
3970 		if (i > 0) {
3971 			convmap = (int *)safe_emalloc(i, sizeof(int), 0);
3972 			mapelm = convmap;
3973 			mapsize = 0;
3974 			ZEND_HASH_FOREACH_VAL(target_hash, hash_entry) {
3975 				*mapelm++ = zval_get_long(hash_entry);
3976 				mapsize++;
3977 			} ZEND_HASH_FOREACH_END();
3978 		}
3979 	}
3980 	if (convmap == NULL) {
3981 		RETURN_FALSE;
3982 	}
3983 	mapsize /= 4;
3984 
3985 	ret = mbfl_html_numeric_entity(&string, &result, convmap, mapsize, type);
3986 	if (ret != NULL) {
3987 		// TODO: avoid reallocation ???
3988 		RETVAL_STRINGL((char *)ret->val, ret->len);
3989 		efree(ret->val);
3990 	} else {
3991 		RETVAL_FALSE;
3992 	}
3993 	efree((void *)convmap);
3994 }
3995 /* }}} */
3996 
3997 /* {{{ proto string mb_encode_numericentity(string string, array convmap [, string encoding [, bool is_hex]])
3998    Converts specified characters to HTML numeric entities */
PHP_FUNCTION(mb_encode_numericentity)3999 PHP_FUNCTION(mb_encode_numericentity)
4000 {
4001 	php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
4002 }
4003 /* }}} */
4004 
4005 /* {{{ proto string mb_decode_numericentity(string string, array convmap [, string encoding])
4006    Converts HTML numeric entities to character code */
PHP_FUNCTION(mb_decode_numericentity)4007 PHP_FUNCTION(mb_decode_numericentity)
4008 {
4009 	php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
4010 }
4011 /* }}} */
4012 /* }}} */
4013 
4014 /* {{{ proto int mb_send_mail(string to, string subject, string message [, string additional_headers [, string additional_parameters]])
4015  *  Sends an email message with MIME scheme
4016  */
4017 
/* Must be expanded inside a loop that indexes `str` with the lvalue `pos`.
 * When str[pos] starts a folded-header separator (CR LF followed by a space
 * or tab), `pos` is advanced to the last character of that whitespace run and
 * the enclosing loop is `continue`d, so the code after the macro is skipped
 * for the separator. Otherwise the macro does nothing.
 * Both arguments are evaluated more than once. */
#define SKIP_LONG_HEADER_SEP_MBSTRING(str, pos)										\
	if ((str)[(pos)] == '\r' && (str)[(pos) + 1] == '\n'								\
			&& ((str)[(pos) + 2] == ' ' || (str)[(pos) + 2] == '\t')) {					\
		(pos) += 2;												\
		while ((str)[(pos) + 1] == ' ' || (str)[(pos) + 1] == '\t') {						\
			(pos)++;											\
		}													\
		continue;												\
	}
4026 
/* Replaces, in place, every NUL byte within the first `len` bytes of `str`
 * with a space. Relies on two `char *` variables named `pp` and `ee` being
 * declared in the expanding scope; both are overwritten.
 * Wrapped in do { } while (0) so that the expansion is a single statement. */
#define MAIL_ASCIIZ_CHECK_MBSTRING(str, len)			\
	do {							\
		pp = (str);					\
		ee = pp + (len);				\
		while ((pp = memchr(pp, '\0', (ee - pp)))) {	\
			*pp = ' ';				\
		}						\
	} while (0)
4033 
_php_mbstr_parse_mail_headers(HashTable * ht,const char * str,size_t str_len)4034 static int _php_mbstr_parse_mail_headers(HashTable *ht, const char *str, size_t str_len)
4035 {
4036 	const char *ps;
4037 	size_t icnt;
4038 	int state = 0;
4039 	int crlf_state = -1;
4040 	char *token = NULL;
4041 	size_t token_pos = 0;
4042 	zend_string *fld_name, *fld_val;
4043 
4044 	ps = str;
4045 	icnt = str_len;
4046 	fld_name = fld_val = NULL;
4047 
4048 	/*
4049 	 *             C o n t e n t - T y p e :   t e x t / h t m l \r\n
4050 	 *             ^ ^^^^^^^^^^^^^^^^^^^^^ ^^^ ^^^^^^^^^^^^^^^^^ ^^^^
4051 	 *      state  0            1           2          3
4052 	 *
4053 	 *             C o n t e n t - T y p e :   t e x t / h t m l \r\n
4054 	 *             ^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^
4055 	 * crlf_state -1                       0                     1 -1
4056 	 *
4057 	 */
4058 
4059 	while (icnt > 0) {
4060 		switch (*ps) {
4061 			case ':':
4062 				if (crlf_state == 1) {
4063 					token_pos++;
4064 				}
4065 
4066 				if (state == 0 || state == 1) {
4067 					if(token && token_pos > 0) {
4068 						fld_name = zend_string_init(token, token_pos, 0);
4069 					}
4070 					state = 2;
4071 				} else {
4072 					token_pos++;
4073 				}
4074 
4075 				crlf_state = 0;
4076 				break;
4077 
4078 			case '\n':
4079 				if (crlf_state == -1) {
4080 					goto out;
4081 				}
4082 				crlf_state = -1;
4083 				break;
4084 
4085 			case '\r':
4086 				if (crlf_state == 1) {
4087 					token_pos++;
4088 				} else {
4089 					crlf_state = 1;
4090 				}
4091 				break;
4092 
4093 			case ' ': case '\t':
4094 				if (crlf_state == -1) {
4095 					if (state == 3) {
4096 						/* continuing from the previous line */
4097 						state = 4;
4098 					} else {
4099 						/* simply skipping this new line */
4100 						state = 5;
4101 					}
4102 				} else {
4103 					if (crlf_state == 1) {
4104 						token_pos++;
4105 					}
4106 					if (state == 1 || state == 3) {
4107 						token_pos++;
4108 					}
4109 				}
4110 				crlf_state = 0;
4111 				break;
4112 
4113 			default:
4114 				switch (state) {
4115 					case 0:
4116 						token = (char*)ps;
4117 						token_pos = 0;
4118 						state = 1;
4119 						break;
4120 
4121 					case 2:
4122 						if (crlf_state != -1) {
4123 							token = (char*)ps;
4124 							token_pos = 0;
4125 
4126 							state = 3;
4127 							break;
4128 						}
4129 						/* break is missing intentionally */
4130 
4131 					case 3:
4132 						if (crlf_state == -1) {
4133 							if(token && token_pos > 0) {
4134 								fld_val = zend_string_init(token, token_pos, 0);
4135 							}
4136 
4137 							if (fld_name != NULL && fld_val != NULL) {
4138 								zval val;
4139 								/* FIXME: some locale free implementation is
4140 								 * really required here,,, */
4141 								php_strtoupper(ZSTR_VAL(fld_name), ZSTR_LEN(fld_name));
4142 								ZVAL_STR(&val, fld_val);
4143 
4144 								zend_hash_update(ht, fld_name, &val);
4145 
4146 								zend_string_release_ex(fld_name, 0);
4147 							}
4148 
4149 							fld_name = fld_val = NULL;
4150 							token = (char*)ps;
4151 							token_pos = 0;
4152 
4153 							state = 1;
4154 						}
4155 						break;
4156 
4157 					case 4:
4158 						token_pos++;
4159 						state = 3;
4160 						break;
4161 				}
4162 
4163 				if (crlf_state == 1) {
4164 					token_pos++;
4165 				}
4166 
4167 				token_pos++;
4168 
4169 				crlf_state = 0;
4170 				break;
4171 		}
4172 		ps++, icnt--;
4173 	}
4174 out:
4175 	if (state == 2) {
4176 		token = "";
4177 		token_pos = 0;
4178 
4179 		state = 3;
4180 	}
4181 	if (state == 3) {
4182 		if(token && token_pos > 0) {
4183 			fld_val = zend_string_init(token, token_pos, 0);
4184 		}
4185 		if (fld_name != NULL && fld_val != NULL) {
4186 			zval val;
4187 			/* FIXME: some locale free implementation is
4188 			 * really required here,,, */
4189 			php_strtoupper(ZSTR_VAL(fld_name), ZSTR_LEN(fld_name));
4190 			ZVAL_STR(&val, fld_val);
4191 
4192 			zend_hash_update(ht, fld_name, &val);
4193 
4194 			zend_string_release_ex(fld_name, 0);
4195 		}
4196 	}
4197 	return state;
4198 }
4199 
PHP_FUNCTION(mb_send_mail)4200 PHP_FUNCTION(mb_send_mail)
4201 {
4202 	char *to;
4203 	size_t to_len;
4204 	char *message;
4205 	size_t message_len;
4206 	char *subject;
4207 	size_t subject_len;
4208 	zval *headers = NULL;
4209 	zend_string *extra_cmd = NULL;
4210 	zend_string *str_headers = NULL, *tmp_headers;
4211 	size_t n, i;
4212 	char *to_r = NULL;
4213 	char *force_extra_parameters = INI_STR("mail.force_extra_parameters");
4214 	struct {
4215 		int cnt_type:1;
4216 		int cnt_trans_enc:1;
4217 	} suppressed_hdrs = { 0, 0 };
4218 
4219 	char *message_buf = NULL, *subject_buf = NULL, *p;
4220 	mbfl_string orig_str, conv_str;
4221 	mbfl_string *pstr;	/* pointer to mbfl string for return value */
4222 	enum mbfl_no_encoding;
4223 	const mbfl_encoding *tran_cs,	/* transfar text charset */
4224 						*head_enc,	/* header transfar encoding */
4225 						*body_enc;	/* body transfar encoding */
4226 	mbfl_memory_device device;	/* automatic allocateable buffer for additional header */
4227 	const mbfl_language *lang;
4228 	int err = 0;
4229 	HashTable ht_headers;
4230 	zval *s;
4231 	extern void mbfl_memory_device_unput(mbfl_memory_device *device);
4232 	char *pp, *ee;
4233 
4234 	/* initialize */
4235 	mbfl_memory_device_init(&device, 0, 0);
4236 	mbfl_string_init(&orig_str);
4237 	mbfl_string_init(&conv_str);
4238 
4239 	/* character-set, transfer-encoding */
4240 	tran_cs = &mbfl_encoding_utf8;
4241 	head_enc = &mbfl_encoding_base64;
4242 	body_enc = &mbfl_encoding_base64;
4243 	lang = mbfl_no2language(MBSTRG(language));
4244 	if (lang != NULL) {
4245 		tran_cs = mbfl_no2encoding(lang->mail_charset);
4246 		head_enc = mbfl_no2encoding(lang->mail_header_encoding);
4247 		body_enc = mbfl_no2encoding(lang->mail_body_encoding);
4248 	}
4249 
4250 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "sss|zS", &to, &to_len, &subject, &subject_len, &message, &message_len, &headers, &extra_cmd) == FAILURE) {
4251 		return;
4252 	}
4253 
4254 	/* ASCIIZ check */
4255 	MAIL_ASCIIZ_CHECK_MBSTRING(to, to_len);
4256 	MAIL_ASCIIZ_CHECK_MBSTRING(subject, subject_len);
4257 	MAIL_ASCIIZ_CHECK_MBSTRING(message, message_len);
4258 	if (headers) {
4259 		switch(Z_TYPE_P(headers)) {
4260 			case IS_STRING:
4261 				tmp_headers = zend_string_init(Z_STRVAL_P(headers), Z_STRLEN_P(headers), 0);
4262 				MAIL_ASCIIZ_CHECK_MBSTRING(ZSTR_VAL(tmp_headers), ZSTR_LEN(tmp_headers));
4263 				str_headers = php_trim(tmp_headers, NULL, 0, 2);
4264 				zend_string_release_ex(tmp_headers, 0);
4265 				break;
4266 			case IS_ARRAY:
4267 				str_headers = php_mail_build_headers(headers);
4268 				break;
4269 			default:
4270 				php_error_docref(NULL, E_WARNING, "headers parameter must be string or array");
4271 				RETURN_FALSE;
4272 		}
4273 	}
4274 	if (extra_cmd) {
4275 		MAIL_ASCIIZ_CHECK_MBSTRING(ZSTR_VAL(extra_cmd), ZSTR_LEN(extra_cmd));
4276 	}
4277 
4278 	zend_hash_init(&ht_headers, 0, NULL, ZVAL_PTR_DTOR, 0);
4279 
4280 	if (str_headers != NULL) {
4281 		_php_mbstr_parse_mail_headers(&ht_headers, ZSTR_VAL(str_headers), ZSTR_LEN(str_headers));
4282 	}
4283 
4284 	if ((s = zend_hash_str_find(&ht_headers, "CONTENT-TYPE", sizeof("CONTENT-TYPE") - 1))) {
4285 		char *tmp;
4286 		char *param_name;
4287 		char *charset = NULL;
4288 
4289 		ZEND_ASSERT(Z_TYPE_P(s) == IS_STRING);
4290 		p = strchr(Z_STRVAL_P(s), ';');
4291 
4292 		if (p != NULL) {
4293 			/* skipping the padded spaces */
4294 			do {
4295 				++p;
4296 			} while (*p == ' ' || *p == '\t');
4297 
4298 			if (*p != '\0') {
4299 				if ((param_name = php_strtok_r(p, "= ", &tmp)) != NULL) {
4300 					if (strcasecmp(param_name, "charset") == 0) {
4301 						const mbfl_encoding *_tran_cs = tran_cs;
4302 
4303 						charset = php_strtok_r(NULL, "= \"", &tmp);
4304 						if (charset != NULL) {
4305 							_tran_cs = mbfl_name2encoding(charset);
4306 						}
4307 
4308 						if (!_tran_cs) {
4309 							php_error_docref(NULL, E_WARNING, "Unsupported charset \"%s\" - will be regarded as ascii", charset);
4310 							_tran_cs = &mbfl_encoding_ascii;
4311 						}
4312 						tran_cs = _tran_cs;
4313 					}
4314 				}
4315 			}
4316 		}
4317 		suppressed_hdrs.cnt_type = 1;
4318 	}
4319 
4320 	if ((s = zend_hash_str_find(&ht_headers, "CONTENT-TRANSFER-ENCODING", sizeof("CONTENT-TRANSFER-ENCODING") - 1))) {
4321 		const mbfl_encoding *_body_enc;
4322 
4323 		ZEND_ASSERT(Z_TYPE_P(s) == IS_STRING);
4324 		_body_enc = mbfl_name2encoding(Z_STRVAL_P(s));
4325 		switch (_body_enc ? _body_enc->no_encoding : mbfl_no_encoding_invalid) {
4326 			case mbfl_no_encoding_base64:
4327 			case mbfl_no_encoding_7bit:
4328 			case mbfl_no_encoding_8bit:
4329 				body_enc = _body_enc;
4330 				break;
4331 
4332 			default:
4333 				php_error_docref(NULL, E_WARNING, "Unsupported transfer encoding \"%s\" - will be regarded as 8bit", Z_STRVAL_P(s));
4334 				body_enc =	&mbfl_encoding_8bit;
4335 				break;
4336 		}
4337 		suppressed_hdrs.cnt_trans_enc = 1;
4338 	}
4339 
4340 	/* To: */
4341 	if (to_len > 0) {
4342 		to_r = estrndup(to, to_len);
4343 		for (; to_len; to_len--) {
4344 			if (!isspace((unsigned char) to_r[to_len - 1])) {
4345 				break;
4346 			}
4347 			to_r[to_len - 1] = '\0';
4348 		}
4349 		for (i = 0; to_r[i]; i++) {
4350 		if (iscntrl((unsigned char) to_r[i])) {
4351 			/* According to RFC 822, section 3.1.1 long headers may be separated into
4352 			 * parts using CRLF followed at least one linear-white-space character ('\t' or ' ').
4353 			 * To prevent these separators from being replaced with a space, we use the
4354 			 * SKIP_LONG_HEADER_SEP_MBSTRING to skip over them.
4355 			 */
4356 			SKIP_LONG_HEADER_SEP_MBSTRING(to_r, i);
4357 			to_r[i] = ' ';
4358 		}
4359 		}
4360 	} else {
4361 		to_r = to;
4362 	}
4363 
4364 	/* Subject: */
4365 	orig_str.no_language = MBSTRG(language);
4366 	orig_str.val = (unsigned char *)subject;
4367 	orig_str.len = subject_len;
4368 	orig_str.encoding = MBSTRG(current_internal_encoding);
4369 	if (orig_str.encoding->no_encoding == mbfl_no_encoding_invalid
4370 			|| orig_str.encoding->no_encoding == mbfl_no_encoding_pass) {
4371 		orig_str.encoding = mbfl_identify_encoding(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
4372 	}
4373 	pstr = mbfl_mime_header_encode(&orig_str, &conv_str, tran_cs, head_enc, "\n", sizeof("Subject: [PHP-jp nnnnnnnn]"));
4374 	if (pstr != NULL) {
4375 		subject_buf = subject = (char *)pstr->val;
4376 	}
4377 
4378 	/* message body */
4379 	orig_str.no_language = MBSTRG(language);
4380 	orig_str.val = (unsigned char *)message;
4381 	orig_str.len = message_len;
4382 	orig_str.encoding = MBSTRG(current_internal_encoding);
4383 
4384 	if (orig_str.encoding->no_encoding == mbfl_no_encoding_invalid
4385 			|| orig_str.encoding->no_encoding == mbfl_no_encoding_pass) {
4386 		orig_str.encoding = mbfl_identify_encoding(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
4387 	}
4388 
4389 	pstr = NULL;
4390 	{
4391 		mbfl_string tmpstr;
4392 
4393 		if (mbfl_convert_encoding(&orig_str, &tmpstr, tran_cs) != NULL) {
4394 			tmpstr.encoding = &mbfl_encoding_8bit;
4395 			pstr = mbfl_convert_encoding(&tmpstr, &conv_str, body_enc);
4396 			efree(tmpstr.val);
4397 		}
4398 	}
4399 	if (pstr != NULL) {
4400 		message_buf = message = (char *)pstr->val;
4401 	}
4402 
4403 	/* other headers */
4404 #define PHP_MBSTR_MAIL_MIME_HEADER1 "MIME-Version: 1.0"
4405 #define PHP_MBSTR_MAIL_MIME_HEADER2 "Content-Type: text/plain"
4406 #define PHP_MBSTR_MAIL_MIME_HEADER3 "; charset="
4407 #define PHP_MBSTR_MAIL_MIME_HEADER4 "Content-Transfer-Encoding: "
4408 	if (str_headers != NULL) {
4409 		p = ZSTR_VAL(str_headers);
4410 		n = ZSTR_LEN(str_headers);
4411 		mbfl_memory_device_strncat(&device, p, n);
4412 		if (n > 0 && p[n - 1] != '\n') {
4413 			mbfl_memory_device_strncat(&device, "\n", 1);
4414 		}
4415 		zend_string_release_ex(str_headers, 0);
4416 	}
4417 
4418 	if (!zend_hash_str_exists(&ht_headers, "MIME-VERSION", sizeof("MIME-VERSION") - 1)) {
4419 		mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER1, sizeof(PHP_MBSTR_MAIL_MIME_HEADER1) - 1);
4420 		mbfl_memory_device_strncat(&device, "\n", 1);
4421 	}
4422 
4423 	if (!suppressed_hdrs.cnt_type) {
4424 		mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER2, sizeof(PHP_MBSTR_MAIL_MIME_HEADER2) - 1);
4425 
4426 		p = (char *)mbfl_no2preferred_mime_name(tran_cs->no_encoding);
4427 		if (p != NULL) {
4428 			mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER3, sizeof(PHP_MBSTR_MAIL_MIME_HEADER3) - 1);
4429 			mbfl_memory_device_strcat(&device, p);
4430 		}
4431 		mbfl_memory_device_strncat(&device, "\n", 1);
4432 	}
4433 	if (!suppressed_hdrs.cnt_trans_enc) {
4434 		mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER4, sizeof(PHP_MBSTR_MAIL_MIME_HEADER4) - 1);
4435 		p = (char *)mbfl_no2preferred_mime_name(body_enc->no_encoding);
4436 		if (p == NULL) {
4437 			p = "7bit";
4438 		}
4439 		mbfl_memory_device_strcat(&device, p);
4440 		mbfl_memory_device_strncat(&device, "\n", 1);
4441 	}
4442 
4443 	mbfl_memory_device_unput(&device);
4444 	mbfl_memory_device_output('\0', &device);
4445 	str_headers = zend_string_init((char *)device.buffer, strlen((char *)device.buffer), 0);
4446 
4447 	if (force_extra_parameters) {
4448 		extra_cmd = php_escape_shell_cmd(force_extra_parameters);
4449 	} else if (extra_cmd) {
4450 		extra_cmd = php_escape_shell_cmd(ZSTR_VAL(extra_cmd));
4451 	}
4452 
4453 	if (!err && php_mail(to_r, subject, message, ZSTR_VAL(str_headers), extra_cmd ? ZSTR_VAL(extra_cmd) : NULL)) {
4454 		RETVAL_TRUE;
4455 	} else {
4456 		RETVAL_FALSE;
4457 	}
4458 
4459 	if (extra_cmd) {
4460 		zend_string_release_ex(extra_cmd, 0);
4461 	}
4462 
4463 	if (to_r != to) {
4464 		efree(to_r);
4465 	}
4466 	if (subject_buf) {
4467 		efree((void *)subject_buf);
4468 	}
4469 	if (message_buf) {
4470 		efree((void *)message_buf);
4471 	}
4472 	mbfl_memory_device_clear(&device);
4473 	zend_hash_destroy(&ht_headers);
4474 	if (str_headers) {
4475 		zend_string_release_ex(str_headers, 0);
4476 	}
4477 }
4478 
4479 #undef SKIP_LONG_HEADER_SEP_MBSTRING
4480 #undef MAIL_ASCIIZ_CHECK_MBSTRING
4481 #undef PHP_MBSTR_MAIL_MIME_HEADER1
4482 #undef PHP_MBSTR_MAIL_MIME_HEADER2
4483 #undef PHP_MBSTR_MAIL_MIME_HEADER3
4484 #undef PHP_MBSTR_MAIL_MIME_HEADER4
4485 /* }}} */
4486 
4487 /* {{{ proto mixed mb_get_info([string type])
4488    Returns the current settings of mbstring */
PHP_FUNCTION(mb_get_info)4489 PHP_FUNCTION(mb_get_info)
4490 {
4491 	char *typ = NULL;
4492 	size_t typ_len;
4493 	size_t n;
4494 	char *name;
4495 	const struct mb_overload_def *over_func;
4496 	zval row1, row2;
4497 	const mbfl_language *lang = mbfl_no2language(MBSTRG(language));
4498 	const mbfl_encoding **entry;
4499 
4500 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s", &typ, &typ_len) == FAILURE) {
4501 		return;
4502 	}
4503 
4504 	if (!typ || !strcasecmp("all", typ)) {
4505 		array_init(return_value);
4506 		if (MBSTRG(current_internal_encoding)) {
4507 			add_assoc_string(return_value, "internal_encoding", (char *)MBSTRG(current_internal_encoding)->name);
4508 		}
4509 		if (MBSTRG(http_input_identify)) {
4510 			add_assoc_string(return_value, "http_input", (char *)MBSTRG(http_input_identify)->name);
4511 		}
4512 		if (MBSTRG(current_http_output_encoding)) {
4513 			add_assoc_string(return_value, "http_output", (char *)MBSTRG(current_http_output_encoding)->name);
4514 		}
4515 		if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes") - 1, 0)) != NULL) {
4516 			add_assoc_string(return_value, "http_output_conv_mimetypes", name);
4517 		}
4518 		add_assoc_long(return_value, "func_overload", MBSTRG(func_overload));
4519 		if (MBSTRG(func_overload)){
4520 			over_func = &(mb_ovld[0]);
4521 			array_init(&row1);
4522 			while (over_func->type > 0) {
4523 				if ((MBSTRG(func_overload) & over_func->type) == over_func->type ) {
4524 					add_assoc_string(&row1, over_func->orig_func, over_func->ovld_func);
4525 				}
4526 				over_func++;
4527 			}
4528 			add_assoc_zval(return_value, "func_overload_list", &row1);
4529 		} else {
4530 			add_assoc_string(return_value, "func_overload_list", "no overload");
4531  		}
4532 		if (lang != NULL) {
4533 			if ((name = (char *)mbfl_no_encoding2name(lang->mail_charset)) != NULL) {
4534 				add_assoc_string(return_value, "mail_charset", name);
4535 			}
4536 			if ((name = (char *)mbfl_no_encoding2name(lang->mail_header_encoding)) != NULL) {
4537 				add_assoc_string(return_value, "mail_header_encoding", name);
4538 			}
4539 			if ((name = (char *)mbfl_no_encoding2name(lang->mail_body_encoding)) != NULL) {
4540 				add_assoc_string(return_value, "mail_body_encoding", name);
4541 			}
4542 		}
4543 		add_assoc_long(return_value, "illegal_chars", MBSTRG(illegalchars));
4544 		if (MBSTRG(encoding_translation)) {
4545 			add_assoc_string(return_value, "encoding_translation", "On");
4546 		} else {
4547 			add_assoc_string(return_value, "encoding_translation", "Off");
4548 		}
4549 		if ((name = (char *)mbfl_no_language2name(MBSTRG(language))) != NULL) {
4550 			add_assoc_string(return_value, "language", name);
4551 		}
4552 		n = MBSTRG(current_detect_order_list_size);
4553 		entry = MBSTRG(current_detect_order_list);
4554 		if (n > 0) {
4555 			size_t i;
4556 			array_init(&row2);
4557 			for (i = 0; i < n; i++) {
4558 				add_next_index_string(&row2, (*entry)->name);
4559 				entry++;
4560 			}
4561 			add_assoc_zval(return_value, "detect_order", &row2);
4562 		}
4563 		if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
4564 			add_assoc_string(return_value, "substitute_character", "none");
4565 		} else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
4566 			add_assoc_string(return_value, "substitute_character", "long");
4567 		} else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
4568 			add_assoc_string(return_value, "substitute_character", "entity");
4569 		} else {
4570 			add_assoc_long(return_value, "substitute_character", MBSTRG(current_filter_illegal_substchar));
4571 		}
4572 		if (MBSTRG(strict_detection)) {
4573 			add_assoc_string(return_value, "strict_detection", "On");
4574 		} else {
4575 			add_assoc_string(return_value, "strict_detection", "Off");
4576 		}
4577 	} else if (!strcasecmp("internal_encoding", typ)) {
4578 		if (MBSTRG(current_internal_encoding)) {
4579 			RETVAL_STRING((char *)MBSTRG(current_internal_encoding)->name);
4580 		}
4581 	} else if (!strcasecmp("http_input", typ)) {
4582 		if (MBSTRG(http_input_identify)) {
4583 			RETVAL_STRING((char *)MBSTRG(http_input_identify)->name);
4584 		}
4585 	} else if (!strcasecmp("http_output", typ)) {
4586 		if (MBSTRG(current_http_output_encoding)) {
4587 			RETVAL_STRING((char *)MBSTRG(current_http_output_encoding)->name);
4588 		}
4589 	} else if (!strcasecmp("http_output_conv_mimetypes", typ)) {
4590 		if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes") - 1, 0)) != NULL) {
4591 			RETVAL_STRING(name);
4592 		}
4593 	} else if (!strcasecmp("func_overload", typ)) {
4594  		RETVAL_LONG(MBSTRG(func_overload));
4595 	} else if (!strcasecmp("func_overload_list", typ)) {
4596 		if (MBSTRG(func_overload)){
4597 				over_func = &(mb_ovld[0]);
4598 				array_init(return_value);
4599 				while (over_func->type > 0) {
4600 					if ((MBSTRG(func_overload) & over_func->type) == over_func->type ) {
4601 						add_assoc_string(return_value, over_func->orig_func, over_func->ovld_func);
4602 					}
4603 					over_func++;
4604 				}
4605 		} else {
4606 			RETVAL_STRING("no overload");
4607 		}
4608 	} else if (!strcasecmp("mail_charset", typ)) {
4609 		if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_charset)) != NULL) {
4610 			RETVAL_STRING(name);
4611 		}
4612 	} else if (!strcasecmp("mail_header_encoding", typ)) {
4613 		if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_header_encoding)) != NULL) {
4614 			RETVAL_STRING(name);
4615 		}
4616 	} else if (!strcasecmp("mail_body_encoding", typ)) {
4617 		if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_body_encoding)) != NULL) {
4618 			RETVAL_STRING(name);
4619 		}
4620 	} else if (!strcasecmp("illegal_chars", typ)) {
4621 		RETVAL_LONG(MBSTRG(illegalchars));
4622 	} else if (!strcasecmp("encoding_translation", typ)) {
4623 		if (MBSTRG(encoding_translation)) {
4624 			RETVAL_STRING("On");
4625 		} else {
4626 			RETVAL_STRING("Off");
4627 		}
4628 	} else if (!strcasecmp("language", typ)) {
4629 		if ((name = (char *)mbfl_no_language2name(MBSTRG(language))) != NULL) {
4630 			RETVAL_STRING(name);
4631 		}
4632 	} else if (!strcasecmp("detect_order", typ)) {
4633 		n = MBSTRG(current_detect_order_list_size);
4634 		entry = MBSTRG(current_detect_order_list);
4635 		if (n > 0) {
4636 			size_t i;
4637 			array_init(return_value);
4638 			for (i = 0; i < n; i++) {
4639 				add_next_index_string(return_value, (*entry)->name);
4640 				entry++;
4641 			}
4642 		}
4643 	} else if (!strcasecmp("substitute_character", typ)) {
4644 		if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
4645 			RETVAL_STRING("none");
4646 		} else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
4647 			RETVAL_STRING("long");
4648 		} else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
4649 			RETVAL_STRING("entity");
4650 		} else {
4651 			RETVAL_LONG(MBSTRG(current_filter_illegal_substchar));
4652 		}
4653 	} else if (!strcasecmp("strict_detection", typ)) {
4654 		if (MBSTRG(strict_detection)) {
4655 			RETVAL_STRING("On");
4656 		} else {
4657 			RETVAL_STRING("Off");
4658 		}
4659 	} else {
4660 		RETURN_FALSE;
4661 	}
4662 }
4663 /* }}} */
4664 
4665 
php_mb_init_convd(const mbfl_encoding * encoding)4666 static inline mbfl_buffer_converter *php_mb_init_convd(const mbfl_encoding *encoding)
4667 {
4668 	mbfl_buffer_converter *convd;
4669 
4670 	convd = mbfl_buffer_converter_new(encoding, encoding, 0);
4671 	if (convd == NULL) {
4672 		return NULL;
4673 	}
4674 	mbfl_buffer_converter_illegal_mode(convd, MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE);
4675 	mbfl_buffer_converter_illegal_substchar(convd, 0);
4676 	return convd;
4677 }
4678 
4679 
/* Checks `length` bytes at `input` by running them through `convd`.
 *
 * The input counts as valid (return 1) only when the conversion yields a
 * result, the converter reports no illegal characters, and the output is
 * byte-for-byte identical to the input. Returns 0 in every other case.
 * The illegal-character count is read from `convd` itself, so whatever the
 * converter has accumulated up to this point is what gets tested.
 */
static inline int php_mb_check_encoding_impl(mbfl_buffer_converter *convd, const char *input, size_t length, const mbfl_encoding *encoding) {
	mbfl_string string, result, *ret;
	size_t illegalchars;
	int is_valid;

	/* initialize string */
	mbfl_string_init_set(&string, mbfl_no_language_neutral, encoding);
	mbfl_string_init(&result);

	string.val = (unsigned char *) input;
	string.len = length;

	ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
	illegalchars = mbfl_buffer_illegalchars(convd);

	if (ret == NULL) {
		return 0;
	}

	is_valid = illegalchars == 0
		&& string.len == result.len
		&& memcmp(string.val, result.val, string.len) == 0;

	mbfl_string_clear(&result);
	return is_valid ? 1 : 0;
}
4703 
4704 
php_mb_check_encoding(const char * input,size_t length,const char * enc)4705 MBSTRING_API int php_mb_check_encoding(const char *input, size_t length, const char *enc)
4706 {
4707 	const mbfl_encoding *encoding = MBSTRG(current_internal_encoding);
4708 	mbfl_buffer_converter *convd;
4709 
4710 	if (input == NULL) {
4711 		return MBSTRG(illegalchars) == 0;
4712 	}
4713 
4714 	if (enc != NULL) {
4715 		encoding = mbfl_name2encoding(enc);
4716 		if (!encoding || encoding == &mbfl_encoding_pass) {
4717 			php_error_docref(NULL, E_WARNING, "Invalid encoding \"%s\"", enc);
4718 			return 0;
4719 		}
4720 	}
4721 
4722 	convd = php_mb_init_convd(encoding);
4723 	if (convd == NULL) {
4724 		php_error_docref(NULL, E_WARNING, "Unable to create converter");
4725 		return 0;
4726 	}
4727 
4728 	if (php_mb_check_encoding_impl(convd, input, length, encoding)) {
4729 		mbfl_buffer_converter_delete(convd);
4730 		return 1;
4731 	}
4732 	mbfl_buffer_converter_delete(convd);
4733 	return 0;
4734 }
4735 
4736 
php_mb_check_encoding_recursive(HashTable * vars,const zend_string * enc)4737 MBSTRING_API int php_mb_check_encoding_recursive(HashTable *vars, const zend_string *enc)
4738 {
4739 	const mbfl_encoding *encoding = MBSTRG(current_internal_encoding);
4740 	mbfl_buffer_converter *convd;
4741 	zend_long idx;
4742 	zend_string *key;
4743 	zval *entry;
4744 	int valid = 1;
4745 
4746 	(void)(idx);
4747 
4748 	if (enc != NULL) {
4749 		encoding = mbfl_name2encoding(ZSTR_VAL(enc));
4750 		if (!encoding || encoding == &mbfl_encoding_pass) {
4751 			php_error_docref(NULL, E_WARNING, "Invalid encoding \"%s\"", ZSTR_VAL(enc));
4752 			return 0;
4753 		}
4754 	}
4755 
4756 	convd = php_mb_init_convd(encoding);
4757 	if (convd == NULL) {
4758 		php_error_docref(NULL, E_WARNING, "Unable to create converter");
4759 		return 0;
4760 	}
4761 
4762 	if (GC_IS_RECURSIVE(vars)) {
4763 		mbfl_buffer_converter_delete(convd);
4764 		php_error_docref(NULL, E_WARNING, "Cannot not handle circular references");
4765 		return 0;
4766 	}
4767 	GC_TRY_PROTECT_RECURSION(vars);
4768 	ZEND_HASH_FOREACH_KEY_VAL(vars, idx, key, entry) {
4769 		ZVAL_DEREF(entry);
4770 		if (key) {
4771 			if (!php_mb_check_encoding_impl(convd, ZSTR_VAL(key), ZSTR_LEN(key), encoding)) {
4772 				valid = 0;
4773 				break;
4774 			}
4775 		}
4776 		switch (Z_TYPE_P(entry)) {
4777 			case IS_STRING:
4778 				if (!php_mb_check_encoding_impl(convd, Z_STRVAL_P(entry), Z_STRLEN_P(entry), encoding)) {
4779 					valid = 0;
4780 					break;
4781 				}
4782 				break;
4783 			case IS_ARRAY:
4784 				if (!php_mb_check_encoding_recursive(HASH_OF(entry), enc)) {
4785 					valid = 0;
4786 					break;
4787 				}
4788 				break;
4789 			case IS_LONG:
4790 			case IS_DOUBLE:
4791 			case IS_NULL:
4792 			case IS_TRUE:
4793 			case IS_FALSE:
4794 				break;
4795 			default:
4796 				/* Other types are error. */
4797 				valid = 0;
4798 				break;
4799 		}
4800 	} ZEND_HASH_FOREACH_END();
4801 	GC_TRY_UNPROTECT_RECURSION(vars);
4802 	mbfl_buffer_converter_delete(convd);
4803 	return valid;
4804 }
4805 
4806 
4807 /* {{{ proto bool mb_check_encoding([mixed var[, string encoding]])
4808    Check if the string is valid for the specified encoding */
PHP_FUNCTION(mb_check_encoding)4809 PHP_FUNCTION(mb_check_encoding)
4810 {
4811 	zval *input = NULL;
4812 	zend_string *enc = NULL;
4813 
4814 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "|zS", &input, &enc) == FAILURE) {
4815 		return;
4816 	}
4817 
4818 	/* FIXME: Actually check all inputs, except $_FILES file content. */
4819 	if (input == NULL) {
4820 		if (MBSTRG(illegalchars) == 0) {
4821 			RETURN_TRUE;
4822 		}
4823 		RETURN_FALSE;
4824 	}
4825 
4826 	if (Z_TYPE_P(input) == IS_ARRAY) {
4827 		if (!php_mb_check_encoding_recursive(HASH_OF(input), enc)) {
4828 			RETURN_FALSE;
4829 		}
4830 	} else {
4831 		convert_to_string(input);
4832 		if (!php_mb_check_encoding(Z_STRVAL_P(input), Z_STRLEN_P(input), enc ? ZSTR_VAL(enc): NULL)) {
4833 			RETURN_FALSE;
4834 		}
4835 	}
4836 	RETURN_TRUE;
4837 }
4838 /* }}} */
4839 
4840 
php_mb_ord(const char * str,size_t str_len,const char * enc_name)4841 static inline zend_long php_mb_ord(const char* str, size_t str_len, const char* enc_name)
4842 {
4843 	const mbfl_encoding *enc;
4844 	enum mbfl_no_encoding no_enc;
4845 
4846 	enc = php_mb_get_encoding(enc_name);
4847 	if (!enc) {
4848 		return -1;
4849 	}
4850 
4851 	no_enc = enc->no_encoding;
4852 	if (php_mb_is_unsupported_no_encoding(no_enc)) {
4853 		php_error_docref(NULL, E_WARNING, "Unsupported encoding \"%s\"", enc->name);
4854 		return -1;
4855 	}
4856 
4857 	if (str_len == 0) {
4858 		php_error_docref(NULL, E_WARNING, "Empty string");
4859 		return -1;
4860 	}
4861 
4862 	{
4863 		mbfl_wchar_device dev;
4864 		mbfl_convert_filter *filter;
4865 		zend_long cp;
4866 
4867 		mbfl_wchar_device_init(&dev);
4868 		filter = mbfl_convert_filter_new(
4869 			enc, &mbfl_encoding_wchar,
4870 			mbfl_wchar_device_output, 0, &dev);
4871 		if (!filter) {
4872 			php_error_docref(NULL, E_WARNING, "Creation of filter failed");
4873 			return -1;
4874 		}
4875 
4876 		mbfl_convert_filter_feed_string(filter, (const unsigned char *) str, str_len);
4877 		mbfl_convert_filter_flush(filter);
4878 
4879 		if (dev.pos < 1 || filter->num_illegalchar || dev.buffer[0] >= MBFL_WCSGROUP_UCS4MAX) {
4880 			mbfl_convert_filter_delete(filter);
4881 			mbfl_wchar_device_clear(&dev);
4882 			return -1;
4883 		}
4884 
4885 		cp = dev.buffer[0];
4886 		mbfl_convert_filter_delete(filter);
4887 		mbfl_wchar_device_clear(&dev);
4888 		return cp;
4889 	}
4890 }
4891 
4892 
4893 /* {{{ proto int|false mb_ord([string str[, string encoding]]) */
PHP_FUNCTION(mb_ord)4894 PHP_FUNCTION(mb_ord)
4895 {
4896 	char* str;
4897 	size_t str_len;
4898 	char* enc = NULL;
4899 	size_t enc_len;
4900 	zend_long cp;
4901 
4902 	ZEND_PARSE_PARAMETERS_START(1, 2)
4903 		Z_PARAM_STRING(str, str_len)
4904 		Z_PARAM_OPTIONAL
4905 		Z_PARAM_STRING(enc, enc_len)
4906 	ZEND_PARSE_PARAMETERS_END();
4907 
4908 	cp = php_mb_ord(str, str_len, enc);
4909 
4910 	if (0 > cp) {
4911 		RETURN_FALSE;
4912 	}
4913 
4914 	RETURN_LONG(cp);
4915 }
4916 /* }}} */
4917 
4918 
php_mb_chr(zend_long cp,const char * enc_name)4919 static inline zend_string *php_mb_chr(zend_long cp, const char *enc_name)
4920 {
4921 	const mbfl_encoding *enc;
4922 	enum mbfl_no_encoding no_enc;
4923 	zend_string *ret;
4924 	char* buf;
4925 	size_t buf_len;
4926 
4927 	enc = php_mb_get_encoding(enc_name);
4928 	if (!enc) {
4929 		return NULL;
4930 	}
4931 
4932 	no_enc = enc->no_encoding;
4933 	if (php_mb_is_unsupported_no_encoding(no_enc)) {
4934 		php_error_docref(NULL, E_WARNING, "Unsupported encoding \"%s\"", enc->name);
4935 		return NULL;
4936 	}
4937 
4938 	if (cp < 0 || cp > 0x10ffff) {
4939 		return NULL;
4940 	}
4941 
4942 	if (php_mb_is_no_encoding_utf8(no_enc)) {
4943 		if (cp > 0xd7ff && 0xe000 > cp) {
4944 			return NULL;
4945 		}
4946 
4947 		if (cp < 0x80) {
4948 			ret = ZSTR_CHAR(cp);
4949 		} else if (cp < 0x800) {
4950 			ret = zend_string_alloc(2, 0);
4951 			ZSTR_VAL(ret)[0] = 0xc0 | (cp >> 6);
4952 			ZSTR_VAL(ret)[1] = 0x80 | (cp & 0x3f);
4953 			ZSTR_VAL(ret)[2] = 0;
4954 		} else if (cp < 0x10000) {
4955 			ret = zend_string_alloc(3, 0);
4956 			ZSTR_VAL(ret)[0] = 0xe0 | (cp >> 12);
4957 			ZSTR_VAL(ret)[1] = 0x80 | ((cp >> 6) & 0x3f);
4958 			ZSTR_VAL(ret)[2] = 0x80 | (cp & 0x3f);
4959 			ZSTR_VAL(ret)[3] = 0;
4960 		} else {
4961 			ret = zend_string_alloc(4, 0);
4962 			ZSTR_VAL(ret)[0] = 0xf0 | (cp >> 18);
4963 			ZSTR_VAL(ret)[1] = 0x80 | ((cp >> 12) & 0x3f);
4964 			ZSTR_VAL(ret)[2] = 0x80 | ((cp >> 6) & 0x3f);
4965 			ZSTR_VAL(ret)[3] = 0x80 | (cp & 0x3f);
4966 			ZSTR_VAL(ret)[4] = 0;
4967 		}
4968 
4969 		return ret;
4970 	}
4971 
4972 	buf_len = 4;
4973 	buf = (char *) emalloc(buf_len + 1);
4974 	buf[0] = (cp >> 24) & 0xff;
4975 	buf[1] = (cp >> 16) & 0xff;
4976 	buf[2] = (cp >>  8) & 0xff;
4977 	buf[3] = cp & 0xff;
4978 	buf[4] = 0;
4979 
4980 	{
4981 		char *ret_str;
4982 		size_t ret_len;
4983 		long orig_illegalchars = MBSTRG(illegalchars);
4984 		MBSTRG(illegalchars) = 0;
4985 		ret_str = php_mb_convert_encoding_ex(buf, buf_len, enc, &mbfl_encoding_ucs4be, &ret_len);
4986 		if (MBSTRG(illegalchars) != 0) {
4987 			efree(buf);
4988 			efree(ret_str);
4989 			MBSTRG(illegalchars) = orig_illegalchars;
4990 			return NULL;
4991 		}
4992 
4993 		ret = zend_string_init(ret_str, ret_len, 0);
4994 		efree(ret_str);
4995 		MBSTRG(illegalchars) = orig_illegalchars;
4996 	}
4997 
4998 	efree(buf);
4999 	return ret;
5000 }
5001 
5002 
5003 /* {{{ proto string|false mb_chr([int cp[, string encoding]]) */
PHP_FUNCTION(mb_chr)5004 PHP_FUNCTION(mb_chr)
5005 {
5006 	zend_long cp;
5007 	char* enc = NULL;
5008 	size_t enc_len;
5009 	zend_string* ret;
5010 
5011 	ZEND_PARSE_PARAMETERS_START(1, 2)
5012 		Z_PARAM_LONG(cp)
5013 		Z_PARAM_OPTIONAL
5014 		Z_PARAM_STRING(enc, enc_len)
5015 	ZEND_PARSE_PARAMETERS_END();
5016 
5017 	ret = php_mb_chr(cp, enc);
5018 	if (ret == NULL) {
5019 		RETURN_FALSE;
5020 	}
5021 
5022 	RETURN_STR(ret);
5023 }
5024 /* }}} */
5025 
5026 
/* Convert `str` from `enc` to the same `enc` via
 * php_mb_convert_encoding_ex(); the result pointer and `*ret_len` are passed
 * through unchanged from that call. */
static inline char* php_mb_scrub(const char* str, size_t str_len, const mbfl_encoding *enc, size_t *ret_len)
{
	char *scrubbed = php_mb_convert_encoding_ex(str, str_len, enc, enc, ret_len);

	return scrubbed;
}
5031 
5032 
5033 /* {{{ proto string|false mb_scrub([string str[, string encoding]]) */
PHP_FUNCTION(mb_scrub)5034 PHP_FUNCTION(mb_scrub)
5035 {
5036 	const mbfl_encoding *enc;
5037 	char* str;
5038 	size_t str_len;
5039 	char *enc_name = NULL;
5040 	size_t enc_name_len;
5041 	char *ret;
5042 	size_t ret_len;
5043 
5044 	ZEND_PARSE_PARAMETERS_START(1, 2)
5045 		Z_PARAM_STRING(str, str_len)
5046 		Z_PARAM_OPTIONAL
5047 		Z_PARAM_STRING(enc_name, enc_name_len)
5048 	ZEND_PARSE_PARAMETERS_END();
5049 
5050 	enc = php_mb_get_encoding(enc_name);
5051 	if (!enc) {
5052 		RETURN_FALSE;
5053 	}
5054 
5055 	ret = php_mb_scrub(str, str_len, enc, &ret_len);
5056 
5057 	if (ret == NULL) {
5058 		RETURN_FALSE;
5059 	}
5060 
5061 	RETVAL_STRINGL(ret, ret_len);
5062 	efree(ret);
5063 }
5064 /* }}} */
5065 
5066 
5067 /* {{{ php_mb_populate_current_detect_order_list */
php_mb_populate_current_detect_order_list(void)5068 static void php_mb_populate_current_detect_order_list(void)
5069 {
5070 	const mbfl_encoding **entry = 0;
5071 	size_t nentries;
5072 
5073 	if (MBSTRG(current_detect_order_list)) {
5074 		return;
5075 	}
5076 
5077 	if (MBSTRG(detect_order_list) && MBSTRG(detect_order_list_size)) {
5078 		nentries = MBSTRG(detect_order_list_size);
5079 		entry = (const mbfl_encoding **)safe_emalloc(nentries, sizeof(mbfl_encoding*), 0);
5080 		memcpy(entry, MBSTRG(detect_order_list), sizeof(mbfl_encoding*) * nentries);
5081 	} else {
5082 		const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
5083 		size_t i;
5084 		nentries = MBSTRG(default_detect_order_list_size);
5085 		entry = (const mbfl_encoding **)safe_emalloc(nentries, sizeof(mbfl_encoding*), 0);
5086 		for (i = 0; i < nentries; i++) {
5087 			entry[i] = mbfl_no2encoding(src[i]);
5088 		}
5089 	}
5090 	MBSTRG(current_detect_order_list) = entry;
5091 	MBSTRG(current_detect_order_list_size) = nentries;
5092 }
5093 /* }}} */
5094 
5095 /* {{{ static int php_mb_encoding_translation() */
php_mb_encoding_translation(void)5096 static int php_mb_encoding_translation(void)
5097 {
5098 	return MBSTRG(encoding_translation);
5099 }
5100 /* }}} */
5101 
5102 /* {{{ MBSTRING_API size_t php_mb_mbchar_bytes_ex() */
php_mb_mbchar_bytes_ex(const char * s,const mbfl_encoding * enc)5103 MBSTRING_API size_t php_mb_mbchar_bytes_ex(const char *s, const mbfl_encoding *enc)
5104 {
5105 	if (enc != NULL) {
5106 		if (enc->flag & MBFL_ENCTYPE_MBCS) {
5107 			if (enc->mblen_table != NULL) {
5108 				if (s != NULL) return enc->mblen_table[*(unsigned char *)s];
5109 			}
5110 		} else if (enc->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
5111 			return 2;
5112 		} else if (enc->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
5113 			return 4;
5114 		}
5115 	}
5116 	return 1;
5117 }
5118 /* }}} */
5119 
5120 /* {{{ MBSTRING_API size_t php_mb_mbchar_bytes() */
php_mb_mbchar_bytes(const char * s)5121 MBSTRING_API size_t php_mb_mbchar_bytes(const char *s)
5122 {
5123 	return php_mb_mbchar_bytes_ex(s, MBSTRG(internal_encoding));
5124 }
5125 /* }}} */
5126 
5127 /* {{{ MBSTRING_API char *php_mb_safe_strrchr_ex() */
php_mb_safe_strrchr_ex(const char * s,unsigned int c,size_t nbytes,const mbfl_encoding * enc)5128 MBSTRING_API char *php_mb_safe_strrchr_ex(const char *s, unsigned int c, size_t nbytes, const mbfl_encoding *enc)
5129 {
5130 	register const char *p = s;
5131 	char *last=NULL;
5132 
5133 	if (nbytes == (size_t)-1) {
5134 		size_t nb = 0;
5135 
5136 		while (*p != '\0') {
5137 			if (nb == 0) {
5138 				if ((unsigned char)*p == (unsigned char)c) {
5139 					last = (char *)p;
5140 				}
5141 				nb = php_mb_mbchar_bytes_ex(p, enc);
5142 				if (nb == 0) {
5143 					return NULL; /* something is going wrong! */
5144 				}
5145 			}
5146 			--nb;
5147 			++p;
5148 		}
5149 	} else {
5150 		register size_t bcnt = nbytes;
5151 		register size_t nbytes_char;
5152 		while (bcnt > 0) {
5153 			if ((unsigned char)*p == (unsigned char)c) {
5154 				last = (char *)p;
5155 			}
5156 			nbytes_char = php_mb_mbchar_bytes_ex(p, enc);
5157 			if (bcnt < nbytes_char) {
5158 				return NULL;
5159 			}
5160 			p += nbytes_char;
5161 			bcnt -= nbytes_char;
5162 		}
5163 	}
5164 	return last;
5165 }
5166 /* }}} */
5167 
5168 /* {{{ MBSTRING_API char *php_mb_safe_strrchr() */
php_mb_safe_strrchr(const char * s,unsigned int c,size_t nbytes)5169 MBSTRING_API char *php_mb_safe_strrchr(const char *s, unsigned int c, size_t nbytes)
5170 {
5171 	return php_mb_safe_strrchr_ex(s, c, nbytes, MBSTRG(internal_encoding));
5172 }
5173 /* }}} */
5174 
5175 /* {{{ MBSTRING_API int php_mb_stripos()
5176  */
php_mb_stripos(int mode,const char * old_haystack,size_t old_haystack_len,const char * old_needle,size_t old_needle_len,zend_long offset,const char * from_encoding)5177 MBSTRING_API size_t php_mb_stripos(int mode, const char *old_haystack, size_t old_haystack_len, const char *old_needle, size_t old_needle_len, zend_long offset, const char *from_encoding)
5178 {
5179 	size_t n = (size_t) -1;
5180 	mbfl_string haystack, needle;
5181 	const mbfl_encoding *enc;
5182 
5183 	enc = php_mb_get_encoding(from_encoding);
5184 	if (!enc) {
5185 		return (size_t) -1;
5186 	}
5187 
5188 	mbfl_string_init(&haystack);
5189 	mbfl_string_init(&needle);
5190 	haystack.no_language = MBSTRG(language);
5191 	haystack.encoding = enc;
5192 	needle.no_language = MBSTRG(language);
5193 	needle.encoding = enc;
5194 
5195 	do {
5196 		/* We're using simple case-folding here, because we'd have to deal with remapping of
5197 		 * offsets otherwise. */
5198 
5199 		size_t len = 0;
5200 		haystack.val = (unsigned char *)mbstring_convert_case(PHP_UNICODE_CASE_FOLD_SIMPLE, (char *)old_haystack, old_haystack_len, &len, enc);
5201 		haystack.len = len;
5202 
5203 		if (!haystack.val) {
5204 			break;
5205 		}
5206 
5207 		if (haystack.len == 0) {
5208 			break;
5209 		}
5210 
5211 		needle.val = (unsigned char *)mbstring_convert_case(PHP_UNICODE_CASE_FOLD_SIMPLE, (char *)old_needle, old_needle_len, &len, enc);
5212 		needle.len = len;
5213 
5214 		if (!needle.val) {
5215 			break;
5216 		}
5217 
5218 		if (needle.len == 0) {
5219 			break;
5220 		}
5221 
5222  		if (offset != 0) {
5223  			size_t haystack_char_len = mbfl_strlen(&haystack);
5224 
5225  			if (mode) {
5226 				if ((offset > 0 && (size_t)offset > haystack_char_len) ||
5227 					(offset < 0 && (size_t)(-offset) > haystack_char_len)) {
5228  					php_error_docref(NULL, E_WARNING, "Offset is greater than the length of haystack string");
5229  					break;
5230  				}
5231  			} else {
5232 				if (offset < 0) {
5233 					offset += (zend_long)haystack_char_len;
5234 				}
5235 				if (offset < 0 || (size_t)offset > haystack_char_len) {
5236  					php_error_docref(NULL, E_WARNING, "Offset not contained in string");
5237  					break;
5238  				}
5239  			}
5240 		}
5241 
5242 		n = mbfl_strpos(&haystack, &needle, offset, mode);
5243 	} while(0);
5244 
5245 	if (haystack.val) {
5246 		efree(haystack.val);
5247 	}
5248 
5249 	if (needle.val) {
5250 		efree(needle.val);
5251 	}
5252 
5253 	return n;
5254 }
5255 /* }}} */
5256 
php_mb_gpc_get_detect_order(const zend_encoding *** list,size_t * list_size)5257 static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *list_size) /* {{{ */
5258 {
5259 	*list = (const zend_encoding **)MBSTRG(http_input_list);
5260 	*list_size = MBSTRG(http_input_list_size);
5261 }
5262 /* }}} */
5263 
php_mb_gpc_set_input_encoding(const zend_encoding * encoding)5264 static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding) /* {{{ */
5265 {
5266 	MBSTRG(http_input_identify) = (const mbfl_encoding*)encoding;
5267 }
5268 /* }}} */
5269 
5270 #endif	/* HAVE_MBSTRING */
5271 
5272 /*
5273  * Local variables:
5274  * tab-width: 4
5275  * c-basic-offset: 4
5276  * End:
5277  * vim600: fdm=marker
5278  * vim: noet sw=4 ts=4
5279  */
5280