xref: /PHP-7.4/ext/mbstring/mbstring.c (revision d3d6d790)
1 /*
2    +----------------------------------------------------------------------+
3    | PHP Version 7                                                        |
4    +----------------------------------------------------------------------+
5    | Copyright (c) The PHP Group                                          |
6    +----------------------------------------------------------------------+
7    | This source file is subject to version 3.01 of the PHP license,      |
8    | that is bundled with this package in the file LICENSE, and is        |
9    | available through the world-wide-web at the following url:           |
10    | http://www.php.net/license/3_01.txt                                  |
11    | If you did not receive a copy of the PHP license and are unable to   |
12    | obtain it through the world-wide-web, please send a note to          |
13    | license@php.net so we can mail you a copy immediately.               |
14    +----------------------------------------------------------------------+
15    | Author: Tsukada Takuya <tsukada@fminn.nagano.nagano.jp>              |
16    |         Rui Hirokawa <hirokawa@php.net>                              |
17    |         Hironori Sato <satoh@jpnnet.com>                             |
18    |         Shigeru Kanemoto <sgk@happysize.co.jp>                       |
19    +----------------------------------------------------------------------+
20  */
21 
22 /* {{{ includes */
23 #ifdef HAVE_CONFIG_H
24 #include "config.h"
25 #endif
26 
27 #include "php.h"
28 #include "php_ini.h"
29 #include "php_variables.h"
30 #include "mbstring.h"
31 #include "ext/standard/php_string.h"
32 #include "ext/standard/php_mail.h"
33 #include "ext/standard/exec.h"
34 #include "ext/standard/url.h"
35 #include "main/php_output.h"
36 #include "ext/standard/info.h"
37 
38 #include "libmbfl/mbfl/mbfl_allocators.h"
39 #include "libmbfl/mbfl/mbfilter_8bit.h"
40 #include "libmbfl/mbfl/mbfilter_pass.h"
41 #include "libmbfl/mbfl/mbfilter_wchar.h"
42 #include "libmbfl/filters/mbfilter_ascii.h"
43 #include "libmbfl/filters/mbfilter_base64.h"
44 #include "libmbfl/filters/mbfilter_qprint.h"
45 #include "libmbfl/filters/mbfilter_ucs4.h"
46 #include "libmbfl/filters/mbfilter_utf8.h"
47 
48 #include "php_variables.h"
49 #include "php_globals.h"
50 #include "rfc1867.h"
51 #include "php_content_types.h"
52 #include "SAPI.h"
53 #include "php_unicode.h"
54 #include "TSRM.h"
55 
56 #include "mb_gpc.h"
57 
58 #if HAVE_MBREGEX
59 # include "php_mbregex.h"
60 # include "php_onig_compat.h"
61 # include <oniguruma.h>
62 # undef UChar
/* Fallbacks used when ONIGURUMA_VERSION_INT < 60800 (presumably Oniguruma releases
 * that lack the match-param API): OnigMatchParam is stubbed as void, the constructor
 * macro yields NULL, the initializer only evaluates its argument, the two limit setters
 * and the destructor expand to nothing, and the *_with_param entry points forward to
 * onig_search()/onig_match() while dropping the trailing `mp` argument. */
63 #if ONIGURUMA_VERSION_INT < 60800
64 typedef void OnigMatchParam;
65 #define onig_new_match_param() (NULL)
66 #define onig_initialize_match_param(x) (void)(x)
67 #define onig_set_match_stack_limit_size_of_match_param(x, y)
68 #define onig_set_retry_limit_in_match_of_match_param(x, y)
69 #define onig_free_match_param(x)
70 #define onig_search_with_param(reg, str, end, start, range, region, option, mp) \
71 onig_search(reg, str, end, start, range, region, option)
72 #define onig_match_with_param(re, str, end, at, region, option, mp) \
73 onig_match(re, str, end, at, region, option)
74 #endif
75 #else
76 # include "ext/pcre/php_pcre.h"
77 #endif
78 
79 #include "zend_multibyte.h"
80 /* }}} */
81 
82 #if HAVE_MBSTRING
83 
84 /* {{{ prototypes
 * Declares the extension's module globals, the global init/shutdown hooks, and
 * forward declarations of file-local helpers whose definitions appear later in
 * this file (not all of them are adjacent to this block). */
85 ZEND_DECLARE_MODULE_GLOBALS(mbstring)
86 
87 static PHP_GINIT_FUNCTION(mbstring);
88 static PHP_GSHUTDOWN_FUNCTION(mbstring);
89 
90 static void php_mb_populate_current_detect_order_list(void);
91 
92 static int php_mb_encoding_translation(void);
93 
94 static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *list_size);
95 
96 static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding);
97 
98 static inline zend_bool php_mb_is_unsupported_no_encoding(enum mbfl_no_encoding no_enc);
99 
100 static inline zend_bool php_mb_is_no_encoding_utf8(enum mbfl_no_encoding no_enc);
101 /* }}} */
102 
103 /* {{{ php_mb_default_identify_list
 * Per-language candidate encodings. Each array below lists encoding ids in a fixed
 * order for one language; the arrays are gathered into the lookup table that follows
 * this block. NOTE(review): presumably the order is the detection priority used when
 * a language is selected - confirm against the code that consumes the table. */
104 typedef struct _php_mb_nls_ident_list {
105 	enum mbfl_no_language lang;             /* language this row applies to */
106 	const enum mbfl_no_encoding *list;      /* candidate encoding ids */
107 	size_t list_size;                       /* number of elements in `list` */
108 } php_mb_nls_ident_list;
109 
/* Japanese */
110 static const enum mbfl_no_encoding php_mb_default_identify_list_ja[] = {
111 	mbfl_no_encoding_ascii,
112 	mbfl_no_encoding_jis,
113 	mbfl_no_encoding_utf8,
114 	mbfl_no_encoding_euc_jp,
115 	mbfl_no_encoding_sjis
116 };
117 
/* Simplified Chinese */
118 static const enum mbfl_no_encoding php_mb_default_identify_list_cn[] = {
119 	mbfl_no_encoding_ascii,
120 	mbfl_no_encoding_utf8,
121 	mbfl_no_encoding_euc_cn,
122 	mbfl_no_encoding_cp936
123 };
124 
/* Traditional Chinese */
125 static const enum mbfl_no_encoding php_mb_default_identify_list_tw_hk[] = {
126 	mbfl_no_encoding_ascii,
127 	mbfl_no_encoding_utf8,
128 	mbfl_no_encoding_euc_tw,
129 	mbfl_no_encoding_big5
130 };
131 
/* Korean */
132 static const enum mbfl_no_encoding php_mb_default_identify_list_kr[] = {
133 	mbfl_no_encoding_ascii,
134 	mbfl_no_encoding_utf8,
135 	mbfl_no_encoding_euc_kr,
136 	mbfl_no_encoding_uhc
137 };
138 
/* Russian */
139 static const enum mbfl_no_encoding php_mb_default_identify_list_ru[] = {
140 	mbfl_no_encoding_ascii,
141 	mbfl_no_encoding_utf8,
142 	mbfl_no_encoding_koi8r,
143 	mbfl_no_encoding_cp1251,
144 	mbfl_no_encoding_cp866
145 };
146 
/* Armenian */
147 static const enum mbfl_no_encoding php_mb_default_identify_list_hy[] = {
148 	mbfl_no_encoding_ascii,
149 	mbfl_no_encoding_utf8,
150 	mbfl_no_encoding_armscii8
151 };
152 
/* Turkish */
153 static const enum mbfl_no_encoding php_mb_default_identify_list_tr[] = {
154 	mbfl_no_encoding_ascii,
155 	mbfl_no_encoding_utf8,
156 	mbfl_no_encoding_cp1254,
157 	mbfl_no_encoding_8859_9
158 };
159 
/* Ukrainian */
160 static const enum mbfl_no_encoding php_mb_default_identify_list_ua[] = {
161 	mbfl_no_encoding_ascii,
162 	mbfl_no_encoding_utf8,
163 	mbfl_no_encoding_koi8u
164 };
165 
/* Language-neutral */
166 static const enum mbfl_no_encoding php_mb_default_identify_list_neut[] = {
167 	mbfl_no_encoding_ascii,
168 	mbfl_no_encoding_utf8
169 };
170 
171 
172 static const php_mb_nls_ident_list php_mb_default_identify_list[] = {
173 	{ mbfl_no_language_japanese, php_mb_default_identify_list_ja, sizeof(php_mb_default_identify_list_ja) / sizeof(php_mb_default_identify_list_ja[0]) },
174 	{ mbfl_no_language_korean, php_mb_default_identify_list_kr, sizeof(php_mb_default_identify_list_kr) / sizeof(php_mb_default_identify_list_kr[0]) },
175 	{ mbfl_no_language_traditional_chinese, php_mb_default_identify_list_tw_hk, sizeof(php_mb_default_identify_list_tw_hk) / sizeof(php_mb_default_identify_list_tw_hk[0]) },
176 	{ mbfl_no_language_simplified_chinese, php_mb_default_identify_list_cn, sizeof(php_mb_default_identify_list_cn) / sizeof(php_mb_default_identify_list_cn[0]) },
177 	{ mbfl_no_language_russian, php_mb_default_identify_list_ru, sizeof(php_mb_default_identify_list_ru) / sizeof(php_mb_default_identify_list_ru[0]) },
178 	{ mbfl_no_language_armenian, php_mb_default_identify_list_hy, sizeof(php_mb_default_identify_list_hy) / sizeof(php_mb_default_identify_list_hy[0]) },
179 	{ mbfl_no_language_turkish, php_mb_default_identify_list_tr, sizeof(php_mb_default_identify_list_tr) / sizeof(php_mb_default_identify_list_tr[0]) },
180 	{ mbfl_no_language_ukrainian, php_mb_default_identify_list_ua, sizeof(php_mb_default_identify_list_ua) / sizeof(php_mb_default_identify_list_ua[0]) },
181 	{ mbfl_no_language_neutral, php_mb_default_identify_list_neut, sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]) }
182 };
183 
184 /* }}} */
185 
186 /* {{{ mb_overload_def mb_ovld[]
 * Function-overload table, terminated by an all-zero/NULL row. Each row carries an
 * MB_OVERLOAD_* flag followed by three function names. NOTE(review): struct
 * mb_overload_def is declared outside this view; judging by the values the names
 * are presumably (original function, mbstring replacement, alias under which the
 * original stays reachable) - confirm against the struct definition. */
187 static const struct mb_overload_def mb_ovld[] = {
188 	{MB_OVERLOAD_MAIL, "mail", "mb_send_mail", "mb_orig_mail"},
189 	{MB_OVERLOAD_STRING, "strlen", "mb_strlen", "mb_orig_strlen"},
190 	{MB_OVERLOAD_STRING, "strpos", "mb_strpos", "mb_orig_strpos"},
191 	{MB_OVERLOAD_STRING, "strrpos", "mb_strrpos", "mb_orig_strrpos"},
192 	{MB_OVERLOAD_STRING, "stripos", "mb_stripos", "mb_orig_stripos"},
193 	{MB_OVERLOAD_STRING, "strripos", "mb_strripos", "mb_orig_strripos"},
194 	{MB_OVERLOAD_STRING, "strstr", "mb_strstr", "mb_orig_strstr"},
195 	{MB_OVERLOAD_STRING, "strrchr", "mb_strrchr", "mb_orig_strrchr"},
196 	{MB_OVERLOAD_STRING, "stristr", "mb_stristr", "mb_orig_stristr"},
197 	{MB_OVERLOAD_STRING, "substr", "mb_substr", "mb_orig_substr"},
198 	{MB_OVERLOAD_STRING, "strtolower", "mb_strtolower", "mb_orig_strtolower"},
199 	{MB_OVERLOAD_STRING, "strtoupper", "mb_strtoupper", "mb_orig_strtoupper"},
200 	{MB_OVERLOAD_STRING, "substr_count", "mb_substr_count", "mb_orig_substr_count"},
201 	{0, NULL, NULL, NULL}
202 };
203 /* }}} */
204 
205 /* {{{ arginfo
 * Argument metadata for the core mb_* functions; each arginfo_<name> is referenced
 * from the function table later in this file.
 * Per the Zend arginfo macros: the last argument of ZEND_BEGIN_ARG_INFO_EX is the
 * number of required parameters; the first argument of ZEND_ARG_INFO /
 * ZEND_ARG_VARIADIC_INFO is 1 when the parameter is passed by reference, else 0. */
206 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_language, 0, 0, 0)
207 	ZEND_ARG_INFO(0, language)
208 ZEND_END_ARG_INFO()
209 
210 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_internal_encoding, 0, 0, 0)
211 	ZEND_ARG_INFO(0, encoding)
212 ZEND_END_ARG_INFO()
213 
214 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_http_input, 0, 0, 0)
215 	ZEND_ARG_INFO(0, type)
216 ZEND_END_ARG_INFO()
217 
218 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_http_output, 0, 0, 0)
219 	ZEND_ARG_INFO(0, encoding)
220 ZEND_END_ARG_INFO()
221 
222 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_detect_order, 0, 0, 0)
223 	ZEND_ARG_INFO(0, encoding)
224 ZEND_END_ARG_INFO()
225 
226 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substitute_character, 0, 0, 0)
227 	ZEND_ARG_INFO(0, substchar)
228 ZEND_END_ARG_INFO()
229 
230 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_preferred_mime_name, 0, 0, 1)
231 	ZEND_ARG_INFO(0, encoding)
232 ZEND_END_ARG_INFO()
233 
/* `result` is the only by-reference parameter among the non-variadic core functions. */
234 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_parse_str, 0, 0, 1)
235 	ZEND_ARG_INFO(0, encoded_string)
236 	ZEND_ARG_INFO(1, result)
237 ZEND_END_ARG_INFO()
238 
239 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_output_handler, 0, 0, 2)
240 	ZEND_ARG_INFO(0, contents)
241 	ZEND_ARG_INFO(0, status)
242 ZEND_END_ARG_INFO()
243 
244 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_str_split, 0, 0, 1)
245 	ZEND_ARG_INFO(0, str)
246 	ZEND_ARG_INFO(0, split_length)
247 	ZEND_ARG_INFO(0, encoding)
248 ZEND_END_ARG_INFO()
249 
250 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strlen, 0, 0, 1)
251 	ZEND_ARG_INFO(0, str)
252 	ZEND_ARG_INFO(0, encoding)
253 ZEND_END_ARG_INFO()
254 
255 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strpos, 0, 0, 2)
256 	ZEND_ARG_INFO(0, haystack)
257 	ZEND_ARG_INFO(0, needle)
258 	ZEND_ARG_INFO(0, offset)
259 	ZEND_ARG_INFO(0, encoding)
260 ZEND_END_ARG_INFO()
261 
262 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrpos, 0, 0, 2)
263 	ZEND_ARG_INFO(0, haystack)
264 	ZEND_ARG_INFO(0, needle)
265 	ZEND_ARG_INFO(0, offset)
266 	ZEND_ARG_INFO(0, encoding)
267 ZEND_END_ARG_INFO()
268 
269 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_stripos, 0, 0, 2)
270 	ZEND_ARG_INFO(0, haystack)
271 	ZEND_ARG_INFO(0, needle)
272 	ZEND_ARG_INFO(0, offset)
273 	ZEND_ARG_INFO(0, encoding)
274 ZEND_END_ARG_INFO()
275 
276 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strripos, 0, 0, 2)
277 	ZEND_ARG_INFO(0, haystack)
278 	ZEND_ARG_INFO(0, needle)
279 	ZEND_ARG_INFO(0, offset)
280 	ZEND_ARG_INFO(0, encoding)
281 ZEND_END_ARG_INFO()
282 
283 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strstr, 0, 0, 2)
284 	ZEND_ARG_INFO(0, haystack)
285 	ZEND_ARG_INFO(0, needle)
286 	ZEND_ARG_INFO(0, part)
287 	ZEND_ARG_INFO(0, encoding)
288 ZEND_END_ARG_INFO()
289 
290 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrchr, 0, 0, 2)
291 	ZEND_ARG_INFO(0, haystack)
292 	ZEND_ARG_INFO(0, needle)
293 	ZEND_ARG_INFO(0, part)
294 	ZEND_ARG_INFO(0, encoding)
295 ZEND_END_ARG_INFO()
296 
297 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_stristr, 0, 0, 2)
298 	ZEND_ARG_INFO(0, haystack)
299 	ZEND_ARG_INFO(0, needle)
300 	ZEND_ARG_INFO(0, part)
301 	ZEND_ARG_INFO(0, encoding)
302 ZEND_END_ARG_INFO()
303 
304 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrichr, 0, 0, 2)
305 	ZEND_ARG_INFO(0, haystack)
306 	ZEND_ARG_INFO(0, needle)
307 	ZEND_ARG_INFO(0, part)
308 	ZEND_ARG_INFO(0, encoding)
309 ZEND_END_ARG_INFO()
310 
311 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substr_count, 0, 0, 2)
312 	ZEND_ARG_INFO(0, haystack)
313 	ZEND_ARG_INFO(0, needle)
314 	ZEND_ARG_INFO(0, encoding)
315 ZEND_END_ARG_INFO()
316 
317 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substr, 0, 0, 2)
318 	ZEND_ARG_INFO(0, str)
319 	ZEND_ARG_INFO(0, start)
320 	ZEND_ARG_INFO(0, length)
321 	ZEND_ARG_INFO(0, encoding)
322 ZEND_END_ARG_INFO()
323 
324 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strcut, 0, 0, 2)
325 	ZEND_ARG_INFO(0, str)
326 	ZEND_ARG_INFO(0, start)
327 	ZEND_ARG_INFO(0, length)
328 	ZEND_ARG_INFO(0, encoding)
329 ZEND_END_ARG_INFO()
330 
331 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strwidth, 0, 0, 1)
332 	ZEND_ARG_INFO(0, str)
333 	ZEND_ARG_INFO(0, encoding)
334 ZEND_END_ARG_INFO()
335 
336 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strimwidth, 0, 0, 3)
337 	ZEND_ARG_INFO(0, str)
338 	ZEND_ARG_INFO(0, start)
339 	ZEND_ARG_INFO(0, width)
340 	ZEND_ARG_INFO(0, trimmarker)
341 	ZEND_ARG_INFO(0, encoding)
342 ZEND_END_ARG_INFO()
343 
344 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_encoding, 0, 0, 2)
345 	ZEND_ARG_INFO(0, str)
346 	ZEND_ARG_INFO(0, to)
347 	ZEND_ARG_INFO(0, from)
348 ZEND_END_ARG_INFO()
349 
350 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_case, 0, 0, 2)
351 	ZEND_ARG_INFO(0, sourcestring)
352 	ZEND_ARG_INFO(0, mode)
353 	ZEND_ARG_INFO(0, encoding)
354 ZEND_END_ARG_INFO()
355 
356 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strtoupper, 0, 0, 1)
357 	ZEND_ARG_INFO(0, sourcestring)
358 	ZEND_ARG_INFO(0, encoding)
359 ZEND_END_ARG_INFO()
360 
361 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strtolower, 0, 0, 1)
362 	ZEND_ARG_INFO(0, sourcestring)
363 	ZEND_ARG_INFO(0, encoding)
364 ZEND_END_ARG_INFO()
365 
366 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_detect_encoding, 0, 0, 1)
367 	ZEND_ARG_INFO(0, str)
368 	ZEND_ARG_INFO(0, encoding_list)
369 	ZEND_ARG_INFO(0, strict)
370 ZEND_END_ARG_INFO()
371 
372 ZEND_BEGIN_ARG_INFO(arginfo_mb_list_encodings, 0)
373 ZEND_END_ARG_INFO()
374 
375 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encoding_aliases, 0, 0, 1)
376 	ZEND_ARG_INFO(0, encoding)
377 ZEND_END_ARG_INFO()
378 
379 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encode_mimeheader, 0, 0, 1)
380 	ZEND_ARG_INFO(0, str)
381 	ZEND_ARG_INFO(0, charset)
382 	ZEND_ARG_INFO(0, transfer)
383 	ZEND_ARG_INFO(0, linefeed)
384 	ZEND_ARG_INFO(0, indent)
385 ZEND_END_ARG_INFO()
386 
387 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_decode_mimeheader, 0, 0, 1)
388 	ZEND_ARG_INFO(0, string)
389 ZEND_END_ARG_INFO()
390 
391 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_kana, 0, 0, 1)
392 	ZEND_ARG_INFO(0, str)
393 	ZEND_ARG_INFO(0, option)
394 	ZEND_ARG_INFO(0, encoding)
395 ZEND_END_ARG_INFO()
396 
/* The trailing variadic `vars` are taken by reference. */
397 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_variables, 0, 0, 3)
398 	ZEND_ARG_INFO(0, to)
399 	ZEND_ARG_INFO(0, from)
400 	ZEND_ARG_VARIADIC_INFO(1, vars)
401 ZEND_END_ARG_INFO()
402 
403 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encode_numericentity, 0, 0, 2)
404 	ZEND_ARG_INFO(0, string)
405 	ZEND_ARG_INFO(0, convmap)
406 	ZEND_ARG_INFO(0, encoding)
407 	ZEND_ARG_INFO(0, is_hex)
408 ZEND_END_ARG_INFO()
409 
410 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_decode_numericentity, 0, 0, 2)
411 	ZEND_ARG_INFO(0, string)
412 	ZEND_ARG_INFO(0, convmap)
413 	ZEND_ARG_INFO(0, encoding)
414 	ZEND_ARG_INFO(0, is_hex)
415 ZEND_END_ARG_INFO()
416 
417 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_send_mail, 0, 0, 3)
418 	ZEND_ARG_INFO(0, to)
419 	ZEND_ARG_INFO(0, subject)
420 	ZEND_ARG_INFO(0, message)
421 	ZEND_ARG_INFO(0, additional_headers)
422 	ZEND_ARG_INFO(0, additional_parameters)
423 ZEND_END_ARG_INFO()
424 
425 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_get_info, 0, 0, 0)
426 	ZEND_ARG_INFO(0, type)
427 ZEND_END_ARG_INFO()
428 
429 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_check_encoding, 0, 0, 0)
430 	ZEND_ARG_INFO(0, var)
431 	ZEND_ARG_INFO(0, encoding)
432 ZEND_END_ARG_INFO()
433 
434 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_scrub, 0, 0, 1)
435 	ZEND_ARG_INFO(0, str)
436 	ZEND_ARG_INFO(0, encoding)
437 ZEND_END_ARG_INFO()
438 
439 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ord, 0, 0, 1)
440 	ZEND_ARG_INFO(0, str)
441 	ZEND_ARG_INFO(0, encoding)
442 ZEND_END_ARG_INFO()
443 
444 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_chr, 0, 0, 1)
445 	ZEND_ARG_INFO(0, cp)
446 	ZEND_ARG_INFO(0, encoding)
447 ZEND_END_ARG_INFO()
448 
449 #if HAVE_MBREGEX
/* Argument metadata for the regex functions, compiled only when HAVE_MBREGEX is
 * set. The functions themselves are not defined in this view; they are presumably
 * registered through PHP_MBREGEX_FUNCTION_ENTRIES. `registers` in mb_ereg/mb_eregi
 * is the only by-reference parameter here. */
450 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_regex_encoding, 0, 0, 0)
451 	ZEND_ARG_INFO(0, encoding)
452 ZEND_END_ARG_INFO()
453 
454 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg, 0, 0, 2)
455 	ZEND_ARG_INFO(0, pattern)
456 	ZEND_ARG_INFO(0, string)
457 	ZEND_ARG_INFO(1, registers)
458 ZEND_END_ARG_INFO()
459 
460 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_eregi, 0, 0, 2)
461 	ZEND_ARG_INFO(0, pattern)
462 	ZEND_ARG_INFO(0, string)
463 	ZEND_ARG_INFO(1, registers)
464 ZEND_END_ARG_INFO()
465 
466 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_replace, 0, 0, 3)
467 	ZEND_ARG_INFO(0, pattern)
468 	ZEND_ARG_INFO(0, replacement)
469 	ZEND_ARG_INFO(0, string)
470 	ZEND_ARG_INFO(0, option)
471 ZEND_END_ARG_INFO()
472 
473 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_eregi_replace, 0, 0, 3)
474 	ZEND_ARG_INFO(0, pattern)
475 	ZEND_ARG_INFO(0, replacement)
476 	ZEND_ARG_INFO(0, string)
477 	ZEND_ARG_INFO(0, option)
478 ZEND_END_ARG_INFO()
479 
480 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_replace_callback, 0, 0, 3)
481 	ZEND_ARG_INFO(0, pattern)
482 	ZEND_ARG_INFO(0, callback)
483 	ZEND_ARG_INFO(0, string)
484 	ZEND_ARG_INFO(0, option)
485 ZEND_END_ARG_INFO()
486 
487 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_split, 0, 0, 2)
488 	ZEND_ARG_INFO(0, pattern)
489 	ZEND_ARG_INFO(0, string)
490 	ZEND_ARG_INFO(0, limit)
491 ZEND_END_ARG_INFO()
492 
493 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_match, 0, 0, 2)
494 	ZEND_ARG_INFO(0, pattern)
495 	ZEND_ARG_INFO(0, string)
496 	ZEND_ARG_INFO(0, option)
497 ZEND_END_ARG_INFO()
498 
499 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search, 0, 0, 0)
500 	ZEND_ARG_INFO(0, pattern)
501 	ZEND_ARG_INFO(0, option)
502 ZEND_END_ARG_INFO()
503 
504 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_pos, 0, 0, 0)
505 	ZEND_ARG_INFO(0, pattern)
506 	ZEND_ARG_INFO(0, option)
507 ZEND_END_ARG_INFO()
508 
509 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_regs, 0, 0, 0)
510 	ZEND_ARG_INFO(0, pattern)
511 	ZEND_ARG_INFO(0, option)
512 ZEND_END_ARG_INFO()
513 
514 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_init, 0, 0, 1)
515 	ZEND_ARG_INFO(0, string)
516 	ZEND_ARG_INFO(0, pattern)
517 	ZEND_ARG_INFO(0, option)
518 ZEND_END_ARG_INFO()
519 
520 ZEND_BEGIN_ARG_INFO(arginfo_mb_ereg_search_getregs, 0)
521 ZEND_END_ARG_INFO()
522 
523 ZEND_BEGIN_ARG_INFO(arginfo_mb_ereg_search_getpos, 0)
524 ZEND_END_ARG_INFO()
525 
526 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_setpos, 0, 0, 1)
527 	ZEND_ARG_INFO(0, position)
528 ZEND_END_ARG_INFO()
529 
530 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_regex_set_options, 0, 0, 0)
531 	ZEND_ARG_INFO(0, options)
532 ZEND_END_ARG_INFO()
533 #endif /* HAVE_MBREGEX */
534 /* }}} */
535 
536 /* {{{ zend_function_entry mbstring_functions[]
 * Function table referenced by mbstring_module_entry. Every PHP_FE row pairs a
 * function name with the arginfo_* record declared above. When HAVE_MBREGEX is set
 * the PHP_MBREGEX_FUNCTION_ENTRIES macro (defined outside this view) contributes
 * further rows. PHP_FE_END terminates the table. */
537 static const zend_function_entry mbstring_functions[] = {
538 	PHP_FE(mb_convert_case,			arginfo_mb_convert_case)
539 	PHP_FE(mb_strtoupper,			arginfo_mb_strtoupper)
540 	PHP_FE(mb_strtolower,			arginfo_mb_strtolower)
541 	PHP_FE(mb_language,				arginfo_mb_language)
542 	PHP_FE(mb_internal_encoding,	arginfo_mb_internal_encoding)
543 	PHP_FE(mb_http_input,			arginfo_mb_http_input)
544 	PHP_FE(mb_http_output,			arginfo_mb_http_output)
545 	PHP_FE(mb_detect_order,			arginfo_mb_detect_order)
546 	PHP_FE(mb_substitute_character,	arginfo_mb_substitute_character)
547 	PHP_FE(mb_parse_str,			arginfo_mb_parse_str)
548 	PHP_FE(mb_output_handler,		arginfo_mb_output_handler)
549 	PHP_FE(mb_preferred_mime_name,	arginfo_mb_preferred_mime_name)
550 	PHP_FE(mb_str_split,			arginfo_mb_str_split)
551 	PHP_FE(mb_strlen,				arginfo_mb_strlen)
552 	PHP_FE(mb_strpos,				arginfo_mb_strpos)
553 	PHP_FE(mb_strrpos,				arginfo_mb_strrpos)
554 	PHP_FE(mb_stripos,				arginfo_mb_stripos)
555 	PHP_FE(mb_strripos,				arginfo_mb_strripos)
556 	PHP_FE(mb_strstr,				arginfo_mb_strstr)
557 	PHP_FE(mb_strrchr,				arginfo_mb_strrchr)
558 	PHP_FE(mb_stristr,				arginfo_mb_stristr)
559 	PHP_FE(mb_strrichr,				arginfo_mb_strrichr)
560 	PHP_FE(mb_substr_count,			arginfo_mb_substr_count)
561 	PHP_FE(mb_substr,				arginfo_mb_substr)
562 	PHP_FE(mb_strcut,				arginfo_mb_strcut)
563 	PHP_FE(mb_strwidth,				arginfo_mb_strwidth)
564 	PHP_FE(mb_strimwidth,			arginfo_mb_strimwidth)
565 	PHP_FE(mb_convert_encoding,		arginfo_mb_convert_encoding)
566 	PHP_FE(mb_detect_encoding,		arginfo_mb_detect_encoding)
567 	PHP_FE(mb_list_encodings,		arginfo_mb_list_encodings)
568 	PHP_FE(mb_encoding_aliases,		arginfo_mb_encoding_aliases)
569 	PHP_FE(mb_convert_kana,			arginfo_mb_convert_kana)
570 	PHP_FE(mb_encode_mimeheader,	arginfo_mb_encode_mimeheader)
571 	PHP_FE(mb_decode_mimeheader,	arginfo_mb_decode_mimeheader)
572 	PHP_FE(mb_convert_variables,	arginfo_mb_convert_variables)
573 	PHP_FE(mb_encode_numericentity,	arginfo_mb_encode_numericentity)
574 	PHP_FE(mb_decode_numericentity,	arginfo_mb_decode_numericentity)
575 	PHP_FE(mb_send_mail,			arginfo_mb_send_mail)
576 	PHP_FE(mb_get_info,				arginfo_mb_get_info)
577 	PHP_FE(mb_check_encoding,		arginfo_mb_check_encoding)
578 	PHP_FE(mb_ord,					arginfo_mb_ord)
579 	PHP_FE(mb_chr,					arginfo_mb_chr)
580 	PHP_FE(mb_scrub,				arginfo_mb_scrub)
581 #if HAVE_MBREGEX
582 	PHP_MBREGEX_FUNCTION_ENTRIES
583 #endif
584 	PHP_FE_END
585 };
586 /* }}} */
587 
588 /* {{{ zend_module_entry mbstring_module_entry
 * Module descriptor for the extension. In order after the standard header: the
 * module name, the function table, the module/request startup and shutdown hooks,
 * the info hook, the version string, the module-globals descriptor with its
 * init/shutdown hooks, one unused hook slot (NULL), and the extended standard
 * trailer. */
589 zend_module_entry mbstring_module_entry = {
590 	STANDARD_MODULE_HEADER,
591 	"mbstring",
592 	mbstring_functions,
593 	PHP_MINIT(mbstring),
594 	PHP_MSHUTDOWN(mbstring),
595 	PHP_RINIT(mbstring),
596 	PHP_RSHUTDOWN(mbstring),
597 	PHP_MINFO(mbstring),
598 	PHP_MBSTRING_VERSION,
599 	PHP_MODULE_GLOBALS(mbstring),
600 	PHP_GINIT(mbstring),
601 	PHP_GSHUTDOWN(mbstring),
602 	NULL, /* NOTE(review): presumably the post-deactivate hook slot - confirm against zend_module_entry */
603 	STANDARD_MODULE_PROPERTIES_EX
604 };
605 /* }}} */
606 
607 /* {{{ static sapi_post_entry php_post_entries[]
 * POST handler table that uses php_std_post_handler for the default content type
 * and rfc1867_post_handler for multipart bodies; terminated by a NULL row.
 * NOTE(review): presumably the stock table restored when mbstring's own POST
 * handling is switched off - the code that installs it is outside this view. */
608 static const sapi_post_entry php_post_entries[] = {
609 	{ DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data,	php_std_post_handler },
610 	{ MULTIPART_CONTENT_TYPE,    sizeof(MULTIPART_CONTENT_TYPE)-1,    NULL,                         rfc1867_post_handler },
611 	{ NULL, 0, NULL, NULL }
612 };
613 /* }}} */
614 
/* Only when COMPILE_DL_MBSTRING is defined: emit the module lookup entry point via
 * ZEND_GET_MODULE and, for ZTS builds, define the TSRM cache first. */
#ifdef COMPILE_DL_MBSTRING
#ifdef ZTS
ZEND_TSRMLS_CACHE_DEFINE()
#endif
ZEND_GET_MODULE(mbstring)
#endif
621 
622 /* {{{ allocators */
/* malloc slot of the allocator table: obtains `sz` bytes through emalloc(). */
static void *_php_mb_allocators_malloc(size_t sz)
{
	void *block = emalloc(sz);

	return block;
}
627 
/* realloc slot of the allocator table: resizes `ptr` to `sz` bytes through erealloc(). */
static void *_php_mb_allocators_realloc(void *ptr, size_t sz)
{
	void *resized = erealloc(ptr, sz);

	return resized;
}
632 
/* calloc slot of the allocator table: obtains an array of `nelems` elements of
 * `szelem` bytes each through ecalloc(). */
static void *_php_mb_allocators_calloc(size_t nelems, size_t szelem)
{
	void *block = ecalloc(nelems, szelem);

	return block;
}
637 
/* free slot of the allocator table: releases `ptr` through efree(). */
static void _php_mb_allocators_free(void *ptr)
{
	efree(ptr);
	return;
}
642 
/* pmalloc slot of the allocator table: obtains `sz` bytes through pemalloc() with
 * the persistent flag set to 1. */
static void *_php_mb_allocators_pmalloc(size_t sz)
{
	void *block = pemalloc(sz, 1);

	return block;
}
647 
/* prealloc slot of the allocator table: resizes `ptr` to `sz` bytes through
 * perealloc() with the persistent flag set to 1. */
static void *_php_mb_allocators_prealloc(void *ptr, size_t sz)
{
	void *resized = perealloc(ptr, sz, 1);

	return resized;
}
652 
/* pfree slot of the allocator table: releases `ptr` through pefree() with the
 * persistent flag set to 1. */
static void _php_mb_allocators_pfree(void *ptr)
{
	pefree(ptr, 1);
	return;
}
657 
/* Allocator table built from the wrappers above, in the order malloc, realloc,
 * calloc, free, then the persistent variants pmalloc, prealloc, pfree.
 * NOTE(review): the field order of mbfl_allocators is declared outside this view;
 * the positional initializers here must match it. */
658 static const mbfl_allocators _php_mb_allocators = {
659 	_php_mb_allocators_malloc,
660 	_php_mb_allocators_realloc,
661 	_php_mb_allocators_calloc,
662 	_php_mb_allocators_free,
663 	_php_mb_allocators_pmalloc,
664 	_php_mb_allocators_prealloc,
665 	_php_mb_allocators_pfree
666 };
667 /* }}} */
668 
669 /* {{{ static sapi_post_entry mbstr_post_entries[]
 * POST handler table that differs from php_post_entries only in the handler for
 * the default content type: php_mb_post_handler instead of php_std_post_handler.
 * Multipart bodies still go to rfc1867_post_handler; terminated by a NULL row. */
670 static const sapi_post_entry mbstr_post_entries[] = {
671 	{ DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data, php_mb_post_handler },
672 	{ MULTIPART_CONTENT_TYPE,    sizeof(MULTIPART_CONTENT_TYPE)-1,    NULL,                         rfc1867_post_handler },
673 	{ NULL, 0, NULL, NULL }
674 };
675 /* }}} */
676 
php_mb_get_encoding(zend_string * encoding_name)677 static const mbfl_encoding *php_mb_get_encoding(zend_string *encoding_name) {
678 	if (encoding_name) {
679 		const mbfl_encoding *encoding;
680 		zend_string *last_encoding_name = MBSTRG(last_used_encoding_name);
681 		if (last_encoding_name && (last_encoding_name == encoding_name
682 				|| !strcasecmp(ZSTR_VAL(encoding_name), ZSTR_VAL(last_encoding_name)))) {
683 			return MBSTRG(last_used_encoding);
684 		}
685 
686 		encoding = mbfl_name2encoding(ZSTR_VAL(encoding_name));
687 		if (!encoding) {
688 			php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", ZSTR_VAL(encoding_name));
689 			return NULL;
690 		}
691 
692 		if (last_encoding_name) {
693 			zend_string_release(last_encoding_name);
694 		}
695 		MBSTRG(last_used_encoding_name) = zend_string_copy(encoding_name);
696 		MBSTRG(last_used_encoding) = encoding;
697 		return encoding;
698 	} else {
699 		return MBSTRG(current_internal_encoding);
700 	}
701 }
702 
703 /* {{{ static int php_mb_parse_encoding_list()
704  *  Return FAILURE if input contains any illegal encoding, otherwise SUCCESS.
705  *  Even if any illegal encoding is detected the result may contain a list
706  *  of parsed encodings.
707  */
708 static int
php_mb_parse_encoding_list(const char * value,size_t value_length,const mbfl_encoding *** return_list,size_t * return_size,int persistent)709 php_mb_parse_encoding_list(const char *value, size_t value_length, const mbfl_encoding ***return_list, size_t *return_size, int persistent)
710 {
711 	int bauto, ret = SUCCESS;
712 	size_t n, size;
713 	char *p, *p1, *p2, *endp, *tmpstr;
714 	const mbfl_encoding **entry, **list;
715 
716 	list = NULL;
717 	if (value == NULL || value_length == 0) {
718 		if (return_list) {
719 			*return_list = NULL;
720 		}
721 		if (return_size) {
722 			*return_size = 0;
723 		}
724 		return FAILURE;
725 	} else {
726 		/* copy the value string for work */
727 		if (value[0]=='"' && value[value_length-1]=='"' && value_length>2) {
728 			tmpstr = (char *)estrndup(value+1, value_length-2);
729 			value_length -= 2;
730 		}
731 		else
732 			tmpstr = (char *)estrndup(value, value_length);
733 		/* count the number of listed encoding names */
734 		endp = tmpstr + value_length;
735 		n = 1;
736 		p1 = tmpstr;
737 		while ((p2 = (char*)php_memnstr(p1, ",", 1, endp)) != NULL) {
738 			p1 = p2 + 1;
739 			n++;
740 		}
741 		size = n + MBSTRG(default_detect_order_list_size);
742 		/* make list */
743 		list = (const mbfl_encoding **)pecalloc(size, sizeof(mbfl_encoding*), persistent);
744 		entry = list;
745 		n = 0;
746 		bauto = 0;
747 		p1 = tmpstr;
748 		do {
749 			p2 = p = (char*)php_memnstr(p1, ",", 1, endp);
750 			if (p == NULL) {
751 				p = endp;
752 			}
753 			*p = '\0';
754 			/* trim spaces */
755 			while (p1 < p && (*p1 == ' ' || *p1 == '\t')) {
756 				p1++;
757 			}
758 			p--;
759 			while (p > p1 && (*p == ' ' || *p == '\t')) {
760 				*p = '\0';
761 				p--;
762 			}
763 			/* convert to the encoding number and check encoding */
764 			if (strcasecmp(p1, "auto") == 0) {
765 				if (!bauto) {
766 					const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
767 					const size_t identify_list_size = MBSTRG(default_detect_order_list_size);
768 					size_t i;
769 					bauto = 1;
770 					for (i = 0; i < identify_list_size; i++) {
771 						*entry++ = mbfl_no2encoding(*src++);
772 						n++;
773 					}
774 				}
775 			} else {
776 				const mbfl_encoding *encoding = mbfl_name2encoding(p1);
777 				if (encoding) {
778 					*entry++ = encoding;
779 					n++;
780 				} else {
781 					ret = FAILURE;
782 				}
783 			}
784 			p1 = p2 + 1;
785 		} while (n < size && p2 != NULL);
786 		if (n > 0) {
787 			if (return_list) {
788 				*return_list = list;
789 			} else {
790 				pefree(list, persistent);
791 			}
792 		} else {
793 			pefree(list, persistent);
794 			if (return_list) {
795 				*return_list = NULL;
796 			}
797 			ret = FAILURE;
798 		}
799 		if (return_size) {
800 			*return_size = n;
801 		}
802 		efree(tmpstr);
803 	}
804 
805 	return ret;
806 }
807 /* }}} */
808 
809 /* {{{ static int php_mb_parse_encoding_array()
810  *  Return FAILURE if input contains any illegal encoding, otherwise SUCCESS.
811  *  Even if any illegal encoding is detected the result may contain a list
812  *  of parsed encodings.
813  */
814 static int
php_mb_parse_encoding_array(zval * array,const mbfl_encoding *** return_list,size_t * return_size,int persistent)815 php_mb_parse_encoding_array(zval *array, const mbfl_encoding ***return_list, size_t *return_size, int persistent)
816 {
817 	zval *hash_entry;
818 	HashTable *target_hash;
819 	int i, n, bauto, ret = SUCCESS;
820 	const mbfl_encoding **list, **entry;
821 	size_t size;
822 
823 	list = NULL;
824 	if (Z_TYPE_P(array) == IS_ARRAY) {
825 		target_hash = Z_ARRVAL_P(array);
826 		i = zend_hash_num_elements(target_hash);
827 		size = i + MBSTRG(default_detect_order_list_size);
828 		list = (const mbfl_encoding **)pecalloc(size, sizeof(mbfl_encoding*), persistent);
829 		entry = list;
830 		bauto = 0;
831 		n = 0;
832 		ZEND_HASH_FOREACH_VAL(target_hash, hash_entry) {
833 			zend_string *encoding_str = zval_try_get_string(hash_entry);
834 			if (UNEXPECTED(!encoding_str)) {
835 				ret = FAILURE;
836 				break;
837 			}
838 
839 			if (strcasecmp(ZSTR_VAL(encoding_str), "auto") == 0) {
840 				if (!bauto) {
841 					const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
842 					const size_t identify_list_size = MBSTRG(default_detect_order_list_size);
843 					size_t j;
844 
845 					bauto = 1;
846 					for (j = 0; j < identify_list_size; j++) {
847 						*entry++ = mbfl_no2encoding(*src++);
848 						n++;
849 					}
850 				}
851 			} else {
852 				const mbfl_encoding *encoding = mbfl_name2encoding(ZSTR_VAL(encoding_str));
853 				if (encoding) {
854 					*entry++ = encoding;
855 					n++;
856 				} else {
857 					ret = FAILURE;
858 				}
859 			}
860 			i--;
861 			zend_string_release(encoding_str);
862 		} ZEND_HASH_FOREACH_END();
863 		if (n > 0) {
864 			if (return_list) {
865 				*return_list = list;
866 			} else {
867 				pefree(list, persistent);
868 			}
869 		} else {
870 			pefree(list, persistent);
871 			if (return_list) {
872 				*return_list = NULL;
873 			}
874 			ret = FAILURE;
875 		}
876 		if (return_size) {
877 			*return_size = n;
878 		}
879 	}
880 
881 	return ret;
882 }
883 /* }}} */
884 
885 /* {{{ zend_multibyte interface */
php_mb_zend_encoding_fetcher(const char * encoding_name)886 static const zend_encoding* php_mb_zend_encoding_fetcher(const char *encoding_name)
887 {
888 	return (const zend_encoding*)mbfl_name2encoding(encoding_name);
889 }
890 
php_mb_zend_encoding_name_getter(const zend_encoding * encoding)891 static const char *php_mb_zend_encoding_name_getter(const zend_encoding *encoding)
892 {
893 	return ((const mbfl_encoding *)encoding)->name;
894 }
895 
php_mb_zend_encoding_lexer_compatibility_checker(const zend_encoding * _encoding)896 static int php_mb_zend_encoding_lexer_compatibility_checker(const zend_encoding *_encoding)
897 {
898 	const mbfl_encoding *encoding = (const mbfl_encoding*)_encoding;
899 	if (encoding->flag & MBFL_ENCTYPE_SBCS) {
900 		return 1;
901 	}
902 	if ((encoding->flag & (MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE)) == MBFL_ENCTYPE_MBCS) {
903 		return 1;
904 	}
905 	return 0;
906 }
907 
php_mb_zend_encoding_detector(const unsigned char * arg_string,size_t arg_length,const zend_encoding ** list,size_t list_size)908 static const zend_encoding *php_mb_zend_encoding_detector(const unsigned char *arg_string, size_t arg_length, const zend_encoding **list, size_t list_size)
909 {
910 	mbfl_string string;
911 
912 	if (!list) {
913 		list = (const zend_encoding **)MBSTRG(current_detect_order_list);
914 		list_size = MBSTRG(current_detect_order_list_size);
915 	}
916 
917 	mbfl_string_init(&string);
918 	string.no_language = MBSTRG(language);
919 	string.val = (unsigned char *)arg_string;
920 	string.len = arg_length;
921 	return (const zend_encoding *) mbfl_identify_encoding(&string, (const mbfl_encoding **)list, list_size, 0);
922 }
923 
php_mb_zend_encoding_converter(unsigned char ** to,size_t * to_length,const unsigned char * from,size_t from_length,const zend_encoding * encoding_to,const zend_encoding * encoding_from)924 static size_t php_mb_zend_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const zend_encoding *encoding_to, const zend_encoding *encoding_from)
925 {
926 	mbfl_string string, result;
927 	mbfl_buffer_converter *convd;
928 	int status;
929 	size_t loc;
930 
931 	/* new encoding */
932 	/* initialize string */
933 	string.encoding = (const mbfl_encoding*)encoding_from;
934 	string.no_language = MBSTRG(language);
935 	string.val = (unsigned char*)from;
936 	string.len = from_length;
937 
938 	/* initialize converter */
939 	convd = mbfl_buffer_converter_new((const mbfl_encoding *)encoding_from, (const mbfl_encoding *)encoding_to, string.len);
940 	if (convd == NULL) {
941 		return (size_t) -1;
942 	}
943 
944 	mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
945 	mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
946 
947 	/* do it */
948 	status = mbfl_buffer_converter_feed2(convd, &string, &loc);
949 	if (status) {
950 		mbfl_buffer_converter_delete(convd);
951 		return (size_t)-1;
952 	}
953 
954 	mbfl_buffer_converter_flush(convd);
955 	mbfl_string_init(&result);
956 	if (!mbfl_buffer_converter_result(convd, &result)) {
957 		mbfl_buffer_converter_delete(convd);
958 		return (size_t)-1;
959 	}
960 
961 	*to = result.val;
962 	*to_length = result.len;
963 
964 	mbfl_buffer_converter_delete(convd);
965 
966 	return loc;
967 }
968 
php_mb_zend_encoding_list_parser(const char * encoding_list,size_t encoding_list_len,const zend_encoding *** return_list,size_t * return_size,int persistent)969 static int php_mb_zend_encoding_list_parser(const char *encoding_list, size_t encoding_list_len, const zend_encoding ***return_list, size_t *return_size, int persistent)
970 {
971 	return php_mb_parse_encoding_list(encoding_list, encoding_list_len, (const mbfl_encoding ***)return_list, return_size, persistent);
972 }
973 
php_mb_zend_internal_encoding_getter(void)974 static const zend_encoding *php_mb_zend_internal_encoding_getter(void)
975 {
976 	return (const zend_encoding *)MBSTRG(internal_encoding);
977 }
978 
php_mb_zend_internal_encoding_setter(const zend_encoding * encoding)979 static int php_mb_zend_internal_encoding_setter(const zend_encoding *encoding)
980 {
981 	MBSTRG(internal_encoding) = (const mbfl_encoding *)encoding;
982 	return SUCCESS;
983 }
984 
/* Callback table handed to zend_multibyte_set_functions() during module startup.
 * The initializer is positional, so the order of the entries must match the field order
 * of zend_multibyte_functions. */
static zend_multibyte_functions php_mb_zend_multibyte_functions = {
	"mbstring",                                        /* presumably the provider name -- confirm against the struct definition */
	php_mb_zend_encoding_fetcher,                      /* encoding name -> encoding (mbfl_name2encoding) */
	php_mb_zend_encoding_name_getter,                  /* encoding -> its name */
	php_mb_zend_encoding_lexer_compatibility_checker,  /* 1 if the encoding is accepted for the lexer */
	php_mb_zend_encoding_detector,                     /* detect the encoding of a byte buffer */
	php_mb_zend_encoding_converter,                    /* convert a buffer between two encodings */
	php_mb_zend_encoding_list_parser,                  /* parse an encoding list string */
	php_mb_zend_internal_encoding_getter,              /* read MBSTRG(internal_encoding) */
	php_mb_zend_internal_encoding_setter               /* write MBSTRG(internal_encoding) */
};
996 /* }}} */
997 
/* Backend-neutral regex helpers used for the mbstring.http_output_conv_mimetypes pattern.
 * Exactly one set of definitions follows: Oniguruma-based when HAVE_MBREGEX is set,
 * PCRE2-based otherwise. The compiled pattern is passed around as an opaque pointer. */
static void *_php_mb_compile_regex(const char *pattern);
static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len);
static void _php_mb_free_regex(void *opaque);
1001 
1002 #if HAVE_MBREGEX
1003 /* {{{ _php_mb_compile_regex */
_php_mb_compile_regex(const char * pattern)1004 static void *_php_mb_compile_regex(const char *pattern)
1005 {
1006 	php_mb_regex_t *retval;
1007 	OnigErrorInfo err_info;
1008 	int err_code;
1009 
1010 	if ((err_code = onig_new(&retval,
1011 			(const OnigUChar *)pattern,
1012 			(const OnigUChar *)pattern + strlen(pattern),
1013 			ONIG_OPTION_IGNORECASE | ONIG_OPTION_DONT_CAPTURE_GROUP,
1014 			ONIG_ENCODING_ASCII, &OnigSyntaxPerl, &err_info))) {
1015 		OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
1016 		onig_error_code_to_str(err_str, err_code, err_info);
1017 		php_error_docref(NULL, E_WARNING, "%s: %s", pattern, err_str);
1018 		retval = NULL;
1019 	}
1020 	return retval;
1021 }
1022 /* }}} */
1023 
1024 /* {{{ _php_mb_match_regex */
_php_mb_match_regex(void * opaque,const char * str,size_t str_len)1025 static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len)
1026 {
1027 	OnigMatchParam *mp = onig_new_match_param();
1028 	int err;
1029 	onig_initialize_match_param(mp);
1030 	if (!ZEND_LONG_UINT_OVFL(MBSTRG(regex_stack_limit))) {
1031 		onig_set_match_stack_limit_size_of_match_param(mp, (unsigned int)MBSTRG(regex_stack_limit));
1032 	}
1033 	if (!ZEND_LONG_UINT_OVFL(MBSTRG(regex_retry_limit))) {
1034 		onig_set_retry_limit_in_match_of_match_param(mp, (unsigned int)MBSTRG(regex_retry_limit));
1035 	}
1036 	/* search */
1037 	err = onig_search_with_param((php_mb_regex_t *)opaque, (const OnigUChar *)str,
1038 		(const OnigUChar*)str + str_len, (const OnigUChar *)str,
1039 		(const OnigUChar*)str + str_len, NULL, ONIG_OPTION_NONE, mp);
1040 	onig_free_match_param(mp);
1041 	return err >= 0;
1042 }
1043 /* }}} */
1044 
1045 /* {{{ _php_mb_free_regex */
_php_mb_free_regex(void * opaque)1046 static void _php_mb_free_regex(void *opaque)
1047 {
1048 	onig_free((php_mb_regex_t *)opaque);
1049 }
1050 /* }}} */
1051 #else
1052 /* {{{ _php_mb_compile_regex */
_php_mb_compile_regex(const char * pattern)1053 static void *_php_mb_compile_regex(const char *pattern)
1054 {
1055 	pcre2_code *retval;
1056 	PCRE2_SIZE err_offset;
1057 	int errnum;
1058 
1059 	if (!(retval = pcre2_compile((PCRE2_SPTR)pattern, PCRE2_ZERO_TERMINATED,
1060 			PCRE2_CASELESS, &errnum, &err_offset, php_pcre_cctx()))) {
1061 		PCRE2_UCHAR err_str[128];
1062 		pcre2_get_error_message(errnum, err_str, sizeof(err_str));
1063 		php_error_docref(NULL, E_WARNING, "%s (offset=%zu): %s", pattern, err_offset, err_str);
1064 	}
1065 	return retval;
1066 }
1067 /* }}} */
1068 
1069 /* {{{ _php_mb_match_regex */
_php_mb_match_regex(void * opaque,const char * str,size_t str_len)1070 static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len)
1071 {
1072 	int res;
1073 
1074 	pcre2_match_data *match_data = php_pcre_create_match_data(0, opaque);
1075 	if (NULL == match_data) {
1076 		pcre2_code_free(opaque);
1077 		php_error_docref(NULL, E_WARNING, "Cannot allocate match data");
1078 		return FAILURE;
1079 	}
1080 	res = pcre2_match(opaque, (PCRE2_SPTR)str, str_len, 0, 0, match_data, php_pcre_mctx()) >= 0;
1081 	php_pcre_free_match_data(match_data);
1082 
1083 	return res;
1084 }
1085 /* }}} */
1086 
1087 /* {{{ _php_mb_free_regex */
_php_mb_free_regex(void * opaque)1088 static void _php_mb_free_regex(void *opaque)
1089 {
1090 	pcre2_code_free(opaque);
1091 }
1092 /* }}} */
1093 #endif
1094 
1095 /* {{{ php_mb_nls_get_default_detect_order_list */
php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang,enum mbfl_no_encoding ** plist,size_t * plist_size)1096 static int php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang, enum mbfl_no_encoding **plist, size_t *plist_size)
1097 {
1098 	size_t i;
1099 
1100 	*plist = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut;
1101 	*plist_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
1102 
1103 	for (i = 0; i < sizeof(php_mb_default_identify_list) / sizeof(php_mb_default_identify_list[0]); i++) {
1104 		if (php_mb_default_identify_list[i].lang == lang) {
1105 			*plist = (enum mbfl_no_encoding *)php_mb_default_identify_list[i].list;
1106 			*plist_size = php_mb_default_identify_list[i].list_size;
1107 			return 1;
1108 		}
1109 	}
1110 	return 0;
1111 }
1112 /* }}} */
1113 
php_mb_rfc1867_substring_conf(const zend_encoding * encoding,char * start,size_t len,char quote)1114 static char *php_mb_rfc1867_substring_conf(const zend_encoding *encoding, char *start, size_t len, char quote)
1115 {
1116 	char *result = emalloc(len + 2);
1117 	char *resp = result;
1118 	size_t i;
1119 
1120 	for (i = 0; i < len && start[i] != quote; ++i) {
1121 		if (start[i] == '\\' && (start[i + 1] == '\\' || (quote && start[i + 1] == quote))) {
1122 			*resp++ = start[++i];
1123 		} else {
1124 			size_t j = php_mb_mbchar_bytes_ex(start+i, (const mbfl_encoding *)encoding);
1125 
1126 			while (j-- > 0 && i < len) {
1127 				*resp++ = start[i++];
1128 			}
1129 			--i;
1130 		}
1131 	}
1132 
1133 	*resp = '\0';
1134 	return result;
1135 }
1136 
php_mb_rfc1867_getword(const zend_encoding * encoding,char ** line,char stop)1137 static char *php_mb_rfc1867_getword(const zend_encoding *encoding, char **line, char stop) /* {{{ */
1138 {
1139 	char *pos = *line, quote;
1140 	char *res;
1141 
1142 	while (*pos && *pos != stop) {
1143 		if ((quote = *pos) == '"' || quote == '\'') {
1144 			++pos;
1145 			while (*pos && *pos != quote) {
1146 				if (*pos == '\\' && pos[1] && pos[1] == quote) {
1147 					pos += 2;
1148 				} else {
1149 					++pos;
1150 				}
1151 			}
1152 			if (*pos) {
1153 				++pos;
1154 			}
1155 		} else {
1156 			pos += php_mb_mbchar_bytes_ex(pos, (const mbfl_encoding *)encoding);
1157 
1158 		}
1159 	}
1160 	if (*pos == '\0') {
1161 		res = estrdup(*line);
1162 		*line += strlen(*line);
1163 		return res;
1164 	}
1165 
1166 	res = estrndup(*line, pos - *line);
1167 
1168 	while (*pos == stop) {
1169 		pos += php_mb_mbchar_bytes_ex(pos, (const mbfl_encoding *)encoding);
1170 	}
1171 
1172 	*line = pos;
1173 	return res;
1174 }
1175 /* }}} */
1176 
/* Extract one configuration word from `str` after skipping leading whitespace.
 * An input that is empty after skipping yields a freshly allocated "". A word that
 * starts with ' or " is read up to the matching quote; otherwise it runs to the next
 * whitespace byte. The copying and unescaping is done by php_mb_rfc1867_substring_conf(). */
static char *php_mb_rfc1867_getword_conf(const zend_encoding *encoding, char *str) /* {{{ */
{
	char *word_end;

	for (; *str != '\0' && isspace((unsigned char) *str); str++) {
		/* skip leading whitespace */
	}

	if (*str == '\0') {
		return estrdup("");
	}

	if (*str == '"' || *str == '\'') {
		const char quote = *str;

		str++;
		return php_mb_rfc1867_substring_conf(encoding, str, strlen(str), quote);
	}

	word_end = str;
	while (*word_end != '\0' && !isspace((unsigned char) *word_end)) {
		word_end++;
	}

	return php_mb_rfc1867_substring_conf(encoding, str, word_end - str, 0);
}
1201 /* }}} */
1202 
php_mb_rfc1867_basename(const zend_encoding * encoding,char * filename)1203 static char *php_mb_rfc1867_basename(const zend_encoding *encoding, char *filename) /* {{{ */
1204 {
1205 	char *s, *s2;
1206 	const size_t filename_len = strlen(filename);
1207 
1208 	/* The \ check should technically be needed for win32 systems only where
1209 	 * it is a valid path separator. However, IE in all it's wisdom always sends
1210 	 * the full path of the file on the user's filesystem, which means that unless
1211 	 * the user does basename() they get a bogus file name. Until IE's user base drops
1212 	 * to nill or problem is fixed this code must remain enabled for all systems. */
1213 	s = php_mb_safe_strrchr_ex(filename, '\\', filename_len, (const mbfl_encoding *)encoding);
1214 	s2 = php_mb_safe_strrchr_ex(filename, '/', filename_len, (const mbfl_encoding *)encoding);
1215 
1216 	if (s && s2) {
1217 		if (s > s2) {
1218 			return ++s;
1219 		} else {
1220 			return ++s2;
1221 		}
1222 	} else if (s) {
1223 		return ++s;
1224 	} else if (s2) {
1225 		return ++s2;
1226 	} else {
1227 		return filename;
1228 	}
1229 }
1230 /* }}} */
1231 
1232 /* {{{ php.ini directive handler */
1233 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_language) */
PHP_INI_MH(OnUpdate_mbstring_language)1234 static PHP_INI_MH(OnUpdate_mbstring_language)
1235 {
1236 	enum mbfl_no_language no_language;
1237 
1238 	no_language = mbfl_name2no_language(ZSTR_VAL(new_value));
1239 	if (no_language == mbfl_no_language_invalid) {
1240 		MBSTRG(language) = mbfl_no_language_neutral;
1241 		return FAILURE;
1242 	}
1243 	MBSTRG(language) = no_language;
1244 	php_mb_nls_get_default_detect_order_list(no_language, &MBSTRG(default_detect_order_list), &MBSTRG(default_detect_order_list_size));
1245 	return SUCCESS;
1246 }
1247 /* }}} */
1248 
1249 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_detect_order) */
PHP_INI_MH(OnUpdate_mbstring_detect_order)1250 static PHP_INI_MH(OnUpdate_mbstring_detect_order)
1251 {
1252 	const mbfl_encoding **list;
1253 	size_t size;
1254 
1255 	if (!new_value) {
1256 		if (MBSTRG(detect_order_list)) {
1257 			pefree(MBSTRG(detect_order_list), 1);
1258 		}
1259 		MBSTRG(detect_order_list) = NULL;
1260 		MBSTRG(detect_order_list_size) = 0;
1261 		return SUCCESS;
1262 	}
1263 
1264 	if (FAILURE == php_mb_parse_encoding_list(ZSTR_VAL(new_value), ZSTR_LEN(new_value), &list, &size, 1)) {
1265 		return FAILURE;
1266 	}
1267 
1268 	if (MBSTRG(detect_order_list)) {
1269 		pefree(MBSTRG(detect_order_list), 1);
1270 	}
1271 	MBSTRG(detect_order_list) = list;
1272 	MBSTRG(detect_order_list_size) = size;
1273 	return SUCCESS;
1274 }
1275 /* }}} */
1276 
_php_mb_ini_mbstring_http_input_set(const char * new_value,size_t new_value_length)1277 static int _php_mb_ini_mbstring_http_input_set(const char *new_value, size_t new_value_length) {
1278 	const mbfl_encoding **list;
1279 	size_t size;
1280 	if (FAILURE == php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1)) {
1281 		return FAILURE;
1282 	}
1283 	if (MBSTRG(http_input_list)) {
1284 		pefree(MBSTRG(http_input_list), 1);
1285 	}
1286 	MBSTRG(http_input_list) = list;
1287 	MBSTRG(http_input_list_size) = size;
1288 	return SUCCESS;
1289 }
1290 
1291 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_input) */
PHP_INI_MH(OnUpdate_mbstring_http_input)1292 static PHP_INI_MH(OnUpdate_mbstring_http_input)
1293 {
1294 	if (stage & (PHP_INI_STAGE_ACTIVATE | PHP_INI_STAGE_RUNTIME)) {
1295 		php_error_docref("ref.mbstring", E_DEPRECATED, "Use of mbstring.http_input is deprecated");
1296 	}
1297 
1298 	if (!new_value || !ZSTR_VAL(new_value)) {
1299 		const char *encoding = php_get_input_encoding();
1300 		MBSTRG(http_input_set) = 0;
1301 		_php_mb_ini_mbstring_http_input_set(encoding, strlen(encoding));
1302 		return SUCCESS;
1303 	}
1304 
1305 	MBSTRG(http_input_set) = 1;
1306 	return _php_mb_ini_mbstring_http_input_set(ZSTR_VAL(new_value), ZSTR_LEN(new_value));
1307 }
1308 /* }}} */
1309 
_php_mb_ini_mbstring_http_output_set(const char * new_value)1310 static int _php_mb_ini_mbstring_http_output_set(const char *new_value) {
1311 	const mbfl_encoding *encoding = mbfl_name2encoding(new_value);
1312 	if (!encoding) {
1313 		return FAILURE;
1314 	}
1315 
1316 	MBSTRG(http_output_encoding) = encoding;
1317 	MBSTRG(current_http_output_encoding) = encoding;
1318 	return SUCCESS;
1319 }
1320 
1321 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output) */
PHP_INI_MH(OnUpdate_mbstring_http_output)1322 static PHP_INI_MH(OnUpdate_mbstring_http_output)
1323 {
1324 	if (stage & (PHP_INI_STAGE_ACTIVATE | PHP_INI_STAGE_RUNTIME)) {
1325 		php_error_docref("ref.mbstring", E_DEPRECATED, "Use of mbstring.http_output is deprecated");
1326 	}
1327 
1328 	if (new_value == NULL || ZSTR_LEN(new_value) == 0) {
1329 		MBSTRG(http_output_set) = 0;
1330 		_php_mb_ini_mbstring_http_output_set(php_get_output_encoding());
1331 		return SUCCESS;
1332 	}
1333 
1334 	MBSTRG(http_output_set) = 1;
1335 	return _php_mb_ini_mbstring_http_output_set(ZSTR_VAL(new_value));
1336 }
1337 /* }}} */
1338 
1339 /* {{{ static _php_mb_ini_mbstring_internal_encoding_set */
_php_mb_ini_mbstring_internal_encoding_set(const char * new_value,size_t new_value_length)1340 static int _php_mb_ini_mbstring_internal_encoding_set(const char *new_value, size_t new_value_length)
1341 {
1342 	const mbfl_encoding *encoding;
1343 
1344 	if (!new_value || !new_value_length || !(encoding = mbfl_name2encoding(new_value))) {
1345 		/* falls back to UTF-8 if an unknown encoding name is given */
1346 		encoding = mbfl_no2encoding(mbfl_no_encoding_utf8);
1347 	}
1348 	MBSTRG(internal_encoding) = encoding;
1349 	MBSTRG(current_internal_encoding) = encoding;
1350 #if HAVE_MBREGEX
1351 	{
1352 		const char *enc_name = new_value;
1353 		if (FAILURE == php_mb_regex_set_default_mbctype(enc_name)) {
1354 			/* falls back to UTF-8 if an unknown encoding name is given */
1355 			enc_name = "UTF-8";
1356 			php_mb_regex_set_default_mbctype(enc_name);
1357 		}
1358 		php_mb_regex_set_mbctype(new_value);
1359 	}
1360 #endif
1361 	return SUCCESS;
1362 }
1363 /* }}} */
1364 
1365 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_internal_encoding) */
PHP_INI_MH(OnUpdate_mbstring_internal_encoding)1366 static PHP_INI_MH(OnUpdate_mbstring_internal_encoding)
1367 {
1368 	if (stage & (PHP_INI_STAGE_ACTIVATE | PHP_INI_STAGE_RUNTIME)) {
1369 		php_error_docref("ref.mbstring", E_DEPRECATED, "Use of mbstring.internal_encoding is deprecated");
1370 	}
1371 
1372 	if (OnUpdateString(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage) == FAILURE) {
1373 		return FAILURE;
1374 	}
1375 
1376 	if (new_value && ZSTR_LEN(new_value)) {
1377 		MBSTRG(internal_encoding_set) = 1;
1378 		return _php_mb_ini_mbstring_internal_encoding_set(ZSTR_VAL(new_value), ZSTR_LEN(new_value));
1379 	} else {
1380 		const char *encoding = php_get_internal_encoding();
1381 		MBSTRG(internal_encoding_set) = 0;
1382 		return _php_mb_ini_mbstring_internal_encoding_set(encoding, strlen(encoding));
1383 	}
1384 }
1385 /* }}} */
1386 
1387 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_substitute_character) */
PHP_INI_MH(OnUpdate_mbstring_substitute_character)1388 static PHP_INI_MH(OnUpdate_mbstring_substitute_character)
1389 {
1390 	int c;
1391 	char *endptr = NULL;
1392 
1393 	if (new_value != NULL) {
1394 		if (strcasecmp("none", ZSTR_VAL(new_value)) == 0) {
1395 			MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
1396 			MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
1397 		} else if (strcasecmp("long", ZSTR_VAL(new_value)) == 0) {
1398 			MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
1399 			MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
1400 		} else if (strcasecmp("entity", ZSTR_VAL(new_value)) == 0) {
1401 			MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
1402 			MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
1403 		} else {
1404 			MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1405 			MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1406 			if (ZSTR_LEN(new_value) > 0) {
1407 				c = strtol(ZSTR_VAL(new_value), &endptr, 0);
1408 				if (*endptr == '\0') {
1409 					MBSTRG(filter_illegal_substchar) = c;
1410 					MBSTRG(current_filter_illegal_substchar) = c;
1411 				}
1412 			}
1413 		}
1414 	} else {
1415 		MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1416 		MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1417 		MBSTRG(filter_illegal_substchar) = 0x3f;	/* '?' */
1418 		MBSTRG(current_filter_illegal_substchar) = 0x3f;	/* '?' */
1419 	}
1420 
1421 	return SUCCESS;
1422 }
1423 /* }}} */
1424 
1425 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_encoding_translation) */
PHP_INI_MH(OnUpdate_mbstring_encoding_translation)1426 static PHP_INI_MH(OnUpdate_mbstring_encoding_translation)
1427 {
1428 	if (new_value == NULL) {
1429 		return FAILURE;
1430 	}
1431 
1432 	OnUpdateBool(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
1433 
1434 	if (MBSTRG(encoding_translation)) {
1435 		sapi_unregister_post_entry(php_post_entries);
1436 		sapi_register_post_entries(mbstr_post_entries);
1437 	} else {
1438 		sapi_unregister_post_entry(mbstr_post_entries);
1439 		sapi_register_post_entries(php_post_entries);
1440 	}
1441 
1442 	return SUCCESS;
1443 }
1444 /* }}} */
1445 
1446 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes */
PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes)1447 static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes)
1448 {
1449 	zend_string *tmp;
1450 	void *re = NULL;
1451 
1452 	if (!new_value) {
1453 		new_value = entry->orig_value;
1454 	}
1455 	tmp = php_trim(new_value, NULL, 0, 3);
1456 
1457 	if (ZSTR_LEN(tmp) > 0) {
1458 		if (!(re = _php_mb_compile_regex(ZSTR_VAL(tmp)))) {
1459 			zend_string_release_ex(tmp, 0);
1460 			return FAILURE;
1461 		}
1462 	}
1463 
1464 	if (MBSTRG(http_output_conv_mimetypes)) {
1465 		_php_mb_free_regex(MBSTRG(http_output_conv_mimetypes));
1466 	}
1467 
1468 	MBSTRG(http_output_conv_mimetypes) = re;
1469 
1470 	zend_string_release_ex(tmp, 0);
1471 	return SUCCESS;
1472 }
1473 /* }}} */
1474 /* }}} */
1475 
1476 /* {{{ php.ini directive registration */
/* ini directives of the extension. Entries declared with STD_* macros are additionally
 * bound to a field of zend_mbstring_globals; the others are applied solely through
 * their OnUpdate_* handler. */
PHP_INI_BEGIN()
	/* language / encoding selection; the http_input, http_output and internal_encoding
	 * handlers emit a deprecation notice when changed at activate/runtime stage */
	PHP_INI_ENTRY("mbstring.language", "neutral", PHP_INI_ALL, OnUpdate_mbstring_language)
	PHP_INI_ENTRY("mbstring.detect_order", NULL, PHP_INI_ALL, OnUpdate_mbstring_detect_order)
	PHP_INI_ENTRY("mbstring.http_input", NULL, PHP_INI_ALL, OnUpdate_mbstring_http_input)
	PHP_INI_ENTRY("mbstring.http_output", NULL, PHP_INI_ALL, OnUpdate_mbstring_http_output)
	STD_PHP_INI_ENTRY("mbstring.internal_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_internal_encoding, internal_encoding_name, zend_mbstring_globals, mbstring_globals)
	PHP_INI_ENTRY("mbstring.substitute_character", NULL, PHP_INI_ALL, OnUpdate_mbstring_substitute_character)
	/* function overloading is registered as PHP_INI_SYSTEM only */
	STD_PHP_INI_ENTRY("mbstring.func_overload", "0",
	PHP_INI_SYSTEM, OnUpdateLong, func_overload, zend_mbstring_globals, mbstring_globals)

	/* HTTP input translation and the mime-type filter for output conversion */
	STD_PHP_INI_BOOLEAN("mbstring.encoding_translation", "0",
		PHP_INI_SYSTEM | PHP_INI_PERDIR,
		OnUpdate_mbstring_encoding_translation,
		encoding_translation, zend_mbstring_globals, mbstring_globals)
	PHP_INI_ENTRY("mbstring.http_output_conv_mimetypes",
		"^(text/|application/xhtml\\+xml)",
		PHP_INI_ALL,
		OnUpdate_mbstring_http_output_conv_mimetypes)

	STD_PHP_INI_BOOLEAN("mbstring.strict_detection", "0",
		PHP_INI_ALL,
		OnUpdateBool,
		strict_detection, zend_mbstring_globals, mbstring_globals)
#if HAVE_MBREGEX
	/* limits applied to Oniguruma match parameters */
	STD_PHP_INI_ENTRY("mbstring.regex_stack_limit", "100000",PHP_INI_ALL, OnUpdateLong, regex_stack_limit, zend_mbstring_globals, mbstring_globals)
	STD_PHP_INI_ENTRY("mbstring.regex_retry_limit", "1000000",PHP_INI_ALL, OnUpdateLong, regex_retry_limit, zend_mbstring_globals, mbstring_globals)
#endif
PHP_INI_END()
1505 /* }}} */
1506 
1507 static void mbstring_internal_encoding_changed_hook() {
1508 	/* One of the internal_encoding / input_encoding / output_encoding ini settings changed. */
1509 	if (!MBSTRG(internal_encoding_set)) {
1510 		const char *encoding = php_get_internal_encoding();
1511 		_php_mb_ini_mbstring_internal_encoding_set(encoding, strlen(encoding));
1512 	}
1513 
1514 	if (!MBSTRG(http_output_set)) {
1515 		const char *encoding = php_get_output_encoding();
1516 		_php_mb_ini_mbstring_http_output_set(encoding);
1517 	}
1518 
1519 	if (!MBSTRG(http_input_set)) {
1520 		const char *encoding = php_get_input_encoding();
1521 		_php_mb_ini_mbstring_http_input_set(encoding, strlen(encoding));
1522 	}
1523 }
1524 
1525 /* {{{ module global initialize handler */
PHP_GINIT_FUNCTION(mbstring)1526 static PHP_GINIT_FUNCTION(mbstring)
1527 {
1528 #if defined(COMPILE_DL_MBSTRING) && defined(ZTS)
1529 ZEND_TSRMLS_CACHE_UPDATE();
1530 #endif
1531 
1532 	mbstring_globals->language = mbfl_no_language_uni;
1533 	mbstring_globals->internal_encoding = NULL;
1534 	mbstring_globals->current_internal_encoding = mbstring_globals->internal_encoding;
1535 	mbstring_globals->http_output_encoding = &mbfl_encoding_pass;
1536 	mbstring_globals->current_http_output_encoding = &mbfl_encoding_pass;
1537 	mbstring_globals->http_input_identify = NULL;
1538 	mbstring_globals->http_input_identify_get = NULL;
1539 	mbstring_globals->http_input_identify_post = NULL;
1540 	mbstring_globals->http_input_identify_cookie = NULL;
1541 	mbstring_globals->http_input_identify_string = NULL;
1542 	mbstring_globals->http_input_list = NULL;
1543 	mbstring_globals->http_input_list_size = 0;
1544 	mbstring_globals->detect_order_list = NULL;
1545 	mbstring_globals->detect_order_list_size = 0;
1546 	mbstring_globals->current_detect_order_list = NULL;
1547 	mbstring_globals->current_detect_order_list_size = 0;
1548 	mbstring_globals->default_detect_order_list = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut;
1549 	mbstring_globals->default_detect_order_list_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
1550 	mbstring_globals->filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1551 	mbstring_globals->filter_illegal_substchar = 0x3f;	/* '?' */
1552 	mbstring_globals->current_filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1553 	mbstring_globals->current_filter_illegal_substchar = 0x3f;	/* '?' */
1554 	mbstring_globals->illegalchars = 0;
1555 	mbstring_globals->func_overload = 0;
1556 	mbstring_globals->encoding_translation = 0;
1557 	mbstring_globals->strict_detection = 0;
1558 	mbstring_globals->outconv = NULL;
1559 	mbstring_globals->http_output_conv_mimetypes = NULL;
1560 #if HAVE_MBREGEX
1561 	mbstring_globals->mb_regex_globals = php_mb_regex_globals_alloc();
1562 #endif
1563 	mbstring_globals->last_used_encoding_name = NULL;
1564 	mbstring_globals->last_used_encoding = NULL;
1565 	mbstring_globals->internal_encoding_set = 0;
1566 	mbstring_globals->http_output_set = 0;
1567 	mbstring_globals->http_input_set = 0;
1568 }
1569 /* }}} */
1570 
1571 /* {{{ PHP_GSHUTDOWN_FUNCTION */
PHP_GSHUTDOWN_FUNCTION(mbstring)1572 static PHP_GSHUTDOWN_FUNCTION(mbstring)
1573 {
1574 	if (mbstring_globals->http_input_list) {
1575 		free(mbstring_globals->http_input_list);
1576 	}
1577 	if (mbstring_globals->detect_order_list) {
1578 		free(mbstring_globals->detect_order_list);
1579 	}
1580 	if (mbstring_globals->http_output_conv_mimetypes) {
1581 		_php_mb_free_regex(mbstring_globals->http_output_conv_mimetypes);
1582 	}
1583 #if HAVE_MBREGEX
1584 	php_mb_regex_globals_free(mbstring_globals->mb_regex_globals);
1585 #endif
1586 }
1587 /* }}} */
1588 
1589 /* {{{ PHP_MINIT_FUNCTION(mbstring) */
PHP_MINIT_FUNCTION(mbstring)1590 PHP_MINIT_FUNCTION(mbstring)
1591 {
1592 #if defined(COMPILE_DL_MBSTRING) && defined(ZTS)
1593 ZEND_TSRMLS_CACHE_UPDATE();
1594 #endif
1595 	__mbfl_allocators = (mbfl_allocators*)&_php_mb_allocators;
1596 
1597 	REGISTER_INI_ENTRIES();
1598 
1599 	/* We assume that we're the only user of the hook. */
1600 	ZEND_ASSERT(php_internal_encoding_changed == NULL);
1601 	php_internal_encoding_changed = mbstring_internal_encoding_changed_hook;
1602 	mbstring_internal_encoding_changed_hook();
1603 
1604 	/* This is a global handler. Should not be set in a per-request handler. */
1605 	sapi_register_treat_data(mbstr_treat_data);
1606 
1607 	/* Post handlers are stored in the thread-local context. */
1608 	if (MBSTRG(encoding_translation)) {
1609 		sapi_register_post_entries(mbstr_post_entries);
1610 	}
1611 
1612 	REGISTER_LONG_CONSTANT("MB_OVERLOAD_MAIL", MB_OVERLOAD_MAIL, CONST_CS | CONST_PERSISTENT);
1613 	REGISTER_LONG_CONSTANT("MB_OVERLOAD_STRING", MB_OVERLOAD_STRING, CONST_CS | CONST_PERSISTENT);
1614 	REGISTER_LONG_CONSTANT("MB_OVERLOAD_REGEX", MB_OVERLOAD_REGEX, CONST_CS | CONST_PERSISTENT);
1615 
1616 	REGISTER_LONG_CONSTANT("MB_CASE_UPPER", PHP_UNICODE_CASE_UPPER, CONST_CS | CONST_PERSISTENT);
1617 	REGISTER_LONG_CONSTANT("MB_CASE_LOWER", PHP_UNICODE_CASE_LOWER, CONST_CS | CONST_PERSISTENT);
1618 	REGISTER_LONG_CONSTANT("MB_CASE_TITLE", PHP_UNICODE_CASE_TITLE, CONST_CS | CONST_PERSISTENT);
1619 	REGISTER_LONG_CONSTANT("MB_CASE_FOLD", PHP_UNICODE_CASE_FOLD, CONST_CS | CONST_PERSISTENT);
1620 	REGISTER_LONG_CONSTANT("MB_CASE_UPPER_SIMPLE", PHP_UNICODE_CASE_UPPER_SIMPLE, CONST_CS | CONST_PERSISTENT);
1621 	REGISTER_LONG_CONSTANT("MB_CASE_LOWER_SIMPLE", PHP_UNICODE_CASE_LOWER_SIMPLE, CONST_CS | CONST_PERSISTENT);
1622 	REGISTER_LONG_CONSTANT("MB_CASE_TITLE_SIMPLE", PHP_UNICODE_CASE_TITLE_SIMPLE, CONST_CS | CONST_PERSISTENT);
1623 	REGISTER_LONG_CONSTANT("MB_CASE_FOLD_SIMPLE", PHP_UNICODE_CASE_FOLD_SIMPLE, CONST_CS | CONST_PERSISTENT);
1624 
1625 #if HAVE_MBREGEX
1626 	PHP_MINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1627 #endif
1628 
1629 	if (FAILURE == zend_multibyte_set_functions(&php_mb_zend_multibyte_functions)) {
1630 		return FAILURE;
1631 	}
1632 
1633 	php_rfc1867_set_multibyte_callbacks(
1634 		php_mb_encoding_translation,
1635 		php_mb_gpc_get_detect_order,
1636 		php_mb_gpc_set_input_encoding,
1637 		php_mb_rfc1867_getword,
1638 		php_mb_rfc1867_getword_conf,
1639 		php_mb_rfc1867_basename);
1640 
1641 	/* override original function (deprecated). */
1642 	if (MBSTRG(func_overload)){
1643 		zend_function *func, *orig;
1644 		const struct mb_overload_def *p;
1645 		zend_string *str;
1646 
1647 		p = &(mb_ovld[0]);
1648 		while (p->type > 0) {
1649 			if ((MBSTRG(func_overload) & p->type) == p->type &&
1650 				!zend_hash_str_exists(CG(function_table), p->save_func, strlen(p->save_func))
1651 			) {
1652 				func = zend_hash_str_find_ptr(CG(function_table), p->ovld_func, strlen(p->ovld_func));
1653 
1654 				if ((orig = zend_hash_str_find_ptr(CG(function_table), p->orig_func, strlen(p->orig_func))) == NULL) {
1655 					php_error_docref("ref.mbstring", E_WARNING, "mbstring couldn't find function %s.", p->orig_func);
1656 					return FAILURE;
1657 				} else {
1658 					ZEND_ASSERT(orig->type == ZEND_INTERNAL_FUNCTION);
1659 					str = zend_string_init_interned(p->save_func, strlen(p->save_func), 1);
1660 					zend_hash_add_mem(CG(function_table), str, orig, sizeof(zend_internal_function));
1661 					zend_string_release_ex(str, 1);
1662 					function_add_ref(orig);
1663 
1664 					str = zend_string_init_interned(p->orig_func, strlen(p->orig_func), 1);
1665 					zend_hash_update_mem(CG(function_table), str, func, sizeof(zend_internal_function));
1666 					zend_string_release_ex(str, 1);
1667 					function_add_ref(func);
1668 				}
1669 			}
1670 			p++;
1671 		}
1672 	}
1673 
1674 	return SUCCESS;
1675 }
1676 /* }}} */
1677 
1678 /* {{{ PHP_MSHUTDOWN_FUNCTION(mbstring) */
PHP_MSHUTDOWN_FUNCTION(mbstring)1679 PHP_MSHUTDOWN_FUNCTION(mbstring)
1680 {
1681 	/*  clear overloaded function. */
1682 	if (MBSTRG(func_overload)){
1683 		const struct mb_overload_def *p;
1684 		zend_function *orig;
1685 
1686 		p = &(mb_ovld[0]);
1687 		while (p->type > 0) {
1688 			if ((MBSTRG(func_overload) & p->type) == p->type &&
1689 				(orig = zend_hash_str_find_ptr(CG(function_table), p->save_func, strlen(p->save_func)))) {
1690 
1691 				zend_hash_str_update_mem(CG(function_table), p->orig_func, strlen(p->orig_func), orig, sizeof(zend_internal_function));
1692 				function_add_ref(orig);
1693 				zend_hash_str_del(CG(function_table), p->save_func, strlen(p->save_func));
1694 			}
1695 			p++;
1696 		}
1697 	}
1698 
1699 	UNREGISTER_INI_ENTRIES();
1700 
1701 	zend_multibyte_restore_functions();
1702 
1703 #if HAVE_MBREGEX
1704 	PHP_MSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1705 #endif
1706 
1707 	php_internal_encoding_changed = NULL;
1708 
1709 	return SUCCESS;
1710 }
1711 /* }}} */
1712 
1713 /* {{{ PHP_RINIT_FUNCTION(mbstring) */
PHP_RINIT_FUNCTION(mbstring)1714 PHP_RINIT_FUNCTION(mbstring)
1715 {
1716 	MBSTRG(current_internal_encoding) = MBSTRG(internal_encoding);
1717 	MBSTRG(current_http_output_encoding) = MBSTRG(http_output_encoding);
1718 	MBSTRG(current_filter_illegal_mode) = MBSTRG(filter_illegal_mode);
1719 	MBSTRG(current_filter_illegal_substchar) = MBSTRG(filter_illegal_substchar);
1720 
1721 	MBSTRG(illegalchars) = 0;
1722 
1723 	php_mb_populate_current_detect_order_list();
1724 
1725 	/* override original function. */
1726 	if (MBSTRG(func_overload)){
1727 		zend_error(E_DEPRECATED, "The mbstring.func_overload directive is deprecated");
1728 
1729 		CG(compiler_options) |= ZEND_COMPILE_NO_BUILTIN_STRLEN;
1730 	}
1731 #if HAVE_MBREGEX
1732 	PHP_RINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1733 #endif
1734 	zend_multibyte_set_internal_encoding((const zend_encoding *)MBSTRG(internal_encoding));
1735 
1736 	return SUCCESS;
1737 }
1738 /* }}} */
1739 
1740 /* {{{ PHP_RSHUTDOWN_FUNCTION(mbstring) */
PHP_RSHUTDOWN_FUNCTION(mbstring)1741 PHP_RSHUTDOWN_FUNCTION(mbstring)
1742 {
1743 	if (MBSTRG(current_detect_order_list) != NULL) {
1744 		efree(MBSTRG(current_detect_order_list));
1745 		MBSTRG(current_detect_order_list) = NULL;
1746 		MBSTRG(current_detect_order_list_size) = 0;
1747 	}
1748 	if (MBSTRG(outconv) != NULL) {
1749 		MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
1750 		mbfl_buffer_converter_delete(MBSTRG(outconv));
1751 		MBSTRG(outconv) = NULL;
1752 	}
1753 
1754 	/* clear http input identification. */
1755 	MBSTRG(http_input_identify) = NULL;
1756 	MBSTRG(http_input_identify_post) = NULL;
1757 	MBSTRG(http_input_identify_get) = NULL;
1758 	MBSTRG(http_input_identify_cookie) = NULL;
1759 	MBSTRG(http_input_identify_string) = NULL;
1760 
1761 	if (MBSTRG(last_used_encoding_name)) {
1762 		zend_string_release(MBSTRG(last_used_encoding_name));
1763 		MBSTRG(last_used_encoding_name) = NULL;
1764 	}
1765 
1766 	MBSTRG(internal_encoding_set) = 0;
1767 	MBSTRG(http_output_set) = 0;
1768 	MBSTRG(http_input_set) = 0;
1769 
1770 #if HAVE_MBREGEX
1771 	PHP_RSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1772 #endif
1773 
1774 	return SUCCESS;
1775 }
1776 /* }}} */
1777 
1778 /* {{{ PHP_MINFO_FUNCTION(mbstring) */
PHP_MINFO_FUNCTION(mbstring)1779 PHP_MINFO_FUNCTION(mbstring)
1780 {
1781 	php_info_print_table_start();
1782 	php_info_print_table_row(2, "Multibyte Support", "enabled");
1783 	php_info_print_table_row(2, "Multibyte string engine", "libmbfl");
1784 	php_info_print_table_row(2, "HTTP input encoding translation", MBSTRG(encoding_translation) ? "enabled": "disabled");
1785 	{
1786 		char tmp[256];
1787 		snprintf(tmp, sizeof(tmp), "%d.%d.%d", MBFL_VERSION_MAJOR, MBFL_VERSION_MINOR, MBFL_VERSION_TEENY);
1788 		php_info_print_table_row(2, "libmbfl version", tmp);
1789 	}
1790 	php_info_print_table_end();
1791 
1792 	php_info_print_table_start();
1793 	php_info_print_table_header(1, "mbstring extension makes use of \"streamable kanji code filter and converter\", which is distributed under the GNU Lesser General Public License version 2.1.");
1794 	php_info_print_table_end();
1795 
1796 #if HAVE_MBREGEX
1797 	PHP_MINFO(mb_regex)(ZEND_MODULE_INFO_FUNC_ARGS_PASSTHRU);
1798 #endif
1799 
1800 	DISPLAY_INI_ENTRIES();
1801 }
1802 /* }}} */
1803 
1804 /* {{{ proto string mb_language([string language])
1805    Sets the current language or Returns the current language as a string */
PHP_FUNCTION(mb_language)1806 PHP_FUNCTION(mb_language)
1807 {
1808 	zend_string *name = NULL;
1809 
1810 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "|S", &name) == FAILURE) {
1811 		return;
1812 	}
1813 	if (name == NULL) {
1814 		RETVAL_STRING((char *)mbfl_no_language2name(MBSTRG(language)));
1815 	} else {
1816 		zend_string *ini_name = zend_string_init("mbstring.language", sizeof("mbstring.language") - 1, 0);
1817 		if (FAILURE == zend_alter_ini_entry(ini_name, name, PHP_INI_USER, PHP_INI_STAGE_RUNTIME)) {
1818 			php_error_docref(NULL, E_WARNING, "Unknown language \"%s\"", ZSTR_VAL(name));
1819 			RETVAL_FALSE;
1820 		} else {
1821 			RETVAL_TRUE;
1822 		}
1823 		zend_string_release_ex(ini_name, 0);
1824 	}
1825 }
1826 /* }}} */
1827 
1828 /* {{{ proto string mb_internal_encoding([string encoding])
1829    Sets the current internal encoding or Returns the current internal encoding as a string */
PHP_FUNCTION(mb_internal_encoding)1830 PHP_FUNCTION(mb_internal_encoding)
1831 {
1832 	const char *name = NULL;
1833 	size_t name_len;
1834 	const mbfl_encoding *encoding;
1835 
1836 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s", &name, &name_len) == FAILURE) {
1837 		return;
1838 	}
1839 	if (name == NULL) {
1840 		name = MBSTRG(current_internal_encoding) ? MBSTRG(current_internal_encoding)->name: NULL;
1841 		if (name != NULL) {
1842 			RETURN_STRING(name);
1843 		} else {
1844 			RETURN_FALSE;
1845 		}
1846 	} else {
1847 		encoding = mbfl_name2encoding(name);
1848 		if (!encoding) {
1849 			php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", name);
1850 			RETURN_FALSE;
1851 		} else {
1852 			MBSTRG(current_internal_encoding) = encoding;
1853 			MBSTRG(internal_encoding_set) = 1;
1854 			RETURN_TRUE;
1855 		}
1856 	}
1857 }
1858 /* }}} */
1859 
1860 /* {{{ proto mixed mb_http_input([string type])
1861    Returns the input encoding */
PHP_FUNCTION(mb_http_input)1862 PHP_FUNCTION(mb_http_input)
1863 {
1864 	char *typ = NULL;
1865 	size_t typ_len;
1866 	int retname;
1867 	char *list, *temp;
1868 	const mbfl_encoding *result = NULL;
1869 
1870 	retname = 1;
1871  	if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s", &typ, &typ_len) == FAILURE) {
1872 		return;
1873  	}
1874  	if (typ == NULL) {
1875  		result = MBSTRG(http_input_identify);
1876  	} else {
1877  		switch (*typ) {
1878 		case 'G':
1879 		case 'g':
1880 			result = MBSTRG(http_input_identify_get);
1881 			break;
1882 		case 'P':
1883 		case 'p':
1884 			result = MBSTRG(http_input_identify_post);
1885 			break;
1886 		case 'C':
1887 		case 'c':
1888 			result = MBSTRG(http_input_identify_cookie);
1889 			break;
1890 		case 'S':
1891 		case 's':
1892 			result = MBSTRG(http_input_identify_string);
1893 			break;
1894 		case 'I':
1895 		case 'i':
1896 			{
1897 				const mbfl_encoding **entry = MBSTRG(http_input_list);
1898 				const size_t n = MBSTRG(http_input_list_size);
1899 				size_t i;
1900 				array_init(return_value);
1901 				for (i = 0; i < n; i++) {
1902 					add_next_index_string(return_value, (*entry)->name);
1903 					entry++;
1904 				}
1905 				retname = 0;
1906 			}
1907 			break;
1908 		case 'L':
1909 		case 'l':
1910 			{
1911 				const mbfl_encoding **entry = MBSTRG(http_input_list);
1912 				const size_t n = MBSTRG(http_input_list_size);
1913 				size_t i;
1914 				list = NULL;
1915 				for (i = 0; i < n; i++) {
1916 					if (list) {
1917 						temp = list;
1918 						spprintf(&list, 0, "%s,%s", temp, (*entry)->name);
1919 						efree(temp);
1920 						if (!list) {
1921 							break;
1922 						}
1923 					} else {
1924 						list = estrdup((*entry)->name);
1925 					}
1926 					entry++;
1927 				}
1928 			}
1929 			if (!list) {
1930 				RETURN_FALSE;
1931 			}
1932 			RETVAL_STRING(list);
1933 			efree(list);
1934 			retname = 0;
1935 			break;
1936 		default:
1937 			result = MBSTRG(http_input_identify);
1938 			break;
1939 		}
1940 	}
1941 
1942 	if (retname) {
1943 		if (result) {
1944 			RETVAL_STRING(result->name);
1945 		} else {
1946 			RETVAL_FALSE;
1947 		}
1948 	}
1949 }
1950 /* }}} */
1951 
1952 /* {{{ proto string mb_http_output([string encoding])
1953    Sets the current output_encoding or returns the current output_encoding as a string */
PHP_FUNCTION(mb_http_output)1954 PHP_FUNCTION(mb_http_output)
1955 {
1956 	const char *name = NULL;
1957 	size_t name_len;
1958 	const mbfl_encoding *encoding;
1959 
1960 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s", &name, &name_len) == FAILURE) {
1961 		return;
1962 	}
1963 
1964 	if (name == NULL) {
1965 		name = MBSTRG(current_http_output_encoding) ? MBSTRG(current_http_output_encoding)->name: NULL;
1966 		if (name != NULL) {
1967 			RETURN_STRING(name);
1968 		} else {
1969 			RETURN_FALSE;
1970 		}
1971 	} else {
1972 		encoding = mbfl_name2encoding(name);
1973 		if (!encoding) {
1974 			php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", name);
1975 			RETURN_FALSE;
1976 		} else {
1977 			MBSTRG(http_output_set) = 1;
1978 			MBSTRG(current_http_output_encoding) = encoding;
1979 			RETURN_TRUE;
1980 		}
1981 	}
1982 }
1983 /* }}} */
1984 
/* {{{ proto bool|array mb_detect_order([mixed encoding-list])
   Sets the current detect_order or returns the current detect_order as an array */
PHP_FUNCTION(mb_detect_order)1987 PHP_FUNCTION(mb_detect_order)
1988 {
1989 	zval *arg1 = NULL;
1990 
1991 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "|z", &arg1) == FAILURE) {
1992 		return;
1993 	}
1994 
1995 	if (!arg1) {
1996 		size_t i;
1997 		size_t n = MBSTRG(current_detect_order_list_size);
1998 		const mbfl_encoding **entry = MBSTRG(current_detect_order_list);
1999 		array_init(return_value);
2000 		for (i = 0; i < n; i++) {
2001 			add_next_index_string(return_value, (*entry)->name);
2002 			entry++;
2003 		}
2004 	} else {
2005 		const mbfl_encoding **list = NULL;
2006 		size_t size = 0;
2007 		switch (Z_TYPE_P(arg1)) {
2008 			case IS_ARRAY:
2009 				if (FAILURE == php_mb_parse_encoding_array(arg1, &list, &size, 0)) {
2010 					if (list) {
2011 						efree(list);
2012 					}
2013 					RETURN_FALSE;
2014 				}
2015 				break;
2016 			default:
2017 				if (!try_convert_to_string(arg1)) {
2018 					return;
2019 				}
2020 				if (FAILURE == php_mb_parse_encoding_list(Z_STRVAL_P(arg1), Z_STRLEN_P(arg1), &list, &size, 0)) {
2021 					if (list) {
2022 						efree(list);
2023 					}
2024 					RETURN_FALSE;
2025 				}
2026 				break;
2027 		}
2028 
2029 		if (list == NULL) {
2030 			RETURN_FALSE;
2031 		}
2032 
2033 		if (MBSTRG(current_detect_order_list)) {
2034 			efree(MBSTRG(current_detect_order_list));
2035 		}
2036 		MBSTRG(current_detect_order_list) = list;
2037 		MBSTRG(current_detect_order_list_size) = size;
2038 		RETURN_TRUE;
2039 	}
2040 }
2041 /* }}} */
2042 
/* Returns 1 when cp may be used as a substitute character, 0 otherwise.
 *
 * Accepted values are 1..0x10FFFF excluding the surrogate range
 * 0xD800..0xDFFF. Zero and negative values are rejected.
 *
 * Since the target encoding of the conversion that will eventually use the
 * substitute character is not known here, it is not possible to check
 * whether the code point is actually mapped in that encoding; everything
 * else has to be accepted. */
static inline int php_mb_check_code_point(zend_long cp)
{
	/* Must lie inside the Unicode range (and be non-zero). */
	const int in_unicode_range = (cp > 0 && cp < 0x110000);

	/* Surrogates are never valid on their own, and only a single
	 * substitute character is allowed. */
	const int is_surrogate = (cp >= 0xd800 && cp <= 0xdfff);

	return in_unicode_range && !is_surrogate;
}
2061 
2062 /* {{{ proto mixed mb_substitute_character([mixed substchar])
2063    Sets the current substitute_character or returns the current substitute_character */
PHP_FUNCTION(mb_substitute_character)2064 PHP_FUNCTION(mb_substitute_character)
2065 {
2066 	zval *arg1 = NULL;
2067 
2068 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "|z", &arg1) == FAILURE) {
2069 		return;
2070 	}
2071 
2072 	if (!arg1) {
2073 		if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
2074 			RETURN_STRING("none");
2075 		} else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
2076 			RETURN_STRING("long");
2077 		} else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
2078 			RETURN_STRING("entity");
2079 		} else {
2080 			RETURN_LONG(MBSTRG(current_filter_illegal_substchar));
2081 		}
2082 	} else {
2083 		RETVAL_TRUE;
2084 
2085 		switch (Z_TYPE_P(arg1)) {
2086 			case IS_STRING:
2087 				if (strncasecmp("none", Z_STRVAL_P(arg1), Z_STRLEN_P(arg1)) == 0) {
2088 					MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
2089 				} else if (strncasecmp("long", Z_STRVAL_P(arg1), Z_STRLEN_P(arg1)) == 0) {
2090 					MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
2091 				} else if (strncasecmp("entity", Z_STRVAL_P(arg1), Z_STRLEN_P(arg1)) == 0) {
2092 					MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
2093 				} else {
2094 					convert_to_long_ex(arg1);
2095 
2096 					if (php_mb_check_code_point(Z_LVAL_P(arg1))) {
2097 						MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
2098 						MBSTRG(current_filter_illegal_substchar) = Z_LVAL_P(arg1);
2099 					} else {
2100 						php_error_docref(NULL, E_WARNING, "Unknown character");
2101 						RETURN_FALSE;
2102 					}
2103 				}
2104 				break;
2105 			default:
2106 				convert_to_long_ex(arg1);
2107 				if (php_mb_check_code_point(Z_LVAL_P(arg1))) {
2108 					MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
2109 					MBSTRG(current_filter_illegal_substchar) = Z_LVAL_P(arg1);
2110 				} else {
2111 					php_error_docref(NULL, E_WARNING, "Unknown character");
2112 					RETURN_FALSE;
2113 				}
2114 				break;
2115 		}
2116 	}
2117 }
2118 /* }}} */
2119 
2120 /* {{{ proto string mb_preferred_mime_name(string encoding)
2121    Return the preferred MIME name (charset) as a string */
PHP_FUNCTION(mb_preferred_mime_name)2122 PHP_FUNCTION(mb_preferred_mime_name)
2123 {
2124 	enum mbfl_no_encoding no_encoding;
2125 	char *name = NULL;
2126 	size_t name_len;
2127 
2128 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &name, &name_len) == FAILURE) {
2129 		return;
2130 	} else {
2131 		no_encoding = mbfl_name2no_encoding(name);
2132 		if (no_encoding == mbfl_no_encoding_invalid) {
2133 			php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", name);
2134 			RETVAL_FALSE;
2135 		} else {
2136 			const char *preferred_name = mbfl_no2preferred_mime_name(no_encoding);
2137 			if (preferred_name == NULL || *preferred_name == '\0') {
2138 				php_error_docref(NULL, E_WARNING, "No MIME preferred name corresponding to \"%s\"", name);
2139 				RETVAL_FALSE;
2140 			} else {
2141 				RETVAL_STRING((char *)preferred_name);
2142 			}
2143 		}
2144 	}
2145 }
2146 /* }}} */
2147 
/* Byte-range predicates; each evaluates to 1 or 0.
 * NOTE(review): presumably the first and second byte of a Shift_JIS
 * double-byte sequence -- confirm against the callers. */
#define IS_SJIS1(c) (((c) >= 0x81 && (c) <= 0x9f) || ((c) >= 0xe0 && (c) <= 0xf5))
#define IS_SJIS2(c) (((c) >= 0x40 && (c) <= 0x7e) || ((c) >= 0x80 && (c) <= 0xfc))
2150 
2151 /* {{{ proto bool mb_parse_str(string encoded_string [, array result])
2152    Parses GET/POST/COOKIE data and sets global variables */
PHP_FUNCTION(mb_parse_str)2153 PHP_FUNCTION(mb_parse_str)
2154 {
2155 	zval *track_vars_array = NULL;
2156 	char *encstr = NULL;
2157 	size_t encstr_len;
2158 	php_mb_encoding_handler_info_t info;
2159 	const mbfl_encoding *detected;
2160 
2161 	track_vars_array = NULL;
2162 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|z", &encstr, &encstr_len, &track_vars_array) == FAILURE) {
2163 		return;
2164 	}
2165 
2166 	if (track_vars_array != NULL) {
2167 		track_vars_array = zend_try_array_init(track_vars_array);
2168 		if (!track_vars_array) {
2169 			return;
2170 		}
2171 	}
2172 
2173 	encstr = estrndup(encstr, encstr_len);
2174 
2175 	info.data_type              = PARSE_STRING;
2176 	info.separator              = PG(arg_separator).input;
2177 	info.report_errors          = 1;
2178 	info.to_encoding            = MBSTRG(current_internal_encoding);
2179 	info.to_language            = MBSTRG(language);
2180 	info.from_encodings         = MBSTRG(http_input_list);
2181 	info.num_from_encodings     = MBSTRG(http_input_list_size);
2182 	info.from_language          = MBSTRG(language);
2183 
2184 	if (track_vars_array != NULL) {
2185 		detected = _php_mb_encoding_handler_ex(&info, track_vars_array, encstr);
2186 	} else {
2187 		zval tmp;
2188 		zend_array *symbol_table;
2189 		if (zend_forbid_dynamic_call("mb_parse_str() with a single argument") == FAILURE) {
2190 			efree(encstr);
2191 			return;
2192 		}
2193 
2194 		php_error_docref(NULL, E_DEPRECATED, "Calling mb_parse_str() without the result argument is deprecated");
2195 
2196 		symbol_table = zend_rebuild_symbol_table();
2197 		ZVAL_ARR(&tmp, symbol_table);
2198 		detected = _php_mb_encoding_handler_ex(&info, &tmp, encstr);
2199 	}
2200 
2201 	MBSTRG(http_input_identify) = detected;
2202 
2203 	RETVAL_BOOL(detected);
2204 
2205 	if (encstr != NULL) efree(encstr);
2206 }
2207 /* }}} */
2208 
2209 /* {{{ proto string mb_output_handler(string contents, int status)
2210    Returns string in output buffer converted to the http_output encoding */
PHP_FUNCTION(mb_output_handler)2211 PHP_FUNCTION(mb_output_handler)
2212 {
2213 	char *arg_string;
2214 	size_t arg_string_len;
2215 	zend_long arg_status;
2216 	mbfl_string string, result;
2217 	const char *charset;
2218 	char *p;
2219 	const mbfl_encoding *encoding;
2220 	int last_feed;
2221 	size_t len;
2222 	unsigned char send_text_mimetype = 0;
2223 	char *s, *mimetype = NULL;
2224 
2225 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl", &arg_string, &arg_string_len, &arg_status) == FAILURE) {
2226 		return;
2227 	}
2228 
2229 	encoding = MBSTRG(current_http_output_encoding);
2230 
2231  	/* start phase only */
2232  	if ((arg_status & PHP_OUTPUT_HANDLER_START) != 0) {
2233  		/* delete the converter just in case. */
2234  		if (MBSTRG(outconv)) {
2235 			MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
2236  			mbfl_buffer_converter_delete(MBSTRG(outconv));
2237  			MBSTRG(outconv) = NULL;
2238   		}
2239 		if (encoding == &mbfl_encoding_pass) {
2240 			RETURN_STRINGL(arg_string, arg_string_len);
2241 		}
2242 
2243 		/* analyze mime type */
2244 		if (SG(sapi_headers).mimetype &&
2245 			_php_mb_match_regex(
2246 				MBSTRG(http_output_conv_mimetypes),
2247 				SG(sapi_headers).mimetype,
2248 				strlen(SG(sapi_headers).mimetype))) {
2249 			if ((s = strchr(SG(sapi_headers).mimetype,';')) == NULL){
2250 				mimetype = estrdup(SG(sapi_headers).mimetype);
2251 			} else {
2252 				mimetype = estrndup(SG(sapi_headers).mimetype,s-SG(sapi_headers).mimetype);
2253 			}
2254 			send_text_mimetype = 1;
2255 		} else if (SG(sapi_headers).send_default_content_type) {
2256 			mimetype = SG(default_mimetype) ? SG(default_mimetype) : SAPI_DEFAULT_MIMETYPE;
2257 		}
2258 
2259  		/* if content-type is not yet set, set it and activate the converter */
2260  		if (SG(sapi_headers).send_default_content_type || send_text_mimetype) {
2261 			charset = encoding->mime_name;
2262 			if (charset) {
2263 				len = spprintf( &p, 0, "Content-Type: %s; charset=%s",  mimetype, charset );
2264 				if (sapi_add_header(p, len, 0) != FAILURE) {
2265 					SG(sapi_headers).send_default_content_type = 0;
2266 				}
2267 			}
2268  			/* activate the converter */
2269  			MBSTRG(outconv) = mbfl_buffer_converter_new(MBSTRG(current_internal_encoding), encoding, 0);
2270 			if (send_text_mimetype){
2271 				efree(mimetype);
2272 			}
2273  		}
2274   	}
2275 
2276  	/* just return if the converter is not activated. */
2277  	if (MBSTRG(outconv) == NULL) {
2278 		RETURN_STRINGL(arg_string, arg_string_len);
2279 	}
2280 
2281  	/* flag */
2282  	last_feed = ((arg_status & PHP_OUTPUT_HANDLER_END) != 0);
2283  	/* mode */
2284  	mbfl_buffer_converter_illegal_mode(MBSTRG(outconv), MBSTRG(current_filter_illegal_mode));
2285  	mbfl_buffer_converter_illegal_substchar(MBSTRG(outconv), MBSTRG(current_filter_illegal_substchar));
2286 
2287  	/* feed the string */
2288  	mbfl_string_init(&string);
2289 	/* these are not needed. convd has encoding info.
2290 	string.no_language = MBSTRG(language);
2291 	string.encoding = MBSTRG(current_internal_encoding);
2292 	*/
2293  	string.val = (unsigned char *)arg_string;
2294  	string.len = arg_string_len;
2295  	mbfl_buffer_converter_feed(MBSTRG(outconv), &string);
2296  	if (last_feed) {
2297  		mbfl_buffer_converter_flush(MBSTRG(outconv));
2298 	}
2299  	/* get the converter output, and return it */
2300  	mbfl_buffer_converter_result(MBSTRG(outconv), &result);
2301 	// TODO: avoid reallocation ???
2302  	RETVAL_STRINGL((char *)result.val, result.len);		/* the string is already strdup()'ed */
2303 	efree(result.val);
2304 
2305  	/* delete the converter if it is the last feed. */
2306  	if (last_feed) {
2307 		MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
2308 		mbfl_buffer_converter_delete(MBSTRG(outconv));
2309 		MBSTRG(outconv) = NULL;
2310 	}
2311 }
2312 /* }}} */
2313 
2314 /* {{{ proto array mb_str_split(string str [, int split_length] [, string encoding])
2315  Convert a multibyte string to an array. If split_length is specified,
2316  break the string down into chunks each split_length characters long. */
2317 
2318 /* structure to pass split params to the callback */
2319 struct mbfl_split_params {
2320     zval *return_value; /* php function return value structure pointer */
2321     mbfl_string *result_string; /* string to store result chunk */
2322     size_t mb_chunk_length; /* actual chunk length in chars */
2323     size_t split_length; /* split length in chars */
2324     mbfl_convert_filter *next_filter; /* widechar to encoding converter */
2325 };
2326 
2327 /* callback function to fill split array */
mbfl_split_output(int c,void * data)2328 static int mbfl_split_output(int c, void *data)
2329 {
2330     struct mbfl_split_params *params = (struct mbfl_split_params *)data; /* cast passed data */
2331 
2332     (*params->next_filter->filter_function)(c, params->next_filter); /* decoder filter */
2333 
2334     if(params->split_length == ++params->mb_chunk_length) { /* if current chunk size reached defined chunk size or last char reached */
2335         mbfl_convert_filter_flush(params->next_filter);/* concatenate separate decoded chars to the solid string */
2336         mbfl_memory_device *device = (mbfl_memory_device *)params->next_filter->data; /* chars container */
2337         mbfl_string *chunk = params->result_string;
2338         mbfl_memory_device_result(device, chunk); /* make chunk */
2339         add_next_index_stringl(params->return_value, (const char *)chunk->val, chunk->len); /* add chunk to the array */
2340         efree(chunk->val);
2341         params->mb_chunk_length = 0; /* reset mb_chunk size */
2342     }
2343     return 0;
2344 }
2345 
PHP_FUNCTION(mb_str_split)2346 PHP_FUNCTION(mb_str_split)
2347 {
2348 	zend_string *str, *encoding = NULL;
2349 	size_t mb_len, chunks, chunk_len;
2350 	const char *p, *last; /* pointer for the string cursor and last string char */
2351 	mbfl_string string, result_string;
2352 	const mbfl_encoding *mbfl_encoding;
2353 	zend_long split_length = 1;
2354 
2355 	ZEND_PARSE_PARAMETERS_START(1, 3)
2356 		Z_PARAM_STR(str)
2357 		Z_PARAM_OPTIONAL
2358 		Z_PARAM_LONG(split_length)
2359 		Z_PARAM_STR(encoding)
2360 	ZEND_PARSE_PARAMETERS_END();
2361 
2362 	if (split_length <= 0) {
2363 		php_error_docref(NULL, E_WARNING, "The length of each segment must be greater than zero");
2364 		RETURN_FALSE;
2365 	}
2366 
2367 	/* fill mbfl_string structure */
2368 	string.val = (unsigned char *) ZSTR_VAL(str);
2369 	string.len = ZSTR_LEN(str);
2370 	string.no_language = MBSTRG(language);
2371 	string.encoding = php_mb_get_encoding(encoding);
2372 	if (!string.encoding) {
2373 		RETURN_FALSE;
2374 	}
2375 
2376 	p = ZSTR_VAL(str); /* string cursor pointer */
2377 	last = ZSTR_VAL(str) + ZSTR_LEN(str); /* last string char pointer */
2378 
2379 	mbfl_encoding = string.encoding;
2380 
2381 	/* first scenario: 1,2,4-bytes fixed width encodings (head part) */
2382 	if (mbfl_encoding->flag & MBFL_ENCTYPE_SBCS) { /* 1 byte */
2383 		mb_len = string.len;
2384 		chunk_len = (size_t)split_length; /* chunk length in bytes */
2385 	} else if (mbfl_encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) { /* 2 bytes */
2386 		mb_len = string.len / 2;
2387 		chunk_len = split_length * 2;
2388 	} else if (mbfl_encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) { /* 4 bytes */
2389 		mb_len = string.len / 4;
2390 		chunk_len = split_length * 4;
2391 	} else if (mbfl_encoding->mblen_table != NULL) {
2392 		/* second scenario: variable width encodings with length table */
2393 		char unsigned const *mbtab = mbfl_encoding->mblen_table;
2394 
2395 		/* assume that we have 1-bytes characters */
2396 		array_init_size(return_value, (string.len + split_length) / split_length); /* round up */
2397 
2398 		while (p < last) { /* split cycle work until the cursor has reached the last byte */
2399 			char const *chunk_p = p; /* chunk first byte pointer */
2400 			chunk_len = 0; /* chunk length in bytes */
2401 			zend_long char_count;
2402 
2403 			for (char_count = 0; char_count < split_length && p < last; ++char_count) {
2404 				char unsigned const m = mbtab[*(const unsigned char *)p]; /* single character length table */
2405 				chunk_len += m;
2406 				p += m;
2407 			}
2408 			if (p >= last) chunk_len -= p - last; /* check if chunk is in bounds */
2409 			add_next_index_stringl(return_value, chunk_p, chunk_len);
2410 		}
2411 		return;
2412 	} else {
2413 		/* third scenario: other multibyte encodings */
2414 		mbfl_convert_filter *filter, *decoder;
2415 
2416 		/* assume that we have 1-bytes characters */
2417 		array_init_size(return_value, (string.len + split_length) / split_length); /* round up */
2418 
2419 		/* decoder filter to decode wchar to encoding */
2420 		mbfl_memory_device device;
2421 		mbfl_memory_device_init(&device, split_length + 1, 0);
2422 
2423 		decoder = mbfl_convert_filter_new(
2424 				&mbfl_encoding_wchar,
2425 				string.encoding,
2426 				mbfl_memory_device_output,
2427 				NULL,
2428 				&device);
2429 		/* if something wrong with the decoded */
2430 		if (decoder == NULL) {
2431 			RETURN_FALSE;
2432 		}
2433 
2434 		/* wchar filter */
2435 		mbfl_string_init(&result_string); /* mbfl_string to store chunk in the callback */
2436 		struct mbfl_split_params params = { /* init callback function params structure */
2437 			.return_value = return_value,
2438 			.result_string = &result_string,
2439 			.mb_chunk_length = 0,
2440 			.split_length = (size_t)split_length,
2441 			.next_filter = decoder,
2442 		};
2443 
2444 		filter = mbfl_convert_filter_new(
2445 				string.encoding,
2446 				&mbfl_encoding_wchar,
2447 				mbfl_split_output,
2448 				NULL,
2449 				&params);
2450 		/* if something wrong with the filter */
2451 		if (filter == NULL){
2452 			mbfl_convert_filter_delete(decoder); /* this will free allocated memory for the decoded */
2453 			RETURN_FALSE;
2454 		}
2455 
2456 		while (p < last - 1) { /* cycle each byte except last with callback function */
2457 			(*filter->filter_function)(*p++, filter);
2458 		}
2459 		params.mb_chunk_length = split_length - 1; /* force to finish current chunk */
2460 		(*filter->filter_function)(*p++, filter); /*process last char */
2461 
2462 		mbfl_convert_filter_delete(decoder);
2463 		mbfl_convert_filter_delete(filter);
2464 		mbfl_memory_device_clear(&device);
2465 		return;
2466 	}
2467 
2468 	/* first scenario: 1,2,4-bytes fixed width encodings (tail part) */
2469 	chunks = (mb_len + split_length - 1) / split_length; /* (round up idiom) */
2470 	array_init_size(return_value, chunks);
2471 	if (chunks != 0) {
2472 		zend_long i;
2473 
2474 		for (i = 0; i < chunks - 1; p += chunk_len, ++i) {
2475 			add_next_index_stringl(return_value, p, chunk_len);
2476 		}
2477 		add_next_index_stringl(return_value, p, last - p);
2478 	}
2479 }
2480 /* }}} */
2481 
2482 /* {{{ proto int mb_strlen(string str [, string encoding])
2483    Get character numbers of a string */
PHP_FUNCTION(mb_strlen)2484 PHP_FUNCTION(mb_strlen)
2485 {
2486 	size_t n;
2487 	mbfl_string string;
2488 	char *str;
2489 	size_t str_len;
2490 	zend_string *enc_name = NULL;
2491 
2492 	ZEND_PARSE_PARAMETERS_START(1, 2)
2493 		Z_PARAM_STRING(str, str_len)
2494 		Z_PARAM_OPTIONAL
2495 		Z_PARAM_STR(enc_name)
2496 	ZEND_PARSE_PARAMETERS_END();
2497 
2498 	string.val = (unsigned char *) str;
2499 	string.len = str_len;
2500 	string.no_language = MBSTRG(language);
2501 	string.encoding = php_mb_get_encoding(enc_name);
2502 	if (!string.encoding) {
2503 		RETURN_FALSE;
2504 	}
2505 
2506 	n = mbfl_strlen(&string);
2507 	if (!mbfl_is_error(n)) {
2508 		RETVAL_LONG(n);
2509 	} else {
2510 		RETVAL_FALSE;
2511 	}
2512 }
2513 /* }}} */
2514 
2515 /* {{{ proto int mb_strpos(string haystack, string needle [, int offset [, string encoding]])
2516    Find position of first occurrence of a string within another */
PHP_FUNCTION(mb_strpos)2517 PHP_FUNCTION(mb_strpos)
2518 {
2519 	int reverse = 0;
2520 	zend_long offset = 0;
2521 	mbfl_string haystack, needle;
2522 	zend_string *enc_name = NULL;
2523 	size_t n;
2524 
2525 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|lS", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &offset, &enc_name) == FAILURE) {
2526 		return;
2527 	}
2528 
2529 	haystack.no_language = needle.no_language = MBSTRG(language);
2530 	haystack.encoding = needle.encoding = php_mb_get_encoding(enc_name);
2531 	if (!haystack.encoding) {
2532 		RETURN_FALSE;
2533 	}
2534 
2535 	if (offset != 0) {
2536 		size_t slen = mbfl_strlen(&haystack);
2537 		if (offset < 0) {
2538 			offset += slen;
2539 		}
2540 		if (offset < 0 || offset > slen) {
2541 			php_error_docref(NULL, E_WARNING, "Offset not contained in string");
2542 			RETURN_FALSE;
2543 		}
2544 	}
2545 
2546 	if (needle.len == 0) {
2547 		php_error_docref(NULL, E_WARNING, "Empty delimiter");
2548 		RETURN_FALSE;
2549 	}
2550 
2551 	n = mbfl_strpos(&haystack, &needle, offset, reverse);
2552 	if (!mbfl_is_error(n)) {
2553 		RETVAL_LONG(n);
2554 	} else {
2555 		switch (-n) {
2556 		case 1:
2557 			break;
2558 		case 2:
2559 			php_error_docref(NULL, E_WARNING, "Needle has not positive length");
2560 			break;
2561 		case 4:
2562 			php_error_docref(NULL, E_WARNING, "Unknown encoding or conversion error");
2563 			break;
2564 		case 8:
2565 			php_error_docref(NULL, E_NOTICE, "Argument is empty");
2566 			break;
2567 		default:
2568 			php_error_docref(NULL, E_WARNING, "Unknown error in mb_strpos");
2569 			break;
2570 		}
2571 		RETVAL_FALSE;
2572 	}
2573 }
2574 /* }}} */
2575 
2576 /* {{{ proto int mb_strrpos(string haystack, string needle [, int offset [, string encoding]])
2577    Find position of last occurrence of a string within another */
PHP_FUNCTION(mb_strrpos)2578 PHP_FUNCTION(mb_strrpos)
2579 {
2580 	mbfl_string haystack, needle;
2581 	zend_string *enc_name = NULL;
2582 	zval *zoffset = NULL;
2583 	zend_long offset = 0, n;
2584 
2585 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|zS", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &zoffset, &enc_name) == FAILURE) {
2586 		return;
2587 	}
2588 
2589 	if (zoffset) {
2590 		if (Z_TYPE_P(zoffset) == IS_STRING) {
2591 			switch (Z_STRVAL_P(zoffset)[0]) {
2592 				case '0':
2593 				case '1':
2594 				case '2':
2595 				case '3':
2596 				case '4':
2597 				case '5':
2598 				case '6':
2599 				case '7':
2600 				case '8':
2601 				case '9':
2602 				case ' ':
2603 				case '-':
2604 				case '.':
2605 					convert_to_long_ex(zoffset);
2606 					offset = Z_LVAL_P(zoffset);
2607 					break;
2608 				default :
2609 					enc_name = Z_STR_P(zoffset);
2610 					php_error_docref(NULL, E_DEPRECATED,
2611 						"Passing the encoding as third parameter is deprecated. "
2612 						"Use an explicit zero offset");
2613 					break;
2614 			}
2615 		} else {
2616 			convert_to_long_ex(zoffset);
2617 			offset = Z_LVAL_P(zoffset);
2618 		}
2619 	}
2620 
2621 	haystack.no_language = needle.no_language = MBSTRG(language);
2622 	haystack.encoding = needle.encoding = php_mb_get_encoding(enc_name);
2623 	if (!haystack.encoding) {
2624 		RETURN_FALSE;
2625 	}
2626 
2627 	if (offset != 0) {
2628 		size_t haystack_char_len = mbfl_strlen(&haystack);
2629 		if ((offset > 0 && offset > haystack_char_len) ||
2630 			(offset < 0 && -offset > haystack_char_len)) {
2631 			php_error_docref(NULL, E_WARNING, "Offset is greater than the length of haystack string");
2632 			RETURN_FALSE;
2633 		}
2634 	}
2635 
2636 	n = mbfl_strpos(&haystack, &needle, offset, 1);
2637 	if (!mbfl_is_error(n)) {
2638 		RETVAL_LONG(n);
2639 	} else {
2640 		RETVAL_FALSE;
2641 	}
2642 }
2643 /* }}} */
2644 
2645 /* {{{ proto int mb_stripos(string haystack, string needle [, int offset [, string encoding]])
2646    Finds position of first occurrence of a string within another, case insensitive */
PHP_FUNCTION(mb_stripos)2647 PHP_FUNCTION(mb_stripos)
2648 {
2649 	size_t n = (size_t) -1;
2650 	zend_long offset = 0;
2651 	mbfl_string haystack, needle;
2652 	zend_string *from_encoding = NULL;
2653 
2654 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|lS", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &offset, &from_encoding) == FAILURE) {
2655 		return;
2656 	}
2657 
2658 	if (needle.len == 0) {
2659 		php_error_docref(NULL, E_WARNING, "Empty delimiter");
2660 		RETURN_FALSE;
2661 	}
2662 
2663 	n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, from_encoding);
2664 
2665 	if (!mbfl_is_error(n)) {
2666 		RETVAL_LONG(n);
2667 	} else {
2668 		RETVAL_FALSE;
2669 	}
2670 }
2671 /* }}} */
2672 
2673 /* {{{ proto int mb_strripos(string haystack, string needle [, int offset [, string encoding]])
2674    Finds position of last occurrence of a string within another, case insensitive */
PHP_FUNCTION(mb_strripos)2675 PHP_FUNCTION(mb_strripos)
2676 {
2677 	size_t n = (size_t) -1;
2678 	zend_long offset = 0;
2679 	mbfl_string haystack, needle;
2680 	zend_string *from_encoding = NULL;
2681 
2682 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|lS", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &offset, &from_encoding) == FAILURE) {
2683 		return;
2684 	}
2685 
2686 	n = php_mb_stripos(1, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, from_encoding);
2687 
2688 	if (!mbfl_is_error(n)) {
2689 		RETVAL_LONG(n);
2690 	} else {
2691 		RETVAL_FALSE;
2692 	}
2693 }
2694 /* }}} */
2695 
2696 /* {{{ proto string mb_strstr(string haystack, string needle[, bool part[, string encoding]])
2697    Finds first occurrence of a string within another */
PHP_FUNCTION(mb_strstr)2698 PHP_FUNCTION(mb_strstr)
2699 {
2700 	size_t n;
2701 	mbfl_string haystack, needle, result, *ret = NULL;
2702 	zend_string *enc_name = NULL;
2703 	zend_bool part = 0;
2704 
2705 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|bS", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &enc_name) == FAILURE) {
2706 		return;
2707 	}
2708 
2709 	haystack.no_language = needle.no_language = MBSTRG(language);
2710 	haystack.encoding = needle.encoding = php_mb_get_encoding(enc_name);
2711 	if (!haystack.encoding) {
2712 		RETURN_FALSE;
2713 	}
2714 
2715 	if (needle.len == 0) {
2716 		php_error_docref(NULL, E_WARNING, "Empty delimiter");
2717 		RETURN_FALSE;
2718 	}
2719 
2720 	n = mbfl_strpos(&haystack, &needle, 0, 0);
2721 	if (!mbfl_is_error(n)) {
2722 		if (part) {
2723 			ret = mbfl_substr(&haystack, &result, 0, n);
2724 			if (ret != NULL) {
2725 				// TODO: avoid reallocation ???
2726 				RETVAL_STRINGL((char *)ret->val, ret->len);
2727 				efree(ret->val);
2728 			} else {
2729 				RETVAL_FALSE;
2730 			}
2731 		} else {
2732 			ret = mbfl_substr(&haystack, &result, n, MBFL_SUBSTR_UNTIL_END);
2733 			if (ret != NULL) {
2734 				// TODO: avoid reallocation ???
2735 				RETVAL_STRINGL((char *)ret->val, ret->len);
2736 				efree(ret->val);
2737 			} else {
2738 				RETVAL_FALSE;
2739 			}
2740 		}
2741 	} else {
2742 		RETVAL_FALSE;
2743 	}
2744 }
2745 /* }}} */
2746 
2747 /* {{{ proto string mb_strrchr(string haystack, string needle[, bool part[, string encoding]])
2748    Finds the last occurrence of a character in a string within another */
PHP_FUNCTION(mb_strrchr)2749 PHP_FUNCTION(mb_strrchr)
2750 {
2751 	size_t n;
2752 	mbfl_string haystack, needle, result, *ret = NULL;
2753 	zend_string *enc_name = NULL;
2754 	zend_bool part = 0;
2755 
2756 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|bS", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &enc_name) == FAILURE) {
2757 		return;
2758 	}
2759 
2760 	haystack.no_language = needle.no_language = MBSTRG(language);
2761 	haystack.encoding = needle.encoding = php_mb_get_encoding(enc_name);
2762 	if (!haystack.encoding) {
2763 		RETURN_FALSE;
2764 	}
2765 
2766 	if (haystack.len == 0) {
2767 		RETURN_FALSE;
2768 	}
2769 	if (needle.len == 0) {
2770 		RETURN_FALSE;
2771 	}
2772 
2773 	n = mbfl_strpos(&haystack, &needle, 0, 1);
2774 	if (!mbfl_is_error(n)) {
2775 		if (part) {
2776 			ret = mbfl_substr(&haystack, &result, 0, n);
2777 			if (ret != NULL) {
2778 				// TODO: avoid reallocation ???
2779 				RETVAL_STRINGL((char *)ret->val, ret->len);
2780 				efree(ret->val);
2781 			} else {
2782 				RETVAL_FALSE;
2783 			}
2784 		} else {
2785 			ret = mbfl_substr(&haystack, &result, n, MBFL_SUBSTR_UNTIL_END);
2786 			if (ret != NULL) {
2787 				// TODO: avoid reallocation ???
2788 				RETVAL_STRINGL((char *)ret->val, ret->len);
2789 				efree(ret->val);
2790 			} else {
2791 				RETVAL_FALSE;
2792 			}
2793 		}
2794 	} else {
2795 		RETVAL_FALSE;
2796 	}
2797 }
2798 /* }}} */
2799 
2800 /* {{{ proto string mb_stristr(string haystack, string needle[, bool part[, string encoding]])
2801    Finds first occurrence of a string within another, case insensitive */
PHP_FUNCTION(mb_stristr)2802 PHP_FUNCTION(mb_stristr)
2803 {
2804 	zend_bool part = 0;
2805 	size_t n;
2806 	mbfl_string haystack, needle, result, *ret = NULL;
2807 	zend_string *from_encoding = NULL;
2808 
2809 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|bS", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &from_encoding) == FAILURE) {
2810 		return;
2811 	}
2812 
2813 	haystack.no_language = needle.no_language = MBSTRG(language);
2814 	haystack.encoding = needle.encoding = php_mb_get_encoding(from_encoding);
2815 	if (!haystack.encoding) {
2816 		RETURN_FALSE;
2817 	}
2818 
2819 	if (!needle.len) {
2820 		php_error_docref(NULL, E_WARNING, "Empty delimiter");
2821 		RETURN_FALSE;
2822 	}
2823 
2824 	n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, 0, from_encoding);
2825 	if (mbfl_is_error(n)) {
2826 		RETURN_FALSE;
2827 	}
2828 
2829 	if (part) {
2830 		ret = mbfl_substr(&haystack, &result, 0, n);
2831 		if (ret != NULL) {
2832 			// TODO: avoid reallocation ???
2833 			RETVAL_STRINGL((char *)ret->val, ret->len);
2834 			efree(ret->val);
2835 		} else {
2836 			RETVAL_FALSE;
2837 		}
2838 	} else {
2839 		ret = mbfl_substr(&haystack, &result, n, MBFL_SUBSTR_UNTIL_END);
2840 		if (ret != NULL) {
2841 			// TODO: avoid reallocaton ???
2842 			RETVAL_STRINGL((char *)ret->val, ret->len);
2843 			efree(ret->val);
2844 		} else {
2845 			RETVAL_FALSE;
2846 		}
2847 	}
2848 }
2849 /* }}} */
2850 
2851 /* {{{ proto string mb_strrichr(string haystack, string needle[, bool part[, string encoding]])
2852    Finds the last occurrence of a character in a string within another, case insensitive */
PHP_FUNCTION(mb_strrichr)2853 PHP_FUNCTION(mb_strrichr)
2854 {
2855 	zend_bool part = 0;
2856 	size_t n;
2857 	mbfl_string haystack, needle, result, *ret = NULL;
2858 	zend_string *from_encoding = NULL;
2859 
2860 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|bS", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &from_encoding) == FAILURE) {
2861 		return;
2862 	}
2863 
2864 	haystack.no_language = needle.no_language = MBSTRG(language);
2865 	haystack.encoding = needle.encoding = php_mb_get_encoding(from_encoding);
2866 	if (!haystack.encoding) {
2867 		RETURN_FALSE;
2868 	}
2869 
2870 	n = php_mb_stripos(1, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, 0, from_encoding);
2871 	if (mbfl_is_error(n)) {
2872 		RETURN_FALSE;
2873 	}
2874 
2875 	if (part) {
2876 		ret = mbfl_substr(&haystack, &result, 0, n);
2877 		if (ret != NULL) {
2878 			// TODO: avoid reallocation ???
2879 			RETVAL_STRINGL((char *)ret->val, ret->len);
2880 			efree(ret->val);
2881 		} else {
2882 			RETVAL_FALSE;
2883 		}
2884 	} else {
2885 		ret = mbfl_substr(&haystack, &result, n, MBFL_SUBSTR_UNTIL_END);
2886 		if (ret != NULL) {
2887 			// TODO: avoid reallocation ???
2888 			RETVAL_STRINGL((char *)ret->val, ret->len);
2889 			efree(ret->val);
2890 		} else {
2891 			RETVAL_FALSE;
2892 		}
2893 	}
2894 }
2895 /* }}} */
2896 
2897 /* {{{ proto int mb_substr_count(string haystack, string needle [, string encoding])
2898    Count the number of substring occurrences */
PHP_FUNCTION(mb_substr_count)2899 PHP_FUNCTION(mb_substr_count)
2900 {
2901 	size_t n;
2902 	mbfl_string haystack, needle;
2903 	zend_string *enc_name = NULL;
2904 
2905 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|S", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &enc_name) == FAILURE) {
2906 		return;
2907 	}
2908 
2909 	haystack.no_language = needle.no_language = MBSTRG(language);
2910 	haystack.encoding = needle.encoding = php_mb_get_encoding(enc_name);
2911 	if (!haystack.encoding) {
2912 		RETURN_FALSE;
2913 	}
2914 
2915 	if (needle.len == 0) {
2916 		php_error_docref(NULL, E_WARNING, "Empty substring");
2917 		RETURN_FALSE;
2918 	}
2919 
2920 	n = mbfl_substr_count(&haystack, &needle);
2921 	if (!mbfl_is_error(n)) {
2922 		RETVAL_LONG(n);
2923 	} else {
2924 		RETVAL_FALSE;
2925 	}
2926 }
2927 /* }}} */
2928 
2929 /* {{{ proto string mb_substr(string str, int start [, int length [, string encoding]])
2930    Returns part of a string */
PHP_FUNCTION(mb_substr)2931 PHP_FUNCTION(mb_substr)
2932 {
2933 	char *str;
2934 	zend_string *encoding = NULL;
2935 	zend_long from, len;
2936 	size_t mblen, real_from, real_len;
2937 	size_t str_len;
2938 	zend_bool len_is_null = 1;
2939 	mbfl_string string, result, *ret;
2940 
2941 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl|l!S", &str, &str_len, &from, &len, &len_is_null, &encoding) == FAILURE) {
2942 		return;
2943 	}
2944 
2945 	string.no_language = MBSTRG(language);
2946 	string.encoding = php_mb_get_encoding(encoding);
2947 	if (!string.encoding) {
2948 		RETURN_FALSE;
2949 	}
2950 
2951 	string.val = (unsigned char *)str;
2952 	string.len = str_len;
2953 
2954 	/* measures length */
2955 	mblen = 0;
2956 	if (from < 0 || (!len_is_null && len < 0)) {
2957 		mblen = mbfl_strlen(&string);
2958 	}
2959 
2960 	/* if "from" position is negative, count start position from the end
2961 	 * of the string
2962 	 */
2963 	if (from >= 0) {
2964 		real_from = (size_t) from;
2965 	} else if (-from < mblen) {
2966 		real_from = mblen + from;
2967 	} else {
2968 		real_from = 0;
2969 	}
2970 
2971 	/* if "length" position is negative, set it to the length
2972 	 * needed to stop that many chars from the end of the string
2973 	 */
2974 	if (len_is_null) {
2975 		real_len = MBFL_SUBSTR_UNTIL_END;
2976 	} else if (len >= 0) {
2977 		real_len = (size_t) len;
2978 	} else if (real_from < mblen && -len < mblen - real_from) {
2979 		real_len = (mblen - real_from) + len;
2980 	} else {
2981 		real_len = 0;
2982 	}
2983 
2984 	if (((MBSTRG(func_overload) & MB_OVERLOAD_STRING) == MB_OVERLOAD_STRING)
2985 		&& (real_from > mbfl_strlen(&string))) {
2986 		RETURN_FALSE;
2987 	}
2988 
2989 	ret = mbfl_substr(&string, &result, real_from, real_len);
2990 	if (NULL == ret) {
2991 		RETURN_FALSE;
2992 	}
2993 
2994 	// TODO: avoid reallocation ???
2995 	RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
2996 	efree(ret->val);
2997 }
2998 /* }}} */
2999 
3000 /* {{{ proto string mb_strcut(string str, int start [, int length [, string encoding]])
3001    Returns part of a string */
PHP_FUNCTION(mb_strcut)3002 PHP_FUNCTION(mb_strcut)
3003 {
3004 	zend_string *encoding = NULL;
3005 	zend_long from, len;
3006 	zend_bool len_is_null = 1;
3007 	mbfl_string string, result, *ret;
3008 
3009 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl|l!S", (char **)&string.val, &string.len, &from, &len, &len_is_null, &encoding) == FAILURE) {
3010 		return;
3011 	}
3012 
3013 	string.no_language = MBSTRG(language);
3014 	string.encoding = php_mb_get_encoding(encoding);
3015 	if (!string.encoding) {
3016 		RETURN_FALSE;
3017 	}
3018 
3019 	if (len_is_null) {
3020 		len = string.len;
3021 	}
3022 
3023 	/* if "from" position is negative, count start position from the end
3024 	 * of the string
3025 	 */
3026 	if (from < 0) {
3027 		from = string.len + from;
3028 		if (from < 0) {
3029 			from = 0;
3030 		}
3031 	}
3032 
3033 	/* if "length" position is negative, set it to the length
3034 	 * needed to stop that many chars from the end of the string
3035 	 */
3036 	if (len < 0) {
3037 		len = (string.len - from) + len;
3038 		if (len < 0) {
3039 			len = 0;
3040 		}
3041 	}
3042 
3043 	if (from > string.len) {
3044 		RETURN_FALSE;
3045 	}
3046 
3047 	ret = mbfl_strcut(&string, &result, from, len);
3048 	if (ret == NULL) {
3049 		RETURN_FALSE;
3050 	}
3051 
3052 	// TODO: avoid reallocation ???
3053 	RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
3054 	efree(ret->val);
3055 }
3056 /* }}} */
3057 
3058 /* {{{ proto int mb_strwidth(string str [, string encoding])
3059    Gets terminal width of a string */
PHP_FUNCTION(mb_strwidth)3060 PHP_FUNCTION(mb_strwidth)
3061 {
3062 	size_t n;
3063 	mbfl_string string;
3064 	zend_string *enc_name = NULL;
3065 
3066 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|S", (char **)&string.val, &string.len, &enc_name) == FAILURE) {
3067 		return;
3068 	}
3069 
3070 	string.no_language = MBSTRG(language);
3071 	string.encoding = php_mb_get_encoding(enc_name);
3072 	if (!string.encoding) {
3073 		RETURN_FALSE;
3074 	}
3075 
3076 	n = mbfl_strwidth(&string);
3077 	if (!mbfl_is_error(n)) {
3078 		RETVAL_LONG(n);
3079 	} else {
3080 		RETVAL_FALSE;
3081 	}
3082 }
3083 /* }}} */
3084 
3085 /* {{{ proto string mb_strimwidth(string str, int start, int width [, string trimmarker [, string encoding]])
3086    Trim the string in terminal width */
PHP_FUNCTION(mb_strimwidth)3087 PHP_FUNCTION(mb_strimwidth)
3088 {
3089 	char *str, *trimmarker = NULL;
3090 	zend_string *encoding = NULL;
3091 	zend_long from, width, swidth = 0;
3092 	size_t str_len, trimmarker_len;
3093 	mbfl_string string, result, marker, *ret;
3094 
3095 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "sll|sS", &str, &str_len, &from, &width, &trimmarker, &trimmarker_len, &encoding) == FAILURE) {
3096 		return;
3097 	}
3098 
3099 	string.no_language = marker.no_language = MBSTRG(language);
3100 	string.encoding = marker.encoding = php_mb_get_encoding(encoding);
3101 	if (!string.encoding) {
3102 		RETURN_FALSE;
3103 	}
3104 
3105 	string.val = (unsigned char *)str;
3106 	string.len = str_len;
3107 	marker.val = NULL;
3108 	marker.len = 0;
3109 
3110 	if ((from < 0) || (width < 0)) {
3111 		swidth = mbfl_strwidth(&string);
3112 	}
3113 
3114 	if (from < 0) {
3115 		from += swidth;
3116 	}
3117 
3118 	if (from < 0 || (size_t)from > str_len) {
3119 		php_error_docref(NULL, E_WARNING, "Start position is out of range");
3120 		RETURN_FALSE;
3121 	}
3122 
3123 	if (width < 0) {
3124 		width = swidth + width - from;
3125 	}
3126 
3127 	if (width < 0) {
3128 		php_error_docref(NULL, E_WARNING, "Width is out of range");
3129 		RETURN_FALSE;
3130 	}
3131 
3132 	if (trimmarker) {
3133 		marker.val = (unsigned char *)trimmarker;
3134 		marker.len = trimmarker_len;
3135 	}
3136 
3137 	ret = mbfl_strimwidth(&string, &marker, &result, from, width);
3138 
3139 	if (ret == NULL) {
3140 		RETURN_FALSE;
3141 	}
3142 	// TODO: avoid reallocation ???
3143 	RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
3144 	efree(ret->val);
3145 }
3146 /* }}} */
3147 
3148 
3149 /* See mbfl_no_encoding definition for list of unsupported encodings */
php_mb_is_unsupported_no_encoding(enum mbfl_no_encoding no_enc)3150 static inline zend_bool php_mb_is_unsupported_no_encoding(enum mbfl_no_encoding no_enc)
3151 {
3152 	return ((no_enc >= mbfl_no_encoding_invalid && no_enc <= mbfl_no_encoding_qprint)
3153 			|| (no_enc >= mbfl_no_encoding_utf7 && no_enc <= mbfl_no_encoding_utf7imap)
3154 			|| (no_enc >= mbfl_no_encoding_jis && no_enc <= mbfl_no_encoding_2022jpms)
3155 			|| (no_enc >= mbfl_no_encoding_cp50220 && no_enc <= mbfl_no_encoding_cp50222));
3156 }
3157 
3158 
3159 /* See mbfl_no_encoding definition for list of UTF-8 encodings */
php_mb_is_no_encoding_utf8(enum mbfl_no_encoding no_enc)3160 static inline zend_bool php_mb_is_no_encoding_utf8(enum mbfl_no_encoding no_enc)
3161 {
3162 	return (no_enc >= mbfl_no_encoding_utf8 && no_enc <= mbfl_no_encoding_utf8_sb);
3163 }
3164 
php_mb_convert_encoding_ex(const char * input,size_t length,const mbfl_encoding * to_encoding,const mbfl_encoding * from_encoding,size_t * output_len)3165 MBSTRING_API char *php_mb_convert_encoding_ex(const char *input, size_t length, const mbfl_encoding *to_encoding, const mbfl_encoding *from_encoding, size_t *output_len)
3166 {
3167 	mbfl_string string, result, *ret;
3168 	mbfl_buffer_converter *convd;
3169 	char *output = NULL;
3170 
3171 	if (output_len) {
3172 		*output_len = 0;
3173 	}
3174 
3175 	/* initialize string */
3176 	string.encoding = from_encoding;
3177 	string.no_language = MBSTRG(language);
3178 	string.val = (unsigned char *)input;
3179 	string.len = length;
3180 
3181 	/* initialize converter */
3182 	convd = mbfl_buffer_converter_new(from_encoding, to_encoding, string.len);
3183 	if (convd == NULL) {
3184 		php_error_docref(NULL, E_WARNING, "Unable to create character encoding converter");
3185 		return NULL;
3186 	}
3187 
3188 	mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
3189 	mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
3190 
3191 	/* do it */
3192 	mbfl_string_init(&result);
3193 	ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
3194 	if (ret) {
3195 		if (output_len) {
3196 			*output_len = ret->len;
3197 		}
3198 		output = (char *)ret->val;
3199 	}
3200 
3201 	MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
3202 	mbfl_buffer_converter_delete(convd);
3203 	return output;
3204 }
3205 /* }}} */
3206 
3207 /* {{{ MBSTRING_API char *php_mb_convert_encoding() */
php_mb_convert_encoding(const char * input,size_t length,const char * _to_encoding,const char * _from_encodings,size_t * output_len)3208 MBSTRING_API char *php_mb_convert_encoding(const char *input, size_t length, const char *_to_encoding, const char *_from_encodings, size_t *output_len)
3209 {
3210 	const mbfl_encoding *from_encoding, *to_encoding;
3211 
3212 	if (output_len) {
3213 		*output_len = 0;
3214 	}
3215 	if (!input) {
3216 		return NULL;
3217 	}
3218 	/* new encoding */
3219 	if (_to_encoding && strlen(_to_encoding)) {
3220 		to_encoding = mbfl_name2encoding(_to_encoding);
3221 		if (!to_encoding) {
3222 			php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", _to_encoding);
3223 			return NULL;
3224 		}
3225 	} else {
3226 		to_encoding = MBSTRG(current_internal_encoding);
3227 	}
3228 
3229 	/* pre-conversion encoding */
3230 	from_encoding = MBSTRG(current_internal_encoding);
3231 	if (_from_encodings) {
3232 		const mbfl_encoding **list = NULL;
3233 		size_t size = 0;
3234 		php_mb_parse_encoding_list(_from_encodings, strlen(_from_encodings), &list, &size, 0);
3235 		if (size == 1) {
3236 			from_encoding = *list;
3237 		} else if (size > 1) {
3238 			/* auto detect */
3239 			mbfl_string string;
3240 			mbfl_string_init(&string);
3241 			string.val = (unsigned char *)input;
3242 			string.len = length;
3243 			from_encoding = mbfl_identify_encoding(&string, list, size, MBSTRG(strict_detection));
3244 			if (!from_encoding) {
3245 				php_error_docref(NULL, E_WARNING, "Unable to detect character encoding");
3246 				from_encoding = &mbfl_encoding_pass;
3247 			}
3248 		} else {
3249 			php_error_docref(NULL, E_WARNING, "Illegal character encoding specified");
3250 		}
3251 		if (list != NULL) {
3252 			efree((void *)list);
3253 		}
3254 	}
3255 
3256 	return php_mb_convert_encoding_ex(input, length, to_encoding, from_encoding, output_len);
3257 }
3258 /* }}} */
3259 
php_mb_convert_encoding_recursive(HashTable * input,const char * _to_encoding,const char * _from_encodings)3260 MBSTRING_API HashTable *php_mb_convert_encoding_recursive(HashTable *input, const char *_to_encoding, const char *_from_encodings)
3261 {
3262 	HashTable *output, *chash;
3263 	zend_long idx;
3264 	zend_string *key;
3265 	zval *entry, entry_tmp;
3266 	size_t ckey_len, cval_len;
3267 	char *ckey, *cval;
3268 
3269 	if (!input) {
3270 		return NULL;
3271 	}
3272 
3273 	if (GC_IS_RECURSIVE(input)) {
3274 		GC_UNPROTECT_RECURSION(input);
3275 		php_error_docref(NULL, E_WARNING, "Cannot convert recursively referenced values");
3276 		return NULL;
3277 	}
3278 	GC_TRY_PROTECT_RECURSION(input);
3279 	output = zend_new_array(zend_hash_num_elements(input));
3280 	ZEND_HASH_FOREACH_KEY_VAL(input, idx, key, entry) {
3281 		/* convert key */
3282 		if (key) {
3283 			ckey = php_mb_convert_encoding(ZSTR_VAL(key), ZSTR_LEN(key), _to_encoding, _from_encodings, &ckey_len);
3284 			key = zend_string_init(ckey, ckey_len, 0);
3285 			efree(ckey);
3286 		}
3287 		/* convert value */
3288 		ZEND_ASSERT(entry);
3289 try_again:
3290 		switch(Z_TYPE_P(entry)) {
3291 			case IS_STRING:
3292 				cval = php_mb_convert_encoding(Z_STRVAL_P(entry), Z_STRLEN_P(entry), _to_encoding, _from_encodings, &cval_len);
3293 				ZVAL_STRINGL(&entry_tmp, cval, cval_len);
3294 				efree(cval);
3295 				break;
3296 			case IS_NULL:
3297 			case IS_TRUE:
3298 			case IS_FALSE:
3299 			case IS_LONG:
3300 			case IS_DOUBLE:
3301 				ZVAL_COPY(&entry_tmp, entry);
3302 				break;
3303 			case IS_ARRAY:
3304 				chash = php_mb_convert_encoding_recursive(Z_ARRVAL_P(entry), _to_encoding, _from_encodings);
3305 				if (chash) {
3306 					ZVAL_ARR(&entry_tmp, chash);
3307 				} else {
3308 					ZVAL_EMPTY_ARRAY(&entry_tmp);
3309 				}
3310 				break;
3311 			case IS_REFERENCE:
3312 				entry = Z_REFVAL_P(entry);
3313 				goto try_again;
3314 			case IS_OBJECT:
3315 			default:
3316 				if (key) {
3317 					zend_string_release(key);
3318 				}
3319 				php_error_docref(NULL, E_WARNING, "Object is not supported");
3320 				continue;
3321 		}
3322 		if (key) {
3323 			zend_hash_add(output, key, &entry_tmp);
3324 			zend_string_release(key);
3325 		} else {
3326 			zend_hash_index_add(output, idx, &entry_tmp);
3327 		}
3328 	} ZEND_HASH_FOREACH_END();
3329 	GC_TRY_UNPROTECT_RECURSION(input);
3330 
3331 	return output;
3332 }
3333 /* }}} */
3334 
3335 
3336 /* {{{ proto string mb_convert_encoding(string str, string to-encoding [, mixed from-encoding])
3337    Returns converted string in desired encoding */
PHP_FUNCTION(mb_convert_encoding)3338 PHP_FUNCTION(mb_convert_encoding)
3339 {
3340 	zval *input;
3341 	char *arg_new;
3342 	size_t new_len;
3343 	zval *arg_old = NULL;
3344 	size_t size, l, n;
3345 	char *_from_encodings = NULL, *ret, *s_free = NULL;
3346 
3347 	zval *hash_entry;
3348 	HashTable *target_hash;
3349 
3350 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "zs|z", &input, &arg_new, &new_len, &arg_old) == FAILURE) {
3351 		return;
3352 	}
3353 
3354 	if (Z_TYPE_P(input) != IS_STRING && Z_TYPE_P(input) != IS_ARRAY) {
3355 		if (!try_convert_to_string(input)) {
3356 			return;
3357 		}
3358 	}
3359 
3360 	if (arg_old) {
3361 		switch (Z_TYPE_P(arg_old)) {
3362 			case IS_ARRAY:
3363 				target_hash = Z_ARRVAL_P(arg_old);
3364 				_from_encodings = NULL;
3365 
3366 				ZEND_HASH_FOREACH_VAL(target_hash, hash_entry) {
3367 					zend_string *encoding_str = zval_try_get_string(hash_entry);
3368 					if (UNEXPECTED(!encoding_str)) {
3369 						if (_from_encodings) {
3370 							efree(_from_encodings);
3371 						}
3372 						return;
3373 					}
3374 
3375 					if ( _from_encodings) {
3376 						l = strlen(_from_encodings);
3377 						n = ZSTR_LEN(encoding_str);
3378 						_from_encodings = erealloc(_from_encodings, l+n+2);
3379 						memcpy(_from_encodings + l, ",", 1);
3380 						memcpy(_from_encodings + l + 1, ZSTR_VAL(encoding_str), ZSTR_LEN(encoding_str) + 1);
3381 					} else {
3382 						_from_encodings = estrdup(ZSTR_VAL(encoding_str));
3383 					}
3384 					zend_string_release(encoding_str);
3385 				} ZEND_HASH_FOREACH_END();
3386 
3387 				if (_from_encodings != NULL && !strlen(_from_encodings)) {
3388 					efree(_from_encodings);
3389 					_from_encodings = NULL;
3390 				}
3391 				s_free = _from_encodings;
3392 				break;
3393 			default:
3394 				if (!try_convert_to_string(arg_old)) {
3395 					return;
3396 				}
3397 
3398 				_from_encodings = Z_STRVAL_P(arg_old);
3399 				break;
3400 			}
3401 	}
3402 
3403 	if (Z_TYPE_P(input) == IS_STRING) {
3404 		/* new encoding */
3405 		ret = php_mb_convert_encoding(Z_STRVAL_P(input), Z_STRLEN_P(input), arg_new, _from_encodings, &size);
3406 		if (ret != NULL) {
3407 			// TODO: avoid reallocation ???
3408 			RETVAL_STRINGL(ret, size);		/* the string is already strdup()'ed */
3409 			efree(ret);
3410 		} else {
3411 			RETVAL_FALSE;
3412 		}
3413 		if (s_free) {
3414 			efree(s_free);
3415 		}
3416 	} else {
3417 		HashTable *tmp;
3418 		tmp = php_mb_convert_encoding_recursive(Z_ARRVAL_P(input), arg_new, _from_encodings);
3419 		RETURN_ARR(tmp);
3420 	}
3421 
3422 	return;
3423 }
3424 /* }}} */
3425 
mbstring_convert_case(int case_mode,const char * str,size_t str_len,size_t * ret_len,const mbfl_encoding * enc)3426 static char *mbstring_convert_case(
3427 		int case_mode, const char *str, size_t str_len, size_t *ret_len,
3428 		const mbfl_encoding *enc) {
3429 	return php_unicode_convert_case(
3430 		case_mode, str, str_len, ret_len, enc,
3431 		MBSTRG(current_filter_illegal_mode), MBSTRG(current_filter_illegal_substchar));
3432 }
3433 
3434 /* {{{ proto string mb_convert_case(string sourcestring, int mode [, string encoding])
3435    Returns a case-folded version of sourcestring */
PHP_FUNCTION(mb_convert_case)3436 PHP_FUNCTION(mb_convert_case)
3437 {
3438 	zend_string *from_encoding = NULL;
3439 	char *str;
3440 	size_t str_len;
3441 	zend_long case_mode = 0;
3442 	char *newstr;
3443 	size_t ret_len;
3444 	const mbfl_encoding *enc;
3445 
3446 	RETVAL_FALSE;
3447 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl|S!", &str, &str_len,
3448 				&case_mode, &from_encoding) == FAILURE) {
3449 		return;
3450 	}
3451 
3452 	enc = php_mb_get_encoding(from_encoding);
3453 	if (!enc) {
3454 		return;
3455 	}
3456 
3457 	if (case_mode < 0 || case_mode > PHP_UNICODE_CASE_MODE_MAX) {
3458 		php_error_docref(NULL, E_WARNING, "Invalid case mode");
3459 		return;
3460 	}
3461 
3462 	newstr = mbstring_convert_case(case_mode, str, str_len, &ret_len, enc);
3463 
3464 	if (newstr) {
3465 		// TODO: avoid reallocation ???
3466 		RETVAL_STRINGL(newstr, ret_len);
3467 		efree(newstr);
3468 	}
3469 }
3470 /* }}} */
3471 
3472 /* {{{ proto string mb_strtoupper(string sourcestring [, string encoding])
3473  *  Returns a uppercased version of sourcestring
3474  */
PHP_FUNCTION(mb_strtoupper)3475 PHP_FUNCTION(mb_strtoupper)
3476 {
3477 	zend_string *from_encoding = NULL;
3478 	char *str;
3479 	size_t str_len;
3480 	char *newstr;
3481 	size_t ret_len;
3482 	const mbfl_encoding *enc;
3483 
3484 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|S!", &str, &str_len,
3485 				&from_encoding) == FAILURE) {
3486 		return;
3487 	}
3488 
3489 	enc = php_mb_get_encoding(from_encoding);
3490 	if (!enc) {
3491 		RETURN_FALSE;
3492 	}
3493 
3494 	newstr = mbstring_convert_case(PHP_UNICODE_CASE_UPPER, str, str_len, &ret_len, enc);
3495 
3496 	if (newstr) {
3497 		// TODO: avoid reallocation ???
3498 		RETVAL_STRINGL(newstr, ret_len);
3499 		efree(newstr);
3500 		return;
3501 	}
3502 	RETURN_FALSE;
3503 }
3504 /* }}} */
3505 
3506 /* {{{ proto string mb_strtolower(string sourcestring [, string encoding])
3507  *  Returns a lowercased version of sourcestring
3508  */
PHP_FUNCTION(mb_strtolower)3509 PHP_FUNCTION(mb_strtolower)
3510 {
3511 	zend_string *from_encoding = NULL;
3512 	char *str;
3513 	size_t str_len;
3514 	char *newstr;
3515 	size_t ret_len;
3516 	const mbfl_encoding *enc;
3517 
3518 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|S!", &str, &str_len,
3519 				&from_encoding) == FAILURE) {
3520 		return;
3521 	}
3522 
3523 	enc = php_mb_get_encoding(from_encoding);
3524 	if (!enc) {
3525 		RETURN_FALSE;
3526 	}
3527 
3528 	newstr = mbstring_convert_case(PHP_UNICODE_CASE_LOWER, str, str_len, &ret_len, enc);
3529 
3530 	if (newstr) {
3531 		// TODO: avoid reallocation ???
3532 		RETVAL_STRINGL(newstr, ret_len);
3533 		efree(newstr);
3534 		return;
3535 	}
3536 	RETURN_FALSE;
3537 }
3538 /* }}} */
3539 
3540 /* {{{ proto string mb_detect_encoding(string str [, mixed encoding_list [, bool strict]])
3541    Encodings of the given string is returned (as a string) */
PHP_FUNCTION(mb_detect_encoding)3542 PHP_FUNCTION(mb_detect_encoding)
3543 {
3544 	char *str;
3545 	size_t str_len;
3546 	zend_bool strict=0;
3547 	zval *encoding_list = NULL;
3548 
3549 	mbfl_string string;
3550 	const mbfl_encoding *ret;
3551 	const mbfl_encoding **elist, **list;
3552 	size_t size;
3553 
3554 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|z!b", &str, &str_len, &encoding_list, &strict) == FAILURE) {
3555 		return;
3556 	}
3557 
3558 	/* make encoding list */
3559 	list = NULL;
3560 	size = 0;
3561 	if (encoding_list) {
3562 		switch (Z_TYPE_P(encoding_list)) {
3563 		case IS_ARRAY:
3564 			if (FAILURE == php_mb_parse_encoding_array(encoding_list, &list, &size, 0)) {
3565 				if (list) {
3566 					efree(list);
3567 					list = NULL;
3568 					size = 0;
3569 				}
3570 			}
3571 			break;
3572 		default:
3573 			if (!try_convert_to_string(encoding_list)) {
3574 				return;
3575 			}
3576 			if (FAILURE == php_mb_parse_encoding_list(Z_STRVAL_P(encoding_list), Z_STRLEN_P(encoding_list), &list, &size, 0)) {
3577 				if (list) {
3578 					efree(list);
3579 					list = NULL;
3580 					size = 0;
3581 				}
3582 			}
3583 			break;
3584 		}
3585 		if (size == 0) {
3586 			php_error_docref(NULL, E_WARNING, "Illegal argument");
3587 		}
3588 	}
3589 
3590 	if (ZEND_NUM_ARGS() < 3) {
3591 		strict = MBSTRG(strict_detection);
3592 	}
3593 
3594 	if (size > 0 && list != NULL) {
3595 		elist = list;
3596 	} else {
3597 		elist = MBSTRG(current_detect_order_list);
3598 		size = MBSTRG(current_detect_order_list_size);
3599 	}
3600 
3601 	mbfl_string_init(&string);
3602 	string.no_language = MBSTRG(language);
3603 	string.val = (unsigned char *)str;
3604 	string.len = str_len;
3605 	ret = mbfl_identify_encoding(&string, elist, size, strict);
3606 
3607 	if (list != NULL) {
3608 		efree((void *)list);
3609 	}
3610 
3611 	if (ret == NULL) {
3612 		RETURN_FALSE;
3613 	}
3614 
3615 	RETVAL_STRING((char *)ret->name);
3616 }
3617 /* }}} */
3618 
3619 /* {{{ proto mixed mb_list_encodings()
3620    Returns an array of all supported entity encodings */
PHP_FUNCTION(mb_list_encodings)3621 PHP_FUNCTION(mb_list_encodings)
3622 {
3623 	const mbfl_encoding **encodings;
3624 	const mbfl_encoding *encoding;
3625 	int i;
3626 
3627 	if (zend_parse_parameters_none() == FAILURE) {
3628 		return;
3629 	}
3630 
3631 	array_init(return_value);
3632 	i = 0;
3633 	encodings = mbfl_get_supported_encodings();
3634 	while ((encoding = encodings[i++]) != NULL) {
3635 		add_next_index_string(return_value, (char *) encoding->name);
3636 	}
3637 }
3638 /* }}} */
3639 
3640 /* {{{ proto array mb_encoding_aliases(string encoding)
3641    Returns an array of the aliases of a given encoding name */
PHP_FUNCTION(mb_encoding_aliases)3642 PHP_FUNCTION(mb_encoding_aliases)
3643 {
3644 	const mbfl_encoding *encoding;
3645 	char *name = NULL;
3646 	size_t name_len;
3647 
3648 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &name, &name_len) == FAILURE) {
3649 		return;
3650 	}
3651 
3652 	encoding = mbfl_name2encoding(name);
3653 	if (!encoding) {
3654 		php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", name);
3655 		RETURN_FALSE;
3656 	}
3657 
3658 	array_init(return_value);
3659 	if (encoding->aliases != NULL) {
3660 		const char **alias;
3661 		for (alias = *encoding->aliases; *alias; ++alias) {
3662 			add_next_index_string(return_value, (char *)*alias);
3663 		}
3664 	}
3665 }
3666 /* }}} */
3667 
3668 /* {{{ proto string mb_encode_mimeheader(string str [, string charset [, string transfer-encoding [, string linefeed [, int indent]]]])
3669    Converts the string to MIME "encoded-word" in the format of =?charset?(B|Q)?encoded_string?= */
PHP_FUNCTION(mb_encode_mimeheader)3670 PHP_FUNCTION(mb_encode_mimeheader)
3671 {
3672 	const mbfl_encoding *charset, *transenc;
3673 	mbfl_string  string, result, *ret;
3674 	char *charset_name = NULL;
3675 	size_t charset_name_len;
3676 	char *trans_enc_name = NULL;
3677 	size_t trans_enc_name_len;
3678 	char *linefeed = "\r\n";
3679 	size_t linefeed_len;
3680 	zend_long indent = 0;
3681 
3682 	string.no_language = MBSTRG(language);
3683 	string.encoding = MBSTRG(current_internal_encoding);
3684 
3685 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|sssl", (char **)&string.val, &string.len, &charset_name, &charset_name_len, &trans_enc_name, &trans_enc_name_len, &linefeed, &linefeed_len, &indent) == FAILURE) {
3686 		return;
3687 	}
3688 
3689 	charset = &mbfl_encoding_pass;
3690 	transenc = &mbfl_encoding_base64;
3691 
3692 	if (charset_name != NULL) {
3693 		charset = mbfl_name2encoding(charset_name);
3694 		if (!charset) {
3695 			php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", charset_name);
3696 			RETURN_FALSE;
3697 		}
3698 	} else {
3699 		const mbfl_language *lang = mbfl_no2language(MBSTRG(language));
3700 		if (lang != NULL) {
3701 			charset = mbfl_no2encoding(lang->mail_charset);
3702 			transenc = mbfl_no2encoding(lang->mail_header_encoding);
3703 		}
3704 	}
3705 
3706 	if (trans_enc_name != NULL) {
3707 		if (*trans_enc_name == 'B' || *trans_enc_name == 'b') {
3708 			transenc = &mbfl_encoding_base64;
3709 		} else if (*trans_enc_name == 'Q' || *trans_enc_name == 'q') {
3710 			transenc = &mbfl_encoding_qprint;
3711 		}
3712 	}
3713 
3714 	mbfl_string_init(&result);
3715 	ret = mbfl_mime_header_encode(&string, &result, charset, transenc, linefeed, indent);
3716 	if (ret != NULL) {
3717 		// TODO: avoid reallocation ???
3718 		RETVAL_STRINGL((char *)ret->val, ret->len);	/* the string is already strdup()'ed */
3719 		efree(ret->val);
3720 	} else {
3721 		RETVAL_FALSE;
3722 	}
3723 }
3724 /* }}} */
3725 
3726 /* {{{ proto string mb_decode_mimeheader(string string)
3727    Decodes the MIME "encoded-word" in the string */
PHP_FUNCTION(mb_decode_mimeheader)3728 PHP_FUNCTION(mb_decode_mimeheader)
3729 {
3730 	mbfl_string string, result, *ret;
3731 
3732 	string.no_language = MBSTRG(language);
3733 	string.encoding = MBSTRG(current_internal_encoding);
3734 
3735 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", (char **)&string.val, &string.len) == FAILURE) {
3736 		return;
3737 	}
3738 
3739 	mbfl_string_init(&result);
3740 	ret = mbfl_mime_header_decode(&string, &result, MBSTRG(current_internal_encoding));
3741 	if (ret != NULL) {
3742 		// TODO: avoid reallocation ???
3743 		RETVAL_STRINGL((char *)ret->val, ret->len);	/* the string is already strdup()'ed */
3744 		efree(ret->val);
3745 	} else {
3746 		RETVAL_FALSE;
3747 	}
3748 }
3749 /* }}} */
3750 
3751 /* {{{ proto string mb_convert_kana(string str [, string option] [, string encoding])
3752    Conversion between full-width character and half-width character (Japanese) */
PHP_FUNCTION(mb_convert_kana)3753 PHP_FUNCTION(mb_convert_kana)
3754 {
3755 	int opt;
3756 	mbfl_string string, result, *ret;
3757 	char *optstr = NULL;
3758 	size_t optstr_len;
3759 	zend_string *encname = NULL;
3760 
3761 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|sS", (char **)&string.val, &string.len, &optstr, &optstr_len, &encname) == FAILURE) {
3762 		return;
3763 	}
3764 
3765 	/* option */
3766 	if (optstr != NULL) {
3767 		char *p = optstr;
3768 		size_t i = 0, n = optstr_len;
3769 		opt = 0;
3770 		while (i < n) {
3771 			i++;
3772 			switch (*p++) {
3773 			case 'A':
3774 				opt |= 0x1;
3775 				break;
3776 			case 'a':
3777 				opt |= 0x10;
3778 				break;
3779 			case 'R':
3780 				opt |= 0x2;
3781 				break;
3782 			case 'r':
3783 				opt |= 0x20;
3784 				break;
3785 			case 'N':
3786 				opt |= 0x4;
3787 				break;
3788 			case 'n':
3789 				opt |= 0x40;
3790 				break;
3791 			case 'S':
3792 				opt |= 0x8;
3793 				break;
3794 			case 's':
3795 				opt |= 0x80;
3796 				break;
3797 			case 'K':
3798 				opt |= 0x100;
3799 				break;
3800 			case 'k':
3801 				opt |= 0x1000;
3802 				break;
3803 			case 'H':
3804 				opt |= 0x200;
3805 				break;
3806 			case 'h':
3807 				opt |= 0x2000;
3808 				break;
3809 			case 'V':
3810 				opt |= 0x800;
3811 				break;
3812 			case 'C':
3813 				opt |= 0x10000;
3814 				break;
3815 			case 'c':
3816 				opt |= 0x20000;
3817 				break;
3818 			case 'M':
3819 				opt |= 0x100000;
3820 				break;
3821 			case 'm':
3822 				opt |= 0x200000;
3823 				break;
3824 			}
3825 		}
3826 	} else {
3827 		opt = 0x900;
3828 	}
3829 
3830 	/* encoding */
3831 	string.no_language = MBSTRG(language);
3832 	string.encoding = php_mb_get_encoding(encname);
3833 	if (!string.encoding) {
3834 		RETURN_FALSE;
3835 	}
3836 
3837 	ret = mbfl_ja_jp_hantozen(&string, &result, opt);
3838 	if (ret != NULL) {
3839 		// TODO: avoid reallocation ???
3840 		RETVAL_STRINGL((char *)ret->val, ret->len);		/* the string is already strdup()'ed */
3841 		efree(ret->val);
3842 	} else {
3843 		RETVAL_FALSE;
3844 	}
3845 }
3846 /* }}} */
3847 
/*
 * Feeds every string reachable from `var` (following references and
 * descending into arrays and objects) to the encoding detector.
 *
 * Returns 1 as soon as the detector reports that detection is complete,
 * 0 otherwise. When a container that is already being visited is met
 * again, *recursion_error is set to 1 and 0 is returned.
 */
static int mb_recursive_encoder_detector_feed(mbfl_encoding_detector *identd, zval *var, int *recursion_error) /* {{{ */
{
	mbfl_string chunk;
	HashTable *members;
	zval *member;
	int detected = 0;

	ZVAL_DEREF(var);

	if (Z_TYPE_P(var) == IS_STRING) {
		chunk.val = (unsigned char *)Z_STRVAL_P(var);
		chunk.len = Z_STRLEN_P(var);
		if (mbfl_encoding_detector_feed(identd, &chunk)) {
			return 1; /* complete detecting */
		}
		return 0;
	}

	/* Scalars other than strings contribute nothing. */
	if (Z_TYPE_P(var) != IS_ARRAY && Z_TYPE_P(var) != IS_OBJECT) {
		return 0;
	}

	/* Mark the container as "being visited" so cycles are noticed. */
	if (Z_REFCOUNTED_P(var)) {
		if (Z_IS_RECURSIVE_P(var)) {
			*recursion_error = 1;
			return 0;
		}
		Z_PROTECT_RECURSION_P(var);
	}

	members = HASH_OF(var);
	if (members != NULL) {
		ZEND_HASH_FOREACH_VAL_IND(members, member) {
			if (mb_recursive_encoder_detector_feed(identd, member, recursion_error)) {
				detected = 1;
				break;
			}
			if (*recursion_error) {
				/* stop walking; the result stays 0 */
				break;
			}
		} ZEND_HASH_FOREACH_END();
	}

	/* Every exit path after the protect call passes through here. */
	if (Z_REFCOUNTED_P(var)) {
		Z_UNPROTECT_RECURSION_P(var);
	}
	return detected;
} /* }}} */
3893 
/*
 * Converts, in place, every string reachable from `var` (following
 * references and descending into arrays and objects) with `convd`.
 *
 * Returns 1 when a container that is already being visited is met
 * again, 0 otherwise.
 */
static int mb_recursive_convert_variable(mbfl_buffer_converter *convd, zval *var) /* {{{ */
{
	mbfl_string input, output, *converted;
	HashTable *members;
	zval *member;
	zval *slot = var;	/* the zval as passed in, before dereferencing */
	int aborted = 0;

	ZVAL_DEREF(var);

	if (Z_TYPE_P(var) == IS_STRING) {
		input.val = (unsigned char *)Z_STRVAL_P(var);
		input.len = Z_STRLEN_P(var);
		converted = mbfl_buffer_converter_feed_result(convd, &input, &output);
		if (converted != NULL) {
			/* Drop the old value, then store a copy of the converted
			 * bytes and free the converter's own buffer. */
			zval_ptr_dtor(slot);
			// TODO: avoid reallocation ???
			ZVAL_STRINGL(slot, (char *)converted->val, converted->len);
			efree(converted->val);
		}
		return 0;
	}

	/* Scalars other than strings are left untouched. */
	if (Z_TYPE_P(var) != IS_ARRAY && Z_TYPE_P(var) != IS_OBJECT) {
		return 0;
	}

	/* Arrays are separated before their elements are rewritten. */
	if (Z_TYPE_P(var) == IS_ARRAY) {
		SEPARATE_ARRAY(var);
	}

	/* Mark the container as "being visited" so cycles are noticed. */
	if (Z_REFCOUNTED_P(var)) {
		if (Z_IS_RECURSIVE_P(var)) {
			return 1;
		}
		Z_PROTECT_RECURSION_P(var);
	}

	members = HASH_OF(var);
	if (members != NULL) {
		ZEND_HASH_FOREACH_VAL_IND(members, member) {
			if (mb_recursive_convert_variable(convd, member)) {
				aborted = 1;
				break;
			}
		} ZEND_HASH_FOREACH_END();
	}

	/* Every exit path after the protect call passes through here. */
	if (Z_REFCOUNTED_P(var)) {
		Z_UNPROTECT_RECURSION_P(var);
	}
	return aborted;
} /* }}} */
3941 
3942 /* {{{ proto string mb_convert_variables(string to-encoding, mixed from-encoding, mixed vars [, ...])
3943    Converts the string resource in variables to desired encoding */
PHP_FUNCTION(mb_convert_variables)3944 PHP_FUNCTION(mb_convert_variables)
3945 {
3946 	zval *args, *zfrom_enc;
3947 	mbfl_string string, result;
3948 	const mbfl_encoding *from_encoding, *to_encoding;
3949 	mbfl_encoding_detector *identd;
3950 	mbfl_buffer_converter *convd;
3951 	int n, argc;
3952 	size_t to_enc_len;
3953 	size_t elistsz;
3954 	const mbfl_encoding **elist;
3955 	char *to_enc;
3956 	int recursion_error = 0;
3957 
3958 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "sz+", &to_enc, &to_enc_len, &zfrom_enc, &args, &argc) == FAILURE) {
3959 		return;
3960 	}
3961 
3962 	/* new encoding */
3963 	to_encoding = mbfl_name2encoding(to_enc);
3964 	if (!to_encoding) {
3965 		php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", to_enc);
3966 		RETURN_FALSE;
3967 	}
3968 
3969 	/* initialize string */
3970 	mbfl_string_init(&string);
3971 	mbfl_string_init(&result);
3972 	from_encoding = MBSTRG(current_internal_encoding);
3973 	string.encoding = from_encoding;
3974 	string.no_language = MBSTRG(language);
3975 
3976 	/* pre-conversion encoding */
3977 	elist = NULL;
3978 	elistsz = 0;
3979 	switch (Z_TYPE_P(zfrom_enc)) {
3980 		case IS_ARRAY:
3981 			php_mb_parse_encoding_array(zfrom_enc, &elist, &elistsz, 0);
3982 			break;
3983 		default:
3984 			if (!try_convert_to_string(zfrom_enc)) {
3985 				return;
3986 			}
3987 			php_mb_parse_encoding_list(Z_STRVAL_P(zfrom_enc), Z_STRLEN_P(zfrom_enc), &elist, &elistsz, 0);
3988 			break;
3989 	}
3990 
3991 	if (elistsz == 0) {
3992 		from_encoding = &mbfl_encoding_pass;
3993 	} else if (elistsz == 1) {
3994 		from_encoding = *elist;
3995 	} else {
3996 		/* auto detect */
3997 		from_encoding = NULL;
3998 		identd = mbfl_encoding_detector_new(elist, elistsz, MBSTRG(strict_detection));
3999 		if (identd != NULL) {
4000 			n = 0;
4001 			while (n < argc) {
4002 				if (mb_recursive_encoder_detector_feed(identd, &args[n], &recursion_error)) {
4003 					break;
4004 				}
4005 				n++;
4006 			}
4007 			from_encoding = mbfl_encoding_detector_judge(identd);
4008 			mbfl_encoding_detector_delete(identd);
4009 			if (recursion_error) {
4010 				if (elist != NULL) {
4011 					efree((void *)elist);
4012 				}
4013 				php_error_docref(NULL, E_WARNING, "Cannot handle recursive references");
4014 				RETURN_FALSE;
4015 			}
4016 		}
4017 
4018 		if (!from_encoding) {
4019 			php_error_docref(NULL, E_WARNING, "Unable to detect encoding");
4020 			from_encoding = &mbfl_encoding_pass;
4021 		}
4022 	}
4023 	if (elist != NULL) {
4024 		efree((void *)elist);
4025 	}
4026 	/* create converter */
4027 	convd = NULL;
4028 	if (from_encoding != &mbfl_encoding_pass) {
4029 		convd = mbfl_buffer_converter_new(from_encoding, to_encoding, 0);
4030 		if (convd == NULL) {
4031 			php_error_docref(NULL, E_WARNING, "Unable to create converter");
4032 			RETURN_FALSE;
4033 		}
4034 		mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
4035 		mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
4036 	}
4037 
4038 	/* convert */
4039 	if (convd != NULL) {
4040 		n = 0;
4041 		while (n < argc) {
4042 			zval *zv = &args[n];
4043 
4044 			ZVAL_DEREF(zv);
4045 			recursion_error = mb_recursive_convert_variable(convd, zv);
4046 			if (recursion_error) {
4047 				break;
4048 			}
4049 			n++;
4050 		}
4051 
4052 		MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
4053 		mbfl_buffer_converter_delete(convd);
4054 
4055 		if (recursion_error) {
4056 			php_error_docref(NULL, E_WARNING, "Cannot handle recursive references");
4057 			RETURN_FALSE;
4058 		}
4059 	}
4060 
4061 	if (from_encoding) {
4062 		RETURN_STRING(from_encoding->name);
4063 	} else {
4064 		RETURN_FALSE;
4065 	}
4066 }
4067 /* }}} */
4068 
4069 /* {{{ HTML numeric entity */
4070 /* {{{ static void php_mb_numericentity_exec() */
4071 static void
php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS,int type)4072 php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS, int type)
4073 {
4074 	char *str, *encoding = NULL;
4075 	size_t str_len, encoding_len;
4076 	zval *zconvmap, *hash_entry;
4077 	HashTable *target_hash;
4078 	int i, *convmap, *mapelm, mapsize=0;
4079 	zend_bool is_hex = 0;
4080 	mbfl_string string, result, *ret;
4081 
4082 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "sz|sb", &str, &str_len, &zconvmap, &encoding, &encoding_len, &is_hex) == FAILURE) {
4083 		return;
4084 	}
4085 
4086 	string.no_language = MBSTRG(language);
4087 	string.encoding = MBSTRG(current_internal_encoding);
4088 	string.val = (unsigned char *)str;
4089 	string.len = str_len;
4090 
4091 	/* encoding */
4092 	if (encoding && encoding_len > 0) {
4093 		string.encoding = mbfl_name2encoding(encoding);
4094 		if (!string.encoding) {
4095 			php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", encoding);
4096 			RETURN_FALSE;
4097 		}
4098 	}
4099 
4100 	if (type == 0 && is_hex) {
4101 		type = 2; /* output in hex format */
4102 	}
4103 
4104 	/* conversion map */
4105 	convmap = NULL;
4106 	if (Z_TYPE_P(zconvmap) == IS_ARRAY) {
4107 		target_hash = Z_ARRVAL_P(zconvmap);
4108 		i = zend_hash_num_elements(target_hash);
4109 		if (i > 0) {
4110 			convmap = (int *)safe_emalloc(i, sizeof(int), 0);
4111 			mapelm = convmap;
4112 			mapsize = 0;
4113 			ZEND_HASH_FOREACH_VAL(target_hash, hash_entry) {
4114 				*mapelm++ = zval_get_long(hash_entry);
4115 				mapsize++;
4116 			} ZEND_HASH_FOREACH_END();
4117 		}
4118 	}
4119 	if (convmap == NULL) {
4120 		RETURN_FALSE;
4121 	}
4122 	mapsize /= 4;
4123 
4124 	ret = mbfl_html_numeric_entity(&string, &result, convmap, mapsize, type);
4125 	if (ret != NULL) {
4126 		// TODO: avoid reallocation ???
4127 		RETVAL_STRINGL((char *)ret->val, ret->len);
4128 		efree(ret->val);
4129 	} else {
4130 		RETVAL_FALSE;
4131 	}
4132 	efree((void *)convmap);
4133 }
4134 /* }}} */
4135 
4136 /* {{{ proto string mb_encode_numericentity(string string, array convmap [, string encoding [, bool is_hex]])
4137    Converts specified characters to HTML numeric entities */
PHP_FUNCTION(mb_encode_numericentity)4138 PHP_FUNCTION(mb_encode_numericentity)
4139 {
4140 	php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
4141 }
4142 /* }}} */
4143 
4144 /* {{{ proto string mb_decode_numericentity(string string, array convmap [, string encoding])
4145    Converts HTML numeric entities to character code */
PHP_FUNCTION(mb_decode_numericentity)4146 PHP_FUNCTION(mb_decode_numericentity)
4147 {
4148 	php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
4149 }
4150 /* }}} */
4151 /* }}} */
4152 
4153 /* {{{ proto int mb_send_mail(string to, string subject, string message [, string additional_headers [, string additional_parameters]])
4154  *  Sends an email message with MIME scheme
4155  */
4156 
/*
 * SKIP_LONG_HEADER_SEP_MBSTRING(str, pos)
 *
 * For use inside a loop that walks `str` with index `pos`. When `pos`
 * is at a CR LF that is followed by a space or a tab, `pos` is advanced
 * to the last character of that whitespace run and the enclosing loop
 * is continued, so the sequence is left untouched by the caller.
 *
 * This macro deliberately is NOT wrapped in do { } while (0): the
 * `continue` has to apply to the caller's loop. `pos` must be a
 * modifiable lvalue.
 */
#define SKIP_LONG_HEADER_SEP_MBSTRING(str, pos)										\
	if ((str)[(pos)] == '\r' && (str)[(pos) + 1] == '\n' && ((str)[(pos) + 2] == ' ' || (str)[(pos) + 2] == '\t')) {	\
		(pos) += 2;											\
		while ((str)[(pos) + 1] == ' ' || (str)[(pos) + 1] == '\t') {							\
			(pos)++;										\
		}												\
		continue;											\
	}

/*
 * MAIL_ASCIIZ_CHECK_MBSTRING(str, len)
 *
 * Replaces every NUL byte in the first `len` bytes of `str` with a
 * space. Relies on `char *pp, *ee;` being declared in the calling scope.
 * Wrapped in do { } while (0) so it behaves as a single statement.
 */
#define MAIL_ASCIIZ_CHECK_MBSTRING(str, len)			\
	do {							\
		pp = (str);					\
		ee = pp + (len);				\
		while ((pp = memchr(pp, '\0', (ee - pp)))) {	\
			*pp = ' ';				\
		}						\
	} while (0)
4172 
/*
 * Parses a block of mail headers and stores each "Name: value" pair in
 * `ht`, keyed by the upper-cased field name, with the value as a string
 * zval. Parsing stops at an empty line or after `str_len` bytes.
 *
 * ht      - table receiving the headers; an existing key is replaced
 * str     - header text (not modified)
 * str_len - number of bytes of `str` to examine
 *
 * Returns the final state of the parser's state machine.
 *
 * A field name or value that cannot be paired with its counterpart
 * (e.g. a header with an empty value, or input that ends in the middle
 * of a header) is released instead of being left allocated.
 */
static int _php_mbstr_parse_mail_headers(HashTable *ht, const char *str, size_t str_len)
{
	const char *ps;
	size_t icnt;
	int state = 0;
	int crlf_state = -1;
	char *token = NULL;
	size_t token_pos = 0;
	zend_string *fld_name, *fld_val;

	ps = str;
	icnt = str_len;
	fld_name = fld_val = NULL;

	/*
	 *             C o n t e n t - T y p e :   t e x t / h t m l \r\n
	 *             ^ ^^^^^^^^^^^^^^^^^^^^^ ^^^ ^^^^^^^^^^^^^^^^^ ^^^^
	 *      state  0            1           2          3
	 *
	 *             C o n t e n t - T y p e :   t e x t / h t m l \r\n
	 *             ^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^
	 * crlf_state -1                       0                     1 -1
	 *
	 */

	while (icnt > 0) {
		switch (*ps) {
			case ':':
				if (crlf_state == 1) {
					token_pos++;
				}

				if (state == 0 || state == 1) {
					/* end of the field name */
					if(token && token_pos > 0) {
						fld_name = zend_string_init(token, token_pos, 0);
					}
					state = 2;
				} else {
					/* a colon inside the value is ordinary data */
					token_pos++;
				}

				crlf_state = 0;
				break;

			case '\n':
				if (crlf_state == -1) {
					/* empty line: end of the header block */
					goto out;
				}
				crlf_state = -1;
				break;

			case '\r':
				if (crlf_state == 1) {
					token_pos++;
				} else {
					crlf_state = 1;
				}
				break;

			case ' ': case '\t':
				if (crlf_state == -1) {
					if (state == 3) {
						/* continuing from the previous line */
						state = 4;
					} else {
						/* simply skipping this new line */
						state = 5;
					}
				} else {
					if (crlf_state == 1) {
						token_pos++;
					}
					if (state == 1 || state == 3) {
						token_pos++;
					}
				}
				crlf_state = 0;
				break;

			default:
				switch (state) {
					case 0:
						token = (char*)ps;
						token_pos = 0;
						state = 1;
						break;

					case 2:
						if (crlf_state != -1) {
							/* first character of the value */
							token = (char*)ps;
							token_pos = 0;

							state = 3;
							break;
						}
						/* NOTE(review): on this path token/token_pos still
						 * describe the field name, so a header with an empty
						 * value that is followed by another header is stored
						 * with its own name as value. Behaviour kept as is. */
						/* break is missing intentionally */

					case 3:
						if (crlf_state == -1) {
							/* a new header starts: commit the previous one */
							if(token && token_pos > 0) {
								fld_val = zend_string_init(token, token_pos, 0);
							}

							if (fld_name != NULL && fld_val != NULL) {
								zval val;
								/* FIXME: some locale free implementation is
								 * really required here,,, */
								php_strtoupper(ZSTR_VAL(fld_name), ZSTR_LEN(fld_name));
								ZVAL_STR(&val, fld_val);

								zend_hash_update(ht, fld_name, &val);

								zend_string_release_ex(fld_name, 0);
							} else {
								/* incomplete pair: release whichever half exists */
								if (fld_name != NULL) {
									zend_string_release_ex(fld_name, 0);
								}
								if (fld_val != NULL) {
									zend_string_release_ex(fld_val, 0);
								}
							}

							fld_name = fld_val = NULL;
							token = (char*)ps;
							token_pos = 0;

							state = 1;
						}
						break;

					case 4:
						token_pos++;
						state = 3;
						break;
				}

				if (crlf_state == 1) {
					token_pos++;
				}

				token_pos++;

				crlf_state = 0;
				break;
		}
		ps++, icnt--;
	}
out:
	if (state == 2) {
		/* the input ended right after the colon: the value is empty */
		token = "";
		token_pos = 0;

		state = 3;
	}
	if (state == 3) {
		if(token && token_pos > 0) {
			fld_val = zend_string_init(token, token_pos, 0);
		}
		if (fld_name != NULL && fld_val != NULL) {
			zval val;
			/* FIXME: some locale free implementation is
			 * really required here,,, */
			php_strtoupper(ZSTR_VAL(fld_name), ZSTR_LEN(fld_name));
			ZVAL_STR(&val, fld_val);

			zend_hash_update(ht, fld_name, &val);

			zend_string_release_ex(fld_name, 0);
			/* the value now belongs to the table */
			fld_name = fld_val = NULL;
		}
	}

	/* Whatever was not stored above (any final state) is released here. */
	if (fld_name != NULL) {
		zend_string_release_ex(fld_name, 0);
	}
	if (fld_val != NULL) {
		zend_string_release_ex(fld_val, 0);
	}
	return state;
}
4338 
PHP_FUNCTION(mb_send_mail)4339 PHP_FUNCTION(mb_send_mail)
4340 {
4341 	char *to;
4342 	size_t to_len;
4343 	char *message;
4344 	size_t message_len;
4345 	char *subject;
4346 	size_t subject_len;
4347 	zval *headers = NULL;
4348 	zend_string *extra_cmd = NULL;
4349 	zend_string *str_headers = NULL, *tmp_headers;
4350 	size_t n, i;
4351 	char *to_r = NULL;
4352 	char *force_extra_parameters = INI_STR("mail.force_extra_parameters");
4353 	struct {
4354 		int cnt_type:1;
4355 		int cnt_trans_enc:1;
4356 	} suppressed_hdrs = { 0, 0 };
4357 
4358 	char *message_buf = NULL, *subject_buf = NULL, *p;
4359 	mbfl_string orig_str, conv_str;
4360 	mbfl_string *pstr;	/* pointer to mbfl string for return value */
4361 	enum mbfl_no_encoding;
4362 	const mbfl_encoding *tran_cs,	/* transfar text charset */
4363 						*head_enc,	/* header transfar encoding */
4364 						*body_enc;	/* body transfar encoding */
4365 	mbfl_memory_device device;	/* automatic allocateable buffer for additional header */
4366 	const mbfl_language *lang;
4367 	int err = 0;
4368 	HashTable ht_headers;
4369 	zval *s;
4370 	extern void mbfl_memory_device_unput(mbfl_memory_device *device);
4371 	char *pp, *ee;
4372 
4373 	/* initialize */
4374 	mbfl_memory_device_init(&device, 0, 0);
4375 	mbfl_string_init(&orig_str);
4376 	mbfl_string_init(&conv_str);
4377 
4378 	/* character-set, transfer-encoding */
4379 	tran_cs = &mbfl_encoding_utf8;
4380 	head_enc = &mbfl_encoding_base64;
4381 	body_enc = &mbfl_encoding_base64;
4382 	lang = mbfl_no2language(MBSTRG(language));
4383 	if (lang != NULL) {
4384 		tran_cs = mbfl_no2encoding(lang->mail_charset);
4385 		head_enc = mbfl_no2encoding(lang->mail_header_encoding);
4386 		body_enc = mbfl_no2encoding(lang->mail_body_encoding);
4387 	}
4388 
4389 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "sss|zS", &to, &to_len, &subject, &subject_len, &message, &message_len, &headers, &extra_cmd) == FAILURE) {
4390 		return;
4391 	}
4392 
4393 	/* ASCIIZ check */
4394 	MAIL_ASCIIZ_CHECK_MBSTRING(to, to_len);
4395 	MAIL_ASCIIZ_CHECK_MBSTRING(subject, subject_len);
4396 	MAIL_ASCIIZ_CHECK_MBSTRING(message, message_len);
4397 	if (headers) {
4398 		switch(Z_TYPE_P(headers)) {
4399 			case IS_STRING:
4400 				tmp_headers = zend_string_init(Z_STRVAL_P(headers), Z_STRLEN_P(headers), 0);
4401 				MAIL_ASCIIZ_CHECK_MBSTRING(ZSTR_VAL(tmp_headers), ZSTR_LEN(tmp_headers));
4402 				str_headers = php_trim(tmp_headers, NULL, 0, 2);
4403 				zend_string_release_ex(tmp_headers, 0);
4404 				break;
4405 			case IS_ARRAY:
4406 				str_headers = php_mail_build_headers(headers);
4407 				break;
4408 			default:
4409 				php_error_docref(NULL, E_WARNING, "headers parameter must be string or array");
4410 				RETURN_FALSE;
4411 		}
4412 	}
4413 	if (extra_cmd) {
4414 		MAIL_ASCIIZ_CHECK_MBSTRING(ZSTR_VAL(extra_cmd), ZSTR_LEN(extra_cmd));
4415 	}
4416 
4417 	zend_hash_init(&ht_headers, 0, NULL, ZVAL_PTR_DTOR, 0);
4418 
4419 	if (str_headers != NULL) {
4420 		_php_mbstr_parse_mail_headers(&ht_headers, ZSTR_VAL(str_headers), ZSTR_LEN(str_headers));
4421 	}
4422 
4423 	if ((s = zend_hash_str_find(&ht_headers, "CONTENT-TYPE", sizeof("CONTENT-TYPE") - 1))) {
4424 		char *tmp;
4425 		char *param_name;
4426 		char *charset = NULL;
4427 
4428 		ZEND_ASSERT(Z_TYPE_P(s) == IS_STRING);
4429 		p = strchr(Z_STRVAL_P(s), ';');
4430 
4431 		if (p != NULL) {
4432 			/* skipping the padded spaces */
4433 			do {
4434 				++p;
4435 			} while (*p == ' ' || *p == '\t');
4436 
4437 			if (*p != '\0') {
4438 				if ((param_name = php_strtok_r(p, "= ", &tmp)) != NULL) {
4439 					if (strcasecmp(param_name, "charset") == 0) {
4440 						const mbfl_encoding *_tran_cs = tran_cs;
4441 
4442 						charset = php_strtok_r(NULL, "= \"", &tmp);
4443 						if (charset != NULL) {
4444 							_tran_cs = mbfl_name2encoding(charset);
4445 						}
4446 
4447 						if (!_tran_cs) {
4448 							php_error_docref(NULL, E_WARNING, "Unsupported charset \"%s\" - will be regarded as ascii", charset);
4449 							_tran_cs = &mbfl_encoding_ascii;
4450 						}
4451 						tran_cs = _tran_cs;
4452 					}
4453 				}
4454 			}
4455 		}
4456 		suppressed_hdrs.cnt_type = 1;
4457 	}
4458 
4459 	if ((s = zend_hash_str_find(&ht_headers, "CONTENT-TRANSFER-ENCODING", sizeof("CONTENT-TRANSFER-ENCODING") - 1))) {
4460 		const mbfl_encoding *_body_enc;
4461 
4462 		ZEND_ASSERT(Z_TYPE_P(s) == IS_STRING);
4463 		_body_enc = mbfl_name2encoding(Z_STRVAL_P(s));
4464 		switch (_body_enc ? _body_enc->no_encoding : mbfl_no_encoding_invalid) {
4465 			case mbfl_no_encoding_base64:
4466 			case mbfl_no_encoding_7bit:
4467 			case mbfl_no_encoding_8bit:
4468 				body_enc = _body_enc;
4469 				break;
4470 
4471 			default:
4472 				php_error_docref(NULL, E_WARNING, "Unsupported transfer encoding \"%s\" - will be regarded as 8bit", Z_STRVAL_P(s));
4473 				body_enc =	&mbfl_encoding_8bit;
4474 				break;
4475 		}
4476 		suppressed_hdrs.cnt_trans_enc = 1;
4477 	}
4478 
4479 	/* To: */
4480 	if (to_len > 0) {
4481 		to_r = estrndup(to, to_len);
4482 		for (; to_len; to_len--) {
4483 			if (!isspace((unsigned char) to_r[to_len - 1])) {
4484 				break;
4485 			}
4486 			to_r[to_len - 1] = '\0';
4487 		}
4488 		for (i = 0; to_r[i]; i++) {
4489 		if (iscntrl((unsigned char) to_r[i])) {
4490 			/* According to RFC 822, section 3.1.1 long headers may be separated into
4491 			 * parts using CRLF followed at least one linear-white-space character ('\t' or ' ').
4492 			 * To prevent these separators from being replaced with a space, we use the
4493 			 * SKIP_LONG_HEADER_SEP_MBSTRING to skip over them.
4494 			 */
4495 			SKIP_LONG_HEADER_SEP_MBSTRING(to_r, i);
4496 			to_r[i] = ' ';
4497 		}
4498 		}
4499 	} else {
4500 		to_r = to;
4501 	}
4502 
4503 	/* Subject: */
4504 	orig_str.no_language = MBSTRG(language);
4505 	orig_str.val = (unsigned char *)subject;
4506 	orig_str.len = subject_len;
4507 	orig_str.encoding = MBSTRG(current_internal_encoding);
4508 	if (orig_str.encoding->no_encoding == mbfl_no_encoding_invalid
4509 			|| orig_str.encoding->no_encoding == mbfl_no_encoding_pass) {
4510 		orig_str.encoding = mbfl_identify_encoding(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
4511 	}
4512 	pstr = mbfl_mime_header_encode(&orig_str, &conv_str, tran_cs, head_enc, "\n", sizeof("Subject: [PHP-jp nnnnnnnn]"));
4513 	if (pstr != NULL) {
4514 		subject_buf = subject = (char *)pstr->val;
4515 	}
4516 
4517 	/* message body */
4518 	orig_str.no_language = MBSTRG(language);
4519 	orig_str.val = (unsigned char *)message;
4520 	orig_str.len = message_len;
4521 	orig_str.encoding = MBSTRG(current_internal_encoding);
4522 
4523 	if (orig_str.encoding->no_encoding == mbfl_no_encoding_invalid
4524 			|| orig_str.encoding->no_encoding == mbfl_no_encoding_pass) {
4525 		orig_str.encoding = mbfl_identify_encoding(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
4526 	}
4527 
4528 	pstr = NULL;
4529 	{
4530 		mbfl_string tmpstr;
4531 
4532 		if (mbfl_convert_encoding(&orig_str, &tmpstr, tran_cs) != NULL) {
4533 			tmpstr.encoding = &mbfl_encoding_8bit;
4534 			pstr = mbfl_convert_encoding(&tmpstr, &conv_str, body_enc);
4535 			efree(tmpstr.val);
4536 		}
4537 	}
4538 	if (pstr != NULL) {
4539 		message_buf = message = (char *)pstr->val;
4540 	}
4541 
4542 	/* other headers */
4543 #define PHP_MBSTR_MAIL_MIME_HEADER1 "MIME-Version: 1.0"
4544 #define PHP_MBSTR_MAIL_MIME_HEADER2 "Content-Type: text/plain"
4545 #define PHP_MBSTR_MAIL_MIME_HEADER3 "; charset="
4546 #define PHP_MBSTR_MAIL_MIME_HEADER4 "Content-Transfer-Encoding: "
4547 	if (str_headers != NULL) {
4548 		p = ZSTR_VAL(str_headers);
4549 		n = ZSTR_LEN(str_headers);
4550 		mbfl_memory_device_strncat(&device, p, n);
4551 		if (n > 0 && p[n - 1] != '\n') {
4552 			mbfl_memory_device_strncat(&device, "\n", 1);
4553 		}
4554 		zend_string_release_ex(str_headers, 0);
4555 	}
4556 
4557 	if (!zend_hash_str_exists(&ht_headers, "MIME-VERSION", sizeof("MIME-VERSION") - 1)) {
4558 		mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER1, sizeof(PHP_MBSTR_MAIL_MIME_HEADER1) - 1);
4559 		mbfl_memory_device_strncat(&device, "\n", 1);
4560 	}
4561 
4562 	if (!suppressed_hdrs.cnt_type) {
4563 		mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER2, sizeof(PHP_MBSTR_MAIL_MIME_HEADER2) - 1);
4564 
4565 		p = (char *)mbfl_no2preferred_mime_name(tran_cs->no_encoding);
4566 		if (p != NULL) {
4567 			mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER3, sizeof(PHP_MBSTR_MAIL_MIME_HEADER3) - 1);
4568 			mbfl_memory_device_strcat(&device, p);
4569 		}
4570 		mbfl_memory_device_strncat(&device, "\n", 1);
4571 	}
4572 	if (!suppressed_hdrs.cnt_trans_enc) {
4573 		mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER4, sizeof(PHP_MBSTR_MAIL_MIME_HEADER4) - 1);
4574 		p = (char *)mbfl_no2preferred_mime_name(body_enc->no_encoding);
4575 		if (p == NULL) {
4576 			p = "7bit";
4577 		}
4578 		mbfl_memory_device_strcat(&device, p);
4579 		mbfl_memory_device_strncat(&device, "\n", 1);
4580 	}
4581 
4582 	mbfl_memory_device_unput(&device);
4583 	mbfl_memory_device_output('\0', &device);
4584 	str_headers = zend_string_init((char *)device.buffer, strlen((char *)device.buffer), 0);
4585 
4586 	if (force_extra_parameters) {
4587 		extra_cmd = php_escape_shell_cmd(force_extra_parameters);
4588 	} else if (extra_cmd) {
4589 		extra_cmd = php_escape_shell_cmd(ZSTR_VAL(extra_cmd));
4590 	}
4591 
4592 	if (!err && php_mail(to_r, subject, message, ZSTR_VAL(str_headers), extra_cmd ? ZSTR_VAL(extra_cmd) : NULL)) {
4593 		RETVAL_TRUE;
4594 	} else {
4595 		RETVAL_FALSE;
4596 	}
4597 
4598 	if (extra_cmd) {
4599 		zend_string_release_ex(extra_cmd, 0);
4600 	}
4601 
4602 	if (to_r != to) {
4603 		efree(to_r);
4604 	}
4605 	if (subject_buf) {
4606 		efree((void *)subject_buf);
4607 	}
4608 	if (message_buf) {
4609 		efree((void *)message_buf);
4610 	}
4611 	mbfl_memory_device_clear(&device);
4612 	zend_hash_destroy(&ht_headers);
4613 	if (str_headers) {
4614 		zend_string_release_ex(str_headers, 0);
4615 	}
4616 }
4617 
4618 #undef SKIP_LONG_HEADER_SEP_MBSTRING
4619 #undef MAIL_ASCIIZ_CHECK_MBSTRING
4620 #undef PHP_MBSTR_MAIL_MIME_HEADER1
4621 #undef PHP_MBSTR_MAIL_MIME_HEADER2
4622 #undef PHP_MBSTR_MAIL_MIME_HEADER3
4623 #undef PHP_MBSTR_MAIL_MIME_HEADER4
4624 /* }}} */
4625 
4626 /* {{{ proto mixed mb_get_info([string type])
4627    Returns the current settings of mbstring */
PHP_FUNCTION(mb_get_info)4628 PHP_FUNCTION(mb_get_info)
4629 {
4630 	char *typ = NULL;
4631 	size_t typ_len;
4632 	size_t n;
4633 	char *name;
4634 	const struct mb_overload_def *over_func;
4635 	zval row1, row2;
4636 	const mbfl_language *lang = mbfl_no2language(MBSTRG(language));
4637 	const mbfl_encoding **entry;
4638 
4639 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s", &typ, &typ_len) == FAILURE) {
4640 		return;
4641 	}
4642 
4643 	if (!typ || !strcasecmp("all", typ)) {
4644 		array_init(return_value);
4645 		if (MBSTRG(current_internal_encoding)) {
4646 			add_assoc_string(return_value, "internal_encoding", (char *)MBSTRG(current_internal_encoding)->name);
4647 		}
4648 		if (MBSTRG(http_input_identify)) {
4649 			add_assoc_string(return_value, "http_input", (char *)MBSTRG(http_input_identify)->name);
4650 		}
4651 		if (MBSTRG(current_http_output_encoding)) {
4652 			add_assoc_string(return_value, "http_output", (char *)MBSTRG(current_http_output_encoding)->name);
4653 		}
4654 		if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes") - 1, 0)) != NULL) {
4655 			add_assoc_string(return_value, "http_output_conv_mimetypes", name);
4656 		}
4657 		add_assoc_long(return_value, "func_overload", MBSTRG(func_overload));
4658 		if (MBSTRG(func_overload)){
4659 			over_func = &(mb_ovld[0]);
4660 			array_init(&row1);
4661 			while (over_func->type > 0) {
4662 				if ((MBSTRG(func_overload) & over_func->type) == over_func->type ) {
4663 					add_assoc_string(&row1, over_func->orig_func, over_func->ovld_func);
4664 				}
4665 				over_func++;
4666 			}
4667 			add_assoc_zval(return_value, "func_overload_list", &row1);
4668 		} else {
4669 			add_assoc_string(return_value, "func_overload_list", "no overload");
4670  		}
4671 		if (lang != NULL) {
4672 			if ((name = (char *)mbfl_no_encoding2name(lang->mail_charset)) != NULL) {
4673 				add_assoc_string(return_value, "mail_charset", name);
4674 			}
4675 			if ((name = (char *)mbfl_no_encoding2name(lang->mail_header_encoding)) != NULL) {
4676 				add_assoc_string(return_value, "mail_header_encoding", name);
4677 			}
4678 			if ((name = (char *)mbfl_no_encoding2name(lang->mail_body_encoding)) != NULL) {
4679 				add_assoc_string(return_value, "mail_body_encoding", name);
4680 			}
4681 		}
4682 		add_assoc_long(return_value, "illegal_chars", MBSTRG(illegalchars));
4683 		if (MBSTRG(encoding_translation)) {
4684 			add_assoc_string(return_value, "encoding_translation", "On");
4685 		} else {
4686 			add_assoc_string(return_value, "encoding_translation", "Off");
4687 		}
4688 		if ((name = (char *)mbfl_no_language2name(MBSTRG(language))) != NULL) {
4689 			add_assoc_string(return_value, "language", name);
4690 		}
4691 		n = MBSTRG(current_detect_order_list_size);
4692 		entry = MBSTRG(current_detect_order_list);
4693 		if (n > 0) {
4694 			size_t i;
4695 			array_init(&row2);
4696 			for (i = 0; i < n; i++) {
4697 				add_next_index_string(&row2, (*entry)->name);
4698 				entry++;
4699 			}
4700 			add_assoc_zval(return_value, "detect_order", &row2);
4701 		}
4702 		if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
4703 			add_assoc_string(return_value, "substitute_character", "none");
4704 		} else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
4705 			add_assoc_string(return_value, "substitute_character", "long");
4706 		} else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
4707 			add_assoc_string(return_value, "substitute_character", "entity");
4708 		} else {
4709 			add_assoc_long(return_value, "substitute_character", MBSTRG(current_filter_illegal_substchar));
4710 		}
4711 		if (MBSTRG(strict_detection)) {
4712 			add_assoc_string(return_value, "strict_detection", "On");
4713 		} else {
4714 			add_assoc_string(return_value, "strict_detection", "Off");
4715 		}
4716 	} else if (!strcasecmp("internal_encoding", typ)) {
4717 		if (MBSTRG(current_internal_encoding)) {
4718 			RETVAL_STRING((char *)MBSTRG(current_internal_encoding)->name);
4719 		}
4720 	} else if (!strcasecmp("http_input", typ)) {
4721 		if (MBSTRG(http_input_identify)) {
4722 			RETVAL_STRING((char *)MBSTRG(http_input_identify)->name);
4723 		}
4724 	} else if (!strcasecmp("http_output", typ)) {
4725 		if (MBSTRG(current_http_output_encoding)) {
4726 			RETVAL_STRING((char *)MBSTRG(current_http_output_encoding)->name);
4727 		}
4728 	} else if (!strcasecmp("http_output_conv_mimetypes", typ)) {
4729 		if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes") - 1, 0)) != NULL) {
4730 			RETVAL_STRING(name);
4731 		}
4732 	} else if (!strcasecmp("func_overload", typ)) {
4733  		RETVAL_LONG(MBSTRG(func_overload));
4734 	} else if (!strcasecmp("func_overload_list", typ)) {
4735 		if (MBSTRG(func_overload)){
4736 				over_func = &(mb_ovld[0]);
4737 				array_init(return_value);
4738 				while (over_func->type > 0) {
4739 					if ((MBSTRG(func_overload) & over_func->type) == over_func->type ) {
4740 						add_assoc_string(return_value, over_func->orig_func, over_func->ovld_func);
4741 					}
4742 					over_func++;
4743 				}
4744 		} else {
4745 			RETVAL_STRING("no overload");
4746 		}
4747 	} else if (!strcasecmp("mail_charset", typ)) {
4748 		if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_charset)) != NULL) {
4749 			RETVAL_STRING(name);
4750 		}
4751 	} else if (!strcasecmp("mail_header_encoding", typ)) {
4752 		if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_header_encoding)) != NULL) {
4753 			RETVAL_STRING(name);
4754 		}
4755 	} else if (!strcasecmp("mail_body_encoding", typ)) {
4756 		if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_body_encoding)) != NULL) {
4757 			RETVAL_STRING(name);
4758 		}
4759 	} else if (!strcasecmp("illegal_chars", typ)) {
4760 		RETVAL_LONG(MBSTRG(illegalchars));
4761 	} else if (!strcasecmp("encoding_translation", typ)) {
4762 		if (MBSTRG(encoding_translation)) {
4763 			RETVAL_STRING("On");
4764 		} else {
4765 			RETVAL_STRING("Off");
4766 		}
4767 	} else if (!strcasecmp("language", typ)) {
4768 		if ((name = (char *)mbfl_no_language2name(MBSTRG(language))) != NULL) {
4769 			RETVAL_STRING(name);
4770 		}
4771 	} else if (!strcasecmp("detect_order", typ)) {
4772 		n = MBSTRG(current_detect_order_list_size);
4773 		entry = MBSTRG(current_detect_order_list);
4774 		if (n > 0) {
4775 			size_t i;
4776 			array_init(return_value);
4777 			for (i = 0; i < n; i++) {
4778 				add_next_index_string(return_value, (*entry)->name);
4779 				entry++;
4780 			}
4781 		}
4782 	} else if (!strcasecmp("substitute_character", typ)) {
4783 		if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
4784 			RETVAL_STRING("none");
4785 		} else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
4786 			RETVAL_STRING("long");
4787 		} else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
4788 			RETVAL_STRING("entity");
4789 		} else {
4790 			RETVAL_LONG(MBSTRG(current_filter_illegal_substchar));
4791 		}
4792 	} else if (!strcasecmp("strict_detection", typ)) {
4793 		if (MBSTRG(strict_detection)) {
4794 			RETVAL_STRING("On");
4795 		} else {
4796 			RETVAL_STRING("Off");
4797 		}
4798 	} else {
4799 		RETURN_FALSE;
4800 	}
4801 }
4802 /* }}} */
4803 
4804 
php_mb_init_convd(const mbfl_encoding * encoding)4805 static inline mbfl_buffer_converter *php_mb_init_convd(const mbfl_encoding *encoding)
4806 {
4807 	mbfl_buffer_converter *convd;
4808 
4809 	convd = mbfl_buffer_converter_new(encoding, encoding, 0);
4810 	if (convd == NULL) {
4811 		return NULL;
4812 	}
4813 	mbfl_buffer_converter_illegal_mode(convd, MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE);
4814 	mbfl_buffer_converter_illegal_substchar(convd, 0);
4815 	return convd;
4816 }
4817 
4818 
/* Feeds `input` (`length` bytes) through `convd` and reports whether it came
 * back unchanged: returns 1 when the converter produced a result, counted no
 * illegal characters, and the output is byte-for-byte equal to the input;
 * returns 0 otherwise. */
static inline int php_mb_check_encoding_impl(mbfl_buffer_converter *convd, const char *input, size_t length, const mbfl_encoding *encoding) {
	mbfl_string src, dst;
	mbfl_string *converted;
	size_t bad_chars;
	int same;

	mbfl_string_init_set(&src, mbfl_no_language_neutral, encoding);
	mbfl_string_init(&dst);
	src.val = (unsigned char *) input;
	src.len = length;

	converted = mbfl_buffer_converter_feed_result(convd, &src, &dst);
	bad_chars = mbfl_buffer_illegalchars(convd);

	if (converted == NULL) {
		return 0;
	}

	same = (bad_chars == 0
		&& src.len == dst.len
		&& memcmp(src.val, dst.val, src.len) == 0);
	mbfl_string_clear(&dst);
	return same;
}
4842 
4843 
php_mb_check_encoding(const char * input,size_t length,const char * enc)4844 MBSTRING_API int php_mb_check_encoding(const char *input, size_t length, const char *enc)
4845 {
4846 	const mbfl_encoding *encoding = MBSTRG(current_internal_encoding);
4847 	mbfl_buffer_converter *convd;
4848 
4849 	if (input == NULL) {
4850 		return MBSTRG(illegalchars) == 0;
4851 	}
4852 
4853 	if (enc != NULL) {
4854 		encoding = mbfl_name2encoding(enc);
4855 		if (!encoding || encoding == &mbfl_encoding_pass) {
4856 			php_error_docref(NULL, E_WARNING, "Invalid encoding \"%s\"", enc);
4857 			return 0;
4858 		}
4859 	}
4860 
4861 	convd = php_mb_init_convd(encoding);
4862 	if (convd == NULL) {
4863 		php_error_docref(NULL, E_WARNING, "Unable to create converter");
4864 		return 0;
4865 	}
4866 
4867 	if (php_mb_check_encoding_impl(convd, input, length, encoding)) {
4868 		mbfl_buffer_converter_delete(convd);
4869 		return 1;
4870 	}
4871 	mbfl_buffer_converter_delete(convd);
4872 	return 0;
4873 }
4874 
4875 
php_mb_check_encoding_recursive(HashTable * vars,const zend_string * enc)4876 MBSTRING_API int php_mb_check_encoding_recursive(HashTable *vars, const zend_string *enc)
4877 {
4878 	const mbfl_encoding *encoding = MBSTRG(current_internal_encoding);
4879 	mbfl_buffer_converter *convd;
4880 	zend_long idx;
4881 	zend_string *key;
4882 	zval *entry;
4883 	int valid = 1;
4884 
4885 	(void)(idx);
4886 
4887 	if (enc != NULL) {
4888 		encoding = mbfl_name2encoding(ZSTR_VAL(enc));
4889 		if (!encoding || encoding == &mbfl_encoding_pass) {
4890 			php_error_docref(NULL, E_WARNING, "Invalid encoding \"%s\"", ZSTR_VAL(enc));
4891 			return 0;
4892 		}
4893 	}
4894 
4895 	convd = php_mb_init_convd(encoding);
4896 	if (convd == NULL) {
4897 		php_error_docref(NULL, E_WARNING, "Unable to create converter");
4898 		return 0;
4899 	}
4900 
4901 	if (GC_IS_RECURSIVE(vars)) {
4902 		mbfl_buffer_converter_delete(convd);
4903 		php_error_docref(NULL, E_WARNING, "Cannot not handle circular references");
4904 		return 0;
4905 	}
4906 	GC_TRY_PROTECT_RECURSION(vars);
4907 	ZEND_HASH_FOREACH_KEY_VAL(vars, idx, key, entry) {
4908 		ZVAL_DEREF(entry);
4909 		if (key) {
4910 			if (!php_mb_check_encoding_impl(convd, ZSTR_VAL(key), ZSTR_LEN(key), encoding)) {
4911 				valid = 0;
4912 				break;
4913 			}
4914 		}
4915 		switch (Z_TYPE_P(entry)) {
4916 			case IS_STRING:
4917 				if (!php_mb_check_encoding_impl(convd, Z_STRVAL_P(entry), Z_STRLEN_P(entry), encoding)) {
4918 					valid = 0;
4919 					break;
4920 				}
4921 				break;
4922 			case IS_ARRAY:
4923 				if (!php_mb_check_encoding_recursive(Z_ARRVAL_P(entry), enc)) {
4924 					valid = 0;
4925 					break;
4926 				}
4927 				break;
4928 			case IS_LONG:
4929 			case IS_DOUBLE:
4930 			case IS_NULL:
4931 			case IS_TRUE:
4932 			case IS_FALSE:
4933 				break;
4934 			default:
4935 				/* Other types are error. */
4936 				valid = 0;
4937 				break;
4938 		}
4939 	} ZEND_HASH_FOREACH_END();
4940 	GC_TRY_UNPROTECT_RECURSION(vars);
4941 	mbfl_buffer_converter_delete(convd);
4942 	return valid;
4943 }
4944 
4945 
4946 /* {{{ proto bool mb_check_encoding([mixed var[, string encoding]])
4947    Check if the string is valid for the specified encoding */
PHP_FUNCTION(mb_check_encoding)4948 PHP_FUNCTION(mb_check_encoding)
4949 {
4950 	zval *input = NULL;
4951 	zend_string *enc = NULL;
4952 
4953 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "|zS", &input, &enc) == FAILURE) {
4954 		return;
4955 	}
4956 
4957 	/* FIXME: Actually check all inputs, except $_FILES file content. */
4958 	if (input == NULL) {
4959 		if (MBSTRG(illegalchars) == 0) {
4960 			RETURN_TRUE;
4961 		}
4962 		RETURN_FALSE;
4963 	}
4964 
4965 	if (Z_TYPE_P(input) == IS_ARRAY) {
4966 		if (!php_mb_check_encoding_recursive(HASH_OF(input), enc)) {
4967 			RETURN_FALSE;
4968 		}
4969 	} else {
4970 		if (!try_convert_to_string(input)) {
4971 			RETURN_FALSE;
4972 		}
4973 		if (!php_mb_check_encoding(Z_STRVAL_P(input), Z_STRLEN_P(input), enc ? ZSTR_VAL(enc): NULL)) {
4974 			RETURN_FALSE;
4975 		}
4976 	}
4977 	RETURN_TRUE;
4978 }
4979 /* }}} */
4980 
4981 
php_mb_ord(const char * str,size_t str_len,zend_string * enc_name)4982 static inline zend_long php_mb_ord(const char *str, size_t str_len, zend_string *enc_name)
4983 {
4984 	const mbfl_encoding *enc;
4985 	enum mbfl_no_encoding no_enc;
4986 
4987 	enc = php_mb_get_encoding(enc_name);
4988 	if (!enc) {
4989 		return -1;
4990 	}
4991 
4992 	no_enc = enc->no_encoding;
4993 	if (php_mb_is_unsupported_no_encoding(no_enc)) {
4994 		php_error_docref(NULL, E_WARNING, "Unsupported encoding \"%s\"", enc->name);
4995 		return -1;
4996 	}
4997 
4998 	if (str_len == 0) {
4999 		php_error_docref(NULL, E_WARNING, "Empty string");
5000 		return -1;
5001 	}
5002 
5003 	{
5004 		mbfl_wchar_device dev;
5005 		mbfl_convert_filter *filter;
5006 		zend_long cp;
5007 
5008 		mbfl_wchar_device_init(&dev);
5009 		filter = mbfl_convert_filter_new(
5010 			enc, &mbfl_encoding_wchar,
5011 			mbfl_wchar_device_output, 0, &dev);
5012 		if (!filter) {
5013 			php_error_docref(NULL, E_WARNING, "Creation of filter failed");
5014 			return -1;
5015 		}
5016 
5017 		mbfl_convert_filter_feed_string(filter, (const unsigned char *) str, str_len);
5018 		mbfl_convert_filter_flush(filter);
5019 
5020 		if (dev.pos < 1 || filter->num_illegalchar || dev.buffer[0] >= MBFL_WCSGROUP_UCS4MAX) {
5021 			mbfl_convert_filter_delete(filter);
5022 			mbfl_wchar_device_clear(&dev);
5023 			return -1;
5024 		}
5025 
5026 		cp = dev.buffer[0];
5027 		mbfl_convert_filter_delete(filter);
5028 		mbfl_wchar_device_clear(&dev);
5029 		return cp;
5030 	}
5031 }
5032 
5033 
5034 /* {{{ proto int|false mb_ord([string str[, string encoding]]) */
PHP_FUNCTION(mb_ord)5035 PHP_FUNCTION(mb_ord)
5036 {
5037 	char *str;
5038 	size_t str_len;
5039 	zend_string *enc = NULL;
5040 	zend_long cp;
5041 
5042 	ZEND_PARSE_PARAMETERS_START(1, 2)
5043 		Z_PARAM_STRING(str, str_len)
5044 		Z_PARAM_OPTIONAL
5045 		Z_PARAM_STR(enc)
5046 	ZEND_PARSE_PARAMETERS_END();
5047 
5048 	cp = php_mb_ord(str, str_len, enc);
5049 
5050 	if (0 > cp) {
5051 		RETURN_FALSE;
5052 	}
5053 
5054 	RETURN_LONG(cp);
5055 }
5056 /* }}} */
5057 
5058 
/* Encodes code point `cp` in the encoding resolved from `enc_name`.
 *
 * Returns a newly created zend_string, or NULL when the encoding cannot be
 * resolved or is unsupported, when `cp` is outside 0..0x10ffff, when `cp` is
 * a surrogate (0xd800..0xdfff) and the target is UTF-8, or when the
 * conversion fails or reports illegal characters.
 *
 * UTF-8 output is assembled directly; every other encoding goes through a
 * conversion from a 4-byte big-endian representation of `cp`. */
static inline zend_string *php_mb_chr(zend_long cp, zend_string *enc_name)
{
	const mbfl_encoding *enc;
	enum mbfl_no_encoding no_enc;
	zend_string *ret;
	char buf[5];
	char *ret_str;
	size_t ret_len = 0;
	long orig_illegalchars;
	int had_illegal;

	enc = php_mb_get_encoding(enc_name);
	if (!enc) {
		return NULL;
	}

	no_enc = enc->no_encoding;
	if (php_mb_is_unsupported_no_encoding(no_enc)) {
		php_error_docref(NULL, E_WARNING, "Unsupported encoding \"%s\"", enc->name);
		return NULL;
	}

	if (cp < 0 || cp > 0x10ffff) {
		return NULL;
	}

	if (php_mb_is_no_encoding_utf8(no_enc)) {
		/* Surrogates have no UTF-8 representation. */
		if (cp > 0xd7ff && 0xe000 > cp) {
			return NULL;
		}

		if (cp < 0x80) {
			ret = ZSTR_CHAR(cp);
		} else if (cp < 0x800) {
			ret = zend_string_alloc(2, 0);
			ZSTR_VAL(ret)[0] = 0xc0 | (cp >> 6);
			ZSTR_VAL(ret)[1] = 0x80 | (cp & 0x3f);
			ZSTR_VAL(ret)[2] = 0;
		} else if (cp < 0x10000) {
			ret = zend_string_alloc(3, 0);
			ZSTR_VAL(ret)[0] = 0xe0 | (cp >> 12);
			ZSTR_VAL(ret)[1] = 0x80 | ((cp >> 6) & 0x3f);
			ZSTR_VAL(ret)[2] = 0x80 | (cp & 0x3f);
			ZSTR_VAL(ret)[3] = 0;
		} else {
			ret = zend_string_alloc(4, 0);
			ZSTR_VAL(ret)[0] = 0xf0 | (cp >> 18);
			ZSTR_VAL(ret)[1] = 0x80 | ((cp >> 12) & 0x3f);
			ZSTR_VAL(ret)[2] = 0x80 | ((cp >> 6) & 0x3f);
			ZSTR_VAL(ret)[3] = 0x80 | (cp & 0x3f);
			ZSTR_VAL(ret)[4] = 0;
		}

		return ret;
	}

	/* Big-endian 4-byte form of the code point; a fixed-size stack buffer is
	 * enough, so no heap allocation (and no matching free) is needed. */
	buf[0] = (cp >> 24) & 0xff;
	buf[1] = (cp >> 16) & 0xff;
	buf[2] = (cp >>  8) & 0xff;
	buf[3] = cp & 0xff;
	buf[4] = 0;

	/* Temporarily zero the global illegal-character counter so that only this
	 * conversion is observed, then restore it whatever the outcome. */
	orig_illegalchars = MBSTRG(illegalchars);
	MBSTRG(illegalchars) = 0;
	ret_str = php_mb_convert_encoding_ex(buf, 4, enc, &mbfl_encoding_ucs4be, &ret_len);
	had_illegal = (MBSTRG(illegalchars) != 0);
	MBSTRG(illegalchars) = orig_illegalchars;

	/* The conversion may yield no buffer at all; building a string from a
	 * NULL pointer and an unset length must be avoided. */
	if (ret_str == NULL) {
		return NULL;
	}

	if (had_illegal) {
		efree(ret_str);
		return NULL;
	}

	ret = zend_string_init(ret_str, ret_len, 0);
	efree(ret_str);
	return ret;
}
5141 
5142 
5143 /* {{{ proto string|false mb_chr([int cp[, string encoding]]) */
PHP_FUNCTION(mb_chr)5144 PHP_FUNCTION(mb_chr)
5145 {
5146 	zend_long cp;
5147 	zend_string *enc = NULL;
5148 	zend_string* ret;
5149 
5150 	ZEND_PARSE_PARAMETERS_START(1, 2)
5151 		Z_PARAM_LONG(cp)
5152 		Z_PARAM_OPTIONAL
5153 		Z_PARAM_STR(enc)
5154 	ZEND_PARSE_PARAMETERS_END();
5155 
5156 	ret = php_mb_chr(cp, enc);
5157 	if (ret == NULL) {
5158 		RETURN_FALSE;
5159 	}
5160 
5161 	RETURN_STR(ret);
5162 }
5163 /* }}} */
5164 
5165 
/* Converts `str` from `enc` to the same `enc` and returns the converter's
 * output buffer, storing its length in `*ret_len`. */
static inline char* php_mb_scrub(const char* str, size_t str_len, const mbfl_encoding *enc, size_t *ret_len)
{
	char *scrubbed = php_mb_convert_encoding_ex(str, str_len, enc, enc, ret_len);

	return scrubbed;
}
5170 
5171 
5172 /* {{{ proto string|false mb_scrub([string str[, string encoding]]) */
PHP_FUNCTION(mb_scrub)5173 PHP_FUNCTION(mb_scrub)
5174 {
5175 	const mbfl_encoding *enc;
5176 	char* str;
5177 	size_t str_len;
5178 	zend_string *enc_name = NULL;
5179 	char *ret;
5180 	size_t ret_len;
5181 
5182 	ZEND_PARSE_PARAMETERS_START(1, 2)
5183 		Z_PARAM_STRING(str, str_len)
5184 		Z_PARAM_OPTIONAL
5185 		Z_PARAM_STR(enc_name)
5186 	ZEND_PARSE_PARAMETERS_END();
5187 
5188 	enc = php_mb_get_encoding(enc_name);
5189 	if (!enc) {
5190 		RETURN_FALSE;
5191 	}
5192 
5193 	ret = php_mb_scrub(str, str_len, enc, &ret_len);
5194 
5195 	if (ret == NULL) {
5196 		RETURN_FALSE;
5197 	}
5198 
5199 	RETVAL_STRINGL(ret, ret_len);
5200 	efree(ret);
5201 }
5202 /* }}} */
5203 
5204 
5205 /* {{{ php_mb_populate_current_detect_order_list */
php_mb_populate_current_detect_order_list(void)5206 static void php_mb_populate_current_detect_order_list(void)
5207 {
5208 	const mbfl_encoding **entry = 0;
5209 	size_t nentries;
5210 
5211 	if (MBSTRG(detect_order_list) && MBSTRG(detect_order_list_size)) {
5212 		nentries = MBSTRG(detect_order_list_size);
5213 		entry = (const mbfl_encoding **)safe_emalloc(nentries, sizeof(mbfl_encoding*), 0);
5214 		memcpy(entry, MBSTRG(detect_order_list), sizeof(mbfl_encoding*) * nentries);
5215 	} else {
5216 		const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
5217 		size_t i;
5218 		nentries = MBSTRG(default_detect_order_list_size);
5219 		entry = (const mbfl_encoding **)safe_emalloc(nentries, sizeof(mbfl_encoding*), 0);
5220 		for (i = 0; i < nentries; i++) {
5221 			entry[i] = mbfl_no2encoding(src[i]);
5222 		}
5223 	}
5224 	MBSTRG(current_detect_order_list) = entry;
5225 	MBSTRG(current_detect_order_list_size) = nentries;
5226 }
5227 /* }}} */
5228 
5229 /* {{{ static int php_mb_encoding_translation() */
php_mb_encoding_translation(void)5230 static int php_mb_encoding_translation(void)
5231 {
5232 	return MBSTRG(encoding_translation);
5233 }
5234 /* }}} */
5235 
5236 /* {{{ MBSTRING_API size_t php_mb_mbchar_bytes_ex() */
php_mb_mbchar_bytes_ex(const char * s,const mbfl_encoding * enc)5237 MBSTRING_API size_t php_mb_mbchar_bytes_ex(const char *s, const mbfl_encoding *enc)
5238 {
5239 	if (enc != NULL) {
5240 		if (enc->flag & MBFL_ENCTYPE_MBCS) {
5241 			if (enc->mblen_table != NULL) {
5242 				if (s != NULL) return enc->mblen_table[*(unsigned char *)s];
5243 			}
5244 		} else if (enc->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
5245 			return 2;
5246 		} else if (enc->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
5247 			return 4;
5248 		}
5249 	}
5250 	return 1;
5251 }
5252 /* }}} */
5253 
5254 /* {{{ MBSTRING_API size_t php_mb_mbchar_bytes() */
php_mb_mbchar_bytes(const char * s)5255 MBSTRING_API size_t php_mb_mbchar_bytes(const char *s)
5256 {
5257 	return php_mb_mbchar_bytes_ex(s, MBSTRG(internal_encoding));
5258 }
5259 /* }}} */
5260 
5261 /* {{{ MBSTRING_API char *php_mb_safe_strrchr_ex() */
php_mb_safe_strrchr_ex(const char * s,unsigned int c,size_t nbytes,const mbfl_encoding * enc)5262 MBSTRING_API char *php_mb_safe_strrchr_ex(const char *s, unsigned int c, size_t nbytes, const mbfl_encoding *enc)
5263 {
5264 	register const char *p = s;
5265 	char *last=NULL;
5266 
5267 	if (nbytes == (size_t)-1) {
5268 		size_t nb = 0;
5269 
5270 		while (*p != '\0') {
5271 			if (nb == 0) {
5272 				if ((unsigned char)*p == (unsigned char)c) {
5273 					last = (char *)p;
5274 				}
5275 				nb = php_mb_mbchar_bytes_ex(p, enc);
5276 				if (nb == 0) {
5277 					return NULL; /* something is going wrong! */
5278 				}
5279 			}
5280 			--nb;
5281 			++p;
5282 		}
5283 	} else {
5284 		register size_t bcnt = nbytes;
5285 		register size_t nbytes_char;
5286 		while (bcnt > 0) {
5287 			if ((unsigned char)*p == (unsigned char)c) {
5288 				last = (char *)p;
5289 			}
5290 			nbytes_char = php_mb_mbchar_bytes_ex(p, enc);
5291 			if (bcnt < nbytes_char) {
5292 				return NULL;
5293 			}
5294 			p += nbytes_char;
5295 			bcnt -= nbytes_char;
5296 		}
5297 	}
5298 	return last;
5299 }
5300 /* }}} */
5301 
5302 /* {{{ MBSTRING_API char *php_mb_safe_strrchr() */
php_mb_safe_strrchr(const char * s,unsigned int c,size_t nbytes)5303 MBSTRING_API char *php_mb_safe_strrchr(const char *s, unsigned int c, size_t nbytes)
5304 {
5305 	return php_mb_safe_strrchr_ex(s, c, nbytes, MBSTRG(internal_encoding));
5306 }
5307 /* }}} */
5308 
5309 /* {{{ MBSTRING_API int php_mb_stripos()
5310  */
php_mb_stripos(int mode,const char * old_haystack,size_t old_haystack_len,const char * old_needle,size_t old_needle_len,zend_long offset,zend_string * from_encoding)5311 MBSTRING_API size_t php_mb_stripos(int mode, const char *old_haystack, size_t old_haystack_len, const char *old_needle, size_t old_needle_len, zend_long offset, zend_string *from_encoding)
5312 {
5313 	size_t n = (size_t) -1;
5314 	mbfl_string haystack, needle;
5315 	const mbfl_encoding *enc;
5316 
5317 	enc = php_mb_get_encoding(from_encoding);
5318 	if (!enc) {
5319 		return (size_t) -1;
5320 	}
5321 
5322 	mbfl_string_init(&haystack);
5323 	mbfl_string_init(&needle);
5324 	haystack.no_language = MBSTRG(language);
5325 	haystack.encoding = enc;
5326 	needle.no_language = MBSTRG(language);
5327 	needle.encoding = enc;
5328 
5329 	do {
5330 		/* We're using simple case-folding here, because we'd have to deal with remapping of
5331 		 * offsets otherwise. */
5332 
5333 		size_t len = 0;
5334 		haystack.val = (unsigned char *)mbstring_convert_case(PHP_UNICODE_CASE_FOLD_SIMPLE, (char *)old_haystack, old_haystack_len, &len, enc);
5335 		haystack.len = len;
5336 
5337 		if (!haystack.val) {
5338 			break;
5339 		}
5340 
5341 		if (haystack.len == 0) {
5342 			break;
5343 		}
5344 
5345 		needle.val = (unsigned char *)mbstring_convert_case(PHP_UNICODE_CASE_FOLD_SIMPLE, (char *)old_needle, old_needle_len, &len, enc);
5346 		needle.len = len;
5347 
5348 		if (!needle.val) {
5349 			break;
5350 		}
5351 
5352 		if (needle.len == 0) {
5353 			break;
5354 		}
5355 
5356  		if (offset != 0) {
5357  			size_t haystack_char_len = mbfl_strlen(&haystack);
5358 
5359  			if (mode) {
5360 				if ((offset > 0 && (size_t)offset > haystack_char_len) ||
5361 					(offset < 0 && (size_t)(-offset) > haystack_char_len)) {
5362  					php_error_docref(NULL, E_WARNING, "Offset is greater than the length of haystack string");
5363  					break;
5364  				}
5365  			} else {
5366 				if (offset < 0) {
5367 					offset += (zend_long)haystack_char_len;
5368 				}
5369 				if (offset < 0 || (size_t)offset > haystack_char_len) {
5370  					php_error_docref(NULL, E_WARNING, "Offset not contained in string");
5371  					break;
5372  				}
5373  			}
5374 		}
5375 
5376 		n = mbfl_strpos(&haystack, &needle, offset, mode);
5377 	} while(0);
5378 
5379 	if (haystack.val) {
5380 		efree(haystack.val);
5381 	}
5382 
5383 	if (needle.val) {
5384 		efree(needle.val);
5385 	}
5386 
5387 	return n;
5388 }
5389 /* }}} */
5390 
php_mb_gpc_get_detect_order(const zend_encoding *** list,size_t * list_size)5391 static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *list_size) /* {{{ */
5392 {
5393 	*list = (const zend_encoding **)MBSTRG(http_input_list);
5394 	*list_size = MBSTRG(http_input_list_size);
5395 }
5396 /* }}} */
5397 
php_mb_gpc_set_input_encoding(const zend_encoding * encoding)5398 static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding) /* {{{ */
5399 {
5400 	MBSTRG(http_input_identify) = (const mbfl_encoding*)encoding;
5401 }
5402 /* }}} */
5403 
5404 #endif	/* HAVE_MBSTRING */
5405