xref: /PHP-7.2/ext/mbstring/mbstring.c (revision 45db6fa5)
1 /*
2    +----------------------------------------------------------------------+
3    | PHP Version 7                                                        |
4    +----------------------------------------------------------------------+
5    | Copyright (c) 1997-2018 The PHP Group                                |
6    +----------------------------------------------------------------------+
7    | This source file is subject to version 3.01 of the PHP license,      |
8    | that is bundled with this package in the file LICENSE, and is        |
9    | available through the world-wide-web at the following url:           |
10    | http://www.php.net/license/3_01.txt                                  |
11    | If you did not receive a copy of the PHP license and are unable to   |
12    | obtain it through the world-wide-web, please send a note to          |
13    | license@php.net so we can mail you a copy immediately.               |
14    +----------------------------------------------------------------------+
15    | Author: Tsukada Takuya <tsukada@fminn.nagano.nagano.jp>              |
16    |         Rui Hirokawa <hirokawa@php.net>                              |
17    +----------------------------------------------------------------------+
18  */
19 
20 /* $Id$ */
21 
22 /*
23  * PHP 4 Multibyte String module "mbstring"
24  *
25  * History:
26  *   2000.5.19  Release php-4.0RC2_jstring-1.0
27  *   2001.4.1   Release php4_jstring-1.0.91
28  *   2001.4.30  Release php4_jstring-1.1 (contribute to The PHP Group)
29  *   2001.5.1   Renamed from jstring to mbstring (hirokawa@php.net)
30  */
31 
32 /*
33  * PHP3 Internationalization support program.
34  *
35  * Copyright (c) 1999,2000 by the PHP3 internationalization team.
36  * All rights reserved.
37  *
38  * See README_PHP3-i18n-ja for more detail.
39  *
40  * Authors:
41  *    Hironori Sato <satoh@jpnnet.com>
42  *    Shigeru Kanemoto <sgk@happysize.co.jp>
43  *    Tsukada Takuya <tsukada@fminn.nagano.nagano.jp>
44  *    Rui Hirokawa <rui_hirokawa@ybb.ne.jp>
45  */
46 
47 /* {{{ includes */
48 #ifdef HAVE_CONFIG_H
49 #include "config.h"
50 #endif
51 
52 #include "php.h"
53 #include "php_ini.h"
54 #include "php_variables.h"
55 #include "mbstring.h"
56 #include "ext/standard/php_string.h"
57 #include "ext/standard/php_mail.h"
58 #include "ext/standard/exec.h"
59 #include "ext/standard/url.h"
60 #include "main/php_output.h"
61 #include "ext/standard/info.h"
62 
63 #include "libmbfl/mbfl/mbfl_allocators.h"
64 #include "libmbfl/mbfl/mbfilter_pass.h"
65 
66 #include "php_variables.h"
67 #include "php_globals.h"
68 #include "rfc1867.h"
69 #include "php_content_types.h"
70 #include "SAPI.h"
71 #include "php_unicode.h"
72 #include "TSRM.h"
73 
74 #include "mb_gpc.h"
75 
76 #if HAVE_MBREGEX
77 #include "php_mbregex.h"
78 #endif
79 
80 #include "zend_multibyte.h"
81 
82 #if HAVE_ONIG
83 #include "php_onig_compat.h"
84 #include <oniguruma.h>
85 #undef UChar
86 #elif HAVE_PCRE || HAVE_BUNDLED_PCRE
87 #include "ext/pcre/php_pcre.h"
88 #endif
89 /* }}} */
90 
91 #if HAVE_MBSTRING
92 
93 /* {{{ prototypes */
/* Declares the module-globals object for "mbstring"; the rest of this file
 * reads and writes it through the MBSTRG() accessor. */
ZEND_DECLARE_MODULE_GLOBALS(mbstring)

/* Globals init/shutdown hooks; both are wired into mbstring_module_entry
 * through PHP_GINIT(mbstring) / PHP_GSHUTDOWN(mbstring). */
static PHP_GINIT_FUNCTION(mbstring);
static PHP_GSHUTDOWN_FUNCTION(mbstring);

/* Forward declarations of file-local helpers. Their definitions are not part
 * of this section of the file. */
static void php_mb_populate_current_detect_order_list(void);

static int php_mb_encoding_translation(void);

static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *list_size);

static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding);

static inline zend_bool php_mb_is_unsupported_no_encoding(enum mbfl_no_encoding no_enc);

static inline zend_bool php_mb_is_no_encoding_utf8(enum mbfl_no_encoding no_enc);
110 /* }}} */
111 
112 /* {{{ php_mb_default_identify_list */
/* One row of php_mb_default_identify_list: associates a language with an
 * array of encoding identifiers. */
typedef struct _php_mb_nls_ident_list {
	enum mbfl_no_language lang;            /* language this row applies to */
	const enum mbfl_no_encoding *list;     /* encoding identifiers for that language */
	size_t list_size;                      /* number of elements in `list` */
} php_mb_nls_ident_list;
118 
/* Per-language encoding lists. Each array is referenced by exactly one row of
 * php_mb_default_identify_list. Every list starts with ASCII; all but the
 * Japanese one continue with UTF-8 before the language-specific encodings. */

/* Japanese */
static const enum mbfl_no_encoding php_mb_default_identify_list_ja[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_jis,
	mbfl_no_encoding_utf8,
	mbfl_no_encoding_euc_jp,
	mbfl_no_encoding_sjis
};

/* Simplified Chinese */
static const enum mbfl_no_encoding php_mb_default_identify_list_cn[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_utf8,
	mbfl_no_encoding_euc_cn,
	mbfl_no_encoding_cp936
};

/* Traditional Chinese */
static const enum mbfl_no_encoding php_mb_default_identify_list_tw_hk[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_utf8,
	mbfl_no_encoding_euc_tw,
	mbfl_no_encoding_big5
};

/* Korean */
static const enum mbfl_no_encoding php_mb_default_identify_list_kr[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_utf8,
	mbfl_no_encoding_euc_kr,
	mbfl_no_encoding_uhc
};

/* Russian */
static const enum mbfl_no_encoding php_mb_default_identify_list_ru[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_utf8,
	mbfl_no_encoding_koi8r,
	mbfl_no_encoding_cp1251,
	mbfl_no_encoding_cp866
};

/* Armenian */
static const enum mbfl_no_encoding php_mb_default_identify_list_hy[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_utf8,
	mbfl_no_encoding_armscii8
};

/* Turkish */
static const enum mbfl_no_encoding php_mb_default_identify_list_tr[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_utf8,
	mbfl_no_encoding_cp1254,
	mbfl_no_encoding_8859_9
};

/* Ukrainian */
static const enum mbfl_no_encoding php_mb_default_identify_list_ua[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_utf8,
	mbfl_no_encoding_koi8u
};

/* Language-neutral */
static const enum mbfl_no_encoding php_mb_default_identify_list_neut[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_utf8
};
179 
180 
181 static const php_mb_nls_ident_list php_mb_default_identify_list[] = {
182 	{ mbfl_no_language_japanese, php_mb_default_identify_list_ja, sizeof(php_mb_default_identify_list_ja) / sizeof(php_mb_default_identify_list_ja[0]) },
183 	{ mbfl_no_language_korean, php_mb_default_identify_list_kr, sizeof(php_mb_default_identify_list_kr) / sizeof(php_mb_default_identify_list_kr[0]) },
184 	{ mbfl_no_language_traditional_chinese, php_mb_default_identify_list_tw_hk, sizeof(php_mb_default_identify_list_tw_hk) / sizeof(php_mb_default_identify_list_tw_hk[0]) },
185 	{ mbfl_no_language_simplified_chinese, php_mb_default_identify_list_cn, sizeof(php_mb_default_identify_list_cn) / sizeof(php_mb_default_identify_list_cn[0]) },
186 	{ mbfl_no_language_russian, php_mb_default_identify_list_ru, sizeof(php_mb_default_identify_list_ru) / sizeof(php_mb_default_identify_list_ru[0]) },
187 	{ mbfl_no_language_armenian, php_mb_default_identify_list_hy, sizeof(php_mb_default_identify_list_hy) / sizeof(php_mb_default_identify_list_hy[0]) },
188 	{ mbfl_no_language_turkish, php_mb_default_identify_list_tr, sizeof(php_mb_default_identify_list_tr) / sizeof(php_mb_default_identify_list_tr[0]) },
189 	{ mbfl_no_language_ukrainian, php_mb_default_identify_list_ua, sizeof(php_mb_default_identify_list_ua) / sizeof(php_mb_default_identify_list_ua[0]) },
190 	{ mbfl_no_language_neutral, php_mb_default_identify_list_neut, sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]) }
191 };
192 
193 /* }}} */
194 
195 /* {{{ mb_overload_def mb_ovld[] */
/* Table of standard functions and their mb_* counterparts.
 * struct mb_overload_def is declared outside this file section; judging by
 * the values, the columns are presumably: overload category flag, name of the
 * standard function, name of the mb_* replacement, and the name under which
 * the original is kept (TODO confirm against the struct declaration).
 * The all-zero/NULL row terminates the table. */
static const struct mb_overload_def mb_ovld[] = {
	{MB_OVERLOAD_MAIL, "mail", "mb_send_mail", "mb_orig_mail"},
	{MB_OVERLOAD_STRING, "strlen", "mb_strlen", "mb_orig_strlen"},
	{MB_OVERLOAD_STRING, "strpos", "mb_strpos", "mb_orig_strpos"},
	{MB_OVERLOAD_STRING, "strrpos", "mb_strrpos", "mb_orig_strrpos"},
	{MB_OVERLOAD_STRING, "stripos", "mb_stripos", "mb_orig_stripos"},
	{MB_OVERLOAD_STRING, "strripos", "mb_strripos", "mb_orig_strripos"},
	{MB_OVERLOAD_STRING, "strstr", "mb_strstr", "mb_orig_strstr"},
	{MB_OVERLOAD_STRING, "strrchr", "mb_strrchr", "mb_orig_strrchr"},
	{MB_OVERLOAD_STRING, "stristr", "mb_stristr", "mb_orig_stristr"},
	{MB_OVERLOAD_STRING, "substr", "mb_substr", "mb_orig_substr"},
	{MB_OVERLOAD_STRING, "strtolower", "mb_strtolower", "mb_orig_strtolower"},
	{MB_OVERLOAD_STRING, "strtoupper", "mb_strtoupper", "mb_orig_strtoupper"},
	{MB_OVERLOAD_STRING, "substr_count", "mb_substr_count", "mb_orig_substr_count"},
	{0, NULL, NULL, NULL}
};
212 /* }}} */
213 
214 /* {{{ arginfo */
/* Argument metadata for the configuration / HTTP related functions.
 * NOTE(review): the signature comments below assume the Zend convention that
 * the last argument of ZEND_BEGIN_ARG_INFO_EX is the number of required
 * parameters and that a leading 1 in ZEND_ARG_INFO marks pass-by-reference
 * ("&") -- confirm against zend_API.h. */

/* mb_language([language]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_language, 0, 0, 0)
	ZEND_ARG_INFO(0, language)
ZEND_END_ARG_INFO()

/* mb_internal_encoding([encoding]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_internal_encoding, 0, 0, 0)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

/* mb_http_input([type]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_http_input, 0, 0, 0)
	ZEND_ARG_INFO(0, type)
ZEND_END_ARG_INFO()

/* mb_http_output([encoding]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_http_output, 0, 0, 0)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

/* mb_detect_order([encoding]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_detect_order, 0, 0, 0)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

/* mb_substitute_character([substchar]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substitute_character, 0, 0, 0)
	ZEND_ARG_INFO(0, substchar)
ZEND_END_ARG_INFO()

/* mb_preferred_mime_name(encoding) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_preferred_mime_name, 0, 0, 1)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

/* mb_parse_str(encoded_string [, &result]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_parse_str, 0, 0, 1)
	ZEND_ARG_INFO(0, encoded_string)
	ZEND_ARG_INFO(1, result)
ZEND_END_ARG_INFO()

/* mb_output_handler(contents, status) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_output_handler, 0, 0, 2)
	ZEND_ARG_INFO(0, contents)
	ZEND_ARG_INFO(0, status)
ZEND_END_ARG_INFO()
252 
/* Argument metadata for the string search / slicing / width functions.
 * NOTE(review): the signature comments below assume the Zend convention that
 * the last argument of ZEND_BEGIN_ARG_INFO_EX is the number of required
 * parameters -- confirm against zend_API.h. */

/* mb_strlen(str [, encoding]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strlen, 0, 0, 1)
	ZEND_ARG_INFO(0, str)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

/* mb_strpos(haystack, needle [, offset [, encoding]]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strpos, 0, 0, 2)
	ZEND_ARG_INFO(0, haystack)
	ZEND_ARG_INFO(0, needle)
	ZEND_ARG_INFO(0, offset)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

/* mb_strrpos(haystack, needle [, offset [, encoding]]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrpos, 0, 0, 2)
	ZEND_ARG_INFO(0, haystack)
	ZEND_ARG_INFO(0, needle)
	ZEND_ARG_INFO(0, offset)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

/* mb_stripos(haystack, needle [, offset [, encoding]]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_stripos, 0, 0, 2)
	ZEND_ARG_INFO(0, haystack)
	ZEND_ARG_INFO(0, needle)
	ZEND_ARG_INFO(0, offset)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

/* mb_strripos(haystack, needle [, offset [, encoding]]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strripos, 0, 0, 2)
	ZEND_ARG_INFO(0, haystack)
	ZEND_ARG_INFO(0, needle)
	ZEND_ARG_INFO(0, offset)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

/* mb_strstr(haystack, needle [, part [, encoding]]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strstr, 0, 0, 2)
	ZEND_ARG_INFO(0, haystack)
	ZEND_ARG_INFO(0, needle)
	ZEND_ARG_INFO(0, part)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

/* mb_strrchr(haystack, needle [, part [, encoding]]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrchr, 0, 0, 2)
	ZEND_ARG_INFO(0, haystack)
	ZEND_ARG_INFO(0, needle)
	ZEND_ARG_INFO(0, part)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

/* mb_stristr(haystack, needle [, part [, encoding]]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_stristr, 0, 0, 2)
	ZEND_ARG_INFO(0, haystack)
	ZEND_ARG_INFO(0, needle)
	ZEND_ARG_INFO(0, part)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

/* mb_strrichr(haystack, needle [, part [, encoding]]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrichr, 0, 0, 2)
	ZEND_ARG_INFO(0, haystack)
	ZEND_ARG_INFO(0, needle)
	ZEND_ARG_INFO(0, part)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

/* mb_substr_count(haystack, needle [, encoding]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substr_count, 0, 0, 2)
	ZEND_ARG_INFO(0, haystack)
	ZEND_ARG_INFO(0, needle)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

/* mb_substr(str, start [, length [, encoding]]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substr, 0, 0, 2)
	ZEND_ARG_INFO(0, str)
	ZEND_ARG_INFO(0, start)
	ZEND_ARG_INFO(0, length)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

/* mb_strcut(str, start [, length [, encoding]]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strcut, 0, 0, 2)
	ZEND_ARG_INFO(0, str)
	ZEND_ARG_INFO(0, start)
	ZEND_ARG_INFO(0, length)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

/* mb_strwidth(str [, encoding]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strwidth, 0, 0, 1)
	ZEND_ARG_INFO(0, str)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

/* mb_strimwidth(str, start, width [, trimmarker [, encoding]]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strimwidth, 0, 0, 3)
	ZEND_ARG_INFO(0, str)
	ZEND_ARG_INFO(0, start)
	ZEND_ARG_INFO(0, width)
	ZEND_ARG_INFO(0, trimmarker)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()
346 
/* Argument metadata for the conversion, detection, MIME, mail and
 * code-point functions.
 * NOTE(review): the signature comments below assume the Zend convention that
 * the last argument of ZEND_BEGIN_ARG_INFO_EX is the number of required
 * parameters and that a leading 1 marks pass-by-reference ("&") -- confirm
 * against zend_API.h. */

/* mb_convert_encoding(str, to [, from]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_encoding, 0, 0, 2)
	ZEND_ARG_INFO(0, str)
	ZEND_ARG_INFO(0, to)
	ZEND_ARG_INFO(0, from)
ZEND_END_ARG_INFO()

/* mb_convert_case(sourcestring, mode [, encoding]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_case, 0, 0, 2)
	ZEND_ARG_INFO(0, sourcestring)
	ZEND_ARG_INFO(0, mode)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

/* mb_strtoupper(sourcestring [, encoding]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strtoupper, 0, 0, 1)
	ZEND_ARG_INFO(0, sourcestring)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

/* mb_strtolower(sourcestring [, encoding]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strtolower, 0, 0, 1)
	ZEND_ARG_INFO(0, sourcestring)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

/* mb_detect_encoding(str [, encoding_list [, strict]]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_detect_encoding, 0, 0, 1)
	ZEND_ARG_INFO(0, str)
	ZEND_ARG_INFO(0, encoding_list)
	ZEND_ARG_INFO(0, strict)
ZEND_END_ARG_INFO()

/* mb_list_encodings() -- no parameters declared */
ZEND_BEGIN_ARG_INFO(arginfo_mb_list_encodings, 0)
ZEND_END_ARG_INFO()

/* mb_encoding_aliases(encoding) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encoding_aliases, 0, 0, 1)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

/* mb_encode_mimeheader(str [, charset [, transfer [, linefeed [, indent]]]]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encode_mimeheader, 0, 0, 1)
	ZEND_ARG_INFO(0, str)
	ZEND_ARG_INFO(0, charset)
	ZEND_ARG_INFO(0, transfer)
	ZEND_ARG_INFO(0, linefeed)
	ZEND_ARG_INFO(0, indent)
ZEND_END_ARG_INFO()

/* mb_decode_mimeheader(string) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_decode_mimeheader, 0, 0, 1)
	ZEND_ARG_INFO(0, string)
ZEND_END_ARG_INFO()

/* mb_convert_kana(str [, option [, encoding]]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_kana, 0, 0, 1)
	ZEND_ARG_INFO(0, str)
	ZEND_ARG_INFO(0, option)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

/* mb_convert_variables(to, from, &...vars) -- variadic tail, by reference */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_variables, 0, 0, 3)
	ZEND_ARG_INFO(0, to)
	ZEND_ARG_INFO(0, from)
	ZEND_ARG_VARIADIC_INFO(1, vars)
ZEND_END_ARG_INFO()

/* mb_encode_numericentity(string, convmap [, encoding [, is_hex]]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encode_numericentity, 0, 0, 2)
	ZEND_ARG_INFO(0, string)
	ZEND_ARG_INFO(0, convmap)
	ZEND_ARG_INFO(0, encoding)
	ZEND_ARG_INFO(0, is_hex)
ZEND_END_ARG_INFO()

/* mb_decode_numericentity(string, convmap [, encoding [, is_hex]]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_decode_numericentity, 0, 0, 2)
	ZEND_ARG_INFO(0, string)
	ZEND_ARG_INFO(0, convmap)
	ZEND_ARG_INFO(0, encoding)
	ZEND_ARG_INFO(0, is_hex)
ZEND_END_ARG_INFO()

/* mb_send_mail(to, subject, message [, additional_headers [, additional_parameters]]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_send_mail, 0, 0, 3)
	ZEND_ARG_INFO(0, to)
	ZEND_ARG_INFO(0, subject)
	ZEND_ARG_INFO(0, message)
	ZEND_ARG_INFO(0, additional_headers)
	ZEND_ARG_INFO(0, additional_parameters)
ZEND_END_ARG_INFO()

/* mb_get_info([type]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_get_info, 0, 0, 0)
	ZEND_ARG_INFO(0, type)
ZEND_END_ARG_INFO()

/* mb_check_encoding([var [, encoding]]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_check_encoding, 0, 0, 0)
	ZEND_ARG_INFO(0, var)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

/* mb_scrub(str [, encoding]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_scrub, 0, 0, 1)
	ZEND_ARG_INFO(0, str)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

/* mb_ord(str [, encoding]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ord, 0, 0, 1)
	ZEND_ARG_INFO(0, str)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

/* mb_chr(cp [, encoding]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_chr, 0, 0, 1)
	ZEND_ARG_INFO(0, cp)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()
451 
/* Argument metadata for the multibyte regex functions. None of these names is
 * referenced directly in mbstring_functions; they are presumably consumed by
 * PHP_MBREGEX_FUNCTION_ENTRIES (defined outside this file -- TODO confirm).
 * NOTE(review): the signature comments below assume the Zend convention that
 * the last argument of ZEND_BEGIN_ARG_INFO_EX is the number of required
 * parameters and that a leading 1 marks pass-by-reference ("&"). */

/* mb_regex_encoding([encoding]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_regex_encoding, 0, 0, 0)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

/* mb_ereg(pattern, string [, &registers]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg, 0, 0, 2)
	ZEND_ARG_INFO(0, pattern)
	ZEND_ARG_INFO(0, string)
	ZEND_ARG_INFO(1, registers)
ZEND_END_ARG_INFO()

/* mb_eregi(pattern, string [, &registers]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_eregi, 0, 0, 2)
	ZEND_ARG_INFO(0, pattern)
	ZEND_ARG_INFO(0, string)
	ZEND_ARG_INFO(1, registers)
ZEND_END_ARG_INFO()

/* mb_ereg_replace(pattern, replacement, string [, option]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_replace, 0, 0, 3)
	ZEND_ARG_INFO(0, pattern)
	ZEND_ARG_INFO(0, replacement)
	ZEND_ARG_INFO(0, string)
	ZEND_ARG_INFO(0, option)
ZEND_END_ARG_INFO()

/* mb_eregi_replace(pattern, replacement, string [, option]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_eregi_replace, 0, 0, 3)
	ZEND_ARG_INFO(0, pattern)
	ZEND_ARG_INFO(0, replacement)
	ZEND_ARG_INFO(0, string)
	ZEND_ARG_INFO(0, option)
ZEND_END_ARG_INFO()

/* mb_ereg_replace_callback(pattern, callback, string [, option]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_replace_callback, 0, 0, 3)
	ZEND_ARG_INFO(0, pattern)
	ZEND_ARG_INFO(0, callback)
	ZEND_ARG_INFO(0, string)
	ZEND_ARG_INFO(0, option)
ZEND_END_ARG_INFO()

/* mb_split(pattern, string [, limit]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_split, 0, 0, 2)
	ZEND_ARG_INFO(0, pattern)
	ZEND_ARG_INFO(0, string)
	ZEND_ARG_INFO(0, limit)
ZEND_END_ARG_INFO()

/* mb_ereg_match(pattern, string [, option]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_match, 0, 0, 2)
	ZEND_ARG_INFO(0, pattern)
	ZEND_ARG_INFO(0, string)
	ZEND_ARG_INFO(0, option)
ZEND_END_ARG_INFO()

/* mb_ereg_search([pattern [, option]]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search, 0, 0, 0)
	ZEND_ARG_INFO(0, pattern)
	ZEND_ARG_INFO(0, option)
ZEND_END_ARG_INFO()

/* mb_ereg_search_pos([pattern [, option]]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_pos, 0, 0, 0)
	ZEND_ARG_INFO(0, pattern)
	ZEND_ARG_INFO(0, option)
ZEND_END_ARG_INFO()

/* mb_ereg_search_regs([pattern [, option]]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_regs, 0, 0, 0)
	ZEND_ARG_INFO(0, pattern)
	ZEND_ARG_INFO(0, option)
ZEND_END_ARG_INFO()

/* mb_ereg_search_init(string [, pattern [, option]]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_init, 0, 0, 1)
	ZEND_ARG_INFO(0, string)
	ZEND_ARG_INFO(0, pattern)
	ZEND_ARG_INFO(0, option)
ZEND_END_ARG_INFO()

/* mb_ereg_search_getregs() -- no parameters declared */
ZEND_BEGIN_ARG_INFO(arginfo_mb_ereg_search_getregs, 0)
ZEND_END_ARG_INFO()

/* mb_ereg_search_getpos() -- no parameters declared */
ZEND_BEGIN_ARG_INFO(arginfo_mb_ereg_search_getpos, 0)
ZEND_END_ARG_INFO()

/* mb_ereg_search_setpos(position) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_setpos, 0, 0, 1)
	ZEND_ARG_INFO(0, position)
ZEND_END_ARG_INFO()

/* mb_regex_set_options([options]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_regex_set_options, 0, 0, 0)
	ZEND_ARG_INFO(0, options)
ZEND_END_ARG_INFO()
535 /* }}} */
536 
537 /* {{{ zend_function_entry mbstring_functions[] */
/* Function table handed to the engine through mbstring_module_entry. Each
 * PHP_FE row pairs a function name with its arginfo record; the regex entries
 * are appended only when HAVE_MBREGEX is set, and PHP_FE_END closes the table. */
const zend_function_entry mbstring_functions[] = {
	PHP_FE(mb_convert_case,			arginfo_mb_convert_case)
	PHP_FE(mb_strtoupper,			arginfo_mb_strtoupper)
	PHP_FE(mb_strtolower,			arginfo_mb_strtolower)
	PHP_FE(mb_language,				arginfo_mb_language)
	PHP_FE(mb_internal_encoding,	arginfo_mb_internal_encoding)
	PHP_FE(mb_http_input,			arginfo_mb_http_input)
	PHP_FE(mb_http_output,			arginfo_mb_http_output)
	PHP_FE(mb_detect_order,			arginfo_mb_detect_order)
	PHP_FE(mb_substitute_character,	arginfo_mb_substitute_character)
	PHP_FE(mb_parse_str,			arginfo_mb_parse_str)
	PHP_FE(mb_output_handler,		arginfo_mb_output_handler)
	PHP_FE(mb_preferred_mime_name,	arginfo_mb_preferred_mime_name)
	PHP_FE(mb_strlen,				arginfo_mb_strlen)
	PHP_FE(mb_strpos,				arginfo_mb_strpos)
	PHP_FE(mb_strrpos,				arginfo_mb_strrpos)
	PHP_FE(mb_stripos,				arginfo_mb_stripos)
	PHP_FE(mb_strripos,				arginfo_mb_strripos)
	PHP_FE(mb_strstr,				arginfo_mb_strstr)
	PHP_FE(mb_strrchr,				arginfo_mb_strrchr)
	PHP_FE(mb_stristr,				arginfo_mb_stristr)
	PHP_FE(mb_strrichr,				arginfo_mb_strrichr)
	PHP_FE(mb_substr_count,			arginfo_mb_substr_count)
	PHP_FE(mb_substr,				arginfo_mb_substr)
	PHP_FE(mb_strcut,				arginfo_mb_strcut)
	PHP_FE(mb_strwidth,				arginfo_mb_strwidth)
	PHP_FE(mb_strimwidth,			arginfo_mb_strimwidth)
	PHP_FE(mb_convert_encoding,		arginfo_mb_convert_encoding)
	PHP_FE(mb_detect_encoding,		arginfo_mb_detect_encoding)
	PHP_FE(mb_list_encodings,		arginfo_mb_list_encodings)
	PHP_FE(mb_encoding_aliases,		arginfo_mb_encoding_aliases)
	PHP_FE(mb_convert_kana,			arginfo_mb_convert_kana)
	PHP_FE(mb_encode_mimeheader,	arginfo_mb_encode_mimeheader)
	PHP_FE(mb_decode_mimeheader,	arginfo_mb_decode_mimeheader)
	PHP_FE(mb_convert_variables,	arginfo_mb_convert_variables)
	PHP_FE(mb_encode_numericentity,	arginfo_mb_encode_numericentity)
	PHP_FE(mb_decode_numericentity,	arginfo_mb_decode_numericentity)
	PHP_FE(mb_send_mail,			arginfo_mb_send_mail)
	PHP_FE(mb_get_info,				arginfo_mb_get_info)
	PHP_FE(mb_check_encoding,		arginfo_mb_check_encoding)
	PHP_FE(mb_ord,					arginfo_mb_ord)
	PHP_FE(mb_chr,					arginfo_mb_chr)
	PHP_FE(mb_scrub,				arginfo_mb_scrub)
#if HAVE_MBREGEX
	/* expands to the regex function rows (macro defined outside this file) */
	PHP_MBREGEX_FUNCTION_ENTRIES
#endif
	PHP_FE_END
};
586 /* }}} */
587 
588 /* {{{ zend_module_entry mbstring_module_entry */
/* Module descriptor for the "mbstring" extension: name, function table,
 * module/request lifecycle hooks, info hook, version and globals hooks. */
zend_module_entry mbstring_module_entry = {
	STANDARD_MODULE_HEADER,
	"mbstring",                    /* extension name */
	mbstring_functions,            /* exported function table */
	PHP_MINIT(mbstring),           /* module startup */
	PHP_MSHUTDOWN(mbstring),       /* module shutdown */
	PHP_RINIT(mbstring),           /* request startup */
	PHP_RSHUTDOWN(mbstring),       /* request shutdown */
	PHP_MINFO(mbstring),           /* info hook */
	PHP_MBSTRING_VERSION,
	PHP_MODULE_GLOBALS(mbstring),  /* globals descriptor */
	PHP_GINIT(mbstring),           /* globals constructor */
	PHP_GSHUTDOWN(mbstring),       /* globals destructor */
	NULL,                          /* unused slot -- presumably the post-deactivate hook; TODO confirm */
	STANDARD_MODULE_PROPERTIES_EX
};
605 /* }}} */
606 
607 /* {{{ static sapi_post_entry php_post_entries[] */
/* POST content-type table that routes the default form content type to
 * php_std_post_handler (compare mbstr_post_entries, which routes it to
 * php_mb_post_handler). The all-NULL row terminates the table. */
static sapi_post_entry php_post_entries[] = {
	{ DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data,	php_std_post_handler },
	{ MULTIPART_CONTENT_TYPE,    sizeof(MULTIPART_CONTENT_TYPE)-1,    NULL,                         rfc1867_post_handler },
	{ NULL, 0, NULL, NULL }
};
613 /* }}} */
614 
/* Only compiled when COMPILE_DL_MBSTRING is defined (presumably the
 * shared-object build of the extension -- TODO confirm against the build
 * system). */
#ifdef COMPILE_DL_MBSTRING
#ifdef ZTS
/* thread-safe builds additionally define the TSRM cache for this object */
ZEND_TSRMLS_CACHE_DEFINE()
#endif
ZEND_GET_MODULE(mbstring)
#endif
621 
622 static char *get_internal_encoding(void) {
623 	if (PG(internal_encoding) && PG(internal_encoding)[0]) {
624 		return PG(internal_encoding);
625 	} else if (SG(default_charset)) {
626 		return SG(default_charset);
627 	}
628 	return "";
629 }
630 
get_input_encoding(void)631 static char *get_input_encoding(void) {
632 	if (PG(input_encoding) && PG(input_encoding)[0]) {
633 		return PG(input_encoding);
634 	} else if (SG(default_charset)) {
635 		return SG(default_charset);
636 	}
637 	return "";
638 }
639 
get_output_encoding(void)640 static char *get_output_encoding(void) {
641 	if (PG(output_encoding) && PG(output_encoding)[0]) {
642 		return PG(output_encoding);
643 	} else if (SG(default_charset)) {
644 		return SG(default_charset);
645 	}
646 	return "";
647 }
648 
649 
650 /* {{{ allocators */
/* malloc slot of _php_mb_allocators: allocates sz bytes via emalloc(). */
static void *_php_mb_allocators_malloc(unsigned int sz)
{
	void *block = emalloc(sz);

	return block;
}
655 
/* realloc slot of _php_mb_allocators: resizes ptr to sz bytes via erealloc(). */
static void *_php_mb_allocators_realloc(void *ptr, unsigned int sz)
{
	void *resized = erealloc(ptr, sz);

	return resized;
}
660 
/* calloc slot of _php_mb_allocators: allocates nelems elements of szelem
 * bytes each via ecalloc(). */
static void *_php_mb_allocators_calloc(unsigned int nelems, unsigned int szelem)
{
	void *block = ecalloc(nelems, szelem);

	return block;
}
665 
/* free slot of _php_mb_allocators: releases ptr via efree(), the counterpart
 * of the emalloc()/erealloc()/ecalloc() wrappers above. */
static void _php_mb_allocators_free(void *ptr)
{
	efree(ptr);
}
670 
/* pmalloc slot of _php_mb_allocators: allocates sz bytes via pemalloc(),
 * passing 1 as its second (persistence) argument. */
static void *_php_mb_allocators_pmalloc(unsigned int sz)
{
	void *block = pemalloc(sz, 1);

	return block;
}
675 
/* prealloc slot of _php_mb_allocators: resizes ptr to sz bytes via
 * perealloc(), passing 1 as its last (persistence) argument. */
static void *_php_mb_allocators_prealloc(void *ptr, unsigned int sz)
{
	void *resized = perealloc(ptr, sz, 1);

	return resized;
}
680 
/* pfree slot of _php_mb_allocators: releases ptr via pefree(), passing 1 as
 * its second (persistence) argument to match pmalloc/prealloc above. */
static void _php_mb_allocators_pfree(void *ptr)
{
	pefree(ptr, 1);
}
685 
/* Allocator callback table of type mbfl_allocators, filled with the wrappers
 * defined above. The initialiser is positional, so the order must match the
 * member order of mbfl_allocators (declared in libmbfl, not visible here). */
static mbfl_allocators _php_mb_allocators = {
	_php_mb_allocators_malloc,    /* emalloc   */
	_php_mb_allocators_realloc,   /* erealloc  */
	_php_mb_allocators_calloc,    /* ecalloc   */
	_php_mb_allocators_free,      /* efree     */
	_php_mb_allocators_pmalloc,   /* pemalloc(..., 1)  */
	_php_mb_allocators_prealloc,  /* perealloc(..., 1) */
	_php_mb_allocators_pfree      /* pefree(..., 1)    */
};
695 /* }}} */
696 
697 /* {{{ static sapi_post_entry mbstr_post_entries[] */
/* POST content-type table that routes the default form content type to
 * php_mb_post_handler instead of php_std_post_handler; multipart data still
 * goes to rfc1867_post_handler. The all-NULL row terminates the table. */
static sapi_post_entry mbstr_post_entries[] = {
	{ DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data, php_mb_post_handler },
	{ MULTIPART_CONTENT_TYPE,    sizeof(MULTIPART_CONTENT_TYPE)-1,    NULL,                         rfc1867_post_handler },
	{ NULL, 0, NULL, NULL }
};
703 /* }}} */
704 
705 /* {{{ static int php_mb_parse_encoding_list()
706  *  Return FAILURE if input contains any illegal encoding, otherwise SUCCESS.
707  *  Even if any illegal encoding is detected the result may contain a list
708  *  of parsed encodings.
709  */
710 static int
php_mb_parse_encoding_list(const char * value,size_t value_length,const mbfl_encoding *** return_list,size_t * return_size,int persistent)711 php_mb_parse_encoding_list(const char *value, size_t value_length, const mbfl_encoding ***return_list, size_t *return_size, int persistent)
712 {
713 	int bauto, ret = SUCCESS;
714 	size_t n, size;
715 	char *p, *p1, *p2, *endp, *tmpstr;
716 	const mbfl_encoding **entry, **list;
717 
718 	list = NULL;
719 	if (value == NULL || value_length <= 0) {
720 		if (return_list) {
721 			*return_list = NULL;
722 		}
723 		if (return_size) {
724 			*return_size = 0;
725 		}
726 		return FAILURE;
727 	} else {
728 		/* copy the value string for work */
729 		if (value[0]=='"' && value[value_length-1]=='"' && value_length>2) {
730 			tmpstr = (char *)estrndup(value+1, value_length-2);
731 			value_length -= 2;
732 		}
733 		else
734 			tmpstr = (char *)estrndup(value, value_length);
735 		/* count the number of listed encoding names */
736 		endp = tmpstr + value_length;
737 		n = 1;
738 		p1 = tmpstr;
739 		while ((p2 = (char*)php_memnstr(p1, ",", 1, endp)) != NULL) {
740 			p1 = p2 + 1;
741 			n++;
742 		}
743 		size = n + MBSTRG(default_detect_order_list_size);
744 		/* make list */
745 		list = (const mbfl_encoding **)pecalloc(size, sizeof(mbfl_encoding*), persistent);
746 		entry = list;
747 		n = 0;
748 		bauto = 0;
749 		p1 = tmpstr;
750 		do {
751 			p2 = p = (char*)php_memnstr(p1, ",", 1, endp);
752 			if (p == NULL) {
753 				p = endp;
754 			}
755 			*p = '\0';
756 			/* trim spaces */
757 			while (p1 < p && (*p1 == ' ' || *p1 == '\t')) {
758 				p1++;
759 			}
760 			p--;
761 			while (p > p1 && (*p == ' ' || *p == '\t')) {
762 				*p = '\0';
763 				p--;
764 			}
765 			/* convert to the encoding number and check encoding */
766 			if (strcasecmp(p1, "auto") == 0) {
767 				if (!bauto) {
768 					const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
769 					const size_t identify_list_size = MBSTRG(default_detect_order_list_size);
770 					size_t i;
771 					bauto = 1;
772 					for (i = 0; i < identify_list_size; i++) {
773 						*entry++ = mbfl_no2encoding(*src++);
774 						n++;
775 					}
776 				}
777 			} else {
778 				const mbfl_encoding *encoding = mbfl_name2encoding(p1);
779 				if (encoding) {
780 					*entry++ = encoding;
781 					n++;
782 				} else {
783 					ret = FAILURE;
784 				}
785 			}
786 			p1 = p2 + 1;
787 		} while (n < size && p2 != NULL);
788 		if (n > 0) {
789 			if (return_list) {
790 				*return_list = list;
791 			} else {
792 				pefree(list, persistent);
793 			}
794 		} else {
795 			pefree(list, persistent);
796 			if (return_list) {
797 				*return_list = NULL;
798 			}
799 			ret = FAILURE;
800 		}
801 		if (return_size) {
802 			*return_size = n;
803 		}
804 		efree(tmpstr);
805 	}
806 
807 	return ret;
808 }
809 /* }}} */
810 
811 /* {{{ static int php_mb_parse_encoding_array()
812  *  Return FAILURE if input contains any illegal encoding, otherwise SUCCESS.
813  *  Even if any illegal encoding is detected the result may contain a list
814  *  of parsed encodings.
815  */
816 static int
php_mb_parse_encoding_array(zval * array,const mbfl_encoding *** return_list,size_t * return_size,int persistent)817 php_mb_parse_encoding_array(zval *array, const mbfl_encoding ***return_list, size_t *return_size, int persistent)
818 {
819 	zval *hash_entry;
820 	HashTable *target_hash;
821 	int i, n, size, bauto, ret = SUCCESS;
822 	const mbfl_encoding **list, **entry;
823 
824 	list = NULL;
825 	if (Z_TYPE_P(array) == IS_ARRAY) {
826 		target_hash = Z_ARRVAL_P(array);
827 		i = zend_hash_num_elements(target_hash);
828 		size = i + MBSTRG(default_detect_order_list_size);
829 		list = (const mbfl_encoding **)pecalloc(size, sizeof(mbfl_encoding*), persistent);
830 		entry = list;
831 		bauto = 0;
832 		n = 0;
833 		ZEND_HASH_FOREACH_VAL(target_hash, hash_entry) {
834 			convert_to_string_ex(hash_entry);
835 			if (strcasecmp(Z_STRVAL_P(hash_entry), "auto") == 0) {
836 				if (!bauto) {
837 					const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
838 					const size_t identify_list_size = MBSTRG(default_detect_order_list_size);
839 					size_t j;
840 
841 					bauto = 1;
842 					for (j = 0; j < identify_list_size; j++) {
843 						*entry++ = mbfl_no2encoding(*src++);
844 						n++;
845 					}
846 				}
847 			} else {
848 				const mbfl_encoding *encoding = mbfl_name2encoding(Z_STRVAL_P(hash_entry));
849 				if (encoding) {
850 					*entry++ = encoding;
851 					n++;
852 				} else {
853 					ret = FAILURE;
854 				}
855 			}
856 			i--;
857 		} ZEND_HASH_FOREACH_END();
858 		if (n > 0) {
859 			if (return_list) {
860 				*return_list = list;
861 			} else {
862 				pefree(list, persistent);
863 			}
864 		} else {
865 			pefree(list, persistent);
866 			if (return_list) {
867 				*return_list = NULL;
868 			}
869 			ret = FAILURE;
870 		}
871 		if (return_size) {
872 			*return_size = n;
873 		}
874 	}
875 
876 	return ret;
877 }
878 /* }}} */
879 
880 /* {{{ zend_multibyte interface */
php_mb_zend_encoding_fetcher(const char * encoding_name)881 static const zend_encoding* php_mb_zend_encoding_fetcher(const char *encoding_name)
882 {
883 	return (const zend_encoding*)mbfl_name2encoding(encoding_name);
884 }
885 
php_mb_zend_encoding_name_getter(const zend_encoding * encoding)886 static const char *php_mb_zend_encoding_name_getter(const zend_encoding *encoding)
887 {
888 	return ((const mbfl_encoding *)encoding)->name;
889 }
890 
php_mb_zend_encoding_lexer_compatibility_checker(const zend_encoding * _encoding)891 static int php_mb_zend_encoding_lexer_compatibility_checker(const zend_encoding *_encoding)
892 {
893 	const mbfl_encoding *encoding = (const mbfl_encoding*)_encoding;
894 	if (encoding->flag & MBFL_ENCTYPE_SBCS) {
895 		return 1;
896 	}
897 	if ((encoding->flag & (MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE)) == MBFL_ENCTYPE_MBCS) {
898 		return 1;
899 	}
900 	return 0;
901 }
902 
php_mb_zend_encoding_detector(const unsigned char * arg_string,size_t arg_length,const zend_encoding ** list,size_t list_size)903 static const zend_encoding *php_mb_zend_encoding_detector(const unsigned char *arg_string, size_t arg_length, const zend_encoding **list, size_t list_size)
904 {
905 	mbfl_string string;
906 
907 	if (!list) {
908 		list = (const zend_encoding **)MBSTRG(current_detect_order_list);
909 		list_size = MBSTRG(current_detect_order_list_size);
910 	}
911 
912 	mbfl_string_init(&string);
913 	string.no_language = MBSTRG(language);
914 	string.val = (unsigned char *)arg_string;
915 	string.len = arg_length;
916 	return (const zend_encoding *) mbfl_identify_encoding2(&string, (const mbfl_encoding **)list, list_size, 0);
917 }
918 
php_mb_zend_encoding_converter(unsigned char ** to,size_t * to_length,const unsigned char * from,size_t from_length,const zend_encoding * encoding_to,const zend_encoding * encoding_from)919 static size_t php_mb_zend_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const zend_encoding *encoding_to, const zend_encoding *encoding_from)
920 {
921 	mbfl_string string, result;
922 	mbfl_buffer_converter *convd;
923 	int status, loc;
924 
925 	/* new encoding */
926 	/* initialize string */
927 	mbfl_string_init(&string);
928 	mbfl_string_init(&result);
929 	string.no_encoding = ((const mbfl_encoding*)encoding_from)->no_encoding;
930 	string.no_language = MBSTRG(language);
931 	string.val = (unsigned char*)from;
932 	string.len = from_length;
933 
934 	/* initialize converter */
935 	convd = mbfl_buffer_converter_new2((const mbfl_encoding *)encoding_from, (const mbfl_encoding *)encoding_to, string.len);
936 	if (convd == NULL) {
937 		return -1;
938 	}
939 
940 	mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
941 	mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
942 
943 	/* do it */
944 	status = mbfl_buffer_converter_feed2(convd, &string, &loc);
945 	if (status) {
946 		mbfl_buffer_converter_delete(convd);
947 		return (size_t)-1;
948 	}
949 
950 	mbfl_buffer_converter_flush(convd);
951 	if (!mbfl_buffer_converter_result(convd, &result)) {
952 		mbfl_buffer_converter_delete(convd);
953 		return (size_t)-1;
954 	}
955 
956 	*to = result.val;
957 	*to_length = result.len;
958 
959 	mbfl_buffer_converter_delete(convd);
960 
961 	return loc;
962 }
963 
php_mb_zend_encoding_list_parser(const char * encoding_list,size_t encoding_list_len,const zend_encoding *** return_list,size_t * return_size,int persistent)964 static int php_mb_zend_encoding_list_parser(const char *encoding_list, size_t encoding_list_len, const zend_encoding ***return_list, size_t *return_size, int persistent)
965 {
966 	return php_mb_parse_encoding_list(encoding_list, encoding_list_len, (const mbfl_encoding ***)return_list, return_size, persistent);
967 }
968 
php_mb_zend_internal_encoding_getter(void)969 static const zend_encoding *php_mb_zend_internal_encoding_getter(void)
970 {
971 	return (const zend_encoding *)MBSTRG(internal_encoding);
972 }
973 
php_mb_zend_internal_encoding_setter(const zend_encoding * encoding)974 static int php_mb_zend_internal_encoding_setter(const zend_encoding *encoding)
975 {
976 	MBSTRG(internal_encoding) = (const mbfl_encoding *)encoding;
977 	return SUCCESS;
978 }
979 
980 static zend_multibyte_functions php_mb_zend_multibyte_functions = {
981 	"mbstring",
982 	php_mb_zend_encoding_fetcher,
983 	php_mb_zend_encoding_name_getter,
984 	php_mb_zend_encoding_lexer_compatibility_checker,
985 	php_mb_zend_encoding_detector,
986 	php_mb_zend_encoding_converter,
987 	php_mb_zend_encoding_list_parser,
988 	php_mb_zend_internal_encoding_getter,
989 	php_mb_zend_internal_encoding_setter
990 };
991 /* }}} */
992 
993 static void *_php_mb_compile_regex(const char *pattern);
994 static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len);
995 static void _php_mb_free_regex(void *opaque);
996 
997 #if HAVE_ONIG
998 /* {{{ _php_mb_compile_regex */
_php_mb_compile_regex(const char * pattern)999 static void *_php_mb_compile_regex(const char *pattern)
1000 {
1001 	php_mb_regex_t *retval;
1002 	OnigErrorInfo err_info;
1003 	int err_code;
1004 
1005 	if ((err_code = onig_new(&retval,
1006 			(const OnigUChar *)pattern,
1007 			(const OnigUChar *)pattern + strlen(pattern),
1008 			ONIG_OPTION_IGNORECASE | ONIG_OPTION_DONT_CAPTURE_GROUP,
1009 			ONIG_ENCODING_ASCII, &OnigSyntaxPerl, &err_info))) {
1010 		OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
1011 		onig_error_code_to_str(err_str, err_code, err_info);
1012 		php_error_docref(NULL, E_WARNING, "%s: %s", pattern, err_str);
1013 		retval = NULL;
1014 	}
1015 	return retval;
1016 }
1017 /* }}} */
1018 
1019 /* {{{ _php_mb_match_regex */
_php_mb_match_regex(void * opaque,const char * str,size_t str_len)1020 static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len)
1021 {
1022 	return onig_search((php_mb_regex_t *)opaque, (const OnigUChar *)str,
1023 			(const OnigUChar*)str + str_len, (const OnigUChar *)str,
1024 			(const OnigUChar*)str + str_len, NULL, ONIG_OPTION_NONE) >= 0;
1025 }
1026 /* }}} */
1027 
1028 /* {{{ _php_mb_free_regex */
_php_mb_free_regex(void * opaque)1029 static void _php_mb_free_regex(void *opaque)
1030 {
1031 	onig_free((php_mb_regex_t *)opaque);
1032 }
1033 /* }}} */
1034 #elif HAVE_PCRE || HAVE_BUNDLED_PCRE
1035 /* {{{ _php_mb_compile_regex */
_php_mb_compile_regex(const char * pattern)1036 static void *_php_mb_compile_regex(const char *pattern)
1037 {
1038 	pcre *retval;
1039 	const char *err_str;
1040 	int err_offset;
1041 
1042 	if (!(retval = pcre_compile(pattern,
1043 			PCRE_CASELESS, &err_str, &err_offset, NULL))) {
1044 		php_error_docref(NULL, E_WARNING, "%s (offset=%d): %s", pattern, err_offset, err_str);
1045 	}
1046 	return retval;
1047 }
1048 /* }}} */
1049 
1050 /* {{{ _php_mb_match_regex */
_php_mb_match_regex(void * opaque,const char * str,size_t str_len)1051 static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len)
1052 {
1053 	return pcre_exec((pcre *)opaque, NULL, str, (int)str_len, 0,
1054 			0, NULL, 0) >= 0;
1055 }
1056 /* }}} */
1057 
1058 /* {{{ _php_mb_free_regex */
/* Release a pattern obtained from _php_mb_compile_regex() via pcre_free(). */
static void _php_mb_free_regex(void *opaque)
{
	void *compiled = opaque;

	pcre_free(compiled);
}
1063 /* }}} */
1064 #endif
1065 
1066 /* {{{ php_mb_nls_get_default_detect_order_list */
php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang,enum mbfl_no_encoding ** plist,size_t * plist_size)1067 static int php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang, enum mbfl_no_encoding **plist, size_t *plist_size)
1068 {
1069 	size_t i;
1070 
1071 	*plist = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut;
1072 	*plist_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
1073 
1074 	for (i = 0; i < sizeof(php_mb_default_identify_list) / sizeof(php_mb_default_identify_list[0]); i++) {
1075 		if (php_mb_default_identify_list[i].lang == lang) {
1076 			*plist = (enum mbfl_no_encoding *)php_mb_default_identify_list[i].list;
1077 			*plist_size = php_mb_default_identify_list[i].list_size;
1078 			return 1;
1079 		}
1080 	}
1081 	return 0;
1082 }
1083 /* }}} */
1084 
php_mb_rfc1867_substring_conf(const zend_encoding * encoding,char * start,int len,char quote)1085 static char *php_mb_rfc1867_substring_conf(const zend_encoding *encoding, char *start, int len, char quote)
1086 {
1087 	char *result = emalloc(len + 2);
1088 	char *resp = result;
1089 	int i;
1090 
1091 	for (i = 0; i < len && start[i] != quote; ++i) {
1092 		if (start[i] == '\\' && (start[i + 1] == '\\' || (quote && start[i + 1] == quote))) {
1093 			*resp++ = start[++i];
1094 		} else {
1095 			size_t j = php_mb_mbchar_bytes_ex(start+i, (const mbfl_encoding *)encoding);
1096 
1097 			while (j-- > 0 && i < len) {
1098 				*resp++ = start[i++];
1099 			}
1100 			--i;
1101 		}
1102 	}
1103 
1104 	*resp = '\0';
1105 	return result;
1106 }
1107 
php_mb_rfc1867_getword(const zend_encoding * encoding,char ** line,char stop)1108 static char *php_mb_rfc1867_getword(const zend_encoding *encoding, char **line, char stop) /* {{{ */
1109 {
1110 	char *pos = *line, quote;
1111 	char *res;
1112 
1113 	while (*pos && *pos != stop) {
1114 		if ((quote = *pos) == '"' || quote == '\'') {
1115 			++pos;
1116 			while (*pos && *pos != quote) {
1117 				if (*pos == '\\' && pos[1] && pos[1] == quote) {
1118 					pos += 2;
1119 				} else {
1120 					++pos;
1121 				}
1122 			}
1123 			if (*pos) {
1124 				++pos;
1125 			}
1126 		} else {
1127 			pos += php_mb_mbchar_bytes_ex(pos, (const mbfl_encoding *)encoding);
1128 
1129 		}
1130 	}
1131 	if (*pos == '\0') {
1132 		res = estrdup(*line);
1133 		*line += strlen(*line);
1134 		return res;
1135 	}
1136 
1137 	res = estrndup(*line, pos - *line);
1138 
1139 	while (*pos == stop) {
1140 		pos += php_mb_mbchar_bytes_ex(pos, (const mbfl_encoding *)encoding);
1141 	}
1142 
1143 	*line = pos;
1144 	return res;
1145 }
1146 /* }}} */
1147 
/* Extract one configuration-style word from `str`.
 *
 * Leading whitespace is skipped. If the word starts with ' or ", the text up
 * to the matching quote is returned; otherwise the text up to the next
 * whitespace byte is returned. Copying and escape handling are delegated to
 * php_mb_rfc1867_substring_conf(). An all-whitespace or empty input yields an
 * emalloc'ed empty string. */
static char *php_mb_rfc1867_getword_conf(const zend_encoding *encoding, char *str) /* {{{ */
{
	char *word_end;

	for (; *str && isspace(*(unsigned char *)str); ++str) {
		/* skip leading whitespace */
	}

	if (*str == '\0') {
		return estrdup("");
	}

	if (*str == '"' || *str == '\'') {
		const char quote = *str;

		++str;
		return php_mb_rfc1867_substring_conf(encoding, str, strlen(str), quote);
	}

	for (word_end = str; *word_end && !isspace(*(unsigned char *)word_end); ++word_end) {
		/* find the end of the bare word */
	}
	return php_mb_rfc1867_substring_conf(encoding, str, word_end - str, 0);
}
1172 /* }}} */
1173 
php_mb_rfc1867_basename(const zend_encoding * encoding,char * filename)1174 static char *php_mb_rfc1867_basename(const zend_encoding *encoding, char *filename) /* {{{ */
1175 {
1176 	char *s, *s2;
1177 	const size_t filename_len = strlen(filename);
1178 
1179 	/* The \ check should technically be needed for win32 systems only where
1180 	 * it is a valid path separator. However, IE in all it's wisdom always sends
1181 	 * the full path of the file on the user's filesystem, which means that unless
1182 	 * the user does basename() they get a bogus file name. Until IE's user base drops
1183 	 * to nill or problem is fixed this code must remain enabled for all systems. */
1184 	s = php_mb_safe_strrchr_ex(filename, '\\', filename_len, (const mbfl_encoding *)encoding);
1185 	s2 = php_mb_safe_strrchr_ex(filename, '/', filename_len, (const mbfl_encoding *)encoding);
1186 
1187 	if (s && s2) {
1188 		if (s > s2) {
1189 			return ++s;
1190 		} else {
1191 			return ++s2;
1192 		}
1193 	} else if (s) {
1194 		return ++s;
1195 	} else if (s2) {
1196 		return ++s2;
1197 	} else {
1198 		return filename;
1199 	}
1200 }
1201 /* }}} */
1202 
1203 /* {{{ php.ini directive handler */
1204 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_language) */
PHP_INI_MH(OnUpdate_mbstring_language)1205 static PHP_INI_MH(OnUpdate_mbstring_language)
1206 {
1207 	enum mbfl_no_language no_language;
1208 
1209 	no_language = mbfl_name2no_language(ZSTR_VAL(new_value));
1210 	if (no_language == mbfl_no_language_invalid) {
1211 		MBSTRG(language) = mbfl_no_language_neutral;
1212 		return FAILURE;
1213 	}
1214 	MBSTRG(language) = no_language;
1215 	php_mb_nls_get_default_detect_order_list(no_language, &MBSTRG(default_detect_order_list), &MBSTRG(default_detect_order_list_size));
1216 	return SUCCESS;
1217 }
1218 /* }}} */
1219 
1220 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_detect_order) */
PHP_INI_MH(OnUpdate_mbstring_detect_order)1221 static PHP_INI_MH(OnUpdate_mbstring_detect_order)
1222 {
1223 	const mbfl_encoding **list;
1224 	size_t size;
1225 
1226 	if (!new_value) {
1227 		if (MBSTRG(detect_order_list)) {
1228 			pefree(MBSTRG(detect_order_list), 1);
1229 		}
1230 		MBSTRG(detect_order_list) = NULL;
1231 		MBSTRG(detect_order_list_size) = 0;
1232 		return SUCCESS;
1233 	}
1234 
1235 	if (FAILURE == php_mb_parse_encoding_list(ZSTR_VAL(new_value), ZSTR_LEN(new_value), &list, &size, 1)) {
1236 		return FAILURE;
1237 	}
1238 
1239 	if (MBSTRG(detect_order_list)) {
1240 		pefree(MBSTRG(detect_order_list), 1);
1241 	}
1242 	MBSTRG(detect_order_list) = list;
1243 	MBSTRG(detect_order_list_size) = size;
1244 	return SUCCESS;
1245 }
1246 /* }}} */
1247 
1248 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_input) */
PHP_INI_MH(OnUpdate_mbstring_http_input)1249 static PHP_INI_MH(OnUpdate_mbstring_http_input)
1250 {
1251 	const mbfl_encoding **list;
1252 	size_t size;
1253 
1254 	if (!new_value || !ZSTR_VAL(new_value)) {
1255 		if (MBSTRG(http_input_list)) {
1256 			pefree(MBSTRG(http_input_list), 1);
1257 		}
1258 		if (SUCCESS == php_mb_parse_encoding_list(get_input_encoding(), strlen(get_input_encoding())+1, &list, &size, 1)) {
1259 			MBSTRG(http_input_list) = list;
1260 			MBSTRG(http_input_list_size) = size;
1261 			return SUCCESS;
1262 		}
1263 		MBSTRG(http_input_list) = NULL;
1264 		MBSTRG(http_input_list_size) = 0;
1265 		return SUCCESS;
1266 	}
1267 
1268 	if (FAILURE == php_mb_parse_encoding_list(ZSTR_VAL(new_value), ZSTR_LEN(new_value), &list, &size, 1)) {
1269 		return FAILURE;
1270 	}
1271 
1272 	if (MBSTRG(http_input_list)) {
1273 		pefree(MBSTRG(http_input_list), 1);
1274 	}
1275 	MBSTRG(http_input_list) = list;
1276 	MBSTRG(http_input_list_size) = size;
1277 
1278 	if (stage & (PHP_INI_STAGE_ACTIVATE | PHP_INI_STAGE_RUNTIME)) {
1279 		php_error_docref("ref.mbstring", E_DEPRECATED, "Use of mbstring.http_input is deprecated");
1280 	}
1281 
1282 	return SUCCESS;
1283 }
1284 /* }}} */
1285 
1286 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output) */
PHP_INI_MH(OnUpdate_mbstring_http_output)1287 static PHP_INI_MH(OnUpdate_mbstring_http_output)
1288 {
1289 	const mbfl_encoding *encoding;
1290 
1291 	if (new_value == NULL || ZSTR_LEN(new_value) == 0) {
1292 		encoding = mbfl_name2encoding(get_output_encoding());
1293 		if (!encoding) {
1294 			MBSTRG(http_output_encoding) = &mbfl_encoding_pass;
1295 			MBSTRG(current_http_output_encoding) = &mbfl_encoding_pass;
1296 			return SUCCESS;
1297 		}
1298 	} else {
1299 		encoding = mbfl_name2encoding(ZSTR_VAL(new_value));
1300 		if (!encoding) {
1301 			MBSTRG(http_output_encoding) = &mbfl_encoding_pass;
1302 			MBSTRG(current_http_output_encoding) = &mbfl_encoding_pass;
1303 			return FAILURE;
1304 		}
1305 	}
1306 	MBSTRG(http_output_encoding) = encoding;
1307 	MBSTRG(current_http_output_encoding) = encoding;
1308 
1309 	if (stage & (PHP_INI_STAGE_ACTIVATE | PHP_INI_STAGE_RUNTIME)) {
1310 		php_error_docref("ref.mbstring", E_DEPRECATED, "Use of mbstring.http_output is deprecated");
1311 	}
1312 
1313 	return SUCCESS;
1314 }
1315 /* }}} */
1316 
1317 /* {{{ static _php_mb_ini_mbstring_internal_encoding_set */
_php_mb_ini_mbstring_internal_encoding_set(const char * new_value,uint32_t new_value_length)1318 int _php_mb_ini_mbstring_internal_encoding_set(const char *new_value, uint32_t new_value_length)
1319 {
1320 	const mbfl_encoding *encoding;
1321 
1322 	if (!new_value || !new_value_length || !(encoding = mbfl_name2encoding(new_value))) {
1323 		/* falls back to UTF-8 if an unknown encoding name is given */
1324 		encoding = mbfl_no2encoding(mbfl_no_encoding_utf8);
1325 	}
1326 	MBSTRG(internal_encoding) = encoding;
1327 	MBSTRG(current_internal_encoding) = encoding;
1328 #if HAVE_MBREGEX
1329 	{
1330 		const char *enc_name = new_value;
1331 		if (FAILURE == php_mb_regex_set_default_mbctype(enc_name)) {
1332 			/* falls back to UTF-8 if an unknown encoding name is given */
1333 			enc_name = "UTF-8";
1334 			php_mb_regex_set_default_mbctype(enc_name);
1335 		}
1336 		php_mb_regex_set_mbctype(new_value);
1337 	}
1338 #endif
1339 	return SUCCESS;
1340 }
1341 /* }}} */
1342 
1343 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_internal_encoding) */
PHP_INI_MH(OnUpdate_mbstring_internal_encoding)1344 static PHP_INI_MH(OnUpdate_mbstring_internal_encoding)
1345 {
1346 	if (stage & (PHP_INI_STAGE_ACTIVATE | PHP_INI_STAGE_RUNTIME)) {
1347 		php_error_docref("ref.mbstring", E_DEPRECATED, "Use of mbstring.internal_encoding is deprecated");
1348 	}
1349 
1350 	if (OnUpdateString(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage) == FAILURE) {
1351 		return FAILURE;
1352 	}
1353 
1354 	if (stage & (PHP_INI_STAGE_STARTUP | PHP_INI_STAGE_SHUTDOWN | PHP_INI_STAGE_RUNTIME)) {
1355 		if (new_value && ZSTR_LEN(new_value)) {
1356 			return _php_mb_ini_mbstring_internal_encoding_set(ZSTR_VAL(new_value), ZSTR_LEN(new_value));
1357 		} else {
1358 			return _php_mb_ini_mbstring_internal_encoding_set(get_internal_encoding(), strlen(get_internal_encoding())+1);
1359 		}
1360 	} else {
1361 		/* the corresponding mbstring globals needs to be set according to the
1362 		 * ini value in the later stage because it never falls back to the
1363 		 * default value if 1. no value for mbstring.internal_encoding is given,
1364 		 * 2. mbstring.language directive is processed in per-dir or runtime
1365 		 * context and 3. call to the handler for mbstring.language is done
1366 		 * after mbstring.internal_encoding is handled. */
1367 		return SUCCESS;
1368 	}
1369 }
1370 /* }}} */
1371 
1372 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_substitute_character) */
PHP_INI_MH(OnUpdate_mbstring_substitute_character)1373 static PHP_INI_MH(OnUpdate_mbstring_substitute_character)
1374 {
1375 	int c;
1376 	char *endptr = NULL;
1377 
1378 	if (new_value != NULL) {
1379 		if (strcasecmp("none", ZSTR_VAL(new_value)) == 0) {
1380 			MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
1381 			MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
1382 		} else if (strcasecmp("long", ZSTR_VAL(new_value)) == 0) {
1383 			MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
1384 			MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
1385 		} else if (strcasecmp("entity", ZSTR_VAL(new_value)) == 0) {
1386 			MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
1387 			MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
1388 		} else {
1389 			MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1390 			MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1391 			if (ZSTR_LEN(new_value) > 0) {
1392 				c = strtol(ZSTR_VAL(new_value), &endptr, 0);
1393 				if (*endptr == '\0') {
1394 					MBSTRG(filter_illegal_substchar) = c;
1395 					MBSTRG(current_filter_illegal_substchar) = c;
1396 				}
1397 			}
1398 		}
1399 	} else {
1400 		MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1401 		MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1402 		MBSTRG(filter_illegal_substchar) = 0x3f;	/* '?' */
1403 		MBSTRG(current_filter_illegal_substchar) = 0x3f;	/* '?' */
1404 	}
1405 
1406 	return SUCCESS;
1407 }
1408 /* }}} */
1409 
1410 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_encoding_translation) */
PHP_INI_MH(OnUpdate_mbstring_encoding_translation)1411 static PHP_INI_MH(OnUpdate_mbstring_encoding_translation)
1412 {
1413 	if (new_value == NULL) {
1414 		return FAILURE;
1415 	}
1416 
1417 	OnUpdateBool(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
1418 
1419 	if (MBSTRG(encoding_translation)) {
1420 		sapi_unregister_post_entry(php_post_entries);
1421 		sapi_register_post_entries(mbstr_post_entries);
1422 	} else {
1423 		sapi_unregister_post_entry(mbstr_post_entries);
1424 		sapi_register_post_entries(php_post_entries);
1425 	}
1426 
1427 	return SUCCESS;
1428 }
1429 /* }}} */
1430 
/* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes) */
PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes)1432 static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes)
1433 {
1434 	zend_string *tmp;
1435 	void *re = NULL;
1436 
1437 	if (!new_value) {
1438 		new_value = entry->orig_value;
1439 	}
1440 	tmp = php_trim(new_value, NULL, 0, 3);
1441 
1442 	if (ZSTR_LEN(tmp) > 0) {
1443 		if (!(re = _php_mb_compile_regex(ZSTR_VAL(tmp)))) {
1444 			zend_string_release(tmp);
1445 			return FAILURE;
1446 		}
1447 	}
1448 
1449 	if (MBSTRG(http_output_conv_mimetypes)) {
1450 		_php_mb_free_regex(MBSTRG(http_output_conv_mimetypes));
1451 	}
1452 
1453 	MBSTRG(http_output_conv_mimetypes) = re;
1454 
1455 	zend_string_release(tmp);
1456 	return SUCCESS;
1457 }
1458 /* }}} */
1459 /* }}} */
1460 
1461 /* {{{ php.ini directive registration */
1462 PHP_INI_BEGIN()
1463 	PHP_INI_ENTRY("mbstring.language", "neutral", PHP_INI_ALL, OnUpdate_mbstring_language)
1464 	PHP_INI_ENTRY("mbstring.detect_order", NULL, PHP_INI_ALL, OnUpdate_mbstring_detect_order)
1465 	PHP_INI_ENTRY("mbstring.http_input", NULL, PHP_INI_ALL, OnUpdate_mbstring_http_input)
1466 	PHP_INI_ENTRY("mbstring.http_output", NULL, PHP_INI_ALL, OnUpdate_mbstring_http_output)
1467 	STD_PHP_INI_ENTRY("mbstring.internal_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_internal_encoding, internal_encoding_name, zend_mbstring_globals, mbstring_globals)
1468 	PHP_INI_ENTRY("mbstring.substitute_character", NULL, PHP_INI_ALL, OnUpdate_mbstring_substitute_character)
1469 	STD_PHP_INI_ENTRY("mbstring.func_overload", "0",
1470 	PHP_INI_SYSTEM, OnUpdateLong, func_overload, zend_mbstring_globals, mbstring_globals)
1471 
1472 	STD_PHP_INI_BOOLEAN("mbstring.encoding_translation", "0",
1473 		PHP_INI_SYSTEM | PHP_INI_PERDIR,
1474 		OnUpdate_mbstring_encoding_translation,
1475 		encoding_translation, zend_mbstring_globals, mbstring_globals)
1476 	PHP_INI_ENTRY("mbstring.http_output_conv_mimetypes",
1477 		"^(text/|application/xhtml\\+xml)",
1478 		PHP_INI_ALL,
1479 		OnUpdate_mbstring_http_output_conv_mimetypes)
1480 
1481 	STD_PHP_INI_BOOLEAN("mbstring.strict_detection", "0",
1482 		PHP_INI_ALL,
1483 		OnUpdateLong,
1484 		strict_detection, zend_mbstring_globals, mbstring_globals)
PHP_INI_END()1485 PHP_INI_END()
1486 /* }}} */
1487 
1488 /* {{{ module global initialize handler */
1489 static PHP_GINIT_FUNCTION(mbstring)
1490 {
1491 #if defined(COMPILE_DL_MBSTRING) && defined(ZTS)
1492 ZEND_TSRMLS_CACHE_UPDATE();
1493 #endif
1494 
1495 	mbstring_globals->language = mbfl_no_language_uni;
1496 	mbstring_globals->internal_encoding = NULL;
1497 	mbstring_globals->current_internal_encoding = mbstring_globals->internal_encoding;
1498 	mbstring_globals->http_output_encoding = &mbfl_encoding_pass;
1499 	mbstring_globals->current_http_output_encoding = &mbfl_encoding_pass;
1500 	mbstring_globals->http_input_identify = NULL;
1501 	mbstring_globals->http_input_identify_get = NULL;
1502 	mbstring_globals->http_input_identify_post = NULL;
1503 	mbstring_globals->http_input_identify_cookie = NULL;
1504 	mbstring_globals->http_input_identify_string = NULL;
1505 	mbstring_globals->http_input_list = NULL;
1506 	mbstring_globals->http_input_list_size = 0;
1507 	mbstring_globals->detect_order_list = NULL;
1508 	mbstring_globals->detect_order_list_size = 0;
1509 	mbstring_globals->current_detect_order_list = NULL;
1510 	mbstring_globals->current_detect_order_list_size = 0;
1511 	mbstring_globals->default_detect_order_list = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut;
1512 	mbstring_globals->default_detect_order_list_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
1513 	mbstring_globals->filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1514 	mbstring_globals->filter_illegal_substchar = 0x3f;	/* '?' */
1515 	mbstring_globals->current_filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1516 	mbstring_globals->current_filter_illegal_substchar = 0x3f;	/* '?' */
1517 	mbstring_globals->illegalchars = 0;
1518 	mbstring_globals->func_overload = 0;
1519 	mbstring_globals->encoding_translation = 0;
1520 	mbstring_globals->strict_detection = 0;
1521 	mbstring_globals->outconv = NULL;
1522 	mbstring_globals->http_output_conv_mimetypes = NULL;
1523 #if HAVE_MBREGEX
1524 	mbstring_globals->mb_regex_globals = php_mb_regex_globals_alloc();
1525 #endif
1526 }
1527 /* }}} */
1528 
1529 /* {{{ PHP_GSHUTDOWN_FUNCTION */
PHP_GSHUTDOWN_FUNCTION(mbstring)1530 static PHP_GSHUTDOWN_FUNCTION(mbstring)
1531 {
1532 	if (mbstring_globals->http_input_list) {
1533 		free(mbstring_globals->http_input_list);
1534 	}
1535 	if (mbstring_globals->detect_order_list) {
1536 		free(mbstring_globals->detect_order_list);
1537 	}
1538 	if (mbstring_globals->http_output_conv_mimetypes) {
1539 		_php_mb_free_regex(mbstring_globals->http_output_conv_mimetypes);
1540 	}
1541 #if HAVE_MBREGEX
1542 	php_mb_regex_globals_free(mbstring_globals->mb_regex_globals);
1543 #endif
1544 }
1545 /* }}} */
1546 
1547 /* {{{ PHP_MINIT_FUNCTION(mbstring) */
PHP_MINIT_FUNCTION(mbstring)1548 PHP_MINIT_FUNCTION(mbstring)
1549 {
1550 #if defined(COMPILE_DL_MBSTRING) && defined(ZTS)
1551 ZEND_TSRMLS_CACHE_UPDATE();
1552 #endif
1553 	__mbfl_allocators = &_php_mb_allocators;
1554 
1555 	REGISTER_INI_ENTRIES();
1556 
1557 	/* This is a global handler. Should not be set in a per-request handler. */
1558 	sapi_register_treat_data(mbstr_treat_data);
1559 
1560 	/* Post handlers are stored in the thread-local context. */
1561 	if (MBSTRG(encoding_translation)) {
1562 		sapi_register_post_entries(mbstr_post_entries);
1563 	}
1564 
1565 	REGISTER_LONG_CONSTANT("MB_OVERLOAD_MAIL", MB_OVERLOAD_MAIL, CONST_CS | CONST_PERSISTENT);
1566 	REGISTER_LONG_CONSTANT("MB_OVERLOAD_STRING", MB_OVERLOAD_STRING, CONST_CS | CONST_PERSISTENT);
1567 	REGISTER_LONG_CONSTANT("MB_OVERLOAD_REGEX", MB_OVERLOAD_REGEX, CONST_CS | CONST_PERSISTENT);
1568 
1569 	REGISTER_LONG_CONSTANT("MB_CASE_UPPER", PHP_UNICODE_CASE_UPPER, CONST_CS | CONST_PERSISTENT);
1570 	REGISTER_LONG_CONSTANT("MB_CASE_LOWER", PHP_UNICODE_CASE_LOWER, CONST_CS | CONST_PERSISTENT);
1571 	REGISTER_LONG_CONSTANT("MB_CASE_TITLE", PHP_UNICODE_CASE_TITLE, CONST_CS | CONST_PERSISTENT);
1572 
1573 #if HAVE_MBREGEX
1574 	PHP_MINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1575 #endif
1576 
1577 	if (FAILURE == zend_multibyte_set_functions(&php_mb_zend_multibyte_functions)) {
1578 		return FAILURE;
1579 	}
1580 
1581 	php_rfc1867_set_multibyte_callbacks(
1582 		php_mb_encoding_translation,
1583 		php_mb_gpc_get_detect_order,
1584 		php_mb_gpc_set_input_encoding,
1585 		php_mb_rfc1867_getword,
1586 		php_mb_rfc1867_getword_conf,
1587 		php_mb_rfc1867_basename);
1588 
1589 	return SUCCESS;
1590 }
1591 /* }}} */
1592 
1593 /* {{{ PHP_MSHUTDOWN_FUNCTION(mbstring) */
PHP_MSHUTDOWN_FUNCTION(mbstring)1594 PHP_MSHUTDOWN_FUNCTION(mbstring)
1595 {
1596 	UNREGISTER_INI_ENTRIES();
1597 
1598 	zend_multibyte_restore_functions();
1599 
1600 #if HAVE_MBREGEX
1601 	PHP_MSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1602 #endif
1603 
1604 	return SUCCESS;
1605 }
1606 /* }}} */
1607 
1608 /* {{{ PHP_RINIT_FUNCTION(mbstring) */
PHP_RINIT_FUNCTION(mbstring)1609 PHP_RINIT_FUNCTION(mbstring)
1610 {
1611 	zend_function *func, *orig;
1612 	const struct mb_overload_def *p;
1613 
1614 	MBSTRG(current_internal_encoding) = MBSTRG(internal_encoding);
1615 	MBSTRG(current_http_output_encoding) = MBSTRG(http_output_encoding);
1616 	MBSTRG(current_filter_illegal_mode) = MBSTRG(filter_illegal_mode);
1617 	MBSTRG(current_filter_illegal_substchar) = MBSTRG(filter_illegal_substchar);
1618 
1619 	MBSTRG(illegalchars) = 0;
1620 
1621 	php_mb_populate_current_detect_order_list();
1622 
1623  	/* override original function. */
1624 	if (MBSTRG(func_overload)){
1625 		zend_error(E_DEPRECATED, "The mbstring.func_overload directive is deprecated");
1626 
1627 		p = &(mb_ovld[0]);
1628 		CG(compiler_options) |= ZEND_COMPILE_NO_BUILTIN_STRLEN;
1629 		while (p->type > 0) {
1630 			if ((MBSTRG(func_overload) & p->type) == p->type &&
1631 				!zend_hash_str_exists(EG(function_table), p->save_func, strlen(p->save_func))
1632 			) {
1633 				func = zend_hash_str_find_ptr(EG(function_table), p->ovld_func, strlen(p->ovld_func));
1634 
1635 				if ((orig = zend_hash_str_find_ptr(EG(function_table), p->orig_func, strlen(p->orig_func))) == NULL) {
1636 					php_error_docref("ref.mbstring", E_WARNING, "mbstring couldn't find function %s.", p->orig_func);
1637 					return FAILURE;
1638 				} else {
1639 					ZEND_ASSERT(orig->type == ZEND_INTERNAL_FUNCTION);
1640 					zend_hash_str_add_mem(EG(function_table), p->save_func, strlen(p->save_func), orig, sizeof(zend_internal_function));
1641 					function_add_ref(orig);
1642 
1643 					if (zend_hash_str_update_mem(EG(function_table), p->orig_func, strlen(p->orig_func), func, sizeof(zend_internal_function)) == NULL) {
1644 						php_error_docref("ref.mbstring", E_WARNING, "mbstring couldn't replace function %s.", p->orig_func);
1645 						return FAILURE;
1646 					}
1647 
1648 					function_add_ref(func);
1649 				}
1650 			}
1651 			p++;
1652 		}
1653 	}
1654 #if HAVE_MBREGEX
1655 	PHP_RINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1656 #endif
1657 	zend_multibyte_set_internal_encoding((const zend_encoding *)MBSTRG(internal_encoding));
1658 
1659 	return SUCCESS;
1660 }
1661 /* }}} */
1662 
1663 /* {{{ PHP_RSHUTDOWN_FUNCTION(mbstring) */
PHP_RSHUTDOWN_FUNCTION(mbstring)1664 PHP_RSHUTDOWN_FUNCTION(mbstring)
1665 {
1666 	const struct mb_overload_def *p;
1667 	zend_function *orig;
1668 
1669 	if (MBSTRG(current_detect_order_list) != NULL) {
1670 		efree(MBSTRG(current_detect_order_list));
1671 		MBSTRG(current_detect_order_list) = NULL;
1672 		MBSTRG(current_detect_order_list_size) = 0;
1673 	}
1674 	if (MBSTRG(outconv) != NULL) {
1675 		MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
1676 		mbfl_buffer_converter_delete(MBSTRG(outconv));
1677 		MBSTRG(outconv) = NULL;
1678 	}
1679 
1680 	/* clear http input identification. */
1681 	MBSTRG(http_input_identify) = NULL;
1682 	MBSTRG(http_input_identify_post) = NULL;
1683 	MBSTRG(http_input_identify_get) = NULL;
1684 	MBSTRG(http_input_identify_cookie) = NULL;
1685 	MBSTRG(http_input_identify_string) = NULL;
1686 
1687  	/*  clear overloaded function. */
1688 	if (MBSTRG(func_overload)){
1689 		p = &(mb_ovld[0]);
1690 		while (p->type > 0) {
1691 			if ((MBSTRG(func_overload) & p->type) == p->type &&
1692 				(orig = zend_hash_str_find_ptr(EG(function_table), p->save_func, strlen(p->save_func)))) {
1693 
1694 				zend_hash_str_update_mem(EG(function_table), p->orig_func, strlen(p->orig_func), orig, sizeof(zend_internal_function));
1695 				function_add_ref(orig);
1696 				zend_hash_str_del(EG(function_table), p->save_func, strlen(p->save_func));
1697 			}
1698 			p++;
1699 		}
1700 		CG(compiler_options) &= ~ZEND_COMPILE_NO_BUILTIN_STRLEN;
1701 	}
1702 
1703 #if HAVE_MBREGEX
1704 	PHP_RSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1705 #endif
1706 
1707 	return SUCCESS;
1708 }
1709 /* }}} */
1710 
1711 /* {{{ PHP_MINFO_FUNCTION(mbstring) */
PHP_MINFO_FUNCTION(mbstring)1712 PHP_MINFO_FUNCTION(mbstring)
1713 {
1714 	php_info_print_table_start();
1715 	php_info_print_table_row(2, "Multibyte Support", "enabled");
1716 	php_info_print_table_row(2, "Multibyte string engine", "libmbfl");
1717 	php_info_print_table_row(2, "HTTP input encoding translation", MBSTRG(encoding_translation) ? "enabled": "disabled");
1718 	{
1719 		char tmp[256];
1720 		snprintf(tmp, sizeof(tmp), "%d.%d.%d", MBFL_VERSION_MAJOR, MBFL_VERSION_MINOR, MBFL_VERSION_TEENY);
1721 		php_info_print_table_row(2, "libmbfl version", tmp);
1722 	}
1723 #if HAVE_ONIG
1724 	{
1725 		char tmp[256];
1726 		snprintf(tmp, sizeof(tmp), "%d.%d.%d", ONIGURUMA_VERSION_MAJOR, ONIGURUMA_VERSION_MINOR, ONIGURUMA_VERSION_TEENY);
1727 		php_info_print_table_row(2, "oniguruma version", tmp);
1728 	}
1729 #endif
1730 	php_info_print_table_end();
1731 
1732 	php_info_print_table_start();
1733 	php_info_print_table_header(1, "mbstring extension makes use of \"streamable kanji code filter and converter\", which is distributed under the GNU Lesser General Public License version 2.1.");
1734 	php_info_print_table_end();
1735 
1736 #if HAVE_MBREGEX
1737 	PHP_MINFO(mb_regex)(ZEND_MODULE_INFO_FUNC_ARGS_PASSTHRU);
1738 #endif
1739 
1740 	DISPLAY_INI_ENTRIES();
1741 }
1742 /* }}} */
1743 
1744 /* {{{ proto string mb_language([string language])
1745    Sets the current language or Returns the current language as a string */
PHP_FUNCTION(mb_language)1746 PHP_FUNCTION(mb_language)
1747 {
1748 	zend_string *name = NULL;
1749 
1750 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "|S", &name) == FAILURE) {
1751 		return;
1752 	}
1753 	if (name == NULL) {
1754 		RETVAL_STRING((char *)mbfl_no_language2name(MBSTRG(language)));
1755 	} else {
1756 		zend_string *ini_name = zend_string_init("mbstring.language", sizeof("mbstring.language") - 1, 0);
1757 		if (FAILURE == zend_alter_ini_entry(ini_name, name, PHP_INI_USER, PHP_INI_STAGE_RUNTIME)) {
1758 			php_error_docref(NULL, E_WARNING, "Unknown language \"%s\"", ZSTR_VAL(name));
1759 			RETVAL_FALSE;
1760 		} else {
1761 			RETVAL_TRUE;
1762 		}
1763 		zend_string_release(ini_name);
1764 	}
1765 }
1766 /* }}} */
1767 
1768 /* {{{ proto string mb_internal_encoding([string encoding])
1769    Sets the current internal encoding or Returns the current internal encoding as a string */
PHP_FUNCTION(mb_internal_encoding)1770 PHP_FUNCTION(mb_internal_encoding)
1771 {
1772 	const char *name = NULL;
1773 	size_t name_len;
1774 	const mbfl_encoding *encoding;
1775 
1776 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s", &name, &name_len) == FAILURE) {
1777 		return;
1778 	}
1779 	if (name == NULL) {
1780 		name = MBSTRG(current_internal_encoding) ? MBSTRG(current_internal_encoding)->name: NULL;
1781 		if (name != NULL) {
1782 			RETURN_STRING(name);
1783 		} else {
1784 			RETURN_FALSE;
1785 		}
1786 	} else {
1787 		encoding = mbfl_name2encoding(name);
1788 		if (!encoding) {
1789 			php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", name);
1790 			RETURN_FALSE;
1791 		} else {
1792 			MBSTRG(current_internal_encoding) = encoding;
1793 			RETURN_TRUE;
1794 		}
1795 	}
1796 }
1797 /* }}} */
1798 
1799 /* {{{ proto mixed mb_http_input([string type])
1800    Returns the input encoding */
PHP_FUNCTION(mb_http_input)1801 PHP_FUNCTION(mb_http_input)
1802 {
1803 	char *typ = NULL;
1804 	size_t typ_len;
1805 	int retname;
1806 	char *list, *temp;
1807 	const mbfl_encoding *result = NULL;
1808 
1809 	retname = 1;
1810  	if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s", &typ, &typ_len) == FAILURE) {
1811 		return;
1812  	}
1813  	if (typ == NULL) {
1814  		result = MBSTRG(http_input_identify);
1815  	} else {
1816  		switch (*typ) {
1817 		case 'G':
1818 		case 'g':
1819 			result = MBSTRG(http_input_identify_get);
1820 			break;
1821 		case 'P':
1822 		case 'p':
1823 			result = MBSTRG(http_input_identify_post);
1824 			break;
1825 		case 'C':
1826 		case 'c':
1827 			result = MBSTRG(http_input_identify_cookie);
1828 			break;
1829 		case 'S':
1830 		case 's':
1831 			result = MBSTRG(http_input_identify_string);
1832 			break;
1833 		case 'I':
1834 		case 'i':
1835 			{
1836 				const mbfl_encoding **entry = MBSTRG(http_input_list);
1837 				const size_t n = MBSTRG(http_input_list_size);
1838 				size_t i;
1839 				array_init(return_value);
1840 				for (i = 0; i < n; i++) {
1841 					add_next_index_string(return_value, (*entry)->name);
1842 					entry++;
1843 				}
1844 				retname = 0;
1845 			}
1846 			break;
1847 		case 'L':
1848 		case 'l':
1849 			{
1850 				const mbfl_encoding **entry = MBSTRG(http_input_list);
1851 				const size_t n = MBSTRG(http_input_list_size);
1852 				size_t i;
1853 				list = NULL;
1854 				for (i = 0; i < n; i++) {
1855 					if (list) {
1856 						temp = list;
1857 						spprintf(&list, 0, "%s,%s", temp, (*entry)->name);
1858 						efree(temp);
1859 						if (!list) {
1860 							break;
1861 						}
1862 					} else {
1863 						list = estrdup((*entry)->name);
1864 					}
1865 					entry++;
1866 				}
1867 			}
1868 			if (!list) {
1869 				RETURN_FALSE;
1870 			}
1871 			RETVAL_STRING(list);
1872 			efree(list);
1873 			retname = 0;
1874 			break;
1875 		default:
1876 			result = MBSTRG(http_input_identify);
1877 			break;
1878 		}
1879 	}
1880 
1881 	if (retname) {
1882 		if (result) {
1883 			RETVAL_STRING(result->name);
1884 		} else {
1885 			RETVAL_FALSE;
1886 		}
1887 	}
1888 }
1889 /* }}} */
1890 
1891 /* {{{ proto string mb_http_output([string encoding])
1892    Sets the current output_encoding or returns the current output_encoding as a string */
PHP_FUNCTION(mb_http_output)1893 PHP_FUNCTION(mb_http_output)
1894 {
1895 	const char *name = NULL;
1896 	size_t name_len;
1897 	const mbfl_encoding *encoding;
1898 
1899 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s", &name, &name_len) == FAILURE) {
1900 		return;
1901 	}
1902 
1903 	if (name == NULL) {
1904 		name = MBSTRG(current_http_output_encoding) ? MBSTRG(current_http_output_encoding)->name: NULL;
1905 		if (name != NULL) {
1906 			RETURN_STRING(name);
1907 		} else {
1908 			RETURN_FALSE;
1909 		}
1910 	} else {
1911 		encoding = mbfl_name2encoding(name);
1912 		if (!encoding) {
1913 			php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", name);
1914 			RETURN_FALSE;
1915 		} else {
1916 			MBSTRG(current_http_output_encoding) = encoding;
1917 			RETURN_TRUE;
1918 		}
1919 	}
1920 }
1921 /* }}} */
1922 
1923 /* {{{ proto bool|array mb_detect_order([mixed encoding-list])
1924    Sets the current detect_order or returns the current detect_order as an array */
PHP_FUNCTION(mb_detect_order)1925 PHP_FUNCTION(mb_detect_order)
1926 {
1927 	zval *arg1 = NULL;
1928 
1929 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "|z", &arg1) == FAILURE) {
1930 		return;
1931 	}
1932 
1933 	if (!arg1) {
1934 		size_t i;
1935 		size_t n = MBSTRG(current_detect_order_list_size);
1936 		const mbfl_encoding **entry = MBSTRG(current_detect_order_list);
1937 		array_init(return_value);
1938 		for (i = 0; i < n; i++) {
1939 			add_next_index_string(return_value, (*entry)->name);
1940 			entry++;
1941 		}
1942 	} else {
1943 		const mbfl_encoding **list = NULL;
1944 		size_t size = 0;
1945 		switch (Z_TYPE_P(arg1)) {
1946 			case IS_ARRAY:
1947 				if (FAILURE == php_mb_parse_encoding_array(arg1, &list, &size, 0)) {
1948 					if (list) {
1949 						efree(list);
1950 					}
1951 					RETURN_FALSE;
1952 				}
1953 				break;
1954 			default:
1955 				convert_to_string_ex(arg1);
1956 				if (FAILURE == php_mb_parse_encoding_list(Z_STRVAL_P(arg1), Z_STRLEN_P(arg1), &list, &size, 0)) {
1957 					if (list) {
1958 						efree(list);
1959 					}
1960 					RETURN_FALSE;
1961 				}
1962 				break;
1963 		}
1964 
1965 		if (list == NULL) {
1966 			RETURN_FALSE;
1967 		}
1968 
1969 		if (MBSTRG(current_detect_order_list)) {
1970 			efree(MBSTRG(current_detect_order_list));
1971 		}
1972 		MBSTRG(current_detect_order_list) = list;
1973 		MBSTRG(current_detect_order_list_size) = size;
1974 		RETURN_TRUE;
1975 	}
1976 }
1977 /* }}} */
1978 
php_mb_check_code_point(long cp)1979 static inline int php_mb_check_code_point(long cp)
1980 {
1981 	if (cp <= 0 || cp >= 0x110000) {
1982 		/* Out of Unicode range */
1983 		return 0;
1984 	}
1985 
1986 	if (cp >= 0xd800 && cp <= 0xdfff) {
1987 		/* Surrogate code-point. These are never valid on their own and we only allow a single
1988 		 * substitute character. */
1989 		return 0;
1990 	}
1991 
1992 	/* As the we do not know the target encoding of the conversion operation that is going to
1993 	 * use the substitution character, we cannot check whether the codepoint is actually mapped
1994 	 * in the given encoding at this point. Thus we have to accept everything. */
1995 	return 1;
1996 }
1997 
1998 /* {{{ proto mixed mb_substitute_character([mixed substchar])
1999    Sets the current substitute_character or returns the current substitute_character */
PHP_FUNCTION(mb_substitute_character)2000 PHP_FUNCTION(mb_substitute_character)
2001 {
2002 	zval *arg1 = NULL;
2003 
2004 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "|z", &arg1) == FAILURE) {
2005 		return;
2006 	}
2007 
2008 	if (!arg1) {
2009 		if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
2010 			RETURN_STRING("none");
2011 		} else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
2012 			RETURN_STRING("long");
2013 		} else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
2014 			RETURN_STRING("entity");
2015 		} else {
2016 			RETURN_LONG(MBSTRG(current_filter_illegal_substchar));
2017 		}
2018 	} else {
2019 		RETVAL_TRUE;
2020 
2021 		switch (Z_TYPE_P(arg1)) {
2022 			case IS_STRING:
2023 				if (strncasecmp("none", Z_STRVAL_P(arg1), Z_STRLEN_P(arg1)) == 0) {
2024 					MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
2025 				} else if (strncasecmp("long", Z_STRVAL_P(arg1), Z_STRLEN_P(arg1)) == 0) {
2026 					MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
2027 				} else if (strncasecmp("entity", Z_STRVAL_P(arg1), Z_STRLEN_P(arg1)) == 0) {
2028 					MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
2029 				} else {
2030 					convert_to_long_ex(arg1);
2031 
2032 					if (php_mb_check_code_point(Z_LVAL_P(arg1))) {
2033 						MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
2034 						MBSTRG(current_filter_illegal_substchar) = Z_LVAL_P(arg1);
2035 					} else {
2036 						php_error_docref(NULL, E_WARNING, "Unknown character");
2037 						RETURN_FALSE;
2038 					}
2039 				}
2040 				break;
2041 			default:
2042 				convert_to_long_ex(arg1);
2043 				if (php_mb_check_code_point(Z_LVAL_P(arg1))) {
2044 					MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
2045 					MBSTRG(current_filter_illegal_substchar) = Z_LVAL_P(arg1);
2046 				} else {
2047 					php_error_docref(NULL, E_WARNING, "Unknown character");
2048 					RETURN_FALSE;
2049 				}
2050 				break;
2051 		}
2052 	}
2053 }
2054 /* }}} */
2055 
2056 /* {{{ proto string mb_preferred_mime_name(string encoding)
2057    Return the preferred MIME name (charset) as a string */
PHP_FUNCTION(mb_preferred_mime_name)2058 PHP_FUNCTION(mb_preferred_mime_name)
2059 {
2060 	enum mbfl_no_encoding no_encoding;
2061 	char *name = NULL;
2062 	size_t name_len;
2063 
2064 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &name, &name_len) == FAILURE) {
2065 		return;
2066 	} else {
2067 		no_encoding = mbfl_name2no_encoding(name);
2068 		if (no_encoding == mbfl_no_encoding_invalid) {
2069 			php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", name);
2070 			RETVAL_FALSE;
2071 		} else {
2072 			const char *preferred_name = mbfl_no2preferred_mime_name(no_encoding);
2073 			if (preferred_name == NULL || *preferred_name == '\0') {
2074 				php_error_docref(NULL, E_WARNING, "No MIME preferred name corresponding to \"%s\"", name);
2075 				RETVAL_FALSE;
2076 			} else {
2077 				RETVAL_STRING((char *)preferred_name);
2078 			}
2079 		}
2080 	}
2081 }
2082 /* }}} */
2083 
/* Byte-range predicates; each evaluates to 1 or 0. The argument may be
 * evaluated more than once, so do not pass expressions with side effects.
 * IS_SJIS1(c): c is in 0x81..0x9f or 0xe0..0xf5
 *              (going by the name, presumably the first byte of a Shift_JIS pair).
 * IS_SJIS2(c): c is in 0x40..0x7e or 0x80..0xfc
 *              (presumably the second byte). */
#define IS_SJIS1(c) (((c) >= 0x81 && (c) <= 0x9f) || ((c) >= 0xe0 && (c) <= 0xf5))
#define IS_SJIS2(c) (((c) >= 0x40 && (c) <= 0x7e) || ((c) >= 0x80 && (c) <= 0xfc))
2086 
2087 /* {{{ proto bool mb_parse_str(string encoded_string [, array result])
2088    Parses GET/POST/COOKIE data and sets global variables */
PHP_FUNCTION(mb_parse_str)2089 PHP_FUNCTION(mb_parse_str)
2090 {
2091 	zval *track_vars_array = NULL;
2092 	char *encstr = NULL;
2093 	size_t encstr_len;
2094 	php_mb_encoding_handler_info_t info;
2095 	const mbfl_encoding *detected;
2096 
2097 	track_vars_array = NULL;
2098 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|z/", &encstr, &encstr_len, &track_vars_array) == FAILURE) {
2099 		return;
2100 	}
2101 
2102 	if (track_vars_array != NULL) {
2103 		/* Clear out the array */
2104 		zval_dtor(track_vars_array);
2105 		array_init(track_vars_array);
2106 	}
2107 
2108 	encstr = estrndup(encstr, encstr_len);
2109 
2110 	info.data_type              = PARSE_STRING;
2111 	info.separator              = PG(arg_separator).input;
2112 	info.report_errors          = 1;
2113 	info.to_encoding            = MBSTRG(current_internal_encoding);
2114 	info.to_language            = MBSTRG(language);
2115 	info.from_encodings         = MBSTRG(http_input_list);
2116 	info.num_from_encodings     = MBSTRG(http_input_list_size);
2117 	info.from_language          = MBSTRG(language);
2118 
2119 	if (track_vars_array != NULL) {
2120 		detected = _php_mb_encoding_handler_ex(&info, track_vars_array, encstr);
2121 	} else {
2122 		zval tmp;
2123 		zend_array *symbol_table;
2124 		if (zend_forbid_dynamic_call("mb_parse_str() with a single argument") == FAILURE) {
2125 			efree(encstr);
2126 			return;
2127 		}
2128 
2129 		php_error_docref(NULL, E_DEPRECATED, "Calling mb_parse_str() without the result argument is deprecated");
2130 
2131 		symbol_table = zend_rebuild_symbol_table();
2132 		ZVAL_ARR(&tmp, symbol_table);
2133 		detected = _php_mb_encoding_handler_ex(&info, &tmp, encstr);
2134 	}
2135 
2136 	MBSTRG(http_input_identify) = detected;
2137 
2138 	RETVAL_BOOL(detected);
2139 
2140 	if (encstr != NULL) efree(encstr);
2141 }
2142 /* }}} */
2143 
2144 /* {{{ proto string mb_output_handler(string contents, int status)
2145    Returns string in output buffer converted to the http_output encoding */
PHP_FUNCTION(mb_output_handler)2146 PHP_FUNCTION(mb_output_handler)
2147 {
2148 	char *arg_string;
2149 	size_t arg_string_len;
2150 	zend_long arg_status;
2151 	mbfl_string string, result;
2152 	const char *charset;
2153 	char *p;
2154 	const mbfl_encoding *encoding;
2155 	int last_feed, len;
2156 	unsigned char send_text_mimetype = 0;
2157 	char *s, *mimetype = NULL;
2158 
2159 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl", &arg_string, &arg_string_len, &arg_status) == FAILURE) {
2160 		return;
2161 	}
2162 
2163 	encoding = MBSTRG(current_http_output_encoding);
2164 
2165  	/* start phase only */
2166  	if ((arg_status & PHP_OUTPUT_HANDLER_START) != 0) {
2167  		/* delete the converter just in case. */
2168  		if (MBSTRG(outconv)) {
2169 			MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
2170  			mbfl_buffer_converter_delete(MBSTRG(outconv));
2171  			MBSTRG(outconv) = NULL;
2172   		}
2173 		if (encoding == &mbfl_encoding_pass) {
2174 			RETURN_STRINGL(arg_string, arg_string_len);
2175 		}
2176 
2177 		/* analyze mime type */
2178 		if (SG(sapi_headers).mimetype &&
2179 			_php_mb_match_regex(
2180 				MBSTRG(http_output_conv_mimetypes),
2181 				SG(sapi_headers).mimetype,
2182 				strlen(SG(sapi_headers).mimetype))) {
2183 			if ((s = strchr(SG(sapi_headers).mimetype,';')) == NULL){
2184 				mimetype = estrdup(SG(sapi_headers).mimetype);
2185 			} else {
2186 				mimetype = estrndup(SG(sapi_headers).mimetype,s-SG(sapi_headers).mimetype);
2187 			}
2188 			send_text_mimetype = 1;
2189 		} else if (SG(sapi_headers).send_default_content_type) {
2190 			mimetype = SG(default_mimetype) ? SG(default_mimetype) : SAPI_DEFAULT_MIMETYPE;
2191 		}
2192 
2193  		/* if content-type is not yet set, set it and activate the converter */
2194  		if (SG(sapi_headers).send_default_content_type || send_text_mimetype) {
2195 			charset = encoding->mime_name;
2196 			if (charset) {
2197 				len = spprintf( &p, 0, "Content-Type: %s; charset=%s",  mimetype, charset );
2198 				if (sapi_add_header(p, len, 0) != FAILURE) {
2199 					SG(sapi_headers).send_default_content_type = 0;
2200 				}
2201 			}
2202  			/* activate the converter */
2203  			MBSTRG(outconv) = mbfl_buffer_converter_new2(MBSTRG(current_internal_encoding), encoding, 0);
2204 			if (send_text_mimetype){
2205 				efree(mimetype);
2206 			}
2207  		}
2208   	}
2209 
2210  	/* just return if the converter is not activated. */
2211  	if (MBSTRG(outconv) == NULL) {
2212 		RETURN_STRINGL(arg_string, arg_string_len);
2213 	}
2214 
2215  	/* flag */
2216  	last_feed = ((arg_status & PHP_OUTPUT_HANDLER_END) != 0);
2217  	/* mode */
2218  	mbfl_buffer_converter_illegal_mode(MBSTRG(outconv), MBSTRG(current_filter_illegal_mode));
2219  	mbfl_buffer_converter_illegal_substchar(MBSTRG(outconv), MBSTRG(current_filter_illegal_substchar));
2220 
2221  	/* feed the string */
2222  	mbfl_string_init(&string);
2223 	/* these are not needed. convd has encoding info.
2224 	string.no_language = MBSTRG(language);
2225 	string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2226 	*/
2227  	string.val = (unsigned char *)arg_string;
2228  	string.len = arg_string_len;
2229  	mbfl_buffer_converter_feed(MBSTRG(outconv), &string);
2230  	if (last_feed) {
2231  		mbfl_buffer_converter_flush(MBSTRG(outconv));
2232 	}
2233  	/* get the converter output, and return it */
2234  	mbfl_buffer_converter_result(MBSTRG(outconv), &result);
2235 	// TODO: avoid reallocation ???
2236  	RETVAL_STRINGL((char *)result.val, result.len);		/* the string is already strdup()'ed */
2237 	efree(result.val);
2238 
2239  	/* delete the converter if it is the last feed. */
2240  	if (last_feed) {
2241 		MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
2242 		mbfl_buffer_converter_delete(MBSTRG(outconv));
2243 		MBSTRG(outconv) = NULL;
2244 	}
2245 }
2246 /* }}} */
2247 
2248 /* {{{ proto int mb_strlen(string str [, string encoding])
2249    Get character numbers of a string */
PHP_FUNCTION(mb_strlen)2250 PHP_FUNCTION(mb_strlen)
2251 {
2252 	int n;
2253 	mbfl_string string;
2254 	char *enc_name = NULL;
2255 	size_t enc_name_len, string_len;
2256 
2257 	mbfl_string_init(&string);
2258 
2259 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|s", (char **)&string.val, &string_len, &enc_name, &enc_name_len) == FAILURE) {
2260 		return;
2261 	}
2262 
2263 	if (ZEND_SIZE_T_UINT_OVFL(string_len)) {
2264 			php_error_docref(NULL, E_WARNING, "String overflows the max allowed length of %u", UINT_MAX);
2265 			return;
2266 	}
2267 
2268 	string.len = (uint32_t)string_len;
2269 
2270 	string.no_language = MBSTRG(language);
2271 	if (enc_name == NULL) {
2272 		string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2273 	} else {
2274 		string.no_encoding = mbfl_name2no_encoding(enc_name);
2275 		if (string.no_encoding == mbfl_no_encoding_invalid) {
2276 			php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2277 			RETURN_FALSE;
2278 		}
2279 	}
2280 
2281 	n = mbfl_strlen(&string);
2282 	if (n >= 0) {
2283 		RETVAL_LONG(n);
2284 	} else {
2285 		RETVAL_FALSE;
2286 	}
2287 }
2288 /* }}} */
2289 
2290 /* {{{ proto int mb_strpos(string haystack, string needle [, int offset [, string encoding]])
2291    Find position of first occurrence of a string within another */
PHP_FUNCTION(mb_strpos)2292 PHP_FUNCTION(mb_strpos)
2293 {
2294 	int n, reverse = 0;
2295 	zend_long offset = 0, slen;
2296 	mbfl_string haystack, needle;
2297 	char *enc_name = NULL;
2298 	size_t enc_name_len, haystack_len, needle_len;
2299 
2300 	mbfl_string_init(&haystack);
2301 	mbfl_string_init(&needle);
2302 	haystack.no_language = MBSTRG(language);
2303 	haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2304 	needle.no_language = MBSTRG(language);
2305 	needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2306 
2307 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|ls", (char **)&haystack.val, &haystack_len, (char **)&needle.val, &needle_len, &offset, &enc_name, &enc_name_len) == FAILURE) {
2308 		return;
2309 	}
2310 
2311 	if (ZEND_SIZE_T_UINT_OVFL(haystack_len)) {
2312 			php_error_docref(NULL, E_WARNING, "Haystack length overflows the max allowed length of %u", UINT_MAX);
2313 			return;
2314 	} else if (ZEND_SIZE_T_UINT_OVFL(needle_len)) {
2315 			php_error_docref(NULL, E_WARNING, "Needle length overflows the max allowed length of %u", UINT_MAX);
2316 			return;
2317 	}
2318 
2319 	haystack.len = (uint32_t)haystack_len;
2320 	needle.len = (uint32_t)needle_len;
2321 
2322 	if (enc_name != NULL) {
2323 		haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2324 		if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2325 			php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2326 			RETURN_FALSE;
2327 		}
2328 	}
2329 
2330 	slen = mbfl_strlen(&haystack);
2331 	if (offset < 0) {
2332 		offset += slen;
2333 	}
2334 	if (offset < 0 || offset > slen) {
2335 		php_error_docref(NULL, E_WARNING, "Offset not contained in string");
2336 		RETURN_FALSE;
2337 	}
2338 	if (needle.len == 0) {
2339 		php_error_docref(NULL, E_WARNING, "Empty delimiter");
2340 		RETURN_FALSE;
2341 	}
2342 
2343 	n = mbfl_strpos(&haystack, &needle, offset, reverse);
2344 	if (n >= 0) {
2345 		RETVAL_LONG(n);
2346 	} else {
2347 		switch (-n) {
2348 		case 1:
2349 			break;
2350 		case 2:
2351 			php_error_docref(NULL, E_WARNING, "Needle has not positive length");
2352 			break;
2353 		case 4:
2354 			php_error_docref(NULL, E_WARNING, "Unknown encoding or conversion error");
2355 			break;
2356 		case 8:
2357 			php_error_docref(NULL, E_NOTICE, "Argument is empty");
2358 			break;
2359 		default:
2360 			php_error_docref(NULL, E_WARNING, "Unknown error in mb_strpos");
2361 			break;
2362 		}
2363 		RETVAL_FALSE;
2364 	}
2365 }
2366 /* }}} */
2367 
2368 /* {{{ proto int mb_strrpos(string haystack, string needle [, int offset [, string encoding]])
2369    Find position of last occurrence of a string within another */
PHP_FUNCTION(mb_strrpos)2370 PHP_FUNCTION(mb_strrpos)
2371 {
2372 	int n;
2373 	mbfl_string haystack, needle;
2374 	char *enc_name = NULL;
2375 	size_t enc_name_len, haystack_len, needle_len;
2376 	zval *zoffset = NULL;
2377 	long offset = 0, str_flg;
2378 	char *enc_name2 = NULL;
2379 	int enc_name_len2;
2380 
2381 	mbfl_string_init(&haystack);
2382 	mbfl_string_init(&needle);
2383 	haystack.no_language = MBSTRG(language);
2384 	haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2385 	needle.no_language = MBSTRG(language);
2386 	needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2387 
2388 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|zs", (char **)&haystack.val, &haystack_len, (char **)&needle.val, &needle_len, &zoffset, &enc_name, &enc_name_len) == FAILURE) {
2389 		return;
2390 	}
2391 
2392 	if (ZEND_SIZE_T_UINT_OVFL(haystack_len)) {
2393 			php_error_docref(NULL, E_WARNING, "Haystack length overflows the max allowed length of %u", UINT_MAX);
2394 			return;
2395 	} else if (ZEND_SIZE_T_UINT_OVFL(needle_len)) {
2396 			php_error_docref(NULL, E_WARNING, "Needle length overflows the max allowed length of %u", UINT_MAX);
2397 			return;
2398 	}
2399 
2400 	haystack.len = (uint32_t)haystack_len;
2401 	needle.len = (uint32_t)needle_len;
2402 
2403 	if (zoffset) {
2404 		if (Z_TYPE_P(zoffset) == IS_STRING) {
2405 			enc_name2     = Z_STRVAL_P(zoffset);
2406 			enc_name_len2 = Z_STRLEN_P(zoffset);
2407 			str_flg       = 1;
2408 
2409 			if (enc_name2 != NULL) {
2410 				switch (*enc_name2) {
2411 					case '0':
2412 					case '1':
2413 					case '2':
2414 					case '3':
2415 					case '4':
2416 					case '5':
2417 					case '6':
2418 					case '7':
2419 					case '8':
2420 					case '9':
2421 					case ' ':
2422 					case '-':
2423 					case '.':
2424 						break;
2425 					default :
2426 						str_flg = 0;
2427 						break;
2428 				}
2429 			}
2430 
2431 			if (str_flg) {
2432 				convert_to_long_ex(zoffset);
2433 				offset   = Z_LVAL_P(zoffset);
2434 			} else {
2435 				enc_name     = enc_name2;
2436 				enc_name_len = enc_name_len2;
2437 			}
2438 		} else {
2439 			convert_to_long_ex(zoffset);
2440 			offset = Z_LVAL_P(zoffset);
2441 		}
2442 	}
2443 
2444 	if (enc_name != NULL) {
2445 		haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2446 		if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2447 			php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2448 			RETURN_FALSE;
2449 		}
2450 	}
2451 
2452 	if (haystack.len <= 0) {
2453 		RETURN_FALSE;
2454 	}
2455 	if (needle.len <= 0) {
2456 		RETURN_FALSE;
2457 	}
2458 
2459 	{
2460 		int haystack_char_len = mbfl_strlen(&haystack);
2461 		if ((offset > 0 && offset > haystack_char_len) ||
2462 			(offset < 0 && -offset > haystack_char_len)) {
2463 			php_error_docref(NULL, E_WARNING, "Offset is greater than the length of haystack string");
2464 			RETURN_FALSE;
2465 		}
2466 	}
2467 
2468 	n = mbfl_strpos(&haystack, &needle, offset, 1);
2469 	if (n >= 0) {
2470 		RETVAL_LONG(n);
2471 	} else {
2472 		RETVAL_FALSE;
2473 	}
2474 }
2475 /* }}} */
2476 
2477 /* {{{ proto int mb_stripos(string haystack, string needle [, int offset [, string encoding]])
2478    Finds position of first occurrence of a string within another, case insensitive */
PHP_FUNCTION(mb_stripos)2479 PHP_FUNCTION(mb_stripos)
2480 {
2481 	int n = -1;
2482 	zend_long offset = 0;
2483 	mbfl_string haystack, needle;
2484 	const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
2485 	size_t from_encoding_len, haystack_len, needle_len;
2486 
2487 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|ls", (char **)&haystack.val, &haystack_len, (char **)&needle.val, &needle_len, &offset, &from_encoding, &from_encoding_len) == FAILURE) {
2488 		return;
2489 	}
2490 
2491 	if (ZEND_SIZE_T_UINT_OVFL(haystack_len)) {
2492 			php_error_docref(NULL, E_WARNING, "Haystack length overflows the max allowed length of %u", UINT_MAX);
2493 			return;
2494 	} else if (ZEND_SIZE_T_UINT_OVFL(needle_len)) {
2495 			php_error_docref(NULL, E_WARNING, "Needle length overflows the max allowed length of %u", UINT_MAX);
2496 			return;
2497 	}
2498 
2499 	haystack.len = (uint32_t)haystack_len;
2500 	needle.len = (uint32_t)needle_len;
2501 
2502 	if (needle.len == 0) {
2503 		php_error_docref(NULL, E_WARNING, "Empty delimiter");
2504 		RETURN_FALSE;
2505 	}
2506 	n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, from_encoding);
2507 
2508 	if (n >= 0) {
2509 		RETVAL_LONG(n);
2510 	} else {
2511 		RETVAL_FALSE;
2512 	}
2513 }
2514 /* }}} */
2515 
2516 /* {{{ proto int mb_strripos(string haystack, string needle [, int offset [, string encoding]])
2517    Finds position of last occurrence of a string within another, case insensitive */
PHP_FUNCTION(mb_strripos)2518 PHP_FUNCTION(mb_strripos)
2519 {
2520 	int n = -1;
2521 	zend_long offset = 0;
2522 	mbfl_string haystack, needle;
2523 	const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
2524 	size_t from_encoding_len, haystack_len, needle_len;
2525 
2526 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|ls", (char **)&haystack.val, &haystack_len, (char **)&needle.val, &needle_len, &offset, &from_encoding, &from_encoding_len) == FAILURE) {
2527 		return;
2528 	}
2529 
2530 	if (ZEND_SIZE_T_UINT_OVFL(haystack_len)) {
2531 			php_error_docref(NULL, E_WARNING, "Haystack length overflows the max allowed length of %u", UINT_MAX);
2532 			return;
2533 	} else if (ZEND_SIZE_T_UINT_OVFL(needle_len)) {
2534 			php_error_docref(NULL, E_WARNING, "Needle length overflows the max allowed length of %u", UINT_MAX);
2535 			return;
2536 	}
2537 
2538 	haystack.len = (uint32_t)haystack_len;
2539 	needle.len = (uint32_t)needle_len;
2540 
2541 	n = php_mb_stripos(1, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, from_encoding);
2542 
2543 	if (n >= 0) {
2544 		RETVAL_LONG(n);
2545 	} else {
2546 		RETVAL_FALSE;
2547 	}
2548 }
2549 /* }}} */
2550 
2551 /* {{{ proto string mb_strstr(string haystack, string needle[, bool part[, string encoding]])
2552    Finds first occurrence of a string within another */
PHP_FUNCTION(mb_strstr)2553 PHP_FUNCTION(mb_strstr)
2554 {
2555 	int n, len, mblen;
2556 	mbfl_string haystack, needle, result, *ret = NULL;
2557 	char *enc_name = NULL;
2558 	size_t enc_name_len, haystack_len, needle_len;
2559 	zend_bool part = 0;
2560 
2561 	mbfl_string_init(&haystack);
2562 	mbfl_string_init(&needle);
2563 	haystack.no_language = MBSTRG(language);
2564 	haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2565 	needle.no_language = MBSTRG(language);
2566 	needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2567 
2568 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|bs", (char **)&haystack.val, &haystack_len, (char **)&needle.val, &needle_len, &part, &enc_name, &enc_name_len) == FAILURE) {
2569 		return;
2570 	}
2571 
2572 	if (ZEND_SIZE_T_UINT_OVFL(haystack_len)) {
2573 			php_error_docref(NULL, E_WARNING, "Haystack length overflows the max allowed length of %u", UINT_MAX);
2574 			return;
2575 	} else if (ZEND_SIZE_T_UINT_OVFL(needle_len)) {
2576 			php_error_docref(NULL, E_WARNING, "Needle length overflows the max allowed length of %u", UINT_MAX);
2577 			return;
2578 	}
2579 
2580 	haystack.len = (uint32_t)haystack_len;
2581 	needle.len = (uint32_t)needle_len;
2582 
2583 	if (enc_name != NULL) {
2584 		haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2585 		if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2586 			php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2587 			RETURN_FALSE;
2588 		}
2589 	}
2590 
2591 	if (needle.len <= 0) {
2592 		php_error_docref(NULL, E_WARNING, "Empty delimiter");
2593 		RETURN_FALSE;
2594 	}
2595 	n = mbfl_strpos(&haystack, &needle, 0, 0);
2596 	if (n >= 0) {
2597 		mblen = mbfl_strlen(&haystack);
2598 		if (part) {
2599 			ret = mbfl_substr(&haystack, &result, 0, n);
2600 			if (ret != NULL) {
2601 				// TODO: avoid reallocation ???
2602 				RETVAL_STRINGL((char *)ret->val, ret->len);
2603 				efree(ret->val);
2604 			} else {
2605 				RETVAL_FALSE;
2606 			}
2607 		} else {
2608 			len = (mblen - n);
2609 			ret = mbfl_substr(&haystack, &result, n, len);
2610 			if (ret != NULL) {
2611 				// TODO: avoid reallocation ???
2612 				RETVAL_STRINGL((char *)ret->val, ret->len);
2613 				efree(ret->val);
2614 			} else {
2615 				RETVAL_FALSE;
2616 			}
2617 		}
2618 	} else {
2619 		RETVAL_FALSE;
2620 	}
2621 }
2622 /* }}} */
2623 
2624 /* {{{ proto string mb_strrchr(string haystack, string needle[, bool part[, string encoding]])
2625    Finds the last occurrence of a character in a string within another */
PHP_FUNCTION(mb_strrchr)2626 PHP_FUNCTION(mb_strrchr)
2627 {
2628 	int n, len, mblen;
2629 	mbfl_string haystack, needle, result, *ret = NULL;
2630 	char *enc_name = NULL;
2631 	size_t enc_name_len, haystack_len, needle_len;
2632 	zend_bool part = 0;
2633 
2634 	mbfl_string_init(&haystack);
2635 	mbfl_string_init(&needle);
2636 	haystack.no_language = MBSTRG(language);
2637 	haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2638 	needle.no_language = MBSTRG(language);
2639 	needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2640 
2641 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|bs", (char **)&haystack.val, &haystack_len, (char **)&needle.val, &needle_len, &part, &enc_name, &enc_name_len) == FAILURE) {
2642 		return;
2643 	}
2644 
2645 	if (ZEND_SIZE_T_UINT_OVFL(haystack_len)) {
2646 			php_error_docref(NULL, E_WARNING, "Haystack length overflows the max allowed length of %u", UINT_MAX);
2647 			return;
2648 	} else if (ZEND_SIZE_T_UINT_OVFL(needle_len)) {
2649 			php_error_docref(NULL, E_WARNING, "Needle length overflows the max allowed length of %u", UINT_MAX);
2650 			return;
2651 	}
2652 
2653 	haystack.len = (uint32_t)haystack_len;
2654 	needle.len = (uint32_t)needle_len;
2655 
2656 	if (enc_name != NULL) {
2657 		haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2658 		if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2659 			php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2660 			RETURN_FALSE;
2661 		}
2662 	}
2663 
2664 	if (haystack.len <= 0) {
2665 		RETURN_FALSE;
2666 	}
2667 	if (needle.len <= 0) {
2668 		RETURN_FALSE;
2669 	}
2670 	n = mbfl_strpos(&haystack, &needle, 0, 1);
2671 	if (n >= 0) {
2672 		mblen = mbfl_strlen(&haystack);
2673 		if (part) {
2674 			ret = mbfl_substr(&haystack, &result, 0, n);
2675 			if (ret != NULL) {
2676 				// TODO: avoid reallocation ???
2677 				RETVAL_STRINGL((char *)ret->val, ret->len);
2678 				efree(ret->val);
2679 			} else {
2680 				RETVAL_FALSE;
2681 			}
2682 		} else {
2683 			len = (mblen - n);
2684 			ret = mbfl_substr(&haystack, &result, n, len);
2685 			if (ret != NULL) {
2686 				// TODO: avoid reallocation ???
2687 				RETVAL_STRINGL((char *)ret->val, ret->len);
2688 				efree(ret->val);
2689 			} else {
2690 				RETVAL_FALSE;
2691 			}
2692 		}
2693 	} else {
2694 		RETVAL_FALSE;
2695 	}
2696 }
2697 /* }}} */
2698 
2699 /* {{{ proto string mb_stristr(string haystack, string needle[, bool part[, string encoding]])
2700    Finds first occurrence of a string within another, case insensitive */
PHP_FUNCTION(mb_stristr)2701 PHP_FUNCTION(mb_stristr)
2702 {
2703 	zend_bool part = 0;
2704 	size_t from_encoding_len, len, mblen, haystack_len, needle_len;
2705 	int n;
2706 	mbfl_string haystack, needle, result, *ret = NULL;
2707 	const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
2708 	mbfl_string_init(&haystack);
2709 	mbfl_string_init(&needle);
2710 	haystack.no_language = MBSTRG(language);
2711 	haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2712 	needle.no_language = MBSTRG(language);
2713 	needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2714 
2715 
2716 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|bs", (char **)&haystack.val, &haystack_len, (char **)&needle.val, &needle_len, &part, &from_encoding, &from_encoding_len) == FAILURE) {
2717 		return;
2718 	}
2719 
2720 	if (ZEND_SIZE_T_UINT_OVFL(haystack_len)) {
2721 			php_error_docref(NULL, E_WARNING, "Haystack length overflows the max allowed length of %u", UINT_MAX);
2722 			return;
2723 	} else if (ZEND_SIZE_T_UINT_OVFL(needle_len)) {
2724 			php_error_docref(NULL, E_WARNING, "Needle length overflows the max allowed length of %u", UINT_MAX);
2725 			return;
2726 	}
2727 
2728 	haystack.len = (uint32_t)haystack_len;
2729 	needle.len = (uint32_t)needle_len;
2730 
2731 	if (!needle.len) {
2732 		php_error_docref(NULL, E_WARNING, "Empty delimiter");
2733 		RETURN_FALSE;
2734 	}
2735 
2736 	haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding);
2737 	if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2738 		php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", from_encoding);
2739 		RETURN_FALSE;
2740 	}
2741 
2742 	n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, 0, from_encoding);
2743 
2744 	if (n <0) {
2745 		RETURN_FALSE;
2746 	}
2747 
2748 	mblen = mbfl_strlen(&haystack);
2749 
2750 	if (part) {
2751 		ret = mbfl_substr(&haystack, &result, 0, n);
2752 		if (ret != NULL) {
2753 			// TODO: avoid reallocation ???
2754 			RETVAL_STRINGL((char *)ret->val, ret->len);
2755 			efree(ret->val);
2756 		} else {
2757 			RETVAL_FALSE;
2758 		}
2759 	} else {
2760 		len = (mblen - n);
2761 		ret = mbfl_substr(&haystack, &result, n, len);
2762 		if (ret != NULL) {
2763 			// TODO: avoid reallocaton ???
2764 			RETVAL_STRINGL((char *)ret->val, ret->len);
2765 			efree(ret->val);
2766 		} else {
2767 			RETVAL_FALSE;
2768 		}
2769 	}
2770 }
2771 /* }}} */
2772 
2773 /* {{{ proto string mb_strrichr(string haystack, string needle[, bool part[, string encoding]])
2774    Finds the last occurrence of a character in a string within another, case insensitive */
PHP_FUNCTION(mb_strrichr)2775 PHP_FUNCTION(mb_strrichr)
2776 {
2777 	zend_bool part = 0;
2778 	int n, len, mblen;
2779 	size_t from_encoding_len, haystack_len, needle_len;
2780 	mbfl_string haystack, needle, result, *ret = NULL;
2781 	const char *from_encoding = MBSTRG(current_internal_encoding)->name;
2782 	mbfl_string_init(&haystack);
2783 	mbfl_string_init(&needle);
2784 	haystack.no_language = MBSTRG(language);
2785 	haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2786 	needle.no_language = MBSTRG(language);
2787 	needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2788 
2789 
2790 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|bs", (char **)&haystack.val, &haystack_len, (char **)&needle.val, &needle_len, &part, &from_encoding, &from_encoding_len) == FAILURE) {
2791 		return;
2792 	}
2793 
2794 	if (ZEND_SIZE_T_UINT_OVFL(haystack_len)) {
2795 			php_error_docref(NULL, E_WARNING, "Haystack length overflows the max allowed length of %u", UINT_MAX);
2796 			return;
2797 	} else if (ZEND_SIZE_T_UINT_OVFL(needle_len)) {
2798 			php_error_docref(NULL, E_WARNING, "Needle length overflows the max allowed length of %u", UINT_MAX);
2799 			return;
2800 	}
2801 
2802 	haystack.len = (uint32_t)haystack_len;
2803 	needle.len = (uint32_t)needle_len;
2804 
2805 	haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding);
2806 	if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2807 		php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", from_encoding);
2808 		RETURN_FALSE;
2809 	}
2810 
2811 	n = php_mb_stripos(1, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, 0, from_encoding);
2812 
2813 	if (n <0) {
2814 		RETURN_FALSE;
2815 	}
2816 
2817 	mblen = mbfl_strlen(&haystack);
2818 
2819 	if (part) {
2820 		ret = mbfl_substr(&haystack, &result, 0, n);
2821 		if (ret != NULL) {
2822 			// TODO: avoid reallocation ???
2823 			RETVAL_STRINGL((char *)ret->val, ret->len);
2824 			efree(ret->val);
2825 		} else {
2826 			RETVAL_FALSE;
2827 		}
2828 	} else {
2829 		len = (mblen - n);
2830 		ret = mbfl_substr(&haystack, &result, n, len);
2831 		if (ret != NULL) {
2832 			// TODO: avoid reallocation ???
2833 			RETVAL_STRINGL((char *)ret->val, ret->len);
2834 			efree(ret->val);
2835 		} else {
2836 			RETVAL_FALSE;
2837 		}
2838 	}
2839 }
2840 /* }}} */
2841 
2842 /* {{{ proto int mb_substr_count(string haystack, string needle [, string encoding])
2843    Count the number of substring occurrences */
PHP_FUNCTION(mb_substr_count)2844 PHP_FUNCTION(mb_substr_count)
2845 {
2846 	int n;
2847 	mbfl_string haystack, needle;
2848 	char *enc_name = NULL;
2849 	size_t enc_name_len, haystack_len, needle_len;
2850 
2851 	mbfl_string_init(&haystack);
2852 	mbfl_string_init(&needle);
2853 	haystack.no_language = MBSTRG(language);
2854 	haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2855 	needle.no_language = MBSTRG(language);
2856 	needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2857 
2858 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|s", (char **)&haystack.val, &haystack_len, (char **)&needle.val, &needle_len, &enc_name, &enc_name_len) == FAILURE) {
2859 		return;
2860 	}
2861 
2862 	if (ZEND_SIZE_T_UINT_OVFL(haystack_len)) {
2863 			php_error_docref(NULL, E_WARNING, "Haystack length overflows the max allowed length of %u", UINT_MAX);
2864 			return;
2865 	} else if (ZEND_SIZE_T_UINT_OVFL(needle_len)) {
2866 			php_error_docref(NULL, E_WARNING, "Needle length overflows the max allowed length of %u", UINT_MAX);
2867 			return;
2868 	}
2869 
2870 	haystack.len = (uint32_t)haystack_len;
2871 	needle.len = (uint32_t)needle_len;
2872 
2873 	if (enc_name != NULL) {
2874 		haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2875 		if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2876 			php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2877 			RETURN_FALSE;
2878 		}
2879 	}
2880 
2881 	if (needle.len <= 0) {
2882 		php_error_docref(NULL, E_WARNING, "Empty substring");
2883 		RETURN_FALSE;
2884 	}
2885 
2886 	n = mbfl_substr_count(&haystack, &needle);
2887 	if (n >= 0) {
2888 		RETVAL_LONG(n);
2889 	} else {
2890 		RETVAL_FALSE;
2891 	}
2892 }
2893 /* }}} */
2894 
2895 /* {{{ proto string mb_substr(string str, int start [, int length [, string encoding]])
2896    Returns part of a string */
PHP_FUNCTION(mb_substr)2897 PHP_FUNCTION(mb_substr)
2898 {
2899 	char *str, *encoding = NULL;
2900 	zend_long from, len;
2901 	int mblen;
2902 	size_t str_len, encoding_len;
2903 	zend_bool len_is_null = 1;
2904 	mbfl_string string, result, *ret;
2905 
2906 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl|l!s", &str, &str_len, &from, &len, &len_is_null, &encoding, &encoding_len) == FAILURE) {
2907 		return;
2908 	}
2909 
2910 	mbfl_string_init(&string);
2911 	string.no_language = MBSTRG(language);
2912 	string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2913 
2914 	if (encoding) {
2915 		string.no_encoding = mbfl_name2no_encoding(encoding);
2916 		if (string.no_encoding == mbfl_no_encoding_invalid) {
2917 			php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", encoding);
2918 			RETURN_FALSE;
2919 		}
2920 	}
2921 
2922 	string.val = (unsigned char *)str;
2923 	string.len = str_len;
2924 
2925 	if (len_is_null) {
2926 		len = str_len;
2927 	}
2928 
2929 	/* measures length */
2930 	mblen = 0;
2931 	if (from < 0 || len < 0) {
2932 		mblen = mbfl_strlen(&string);
2933 	}
2934 
2935 	/* if "from" position is negative, count start position from the end
2936 	 * of the string
2937 	 */
2938 	if (from < 0) {
2939 		from = mblen + from;
2940 		if (from < 0) {
2941 			from = 0;
2942 		}
2943 	}
2944 
2945 	/* if "length" position is negative, set it to the length
2946 	 * needed to stop that many chars from the end of the string
2947 	 */
2948 	if (len < 0) {
2949 		len = (mblen - from) + len;
2950 		if (len < 0) {
2951 			len = 0;
2952 		}
2953 	}
2954 
2955 	if (((MBSTRG(func_overload) & MB_OVERLOAD_STRING) == MB_OVERLOAD_STRING)
2956 		&& (from >= mbfl_strlen(&string))) {
2957 		RETURN_FALSE;
2958 	}
2959 
2960 	if (from > INT_MAX) {
2961 		from = INT_MAX;
2962 	}
2963 	if (len > INT_MAX) {
2964 		len = INT_MAX;
2965 	}
2966 
2967 	ret = mbfl_substr(&string, &result, from, len);
2968 	if (NULL == ret) {
2969 		RETURN_FALSE;
2970 	}
2971 
2972 	// TODO: avoid reallocation ???
2973 	RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
2974 	efree(ret->val);
2975 }
2976 /* }}} */
2977 
2978 /* {{{ proto string mb_strcut(string str, int start [, int length [, string encoding]])
2979    Returns part of a string */
PHP_FUNCTION(mb_strcut)2980 PHP_FUNCTION(mb_strcut)
2981 {
2982 	char *encoding = NULL;
2983 	zend_long from, len;
2984 	size_t encoding_len, string_len;
2985 	zend_bool len_is_null = 1;
2986 	mbfl_string string, result, *ret;
2987 
2988 	mbfl_string_init(&string);
2989 	string.no_language = MBSTRG(language);
2990 	string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2991 
2992 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl|l!s", (char **)&string.val, &string_len, &from, &len, &len_is_null, &encoding, &encoding_len) == FAILURE) {
2993 		return;
2994 	}
2995 
2996 	if (ZEND_SIZE_T_UINT_OVFL(string_len)) {
2997 			php_error_docref(NULL, E_WARNING, "String length overflows the max allowed length of %u", UINT_MAX);
2998 			return;
2999 	}
3000 
3001 	string.len = (uint32_t)string_len;
3002 
3003 	if (encoding) {
3004 		string.no_encoding = mbfl_name2no_encoding(encoding);
3005 		if (string.no_encoding == mbfl_no_encoding_invalid) {
3006 			php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", encoding);
3007 			RETURN_FALSE;
3008 		}
3009 	}
3010 
3011 	if (len_is_null) {
3012 		len = string.len;
3013 	}
3014 
3015 	/* if "from" position is negative, count start position from the end
3016 	 * of the string
3017 	 */
3018 	if (from < 0) {
3019 		from = string.len + from;
3020 		if (from < 0) {
3021 			from = 0;
3022 		}
3023 	}
3024 
3025 	/* if "length" position is negative, set it to the length
3026 	 * needed to stop that many chars from the end of the string
3027 	 */
3028 	if (len < 0) {
3029 		len = (string.len - from) + len;
3030 		if (len < 0) {
3031 			len = 0;
3032 		}
3033 	}
3034 
3035 	if ((unsigned int)from > string.len) {
3036 		RETURN_FALSE;
3037 	}
3038 
3039 	ret = mbfl_strcut(&string, &result, from, len);
3040 	if (ret == NULL) {
3041 		RETURN_FALSE;
3042 	}
3043 
3044 	// TODO: avoid reallocation ???
3045 	RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
3046 	efree(ret->val);
3047 }
3048 /* }}} */
3049 
3050 /* {{{ proto int mb_strwidth(string str [, string encoding])
3051    Gets terminal width of a string */
PHP_FUNCTION(mb_strwidth)3052 PHP_FUNCTION(mb_strwidth)
3053 {
3054 	int n;
3055 	mbfl_string string;
3056 	char *enc_name = NULL;
3057 	size_t enc_name_len, string_len;
3058 
3059 	mbfl_string_init(&string);
3060 
3061 	string.no_language = MBSTRG(language);
3062 	string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
3063 
3064 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|s", (char **)&string.val, &string_len, &enc_name, &enc_name_len) == FAILURE) {
3065 		return;
3066 	}
3067 
3068 	if (ZEND_SIZE_T_UINT_OVFL(string_len)) {
3069 			php_error_docref(NULL, E_WARNING, "String length overflows the max allowed length of %u", UINT_MAX);
3070 			return;
3071 	}
3072 
3073 	string.len = (uint32_t)string_len;
3074 
3075 	if (enc_name != NULL) {
3076 		string.no_encoding = mbfl_name2no_encoding(enc_name);
3077 		if (string.no_encoding == mbfl_no_encoding_invalid) {
3078 			php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc_name);
3079 			RETURN_FALSE;
3080 		}
3081 	}
3082 
3083 	n = mbfl_strwidth(&string);
3084 	if (n >= 0) {
3085 		RETVAL_LONG(n);
3086 	} else {
3087 		RETVAL_FALSE;
3088 	}
3089 }
3090 /* }}} */
3091 
3092 /* {{{ proto string mb_strimwidth(string str, int start, int width [, string trimmarker [, string encoding]])
3093    Trim the string in terminal width */
PHP_FUNCTION(mb_strimwidth)3094 PHP_FUNCTION(mb_strimwidth)
3095 {
3096 	char *str, *trimmarker = NULL, *encoding = NULL;
3097 	zend_long from, width, swidth;
3098 	size_t str_len, trimmarker_len, encoding_len;
3099 	mbfl_string string, result, marker, *ret;
3100 
3101 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "sll|ss", &str, &str_len, &from, &width, &trimmarker, &trimmarker_len, &encoding, &encoding_len) == FAILURE) {
3102 		return;
3103 	}
3104 
3105 	mbfl_string_init(&string);
3106 	mbfl_string_init(&marker);
3107 	string.no_language = MBSTRG(language);
3108 	string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
3109 	marker.no_language = MBSTRG(language);
3110 	marker.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
3111 	marker.val = NULL;
3112 	marker.len = 0;
3113 
3114 	if (encoding) {
3115 		string.no_encoding = marker.no_encoding = mbfl_name2no_encoding(encoding);
3116 		if (string.no_encoding == mbfl_no_encoding_invalid) {
3117 			php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", encoding);
3118 			RETURN_FALSE;
3119 		}
3120 	}
3121 
3122 	string.val = (unsigned char *)str;
3123 	string.len = str_len;
3124 
3125 	if ((from < 0) || (width < 0)) {
3126 		swidth = mbfl_strwidth(&string);
3127 	}
3128 
3129 	if (from < 0) {
3130 		from += swidth;
3131 	}
3132 
3133 	if (from < 0 || (size_t)from > str_len) {
3134 		php_error_docref(NULL, E_WARNING, "Start position is out of range");
3135 		RETURN_FALSE;
3136 	}
3137 
3138 	if (width < 0) {
3139 		width = swidth + width - from;
3140 	}
3141 
3142 	if (width < 0) {
3143 		php_error_docref(NULL, E_WARNING, "Width is out of range");
3144 		RETURN_FALSE;
3145 	}
3146 
3147 	if (trimmarker) {
3148 		marker.val = (unsigned char *)trimmarker;
3149 		marker.len = trimmarker_len;
3150 	}
3151 
3152 	ret = mbfl_strimwidth(&string, &marker, &result, from, width);
3153 
3154 	if (ret == NULL) {
3155 		RETURN_FALSE;
3156 	}
3157 	// TODO: avoid reallocation ???
3158 	RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
3159 	efree(ret->val);
3160 }
3161 /* }}} */
3162 
3163 
3164 /* See mbfl_no_encoding definition for list of unsupported encodings */
php_mb_is_unsupported_no_encoding(enum mbfl_no_encoding no_enc)3165 static inline zend_bool php_mb_is_unsupported_no_encoding(enum mbfl_no_encoding no_enc)
3166 {
3167 	return ((no_enc >= mbfl_no_encoding_invalid && no_enc <= mbfl_no_encoding_qprint)
3168 			|| (no_enc >= mbfl_no_encoding_utf7 && no_enc <= mbfl_no_encoding_utf7imap)
3169 			|| (no_enc >= mbfl_no_encoding_jis && no_enc <= mbfl_no_encoding_2022jpms)
3170 			|| (no_enc >= mbfl_no_encoding_cp50220 && no_enc <= mbfl_no_encoding_cp50222));
3171 }
3172 
3173 
3174 /* See mbfl_no_encoding definition for list of UTF-8 encodings */
php_mb_is_no_encoding_utf8(enum mbfl_no_encoding no_enc)3175 static inline zend_bool php_mb_is_no_encoding_utf8(enum mbfl_no_encoding no_enc)
3176 {
3177 	return (no_enc >= mbfl_no_encoding_utf8 && no_enc <= mbfl_no_encoding_utf8_sb);
3178 }
3179 
3180 
3181 /* {{{ MBSTRING_API char *php_mb_convert_encoding() */
php_mb_convert_encoding(const char * input,size_t length,const char * _to_encoding,const char * _from_encodings,size_t * output_len)3182 MBSTRING_API char *php_mb_convert_encoding(const char *input, size_t length, const char *_to_encoding, const char *_from_encodings, size_t *output_len)
3183 {
3184 	mbfl_string string, result, *ret;
3185 	const mbfl_encoding *from_encoding, *to_encoding;
3186 	mbfl_buffer_converter *convd;
3187 	size_t size;
3188 	const mbfl_encoding **list;
3189 	char *output=NULL;
3190 
3191 	if (output_len) {
3192 		*output_len = 0;
3193 	}
3194 	if (!input) {
3195 		return NULL;
3196 	}
3197 	/* new encoding */
3198 	if (_to_encoding && strlen(_to_encoding)) {
3199 		to_encoding = mbfl_name2encoding(_to_encoding);
3200 		if (!to_encoding) {
3201 			php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", _to_encoding);
3202 			return NULL;
3203 		}
3204 	} else {
3205 		to_encoding = MBSTRG(current_internal_encoding);
3206 	}
3207 
3208 	/* initialize string */
3209 	mbfl_string_init(&string);
3210 	mbfl_string_init(&result);
3211 	from_encoding = MBSTRG(current_internal_encoding);
3212 	string.no_encoding = from_encoding->no_encoding;
3213 	string.no_language = MBSTRG(language);
3214 	string.val = (unsigned char *)input;
3215 	string.len = length;
3216 
3217 	/* pre-conversion encoding */
3218 	if (_from_encodings) {
3219 		list = NULL;
3220 		size = 0;
3221 		php_mb_parse_encoding_list(_from_encodings, strlen(_from_encodings), &list, &size, 0);
3222 		if (size == 1) {
3223 			from_encoding = *list;
3224 			string.no_encoding = from_encoding->no_encoding;
3225 		} else if (size > 1) {
3226 			/* auto detect */
3227 			from_encoding = mbfl_identify_encoding2(&string, list, size, MBSTRG(strict_detection));
3228 			if (from_encoding) {
3229 				string.no_encoding = from_encoding->no_encoding;
3230 			} else {
3231 				php_error_docref(NULL, E_WARNING, "Unable to detect character encoding");
3232 				from_encoding = &mbfl_encoding_pass;
3233 				to_encoding = from_encoding;
3234 				string.no_encoding = from_encoding->no_encoding;
3235 			}
3236 		} else {
3237 			php_error_docref(NULL, E_WARNING, "Illegal character encoding specified");
3238 		}
3239 		if (list != NULL) {
3240 			efree((void *)list);
3241 		}
3242 	}
3243 
3244 	/* initialize converter */
3245 	convd = mbfl_buffer_converter_new2(from_encoding, to_encoding, string.len);
3246 	if (convd == NULL) {
3247 		php_error_docref(NULL, E_WARNING, "Unable to create character encoding converter");
3248 		return NULL;
3249 	}
3250 
3251 	mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
3252 	mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
3253 
3254 	/* do it */
3255 	ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
3256 	if (ret) {
3257 		if (output_len) {
3258 			*output_len = ret->len;
3259 		}
3260 		output = (char *)ret->val;
3261 	}
3262 
3263 	MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
3264 	mbfl_buffer_converter_delete(convd);
3265 	return output;
3266 }
3267 /* }}} */
3268 
php_mb_convert_encoding_recursive(HashTable * input,const char * _to_encoding,const char * _from_encodings)3269 MBSTRING_API HashTable *php_mb_convert_encoding_recursive(HashTable *input, const char *_to_encoding, const char *_from_encodings)
3270 {
3271 	HashTable *output, *chash;
3272 	zend_long idx;
3273 	zend_string *key;
3274 	zval *entry, entry_tmp;
3275 	size_t ckey_len, cval_len;
3276 	char *ckey, *cval;
3277 
3278 	if (!input) {
3279 		return NULL;
3280 	}
3281 
3282 	if (input->u.v.nApplyCount++ > 1) {
3283 		input->u.v.nApplyCount--;
3284 		php_error_docref(NULL, E_WARNING, "Cannot convert recursively referenced values");
3285 		return NULL;
3286 	}
3287 	output = (HashTable *)emalloc(sizeof(HashTable));
3288 	zend_hash_init(output, zend_hash_num_elements(input), NULL, ZVAL_PTR_DTOR, 0);
3289 	ZEND_HASH_FOREACH_KEY_VAL(input, idx, key, entry) {
3290 		/* convert key */
3291 		if (key) {
3292 			ckey = php_mb_convert_encoding(ZSTR_VAL(key), ZSTR_LEN(key), _to_encoding, _from_encodings, &ckey_len);
3293 			key = zend_string_init(ckey, ckey_len, 0);
3294 			efree(ckey);
3295 		}
3296 		/* convert value */
3297 		ZEND_ASSERT(entry);
3298 		switch(Z_TYPE_P(entry)) {
3299 			case IS_STRING:
3300 				cval = php_mb_convert_encoding(Z_STRVAL_P(entry), Z_STRLEN_P(entry), _to_encoding, _from_encodings, &cval_len);
3301 				ZVAL_STRINGL(&entry_tmp, cval, cval_len);
3302 				efree(cval);
3303 				break;
3304 			case IS_NULL:
3305 			case IS_TRUE:
3306 			case IS_FALSE:
3307 			case IS_LONG:
3308 			case IS_DOUBLE:
3309 				ZVAL_COPY(&entry_tmp, entry);
3310 				break;
3311 			case IS_ARRAY:
3312 				chash = php_mb_convert_encoding_recursive(HASH_OF(entry), _to_encoding, _from_encodings);
3313 				if (!chash) {
3314 					chash = (HashTable *)emalloc(sizeof(HashTable));
3315 					zend_hash_init(chash, 0, NULL, ZVAL_PTR_DTOR, 0);
3316 				}
3317 				ZVAL_ARR(&entry_tmp, chash);
3318 				break;
3319 			case IS_OBJECT:
3320 			default:
3321 				if (key) {
3322 					zend_string_release(key);
3323 				}
3324 				php_error_docref(NULL, E_WARNING, "Object is not supported");
3325 				continue;
3326 		}
3327 		if (key) {
3328 			zend_hash_add(output, key, &entry_tmp);
3329 			zend_string_release(key);
3330 		} else {
3331 			zend_hash_index_add(output, idx, &entry_tmp);
3332 		}
3333 	} ZEND_HASH_FOREACH_END();
3334 	input->u.v.nApplyCount--;
3335 
3336 	return output;
3337 }
3338 /* }}} */
3339 
3340 
3341 /* {{{ proto string mb_convert_encoding(string str, string to-encoding [, mixed from-encoding])
3342    Returns converted string in desired encoding */
PHP_FUNCTION(mb_convert_encoding)3343 PHP_FUNCTION(mb_convert_encoding)
3344 {
3345 	zval *input;
3346 	char *arg_new;
3347 	size_t new_len;
3348 	zval *arg_old = NULL;
3349 	size_t size, l, n;
3350 	char *_from_encodings = NULL, *ret, *s_free = NULL;
3351 
3352 	zval *hash_entry;
3353 	HashTable *target_hash;
3354 
3355 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "zs|z", &input, &arg_new, &new_len, &arg_old) == FAILURE) {
3356 		return;
3357 	}
3358 
3359 	if (Z_TYPE_P(input) != IS_STRING && Z_TYPE_P(input) != IS_ARRAY) {
3360 		convert_to_string(input);
3361 	}
3362 
3363 	if (arg_old) {
3364 		switch (Z_TYPE_P(arg_old)) {
3365 			case IS_ARRAY:
3366 				target_hash = Z_ARRVAL_P(arg_old);
3367 				_from_encodings = NULL;
3368 
3369 				ZEND_HASH_FOREACH_VAL(target_hash, hash_entry) {
3370 
3371 					convert_to_string_ex(hash_entry);
3372 
3373 					if ( _from_encodings) {
3374 						l = strlen(_from_encodings);
3375 						n = strlen(Z_STRVAL_P(hash_entry));
3376 						_from_encodings = erealloc(_from_encodings, l+n+2);
3377 						memcpy(_from_encodings + l, ",", 1);
3378 						memcpy(_from_encodings + l + 1, Z_STRVAL_P(hash_entry), Z_STRLEN_P(hash_entry) + 1);
3379 					} else {
3380 						_from_encodings = estrdup(Z_STRVAL_P(hash_entry));
3381 					}
3382 				} ZEND_HASH_FOREACH_END();
3383 
3384 				if (_from_encodings != NULL && !strlen(_from_encodings)) {
3385 					efree(_from_encodings);
3386 					_from_encodings = NULL;
3387 				}
3388 				s_free = _from_encodings;
3389 				break;
3390 			default:
3391 				convert_to_string(arg_old);
3392 				_from_encodings = Z_STRVAL_P(arg_old);
3393 				break;
3394 			}
3395 	}
3396 
3397 	if (Z_TYPE_P(input) == IS_STRING) {
3398 		/* new encoding */
3399 		ret = php_mb_convert_encoding(Z_STRVAL_P(input), Z_STRLEN_P(input), arg_new, _from_encodings, &size);
3400 		if (ret != NULL) {
3401 			// TODO: avoid reallocation ???
3402 			RETVAL_STRINGL(ret, size);		/* the string is already strdup()'ed */
3403 			efree(ret);
3404 		} else {
3405 			RETVAL_FALSE;
3406 		}
3407 		if (s_free) {
3408 			efree(s_free);
3409 		}
3410 	} else {
3411 		HashTable *tmp;
3412 		tmp = php_mb_convert_encoding_recursive(HASH_OF(input), arg_new, _from_encodings);
3413 		RETURN_ARR(tmp);
3414 	}
3415 
3416 	return;
3417 }
3418 /* }}} */
3419 
3420 /* {{{ proto string mb_convert_case(string sourcestring, int mode [, string encoding])
3421    Returns a case-folded version of sourcestring */
PHP_FUNCTION(mb_convert_case)3422 PHP_FUNCTION(mb_convert_case)
3423 {
3424 	const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
3425 	char *str;
3426 	size_t str_len, from_encoding_len;
3427 	zend_long case_mode = 0;
3428 	char *newstr;
3429 	size_t ret_len;
3430 
3431 	RETVAL_FALSE;
3432 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl|s!", &str, &str_len,
3433 				&case_mode, &from_encoding, &from_encoding_len) == FAILURE) {
3434 		return;
3435 	}
3436 
3437 	newstr = php_unicode_convert_case(case_mode, str, (size_t) str_len, &ret_len, from_encoding);
3438 
3439 	if (newstr) {
3440 		// TODO: avoid reallocation ???
3441 		RETVAL_STRINGL(newstr, ret_len);
3442 		efree(newstr);
3443 	}
3444 }
3445 /* }}} */
3446 
3447 /* {{{ proto string mb_strtoupper(string sourcestring [, string encoding])
3448  *  Returns a uppercased version of sourcestring
3449  */
PHP_FUNCTION(mb_strtoupper)3450 PHP_FUNCTION(mb_strtoupper)
3451 {
3452 	const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
3453 	char *str;
3454 	size_t str_len, from_encoding_len;
3455 	char *newstr;
3456 	size_t ret_len;
3457 
3458 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|s!", &str, &str_len,
3459 				&from_encoding, &from_encoding_len) == FAILURE) {
3460 		return;
3461 	}
3462 	newstr = php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, str, (size_t) str_len, &ret_len, from_encoding);
3463 
3464 	if (newstr) {
3465 		// TODO: avoid reallocation ???
3466 		RETVAL_STRINGL(newstr, ret_len);
3467 		efree(newstr);
3468 		return;
3469 	}
3470 	RETURN_FALSE;
3471 }
3472 /* }}} */
3473 
3474 /* {{{ proto string mb_strtolower(string sourcestring [, string encoding])
3475  *  Returns a lowercased version of sourcestring
3476  */
PHP_FUNCTION(mb_strtolower)3477 PHP_FUNCTION(mb_strtolower)
3478 {
3479 	const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
3480 	char *str;
3481 	size_t str_len, from_encoding_len;
3482 	char *newstr;
3483 	size_t ret_len;
3484 
3485 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|s!", &str, &str_len,
3486 				&from_encoding, &from_encoding_len) == FAILURE) {
3487 		return;
3488 	}
3489 	newstr = php_unicode_convert_case(PHP_UNICODE_CASE_LOWER, str, (size_t) str_len, &ret_len, from_encoding);
3490 
3491 	if (newstr) {
3492 		// TODO: avoid reallocation ???
3493 		RETVAL_STRINGL(newstr, ret_len);
3494 		efree(newstr);
3495 		return;
3496 	}
3497 	RETURN_FALSE;
3498 }
3499 /* }}} */
3500 
3501 /* {{{ proto string mb_detect_encoding(string str [, mixed encoding_list [, bool strict]])
3502    Encodings of the given string is returned (as a string) */
PHP_FUNCTION(mb_detect_encoding)3503 PHP_FUNCTION(mb_detect_encoding)
3504 {
3505 	char *str;
3506 	size_t str_len;
3507 	zend_bool strict=0;
3508 	zval *encoding_list = NULL;
3509 
3510 	mbfl_string string;
3511 	const mbfl_encoding *ret;
3512 	const mbfl_encoding **elist, **list;
3513 	size_t size;
3514 
3515 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|z!b", &str, &str_len, &encoding_list, &strict) == FAILURE) {
3516 		return;
3517 	}
3518 
3519 	/* make encoding list */
3520 	list = NULL;
3521 	size = 0;
3522 	if (encoding_list) {
3523 		switch (Z_TYPE_P(encoding_list)) {
3524 		case IS_ARRAY:
3525 			if (FAILURE == php_mb_parse_encoding_array(encoding_list, &list, &size, 0)) {
3526 				if (list) {
3527 					efree(list);
3528 					list = NULL;
3529 					size = 0;
3530 				}
3531 			}
3532 			break;
3533 		default:
3534 			convert_to_string(encoding_list);
3535 			if (FAILURE == php_mb_parse_encoding_list(Z_STRVAL_P(encoding_list), Z_STRLEN_P(encoding_list), &list, &size, 0)) {
3536 				if (list) {
3537 					efree(list);
3538 					list = NULL;
3539 					size = 0;
3540 				}
3541 			}
3542 			break;
3543 		}
3544 		if (size <= 0) {
3545 			php_error_docref(NULL, E_WARNING, "Illegal argument");
3546 		}
3547 	}
3548 
3549 	if (ZEND_NUM_ARGS() < 3) {
3550 		strict = (zend_bool)MBSTRG(strict_detection);
3551 	}
3552 
3553 	if (size > 0 && list != NULL) {
3554 		elist = list;
3555 	} else {
3556 		elist = MBSTRG(current_detect_order_list);
3557 		size = MBSTRG(current_detect_order_list_size);
3558 	}
3559 
3560 	mbfl_string_init(&string);
3561 	string.no_language = MBSTRG(language);
3562 	string.val = (unsigned char *)str;
3563 	string.len = str_len;
3564 	ret = mbfl_identify_encoding2(&string, elist, size, strict);
3565 
3566 	if (list != NULL) {
3567 		efree((void *)list);
3568 	}
3569 
3570 	if (ret == NULL) {
3571 		RETURN_FALSE;
3572 	}
3573 
3574 	RETVAL_STRING((char *)ret->name);
3575 }
3576 /* }}} */
3577 
3578 /* {{{ proto mixed mb_list_encodings()
3579    Returns an array of all supported entity encodings */
PHP_FUNCTION(mb_list_encodings)3580 PHP_FUNCTION(mb_list_encodings)
3581 {
3582 	const mbfl_encoding **encodings;
3583 	const mbfl_encoding *encoding;
3584 	int i;
3585 
3586 	if (zend_parse_parameters_none() == FAILURE) {
3587 		return;
3588 	}
3589 
3590 	array_init(return_value);
3591 	i = 0;
3592 	encodings = mbfl_get_supported_encodings();
3593 	while ((encoding = encodings[i++]) != NULL) {
3594 		add_next_index_string(return_value, (char *) encoding->name);
3595 	}
3596 }
3597 /* }}} */
3598 
3599 /* {{{ proto array mb_encoding_aliases(string encoding)
3600    Returns an array of the aliases of a given encoding name */
PHP_FUNCTION(mb_encoding_aliases)3601 PHP_FUNCTION(mb_encoding_aliases)
3602 {
3603 	const mbfl_encoding *encoding;
3604 	char *name = NULL;
3605 	size_t name_len;
3606 
3607 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &name, &name_len) == FAILURE) {
3608 		return;
3609 	}
3610 
3611 	encoding = mbfl_name2encoding(name);
3612 	if (!encoding) {
3613 		php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", name);
3614 		RETURN_FALSE;
3615 	}
3616 
3617 	array_init(return_value);
3618 	if (encoding->aliases != NULL) {
3619 		const char **alias;
3620 		for (alias = *encoding->aliases; *alias; ++alias) {
3621 			add_next_index_string(return_value, (char *)*alias);
3622 		}
3623 	}
3624 }
3625 /* }}} */
3626 
3627 /* {{{ proto string mb_encode_mimeheader(string str [, string charset [, string transfer-encoding [, string linefeed [, int indent]]]])
3628    Converts the string to MIME "encoded-word" in the format of =?charset?(B|Q)?encoded_string?= */
PHP_FUNCTION(mb_encode_mimeheader)3629 PHP_FUNCTION(mb_encode_mimeheader)
3630 {
3631 	enum mbfl_no_encoding charset, transenc;
3632 	mbfl_string  string, result, *ret;
3633 	char *charset_name = NULL;
3634 	size_t charset_name_len;
3635 	char *trans_enc_name = NULL;
3636 	size_t trans_enc_name_len;
3637 	char *linefeed = "\r\n";
3638 	size_t linefeed_len, string_len;
3639 	zend_long indent = 0;
3640 
3641 	mbfl_string_init(&string);
3642 	string.no_language = MBSTRG(language);
3643 	string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
3644 
3645 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|sssl", (char **)&string.val, &string_len, &charset_name, &charset_name_len, &trans_enc_name, &trans_enc_name_len, &linefeed, &linefeed_len, &indent) == FAILURE) {
3646 		return;
3647 	}
3648 
3649 	if (ZEND_SIZE_T_UINT_OVFL(string_len)) {
3650 			php_error_docref(NULL, E_WARNING, "String length overflows the max allowed length of %u", UINT_MAX);
3651 			return;
3652 	}
3653 
3654 	string.len = (uint32_t)string_len;
3655 
3656 	charset = mbfl_no_encoding_pass;
3657 	transenc = mbfl_no_encoding_base64;
3658 
3659 	if (charset_name != NULL) {
3660 		charset = mbfl_name2no_encoding(charset_name);
3661 		if (charset == mbfl_no_encoding_invalid) {
3662 			php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", charset_name);
3663 			RETURN_FALSE;
3664 		}
3665 	} else {
3666 		const mbfl_language *lang = mbfl_no2language(MBSTRG(language));
3667 		if (lang != NULL) {
3668 			charset = lang->mail_charset;
3669 			transenc = lang->mail_header_encoding;
3670 		}
3671 	}
3672 
3673 	if (trans_enc_name != NULL) {
3674 		if (*trans_enc_name == 'B' || *trans_enc_name == 'b') {
3675 			transenc = mbfl_no_encoding_base64;
3676 		} else if (*trans_enc_name == 'Q' || *trans_enc_name == 'q') {
3677 			transenc = mbfl_no_encoding_qprint;
3678 		}
3679 	}
3680 
3681 	mbfl_string_init(&result);
3682 	ret = mbfl_mime_header_encode(&string, &result, charset, transenc, linefeed, indent);
3683 	if (ret != NULL) {
3684 		// TODO: avoid reallocation ???
3685 		RETVAL_STRINGL((char *)ret->val, ret->len);	/* the string is already strdup()'ed */
3686 		efree(ret->val);
3687 	} else {
3688 		RETVAL_FALSE;
3689 	}
3690 }
3691 /* }}} */
3692 
3693 /* {{{ proto string mb_decode_mimeheader(string string)
3694    Decodes the MIME "encoded-word" in the string */
PHP_FUNCTION(mb_decode_mimeheader)3695 PHP_FUNCTION(mb_decode_mimeheader)
3696 {
3697 	mbfl_string string, result, *ret;
3698 	size_t string_len;
3699 
3700 	mbfl_string_init(&string);
3701 	string.no_language = MBSTRG(language);
3702 	string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
3703 
3704 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", (char **)&string.val, &string_len) == FAILURE) {
3705 		return;
3706 	}
3707 
3708 	if (ZEND_SIZE_T_UINT_OVFL(string_len)) {
3709 			php_error_docref(NULL, E_WARNING, "String length overflows the max allowed length of %u", UINT_MAX);
3710 			return;
3711 	}
3712 
3713 	string.len = (uint32_t)string_len;
3714 
3715 	mbfl_string_init(&result);
3716 	ret = mbfl_mime_header_decode(&string, &result, MBSTRG(current_internal_encoding)->no_encoding);
3717 	if (ret != NULL) {
3718 		// TODO: avoid reallocation ???
3719 		RETVAL_STRINGL((char *)ret->val, ret->len);	/* the string is already strdup()'ed */
3720 		efree(ret->val);
3721 	} else {
3722 		RETVAL_FALSE;
3723 	}
3724 }
3725 /* }}} */
3726 
3727 /* {{{ proto string mb_convert_kana(string str [, string option] [, string encoding])
3728    Conversion between full-width character and half-width character (Japanese) */
PHP_FUNCTION(mb_convert_kana)3729 PHP_FUNCTION(mb_convert_kana)
3730 {
3731 	int opt, i;
3732 	mbfl_string string, result, *ret;
3733 	char *optstr = NULL;
3734 	size_t optstr_len;
3735 	char *encname = NULL;
3736 	size_t encname_len, string_len;
3737 
3738 	mbfl_string_init(&string);
3739 	string.no_language = MBSTRG(language);
3740 	string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
3741 
3742 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|ss", (char **)&string.val, &string_len, &optstr, &optstr_len, &encname, &encname_len) == FAILURE) {
3743 		return;
3744 	}
3745 
3746 	if (ZEND_SIZE_T_UINT_OVFL(string_len)) {
3747 			php_error_docref(NULL, E_WARNING, "String length overflows the max allowed length of %u", UINT_MAX);
3748 			return;
3749 	}
3750 
3751 	string.len = (uint32_t)string_len;
3752 
3753 	/* option */
3754 	if (optstr != NULL) {
3755 		char *p = optstr;
3756 		int n = optstr_len;
3757 		i = 0;
3758 		opt = 0;
3759 		while (i < n) {
3760 			i++;
3761 			switch (*p++) {
3762 			case 'A':
3763 				opt |= 0x1;
3764 				break;
3765 			case 'a':
3766 				opt |= 0x10;
3767 				break;
3768 			case 'R':
3769 				opt |= 0x2;
3770 				break;
3771 			case 'r':
3772 				opt |= 0x20;
3773 				break;
3774 			case 'N':
3775 				opt |= 0x4;
3776 				break;
3777 			case 'n':
3778 				opt |= 0x40;
3779 				break;
3780 			case 'S':
3781 				opt |= 0x8;
3782 				break;
3783 			case 's':
3784 				opt |= 0x80;
3785 				break;
3786 			case 'K':
3787 				opt |= 0x100;
3788 				break;
3789 			case 'k':
3790 				opt |= 0x1000;
3791 				break;
3792 			case 'H':
3793 				opt |= 0x200;
3794 				break;
3795 			case 'h':
3796 				opt |= 0x2000;
3797 				break;
3798 			case 'V':
3799 				opt |= 0x800;
3800 				break;
3801 			case 'C':
3802 				opt |= 0x10000;
3803 				break;
3804 			case 'c':
3805 				opt |= 0x20000;
3806 				break;
3807 			case 'M':
3808 				opt |= 0x100000;
3809 				break;
3810 			case 'm':
3811 				opt |= 0x200000;
3812 				break;
3813 			}
3814 		}
3815 	} else {
3816 		opt = 0x900;
3817 	}
3818 
3819 	/* encoding */
3820 	if (encname != NULL) {
3821 		string.no_encoding = mbfl_name2no_encoding(encname);
3822 		if (string.no_encoding == mbfl_no_encoding_invalid) {
3823 			php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", encname);
3824 			RETURN_FALSE;
3825 		}
3826 	}
3827 
3828 	ret = mbfl_ja_jp_hantozen(&string, &result, opt);
3829 	if (ret != NULL) {
3830 		// TODO: avoid reallocation ???
3831 		RETVAL_STRINGL((char *)ret->val, ret->len);		/* the string is already strdup()'ed */
3832 		efree(ret->val);
3833 	} else {
3834 		RETVAL_FALSE;
3835 	}
3836 }
3837 /* }}} */
3838 
/* Number of zval slots the traversal stacks in mb_convert_variables() are
 * first allocated with, and the amount they grow by when they fill up. */
3839 #define PHP_MBSTR_STACK_BLOCK_SIZE 32
3840 
3841 /* {{{ proto string mb_convert_variables(string to-encoding, mixed from-encoding, mixed vars [, ...])
3842    Converts the string resource in variables to desired encoding */
/*
 * mb_convert_variables(to_encoding, from_encoding, vars...)
 *
 * Takes the target encoding name, a source-encoding specification (an array,
 * or any other value which is converted to a string list) and one or more
 * variables.
 *
 *  - No usable source encoding: the "pass" encoding is used and nothing is
 *    converted.
 *  - Exactly one source encoding: it is used as is.
 *  - Several candidates: every string reachable from the variables is fed to
 *    an encoding detector, and the detector's verdict is used (falling back
 *    to "pass", with a warning, when it has none).
 *
 * Unless the source encoding is "pass", every string found in the variables,
 * including strings nested in arrays/objects, is then replaced by its
 * converted form.
 *
 * The return value is the name of the source encoding that was used. FALSE
 * is returned for an unknown target encoding, when the converter cannot be
 * created, or when a recursive structure is detected. Nothing is set when
 * argument parsing fails.
 *
 * Both traversals are iterative: `stack` holds the containers whose
 * iteration was suspended while a nested container is being walked.
 */
PHP_FUNCTION(mb_convert_variables)3843 PHP_FUNCTION(mb_convert_variables)
3844 {
3845 	zval *args, *stack, *var, *hash_entry, *hash_entry_ptr, *zfrom_enc;
3846 	HashTable *target_hash;
3847 	mbfl_string string, result, *ret;
3848 	const mbfl_encoding *from_encoding, *to_encoding;
3849 	mbfl_encoding_detector *identd;
3850 	mbfl_buffer_converter *convd;
3851 	int n, argc, stack_level, stack_max;
3852 	size_t to_enc_len;
3853 	size_t elistsz;
3854 	const mbfl_encoding **elist;
3855 	char *to_enc;
3856 	void *ptmp;
3857 	int recursion_error = 0;
3858 
3859 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "sz+", &to_enc, &to_enc_len, &zfrom_enc, &args, &argc) == FAILURE) {
3860 		return;
3861 	}
3862 
3863 	/* new encoding */
3864 	to_encoding = mbfl_name2encoding(to_enc);
3865 	if (!to_encoding) {
3866 		php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", to_enc);
3867 		RETURN_FALSE;
3868 	}
3869 
3870 	/* initialize string */
3871 	mbfl_string_init(&string);
3872 	mbfl_string_init(&result);
3873 	from_encoding = MBSTRG(current_internal_encoding);
3874 	string.no_encoding = from_encoding->no_encoding;
3875 	string.no_language = MBSTRG(language);
3876 
3877 	/* pre-conversion encoding */
	/* elist/elistsz receive the candidate source encodings; elist is freed
	 * by this function once the source encoding has been decided */
3878 	elist = NULL;
3879 	elistsz = 0;
3880 	switch (Z_TYPE_P(zfrom_enc)) {
3881 		case IS_ARRAY:
3882 			php_mb_parse_encoding_array(zfrom_enc, &elist, &elistsz, 0);
3883 			break;
3884 		default:
3885 			convert_to_string_ex(zfrom_enc);
3886 			php_mb_parse_encoding_list(Z_STRVAL_P(zfrom_enc), Z_STRLEN_P(zfrom_enc), &elist, &elistsz, 0);
3887 			break;
3888 	}
3889 
	/* "pass" as source encoding means no converter is created further down */
3890 	if (elistsz <= 0) {
3891 		from_encoding = &mbfl_encoding_pass;
3892 	} else if (elistsz == 1) {
3893 		from_encoding = *elist;
3894 	} else {
3895 		/* auto detect */
3896 		from_encoding = NULL;
3897 		stack_max = PHP_MBSTR_STACK_BLOCK_SIZE;
3898 		stack = (zval *)safe_emalloc(stack_max, sizeof(zval), 0);
3899 		stack_level = 0;
3900 		identd = mbfl_encoding_detector_new2(elist, elistsz, MBSTRG(strict_detection));
3901 		if (identd != NULL) {
3902 			n = 0;
			/* loop until all arguments are consumed and no suspended
			 * container is left on the stack */
3903 			while (n < argc || stack_level > 0) {
3904 				if (stack_level <= 0) {
					/* stack empty: start on the next top-level argument */
3905 					var = &args[n++];
3906 					ZVAL_DEREF(var);
3907 					SEPARATE_ZVAL_NOREF(var);
3908 					if (Z_TYPE_P(var) == IS_ARRAY || Z_TYPE_P(var) == IS_OBJECT) {
3909 						target_hash = HASH_OF(var);
3910 						if (target_hash != NULL) {
3911 							zend_hash_internal_pointer_reset(target_hash);
3912 						}
3913 					}
3914 				} else {
					/* resume the container that was suspended when we
					 * descended; its internal pointer marks where to continue */
3915 					stack_level--;
3916 					var = &stack[stack_level];
3917 				}
3918 				if (Z_TYPE_P(var) == IS_ARRAY || Z_TYPE_P(var) == IS_OBJECT) {
3919 					target_hash = HASH_OF(var);
3920 					if (target_hash != NULL) {
3921 						while ((hash_entry = zend_hash_get_current_data(target_hash)) != NULL) {
							/* NOTE(review): nApplyCount appears to serve as the
							 * recursion guard here; it is bumped once per visited
							 * element and only decremented on the error paths --
							 * confirm this is intended */
3922 							if (Z_REFCOUNTED_P(var)) {
3923 								if (++target_hash->u.v.nApplyCount > 1) {
3924 									--target_hash->u.v.nApplyCount;
3925 									recursion_error = 1;
3926 									goto detect_end;
3927 								}
3928 							}
3929 							zend_hash_move_forward(target_hash);
3930 							if (Z_TYPE_P(hash_entry) == IS_INDIRECT) {
3931 								hash_entry = Z_INDIRECT_P(hash_entry);
3932 							}
3933 							ZVAL_DEREF(hash_entry);
3934 							if (Z_TYPE_P(hash_entry) == IS_ARRAY || Z_TYPE_P(hash_entry) == IS_OBJECT) {
								/* nested container: push the current one (growing
								 * the stack when full) and descend */
3935 								if (stack_level >= stack_max) {
3936 									stack_max += PHP_MBSTR_STACK_BLOCK_SIZE;
3937 									ptmp = erealloc(stack, sizeof(zval) * stack_max);
3938 									stack = (zval *)ptmp;
3939 								}
3940 								ZVAL_COPY_VALUE(&stack[stack_level], var);
3941 								stack_level++;
3942 								var = hash_entry;
3943 								target_hash = HASH_OF(var);
3944 								if (target_hash != NULL) {
3945 									zend_hash_internal_pointer_reset(target_hash);
3946 									continue;
3947 								}
3948 							} else if (Z_TYPE_P(hash_entry) == IS_STRING) {
3949 								string.val = (unsigned char *)Z_STRVAL_P(hash_entry);
3950 								string.len = Z_STRLEN_P(hash_entry);
3951 								if (mbfl_encoding_detector_feed(identd, &string)) {
3952 									goto detect_end;		/* complete detecting */
3953 								}
3954 							}
3955 						}
3956 					}
3957 				} else if (Z_TYPE_P(var) == IS_STRING) {
3958 					string.val = (unsigned char *)Z_STRVAL_P(var);
3959 					string.len = Z_STRLEN_P(var);
3960 					if (mbfl_encoding_detector_feed(identd, &string)) {
3961 						goto detect_end;		/* complete detecting */
3962 					}
3963 				}
3964 			}
			/* reached when the walk finishes, when the detector reports
			 * completion, or on a recursion error */
3965 detect_end:
3966 			from_encoding = mbfl_encoding_detector_judge2(identd);
3967 			mbfl_encoding_detector_delete(identd);
3968 		}
3969 		if (recursion_error) {
			/* lower the guard counter of every container still on the
			 * stack, then release everything and fail */
3970 			while(stack_level-- && (var = &stack[stack_level])) {
3971 				if (Z_REFCOUNTED_P(var)) {
3972 					if (HASH_OF(var)->u.v.nApplyCount > 1) {
3973 						HASH_OF(var)->u.v.nApplyCount--;
3974 					}
3975 				}
3976 			}
3977 			efree(stack);
3978 			if (elist != NULL) {
3979 				efree((void *)elist);
3980 			}
3981 			php_error_docref(NULL, E_WARNING, "Cannot handle recursive references");
3982 			RETURN_FALSE;
3983 		}
3984 		efree(stack);
3985 
3986 		if (!from_encoding) {
3987 			php_error_docref(NULL, E_WARNING, "Unable to detect encoding");
3988 			from_encoding = &mbfl_encoding_pass;
3989 		}
3990 	}
3991 	if (elist != NULL) {
3992 		efree((void *)elist);
3993 	}
3994 	/* create converter */
3995 	convd = NULL;
3996 	if (from_encoding != &mbfl_encoding_pass) {
3997 		convd = mbfl_buffer_converter_new2(from_encoding, to_encoding, 0);
3998 		if (convd == NULL) {
3999 			php_error_docref(NULL, E_WARNING, "Unable to create converter");
4000 			RETURN_FALSE;
4001 		}
4002 		mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
4003 		mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
4004 	}
4005 
4006 	/* convert */
	/* second traversal, same stack discipline as the detection pass; here
	 * the recursion guard is applied to the nested container before it is
	 * entered */
4007 	if (convd != NULL) {
4008 		stack_max = PHP_MBSTR_STACK_BLOCK_SIZE;
4009 		stack = (zval*)safe_emalloc(stack_max, sizeof(zval), 0);
4010 		stack_level = 0;
4011 		n = 0;
4012 		while (n < argc || stack_level > 0) {
4013 			if (stack_level <= 0) {
4014 				var = &args[n++];
4015 				ZVAL_DEREF(var);
4016 				SEPARATE_ZVAL_NOREF(var);
4017 				if (Z_TYPE_P(var) == IS_ARRAY || Z_TYPE_P(var) == IS_OBJECT) {
4018 					target_hash = HASH_OF(var);
4019 					if (target_hash != NULL) {
4020 						zend_hash_internal_pointer_reset(target_hash);
4021 					}
4022 				}
4023 			} else {
4024 				stack_level--;
4025 				var = &stack[stack_level];
4026 			}
4027 			if (Z_TYPE_P(var) == IS_ARRAY || Z_TYPE_P(var) == IS_OBJECT) {
4028 				target_hash = HASH_OF(var);
4029 				if (target_hash != NULL) {
4030 					while ((hash_entry_ptr = zend_hash_get_current_data(target_hash)) != NULL) {
4031 						zend_hash_move_forward(target_hash);
4032 						if (Z_TYPE_P(hash_entry_ptr) == IS_INDIRECT) {
4033 							hash_entry_ptr = Z_INDIRECT_P(hash_entry_ptr);
4034 						}
						/* hash_entry_ptr keeps pointing at the slot itself;
						 * hash_entry is the value after following a reference */
4035 						hash_entry = hash_entry_ptr;
4036 						ZVAL_DEREF(hash_entry);
4037 						if (Z_TYPE_P(hash_entry) == IS_ARRAY || Z_TYPE_P(hash_entry) == IS_OBJECT) {
4038 							if (Z_REFCOUNTED_P(hash_entry)) {
4039 								if (++(HASH_OF(hash_entry)->u.v.nApplyCount) > 1) {
4040 									--(HASH_OF(hash_entry)->u.v.nApplyCount);
4041 									recursion_error = 1;
4042 									goto conv_end;
4043 								}
4044 							}
4045 							if (stack_level >= stack_max) {
4046 								stack_max += PHP_MBSTR_STACK_BLOCK_SIZE;
4047 								ptmp = erealloc(stack, sizeof(zval) * stack_max);
4048 								stack = (zval *)ptmp;
4049 							}
4050 							ZVAL_COPY_VALUE(&stack[stack_level], var);
4051 							stack_level++;
4052 							var = hash_entry;
4053 							SEPARATE_ZVAL(hash_entry);
4054 							target_hash = HASH_OF(var);
4055 							if (target_hash != NULL) {
4056 								zend_hash_internal_pointer_reset(target_hash);
4057 								continue;
4058 							}
4059 						} else if (Z_TYPE_P(hash_entry) == IS_STRING) {
4060 							string.val = (unsigned char *)Z_STRVAL_P(hash_entry);
4061 							string.len = Z_STRLEN_P(hash_entry);
4062 							ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
4063 							if (ret != NULL) {
								/* drop the old slot content and store a copy of
								 * the converted bytes; the converter's buffer is
								 * freed right after */
4064 								zval_ptr_dtor(hash_entry_ptr);
4065 								// TODO: avoid reallocation ???
4066 								ZVAL_STRINGL(hash_entry_ptr, (char *)ret->val, ret->len);
4067 								efree(ret->val);
4068 							}
4069 						}
4070 					}
4071 				}
4072 			} else if (Z_TYPE_P(var) == IS_STRING) {
4073 				string.val = (unsigned char *)Z_STRVAL_P(var);
4074 				string.len = Z_STRLEN_P(var);
4075 				ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
4076 				if (ret != NULL) {
4077 					zval_ptr_dtor(var);
4078 					// TODO: avoid reallocation ???
4079 					ZVAL_STRINGL(var, (char *)ret->val, ret->len);
4080 					efree(ret->val);
4081 				}
4082 			}
4083 		}
4084 
		/* reached on normal completion and on a recursion error; the
		 * converter's illegal-character count is added to the module total
		 * before the converter is destroyed */
4085 conv_end:
4086 		MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
4087 		mbfl_buffer_converter_delete(convd);
4088 
4089 		if (recursion_error) {
4090 			while(stack_level-- && (var = &stack[stack_level])) {
4091 				if (Z_REFCOUNTED_P(var)) {
4092 					if (HASH_OF(var)->u.v.nApplyCount > 1) {
4093 						HASH_OF(var)->u.v.nApplyCount--;
4094 					}
4095 				}
4096 			}
4097 			efree(stack);
4098 			php_error_docref(NULL, E_WARNING, "Cannot handle recursive references");
4099 			RETURN_FALSE;
4100 		}
4101 		efree(stack);
4102 	}
4103 
	/* report which source encoding was used */
4104 	if (from_encoding) {
4105 		RETURN_STRING(from_encoding->name);
4106 	} else {
4107 		RETURN_FALSE;
4108 	}
4109 }
4110 /* }}} */
4111 
4112 /* {{{ HTML numeric entity */
4113 /* {{{ static void php_mb_numericentity_exec() */
4114 static void
php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS,int type)4115 php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS, int type)
4116 {
4117 	char *str, *encoding = NULL;
4118 	size_t str_len, encoding_len;
4119 	zval *zconvmap, *hash_entry;
4120 	HashTable *target_hash;
4121 	int i, *convmap, *mapelm, mapsize=0;
4122 	zend_bool is_hex = 0;
4123 	mbfl_string string, result, *ret;
4124 	enum mbfl_no_encoding no_encoding;
4125 
4126 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "sz|sb", &str, &str_len, &zconvmap, &encoding, &encoding_len, &is_hex) == FAILURE) {
4127 		return;
4128 	}
4129 
4130 	mbfl_string_init(&string);
4131 	string.no_language = MBSTRG(language);
4132 	string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
4133 	string.val = (unsigned char *)str;
4134 	string.len = str_len;
4135 
4136 	/* encoding */
4137 	if (encoding && encoding_len > 0) {
4138 		no_encoding = mbfl_name2no_encoding(encoding);
4139 		if (no_encoding == mbfl_no_encoding_invalid) {
4140 			php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", encoding);
4141 			RETURN_FALSE;
4142 		} else {
4143 			string.no_encoding = no_encoding;
4144 		}
4145 	}
4146 
4147 	if (type == 0 && is_hex) {
4148 		type = 2; /* output in hex format */
4149 	}
4150 
4151 	/* conversion map */
4152 	convmap = NULL;
4153 	if (Z_TYPE_P(zconvmap) == IS_ARRAY) {
4154 		target_hash = Z_ARRVAL_P(zconvmap);
4155 		i = zend_hash_num_elements(target_hash);
4156 		if (i > 0) {
4157 			convmap = (int *)safe_emalloc(i, sizeof(int), 0);
4158 			mapelm = convmap;
4159 			mapsize = 0;
4160 			ZEND_HASH_FOREACH_VAL(target_hash, hash_entry) {
4161 				convert_to_long_ex(hash_entry);
4162 				*mapelm++ = Z_LVAL_P(hash_entry);
4163 				mapsize++;
4164 			} ZEND_HASH_FOREACH_END();
4165 		}
4166 	}
4167 	if (convmap == NULL) {
4168 		RETURN_FALSE;
4169 	}
4170 	mapsize /= 4;
4171 
4172 	ret = mbfl_html_numeric_entity(&string, &result, convmap, mapsize, type);
4173 	if (ret != NULL) {
4174 		// TODO: avoid reallocation ???
4175 		RETVAL_STRINGL((char *)ret->val, ret->len);
4176 		efree(ret->val);
4177 	} else {
4178 		RETVAL_FALSE;
4179 	}
4180 	efree((void *)convmap);
4181 }
4182 /* }}} */
4183 
4184 /* {{{ proto string mb_encode_numericentity(string string, array convmap [, string encoding [, bool is_hex]])
4185    Converts specified characters to HTML numeric entities */
PHP_FUNCTION(mb_encode_numericentity)4186 PHP_FUNCTION(mb_encode_numericentity)
4187 {
4188 	php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
4189 }
4190 /* }}} */
4191 
4192 /* {{{ proto string mb_decode_numericentity(string string, array convmap [, string encoding])
4193    Converts HTML numeric entities to character code */
PHP_FUNCTION(mb_decode_numericentity)4194 PHP_FUNCTION(mb_decode_numericentity)
4195 {
4196 	php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
4197 }
4198 /* }}} */
4199 /* }}} */
4200 
4201 /* {{{ proto int mb_send_mail(string to, string subject, string message [, string additional_headers [, string additional_parameters]])
4202  *  Sends an email message with MIME scheme
4203  */
4204 
/*
 * For use inside a loop that walks `str` by index `pos`.
 *
 * When str[pos] starts a folded-header separator (CR LF followed by a space
 * or a tab), moves pos onto the last whitespace character of the separator
 * and `continue`s the enclosing loop, so the separator is left untouched.
 * Otherwise it does nothing.
 *
 * This is deliberately a bare `if` and not a do { } while (0) block: the
 * `continue` has to bind to the caller's loop. `pos` must be a modifiable
 * lvalue; `str` may be any pointer expression.
 */
#define SKIP_LONG_HEADER_SEP_MBSTRING(str, pos)										\
	if ((str)[(pos)] == '\r' && (str)[(pos) + 1] == '\n' && ((str)[(pos) + 2] == ' ' || (str)[(pos) + 2] == '\t')) {	\
		(pos) += 2;											\
		while ((str)[(pos) + 1] == ' ' || (str)[(pos) + 1] == '\t') {						\
			(pos)++;										\
		}												\
		continue;											\
	}
4213 
/*
 * Replaces every NUL byte within the `len` bytes starting at `str` by a
 * space, in place.
 *
 * Uses two scratch variables that must be in scope at the call site:
 * `char *pp, *ee;`. Wrapped in do { } while (0) so that it behaves as a
 * single statement (safe in unbraced if/else); write a semicolon after it.
 */
#define MAIL_ASCIIZ_CHECK_MBSTRING(str, len)			\
	do {							\
		pp = (str);					\
		ee = pp + (len);				\
		while ((pp = memchr(pp, '\0', (ee - pp)))) {	\
			*pp = ' ';				\
		}						\
	} while (0)
4220 
_php_mbstr_parse_mail_headers(HashTable * ht,const char * str,size_t str_len)4221 static int _php_mbstr_parse_mail_headers(HashTable *ht, const char *str, size_t str_len)
4222 {
4223 	const char *ps;
4224 	size_t icnt;
4225 	int state = 0;
4226 	int crlf_state = -1;
4227 	char *token = NULL;
4228 	size_t token_pos = 0;
4229 	zend_string *fld_name, *fld_val;
4230 
4231 	ps = str;
4232 	icnt = str_len;
4233 	fld_name = fld_val = NULL;
4234 
4235 	/*
4236 	 *             C o n t e n t - T y p e :   t e x t / h t m l \r\n
4237 	 *             ^ ^^^^^^^^^^^^^^^^^^^^^ ^^^ ^^^^^^^^^^^^^^^^^ ^^^^
4238 	 *      state  0            1           2          3
4239 	 *
4240 	 *             C o n t e n t - T y p e :   t e x t / h t m l \r\n
4241 	 *             ^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^
4242 	 * crlf_state -1                       0                     1 -1
4243 	 *
4244 	 */
4245 
4246 	while (icnt > 0) {
4247 		switch (*ps) {
4248 			case ':':
4249 				if (crlf_state == 1) {
4250 					token_pos++;
4251 				}
4252 
4253 				if (state == 0 || state == 1) {
4254 					if(token && token_pos > 0) {
4255 						fld_name = zend_string_init(token, token_pos, 0);
4256 					}
4257 					state = 2;
4258 				} else {
4259 					token_pos++;
4260 				}
4261 
4262 				crlf_state = 0;
4263 				break;
4264 
4265 			case '\n':
4266 				if (crlf_state == -1) {
4267 					goto out;
4268 				}
4269 				crlf_state = -1;
4270 				break;
4271 
4272 			case '\r':
4273 				if (crlf_state == 1) {
4274 					token_pos++;
4275 				} else {
4276 					crlf_state = 1;
4277 				}
4278 				break;
4279 
4280 			case ' ': case '\t':
4281 				if (crlf_state == -1) {
4282 					if (state == 3) {
4283 						/* continuing from the previous line */
4284 						state = 4;
4285 					} else {
4286 						/* simply skipping this new line */
4287 						state = 5;
4288 					}
4289 				} else {
4290 					if (crlf_state == 1) {
4291 						token_pos++;
4292 					}
4293 					if (state == 1 || state == 3) {
4294 						token_pos++;
4295 					}
4296 				}
4297 				crlf_state = 0;
4298 				break;
4299 
4300 			default:
4301 				switch (state) {
4302 					case 0:
4303 						token = (char*)ps;
4304 						token_pos = 0;
4305 						state = 1;
4306 						break;
4307 
4308 					case 2:
4309 						if (crlf_state != -1) {
4310 							token = (char*)ps;
4311 							token_pos = 0;
4312 
4313 							state = 3;
4314 							break;
4315 						}
4316 						/* break is missing intentionally */
4317 
4318 					case 3:
4319 						if (crlf_state == -1) {
4320 							if(token && token_pos > 0) {
4321 								fld_val = zend_string_init(token, token_pos, 0);
4322 							}
4323 
4324 							if (fld_name != NULL && fld_val != NULL) {
4325 								zval val;
4326 								/* FIXME: some locale free implementation is
4327 								 * really required here,,, */
4328 								php_strtoupper(ZSTR_VAL(fld_name), ZSTR_LEN(fld_name));
4329 								ZVAL_STR(&val, fld_val);
4330 
4331 								zend_hash_update(ht, fld_name, &val);
4332 
4333 								zend_string_release(fld_name);
4334 							}
4335 
4336 							fld_name = fld_val = NULL;
4337 							token = (char*)ps;
4338 							token_pos = 0;
4339 
4340 							state = 1;
4341 						}
4342 						break;
4343 
4344 					case 4:
4345 						token_pos++;
4346 						state = 3;
4347 						break;
4348 				}
4349 
4350 				if (crlf_state == 1) {
4351 					token_pos++;
4352 				}
4353 
4354 				token_pos++;
4355 
4356 				crlf_state = 0;
4357 				break;
4358 		}
4359 		ps++, icnt--;
4360 	}
4361 out:
4362 	if (state == 2) {
4363 		token = "";
4364 		token_pos = 0;
4365 
4366 		state = 3;
4367 	}
4368 	if (state == 3) {
4369 		if(token && token_pos > 0) {
4370 			fld_val = zend_string_init(token, token_pos, 0);
4371 		}
4372 		if (fld_name != NULL && fld_val != NULL) {
4373 			zval val;
4374 			/* FIXME: some locale free implementation is
4375 			 * really required here,,, */
4376 			php_strtoupper(ZSTR_VAL(fld_name), ZSTR_LEN(fld_name));
4377 			ZVAL_STR(&val, fld_val);
4378 
4379 			zend_hash_update(ht, fld_name, &val);
4380 
4381 			zend_string_release(fld_name);
4382 		}
4383 	}
4384 	return state;
4385 }
4386 
/*
 * mb_send_mail(to, subject, message [, additional_headers [, additional_parameters]])
 *
 * Encodes the subject as a MIME header and converts the body to the transfer
 * charset and body transfer encoding, builds the header block (appending
 * MIME-Version, Content-Type and Content-Transfer-Encoding unless the caller
 * already supplied them) and passes everything to php_mail().
 *
 * The transfer charset and encodings come from the current language
 * settings; a charset parameter in a caller-supplied Content-Type header and
 * a supported Content-Transfer-Encoding header override them.
 *
 * The return value is TRUE when no required field was missing and php_mail()
 * succeeds, FALSE otherwise (also when additional_headers is neither a
 * string nor an array). Nothing is set when argument parsing fails.
 */
PHP_FUNCTION(mb_send_mail)4387 PHP_FUNCTION(mb_send_mail)
4388 {
4389 	int n;
4390 	char *to = NULL;
4391 	size_t to_len;
4392 	char *message = NULL;
4393 	size_t message_len;
4394 	char *subject = NULL;
4395 	size_t subject_len;
4396 	zval *headers = NULL;
4397 	zend_string *extra_cmd = NULL;
4398 	zend_string *str_headers=NULL, *tmp_headers;
4399 	int i;
4400 	char *to_r = NULL;
4401 	char *force_extra_parameters = INI_STR("mail.force_extra_parameters");
	/* set when the caller's headers already contain the respective header,
	 * so that the default one is not appended */
4402 	struct {
4403 		int cnt_type:1;
4404 		int cnt_trans_enc:1;
4405 	} suppressed_hdrs = { 0, 0 };
4406 
	/* message_buf / subject_buf remember converted buffers that have to be
	 * freed at the end */
4407 	char *message_buf = NULL, *subject_buf = NULL, *p;
4408 	mbfl_string orig_str, conv_str;
4409 	mbfl_string *pstr;	/* pointer to mbfl string for return value */
4410 	enum mbfl_no_encoding
4411 		tran_cs,	/* transfar text charset */
4412 		head_enc,	/* header transfar encoding */
4413 		body_enc;	/* body transfar encoding */
4414 	mbfl_memory_device device;	/* automatic allocateable buffer for additional header */
4415 	const mbfl_language *lang;
4416 	int err = 0;
4417 	HashTable ht_headers;
4418 	zval *s;
4419 	extern void mbfl_memory_device_unput(mbfl_memory_device *device);
	/* scratch variables used by MAIL_ASCIIZ_CHECK_MBSTRING */
4420 	char *pp, *ee;
4421 
4422 	/* initialize */
4423 	mbfl_memory_device_init(&device, 0, 0);
4424 	mbfl_string_init(&orig_str);
4425 	mbfl_string_init(&conv_str);
4426 
4427 	/* character-set, transfer-encoding */
	/* fallbacks, replaced by the current language's settings when known */
4428 	tran_cs = mbfl_no_encoding_utf8;
4429 	head_enc = mbfl_no_encoding_base64;
4430 	body_enc = mbfl_no_encoding_base64;
4431 	lang = mbfl_no2language(MBSTRG(language));
4432 	if (lang != NULL) {
4433 		tran_cs = lang->mail_charset;
4434 		head_enc = lang->mail_header_encoding;
4435 		body_enc = lang->mail_body_encoding;
4436 	}
4437 
4438 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "sss|zS", &to, &to_len, &subject, &subject_len, &message, &message_len, &headers, &extra_cmd) == FAILURE) {
4439 		return;
4440 	}
4441 
4442 	/* ASCIIZ check */
	/* embedded NUL bytes are replaced by spaces, directly in the argument
	 * buffers */
4443 	MAIL_ASCIIZ_CHECK_MBSTRING(to, to_len);
4444 	MAIL_ASCIIZ_CHECK_MBSTRING(subject, subject_len);
4445 	MAIL_ASCIIZ_CHECK_MBSTRING(message, message_len);
4446 	if (headers) {
4447 		switch(Z_TYPE_P(headers)) {
4448 			case IS_STRING:
				/* work on a copy of the header string; the copy is
				 * NUL-cleaned and trimmed (php_trim mode 2 -- presumably
				 * right-trim, confirm against php_trim) */
4449 				tmp_headers = zend_string_init(Z_STRVAL_P(headers), Z_STRLEN_P(headers), 0);
4450 				MAIL_ASCIIZ_CHECK_MBSTRING(ZSTR_VAL(tmp_headers), ZSTR_LEN(tmp_headers));
4451 				str_headers = php_trim(tmp_headers, NULL, 0, 2);
4452 				zend_string_release(tmp_headers);
4453 				break;
4454 			case IS_ARRAY:
4455 				str_headers = php_mail_build_headers(headers);
4456 				break;
4457 			default:
4458 				php_error_docref(NULL, E_WARNING, "headers parameter must be string or array");
4459 				RETURN_FALSE;
4460 		}
4461 	}
4462 	if (extra_cmd) {
4463 		MAIL_ASCIIZ_CHECK_MBSTRING(ZSTR_VAL(extra_cmd), ZSTR_LEN(extra_cmd));
4464 	}
4465 
	/* index the caller's headers by upper-cased name so that they can be
	 * looked up below */
4466 	zend_hash_init(&ht_headers, 0, NULL, ZVAL_PTR_DTOR, 0);
4467 
4468 	if (str_headers != NULL) {
4469 		_php_mbstr_parse_mail_headers(&ht_headers, ZSTR_VAL(str_headers), ZSTR_LEN(str_headers));
4470 	}
4471 
	/* caller-supplied Content-Type: take the charset from its first
	 * parameter if that parameter is "charset", and do not append our own
	 * Content-Type header later */
4472 	if ((s = zend_hash_str_find(&ht_headers, "CONTENT-TYPE", sizeof("CONTENT-TYPE") - 1))) {
4473 		char *tmp;
4474 		char *param_name;
4475 		char *charset = NULL;
4476 
4477 		ZEND_ASSERT(Z_TYPE_P(s) == IS_STRING);
4478 		p = strchr(Z_STRVAL_P(s), ';');
4479 
4480 		if (p != NULL) {
4481 			/* skipping the padded spaces */
4482 			do {
4483 				++p;
4484 			} while (*p == ' ' || *p == '\t');
4485 
4486 			if (*p != '\0') {
				/* the tokenizer writes into the stored header value, which
				 * is the hash table's own copy */
4487 				if ((param_name = php_strtok_r(p, "= ", &tmp)) != NULL) {
4488 					if (strcasecmp(param_name, "charset") == 0) {
4489 						enum mbfl_no_encoding _tran_cs = tran_cs;
4490 
4491 						charset = php_strtok_r(NULL, "= \"", &tmp);
4492 						if (charset != NULL) {
4493 							_tran_cs = mbfl_name2no_encoding(charset);
4494 						}
4495 
4496 						if (_tran_cs == mbfl_no_encoding_invalid) {
4497 							php_error_docref(NULL, E_WARNING, "Unsupported charset \"%s\" - will be regarded as ascii", charset);
4498 							_tran_cs = mbfl_no_encoding_ascii;
4499 						}
4500 						tran_cs = _tran_cs;
4501 					}
4502 				}
4503 			}
4504 		}
4505 		suppressed_hdrs.cnt_type = 1;
4506 	}
4507 
	/* caller-supplied Content-Transfer-Encoding: only base64, 7bit and 8bit
	 * are accepted, anything else is treated as 8bit with a warning */
4508 	if ((s = zend_hash_str_find(&ht_headers, "CONTENT-TRANSFER-ENCODING", sizeof("CONTENT-TRANSFER-ENCODING") - 1))) {
4509 		enum mbfl_no_encoding _body_enc;
4510 
4511 		ZEND_ASSERT(Z_TYPE_P(s) == IS_STRING);
4512 		_body_enc = mbfl_name2no_encoding(Z_STRVAL_P(s));
4513 		switch (_body_enc) {
4514 			case mbfl_no_encoding_base64:
4515 			case mbfl_no_encoding_7bit:
4516 			case mbfl_no_encoding_8bit:
4517 				body_enc = _body_enc;
4518 				break;
4519 
4520 			default:
4521 				php_error_docref(NULL, E_WARNING, "Unsupported transfer encoding \"%s\" - will be regarded as 8bit", Z_STRVAL_P(s));
4522 				body_enc =	mbfl_no_encoding_8bit;
4523 				break;
4524 		}
4525 		suppressed_hdrs.cnt_trans_enc = 1;
4526 	}
4527 
4528 	/* To: */
	/* a non-empty recipient is copied into to_r, stripped of trailing
	 * whitespace and has its control characters replaced by spaces; an empty
	 * one is used as is (to_r == to, which the cleanup below relies on) */
4529 	if (to != NULL) {
4530 		if (to_len > 0) {
4531 			to_r = estrndup(to, to_len);
4532 			for (; to_len; to_len--) {
4533 				if (!isspace((unsigned char) to_r[to_len - 1])) {
4534 					break;
4535 				}
4536 				to_r[to_len - 1] = '\0';
4537 			}
4538 			for (i = 0; to_r[i]; i++) {
4539 			if (iscntrl((unsigned char) to_r[i])) {
4540 				/* According to RFC 822, section 3.1.1 long headers may be separated into
4541 				 * parts using CRLF followed at least one linear-white-space character ('\t' or ' ').
4542 				 * To prevent these separators from being replaced with a space, we use the
4543 				 * SKIP_LONG_HEADER_SEP_MBSTRING to skip over them.
4544 				 */
4545 				SKIP_LONG_HEADER_SEP_MBSTRING(to_r, i);
4546 				to_r[i] = ' ';
4547 			}
4548 			}
4549 		} else {
4550 			to_r = to;
4551 		}
4552 	} else {
4553 		php_error_docref(NULL, E_WARNING, "Missing To: field");
4554 		err = 1;
4555 	}
4556 
4557 	/* Subject: */
4558 	if (subject != NULL) {
4559 		orig_str.no_language = MBSTRG(language);
4560 		orig_str.val = (unsigned char *)subject;
4561 		orig_str.len = subject_len;
4562 		orig_str.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
		/* without a usable internal encoding, try to detect the subject's
		 * encoding from the current detect order */
4563 		if (orig_str.no_encoding == mbfl_no_encoding_invalid || orig_str.no_encoding == mbfl_no_encoding_pass) {
4564 			const mbfl_encoding *encoding = mbfl_identify_encoding2(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
4565 			orig_str.no_encoding = encoding ? encoding->no_encoding: mbfl_no_encoding_invalid;
4566 		}
		/* on success `subject` points at the encoded buffer, which is owned
		 * here and freed through subject_buf; on failure the original
		 * subject is sent */
4567 		pstr = mbfl_mime_header_encode(&orig_str, &conv_str, tran_cs, head_enc, "\n", sizeof("Subject: [PHP-jp nnnnnnnn]"));
4568 		if (pstr != NULL) {
4569 			subject_buf = subject = (char *)pstr->val;
4570 		}
4571 	} else {
4572 		php_error_docref(NULL, E_WARNING, "Missing Subject: field");
4573 		err = 1;
4574 	}
4575 
4576 	/* message body */
4577 	if (message != NULL) {
4578 		orig_str.no_language = MBSTRG(language);
4579 		orig_str.val = (unsigned char *)message;
4580 		orig_str.len = (unsigned int)message_len;
4581 		orig_str.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
4582 
4583 		if (orig_str.no_encoding == mbfl_no_encoding_invalid || orig_str.no_encoding == mbfl_no_encoding_pass) {
4584 			const mbfl_encoding *encoding = mbfl_identify_encoding2(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
4585 			orig_str.no_encoding = encoding ? encoding->no_encoding: mbfl_no_encoding_invalid;
4586 		}
4587 
		/* two steps: convert the text to the transfer charset, then relabel
		 * the result as 8bit data and apply the body transfer encoding */
4588 		pstr = NULL;
4589 		{
4590 			mbfl_string tmpstr;
4591 
4592 			if (mbfl_convert_encoding(&orig_str, &tmpstr, tran_cs) != NULL) {
4593 				tmpstr.no_encoding=mbfl_no_encoding_8bit;
4594 				pstr = mbfl_convert_encoding(&tmpstr, &conv_str, body_enc);
4595 				efree(tmpstr.val);
4596 			}
4597 		}
4598 		if (pstr != NULL) {
4599 			message_buf = message = (char *)pstr->val;
4600 		}
4601 	} else {
4602 		/* this is not really an error, so it is allowed. */
4603 		php_error_docref(NULL, E_WARNING, "Empty message body");
4604 		message = NULL;
4605 	}
4606 
4607 	/* other headers */
4608 #define PHP_MBSTR_MAIL_MIME_HEADER1 "MIME-Version: 1.0"
4609 #define PHP_MBSTR_MAIL_MIME_HEADER2 "Content-Type: text/plain"
4610 #define PHP_MBSTR_MAIL_MIME_HEADER3 "; charset="
4611 #define PHP_MBSTR_MAIL_MIME_HEADER4 "Content-Transfer-Encoding: "
	/* the final header block is assembled in `device`: first the caller's
	 * headers (newline-terminated), then the MIME headers that are missing */
4612 	if (str_headers != NULL) {
4613 		p = ZSTR_VAL(str_headers);
4614 		n = ZSTR_LEN(str_headers);
4615 		mbfl_memory_device_strncat(&device, p, n);
4616 		if (n > 0 && p[n - 1] != '\n') {
4617 			mbfl_memory_device_strncat(&device, "\n", 1);
4618 		}
4619 		zend_string_release(str_headers);
4620 	}
4621 
4622 	if (!zend_hash_str_exists(&ht_headers, "MIME-VERSION", sizeof("MIME-VERSION") - 1)) {
4623 		mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER1, sizeof(PHP_MBSTR_MAIL_MIME_HEADER1) - 1);
4624 		mbfl_memory_device_strncat(&device, "\n", 1);
4625 	}
4626 
4627 	if (!suppressed_hdrs.cnt_type) {
4628 		mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER2, sizeof(PHP_MBSTR_MAIL_MIME_HEADER2) - 1);
4629 
4630 		p = (char *)mbfl_no2preferred_mime_name(tran_cs);
4631 		if (p != NULL) {
4632 			mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER3, sizeof(PHP_MBSTR_MAIL_MIME_HEADER3) - 1);
4633 			mbfl_memory_device_strcat(&device, p);
4634 		}
4635 		mbfl_memory_device_strncat(&device, "\n", 1);
4636 	}
4637 	if (!suppressed_hdrs.cnt_trans_enc) {
4638 		mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER4, sizeof(PHP_MBSTR_MAIL_MIME_HEADER4) - 1);
4639 		p = (char *)mbfl_no2preferred_mime_name(body_enc);
4640 		if (p == NULL) {
4641 			p = "7bit";
4642 		}
4643 		mbfl_memory_device_strcat(&device, p);
4644 		mbfl_memory_device_strncat(&device, "\n", 1);
4645 	}
4646 
	/* NOTE(review): unput presumably drops the trailing "\n" -- confirm in
	 * libmbfl; the buffer is then NUL-terminated and copied into a fresh
	 * str_headers */
4647 	mbfl_memory_device_unput(&device);
4648 	mbfl_memory_device_output('\0', &device);
4649 	str_headers = zend_string_init((char *)device.buffer, strlen((char *)device.buffer), 0);
4650 
	/* the INI setting takes precedence over the caller's parameters; from
	 * here on a non-NULL extra_cmd is the string returned by
	 * php_escape_shell_cmd() and is released below */
4651 	if (force_extra_parameters) {
4652 		extra_cmd = php_escape_shell_cmd(force_extra_parameters);
4653 	} else if (extra_cmd) {
4654 		extra_cmd = php_escape_shell_cmd(ZSTR_VAL(extra_cmd));
4655 	}
4656 
4657 	if (!err && php_mail(to_r, subject, message, ZSTR_VAL(str_headers), extra_cmd ? ZSTR_VAL(extra_cmd) : NULL)) {
4658 		RETVAL_TRUE;
4659 	} else {
4660 		RETVAL_FALSE;
4661 	}
4662 
	/* cleanup: free only what was allocated in this function */
4663 	if (extra_cmd) {
4664 		zend_string_release(extra_cmd);
4665 	}
4666 
4667 	if (to_r != to) {
4668 		efree(to_r);
4669 	}
4670 	if (subject_buf) {
4671 		efree((void *)subject_buf);
4672 	}
4673 	if (message_buf) {
4674 		efree((void *)message_buf);
4675 	}
4676 	mbfl_memory_device_clear(&device);
4677 	zend_hash_destroy(&ht_headers);
4678 	if (str_headers) {
4679 		zend_string_release(str_headers);
4680 	}
4681 }
4682 
4683 #undef SKIP_LONG_HEADER_SEP_MBSTRING
4684 #undef MAIL_ASCIIZ_CHECK_MBSTRING
4685 #undef PHP_MBSTR_MAIL_MIME_HEADER1
4686 #undef PHP_MBSTR_MAIL_MIME_HEADER2
4687 #undef PHP_MBSTR_MAIL_MIME_HEADER3
4688 #undef PHP_MBSTR_MAIL_MIME_HEADER4
4689 /* }}} */
4690 
4691 /* {{{ proto mixed mb_get_info([string type])
4692    Returns the current settings of mbstring */
PHP_FUNCTION(mb_get_info)4693 PHP_FUNCTION(mb_get_info)
4694 {
4695 	char *typ = NULL;
4696 	size_t typ_len;
4697 	size_t n;
4698 	char *name;
4699 	const struct mb_overload_def *over_func;
4700 	zval row1, row2;
4701 	const mbfl_language *lang = mbfl_no2language(MBSTRG(language));
4702 	const mbfl_encoding **entry;
4703 
4704 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s", &typ, &typ_len) == FAILURE) {
4705 		return;
4706 	}
4707 
4708 	if (!typ || !strcasecmp("all", typ)) {
4709 		array_init(return_value);
4710 		if (MBSTRG(current_internal_encoding)) {
4711 			add_assoc_string(return_value, "internal_encoding", (char *)MBSTRG(current_internal_encoding)->name);
4712 		}
4713 		if (MBSTRG(http_input_identify)) {
4714 			add_assoc_string(return_value, "http_input", (char *)MBSTRG(http_input_identify)->name);
4715 		}
4716 		if (MBSTRG(current_http_output_encoding)) {
4717 			add_assoc_string(return_value, "http_output", (char *)MBSTRG(current_http_output_encoding)->name);
4718 		}
4719 		if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes") - 1, 0)) != NULL) {
4720 			add_assoc_string(return_value, "http_output_conv_mimetypes", name);
4721 		}
4722 		add_assoc_long(return_value, "func_overload", MBSTRG(func_overload));
4723 		if (MBSTRG(func_overload)){
4724 			over_func = &(mb_ovld[0]);
4725 			array_init(&row1);
4726 			while (over_func->type > 0) {
4727 				if ((MBSTRG(func_overload) & over_func->type) == over_func->type ) {
4728 					add_assoc_string(&row1, over_func->orig_func, over_func->ovld_func);
4729 				}
4730 				over_func++;
4731 			}
4732 			add_assoc_zval(return_value, "func_overload_list", &row1);
4733 		} else {
4734 			add_assoc_string(return_value, "func_overload_list", "no overload");
4735  		}
4736 		if (lang != NULL) {
4737 			if ((name = (char *)mbfl_no_encoding2name(lang->mail_charset)) != NULL) {
4738 				add_assoc_string(return_value, "mail_charset", name);
4739 			}
4740 			if ((name = (char *)mbfl_no_encoding2name(lang->mail_header_encoding)) != NULL) {
4741 				add_assoc_string(return_value, "mail_header_encoding", name);
4742 			}
4743 			if ((name = (char *)mbfl_no_encoding2name(lang->mail_body_encoding)) != NULL) {
4744 				add_assoc_string(return_value, "mail_body_encoding", name);
4745 			}
4746 		}
4747 		add_assoc_long(return_value, "illegal_chars", MBSTRG(illegalchars));
4748 		if (MBSTRG(encoding_translation)) {
4749 			add_assoc_string(return_value, "encoding_translation", "On");
4750 		} else {
4751 			add_assoc_string(return_value, "encoding_translation", "Off");
4752 		}
4753 		if ((name = (char *)mbfl_no_language2name(MBSTRG(language))) != NULL) {
4754 			add_assoc_string(return_value, "language", name);
4755 		}
4756 		n = MBSTRG(current_detect_order_list_size);
4757 		entry = MBSTRG(current_detect_order_list);
4758 		if (n > 0) {
4759 			size_t i;
4760 			array_init(&row2);
4761 			for (i = 0; i < n; i++) {
4762 				add_next_index_string(&row2, (*entry)->name);
4763 				entry++;
4764 			}
4765 			add_assoc_zval(return_value, "detect_order", &row2);
4766 		}
4767 		if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
4768 			add_assoc_string(return_value, "substitute_character", "none");
4769 		} else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
4770 			add_assoc_string(return_value, "substitute_character", "long");
4771 		} else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
4772 			add_assoc_string(return_value, "substitute_character", "entity");
4773 		} else {
4774 			add_assoc_long(return_value, "substitute_character", MBSTRG(current_filter_illegal_substchar));
4775 		}
4776 		if (MBSTRG(strict_detection)) {
4777 			add_assoc_string(return_value, "strict_detection", "On");
4778 		} else {
4779 			add_assoc_string(return_value, "strict_detection", "Off");
4780 		}
4781 	} else if (!strcasecmp("internal_encoding", typ)) {
4782 		if (MBSTRG(current_internal_encoding)) {
4783 			RETVAL_STRING((char *)MBSTRG(current_internal_encoding)->name);
4784 		}
4785 	} else if (!strcasecmp("http_input", typ)) {
4786 		if (MBSTRG(http_input_identify)) {
4787 			RETVAL_STRING((char *)MBSTRG(http_input_identify)->name);
4788 		}
4789 	} else if (!strcasecmp("http_output", typ)) {
4790 		if (MBSTRG(current_http_output_encoding)) {
4791 			RETVAL_STRING((char *)MBSTRG(current_http_output_encoding)->name);
4792 		}
4793 	} else if (!strcasecmp("http_output_conv_mimetypes", typ)) {
4794 		if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes") - 1, 0)) != NULL) {
4795 			RETVAL_STRING(name);
4796 		}
4797 	} else if (!strcasecmp("func_overload", typ)) {
4798  		RETVAL_LONG(MBSTRG(func_overload));
4799 	} else if (!strcasecmp("func_overload_list", typ)) {
4800 		if (MBSTRG(func_overload)){
4801 				over_func = &(mb_ovld[0]);
4802 				array_init(return_value);
4803 				while (over_func->type > 0) {
4804 					if ((MBSTRG(func_overload) & over_func->type) == over_func->type ) {
4805 						add_assoc_string(return_value, over_func->orig_func, over_func->ovld_func);
4806 					}
4807 					over_func++;
4808 				}
4809 		} else {
4810 			RETVAL_STRING("no overload");
4811 		}
4812 	} else if (!strcasecmp("mail_charset", typ)) {
4813 		if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_charset)) != NULL) {
4814 			RETVAL_STRING(name);
4815 		}
4816 	} else if (!strcasecmp("mail_header_encoding", typ)) {
4817 		if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_header_encoding)) != NULL) {
4818 			RETVAL_STRING(name);
4819 		}
4820 	} else if (!strcasecmp("mail_body_encoding", typ)) {
4821 		if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_body_encoding)) != NULL) {
4822 			RETVAL_STRING(name);
4823 		}
4824 	} else if (!strcasecmp("illegal_chars", typ)) {
4825 		RETVAL_LONG(MBSTRG(illegalchars));
4826 	} else if (!strcasecmp("encoding_translation", typ)) {
4827 		if (MBSTRG(encoding_translation)) {
4828 			RETVAL_STRING("On");
4829 		} else {
4830 			RETVAL_STRING("Off");
4831 		}
4832 	} else if (!strcasecmp("language", typ)) {
4833 		if ((name = (char *)mbfl_no_language2name(MBSTRG(language))) != NULL) {
4834 			RETVAL_STRING(name);
4835 		}
4836 	} else if (!strcasecmp("detect_order", typ)) {
4837 		n = MBSTRG(current_detect_order_list_size);
4838 		entry = MBSTRG(current_detect_order_list);
4839 		if (n > 0) {
4840 			size_t i;
4841 			array_init(return_value);
4842 			for (i = 0; i < n; i++) {
4843 				add_next_index_string(return_value, (*entry)->name);
4844 				entry++;
4845 			}
4846 		}
4847 	} else if (!strcasecmp("substitute_character", typ)) {
4848 		if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
4849 			RETVAL_STRING("none");
4850 		} else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
4851 			RETVAL_STRING("long");
4852 		} else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
4853 			RETVAL_STRING("entity");
4854 		} else {
4855 			RETVAL_LONG(MBSTRG(current_filter_illegal_substchar));
4856 		}
4857 	} else if (!strcasecmp("strict_detection", typ)) {
4858 		if (MBSTRG(strict_detection)) {
4859 			RETVAL_STRING("On");
4860 		} else {
4861 			RETVAL_STRING("Off");
4862 		}
4863 	} else {
4864 		RETURN_FALSE;
4865 	}
4866 }
4867 /* }}} */
4868 
4869 
php_mb_init_convd(const mbfl_encoding * encoding)4870 static inline mbfl_buffer_converter *php_mb_init_convd(const mbfl_encoding *encoding)
4871 {
4872 	mbfl_buffer_converter *convd;
4873 
4874 	convd = mbfl_buffer_converter_new2(encoding, encoding, 0);
4875 	if (convd == NULL) {
4876 		return NULL;
4877 	}
4878 	mbfl_buffer_converter_illegal_mode(convd, MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE);
4879 	mbfl_buffer_converter_illegal_substchar(convd, 0);
4880 	return convd;
4881 }
4882 
4883 
/*
 * Feed `input` (`length` bytes) through `convd` and decide whether it is
 * valid in `encoding`.
 *
 * The input counts as valid only when the converter produced a result,
 * reported no illegal characters, and the output is byte-for-byte identical
 * to the input.  Returns 1 for valid input and 0 otherwise.
 */
static inline int php_mb_check_encoding_impl(mbfl_buffer_converter *convd, const char *input, size_t length, const mbfl_encoding *encoding) {
	mbfl_string src, dst;
	mbfl_string *converted;
	long bad_chars;
	int identical;

	/* describe the input as an mbfl string; dst receives the converter output */
	mbfl_string_init_set(&src, mbfl_no_language_neutral, encoding->no_encoding);
	mbfl_string_init(&dst);
	src.val = (unsigned char *) input;
	src.len = length;

	converted = mbfl_buffer_converter_feed_result(convd, &src, &dst);
	bad_chars = mbfl_buffer_illegalchars(convd);

	if (converted == NULL) {
		return 0;
	}

	identical = bad_chars == 0
		&& src.len == dst.len
		&& memcmp(src.val, dst.val, src.len) == 0;

	mbfl_string_clear(&dst);

	return identical ? 1 : 0;
}
4907 
4908 
php_mb_check_encoding(const char * input,size_t length,const char * enc)4909 MBSTRING_API int php_mb_check_encoding(const char *input, size_t length, const char *enc)
4910 {
4911 	const mbfl_encoding *encoding = MBSTRG(current_internal_encoding);
4912 	mbfl_buffer_converter *convd;
4913 
4914 	if (input == NULL) {
4915 		return MBSTRG(illegalchars) == 0;
4916 	}
4917 
4918 	if (enc != NULL) {
4919 		encoding = mbfl_name2encoding(enc);
4920 		if (!encoding || encoding == &mbfl_encoding_pass) {
4921 			php_error_docref(NULL, E_WARNING, "Invalid encoding \"%s\"", enc);
4922 			return 0;
4923 		}
4924 	}
4925 
4926 	convd = php_mb_init_convd(encoding);
4927 	if (convd == NULL) {
4928 		php_error_docref(NULL, E_WARNING, "Unable to create converter");
4929 		return 0;
4930 	}
4931 
4932 	if (php_mb_check_encoding_impl(convd, input, length, encoding)) {
4933 		mbfl_buffer_converter_delete(convd);
4934 		return 1;
4935 	}
4936 	mbfl_buffer_converter_delete(convd);
4937 	return 0;
4938 }
4939 
4940 
php_mb_check_encoding_recursive(HashTable * vars,const zend_string * enc)4941 MBSTRING_API int php_mb_check_encoding_recursive(HashTable *vars, const zend_string *enc)
4942 {
4943 	const mbfl_encoding *encoding = MBSTRG(current_internal_encoding);
4944 	mbfl_buffer_converter *convd;
4945 	zend_long idx;
4946 	zend_string *key;
4947 	zval *entry;
4948 	int valid = 1;
4949 
4950 	(void)(idx);
4951 
4952 	if (enc != NULL) {
4953 		encoding = mbfl_name2encoding(ZSTR_VAL(enc));
4954 		if (!encoding || encoding == &mbfl_encoding_pass) {
4955 			php_error_docref(NULL, E_WARNING, "Invalid encoding \"%s\"", ZSTR_VAL(enc));
4956 			return 0;
4957 		}
4958 	}
4959 
4960 	convd = php_mb_init_convd(encoding);
4961 	if (convd == NULL) {
4962 		php_error_docref(NULL, E_WARNING, "Unable to create converter");
4963 		return 0;
4964 	}
4965 
4966 	if (vars->u.v.nApplyCount++ > 1) {
4967 		vars->u.v.nApplyCount--;
4968 		mbfl_buffer_converter_delete(convd);
4969 		php_error_docref(NULL, E_WARNING, "Cannot not handle circular references");
4970 		return 0;
4971 	}
4972 	ZEND_HASH_FOREACH_KEY_VAL(vars, idx, key, entry) {
4973 		ZVAL_DEREF(entry);
4974 		if (key) {
4975 			if (!php_mb_check_encoding_impl(convd, ZSTR_VAL(key), ZSTR_LEN(key), encoding)) {
4976 				valid = 0;
4977 				break;
4978 			}
4979 		}
4980 		switch (Z_TYPE_P(entry)) {
4981 			case IS_STRING:
4982 				if (!php_mb_check_encoding_impl(convd, Z_STRVAL_P(entry), Z_STRLEN_P(entry), encoding)) {
4983 					valid = 0;
4984 					break;
4985 				}
4986 				break;
4987 			case IS_ARRAY:
4988 				if (!php_mb_check_encoding_recursive(HASH_OF(entry), enc)) {
4989 					valid = 0;
4990 					break;
4991 				}
4992 				break;
4993 			case IS_LONG:
4994 			case IS_DOUBLE:
4995 			case IS_NULL:
4996 			case IS_TRUE:
4997 			case IS_FALSE:
4998 				break;
4999 			default:
5000 				/* Other types are error. */
5001 				valid = 0;
5002 				break;
5003 		}
5004 	} ZEND_HASH_FOREACH_END();
5005 	vars->u.v.nApplyCount--;
5006 	mbfl_buffer_converter_delete(convd);
5007 	return valid;
5008 }
5009 
5010 
5011 /* {{{ proto bool mb_check_encoding([mixed var[, string encoding]])
5012    Check if the string is valid for the specified encoding */
PHP_FUNCTION(mb_check_encoding)5013 PHP_FUNCTION(mb_check_encoding)
5014 {
5015 	zval *input = NULL;
5016 	zend_string *enc = NULL;
5017 
5018 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "|zS", &input, &enc) == FAILURE) {
5019 		return;
5020 	}
5021 
5022 	/* FIXME: Actually check all inputs, except $_FILES file content. */
5023 	if (input == NULL) {
5024 		if (MBSTRG(illegalchars) == 0) {
5025 			RETURN_TRUE;
5026 		}
5027 		RETURN_FALSE;
5028 	}
5029 
5030 	if (Z_TYPE_P(input) == IS_ARRAY) {
5031 		if (!php_mb_check_encoding_recursive(HASH_OF(input), enc)) {
5032 			RETURN_FALSE;
5033 		}
5034 	} else {
5035 		convert_to_string(input);
5036 		if (!php_mb_check_encoding(Z_STRVAL_P(input), Z_STRLEN_P(input), enc ? ZSTR_VAL(enc): NULL)) {
5037 			RETURN_FALSE;
5038 		}
5039 	}
5040 	RETURN_TRUE;
5041 }
5042 /* }}} */
5043 
5044 
php_mb_ord(const char * str,size_t str_len,const char * enc)5045 static inline zend_long php_mb_ord(const char* str, size_t str_len, const char* enc)
5046 {
5047 	enum mbfl_no_encoding no_enc;
5048 	char* ret;
5049 	size_t ret_len;
5050 	zend_long cp;
5051 
5052 	if (enc == NULL) {
5053 		no_enc = MBSTRG(current_internal_encoding)->no_encoding;
5054 	} else {
5055 		no_enc = mbfl_name2no_encoding(enc);
5056 
5057 		if (no_enc == mbfl_no_encoding_invalid) {
5058 			php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc);
5059 			return -1;
5060 		}
5061 	}
5062 
5063 	if (php_mb_is_unsupported_no_encoding(no_enc)) {
5064 		php_error_docref(NULL, E_WARNING, "Unsupported encoding \"%s\"", enc);
5065 		return -1;
5066 	}
5067 
5068 	if (str_len == 0) {
5069 		php_error_docref(NULL, E_WARNING, "Empty string");
5070 		return -1;
5071 	}
5072 
5073 	{
5074 		long orig_illegalchars = MBSTRG(illegalchars);
5075 		MBSTRG(illegalchars) = 0;
5076 		ret = php_mb_convert_encoding(str, str_len, "UCS-4BE", enc, &ret_len);
5077 		if (MBSTRG(illegalchars) != 0) {
5078 			if (ret) {
5079 				efree(ret);
5080 			}
5081 			MBSTRG(illegalchars) = orig_illegalchars;
5082 			return -1;
5083 		}
5084 
5085 		MBSTRG(illegalchars) = orig_illegalchars;
5086 	}
5087 
5088 	if (ret == NULL) {
5089 		return -1;
5090 	}
5091 
5092 	cp = (unsigned char) ret[0] << 24 | \
5093 		 (unsigned char) ret[1] << 16 | \
5094 		 (unsigned char) ret[2] <<  8 | \
5095 		 (unsigned char) ret[3];
5096 
5097 	efree(ret);
5098 
5099 	return cp;
5100 }
5101 
5102 
5103 /* {{{ proto int|false mb_ord(string str[, string encoding]) */
PHP_FUNCTION(mb_ord)5104 PHP_FUNCTION(mb_ord)
5105 {
5106 	char* str;
5107 	size_t str_len;
5108 	char* enc = NULL;
5109 	size_t enc_len;
5110 	zend_long cp;
5111 
5112 	ZEND_PARSE_PARAMETERS_START(1, 2)
5113 		Z_PARAM_STRING(str, str_len)
5114 		Z_PARAM_OPTIONAL
5115 		Z_PARAM_STRING(enc, enc_len)
5116 	ZEND_PARSE_PARAMETERS_END();
5117 
5118 	cp = php_mb_ord(str, str_len, enc);
5119 
5120 	if (0 > cp) {
5121 		RETURN_FALSE;
5122 	}
5123 
5124 	RETURN_LONG(cp);
5125 }
5126 /* }}} */
5127 
5128 
/*
 * Encode code point `cp` in the encoding named by `enc`, or in the current
 * internal encoding when `enc` is NULL.
 *
 * UTF-8 output is produced directly (surrogates 0xd800-0xdfff are rejected);
 * every other encoding goes through a UCS-4BE -> `enc` conversion, during
 * which the global illegal-character counter is zeroed and then restored.
 *
 * On success an emalloc'd buffer is returned (the caller releases it with
 * efree()) and, if `output_len` is non-NULL, its byte length is stored there.
 * Returns NULL - leaving `*output_len` untouched - when the encoding is
 * unknown or unsupported (E_WARNING), `cp` is outside 0..0x10ffff, or the
 * conversion failed or reported illegal characters.
 */
static inline char* php_mb_chr(zend_long cp, const char* enc, size_t *output_len)
{
	enum mbfl_no_encoding no_enc;
	char* buf;
	size_t buf_len;
	char* ret;
	size_t ret_len = 0;

	if (enc == NULL) {
		no_enc = MBSTRG(current_internal_encoding)->no_encoding;
	} else {
		no_enc = mbfl_name2no_encoding(enc);
		if (no_enc == mbfl_no_encoding_invalid) {
			php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc);
			return NULL;
		}
	}

	if (php_mb_is_unsupported_no_encoding(no_enc)) {
		php_error_docref(NULL, E_WARNING, "Unsupported encoding \"%s\"", enc);
		return NULL;
	}

	if (cp < 0 || cp > 0x10ffff) {
		return NULL;
	}

	if (php_mb_is_no_encoding_utf8(no_enc)) {
		/* surrogate code points have no UTF-8 representation */
		if (cp > 0xd7ff && 0xe000 > cp) {
			return NULL;
		}

		/* one extra byte is always reserved for the terminating NUL */
		if (cp < 0x80) {
			ret_len = 1;
			ret = (char *) safe_emalloc(ret_len, 1, 1);
			ret[0] = cp;
			ret[1] = 0;
		} else if (cp < 0x800) {
			ret_len = 2;
			ret = (char *) safe_emalloc(ret_len, 1, 1);
			ret[0] = 0xc0 | (cp >> 6);
			ret[1] = 0x80 | (cp & 0x3f);
			ret[2] = 0;
		} else if (cp < 0x10000) {
			ret_len = 3;
			ret = (char *) safe_emalloc(ret_len, 1, 1);
			ret[0] = 0xe0 | (cp >> 12);
			ret[1] = 0x80 | ((cp >> 6) & 0x3f);
			ret[2] = 0x80 | (cp & 0x3f);
			ret[3] = 0;
		} else {
			ret_len = 4;
			ret = (char *) safe_emalloc(ret_len, 1, 1);
			ret[0] = 0xf0 | (cp >> 18);
			ret[1] = 0x80 | ((cp >> 12) & 0x3f);
			ret[2] = 0x80 | ((cp >> 6) & 0x3f);
			ret[3] = 0x80 | (cp & 0x3f);
			ret[4] = 0;
		}

		if (output_len) {
			*output_len = ret_len;
		}

		return ret;
	}

	/* Serialise the code point as one big-endian UCS-4 unit. */
	buf_len = 4;
	buf = (char *) safe_emalloc(buf_len, 1, 1);
	buf[0] = (cp >> 24) & 0xff;
	buf[1] = (cp >> 16) & 0xff;
	buf[2] = (cp >>  8) & 0xff;
	buf[3] = cp & 0xff;
	buf[4] = 0;

	{
		long orig_illegalchars = MBSTRG(illegalchars);
		int had_illegal;

		MBSTRG(illegalchars) = 0;
		ret = php_mb_convert_encoding(buf, buf_len, enc, "UCS-4BE", &ret_len);
		had_illegal = MBSTRG(illegalchars) != 0;
		MBSTRG(illegalchars) = orig_illegalchars;

		efree(buf);

		if (had_illegal) {
			/* the converter may have returned no buffer at all */
			if (ret) {
				efree(ret);
			}
			return NULL;
		}
	}

	/* Only publish a length that belongs to an actual result. */
	if (ret != NULL && output_len) {
		*output_len = ret_len;
	}

	return ret;
}
5225 
5226 
5227 /* {{{ proto string|false mb_chr(int cp[, string encoding]) */
PHP_FUNCTION(mb_chr)5228 PHP_FUNCTION(mb_chr)
5229 {
5230 	zend_long cp;
5231 	char* enc = NULL;
5232 	size_t enc_len;
5233 	char* ret;
5234 	size_t ret_len;
5235 
5236 	ZEND_PARSE_PARAMETERS_START(1, 2)
5237 		Z_PARAM_LONG(cp)
5238 		Z_PARAM_OPTIONAL
5239 		Z_PARAM_STRING(enc, enc_len)
5240 	ZEND_PARSE_PARAMETERS_END();
5241 
5242 	ret = php_mb_chr(cp, enc, &ret_len);
5243 
5244 	if (ret == NULL) {
5245 		RETURN_FALSE;
5246 	}
5247 
5248 	RETVAL_STRING(ret);
5249 	efree(ret);
5250 }
5251 /* }}} */
5252 
5253 
/*
 * Run `str` (`str_len` bytes) through php_mb_convert_encoding() with `enc`
 * as both the target and the source encoding.  The helper's result is
 * returned unchanged and the output length is stored through `ret_len`.
 */
static inline char* php_mb_scrub(const char* str, size_t str_len, const char* enc, size_t *ret_len)
{
	char *scrubbed = php_mb_convert_encoding(str, str_len, enc, enc, ret_len);

	return scrubbed;
}
5258 
5259 
5260 /* {{{ proto string|false mb_scrub(string str[, string encoding]) */
PHP_FUNCTION(mb_scrub)5261 PHP_FUNCTION(mb_scrub)
5262 {
5263 	char* str;
5264 	size_t str_len;
5265 	char *enc = NULL;
5266 	size_t enc_len;
5267 	char *ret;
5268 	size_t ret_len;
5269 
5270 	ZEND_PARSE_PARAMETERS_START(1, 2)
5271 		Z_PARAM_STRING(str, str_len)
5272 		Z_PARAM_OPTIONAL
5273 		Z_PARAM_STRING(enc, enc_len)
5274 	ZEND_PARSE_PARAMETERS_END();
5275 
5276 	if (enc == NULL) {
5277 		enc = (char *) MBSTRG(current_internal_encoding)->name;
5278 	} else if (!mbfl_is_support_encoding(enc)) {
5279 		php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc);
5280 		RETURN_FALSE;
5281 	}
5282 
5283 	ret = php_mb_scrub(str, str_len, enc, &ret_len);
5284 
5285 	if (ret == NULL) {
5286 		RETURN_FALSE;
5287 	}
5288 
5289 	RETVAL_STRINGL(ret, ret_len);
5290 	efree(ret);
5291 }
5292 /* }}} */
5293 
5294 
5295 /* {{{ php_mb_populate_current_detect_order_list */
php_mb_populate_current_detect_order_list(void)5296 static void php_mb_populate_current_detect_order_list(void)
5297 {
5298 	const mbfl_encoding **entry = 0;
5299 	size_t nentries;
5300 
5301 	if (MBSTRG(current_detect_order_list)) {
5302 		return;
5303 	}
5304 
5305 	if (MBSTRG(detect_order_list) && MBSTRG(detect_order_list_size)) {
5306 		nentries = MBSTRG(detect_order_list_size);
5307 		entry = (const mbfl_encoding **)safe_emalloc(nentries, sizeof(mbfl_encoding*), 0);
5308 		memcpy(entry, MBSTRG(detect_order_list), sizeof(mbfl_encoding*) * nentries);
5309 	} else {
5310 		const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
5311 		size_t i;
5312 		nentries = MBSTRG(default_detect_order_list_size);
5313 		entry = (const mbfl_encoding **)safe_emalloc(nentries, sizeof(mbfl_encoding*), 0);
5314 		for (i = 0; i < nentries; i++) {
5315 			entry[i] = mbfl_no2encoding(src[i]);
5316 		}
5317 	}
5318 	MBSTRG(current_detect_order_list) = entry;
5319 	MBSTRG(current_detect_order_list_size) = nentries;
5320 }
5321 /* }}} */
5322 
5323 /* {{{ static int php_mb_encoding_translation() */
php_mb_encoding_translation(void)5324 static int php_mb_encoding_translation(void)
5325 {
5326 	return MBSTRG(encoding_translation);
5327 }
5328 /* }}} */
5329 
5330 /* {{{ MBSTRING_API size_t php_mb_mbchar_bytes_ex() */
php_mb_mbchar_bytes_ex(const char * s,const mbfl_encoding * enc)5331 MBSTRING_API size_t php_mb_mbchar_bytes_ex(const char *s, const mbfl_encoding *enc)
5332 {
5333 	if (enc != NULL) {
5334 		if (enc->flag & MBFL_ENCTYPE_MBCS) {
5335 			if (enc->mblen_table != NULL) {
5336 				if (s != NULL) return enc->mblen_table[*(unsigned char *)s];
5337 			}
5338 		} else if (enc->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
5339 			return 2;
5340 		} else if (enc->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
5341 			return 4;
5342 		}
5343 	}
5344 	return 1;
5345 }
5346 /* }}} */
5347 
5348 /* {{{ MBSTRING_API size_t php_mb_mbchar_bytes() */
php_mb_mbchar_bytes(const char * s)5349 MBSTRING_API size_t php_mb_mbchar_bytes(const char *s)
5350 {
5351 	return php_mb_mbchar_bytes_ex(s, MBSTRG(internal_encoding));
5352 }
5353 /* }}} */
5354 
5355 /* {{{ MBSTRING_API char *php_mb_safe_strrchr_ex() */
php_mb_safe_strrchr_ex(const char * s,unsigned int c,size_t nbytes,const mbfl_encoding * enc)5356 MBSTRING_API char *php_mb_safe_strrchr_ex(const char *s, unsigned int c, size_t nbytes, const mbfl_encoding *enc)
5357 {
5358 	register const char *p = s;
5359 	char *last=NULL;
5360 
5361 	if (nbytes == (size_t)-1) {
5362 		size_t nb = 0;
5363 
5364 		while (*p != '\0') {
5365 			if (nb == 0) {
5366 				if ((unsigned char)*p == (unsigned char)c) {
5367 					last = (char *)p;
5368 				}
5369 				nb = php_mb_mbchar_bytes_ex(p, enc);
5370 				if (nb == 0) {
5371 					return NULL; /* something is going wrong! */
5372 				}
5373 			}
5374 			--nb;
5375 			++p;
5376 		}
5377 	} else {
5378 		register size_t bcnt = nbytes;
5379 		register size_t nbytes_char;
5380 		while (bcnt > 0) {
5381 			if ((unsigned char)*p == (unsigned char)c) {
5382 				last = (char *)p;
5383 			}
5384 			nbytes_char = php_mb_mbchar_bytes_ex(p, enc);
5385 			if (bcnt < nbytes_char) {
5386 				return NULL;
5387 			}
5388 			p += nbytes_char;
5389 			bcnt -= nbytes_char;
5390 		}
5391 	}
5392 	return last;
5393 }
5394 /* }}} */
5395 
5396 /* {{{ MBSTRING_API char *php_mb_safe_strrchr() */
php_mb_safe_strrchr(const char * s,unsigned int c,size_t nbytes)5397 MBSTRING_API char *php_mb_safe_strrchr(const char *s, unsigned int c, size_t nbytes)
5398 {
5399 	return php_mb_safe_strrchr_ex(s, c, nbytes, MBSTRG(internal_encoding));
5400 }
5401 /* }}} */
5402 
5403 /* {{{ MBSTRING_API int php_mb_stripos()
5404  */
php_mb_stripos(int mode,const char * old_haystack,unsigned int old_haystack_len,const char * old_needle,unsigned int old_needle_len,long offset,const char * from_encoding)5405 MBSTRING_API int php_mb_stripos(int mode, const char *old_haystack, unsigned int old_haystack_len, const char *old_needle, unsigned int old_needle_len, long offset, const char *from_encoding)
5406 {
5407 	int n;
5408 	mbfl_string haystack, needle;
5409 	n = -1;
5410 
5411 	mbfl_string_init(&haystack);
5412 	mbfl_string_init(&needle);
5413 	haystack.no_language = MBSTRG(language);
5414 	haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
5415 	needle.no_language = MBSTRG(language);
5416 	needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
5417 
5418 	do {
5419 		size_t len = 0;
5420 		haystack.val = (unsigned char *)php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, (char *)old_haystack, old_haystack_len, &len, from_encoding);
5421 		haystack.len = len;
5422 
5423 		if (!haystack.val) {
5424 			break;
5425 		}
5426 
5427 		if (haystack.len <= 0) {
5428 			break;
5429 		}
5430 
5431 		needle.val = (unsigned char *)php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, (char *)old_needle, old_needle_len, &len, from_encoding);
5432 		needle.len = len;
5433 
5434 		if (!needle.val) {
5435 			break;
5436 		}
5437 
5438 		if (needle.len <= 0) {
5439 			break;
5440 		}
5441 
5442 		haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding);
5443 		if (haystack.no_encoding == mbfl_no_encoding_invalid) {
5444 			php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", from_encoding);
5445 			break;
5446 		}
5447 
5448  		{
5449  			int haystack_char_len = mbfl_strlen(&haystack);
5450 
5451  			if (mode) {
5452  				if ((offset > 0 && offset > haystack_char_len) ||
5453  					(offset < 0 && -offset > haystack_char_len)) {
5454  					php_error_docref(NULL, E_WARNING, "Offset is greater than the length of haystack string");
5455  					break;
5456  				}
5457  			} else {
5458 				if (offset < 0) {
5459 					offset += (long)haystack_char_len;
5460 				}
5461  				if (offset < 0 || offset > haystack_char_len) {
5462  					php_error_docref(NULL, E_WARNING, "Offset not contained in string");
5463  					break;
5464  				}
5465  			}
5466 		}
5467 
5468 		n = mbfl_strpos(&haystack, &needle, offset, mode);
5469 	} while(0);
5470 
5471 	if (haystack.val) {
5472 		efree(haystack.val);
5473 	}
5474 
5475 	if (needle.val) {
5476 		efree(needle.val);
5477 	}
5478 
5479 	return n;
5480 }
5481 /* }}} */
5482 
php_mb_gpc_get_detect_order(const zend_encoding *** list,size_t * list_size)5483 static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *list_size) /* {{{ */
5484 {
5485 	*list = (const zend_encoding **)MBSTRG(http_input_list);
5486 	*list_size = MBSTRG(http_input_list_size);
5487 }
5488 /* }}} */
5489 
php_mb_gpc_set_input_encoding(const zend_encoding * encoding)5490 static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding) /* {{{ */
5491 {
5492 	MBSTRG(http_input_identify) = (const mbfl_encoding*)encoding;
5493 }
5494 /* }}} */
5495 
5496 #endif	/* HAVE_MBSTRING */
5497 
5498 /*
5499  * Local variables:
5500  * tab-width: 4
5501  * c-basic-offset: 4
5502  * End:
5503  * vim600: fdm=marker
5504  * vim: noet sw=4 ts=4
5505  */
5506