xref: /PHP-7.1/ext/mbstring/mbstring.c (revision 7f6387b5)
1 /*
2    +----------------------------------------------------------------------+
3    | PHP Version 7                                                        |
4    +----------------------------------------------------------------------+
5    | Copyright (c) 1997-2018 The PHP Group                                |
6    +----------------------------------------------------------------------+
7    | This source file is subject to version 3.01 of the PHP license,      |
8    | that is bundled with this package in the file LICENSE, and is        |
9    | available through the world-wide-web at the following url:           |
10    | http://www.php.net/license/3_01.txt                                  |
11    | If you did not receive a copy of the PHP license and are unable to   |
12    | obtain it through the world-wide-web, please send a note to          |
13    | license@php.net so we can mail you a copy immediately.               |
14    +----------------------------------------------------------------------+
15    | Author: Tsukada Takuya <tsukada@fminn.nagano.nagano.jp>              |
16    |         Rui Hirokawa <hirokawa@php.net>                              |
17    +----------------------------------------------------------------------+
18  */
19 
20 /* $Id$ */
21 
22 /*
23  * PHP 4 Multibyte String module "mbstring"
24  *
25  * History:
26  *   2000.5.19  Release php-4.0RC2_jstring-1.0
27  *   2001.4.1   Release php4_jstring-1.0.91
28  *   2001.4.30  Release php4_jstring-1.1 (contribute to The PHP Group)
29  *   2001.5.1   Renamed from jstring to mbstring (hirokawa@php.net)
30  */
31 
32 /*
33  * PHP3 Internationalization support program.
34  *
35  * Copyright (c) 1999,2000 by the PHP3 internationalization team.
36  * All rights reserved.
37  *
38  * See README_PHP3-i18n-ja for more detail.
39  *
40  * Authors:
41  *    Hironori Sato <satoh@jpnnet.com>
42  *    Shigeru Kanemoto <sgk@happysize.co.jp>
43  *    Tsukada Takuya <tsukada@fminn.nagano.nagano.jp>
44  *    Rui Hirokawa <rui_hirokawa@ybb.ne.jp>
45  */
46 
47 /* {{{ includes */
48 #ifdef HAVE_CONFIG_H
49 #include "config.h"
50 #endif
51 
52 #include "php.h"
53 #include "php_ini.h"
54 #include "php_variables.h"
55 #include "mbstring.h"
56 #include "ext/standard/php_string.h"
57 #include "ext/standard/php_mail.h"
58 #include "ext/standard/exec.h"
59 #include "ext/standard/url.h"
60 #include "main/php_output.h"
61 #include "ext/standard/info.h"
62 
63 #include "libmbfl/mbfl/mbfl_allocators.h"
64 #include "libmbfl/mbfl/mbfilter_pass.h"
65 
66 #include "php_variables.h"
67 #include "php_globals.h"
68 #include "rfc1867.h"
69 #include "php_content_types.h"
70 #include "SAPI.h"
71 #include "php_unicode.h"
72 #include "TSRM.h"
73 
74 #include "mb_gpc.h"
75 
76 #if HAVE_MBREGEX
77 #include "php_mbregex.h"
78 #endif
79 
80 #include "zend_multibyte.h"
81 
82 #if HAVE_ONIG
83 #include "php_onig_compat.h"
84 #include <oniguruma.h>
85 #undef UChar
86 #elif HAVE_PCRE || HAVE_BUNDLED_PCRE
87 #include "ext/pcre/php_pcre.h"
88 #endif
89 /* }}} */
90 
91 #if HAVE_MBSTRING
92 
93 /* {{{ prototypes */
/*
 * Module-globals declaration for "mbstring" plus forward declarations of the
 * static hooks and helpers referenced in this file. Their definitions are
 * not part of this section.
 */
94 ZEND_DECLARE_MODULE_GLOBALS(mbstring)
95 
/* Globals constructor/destructor; wired into mbstring_module_entry. */
96 static PHP_GINIT_FUNCTION(mbstring);
97 static PHP_GSHUTDOWN_FUNCTION(mbstring);
98 
99 static void php_mb_populate_current_detect_order_list(void);
100 
101 static int php_mb_encoding_translation(void);
102 
/* Writes a list pointer and its element count through the two out-parameters. */
103 static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *list_size);
104 
105 static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding);
106 
107 /* }}} */
108 
109 /* {{{ php_mb_default_identify_list */
110 typedef struct _php_mb_nls_ident_list {
111 	enum mbfl_no_language lang;
112 	const enum mbfl_no_encoding *list;
113 	size_t list_size;
114 } php_mb_nls_ident_list;
115 
116 static const enum mbfl_no_encoding php_mb_default_identify_list_ja[] = {
117 	mbfl_no_encoding_ascii,
118 	mbfl_no_encoding_jis,
119 	mbfl_no_encoding_utf8,
120 	mbfl_no_encoding_euc_jp,
121 	mbfl_no_encoding_sjis
122 };
123 
124 static const enum mbfl_no_encoding php_mb_default_identify_list_cn[] = {
125 	mbfl_no_encoding_ascii,
126 	mbfl_no_encoding_utf8,
127 	mbfl_no_encoding_euc_cn,
128 	mbfl_no_encoding_cp936
129 };
130 
131 static const enum mbfl_no_encoding php_mb_default_identify_list_tw_hk[] = {
132 	mbfl_no_encoding_ascii,
133 	mbfl_no_encoding_utf8,
134 	mbfl_no_encoding_euc_tw,
135 	mbfl_no_encoding_big5
136 };
137 
138 static const enum mbfl_no_encoding php_mb_default_identify_list_kr[] = {
139 	mbfl_no_encoding_ascii,
140 	mbfl_no_encoding_utf8,
141 	mbfl_no_encoding_euc_kr,
142 	mbfl_no_encoding_uhc
143 };
144 
145 static const enum mbfl_no_encoding php_mb_default_identify_list_ru[] = {
146 	mbfl_no_encoding_ascii,
147 	mbfl_no_encoding_utf8,
148 	mbfl_no_encoding_koi8r,
149 	mbfl_no_encoding_cp1251,
150 	mbfl_no_encoding_cp866
151 };
152 
153 static const enum mbfl_no_encoding php_mb_default_identify_list_hy[] = {
154 	mbfl_no_encoding_ascii,
155 	mbfl_no_encoding_utf8,
156 	mbfl_no_encoding_armscii8
157 };
158 
159 static const enum mbfl_no_encoding php_mb_default_identify_list_tr[] = {
160 	mbfl_no_encoding_ascii,
161 	mbfl_no_encoding_utf8,
162 	mbfl_no_encoding_cp1254,
163 	mbfl_no_encoding_8859_9
164 };
165 
166 static const enum mbfl_no_encoding php_mb_default_identify_list_ua[] = {
167 	mbfl_no_encoding_ascii,
168 	mbfl_no_encoding_utf8,
169 	mbfl_no_encoding_koi8u
170 };
171 
172 static const enum mbfl_no_encoding php_mb_default_identify_list_neut[] = {
173 	mbfl_no_encoding_ascii,
174 	mbfl_no_encoding_utf8
175 };
176 
177 
178 static const php_mb_nls_ident_list php_mb_default_identify_list[] = {
179 	{ mbfl_no_language_japanese, php_mb_default_identify_list_ja, sizeof(php_mb_default_identify_list_ja) / sizeof(php_mb_default_identify_list_ja[0]) },
180 	{ mbfl_no_language_korean, php_mb_default_identify_list_kr, sizeof(php_mb_default_identify_list_kr) / sizeof(php_mb_default_identify_list_kr[0]) },
181 	{ mbfl_no_language_traditional_chinese, php_mb_default_identify_list_tw_hk, sizeof(php_mb_default_identify_list_tw_hk) / sizeof(php_mb_default_identify_list_tw_hk[0]) },
182 	{ mbfl_no_language_simplified_chinese, php_mb_default_identify_list_cn, sizeof(php_mb_default_identify_list_cn) / sizeof(php_mb_default_identify_list_cn[0]) },
183 	{ mbfl_no_language_russian, php_mb_default_identify_list_ru, sizeof(php_mb_default_identify_list_ru) / sizeof(php_mb_default_identify_list_ru[0]) },
184 	{ mbfl_no_language_armenian, php_mb_default_identify_list_hy, sizeof(php_mb_default_identify_list_hy) / sizeof(php_mb_default_identify_list_hy[0]) },
185 	{ mbfl_no_language_turkish, php_mb_default_identify_list_tr, sizeof(php_mb_default_identify_list_tr) / sizeof(php_mb_default_identify_list_tr[0]) },
186 	{ mbfl_no_language_ukrainian, php_mb_default_identify_list_ua, sizeof(php_mb_default_identify_list_ua) / sizeof(php_mb_default_identify_list_ua[0]) },
187 	{ mbfl_no_language_neutral, php_mb_default_identify_list_neut, sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]) }
188 };
189 
190 /* }}} */
191 
192 /* {{{ mb_overload_def mb_ovld[] */
/*
 * Function-overload table. Each row holds an MB_OVERLOAD_* flag followed by
 * three function names: a plain name, its mb_* counterpart, and an
 * "mb_orig_*" name.
 * NOTE(review): presumably (flag that enables the row, function to replace,
 * replacement, name under which the original is kept) - confirm against
 * struct mb_overload_def, which is declared outside this file section.
 * The all-zero/NULL row terminates the table.
 */
193 static const struct mb_overload_def mb_ovld[] = {
194 	{MB_OVERLOAD_MAIL, "mail", "mb_send_mail", "mb_orig_mail"},
195 	{MB_OVERLOAD_STRING, "strlen", "mb_strlen", "mb_orig_strlen"},
196 	{MB_OVERLOAD_STRING, "strpos", "mb_strpos", "mb_orig_strpos"},
197 	{MB_OVERLOAD_STRING, "strrpos", "mb_strrpos", "mb_orig_strrpos"},
198 	{MB_OVERLOAD_STRING, "stripos", "mb_stripos", "mb_orig_stripos"},
199 	{MB_OVERLOAD_STRING, "strripos", "mb_strripos", "mb_orig_strripos"},
200 	{MB_OVERLOAD_STRING, "strstr", "mb_strstr", "mb_orig_strstr"},
201 	{MB_OVERLOAD_STRING, "strrchr", "mb_strrchr", "mb_orig_strrchr"},
202 	{MB_OVERLOAD_STRING, "stristr", "mb_stristr", "mb_orig_stristr"},
203 	{MB_OVERLOAD_STRING, "substr", "mb_substr", "mb_orig_substr"},
204 	{MB_OVERLOAD_STRING, "strtolower", "mb_strtolower", "mb_orig_strtolower"},
205 	{MB_OVERLOAD_STRING, "strtoupper", "mb_strtoupper", "mb_orig_strtoupper"},
206 	{MB_OVERLOAD_STRING, "substr_count", "mb_substr_count", "mb_orig_substr_count"},
207 	{0, NULL, NULL, NULL}
208 };
209 /* }}} */
210 
211 /* {{{ arginfo */
/*
 * Argument descriptors for the functions registered in mbstring_functions[].
 *
 * Macro conventions (Zend API, see Zend/zend_API.h):
 *   ZEND_BEGIN_ARG_INFO_EX(name, unused, return_reference, required_num_args)
 *   ZEND_BEGIN_ARG_INFO(name, unused)    - used below for functions that take
 *                                          no parameters
 *   ZEND_ARG_INFO(pass_by_ref, name)     - one parameter
 *   ZEND_ARG_VARIADIC_INFO(pass_by_ref, name) - trailing variadic parameter
 *
 * Parameters listed beyond required_num_args are optional.
 */
212 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_language, 0, 0, 0)
213 	ZEND_ARG_INFO(0, language)
214 ZEND_END_ARG_INFO()
215 
216 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_internal_encoding, 0, 0, 0)
217 	ZEND_ARG_INFO(0, encoding)
218 ZEND_END_ARG_INFO()
219 
220 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_http_input, 0, 0, 0)
221 	ZEND_ARG_INFO(0, type)
222 ZEND_END_ARG_INFO()
223 
224 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_http_output, 0, 0, 0)
225 	ZEND_ARG_INFO(0, encoding)
226 ZEND_END_ARG_INFO()
227 
228 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_detect_order, 0, 0, 0)
229 	ZEND_ARG_INFO(0, encoding)
230 ZEND_END_ARG_INFO()
231 
232 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substitute_character, 0, 0, 0)
233 	ZEND_ARG_INFO(0, substchar)
234 ZEND_END_ARG_INFO()
235 
236 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_preferred_mime_name, 0, 0, 1)
237 	ZEND_ARG_INFO(0, encoding)
238 ZEND_END_ARG_INFO()
239 
/* "result" is declared pass-by-reference (first macro argument is 1). */
240 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_parse_str, 0, 0, 1)
241 	ZEND_ARG_INFO(0, encoded_string)
242 	ZEND_ARG_INFO(1, result)
243 ZEND_END_ARG_INFO()
244 
245 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_output_handler, 0, 0, 2)
246 	ZEND_ARG_INFO(0, contents)
247 	ZEND_ARG_INFO(0, status)
248 ZEND_END_ARG_INFO()
249 
250 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strlen, 0, 0, 1)
251 	ZEND_ARG_INFO(0, str)
252 	ZEND_ARG_INFO(0, encoding)
253 ZEND_END_ARG_INFO()
254 
255 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strpos, 0, 0, 2)
256 	ZEND_ARG_INFO(0, haystack)
257 	ZEND_ARG_INFO(0, needle)
258 	ZEND_ARG_INFO(0, offset)
259 	ZEND_ARG_INFO(0, encoding)
260 ZEND_END_ARG_INFO()
261 
262 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrpos, 0, 0, 2)
263 	ZEND_ARG_INFO(0, haystack)
264 	ZEND_ARG_INFO(0, needle)
265 	ZEND_ARG_INFO(0, offset)
266 	ZEND_ARG_INFO(0, encoding)
267 ZEND_END_ARG_INFO()
268 
269 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_stripos, 0, 0, 2)
270 	ZEND_ARG_INFO(0, haystack)
271 	ZEND_ARG_INFO(0, needle)
272 	ZEND_ARG_INFO(0, offset)
273 	ZEND_ARG_INFO(0, encoding)
274 ZEND_END_ARG_INFO()
275 
276 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strripos, 0, 0, 2)
277 	ZEND_ARG_INFO(0, haystack)
278 	ZEND_ARG_INFO(0, needle)
279 	ZEND_ARG_INFO(0, offset)
280 	ZEND_ARG_INFO(0, encoding)
281 ZEND_END_ARG_INFO()
282 
283 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strstr, 0, 0, 2)
284 	ZEND_ARG_INFO(0, haystack)
285 	ZEND_ARG_INFO(0, needle)
286 	ZEND_ARG_INFO(0, part)
287 	ZEND_ARG_INFO(0, encoding)
288 ZEND_END_ARG_INFO()
289 
290 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrchr, 0, 0, 2)
291 	ZEND_ARG_INFO(0, haystack)
292 	ZEND_ARG_INFO(0, needle)
293 	ZEND_ARG_INFO(0, part)
294 	ZEND_ARG_INFO(0, encoding)
295 ZEND_END_ARG_INFO()
296 
297 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_stristr, 0, 0, 2)
298 	ZEND_ARG_INFO(0, haystack)
299 	ZEND_ARG_INFO(0, needle)
300 	ZEND_ARG_INFO(0, part)
301 	ZEND_ARG_INFO(0, encoding)
302 ZEND_END_ARG_INFO()
303 
304 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrichr, 0, 0, 2)
305 	ZEND_ARG_INFO(0, haystack)
306 	ZEND_ARG_INFO(0, needle)
307 	ZEND_ARG_INFO(0, part)
308 	ZEND_ARG_INFO(0, encoding)
309 ZEND_END_ARG_INFO()
310 
311 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substr_count, 0, 0, 2)
312 	ZEND_ARG_INFO(0, haystack)
313 	ZEND_ARG_INFO(0, needle)
314 	ZEND_ARG_INFO(0, encoding)
315 ZEND_END_ARG_INFO()
316 
317 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substr, 0, 0, 2)
318 	ZEND_ARG_INFO(0, str)
319 	ZEND_ARG_INFO(0, start)
320 	ZEND_ARG_INFO(0, length)
321 	ZEND_ARG_INFO(0, encoding)
322 ZEND_END_ARG_INFO()
323 
324 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strcut, 0, 0, 2)
325 	ZEND_ARG_INFO(0, str)
326 	ZEND_ARG_INFO(0, start)
327 	ZEND_ARG_INFO(0, length)
328 	ZEND_ARG_INFO(0, encoding)
329 ZEND_END_ARG_INFO()
330 
331 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strwidth, 0, 0, 1)
332 	ZEND_ARG_INFO(0, str)
333 	ZEND_ARG_INFO(0, encoding)
334 ZEND_END_ARG_INFO()
335 
336 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strimwidth, 0, 0, 3)
337 	ZEND_ARG_INFO(0, str)
338 	ZEND_ARG_INFO(0, start)
339 	ZEND_ARG_INFO(0, width)
340 	ZEND_ARG_INFO(0, trimmarker)
341 	ZEND_ARG_INFO(0, encoding)
342 ZEND_END_ARG_INFO()
343 
344 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_encoding, 0, 0, 2)
345 	ZEND_ARG_INFO(0, str)
346 	ZEND_ARG_INFO(0, to)
347 	ZEND_ARG_INFO(0, from)
348 ZEND_END_ARG_INFO()
349 
350 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_case, 0, 0, 2)
351 	ZEND_ARG_INFO(0, sourcestring)
352 	ZEND_ARG_INFO(0, mode)
353 	ZEND_ARG_INFO(0, encoding)
354 ZEND_END_ARG_INFO()
355 
356 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strtoupper, 0, 0, 1)
357 	ZEND_ARG_INFO(0, sourcestring)
358 	ZEND_ARG_INFO(0, encoding)
359 ZEND_END_ARG_INFO()
360 
361 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strtolower, 0, 0, 1)
362 	ZEND_ARG_INFO(0, sourcestring)
363 	ZEND_ARG_INFO(0, encoding)
364 ZEND_END_ARG_INFO()
365 
366 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_detect_encoding, 0, 0, 1)
367 	ZEND_ARG_INFO(0, str)
368 	ZEND_ARG_INFO(0, encoding_list)
369 	ZEND_ARG_INFO(0, strict)
370 ZEND_END_ARG_INFO()
371 
372 ZEND_BEGIN_ARG_INFO(arginfo_mb_list_encodings, 0)
373 ZEND_END_ARG_INFO()
374 
375 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encoding_aliases, 0, 0, 1)
376 	ZEND_ARG_INFO(0, encoding)
377 ZEND_END_ARG_INFO()
378 
379 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encode_mimeheader, 0, 0, 1)
380 	ZEND_ARG_INFO(0, str)
381 	ZEND_ARG_INFO(0, charset)
382 	ZEND_ARG_INFO(0, transfer)
383 	ZEND_ARG_INFO(0, linefeed)
384 	ZEND_ARG_INFO(0, indent)
385 ZEND_END_ARG_INFO()
386 
387 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_decode_mimeheader, 0, 0, 1)
388 	ZEND_ARG_INFO(0, string)
389 ZEND_END_ARG_INFO()
390 
391 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_kana, 0, 0, 1)
392 	ZEND_ARG_INFO(0, str)
393 	ZEND_ARG_INFO(0, option)
394 	ZEND_ARG_INFO(0, encoding)
395 ZEND_END_ARG_INFO()
396 
/* The variadic "vars" are declared pass-by-reference. */
397 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_variables, 0, 0, 3)
398 	ZEND_ARG_INFO(0, to)
399 	ZEND_ARG_INFO(0, from)
400 	ZEND_ARG_VARIADIC_INFO(1, vars)
401 ZEND_END_ARG_INFO()
402 
403 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encode_numericentity, 0, 0, 2)
404 	ZEND_ARG_INFO(0, string)
405 	ZEND_ARG_INFO(0, convmap)
406 	ZEND_ARG_INFO(0, encoding)
407 	ZEND_ARG_INFO(0, is_hex)
408 ZEND_END_ARG_INFO()
409 
410 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_decode_numericentity, 0, 0, 2)
411 	ZEND_ARG_INFO(0, string)
412 	ZEND_ARG_INFO(0, convmap)
413 	ZEND_ARG_INFO(0, encoding)
414 ZEND_END_ARG_INFO()
415 
416 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_send_mail, 0, 0, 3)
417 	ZEND_ARG_INFO(0, to)
418 	ZEND_ARG_INFO(0, subject)
419 	ZEND_ARG_INFO(0, message)
420 	ZEND_ARG_INFO(0, additional_headers)
421 	ZEND_ARG_INFO(0, additional_parameters)
422 ZEND_END_ARG_INFO()
423 
424 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_get_info, 0, 0, 0)
425 	ZEND_ARG_INFO(0, type)
426 ZEND_END_ARG_INFO()
427 
428 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_check_encoding, 0, 0, 0)
429 	ZEND_ARG_INFO(0, var)
430 	ZEND_ARG_INFO(0, encoding)
431 ZEND_END_ARG_INFO()
432 
/*
 * Descriptors for the mb_ereg / mb_regex family. No PHP_FE entry in this
 * section references them directly.
 * NOTE(review): presumably consumed by PHP_MBREGEX_FUNCTION_ENTRIES - confirm.
 */
433 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_regex_encoding, 0, 0, 0)
434 	ZEND_ARG_INFO(0, encoding)
435 ZEND_END_ARG_INFO()
436 
/* "registers" is declared pass-by-reference. */
437 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg, 0, 0, 2)
438 	ZEND_ARG_INFO(0, pattern)
439 	ZEND_ARG_INFO(0, string)
440 	ZEND_ARG_INFO(1, registers)
441 ZEND_END_ARG_INFO()
442 
/* "registers" is declared pass-by-reference. */
443 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_eregi, 0, 0, 2)
444 	ZEND_ARG_INFO(0, pattern)
445 	ZEND_ARG_INFO(0, string)
446 	ZEND_ARG_INFO(1, registers)
447 ZEND_END_ARG_INFO()
448 
449 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_replace, 0, 0, 3)
450 	ZEND_ARG_INFO(0, pattern)
451 	ZEND_ARG_INFO(0, replacement)
452 	ZEND_ARG_INFO(0, string)
453 	ZEND_ARG_INFO(0, option)
454 ZEND_END_ARG_INFO()
455 
456 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_eregi_replace, 0, 0, 3)
457 	ZEND_ARG_INFO(0, pattern)
458 	ZEND_ARG_INFO(0, replacement)
459 	ZEND_ARG_INFO(0, string)
460 	ZEND_ARG_INFO(0, option)
461 ZEND_END_ARG_INFO()
462 
463 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_replace_callback, 0, 0, 3)
464 	ZEND_ARG_INFO(0, pattern)
465 	ZEND_ARG_INFO(0, callback)
466 	ZEND_ARG_INFO(0, string)
467 	ZEND_ARG_INFO(0, option)
468 ZEND_END_ARG_INFO()
469 
470 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_split, 0, 0, 2)
471 	ZEND_ARG_INFO(0, pattern)
472 	ZEND_ARG_INFO(0, string)
473 	ZEND_ARG_INFO(0, limit)
474 ZEND_END_ARG_INFO()
475 
476 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_match, 0, 0, 2)
477 	ZEND_ARG_INFO(0, pattern)
478 	ZEND_ARG_INFO(0, string)
479 	ZEND_ARG_INFO(0, option)
480 ZEND_END_ARG_INFO()
481 
482 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search, 0, 0, 0)
483 	ZEND_ARG_INFO(0, pattern)
484 	ZEND_ARG_INFO(0, option)
485 ZEND_END_ARG_INFO()
486 
487 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_pos, 0, 0, 0)
488 	ZEND_ARG_INFO(0, pattern)
489 	ZEND_ARG_INFO(0, option)
490 ZEND_END_ARG_INFO()
491 
492 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_regs, 0, 0, 0)
493 	ZEND_ARG_INFO(0, pattern)
494 	ZEND_ARG_INFO(0, option)
495 ZEND_END_ARG_INFO()
496 
497 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_init, 0, 0, 1)
498 	ZEND_ARG_INFO(0, string)
499 	ZEND_ARG_INFO(0, pattern)
500 	ZEND_ARG_INFO(0, option)
501 ZEND_END_ARG_INFO()
502 
503 ZEND_BEGIN_ARG_INFO(arginfo_mb_ereg_search_getregs, 0)
504 ZEND_END_ARG_INFO()
505 
506 ZEND_BEGIN_ARG_INFO(arginfo_mb_ereg_search_getpos, 0)
507 ZEND_END_ARG_INFO()
508 
509 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_setpos, 0, 0, 1)
510 	ZEND_ARG_INFO(0, position)
511 ZEND_END_ARG_INFO()
512 
513 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_regex_set_options, 0, 0, 0)
514 	ZEND_ARG_INFO(0, options)
515 ZEND_END_ARG_INFO()
516 /* }}} */
517 
518 /* {{{ zend_function_entry mbstring_functions[] */
/*
 * Function table handed to the engine through mbstring_module_entry. Each
 * PHP_FE row pairs a function name with its arginfo descriptor. The regex
 * entries are appended only when HAVE_MBREGEX is set, and PHP_FE_END
 * terminates the table.
 */
519 const zend_function_entry mbstring_functions[] = {
520 	PHP_FE(mb_convert_case,			arginfo_mb_convert_case)
521 	PHP_FE(mb_strtoupper,			arginfo_mb_strtoupper)
522 	PHP_FE(mb_strtolower,			arginfo_mb_strtolower)
523 	PHP_FE(mb_language,				arginfo_mb_language)
524 	PHP_FE(mb_internal_encoding,	arginfo_mb_internal_encoding)
525 	PHP_FE(mb_http_input,			arginfo_mb_http_input)
526 	PHP_FE(mb_http_output,			arginfo_mb_http_output)
527 	PHP_FE(mb_detect_order,			arginfo_mb_detect_order)
528 	PHP_FE(mb_substitute_character,	arginfo_mb_substitute_character)
529 	PHP_FE(mb_parse_str,			arginfo_mb_parse_str)
530 	PHP_FE(mb_output_handler,		arginfo_mb_output_handler)
531 	PHP_FE(mb_preferred_mime_name,	arginfo_mb_preferred_mime_name)
532 	PHP_FE(mb_strlen,				arginfo_mb_strlen)
533 	PHP_FE(mb_strpos,				arginfo_mb_strpos)
534 	PHP_FE(mb_strrpos,				arginfo_mb_strrpos)
535 	PHP_FE(mb_stripos,				arginfo_mb_stripos)
536 	PHP_FE(mb_strripos,				arginfo_mb_strripos)
537 	PHP_FE(mb_strstr,				arginfo_mb_strstr)
538 	PHP_FE(mb_strrchr,				arginfo_mb_strrchr)
539 	PHP_FE(mb_stristr,				arginfo_mb_stristr)
540 	PHP_FE(mb_strrichr,				arginfo_mb_strrichr)
541 	PHP_FE(mb_substr_count,			arginfo_mb_substr_count)
542 	PHP_FE(mb_substr,				arginfo_mb_substr)
543 	PHP_FE(mb_strcut,				arginfo_mb_strcut)
544 	PHP_FE(mb_strwidth,				arginfo_mb_strwidth)
545 	PHP_FE(mb_strimwidth,			arginfo_mb_strimwidth)
546 	PHP_FE(mb_convert_encoding,		arginfo_mb_convert_encoding)
547 	PHP_FE(mb_detect_encoding,		arginfo_mb_detect_encoding)
548 	PHP_FE(mb_list_encodings,		arginfo_mb_list_encodings)
549 	PHP_FE(mb_encoding_aliases,		arginfo_mb_encoding_aliases)
550 	PHP_FE(mb_convert_kana,			arginfo_mb_convert_kana)
551 	PHP_FE(mb_encode_mimeheader,	arginfo_mb_encode_mimeheader)
552 	PHP_FE(mb_decode_mimeheader,	arginfo_mb_decode_mimeheader)
553 	PHP_FE(mb_convert_variables,	arginfo_mb_convert_variables)
554 	PHP_FE(mb_encode_numericentity,	arginfo_mb_encode_numericentity)
555 	PHP_FE(mb_decode_numericentity,	arginfo_mb_decode_numericentity)
556 	PHP_FE(mb_send_mail,			arginfo_mb_send_mail)
557 	PHP_FE(mb_get_info,				arginfo_mb_get_info)
558 	PHP_FE(mb_check_encoding,		arginfo_mb_check_encoding)
559 #if HAVE_MBREGEX
560 	PHP_MBREGEX_FUNCTION_ENTRIES
561 #endif
562 	PHP_FE_END
563 };
564 /* }}} */
565 
566 /* {{{ zend_module_entry mbstring_module_entry */
/*
 * Module descriptor for the "mbstring" extension: name, function table,
 * module/request startup and shutdown hooks, info hook, version string, and
 * the module-globals block with its constructor and destructor.
 */
567 zend_module_entry mbstring_module_entry = {
568 	STANDARD_MODULE_HEADER,
569 	"mbstring",
570 	mbstring_functions,
571 	PHP_MINIT(mbstring),
572 	PHP_MSHUTDOWN(mbstring),
573 	PHP_RINIT(mbstring),
574 	PHP_RSHUTDOWN(mbstring),
575 	PHP_MINFO(mbstring),
576 	PHP_MBSTRING_VERSION,
577 	PHP_MODULE_GLOBALS(mbstring),
578 	PHP_GINIT(mbstring),
579 	PHP_GSHUTDOWN(mbstring),
	/* NOTE(review): presumably the post-deactivate callback slot, left unset - confirm against zend_module_entry */
580 	NULL,
581 	STANDARD_MODULE_PROPERTIES_EX
582 };
583 /* }}} */
584 
585 /* {{{ static sapi_post_entry php_post_entries[] */
/*
 * POST content-type table using php_std_post_handler for the default form
 * content type and rfc1867_post_handler for multipart. Each row holds the
 * content-type string, its length without the terminating NUL, and two
 * callbacks. The NULL row terminates the table.
 * NOTE(review): presumably the stock handlers to restore when mbstring's
 * own table (mbstr_post_entries) is not in use - confirm at the use site.
 */
586 static sapi_post_entry php_post_entries[] = {
587 	{ DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data,	php_std_post_handler },
588 	{ MULTIPART_CONTENT_TYPE,    sizeof(MULTIPART_CONTENT_TYPE)-1,    NULL,                         rfc1867_post_handler },
589 	{ NULL, 0, NULL, NULL }
590 };
591 /* }}} */
592 
/*
 * When mbstring is built as a loadable module, emit the module lookup entry
 * point via ZEND_GET_MODULE. Under ZTS the TSRM cache definition is emitted
 * first. ZEND_GET_MODULE must appear exactly once.
 */
#ifdef COMPILE_DL_MBSTRING
#ifdef ZTS
ZEND_TSRMLS_CACHE_DEFINE()
#endif
ZEND_GET_MODULE(mbstring)
#endif
599 
600 static char *get_internal_encoding(void) {
601 	if (PG(internal_encoding) && PG(internal_encoding)[0]) {
602 		return PG(internal_encoding);
603 	} else if (SG(default_charset)) {
604 		return SG(default_charset);
605 	}
606 	return "";
607 }
608 
get_input_encoding(void)609 static char *get_input_encoding(void) {
610 	if (PG(input_encoding) && PG(input_encoding)[0]) {
611 		return PG(input_encoding);
612 	} else if (SG(default_charset)) {
613 		return SG(default_charset);
614 	}
615 	return "";
616 }
617 
get_output_encoding(void)618 static char *get_output_encoding(void) {
619 	if (PG(output_encoding) && PG(output_encoding)[0]) {
620 		return PG(output_encoding);
621 	} else if (SG(default_charset)) {
622 		return SG(default_charset);
623 	}
624 	return "";
625 }
626 
627 
628 /* {{{ allocators */
_php_mb_allocators_malloc(unsigned int sz)629 static void *_php_mb_allocators_malloc(unsigned int sz)
630 {
631 	return emalloc(sz);
632 }
633 
_php_mb_allocators_realloc(void * ptr,unsigned int sz)634 static void *_php_mb_allocators_realloc(void *ptr, unsigned int sz)
635 {
636 	return erealloc(ptr, sz);
637 }
638 
_php_mb_allocators_calloc(unsigned int nelems,unsigned int szelem)639 static void *_php_mb_allocators_calloc(unsigned int nelems, unsigned int szelem)
640 {
641 	return ecalloc(nelems, szelem);
642 }
643 
_php_mb_allocators_free(void * ptr)644 static void _php_mb_allocators_free(void *ptr)
645 {
646 	efree(ptr);
647 }
648 
_php_mb_allocators_pmalloc(unsigned int sz)649 static void *_php_mb_allocators_pmalloc(unsigned int sz)
650 {
651 	return pemalloc(sz, 1);
652 }
653 
_php_mb_allocators_prealloc(void * ptr,unsigned int sz)654 static void *_php_mb_allocators_prealloc(void *ptr, unsigned int sz)
655 {
656 	return perealloc(ptr, sz, 1);
657 }
658 
_php_mb_allocators_pfree(void * ptr)659 static void _php_mb_allocators_pfree(void *ptr)
660 {
661 	pefree(ptr, 1);
662 }
663 
664 static mbfl_allocators _php_mb_allocators = {
665 	_php_mb_allocators_malloc,
666 	_php_mb_allocators_realloc,
667 	_php_mb_allocators_calloc,
668 	_php_mb_allocators_free,
669 	_php_mb_allocators_pmalloc,
670 	_php_mb_allocators_prealloc,
671 	_php_mb_allocators_pfree
672 };
673 /* }}} */
674 
675 /* {{{ static sapi_post_entry mbstr_post_entries[] */
/*
 * POST content-type table that routes the default form content type to
 * php_mb_post_handler; multipart still goes to rfc1867_post_handler. Row
 * layout matches php_post_entries, and the NULL row terminates the table.
 */
676 static sapi_post_entry mbstr_post_entries[] = {
677 	{ DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data, php_mb_post_handler },
678 	{ MULTIPART_CONTENT_TYPE,    sizeof(MULTIPART_CONTENT_TYPE)-1,    NULL,                         rfc1867_post_handler },
679 	{ NULL, 0, NULL, NULL }
680 };
681 /* }}} */
682 
683 /* {{{ static int php_mb_parse_encoding_list()
684  *  Return FAILURE if input contains any illegal encoding, otherwise SUCCESS.
685  *  Even if any illegal encoding is detected the result may contain a list
686  *  of parsed encodings.
687  */
688 static int
php_mb_parse_encoding_list(const char * value,size_t value_length,const mbfl_encoding *** return_list,size_t * return_size,int persistent)689 php_mb_parse_encoding_list(const char *value, size_t value_length, const mbfl_encoding ***return_list, size_t *return_size, int persistent)
690 {
691 	int bauto, ret = SUCCESS;
692 	size_t n, size;
693 	char *p, *p1, *p2, *endp, *tmpstr;
694 	const mbfl_encoding **entry, **list;
695 
696 	list = NULL;
697 	if (value == NULL || value_length <= 0) {
698 		if (return_list) {
699 			*return_list = NULL;
700 		}
701 		if (return_size) {
702 			*return_size = 0;
703 		}
704 		return FAILURE;
705 	} else {
706 		/* copy the value string for work */
707 		if (value[0]=='"' && value[value_length-1]=='"' && value_length>2) {
708 			tmpstr = (char *)estrndup(value+1, value_length-2);
709 			value_length -= 2;
710 		}
711 		else
712 			tmpstr = (char *)estrndup(value, value_length);
713 		if (tmpstr == NULL) {
714 			return FAILURE;
715 		}
716 		/* count the number of listed encoding names */
717 		endp = tmpstr + value_length;
718 		n = 1;
719 		p1 = tmpstr;
720 		while ((p2 = (char*)php_memnstr(p1, ",", 1, endp)) != NULL) {
721 			p1 = p2 + 1;
722 			n++;
723 		}
724 		size = n + MBSTRG(default_detect_order_list_size);
725 		/* make list */
726 		list = (const mbfl_encoding **)pecalloc(size, sizeof(mbfl_encoding*), persistent);
727 		if (list != NULL) {
728 			entry = list;
729 			n = 0;
730 			bauto = 0;
731 			p1 = tmpstr;
732 			do {
733 				p2 = p = (char*)php_memnstr(p1, ",", 1, endp);
734 				if (p == NULL) {
735 					p = endp;
736 				}
737 				*p = '\0';
738 				/* trim spaces */
739 				while (p1 < p && (*p1 == ' ' || *p1 == '\t')) {
740 					p1++;
741 				}
742 				p--;
743 				while (p > p1 && (*p == ' ' || *p == '\t')) {
744 					*p = '\0';
745 					p--;
746 				}
747 				/* convert to the encoding number and check encoding */
748 				if (strcasecmp(p1, "auto") == 0) {
749 					if (!bauto) {
750 						const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
751 						const size_t identify_list_size = MBSTRG(default_detect_order_list_size);
752 						size_t i;
753 						bauto = 1;
754 						for (i = 0; i < identify_list_size; i++) {
755 							*entry++ = mbfl_no2encoding(*src++);
756 							n++;
757 						}
758 					}
759 				} else {
760 					const mbfl_encoding *encoding = mbfl_name2encoding(p1);
761 					if (encoding) {
762 						*entry++ = encoding;
763 						n++;
764 					} else {
765 						ret = FAILURE;
766 					}
767 				}
768 				p1 = p2 + 1;
769 			} while (n < size && p2 != NULL);
770 			if (n > 0) {
771 				if (return_list) {
772 					*return_list = list;
773 				} else {
774 					pefree(list, persistent);
775 				}
776 			} else {
777 				pefree(list, persistent);
778 				if (return_list) {
779 					*return_list = NULL;
780 				}
781 				ret = FAILURE;
782 			}
783 			if (return_size) {
784 				*return_size = n;
785 			}
786 		} else {
787 			if (return_list) {
788 				*return_list = NULL;
789 			}
790 			if (return_size) {
791 				*return_size = 0;
792 			}
793 			ret = FAILURE;
794 		}
795 		efree(tmpstr);
796 	}
797 
798 	return ret;
799 }
800 /* }}} */
801 
802 /* {{{ static int php_mb_parse_encoding_array()
803  *  Return FAILURE if input contains any illegal encoding, otherwise SUCCESS.
804  *  Even if any illegal encoding is detected the result may contain a list
805  *  of parsed encodings.
806  */
807 static int
php_mb_parse_encoding_array(zval * array,const mbfl_encoding *** return_list,size_t * return_size,int persistent)808 php_mb_parse_encoding_array(zval *array, const mbfl_encoding ***return_list, size_t *return_size, int persistent)
809 {
810 	zval *hash_entry;
811 	HashTable *target_hash;
812 	int i, n, size, bauto, ret = SUCCESS;
813 	const mbfl_encoding **list, **entry;
814 
815 	list = NULL;
816 	if (Z_TYPE_P(array) == IS_ARRAY) {
817 		target_hash = Z_ARRVAL_P(array);
818 		i = zend_hash_num_elements(target_hash);
819 		size = i + MBSTRG(default_detect_order_list_size);
820 		list = (const mbfl_encoding **)pecalloc(size, sizeof(mbfl_encoding*), persistent);
821 		if (list != NULL) {
822 			entry = list;
823 			bauto = 0;
824 			n = 0;
825 			ZEND_HASH_FOREACH_VAL(target_hash, hash_entry) {
826 				convert_to_string_ex(hash_entry);
827 				if (strcasecmp(Z_STRVAL_P(hash_entry), "auto") == 0) {
828 					if (!bauto) {
829 						const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
830 						const size_t identify_list_size = MBSTRG(default_detect_order_list_size);
831 						size_t j;
832 
833 						bauto = 1;
834 						for (j = 0; j < identify_list_size; j++) {
835 							*entry++ = mbfl_no2encoding(*src++);
836 							n++;
837 						}
838 					}
839 				} else {
840 					const mbfl_encoding *encoding = mbfl_name2encoding(Z_STRVAL_P(hash_entry));
841 					if (encoding) {
842 						*entry++ = encoding;
843 						n++;
844 					} else {
845 						ret = FAILURE;
846 					}
847 				}
848 				i--;
849 			} ZEND_HASH_FOREACH_END();
850 			if (n > 0) {
851 				if (return_list) {
852 					*return_list = list;
853 				} else {
854 					pefree(list, persistent);
855 				}
856 			} else {
857 				pefree(list, persistent);
858 				if (return_list) {
859 					*return_list = NULL;
860 				}
861 				ret = FAILURE;
862 			}
863 			if (return_size) {
864 				*return_size = n;
865 			}
866 		} else {
867 			if (return_list) {
868 				*return_list = NULL;
869 			}
870 			if (return_size) {
871 				*return_size = 0;
872 			}
873 			ret = FAILURE;
874 		}
875 	}
876 
877 	return ret;
878 }
879 /* }}} */
880 
881 /* {{{ zend_multibyte interface */
php_mb_zend_encoding_fetcher(const char * encoding_name)882 static const zend_encoding* php_mb_zend_encoding_fetcher(const char *encoding_name)
883 {
884 	return (const zend_encoding*)mbfl_name2encoding(encoding_name);
885 }
886 
php_mb_zend_encoding_name_getter(const zend_encoding * encoding)887 static const char *php_mb_zend_encoding_name_getter(const zend_encoding *encoding)
888 {
889 	return ((const mbfl_encoding *)encoding)->name;
890 }
891 
php_mb_zend_encoding_lexer_compatibility_checker(const zend_encoding * _encoding)892 static int php_mb_zend_encoding_lexer_compatibility_checker(const zend_encoding *_encoding)
893 {
894 	const mbfl_encoding *encoding = (const mbfl_encoding*)_encoding;
895 	if (encoding->flag & MBFL_ENCTYPE_SBCS) {
896 		return 1;
897 	}
898 	if ((encoding->flag & (MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE)) == MBFL_ENCTYPE_MBCS) {
899 		return 1;
900 	}
901 	return 0;
902 }
903 
php_mb_zend_encoding_detector(const unsigned char * arg_string,size_t arg_length,const zend_encoding ** list,size_t list_size)904 static const zend_encoding *php_mb_zend_encoding_detector(const unsigned char *arg_string, size_t arg_length, const zend_encoding **list, size_t list_size)
905 {
906 	mbfl_string string;
907 
908 	if (!list) {
909 		list = (const zend_encoding **)MBSTRG(current_detect_order_list);
910 		list_size = MBSTRG(current_detect_order_list_size);
911 	}
912 
913 	mbfl_string_init(&string);
914 	string.no_language = MBSTRG(language);
915 	string.val = (unsigned char *)arg_string;
916 	string.len = arg_length;
917 	return (const zend_encoding *) mbfl_identify_encoding2(&string, (const mbfl_encoding **)list, list_size, 0);
918 }
919 
php_mb_zend_encoding_converter(unsigned char ** to,size_t * to_length,const unsigned char * from,size_t from_length,const zend_encoding * encoding_to,const zend_encoding * encoding_from)920 static size_t php_mb_zend_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const zend_encoding *encoding_to, const zend_encoding *encoding_from)
921 {
922 	mbfl_string string, result;
923 	mbfl_buffer_converter *convd;
924 	int status, loc;
925 
926 	/* new encoding */
927 	/* initialize string */
928 	mbfl_string_init(&string);
929 	mbfl_string_init(&result);
930 	string.no_encoding = ((const mbfl_encoding*)encoding_from)->no_encoding;
931 	string.no_language = MBSTRG(language);
932 	string.val = (unsigned char*)from;
933 	string.len = from_length;
934 
935 	/* initialize converter */
936 	convd = mbfl_buffer_converter_new2((const mbfl_encoding *)encoding_from, (const mbfl_encoding *)encoding_to, string.len);
937 	if (convd == NULL) {
938 		return -1;
939 	}
940 	mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
941 	mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
942 
943 	/* do it */
944 	status = mbfl_buffer_converter_feed2(convd, &string, &loc);
945 	if (status) {
946 		mbfl_buffer_converter_delete(convd);
947 		return (size_t)-1;
948 	}
949 
950 	mbfl_buffer_converter_flush(convd);
951 	if (!mbfl_buffer_converter_result(convd, &result)) {
952 		mbfl_buffer_converter_delete(convd);
953 		return (size_t)-1;
954 	}
955 
956 	*to = result.val;
957 	*to_length = result.len;
958 
959 	mbfl_buffer_converter_delete(convd);
960 
961 	return loc;
962 }
963 
php_mb_zend_encoding_list_parser(const char * encoding_list,size_t encoding_list_len,const zend_encoding *** return_list,size_t * return_size,int persistent)964 static int php_mb_zend_encoding_list_parser(const char *encoding_list, size_t encoding_list_len, const zend_encoding ***return_list, size_t *return_size, int persistent)
965 {
966 	return php_mb_parse_encoding_list(encoding_list, encoding_list_len, (const mbfl_encoding ***)return_list, return_size, persistent);
967 }
968 
php_mb_zend_internal_encoding_getter(void)969 static const zend_encoding *php_mb_zend_internal_encoding_getter(void)
970 {
971 	return (const zend_encoding *)MBSTRG(internal_encoding);
972 }
973 
php_mb_zend_internal_encoding_setter(const zend_encoding * encoding)974 static int php_mb_zend_internal_encoding_setter(const zend_encoding *encoding)
975 {
976 	MBSTRG(internal_encoding) = (const mbfl_encoding *)encoding;
977 	return SUCCESS;
978 }
979 
/*
 * Callback table handed to zend_multibyte_set_functions() during module
 * startup. The initializer is positional, so the order of the entries must
 * match the member order of zend_multibyte_functions.
 */
static zend_multibyte_functions php_mb_zend_multibyte_functions = {
	"mbstring",
	php_mb_zend_encoding_fetcher,
	php_mb_zend_encoding_name_getter,
	php_mb_zend_encoding_lexer_compatibility_checker,
	php_mb_zend_encoding_detector,
	php_mb_zend_encoding_converter,
	php_mb_zend_encoding_list_parser,
	php_mb_zend_internal_encoding_getter,
	php_mb_zend_internal_encoding_setter
};
991 /* }}} */
992 
/*
 * Small regex abstraction used for mbstring.http_output_conv_mimetypes.
 * The definitions that follow are selected at compile time (Oniguruma when
 * HAVE_ONIG is set, otherwise PCRE); the compiled pattern is passed around
 * as an opaque pointer.
 */
static void *_php_mb_compile_regex(const char *pattern);
static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len);
static void _php_mb_free_regex(void *opaque);
996 
997 #if HAVE_ONIG
998 /* {{{ _php_mb_compile_regex */
_php_mb_compile_regex(const char * pattern)999 static void *_php_mb_compile_regex(const char *pattern)
1000 {
1001 	php_mb_regex_t *retval;
1002 	OnigErrorInfo err_info;
1003 	int err_code;
1004 
1005 	if ((err_code = onig_new(&retval,
1006 			(const OnigUChar *)pattern,
1007 			(const OnigUChar *)pattern + strlen(pattern),
1008 			ONIG_OPTION_IGNORECASE | ONIG_OPTION_DONT_CAPTURE_GROUP,
1009 			ONIG_ENCODING_ASCII, &OnigSyntaxPerl, &err_info))) {
1010 		OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
1011 		onig_error_code_to_str(err_str, err_code, err_info);
1012 		php_error_docref(NULL, E_WARNING, "%s: %s", pattern, err_str);
1013 		retval = NULL;
1014 	}
1015 	return retval;
1016 }
1017 /* }}} */
1018 
1019 /* {{{ _php_mb_match_regex */
_php_mb_match_regex(void * opaque,const char * str,size_t str_len)1020 static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len)
1021 {
1022 	return onig_search((php_mb_regex_t *)opaque, (const OnigUChar *)str,
1023 			(const OnigUChar*)str + str_len, (const OnigUChar *)str,
1024 			(const OnigUChar*)str + str_len, NULL, ONIG_OPTION_NONE) >= 0;
1025 }
1026 /* }}} */
1027 
1028 /* {{{ _php_mb_free_regex */
_php_mb_free_regex(void * opaque)1029 static void _php_mb_free_regex(void *opaque)
1030 {
1031 	onig_free((php_mb_regex_t *)opaque);
1032 }
1033 /* }}} */
1034 #elif HAVE_PCRE || HAVE_BUNDLED_PCRE
1035 /* {{{ _php_mb_compile_regex */
_php_mb_compile_regex(const char * pattern)1036 static void *_php_mb_compile_regex(const char *pattern)
1037 {
1038 	pcre *retval;
1039 	const char *err_str;
1040 	int err_offset;
1041 
1042 	if (!(retval = pcre_compile(pattern,
1043 			PCRE_CASELESS, &err_str, &err_offset, NULL))) {
1044 		php_error_docref(NULL, E_WARNING, "%s (offset=%d): %s", pattern, err_offset, err_str);
1045 	}
1046 	return retval;
1047 }
1048 /* }}} */
1049 
1050 /* {{{ _php_mb_match_regex */
_php_mb_match_regex(void * opaque,const char * str,size_t str_len)1051 static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len)
1052 {
1053 	return pcre_exec((pcre *)opaque, NULL, str, (int)str_len, 0,
1054 			0, NULL, 0) >= 0;
1055 }
1056 /* }}} */
1057 
1058 /* {{{ _php_mb_free_regex */
/* Releases a pattern obtained from _php_mb_compile_regex() via pcre_free(). */
static void _php_mb_free_regex(void *opaque)
{
	pcre_free(opaque);
}
1063 /* }}} */
1064 #endif
1065 
1066 /* {{{ php_mb_nls_get_default_detect_order_list */
php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang,enum mbfl_no_encoding ** plist,size_t * plist_size)1067 static int php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang, enum mbfl_no_encoding **plist, size_t *plist_size)
1068 {
1069 	size_t i;
1070 
1071 	*plist = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut;
1072 	*plist_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
1073 
1074 	for (i = 0; i < sizeof(php_mb_default_identify_list) / sizeof(php_mb_default_identify_list[0]); i++) {
1075 		if (php_mb_default_identify_list[i].lang == lang) {
1076 			*plist = (enum mbfl_no_encoding *)php_mb_default_identify_list[i].list;
1077 			*plist_size = php_mb_default_identify_list[i].list_size;
1078 			return 1;
1079 		}
1080 	}
1081 	return 0;
1082 }
1083 /* }}} */
1084 
php_mb_rfc1867_substring_conf(const zend_encoding * encoding,char * start,int len,char quote)1085 static char *php_mb_rfc1867_substring_conf(const zend_encoding *encoding, char *start, int len, char quote)
1086 {
1087 	char *result = emalloc(len + 2);
1088 	char *resp = result;
1089 	int i;
1090 
1091 	for (i = 0; i < len && start[i] != quote; ++i) {
1092 		if (start[i] == '\\' && (start[i + 1] == '\\' || (quote && start[i + 1] == quote))) {
1093 			*resp++ = start[++i];
1094 		} else {
1095 			size_t j = php_mb_mbchar_bytes_ex(start+i, (const mbfl_encoding *)encoding);
1096 
1097 			while (j-- > 0 && i < len) {
1098 				*resp++ = start[i++];
1099 			}
1100 			--i;
1101 		}
1102 	}
1103 
1104 	*resp = '\0';
1105 	return result;
1106 }
1107 
php_mb_rfc1867_getword(const zend_encoding * encoding,char ** line,char stop)1108 static char *php_mb_rfc1867_getword(const zend_encoding *encoding, char **line, char stop) /* {{{ */
1109 {
1110 	char *pos = *line, quote;
1111 	char *res;
1112 
1113 	while (*pos && *pos != stop) {
1114 		if ((quote = *pos) == '"' || quote == '\'') {
1115 			++pos;
1116 			while (*pos && *pos != quote) {
1117 				if (*pos == '\\' && pos[1] && pos[1] == quote) {
1118 					pos += 2;
1119 				} else {
1120 					++pos;
1121 				}
1122 			}
1123 			if (*pos) {
1124 				++pos;
1125 			}
1126 		} else {
1127 			pos += php_mb_mbchar_bytes_ex(pos, (const mbfl_encoding *)encoding);
1128 
1129 		}
1130 	}
1131 	if (*pos == '\0') {
1132 		res = estrdup(*line);
1133 		*line += strlen(*line);
1134 		return res;
1135 	}
1136 
1137 	res = estrndup(*line, pos - *line);
1138 
1139 	while (*pos == stop) {
1140 		pos += php_mb_mbchar_bytes_ex(pos, (const mbfl_encoding *)encoding);
1141 	}
1142 
1143 	*line = pos;
1144 	return res;
1145 }
1146 /* }}} */
1147 
/*
 * Extracts one configuration-style word from `str`: leading whitespace is
 * skipped, then either a quoted value (up to the closing quote) or a bare
 * token (up to the next whitespace) is copied by
 * php_mb_rfc1867_substring_conf(). An all-whitespace or empty input yields
 * an emalloc()ed empty string.
 */
static char *php_mb_rfc1867_getword_conf(const zend_encoding *encoding, char *str) /* {{{ */
{
	char *word_end;
	char first;

	while (*str && isspace(*(unsigned char *)str)) {
		++str;
	}

	first = *str;
	if (first == '\0') {
		return estrdup("");
	}

	if (first == '"' || first == '\'') {
		/* quoted value: hand over everything after the opening quote */
		++str;
		return php_mb_rfc1867_substring_conf(encoding, str, strlen(str), first);
	}

	/* bare token: ends at the next whitespace byte or at the NUL */
	for (word_end = str; *word_end && !isspace(*(unsigned char *)word_end); ++word_end) {
		/* nothing */
	}
	return php_mb_rfc1867_substring_conf(encoding, str, word_end - str, 0);
}
1172 /* }}} */
1173 
php_mb_rfc1867_basename(const zend_encoding * encoding,char * filename)1174 static char *php_mb_rfc1867_basename(const zend_encoding *encoding, char *filename) /* {{{ */
1175 {
1176 	char *s, *s2;
1177 	const size_t filename_len = strlen(filename);
1178 
1179 	/* The \ check should technically be needed for win32 systems only where
1180 	 * it is a valid path separator. However, IE in all it's wisdom always sends
1181 	 * the full path of the file on the user's filesystem, which means that unless
1182 	 * the user does basename() they get a bogus file name. Until IE's user base drops
1183 	 * to nill or problem is fixed this code must remain enabled for all systems. */
1184 	s = php_mb_safe_strrchr_ex(filename, '\\', filename_len, (const mbfl_encoding *)encoding);
1185 	s2 = php_mb_safe_strrchr_ex(filename, '/', filename_len, (const mbfl_encoding *)encoding);
1186 
1187 	if (s && s2) {
1188 		if (s > s2) {
1189 			return ++s;
1190 		} else {
1191 			return ++s2;
1192 		}
1193 	} else if (s) {
1194 		return ++s;
1195 	} else if (s2) {
1196 		return ++s2;
1197 	} else {
1198 		return filename;
1199 	}
1200 }
1201 /* }}} */
1202 
1203 /* {{{ php.ini directive handler */
1204 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_language) */
PHP_INI_MH(OnUpdate_mbstring_language)1205 static PHP_INI_MH(OnUpdate_mbstring_language)
1206 {
1207 	enum mbfl_no_language no_language;
1208 
1209 	no_language = mbfl_name2no_language(ZSTR_VAL(new_value));
1210 	if (no_language == mbfl_no_language_invalid) {
1211 		MBSTRG(language) = mbfl_no_language_neutral;
1212 		return FAILURE;
1213 	}
1214 	MBSTRG(language) = no_language;
1215 	php_mb_nls_get_default_detect_order_list(no_language, &MBSTRG(default_detect_order_list), &MBSTRG(default_detect_order_list_size));
1216 	return SUCCESS;
1217 }
1218 /* }}} */
1219 
1220 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_detect_order) */
PHP_INI_MH(OnUpdate_mbstring_detect_order)1221 static PHP_INI_MH(OnUpdate_mbstring_detect_order)
1222 {
1223 	const mbfl_encoding **list;
1224 	size_t size;
1225 
1226 	if (!new_value) {
1227 		if (MBSTRG(detect_order_list)) {
1228 			pefree(MBSTRG(detect_order_list), 1);
1229 		}
1230 		MBSTRG(detect_order_list) = NULL;
1231 		MBSTRG(detect_order_list_size) = 0;
1232 		return SUCCESS;
1233 	}
1234 
1235 	if (FAILURE == php_mb_parse_encoding_list(ZSTR_VAL(new_value), ZSTR_LEN(new_value), &list, &size, 1)) {
1236 		return FAILURE;
1237 	}
1238 
1239 	if (MBSTRG(detect_order_list)) {
1240 		pefree(MBSTRG(detect_order_list), 1);
1241 	}
1242 	MBSTRG(detect_order_list) = list;
1243 	MBSTRG(detect_order_list_size) = size;
1244 	return SUCCESS;
1245 }
1246 /* }}} */
1247 
1248 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_input) */
PHP_INI_MH(OnUpdate_mbstring_http_input)1249 static PHP_INI_MH(OnUpdate_mbstring_http_input)
1250 {
1251 	const mbfl_encoding **list;
1252 	size_t size;
1253 
1254 	if (!new_value || !ZSTR_VAL(new_value)) {
1255 		if (MBSTRG(http_input_list)) {
1256 			pefree(MBSTRG(http_input_list), 1);
1257 		}
1258 		if (SUCCESS == php_mb_parse_encoding_list(get_input_encoding(), strlen(get_input_encoding())+1, &list, &size, 1)) {
1259 			MBSTRG(http_input_list) = list;
1260 			MBSTRG(http_input_list_size) = size;
1261 			return SUCCESS;
1262 		}
1263 		MBSTRG(http_input_list) = NULL;
1264 		MBSTRG(http_input_list_size) = 0;
1265 		return SUCCESS;
1266 	}
1267 
1268 	if (FAILURE == php_mb_parse_encoding_list(ZSTR_VAL(new_value), ZSTR_LEN(new_value), &list, &size, 1)) {
1269 		return FAILURE;
1270 	}
1271 
1272 	if (MBSTRG(http_input_list)) {
1273 		pefree(MBSTRG(http_input_list), 1);
1274 	}
1275 	MBSTRG(http_input_list) = list;
1276 	MBSTRG(http_input_list_size) = size;
1277 
1278 	if (stage & (PHP_INI_STAGE_ACTIVATE | PHP_INI_STAGE_RUNTIME)) {
1279 		php_error_docref("ref.mbstring", E_DEPRECATED, "Use of mbstring.http_input is deprecated");
1280 	}
1281 
1282 	return SUCCESS;
1283 }
1284 /* }}} */
1285 
1286 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output) */
PHP_INI_MH(OnUpdate_mbstring_http_output)1287 static PHP_INI_MH(OnUpdate_mbstring_http_output)
1288 {
1289 	const mbfl_encoding *encoding;
1290 
1291 	if (new_value == NULL || ZSTR_LEN(new_value) == 0) {
1292 		encoding = mbfl_name2encoding(get_output_encoding());
1293 		if (!encoding) {
1294 			MBSTRG(http_output_encoding) = &mbfl_encoding_pass;
1295 			MBSTRG(current_http_output_encoding) = &mbfl_encoding_pass;
1296 			return SUCCESS;
1297 		}
1298 	} else {
1299 		encoding = mbfl_name2encoding(ZSTR_VAL(new_value));
1300 		if (!encoding) {
1301 			MBSTRG(http_output_encoding) = &mbfl_encoding_pass;
1302 			MBSTRG(current_http_output_encoding) = &mbfl_encoding_pass;
1303 			return FAILURE;
1304 		}
1305 	}
1306 	MBSTRG(http_output_encoding) = encoding;
1307 	MBSTRG(current_http_output_encoding) = encoding;
1308 
1309 	if (stage & (PHP_INI_STAGE_ACTIVATE | PHP_INI_STAGE_RUNTIME)) {
1310 		php_error_docref("ref.mbstring", E_DEPRECATED, "Use of mbstring.http_output is deprecated");
1311 	}
1312 
1313 	return SUCCESS;
1314 }
1315 /* }}} */
1316 
1317 /* {{{ static _php_mb_ini_mbstring_internal_encoding_set */
/*
 * Applies an internal-encoding name to the mbstring globals.
 *
 * new_value / new_value_length  encoding name; NULL, empty or unknown names
 *                               fall back to UTF-8
 *
 * Sets both MBSTRG(internal_encoding) and MBSTRG(current_internal_encoding)
 * and, when mbregex is compiled in, the default and current regex
 * encodings as well. Always returns SUCCESS.
 */
int _php_mb_ini_mbstring_internal_encoding_set(const char *new_value, uint new_value_length)
{
	const mbfl_encoding *encoding;

	if (!new_value || !new_value_length || !(encoding = mbfl_name2encoding(new_value))) {
		/* falls back to UTF-8 if an unknown encoding name is given */
		encoding = mbfl_no2encoding(mbfl_no_encoding_utf8);
	}
	MBSTRG(internal_encoding) = encoding;
	MBSTRG(current_internal_encoding) = encoding;
#if HAVE_MBREGEX
	{
		const char *enc_name = new_value;
		if (FAILURE == php_mb_regex_set_default_mbctype(enc_name)) {
			/* falls back to UTF-8 if an unknown encoding name is given */
			enc_name = "UTF-8";
			php_mb_regex_set_default_mbctype(enc_name);
		}
		/* use the name that was actually accepted above, so that the
		 * UTF-8 fallback also reaches the current regex encoding */
		php_mb_regex_set_mbctype(enc_name);
	}
#endif
	return SUCCESS;
}
1341 /* }}} */
1342 
1343 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_internal_encoding) */
PHP_INI_MH(OnUpdate_mbstring_internal_encoding)1344 static PHP_INI_MH(OnUpdate_mbstring_internal_encoding)
1345 {
1346 	if (stage & (PHP_INI_STAGE_ACTIVATE | PHP_INI_STAGE_RUNTIME)) {
1347 		php_error_docref("ref.mbstring", E_DEPRECATED, "Use of mbstring.internal_encoding is deprecated");
1348 	}
1349 
1350 	if (OnUpdateString(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage) == FAILURE) {
1351 		return FAILURE;
1352 	}
1353 
1354 	if (stage & (PHP_INI_STAGE_STARTUP | PHP_INI_STAGE_SHUTDOWN | PHP_INI_STAGE_RUNTIME)) {
1355 		if (new_value && ZSTR_LEN(new_value)) {
1356 			return _php_mb_ini_mbstring_internal_encoding_set(ZSTR_VAL(new_value), ZSTR_LEN(new_value));
1357 		} else {
1358 			return _php_mb_ini_mbstring_internal_encoding_set(get_internal_encoding(), strlen(get_internal_encoding())+1);
1359 		}
1360 	} else {
1361 		/* the corresponding mbstring globals needs to be set according to the
1362 		 * ini value in the later stage because it never falls back to the
1363 		 * default value if 1. no value for mbstring.internal_encoding is given,
1364 		 * 2. mbstring.language directive is processed in per-dir or runtime
1365 		 * context and 3. call to the handler for mbstring.language is done
1366 		 * after mbstring.internal_encoding is handled. */
1367 		return SUCCESS;
1368 	}
1369 }
1370 /* }}} */
1371 
1372 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_substitute_character) */
PHP_INI_MH(OnUpdate_mbstring_substitute_character)1373 static PHP_INI_MH(OnUpdate_mbstring_substitute_character)
1374 {
1375 	int c;
1376 	char *endptr = NULL;
1377 
1378 	if (new_value != NULL) {
1379 		if (strcasecmp("none", ZSTR_VAL(new_value)) == 0) {
1380 			MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
1381 			MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
1382 		} else if (strcasecmp("long", ZSTR_VAL(new_value)) == 0) {
1383 			MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
1384 			MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
1385 		} else if (strcasecmp("entity", ZSTR_VAL(new_value)) == 0) {
1386 			MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
1387 			MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
1388 		} else {
1389 			MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1390 			MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1391 			if (ZSTR_LEN(new_value) > 0) {
1392 				c = strtol(ZSTR_VAL(new_value), &endptr, 0);
1393 				if (*endptr == '\0') {
1394 					MBSTRG(filter_illegal_substchar) = c;
1395 					MBSTRG(current_filter_illegal_substchar) = c;
1396 				}
1397 			}
1398 		}
1399 	} else {
1400 		MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1401 		MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1402 		MBSTRG(filter_illegal_substchar) = 0x3f;	/* '?' */
1403 		MBSTRG(current_filter_illegal_substchar) = 0x3f;	/* '?' */
1404 	}
1405 
1406 	return SUCCESS;
1407 }
1408 /* }}} */
1409 
1410 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_encoding_translation) */
PHP_INI_MH(OnUpdate_mbstring_encoding_translation)1411 static PHP_INI_MH(OnUpdate_mbstring_encoding_translation)
1412 {
1413 	if (new_value == NULL) {
1414 		return FAILURE;
1415 	}
1416 
1417 	OnUpdateBool(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
1418 
1419 	if (MBSTRG(encoding_translation)) {
1420 		sapi_unregister_post_entry(php_post_entries);
1421 		sapi_register_post_entries(mbstr_post_entries);
1422 	} else {
1423 		sapi_unregister_post_entry(mbstr_post_entries);
1424 		sapi_register_post_entries(php_post_entries);
1425 	}
1426 
1427 	return SUCCESS;
1428 }
1429 /* }}} */
1430 
1431 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes */
PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes)1432 static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes)
1433 {
1434 	zend_string *tmp;
1435 	void *re = NULL;
1436 
1437 	if (!new_value) {
1438 		new_value = entry->orig_value;
1439 	}
1440 	tmp = php_trim(new_value, NULL, 0, 3);
1441 
1442 	if (ZSTR_LEN(tmp) > 0) {
1443 		if (!(re = _php_mb_compile_regex(ZSTR_VAL(tmp)))) {
1444 			zend_string_release(tmp);
1445 			return FAILURE;
1446 		}
1447 	}
1448 
1449 	if (MBSTRG(http_output_conv_mimetypes)) {
1450 		_php_mb_free_regex(MBSTRG(http_output_conv_mimetypes));
1451 	}
1452 
1453 	MBSTRG(http_output_conv_mimetypes) = re;
1454 
1455 	zend_string_release(tmp);
1456 	return SUCCESS;
1457 }
1458 /* }}} */
1459 /* }}} */
1460 
1461 /* {{{ php.ini directive registration */
/*
 * php.ini directives of the mbstring extension. Entries declared with
 * PHP_INI_ENTRY are handled entirely by their OnUpdate_* callback; the
 * STD_* entries additionally name the zend_mbstring_globals member that
 * backs them.
 */
PHP_INI_BEGIN()
	PHP_INI_ENTRY("mbstring.language", "neutral", PHP_INI_ALL, OnUpdate_mbstring_language)
	PHP_INI_ENTRY("mbstring.detect_order", NULL, PHP_INI_ALL, OnUpdate_mbstring_detect_order)
	PHP_INI_ENTRY("mbstring.http_input", NULL, PHP_INI_ALL, OnUpdate_mbstring_http_input)
	PHP_INI_ENTRY("mbstring.http_output", NULL, PHP_INI_ALL, OnUpdate_mbstring_http_output)
	STD_PHP_INI_ENTRY("mbstring.internal_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_internal_encoding, internal_encoding_name, zend_mbstring_globals, mbstring_globals)
	PHP_INI_ENTRY("mbstring.substitute_character", NULL, PHP_INI_ALL, OnUpdate_mbstring_substitute_character)
	/* function overloading can only be configured system-wide */
	STD_PHP_INI_ENTRY("mbstring.func_overload", "0",
	PHP_INI_SYSTEM, OnUpdateLong, func_overload, zend_mbstring_globals, mbstring_globals)

	STD_PHP_INI_BOOLEAN("mbstring.encoding_translation", "0",
		PHP_INI_SYSTEM | PHP_INI_PERDIR,
		OnUpdate_mbstring_encoding_translation,
		encoding_translation, zend_mbstring_globals, mbstring_globals)
	/* default pattern: text/ types and application/xhtml+xml */
	PHP_INI_ENTRY("mbstring.http_output_conv_mimetypes",
		"^(text/|application/xhtml\\+xml)",
		PHP_INI_ALL,
		OnUpdate_mbstring_http_output_conv_mimetypes)

	/* NOTE(review): declared as a boolean entry but stored through
	 * OnUpdateLong - confirm against the type of strict_detection */
	STD_PHP_INI_BOOLEAN("mbstring.strict_detection", "0",
		PHP_INI_ALL,
		OnUpdateLong,
		strict_detection, zend_mbstring_globals, mbstring_globals)
PHP_INI_END()
1486 /* }}} */
1487 
1488 /* {{{ module global initialize handler */
1489 static PHP_GINIT_FUNCTION(mbstring)
1490 {
1491 #if defined(COMPILE_DL_MBSTRING) && defined(ZTS)
1492 ZEND_TSRMLS_CACHE_UPDATE();
1493 #endif
1494 
1495 	mbstring_globals->language = mbfl_no_language_uni;
1496 	mbstring_globals->internal_encoding = NULL;
1497 	mbstring_globals->current_internal_encoding = mbstring_globals->internal_encoding;
1498 	mbstring_globals->http_output_encoding = &mbfl_encoding_pass;
1499 	mbstring_globals->current_http_output_encoding = &mbfl_encoding_pass;
1500 	mbstring_globals->http_input_identify = NULL;
1501 	mbstring_globals->http_input_identify_get = NULL;
1502 	mbstring_globals->http_input_identify_post = NULL;
1503 	mbstring_globals->http_input_identify_cookie = NULL;
1504 	mbstring_globals->http_input_identify_string = NULL;
1505 	mbstring_globals->http_input_list = NULL;
1506 	mbstring_globals->http_input_list_size = 0;
1507 	mbstring_globals->detect_order_list = NULL;
1508 	mbstring_globals->detect_order_list_size = 0;
1509 	mbstring_globals->current_detect_order_list = NULL;
1510 	mbstring_globals->current_detect_order_list_size = 0;
1511 	mbstring_globals->default_detect_order_list = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut;
1512 	mbstring_globals->default_detect_order_list_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
1513 	mbstring_globals->filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1514 	mbstring_globals->filter_illegal_substchar = 0x3f;	/* '?' */
1515 	mbstring_globals->current_filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1516 	mbstring_globals->current_filter_illegal_substchar = 0x3f;	/* '?' */
1517 	mbstring_globals->illegalchars = 0;
1518 	mbstring_globals->func_overload = 0;
1519 	mbstring_globals->encoding_translation = 0;
1520 	mbstring_globals->strict_detection = 0;
1521 	mbstring_globals->outconv = NULL;
1522 	mbstring_globals->http_output_conv_mimetypes = NULL;
1523 #if HAVE_MBREGEX
1524 	mbstring_globals->mb_regex_globals = php_mb_regex_globals_alloc();
1525 #endif
1526 }
1527 /* }}} */
1528 
1529 /* {{{ PHP_GSHUTDOWN_FUNCTION */
PHP_GSHUTDOWN_FUNCTION(mbstring)1530 static PHP_GSHUTDOWN_FUNCTION(mbstring)
1531 {
1532 	if (mbstring_globals->http_input_list) {
1533 		free(mbstring_globals->http_input_list);
1534 	}
1535 	if (mbstring_globals->detect_order_list) {
1536 		free(mbstring_globals->detect_order_list);
1537 	}
1538 	if (mbstring_globals->http_output_conv_mimetypes) {
1539 		_php_mb_free_regex(mbstring_globals->http_output_conv_mimetypes);
1540 	}
1541 #if HAVE_MBREGEX
1542 	php_mb_regex_globals_free(mbstring_globals->mb_regex_globals);
1543 #endif
1544 }
1545 /* }}} */
1546 
1547 /* {{{ PHP_MINIT_FUNCTION(mbstring) */
/*
 * Module startup. Points libmbfl at PHP's allocator table, registers the
 * INI entries and the MB_OVERLOAD_* / MB_CASE_* constants, registers
 * mbstr_treat_data with the SAPI layer, and installs mbstring's callbacks
 * in the Zend multibyte layer and the RFC 1867 upload parser.
 *
 * Returns FAILURE only when zend_multibyte_set_functions() fails; in that
 * case the RFC 1867 callbacks are not installed.
 */
PHP_MINIT_FUNCTION(mbstring)
{
#if defined(COMPILE_DL_MBSTRING) && defined(ZTS)
ZEND_TSRMLS_CACHE_UPDATE();
#endif
	__mbfl_allocators = &_php_mb_allocators;

	REGISTER_INI_ENTRIES();

	/* This is a global handler. Should not be set in a per-request handler. */
	sapi_register_treat_data(mbstr_treat_data);

	/* Post handlers are stored in the thread-local context. */
	if (MBSTRG(encoding_translation)) {
		sapi_register_post_entries(mbstr_post_entries);
	}

	/* bit flags accepted by mbstring.func_overload */
	REGISTER_LONG_CONSTANT("MB_OVERLOAD_MAIL", MB_OVERLOAD_MAIL, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("MB_OVERLOAD_STRING", MB_OVERLOAD_STRING, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("MB_OVERLOAD_REGEX", MB_OVERLOAD_REGEX, CONST_CS | CONST_PERSISTENT);

	/* case-conversion modes */
	REGISTER_LONG_CONSTANT("MB_CASE_UPPER", PHP_UNICODE_CASE_UPPER, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("MB_CASE_LOWER", PHP_UNICODE_CASE_LOWER, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("MB_CASE_TITLE", PHP_UNICODE_CASE_TITLE, CONST_CS | CONST_PERSISTENT);

#if HAVE_MBREGEX
	PHP_MINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
#endif

	if (FAILURE == zend_multibyte_set_functions(&php_mb_zend_multibyte_functions)) {
		return FAILURE;
	}

	php_rfc1867_set_multibyte_callbacks(
		php_mb_encoding_translation,
		php_mb_gpc_get_detect_order,
		php_mb_gpc_set_input_encoding,
		php_mb_rfc1867_getword,
		php_mb_rfc1867_getword_conf,
		php_mb_rfc1867_basename);

	return SUCCESS;
}
1591 /* }}} */
1592 
1593 /* {{{ PHP_MSHUTDOWN_FUNCTION(mbstring) */
/*
 * Module shutdown: unregisters the INI entries, restores the Zend
 * multibyte function table and, when mbregex is compiled in, shuts that
 * sub-module down. Always returns SUCCESS.
 */
PHP_MSHUTDOWN_FUNCTION(mbstring)
{
	UNREGISTER_INI_ENTRIES();

	zend_multibyte_restore_functions();

#if HAVE_MBREGEX
	PHP_MSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
#endif

	return SUCCESS;
}
1606 /* }}} */
1607 
1608 /* {{{ PHP_RINIT_FUNCTION(mbstring) */
PHP_RINIT_FUNCTION(mbstring)1609 PHP_RINIT_FUNCTION(mbstring)
1610 {
1611 	zend_function *func, *orig;
1612 	const struct mb_overload_def *p;
1613 
1614 	MBSTRG(current_internal_encoding) = MBSTRG(internal_encoding);
1615 	MBSTRG(current_http_output_encoding) = MBSTRG(http_output_encoding);
1616 	MBSTRG(current_filter_illegal_mode) = MBSTRG(filter_illegal_mode);
1617 	MBSTRG(current_filter_illegal_substchar) = MBSTRG(filter_illegal_substchar);
1618 
1619 	MBSTRG(illegalchars) = 0;
1620 
1621 	php_mb_populate_current_detect_order_list();
1622 
1623  	/* override original function. */
1624 	if (MBSTRG(func_overload)){
1625 		p = &(mb_ovld[0]);
1626 
1627 		CG(compiler_options) |= ZEND_COMPILE_NO_BUILTIN_STRLEN;
1628 		while (p->type > 0) {
1629 			if ((MBSTRG(func_overload) & p->type) == p->type &&
1630 				!zend_hash_str_exists(EG(function_table), p->save_func, strlen(p->save_func))
1631 			) {
1632 				func = zend_hash_str_find_ptr(EG(function_table), p->ovld_func, strlen(p->ovld_func));
1633 
1634 				if ((orig = zend_hash_str_find_ptr(EG(function_table), p->orig_func, strlen(p->orig_func))) == NULL) {
1635 					php_error_docref("ref.mbstring", E_WARNING, "mbstring couldn't find function %s.", p->orig_func);
1636 					return FAILURE;
1637 				} else {
1638 					ZEND_ASSERT(orig->type == ZEND_INTERNAL_FUNCTION);
1639 					zend_hash_str_add_mem(EG(function_table), p->save_func, strlen(p->save_func), orig, sizeof(zend_internal_function));
1640 					function_add_ref(orig);
1641 
1642 					if (zend_hash_str_update_mem(EG(function_table), p->orig_func, strlen(p->orig_func), func, sizeof(zend_internal_function)) == NULL) {
1643 						php_error_docref("ref.mbstring", E_WARNING, "mbstring couldn't replace function %s.", p->orig_func);
1644 						return FAILURE;
1645 					}
1646 
1647 					function_add_ref(func);
1648 				}
1649 			}
1650 			p++;
1651 		}
1652 	}
1653 #if HAVE_MBREGEX
1654 	PHP_RINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1655 #endif
1656 	zend_multibyte_set_internal_encoding((const zend_encoding *)MBSTRG(internal_encoding));
1657 
1658 	return SUCCESS;
1659 }
1660 /* }}} */
1661 
1662 /* {{{ PHP_RSHUTDOWN_FUNCTION(mbstring) */
PHP_RSHUTDOWN_FUNCTION(mbstring)1663 PHP_RSHUTDOWN_FUNCTION(mbstring)
1664 {
1665 	const struct mb_overload_def *p;
1666 	zend_function *orig;
1667 
1668 	if (MBSTRG(current_detect_order_list) != NULL) {
1669 		efree(MBSTRG(current_detect_order_list));
1670 		MBSTRG(current_detect_order_list) = NULL;
1671 		MBSTRG(current_detect_order_list_size) = 0;
1672 	}
1673 	if (MBSTRG(outconv) != NULL) {
1674 		MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
1675 		mbfl_buffer_converter_delete(MBSTRG(outconv));
1676 		MBSTRG(outconv) = NULL;
1677 	}
1678 
1679 	/* clear http input identification. */
1680 	MBSTRG(http_input_identify) = NULL;
1681 	MBSTRG(http_input_identify_post) = NULL;
1682 	MBSTRG(http_input_identify_get) = NULL;
1683 	MBSTRG(http_input_identify_cookie) = NULL;
1684 	MBSTRG(http_input_identify_string) = NULL;
1685 
1686  	/*  clear overloaded function. */
1687 	if (MBSTRG(func_overload)){
1688 		p = &(mb_ovld[0]);
1689 		while (p->type > 0) {
1690 			if ((MBSTRG(func_overload) & p->type) == p->type &&
1691 				(orig = zend_hash_str_find_ptr(EG(function_table), p->save_func, strlen(p->save_func)))) {
1692 
1693 				zend_hash_str_update_mem(EG(function_table), p->orig_func, strlen(p->orig_func), orig, sizeof(zend_internal_function));
1694 				function_add_ref(orig);
1695 				zend_hash_str_del(EG(function_table), p->save_func, strlen(p->save_func));
1696 			}
1697 			p++;
1698 		}
1699 		CG(compiler_options) &= ~ZEND_COMPILE_NO_BUILTIN_STRLEN;
1700 	}
1701 
1702 #if HAVE_MBREGEX
1703 	PHP_RSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1704 #endif
1705 
1706 	return SUCCESS;
1707 }
1708 /* }}} */
1709 
1710 /* {{{ PHP_MINFO_FUNCTION(mbstring) */
PHP_MINFO_FUNCTION(mbstring)1711 PHP_MINFO_FUNCTION(mbstring)
1712 {
1713 	php_info_print_table_start();
1714 	php_info_print_table_row(2, "Multibyte Support", "enabled");
1715 	php_info_print_table_row(2, "Multibyte string engine", "libmbfl");
1716 	php_info_print_table_row(2, "HTTP input encoding translation", MBSTRG(encoding_translation) ? "enabled": "disabled");
1717 	{
1718 		char tmp[256];
1719 		snprintf(tmp, sizeof(tmp), "%d.%d.%d", MBFL_VERSION_MAJOR, MBFL_VERSION_MINOR, MBFL_VERSION_TEENY);
1720 		php_info_print_table_row(2, "libmbfl version", tmp);
1721 	}
1722 #if HAVE_ONIG
1723 	{
1724 		char tmp[256];
1725 		snprintf(tmp, sizeof(tmp), "%d.%d.%d", ONIGURUMA_VERSION_MAJOR, ONIGURUMA_VERSION_MINOR, ONIGURUMA_VERSION_TEENY);
1726 		php_info_print_table_row(2, "oniguruma version", tmp);
1727 	}
1728 #endif
1729 	php_info_print_table_end();
1730 
1731 	php_info_print_table_start();
1732 	php_info_print_table_header(1, "mbstring extension makes use of \"streamable kanji code filter and converter\", which is distributed under the GNU Lesser General Public License version 2.1.");
1733 	php_info_print_table_end();
1734 
1735 #if HAVE_MBREGEX
1736 	PHP_MINFO(mb_regex)(ZEND_MODULE_INFO_FUNC_ARGS_PASSTHRU);
1737 #endif
1738 
1739 	DISPLAY_INI_ENTRIES();
1740 }
1741 /* }}} */
1742 
1743 /* {{{ proto string mb_language([string language])
1744    Sets the current language or Returns the current language as a string */
PHP_FUNCTION(mb_language)1745 PHP_FUNCTION(mb_language)
1746 {
1747 	zend_string *name = NULL;
1748 
1749 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "|S", &name) == FAILURE) {
1750 		return;
1751 	}
1752 	if (name == NULL) {
1753 		RETVAL_STRING((char *)mbfl_no_language2name(MBSTRG(language)));
1754 	} else {
1755 		zend_string *ini_name = zend_string_init("mbstring.language", sizeof("mbstring.language") - 1, 0);
1756 		if (FAILURE == zend_alter_ini_entry(ini_name, name, PHP_INI_USER, PHP_INI_STAGE_RUNTIME)) {
1757 			php_error_docref(NULL, E_WARNING, "Unknown language \"%s\"", ZSTR_VAL(name));
1758 			RETVAL_FALSE;
1759 		} else {
1760 			RETVAL_TRUE;
1761 		}
1762 		zend_string_release(ini_name);
1763 	}
1764 }
1765 /* }}} */
1766 
1767 /* {{{ proto string mb_internal_encoding([string encoding])
1768    Sets the current internal encoding or Returns the current internal encoding as a string */
PHP_FUNCTION(mb_internal_encoding)1769 PHP_FUNCTION(mb_internal_encoding)
1770 {
1771 	const char *name = NULL;
1772 	size_t name_len;
1773 	const mbfl_encoding *encoding;
1774 
1775 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s", &name, &name_len) == FAILURE) {
1776 		return;
1777 	}
1778 	if (name == NULL) {
1779 		name = MBSTRG(current_internal_encoding) ? MBSTRG(current_internal_encoding)->name: NULL;
1780 		if (name != NULL) {
1781 			RETURN_STRING(name);
1782 		} else {
1783 			RETURN_FALSE;
1784 		}
1785 	} else {
1786 		encoding = mbfl_name2encoding(name);
1787 		if (!encoding) {
1788 			php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", name);
1789 			RETURN_FALSE;
1790 		} else {
1791 			MBSTRG(current_internal_encoding) = encoding;
1792 			RETURN_TRUE;
1793 		}
1794 	}
1795 }
1796 /* }}} */
1797 
1798 /* {{{ proto mixed mb_http_input([string type])
1799    Returns the input encoding */
PHP_FUNCTION(mb_http_input)1800 PHP_FUNCTION(mb_http_input)
1801 {
1802 	char *typ = NULL;
1803 	size_t typ_len;
1804 	int retname;
1805 	char *list, *temp;
1806 	const mbfl_encoding *result = NULL;
1807 
1808 	retname = 1;
1809  	if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s", &typ, &typ_len) == FAILURE) {
1810 		return;
1811  	}
1812  	if (typ == NULL) {
1813  		result = MBSTRG(http_input_identify);
1814  	} else {
1815  		switch (*typ) {
1816 		case 'G':
1817 		case 'g':
1818 			result = MBSTRG(http_input_identify_get);
1819 			break;
1820 		case 'P':
1821 		case 'p':
1822 			result = MBSTRG(http_input_identify_post);
1823 			break;
1824 		case 'C':
1825 		case 'c':
1826 			result = MBSTRG(http_input_identify_cookie);
1827 			break;
1828 		case 'S':
1829 		case 's':
1830 			result = MBSTRG(http_input_identify_string);
1831 			break;
1832 		case 'I':
1833 		case 'i':
1834 			{
1835 				const mbfl_encoding **entry = MBSTRG(http_input_list);
1836 				const size_t n = MBSTRG(http_input_list_size);
1837 				size_t i;
1838 				array_init(return_value);
1839 				for (i = 0; i < n; i++) {
1840 					add_next_index_string(return_value, (*entry)->name);
1841 					entry++;
1842 				}
1843 				retname = 0;
1844 			}
1845 			break;
1846 		case 'L':
1847 		case 'l':
1848 			{
1849 				const mbfl_encoding **entry = MBSTRG(http_input_list);
1850 				const size_t n = MBSTRG(http_input_list_size);
1851 				size_t i;
1852 				list = NULL;
1853 				for (i = 0; i < n; i++) {
1854 					if (list) {
1855 						temp = list;
1856 						spprintf(&list, 0, "%s,%s", temp, (*entry)->name);
1857 						efree(temp);
1858 						if (!list) {
1859 							break;
1860 						}
1861 					} else {
1862 						list = estrdup((*entry)->name);
1863 					}
1864 					entry++;
1865 				}
1866 			}
1867 			if (!list) {
1868 				RETURN_FALSE;
1869 			}
1870 			RETVAL_STRING(list);
1871 			efree(list);
1872 			retname = 0;
1873 			break;
1874 		default:
1875 			result = MBSTRG(http_input_identify);
1876 			break;
1877 		}
1878 	}
1879 
1880 	if (retname) {
1881 		if (result) {
1882 			RETVAL_STRING(result->name);
1883 		} else {
1884 			RETVAL_FALSE;
1885 		}
1886 	}
1887 }
1888 /* }}} */
1889 
1890 /* {{{ proto string mb_http_output([string encoding])
1891    Sets the current output_encoding or returns the current output_encoding as a string */
PHP_FUNCTION(mb_http_output)1892 PHP_FUNCTION(mb_http_output)
1893 {
1894 	const char *name = NULL;
1895 	size_t name_len;
1896 	const mbfl_encoding *encoding;
1897 
1898 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s", &name, &name_len) == FAILURE) {
1899 		return;
1900 	}
1901 
1902 	if (name == NULL) {
1903 		name = MBSTRG(current_http_output_encoding) ? MBSTRG(current_http_output_encoding)->name: NULL;
1904 		if (name != NULL) {
1905 			RETURN_STRING(name);
1906 		} else {
1907 			RETURN_FALSE;
1908 		}
1909 	} else {
1910 		encoding = mbfl_name2encoding(name);
1911 		if (!encoding) {
1912 			php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", name);
1913 			RETURN_FALSE;
1914 		} else {
1915 			MBSTRG(current_http_output_encoding) = encoding;
1916 			RETURN_TRUE;
1917 		}
1918 	}
1919 }
1920 /* }}} */
1921 
1922 /* {{{ proto bool|array mb_detect_order([mixed encoding-list])
1923    Sets the current detect_order or Return the current detect_order as a array */
PHP_FUNCTION(mb_detect_order)1924 PHP_FUNCTION(mb_detect_order)
1925 {
1926 	zval *arg1 = NULL;
1927 
1928 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "|z", &arg1) == FAILURE) {
1929 		return;
1930 	}
1931 
1932 	if (!arg1) {
1933 		size_t i;
1934 		size_t n = MBSTRG(current_detect_order_list_size);
1935 		const mbfl_encoding **entry = MBSTRG(current_detect_order_list);
1936 		array_init(return_value);
1937 		for (i = 0; i < n; i++) {
1938 			add_next_index_string(return_value, (*entry)->name);
1939 			entry++;
1940 		}
1941 	} else {
1942 		const mbfl_encoding **list = NULL;
1943 		size_t size = 0;
1944 		switch (Z_TYPE_P(arg1)) {
1945 			case IS_ARRAY:
1946 				if (FAILURE == php_mb_parse_encoding_array(arg1, &list, &size, 0)) {
1947 					if (list) {
1948 						efree(list);
1949 					}
1950 					RETURN_FALSE;
1951 				}
1952 				break;
1953 			default:
1954 				convert_to_string_ex(arg1);
1955 				if (FAILURE == php_mb_parse_encoding_list(Z_STRVAL_P(arg1), Z_STRLEN_P(arg1), &list, &size, 0)) {
1956 					if (list) {
1957 						efree(list);
1958 					}
1959 					RETURN_FALSE;
1960 				}
1961 				break;
1962 		}
1963 
1964 		if (list == NULL) {
1965 			RETURN_FALSE;
1966 		}
1967 
1968 		if (MBSTRG(current_detect_order_list)) {
1969 			efree(MBSTRG(current_detect_order_list));
1970 		}
1971 		MBSTRG(current_detect_order_list) = list;
1972 		MBSTRG(current_detect_order_list_size) = size;
1973 		RETURN_TRUE;
1974 	}
1975 }
1976 /* }}} */
1977 
1978 /* {{{ proto mixed mb_substitute_character([mixed substchar])
1979    Sets the current substitute_character or returns the current substitute_character */
PHP_FUNCTION(mb_substitute_character)1980 PHP_FUNCTION(mb_substitute_character)
1981 {
1982 	zval *arg1 = NULL;
1983 
1984 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "|z", &arg1) == FAILURE) {
1985 		return;
1986 	}
1987 
1988 	if (!arg1) {
1989 		if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
1990 			RETURN_STRING("none");
1991 		} else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
1992 			RETURN_STRING("long");
1993 		} else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
1994 			RETURN_STRING("entity");
1995 		} else {
1996 			RETURN_LONG(MBSTRG(current_filter_illegal_substchar));
1997 		}
1998 	} else {
1999 		RETVAL_TRUE;
2000 
2001 		switch (Z_TYPE_P(arg1)) {
2002 			case IS_STRING:
2003 				if (strncasecmp("none", Z_STRVAL_P(arg1), Z_STRLEN_P(arg1)) == 0) {
2004 					MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
2005 				} else if (strncasecmp("long", Z_STRVAL_P(arg1), Z_STRLEN_P(arg1)) == 0) {
2006 					MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
2007 				} else if (strncasecmp("entity", Z_STRVAL_P(arg1), Z_STRLEN_P(arg1)) == 0) {
2008 					MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
2009 				} else {
2010 					convert_to_long_ex(arg1);
2011 
2012 					if (Z_LVAL_P(arg1) < 0xffff && Z_LVAL_P(arg1) > 0x0) {
2013 						MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
2014 						MBSTRG(current_filter_illegal_substchar) = Z_LVAL_P(arg1);
2015 					} else {
2016 						php_error_docref(NULL, E_WARNING, "Unknown character.");
2017 						RETURN_FALSE;
2018 					}
2019 				}
2020 				break;
2021 			default:
2022 				convert_to_long_ex(arg1);
2023 				if (Z_LVAL_P(arg1) < 0xffff && Z_LVAL_P(arg1) > 0x0) {
2024 					MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
2025 					MBSTRG(current_filter_illegal_substchar) = Z_LVAL_P(arg1);
2026 				} else {
2027 					php_error_docref(NULL, E_WARNING, "Unknown character.");
2028 					RETURN_FALSE;
2029 				}
2030 				break;
2031 		}
2032 	}
2033 }
2034 /* }}} */
2035 
2036 /* {{{ proto string mb_preferred_mime_name(string encoding)
2037    Return the preferred MIME name (charset) as a string */
PHP_FUNCTION(mb_preferred_mime_name)2038 PHP_FUNCTION(mb_preferred_mime_name)
2039 {
2040 	enum mbfl_no_encoding no_encoding;
2041 	char *name = NULL;
2042 	size_t name_len;
2043 
2044 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &name, &name_len) == FAILURE) {
2045 		return;
2046 	} else {
2047 		no_encoding = mbfl_name2no_encoding(name);
2048 		if (no_encoding == mbfl_no_encoding_invalid) {
2049 			php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", name);
2050 			RETVAL_FALSE;
2051 		} else {
2052 			const char *preferred_name = mbfl_no2preferred_mime_name(no_encoding);
2053 			if (preferred_name == NULL || *preferred_name == '\0') {
2054 				php_error_docref(NULL, E_WARNING, "No MIME preferred name corresponding to \"%s\"", name);
2055 				RETVAL_FALSE;
2056 			} else {
2057 				RETVAL_STRING((char *)preferred_name);
2058 			}
2059 		}
2060 	}
2061 }
2062 /* }}} */
2063 
/* Byte-range predicates; each evaluates to 1 or 0.
   IS_SJIS1: c lies in 0x81..0x9f or 0xe0..0xf5.
   IS_SJIS2: c lies in 0x40..0x7e or 0x80..0xfc.
   NOTE: `c` is evaluated several times, so pass a side-effect-free
   expression.  Presumably these are the Shift_JIS lead/trail byte ranges -
   confirm against the encoding tables before relying on that. */
#define IS_SJIS1(c) ((((c)>=0x81 && (c)<=0x9f) || ((c)>=0xe0 && (c)<=0xf5)) ? 1 : 0)
#define IS_SJIS2(c) ((((c)>=0x40 && (c)<=0x7e) || ((c)>=0x80 && (c)<=0xfc)) ? 1 : 0)
2066 
2067 /* {{{ proto bool mb_parse_str(string encoded_string [, array result])
2068    Parses GET/POST/COOKIE data and sets global variables */
PHP_FUNCTION(mb_parse_str)2069 PHP_FUNCTION(mb_parse_str)
2070 {
2071 	zval *track_vars_array = NULL;
2072 	char *encstr = NULL;
2073 	size_t encstr_len;
2074 	php_mb_encoding_handler_info_t info;
2075 	const mbfl_encoding *detected;
2076 
2077 	track_vars_array = NULL;
2078 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|z/", &encstr, &encstr_len, &track_vars_array) == FAILURE) {
2079 		return;
2080 	}
2081 
2082 	if (track_vars_array != NULL) {
2083 		/* Clear out the array */
2084 		zval_dtor(track_vars_array);
2085 		array_init(track_vars_array);
2086 	}
2087 
2088 	encstr = estrndup(encstr, encstr_len);
2089 
2090 	info.data_type              = PARSE_STRING;
2091 	info.separator              = PG(arg_separator).input;
2092 	info.report_errors          = 1;
2093 	info.to_encoding            = MBSTRG(current_internal_encoding);
2094 	info.to_language            = MBSTRG(language);
2095 	info.from_encodings         = MBSTRG(http_input_list);
2096 	info.num_from_encodings     = MBSTRG(http_input_list_size);
2097 	info.from_language          = MBSTRG(language);
2098 
2099 	if (track_vars_array != NULL) {
2100 		detected = _php_mb_encoding_handler_ex(&info, track_vars_array, encstr);
2101 	} else {
2102 		zval tmp;
2103 		zend_array *symbol_table;
2104 		if (zend_forbid_dynamic_call("mb_parse_str() with a single argument") == FAILURE) {
2105 			efree(encstr);
2106 			return;
2107 		}
2108 
2109 		symbol_table = zend_rebuild_symbol_table();
2110 		ZVAL_ARR(&tmp, symbol_table);
2111 		detected = _php_mb_encoding_handler_ex(&info, &tmp, encstr);
2112 	}
2113 
2114 	MBSTRG(http_input_identify) = detected;
2115 
2116 	RETVAL_BOOL(detected);
2117 
2118 	if (encstr != NULL) efree(encstr);
2119 }
2120 /* }}} */
2121 
2122 /* {{{ proto string mb_output_handler(string contents, int status)
2123    Returns string in output buffer converted to the http_output encoding */
PHP_FUNCTION(mb_output_handler)2124 PHP_FUNCTION(mb_output_handler)
2125 {
2126 	char *arg_string;
2127 	size_t arg_string_len;
2128 	zend_long arg_status;
2129 	mbfl_string string, result;
2130 	const char *charset;
2131 	char *p;
2132 	const mbfl_encoding *encoding;
2133 	int last_feed, len;
2134 	unsigned char send_text_mimetype = 0;
2135 	char *s, *mimetype = NULL;
2136 
2137 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl", &arg_string, &arg_string_len, &arg_status) == FAILURE) {
2138 		return;
2139 	}
2140 
2141 	encoding = MBSTRG(current_http_output_encoding);
2142 
2143  	/* start phase only */
2144  	if ((arg_status & PHP_OUTPUT_HANDLER_START) != 0) {
2145  		/* delete the converter just in case. */
2146  		if (MBSTRG(outconv)) {
2147 			MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
2148  			mbfl_buffer_converter_delete(MBSTRG(outconv));
2149  			MBSTRG(outconv) = NULL;
2150   		}
2151 		if (encoding == &mbfl_encoding_pass) {
2152 			RETURN_STRINGL(arg_string, arg_string_len);
2153 		}
2154 
2155 		/* analyze mime type */
2156 		if (SG(sapi_headers).mimetype &&
2157 			_php_mb_match_regex(
2158 				MBSTRG(http_output_conv_mimetypes),
2159 				SG(sapi_headers).mimetype,
2160 				strlen(SG(sapi_headers).mimetype))) {
2161 			if ((s = strchr(SG(sapi_headers).mimetype,';')) == NULL){
2162 				mimetype = estrdup(SG(sapi_headers).mimetype);
2163 			} else {
2164 				mimetype = estrndup(SG(sapi_headers).mimetype,s-SG(sapi_headers).mimetype);
2165 			}
2166 			send_text_mimetype = 1;
2167 		} else if (SG(sapi_headers).send_default_content_type) {
2168 			mimetype = SG(default_mimetype) ? SG(default_mimetype) : SAPI_DEFAULT_MIMETYPE;
2169 		}
2170 
2171  		/* if content-type is not yet set, set it and activate the converter */
2172  		if (SG(sapi_headers).send_default_content_type || send_text_mimetype) {
2173 			charset = encoding->mime_name;
2174 			if (charset) {
2175 				len = spprintf( &p, 0, "Content-Type: %s; charset=%s",  mimetype, charset );
2176 				if (sapi_add_header(p, len, 0) != FAILURE) {
2177 					SG(sapi_headers).send_default_content_type = 0;
2178 				}
2179 			}
2180  			/* activate the converter */
2181  			MBSTRG(outconv) = mbfl_buffer_converter_new2(MBSTRG(current_internal_encoding), encoding, 0);
2182 			if (send_text_mimetype){
2183 				efree(mimetype);
2184 			}
2185  		}
2186   	}
2187 
2188  	/* just return if the converter is not activated. */
2189  	if (MBSTRG(outconv) == NULL) {
2190 		RETURN_STRINGL(arg_string, arg_string_len);
2191 	}
2192 
2193  	/* flag */
2194  	last_feed = ((arg_status & PHP_OUTPUT_HANDLER_END) != 0);
2195  	/* mode */
2196  	mbfl_buffer_converter_illegal_mode(MBSTRG(outconv), MBSTRG(current_filter_illegal_mode));
2197  	mbfl_buffer_converter_illegal_substchar(MBSTRG(outconv), MBSTRG(current_filter_illegal_substchar));
2198 
2199  	/* feed the string */
2200  	mbfl_string_init(&string);
2201 	/* these are not needed. convd has encoding info.
2202 	string.no_language = MBSTRG(language);
2203 	string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2204 	*/
2205  	string.val = (unsigned char *)arg_string;
2206  	string.len = arg_string_len;
2207  	mbfl_buffer_converter_feed(MBSTRG(outconv), &string);
2208  	if (last_feed) {
2209  		mbfl_buffer_converter_flush(MBSTRG(outconv));
2210 	}
2211  	/* get the converter output, and return it */
2212  	mbfl_buffer_converter_result(MBSTRG(outconv), &result);
2213 	// TODO: avoid reallocation ???
2214  	RETVAL_STRINGL((char *)result.val, result.len);		/* the string is already strdup()'ed */
2215 	efree(result.val);
2216 
2217  	/* delete the converter if it is the last feed. */
2218  	if (last_feed) {
2219 		MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
2220 		mbfl_buffer_converter_delete(MBSTRG(outconv));
2221 		MBSTRG(outconv) = NULL;
2222 	}
2223 }
2224 /* }}} */
2225 
2226 /* {{{ proto int mb_strlen(string str [, string encoding])
2227    Get character numbers of a string */
PHP_FUNCTION(mb_strlen)2228 PHP_FUNCTION(mb_strlen)
2229 {
2230 	int n;
2231 	mbfl_string string;
2232 	char *enc_name = NULL;
2233 	size_t enc_name_len, string_len;
2234 
2235 	mbfl_string_init(&string);
2236 
2237 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|s", (char **)&string.val, &string_len, &enc_name, &enc_name_len) == FAILURE) {
2238 		return;
2239 	}
2240 
2241 	if (ZEND_SIZE_T_UINT_OVFL(string_len)) {
2242 			php_error_docref(NULL, E_WARNING, "String overflows the max allowed length of %u", UINT_MAX);
2243 			return;
2244 	}
2245 
2246 	string.len = (uint32_t)string_len;
2247 
2248 	string.no_language = MBSTRG(language);
2249 	if (enc_name == NULL) {
2250 		string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2251 	} else {
2252 		string.no_encoding = mbfl_name2no_encoding(enc_name);
2253 		if (string.no_encoding == mbfl_no_encoding_invalid) {
2254 			php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2255 			RETURN_FALSE;
2256 		}
2257 	}
2258 
2259 	n = mbfl_strlen(&string);
2260 	if (n >= 0) {
2261 		RETVAL_LONG(n);
2262 	} else {
2263 		RETVAL_FALSE;
2264 	}
2265 }
2266 /* }}} */
2267 
2268 /* {{{ proto int mb_strpos(string haystack, string needle [, int offset [, string encoding]])
2269    Find position of first occurrence of a string within another */
PHP_FUNCTION(mb_strpos)2270 PHP_FUNCTION(mb_strpos)
2271 {
2272 	int n, reverse = 0;
2273 	zend_long offset = 0, slen;
2274 	mbfl_string haystack, needle;
2275 	char *enc_name = NULL;
2276 	size_t enc_name_len, haystack_len, needle_len;
2277 
2278 	mbfl_string_init(&haystack);
2279 	mbfl_string_init(&needle);
2280 	haystack.no_language = MBSTRG(language);
2281 	haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2282 	needle.no_language = MBSTRG(language);
2283 	needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2284 
2285 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|ls", (char **)&haystack.val, &haystack_len, (char **)&needle.val, &needle_len, &offset, &enc_name, &enc_name_len) == FAILURE) {
2286 		return;
2287 	}
2288 
2289 	if (ZEND_SIZE_T_UINT_OVFL(haystack_len)) {
2290 			php_error_docref(NULL, E_WARNING, "Haystack length overflows the max allowed length of %u", UINT_MAX);
2291 			return;
2292 	} else if (ZEND_SIZE_T_UINT_OVFL(needle_len)) {
2293 			php_error_docref(NULL, E_WARNING, "Needle length overflows the max allowed length of %u", UINT_MAX);
2294 			return;
2295 	}
2296 
2297 	haystack.len = (uint32_t)haystack_len;
2298 	needle.len = (uint32_t)needle_len;
2299 
2300 	if (enc_name != NULL) {
2301 		haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2302 		if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2303 			php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2304 			RETURN_FALSE;
2305 		}
2306 	}
2307 
2308 	slen = mbfl_strlen(&haystack);
2309 	if (offset < 0) {
2310 		offset += slen;
2311 	}
2312 	if (offset < 0 || offset > slen) {
2313 		php_error_docref(NULL, E_WARNING, "Offset not contained in string");
2314 		RETURN_FALSE;
2315 	}
2316 	if (needle.len == 0) {
2317 		php_error_docref(NULL, E_WARNING, "Empty delimiter");
2318 		RETURN_FALSE;
2319 	}
2320 
2321 	n = mbfl_strpos(&haystack, &needle, offset, reverse);
2322 	if (n >= 0) {
2323 		RETVAL_LONG(n);
2324 	} else {
2325 		switch (-n) {
2326 		case 1:
2327 			break;
2328 		case 2:
2329 			php_error_docref(NULL, E_WARNING, "Needle has not positive length");
2330 			break;
2331 		case 4:
2332 			php_error_docref(NULL, E_WARNING, "Unknown encoding or conversion error");
2333 			break;
2334 		case 8:
2335 			php_error_docref(NULL, E_NOTICE, "Argument is empty");
2336 			break;
2337 		default:
2338 			php_error_docref(NULL, E_WARNING, "Unknown error in mb_strpos");
2339 			break;
2340 		}
2341 		RETVAL_FALSE;
2342 	}
2343 }
2344 /* }}} */
2345 
2346 /* {{{ proto int mb_strrpos(string haystack, string needle [, int offset [, string encoding]])
2347    Find position of last occurrence of a string within another */
PHP_FUNCTION(mb_strrpos)2348 PHP_FUNCTION(mb_strrpos)
2349 {
2350 	int n;
2351 	mbfl_string haystack, needle;
2352 	char *enc_name = NULL;
2353 	size_t enc_name_len, haystack_len, needle_len;
2354 	zval *zoffset = NULL;
2355 	long offset = 0, str_flg;
2356 	char *enc_name2 = NULL;
2357 	int enc_name_len2;
2358 
2359 	mbfl_string_init(&haystack);
2360 	mbfl_string_init(&needle);
2361 	haystack.no_language = MBSTRG(language);
2362 	haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2363 	needle.no_language = MBSTRG(language);
2364 	needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2365 
2366 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|zs", (char **)&haystack.val, &haystack_len, (char **)&needle.val, &needle_len, &zoffset, &enc_name, &enc_name_len) == FAILURE) {
2367 		return;
2368 	}
2369 
2370 	if (ZEND_SIZE_T_UINT_OVFL(haystack_len)) {
2371 			php_error_docref(NULL, E_WARNING, "Haystack length overflows the max allowed length of %u", UINT_MAX);
2372 			return;
2373 	} else if (ZEND_SIZE_T_UINT_OVFL(needle_len)) {
2374 			php_error_docref(NULL, E_WARNING, "Needle length overflows the max allowed length of %u", UINT_MAX);
2375 			return;
2376 	}
2377 
2378 	haystack.len = (uint32_t)haystack_len;
2379 	needle.len = (uint32_t)needle_len;
2380 
2381 	if (zoffset) {
2382 		if (Z_TYPE_P(zoffset) == IS_STRING) {
2383 			enc_name2     = Z_STRVAL_P(zoffset);
2384 			enc_name_len2 = Z_STRLEN_P(zoffset);
2385 			str_flg       = 1;
2386 
2387 			if (enc_name2 != NULL) {
2388 				switch (*enc_name2) {
2389 					case '0':
2390 					case '1':
2391 					case '2':
2392 					case '3':
2393 					case '4':
2394 					case '5':
2395 					case '6':
2396 					case '7':
2397 					case '8':
2398 					case '9':
2399 					case ' ':
2400 					case '-':
2401 					case '.':
2402 						break;
2403 					default :
2404 						str_flg = 0;
2405 						break;
2406 				}
2407 			}
2408 
2409 			if (str_flg) {
2410 				convert_to_long_ex(zoffset);
2411 				offset   = Z_LVAL_P(zoffset);
2412 			} else {
2413 				enc_name     = enc_name2;
2414 				enc_name_len = enc_name_len2;
2415 			}
2416 		} else {
2417 			convert_to_long_ex(zoffset);
2418 			offset = Z_LVAL_P(zoffset);
2419 		}
2420 	}
2421 
2422 	if (enc_name != NULL) {
2423 		haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2424 		if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2425 			php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2426 			RETURN_FALSE;
2427 		}
2428 	}
2429 
2430 	if (haystack.len <= 0) {
2431 		RETURN_FALSE;
2432 	}
2433 	if (needle.len <= 0) {
2434 		RETURN_FALSE;
2435 	}
2436 
2437 	{
2438 		int haystack_char_len = mbfl_strlen(&haystack);
2439 		if ((offset > 0 && offset > haystack_char_len) ||
2440 			(offset < 0 && -offset > haystack_char_len)) {
2441 			php_error_docref(NULL, E_WARNING, "Offset is greater than the length of haystack string");
2442 			RETURN_FALSE;
2443 		}
2444 	}
2445 
2446 	n = mbfl_strpos(&haystack, &needle, offset, 1);
2447 	if (n >= 0) {
2448 		RETVAL_LONG(n);
2449 	} else {
2450 		RETVAL_FALSE;
2451 	}
2452 }
2453 /* }}} */
2454 
2455 /* {{{ proto int mb_stripos(string haystack, string needle [, int offset [, string encoding]])
2456    Finds position of first occurrence of a string within another, case insensitive */
PHP_FUNCTION(mb_stripos)2457 PHP_FUNCTION(mb_stripos)
2458 {
2459 	int n = -1;
2460 	zend_long offset = 0;
2461 	mbfl_string haystack, needle;
2462 	const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
2463 	size_t from_encoding_len, haystack_len, needle_len;
2464 
2465 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|ls", (char **)&haystack.val, &haystack_len, (char **)&needle.val, &needle_len, &offset, &from_encoding, &from_encoding_len) == FAILURE) {
2466 		return;
2467 	}
2468 
2469 	if (ZEND_SIZE_T_UINT_OVFL(haystack_len)) {
2470 			php_error_docref(NULL, E_WARNING, "Haystack length overflows the max allowed length of %u", UINT_MAX);
2471 			return;
2472 	} else if (ZEND_SIZE_T_UINT_OVFL(needle_len)) {
2473 			php_error_docref(NULL, E_WARNING, "Needle length overflows the max allowed length of %u", UINT_MAX);
2474 			return;
2475 	}
2476 
2477 	haystack.len = (uint32_t)haystack_len;
2478 	needle.len = (uint32_t)needle_len;
2479 
2480 	if (needle.len == 0) {
2481 		php_error_docref(NULL, E_WARNING, "Empty delimiter");
2482 		RETURN_FALSE;
2483 	}
2484 	n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, from_encoding);
2485 
2486 	if (n >= 0) {
2487 		RETVAL_LONG(n);
2488 	} else {
2489 		RETVAL_FALSE;
2490 	}
2491 }
2492 /* }}} */
2493 
2494 /* {{{ proto int mb_strripos(string haystack, string needle [, int offset [, string encoding]])
2495    Finds position of last occurrence of a string within another, case insensitive */
PHP_FUNCTION(mb_strripos)2496 PHP_FUNCTION(mb_strripos)
2497 {
2498 	int n = -1;
2499 	zend_long offset = 0;
2500 	mbfl_string haystack, needle;
2501 	const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
2502 	size_t from_encoding_len, haystack_len, needle_len;
2503 
2504 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|ls", (char **)&haystack.val, &haystack_len, (char **)&needle.val, &needle_len, &offset, &from_encoding, &from_encoding_len) == FAILURE) {
2505 		return;
2506 	}
2507 
2508 	if (ZEND_SIZE_T_UINT_OVFL(haystack_len)) {
2509 			php_error_docref(NULL, E_WARNING, "Haystack length overflows the max allowed length of %u", UINT_MAX);
2510 			return;
2511 	} else if (ZEND_SIZE_T_UINT_OVFL(needle_len)) {
2512 			php_error_docref(NULL, E_WARNING, "Needle length overflows the max allowed length of %u", UINT_MAX);
2513 			return;
2514 	}
2515 
2516 	haystack.len = (uint32_t)haystack_len;
2517 	needle.len = (uint32_t)needle_len;
2518 
2519 	n = php_mb_stripos(1, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, from_encoding);
2520 
2521 	if (n >= 0) {
2522 		RETVAL_LONG(n);
2523 	} else {
2524 		RETVAL_FALSE;
2525 	}
2526 }
2527 /* }}} */
2528 
2529 /* {{{ proto string mb_strstr(string haystack, string needle[, bool part[, string encoding]])
2530    Finds first occurrence of a string within another */
PHP_FUNCTION(mb_strstr)2531 PHP_FUNCTION(mb_strstr)
2532 {
2533 	int n, len, mblen;
2534 	mbfl_string haystack, needle, result, *ret = NULL;
2535 	char *enc_name = NULL;
2536 	size_t enc_name_len, haystack_len, needle_len;
2537 	zend_bool part = 0;
2538 
2539 	mbfl_string_init(&haystack);
2540 	mbfl_string_init(&needle);
2541 	haystack.no_language = MBSTRG(language);
2542 	haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2543 	needle.no_language = MBSTRG(language);
2544 	needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2545 
2546 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|bs", (char **)&haystack.val, &haystack_len, (char **)&needle.val, &needle_len, &part, &enc_name, &enc_name_len) == FAILURE) {
2547 		return;
2548 	}
2549 
2550 	if (ZEND_SIZE_T_UINT_OVFL(haystack_len)) {
2551 			php_error_docref(NULL, E_WARNING, "Haystack length overflows the max allowed length of %u", UINT_MAX);
2552 			return;
2553 	} else if (ZEND_SIZE_T_UINT_OVFL(needle_len)) {
2554 			php_error_docref(NULL, E_WARNING, "Needle length overflows the max allowed length of %u", UINT_MAX);
2555 			return;
2556 	}
2557 
2558 	haystack.len = (uint32_t)haystack_len;
2559 	needle.len = (uint32_t)needle_len;
2560 
2561 	if (enc_name != NULL) {
2562 		haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2563 		if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2564 			php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2565 			RETURN_FALSE;
2566 		}
2567 	}
2568 
2569 	if (needle.len <= 0) {
2570 		php_error_docref(NULL, E_WARNING, "Empty delimiter");
2571 		RETURN_FALSE;
2572 	}
2573 	n = mbfl_strpos(&haystack, &needle, 0, 0);
2574 	if (n >= 0) {
2575 		mblen = mbfl_strlen(&haystack);
2576 		if (part) {
2577 			ret = mbfl_substr(&haystack, &result, 0, n);
2578 			if (ret != NULL) {
2579 				// TODO: avoid reallocation ???
2580 				RETVAL_STRINGL((char *)ret->val, ret->len);
2581 				efree(ret->val);
2582 			} else {
2583 				RETVAL_FALSE;
2584 			}
2585 		} else {
2586 			len = (mblen - n);
2587 			ret = mbfl_substr(&haystack, &result, n, len);
2588 			if (ret != NULL) {
2589 				// TODO: avoid reallocation ???
2590 				RETVAL_STRINGL((char *)ret->val, ret->len);
2591 				efree(ret->val);
2592 			} else {
2593 				RETVAL_FALSE;
2594 			}
2595 		}
2596 	} else {
2597 		RETVAL_FALSE;
2598 	}
2599 }
2600 /* }}} */
2601 
2602 /* {{{ proto string mb_strrchr(string haystack, string needle[, bool part[, string encoding]])
2603    Finds the last occurrence of a character in a string within another */
PHP_FUNCTION(mb_strrchr)2604 PHP_FUNCTION(mb_strrchr)
2605 {
2606 	int n, len, mblen;
2607 	mbfl_string haystack, needle, result, *ret = NULL;
2608 	char *enc_name = NULL;
2609 	size_t enc_name_len, haystack_len, needle_len;
2610 	zend_bool part = 0;
2611 
2612 	mbfl_string_init(&haystack);
2613 	mbfl_string_init(&needle);
2614 	haystack.no_language = MBSTRG(language);
2615 	haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2616 	needle.no_language = MBSTRG(language);
2617 	needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2618 
2619 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|bs", (char **)&haystack.val, &haystack_len, (char **)&needle.val, &needle_len, &part, &enc_name, &enc_name_len) == FAILURE) {
2620 		return;
2621 	}
2622 
2623 	if (ZEND_SIZE_T_UINT_OVFL(haystack_len)) {
2624 			php_error_docref(NULL, E_WARNING, "Haystack length overflows the max allowed length of %u", UINT_MAX);
2625 			return;
2626 	} else if (ZEND_SIZE_T_UINT_OVFL(needle_len)) {
2627 			php_error_docref(NULL, E_WARNING, "Needle length overflows the max allowed length of %u", UINT_MAX);
2628 			return;
2629 	}
2630 
2631 	haystack.len = (uint32_t)haystack_len;
2632 	needle.len = (uint32_t)needle_len;
2633 
2634 	if (enc_name != NULL) {
2635 		haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2636 		if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2637 			php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2638 			RETURN_FALSE;
2639 		}
2640 	}
2641 
2642 	if (haystack.len <= 0) {
2643 		RETURN_FALSE;
2644 	}
2645 	if (needle.len <= 0) {
2646 		RETURN_FALSE;
2647 	}
2648 	n = mbfl_strpos(&haystack, &needle, 0, 1);
2649 	if (n >= 0) {
2650 		mblen = mbfl_strlen(&haystack);
2651 		if (part) {
2652 			ret = mbfl_substr(&haystack, &result, 0, n);
2653 			if (ret != NULL) {
2654 				// TODO: avoid reallocation ???
2655 				RETVAL_STRINGL((char *)ret->val, ret->len);
2656 				efree(ret->val);
2657 			} else {
2658 				RETVAL_FALSE;
2659 			}
2660 		} else {
2661 			len = (mblen - n);
2662 			ret = mbfl_substr(&haystack, &result, n, len);
2663 			if (ret != NULL) {
2664 				// TODO: avoid reallocation ???
2665 				RETVAL_STRINGL((char *)ret->val, ret->len);
2666 				efree(ret->val);
2667 			} else {
2668 				RETVAL_FALSE;
2669 			}
2670 		}
2671 	} else {
2672 		RETVAL_FALSE;
2673 	}
2674 }
2675 /* }}} */
2676 
2677 /* {{{ proto string mb_stristr(string haystack, string needle[, bool part[, string encoding]])
2678    Finds first occurrence of a string within another, case insensitive */
PHP_FUNCTION(mb_stristr)2679 PHP_FUNCTION(mb_stristr)
2680 {
2681 	zend_bool part = 0;
2682 	size_t from_encoding_len, len, mblen, haystack_len, needle_len;
2683 	int n;
2684 	mbfl_string haystack, needle, result, *ret = NULL;
2685 	const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
2686 	mbfl_string_init(&haystack);
2687 	mbfl_string_init(&needle);
2688 	haystack.no_language = MBSTRG(language);
2689 	haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2690 	needle.no_language = MBSTRG(language);
2691 	needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2692 
2693 
2694 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|bs", (char **)&haystack.val, &haystack_len, (char **)&needle.val, &needle_len, &part, &from_encoding, &from_encoding_len) == FAILURE) {
2695 		return;
2696 	}
2697 
2698 	if (ZEND_SIZE_T_UINT_OVFL(haystack_len)) {
2699 			php_error_docref(NULL, E_WARNING, "Haystack length overflows the max allowed length of %u", UINT_MAX);
2700 			return;
2701 	} else if (ZEND_SIZE_T_UINT_OVFL(needle_len)) {
2702 			php_error_docref(NULL, E_WARNING, "Needle length overflows the max allowed length of %u", UINT_MAX);
2703 			return;
2704 	}
2705 
2706 	haystack.len = (uint32_t)haystack_len;
2707 	needle.len = (uint32_t)needle_len;
2708 
2709 	if (!needle.len) {
2710 		php_error_docref(NULL, E_WARNING, "Empty delimiter");
2711 		RETURN_FALSE;
2712 	}
2713 
2714 	haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding);
2715 	if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2716 		php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", from_encoding);
2717 		RETURN_FALSE;
2718 	}
2719 
2720 	n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, 0, from_encoding);
2721 
2722 	if (n <0) {
2723 		RETURN_FALSE;
2724 	}
2725 
2726 	mblen = mbfl_strlen(&haystack);
2727 
2728 	if (part) {
2729 		ret = mbfl_substr(&haystack, &result, 0, n);
2730 		if (ret != NULL) {
2731 			// TODO: avoid reallocation ???
2732 			RETVAL_STRINGL((char *)ret->val, ret->len);
2733 			efree(ret->val);
2734 		} else {
2735 			RETVAL_FALSE;
2736 		}
2737 	} else {
2738 		len = (mblen - n);
2739 		ret = mbfl_substr(&haystack, &result, n, len);
2740 		if (ret != NULL) {
2741 			// TODO: avoid reallocaton ???
2742 			RETVAL_STRINGL((char *)ret->val, ret->len);
2743 			efree(ret->val);
2744 		} else {
2745 			RETVAL_FALSE;
2746 		}
2747 	}
2748 }
2749 /* }}} */
2750 
2751 /* {{{ proto string mb_strrichr(string haystack, string needle[, bool part[, string encoding]])
2752    Finds the last occurrence of a character in a string within another, case insensitive */
PHP_FUNCTION(mb_strrichr)2753 PHP_FUNCTION(mb_strrichr)
2754 {
2755 	zend_bool part = 0;
2756 	int n, len, mblen;
2757 	size_t from_encoding_len, haystack_len, needle_len;
2758 	mbfl_string haystack, needle, result, *ret = NULL;
2759 	const char *from_encoding = MBSTRG(current_internal_encoding)->name;
2760 	mbfl_string_init(&haystack);
2761 	mbfl_string_init(&needle);
2762 	haystack.no_language = MBSTRG(language);
2763 	haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2764 	needle.no_language = MBSTRG(language);
2765 	needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2766 
2767 
2768 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|bs", (char **)&haystack.val, &haystack_len, (char **)&needle.val, &needle_len, &part, &from_encoding, &from_encoding_len) == FAILURE) {
2769 		return;
2770 	}
2771 
2772 	if (ZEND_SIZE_T_UINT_OVFL(haystack_len)) {
2773 			php_error_docref(NULL, E_WARNING, "Haystack length overflows the max allowed length of %u", UINT_MAX);
2774 			return;
2775 	} else if (ZEND_SIZE_T_UINT_OVFL(needle_len)) {
2776 			php_error_docref(NULL, E_WARNING, "Needle length overflows the max allowed length of %u", UINT_MAX);
2777 			return;
2778 	}
2779 
2780 	haystack.len = (uint32_t)haystack_len;
2781 	needle.len = (uint32_t)needle_len;
2782 
2783 	haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding);
2784 	if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2785 		php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", from_encoding);
2786 		RETURN_FALSE;
2787 	}
2788 
2789 	n = php_mb_stripos(1, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, 0, from_encoding);
2790 
2791 	if (n <0) {
2792 		RETURN_FALSE;
2793 	}
2794 
2795 	mblen = mbfl_strlen(&haystack);
2796 
2797 	if (part) {
2798 		ret = mbfl_substr(&haystack, &result, 0, n);
2799 		if (ret != NULL) {
2800 			// TODO: avoid reallocation ???
2801 			RETVAL_STRINGL((char *)ret->val, ret->len);
2802 			efree(ret->val);
2803 		} else {
2804 			RETVAL_FALSE;
2805 		}
2806 	} else {
2807 		len = (mblen - n);
2808 		ret = mbfl_substr(&haystack, &result, n, len);
2809 		if (ret != NULL) {
2810 			// TODO: avoid reallocation ???
2811 			RETVAL_STRINGL((char *)ret->val, ret->len);
2812 			efree(ret->val);
2813 		} else {
2814 			RETVAL_FALSE;
2815 		}
2816 	}
2817 }
2818 /* }}} */
2819 
2820 /* {{{ proto int mb_substr_count(string haystack, string needle [, string encoding])
2821    Count the number of substring occurrences */
PHP_FUNCTION(mb_substr_count)2822 PHP_FUNCTION(mb_substr_count)
2823 {
2824 	int n;
2825 	mbfl_string haystack, needle;
2826 	char *enc_name = NULL;
2827 	size_t enc_name_len, haystack_len, needle_len;
2828 
2829 	mbfl_string_init(&haystack);
2830 	mbfl_string_init(&needle);
2831 	haystack.no_language = MBSTRG(language);
2832 	haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2833 	needle.no_language = MBSTRG(language);
2834 	needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2835 
2836 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|s", (char **)&haystack.val, &haystack_len, (char **)&needle.val, &needle_len, &enc_name, &enc_name_len) == FAILURE) {
2837 		return;
2838 	}
2839 
2840 	if (ZEND_SIZE_T_UINT_OVFL(haystack_len)) {
2841 			php_error_docref(NULL, E_WARNING, "Haystack length overflows the max allowed length of %u", UINT_MAX);
2842 			return;
2843 	} else if (ZEND_SIZE_T_UINT_OVFL(needle_len)) {
2844 			php_error_docref(NULL, E_WARNING, "Needle length overflows the max allowed length of %u", UINT_MAX);
2845 			return;
2846 	}
2847 
2848 	haystack.len = (uint32_t)haystack_len;
2849 	needle.len = (uint32_t)needle_len;
2850 
2851 	if (enc_name != NULL) {
2852 		haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2853 		if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2854 			php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2855 			RETURN_FALSE;
2856 		}
2857 	}
2858 
2859 	if (needle.len <= 0) {
2860 		php_error_docref(NULL, E_WARNING, "Empty substring");
2861 		RETURN_FALSE;
2862 	}
2863 
2864 	n = mbfl_substr_count(&haystack, &needle);
2865 	if (n >= 0) {
2866 		RETVAL_LONG(n);
2867 	} else {
2868 		RETVAL_FALSE;
2869 	}
2870 }
2871 /* }}} */
2872 
2873 /* {{{ proto string mb_substr(string str, int start [, int length [, string encoding]])
2874    Returns part of a string */
PHP_FUNCTION(mb_substr)2875 PHP_FUNCTION(mb_substr)
2876 {
2877 	char *str, *encoding = NULL;
2878 	zend_long from, len;
2879 	int mblen;
2880 	size_t str_len, encoding_len;
2881 	zend_bool len_is_null = 1;
2882 	mbfl_string string, result, *ret;
2883 
2884 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl|l!s", &str, &str_len, &from, &len, &len_is_null, &encoding, &encoding_len) == FAILURE) {
2885 		return;
2886 	}
2887 
2888 	mbfl_string_init(&string);
2889 	string.no_language = MBSTRG(language);
2890 	string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2891 
2892 	if (encoding) {
2893 		string.no_encoding = mbfl_name2no_encoding(encoding);
2894 		if (string.no_encoding == mbfl_no_encoding_invalid) {
2895 			php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", encoding);
2896 			RETURN_FALSE;
2897 		}
2898 	}
2899 
2900 	string.val = (unsigned char *)str;
2901 	string.len = str_len;
2902 
2903 	if (len_is_null) {
2904 		len = str_len;
2905 	}
2906 
2907 	/* measures length */
2908 	mblen = 0;
2909 	if (from < 0 || len < 0) {
2910 		mblen = mbfl_strlen(&string);
2911 	}
2912 
2913 	/* if "from" position is negative, count start position from the end
2914 	 * of the string
2915 	 */
2916 	if (from < 0) {
2917 		from = mblen + from;
2918 		if (from < 0) {
2919 			from = 0;
2920 		}
2921 	}
2922 
2923 	/* if "length" position is negative, set it to the length
2924 	 * needed to stop that many chars from the end of the string
2925 	 */
2926 	if (len < 0) {
2927 		len = (mblen - from) + len;
2928 		if (len < 0) {
2929 			len = 0;
2930 		}
2931 	}
2932 
2933 	if (((MBSTRG(func_overload) & MB_OVERLOAD_STRING) == MB_OVERLOAD_STRING)
2934 		&& (from >= mbfl_strlen(&string))) {
2935 		RETURN_FALSE;
2936 	}
2937 
2938 	if (from > INT_MAX) {
2939 		from = INT_MAX;
2940 	}
2941 	if (len > INT_MAX) {
2942 		len = INT_MAX;
2943 	}
2944 
2945 	ret = mbfl_substr(&string, &result, from, len);
2946 	if (NULL == ret) {
2947 		RETURN_FALSE;
2948 	}
2949 
2950 	// TODO: avoid reallocation ???
2951 	RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
2952 	efree(ret->val);
2953 }
2954 /* }}} */
2955 
2956 /* {{{ proto string mb_strcut(string str, int start [, int length [, string encoding]])
2957    Returns part of a string */
PHP_FUNCTION(mb_strcut)2958 PHP_FUNCTION(mb_strcut)
2959 {
2960 	char *encoding = NULL;
2961 	zend_long from, len;
2962 	size_t encoding_len, string_len;
2963 	zend_bool len_is_null = 1;
2964 	mbfl_string string, result, *ret;
2965 
2966 	mbfl_string_init(&string);
2967 	string.no_language = MBSTRG(language);
2968 	string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2969 
2970 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl|l!s", (char **)&string.val, &string_len, &from, &len, &len_is_null, &encoding, &encoding_len) == FAILURE) {
2971 		return;
2972 	}
2973 
2974 	if (ZEND_SIZE_T_UINT_OVFL(string_len)) {
2975 			php_error_docref(NULL, E_WARNING, "String length overflows the max allowed length of %u", UINT_MAX);
2976 			return;
2977 	}
2978 
2979 	string.len = (uint32_t)string_len;
2980 
2981 	if (encoding) {
2982 		string.no_encoding = mbfl_name2no_encoding(encoding);
2983 		if (string.no_encoding == mbfl_no_encoding_invalid) {
2984 			php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", encoding);
2985 			RETURN_FALSE;
2986 		}
2987 	}
2988 
2989 	if (len_is_null) {
2990 		len = string.len;
2991 	}
2992 
2993 	/* if "from" position is negative, count start position from the end
2994 	 * of the string
2995 	 */
2996 	if (from < 0) {
2997 		from = string.len + from;
2998 		if (from < 0) {
2999 			from = 0;
3000 		}
3001 	}
3002 
3003 	/* if "length" position is negative, set it to the length
3004 	 * needed to stop that many chars from the end of the string
3005 	 */
3006 	if (len < 0) {
3007 		len = (string.len - from) + len;
3008 		if (len < 0) {
3009 			len = 0;
3010 		}
3011 	}
3012 
3013 	if ((unsigned int)from > string.len) {
3014 		RETURN_FALSE;
3015 	}
3016 
3017 	ret = mbfl_strcut(&string, &result, from, len);
3018 	if (ret == NULL) {
3019 		RETURN_FALSE;
3020 	}
3021 
3022 	// TODO: avoid reallocation ???
3023 	RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
3024 	efree(ret->val);
3025 }
3026 /* }}} */
3027 
3028 /* {{{ proto int mb_strwidth(string str [, string encoding])
3029    Gets terminal width of a string */
PHP_FUNCTION(mb_strwidth)3030 PHP_FUNCTION(mb_strwidth)
3031 {
3032 	int n;
3033 	mbfl_string string;
3034 	char *enc_name = NULL;
3035 	size_t enc_name_len, string_len;
3036 
3037 	mbfl_string_init(&string);
3038 
3039 	string.no_language = MBSTRG(language);
3040 	string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
3041 
3042 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|s", (char **)&string.val, &string_len, &enc_name, &enc_name_len) == FAILURE) {
3043 		return;
3044 	}
3045 
3046 	if (ZEND_SIZE_T_UINT_OVFL(string_len)) {
3047 			php_error_docref(NULL, E_WARNING, "String length overflows the max allowed length of %u", UINT_MAX);
3048 			return;
3049 	}
3050 
3051 	string.len = (uint32_t)string_len;
3052 
3053 	if (enc_name != NULL) {
3054 		string.no_encoding = mbfl_name2no_encoding(enc_name);
3055 		if (string.no_encoding == mbfl_no_encoding_invalid) {
3056 			php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc_name);
3057 			RETURN_FALSE;
3058 		}
3059 	}
3060 
3061 	n = mbfl_strwidth(&string);
3062 	if (n >= 0) {
3063 		RETVAL_LONG(n);
3064 	} else {
3065 		RETVAL_FALSE;
3066 	}
3067 }
3068 /* }}} */
3069 
3070 /* {{{ proto string mb_strimwidth(string str, int start, int width [, string trimmarker [, string encoding]])
3071    Trim the string in terminal width */
PHP_FUNCTION(mb_strimwidth)3072 PHP_FUNCTION(mb_strimwidth)
3073 {
3074 	char *str, *trimmarker = NULL, *encoding = NULL;
3075 	zend_long from, width, swidth;
3076 	size_t str_len, trimmarker_len, encoding_len;
3077 	mbfl_string string, result, marker, *ret;
3078 
3079 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "sll|ss", &str, &str_len, &from, &width, &trimmarker, &trimmarker_len, &encoding, &encoding_len) == FAILURE) {
3080 		return;
3081 	}
3082 
3083 	mbfl_string_init(&string);
3084 	mbfl_string_init(&marker);
3085 	string.no_language = MBSTRG(language);
3086 	string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
3087 	marker.no_language = MBSTRG(language);
3088 	marker.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
3089 	marker.val = NULL;
3090 	marker.len = 0;
3091 
3092 	if (encoding) {
3093 		string.no_encoding = marker.no_encoding = mbfl_name2no_encoding(encoding);
3094 		if (string.no_encoding == mbfl_no_encoding_invalid) {
3095 			php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", encoding);
3096 			RETURN_FALSE;
3097 		}
3098 	}
3099 
3100 	string.val = (unsigned char *)str;
3101 	string.len = str_len;
3102 
3103 	if ((from < 0) || (width < 0)) {
3104 		swidth = mbfl_strwidth(&string);
3105 	}
3106 
3107 	if (from < 0) {
3108 		from += swidth;
3109 	}
3110 
3111 	if (from < 0 || (size_t)from > str_len) {
3112 		php_error_docref(NULL, E_WARNING, "Start position is out of range");
3113 		RETURN_FALSE;
3114 	}
3115 
3116 	if (width < 0) {
3117 		width = swidth + width - from;
3118 	}
3119 
3120 	if (width < 0) {
3121 		php_error_docref(NULL, E_WARNING, "Width is out of range");
3122 		RETURN_FALSE;
3123 	}
3124 
3125 	if (trimmarker) {
3126 		marker.val = (unsigned char *)trimmarker;
3127 		marker.len = trimmarker_len;
3128 	}
3129 
3130 	ret = mbfl_strimwidth(&string, &marker, &result, from, width);
3131 
3132 	if (ret == NULL) {
3133 		RETURN_FALSE;
3134 	}
3135 	// TODO: avoid reallocation ???
3136 	RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
3137 	efree(ret->val);
3138 }
3139 /* }}} */
3140 
3141 /* {{{ MBSTRING_API char *php_mb_convert_encoding() */
php_mb_convert_encoding(const char * input,size_t length,const char * _to_encoding,const char * _from_encodings,size_t * output_len)3142 MBSTRING_API char * php_mb_convert_encoding(const char *input, size_t length, const char *_to_encoding, const char *_from_encodings, size_t *output_len)
3143 {
3144 	mbfl_string string, result, *ret;
3145 	const mbfl_encoding *from_encoding, *to_encoding;
3146 	mbfl_buffer_converter *convd;
3147 	size_t size;
3148 	const mbfl_encoding **list;
3149 	char *output=NULL;
3150 
3151 	if (output_len) {
3152 		*output_len = 0;
3153 	}
3154 	if (!input) {
3155 		return NULL;
3156 	}
3157 	/* new encoding */
3158 	if (_to_encoding && strlen(_to_encoding)) {
3159 		to_encoding = mbfl_name2encoding(_to_encoding);
3160 		if (!to_encoding) {
3161 			php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", _to_encoding);
3162 			return NULL;
3163 		}
3164 	} else {
3165 		to_encoding = MBSTRG(current_internal_encoding);
3166 	}
3167 
3168 	/* initialize string */
3169 	mbfl_string_init(&string);
3170 	mbfl_string_init(&result);
3171 	from_encoding = MBSTRG(current_internal_encoding);
3172 	string.no_encoding = from_encoding->no_encoding;
3173 	string.no_language = MBSTRG(language);
3174 	string.val = (unsigned char *)input;
3175 	string.len = length;
3176 
3177 	/* pre-conversion encoding */
3178 	if (_from_encodings) {
3179 		list = NULL;
3180 		size = 0;
3181 		php_mb_parse_encoding_list(_from_encodings, strlen(_from_encodings), &list, &size, 0);
3182 		if (size == 1) {
3183 			from_encoding = *list;
3184 			string.no_encoding = from_encoding->no_encoding;
3185 		} else if (size > 1) {
3186 			/* auto detect */
3187 			from_encoding = mbfl_identify_encoding2(&string, list, size, MBSTRG(strict_detection));
3188 			if (from_encoding) {
3189 				string.no_encoding = from_encoding->no_encoding;
3190 			} else {
3191 				php_error_docref(NULL, E_WARNING, "Unable to detect character encoding");
3192 				from_encoding = &mbfl_encoding_pass;
3193 				to_encoding = from_encoding;
3194 				string.no_encoding = from_encoding->no_encoding;
3195 			}
3196 		} else {
3197 			php_error_docref(NULL, E_WARNING, "Illegal character encoding specified");
3198 		}
3199 		if (list != NULL) {
3200 			efree((void *)list);
3201 		}
3202 	}
3203 
3204 	/* initialize converter */
3205 	convd = mbfl_buffer_converter_new2(from_encoding, to_encoding, string.len);
3206 	if (convd == NULL) {
3207 		php_error_docref(NULL, E_WARNING, "Unable to create character encoding converter");
3208 		return NULL;
3209 	}
3210 	mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
3211 	mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
3212 
3213 	/* do it */
3214 	ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
3215 	if (ret) {
3216 		if (output_len) {
3217 			*output_len = ret->len;
3218 		}
3219 		output = (char *)ret->val;
3220 	}
3221 
3222 	MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
3223 	mbfl_buffer_converter_delete(convd);
3224 	return output;
3225 }
3226 /* }}} */
3227 
3228 /* {{{ proto string mb_convert_encoding(string str, string to-encoding [, mixed from-encoding])
3229    Returns converted string in desired encoding */
PHP_FUNCTION(mb_convert_encoding)3230 PHP_FUNCTION(mb_convert_encoding)
3231 {
3232 	char *arg_str, *arg_new;
3233 	size_t str_len, new_len;
3234 	zval *arg_old = NULL;
3235 	size_t size, l, n;
3236 	char *_from_encodings = NULL, *ret, *s_free = NULL;
3237 
3238 	zval *hash_entry;
3239 	HashTable *target_hash;
3240 
3241 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|z", &arg_str, &str_len, &arg_new, &new_len, &arg_old) == FAILURE) {
3242 		return;
3243 	}
3244 
3245 	if (arg_old) {
3246 		switch (Z_TYPE_P(arg_old)) {
3247 			case IS_ARRAY:
3248 				target_hash = Z_ARRVAL_P(arg_old);
3249 				_from_encodings = NULL;
3250 
3251 				ZEND_HASH_FOREACH_VAL(target_hash, hash_entry) {
3252 
3253 					convert_to_string_ex(hash_entry);
3254 
3255 					if ( _from_encodings) {
3256 						l = strlen(_from_encodings);
3257 						n = strlen(Z_STRVAL_P(hash_entry));
3258 						_from_encodings = erealloc(_from_encodings, l+n+2);
3259 						memcpy(_from_encodings + l, ",", 1);
3260 						memcpy(_from_encodings + l + 1, Z_STRVAL_P(hash_entry), Z_STRLEN_P(hash_entry) + 1);
3261 					} else {
3262 						_from_encodings = estrdup(Z_STRVAL_P(hash_entry));
3263 					}
3264 				} ZEND_HASH_FOREACH_END();
3265 
3266 				if (_from_encodings != NULL && !strlen(_from_encodings)) {
3267 					efree(_from_encodings);
3268 					_from_encodings = NULL;
3269 				}
3270 				s_free = _from_encodings;
3271 				break;
3272 			default:
3273 				convert_to_string(arg_old);
3274 				_from_encodings = Z_STRVAL_P(arg_old);
3275 				break;
3276 			}
3277 	}
3278 
3279 	/* new encoding */
3280 	ret = php_mb_convert_encoding(arg_str, str_len, arg_new, _from_encodings, &size);
3281 	if (ret != NULL) {
3282 		// TODO: avoid reallocation ???
3283 		RETVAL_STRINGL(ret, size);		/* the string is already strdup()'ed */
3284 		efree(ret);
3285 	} else {
3286 		RETVAL_FALSE;
3287 	}
3288 
3289 	if ( s_free) {
3290 		efree(s_free);
3291 	}
3292 }
3293 /* }}} */
3294 
3295 /* {{{ proto string mb_convert_case(string sourcestring, int mode [, string encoding])
3296    Returns a case-folded version of sourcestring */
PHP_FUNCTION(mb_convert_case)3297 PHP_FUNCTION(mb_convert_case)
3298 {
3299 	const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
3300 	char *str;
3301 	size_t str_len, from_encoding_len;
3302 	zend_long case_mode = 0;
3303 	char *newstr;
3304 	size_t ret_len;
3305 
3306 	RETVAL_FALSE;
3307 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl|s!", &str, &str_len,
3308 				&case_mode, &from_encoding, &from_encoding_len) == FAILURE) {
3309 		return;
3310 	}
3311 
3312 	newstr = php_unicode_convert_case(case_mode, str, (size_t) str_len, &ret_len, from_encoding);
3313 
3314 	if (newstr) {
3315 		// TODO: avoid reallocation ???
3316 		RETVAL_STRINGL(newstr, ret_len);
3317 		efree(newstr);
3318 	}
3319 }
3320 /* }}} */
3321 
3322 /* {{{ proto string mb_strtoupper(string sourcestring [, string encoding])
3323  *  Returns a uppercased version of sourcestring
3324  */
PHP_FUNCTION(mb_strtoupper)3325 PHP_FUNCTION(mb_strtoupper)
3326 {
3327 	const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
3328 	char *str;
3329 	size_t str_len, from_encoding_len;
3330 	char *newstr;
3331 	size_t ret_len;
3332 
3333 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|s!", &str, &str_len,
3334 				&from_encoding, &from_encoding_len) == FAILURE) {
3335 		return;
3336 	}
3337 	newstr = php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, str, (size_t) str_len, &ret_len, from_encoding);
3338 
3339 	if (newstr) {
3340 		// TODO: avoid reallocation ???
3341 		RETVAL_STRINGL(newstr, ret_len);
3342 		efree(newstr);
3343 		return;
3344 	}
3345 	RETURN_FALSE;
3346 }
3347 /* }}} */
3348 
3349 /* {{{ proto string mb_strtolower(string sourcestring [, string encoding])
3350  *  Returns a lowercased version of sourcestring
3351  */
PHP_FUNCTION(mb_strtolower)3352 PHP_FUNCTION(mb_strtolower)
3353 {
3354 	const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
3355 	char *str;
3356 	size_t str_len, from_encoding_len;
3357 	char *newstr;
3358 	size_t ret_len;
3359 
3360 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|s!", &str, &str_len,
3361 				&from_encoding, &from_encoding_len) == FAILURE) {
3362 		return;
3363 	}
3364 	newstr = php_unicode_convert_case(PHP_UNICODE_CASE_LOWER, str, (size_t) str_len, &ret_len, from_encoding);
3365 
3366 	if (newstr) {
3367 		// TODO: avoid reallocation ???
3368 		RETVAL_STRINGL(newstr, ret_len);
3369 		efree(newstr);
3370 		return;
3371 	}
3372 	RETURN_FALSE;
3373 }
3374 /* }}} */
3375 
3376 /* {{{ proto string mb_detect_encoding(string str [, mixed encoding_list [, bool strict]])
3377    Encodings of the given string is returned (as a string) */
PHP_FUNCTION(mb_detect_encoding)3378 PHP_FUNCTION(mb_detect_encoding)
3379 {
3380 	char *str;
3381 	size_t str_len;
3382 	zend_bool strict=0;
3383 	zval *encoding_list = NULL;
3384 
3385 	mbfl_string string;
3386 	const mbfl_encoding *ret;
3387 	const mbfl_encoding **elist, **list;
3388 	size_t size;
3389 
3390 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|z!b", &str, &str_len, &encoding_list, &strict) == FAILURE) {
3391 		return;
3392 	}
3393 
3394 	/* make encoding list */
3395 	list = NULL;
3396 	size = 0;
3397 	if (encoding_list) {
3398 		switch (Z_TYPE_P(encoding_list)) {
3399 		case IS_ARRAY:
3400 			if (FAILURE == php_mb_parse_encoding_array(encoding_list, &list, &size, 0)) {
3401 				if (list) {
3402 					efree(list);
3403 					list = NULL;
3404 					size = 0;
3405 				}
3406 			}
3407 			break;
3408 		default:
3409 			convert_to_string(encoding_list);
3410 			if (FAILURE == php_mb_parse_encoding_list(Z_STRVAL_P(encoding_list), Z_STRLEN_P(encoding_list), &list, &size, 0)) {
3411 				if (list) {
3412 					efree(list);
3413 					list = NULL;
3414 					size = 0;
3415 				}
3416 			}
3417 			break;
3418 		}
3419 		if (size <= 0) {
3420 			php_error_docref(NULL, E_WARNING, "Illegal argument");
3421 		}
3422 	}
3423 
3424 	if (ZEND_NUM_ARGS() < 3) {
3425 		strict = (zend_bool)MBSTRG(strict_detection);
3426 	}
3427 
3428 	if (size > 0 && list != NULL) {
3429 		elist = list;
3430 	} else {
3431 		elist = MBSTRG(current_detect_order_list);
3432 		size = MBSTRG(current_detect_order_list_size);
3433 	}
3434 
3435 	mbfl_string_init(&string);
3436 	string.no_language = MBSTRG(language);
3437 	string.val = (unsigned char *)str;
3438 	string.len = str_len;
3439 	ret = mbfl_identify_encoding2(&string, elist, size, strict);
3440 
3441 	if (list != NULL) {
3442 		efree((void *)list);
3443 	}
3444 
3445 	if (ret == NULL) {
3446 		RETURN_FALSE;
3447 	}
3448 
3449 	RETVAL_STRING((char *)ret->name);
3450 }
3451 /* }}} */
3452 
3453 /* {{{ proto mixed mb_list_encodings()
3454    Returns an array of all supported entity encodings */
PHP_FUNCTION(mb_list_encodings)3455 PHP_FUNCTION(mb_list_encodings)
3456 {
3457 	const mbfl_encoding **encodings;
3458 	const mbfl_encoding *encoding;
3459 	int i;
3460 
3461 	if (zend_parse_parameters_none() == FAILURE) {
3462 		return;
3463 	}
3464 
3465 	array_init(return_value);
3466 	i = 0;
3467 	encodings = mbfl_get_supported_encodings();
3468 	while ((encoding = encodings[i++]) != NULL) {
3469 		add_next_index_string(return_value, (char *) encoding->name);
3470 	}
3471 }
3472 /* }}} */
3473 
3474 /* {{{ proto array mb_encoding_aliases(string encoding)
3475    Returns an array of the aliases of a given encoding name */
PHP_FUNCTION(mb_encoding_aliases)3476 PHP_FUNCTION(mb_encoding_aliases)
3477 {
3478 	const mbfl_encoding *encoding;
3479 	char *name = NULL;
3480 	size_t name_len;
3481 
3482 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &name, &name_len) == FAILURE) {
3483 		return;
3484 	}
3485 
3486 	encoding = mbfl_name2encoding(name);
3487 	if (!encoding) {
3488 		php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", name);
3489 		RETURN_FALSE;
3490 	}
3491 
3492 	array_init(return_value);
3493 	if (encoding->aliases != NULL) {
3494 		const char **alias;
3495 		for (alias = *encoding->aliases; *alias; ++alias) {
3496 			add_next_index_string(return_value, (char *)*alias);
3497 		}
3498 	}
3499 }
3500 /* }}} */
3501 
3502 /* {{{ proto string mb_encode_mimeheader(string str [, string charset [, string transfer-encoding [, string linefeed [, int indent]]]])
3503    Converts the string to MIME "encoded-word" in the format of =?charset?(B|Q)?encoded_string?= */
PHP_FUNCTION(mb_encode_mimeheader)3504 PHP_FUNCTION(mb_encode_mimeheader)
3505 {
3506 	enum mbfl_no_encoding charset, transenc;
3507 	mbfl_string  string, result, *ret;
3508 	char *charset_name = NULL;
3509 	size_t charset_name_len;
3510 	char *trans_enc_name = NULL;
3511 	size_t trans_enc_name_len;
3512 	char *linefeed = "\r\n";
3513 	size_t linefeed_len, string_len;
3514 	zend_long indent = 0;
3515 
3516 	mbfl_string_init(&string);
3517 	string.no_language = MBSTRG(language);
3518 	string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
3519 
3520 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|sssl", (char **)&string.val, &string_len, &charset_name, &charset_name_len, &trans_enc_name, &trans_enc_name_len, &linefeed, &linefeed_len, &indent) == FAILURE) {
3521 		return;
3522 	}
3523 
3524 	if (ZEND_SIZE_T_UINT_OVFL(string_len)) {
3525 			php_error_docref(NULL, E_WARNING, "String length overflows the max allowed length of %u", UINT_MAX);
3526 			return;
3527 	}
3528 
3529 	string.len = (uint32_t)string_len;
3530 
3531 	charset = mbfl_no_encoding_pass;
3532 	transenc = mbfl_no_encoding_base64;
3533 
3534 	if (charset_name != NULL) {
3535 		charset = mbfl_name2no_encoding(charset_name);
3536 		if (charset == mbfl_no_encoding_invalid) {
3537 			php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", charset_name);
3538 			RETURN_FALSE;
3539 		}
3540 	} else {
3541 		const mbfl_language *lang = mbfl_no2language(MBSTRG(language));
3542 		if (lang != NULL) {
3543 			charset = lang->mail_charset;
3544 			transenc = lang->mail_header_encoding;
3545 		}
3546 	}
3547 
3548 	if (trans_enc_name != NULL) {
3549 		if (*trans_enc_name == 'B' || *trans_enc_name == 'b') {
3550 			transenc = mbfl_no_encoding_base64;
3551 		} else if (*trans_enc_name == 'Q' || *trans_enc_name == 'q') {
3552 			transenc = mbfl_no_encoding_qprint;
3553 		}
3554 	}
3555 
3556 	mbfl_string_init(&result);
3557 	ret = mbfl_mime_header_encode(&string, &result, charset, transenc, linefeed, indent);
3558 	if (ret != NULL) {
3559 		// TODO: avoid reallocation ???
3560 		RETVAL_STRINGL((char *)ret->val, ret->len);	/* the string is already strdup()'ed */
3561 		efree(ret->val);
3562 	} else {
3563 		RETVAL_FALSE;
3564 	}
3565 }
3566 /* }}} */
3567 
3568 /* {{{ proto string mb_decode_mimeheader(string string)
3569    Decodes the MIME "encoded-word" in the string */
PHP_FUNCTION(mb_decode_mimeheader)3570 PHP_FUNCTION(mb_decode_mimeheader)
3571 {
3572 	mbfl_string string, result, *ret;
3573 	size_t string_len;
3574 
3575 	mbfl_string_init(&string);
3576 	string.no_language = MBSTRG(language);
3577 	string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
3578 
3579 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", (char **)&string.val, &string_len) == FAILURE) {
3580 		return;
3581 	}
3582 
3583 	if (ZEND_SIZE_T_UINT_OVFL(string_len)) {
3584 			php_error_docref(NULL, E_WARNING, "String length overflows the max allowed length of %u", UINT_MAX);
3585 			return;
3586 	}
3587 
3588 	string.len = (uint32_t)string_len;
3589 
3590 	mbfl_string_init(&result);
3591 	ret = mbfl_mime_header_decode(&string, &result, MBSTRG(current_internal_encoding)->no_encoding);
3592 	if (ret != NULL) {
3593 		// TODO: avoid reallocation ???
3594 		RETVAL_STRINGL((char *)ret->val, ret->len);	/* the string is already strdup()'ed */
3595 		efree(ret->val);
3596 	} else {
3597 		RETVAL_FALSE;
3598 	}
3599 }
3600 /* }}} */
3601 
3602 /* {{{ proto string mb_convert_kana(string str [, string option] [, string encoding])
3603    Conversion between full-width character and half-width character (Japanese) */
PHP_FUNCTION(mb_convert_kana)3604 PHP_FUNCTION(mb_convert_kana)
3605 {
3606 	int opt, i;
3607 	mbfl_string string, result, *ret;
3608 	char *optstr = NULL;
3609 	size_t optstr_len;
3610 	char *encname = NULL;
3611 	size_t encname_len, string_len;
3612 
3613 	mbfl_string_init(&string);
3614 	string.no_language = MBSTRG(language);
3615 	string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
3616 
3617 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|ss", (char **)&string.val, &string_len, &optstr, &optstr_len, &encname, &encname_len) == FAILURE) {
3618 		return;
3619 	}
3620 
3621 	if (ZEND_SIZE_T_UINT_OVFL(string_len)) {
3622 			php_error_docref(NULL, E_WARNING, "String length overflows the max allowed length of %u", UINT_MAX);
3623 			return;
3624 	}
3625 
3626 	string.len = (uint32_t)string_len;
3627 
3628 	/* option */
3629 	if (optstr != NULL) {
3630 		char *p = optstr;
3631 		int n = optstr_len;
3632 		i = 0;
3633 		opt = 0;
3634 		while (i < n) {
3635 			i++;
3636 			switch (*p++) {
3637 			case 'A':
3638 				opt |= 0x1;
3639 				break;
3640 			case 'a':
3641 				opt |= 0x10;
3642 				break;
3643 			case 'R':
3644 				opt |= 0x2;
3645 				break;
3646 			case 'r':
3647 				opt |= 0x20;
3648 				break;
3649 			case 'N':
3650 				opt |= 0x4;
3651 				break;
3652 			case 'n':
3653 				opt |= 0x40;
3654 				break;
3655 			case 'S':
3656 				opt |= 0x8;
3657 				break;
3658 			case 's':
3659 				opt |= 0x80;
3660 				break;
3661 			case 'K':
3662 				opt |= 0x100;
3663 				break;
3664 			case 'k':
3665 				opt |= 0x1000;
3666 				break;
3667 			case 'H':
3668 				opt |= 0x200;
3669 				break;
3670 			case 'h':
3671 				opt |= 0x2000;
3672 				break;
3673 			case 'V':
3674 				opt |= 0x800;
3675 				break;
3676 			case 'C':
3677 				opt |= 0x10000;
3678 				break;
3679 			case 'c':
3680 				opt |= 0x20000;
3681 				break;
3682 			case 'M':
3683 				opt |= 0x100000;
3684 				break;
3685 			case 'm':
3686 				opt |= 0x200000;
3687 				break;
3688 			}
3689 		}
3690 	} else {
3691 		opt = 0x900;
3692 	}
3693 
3694 	/* encoding */
3695 	if (encname != NULL) {
3696 		string.no_encoding = mbfl_name2no_encoding(encname);
3697 		if (string.no_encoding == mbfl_no_encoding_invalid) {
3698 			php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", encname);
3699 			RETURN_FALSE;
3700 		}
3701 	}
3702 
3703 	ret = mbfl_ja_jp_hantozen(&string, &result, opt);
3704 	if (ret != NULL) {
3705 		// TODO: avoid reallocation ???
3706 		RETVAL_STRINGL((char *)ret->val, ret->len);		/* the string is already strdup()'ed */
3707 		efree(ret->val);
3708 	} else {
3709 		RETVAL_FALSE;
3710 	}
3711 }
3712 /* }}} */
3713 
/* Initial number of zval slots allocated for the zval stack that
 * mb_convert_variables() uses while walking its arguments.
 * NOTE(review): the name suggests it is also the growth increment; the code
 * that grows the stack is outside this view - confirm. */
3714 #define PHP_MBSTR_STACK_BLOCK_SIZE 32
3715 
3716 /* {{{ proto string mb_convert_variables(string to-encoding, mixed from-encoding, mixed vars [, ...])
3717    Converts the string resource in variables to desired encoding */
PHP_FUNCTION(mb_convert_variables)3718 PHP_FUNCTION(mb_convert_variables)
3719 {
3720 	zval *args, *stack, *var, *hash_entry, *hash_entry_ptr, *zfrom_enc;
3721 	HashTable *target_hash;
3722 	mbfl_string string, result, *ret;
3723 	const mbfl_encoding *from_encoding, *to_encoding;
3724 	mbfl_encoding_detector *identd;
3725 	mbfl_buffer_converter *convd;
3726 	int n, argc, stack_level, stack_max;
3727 	size_t to_enc_len;
3728 	size_t elistsz;
3729 	const mbfl_encoding **elist;
3730 	char *to_enc;
3731 	void *ptmp;
3732 	int recursion_error = 0;
3733 
3734 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "sz+", &to_enc, &to_enc_len, &zfrom_enc, &args, &argc) == FAILURE) {
3735 		return;
3736 	}
3737 
3738 	/* new encoding */
3739 	to_encoding = mbfl_name2encoding(to_enc);
3740 	if (!to_encoding) {
3741 		php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", to_enc);
3742 		RETURN_FALSE;
3743 	}
3744 
3745 	/* initialize string */
3746 	mbfl_string_init(&string);
3747 	mbfl_string_init(&result);
3748 	from_encoding = MBSTRG(current_internal_encoding);
3749 	string.no_encoding = from_encoding->no_encoding;
3750 	string.no_language = MBSTRG(language);
3751 
3752 	/* pre-conversion encoding */
3753 	elist = NULL;
3754 	elistsz = 0;
3755 	switch (Z_TYPE_P(zfrom_enc)) {
3756 		case IS_ARRAY:
3757 			php_mb_parse_encoding_array(zfrom_enc, &elist, &elistsz, 0);
3758 			break;
3759 		default:
3760 			convert_to_string_ex(zfrom_enc);
3761 			php_mb_parse_encoding_list(Z_STRVAL_P(zfrom_enc), Z_STRLEN_P(zfrom_enc), &elist, &elistsz, 0);
3762 			break;
3763 	}
3764 
3765 	if (elistsz <= 0) {
3766 		from_encoding = &mbfl_encoding_pass;
3767 	} else if (elistsz == 1) {
3768 		from_encoding = *elist;
3769 	} else {
3770 		/* auto detect */
3771 		from_encoding = NULL;
3772 		stack_max = PHP_MBSTR_STACK_BLOCK_SIZE;
3773 		stack = (zval *)safe_emalloc(stack_max, sizeof(zval), 0);
3774 		stack_level = 0;
3775 		identd = mbfl_encoding_detector_new2(elist, elistsz, MBSTRG(strict_detection));
3776 		if (identd != NULL) {
3777 			n = 0;
3778 			while (n < argc || stack_level > 0) {
3779 				if (stack_level <= 0) {
3780 					var = &args[n++];
3781 					ZVAL_DEREF(var);
3782 					SEPARATE_ZVAL_NOREF(var);
3783 					if (Z_TYPE_P(var) == IS_ARRAY || Z_TYPE_P(var) == IS_OBJECT) {
3784 						target_hash = HASH_OF(var);
3785 						if (target_hash != NULL) {
3786 							zend_hash_internal_pointer_reset(target_hash);
3787 						}
3788 					}
3789 				} else {
3790 					stack_level--;
3791 					var = &stack[stack_level];
3792 				}
3793 				if (Z_TYPE_P(var) == IS_ARRAY || Z_TYPE_P(var) == IS_OBJECT) {
3794 					target_hash = HASH_OF(var);
3795 					if (target_hash != NULL) {
3796 						while ((hash_entry = zend_hash_get_current_data(target_hash)) != NULL) {
3797 							if (!Z_IMMUTABLE_P(var)) {
3798 								if (++target_hash->u.v.nApplyCount > 1) {
3799 									--target_hash->u.v.nApplyCount;
3800 									recursion_error = 1;
3801 									goto detect_end;
3802 								}
3803 							}
3804 							zend_hash_move_forward(target_hash);
3805 							if (Z_TYPE_P(hash_entry) == IS_INDIRECT) {
3806 								hash_entry = Z_INDIRECT_P(hash_entry);
3807 							}
3808 							ZVAL_DEREF(hash_entry);
3809 							if (Z_TYPE_P(hash_entry) == IS_ARRAY || Z_TYPE_P(hash_entry) == IS_OBJECT) {
3810 								if (stack_level >= stack_max) {
3811 									stack_max += PHP_MBSTR_STACK_BLOCK_SIZE;
3812 									ptmp = erealloc(stack, sizeof(zval) * stack_max);
3813 									stack = (zval *)ptmp;
3814 								}
3815 								ZVAL_COPY_VALUE(&stack[stack_level], var);
3816 								stack_level++;
3817 								var = hash_entry;
3818 								target_hash = HASH_OF(var);
3819 								if (target_hash != NULL) {
3820 									zend_hash_internal_pointer_reset(target_hash);
3821 									continue;
3822 								}
3823 							} else if (Z_TYPE_P(hash_entry) == IS_STRING) {
3824 								string.val = (unsigned char *)Z_STRVAL_P(hash_entry);
3825 								string.len = Z_STRLEN_P(hash_entry);
3826 								if (mbfl_encoding_detector_feed(identd, &string)) {
3827 									goto detect_end;		/* complete detecting */
3828 								}
3829 							}
3830 						}
3831 					}
3832 				} else if (Z_TYPE_P(var) == IS_STRING) {
3833 					string.val = (unsigned char *)Z_STRVAL_P(var);
3834 					string.len = Z_STRLEN_P(var);
3835 					if (mbfl_encoding_detector_feed(identd, &string)) {
3836 						goto detect_end;		/* complete detecting */
3837 					}
3838 				}
3839 			}
3840 detect_end:
3841 			from_encoding = mbfl_encoding_detector_judge2(identd);
3842 			mbfl_encoding_detector_delete(identd);
3843 		}
3844 		if (recursion_error) {
3845 			while(stack_level-- && (var = &stack[stack_level])) {
3846 				if (!Z_IMMUTABLE_P(var)) {
3847 					if (HASH_OF(var)->u.v.nApplyCount > 1) {
3848 						HASH_OF(var)->u.v.nApplyCount--;
3849 					}
3850 				}
3851 			}
3852 			efree(stack);
3853 			if (elist != NULL) {
3854 				efree((void *)elist);
3855 			}
3856 			php_error_docref(NULL, E_WARNING, "Cannot handle recursive references");
3857 			RETURN_FALSE;
3858 		}
3859 		efree(stack);
3860 
3861 		if (!from_encoding) {
3862 			php_error_docref(NULL, E_WARNING, "Unable to detect encoding");
3863 			from_encoding = &mbfl_encoding_pass;
3864 		}
3865 	}
3866 	if (elist != NULL) {
3867 		efree((void *)elist);
3868 	}
3869 	/* create converter */
3870 	convd = NULL;
3871 	if (from_encoding != &mbfl_encoding_pass) {
3872 		convd = mbfl_buffer_converter_new2(from_encoding, to_encoding, 0);
3873 		if (convd == NULL) {
3874 			php_error_docref(NULL, E_WARNING, "Unable to create converter");
3875 			RETURN_FALSE;
3876 		}
3877 		mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
3878 		mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
3879 	}
3880 
3881 	/* convert */
3882 	if (convd != NULL) {
3883 		stack_max = PHP_MBSTR_STACK_BLOCK_SIZE;
3884 		stack = (zval*)safe_emalloc(stack_max, sizeof(zval), 0);
3885 		stack_level = 0;
3886 		n = 0;
3887 		while (n < argc || stack_level > 0) {
3888 			if (stack_level <= 0) {
3889 				var = &args[n++];
3890 				ZVAL_DEREF(var);
3891 				SEPARATE_ZVAL_NOREF(var);
3892 				if (Z_TYPE_P(var) == IS_ARRAY || Z_TYPE_P(var) == IS_OBJECT) {
3893 					target_hash = HASH_OF(var);
3894 					if (target_hash != NULL) {
3895 						zend_hash_internal_pointer_reset(target_hash);
3896 					}
3897 				}
3898 			} else {
3899 				stack_level--;
3900 				var = &stack[stack_level];
3901 			}
3902 			if (Z_TYPE_P(var) == IS_ARRAY || Z_TYPE_P(var) == IS_OBJECT) {
3903 				target_hash = HASH_OF(var);
3904 				if (target_hash != NULL) {
3905 					while ((hash_entry_ptr = zend_hash_get_current_data(target_hash)) != NULL) {
3906 						zend_hash_move_forward(target_hash);
3907 						if (Z_TYPE_P(hash_entry_ptr) == IS_INDIRECT) {
3908 							hash_entry_ptr = Z_INDIRECT_P(hash_entry_ptr);
3909 						}
3910 						hash_entry = hash_entry_ptr;
3911 						ZVAL_DEREF(hash_entry);
3912 						if (Z_TYPE_P(hash_entry) == IS_ARRAY || Z_TYPE_P(hash_entry) == IS_OBJECT) {
3913 							if (!Z_IMMUTABLE_P(hash_entry)) {
3914 								if (++(HASH_OF(hash_entry)->u.v.nApplyCount) > 1) {
3915 									--(HASH_OF(hash_entry)->u.v.nApplyCount);
3916 									recursion_error = 1;
3917 									goto conv_end;
3918 								}
3919 							}
3920 							if (stack_level >= stack_max) {
3921 								stack_max += PHP_MBSTR_STACK_BLOCK_SIZE;
3922 								ptmp = erealloc(stack, sizeof(zval) * stack_max);
3923 								stack = (zval *)ptmp;
3924 							}
3925 							ZVAL_COPY_VALUE(&stack[stack_level], var);
3926 							stack_level++;
3927 							var = hash_entry;
3928 							SEPARATE_ZVAL(hash_entry);
3929 							target_hash = HASH_OF(var);
3930 							if (target_hash != NULL) {
3931 								zend_hash_internal_pointer_reset(target_hash);
3932 								continue;
3933 							}
3934 						} else if (Z_TYPE_P(hash_entry) == IS_STRING) {
3935 							string.val = (unsigned char *)Z_STRVAL_P(hash_entry);
3936 							string.len = Z_STRLEN_P(hash_entry);
3937 							ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
3938 							if (ret != NULL) {
3939 								zval_ptr_dtor(hash_entry_ptr);
3940 								// TODO: avoid reallocation ???
3941 								ZVAL_STRINGL(hash_entry_ptr, (char *)ret->val, ret->len);
3942 								efree(ret->val);
3943 							}
3944 						}
3945 					}
3946 				}
3947 			} else if (Z_TYPE_P(var) == IS_STRING) {
3948 				string.val = (unsigned char *)Z_STRVAL_P(var);
3949 				string.len = Z_STRLEN_P(var);
3950 				ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
3951 				if (ret != NULL) {
3952 					zval_ptr_dtor(var);
3953 					// TODO: avoid reallocation ???
3954 					ZVAL_STRINGL(var, (char *)ret->val, ret->len);
3955 					efree(ret->val);
3956 				}
3957 			}
3958 		}
3959 
3960 conv_end:
3961 		MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
3962 		mbfl_buffer_converter_delete(convd);
3963 
3964 		if (recursion_error) {
3965 			while(stack_level-- && (var = &stack[stack_level])) {
3966 				if (!Z_IMMUTABLE_P(var)) {
3967 					if (HASH_OF(var)->u.v.nApplyCount > 1) {
3968 						HASH_OF(var)->u.v.nApplyCount--;
3969 					}
3970 				}
3971 			}
3972 			efree(stack);
3973 			php_error_docref(NULL, E_WARNING, "Cannot handle recursive references");
3974 			RETURN_FALSE;
3975 		}
3976 		efree(stack);
3977 	}
3978 
3979 	if (from_encoding) {
3980 		RETURN_STRING(from_encoding->name);
3981 	} else {
3982 		RETURN_FALSE;
3983 	}
3984 }
3985 /* }}} */
3986 
3987 /* {{{ HTML numeric entity */
3988 /* {{{ static void php_mb_numericentity_exec() */
3989 static void
php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS,int type)3990 php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS, int type)
3991 {
3992 	char *str, *encoding = NULL;
3993 	size_t str_len, encoding_len;
3994 	zval *zconvmap, *hash_entry;
3995 	HashTable *target_hash;
3996 	int i, *convmap, *mapelm, mapsize=0;
3997 	zend_bool is_hex = 0;
3998 	mbfl_string string, result, *ret;
3999 	enum mbfl_no_encoding no_encoding;
4000 
4001 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "sz|sb", &str, &str_len, &zconvmap, &encoding, &encoding_len, &is_hex) == FAILURE) {
4002 		return;
4003 	}
4004 
4005 	mbfl_string_init(&string);
4006 	string.no_language = MBSTRG(language);
4007 	string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
4008 	string.val = (unsigned char *)str;
4009 	string.len = str_len;
4010 
4011 	/* encoding */
4012 	if (encoding && encoding_len > 0) {
4013 		no_encoding = mbfl_name2no_encoding(encoding);
4014 		if (no_encoding == mbfl_no_encoding_invalid) {
4015 			php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", encoding);
4016 			RETURN_FALSE;
4017 		} else {
4018 			string.no_encoding = no_encoding;
4019 		}
4020 	}
4021 
4022 	if (type == 0 && is_hex) {
4023 		type = 2; /* output in hex format */
4024 	}
4025 
4026 	/* conversion map */
4027 	convmap = NULL;
4028 	if (Z_TYPE_P(zconvmap) == IS_ARRAY) {
4029 		target_hash = Z_ARRVAL_P(zconvmap);
4030 		i = zend_hash_num_elements(target_hash);
4031 		if (i > 0) {
4032 			convmap = (int *)safe_emalloc(i, sizeof(int), 0);
4033 			mapelm = convmap;
4034 			mapsize = 0;
4035 			ZEND_HASH_FOREACH_VAL(target_hash, hash_entry) {
4036 				convert_to_long_ex(hash_entry);
4037 				*mapelm++ = Z_LVAL_P(hash_entry);
4038 				mapsize++;
4039 			} ZEND_HASH_FOREACH_END();
4040 		}
4041 	}
4042 	if (convmap == NULL) {
4043 		RETURN_FALSE;
4044 	}
4045 	mapsize /= 4;
4046 
4047 	ret = mbfl_html_numeric_entity(&string, &result, convmap, mapsize, type);
4048 	if (ret != NULL) {
4049 		// TODO: avoid reallocation ???
4050 		RETVAL_STRINGL((char *)ret->val, ret->len);
4051 		efree(ret->val);
4052 	} else {
4053 		RETVAL_FALSE;
4054 	}
4055 	efree((void *)convmap);
4056 }
4057 /* }}} */
4058 
4059 /* {{{ proto string mb_encode_numericentity(string string, array convmap [, string encoding [, bool is_hex]])
4060    Converts specified characters to HTML numeric entities */
PHP_FUNCTION(mb_encode_numericentity)4061 PHP_FUNCTION(mb_encode_numericentity)
4062 {
4063 	php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
4064 }
4065 /* }}} */
4066 
4067 /* {{{ proto string mb_decode_numericentity(string string, array convmap [, string encoding])
4068    Converts HTML numeric entities to character code */
PHP_FUNCTION(mb_decode_numericentity)4069 PHP_FUNCTION(mb_decode_numericentity)
4070 {
4071 	php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
4072 }
4073 /* }}} */
4074 /* }}} */
4075 
4076 /* {{{ proto int mb_send_mail(string to, string subject, string message [, string additional_headers [, string additional_parameters]])
4077  *  Sends an email message with MIME scheme
4078  */
4079 
/*
 * To be used inside a loop that walks `str` with the index variable `pos`.
 * When `pos` is at a CR LF that is followed by a space or tab (a folded
 * header line), advances `pos` to the last character of that whitespace run
 * and `continue`s the enclosing loop. Deliberately a bare `if` statement and
 * not wrapped in do { } while (0): the `continue` must bind to the caller's
 * loop. `pos` must be a modifiable lvalue.
 */
#define SKIP_LONG_HEADER_SEP_MBSTRING(str, pos)										\
	if ((str)[(pos)] == '\r' && (str)[(pos) + 1] == '\n' && ((str)[(pos) + 2] == ' ' || (str)[(pos) + 2] == '\t')) {	\
		(pos) += 2;											\
		while ((str)[(pos) + 1] == ' ' || (str)[(pos) + 1] == '\t') {					\
			(pos)++;										\
		}												\
		continue;											\
	}

/*
 * Replaces every NUL byte within the first `len` bytes of `str` by a space.
 * Uses the `char *pp, *ee` variables that the calling function must declare.
 */
#define MAIL_ASCIIZ_CHECK_MBSTRING(str, len)			\
	do {							\
		pp = (str);					\
		ee = pp + (len);				\
		while ((pp = memchr(pp, '\0', (ee - pp)))) {	\
			*pp = ' ';				\
		}						\
	} while (0)
4095 
_php_mbstr_parse_mail_headers(HashTable * ht,const char * str,size_t str_len)4096 static int _php_mbstr_parse_mail_headers(HashTable *ht, const char *str, size_t str_len)
4097 {
4098 	const char *ps;
4099 	size_t icnt;
4100 	int state = 0;
4101 	int crlf_state = -1;
4102 	char *token = NULL;
4103 	size_t token_pos = 0;
4104 	zend_string *fld_name, *fld_val;
4105 
4106 	ps = str;
4107 	icnt = str_len;
4108 	fld_name = fld_val = NULL;
4109 
4110 	/*
4111 	 *             C o n t e n t - T y p e :   t e x t / h t m l \r\n
4112 	 *             ^ ^^^^^^^^^^^^^^^^^^^^^ ^^^ ^^^^^^^^^^^^^^^^^ ^^^^
4113 	 *      state  0            1           2          3
4114 	 *
4115 	 *             C o n t e n t - T y p e :   t e x t / h t m l \r\n
4116 	 *             ^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^
4117 	 * crlf_state -1                       0                     1 -1
4118 	 *
4119 	 */
4120 
4121 	while (icnt > 0) {
4122 		switch (*ps) {
4123 			case ':':
4124 				if (crlf_state == 1) {
4125 					token_pos++;
4126 				}
4127 
4128 				if (state == 0 || state == 1) {
4129 					if(token && token_pos > 0) {
4130 						fld_name = zend_string_init(token, token_pos, 0);
4131 					}
4132 					state = 2;
4133 				} else {
4134 					token_pos++;
4135 				}
4136 
4137 				crlf_state = 0;
4138 				break;
4139 
4140 			case '\n':
4141 				if (crlf_state == -1) {
4142 					goto out;
4143 				}
4144 				crlf_state = -1;
4145 				break;
4146 
4147 			case '\r':
4148 				if (crlf_state == 1) {
4149 					token_pos++;
4150 				} else {
4151 					crlf_state = 1;
4152 				}
4153 				break;
4154 
4155 			case ' ': case '\t':
4156 				if (crlf_state == -1) {
4157 					if (state == 3) {
4158 						/* continuing from the previous line */
4159 						state = 4;
4160 					} else {
4161 						/* simply skipping this new line */
4162 						state = 5;
4163 					}
4164 				} else {
4165 					if (crlf_state == 1) {
4166 						token_pos++;
4167 					}
4168 					if (state == 1 || state == 3) {
4169 						token_pos++;
4170 					}
4171 				}
4172 				crlf_state = 0;
4173 				break;
4174 
4175 			default:
4176 				switch (state) {
4177 					case 0:
4178 						token = (char*)ps;
4179 						token_pos = 0;
4180 						state = 1;
4181 						break;
4182 
4183 					case 2:
4184 						if (crlf_state != -1) {
4185 							token = (char*)ps;
4186 							token_pos = 0;
4187 
4188 							state = 3;
4189 							break;
4190 						}
4191 						/* break is missing intentionally */
4192 
4193 					case 3:
4194 						if (crlf_state == -1) {
4195 							if(token && token_pos > 0) {
4196 								fld_val = zend_string_init(token, token_pos, 0);
4197 							}
4198 
4199 							if (fld_name != NULL && fld_val != NULL) {
4200 								zval val;
4201 								/* FIXME: some locale free implementation is
4202 								 * really required here,,, */
4203 								php_strtoupper(ZSTR_VAL(fld_name), ZSTR_LEN(fld_name));
4204 								ZVAL_STR(&val, fld_val);
4205 
4206 								zend_hash_update(ht, fld_name, &val);
4207 
4208 								zend_string_release(fld_name);
4209 							}
4210 
4211 							fld_name = fld_val = NULL;
4212 							token = (char*)ps;
4213 							token_pos = 0;
4214 
4215 							state = 1;
4216 						}
4217 						break;
4218 
4219 					case 4:
4220 						token_pos++;
4221 						state = 3;
4222 						break;
4223 				}
4224 
4225 				if (crlf_state == 1) {
4226 					token_pos++;
4227 				}
4228 
4229 				token_pos++;
4230 
4231 				crlf_state = 0;
4232 				break;
4233 		}
4234 		ps++, icnt--;
4235 	}
4236 out:
4237 	if (state == 2) {
4238 		token = "";
4239 		token_pos = 0;
4240 
4241 		state = 3;
4242 	}
4243 	if (state == 3) {
4244 		if(token && token_pos > 0) {
4245 			fld_val = zend_string_init(token, token_pos, 0);
4246 		}
4247 		if (fld_name != NULL && fld_val != NULL) {
4248 			zval val;
4249 			/* FIXME: some locale free implementation is
4250 			 * really required here,,, */
4251 			php_strtoupper(ZSTR_VAL(fld_name), ZSTR_LEN(fld_name));
4252 			ZVAL_STR(&val, fld_val);
4253 
4254 			zend_hash_update(ht, fld_name, &val);
4255 
4256 			zend_string_release(fld_name);
4257 		}
4258 	}
4259 	return state;
4260 }
4261 
PHP_FUNCTION(mb_send_mail)4262 PHP_FUNCTION(mb_send_mail)
4263 {
4264 	int n;
4265 	char *to = NULL;
4266 	size_t to_len;
4267 	char *message = NULL;
4268 	size_t message_len;
4269 	char *headers = NULL;
4270 	size_t headers_len;
4271 	char *subject = NULL;
4272 	zend_string *extra_cmd = NULL;
4273 	size_t subject_len;
4274 	int i;
4275 	char *to_r = NULL;
4276 	char *force_extra_parameters = INI_STR("mail.force_extra_parameters");
4277 	struct {
4278 		int cnt_type:1;
4279 		int cnt_trans_enc:1;
4280 	} suppressed_hdrs = { 0, 0 };
4281 
4282 	char *message_buf = NULL, *subject_buf = NULL, *p;
4283 	mbfl_string orig_str, conv_str;
4284 	mbfl_string *pstr;	/* pointer to mbfl string for return value */
4285 	enum mbfl_no_encoding
4286 		tran_cs,	/* transfar text charset */
4287 		head_enc,	/* header transfar encoding */
4288 		body_enc;	/* body transfar encoding */
4289 	mbfl_memory_device device;	/* automatic allocateable buffer for additional header */
4290 	const mbfl_language *lang;
4291 	int err = 0;
4292 	HashTable ht_headers;
4293 	zval *s;
4294 	extern void mbfl_memory_device_unput(mbfl_memory_device *device);
4295 	char *pp, *ee;
4296 
4297 	/* initialize */
4298 	mbfl_memory_device_init(&device, 0, 0);
4299 	mbfl_string_init(&orig_str);
4300 	mbfl_string_init(&conv_str);
4301 
4302 	/* character-set, transfer-encoding */
4303 	tran_cs = mbfl_no_encoding_utf8;
4304 	head_enc = mbfl_no_encoding_base64;
4305 	body_enc = mbfl_no_encoding_base64;
4306 	lang = mbfl_no2language(MBSTRG(language));
4307 	if (lang != NULL) {
4308 		tran_cs = lang->mail_charset;
4309 		head_enc = lang->mail_header_encoding;
4310 		body_enc = lang->mail_body_encoding;
4311 	}
4312 
4313 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "sss|sS", &to, &to_len, &subject, &subject_len, &message, &message_len, &headers, &headers_len, &extra_cmd) == FAILURE) {
4314 		return;
4315 	}
4316 
4317 	/* ASCIIZ check */
4318 	MAIL_ASCIIZ_CHECK_MBSTRING(to, to_len);
4319 	MAIL_ASCIIZ_CHECK_MBSTRING(subject, subject_len);
4320 	MAIL_ASCIIZ_CHECK_MBSTRING(message, message_len);
4321 	if (headers) {
4322 		MAIL_ASCIIZ_CHECK_MBSTRING(headers, headers_len);
4323 	}
4324 	if (extra_cmd) {
4325 		MAIL_ASCIIZ_CHECK_MBSTRING(ZSTR_VAL(extra_cmd), ZSTR_LEN(extra_cmd));
4326 	}
4327 
4328 	zend_hash_init(&ht_headers, 0, NULL, ZVAL_PTR_DTOR, 0);
4329 
4330 	if (headers != NULL) {
4331 		_php_mbstr_parse_mail_headers(&ht_headers, headers, headers_len);
4332 	}
4333 
4334 	if ((s = zend_hash_str_find(&ht_headers, "CONTENT-TYPE", sizeof("CONTENT-TYPE") - 1))) {
4335 		char *tmp;
4336 		char *param_name;
4337 		char *charset = NULL;
4338 
4339 		ZEND_ASSERT(Z_TYPE_P(s) == IS_STRING);
4340 		p = strchr(Z_STRVAL_P(s), ';');
4341 
4342 		if (p != NULL) {
4343 			/* skipping the padded spaces */
4344 			do {
4345 				++p;
4346 			} while (*p == ' ' || *p == '\t');
4347 
4348 			if (*p != '\0') {
4349 				if ((param_name = php_strtok_r(p, "= ", &tmp)) != NULL) {
4350 					if (strcasecmp(param_name, "charset") == 0) {
4351 						enum mbfl_no_encoding _tran_cs = tran_cs;
4352 
4353 						charset = php_strtok_r(NULL, "= \"", &tmp);
4354 						if (charset != NULL) {
4355 							_tran_cs = mbfl_name2no_encoding(charset);
4356 						}
4357 
4358 						if (_tran_cs == mbfl_no_encoding_invalid) {
4359 							php_error_docref(NULL, E_WARNING, "Unsupported charset \"%s\" - will be regarded as ascii", charset);
4360 							_tran_cs = mbfl_no_encoding_ascii;
4361 						}
4362 						tran_cs = _tran_cs;
4363 					}
4364 				}
4365 			}
4366 		}
4367 		suppressed_hdrs.cnt_type = 1;
4368 	}
4369 
4370 	if ((s = zend_hash_str_find(&ht_headers, "CONTENT-TRANSFER-ENCODING", sizeof("CONTENT-TRANSFER-ENCODING") - 1))) {
4371 		enum mbfl_no_encoding _body_enc;
4372 
4373 		ZEND_ASSERT(Z_TYPE_P(s) == IS_STRING);
4374 		_body_enc = mbfl_name2no_encoding(Z_STRVAL_P(s));
4375 		switch (_body_enc) {
4376 			case mbfl_no_encoding_base64:
4377 			case mbfl_no_encoding_7bit:
4378 			case mbfl_no_encoding_8bit:
4379 				body_enc = _body_enc;
4380 				break;
4381 
4382 			default:
4383 				php_error_docref(NULL, E_WARNING, "Unsupported transfer encoding \"%s\" - will be regarded as 8bit", Z_STRVAL_P(s));
4384 				body_enc =	mbfl_no_encoding_8bit;
4385 				break;
4386 		}
4387 		suppressed_hdrs.cnt_trans_enc = 1;
4388 	}
4389 
4390 	/* To: */
4391 	if (to != NULL) {
4392 		if (to_len > 0) {
4393 			to_r = estrndup(to, to_len);
4394 			for (; to_len; to_len--) {
4395 				if (!isspace((unsigned char) to_r[to_len - 1])) {
4396 					break;
4397 				}
4398 				to_r[to_len - 1] = '\0';
4399 			}
4400 			for (i = 0; to_r[i]; i++) {
4401 			if (iscntrl((unsigned char) to_r[i])) {
4402 				/* According to RFC 822, section 3.1.1 long headers may be separated into
4403 				 * parts using CRLF followed at least one linear-white-space character ('\t' or ' ').
4404 				 * To prevent these separators from being replaced with a space, we use the
4405 				 * SKIP_LONG_HEADER_SEP_MBSTRING to skip over them.
4406 				 */
4407 				SKIP_LONG_HEADER_SEP_MBSTRING(to_r, i);
4408 				to_r[i] = ' ';
4409 			}
4410 			}
4411 		} else {
4412 			to_r = to;
4413 		}
4414 	} else {
4415 		php_error_docref(NULL, E_WARNING, "Missing To: field");
4416 		err = 1;
4417 	}
4418 
4419 	/* Subject: */
4420 	if (subject != NULL) {
4421 		orig_str.no_language = MBSTRG(language);
4422 		orig_str.val = (unsigned char *)subject;
4423 		orig_str.len = subject_len;
4424 		orig_str.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
4425 		if (orig_str.no_encoding == mbfl_no_encoding_invalid || orig_str.no_encoding == mbfl_no_encoding_pass) {
4426 			const mbfl_encoding *encoding = mbfl_identify_encoding2(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
4427 			orig_str.no_encoding = encoding ? encoding->no_encoding: mbfl_no_encoding_invalid;
4428 		}
4429 		pstr = mbfl_mime_header_encode(&orig_str, &conv_str, tran_cs, head_enc, "\n", sizeof("Subject: [PHP-jp nnnnnnnn]"));
4430 		if (pstr != NULL) {
4431 			subject_buf = subject = (char *)pstr->val;
4432 		}
4433 	} else {
4434 		php_error_docref(NULL, E_WARNING, "Missing Subject: field");
4435 		err = 1;
4436 	}
4437 
4438 	/* message body */
4439 	if (message != NULL) {
4440 		orig_str.no_language = MBSTRG(language);
4441 		orig_str.val = (unsigned char *)message;
4442 		orig_str.len = (unsigned int)message_len;
4443 		orig_str.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
4444 
4445 		if (orig_str.no_encoding == mbfl_no_encoding_invalid || orig_str.no_encoding == mbfl_no_encoding_pass) {
4446 			const mbfl_encoding *encoding = mbfl_identify_encoding2(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
4447 			orig_str.no_encoding = encoding ? encoding->no_encoding: mbfl_no_encoding_invalid;
4448 		}
4449 
4450 		pstr = NULL;
4451 		{
4452 			mbfl_string tmpstr;
4453 
4454 			if (mbfl_convert_encoding(&orig_str, &tmpstr, tran_cs) != NULL) {
4455 				tmpstr.no_encoding=mbfl_no_encoding_8bit;
4456 				pstr = mbfl_convert_encoding(&tmpstr, &conv_str, body_enc);
4457 				efree(tmpstr.val);
4458 			}
4459 		}
4460 		if (pstr != NULL) {
4461 			message_buf = message = (char *)pstr->val;
4462 		}
4463 	} else {
4464 		/* this is not really an error, so it is allowed. */
4465 		php_error_docref(NULL, E_WARNING, "Empty message body");
4466 		message = NULL;
4467 	}
4468 
4469 	/* other headers */
4470 #define PHP_MBSTR_MAIL_MIME_HEADER1 "MIME-Version: 1.0"
4471 #define PHP_MBSTR_MAIL_MIME_HEADER2 "Content-Type: text/plain"
4472 #define PHP_MBSTR_MAIL_MIME_HEADER3 "; charset="
4473 #define PHP_MBSTR_MAIL_MIME_HEADER4 "Content-Transfer-Encoding: "
4474 	if (headers != NULL) {
4475 		p = headers;
4476 		n = headers_len;
4477 		mbfl_memory_device_strncat(&device, p, n);
4478 		if (n > 0 && p[n - 1] != '\n') {
4479 			mbfl_memory_device_strncat(&device, "\n", 1);
4480 		}
4481 	}
4482 
4483 	if (!zend_hash_str_exists(&ht_headers, "MIME-VERSION", sizeof("MIME-VERSION") - 1)) {
4484 		mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER1, sizeof(PHP_MBSTR_MAIL_MIME_HEADER1) - 1);
4485 		mbfl_memory_device_strncat(&device, "\n", 1);
4486 	}
4487 
4488 	if (!suppressed_hdrs.cnt_type) {
4489 		mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER2, sizeof(PHP_MBSTR_MAIL_MIME_HEADER2) - 1);
4490 
4491 		p = (char *)mbfl_no2preferred_mime_name(tran_cs);
4492 		if (p != NULL) {
4493 			mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER3, sizeof(PHP_MBSTR_MAIL_MIME_HEADER3) - 1);
4494 			mbfl_memory_device_strcat(&device, p);
4495 		}
4496 		mbfl_memory_device_strncat(&device, "\n", 1);
4497 	}
4498 	if (!suppressed_hdrs.cnt_trans_enc) {
4499 		mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER4, sizeof(PHP_MBSTR_MAIL_MIME_HEADER4) - 1);
4500 		p = (char *)mbfl_no2preferred_mime_name(body_enc);
4501 		if (p == NULL) {
4502 			p = "7bit";
4503 		}
4504 		mbfl_memory_device_strcat(&device, p);
4505 		mbfl_memory_device_strncat(&device, "\n", 1);
4506 	}
4507 
4508 	mbfl_memory_device_unput(&device);
4509 	mbfl_memory_device_output('\0', &device);
4510 	headers = (char *)device.buffer;
4511 
4512 	if (force_extra_parameters) {
4513 		extra_cmd = php_escape_shell_cmd(force_extra_parameters);
4514 	} else if (extra_cmd) {
4515 		extra_cmd = php_escape_shell_cmd(ZSTR_VAL(extra_cmd));
4516 	}
4517 
4518 	if (!err && php_mail(to_r, subject, message, headers, extra_cmd ? ZSTR_VAL(extra_cmd) : NULL)) {
4519 		RETVAL_TRUE;
4520 	} else {
4521 		RETVAL_FALSE;
4522 	}
4523 
4524 	if (extra_cmd) {
4525 		zend_string_release(extra_cmd);
4526 	}
4527 
4528 	if (to_r != to) {
4529 		efree(to_r);
4530 	}
4531 	if (subject_buf) {
4532 		efree((void *)subject_buf);
4533 	}
4534 	if (message_buf) {
4535 		efree((void *)message_buf);
4536 	}
4537 	mbfl_memory_device_clear(&device);
4538 	zend_hash_destroy(&ht_headers);
4539 }
4540 
4541 #undef SKIP_LONG_HEADER_SEP_MBSTRING
4542 #undef MAIL_ASCIIZ_CHECK_MBSTRING
4543 #undef PHP_MBSTR_MAIL_MIME_HEADER1
4544 #undef PHP_MBSTR_MAIL_MIME_HEADER2
4545 #undef PHP_MBSTR_MAIL_MIME_HEADER3
4546 #undef PHP_MBSTR_MAIL_MIME_HEADER4
4547 /* }}} */
4548 
4549 /* {{{ proto mixed mb_get_info([string type])
4550    Returns the current settings of mbstring */
PHP_FUNCTION(mb_get_info)4551 PHP_FUNCTION(mb_get_info)
4552 {
4553 	char *typ = NULL;
4554 	size_t typ_len;
4555 	size_t n;
4556 	char *name;
4557 	const struct mb_overload_def *over_func;
4558 	zval row1, row2;
4559 	const mbfl_language *lang = mbfl_no2language(MBSTRG(language));
4560 	const mbfl_encoding **entry;
4561 
4562 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s", &typ, &typ_len) == FAILURE) {
4563 		return;
4564 	}
4565 
4566 	if (!typ || !strcasecmp("all", typ)) {
4567 		array_init(return_value);
4568 		if (MBSTRG(current_internal_encoding)) {
4569 			add_assoc_string(return_value, "internal_encoding", (char *)MBSTRG(current_internal_encoding)->name);
4570 		}
4571 		if (MBSTRG(http_input_identify)) {
4572 			add_assoc_string(return_value, "http_input", (char *)MBSTRG(http_input_identify)->name);
4573 		}
4574 		if (MBSTRG(current_http_output_encoding)) {
4575 			add_assoc_string(return_value, "http_output", (char *)MBSTRG(current_http_output_encoding)->name);
4576 		}
4577 		if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes") - 1, 0)) != NULL) {
4578 			add_assoc_string(return_value, "http_output_conv_mimetypes", name);
4579 		}
4580 		add_assoc_long(return_value, "func_overload", MBSTRG(func_overload));
4581 		if (MBSTRG(func_overload)){
4582 			over_func = &(mb_ovld[0]);
4583 			array_init(&row1);
4584 			while (over_func->type > 0) {
4585 				if ((MBSTRG(func_overload) & over_func->type) == over_func->type ) {
4586 					add_assoc_string(&row1, over_func->orig_func, over_func->ovld_func);
4587 				}
4588 				over_func++;
4589 			}
4590 			add_assoc_zval(return_value, "func_overload_list", &row1);
4591 		} else {
4592 			add_assoc_string(return_value, "func_overload_list", "no overload");
4593  		}
4594 		if (lang != NULL) {
4595 			if ((name = (char *)mbfl_no_encoding2name(lang->mail_charset)) != NULL) {
4596 				add_assoc_string(return_value, "mail_charset", name);
4597 			}
4598 			if ((name = (char *)mbfl_no_encoding2name(lang->mail_header_encoding)) != NULL) {
4599 				add_assoc_string(return_value, "mail_header_encoding", name);
4600 			}
4601 			if ((name = (char *)mbfl_no_encoding2name(lang->mail_body_encoding)) != NULL) {
4602 				add_assoc_string(return_value, "mail_body_encoding", name);
4603 			}
4604 		}
4605 		add_assoc_long(return_value, "illegal_chars", MBSTRG(illegalchars));
4606 		if (MBSTRG(encoding_translation)) {
4607 			add_assoc_string(return_value, "encoding_translation", "On");
4608 		} else {
4609 			add_assoc_string(return_value, "encoding_translation", "Off");
4610 		}
4611 		if ((name = (char *)mbfl_no_language2name(MBSTRG(language))) != NULL) {
4612 			add_assoc_string(return_value, "language", name);
4613 		}
4614 		n = MBSTRG(current_detect_order_list_size);
4615 		entry = MBSTRG(current_detect_order_list);
4616 		if (n > 0) {
4617 			size_t i;
4618 			array_init(&row2);
4619 			for (i = 0; i < n; i++) {
4620 				add_next_index_string(&row2, (*entry)->name);
4621 				entry++;
4622 			}
4623 			add_assoc_zval(return_value, "detect_order", &row2);
4624 		}
4625 		if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
4626 			add_assoc_string(return_value, "substitute_character", "none");
4627 		} else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
4628 			add_assoc_string(return_value, "substitute_character", "long");
4629 		} else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
4630 			add_assoc_string(return_value, "substitute_character", "entity");
4631 		} else {
4632 			add_assoc_long(return_value, "substitute_character", MBSTRG(current_filter_illegal_substchar));
4633 		}
4634 		if (MBSTRG(strict_detection)) {
4635 			add_assoc_string(return_value, "strict_detection", "On");
4636 		} else {
4637 			add_assoc_string(return_value, "strict_detection", "Off");
4638 		}
4639 	} else if (!strcasecmp("internal_encoding", typ)) {
4640 		if (MBSTRG(current_internal_encoding)) {
4641 			RETVAL_STRING((char *)MBSTRG(current_internal_encoding)->name);
4642 		}
4643 	} else if (!strcasecmp("http_input", typ)) {
4644 		if (MBSTRG(http_input_identify)) {
4645 			RETVAL_STRING((char *)MBSTRG(http_input_identify)->name);
4646 		}
4647 	} else if (!strcasecmp("http_output", typ)) {
4648 		if (MBSTRG(current_http_output_encoding)) {
4649 			RETVAL_STRING((char *)MBSTRG(current_http_output_encoding)->name);
4650 		}
4651 	} else if (!strcasecmp("http_output_conv_mimetypes", typ)) {
4652 		if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes") - 1, 0)) != NULL) {
4653 			RETVAL_STRING(name);
4654 		}
4655 	} else if (!strcasecmp("func_overload", typ)) {
4656  		RETVAL_LONG(MBSTRG(func_overload));
4657 	} else if (!strcasecmp("func_overload_list", typ)) {
4658 		if (MBSTRG(func_overload)){
4659 				over_func = &(mb_ovld[0]);
4660 				array_init(return_value);
4661 				while (over_func->type > 0) {
4662 					if ((MBSTRG(func_overload) & over_func->type) == over_func->type ) {
4663 						add_assoc_string(return_value, over_func->orig_func, over_func->ovld_func);
4664 					}
4665 					over_func++;
4666 				}
4667 		} else {
4668 			RETVAL_STRING("no overload");
4669 		}
4670 	} else if (!strcasecmp("mail_charset", typ)) {
4671 		if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_charset)) != NULL) {
4672 			RETVAL_STRING(name);
4673 		}
4674 	} else if (!strcasecmp("mail_header_encoding", typ)) {
4675 		if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_header_encoding)) != NULL) {
4676 			RETVAL_STRING(name);
4677 		}
4678 	} else if (!strcasecmp("mail_body_encoding", typ)) {
4679 		if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_body_encoding)) != NULL) {
4680 			RETVAL_STRING(name);
4681 		}
4682 	} else if (!strcasecmp("illegal_chars", typ)) {
4683 		RETVAL_LONG(MBSTRG(illegalchars));
4684 	} else if (!strcasecmp("encoding_translation", typ)) {
4685 		if (MBSTRG(encoding_translation)) {
4686 			RETVAL_STRING("On");
4687 		} else {
4688 			RETVAL_STRING("Off");
4689 		}
4690 	} else if (!strcasecmp("language", typ)) {
4691 		if ((name = (char *)mbfl_no_language2name(MBSTRG(language))) != NULL) {
4692 			RETVAL_STRING(name);
4693 		}
4694 	} else if (!strcasecmp("detect_order", typ)) {
4695 		n = MBSTRG(current_detect_order_list_size);
4696 		entry = MBSTRG(current_detect_order_list);
4697 		if (n > 0) {
4698 			size_t i;
4699 			array_init(return_value);
4700 			for (i = 0; i < n; i++) {
4701 				add_next_index_string(return_value, (*entry)->name);
4702 				entry++;
4703 			}
4704 		}
4705 	} else if (!strcasecmp("substitute_character", typ)) {
4706 		if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
4707 			RETVAL_STRING("none");
4708 		} else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
4709 			RETVAL_STRING("long");
4710 		} else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
4711 			RETVAL_STRING("entity");
4712 		} else {
4713 			RETVAL_LONG(MBSTRG(current_filter_illegal_substchar));
4714 		}
4715 	} else if (!strcasecmp("strict_detection", typ)) {
4716 		if (MBSTRG(strict_detection)) {
4717 			RETVAL_STRING("On");
4718 		} else {
4719 			RETVAL_STRING("Off");
4720 		}
4721 	} else {
4722 		RETURN_FALSE;
4723 	}
4724 }
4725 /* }}} */
4726 
php_mb_check_encoding(const char * input,size_t length,const char * enc)4727 MBSTRING_API int php_mb_check_encoding(const char *input, size_t length, const char *enc)
4728 {
4729 	const mbfl_encoding *encoding = MBSTRG(current_internal_encoding);
4730 	mbfl_buffer_converter *convd;
4731 	mbfl_string string, result, *ret = NULL;
4732 	long illegalchars = 0;
4733 
4734 	if (input == NULL) {
4735 		return MBSTRG(illegalchars) == 0;
4736 	}
4737 
4738 	if (enc != NULL) {
4739 		encoding = mbfl_name2encoding(enc);
4740 		if (!encoding || encoding == &mbfl_encoding_pass) {
4741 			php_error_docref(NULL, E_WARNING, "Invalid encoding \"%s\"", enc);
4742 			return 0;
4743 		}
4744 	}
4745 
4746 	convd = mbfl_buffer_converter_new2(encoding, encoding, 0);
4747 
4748 	if (convd == NULL) {
4749 		php_error_docref(NULL, E_WARNING, "Unable to create converter");
4750 		return 0;
4751 	}
4752 
4753 	mbfl_buffer_converter_illegal_mode(convd, MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE);
4754 	mbfl_buffer_converter_illegal_substchar(convd, 0);
4755 
4756 	/* initialize string */
4757 	mbfl_string_init_set(&string, mbfl_no_language_neutral, encoding->no_encoding);
4758 	mbfl_string_init(&result);
4759 
4760 	string.val = (unsigned char *) input;
4761 	string.len = length;
4762 
4763 	ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
4764 	illegalchars = mbfl_buffer_illegalchars(convd);
4765 	mbfl_buffer_converter_delete(convd);
4766 
4767 	if (ret != NULL) {
4768 		if (illegalchars == 0 && string.len == result.len && memcmp(string.val, result.val, string.len) == 0) {
4769 			mbfl_string_clear(&result);
4770 			return 1;
4771 		}
4772 
4773 		mbfl_string_clear(&result);
4774 	}
4775 
4776 	return 0;
4777 }
4778 
4779 /* {{{ proto bool mb_check_encoding([string var[, string encoding]])
4780    Check if the string is valid for the specified encoding */
PHP_FUNCTION(mb_check_encoding)4781 PHP_FUNCTION(mb_check_encoding)
4782 {
4783 	char *var = NULL;
4784 	size_t var_len;
4785 	char *enc = NULL;
4786 	size_t enc_len;
4787 
4788 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "|ss", &var, &var_len, &enc, &enc_len) == FAILURE) {
4789 		return;
4790     }
4791 
4792 	RETVAL_FALSE;
4793 
4794 	if (php_mb_check_encoding(var, var_len, enc)) {
4795 		RETVAL_TRUE;
4796 	}
4797 }
4798 /* }}} */
4799 
4800 /* {{{ php_mb_populate_current_detect_order_list */
php_mb_populate_current_detect_order_list(void)4801 static void php_mb_populate_current_detect_order_list(void)
4802 {
4803 	const mbfl_encoding **entry = 0;
4804 	size_t nentries;
4805 
4806 	if (MBSTRG(current_detect_order_list)) {
4807 		return;
4808 	}
4809 
4810 	if (MBSTRG(detect_order_list) && MBSTRG(detect_order_list_size)) {
4811 		nentries = MBSTRG(detect_order_list_size);
4812 		entry = (const mbfl_encoding **)safe_emalloc(nentries, sizeof(mbfl_encoding*), 0);
4813 		memcpy(entry, MBSTRG(detect_order_list), sizeof(mbfl_encoding*) * nentries);
4814 	} else {
4815 		const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
4816 		size_t i;
4817 		nentries = MBSTRG(default_detect_order_list_size);
4818 		entry = (const mbfl_encoding **)safe_emalloc(nentries, sizeof(mbfl_encoding*), 0);
4819 		for (i = 0; i < nentries; i++) {
4820 			entry[i] = mbfl_no2encoding(src[i]);
4821 		}
4822 	}
4823 	MBSTRG(current_detect_order_list) = entry;
4824 	MBSTRG(current_detect_order_list_size) = nentries;
4825 }
4826 /* }}} */
4827 
4828 /* {{{ static int php_mb_encoding_translation() */
php_mb_encoding_translation(void)4829 static int php_mb_encoding_translation(void)
4830 {
4831 	return MBSTRG(encoding_translation);
4832 }
4833 /* }}} */
4834 
4835 /* {{{ MBSTRING_API size_t php_mb_mbchar_bytes_ex() */
php_mb_mbchar_bytes_ex(const char * s,const mbfl_encoding * enc)4836 MBSTRING_API size_t php_mb_mbchar_bytes_ex(const char *s, const mbfl_encoding *enc)
4837 {
4838 	if (enc != NULL) {
4839 		if (enc->flag & MBFL_ENCTYPE_MBCS) {
4840 			if (enc->mblen_table != NULL) {
4841 				if (s != NULL) return enc->mblen_table[*(unsigned char *)s];
4842 			}
4843 		} else if (enc->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
4844 			return 2;
4845 		} else if (enc->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
4846 			return 4;
4847 		}
4848 	}
4849 	return 1;
4850 }
4851 /* }}} */
4852 
4853 /* {{{ MBSTRING_API size_t php_mb_mbchar_bytes() */
php_mb_mbchar_bytes(const char * s)4854 MBSTRING_API size_t php_mb_mbchar_bytes(const char *s)
4855 {
4856 	return php_mb_mbchar_bytes_ex(s, MBSTRG(internal_encoding));
4857 }
4858 /* }}} */
4859 
4860 /* {{{ MBSTRING_API char *php_mb_safe_strrchr_ex() */
php_mb_safe_strrchr_ex(const char * s,unsigned int c,size_t nbytes,const mbfl_encoding * enc)4861 MBSTRING_API char *php_mb_safe_strrchr_ex(const char *s, unsigned int c, size_t nbytes, const mbfl_encoding *enc)
4862 {
4863 	register const char *p = s;
4864 	char *last=NULL;
4865 
4866 	if (nbytes == (size_t)-1) {
4867 		size_t nb = 0;
4868 
4869 		while (*p != '\0') {
4870 			if (nb == 0) {
4871 				if ((unsigned char)*p == (unsigned char)c) {
4872 					last = (char *)p;
4873 				}
4874 				nb = php_mb_mbchar_bytes_ex(p, enc);
4875 				if (nb == 0) {
4876 					return NULL; /* something is going wrong! */
4877 				}
4878 			}
4879 			--nb;
4880 			++p;
4881 		}
4882 	} else {
4883 		register size_t bcnt = nbytes;
4884 		register size_t nbytes_char;
4885 		while (bcnt > 0) {
4886 			if ((unsigned char)*p == (unsigned char)c) {
4887 				last = (char *)p;
4888 			}
4889 			nbytes_char = php_mb_mbchar_bytes_ex(p, enc);
4890 			if (bcnt < nbytes_char) {
4891 				return NULL;
4892 			}
4893 			p += nbytes_char;
4894 			bcnt -= nbytes_char;
4895 		}
4896 	}
4897 	return last;
4898 }
4899 /* }}} */
4900 
4901 /* {{{ MBSTRING_API char *php_mb_safe_strrchr() */
php_mb_safe_strrchr(const char * s,unsigned int c,size_t nbytes)4902 MBSTRING_API char *php_mb_safe_strrchr(const char *s, unsigned int c, size_t nbytes)
4903 {
4904 	return php_mb_safe_strrchr_ex(s, c, nbytes, MBSTRG(internal_encoding));
4905 }
4906 /* }}} */
4907 
4908 /* {{{ MBSTRING_API int php_mb_stripos()
4909  */
php_mb_stripos(int mode,const char * old_haystack,unsigned int old_haystack_len,const char * old_needle,unsigned int old_needle_len,long offset,const char * from_encoding)4910 MBSTRING_API int php_mb_stripos(int mode, const char *old_haystack, unsigned int old_haystack_len, const char *old_needle, unsigned int old_needle_len, long offset, const char *from_encoding)
4911 {
4912 	int n;
4913 	mbfl_string haystack, needle;
4914 	n = -1;
4915 
4916 	mbfl_string_init(&haystack);
4917 	mbfl_string_init(&needle);
4918 	haystack.no_language = MBSTRG(language);
4919 	haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
4920 	needle.no_language = MBSTRG(language);
4921 	needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
4922 
4923 	do {
4924 		size_t len = 0;
4925 		haystack.val = (unsigned char *)php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, (char *)old_haystack, old_haystack_len, &len, from_encoding);
4926 		haystack.len = len;
4927 
4928 		if (!haystack.val) {
4929 			break;
4930 		}
4931 
4932 		if (haystack.len <= 0) {
4933 			break;
4934 		}
4935 
4936 		needle.val = (unsigned char *)php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, (char *)old_needle, old_needle_len, &len, from_encoding);
4937 		needle.len = len;
4938 
4939 		if (!needle.val) {
4940 			break;
4941 		}
4942 
4943 		if (needle.len <= 0) {
4944 			break;
4945 		}
4946 
4947 		haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding);
4948 		if (haystack.no_encoding == mbfl_no_encoding_invalid) {
4949 			php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", from_encoding);
4950 			break;
4951 		}
4952 
4953  		{
4954  			int haystack_char_len = mbfl_strlen(&haystack);
4955 
4956  			if (mode) {
4957  				if ((offset > 0 && offset > haystack_char_len) ||
4958  					(offset < 0 && -offset > haystack_char_len)) {
4959  					php_error_docref(NULL, E_WARNING, "Offset is greater than the length of haystack string");
4960  					break;
4961  				}
4962  			} else {
4963 				if (offset < 0) {
4964 					offset += (long)haystack_char_len;
4965 				}
4966  				if (offset < 0 || offset > haystack_char_len) {
4967  					php_error_docref(NULL, E_WARNING, "Offset not contained in string");
4968  					break;
4969  				}
4970  			}
4971 		}
4972 
4973 		n = mbfl_strpos(&haystack, &needle, offset, mode);
4974 	} while(0);
4975 
4976 	if (haystack.val) {
4977 		efree(haystack.val);
4978 	}
4979 
4980 	if (needle.val) {
4981 		efree(needle.val);
4982 	}
4983 
4984 	return n;
4985 }
4986 /* }}} */
4987 
php_mb_gpc_get_detect_order(const zend_encoding *** list,size_t * list_size)4988 static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *list_size) /* {{{ */
4989 {
4990 	*list = (const zend_encoding **)MBSTRG(http_input_list);
4991 	*list_size = MBSTRG(http_input_list_size);
4992 }
4993 /* }}} */
4994 
php_mb_gpc_set_input_encoding(const zend_encoding * encoding)4995 static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding) /* {{{ */
4996 {
4997 	MBSTRG(http_input_identify) = (const mbfl_encoding*)encoding;
4998 }
4999 /* }}} */
5000 
5001 #endif	/* HAVE_MBSTRING */
5002 
5003 /*
5004  * Local variables:
5005  * tab-width: 4
5006  * c-basic-offset: 4
5007  * End:
5008  * vim600: fdm=marker
5009  * vim: noet sw=4 ts=4
5010  */
5011