xref: /PHP-5.3/ext/mbstring/mbstring.c (revision a2045ff3)
1 /*
2    +----------------------------------------------------------------------+
3    | PHP Version 5                                                        |
4    +----------------------------------------------------------------------+
5    | Copyright (c) 1997-2013 The PHP Group                                |
6    +----------------------------------------------------------------------+
7    | This source file is subject to version 3.01 of the PHP license,      |
8    | that is bundled with this package in the file LICENSE, and is        |
9    | available through the world-wide-web at the following url:           |
10    | http://www.php.net/license/3_01.txt                                  |
11    | If you did not receive a copy of the PHP license and are unable to   |
12    | obtain it through the world-wide-web, please send a note to          |
13    | license@php.net so we can mail you a copy immediately.               |
14    +----------------------------------------------------------------------+
15    | Author: Tsukada Takuya <tsukada@fminn.nagano.nagano.jp>              |
16    |         Rui Hirokawa <hirokawa@php.net>                              |
17    +----------------------------------------------------------------------+
18  */
19 
20 /* $Id$ */
21 
22 /*
23  * PHP 4 Multibyte String module "mbstring"
24  *
25  * History:
26  *   2000.5.19  Release php-4.0RC2_jstring-1.0
27  *   2001.4.1   Release php4_jstring-1.0.91
28  *   2001.4.30  Release php4_jstring-1.1 (contribute to The PHP Group)
29  *   2001.5.1   Renamed from jstring to mbstring (hirokawa@php.net)
30  */
31 
32 /*
33  * PHP3 Internationalization support program.
34  *
35  * Copyright (c) 1999,2000 by the PHP3 internationalization team.
36  * All rights reserved.
37  *
38  * See README_PHP3-i18n-ja for more detail.
39  *
40  * Authors:
41  *    Hironori Sato <satoh@jpnnet.com>
42  *    Shigeru Kanemoto <sgk@happysize.co.jp>
43  *    Tsukada Takuya <tsukada@fminn.nagano.nagano.jp>
44  *    Rui Hirokawa <rui_hirokawa@ybb.ne.jp>
45  */
46 
47 /* {{{ includes */
48 #ifdef HAVE_CONFIG_H
49 #include "config.h"
50 #endif
51 
52 #include "php.h"
53 #include "php_ini.h"
54 #include "php_variables.h"
55 #include "mbstring.h"
56 #include "ext/standard/php_string.h"
57 #include "ext/standard/php_mail.h"
58 #include "ext/standard/exec.h"
59 #include "ext/standard/php_smart_str.h"
60 #include "ext/standard/url.h"
61 #include "main/php_output.h"
62 #include "ext/standard/info.h"
63 
64 #include "libmbfl/mbfl/mbfl_allocators.h"
65 
66 #include "php_variables.h"
67 #include "php_globals.h"
68 #include "rfc1867.h"
69 #include "php_content_types.h"
70 #include "SAPI.h"
71 #include "php_unicode.h"
72 #include "TSRM.h"
73 
74 #include "mb_gpc.h"
75 
76 #if HAVE_MBREGEX
77 #include "php_mbregex.h"
78 #endif
79 
80 #ifdef ZEND_MULTIBYTE
81 #include "zend_multibyte.h"
82 #endif /* ZEND_MULTIBYTE */
83 
84 #if HAVE_ONIG
85 #include "php_onig_compat.h"
86 #include <oniguruma.h>
87 #undef UChar
88 #elif HAVE_PCRE || HAVE_BUNDLED_PCRE
89 #include "ext/pcre/php_pcre.h"
90 #endif
91 /* }}} */
92 
93 #if HAVE_MBSTRING
94 
95 /* {{{ prototypes */
/* Declares the module-global storage for the mbstring extension. */
96 ZEND_DECLARE_MODULE_GLOBALS(mbstring)
97 
/* Globals init/shutdown hooks; they are registered in mbstring_module_entry
 * through PHP_GINIT(mbstring) / PHP_GSHUTDOWN(mbstring). */
98 static PHP_GINIT_FUNCTION(mbstring);
99 static PHP_GSHUTDOWN_FUNCTION(mbstring);
100 
/* Helpers that only exist in ZEND_MULTIBYTE builds; their definitions are
 * not part of this section of the file. */
101 #ifdef ZEND_MULTIBYTE
102 static size_t php_mb_oddlen(const unsigned char *string, size_t length, const char *encoding TSRMLS_DC);
103 static int php_mb_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const char *encoding_to, const char *encoding_from TSRMLS_DC);
104 static char* php_mb_encoding_detector(const unsigned char *arg_string, size_t arg_length, char *arg_list TSRMLS_DC);
105 static int php_mb_set_zend_encoding(TSRMLS_D);
106 #endif
107 /* }}} */
108 
109 /* {{{ php_mb_default_identify_list */
/* One row of the per-language default detection table: ties a language to
 * the encoding list that php_mb_nls_get_default_detect_order_list() hands
 * out for it. */
110 typedef struct _php_mb_nls_ident_list {
111 	enum mbfl_no_language lang;             /* language the row applies to */
112 	const enum mbfl_no_encoding* list;      /* encodings for that language, in table order */
113 	int list_size;                          /* number of elements in list */
114 } php_mb_nls_ident_list;
115 
/*
 * Per-language encoding lists referenced by php_mb_default_identify_list[].
 * Every list starts with ASCII; the order of the entries is the order in
 * which they are stored in the table and copied out to callers.
 */

/* Japanese */
116 static const enum mbfl_no_encoding php_mb_default_identify_list_ja[] = {
117 	mbfl_no_encoding_ascii,
118 	mbfl_no_encoding_jis,
119 	mbfl_no_encoding_utf8,
120 	mbfl_no_encoding_euc_jp,
121 	mbfl_no_encoding_sjis
122 };
123 
/* Simplified Chinese */
124 static const enum mbfl_no_encoding php_mb_default_identify_list_cn[] = {
125 	mbfl_no_encoding_ascii,
126 	mbfl_no_encoding_utf8,
127 	mbfl_no_encoding_euc_cn,
128 	mbfl_no_encoding_cp936
129 };
130 
/* Traditional Chinese */
131 static const enum mbfl_no_encoding php_mb_default_identify_list_tw_hk[] = {
132 	mbfl_no_encoding_ascii,
133 	mbfl_no_encoding_utf8,
134 	mbfl_no_encoding_euc_tw,
135 	mbfl_no_encoding_big5
136 };
137 
/* Korean */
138 static const enum mbfl_no_encoding php_mb_default_identify_list_kr[] = {
139 	mbfl_no_encoding_ascii,
140 	mbfl_no_encoding_utf8,
141 	mbfl_no_encoding_euc_kr,
142 	mbfl_no_encoding_uhc
143 };
144 
/* Russian */
145 static const enum mbfl_no_encoding php_mb_default_identify_list_ru[] = {
146 	mbfl_no_encoding_ascii,
147 	mbfl_no_encoding_utf8,
148 	mbfl_no_encoding_koi8r,
149 	mbfl_no_encoding_cp1251,
150 	mbfl_no_encoding_cp866
151 };
152 
/* Armenian */
153 static const enum mbfl_no_encoding php_mb_default_identify_list_hy[] = {
154 	mbfl_no_encoding_ascii,
155 	mbfl_no_encoding_utf8,
156 	mbfl_no_encoding_armscii8
157 };
158 
/* Turkish */
159 static const enum mbfl_no_encoding php_mb_default_identify_list_tr[] = {
160 	mbfl_no_encoding_ascii,
161 	mbfl_no_encoding_utf8,
162 	mbfl_no_encoding_cp1254,
163 	mbfl_no_encoding_8859_9
164 };
165 
/* Ukrainian */
166 static const enum mbfl_no_encoding php_mb_default_identify_list_ua[] = {
167 	mbfl_no_encoding_ascii,
168 	mbfl_no_encoding_utf8,
169 	mbfl_no_encoding_koi8u
170 };
171 
/* Language-neutral list; also the fallback used when a language has no row
 * in php_mb_default_identify_list[]. */
172 static const enum mbfl_no_encoding php_mb_default_identify_list_neut[] = {
173 	mbfl_no_encoding_ascii,
174 	mbfl_no_encoding_utf8
175 };
176 
177 
178 static const php_mb_nls_ident_list php_mb_default_identify_list[] = {
179 	{ mbfl_no_language_japanese, php_mb_default_identify_list_ja, sizeof(php_mb_default_identify_list_ja) / sizeof(php_mb_default_identify_list_ja[0]) },
180 	{ mbfl_no_language_korean, php_mb_default_identify_list_kr, sizeof(php_mb_default_identify_list_kr) / sizeof(php_mb_default_identify_list_kr[0]) },
181 	{ mbfl_no_language_traditional_chinese, php_mb_default_identify_list_tw_hk, sizeof(php_mb_default_identify_list_tw_hk) / sizeof(php_mb_default_identify_list_tw_hk[0]) },
182 	{ mbfl_no_language_simplified_chinese, php_mb_default_identify_list_cn, sizeof(php_mb_default_identify_list_cn) / sizeof(php_mb_default_identify_list_cn[0]) },
183 	{ mbfl_no_language_russian, php_mb_default_identify_list_ru, sizeof(php_mb_default_identify_list_ru) / sizeof(php_mb_default_identify_list_ru[0]) },
184 	{ mbfl_no_language_armenian, php_mb_default_identify_list_hy, sizeof(php_mb_default_identify_list_hy) / sizeof(php_mb_default_identify_list_hy[0]) },
185 	{ mbfl_no_language_turkish, php_mb_default_identify_list_tr, sizeof(php_mb_default_identify_list_tr) / sizeof(php_mb_default_identify_list_tr[0]) },
186 	{ mbfl_no_language_ukrainian, php_mb_default_identify_list_ua, sizeof(php_mb_default_identify_list_ua) / sizeof(php_mb_default_identify_list_ua[0]) },
187 	{ mbfl_no_language_neutral, php_mb_default_identify_list_neut, sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]) }
188 };
189 
190 /* }}} */
191 
192 /* {{{ mb_overload_def mb_ovld[] */
/*
 * Function overloading table. Each row names a built-in function, the
 * mbstring function paired with it, and an "mb_orig_*" name; the first
 * column is the MB_OVERLOAD_* group the row belongs to.
 * NOTE(review): struct mb_overload_def is declared outside this section, so
 * the exact field names/meaning should be confirmed there.
 * The regex rows exist only when HAVE_MBREGEX is set; the all-NULL row ends
 * the table.
 */
193 static const struct mb_overload_def mb_ovld[] = {
194 	{MB_OVERLOAD_MAIL, "mail", "mb_send_mail", "mb_orig_mail"},
195 	{MB_OVERLOAD_STRING, "strlen", "mb_strlen", "mb_orig_strlen"},
196 	{MB_OVERLOAD_STRING, "strpos", "mb_strpos", "mb_orig_strpos"},
197 	{MB_OVERLOAD_STRING, "strrpos", "mb_strrpos", "mb_orig_strrpos"},
198 	{MB_OVERLOAD_STRING, "stripos", "mb_stripos", "mb_orig_stripos"},
199 	{MB_OVERLOAD_STRING, "strripos", "mb_strripos", "mb_orig_strripos"},
200 	{MB_OVERLOAD_STRING, "strstr", "mb_strstr", "mb_orig_strstr"},
201 	{MB_OVERLOAD_STRING, "strrchr", "mb_strrchr", "mb_orig_strrchr"},
202 	{MB_OVERLOAD_STRING, "stristr", "mb_stristr", "mb_orig_stristr"},
203 	{MB_OVERLOAD_STRING, "substr", "mb_substr", "mb_orig_substr"},
204 	{MB_OVERLOAD_STRING, "strtolower", "mb_strtolower", "mb_orig_strtolower"},
205 	{MB_OVERLOAD_STRING, "strtoupper", "mb_strtoupper", "mb_orig_strtoupper"},
206 	{MB_OVERLOAD_STRING, "substr_count", "mb_substr_count", "mb_orig_substr_count"},
207 #if HAVE_MBREGEX
208 	{MB_OVERLOAD_REGEX, "ereg", "mb_ereg", "mb_orig_ereg"},
209 	{MB_OVERLOAD_REGEX, "eregi", "mb_eregi", "mb_orig_eregi"},
210 	{MB_OVERLOAD_REGEX, "ereg_replace", "mb_ereg_replace", "mb_orig_ereg_replace"},
211 	{MB_OVERLOAD_REGEX, "eregi_replace", "mb_eregi_replace", "mb_orig_eregi_replace"},
212 	{MB_OVERLOAD_REGEX, "split", "mb_split", "mb_orig_split"},
213 #endif
214 	{0, NULL, NULL, NULL}
215 };
216 /* }}} */
217 
218 /* {{{ arginfo */
/*
 * Argument descriptors for the configuration getters/setters and the
 * request input/output helpers. Per the Zend API macro signature, the last
 * argument of ZEND_BEGIN_ARG_INFO_EX is the number of required parameters
 * and the first argument of ZEND_ARG_INFO is 1 for a by-reference parameter.
 * Parameters are listed in call order.
 */
219 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_language, 0, 0, 0)
220 	ZEND_ARG_INFO(0, language)
221 ZEND_END_ARG_INFO()
222 
223 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_internal_encoding, 0, 0, 0)
224 	ZEND_ARG_INFO(0, encoding)
225 ZEND_END_ARG_INFO()
226 
227 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_http_input, 0, 0, 0)
228 	ZEND_ARG_INFO(0, type)
229 ZEND_END_ARG_INFO()
230 
231 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_http_output, 0, 0, 0)
232 	ZEND_ARG_INFO(0, encoding)
233 ZEND_END_ARG_INFO()
234 
235 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_detect_order, 0, 0, 0)
236 	ZEND_ARG_INFO(0, encoding)
237 ZEND_END_ARG_INFO()
238 
239 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substitute_character, 0, 0, 0)
240 	ZEND_ARG_INFO(0, substchar)
241 ZEND_END_ARG_INFO()
242 
243 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_preferred_mime_name, 0, 0, 1)
244 	ZEND_ARG_INFO(0, encoding)
245 ZEND_END_ARG_INFO()
246 
/* result is declared by-reference and is optional (one required argument). */
247 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_parse_str, 0, 0, 1)
248 	ZEND_ARG_INFO(0, encoded_string)
249 	ZEND_ARG_INFO(1, result)
250 ZEND_END_ARG_INFO()
251 
252 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_output_handler, 0, 0, 2)
253 	ZEND_ARG_INFO(0, contents)
254 	ZEND_ARG_INFO(0, status)
255 ZEND_END_ARG_INFO()
256 
/*
 * Argument descriptors for the string measurement, search and slicing
 * functions. Per the Zend API macro signature, the last argument of
 * ZEND_BEGIN_ARG_INFO_EX is the number of required parameters; every
 * parameter here is by-value (ZEND_ARG_INFO first argument is 0). In each
 * descriptor the optional encoding parameter comes last.
 */
257 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strlen, 0, 0, 1)
258 	ZEND_ARG_INFO(0, str)
259 	ZEND_ARG_INFO(0, encoding)
260 ZEND_END_ARG_INFO()
261 
/* Position searches: (haystack, needle [, offset [, encoding]]). */
262 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strpos, 0, 0, 2)
263 	ZEND_ARG_INFO(0, haystack)
264 	ZEND_ARG_INFO(0, needle)
265 	ZEND_ARG_INFO(0, offset)
266 	ZEND_ARG_INFO(0, encoding)
267 ZEND_END_ARG_INFO()
268 
269 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrpos, 0, 0, 2)
270 	ZEND_ARG_INFO(0, haystack)
271 	ZEND_ARG_INFO(0, needle)
272 	ZEND_ARG_INFO(0, offset)
273 	ZEND_ARG_INFO(0, encoding)
274 ZEND_END_ARG_INFO()
275 
276 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_stripos, 0, 0, 2)
277 	ZEND_ARG_INFO(0, haystack)
278 	ZEND_ARG_INFO(0, needle)
279 	ZEND_ARG_INFO(0, offset)
280 	ZEND_ARG_INFO(0, encoding)
281 ZEND_END_ARG_INFO()
282 
283 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strripos, 0, 0, 2)
284 	ZEND_ARG_INFO(0, haystack)
285 	ZEND_ARG_INFO(0, needle)
286 	ZEND_ARG_INFO(0, offset)
287 	ZEND_ARG_INFO(0, encoding)
288 ZEND_END_ARG_INFO()
289 
/* Substring searches: (haystack, needle [, part [, encoding]]). */
290 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strstr, 0, 0, 2)
291 	ZEND_ARG_INFO(0, haystack)
292 	ZEND_ARG_INFO(0, needle)
293 	ZEND_ARG_INFO(0, part)
294 	ZEND_ARG_INFO(0, encoding)
295 ZEND_END_ARG_INFO()
296 
297 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrchr, 0, 0, 2)
298 	ZEND_ARG_INFO(0, haystack)
299 	ZEND_ARG_INFO(0, needle)
300 	ZEND_ARG_INFO(0, part)
301 	ZEND_ARG_INFO(0, encoding)
302 ZEND_END_ARG_INFO()
303 
304 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_stristr, 0, 0, 2)
305 	ZEND_ARG_INFO(0, haystack)
306 	ZEND_ARG_INFO(0, needle)
307 	ZEND_ARG_INFO(0, part)
308 	ZEND_ARG_INFO(0, encoding)
309 ZEND_END_ARG_INFO()
310 
311 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrichr, 0, 0, 2)
312 	ZEND_ARG_INFO(0, haystack)
313 	ZEND_ARG_INFO(0, needle)
314 	ZEND_ARG_INFO(0, part)
315 	ZEND_ARG_INFO(0, encoding)
316 ZEND_END_ARG_INFO()
317 
318 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substr_count, 0, 0, 2)
319 	ZEND_ARG_INFO(0, haystack)
320 	ZEND_ARG_INFO(0, needle)
321 	ZEND_ARG_INFO(0, encoding)
322 ZEND_END_ARG_INFO()
323 
/* Slicing: (str, start [, length [, encoding]]). */
324 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substr, 0, 0, 2)
325 	ZEND_ARG_INFO(0, str)
326 	ZEND_ARG_INFO(0, start)
327 	ZEND_ARG_INFO(0, length)
328 	ZEND_ARG_INFO(0, encoding)
329 ZEND_END_ARG_INFO()
330 
331 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strcut, 0, 0, 2)
332 	ZEND_ARG_INFO(0, str)
333 	ZEND_ARG_INFO(0, start)
334 	ZEND_ARG_INFO(0, length)
335 	ZEND_ARG_INFO(0, encoding)
336 ZEND_END_ARG_INFO()
337 
338 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strwidth, 0, 0, 1)
339 	ZEND_ARG_INFO(0, str)
340 	ZEND_ARG_INFO(0, encoding)
341 ZEND_END_ARG_INFO()
342 
343 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strimwidth, 0, 0, 3)
344 	ZEND_ARG_INFO(0, str)
345 	ZEND_ARG_INFO(0, start)
346 	ZEND_ARG_INFO(0, width)
347 	ZEND_ARG_INFO(0, trimmarker)
348 	ZEND_ARG_INFO(0, encoding)
349 ZEND_END_ARG_INFO()
350 
/*
 * Argument descriptors for the conversion, detection, MIME, numeric-entity,
 * mail and introspection functions. Per the Zend API macro signature, the
 * last argument of ZEND_BEGIN_ARG_INFO_EX is the number of required
 * parameters, its second argument is the "pass remaining arguments by
 * reference" flag, and the first argument of ZEND_ARG_INFO is 1 for a
 * by-reference parameter.
 */
351 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_encoding, 0, 0, 2)
352 	ZEND_ARG_INFO(0, str)
353 	ZEND_ARG_INFO(0, to)
354 	ZEND_ARG_INFO(0, from)
355 ZEND_END_ARG_INFO()
356 
357 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_case, 0, 0, 2)
358 	ZEND_ARG_INFO(0, sourcestring)
359 	ZEND_ARG_INFO(0, mode)
360 	ZEND_ARG_INFO(0, encoding)
361 ZEND_END_ARG_INFO()
362 
363 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strtoupper, 0, 0, 1)
364 	ZEND_ARG_INFO(0, sourcestring)
365 	ZEND_ARG_INFO(0, encoding)
366 ZEND_END_ARG_INFO()
367 
368 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strtolower, 0, 0, 1)
369 	ZEND_ARG_INFO(0, sourcestring)
370 	ZEND_ARG_INFO(0, encoding)
371 ZEND_END_ARG_INFO()
372 
373 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_detect_encoding, 0, 0, 1)
374 	ZEND_ARG_INFO(0, str)
375 	ZEND_ARG_INFO(0, encoding_list)
376 	ZEND_ARG_INFO(0, strict)
377 ZEND_END_ARG_INFO()
378 
/* No parameters. */
379 ZEND_BEGIN_ARG_INFO(arginfo_mb_list_encodings, 0)
380 ZEND_END_ARG_INFO()
381 
382 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encoding_aliases, 0, 0, 1)
383 	ZEND_ARG_INFO(0, encoding)
384 ZEND_END_ARG_INFO()
385 
386 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encode_mimeheader, 0, 0, 1)
387 	ZEND_ARG_INFO(0, str)
388 	ZEND_ARG_INFO(0, charset)
389 	ZEND_ARG_INFO(0, transfer)
390 	ZEND_ARG_INFO(0, linefeed)
391 	ZEND_ARG_INFO(0, indent)
392 ZEND_END_ARG_INFO()
393 
394 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_decode_mimeheader, 0, 0, 1)
395 	ZEND_ARG_INFO(0, string)
396 ZEND_END_ARG_INFO()
397 
398 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_kana, 0, 0, 1)
399 	ZEND_ARG_INFO(0, str)
400 	ZEND_ARG_INFO(0, option)
401 	ZEND_ARG_INFO(0, encoding)
402 ZEND_END_ARG_INFO()
403 
/* Variadic: the second macro argument is 1 and the trailing "..." entry is
 * by-reference, so the variables after (to, from) are taken by reference. */
404 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_variables, 1, 0, 3)
405 	ZEND_ARG_INFO(0, to)
406 	ZEND_ARG_INFO(0, from)
407 	ZEND_ARG_INFO(1, ...)
408 ZEND_END_ARG_INFO()
409 
410 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encode_numericentity, 0, 0, 2)
411 	ZEND_ARG_INFO(0, string)
412 	ZEND_ARG_INFO(0, convmap)
413 	ZEND_ARG_INFO(0, encoding)
414 ZEND_END_ARG_INFO()
415 
416 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_decode_numericentity, 0, 0, 2)
417 	ZEND_ARG_INFO(0, string)
418 	ZEND_ARG_INFO(0, convmap)
419 	ZEND_ARG_INFO(0, encoding)
420 ZEND_END_ARG_INFO()
421 
422 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_send_mail, 0, 0, 3)
423 	ZEND_ARG_INFO(0, to)
424 	ZEND_ARG_INFO(0, subject)
425 	ZEND_ARG_INFO(0, message)
426 	ZEND_ARG_INFO(0, additional_headers)
427 	ZEND_ARG_INFO(0, additional_parameters)
428 ZEND_END_ARG_INFO()
429 
430 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_get_info, 0, 0, 0)
431 	ZEND_ARG_INFO(0, type)
432 ZEND_END_ARG_INFO()
433 
434 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_check_encoding, 0, 0, 0)
435 	ZEND_ARG_INFO(0, var)
436 	ZEND_ARG_INFO(0, encoding)
437 ZEND_END_ARG_INFO()
438 
/*
 * Argument descriptors for the multibyte regex (mb_ereg*) functions. None of
 * them is referenced directly by mbstring_functions[] in this section;
 * presumably they are consumed through PHP_MBREGEX_FUNCTION_ENTRIES (defined
 * elsewhere) - confirm in php_mbregex.h.
 * Per the Zend API macro signature, the last argument of
 * ZEND_BEGIN_ARG_INFO_EX is the number of required parameters and the first
 * argument of ZEND_ARG_INFO is 1 for a by-reference parameter.
 */
439 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_regex_encoding, 0, 0, 0)
440 	ZEND_ARG_INFO(0, encoding)
441 ZEND_END_ARG_INFO()
442 
/* registers is an optional by-reference parameter. */
443 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg, 0, 0, 2)
444 	ZEND_ARG_INFO(0, pattern)
445 	ZEND_ARG_INFO(0, string)
446 	ZEND_ARG_INFO(1, registers)
447 ZEND_END_ARG_INFO()
448 
/* registers is an optional by-reference parameter. */
449 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_eregi, 0, 0, 2)
450 	ZEND_ARG_INFO(0, pattern)
451 	ZEND_ARG_INFO(0, string)
452 	ZEND_ARG_INFO(1, registers)
453 ZEND_END_ARG_INFO()
454 
455 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_replace, 0, 0, 3)
456 	ZEND_ARG_INFO(0, pattern)
457 	ZEND_ARG_INFO(0, replacement)
458 	ZEND_ARG_INFO(0, string)
459 	ZEND_ARG_INFO(0, option)
460 ZEND_END_ARG_INFO()
461 
/* Unlike arginfo_mb_ereg_replace, no option parameter is declared here. */
462 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_eregi_replace, 0, 0, 3)
463 	ZEND_ARG_INFO(0, pattern)
464 	ZEND_ARG_INFO(0, replacement)
465 	ZEND_ARG_INFO(0, string)
466 ZEND_END_ARG_INFO()
467 
468 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_split, 0, 0, 2)
469 	ZEND_ARG_INFO(0, pattern)
470 	ZEND_ARG_INFO(0, string)
471 	ZEND_ARG_INFO(0, limit)
472 ZEND_END_ARG_INFO()
473 
474 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_match, 0, 0, 2)
475 	ZEND_ARG_INFO(0, pattern)
476 	ZEND_ARG_INFO(0, string)
477 	ZEND_ARG_INFO(0, option)
478 ZEND_END_ARG_INFO()
479 
/* The three search variants share the same optional (pattern, option) pair. */
480 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search, 0, 0, 0)
481 	ZEND_ARG_INFO(0, pattern)
482 	ZEND_ARG_INFO(0, option)
483 ZEND_END_ARG_INFO()
484 
485 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_pos, 0, 0, 0)
486 	ZEND_ARG_INFO(0, pattern)
487 	ZEND_ARG_INFO(0, option)
488 ZEND_END_ARG_INFO()
489 
490 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_regs, 0, 0, 0)
491 	ZEND_ARG_INFO(0, pattern)
492 	ZEND_ARG_INFO(0, option)
493 ZEND_END_ARG_INFO()
494 
495 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_init, 0, 0, 1)
496 	ZEND_ARG_INFO(0, string)
497 	ZEND_ARG_INFO(0, pattern)
498 	ZEND_ARG_INFO(0, option)
499 ZEND_END_ARG_INFO()
500 
/* No parameters. */
501 ZEND_BEGIN_ARG_INFO(arginfo_mb_ereg_search_getregs, 0)
502 ZEND_END_ARG_INFO()
503 
/* No parameters. */
504 ZEND_BEGIN_ARG_INFO(arginfo_mb_ereg_search_getpos, 0)
505 ZEND_END_ARG_INFO()
506 
507 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_setpos, 0, 0, 1)
508 	ZEND_ARG_INFO(0, position)
509 ZEND_END_ARG_INFO()
510 
511 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_regex_set_options, 0, 0, 0)
512 	ZEND_ARG_INFO(0, options)
513 ZEND_END_ARG_INFO()
514 /* }}} */
515 
516 /* {{{ zend_function_entry mbstring_functions[] */
/*
 * Function table of the extension: pairs every mb_* function with its
 * argument descriptor. The regex entries are appended through
 * PHP_MBREGEX_FUNCTION_ENTRIES only when HAVE_MBREGEX is set; PHP_FE_END
 * terminates the table. Referenced from mbstring_module_entry.
 */
517 const zend_function_entry mbstring_functions[] = {
518 	PHP_FE(mb_convert_case,			arginfo_mb_convert_case)
519 	PHP_FE(mb_strtoupper,			arginfo_mb_strtoupper)
520 	PHP_FE(mb_strtolower,			arginfo_mb_strtolower)
521 	PHP_FE(mb_language,				arginfo_mb_language)
522 	PHP_FE(mb_internal_encoding,	arginfo_mb_internal_encoding)
523 	PHP_FE(mb_http_input,			arginfo_mb_http_input)
524 	PHP_FE(mb_http_output,			arginfo_mb_http_output)
525 	PHP_FE(mb_detect_order,			arginfo_mb_detect_order)
526 	PHP_FE(mb_substitute_character,	arginfo_mb_substitute_character)
527 	PHP_FE(mb_parse_str,			arginfo_mb_parse_str)
528 	PHP_FE(mb_output_handler,		arginfo_mb_output_handler)
529 	PHP_FE(mb_preferred_mime_name,	arginfo_mb_preferred_mime_name)
530 	PHP_FE(mb_strlen,				arginfo_mb_strlen)
531 	PHP_FE(mb_strpos,				arginfo_mb_strpos)
532 	PHP_FE(mb_strrpos,				arginfo_mb_strrpos)
533 	PHP_FE(mb_stripos,				arginfo_mb_stripos)
534 	PHP_FE(mb_strripos,				arginfo_mb_strripos)
535 	PHP_FE(mb_strstr,				arginfo_mb_strstr)
536 	PHP_FE(mb_strrchr,				arginfo_mb_strrchr)
537 	PHP_FE(mb_stristr,				arginfo_mb_stristr)
538 	PHP_FE(mb_strrichr,				arginfo_mb_strrichr)
539 	PHP_FE(mb_substr_count,			arginfo_mb_substr_count)
540 	PHP_FE(mb_substr,				arginfo_mb_substr)
541 	PHP_FE(mb_strcut,				arginfo_mb_strcut)
542 	PHP_FE(mb_strwidth,				arginfo_mb_strwidth)
543 	PHP_FE(mb_strimwidth,			arginfo_mb_strimwidth)
544 	PHP_FE(mb_convert_encoding,		arginfo_mb_convert_encoding)
545 	PHP_FE(mb_detect_encoding,		arginfo_mb_detect_encoding)
546 	PHP_FE(mb_list_encodings,		arginfo_mb_list_encodings)
547 	PHP_FE(mb_encoding_aliases,		arginfo_mb_encoding_aliases)
548 	PHP_FE(mb_convert_kana,			arginfo_mb_convert_kana)
549 	PHP_FE(mb_encode_mimeheader,	arginfo_mb_encode_mimeheader)
550 	PHP_FE(mb_decode_mimeheader,	arginfo_mb_decode_mimeheader)
551 	PHP_FE(mb_convert_variables,	arginfo_mb_convert_variables)
552 	PHP_FE(mb_encode_numericentity,	arginfo_mb_encode_numericentity)
553 	PHP_FE(mb_decode_numericentity,	arginfo_mb_decode_numericentity)
554 	PHP_FE(mb_send_mail,			arginfo_mb_send_mail)
555 	PHP_FE(mb_get_info,				arginfo_mb_get_info)
556 	PHP_FE(mb_check_encoding,		arginfo_mb_check_encoding)
557 #if HAVE_MBREGEX
558 	PHP_MBREGEX_FUNCTION_ENTRIES
559 #endif
560 	PHP_FE_END
561 };
562 /* }}} */
563 
564 /* {{{ zend_module_entry mbstring_module_entry */
/*
 * Module descriptor: registers the extension under the name "mbstring" with
 * its function table, the module/request startup and shutdown hooks, the
 * info hook and the globals constructor/destructor.
 */
565 zend_module_entry mbstring_module_entry = {
566     STANDARD_MODULE_HEADER,
567 	"mbstring",
568 	mbstring_functions,
569 	PHP_MINIT(mbstring),
570 	PHP_MSHUTDOWN(mbstring),
571 	PHP_RINIT(mbstring),
572 	PHP_RSHUTDOWN(mbstring),
573 	PHP_MINFO(mbstring),
574     NO_VERSION_YET, /* no explicit extension version is declared */
575     PHP_MODULE_GLOBALS(mbstring),
576     PHP_GINIT(mbstring),
577     PHP_GSHUTDOWN(mbstring),
578     NULL, /* NOTE(review): presumably the post-deactivate hook slot - confirm against zend_module_entry */
579 	STANDARD_MODULE_PROPERTIES_EX
580 };
581 /* }}} */
582 
583 /* {{{ static sapi_post_entry php_post_entries[] */
/*
 * POST content-type table using php_std_post_handler for form data. It has
 * the same shape as mbstr_post_entries[], which uses php_mb_post_handler for
 * the same content type instead. The all-NULL row terminates the table.
 * NOTE(review): where each table is registered is outside this section.
 */
584 static sapi_post_entry php_post_entries[] = {
585 	{ DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data,	php_std_post_handler },
586 	{ MULTIPART_CONTENT_TYPE,    sizeof(MULTIPART_CONTENT_TYPE)-1,    NULL,                         rfc1867_post_handler },
587 	{ NULL, 0, NULL, NULL }
588 };
589 /* }}} */
590 
/* Emitted only when COMPILE_DL_MBSTRING is defined (presumably the
 * shared-object build - confirm in the build configuration). */
591 #ifdef COMPILE_DL_MBSTRING
ZEND_GET_MODULE(mbstring)592 ZEND_GET_MODULE(mbstring)
593 #endif
594 
595 /* {{{ allocators */
/* libmbfl allocation hook: forwards the request to emalloc(). */
static void *_php_mb_allocators_malloc(unsigned int sz)
{
	void *block = emalloc(sz);

	return block;
}
600 
/* libmbfl reallocation hook: forwards the request to erealloc(). */
static void *_php_mb_allocators_realloc(void *ptr, unsigned int sz)
{
	void *resized = erealloc(ptr, sz);

	return resized;
}
605 
/* libmbfl zeroed-array allocation hook: forwards the request to ecalloc(). */
static void *_php_mb_allocators_calloc(unsigned int nelems, unsigned int szelem)
{
	void *block = ecalloc(nelems, szelem);

	return block;
}
610 
/* libmbfl release hook: hands the block back through efree(). */
static void _php_mb_allocators_free(void *ptr)
{
	efree(ptr);
	return;
}
615 
/* libmbfl persistent allocation hook: pemalloc() with the persistent flag
 * set to 1. */
static void *_php_mb_allocators_pmalloc(unsigned int sz)
{
	void *block = pemalloc(sz, 1);

	return block;
}
620 
/* libmbfl persistent reallocation hook: perealloc() with the persistent
 * flag set to 1. */
static void *_php_mb_allocators_prealloc(void *ptr, unsigned int sz)
{
	void *resized = perealloc(ptr, sz, 1);

	return resized;
}
625 
/* libmbfl persistent release hook: pefree() with the persistent flag set
 * to 1. */
static void _php_mb_allocators_pfree(void *ptr)
{
	pefree(ptr, 1);
	return;
}
630 
/*
 * Allocator vtable for libmbfl, filled with the wrappers defined above:
 * malloc/realloc/calloc/free built on the e*() allocation macros, followed
 * by pmalloc/prealloc/pfree built on the pe*() macros with persistent = 1.
 * NOTE(review): the slot order must match mbfl_allocators in
 * libmbfl/mbfl/mbfl_allocators.h - not visible from this section.
 */
631 static mbfl_allocators _php_mb_allocators = {
632 	_php_mb_allocators_malloc,
633 	_php_mb_allocators_realloc,
634 	_php_mb_allocators_calloc,
635 	_php_mb_allocators_free,
636 	_php_mb_allocators_pmalloc,
637 	_php_mb_allocators_prealloc,
638 	_php_mb_allocators_pfree
639 };
640 /* }}} */
641 
642 /* {{{ static sapi_post_entry mbstr_post_entries[] */
/*
 * POST content-type table using php_mb_post_handler for form data. It has
 * the same shape as php_post_entries[], which uses php_std_post_handler for
 * the same content type instead. The all-NULL row terminates the table.
 */
643 static sapi_post_entry mbstr_post_entries[] = {
644 	{ DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data, php_mb_post_handler },
645 	{ MULTIPART_CONTENT_TYPE,    sizeof(MULTIPART_CONTENT_TYPE)-1,    NULL,                         rfc1867_post_handler },
646 	{ NULL, 0, NULL, NULL }
647 };
648 /* }}} */
649 
650 /* {{{ static int php_mb_parse_encoding_list()
651  *  Return 0 if input contains any illegal encoding, otherwise 1.
652  *  Even if any illegal encoding is detected the result may contain a list
653  *  of parsed encodings.
654  */
655 static int
php_mb_parse_encoding_list(const char * value,int value_length,enum mbfl_no_encoding ** return_list,int * return_size,int persistent TSRMLS_DC)656 php_mb_parse_encoding_list(const char *value, int value_length, enum mbfl_no_encoding **return_list, int *return_size, int persistent TSRMLS_DC)
657 {
658 	int n, l, size, bauto, ret = 1;
659 	char *p, *p1, *p2, *endp, *tmpstr;
660 	enum mbfl_no_encoding no_encoding;
661 	enum mbfl_no_encoding *src, *entry, *list;
662 
663 	list = NULL;
664 	if (value == NULL || value_length <= 0) {
665 		if (return_list) {
666 			*return_list = NULL;
667 		}
668 		if (return_size) {
669 			*return_size = 0;
670 		}
671 		return 0;
672 	} else {
673 		enum mbfl_no_encoding *identify_list;
674 		int identify_list_size;
675 
676 		identify_list = MBSTRG(default_detect_order_list);
677 		identify_list_size = MBSTRG(default_detect_order_list_size);
678 
679 		/* copy the value string for work */
680 		if (value[0]=='"' && value[value_length-1]=='"' && value_length>2) {
681 			tmpstr = (char *)estrndup(value+1, value_length-2);
682 			value_length -= 2;
683 		}
684 		else
685 			tmpstr = (char *)estrndup(value, value_length);
686 		if (tmpstr == NULL) {
687 			return 0;
688 		}
689 		/* count the number of listed encoding names */
690 		endp = tmpstr + value_length;
691 		n = 1;
692 		p1 = tmpstr;
693 		while ((p2 = php_memnstr(p1, ",", 1, endp)) != NULL) {
694 			p1 = p2 + 1;
695 			n++;
696 		}
697 		size = n + identify_list_size;
698 		/* make list */
699 		list = (enum mbfl_no_encoding *)pecalloc(size, sizeof(int), persistent);
700 		if (list != NULL) {
701 			entry = list;
702 			n = 0;
703 			bauto = 0;
704 			p1 = tmpstr;
705 			do {
706 				p2 = p = php_memnstr(p1, ",", 1, endp);
707 				if (p == NULL) {
708 					p = endp;
709 				}
710 				*p = '\0';
711 				/* trim spaces */
712 				while (p1 < p && (*p1 == ' ' || *p1 == '\t')) {
713 					p1++;
714 				}
715 				p--;
716 				while (p > p1 && (*p == ' ' || *p == '\t')) {
717 					*p = '\0';
718 					p--;
719 				}
720 				/* convert to the encoding number and check encoding */
721 				if (strcasecmp(p1, "auto") == 0) {
722 					if (!bauto) {
723 						bauto = 1;
724 						l = identify_list_size;
725 						src = identify_list;
726 						while (l > 0) {
727 							*entry++ = *src++;
728 							l--;
729 							n++;
730 						}
731 					}
732 				} else {
733 					no_encoding = mbfl_name2no_encoding(p1);
734 					if (no_encoding != mbfl_no_encoding_invalid) {
735 						*entry++ = no_encoding;
736 						n++;
737 					} else {
738 						ret = 0;
739 					}
740 				}
741 				p1 = p2 + 1;
742 			} while (n < size && p2 != NULL);
743 			if (n > 0) {
744 				if (return_list) {
745 					*return_list = list;
746 				} else {
747 					pefree(list, persistent);
748 				}
749 			} else {
750 				pefree(list, persistent);
751 				if (return_list) {
752 					*return_list = NULL;
753 				}
754 				ret = 0;
755 			}
756 			if (return_size) {
757 				*return_size = n;
758 			}
759 		} else {
760 			if (return_list) {
761 				*return_list = NULL;
762 			}
763 			if (return_size) {
764 				*return_size = 0;
765 			}
766 			ret = 0;
767 		}
768 		efree(tmpstr);
769 	}
770 
771 	return ret;
772 }
773 /* }}} */
774 
775 /* {{{ MBSTRING_API php_mb_check_encoding_list */
php_mb_check_encoding_list(const char * encoding_list TSRMLS_DC)776 MBSTRING_API int php_mb_check_encoding_list(const char *encoding_list TSRMLS_DC) {
777 	return php_mb_parse_encoding_list(encoding_list, strlen(encoding_list), NULL, NULL, 0 TSRMLS_CC);
778 }
779 /* }}} */
780 
781 /* {{{ static int php_mb_parse_encoding_array()
782  *  Return 0 if input contains any illegal encoding, otherwise 1.
783  *  Even if any illegal encoding is detected the result may contain a list
784  *  of parsed encodings.
785  */
786 static int
php_mb_parse_encoding_array(zval * array,enum mbfl_no_encoding ** return_list,int * return_size,int persistent TSRMLS_DC)787 php_mb_parse_encoding_array(zval *array, enum mbfl_no_encoding **return_list, int *return_size, int persistent TSRMLS_DC)
788 {
789 	zval **hash_entry;
790 	HashTable *target_hash;
791 	int i, n, l, size, bauto,ret = 1;
792 	enum mbfl_no_encoding no_encoding;
793 	enum mbfl_no_encoding *src, *list, *entry;
794 
795 	list = NULL;
796 	if (Z_TYPE_P(array) == IS_ARRAY) {
797 		enum mbfl_no_encoding *identify_list;
798 		int identify_list_size;
799 
800 		identify_list = MBSTRG(default_detect_order_list);
801 		identify_list_size = MBSTRG(default_detect_order_list_size);
802 
803 		target_hash = Z_ARRVAL_P(array);
804 		zend_hash_internal_pointer_reset(target_hash);
805 		i = zend_hash_num_elements(target_hash);
806 		size = i + identify_list_size;
807 		list = (enum mbfl_no_encoding *)pecalloc(size, sizeof(int), persistent);
808 		if (list != NULL) {
809 			entry = list;
810 			bauto = 0;
811 			n = 0;
812 			while (i > 0) {
813 				if (zend_hash_get_current_data(target_hash, (void **) &hash_entry) == FAILURE) {
814 					break;
815 				}
816 				convert_to_string_ex(hash_entry);
817 				if (strcasecmp(Z_STRVAL_PP(hash_entry), "auto") == 0) {
818 					if (!bauto) {
819 						bauto = 1;
820 						l = identify_list_size;
821 						src = identify_list;
822 						while (l > 0) {
823 							*entry++ = *src++;
824 							l--;
825 							n++;
826 						}
827 					}
828 				} else {
829 					no_encoding = mbfl_name2no_encoding(Z_STRVAL_PP(hash_entry));
830 					if (no_encoding != mbfl_no_encoding_invalid) {
831 						*entry++ = no_encoding;
832 						n++;
833 					} else {
834 						ret = 0;
835 					}
836 				}
837 				zend_hash_move_forward(target_hash);
838 				i--;
839 			}
840 			if (n > 0) {
841 				if (return_list) {
842 					*return_list = list;
843 				} else {
844 					pefree(list, persistent);
845 				}
846 			} else {
847 				pefree(list, persistent);
848 				if (return_list) {
849 					*return_list = NULL;
850 				}
851 				ret = 0;
852 			}
853 			if (return_size) {
854 				*return_size = n;
855 			}
856 		} else {
857 			if (return_list) {
858 				*return_list = NULL;
859 			}
860 			if (return_size) {
861 				*return_size = 0;
862 			}
863 			ret = 0;
864 		}
865 	}
866 
867 	return ret;
868 }
869 /* }}} */
870 
/* Regex helper interface. The definitions that follow are selected at
 * compile time: an Oniguruma version under HAVE_ONIG, otherwise a PCRE
 * version under HAVE_PCRE || HAVE_BUNDLED_PCRE. The compiled pattern is
 * passed around as an opaque void pointer. */
871 static void *_php_mb_compile_regex(const char *pattern TSRMLS_DC);
872 static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len);
873 static void _php_mb_free_regex(void *opaque);
874 
875 #if HAVE_ONIG
876 /* {{{ _php_mb_compile_regex */
_php_mb_compile_regex(const char * pattern TSRMLS_DC)877 static void *_php_mb_compile_regex(const char *pattern TSRMLS_DC)
878 {
879 	php_mb_regex_t *retval;
880 	OnigErrorInfo err_info;
881 	int err_code;
882 
883 	if ((err_code = onig_new(&retval,
884 			(const OnigUChar *)pattern,
885 			(const OnigUChar *)pattern + strlen(pattern),
886 			ONIG_OPTION_IGNORECASE | ONIG_OPTION_DONT_CAPTURE_GROUP,
887 			ONIG_ENCODING_ASCII, &OnigSyntaxPerl, &err_info))) {
888 		OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
889 		onig_error_code_to_str(err_str, err_code, err_info);
890 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s: %s", pattern, err_str);
891 		retval = NULL;
892 	}
893 	return retval;
894 }
895 /* }}} */
896 
897 /* {{{ _php_mb_match_regex */
_php_mb_match_regex(void * opaque,const char * str,size_t str_len)898 static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len)
899 {
900 	return onig_search((php_mb_regex_t *)opaque, (const OnigUChar *)str,
901 			(const OnigUChar*)str + str_len, (const OnigUChar *)str,
902 			(const OnigUChar*)str + str_len, NULL, ONIG_OPTION_NONE) >= 0;
903 }
904 /* }}} */
905 
906 /* {{{ _php_mb_free_regex */
_php_mb_free_regex(void * opaque)907 static void _php_mb_free_regex(void *opaque)
908 {
909 	onig_free((php_mb_regex_t *)opaque);
910 }
911 /* }}} */
912 #elif HAVE_PCRE || HAVE_BUNDLED_PCRE
913 /* {{{ _php_mb_compile_regex */
_php_mb_compile_regex(const char * pattern TSRMLS_DC)914 static void *_php_mb_compile_regex(const char *pattern TSRMLS_DC)
915 {
916 	pcre *retval;
917 	const char *err_str;
918 	int err_offset;
919 
920 	if (!(retval = pcre_compile(pattern,
921 			PCRE_CASELESS, &err_str, &err_offset, NULL))) {
922 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s (offset=%d): %s", pattern, err_offset, err_str);
923 	}
924 	return retval;
925 }
926 /* }}} */
927 
928 /* {{{ _php_mb_match_regex */
_php_mb_match_regex(void * opaque,const char * str,size_t str_len)929 static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len)
930 {
931 	return pcre_exec((pcre *)opaque, NULL, str, (int)str_len, 0,
932 			0, NULL, 0) >= 0;
933 }
934 /* }}} */
935 
/* {{{ _php_mb_free_regex
 *  PCRE variant. Releases a pattern obtained from _php_mb_compile_regex()
 *  through pcre_free().
 */
static void _php_mb_free_regex(void *opaque)
{
	void *compiled = opaque;

	pcre_free(compiled);
}
941 /* }}} */
942 #endif
943 
944 /* {{{ php_mb_nls_get_default_detect_order_list */
php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang,enum mbfl_no_encoding ** plist,int * plist_size)945 static int php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang, enum mbfl_no_encoding **plist, int* plist_size)
946 {
947 	size_t i;
948 
949 	*plist = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut;
950 	*plist_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
951 
952 	for (i = 0; i < sizeof(php_mb_default_identify_list) / sizeof(php_mb_default_identify_list[0]); i++) {
953 		if (php_mb_default_identify_list[i].lang == lang) {
954 			*plist = (enum mbfl_no_encoding *)php_mb_default_identify_list[i].list;
955 			*plist_size = php_mb_default_identify_list[i].list_size;
956 			return 1;
957 		}
958 	}
959 	return 0;
960 }
961 /* }}} */
962 
963 /* {{{ php.ini directive handler */
964 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_language) */
PHP_INI_MH(OnUpdate_mbstring_language)965 static PHP_INI_MH(OnUpdate_mbstring_language)
966 {
967 	enum mbfl_no_language no_language;
968 
969 	no_language = mbfl_name2no_language(new_value);
970 	if (no_language == mbfl_no_language_invalid) {
971 		MBSTRG(language) = mbfl_no_language_neutral;
972 		return FAILURE;
973 	}
974 	MBSTRG(language) = no_language;
975 	php_mb_nls_get_default_detect_order_list(no_language, &MBSTRG(default_detect_order_list), &MBSTRG(default_detect_order_list_size));
976 	return SUCCESS;
977 }
978 /* }}} */
979 
980 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_detect_order) */
PHP_INI_MH(OnUpdate_mbstring_detect_order)981 static PHP_INI_MH(OnUpdate_mbstring_detect_order)
982 {
983 	enum mbfl_no_encoding *list;
984 	int size;
985 
986 	if (php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) {
987 		if (MBSTRG(detect_order_list)) {
988 			free(MBSTRG(detect_order_list));
989 		}
990 		MBSTRG(detect_order_list) = list;
991 		MBSTRG(detect_order_list_size) = size;
992 	} else {
993 		if (MBSTRG(detect_order_list)) {
994 			free(MBSTRG(detect_order_list));
995 			MBSTRG(detect_order_list) = NULL;
996 		}
997 		return FAILURE;
998 	}
999 
1000 	return SUCCESS;
1001 }
1002 /* }}} */
1003 
1004 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_input) */
PHP_INI_MH(OnUpdate_mbstring_http_input)1005 static PHP_INI_MH(OnUpdate_mbstring_http_input)
1006 {
1007 	enum mbfl_no_encoding *list;
1008 	int size;
1009 
1010 	if (php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) {
1011 		if (MBSTRG(http_input_list)) {
1012 			free(MBSTRG(http_input_list));
1013 		}
1014 		MBSTRG(http_input_list) = list;
1015 		MBSTRG(http_input_list_size) = size;
1016 	} else {
1017 		if (MBSTRG(http_input_list)) {
1018 			free(MBSTRG(http_input_list));
1019 			MBSTRG(http_input_list) = NULL;
1020 		}
1021 		MBSTRG(http_input_list_size) = 0;
1022 		return FAILURE;
1023 	}
1024 
1025 	return SUCCESS;
1026 }
1027 /* }}} */
1028 
1029 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output) */
PHP_INI_MH(OnUpdate_mbstring_http_output)1030 static PHP_INI_MH(OnUpdate_mbstring_http_output)
1031 {
1032 	enum mbfl_no_encoding no_encoding;
1033 
1034 	no_encoding = mbfl_name2no_encoding(new_value);
1035 	if (no_encoding != mbfl_no_encoding_invalid) {
1036 		MBSTRG(http_output_encoding) = no_encoding;
1037 		MBSTRG(current_http_output_encoding) = no_encoding;
1038 	} else {
1039 		MBSTRG(http_output_encoding) = mbfl_no_encoding_pass;
1040 		MBSTRG(current_http_output_encoding) = mbfl_no_encoding_pass;
1041 		if (new_value != NULL && new_value_length > 0) {
1042 			return FAILURE;
1043 		}
1044 	}
1045 
1046 	return SUCCESS;
1047 }
1048 /* }}} */
1049 
1050 /* {{{ static _php_mb_ini_mbstring_internal_encoding_set */
_php_mb_ini_mbstring_internal_encoding_set(const char * new_value,uint new_value_length TSRMLS_DC)1051 int _php_mb_ini_mbstring_internal_encoding_set(const char *new_value, uint new_value_length TSRMLS_DC)
1052 {
1053 	enum mbfl_no_encoding no_encoding;
1054   	const char *enc_name = NULL;
1055   	uint enc_name_len = 0;
1056 
1057   	no_encoding = new_value ? mbfl_name2no_encoding(new_value):
1058   				mbfl_no_encoding_invalid;
1059 	if (no_encoding != mbfl_no_encoding_invalid) {
1060   		enc_name = new_value;
1061   		enc_name_len = new_value_length;
1062   	} else {
1063   		switch (MBSTRG(language)) {
1064   			case mbfl_no_language_uni:
1065   				enc_name = "UTF-8";
1066   				enc_name_len = sizeof("UTF-8") - 1;
1067   				break;
1068   			case mbfl_no_language_japanese:
1069   				enc_name = "EUC-JP";
1070   				enc_name_len = sizeof("EUC-JP") - 1;
1071   				break;
1072   			case mbfl_no_language_korean:
1073   				enc_name = "EUC-KR";
1074   				enc_name_len = sizeof("EUC-KR") - 1;
1075   				break;
1076   			case mbfl_no_language_simplified_chinese:
1077   				enc_name = "EUC-CN";
1078   				enc_name_len = sizeof("EUC-CN") - 1;
1079   				break;
1080   			case mbfl_no_language_traditional_chinese:
1081   				enc_name = "EUC-TW";
1082   				enc_name_len = sizeof("EUC-TW") - 1;
1083   				break;
1084   			case mbfl_no_language_russian:
1085   				enc_name = "KOI8-R";
1086   				enc_name_len = sizeof("KOI8-R") - 1;
1087   				break;
1088   			case mbfl_no_language_german:
1089   				enc_name = "ISO-8859-15";
1090   				enc_name_len = sizeof("ISO-8859-15") - 1;
1091   				break;
1092   			case mbfl_no_language_armenian:
1093   				enc_name = "ArmSCII-8";
1094   				enc_name_len = sizeof("ArmSCII-8") - 1;
1095   				break;
1096   			case mbfl_no_language_turkish:
1097   				enc_name = "ISO-8859-9";
1098   				enc_name_len = sizeof("ISO-8859-9") - 1;
1099   				break;
1100   			default:
1101   				enc_name = "ISO-8859-1";
1102   				enc_name_len = sizeof("ISO-8859-1") - 1;
1103   				break;
1104   		}
1105   		no_encoding = mbfl_name2no_encoding(enc_name);
1106   	}
1107 	MBSTRG(internal_encoding) = no_encoding;
1108 	MBSTRG(current_internal_encoding) = no_encoding;
1109 #if HAVE_MBREGEX
1110 	{
1111 		const char *enc_name = new_value;
1112 		if (FAILURE == php_mb_regex_set_default_mbctype(enc_name TSRMLS_CC)) {
1113 			/* falls back to EUC-JP if an unknown encoding name is given */
1114 			enc_name = "EUC-JP";
1115 			php_mb_regex_set_default_mbctype(enc_name TSRMLS_CC);
1116 		}
1117 		php_mb_regex_set_mbctype(new_value TSRMLS_CC);
1118 	}
1119 #endif
1120 	return SUCCESS;
1121 }
1122 /* }}} */
1123 
1124 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_internal_encoding) */
PHP_INI_MH(OnUpdate_mbstring_internal_encoding)1125 static PHP_INI_MH(OnUpdate_mbstring_internal_encoding)
1126 {
1127 	if (stage == PHP_INI_STAGE_STARTUP || stage == PHP_INI_STAGE_SHUTDOWN
1128 			|| stage == PHP_INI_STAGE_RUNTIME) {
1129 		return _php_mb_ini_mbstring_internal_encoding_set(new_value, new_value_length TSRMLS_CC);
1130 	} else {
1131 		/* the corresponding mbstring globals needs to be set according to the
1132 		 * ini value in the later stage because it never falls back to the
1133 		 * default value if 1. no value for mbstring.internal_encoding is given,
1134 		 * 2. mbstring.language directive is processed in per-dir or runtime
1135 		 * context and 3. call to the handler for mbstring.language is done
1136 		 * after mbstring.internal_encoding is handled. */
1137 		return SUCCESS;
1138 	}
1139 }
1140 /* }}} */
1141 
1142 #ifdef ZEND_MULTIBYTE
1143 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_script_encoding) */
PHP_INI_MH(OnUpdate_mbstring_script_encoding)1144 static PHP_INI_MH(OnUpdate_mbstring_script_encoding)
1145 {
1146 	int *list, size;
1147 
1148 	if (php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) {
1149 		if (MBSTRG(script_encoding_list) != NULL) {
1150 			free(MBSTRG(script_encoding_list));
1151 		}
1152 		MBSTRG(script_encoding_list) = list;
1153 		MBSTRG(script_encoding_list_size) = size;
1154 	} else {
1155 		if (MBSTRG(script_encoding_list) != NULL) {
1156 			free(MBSTRG(script_encoding_list));
1157 		}
1158 		MBSTRG(script_encoding_list) = NULL;
1159 		MBSTRG(script_encoding_list_size) = 0;
1160 		return FAILURE;
1161 	}
1162 
1163 	return SUCCESS;
1164 }
1165 /* }}} */
1166 #endif /* ZEND_MULTIBYTE */
1167 
1168 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_substitute_character) */
PHP_INI_MH(OnUpdate_mbstring_substitute_character)1169 static PHP_INI_MH(OnUpdate_mbstring_substitute_character)
1170 {
1171 	int c;
1172 	char *endptr = NULL;
1173 
1174 	if (new_value != NULL) {
1175 		if (strcasecmp("none", new_value) == 0) {
1176 			MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
1177 			MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
1178 		} else if (strcasecmp("long", new_value) == 0) {
1179 			MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
1180 			MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
1181 		} else if (strcasecmp("entity", new_value) == 0) {
1182 			MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
1183 			MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
1184 		} else {
1185 			MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1186 			MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1187 			if (new_value_length >0) {
1188 				c = strtol(new_value, &endptr, 0);
1189 				if (*endptr == '\0') {
1190 					MBSTRG(filter_illegal_substchar) = c;
1191 					MBSTRG(current_filter_illegal_substchar) = c;
1192 				}
1193 			}
1194 		}
1195 	} else {
1196 		MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1197 		MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1198 		MBSTRG(filter_illegal_substchar) = 0x3f;	/* '?' */
1199 		MBSTRG(current_filter_illegal_substchar) = 0x3f;	/* '?' */
1200 	}
1201 
1202 	return SUCCESS;
1203 }
1204 /* }}} */
1205 
1206 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_encoding_translation) */
PHP_INI_MH(OnUpdate_mbstring_encoding_translation)1207 static PHP_INI_MH(OnUpdate_mbstring_encoding_translation)
1208 {
1209 	if (new_value == NULL) {
1210 	   return FAILURE;
1211 	}
1212 
1213 	OnUpdateBool(entry, new_value, new_value_length, mh_arg1, mh_arg2, mh_arg3, stage TSRMLS_CC);
1214 
1215 	if (MBSTRG(encoding_translation)) {
1216 		sapi_unregister_post_entry(php_post_entries TSRMLS_CC);
1217 		sapi_register_post_entries(mbstr_post_entries TSRMLS_CC);
1218 	} else {
1219 		sapi_unregister_post_entry(mbstr_post_entries TSRMLS_CC);
1220 		sapi_register_post_entries(php_post_entries TSRMLS_CC);
1221 	}
1222 
1223 	return SUCCESS;
1224 }
1225 /* }}} */
1226 
1227 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes */
PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes)1228 static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes)
1229 {
1230 	zval tmp;
1231 	void *re = NULL;
1232 
1233 	if (!new_value) {
1234 		new_value = entry->orig_value;
1235 		new_value_length = entry->orig_value_length;
1236 	}
1237 	php_trim(new_value, new_value_length, NULL, 0, &tmp, 3 TSRMLS_CC);
1238 
1239 	if (Z_STRLEN(tmp) > 0) {
1240 		if (!(re = _php_mb_compile_regex(Z_STRVAL(tmp) TSRMLS_CC))) {
1241 			zval_dtor(&tmp);
1242 			return FAILURE;
1243 		}
1244 	}
1245 
1246 	if (MBSTRG(http_output_conv_mimetypes)) {
1247 		_php_mb_free_regex(MBSTRG(http_output_conv_mimetypes));
1248 	}
1249 
1250 	MBSTRG(http_output_conv_mimetypes) = re;
1251 
1252 	zval_dtor(&tmp);
1253 	return SUCCESS;
1254 }
1255 /* }}} */
1256 /* }}} */
1257 
1258 /* {{{ php.ini directive registration */
1259 PHP_INI_BEGIN()
1260 	PHP_INI_ENTRY("mbstring.language", "neutral", PHP_INI_ALL, OnUpdate_mbstring_language)
1261 	PHP_INI_ENTRY("mbstring.detect_order", NULL, PHP_INI_ALL, OnUpdate_mbstring_detect_order)
1262 	PHP_INI_ENTRY("mbstring.http_input", "pass", PHP_INI_ALL, OnUpdate_mbstring_http_input)
1263 	PHP_INI_ENTRY("mbstring.http_output", "pass", PHP_INI_ALL, OnUpdate_mbstring_http_output)
1264 	PHP_INI_ENTRY("mbstring.internal_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_internal_encoding)
1265 #ifdef ZEND_MULTIBYTE
1266 	PHP_INI_ENTRY("mbstring.script_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_script_encoding)
1267 #endif /* ZEND_MULTIBYTE */
1268 	PHP_INI_ENTRY("mbstring.substitute_character", NULL, PHP_INI_ALL, OnUpdate_mbstring_substitute_character)
1269 	STD_PHP_INI_ENTRY("mbstring.func_overload", "0",
1270 	PHP_INI_SYSTEM, OnUpdateLong, func_overload, zend_mbstring_globals, mbstring_globals)
1271 
1272 	STD_PHP_INI_BOOLEAN("mbstring.encoding_translation", "0",
1273 		PHP_INI_SYSTEM | PHP_INI_PERDIR,
1274 		OnUpdate_mbstring_encoding_translation,
1275 		encoding_translation, zend_mbstring_globals, mbstring_globals)
1276 	PHP_INI_ENTRY("mbstring.http_output_conv_mimetypes",
1277 		"^(text/|application/xhtml\\+xml)",
1278 		PHP_INI_ALL,
1279 		OnUpdate_mbstring_http_output_conv_mimetypes)
1280 
1281 	STD_PHP_INI_BOOLEAN("mbstring.strict_detection", "0",
1282 		PHP_INI_ALL,
1283 		OnUpdateLong,
1284 		strict_detection, zend_mbstring_globals, mbstring_globals)
PHP_INI_END()1285 PHP_INI_END()
1286 /* }}} */
1287 
1288 /* {{{ module global initialize handler */
1289 static PHP_GINIT_FUNCTION(mbstring)
1290 {
1291 	mbstring_globals->language = mbfl_no_language_uni;
1292 	mbstring_globals->internal_encoding = mbfl_no_encoding_invalid;
1293 	mbstring_globals->current_internal_encoding = mbstring_globals->internal_encoding;
1294 #ifdef ZEND_MULTIBYTE
1295 	mbstring_globals->script_encoding_list = NULL;
1296 	mbstring_globals->script_encoding_list_size = 0;
1297 #endif /* ZEND_MULTIBYTE */
1298 	mbstring_globals->http_output_encoding = mbfl_no_encoding_pass;
1299 	mbstring_globals->current_http_output_encoding = mbfl_no_encoding_pass;
1300 	mbstring_globals->http_input_identify = mbfl_no_encoding_invalid;
1301 	mbstring_globals->http_input_identify_get = mbfl_no_encoding_invalid;
1302 	mbstring_globals->http_input_identify_post = mbfl_no_encoding_invalid;
1303 	mbstring_globals->http_input_identify_cookie = mbfl_no_encoding_invalid;
1304 	mbstring_globals->http_input_identify_string = mbfl_no_encoding_invalid;
1305 	mbstring_globals->http_input_list = NULL;
1306 	mbstring_globals->http_input_list_size = 0;
1307 	mbstring_globals->detect_order_list = NULL;
1308 	mbstring_globals->detect_order_list_size = 0;
1309 	mbstring_globals->current_detect_order_list = NULL;
1310 	mbstring_globals->current_detect_order_list_size = 0;
1311 	mbstring_globals->default_detect_order_list = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut;
1312 	mbstring_globals->default_detect_order_list_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
1313 	mbstring_globals->filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1314 	mbstring_globals->filter_illegal_substchar = 0x3f;	/* '?' */
1315 	mbstring_globals->current_filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1316 	mbstring_globals->current_filter_illegal_substchar = 0x3f;	/* '?' */
1317 	mbstring_globals->illegalchars = 0;
1318 	mbstring_globals->func_overload = 0;
1319 	mbstring_globals->encoding_translation = 0;
1320 	mbstring_globals->strict_detection = 0;
1321 	mbstring_globals->outconv = NULL;
1322 	mbstring_globals->http_output_conv_mimetypes = NULL;
1323 #if HAVE_MBREGEX
1324 	mbstring_globals->mb_regex_globals = php_mb_regex_globals_alloc(TSRMLS_C);
1325 #endif
1326 }
1327 /* }}} */
1328 
1329 /* {{{ PHP_GSHUTDOWN_FUNCTION */
PHP_GSHUTDOWN_FUNCTION(mbstring)1330 static PHP_GSHUTDOWN_FUNCTION(mbstring)
1331 {
1332 	if (mbstring_globals->http_input_list) {
1333 		free(mbstring_globals->http_input_list);
1334 	}
1335 #ifdef ZEND_MULTIBYTE
1336 	if (mbstring_globals->script_encoding_list) {
1337 		free(mbstring_globals->script_encoding_list);
1338 	}
1339 #endif /* ZEND_MULTIBYTE */
1340 	if (mbstring_globals->detect_order_list) {
1341 		free(mbstring_globals->detect_order_list);
1342 	}
1343 	if (mbstring_globals->http_output_conv_mimetypes) {
1344 		_php_mb_free_regex(mbstring_globals->http_output_conv_mimetypes);
1345 	}
1346 #if HAVE_MBREGEX
1347 	php_mb_regex_globals_free(mbstring_globals->mb_regex_globals TSRMLS_CC);
1348 #endif
1349 }
1350 /* }}} */
1351 
1352 /* {{{ PHP_MINIT_FUNCTION(mbstring) */
PHP_MINIT_FUNCTION(mbstring)1353 PHP_MINIT_FUNCTION(mbstring)
1354 {
1355 	__mbfl_allocators = &_php_mb_allocators;
1356 
1357 	REGISTER_INI_ENTRIES();
1358 
1359 	/* This is a global handler. Should not be set in a per-request handler. */
1360 	sapi_register_treat_data(mbstr_treat_data);
1361 
1362 	/* Post handlers are stored in the thread-local context. */
1363 	if (MBSTRG(encoding_translation)) {
1364 		sapi_register_post_entries(mbstr_post_entries TSRMLS_CC);
1365 	}
1366 
1367 	REGISTER_LONG_CONSTANT("MB_OVERLOAD_MAIL", MB_OVERLOAD_MAIL, CONST_CS | CONST_PERSISTENT);
1368 	REGISTER_LONG_CONSTANT("MB_OVERLOAD_STRING", MB_OVERLOAD_STRING, CONST_CS | CONST_PERSISTENT);
1369 	REGISTER_LONG_CONSTANT("MB_OVERLOAD_REGEX", MB_OVERLOAD_REGEX, CONST_CS | CONST_PERSISTENT);
1370 
1371 	REGISTER_LONG_CONSTANT("MB_CASE_UPPER", PHP_UNICODE_CASE_UPPER, CONST_CS | CONST_PERSISTENT);
1372 	REGISTER_LONG_CONSTANT("MB_CASE_LOWER", PHP_UNICODE_CASE_LOWER, CONST_CS | CONST_PERSISTENT);
1373 	REGISTER_LONG_CONSTANT("MB_CASE_TITLE", PHP_UNICODE_CASE_TITLE, CONST_CS | CONST_PERSISTENT);
1374 
1375 #if HAVE_MBREGEX
1376 	PHP_MINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1377 #endif
1378 	return SUCCESS;
1379 }
1380 /* }}} */
1381 
1382 /* {{{ PHP_MSHUTDOWN_FUNCTION(mbstring) */
PHP_MSHUTDOWN_FUNCTION(mbstring)1383 PHP_MSHUTDOWN_FUNCTION(mbstring)
1384 {
1385 	UNREGISTER_INI_ENTRIES();
1386 
1387 #if HAVE_MBREGEX
1388 	PHP_MSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1389 #endif
1390 
1391 	return SUCCESS;
1392 }
1393 /* }}} */
1394 
1395 /* {{{ PHP_RINIT_FUNCTION(mbstring) */
PHP_RINIT_FUNCTION(mbstring)1396 PHP_RINIT_FUNCTION(mbstring)
1397 {
1398 	int n;
1399 	enum mbfl_no_encoding *list=NULL, *entry;
1400 	zend_function *func, *orig;
1401 	const struct mb_overload_def *p;
1402 
1403 	MBSTRG(current_internal_encoding) = MBSTRG(internal_encoding);
1404 	MBSTRG(current_http_output_encoding) = MBSTRG(http_output_encoding);
1405 	MBSTRG(current_filter_illegal_mode) = MBSTRG(filter_illegal_mode);
1406 	MBSTRG(current_filter_illegal_substchar) = MBSTRG(filter_illegal_substchar);
1407 
1408 	MBSTRG(illegalchars) = 0;
1409 
1410 	n = 0;
1411 	if (MBSTRG(detect_order_list)) {
1412 		list = MBSTRG(detect_order_list);
1413 		n = MBSTRG(detect_order_list_size);
1414 	}
1415 	if (n <= 0) {
1416 		list = MBSTRG(default_detect_order_list);
1417 		n = MBSTRG(default_detect_order_list_size);
1418 	}
1419 	entry = (enum mbfl_no_encoding *)safe_emalloc(n, sizeof(int), 0);
1420 	MBSTRG(current_detect_order_list) = entry;
1421 	MBSTRG(current_detect_order_list_size) = n;
1422 	while (n > 0) {
1423 		*entry++ = *list++;
1424 		n--;
1425 	}
1426 
1427  	/* override original function. */
1428 	if (MBSTRG(func_overload)){
1429 		p = &(mb_ovld[0]);
1430 
1431 		while (p->type > 0) {
1432 			if ((MBSTRG(func_overload) & p->type) == p->type &&
1433 				zend_hash_find(EG(function_table), p->save_func,
1434 					strlen(p->save_func)+1, (void **)&orig) != SUCCESS) {
1435 
1436 				zend_hash_find(EG(function_table), p->ovld_func, strlen(p->ovld_func)+1 , (void **)&func);
1437 
1438 				if (zend_hash_find(EG(function_table), p->orig_func, strlen(p->orig_func)+1, (void **)&orig) != SUCCESS) {
1439 					php_error_docref("ref.mbstring" TSRMLS_CC, E_WARNING, "mbstring couldn't find function %s.", p->orig_func);
1440 					return FAILURE;
1441 				} else {
1442 					zend_hash_add(EG(function_table), p->save_func, strlen(p->save_func)+1, orig, sizeof(zend_function), NULL);
1443 
1444 					if (zend_hash_update(EG(function_table), p->orig_func, strlen(p->orig_func)+1, func, sizeof(zend_function),
1445 						NULL) == FAILURE) {
1446 						php_error_docref("ref.mbstring" TSRMLS_CC, E_WARNING, "mbstring couldn't replace function %s.", p->orig_func);
1447 						return FAILURE;
1448 					}
1449 				}
1450 			}
1451 			p++;
1452 		}
1453 	}
1454 #if HAVE_MBREGEX
1455 	PHP_RINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1456 #endif
1457 #ifdef ZEND_MULTIBYTE
1458 	zend_multibyte_set_internal_encoding(mbfl_no_encoding2name(MBSTRG(internal_encoding)) TSRMLS_CC);
1459 	php_mb_set_zend_encoding(TSRMLS_C);
1460 #endif /* ZEND_MULTIBYTE */
1461 
1462 	return SUCCESS;
1463 }
1464 /* }}} */
1465 
1466 /* {{{ PHP_RSHUTDOWN_FUNCTION(mbstring) */
PHP_RSHUTDOWN_FUNCTION(mbstring)1467 PHP_RSHUTDOWN_FUNCTION(mbstring)
1468 {
1469 	const struct mb_overload_def *p;
1470 	zend_function *orig;
1471 
1472 	if (MBSTRG(current_detect_order_list) != NULL) {
1473 		efree(MBSTRG(current_detect_order_list));
1474 		MBSTRG(current_detect_order_list) = NULL;
1475 		MBSTRG(current_detect_order_list_size) = 0;
1476 	}
1477 	if (MBSTRG(outconv) != NULL) {
1478 		MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
1479 		mbfl_buffer_converter_delete(MBSTRG(outconv));
1480 		MBSTRG(outconv) = NULL;
1481 	}
1482 
1483 	/* clear http input identification. */
1484 	MBSTRG(http_input_identify) = mbfl_no_encoding_invalid;
1485 	MBSTRG(http_input_identify_post) = mbfl_no_encoding_invalid;
1486 	MBSTRG(http_input_identify_get) = mbfl_no_encoding_invalid;
1487 	MBSTRG(http_input_identify_cookie) = mbfl_no_encoding_invalid;
1488 	MBSTRG(http_input_identify_string) = mbfl_no_encoding_invalid;
1489 
1490  	/*  clear overloaded function. */
1491 	if (MBSTRG(func_overload)){
1492 		p = &(mb_ovld[0]);
1493 		while (p->type > 0) {
1494 			if ((MBSTRG(func_overload) & p->type) == p->type &&
1495 				zend_hash_find(EG(function_table), p->save_func,
1496 							   strlen(p->save_func)+1, (void **)&orig) == SUCCESS) {
1497 
1498 				zend_hash_update(EG(function_table), p->orig_func, strlen(p->orig_func)+1, orig, sizeof(zend_function), NULL);
1499 				zend_hash_del(EG(function_table), p->save_func, strlen(p->save_func)+1);
1500 			}
1501 			p++;
1502 		}
1503 	}
1504 
1505 #if HAVE_MBREGEX
1506 	PHP_RSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1507 #endif
1508 
1509 	return SUCCESS;
1510 }
1511 /* }}} */
1512 
1513 /* {{{ PHP_MINFO_FUNCTION(mbstring) */
PHP_MINFO_FUNCTION(mbstring)1514 PHP_MINFO_FUNCTION(mbstring)
1515 {
1516 	php_info_print_table_start();
1517 	php_info_print_table_row(2, "Multibyte Support", "enabled");
1518 	php_info_print_table_row(2, "Multibyte string engine", "libmbfl");
1519 	php_info_print_table_row(2, "HTTP input encoding translation", MBSTRG(encoding_translation) ? "enabled": "disabled");
1520 	php_info_print_table_end();
1521 
1522 	php_info_print_table_start();
1523 	php_info_print_table_header(1, "mbstring extension makes use of \"streamable kanji code filter and converter\", which is distributed under the GNU Lesser General Public License version 2.1.");
1524 	php_info_print_table_end();
1525 
1526 #if HAVE_MBREGEX
1527 	PHP_MINFO(mb_regex)(ZEND_MODULE_INFO_FUNC_ARGS_PASSTHRU);
1528 #endif
1529 
1530 	DISPLAY_INI_ENTRIES();
1531 }
1532 /* }}} */
1533 
1534 /* {{{ proto string mb_language([string language])
1535    Sets the current language or Returns the current language as a string */
PHP_FUNCTION(mb_language)1536 PHP_FUNCTION(mb_language)
1537 {
1538 	char *name = NULL;
1539 	int name_len = 0;
1540 
1541 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &name, &name_len) == FAILURE) {
1542 		return;
1543 	}
1544 	if (name == NULL) {
1545 		RETVAL_STRING((char *)mbfl_no_language2name(MBSTRG(language)), 1);
1546 	} else {
1547 		if (FAILURE == zend_alter_ini_entry(
1548 				"mbstring.language", sizeof("mbstring.language"),
1549 				name, name_len, PHP_INI_USER, PHP_INI_STAGE_RUNTIME)) {
1550 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown language \"%s\"", name);
1551 			RETVAL_FALSE;
1552 		} else {
1553 			RETVAL_TRUE;
1554 		}
1555 	}
1556 }
1557 /* }}} */
1558 
1559 /* {{{ proto string mb_internal_encoding([string encoding])
1560    Sets the current internal encoding or Returns the current internal encoding as a string */
PHP_FUNCTION(mb_internal_encoding)1561 PHP_FUNCTION(mb_internal_encoding)
1562 {
1563 	char *name = NULL;
1564 	int name_len;
1565 	enum mbfl_no_encoding no_encoding;
1566 
1567 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &name, &name_len) == FAILURE) {
1568 		RETURN_FALSE;
1569 	}
1570 	if (name == NULL) {
1571 		name = (char *)mbfl_no_encoding2name(MBSTRG(current_internal_encoding));
1572 		if (name != NULL) {
1573 			RETURN_STRING(name, 1);
1574 		} else {
1575 			RETURN_FALSE;
1576 		}
1577 	} else {
1578 		no_encoding = mbfl_name2no_encoding(name);
1579 		if (no_encoding == mbfl_no_encoding_invalid) {
1580 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
1581 			RETURN_FALSE;
1582 		} else {
1583 			MBSTRG(current_internal_encoding) = no_encoding;
1584 #ifdef ZEND_MULTIBYTE
1585 			/* TODO: make independent from mbstring.encoding_translation? */
1586 			if (MBSTRG(encoding_translation)) {
1587 				zend_multibyte_set_internal_encoding(name TSRMLS_CC);
1588 			}
1589 #endif /* ZEND_MULTIBYTE */
1590 			RETURN_TRUE;
1591 		}
1592 	}
1593 }
1594 /* }}} */
1595 
1596 /* {{{ proto mixed mb_http_input([string type])
1597    Returns the input encoding */
PHP_FUNCTION(mb_http_input)1598 PHP_FUNCTION(mb_http_input)
1599 {
1600 	char *typ = NULL;
1601 	int typ_len;
1602 	int retname, n;
1603 	char *name, *list, *temp;
1604 	enum mbfl_no_encoding *entry;
1605 	enum mbfl_no_encoding result = mbfl_no_encoding_invalid;
1606 
1607 	retname = 1;
1608  	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &typ, &typ_len) == FAILURE) {
1609  		RETURN_FALSE;
1610  	}
1611  	if (typ == NULL) {
1612  		result = MBSTRG(http_input_identify);
1613  	} else {
1614  		switch (*typ) {
1615 		case 'G':
1616 		case 'g':
1617 			result = MBSTRG(http_input_identify_get);
1618 			break;
1619 		case 'P':
1620 		case 'p':
1621 			result = MBSTRG(http_input_identify_post);
1622 			break;
1623 		case 'C':
1624 		case 'c':
1625 			result = MBSTRG(http_input_identify_cookie);
1626 			break;
1627 		case 'S':
1628 		case 's':
1629 			result = MBSTRG(http_input_identify_string);
1630 			break;
1631 		case 'I':
1632 		case 'i':
1633 			array_init(return_value);
1634 			entry = MBSTRG(http_input_list);
1635 			n = MBSTRG(http_input_list_size);
1636 			while (n > 0) {
1637 				name = (char *)mbfl_no_encoding2name(*entry);
1638 				if (name) {
1639 					add_next_index_string(return_value, name, 1);
1640 				}
1641 				entry++;
1642 				n--;
1643 			}
1644 			retname = 0;
1645 			break;
1646 		case 'L':
1647 		case 'l':
1648 			entry = MBSTRG(http_input_list);
1649 			n = MBSTRG(http_input_list_size);
1650 			list = NULL;
1651 			while (n > 0) {
1652 				name = (char *)mbfl_no_encoding2name(*entry);
1653 				if (name) {
1654 					if (list) {
1655 						temp = list;
1656 						spprintf(&list, 0, "%s,%s", temp, name);
1657 						efree(temp);
1658 						if (!list) {
1659 							break;
1660 						}
1661 					} else {
1662 						list = estrdup(name);
1663 					}
1664 				}
1665 				entry++;
1666 				n--;
1667 			}
1668 			if (!list) {
1669 				RETURN_FALSE;
1670 			}
1671 			RETVAL_STRING(list, 0);
1672 			retname = 0;
1673 			break;
1674 		default:
1675 			result = MBSTRG(http_input_identify);
1676 			break;
1677 		}
1678 	}
1679 
1680 	if (retname) {
1681 		if (result != mbfl_no_encoding_invalid &&
1682 			(name = (char *)mbfl_no_encoding2name(result)) != NULL) {
1683 			RETVAL_STRING(name, 1);
1684 		} else {
1685 			RETVAL_FALSE;
1686 		}
1687 	}
1688 }
1689 /* }}} */
1690 
1691 /* {{{ proto string mb_http_output([string encoding])
1692    Sets the current output_encoding or returns the current output_encoding as a string */
PHP_FUNCTION(mb_http_output)1693 PHP_FUNCTION(mb_http_output)
1694 {
1695 	char *name = NULL;
1696 	int name_len;
1697 	enum mbfl_no_encoding no_encoding;
1698 
1699 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", (char **)&name, &name_len) == FAILURE) {
1700 		RETURN_FALSE;
1701 	}
1702 
1703 	if (name == NULL) {
1704 		name = (char *)mbfl_no_encoding2name(MBSTRG(current_http_output_encoding));
1705 		if (name != NULL) {
1706 			RETURN_STRING(name, 1);
1707 		} else {
1708 			RETURN_FALSE;
1709 		}
1710 	} else {
1711 		no_encoding = mbfl_name2no_encoding(name);
1712 		if (no_encoding == mbfl_no_encoding_invalid) {
1713 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
1714 			RETURN_FALSE;
1715 		} else {
1716 			MBSTRG(current_http_output_encoding) = no_encoding;
1717 			RETURN_TRUE;
1718 		}
1719 	}
1720 }
1721 /* }}} */
1722 
1723 /* {{{ proto bool|array mb_detect_order([mixed encoding-list])
1724    Sets the current detect_order or Return the current detect_order as a array */
PHP_FUNCTION(mb_detect_order)1725 PHP_FUNCTION(mb_detect_order)
1726 {
1727 	zval **arg1 = NULL;
1728 	int n, size;
1729 	enum mbfl_no_encoding *list, *entry;
1730 	char *name;
1731 
1732 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|Z", &arg1) == FAILURE) {
1733 		return;
1734 	}
1735 
1736 	if (!arg1) {
1737 		array_init(return_value);
1738 		entry = MBSTRG(current_detect_order_list);
1739 		n = MBSTRG(current_detect_order_list_size);
1740 		while (n > 0) {
1741 			name = (char *)mbfl_no_encoding2name(*entry);
1742 			if (name) {
1743 				add_next_index_string(return_value, name, 1);
1744 			}
1745 			entry++;
1746 			n--;
1747 		}
1748 	} else {
1749 		list = NULL;
1750 		size = 0;
1751 		switch (Z_TYPE_PP(arg1)) {
1752 		case IS_ARRAY:
1753 			if (!php_mb_parse_encoding_array(*arg1, &list, &size, 0 TSRMLS_CC)) {
1754 				if (list) {
1755 					efree(list);
1756 				}
1757 				RETURN_FALSE;
1758 			}
1759 			break;
1760 		default:
1761 			convert_to_string_ex(arg1);
1762 			if (!php_mb_parse_encoding_list(Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1), &list, &size, 0 TSRMLS_CC)) {
1763 				if (list) {
1764 					efree(list);
1765 				}
1766 				RETURN_FALSE;
1767 			}
1768 			break;
1769 		}
1770 
1771 		if (list == NULL) {
1772 			RETURN_FALSE;
1773 		}
1774 
1775 		if (MBSTRG(current_detect_order_list)) {
1776 			efree(MBSTRG(current_detect_order_list));
1777 		}
1778 		MBSTRG(current_detect_order_list) = list;
1779 		MBSTRG(current_detect_order_list_size) = size;
1780 		RETURN_TRUE;
1781 	}
1782 }
1783 /* }}} */
1784 
1785 /* {{{ proto mixed mb_substitute_character([mixed substchar])
1786    Sets the current substitute_character or returns the current substitute_character */
PHP_FUNCTION(mb_substitute_character)1787 PHP_FUNCTION(mb_substitute_character)
1788 {
1789 	zval **arg1 = NULL;
1790 
1791 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|Z", &arg1) == FAILURE) {
1792 		return;
1793 	}
1794 
1795 	if (!arg1) {
1796 		if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
1797 			RETURN_STRING("none", 1);
1798 		} else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
1799 			RETURN_STRING("long", 1);
1800 		} else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
1801 			RETURN_STRING("entity", 1);
1802 		} else {
1803 			RETURN_LONG(MBSTRG(current_filter_illegal_substchar));
1804 		}
1805 	} else {
1806 		RETVAL_TRUE;
1807 
1808 		switch (Z_TYPE_PP(arg1)) {
1809 		case IS_STRING:
1810 			if (strncasecmp("none", Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1)) == 0) {
1811 				MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
1812 			} else if (strncasecmp("long", Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1)) == 0) {
1813 				MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
1814 			} else if (strncasecmp("entity", Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1)) == 0) {
1815 				MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
1816 			} else {
1817 				convert_to_long_ex(arg1);
1818 
1819 				if (Z_LVAL_PP(arg1) < 0xffff && Z_LVAL_PP(arg1) > 0x0) {
1820 					MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1821 					MBSTRG(current_filter_illegal_substchar) = Z_LVAL_PP(arg1);
1822 				} else {
1823 					php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown character.");
1824 					RETURN_FALSE;
1825 				}
1826 			}
1827 			break;
1828 		default:
1829 			convert_to_long_ex(arg1);
1830 			if (Z_LVAL_PP(arg1) < 0xffff && Z_LVAL_PP(arg1) > 0x0) {
1831 				MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1832 				MBSTRG(current_filter_illegal_substchar) = Z_LVAL_PP(arg1);
1833 			} else {
1834 				php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown character.");
1835 				RETURN_FALSE;
1836 			}
1837 			break;
1838 		}
1839 	}
1840 }
1841 /* }}} */
1842 
1843 /* {{{ proto string mb_preferred_mime_name(string encoding)
1844    Return the preferred MIME name (charset) as a string */
PHP_FUNCTION(mb_preferred_mime_name)1845 PHP_FUNCTION(mb_preferred_mime_name)
1846 {
1847 	enum mbfl_no_encoding no_encoding;
1848 	char *name = NULL;
1849 	int name_len;
1850 
1851 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &name, &name_len) == FAILURE) {
1852 		return;
1853 	} else {
1854 		no_encoding = mbfl_name2no_encoding(name);
1855 		if (no_encoding == mbfl_no_encoding_invalid) {
1856 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
1857 			RETVAL_FALSE;
1858 		} else {
1859 			const char *preferred_name = mbfl_no2preferred_mime_name(no_encoding);
1860 			if (preferred_name == NULL || *preferred_name == '\0') {
1861 				php_error_docref(NULL TSRMLS_CC, E_WARNING, "No MIME preferred name corresponding to \"%s\"", name);
1862 				RETVAL_FALSE;
1863 			} else {
1864 				RETVAL_STRING((char *)preferred_name, 1);
1865 			}
1866 		}
1867 	}
1868 }
1869 /* }}} */
1870 
/* Byte-range predicates; each evaluates to 1 or 0 and evaluates its argument
 * more than once, so pass side-effect-free expressions only.
 * NOTE(review): presumably first/second byte of a Shift_JIS double-byte
 * character, judging by the names -- confirm against the callers. */
#define IS_SJIS1(c) (((c) >= 0x81 && (c) <= 0x9f) || ((c) >= 0xe0 && (c) <= 0xf5))
#define IS_SJIS2(c) (((c) >= 0x40 && (c) <= 0x7e) || ((c) >= 0x80 && (c) <= 0xfc))
1873 
1874 /* {{{ proto bool mb_parse_str(string encoded_string [, array result])
1875    Parses GET/POST/COOKIE data and sets global variables */
PHP_FUNCTION(mb_parse_str)1876 PHP_FUNCTION(mb_parse_str)
1877 {
1878 	zval *track_vars_array = NULL;
1879 	char *encstr = NULL;
1880 	int encstr_len;
1881 	php_mb_encoding_handler_info_t info;
1882 	enum mbfl_no_encoding detected;
1883 
1884 	track_vars_array = NULL;
1885 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|z", &encstr, &encstr_len, &track_vars_array) == FAILURE) {
1886 		return;
1887 	}
1888 
1889 	/* Clear out the array */
1890 	if (track_vars_array != NULL) {
1891 		zval_dtor(track_vars_array);
1892 		array_init(track_vars_array);
1893 	}
1894 
1895 	encstr = estrndup(encstr, encstr_len);
1896 
1897 	info.data_type              = PARSE_STRING;
1898 	info.separator              = PG(arg_separator).input;
1899 	info.force_register_globals = (track_vars_array == NULL);
1900 	info.report_errors          = 1;
1901 	info.to_encoding            = MBSTRG(current_internal_encoding);
1902 	info.to_language            = MBSTRG(language);
1903 	info.from_encodings         = MBSTRG(http_input_list);
1904 	info.num_from_encodings     = MBSTRG(http_input_list_size);
1905 	info.from_language          = MBSTRG(language);
1906 
1907 	detected = _php_mb_encoding_handler_ex(&info, track_vars_array, encstr TSRMLS_CC);
1908 
1909 	MBSTRG(http_input_identify) = detected;
1910 
1911 	RETVAL_BOOL(detected != mbfl_no_encoding_invalid);
1912 
1913 	if (encstr != NULL) efree(encstr);
1914 }
1915 /* }}} */
1916 
1917 /* {{{ proto string mb_output_handler(string contents, int status)
1918    Returns string in output buffer converted to the http_output encoding */
PHP_FUNCTION(mb_output_handler)1919 PHP_FUNCTION(mb_output_handler)
1920 {
1921 	char *arg_string;
1922 	int arg_string_len;
1923 	long arg_status;
1924 	mbfl_string string, result;
1925 	const char *charset;
1926 	char *p;
1927 	enum mbfl_no_encoding encoding;
1928 	int last_feed, len;
1929 	unsigned char send_text_mimetype = 0;
1930 	char *s, *mimetype = NULL;
1931 
1932 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl", &arg_string, &arg_string_len, &arg_status) == FAILURE) {
1933 		return;
1934 	}
1935 
1936 	encoding = MBSTRG(current_http_output_encoding);
1937 
1938  	/* start phase only */
1939  	if ((arg_status & PHP_OUTPUT_HANDLER_START) != 0) {
1940  		/* delete the converter just in case. */
1941  		if (MBSTRG(outconv)) {
1942 			MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
1943  			mbfl_buffer_converter_delete(MBSTRG(outconv));
1944  			MBSTRG(outconv) = NULL;
1945   		}
1946 		if (encoding == mbfl_no_encoding_pass) {
1947 			RETURN_STRINGL(arg_string, arg_string_len, 1);
1948 		}
1949 
1950 		/* analyze mime type */
1951 		if (SG(sapi_headers).mimetype &&
1952 			_php_mb_match_regex(
1953 				MBSTRG(http_output_conv_mimetypes),
1954 				SG(sapi_headers).mimetype,
1955 				strlen(SG(sapi_headers).mimetype))) {
1956 			if ((s = strchr(SG(sapi_headers).mimetype,';')) == NULL){
1957 				mimetype = estrdup(SG(sapi_headers).mimetype);
1958 			} else {
1959 				mimetype = estrndup(SG(sapi_headers).mimetype,s-SG(sapi_headers).mimetype);
1960 			}
1961 			send_text_mimetype = 1;
1962 		} else if (SG(sapi_headers).send_default_content_type) {
1963 			mimetype = SG(default_mimetype) ? SG(default_mimetype) : SAPI_DEFAULT_MIMETYPE;
1964 		}
1965 
1966  		/* if content-type is not yet set, set it and activate the converter */
1967  		if (SG(sapi_headers).send_default_content_type || send_text_mimetype) {
1968 			charset = mbfl_no2preferred_mime_name(encoding);
1969 			if (charset) {
1970 				len = spprintf( &p, 0, "Content-Type: %s; charset=%s",  mimetype, charset );
1971 				if (sapi_add_header(p, len, 0) != FAILURE) {
1972 					SG(sapi_headers).send_default_content_type = 0;
1973 				}
1974 			}
1975  			/* activate the converter */
1976  			MBSTRG(outconv) = mbfl_buffer_converter_new(MBSTRG(current_internal_encoding), encoding, 0);
1977 			if (send_text_mimetype){
1978 				efree(mimetype);
1979 			}
1980  		}
1981   	}
1982 
1983  	/* just return if the converter is not activated. */
1984  	if (MBSTRG(outconv) == NULL) {
1985 		RETURN_STRINGL(arg_string, arg_string_len, 1);
1986 	}
1987 
1988  	/* flag */
1989  	last_feed = ((arg_status & PHP_OUTPUT_HANDLER_END) != 0);
1990  	/* mode */
1991  	mbfl_buffer_converter_illegal_mode(MBSTRG(outconv), MBSTRG(current_filter_illegal_mode));
1992  	mbfl_buffer_converter_illegal_substchar(MBSTRG(outconv), MBSTRG(current_filter_illegal_substchar));
1993 
1994  	/* feed the string */
1995  	mbfl_string_init(&string);
1996  	string.no_language = MBSTRG(language);
1997  	string.no_encoding = MBSTRG(current_internal_encoding);
1998  	string.val = (unsigned char *)arg_string;
1999  	string.len = arg_string_len;
2000  	mbfl_buffer_converter_feed(MBSTRG(outconv), &string);
2001  	if (last_feed) {
2002  		mbfl_buffer_converter_flush(MBSTRG(outconv));
2003 	}
2004  	/* get the converter output, and return it */
2005  	mbfl_buffer_converter_result(MBSTRG(outconv), &result);
2006  	RETVAL_STRINGL((char *)result.val, result.len, 0);		/* the string is already strdup()'ed */
2007 
2008  	/* delete the converter if it is the last feed. */
2009  	if (last_feed) {
2010 		MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
2011 		mbfl_buffer_converter_delete(MBSTRG(outconv));
2012 		MBSTRG(outconv) = NULL;
2013 	}
2014 }
2015 /* }}} */
2016 
2017 /* {{{ proto int mb_strlen(string str [, string encoding])
2018    Get character numbers of a string */
PHP_FUNCTION(mb_strlen)2019 PHP_FUNCTION(mb_strlen)
2020 {
2021 	int n;
2022 	mbfl_string string;
2023 	char *enc_name = NULL;
2024 	int enc_name_len;
2025 
2026 	mbfl_string_init(&string);
2027 
2028 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s", (char **)&string.val, &string.len, &enc_name, &enc_name_len) == FAILURE) {
2029 		RETURN_FALSE;
2030 	}
2031 
2032 	string.no_language = MBSTRG(language);
2033 	if (enc_name == NULL) {
2034 		string.no_encoding = MBSTRG(current_internal_encoding);
2035 	} else {
2036 		string.no_encoding = mbfl_name2no_encoding(enc_name);
2037 		if (string.no_encoding == mbfl_no_encoding_invalid) {
2038 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2039 			RETURN_FALSE;
2040 		}
2041 	}
2042 
2043 	n = mbfl_strlen(&string);
2044 	if (n >= 0) {
2045 		RETVAL_LONG(n);
2046 	} else {
2047 		RETVAL_FALSE;
2048 	}
2049 }
2050 /* }}} */
2051 
2052 /* {{{ proto int mb_strpos(string haystack, string needle [, int offset [, string encoding]])
2053    Find position of first occurrence of a string within another */
PHP_FUNCTION(mb_strpos)2054 PHP_FUNCTION(mb_strpos)
2055 {
2056 	int n, reverse = 0;
2057 	long offset;
2058 	mbfl_string haystack, needle;
2059 	char *enc_name = NULL;
2060 	int enc_name_len;
2061 
2062 	mbfl_string_init(&haystack);
2063 	mbfl_string_init(&needle);
2064 	haystack.no_language = MBSTRG(language);
2065 	haystack.no_encoding = MBSTRG(current_internal_encoding);
2066 	needle.no_language = MBSTRG(language);
2067 	needle.no_encoding = MBSTRG(current_internal_encoding);
2068 	offset = 0;
2069 
2070 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ls", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &offset, &enc_name, &enc_name_len) == FAILURE) {
2071 		RETURN_FALSE;
2072 	}
2073 
2074 	if (enc_name != NULL) {
2075 		haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2076 		if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2077 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2078 			RETURN_FALSE;
2079 		}
2080 	}
2081 
2082 	if (offset < 0 || offset > mbfl_strlen(&haystack)) {
2083 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset not contained in string");
2084 		RETURN_FALSE;
2085 	}
2086 	if (needle.len == 0) {
2087 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter");
2088 		RETURN_FALSE;
2089 	}
2090 
2091 	n = mbfl_strpos(&haystack, &needle, offset, reverse);
2092 	if (n >= 0) {
2093 		RETVAL_LONG(n);
2094 	} else {
2095 		switch (-n) {
2096 		case 1:
2097 			break;
2098 		case 2:
2099 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Needle has not positive length");
2100 			break;
2101 		case 4:
2102 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding or conversion error");
2103 			break;
2104 		case 8:
2105 			php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Argument is empty");
2106 			break;
2107 		default:
2108 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown error in mb_strpos");
2109 			break;
2110 		}
2111 		RETVAL_FALSE;
2112 	}
2113 }
2114 /* }}} */
2115 
2116 /* {{{ proto int mb_strrpos(string haystack, string needle [, int offset [, string encoding]])
2117    Find position of last occurrence of a string within another */
PHP_FUNCTION(mb_strrpos)2118 PHP_FUNCTION(mb_strrpos)
2119 {
2120 	int n;
2121 	mbfl_string haystack, needle;
2122 	char *enc_name = NULL;
2123 	int enc_name_len;
2124 	zval **zoffset = NULL;
2125 	long offset = 0, str_flg;
2126 	char *enc_name2 = NULL;
2127 	int enc_name_len2;
2128 
2129 	mbfl_string_init(&haystack);
2130 	mbfl_string_init(&needle);
2131 	haystack.no_language = MBSTRG(language);
2132 	haystack.no_encoding = MBSTRG(current_internal_encoding);
2133 	needle.no_language = MBSTRG(language);
2134 	needle.no_encoding = MBSTRG(current_internal_encoding);
2135 
2136 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|Zs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &zoffset, &enc_name, &enc_name_len) == FAILURE) {
2137 		RETURN_FALSE;
2138 	}
2139 
2140 	if (zoffset) {
2141 		if (Z_TYPE_PP(zoffset) == IS_STRING) {
2142 			enc_name2     = Z_STRVAL_PP(zoffset);
2143 			enc_name_len2 = Z_STRLEN_PP(zoffset);
2144 			str_flg       = 1;
2145 
2146 			if (enc_name2 != NULL) {
2147 				switch (*enc_name2) {
2148 				case '0':
2149 				case '1':
2150 				case '2':
2151 				case '3':
2152 				case '4':
2153 				case '5':
2154 				case '6':
2155 				case '7':
2156 				case '8':
2157 				case '9':
2158 				case ' ':
2159 				case '-':
2160 				case '.':
2161 					break;
2162 				default :
2163 					str_flg = 0;
2164 					break;
2165 				}
2166 			}
2167 
2168 			if (str_flg) {
2169 				convert_to_long_ex(zoffset);
2170 				offset   = Z_LVAL_PP(zoffset);
2171 			} else {
2172 				enc_name     = enc_name2;
2173 				enc_name_len = enc_name_len2;
2174 			}
2175 		} else {
2176 			convert_to_long_ex(zoffset);
2177 			offset = Z_LVAL_PP(zoffset);
2178 		}
2179 	}
2180 
2181 	if (enc_name != NULL) {
2182 		haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2183 		if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2184 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2185 			RETURN_FALSE;
2186 		}
2187 	}
2188 
2189 	if (haystack.len <= 0) {
2190 		RETURN_FALSE;
2191 	}
2192 	if (needle.len <= 0) {
2193 		RETURN_FALSE;
2194 	}
2195 
2196 	{
2197 		int haystack_char_len = mbfl_strlen(&haystack);
2198 		if ((offset > 0 && offset > haystack_char_len) ||
2199 			(offset < 0 && -offset > haystack_char_len)) {
2200 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset is greater than the length of haystack string");
2201 			RETURN_FALSE;
2202 		}
2203 	}
2204 
2205 	n = mbfl_strpos(&haystack, &needle, offset, 1);
2206 	if (n >= 0) {
2207 		RETVAL_LONG(n);
2208 	} else {
2209 		RETVAL_FALSE;
2210 	}
2211 }
2212 /* }}} */
2213 
2214 /* {{{ proto int mb_stripos(string haystack, string needle [, int offset [, string encoding]])
2215    Finds position of first occurrence of a string within another, case insensitive */
PHP_FUNCTION(mb_stripos)2216 PHP_FUNCTION(mb_stripos)
2217 {
2218 	int n;
2219 	long offset;
2220 	mbfl_string haystack, needle;
2221 	char *from_encoding = (char*)mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
2222 	int from_encoding_len;
2223 	n = -1;
2224 	offset = 0;
2225 
2226 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ls", (char **)&haystack.val, (int *)&haystack.len, (char **)&needle.val, (int *)&needle.len, &offset, &from_encoding, &from_encoding_len) == FAILURE) {
2227 		RETURN_FALSE;
2228 	}
2229 	if (needle.len == 0) {
2230 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter");
2231 		RETURN_FALSE;
2232 	}
2233 	n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, from_encoding TSRMLS_CC);
2234 
2235 	if (n >= 0) {
2236 		RETVAL_LONG(n);
2237 	} else {
2238 		RETVAL_FALSE;
2239 	}
2240 }
2241 /* }}} */
2242 
2243 /* {{{ proto int mb_strripos(string haystack, string needle [, int offset [, string encoding]])
2244    Finds position of last occurrence of a string within another, case insensitive */
PHP_FUNCTION(mb_strripos)2245 PHP_FUNCTION(mb_strripos)
2246 {
2247 	int n;
2248 	long offset;
2249 	mbfl_string haystack, needle;
2250 	const char *from_encoding = mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
2251 	int from_encoding_len;
2252 	n = -1;
2253 	offset = 0;
2254 
2255 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ls", (char **)&haystack.val, (int *)&haystack.len, (char **)&needle.val, (int *)&needle.len, &offset, &from_encoding, &from_encoding_len) == FAILURE) {
2256 		RETURN_FALSE;
2257 	}
2258 
2259 	n = php_mb_stripos(1, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, from_encoding TSRMLS_CC);
2260 
2261 	if (n >= 0) {
2262 		RETVAL_LONG(n);
2263 	} else {
2264 		RETVAL_FALSE;
2265 	}
2266 }
2267 /* }}} */
2268 
2269 /* {{{ proto string mb_strstr(string haystack, string needle[, bool part[, string encoding]])
2270    Finds first occurrence of a string within another */
PHP_FUNCTION(mb_strstr)2271 PHP_FUNCTION(mb_strstr)
2272 {
2273 	int n, len, mblen;
2274 	mbfl_string haystack, needle, result, *ret = NULL;
2275 	char *enc_name = NULL;
2276 	int enc_name_len;
2277 	zend_bool part = 0;
2278 
2279 	mbfl_string_init(&haystack);
2280 	mbfl_string_init(&needle);
2281 	haystack.no_language = MBSTRG(language);
2282 	haystack.no_encoding = MBSTRG(current_internal_encoding);
2283 	needle.no_language = MBSTRG(language);
2284 	needle.no_encoding = MBSTRG(current_internal_encoding);
2285 
2286 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, (int *)&haystack.len, (char **)&needle.val, (int *)&needle.len, &part, &enc_name, &enc_name_len) == FAILURE) {
2287 		RETURN_FALSE;
2288 	}
2289 
2290 	if (enc_name != NULL) {
2291 		haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2292 		if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2293 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2294 			RETURN_FALSE;
2295 		}
2296 	}
2297 
2298 	if (needle.len <= 0) {
2299 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter");
2300 		RETURN_FALSE;
2301 	}
2302 	n = mbfl_strpos(&haystack, &needle, 0, 0);
2303 	if (n >= 0) {
2304 		mblen = mbfl_strlen(&haystack);
2305 		if (part) {
2306 			ret = mbfl_substr(&haystack, &result, 0, n);
2307 			if (ret != NULL) {
2308 				RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2309 			} else {
2310 				RETVAL_FALSE;
2311 			}
2312 		} else {
2313 			len = (mblen - n);
2314 			ret = mbfl_substr(&haystack, &result, n, len);
2315 			if (ret != NULL) {
2316 				RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2317 			} else {
2318 				RETVAL_FALSE;
2319 			}
2320 		}
2321 	} else {
2322 		RETVAL_FALSE;
2323 	}
2324 }
2325 /* }}} */
2326 
2327 /* {{{ proto string mb_strrchr(string haystack, string needle[, bool part[, string encoding]])
2328    Finds the last occurrence of a character in a string within another */
PHP_FUNCTION(mb_strrchr)2329 PHP_FUNCTION(mb_strrchr)
2330 {
2331 	int n, len, mblen;
2332 	mbfl_string haystack, needle, result, *ret = NULL;
2333 	char *enc_name = NULL;
2334 	int enc_name_len;
2335 	zend_bool part = 0;
2336 
2337 	mbfl_string_init(&haystack);
2338 	mbfl_string_init(&needle);
2339 	haystack.no_language = MBSTRG(language);
2340 	haystack.no_encoding = MBSTRG(current_internal_encoding);
2341 	needle.no_language = MBSTRG(language);
2342 	needle.no_encoding = MBSTRG(current_internal_encoding);
2343 
2344 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &enc_name, &enc_name_len) == FAILURE) {
2345 		RETURN_FALSE;
2346 	}
2347 
2348 	if (enc_name != NULL) {
2349 		haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2350 		if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2351 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2352 			RETURN_FALSE;
2353 		}
2354 	}
2355 
2356 	if (haystack.len <= 0) {
2357 		RETURN_FALSE;
2358 	}
2359 	if (needle.len <= 0) {
2360 		RETURN_FALSE;
2361 	}
2362 	n = mbfl_strpos(&haystack, &needle, 0, 1);
2363 	if (n >= 0) {
2364 		mblen = mbfl_strlen(&haystack);
2365 		if (part) {
2366 			ret = mbfl_substr(&haystack, &result, 0, n);
2367 			if (ret != NULL) {
2368 				RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2369 			} else {
2370 				RETVAL_FALSE;
2371 			}
2372 		} else {
2373 			len = (mblen - n);
2374 			ret = mbfl_substr(&haystack, &result, n, len);
2375 			if (ret != NULL) {
2376 				RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2377 			} else {
2378 				RETVAL_FALSE;
2379 			}
2380 		}
2381 	} else {
2382 		RETVAL_FALSE;
2383 	}
2384 }
2385 /* }}} */
2386 
2387 /* {{{ proto string mb_stristr(string haystack, string needle[, bool part[, string encoding]])
2388    Finds first occurrence of a string within another, case insensitive */
PHP_FUNCTION(mb_stristr)2389 PHP_FUNCTION(mb_stristr)
2390 {
2391 	zend_bool part = 0;
2392 	unsigned int from_encoding_len, len, mblen;
2393 	int n;
2394 	mbfl_string haystack, needle, result, *ret = NULL;
2395 	const char *from_encoding = mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
2396 	mbfl_string_init(&haystack);
2397 	mbfl_string_init(&needle);
2398 	haystack.no_language = MBSTRG(language);
2399 	haystack.no_encoding = MBSTRG(current_internal_encoding);
2400 	needle.no_language = MBSTRG(language);
2401 	needle.no_encoding = MBSTRG(current_internal_encoding);
2402 
2403 
2404 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &from_encoding, &from_encoding_len) == FAILURE) {
2405 		RETURN_FALSE;
2406 	}
2407 
2408 	if (!needle.len) {
2409 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter");
2410 		RETURN_FALSE;
2411 	}
2412 
2413 	haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding);
2414 	if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2415 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", from_encoding);
2416 		RETURN_FALSE;
2417 	}
2418 
2419 	n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, 0, from_encoding TSRMLS_CC);
2420 
2421 	if (n <0) {
2422 		RETURN_FALSE;
2423 	}
2424 
2425 	mblen = mbfl_strlen(&haystack);
2426 
2427 	if (part) {
2428 		ret = mbfl_substr(&haystack, &result, 0, n);
2429 		if (ret != NULL) {
2430 			RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2431 		} else {
2432 			RETVAL_FALSE;
2433 		}
2434 	} else {
2435 		len = (mblen - n);
2436 		ret = mbfl_substr(&haystack, &result, n, len);
2437 		if (ret != NULL) {
2438 			RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2439 		} else {
2440 			RETVAL_FALSE;
2441 		}
2442 	}
2443 }
2444 /* }}} */
2445 
2446 /* {{{ proto string mb_strrichr(string haystack, string needle[, bool part[, string encoding]])
2447    Finds the last occurrence of a character in a string within another, case insensitive */
PHP_FUNCTION(mb_strrichr)2448 PHP_FUNCTION(mb_strrichr)
2449 {
2450 	zend_bool part = 0;
2451 	int n, from_encoding_len, len, mblen;
2452 	mbfl_string haystack, needle, result, *ret = NULL;
2453 	char *from_encoding = (char*)mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
2454 	mbfl_string_init(&haystack);
2455 	mbfl_string_init(&needle);
2456 	haystack.no_language = MBSTRG(language);
2457 	haystack.no_encoding = MBSTRG(current_internal_encoding);
2458 	needle.no_language = MBSTRG(language);
2459 	needle.no_encoding = MBSTRG(current_internal_encoding);
2460 
2461 
2462 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &from_encoding, &from_encoding_len) == FAILURE) {
2463 		RETURN_FALSE;
2464 	}
2465 
2466 	haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding);
2467 	if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2468 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", from_encoding);
2469 		RETURN_FALSE;
2470 	}
2471 
2472 	n = php_mb_stripos(1, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, 0, from_encoding TSRMLS_CC);
2473 
2474 	if (n <0) {
2475 		RETURN_FALSE;
2476 	}
2477 
2478 	mblen = mbfl_strlen(&haystack);
2479 
2480 	if (part) {
2481 		ret = mbfl_substr(&haystack, &result, 0, n);
2482 		if (ret != NULL) {
2483 			RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2484 		} else {
2485 			RETVAL_FALSE;
2486 		}
2487 	} else {
2488 		len = (mblen - n);
2489 		ret = mbfl_substr(&haystack, &result, n, len);
2490 		if (ret != NULL) {
2491 			RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2492 		} else {
2493 			RETVAL_FALSE;
2494 		}
2495 	}
2496 }
2497 /* }}} */
2498 
2499 /* {{{ proto int mb_substr_count(string haystack, string needle [, string encoding])
2500    Count the number of substring occurrences */
PHP_FUNCTION(mb_substr_count)2501 PHP_FUNCTION(mb_substr_count)
2502 {
2503 	int n;
2504 	mbfl_string haystack, needle;
2505 	char *enc_name = NULL;
2506 	int enc_name_len;
2507 
2508 	mbfl_string_init(&haystack);
2509 	mbfl_string_init(&needle);
2510 	haystack.no_language = MBSTRG(language);
2511 	haystack.no_encoding = MBSTRG(current_internal_encoding);
2512 	needle.no_language = MBSTRG(language);
2513 	needle.no_encoding = MBSTRG(current_internal_encoding);
2514 
2515 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|s", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &enc_name, &enc_name_len) == FAILURE) {
2516 		return;
2517 	}
2518 
2519 	if (enc_name != NULL) {
2520 		haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2521 		if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2522 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2523 			RETURN_FALSE;
2524 		}
2525 	}
2526 
2527 	if (needle.len <= 0) {
2528 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty substring");
2529 		RETURN_FALSE;
2530 	}
2531 
2532 	n = mbfl_substr_count(&haystack, &needle);
2533 	if (n >= 0) {
2534 		RETVAL_LONG(n);
2535 	} else {
2536 		RETVAL_FALSE;
2537 	}
2538 }
2539 /* }}} */
2540 
2541 /* {{{ proto string mb_substr(string str, int start [, int length [, string encoding]])
2542    Returns part of a string */
PHP_FUNCTION(mb_substr)2543 PHP_FUNCTION(mb_substr)
2544 {
2545 	size_t argc = ZEND_NUM_ARGS();
2546 	char *str, *encoding;
2547 	long from, len;
2548 	int mblen, str_len, encoding_len;
2549 	mbfl_string string, result, *ret;
2550 
2551 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|ls", &str, &str_len, &from, &len, &encoding, &encoding_len) == FAILURE) {
2552 		return;
2553 	}
2554 
2555 	mbfl_string_init(&string);
2556 	string.no_language = MBSTRG(language);
2557 	string.no_encoding = MBSTRG(current_internal_encoding);
2558 
2559 	if (argc == 4) {
2560 		string.no_encoding = mbfl_name2no_encoding(encoding);
2561 		if (string.no_encoding == mbfl_no_encoding_invalid) {
2562 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
2563 			RETURN_FALSE;
2564 		}
2565 	}
2566 
2567 	string.val = (unsigned char *)str;
2568 	string.len = str_len;
2569 
2570 	if (argc < 3) {
2571 		len = str_len;
2572 	}
2573 
2574 	/* measures length */
2575 	mblen = 0;
2576 	if (from < 0 || len < 0) {
2577 		mblen = mbfl_strlen(&string);
2578 	}
2579 
2580 	/* if "from" position is negative, count start position from the end
2581 	 * of the string
2582 	 */
2583 	if (from < 0) {
2584 		from = mblen + from;
2585 		if (from < 0) {
2586 			from = 0;
2587 		}
2588 	}
2589 
2590 	/* if "length" position is negative, set it to the length
2591 	 * needed to stop that many chars from the end of the string
2592 	 */
2593 	if (len < 0) {
2594 		len = (mblen - from) + len;
2595 		if (len < 0) {
2596 			len = 0;
2597 		}
2598 	}
2599 
2600 	if (((MBSTRG(func_overload) & MB_OVERLOAD_STRING) == MB_OVERLOAD_STRING)
2601 		&& (from >= mbfl_strlen(&string))) {
2602 		RETURN_FALSE;
2603 	}
2604 
2605 	ret = mbfl_substr(&string, &result, from, len);
2606 	if (NULL == ret) {
2607 		RETURN_FALSE;
2608 	}
2609 
2610 	RETURN_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */
2611 }
2612 /* }}} */
2613 
2614 /* {{{ proto string mb_strcut(string str, int start [, int length [, string encoding]])
2615    Returns part of a string */
PHP_FUNCTION(mb_strcut)2616 PHP_FUNCTION(mb_strcut)
2617 {
2618 	size_t argc = ZEND_NUM_ARGS();
2619 	char *encoding;
2620 	long from, len;
2621 	int encoding_len;
2622 	mbfl_string string, result, *ret;
2623 
2624 	mbfl_string_init(&string);
2625 	string.no_language = MBSTRG(language);
2626 	string.no_encoding = MBSTRG(current_internal_encoding);
2627 
2628 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|ls", (char **)&string.val, (int **)&string.len, &from, &len, &encoding, &encoding_len) == FAILURE) {
2629 		return;
2630 	}
2631 
2632 	if (argc == 4) {
2633 		string.no_encoding = mbfl_name2no_encoding(encoding);
2634 		if (string.no_encoding == mbfl_no_encoding_invalid) {
2635 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
2636 			RETURN_FALSE;
2637 		}
2638 	}
2639 
2640 	if (argc < 3) {
2641 		len = string.len;
2642 	}
2643 
2644 	/* if "from" position is negative, count start position from the end
2645 	 * of the string
2646 	 */
2647 	if (from < 0) {
2648 		from = string.len + from;
2649 		if (from < 0) {
2650 			from = 0;
2651 		}
2652 	}
2653 
2654 	/* if "length" position is negative, set it to the length
2655 	 * needed to stop that many chars from the end of the string
2656 	 */
2657 	if (len < 0) {
2658 		len = (string.len - from) + len;
2659 		if (len < 0) {
2660 			len = 0;
2661 		}
2662 	}
2663 
2664 	if ((unsigned int)from > string.len) {
2665 		RETURN_FALSE;
2666 	}
2667 
2668 	ret = mbfl_strcut(&string, &result, from, len);
2669 	if (ret == NULL) {
2670 		RETURN_FALSE;
2671 	}
2672 
2673 	RETURN_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */
2674 }
2675 /* }}} */
2676 
2677 /* {{{ proto int mb_strwidth(string str [, string encoding])
2678    Gets terminal width of a string */
PHP_FUNCTION(mb_strwidth)2679 PHP_FUNCTION(mb_strwidth)
2680 {
2681 	int n;
2682 	mbfl_string string;
2683 	char *enc_name = NULL;
2684 	int enc_name_len;
2685 
2686 	mbfl_string_init(&string);
2687 
2688 	string.no_language = MBSTRG(language);
2689 	string.no_encoding = MBSTRG(current_internal_encoding);
2690 
2691 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s", (char **)&string.val, &string.len, &enc_name, &enc_name_len) == FAILURE) {
2692 		return;
2693 	}
2694 
2695 	if (enc_name != NULL) {
2696 		string.no_encoding = mbfl_name2no_encoding(enc_name);
2697 		if (string.no_encoding == mbfl_no_encoding_invalid) {
2698 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2699 			RETURN_FALSE;
2700 		}
2701 	}
2702 
2703 	n = mbfl_strwidth(&string);
2704 	if (n >= 0) {
2705 		RETVAL_LONG(n);
2706 	} else {
2707 		RETVAL_FALSE;
2708 	}
2709 }
2710 /* }}} */
2711 
2712 /* {{{ proto string mb_strimwidth(string str, int start, int width [, string trimmarker [, string encoding]])
2713    Trim the string in terminal width */
PHP_FUNCTION(mb_strimwidth)2714 PHP_FUNCTION(mb_strimwidth)
2715 {
2716 	char *str, *trimmarker, *encoding;
2717 	long from, width;
2718 	int str_len, trimmarker_len, encoding_len;
2719 	mbfl_string string, result, marker, *ret;
2720 
2721 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sll|ss", &str, &str_len, &from, &width, &trimmarker, &trimmarker_len, &encoding, &encoding_len) == FAILURE) {
2722 		return;
2723 	}
2724 
2725 	mbfl_string_init(&string);
2726 	mbfl_string_init(&marker);
2727 	string.no_language = MBSTRG(language);
2728 	string.no_encoding = MBSTRG(current_internal_encoding);
2729 	marker.no_language = MBSTRG(language);
2730 	marker.no_encoding = MBSTRG(current_internal_encoding);
2731 	marker.val = NULL;
2732 	marker.len = 0;
2733 
2734 	if (ZEND_NUM_ARGS() == 5) {
2735 		string.no_encoding = marker.no_encoding = mbfl_name2no_encoding(encoding);
2736 		if (string.no_encoding == mbfl_no_encoding_invalid) {
2737 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
2738 			RETURN_FALSE;
2739 		}
2740 	}
2741 
2742 	string.val = (unsigned char *)str;
2743 	string.len = str_len;
2744 
2745 	if (from < 0 || from > str_len) {
2746 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Start position is out of range");
2747 		RETURN_FALSE;
2748 	}
2749 
2750 	if (width < 0) {
2751 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Width is negative value");
2752 		RETURN_FALSE;
2753 	}
2754 
2755 	if (ZEND_NUM_ARGS() >= 4) {
2756 		marker.val = (unsigned char *)trimmarker;
2757 		marker.len = trimmarker_len;
2758 	}
2759 
2760 	ret = mbfl_strimwidth(&string, &marker, &result, from, width);
2761 
2762 	if (ret == NULL) {
2763 		RETURN_FALSE;
2764 	}
2765 
2766 	RETVAL_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */
2767 }
2768 /* }}} */
2769 
2770 /* {{{ MBSTRING_API char *php_mb_convert_encoding() */
php_mb_convert_encoding(const char * input,size_t length,const char * _to_encoding,const char * _from_encodings,size_t * output_len TSRMLS_DC)2771 MBSTRING_API char * php_mb_convert_encoding(const char *input, size_t length, const char *_to_encoding, const char *_from_encodings, size_t *output_len TSRMLS_DC)
2772 {
2773 	mbfl_string string, result, *ret;
2774 	enum mbfl_no_encoding from_encoding, to_encoding;
2775 	mbfl_buffer_converter *convd;
2776 	int size, *list;
2777 	char *output=NULL;
2778 
2779 	if (output_len) {
2780 		*output_len = 0;
2781 	}
2782 	if (!input) {
2783 		return NULL;
2784 	}
2785 	/* new encoding */
2786 	if (_to_encoding && strlen(_to_encoding)) {
2787 		to_encoding = mbfl_name2no_encoding(_to_encoding);
2788 		if (to_encoding == mbfl_no_encoding_invalid) {
2789 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", _to_encoding);
2790 			return NULL;
2791 		}
2792 	} else {
2793 		to_encoding = MBSTRG(current_internal_encoding);
2794 	}
2795 
2796 	/* initialize string */
2797 	mbfl_string_init(&string);
2798 	mbfl_string_init(&result);
2799 	from_encoding = MBSTRG(current_internal_encoding);
2800 	string.no_encoding = from_encoding;
2801 	string.no_language = MBSTRG(language);
2802 	string.val = (unsigned char *)input;
2803 	string.len = length;
2804 
2805 	/* pre-conversion encoding */
2806 	if (_from_encodings) {
2807 		list = NULL;
2808 		size = 0;
2809 	    php_mb_parse_encoding_list(_from_encodings, strlen(_from_encodings), &list, &size, 0 TSRMLS_CC);
2810 		if (size == 1) {
2811 			from_encoding = *list;
2812 			string.no_encoding = from_encoding;
2813 		} else if (size > 1) {
2814 			/* auto detect */
2815 			from_encoding = mbfl_identify_encoding_no(&string, list, size, MBSTRG(strict_detection));
2816 			if (from_encoding != mbfl_no_encoding_invalid) {
2817 				string.no_encoding = from_encoding;
2818 			} else {
2819 				php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to detect character encoding");
2820 				from_encoding = mbfl_no_encoding_pass;
2821 				to_encoding = from_encoding;
2822 				string.no_encoding = from_encoding;
2823 			}
2824 		} else {
2825 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Illegal character encoding specified");
2826 		}
2827 		if (list != NULL) {
2828 			efree((void *)list);
2829 		}
2830 	}
2831 
2832 	/* initialize converter */
2833 	convd = mbfl_buffer_converter_new(from_encoding, to_encoding, string.len);
2834 	if (convd == NULL) {
2835 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create character encoding converter");
2836 		return NULL;
2837 	}
2838 	mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
2839 	mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
2840 
2841 	/* do it */
2842 	ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
2843 	if (ret) {
2844 		if (output_len) {
2845 			*output_len = ret->len;
2846 		}
2847 		output = (char *)ret->val;
2848 	}
2849 
2850 	MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
2851 	mbfl_buffer_converter_delete(convd);
2852 	return output;
2853 }
2854 /* }}} */
2855 
2856 /* {{{ proto string mb_convert_encoding(string str, string to-encoding [, mixed from-encoding])
2857    Returns converted string in desired encoding */
PHP_FUNCTION(mb_convert_encoding)2858 PHP_FUNCTION(mb_convert_encoding)
2859 {
2860 	char *arg_str, *arg_new;
2861 	int str_len, new_len;
2862 	zval *arg_old;
2863 	int i;
2864 	size_t size, l, n;
2865 	char *_from_encodings = NULL, *ret, *s_free = NULL;
2866 
2867 	zval **hash_entry;
2868 	HashTable *target_hash;
2869 
2870 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|z", &arg_str, &str_len, &arg_new, &new_len, &arg_old) == FAILURE) {
2871 		return;
2872 	}
2873 
2874 	if (ZEND_NUM_ARGS() == 3) {
2875 		switch (Z_TYPE_P(arg_old)) {
2876 		case IS_ARRAY:
2877 			target_hash = Z_ARRVAL_P(arg_old);
2878 			zend_hash_internal_pointer_reset(target_hash);
2879 			i = zend_hash_num_elements(target_hash);
2880 			_from_encodings = NULL;
2881 
2882 			while (i > 0) {
2883 				if (zend_hash_get_current_data(target_hash, (void **) &hash_entry) == FAILURE) {
2884 					break;
2885 				}
2886 
2887 				convert_to_string_ex(hash_entry);
2888 
2889 				if ( _from_encodings) {
2890 					l = strlen(_from_encodings);
2891 					n = strlen(Z_STRVAL_PP(hash_entry));
2892 					_from_encodings = erealloc(_from_encodings, l+n+2);
2893 					strcpy(_from_encodings+l, ",");
2894 					strcpy(_from_encodings+l+1, Z_STRVAL_PP(hash_entry));
2895 				} else {
2896 					_from_encodings = estrdup(Z_STRVAL_PP(hash_entry));
2897 				}
2898 
2899 				zend_hash_move_forward(target_hash);
2900 				i--;
2901 			}
2902 
2903 			if (_from_encodings != NULL && !strlen(_from_encodings)) {
2904 				efree(_from_encodings);
2905 				_from_encodings = NULL;
2906 			}
2907 			s_free = _from_encodings;
2908 			break;
2909 		default:
2910 			convert_to_string(arg_old);
2911 			_from_encodings = Z_STRVAL_P(arg_old);
2912 			break;
2913 		}
2914 	}
2915 
2916 	/* new encoding */
2917 	ret = php_mb_convert_encoding(arg_str, str_len, arg_new, _from_encodings, &size TSRMLS_CC);
2918 	if (ret != NULL) {
2919 		RETVAL_STRINGL(ret, size, 0);		/* the string is already strdup()'ed */
2920 	} else {
2921 		RETVAL_FALSE;
2922 	}
2923 
2924 	if ( s_free) {
2925 		efree(s_free);
2926 	}
2927 }
2928 /* }}} */
2929 
2930 /* {{{ proto string mb_convert_case(string sourcestring, int mode [, string encoding])
2931    Returns a case-folded version of sourcestring */
PHP_FUNCTION(mb_convert_case)2932 PHP_FUNCTION(mb_convert_case)
2933 {
2934 	char *str, *from_encoding = (char*)mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
2935 	int str_len, from_encoding_len;
2936 	long case_mode = 0;
2937 	char *newstr;
2938 	size_t ret_len;
2939 
2940 	RETVAL_FALSE;
2941 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|s!", &str, &str_len,
2942 				&case_mode, &from_encoding, &from_encoding_len) == FAILURE)
2943 		RETURN_FALSE;
2944 
2945 	newstr = php_unicode_convert_case(case_mode, str, (size_t) str_len, &ret_len, from_encoding TSRMLS_CC);
2946 
2947 	if (newstr) {
2948 		RETVAL_STRINGL(newstr, ret_len, 0);
2949 	}
2950 }
2951 /* }}} */
2952 
2953 /* {{{ proto string mb_strtoupper(string sourcestring [, string encoding])
2954  *  Returns a uppercased version of sourcestring
2955  */
PHP_FUNCTION(mb_strtoupper)2956 PHP_FUNCTION(mb_strtoupper)
2957 {
2958 	char *str, *from_encoding = (char*)mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
2959 	int str_len, from_encoding_len;
2960 	char *newstr;
2961 	size_t ret_len;
2962 
2963 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s!", &str, &str_len,
2964 				&from_encoding, &from_encoding_len) == FAILURE) {
2965 		return;
2966 	}
2967 	newstr = php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, str, (size_t) str_len, &ret_len, from_encoding TSRMLS_CC);
2968 
2969 	if (newstr) {
2970 		RETURN_STRINGL(newstr, ret_len, 0);
2971 	}
2972 	RETURN_FALSE;
2973 }
2974 /* }}} */
2975 
2976 /* {{{ proto string mb_strtolower(string sourcestring [, string encoding])
2977  *  Returns a lowercased version of sourcestring
2978  */
PHP_FUNCTION(mb_strtolower)2979 PHP_FUNCTION(mb_strtolower)
2980 {
2981 	char *str, *from_encoding = (char*)mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
2982 	int str_len, from_encoding_len;
2983 	char *newstr;
2984 	size_t ret_len;
2985 
2986 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s!", &str, &str_len,
2987 				&from_encoding, &from_encoding_len) == FAILURE) {
2988 		return;
2989 	}
2990 	newstr = php_unicode_convert_case(PHP_UNICODE_CASE_LOWER, str, (size_t) str_len, &ret_len, from_encoding TSRMLS_CC);
2991 
2992 	if (newstr) {
2993 		RETURN_STRINGL(newstr, ret_len, 0);
2994 	}
2995 	RETURN_FALSE;
2996 }
2997 /* }}} */
2998 
2999 /* {{{ proto string mb_detect_encoding(string str [, mixed encoding_list [, bool strict]])
3000    Encodings of the given string is returned (as a string) */
PHP_FUNCTION(mb_detect_encoding)3001 PHP_FUNCTION(mb_detect_encoding)
3002 {
3003 	char *str;
3004 	int str_len;
3005 	zend_bool strict=0;
3006 	zval *encoding_list;
3007 
3008 	mbfl_string string;
3009 	const char *ret;
3010 	enum mbfl_no_encoding *elist;
3011 	int size, *list;
3012 
3013 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|zb", &str, &str_len, &encoding_list, &strict) == FAILURE) {
3014 		return;
3015 	}
3016 
3017 	/* make encoding list */
3018 	list = NULL;
3019 	size = 0;
3020 	if (ZEND_NUM_ARGS() >= 2 && !ZVAL_IS_NULL(encoding_list)) {
3021 		switch (Z_TYPE_P(encoding_list)) {
3022 		case IS_ARRAY:
3023 			if (!php_mb_parse_encoding_array(encoding_list, &list, &size, 0 TSRMLS_CC)) {
3024 				if (list) {
3025 					efree(list);
3026 					list = NULL;
3027 					size = 0;
3028 				}
3029 			}
3030 			break;
3031 		default:
3032 			convert_to_string(encoding_list);
3033 			if (!php_mb_parse_encoding_list(Z_STRVAL_P(encoding_list), Z_STRLEN_P(encoding_list), &list, &size, 0 TSRMLS_CC)) {
3034 				if (list) {
3035 					efree(list);
3036 					list = NULL;
3037 					size = 0;
3038 				}
3039 			}
3040 			break;
3041 		}
3042 		if (size <= 0) {
3043 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Illegal argument");
3044 		}
3045 	}
3046 
3047 	if (ZEND_NUM_ARGS() < 3) {
3048 		strict = (zend_bool)MBSTRG(strict_detection);
3049 	}
3050 
3051 	if (size > 0 && list != NULL) {
3052 		elist = list;
3053 	} else {
3054 		elist = MBSTRG(current_detect_order_list);
3055 		size = MBSTRG(current_detect_order_list_size);
3056 	}
3057 
3058 	mbfl_string_init(&string);
3059 	string.no_language = MBSTRG(language);
3060 	string.val = (unsigned char *)str;
3061 	string.len = str_len;
3062 	ret = mbfl_identify_encoding_name(&string, elist, size, strict);
3063 
3064 	if (list != NULL) {
3065 		efree((void *)list);
3066 	}
3067 
3068 	if (ret == NULL) {
3069 		RETURN_FALSE;
3070 	}
3071 
3072 	RETVAL_STRING((char *)ret, 1);
3073 }
3074 /* }}} */
3075 
3076 /* {{{ proto mixed mb_list_encodings()
3077    Returns an array of all supported entity encodings */
PHP_FUNCTION(mb_list_encodings)3078 PHP_FUNCTION(mb_list_encodings)
3079 {
3080 	const mbfl_encoding **encodings;
3081 	const mbfl_encoding *encoding;
3082 	int i;
3083 
3084 	array_init(return_value);
3085 	i = 0;
3086 	encodings = mbfl_get_supported_encodings();
3087 	while ((encoding = encodings[i++]) != NULL) {
3088 		add_next_index_string(return_value, (char *) encoding->name, 1);
3089 	}
3090 }
3091 /* }}} */
3092 
3093 /* {{{ proto array mb_encoding_aliases(string encoding)
3094    Returns an array of the aliases of a given encoding name */
PHP_FUNCTION(mb_encoding_aliases)3095 PHP_FUNCTION(mb_encoding_aliases)
3096 {
3097 	const mbfl_encoding *encoding;
3098 	char *name = NULL;
3099 	int name_len;
3100 
3101 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &name, &name_len) == FAILURE) {
3102 		RETURN_FALSE;
3103 	}
3104 
3105 	encoding = mbfl_name2encoding(name);
3106 	if (!encoding) {
3107 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
3108 		RETURN_FALSE;
3109 	}
3110 
3111 	array_init(return_value);
3112 	if (encoding->aliases != NULL) {
3113 		const char **alias;
3114 		for (alias = *encoding->aliases; *alias; ++alias) {
3115 			add_next_index_string(return_value, (char *)*alias, 1);
3116 		}
3117 	}
3118 }
3119 /* }}} */
3120 
3121 /* {{{ proto string mb_encode_mimeheader(string str [, string charset [, string transfer-encoding [, string linefeed [, int indent]]]])
3122    Converts the string to MIME "encoded-word" in the format of =?charset?(B|Q)?encoded_string?= */
PHP_FUNCTION(mb_encode_mimeheader)3123 PHP_FUNCTION(mb_encode_mimeheader)
3124 {
3125 	enum mbfl_no_encoding charset, transenc;
3126 	mbfl_string  string, result, *ret;
3127 	char *charset_name = NULL;
3128 	int charset_name_len;
3129 	char *trans_enc_name = NULL;
3130 	int trans_enc_name_len;
3131 	char *linefeed = "\r\n";
3132 	int linefeed_len;
3133 	long indent = 0;
3134 
3135 	mbfl_string_init(&string);
3136 	string.no_language = MBSTRG(language);
3137 	string.no_encoding = MBSTRG(current_internal_encoding);
3138 
3139 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|sssl", (char **)&string.val, &string.len, &charset_name, &charset_name_len, &trans_enc_name, &trans_enc_name_len, &linefeed, &linefeed_len, &indent) == FAILURE) {
3140 		return;
3141 	}
3142 
3143 	charset = mbfl_no_encoding_pass;
3144 	transenc = mbfl_no_encoding_base64;
3145 
3146 	if (charset_name != NULL) {
3147 		charset = mbfl_name2no_encoding(charset_name);
3148 		if (charset == mbfl_no_encoding_invalid) {
3149 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", charset_name);
3150 			RETURN_FALSE;
3151 		}
3152 	} else {
3153 		const mbfl_language *lang = mbfl_no2language(MBSTRG(language));
3154 		if (lang != NULL) {
3155 			charset = lang->mail_charset;
3156 			transenc = lang->mail_header_encoding;
3157 		}
3158 	}
3159 
3160 	if (trans_enc_name != NULL) {
3161 		if (*trans_enc_name == 'B' || *trans_enc_name == 'b') {
3162 			transenc = mbfl_no_encoding_base64;
3163 		} else if (*trans_enc_name == 'Q' || *trans_enc_name == 'q') {
3164 			transenc = mbfl_no_encoding_qprint;
3165 		}
3166 	}
3167 
3168 	mbfl_string_init(&result);
3169 	ret = mbfl_mime_header_encode(&string, &result, charset, transenc, linefeed, indent);
3170 	if (ret != NULL) {
3171 		RETVAL_STRINGL((char *)ret->val, ret->len, 0)	/* the string is already strdup()'ed */
3172 	} else {
3173 		RETVAL_FALSE;
3174 	}
3175 }
3176 /* }}} */
3177 
3178 /* {{{ proto string mb_decode_mimeheader(string string)
3179    Decodes the MIME "encoded-word" in the string */
PHP_FUNCTION(mb_decode_mimeheader)3180 PHP_FUNCTION(mb_decode_mimeheader)
3181 {
3182 	mbfl_string string, result, *ret;
3183 
3184 	mbfl_string_init(&string);
3185 	string.no_language = MBSTRG(language);
3186 	string.no_encoding = MBSTRG(current_internal_encoding);
3187 
3188 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", (char **)&string.val, &string.len) == FAILURE) {
3189 		return;
3190 	}
3191 
3192 	mbfl_string_init(&result);
3193 	ret = mbfl_mime_header_decode(&string, &result, MBSTRG(current_internal_encoding));
3194 	if (ret != NULL) {
3195 		RETVAL_STRINGL((char *)ret->val, ret->len, 0)	/* the string is already strdup()'ed */
3196 	} else {
3197 		RETVAL_FALSE;
3198 	}
3199 }
3200 /* }}} */
3201 
3202 /* {{{ proto string mb_convert_kana(string str [, string option] [, string encoding])
3203    Conversion between full-width character and half-width character (Japanese) */
PHP_FUNCTION(mb_convert_kana)3204 PHP_FUNCTION(mb_convert_kana)
3205 {
3206 	int opt, i;
3207 	mbfl_string string, result, *ret;
3208 	char *optstr = NULL;
3209 	int optstr_len;
3210 	char *encname = NULL;
3211 	int encname_len;
3212 
3213 	mbfl_string_init(&string);
3214 	string.no_language = MBSTRG(language);
3215 	string.no_encoding = MBSTRG(current_internal_encoding);
3216 
3217 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|ss", (char **)&string.val, &string.len, &optstr, &optstr_len, &encname, &encname_len) == FAILURE) {
3218 		return;
3219 	}
3220 
3221 	/* option */
3222 	if (optstr != NULL) {
3223 		char *p = optstr;
3224 		int n = optstr_len;
3225 		i = 0;
3226 		opt = 0;
3227 		while (i < n) {
3228 			i++;
3229 			switch (*p++) {
3230 			case 'A':
3231 				opt |= 0x1;
3232 				break;
3233 			case 'a':
3234 				opt |= 0x10;
3235 				break;
3236 			case 'R':
3237 				opt |= 0x2;
3238 				break;
3239 			case 'r':
3240 				opt |= 0x20;
3241 				break;
3242 			case 'N':
3243 				opt |= 0x4;
3244 				break;
3245 			case 'n':
3246 				opt |= 0x40;
3247 				break;
3248 			case 'S':
3249 				opt |= 0x8;
3250 				break;
3251 			case 's':
3252 				opt |= 0x80;
3253 				break;
3254 			case 'K':
3255 				opt |= 0x100;
3256 				break;
3257 			case 'k':
3258 				opt |= 0x1000;
3259 				break;
3260 			case 'H':
3261 				opt |= 0x200;
3262 				break;
3263 			case 'h':
3264 				opt |= 0x2000;
3265 				break;
3266 			case 'V':
3267 				opt |= 0x800;
3268 				break;
3269 			case 'C':
3270 				opt |= 0x10000;
3271 				break;
3272 			case 'c':
3273 				opt |= 0x20000;
3274 				break;
3275 			case 'M':
3276 				opt |= 0x100000;
3277 				break;
3278 			case 'm':
3279 				opt |= 0x200000;
3280 				break;
3281 			}
3282 		}
3283 	} else {
3284 		opt = 0x900;
3285 	}
3286 
3287 	/* encoding */
3288 	if (encname != NULL) {
3289 		string.no_encoding = mbfl_name2no_encoding(encname);
3290 		if (string.no_encoding == mbfl_no_encoding_invalid) {
3291 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encname);
3292 			RETURN_FALSE;
3293 		}
3294 	}
3295 
3296 	ret = mbfl_ja_jp_hantozen(&string, &result, opt);
3297 	if (ret != NULL) {
3298 		RETVAL_STRINGL((char *)ret->val, ret->len, 0);		/* the string is already strdup()'ed */
3299 	} else {
3300 		RETVAL_FALSE;
3301 	}
3302 }
3303 /* }}} */
3304 
3305 #define PHP_MBSTR_STACK_BLOCK_SIZE 32
3306 
3307 /* {{{ proto string mb_convert_variables(string to-encoding, mixed from-encoding, mixed vars [, ...])
3308    Converts the string resource in variables to desired encoding */
PHP_FUNCTION(mb_convert_variables)3309 PHP_FUNCTION(mb_convert_variables)
3310 {
3311 	zval ***args, ***stack, **var, **hash_entry, **zfrom_enc;
3312 	HashTable *target_hash;
3313 	mbfl_string string, result, *ret;
3314 	enum mbfl_no_encoding from_encoding, to_encoding;
3315 	mbfl_encoding_detector *identd;
3316 	mbfl_buffer_converter *convd;
3317 	int n, to_enc_len, argc, stack_level, stack_max, elistsz;
3318 	enum mbfl_no_encoding *elist;
3319 	char *name, *to_enc;
3320 	void *ptmp;
3321 
3322 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sZ+", &to_enc, &to_enc_len, &zfrom_enc, &args, &argc) == FAILURE) {
3323 		return;
3324 	}
3325 
3326 	/* new encoding */
3327 	to_encoding = mbfl_name2no_encoding(to_enc);
3328 	if (to_encoding == mbfl_no_encoding_invalid) {
3329 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", to_enc);
3330 		efree(args);
3331 		RETURN_FALSE;
3332 	}
3333 
3334 	/* initialize string */
3335 	mbfl_string_init(&string);
3336 	mbfl_string_init(&result);
3337 	from_encoding = MBSTRG(current_internal_encoding);
3338 	string.no_encoding = from_encoding;
3339 	string.no_language = MBSTRG(language);
3340 
3341 	/* pre-conversion encoding */
3342 	elist = NULL;
3343 	elistsz = 0;
3344 	switch (Z_TYPE_PP(zfrom_enc)) {
3345 	case IS_ARRAY:
3346 		php_mb_parse_encoding_array(*zfrom_enc, &elist, &elistsz, 0 TSRMLS_CC);
3347 		break;
3348 	default:
3349 		convert_to_string_ex(zfrom_enc);
3350 		php_mb_parse_encoding_list(Z_STRVAL_PP(zfrom_enc), Z_STRLEN_PP(zfrom_enc), &elist, &elistsz, 0 TSRMLS_CC);
3351 		break;
3352 	}
3353 	if (elistsz <= 0) {
3354 		from_encoding = mbfl_no_encoding_pass;
3355 	} else if (elistsz == 1) {
3356 		from_encoding = *elist;
3357 	} else {
3358 		/* auto detect */
3359 		from_encoding = mbfl_no_encoding_invalid;
3360 		stack_max = PHP_MBSTR_STACK_BLOCK_SIZE;
3361 		stack = (zval ***)safe_emalloc(stack_max, sizeof(zval **), 0);
3362 		stack_level = 0;
3363 		identd = mbfl_encoding_detector_new(elist, elistsz, MBSTRG(strict_detection));
3364 		if (identd != NULL) {
3365 			n = 0;
3366 			while (n < argc || stack_level > 0) {
3367 				if (stack_level <= 0) {
3368 					var = args[n++];
3369 					if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) {
3370 						target_hash = HASH_OF(*var);
3371 						if (target_hash != NULL) {
3372 							zend_hash_internal_pointer_reset(target_hash);
3373 						}
3374 					}
3375 				} else {
3376 					stack_level--;
3377 					var = stack[stack_level];
3378 				}
3379 				if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) {
3380 					target_hash = HASH_OF(*var);
3381 					if (target_hash != NULL) {
3382 						while (zend_hash_get_current_data(target_hash, (void **) &hash_entry) != FAILURE) {
3383 							zend_hash_move_forward(target_hash);
3384 							if (Z_TYPE_PP(hash_entry) == IS_ARRAY || Z_TYPE_PP(hash_entry) == IS_OBJECT) {
3385 								if (stack_level >= stack_max) {
3386 									stack_max += PHP_MBSTR_STACK_BLOCK_SIZE;
3387 									ptmp = erealloc(stack, sizeof(zval **)*stack_max);
3388 									stack = (zval ***)ptmp;
3389 								}
3390 								stack[stack_level] = var;
3391 								stack_level++;
3392 								var = hash_entry;
3393 								target_hash = HASH_OF(*var);
3394 								if (target_hash != NULL) {
3395 									zend_hash_internal_pointer_reset(target_hash);
3396 									continue;
3397 								}
3398 							} else if (Z_TYPE_PP(hash_entry) == IS_STRING) {
3399 								string.val = (unsigned char *)Z_STRVAL_PP(hash_entry);
3400 								string.len = Z_STRLEN_PP(hash_entry);
3401 								if (mbfl_encoding_detector_feed(identd, &string)) {
3402 									goto detect_end;		/* complete detecting */
3403 								}
3404 							}
3405 						}
3406 					}
3407 				} else if (Z_TYPE_PP(var) == IS_STRING) {
3408 					string.val = (unsigned char *)Z_STRVAL_PP(var);
3409 					string.len = Z_STRLEN_PP(var);
3410 					if (mbfl_encoding_detector_feed(identd, &string)) {
3411 						goto detect_end;		/* complete detecting */
3412 					}
3413 				}
3414 			}
3415 detect_end:
3416 			from_encoding = mbfl_encoding_detector_judge(identd);
3417 			mbfl_encoding_detector_delete(identd);
3418 		}
3419 		efree(stack);
3420 
3421 		if (from_encoding == mbfl_no_encoding_invalid) {
3422 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to detect encoding");
3423 			from_encoding = mbfl_no_encoding_pass;
3424 		}
3425 	}
3426 	if (elist != NULL) {
3427 		efree((void *)elist);
3428 	}
3429 	/* create converter */
3430 	convd = NULL;
3431 	if (from_encoding != mbfl_no_encoding_pass) {
3432 		convd = mbfl_buffer_converter_new(from_encoding, to_encoding, 0);
3433 		if (convd == NULL) {
3434 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create converter");
3435 			RETURN_FALSE;
3436 		}
3437 		mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
3438 		mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
3439 	}
3440 
3441 	/* convert */
3442 	if (convd != NULL) {
3443 		stack_max = PHP_MBSTR_STACK_BLOCK_SIZE;
3444 		stack = (zval ***)safe_emalloc(stack_max, sizeof(zval **), 0);
3445 		stack_level = 0;
3446 		n = 0;
3447 		while (n < argc || stack_level > 0) {
3448 			if (stack_level <= 0) {
3449 				var = args[n++];
3450 				if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) {
3451 					target_hash = HASH_OF(*var);
3452 					if (target_hash != NULL) {
3453 						zend_hash_internal_pointer_reset(target_hash);
3454 					}
3455 				}
3456 			} else {
3457 				stack_level--;
3458 				var = stack[stack_level];
3459 			}
3460 			if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) {
3461 				target_hash = HASH_OF(*var);
3462 				if (target_hash != NULL) {
3463 					while (zend_hash_get_current_data(target_hash, (void **) &hash_entry) != FAILURE) {
3464 						zend_hash_move_forward(target_hash);
3465 						if (Z_TYPE_PP(hash_entry) == IS_ARRAY || Z_TYPE_PP(hash_entry) == IS_OBJECT) {
3466 							if (stack_level >= stack_max) {
3467 								stack_max += PHP_MBSTR_STACK_BLOCK_SIZE;
3468 								ptmp = erealloc(stack, sizeof(zval **)*stack_max);
3469 								stack = (zval ***)ptmp;
3470 							}
3471 							stack[stack_level] = var;
3472 							stack_level++;
3473 							var = hash_entry;
3474 							SEPARATE_ZVAL(hash_entry);
3475 							target_hash = HASH_OF(*var);
3476 							if (target_hash != NULL) {
3477 								zend_hash_internal_pointer_reset(target_hash);
3478 								continue;
3479 							}
3480 						} else if (Z_TYPE_PP(hash_entry) == IS_STRING) {
3481 							string.val = (unsigned char *)Z_STRVAL_PP(hash_entry);
3482 							string.len = Z_STRLEN_PP(hash_entry);
3483 							ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
3484 							if (ret != NULL) {
3485 								if (Z_REFCOUNT_PP(hash_entry) > 1) {
3486 									Z_DELREF_PP(hash_entry);
3487 									MAKE_STD_ZVAL(*hash_entry);
3488 								} else {
3489 									zval_dtor(*hash_entry);
3490 								}
3491 							ZVAL_STRINGL(*hash_entry, (char *)ret->val, ret->len, 0);
3492 						}
3493 					}
3494 				}
3495 			}
3496 		} else if (Z_TYPE_PP(var) == IS_STRING) {
3497 			string.val = (unsigned char *)Z_STRVAL_PP(var);
3498 			string.len = Z_STRLEN_PP(var);
3499 			ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
3500 			if (ret != NULL) {
3501 				zval_dtor(*var);
3502 				ZVAL_STRINGL(*var, (char *)ret->val, ret->len, 0);
3503 				}
3504 			}
3505 		}
3506 		efree(stack);
3507 
3508 		MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
3509 		mbfl_buffer_converter_delete(convd);
3510 	}
3511 
3512 	efree(args);
3513 
3514 	name = (char *)mbfl_no_encoding2name(from_encoding);
3515 	if (name != NULL) {
3516 		RETURN_STRING(name, 1);
3517 	} else {
3518 		RETURN_FALSE;
3519 	}
3520 }
3521 /* }}} */
3522 
3523 /* {{{ HTML numeric entity */
3524 /* {{{ static void php_mb_numericentity_exec() */
3525 static void
php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS,int type)3526 php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS, int type)
3527 {
3528 	char *str, *encoding;
3529 	int str_len, encoding_len;
3530 	zval *zconvmap, **hash_entry;
3531 	HashTable *target_hash;
3532 	size_t argc = ZEND_NUM_ARGS();
3533 	int i, *convmap, *mapelm, mapsize=0;
3534 	mbfl_string string, result, *ret;
3535 	enum mbfl_no_encoding no_encoding;
3536 
3537 	if (zend_parse_parameters(argc TSRMLS_CC, "szs", &str, &str_len, &zconvmap, &encoding, &encoding_len) == FAILURE) {
3538 		return;
3539 	}
3540 
3541 	mbfl_string_init(&string);
3542 	string.no_language = MBSTRG(language);
3543 	string.no_encoding = MBSTRG(current_internal_encoding);
3544 	string.val = (unsigned char *)str;
3545 	string.len = str_len;
3546 
3547 	/* encoding */
3548 	if (argc == 3) {
3549 		no_encoding = mbfl_name2no_encoding(encoding);
3550 		if (no_encoding == mbfl_no_encoding_invalid) {
3551 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
3552 			RETURN_FALSE;
3553 		} else {
3554 			string.no_encoding = no_encoding;
3555 		}
3556 	}
3557 
3558 	/* conversion map */
3559 	convmap = NULL;
3560 	if (Z_TYPE_P(zconvmap) == IS_ARRAY) {
3561 		target_hash = Z_ARRVAL_P(zconvmap);
3562 		zend_hash_internal_pointer_reset(target_hash);
3563 		i = zend_hash_num_elements(target_hash);
3564 		if (i > 0) {
3565 			convmap = (int *)safe_emalloc(i, sizeof(int), 0);
3566 			mapelm = convmap;
3567 			mapsize = 0;
3568 			while (i > 0) {
3569 				if (zend_hash_get_current_data(target_hash, (void **) &hash_entry) == FAILURE) {
3570 					break;
3571 				}
3572 				convert_to_long_ex(hash_entry);
3573 				*mapelm++ = Z_LVAL_PP(hash_entry);
3574 				mapsize++;
3575 				i--;
3576 				zend_hash_move_forward(target_hash);
3577 			}
3578 		}
3579 	}
3580 	if (convmap == NULL) {
3581 		RETURN_FALSE;
3582 	}
3583 	mapsize /= 4;
3584 
3585 	ret = mbfl_html_numeric_entity(&string, &result, convmap, mapsize, type);
3586 	if (ret != NULL) {
3587 		RETVAL_STRINGL((char *)ret->val, ret->len, 0);
3588 	} else {
3589 		RETVAL_FALSE;
3590 	}
3591 	efree((void *)convmap);
3592 }
3593 /* }}} */
3594 
3595 /* {{{ proto string mb_encode_numericentity(string string, array convmap [, string encoding])
3596    Converts specified characters to HTML numeric entities */
PHP_FUNCTION(mb_encode_numericentity)3597 PHP_FUNCTION(mb_encode_numericentity)
3598 {
3599 	php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
3600 }
3601 /* }}} */
3602 
3603 /* {{{ proto string mb_decode_numericentity(string string, array convmap [, string encoding])
3604    Converts HTML numeric entities to character code */
PHP_FUNCTION(mb_decode_numericentity)3605 PHP_FUNCTION(mb_decode_numericentity)
3606 {
3607 	php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
3608 }
3609 /* }}} */
3610 /* }}} */
3611 
3612 /* {{{ proto int mb_send_mail(string to, string subject, string message [, string additional_headers [, string additional_parameters]])
3613  *  Sends an email message with MIME scheme
3614  */
3615 
/*
 * SKIP_LONG_HEADER_SEP_MBSTRING(str, pos)
 * If str[pos] begins a CR LF that is followed by a space or tab, advance pos
 * past the CR LF, keep advancing while the character after pos is still a
 * space or tab (pos ends on the last whitespace character of the run), and
 * then `continue`. Because of the bare `continue` this macro can only be used
 * inside a loop body, and it is deliberately not wrapped in do { } while (0).
 * Both arguments are evaluated several times.
 */
3616 #define SKIP_LONG_HEADER_SEP_MBSTRING(str, pos)										\
3617 	if (str[pos] == '\r' && str[pos + 1] == '\n' && (str[pos + 2] == ' ' || str[pos + 2] == '\t')) {	\
3618 		pos += 2;											\
3619 		while (str[pos + 1] == ' ' || str[pos + 1] == '\t') {							\
3620 			pos++;											\
3621 		}												\
3622 		continue;											\
3623 	}
3624 
/*
 * MAIL_ASCIIZ_CHECK_MBSTRING(str, len)
 * Overwrites every NUL byte within the len bytes starting at str with a
 * space. Uses the variables `pp` and `ee`, which must already be declared in
 * the scope where the macro is expanded. Expands to several statements
 * without a do { } while (0) wrapper. The last line ends in a continuation
 * backslash, so the line that follows the definition is part of the macro.
 */
3625 #define MAIL_ASCIIZ_CHECK_MBSTRING(str, len)			\
3626 	pp = str;					\
3627 	ee = pp + len;					\
3628 	while ((pp = memchr(pp, '\0', (ee - pp)))) {	\
3629 		*pp = ' ';				\
3630 	}						\
3631 
/*
 * APPEND_ONE_CHAR(ch)
 * Adds one character to the smart_str named `token` in the enclosing scope.
 * When token owns a buffer (token.a > 0) the character is really appended.
 * Otherwise only token.len is incremented and ch is not evaluated;
 * presumably token.c then points into the text being scanned, so extending
 * the length is enough. Confirm against the call sites.
 */
3632 #define APPEND_ONE_CHAR(ch) do { \
3633 	if (token.a > 0) { \
3634 		smart_str_appendc(&token, ch); \
3635 	} else {\
3636 		token.len++; \
3637 	} \
3638 } while (0)
3639 
/*
 * SEPARATE_SMART_STR(str)
 * Gives a smart_str that does not own its buffer ((str)->a == 0) a private
 * copy. The capacity a becomes the smallest power of two that is >= len
 * (1 when len is 0), a + 1 bytes are allocated with emalloc(), the first len
 * bytes of the old buffer are copied, and c is pointed at the copy. No NUL
 * terminator is written. Does nothing when a is already non-zero.
 */
3640 #define SEPARATE_SMART_STR(str) do {\
3641 	if ((str)->a == 0) { \
3642 		char *tmp_ptr; \
3643 		(str)->a = 1; \
3644 		while ((str)->a < (str)->len) { \
3645 			(str)->a <<= 1; \
3646 		} \
3647 		tmp_ptr = emalloc((str)->a + 1); \
3648 		memcpy(tmp_ptr, (str)->c, (str)->len); \
3649 		(str)->c = tmp_ptr; \
3650 	} \
3651 } while (0)
3652 
/* Frees a smart_str only when its capacity field `a` is positive. A string
 * with a == 0 is left untouched. */
static void my_smart_str_dtor(smart_str *s)
{
	if (!(s->a > 0)) {
		return;
	}
	smart_str_free(s);
}
3659 
_php_mbstr_parse_mail_headers(HashTable * ht,const char * str,size_t str_len)3660 static int _php_mbstr_parse_mail_headers(HashTable *ht, const char *str, size_t str_len)
3661 {
3662 	const char *ps;
3663 	size_t icnt;
3664 	int state = 0;
3665 	int crlf_state = -1;
3666 
3667 	smart_str token = { 0, 0, 0 };
3668 	smart_str fld_name = { 0, 0, 0 }, fld_val = { 0, 0, 0 };
3669 
3670 	ps = str;
3671 	icnt = str_len;
3672 
3673 	/*
3674 	 *             C o n t e n t - T y p e :   t e x t / h t m l \r\n
3675 	 *             ^ ^^^^^^^^^^^^^^^^^^^^^ ^^^ ^^^^^^^^^^^^^^^^^ ^^^^
3676 	 *      state  0            1           2          3
3677 	 *
3678 	 *             C o n t e n t - T y p e :   t e x t / h t m l \r\n
3679 	 *             ^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^
3680 	 * crlf_state -1                       0                     1 -1
3681 	 *
3682 	 */
3683 
3684 	while (icnt > 0) {
3685 		switch (*ps) {
3686 			case ':':
3687 				if (crlf_state == 1) {
3688 					APPEND_ONE_CHAR('\r');
3689 				}
3690 
3691 				if (state == 0 || state == 1) {
3692 					fld_name = token;
3693 
3694 					state = 2;
3695 				} else {
3696 					APPEND_ONE_CHAR(*ps);
3697 				}
3698 
3699 				crlf_state = 0;
3700 				break;
3701 
3702 			case '\n':
3703 				if (crlf_state == -1) {
3704 					goto out;
3705 				}
3706 				crlf_state = -1;
3707 				break;
3708 
3709 			case '\r':
3710 				if (crlf_state == 1) {
3711 					APPEND_ONE_CHAR('\r');
3712 				} else {
3713 					crlf_state = 1;
3714 				}
3715 				break;
3716 
3717 			case ' ': case '\t':
3718 				if (crlf_state == -1) {
3719 					if (state == 3) {
3720 						/* continuing from the previous line */
3721 						SEPARATE_SMART_STR(&token);
3722 						state = 4;
3723 					} else {
3724 						/* simply skipping this new line */
3725 						state = 5;
3726 					}
3727 				} else {
3728 					if (crlf_state == 1) {
3729 						APPEND_ONE_CHAR('\r');
3730 					}
3731 					if (state == 1 || state == 3) {
3732 						APPEND_ONE_CHAR(*ps);
3733 					}
3734 				}
3735 				crlf_state = 0;
3736 				break;
3737 
3738 			default:
3739 				switch (state) {
3740 					case 0:
3741 						token.c = (char *)ps;
3742 						token.len = 0;
3743 						token.a = 0;
3744 						state = 1;
3745 						break;
3746 
3747 					case 2:
3748 						if (crlf_state != -1) {
3749 							token.c = (char *)ps;
3750 							token.len = 0;
3751 							token.a = 0;
3752 
3753 							state = 3;
3754 							break;
3755 						}
3756 						/* break is missing intentionally */
3757 
3758 					case 3:
3759 						if (crlf_state == -1) {
3760 							fld_val = token;
3761 
3762 							if (fld_name.c != NULL && fld_val.c != NULL) {
3763 								char *dummy;
3764 
3765 								/* FIXME: some locale free implementation is
3766 								 * really required here,,, */
3767 								SEPARATE_SMART_STR(&fld_name);
3768 								php_strtoupper(fld_name.c, fld_name.len);
3769 
3770 								zend_hash_update(ht, (char *)fld_name.c, fld_name.len, &fld_val, sizeof(smart_str), (void **)&dummy);
3771 
3772 								my_smart_str_dtor(&fld_name);
3773 							}
3774 
3775 							memset(&fld_name, 0, sizeof(smart_str));
3776 							memset(&fld_val, 0, sizeof(smart_str));
3777 
3778 							token.c = (char *)ps;
3779 							token.len = 0;
3780 							token.a = 0;
3781 
3782 							state = 1;
3783 						}
3784 						break;
3785 
3786 					case 4:
3787 						APPEND_ONE_CHAR(' ');
3788 						state = 3;
3789 						break;
3790 				}
3791 
3792 				if (crlf_state == 1) {
3793 					APPEND_ONE_CHAR('\r');
3794 				}
3795 
3796 				APPEND_ONE_CHAR(*ps);
3797 
3798 				crlf_state = 0;
3799 				break;
3800 		}
3801 		ps++, icnt--;
3802 	}
3803 out:
3804 	if (state == 2) {
3805 		token.c = "";
3806 		token.len = 0;
3807 		token.a = 0;
3808 
3809 		state = 3;
3810 	}
3811 	if (state == 3) {
3812 		fld_val = token;
3813 
3814 		if (fld_name.c != NULL && fld_val.c != NULL) {
3815 			void *dummy;
3816 
3817 			/* FIXME: some locale free implementation is
3818 			 * really required here,,, */
3819 			SEPARATE_SMART_STR(&fld_name);
3820 			php_strtoupper(fld_name.c, fld_name.len);
3821 
3822 			zend_hash_update(ht, (char *)fld_name.c, fld_name.len, &fld_val, sizeof(smart_str), (void **)&dummy);
3823 
3824 			my_smart_str_dtor(&fld_name);
3825 		}
3826 	}
3827 	return state;
3828 }
3829 
PHP_FUNCTION(mb_send_mail)3830 PHP_FUNCTION(mb_send_mail)
3831 {
3832 	int n;
3833 	char *to = NULL;
3834 	int to_len;
3835 	char *message = NULL;
3836 	int message_len;
3837 	char *headers = NULL;
3838 	int headers_len;
3839 	char *subject = NULL;
3840 	int subject_len;
3841 	char *extra_cmd = NULL;
3842 	int extra_cmd_len;
3843 	int i;
3844 	char *to_r = NULL;
3845 	char *force_extra_parameters = INI_STR("mail.force_extra_parameters");
3846 	struct {
3847 		int cnt_type:1;
3848 		int cnt_trans_enc:1;
3849 	} suppressed_hdrs = { 0, 0 };
3850 
3851 	char *message_buf = NULL, *subject_buf = NULL, *p;
3852 	mbfl_string orig_str, conv_str;
3853 	mbfl_string *pstr;	/* pointer to mbfl string for return value */
3854 	enum mbfl_no_encoding
3855 	    tran_cs,	/* transfar text charset */
3856 	    head_enc,	/* header transfar encoding */
3857 	    body_enc;	/* body transfar encoding */
3858 	mbfl_memory_device device;	/* automatic allocateable buffer for additional header */
3859 	const mbfl_language *lang;
3860 	int err = 0;
3861 	HashTable ht_headers;
3862 	smart_str *s;
3863 	extern void mbfl_memory_device_unput(mbfl_memory_device *device);
3864 	char *pp, *ee;
3865 
3866 	if (PG(safe_mode) && (ZEND_NUM_ARGS() == 5)) {
3867 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "SAFE MODE Restriction in effect.  The fifth parameter is disabled in SAFE MODE.");
3868 		RETURN_FALSE;
3869 	}
3870 
3871 	/* initialize */
3872 	mbfl_memory_device_init(&device, 0, 0);
3873 	mbfl_string_init(&orig_str);
3874 	mbfl_string_init(&conv_str);
3875 
3876 	/* character-set, transfer-encoding */
3877 	tran_cs = mbfl_no_encoding_utf8;
3878 	head_enc = mbfl_no_encoding_base64;
3879 	body_enc = mbfl_no_encoding_base64;
3880 	lang = mbfl_no2language(MBSTRG(language));
3881 	if (lang != NULL) {
3882 		tran_cs = lang->mail_charset;
3883 		head_enc = lang->mail_header_encoding;
3884 		body_enc = lang->mail_body_encoding;
3885 	}
3886 
3887 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sss|ss", &to, &to_len, &subject, &subject_len, &message, &message_len, &headers, &headers_len, &extra_cmd, &extra_cmd_len) == FAILURE) {
3888 		return;
3889 	}
3890 
3891 	/* ASCIIZ check */
3892 	MAIL_ASCIIZ_CHECK_MBSTRING(to, to_len);
3893 	MAIL_ASCIIZ_CHECK_MBSTRING(subject, subject_len);
3894 	MAIL_ASCIIZ_CHECK_MBSTRING(message, message_len);
3895 	if (headers) {
3896 		MAIL_ASCIIZ_CHECK_MBSTRING(headers, headers_len);
3897 	}
3898 	if (extra_cmd) {
3899 		MAIL_ASCIIZ_CHECK_MBSTRING(extra_cmd, extra_cmd_len);
3900 	}
3901 
3902 	zend_hash_init(&ht_headers, 0, NULL, (dtor_func_t) my_smart_str_dtor, 0);
3903 
3904 	if (headers != NULL) {
3905 		_php_mbstr_parse_mail_headers(&ht_headers, headers, headers_len);
3906 	}
3907 
3908 	if (zend_hash_find(&ht_headers, "CONTENT-TYPE", sizeof("CONTENT-TYPE") - 1, (void **)&s) == SUCCESS) {
3909 		char *tmp;
3910 		char *param_name;
3911 		char *charset = NULL;
3912 
3913 		SEPARATE_SMART_STR(s);
3914 		smart_str_0(s);
3915 
3916 		p = strchr(s->c, ';');
3917 
3918 		if (p != NULL) {
3919 			/* skipping the padded spaces */
3920 			do {
3921 				++p;
3922 			} while (*p == ' ' || *p == '\t');
3923 
3924 			if (*p != '\0') {
3925 				if ((param_name = php_strtok_r(p, "= ", &tmp)) != NULL) {
3926 					if (strcasecmp(param_name, "charset") == 0) {
3927 						enum mbfl_no_encoding _tran_cs = tran_cs;
3928 
3929 						charset = php_strtok_r(NULL, "= \"", &tmp);
3930 						if (charset != NULL) {
3931 							_tran_cs = mbfl_name2no_encoding(charset);
3932 						}
3933 
3934 						if (_tran_cs == mbfl_no_encoding_invalid) {
3935 							php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unsupported charset \"%s\" - will be regarded as ascii", charset);
3936 							_tran_cs = mbfl_no_encoding_ascii;
3937 						}
3938 						tran_cs = _tran_cs;
3939 					}
3940 				}
3941 			}
3942 		}
3943 		suppressed_hdrs.cnt_type = 1;
3944 	}
3945 
3946 	if (zend_hash_find(&ht_headers, "CONTENT-TRANSFER-ENCODING", sizeof("CONTENT-TRANSFER-ENCODING") - 1, (void **)&s) == SUCCESS) {
3947 		enum mbfl_no_encoding _body_enc;
3948 		SEPARATE_SMART_STR(s);
3949 		smart_str_0(s);
3950 
3951 		_body_enc = mbfl_name2no_encoding(s->c);
3952 		switch (_body_enc) {
3953 			case mbfl_no_encoding_base64:
3954 			case mbfl_no_encoding_7bit:
3955 			case mbfl_no_encoding_8bit:
3956 				body_enc = _body_enc;
3957 				break;
3958 
3959 			default:
3960 				php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unsupported transfer encoding \"%s\" - will be regarded as 8bit", s->c);
3961 				body_enc =	mbfl_no_encoding_8bit;
3962 				break;
3963 		}
3964 		suppressed_hdrs.cnt_trans_enc = 1;
3965 	}
3966 
3967 	/* To: */
3968 	if (to != NULL) {
3969         if (to_len > 0) {
3970             to_r = estrndup(to, to_len);
3971             for (; to_len; to_len--) {
3972                 if (!isspace((unsigned char) to_r[to_len - 1])) {
3973                     break;
3974                 }
3975                 to_r[to_len - 1] = '\0';
3976             }
3977             for (i = 0; to_r[i]; i++) {
3978 			if (iscntrl((unsigned char) to_r[i])) {
3979 				/* According to RFC 822, section 3.1.1 long headers may be separated into
3980 				 * parts using CRLF followed at least one linear-white-space character ('\t' or ' ').
3981 				 * To prevent these separators from being replaced with a space, we use the
3982 				 * SKIP_LONG_HEADER_SEP_MBSTRING to skip over them.
3983 				 */
3984 				SKIP_LONG_HEADER_SEP_MBSTRING(to_r, i);
3985 				to_r[i] = ' ';
3986 			}
3987             }
3988         } else {
3989             to_r = to;
3990         }
3991     } else {
3992 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Missing To: field");
3993 		err = 1;
3994 	}
3995 
3996 	/* Subject: */
3997 	if (subject != NULL && subject_len >= 0) {
3998 		orig_str.no_language = MBSTRG(language);
3999 		orig_str.val = (unsigned char *)subject;
4000 		orig_str.len = subject_len;
4001 		orig_str.no_encoding = MBSTRG(current_internal_encoding);
4002 		if (orig_str.no_encoding == mbfl_no_encoding_invalid
4003 		    || orig_str.no_encoding == mbfl_no_encoding_pass) {
4004 			orig_str.no_encoding = mbfl_identify_encoding_no(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
4005 		}
4006 		pstr = mbfl_mime_header_encode(&orig_str, &conv_str, tran_cs, head_enc, "\n", sizeof("Subject: [PHP-jp nnnnnnnn]"));
4007 		if (pstr != NULL) {
4008 			subject_buf = subject = (char *)pstr->val;
4009 		}
4010 	} else {
4011 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Missing Subject: field");
4012 		err = 1;
4013 	}
4014 
4015 	/* message body */
4016 	if (message != NULL) {
4017 		orig_str.no_language = MBSTRG(language);
4018 		orig_str.val = (unsigned char *)message;
4019 		orig_str.len = (unsigned int)message_len;
4020 		orig_str.no_encoding = MBSTRG(current_internal_encoding);
4021 
4022 		if (orig_str.no_encoding == mbfl_no_encoding_invalid
4023 		    || orig_str.no_encoding == mbfl_no_encoding_pass) {
4024 			orig_str.no_encoding = mbfl_identify_encoding_no(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
4025 		}
4026 
4027 		pstr = NULL;
4028 		{
4029 			mbfl_string tmpstr;
4030 
4031 			if (mbfl_convert_encoding(&orig_str, &tmpstr, tran_cs) != NULL) {
4032 				tmpstr.no_encoding=mbfl_no_encoding_8bit;
4033 				pstr = mbfl_convert_encoding(&tmpstr, &conv_str, body_enc);
4034 				efree(tmpstr.val);
4035 			}
4036 		}
4037 		if (pstr != NULL) {
4038 			message_buf = message = (char *)pstr->val;
4039 		}
4040 	} else {
4041 		/* this is not really an error, so it is allowed. */
4042 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty message body");
4043 		message = NULL;
4044 	}
4045 
4046 	/* other headers */
4047 #define PHP_MBSTR_MAIL_MIME_HEADER1 "MIME-Version: 1.0"
4048 #define PHP_MBSTR_MAIL_MIME_HEADER2 "Content-Type: text/plain"
4049 #define PHP_MBSTR_MAIL_MIME_HEADER3 "; charset="
4050 #define PHP_MBSTR_MAIL_MIME_HEADER4 "Content-Transfer-Encoding: "
4051 	if (headers != NULL) {
4052 		p = headers;
4053 		n = headers_len;
4054 		mbfl_memory_device_strncat(&device, p, n);
4055 		if (n > 0 && p[n - 1] != '\n') {
4056 			mbfl_memory_device_strncat(&device, "\n", 1);
4057 		}
4058 	}
4059 
4060 	if (!zend_hash_exists(&ht_headers, "MIME-VERSION", sizeof("MIME-VERSION") - 1)) {
4061 		mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER1, sizeof(PHP_MBSTR_MAIL_MIME_HEADER1) - 1);
4062 		mbfl_memory_device_strncat(&device, "\n", 1);
4063 	}
4064 
4065 	if (!suppressed_hdrs.cnt_type) {
4066 		mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER2, sizeof(PHP_MBSTR_MAIL_MIME_HEADER2) - 1);
4067 
4068 		p = (char *)mbfl_no2preferred_mime_name(tran_cs);
4069 		if (p != NULL) {
4070 			mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER3, sizeof(PHP_MBSTR_MAIL_MIME_HEADER3) - 1);
4071 			mbfl_memory_device_strcat(&device, p);
4072 		}
4073 		mbfl_memory_device_strncat(&device, "\n", 1);
4074 	}
4075 	if (!suppressed_hdrs.cnt_trans_enc) {
4076 		mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER4, sizeof(PHP_MBSTR_MAIL_MIME_HEADER4) - 1);
4077 		p = (char *)mbfl_no2preferred_mime_name(body_enc);
4078 		if (p == NULL) {
4079 			p = "7bit";
4080 		}
4081 		mbfl_memory_device_strcat(&device, p);
4082 		mbfl_memory_device_strncat(&device, "\n", 1);
4083 	}
4084 
4085 	mbfl_memory_device_unput(&device);
4086 	mbfl_memory_device_output('\0', &device);
4087 	headers = (char *)device.buffer;
4088 
4089 	if (force_extra_parameters) {
4090 		extra_cmd = php_escape_shell_cmd(force_extra_parameters);
4091 	} else if (extra_cmd) {
4092 		extra_cmd = php_escape_shell_cmd(extra_cmd);
4093 	}
4094 
4095 	if (!err && php_mail(to_r, subject, message, headers, extra_cmd TSRMLS_CC)) {
4096 		RETVAL_TRUE;
4097 	} else {
4098 		RETVAL_FALSE;
4099 	}
4100 
4101 	if (extra_cmd) {
4102 		efree(extra_cmd);
4103 	}
4104 	if (to_r != to) {
4105 		efree(to_r);
4106 	}
4107 	if (subject_buf) {
4108 		efree((void *)subject_buf);
4109 	}
4110 	if (message_buf) {
4111 		efree((void *)message_buf);
4112 	}
4113 	mbfl_memory_device_clear(&device);
4114 	zend_hash_destroy(&ht_headers);
4115 }
4116 
4117 #undef SKIP_LONG_HEADER_SEP_MBSTRING
4118 #undef MAIL_ASCIIZ_CHECK_MBSTRING
4119 #undef APPEND_ONE_CHAR
4120 #undef SEPARATE_SMART_STR
4121 #undef PHP_MBSTR_MAIL_MIME_HEADER1
4122 #undef PHP_MBSTR_MAIL_MIME_HEADER2
4123 #undef PHP_MBSTR_MAIL_MIME_HEADER3
4124 #undef PHP_MBSTR_MAIL_MIME_HEADER4
4125 /* }}} */
4126 
4127 /* {{{ proto mixed mb_get_info([string type])
4128    Returns the current settings of mbstring */
PHP_FUNCTION(mb_get_info)4129 PHP_FUNCTION(mb_get_info)
4130 {
4131 	char *typ = NULL;
4132 	int typ_len, n;
4133 	char *name;
4134 	const struct mb_overload_def *over_func;
4135 	zval *row1, *row2;
4136 	const mbfl_language *lang = mbfl_no2language(MBSTRG(language));
4137 	enum mbfl_no_encoding *entry;
4138 #ifdef ZEND_MULTIBYTE
4139 	zval *row3;
4140 #endif /* ZEND_MULTIBYTE */
4141 
4142 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &typ, &typ_len) == FAILURE) {
4143 		RETURN_FALSE;
4144 	}
4145 
4146 	if (!typ || !strcasecmp("all", typ)) {
4147 		array_init(return_value);
4148 		if ((name = (char *)mbfl_no_encoding2name(MBSTRG(current_internal_encoding))) != NULL) {
4149 			add_assoc_string(return_value, "internal_encoding", name, 1);
4150 		}
4151 		if ((name = (char *)mbfl_no_encoding2name(MBSTRG(http_input_identify))) != NULL) {
4152 			add_assoc_string(return_value, "http_input", name, 1);
4153 		}
4154 		if ((name = (char *)mbfl_no_encoding2name(MBSTRG(current_http_output_encoding))) != NULL) {
4155 			add_assoc_string(return_value, "http_output", name, 1);
4156 		}
4157 		if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes"), 0)) != NULL) {
4158 			add_assoc_string(return_value, "http_output_conv_mimetypes", name, 1);
4159 		}
4160 		add_assoc_long(return_value, "func_overload", MBSTRG(func_overload));
4161 		if (MBSTRG(func_overload)){
4162 			over_func = &(mb_ovld[0]);
4163 			MAKE_STD_ZVAL(row1);
4164 			array_init(row1);
4165 			while (over_func->type > 0) {
4166 				if ((MBSTRG(func_overload) & over_func->type) == over_func->type ) {
4167 					add_assoc_string(row1, over_func->orig_func, over_func->ovld_func, 1);
4168 				}
4169 				over_func++;
4170 			}
4171 			add_assoc_zval(return_value, "func_overload_list", row1);
4172 		} else {
4173 			add_assoc_string(return_value, "func_overload_list", "no overload", 1);
4174  		}
4175 		if (lang != NULL) {
4176 			if ((name = (char *)mbfl_no_encoding2name(lang->mail_charset)) != NULL) {
4177 				add_assoc_string(return_value, "mail_charset", name, 1);
4178 			}
4179 			if ((name = (char *)mbfl_no_encoding2name(lang->mail_header_encoding)) != NULL) {
4180 				add_assoc_string(return_value, "mail_header_encoding", name, 1);
4181 			}
4182 			if ((name = (char *)mbfl_no_encoding2name(lang->mail_body_encoding)) != NULL) {
4183 				add_assoc_string(return_value, "mail_body_encoding", name, 1);
4184 			}
4185 		}
4186 		add_assoc_long(return_value, "illegal_chars", MBSTRG(illegalchars));
4187 		if (MBSTRG(encoding_translation)) {
4188 			add_assoc_string(return_value, "encoding_translation", "On", 1);
4189 		} else {
4190 			add_assoc_string(return_value, "encoding_translation", "Off", 1);
4191 		}
4192 		if ((name = (char *)mbfl_no_language2name(MBSTRG(language))) != NULL) {
4193 			add_assoc_string(return_value, "language", name, 1);
4194 		}
4195 		n = MBSTRG(current_detect_order_list_size);
4196 		entry = MBSTRG(current_detect_order_list);
4197 		if(n > 0) {
4198 			MAKE_STD_ZVAL(row2);
4199 			array_init(row2);
4200 			while (n > 0) {
4201 				if ((name = (char *)mbfl_no_encoding2name(*entry)) != NULL) {
4202 					add_next_index_string(row2, name, 1);
4203 				}
4204 				entry++;
4205 				n--;
4206 			}
4207 			add_assoc_zval(return_value, "detect_order", row2);
4208 		}
4209 		if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
4210 			add_assoc_string(return_value, "substitute_character", "none", 1);
4211 		} else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
4212 			add_assoc_string(return_value, "substitute_character", "long", 1);
4213 		} else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
4214 			add_assoc_string(return_value, "substitute_character", "entity", 1);
4215 		} else {
4216 			add_assoc_long(return_value, "substitute_character", MBSTRG(current_filter_illegal_substchar));
4217 		}
4218 		if (MBSTRG(strict_detection)) {
4219 			add_assoc_string(return_value, "strict_detection", "On", 1);
4220 		} else {
4221 			add_assoc_string(return_value, "strict_detection", "Off", 1);
4222 		}
4223 #ifdef ZEND_MULTIBYTE
4224 		entry = MBSTRG(script_encoding_list);
4225 		n = MBSTRG(script_encoding_list_size);
4226 		if(n > 0) {
4227 			MAKE_STD_ZVAL(row3);
4228 			array_init(row3);
4229 			while (n > 0) {
4230 				if ((name = (char *)mbfl_no_encoding2name(*entry)) != NULL) {
4231 					add_next_index_string(row3, name, 1);
4232 				}
4233 				entry++;
4234 				n--;
4235 			}
4236 			add_assoc_zval(return_value, "script_encoding", row3);
4237 		}
4238 #endif /* ZEND_MULTIBYTE */
4239 	} else if (!strcasecmp("internal_encoding", typ)) {
4240 		if ((name = (char *)mbfl_no_encoding2name(MBSTRG(current_internal_encoding))) != NULL) {
4241 			RETVAL_STRING(name, 1);
4242 		}
4243 	} else if (!strcasecmp("http_input", typ)) {
4244 		if ((name = (char *)mbfl_no_encoding2name(MBSTRG(http_input_identify))) != NULL) {
4245 			RETVAL_STRING(name, 1);
4246 		}
4247 	} else if (!strcasecmp("http_output", typ)) {
4248 		if ((name = (char *)mbfl_no_encoding2name(MBSTRG(current_http_output_encoding))) != NULL) {
4249 			RETVAL_STRING(name, 1);
4250 		}
4251 	} else if (!strcasecmp("http_output_conv_mimetypes", typ)) {
4252 		if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes"), 0)) != NULL) {
4253 			RETVAL_STRING(name, 1);
4254 		}
4255 	} else if (!strcasecmp("func_overload", typ)) {
4256  		RETVAL_LONG(MBSTRG(func_overload));
4257 	} else if (!strcasecmp("func_overload_list", typ)) {
4258 		if (MBSTRG(func_overload)){
4259 				over_func = &(mb_ovld[0]);
4260 				array_init(return_value);
4261 				while (over_func->type > 0) {
4262 					if ((MBSTRG(func_overload) & over_func->type) == over_func->type ) {
4263 						add_assoc_string(return_value, over_func->orig_func, over_func->ovld_func, 1);
4264 					}
4265 					over_func++;
4266 				}
4267 		} else {
4268 			RETVAL_STRING("no overload", 1);
4269 		}
4270 	} else if (!strcasecmp("mail_charset", typ)) {
4271 		if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_charset)) != NULL) {
4272 			RETVAL_STRING(name, 1);
4273 		}
4274 	} else if (!strcasecmp("mail_header_encoding", typ)) {
4275 		if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_header_encoding)) != NULL) {
4276 			RETVAL_STRING(name, 1);
4277 		}
4278 	} else if (!strcasecmp("mail_body_encoding", typ)) {
4279 		if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_body_encoding)) != NULL) {
4280 			RETVAL_STRING(name, 1);
4281 		}
4282 	} else if (!strcasecmp("illegal_chars", typ)) {
4283 		RETVAL_LONG(MBSTRG(illegalchars));
4284 	} else if (!strcasecmp("encoding_translation", typ)) {
4285 		if (MBSTRG(encoding_translation)) {
4286 			RETVAL_STRING("On", 1);
4287 		} else {
4288 			RETVAL_STRING("Off", 1);
4289 		}
4290 	} else if (!strcasecmp("language", typ)) {
4291 		if ((name = (char *)mbfl_no_language2name(MBSTRG(language))) != NULL) {
4292 			RETVAL_STRING(name, 1);
4293 		}
4294 	} else if (!strcasecmp("detect_order", typ)) {
4295 		n = MBSTRG(current_detect_order_list_size);
4296 		entry = MBSTRG(current_detect_order_list);
4297 		if(n > 0) {
4298 			array_init(return_value);
4299 			while (n > 0) {
4300 				name = (char *)mbfl_no_encoding2name(*entry);
4301 				if (name) {
4302 					add_next_index_string(return_value, name, 1);
4303 				}
4304 				entry++;
4305 				n--;
4306 			}
4307 		}
4308 	} else if (!strcasecmp("substitute_character", typ)) {
4309 		if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
4310 			RETVAL_STRING("none", 1);
4311 		} else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
4312 			RETVAL_STRING("long", 1);
4313 		} else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
4314 			RETVAL_STRING("entity", 1);
4315 		} else {
4316 			RETVAL_LONG(MBSTRG(current_filter_illegal_substchar));
4317 		}
4318 	} else if (!strcasecmp("strict_detection", typ)) {
4319 		if (MBSTRG(strict_detection)) {
4320 			RETVAL_STRING("On", 1);
4321 		} else {
4322 			RETVAL_STRING("Off", 1);
4323 		}
4324 	} else {
4325 #ifdef ZEND_MULTIBYTE
4326 	if (!strcasecmp("script_encoding", typ)) {
4327 		entry = MBSTRG(script_encoding_list);
4328 		n = MBSTRG(script_encoding_list_size);
4329 		if(n > 0) {
4330 			array_init(return_value);
4331 			while (n > 0) {
4332 				name = (char *)mbfl_no_encoding2name(*entry);
4333 				if (name) {
4334 					add_next_index_string(return_value, name, 1);
4335 				}
4336 				entry++;
4337 				n--;
4338 			}
4339 		}
4340 		return;
4341 	}
4342 #endif /* ZEND_MULTIBYTE */
4343 		RETURN_FALSE;
4344 	}
4345 }
4346 /* }}} */
4347 
4348 /* {{{ proto bool mb_check_encoding([string var[, string encoding]])
4349    Check if the string is valid for the specified encoding */
PHP_FUNCTION(mb_check_encoding)4350 PHP_FUNCTION(mb_check_encoding)
4351 {
4352 	char *var = NULL;
4353 	int var_len;
4354 	char *enc = NULL;
4355 	int enc_len;
4356 	mbfl_buffer_converter *convd;
4357 	enum mbfl_no_encoding no_encoding = MBSTRG(current_internal_encoding);
4358 	mbfl_string string, result, *ret = NULL;
4359 	long illegalchars = 0;
4360 
4361 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|ss", &var, &var_len, &enc, &enc_len) == FAILURE) {
4362 		RETURN_FALSE;
4363 	}
4364 
4365 	if (var == NULL) {
4366 		RETURN_BOOL(MBSTRG(illegalchars) == 0);
4367 	}
4368 
4369 	if (enc != NULL) {
4370 		no_encoding = mbfl_name2no_encoding(enc);
4371 		if (no_encoding == mbfl_no_encoding_invalid || no_encoding == mbfl_no_encoding_pass) {
4372 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid encoding \"%s\"", enc);
4373 			RETURN_FALSE;
4374 		}
4375 	}
4376 
4377 	convd = mbfl_buffer_converter_new(no_encoding, no_encoding, 0);
4378 	if (convd == NULL) {
4379 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create converter");
4380 		RETURN_FALSE;
4381 	}
4382 	mbfl_buffer_converter_illegal_mode(convd, MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE);
4383 	mbfl_buffer_converter_illegal_substchar(convd, 0);
4384 
4385 	/* initialize string */
4386 	mbfl_string_init_set(&string, mbfl_no_language_neutral, no_encoding);
4387 	mbfl_string_init(&result);
4388 
4389 	string.val = (unsigned char *)var;
4390 	string.len = var_len;
4391 	ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
4392 	illegalchars = mbfl_buffer_illegalchars(convd);
4393 	mbfl_buffer_converter_delete(convd);
4394 
4395 	RETVAL_FALSE;
4396 	if (ret != NULL) {
4397 		if (illegalchars == 0 && string.len == result.len && memcmp(string.val, result.val, string.len) == 0) {
4398 			RETVAL_TRUE;
4399 		}
4400 		mbfl_string_clear(&result);
4401 	}
4402 }
4403 /* }}} */
4404 
4405 /* {{{ MBSTRING_API int php_mb_encoding_translation() */
php_mb_encoding_translation(TSRMLS_D)4406 MBSTRING_API int php_mb_encoding_translation(TSRMLS_D)
4407 {
4408 	return MBSTRG(encoding_translation);
4409 }
4410 /* }}} */
4411 
4412 /* {{{ MBSTRING_API size_t php_mb_mbchar_bytes_ex() */
php_mb_mbchar_bytes_ex(const char * s,const mbfl_encoding * enc)4413 MBSTRING_API size_t php_mb_mbchar_bytes_ex(const char *s, const mbfl_encoding *enc)
4414 {
4415 	if (enc != NULL) {
4416 		if (enc->flag & MBFL_ENCTYPE_MBCS) {
4417 			if (enc->mblen_table != NULL) {
4418 				if (s != NULL) return enc->mblen_table[*(unsigned char *)s];
4419 			}
4420 		} else if (enc->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
4421 			return 2;
4422 		} else if (enc->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
4423 			return 4;
4424 		}
4425 	}
4426 	return 1;
4427 }
4428 /* }}} */
4429 
4430 /* {{{ MBSTRING_API size_t php_mb_mbchar_bytes() */
php_mb_mbchar_bytes(const char * s TSRMLS_DC)4431 MBSTRING_API size_t php_mb_mbchar_bytes(const char *s TSRMLS_DC)
4432 {
4433 	return php_mb_mbchar_bytes_ex(s,
4434 		mbfl_no2encoding(MBSTRG(internal_encoding)));
4435 }
4436 /* }}} */
4437 
4438 /* {{{ MBSTRING_API char *php_mb_safe_strrchr_ex() */
php_mb_safe_strrchr_ex(const char * s,unsigned int c,size_t nbytes,const mbfl_encoding * enc)4439 MBSTRING_API char *php_mb_safe_strrchr_ex(const char *s, unsigned int c, size_t nbytes, const mbfl_encoding *enc)
4440 {
4441 	register const char *p = s;
4442 	char *last=NULL;
4443 
4444 	if (nbytes == (size_t)-1) {
4445 		size_t nb = 0;
4446 
4447 		while (*p != '\0') {
4448 			if (nb == 0) {
4449 				if ((unsigned char)*p == (unsigned char)c) {
4450 					last = (char *)p;
4451 				}
4452 				nb = php_mb_mbchar_bytes_ex(p, enc);
4453 				if (nb == 0) {
4454 					return NULL; /* something is going wrong! */
4455 				}
4456 			}
4457 			--nb;
4458 			++p;
4459 		}
4460 	} else {
4461 		register size_t bcnt = nbytes;
4462 		register size_t nbytes_char;
4463 		while (bcnt > 0) {
4464 			if ((unsigned char)*p == (unsigned char)c) {
4465 				last = (char *)p;
4466 			}
4467 			nbytes_char = php_mb_mbchar_bytes_ex(p, enc);
4468 			if (bcnt < nbytes_char) {
4469 				return NULL;
4470 			}
4471 			p += nbytes_char;
4472 			bcnt -= nbytes_char;
4473 		}
4474 	}
4475 	return last;
4476 }
4477 /* }}} */
4478 
4479 /* {{{ MBSTRING_API char *php_mb_safe_strrchr() */
php_mb_safe_strrchr(const char * s,unsigned int c,size_t nbytes TSRMLS_DC)4480 MBSTRING_API char *php_mb_safe_strrchr(const char *s, unsigned int c, size_t nbytes TSRMLS_DC)
4481 {
4482 	return php_mb_safe_strrchr_ex(s, c, nbytes,
4483 		mbfl_no2encoding(MBSTRG(internal_encoding)));
4484 }
4485 /* }}} */
4486 
4487 /* {{{ MBSTRING_API char *php_mb_strrchr() */
php_mb_strrchr(const char * s,char c TSRMLS_DC)4488 MBSTRING_API char *php_mb_strrchr(const char *s, char c TSRMLS_DC)
4489 {
4490 	return php_mb_safe_strrchr(s, c, -1 TSRMLS_CC);
4491 }
4492 /* }}} */
4493 
4494 /* {{{ MBSTRING_API size_t php_mb_gpc_mbchar_bytes() */
php_mb_gpc_mbchar_bytes(const char * s TSRMLS_DC)4495 MBSTRING_API size_t php_mb_gpc_mbchar_bytes(const char *s TSRMLS_DC)
4496 {
4497 
4498 	if (MBSTRG(http_input_identify) != mbfl_no_encoding_invalid){
4499 		return php_mb_mbchar_bytes_ex(s,
4500     		mbfl_no2encoding(MBSTRG(http_input_identify)));
4501 	} else {
4502 		return php_mb_mbchar_bytes_ex(s,
4503 	    	mbfl_no2encoding(MBSTRG(internal_encoding)));
4504 	}
4505 }
4506 /* }}} */
4507 
4508 /*	{{{ MBSTRING_API int php_mb_gpc_encoding_converter() */
php_mb_gpc_encoding_converter(char ** str,int * len,int num,const char * encoding_to,const char * encoding_from TSRMLS_DC)4509 MBSTRING_API int php_mb_gpc_encoding_converter(char **str, int *len, int num, const char *encoding_to, const char *encoding_from TSRMLS_DC)
4510 {
4511 	int i;
4512 	mbfl_string string, result, *ret = NULL;
4513 	enum mbfl_no_encoding from_encoding, to_encoding;
4514 	mbfl_buffer_converter *convd;
4515 
4516 	if (encoding_to) {
4517 		/* new encoding */
4518 		to_encoding = mbfl_name2no_encoding(encoding_to);
4519 		if (to_encoding == mbfl_no_encoding_invalid) {
4520 			return -1;
4521 		}
4522 	} else {
4523 		to_encoding = MBSTRG(current_internal_encoding);
4524 	}
4525 	if (encoding_from) {
4526 		/* old encoding */
4527 		from_encoding = mbfl_name2no_encoding(encoding_from);
4528 		if (from_encoding == mbfl_no_encoding_invalid) {
4529 			return -1;
4530 		}
4531 	} else {
4532 		from_encoding = MBSTRG(http_input_identify);
4533 	}
4534 
4535 	if (from_encoding == mbfl_no_encoding_pass) {
4536 		return 0;
4537 	}
4538 
4539 	/* initialize string */
4540 	mbfl_string_init(&string);
4541 	mbfl_string_init(&result);
4542 	string.no_encoding = from_encoding;
4543 	string.no_language = MBSTRG(language);
4544 
4545 	for (i=0; i<num; i++){
4546 		string.val = (unsigned char *)str[i];
4547 		string.len = len[i];
4548 
4549 		/* initialize converter */
4550 		convd = mbfl_buffer_converter_new(from_encoding, to_encoding, string.len);
4551 		if (convd == NULL) {
4552 			return -1;
4553 		}
4554 		mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
4555 		mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
4556 
4557 		/* do it */
4558 		ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
4559 		if (ret != NULL) {
4560 			efree(str[i]);
4561 			str[i] = (char *)ret->val;
4562 			len[i] = (int)ret->len;
4563 		}
4564 
4565 		MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
4566 		mbfl_buffer_converter_delete(convd);
4567 	}
4568 
4569 	return ret ? 0 : -1;
4570 }
4571 /* }}} */
4572 
4573 /* {{{ MBSTRING_API int php_mb_gpc_encoding_detector()
4574  */
php_mb_gpc_encoding_detector(char ** arg_string,int * arg_length,int num,char * arg_list TSRMLS_DC)4575 MBSTRING_API int php_mb_gpc_encoding_detector(char **arg_string, int *arg_length, int num, char *arg_list TSRMLS_DC)
4576 {
4577 	mbfl_string string;
4578 	enum mbfl_no_encoding *elist;
4579 	enum mbfl_no_encoding encoding = mbfl_no_encoding_invalid;
4580 	mbfl_encoding_detector *identd = NULL;
4581 
4582 	int size;
4583 	enum mbfl_no_encoding *list;
4584 
4585 	if (MBSTRG(http_input_list_size) == 1 &&
4586 		MBSTRG(http_input_list)[0] == mbfl_no_encoding_pass) {
4587 		MBSTRG(http_input_identify) = mbfl_no_encoding_pass;
4588 		return SUCCESS;
4589 	}
4590 
4591 	if (MBSTRG(http_input_list_size) == 1 &&
4592 		MBSTRG(http_input_list)[0] != mbfl_no_encoding_auto &&
4593 		mbfl_no_encoding2name(MBSTRG(http_input_list)[0]) != NULL) {
4594 		MBSTRG(http_input_identify) = MBSTRG(http_input_list)[0];
4595 		return SUCCESS;
4596 	}
4597 
4598 	if (arg_list && strlen(arg_list)>0) {
4599 		/* make encoding list */
4600 		list = NULL;
4601 		size = 0;
4602 		php_mb_parse_encoding_list(arg_list, strlen(arg_list), &list, &size, 0 TSRMLS_CC);
4603 
4604 		if (size > 0 && list != NULL) {
4605 			elist = list;
4606 		} else {
4607 			elist = MBSTRG(current_detect_order_list);
4608 			size = MBSTRG(current_detect_order_list_size);
4609 			if (size <= 0){
4610 				elist = MBSTRG(default_detect_order_list);
4611 				size = MBSTRG(default_detect_order_list_size);
4612 			}
4613 		}
4614 	} else {
4615 		elist = MBSTRG(current_detect_order_list);
4616 		size = MBSTRG(current_detect_order_list_size);
4617 		if (size <= 0){
4618 			elist = MBSTRG(default_detect_order_list);
4619 			size = MBSTRG(default_detect_order_list_size);
4620 		}
4621 	}
4622 
4623 	mbfl_string_init(&string);
4624 	string.no_language = MBSTRG(language);
4625 
4626 	identd = mbfl_encoding_detector_new(elist, size, MBSTRG(strict_detection));
4627 
4628 	if (identd) {
4629 		int n = 0;
4630 		while(n < num){
4631 			string.val = (unsigned char *)arg_string[n];
4632 			string.len = arg_length[n];
4633 			if (mbfl_encoding_detector_feed(identd, &string)) {
4634 				break;
4635 			}
4636 			n++;
4637 		}
4638 		encoding = mbfl_encoding_detector_judge(identd);
4639 		mbfl_encoding_detector_delete(identd);
4640 	}
4641 
4642 	if (encoding != mbfl_no_encoding_invalid) {
4643 		MBSTRG(http_input_identify) = encoding;
4644 		return SUCCESS;
4645 	} else {
4646 		return FAILURE;
4647 	}
4648 }
4649 /* }}} */
4650 
4651 /* {{{ MBSTRING_API int php_mb_stripos()
4652  */
php_mb_stripos(int mode,const char * old_haystack,unsigned int old_haystack_len,const char * old_needle,unsigned int old_needle_len,long offset,const char * from_encoding TSRMLS_DC)4653 MBSTRING_API int php_mb_stripos(int mode, const char *old_haystack, unsigned int old_haystack_len, const char *old_needle, unsigned int old_needle_len, long offset, const char *from_encoding TSRMLS_DC)
4654 {
4655 	int n;
4656 	mbfl_string haystack, needle;
4657 	n = -1;
4658 
4659 	mbfl_string_init(&haystack);
4660 	mbfl_string_init(&needle);
4661 	haystack.no_language = MBSTRG(language);
4662 	haystack.no_encoding = MBSTRG(current_internal_encoding);
4663 	needle.no_language = MBSTRG(language);
4664 	needle.no_encoding = MBSTRG(current_internal_encoding);
4665 
4666 	do {
4667 		size_t len = 0;
4668 		haystack.val = (unsigned char *)php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, (char *)old_haystack, old_haystack_len, &len, from_encoding TSRMLS_CC);
4669 		haystack.len = len;
4670 
4671 		if (!haystack.val) {
4672 			break;
4673 		}
4674 
4675 		if (haystack.len <= 0) {
4676 			break;
4677 		}
4678 
4679 		needle.val = (unsigned char *)php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, (char *)old_needle, old_needle_len, &len, from_encoding TSRMLS_CC);
4680 		needle.len = len;
4681 
4682 		if (!needle.val) {
4683 			break;
4684 		}
4685 
4686 		if (needle.len <= 0) {
4687 			break;
4688 		}
4689 
4690 		haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding);
4691 		if (haystack.no_encoding == mbfl_no_encoding_invalid) {
4692 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", from_encoding);
4693 			break;
4694 		}
4695 
4696  		{
4697  			int haystack_char_len = mbfl_strlen(&haystack);
4698 
4699  			if (mode) {
4700  				if ((offset > 0 && offset > haystack_char_len) ||
4701  					(offset < 0 && -offset > haystack_char_len)) {
4702  					php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset is greater than the length of haystack string");
4703  					break;
4704  				}
4705  			} else {
4706  				if (offset < 0 || offset > haystack_char_len) {
4707  					php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset not contained in string");
4708  					break;
4709  				}
4710  			}
4711 		}
4712 
4713 		n = mbfl_strpos(&haystack, &needle, offset, mode);
4714 	} while(0);
4715 
4716 	if (haystack.val) {
4717 		efree(haystack.val);
4718 	}
4719 
4720 	if (needle.val) {
4721 		efree(needle.val);
4722 	}
4723 
4724 	return n;
4725 }
4726 /* }}} */
4727 
4728 #ifdef ZEND_MULTIBYTE
4729 /* {{{ php_mb_set_zend_encoding() */
php_mb_set_zend_encoding(TSRMLS_D)4730 static int php_mb_set_zend_encoding(TSRMLS_D)
4731 {
4732 	/* 'd better use mbfl_memory_device? */
4733 	char *name, *list = NULL;
4734 	int n, *entry, list_size = 0;
4735 	zend_encoding_detector encoding_detector;
4736 	zend_encoding_converter encoding_converter;
4737 	zend_encoding_oddlen encoding_oddlen;
4738 
4739 	/* notify script encoding to Zend Engine */
4740 	entry = MBSTRG(script_encoding_list);
4741 	n = MBSTRG(script_encoding_list_size);
4742 	while (n > 0) {
4743 		name = (char *)mbfl_no_encoding2name(*entry);
4744 		if (name) {
4745 			list_size += strlen(name) + 1;
4746 			if (!list) {
4747 				list = (char*)emalloc(list_size);
4748 				*list = '\0';
4749 			} else {
4750 				list = (char*)erealloc(list, list_size);
4751 				strcat(list, ",");
4752 			}
4753 			strcat(list, name);
4754 		}
4755 		entry++;
4756 		n--;
4757 	}
4758 	zend_multibyte_set_script_encoding(list, (list ? strlen(list) : 0) TSRMLS_CC);
4759 	if (list) {
4760 		efree(list);
4761 	}
4762 	encoding_detector = php_mb_encoding_detector;
4763 	encoding_converter = php_mb_encoding_converter;
4764 	encoding_oddlen = php_mb_oddlen;
4765 
4766 	/* TODO: make independent from mbstring.encoding_translation? */
4767 	if (MBSTRG(encoding_translation)) {
4768 		/* notify internal encoding to Zend Engine */
4769 		name = (char*)mbfl_no_encoding2name(MBSTRG(current_internal_encoding));
4770 		zend_multibyte_set_internal_encoding(name TSRMLS_CC);
4771 	}
4772 
4773 	zend_multibyte_set_functions(encoding_detector, encoding_converter, encoding_oddlen TSRMLS_CC);
4774 
4775 	return 0;
4776 }
4777 /* }}} */
4778 
4779 /* {{{ char *php_mb_encoding_detector()
4780  * Interface for Zend Engine
4781  */
php_mb_encoding_detector(const unsigned char * arg_string,size_t arg_length,char * arg_list TSRMLS_DC)4782 static char* php_mb_encoding_detector(const unsigned char *arg_string, size_t arg_length, char *arg_list TSRMLS_DC)
4783 {
4784 	mbfl_string string;
4785 	const char *ret;
4786 	enum mbfl_no_encoding *elist;
4787 	int size, *list;
4788 
4789 	/* make encoding list */
4790 	list = NULL;
4791 	size = 0;
4792 	php_mb_parse_encoding_list(arg_list, strlen(arg_list), &list, &size, 0 TSRMLS_CC);
4793 	if (size <= 0) {
4794 		return NULL;
4795 	}
4796 	if (size > 0 && list != NULL) {
4797 		elist = list;
4798 	} else {
4799 		elist = MBSTRG(current_detect_order_list);
4800 		size = MBSTRG(current_detect_order_list_size);
4801 	}
4802 
4803 	mbfl_string_init(&string);
4804 	string.no_language = MBSTRG(language);
4805 	string.val = (unsigned char *)arg_string;
4806 	string.len = arg_length;
4807 	ret = mbfl_identify_encoding_name(&string, elist, size, 0);
4808 	if (list != NULL) {
4809 		efree((void *)list);
4810 	}
4811 	if (ret != NULL) {
4812 		return estrdup(ret);
4813 	} else {
4814 		return NULL;
4815 	}
4816 }
4817 /* }}} */
4818 
4819 /*	{{{ int php_mb_encoding_converter() */
php_mb_encoding_converter(unsigned char ** to,size_t * to_length,const unsigned char * from,size_t from_length,const char * encoding_to,const char * encoding_from TSRMLS_DC)4820 static int php_mb_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const char *encoding_to, const char *encoding_from TSRMLS_DC)
4821 {
4822 	mbfl_string string, result, *ret;
4823 	enum mbfl_no_encoding from_encoding, to_encoding;
4824 	mbfl_buffer_converter *convd;
4825 
4826 	/* new encoding */
4827 	to_encoding = mbfl_name2no_encoding(encoding_to);
4828 	if (to_encoding == mbfl_no_encoding_invalid) {
4829 		return -1;
4830 	}
4831 	/* old encoding */
4832 	from_encoding = mbfl_name2no_encoding(encoding_from);
4833 	if (from_encoding == mbfl_no_encoding_invalid) {
4834 		return -1;
4835 	}
4836 	/* initialize string */
4837 	mbfl_string_init(&string);
4838 	mbfl_string_init(&result);
4839 	string.no_encoding = from_encoding;
4840 	string.no_language = MBSTRG(language);
4841 	string.val = (unsigned char*)from;
4842 	string.len = from_length;
4843 
4844 	/* initialize converter */
4845 	convd = mbfl_buffer_converter_new(from_encoding, to_encoding, string.len);
4846 	if (convd == NULL) {
4847 		return -1;
4848 	}
4849 	mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
4850 	mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
4851 
4852 	/* do it */
4853 	ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
4854 	if (ret != NULL) {
4855 		*to = ret->val;
4856 		*to_length = ret->len;
4857 	}
4858 
4859 	MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
4860 	mbfl_buffer_converter_delete(convd);
4861 
4862 	return ret ? 0 : -1;
4863 }
4864 /* }}} */
4865 
4866 /* {{{ int php_mb_oddlen()
4867  *	returns number of odd (e.g. appears only first byte of multibyte
4868  *	character) chars
4869  */
php_mb_oddlen(const unsigned char * string,size_t length,const char * encoding TSRMLS_DC)4870 static size_t php_mb_oddlen(const unsigned char *string, size_t length, const char *encoding TSRMLS_DC)
4871 {
4872 	mbfl_string mb_string;
4873 
4874 	mbfl_string_init(&mb_string);
4875 	mb_string.no_language = MBSTRG(language);
4876 	mb_string.no_encoding = mbfl_name2no_encoding(encoding);
4877 	mb_string.val = (unsigned char *)string;
4878 	mb_string.len = length;
4879 
4880 	if (mb_string.no_encoding == mbfl_no_encoding_invalid) {
4881 		return 0;
4882 	}
4883 	return mbfl_oddlen(&mb_string);
4884 }
4885 /* }}} */
4886 #endif /* ZEND_MULTIBYTE */
4887 
4888 #endif	/* HAVE_MBSTRING */
4889 
4890 /*
4891  * Local variables:
4892  * tab-width: 4
4893  * c-basic-offset: 4
4894  * End:
4895  * vim600: fdm=marker
4896  * vim: noet sw=4 ts=4
4897  */
4898