xref: /PHP-5.5/ext/mbstring/mbstring.c (revision 73c1be26)
1 /*
2    +----------------------------------------------------------------------+
3    | PHP Version 5                                                        |
4    +----------------------------------------------------------------------+
5    | Copyright (c) 1997-2015 The PHP Group                                |
6    +----------------------------------------------------------------------+
7    | This source file is subject to version 3.01 of the PHP license,      |
8    | that is bundled with this package in the file LICENSE, and is        |
9    | available through the world-wide-web at the following url:           |
10    | http://www.php.net/license/3_01.txt                                  |
11    | If you did not receive a copy of the PHP license and are unable to   |
12    | obtain it through the world-wide-web, please send a note to          |
13    | license@php.net so we can mail you a copy immediately.               |
14    +----------------------------------------------------------------------+
15    | Author: Tsukada Takuya <tsukada@fminn.nagano.nagano.jp>              |
16    |         Rui Hirokawa <hirokawa@php.net>                              |
17    +----------------------------------------------------------------------+
18  */
19 
20 /* $Id$ */
21 
22 /*
23  * PHP 4 Multibyte String module "mbstring"
24  *
25  * History:
26  *   2000.5.19  Release php-4.0RC2_jstring-1.0
27  *   2001.4.1   Release php4_jstring-1.0.91
28  *   2001.4.30  Release php4_jstring-1.1 (contribute to The PHP Group)
29  *   2001.5.1   Renamed from jstring to mbstring (hirokawa@php.net)
30  */
31 
32 /*
33  * PHP3 Internationalization support program.
34  *
35  * Copyright (c) 1999,2000 by the PHP3 internationalization team.
36  * All rights reserved.
37  *
38  * See README_PHP3-i18n-ja for more detail.
39  *
40  * Authors:
41  *    Hironori Sato <satoh@jpnnet.com>
42  *    Shigeru Kanemoto <sgk@happysize.co.jp>
43  *    Tsukada Takuya <tsukada@fminn.nagano.nagano.jp>
44  *    Rui Hirokawa <rui_hirokawa@ybb.ne.jp>
45  */
46 
47 /* {{{ includes */
48 #ifdef HAVE_CONFIG_H
49 #include "config.h"
50 #endif
51 
52 #include "php.h"
53 #include "php_ini.h"
54 #include "php_variables.h"
55 #include "mbstring.h"
56 #include "ext/standard/php_string.h"
57 #include "ext/standard/php_mail.h"
58 #include "ext/standard/exec.h"
59 #include "ext/standard/php_smart_str.h"
60 #include "ext/standard/url.h"
61 #include "main/php_output.h"
62 #include "ext/standard/info.h"
63 
64 #include "libmbfl/mbfl/mbfl_allocators.h"
65 #include "libmbfl/mbfl/mbfilter_pass.h"
66 
67 #include "php_variables.h"
68 #include "php_globals.h"
69 #include "rfc1867.h"
70 #include "php_content_types.h"
71 #include "SAPI.h"
72 #include "php_unicode.h"
73 #include "TSRM.h"
74 
75 #include "mb_gpc.h"
76 
77 #if HAVE_MBREGEX
78 #include "php_mbregex.h"
79 #endif
80 
81 #include "zend_multibyte.h"
82 
83 #if HAVE_ONIG
84 #include "php_onig_compat.h"
85 #include <oniguruma.h>
86 #undef UChar
87 #elif HAVE_PCRE || HAVE_BUNDLED_PCRE
88 #include "ext/pcre/php_pcre.h"
89 #endif
90 /* }}} */
91 
92 #if HAVE_MBSTRING
93 
94 /* {{{ prototypes */
95 ZEND_DECLARE_MODULE_GLOBALS(mbstring)
96 
97 static PHP_GINIT_FUNCTION(mbstring);
98 static PHP_GSHUTDOWN_FUNCTION(mbstring);
99 
100 static void php_mb_populate_current_detect_order_list(TSRMLS_D);
101 
102 static int php_mb_encoding_translation(TSRMLS_D);
103 
104 static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *list_size TSRMLS_DC);
105 
106 static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding TSRMLS_DC);
107 
108 /* }}} */
109 
110 /* {{{ php_mb_default_identify_list */
111 typedef struct _php_mb_nls_ident_list {
112 	enum mbfl_no_language lang;
113 	const enum mbfl_no_encoding *list;
114 	size_t list_size;
115 } php_mb_nls_ident_list;
116 
117 static const enum mbfl_no_encoding php_mb_default_identify_list_ja[] = {
118 	mbfl_no_encoding_ascii,
119 	mbfl_no_encoding_jis,
120 	mbfl_no_encoding_utf8,
121 	mbfl_no_encoding_euc_jp,
122 	mbfl_no_encoding_sjis
123 };
124 
125 static const enum mbfl_no_encoding php_mb_default_identify_list_cn[] = {
126 	mbfl_no_encoding_ascii,
127 	mbfl_no_encoding_utf8,
128 	mbfl_no_encoding_euc_cn,
129 	mbfl_no_encoding_cp936
130 };
131 
132 static const enum mbfl_no_encoding php_mb_default_identify_list_tw_hk[] = {
133 	mbfl_no_encoding_ascii,
134 	mbfl_no_encoding_utf8,
135 	mbfl_no_encoding_euc_tw,
136 	mbfl_no_encoding_big5
137 };
138 
139 static const enum mbfl_no_encoding php_mb_default_identify_list_kr[] = {
140 	mbfl_no_encoding_ascii,
141 	mbfl_no_encoding_utf8,
142 	mbfl_no_encoding_euc_kr,
143 	mbfl_no_encoding_uhc
144 };
145 
146 static const enum mbfl_no_encoding php_mb_default_identify_list_ru[] = {
147 	mbfl_no_encoding_ascii,
148 	mbfl_no_encoding_utf8,
149 	mbfl_no_encoding_koi8r,
150 	mbfl_no_encoding_cp1251,
151 	mbfl_no_encoding_cp866
152 };
153 
154 static const enum mbfl_no_encoding php_mb_default_identify_list_hy[] = {
155 	mbfl_no_encoding_ascii,
156 	mbfl_no_encoding_utf8,
157 	mbfl_no_encoding_armscii8
158 };
159 
160 static const enum mbfl_no_encoding php_mb_default_identify_list_tr[] = {
161 	mbfl_no_encoding_ascii,
162 	mbfl_no_encoding_utf8,
163 	mbfl_no_encoding_cp1254,
164 	mbfl_no_encoding_8859_9
165 };
166 
167 static const enum mbfl_no_encoding php_mb_default_identify_list_ua[] = {
168 	mbfl_no_encoding_ascii,
169 	mbfl_no_encoding_utf8,
170 	mbfl_no_encoding_koi8u
171 };
172 
173 static const enum mbfl_no_encoding php_mb_default_identify_list_neut[] = {
174 	mbfl_no_encoding_ascii,
175 	mbfl_no_encoding_utf8
176 };
177 
178 
179 static const php_mb_nls_ident_list php_mb_default_identify_list[] = {
180 	{ mbfl_no_language_japanese, php_mb_default_identify_list_ja, sizeof(php_mb_default_identify_list_ja) / sizeof(php_mb_default_identify_list_ja[0]) },
181 	{ mbfl_no_language_korean, php_mb_default_identify_list_kr, sizeof(php_mb_default_identify_list_kr) / sizeof(php_mb_default_identify_list_kr[0]) },
182 	{ mbfl_no_language_traditional_chinese, php_mb_default_identify_list_tw_hk, sizeof(php_mb_default_identify_list_tw_hk) / sizeof(php_mb_default_identify_list_tw_hk[0]) },
183 	{ mbfl_no_language_simplified_chinese, php_mb_default_identify_list_cn, sizeof(php_mb_default_identify_list_cn) / sizeof(php_mb_default_identify_list_cn[0]) },
184 	{ mbfl_no_language_russian, php_mb_default_identify_list_ru, sizeof(php_mb_default_identify_list_ru) / sizeof(php_mb_default_identify_list_ru[0]) },
185 	{ mbfl_no_language_armenian, php_mb_default_identify_list_hy, sizeof(php_mb_default_identify_list_hy) / sizeof(php_mb_default_identify_list_hy[0]) },
186 	{ mbfl_no_language_turkish, php_mb_default_identify_list_tr, sizeof(php_mb_default_identify_list_tr) / sizeof(php_mb_default_identify_list_tr[0]) },
187 	{ mbfl_no_language_ukrainian, php_mb_default_identify_list_ua, sizeof(php_mb_default_identify_list_ua) / sizeof(php_mb_default_identify_list_ua[0]) },
188 	{ mbfl_no_language_neutral, php_mb_default_identify_list_neut, sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]) }
189 };
190 
191 /* }}} */
192 
193 /* {{{ mb_overload_def mb_ovld[] */
194 static const struct mb_overload_def mb_ovld[] = {
195 	{MB_OVERLOAD_MAIL, "mail", "mb_send_mail", "mb_orig_mail"},
196 	{MB_OVERLOAD_STRING, "strlen", "mb_strlen", "mb_orig_strlen"},
197 	{MB_OVERLOAD_STRING, "strpos", "mb_strpos", "mb_orig_strpos"},
198 	{MB_OVERLOAD_STRING, "strrpos", "mb_strrpos", "mb_orig_strrpos"},
199 	{MB_OVERLOAD_STRING, "stripos", "mb_stripos", "mb_orig_stripos"},
200 	{MB_OVERLOAD_STRING, "strripos", "mb_strripos", "mb_orig_strripos"},
201 	{MB_OVERLOAD_STRING, "strstr", "mb_strstr", "mb_orig_strstr"},
202 	{MB_OVERLOAD_STRING, "strrchr", "mb_strrchr", "mb_orig_strrchr"},
203 	{MB_OVERLOAD_STRING, "stristr", "mb_stristr", "mb_orig_stristr"},
204 	{MB_OVERLOAD_STRING, "substr", "mb_substr", "mb_orig_substr"},
205 	{MB_OVERLOAD_STRING, "strtolower", "mb_strtolower", "mb_orig_strtolower"},
206 	{MB_OVERLOAD_STRING, "strtoupper", "mb_strtoupper", "mb_orig_strtoupper"},
207 	{MB_OVERLOAD_STRING, "substr_count", "mb_substr_count", "mb_orig_substr_count"},
208 #if HAVE_MBREGEX
209 	{MB_OVERLOAD_REGEX, "ereg", "mb_ereg", "mb_orig_ereg"},
210 	{MB_OVERLOAD_REGEX, "eregi", "mb_eregi", "mb_orig_eregi"},
211 	{MB_OVERLOAD_REGEX, "ereg_replace", "mb_ereg_replace", "mb_orig_ereg_replace"},
212 	{MB_OVERLOAD_REGEX, "eregi_replace", "mb_eregi_replace", "mb_orig_eregi_replace"},
213 	{MB_OVERLOAD_REGEX, "split", "mb_split", "mb_orig_split"},
214 #endif
215 	{0, NULL, NULL, NULL}
216 };
217 /* }}} */
218 
219 /* {{{ arginfo */
220 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_language, 0, 0, 0)
221 	ZEND_ARG_INFO(0, language)
222 ZEND_END_ARG_INFO()
223 
224 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_internal_encoding, 0, 0, 0)
225 	ZEND_ARG_INFO(0, encoding)
226 ZEND_END_ARG_INFO()
227 
228 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_http_input, 0, 0, 0)
229 	ZEND_ARG_INFO(0, type)
230 ZEND_END_ARG_INFO()
231 
232 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_http_output, 0, 0, 0)
233 	ZEND_ARG_INFO(0, encoding)
234 ZEND_END_ARG_INFO()
235 
236 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_detect_order, 0, 0, 0)
237 	ZEND_ARG_INFO(0, encoding)
238 ZEND_END_ARG_INFO()
239 
240 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substitute_character, 0, 0, 0)
241 	ZEND_ARG_INFO(0, substchar)
242 ZEND_END_ARG_INFO()
243 
244 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_preferred_mime_name, 0, 0, 1)
245 	ZEND_ARG_INFO(0, encoding)
246 ZEND_END_ARG_INFO()
247 
248 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_parse_str, 0, 0, 1)
249 	ZEND_ARG_INFO(0, encoded_string)
250 	ZEND_ARG_INFO(1, result)
251 ZEND_END_ARG_INFO()
252 
253 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_output_handler, 0, 0, 2)
254 	ZEND_ARG_INFO(0, contents)
255 	ZEND_ARG_INFO(0, status)
256 ZEND_END_ARG_INFO()
257 
258 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strlen, 0, 0, 1)
259 	ZEND_ARG_INFO(0, str)
260 	ZEND_ARG_INFO(0, encoding)
261 ZEND_END_ARG_INFO()
262 
263 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strpos, 0, 0, 2)
264 	ZEND_ARG_INFO(0, haystack)
265 	ZEND_ARG_INFO(0, needle)
266 	ZEND_ARG_INFO(0, offset)
267 	ZEND_ARG_INFO(0, encoding)
268 ZEND_END_ARG_INFO()
269 
270 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrpos, 0, 0, 2)
271 	ZEND_ARG_INFO(0, haystack)
272 	ZEND_ARG_INFO(0, needle)
273 	ZEND_ARG_INFO(0, offset)
274 	ZEND_ARG_INFO(0, encoding)
275 ZEND_END_ARG_INFO()
276 
277 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_stripos, 0, 0, 2)
278 	ZEND_ARG_INFO(0, haystack)
279 	ZEND_ARG_INFO(0, needle)
280 	ZEND_ARG_INFO(0, offset)
281 	ZEND_ARG_INFO(0, encoding)
282 ZEND_END_ARG_INFO()
283 
284 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strripos, 0, 0, 2)
285 	ZEND_ARG_INFO(0, haystack)
286 	ZEND_ARG_INFO(0, needle)
287 	ZEND_ARG_INFO(0, offset)
288 	ZEND_ARG_INFO(0, encoding)
289 ZEND_END_ARG_INFO()
290 
291 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strstr, 0, 0, 2)
292 	ZEND_ARG_INFO(0, haystack)
293 	ZEND_ARG_INFO(0, needle)
294 	ZEND_ARG_INFO(0, part)
295 	ZEND_ARG_INFO(0, encoding)
296 ZEND_END_ARG_INFO()
297 
298 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrchr, 0, 0, 2)
299 	ZEND_ARG_INFO(0, haystack)
300 	ZEND_ARG_INFO(0, needle)
301 	ZEND_ARG_INFO(0, part)
302 	ZEND_ARG_INFO(0, encoding)
303 ZEND_END_ARG_INFO()
304 
305 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_stristr, 0, 0, 2)
306 	ZEND_ARG_INFO(0, haystack)
307 	ZEND_ARG_INFO(0, needle)
308 	ZEND_ARG_INFO(0, part)
309 	ZEND_ARG_INFO(0, encoding)
310 ZEND_END_ARG_INFO()
311 
312 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrichr, 0, 0, 2)
313 	ZEND_ARG_INFO(0, haystack)
314 	ZEND_ARG_INFO(0, needle)
315 	ZEND_ARG_INFO(0, part)
316 	ZEND_ARG_INFO(0, encoding)
317 ZEND_END_ARG_INFO()
318 
319 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substr_count, 0, 0, 2)
320 	ZEND_ARG_INFO(0, haystack)
321 	ZEND_ARG_INFO(0, needle)
322 	ZEND_ARG_INFO(0, encoding)
323 ZEND_END_ARG_INFO()
324 
325 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substr, 0, 0, 2)
326 	ZEND_ARG_INFO(0, str)
327 	ZEND_ARG_INFO(0, start)
328 	ZEND_ARG_INFO(0, length)
329 	ZEND_ARG_INFO(0, encoding)
330 ZEND_END_ARG_INFO()
331 
332 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strcut, 0, 0, 2)
333 	ZEND_ARG_INFO(0, str)
334 	ZEND_ARG_INFO(0, start)
335 	ZEND_ARG_INFO(0, length)
336 	ZEND_ARG_INFO(0, encoding)
337 ZEND_END_ARG_INFO()
338 
339 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strwidth, 0, 0, 1)
340 	ZEND_ARG_INFO(0, str)
341 	ZEND_ARG_INFO(0, encoding)
342 ZEND_END_ARG_INFO()
343 
344 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strimwidth, 0, 0, 3)
345 	ZEND_ARG_INFO(0, str)
346 	ZEND_ARG_INFO(0, start)
347 	ZEND_ARG_INFO(0, width)
348 	ZEND_ARG_INFO(0, trimmarker)
349 	ZEND_ARG_INFO(0, encoding)
350 ZEND_END_ARG_INFO()
351 
352 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_encoding, 0, 0, 2)
353 	ZEND_ARG_INFO(0, str)
354 	ZEND_ARG_INFO(0, to)
355 	ZEND_ARG_INFO(0, from)
356 ZEND_END_ARG_INFO()
357 
358 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_case, 0, 0, 2)
359 	ZEND_ARG_INFO(0, sourcestring)
360 	ZEND_ARG_INFO(0, mode)
361 	ZEND_ARG_INFO(0, encoding)
362 ZEND_END_ARG_INFO()
363 
364 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strtoupper, 0, 0, 1)
365 	ZEND_ARG_INFO(0, sourcestring)
366 	ZEND_ARG_INFO(0, encoding)
367 ZEND_END_ARG_INFO()
368 
369 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strtolower, 0, 0, 1)
370 	ZEND_ARG_INFO(0, sourcestring)
371 	ZEND_ARG_INFO(0, encoding)
372 ZEND_END_ARG_INFO()
373 
374 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_detect_encoding, 0, 0, 1)
375 	ZEND_ARG_INFO(0, str)
376 	ZEND_ARG_INFO(0, encoding_list)
377 	ZEND_ARG_INFO(0, strict)
378 ZEND_END_ARG_INFO()
379 
380 ZEND_BEGIN_ARG_INFO(arginfo_mb_list_encodings, 0)
381 ZEND_END_ARG_INFO()
382 
383 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encoding_aliases, 0, 0, 1)
384 	ZEND_ARG_INFO(0, encoding)
385 ZEND_END_ARG_INFO()
386 
387 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encode_mimeheader, 0, 0, 1)
388 	ZEND_ARG_INFO(0, str)
389 	ZEND_ARG_INFO(0, charset)
390 	ZEND_ARG_INFO(0, transfer)
391 	ZEND_ARG_INFO(0, linefeed)
392 	ZEND_ARG_INFO(0, indent)
393 ZEND_END_ARG_INFO()
394 
395 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_decode_mimeheader, 0, 0, 1)
396 	ZEND_ARG_INFO(0, string)
397 ZEND_END_ARG_INFO()
398 
399 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_kana, 0, 0, 1)
400 	ZEND_ARG_INFO(0, str)
401 	ZEND_ARG_INFO(0, option)
402 	ZEND_ARG_INFO(0, encoding)
403 ZEND_END_ARG_INFO()
404 
405 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_variables, 1, 0, 3)
406 	ZEND_ARG_INFO(0, to)
407 	ZEND_ARG_INFO(0, from)
408 	ZEND_ARG_INFO(1, ...)
409 ZEND_END_ARG_INFO()
410 
411 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encode_numericentity, 0, 0, 2)
412 	ZEND_ARG_INFO(0, string)
413 	ZEND_ARG_INFO(0, convmap)
414 	ZEND_ARG_INFO(0, encoding)
415 	ZEND_ARG_INFO(0, is_hex)
416 ZEND_END_ARG_INFO()
417 
418 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_decode_numericentity, 0, 0, 2)
419 	ZEND_ARG_INFO(0, string)
420 	ZEND_ARG_INFO(0, convmap)
421 	ZEND_ARG_INFO(0, encoding)
422 ZEND_END_ARG_INFO()
423 
424 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_send_mail, 0, 0, 3)
425 	ZEND_ARG_INFO(0, to)
426 	ZEND_ARG_INFO(0, subject)
427 	ZEND_ARG_INFO(0, message)
428 	ZEND_ARG_INFO(0, additional_headers)
429 	ZEND_ARG_INFO(0, additional_parameters)
430 ZEND_END_ARG_INFO()
431 
432 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_get_info, 0, 0, 0)
433 	ZEND_ARG_INFO(0, type)
434 ZEND_END_ARG_INFO()
435 
436 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_check_encoding, 0, 0, 0)
437 	ZEND_ARG_INFO(0, var)
438 	ZEND_ARG_INFO(0, encoding)
439 ZEND_END_ARG_INFO()
440 
441 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_regex_encoding, 0, 0, 0)
442 	ZEND_ARG_INFO(0, encoding)
443 ZEND_END_ARG_INFO()
444 
445 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg, 0, 0, 2)
446 	ZEND_ARG_INFO(0, pattern)
447 	ZEND_ARG_INFO(0, string)
448 	ZEND_ARG_INFO(1, registers)
449 ZEND_END_ARG_INFO()
450 
451 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_eregi, 0, 0, 2)
452 	ZEND_ARG_INFO(0, pattern)
453 	ZEND_ARG_INFO(0, string)
454 	ZEND_ARG_INFO(1, registers)
455 ZEND_END_ARG_INFO()
456 
457 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_replace, 0, 0, 3)
458 	ZEND_ARG_INFO(0, pattern)
459 	ZEND_ARG_INFO(0, replacement)
460 	ZEND_ARG_INFO(0, string)
461 	ZEND_ARG_INFO(0, option)
462 ZEND_END_ARG_INFO()
463 
464 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_eregi_replace, 0, 0, 3)
465 	ZEND_ARG_INFO(0, pattern)
466 	ZEND_ARG_INFO(0, replacement)
467 	ZEND_ARG_INFO(0, string)
468 ZEND_END_ARG_INFO()
469 
470 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_replace_callback, 0, 0, 3)
471 	ZEND_ARG_INFO(0, pattern)
472 	ZEND_ARG_INFO(0, callback)
473 	ZEND_ARG_INFO(0, string)
474 	ZEND_ARG_INFO(0, option)
475 ZEND_END_ARG_INFO()
476 
477 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_split, 0, 0, 2)
478 	ZEND_ARG_INFO(0, pattern)
479 	ZEND_ARG_INFO(0, string)
480 	ZEND_ARG_INFO(0, limit)
481 ZEND_END_ARG_INFO()
482 
483 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_match, 0, 0, 2)
484 	ZEND_ARG_INFO(0, pattern)
485 	ZEND_ARG_INFO(0, string)
486 	ZEND_ARG_INFO(0, option)
487 ZEND_END_ARG_INFO()
488 
489 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search, 0, 0, 0)
490 	ZEND_ARG_INFO(0, pattern)
491 	ZEND_ARG_INFO(0, option)
492 ZEND_END_ARG_INFO()
493 
494 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_pos, 0, 0, 0)
495 	ZEND_ARG_INFO(0, pattern)
496 	ZEND_ARG_INFO(0, option)
497 ZEND_END_ARG_INFO()
498 
499 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_regs, 0, 0, 0)
500 	ZEND_ARG_INFO(0, pattern)
501 	ZEND_ARG_INFO(0, option)
502 ZEND_END_ARG_INFO()
503 
504 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_init, 0, 0, 1)
505 	ZEND_ARG_INFO(0, string)
506 	ZEND_ARG_INFO(0, pattern)
507 	ZEND_ARG_INFO(0, option)
508 ZEND_END_ARG_INFO()
509 
510 ZEND_BEGIN_ARG_INFO(arginfo_mb_ereg_search_getregs, 0)
511 ZEND_END_ARG_INFO()
512 
513 ZEND_BEGIN_ARG_INFO(arginfo_mb_ereg_search_getpos, 0)
514 ZEND_END_ARG_INFO()
515 
516 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_setpos, 0, 0, 1)
517 	ZEND_ARG_INFO(0, position)
518 ZEND_END_ARG_INFO()
519 
520 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_regex_set_options, 0, 0, 0)
521 	ZEND_ARG_INFO(0, options)
522 ZEND_END_ARG_INFO()
523 /* }}} */
524 
525 /* {{{ zend_function_entry mbstring_functions[] */
526 const zend_function_entry mbstring_functions[] = {
527 	PHP_FE(mb_convert_case,			arginfo_mb_convert_case)
528 	PHP_FE(mb_strtoupper,			arginfo_mb_strtoupper)
529 	PHP_FE(mb_strtolower,			arginfo_mb_strtolower)
530 	PHP_FE(mb_language,				arginfo_mb_language)
531 	PHP_FE(mb_internal_encoding,	arginfo_mb_internal_encoding)
532 	PHP_FE(mb_http_input,			arginfo_mb_http_input)
533 	PHP_FE(mb_http_output,			arginfo_mb_http_output)
534 	PHP_FE(mb_detect_order,			arginfo_mb_detect_order)
535 	PHP_FE(mb_substitute_character,	arginfo_mb_substitute_character)
536 	PHP_FE(mb_parse_str,			arginfo_mb_parse_str)
537 	PHP_FE(mb_output_handler,		arginfo_mb_output_handler)
538 	PHP_FE(mb_preferred_mime_name,	arginfo_mb_preferred_mime_name)
539 	PHP_FE(mb_strlen,				arginfo_mb_strlen)
540 	PHP_FE(mb_strpos,				arginfo_mb_strpos)
541 	PHP_FE(mb_strrpos,				arginfo_mb_strrpos)
542 	PHP_FE(mb_stripos,				arginfo_mb_stripos)
543 	PHP_FE(mb_strripos,				arginfo_mb_strripos)
544 	PHP_FE(mb_strstr,				arginfo_mb_strstr)
545 	PHP_FE(mb_strrchr,				arginfo_mb_strrchr)
546 	PHP_FE(mb_stristr,				arginfo_mb_stristr)
547 	PHP_FE(mb_strrichr,				arginfo_mb_strrichr)
548 	PHP_FE(mb_substr_count,			arginfo_mb_substr_count)
549 	PHP_FE(mb_substr,				arginfo_mb_substr)
550 	PHP_FE(mb_strcut,				arginfo_mb_strcut)
551 	PHP_FE(mb_strwidth,				arginfo_mb_strwidth)
552 	PHP_FE(mb_strimwidth,			arginfo_mb_strimwidth)
553 	PHP_FE(mb_convert_encoding,		arginfo_mb_convert_encoding)
554 	PHP_FE(mb_detect_encoding,		arginfo_mb_detect_encoding)
555 	PHP_FE(mb_list_encodings,		arginfo_mb_list_encodings)
556 	PHP_FE(mb_encoding_aliases,		arginfo_mb_encoding_aliases)
557 	PHP_FE(mb_convert_kana,			arginfo_mb_convert_kana)
558 	PHP_FE(mb_encode_mimeheader,	arginfo_mb_encode_mimeheader)
559 	PHP_FE(mb_decode_mimeheader,	arginfo_mb_decode_mimeheader)
560 	PHP_FE(mb_convert_variables,	arginfo_mb_convert_variables)
561 	PHP_FE(mb_encode_numericentity,	arginfo_mb_encode_numericentity)
562 	PHP_FE(mb_decode_numericentity,	arginfo_mb_decode_numericentity)
563 	PHP_FE(mb_send_mail,			arginfo_mb_send_mail)
564 	PHP_FE(mb_get_info,				arginfo_mb_get_info)
565 	PHP_FE(mb_check_encoding,		arginfo_mb_check_encoding)
566 #if HAVE_MBREGEX
567 	PHP_MBREGEX_FUNCTION_ENTRIES
568 #endif
569 	PHP_FE_END
570 };
571 /* }}} */
572 
573 /* {{{ zend_module_entry mbstring_module_entry */
574 zend_module_entry mbstring_module_entry = {
575 	STANDARD_MODULE_HEADER,
576 	"mbstring",
577 	mbstring_functions,
578 	PHP_MINIT(mbstring),
579 	PHP_MSHUTDOWN(mbstring),
580 	PHP_RINIT(mbstring),
581 	PHP_RSHUTDOWN(mbstring),
582 	PHP_MINFO(mbstring),
583 	NO_VERSION_YET,
584 	PHP_MODULE_GLOBALS(mbstring),
585 	PHP_GINIT(mbstring),
586 	PHP_GSHUTDOWN(mbstring),
587 	NULL,
588 	STANDARD_MODULE_PROPERTIES_EX
589 };
590 /* }}} */
591 
592 /* {{{ static sapi_post_entry php_post_entries[] */
593 static sapi_post_entry php_post_entries[] = {
594 	{ DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data,	php_std_post_handler },
595 	{ MULTIPART_CONTENT_TYPE,    sizeof(MULTIPART_CONTENT_TYPE)-1,    NULL,                         rfc1867_post_handler },
596 	{ NULL, 0, NULL, NULL }
597 };
598 /* }}} */
599 
/* Expanded only when COMPILE_DL_MBSTRING is defined (presumably the
 * shared/loadable build of the extension -- confirm in the build config). */
#ifdef COMPILE_DL_MBSTRING
ZEND_GET_MODULE(mbstring)
#endif
603 
604 /* {{{ allocators */
/* libmbfl malloc hook: forwards the request to emalloc(). */
static void *_php_mb_allocators_malloc(unsigned int sz)
{
	void *block = emalloc(sz);

	return block;
}
609 
/* libmbfl realloc hook: forwards the request to erealloc(). */
static void *_php_mb_allocators_realloc(void *ptr, unsigned int sz)
{
	void *resized = erealloc(ptr, sz);

	return resized;
}
614 
/* libmbfl calloc hook: forwards nelems and szelem to ecalloc(). */
static void *_php_mb_allocators_calloc(unsigned int nelems, unsigned int szelem)
{
	void *block = ecalloc(nelems, szelem);

	return block;
}
619 
/* libmbfl free hook: releases ptr through efree(). */
static void _php_mb_allocators_free(void *ptr)
{
	efree(ptr);
}
624 
/* libmbfl persistent malloc hook: calls pemalloc() with the persistent flag set. */
static void *_php_mb_allocators_pmalloc(unsigned int sz)
{
	void *block = pemalloc(sz, 1);

	return block;
}
629 
/* libmbfl persistent realloc hook: calls perealloc() with the persistent flag set. */
static void *_php_mb_allocators_prealloc(void *ptr, unsigned int sz)
{
	void *resized = perealloc(ptr, sz, 1);

	return resized;
}
634 
/* libmbfl persistent free hook: calls pefree() with the persistent flag set. */
static void _php_mb_allocators_pfree(void *ptr)
{
	pefree(ptr, 1);
}
639 
640 static mbfl_allocators _php_mb_allocators = {
641 	_php_mb_allocators_malloc,
642 	_php_mb_allocators_realloc,
643 	_php_mb_allocators_calloc,
644 	_php_mb_allocators_free,
645 	_php_mb_allocators_pmalloc,
646 	_php_mb_allocators_prealloc,
647 	_php_mb_allocators_pfree
648 };
649 /* }}} */
650 
651 /* {{{ static sapi_post_entry mbstr_post_entries[] */
652 static sapi_post_entry mbstr_post_entries[] = {
653 	{ DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data, php_mb_post_handler },
654 	{ MULTIPART_CONTENT_TYPE,    sizeof(MULTIPART_CONTENT_TYPE)-1,    NULL,                         rfc1867_post_handler },
655 	{ NULL, 0, NULL, NULL }
656 };
657 /* }}} */
658 
659 /* {{{ static int php_mb_parse_encoding_list()
660  *  Return 0 if input contains any illegal encoding, otherwise 1.
661  *  Even if any illegal encoding is detected the result may contain a list
662  *  of parsed encodings.
663  */
664 static int
php_mb_parse_encoding_list(const char * value,size_t value_length,const mbfl_encoding *** return_list,size_t * return_size,int persistent TSRMLS_DC)665 php_mb_parse_encoding_list(const char *value, size_t value_length, const mbfl_encoding ***return_list, size_t *return_size, int persistent TSRMLS_DC)
666 {
667 	int size, bauto, ret = SUCCESS;
668 	size_t n;
669 	char *p, *p1, *p2, *endp, *tmpstr;
670 	const mbfl_encoding **entry, **list;
671 
672 	list = NULL;
673 	if (value == NULL || value_length <= 0) {
674 		if (return_list) {
675 			*return_list = NULL;
676 		}
677 		if (return_size) {
678 			*return_size = 0;
679 		}
680 		return FAILURE;
681 	} else {
682 		/* copy the value string for work */
683 		if (value[0]=='"' && value[value_length-1]=='"' && value_length>2) {
684 			tmpstr = (char *)estrndup(value+1, value_length-2);
685 			value_length -= 2;
686 		}
687 		else
688 			tmpstr = (char *)estrndup(value, value_length);
689 		if (tmpstr == NULL) {
690 			return FAILURE;
691 		}
692 		/* count the number of listed encoding names */
693 		endp = tmpstr + value_length;
694 		n = 1;
695 		p1 = tmpstr;
696 		while ((p2 = php_memnstr(p1, ",", 1, endp)) != NULL) {
697 			p1 = p2 + 1;
698 			n++;
699 		}
700 		size = n + MBSTRG(default_detect_order_list_size);
701 		/* make list */
702 		list = (const mbfl_encoding **)pecalloc(size, sizeof(mbfl_encoding*), persistent);
703 		if (list != NULL) {
704 			entry = list;
705 			n = 0;
706 			bauto = 0;
707 			p1 = tmpstr;
708 			do {
709 				p2 = p = php_memnstr(p1, ",", 1, endp);
710 				if (p == NULL) {
711 					p = endp;
712 				}
713 				*p = '\0';
714 				/* trim spaces */
715 				while (p1 < p && (*p1 == ' ' || *p1 == '\t')) {
716 					p1++;
717 				}
718 				p--;
719 				while (p > p1 && (*p == ' ' || *p == '\t')) {
720 					*p = '\0';
721 					p--;
722 				}
723 				/* convert to the encoding number and check encoding */
724 				if (strcasecmp(p1, "auto") == 0) {
725 					if (!bauto) {
726 						const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
727 						const size_t identify_list_size = MBSTRG(default_detect_order_list_size);
728 						size_t i;
729 						bauto = 1;
730 						for (i = 0; i < identify_list_size; i++) {
731 							*entry++ = mbfl_no2encoding(*src++);
732 							n++;
733 						}
734 					}
735 				} else {
736 					const mbfl_encoding *encoding = mbfl_name2encoding(p1);
737 					if (encoding) {
738 						*entry++ = encoding;
739 						n++;
740 					} else {
741 						ret = 0;
742 					}
743 				}
744 				p1 = p2 + 1;
745 			} while (n < size && p2 != NULL);
746 			if (n > 0) {
747 				if (return_list) {
748 					*return_list = list;
749 				} else {
750 					pefree(list, persistent);
751 				}
752 			} else {
753 				pefree(list, persistent);
754 				if (return_list) {
755 					*return_list = NULL;
756 				}
757 				ret = 0;
758 			}
759 			if (return_size) {
760 				*return_size = n;
761 			}
762 		} else {
763 			if (return_list) {
764 				*return_list = NULL;
765 			}
766 			if (return_size) {
767 				*return_size = 0;
768 			}
769 			ret = 0;
770 		}
771 		efree(tmpstr);
772 	}
773 
774 	return ret;
775 }
776 /* }}} */
777 
778 /* {{{ static int php_mb_parse_encoding_array()
779  *  Return 0 if input contains any illegal encoding, otherwise 1.
780  *  Even if any illegal encoding is detected the result may contain a list
781  *  of parsed encodings.
782  */
783 static int
php_mb_parse_encoding_array(zval * array,const mbfl_encoding *** return_list,size_t * return_size,int persistent TSRMLS_DC)784 php_mb_parse_encoding_array(zval *array, const mbfl_encoding ***return_list, size_t *return_size, int persistent TSRMLS_DC)
785 {
786 	zval **hash_entry;
787 	HashTable *target_hash;
788 	int i, n, size, bauto, ret = SUCCESS;
789 	const mbfl_encoding **list, **entry;
790 
791 	list = NULL;
792 	if (Z_TYPE_P(array) == IS_ARRAY) {
793 		target_hash = Z_ARRVAL_P(array);
794 		zend_hash_internal_pointer_reset(target_hash);
795 		i = zend_hash_num_elements(target_hash);
796 		size = i + MBSTRG(default_detect_order_list_size);
797 		list = (const mbfl_encoding **)pecalloc(size, sizeof(mbfl_encoding*), persistent);
798 		if (list != NULL) {
799 			entry = list;
800 			bauto = 0;
801 			n = 0;
802 			while (i > 0) {
803 				if (zend_hash_get_current_data(target_hash, (void **) &hash_entry) == FAILURE) {
804 					break;
805 				}
806 				convert_to_string_ex(hash_entry);
807 				if (strcasecmp(Z_STRVAL_PP(hash_entry), "auto") == 0) {
808 					if (!bauto) {
809 						const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
810 						const size_t identify_list_size = MBSTRG(default_detect_order_list_size);
811 						size_t j;
812 
813 						bauto = 1;
814 						for (j = 0; j < identify_list_size; j++) {
815 							*entry++ = mbfl_no2encoding(*src++);
816 							n++;
817 						}
818 					}
819 				} else {
820 					const mbfl_encoding *encoding = mbfl_name2encoding(Z_STRVAL_PP(hash_entry));
821 					if (encoding) {
822 						*entry++ = encoding;
823 						n++;
824 					} else {
825 						ret = FAILURE;
826 					}
827 				}
828 				zend_hash_move_forward(target_hash);
829 				i--;
830 			}
831 			if (n > 0) {
832 				if (return_list) {
833 					*return_list = list;
834 				} else {
835 					pefree(list, persistent);
836 				}
837 			} else {
838 				pefree(list, persistent);
839 				if (return_list) {
840 					*return_list = NULL;
841 				}
842 				ret = FAILURE;
843 			}
844 			if (return_size) {
845 				*return_size = n;
846 			}
847 		} else {
848 			if (return_list) {
849 				*return_list = NULL;
850 			}
851 			if (return_size) {
852 				*return_size = 0;
853 			}
854 			ret = FAILURE;
855 		}
856 	}
857 
858 	return ret;
859 }
860 /* }}} */
861 
862 /* {{{ zend_multibyte interface */
php_mb_zend_encoding_fetcher(const char * encoding_name TSRMLS_DC)863 static const zend_encoding* php_mb_zend_encoding_fetcher(const char *encoding_name TSRMLS_DC)
864 {
865 	return (const zend_encoding*)mbfl_name2encoding(encoding_name);
866 }
867 
php_mb_zend_encoding_name_getter(const zend_encoding * encoding)868 static const char *php_mb_zend_encoding_name_getter(const zend_encoding *encoding)
869 {
870 	return ((const mbfl_encoding *)encoding)->name;
871 }
872 
php_mb_zend_encoding_lexer_compatibility_checker(const zend_encoding * _encoding)873 static int php_mb_zend_encoding_lexer_compatibility_checker(const zend_encoding *_encoding)
874 {
875 	const mbfl_encoding *encoding = (const mbfl_encoding*)_encoding;
876 	if (encoding->flag & MBFL_ENCTYPE_SBCS) {
877 		return 1;
878 	}
879 	if ((encoding->flag & (MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE)) == MBFL_ENCTYPE_MBCS) {
880 		return 1;
881 	}
882 	return 0;
883 }
884 
/* Identify the encoding of a byte buffer with mbfl_identify_encoding2().
 * When `list` is NULL, the candidates (and their count) come from
 * MBSTRG(current_detect_order_list) instead of the arguments. */
static const zend_encoding *php_mb_zend_encoding_detector(const unsigned char *arg_string, size_t arg_length, const zend_encoding **list, size_t list_size TSRMLS_DC)
{
	mbfl_string subject;

	if (list == NULL) {
		list_size = MBSTRG(current_detect_order_list_size);
		list = (const zend_encoding **)MBSTRG(current_detect_order_list);
	}

	/* wrap the caller's buffer; nothing is copied */
	mbfl_string_init(&subject);
	subject.val = (unsigned char *)arg_string;
	subject.len = arg_length;
	subject.no_language = MBSTRG(language);

	return (const zend_encoding *) mbfl_identify_encoding2(&subject, (const mbfl_encoding **)list, list_size, 0);
}
900 
/* Convert `from` (from_length bytes, in encoding_from) into encoding_to.
 * On success *to and *to_length receive the buffer and length taken from
 * the converter's result string, and the value written by
 * mbfl_buffer_converter_feed2() through its third argument is returned
 * (presumably the count of input bytes processed -- confirm in libmbfl).
 * Returns (size_t)-1 if the converter cannot be created, feeding fails, or
 * no result can be fetched. */
static size_t php_mb_zend_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const zend_encoding *encoding_to, const zend_encoding *encoding_from TSRMLS_DC)
{
	const mbfl_encoding *src_enc = (const mbfl_encoding *)encoding_from;
	const mbfl_encoding *dst_enc = (const mbfl_encoding *)encoding_to;
	mbfl_string input, output;
	mbfl_buffer_converter *conv;
	int feed_loc;

	/* describe the input and prepare an empty result */
	mbfl_string_init(&input);
	mbfl_string_init(&output);
	input.no_encoding = src_enc->no_encoding;
	input.no_language = MBSTRG(language);
	input.val = (unsigned char*)from;
	input.len = from_length;

	/* set up the converter with the current illegal-character policy */
	conv = mbfl_buffer_converter_new2(src_enc, dst_enc, input.len);
	if (conv == NULL) {
		return (size_t)-1;
	}
	mbfl_buffer_converter_illegal_mode(conv, MBSTRG(current_filter_illegal_mode));
	mbfl_buffer_converter_illegal_substchar(conv, MBSTRG(current_filter_illegal_substchar));

	/* run the conversion; a non-zero status means failure */
	if (mbfl_buffer_converter_feed2(conv, &input, &feed_loc)) {
		mbfl_buffer_converter_delete(conv);
		return (size_t)-1;
	}

	mbfl_buffer_converter_flush(conv);
	if (!mbfl_buffer_converter_result(conv, &output)) {
		mbfl_buffer_converter_delete(conv);
		return (size_t)-1;
	}

	*to = output.val;
	*to_length = output.len;

	mbfl_buffer_converter_delete(conv);

	return feed_loc;
}
944 
/* Zend multibyte hook: parse an encoding list by delegating to
 * php_mb_parse_encoding_list(); the result list is handed back through the
 * engine's zend_encoding pointer type. */
static int php_mb_zend_encoding_list_parser(const char *encoding_list, size_t encoding_list_len, const zend_encoding ***return_list, size_t *return_size, int persistent TSRMLS_DC)
{
	const mbfl_encoding ***mbfl_list = (const mbfl_encoding ***)return_list;

	return php_mb_parse_encoding_list(encoding_list, encoding_list_len, mbfl_list, return_size, persistent TSRMLS_CC);
}
949 
php_mb_zend_internal_encoding_getter(TSRMLS_D)950 static const zend_encoding *php_mb_zend_internal_encoding_getter(TSRMLS_D)
951 {
952 	return (const zend_encoding *)MBSTRG(internal_encoding);
953 }
954 
/* Zend multibyte hook: store `encoding` as the internal encoding global.
 * Always returns SUCCESS. */
static int php_mb_zend_internal_encoding_setter(const zend_encoding *encoding TSRMLS_DC)
{
	const mbfl_encoding *requested = (const mbfl_encoding *)encoding;

	MBSTRG(internal_encoding) = requested;
	return SUCCESS;
}
960 
961 static zend_multibyte_functions php_mb_zend_multibyte_functions = {
962 	"mbstring",
963 	php_mb_zend_encoding_fetcher,
964 	php_mb_zend_encoding_name_getter,
965 	php_mb_zend_encoding_lexer_compatibility_checker,
966 	php_mb_zend_encoding_detector,
967 	php_mb_zend_encoding_converter,
968 	php_mb_zend_encoding_list_parser,
969 	php_mb_zend_internal_encoding_getter,
970 	php_mb_zend_internal_encoding_setter
971 };
972 /* }}} */
973 
974 static void *_php_mb_compile_regex(const char *pattern TSRMLS_DC);
975 static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len);
976 static void _php_mb_free_regex(void *opaque);
977 
978 #if HAVE_ONIG
979 /* {{{ _php_mb_compile_regex */
/* Compile `pattern` with Oniguruma (case-insensitive, no capture groups,
 * ASCII encoding, Perl syntax). Returns the compiled regex as an opaque
 * pointer, or NULL after emitting an E_WARNING when compilation fails. */
static void *_php_mb_compile_regex(const char *pattern TSRMLS_DC)
{
	php_mb_regex_t *compiled;
	OnigErrorInfo onig_err;
	const OnigUChar *pat_begin = (const OnigUChar *)pattern;
	const OnigUChar *pat_end = (const OnigUChar *)pattern + strlen(pattern);
	int rc;

	rc = onig_new(&compiled, pat_begin, pat_end,
			ONIG_OPTION_IGNORECASE | ONIG_OPTION_DONT_CAPTURE_GROUP,
			ONIG_ENCODING_ASCII, &OnigSyntaxPerl, &onig_err);

	if (rc) {
		/* Any non-zero code is a failure: report it and hand back NULL. */
		OnigUChar message[ONIG_MAX_ERROR_MESSAGE_LEN];

		onig_error_code_to_str(message, rc, onig_err);
		php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s: %s", pattern, message);
		return NULL;
	}

	return compiled;
}
998 /* }}} */
999 
1000 /* {{{ _php_mb_match_regex */
_php_mb_match_regex(void * opaque,const char * str,size_t str_len)1001 static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len)
1002 {
1003 	return onig_search((php_mb_regex_t *)opaque, (const OnigUChar *)str,
1004 			(const OnigUChar*)str + str_len, (const OnigUChar *)str,
1005 			(const OnigUChar*)str + str_len, NULL, ONIG_OPTION_NONE) >= 0;
1006 }
1007 /* }}} */
1008 
1009 /* {{{ _php_mb_free_regex */
_php_mb_free_regex(void * opaque)1010 static void _php_mb_free_regex(void *opaque)
1011 {
1012 	onig_free((php_mb_regex_t *)opaque);
1013 }
1014 /* }}} */
1015 #elif HAVE_PCRE || HAVE_BUNDLED_PCRE
1016 /* {{{ _php_mb_compile_regex */
/* Compile `pattern` with PCRE (case-insensitive). Returns the compiled
 * pattern as an opaque pointer; on failure an E_WARNING carrying the error
 * offset and message is raised and NULL is returned. */
static void *_php_mb_compile_regex(const char *pattern TSRMLS_DC)
{
	const char *message;
	int offset;
	pcre *compiled = pcre_compile(pattern, PCRE_CASELESS, &message, &offset, NULL);

	if (compiled == NULL) {
		php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s (offset=%d): %s", pattern, offset, message);
	}

	return compiled;
}
1029 /* }}} */
1030 
1031 /* {{{ _php_mb_match_regex */
_php_mb_match_regex(void * opaque,const char * str,size_t str_len)1032 static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len)
1033 {
1034 	return pcre_exec((pcre *)opaque, NULL, str, (int)str_len, 0,
1035 			0, NULL, 0) >= 0;
1036 }
1037 /* }}} */
1038 
1039 /* {{{ _php_mb_free_regex */
/* Release a pattern previously returned by _php_mb_compile_regex(). */
static void _php_mb_free_regex(void *opaque)
{
	void *compiled = opaque;

	pcre_free(compiled);
}
1044 /* }}} */
1045 #endif
1046 
1047 /* {{{ php_mb_nls_get_default_detect_order_list */
php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang,enum mbfl_no_encoding ** plist,size_t * plist_size)1048 static int php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang, enum mbfl_no_encoding **plist, size_t *plist_size)
1049 {
1050 	size_t i;
1051 
1052 	*plist = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut;
1053 	*plist_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
1054 
1055 	for (i = 0; i < sizeof(php_mb_default_identify_list) / sizeof(php_mb_default_identify_list[0]); i++) {
1056 		if (php_mb_default_identify_list[i].lang == lang) {
1057 			*plist = (enum mbfl_no_encoding *)php_mb_default_identify_list[i].list;
1058 			*plist_size = php_mb_default_identify_list[i].list_size;
1059 			return 1;
1060 		}
1061 	}
1062 	return 0;
1063 }
1064 /* }}} */
1065 
/* Copy at most `len` bytes of `start` into a fresh emalloc'd, NUL-terminated
 * buffer, stopping early at the first unescaped `quote` byte.
 *
 * A backslash followed by another backslash, or by the quote character
 * (when quote is non-zero), is unescaped: only the second byte is copied.
 * Everything else is copied one character at a time, where the character
 * width comes from php_mb_mbchar_bytes_ex() for `encoding`. */
static char *php_mb_rfc1867_substring_conf(const zend_encoding *encoding, char *start, int len, char quote TSRMLS_DC)
{
	char *result = emalloc(len + 2);
	char *out = result;
	int i = 0;

	while (i < len && start[i] != quote) {
		if (start[i] == '\\' && (start[i + 1] == '\\' || (quote && start[i + 1] == quote))) {
			/* drop the backslash, keep the escaped byte */
			i++;
			*out++ = start[i];
			i++;
		} else {
			/* copy one whole character, never reading past len */
			size_t remaining = php_mb_mbchar_bytes_ex(start+i, (const mbfl_encoding *)encoding);

			for (; remaining > 0 && i < len; remaining--) {
				*out++ = start[i++];
			}
		}
	}

	*out = '\0';
	return result;
}
1088 
/* Split the next word off a NUL-terminated header line.
 *
 * Scans *line up to the first `stop` byte that is not inside a single- or
 * double-quoted section and returns an emalloc'd copy of the text before
 * it. *line is advanced past the word and past any run of `stop` bytes that
 * follows. When no `stop` byte is found the whole remainder is returned and
 * *line is moved to the terminating NUL.
 *
 * Outside quotes the scan advances by the character width reported by
 * php_mb_mbchar_bytes_ex() for `encoding`. */
static char *php_mb_rfc1867_getword(const zend_encoding *encoding, char **line, char stop TSRMLS_DC) /* {{{ */
{
	const mbfl_encoding *enc = (const mbfl_encoding *)encoding;
	char *pos = *line, quote;
	char *res;
	size_t nbytes;

	while (*pos && *pos != stop) {
		if ((quote = *pos) == '"' || quote == '\'') {
			/* skip the quoted section, honouring backslash-escaped quotes */
			++pos;
			while (*pos && *pos != quote) {
				if (*pos == '\\' && pos[1] && pos[1] == quote) {
					pos += 2;
				} else {
					++pos;
				}
			}
			if (*pos) {
				++pos;
			}
		} else {
			/* Step over one character, but never beyond the terminating
			 * NUL: a truncated multibyte lead byte at the end of the line
			 * would otherwise push pos past the buffer. The rest of this
			 * function already treats the line as NUL-terminated. */
			nbytes = php_mb_mbchar_bytes_ex(pos, enc);
			while (nbytes-- > 0 && *pos) {
				++pos;
			}
		}
	}
	if (*pos == '\0') {
		res = estrdup(*line);
		*line += strlen(*line);
		return res;
	}

	res = estrndup(*line, pos - *line);

	/* swallow the run of stop bytes, with the same bound on the advance */
	while (*pos == stop) {
		nbytes = php_mb_mbchar_bytes_ex(pos, enc);
		while (nbytes-- > 0 && *pos) {
			++pos;
		}
	}

	*line = pos;
	return res;
}
1127 /* }}} */
1128 
/* Extract one configuration-style word from `str`.
 *
 * Leading whitespace is skipped. An empty remainder yields an emalloc'd
 * empty string. If the word starts with ' or ", the text after the opening
 * quote is handed to php_mb_rfc1867_substring_conf() with that quote
 * character; otherwise the run of non-whitespace bytes is handed over with
 * quote 0. */
static char *php_mb_rfc1867_getword_conf(const zend_encoding *encoding, char *str TSRMLS_DC) /* {{{ */
{
	char *cursor = str;
	char *token_end;
	char quote;

	for (; *cursor && isspace(*(unsigned char *)cursor); ++cursor) {
		/* skip leading whitespace */
	}

	if (*cursor == '\0') {
		return estrdup("");
	}

	if (*cursor != '"' && *cursor != '\'') {
		/* bare word: ends at the next whitespace byte or at the NUL */
		token_end = cursor;
		while (*token_end && !isspace(*(unsigned char *)token_end)) {
			++token_end;
		}
		return php_mb_rfc1867_substring_conf(encoding, cursor, token_end - cursor, 0 TSRMLS_CC);
	}

	/* quoted word: pass everything after the opening quote */
	quote = *cursor;
	cursor++;
	return php_mb_rfc1867_substring_conf(encoding, cursor, strlen(cursor), quote TSRMLS_CC);
}
1153 /* }}} */
1154 
/* Return a pointer into `filename` just past its last path separator, as
 * located by php_mb_safe_strrchr_ex() for `encoding`. Both '\\' and '/' are
 * treated as separators; `filename` itself is returned when neither occurs. */
static char *php_mb_rfc1867_basename(const zend_encoding *encoding, char *filename TSRMLS_DC) /* {{{ */
{
	const size_t filename_len = strlen(filename);
	char *last_bslash, *last_slash, *sep;

	/* The backslash check is technically needed only on win32, where it is
	 * a valid path separator. However, IE always sends the full path of the
	 * file on the user's filesystem, so without it users would get a bogus
	 * file name unless they call basename() themselves. Until that changes
	 * this must stay enabled on all systems. */
	last_bslash = php_mb_safe_strrchr_ex(filename, '\\', filename_len, (const mbfl_encoding *)encoding);
	last_slash = php_mb_safe_strrchr_ex(filename, '/', filename_len, (const mbfl_encoding *)encoding);

	if (last_bslash == NULL && last_slash == NULL) {
		return filename;
	}

	if (last_bslash == NULL) {
		sep = last_slash;
	} else if (last_slash == NULL) {
		sep = last_bslash;
	} else {
		/* both present: the one further to the right wins */
		sep = (last_bslash > last_slash) ? last_bslash : last_slash;
	}

	return sep + 1;
}
1182 /* }}} */
1183 
1184 /* {{{ php.ini directive handler */
1185 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_language) */
PHP_INI_MH(OnUpdate_mbstring_language)1186 static PHP_INI_MH(OnUpdate_mbstring_language)
1187 {
1188 	enum mbfl_no_language no_language;
1189 
1190 	no_language = mbfl_name2no_language(new_value);
1191 	if (no_language == mbfl_no_language_invalid) {
1192 		MBSTRG(language) = mbfl_no_language_neutral;
1193 		return FAILURE;
1194 	}
1195 	MBSTRG(language) = no_language;
1196 	php_mb_nls_get_default_detect_order_list(no_language, &MBSTRG(default_detect_order_list), &MBSTRG(default_detect_order_list_size));
1197 	return SUCCESS;
1198 }
1199 /* }}} */
1200 
1201 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_detect_order) */
PHP_INI_MH(OnUpdate_mbstring_detect_order)1202 static PHP_INI_MH(OnUpdate_mbstring_detect_order)
1203 {
1204 	const mbfl_encoding **list;
1205 	size_t size;
1206 
1207 	if (!new_value) {
1208 		if (MBSTRG(detect_order_list)) {
1209 			pefree(MBSTRG(detect_order_list), 1);
1210 		}
1211 		MBSTRG(detect_order_list) = NULL;
1212 		MBSTRG(detect_order_list_size) = 0;
1213 		return SUCCESS;
1214 	}
1215 
1216 	if (FAILURE == php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) {
1217 		return FAILURE;
1218 	}
1219 
1220 	if (MBSTRG(detect_order_list)) {
1221 		pefree(MBSTRG(detect_order_list), 1);
1222 	}
1223 	MBSTRG(detect_order_list) = list;
1224 	MBSTRG(detect_order_list_size) = size;
1225 	return SUCCESS;
1226 }
1227 /* }}} */
1228 
1229 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_input) */
PHP_INI_MH(OnUpdate_mbstring_http_input)1230 static PHP_INI_MH(OnUpdate_mbstring_http_input)
1231 {
1232 	const mbfl_encoding **list;
1233 	size_t size;
1234 
1235 	if (!new_value) {
1236 		if (MBSTRG(http_input_list)) {
1237 			pefree(MBSTRG(http_input_list), 1);
1238 		}
1239 		MBSTRG(http_input_list) = NULL;
1240 		MBSTRG(http_input_list_size) = 0;
1241 		return SUCCESS;
1242 	}
1243 
1244 	if (FAILURE == php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) {
1245 		return FAILURE;
1246 	}
1247 
1248 	if (MBSTRG(http_input_list)) {
1249 		pefree(MBSTRG(http_input_list), 1);
1250 	}
1251 	MBSTRG(http_input_list) = list;
1252 	MBSTRG(http_input_list_size) = size;
1253 
1254 	return SUCCESS;
1255 }
1256 /* }}} */
1257 
1258 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output) */
PHP_INI_MH(OnUpdate_mbstring_http_output)1259 static PHP_INI_MH(OnUpdate_mbstring_http_output)
1260 {
1261 	const mbfl_encoding *encoding;
1262 
1263 	if (new_value == NULL || new_value_length == 0) {
1264 		MBSTRG(http_output_encoding) = &mbfl_encoding_pass;
1265 		MBSTRG(current_http_output_encoding) = &mbfl_encoding_pass;
1266 		return SUCCESS;
1267 	}
1268 
1269 	encoding = mbfl_name2encoding(new_value);
1270 	if (!encoding) {
1271 		MBSTRG(http_output_encoding) = &mbfl_encoding_pass;
1272 		MBSTRG(current_http_output_encoding) = &mbfl_encoding_pass;
1273 		return FAILURE;
1274 	}
1275 
1276 	MBSTRG(http_output_encoding) = encoding;
1277 	MBSTRG(current_http_output_encoding) = encoding;
1278 	return SUCCESS;
1279 }
1280 /* }}} */
1281 
1282 /* {{{ static _php_mb_ini_mbstring_internal_encoding_set */
/* Apply a new value for mbstring.internal_encoding.
 *
 * If new_value is NULL, empty, or not a known encoding name, a default is
 * chosen from the current mbstring.language setting (ISO-8859-1 when the
 * language has no specific mapping). The result is stored in both the
 * configured and the current internal-encoding globals. With HAVE_MBREGEX
 * the regex default mbctype is updated as well. Always returns SUCCESS. */
int _php_mb_ini_mbstring_internal_encoding_set(const char *new_value, uint new_value_length TSRMLS_DC)
{
	const mbfl_encoding *encoding = NULL;

	if (new_value != NULL && new_value_length != 0) {
		encoding = mbfl_name2encoding(new_value);
	}

	if (encoding == NULL) {
		/* no usable name given: derive the default from the language */
		enum mbfl_no_encoding fallback;

		switch (MBSTRG(language)) {
			case mbfl_no_language_uni:
				fallback = mbfl_no_encoding_utf8;
				break;
			case mbfl_no_language_japanese:
				fallback = mbfl_no_encoding_euc_jp;
				break;
			case mbfl_no_language_korean:
				fallback = mbfl_no_encoding_euc_kr;
				break;
			case mbfl_no_language_simplified_chinese:
				fallback = mbfl_no_encoding_euc_cn;
				break;
			case mbfl_no_language_traditional_chinese:
				fallback = mbfl_no_encoding_euc_tw;
				break;
			case mbfl_no_language_russian:
				fallback = mbfl_no_encoding_koi8r;
				break;
			case mbfl_no_language_german:
				fallback = mbfl_no_encoding_8859_15;
				break;
			case mbfl_no_language_armenian:
				fallback = mbfl_no_encoding_armscii8;
				break;
			case mbfl_no_language_turkish:
				fallback = mbfl_no_encoding_8859_9;
				break;
			default:
				fallback = mbfl_no_encoding_8859_1;
				break;
		}
		encoding = mbfl_no2encoding(fallback);
	}

	MBSTRG(internal_encoding) = encoding;
	MBSTRG(current_internal_encoding) = encoding;
#if HAVE_MBREGEX
	{
		const char *enc_name = new_value;
		if (FAILURE == php_mb_regex_set_default_mbctype(enc_name TSRMLS_CC)) {
			/* falls back to EUC-JP if an unknown encoding name is given */
			enc_name = "EUC-JP";
			php_mb_regex_set_default_mbctype(enc_name TSRMLS_CC);
		}
		/* NOTE(review): this passes new_value, not the enc_name fallback
		 * chosen above -- confirm that is intended. */
		php_mb_regex_set_mbctype(new_value TSRMLS_CC);
	}
#endif
	return SUCCESS;
}
1336 /* }}} */
1337 
1338 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_internal_encoding) */
PHP_INI_MH(OnUpdate_mbstring_internal_encoding)1339 static PHP_INI_MH(OnUpdate_mbstring_internal_encoding)
1340 {
1341 	if (OnUpdateString(entry, new_value, new_value_length, mh_arg1, mh_arg2, mh_arg3, stage TSRMLS_CC) == FAILURE) {
1342 		return FAILURE;
1343 	}
1344 	if (stage == PHP_INI_STAGE_STARTUP || stage == PHP_INI_STAGE_SHUTDOWN
1345 			|| stage == PHP_INI_STAGE_RUNTIME) {
1346 		return _php_mb_ini_mbstring_internal_encoding_set(new_value, new_value_length TSRMLS_CC);
1347 	} else {
1348 		/* the corresponding mbstring globals needs to be set according to the
1349 		 * ini value in the later stage because it never falls back to the
1350 		 * default value if 1. no value for mbstring.internal_encoding is given,
1351 		 * 2. mbstring.language directive is processed in per-dir or runtime
1352 		 * context and 3. call to the handler for mbstring.language is done
1353 		 * after mbstring.internal_encoding is handled. */
1354 		return SUCCESS;
1355 	}
1356 }
1357 /* }}} */
1358 
1359 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_substitute_character) */
PHP_INI_MH(OnUpdate_mbstring_substitute_character)1360 static PHP_INI_MH(OnUpdate_mbstring_substitute_character)
1361 {
1362 	int c;
1363 	char *endptr = NULL;
1364 
1365 	if (new_value != NULL) {
1366 		if (strcasecmp("none", new_value) == 0) {
1367 			MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
1368 			MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
1369 		} else if (strcasecmp("long", new_value) == 0) {
1370 			MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
1371 			MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
1372 		} else if (strcasecmp("entity", new_value) == 0) {
1373 			MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
1374 			MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
1375 		} else {
1376 			MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1377 			MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1378 			if (new_value_length >0) {
1379 				c = strtol(new_value, &endptr, 0);
1380 				if (*endptr == '\0') {
1381 					MBSTRG(filter_illegal_substchar) = c;
1382 					MBSTRG(current_filter_illegal_substchar) = c;
1383 				}
1384 			}
1385 		}
1386 	} else {
1387 		MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1388 		MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1389 		MBSTRG(filter_illegal_substchar) = 0x3f;	/* '?' */
1390 		MBSTRG(current_filter_illegal_substchar) = 0x3f;	/* '?' */
1391 	}
1392 
1393 	return SUCCESS;
1394 }
1395 /* }}} */
1396 
1397 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_encoding_translation) */
PHP_INI_MH(OnUpdate_mbstring_encoding_translation)1398 static PHP_INI_MH(OnUpdate_mbstring_encoding_translation)
1399 {
1400 	if (new_value == NULL) {
1401 		return FAILURE;
1402 	}
1403 
1404 	OnUpdateBool(entry, new_value, new_value_length, mh_arg1, mh_arg2, mh_arg3, stage TSRMLS_CC);
1405 
1406 	if (MBSTRG(encoding_translation)) {
1407 		sapi_unregister_post_entry(php_post_entries TSRMLS_CC);
1408 		sapi_register_post_entries(mbstr_post_entries TSRMLS_CC);
1409 	} else {
1410 		sapi_unregister_post_entry(mbstr_post_entries TSRMLS_CC);
1411 		sapi_register_post_entries(php_post_entries TSRMLS_CC);
1412 	}
1413 
1414 	return SUCCESS;
1415 }
1416 /* }}} */
1417 
1418 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes */
PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes)1419 static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes)
1420 {
1421 	zval tmp;
1422 	void *re = NULL;
1423 
1424 	if (!new_value) {
1425 		new_value = entry->orig_value;
1426 		new_value_length = entry->orig_value_length;
1427 	}
1428 	php_trim(new_value, new_value_length, NULL, 0, &tmp, 3 TSRMLS_CC);
1429 
1430 	if (Z_STRLEN(tmp) > 0) {
1431 		if (!(re = _php_mb_compile_regex(Z_STRVAL(tmp) TSRMLS_CC))) {
1432 			zval_dtor(&tmp);
1433 			return FAILURE;
1434 		}
1435 	}
1436 
1437 	if (MBSTRG(http_output_conv_mimetypes)) {
1438 		_php_mb_free_regex(MBSTRG(http_output_conv_mimetypes));
1439 	}
1440 
1441 	MBSTRG(http_output_conv_mimetypes) = re;
1442 
1443 	zval_dtor(&tmp);
1444 	return SUCCESS;
1445 }
1446 /* }}} */
1447 /* }}} */
1448 
1449 /* {{{ php.ini directive registration */
1450 PHP_INI_BEGIN()
1451 	PHP_INI_ENTRY("mbstring.language", "neutral", PHP_INI_ALL, OnUpdate_mbstring_language)
1452 	PHP_INI_ENTRY("mbstring.detect_order", NULL, PHP_INI_ALL, OnUpdate_mbstring_detect_order)
1453 	PHP_INI_ENTRY("mbstring.http_input", "pass", PHP_INI_ALL, OnUpdate_mbstring_http_input)
1454 	PHP_INI_ENTRY("mbstring.http_output", "pass", PHP_INI_ALL, OnUpdate_mbstring_http_output)
1455 	STD_PHP_INI_ENTRY("mbstring.internal_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_internal_encoding, internal_encoding_name, zend_mbstring_globals, mbstring_globals)
1456 	PHP_INI_ENTRY("mbstring.substitute_character", NULL, PHP_INI_ALL, OnUpdate_mbstring_substitute_character)
1457 	STD_PHP_INI_ENTRY("mbstring.func_overload", "0",
1458 	PHP_INI_SYSTEM, OnUpdateLong, func_overload, zend_mbstring_globals, mbstring_globals)
1459 
1460 	STD_PHP_INI_BOOLEAN("mbstring.encoding_translation", "0",
1461 		PHP_INI_SYSTEM | PHP_INI_PERDIR,
1462 		OnUpdate_mbstring_encoding_translation,
1463 		encoding_translation, zend_mbstring_globals, mbstring_globals)
1464 	PHP_INI_ENTRY("mbstring.http_output_conv_mimetypes",
1465 		"^(text/|application/xhtml\\+xml)",
1466 		PHP_INI_ALL,
1467 		OnUpdate_mbstring_http_output_conv_mimetypes)
1468 
1469 	STD_PHP_INI_BOOLEAN("mbstring.strict_detection", "0",
1470 		PHP_INI_ALL,
1471 		OnUpdateLong,
1472 		strict_detection, zend_mbstring_globals, mbstring_globals)
PHP_INI_END()1473 PHP_INI_END()
1474 /* }}} */
1475 
1476 /* {{{ module global initialize handler */
1477 static PHP_GINIT_FUNCTION(mbstring)
1478 {
1479 	mbstring_globals->language = mbfl_no_language_uni;
1480 	mbstring_globals->internal_encoding = NULL;
1481 	mbstring_globals->current_internal_encoding = mbstring_globals->internal_encoding;
1482 	mbstring_globals->http_output_encoding = &mbfl_encoding_pass;
1483 	mbstring_globals->current_http_output_encoding = &mbfl_encoding_pass;
1484 	mbstring_globals->http_input_identify = NULL;
1485 	mbstring_globals->http_input_identify_get = NULL;
1486 	mbstring_globals->http_input_identify_post = NULL;
1487 	mbstring_globals->http_input_identify_cookie = NULL;
1488 	mbstring_globals->http_input_identify_string = NULL;
1489 	mbstring_globals->http_input_list = NULL;
1490 	mbstring_globals->http_input_list_size = 0;
1491 	mbstring_globals->detect_order_list = NULL;
1492 	mbstring_globals->detect_order_list_size = 0;
1493 	mbstring_globals->current_detect_order_list = NULL;
1494 	mbstring_globals->current_detect_order_list_size = 0;
1495 	mbstring_globals->default_detect_order_list = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut;
1496 	mbstring_globals->default_detect_order_list_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
1497 	mbstring_globals->filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1498 	mbstring_globals->filter_illegal_substchar = 0x3f;	/* '?' */
1499 	mbstring_globals->current_filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1500 	mbstring_globals->current_filter_illegal_substchar = 0x3f;	/* '?' */
1501 	mbstring_globals->illegalchars = 0;
1502 	mbstring_globals->func_overload = 0;
1503 	mbstring_globals->encoding_translation = 0;
1504 	mbstring_globals->strict_detection = 0;
1505 	mbstring_globals->outconv = NULL;
1506 	mbstring_globals->http_output_conv_mimetypes = NULL;
1507 #if HAVE_MBREGEX
1508 	mbstring_globals->mb_regex_globals = php_mb_regex_globals_alloc(TSRMLS_C);
1509 #endif
1510 }
1511 /* }}} */
1512 
1513 /* {{{ PHP_GSHUTDOWN_FUNCTION */
PHP_GSHUTDOWN_FUNCTION(mbstring)1514 static PHP_GSHUTDOWN_FUNCTION(mbstring)
1515 {
1516 	if (mbstring_globals->http_input_list) {
1517 		free(mbstring_globals->http_input_list);
1518 	}
1519 	if (mbstring_globals->detect_order_list) {
1520 		free(mbstring_globals->detect_order_list);
1521 	}
1522 	if (mbstring_globals->http_output_conv_mimetypes) {
1523 		_php_mb_free_regex(mbstring_globals->http_output_conv_mimetypes);
1524 	}
1525 #if HAVE_MBREGEX
1526 	php_mb_regex_globals_free(mbstring_globals->mb_regex_globals TSRMLS_CC);
1527 #endif
1528 }
1529 /* }}} */
1530 
1531 /* {{{ PHP_MINIT_FUNCTION(mbstring) */
PHP_MINIT_FUNCTION(mbstring)1532 PHP_MINIT_FUNCTION(mbstring)
1533 {
1534 	__mbfl_allocators = &_php_mb_allocators;
1535 
1536 	REGISTER_INI_ENTRIES();
1537 
1538 	/* This is a global handler. Should not be set in a per-request handler. */
1539 	sapi_register_treat_data(mbstr_treat_data TSRMLS_CC);
1540 
1541 	/* Post handlers are stored in the thread-local context. */
1542 	if (MBSTRG(encoding_translation)) {
1543 		sapi_register_post_entries(mbstr_post_entries TSRMLS_CC);
1544 	}
1545 
1546 	REGISTER_LONG_CONSTANT("MB_OVERLOAD_MAIL", MB_OVERLOAD_MAIL, CONST_CS | CONST_PERSISTENT);
1547 	REGISTER_LONG_CONSTANT("MB_OVERLOAD_STRING", MB_OVERLOAD_STRING, CONST_CS | CONST_PERSISTENT);
1548 	REGISTER_LONG_CONSTANT("MB_OVERLOAD_REGEX", MB_OVERLOAD_REGEX, CONST_CS | CONST_PERSISTENT);
1549 
1550 	REGISTER_LONG_CONSTANT("MB_CASE_UPPER", PHP_UNICODE_CASE_UPPER, CONST_CS | CONST_PERSISTENT);
1551 	REGISTER_LONG_CONSTANT("MB_CASE_LOWER", PHP_UNICODE_CASE_LOWER, CONST_CS | CONST_PERSISTENT);
1552 	REGISTER_LONG_CONSTANT("MB_CASE_TITLE", PHP_UNICODE_CASE_TITLE, CONST_CS | CONST_PERSISTENT);
1553 
1554 #if HAVE_MBREGEX
1555 	PHP_MINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1556 #endif
1557 
1558 	if (FAILURE == zend_multibyte_set_functions(&php_mb_zend_multibyte_functions TSRMLS_CC)) {
1559 		return FAILURE;
1560 	}
1561 
1562 	php_rfc1867_set_multibyte_callbacks(
1563 		php_mb_encoding_translation,
1564 		php_mb_gpc_get_detect_order,
1565 		php_mb_gpc_set_input_encoding,
1566 		php_mb_rfc1867_getword,
1567 		php_mb_rfc1867_getword_conf,
1568 		php_mb_rfc1867_basename);
1569 
1570 	return SUCCESS;
1571 }
1572 /* }}} */
1573 
1574 /* {{{ PHP_MSHUTDOWN_FUNCTION(mbstring) */
PHP_MSHUTDOWN_FUNCTION(mbstring)1575 PHP_MSHUTDOWN_FUNCTION(mbstring)
1576 {
1577 	UNREGISTER_INI_ENTRIES();
1578 
1579 #if HAVE_MBREGEX
1580 	PHP_MSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1581 #endif
1582 
1583 	return SUCCESS;
1584 }
1585 /* }}} */
1586 
1587 /* {{{ PHP_RINIT_FUNCTION(mbstring) */
PHP_RINIT_FUNCTION(mbstring)1588 PHP_RINIT_FUNCTION(mbstring)
1589 {
1590 	zend_function *func, *orig;
1591 	const struct mb_overload_def *p;
1592 
1593 	MBSTRG(current_internal_encoding) = MBSTRG(internal_encoding);
1594 	MBSTRG(current_http_output_encoding) = MBSTRG(http_output_encoding);
1595 	MBSTRG(current_filter_illegal_mode) = MBSTRG(filter_illegal_mode);
1596 	MBSTRG(current_filter_illegal_substchar) = MBSTRG(filter_illegal_substchar);
1597 
1598 	MBSTRG(illegalchars) = 0;
1599 
1600 	php_mb_populate_current_detect_order_list(TSRMLS_C);
1601 
1602  	/* override original function. */
1603 	if (MBSTRG(func_overload)){
1604 		p = &(mb_ovld[0]);
1605 
1606 		while (p->type > 0) {
1607 			if ((MBSTRG(func_overload) & p->type) == p->type &&
1608 				zend_hash_find(EG(function_table), p->save_func,
1609 					strlen(p->save_func)+1, (void **)&orig) != SUCCESS) {
1610 
1611 				zend_hash_find(EG(function_table), p->ovld_func, strlen(p->ovld_func)+1 , (void **)&func);
1612 
1613 				if (zend_hash_find(EG(function_table), p->orig_func, strlen(p->orig_func)+1, (void **)&orig) != SUCCESS) {
1614 					php_error_docref("ref.mbstring" TSRMLS_CC, E_WARNING, "mbstring couldn't find function %s.", p->orig_func);
1615 					return FAILURE;
1616 				} else {
1617 					zend_hash_add(EG(function_table), p->save_func, strlen(p->save_func)+1, orig, sizeof(zend_function), NULL);
1618 
1619 					if (zend_hash_update(EG(function_table), p->orig_func, strlen(p->orig_func)+1, func, sizeof(zend_function),
1620 						NULL) == FAILURE) {
1621 						php_error_docref("ref.mbstring" TSRMLS_CC, E_WARNING, "mbstring couldn't replace function %s.", p->orig_func);
1622 						return FAILURE;
1623 					}
1624 				}
1625 			}
1626 			p++;
1627 		}
1628 	}
1629 #if HAVE_MBREGEX
1630 	PHP_RINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1631 #endif
1632 	zend_multibyte_set_internal_encoding((const zend_encoding *)MBSTRG(internal_encoding) TSRMLS_CC);
1633 
1634 	return SUCCESS;
1635 }
1636 /* }}} */
1637 
1638 /* {{{ PHP_RSHUTDOWN_FUNCTION(mbstring) */
PHP_RSHUTDOWN_FUNCTION(mbstring)1639 PHP_RSHUTDOWN_FUNCTION(mbstring)
1640 {
1641 	const struct mb_overload_def *p;
1642 	zend_function *orig;
1643 
1644 	if (MBSTRG(current_detect_order_list) != NULL) {
1645 		efree(MBSTRG(current_detect_order_list));
1646 		MBSTRG(current_detect_order_list) = NULL;
1647 		MBSTRG(current_detect_order_list_size) = 0;
1648 	}
1649 	if (MBSTRG(outconv) != NULL) {
1650 		MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
1651 		mbfl_buffer_converter_delete(MBSTRG(outconv));
1652 		MBSTRG(outconv) = NULL;
1653 	}
1654 
1655 	/* clear http input identification. */
1656 	MBSTRG(http_input_identify) = NULL;
1657 	MBSTRG(http_input_identify_post) = NULL;
1658 	MBSTRG(http_input_identify_get) = NULL;
1659 	MBSTRG(http_input_identify_cookie) = NULL;
1660 	MBSTRG(http_input_identify_string) = NULL;
1661 
1662  	/*  clear overloaded function. */
1663 	if (MBSTRG(func_overload)){
1664 		p = &(mb_ovld[0]);
1665 		while (p->type > 0) {
1666 			if ((MBSTRG(func_overload) & p->type) == p->type &&
1667 				zend_hash_find(EG(function_table), p->save_func,
1668 							   strlen(p->save_func)+1, (void **)&orig) == SUCCESS) {
1669 
1670 				zend_hash_update(EG(function_table), p->orig_func, strlen(p->orig_func)+1, orig, sizeof(zend_function), NULL);
1671 				zend_hash_del(EG(function_table), p->save_func, strlen(p->save_func)+1);
1672 			}
1673 			p++;
1674 		}
1675 	}
1676 
1677 #if HAVE_MBREGEX
1678 	PHP_RSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1679 #endif
1680 
1681 	return SUCCESS;
1682 }
1683 /* }}} */
1684 
1685 /* {{{ PHP_MINFO_FUNCTION(mbstring) */
PHP_MINFO_FUNCTION(mbstring)1686 PHP_MINFO_FUNCTION(mbstring)
1687 {
1688 	php_info_print_table_start();
1689 	php_info_print_table_row(2, "Multibyte Support", "enabled");
1690 	php_info_print_table_row(2, "Multibyte string engine", "libmbfl");
1691 	php_info_print_table_row(2, "HTTP input encoding translation", MBSTRG(encoding_translation) ? "enabled": "disabled");
1692 	{
1693 		char tmp[256];
1694 		snprintf(tmp, sizeof(tmp), "%d.%d.%d", MBFL_VERSION_MAJOR, MBFL_VERSION_MINOR, MBFL_VERSION_TEENY);
1695 		php_info_print_table_row(2, "libmbfl version", tmp);
1696 	}
1697 	php_info_print_table_end();
1698 
1699 	php_info_print_table_start();
1700 	php_info_print_table_header(1, "mbstring extension makes use of \"streamable kanji code filter and converter\", which is distributed under the GNU Lesser General Public License version 2.1.");
1701 	php_info_print_table_end();
1702 
1703 #if HAVE_MBREGEX
1704 	PHP_MINFO(mb_regex)(ZEND_MODULE_INFO_FUNC_ARGS_PASSTHRU);
1705 #endif
1706 
1707 	DISPLAY_INI_ENTRIES();
1708 }
1709 /* }}} */
1710 
1711 /* {{{ proto string mb_language([string language])
1712    Sets the current language or Returns the current language as a string */
PHP_FUNCTION(mb_language)1713 PHP_FUNCTION(mb_language)
1714 {
1715 	char *name = NULL;
1716 	int name_len = 0;
1717 
1718 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &name, &name_len) == FAILURE) {
1719 		return;
1720 	}
1721 	if (name == NULL) {
1722 		RETVAL_STRING((char *)mbfl_no_language2name(MBSTRG(language)), 1);
1723 	} else {
1724 		if (FAILURE == zend_alter_ini_entry(
1725 				"mbstring.language", sizeof("mbstring.language"),
1726 				name, name_len, PHP_INI_USER, PHP_INI_STAGE_RUNTIME)) {
1727 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown language \"%s\"", name);
1728 			RETVAL_FALSE;
1729 		} else {
1730 			RETVAL_TRUE;
1731 		}
1732 	}
1733 }
1734 /* }}} */
1735 
1736 /* {{{ proto string mb_internal_encoding([string encoding])
1737    Sets the current internal encoding or Returns the current internal encoding as a string */
PHP_FUNCTION(mb_internal_encoding)1738 PHP_FUNCTION(mb_internal_encoding)
1739 {
1740 	const char *name = NULL;
1741 	int name_len;
1742 	const mbfl_encoding *encoding;
1743 
1744 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &name, &name_len) == FAILURE) {
1745 		RETURN_FALSE;
1746 	}
1747 	if (name == NULL) {
1748 		name = MBSTRG(current_internal_encoding) ? MBSTRG(current_internal_encoding)->name: NULL;
1749 		if (name != NULL) {
1750 			RETURN_STRING(name, 1);
1751 		} else {
1752 			RETURN_FALSE;
1753 		}
1754 	} else {
1755 		encoding = mbfl_name2encoding(name);
1756 		if (!encoding) {
1757 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
1758 			RETURN_FALSE;
1759 		} else {
1760 			MBSTRG(current_internal_encoding) = encoding;
1761 			RETURN_TRUE;
1762 		}
1763 	}
1764 }
1765 /* }}} */
1766 
1767 /* {{{ proto mixed mb_http_input([string type])
1768    Returns the input encoding */
PHP_FUNCTION(mb_http_input)1769 PHP_FUNCTION(mb_http_input)
1770 {
1771 	char *typ = NULL;
1772 	int typ_len;
1773 	int retname;
1774 	char *list, *temp;
1775 	const mbfl_encoding *result = NULL;
1776 
1777 	retname = 1;
1778  	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &typ, &typ_len) == FAILURE) {
1779  		RETURN_FALSE;
1780  	}
1781  	if (typ == NULL) {
1782  		result = MBSTRG(http_input_identify);
1783  	} else {
1784  		switch (*typ) {
1785 		case 'G':
1786 		case 'g':
1787 			result = MBSTRG(http_input_identify_get);
1788 			break;
1789 		case 'P':
1790 		case 'p':
1791 			result = MBSTRG(http_input_identify_post);
1792 			break;
1793 		case 'C':
1794 		case 'c':
1795 			result = MBSTRG(http_input_identify_cookie);
1796 			break;
1797 		case 'S':
1798 		case 's':
1799 			result = MBSTRG(http_input_identify_string);
1800 			break;
1801 		case 'I':
1802 		case 'i':
1803 			{
1804 				const mbfl_encoding **entry = MBSTRG(http_input_list);
1805 				const size_t n = MBSTRG(http_input_list_size);
1806 				size_t i;
1807 				array_init(return_value);
1808 				for (i = 0; i < n; i++) {
1809 					add_next_index_string(return_value, (*entry)->name, 1);
1810 					entry++;
1811 				}
1812 				retname = 0;
1813 			}
1814 			break;
1815 		case 'L':
1816 		case 'l':
1817 			{
1818 				const mbfl_encoding **entry = MBSTRG(http_input_list);
1819 				const size_t n = MBSTRG(http_input_list_size);
1820 				size_t i;
1821 				list = NULL;
1822 				for (i = 0; i < n; i++) {
1823 					if (list) {
1824 						temp = list;
1825 						spprintf(&list, 0, "%s,%s", temp, (*entry)->name);
1826 						efree(temp);
1827 						if (!list) {
1828 							break;
1829 						}
1830 					} else {
1831 						list = estrdup((*entry)->name);
1832 					}
1833 					entry++;
1834 				}
1835 			}
1836 			if (!list) {
1837 				RETURN_FALSE;
1838 			}
1839 			RETVAL_STRING(list, 0);
1840 			retname = 0;
1841 			break;
1842 		default:
1843 			result = MBSTRG(http_input_identify);
1844 			break;
1845 		}
1846 	}
1847 
1848 	if (retname) {
1849 		if (result) {
1850 			RETVAL_STRING(result->name, 1);
1851 		} else {
1852 			RETVAL_FALSE;
1853 		}
1854 	}
1855 }
1856 /* }}} */
1857 
1858 /* {{{ proto string mb_http_output([string encoding])
1859    Sets the current output_encoding or returns the current output_encoding as a string */
PHP_FUNCTION(mb_http_output)1860 PHP_FUNCTION(mb_http_output)
1861 {
1862 	const char *name = NULL;
1863 	int name_len;
1864 	const mbfl_encoding *encoding;
1865 
1866 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", (char **)&name, &name_len) == FAILURE) {
1867 		RETURN_FALSE;
1868 	}
1869 
1870 	if (name == NULL) {
1871 		name = MBSTRG(current_http_output_encoding) ? MBSTRG(current_http_output_encoding)->name: NULL;
1872 		if (name != NULL) {
1873 			RETURN_STRING(name, 1);
1874 		} else {
1875 			RETURN_FALSE;
1876 		}
1877 	} else {
1878 		encoding = mbfl_name2encoding(name);
1879 		if (!encoding) {
1880 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
1881 			RETURN_FALSE;
1882 		} else {
1883 			MBSTRG(current_http_output_encoding) = encoding;
1884 			RETURN_TRUE;
1885 		}
1886 	}
1887 }
1888 /* }}} */
1889 
1890 /* {{{ proto bool|array mb_detect_order([mixed encoding-list])
1891    Sets the current detect_order or Return the current detect_order as a array */
PHP_FUNCTION(mb_detect_order)1892 PHP_FUNCTION(mb_detect_order)
1893 {
1894 	zval **arg1 = NULL;
1895 
1896 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|Z", &arg1) == FAILURE) {
1897 		return;
1898 	}
1899 
1900 	if (!arg1) {
1901 		size_t i;
1902 		size_t n = MBSTRG(current_detect_order_list_size);
1903 		const mbfl_encoding **entry = MBSTRG(current_detect_order_list);
1904 		array_init(return_value);
1905 		for (i = 0; i < n; i++) {
1906 			add_next_index_string(return_value, (*entry)->name, 1);
1907 			entry++;
1908 		}
1909 	} else {
1910 		const mbfl_encoding **list = NULL;
1911 		size_t size = 0;
1912 		switch (Z_TYPE_PP(arg1)) {
1913 		case IS_ARRAY:
1914 			if (FAILURE == php_mb_parse_encoding_array(*arg1, &list, &size, 0 TSRMLS_CC)) {
1915 				if (list) {
1916 					efree(list);
1917 				}
1918 				RETURN_FALSE;
1919 			}
1920 			break;
1921 		default:
1922 			convert_to_string_ex(arg1);
1923 			if (FAILURE == php_mb_parse_encoding_list(Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1), &list, &size, 0 TSRMLS_CC)) {
1924 				if (list) {
1925 					efree(list);
1926 				}
1927 				RETURN_FALSE;
1928 			}
1929 			break;
1930 		}
1931 
1932 		if (list == NULL) {
1933 			RETURN_FALSE;
1934 		}
1935 
1936 		if (MBSTRG(current_detect_order_list)) {
1937 			efree(MBSTRG(current_detect_order_list));
1938 		}
1939 		MBSTRG(current_detect_order_list) = list;
1940 		MBSTRG(current_detect_order_list_size) = size;
1941 		RETURN_TRUE;
1942 	}
1943 }
1944 /* }}} */
1945 
1946 /* {{{ proto mixed mb_substitute_character([mixed substchar])
1947    Sets the current substitute_character or returns the current substitute_character */
PHP_FUNCTION(mb_substitute_character)1948 PHP_FUNCTION(mb_substitute_character)
1949 {
1950 	zval **arg1 = NULL;
1951 
1952 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|Z", &arg1) == FAILURE) {
1953 		return;
1954 	}
1955 
1956 	if (!arg1) {
1957 		if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
1958 			RETURN_STRING("none", 1);
1959 		} else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
1960 			RETURN_STRING("long", 1);
1961 		} else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
1962 			RETURN_STRING("entity", 1);
1963 		} else {
1964 			RETURN_LONG(MBSTRG(current_filter_illegal_substchar));
1965 		}
1966 	} else {
1967 		RETVAL_TRUE;
1968 
1969 		switch (Z_TYPE_PP(arg1)) {
1970 		case IS_STRING:
1971 			if (strncasecmp("none", Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1)) == 0) {
1972 				MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
1973 			} else if (strncasecmp("long", Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1)) == 0) {
1974 				MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
1975 			} else if (strncasecmp("entity", Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1)) == 0) {
1976 				MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
1977 			} else {
1978 				convert_to_long_ex(arg1);
1979 
1980 				if (Z_LVAL_PP(arg1) < 0xffff && Z_LVAL_PP(arg1) > 0x0) {
1981 					MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1982 					MBSTRG(current_filter_illegal_substchar) = Z_LVAL_PP(arg1);
1983 				} else {
1984 					php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown character.");
1985 					RETURN_FALSE;
1986 				}
1987 			}
1988 			break;
1989 		default:
1990 			convert_to_long_ex(arg1);
1991 			if (Z_LVAL_PP(arg1) < 0xffff && Z_LVAL_PP(arg1) > 0x0) {
1992 				MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1993 				MBSTRG(current_filter_illegal_substchar) = Z_LVAL_PP(arg1);
1994 			} else {
1995 				php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown character.");
1996 				RETURN_FALSE;
1997 			}
1998 			break;
1999 		}
2000 	}
2001 }
2002 /* }}} */
2003 
2004 /* {{{ proto string mb_preferred_mime_name(string encoding)
2005    Return the preferred MIME name (charset) as a string */
PHP_FUNCTION(mb_preferred_mime_name)2006 PHP_FUNCTION(mb_preferred_mime_name)
2007 {
2008 	enum mbfl_no_encoding no_encoding;
2009 	char *name = NULL;
2010 	int name_len;
2011 
2012 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &name, &name_len) == FAILURE) {
2013 		return;
2014 	} else {
2015 		no_encoding = mbfl_name2no_encoding(name);
2016 		if (no_encoding == mbfl_no_encoding_invalid) {
2017 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
2018 			RETVAL_FALSE;
2019 		} else {
2020 			const char *preferred_name = mbfl_no2preferred_mime_name(no_encoding);
2021 			if (preferred_name == NULL || *preferred_name == '\0') {
2022 				php_error_docref(NULL TSRMLS_CC, E_WARNING, "No MIME preferred name corresponding to \"%s\"", name);
2023 				RETVAL_FALSE;
2024 			} else {
2025 				RETVAL_STRING((char *)preferred_name, 1);
2026 			}
2027 		}
2028 	}
2029 }
2030 /* }}} */
2031 
/* Byte-range predicates, evaluating to 1 or 0 (presumably the first and
 * second byte of a Shift_JIS double-byte character, judging by the names).
 * The argument is evaluated more than once, so it must be free of side effects. */
#define IS_SJIS1(c) (((c) >= 0x81 && (c) <= 0x9f) || ((c) >= 0xe0 && (c) <= 0xf5))
#define IS_SJIS2(c) (((c) >= 0x40 && (c) <= 0x7e) || ((c) >= 0x80 && (c) <= 0xfc))
2034 
2035 /* {{{ proto bool mb_parse_str(string encoded_string [, array result])
2036    Parses GET/POST/COOKIE data and sets global variables */
PHP_FUNCTION(mb_parse_str)2037 PHP_FUNCTION(mb_parse_str)
2038 {
2039 	zval *track_vars_array = NULL;
2040 	char *encstr = NULL;
2041 	int encstr_len;
2042 	php_mb_encoding_handler_info_t info;
2043 	const mbfl_encoding *detected;
2044 
2045 	track_vars_array = NULL;
2046 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|z", &encstr, &encstr_len, &track_vars_array) == FAILURE) {
2047 		return;
2048 	}
2049 
2050 	if (track_vars_array != NULL) {
2051 		/* Clear out the array */
2052 		zval_dtor(track_vars_array);
2053 		array_init(track_vars_array);
2054 	}
2055 
2056 	encstr = estrndup(encstr, encstr_len);
2057 
2058 	info.data_type              = PARSE_STRING;
2059 	info.separator              = PG(arg_separator).input;
2060 	info.report_errors          = 1;
2061 	info.to_encoding            = MBSTRG(current_internal_encoding);
2062 	info.to_language            = MBSTRG(language);
2063 	info.from_encodings         = MBSTRG(http_input_list);
2064 	info.num_from_encodings     = MBSTRG(http_input_list_size);
2065 	info.from_language          = MBSTRG(language);
2066 
2067 	if (track_vars_array != NULL) {
2068 		detected = _php_mb_encoding_handler_ex(&info, track_vars_array, encstr TSRMLS_CC);
2069 	} else {
2070 		zval tmp;
2071 		if (!EG(active_symbol_table)) {
2072 			zend_rebuild_symbol_table(TSRMLS_C);
2073 		}
2074 		Z_ARRVAL(tmp) = EG(active_symbol_table);
2075 		detected = _php_mb_encoding_handler_ex(&info, &tmp, encstr TSRMLS_CC);
2076 	}
2077 
2078 	MBSTRG(http_input_identify) = detected;
2079 
2080 	RETVAL_BOOL(detected);
2081 
2082 	if (encstr != NULL) efree(encstr);
2083 }
2084 /* }}} */
2085 
2086 /* {{{ proto string mb_output_handler(string contents, int status)
2087    Returns string in output buffer converted to the http_output encoding */
PHP_FUNCTION(mb_output_handler)2088 PHP_FUNCTION(mb_output_handler)
2089 {
2090 	char *arg_string;
2091 	int arg_string_len;
2092 	long arg_status;
2093 	mbfl_string string, result;
2094 	const char *charset;
2095 	char *p;
2096 	const mbfl_encoding *encoding;
2097 	int last_feed, len;
2098 	unsigned char send_text_mimetype = 0;
2099 	char *s, *mimetype = NULL;
2100 
2101 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl", &arg_string, &arg_string_len, &arg_status) == FAILURE) {
2102 		return;
2103 	}
2104 
2105 	encoding = MBSTRG(current_http_output_encoding);
2106 
2107  	/* start phase only */
2108  	if ((arg_status & PHP_OUTPUT_HANDLER_START) != 0) {
2109  		/* delete the converter just in case. */
2110  		if (MBSTRG(outconv)) {
2111 			MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
2112  			mbfl_buffer_converter_delete(MBSTRG(outconv));
2113  			MBSTRG(outconv) = NULL;
2114   		}
2115 		if (encoding == &mbfl_encoding_pass) {
2116 			RETURN_STRINGL(arg_string, arg_string_len, 1);
2117 		}
2118 
2119 		/* analyze mime type */
2120 		if (SG(sapi_headers).mimetype &&
2121 			_php_mb_match_regex(
2122 				MBSTRG(http_output_conv_mimetypes),
2123 				SG(sapi_headers).mimetype,
2124 				strlen(SG(sapi_headers).mimetype))) {
2125 			if ((s = strchr(SG(sapi_headers).mimetype,';')) == NULL){
2126 				mimetype = estrdup(SG(sapi_headers).mimetype);
2127 			} else {
2128 				mimetype = estrndup(SG(sapi_headers).mimetype,s-SG(sapi_headers).mimetype);
2129 			}
2130 			send_text_mimetype = 1;
2131 		} else if (SG(sapi_headers).send_default_content_type) {
2132 			mimetype = SG(default_mimetype) ? SG(default_mimetype) : SAPI_DEFAULT_MIMETYPE;
2133 		}
2134 
2135  		/* if content-type is not yet set, set it and activate the converter */
2136  		if (SG(sapi_headers).send_default_content_type || send_text_mimetype) {
2137 			charset = encoding->mime_name;
2138 			if (charset) {
2139 				len = spprintf( &p, 0, "Content-Type: %s; charset=%s",  mimetype, charset );
2140 				if (sapi_add_header(p, len, 0) != FAILURE) {
2141 					SG(sapi_headers).send_default_content_type = 0;
2142 				}
2143 			}
2144  			/* activate the converter */
2145  			MBSTRG(outconv) = mbfl_buffer_converter_new2(MBSTRG(current_internal_encoding), encoding, 0);
2146 			if (send_text_mimetype){
2147 				efree(mimetype);
2148 			}
2149  		}
2150   	}
2151 
2152  	/* just return if the converter is not activated. */
2153  	if (MBSTRG(outconv) == NULL) {
2154 		RETURN_STRINGL(arg_string, arg_string_len, 1);
2155 	}
2156 
2157  	/* flag */
2158  	last_feed = ((arg_status & PHP_OUTPUT_HANDLER_END) != 0);
2159  	/* mode */
2160  	mbfl_buffer_converter_illegal_mode(MBSTRG(outconv), MBSTRG(current_filter_illegal_mode));
2161  	mbfl_buffer_converter_illegal_substchar(MBSTRG(outconv), MBSTRG(current_filter_illegal_substchar));
2162 
2163  	/* feed the string */
2164  	mbfl_string_init(&string);
2165  	string.no_language = MBSTRG(language);
2166  	string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2167  	string.val = (unsigned char *)arg_string;
2168  	string.len = arg_string_len;
2169  	mbfl_buffer_converter_feed(MBSTRG(outconv), &string);
2170  	if (last_feed) {
2171  		mbfl_buffer_converter_flush(MBSTRG(outconv));
2172 	}
2173  	/* get the converter output, and return it */
2174  	mbfl_buffer_converter_result(MBSTRG(outconv), &result);
2175  	RETVAL_STRINGL((char *)result.val, result.len, 0);		/* the string is already strdup()'ed */
2176 
2177  	/* delete the converter if it is the last feed. */
2178  	if (last_feed) {
2179 		MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
2180 		mbfl_buffer_converter_delete(MBSTRG(outconv));
2181 		MBSTRG(outconv) = NULL;
2182 	}
2183 }
2184 /* }}} */
2185 
2186 /* {{{ proto int mb_strlen(string str [, string encoding])
2187    Get character numbers of a string */
PHP_FUNCTION(mb_strlen)2188 PHP_FUNCTION(mb_strlen)
2189 {
2190 	int n;
2191 	mbfl_string string;
2192 	char *enc_name = NULL;
2193 	int enc_name_len;
2194 
2195 	mbfl_string_init(&string);
2196 
2197 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s", (char **)&string.val, &string.len, &enc_name, &enc_name_len) == FAILURE) {
2198 		RETURN_FALSE;
2199 	}
2200 
2201 	string.no_language = MBSTRG(language);
2202 	if (enc_name == NULL) {
2203 		string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2204 	} else {
2205 		string.no_encoding = mbfl_name2no_encoding(enc_name);
2206 		if (string.no_encoding == mbfl_no_encoding_invalid) {
2207 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2208 			RETURN_FALSE;
2209 		}
2210 	}
2211 
2212 	n = mbfl_strlen(&string);
2213 	if (n >= 0) {
2214 		RETVAL_LONG(n);
2215 	} else {
2216 		RETVAL_FALSE;
2217 	}
2218 }
2219 /* }}} */
2220 
2221 /* {{{ proto int mb_strpos(string haystack, string needle [, int offset [, string encoding]])
2222    Find position of first occurrence of a string within another */
PHP_FUNCTION(mb_strpos)2223 PHP_FUNCTION(mb_strpos)
2224 {
2225 	int n, reverse = 0;
2226 	long offset;
2227 	mbfl_string haystack, needle;
2228 	char *enc_name = NULL;
2229 	int enc_name_len;
2230 
2231 	mbfl_string_init(&haystack);
2232 	mbfl_string_init(&needle);
2233 	haystack.no_language = MBSTRG(language);
2234 	haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2235 	needle.no_language = MBSTRG(language);
2236 	needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2237 	offset = 0;
2238 
2239 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ls", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &offset, &enc_name, &enc_name_len) == FAILURE) {
2240 		RETURN_FALSE;
2241 	}
2242 
2243 	if (enc_name != NULL) {
2244 		haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2245 		if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2246 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2247 			RETURN_FALSE;
2248 		}
2249 	}
2250 
2251 	if (offset < 0 || offset > mbfl_strlen(&haystack)) {
2252 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset not contained in string");
2253 		RETURN_FALSE;
2254 	}
2255 	if (needle.len == 0) {
2256 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter");
2257 		RETURN_FALSE;
2258 	}
2259 
2260 	n = mbfl_strpos(&haystack, &needle, offset, reverse);
2261 	if (n >= 0) {
2262 		RETVAL_LONG(n);
2263 	} else {
2264 		switch (-n) {
2265 		case 1:
2266 			break;
2267 		case 2:
2268 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Needle has not positive length");
2269 			break;
2270 		case 4:
2271 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding or conversion error");
2272 			break;
2273 		case 8:
2274 			php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Argument is empty");
2275 			break;
2276 		default:
2277 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown error in mb_strpos");
2278 			break;
2279 		}
2280 		RETVAL_FALSE;
2281 	}
2282 }
2283 /* }}} */
2284 
2285 /* {{{ proto int mb_strrpos(string haystack, string needle [, int offset [, string encoding]])
2286    Find position of last occurrence of a string within another */
PHP_FUNCTION(mb_strrpos)2287 PHP_FUNCTION(mb_strrpos)
2288 {
2289 	int n;
2290 	mbfl_string haystack, needle;
2291 	char *enc_name = NULL;
2292 	int enc_name_len;
2293 	zval **zoffset = NULL;
2294 	long offset = 0, str_flg;
2295 	char *enc_name2 = NULL;
2296 	int enc_name_len2;
2297 
2298 	mbfl_string_init(&haystack);
2299 	mbfl_string_init(&needle);
2300 	haystack.no_language = MBSTRG(language);
2301 	haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2302 	needle.no_language = MBSTRG(language);
2303 	needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2304 
2305 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|Zs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &zoffset, &enc_name, &enc_name_len) == FAILURE) {
2306 		RETURN_FALSE;
2307 	}
2308 
2309 	if (zoffset) {
2310 		if (Z_TYPE_PP(zoffset) == IS_STRING) {
2311 			enc_name2     = Z_STRVAL_PP(zoffset);
2312 			enc_name_len2 = Z_STRLEN_PP(zoffset);
2313 			str_flg       = 1;
2314 
2315 			if (enc_name2 != NULL) {
2316 				switch (*enc_name2) {
2317 				case '0':
2318 				case '1':
2319 				case '2':
2320 				case '3':
2321 				case '4':
2322 				case '5':
2323 				case '6':
2324 				case '7':
2325 				case '8':
2326 				case '9':
2327 				case ' ':
2328 				case '-':
2329 				case '.':
2330 					break;
2331 				default :
2332 					str_flg = 0;
2333 					break;
2334 				}
2335 			}
2336 
2337 			if (str_flg) {
2338 				convert_to_long_ex(zoffset);
2339 				offset   = Z_LVAL_PP(zoffset);
2340 			} else {
2341 				enc_name     = enc_name2;
2342 				enc_name_len = enc_name_len2;
2343 			}
2344 		} else {
2345 			convert_to_long_ex(zoffset);
2346 			offset = Z_LVAL_PP(zoffset);
2347 		}
2348 	}
2349 
2350 	if (enc_name != NULL) {
2351 		haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2352 		if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2353 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2354 			RETURN_FALSE;
2355 		}
2356 	}
2357 
2358 	if (haystack.len <= 0) {
2359 		RETURN_FALSE;
2360 	}
2361 	if (needle.len <= 0) {
2362 		RETURN_FALSE;
2363 	}
2364 
2365 	{
2366 		int haystack_char_len = mbfl_strlen(&haystack);
2367 		if ((offset > 0 && offset > haystack_char_len) ||
2368 			(offset < 0 && -offset > haystack_char_len)) {
2369 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset is greater than the length of haystack string");
2370 			RETURN_FALSE;
2371 		}
2372 	}
2373 
2374 	n = mbfl_strpos(&haystack, &needle, offset, 1);
2375 	if (n >= 0) {
2376 		RETVAL_LONG(n);
2377 	} else {
2378 		RETVAL_FALSE;
2379 	}
2380 }
2381 /* }}} */
2382 
2383 /* {{{ proto int mb_stripos(string haystack, string needle [, int offset [, string encoding]])
2384    Finds position of first occurrence of a string within another, case insensitive */
PHP_FUNCTION(mb_stripos)2385 PHP_FUNCTION(mb_stripos)
2386 {
2387 	int n;
2388 	long offset;
2389 	mbfl_string haystack, needle;
2390 	const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
2391 	int from_encoding_len;
2392 	n = -1;
2393 	offset = 0;
2394 
2395 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ls", (char **)&haystack.val, (int *)&haystack.len, (char **)&needle.val, (int *)&needle.len, &offset, &from_encoding, &from_encoding_len) == FAILURE) {
2396 		RETURN_FALSE;
2397 	}
2398 	if (needle.len == 0) {
2399 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter");
2400 		RETURN_FALSE;
2401 	}
2402 	n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, from_encoding TSRMLS_CC);
2403 
2404 	if (n >= 0) {
2405 		RETVAL_LONG(n);
2406 	} else {
2407 		RETVAL_FALSE;
2408 	}
2409 }
2410 /* }}} */
2411 
2412 /* {{{ proto int mb_strripos(string haystack, string needle [, int offset [, string encoding]])
2413    Finds position of last occurrence of a string within another, case insensitive */
PHP_FUNCTION(mb_strripos)2414 PHP_FUNCTION(mb_strripos)
2415 {
2416 	int n;
2417 	long offset;
2418 	mbfl_string haystack, needle;
2419 	const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
2420 	int from_encoding_len;
2421 	n = -1;
2422 	offset = 0;
2423 
2424 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ls", (char **)&haystack.val, (int *)&haystack.len, (char **)&needle.val, (int *)&needle.len, &offset, &from_encoding, &from_encoding_len) == FAILURE) {
2425 		RETURN_FALSE;
2426 	}
2427 
2428 	n = php_mb_stripos(1, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, from_encoding TSRMLS_CC);
2429 
2430 	if (n >= 0) {
2431 		RETVAL_LONG(n);
2432 	} else {
2433 		RETVAL_FALSE;
2434 	}
2435 }
2436 /* }}} */
2437 
2438 /* {{{ proto string mb_strstr(string haystack, string needle[, bool part[, string encoding]])
2439    Finds first occurrence of a string within another */
PHP_FUNCTION(mb_strstr)2440 PHP_FUNCTION(mb_strstr)
2441 {
2442 	int n, len, mblen;
2443 	mbfl_string haystack, needle, result, *ret = NULL;
2444 	char *enc_name = NULL;
2445 	int enc_name_len;
2446 	zend_bool part = 0;
2447 
2448 	mbfl_string_init(&haystack);
2449 	mbfl_string_init(&needle);
2450 	haystack.no_language = MBSTRG(language);
2451 	haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2452 	needle.no_language = MBSTRG(language);
2453 	needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2454 
2455 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, (int *)&haystack.len, (char **)&needle.val, (int *)&needle.len, &part, &enc_name, &enc_name_len) == FAILURE) {
2456 		RETURN_FALSE;
2457 	}
2458 
2459 	if (enc_name != NULL) {
2460 		haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2461 		if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2462 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2463 			RETURN_FALSE;
2464 		}
2465 	}
2466 
2467 	if (needle.len <= 0) {
2468 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter");
2469 		RETURN_FALSE;
2470 	}
2471 	n = mbfl_strpos(&haystack, &needle, 0, 0);
2472 	if (n >= 0) {
2473 		mblen = mbfl_strlen(&haystack);
2474 		if (part) {
2475 			ret = mbfl_substr(&haystack, &result, 0, n);
2476 			if (ret != NULL) {
2477 				RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2478 			} else {
2479 				RETVAL_FALSE;
2480 			}
2481 		} else {
2482 			len = (mblen - n);
2483 			ret = mbfl_substr(&haystack, &result, n, len);
2484 			if (ret != NULL) {
2485 				RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2486 			} else {
2487 				RETVAL_FALSE;
2488 			}
2489 		}
2490 	} else {
2491 		RETVAL_FALSE;
2492 	}
2493 }
2494 /* }}} */
2495 
2496 /* {{{ proto string mb_strrchr(string haystack, string needle[, bool part[, string encoding]])
2497    Finds the last occurrence of a character in a string within another */
PHP_FUNCTION(mb_strrchr)2498 PHP_FUNCTION(mb_strrchr)
2499 {
2500 	int n, len, mblen;
2501 	mbfl_string haystack, needle, result, *ret = NULL;
2502 	char *enc_name = NULL;
2503 	int enc_name_len;
2504 	zend_bool part = 0;
2505 
2506 	mbfl_string_init(&haystack);
2507 	mbfl_string_init(&needle);
2508 	haystack.no_language = MBSTRG(language);
2509 	haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2510 	needle.no_language = MBSTRG(language);
2511 	needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2512 
2513 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &enc_name, &enc_name_len) == FAILURE) {
2514 		RETURN_FALSE;
2515 	}
2516 
2517 	if (enc_name != NULL) {
2518 		haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2519 		if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2520 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2521 			RETURN_FALSE;
2522 		}
2523 	}
2524 
2525 	if (haystack.len <= 0) {
2526 		RETURN_FALSE;
2527 	}
2528 	if (needle.len <= 0) {
2529 		RETURN_FALSE;
2530 	}
2531 	n = mbfl_strpos(&haystack, &needle, 0, 1);
2532 	if (n >= 0) {
2533 		mblen = mbfl_strlen(&haystack);
2534 		if (part) {
2535 			ret = mbfl_substr(&haystack, &result, 0, n);
2536 			if (ret != NULL) {
2537 				RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2538 			} else {
2539 				RETVAL_FALSE;
2540 			}
2541 		} else {
2542 			len = (mblen - n);
2543 			ret = mbfl_substr(&haystack, &result, n, len);
2544 			if (ret != NULL) {
2545 				RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2546 			} else {
2547 				RETVAL_FALSE;
2548 			}
2549 		}
2550 	} else {
2551 		RETVAL_FALSE;
2552 	}
2553 }
2554 /* }}} */
2555 
2556 /* {{{ proto string mb_stristr(string haystack, string needle[, bool part[, string encoding]])
2557    Finds first occurrence of a string within another, case insensitive */
PHP_FUNCTION(mb_stristr)2558 PHP_FUNCTION(mb_stristr)
2559 {
2560 	zend_bool part = 0;
2561 	unsigned int from_encoding_len, len, mblen;
2562 	int n;
2563 	mbfl_string haystack, needle, result, *ret = NULL;
2564 	const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
2565 	mbfl_string_init(&haystack);
2566 	mbfl_string_init(&needle);
2567 	haystack.no_language = MBSTRG(language);
2568 	haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2569 	needle.no_language = MBSTRG(language);
2570 	needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2571 
2572 
2573 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &from_encoding, &from_encoding_len) == FAILURE) {
2574 		RETURN_FALSE;
2575 	}
2576 
2577 	if (!needle.len) {
2578 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter");
2579 		RETURN_FALSE;
2580 	}
2581 
2582 	haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding);
2583 	if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2584 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", from_encoding);
2585 		RETURN_FALSE;
2586 	}
2587 
2588 	n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, 0, from_encoding TSRMLS_CC);
2589 
2590 	if (n <0) {
2591 		RETURN_FALSE;
2592 	}
2593 
2594 	mblen = mbfl_strlen(&haystack);
2595 
2596 	if (part) {
2597 		ret = mbfl_substr(&haystack, &result, 0, n);
2598 		if (ret != NULL) {
2599 			RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2600 		} else {
2601 			RETVAL_FALSE;
2602 		}
2603 	} else {
2604 		len = (mblen - n);
2605 		ret = mbfl_substr(&haystack, &result, n, len);
2606 		if (ret != NULL) {
2607 			RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2608 		} else {
2609 			RETVAL_FALSE;
2610 		}
2611 	}
2612 }
2613 /* }}} */
2614 
2615 /* {{{ proto string mb_strrichr(string haystack, string needle[, bool part[, string encoding]])
2616    Finds the last occurrence of a character in a string within another, case insensitive */
PHP_FUNCTION(mb_strrichr)2617 PHP_FUNCTION(mb_strrichr)
2618 {
2619 	zend_bool part = 0;
2620 	int n, from_encoding_len, len, mblen;
2621 	mbfl_string haystack, needle, result, *ret = NULL;
2622 	const char *from_encoding = MBSTRG(current_internal_encoding)->name;
2623 	mbfl_string_init(&haystack);
2624 	mbfl_string_init(&needle);
2625 	haystack.no_language = MBSTRG(language);
2626 	haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2627 	needle.no_language = MBSTRG(language);
2628 	needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2629 
2630 
2631 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &from_encoding, &from_encoding_len) == FAILURE) {
2632 		RETURN_FALSE;
2633 	}
2634 
2635 	haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding);
2636 	if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2637 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", from_encoding);
2638 		RETURN_FALSE;
2639 	}
2640 
2641 	n = php_mb_stripos(1, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, 0, from_encoding TSRMLS_CC);
2642 
2643 	if (n <0) {
2644 		RETURN_FALSE;
2645 	}
2646 
2647 	mblen = mbfl_strlen(&haystack);
2648 
2649 	if (part) {
2650 		ret = mbfl_substr(&haystack, &result, 0, n);
2651 		if (ret != NULL) {
2652 			RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2653 		} else {
2654 			RETVAL_FALSE;
2655 		}
2656 	} else {
2657 		len = (mblen - n);
2658 		ret = mbfl_substr(&haystack, &result, n, len);
2659 		if (ret != NULL) {
2660 			RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2661 		} else {
2662 			RETVAL_FALSE;
2663 		}
2664 	}
2665 }
2666 /* }}} */
2667 
2668 /* {{{ proto int mb_substr_count(string haystack, string needle [, string encoding])
2669    Count the number of substring occurrences */
PHP_FUNCTION(mb_substr_count)2670 PHP_FUNCTION(mb_substr_count)
2671 {
2672 	int n;
2673 	mbfl_string haystack, needle;
2674 	char *enc_name = NULL;
2675 	int enc_name_len;
2676 
2677 	mbfl_string_init(&haystack);
2678 	mbfl_string_init(&needle);
2679 	haystack.no_language = MBSTRG(language);
2680 	haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2681 	needle.no_language = MBSTRG(language);
2682 	needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2683 
2684 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|s", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &enc_name, &enc_name_len) == FAILURE) {
2685 		return;
2686 	}
2687 
2688 	if (enc_name != NULL) {
2689 		haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2690 		if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2691 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2692 			RETURN_FALSE;
2693 		}
2694 	}
2695 
2696 	if (needle.len <= 0) {
2697 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty substring");
2698 		RETURN_FALSE;
2699 	}
2700 
2701 	n = mbfl_substr_count(&haystack, &needle);
2702 	if (n >= 0) {
2703 		RETVAL_LONG(n);
2704 	} else {
2705 		RETVAL_FALSE;
2706 	}
2707 }
2708 /* }}} */
2709 
2710 /* {{{ proto string mb_substr(string str, int start [, int length [, string encoding]])
2711    Returns part of a string */
PHP_FUNCTION(mb_substr)2712 PHP_FUNCTION(mb_substr)
2713 {
2714 	size_t argc = ZEND_NUM_ARGS();
2715 	char *str, *encoding;
2716 	long from, len;
2717 	int mblen, str_len, encoding_len;
2718 	zval **z_len = NULL;
2719 	mbfl_string string, result, *ret;
2720 
2721 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|Zs", &str, &str_len, &from, &z_len, &encoding, &encoding_len) == FAILURE) {
2722 		return;
2723 	}
2724 
2725 	mbfl_string_init(&string);
2726 	string.no_language = MBSTRG(language);
2727 	string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2728 
2729 	if (argc == 4) {
2730 		string.no_encoding = mbfl_name2no_encoding(encoding);
2731 		if (string.no_encoding == mbfl_no_encoding_invalid) {
2732 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
2733 			RETURN_FALSE;
2734 		}
2735 	}
2736 
2737 	string.val = (unsigned char *)str;
2738 	string.len = str_len;
2739 
2740 	if (argc < 3 || Z_TYPE_PP(z_len) == IS_NULL) {
2741 		len = str_len;
2742 	} else {
2743 		convert_to_long_ex(z_len);
2744 		len = Z_LVAL_PP(z_len);
2745 	}
2746 
2747 	/* measures length */
2748 	mblen = 0;
2749 	if (from < 0 || len < 0) {
2750 		mblen = mbfl_strlen(&string);
2751 	}
2752 
2753 	/* if "from" position is negative, count start position from the end
2754 	 * of the string
2755 	 */
2756 	if (from < 0) {
2757 		from = mblen + from;
2758 		if (from < 0) {
2759 			from = 0;
2760 		}
2761 	}
2762 
2763 	/* if "length" position is negative, set it to the length
2764 	 * needed to stop that many chars from the end of the string
2765 	 */
2766 	if (len < 0) {
2767 		len = (mblen - from) + len;
2768 		if (len < 0) {
2769 			len = 0;
2770 		}
2771 	}
2772 
2773 	if (((MBSTRG(func_overload) & MB_OVERLOAD_STRING) == MB_OVERLOAD_STRING)
2774 		&& (from >= mbfl_strlen(&string))) {
2775 		RETURN_FALSE;
2776 	}
2777 
2778 	ret = mbfl_substr(&string, &result, from, len);
2779 	if (NULL == ret) {
2780 		RETURN_FALSE;
2781 	}
2782 
2783 	RETURN_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */
2784 }
2785 /* }}} */
2786 
2787 /* {{{ proto string mb_strcut(string str, int start [, int length [, string encoding]])
2788    Returns part of a string */
PHP_FUNCTION(mb_strcut)2789 PHP_FUNCTION(mb_strcut)
2790 {
2791 	size_t argc = ZEND_NUM_ARGS();
2792 	char *encoding;
2793 	long from, len;
2794 	int encoding_len;
2795 	zval **z_len = NULL;
2796 	mbfl_string string, result, *ret;
2797 
2798 	mbfl_string_init(&string);
2799 	string.no_language = MBSTRG(language);
2800 	string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2801 
2802 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|Zs", (char **)&string.val, (int **)&string.len, &from, &z_len, &encoding, &encoding_len) == FAILURE) {
2803 		return;
2804 	}
2805 
2806 	if (argc == 4) {
2807 		string.no_encoding = mbfl_name2no_encoding(encoding);
2808 		if (string.no_encoding == mbfl_no_encoding_invalid) {
2809 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
2810 			RETURN_FALSE;
2811 		}
2812 	}
2813 
2814 	if (argc < 3 || Z_TYPE_PP(z_len) == IS_NULL) {
2815 		len = string.len;
2816 	} else {
2817 		convert_to_long_ex(z_len);
2818 		len = Z_LVAL_PP(z_len);
2819 	}
2820 
2821 	/* if "from" position is negative, count start position from the end
2822 	 * of the string
2823 	 */
2824 	if (from < 0) {
2825 		from = string.len + from;
2826 		if (from < 0) {
2827 			from = 0;
2828 		}
2829 	}
2830 
2831 	/* if "length" position is negative, set it to the length
2832 	 * needed to stop that many chars from the end of the string
2833 	 */
2834 	if (len < 0) {
2835 		len = (string.len - from) + len;
2836 		if (len < 0) {
2837 			len = 0;
2838 		}
2839 	}
2840 
2841 	if ((unsigned int)from > string.len) {
2842 		RETURN_FALSE;
2843 	}
2844 
2845 	ret = mbfl_strcut(&string, &result, from, len);
2846 	if (ret == NULL) {
2847 		RETURN_FALSE;
2848 	}
2849 
2850 	RETURN_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */
2851 }
2852 /* }}} */
2853 
2854 /* {{{ proto int mb_strwidth(string str [, string encoding])
2855    Gets terminal width of a string */
PHP_FUNCTION(mb_strwidth)2856 PHP_FUNCTION(mb_strwidth)
2857 {
2858 	int n;
2859 	mbfl_string string;
2860 	char *enc_name = NULL;
2861 	int enc_name_len;
2862 
2863 	mbfl_string_init(&string);
2864 
2865 	string.no_language = MBSTRG(language);
2866 	string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2867 
2868 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s", (char **)&string.val, &string.len, &enc_name, &enc_name_len) == FAILURE) {
2869 		return;
2870 	}
2871 
2872 	if (enc_name != NULL) {
2873 		string.no_encoding = mbfl_name2no_encoding(enc_name);
2874 		if (string.no_encoding == mbfl_no_encoding_invalid) {
2875 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2876 			RETURN_FALSE;
2877 		}
2878 	}
2879 
2880 	n = mbfl_strwidth(&string);
2881 	if (n >= 0) {
2882 		RETVAL_LONG(n);
2883 	} else {
2884 		RETVAL_FALSE;
2885 	}
2886 }
2887 /* }}} */
2888 
2889 /* {{{ proto string mb_strimwidth(string str, int start, int width [, string trimmarker [, string encoding]])
2890    Trim the string in terminal width */
PHP_FUNCTION(mb_strimwidth)2891 PHP_FUNCTION(mb_strimwidth)
2892 {
2893 	char *str, *trimmarker, *encoding;
2894 	long from, width;
2895 	int str_len, trimmarker_len, encoding_len;
2896 	mbfl_string string, result, marker, *ret;
2897 
2898 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sll|ss", &str, &str_len, &from, &width, &trimmarker, &trimmarker_len, &encoding, &encoding_len) == FAILURE) {
2899 		return;
2900 	}
2901 
2902 	mbfl_string_init(&string);
2903 	mbfl_string_init(&marker);
2904 	string.no_language = MBSTRG(language);
2905 	string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2906 	marker.no_language = MBSTRG(language);
2907 	marker.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2908 	marker.val = NULL;
2909 	marker.len = 0;
2910 
2911 	if (ZEND_NUM_ARGS() == 5) {
2912 		string.no_encoding = marker.no_encoding = mbfl_name2no_encoding(encoding);
2913 		if (string.no_encoding == mbfl_no_encoding_invalid) {
2914 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
2915 			RETURN_FALSE;
2916 		}
2917 	}
2918 
2919 	string.val = (unsigned char *)str;
2920 	string.len = str_len;
2921 
2922 	if (from < 0 || from > str_len) {
2923 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Start position is out of range");
2924 		RETURN_FALSE;
2925 	}
2926 
2927 	if (width < 0) {
2928 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Width is negative value");
2929 		RETURN_FALSE;
2930 	}
2931 
2932 	if (ZEND_NUM_ARGS() >= 4) {
2933 		marker.val = (unsigned char *)trimmarker;
2934 		marker.len = trimmarker_len;
2935 	}
2936 
2937 	ret = mbfl_strimwidth(&string, &marker, &result, from, width);
2938 
2939 	if (ret == NULL) {
2940 		RETURN_FALSE;
2941 	}
2942 
2943 	RETVAL_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */
2944 }
2945 /* }}} */
2946 
2947 /* {{{ MBSTRING_API char *php_mb_convert_encoding() */
php_mb_convert_encoding(const char * input,size_t length,const char * _to_encoding,const char * _from_encodings,size_t * output_len TSRMLS_DC)2948 MBSTRING_API char * php_mb_convert_encoding(const char *input, size_t length, const char *_to_encoding, const char *_from_encodings, size_t *output_len TSRMLS_DC)
2949 {
2950 	mbfl_string string, result, *ret;
2951 	const mbfl_encoding *from_encoding, *to_encoding;
2952 	mbfl_buffer_converter *convd;
2953 	size_t size;
2954 	const mbfl_encoding **list;
2955 	char *output=NULL;
2956 
2957 	if (output_len) {
2958 		*output_len = 0;
2959 	}
2960 	if (!input) {
2961 		return NULL;
2962 	}
2963 	/* new encoding */
2964 	if (_to_encoding && strlen(_to_encoding)) {
2965 		to_encoding = mbfl_name2encoding(_to_encoding);
2966 		if (!to_encoding) {
2967 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", _to_encoding);
2968 			return NULL;
2969 		}
2970 	} else {
2971 		to_encoding = MBSTRG(current_internal_encoding);
2972 	}
2973 
2974 	/* initialize string */
2975 	mbfl_string_init(&string);
2976 	mbfl_string_init(&result);
2977 	from_encoding = MBSTRG(current_internal_encoding);
2978 	string.no_encoding = from_encoding->no_encoding;
2979 	string.no_language = MBSTRG(language);
2980 	string.val = (unsigned char *)input;
2981 	string.len = length;
2982 
2983 	/* pre-conversion encoding */
2984 	if (_from_encodings) {
2985 		list = NULL;
2986 		size = 0;
2987 		php_mb_parse_encoding_list(_from_encodings, strlen(_from_encodings), &list, &size, 0 TSRMLS_CC);
2988 		if (size == 1) {
2989 			from_encoding = *list;
2990 			string.no_encoding = from_encoding->no_encoding;
2991 		} else if (size > 1) {
2992 			/* auto detect */
2993 			from_encoding = mbfl_identify_encoding2(&string, list, size, MBSTRG(strict_detection));
2994 			if (from_encoding) {
2995 				string.no_encoding = from_encoding->no_encoding;
2996 			} else {
2997 				php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to detect character encoding");
2998 				from_encoding = &mbfl_encoding_pass;
2999 				to_encoding = from_encoding;
3000 				string.no_encoding = from_encoding->no_encoding;
3001 			}
3002 		} else {
3003 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Illegal character encoding specified");
3004 		}
3005 		if (list != NULL) {
3006 			efree((void *)list);
3007 		}
3008 	}
3009 
3010 	/* initialize converter */
3011 	convd = mbfl_buffer_converter_new2(from_encoding, to_encoding, string.len);
3012 	if (convd == NULL) {
3013 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create character encoding converter");
3014 		return NULL;
3015 	}
3016 	mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
3017 	mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
3018 
3019 	/* do it */
3020 	ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
3021 	if (ret) {
3022 		if (output_len) {
3023 			*output_len = ret->len;
3024 		}
3025 		output = (char *)ret->val;
3026 	}
3027 
3028 	MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
3029 	mbfl_buffer_converter_delete(convd);
3030 	return output;
3031 }
3032 /* }}} */
3033 
3034 /* {{{ proto string mb_convert_encoding(string str, string to-encoding [, mixed from-encoding])
3035    Returns converted string in desired encoding */
PHP_FUNCTION(mb_convert_encoding)3036 PHP_FUNCTION(mb_convert_encoding)
3037 {
3038 	char *arg_str, *arg_new;
3039 	int str_len, new_len;
3040 	zval *arg_old;
3041 	int i;
3042 	size_t size, l, n;
3043 	char *_from_encodings = NULL, *ret, *s_free = NULL;
3044 
3045 	zval **hash_entry;
3046 	HashTable *target_hash;
3047 
3048 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|z", &arg_str, &str_len, &arg_new, &new_len, &arg_old) == FAILURE) {
3049 		return;
3050 	}
3051 
3052 	if (ZEND_NUM_ARGS() == 3) {
3053 		switch (Z_TYPE_P(arg_old)) {
3054 		case IS_ARRAY:
3055 			target_hash = Z_ARRVAL_P(arg_old);
3056 			zend_hash_internal_pointer_reset(target_hash);
3057 			i = zend_hash_num_elements(target_hash);
3058 			_from_encodings = NULL;
3059 
3060 			while (i > 0) {
3061 				if (zend_hash_get_current_data(target_hash, (void **) &hash_entry) == FAILURE) {
3062 					break;
3063 				}
3064 
3065 				convert_to_string_ex(hash_entry);
3066 
3067 				if ( _from_encodings) {
3068 					l = strlen(_from_encodings);
3069 					n = strlen(Z_STRVAL_PP(hash_entry));
3070 					_from_encodings = erealloc(_from_encodings, l+n+2);
3071 					strcpy(_from_encodings+l, ",");
3072 					strcpy(_from_encodings+l+1, Z_STRVAL_PP(hash_entry));
3073 				} else {
3074 					_from_encodings = estrdup(Z_STRVAL_PP(hash_entry));
3075 				}
3076 
3077 				zend_hash_move_forward(target_hash);
3078 				i--;
3079 			}
3080 
3081 			if (_from_encodings != NULL && !strlen(_from_encodings)) {
3082 				efree(_from_encodings);
3083 				_from_encodings = NULL;
3084 			}
3085 			s_free = _from_encodings;
3086 			break;
3087 		default:
3088 			convert_to_string(arg_old);
3089 			_from_encodings = Z_STRVAL_P(arg_old);
3090 			break;
3091 		}
3092 	}
3093 
3094 	/* new encoding */
3095 	ret = php_mb_convert_encoding(arg_str, str_len, arg_new, _from_encodings, &size TSRMLS_CC);
3096 	if (ret != NULL) {
3097 		RETVAL_STRINGL(ret, size, 0);		/* the string is already strdup()'ed */
3098 	} else {
3099 		RETVAL_FALSE;
3100 	}
3101 
3102 	if ( s_free) {
3103 		efree(s_free);
3104 	}
3105 }
3106 /* }}} */
3107 
3108 /* {{{ proto string mb_convert_case(string sourcestring, int mode [, string encoding])
3109    Returns a case-folded version of sourcestring */
PHP_FUNCTION(mb_convert_case)3110 PHP_FUNCTION(mb_convert_case)
3111 {
3112 	const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
3113 	char *str;
3114 	int str_len, from_encoding_len;
3115 	long case_mode = 0;
3116 	char *newstr;
3117 	size_t ret_len;
3118 
3119 	RETVAL_FALSE;
3120 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|s!", &str, &str_len,
3121 				&case_mode, &from_encoding, &from_encoding_len) == FAILURE)
3122 		RETURN_FALSE;
3123 
3124 	newstr = php_unicode_convert_case(case_mode, str, (size_t) str_len, &ret_len, from_encoding TSRMLS_CC);
3125 
3126 	if (newstr) {
3127 		RETVAL_STRINGL(newstr, ret_len, 0);
3128 	}
3129 }
3130 /* }}} */
3131 
3132 /* {{{ proto string mb_strtoupper(string sourcestring [, string encoding])
3133  *  Returns a uppercased version of sourcestring
3134  */
PHP_FUNCTION(mb_strtoupper)3135 PHP_FUNCTION(mb_strtoupper)
3136 {
3137 	const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
3138 	char *str;
3139 	int str_len, from_encoding_len;
3140 	char *newstr;
3141 	size_t ret_len;
3142 
3143 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s!", &str, &str_len,
3144 				&from_encoding, &from_encoding_len) == FAILURE) {
3145 		return;
3146 	}
3147 	newstr = php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, str, (size_t) str_len, &ret_len, from_encoding TSRMLS_CC);
3148 
3149 	if (newstr) {
3150 		RETURN_STRINGL(newstr, ret_len, 0);
3151 	}
3152 	RETURN_FALSE;
3153 }
3154 /* }}} */
3155 
3156 /* {{{ proto string mb_strtolower(string sourcestring [, string encoding])
3157  *  Returns a lowercased version of sourcestring
3158  */
PHP_FUNCTION(mb_strtolower)3159 PHP_FUNCTION(mb_strtolower)
3160 {
3161 	const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
3162 	char *str;
3163 	int str_len, from_encoding_len;
3164 	char *newstr;
3165 	size_t ret_len;
3166 
3167 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s!", &str, &str_len,
3168 				&from_encoding, &from_encoding_len) == FAILURE) {
3169 		return;
3170 	}
3171 	newstr = php_unicode_convert_case(PHP_UNICODE_CASE_LOWER, str, (size_t) str_len, &ret_len, from_encoding TSRMLS_CC);
3172 
3173 	if (newstr) {
3174 		RETURN_STRINGL(newstr, ret_len, 0);
3175 	}
3176 	RETURN_FALSE;
3177 }
3178 /* }}} */
3179 
3180 /* {{{ proto string mb_detect_encoding(string str [, mixed encoding_list [, bool strict]])
3181    Encodings of the given string is returned (as a string) */
PHP_FUNCTION(mb_detect_encoding)3182 PHP_FUNCTION(mb_detect_encoding)
3183 {
3184 	char *str;
3185 	int str_len;
3186 	zend_bool strict=0;
3187 	zval *encoding_list;
3188 
3189 	mbfl_string string;
3190 	const mbfl_encoding *ret;
3191 	const mbfl_encoding **elist, **list;
3192 	size_t size;
3193 
3194 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|zb", &str, &str_len, &encoding_list, &strict) == FAILURE) {
3195 		return;
3196 	}
3197 
3198 	/* make encoding list */
3199 	list = NULL;
3200 	size = 0;
3201 	if (ZEND_NUM_ARGS() >= 2 && !ZVAL_IS_NULL(encoding_list)) {
3202 		switch (Z_TYPE_P(encoding_list)) {
3203 		case IS_ARRAY:
3204 			if (FAILURE == php_mb_parse_encoding_array(encoding_list, &list, &size, 0 TSRMLS_CC)) {
3205 				if (list) {
3206 					efree(list);
3207 					list = NULL;
3208 					size = 0;
3209 				}
3210 			}
3211 			break;
3212 		default:
3213 			convert_to_string(encoding_list);
3214 			if (FAILURE == php_mb_parse_encoding_list(Z_STRVAL_P(encoding_list), Z_STRLEN_P(encoding_list), &list, &size, 0 TSRMLS_CC)) {
3215 				if (list) {
3216 					efree(list);
3217 					list = NULL;
3218 					size = 0;
3219 				}
3220 			}
3221 			break;
3222 		}
3223 		if (size <= 0) {
3224 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Illegal argument");
3225 		}
3226 	}
3227 
3228 	if (ZEND_NUM_ARGS() < 3) {
3229 		strict = (zend_bool)MBSTRG(strict_detection);
3230 	}
3231 
3232 	if (size > 0 && list != NULL) {
3233 		elist = list;
3234 	} else {
3235 		elist = MBSTRG(current_detect_order_list);
3236 		size = MBSTRG(current_detect_order_list_size);
3237 	}
3238 
3239 	mbfl_string_init(&string);
3240 	string.no_language = MBSTRG(language);
3241 	string.val = (unsigned char *)str;
3242 	string.len = str_len;
3243 	ret = mbfl_identify_encoding2(&string, elist, size, strict);
3244 
3245 	if (list != NULL) {
3246 		efree((void *)list);
3247 	}
3248 
3249 	if (ret == NULL) {
3250 		RETURN_FALSE;
3251 	}
3252 
3253 	RETVAL_STRING((char *)ret->name, 1);
3254 }
3255 /* }}} */
3256 
3257 /* {{{ proto mixed mb_list_encodings()
3258    Returns an array of all supported entity encodings */
PHP_FUNCTION(mb_list_encodings)3259 PHP_FUNCTION(mb_list_encodings)
3260 {
3261 	const mbfl_encoding **encodings;
3262 	const mbfl_encoding *encoding;
3263 	int i;
3264 
3265 	array_init(return_value);
3266 	i = 0;
3267 	encodings = mbfl_get_supported_encodings();
3268 	while ((encoding = encodings[i++]) != NULL) {
3269 		add_next_index_string(return_value, (char *) encoding->name, 1);
3270 	}
3271 }
3272 /* }}} */
3273 
3274 /* {{{ proto array mb_encoding_aliases(string encoding)
3275    Returns an array of the aliases of a given encoding name */
PHP_FUNCTION(mb_encoding_aliases)3276 PHP_FUNCTION(mb_encoding_aliases)
3277 {
3278 	const mbfl_encoding *encoding;
3279 	char *name = NULL;
3280 	int name_len;
3281 
3282 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &name, &name_len) == FAILURE) {
3283 		RETURN_FALSE;
3284 	}
3285 
3286 	encoding = mbfl_name2encoding(name);
3287 	if (!encoding) {
3288 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
3289 		RETURN_FALSE;
3290 	}
3291 
3292 	array_init(return_value);
3293 	if (encoding->aliases != NULL) {
3294 		const char **alias;
3295 		for (alias = *encoding->aliases; *alias; ++alias) {
3296 			add_next_index_string(return_value, (char *)*alias, 1);
3297 		}
3298 	}
3299 }
3300 /* }}} */
3301 
3302 /* {{{ proto string mb_encode_mimeheader(string str [, string charset [, string transfer-encoding [, string linefeed [, int indent]]]])
3303    Converts the string to MIME "encoded-word" in the format of =?charset?(B|Q)?encoded_string?= */
PHP_FUNCTION(mb_encode_mimeheader)3304 PHP_FUNCTION(mb_encode_mimeheader)
3305 {
3306 	enum mbfl_no_encoding charset, transenc;
3307 	mbfl_string  string, result, *ret;
3308 	char *charset_name = NULL;
3309 	int charset_name_len;
3310 	char *trans_enc_name = NULL;
3311 	int trans_enc_name_len;
3312 	char *linefeed = "\r\n";
3313 	int linefeed_len;
3314 	long indent = 0;
3315 
3316 	mbfl_string_init(&string);
3317 	string.no_language = MBSTRG(language);
3318 	string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
3319 
3320 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|sssl", (char **)&string.val, &string.len, &charset_name, &charset_name_len, &trans_enc_name, &trans_enc_name_len, &linefeed, &linefeed_len, &indent) == FAILURE) {
3321 		return;
3322 	}
3323 
3324 	charset = mbfl_no_encoding_pass;
3325 	transenc = mbfl_no_encoding_base64;
3326 
3327 	if (charset_name != NULL) {
3328 		charset = mbfl_name2no_encoding(charset_name);
3329 		if (charset == mbfl_no_encoding_invalid) {
3330 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", charset_name);
3331 			RETURN_FALSE;
3332 		}
3333 	} else {
3334 		const mbfl_language *lang = mbfl_no2language(MBSTRG(language));
3335 		if (lang != NULL) {
3336 			charset = lang->mail_charset;
3337 			transenc = lang->mail_header_encoding;
3338 		}
3339 	}
3340 
3341 	if (trans_enc_name != NULL) {
3342 		if (*trans_enc_name == 'B' || *trans_enc_name == 'b') {
3343 			transenc = mbfl_no_encoding_base64;
3344 		} else if (*trans_enc_name == 'Q' || *trans_enc_name == 'q') {
3345 			transenc = mbfl_no_encoding_qprint;
3346 		}
3347 	}
3348 
3349 	mbfl_string_init(&result);
3350 	ret = mbfl_mime_header_encode(&string, &result, charset, transenc, linefeed, indent);
3351 	if (ret != NULL) {
3352 		RETVAL_STRINGL((char *)ret->val, ret->len, 0);	/* the string is already strdup()'ed */
3353 	} else {
3354 		RETVAL_FALSE;
3355 	}
3356 }
3357 /* }}} */
3358 
3359 /* {{{ proto string mb_decode_mimeheader(string string)
3360    Decodes the MIME "encoded-word" in the string */
PHP_FUNCTION(mb_decode_mimeheader)3361 PHP_FUNCTION(mb_decode_mimeheader)
3362 {
3363 	mbfl_string string, result, *ret;
3364 
3365 	mbfl_string_init(&string);
3366 	string.no_language = MBSTRG(language);
3367 	string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
3368 
3369 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", (char **)&string.val, &string.len) == FAILURE) {
3370 		return;
3371 	}
3372 
3373 	mbfl_string_init(&result);
3374 	ret = mbfl_mime_header_decode(&string, &result, MBSTRG(current_internal_encoding)->no_encoding);
3375 	if (ret != NULL) {
3376 		RETVAL_STRINGL((char *)ret->val, ret->len, 0);	/* the string is already strdup()'ed */
3377 	} else {
3378 		RETVAL_FALSE;
3379 	}
3380 }
3381 /* }}} */
3382 
3383 /* {{{ proto string mb_convert_kana(string str [, string option] [, string encoding])
3384    Conversion between full-width character and half-width character (Japanese) */
PHP_FUNCTION(mb_convert_kana)3385 PHP_FUNCTION(mb_convert_kana)
3386 {
3387 	int opt, i;
3388 	mbfl_string string, result, *ret;
3389 	char *optstr = NULL;
3390 	int optstr_len;
3391 	char *encname = NULL;
3392 	int encname_len;
3393 
3394 	mbfl_string_init(&string);
3395 	string.no_language = MBSTRG(language);
3396 	string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
3397 
3398 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|ss", (char **)&string.val, &string.len, &optstr, &optstr_len, &encname, &encname_len) == FAILURE) {
3399 		return;
3400 	}
3401 
3402 	/* option */
3403 	if (optstr != NULL) {
3404 		char *p = optstr;
3405 		int n = optstr_len;
3406 		i = 0;
3407 		opt = 0;
3408 		while (i < n) {
3409 			i++;
3410 			switch (*p++) {
3411 			case 'A':
3412 				opt |= 0x1;
3413 				break;
3414 			case 'a':
3415 				opt |= 0x10;
3416 				break;
3417 			case 'R':
3418 				opt |= 0x2;
3419 				break;
3420 			case 'r':
3421 				opt |= 0x20;
3422 				break;
3423 			case 'N':
3424 				opt |= 0x4;
3425 				break;
3426 			case 'n':
3427 				opt |= 0x40;
3428 				break;
3429 			case 'S':
3430 				opt |= 0x8;
3431 				break;
3432 			case 's':
3433 				opt |= 0x80;
3434 				break;
3435 			case 'K':
3436 				opt |= 0x100;
3437 				break;
3438 			case 'k':
3439 				opt |= 0x1000;
3440 				break;
3441 			case 'H':
3442 				opt |= 0x200;
3443 				break;
3444 			case 'h':
3445 				opt |= 0x2000;
3446 				break;
3447 			case 'V':
3448 				opt |= 0x800;
3449 				break;
3450 			case 'C':
3451 				opt |= 0x10000;
3452 				break;
3453 			case 'c':
3454 				opt |= 0x20000;
3455 				break;
3456 			case 'M':
3457 				opt |= 0x100000;
3458 				break;
3459 			case 'm':
3460 				opt |= 0x200000;
3461 				break;
3462 			}
3463 		}
3464 	} else {
3465 		opt = 0x900;
3466 	}
3467 
3468 	/* encoding */
3469 	if (encname != NULL) {
3470 		string.no_encoding = mbfl_name2no_encoding(encname);
3471 		if (string.no_encoding == mbfl_no_encoding_invalid) {
3472 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encname);
3473 			RETURN_FALSE;
3474 		}
3475 	}
3476 
3477 	ret = mbfl_ja_jp_hantozen(&string, &result, opt);
3478 	if (ret != NULL) {
3479 		RETVAL_STRINGL((char *)ret->val, ret->len, 0);		/* the string is already strdup()'ed */
3480 	} else {
3481 		RETVAL_FALSE;
3482 	}
3483 }
3484 /* }}} */
3485 
3486 #define PHP_MBSTR_STACK_BLOCK_SIZE 32
3487 
3488 /* {{{ proto string mb_convert_variables(string to-encoding, mixed from-encoding, mixed vars [, ...])
3489    Converts the string resource in variables to desired encoding */
PHP_FUNCTION(mb_convert_variables)3490 PHP_FUNCTION(mb_convert_variables)
3491 {
3492 	zval ***args, ***stack, **var, **hash_entry, **zfrom_enc;
3493 	HashTable *target_hash;
3494 	mbfl_string string, result, *ret;
3495 	const mbfl_encoding *from_encoding, *to_encoding;
3496 	mbfl_encoding_detector *identd;
3497 	mbfl_buffer_converter *convd;
3498 	int n, to_enc_len, argc, stack_level, stack_max;
3499 	size_t elistsz;
3500 	const mbfl_encoding **elist;
3501 	char *to_enc;
3502 	void *ptmp;
3503 
3504 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sZ+", &to_enc, &to_enc_len, &zfrom_enc, &args, &argc) == FAILURE) {
3505 		return;
3506 	}
3507 
3508 	/* new encoding */
3509 	to_encoding = mbfl_name2encoding(to_enc);
3510 	if (!to_encoding) {
3511 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", to_enc);
3512 		efree(args);
3513 		RETURN_FALSE;
3514 	}
3515 
3516 	/* initialize string */
3517 	mbfl_string_init(&string);
3518 	mbfl_string_init(&result);
3519 	from_encoding = MBSTRG(current_internal_encoding);
3520 	string.no_encoding = from_encoding->no_encoding;
3521 	string.no_language = MBSTRG(language);
3522 
3523 	/* pre-conversion encoding */
3524 	elist = NULL;
3525 	elistsz = 0;
3526 	switch (Z_TYPE_PP(zfrom_enc)) {
3527 	case IS_ARRAY:
3528 		php_mb_parse_encoding_array(*zfrom_enc, &elist, &elistsz, 0 TSRMLS_CC);
3529 		break;
3530 	default:
3531 		convert_to_string_ex(zfrom_enc);
3532 		php_mb_parse_encoding_list(Z_STRVAL_PP(zfrom_enc), Z_STRLEN_PP(zfrom_enc), &elist, &elistsz, 0 TSRMLS_CC);
3533 		break;
3534 	}
3535 	if (elistsz <= 0) {
3536 		from_encoding = &mbfl_encoding_pass;
3537 	} else if (elistsz == 1) {
3538 		from_encoding = *elist;
3539 	} else {
3540 		/* auto detect */
3541 		from_encoding = NULL;
3542 		stack_max = PHP_MBSTR_STACK_BLOCK_SIZE;
3543 		stack = (zval ***)safe_emalloc(stack_max, sizeof(zval **), 0);
3544 		stack_level = 0;
3545 		identd = mbfl_encoding_detector_new2(elist, elistsz, MBSTRG(strict_detection));
3546 		if (identd != NULL) {
3547 			n = 0;
3548 			while (n < argc || stack_level > 0) {
3549 				if (stack_level <= 0) {
3550 					var = args[n++];
3551 					if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) {
3552 						target_hash = HASH_OF(*var);
3553 						if (target_hash != NULL) {
3554 							zend_hash_internal_pointer_reset(target_hash);
3555 						}
3556 					}
3557 				} else {
3558 					stack_level--;
3559 					var = stack[stack_level];
3560 				}
3561 				if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) {
3562 					target_hash = HASH_OF(*var);
3563 					if (target_hash != NULL) {
3564 						while (zend_hash_get_current_data(target_hash, (void **) &hash_entry) != FAILURE) {
3565 							zend_hash_move_forward(target_hash);
3566 							if (Z_TYPE_PP(hash_entry) == IS_ARRAY || Z_TYPE_PP(hash_entry) == IS_OBJECT) {
3567 								if (stack_level >= stack_max) {
3568 									stack_max += PHP_MBSTR_STACK_BLOCK_SIZE;
3569 									ptmp = erealloc(stack, sizeof(zval **)*stack_max);
3570 									stack = (zval ***)ptmp;
3571 								}
3572 								stack[stack_level] = var;
3573 								stack_level++;
3574 								var = hash_entry;
3575 								target_hash = HASH_OF(*var);
3576 								if (target_hash != NULL) {
3577 									zend_hash_internal_pointer_reset(target_hash);
3578 									continue;
3579 								}
3580 							} else if (Z_TYPE_PP(hash_entry) == IS_STRING) {
3581 								string.val = (unsigned char *)Z_STRVAL_PP(hash_entry);
3582 								string.len = Z_STRLEN_PP(hash_entry);
3583 								if (mbfl_encoding_detector_feed(identd, &string)) {
3584 									goto detect_end;		/* complete detecting */
3585 								}
3586 							}
3587 						}
3588 					}
3589 				} else if (Z_TYPE_PP(var) == IS_STRING) {
3590 					string.val = (unsigned char *)Z_STRVAL_PP(var);
3591 					string.len = Z_STRLEN_PP(var);
3592 					if (mbfl_encoding_detector_feed(identd, &string)) {
3593 						goto detect_end;		/* complete detecting */
3594 					}
3595 				}
3596 			}
3597 detect_end:
3598 			from_encoding = mbfl_encoding_detector_judge2(identd);
3599 			mbfl_encoding_detector_delete(identd);
3600 		}
3601 		efree(stack);
3602 
3603 		if (!from_encoding) {
3604 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to detect encoding");
3605 			from_encoding = &mbfl_encoding_pass;
3606 		}
3607 	}
3608 	if (elist != NULL) {
3609 		efree((void *)elist);
3610 	}
3611 	/* create converter */
3612 	convd = NULL;
3613 	if (from_encoding != &mbfl_encoding_pass) {
3614 		convd = mbfl_buffer_converter_new2(from_encoding, to_encoding, 0);
3615 		if (convd == NULL) {
3616 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create converter");
3617 			RETURN_FALSE;
3618 		}
3619 		mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
3620 		mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
3621 	}
3622 
3623 	/* convert */
3624 	if (convd != NULL) {
3625 		stack_max = PHP_MBSTR_STACK_BLOCK_SIZE;
3626 		stack = (zval ***)safe_emalloc(stack_max, sizeof(zval **), 0);
3627 		stack_level = 0;
3628 		n = 0;
3629 		while (n < argc || stack_level > 0) {
3630 			if (stack_level <= 0) {
3631 				var = args[n++];
3632 				if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) {
3633 					target_hash = HASH_OF(*var);
3634 					if (target_hash != NULL) {
3635 						zend_hash_internal_pointer_reset(target_hash);
3636 					}
3637 				}
3638 			} else {
3639 				stack_level--;
3640 				var = stack[stack_level];
3641 			}
3642 			if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) {
3643 				target_hash = HASH_OF(*var);
3644 				if (target_hash != NULL) {
3645 					while (zend_hash_get_current_data(target_hash, (void **) &hash_entry) != FAILURE) {
3646 						zend_hash_move_forward(target_hash);
3647 						if (Z_TYPE_PP(hash_entry) == IS_ARRAY || Z_TYPE_PP(hash_entry) == IS_OBJECT) {
3648 							if (stack_level >= stack_max) {
3649 								stack_max += PHP_MBSTR_STACK_BLOCK_SIZE;
3650 								ptmp = erealloc(stack, sizeof(zval **)*stack_max);
3651 								stack = (zval ***)ptmp;
3652 							}
3653 							stack[stack_level] = var;
3654 							stack_level++;
3655 							var = hash_entry;
3656 							SEPARATE_ZVAL(hash_entry);
3657 							target_hash = HASH_OF(*var);
3658 							if (target_hash != NULL) {
3659 								zend_hash_internal_pointer_reset(target_hash);
3660 								continue;
3661 							}
3662 						} else if (Z_TYPE_PP(hash_entry) == IS_STRING) {
3663 							string.val = (unsigned char *)Z_STRVAL_PP(hash_entry);
3664 							string.len = Z_STRLEN_PP(hash_entry);
3665 							ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
3666 							if (ret != NULL) {
3667 								if (Z_REFCOUNT_PP(hash_entry) > 1) {
3668 									Z_DELREF_PP(hash_entry);
3669 									MAKE_STD_ZVAL(*hash_entry);
3670 								} else {
3671 									zval_dtor(*hash_entry);
3672 								}
3673 							ZVAL_STRINGL(*hash_entry, (char *)ret->val, ret->len, 0);
3674 						}
3675 					}
3676 				}
3677 			}
3678 		} else if (Z_TYPE_PP(var) == IS_STRING) {
3679 			string.val = (unsigned char *)Z_STRVAL_PP(var);
3680 			string.len = Z_STRLEN_PP(var);
3681 			ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
3682 			if (ret != NULL) {
3683 				zval_dtor(*var);
3684 				ZVAL_STRINGL(*var, (char *)ret->val, ret->len, 0);
3685 				}
3686 			}
3687 		}
3688 		efree(stack);
3689 
3690 		MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
3691 		mbfl_buffer_converter_delete(convd);
3692 	}
3693 
3694 	efree(args);
3695 
3696 	if (from_encoding) {
3697 		RETURN_STRING(from_encoding->name, 1);
3698 	} else {
3699 		RETURN_FALSE;
3700 	}
3701 }
3702 /* }}} */
3703 
/* {{{ HTML numeric entity */
3705 /* {{{ static void php_mb_numericentity_exec() */
3706 static void
php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS,int type)3707 php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS, int type)
3708 {
3709 	char *str, *encoding;
3710 	int str_len, encoding_len;
3711 	zval *zconvmap, **hash_entry;
3712 	HashTable *target_hash;
3713 	size_t argc = ZEND_NUM_ARGS();
3714 	int i, *convmap, *mapelm, mapsize=0;
3715 	zend_bool is_hex = 0;
3716 	mbfl_string string, result, *ret;
3717 	enum mbfl_no_encoding no_encoding;
3718 
3719 	if (zend_parse_parameters(argc TSRMLS_CC, "sz|sb", &str, &str_len, &zconvmap, &encoding, &encoding_len, &is_hex) == FAILURE) {
3720 		return;
3721 	}
3722 
3723 	mbfl_string_init(&string);
3724 	string.no_language = MBSTRG(language);
3725 	string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
3726 	string.val = (unsigned char *)str;
3727 	string.len = str_len;
3728 
3729 	/* encoding */
3730 	if ((argc == 3 || argc == 4) && encoding_len > 0) {
3731 		no_encoding = mbfl_name2no_encoding(encoding);
3732 		if (no_encoding == mbfl_no_encoding_invalid) {
3733 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
3734 			RETURN_FALSE;
3735 		} else {
3736 			string.no_encoding = no_encoding;
3737 		}
3738 	}
3739 
3740 	if (argc == 4) {
3741 		if (type == 0 && is_hex) {
3742 			type = 2; /* output in hex format */
3743 		}
3744 	}
3745 
3746 	/* conversion map */
3747 	convmap = NULL;
3748 	if (Z_TYPE_P(zconvmap) == IS_ARRAY) {
3749 		target_hash = Z_ARRVAL_P(zconvmap);
3750 		zend_hash_internal_pointer_reset(target_hash);
3751 		i = zend_hash_num_elements(target_hash);
3752 		if (i > 0) {
3753 			convmap = (int *)safe_emalloc(i, sizeof(int), 0);
3754 			mapelm = convmap;
3755 			mapsize = 0;
3756 			while (i > 0) {
3757 				if (zend_hash_get_current_data(target_hash, (void **) &hash_entry) == FAILURE) {
3758 					break;
3759 				}
3760 				convert_to_long_ex(hash_entry);
3761 				*mapelm++ = Z_LVAL_PP(hash_entry);
3762 				mapsize++;
3763 				i--;
3764 				zend_hash_move_forward(target_hash);
3765 			}
3766 		}
3767 	}
3768 	if (convmap == NULL) {
3769 		RETURN_FALSE;
3770 	}
3771 	mapsize /= 4;
3772 
3773 	ret = mbfl_html_numeric_entity(&string, &result, convmap, mapsize, type);
3774 	if (ret != NULL) {
3775 		RETVAL_STRINGL((char *)ret->val, ret->len, 0);
3776 	} else {
3777 		RETVAL_FALSE;
3778 	}
3779 	efree((void *)convmap);
3780 }
3781 /* }}} */
3782 
3783 /* {{{ proto string mb_encode_numericentity(string string, array convmap [, string encoding [, bool is_hex]])
3784    Converts specified characters to HTML numeric entities */
PHP_FUNCTION(mb_encode_numericentity)3785 PHP_FUNCTION(mb_encode_numericentity)
3786 {
3787 	php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
3788 }
3789 /* }}} */
3790 
3791 /* {{{ proto string mb_decode_numericentity(string string, array convmap [, string encoding])
3792    Converts HTML numeric entities to character code */
PHP_FUNCTION(mb_decode_numericentity)3793 PHP_FUNCTION(mb_decode_numericentity)
3794 {
3795 	php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
3796 }
3797 /* }}} */
/* }}} */
3799 
/* {{{ proto bool mb_send_mail(string to, string subject, string message [, string additional_headers [, string additional_parameters]])
 *  Sends an email message with MIME scheme
 */
3803 
/*
 * If str[pos] starts a folded-header separator (CR LF followed by a space or
 * a tab), move pos to the last character of the whitespace run that follows
 * the CR LF and `continue` the enclosing loop, so the caller's code after the
 * macro is skipped for the separator.
 *
 * Must be used directly inside a loop body: because of the bare `continue`
 * it deliberately is NOT wrapped in do { } while (0). pos must be an lvalue.
 */
#define SKIP_LONG_HEADER_SEP_MBSTRING(str, pos) \
	if ((str)[(pos)] == '\r' && (str)[(pos) + 1] == '\n' && ((str)[(pos) + 2] == ' ' || (str)[(pos) + 2] == '\t')) { \
		(pos) += 2; \
		while ((str)[(pos) + 1] == ' ' || (str)[(pos) + 1] == '\t') { \
			(pos)++; \
		} \
		continue; \
	}
3812 
/*
 * Replace every NUL byte inside the first len bytes of str with a space,
 * in place. Relies on two scratch variables, `char *pp, *ee;`, declared in
 * the calling scope.
 */
#define MAIL_ASCIIZ_CHECK_MBSTRING(str, len) do { \
	pp = (str); \
	ee = pp + (len); \
	while ((pp = memchr(pp, '\0', (ee - pp)))) { \
		*pp = ' '; \
	} \
} while (0)
3819 
/*
 * Append one character to the smart_str named `token` in the calling scope.
 * When token owns a buffer (a > 0) the character is really appended; when it
 * only borrows a pointer into the input (a == 0) the length is simply
 * extended by one and ch itself is not stored.
 */
#define APPEND_ONE_CHAR(ch) do { \
	if (token.a > 0) { \
		smart_str_appendc(&token, (ch)); \
	} else { \
		token.len++; \
	} \
} while (0)
3827 
/*
 * Turn a borrowing smart_str (a == 0) into an owning one: allocate a buffer
 * whose capacity is the smallest power of two >= len (plus one extra byte)
 * and copy the len borrowed bytes into it. A smart_str that already owns its
 * buffer (a != 0) is left untouched. No terminator is written here.
 */
#define SEPARATE_SMART_STR(str) do {\
	if ((str)->a == 0) { \
		char *tmp_ptr; \
		(str)->a = 1; \
		while ((str)->a < (str)->len) { \
			(str)->a <<= 1; \
		} \
		tmp_ptr = emalloc((str)->a + 1); \
		memcpy(tmp_ptr, (str)->c, (str)->len); \
		(str)->c = tmp_ptr; \
	} \
} while (0)
3840 
/*
 * Release a smart_str only if it owns its buffer (a > 0). A smart_str with
 * a == 0 merely points into memory owned by someone else and is left alone.
 * Also installed as the element destructor of mb_send_mail()'s header table.
 */
static void my_smart_str_dtor(smart_str *s)
{
	if (s->a > 0) {
		smart_str_free(s);
	}
}
3847 
/*
 * Parse a block of mail headers into ht.
 *
 * ht       receives one smart_str per header, keyed by the upper-cased field
 *          name (key length excludes any terminator). A stored value either
 *          borrows a pointer into str (a == 0) or owns a buffer (a > 0, used
 *          for values continued over several lines).
 * str      header text; it is not modified and must outlive ht.
 * str_len  number of bytes of str to scan.
 *
 * Scanning stops at the end of input or at an empty line. Returns the final
 * value of the internal state machine (see the diagram below).
 */
static int _php_mbstr_parse_mail_headers(HashTable *ht, const char *str, size_t str_len)
{
	const char *ps;
	size_t icnt;
	int state = 0;
	int crlf_state = -1;

	smart_str token = { 0, 0, 0 };
	smart_str fld_name = { 0, 0, 0 }, fld_val = { 0, 0, 0 };

	ps = str;
	icnt = str_len;

	/*
	 *             C o n t e n t - T y p e :   t e x t / h t m l \r\n
	 *             ^ ^^^^^^^^^^^^^^^^^^^^^ ^^^ ^^^^^^^^^^^^^^^^^ ^^^^
	 *      state  0            1           2          3
	 *
	 *             C o n t e n t - T y p e :   t e x t / h t m l \r\n
	 *             ^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^
	 * crlf_state -1                       0                     1 -1
	 *
	 * state 4: a continuation line of the current value has just started
	 * state 5: a whitespace-led line outside a value; nothing handles this
	 *          state afterwards, so no further header is stored
	 */

	while (icnt > 0) {
		switch (*ps) {
			case ':':
				if (crlf_state == 1) {
					/* a lone CR seen earlier belongs to the token */
					APPEND_ONE_CHAR('\r');
				}

				if (state == 0 || state == 1) {
					/* first colon of the line ends the field name */
					fld_name = token;

					state = 2;
				} else {
					APPEND_ONE_CHAR(*ps);
				}

				crlf_state = 0;
				break;

			case '\n':
				if (crlf_state == -1) {
					/* empty line: end of the header block */
					goto out;
				}
				crlf_state = -1;
				break;

			case '\r':
				if (crlf_state == 1) {
					APPEND_ONE_CHAR('\r');
				} else {
					crlf_state = 1;
				}
				break;

			case ' ': case '\t':
				if (crlf_state == -1) {
					if (state == 3) {
						/* continuing from the previous line: the value
						 * can no longer be a contiguous slice of str */
						SEPARATE_SMART_STR(&token);
						state = 4;
					} else {
						/* simply skipping this new line */
						state = 5;
					}
				} else {
					if (crlf_state == 1) {
						APPEND_ONE_CHAR('\r');
					}
					if (state == 1 || state == 3) {
						APPEND_ONE_CHAR(*ps);
					}
				}
				crlf_state = 0;
				break;

			default:
				switch (state) {
					case 0:
						token.c = (char *)ps;
						token.len = 0;
						token.a = 0;
						state = 1;
						break;

					case 2:
						if (crlf_state != -1) {
							/* first character of the field value */
							token.c = (char *)ps;
							token.len = 0;
							token.a = 0;

							state = 3;
							break;
						}
						/* break is missing intentionally */

					case 3:
						if (crlf_state == -1) {
							/* a new line starts with a non-blank
							 * character: the pending header is complete */
							fld_val = token;

							if (fld_name.c != NULL && fld_val.c != NULL) {
								void *dummy;

								/* FIXME: some locale free implementation is
								 * really required here,,, */
								SEPARATE_SMART_STR(&fld_name);
								php_strtoupper(fld_name.c, fld_name.len);

								zend_hash_update(ht, (char *)fld_name.c, fld_name.len, &fld_val, sizeof(smart_str), (void **)&dummy);

								my_smart_str_dtor(&fld_name);
							} else {
								/* value is not handed to ht: free it if
								 * it owns a buffer instead of leaking it */
								my_smart_str_dtor(&fld_val);
							}

							memset(&fld_name, 0, sizeof(smart_str));
							memset(&fld_val, 0, sizeof(smart_str));

							token.c = (char *)ps;
							token.len = 0;
							token.a = 0;

							state = 1;
						}
						break;

					case 4:
						/* join continuation lines with a single space */
						APPEND_ONE_CHAR(' ');
						state = 3;
						break;
				}

				if (crlf_state == 1) {
					APPEND_ONE_CHAR('\r');
				}

				APPEND_ONE_CHAR(*ps);

				crlf_state = 0;
				break;
		}
		ps++, icnt--;
	}
out:
	if (state == 2) {
		/* "Name:" with nothing after it: store an empty value */
		token.c = "";
		token.len = 0;
		token.a = 0;

		state = 3;
	}
	if (state == 3) {
		fld_val = token;

		if (fld_name.c != NULL && fld_val.c != NULL) {
			void *dummy;

			/* FIXME: some locale free implementation is
			 * really required here,,, */
			SEPARATE_SMART_STR(&fld_name);
			php_strtoupper(fld_name.c, fld_name.len);

			zend_hash_update(ht, (char *)fld_name.c, fld_name.len, &fld_val, sizeof(smart_str), (void **)&dummy);

			my_smart_str_dtor(&fld_name);
		} else {
			my_smart_str_dtor(&fld_val);
		}
	} else {
		/* the pending token is dropped in every other state; if it was
		 * separated for a continuation line it owns memory, so free it */
		my_smart_str_dtor(&token);
	}
	return state;
}
4017 
PHP_FUNCTION(mb_send_mail)4018 PHP_FUNCTION(mb_send_mail)
4019 {
4020 	int n;
4021 	char *to = NULL;
4022 	int to_len;
4023 	char *message = NULL;
4024 	int message_len;
4025 	char *headers = NULL;
4026 	int headers_len;
4027 	char *subject = NULL;
4028 	int subject_len;
4029 	char *extra_cmd = NULL;
4030 	int extra_cmd_len;
4031 	int i;
4032 	char *to_r = NULL;
4033 	char *force_extra_parameters = INI_STR("mail.force_extra_parameters");
4034 	struct {
4035 		int cnt_type:1;
4036 		int cnt_trans_enc:1;
4037 	} suppressed_hdrs = { 0, 0 };
4038 
4039 	char *message_buf = NULL, *subject_buf = NULL, *p;
4040 	mbfl_string orig_str, conv_str;
4041 	mbfl_string *pstr;	/* pointer to mbfl string for return value */
4042 	enum mbfl_no_encoding
4043 		tran_cs,	/* transfar text charset */
4044 		head_enc,	/* header transfar encoding */
4045 		body_enc;	/* body transfar encoding */
4046 	mbfl_memory_device device;	/* automatic allocateable buffer for additional header */
4047 	const mbfl_language *lang;
4048 	int err = 0;
4049 	HashTable ht_headers;
4050 	smart_str *s;
4051 	extern void mbfl_memory_device_unput(mbfl_memory_device *device);
4052 	char *pp, *ee;
4053 
4054 	/* initialize */
4055 	mbfl_memory_device_init(&device, 0, 0);
4056 	mbfl_string_init(&orig_str);
4057 	mbfl_string_init(&conv_str);
4058 
4059 	/* character-set, transfer-encoding */
4060 	tran_cs = mbfl_no_encoding_utf8;
4061 	head_enc = mbfl_no_encoding_base64;
4062 	body_enc = mbfl_no_encoding_base64;
4063 	lang = mbfl_no2language(MBSTRG(language));
4064 	if (lang != NULL) {
4065 		tran_cs = lang->mail_charset;
4066 		head_enc = lang->mail_header_encoding;
4067 		body_enc = lang->mail_body_encoding;
4068 	}
4069 
4070 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sss|ss", &to, &to_len, &subject, &subject_len, &message, &message_len, &headers, &headers_len, &extra_cmd, &extra_cmd_len) == FAILURE) {
4071 		return;
4072 	}
4073 
4074 	/* ASCIIZ check */
4075 	MAIL_ASCIIZ_CHECK_MBSTRING(to, to_len);
4076 	MAIL_ASCIIZ_CHECK_MBSTRING(subject, subject_len);
4077 	MAIL_ASCIIZ_CHECK_MBSTRING(message, message_len);
4078 	if (headers) {
4079 		MAIL_ASCIIZ_CHECK_MBSTRING(headers, headers_len);
4080 	}
4081 	if (extra_cmd) {
4082 		MAIL_ASCIIZ_CHECK_MBSTRING(extra_cmd, extra_cmd_len);
4083 	}
4084 
4085 	zend_hash_init(&ht_headers, 0, NULL, (dtor_func_t) my_smart_str_dtor, 0);
4086 
4087 	if (headers != NULL) {
4088 		_php_mbstr_parse_mail_headers(&ht_headers, headers, headers_len);
4089 	}
4090 
4091 	if (zend_hash_find(&ht_headers, "CONTENT-TYPE", sizeof("CONTENT-TYPE") - 1, (void **)&s) == SUCCESS) {
4092 		char *tmp;
4093 		char *param_name;
4094 		char *charset = NULL;
4095 
4096 		SEPARATE_SMART_STR(s);
4097 		smart_str_0(s);
4098 
4099 		p = strchr(s->c, ';');
4100 
4101 		if (p != NULL) {
4102 			/* skipping the padded spaces */
4103 			do {
4104 				++p;
4105 			} while (*p == ' ' || *p == '\t');
4106 
4107 			if (*p != '\0') {
4108 				if ((param_name = php_strtok_r(p, "= ", &tmp)) != NULL) {
4109 					if (strcasecmp(param_name, "charset") == 0) {
4110 						enum mbfl_no_encoding _tran_cs = tran_cs;
4111 
4112 						charset = php_strtok_r(NULL, "= \"", &tmp);
4113 						if (charset != NULL) {
4114 							_tran_cs = mbfl_name2no_encoding(charset);
4115 						}
4116 
4117 						if (_tran_cs == mbfl_no_encoding_invalid) {
4118 							php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unsupported charset \"%s\" - will be regarded as ascii", charset);
4119 							_tran_cs = mbfl_no_encoding_ascii;
4120 						}
4121 						tran_cs = _tran_cs;
4122 					}
4123 				}
4124 			}
4125 		}
4126 		suppressed_hdrs.cnt_type = 1;
4127 	}
4128 
4129 	if (zend_hash_find(&ht_headers, "CONTENT-TRANSFER-ENCODING", sizeof("CONTENT-TRANSFER-ENCODING") - 1, (void **)&s) == SUCCESS) {
4130 		enum mbfl_no_encoding _body_enc;
4131 		SEPARATE_SMART_STR(s);
4132 		smart_str_0(s);
4133 
4134 		_body_enc = mbfl_name2no_encoding(s->c);
4135 		switch (_body_enc) {
4136 			case mbfl_no_encoding_base64:
4137 			case mbfl_no_encoding_7bit:
4138 			case mbfl_no_encoding_8bit:
4139 				body_enc = _body_enc;
4140 				break;
4141 
4142 			default:
4143 				php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unsupported transfer encoding \"%s\" - will be regarded as 8bit", s->c);
4144 				body_enc =	mbfl_no_encoding_8bit;
4145 				break;
4146 		}
4147 		suppressed_hdrs.cnt_trans_enc = 1;
4148 	}
4149 
4150 	/* To: */
4151 	if (to != NULL) {
4152 		if (to_len > 0) {
4153 			to_r = estrndup(to, to_len);
4154 			for (; to_len; to_len--) {
4155 				if (!isspace((unsigned char) to_r[to_len - 1])) {
4156 					break;
4157 				}
4158 				to_r[to_len - 1] = '\0';
4159 			}
4160 			for (i = 0; to_r[i]; i++) {
4161 			if (iscntrl((unsigned char) to_r[i])) {
4162 				/* According to RFC 822, section 3.1.1 long headers may be separated into
4163 				 * parts using CRLF followed at least one linear-white-space character ('\t' or ' ').
4164 				 * To prevent these separators from being replaced with a space, we use the
4165 				 * SKIP_LONG_HEADER_SEP_MBSTRING to skip over them.
4166 				 */
4167 				SKIP_LONG_HEADER_SEP_MBSTRING(to_r, i);
4168 				to_r[i] = ' ';
4169 			}
4170 			}
4171 		} else {
4172 			to_r = to;
4173 		}
4174 	} else {
4175 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Missing To: field");
4176 		err = 1;
4177 	}
4178 
4179 	/* Subject: */
4180 	if (subject != NULL && subject_len >= 0) {
4181 		orig_str.no_language = MBSTRG(language);
4182 		orig_str.val = (unsigned char *)subject;
4183 		orig_str.len = subject_len;
4184 		orig_str.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
4185 		if (orig_str.no_encoding == mbfl_no_encoding_invalid || orig_str.no_encoding == mbfl_no_encoding_pass) {
4186 			const mbfl_encoding *encoding = mbfl_identify_encoding2(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
4187 			orig_str.no_encoding = encoding ? encoding->no_encoding: mbfl_no_encoding_invalid;
4188 		}
4189 		pstr = mbfl_mime_header_encode(&orig_str, &conv_str, tran_cs, head_enc, "\n", sizeof("Subject: [PHP-jp nnnnnnnn]"));
4190 		if (pstr != NULL) {
4191 			subject_buf = subject = (char *)pstr->val;
4192 		}
4193 	} else {
4194 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Missing Subject: field");
4195 		err = 1;
4196 	}
4197 
4198 	/* message body */
4199 	if (message != NULL) {
4200 		orig_str.no_language = MBSTRG(language);
4201 		orig_str.val = (unsigned char *)message;
4202 		orig_str.len = (unsigned int)message_len;
4203 		orig_str.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
4204 
4205 		if (orig_str.no_encoding == mbfl_no_encoding_invalid || orig_str.no_encoding == mbfl_no_encoding_pass) {
4206 			const mbfl_encoding *encoding = mbfl_identify_encoding2(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
4207 			orig_str.no_encoding = encoding ? encoding->no_encoding: mbfl_no_encoding_invalid;
4208 		}
4209 
4210 		pstr = NULL;
4211 		{
4212 			mbfl_string tmpstr;
4213 
4214 			if (mbfl_convert_encoding(&orig_str, &tmpstr, tran_cs) != NULL) {
4215 				tmpstr.no_encoding=mbfl_no_encoding_8bit;
4216 				pstr = mbfl_convert_encoding(&tmpstr, &conv_str, body_enc);
4217 				efree(tmpstr.val);
4218 			}
4219 		}
4220 		if (pstr != NULL) {
4221 			message_buf = message = (char *)pstr->val;
4222 		}
4223 	} else {
4224 		/* this is not really an error, so it is allowed. */
4225 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty message body");
4226 		message = NULL;
4227 	}
4228 
4229 	/* other headers */
4230 #define PHP_MBSTR_MAIL_MIME_HEADER1 "MIME-Version: 1.0"
4231 #define PHP_MBSTR_MAIL_MIME_HEADER2 "Content-Type: text/plain"
4232 #define PHP_MBSTR_MAIL_MIME_HEADER3 "; charset="
4233 #define PHP_MBSTR_MAIL_MIME_HEADER4 "Content-Transfer-Encoding: "
4234 	if (headers != NULL) {
4235 		p = headers;
4236 		n = headers_len;
4237 		mbfl_memory_device_strncat(&device, p, n);
4238 		if (n > 0 && p[n - 1] != '\n') {
4239 			mbfl_memory_device_strncat(&device, "\n", 1);
4240 		}
4241 	}
4242 
4243 	if (!zend_hash_exists(&ht_headers, "MIME-VERSION", sizeof("MIME-VERSION") - 1)) {
4244 		mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER1, sizeof(PHP_MBSTR_MAIL_MIME_HEADER1) - 1);
4245 		mbfl_memory_device_strncat(&device, "\n", 1);
4246 	}
4247 
4248 	if (!suppressed_hdrs.cnt_type) {
4249 		mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER2, sizeof(PHP_MBSTR_MAIL_MIME_HEADER2) - 1);
4250 
4251 		p = (char *)mbfl_no2preferred_mime_name(tran_cs);
4252 		if (p != NULL) {
4253 			mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER3, sizeof(PHP_MBSTR_MAIL_MIME_HEADER3) - 1);
4254 			mbfl_memory_device_strcat(&device, p);
4255 		}
4256 		mbfl_memory_device_strncat(&device, "\n", 1);
4257 	}
4258 	if (!suppressed_hdrs.cnt_trans_enc) {
4259 		mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER4, sizeof(PHP_MBSTR_MAIL_MIME_HEADER4) - 1);
4260 		p = (char *)mbfl_no2preferred_mime_name(body_enc);
4261 		if (p == NULL) {
4262 			p = "7bit";
4263 		}
4264 		mbfl_memory_device_strcat(&device, p);
4265 		mbfl_memory_device_strncat(&device, "\n", 1);
4266 	}
4267 
4268 	mbfl_memory_device_unput(&device);
4269 	mbfl_memory_device_output('\0', &device);
4270 	headers = (char *)device.buffer;
4271 
4272 	if (force_extra_parameters) {
4273 		extra_cmd = php_escape_shell_cmd(force_extra_parameters);
4274 	} else if (extra_cmd) {
4275 		extra_cmd = php_escape_shell_cmd(extra_cmd);
4276 	}
4277 
4278 	if (!err && php_mail(to_r, subject, message, headers, extra_cmd TSRMLS_CC)) {
4279 		RETVAL_TRUE;
4280 	} else {
4281 		RETVAL_FALSE;
4282 	}
4283 
4284 	if (extra_cmd) {
4285 		efree(extra_cmd);
4286 	}
4287 	if (to_r != to) {
4288 		efree(to_r);
4289 	}
4290 	if (subject_buf) {
4291 		efree((void *)subject_buf);
4292 	}
4293 	if (message_buf) {
4294 		efree((void *)message_buf);
4295 	}
4296 	mbfl_memory_device_clear(&device);
4297 	zend_hash_destroy(&ht_headers);
4298 }
4299 
/* the helper macros used by mb_send_mail() are not needed past this point */
#undef SKIP_LONG_HEADER_SEP_MBSTRING
#undef MAIL_ASCIIZ_CHECK_MBSTRING
#undef APPEND_ONE_CHAR
#undef SEPARATE_SMART_STR
#undef PHP_MBSTR_MAIL_MIME_HEADER1
#undef PHP_MBSTR_MAIL_MIME_HEADER2
#undef PHP_MBSTR_MAIL_MIME_HEADER3
#undef PHP_MBSTR_MAIL_MIME_HEADER4
/* }}} */
4309 
4310 /* {{{ proto mixed mb_get_info([string type])
4311    Returns the current settings of mbstring */
PHP_FUNCTION(mb_get_info)4312 PHP_FUNCTION(mb_get_info)
4313 {
4314 	char *typ = NULL;
4315 	int typ_len;
4316 	size_t n;
4317 	char *name;
4318 	const struct mb_overload_def *over_func;
4319 	zval *row1, *row2;
4320 	const mbfl_language *lang = mbfl_no2language(MBSTRG(language));
4321 	const mbfl_encoding **entry;
4322 
4323 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &typ, &typ_len) == FAILURE) {
4324 		RETURN_FALSE;
4325 	}
4326 
4327 	if (!typ || !strcasecmp("all", typ)) {
4328 		array_init(return_value);
4329 		if (MBSTRG(current_internal_encoding)) {
4330 			add_assoc_string(return_value, "internal_encoding", (char *)MBSTRG(current_internal_encoding)->name, 1);
4331 		}
4332 		if (MBSTRG(http_input_identify)) {
4333 			add_assoc_string(return_value, "http_input", (char *)MBSTRG(http_input_identify)->name, 1);
4334 		}
4335 		if (MBSTRG(current_http_output_encoding)) {
4336 			add_assoc_string(return_value, "http_output", (char *)MBSTRG(current_http_output_encoding)->name, 1);
4337 		}
4338 		if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes"), 0)) != NULL) {
4339 			add_assoc_string(return_value, "http_output_conv_mimetypes", name, 1);
4340 		}
4341 		add_assoc_long(return_value, "func_overload", MBSTRG(func_overload));
4342 		if (MBSTRG(func_overload)){
4343 			over_func = &(mb_ovld[0]);
4344 			MAKE_STD_ZVAL(row1);
4345 			array_init(row1);
4346 			while (over_func->type > 0) {
4347 				if ((MBSTRG(func_overload) & over_func->type) == over_func->type ) {
4348 					add_assoc_string(row1, over_func->orig_func, over_func->ovld_func, 1);
4349 				}
4350 				over_func++;
4351 			}
4352 			add_assoc_zval(return_value, "func_overload_list", row1);
4353 		} else {
4354 			add_assoc_string(return_value, "func_overload_list", "no overload", 1);
4355  		}
4356 		if (lang != NULL) {
4357 			if ((name = (char *)mbfl_no_encoding2name(lang->mail_charset)) != NULL) {
4358 				add_assoc_string(return_value, "mail_charset", name, 1);
4359 			}
4360 			if ((name = (char *)mbfl_no_encoding2name(lang->mail_header_encoding)) != NULL) {
4361 				add_assoc_string(return_value, "mail_header_encoding", name, 1);
4362 			}
4363 			if ((name = (char *)mbfl_no_encoding2name(lang->mail_body_encoding)) != NULL) {
4364 				add_assoc_string(return_value, "mail_body_encoding", name, 1);
4365 			}
4366 		}
4367 		add_assoc_long(return_value, "illegal_chars", MBSTRG(illegalchars));
4368 		if (MBSTRG(encoding_translation)) {
4369 			add_assoc_string(return_value, "encoding_translation", "On", 1);
4370 		} else {
4371 			add_assoc_string(return_value, "encoding_translation", "Off", 1);
4372 		}
4373 		if ((name = (char *)mbfl_no_language2name(MBSTRG(language))) != NULL) {
4374 			add_assoc_string(return_value, "language", name, 1);
4375 		}
4376 		n = MBSTRG(current_detect_order_list_size);
4377 		entry = MBSTRG(current_detect_order_list);
4378 		if (n > 0) {
4379 			size_t i;
4380 			MAKE_STD_ZVAL(row2);
4381 			array_init(row2);
4382 			for (i = 0; i < n; i++) {
4383 				add_next_index_string(row2, (*entry)->name, 1);
4384 				entry++;
4385 			}
4386 			add_assoc_zval(return_value, "detect_order", row2);
4387 		}
4388 		if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
4389 			add_assoc_string(return_value, "substitute_character", "none", 1);
4390 		} else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
4391 			add_assoc_string(return_value, "substitute_character", "long", 1);
4392 		} else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
4393 			add_assoc_string(return_value, "substitute_character", "entity", 1);
4394 		} else {
4395 			add_assoc_long(return_value, "substitute_character", MBSTRG(current_filter_illegal_substchar));
4396 		}
4397 		if (MBSTRG(strict_detection)) {
4398 			add_assoc_string(return_value, "strict_detection", "On", 1);
4399 		} else {
4400 			add_assoc_string(return_value, "strict_detection", "Off", 1);
4401 		}
4402 	} else if (!strcasecmp("internal_encoding", typ)) {
4403 		if (MBSTRG(current_internal_encoding)) {
4404 			RETVAL_STRING((char *)MBSTRG(current_internal_encoding)->name, 1);
4405 		}
4406 	} else if (!strcasecmp("http_input", typ)) {
4407 		if (MBSTRG(http_input_identify)) {
4408 			RETVAL_STRING((char *)MBSTRG(http_input_identify)->name, 1);
4409 		}
4410 	} else if (!strcasecmp("http_output", typ)) {
4411 		if (MBSTRG(current_http_output_encoding)) {
4412 			RETVAL_STRING((char *)MBSTRG(current_http_output_encoding)->name, 1);
4413 		}
4414 	} else if (!strcasecmp("http_output_conv_mimetypes", typ)) {
4415 		if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes"), 0)) != NULL) {
4416 			RETVAL_STRING(name, 1);
4417 		}
4418 	} else if (!strcasecmp("func_overload", typ)) {
4419  		RETVAL_LONG(MBSTRG(func_overload));
4420 	} else if (!strcasecmp("func_overload_list", typ)) {
4421 		if (MBSTRG(func_overload)){
4422 				over_func = &(mb_ovld[0]);
4423 				array_init(return_value);
4424 				while (over_func->type > 0) {
4425 					if ((MBSTRG(func_overload) & over_func->type) == over_func->type ) {
4426 						add_assoc_string(return_value, over_func->orig_func, over_func->ovld_func, 1);
4427 					}
4428 					over_func++;
4429 				}
4430 		} else {
4431 			RETVAL_STRING("no overload", 1);
4432 		}
4433 	} else if (!strcasecmp("mail_charset", typ)) {
4434 		if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_charset)) != NULL) {
4435 			RETVAL_STRING(name, 1);
4436 		}
4437 	} else if (!strcasecmp("mail_header_encoding", typ)) {
4438 		if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_header_encoding)) != NULL) {
4439 			RETVAL_STRING(name, 1);
4440 		}
4441 	} else if (!strcasecmp("mail_body_encoding", typ)) {
4442 		if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_body_encoding)) != NULL) {
4443 			RETVAL_STRING(name, 1);
4444 		}
4445 	} else if (!strcasecmp("illegal_chars", typ)) {
4446 		RETVAL_LONG(MBSTRG(illegalchars));
4447 	} else if (!strcasecmp("encoding_translation", typ)) {
4448 		if (MBSTRG(encoding_translation)) {
4449 			RETVAL_STRING("On", 1);
4450 		} else {
4451 			RETVAL_STRING("Off", 1);
4452 		}
4453 	} else if (!strcasecmp("language", typ)) {
4454 		if ((name = (char *)mbfl_no_language2name(MBSTRG(language))) != NULL) {
4455 			RETVAL_STRING(name, 1);
4456 		}
4457 	} else if (!strcasecmp("detect_order", typ)) {
4458 		n = MBSTRG(current_detect_order_list_size);
4459 		entry = MBSTRG(current_detect_order_list);
4460 		if (n > 0) {
4461 			size_t i;
4462 			array_init(return_value);
4463 			for (i = 0; i < n; i++) {
4464 				add_next_index_string(return_value, (*entry)->name, 1);
4465 				entry++;
4466 			}
4467 		}
4468 	} else if (!strcasecmp("substitute_character", typ)) {
4469 		if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
4470 			RETVAL_STRING("none", 1);
4471 		} else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
4472 			RETVAL_STRING("long", 1);
4473 		} else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
4474 			RETVAL_STRING("entity", 1);
4475 		} else {
4476 			RETVAL_LONG(MBSTRG(current_filter_illegal_substchar));
4477 		}
4478 	} else if (!strcasecmp("strict_detection", typ)) {
4479 		if (MBSTRG(strict_detection)) {
4480 			RETVAL_STRING("On", 1);
4481 		} else {
4482 			RETVAL_STRING("Off", 1);
4483 		}
4484 	} else {
4485 		RETURN_FALSE;
4486 	}
4487 }
4488 /* }}} */
4489 
4490 /* {{{ proto bool mb_check_encoding([string var[, string encoding]])
4491    Check if the string is valid for the specified encoding */
PHP_FUNCTION(mb_check_encoding)4492 PHP_FUNCTION(mb_check_encoding)
4493 {
4494 	char *var = NULL;
4495 	int var_len;
4496 	char *enc = NULL;
4497 	int enc_len;
4498 	mbfl_buffer_converter *convd;
4499 	const mbfl_encoding *encoding = MBSTRG(current_internal_encoding);
4500 	mbfl_string string, result, *ret = NULL;
4501 	long illegalchars = 0;
4502 
4503 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|ss", &var, &var_len, &enc, &enc_len) == FAILURE) {
4504 		RETURN_FALSE;
4505 	}
4506 
4507 	if (var == NULL) {
4508 		RETURN_BOOL(MBSTRG(illegalchars) == 0);
4509 	}
4510 
4511 	if (enc != NULL) {
4512 		encoding = mbfl_name2encoding(enc);
4513 		if (!encoding || encoding == &mbfl_encoding_pass) {
4514 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid encoding \"%s\"", enc);
4515 			RETURN_FALSE;
4516 		}
4517 	}
4518 
4519 	convd = mbfl_buffer_converter_new2(encoding, encoding, 0);
4520 	if (convd == NULL) {
4521 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create converter");
4522 		RETURN_FALSE;
4523 	}
4524 	mbfl_buffer_converter_illegal_mode(convd, MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE);
4525 	mbfl_buffer_converter_illegal_substchar(convd, 0);
4526 
4527 	/* initialize string */
4528 	mbfl_string_init_set(&string, mbfl_no_language_neutral, encoding->no_encoding);
4529 	mbfl_string_init(&result);
4530 
4531 	string.val = (unsigned char *)var;
4532 	string.len = var_len;
4533 	ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
4534 	illegalchars = mbfl_buffer_illegalchars(convd);
4535 	mbfl_buffer_converter_delete(convd);
4536 
4537 	RETVAL_FALSE;
4538 	if (ret != NULL) {
4539 		if (illegalchars == 0 && string.len == result.len && memcmp(string.val, result.val, string.len) == 0) {
4540 			RETVAL_TRUE;
4541 		}
4542 		mbfl_string_clear(&result);
4543 	}
4544 }
4545 /* }}} */
4546 
4547 
4548 /* {{{ php_mb_populate_current_detect_order_list */
php_mb_populate_current_detect_order_list(TSRMLS_D)4549 static void php_mb_populate_current_detect_order_list(TSRMLS_D)
4550 {
4551 	const mbfl_encoding **entry = 0;
4552 	size_t nentries;
4553 
4554 	if (MBSTRG(current_detect_order_list)) {
4555 		return;
4556 	}
4557 
4558 	if (MBSTRG(detect_order_list) && MBSTRG(detect_order_list_size)) {
4559 		nentries = MBSTRG(detect_order_list_size);
4560 		entry = (const mbfl_encoding **)safe_emalloc(nentries, sizeof(mbfl_encoding*), 0);
4561 		memcpy(entry, MBSTRG(detect_order_list), sizeof(mbfl_encoding*) * nentries);
4562 	} else {
4563 		const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
4564 		size_t i;
4565 		nentries = MBSTRG(default_detect_order_list_size);
4566 		entry = (const mbfl_encoding **)safe_emalloc(nentries, sizeof(mbfl_encoding*), 0);
4567 		for (i = 0; i < nentries; i++) {
4568 			entry[i] = mbfl_no2encoding(src[i]);
4569 		}
4570 	}
4571 	MBSTRG(current_detect_order_list) = entry;
4572 	MBSTRG(current_detect_order_list_size) = nentries;
4573 }
4574 
4575 /* {{{ static int php_mb_encoding_translation() */
php_mb_encoding_translation(TSRMLS_D)4576 static int php_mb_encoding_translation(TSRMLS_D)
4577 {
4578 	return MBSTRG(encoding_translation);
4579 }
4580 /* }}} */
4581 
4582 /* {{{ MBSTRING_API size_t php_mb_mbchar_bytes_ex() */
php_mb_mbchar_bytes_ex(const char * s,const mbfl_encoding * enc)4583 MBSTRING_API size_t php_mb_mbchar_bytes_ex(const char *s, const mbfl_encoding *enc)
4584 {
4585 	if (enc != NULL) {
4586 		if (enc->flag & MBFL_ENCTYPE_MBCS) {
4587 			if (enc->mblen_table != NULL) {
4588 				if (s != NULL) return enc->mblen_table[*(unsigned char *)s];
4589 			}
4590 		} else if (enc->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
4591 			return 2;
4592 		} else if (enc->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
4593 			return 4;
4594 		}
4595 	}
4596 	return 1;
4597 }
4598 /* }}} */
4599 
4600 /* {{{ MBSTRING_API size_t php_mb_mbchar_bytes() */
php_mb_mbchar_bytes(const char * s TSRMLS_DC)4601 MBSTRING_API size_t php_mb_mbchar_bytes(const char *s TSRMLS_DC)
4602 {
4603 	return php_mb_mbchar_bytes_ex(s, MBSTRG(internal_encoding));
4604 }
4605 /* }}} */
4606 
4607 /* {{{ MBSTRING_API char *php_mb_safe_strrchr_ex() */
php_mb_safe_strrchr_ex(const char * s,unsigned int c,size_t nbytes,const mbfl_encoding * enc)4608 MBSTRING_API char *php_mb_safe_strrchr_ex(const char *s, unsigned int c, size_t nbytes, const mbfl_encoding *enc)
4609 {
4610 	register const char *p = s;
4611 	char *last=NULL;
4612 
4613 	if (nbytes == (size_t)-1) {
4614 		size_t nb = 0;
4615 
4616 		while (*p != '\0') {
4617 			if (nb == 0) {
4618 				if ((unsigned char)*p == (unsigned char)c) {
4619 					last = (char *)p;
4620 				}
4621 				nb = php_mb_mbchar_bytes_ex(p, enc);
4622 				if (nb == 0) {
4623 					return NULL; /* something is going wrong! */
4624 				}
4625 			}
4626 			--nb;
4627 			++p;
4628 		}
4629 	} else {
4630 		register size_t bcnt = nbytes;
4631 		register size_t nbytes_char;
4632 		while (bcnt > 0) {
4633 			if ((unsigned char)*p == (unsigned char)c) {
4634 				last = (char *)p;
4635 			}
4636 			nbytes_char = php_mb_mbchar_bytes_ex(p, enc);
4637 			if (bcnt < nbytes_char) {
4638 				return NULL;
4639 			}
4640 			p += nbytes_char;
4641 			bcnt -= nbytes_char;
4642 		}
4643 	}
4644 	return last;
4645 }
4646 /* }}} */
4647 
4648 /* {{{ MBSTRING_API char *php_mb_safe_strrchr() */
php_mb_safe_strrchr(const char * s,unsigned int c,size_t nbytes TSRMLS_DC)4649 MBSTRING_API char *php_mb_safe_strrchr(const char *s, unsigned int c, size_t nbytes TSRMLS_DC)
4650 {
4651 	return php_mb_safe_strrchr_ex(s, c, nbytes, MBSTRG(internal_encoding));
4652 }
4653 /* }}} */
4654 
4655 /* {{{ MBSTRING_API int php_mb_stripos()
4656  */
php_mb_stripos(int mode,const char * old_haystack,unsigned int old_haystack_len,const char * old_needle,unsigned int old_needle_len,long offset,const char * from_encoding TSRMLS_DC)4657 MBSTRING_API int php_mb_stripos(int mode, const char *old_haystack, unsigned int old_haystack_len, const char *old_needle, unsigned int old_needle_len, long offset, const char *from_encoding TSRMLS_DC)
4658 {
4659 	int n;
4660 	mbfl_string haystack, needle;
4661 	n = -1;
4662 
4663 	mbfl_string_init(&haystack);
4664 	mbfl_string_init(&needle);
4665 	haystack.no_language = MBSTRG(language);
4666 	haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
4667 	needle.no_language = MBSTRG(language);
4668 	needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
4669 
4670 	do {
4671 		size_t len = 0;
4672 		haystack.val = (unsigned char *)php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, (char *)old_haystack, old_haystack_len, &len, from_encoding TSRMLS_CC);
4673 		haystack.len = len;
4674 
4675 		if (!haystack.val) {
4676 			break;
4677 		}
4678 
4679 		if (haystack.len <= 0) {
4680 			break;
4681 		}
4682 
4683 		needle.val = (unsigned char *)php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, (char *)old_needle, old_needle_len, &len, from_encoding TSRMLS_CC);
4684 		needle.len = len;
4685 
4686 		if (!needle.val) {
4687 			break;
4688 		}
4689 
4690 		if (needle.len <= 0) {
4691 			break;
4692 		}
4693 
4694 		haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding);
4695 		if (haystack.no_encoding == mbfl_no_encoding_invalid) {
4696 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", from_encoding);
4697 			break;
4698 		}
4699 
4700  		{
4701  			int haystack_char_len = mbfl_strlen(&haystack);
4702 
4703  			if (mode) {
4704  				if ((offset > 0 && offset > haystack_char_len) ||
4705  					(offset < 0 && -offset > haystack_char_len)) {
4706  					php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset is greater than the length of haystack string");
4707  					break;
4708  				}
4709  			} else {
4710  				if (offset < 0 || offset > haystack_char_len) {
4711  					php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset not contained in string");
4712  					break;
4713  				}
4714  			}
4715 		}
4716 
4717 		n = mbfl_strpos(&haystack, &needle, offset, mode);
4718 	} while(0);
4719 
4720 	if (haystack.val) {
4721 		efree(haystack.val);
4722 	}
4723 
4724 	if (needle.val) {
4725 		efree(needle.val);
4726 	}
4727 
4728 	return n;
4729 }
4730 /* }}} */
4731 
/* Reports the mbstring globals `http_input_list` / `http_input_list_size`
 * through the two out-parameters. The list pointer is handed out as-is
 * (cast to const zend_encoding **); nothing is copied. */
static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *list_size TSRMLS_DC) /* {{{ */
{
	const zend_encoding **input_encodings = (const zend_encoding **)MBSTRG(http_input_list);
	const size_t input_encoding_count = MBSTRG(http_input_list_size);

	*list = input_encodings;
	*list_size = input_encoding_count;
}
/* }}} */
4738 
/* Stores `encoding` (reinterpreted as a const mbfl_encoding *) in the
 * mbstring global `http_input_identify`. */
static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding TSRMLS_DC) /* {{{ */
{
	const mbfl_encoding *identified = (const mbfl_encoding*)encoding;

	MBSTRG(http_input_identify) = identified;
}
/* }}} */
4744 
4745 #endif	/* HAVE_MBSTRING */
4746 
4747 /*
4748  * Local variables:
4749  * tab-width: 4
4750  * c-basic-offset: 4
4751  * End:
4752  * vim600: fdm=marker
4753  * vim: noet sw=4 ts=4
4754  */
4755