xref: /PHP-5.6/ext/mbstring/mbstring.c (revision e1709b7e)
1 /*
2    +----------------------------------------------------------------------+
3    | PHP Version 5                                                        |
4    +----------------------------------------------------------------------+
5    | Copyright (c) 1997-2016 The PHP Group                                |
6    +----------------------------------------------------------------------+
7    | This source file is subject to version 3.01 of the PHP license,      |
8    | that is bundled with this package in the file LICENSE, and is        |
9    | available through the world-wide-web at the following url:           |
10    | http://www.php.net/license/3_01.txt                                  |
11    | If you did not receive a copy of the PHP license and are unable to   |
12    | obtain it through the world-wide-web, please send a note to          |
13    | license@php.net so we can mail you a copy immediately.               |
14    +----------------------------------------------------------------------+
15    | Author: Tsukada Takuya <tsukada@fminn.nagano.nagano.jp>              |
16    |         Rui Hirokawa <hirokawa@php.net>                              |
17    +----------------------------------------------------------------------+
18  */
19 
20 /* $Id$ */
21 
22 /*
23  * PHP 4 Multibyte String module "mbstring"
24  *
25  * History:
26  *   2000.5.19  Release php-4.0RC2_jstring-1.0
27  *   2001.4.1   Release php4_jstring-1.0.91
28  *   2001.4.30  Release php4_jstring-1.1 (contribute to The PHP Group)
29  *   2001.5.1   Renamed from jstring to mbstring (hirokawa@php.net)
30  */
31 
32 /*
33  * PHP3 Internationalization support program.
34  *
35  * Copyright (c) 1999,2000 by the PHP3 internationalization team.
36  * All rights reserved.
37  *
38  * See README_PHP3-i18n-ja for more detail.
39  *
40  * Authors:
41  *    Hironori Sato <satoh@jpnnet.com>
42  *    Shigeru Kanemoto <sgk@happysize.co.jp>
43  *    Tsukada Takuya <tsukada@fminn.nagano.nagano.jp>
44  *    Rui Hirokawa <rui_hirokawa@ybb.ne.jp>
45  */
46 
47 /* {{{ includes */
48 #ifdef HAVE_CONFIG_H
49 #include "config.h"
50 #endif
51 
52 #include "php.h"
53 #include "php_ini.h"
54 #include "php_variables.h"
55 #include "mbstring.h"
56 #include "ext/standard/php_string.h"
57 #include "ext/standard/php_mail.h"
58 #include "ext/standard/exec.h"
59 #include "ext/standard/php_smart_str.h"
60 #include "ext/standard/url.h"
61 #include "main/php_output.h"
62 #include "ext/standard/info.h"
63 
64 #include "libmbfl/mbfl/mbfl_allocators.h"
65 #include "libmbfl/mbfl/mbfilter_pass.h"
66 
67 #include "php_variables.h"
68 #include "php_globals.h"
69 #include "rfc1867.h"
70 #include "php_content_types.h"
71 #include "SAPI.h"
72 #include "php_unicode.h"
73 #include "TSRM.h"
74 
75 #include "mb_gpc.h"
76 
77 #if HAVE_MBREGEX
78 #include "php_mbregex.h"
79 #endif
80 
81 #include "zend_multibyte.h"
82 
83 #if HAVE_ONIG
84 #include "php_onig_compat.h"
85 #include <oniguruma.h>
86 #undef UChar
87 #elif HAVE_PCRE || HAVE_BUNDLED_PCRE
88 #include "ext/pcre/php_pcre.h"
89 #endif
90 /* }}} */
91 
92 #if HAVE_MBSTRING
93 
94 /* {{{ prototypes */
95 ZEND_DECLARE_MODULE_GLOBALS(mbstring)
96 
97 static PHP_GINIT_FUNCTION(mbstring);
98 static PHP_GSHUTDOWN_FUNCTION(mbstring);
99 
100 static void php_mb_populate_current_detect_order_list(TSRMLS_D);
101 
102 static int php_mb_encoding_translation(TSRMLS_D);
103 
104 static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *list_size TSRMLS_DC);
105 
106 static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding TSRMLS_DC);
107 
108 /* }}} */
109 
110 /* {{{ php_mb_default_identify_list */
111 typedef struct _php_mb_nls_ident_list {
112 	enum mbfl_no_language lang;
113 	const enum mbfl_no_encoding *list;
114 	size_t list_size;
115 } php_mb_nls_ident_list;
116 
117 static const enum mbfl_no_encoding php_mb_default_identify_list_ja[] = {
118 	mbfl_no_encoding_ascii,
119 	mbfl_no_encoding_jis,
120 	mbfl_no_encoding_utf8,
121 	mbfl_no_encoding_euc_jp,
122 	mbfl_no_encoding_sjis
123 };
124 
125 static const enum mbfl_no_encoding php_mb_default_identify_list_cn[] = {
126 	mbfl_no_encoding_ascii,
127 	mbfl_no_encoding_utf8,
128 	mbfl_no_encoding_euc_cn,
129 	mbfl_no_encoding_cp936
130 };
131 
132 static const enum mbfl_no_encoding php_mb_default_identify_list_tw_hk[] = {
133 	mbfl_no_encoding_ascii,
134 	mbfl_no_encoding_utf8,
135 	mbfl_no_encoding_euc_tw,
136 	mbfl_no_encoding_big5
137 };
138 
139 static const enum mbfl_no_encoding php_mb_default_identify_list_kr[] = {
140 	mbfl_no_encoding_ascii,
141 	mbfl_no_encoding_utf8,
142 	mbfl_no_encoding_euc_kr,
143 	mbfl_no_encoding_uhc
144 };
145 
146 static const enum mbfl_no_encoding php_mb_default_identify_list_ru[] = {
147 	mbfl_no_encoding_ascii,
148 	mbfl_no_encoding_utf8,
149 	mbfl_no_encoding_koi8r,
150 	mbfl_no_encoding_cp1251,
151 	mbfl_no_encoding_cp866
152 };
153 
154 static const enum mbfl_no_encoding php_mb_default_identify_list_hy[] = {
155 	mbfl_no_encoding_ascii,
156 	mbfl_no_encoding_utf8,
157 	mbfl_no_encoding_armscii8
158 };
159 
160 static const enum mbfl_no_encoding php_mb_default_identify_list_tr[] = {
161 	mbfl_no_encoding_ascii,
162 	mbfl_no_encoding_utf8,
163 	mbfl_no_encoding_cp1254,
164 	mbfl_no_encoding_8859_9
165 };
166 
167 static const enum mbfl_no_encoding php_mb_default_identify_list_ua[] = {
168 	mbfl_no_encoding_ascii,
169 	mbfl_no_encoding_utf8,
170 	mbfl_no_encoding_koi8u
171 };
172 
173 static const enum mbfl_no_encoding php_mb_default_identify_list_neut[] = {
174 	mbfl_no_encoding_ascii,
175 	mbfl_no_encoding_utf8
176 };
177 
178 
179 static const php_mb_nls_ident_list php_mb_default_identify_list[] = {
180 	{ mbfl_no_language_japanese, php_mb_default_identify_list_ja, sizeof(php_mb_default_identify_list_ja) / sizeof(php_mb_default_identify_list_ja[0]) },
181 	{ mbfl_no_language_korean, php_mb_default_identify_list_kr, sizeof(php_mb_default_identify_list_kr) / sizeof(php_mb_default_identify_list_kr[0]) },
182 	{ mbfl_no_language_traditional_chinese, php_mb_default_identify_list_tw_hk, sizeof(php_mb_default_identify_list_tw_hk) / sizeof(php_mb_default_identify_list_tw_hk[0]) },
183 	{ mbfl_no_language_simplified_chinese, php_mb_default_identify_list_cn, sizeof(php_mb_default_identify_list_cn) / sizeof(php_mb_default_identify_list_cn[0]) },
184 	{ mbfl_no_language_russian, php_mb_default_identify_list_ru, sizeof(php_mb_default_identify_list_ru) / sizeof(php_mb_default_identify_list_ru[0]) },
185 	{ mbfl_no_language_armenian, php_mb_default_identify_list_hy, sizeof(php_mb_default_identify_list_hy) / sizeof(php_mb_default_identify_list_hy[0]) },
186 	{ mbfl_no_language_turkish, php_mb_default_identify_list_tr, sizeof(php_mb_default_identify_list_tr) / sizeof(php_mb_default_identify_list_tr[0]) },
187 	{ mbfl_no_language_ukrainian, php_mb_default_identify_list_ua, sizeof(php_mb_default_identify_list_ua) / sizeof(php_mb_default_identify_list_ua[0]) },
188 	{ mbfl_no_language_neutral, php_mb_default_identify_list_neut, sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]) }
189 };
190 
191 /* }}} */
192 
193 /* {{{ mb_overload_def mb_ovld[] */
194 static const struct mb_overload_def mb_ovld[] = {
195 	{MB_OVERLOAD_MAIL, "mail", "mb_send_mail", "mb_orig_mail"},
196 	{MB_OVERLOAD_STRING, "strlen", "mb_strlen", "mb_orig_strlen"},
197 	{MB_OVERLOAD_STRING, "strpos", "mb_strpos", "mb_orig_strpos"},
198 	{MB_OVERLOAD_STRING, "strrpos", "mb_strrpos", "mb_orig_strrpos"},
199 	{MB_OVERLOAD_STRING, "stripos", "mb_stripos", "mb_orig_stripos"},
200 	{MB_OVERLOAD_STRING, "strripos", "mb_strripos", "mb_orig_strripos"},
201 	{MB_OVERLOAD_STRING, "strstr", "mb_strstr", "mb_orig_strstr"},
202 	{MB_OVERLOAD_STRING, "strrchr", "mb_strrchr", "mb_orig_strrchr"},
203 	{MB_OVERLOAD_STRING, "stristr", "mb_stristr", "mb_orig_stristr"},
204 	{MB_OVERLOAD_STRING, "substr", "mb_substr", "mb_orig_substr"},
205 	{MB_OVERLOAD_STRING, "strtolower", "mb_strtolower", "mb_orig_strtolower"},
206 	{MB_OVERLOAD_STRING, "strtoupper", "mb_strtoupper", "mb_orig_strtoupper"},
207 	{MB_OVERLOAD_STRING, "substr_count", "mb_substr_count", "mb_orig_substr_count"},
208 #if HAVE_MBREGEX
209 	{MB_OVERLOAD_REGEX, "ereg", "mb_ereg", "mb_orig_ereg"},
210 	{MB_OVERLOAD_REGEX, "eregi", "mb_eregi", "mb_orig_eregi"},
211 	{MB_OVERLOAD_REGEX, "ereg_replace", "mb_ereg_replace", "mb_orig_ereg_replace"},
212 	{MB_OVERLOAD_REGEX, "eregi_replace", "mb_eregi_replace", "mb_orig_eregi_replace"},
213 	{MB_OVERLOAD_REGEX, "split", "mb_split", "mb_orig_split"},
214 #endif
215 	{0, NULL, NULL, NULL}
216 };
217 /* }}} */
218 
219 /* {{{ arginfo */
220 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_language, 0, 0, 0)
221 	ZEND_ARG_INFO(0, language)
222 ZEND_END_ARG_INFO()
223 
224 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_internal_encoding, 0, 0, 0)
225 	ZEND_ARG_INFO(0, encoding)
226 ZEND_END_ARG_INFO()
227 
228 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_http_input, 0, 0, 0)
229 	ZEND_ARG_INFO(0, type)
230 ZEND_END_ARG_INFO()
231 
232 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_http_output, 0, 0, 0)
233 	ZEND_ARG_INFO(0, encoding)
234 ZEND_END_ARG_INFO()
235 
236 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_detect_order, 0, 0, 0)
237 	ZEND_ARG_INFO(0, encoding)
238 ZEND_END_ARG_INFO()
239 
240 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substitute_character, 0, 0, 0)
241 	ZEND_ARG_INFO(0, substchar)
242 ZEND_END_ARG_INFO()
243 
244 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_preferred_mime_name, 0, 0, 1)
245 	ZEND_ARG_INFO(0, encoding)
246 ZEND_END_ARG_INFO()
247 
248 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_parse_str, 0, 0, 1)
249 	ZEND_ARG_INFO(0, encoded_string)
250 	ZEND_ARG_INFO(1, result)
251 ZEND_END_ARG_INFO()
252 
253 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_output_handler, 0, 0, 2)
254 	ZEND_ARG_INFO(0, contents)
255 	ZEND_ARG_INFO(0, status)
256 ZEND_END_ARG_INFO()
257 
258 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strlen, 0, 0, 1)
259 	ZEND_ARG_INFO(0, str)
260 	ZEND_ARG_INFO(0, encoding)
261 ZEND_END_ARG_INFO()
262 
263 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strpos, 0, 0, 2)
264 	ZEND_ARG_INFO(0, haystack)
265 	ZEND_ARG_INFO(0, needle)
266 	ZEND_ARG_INFO(0, offset)
267 	ZEND_ARG_INFO(0, encoding)
268 ZEND_END_ARG_INFO()
269 
270 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrpos, 0, 0, 2)
271 	ZEND_ARG_INFO(0, haystack)
272 	ZEND_ARG_INFO(0, needle)
273 	ZEND_ARG_INFO(0, offset)
274 	ZEND_ARG_INFO(0, encoding)
275 ZEND_END_ARG_INFO()
276 
277 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_stripos, 0, 0, 2)
278 	ZEND_ARG_INFO(0, haystack)
279 	ZEND_ARG_INFO(0, needle)
280 	ZEND_ARG_INFO(0, offset)
281 	ZEND_ARG_INFO(0, encoding)
282 ZEND_END_ARG_INFO()
283 
284 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strripos, 0, 0, 2)
285 	ZEND_ARG_INFO(0, haystack)
286 	ZEND_ARG_INFO(0, needle)
287 	ZEND_ARG_INFO(0, offset)
288 	ZEND_ARG_INFO(0, encoding)
289 ZEND_END_ARG_INFO()
290 
291 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strstr, 0, 0, 2)
292 	ZEND_ARG_INFO(0, haystack)
293 	ZEND_ARG_INFO(0, needle)
294 	ZEND_ARG_INFO(0, part)
295 	ZEND_ARG_INFO(0, encoding)
296 ZEND_END_ARG_INFO()
297 
298 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrchr, 0, 0, 2)
299 	ZEND_ARG_INFO(0, haystack)
300 	ZEND_ARG_INFO(0, needle)
301 	ZEND_ARG_INFO(0, part)
302 	ZEND_ARG_INFO(0, encoding)
303 ZEND_END_ARG_INFO()
304 
305 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_stristr, 0, 0, 2)
306 	ZEND_ARG_INFO(0, haystack)
307 	ZEND_ARG_INFO(0, needle)
308 	ZEND_ARG_INFO(0, part)
309 	ZEND_ARG_INFO(0, encoding)
310 ZEND_END_ARG_INFO()
311 
312 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrichr, 0, 0, 2)
313 	ZEND_ARG_INFO(0, haystack)
314 	ZEND_ARG_INFO(0, needle)
315 	ZEND_ARG_INFO(0, part)
316 	ZEND_ARG_INFO(0, encoding)
317 ZEND_END_ARG_INFO()
318 
319 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substr_count, 0, 0, 2)
320 	ZEND_ARG_INFO(0, haystack)
321 	ZEND_ARG_INFO(0, needle)
322 	ZEND_ARG_INFO(0, encoding)
323 ZEND_END_ARG_INFO()
324 
325 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substr, 0, 0, 2)
326 	ZEND_ARG_INFO(0, str)
327 	ZEND_ARG_INFO(0, start)
328 	ZEND_ARG_INFO(0, length)
329 	ZEND_ARG_INFO(0, encoding)
330 ZEND_END_ARG_INFO()
331 
332 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strcut, 0, 0, 2)
333 	ZEND_ARG_INFO(0, str)
334 	ZEND_ARG_INFO(0, start)
335 	ZEND_ARG_INFO(0, length)
336 	ZEND_ARG_INFO(0, encoding)
337 ZEND_END_ARG_INFO()
338 
339 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strwidth, 0, 0, 1)
340 	ZEND_ARG_INFO(0, str)
341 	ZEND_ARG_INFO(0, encoding)
342 ZEND_END_ARG_INFO()
343 
344 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strimwidth, 0, 0, 3)
345 	ZEND_ARG_INFO(0, str)
346 	ZEND_ARG_INFO(0, start)
347 	ZEND_ARG_INFO(0, width)
348 	ZEND_ARG_INFO(0, trimmarker)
349 	ZEND_ARG_INFO(0, encoding)
350 ZEND_END_ARG_INFO()
351 
352 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_encoding, 0, 0, 2)
353 	ZEND_ARG_INFO(0, str)
354 	ZEND_ARG_INFO(0, to)
355 	ZEND_ARG_INFO(0, from)
356 ZEND_END_ARG_INFO()
357 
358 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_case, 0, 0, 2)
359 	ZEND_ARG_INFO(0, sourcestring)
360 	ZEND_ARG_INFO(0, mode)
361 	ZEND_ARG_INFO(0, encoding)
362 ZEND_END_ARG_INFO()
363 
364 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strtoupper, 0, 0, 1)
365 	ZEND_ARG_INFO(0, sourcestring)
366 	ZEND_ARG_INFO(0, encoding)
367 ZEND_END_ARG_INFO()
368 
369 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strtolower, 0, 0, 1)
370 	ZEND_ARG_INFO(0, sourcestring)
371 	ZEND_ARG_INFO(0, encoding)
372 ZEND_END_ARG_INFO()
373 
374 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_detect_encoding, 0, 0, 1)
375 	ZEND_ARG_INFO(0, str)
376 	ZEND_ARG_INFO(0, encoding_list)
377 	ZEND_ARG_INFO(0, strict)
378 ZEND_END_ARG_INFO()
379 
380 ZEND_BEGIN_ARG_INFO(arginfo_mb_list_encodings, 0)
381 ZEND_END_ARG_INFO()
382 
383 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encoding_aliases, 0, 0, 1)
384 	ZEND_ARG_INFO(0, encoding)
385 ZEND_END_ARG_INFO()
386 
387 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encode_mimeheader, 0, 0, 1)
388 	ZEND_ARG_INFO(0, str)
389 	ZEND_ARG_INFO(0, charset)
390 	ZEND_ARG_INFO(0, transfer)
391 	ZEND_ARG_INFO(0, linefeed)
392 	ZEND_ARG_INFO(0, indent)
393 ZEND_END_ARG_INFO()
394 
395 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_decode_mimeheader, 0, 0, 1)
396 	ZEND_ARG_INFO(0, string)
397 ZEND_END_ARG_INFO()
398 
399 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_kana, 0, 0, 1)
400 	ZEND_ARG_INFO(0, str)
401 	ZEND_ARG_INFO(0, option)
402 	ZEND_ARG_INFO(0, encoding)
403 ZEND_END_ARG_INFO()
404 
405 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_variables, 0, 0, 3)
406 	ZEND_ARG_INFO(0, to)
407 	ZEND_ARG_INFO(0, from)
408 	ZEND_ARG_VARIADIC_INFO(1, vars)
409 ZEND_END_ARG_INFO()
410 
411 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encode_numericentity, 0, 0, 2)
412 	ZEND_ARG_INFO(0, string)
413 	ZEND_ARG_INFO(0, convmap)
414 	ZEND_ARG_INFO(0, encoding)
415 	ZEND_ARG_INFO(0, is_hex)
416 ZEND_END_ARG_INFO()
417 
418 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_decode_numericentity, 0, 0, 2)
419 	ZEND_ARG_INFO(0, string)
420 	ZEND_ARG_INFO(0, convmap)
421 	ZEND_ARG_INFO(0, encoding)
422 ZEND_END_ARG_INFO()
423 
424 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_send_mail, 0, 0, 3)
425 	ZEND_ARG_INFO(0, to)
426 	ZEND_ARG_INFO(0, subject)
427 	ZEND_ARG_INFO(0, message)
428 	ZEND_ARG_INFO(0, additional_headers)
429 	ZEND_ARG_INFO(0, additional_parameters)
430 ZEND_END_ARG_INFO()
431 
432 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_get_info, 0, 0, 0)
433 	ZEND_ARG_INFO(0, type)
434 ZEND_END_ARG_INFO()
435 
436 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_check_encoding, 0, 0, 0)
437 	ZEND_ARG_INFO(0, var)
438 	ZEND_ARG_INFO(0, encoding)
439 ZEND_END_ARG_INFO()
440 
441 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_regex_encoding, 0, 0, 0)
442 	ZEND_ARG_INFO(0, encoding)
443 ZEND_END_ARG_INFO()
444 
445 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg, 0, 0, 2)
446 	ZEND_ARG_INFO(0, pattern)
447 	ZEND_ARG_INFO(0, string)
448 	ZEND_ARG_INFO(1, registers)
449 ZEND_END_ARG_INFO()
450 
451 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_eregi, 0, 0, 2)
452 	ZEND_ARG_INFO(0, pattern)
453 	ZEND_ARG_INFO(0, string)
454 	ZEND_ARG_INFO(1, registers)
455 ZEND_END_ARG_INFO()
456 
457 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_replace, 0, 0, 3)
458 	ZEND_ARG_INFO(0, pattern)
459 	ZEND_ARG_INFO(0, replacement)
460 	ZEND_ARG_INFO(0, string)
461 	ZEND_ARG_INFO(0, option)
462 ZEND_END_ARG_INFO()
463 
464 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_eregi_replace, 0, 0, 3)
465 	ZEND_ARG_INFO(0, pattern)
466 	ZEND_ARG_INFO(0, replacement)
467 	ZEND_ARG_INFO(0, string)
468 ZEND_END_ARG_INFO()
469 
470 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_replace_callback, 0, 0, 3)
471 	ZEND_ARG_INFO(0, pattern)
472 	ZEND_ARG_INFO(0, callback)
473 	ZEND_ARG_INFO(0, string)
474 	ZEND_ARG_INFO(0, option)
475 ZEND_END_ARG_INFO()
476 
477 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_split, 0, 0, 2)
478 	ZEND_ARG_INFO(0, pattern)
479 	ZEND_ARG_INFO(0, string)
480 	ZEND_ARG_INFO(0, limit)
481 ZEND_END_ARG_INFO()
482 
483 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_match, 0, 0, 2)
484 	ZEND_ARG_INFO(0, pattern)
485 	ZEND_ARG_INFO(0, string)
486 	ZEND_ARG_INFO(0, option)
487 ZEND_END_ARG_INFO()
488 
489 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search, 0, 0, 0)
490 	ZEND_ARG_INFO(0, pattern)
491 	ZEND_ARG_INFO(0, option)
492 ZEND_END_ARG_INFO()
493 
494 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_pos, 0, 0, 0)
495 	ZEND_ARG_INFO(0, pattern)
496 	ZEND_ARG_INFO(0, option)
497 ZEND_END_ARG_INFO()
498 
499 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_regs, 0, 0, 0)
500 	ZEND_ARG_INFO(0, pattern)
501 	ZEND_ARG_INFO(0, option)
502 ZEND_END_ARG_INFO()
503 
504 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_init, 0, 0, 1)
505 	ZEND_ARG_INFO(0, string)
506 	ZEND_ARG_INFO(0, pattern)
507 	ZEND_ARG_INFO(0, option)
508 ZEND_END_ARG_INFO()
509 
510 ZEND_BEGIN_ARG_INFO(arginfo_mb_ereg_search_getregs, 0)
511 ZEND_END_ARG_INFO()
512 
513 ZEND_BEGIN_ARG_INFO(arginfo_mb_ereg_search_getpos, 0)
514 ZEND_END_ARG_INFO()
515 
516 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_setpos, 0, 0, 1)
517 	ZEND_ARG_INFO(0, position)
518 ZEND_END_ARG_INFO()
519 
520 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_regex_set_options, 0, 0, 0)
521 	ZEND_ARG_INFO(0, options)
522 ZEND_END_ARG_INFO()
523 /* }}} */
524 
525 /* {{{ zend_function_entry mbstring_functions[] */
526 const zend_function_entry mbstring_functions[] = {
527 	PHP_FE(mb_convert_case,			arginfo_mb_convert_case)
528 	PHP_FE(mb_strtoupper,			arginfo_mb_strtoupper)
529 	PHP_FE(mb_strtolower,			arginfo_mb_strtolower)
530 	PHP_FE(mb_language,				arginfo_mb_language)
531 	PHP_FE(mb_internal_encoding,	arginfo_mb_internal_encoding)
532 	PHP_FE(mb_http_input,			arginfo_mb_http_input)
533 	PHP_FE(mb_http_output,			arginfo_mb_http_output)
534 	PHP_FE(mb_detect_order,			arginfo_mb_detect_order)
535 	PHP_FE(mb_substitute_character,	arginfo_mb_substitute_character)
536 	PHP_FE(mb_parse_str,			arginfo_mb_parse_str)
537 	PHP_FE(mb_output_handler,		arginfo_mb_output_handler)
538 	PHP_FE(mb_preferred_mime_name,	arginfo_mb_preferred_mime_name)
539 	PHP_FE(mb_strlen,				arginfo_mb_strlen)
540 	PHP_FE(mb_strpos,				arginfo_mb_strpos)
541 	PHP_FE(mb_strrpos,				arginfo_mb_strrpos)
542 	PHP_FE(mb_stripos,				arginfo_mb_stripos)
543 	PHP_FE(mb_strripos,				arginfo_mb_strripos)
544 	PHP_FE(mb_strstr,				arginfo_mb_strstr)
545 	PHP_FE(mb_strrchr,				arginfo_mb_strrchr)
546 	PHP_FE(mb_stristr,				arginfo_mb_stristr)
547 	PHP_FE(mb_strrichr,				arginfo_mb_strrichr)
548 	PHP_FE(mb_substr_count,			arginfo_mb_substr_count)
549 	PHP_FE(mb_substr,				arginfo_mb_substr)
550 	PHP_FE(mb_strcut,				arginfo_mb_strcut)
551 	PHP_FE(mb_strwidth,				arginfo_mb_strwidth)
552 	PHP_FE(mb_strimwidth,			arginfo_mb_strimwidth)
553 	PHP_FE(mb_convert_encoding,		arginfo_mb_convert_encoding)
554 	PHP_FE(mb_detect_encoding,		arginfo_mb_detect_encoding)
555 	PHP_FE(mb_list_encodings,		arginfo_mb_list_encodings)
556 	PHP_FE(mb_encoding_aliases,		arginfo_mb_encoding_aliases)
557 	PHP_FE(mb_convert_kana,			arginfo_mb_convert_kana)
558 	PHP_FE(mb_encode_mimeheader,	arginfo_mb_encode_mimeheader)
559 	PHP_FE(mb_decode_mimeheader,	arginfo_mb_decode_mimeheader)
560 	PHP_FE(mb_convert_variables,	arginfo_mb_convert_variables)
561 	PHP_FE(mb_encode_numericentity,	arginfo_mb_encode_numericentity)
562 	PHP_FE(mb_decode_numericentity,	arginfo_mb_decode_numericentity)
563 	PHP_FE(mb_send_mail,			arginfo_mb_send_mail)
564 	PHP_FE(mb_get_info,				arginfo_mb_get_info)
565 	PHP_FE(mb_check_encoding,		arginfo_mb_check_encoding)
566 #if HAVE_MBREGEX
567 	PHP_MBREGEX_FUNCTION_ENTRIES
568 #endif
569 	PHP_FE_END
570 };
571 /* }}} */
572 
573 /* {{{ zend_module_entry mbstring_module_entry */
574 zend_module_entry mbstring_module_entry = {
575 	STANDARD_MODULE_HEADER,
576 	"mbstring",
577 	mbstring_functions,
578 	PHP_MINIT(mbstring),
579 	PHP_MSHUTDOWN(mbstring),
580 	PHP_RINIT(mbstring),
581 	PHP_RSHUTDOWN(mbstring),
582 	PHP_MINFO(mbstring),
583 	NO_VERSION_YET,
584 	PHP_MODULE_GLOBALS(mbstring),
585 	PHP_GINIT(mbstring),
586 	PHP_GSHUTDOWN(mbstring),
587 	NULL,
588 	STANDARD_MODULE_PROPERTIES_EX
589 };
590 /* }}} */
591 
592 /* {{{ static sapi_post_entry php_post_entries[] */
593 static sapi_post_entry php_post_entries[] = {
594 	{ DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data,	php_std_post_handler },
595 	{ MULTIPART_CONTENT_TYPE,    sizeof(MULTIPART_CONTENT_TYPE)-1,    NULL,                         rfc1867_post_handler },
596 	{ NULL, 0, NULL, NULL }
597 };
598 /* }}} */
599 
/* Emitted only when COMPILE_DL_MBSTRING is defined. */
#ifdef COMPILE_DL_MBSTRING
ZEND_GET_MODULE(mbstring)
#endif
603 
604 static char *get_internal_encoding(TSRMLS_D) {
605 	if (PG(internal_encoding) && PG(internal_encoding)[0]) {
606 		return PG(internal_encoding);
607 	} else if (SG(default_charset)) {
608 		return SG(default_charset);
609 	}
610 	return "";
611 }
612 
get_input_encoding(TSRMLS_D)613 static char *get_input_encoding(TSRMLS_D) {
614 	if (PG(input_encoding) && PG(input_encoding)[0]) {
615 		return PG(input_encoding);
616 	} else if (SG(default_charset)) {
617 		return SG(default_charset);
618 	}
619 	return "";
620 }
621 
get_output_encoding(TSRMLS_D)622 static char *get_output_encoding(TSRMLS_D) {
623 	if (PG(output_encoding) && PG(output_encoding)[0]) {
624 		return PG(output_encoding);
625 	} else if (SG(default_charset)) {
626 		return SG(default_charset);
627 	}
628 	return "";
629 }
630 
631 
632 /* {{{ allocators */
/* malloc hook: hands the request to emalloc(). */
static void *_php_mb_allocators_malloc(unsigned int nbytes)
{
	void *block = emalloc(nbytes);

	return block;
}
637 
/* realloc hook: hands the request to erealloc(). */
static void *_php_mb_allocators_realloc(void *block, unsigned int nbytes)
{
	void *resized = erealloc(block, nbytes);

	return resized;
}
642 
/* calloc hook: hands the request to ecalloc(). */
static void *_php_mb_allocators_calloc(unsigned int count, unsigned int elem_size)
{
	void *block = ecalloc(count, elem_size);

	return block;
}
647 
/* free hook: releases the block with efree(). */
static void _php_mb_allocators_free(void *block)
{
	efree(block);
}
652 
/* pmalloc hook: calls pemalloc() with its second (persistent) argument set to 1. */
static void *_php_mb_allocators_pmalloc(unsigned int nbytes)
{
	void *block = pemalloc(nbytes, 1);

	return block;
}
657 
/* prealloc hook: calls perealloc() with its last (persistent) argument set to 1. */
static void *_php_mb_allocators_prealloc(void *block, unsigned int nbytes)
{
	void *resized = perealloc(block, nbytes, 1);

	return resized;
}
662 
/* pfree hook: releases the block with pefree(), persistent argument set to 1. */
static void _php_mb_allocators_pfree(void *block)
{
	pefree(block, 1);
}
667 
668 static mbfl_allocators _php_mb_allocators = {
669 	_php_mb_allocators_malloc,
670 	_php_mb_allocators_realloc,
671 	_php_mb_allocators_calloc,
672 	_php_mb_allocators_free,
673 	_php_mb_allocators_pmalloc,
674 	_php_mb_allocators_prealloc,
675 	_php_mb_allocators_pfree
676 };
677 /* }}} */
678 
679 /* {{{ static sapi_post_entry mbstr_post_entries[] */
680 static sapi_post_entry mbstr_post_entries[] = {
681 	{ DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data, php_mb_post_handler },
682 	{ MULTIPART_CONTENT_TYPE,    sizeof(MULTIPART_CONTENT_TYPE)-1,    NULL,                         rfc1867_post_handler },
683 	{ NULL, 0, NULL, NULL }
684 };
685 /* }}} */
686 
687 /* {{{ static int php_mb_parse_encoding_list()
688  *  Return 0 if input contains any illegal encoding, otherwise 1.
689  *  Even if any illegal encoding is detected the result may contain a list
690  *  of parsed encodings.
691  */
692 static int
php_mb_parse_encoding_list(const char * value,size_t value_length,const mbfl_encoding *** return_list,size_t * return_size,int persistent TSRMLS_DC)693 php_mb_parse_encoding_list(const char *value, size_t value_length, const mbfl_encoding ***return_list, size_t *return_size, int persistent TSRMLS_DC)
694 {
695 	int size, bauto, ret = SUCCESS;
696 	size_t n;
697 	char *p, *p1, *p2, *endp, *tmpstr;
698 	const mbfl_encoding **entry, **list;
699 
700 	list = NULL;
701 	if (value == NULL || value_length <= 0) {
702 		if (return_list) {
703 			*return_list = NULL;
704 		}
705 		if (return_size) {
706 			*return_size = 0;
707 		}
708 		return FAILURE;
709 	} else {
710 		/* copy the value string for work */
711 		if (value[0]=='"' && value[value_length-1]=='"' && value_length>2) {
712 			tmpstr = (char *)estrndup(value+1, value_length-2);
713 			value_length -= 2;
714 		}
715 		else
716 			tmpstr = (char *)estrndup(value, value_length);
717 		if (tmpstr == NULL) {
718 			return FAILURE;
719 		}
720 		/* count the number of listed encoding names */
721 		endp = tmpstr + value_length;
722 		n = 1;
723 		p1 = tmpstr;
724 		while ((p2 = php_memnstr(p1, ",", 1, endp)) != NULL) {
725 			p1 = p2 + 1;
726 			n++;
727 		}
728 		size = n + MBSTRG(default_detect_order_list_size);
729 		/* make list */
730 		list = (const mbfl_encoding **)pecalloc(size, sizeof(mbfl_encoding*), persistent);
731 		if (list != NULL) {
732 			entry = list;
733 			n = 0;
734 			bauto = 0;
735 			p1 = tmpstr;
736 			do {
737 				p2 = p = php_memnstr(p1, ",", 1, endp);
738 				if (p == NULL) {
739 					p = endp;
740 				}
741 				*p = '\0';
742 				/* trim spaces */
743 				while (p1 < p && (*p1 == ' ' || *p1 == '\t')) {
744 					p1++;
745 				}
746 				p--;
747 				while (p > p1 && (*p == ' ' || *p == '\t')) {
748 					*p = '\0';
749 					p--;
750 				}
751 				/* convert to the encoding number and check encoding */
752 				if (strcasecmp(p1, "auto") == 0) {
753 					if (!bauto) {
754 						const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
755 						const size_t identify_list_size = MBSTRG(default_detect_order_list_size);
756 						size_t i;
757 						bauto = 1;
758 						for (i = 0; i < identify_list_size; i++) {
759 							*entry++ = mbfl_no2encoding(*src++);
760 							n++;
761 						}
762 					}
763 				} else {
764 					const mbfl_encoding *encoding = mbfl_name2encoding(p1);
765 					if (encoding) {
766 						*entry++ = encoding;
767 						n++;
768 					} else {
769 						ret = 0;
770 					}
771 				}
772 				p1 = p2 + 1;
773 			} while (n < size && p2 != NULL);
774 			if (n > 0) {
775 				if (return_list) {
776 					*return_list = list;
777 				} else {
778 					pefree(list, persistent);
779 				}
780 			} else {
781 				pefree(list, persistent);
782 				if (return_list) {
783 					*return_list = NULL;
784 				}
785 				ret = 0;
786 			}
787 			if (return_size) {
788 				*return_size = n;
789 			}
790 		} else {
791 			if (return_list) {
792 				*return_list = NULL;
793 			}
794 			if (return_size) {
795 				*return_size = 0;
796 			}
797 			ret = 0;
798 		}
799 		efree(tmpstr);
800 	}
801 
802 	return ret;
803 }
804 /* }}} */
805 
806 /* {{{ static int php_mb_parse_encoding_array()
807  *  Return 0 if input contains any illegal encoding, otherwise 1.
808  *  Even if any illegal encoding is detected the result may contain a list
809  *  of parsed encodings.
810  */
811 static int
php_mb_parse_encoding_array(zval * array,const mbfl_encoding *** return_list,size_t * return_size,int persistent TSRMLS_DC)812 php_mb_parse_encoding_array(zval *array, const mbfl_encoding ***return_list, size_t *return_size, int persistent TSRMLS_DC)
813 {
814 	zval **hash_entry;
815 	HashTable *target_hash;
816 	int i, n, size, bauto, ret = SUCCESS;
817 	const mbfl_encoding **list, **entry;
818 
819 	list = NULL;
820 	if (Z_TYPE_P(array) == IS_ARRAY) {
821 		target_hash = Z_ARRVAL_P(array);
822 		zend_hash_internal_pointer_reset(target_hash);
823 		i = zend_hash_num_elements(target_hash);
824 		size = i + MBSTRG(default_detect_order_list_size);
825 		list = (const mbfl_encoding **)pecalloc(size, sizeof(mbfl_encoding*), persistent);
826 		if (list != NULL) {
827 			entry = list;
828 			bauto = 0;
829 			n = 0;
830 			while (i > 0) {
831 				if (zend_hash_get_current_data(target_hash, (void **) &hash_entry) == FAILURE) {
832 					break;
833 				}
834 				convert_to_string_ex(hash_entry);
835 				if (strcasecmp(Z_STRVAL_PP(hash_entry), "auto") == 0) {
836 					if (!bauto) {
837 						const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
838 						const size_t identify_list_size = MBSTRG(default_detect_order_list_size);
839 						size_t j;
840 
841 						bauto = 1;
842 						for (j = 0; j < identify_list_size; j++) {
843 							*entry++ = mbfl_no2encoding(*src++);
844 							n++;
845 						}
846 					}
847 				} else {
848 					const mbfl_encoding *encoding = mbfl_name2encoding(Z_STRVAL_PP(hash_entry));
849 					if (encoding) {
850 						*entry++ = encoding;
851 						n++;
852 					} else {
853 						ret = FAILURE;
854 					}
855 				}
856 				zend_hash_move_forward(target_hash);
857 				i--;
858 			}
859 			if (n > 0) {
860 				if (return_list) {
861 					*return_list = list;
862 				} else {
863 					pefree(list, persistent);
864 				}
865 			} else {
866 				pefree(list, persistent);
867 				if (return_list) {
868 					*return_list = NULL;
869 				}
870 				ret = FAILURE;
871 			}
872 			if (return_size) {
873 				*return_size = n;
874 			}
875 		} else {
876 			if (return_list) {
877 				*return_list = NULL;
878 			}
879 			if (return_size) {
880 				*return_size = 0;
881 			}
882 			ret = FAILURE;
883 		}
884 	}
885 
886 	return ret;
887 }
888 /* }}} */
889 
890 /* {{{ zend_multibyte interface */
php_mb_zend_encoding_fetcher(const char * encoding_name TSRMLS_DC)891 static const zend_encoding* php_mb_zend_encoding_fetcher(const char *encoding_name TSRMLS_DC)
892 {
893 	return (const zend_encoding*)mbfl_name2encoding(encoding_name);
894 }
895 
php_mb_zend_encoding_name_getter(const zend_encoding * encoding)896 static const char *php_mb_zend_encoding_name_getter(const zend_encoding *encoding)
897 {
898 	return ((const mbfl_encoding *)encoding)->name;
899 }
900 
php_mb_zend_encoding_lexer_compatibility_checker(const zend_encoding * _encoding)901 static int php_mb_zend_encoding_lexer_compatibility_checker(const zend_encoding *_encoding)
902 {
903 	const mbfl_encoding *encoding = (const mbfl_encoding*)_encoding;
904 	if (encoding->flag & MBFL_ENCTYPE_SBCS) {
905 		return 1;
906 	}
907 	if ((encoding->flag & (MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE)) == MBFL_ENCTYPE_MBCS) {
908 		return 1;
909 	}
910 	return 0;
911 }
912 
/* Runs mbfl_identify_encoding2() over the given bytes and returns its
 * result as a zend_encoding handle. When `list` is NULL the candidates come
 * from MBSTRG(current_detect_order_list) instead; the last argument of the
 * identify call is always 0. */
static const zend_encoding *php_mb_zend_encoding_detector(const unsigned char *arg_string, size_t arg_length, const zend_encoding **list, size_t list_size TSRMLS_DC)
{
	mbfl_string subject;

	/* describe the input buffer */
	mbfl_string_init(&subject);
	subject.no_language = MBSTRG(language);
	subject.val = (unsigned char *)arg_string;
	subject.len = arg_length;

	/* fall back to the current detect order when no candidates were given */
	if (list == NULL) {
		list = (const zend_encoding **)MBSTRG(current_detect_order_list);
		list_size = MBSTRG(current_detect_order_list_size);
	}

	return (const zend_encoding *) mbfl_identify_encoding2(&subject, (const mbfl_encoding **)list, list_size, 0);
}
928 
/* Converts `from` (from_length bytes, in encoding_from) to encoding_to with
 * an mbfl buffer converter configured from the current illegal-character
 * mode and substitute character.
 *
 * On success *to and *to_length receive the converter's result buffer and
 * length, and the position reported by mbfl_buffer_converter_feed2() is
 * returned. On any failure (size_t)-1 is returned and *to / *to_length are
 * left untouched. */
static size_t php_mb_zend_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const zend_encoding *encoding_to, const zend_encoding *encoding_from TSRMLS_DC)
{
	mbfl_string input, output;
	mbfl_buffer_converter *converter;
	int consumed;
	size_t retval = (size_t)-1;

	/* describe the source buffer */
	mbfl_string_init(&input);
	mbfl_string_init(&output);
	input.no_encoding = ((const mbfl_encoding*)encoding_from)->no_encoding;
	input.no_language = MBSTRG(language);
	input.val = (unsigned char*)from;
	input.len = from_length;

	/* set up the converter */
	converter = mbfl_buffer_converter_new2((const mbfl_encoding *)encoding_from, (const mbfl_encoding *)encoding_to, input.len);
	if (converter == NULL) {
		return retval;
	}
	mbfl_buffer_converter_illegal_mode(converter, MBSTRG(current_filter_illegal_mode));
	mbfl_buffer_converter_illegal_substchar(converter, MBSTRG(current_filter_illegal_substchar));

	/* feed, flush, and collect; a non-zero feed status means failure */
	if (mbfl_buffer_converter_feed2(converter, &input, &consumed) == 0) {
		mbfl_buffer_converter_flush(converter);
		if (mbfl_buffer_converter_result(converter, &output)) {
			*to = output.val;
			*to_length = output.len;
			retval = consumed;
		}
	}

	mbfl_buffer_converter_delete(converter);

	return retval;
}
972 
/* Zend multibyte callback: thin adapter that forwards to
 * php_mb_parse_encoding_list(), reinterpreting the zend_encoding list pointer
 * as a libmbfl encoding list pointer. The callee's return value is passed
 * through unchanged. */
static int php_mb_zend_encoding_list_parser(const char *encoding_list, size_t encoding_list_len, const zend_encoding ***return_list, size_t *return_size, int persistent TSRMLS_DC)
{
	const mbfl_encoding ***mb_list = (const mbfl_encoding ***)return_list;

	return php_mb_parse_encoding_list(encoding_list, encoding_list_len, mb_list, return_size, persistent TSRMLS_CC);
}
977 
php_mb_zend_internal_encoding_getter(TSRMLS_D)978 static const zend_encoding *php_mb_zend_internal_encoding_getter(TSRMLS_D)
979 {
980 	return (const zend_encoding *)MBSTRG(internal_encoding);
981 }
982 
/* Zend multibyte callback: stores the engine-supplied encoding in the
 * module-global internal_encoding slot. Always reports SUCCESS. */
static int php_mb_zend_internal_encoding_setter(const zend_encoding *encoding TSRMLS_DC)
{
	const mbfl_encoding *mb_enc = (const mbfl_encoding *)encoding;

	MBSTRG(internal_encoding) = mb_enc;
	return SUCCESS;
}
988 
/* Callback table handed to zend_multibyte_set_functions() during module
 * startup. The first entry is the provider name; the remaining entries are,
 * in order: encoding lookup by name, encoding name getter, lexer
 * compatibility check, encoding detector, converter, encoding-list parser,
 * and the internal-encoding getter and setter. The order is positional and
 * must match the zend_multibyte_functions declaration. */
989 static zend_multibyte_functions php_mb_zend_multibyte_functions = {
990 	"mbstring",
991 	php_mb_zend_encoding_fetcher,
992 	php_mb_zend_encoding_name_getter,
993 	php_mb_zend_encoding_lexer_compatibility_checker,
994 	php_mb_zend_encoding_detector,
995 	php_mb_zend_encoding_converter,
996 	php_mb_zend_encoding_list_parser,
997 	php_mb_zend_internal_encoding_getter,
998 	php_mb_zend_internal_encoding_setter
999 };
1000 /* }}} */
1001 
/* Prototypes for the small regex abstraction used by the
 * mbstring.http_output_conv_mimetypes handling: compile a pattern into an
 * opaque handle, match a string against the handle, and release the handle.
 * The definitions are chosen at compile time (HAVE_ONIG, otherwise
 * HAVE_PCRE / HAVE_BUNDLED_PCRE). */
1002 static void *_php_mb_compile_regex(const char *pattern TSRMLS_DC);
1003 static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len);
1004 static void _php_mb_free_regex(void *opaque);
1005 
1006 #if HAVE_ONIG
1007 /* {{{ _php_mb_compile_regex */
/* Compiles `pattern` with Oniguruma (ASCII encoding, Perl syntax,
 * case-insensitive, capture groups disabled).
 *
 * Returns the compiled regex as an opaque pointer, or NULL after emitting an
 * E_WARNING of the form "<pattern>: <oniguruma message>" when compilation
 * fails. */
static void *_php_mb_compile_regex(const char *pattern TSRMLS_DC)
{
	php_mb_regex_t *retval;
	OnigErrorInfo err_info;
	int err_code;

	if ((err_code = onig_new(&retval,
			(const OnigUChar *)pattern,
			(const OnigUChar *)pattern + strlen(pattern),
			ONIG_OPTION_IGNORECASE | ONIG_OPTION_DONT_CAPTURE_GROUP,
			ONIG_ENCODING_ASCII, &OnigSyntaxPerl, &err_info))) {
		OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];

		/* onig_error_code_to_str() is variadic and reads its optional
		 * argument as an OnigErrorInfo pointer, so the address must be
		 * passed rather than the structure itself. */
		onig_error_code_to_str(err_str, err_code, &err_info);
		php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s: %s", pattern, err_str);
		retval = NULL;
	}
	return retval;
}
1026 /* }}} */
1027 
1028 /* {{{ _php_mb_match_regex */
_php_mb_match_regex(void * opaque,const char * str,size_t str_len)1029 static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len)
1030 {
1031 	return onig_search((php_mb_regex_t *)opaque, (const OnigUChar *)str,
1032 			(const OnigUChar*)str + str_len, (const OnigUChar *)str,
1033 			(const OnigUChar*)str + str_len, NULL, ONIG_OPTION_NONE) >= 0;
1034 }
1035 /* }}} */
1036 
1037 /* {{{ _php_mb_free_regex */
_php_mb_free_regex(void * opaque)1038 static void _php_mb_free_regex(void *opaque)
1039 {
1040 	onig_free((php_mb_regex_t *)opaque);
1041 }
1042 /* }}} */
1043 #elif HAVE_PCRE || HAVE_BUNDLED_PCRE
1044 /* {{{ _php_mb_compile_regex */
/* Compiles `pattern` with PCRE in case-insensitive mode.
 *
 * Returns the compiled pattern as an opaque pointer, or NULL after emitting
 * an E_WARNING of the form "<pattern> (offset=<n>): <pcre message>" when
 * compilation fails. */
static void *_php_mb_compile_regex(const char *pattern TSRMLS_DC)
{
	const char *error_text;
	int error_at;
	pcre *compiled;

	compiled = pcre_compile(pattern, PCRE_CASELESS, &error_text, &error_at, NULL);
	if (compiled == NULL) {
		php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s (offset=%d): %s", pattern, error_at, error_text);
	}
	return compiled;
}
1057 /* }}} */
1058 
1059 /* {{{ _php_mb_match_regex */
_php_mb_match_regex(void * opaque,const char * str,size_t str_len)1060 static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len)
1061 {
1062 	return pcre_exec((pcre *)opaque, NULL, str, (int)str_len, 0,
1063 			0, NULL, 0) >= 0;
1064 }
1065 /* }}} */
1066 
1067 /* {{{ _php_mb_free_regex */
/* Releases a PCRE pattern previously produced by _php_mb_compile_regex()
 * by handing it to pcre_free. */
static void _php_mb_free_regex(void *opaque)
{
	void *compiled = opaque;

	pcre_free(compiled);
}
1072 /* }}} */
1073 #endif
1074 
1075 /* {{{ php_mb_nls_get_default_detect_order_list */
php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang,enum mbfl_no_encoding ** plist,size_t * plist_size)1076 static int php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang, enum mbfl_no_encoding **plist, size_t *plist_size)
1077 {
1078 	size_t i;
1079 
1080 	*plist = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut;
1081 	*plist_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
1082 
1083 	for (i = 0; i < sizeof(php_mb_default_identify_list) / sizeof(php_mb_default_identify_list[0]); i++) {
1084 		if (php_mb_default_identify_list[i].lang == lang) {
1085 			*plist = (enum mbfl_no_encoding *)php_mb_default_identify_list[i].list;
1086 			*plist_size = php_mb_default_identify_list[i].list_size;
1087 			return 1;
1088 		}
1089 	}
1090 	return 0;
1091 }
1092 /* }}} */
1093 
/* Copies a configuration value out of `start` into a freshly emalloc'ed,
 * NUL-terminated buffer.
 *
 * Scanning covers at most `len` bytes and stops at the first byte equal to
 * `quote`. A backslash followed by a backslash, or (when `quote` is non-zero)
 * by the quote character, is collapsed to the escaped byte. Everything else
 * is copied one character at a time, where the character width comes from
 * php_mb_mbchar_bytes_ex() for the given encoding, so that bytes inside a
 * multibyte character are never interpreted as escapes or quotes. */
static char *php_mb_rfc1867_substring_conf(const zend_encoding *encoding, char *start, int len, char quote TSRMLS_DC)
{
	/* len + 2: room for one escaped byte read just past `len` plus the NUL */
	char *buffer = emalloc(len + 2);
	char *out = buffer;
	int i = 0;

	while (i < len && start[i] != quote) {
		const char next = (start[i] == '\\') ? start[i + 1] : '\0';

		if (start[i] == '\\' && (next == '\\' || (quote && next == quote))) {
			/* escaped backslash / quote: emit the escaped byte only */
			*out++ = next;
			i += 2;
		} else {
			size_t remaining = php_mb_mbchar_bytes_ex(start+i, (const mbfl_encoding *)encoding);

			/* copy the whole character, but never past `len` */
			while (remaining > 0 && i < len) {
				*out++ = start[i++];
				remaining--;
			}
		}
	}

	*out = '\0';
	return buffer;
}
1116 
/* Splits the next word off *line, using `stop` as the delimiter.
 *
 * Delimiters inside single- or double-quoted runs are ignored; within a
 * quoted run a backslash immediately followed by the active quote character
 * is skipped as a pair. Outside quotes the cursor advances by whole
 * characters as measured by php_mb_mbchar_bytes_ex().
 *
 * Returns an emalloc'ed copy of the word (quotes included). *line is advanced
 * past the word and past any run of consecutive `stop` characters; if no
 * delimiter was found the remainder of the line is returned and *line is
 * moved to its terminating NUL. */
static char *php_mb_rfc1867_getword(const zend_encoding *encoding, char **line, char stop TSRMLS_DC) /* {{{ */
{
	const mbfl_encoding *mb_enc = (const mbfl_encoding *)encoding;
	char *cursor = *line;
	char *word;

	while (*cursor != '\0' && *cursor != stop) {
		const char quote = *cursor;

		if (quote != '"' && quote != '\'') {
			cursor += php_mb_mbchar_bytes_ex(cursor, mb_enc);
			continue;
		}

		/* skip the quoted run, honouring backslash-escaped quotes */
		++cursor;
		while (*cursor != '\0' && *cursor != quote) {
			cursor += (cursor[0] == '\\' && cursor[1] == quote) ? 2 : 1;
		}
		if (*cursor != '\0') {
			++cursor; /* step over the closing quote */
		}
	}

	if (*cursor == '\0') {
		/* no delimiter: the word is the rest of the line */
		word = estrdup(*line);
		*line += strlen(*line);
		return word;
	}

	word = estrndup(*line, cursor - *line);

	/* swallow the delimiter(s) that follow the word */
	while (*cursor == stop) {
		cursor += php_mb_mbchar_bytes_ex(cursor, mb_enc);
	}

	*line = cursor;
	return word;
}
1155 /* }}} */
1156 
/* Extracts one configuration value from `str`.
 *
 * Leading whitespace is skipped. An empty remainder yields an emalloc'ed
 * empty string. A value that starts with a single or double quote is read up
 * to the matching quote; otherwise it extends to the next whitespace byte.
 * The actual copying (and unescaping) is delegated to
 * php_mb_rfc1867_substring_conf(). */
static char *php_mb_rfc1867_getword_conf(const zend_encoding *encoding, char *str TSRMLS_DC) /* {{{ */
{
	char *value = str;
	char *value_end;

	while (*value && isspace(*(unsigned char *)value)) {
		++value;
	}

	if (*value == '\0') {
		return estrdup("");
	}

	if (*value == '"' || *value == '\'') {
		const char quote = *value;

		/* quoted value: start after the opening quote */
		++value;
		return php_mb_rfc1867_substring_conf(encoding, value, strlen(value), quote TSRMLS_CC);
	}

	/* bare value: runs until the next whitespace byte */
	for (value_end = value; *value_end && !isspace(*(unsigned char *)value_end); ++value_end) {
		/* nothing */
	}
	return php_mb_rfc1867_substring_conf(encoding, value, value_end - value, 0 TSRMLS_CC);
}
1181 /* }}} */
1182 
/* Returns a pointer into `filename` just past its last path separator
 * ('\\' or '/', whichever occurs later), or `filename` itself when it
 * contains neither. Separators are located with php_mb_safe_strrchr_ex() for
 * the given encoding. No memory is allocated. */
static char *php_mb_rfc1867_basename(const zend_encoding *encoding, char *filename TSRMLS_DC) /* {{{ */
{
	const mbfl_encoding *mb_enc = (const mbfl_encoding *)encoding;
	const size_t filename_len = strlen(filename);
	char *last_backslash, *last_slash, *last_sep;

	/* Strictly speaking the backslash is a path separator on win32 only.
	 * However, IE always sends the full client-side path of the uploaded
	 * file, so without this the user would see a bogus file name unless they
	 * call basename() themselves. Until that stops being a concern the check
	 * has to stay enabled on every platform. */
	last_backslash = php_mb_safe_strrchr_ex(filename, '\\', filename_len, mb_enc);
	last_slash = php_mb_safe_strrchr_ex(filename, '/', filename_len, mb_enc);

	/* pick whichever separator sits further to the right */
	last_sep = last_backslash;
	if (last_slash != NULL && (last_sep == NULL || last_slash >= last_sep)) {
		last_sep = last_slash;
	}

	return (last_sep != NULL) ? last_sep + 1 : filename;
}
1210 /* }}} */
1211 
1212 /* {{{ php.ini directive handler */
1213 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_language) */
PHP_INI_MH(OnUpdate_mbstring_language)1214 static PHP_INI_MH(OnUpdate_mbstring_language)
1215 {
1216 	enum mbfl_no_language no_language;
1217 
1218 	no_language = mbfl_name2no_language(new_value);
1219 	if (no_language == mbfl_no_language_invalid) {
1220 		MBSTRG(language) = mbfl_no_language_neutral;
1221 		return FAILURE;
1222 	}
1223 	MBSTRG(language) = no_language;
1224 	php_mb_nls_get_default_detect_order_list(no_language, &MBSTRG(default_detect_order_list), &MBSTRG(default_detect_order_list_size));
1225 	return SUCCESS;
1226 }
1227 /* }}} */
1228 
1229 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_detect_order) */
PHP_INI_MH(OnUpdate_mbstring_detect_order)1230 static PHP_INI_MH(OnUpdate_mbstring_detect_order)
1231 {
1232 	const mbfl_encoding **list;
1233 	size_t size;
1234 
1235 	if (!new_value) {
1236 		if (MBSTRG(detect_order_list)) {
1237 			pefree(MBSTRG(detect_order_list), 1);
1238 		}
1239 		MBSTRG(detect_order_list) = NULL;
1240 		MBSTRG(detect_order_list_size) = 0;
1241 		return SUCCESS;
1242 	}
1243 
1244 	if (FAILURE == php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) {
1245 		return FAILURE;
1246 	}
1247 
1248 	if (MBSTRG(detect_order_list)) {
1249 		pefree(MBSTRG(detect_order_list), 1);
1250 	}
1251 	MBSTRG(detect_order_list) = list;
1252 	MBSTRG(detect_order_list_size) = size;
1253 	return SUCCESS;
1254 }
1255 /* }}} */
1256 
1257 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_input) */
PHP_INI_MH(OnUpdate_mbstring_http_input)1258 static PHP_INI_MH(OnUpdate_mbstring_http_input)
1259 {
1260 	const mbfl_encoding **list;
1261 	size_t size;
1262 
1263 	if (!new_value || !new_value_length) {
1264 		if (MBSTRG(http_input_list)) {
1265 			pefree(MBSTRG(http_input_list), 1);
1266 		}
1267 		if (SUCCESS == php_mb_parse_encoding_list(get_input_encoding(TSRMLS_C), strlen(get_input_encoding(TSRMLS_C))+1, &list, &size, 1 TSRMLS_CC)) {
1268 			MBSTRG(http_input_list) = list;
1269 			MBSTRG(http_input_list_size) = size;
1270 			return SUCCESS;
1271 		}
1272 		MBSTRG(http_input_list) = NULL;
1273 		MBSTRG(http_input_list_size) = 0;
1274 		return SUCCESS;
1275 	}
1276 
1277 	if (FAILURE == php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) {
1278 		return FAILURE;
1279 	}
1280 
1281 	if (MBSTRG(http_input_list)) {
1282 		pefree(MBSTRG(http_input_list), 1);
1283 	}
1284 	MBSTRG(http_input_list) = list;
1285 	MBSTRG(http_input_list_size) = size;
1286 
1287 	if (stage & (PHP_INI_STAGE_ACTIVATE | PHP_INI_STAGE_RUNTIME)) {
1288 		php_error_docref("ref.mbstring" TSRMLS_CC, E_DEPRECATED, "Use of mbstring.http_input is deprecated");
1289 	}
1290 
1291 	return SUCCESS;
1292 }
1293 /* }}} */
1294 
1295 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output) */
PHP_INI_MH(OnUpdate_mbstring_http_output)1296 static PHP_INI_MH(OnUpdate_mbstring_http_output)
1297 {
1298 	const mbfl_encoding *encoding;
1299 
1300 	if (new_value == NULL || new_value_length == 0) {
1301 		encoding = mbfl_name2encoding(get_output_encoding(TSRMLS_C));
1302 		if (!encoding) {
1303 			MBSTRG(http_output_encoding) = &mbfl_encoding_pass;
1304 			MBSTRG(current_http_output_encoding) = &mbfl_encoding_pass;
1305 			return SUCCESS;
1306 		}
1307 	} else {
1308 		encoding = mbfl_name2encoding(new_value);
1309 		if (!encoding) {
1310 			MBSTRG(http_output_encoding) = &mbfl_encoding_pass;
1311 			MBSTRG(current_http_output_encoding) = &mbfl_encoding_pass;
1312 			return FAILURE;
1313 		}
1314 	}
1315 	MBSTRG(http_output_encoding) = encoding;
1316 	MBSTRG(current_http_output_encoding) = encoding;
1317 
1318 	if (stage & (PHP_INI_STAGE_ACTIVATE | PHP_INI_STAGE_RUNTIME)) {
1319 		php_error_docref("ref.mbstring" TSRMLS_CC, E_DEPRECATED, "Use of mbstring.http_output is deprecated");
1320 	}
1321 
1322 	return SUCCESS;
1323 }
1324 /* }}} */
1325 
1326 /* {{{ static _php_mb_ini_mbstring_internal_encoding_set */
/* Applies an internal-encoding name to the module globals.
 *
 * new_value / new_value_length: encoding name; a NULL, empty or unknown name
 * falls back to UTF-8.
 *
 * Both the configured and the current internal encoding are updated. When
 * mbregex is compiled in, its default and current character types are
 * updated as well; if mbregex does not accept the name, UTF-8 is used for
 * both so that the two regex settings cannot diverge.
 *
 * Always returns SUCCESS. */
int _php_mb_ini_mbstring_internal_encoding_set(const char *new_value, uint new_value_length TSRMLS_DC)
{
	const mbfl_encoding *encoding;

	if (!new_value || !new_value_length || !(encoding = mbfl_name2encoding(new_value))) {
		/* falls back to UTF-8 if an unknown encoding name is given */
		encoding = mbfl_no2encoding(mbfl_no_encoding_utf8);
	}
	MBSTRG(internal_encoding) = encoding;
	MBSTRG(current_internal_encoding) = encoding;
#if HAVE_MBREGEX
	{
		const char *enc_name = new_value;
		if (FAILURE == php_mb_regex_set_default_mbctype(enc_name TSRMLS_CC)) {
			/* falls back to UTF-8 if an unknown encoding name is given */
			enc_name = "UTF-8";
			php_mb_regex_set_default_mbctype(enc_name TSRMLS_CC);
		}
		/* use the (possibly substituted) name here too; passing the raw
		 * new_value would leave the current mbctype out of step with the
		 * default whenever the fallback above was taken */
		php_mb_regex_set_mbctype(enc_name TSRMLS_CC);
	}
#endif
	return SUCCESS;
}
1350 /* }}} */
1351 
1352 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_internal_encoding) */
PHP_INI_MH(OnUpdate_mbstring_internal_encoding)1353 static PHP_INI_MH(OnUpdate_mbstring_internal_encoding)
1354 {
1355 	if (stage & (PHP_INI_STAGE_ACTIVATE | PHP_INI_STAGE_RUNTIME)) {
1356 		php_error_docref("ref.mbstring" TSRMLS_CC, E_DEPRECATED, "Use of mbstring.internal_encoding is deprecated");
1357 	}
1358 
1359 	if (OnUpdateString(entry, new_value, new_value_length, mh_arg1, mh_arg2, mh_arg3, stage TSRMLS_CC) == FAILURE) {
1360 		return FAILURE;
1361 	}
1362 
1363 	if (stage & (PHP_INI_STAGE_STARTUP | PHP_INI_STAGE_SHUTDOWN | PHP_INI_STAGE_RUNTIME)) {
1364 		if (new_value && new_value_length) {
1365 			return _php_mb_ini_mbstring_internal_encoding_set(new_value, new_value_length TSRMLS_CC);
1366 		} else {
1367 			return _php_mb_ini_mbstring_internal_encoding_set(get_internal_encoding(TSRMLS_C), strlen(get_internal_encoding(TSRMLS_C))+1 TSRMLS_CC);
1368 		}
1369 	} else {
1370 		/* the corresponding mbstring globals needs to be set according to the
1371 		 * ini value in the later stage because it never falls back to the
1372 		 * default value if 1. no value for mbstring.internal_encoding is given,
1373 		 * 2. mbstring.language directive is processed in per-dir or runtime
1374 		 * context and 3. call to the handler for mbstring.language is done
1375 		 * after mbstring.internal_encoding is handled. */
1376 		return SUCCESS;
1377 	}
1378 }
1379 /* }}} */
1380 
1381 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_substitute_character) */
PHP_INI_MH(OnUpdate_mbstring_substitute_character)1382 static PHP_INI_MH(OnUpdate_mbstring_substitute_character)
1383 {
1384 	int c;
1385 	char *endptr = NULL;
1386 
1387 	if (new_value != NULL) {
1388 		if (strcasecmp("none", new_value) == 0) {
1389 			MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
1390 			MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
1391 		} else if (strcasecmp("long", new_value) == 0) {
1392 			MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
1393 			MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
1394 		} else if (strcasecmp("entity", new_value) == 0) {
1395 			MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
1396 			MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
1397 		} else {
1398 			MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1399 			MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1400 			if (new_value_length >0) {
1401 				c = strtol(new_value, &endptr, 0);
1402 				if (*endptr == '\0') {
1403 					MBSTRG(filter_illegal_substchar) = c;
1404 					MBSTRG(current_filter_illegal_substchar) = c;
1405 				}
1406 			}
1407 		}
1408 	} else {
1409 		MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1410 		MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1411 		MBSTRG(filter_illegal_substchar) = 0x3f;	/* '?' */
1412 		MBSTRG(current_filter_illegal_substchar) = 0x3f;	/* '?' */
1413 	}
1414 
1415 	return SUCCESS;
1416 }
1417 /* }}} */
1418 
1419 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_encoding_translation) */
PHP_INI_MH(OnUpdate_mbstring_encoding_translation)1420 static PHP_INI_MH(OnUpdate_mbstring_encoding_translation)
1421 {
1422 	if (new_value == NULL) {
1423 		return FAILURE;
1424 	}
1425 
1426 	OnUpdateBool(entry, new_value, new_value_length, mh_arg1, mh_arg2, mh_arg3, stage TSRMLS_CC);
1427 
1428 	if (MBSTRG(encoding_translation)) {
1429 		sapi_unregister_post_entry(php_post_entries TSRMLS_CC);
1430 		sapi_register_post_entries(mbstr_post_entries TSRMLS_CC);
1431 	} else {
1432 		sapi_unregister_post_entry(mbstr_post_entries TSRMLS_CC);
1433 		sapi_register_post_entries(php_post_entries TSRMLS_CC);
1434 	}
1435 
1436 	return SUCCESS;
1437 }
1438 /* }}} */
1439 
1440 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes */
PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes)1441 static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes)
1442 {
1443 	zval tmp;
1444 	void *re = NULL;
1445 
1446 	if (!new_value) {
1447 		new_value = entry->orig_value;
1448 		new_value_length = entry->orig_value_length;
1449 	}
1450 	php_trim(new_value, new_value_length, NULL, 0, &tmp, 3 TSRMLS_CC);
1451 
1452 	if (Z_STRLEN(tmp) > 0) {
1453 		if (!(re = _php_mb_compile_regex(Z_STRVAL(tmp) TSRMLS_CC))) {
1454 			zval_dtor(&tmp);
1455 			return FAILURE;
1456 		}
1457 	}
1458 
1459 	if (MBSTRG(http_output_conv_mimetypes)) {
1460 		_php_mb_free_regex(MBSTRG(http_output_conv_mimetypes));
1461 	}
1462 
1463 	MBSTRG(http_output_conv_mimetypes) = re;
1464 
1465 	zval_dtor(&tmp);
1466 	return SUCCESS;
1467 }
1468 /* }}} */
1469 /* }}} */
1470 
1471 /* {{{ php.ini directive registration */
/* mbstring's php.ini directives.
 *
 * PHP_INI_ENTRY rows are handled entirely by the named OnUpdate_mbstring_*
 * handler; STD_* rows additionally name the zend_mbstring_globals member
 * they are bound to. All directives are PHP_INI_ALL except
 * mbstring.func_overload (PHP_INI_SYSTEM) and mbstring.encoding_translation
 * (PHP_INI_SYSTEM | PHP_INI_PERDIR).
 *
 * NOTE(review): mbstring.strict_detection is declared as a boolean entry but
 * updated through OnUpdateLong -- presumably the backing member is a long;
 * confirm against the globals declaration. */
1472 PHP_INI_BEGIN()
1473 	PHP_INI_ENTRY("mbstring.language", "neutral", PHP_INI_ALL, OnUpdate_mbstring_language)
1474 	PHP_INI_ENTRY("mbstring.detect_order", NULL, PHP_INI_ALL, OnUpdate_mbstring_detect_order)
1475 	PHP_INI_ENTRY("mbstring.http_input", NULL, PHP_INI_ALL, OnUpdate_mbstring_http_input)
1476 	PHP_INI_ENTRY("mbstring.http_output", NULL, PHP_INI_ALL, OnUpdate_mbstring_http_output)
1477 	STD_PHP_INI_ENTRY("mbstring.internal_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_internal_encoding, internal_encoding_name, zend_mbstring_globals, mbstring_globals)
1478 	PHP_INI_ENTRY("mbstring.substitute_character", NULL, PHP_INI_ALL, OnUpdate_mbstring_substitute_character)
1479 	STD_PHP_INI_ENTRY("mbstring.func_overload", "0",
1480 	PHP_INI_SYSTEM, OnUpdateLong, func_overload, zend_mbstring_globals, mbstring_globals)
1481 
1482 	STD_PHP_INI_BOOLEAN("mbstring.encoding_translation", "0",
1483 		PHP_INI_SYSTEM | PHP_INI_PERDIR,
1484 		OnUpdate_mbstring_encoding_translation,
1485 		encoding_translation, zend_mbstring_globals, mbstring_globals)
1486 	PHP_INI_ENTRY("mbstring.http_output_conv_mimetypes",
1487 		"^(text/|application/xhtml\\+xml)",
1488 		PHP_INI_ALL,
1489 		OnUpdate_mbstring_http_output_conv_mimetypes)
1490 
1491 	STD_PHP_INI_BOOLEAN("mbstring.strict_detection", "0",
1492 		PHP_INI_ALL,
1493 		OnUpdateLong,
1494 		strict_detection, zend_mbstring_globals, mbstring_globals)
PHP_INI_END()1495 PHP_INI_END()
1496 /* }}} */
1497 
1498 /* {{{ module global initialize handler */
1499 static PHP_GINIT_FUNCTION(mbstring)
1500 {
1501 	mbstring_globals->language = mbfl_no_language_uni;
1502 	mbstring_globals->internal_encoding = NULL;
1503 	mbstring_globals->current_internal_encoding = mbstring_globals->internal_encoding;
1504 	mbstring_globals->http_output_encoding = &mbfl_encoding_pass;
1505 	mbstring_globals->current_http_output_encoding = &mbfl_encoding_pass;
1506 	mbstring_globals->http_input_identify = NULL;
1507 	mbstring_globals->http_input_identify_get = NULL;
1508 	mbstring_globals->http_input_identify_post = NULL;
1509 	mbstring_globals->http_input_identify_cookie = NULL;
1510 	mbstring_globals->http_input_identify_string = NULL;
1511 	mbstring_globals->http_input_list = NULL;
1512 	mbstring_globals->http_input_list_size = 0;
1513 	mbstring_globals->detect_order_list = NULL;
1514 	mbstring_globals->detect_order_list_size = 0;
1515 	mbstring_globals->current_detect_order_list = NULL;
1516 	mbstring_globals->current_detect_order_list_size = 0;
1517 	mbstring_globals->default_detect_order_list = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut;
1518 	mbstring_globals->default_detect_order_list_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
1519 	mbstring_globals->filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1520 	mbstring_globals->filter_illegal_substchar = 0x3f;	/* '?' */
1521 	mbstring_globals->current_filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1522 	mbstring_globals->current_filter_illegal_substchar = 0x3f;	/* '?' */
1523 	mbstring_globals->illegalchars = 0;
1524 	mbstring_globals->func_overload = 0;
1525 	mbstring_globals->encoding_translation = 0;
1526 	mbstring_globals->strict_detection = 0;
1527 	mbstring_globals->outconv = NULL;
1528 	mbstring_globals->http_output_conv_mimetypes = NULL;
1529 #if HAVE_MBREGEX
1530 	mbstring_globals->mb_regex_globals = php_mb_regex_globals_alloc(TSRMLS_C);
1531 #endif
1532 }
1533 /* }}} */
1534 
1535 /* {{{ PHP_GSHUTDOWN_FUNCTION */
PHP_GSHUTDOWN_FUNCTION(mbstring)1536 static PHP_GSHUTDOWN_FUNCTION(mbstring)
1537 {
1538 	if (mbstring_globals->http_input_list) {
1539 		free(mbstring_globals->http_input_list);
1540 	}
1541 	if (mbstring_globals->detect_order_list) {
1542 		free(mbstring_globals->detect_order_list);
1543 	}
1544 	if (mbstring_globals->http_output_conv_mimetypes) {
1545 		_php_mb_free_regex(mbstring_globals->http_output_conv_mimetypes);
1546 	}
1547 #if HAVE_MBREGEX
1548 	php_mb_regex_globals_free(mbstring_globals->mb_regex_globals TSRMLS_CC);
1549 #endif
1550 }
1551 /* }}} */
1552 
1553 /* {{{ PHP_MINIT_FUNCTION(mbstring) */
PHP_MINIT_FUNCTION(mbstring)1554 PHP_MINIT_FUNCTION(mbstring)
1555 {
1556 	__mbfl_allocators = &_php_mb_allocators;
1557 
1558 	REGISTER_INI_ENTRIES();
1559 
1560 	/* This is a global handler. Should not be set in a per-request handler. */
1561 	sapi_register_treat_data(mbstr_treat_data TSRMLS_CC);
1562 
1563 	/* Post handlers are stored in the thread-local context. */
1564 	if (MBSTRG(encoding_translation)) {
1565 		sapi_register_post_entries(mbstr_post_entries TSRMLS_CC);
1566 	}
1567 
1568 	REGISTER_LONG_CONSTANT("MB_OVERLOAD_MAIL", MB_OVERLOAD_MAIL, CONST_CS | CONST_PERSISTENT);
1569 	REGISTER_LONG_CONSTANT("MB_OVERLOAD_STRING", MB_OVERLOAD_STRING, CONST_CS | CONST_PERSISTENT);
1570 	REGISTER_LONG_CONSTANT("MB_OVERLOAD_REGEX", MB_OVERLOAD_REGEX, CONST_CS | CONST_PERSISTENT);
1571 
1572 	REGISTER_LONG_CONSTANT("MB_CASE_UPPER", PHP_UNICODE_CASE_UPPER, CONST_CS | CONST_PERSISTENT);
1573 	REGISTER_LONG_CONSTANT("MB_CASE_LOWER", PHP_UNICODE_CASE_LOWER, CONST_CS | CONST_PERSISTENT);
1574 	REGISTER_LONG_CONSTANT("MB_CASE_TITLE", PHP_UNICODE_CASE_TITLE, CONST_CS | CONST_PERSISTENT);
1575 
1576 #if HAVE_MBREGEX
1577 	PHP_MINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1578 #endif
1579 
1580 	if (FAILURE == zend_multibyte_set_functions(&php_mb_zend_multibyte_functions TSRMLS_CC)) {
1581 		return FAILURE;
1582 	}
1583 
1584 	php_rfc1867_set_multibyte_callbacks(
1585 		php_mb_encoding_translation,
1586 		php_mb_gpc_get_detect_order,
1587 		php_mb_gpc_set_input_encoding,
1588 		php_mb_rfc1867_getword,
1589 		php_mb_rfc1867_getword_conf,
1590 		php_mb_rfc1867_basename);
1591 
1592 	return SUCCESS;
1593 }
1594 /* }}} */
1595 
1596 /* {{{ PHP_MSHUTDOWN_FUNCTION(mbstring) */
PHP_MSHUTDOWN_FUNCTION(mbstring)1597 PHP_MSHUTDOWN_FUNCTION(mbstring)
1598 {
1599 	UNREGISTER_INI_ENTRIES();
1600 
1601 #if HAVE_MBREGEX
1602 	PHP_MSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1603 #endif
1604 
1605 	return SUCCESS;
1606 }
1607 /* }}} */
1608 
1609 /* {{{ PHP_RINIT_FUNCTION(mbstring) */
PHP_RINIT_FUNCTION(mbstring)1610 PHP_RINIT_FUNCTION(mbstring)
1611 {
1612 	zend_function *func, *orig;
1613 	const struct mb_overload_def *p;
1614 
1615 	MBSTRG(current_internal_encoding) = MBSTRG(internal_encoding);
1616 	MBSTRG(current_http_output_encoding) = MBSTRG(http_output_encoding);
1617 	MBSTRG(current_filter_illegal_mode) = MBSTRG(filter_illegal_mode);
1618 	MBSTRG(current_filter_illegal_substchar) = MBSTRG(filter_illegal_substchar);
1619 
1620 	MBSTRG(illegalchars) = 0;
1621 
1622 	php_mb_populate_current_detect_order_list(TSRMLS_C);
1623 
1624  	/* override original function. */
1625 	if (MBSTRG(func_overload)){
1626 		p = &(mb_ovld[0]);
1627 
1628 		while (p->type > 0) {
1629 			if ((MBSTRG(func_overload) & p->type) == p->type &&
1630 				zend_hash_find(EG(function_table), p->save_func,
1631 					strlen(p->save_func)+1, (void **)&orig) != SUCCESS) {
1632 
1633 				zend_hash_find(EG(function_table), p->ovld_func, strlen(p->ovld_func)+1 , (void **)&func);
1634 
1635 				if (zend_hash_find(EG(function_table), p->orig_func, strlen(p->orig_func)+1, (void **)&orig) != SUCCESS) {
1636 					php_error_docref("ref.mbstring" TSRMLS_CC, E_WARNING, "mbstring couldn't find function %s.", p->orig_func);
1637 					return FAILURE;
1638 				} else {
1639 					zend_hash_add(EG(function_table), p->save_func, strlen(p->save_func)+1, orig, sizeof(zend_function), NULL);
1640 
1641 					if (zend_hash_update(EG(function_table), p->orig_func, strlen(p->orig_func)+1, func, sizeof(zend_function),
1642 						NULL) == FAILURE) {
1643 						php_error_docref("ref.mbstring" TSRMLS_CC, E_WARNING, "mbstring couldn't replace function %s.", p->orig_func);
1644 						return FAILURE;
1645 					}
1646 				}
1647 			}
1648 			p++;
1649 		}
1650 	}
1651 #if HAVE_MBREGEX
1652 	PHP_RINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1653 #endif
1654 	zend_multibyte_set_internal_encoding((const zend_encoding *)MBSTRG(internal_encoding) TSRMLS_CC);
1655 
1656 	return SUCCESS;
1657 }
1658 /* }}} */
1659 
1660 /* {{{ PHP_RSHUTDOWN_FUNCTION(mbstring) */
PHP_RSHUTDOWN_FUNCTION(mbstring)1661 PHP_RSHUTDOWN_FUNCTION(mbstring)
1662 {
1663 	const struct mb_overload_def *p;
1664 	zend_function *orig;
1665 
1666 	if (MBSTRG(current_detect_order_list) != NULL) {
1667 		efree(MBSTRG(current_detect_order_list));
1668 		MBSTRG(current_detect_order_list) = NULL;
1669 		MBSTRG(current_detect_order_list_size) = 0;
1670 	}
1671 	if (MBSTRG(outconv) != NULL) {
1672 		MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
1673 		mbfl_buffer_converter_delete(MBSTRG(outconv));
1674 		MBSTRG(outconv) = NULL;
1675 	}
1676 
1677 	/* clear http input identification. */
1678 	MBSTRG(http_input_identify) = NULL;
1679 	MBSTRG(http_input_identify_post) = NULL;
1680 	MBSTRG(http_input_identify_get) = NULL;
1681 	MBSTRG(http_input_identify_cookie) = NULL;
1682 	MBSTRG(http_input_identify_string) = NULL;
1683 
1684  	/*  clear overloaded function. */
1685 	if (MBSTRG(func_overload)){
1686 		p = &(mb_ovld[0]);
1687 		while (p->type > 0) {
1688 			if ((MBSTRG(func_overload) & p->type) == p->type &&
1689 				zend_hash_find(EG(function_table), p->save_func,
1690 							   strlen(p->save_func)+1, (void **)&orig) == SUCCESS) {
1691 
1692 				zend_hash_update(EG(function_table), p->orig_func, strlen(p->orig_func)+1, orig, sizeof(zend_function), NULL);
1693 				zend_hash_del(EG(function_table), p->save_func, strlen(p->save_func)+1);
1694 			}
1695 			p++;
1696 		}
1697 	}
1698 
1699 #if HAVE_MBREGEX
1700 	PHP_RSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1701 #endif
1702 
1703 	return SUCCESS;
1704 }
1705 /* }}} */
1706 
1707 /* {{{ PHP_MINFO_FUNCTION(mbstring) */
PHP_MINFO_FUNCTION(mbstring)1708 PHP_MINFO_FUNCTION(mbstring)
1709 {
1710 	php_info_print_table_start();
1711 	php_info_print_table_row(2, "Multibyte Support", "enabled");
1712 	php_info_print_table_row(2, "Multibyte string engine", "libmbfl");
1713 	php_info_print_table_row(2, "HTTP input encoding translation", MBSTRG(encoding_translation) ? "enabled": "disabled");
1714 	{
1715 		char tmp[256];
1716 		snprintf(tmp, sizeof(tmp), "%d.%d.%d", MBFL_VERSION_MAJOR, MBFL_VERSION_MINOR, MBFL_VERSION_TEENY);
1717 		php_info_print_table_row(2, "libmbfl version", tmp);
1718 	}
1719 	php_info_print_table_end();
1720 
1721 	php_info_print_table_start();
1722 	php_info_print_table_header(1, "mbstring extension makes use of \"streamable kanji code filter and converter\", which is distributed under the GNU Lesser General Public License version 2.1.");
1723 	php_info_print_table_end();
1724 
1725 #if HAVE_MBREGEX
1726 	PHP_MINFO(mb_regex)(ZEND_MODULE_INFO_FUNC_ARGS_PASSTHRU);
1727 #endif
1728 
1729 	DISPLAY_INI_ENTRIES();
1730 }
1731 /* }}} */
1732 
1733 /* {{{ proto string mb_language([string language])
1734    Sets the current language or Returns the current language as a string */
PHP_FUNCTION(mb_language)1735 PHP_FUNCTION(mb_language)
1736 {
1737 	char *name = NULL;
1738 	int name_len = 0;
1739 
1740 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &name, &name_len) == FAILURE) {
1741 		return;
1742 	}
1743 	if (name == NULL) {
1744 		RETVAL_STRING((char *)mbfl_no_language2name(MBSTRG(language)), 1);
1745 	} else {
1746 		if (FAILURE == zend_alter_ini_entry(
1747 				"mbstring.language", sizeof("mbstring.language"),
1748 				name, name_len, PHP_INI_USER, PHP_INI_STAGE_RUNTIME)) {
1749 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown language \"%s\"", name);
1750 			RETVAL_FALSE;
1751 		} else {
1752 			RETVAL_TRUE;
1753 		}
1754 	}
1755 }
1756 /* }}} */
1757 
1758 /* {{{ proto string mb_internal_encoding([string encoding])
1759    Sets the current internal encoding or Returns the current internal encoding as a string */
PHP_FUNCTION(mb_internal_encoding)1760 PHP_FUNCTION(mb_internal_encoding)
1761 {
1762 	const char *name = NULL;
1763 	int name_len;
1764 	const mbfl_encoding *encoding;
1765 
1766 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &name, &name_len) == FAILURE) {
1767 		RETURN_FALSE;
1768 	}
1769 	if (name == NULL) {
1770 		name = MBSTRG(current_internal_encoding) ? MBSTRG(current_internal_encoding)->name: NULL;
1771 		if (name != NULL) {
1772 			RETURN_STRING(name, 1);
1773 		} else {
1774 			RETURN_FALSE;
1775 		}
1776 	} else {
1777 		encoding = mbfl_name2encoding(name);
1778 		if (!encoding) {
1779 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
1780 			RETURN_FALSE;
1781 		} else {
1782 			MBSTRG(current_internal_encoding) = encoding;
1783 			RETURN_TRUE;
1784 		}
1785 	}
1786 }
1787 /* }}} */
1788 
1789 /* {{{ proto mixed mb_http_input([string type])
1790    Returns the input encoding */
PHP_FUNCTION(mb_http_input)1791 PHP_FUNCTION(mb_http_input)
1792 {
1793 	char *typ = NULL;
1794 	int typ_len;
1795 	int retname;
1796 	char *list, *temp;
1797 	const mbfl_encoding *result = NULL;
1798 
1799 	retname = 1;
1800  	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &typ, &typ_len) == FAILURE) {
1801  		RETURN_FALSE;
1802  	}
1803  	if (typ == NULL) {
1804  		result = MBSTRG(http_input_identify);
1805  	} else {
1806  		switch (*typ) {
1807 		case 'G':
1808 		case 'g':
1809 			result = MBSTRG(http_input_identify_get);
1810 			break;
1811 		case 'P':
1812 		case 'p':
1813 			result = MBSTRG(http_input_identify_post);
1814 			break;
1815 		case 'C':
1816 		case 'c':
1817 			result = MBSTRG(http_input_identify_cookie);
1818 			break;
1819 		case 'S':
1820 		case 's':
1821 			result = MBSTRG(http_input_identify_string);
1822 			break;
1823 		case 'I':
1824 		case 'i':
1825 			{
1826 				const mbfl_encoding **entry = MBSTRG(http_input_list);
1827 				const size_t n = MBSTRG(http_input_list_size);
1828 				size_t i;
1829 				array_init(return_value);
1830 				for (i = 0; i < n; i++) {
1831 					add_next_index_string(return_value, (*entry)->name, 1);
1832 					entry++;
1833 				}
1834 				retname = 0;
1835 			}
1836 			break;
1837 		case 'L':
1838 		case 'l':
1839 			{
1840 				const mbfl_encoding **entry = MBSTRG(http_input_list);
1841 				const size_t n = MBSTRG(http_input_list_size);
1842 				size_t i;
1843 				list = NULL;
1844 				for (i = 0; i < n; i++) {
1845 					if (list) {
1846 						temp = list;
1847 						spprintf(&list, 0, "%s,%s", temp, (*entry)->name);
1848 						efree(temp);
1849 						if (!list) {
1850 							break;
1851 						}
1852 					} else {
1853 						list = estrdup((*entry)->name);
1854 					}
1855 					entry++;
1856 				}
1857 			}
1858 			if (!list) {
1859 				RETURN_FALSE;
1860 			}
1861 			RETVAL_STRING(list, 0);
1862 			retname = 0;
1863 			break;
1864 		default:
1865 			result = MBSTRG(http_input_identify);
1866 			break;
1867 		}
1868 	}
1869 
1870 	if (retname) {
1871 		if (result) {
1872 			RETVAL_STRING(result->name, 1);
1873 		} else {
1874 			RETVAL_FALSE;
1875 		}
1876 	}
1877 }
1878 /* }}} */
1879 
1880 /* {{{ proto string mb_http_output([string encoding])
1881    Sets the current output_encoding or returns the current output_encoding as a string */
PHP_FUNCTION(mb_http_output)1882 PHP_FUNCTION(mb_http_output)
1883 {
1884 	const char *name = NULL;
1885 	int name_len;
1886 	const mbfl_encoding *encoding;
1887 
1888 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", (char **)&name, &name_len) == FAILURE) {
1889 		RETURN_FALSE;
1890 	}
1891 
1892 	if (name == NULL) {
1893 		name = MBSTRG(current_http_output_encoding) ? MBSTRG(current_http_output_encoding)->name: NULL;
1894 		if (name != NULL) {
1895 			RETURN_STRING(name, 1);
1896 		} else {
1897 			RETURN_FALSE;
1898 		}
1899 	} else {
1900 		encoding = mbfl_name2encoding(name);
1901 		if (!encoding) {
1902 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
1903 			RETURN_FALSE;
1904 		} else {
1905 			MBSTRG(current_http_output_encoding) = encoding;
1906 			RETURN_TRUE;
1907 		}
1908 	}
1909 }
1910 /* }}} */
1911 
1912 /* {{{ proto bool|array mb_detect_order([mixed encoding-list])
1913    Sets the current detect_order or Return the current detect_order as a array */
PHP_FUNCTION(mb_detect_order)1914 PHP_FUNCTION(mb_detect_order)
1915 {
1916 	zval **arg1 = NULL;
1917 
1918 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|Z", &arg1) == FAILURE) {
1919 		return;
1920 	}
1921 
1922 	if (!arg1) {
1923 		size_t i;
1924 		size_t n = MBSTRG(current_detect_order_list_size);
1925 		const mbfl_encoding **entry = MBSTRG(current_detect_order_list);
1926 		array_init(return_value);
1927 		for (i = 0; i < n; i++) {
1928 			add_next_index_string(return_value, (*entry)->name, 1);
1929 			entry++;
1930 		}
1931 	} else {
1932 		const mbfl_encoding **list = NULL;
1933 		size_t size = 0;
1934 		switch (Z_TYPE_PP(arg1)) {
1935 		case IS_ARRAY:
1936 			if (FAILURE == php_mb_parse_encoding_array(*arg1, &list, &size, 0 TSRMLS_CC)) {
1937 				if (list) {
1938 					efree(list);
1939 				}
1940 				RETURN_FALSE;
1941 			}
1942 			break;
1943 		default:
1944 			convert_to_string_ex(arg1);
1945 			if (FAILURE == php_mb_parse_encoding_list(Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1), &list, &size, 0 TSRMLS_CC)) {
1946 				if (list) {
1947 					efree(list);
1948 				}
1949 				RETURN_FALSE;
1950 			}
1951 			break;
1952 		}
1953 
1954 		if (list == NULL) {
1955 			RETURN_FALSE;
1956 		}
1957 
1958 		if (MBSTRG(current_detect_order_list)) {
1959 			efree(MBSTRG(current_detect_order_list));
1960 		}
1961 		MBSTRG(current_detect_order_list) = list;
1962 		MBSTRG(current_detect_order_list_size) = size;
1963 		RETURN_TRUE;
1964 	}
1965 }
1966 /* }}} */
1967 
1968 /* {{{ proto mixed mb_substitute_character([mixed substchar])
1969    Sets the current substitute_character or returns the current substitute_character */
PHP_FUNCTION(mb_substitute_character)1970 PHP_FUNCTION(mb_substitute_character)
1971 {
1972 	zval **arg1 = NULL;
1973 
1974 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|Z", &arg1) == FAILURE) {
1975 		return;
1976 	}
1977 
1978 	if (!arg1) {
1979 		if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
1980 			RETURN_STRING("none", 1);
1981 		} else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
1982 			RETURN_STRING("long", 1);
1983 		} else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
1984 			RETURN_STRING("entity", 1);
1985 		} else {
1986 			RETURN_LONG(MBSTRG(current_filter_illegal_substchar));
1987 		}
1988 	} else {
1989 		RETVAL_TRUE;
1990 
1991 		switch (Z_TYPE_PP(arg1)) {
1992 		case IS_STRING:
1993 			if (strncasecmp("none", Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1)) == 0) {
1994 				MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
1995 			} else if (strncasecmp("long", Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1)) == 0) {
1996 				MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
1997 			} else if (strncasecmp("entity", Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1)) == 0) {
1998 				MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
1999 			} else {
2000 				convert_to_long_ex(arg1);
2001 
2002 				if (Z_LVAL_PP(arg1) < 0xffff && Z_LVAL_PP(arg1) > 0x0) {
2003 					MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
2004 					MBSTRG(current_filter_illegal_substchar) = Z_LVAL_PP(arg1);
2005 				} else {
2006 					php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown character.");
2007 					RETURN_FALSE;
2008 				}
2009 			}
2010 			break;
2011 		default:
2012 			convert_to_long_ex(arg1);
2013 			if (Z_LVAL_PP(arg1) < 0xffff && Z_LVAL_PP(arg1) > 0x0) {
2014 				MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
2015 				MBSTRG(current_filter_illegal_substchar) = Z_LVAL_PP(arg1);
2016 			} else {
2017 				php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown character.");
2018 				RETURN_FALSE;
2019 			}
2020 			break;
2021 		}
2022 	}
2023 }
2024 /* }}} */
2025 
2026 /* {{{ proto string mb_preferred_mime_name(string encoding)
2027    Return the preferred MIME name (charset) as a string */
PHP_FUNCTION(mb_preferred_mime_name)2028 PHP_FUNCTION(mb_preferred_mime_name)
2029 {
2030 	enum mbfl_no_encoding no_encoding;
2031 	char *name = NULL;
2032 	int name_len;
2033 
2034 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &name, &name_len) == FAILURE) {
2035 		return;
2036 	} else {
2037 		no_encoding = mbfl_name2no_encoding(name);
2038 		if (no_encoding == mbfl_no_encoding_invalid) {
2039 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
2040 			RETVAL_FALSE;
2041 		} else {
2042 			const char *preferred_name = mbfl_no2preferred_mime_name(no_encoding);
2043 			if (preferred_name == NULL || *preferred_name == '\0') {
2044 				php_error_docref(NULL TSRMLS_CC, E_WARNING, "No MIME preferred name corresponding to \"%s\"", name);
2045 				RETVAL_FALSE;
2046 			} else {
2047 				RETVAL_STRING((char *)preferred_name, 1);
2048 			}
2049 		}
2050 	}
2051 }
2052 /* }}} */
2053 
/*
 * Byte-range predicates; each evaluates to 1 when c lies in one of the two
 * listed inclusive ranges and to 0 otherwise. The argument is evaluated more
 * than once, so do not pass expressions with side effects.
 * NOTE(review): judging by the names these classify the first and second
 * byte of a Shift_JIS double-byte character - confirm against callers.
 */
#define IS_SJIS1(c) (((c) >= 0x81 && (c) <= 0x9f) || ((c) >= 0xe0 && (c) <= 0xf5))
#define IS_SJIS2(c) (((c) >= 0x40 && (c) <= 0x7e) || ((c) >= 0x80 && (c) <= 0xfc))
2056 
2057 /* {{{ proto bool mb_parse_str(string encoded_string [, array result])
2058    Parses GET/POST/COOKIE data and sets global variables */
PHP_FUNCTION(mb_parse_str)2059 PHP_FUNCTION(mb_parse_str)
2060 {
2061 	zval *track_vars_array = NULL;
2062 	char *encstr = NULL;
2063 	int encstr_len;
2064 	php_mb_encoding_handler_info_t info;
2065 	const mbfl_encoding *detected;
2066 
2067 	track_vars_array = NULL;
2068 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|z", &encstr, &encstr_len, &track_vars_array) == FAILURE) {
2069 		return;
2070 	}
2071 
2072 	if (track_vars_array != NULL) {
2073 		/* Clear out the array */
2074 		zval_dtor(track_vars_array);
2075 		array_init(track_vars_array);
2076 	}
2077 
2078 	encstr = estrndup(encstr, encstr_len);
2079 
2080 	info.data_type              = PARSE_STRING;
2081 	info.separator              = PG(arg_separator).input;
2082 	info.report_errors          = 1;
2083 	info.to_encoding            = MBSTRG(current_internal_encoding);
2084 	info.to_language            = MBSTRG(language);
2085 	info.from_encodings         = MBSTRG(http_input_list);
2086 	info.num_from_encodings     = MBSTRG(http_input_list_size);
2087 	info.from_language          = MBSTRG(language);
2088 
2089 	if (track_vars_array != NULL) {
2090 		detected = _php_mb_encoding_handler_ex(&info, track_vars_array, encstr TSRMLS_CC);
2091 	} else {
2092 		zval tmp;
2093 		if (!EG(active_symbol_table)) {
2094 			zend_rebuild_symbol_table(TSRMLS_C);
2095 		}
2096 		Z_ARRVAL(tmp) = EG(active_symbol_table);
2097 		detected = _php_mb_encoding_handler_ex(&info, &tmp, encstr TSRMLS_CC);
2098 	}
2099 
2100 	MBSTRG(http_input_identify) = detected;
2101 
2102 	RETVAL_BOOL(detected);
2103 
2104 	if (encstr != NULL) efree(encstr);
2105 }
2106 /* }}} */
2107 
2108 /* {{{ proto string mb_output_handler(string contents, int status)
2109    Returns string in output buffer converted to the http_output encoding */
PHP_FUNCTION(mb_output_handler)2110 PHP_FUNCTION(mb_output_handler)
2111 {
2112 	char *arg_string;
2113 	int arg_string_len;
2114 	long arg_status;
2115 	mbfl_string string, result;
2116 	const char *charset;
2117 	char *p;
2118 	const mbfl_encoding *encoding;
2119 	int last_feed, len;
2120 	unsigned char send_text_mimetype = 0;
2121 	char *s, *mimetype = NULL;
2122 
2123 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl", &arg_string, &arg_string_len, &arg_status) == FAILURE) {
2124 		return;
2125 	}
2126 
2127 	encoding = MBSTRG(current_http_output_encoding);
2128 
2129  	/* start phase only */
2130  	if ((arg_status & PHP_OUTPUT_HANDLER_START) != 0) {
2131  		/* delete the converter just in case. */
2132  		if (MBSTRG(outconv)) {
2133 			MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
2134  			mbfl_buffer_converter_delete(MBSTRG(outconv));
2135  			MBSTRG(outconv) = NULL;
2136   		}
2137 		if (encoding == &mbfl_encoding_pass) {
2138 			RETURN_STRINGL(arg_string, arg_string_len, 1);
2139 		}
2140 
2141 		/* analyze mime type */
2142 		if (SG(sapi_headers).mimetype &&
2143 			_php_mb_match_regex(
2144 				MBSTRG(http_output_conv_mimetypes),
2145 				SG(sapi_headers).mimetype,
2146 				strlen(SG(sapi_headers).mimetype))) {
2147 			if ((s = strchr(SG(sapi_headers).mimetype,';')) == NULL){
2148 				mimetype = estrdup(SG(sapi_headers).mimetype);
2149 			} else {
2150 				mimetype = estrndup(SG(sapi_headers).mimetype,s-SG(sapi_headers).mimetype);
2151 			}
2152 			send_text_mimetype = 1;
2153 		} else if (SG(sapi_headers).send_default_content_type) {
2154 			mimetype = SG(default_mimetype) ? SG(default_mimetype) : SAPI_DEFAULT_MIMETYPE;
2155 		}
2156 
2157  		/* if content-type is not yet set, set it and activate the converter */
2158  		if (SG(sapi_headers).send_default_content_type || send_text_mimetype) {
2159 			charset = encoding->mime_name;
2160 			if (charset) {
2161 				len = spprintf( &p, 0, "Content-Type: %s; charset=%s",  mimetype, charset );
2162 				if (sapi_add_header(p, len, 0) != FAILURE) {
2163 					SG(sapi_headers).send_default_content_type = 0;
2164 				}
2165 			}
2166  			/* activate the converter */
2167  			MBSTRG(outconv) = mbfl_buffer_converter_new2(MBSTRG(current_internal_encoding), encoding, 0);
2168 			if (send_text_mimetype){
2169 				efree(mimetype);
2170 			}
2171  		}
2172   	}
2173 
2174  	/* just return if the converter is not activated. */
2175  	if (MBSTRG(outconv) == NULL) {
2176 		RETURN_STRINGL(arg_string, arg_string_len, 1);
2177 	}
2178 
2179  	/* flag */
2180  	last_feed = ((arg_status & PHP_OUTPUT_HANDLER_END) != 0);
2181  	/* mode */
2182  	mbfl_buffer_converter_illegal_mode(MBSTRG(outconv), MBSTRG(current_filter_illegal_mode));
2183  	mbfl_buffer_converter_illegal_substchar(MBSTRG(outconv), MBSTRG(current_filter_illegal_substchar));
2184 
2185  	/* feed the string */
2186  	mbfl_string_init(&string);
2187 	/* these are not needed. convd has encoding info.
2188 	string.no_language = MBSTRG(language);
2189 	string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2190 	*/
2191  	string.val = (unsigned char *)arg_string;
2192  	string.len = arg_string_len;
2193  	mbfl_buffer_converter_feed(MBSTRG(outconv), &string);
2194  	if (last_feed) {
2195  		mbfl_buffer_converter_flush(MBSTRG(outconv));
2196 	}
2197  	/* get the converter output, and return it */
2198  	mbfl_buffer_converter_result(MBSTRG(outconv), &result);
2199  	RETVAL_STRINGL((char *)result.val, result.len, 0);		/* the string is already strdup()'ed */
2200 
2201  	/* delete the converter if it is the last feed. */
2202  	if (last_feed) {
2203 		MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
2204 		mbfl_buffer_converter_delete(MBSTRG(outconv));
2205 		MBSTRG(outconv) = NULL;
2206 	}
2207 }
2208 /* }}} */
2209 
2210 /* {{{ proto int mb_strlen(string str [, string encoding])
2211    Get character numbers of a string */
PHP_FUNCTION(mb_strlen)2212 PHP_FUNCTION(mb_strlen)
2213 {
2214 	int n;
2215 	mbfl_string string;
2216 	char *enc_name = NULL;
2217 	int enc_name_len;
2218 
2219 	mbfl_string_init(&string);
2220 
2221 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s", (char **)&string.val, &string.len, &enc_name, &enc_name_len) == FAILURE) {
2222 		RETURN_FALSE;
2223 	}
2224 
2225 	string.no_language = MBSTRG(language);
2226 	if (enc_name == NULL) {
2227 		string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2228 	} else {
2229 		string.no_encoding = mbfl_name2no_encoding(enc_name);
2230 		if (string.no_encoding == mbfl_no_encoding_invalid) {
2231 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2232 			RETURN_FALSE;
2233 		}
2234 	}
2235 
2236 	n = mbfl_strlen(&string);
2237 	if (n >= 0) {
2238 		RETVAL_LONG(n);
2239 	} else {
2240 		RETVAL_FALSE;
2241 	}
2242 }
2243 /* }}} */
2244 
2245 /* {{{ proto int mb_strpos(string haystack, string needle [, int offset [, string encoding]])
2246    Find position of first occurrence of a string within another */
PHP_FUNCTION(mb_strpos)2247 PHP_FUNCTION(mb_strpos)
2248 {
2249 	int n, reverse = 0;
2250 	long offset;
2251 	mbfl_string haystack, needle;
2252 	char *enc_name = NULL;
2253 	int enc_name_len;
2254 
2255 	mbfl_string_init(&haystack);
2256 	mbfl_string_init(&needle);
2257 	haystack.no_language = MBSTRG(language);
2258 	haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2259 	needle.no_language = MBSTRG(language);
2260 	needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2261 	offset = 0;
2262 
2263 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ls", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &offset, &enc_name, &enc_name_len) == FAILURE) {
2264 		RETURN_FALSE;
2265 	}
2266 
2267 	if (enc_name != NULL) {
2268 		haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2269 		if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2270 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2271 			RETURN_FALSE;
2272 		}
2273 	}
2274 
2275 	if (offset < 0 || offset > mbfl_strlen(&haystack)) {
2276 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset not contained in string");
2277 		RETURN_FALSE;
2278 	}
2279 	if (needle.len == 0) {
2280 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter");
2281 		RETURN_FALSE;
2282 	}
2283 
2284 	n = mbfl_strpos(&haystack, &needle, offset, reverse);
2285 	if (n >= 0) {
2286 		RETVAL_LONG(n);
2287 	} else {
2288 		switch (-n) {
2289 		case 1:
2290 			break;
2291 		case 2:
2292 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Needle has not positive length");
2293 			break;
2294 		case 4:
2295 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding or conversion error");
2296 			break;
2297 		case 8:
2298 			php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Argument is empty");
2299 			break;
2300 		default:
2301 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown error in mb_strpos");
2302 			break;
2303 		}
2304 		RETVAL_FALSE;
2305 	}
2306 }
2307 /* }}} */
2308 
2309 /* {{{ proto int mb_strrpos(string haystack, string needle [, int offset [, string encoding]])
2310    Find position of last occurrence of a string within another */
PHP_FUNCTION(mb_strrpos)2311 PHP_FUNCTION(mb_strrpos)
2312 {
2313 	int n;
2314 	mbfl_string haystack, needle;
2315 	char *enc_name = NULL;
2316 	int enc_name_len;
2317 	zval **zoffset = NULL;
2318 	long offset = 0, str_flg;
2319 	char *enc_name2 = NULL;
2320 	int enc_name_len2;
2321 
2322 	mbfl_string_init(&haystack);
2323 	mbfl_string_init(&needle);
2324 	haystack.no_language = MBSTRG(language);
2325 	haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2326 	needle.no_language = MBSTRG(language);
2327 	needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2328 
2329 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|Zs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &zoffset, &enc_name, &enc_name_len) == FAILURE) {
2330 		RETURN_FALSE;
2331 	}
2332 
2333 	if (zoffset) {
2334 		if (Z_TYPE_PP(zoffset) == IS_STRING) {
2335 			enc_name2     = Z_STRVAL_PP(zoffset);
2336 			enc_name_len2 = Z_STRLEN_PP(zoffset);
2337 			str_flg       = 1;
2338 
2339 			if (enc_name2 != NULL) {
2340 				switch (*enc_name2) {
2341 				case '0':
2342 				case '1':
2343 				case '2':
2344 				case '3':
2345 				case '4':
2346 				case '5':
2347 				case '6':
2348 				case '7':
2349 				case '8':
2350 				case '9':
2351 				case ' ':
2352 				case '-':
2353 				case '.':
2354 					break;
2355 				default :
2356 					str_flg = 0;
2357 					break;
2358 				}
2359 			}
2360 
2361 			if (str_flg) {
2362 				convert_to_long_ex(zoffset);
2363 				offset   = Z_LVAL_PP(zoffset);
2364 			} else {
2365 				enc_name     = enc_name2;
2366 				enc_name_len = enc_name_len2;
2367 			}
2368 		} else {
2369 			convert_to_long_ex(zoffset);
2370 			offset = Z_LVAL_PP(zoffset);
2371 		}
2372 	}
2373 
2374 	if (enc_name != NULL) {
2375 		haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2376 		if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2377 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2378 			RETURN_FALSE;
2379 		}
2380 	}
2381 
2382 	if (haystack.len <= 0) {
2383 		RETURN_FALSE;
2384 	}
2385 	if (needle.len <= 0) {
2386 		RETURN_FALSE;
2387 	}
2388 
2389 	{
2390 		int haystack_char_len = mbfl_strlen(&haystack);
2391 		if ((offset > 0 && offset > haystack_char_len) ||
2392 			(offset < 0 && -offset > haystack_char_len)) {
2393 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset is greater than the length of haystack string");
2394 			RETURN_FALSE;
2395 		}
2396 	}
2397 
2398 	n = mbfl_strpos(&haystack, &needle, offset, 1);
2399 	if (n >= 0) {
2400 		RETVAL_LONG(n);
2401 	} else {
2402 		RETVAL_FALSE;
2403 	}
2404 }
2405 /* }}} */
2406 
2407 /* {{{ proto int mb_stripos(string haystack, string needle [, int offset [, string encoding]])
2408    Finds position of first occurrence of a string within another, case insensitive */
PHP_FUNCTION(mb_stripos)2409 PHP_FUNCTION(mb_stripos)
2410 {
2411 	int n;
2412 	long offset;
2413 	mbfl_string haystack, needle;
2414 	const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
2415 	int from_encoding_len;
2416 	n = -1;
2417 	offset = 0;
2418 
2419 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ls", (char **)&haystack.val, (int *)&haystack.len, (char **)&needle.val, (int *)&needle.len, &offset, &from_encoding, &from_encoding_len) == FAILURE) {
2420 		RETURN_FALSE;
2421 	}
2422 	if (needle.len == 0) {
2423 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter");
2424 		RETURN_FALSE;
2425 	}
2426 	n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, from_encoding TSRMLS_CC);
2427 
2428 	if (n >= 0) {
2429 		RETVAL_LONG(n);
2430 	} else {
2431 		RETVAL_FALSE;
2432 	}
2433 }
2434 /* }}} */
2435 
2436 /* {{{ proto int mb_strripos(string haystack, string needle [, int offset [, string encoding]])
2437    Finds position of last occurrence of a string within another, case insensitive */
PHP_FUNCTION(mb_strripos)2438 PHP_FUNCTION(mb_strripos)
2439 {
2440 	int n;
2441 	long offset;
2442 	mbfl_string haystack, needle;
2443 	const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
2444 	int from_encoding_len;
2445 	n = -1;
2446 	offset = 0;
2447 
2448 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ls", (char **)&haystack.val, (int *)&haystack.len, (char **)&needle.val, (int *)&needle.len, &offset, &from_encoding, &from_encoding_len) == FAILURE) {
2449 		RETURN_FALSE;
2450 	}
2451 
2452 	n = php_mb_stripos(1, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, from_encoding TSRMLS_CC);
2453 
2454 	if (n >= 0) {
2455 		RETVAL_LONG(n);
2456 	} else {
2457 		RETVAL_FALSE;
2458 	}
2459 }
2460 /* }}} */
2461 
2462 /* {{{ proto string mb_strstr(string haystack, string needle[, bool part[, string encoding]])
2463    Finds first occurrence of a string within another */
PHP_FUNCTION(mb_strstr)2464 PHP_FUNCTION(mb_strstr)
2465 {
2466 	int n, len, mblen;
2467 	mbfl_string haystack, needle, result, *ret = NULL;
2468 	char *enc_name = NULL;
2469 	int enc_name_len;
2470 	zend_bool part = 0;
2471 
2472 	mbfl_string_init(&haystack);
2473 	mbfl_string_init(&needle);
2474 	haystack.no_language = MBSTRG(language);
2475 	haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2476 	needle.no_language = MBSTRG(language);
2477 	needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2478 
2479 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, (int *)&haystack.len, (char **)&needle.val, (int *)&needle.len, &part, &enc_name, &enc_name_len) == FAILURE) {
2480 		RETURN_FALSE;
2481 	}
2482 
2483 	if (enc_name != NULL) {
2484 		haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2485 		if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2486 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2487 			RETURN_FALSE;
2488 		}
2489 	}
2490 
2491 	if (needle.len <= 0) {
2492 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter");
2493 		RETURN_FALSE;
2494 	}
2495 	n = mbfl_strpos(&haystack, &needle, 0, 0);
2496 	if (n >= 0) {
2497 		mblen = mbfl_strlen(&haystack);
2498 		if (part) {
2499 			ret = mbfl_substr(&haystack, &result, 0, n);
2500 			if (ret != NULL) {
2501 				RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2502 			} else {
2503 				RETVAL_FALSE;
2504 			}
2505 		} else {
2506 			len = (mblen - n);
2507 			ret = mbfl_substr(&haystack, &result, n, len);
2508 			if (ret != NULL) {
2509 				RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2510 			} else {
2511 				RETVAL_FALSE;
2512 			}
2513 		}
2514 	} else {
2515 		RETVAL_FALSE;
2516 	}
2517 }
2518 /* }}} */
2519 
2520 /* {{{ proto string mb_strrchr(string haystack, string needle[, bool part[, string encoding]])
2521    Finds the last occurrence of a character in a string within another */
PHP_FUNCTION(mb_strrchr)2522 PHP_FUNCTION(mb_strrchr)
2523 {
2524 	int n, len, mblen;
2525 	mbfl_string haystack, needle, result, *ret = NULL;
2526 	char *enc_name = NULL;
2527 	int enc_name_len;
2528 	zend_bool part = 0;
2529 
2530 	mbfl_string_init(&haystack);
2531 	mbfl_string_init(&needle);
2532 	haystack.no_language = MBSTRG(language);
2533 	haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2534 	needle.no_language = MBSTRG(language);
2535 	needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2536 
2537 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &enc_name, &enc_name_len) == FAILURE) {
2538 		RETURN_FALSE;
2539 	}
2540 
2541 	if (enc_name != NULL) {
2542 		haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2543 		if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2544 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2545 			RETURN_FALSE;
2546 		}
2547 	}
2548 
2549 	if (haystack.len <= 0) {
2550 		RETURN_FALSE;
2551 	}
2552 	if (needle.len <= 0) {
2553 		RETURN_FALSE;
2554 	}
2555 	n = mbfl_strpos(&haystack, &needle, 0, 1);
2556 	if (n >= 0) {
2557 		mblen = mbfl_strlen(&haystack);
2558 		if (part) {
2559 			ret = mbfl_substr(&haystack, &result, 0, n);
2560 			if (ret != NULL) {
2561 				RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2562 			} else {
2563 				RETVAL_FALSE;
2564 			}
2565 		} else {
2566 			len = (mblen - n);
2567 			ret = mbfl_substr(&haystack, &result, n, len);
2568 			if (ret != NULL) {
2569 				RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2570 			} else {
2571 				RETVAL_FALSE;
2572 			}
2573 		}
2574 	} else {
2575 		RETVAL_FALSE;
2576 	}
2577 }
2578 /* }}} */
2579 
2580 /* {{{ proto string mb_stristr(string haystack, string needle[, bool part[, string encoding]])
2581    Finds first occurrence of a string within another, case insensitive */
PHP_FUNCTION(mb_stristr)2582 PHP_FUNCTION(mb_stristr)
2583 {
2584 	zend_bool part = 0;
2585 	unsigned int from_encoding_len, len, mblen;
2586 	int n;
2587 	mbfl_string haystack, needle, result, *ret = NULL;
2588 	const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
2589 	mbfl_string_init(&haystack);
2590 	mbfl_string_init(&needle);
2591 	haystack.no_language = MBSTRG(language);
2592 	haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2593 	needle.no_language = MBSTRG(language);
2594 	needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2595 
2596 
2597 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &from_encoding, &from_encoding_len) == FAILURE) {
2598 		RETURN_FALSE;
2599 	}
2600 
2601 	if (!needle.len) {
2602 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter");
2603 		RETURN_FALSE;
2604 	}
2605 
2606 	haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding);
2607 	if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2608 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", from_encoding);
2609 		RETURN_FALSE;
2610 	}
2611 
2612 	n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, 0, from_encoding TSRMLS_CC);
2613 
2614 	if (n <0) {
2615 		RETURN_FALSE;
2616 	}
2617 
2618 	mblen = mbfl_strlen(&haystack);
2619 
2620 	if (part) {
2621 		ret = mbfl_substr(&haystack, &result, 0, n);
2622 		if (ret != NULL) {
2623 			RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2624 		} else {
2625 			RETVAL_FALSE;
2626 		}
2627 	} else {
2628 		len = (mblen - n);
2629 		ret = mbfl_substr(&haystack, &result, n, len);
2630 		if (ret != NULL) {
2631 			RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2632 		} else {
2633 			RETVAL_FALSE;
2634 		}
2635 	}
2636 }
2637 /* }}} */
2638 
2639 /* {{{ proto string mb_strrichr(string haystack, string needle[, bool part[, string encoding]])
2640    Finds the last occurrence of a character in a string within another, case insensitive */
PHP_FUNCTION(mb_strrichr)2641 PHP_FUNCTION(mb_strrichr)
2642 {
2643 	zend_bool part = 0;
2644 	int n, from_encoding_len, len, mblen;
2645 	mbfl_string haystack, needle, result, *ret = NULL;
2646 	const char *from_encoding = MBSTRG(current_internal_encoding)->name;
2647 	mbfl_string_init(&haystack);
2648 	mbfl_string_init(&needle);
2649 	haystack.no_language = MBSTRG(language);
2650 	haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2651 	needle.no_language = MBSTRG(language);
2652 	needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2653 
2654 
2655 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &from_encoding, &from_encoding_len) == FAILURE) {
2656 		RETURN_FALSE;
2657 	}
2658 
2659 	haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding);
2660 	if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2661 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", from_encoding);
2662 		RETURN_FALSE;
2663 	}
2664 
2665 	n = php_mb_stripos(1, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, 0, from_encoding TSRMLS_CC);
2666 
2667 	if (n <0) {
2668 		RETURN_FALSE;
2669 	}
2670 
2671 	mblen = mbfl_strlen(&haystack);
2672 
2673 	if (part) {
2674 		ret = mbfl_substr(&haystack, &result, 0, n);
2675 		if (ret != NULL) {
2676 			RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2677 		} else {
2678 			RETVAL_FALSE;
2679 		}
2680 	} else {
2681 		len = (mblen - n);
2682 		ret = mbfl_substr(&haystack, &result, n, len);
2683 		if (ret != NULL) {
2684 			RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2685 		} else {
2686 			RETVAL_FALSE;
2687 		}
2688 	}
2689 }
2690 /* }}} */
2691 
2692 /* {{{ proto int mb_substr_count(string haystack, string needle [, string encoding])
2693    Count the number of substring occurrences */
PHP_FUNCTION(mb_substr_count)2694 PHP_FUNCTION(mb_substr_count)
2695 {
2696 	int n;
2697 	mbfl_string haystack, needle;
2698 	char *enc_name = NULL;
2699 	int enc_name_len;
2700 
2701 	mbfl_string_init(&haystack);
2702 	mbfl_string_init(&needle);
2703 	haystack.no_language = MBSTRG(language);
2704 	haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2705 	needle.no_language = MBSTRG(language);
2706 	needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2707 
2708 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|s", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &enc_name, &enc_name_len) == FAILURE) {
2709 		return;
2710 	}
2711 
2712 	if (enc_name != NULL) {
2713 		haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2714 		if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2715 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2716 			RETURN_FALSE;
2717 		}
2718 	}
2719 
2720 	if (needle.len <= 0) {
2721 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty substring");
2722 		RETURN_FALSE;
2723 	}
2724 
2725 	n = mbfl_substr_count(&haystack, &needle);
2726 	if (n >= 0) {
2727 		RETVAL_LONG(n);
2728 	} else {
2729 		RETVAL_FALSE;
2730 	}
2731 }
2732 /* }}} */
2733 
2734 /* {{{ proto string mb_substr(string str, int start [, int length [, string encoding]])
2735    Returns part of a string */
PHP_FUNCTION(mb_substr)2736 PHP_FUNCTION(mb_substr)
2737 {
2738 	size_t argc = ZEND_NUM_ARGS();
2739 	char *str, *encoding;
2740 	long from, len;
2741 	int mblen, str_len, encoding_len;
2742 	zval **z_len = NULL;
2743 	mbfl_string string, result, *ret;
2744 
2745 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|Zs", &str, &str_len, &from, &z_len, &encoding, &encoding_len) == FAILURE) {
2746 		return;
2747 	}
2748 
2749 	mbfl_string_init(&string);
2750 	string.no_language = MBSTRG(language);
2751 	string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2752 
2753 	if (argc == 4) {
2754 		string.no_encoding = mbfl_name2no_encoding(encoding);
2755 		if (string.no_encoding == mbfl_no_encoding_invalid) {
2756 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
2757 			RETURN_FALSE;
2758 		}
2759 	}
2760 
2761 	string.val = (unsigned char *)str;
2762 	string.len = str_len;
2763 
2764 	if (argc < 3 || Z_TYPE_PP(z_len) == IS_NULL) {
2765 		len = str_len;
2766 	} else {
2767 		convert_to_long_ex(z_len);
2768 		len = Z_LVAL_PP(z_len);
2769 	}
2770 
2771 	/* measures length */
2772 	mblen = 0;
2773 	if (from < 0 || len < 0) {
2774 		mblen = mbfl_strlen(&string);
2775 	}
2776 
2777 	/* if "from" position is negative, count start position from the end
2778 	 * of the string
2779 	 */
2780 	if (from < 0) {
2781 		from = mblen + from;
2782 		if (from < 0) {
2783 			from = 0;
2784 		}
2785 	}
2786 
2787 	/* if "length" position is negative, set it to the length
2788 	 * needed to stop that many chars from the end of the string
2789 	 */
2790 	if (len < 0) {
2791 		len = (mblen - from) + len;
2792 		if (len < 0) {
2793 			len = 0;
2794 		}
2795 	}
2796 
2797 	if (((MBSTRG(func_overload) & MB_OVERLOAD_STRING) == MB_OVERLOAD_STRING)
2798 		&& (from >= mbfl_strlen(&string))) {
2799 		RETURN_FALSE;
2800 	}
2801 
2802 	if (from > INT_MAX) {
2803 		from = INT_MAX;
2804 	}
2805 	if (len > INT_MAX) {
2806 		len = INT_MAX;
2807 	}
2808 
2809 	ret = mbfl_substr(&string, &result, from, len);
2810 	if (NULL == ret) {
2811 		RETURN_FALSE;
2812 	}
2813 
2814 	RETURN_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */
2815 }
2816 /* }}} */
2817 
2818 /* {{{ proto string mb_strcut(string str, int start [, int length [, string encoding]])
2819    Returns part of a string */
PHP_FUNCTION(mb_strcut)2820 PHP_FUNCTION(mb_strcut)
2821 {
2822 	size_t argc = ZEND_NUM_ARGS();
2823 	char *encoding;
2824 	long from, len;
2825 	int encoding_len;
2826 	zval **z_len = NULL;
2827 	mbfl_string string, result, *ret;
2828 
2829 	mbfl_string_init(&string);
2830 	string.no_language = MBSTRG(language);
2831 	string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2832 
2833 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|Zs", (char **)&string.val, (int **)&string.len, &from, &z_len, &encoding, &encoding_len) == FAILURE) {
2834 		return;
2835 	}
2836 
2837 	if (argc == 4) {
2838 		string.no_encoding = mbfl_name2no_encoding(encoding);
2839 		if (string.no_encoding == mbfl_no_encoding_invalid) {
2840 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
2841 			RETURN_FALSE;
2842 		}
2843 	}
2844 
2845 	if (argc < 3 || Z_TYPE_PP(z_len) == IS_NULL) {
2846 		len = string.len;
2847 	} else {
2848 		convert_to_long_ex(z_len);
2849 		len = Z_LVAL_PP(z_len);
2850 	}
2851 
2852 	/* if "from" position is negative, count start position from the end
2853 	 * of the string
2854 	 */
2855 	if (from < 0) {
2856 		from = string.len + from;
2857 		if (from < 0) {
2858 			from = 0;
2859 		}
2860 	}
2861 
2862 	/* if "length" position is negative, set it to the length
2863 	 * needed to stop that many chars from the end of the string
2864 	 */
2865 	if (len < 0) {
2866 		len = (string.len - from) + len;
2867 		if (len < 0) {
2868 			len = 0;
2869 		}
2870 	}
2871 
2872 	if ((unsigned int)from > string.len) {
2873 		RETURN_FALSE;
2874 	}
2875 
2876 	ret = mbfl_strcut(&string, &result, from, len);
2877 	if (ret == NULL) {
2878 		RETURN_FALSE;
2879 	}
2880 
2881 	RETURN_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */
2882 }
2883 /* }}} */
2884 
2885 /* {{{ proto int mb_strwidth(string str [, string encoding])
2886    Gets terminal width of a string */
PHP_FUNCTION(mb_strwidth)2887 PHP_FUNCTION(mb_strwidth)
2888 {
2889 	int n;
2890 	mbfl_string string;
2891 	char *enc_name = NULL;
2892 	int enc_name_len;
2893 
2894 	mbfl_string_init(&string);
2895 
2896 	string.no_language = MBSTRG(language);
2897 	string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2898 
2899 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s", (char **)&string.val, &string.len, &enc_name, &enc_name_len) == FAILURE) {
2900 		return;
2901 	}
2902 
2903 	if (enc_name != NULL) {
2904 		string.no_encoding = mbfl_name2no_encoding(enc_name);
2905 		if (string.no_encoding == mbfl_no_encoding_invalid) {
2906 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2907 			RETURN_FALSE;
2908 		}
2909 	}
2910 
2911 	n = mbfl_strwidth(&string);
2912 	if (n >= 0) {
2913 		RETVAL_LONG(n);
2914 	} else {
2915 		RETVAL_FALSE;
2916 	}
2917 }
2918 /* }}} */
2919 
2920 /* {{{ proto string mb_strimwidth(string str, int start, int width [, string trimmarker [, string encoding]])
2921    Trim the string in terminal width */
PHP_FUNCTION(mb_strimwidth)2922 PHP_FUNCTION(mb_strimwidth)
2923 {
2924 	char *str, *trimmarker, *encoding;
2925 	long from, width;
2926 	int str_len, trimmarker_len, encoding_len;
2927 	mbfl_string string, result, marker, *ret;
2928 
2929 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sll|ss", &str, &str_len, &from, &width, &trimmarker, &trimmarker_len, &encoding, &encoding_len) == FAILURE) {
2930 		return;
2931 	}
2932 
2933 	mbfl_string_init(&string);
2934 	mbfl_string_init(&marker);
2935 	string.no_language = MBSTRG(language);
2936 	string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2937 	marker.no_language = MBSTRG(language);
2938 	marker.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2939 	marker.val = NULL;
2940 	marker.len = 0;
2941 
2942 	if (ZEND_NUM_ARGS() == 5) {
2943 		string.no_encoding = marker.no_encoding = mbfl_name2no_encoding(encoding);
2944 		if (string.no_encoding == mbfl_no_encoding_invalid) {
2945 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
2946 			RETURN_FALSE;
2947 		}
2948 	}
2949 
2950 	string.val = (unsigned char *)str;
2951 	string.len = str_len;
2952 
2953 	if (from < 0 || from > str_len) {
2954 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Start position is out of range");
2955 		RETURN_FALSE;
2956 	}
2957 
2958 	if (width < 0) {
2959 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Width is negative value");
2960 		RETURN_FALSE;
2961 	}
2962 
2963 	if (ZEND_NUM_ARGS() >= 4) {
2964 		marker.val = (unsigned char *)trimmarker;
2965 		marker.len = trimmarker_len;
2966 	}
2967 
2968 	ret = mbfl_strimwidth(&string, &marker, &result, from, width);
2969 
2970 	if (ret == NULL) {
2971 		RETURN_FALSE;
2972 	}
2973 
2974 	RETVAL_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */
2975 }
2976 /* }}} */
2977 
2978 /* {{{ MBSTRING_API char *php_mb_convert_encoding() */
php_mb_convert_encoding(const char * input,size_t length,const char * _to_encoding,const char * _from_encodings,size_t * output_len TSRMLS_DC)2979 MBSTRING_API char * php_mb_convert_encoding(const char *input, size_t length, const char *_to_encoding, const char *_from_encodings, size_t *output_len TSRMLS_DC)
2980 {
2981 	mbfl_string string, result, *ret;
2982 	const mbfl_encoding *from_encoding, *to_encoding;
2983 	mbfl_buffer_converter *convd;
2984 	size_t size;
2985 	const mbfl_encoding **list;
2986 	char *output=NULL;
2987 
2988 	if (output_len) {
2989 		*output_len = 0;
2990 	}
2991 	if (!input) {
2992 		return NULL;
2993 	}
2994 	/* new encoding */
2995 	if (_to_encoding && strlen(_to_encoding)) {
2996 		to_encoding = mbfl_name2encoding(_to_encoding);
2997 		if (!to_encoding) {
2998 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", _to_encoding);
2999 			return NULL;
3000 		}
3001 	} else {
3002 		to_encoding = MBSTRG(current_internal_encoding);
3003 	}
3004 
3005 	/* initialize string */
3006 	mbfl_string_init(&string);
3007 	mbfl_string_init(&result);
3008 	from_encoding = MBSTRG(current_internal_encoding);
3009 	string.no_encoding = from_encoding->no_encoding;
3010 	string.no_language = MBSTRG(language);
3011 	string.val = (unsigned char *)input;
3012 	string.len = length;
3013 
3014 	/* pre-conversion encoding */
3015 	if (_from_encodings) {
3016 		list = NULL;
3017 		size = 0;
3018 		php_mb_parse_encoding_list(_from_encodings, strlen(_from_encodings), &list, &size, 0 TSRMLS_CC);
3019 		if (size == 1) {
3020 			from_encoding = *list;
3021 			string.no_encoding = from_encoding->no_encoding;
3022 		} else if (size > 1) {
3023 			/* auto detect */
3024 			from_encoding = mbfl_identify_encoding2(&string, list, size, MBSTRG(strict_detection));
3025 			if (from_encoding) {
3026 				string.no_encoding = from_encoding->no_encoding;
3027 			} else {
3028 				php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to detect character encoding");
3029 				from_encoding = &mbfl_encoding_pass;
3030 				to_encoding = from_encoding;
3031 				string.no_encoding = from_encoding->no_encoding;
3032 			}
3033 		} else {
3034 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Illegal character encoding specified");
3035 		}
3036 		if (list != NULL) {
3037 			efree((void *)list);
3038 		}
3039 	}
3040 
3041 	/* initialize converter */
3042 	convd = mbfl_buffer_converter_new2(from_encoding, to_encoding, string.len);
3043 	if (convd == NULL) {
3044 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create character encoding converter");
3045 		return NULL;
3046 	}
3047 	mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
3048 	mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
3049 
3050 	/* do it */
3051 	ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
3052 	if (ret) {
3053 		if (output_len) {
3054 			*output_len = ret->len;
3055 		}
3056 		output = (char *)ret->val;
3057 	}
3058 
3059 	MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
3060 	mbfl_buffer_converter_delete(convd);
3061 	return output;
3062 }
3063 /* }}} */
3064 
3065 /* {{{ proto string mb_convert_encoding(string str, string to-encoding [, mixed from-encoding])
3066    Returns converted string in desired encoding */
PHP_FUNCTION(mb_convert_encoding)3067 PHP_FUNCTION(mb_convert_encoding)
3068 {
3069 	char *arg_str, *arg_new;
3070 	int str_len, new_len;
3071 	zval *arg_old;
3072 	int i;
3073 	size_t size, l, n;
3074 	char *_from_encodings = NULL, *ret, *s_free = NULL;
3075 
3076 	zval **hash_entry;
3077 	HashTable *target_hash;
3078 
3079 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|z", &arg_str, &str_len, &arg_new, &new_len, &arg_old) == FAILURE) {
3080 		return;
3081 	}
3082 
3083 	if (ZEND_NUM_ARGS() == 3) {
3084 		switch (Z_TYPE_P(arg_old)) {
3085 		case IS_ARRAY:
3086 			target_hash = Z_ARRVAL_P(arg_old);
3087 			zend_hash_internal_pointer_reset(target_hash);
3088 			i = zend_hash_num_elements(target_hash);
3089 			_from_encodings = NULL;
3090 
3091 			while (i > 0) {
3092 				if (zend_hash_get_current_data(target_hash, (void **) &hash_entry) == FAILURE) {
3093 					break;
3094 				}
3095 
3096 				convert_to_string_ex(hash_entry);
3097 
3098 				if ( _from_encodings) {
3099 					l = strlen(_from_encodings);
3100 					n = strlen(Z_STRVAL_PP(hash_entry));
3101 					_from_encodings = erealloc(_from_encodings, l+n+2);
3102 					strcpy(_from_encodings+l, ",");
3103 					strcpy(_from_encodings+l+1, Z_STRVAL_PP(hash_entry));
3104 				} else {
3105 					_from_encodings = estrdup(Z_STRVAL_PP(hash_entry));
3106 				}
3107 
3108 				zend_hash_move_forward(target_hash);
3109 				i--;
3110 			}
3111 
3112 			if (_from_encodings != NULL && !strlen(_from_encodings)) {
3113 				efree(_from_encodings);
3114 				_from_encodings = NULL;
3115 			}
3116 			s_free = _from_encodings;
3117 			break;
3118 		default:
3119 			convert_to_string(arg_old);
3120 			_from_encodings = Z_STRVAL_P(arg_old);
3121 			break;
3122 		}
3123 	}
3124 
3125 	/* new encoding */
3126 	ret = php_mb_convert_encoding(arg_str, str_len, arg_new, _from_encodings, &size TSRMLS_CC);
3127 	if (ret != NULL) {
3128 		RETVAL_STRINGL(ret, size, 0);		/* the string is already strdup()'ed */
3129 	} else {
3130 		RETVAL_FALSE;
3131 	}
3132 
3133 	if ( s_free) {
3134 		efree(s_free);
3135 	}
3136 }
3137 /* }}} */
3138 
3139 /* {{{ proto string mb_convert_case(string sourcestring, int mode [, string encoding])
3140    Returns a case-folded version of sourcestring */
PHP_FUNCTION(mb_convert_case)3141 PHP_FUNCTION(mb_convert_case)
3142 {
3143 	const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
3144 	char *str;
3145 	int str_len, from_encoding_len;
3146 	long case_mode = 0;
3147 	char *newstr;
3148 	size_t ret_len;
3149 
3150 	RETVAL_FALSE;
3151 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|s!", &str, &str_len,
3152 				&case_mode, &from_encoding, &from_encoding_len) == FAILURE)
3153 		RETURN_FALSE;
3154 
3155 	newstr = php_unicode_convert_case(case_mode, str, (size_t) str_len, &ret_len, from_encoding TSRMLS_CC);
3156 
3157 	if (newstr) {
3158 		RETVAL_STRINGL(newstr, ret_len, 0);
3159 	}
3160 }
3161 /* }}} */
3162 
3163 /* {{{ proto string mb_strtoupper(string sourcestring [, string encoding])
3164  *  Returns an uppercased version of sourcestring
3165  */
PHP_FUNCTION(mb_strtoupper)3166 PHP_FUNCTION(mb_strtoupper)
3167 {
3168 	const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
3169 	char *str;
3170 	int str_len, from_encoding_len;
3171 	char *newstr;
3172 	size_t ret_len;
3173 
3174 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s!", &str, &str_len,
3175 				&from_encoding, &from_encoding_len) == FAILURE) {
3176 		return;
3177 	}
3178 	newstr = php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, str, (size_t) str_len, &ret_len, from_encoding TSRMLS_CC);
3179 
3180 	if (newstr) {
3181 		RETURN_STRINGL(newstr, ret_len, 0);
3182 	}
3183 	RETURN_FALSE;
3184 }
3185 /* }}} */
3186 
3187 /* {{{ proto string mb_strtolower(string sourcestring [, string encoding])
3188  *  Returns a lowercased version of sourcestring
3189  */
PHP_FUNCTION(mb_strtolower)3190 PHP_FUNCTION(mb_strtolower)
3191 {
3192 	const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
3193 	char *str;
3194 	int str_len, from_encoding_len;
3195 	char *newstr;
3196 	size_t ret_len;
3197 
3198 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s!", &str, &str_len,
3199 				&from_encoding, &from_encoding_len) == FAILURE) {
3200 		return;
3201 	}
3202 	newstr = php_unicode_convert_case(PHP_UNICODE_CASE_LOWER, str, (size_t) str_len, &ret_len, from_encoding TSRMLS_CC);
3203 
3204 	if (newstr) {
3205 		RETURN_STRINGL(newstr, ret_len, 0);
3206 	}
3207 	RETURN_FALSE;
3208 }
3209 /* }}} */
3210 
3211 /* {{{ proto string mb_detect_encoding(string str [, mixed encoding_list [, bool strict]])
3212    Returns the detected encoding of the given string (as a string) */
PHP_FUNCTION(mb_detect_encoding)3213 PHP_FUNCTION(mb_detect_encoding)
3214 {
3215 	char *str;
3216 	int str_len;
3217 	zend_bool strict=0;
3218 	zval *encoding_list;
3219 
3220 	mbfl_string string;
3221 	const mbfl_encoding *ret;
3222 	const mbfl_encoding **elist, **list;
3223 	size_t size;
3224 
3225 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|zb", &str, &str_len, &encoding_list, &strict) == FAILURE) {
3226 		return;
3227 	}
3228 
3229 	/* make encoding list */
3230 	list = NULL;
3231 	size = 0;
3232 	if (ZEND_NUM_ARGS() >= 2 && !ZVAL_IS_NULL(encoding_list)) {
3233 		switch (Z_TYPE_P(encoding_list)) {
3234 		case IS_ARRAY:
3235 			if (FAILURE == php_mb_parse_encoding_array(encoding_list, &list, &size, 0 TSRMLS_CC)) {
3236 				if (list) {
3237 					efree(list);
3238 					list = NULL;
3239 					size = 0;
3240 				}
3241 			}
3242 			break;
3243 		default:
3244 			convert_to_string(encoding_list);
3245 			if (FAILURE == php_mb_parse_encoding_list(Z_STRVAL_P(encoding_list), Z_STRLEN_P(encoding_list), &list, &size, 0 TSRMLS_CC)) {
3246 				if (list) {
3247 					efree(list);
3248 					list = NULL;
3249 					size = 0;
3250 				}
3251 			}
3252 			break;
3253 		}
3254 		if (size <= 0) {
3255 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Illegal argument");
3256 		}
3257 	}
3258 
3259 	if (ZEND_NUM_ARGS() < 3) {
3260 		strict = (zend_bool)MBSTRG(strict_detection);
3261 	}
3262 
3263 	if (size > 0 && list != NULL) {
3264 		elist = list;
3265 	} else {
3266 		elist = MBSTRG(current_detect_order_list);
3267 		size = MBSTRG(current_detect_order_list_size);
3268 	}
3269 
3270 	mbfl_string_init(&string);
3271 	string.no_language = MBSTRG(language);
3272 	string.val = (unsigned char *)str;
3273 	string.len = str_len;
3274 	ret = mbfl_identify_encoding2(&string, elist, size, strict);
3275 
3276 	if (list != NULL) {
3277 		efree((void *)list);
3278 	}
3279 
3280 	if (ret == NULL) {
3281 		RETURN_FALSE;
3282 	}
3283 
3284 	RETVAL_STRING((char *)ret->name, 1);
3285 }
3286 /* }}} */
3287 
3288 /* {{{ proto mixed mb_list_encodings()
3289    Returns an array of all supported entity encodings */
PHP_FUNCTION(mb_list_encodings)3290 PHP_FUNCTION(mb_list_encodings)
3291 {
3292 	const mbfl_encoding **encodings;
3293 	const mbfl_encoding *encoding;
3294 	int i;
3295 
3296 	array_init(return_value);
3297 	i = 0;
3298 	encodings = mbfl_get_supported_encodings();
3299 	while ((encoding = encodings[i++]) != NULL) {
3300 		add_next_index_string(return_value, (char *) encoding->name, 1);
3301 	}
3302 }
3303 /* }}} */
3304 
3305 /* {{{ proto array mb_encoding_aliases(string encoding)
3306    Returns an array of the aliases of a given encoding name */
PHP_FUNCTION(mb_encoding_aliases)3307 PHP_FUNCTION(mb_encoding_aliases)
3308 {
3309 	const mbfl_encoding *encoding;
3310 	char *name = NULL;
3311 	int name_len;
3312 
3313 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &name, &name_len) == FAILURE) {
3314 		RETURN_FALSE;
3315 	}
3316 
3317 	encoding = mbfl_name2encoding(name);
3318 	if (!encoding) {
3319 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
3320 		RETURN_FALSE;
3321 	}
3322 
3323 	array_init(return_value);
3324 	if (encoding->aliases != NULL) {
3325 		const char **alias;
3326 		for (alias = *encoding->aliases; *alias; ++alias) {
3327 			add_next_index_string(return_value, (char *)*alias, 1);
3328 		}
3329 	}
3330 }
3331 /* }}} */
3332 
3333 /* {{{ proto string mb_encode_mimeheader(string str [, string charset [, string transfer-encoding [, string linefeed [, int indent]]]])
3334    Converts the string to MIME "encoded-word" in the format of =?charset?(B|Q)?encoded_string?= */
PHP_FUNCTION(mb_encode_mimeheader)3335 PHP_FUNCTION(mb_encode_mimeheader)
3336 {
3337 	enum mbfl_no_encoding charset, transenc;
3338 	mbfl_string  string, result, *ret;
3339 	char *charset_name = NULL;
3340 	int charset_name_len;
3341 	char *trans_enc_name = NULL;
3342 	int trans_enc_name_len;
3343 	char *linefeed = "\r\n";
3344 	int linefeed_len;
3345 	long indent = 0;
3346 
3347 	mbfl_string_init(&string);
3348 	string.no_language = MBSTRG(language);
3349 	string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
3350 
3351 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|sssl", (char **)&string.val, &string.len, &charset_name, &charset_name_len, &trans_enc_name, &trans_enc_name_len, &linefeed, &linefeed_len, &indent) == FAILURE) {
3352 		return;
3353 	}
3354 
3355 	charset = mbfl_no_encoding_pass;
3356 	transenc = mbfl_no_encoding_base64;
3357 
3358 	if (charset_name != NULL) {
3359 		charset = mbfl_name2no_encoding(charset_name);
3360 		if (charset == mbfl_no_encoding_invalid) {
3361 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", charset_name);
3362 			RETURN_FALSE;
3363 		}
3364 	} else {
3365 		const mbfl_language *lang = mbfl_no2language(MBSTRG(language));
3366 		if (lang != NULL) {
3367 			charset = lang->mail_charset;
3368 			transenc = lang->mail_header_encoding;
3369 		}
3370 	}
3371 
3372 	if (trans_enc_name != NULL) {
3373 		if (*trans_enc_name == 'B' || *trans_enc_name == 'b') {
3374 			transenc = mbfl_no_encoding_base64;
3375 		} else if (*trans_enc_name == 'Q' || *trans_enc_name == 'q') {
3376 			transenc = mbfl_no_encoding_qprint;
3377 		}
3378 	}
3379 
3380 	mbfl_string_init(&result);
3381 	ret = mbfl_mime_header_encode(&string, &result, charset, transenc, linefeed, indent);
3382 	if (ret != NULL) {
3383 		RETVAL_STRINGL_CHECK((char *)ret->val, ret->len, 0);	/* the string is already strdup()'ed */
3384 	} else {
3385 		RETVAL_FALSE;
3386 	}
3387 }
3388 /* }}} */
3389 
3390 /* {{{ proto string mb_decode_mimeheader(string string)
3391    Decodes the MIME "encoded-word" in the string */
PHP_FUNCTION(mb_decode_mimeheader)3392 PHP_FUNCTION(mb_decode_mimeheader)
3393 {
3394 	mbfl_string string, result, *ret;
3395 
3396 	mbfl_string_init(&string);
3397 	string.no_language = MBSTRG(language);
3398 	string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
3399 
3400 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", (char **)&string.val, &string.len) == FAILURE) {
3401 		return;
3402 	}
3403 
3404 	mbfl_string_init(&result);
3405 	ret = mbfl_mime_header_decode(&string, &result, MBSTRG(current_internal_encoding)->no_encoding);
3406 	if (ret != NULL) {
3407 		RETVAL_STRINGL((char *)ret->val, ret->len, 0);	/* the string is already strdup()'ed */
3408 	} else {
3409 		RETVAL_FALSE;
3410 	}
3411 }
3412 /* }}} */
3413 
3414 /* {{{ proto string mb_convert_kana(string str [, string option] [, string encoding])
3415    Conversion between full-width character and half-width character (Japanese) */
PHP_FUNCTION(mb_convert_kana)3416 PHP_FUNCTION(mb_convert_kana)
3417 {
3418 	int opt, i;
3419 	mbfl_string string, result, *ret;
3420 	char *optstr = NULL;
3421 	int optstr_len;
3422 	char *encname = NULL;
3423 	int encname_len;
3424 
3425 	mbfl_string_init(&string);
3426 	string.no_language = MBSTRG(language);
3427 	string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
3428 
3429 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|ss", (char **)&string.val, &string.len, &optstr, &optstr_len, &encname, &encname_len) == FAILURE) {
3430 		return;
3431 	}
3432 
3433 	/* option */
3434 	if (optstr != NULL) {
3435 		char *p = optstr;
3436 		int n = optstr_len;
3437 		i = 0;
3438 		opt = 0;
3439 		while (i < n) {
3440 			i++;
3441 			switch (*p++) {
3442 			case 'A':
3443 				opt |= 0x1;
3444 				break;
3445 			case 'a':
3446 				opt |= 0x10;
3447 				break;
3448 			case 'R':
3449 				opt |= 0x2;
3450 				break;
3451 			case 'r':
3452 				opt |= 0x20;
3453 				break;
3454 			case 'N':
3455 				opt |= 0x4;
3456 				break;
3457 			case 'n':
3458 				opt |= 0x40;
3459 				break;
3460 			case 'S':
3461 				opt |= 0x8;
3462 				break;
3463 			case 's':
3464 				opt |= 0x80;
3465 				break;
3466 			case 'K':
3467 				opt |= 0x100;
3468 				break;
3469 			case 'k':
3470 				opt |= 0x1000;
3471 				break;
3472 			case 'H':
3473 				opt |= 0x200;
3474 				break;
3475 			case 'h':
3476 				opt |= 0x2000;
3477 				break;
3478 			case 'V':
3479 				opt |= 0x800;
3480 				break;
3481 			case 'C':
3482 				opt |= 0x10000;
3483 				break;
3484 			case 'c':
3485 				opt |= 0x20000;
3486 				break;
3487 			case 'M':
3488 				opt |= 0x100000;
3489 				break;
3490 			case 'm':
3491 				opt |= 0x200000;
3492 				break;
3493 			}
3494 		}
3495 	} else {
3496 		opt = 0x900;
3497 	}
3498 
3499 	/* encoding */
3500 	if (encname != NULL) {
3501 		string.no_encoding = mbfl_name2no_encoding(encname);
3502 		if (string.no_encoding == mbfl_no_encoding_invalid) {
3503 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encname);
3504 			RETURN_FALSE;
3505 		}
3506 	}
3507 
3508 	ret = mbfl_ja_jp_hantozen(&string, &result, opt);
3509 	if (ret != NULL) {
3510 		RETVAL_STRINGL_CHECK((char *)ret->val, ret->len, 0);		/* the string is already strdup()'ed */
3511 	} else {
3512 		RETVAL_FALSE;
3513 	}
3514 }
3515 /* }}} */
3516 
/* Number of zval ** slots by which the traversal stack used in
 * mb_convert_variables() is initially sized and later grown. */
3517 #define PHP_MBSTR_STACK_BLOCK_SIZE 32
3518 
3519 /* {{{ proto string mb_convert_variables(string to-encoding, mixed from-encoding, mixed vars [, ...])
3520    Converts the string resource in variables to desired encoding */
PHP_FUNCTION(mb_convert_variables)3521 PHP_FUNCTION(mb_convert_variables)
3522 {
3523 	zval ***args, ***stack, **var, **hash_entry, **zfrom_enc;
3524 	HashTable *target_hash;
3525 	mbfl_string string, result, *ret;
3526 	const mbfl_encoding *from_encoding, *to_encoding;
3527 	mbfl_encoding_detector *identd;
3528 	mbfl_buffer_converter *convd;
3529 	int n, to_enc_len, argc, stack_level, stack_max;
3530 	size_t elistsz;
3531 	const mbfl_encoding **elist;
3532 	char *to_enc;
3533 	void *ptmp;
3534 	int recursion_error = 0;
3535 
3536 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sZ+", &to_enc, &to_enc_len, &zfrom_enc, &args, &argc) == FAILURE) {
3537 		return;
3538 	}
3539 
3540 	/* new encoding */
3541 	to_encoding = mbfl_name2encoding(to_enc);
3542 	if (!to_encoding) {
3543 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", to_enc);
3544 		efree(args);
3545 		RETURN_FALSE;
3546 	}
3547 
3548 	/* initialize string */
3549 	mbfl_string_init(&string);
3550 	mbfl_string_init(&result);
3551 	from_encoding = MBSTRG(current_internal_encoding);
3552 	string.no_encoding = from_encoding->no_encoding;
3553 	string.no_language = MBSTRG(language);
3554 
3555 	/* pre-conversion encoding */
3556 	elist = NULL;
3557 	elistsz = 0;
3558 	switch (Z_TYPE_PP(zfrom_enc)) {
3559 	case IS_ARRAY:
3560 		php_mb_parse_encoding_array(*zfrom_enc, &elist, &elistsz, 0 TSRMLS_CC);
3561 		break;
3562 	default:
3563 		convert_to_string_ex(zfrom_enc);
3564 		php_mb_parse_encoding_list(Z_STRVAL_PP(zfrom_enc), Z_STRLEN_PP(zfrom_enc), &elist, &elistsz, 0 TSRMLS_CC);
3565 		break;
3566 	}
3567 	if (elistsz <= 0) {
3568 		from_encoding = &mbfl_encoding_pass;
3569 	} else if (elistsz == 1) {
3570 		from_encoding = *elist;
3571 	} else {
3572 		/* auto detect */
3573 		from_encoding = NULL;
3574 		stack_max = PHP_MBSTR_STACK_BLOCK_SIZE;
3575 		stack = (zval ***)safe_emalloc(stack_max, sizeof(zval **), 0);
3576 		stack_level = 0;
3577 		identd = mbfl_encoding_detector_new2(elist, elistsz, MBSTRG(strict_detection));
3578 		if (identd != NULL) {
3579 			n = 0;
3580 			while (n < argc || stack_level > 0) {
3581 				if (stack_level <= 0) {
3582 					var = args[n++];
3583 					if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) {
3584 						target_hash = HASH_OF(*var);
3585 						if (target_hash != NULL) {
3586 							zend_hash_internal_pointer_reset(target_hash);
3587 						}
3588 					}
3589 				} else {
3590 					stack_level--;
3591 					var = stack[stack_level];
3592 				}
3593 				if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) {
3594 					target_hash = HASH_OF(*var);
3595 					if (target_hash != NULL) {
3596 						while (zend_hash_get_current_data(target_hash, (void **) &hash_entry) != FAILURE) {
3597 							if (++target_hash->nApplyCount > 1) {
3598 								--target_hash->nApplyCount;
3599 								recursion_error = 1;
3600 								goto detect_end;
3601 							}
3602 							zend_hash_move_forward(target_hash);
3603 							if (Z_TYPE_PP(hash_entry) == IS_ARRAY || Z_TYPE_PP(hash_entry) == IS_OBJECT) {
3604 								if (stack_level >= stack_max) {
3605 									stack_max += PHP_MBSTR_STACK_BLOCK_SIZE;
3606 									ptmp = erealloc(stack, sizeof(zval **)*stack_max);
3607 									stack = (zval ***)ptmp;
3608 								}
3609 								stack[stack_level] = var;
3610 								stack_level++;
3611 								var = hash_entry;
3612 								target_hash = HASH_OF(*var);
3613 								if (target_hash != NULL) {
3614 									zend_hash_internal_pointer_reset(target_hash);
3615 									continue;
3616 								}
3617 							} else if (Z_TYPE_PP(hash_entry) == IS_STRING) {
3618 								string.val = (unsigned char *)Z_STRVAL_PP(hash_entry);
3619 								string.len = Z_STRLEN_PP(hash_entry);
3620 								if (mbfl_encoding_detector_feed(identd, &string)) {
3621 									goto detect_end;		/* complete detecting */
3622 								}
3623 							}
3624 						}
3625 					}
3626 				} else if (Z_TYPE_PP(var) == IS_STRING) {
3627 					string.val = (unsigned char *)Z_STRVAL_PP(var);
3628 					string.len = Z_STRLEN_PP(var);
3629 					if (mbfl_encoding_detector_feed(identd, &string)) {
3630 						goto detect_end;		/* complete detecting */
3631 					}
3632 				}
3633 			}
3634 detect_end:
3635 			from_encoding = mbfl_encoding_detector_judge2(identd);
3636 			mbfl_encoding_detector_delete(identd);
3637 		}
3638 		if (recursion_error) {
3639 			while(stack_level-- && (var = stack[stack_level])) {
3640 				if (HASH_OF(*var)->nApplyCount > 1) {
3641 					HASH_OF(*var)->nApplyCount--;
3642 				}
3643 			}
3644 			efree(stack);
3645 			efree(args);
3646 			if (elist != NULL) {
3647 				efree((void *)elist);
3648 			}
3649 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Cannot handle recursive references");
3650 			RETURN_FALSE;
3651 		}
3652 		efree(stack);
3653 
3654 		if (!from_encoding) {
3655 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to detect encoding");
3656 			from_encoding = &mbfl_encoding_pass;
3657 		}
3658 	}
3659 	if (elist != NULL) {
3660 		efree((void *)elist);
3661 	}
3662 	/* create converter */
3663 	convd = NULL;
3664 	if (from_encoding != &mbfl_encoding_pass) {
3665 		convd = mbfl_buffer_converter_new2(from_encoding, to_encoding, 0);
3666 		if (convd == NULL) {
3667 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create converter");
3668 			RETURN_FALSE;
3669 		}
3670 		mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
3671 		mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
3672 	}
3673 
3674 	/* convert */
3675 	if (convd != NULL) {
3676 		stack_max = PHP_MBSTR_STACK_BLOCK_SIZE;
3677 		stack = (zval ***)safe_emalloc(stack_max, sizeof(zval **), 0);
3678 		stack_level = 0;
3679 		n = 0;
3680 		while (n < argc || stack_level > 0) {
3681 			if (stack_level <= 0) {
3682 				var = args[n++];
3683 				if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) {
3684 					target_hash = HASH_OF(*var);
3685 					if (target_hash != NULL) {
3686 						zend_hash_internal_pointer_reset(target_hash);
3687 					}
3688 				}
3689 			} else {
3690 				stack_level--;
3691 				var = stack[stack_level];
3692 			}
3693 			if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) {
3694 				target_hash = HASH_OF(*var);
3695 				if (target_hash != NULL) {
3696 					while (zend_hash_get_current_data(target_hash, (void **) &hash_entry) != FAILURE) {
3697 						zend_hash_move_forward(target_hash);
3698 						if (Z_TYPE_PP(hash_entry) == IS_ARRAY || Z_TYPE_PP(hash_entry) == IS_OBJECT) {
3699 							if (++(HASH_OF(*hash_entry)->nApplyCount) > 1) {
3700 								--(HASH_OF(*hash_entry)->nApplyCount);
3701 								recursion_error = 1;
3702 								goto conv_end;
3703 							}
3704 							if (stack_level >= stack_max) {
3705 								stack_max += PHP_MBSTR_STACK_BLOCK_SIZE;
3706 								ptmp = erealloc(stack, sizeof(zval **)*stack_max);
3707 								stack = (zval ***)ptmp;
3708 							}
3709 							stack[stack_level] = var;
3710 							stack_level++;
3711 							var = hash_entry;
3712 							SEPARATE_ZVAL_IF_NOT_REF(hash_entry);
3713 							target_hash = HASH_OF(*var);
3714 							if (target_hash != NULL) {
3715 								zend_hash_internal_pointer_reset(target_hash);
3716 								continue;
3717 							}
3718 						} else if (Z_TYPE_PP(hash_entry) == IS_STRING) {
3719 							string.val = (unsigned char *)Z_STRVAL_PP(hash_entry);
3720 							string.len = Z_STRLEN_PP(hash_entry);
3721 							ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
3722 							if (ret != NULL) {
3723 								if (Z_REFCOUNT_PP(hash_entry) > 1) {
3724 									Z_DELREF_PP(hash_entry);
3725 									MAKE_STD_ZVAL(*hash_entry);
3726 								} else {
3727 									zval_dtor(*hash_entry);
3728 								}
3729 								ZVAL_STRINGL(*hash_entry, (char *)ret->val, ret->len, 0);
3730 							}
3731 						}
3732 					}
3733 				}
3734 			} else if (Z_TYPE_PP(var) == IS_STRING) {
3735 				string.val = (unsigned char *)Z_STRVAL_PP(var);
3736 				string.len = Z_STRLEN_PP(var);
3737 				ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
3738 				if (ret != NULL) {
3739 					zval_dtor(*var);
3740 					ZVAL_STRINGL(*var, (char *)ret->val, ret->len, 0);
3741 				}
3742 			}
3743 		}
3744 
3745 conv_end:
3746 		MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
3747 		mbfl_buffer_converter_delete(convd);
3748 
3749 		if (recursion_error) {
3750 			while(stack_level-- && (var = stack[stack_level])) {
3751 				if (HASH_OF(*var)->nApplyCount > 1) {
3752 					HASH_OF(*var)->nApplyCount--;
3753 				}
3754 			}
3755 			efree(stack);
3756 			efree(args);
3757 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Cannot handle recursive references");
3758 			RETURN_FALSE;
3759 		}
3760 		efree(stack);
3761 	}
3762 
3763 	efree(args);
3764 
3765 	if (from_encoding) {
3766 		RETURN_STRING(from_encoding->name, 1);
3767 	} else {
3768 		RETURN_FALSE;
3769 	}
3770 }
3771 /* }}} */
3772 
3773 /* {{{ HTML numeric entity */
3774 /* {{{ static void php_mb_numericentity_exec() */
3775 static void
php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS,int type)3776 php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS, int type)
3777 {
3778 	char *str, *encoding;
3779 	int str_len, encoding_len;
3780 	zval *zconvmap, **hash_entry;
3781 	HashTable *target_hash;
3782 	size_t argc = ZEND_NUM_ARGS();
3783 	int i, *convmap, *mapelm, mapsize=0;
3784 	zend_bool is_hex = 0;
3785 	mbfl_string string, result, *ret;
3786 	enum mbfl_no_encoding no_encoding;
3787 
3788 	if (zend_parse_parameters(argc TSRMLS_CC, "sz|sb", &str, &str_len, &zconvmap, &encoding, &encoding_len, &is_hex) == FAILURE) {
3789 		return;
3790 	}
3791 
3792 	mbfl_string_init(&string);
3793 	string.no_language = MBSTRG(language);
3794 	string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
3795 	string.val = (unsigned char *)str;
3796 	string.len = str_len;
3797 
3798 	/* encoding */
3799 	if ((argc == 3 || argc == 4) && encoding_len > 0) {
3800 		no_encoding = mbfl_name2no_encoding(encoding);
3801 		if (no_encoding == mbfl_no_encoding_invalid) {
3802 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
3803 			RETURN_FALSE;
3804 		} else {
3805 			string.no_encoding = no_encoding;
3806 		}
3807 	}
3808 
3809 	if (argc == 4) {
3810 		if (type == 0 && is_hex) {
3811 			type = 2; /* output in hex format */
3812 		}
3813 	}
3814 
3815 	/* conversion map */
3816 	convmap = NULL;
3817 	if (Z_TYPE_P(zconvmap) == IS_ARRAY) {
3818 		target_hash = Z_ARRVAL_P(zconvmap);
3819 		zend_hash_internal_pointer_reset(target_hash);
3820 		i = zend_hash_num_elements(target_hash);
3821 		if (i > 0) {
3822 			convmap = (int *)safe_emalloc(i, sizeof(int), 0);
3823 			mapelm = convmap;
3824 			mapsize = 0;
3825 			while (i > 0) {
3826 				if (zend_hash_get_current_data(target_hash, (void **) &hash_entry) == FAILURE) {
3827 					break;
3828 				}
3829 				convert_to_long_ex(hash_entry);
3830 				*mapelm++ = Z_LVAL_PP(hash_entry);
3831 				mapsize++;
3832 				i--;
3833 				zend_hash_move_forward(target_hash);
3834 			}
3835 		}
3836 	}
3837 	if (convmap == NULL) {
3838 		RETURN_FALSE;
3839 	}
3840 	mapsize /= 4;
3841 
3842 	ret = mbfl_html_numeric_entity(&string, &result, convmap, mapsize, type);
3843 	if (ret != NULL) {
3844 		RETVAL_STRINGL_CHECK((char *)ret->val, ret->len, 0);
3845 	} else {
3846 		RETVAL_FALSE;
3847 	}
3848 	efree((void *)convmap);
3849 }
3850 /* }}} */
3851 
3852 /* {{{ proto string mb_encode_numericentity(string string, array convmap [, string encoding [, bool is_hex]])
3853    Converts specified characters to HTML numeric entities */
PHP_FUNCTION(mb_encode_numericentity)3854 PHP_FUNCTION(mb_encode_numericentity)
3855 {
3856 	php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
3857 }
3858 /* }}} */
3859 
3860 /* {{{ proto string mb_decode_numericentity(string string, array convmap [, string encoding])
3861    Converts HTML numeric entities to character code */
PHP_FUNCTION(mb_decode_numericentity)3862 PHP_FUNCTION(mb_decode_numericentity)
3863 {
3864 	php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
3865 }
3866 /* }}} */
3867 /* }}} */
3868 
3869 /* {{{ proto int mb_send_mail(string to, string subject, string message [, string additional_headers [, string additional_parameters]])
3870  *  Sends an email message with MIME scheme
3871  */
3872 
/*
 * If str[pos] starts a folded-header separator (CR LF followed by at least
 * one space or tab), advance pos to the last whitespace character of that
 * separator and `continue` the caller's enclosing loop.
 *
 * This macro is deliberately a bare `if`: wrapping it in do { } while (0)
 * would make the `continue` target the wrapper instead of the caller's loop.
 * It must therefore only be used as a statement inside a braced loop body.
 */
#define SKIP_LONG_HEADER_SEP_MBSTRING(str, pos)										\
	if ((str)[(pos)] == '\r' && (str)[(pos) + 1] == '\n' && ((str)[(pos) + 2] == ' ' || (str)[(pos) + 2] == '\t')) {	\
		(pos) += 2;											\
		while ((str)[(pos) + 1] == ' ' || (str)[(pos) + 1] == '\t') {					\
			(pos)++;										\
		}												\
		continue;											\
	}

/*
 * Replaces every NUL byte inside the first `len` bytes of `str` with a
 * space, in place.  Relies on two `char *` scratch variables named `pp` and
 * `ee` being declared in the calling scope.
 *
 * Wrapped in do { } while (0) so the whole replacement is a single statement
 * even when used as the body of an unbraced if/else.
 */
#define MAIL_ASCIIZ_CHECK_MBSTRING(str, len) do {	\
	pp = (str);					\
	ee = pp + (len);				\
	while ((pp = memchr(pp, '\0', (ee - pp)))) {	\
		*pp = ' ';				\
	}						\
} while (0)

/*
 * Appends one character to the smart_str named `token` in the calling scope.
 * While token.a == 0 the token is a window onto memory it does not own, so
 * only its length is extended; once the token has its own buffer (a > 0) the
 * character `ch` is really appended.
 */
#define APPEND_ONE_CHAR(ch) do { \
	if (token.a > 0) { \
		smart_str_appendc(&token, ch); \
	} else {\
		token.len++; \
	} \
} while (0)

/*
 * Gives a smart_str with a == 0 a private buffer: the capacity is the
 * smallest power of two that is >= len, one extra byte is allocated on top
 * of it, and the current len bytes are copied over.  Strings that already
 * have a > 0 are left untouched.
 */
#define SEPARATE_SMART_STR(str) do {\
	if ((str)->a == 0) { \
		char *tmp_ptr; \
		(str)->a = 1; \
		while ((str)->a < (str)->len) { \
			(str)->a <<= 1; \
		} \
		tmp_ptr = emalloc((str)->a + 1); \
		memcpy(tmp_ptr, (str)->c, (str)->len); \
		(str)->c = tmp_ptr; \
	} \
} while (0)
3909 
/* Destructor for smart_str values (registered as the element destructor of
 * the header table built by mb_send_mail).  A smart_str with a == 0 does not
 * own its memory, so only strings with an allocated buffer are freed. */
static void my_smart_str_dtor(smart_str *s)
{
	if (s->a <= 0) {
		return;
	}
	smart_str_free(s);
}
3916 
_php_mbstr_parse_mail_headers(HashTable * ht,const char * str,size_t str_len)3917 static int _php_mbstr_parse_mail_headers(HashTable *ht, const char *str, size_t str_len)
3918 {
3919 	const char *ps;
3920 	size_t icnt;
3921 	int state = 0;
3922 	int crlf_state = -1;
3923 
3924 	smart_str token = { 0, 0, 0 };
3925 	smart_str fld_name = { 0, 0, 0 }, fld_val = { 0, 0, 0 };
3926 
3927 	ps = str;
3928 	icnt = str_len;
3929 
3930 	/*
3931 	 *             C o n t e n t - T y p e :   t e x t / h t m l \r\n
3932 	 *             ^ ^^^^^^^^^^^^^^^^^^^^^ ^^^ ^^^^^^^^^^^^^^^^^ ^^^^
3933 	 *      state  0            1           2          3
3934 	 *
3935 	 *             C o n t e n t - T y p e :   t e x t / h t m l \r\n
3936 	 *             ^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^
3937 	 * crlf_state -1                       0                     1 -1
3938 	 *
3939 	 */
3940 
3941 	while (icnt > 0) {
3942 		switch (*ps) {
3943 			case ':':
3944 				if (crlf_state == 1) {
3945 					APPEND_ONE_CHAR('\r');
3946 				}
3947 
3948 				if (state == 0 || state == 1) {
3949 					fld_name = token;
3950 
3951 					state = 2;
3952 				} else {
3953 					APPEND_ONE_CHAR(*ps);
3954 				}
3955 
3956 				crlf_state = 0;
3957 				break;
3958 
3959 			case '\n':
3960 				if (crlf_state == -1) {
3961 					goto out;
3962 				}
3963 				crlf_state = -1;
3964 				break;
3965 
3966 			case '\r':
3967 				if (crlf_state == 1) {
3968 					APPEND_ONE_CHAR('\r');
3969 				} else {
3970 					crlf_state = 1;
3971 				}
3972 				break;
3973 
3974 			case ' ': case '\t':
3975 				if (crlf_state == -1) {
3976 					if (state == 3) {
3977 						/* continuing from the previous line */
3978 						SEPARATE_SMART_STR(&token);
3979 						state = 4;
3980 					} else {
3981 						/* simply skipping this new line */
3982 						state = 5;
3983 					}
3984 				} else {
3985 					if (crlf_state == 1) {
3986 						APPEND_ONE_CHAR('\r');
3987 					}
3988 					if (state == 1 || state == 3) {
3989 						APPEND_ONE_CHAR(*ps);
3990 					}
3991 				}
3992 				crlf_state = 0;
3993 				break;
3994 
3995 			default:
3996 				switch (state) {
3997 					case 0:
3998 						token.c = (char *)ps;
3999 						token.len = 0;
4000 						token.a = 0;
4001 						state = 1;
4002 						break;
4003 
4004 					case 2:
4005 						if (crlf_state != -1) {
4006 							token.c = (char *)ps;
4007 							token.len = 0;
4008 							token.a = 0;
4009 
4010 							state = 3;
4011 							break;
4012 						}
4013 						/* break is missing intentionally */
4014 
4015 					case 3:
4016 						if (crlf_state == -1) {
4017 							fld_val = token;
4018 
4019 							if (fld_name.c != NULL && fld_val.c != NULL) {
4020 								char *dummy;
4021 
4022 								/* FIXME: some locale free implementation is
4023 								 * really required here,,, */
4024 								SEPARATE_SMART_STR(&fld_name);
4025 								php_strtoupper(fld_name.c, fld_name.len);
4026 
4027 								zend_hash_update(ht, (char *)fld_name.c, fld_name.len, &fld_val, sizeof(smart_str), (void **)&dummy);
4028 
4029 								my_smart_str_dtor(&fld_name);
4030 							}
4031 
4032 							memset(&fld_name, 0, sizeof(smart_str));
4033 							memset(&fld_val, 0, sizeof(smart_str));
4034 
4035 							token.c = (char *)ps;
4036 							token.len = 0;
4037 							token.a = 0;
4038 
4039 							state = 1;
4040 						}
4041 						break;
4042 
4043 					case 4:
4044 						APPEND_ONE_CHAR(' ');
4045 						state = 3;
4046 						break;
4047 				}
4048 
4049 				if (crlf_state == 1) {
4050 					APPEND_ONE_CHAR('\r');
4051 				}
4052 
4053 				APPEND_ONE_CHAR(*ps);
4054 
4055 				crlf_state = 0;
4056 				break;
4057 		}
4058 		ps++, icnt--;
4059 	}
4060 out:
4061 	if (state == 2) {
4062 		token.c = "";
4063 		token.len = 0;
4064 		token.a = 0;
4065 
4066 		state = 3;
4067 	}
4068 	if (state == 3) {
4069 		fld_val = token;
4070 
4071 		if (fld_name.c != NULL && fld_val.c != NULL) {
4072 			void *dummy;
4073 
4074 			/* FIXME: some locale free implementation is
4075 			 * really required here,,, */
4076 			SEPARATE_SMART_STR(&fld_name);
4077 			php_strtoupper(fld_name.c, fld_name.len);
4078 
4079 			zend_hash_update(ht, (char *)fld_name.c, fld_name.len, &fld_val, sizeof(smart_str), (void **)&dummy);
4080 
4081 			my_smart_str_dtor(&fld_name);
4082 		}
4083 	}
4084 	return state;
4085 }
4086 
PHP_FUNCTION(mb_send_mail)4087 PHP_FUNCTION(mb_send_mail)
4088 {
4089 	int n;
4090 	char *to = NULL;
4091 	int to_len;
4092 	char *message = NULL;
4093 	int message_len;
4094 	char *headers = NULL;
4095 	int headers_len;
4096 	char *subject = NULL;
4097 	int subject_len;
4098 	char *extra_cmd = NULL;
4099 	int extra_cmd_len;
4100 	int i;
4101 	char *to_r = NULL;
4102 	char *force_extra_parameters = INI_STR("mail.force_extra_parameters");
4103 	struct {
4104 		int cnt_type:1;
4105 		int cnt_trans_enc:1;
4106 	} suppressed_hdrs = { 0, 0 };
4107 
4108 	char *message_buf = NULL, *subject_buf = NULL, *p;
4109 	mbfl_string orig_str, conv_str;
4110 	mbfl_string *pstr;	/* pointer to mbfl string for return value */
4111 	enum mbfl_no_encoding
4112 		tran_cs,	/* transfar text charset */
4113 		head_enc,	/* header transfar encoding */
4114 		body_enc;	/* body transfar encoding */
4115 	mbfl_memory_device device;	/* automatic allocateable buffer for additional header */
4116 	const mbfl_language *lang;
4117 	int err = 0;
4118 	HashTable ht_headers;
4119 	smart_str *s;
4120 	extern void mbfl_memory_device_unput(mbfl_memory_device *device);
4121 	char *pp, *ee;
4122 
4123 	/* initialize */
4124 	mbfl_memory_device_init(&device, 0, 0);
4125 	mbfl_string_init(&orig_str);
4126 	mbfl_string_init(&conv_str);
4127 
4128 	/* character-set, transfer-encoding */
4129 	tran_cs = mbfl_no_encoding_utf8;
4130 	head_enc = mbfl_no_encoding_base64;
4131 	body_enc = mbfl_no_encoding_base64;
4132 	lang = mbfl_no2language(MBSTRG(language));
4133 	if (lang != NULL) {
4134 		tran_cs = lang->mail_charset;
4135 		head_enc = lang->mail_header_encoding;
4136 		body_enc = lang->mail_body_encoding;
4137 	}
4138 
4139 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sss|ss", &to, &to_len, &subject, &subject_len, &message, &message_len, &headers, &headers_len, &extra_cmd, &extra_cmd_len) == FAILURE) {
4140 		return;
4141 	}
4142 
4143 	/* ASCIIZ check */
4144 	MAIL_ASCIIZ_CHECK_MBSTRING(to, to_len);
4145 	MAIL_ASCIIZ_CHECK_MBSTRING(subject, subject_len);
4146 	MAIL_ASCIIZ_CHECK_MBSTRING(message, message_len);
4147 	if (headers) {
4148 		MAIL_ASCIIZ_CHECK_MBSTRING(headers, headers_len);
4149 	}
4150 	if (extra_cmd) {
4151 		MAIL_ASCIIZ_CHECK_MBSTRING(extra_cmd, extra_cmd_len);
4152 	}
4153 
4154 	zend_hash_init(&ht_headers, 0, NULL, (dtor_func_t) my_smart_str_dtor, 0);
4155 
4156 	if (headers != NULL) {
4157 		_php_mbstr_parse_mail_headers(&ht_headers, headers, headers_len);
4158 	}
4159 
4160 	if (zend_hash_find(&ht_headers, "CONTENT-TYPE", sizeof("CONTENT-TYPE") - 1, (void **)&s) == SUCCESS) {
4161 		char *tmp;
4162 		char *param_name;
4163 		char *charset = NULL;
4164 
4165 		SEPARATE_SMART_STR(s);
4166 		smart_str_0(s);
4167 
4168 		p = strchr(s->c, ';');
4169 
4170 		if (p != NULL) {
4171 			/* skipping the padded spaces */
4172 			do {
4173 				++p;
4174 			} while (*p == ' ' || *p == '\t');
4175 
4176 			if (*p != '\0') {
4177 				if ((param_name = php_strtok_r(p, "= ", &tmp)) != NULL) {
4178 					if (strcasecmp(param_name, "charset") == 0) {
4179 						enum mbfl_no_encoding _tran_cs = tran_cs;
4180 
4181 						charset = php_strtok_r(NULL, "= \"", &tmp);
4182 						if (charset != NULL) {
4183 							_tran_cs = mbfl_name2no_encoding(charset);
4184 						}
4185 
4186 						if (_tran_cs == mbfl_no_encoding_invalid) {
4187 							php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unsupported charset \"%s\" - will be regarded as ascii", charset);
4188 							_tran_cs = mbfl_no_encoding_ascii;
4189 						}
4190 						tran_cs = _tran_cs;
4191 					}
4192 				}
4193 			}
4194 		}
4195 		suppressed_hdrs.cnt_type = 1;
4196 	}
4197 
4198 	if (zend_hash_find(&ht_headers, "CONTENT-TRANSFER-ENCODING", sizeof("CONTENT-TRANSFER-ENCODING") - 1, (void **)&s) == SUCCESS) {
4199 		enum mbfl_no_encoding _body_enc;
4200 		SEPARATE_SMART_STR(s);
4201 		smart_str_0(s);
4202 
4203 		_body_enc = mbfl_name2no_encoding(s->c);
4204 		switch (_body_enc) {
4205 			case mbfl_no_encoding_base64:
4206 			case mbfl_no_encoding_7bit:
4207 			case mbfl_no_encoding_8bit:
4208 				body_enc = _body_enc;
4209 				break;
4210 
4211 			default:
4212 				php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unsupported transfer encoding \"%s\" - will be regarded as 8bit", s->c);
4213 				body_enc =	mbfl_no_encoding_8bit;
4214 				break;
4215 		}
4216 		suppressed_hdrs.cnt_trans_enc = 1;
4217 	}
4218 
4219 	/* To: */
4220 	if (to != NULL) {
4221 		if (to_len > 0) {
4222 			to_r = estrndup(to, to_len);
4223 			for (; to_len; to_len--) {
4224 				if (!isspace((unsigned char) to_r[to_len - 1])) {
4225 					break;
4226 				}
4227 				to_r[to_len - 1] = '\0';
4228 			}
4229 			for (i = 0; to_r[i]; i++) {
4230 			if (iscntrl((unsigned char) to_r[i])) {
4231 				/* According to RFC 822, section 3.1.1 long headers may be separated into
4232 				 * parts using CRLF followed at least one linear-white-space character ('\t' or ' ').
4233 				 * To prevent these separators from being replaced with a space, we use the
4234 				 * SKIP_LONG_HEADER_SEP_MBSTRING to skip over them.
4235 				 */
4236 				SKIP_LONG_HEADER_SEP_MBSTRING(to_r, i);
4237 				to_r[i] = ' ';
4238 			}
4239 			}
4240 		} else {
4241 			to_r = to;
4242 		}
4243 	} else {
4244 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Missing To: field");
4245 		err = 1;
4246 	}
4247 
4248 	/* Subject: */
4249 	if (subject != NULL && subject_len >= 0) {
4250 		orig_str.no_language = MBSTRG(language);
4251 		orig_str.val = (unsigned char *)subject;
4252 		orig_str.len = subject_len;
4253 		orig_str.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
4254 		if (orig_str.no_encoding == mbfl_no_encoding_invalid || orig_str.no_encoding == mbfl_no_encoding_pass) {
4255 			const mbfl_encoding *encoding = mbfl_identify_encoding2(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
4256 			orig_str.no_encoding = encoding ? encoding->no_encoding: mbfl_no_encoding_invalid;
4257 		}
4258 		pstr = mbfl_mime_header_encode(&orig_str, &conv_str, tran_cs, head_enc, "\n", sizeof("Subject: [PHP-jp nnnnnnnn]"));
4259 		if (pstr != NULL) {
4260 			subject_buf = subject = (char *)pstr->val;
4261 		}
4262 	} else {
4263 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Missing Subject: field");
4264 		err = 1;
4265 	}
4266 
4267 	/* message body */
4268 	if (message != NULL) {
4269 		orig_str.no_language = MBSTRG(language);
4270 		orig_str.val = (unsigned char *)message;
4271 		orig_str.len = (unsigned int)message_len;
4272 		orig_str.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
4273 
4274 		if (orig_str.no_encoding == mbfl_no_encoding_invalid || orig_str.no_encoding == mbfl_no_encoding_pass) {
4275 			const mbfl_encoding *encoding = mbfl_identify_encoding2(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
4276 			orig_str.no_encoding = encoding ? encoding->no_encoding: mbfl_no_encoding_invalid;
4277 		}
4278 
4279 		pstr = NULL;
4280 		{
4281 			mbfl_string tmpstr;
4282 
4283 			if (mbfl_convert_encoding(&orig_str, &tmpstr, tran_cs) != NULL) {
4284 				tmpstr.no_encoding=mbfl_no_encoding_8bit;
4285 				pstr = mbfl_convert_encoding(&tmpstr, &conv_str, body_enc);
4286 				efree(tmpstr.val);
4287 			}
4288 		}
4289 		if (pstr != NULL) {
4290 			message_buf = message = (char *)pstr->val;
4291 		}
4292 	} else {
4293 		/* this is not really an error, so it is allowed. */
4294 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty message body");
4295 		message = NULL;
4296 	}
4297 
4298 	/* other headers */
4299 #define PHP_MBSTR_MAIL_MIME_HEADER1 "MIME-Version: 1.0"
4300 #define PHP_MBSTR_MAIL_MIME_HEADER2 "Content-Type: text/plain"
4301 #define PHP_MBSTR_MAIL_MIME_HEADER3 "; charset="
4302 #define PHP_MBSTR_MAIL_MIME_HEADER4 "Content-Transfer-Encoding: "
4303 	if (headers != NULL) {
4304 		p = headers;
4305 		n = headers_len;
4306 		mbfl_memory_device_strncat(&device, p, n);
4307 		if (n > 0 && p[n - 1] != '\n') {
4308 			mbfl_memory_device_strncat(&device, "\n", 1);
4309 		}
4310 	}
4311 
4312 	if (!zend_hash_exists(&ht_headers, "MIME-VERSION", sizeof("MIME-VERSION") - 1)) {
4313 		mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER1, sizeof(PHP_MBSTR_MAIL_MIME_HEADER1) - 1);
4314 		mbfl_memory_device_strncat(&device, "\n", 1);
4315 	}
4316 
4317 	if (!suppressed_hdrs.cnt_type) {
4318 		mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER2, sizeof(PHP_MBSTR_MAIL_MIME_HEADER2) - 1);
4319 
4320 		p = (char *)mbfl_no2preferred_mime_name(tran_cs);
4321 		if (p != NULL) {
4322 			mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER3, sizeof(PHP_MBSTR_MAIL_MIME_HEADER3) - 1);
4323 			mbfl_memory_device_strcat(&device, p);
4324 		}
4325 		mbfl_memory_device_strncat(&device, "\n", 1);
4326 	}
4327 	if (!suppressed_hdrs.cnt_trans_enc) {
4328 		mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER4, sizeof(PHP_MBSTR_MAIL_MIME_HEADER4) - 1);
4329 		p = (char *)mbfl_no2preferred_mime_name(body_enc);
4330 		if (p == NULL) {
4331 			p = "7bit";
4332 		}
4333 		mbfl_memory_device_strcat(&device, p);
4334 		mbfl_memory_device_strncat(&device, "\n", 1);
4335 	}
4336 
4337 	mbfl_memory_device_unput(&device);
4338 	mbfl_memory_device_output('\0', &device);
4339 	headers = (char *)device.buffer;
4340 
4341 	if (force_extra_parameters) {
4342 		extra_cmd = php_escape_shell_cmd(force_extra_parameters);
4343 	} else if (extra_cmd) {
4344 		extra_cmd = php_escape_shell_cmd(extra_cmd);
4345 	}
4346 
4347 	if (!err && php_mail(to_r, subject, message, headers, extra_cmd TSRMLS_CC)) {
4348 		RETVAL_TRUE;
4349 	} else {
4350 		RETVAL_FALSE;
4351 	}
4352 
4353 	if (extra_cmd) {
4354 		efree(extra_cmd);
4355 	}
4356 	if (to_r != to) {
4357 		efree(to_r);
4358 	}
4359 	if (subject_buf) {
4360 		efree((void *)subject_buf);
4361 	}
4362 	if (message_buf) {
4363 		efree((void *)message_buf);
4364 	}
4365 	mbfl_memory_device_clear(&device);
4366 	zend_hash_destroy(&ht_headers);
4367 }
4368 
4369 #undef SKIP_LONG_HEADER_SEP_MBSTRING
4370 #undef MAIL_ASCIIZ_CHECK_MBSTRING
4371 #undef APPEND_ONE_CHAR
4372 #undef SEPARATE_SMART_STR
4373 #undef PHP_MBSTR_MAIL_MIME_HEADER1
4374 #undef PHP_MBSTR_MAIL_MIME_HEADER2
4375 #undef PHP_MBSTR_MAIL_MIME_HEADER3
4376 #undef PHP_MBSTR_MAIL_MIME_HEADER4
4377 /* }}} */
4378 
4379 /* {{{ proto mixed mb_get_info([string type])
4380    Returns the current settings of mbstring */
PHP_FUNCTION(mb_get_info)4381 PHP_FUNCTION(mb_get_info)
4382 {
4383 	char *typ = NULL;
4384 	int typ_len;
4385 	size_t n;
4386 	char *name;
4387 	const struct mb_overload_def *over_func;
4388 	zval *row1, *row2;
4389 	const mbfl_language *lang = mbfl_no2language(MBSTRG(language));
4390 	const mbfl_encoding **entry;
4391 
4392 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &typ, &typ_len) == FAILURE) {
4393 		RETURN_FALSE;
4394 	}
4395 
4396 	if (!typ || !strcasecmp("all", typ)) {
4397 		array_init(return_value);
4398 		if (MBSTRG(current_internal_encoding)) {
4399 			add_assoc_string(return_value, "internal_encoding", (char *)MBSTRG(current_internal_encoding)->name, 1);
4400 		}
4401 		if (MBSTRG(http_input_identify)) {
4402 			add_assoc_string(return_value, "http_input", (char *)MBSTRG(http_input_identify)->name, 1);
4403 		}
4404 		if (MBSTRG(current_http_output_encoding)) {
4405 			add_assoc_string(return_value, "http_output", (char *)MBSTRG(current_http_output_encoding)->name, 1);
4406 		}
4407 		if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes"), 0)) != NULL) {
4408 			add_assoc_string(return_value, "http_output_conv_mimetypes", name, 1);
4409 		}
4410 		add_assoc_long(return_value, "func_overload", MBSTRG(func_overload));
4411 		if (MBSTRG(func_overload)){
4412 			over_func = &(mb_ovld[0]);
4413 			MAKE_STD_ZVAL(row1);
4414 			array_init(row1);
4415 			while (over_func->type > 0) {
4416 				if ((MBSTRG(func_overload) & over_func->type) == over_func->type ) {
4417 					add_assoc_string(row1, over_func->orig_func, over_func->ovld_func, 1);
4418 				}
4419 				over_func++;
4420 			}
4421 			add_assoc_zval(return_value, "func_overload_list", row1);
4422 		} else {
4423 			add_assoc_string(return_value, "func_overload_list", "no overload", 1);
4424  		}
4425 		if (lang != NULL) {
4426 			if ((name = (char *)mbfl_no_encoding2name(lang->mail_charset)) != NULL) {
4427 				add_assoc_string(return_value, "mail_charset", name, 1);
4428 			}
4429 			if ((name = (char *)mbfl_no_encoding2name(lang->mail_header_encoding)) != NULL) {
4430 				add_assoc_string(return_value, "mail_header_encoding", name, 1);
4431 			}
4432 			if ((name = (char *)mbfl_no_encoding2name(lang->mail_body_encoding)) != NULL) {
4433 				add_assoc_string(return_value, "mail_body_encoding", name, 1);
4434 			}
4435 		}
4436 		add_assoc_long(return_value, "illegal_chars", MBSTRG(illegalchars));
4437 		if (MBSTRG(encoding_translation)) {
4438 			add_assoc_string(return_value, "encoding_translation", "On", 1);
4439 		} else {
4440 			add_assoc_string(return_value, "encoding_translation", "Off", 1);
4441 		}
4442 		if ((name = (char *)mbfl_no_language2name(MBSTRG(language))) != NULL) {
4443 			add_assoc_string(return_value, "language", name, 1);
4444 		}
4445 		n = MBSTRG(current_detect_order_list_size);
4446 		entry = MBSTRG(current_detect_order_list);
4447 		if (n > 0) {
4448 			size_t i;
4449 			MAKE_STD_ZVAL(row2);
4450 			array_init(row2);
4451 			for (i = 0; i < n; i++) {
4452 				add_next_index_string(row2, (*entry)->name, 1);
4453 				entry++;
4454 			}
4455 			add_assoc_zval(return_value, "detect_order", row2);
4456 		}
4457 		if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
4458 			add_assoc_string(return_value, "substitute_character", "none", 1);
4459 		} else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
4460 			add_assoc_string(return_value, "substitute_character", "long", 1);
4461 		} else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
4462 			add_assoc_string(return_value, "substitute_character", "entity", 1);
4463 		} else {
4464 			add_assoc_long(return_value, "substitute_character", MBSTRG(current_filter_illegal_substchar));
4465 		}
4466 		if (MBSTRG(strict_detection)) {
4467 			add_assoc_string(return_value, "strict_detection", "On", 1);
4468 		} else {
4469 			add_assoc_string(return_value, "strict_detection", "Off", 1);
4470 		}
4471 	} else if (!strcasecmp("internal_encoding", typ)) {
4472 		if (MBSTRG(current_internal_encoding)) {
4473 			RETVAL_STRING((char *)MBSTRG(current_internal_encoding)->name, 1);
4474 		}
4475 	} else if (!strcasecmp("http_input", typ)) {
4476 		if (MBSTRG(http_input_identify)) {
4477 			RETVAL_STRING((char *)MBSTRG(http_input_identify)->name, 1);
4478 		}
4479 	} else if (!strcasecmp("http_output", typ)) {
4480 		if (MBSTRG(current_http_output_encoding)) {
4481 			RETVAL_STRING((char *)MBSTRG(current_http_output_encoding)->name, 1);
4482 		}
4483 	} else if (!strcasecmp("http_output_conv_mimetypes", typ)) {
4484 		if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes"), 0)) != NULL) {
4485 			RETVAL_STRING(name, 1);
4486 		}
4487 	} else if (!strcasecmp("func_overload", typ)) {
4488  		RETVAL_LONG(MBSTRG(func_overload));
4489 	} else if (!strcasecmp("func_overload_list", typ)) {
4490 		if (MBSTRG(func_overload)){
4491 				over_func = &(mb_ovld[0]);
4492 				array_init(return_value);
4493 				while (over_func->type > 0) {
4494 					if ((MBSTRG(func_overload) & over_func->type) == over_func->type ) {
4495 						add_assoc_string(return_value, over_func->orig_func, over_func->ovld_func, 1);
4496 					}
4497 					over_func++;
4498 				}
4499 		} else {
4500 			RETVAL_STRING("no overload", 1);
4501 		}
4502 	} else if (!strcasecmp("mail_charset", typ)) {
4503 		if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_charset)) != NULL) {
4504 			RETVAL_STRING(name, 1);
4505 		}
4506 	} else if (!strcasecmp("mail_header_encoding", typ)) {
4507 		if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_header_encoding)) != NULL) {
4508 			RETVAL_STRING(name, 1);
4509 		}
4510 	} else if (!strcasecmp("mail_body_encoding", typ)) {
4511 		if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_body_encoding)) != NULL) {
4512 			RETVAL_STRING(name, 1);
4513 		}
4514 	} else if (!strcasecmp("illegal_chars", typ)) {
4515 		RETVAL_LONG(MBSTRG(illegalchars));
4516 	} else if (!strcasecmp("encoding_translation", typ)) {
4517 		if (MBSTRG(encoding_translation)) {
4518 			RETVAL_STRING("On", 1);
4519 		} else {
4520 			RETVAL_STRING("Off", 1);
4521 		}
4522 	} else if (!strcasecmp("language", typ)) {
4523 		if ((name = (char *)mbfl_no_language2name(MBSTRG(language))) != NULL) {
4524 			RETVAL_STRING(name, 1);
4525 		}
4526 	} else if (!strcasecmp("detect_order", typ)) {
4527 		n = MBSTRG(current_detect_order_list_size);
4528 		entry = MBSTRG(current_detect_order_list);
4529 		if (n > 0) {
4530 			size_t i;
4531 			array_init(return_value);
4532 			for (i = 0; i < n; i++) {
4533 				add_next_index_string(return_value, (*entry)->name, 1);
4534 				entry++;
4535 			}
4536 		}
4537 	} else if (!strcasecmp("substitute_character", typ)) {
4538 		if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
4539 			RETVAL_STRING("none", 1);
4540 		} else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
4541 			RETVAL_STRING("long", 1);
4542 		} else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
4543 			RETVAL_STRING("entity", 1);
4544 		} else {
4545 			RETVAL_LONG(MBSTRG(current_filter_illegal_substchar));
4546 		}
4547 	} else if (!strcasecmp("strict_detection", typ)) {
4548 		if (MBSTRG(strict_detection)) {
4549 			RETVAL_STRING("On", 1);
4550 		} else {
4551 			RETVAL_STRING("Off", 1);
4552 		}
4553 	} else {
4554 		RETURN_FALSE;
4555 	}
4556 }
4557 /* }}} */
4558 
4559 /* {{{ proto bool mb_check_encoding([string var[, string encoding]])
4560    Check if the string is valid for the specified encoding */
PHP_FUNCTION(mb_check_encoding)4561 PHP_FUNCTION(mb_check_encoding)
4562 {
4563 	char *var = NULL;
4564 	int var_len;
4565 	char *enc = NULL;
4566 	int enc_len;
4567 	mbfl_buffer_converter *convd;
4568 	const mbfl_encoding *encoding = MBSTRG(current_internal_encoding);
4569 	mbfl_string string, result, *ret = NULL;
4570 	long illegalchars = 0;
4571 
4572 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|ss", &var, &var_len, &enc, &enc_len) == FAILURE) {
4573 		RETURN_FALSE;
4574 	}
4575 
4576 	if (var == NULL) {
4577 		RETURN_BOOL(MBSTRG(illegalchars) == 0);
4578 	}
4579 
4580 	if (enc != NULL) {
4581 		encoding = mbfl_name2encoding(enc);
4582 		if (!encoding || encoding == &mbfl_encoding_pass) {
4583 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid encoding \"%s\"", enc);
4584 			RETURN_FALSE;
4585 		}
4586 	}
4587 
4588 	convd = mbfl_buffer_converter_new2(encoding, encoding, 0);
4589 	if (convd == NULL) {
4590 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create converter");
4591 		RETURN_FALSE;
4592 	}
4593 	mbfl_buffer_converter_illegal_mode(convd, MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE);
4594 	mbfl_buffer_converter_illegal_substchar(convd, 0);
4595 
4596 	/* initialize string */
4597 	mbfl_string_init_set(&string, mbfl_no_language_neutral, encoding->no_encoding);
4598 	mbfl_string_init(&result);
4599 
4600 	string.val = (unsigned char *)var;
4601 	string.len = var_len;
4602 	ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
4603 	illegalchars = mbfl_buffer_illegalchars(convd);
4604 	mbfl_buffer_converter_delete(convd);
4605 
4606 	RETVAL_FALSE;
4607 	if (ret != NULL) {
4608 		if (illegalchars == 0 && string.len == result.len && memcmp(string.val, result.val, string.len) == 0) {
4609 			RETVAL_TRUE;
4610 		}
4611 		mbfl_string_clear(&result);
4612 	}
4613 }
4614 /* }}} */
4615 
4616 
4617 /* {{{ php_mb_populate_current_detect_order_list */
php_mb_populate_current_detect_order_list(TSRMLS_D)4618 static void php_mb_populate_current_detect_order_list(TSRMLS_D)
4619 {
4620 	const mbfl_encoding **entry = 0;
4621 	size_t nentries;
4622 
4623 	if (MBSTRG(current_detect_order_list)) {
4624 		return;
4625 	}
4626 
4627 	if (MBSTRG(detect_order_list) && MBSTRG(detect_order_list_size)) {
4628 		nentries = MBSTRG(detect_order_list_size);
4629 		entry = (const mbfl_encoding **)safe_emalloc(nentries, sizeof(mbfl_encoding*), 0);
4630 		memcpy(entry, MBSTRG(detect_order_list), sizeof(mbfl_encoding*) * nentries);
4631 	} else {
4632 		const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
4633 		size_t i;
4634 		nentries = MBSTRG(default_detect_order_list_size);
4635 		entry = (const mbfl_encoding **)safe_emalloc(nentries, sizeof(mbfl_encoding*), 0);
4636 		for (i = 0; i < nentries; i++) {
4637 			entry[i] = mbfl_no2encoding(src[i]);
4638 		}
4639 	}
4640 	MBSTRG(current_detect_order_list) = entry;
4641 	MBSTRG(current_detect_order_list_size) = nentries;
4642 }
4643 
4644 /* {{{ static int php_mb_encoding_translation() */
php_mb_encoding_translation(TSRMLS_D)4645 static int php_mb_encoding_translation(TSRMLS_D)
4646 {
4647 	return MBSTRG(encoding_translation);
4648 }
4649 /* }}} */
4650 
4651 /* {{{ MBSTRING_API size_t php_mb_mbchar_bytes_ex() */
php_mb_mbchar_bytes_ex(const char * s,const mbfl_encoding * enc)4652 MBSTRING_API size_t php_mb_mbchar_bytes_ex(const char *s, const mbfl_encoding *enc)
4653 {
4654 	if (enc != NULL) {
4655 		if (enc->flag & MBFL_ENCTYPE_MBCS) {
4656 			if (enc->mblen_table != NULL) {
4657 				if (s != NULL) return enc->mblen_table[*(unsigned char *)s];
4658 			}
4659 		} else if (enc->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
4660 			return 2;
4661 		} else if (enc->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
4662 			return 4;
4663 		}
4664 	}
4665 	return 1;
4666 }
4667 /* }}} */
4668 
4669 /* {{{ MBSTRING_API size_t php_mb_mbchar_bytes() */
php_mb_mbchar_bytes(const char * s TSRMLS_DC)4670 MBSTRING_API size_t php_mb_mbchar_bytes(const char *s TSRMLS_DC)
4671 {
4672 	return php_mb_mbchar_bytes_ex(s, MBSTRG(internal_encoding));
4673 }
4674 /* }}} */
4675 
4676 /* {{{ MBSTRING_API char *php_mb_safe_strrchr_ex() */
php_mb_safe_strrchr_ex(const char * s,unsigned int c,size_t nbytes,const mbfl_encoding * enc)4677 MBSTRING_API char *php_mb_safe_strrchr_ex(const char *s, unsigned int c, size_t nbytes, const mbfl_encoding *enc)
4678 {
4679 	register const char *p = s;
4680 	char *last=NULL;
4681 
4682 	if (nbytes == (size_t)-1) {
4683 		size_t nb = 0;
4684 
4685 		while (*p != '\0') {
4686 			if (nb == 0) {
4687 				if ((unsigned char)*p == (unsigned char)c) {
4688 					last = (char *)p;
4689 				}
4690 				nb = php_mb_mbchar_bytes_ex(p, enc);
4691 				if (nb == 0) {
4692 					return NULL; /* something is going wrong! */
4693 				}
4694 			}
4695 			--nb;
4696 			++p;
4697 		}
4698 	} else {
4699 		register size_t bcnt = nbytes;
4700 		register size_t nbytes_char;
4701 		while (bcnt > 0) {
4702 			if ((unsigned char)*p == (unsigned char)c) {
4703 				last = (char *)p;
4704 			}
4705 			nbytes_char = php_mb_mbchar_bytes_ex(p, enc);
4706 			if (bcnt < nbytes_char) {
4707 				return NULL;
4708 			}
4709 			p += nbytes_char;
4710 			bcnt -= nbytes_char;
4711 		}
4712 	}
4713 	return last;
4714 }
4715 /* }}} */
4716 
4717 /* {{{ MBSTRING_API char *php_mb_safe_strrchr() */
php_mb_safe_strrchr(const char * s,unsigned int c,size_t nbytes TSRMLS_DC)4718 MBSTRING_API char *php_mb_safe_strrchr(const char *s, unsigned int c, size_t nbytes TSRMLS_DC)
4719 {
4720 	return php_mb_safe_strrchr_ex(s, c, nbytes, MBSTRG(internal_encoding));
4721 }
4722 /* }}} */
4723 
4724 /* {{{ MBSTRING_API int php_mb_stripos()
4725  */
php_mb_stripos(int mode,const char * old_haystack,unsigned int old_haystack_len,const char * old_needle,unsigned int old_needle_len,long offset,const char * from_encoding TSRMLS_DC)4726 MBSTRING_API int php_mb_stripos(int mode, const char *old_haystack, unsigned int old_haystack_len, const char *old_needle, unsigned int old_needle_len, long offset, const char *from_encoding TSRMLS_DC)
4727 {
4728 	int n;
4729 	mbfl_string haystack, needle;
4730 	n = -1;
4731 
4732 	mbfl_string_init(&haystack);
4733 	mbfl_string_init(&needle);
4734 	haystack.no_language = MBSTRG(language);
4735 	haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
4736 	needle.no_language = MBSTRG(language);
4737 	needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
4738 
4739 	do {
4740 		size_t len = 0;
4741 		haystack.val = (unsigned char *)php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, (char *)old_haystack, old_haystack_len, &len, from_encoding TSRMLS_CC);
4742 		haystack.len = len;
4743 
4744 		if (!haystack.val) {
4745 			break;
4746 		}
4747 
4748 		if (haystack.len <= 0) {
4749 			break;
4750 		}
4751 
4752 		needle.val = (unsigned char *)php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, (char *)old_needle, old_needle_len, &len, from_encoding TSRMLS_CC);
4753 		needle.len = len;
4754 
4755 		if (!needle.val) {
4756 			break;
4757 		}
4758 
4759 		if (needle.len <= 0) {
4760 			break;
4761 		}
4762 
4763 		haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding);
4764 		if (haystack.no_encoding == mbfl_no_encoding_invalid) {
4765 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", from_encoding);
4766 			break;
4767 		}
4768 
4769  		{
4770  			int haystack_char_len = mbfl_strlen(&haystack);
4771 
4772  			if (mode) {
4773  				if ((offset > 0 && offset > haystack_char_len) ||
4774  					(offset < 0 && -offset > haystack_char_len)) {
4775  					php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset is greater than the length of haystack string");
4776  					break;
4777  				}
4778  			} else {
4779  				if (offset < 0 || offset > haystack_char_len) {
4780  					php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset not contained in string");
4781  					break;
4782  				}
4783  			}
4784 		}
4785 
4786 		n = mbfl_strpos(&haystack, &needle, offset, mode);
4787 	} while(0);
4788 
4789 	if (haystack.val) {
4790 		efree(haystack.val);
4791 	}
4792 
4793 	if (needle.val) {
4794 		efree(needle.val);
4795 	}
4796 
4797 	return n;
4798 }
4799 /* }}} */
4800 
/*
 * Report the HTTP input encoding list to the caller: stores
 * MBSTRG(http_input_list) in *list and its element count in *list_size.
 * The array itself is not copied.
 */
static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *list_size TSRMLS_DC) /* {{{ */
{
	*list_size = MBSTRG(http_input_list_size);
	*list = (const zend_encoding **)MBSTRG(http_input_list);
}
4806 /* }}} */
4807 
/*
 * Record `encoding` in MBSTRG(http_input_identify). The zend_encoding
 * pointer is reinterpreted as an mbfl_encoding pointer.
 */
static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding TSRMLS_DC) /* {{{ */
{
	const mbfl_encoding *identified = (const mbfl_encoding*)encoding;

	MBSTRG(http_input_identify) = identified;
}
4812 /* }}} */
4813 
4814 #endif	/* HAVE_MBSTRING */
4815 
4816 /*
4817  * Local variables:
4818  * tab-width: 4
4819  * c-basic-offset: 4
4820  * End:
4821  * vim600: fdm=marker
4822  * vim: noet sw=4 ts=4
4823  */
4824