1 /*
2 +----------------------------------------------------------------------+
3 | PHP Version 7 |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 1997-2018 The PHP Group |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
15 | Author: Tsukada Takuya <tsukada@fminn.nagano.nagano.jp> |
16 | Rui Hirokawa <hirokawa@php.net> |
17 +----------------------------------------------------------------------+
18 */
19
20 /* $Id$ */
21
22 /*
23 * PHP 4 Multibyte String module "mbstring"
24 *
25 * History:
26 * 2000.5.19 Release php-4.0RC2_jstring-1.0
27 * 2001.4.1 Release php4_jstring-1.0.91
28 * 2001.4.30 Release php4_jstring-1.1 (contribute to The PHP Group)
29 * 2001.5.1 Renamed from jstring to mbstring (hirokawa@php.net)
30 */
31
32 /*
33 * PHP3 Internationalization support program.
34 *
35 * Copyright (c) 1999,2000 by the PHP3 internationalization team.
36 * All rights reserved.
37 *
38 * See README_PHP3-i18n-ja for more detail.
39 *
40 * Authors:
41 * Hironori Sato <satoh@jpnnet.com>
42 * Shigeru Kanemoto <sgk@happysize.co.jp>
43 * Tsukada Takuya <tsukada@fminn.nagano.nagano.jp>
44 * Rui Hirokawa <rui_hirokawa@ybb.ne.jp>
45 */
46
47 /* {{{ includes */
48 #ifdef HAVE_CONFIG_H
49 #include "config.h"
50 #endif
51
52 #include "php.h"
53 #include "php_ini.h"
54 #include "php_variables.h"
55 #include "mbstring.h"
56 #include "ext/standard/php_string.h"
57 #include "ext/standard/php_mail.h"
58 #include "ext/standard/exec.h"
59 #include "ext/standard/url.h"
60 #include "main/php_output.h"
61 #include "ext/standard/info.h"
62
63 #include "libmbfl/mbfl/mbfl_allocators.h"
64 #include "libmbfl/mbfl/mbfilter_pass.h"
65
66 #include "php_variables.h"
67 #include "php_globals.h"
68 #include "rfc1867.h"
69 #include "php_content_types.h"
70 #include "SAPI.h"
71 #include "php_unicode.h"
72 #include "TSRM.h"
73
74 #include "mb_gpc.h"
75
76 #if HAVE_MBREGEX
77 #include "php_mbregex.h"
78 #endif
79
80 #include "zend_multibyte.h"
81
82 #if HAVE_ONIG
83 #include "php_onig_compat.h"
84 #include <oniguruma.h>
85 #undef UChar
86 #elif HAVE_PCRE || HAVE_BUNDLED_PCRE
87 #include "ext/pcre/php_pcre.h"
88 #endif
89 /* }}} */
90
91 #if HAVE_MBSTRING
92
93 /* {{{ prototypes */
/* Declares the storage for this module's globals (the values read through MBSTRG() in this file). */
ZEND_DECLARE_MODULE_GLOBALS(mbstring)

/* Globals constructor / destructor; both are wired into mbstring_module_entry. */
static PHP_GINIT_FUNCTION(mbstring);
static PHP_GSHUTDOWN_FUNCTION(mbstring);

/* Forward declarations of file-local helpers whose definitions are not in this part of the file. */
static void php_mb_populate_current_detect_order_list(void);

static int php_mb_encoding_translation(void);

static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *list_size);

static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding);

static inline zend_bool php_mb_is_unsupported_no_encoding(enum mbfl_no_encoding no_enc);

static inline zend_bool php_mb_is_no_encoding_utf8(enum mbfl_no_encoding no_enc);
110 /* }}} */
111
112 /* {{{ php_mb_default_identify_list */
/* One row of the language -> default encoding-identification list mapping. */
typedef struct _php_mb_nls_ident_list {
	enum mbfl_no_language lang;        /* language this row applies to */
	const enum mbfl_no_encoding *list; /* candidate encodings, in the order they are listed */
	size_t list_size;                  /* number of elements in `list` */
} php_mb_nls_ident_list;
118
/*
 * Per-language candidate encoding lists. Each array is referenced by exactly
 * one row of php_mb_default_identify_list[]; the element order is part of the
 * data and must not be rearranged.
 */

/* Japanese */
static const enum mbfl_no_encoding php_mb_default_identify_list_ja[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_jis,
	mbfl_no_encoding_utf8,
	mbfl_no_encoding_euc_jp,
	mbfl_no_encoding_sjis
};

/* Simplified Chinese */
static const enum mbfl_no_encoding php_mb_default_identify_list_cn[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_utf8,
	mbfl_no_encoding_euc_cn,
	mbfl_no_encoding_cp936
};

/* Traditional Chinese */
static const enum mbfl_no_encoding php_mb_default_identify_list_tw_hk[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_utf8,
	mbfl_no_encoding_euc_tw,
	mbfl_no_encoding_big5
};

/* Korean */
static const enum mbfl_no_encoding php_mb_default_identify_list_kr[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_utf8,
	mbfl_no_encoding_euc_kr,
	mbfl_no_encoding_uhc
};

/* Russian */
static const enum mbfl_no_encoding php_mb_default_identify_list_ru[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_utf8,
	mbfl_no_encoding_koi8r,
	mbfl_no_encoding_cp1251,
	mbfl_no_encoding_cp866
};

/* Armenian */
static const enum mbfl_no_encoding php_mb_default_identify_list_hy[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_utf8,
	mbfl_no_encoding_armscii8
};

/* Turkish */
static const enum mbfl_no_encoding php_mb_default_identify_list_tr[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_utf8,
	mbfl_no_encoding_cp1254,
	mbfl_no_encoding_8859_9
};

/* Ukrainian */
static const enum mbfl_no_encoding php_mb_default_identify_list_ua[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_utf8,
	mbfl_no_encoding_koi8u
};

/* Language-neutral */
static const enum mbfl_no_encoding php_mb_default_identify_list_neut[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_utf8
};
179
180
181 static const php_mb_nls_ident_list php_mb_default_identify_list[] = {
182 { mbfl_no_language_japanese, php_mb_default_identify_list_ja, sizeof(php_mb_default_identify_list_ja) / sizeof(php_mb_default_identify_list_ja[0]) },
183 { mbfl_no_language_korean, php_mb_default_identify_list_kr, sizeof(php_mb_default_identify_list_kr) / sizeof(php_mb_default_identify_list_kr[0]) },
184 { mbfl_no_language_traditional_chinese, php_mb_default_identify_list_tw_hk, sizeof(php_mb_default_identify_list_tw_hk) / sizeof(php_mb_default_identify_list_tw_hk[0]) },
185 { mbfl_no_language_simplified_chinese, php_mb_default_identify_list_cn, sizeof(php_mb_default_identify_list_cn) / sizeof(php_mb_default_identify_list_cn[0]) },
186 { mbfl_no_language_russian, php_mb_default_identify_list_ru, sizeof(php_mb_default_identify_list_ru) / sizeof(php_mb_default_identify_list_ru[0]) },
187 { mbfl_no_language_armenian, php_mb_default_identify_list_hy, sizeof(php_mb_default_identify_list_hy) / sizeof(php_mb_default_identify_list_hy[0]) },
188 { mbfl_no_language_turkish, php_mb_default_identify_list_tr, sizeof(php_mb_default_identify_list_tr) / sizeof(php_mb_default_identify_list_tr[0]) },
189 { mbfl_no_language_ukrainian, php_mb_default_identify_list_ua, sizeof(php_mb_default_identify_list_ua) / sizeof(php_mb_default_identify_list_ua[0]) },
190 { mbfl_no_language_neutral, php_mb_default_identify_list_neut, sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]) }
191 };
192
193 /* }}} */
194
195 /* {{{ mb_overload_def mb_ovld[] */
/*
 * Function overload table, terminated by an all-zero row.
 * NOTE(review): the columns look like (overload group flag, name of the
 * standard function, name of the mbstring replacement, name under which the
 * original is kept) - confirm against struct mb_overload_def in mbstring.h.
 */
static const struct mb_overload_def mb_ovld[] = {
	{MB_OVERLOAD_MAIL, "mail", "mb_send_mail", "mb_orig_mail"},
	{MB_OVERLOAD_STRING, "strlen", "mb_strlen", "mb_orig_strlen"},
	{MB_OVERLOAD_STRING, "strpos", "mb_strpos", "mb_orig_strpos"},
	{MB_OVERLOAD_STRING, "strrpos", "mb_strrpos", "mb_orig_strrpos"},
	{MB_OVERLOAD_STRING, "stripos", "mb_stripos", "mb_orig_stripos"},
	{MB_OVERLOAD_STRING, "strripos", "mb_strripos", "mb_orig_strripos"},
	{MB_OVERLOAD_STRING, "strstr", "mb_strstr", "mb_orig_strstr"},
	{MB_OVERLOAD_STRING, "strrchr", "mb_strrchr", "mb_orig_strrchr"},
	{MB_OVERLOAD_STRING, "stristr", "mb_stristr", "mb_orig_stristr"},
	{MB_OVERLOAD_STRING, "substr", "mb_substr", "mb_orig_substr"},
	{MB_OVERLOAD_STRING, "strtolower", "mb_strtolower", "mb_orig_strtolower"},
	{MB_OVERLOAD_STRING, "strtoupper", "mb_strtoupper", "mb_orig_strtoupper"},
	{MB_OVERLOAD_STRING, "substr_count", "mb_substr_count", "mb_orig_substr_count"},
	{0, NULL, NULL, NULL} /* sentinel */
};
212 /* }}} */
213
214 /* {{{ arginfo */
/*
 * Argument descriptors referenced from mbstring_functions[].
 *
 * ZEND_BEGIN_ARG_INFO_EX(name, _unused, return_reference, required_num_args)
 * opens a descriptor; every ZEND_ARG_INFO(pass_by_ref, name) that follows
 * describes one parameter, in call order. Parameters beyond
 * required_num_args are optional.
 */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_language, 0, 0, 0)
	ZEND_ARG_INFO(0, language)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_internal_encoding, 0, 0, 0)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_http_input, 0, 0, 0)
	ZEND_ARG_INFO(0, type)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_http_output, 0, 0, 0)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_detect_order, 0, 0, 0)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substitute_character, 0, 0, 0)
	ZEND_ARG_INFO(0, substchar)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_preferred_mime_name, 0, 0, 1)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

/* `result` is taken by reference. */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_parse_str, 0, 0, 1)
	ZEND_ARG_INFO(0, encoded_string)
	ZEND_ARG_INFO(1, result)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_output_handler, 0, 0, 2)
	ZEND_ARG_INFO(0, contents)
	ZEND_ARG_INFO(0, status)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strlen, 0, 0, 1)
	ZEND_ARG_INFO(0, str)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strpos, 0, 0, 2)
	ZEND_ARG_INFO(0, haystack)
	ZEND_ARG_INFO(0, needle)
	ZEND_ARG_INFO(0, offset)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrpos, 0, 0, 2)
	ZEND_ARG_INFO(0, haystack)
	ZEND_ARG_INFO(0, needle)
	ZEND_ARG_INFO(0, offset)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_stripos, 0, 0, 2)
	ZEND_ARG_INFO(0, haystack)
	ZEND_ARG_INFO(0, needle)
	ZEND_ARG_INFO(0, offset)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strripos, 0, 0, 2)
	ZEND_ARG_INFO(0, haystack)
	ZEND_ARG_INFO(0, needle)
	ZEND_ARG_INFO(0, offset)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strstr, 0, 0, 2)
	ZEND_ARG_INFO(0, haystack)
	ZEND_ARG_INFO(0, needle)
	ZEND_ARG_INFO(0, part)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrchr, 0, 0, 2)
	ZEND_ARG_INFO(0, haystack)
	ZEND_ARG_INFO(0, needle)
	ZEND_ARG_INFO(0, part)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_stristr, 0, 0, 2)
	ZEND_ARG_INFO(0, haystack)
	ZEND_ARG_INFO(0, needle)
	ZEND_ARG_INFO(0, part)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrichr, 0, 0, 2)
	ZEND_ARG_INFO(0, haystack)
	ZEND_ARG_INFO(0, needle)
	ZEND_ARG_INFO(0, part)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substr_count, 0, 0, 2)
	ZEND_ARG_INFO(0, haystack)
	ZEND_ARG_INFO(0, needle)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substr, 0, 0, 2)
	ZEND_ARG_INFO(0, str)
	ZEND_ARG_INFO(0, start)
	ZEND_ARG_INFO(0, length)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strcut, 0, 0, 2)
	ZEND_ARG_INFO(0, str)
	ZEND_ARG_INFO(0, start)
	ZEND_ARG_INFO(0, length)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strwidth, 0, 0, 1)
	ZEND_ARG_INFO(0, str)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strimwidth, 0, 0, 3)
	ZEND_ARG_INFO(0, str)
	ZEND_ARG_INFO(0, start)
	ZEND_ARG_INFO(0, width)
	ZEND_ARG_INFO(0, trimmarker)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_encoding, 0, 0, 2)
	ZEND_ARG_INFO(0, str)
	ZEND_ARG_INFO(0, to)
	ZEND_ARG_INFO(0, from)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_case, 0, 0, 2)
	ZEND_ARG_INFO(0, sourcestring)
	ZEND_ARG_INFO(0, mode)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strtoupper, 0, 0, 1)
	ZEND_ARG_INFO(0, sourcestring)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strtolower, 0, 0, 1)
	ZEND_ARG_INFO(0, sourcestring)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_detect_encoding, 0, 0, 1)
	ZEND_ARG_INFO(0, str)
	ZEND_ARG_INFO(0, encoding_list)
	ZEND_ARG_INFO(0, strict)
ZEND_END_ARG_INFO()

/* Takes no parameters. */
ZEND_BEGIN_ARG_INFO(arginfo_mb_list_encodings, 0)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encoding_aliases, 0, 0, 1)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encode_mimeheader, 0, 0, 1)
	ZEND_ARG_INFO(0, str)
	ZEND_ARG_INFO(0, charset)
	ZEND_ARG_INFO(0, transfer)
	ZEND_ARG_INFO(0, linefeed)
	ZEND_ARG_INFO(0, indent)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_decode_mimeheader, 0, 0, 1)
	ZEND_ARG_INFO(0, string)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_kana, 0, 0, 1)
	ZEND_ARG_INFO(0, str)
	ZEND_ARG_INFO(0, option)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

/* `vars` is variadic and taken by reference; at least one variable is required. */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_variables, 0, 0, 3)
	ZEND_ARG_INFO(0, to)
	ZEND_ARG_INFO(0, from)
	ZEND_ARG_VARIADIC_INFO(1, vars)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encode_numericentity, 0, 0, 2)
	ZEND_ARG_INFO(0, string)
	ZEND_ARG_INFO(0, convmap)
	ZEND_ARG_INFO(0, encoding)
	ZEND_ARG_INFO(0, is_hex)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_decode_numericentity, 0, 0, 2)
	ZEND_ARG_INFO(0, string)
	ZEND_ARG_INFO(0, convmap)
	ZEND_ARG_INFO(0, encoding)
	ZEND_ARG_INFO(0, is_hex)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_send_mail, 0, 0, 3)
	ZEND_ARG_INFO(0, to)
	ZEND_ARG_INFO(0, subject)
	ZEND_ARG_INFO(0, message)
	ZEND_ARG_INFO(0, additional_headers)
	ZEND_ARG_INFO(0, additional_parameters)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_get_info, 0, 0, 0)
	ZEND_ARG_INFO(0, type)
ZEND_END_ARG_INFO()

/* Both parameters are optional. */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_check_encoding, 0, 0, 0)
	ZEND_ARG_INFO(0, var)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_scrub, 0, 0, 1)
	ZEND_ARG_INFO(0, str)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ord, 0, 0, 1)
	ZEND_ARG_INFO(0, str)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_chr, 0, 0, 1)
	ZEND_ARG_INFO(0, cp)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()
451
/*
 * Argument descriptors for the mb_ereg* / mb_split / mb_regex_* functions.
 * They are defined unconditionally here.
 * NOTE(review): presumably consumed by PHP_MBREGEX_FUNCTION_ENTRIES, which is
 * only expanded when HAVE_MBREGEX is set - confirm in php_mbregex.h.
 */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_regex_encoding, 0, 0, 0)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

/* `registers` is taken by reference. */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg, 0, 0, 2)
	ZEND_ARG_INFO(0, pattern)
	ZEND_ARG_INFO(0, string)
	ZEND_ARG_INFO(1, registers)
ZEND_END_ARG_INFO()

/* `registers` is taken by reference. */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_eregi, 0, 0, 2)
	ZEND_ARG_INFO(0, pattern)
	ZEND_ARG_INFO(0, string)
	ZEND_ARG_INFO(1, registers)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_replace, 0, 0, 3)
	ZEND_ARG_INFO(0, pattern)
	ZEND_ARG_INFO(0, replacement)
	ZEND_ARG_INFO(0, string)
	ZEND_ARG_INFO(0, option)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_eregi_replace, 0, 0, 3)
	ZEND_ARG_INFO(0, pattern)
	ZEND_ARG_INFO(0, replacement)
	ZEND_ARG_INFO(0, string)
	ZEND_ARG_INFO(0, option)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_replace_callback, 0, 0, 3)
	ZEND_ARG_INFO(0, pattern)
	ZEND_ARG_INFO(0, callback)
	ZEND_ARG_INFO(0, string)
	ZEND_ARG_INFO(0, option)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_split, 0, 0, 2)
	ZEND_ARG_INFO(0, pattern)
	ZEND_ARG_INFO(0, string)
	ZEND_ARG_INFO(0, limit)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_match, 0, 0, 2)
	ZEND_ARG_INFO(0, pattern)
	ZEND_ARG_INFO(0, string)
	ZEND_ARG_INFO(0, option)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search, 0, 0, 0)
	ZEND_ARG_INFO(0, pattern)
	ZEND_ARG_INFO(0, option)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_pos, 0, 0, 0)
	ZEND_ARG_INFO(0, pattern)
	ZEND_ARG_INFO(0, option)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_regs, 0, 0, 0)
	ZEND_ARG_INFO(0, pattern)
	ZEND_ARG_INFO(0, option)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_init, 0, 0, 1)
	ZEND_ARG_INFO(0, string)
	ZEND_ARG_INFO(0, pattern)
	ZEND_ARG_INFO(0, option)
ZEND_END_ARG_INFO()

/* Takes no parameters. */
ZEND_BEGIN_ARG_INFO(arginfo_mb_ereg_search_getregs, 0)
ZEND_END_ARG_INFO()

/* Takes no parameters. */
ZEND_BEGIN_ARG_INFO(arginfo_mb_ereg_search_getpos, 0)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_setpos, 0, 0, 1)
	ZEND_ARG_INFO(0, position)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_regex_set_options, 0, 0, 0)
	ZEND_ARG_INFO(0, options)
ZEND_END_ARG_INFO()
535 /* }}} */
536
537 /* {{{ zend_function_entry mbstring_functions[] */
/*
 * Function table handed to the engine through mbstring_module_entry.
 * Each PHP_FE(name, arginfo) row pairs a function with its argument
 * descriptor; PHP_FE_END terminates the table.
 */
const zend_function_entry mbstring_functions[] = {
	PHP_FE(mb_convert_case, arginfo_mb_convert_case)
	PHP_FE(mb_strtoupper, arginfo_mb_strtoupper)
	PHP_FE(mb_strtolower, arginfo_mb_strtolower)
	PHP_FE(mb_language, arginfo_mb_language)
	PHP_FE(mb_internal_encoding, arginfo_mb_internal_encoding)
	PHP_FE(mb_http_input, arginfo_mb_http_input)
	PHP_FE(mb_http_output, arginfo_mb_http_output)
	PHP_FE(mb_detect_order, arginfo_mb_detect_order)
	PHP_FE(mb_substitute_character, arginfo_mb_substitute_character)
	PHP_FE(mb_parse_str, arginfo_mb_parse_str)
	PHP_FE(mb_output_handler, arginfo_mb_output_handler)
	PHP_FE(mb_preferred_mime_name, arginfo_mb_preferred_mime_name)
	PHP_FE(mb_strlen, arginfo_mb_strlen)
	PHP_FE(mb_strpos, arginfo_mb_strpos)
	PHP_FE(mb_strrpos, arginfo_mb_strrpos)
	PHP_FE(mb_stripos, arginfo_mb_stripos)
	PHP_FE(mb_strripos, arginfo_mb_strripos)
	PHP_FE(mb_strstr, arginfo_mb_strstr)
	PHP_FE(mb_strrchr, arginfo_mb_strrchr)
	PHP_FE(mb_stristr, arginfo_mb_stristr)
	PHP_FE(mb_strrichr, arginfo_mb_strrichr)
	PHP_FE(mb_substr_count, arginfo_mb_substr_count)
	PHP_FE(mb_substr, arginfo_mb_substr)
	PHP_FE(mb_strcut, arginfo_mb_strcut)
	PHP_FE(mb_strwidth, arginfo_mb_strwidth)
	PHP_FE(mb_strimwidth, arginfo_mb_strimwidth)
	PHP_FE(mb_convert_encoding, arginfo_mb_convert_encoding)
	PHP_FE(mb_detect_encoding, arginfo_mb_detect_encoding)
	PHP_FE(mb_list_encodings, arginfo_mb_list_encodings)
	PHP_FE(mb_encoding_aliases, arginfo_mb_encoding_aliases)
	PHP_FE(mb_convert_kana, arginfo_mb_convert_kana)
	PHP_FE(mb_encode_mimeheader, arginfo_mb_encode_mimeheader)
	PHP_FE(mb_decode_mimeheader, arginfo_mb_decode_mimeheader)
	PHP_FE(mb_convert_variables, arginfo_mb_convert_variables)
	PHP_FE(mb_encode_numericentity, arginfo_mb_encode_numericentity)
	PHP_FE(mb_decode_numericentity, arginfo_mb_decode_numericentity)
	PHP_FE(mb_send_mail, arginfo_mb_send_mail)
	PHP_FE(mb_get_info, arginfo_mb_get_info)
	PHP_FE(mb_check_encoding, arginfo_mb_check_encoding)
	PHP_FE(mb_ord, arginfo_mb_ord)
	PHP_FE(mb_chr, arginfo_mb_chr)
	PHP_FE(mb_scrub, arginfo_mb_scrub)
#if HAVE_MBREGEX
	/* Regex function rows are supplied by php_mbregex.h when regex support is compiled in. */
	PHP_MBREGEX_FUNCTION_ENTRIES
#endif
	PHP_FE_END
};
586 /* }}} */
587
588 /* {{{ zend_module_entry mbstring_module_entry */
/* Module descriptor: ties the name, function table, lifecycle hooks and globals together. */
zend_module_entry mbstring_module_entry = {
	STANDARD_MODULE_HEADER,
	"mbstring",                   /* extension name */
	mbstring_functions,           /* exported function table */
	PHP_MINIT(mbstring),          /* module startup */
	PHP_MSHUTDOWN(mbstring),      /* module shutdown */
	PHP_RINIT(mbstring),          /* request startup */
	PHP_RSHUTDOWN(mbstring),      /* request shutdown */
	PHP_MINFO(mbstring),          /* phpinfo() section */
	PHP_MBSTRING_VERSION,
	PHP_MODULE_GLOBALS(mbstring), /* globals descriptor */
	PHP_GINIT(mbstring),          /* globals constructor */
	PHP_GSHUTDOWN(mbstring),      /* globals destructor */
	NULL,                         /* NOTE(review): post-deactivate hook slot per zend_module_entry layout; unused - confirm */
	STANDARD_MODULE_PROPERTIES_EX
};
605 /* }}} */
606
607 /* {{{ static sapi_post_entry php_post_entries[] */
/*
 * POST handler table that uses the stock php_std_post_handler for the default
 * form content type (compare mbstr_post_entries, which uses
 * php_mb_post_handler). Terminated by an all-NULL row.
 * NOTE(review): where this table is (re)installed is not visible in this part
 * of the file.
 */
static sapi_post_entry php_post_entries[] = {
	{ DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data, php_std_post_handler },
	{ MULTIPART_CONTENT_TYPE, sizeof(MULTIPART_CONTENT_TYPE)-1, NULL, rfc1867_post_handler },
	{ NULL, 0, NULL, NULL }
};
613 /* }}} */
614
/* Only when built as a loadable (shared) extension: emit the module lookup entry point. */
#ifdef COMPILE_DL_MBSTRING
#ifdef ZTS
/* Thread-safe builds additionally define the TSRM cache for this shared object. */
ZEND_TSRMLS_CACHE_DEFINE()
#endif
ZEND_GET_MODULE(mbstring)
#endif
621
622 static char *get_internal_encoding(void) {
623 if (PG(internal_encoding) && PG(internal_encoding)[0]) {
624 return PG(internal_encoding);
625 } else if (SG(default_charset)) {
626 return SG(default_charset);
627 }
628 return "";
629 }
630
get_input_encoding(void)631 static char *get_input_encoding(void) {
632 if (PG(input_encoding) && PG(input_encoding)[0]) {
633 return PG(input_encoding);
634 } else if (SG(default_charset)) {
635 return SG(default_charset);
636 }
637 return "";
638 }
639
get_output_encoding(void)640 static char *get_output_encoding(void) {
641 if (PG(output_encoding) && PG(output_encoding)[0]) {
642 return PG(output_encoding);
643 } else if (SG(default_charset)) {
644 return SG(default_charset);
645 }
646 return "";
647 }
648
649
650 /* {{{ allocators */
/*
 * Thin adapters that let libmbfl allocate through the Zend memory manager.
 * The first four forward to emalloc/erealloc/ecalloc/efree; the "p" variants
 * forward to the pe* functions with the `persistent` flag set to 1.
 */

/* malloc hook: forwards to emalloc(). */
static void *_php_mb_allocators_malloc(unsigned int sz)
{
	return emalloc(sz);
}

/* realloc hook: forwards to erealloc(). */
static void *_php_mb_allocators_realloc(void *ptr, unsigned int sz)
{
	return erealloc(ptr, sz);
}

/* calloc hook: forwards to ecalloc(). */
static void *_php_mb_allocators_calloc(unsigned int nelems, unsigned int szelem)
{
	return ecalloc(nelems, szelem);
}

/* free hook: forwards to efree(). */
static void _php_mb_allocators_free(void *ptr)
{
	efree(ptr);
}

/* persistent malloc hook: pemalloc() with persistent = 1. */
static void *_php_mb_allocators_pmalloc(unsigned int sz)
{
	return pemalloc(sz, 1);
}

/* persistent realloc hook: perealloc() with persistent = 1. */
static void *_php_mb_allocators_prealloc(void *ptr, unsigned int sz)
{
	return perealloc(ptr, sz, 1);
}

/* persistent free hook: pefree() with persistent = 1. */
static void _php_mb_allocators_pfree(void *ptr)
{
	pefree(ptr, 1);
}

/* Hook table; the initializer order must match the member order of mbfl_allocators. */
static mbfl_allocators _php_mb_allocators = {
	_php_mb_allocators_malloc,
	_php_mb_allocators_realloc,
	_php_mb_allocators_calloc,
	_php_mb_allocators_free,
	_php_mb_allocators_pmalloc,
	_php_mb_allocators_prealloc,
	_php_mb_allocators_pfree
};
695 /* }}} */
696
697 /* {{{ static sapi_post_entry mbstr_post_entries[] */
/*
 * POST handler table that routes the default form content type through
 * php_mb_post_handler (compare php_post_entries, which uses the stock
 * php_std_post_handler). Terminated by an all-NULL row.
 * NOTE(review): where this table is installed is not visible in this part of
 * the file.
 */
static sapi_post_entry mbstr_post_entries[] = {
	{ DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data, php_mb_post_handler },
	{ MULTIPART_CONTENT_TYPE, sizeof(MULTIPART_CONTENT_TYPE)-1, NULL, rfc1867_post_handler },
	{ NULL, 0, NULL, NULL }
};
703 /* }}} */
704
705 /* {{{ static int php_mb_parse_encoding_list()
706 * Return FAILURE if input contains any illegal encoding, otherwise SUCCESS.
707 * Even if any illegal encoding is detected the result may contain a list
708 * of parsed encodings.
709 */
710 static int
php_mb_parse_encoding_list(const char * value,size_t value_length,const mbfl_encoding *** return_list,size_t * return_size,int persistent)711 php_mb_parse_encoding_list(const char *value, size_t value_length, const mbfl_encoding ***return_list, size_t *return_size, int persistent)
712 {
713 int bauto, ret = SUCCESS;
714 size_t n, size;
715 char *p, *p1, *p2, *endp, *tmpstr;
716 const mbfl_encoding **entry, **list;
717
718 list = NULL;
719 if (value == NULL || value_length <= 0) {
720 if (return_list) {
721 *return_list = NULL;
722 }
723 if (return_size) {
724 *return_size = 0;
725 }
726 return FAILURE;
727 } else {
728 /* copy the value string for work */
729 if (value[0]=='"' && value[value_length-1]=='"' && value_length>2) {
730 tmpstr = (char *)estrndup(value+1, value_length-2);
731 value_length -= 2;
732 }
733 else
734 tmpstr = (char *)estrndup(value, value_length);
735 /* count the number of listed encoding names */
736 endp = tmpstr + value_length;
737 n = 1;
738 p1 = tmpstr;
739 while ((p2 = (char*)php_memnstr(p1, ",", 1, endp)) != NULL) {
740 p1 = p2 + 1;
741 n++;
742 }
743 size = n + MBSTRG(default_detect_order_list_size);
744 /* make list */
745 list = (const mbfl_encoding **)pecalloc(size, sizeof(mbfl_encoding*), persistent);
746 entry = list;
747 n = 0;
748 bauto = 0;
749 p1 = tmpstr;
750 do {
751 p2 = p = (char*)php_memnstr(p1, ",", 1, endp);
752 if (p == NULL) {
753 p = endp;
754 }
755 *p = '\0';
756 /* trim spaces */
757 while (p1 < p && (*p1 == ' ' || *p1 == '\t')) {
758 p1++;
759 }
760 p--;
761 while (p > p1 && (*p == ' ' || *p == '\t')) {
762 *p = '\0';
763 p--;
764 }
765 /* convert to the encoding number and check encoding */
766 if (strcasecmp(p1, "auto") == 0) {
767 if (!bauto) {
768 const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
769 const size_t identify_list_size = MBSTRG(default_detect_order_list_size);
770 size_t i;
771 bauto = 1;
772 for (i = 0; i < identify_list_size; i++) {
773 *entry++ = mbfl_no2encoding(*src++);
774 n++;
775 }
776 }
777 } else {
778 const mbfl_encoding *encoding = mbfl_name2encoding(p1);
779 if (encoding) {
780 *entry++ = encoding;
781 n++;
782 } else {
783 ret = FAILURE;
784 }
785 }
786 p1 = p2 + 1;
787 } while (n < size && p2 != NULL);
788 if (n > 0) {
789 if (return_list) {
790 *return_list = list;
791 } else {
792 pefree(list, persistent);
793 }
794 } else {
795 pefree(list, persistent);
796 if (return_list) {
797 *return_list = NULL;
798 }
799 ret = FAILURE;
800 }
801 if (return_size) {
802 *return_size = n;
803 }
804 efree(tmpstr);
805 }
806
807 return ret;
808 }
809 /* }}} */
810
811 /* {{{ static int php_mb_parse_encoding_array()
812 * Return FAILURE if input contains any illegal encoding, otherwise SUCCESS.
813 * Even if any illegal encoding is detected the result may contain a list
814 * of parsed encodings.
815 */
816 static int
php_mb_parse_encoding_array(zval * array,const mbfl_encoding *** return_list,size_t * return_size,int persistent)817 php_mb_parse_encoding_array(zval *array, const mbfl_encoding ***return_list, size_t *return_size, int persistent)
818 {
819 zval *hash_entry;
820 HashTable *target_hash;
821 int i, n, size, bauto, ret = SUCCESS;
822 const mbfl_encoding **list, **entry;
823
824 list = NULL;
825 if (Z_TYPE_P(array) == IS_ARRAY) {
826 target_hash = Z_ARRVAL_P(array);
827 i = zend_hash_num_elements(target_hash);
828 size = i + MBSTRG(default_detect_order_list_size);
829 list = (const mbfl_encoding **)pecalloc(size, sizeof(mbfl_encoding*), persistent);
830 entry = list;
831 bauto = 0;
832 n = 0;
833 ZEND_HASH_FOREACH_VAL(target_hash, hash_entry) {
834 convert_to_string_ex(hash_entry);
835 if (strcasecmp(Z_STRVAL_P(hash_entry), "auto") == 0) {
836 if (!bauto) {
837 const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
838 const size_t identify_list_size = MBSTRG(default_detect_order_list_size);
839 size_t j;
840
841 bauto = 1;
842 for (j = 0; j < identify_list_size; j++) {
843 *entry++ = mbfl_no2encoding(*src++);
844 n++;
845 }
846 }
847 } else {
848 const mbfl_encoding *encoding = mbfl_name2encoding(Z_STRVAL_P(hash_entry));
849 if (encoding) {
850 *entry++ = encoding;
851 n++;
852 } else {
853 ret = FAILURE;
854 }
855 }
856 i--;
857 } ZEND_HASH_FOREACH_END();
858 if (n > 0) {
859 if (return_list) {
860 *return_list = list;
861 } else {
862 pefree(list, persistent);
863 }
864 } else {
865 pefree(list, persistent);
866 if (return_list) {
867 *return_list = NULL;
868 }
869 ret = FAILURE;
870 }
871 if (return_size) {
872 *return_size = n;
873 }
874 }
875
876 return ret;
877 }
878 /* }}} */
879
880 /* {{{ zend_multibyte interface */
php_mb_zend_encoding_fetcher(const char * encoding_name)881 static const zend_encoding* php_mb_zend_encoding_fetcher(const char *encoding_name)
882 {
883 return (const zend_encoding*)mbfl_name2encoding(encoding_name);
884 }
885
php_mb_zend_encoding_name_getter(const zend_encoding * encoding)886 static const char *php_mb_zend_encoding_name_getter(const zend_encoding *encoding)
887 {
888 return ((const mbfl_encoding *)encoding)->name;
889 }
890
php_mb_zend_encoding_lexer_compatibility_checker(const zend_encoding * _encoding)891 static int php_mb_zend_encoding_lexer_compatibility_checker(const zend_encoding *_encoding)
892 {
893 const mbfl_encoding *encoding = (const mbfl_encoding*)_encoding;
894 if (encoding->flag & MBFL_ENCTYPE_SBCS) {
895 return 1;
896 }
897 if ((encoding->flag & (MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE)) == MBFL_ENCTYPE_MBCS) {
898 return 1;
899 }
900 return 0;
901 }
902
php_mb_zend_encoding_detector(const unsigned char * arg_string,size_t arg_length,const zend_encoding ** list,size_t list_size)903 static const zend_encoding *php_mb_zend_encoding_detector(const unsigned char *arg_string, size_t arg_length, const zend_encoding **list, size_t list_size)
904 {
905 mbfl_string string;
906
907 if (!list) {
908 list = (const zend_encoding **)MBSTRG(current_detect_order_list);
909 list_size = MBSTRG(current_detect_order_list_size);
910 }
911
912 mbfl_string_init(&string);
913 string.no_language = MBSTRG(language);
914 string.val = (unsigned char *)arg_string;
915 string.len = arg_length;
916 return (const zend_encoding *) mbfl_identify_encoding2(&string, (const mbfl_encoding **)list, list_size, 0);
917 }
918
php_mb_zend_encoding_converter(unsigned char ** to,size_t * to_length,const unsigned char * from,size_t from_length,const zend_encoding * encoding_to,const zend_encoding * encoding_from)919 static size_t php_mb_zend_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const zend_encoding *encoding_to, const zend_encoding *encoding_from)
920 {
921 mbfl_string string, result;
922 mbfl_buffer_converter *convd;
923 int status, loc;
924
925 /* new encoding */
926 /* initialize string */
927 mbfl_string_init(&string);
928 mbfl_string_init(&result);
929 string.no_encoding = ((const mbfl_encoding*)encoding_from)->no_encoding;
930 string.no_language = MBSTRG(language);
931 string.val = (unsigned char*)from;
932 string.len = from_length;
933
934 /* initialize converter */
935 convd = mbfl_buffer_converter_new2((const mbfl_encoding *)encoding_from, (const mbfl_encoding *)encoding_to, string.len);
936 if (convd == NULL) {
937 return -1;
938 }
939
940 mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
941 mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
942
943 /* do it */
944 status = mbfl_buffer_converter_feed2(convd, &string, &loc);
945 if (status) {
946 mbfl_buffer_converter_delete(convd);
947 return (size_t)-1;
948 }
949
950 mbfl_buffer_converter_flush(convd);
951 if (!mbfl_buffer_converter_result(convd, &result)) {
952 mbfl_buffer_converter_delete(convd);
953 return (size_t)-1;
954 }
955
956 *to = result.val;
957 *to_length = result.len;
958
959 mbfl_buffer_converter_delete(convd);
960
961 return loc;
962 }
963
php_mb_zend_encoding_list_parser(const char * encoding_list,size_t encoding_list_len,const zend_encoding *** return_list,size_t * return_size,int persistent)964 static int php_mb_zend_encoding_list_parser(const char *encoding_list, size_t encoding_list_len, const zend_encoding ***return_list, size_t *return_size, int persistent)
965 {
966 return php_mb_parse_encoding_list(encoding_list, encoding_list_len, (const mbfl_encoding ***)return_list, return_size, persistent);
967 }
968
php_mb_zend_internal_encoding_getter(void)969 static const zend_encoding *php_mb_zend_internal_encoding_getter(void)
970 {
971 return (const zend_encoding *)MBSTRG(internal_encoding);
972 }
973
php_mb_zend_internal_encoding_setter(const zend_encoding * encoding)974 static int php_mb_zend_internal_encoding_setter(const zend_encoding *encoding)
975 {
976 MBSTRG(internal_encoding) = (const mbfl_encoding *)encoding;
977 return SUCCESS;
978 }
979
/* Hook table that PHP_MINIT_FUNCTION(mbstring) hands to
 * zend_multibyte_set_functions(); it names this provider and lists the
 * php_mb_zend_* callbacks. */
static zend_multibyte_functions php_mb_zend_multibyte_functions = {
	"mbstring",
	php_mb_zend_encoding_fetcher,
	php_mb_zend_encoding_name_getter,
	php_mb_zend_encoding_lexer_compatibility_checker,
	php_mb_zend_encoding_detector,
	php_mb_zend_encoding_converter,
	php_mb_zend_encoding_list_parser,
	php_mb_zend_internal_encoding_getter,
	php_mb_zend_internal_encoding_setter
};
991 /* }}} */
992
/* Regex helpers used by the mbstring.http_output_conv_mimetypes ini handler.
 * They are implemented below on top of Oniguruma (HAVE_ONIG) or PCRE
 * (HAVE_PCRE || HAVE_BUNDLED_PCRE); the compiled pattern is passed around
 * as an opaque pointer. */
static void *_php_mb_compile_regex(const char *pattern);
static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len);
static void _php_mb_free_regex(void *opaque);
996
997 #if HAVE_ONIG
998 /* {{{ _php_mb_compile_regex */
_php_mb_compile_regex(const char * pattern)999 static void *_php_mb_compile_regex(const char *pattern)
1000 {
1001 php_mb_regex_t *retval;
1002 OnigErrorInfo err_info;
1003 int err_code;
1004
1005 if ((err_code = onig_new(&retval,
1006 (const OnigUChar *)pattern,
1007 (const OnigUChar *)pattern + strlen(pattern),
1008 ONIG_OPTION_IGNORECASE | ONIG_OPTION_DONT_CAPTURE_GROUP,
1009 ONIG_ENCODING_ASCII, &OnigSyntaxPerl, &err_info))) {
1010 OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
1011 onig_error_code_to_str(err_str, err_code, err_info);
1012 php_error_docref(NULL, E_WARNING, "%s: %s", pattern, err_str);
1013 retval = NULL;
1014 }
1015 return retval;
1016 }
1017 /* }}} */
1018
1019 /* {{{ _php_mb_match_regex */
_php_mb_match_regex(void * opaque,const char * str,size_t str_len)1020 static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len)
1021 {
1022 return onig_search((php_mb_regex_t *)opaque, (const OnigUChar *)str,
1023 (const OnigUChar*)str + str_len, (const OnigUChar *)str,
1024 (const OnigUChar*)str + str_len, NULL, ONIG_OPTION_NONE) >= 0;
1025 }
1026 /* }}} */
1027
1028 /* {{{ _php_mb_free_regex */
_php_mb_free_regex(void * opaque)1029 static void _php_mb_free_regex(void *opaque)
1030 {
1031 onig_free((php_mb_regex_t *)opaque);
1032 }
1033 /* }}} */
1034 #elif HAVE_PCRE || HAVE_BUNDLED_PCRE
1035 /* {{{ _php_mb_compile_regex */
_php_mb_compile_regex(const char * pattern)1036 static void *_php_mb_compile_regex(const char *pattern)
1037 {
1038 pcre *retval;
1039 const char *err_str;
1040 int err_offset;
1041
1042 if (!(retval = pcre_compile(pattern,
1043 PCRE_CASELESS, &err_str, &err_offset, NULL))) {
1044 php_error_docref(NULL, E_WARNING, "%s (offset=%d): %s", pattern, err_offset, err_str);
1045 }
1046 return retval;
1047 }
1048 /* }}} */
1049
1050 /* {{{ _php_mb_match_regex */
_php_mb_match_regex(void * opaque,const char * str,size_t str_len)1051 static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len)
1052 {
1053 return pcre_exec((pcre *)opaque, NULL, str, (int)str_len, 0,
1054 0, NULL, 0) >= 0;
1055 }
1056 /* }}} */
1057
1058 /* {{{ _php_mb_free_regex */
/* Release a PCRE pattern previously returned by _php_mb_compile_regex(). */
static void _php_mb_free_regex(void *opaque)
{
	void *re = opaque;

	pcre_free(re);
}
1063 /* }}} */
1064 #endif
1065
1066 /* {{{ php_mb_nls_get_default_detect_order_list */
php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang,enum mbfl_no_encoding ** plist,size_t * plist_size)1067 static int php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang, enum mbfl_no_encoding **plist, size_t *plist_size)
1068 {
1069 size_t i;
1070
1071 *plist = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut;
1072 *plist_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
1073
1074 for (i = 0; i < sizeof(php_mb_default_identify_list) / sizeof(php_mb_default_identify_list[0]); i++) {
1075 if (php_mb_default_identify_list[i].lang == lang) {
1076 *plist = (enum mbfl_no_encoding *)php_mb_default_identify_list[i].list;
1077 *plist_size = php_mb_default_identify_list[i].list_size;
1078 return 1;
1079 }
1080 }
1081 return 0;
1082 }
1083 /* }}} */
1084
/* Copy at most `len` bytes of `start` into a freshly emalloc'ed,
 * NUL-terminated buffer, stopping early at the first unescaped `quote`
 * byte. A backslash followed by a backslash, or by `quote` when `quote` is
 * non-zero, is reduced to the escaped byte. All other input is copied one
 * character at a time, using php_mb_mbchar_bytes_ex() for the character
 * width in `encoding`.
 *
 * Returns the new buffer. */
static char *php_mb_rfc1867_substring_conf(const zend_encoding *encoding, char *start, int len, char quote)
{
	char *result = emalloc(len + 2);
	char *resp = result;
	int i;

	for (i = 0; i < len && start[i] != quote; ++i) {
		/* NOTE(review): start[i + 1] is read even when i == len - 1; the
		 * callers visible in this file pass NUL-terminated strings with
		 * len <= strlen(start), so the read stays in bounds there. */
		if (start[i] == '\\' && (start[i + 1] == '\\' || (quote && start[i + 1] == quote))) {
			/* drop the backslash and keep the escaped byte */
			*resp++ = start[++i];
		} else {
			size_t j = php_mb_mbchar_bytes_ex(start+i, (const mbfl_encoding *)encoding);

			/* copy the whole character, but never beyond len */
			while (j-- > 0 && i < len) {
				*resp++ = start[i++];
			}
			/* compensate for the ++i performed by the for loop */
			--i;
		}
	}

	*resp = '\0';
	return result;
}
1107
/* Split the next word off *line at the first `stop` byte that is outside a
 * single- or double-quoted section.
 *
 * Returns an emalloc'ed copy of the word. *line is advanced past the word
 * and any run of consecutive `stop` bytes, or to the terminating NUL when no
 * `stop` byte was found (the whole remainder is then returned). */
static char *php_mb_rfc1867_getword(const zend_encoding *encoding, char **line, char stop) /* {{{ */
{
	char *pos = *line, quote;
	char *res;

	while (*pos && *pos != stop) {
		if ((quote = *pos) == '"' || quote == '\'') {
			/* skip the quoted section, honouring backslash-escaped quotes */
			++pos;
			while (*pos && *pos != quote) {
				if (*pos == '\\' && pos[1] && pos[1] == quote) {
					pos += 2;
				} else {
					++pos;
				}
			}
			/* step over the closing quote if there is one */
			if (*pos) {
				++pos;
			}
		} else {
			/* NOTE(review): advances by the full character width reported by
			 * php_mb_mbchar_bytes_ex(); confirm this cannot step past the
			 * terminating NUL on a truncated multibyte sequence. */
			pos += php_mb_mbchar_bytes_ex(pos, (const mbfl_encoding *)encoding);

		}
	}
	/* no stop byte found: the remainder of the line is the word */
	if (*pos == '\0') {
		res = estrdup(*line);
		*line += strlen(*line);
		return res;
	}

	res = estrndup(*line, pos - *line);

	/* swallow consecutive stop bytes before the next word */
	while (*pos == stop) {
		pos += php_mb_mbchar_bytes_ex(pos, (const mbfl_encoding *)encoding);
	}

	*line = pos;
	return res;
}
1146 /* }}} */
1147
/* Extract one configuration word from `str`: leading whitespace is skipped,
 * then either a quoted string (up to its closing quote) or a run of
 * non-whitespace bytes is handed to php_mb_rfc1867_substring_conf().
 *
 * Returns an emalloc'ed string; an empty string when nothing but whitespace
 * is left. */
static char *php_mb_rfc1867_getword_conf(const zend_encoding *encoding, char *str) /* {{{ */
{
	char *word_end;

	for (; *str && isspace(*(unsigned char *)str); ++str) {
		/* skip leading whitespace */
	}

	if (*str == '\0') {
		return estrdup("");
	}

	if (*str == '"' || *str == '\'') {
		char quote = *str;

		/* quoted value: everything after the opening quote is a candidate */
		++str;
		return php_mb_rfc1867_substring_conf(encoding, str, strlen(str), quote);
	}

	/* bare value: ends at the next whitespace byte or at the terminator */
	for (word_end = str; *word_end && !isspace(*(unsigned char *)word_end); ++word_end) {
	}
	return php_mb_rfc1867_substring_conf(encoding, str, word_end - str, 0);
}
1172 /* }}} */
1173
php_mb_rfc1867_basename(const zend_encoding * encoding,char * filename)1174 static char *php_mb_rfc1867_basename(const zend_encoding *encoding, char *filename) /* {{{ */
1175 {
1176 char *s, *s2;
1177 const size_t filename_len = strlen(filename);
1178
1179 /* The \ check should technically be needed for win32 systems only where
1180 * it is a valid path separator. However, IE in all it's wisdom always sends
1181 * the full path of the file on the user's filesystem, which means that unless
1182 * the user does basename() they get a bogus file name. Until IE's user base drops
1183 * to nill or problem is fixed this code must remain enabled for all systems. */
1184 s = php_mb_safe_strrchr_ex(filename, '\\', filename_len, (const mbfl_encoding *)encoding);
1185 s2 = php_mb_safe_strrchr_ex(filename, '/', filename_len, (const mbfl_encoding *)encoding);
1186
1187 if (s && s2) {
1188 if (s > s2) {
1189 return ++s;
1190 } else {
1191 return ++s2;
1192 }
1193 } else if (s) {
1194 return ++s;
1195 } else if (s2) {
1196 return ++s2;
1197 } else {
1198 return filename;
1199 }
1200 }
1201 /* }}} */
1202
1203 /* {{{ php.ini directive handler */
1204 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_language) */
PHP_INI_MH(OnUpdate_mbstring_language)1205 static PHP_INI_MH(OnUpdate_mbstring_language)
1206 {
1207 enum mbfl_no_language no_language;
1208
1209 no_language = mbfl_name2no_language(ZSTR_VAL(new_value));
1210 if (no_language == mbfl_no_language_invalid) {
1211 MBSTRG(language) = mbfl_no_language_neutral;
1212 return FAILURE;
1213 }
1214 MBSTRG(language) = no_language;
1215 php_mb_nls_get_default_detect_order_list(no_language, &MBSTRG(default_detect_order_list), &MBSTRG(default_detect_order_list_size));
1216 return SUCCESS;
1217 }
1218 /* }}} */
1219
1220 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_detect_order) */
PHP_INI_MH(OnUpdate_mbstring_detect_order)1221 static PHP_INI_MH(OnUpdate_mbstring_detect_order)
1222 {
1223 const mbfl_encoding **list;
1224 size_t size;
1225
1226 if (!new_value) {
1227 if (MBSTRG(detect_order_list)) {
1228 pefree(MBSTRG(detect_order_list), 1);
1229 }
1230 MBSTRG(detect_order_list) = NULL;
1231 MBSTRG(detect_order_list_size) = 0;
1232 return SUCCESS;
1233 }
1234
1235 if (FAILURE == php_mb_parse_encoding_list(ZSTR_VAL(new_value), ZSTR_LEN(new_value), &list, &size, 1)) {
1236 return FAILURE;
1237 }
1238
1239 if (MBSTRG(detect_order_list)) {
1240 pefree(MBSTRG(detect_order_list), 1);
1241 }
1242 MBSTRG(detect_order_list) = list;
1243 MBSTRG(detect_order_list_size) = size;
1244 return SUCCESS;
1245 }
1246 /* }}} */
1247
1248 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_input) */
PHP_INI_MH(OnUpdate_mbstring_http_input)1249 static PHP_INI_MH(OnUpdate_mbstring_http_input)
1250 {
1251 const mbfl_encoding **list;
1252 size_t size;
1253
1254 if (!new_value || !ZSTR_VAL(new_value)) {
1255 if (MBSTRG(http_input_list)) {
1256 pefree(MBSTRG(http_input_list), 1);
1257 }
1258 if (SUCCESS == php_mb_parse_encoding_list(get_input_encoding(), strlen(get_input_encoding())+1, &list, &size, 1)) {
1259 MBSTRG(http_input_list) = list;
1260 MBSTRG(http_input_list_size) = size;
1261 return SUCCESS;
1262 }
1263 MBSTRG(http_input_list) = NULL;
1264 MBSTRG(http_input_list_size) = 0;
1265 return SUCCESS;
1266 }
1267
1268 if (FAILURE == php_mb_parse_encoding_list(ZSTR_VAL(new_value), ZSTR_LEN(new_value), &list, &size, 1)) {
1269 return FAILURE;
1270 }
1271
1272 if (MBSTRG(http_input_list)) {
1273 pefree(MBSTRG(http_input_list), 1);
1274 }
1275 MBSTRG(http_input_list) = list;
1276 MBSTRG(http_input_list_size) = size;
1277
1278 if (stage & (PHP_INI_STAGE_ACTIVATE | PHP_INI_STAGE_RUNTIME)) {
1279 php_error_docref("ref.mbstring", E_DEPRECATED, "Use of mbstring.http_input is deprecated");
1280 }
1281
1282 return SUCCESS;
1283 }
1284 /* }}} */
1285
1286 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output) */
PHP_INI_MH(OnUpdate_mbstring_http_output)1287 static PHP_INI_MH(OnUpdate_mbstring_http_output)
1288 {
1289 const mbfl_encoding *encoding;
1290
1291 if (new_value == NULL || ZSTR_LEN(new_value) == 0) {
1292 encoding = mbfl_name2encoding(get_output_encoding());
1293 if (!encoding) {
1294 MBSTRG(http_output_encoding) = &mbfl_encoding_pass;
1295 MBSTRG(current_http_output_encoding) = &mbfl_encoding_pass;
1296 return SUCCESS;
1297 }
1298 } else {
1299 encoding = mbfl_name2encoding(ZSTR_VAL(new_value));
1300 if (!encoding) {
1301 MBSTRG(http_output_encoding) = &mbfl_encoding_pass;
1302 MBSTRG(current_http_output_encoding) = &mbfl_encoding_pass;
1303 return FAILURE;
1304 }
1305 }
1306 MBSTRG(http_output_encoding) = encoding;
1307 MBSTRG(current_http_output_encoding) = encoding;
1308
1309 if (stage & (PHP_INI_STAGE_ACTIVATE | PHP_INI_STAGE_RUNTIME)) {
1310 php_error_docref("ref.mbstring", E_DEPRECATED, "Use of mbstring.http_output is deprecated");
1311 }
1312
1313 return SUCCESS;
1314 }
1315 /* }}} */
1316
1317 /* {{{ static _php_mb_ini_mbstring_internal_encoding_set */
_php_mb_ini_mbstring_internal_encoding_set(const char * new_value,uint32_t new_value_length)1318 int _php_mb_ini_mbstring_internal_encoding_set(const char *new_value, uint32_t new_value_length)
1319 {
1320 const mbfl_encoding *encoding;
1321
1322 if (!new_value || !new_value_length || !(encoding = mbfl_name2encoding(new_value))) {
1323 /* falls back to UTF-8 if an unknown encoding name is given */
1324 encoding = mbfl_no2encoding(mbfl_no_encoding_utf8);
1325 }
1326 MBSTRG(internal_encoding) = encoding;
1327 MBSTRG(current_internal_encoding) = encoding;
1328 #if HAVE_MBREGEX
1329 {
1330 const char *enc_name = new_value;
1331 if (FAILURE == php_mb_regex_set_default_mbctype(enc_name)) {
1332 /* falls back to UTF-8 if an unknown encoding name is given */
1333 enc_name = "UTF-8";
1334 php_mb_regex_set_default_mbctype(enc_name);
1335 }
1336 php_mb_regex_set_mbctype(new_value);
1337 }
1338 #endif
1339 return SUCCESS;
1340 }
1341 /* }}} */
1342
1343 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_internal_encoding) */
PHP_INI_MH(OnUpdate_mbstring_internal_encoding)1344 static PHP_INI_MH(OnUpdate_mbstring_internal_encoding)
1345 {
1346 if (stage & (PHP_INI_STAGE_ACTIVATE | PHP_INI_STAGE_RUNTIME)) {
1347 php_error_docref("ref.mbstring", E_DEPRECATED, "Use of mbstring.internal_encoding is deprecated");
1348 }
1349
1350 if (OnUpdateString(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage) == FAILURE) {
1351 return FAILURE;
1352 }
1353
1354 if (stage & (PHP_INI_STAGE_STARTUP | PHP_INI_STAGE_SHUTDOWN | PHP_INI_STAGE_RUNTIME)) {
1355 if (new_value && ZSTR_LEN(new_value)) {
1356 return _php_mb_ini_mbstring_internal_encoding_set(ZSTR_VAL(new_value), ZSTR_LEN(new_value));
1357 } else {
1358 return _php_mb_ini_mbstring_internal_encoding_set(get_internal_encoding(), strlen(get_internal_encoding())+1);
1359 }
1360 } else {
1361 /* the corresponding mbstring globals needs to be set according to the
1362 * ini value in the later stage because it never falls back to the
1363 * default value if 1. no value for mbstring.internal_encoding is given,
1364 * 2. mbstring.language directive is processed in per-dir or runtime
1365 * context and 3. call to the handler for mbstring.language is done
1366 * after mbstring.internal_encoding is handled. */
1367 return SUCCESS;
1368 }
1369 }
1370 /* }}} */
1371
1372 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_substitute_character) */
PHP_INI_MH(OnUpdate_mbstring_substitute_character)1373 static PHP_INI_MH(OnUpdate_mbstring_substitute_character)
1374 {
1375 int c;
1376 char *endptr = NULL;
1377
1378 if (new_value != NULL) {
1379 if (strcasecmp("none", ZSTR_VAL(new_value)) == 0) {
1380 MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
1381 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
1382 } else if (strcasecmp("long", ZSTR_VAL(new_value)) == 0) {
1383 MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
1384 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
1385 } else if (strcasecmp("entity", ZSTR_VAL(new_value)) == 0) {
1386 MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
1387 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
1388 } else {
1389 MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1390 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1391 if (ZSTR_LEN(new_value) > 0) {
1392 c = strtol(ZSTR_VAL(new_value), &endptr, 0);
1393 if (*endptr == '\0') {
1394 MBSTRG(filter_illegal_substchar) = c;
1395 MBSTRG(current_filter_illegal_substchar) = c;
1396 }
1397 }
1398 }
1399 } else {
1400 MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1401 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1402 MBSTRG(filter_illegal_substchar) = 0x3f; /* '?' */
1403 MBSTRG(current_filter_illegal_substchar) = 0x3f; /* '?' */
1404 }
1405
1406 return SUCCESS;
1407 }
1408 /* }}} */
1409
1410 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_encoding_translation) */
PHP_INI_MH(OnUpdate_mbstring_encoding_translation)1411 static PHP_INI_MH(OnUpdate_mbstring_encoding_translation)
1412 {
1413 if (new_value == NULL) {
1414 return FAILURE;
1415 }
1416
1417 OnUpdateBool(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
1418
1419 if (MBSTRG(encoding_translation)) {
1420 sapi_unregister_post_entry(php_post_entries);
1421 sapi_register_post_entries(mbstr_post_entries);
1422 } else {
1423 sapi_unregister_post_entry(mbstr_post_entries);
1424 sapi_register_post_entries(php_post_entries);
1425 }
1426
1427 return SUCCESS;
1428 }
1429 /* }}} */
1430
1431 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes */
PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes)1432 static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes)
1433 {
1434 zend_string *tmp;
1435 void *re = NULL;
1436
1437 if (!new_value) {
1438 new_value = entry->orig_value;
1439 }
1440 tmp = php_trim(new_value, NULL, 0, 3);
1441
1442 if (ZSTR_LEN(tmp) > 0) {
1443 if (!(re = _php_mb_compile_regex(ZSTR_VAL(tmp)))) {
1444 zend_string_release(tmp);
1445 return FAILURE;
1446 }
1447 }
1448
1449 if (MBSTRG(http_output_conv_mimetypes)) {
1450 _php_mb_free_regex(MBSTRG(http_output_conv_mimetypes));
1451 }
1452
1453 MBSTRG(http_output_conv_mimetypes) = re;
1454
1455 zend_string_release(tmp);
1456 return SUCCESS;
1457 }
1458 /* }}} */
1459 /* }}} */
1460
1461 /* {{{ php.ini directive registration */
/* Each entry gives the directive name, its default value, the contexts in
 * which it may be changed and the handler invoked on update. STD_* entries
 * additionally name the zend_mbstring_globals field they are bound to. */
PHP_INI_BEGIN()
	PHP_INI_ENTRY("mbstring.language", "neutral", PHP_INI_ALL, OnUpdate_mbstring_language)
	PHP_INI_ENTRY("mbstring.detect_order", NULL, PHP_INI_ALL, OnUpdate_mbstring_detect_order)
	PHP_INI_ENTRY("mbstring.http_input", NULL, PHP_INI_ALL, OnUpdate_mbstring_http_input)
	PHP_INI_ENTRY("mbstring.http_output", NULL, PHP_INI_ALL, OnUpdate_mbstring_http_output)
	STD_PHP_INI_ENTRY("mbstring.internal_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_internal_encoding, internal_encoding_name, zend_mbstring_globals, mbstring_globals)
	PHP_INI_ENTRY("mbstring.substitute_character", NULL, PHP_INI_ALL, OnUpdate_mbstring_substitute_character)
	STD_PHP_INI_ENTRY("mbstring.func_overload", "0",
	PHP_INI_SYSTEM, OnUpdateLong, func_overload, zend_mbstring_globals, mbstring_globals)

	STD_PHP_INI_BOOLEAN("mbstring.encoding_translation", "0",
		PHP_INI_SYSTEM | PHP_INI_PERDIR,
		OnUpdate_mbstring_encoding_translation,
		encoding_translation, zend_mbstring_globals, mbstring_globals)
	PHP_INI_ENTRY("mbstring.http_output_conv_mimetypes",
		"^(text/|application/xhtml\\+xml)",
		PHP_INI_ALL,
		OnUpdate_mbstring_http_output_conv_mimetypes)

	/* NOTE(review): declared as a BOOLEAN entry but handled by OnUpdateLong;
	 * confirm the strict_detection field is wide enough for a long. */
	STD_PHP_INI_BOOLEAN("mbstring.strict_detection", "0",
		PHP_INI_ALL,
		OnUpdateLong,
		strict_detection, zend_mbstring_globals, mbstring_globals)
PHP_INI_END()
1486 /* }}} */
1487
1488 /* {{{ module global initialize handler */
1489 static PHP_GINIT_FUNCTION(mbstring)
1490 {
1491 #if defined(COMPILE_DL_MBSTRING) && defined(ZTS)
1492 ZEND_TSRMLS_CACHE_UPDATE();
1493 #endif
1494
1495 mbstring_globals->language = mbfl_no_language_uni;
1496 mbstring_globals->internal_encoding = NULL;
1497 mbstring_globals->current_internal_encoding = mbstring_globals->internal_encoding;
1498 mbstring_globals->http_output_encoding = &mbfl_encoding_pass;
1499 mbstring_globals->current_http_output_encoding = &mbfl_encoding_pass;
1500 mbstring_globals->http_input_identify = NULL;
1501 mbstring_globals->http_input_identify_get = NULL;
1502 mbstring_globals->http_input_identify_post = NULL;
1503 mbstring_globals->http_input_identify_cookie = NULL;
1504 mbstring_globals->http_input_identify_string = NULL;
1505 mbstring_globals->http_input_list = NULL;
1506 mbstring_globals->http_input_list_size = 0;
1507 mbstring_globals->detect_order_list = NULL;
1508 mbstring_globals->detect_order_list_size = 0;
1509 mbstring_globals->current_detect_order_list = NULL;
1510 mbstring_globals->current_detect_order_list_size = 0;
1511 mbstring_globals->default_detect_order_list = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut;
1512 mbstring_globals->default_detect_order_list_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
1513 mbstring_globals->filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1514 mbstring_globals->filter_illegal_substchar = 0x3f; /* '?' */
1515 mbstring_globals->current_filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1516 mbstring_globals->current_filter_illegal_substchar = 0x3f; /* '?' */
1517 mbstring_globals->illegalchars = 0;
1518 mbstring_globals->func_overload = 0;
1519 mbstring_globals->encoding_translation = 0;
1520 mbstring_globals->strict_detection = 0;
1521 mbstring_globals->outconv = NULL;
1522 mbstring_globals->http_output_conv_mimetypes = NULL;
1523 #if HAVE_MBREGEX
1524 mbstring_globals->mb_regex_globals = php_mb_regex_globals_alloc();
1525 #endif
1526 }
1527 /* }}} */
1528
1529 /* {{{ PHP_GSHUTDOWN_FUNCTION */
PHP_GSHUTDOWN_FUNCTION(mbstring)1530 static PHP_GSHUTDOWN_FUNCTION(mbstring)
1531 {
1532 if (mbstring_globals->http_input_list) {
1533 free(mbstring_globals->http_input_list);
1534 }
1535 if (mbstring_globals->detect_order_list) {
1536 free(mbstring_globals->detect_order_list);
1537 }
1538 if (mbstring_globals->http_output_conv_mimetypes) {
1539 _php_mb_free_regex(mbstring_globals->http_output_conv_mimetypes);
1540 }
1541 #if HAVE_MBREGEX
1542 php_mb_regex_globals_free(mbstring_globals->mb_regex_globals);
1543 #endif
1544 }
1545 /* }}} */
1546
1547 /* {{{ PHP_MINIT_FUNCTION(mbstring) */
/* Module startup: install the libmbfl allocators, register ini entries,
 * the treat-data handler, constants, the Zend multibyte hook table and the
 * RFC 1867 multibyte callbacks. Returns FAILURE only when
 * zend_multibyte_set_functions() fails. */
PHP_MINIT_FUNCTION(mbstring)
{
#if defined(COMPILE_DL_MBSTRING) && defined(ZTS)
	ZEND_TSRMLS_CACHE_UPDATE();
#endif
	__mbfl_allocators = &_php_mb_allocators;

	REGISTER_INI_ENTRIES();

	/* This is a global handler. Should not be set in a per-request handler. */
	sapi_register_treat_data(mbstr_treat_data);

	/* Post handlers are stored in the thread-local context. */
	if (MBSTRG(encoding_translation)) {
		sapi_register_post_entries(mbstr_post_entries);
	}

	/* MB_OVERLOAD_* constants */
	REGISTER_LONG_CONSTANT("MB_OVERLOAD_MAIL", MB_OVERLOAD_MAIL, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("MB_OVERLOAD_STRING", MB_OVERLOAD_STRING, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("MB_OVERLOAD_REGEX", MB_OVERLOAD_REGEX, CONST_CS | CONST_PERSISTENT);

	/* MB_CASE_* constants */
	REGISTER_LONG_CONSTANT("MB_CASE_UPPER", PHP_UNICODE_CASE_UPPER, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("MB_CASE_LOWER", PHP_UNICODE_CASE_LOWER, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("MB_CASE_TITLE", PHP_UNICODE_CASE_TITLE, CONST_CS | CONST_PERSISTENT);

#if HAVE_MBREGEX
	PHP_MINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
#endif

	/* hand the php_mb_zend_* hook table to the engine */
	if (FAILURE == zend_multibyte_set_functions(&php_mb_zend_multibyte_functions)) {
		return FAILURE;
	}

	/* multibyte-aware callbacks for RFC 1867 (file upload) parsing */
	php_rfc1867_set_multibyte_callbacks(
		php_mb_encoding_translation,
		php_mb_gpc_get_detect_order,
		php_mb_gpc_set_input_encoding,
		php_mb_rfc1867_getword,
		php_mb_rfc1867_getword_conf,
		php_mb_rfc1867_basename);

	return SUCCESS;
}
1591 /* }}} */
1592
1593 /* {{{ PHP_MSHUTDOWN_FUNCTION(mbstring) */
/* Module shutdown: unregister the ini entries, restore the engine's
 * multibyte hooks and, with HAVE_MBREGEX, shut down mbregex.
 * Always returns SUCCESS. */
PHP_MSHUTDOWN_FUNCTION(mbstring)
{
	UNREGISTER_INI_ENTRIES();

	zend_multibyte_restore_functions();

#if HAVE_MBREGEX
	PHP_MSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
#endif

	return SUCCESS;
}
1606 /* }}} */
1607
1608 /* {{{ PHP_RINIT_FUNCTION(mbstring) */
PHP_RINIT_FUNCTION(mbstring)1609 PHP_RINIT_FUNCTION(mbstring)
1610 {
1611 zend_function *func, *orig;
1612 const struct mb_overload_def *p;
1613
1614 MBSTRG(current_internal_encoding) = MBSTRG(internal_encoding);
1615 MBSTRG(current_http_output_encoding) = MBSTRG(http_output_encoding);
1616 MBSTRG(current_filter_illegal_mode) = MBSTRG(filter_illegal_mode);
1617 MBSTRG(current_filter_illegal_substchar) = MBSTRG(filter_illegal_substchar);
1618
1619 MBSTRG(illegalchars) = 0;
1620
1621 php_mb_populate_current_detect_order_list();
1622
1623 /* override original function. */
1624 if (MBSTRG(func_overload)){
1625 zend_error(E_DEPRECATED, "The mbstring.func_overload directive is deprecated");
1626
1627 p = &(mb_ovld[0]);
1628 CG(compiler_options) |= ZEND_COMPILE_NO_BUILTIN_STRLEN;
1629 while (p->type > 0) {
1630 if ((MBSTRG(func_overload) & p->type) == p->type &&
1631 !zend_hash_str_exists(EG(function_table), p->save_func, strlen(p->save_func))
1632 ) {
1633 func = zend_hash_str_find_ptr(EG(function_table), p->ovld_func, strlen(p->ovld_func));
1634
1635 if ((orig = zend_hash_str_find_ptr(EG(function_table), p->orig_func, strlen(p->orig_func))) == NULL) {
1636 php_error_docref("ref.mbstring", E_WARNING, "mbstring couldn't find function %s.", p->orig_func);
1637 return FAILURE;
1638 } else {
1639 ZEND_ASSERT(orig->type == ZEND_INTERNAL_FUNCTION);
1640 zend_hash_str_add_mem(EG(function_table), p->save_func, strlen(p->save_func), orig, sizeof(zend_internal_function));
1641 function_add_ref(orig);
1642
1643 if (zend_hash_str_update_mem(EG(function_table), p->orig_func, strlen(p->orig_func), func, sizeof(zend_internal_function)) == NULL) {
1644 php_error_docref("ref.mbstring", E_WARNING, "mbstring couldn't replace function %s.", p->orig_func);
1645 return FAILURE;
1646 }
1647
1648 function_add_ref(func);
1649 }
1650 }
1651 p++;
1652 }
1653 }
1654 #if HAVE_MBREGEX
1655 PHP_RINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1656 #endif
1657 zend_multibyte_set_internal_encoding((const zend_encoding *)MBSTRG(internal_encoding));
1658
1659 return SUCCESS;
1660 }
1661 /* }}} */
1662
1663 /* {{{ PHP_RSHUTDOWN_FUNCTION(mbstring) */
PHP_RSHUTDOWN_FUNCTION(mbstring)1664 PHP_RSHUTDOWN_FUNCTION(mbstring)
1665 {
1666 const struct mb_overload_def *p;
1667 zend_function *orig;
1668
1669 if (MBSTRG(current_detect_order_list) != NULL) {
1670 efree(MBSTRG(current_detect_order_list));
1671 MBSTRG(current_detect_order_list) = NULL;
1672 MBSTRG(current_detect_order_list_size) = 0;
1673 }
1674 if (MBSTRG(outconv) != NULL) {
1675 MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
1676 mbfl_buffer_converter_delete(MBSTRG(outconv));
1677 MBSTRG(outconv) = NULL;
1678 }
1679
1680 /* clear http input identification. */
1681 MBSTRG(http_input_identify) = NULL;
1682 MBSTRG(http_input_identify_post) = NULL;
1683 MBSTRG(http_input_identify_get) = NULL;
1684 MBSTRG(http_input_identify_cookie) = NULL;
1685 MBSTRG(http_input_identify_string) = NULL;
1686
1687 /* clear overloaded function. */
1688 if (MBSTRG(func_overload)){
1689 p = &(mb_ovld[0]);
1690 while (p->type > 0) {
1691 if ((MBSTRG(func_overload) & p->type) == p->type &&
1692 (orig = zend_hash_str_find_ptr(EG(function_table), p->save_func, strlen(p->save_func)))) {
1693
1694 zend_hash_str_update_mem(EG(function_table), p->orig_func, strlen(p->orig_func), orig, sizeof(zend_internal_function));
1695 function_add_ref(orig);
1696 zend_hash_str_del(EG(function_table), p->save_func, strlen(p->save_func));
1697 }
1698 p++;
1699 }
1700 CG(compiler_options) &= ~ZEND_COMPILE_NO_BUILTIN_STRLEN;
1701 }
1702
1703 #if HAVE_MBREGEX
1704 PHP_RSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1705 #endif
1706
1707 return SUCCESS;
1708 }
1709 /* }}} */
1710
1711 /* {{{ PHP_MINFO_FUNCTION(mbstring) */
PHP_MINFO_FUNCTION(mbstring)1712 PHP_MINFO_FUNCTION(mbstring)
1713 {
1714 php_info_print_table_start();
1715 php_info_print_table_row(2, "Multibyte Support", "enabled");
1716 php_info_print_table_row(2, "Multibyte string engine", "libmbfl");
1717 php_info_print_table_row(2, "HTTP input encoding translation", MBSTRG(encoding_translation) ? "enabled": "disabled");
1718 {
1719 char tmp[256];
1720 snprintf(tmp, sizeof(tmp), "%d.%d.%d", MBFL_VERSION_MAJOR, MBFL_VERSION_MINOR, MBFL_VERSION_TEENY);
1721 php_info_print_table_row(2, "libmbfl version", tmp);
1722 }
1723 #if HAVE_ONIG
1724 {
1725 char tmp[256];
1726 snprintf(tmp, sizeof(tmp), "%d.%d.%d", ONIGURUMA_VERSION_MAJOR, ONIGURUMA_VERSION_MINOR, ONIGURUMA_VERSION_TEENY);
1727 php_info_print_table_row(2, "oniguruma version", tmp);
1728 }
1729 #endif
1730 php_info_print_table_end();
1731
1732 php_info_print_table_start();
1733 php_info_print_table_header(1, "mbstring extension makes use of \"streamable kanji code filter and converter\", which is distributed under the GNU Lesser General Public License version 2.1.");
1734 php_info_print_table_end();
1735
1736 #if HAVE_MBREGEX
1737 PHP_MINFO(mb_regex)(ZEND_MODULE_INFO_FUNC_ARGS_PASSTHRU);
1738 #endif
1739
1740 DISPLAY_INI_ENTRIES();
1741 }
1742 /* }}} */
1743
1744 /* {{{ proto string mb_language([string language])
1745 Sets the current language or Returns the current language as a string */
PHP_FUNCTION(mb_language)1746 PHP_FUNCTION(mb_language)
1747 {
1748 zend_string *name = NULL;
1749
1750 if (zend_parse_parameters(ZEND_NUM_ARGS(), "|S", &name) == FAILURE) {
1751 return;
1752 }
1753 if (name == NULL) {
1754 RETVAL_STRING((char *)mbfl_no_language2name(MBSTRG(language)));
1755 } else {
1756 zend_string *ini_name = zend_string_init("mbstring.language", sizeof("mbstring.language") - 1, 0);
1757 if (FAILURE == zend_alter_ini_entry(ini_name, name, PHP_INI_USER, PHP_INI_STAGE_RUNTIME)) {
1758 php_error_docref(NULL, E_WARNING, "Unknown language \"%s\"", ZSTR_VAL(name));
1759 RETVAL_FALSE;
1760 } else {
1761 RETVAL_TRUE;
1762 }
1763 zend_string_release(ini_name);
1764 }
1765 }
1766 /* }}} */
1767
1768 /* {{{ proto string mb_internal_encoding([string encoding])
1769 Sets the current internal encoding or Returns the current internal encoding as a string */
PHP_FUNCTION(mb_internal_encoding)1770 PHP_FUNCTION(mb_internal_encoding)
1771 {
1772 const char *name = NULL;
1773 size_t name_len;
1774 const mbfl_encoding *encoding;
1775
1776 if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s", &name, &name_len) == FAILURE) {
1777 return;
1778 }
1779 if (name == NULL) {
1780 name = MBSTRG(current_internal_encoding) ? MBSTRG(current_internal_encoding)->name: NULL;
1781 if (name != NULL) {
1782 RETURN_STRING(name);
1783 } else {
1784 RETURN_FALSE;
1785 }
1786 } else {
1787 encoding = mbfl_name2encoding(name);
1788 if (!encoding) {
1789 php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", name);
1790 RETURN_FALSE;
1791 } else {
1792 MBSTRG(current_internal_encoding) = encoding;
1793 RETURN_TRUE;
1794 }
1795 }
1796 }
1797 /* }}} */
1798
1799 /* {{{ proto mixed mb_http_input([string type])
1800 Returns the input encoding */
/* Report HTTP input encoding information, selected by the first character
 * of the optional `type` argument (case-insensitive):
 *   G, P, C, S - name of the encoding identified for that input source
 *   I          - array of the names in the configured http_input list
 *   L          - the same names as one comma-separated string
 *   (none or anything else) - name of the overall identified encoding
 * Returns false when the requested encoding is not set or, for L, when the
 * list is empty. */
PHP_FUNCTION(mb_http_input)
{
	char *typ = NULL;
	size_t typ_len;
	int retname;	/* non-zero while `result` still has to be turned into the return value */
	char *list, *temp;
	const mbfl_encoding *result = NULL;

	retname = 1;
	if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s", &typ, &typ_len) == FAILURE) {
		return;
	}
	if (typ == NULL) {
		result = MBSTRG(http_input_identify);
	} else {
		switch (*typ) {
		case 'G':
		case 'g':
			result = MBSTRG(http_input_identify_get);
			break;
		case 'P':
		case 'p':
			result = MBSTRG(http_input_identify_post);
			break;
		case 'C':
		case 'c':
			result = MBSTRG(http_input_identify_cookie);
			break;
		case 'S':
		case 's':
			result = MBSTRG(http_input_identify_string);
			break;
		case 'I':
		case 'i':
			{
				/* return the configured list as an array of names */
				const mbfl_encoding **entry = MBSTRG(http_input_list);
				const size_t n = MBSTRG(http_input_list_size);
				size_t i;
				array_init(return_value);
				for (i = 0; i < n; i++) {
					add_next_index_string(return_value, (*entry)->name);
					entry++;
				}
				retname = 0;
			}
			break;
		case 'L':
		case 'l':
			{
				/* build "name1,name2,..." by repeatedly re-formatting the string */
				const mbfl_encoding **entry = MBSTRG(http_input_list);
				const size_t n = MBSTRG(http_input_list_size);
				size_t i;
				list = NULL;
				for (i = 0; i < n; i++) {
					if (list) {
						temp = list;
						spprintf(&list, 0, "%s,%s", temp, (*entry)->name);
						efree(temp);
						if (!list) {
							/* leaves the for loop only; handled by the check below */
							break;
						}
					} else {
						list = estrdup((*entry)->name);
					}
					entry++;
				}
			}
			if (!list) {
				RETURN_FALSE;
			}
			RETVAL_STRING(list);
			efree(list);
			retname = 0;
			break;
		default:
			result = MBSTRG(http_input_identify);
			break;
		}
	}

	if (retname) {
		if (result) {
			RETVAL_STRING(result->name);
		} else {
			RETVAL_FALSE;
		}
	}
}
1889 /* }}} */
1890
1891 /* {{{ proto string mb_http_output([string encoding])
1892 Sets the current output_encoding or returns the current output_encoding as a string */
PHP_FUNCTION(mb_http_output)1893 PHP_FUNCTION(mb_http_output)
1894 {
1895 const char *name = NULL;
1896 size_t name_len;
1897 const mbfl_encoding *encoding;
1898
1899 if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s", &name, &name_len) == FAILURE) {
1900 return;
1901 }
1902
1903 if (name == NULL) {
1904 name = MBSTRG(current_http_output_encoding) ? MBSTRG(current_http_output_encoding)->name: NULL;
1905 if (name != NULL) {
1906 RETURN_STRING(name);
1907 } else {
1908 RETURN_FALSE;
1909 }
1910 } else {
1911 encoding = mbfl_name2encoding(name);
1912 if (!encoding) {
1913 php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", name);
1914 RETURN_FALSE;
1915 } else {
1916 MBSTRG(current_http_output_encoding) = encoding;
1917 RETURN_TRUE;
1918 }
1919 }
1920 }
1921 /* }}} */
1922
1923 /* {{{ proto bool|array mb_detect_order([mixed encoding-list])
1924 Sets the current detect_order or Return the current detect_order as a array */
PHP_FUNCTION(mb_detect_order)1925 PHP_FUNCTION(mb_detect_order)
1926 {
1927 zval *arg1 = NULL;
1928
1929 if (zend_parse_parameters(ZEND_NUM_ARGS(), "|z", &arg1) == FAILURE) {
1930 return;
1931 }
1932
1933 if (!arg1) {
1934 size_t i;
1935 size_t n = MBSTRG(current_detect_order_list_size);
1936 const mbfl_encoding **entry = MBSTRG(current_detect_order_list);
1937 array_init(return_value);
1938 for (i = 0; i < n; i++) {
1939 add_next_index_string(return_value, (*entry)->name);
1940 entry++;
1941 }
1942 } else {
1943 const mbfl_encoding **list = NULL;
1944 size_t size = 0;
1945 switch (Z_TYPE_P(arg1)) {
1946 case IS_ARRAY:
1947 if (FAILURE == php_mb_parse_encoding_array(arg1, &list, &size, 0)) {
1948 if (list) {
1949 efree(list);
1950 }
1951 RETURN_FALSE;
1952 }
1953 break;
1954 default:
1955 convert_to_string_ex(arg1);
1956 if (FAILURE == php_mb_parse_encoding_list(Z_STRVAL_P(arg1), Z_STRLEN_P(arg1), &list, &size, 0)) {
1957 if (list) {
1958 efree(list);
1959 }
1960 RETURN_FALSE;
1961 }
1962 break;
1963 }
1964
1965 if (list == NULL) {
1966 RETURN_FALSE;
1967 }
1968
1969 if (MBSTRG(current_detect_order_list)) {
1970 efree(MBSTRG(current_detect_order_list));
1971 }
1972 MBSTRG(current_detect_order_list) = list;
1973 MBSTRG(current_detect_order_list_size) = size;
1974 RETURN_TRUE;
1975 }
1976 }
1977 /* }}} */
1978
/* Checks whether cp may be used as a substitute character.
 * Returns 1 when cp is in the range 0x1..0x10ffff and is not a surrogate
 * (0xd800..0xdfff); returns 0 otherwise. Zero itself is rejected. */
static inline int php_mb_check_code_point(long cp)
{
	/* Unicode ends at 0x10ffff; zero and negative values are refused too. */
	const int in_unicode_range = (cp > 0 && cp < 0x110000);

	/* Surrogates are never valid on their own, and only a single substitute
	 * character is allowed. */
	const int is_surrogate = (cp >= 0xd800 && cp <= 0xdfff);

	/* The target encoding of the conversion that will eventually use the
	 * substitute character is not known here, so it cannot be checked whether
	 * the code point is actually mapped in that encoding. Everything else is
	 * therefore accepted. */
	return (in_unicode_range && !is_surrogate) ? 1 : 0;
}
1997
1998 /* {{{ proto mixed mb_substitute_character([mixed substchar])
1999 Sets the current substitute_character or returns the current substitute_character */
PHP_FUNCTION(mb_substitute_character)2000 PHP_FUNCTION(mb_substitute_character)
2001 {
2002 zval *arg1 = NULL;
2003
2004 if (zend_parse_parameters(ZEND_NUM_ARGS(), "|z", &arg1) == FAILURE) {
2005 return;
2006 }
2007
2008 if (!arg1) {
2009 if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
2010 RETURN_STRING("none");
2011 } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
2012 RETURN_STRING("long");
2013 } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
2014 RETURN_STRING("entity");
2015 } else {
2016 RETURN_LONG(MBSTRG(current_filter_illegal_substchar));
2017 }
2018 } else {
2019 RETVAL_TRUE;
2020
2021 switch (Z_TYPE_P(arg1)) {
2022 case IS_STRING:
2023 if (strncasecmp("none", Z_STRVAL_P(arg1), Z_STRLEN_P(arg1)) == 0) {
2024 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
2025 } else if (strncasecmp("long", Z_STRVAL_P(arg1), Z_STRLEN_P(arg1)) == 0) {
2026 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
2027 } else if (strncasecmp("entity", Z_STRVAL_P(arg1), Z_STRLEN_P(arg1)) == 0) {
2028 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
2029 } else {
2030 convert_to_long_ex(arg1);
2031
2032 if (php_mb_check_code_point(Z_LVAL_P(arg1))) {
2033 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
2034 MBSTRG(current_filter_illegal_substchar) = Z_LVAL_P(arg1);
2035 } else {
2036 php_error_docref(NULL, E_WARNING, "Unknown character");
2037 RETURN_FALSE;
2038 }
2039 }
2040 break;
2041 default:
2042 convert_to_long_ex(arg1);
2043 if (php_mb_check_code_point(Z_LVAL_P(arg1))) {
2044 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
2045 MBSTRG(current_filter_illegal_substchar) = Z_LVAL_P(arg1);
2046 } else {
2047 php_error_docref(NULL, E_WARNING, "Unknown character");
2048 RETURN_FALSE;
2049 }
2050 break;
2051 }
2052 }
2053 }
2054 /* }}} */
2055
2056 /* {{{ proto string mb_preferred_mime_name(string encoding)
2057 Return the preferred MIME name (charset) as a string */
PHP_FUNCTION(mb_preferred_mime_name)2058 PHP_FUNCTION(mb_preferred_mime_name)
2059 {
2060 enum mbfl_no_encoding no_encoding;
2061 char *name = NULL;
2062 size_t name_len;
2063
2064 if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &name, &name_len) == FAILURE) {
2065 return;
2066 } else {
2067 no_encoding = mbfl_name2no_encoding(name);
2068 if (no_encoding == mbfl_no_encoding_invalid) {
2069 php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", name);
2070 RETVAL_FALSE;
2071 } else {
2072 const char *preferred_name = mbfl_no2preferred_mime_name(no_encoding);
2073 if (preferred_name == NULL || *preferred_name == '\0') {
2074 php_error_docref(NULL, E_WARNING, "No MIME preferred name corresponding to \"%s\"", name);
2075 RETVAL_FALSE;
2076 } else {
2077 RETVAL_STRING((char *)preferred_name);
2078 }
2079 }
2080 }
2081 }
2082 /* }}} */
2083
/* Byte-range predicates; each yields 1 when c lies in the listed ranges and 0
 * otherwise. Presumably first/second byte of a Shift_JIS double-byte
 * character, going by the macro names. The argument is evaluated more than
 * once, so it must not have side effects. */
#define IS_SJIS1(c) (((c) >= 0x81 && (c) <= 0x9f) || ((c) >= 0xe0 && (c) <= 0xf5))
#define IS_SJIS2(c) (((c) >= 0x40 && (c) <= 0x7e) || ((c) >= 0x80 && (c) <= 0xfc))
2086
2087 /* {{{ proto bool mb_parse_str(string encoded_string [, array result])
2088 Parses GET/POST/COOKIE data and sets global variables */
PHP_FUNCTION(mb_parse_str)2089 PHP_FUNCTION(mb_parse_str)
2090 {
2091 zval *track_vars_array = NULL;
2092 char *encstr = NULL;
2093 size_t encstr_len;
2094 php_mb_encoding_handler_info_t info;
2095 const mbfl_encoding *detected;
2096
2097 track_vars_array = NULL;
2098 if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|z/", &encstr, &encstr_len, &track_vars_array) == FAILURE) {
2099 return;
2100 }
2101
2102 if (track_vars_array != NULL) {
2103 /* Clear out the array */
2104 zval_dtor(track_vars_array);
2105 array_init(track_vars_array);
2106 }
2107
2108 encstr = estrndup(encstr, encstr_len);
2109
2110 info.data_type = PARSE_STRING;
2111 info.separator = PG(arg_separator).input;
2112 info.report_errors = 1;
2113 info.to_encoding = MBSTRG(current_internal_encoding);
2114 info.to_language = MBSTRG(language);
2115 info.from_encodings = MBSTRG(http_input_list);
2116 info.num_from_encodings = MBSTRG(http_input_list_size);
2117 info.from_language = MBSTRG(language);
2118
2119 if (track_vars_array != NULL) {
2120 detected = _php_mb_encoding_handler_ex(&info, track_vars_array, encstr);
2121 } else {
2122 zval tmp;
2123 zend_array *symbol_table;
2124 if (zend_forbid_dynamic_call("mb_parse_str() with a single argument") == FAILURE) {
2125 efree(encstr);
2126 return;
2127 }
2128
2129 php_error_docref(NULL, E_DEPRECATED, "Calling mb_parse_str() without the result argument is deprecated");
2130
2131 symbol_table = zend_rebuild_symbol_table();
2132 ZVAL_ARR(&tmp, symbol_table);
2133 detected = _php_mb_encoding_handler_ex(&info, &tmp, encstr);
2134 }
2135
2136 MBSTRG(http_input_identify) = detected;
2137
2138 RETVAL_BOOL(detected);
2139
2140 if (encstr != NULL) efree(encstr);
2141 }
2142 /* }}} */
2143
2144 /* {{{ proto string mb_output_handler(string contents, int status)
2145 Returns string in output buffer converted to the http_output encoding */
PHP_FUNCTION(mb_output_handler)2146 PHP_FUNCTION(mb_output_handler)
2147 {
2148 char *arg_string;
2149 size_t arg_string_len;
2150 zend_long arg_status;
2151 mbfl_string string, result;
2152 const char *charset;
2153 char *p;
2154 const mbfl_encoding *encoding;
2155 int last_feed, len;
2156 unsigned char send_text_mimetype = 0;
2157 char *s, *mimetype = NULL;
2158
2159 if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl", &arg_string, &arg_string_len, &arg_status) == FAILURE) {
2160 return;
2161 }
2162
2163 encoding = MBSTRG(current_http_output_encoding);
2164
2165 /* start phase only */
2166 if ((arg_status & PHP_OUTPUT_HANDLER_START) != 0) {
2167 /* delete the converter just in case. */
2168 if (MBSTRG(outconv)) {
2169 MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
2170 mbfl_buffer_converter_delete(MBSTRG(outconv));
2171 MBSTRG(outconv) = NULL;
2172 }
2173 if (encoding == &mbfl_encoding_pass) {
2174 RETURN_STRINGL(arg_string, arg_string_len);
2175 }
2176
2177 /* analyze mime type */
2178 if (SG(sapi_headers).mimetype &&
2179 _php_mb_match_regex(
2180 MBSTRG(http_output_conv_mimetypes),
2181 SG(sapi_headers).mimetype,
2182 strlen(SG(sapi_headers).mimetype))) {
2183 if ((s = strchr(SG(sapi_headers).mimetype,';')) == NULL){
2184 mimetype = estrdup(SG(sapi_headers).mimetype);
2185 } else {
2186 mimetype = estrndup(SG(sapi_headers).mimetype,s-SG(sapi_headers).mimetype);
2187 }
2188 send_text_mimetype = 1;
2189 } else if (SG(sapi_headers).send_default_content_type) {
2190 mimetype = SG(default_mimetype) ? SG(default_mimetype) : SAPI_DEFAULT_MIMETYPE;
2191 }
2192
2193 /* if content-type is not yet set, set it and activate the converter */
2194 if (SG(sapi_headers).send_default_content_type || send_text_mimetype) {
2195 charset = encoding->mime_name;
2196 if (charset) {
2197 len = spprintf( &p, 0, "Content-Type: %s; charset=%s", mimetype, charset );
2198 if (sapi_add_header(p, len, 0) != FAILURE) {
2199 SG(sapi_headers).send_default_content_type = 0;
2200 }
2201 }
2202 /* activate the converter */
2203 MBSTRG(outconv) = mbfl_buffer_converter_new2(MBSTRG(current_internal_encoding), encoding, 0);
2204 if (send_text_mimetype){
2205 efree(mimetype);
2206 }
2207 }
2208 }
2209
2210 /* just return if the converter is not activated. */
2211 if (MBSTRG(outconv) == NULL) {
2212 RETURN_STRINGL(arg_string, arg_string_len);
2213 }
2214
2215 /* flag */
2216 last_feed = ((arg_status & PHP_OUTPUT_HANDLER_END) != 0);
2217 /* mode */
2218 mbfl_buffer_converter_illegal_mode(MBSTRG(outconv), MBSTRG(current_filter_illegal_mode));
2219 mbfl_buffer_converter_illegal_substchar(MBSTRG(outconv), MBSTRG(current_filter_illegal_substchar));
2220
2221 /* feed the string */
2222 mbfl_string_init(&string);
2223 /* these are not needed. convd has encoding info.
2224 string.no_language = MBSTRG(language);
2225 string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2226 */
2227 string.val = (unsigned char *)arg_string;
2228 string.len = arg_string_len;
2229 mbfl_buffer_converter_feed(MBSTRG(outconv), &string);
2230 if (last_feed) {
2231 mbfl_buffer_converter_flush(MBSTRG(outconv));
2232 }
2233 /* get the converter output, and return it */
2234 mbfl_buffer_converter_result(MBSTRG(outconv), &result);
2235 // TODO: avoid reallocation ???
2236 RETVAL_STRINGL((char *)result.val, result.len); /* the string is already strdup()'ed */
2237 efree(result.val);
2238
2239 /* delete the converter if it is the last feed. */
2240 if (last_feed) {
2241 MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
2242 mbfl_buffer_converter_delete(MBSTRG(outconv));
2243 MBSTRG(outconv) = NULL;
2244 }
2245 }
2246 /* }}} */
2247
2248 /* {{{ proto int mb_strlen(string str [, string encoding])
2249 Get character numbers of a string */
PHP_FUNCTION(mb_strlen)2250 PHP_FUNCTION(mb_strlen)
2251 {
2252 int n;
2253 mbfl_string string;
2254 char *enc_name = NULL;
2255 size_t enc_name_len, string_len;
2256
2257 mbfl_string_init(&string);
2258
2259 if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|s", (char **)&string.val, &string_len, &enc_name, &enc_name_len) == FAILURE) {
2260 return;
2261 }
2262
2263 if (ZEND_SIZE_T_UINT_OVFL(string_len)) {
2264 php_error_docref(NULL, E_WARNING, "String overflows the max allowed length of %u", UINT_MAX);
2265 return;
2266 }
2267
2268 string.len = (uint32_t)string_len;
2269
2270 string.no_language = MBSTRG(language);
2271 if (enc_name == NULL) {
2272 string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2273 } else {
2274 string.no_encoding = mbfl_name2no_encoding(enc_name);
2275 if (string.no_encoding == mbfl_no_encoding_invalid) {
2276 php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2277 RETURN_FALSE;
2278 }
2279 }
2280
2281 n = mbfl_strlen(&string);
2282 if (n >= 0) {
2283 RETVAL_LONG(n);
2284 } else {
2285 RETVAL_FALSE;
2286 }
2287 }
2288 /* }}} */
2289
2290 /* {{{ proto int mb_strpos(string haystack, string needle [, int offset [, string encoding]])
2291 Find position of first occurrence of a string within another */
PHP_FUNCTION(mb_strpos)2292 PHP_FUNCTION(mb_strpos)
2293 {
2294 int n, reverse = 0;
2295 zend_long offset = 0, slen;
2296 mbfl_string haystack, needle;
2297 char *enc_name = NULL;
2298 size_t enc_name_len, haystack_len, needle_len;
2299
2300 mbfl_string_init(&haystack);
2301 mbfl_string_init(&needle);
2302 haystack.no_language = MBSTRG(language);
2303 haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2304 needle.no_language = MBSTRG(language);
2305 needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2306
2307 if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|ls", (char **)&haystack.val, &haystack_len, (char **)&needle.val, &needle_len, &offset, &enc_name, &enc_name_len) == FAILURE) {
2308 return;
2309 }
2310
2311 if (ZEND_SIZE_T_UINT_OVFL(haystack_len)) {
2312 php_error_docref(NULL, E_WARNING, "Haystack length overflows the max allowed length of %u", UINT_MAX);
2313 return;
2314 } else if (ZEND_SIZE_T_UINT_OVFL(needle_len)) {
2315 php_error_docref(NULL, E_WARNING, "Needle length overflows the max allowed length of %u", UINT_MAX);
2316 return;
2317 }
2318
2319 haystack.len = (uint32_t)haystack_len;
2320 needle.len = (uint32_t)needle_len;
2321
2322 if (enc_name != NULL) {
2323 haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2324 if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2325 php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2326 RETURN_FALSE;
2327 }
2328 }
2329
2330 slen = mbfl_strlen(&haystack);
2331 if (offset < 0) {
2332 offset += slen;
2333 }
2334 if (offset < 0 || offset > slen) {
2335 php_error_docref(NULL, E_WARNING, "Offset not contained in string");
2336 RETURN_FALSE;
2337 }
2338 if (needle.len == 0) {
2339 php_error_docref(NULL, E_WARNING, "Empty delimiter");
2340 RETURN_FALSE;
2341 }
2342
2343 n = mbfl_strpos(&haystack, &needle, offset, reverse);
2344 if (n >= 0) {
2345 RETVAL_LONG(n);
2346 } else {
2347 switch (-n) {
2348 case 1:
2349 break;
2350 case 2:
2351 php_error_docref(NULL, E_WARNING, "Needle has not positive length");
2352 break;
2353 case 4:
2354 php_error_docref(NULL, E_WARNING, "Unknown encoding or conversion error");
2355 break;
2356 case 8:
2357 php_error_docref(NULL, E_NOTICE, "Argument is empty");
2358 break;
2359 default:
2360 php_error_docref(NULL, E_WARNING, "Unknown error in mb_strpos");
2361 break;
2362 }
2363 RETVAL_FALSE;
2364 }
2365 }
2366 /* }}} */
2367
2368 /* {{{ proto int mb_strrpos(string haystack, string needle [, int offset [, string encoding]])
2369 Find position of last occurrence of a string within another */
PHP_FUNCTION(mb_strrpos)2370 PHP_FUNCTION(mb_strrpos)
2371 {
2372 int n;
2373 mbfl_string haystack, needle;
2374 char *enc_name = NULL;
2375 size_t enc_name_len, haystack_len, needle_len;
2376 zval *zoffset = NULL;
2377 long offset = 0, str_flg;
2378 char *enc_name2 = NULL;
2379 int enc_name_len2;
2380
2381 mbfl_string_init(&haystack);
2382 mbfl_string_init(&needle);
2383 haystack.no_language = MBSTRG(language);
2384 haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2385 needle.no_language = MBSTRG(language);
2386 needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2387
2388 if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|zs", (char **)&haystack.val, &haystack_len, (char **)&needle.val, &needle_len, &zoffset, &enc_name, &enc_name_len) == FAILURE) {
2389 return;
2390 }
2391
2392 if (ZEND_SIZE_T_UINT_OVFL(haystack_len)) {
2393 php_error_docref(NULL, E_WARNING, "Haystack length overflows the max allowed length of %u", UINT_MAX);
2394 return;
2395 } else if (ZEND_SIZE_T_UINT_OVFL(needle_len)) {
2396 php_error_docref(NULL, E_WARNING, "Needle length overflows the max allowed length of %u", UINT_MAX);
2397 return;
2398 }
2399
2400 haystack.len = (uint32_t)haystack_len;
2401 needle.len = (uint32_t)needle_len;
2402
2403 if (zoffset) {
2404 if (Z_TYPE_P(zoffset) == IS_STRING) {
2405 enc_name2 = Z_STRVAL_P(zoffset);
2406 enc_name_len2 = Z_STRLEN_P(zoffset);
2407 str_flg = 1;
2408
2409 if (enc_name2 != NULL) {
2410 switch (*enc_name2) {
2411 case '0':
2412 case '1':
2413 case '2':
2414 case '3':
2415 case '4':
2416 case '5':
2417 case '6':
2418 case '7':
2419 case '8':
2420 case '9':
2421 case ' ':
2422 case '-':
2423 case '.':
2424 break;
2425 default :
2426 str_flg = 0;
2427 break;
2428 }
2429 }
2430
2431 if (str_flg) {
2432 convert_to_long_ex(zoffset);
2433 offset = Z_LVAL_P(zoffset);
2434 } else {
2435 enc_name = enc_name2;
2436 enc_name_len = enc_name_len2;
2437 }
2438 } else {
2439 convert_to_long_ex(zoffset);
2440 offset = Z_LVAL_P(zoffset);
2441 }
2442 }
2443
2444 if (enc_name != NULL) {
2445 haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2446 if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2447 php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2448 RETURN_FALSE;
2449 }
2450 }
2451
2452 if (haystack.len <= 0) {
2453 RETURN_FALSE;
2454 }
2455 if (needle.len <= 0) {
2456 RETURN_FALSE;
2457 }
2458
2459 {
2460 int haystack_char_len = mbfl_strlen(&haystack);
2461 if ((offset > 0 && offset > haystack_char_len) ||
2462 (offset < 0 && -offset > haystack_char_len)) {
2463 php_error_docref(NULL, E_WARNING, "Offset is greater than the length of haystack string");
2464 RETURN_FALSE;
2465 }
2466 }
2467
2468 n = mbfl_strpos(&haystack, &needle, offset, 1);
2469 if (n >= 0) {
2470 RETVAL_LONG(n);
2471 } else {
2472 RETVAL_FALSE;
2473 }
2474 }
2475 /* }}} */
2476
2477 /* {{{ proto int mb_stripos(string haystack, string needle [, int offset [, string encoding]])
2478 Finds position of first occurrence of a string within another, case insensitive */
PHP_FUNCTION(mb_stripos)2479 PHP_FUNCTION(mb_stripos)
2480 {
2481 int n = -1;
2482 zend_long offset = 0;
2483 mbfl_string haystack, needle;
2484 const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
2485 size_t from_encoding_len, haystack_len, needle_len;
2486
2487 if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|ls", (char **)&haystack.val, &haystack_len, (char **)&needle.val, &needle_len, &offset, &from_encoding, &from_encoding_len) == FAILURE) {
2488 return;
2489 }
2490
2491 if (ZEND_SIZE_T_UINT_OVFL(haystack_len)) {
2492 php_error_docref(NULL, E_WARNING, "Haystack length overflows the max allowed length of %u", UINT_MAX);
2493 return;
2494 } else if (ZEND_SIZE_T_UINT_OVFL(needle_len)) {
2495 php_error_docref(NULL, E_WARNING, "Needle length overflows the max allowed length of %u", UINT_MAX);
2496 return;
2497 }
2498
2499 haystack.len = (uint32_t)haystack_len;
2500 needle.len = (uint32_t)needle_len;
2501
2502 if (needle.len == 0) {
2503 php_error_docref(NULL, E_WARNING, "Empty delimiter");
2504 RETURN_FALSE;
2505 }
2506 n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, from_encoding);
2507
2508 if (n >= 0) {
2509 RETVAL_LONG(n);
2510 } else {
2511 RETVAL_FALSE;
2512 }
2513 }
2514 /* }}} */
2515
2516 /* {{{ proto int mb_strripos(string haystack, string needle [, int offset [, string encoding]])
2517 Finds position of last occurrence of a string within another, case insensitive */
PHP_FUNCTION(mb_strripos)2518 PHP_FUNCTION(mb_strripos)
2519 {
2520 int n = -1;
2521 zend_long offset = 0;
2522 mbfl_string haystack, needle;
2523 const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
2524 size_t from_encoding_len, haystack_len, needle_len;
2525
2526 if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|ls", (char **)&haystack.val, &haystack_len, (char **)&needle.val, &needle_len, &offset, &from_encoding, &from_encoding_len) == FAILURE) {
2527 return;
2528 }
2529
2530 if (ZEND_SIZE_T_UINT_OVFL(haystack_len)) {
2531 php_error_docref(NULL, E_WARNING, "Haystack length overflows the max allowed length of %u", UINT_MAX);
2532 return;
2533 } else if (ZEND_SIZE_T_UINT_OVFL(needle_len)) {
2534 php_error_docref(NULL, E_WARNING, "Needle length overflows the max allowed length of %u", UINT_MAX);
2535 return;
2536 }
2537
2538 haystack.len = (uint32_t)haystack_len;
2539 needle.len = (uint32_t)needle_len;
2540
2541 n = php_mb_stripos(1, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, from_encoding);
2542
2543 if (n >= 0) {
2544 RETVAL_LONG(n);
2545 } else {
2546 RETVAL_FALSE;
2547 }
2548 }
2549 /* }}} */
2550
2551 /* {{{ proto string mb_strstr(string haystack, string needle[, bool part[, string encoding]])
2552 Finds first occurrence of a string within another */
PHP_FUNCTION(mb_strstr)2553 PHP_FUNCTION(mb_strstr)
2554 {
2555 int n, len, mblen;
2556 mbfl_string haystack, needle, result, *ret = NULL;
2557 char *enc_name = NULL;
2558 size_t enc_name_len, haystack_len, needle_len;
2559 zend_bool part = 0;
2560
2561 mbfl_string_init(&haystack);
2562 mbfl_string_init(&needle);
2563 haystack.no_language = MBSTRG(language);
2564 haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2565 needle.no_language = MBSTRG(language);
2566 needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2567
2568 if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|bs", (char **)&haystack.val, &haystack_len, (char **)&needle.val, &needle_len, &part, &enc_name, &enc_name_len) == FAILURE) {
2569 return;
2570 }
2571
2572 if (ZEND_SIZE_T_UINT_OVFL(haystack_len)) {
2573 php_error_docref(NULL, E_WARNING, "Haystack length overflows the max allowed length of %u", UINT_MAX);
2574 return;
2575 } else if (ZEND_SIZE_T_UINT_OVFL(needle_len)) {
2576 php_error_docref(NULL, E_WARNING, "Needle length overflows the max allowed length of %u", UINT_MAX);
2577 return;
2578 }
2579
2580 haystack.len = (uint32_t)haystack_len;
2581 needle.len = (uint32_t)needle_len;
2582
2583 if (enc_name != NULL) {
2584 haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2585 if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2586 php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2587 RETURN_FALSE;
2588 }
2589 }
2590
2591 if (needle.len <= 0) {
2592 php_error_docref(NULL, E_WARNING, "Empty delimiter");
2593 RETURN_FALSE;
2594 }
2595 n = mbfl_strpos(&haystack, &needle, 0, 0);
2596 if (n >= 0) {
2597 mblen = mbfl_strlen(&haystack);
2598 if (part) {
2599 ret = mbfl_substr(&haystack, &result, 0, n);
2600 if (ret != NULL) {
2601 // TODO: avoid reallocation ???
2602 RETVAL_STRINGL((char *)ret->val, ret->len);
2603 efree(ret->val);
2604 } else {
2605 RETVAL_FALSE;
2606 }
2607 } else {
2608 len = (mblen - n);
2609 ret = mbfl_substr(&haystack, &result, n, len);
2610 if (ret != NULL) {
2611 // TODO: avoid reallocation ???
2612 RETVAL_STRINGL((char *)ret->val, ret->len);
2613 efree(ret->val);
2614 } else {
2615 RETVAL_FALSE;
2616 }
2617 }
2618 } else {
2619 RETVAL_FALSE;
2620 }
2621 }
2622 /* }}} */
2623
2624 /* {{{ proto string mb_strrchr(string haystack, string needle[, bool part[, string encoding]])
2625 Finds the last occurrence of a character in a string within another */
PHP_FUNCTION(mb_strrchr)2626 PHP_FUNCTION(mb_strrchr)
2627 {
2628 int n, len, mblen;
2629 mbfl_string haystack, needle, result, *ret = NULL;
2630 char *enc_name = NULL;
2631 size_t enc_name_len, haystack_len, needle_len;
2632 zend_bool part = 0;
2633
2634 mbfl_string_init(&haystack);
2635 mbfl_string_init(&needle);
2636 haystack.no_language = MBSTRG(language);
2637 haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2638 needle.no_language = MBSTRG(language);
2639 needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2640
2641 if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|bs", (char **)&haystack.val, &haystack_len, (char **)&needle.val, &needle_len, &part, &enc_name, &enc_name_len) == FAILURE) {
2642 return;
2643 }
2644
2645 if (ZEND_SIZE_T_UINT_OVFL(haystack_len)) {
2646 php_error_docref(NULL, E_WARNING, "Haystack length overflows the max allowed length of %u", UINT_MAX);
2647 return;
2648 } else if (ZEND_SIZE_T_UINT_OVFL(needle_len)) {
2649 php_error_docref(NULL, E_WARNING, "Needle length overflows the max allowed length of %u", UINT_MAX);
2650 return;
2651 }
2652
2653 haystack.len = (uint32_t)haystack_len;
2654 needle.len = (uint32_t)needle_len;
2655
2656 if (enc_name != NULL) {
2657 haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2658 if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2659 php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2660 RETURN_FALSE;
2661 }
2662 }
2663
2664 if (haystack.len <= 0) {
2665 RETURN_FALSE;
2666 }
2667 if (needle.len <= 0) {
2668 RETURN_FALSE;
2669 }
2670 n = mbfl_strpos(&haystack, &needle, 0, 1);
2671 if (n >= 0) {
2672 mblen = mbfl_strlen(&haystack);
2673 if (part) {
2674 ret = mbfl_substr(&haystack, &result, 0, n);
2675 if (ret != NULL) {
2676 // TODO: avoid reallocation ???
2677 RETVAL_STRINGL((char *)ret->val, ret->len);
2678 efree(ret->val);
2679 } else {
2680 RETVAL_FALSE;
2681 }
2682 } else {
2683 len = (mblen - n);
2684 ret = mbfl_substr(&haystack, &result, n, len);
2685 if (ret != NULL) {
2686 // TODO: avoid reallocation ???
2687 RETVAL_STRINGL((char *)ret->val, ret->len);
2688 efree(ret->val);
2689 } else {
2690 RETVAL_FALSE;
2691 }
2692 }
2693 } else {
2694 RETVAL_FALSE;
2695 }
2696 }
2697 /* }}} */
2698
2699 /* {{{ proto string mb_stristr(string haystack, string needle[, bool part[, string encoding]])
2700 Finds first occurrence of a string within another, case insensitive */
PHP_FUNCTION(mb_stristr)2701 PHP_FUNCTION(mb_stristr)
2702 {
2703 zend_bool part = 0;
2704 size_t from_encoding_len, len, mblen, haystack_len, needle_len;
2705 int n;
2706 mbfl_string haystack, needle, result, *ret = NULL;
2707 const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
2708 mbfl_string_init(&haystack);
2709 mbfl_string_init(&needle);
2710 haystack.no_language = MBSTRG(language);
2711 haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2712 needle.no_language = MBSTRG(language);
2713 needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2714
2715
2716 if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|bs", (char **)&haystack.val, &haystack_len, (char **)&needle.val, &needle_len, &part, &from_encoding, &from_encoding_len) == FAILURE) {
2717 return;
2718 }
2719
2720 if (ZEND_SIZE_T_UINT_OVFL(haystack_len)) {
2721 php_error_docref(NULL, E_WARNING, "Haystack length overflows the max allowed length of %u", UINT_MAX);
2722 return;
2723 } else if (ZEND_SIZE_T_UINT_OVFL(needle_len)) {
2724 php_error_docref(NULL, E_WARNING, "Needle length overflows the max allowed length of %u", UINT_MAX);
2725 return;
2726 }
2727
2728 haystack.len = (uint32_t)haystack_len;
2729 needle.len = (uint32_t)needle_len;
2730
2731 if (!needle.len) {
2732 php_error_docref(NULL, E_WARNING, "Empty delimiter");
2733 RETURN_FALSE;
2734 }
2735
2736 haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding);
2737 if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2738 php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", from_encoding);
2739 RETURN_FALSE;
2740 }
2741
2742 n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, 0, from_encoding);
2743
2744 if (n <0) {
2745 RETURN_FALSE;
2746 }
2747
2748 mblen = mbfl_strlen(&haystack);
2749
2750 if (part) {
2751 ret = mbfl_substr(&haystack, &result, 0, n);
2752 if (ret != NULL) {
2753 // TODO: avoid reallocation ???
2754 RETVAL_STRINGL((char *)ret->val, ret->len);
2755 efree(ret->val);
2756 } else {
2757 RETVAL_FALSE;
2758 }
2759 } else {
2760 len = (mblen - n);
2761 ret = mbfl_substr(&haystack, &result, n, len);
2762 if (ret != NULL) {
2763 // TODO: avoid reallocaton ???
2764 RETVAL_STRINGL((char *)ret->val, ret->len);
2765 efree(ret->val);
2766 } else {
2767 RETVAL_FALSE;
2768 }
2769 }
2770 }
2771 /* }}} */
2772
2773 /* {{{ proto string mb_strrichr(string haystack, string needle[, bool part[, string encoding]])
2774 Finds the last occurrence of a character in a string within another, case insensitive */
PHP_FUNCTION(mb_strrichr)2775 PHP_FUNCTION(mb_strrichr)
2776 {
2777 zend_bool part = 0;
2778 int n, len, mblen;
2779 size_t from_encoding_len, haystack_len, needle_len;
2780 mbfl_string haystack, needle, result, *ret = NULL;
2781 const char *from_encoding = MBSTRG(current_internal_encoding)->name;
2782 mbfl_string_init(&haystack);
2783 mbfl_string_init(&needle);
2784 haystack.no_language = MBSTRG(language);
2785 haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2786 needle.no_language = MBSTRG(language);
2787 needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2788
2789
2790 if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|bs", (char **)&haystack.val, &haystack_len, (char **)&needle.val, &needle_len, &part, &from_encoding, &from_encoding_len) == FAILURE) {
2791 return;
2792 }
2793
2794 if (ZEND_SIZE_T_UINT_OVFL(haystack_len)) {
2795 php_error_docref(NULL, E_WARNING, "Haystack length overflows the max allowed length of %u", UINT_MAX);
2796 return;
2797 } else if (ZEND_SIZE_T_UINT_OVFL(needle_len)) {
2798 php_error_docref(NULL, E_WARNING, "Needle length overflows the max allowed length of %u", UINT_MAX);
2799 return;
2800 }
2801
2802 haystack.len = (uint32_t)haystack_len;
2803 needle.len = (uint32_t)needle_len;
2804
2805 haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding);
2806 if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2807 php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", from_encoding);
2808 RETURN_FALSE;
2809 }
2810
2811 n = php_mb_stripos(1, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, 0, from_encoding);
2812
2813 if (n <0) {
2814 RETURN_FALSE;
2815 }
2816
2817 mblen = mbfl_strlen(&haystack);
2818
2819 if (part) {
2820 ret = mbfl_substr(&haystack, &result, 0, n);
2821 if (ret != NULL) {
2822 // TODO: avoid reallocation ???
2823 RETVAL_STRINGL((char *)ret->val, ret->len);
2824 efree(ret->val);
2825 } else {
2826 RETVAL_FALSE;
2827 }
2828 } else {
2829 len = (mblen - n);
2830 ret = mbfl_substr(&haystack, &result, n, len);
2831 if (ret != NULL) {
2832 // TODO: avoid reallocation ???
2833 RETVAL_STRINGL((char *)ret->val, ret->len);
2834 efree(ret->val);
2835 } else {
2836 RETVAL_FALSE;
2837 }
2838 }
2839 }
2840 /* }}} */
2841
2842 /* {{{ proto int mb_substr_count(string haystack, string needle [, string encoding])
2843 Count the number of substring occurrences */
PHP_FUNCTION(mb_substr_count)2844 PHP_FUNCTION(mb_substr_count)
2845 {
2846 int n;
2847 mbfl_string haystack, needle;
2848 char *enc_name = NULL;
2849 size_t enc_name_len, haystack_len, needle_len;
2850
2851 mbfl_string_init(&haystack);
2852 mbfl_string_init(&needle);
2853 haystack.no_language = MBSTRG(language);
2854 haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2855 needle.no_language = MBSTRG(language);
2856 needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2857
2858 if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|s", (char **)&haystack.val, &haystack_len, (char **)&needle.val, &needle_len, &enc_name, &enc_name_len) == FAILURE) {
2859 return;
2860 }
2861
2862 if (ZEND_SIZE_T_UINT_OVFL(haystack_len)) {
2863 php_error_docref(NULL, E_WARNING, "Haystack length overflows the max allowed length of %u", UINT_MAX);
2864 return;
2865 } else if (ZEND_SIZE_T_UINT_OVFL(needle_len)) {
2866 php_error_docref(NULL, E_WARNING, "Needle length overflows the max allowed length of %u", UINT_MAX);
2867 return;
2868 }
2869
2870 haystack.len = (uint32_t)haystack_len;
2871 needle.len = (uint32_t)needle_len;
2872
2873 if (enc_name != NULL) {
2874 haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2875 if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2876 php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2877 RETURN_FALSE;
2878 }
2879 }
2880
2881 if (needle.len <= 0) {
2882 php_error_docref(NULL, E_WARNING, "Empty substring");
2883 RETURN_FALSE;
2884 }
2885
2886 n = mbfl_substr_count(&haystack, &needle);
2887 if (n >= 0) {
2888 RETVAL_LONG(n);
2889 } else {
2890 RETVAL_FALSE;
2891 }
2892 }
2893 /* }}} */
2894
2895 /* {{{ proto string mb_substr(string str, int start [, int length [, string encoding]])
2896 Returns part of a string */
PHP_FUNCTION(mb_substr)2897 PHP_FUNCTION(mb_substr)
2898 {
2899 char *str, *encoding = NULL;
2900 zend_long from, len;
2901 int mblen;
2902 size_t str_len, encoding_len;
2903 zend_bool len_is_null = 1;
2904 mbfl_string string, result, *ret;
2905
2906 if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl|l!s", &str, &str_len, &from, &len, &len_is_null, &encoding, &encoding_len) == FAILURE) {
2907 return;
2908 }
2909
2910 mbfl_string_init(&string);
2911 string.no_language = MBSTRG(language);
2912 string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2913
2914 if (encoding) {
2915 string.no_encoding = mbfl_name2no_encoding(encoding);
2916 if (string.no_encoding == mbfl_no_encoding_invalid) {
2917 php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", encoding);
2918 RETURN_FALSE;
2919 }
2920 }
2921
2922 string.val = (unsigned char *)str;
2923 string.len = str_len;
2924
2925 if (len_is_null) {
2926 len = str_len;
2927 }
2928
2929 /* measures length */
2930 mblen = 0;
2931 if (from < 0 || len < 0) {
2932 mblen = mbfl_strlen(&string);
2933 }
2934
2935 /* if "from" position is negative, count start position from the end
2936 * of the string
2937 */
2938 if (from < 0) {
2939 from = mblen + from;
2940 if (from < 0) {
2941 from = 0;
2942 }
2943 }
2944
2945 /* if "length" position is negative, set it to the length
2946 * needed to stop that many chars from the end of the string
2947 */
2948 if (len < 0) {
2949 len = (mblen - from) + len;
2950 if (len < 0) {
2951 len = 0;
2952 }
2953 }
2954
2955 if (((MBSTRG(func_overload) & MB_OVERLOAD_STRING) == MB_OVERLOAD_STRING)
2956 && (from >= mbfl_strlen(&string))) {
2957 RETURN_FALSE;
2958 }
2959
2960 if (from > INT_MAX) {
2961 from = INT_MAX;
2962 }
2963 if (len > INT_MAX) {
2964 len = INT_MAX;
2965 }
2966
2967 ret = mbfl_substr(&string, &result, from, len);
2968 if (NULL == ret) {
2969 RETURN_FALSE;
2970 }
2971
2972 // TODO: avoid reallocation ???
2973 RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
2974 efree(ret->val);
2975 }
2976 /* }}} */
2977
2978 /* {{{ proto string mb_strcut(string str, int start [, int length [, string encoding]])
2979 Returns part of a string */
PHP_FUNCTION(mb_strcut)2980 PHP_FUNCTION(mb_strcut)
2981 {
2982 char *encoding = NULL;
2983 zend_long from, len;
2984 size_t encoding_len, string_len;
2985 zend_bool len_is_null = 1;
2986 mbfl_string string, result, *ret;
2987
2988 mbfl_string_init(&string);
2989 string.no_language = MBSTRG(language);
2990 string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2991
2992 if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl|l!s", (char **)&string.val, &string_len, &from, &len, &len_is_null, &encoding, &encoding_len) == FAILURE) {
2993 return;
2994 }
2995
2996 if (ZEND_SIZE_T_UINT_OVFL(string_len)) {
2997 php_error_docref(NULL, E_WARNING, "String length overflows the max allowed length of %u", UINT_MAX);
2998 return;
2999 }
3000
3001 string.len = (uint32_t)string_len;
3002
3003 if (encoding) {
3004 string.no_encoding = mbfl_name2no_encoding(encoding);
3005 if (string.no_encoding == mbfl_no_encoding_invalid) {
3006 php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", encoding);
3007 RETURN_FALSE;
3008 }
3009 }
3010
3011 if (len_is_null) {
3012 len = string.len;
3013 }
3014
3015 /* if "from" position is negative, count start position from the end
3016 * of the string
3017 */
3018 if (from < 0) {
3019 from = string.len + from;
3020 if (from < 0) {
3021 from = 0;
3022 }
3023 }
3024
3025 /* if "length" position is negative, set it to the length
3026 * needed to stop that many chars from the end of the string
3027 */
3028 if (len < 0) {
3029 len = (string.len - from) + len;
3030 if (len < 0) {
3031 len = 0;
3032 }
3033 }
3034
3035 if ((unsigned int)from > string.len) {
3036 RETURN_FALSE;
3037 }
3038
3039 ret = mbfl_strcut(&string, &result, from, len);
3040 if (ret == NULL) {
3041 RETURN_FALSE;
3042 }
3043
3044 // TODO: avoid reallocation ???
3045 RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
3046 efree(ret->val);
3047 }
3048 /* }}} */
3049
3050 /* {{{ proto int mb_strwidth(string str [, string encoding])
3051 Gets terminal width of a string */
PHP_FUNCTION(mb_strwidth)3052 PHP_FUNCTION(mb_strwidth)
3053 {
3054 int n;
3055 mbfl_string string;
3056 char *enc_name = NULL;
3057 size_t enc_name_len, string_len;
3058
3059 mbfl_string_init(&string);
3060
3061 string.no_language = MBSTRG(language);
3062 string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
3063
3064 if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|s", (char **)&string.val, &string_len, &enc_name, &enc_name_len) == FAILURE) {
3065 return;
3066 }
3067
3068 if (ZEND_SIZE_T_UINT_OVFL(string_len)) {
3069 php_error_docref(NULL, E_WARNING, "String length overflows the max allowed length of %u", UINT_MAX);
3070 return;
3071 }
3072
3073 string.len = (uint32_t)string_len;
3074
3075 if (enc_name != NULL) {
3076 string.no_encoding = mbfl_name2no_encoding(enc_name);
3077 if (string.no_encoding == mbfl_no_encoding_invalid) {
3078 php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc_name);
3079 RETURN_FALSE;
3080 }
3081 }
3082
3083 n = mbfl_strwidth(&string);
3084 if (n >= 0) {
3085 RETVAL_LONG(n);
3086 } else {
3087 RETVAL_FALSE;
3088 }
3089 }
3090 /* }}} */
3091
3092 /* {{{ proto string mb_strimwidth(string str, int start, int width [, string trimmarker [, string encoding]])
3093 Trim the string in terminal width */
PHP_FUNCTION(mb_strimwidth)3094 PHP_FUNCTION(mb_strimwidth)
3095 {
3096 char *str, *trimmarker = NULL, *encoding = NULL;
3097 zend_long from, width, swidth;
3098 size_t str_len, trimmarker_len, encoding_len;
3099 mbfl_string string, result, marker, *ret;
3100
3101 if (zend_parse_parameters(ZEND_NUM_ARGS(), "sll|ss", &str, &str_len, &from, &width, &trimmarker, &trimmarker_len, &encoding, &encoding_len) == FAILURE) {
3102 return;
3103 }
3104
3105 mbfl_string_init(&string);
3106 mbfl_string_init(&marker);
3107 string.no_language = MBSTRG(language);
3108 string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
3109 marker.no_language = MBSTRG(language);
3110 marker.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
3111 marker.val = NULL;
3112 marker.len = 0;
3113
3114 if (encoding) {
3115 string.no_encoding = marker.no_encoding = mbfl_name2no_encoding(encoding);
3116 if (string.no_encoding == mbfl_no_encoding_invalid) {
3117 php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", encoding);
3118 RETURN_FALSE;
3119 }
3120 }
3121
3122 string.val = (unsigned char *)str;
3123 string.len = str_len;
3124
3125 if ((from < 0) || (width < 0)) {
3126 swidth = mbfl_strwidth(&string);
3127 }
3128
3129 if (from < 0) {
3130 from += swidth;
3131 }
3132
3133 if (from < 0 || (size_t)from > str_len) {
3134 php_error_docref(NULL, E_WARNING, "Start position is out of range");
3135 RETURN_FALSE;
3136 }
3137
3138 if (width < 0) {
3139 width = swidth + width - from;
3140 }
3141
3142 if (width < 0) {
3143 php_error_docref(NULL, E_WARNING, "Width is out of range");
3144 RETURN_FALSE;
3145 }
3146
3147 if (trimmarker) {
3148 marker.val = (unsigned char *)trimmarker;
3149 marker.len = trimmarker_len;
3150 }
3151
3152 ret = mbfl_strimwidth(&string, &marker, &result, from, width);
3153
3154 if (ret == NULL) {
3155 RETURN_FALSE;
3156 }
3157 // TODO: avoid reallocation ???
3158 RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
3159 efree(ret->val);
3160 }
3161 /* }}} */
3162
3163
3164 /* See mbfl_no_encoding definition for list of unsupported encodings */
php_mb_is_unsupported_no_encoding(enum mbfl_no_encoding no_enc)3165 static inline zend_bool php_mb_is_unsupported_no_encoding(enum mbfl_no_encoding no_enc)
3166 {
3167 return ((no_enc >= mbfl_no_encoding_invalid && no_enc <= mbfl_no_encoding_qprint)
3168 || (no_enc >= mbfl_no_encoding_utf7 && no_enc <= mbfl_no_encoding_utf7imap)
3169 || (no_enc >= mbfl_no_encoding_jis && no_enc <= mbfl_no_encoding_2022jpms)
3170 || (no_enc >= mbfl_no_encoding_cp50220 && no_enc <= mbfl_no_encoding_cp50222));
3171 }
3172
3173
3174 /* See mbfl_no_encoding definition for list of UTF-8 encodings */
php_mb_is_no_encoding_utf8(enum mbfl_no_encoding no_enc)3175 static inline zend_bool php_mb_is_no_encoding_utf8(enum mbfl_no_encoding no_enc)
3176 {
3177 return (no_enc >= mbfl_no_encoding_utf8 && no_enc <= mbfl_no_encoding_utf8_sb);
3178 }
3179
3180
3181 /* {{{ MBSTRING_API char *php_mb_convert_encoding() */
php_mb_convert_encoding(const char * input,size_t length,const char * _to_encoding,const char * _from_encodings,size_t * output_len)3182 MBSTRING_API char *php_mb_convert_encoding(const char *input, size_t length, const char *_to_encoding, const char *_from_encodings, size_t *output_len)
3183 {
3184 mbfl_string string, result, *ret;
3185 const mbfl_encoding *from_encoding, *to_encoding;
3186 mbfl_buffer_converter *convd;
3187 size_t size;
3188 const mbfl_encoding **list;
3189 char *output=NULL;
3190
3191 if (output_len) {
3192 *output_len = 0;
3193 }
3194 if (!input) {
3195 return NULL;
3196 }
3197 /* new encoding */
3198 if (_to_encoding && strlen(_to_encoding)) {
3199 to_encoding = mbfl_name2encoding(_to_encoding);
3200 if (!to_encoding) {
3201 php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", _to_encoding);
3202 return NULL;
3203 }
3204 } else {
3205 to_encoding = MBSTRG(current_internal_encoding);
3206 }
3207
3208 /* initialize string */
3209 mbfl_string_init(&string);
3210 mbfl_string_init(&result);
3211 from_encoding = MBSTRG(current_internal_encoding);
3212 string.no_encoding = from_encoding->no_encoding;
3213 string.no_language = MBSTRG(language);
3214 string.val = (unsigned char *)input;
3215 string.len = length;
3216
3217 /* pre-conversion encoding */
3218 if (_from_encodings) {
3219 list = NULL;
3220 size = 0;
3221 php_mb_parse_encoding_list(_from_encodings, strlen(_from_encodings), &list, &size, 0);
3222 if (size == 1) {
3223 from_encoding = *list;
3224 string.no_encoding = from_encoding->no_encoding;
3225 } else if (size > 1) {
3226 /* auto detect */
3227 from_encoding = mbfl_identify_encoding2(&string, list, size, MBSTRG(strict_detection));
3228 if (from_encoding) {
3229 string.no_encoding = from_encoding->no_encoding;
3230 } else {
3231 php_error_docref(NULL, E_WARNING, "Unable to detect character encoding");
3232 from_encoding = &mbfl_encoding_pass;
3233 to_encoding = from_encoding;
3234 string.no_encoding = from_encoding->no_encoding;
3235 }
3236 } else {
3237 php_error_docref(NULL, E_WARNING, "Illegal character encoding specified");
3238 }
3239 if (list != NULL) {
3240 efree((void *)list);
3241 }
3242 }
3243
3244 /* initialize converter */
3245 convd = mbfl_buffer_converter_new2(from_encoding, to_encoding, string.len);
3246 if (convd == NULL) {
3247 php_error_docref(NULL, E_WARNING, "Unable to create character encoding converter");
3248 return NULL;
3249 }
3250
3251 mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
3252 mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
3253
3254 /* do it */
3255 ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
3256 if (ret) {
3257 if (output_len) {
3258 *output_len = ret->len;
3259 }
3260 output = (char *)ret->val;
3261 }
3262
3263 MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
3264 mbfl_buffer_converter_delete(convd);
3265 return output;
3266 }
3267 /* }}} */
3268
php_mb_convert_encoding_recursive(HashTable * input,const char * _to_encoding,const char * _from_encodings)3269 MBSTRING_API HashTable *php_mb_convert_encoding_recursive(HashTable *input, const char *_to_encoding, const char *_from_encodings)
3270 {
3271 HashTable *output, *chash;
3272 zend_long idx;
3273 zend_string *key;
3274 zval *entry, entry_tmp;
3275 size_t ckey_len, cval_len;
3276 char *ckey, *cval;
3277
3278 if (!input) {
3279 return NULL;
3280 }
3281
3282 if (input->u.v.nApplyCount++ > 1) {
3283 input->u.v.nApplyCount--;
3284 php_error_docref(NULL, E_WARNING, "Cannot convert recursively referenced values");
3285 return NULL;
3286 }
3287 output = (HashTable *)emalloc(sizeof(HashTable));
3288 zend_hash_init(output, zend_hash_num_elements(input), NULL, ZVAL_PTR_DTOR, 0);
3289 ZEND_HASH_FOREACH_KEY_VAL(input, idx, key, entry) {
3290 /* convert key */
3291 if (key) {
3292 ckey = php_mb_convert_encoding(ZSTR_VAL(key), ZSTR_LEN(key), _to_encoding, _from_encodings, &ckey_len);
3293 key = zend_string_init(ckey, ckey_len, 0);
3294 efree(ckey);
3295 }
3296 /* convert value */
3297 ZEND_ASSERT(entry);
3298 switch(Z_TYPE_P(entry)) {
3299 case IS_STRING:
3300 cval = php_mb_convert_encoding(Z_STRVAL_P(entry), Z_STRLEN_P(entry), _to_encoding, _from_encodings, &cval_len);
3301 ZVAL_STRINGL(&entry_tmp, cval, cval_len);
3302 efree(cval);
3303 break;
3304 case IS_NULL:
3305 case IS_TRUE:
3306 case IS_FALSE:
3307 case IS_LONG:
3308 case IS_DOUBLE:
3309 ZVAL_COPY(&entry_tmp, entry);
3310 break;
3311 case IS_ARRAY:
3312 chash = php_mb_convert_encoding_recursive(HASH_OF(entry), _to_encoding, _from_encodings);
3313 if (!chash) {
3314 chash = (HashTable *)emalloc(sizeof(HashTable));
3315 zend_hash_init(chash, 0, NULL, ZVAL_PTR_DTOR, 0);
3316 }
3317 ZVAL_ARR(&entry_tmp, chash);
3318 break;
3319 case IS_OBJECT:
3320 default:
3321 if (key) {
3322 zend_string_release(key);
3323 }
3324 php_error_docref(NULL, E_WARNING, "Object is not supported");
3325 continue;
3326 }
3327 if (key) {
3328 zend_hash_add(output, key, &entry_tmp);
3329 zend_string_release(key);
3330 } else {
3331 zend_hash_index_add(output, idx, &entry_tmp);
3332 }
3333 } ZEND_HASH_FOREACH_END();
3334 input->u.v.nApplyCount--;
3335
3336 return output;
3337 }
3338 /* }}} */
3339
3340
3341 /* {{{ proto string mb_convert_encoding(string str, string to-encoding [, mixed from-encoding])
3342 Returns converted string in desired encoding */
PHP_FUNCTION(mb_convert_encoding)3343 PHP_FUNCTION(mb_convert_encoding)
3344 {
3345 zval *input;
3346 char *arg_new;
3347 size_t new_len;
3348 zval *arg_old = NULL;
3349 size_t size, l, n;
3350 char *_from_encodings = NULL, *ret, *s_free = NULL;
3351
3352 zval *hash_entry;
3353 HashTable *target_hash;
3354
3355 if (zend_parse_parameters(ZEND_NUM_ARGS(), "zs|z", &input, &arg_new, &new_len, &arg_old) == FAILURE) {
3356 return;
3357 }
3358
3359 if (Z_TYPE_P(input) != IS_STRING && Z_TYPE_P(input) != IS_ARRAY) {
3360 convert_to_string(input);
3361 }
3362
3363 if (arg_old) {
3364 switch (Z_TYPE_P(arg_old)) {
3365 case IS_ARRAY:
3366 target_hash = Z_ARRVAL_P(arg_old);
3367 _from_encodings = NULL;
3368
3369 ZEND_HASH_FOREACH_VAL(target_hash, hash_entry) {
3370
3371 convert_to_string_ex(hash_entry);
3372
3373 if ( _from_encodings) {
3374 l = strlen(_from_encodings);
3375 n = strlen(Z_STRVAL_P(hash_entry));
3376 _from_encodings = erealloc(_from_encodings, l+n+2);
3377 memcpy(_from_encodings + l, ",", 1);
3378 memcpy(_from_encodings + l + 1, Z_STRVAL_P(hash_entry), Z_STRLEN_P(hash_entry) + 1);
3379 } else {
3380 _from_encodings = estrdup(Z_STRVAL_P(hash_entry));
3381 }
3382 } ZEND_HASH_FOREACH_END();
3383
3384 if (_from_encodings != NULL && !strlen(_from_encodings)) {
3385 efree(_from_encodings);
3386 _from_encodings = NULL;
3387 }
3388 s_free = _from_encodings;
3389 break;
3390 default:
3391 convert_to_string(arg_old);
3392 _from_encodings = Z_STRVAL_P(arg_old);
3393 break;
3394 }
3395 }
3396
3397 if (Z_TYPE_P(input) == IS_STRING) {
3398 /* new encoding */
3399 ret = php_mb_convert_encoding(Z_STRVAL_P(input), Z_STRLEN_P(input), arg_new, _from_encodings, &size);
3400 if (ret != NULL) {
3401 // TODO: avoid reallocation ???
3402 RETVAL_STRINGL(ret, size); /* the string is already strdup()'ed */
3403 efree(ret);
3404 } else {
3405 RETVAL_FALSE;
3406 }
3407 if (s_free) {
3408 efree(s_free);
3409 }
3410 } else {
3411 HashTable *tmp;
3412 tmp = php_mb_convert_encoding_recursive(HASH_OF(input), arg_new, _from_encodings);
3413 RETURN_ARR(tmp);
3414 }
3415
3416 return;
3417 }
3418 /* }}} */
3419
3420 /* {{{ proto string mb_convert_case(string sourcestring, int mode [, string encoding])
3421 Returns a case-folded version of sourcestring */
PHP_FUNCTION(mb_convert_case)3422 PHP_FUNCTION(mb_convert_case)
3423 {
3424 const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
3425 char *str;
3426 size_t str_len, from_encoding_len;
3427 zend_long case_mode = 0;
3428 char *newstr;
3429 size_t ret_len;
3430
3431 RETVAL_FALSE;
3432 if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl|s!", &str, &str_len,
3433 &case_mode, &from_encoding, &from_encoding_len) == FAILURE) {
3434 return;
3435 }
3436
3437 newstr = php_unicode_convert_case(case_mode, str, (size_t) str_len, &ret_len, from_encoding);
3438
3439 if (newstr) {
3440 // TODO: avoid reallocation ???
3441 RETVAL_STRINGL(newstr, ret_len);
3442 efree(newstr);
3443 }
3444 }
3445 /* }}} */
3446
3447 /* {{{ proto string mb_strtoupper(string sourcestring [, string encoding])
3448 * Returns a uppercased version of sourcestring
3449 */
PHP_FUNCTION(mb_strtoupper)3450 PHP_FUNCTION(mb_strtoupper)
3451 {
3452 const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
3453 char *str;
3454 size_t str_len, from_encoding_len;
3455 char *newstr;
3456 size_t ret_len;
3457
3458 if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|s!", &str, &str_len,
3459 &from_encoding, &from_encoding_len) == FAILURE) {
3460 return;
3461 }
3462 newstr = php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, str, (size_t) str_len, &ret_len, from_encoding);
3463
3464 if (newstr) {
3465 // TODO: avoid reallocation ???
3466 RETVAL_STRINGL(newstr, ret_len);
3467 efree(newstr);
3468 return;
3469 }
3470 RETURN_FALSE;
3471 }
3472 /* }}} */
3473
3474 /* {{{ proto string mb_strtolower(string sourcestring [, string encoding])
3475 * Returns a lowercased version of sourcestring
3476 */
PHP_FUNCTION(mb_strtolower)3477 PHP_FUNCTION(mb_strtolower)
3478 {
3479 const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
3480 char *str;
3481 size_t str_len, from_encoding_len;
3482 char *newstr;
3483 size_t ret_len;
3484
3485 if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|s!", &str, &str_len,
3486 &from_encoding, &from_encoding_len) == FAILURE) {
3487 return;
3488 }
3489 newstr = php_unicode_convert_case(PHP_UNICODE_CASE_LOWER, str, (size_t) str_len, &ret_len, from_encoding);
3490
3491 if (newstr) {
3492 // TODO: avoid reallocation ???
3493 RETVAL_STRINGL(newstr, ret_len);
3494 efree(newstr);
3495 return;
3496 }
3497 RETURN_FALSE;
3498 }
3499 /* }}} */
3500
3501 /* {{{ proto string mb_detect_encoding(string str [, mixed encoding_list [, bool strict]])
3502 Encodings of the given string is returned (as a string) */
PHP_FUNCTION(mb_detect_encoding)3503 PHP_FUNCTION(mb_detect_encoding)
3504 {
3505 char *str;
3506 size_t str_len;
3507 zend_bool strict=0;
3508 zval *encoding_list = NULL;
3509
3510 mbfl_string string;
3511 const mbfl_encoding *ret;
3512 const mbfl_encoding **elist, **list;
3513 size_t size;
3514
3515 if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|z!b", &str, &str_len, &encoding_list, &strict) == FAILURE) {
3516 return;
3517 }
3518
3519 /* make encoding list */
3520 list = NULL;
3521 size = 0;
3522 if (encoding_list) {
3523 switch (Z_TYPE_P(encoding_list)) {
3524 case IS_ARRAY:
3525 if (FAILURE == php_mb_parse_encoding_array(encoding_list, &list, &size, 0)) {
3526 if (list) {
3527 efree(list);
3528 list = NULL;
3529 size = 0;
3530 }
3531 }
3532 break;
3533 default:
3534 convert_to_string(encoding_list);
3535 if (FAILURE == php_mb_parse_encoding_list(Z_STRVAL_P(encoding_list), Z_STRLEN_P(encoding_list), &list, &size, 0)) {
3536 if (list) {
3537 efree(list);
3538 list = NULL;
3539 size = 0;
3540 }
3541 }
3542 break;
3543 }
3544 if (size <= 0) {
3545 php_error_docref(NULL, E_WARNING, "Illegal argument");
3546 }
3547 }
3548
3549 if (ZEND_NUM_ARGS() < 3) {
3550 strict = (zend_bool)MBSTRG(strict_detection);
3551 }
3552
3553 if (size > 0 && list != NULL) {
3554 elist = list;
3555 } else {
3556 elist = MBSTRG(current_detect_order_list);
3557 size = MBSTRG(current_detect_order_list_size);
3558 }
3559
3560 mbfl_string_init(&string);
3561 string.no_language = MBSTRG(language);
3562 string.val = (unsigned char *)str;
3563 string.len = str_len;
3564 ret = mbfl_identify_encoding2(&string, elist, size, strict);
3565
3566 if (list != NULL) {
3567 efree((void *)list);
3568 }
3569
3570 if (ret == NULL) {
3571 RETURN_FALSE;
3572 }
3573
3574 RETVAL_STRING((char *)ret->name);
3575 }
3576 /* }}} */
3577
3578 /* {{{ proto mixed mb_list_encodings()
3579 Returns an array of all supported entity encodings */
PHP_FUNCTION(mb_list_encodings)3580 PHP_FUNCTION(mb_list_encodings)
3581 {
3582 const mbfl_encoding **encodings;
3583 const mbfl_encoding *encoding;
3584 int i;
3585
3586 if (zend_parse_parameters_none() == FAILURE) {
3587 return;
3588 }
3589
3590 array_init(return_value);
3591 i = 0;
3592 encodings = mbfl_get_supported_encodings();
3593 while ((encoding = encodings[i++]) != NULL) {
3594 add_next_index_string(return_value, (char *) encoding->name);
3595 }
3596 }
3597 /* }}} */
3598
3599 /* {{{ proto array mb_encoding_aliases(string encoding)
3600 Returns an array of the aliases of a given encoding name */
PHP_FUNCTION(mb_encoding_aliases)3601 PHP_FUNCTION(mb_encoding_aliases)
3602 {
3603 const mbfl_encoding *encoding;
3604 char *name = NULL;
3605 size_t name_len;
3606
3607 if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &name, &name_len) == FAILURE) {
3608 return;
3609 }
3610
3611 encoding = mbfl_name2encoding(name);
3612 if (!encoding) {
3613 php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", name);
3614 RETURN_FALSE;
3615 }
3616
3617 array_init(return_value);
3618 if (encoding->aliases != NULL) {
3619 const char **alias;
3620 for (alias = *encoding->aliases; *alias; ++alias) {
3621 add_next_index_string(return_value, (char *)*alias);
3622 }
3623 }
3624 }
3625 /* }}} */
3626
3627 /* {{{ proto string mb_encode_mimeheader(string str [, string charset [, string transfer-encoding [, string linefeed [, int indent]]]])
3628 Converts the string to MIME "encoded-word" in the format of =?charset?(B|Q)?encoded_string?= */
PHP_FUNCTION(mb_encode_mimeheader)3629 PHP_FUNCTION(mb_encode_mimeheader)
3630 {
3631 enum mbfl_no_encoding charset, transenc;
3632 mbfl_string string, result, *ret;
3633 char *charset_name = NULL;
3634 size_t charset_name_len;
3635 char *trans_enc_name = NULL;
3636 size_t trans_enc_name_len;
3637 char *linefeed = "\r\n";
3638 size_t linefeed_len, string_len;
3639 zend_long indent = 0;
3640
3641 mbfl_string_init(&string);
3642 string.no_language = MBSTRG(language);
3643 string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
3644
3645 if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|sssl", (char **)&string.val, &string_len, &charset_name, &charset_name_len, &trans_enc_name, &trans_enc_name_len, &linefeed, &linefeed_len, &indent) == FAILURE) {
3646 return;
3647 }
3648
3649 if (ZEND_SIZE_T_UINT_OVFL(string_len)) {
3650 php_error_docref(NULL, E_WARNING, "String length overflows the max allowed length of %u", UINT_MAX);
3651 return;
3652 }
3653
3654 string.len = (uint32_t)string_len;
3655
3656 charset = mbfl_no_encoding_pass;
3657 transenc = mbfl_no_encoding_base64;
3658
3659 if (charset_name != NULL) {
3660 charset = mbfl_name2no_encoding(charset_name);
3661 if (charset == mbfl_no_encoding_invalid) {
3662 php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", charset_name);
3663 RETURN_FALSE;
3664 }
3665 } else {
3666 const mbfl_language *lang = mbfl_no2language(MBSTRG(language));
3667 if (lang != NULL) {
3668 charset = lang->mail_charset;
3669 transenc = lang->mail_header_encoding;
3670 }
3671 }
3672
3673 if (trans_enc_name != NULL) {
3674 if (*trans_enc_name == 'B' || *trans_enc_name == 'b') {
3675 transenc = mbfl_no_encoding_base64;
3676 } else if (*trans_enc_name == 'Q' || *trans_enc_name == 'q') {
3677 transenc = mbfl_no_encoding_qprint;
3678 }
3679 }
3680
3681 mbfl_string_init(&result);
3682 ret = mbfl_mime_header_encode(&string, &result, charset, transenc, linefeed, indent);
3683 if (ret != NULL) {
3684 // TODO: avoid reallocation ???
3685 RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
3686 efree(ret->val);
3687 } else {
3688 RETVAL_FALSE;
3689 }
3690 }
3691 /* }}} */
3692
3693 /* {{{ proto string mb_decode_mimeheader(string string)
3694 Decodes the MIME "encoded-word" in the string */
PHP_FUNCTION(mb_decode_mimeheader)3695 PHP_FUNCTION(mb_decode_mimeheader)
3696 {
3697 mbfl_string string, result, *ret;
3698 size_t string_len;
3699
3700 mbfl_string_init(&string);
3701 string.no_language = MBSTRG(language);
3702 string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
3703
3704 if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", (char **)&string.val, &string_len) == FAILURE) {
3705 return;
3706 }
3707
3708 if (ZEND_SIZE_T_UINT_OVFL(string_len)) {
3709 php_error_docref(NULL, E_WARNING, "String length overflows the max allowed length of %u", UINT_MAX);
3710 return;
3711 }
3712
3713 string.len = (uint32_t)string_len;
3714
3715 mbfl_string_init(&result);
3716 ret = mbfl_mime_header_decode(&string, &result, MBSTRG(current_internal_encoding)->no_encoding);
3717 if (ret != NULL) {
3718 // TODO: avoid reallocation ???
3719 RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
3720 efree(ret->val);
3721 } else {
3722 RETVAL_FALSE;
3723 }
3724 }
3725 /* }}} */
3726
3727 /* {{{ proto string mb_convert_kana(string str [, string option] [, string encoding])
3728 Conversion between full-width character and half-width character (Japanese) */
PHP_FUNCTION(mb_convert_kana)3729 PHP_FUNCTION(mb_convert_kana)
3730 {
3731 int opt, i;
3732 mbfl_string string, result, *ret;
3733 char *optstr = NULL;
3734 size_t optstr_len;
3735 char *encname = NULL;
3736 size_t encname_len, string_len;
3737
3738 mbfl_string_init(&string);
3739 string.no_language = MBSTRG(language);
3740 string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
3741
3742 if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|ss", (char **)&string.val, &string_len, &optstr, &optstr_len, &encname, &encname_len) == FAILURE) {
3743 return;
3744 }
3745
3746 if (ZEND_SIZE_T_UINT_OVFL(string_len)) {
3747 php_error_docref(NULL, E_WARNING, "String length overflows the max allowed length of %u", UINT_MAX);
3748 return;
3749 }
3750
3751 string.len = (uint32_t)string_len;
3752
3753 /* option */
3754 if (optstr != NULL) {
3755 char *p = optstr;
3756 int n = optstr_len;
3757 i = 0;
3758 opt = 0;
3759 while (i < n) {
3760 i++;
3761 switch (*p++) {
3762 case 'A':
3763 opt |= 0x1;
3764 break;
3765 case 'a':
3766 opt |= 0x10;
3767 break;
3768 case 'R':
3769 opt |= 0x2;
3770 break;
3771 case 'r':
3772 opt |= 0x20;
3773 break;
3774 case 'N':
3775 opt |= 0x4;
3776 break;
3777 case 'n':
3778 opt |= 0x40;
3779 break;
3780 case 'S':
3781 opt |= 0x8;
3782 break;
3783 case 's':
3784 opt |= 0x80;
3785 break;
3786 case 'K':
3787 opt |= 0x100;
3788 break;
3789 case 'k':
3790 opt |= 0x1000;
3791 break;
3792 case 'H':
3793 opt |= 0x200;
3794 break;
3795 case 'h':
3796 opt |= 0x2000;
3797 break;
3798 case 'V':
3799 opt |= 0x800;
3800 break;
3801 case 'C':
3802 opt |= 0x10000;
3803 break;
3804 case 'c':
3805 opt |= 0x20000;
3806 break;
3807 case 'M':
3808 opt |= 0x100000;
3809 break;
3810 case 'm':
3811 opt |= 0x200000;
3812 break;
3813 }
3814 }
3815 } else {
3816 opt = 0x900;
3817 }
3818
3819 /* encoding */
3820 if (encname != NULL) {
3821 string.no_encoding = mbfl_name2no_encoding(encname);
3822 if (string.no_encoding == mbfl_no_encoding_invalid) {
3823 php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", encname);
3824 RETURN_FALSE;
3825 }
3826 }
3827
3828 ret = mbfl_ja_jp_hantozen(&string, &result, opt);
3829 if (ret != NULL) {
3830 // TODO: avoid reallocation ???
3831 RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
3832 efree(ret->val);
3833 } else {
3834 RETVAL_FALSE;
3835 }
3836 }
3837 /* }}} */
3838
3839 #define PHP_MBSTR_STACK_BLOCK_SIZE 32
3840
3841 /* {{{ proto string mb_convert_variables(string to-encoding, mixed from-encoding, mixed vars [, ...])
3842 Converts the string resource in variables to desired encoding */
PHP_FUNCTION(mb_convert_variables)3843 PHP_FUNCTION(mb_convert_variables)
3844 {
3845 zval *args, *stack, *var, *hash_entry, *hash_entry_ptr, *zfrom_enc;
3846 HashTable *target_hash;
3847 mbfl_string string, result, *ret;
3848 const mbfl_encoding *from_encoding, *to_encoding;
3849 mbfl_encoding_detector *identd;
3850 mbfl_buffer_converter *convd;
3851 int n, argc, stack_level, stack_max;
3852 size_t to_enc_len;
3853 size_t elistsz;
3854 const mbfl_encoding **elist;
3855 char *to_enc;
3856 void *ptmp;
3857 int recursion_error = 0;
3858
3859 if (zend_parse_parameters(ZEND_NUM_ARGS(), "sz+", &to_enc, &to_enc_len, &zfrom_enc, &args, &argc) == FAILURE) {
3860 return;
3861 }
3862
3863 /* new encoding */
3864 to_encoding = mbfl_name2encoding(to_enc);
3865 if (!to_encoding) {
3866 php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", to_enc);
3867 RETURN_FALSE;
3868 }
3869
3870 /* initialize string */
3871 mbfl_string_init(&string);
3872 mbfl_string_init(&result);
3873 from_encoding = MBSTRG(current_internal_encoding);
3874 string.no_encoding = from_encoding->no_encoding;
3875 string.no_language = MBSTRG(language);
3876
3877 /* pre-conversion encoding */
3878 elist = NULL;
3879 elistsz = 0;
3880 switch (Z_TYPE_P(zfrom_enc)) {
3881 case IS_ARRAY:
3882 php_mb_parse_encoding_array(zfrom_enc, &elist, &elistsz, 0);
3883 break;
3884 default:
3885 convert_to_string_ex(zfrom_enc);
3886 php_mb_parse_encoding_list(Z_STRVAL_P(zfrom_enc), Z_STRLEN_P(zfrom_enc), &elist, &elistsz, 0);
3887 break;
3888 }
3889
3890 if (elistsz <= 0) {
3891 from_encoding = &mbfl_encoding_pass;
3892 } else if (elistsz == 1) {
3893 from_encoding = *elist;
3894 } else {
3895 /* auto detect */
3896 from_encoding = NULL;
3897 stack_max = PHP_MBSTR_STACK_BLOCK_SIZE;
3898 stack = (zval *)safe_emalloc(stack_max, sizeof(zval), 0);
3899 stack_level = 0;
3900 identd = mbfl_encoding_detector_new2(elist, elistsz, MBSTRG(strict_detection));
3901 if (identd != NULL) {
3902 n = 0;
3903 while (n < argc || stack_level > 0) {
3904 if (stack_level <= 0) {
3905 var = &args[n++];
3906 ZVAL_DEREF(var);
3907 SEPARATE_ZVAL_NOREF(var);
3908 if (Z_TYPE_P(var) == IS_ARRAY || Z_TYPE_P(var) == IS_OBJECT) {
3909 target_hash = HASH_OF(var);
3910 if (target_hash != NULL) {
3911 zend_hash_internal_pointer_reset(target_hash);
3912 }
3913 }
3914 } else {
3915 stack_level--;
3916 var = &stack[stack_level];
3917 }
3918 if (Z_TYPE_P(var) == IS_ARRAY || Z_TYPE_P(var) == IS_OBJECT) {
3919 target_hash = HASH_OF(var);
3920 if (target_hash != NULL) {
3921 while ((hash_entry = zend_hash_get_current_data(target_hash)) != NULL) {
3922 if (Z_REFCOUNTED_P(var)) {
3923 if (++target_hash->u.v.nApplyCount > 1) {
3924 --target_hash->u.v.nApplyCount;
3925 recursion_error = 1;
3926 goto detect_end;
3927 }
3928 }
3929 zend_hash_move_forward(target_hash);
3930 if (Z_TYPE_P(hash_entry) == IS_INDIRECT) {
3931 hash_entry = Z_INDIRECT_P(hash_entry);
3932 }
3933 ZVAL_DEREF(hash_entry);
3934 if (Z_TYPE_P(hash_entry) == IS_ARRAY || Z_TYPE_P(hash_entry) == IS_OBJECT) {
3935 if (stack_level >= stack_max) {
3936 stack_max += PHP_MBSTR_STACK_BLOCK_SIZE;
3937 ptmp = erealloc(stack, sizeof(zval) * stack_max);
3938 stack = (zval *)ptmp;
3939 }
3940 ZVAL_COPY_VALUE(&stack[stack_level], var);
3941 stack_level++;
3942 var = hash_entry;
3943 target_hash = HASH_OF(var);
3944 if (target_hash != NULL) {
3945 zend_hash_internal_pointer_reset(target_hash);
3946 continue;
3947 }
3948 } else if (Z_TYPE_P(hash_entry) == IS_STRING) {
3949 string.val = (unsigned char *)Z_STRVAL_P(hash_entry);
3950 string.len = Z_STRLEN_P(hash_entry);
3951 if (mbfl_encoding_detector_feed(identd, &string)) {
3952 goto detect_end; /* complete detecting */
3953 }
3954 }
3955 }
3956 }
3957 } else if (Z_TYPE_P(var) == IS_STRING) {
3958 string.val = (unsigned char *)Z_STRVAL_P(var);
3959 string.len = Z_STRLEN_P(var);
3960 if (mbfl_encoding_detector_feed(identd, &string)) {
3961 goto detect_end; /* complete detecting */
3962 }
3963 }
3964 }
3965 detect_end:
3966 from_encoding = mbfl_encoding_detector_judge2(identd);
3967 mbfl_encoding_detector_delete(identd);
3968 }
3969 if (recursion_error) {
3970 while(stack_level-- && (var = &stack[stack_level])) {
3971 if (Z_REFCOUNTED_P(var)) {
3972 if (HASH_OF(var)->u.v.nApplyCount > 1) {
3973 HASH_OF(var)->u.v.nApplyCount--;
3974 }
3975 }
3976 }
3977 efree(stack);
3978 if (elist != NULL) {
3979 efree((void *)elist);
3980 }
3981 php_error_docref(NULL, E_WARNING, "Cannot handle recursive references");
3982 RETURN_FALSE;
3983 }
3984 efree(stack);
3985
3986 if (!from_encoding) {
3987 php_error_docref(NULL, E_WARNING, "Unable to detect encoding");
3988 from_encoding = &mbfl_encoding_pass;
3989 }
3990 }
3991 if (elist != NULL) {
3992 efree((void *)elist);
3993 }
3994 /* create converter */
3995 convd = NULL;
3996 if (from_encoding != &mbfl_encoding_pass) {
3997 convd = mbfl_buffer_converter_new2(from_encoding, to_encoding, 0);
3998 if (convd == NULL) {
3999 php_error_docref(NULL, E_WARNING, "Unable to create converter");
4000 RETURN_FALSE;
4001 }
4002 mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
4003 mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
4004 }
4005
4006 /* convert */
4007 if (convd != NULL) {
4008 stack_max = PHP_MBSTR_STACK_BLOCK_SIZE;
4009 stack = (zval*)safe_emalloc(stack_max, sizeof(zval), 0);
4010 stack_level = 0;
4011 n = 0;
4012 while (n < argc || stack_level > 0) {
4013 if (stack_level <= 0) {
4014 var = &args[n++];
4015 ZVAL_DEREF(var);
4016 SEPARATE_ZVAL_NOREF(var);
4017 if (Z_TYPE_P(var) == IS_ARRAY || Z_TYPE_P(var) == IS_OBJECT) {
4018 target_hash = HASH_OF(var);
4019 if (target_hash != NULL) {
4020 zend_hash_internal_pointer_reset(target_hash);
4021 }
4022 }
4023 } else {
4024 stack_level--;
4025 var = &stack[stack_level];
4026 }
4027 if (Z_TYPE_P(var) == IS_ARRAY || Z_TYPE_P(var) == IS_OBJECT) {
4028 target_hash = HASH_OF(var);
4029 if (target_hash != NULL) {
4030 while ((hash_entry_ptr = zend_hash_get_current_data(target_hash)) != NULL) {
4031 zend_hash_move_forward(target_hash);
4032 if (Z_TYPE_P(hash_entry_ptr) == IS_INDIRECT) {
4033 hash_entry_ptr = Z_INDIRECT_P(hash_entry_ptr);
4034 }
4035 hash_entry = hash_entry_ptr;
4036 ZVAL_DEREF(hash_entry);
4037 if (Z_TYPE_P(hash_entry) == IS_ARRAY || Z_TYPE_P(hash_entry) == IS_OBJECT) {
4038 if (Z_REFCOUNTED_P(hash_entry)) {
4039 if (++(HASH_OF(hash_entry)->u.v.nApplyCount) > 1) {
4040 --(HASH_OF(hash_entry)->u.v.nApplyCount);
4041 recursion_error = 1;
4042 goto conv_end;
4043 }
4044 }
4045 if (stack_level >= stack_max) {
4046 stack_max += PHP_MBSTR_STACK_BLOCK_SIZE;
4047 ptmp = erealloc(stack, sizeof(zval) * stack_max);
4048 stack = (zval *)ptmp;
4049 }
4050 ZVAL_COPY_VALUE(&stack[stack_level], var);
4051 stack_level++;
4052 var = hash_entry;
4053 SEPARATE_ZVAL(hash_entry);
4054 target_hash = HASH_OF(var);
4055 if (target_hash != NULL) {
4056 zend_hash_internal_pointer_reset(target_hash);
4057 continue;
4058 }
4059 } else if (Z_TYPE_P(hash_entry) == IS_STRING) {
4060 string.val = (unsigned char *)Z_STRVAL_P(hash_entry);
4061 string.len = Z_STRLEN_P(hash_entry);
4062 ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
4063 if (ret != NULL) {
4064 zval_ptr_dtor(hash_entry_ptr);
4065 // TODO: avoid reallocation ???
4066 ZVAL_STRINGL(hash_entry_ptr, (char *)ret->val, ret->len);
4067 efree(ret->val);
4068 }
4069 }
4070 }
4071 }
4072 } else if (Z_TYPE_P(var) == IS_STRING) {
4073 string.val = (unsigned char *)Z_STRVAL_P(var);
4074 string.len = Z_STRLEN_P(var);
4075 ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
4076 if (ret != NULL) {
4077 zval_ptr_dtor(var);
4078 // TODO: avoid reallocation ???
4079 ZVAL_STRINGL(var, (char *)ret->val, ret->len);
4080 efree(ret->val);
4081 }
4082 }
4083 }
4084
4085 conv_end:
4086 MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
4087 mbfl_buffer_converter_delete(convd);
4088
4089 if (recursion_error) {
4090 while(stack_level-- && (var = &stack[stack_level])) {
4091 if (Z_REFCOUNTED_P(var)) {
4092 if (HASH_OF(var)->u.v.nApplyCount > 1) {
4093 HASH_OF(var)->u.v.nApplyCount--;
4094 }
4095 }
4096 }
4097 efree(stack);
4098 php_error_docref(NULL, E_WARNING, "Cannot handle recursive references");
4099 RETURN_FALSE;
4100 }
4101 efree(stack);
4102 }
4103
4104 if (from_encoding) {
4105 RETURN_STRING(from_encoding->name);
4106 } else {
4107 RETURN_FALSE;
4108 }
4109 }
4110 /* }}} */
4111
4112 /* {{{ HTML numeric entity */
4113 /* {{{ static void php_mb_numericentity_exec() */
4114 static void
php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS,int type)4115 php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS, int type)
4116 {
4117 char *str, *encoding = NULL;
4118 size_t str_len, encoding_len;
4119 zval *zconvmap, *hash_entry;
4120 HashTable *target_hash;
4121 int i, *convmap, *mapelm, mapsize=0;
4122 zend_bool is_hex = 0;
4123 mbfl_string string, result, *ret;
4124 enum mbfl_no_encoding no_encoding;
4125
4126 if (zend_parse_parameters(ZEND_NUM_ARGS(), "sz|sb", &str, &str_len, &zconvmap, &encoding, &encoding_len, &is_hex) == FAILURE) {
4127 return;
4128 }
4129
4130 mbfl_string_init(&string);
4131 string.no_language = MBSTRG(language);
4132 string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
4133 string.val = (unsigned char *)str;
4134 string.len = str_len;
4135
4136 /* encoding */
4137 if (encoding && encoding_len > 0) {
4138 no_encoding = mbfl_name2no_encoding(encoding);
4139 if (no_encoding == mbfl_no_encoding_invalid) {
4140 php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", encoding);
4141 RETURN_FALSE;
4142 } else {
4143 string.no_encoding = no_encoding;
4144 }
4145 }
4146
4147 if (type == 0 && is_hex) {
4148 type = 2; /* output in hex format */
4149 }
4150
4151 /* conversion map */
4152 convmap = NULL;
4153 if (Z_TYPE_P(zconvmap) == IS_ARRAY) {
4154 target_hash = Z_ARRVAL_P(zconvmap);
4155 i = zend_hash_num_elements(target_hash);
4156 if (i > 0) {
4157 convmap = (int *)safe_emalloc(i, sizeof(int), 0);
4158 mapelm = convmap;
4159 mapsize = 0;
4160 ZEND_HASH_FOREACH_VAL(target_hash, hash_entry) {
4161 convert_to_long_ex(hash_entry);
4162 *mapelm++ = Z_LVAL_P(hash_entry);
4163 mapsize++;
4164 } ZEND_HASH_FOREACH_END();
4165 }
4166 }
4167 if (convmap == NULL) {
4168 RETURN_FALSE;
4169 }
4170 mapsize /= 4;
4171
4172 ret = mbfl_html_numeric_entity(&string, &result, convmap, mapsize, type);
4173 if (ret != NULL) {
4174 // TODO: avoid reallocation ???
4175 RETVAL_STRINGL((char *)ret->val, ret->len);
4176 efree(ret->val);
4177 } else {
4178 RETVAL_FALSE;
4179 }
4180 efree((void *)convmap);
4181 }
4182 /* }}} */
4183
4184 /* {{{ proto string mb_encode_numericentity(string string, array convmap [, string encoding [, bool is_hex]])
4185 Converts specified characters to HTML numeric entities */
PHP_FUNCTION(mb_encode_numericentity)4186 PHP_FUNCTION(mb_encode_numericentity)
4187 {
4188 php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
4189 }
4190 /* }}} */
4191
4192 /* {{{ proto string mb_decode_numericentity(string string, array convmap [, string encoding])
4193 Converts HTML numeric entities to character code */
PHP_FUNCTION(mb_decode_numericentity)4194 PHP_FUNCTION(mb_decode_numericentity)
4195 {
4196 php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
4197 }
4198 /* }}} */
4199 /* }}} */
4200
/* {{{ proto bool mb_send_mail(string to, string subject, string message [, mixed additional_headers [, string additional_parameters]])
 *  Sends an email message with MIME scheme. additional_headers may be a string or an array.
 */
4204
/*
 * If str[pos] starts a folded-header separator (CR LF followed by a space or
 * tab), advance pos to the last whitespace character of the separator and
 * `continue` the enclosing loop so the separator is left untouched.
 *
 * Must be used directly inside a loop body: it expands to an `if` statement
 * containing `continue`, which is why it cannot be wrapped in do/while(0).
 */
#define SKIP_LONG_HEADER_SEP_MBSTRING(str, pos) \
	if ((str)[(pos)] == '\r' && (str)[(pos) + 1] == '\n' && ((str)[(pos) + 2] == ' ' || (str)[(pos) + 2] == '\t')) { \
		(pos) += 2; \
		while ((str)[(pos) + 1] == ' ' || (str)[(pos) + 1] == '\t') { \
			(pos)++; \
		} \
		continue; \
	}

/*
 * Replace every NUL byte in the len bytes starting at str with a space.
 * Writes through str, and uses the `char *pp, *ee` variables that the
 * calling function must declare.
 */
#define MAIL_ASCIIZ_CHECK_MBSTRING(str, len) \
	do { \
		pp = (str); \
		ee = pp + (len); \
		while ((pp = memchr(pp, '\0', (ee - pp)))) { \
			*pp = ' '; \
		} \
	} while (0)
4220
_php_mbstr_parse_mail_headers(HashTable * ht,const char * str,size_t str_len)4221 static int _php_mbstr_parse_mail_headers(HashTable *ht, const char *str, size_t str_len)
4222 {
4223 const char *ps;
4224 size_t icnt;
4225 int state = 0;
4226 int crlf_state = -1;
4227 char *token = NULL;
4228 size_t token_pos = 0;
4229 zend_string *fld_name, *fld_val;
4230
4231 ps = str;
4232 icnt = str_len;
4233 fld_name = fld_val = NULL;
4234
4235 /*
4236 * C o n t e n t - T y p e : t e x t / h t m l \r\n
4237 * ^ ^^^^^^^^^^^^^^^^^^^^^ ^^^ ^^^^^^^^^^^^^^^^^ ^^^^
4238 * state 0 1 2 3
4239 *
4240 * C o n t e n t - T y p e : t e x t / h t m l \r\n
4241 * ^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^
4242 * crlf_state -1 0 1 -1
4243 *
4244 */
4245
4246 while (icnt > 0) {
4247 switch (*ps) {
4248 case ':':
4249 if (crlf_state == 1) {
4250 token_pos++;
4251 }
4252
4253 if (state == 0 || state == 1) {
4254 if(token && token_pos > 0) {
4255 fld_name = zend_string_init(token, token_pos, 0);
4256 }
4257 state = 2;
4258 } else {
4259 token_pos++;
4260 }
4261
4262 crlf_state = 0;
4263 break;
4264
4265 case '\n':
4266 if (crlf_state == -1) {
4267 goto out;
4268 }
4269 crlf_state = -1;
4270 break;
4271
4272 case '\r':
4273 if (crlf_state == 1) {
4274 token_pos++;
4275 } else {
4276 crlf_state = 1;
4277 }
4278 break;
4279
4280 case ' ': case '\t':
4281 if (crlf_state == -1) {
4282 if (state == 3) {
4283 /* continuing from the previous line */
4284 state = 4;
4285 } else {
4286 /* simply skipping this new line */
4287 state = 5;
4288 }
4289 } else {
4290 if (crlf_state == 1) {
4291 token_pos++;
4292 }
4293 if (state == 1 || state == 3) {
4294 token_pos++;
4295 }
4296 }
4297 crlf_state = 0;
4298 break;
4299
4300 default:
4301 switch (state) {
4302 case 0:
4303 token = (char*)ps;
4304 token_pos = 0;
4305 state = 1;
4306 break;
4307
4308 case 2:
4309 if (crlf_state != -1) {
4310 token = (char*)ps;
4311 token_pos = 0;
4312
4313 state = 3;
4314 break;
4315 }
4316 /* break is missing intentionally */
4317
4318 case 3:
4319 if (crlf_state == -1) {
4320 if(token && token_pos > 0) {
4321 fld_val = zend_string_init(token, token_pos, 0);
4322 }
4323
4324 if (fld_name != NULL && fld_val != NULL) {
4325 zval val;
4326 /* FIXME: some locale free implementation is
4327 * really required here,,, */
4328 php_strtoupper(ZSTR_VAL(fld_name), ZSTR_LEN(fld_name));
4329 ZVAL_STR(&val, fld_val);
4330
4331 zend_hash_update(ht, fld_name, &val);
4332
4333 zend_string_release(fld_name);
4334 }
4335
4336 fld_name = fld_val = NULL;
4337 token = (char*)ps;
4338 token_pos = 0;
4339
4340 state = 1;
4341 }
4342 break;
4343
4344 case 4:
4345 token_pos++;
4346 state = 3;
4347 break;
4348 }
4349
4350 if (crlf_state == 1) {
4351 token_pos++;
4352 }
4353
4354 token_pos++;
4355
4356 crlf_state = 0;
4357 break;
4358 }
4359 ps++, icnt--;
4360 }
4361 out:
4362 if (state == 2) {
4363 token = "";
4364 token_pos = 0;
4365
4366 state = 3;
4367 }
4368 if (state == 3) {
4369 if(token && token_pos > 0) {
4370 fld_val = zend_string_init(token, token_pos, 0);
4371 }
4372 if (fld_name != NULL && fld_val != NULL) {
4373 zval val;
4374 /* FIXME: some locale free implementation is
4375 * really required here,,, */
4376 php_strtoupper(ZSTR_VAL(fld_name), ZSTR_LEN(fld_name));
4377 ZVAL_STR(&val, fld_val);
4378
4379 zend_hash_update(ht, fld_name, &val);
4380
4381 zend_string_release(fld_name);
4382 }
4383 }
4384 return state;
4385 }
4386
PHP_FUNCTION(mb_send_mail)4387 PHP_FUNCTION(mb_send_mail)
4388 {
4389 int n;
4390 char *to = NULL;
4391 size_t to_len;
4392 char *message = NULL;
4393 size_t message_len;
4394 char *subject = NULL;
4395 size_t subject_len;
4396 zval *headers = NULL;
4397 zend_string *extra_cmd = NULL;
4398 zend_string *str_headers=NULL, *tmp_headers;
4399 int i;
4400 char *to_r = NULL;
4401 char *force_extra_parameters = INI_STR("mail.force_extra_parameters");
4402 struct {
4403 int cnt_type:1;
4404 int cnt_trans_enc:1;
4405 } suppressed_hdrs = { 0, 0 };
4406
4407 char *message_buf = NULL, *subject_buf = NULL, *p;
4408 mbfl_string orig_str, conv_str;
4409 mbfl_string *pstr; /* pointer to mbfl string for return value */
4410 enum mbfl_no_encoding
4411 tran_cs, /* transfar text charset */
4412 head_enc, /* header transfar encoding */
4413 body_enc; /* body transfar encoding */
4414 mbfl_memory_device device; /* automatic allocateable buffer for additional header */
4415 const mbfl_language *lang;
4416 int err = 0;
4417 HashTable ht_headers;
4418 zval *s;
4419 extern void mbfl_memory_device_unput(mbfl_memory_device *device);
4420 char *pp, *ee;
4421
4422 /* initialize */
4423 mbfl_memory_device_init(&device, 0, 0);
4424 mbfl_string_init(&orig_str);
4425 mbfl_string_init(&conv_str);
4426
4427 /* character-set, transfer-encoding */
4428 tran_cs = mbfl_no_encoding_utf8;
4429 head_enc = mbfl_no_encoding_base64;
4430 body_enc = mbfl_no_encoding_base64;
4431 lang = mbfl_no2language(MBSTRG(language));
4432 if (lang != NULL) {
4433 tran_cs = lang->mail_charset;
4434 head_enc = lang->mail_header_encoding;
4435 body_enc = lang->mail_body_encoding;
4436 }
4437
4438 if (zend_parse_parameters(ZEND_NUM_ARGS(), "sss|zS", &to, &to_len, &subject, &subject_len, &message, &message_len, &headers, &extra_cmd) == FAILURE) {
4439 return;
4440 }
4441
4442 /* ASCIIZ check */
4443 MAIL_ASCIIZ_CHECK_MBSTRING(to, to_len);
4444 MAIL_ASCIIZ_CHECK_MBSTRING(subject, subject_len);
4445 MAIL_ASCIIZ_CHECK_MBSTRING(message, message_len);
4446 if (headers) {
4447 switch(Z_TYPE_P(headers)) {
4448 case IS_STRING:
4449 tmp_headers = zend_string_init(Z_STRVAL_P(headers), Z_STRLEN_P(headers), 0);
4450 MAIL_ASCIIZ_CHECK_MBSTRING(ZSTR_VAL(tmp_headers), ZSTR_LEN(tmp_headers));
4451 str_headers = php_trim(tmp_headers, NULL, 0, 2);
4452 zend_string_release(tmp_headers);
4453 break;
4454 case IS_ARRAY:
4455 str_headers = php_mail_build_headers(headers);
4456 break;
4457 default:
4458 php_error_docref(NULL, E_WARNING, "headers parameter must be string or array");
4459 RETURN_FALSE;
4460 }
4461 }
4462 if (extra_cmd) {
4463 MAIL_ASCIIZ_CHECK_MBSTRING(ZSTR_VAL(extra_cmd), ZSTR_LEN(extra_cmd));
4464 }
4465
4466 zend_hash_init(&ht_headers, 0, NULL, ZVAL_PTR_DTOR, 0);
4467
4468 if (str_headers != NULL) {
4469 _php_mbstr_parse_mail_headers(&ht_headers, ZSTR_VAL(str_headers), ZSTR_LEN(str_headers));
4470 }
4471
4472 if ((s = zend_hash_str_find(&ht_headers, "CONTENT-TYPE", sizeof("CONTENT-TYPE") - 1))) {
4473 char *tmp;
4474 char *param_name;
4475 char *charset = NULL;
4476
4477 ZEND_ASSERT(Z_TYPE_P(s) == IS_STRING);
4478 p = strchr(Z_STRVAL_P(s), ';');
4479
4480 if (p != NULL) {
4481 /* skipping the padded spaces */
4482 do {
4483 ++p;
4484 } while (*p == ' ' || *p == '\t');
4485
4486 if (*p != '\0') {
4487 if ((param_name = php_strtok_r(p, "= ", &tmp)) != NULL) {
4488 if (strcasecmp(param_name, "charset") == 0) {
4489 enum mbfl_no_encoding _tran_cs = tran_cs;
4490
4491 charset = php_strtok_r(NULL, "= \"", &tmp);
4492 if (charset != NULL) {
4493 _tran_cs = mbfl_name2no_encoding(charset);
4494 }
4495
4496 if (_tran_cs == mbfl_no_encoding_invalid) {
4497 php_error_docref(NULL, E_WARNING, "Unsupported charset \"%s\" - will be regarded as ascii", charset);
4498 _tran_cs = mbfl_no_encoding_ascii;
4499 }
4500 tran_cs = _tran_cs;
4501 }
4502 }
4503 }
4504 }
4505 suppressed_hdrs.cnt_type = 1;
4506 }
4507
4508 if ((s = zend_hash_str_find(&ht_headers, "CONTENT-TRANSFER-ENCODING", sizeof("CONTENT-TRANSFER-ENCODING") - 1))) {
4509 enum mbfl_no_encoding _body_enc;
4510
4511 ZEND_ASSERT(Z_TYPE_P(s) == IS_STRING);
4512 _body_enc = mbfl_name2no_encoding(Z_STRVAL_P(s));
4513 switch (_body_enc) {
4514 case mbfl_no_encoding_base64:
4515 case mbfl_no_encoding_7bit:
4516 case mbfl_no_encoding_8bit:
4517 body_enc = _body_enc;
4518 break;
4519
4520 default:
4521 php_error_docref(NULL, E_WARNING, "Unsupported transfer encoding \"%s\" - will be regarded as 8bit", Z_STRVAL_P(s));
4522 body_enc = mbfl_no_encoding_8bit;
4523 break;
4524 }
4525 suppressed_hdrs.cnt_trans_enc = 1;
4526 }
4527
4528 /* To: */
4529 if (to != NULL) {
4530 if (to_len > 0) {
4531 to_r = estrndup(to, to_len);
4532 for (; to_len; to_len--) {
4533 if (!isspace((unsigned char) to_r[to_len - 1])) {
4534 break;
4535 }
4536 to_r[to_len - 1] = '\0';
4537 }
4538 for (i = 0; to_r[i]; i++) {
4539 if (iscntrl((unsigned char) to_r[i])) {
4540 /* According to RFC 822, section 3.1.1 long headers may be separated into
4541 * parts using CRLF followed at least one linear-white-space character ('\t' or ' ').
4542 * To prevent these separators from being replaced with a space, we use the
4543 * SKIP_LONG_HEADER_SEP_MBSTRING to skip over them.
4544 */
4545 SKIP_LONG_HEADER_SEP_MBSTRING(to_r, i);
4546 to_r[i] = ' ';
4547 }
4548 }
4549 } else {
4550 to_r = to;
4551 }
4552 } else {
4553 php_error_docref(NULL, E_WARNING, "Missing To: field");
4554 err = 1;
4555 }
4556
4557 /* Subject: */
4558 if (subject != NULL) {
4559 orig_str.no_language = MBSTRG(language);
4560 orig_str.val = (unsigned char *)subject;
4561 orig_str.len = subject_len;
4562 orig_str.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
4563 if (orig_str.no_encoding == mbfl_no_encoding_invalid || orig_str.no_encoding == mbfl_no_encoding_pass) {
4564 const mbfl_encoding *encoding = mbfl_identify_encoding2(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
4565 orig_str.no_encoding = encoding ? encoding->no_encoding: mbfl_no_encoding_invalid;
4566 }
4567 pstr = mbfl_mime_header_encode(&orig_str, &conv_str, tran_cs, head_enc, "\n", sizeof("Subject: [PHP-jp nnnnnnnn]"));
4568 if (pstr != NULL) {
4569 subject_buf = subject = (char *)pstr->val;
4570 }
4571 } else {
4572 php_error_docref(NULL, E_WARNING, "Missing Subject: field");
4573 err = 1;
4574 }
4575
4576 /* message body */
4577 if (message != NULL) {
4578 orig_str.no_language = MBSTRG(language);
4579 orig_str.val = (unsigned char *)message;
4580 orig_str.len = (unsigned int)message_len;
4581 orig_str.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
4582
4583 if (orig_str.no_encoding == mbfl_no_encoding_invalid || orig_str.no_encoding == mbfl_no_encoding_pass) {
4584 const mbfl_encoding *encoding = mbfl_identify_encoding2(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
4585 orig_str.no_encoding = encoding ? encoding->no_encoding: mbfl_no_encoding_invalid;
4586 }
4587
4588 pstr = NULL;
4589 {
4590 mbfl_string tmpstr;
4591
4592 if (mbfl_convert_encoding(&orig_str, &tmpstr, tran_cs) != NULL) {
4593 tmpstr.no_encoding=mbfl_no_encoding_8bit;
4594 pstr = mbfl_convert_encoding(&tmpstr, &conv_str, body_enc);
4595 efree(tmpstr.val);
4596 }
4597 }
4598 if (pstr != NULL) {
4599 message_buf = message = (char *)pstr->val;
4600 }
4601 } else {
4602 /* this is not really an error, so it is allowed. */
4603 php_error_docref(NULL, E_WARNING, "Empty message body");
4604 message = NULL;
4605 }
4606
4607 /* other headers */
4608 #define PHP_MBSTR_MAIL_MIME_HEADER1 "MIME-Version: 1.0"
4609 #define PHP_MBSTR_MAIL_MIME_HEADER2 "Content-Type: text/plain"
4610 #define PHP_MBSTR_MAIL_MIME_HEADER3 "; charset="
4611 #define PHP_MBSTR_MAIL_MIME_HEADER4 "Content-Transfer-Encoding: "
4612 if (str_headers != NULL) {
4613 p = ZSTR_VAL(str_headers);
4614 n = ZSTR_LEN(str_headers);
4615 mbfl_memory_device_strncat(&device, p, n);
4616 if (n > 0 && p[n - 1] != '\n') {
4617 mbfl_memory_device_strncat(&device, "\n", 1);
4618 }
4619 zend_string_release(str_headers);
4620 }
4621
4622 if (!zend_hash_str_exists(&ht_headers, "MIME-VERSION", sizeof("MIME-VERSION") - 1)) {
4623 mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER1, sizeof(PHP_MBSTR_MAIL_MIME_HEADER1) - 1);
4624 mbfl_memory_device_strncat(&device, "\n", 1);
4625 }
4626
4627 if (!suppressed_hdrs.cnt_type) {
4628 mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER2, sizeof(PHP_MBSTR_MAIL_MIME_HEADER2) - 1);
4629
4630 p = (char *)mbfl_no2preferred_mime_name(tran_cs);
4631 if (p != NULL) {
4632 mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER3, sizeof(PHP_MBSTR_MAIL_MIME_HEADER3) - 1);
4633 mbfl_memory_device_strcat(&device, p);
4634 }
4635 mbfl_memory_device_strncat(&device, "\n", 1);
4636 }
4637 if (!suppressed_hdrs.cnt_trans_enc) {
4638 mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER4, sizeof(PHP_MBSTR_MAIL_MIME_HEADER4) - 1);
4639 p = (char *)mbfl_no2preferred_mime_name(body_enc);
4640 if (p == NULL) {
4641 p = "7bit";
4642 }
4643 mbfl_memory_device_strcat(&device, p);
4644 mbfl_memory_device_strncat(&device, "\n", 1);
4645 }
4646
4647 mbfl_memory_device_unput(&device);
4648 mbfl_memory_device_output('\0', &device);
4649 str_headers = zend_string_init((char *)device.buffer, strlen((char *)device.buffer), 0);
4650
4651 if (force_extra_parameters) {
4652 extra_cmd = php_escape_shell_cmd(force_extra_parameters);
4653 } else if (extra_cmd) {
4654 extra_cmd = php_escape_shell_cmd(ZSTR_VAL(extra_cmd));
4655 }
4656
4657 if (!err && php_mail(to_r, subject, message, ZSTR_VAL(str_headers), extra_cmd ? ZSTR_VAL(extra_cmd) : NULL)) {
4658 RETVAL_TRUE;
4659 } else {
4660 RETVAL_FALSE;
4661 }
4662
4663 if (extra_cmd) {
4664 zend_string_release(extra_cmd);
4665 }
4666
4667 if (to_r != to) {
4668 efree(to_r);
4669 }
4670 if (subject_buf) {
4671 efree((void *)subject_buf);
4672 }
4673 if (message_buf) {
4674 efree((void *)message_buf);
4675 }
4676 mbfl_memory_device_clear(&device);
4677 zend_hash_destroy(&ht_headers);
4678 if (str_headers) {
4679 zend_string_release(str_headers);
4680 }
4681 }
4682
4683 #undef SKIP_LONG_HEADER_SEP_MBSTRING
4684 #undef MAIL_ASCIIZ_CHECK_MBSTRING
4685 #undef PHP_MBSTR_MAIL_MIME_HEADER1
4686 #undef PHP_MBSTR_MAIL_MIME_HEADER2
4687 #undef PHP_MBSTR_MAIL_MIME_HEADER3
4688 #undef PHP_MBSTR_MAIL_MIME_HEADER4
4689 /* }}} */
4690
4691 /* {{{ proto mixed mb_get_info([string type])
4692 Returns the current settings of mbstring */
PHP_FUNCTION(mb_get_info)4693 PHP_FUNCTION(mb_get_info)
4694 {
4695 char *typ = NULL;
4696 size_t typ_len;
4697 size_t n;
4698 char *name;
4699 const struct mb_overload_def *over_func;
4700 zval row1, row2;
4701 const mbfl_language *lang = mbfl_no2language(MBSTRG(language));
4702 const mbfl_encoding **entry;
4703
4704 if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s", &typ, &typ_len) == FAILURE) {
4705 return;
4706 }
4707
4708 if (!typ || !strcasecmp("all", typ)) {
4709 array_init(return_value);
4710 if (MBSTRG(current_internal_encoding)) {
4711 add_assoc_string(return_value, "internal_encoding", (char *)MBSTRG(current_internal_encoding)->name);
4712 }
4713 if (MBSTRG(http_input_identify)) {
4714 add_assoc_string(return_value, "http_input", (char *)MBSTRG(http_input_identify)->name);
4715 }
4716 if (MBSTRG(current_http_output_encoding)) {
4717 add_assoc_string(return_value, "http_output", (char *)MBSTRG(current_http_output_encoding)->name);
4718 }
4719 if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes") - 1, 0)) != NULL) {
4720 add_assoc_string(return_value, "http_output_conv_mimetypes", name);
4721 }
4722 add_assoc_long(return_value, "func_overload", MBSTRG(func_overload));
4723 if (MBSTRG(func_overload)){
4724 over_func = &(mb_ovld[0]);
4725 array_init(&row1);
4726 while (over_func->type > 0) {
4727 if ((MBSTRG(func_overload) & over_func->type) == over_func->type ) {
4728 add_assoc_string(&row1, over_func->orig_func, over_func->ovld_func);
4729 }
4730 over_func++;
4731 }
4732 add_assoc_zval(return_value, "func_overload_list", &row1);
4733 } else {
4734 add_assoc_string(return_value, "func_overload_list", "no overload");
4735 }
4736 if (lang != NULL) {
4737 if ((name = (char *)mbfl_no_encoding2name(lang->mail_charset)) != NULL) {
4738 add_assoc_string(return_value, "mail_charset", name);
4739 }
4740 if ((name = (char *)mbfl_no_encoding2name(lang->mail_header_encoding)) != NULL) {
4741 add_assoc_string(return_value, "mail_header_encoding", name);
4742 }
4743 if ((name = (char *)mbfl_no_encoding2name(lang->mail_body_encoding)) != NULL) {
4744 add_assoc_string(return_value, "mail_body_encoding", name);
4745 }
4746 }
4747 add_assoc_long(return_value, "illegal_chars", MBSTRG(illegalchars));
4748 if (MBSTRG(encoding_translation)) {
4749 add_assoc_string(return_value, "encoding_translation", "On");
4750 } else {
4751 add_assoc_string(return_value, "encoding_translation", "Off");
4752 }
4753 if ((name = (char *)mbfl_no_language2name(MBSTRG(language))) != NULL) {
4754 add_assoc_string(return_value, "language", name);
4755 }
4756 n = MBSTRG(current_detect_order_list_size);
4757 entry = MBSTRG(current_detect_order_list);
4758 if (n > 0) {
4759 size_t i;
4760 array_init(&row2);
4761 for (i = 0; i < n; i++) {
4762 add_next_index_string(&row2, (*entry)->name);
4763 entry++;
4764 }
4765 add_assoc_zval(return_value, "detect_order", &row2);
4766 }
4767 if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
4768 add_assoc_string(return_value, "substitute_character", "none");
4769 } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
4770 add_assoc_string(return_value, "substitute_character", "long");
4771 } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
4772 add_assoc_string(return_value, "substitute_character", "entity");
4773 } else {
4774 add_assoc_long(return_value, "substitute_character", MBSTRG(current_filter_illegal_substchar));
4775 }
4776 if (MBSTRG(strict_detection)) {
4777 add_assoc_string(return_value, "strict_detection", "On");
4778 } else {
4779 add_assoc_string(return_value, "strict_detection", "Off");
4780 }
4781 } else if (!strcasecmp("internal_encoding", typ)) {
4782 if (MBSTRG(current_internal_encoding)) {
4783 RETVAL_STRING((char *)MBSTRG(current_internal_encoding)->name);
4784 }
4785 } else if (!strcasecmp("http_input", typ)) {
4786 if (MBSTRG(http_input_identify)) {
4787 RETVAL_STRING((char *)MBSTRG(http_input_identify)->name);
4788 }
4789 } else if (!strcasecmp("http_output", typ)) {
4790 if (MBSTRG(current_http_output_encoding)) {
4791 RETVAL_STRING((char *)MBSTRG(current_http_output_encoding)->name);
4792 }
4793 } else if (!strcasecmp("http_output_conv_mimetypes", typ)) {
4794 if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes") - 1, 0)) != NULL) {
4795 RETVAL_STRING(name);
4796 }
4797 } else if (!strcasecmp("func_overload", typ)) {
4798 RETVAL_LONG(MBSTRG(func_overload));
4799 } else if (!strcasecmp("func_overload_list", typ)) {
4800 if (MBSTRG(func_overload)){
4801 over_func = &(mb_ovld[0]);
4802 array_init(return_value);
4803 while (over_func->type > 0) {
4804 if ((MBSTRG(func_overload) & over_func->type) == over_func->type ) {
4805 add_assoc_string(return_value, over_func->orig_func, over_func->ovld_func);
4806 }
4807 over_func++;
4808 }
4809 } else {
4810 RETVAL_STRING("no overload");
4811 }
4812 } else if (!strcasecmp("mail_charset", typ)) {
4813 if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_charset)) != NULL) {
4814 RETVAL_STRING(name);
4815 }
4816 } else if (!strcasecmp("mail_header_encoding", typ)) {
4817 if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_header_encoding)) != NULL) {
4818 RETVAL_STRING(name);
4819 }
4820 } else if (!strcasecmp("mail_body_encoding", typ)) {
4821 if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_body_encoding)) != NULL) {
4822 RETVAL_STRING(name);
4823 }
4824 } else if (!strcasecmp("illegal_chars", typ)) {
4825 RETVAL_LONG(MBSTRG(illegalchars));
4826 } else if (!strcasecmp("encoding_translation", typ)) {
4827 if (MBSTRG(encoding_translation)) {
4828 RETVAL_STRING("On");
4829 } else {
4830 RETVAL_STRING("Off");
4831 }
4832 } else if (!strcasecmp("language", typ)) {
4833 if ((name = (char *)mbfl_no_language2name(MBSTRG(language))) != NULL) {
4834 RETVAL_STRING(name);
4835 }
4836 } else if (!strcasecmp("detect_order", typ)) {
4837 n = MBSTRG(current_detect_order_list_size);
4838 entry = MBSTRG(current_detect_order_list);
4839 if (n > 0) {
4840 size_t i;
4841 array_init(return_value);
4842 for (i = 0; i < n; i++) {
4843 add_next_index_string(return_value, (*entry)->name);
4844 entry++;
4845 }
4846 }
4847 } else if (!strcasecmp("substitute_character", typ)) {
4848 if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
4849 RETVAL_STRING("none");
4850 } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
4851 RETVAL_STRING("long");
4852 } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
4853 RETVAL_STRING("entity");
4854 } else {
4855 RETVAL_LONG(MBSTRG(current_filter_illegal_substchar));
4856 }
4857 } else if (!strcasecmp("strict_detection", typ)) {
4858 if (MBSTRG(strict_detection)) {
4859 RETVAL_STRING("On");
4860 } else {
4861 RETVAL_STRING("Off");
4862 }
4863 } else {
4864 RETURN_FALSE;
4865 }
4866 }
4867 /* }}} */
4868
4869
php_mb_init_convd(const mbfl_encoding * encoding)4870 static inline mbfl_buffer_converter *php_mb_init_convd(const mbfl_encoding *encoding)
4871 {
4872 mbfl_buffer_converter *convd;
4873
4874 convd = mbfl_buffer_converter_new2(encoding, encoding, 0);
4875 if (convd == NULL) {
4876 return NULL;
4877 }
4878 mbfl_buffer_converter_illegal_mode(convd, MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE);
4879 mbfl_buffer_converter_illegal_substchar(convd, 0);
4880 return convd;
4881 }
4882
4883
/*
 * Feeds `length` bytes of `input` through `convd` and reports whether the
 * data survived unchanged: returns 1 only when the converter produced a
 * result, counted no illegal characters, and the output is byte-for-byte
 * identical to the input; returns 0 otherwise.
 */
static inline int php_mb_check_encoding_impl(mbfl_buffer_converter *convd, const char *input, size_t length, const mbfl_encoding *encoding) {
	mbfl_string src, dst, *fed;
	long illegal;
	int intact;

	/* initialize string */
	mbfl_string_init_set(&src, mbfl_no_language_neutral, encoding->no_encoding);
	mbfl_string_init(&dst);
	src.val = (unsigned char *) input;
	src.len = length;

	fed = mbfl_buffer_converter_feed_result(convd, &src, &dst);
	illegal = mbfl_buffer_illegalchars(convd);
	if (fed == NULL) {
		/* no result was produced, so there is nothing to release */
		return 0;
	}

	intact = (illegal == 0 && src.len == dst.len && memcmp(src.val, dst.val, src.len) == 0);
	mbfl_string_clear(&dst);
	return intact ? 1 : 0;
}
4907
4908
php_mb_check_encoding(const char * input,size_t length,const char * enc)4909 MBSTRING_API int php_mb_check_encoding(const char *input, size_t length, const char *enc)
4910 {
4911 const mbfl_encoding *encoding = MBSTRG(current_internal_encoding);
4912 mbfl_buffer_converter *convd;
4913
4914 if (input == NULL) {
4915 return MBSTRG(illegalchars) == 0;
4916 }
4917
4918 if (enc != NULL) {
4919 encoding = mbfl_name2encoding(enc);
4920 if (!encoding || encoding == &mbfl_encoding_pass) {
4921 php_error_docref(NULL, E_WARNING, "Invalid encoding \"%s\"", enc);
4922 return 0;
4923 }
4924 }
4925
4926 convd = php_mb_init_convd(encoding);
4927 if (convd == NULL) {
4928 php_error_docref(NULL, E_WARNING, "Unable to create converter");
4929 return 0;
4930 }
4931
4932 if (php_mb_check_encoding_impl(convd, input, length, encoding)) {
4933 mbfl_buffer_converter_delete(convd);
4934 return 1;
4935 }
4936 mbfl_buffer_converter_delete(convd);
4937 return 0;
4938 }
4939
4940
php_mb_check_encoding_recursive(HashTable * vars,const zend_string * enc)4941 MBSTRING_API int php_mb_check_encoding_recursive(HashTable *vars, const zend_string *enc)
4942 {
4943 const mbfl_encoding *encoding = MBSTRG(current_internal_encoding);
4944 mbfl_buffer_converter *convd;
4945 zend_long idx;
4946 zend_string *key;
4947 zval *entry;
4948 int valid = 1;
4949
4950 (void)(idx);
4951
4952 if (enc != NULL) {
4953 encoding = mbfl_name2encoding(ZSTR_VAL(enc));
4954 if (!encoding || encoding == &mbfl_encoding_pass) {
4955 php_error_docref(NULL, E_WARNING, "Invalid encoding \"%s\"", ZSTR_VAL(enc));
4956 return 0;
4957 }
4958 }
4959
4960 convd = php_mb_init_convd(encoding);
4961 if (convd == NULL) {
4962 php_error_docref(NULL, E_WARNING, "Unable to create converter");
4963 return 0;
4964 }
4965
4966 if (vars->u.v.nApplyCount++ > 1) {
4967 vars->u.v.nApplyCount--;
4968 mbfl_buffer_converter_delete(convd);
4969 php_error_docref(NULL, E_WARNING, "Cannot not handle circular references");
4970 return 0;
4971 }
4972 ZEND_HASH_FOREACH_KEY_VAL(vars, idx, key, entry) {
4973 ZVAL_DEREF(entry);
4974 if (key) {
4975 if (!php_mb_check_encoding_impl(convd, ZSTR_VAL(key), ZSTR_LEN(key), encoding)) {
4976 valid = 0;
4977 break;
4978 }
4979 }
4980 switch (Z_TYPE_P(entry)) {
4981 case IS_STRING:
4982 if (!php_mb_check_encoding_impl(convd, Z_STRVAL_P(entry), Z_STRLEN_P(entry), encoding)) {
4983 valid = 0;
4984 break;
4985 }
4986 break;
4987 case IS_ARRAY:
4988 if (!php_mb_check_encoding_recursive(HASH_OF(entry), enc)) {
4989 valid = 0;
4990 break;
4991 }
4992 break;
4993 case IS_LONG:
4994 case IS_DOUBLE:
4995 case IS_NULL:
4996 case IS_TRUE:
4997 case IS_FALSE:
4998 break;
4999 default:
5000 /* Other types are error. */
5001 valid = 0;
5002 break;
5003 }
5004 } ZEND_HASH_FOREACH_END();
5005 vars->u.v.nApplyCount--;
5006 mbfl_buffer_converter_delete(convd);
5007 return valid;
5008 }
5009
5010
5011 /* {{{ proto bool mb_check_encoding([mixed var[, string encoding]])
5012 Check if the string is valid for the specified encoding */
PHP_FUNCTION(mb_check_encoding)5013 PHP_FUNCTION(mb_check_encoding)
5014 {
5015 zval *input = NULL;
5016 zend_string *enc = NULL;
5017
5018 if (zend_parse_parameters(ZEND_NUM_ARGS(), "|zS", &input, &enc) == FAILURE) {
5019 return;
5020 }
5021
5022 /* FIXME: Actually check all inputs, except $_FILES file content. */
5023 if (input == NULL) {
5024 if (MBSTRG(illegalchars) == 0) {
5025 RETURN_TRUE;
5026 }
5027 RETURN_FALSE;
5028 }
5029
5030 if (Z_TYPE_P(input) == IS_ARRAY) {
5031 if (!php_mb_check_encoding_recursive(HASH_OF(input), enc)) {
5032 RETURN_FALSE;
5033 }
5034 } else {
5035 convert_to_string(input);
5036 if (!php_mb_check_encoding(Z_STRVAL_P(input), Z_STRLEN_P(input), enc ? ZSTR_VAL(enc): NULL)) {
5037 RETURN_FALSE;
5038 }
5039 }
5040 RETURN_TRUE;
5041 }
5042 /* }}} */
5043
5044
php_mb_ord(const char * str,size_t str_len,const char * enc)5045 static inline zend_long php_mb_ord(const char* str, size_t str_len, const char* enc)
5046 {
5047 enum mbfl_no_encoding no_enc;
5048 char* ret;
5049 size_t ret_len;
5050 zend_long cp;
5051
5052 if (enc == NULL) {
5053 no_enc = MBSTRG(current_internal_encoding)->no_encoding;
5054 } else {
5055 no_enc = mbfl_name2no_encoding(enc);
5056
5057 if (no_enc == mbfl_no_encoding_invalid) {
5058 php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc);
5059 return -1;
5060 }
5061 }
5062
5063 if (php_mb_is_unsupported_no_encoding(no_enc)) {
5064 php_error_docref(NULL, E_WARNING, "Unsupported encoding \"%s\"", enc);
5065 return -1;
5066 }
5067
5068 if (str_len == 0) {
5069 php_error_docref(NULL, E_WARNING, "Empty string");
5070 return -1;
5071 }
5072
5073 {
5074 long orig_illegalchars = MBSTRG(illegalchars);
5075 MBSTRG(illegalchars) = 0;
5076 ret = php_mb_convert_encoding(str, str_len, "UCS-4BE", enc, &ret_len);
5077 if (MBSTRG(illegalchars) != 0) {
5078 if (ret) {
5079 efree(ret);
5080 }
5081 MBSTRG(illegalchars) = orig_illegalchars;
5082 return -1;
5083 }
5084
5085 MBSTRG(illegalchars) = orig_illegalchars;
5086 }
5087
5088 if (ret == NULL) {
5089 return -1;
5090 }
5091
5092 cp = (unsigned char) ret[0] << 24 | \
5093 (unsigned char) ret[1] << 16 | \
5094 (unsigned char) ret[2] << 8 | \
5095 (unsigned char) ret[3];
5096
5097 efree(ret);
5098
5099 return cp;
5100 }
5101
5102
5103 /* {{{ proto int|false mb_ord([string str[, string encoding]]) */
PHP_FUNCTION(mb_ord)5104 PHP_FUNCTION(mb_ord)
5105 {
5106 char* str;
5107 size_t str_len;
5108 char* enc = NULL;
5109 size_t enc_len;
5110 zend_long cp;
5111
5112 ZEND_PARSE_PARAMETERS_START(1, 2)
5113 Z_PARAM_STRING(str, str_len)
5114 Z_PARAM_OPTIONAL
5115 Z_PARAM_STRING(enc, enc_len)
5116 ZEND_PARSE_PARAMETERS_END();
5117
5118 cp = php_mb_ord(str, str_len, enc);
5119
5120 if (0 > cp) {
5121 RETURN_FALSE;
5122 }
5123
5124 RETURN_LONG(cp);
5125 }
5126 /* }}} */
5127
5128
/* Encode the code point `cp` in the encoding named by `enc`, or in
 * MBSTRG(current_internal_encoding) when `enc` is NULL.
 *
 * Returns an emalloc'ed, NUL-terminated buffer that the caller must efree,
 * with its byte length stored in `*output_len` when `output_len` is not NULL.
 * Returns NULL when:
 *   - the encoding is unknown or unsupported (E_WARNING raised),
 *   - `cp` is outside 0..0x10ffff,
 *   - the target is UTF-8 and `cp` lies in 0xd800..0xdfff,
 *   - the conversion from UCS-4BE fails or flags illegal characters.
 * UTF-8 output is produced directly; every other encoding goes through
 * php_mb_convert_encoding(), with MBSTRG(illegalchars) saved and restored
 * around the call. */
static inline char* php_mb_chr(zend_long cp, const char* enc, size_t *output_len)
{
	enum mbfl_no_encoding no_enc;
	char* buf;
	size_t buf_len;
	char* ret;
	size_t ret_len = 0;
	long orig_illegalchars;
	int had_illegalchars;

	if (enc == NULL) {
		no_enc = MBSTRG(current_internal_encoding)->no_encoding;
	} else {
		no_enc = mbfl_name2no_encoding(enc);
		if (no_enc == mbfl_no_encoding_invalid) {
			php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc);
			return NULL;
		}
	}

	if (php_mb_is_unsupported_no_encoding(no_enc)) {
		/* enc is NULL when the internal encoding is in use; name that
		 * encoding instead of handing a NULL pointer to %s. */
		php_error_docref(NULL, E_WARNING, "Unsupported encoding \"%s\"",
			enc != NULL ? enc : MBSTRG(current_internal_encoding)->name);
		return NULL;
	}

	if (cp < 0 || cp > 0x10ffff) {
		return NULL;
	}

	if (php_mb_is_no_encoding_utf8(no_enc)) {
		/* 0xd800..0xdfff is rejected for UTF-8 output. */
		if (cp > 0xd7ff && 0xe000 > cp) {
			return NULL;
		}

		if (cp < 0x80) {
			ret_len = 1;
			ret = (char *) safe_emalloc(ret_len, 1, 1);
			ret[0] = cp;
			ret[1] = 0;
		} else if (cp < 0x800) {
			ret_len = 2;
			ret = (char *) safe_emalloc(ret_len, 1, 1);
			ret[0] = 0xc0 | (cp >> 6);
			ret[1] = 0x80 | (cp & 0x3f);
			ret[2] = 0;
		} else if (cp < 0x10000) {
			ret_len = 3;
			ret = (char *) safe_emalloc(ret_len, 1, 1);
			ret[0] = 0xe0 | (cp >> 12);
			ret[1] = 0x80 | ((cp >> 6) & 0x3f);
			ret[2] = 0x80 | (cp & 0x3f);
			ret[3] = 0;
		} else {
			ret_len = 4;
			ret = (char *) safe_emalloc(ret_len, 1, 1);
			ret[0] = 0xf0 | (cp >> 18);
			ret[1] = 0x80 | ((cp >> 12) & 0x3f);
			ret[2] = 0x80 | ((cp >> 6) & 0x3f);
			ret[3] = 0x80 | (cp & 0x3f);
			ret[4] = 0;
		}

		if (output_len) {
			*output_len = ret_len;
		}

		return ret;
	}

	/* Other encodings: write cp as big-endian UCS-4 and convert from that. */
	buf_len = 4;
	buf = (char *) safe_emalloc(buf_len, 1, 1);
	buf[0] = (cp >> 24) & 0xff;
	buf[1] = (cp >> 16) & 0xff;
	buf[2] = (cp >> 8) & 0xff;
	buf[3] = cp & 0xff;
	buf[4] = 0;

	orig_illegalchars = MBSTRG(illegalchars);
	MBSTRG(illegalchars) = 0;
	ret = php_mb_convert_encoding(buf, buf_len, enc, "UCS-4BE", &ret_len);
	had_illegalchars = (MBSTRG(illegalchars) != 0);
	MBSTRG(illegalchars) = orig_illegalchars;

	efree(buf);

	if (had_illegalchars) {
		/* The conversion may have returned nothing; only free a real
		 * buffer, matching the guard used in php_mb_ord(). */
		if (ret) {
			efree(ret);
		}
		return NULL;
	}

	if (output_len) {
		*output_len = ret_len;
	}

	return ret;
}
5225
5226
5227 /* {{{ proto string|false mb_chr([int cp[, string encoding]]) */
PHP_FUNCTION(mb_chr)5228 PHP_FUNCTION(mb_chr)
5229 {
5230 zend_long cp;
5231 char* enc = NULL;
5232 size_t enc_len;
5233 char* ret;
5234 size_t ret_len;
5235
5236 ZEND_PARSE_PARAMETERS_START(1, 2)
5237 Z_PARAM_LONG(cp)
5238 Z_PARAM_OPTIONAL
5239 Z_PARAM_STRING(enc, enc_len)
5240 ZEND_PARSE_PARAMETERS_END();
5241
5242 ret = php_mb_chr(cp, enc, &ret_len);
5243
5244 if (ret == NULL) {
5245 RETURN_FALSE;
5246 }
5247
5248 RETVAL_STRING(ret);
5249 efree(ret);
5250 }
5251 /* }}} */
5252
5253
/* Run `str` (`str_len` bytes) through php_mb_convert_encoding() with `enc` as
 * both source and destination, passing `ret_len` through for the output
 * length. Returns whatever that call returns. */
static inline char* php_mb_scrub(const char* str, size_t str_len, const char* enc, size_t *ret_len)
{
	char *scrubbed = php_mb_convert_encoding(str, str_len, enc, enc, ret_len);

	return scrubbed;
}
5258
5259
5260 /* {{{ proto string|false mb_scrub([string str[, string encoding]]) */
PHP_FUNCTION(mb_scrub)5261 PHP_FUNCTION(mb_scrub)
5262 {
5263 char* str;
5264 size_t str_len;
5265 char *enc = NULL;
5266 size_t enc_len;
5267 char *ret;
5268 size_t ret_len;
5269
5270 ZEND_PARSE_PARAMETERS_START(1, 2)
5271 Z_PARAM_STRING(str, str_len)
5272 Z_PARAM_OPTIONAL
5273 Z_PARAM_STRING(enc, enc_len)
5274 ZEND_PARSE_PARAMETERS_END();
5275
5276 if (enc == NULL) {
5277 enc = (char *) MBSTRG(current_internal_encoding)->name;
5278 } else if (!mbfl_is_support_encoding(enc)) {
5279 php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc);
5280 RETURN_FALSE;
5281 }
5282
5283 ret = php_mb_scrub(str, str_len, enc, &ret_len);
5284
5285 if (ret == NULL) {
5286 RETURN_FALSE;
5287 }
5288
5289 RETVAL_STRINGL(ret, ret_len);
5290 efree(ret);
5291 }
5292 /* }}} */
5293
5294
5295 /* {{{ php_mb_populate_current_detect_order_list */
php_mb_populate_current_detect_order_list(void)5296 static void php_mb_populate_current_detect_order_list(void)
5297 {
5298 const mbfl_encoding **entry = 0;
5299 size_t nentries;
5300
5301 if (MBSTRG(current_detect_order_list)) {
5302 return;
5303 }
5304
5305 if (MBSTRG(detect_order_list) && MBSTRG(detect_order_list_size)) {
5306 nentries = MBSTRG(detect_order_list_size);
5307 entry = (const mbfl_encoding **)safe_emalloc(nentries, sizeof(mbfl_encoding*), 0);
5308 memcpy(entry, MBSTRG(detect_order_list), sizeof(mbfl_encoding*) * nentries);
5309 } else {
5310 const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
5311 size_t i;
5312 nentries = MBSTRG(default_detect_order_list_size);
5313 entry = (const mbfl_encoding **)safe_emalloc(nentries, sizeof(mbfl_encoding*), 0);
5314 for (i = 0; i < nentries; i++) {
5315 entry[i] = mbfl_no2encoding(src[i]);
5316 }
5317 }
5318 MBSTRG(current_detect_order_list) = entry;
5319 MBSTRG(current_detect_order_list_size) = nentries;
5320 }
5321 /* }}} */
5322
5323 /* {{{ static int php_mb_encoding_translation() */
php_mb_encoding_translation(void)5324 static int php_mb_encoding_translation(void)
5325 {
5326 return MBSTRG(encoding_translation);
5327 }
5328 /* }}} */
5329
5330 /* {{{ MBSTRING_API size_t php_mb_mbchar_bytes_ex() */
php_mb_mbchar_bytes_ex(const char * s,const mbfl_encoding * enc)5331 MBSTRING_API size_t php_mb_mbchar_bytes_ex(const char *s, const mbfl_encoding *enc)
5332 {
5333 if (enc != NULL) {
5334 if (enc->flag & MBFL_ENCTYPE_MBCS) {
5335 if (enc->mblen_table != NULL) {
5336 if (s != NULL) return enc->mblen_table[*(unsigned char *)s];
5337 }
5338 } else if (enc->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
5339 return 2;
5340 } else if (enc->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
5341 return 4;
5342 }
5343 }
5344 return 1;
5345 }
5346 /* }}} */
5347
5348 /* {{{ MBSTRING_API size_t php_mb_mbchar_bytes() */
php_mb_mbchar_bytes(const char * s)5349 MBSTRING_API size_t php_mb_mbchar_bytes(const char *s)
5350 {
5351 return php_mb_mbchar_bytes_ex(s, MBSTRG(internal_encoding));
5352 }
5353 /* }}} */
5354
5355 /* {{{ MBSTRING_API char *php_mb_safe_strrchr_ex() */
php_mb_safe_strrchr_ex(const char * s,unsigned int c,size_t nbytes,const mbfl_encoding * enc)5356 MBSTRING_API char *php_mb_safe_strrchr_ex(const char *s, unsigned int c, size_t nbytes, const mbfl_encoding *enc)
5357 {
5358 register const char *p = s;
5359 char *last=NULL;
5360
5361 if (nbytes == (size_t)-1) {
5362 size_t nb = 0;
5363
5364 while (*p != '\0') {
5365 if (nb == 0) {
5366 if ((unsigned char)*p == (unsigned char)c) {
5367 last = (char *)p;
5368 }
5369 nb = php_mb_mbchar_bytes_ex(p, enc);
5370 if (nb == 0) {
5371 return NULL; /* something is going wrong! */
5372 }
5373 }
5374 --nb;
5375 ++p;
5376 }
5377 } else {
5378 register size_t bcnt = nbytes;
5379 register size_t nbytes_char;
5380 while (bcnt > 0) {
5381 if ((unsigned char)*p == (unsigned char)c) {
5382 last = (char *)p;
5383 }
5384 nbytes_char = php_mb_mbchar_bytes_ex(p, enc);
5385 if (bcnt < nbytes_char) {
5386 return NULL;
5387 }
5388 p += nbytes_char;
5389 bcnt -= nbytes_char;
5390 }
5391 }
5392 return last;
5393 }
5394 /* }}} */
5395
5396 /* {{{ MBSTRING_API char *php_mb_safe_strrchr() */
php_mb_safe_strrchr(const char * s,unsigned int c,size_t nbytes)5397 MBSTRING_API char *php_mb_safe_strrchr(const char *s, unsigned int c, size_t nbytes)
5398 {
5399 return php_mb_safe_strrchr_ex(s, c, nbytes, MBSTRG(internal_encoding));
5400 }
5401 /* }}} */
5402
5403 /* {{{ MBSTRING_API int php_mb_stripos()
5404 */
php_mb_stripos(int mode,const char * old_haystack,unsigned int old_haystack_len,const char * old_needle,unsigned int old_needle_len,long offset,const char * from_encoding)5405 MBSTRING_API int php_mb_stripos(int mode, const char *old_haystack, unsigned int old_haystack_len, const char *old_needle, unsigned int old_needle_len, long offset, const char *from_encoding)
5406 {
5407 int n;
5408 mbfl_string haystack, needle;
5409 n = -1;
5410
5411 mbfl_string_init(&haystack);
5412 mbfl_string_init(&needle);
5413 haystack.no_language = MBSTRG(language);
5414 haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
5415 needle.no_language = MBSTRG(language);
5416 needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
5417
5418 do {
5419 size_t len = 0;
5420 haystack.val = (unsigned char *)php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, (char *)old_haystack, old_haystack_len, &len, from_encoding);
5421 haystack.len = len;
5422
5423 if (!haystack.val) {
5424 break;
5425 }
5426
5427 if (haystack.len <= 0) {
5428 break;
5429 }
5430
5431 needle.val = (unsigned char *)php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, (char *)old_needle, old_needle_len, &len, from_encoding);
5432 needle.len = len;
5433
5434 if (!needle.val) {
5435 break;
5436 }
5437
5438 if (needle.len <= 0) {
5439 break;
5440 }
5441
5442 haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding);
5443 if (haystack.no_encoding == mbfl_no_encoding_invalid) {
5444 php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", from_encoding);
5445 break;
5446 }
5447
5448 {
5449 int haystack_char_len = mbfl_strlen(&haystack);
5450
5451 if (mode) {
5452 if ((offset > 0 && offset > haystack_char_len) ||
5453 (offset < 0 && -offset > haystack_char_len)) {
5454 php_error_docref(NULL, E_WARNING, "Offset is greater than the length of haystack string");
5455 break;
5456 }
5457 } else {
5458 if (offset < 0) {
5459 offset += (long)haystack_char_len;
5460 }
5461 if (offset < 0 || offset > haystack_char_len) {
5462 php_error_docref(NULL, E_WARNING, "Offset not contained in string");
5463 break;
5464 }
5465 }
5466 }
5467
5468 n = mbfl_strpos(&haystack, &needle, offset, mode);
5469 } while(0);
5470
5471 if (haystack.val) {
5472 efree(haystack.val);
5473 }
5474
5475 if (needle.val) {
5476 efree(needle.val);
5477 }
5478
5479 return n;
5480 }
5481 /* }}} */
5482
/* Report MBSTRG(http_input_list) and MBSTRG(http_input_list_size) through the
 * `list` and `list_size` out-parameters. */
static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *list_size) /* {{{ */
{
	*list_size = MBSTRG(http_input_list_size);
	*list = (const zend_encoding **)MBSTRG(http_input_list);
}
/* }}} */
5489
php_mb_gpc_set_input_encoding(const zend_encoding * encoding)5490 static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding) /* {{{ */
5491 {
5492 MBSTRG(http_input_identify) = (const mbfl_encoding*)encoding;
5493 }
5494 /* }}} */
5495
5496 #endif /* HAVE_MBSTRING */
5497
5498 /*
5499 * Local variables:
5500 * tab-width: 4
5501 * c-basic-offset: 4
5502 * End:
5503 * vim600: fdm=marker
5504 * vim: noet sw=4 ts=4
5505 */
5506