1 /*
2 +----------------------------------------------------------------------+
3 | PHP Version 7 |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 1997-2018 The PHP Group |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
15 | Author: Tsukada Takuya <tsukada@fminn.nagano.nagano.jp> |
16 | Rui Hirokawa <hirokawa@php.net> |
17 | Hironori Sato <satoh@jpnnet.com> |
18 | Shigeru Kanemoto <sgk@happysize.co.jp> |
19 +----------------------------------------------------------------------+
20 */
21
22 /* {{{ includes */
23 #ifdef HAVE_CONFIG_H
24 #include "config.h"
25 #endif
26
27 #include "php.h"
28 #include "php_ini.h"
29 #include "php_variables.h"
30 #include "mbstring.h"
31 #include "ext/standard/php_string.h"
32 #include "ext/standard/php_mail.h"
33 #include "ext/standard/exec.h"
34 #include "ext/standard/url.h"
35 #include "main/php_output.h"
36 #include "ext/standard/info.h"
37
38 #include "libmbfl/mbfl/mbfl_allocators.h"
39 #include "libmbfl/mbfl/mbfilter_8bit.h"
40 #include "libmbfl/mbfl/mbfilter_pass.h"
41 #include "libmbfl/mbfl/mbfilter_wchar.h"
42 #include "libmbfl/filters/mbfilter_ascii.h"
43 #include "libmbfl/filters/mbfilter_base64.h"
44 #include "libmbfl/filters/mbfilter_qprint.h"
45 #include "libmbfl/filters/mbfilter_ucs4.h"
46 #include "libmbfl/filters/mbfilter_utf8.h"
47
48 #include "php_variables.h"
49 #include "php_globals.h"
50 #include "rfc1867.h"
51 #include "php_content_types.h"
52 #include "SAPI.h"
53 #include "php_unicode.h"
54 #include "TSRM.h"
55
56 #include "mb_gpc.h"
57
58 #if HAVE_MBREGEX
59 #include "php_mbregex.h"
60 #endif
61
62 #include "zend_multibyte.h"
63
64 #if HAVE_ONIG
65 #include "php_onig_compat.h"
66 #include <oniguruma.h>
67 #undef UChar
#if ONIGURUMA_VERSION_INT < 60800
/*
 * Fallback shims used when ONIGURUMA_VERSION_INT is below 60800:
 * the match-param helpers become no-ops and the *_with_param entry
 * points forward to the plain onig_search()/onig_match() calls,
 * dropping the trailing match-param argument.
 */
typedef void OnigMatchParam;

# define onig_new_match_param() (NULL)
# define onig_initialize_match_param(x) (void)(x)
# define onig_set_match_stack_limit_size_of_match_param(x, y)
# define onig_free_match_param(x)

# define onig_search_with_param(reg, str, end, start, range, region, option, mp) \
		onig_search(reg, str, end, start, range, region, option)
# define onig_match_with_param(re, str, end, at, region, option, mp) \
		onig_match(re, str, end, at, region, option)
#endif
79 #elif HAVE_PCRE || HAVE_BUNDLED_PCRE
80 #include "ext/pcre/php_pcre.h"
81 #endif
82 /* }}} */
83
84 #if HAVE_MBSTRING
85
86 /* {{{ prototypes */
87 ZEND_DECLARE_MODULE_GLOBALS(mbstring)
88
89 static PHP_GINIT_FUNCTION(mbstring);
90 static PHP_GSHUTDOWN_FUNCTION(mbstring);
91
92 static void php_mb_populate_current_detect_order_list(void);
93
94 static int php_mb_encoding_translation(void);
95
96 static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *list_size);
97
98 static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding);
99
100 static inline zend_bool php_mb_is_unsupported_no_encoding(enum mbfl_no_encoding no_enc);
101
102 static inline zend_bool php_mb_is_no_encoding_utf8(enum mbfl_no_encoding no_enc);
103 /* }}} */
104
105 /* {{{ php_mb_default_identify_list */
106 typedef struct _php_mb_nls_ident_list {
107 enum mbfl_no_language lang;
108 const enum mbfl_no_encoding *list;
109 size_t list_size;
110 } php_mb_nls_ident_list;
111
112 static const enum mbfl_no_encoding php_mb_default_identify_list_ja[] = {
113 mbfl_no_encoding_ascii,
114 mbfl_no_encoding_jis,
115 mbfl_no_encoding_utf8,
116 mbfl_no_encoding_euc_jp,
117 mbfl_no_encoding_sjis
118 };
119
120 static const enum mbfl_no_encoding php_mb_default_identify_list_cn[] = {
121 mbfl_no_encoding_ascii,
122 mbfl_no_encoding_utf8,
123 mbfl_no_encoding_euc_cn,
124 mbfl_no_encoding_cp936
125 };
126
127 static const enum mbfl_no_encoding php_mb_default_identify_list_tw_hk[] = {
128 mbfl_no_encoding_ascii,
129 mbfl_no_encoding_utf8,
130 mbfl_no_encoding_euc_tw,
131 mbfl_no_encoding_big5
132 };
133
134 static const enum mbfl_no_encoding php_mb_default_identify_list_kr[] = {
135 mbfl_no_encoding_ascii,
136 mbfl_no_encoding_utf8,
137 mbfl_no_encoding_euc_kr,
138 mbfl_no_encoding_uhc
139 };
140
141 static const enum mbfl_no_encoding php_mb_default_identify_list_ru[] = {
142 mbfl_no_encoding_ascii,
143 mbfl_no_encoding_utf8,
144 mbfl_no_encoding_koi8r,
145 mbfl_no_encoding_cp1251,
146 mbfl_no_encoding_cp866
147 };
148
149 static const enum mbfl_no_encoding php_mb_default_identify_list_hy[] = {
150 mbfl_no_encoding_ascii,
151 mbfl_no_encoding_utf8,
152 mbfl_no_encoding_armscii8
153 };
154
155 static const enum mbfl_no_encoding php_mb_default_identify_list_tr[] = {
156 mbfl_no_encoding_ascii,
157 mbfl_no_encoding_utf8,
158 mbfl_no_encoding_cp1254,
159 mbfl_no_encoding_8859_9
160 };
161
162 static const enum mbfl_no_encoding php_mb_default_identify_list_ua[] = {
163 mbfl_no_encoding_ascii,
164 mbfl_no_encoding_utf8,
165 mbfl_no_encoding_koi8u
166 };
167
168 static const enum mbfl_no_encoding php_mb_default_identify_list_neut[] = {
169 mbfl_no_encoding_ascii,
170 mbfl_no_encoding_utf8
171 };
172
173
174 static const php_mb_nls_ident_list php_mb_default_identify_list[] = {
175 { mbfl_no_language_japanese, php_mb_default_identify_list_ja, sizeof(php_mb_default_identify_list_ja) / sizeof(php_mb_default_identify_list_ja[0]) },
176 { mbfl_no_language_korean, php_mb_default_identify_list_kr, sizeof(php_mb_default_identify_list_kr) / sizeof(php_mb_default_identify_list_kr[0]) },
177 { mbfl_no_language_traditional_chinese, php_mb_default_identify_list_tw_hk, sizeof(php_mb_default_identify_list_tw_hk) / sizeof(php_mb_default_identify_list_tw_hk[0]) },
178 { mbfl_no_language_simplified_chinese, php_mb_default_identify_list_cn, sizeof(php_mb_default_identify_list_cn) / sizeof(php_mb_default_identify_list_cn[0]) },
179 { mbfl_no_language_russian, php_mb_default_identify_list_ru, sizeof(php_mb_default_identify_list_ru) / sizeof(php_mb_default_identify_list_ru[0]) },
180 { mbfl_no_language_armenian, php_mb_default_identify_list_hy, sizeof(php_mb_default_identify_list_hy) / sizeof(php_mb_default_identify_list_hy[0]) },
181 { mbfl_no_language_turkish, php_mb_default_identify_list_tr, sizeof(php_mb_default_identify_list_tr) / sizeof(php_mb_default_identify_list_tr[0]) },
182 { mbfl_no_language_ukrainian, php_mb_default_identify_list_ua, sizeof(php_mb_default_identify_list_ua) / sizeof(php_mb_default_identify_list_ua[0]) },
183 { mbfl_no_language_neutral, php_mb_default_identify_list_neut, sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]) }
184 };
185
186 /* }}} */
187
188 /* {{{ mb_overload_def mb_ovld[] */
189 static const struct mb_overload_def mb_ovld[] = {
190 {MB_OVERLOAD_MAIL, "mail", "mb_send_mail", "mb_orig_mail"},
191 {MB_OVERLOAD_STRING, "strlen", "mb_strlen", "mb_orig_strlen"},
192 {MB_OVERLOAD_STRING, "strpos", "mb_strpos", "mb_orig_strpos"},
193 {MB_OVERLOAD_STRING, "strrpos", "mb_strrpos", "mb_orig_strrpos"},
194 {MB_OVERLOAD_STRING, "stripos", "mb_stripos", "mb_orig_stripos"},
195 {MB_OVERLOAD_STRING, "strripos", "mb_strripos", "mb_orig_strripos"},
196 {MB_OVERLOAD_STRING, "strstr", "mb_strstr", "mb_orig_strstr"},
197 {MB_OVERLOAD_STRING, "strrchr", "mb_strrchr", "mb_orig_strrchr"},
198 {MB_OVERLOAD_STRING, "stristr", "mb_stristr", "mb_orig_stristr"},
199 {MB_OVERLOAD_STRING, "substr", "mb_substr", "mb_orig_substr"},
200 {MB_OVERLOAD_STRING, "strtolower", "mb_strtolower", "mb_orig_strtolower"},
201 {MB_OVERLOAD_STRING, "strtoupper", "mb_strtoupper", "mb_orig_strtoupper"},
202 {MB_OVERLOAD_STRING, "substr_count", "mb_substr_count", "mb_orig_substr_count"},
203 {0, NULL, NULL, NULL}
204 };
205 /* }}} */
206
207 /* {{{ arginfo */
208 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_language, 0, 0, 0)
209 ZEND_ARG_INFO(0, language)
210 ZEND_END_ARG_INFO()
211
212 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_internal_encoding, 0, 0, 0)
213 ZEND_ARG_INFO(0, encoding)
214 ZEND_END_ARG_INFO()
215
216 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_http_input, 0, 0, 0)
217 ZEND_ARG_INFO(0, type)
218 ZEND_END_ARG_INFO()
219
220 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_http_output, 0, 0, 0)
221 ZEND_ARG_INFO(0, encoding)
222 ZEND_END_ARG_INFO()
223
224 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_detect_order, 0, 0, 0)
225 ZEND_ARG_INFO(0, encoding)
226 ZEND_END_ARG_INFO()
227
228 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substitute_character, 0, 0, 0)
229 ZEND_ARG_INFO(0, substchar)
230 ZEND_END_ARG_INFO()
231
232 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_preferred_mime_name, 0, 0, 1)
233 ZEND_ARG_INFO(0, encoding)
234 ZEND_END_ARG_INFO()
235
236 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_parse_str, 0, 0, 1)
237 ZEND_ARG_INFO(0, encoded_string)
238 ZEND_ARG_INFO(1, result)
239 ZEND_END_ARG_INFO()
240
241 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_output_handler, 0, 0, 2)
242 ZEND_ARG_INFO(0, contents)
243 ZEND_ARG_INFO(0, status)
244 ZEND_END_ARG_INFO()
245
246 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strlen, 0, 0, 1)
247 ZEND_ARG_INFO(0, str)
248 ZEND_ARG_INFO(0, encoding)
249 ZEND_END_ARG_INFO()
250
251 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strpos, 0, 0, 2)
252 ZEND_ARG_INFO(0, haystack)
253 ZEND_ARG_INFO(0, needle)
254 ZEND_ARG_INFO(0, offset)
255 ZEND_ARG_INFO(0, encoding)
256 ZEND_END_ARG_INFO()
257
258 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrpos, 0, 0, 2)
259 ZEND_ARG_INFO(0, haystack)
260 ZEND_ARG_INFO(0, needle)
261 ZEND_ARG_INFO(0, offset)
262 ZEND_ARG_INFO(0, encoding)
263 ZEND_END_ARG_INFO()
264
265 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_stripos, 0, 0, 2)
266 ZEND_ARG_INFO(0, haystack)
267 ZEND_ARG_INFO(0, needle)
268 ZEND_ARG_INFO(0, offset)
269 ZEND_ARG_INFO(0, encoding)
270 ZEND_END_ARG_INFO()
271
272 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strripos, 0, 0, 2)
273 ZEND_ARG_INFO(0, haystack)
274 ZEND_ARG_INFO(0, needle)
275 ZEND_ARG_INFO(0, offset)
276 ZEND_ARG_INFO(0, encoding)
277 ZEND_END_ARG_INFO()
278
279 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strstr, 0, 0, 2)
280 ZEND_ARG_INFO(0, haystack)
281 ZEND_ARG_INFO(0, needle)
282 ZEND_ARG_INFO(0, part)
283 ZEND_ARG_INFO(0, encoding)
284 ZEND_END_ARG_INFO()
285
286 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrchr, 0, 0, 2)
287 ZEND_ARG_INFO(0, haystack)
288 ZEND_ARG_INFO(0, needle)
289 ZEND_ARG_INFO(0, part)
290 ZEND_ARG_INFO(0, encoding)
291 ZEND_END_ARG_INFO()
292
293 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_stristr, 0, 0, 2)
294 ZEND_ARG_INFO(0, haystack)
295 ZEND_ARG_INFO(0, needle)
296 ZEND_ARG_INFO(0, part)
297 ZEND_ARG_INFO(0, encoding)
298 ZEND_END_ARG_INFO()
299
300 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrichr, 0, 0, 2)
301 ZEND_ARG_INFO(0, haystack)
302 ZEND_ARG_INFO(0, needle)
303 ZEND_ARG_INFO(0, part)
304 ZEND_ARG_INFO(0, encoding)
305 ZEND_END_ARG_INFO()
306
307 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substr_count, 0, 0, 2)
308 ZEND_ARG_INFO(0, haystack)
309 ZEND_ARG_INFO(0, needle)
310 ZEND_ARG_INFO(0, encoding)
311 ZEND_END_ARG_INFO()
312
313 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substr, 0, 0, 2)
314 ZEND_ARG_INFO(0, str)
315 ZEND_ARG_INFO(0, start)
316 ZEND_ARG_INFO(0, length)
317 ZEND_ARG_INFO(0, encoding)
318 ZEND_END_ARG_INFO()
319
320 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strcut, 0, 0, 2)
321 ZEND_ARG_INFO(0, str)
322 ZEND_ARG_INFO(0, start)
323 ZEND_ARG_INFO(0, length)
324 ZEND_ARG_INFO(0, encoding)
325 ZEND_END_ARG_INFO()
326
327 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strwidth, 0, 0, 1)
328 ZEND_ARG_INFO(0, str)
329 ZEND_ARG_INFO(0, encoding)
330 ZEND_END_ARG_INFO()
331
332 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strimwidth, 0, 0, 3)
333 ZEND_ARG_INFO(0, str)
334 ZEND_ARG_INFO(0, start)
335 ZEND_ARG_INFO(0, width)
336 ZEND_ARG_INFO(0, trimmarker)
337 ZEND_ARG_INFO(0, encoding)
338 ZEND_END_ARG_INFO()
339
340 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_encoding, 0, 0, 2)
341 ZEND_ARG_INFO(0, str)
342 ZEND_ARG_INFO(0, to)
343 ZEND_ARG_INFO(0, from)
344 ZEND_END_ARG_INFO()
345
346 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_case, 0, 0, 2)
347 ZEND_ARG_INFO(0, sourcestring)
348 ZEND_ARG_INFO(0, mode)
349 ZEND_ARG_INFO(0, encoding)
350 ZEND_END_ARG_INFO()
351
352 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strtoupper, 0, 0, 1)
353 ZEND_ARG_INFO(0, sourcestring)
354 ZEND_ARG_INFO(0, encoding)
355 ZEND_END_ARG_INFO()
356
357 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strtolower, 0, 0, 1)
358 ZEND_ARG_INFO(0, sourcestring)
359 ZEND_ARG_INFO(0, encoding)
360 ZEND_END_ARG_INFO()
361
362 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_detect_encoding, 0, 0, 1)
363 ZEND_ARG_INFO(0, str)
364 ZEND_ARG_INFO(0, encoding_list)
365 ZEND_ARG_INFO(0, strict)
366 ZEND_END_ARG_INFO()
367
368 ZEND_BEGIN_ARG_INFO(arginfo_mb_list_encodings, 0)
369 ZEND_END_ARG_INFO()
370
371 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encoding_aliases, 0, 0, 1)
372 ZEND_ARG_INFO(0, encoding)
373 ZEND_END_ARG_INFO()
374
375 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encode_mimeheader, 0, 0, 1)
376 ZEND_ARG_INFO(0, str)
377 ZEND_ARG_INFO(0, charset)
378 ZEND_ARG_INFO(0, transfer)
379 ZEND_ARG_INFO(0, linefeed)
380 ZEND_ARG_INFO(0, indent)
381 ZEND_END_ARG_INFO()
382
383 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_decode_mimeheader, 0, 0, 1)
384 ZEND_ARG_INFO(0, string)
385 ZEND_END_ARG_INFO()
386
387 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_kana, 0, 0, 1)
388 ZEND_ARG_INFO(0, str)
389 ZEND_ARG_INFO(0, option)
390 ZEND_ARG_INFO(0, encoding)
391 ZEND_END_ARG_INFO()
392
393 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_variables, 0, 0, 3)
394 ZEND_ARG_INFO(0, to)
395 ZEND_ARG_INFO(0, from)
396 ZEND_ARG_VARIADIC_INFO(1, vars)
397 ZEND_END_ARG_INFO()
398
399 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encode_numericentity, 0, 0, 2)
400 ZEND_ARG_INFO(0, string)
401 ZEND_ARG_INFO(0, convmap)
402 ZEND_ARG_INFO(0, encoding)
403 ZEND_ARG_INFO(0, is_hex)
404 ZEND_END_ARG_INFO()
405
406 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_decode_numericentity, 0, 0, 2)
407 ZEND_ARG_INFO(0, string)
408 ZEND_ARG_INFO(0, convmap)
409 ZEND_ARG_INFO(0, encoding)
410 ZEND_ARG_INFO(0, is_hex)
411 ZEND_END_ARG_INFO()
412
413 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_send_mail, 0, 0, 3)
414 ZEND_ARG_INFO(0, to)
415 ZEND_ARG_INFO(0, subject)
416 ZEND_ARG_INFO(0, message)
417 ZEND_ARG_INFO(0, additional_headers)
418 ZEND_ARG_INFO(0, additional_parameters)
419 ZEND_END_ARG_INFO()
420
421 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_get_info, 0, 0, 0)
422 ZEND_ARG_INFO(0, type)
423 ZEND_END_ARG_INFO()
424
425 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_check_encoding, 0, 0, 0)
426 ZEND_ARG_INFO(0, var)
427 ZEND_ARG_INFO(0, encoding)
428 ZEND_END_ARG_INFO()
429
430 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_scrub, 0, 0, 1)
431 ZEND_ARG_INFO(0, str)
432 ZEND_ARG_INFO(0, encoding)
433 ZEND_END_ARG_INFO()
434
435 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ord, 0, 0, 1)
436 ZEND_ARG_INFO(0, str)
437 ZEND_ARG_INFO(0, encoding)
438 ZEND_END_ARG_INFO()
439
440 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_chr, 0, 0, 1)
441 ZEND_ARG_INFO(0, cp)
442 ZEND_ARG_INFO(0, encoding)
443 ZEND_END_ARG_INFO()
444
445 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_regex_encoding, 0, 0, 0)
446 ZEND_ARG_INFO(0, encoding)
447 ZEND_END_ARG_INFO()
448
449 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg, 0, 0, 2)
450 ZEND_ARG_INFO(0, pattern)
451 ZEND_ARG_INFO(0, string)
452 ZEND_ARG_INFO(1, registers)
453 ZEND_END_ARG_INFO()
454
455 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_eregi, 0, 0, 2)
456 ZEND_ARG_INFO(0, pattern)
457 ZEND_ARG_INFO(0, string)
458 ZEND_ARG_INFO(1, registers)
459 ZEND_END_ARG_INFO()
460
461 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_replace, 0, 0, 3)
462 ZEND_ARG_INFO(0, pattern)
463 ZEND_ARG_INFO(0, replacement)
464 ZEND_ARG_INFO(0, string)
465 ZEND_ARG_INFO(0, option)
466 ZEND_END_ARG_INFO()
467
468 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_eregi_replace, 0, 0, 3)
469 ZEND_ARG_INFO(0, pattern)
470 ZEND_ARG_INFO(0, replacement)
471 ZEND_ARG_INFO(0, string)
472 ZEND_ARG_INFO(0, option)
473 ZEND_END_ARG_INFO()
474
475 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_replace_callback, 0, 0, 3)
476 ZEND_ARG_INFO(0, pattern)
477 ZEND_ARG_INFO(0, callback)
478 ZEND_ARG_INFO(0, string)
479 ZEND_ARG_INFO(0, option)
480 ZEND_END_ARG_INFO()
481
482 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_split, 0, 0, 2)
483 ZEND_ARG_INFO(0, pattern)
484 ZEND_ARG_INFO(0, string)
485 ZEND_ARG_INFO(0, limit)
486 ZEND_END_ARG_INFO()
487
488 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_match, 0, 0, 2)
489 ZEND_ARG_INFO(0, pattern)
490 ZEND_ARG_INFO(0, string)
491 ZEND_ARG_INFO(0, option)
492 ZEND_END_ARG_INFO()
493
494 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search, 0, 0, 0)
495 ZEND_ARG_INFO(0, pattern)
496 ZEND_ARG_INFO(0, option)
497 ZEND_END_ARG_INFO()
498
499 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_pos, 0, 0, 0)
500 ZEND_ARG_INFO(0, pattern)
501 ZEND_ARG_INFO(0, option)
502 ZEND_END_ARG_INFO()
503
504 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_regs, 0, 0, 0)
505 ZEND_ARG_INFO(0, pattern)
506 ZEND_ARG_INFO(0, option)
507 ZEND_END_ARG_INFO()
508
509 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_init, 0, 0, 1)
510 ZEND_ARG_INFO(0, string)
511 ZEND_ARG_INFO(0, pattern)
512 ZEND_ARG_INFO(0, option)
513 ZEND_END_ARG_INFO()
514
515 ZEND_BEGIN_ARG_INFO(arginfo_mb_ereg_search_getregs, 0)
516 ZEND_END_ARG_INFO()
517
518 ZEND_BEGIN_ARG_INFO(arginfo_mb_ereg_search_getpos, 0)
519 ZEND_END_ARG_INFO()
520
521 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_setpos, 0, 0, 1)
522 ZEND_ARG_INFO(0, position)
523 ZEND_END_ARG_INFO()
524
525 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_regex_set_options, 0, 0, 0)
526 ZEND_ARG_INFO(0, options)
527 ZEND_END_ARG_INFO()
528 /* }}} */
529
530 /* {{{ zend_function_entry mbstring_functions[] */
531 static const zend_function_entry mbstring_functions[] = {
532 PHP_FE(mb_convert_case, arginfo_mb_convert_case)
533 PHP_FE(mb_strtoupper, arginfo_mb_strtoupper)
534 PHP_FE(mb_strtolower, arginfo_mb_strtolower)
535 PHP_FE(mb_language, arginfo_mb_language)
536 PHP_FE(mb_internal_encoding, arginfo_mb_internal_encoding)
537 PHP_FE(mb_http_input, arginfo_mb_http_input)
538 PHP_FE(mb_http_output, arginfo_mb_http_output)
539 PHP_FE(mb_detect_order, arginfo_mb_detect_order)
540 PHP_FE(mb_substitute_character, arginfo_mb_substitute_character)
541 PHP_FE(mb_parse_str, arginfo_mb_parse_str)
542 PHP_FE(mb_output_handler, arginfo_mb_output_handler)
543 PHP_FE(mb_preferred_mime_name, arginfo_mb_preferred_mime_name)
544 PHP_FE(mb_strlen, arginfo_mb_strlen)
545 PHP_FE(mb_strpos, arginfo_mb_strpos)
546 PHP_FE(mb_strrpos, arginfo_mb_strrpos)
547 PHP_FE(mb_stripos, arginfo_mb_stripos)
548 PHP_FE(mb_strripos, arginfo_mb_strripos)
549 PHP_FE(mb_strstr, arginfo_mb_strstr)
550 PHP_FE(mb_strrchr, arginfo_mb_strrchr)
551 PHP_FE(mb_stristr, arginfo_mb_stristr)
552 PHP_FE(mb_strrichr, arginfo_mb_strrichr)
553 PHP_FE(mb_substr_count, arginfo_mb_substr_count)
554 PHP_FE(mb_substr, arginfo_mb_substr)
555 PHP_FE(mb_strcut, arginfo_mb_strcut)
556 PHP_FE(mb_strwidth, arginfo_mb_strwidth)
557 PHP_FE(mb_strimwidth, arginfo_mb_strimwidth)
558 PHP_FE(mb_convert_encoding, arginfo_mb_convert_encoding)
559 PHP_FE(mb_detect_encoding, arginfo_mb_detect_encoding)
560 PHP_FE(mb_list_encodings, arginfo_mb_list_encodings)
561 PHP_FE(mb_encoding_aliases, arginfo_mb_encoding_aliases)
562 PHP_FE(mb_convert_kana, arginfo_mb_convert_kana)
563 PHP_FE(mb_encode_mimeheader, arginfo_mb_encode_mimeheader)
564 PHP_FE(mb_decode_mimeheader, arginfo_mb_decode_mimeheader)
565 PHP_FE(mb_convert_variables, arginfo_mb_convert_variables)
566 PHP_FE(mb_encode_numericentity, arginfo_mb_encode_numericentity)
567 PHP_FE(mb_decode_numericentity, arginfo_mb_decode_numericentity)
568 PHP_FE(mb_send_mail, arginfo_mb_send_mail)
569 PHP_FE(mb_get_info, arginfo_mb_get_info)
570 PHP_FE(mb_check_encoding, arginfo_mb_check_encoding)
571 PHP_FE(mb_ord, arginfo_mb_ord)
572 PHP_FE(mb_chr, arginfo_mb_chr)
573 PHP_FE(mb_scrub, arginfo_mb_scrub)
574 #if HAVE_MBREGEX
575 PHP_MBREGEX_FUNCTION_ENTRIES
576 #endif
577 PHP_FE_END
578 };
579 /* }}} */
580
581 /* {{{ zend_module_entry mbstring_module_entry */
582 zend_module_entry mbstring_module_entry = {
583 STANDARD_MODULE_HEADER,
584 "mbstring",
585 mbstring_functions,
586 PHP_MINIT(mbstring),
587 PHP_MSHUTDOWN(mbstring),
588 PHP_RINIT(mbstring),
589 PHP_RSHUTDOWN(mbstring),
590 PHP_MINFO(mbstring),
591 PHP_MBSTRING_VERSION,
592 PHP_MODULE_GLOBALS(mbstring),
593 PHP_GINIT(mbstring),
594 PHP_GSHUTDOWN(mbstring),
595 NULL,
596 STANDARD_MODULE_PROPERTIES_EX
597 };
598 /* }}} */
599
600 /* {{{ static sapi_post_entry php_post_entries[] */
601 static const sapi_post_entry php_post_entries[] = {
602 { DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data, php_std_post_handler },
603 { MULTIPART_CONTENT_TYPE, sizeof(MULTIPART_CONTENT_TYPE)-1, NULL, rfc1867_post_handler },
604 { NULL, 0, NULL, NULL }
605 };
606 /* }}} */
607
/* Emitted only when COMPILE_DL_MBSTRING is defined. */
#ifdef COMPILE_DL_MBSTRING
# ifdef ZTS
ZEND_TSRMLS_CACHE_DEFINE()
# endif
ZEND_GET_MODULE(mbstring)
#endif
614
615 static char *get_internal_encoding(void) {
616 if (PG(internal_encoding) && PG(internal_encoding)[0]) {
617 return PG(internal_encoding);
618 } else if (SG(default_charset)) {
619 return SG(default_charset);
620 }
621 return "";
622 }
623
get_input_encoding(void)624 static char *get_input_encoding(void) {
625 if (PG(input_encoding) && PG(input_encoding)[0]) {
626 return PG(input_encoding);
627 } else if (SG(default_charset)) {
628 return SG(default_charset);
629 }
630 return "";
631 }
632
get_output_encoding(void)633 static char *get_output_encoding(void) {
634 if (PG(output_encoding) && PG(output_encoding)[0]) {
635 return PG(output_encoding);
636 } else if (SG(default_charset)) {
637 return SG(default_charset);
638 }
639 return "";
640 }
641
642
643 /* {{{ allocators */
_php_mb_allocators_malloc(size_t sz)644 static void *_php_mb_allocators_malloc(size_t sz)
645 {
646 return emalloc(sz);
647 }
648
_php_mb_allocators_realloc(void * ptr,size_t sz)649 static void *_php_mb_allocators_realloc(void *ptr, size_t sz)
650 {
651 return erealloc(ptr, sz);
652 }
653
_php_mb_allocators_calloc(size_t nelems,size_t szelem)654 static void *_php_mb_allocators_calloc(size_t nelems, size_t szelem)
655 {
656 return ecalloc(nelems, szelem);
657 }
658
_php_mb_allocators_free(void * ptr)659 static void _php_mb_allocators_free(void *ptr)
660 {
661 efree(ptr);
662 }
663
_php_mb_allocators_pmalloc(size_t sz)664 static void *_php_mb_allocators_pmalloc(size_t sz)
665 {
666 return pemalloc(sz, 1);
667 }
668
_php_mb_allocators_prealloc(void * ptr,size_t sz)669 static void *_php_mb_allocators_prealloc(void *ptr, size_t sz)
670 {
671 return perealloc(ptr, sz, 1);
672 }
673
_php_mb_allocators_pfree(void * ptr)674 static void _php_mb_allocators_pfree(void *ptr)
675 {
676 pefree(ptr, 1);
677 }
678
679 static const mbfl_allocators _php_mb_allocators = {
680 _php_mb_allocators_malloc,
681 _php_mb_allocators_realloc,
682 _php_mb_allocators_calloc,
683 _php_mb_allocators_free,
684 _php_mb_allocators_pmalloc,
685 _php_mb_allocators_prealloc,
686 _php_mb_allocators_pfree
687 };
688 /* }}} */
689
690 /* {{{ static sapi_post_entry mbstr_post_entries[] */
691 static const sapi_post_entry mbstr_post_entries[] = {
692 { DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data, php_mb_post_handler },
693 { MULTIPART_CONTENT_TYPE, sizeof(MULTIPART_CONTENT_TYPE)-1, NULL, rfc1867_post_handler },
694 { NULL, 0, NULL, NULL }
695 };
696 /* }}} */
697
php_mb_get_encoding(const char * encoding_name)698 static const mbfl_encoding *php_mb_get_encoding(const char *encoding_name) {
699 if (encoding_name) {
700 const mbfl_encoding *encoding;
701 if (MBSTRG(last_used_encoding_name)
702 && !strcasecmp(encoding_name, MBSTRG(last_used_encoding_name))) {
703 return MBSTRG(last_used_encoding);
704 }
705
706 encoding = mbfl_name2encoding(encoding_name);
707 if (!encoding) {
708 php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", encoding_name);
709 return NULL;
710 }
711
712 if (MBSTRG(last_used_encoding_name)) {
713 efree(MBSTRG(last_used_encoding_name));
714 }
715 MBSTRG(last_used_encoding_name) = estrdup(encoding_name);
716 MBSTRG(last_used_encoding) = encoding;
717 return encoding;
718 } else {
719 return MBSTRG(current_internal_encoding);
720 }
721 }
722
723 /* {{{ static int php_mb_parse_encoding_list()
724 * Return FAILURE if input contains any illegal encoding, otherwise SUCCESS.
725 * Even if any illegal encoding is detected the result may contain a list
726 * of parsed encodings.
727 */
728 static int
php_mb_parse_encoding_list(const char * value,size_t value_length,const mbfl_encoding *** return_list,size_t * return_size,int persistent)729 php_mb_parse_encoding_list(const char *value, size_t value_length, const mbfl_encoding ***return_list, size_t *return_size, int persistent)
730 {
731 int bauto, ret = SUCCESS;
732 size_t n, size;
733 char *p, *p1, *p2, *endp, *tmpstr;
734 const mbfl_encoding **entry, **list;
735
736 list = NULL;
737 if (value == NULL || value_length == 0) {
738 if (return_list) {
739 *return_list = NULL;
740 }
741 if (return_size) {
742 *return_size = 0;
743 }
744 return FAILURE;
745 } else {
746 /* copy the value string for work */
747 if (value[0]=='"' && value[value_length-1]=='"' && value_length>2) {
748 tmpstr = (char *)estrndup(value+1, value_length-2);
749 value_length -= 2;
750 }
751 else
752 tmpstr = (char *)estrndup(value, value_length);
753 /* count the number of listed encoding names */
754 endp = tmpstr + value_length;
755 n = 1;
756 p1 = tmpstr;
757 while ((p2 = (char*)php_memnstr(p1, ",", 1, endp)) != NULL) {
758 p1 = p2 + 1;
759 n++;
760 }
761 size = n + MBSTRG(default_detect_order_list_size);
762 /* make list */
763 list = (const mbfl_encoding **)pecalloc(size, sizeof(mbfl_encoding*), persistent);
764 entry = list;
765 n = 0;
766 bauto = 0;
767 p1 = tmpstr;
768 do {
769 p2 = p = (char*)php_memnstr(p1, ",", 1, endp);
770 if (p == NULL) {
771 p = endp;
772 }
773 *p = '\0';
774 /* trim spaces */
775 while (p1 < p && (*p1 == ' ' || *p1 == '\t')) {
776 p1++;
777 }
778 p--;
779 while (p > p1 && (*p == ' ' || *p == '\t')) {
780 *p = '\0';
781 p--;
782 }
783 /* convert to the encoding number and check encoding */
784 if (strcasecmp(p1, "auto") == 0) {
785 if (!bauto) {
786 const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
787 const size_t identify_list_size = MBSTRG(default_detect_order_list_size);
788 size_t i;
789 bauto = 1;
790 for (i = 0; i < identify_list_size; i++) {
791 *entry++ = mbfl_no2encoding(*src++);
792 n++;
793 }
794 }
795 } else {
796 const mbfl_encoding *encoding = mbfl_name2encoding(p1);
797 if (encoding) {
798 *entry++ = encoding;
799 n++;
800 } else {
801 ret = FAILURE;
802 }
803 }
804 p1 = p2 + 1;
805 } while (n < size && p2 != NULL);
806 if (n > 0) {
807 if (return_list) {
808 *return_list = list;
809 } else {
810 pefree(list, persistent);
811 }
812 } else {
813 pefree(list, persistent);
814 if (return_list) {
815 *return_list = NULL;
816 }
817 ret = FAILURE;
818 }
819 if (return_size) {
820 *return_size = n;
821 }
822 efree(tmpstr);
823 }
824
825 return ret;
826 }
827 /* }}} */
828
829 /* {{{ static int php_mb_parse_encoding_array()
830 * Return FAILURE if input contains any illegal encoding, otherwise SUCCESS.
831 * Even if any illegal encoding is detected the result may contain a list
832 * of parsed encodings.
833 */
834 static int
php_mb_parse_encoding_array(zval * array,const mbfl_encoding *** return_list,size_t * return_size,int persistent)835 php_mb_parse_encoding_array(zval *array, const mbfl_encoding ***return_list, size_t *return_size, int persistent)
836 {
837 zval *hash_entry;
838 HashTable *target_hash;
839 int i, n, bauto, ret = SUCCESS;
840 const mbfl_encoding **list, **entry;
841 size_t size;
842
843 list = NULL;
844 if (Z_TYPE_P(array) == IS_ARRAY) {
845 target_hash = Z_ARRVAL_P(array);
846 i = zend_hash_num_elements(target_hash);
847 size = i + MBSTRG(default_detect_order_list_size);
848 list = (const mbfl_encoding **)pecalloc(size, sizeof(mbfl_encoding*), persistent);
849 entry = list;
850 bauto = 0;
851 n = 0;
852 ZEND_HASH_FOREACH_VAL(target_hash, hash_entry) {
853 convert_to_string_ex(hash_entry);
854 if (strcasecmp(Z_STRVAL_P(hash_entry), "auto") == 0) {
855 if (!bauto) {
856 const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
857 const size_t identify_list_size = MBSTRG(default_detect_order_list_size);
858 size_t j;
859
860 bauto = 1;
861 for (j = 0; j < identify_list_size; j++) {
862 *entry++ = mbfl_no2encoding(*src++);
863 n++;
864 }
865 }
866 } else {
867 const mbfl_encoding *encoding = mbfl_name2encoding(Z_STRVAL_P(hash_entry));
868 if (encoding) {
869 *entry++ = encoding;
870 n++;
871 } else {
872 ret = FAILURE;
873 }
874 }
875 i--;
876 } ZEND_HASH_FOREACH_END();
877 if (n > 0) {
878 if (return_list) {
879 *return_list = list;
880 } else {
881 pefree(list, persistent);
882 }
883 } else {
884 pefree(list, persistent);
885 if (return_list) {
886 *return_list = NULL;
887 }
888 ret = FAILURE;
889 }
890 if (return_size) {
891 *return_size = n;
892 }
893 }
894
895 return ret;
896 }
897 /* }}} */
898
899 /* {{{ zend_multibyte interface */
php_mb_zend_encoding_fetcher(const char * encoding_name)900 static const zend_encoding* php_mb_zend_encoding_fetcher(const char *encoding_name)
901 {
902 return (const zend_encoding*)mbfl_name2encoding(encoding_name);
903 }
904
php_mb_zend_encoding_name_getter(const zend_encoding * encoding)905 static const char *php_mb_zend_encoding_name_getter(const zend_encoding *encoding)
906 {
907 return ((const mbfl_encoding *)encoding)->name;
908 }
909
php_mb_zend_encoding_lexer_compatibility_checker(const zend_encoding * _encoding)910 static int php_mb_zend_encoding_lexer_compatibility_checker(const zend_encoding *_encoding)
911 {
912 const mbfl_encoding *encoding = (const mbfl_encoding*)_encoding;
913 if (encoding->flag & MBFL_ENCTYPE_SBCS) {
914 return 1;
915 }
916 if ((encoding->flag & (MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE)) == MBFL_ENCTYPE_MBCS) {
917 return 1;
918 }
919 return 0;
920 }
921
php_mb_zend_encoding_detector(const unsigned char * arg_string,size_t arg_length,const zend_encoding ** list,size_t list_size)922 static const zend_encoding *php_mb_zend_encoding_detector(const unsigned char *arg_string, size_t arg_length, const zend_encoding **list, size_t list_size)
923 {
924 mbfl_string string;
925
926 if (!list) {
927 list = (const zend_encoding **)MBSTRG(current_detect_order_list);
928 list_size = MBSTRG(current_detect_order_list_size);
929 }
930
931 mbfl_string_init(&string);
932 string.no_language = MBSTRG(language);
933 string.val = (unsigned char *)arg_string;
934 string.len = arg_length;
935 return (const zend_encoding *) mbfl_identify_encoding(&string, (const mbfl_encoding **)list, list_size, 0);
936 }
937
php_mb_zend_encoding_converter(unsigned char ** to,size_t * to_length,const unsigned char * from,size_t from_length,const zend_encoding * encoding_to,const zend_encoding * encoding_from)938 static size_t php_mb_zend_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const zend_encoding *encoding_to, const zend_encoding *encoding_from)
939 {
940 mbfl_string string, result;
941 mbfl_buffer_converter *convd;
942 int status;
943 size_t loc;
944
945 /* new encoding */
946 /* initialize string */
947 mbfl_string_init(&string);
948 mbfl_string_init(&result);
949 string.encoding = (const mbfl_encoding*)encoding_from;
950 string.no_language = MBSTRG(language);
951 string.val = (unsigned char*)from;
952 string.len = from_length;
953
954 /* initialize converter */
955 convd = mbfl_buffer_converter_new((const mbfl_encoding *)encoding_from, (const mbfl_encoding *)encoding_to, string.len);
956 if (convd == NULL) {
957 return (size_t) -1;
958 }
959
960 mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
961 mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
962
963 /* do it */
964 status = mbfl_buffer_converter_feed2(convd, &string, &loc);
965 if (status) {
966 mbfl_buffer_converter_delete(convd);
967 return (size_t)-1;
968 }
969
970 mbfl_buffer_converter_flush(convd);
971 if (!mbfl_buffer_converter_result(convd, &result)) {
972 mbfl_buffer_converter_delete(convd);
973 return (size_t)-1;
974 }
975
976 *to = result.val;
977 *to_length = result.len;
978
979 mbfl_buffer_converter_delete(convd);
980
981 return loc;
982 }
983
php_mb_zend_encoding_list_parser(const char * encoding_list,size_t encoding_list_len,const zend_encoding *** return_list,size_t * return_size,int persistent)984 static int php_mb_zend_encoding_list_parser(const char *encoding_list, size_t encoding_list_len, const zend_encoding ***return_list, size_t *return_size, int persistent)
985 {
986 return php_mb_parse_encoding_list(encoding_list, encoding_list_len, (const mbfl_encoding ***)return_list, return_size, persistent);
987 }
988
php_mb_zend_internal_encoding_getter(void)989 static const zend_encoding *php_mb_zend_internal_encoding_getter(void)
990 {
991 return (const zend_encoding *)MBSTRG(internal_encoding);
992 }
993
php_mb_zend_internal_encoding_setter(const zend_encoding * encoding)994 static int php_mb_zend_internal_encoding_setter(const zend_encoding *encoding)
995 {
996 MBSTRG(internal_encoding) = (const mbfl_encoding *)encoding;
997 return SUCCESS;
998 }
999
/* Callback table through which mbstring provides the engine's multibyte
 * support; it is handed to zend_multibyte_set_functions() during module
 * start-up. The initializer is positional, so the order of the entries
 * must match the field order of zend_multibyte_functions. */
static zend_multibyte_functions php_mb_zend_multibyte_functions = {
	"mbstring", /* identifies this extension as the provider */
	php_mb_zend_encoding_fetcher,
	php_mb_zend_encoding_name_getter,
	php_mb_zend_encoding_lexer_compatibility_checker,
	php_mb_zend_encoding_detector,
	php_mb_zend_encoding_converter,
	php_mb_zend_encoding_list_parser,
	php_mb_zend_internal_encoding_getter,
	php_mb_zend_internal_encoding_setter
};
1011 /* }}} */
1012
/* Small regex abstraction used for the http_output_conv_mimetypes pattern.
 * The definitions that follow are selected at compile time: Oniguruma when
 * HAVE_ONIG is set, otherwise PCRE2. The compiled pattern is passed around
 * as an opaque pointer. */
static void *_php_mb_compile_regex(const char *pattern);
static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len);
static void _php_mb_free_regex(void *opaque);
1016
1017 #if HAVE_ONIG
1018 /* {{{ _php_mb_compile_regex */
_php_mb_compile_regex(const char * pattern)1019 static void *_php_mb_compile_regex(const char *pattern)
1020 {
1021 php_mb_regex_t *retval;
1022 OnigErrorInfo err_info;
1023 int err_code;
1024
1025 if ((err_code = onig_new(&retval,
1026 (const OnigUChar *)pattern,
1027 (const OnigUChar *)pattern + strlen(pattern),
1028 ONIG_OPTION_IGNORECASE | ONIG_OPTION_DONT_CAPTURE_GROUP,
1029 ONIG_ENCODING_ASCII, &OnigSyntaxPerl, &err_info))) {
1030 OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
1031 onig_error_code_to_str(err_str, err_code, err_info);
1032 php_error_docref(NULL, E_WARNING, "%s: %s", pattern, err_str);
1033 retval = NULL;
1034 }
1035 return retval;
1036 }
1037 /* }}} */
1038
1039 /* {{{ _php_mb_match_regex */
_php_mb_match_regex(void * opaque,const char * str,size_t str_len)1040 static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len)
1041 {
1042 OnigMatchParam *mp = onig_new_match_param();
1043 int err;
1044 onig_initialize_match_param(mp);
1045 if (!ZEND_LONG_UINT_OVFL(MBSTRG(regex_stack_limit))) {
1046 onig_set_match_stack_limit_size_of_match_param(mp, (unsigned int)MBSTRG(regex_stack_limit));
1047 }
1048 /* search */
1049 err = onig_search_with_param((php_mb_regex_t *)opaque, (const OnigUChar *)str,
1050 (const OnigUChar*)str + str_len, (const OnigUChar *)str,
1051 (const OnigUChar*)str + str_len, NULL, ONIG_OPTION_NONE, mp);
1052 onig_free_match_param(mp);
1053 return err >= 0;
1054 }
1055 /* }}} */
1056
1057 /* {{{ _php_mb_free_regex */
_php_mb_free_regex(void * opaque)1058 static void _php_mb_free_regex(void *opaque)
1059 {
1060 onig_free((php_mb_regex_t *)opaque);
1061 }
1062 /* }}} */
1063 #elif HAVE_PCRE || HAVE_BUNDLED_PCRE
1064 /* {{{ _php_mb_compile_regex */
_php_mb_compile_regex(const char * pattern)1065 static void *_php_mb_compile_regex(const char *pattern)
1066 {
1067 pcre2_code *retval;
1068 PCRE2_SIZE err_offset;
1069 int errnum;
1070
1071 if (!(retval = pcre2_compile((PCRE2_SPTR)pattern, PCRE2_ZERO_TERMINATED,
1072 PCRE2_CASELESS, &errnum, &err_offset, php_pcre_cctx()))) {
1073 PCRE2_UCHAR err_str[128];
1074 pcre2_get_error_message(errnum, err_str, sizeof(err_str));
1075 php_error_docref(NULL, E_WARNING, "%s (offset=%zu): %s", pattern, err_offset, err_str);
1076 }
1077 return retval;
1078 }
1079 /* }}} */
1080
1081 /* {{{ _php_mb_match_regex */
_php_mb_match_regex(void * opaque,const char * str,size_t str_len)1082 static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len)
1083 {
1084 int res;
1085
1086 pcre2_match_data *match_data = php_pcre_create_match_data(0, opaque);
1087 if (NULL == match_data) {
1088 pcre2_code_free(opaque);
1089 php_error_docref(NULL, E_WARNING, "Cannot allocate match data");
1090 return FAILURE;
1091 }
1092 res = pcre2_match(opaque, (PCRE2_SPTR)str, str_len, 0, 0, match_data, php_pcre_mctx()) >= 0;
1093 php_pcre_free_match_data(match_data);
1094
1095 return res;
1096 }
1097 /* }}} */
1098
1099 /* {{{ _php_mb_free_regex */
_php_mb_free_regex(void * opaque)1100 static void _php_mb_free_regex(void *opaque)
1101 {
1102 pcre2_code_free(opaque);
1103 }
1104 /* }}} */
1105 #endif
1106
1107 /* {{{ php_mb_nls_get_default_detect_order_list */
php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang,enum mbfl_no_encoding ** plist,size_t * plist_size)1108 static int php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang, enum mbfl_no_encoding **plist, size_t *plist_size)
1109 {
1110 size_t i;
1111
1112 *plist = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut;
1113 *plist_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
1114
1115 for (i = 0; i < sizeof(php_mb_default_identify_list) / sizeof(php_mb_default_identify_list[0]); i++) {
1116 if (php_mb_default_identify_list[i].lang == lang) {
1117 *plist = (enum mbfl_no_encoding *)php_mb_default_identify_list[i].list;
1118 *plist_size = php_mb_default_identify_list[i].list_size;
1119 return 1;
1120 }
1121 }
1122 return 0;
1123 }
1124 /* }}} */
1125
php_mb_rfc1867_substring_conf(const zend_encoding * encoding,char * start,size_t len,char quote)1126 static char *php_mb_rfc1867_substring_conf(const zend_encoding *encoding, char *start, size_t len, char quote)
1127 {
1128 char *result = emalloc(len + 2);
1129 char *resp = result;
1130 size_t i;
1131
1132 for (i = 0; i < len && start[i] != quote; ++i) {
1133 if (start[i] == '\\' && (start[i + 1] == '\\' || (quote && start[i + 1] == quote))) {
1134 *resp++ = start[++i];
1135 } else {
1136 size_t j = php_mb_mbchar_bytes_ex(start+i, (const mbfl_encoding *)encoding);
1137
1138 while (j-- > 0 && i < len) {
1139 *resp++ = start[i++];
1140 }
1141 --i;
1142 }
1143 }
1144
1145 *resp = '\0';
1146 return result;
1147 }
1148
php_mb_rfc1867_getword(const zend_encoding * encoding,char ** line,char stop)1149 static char *php_mb_rfc1867_getword(const zend_encoding *encoding, char **line, char stop) /* {{{ */
1150 {
1151 char *pos = *line, quote;
1152 char *res;
1153
1154 while (*pos && *pos != stop) {
1155 if ((quote = *pos) == '"' || quote == '\'') {
1156 ++pos;
1157 while (*pos && *pos != quote) {
1158 if (*pos == '\\' && pos[1] && pos[1] == quote) {
1159 pos += 2;
1160 } else {
1161 ++pos;
1162 }
1163 }
1164 if (*pos) {
1165 ++pos;
1166 }
1167 } else {
1168 pos += php_mb_mbchar_bytes_ex(pos, (const mbfl_encoding *)encoding);
1169
1170 }
1171 }
1172 if (*pos == '\0') {
1173 res = estrdup(*line);
1174 *line += strlen(*line);
1175 return res;
1176 }
1177
1178 res = estrndup(*line, pos - *line);
1179
1180 while (*pos == stop) {
1181 pos += php_mb_mbchar_bytes_ex(pos, (const mbfl_encoding *)encoding);
1182 }
1183
1184 *line = pos;
1185 return res;
1186 }
1187 /* }}} */
1188
/* Extract one configuration-style word from `str`.
 *
 * Leading whitespace is skipped. If nothing is left, a newly allocated
 * empty string is returned. A word that opens with a single or double
 * quote is read through php_mb_rfc1867_substring_conf() with that quote as
 * delimiter; otherwise the word runs up to the next whitespace byte or the
 * end of the string. */
static char *php_mb_rfc1867_getword_conf(const zend_encoding *encoding, char *str) /* {{{ */
{
	char *cursor = str;
	char *end;
	char first;

	while (*cursor && isspace(*(unsigned char *)cursor)) {
		cursor++;
	}

	first = *cursor;
	if (first == '\0') {
		return estrdup("");
	}

	if (first == '"' || first == '\'') {
		/* quoted word: start after the opening quote */
		cursor++;
		return php_mb_rfc1867_substring_conf(encoding, cursor, strlen(cursor), first);
	}

	/* bare word: ends at the first whitespace byte */
	for (end = cursor; *end && !isspace(*(unsigned char *)end); end++) {
		/* nothing */
	}
	return php_mb_rfc1867_substring_conf(encoding, cursor, end - cursor, 0);
}
1213 /* }}} */
1214
php_mb_rfc1867_basename(const zend_encoding * encoding,char * filename)1215 static char *php_mb_rfc1867_basename(const zend_encoding *encoding, char *filename) /* {{{ */
1216 {
1217 char *s, *s2;
1218 const size_t filename_len = strlen(filename);
1219
1220 /* The \ check should technically be needed for win32 systems only where
1221 * it is a valid path separator. However, IE in all it's wisdom always sends
1222 * the full path of the file on the user's filesystem, which means that unless
1223 * the user does basename() they get a bogus file name. Until IE's user base drops
1224 * to nill or problem is fixed this code must remain enabled for all systems. */
1225 s = php_mb_safe_strrchr_ex(filename, '\\', filename_len, (const mbfl_encoding *)encoding);
1226 s2 = php_mb_safe_strrchr_ex(filename, '/', filename_len, (const mbfl_encoding *)encoding);
1227
1228 if (s && s2) {
1229 if (s > s2) {
1230 return ++s;
1231 } else {
1232 return ++s2;
1233 }
1234 } else if (s) {
1235 return ++s;
1236 } else if (s2) {
1237 return ++s2;
1238 } else {
1239 return filename;
1240 }
1241 }
1242 /* }}} */
1243
1244 /* {{{ php.ini directive handler */
1245 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_language) */
PHP_INI_MH(OnUpdate_mbstring_language)1246 static PHP_INI_MH(OnUpdate_mbstring_language)
1247 {
1248 enum mbfl_no_language no_language;
1249
1250 no_language = mbfl_name2no_language(ZSTR_VAL(new_value));
1251 if (no_language == mbfl_no_language_invalid) {
1252 MBSTRG(language) = mbfl_no_language_neutral;
1253 return FAILURE;
1254 }
1255 MBSTRG(language) = no_language;
1256 php_mb_nls_get_default_detect_order_list(no_language, &MBSTRG(default_detect_order_list), &MBSTRG(default_detect_order_list_size));
1257 return SUCCESS;
1258 }
1259 /* }}} */
1260
1261 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_detect_order) */
PHP_INI_MH(OnUpdate_mbstring_detect_order)1262 static PHP_INI_MH(OnUpdate_mbstring_detect_order)
1263 {
1264 const mbfl_encoding **list;
1265 size_t size;
1266
1267 if (!new_value) {
1268 if (MBSTRG(detect_order_list)) {
1269 pefree(MBSTRG(detect_order_list), 1);
1270 }
1271 MBSTRG(detect_order_list) = NULL;
1272 MBSTRG(detect_order_list_size) = 0;
1273 return SUCCESS;
1274 }
1275
1276 if (FAILURE == php_mb_parse_encoding_list(ZSTR_VAL(new_value), ZSTR_LEN(new_value), &list, &size, 1)) {
1277 return FAILURE;
1278 }
1279
1280 if (MBSTRG(detect_order_list)) {
1281 pefree(MBSTRG(detect_order_list), 1);
1282 }
1283 MBSTRG(detect_order_list) = list;
1284 MBSTRG(detect_order_list_size) = size;
1285 return SUCCESS;
1286 }
1287 /* }}} */
1288
1289 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_input) */
PHP_INI_MH(OnUpdate_mbstring_http_input)1290 static PHP_INI_MH(OnUpdate_mbstring_http_input)
1291 {
1292 const mbfl_encoding **list;
1293 size_t size;
1294
1295 if (!new_value || !ZSTR_VAL(new_value)) {
1296 if (MBSTRG(http_input_list)) {
1297 pefree(MBSTRG(http_input_list), 1);
1298 }
1299 if (SUCCESS == php_mb_parse_encoding_list(get_input_encoding(), strlen(get_input_encoding())+1, &list, &size, 1)) {
1300 MBSTRG(http_input_list) = list;
1301 MBSTRG(http_input_list_size) = size;
1302 return SUCCESS;
1303 }
1304 MBSTRG(http_input_list) = NULL;
1305 MBSTRG(http_input_list_size) = 0;
1306 return SUCCESS;
1307 }
1308
1309 if (FAILURE == php_mb_parse_encoding_list(ZSTR_VAL(new_value), ZSTR_LEN(new_value), &list, &size, 1)) {
1310 return FAILURE;
1311 }
1312
1313 if (MBSTRG(http_input_list)) {
1314 pefree(MBSTRG(http_input_list), 1);
1315 }
1316 MBSTRG(http_input_list) = list;
1317 MBSTRG(http_input_list_size) = size;
1318
1319 if (stage & (PHP_INI_STAGE_ACTIVATE | PHP_INI_STAGE_RUNTIME)) {
1320 php_error_docref("ref.mbstring", E_DEPRECATED, "Use of mbstring.http_input is deprecated");
1321 }
1322
1323 return SUCCESS;
1324 }
1325 /* }}} */
1326
1327 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output) */
PHP_INI_MH(OnUpdate_mbstring_http_output)1328 static PHP_INI_MH(OnUpdate_mbstring_http_output)
1329 {
1330 const mbfl_encoding *encoding;
1331
1332 if (new_value == NULL || ZSTR_LEN(new_value) == 0) {
1333 encoding = mbfl_name2encoding(get_output_encoding());
1334 if (!encoding) {
1335 MBSTRG(http_output_encoding) = &mbfl_encoding_pass;
1336 MBSTRG(current_http_output_encoding) = &mbfl_encoding_pass;
1337 return SUCCESS;
1338 }
1339 } else {
1340 encoding = mbfl_name2encoding(ZSTR_VAL(new_value));
1341 if (!encoding) {
1342 MBSTRG(http_output_encoding) = &mbfl_encoding_pass;
1343 MBSTRG(current_http_output_encoding) = &mbfl_encoding_pass;
1344 return FAILURE;
1345 }
1346 }
1347 MBSTRG(http_output_encoding) = encoding;
1348 MBSTRG(current_http_output_encoding) = encoding;
1349
1350 if (stage & (PHP_INI_STAGE_ACTIVATE | PHP_INI_STAGE_RUNTIME)) {
1351 php_error_docref("ref.mbstring", E_DEPRECATED, "Use of mbstring.http_output is deprecated");
1352 }
1353
1354 return SUCCESS;
1355 }
1356 /* }}} */
1357
/* {{{ _php_mb_ini_mbstring_internal_encoding_set */
_php_mb_ini_mbstring_internal_encoding_set(const char * new_value,size_t new_value_length)1359 int _php_mb_ini_mbstring_internal_encoding_set(const char *new_value, size_t new_value_length)
1360 {
1361 const mbfl_encoding *encoding;
1362
1363 if (!new_value || !new_value_length || !(encoding = mbfl_name2encoding(new_value))) {
1364 /* falls back to UTF-8 if an unknown encoding name is given */
1365 encoding = mbfl_no2encoding(mbfl_no_encoding_utf8);
1366 }
1367 MBSTRG(internal_encoding) = encoding;
1368 MBSTRG(current_internal_encoding) = encoding;
1369 #if HAVE_MBREGEX
1370 {
1371 const char *enc_name = new_value;
1372 if (FAILURE == php_mb_regex_set_default_mbctype(enc_name)) {
1373 /* falls back to UTF-8 if an unknown encoding name is given */
1374 enc_name = "UTF-8";
1375 php_mb_regex_set_default_mbctype(enc_name);
1376 }
1377 php_mb_regex_set_mbctype(new_value);
1378 }
1379 #endif
1380 return SUCCESS;
1381 }
1382 /* }}} */
1383
1384 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_internal_encoding) */
PHP_INI_MH(OnUpdate_mbstring_internal_encoding)1385 static PHP_INI_MH(OnUpdate_mbstring_internal_encoding)
1386 {
1387 if (stage & (PHP_INI_STAGE_ACTIVATE | PHP_INI_STAGE_RUNTIME)) {
1388 php_error_docref("ref.mbstring", E_DEPRECATED, "Use of mbstring.internal_encoding is deprecated");
1389 }
1390
1391 if (OnUpdateString(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage) == FAILURE) {
1392 return FAILURE;
1393 }
1394
1395 if (stage & (PHP_INI_STAGE_STARTUP | PHP_INI_STAGE_SHUTDOWN | PHP_INI_STAGE_RUNTIME)) {
1396 if (new_value && ZSTR_LEN(new_value)) {
1397 return _php_mb_ini_mbstring_internal_encoding_set(ZSTR_VAL(new_value), ZSTR_LEN(new_value));
1398 } else {
1399 return _php_mb_ini_mbstring_internal_encoding_set(get_internal_encoding(), strlen(get_internal_encoding())+1);
1400 }
1401 } else {
1402 /* the corresponding mbstring globals needs to be set according to the
1403 * ini value in the later stage because it never falls back to the
1404 * default value if 1. no value for mbstring.internal_encoding is given,
1405 * 2. mbstring.language directive is processed in per-dir or runtime
1406 * context and 3. call to the handler for mbstring.language is done
1407 * after mbstring.internal_encoding is handled. */
1408 return SUCCESS;
1409 }
1410 }
1411 /* }}} */
1412
1413 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_substitute_character) */
PHP_INI_MH(OnUpdate_mbstring_substitute_character)1414 static PHP_INI_MH(OnUpdate_mbstring_substitute_character)
1415 {
1416 int c;
1417 char *endptr = NULL;
1418
1419 if (new_value != NULL) {
1420 if (strcasecmp("none", ZSTR_VAL(new_value)) == 0) {
1421 MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
1422 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
1423 } else if (strcasecmp("long", ZSTR_VAL(new_value)) == 0) {
1424 MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
1425 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
1426 } else if (strcasecmp("entity", ZSTR_VAL(new_value)) == 0) {
1427 MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
1428 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
1429 } else {
1430 MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1431 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1432 if (ZSTR_LEN(new_value) > 0) {
1433 c = strtol(ZSTR_VAL(new_value), &endptr, 0);
1434 if (*endptr == '\0') {
1435 MBSTRG(filter_illegal_substchar) = c;
1436 MBSTRG(current_filter_illegal_substchar) = c;
1437 }
1438 }
1439 }
1440 } else {
1441 MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1442 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1443 MBSTRG(filter_illegal_substchar) = 0x3f; /* '?' */
1444 MBSTRG(current_filter_illegal_substchar) = 0x3f; /* '?' */
1445 }
1446
1447 return SUCCESS;
1448 }
1449 /* }}} */
1450
1451 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_encoding_translation) */
PHP_INI_MH(OnUpdate_mbstring_encoding_translation)1452 static PHP_INI_MH(OnUpdate_mbstring_encoding_translation)
1453 {
1454 if (new_value == NULL) {
1455 return FAILURE;
1456 }
1457
1458 OnUpdateBool(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
1459
1460 if (MBSTRG(encoding_translation)) {
1461 sapi_unregister_post_entry(php_post_entries);
1462 sapi_register_post_entries(mbstr_post_entries);
1463 } else {
1464 sapi_unregister_post_entry(mbstr_post_entries);
1465 sapi_register_post_entries(php_post_entries);
1466 }
1467
1468 return SUCCESS;
1469 }
1470 /* }}} */
1471
/* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes) */
PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes)1473 static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes)
1474 {
1475 zend_string *tmp;
1476 void *re = NULL;
1477
1478 if (!new_value) {
1479 new_value = entry->orig_value;
1480 }
1481 tmp = php_trim(new_value, NULL, 0, 3);
1482
1483 if (ZSTR_LEN(tmp) > 0) {
1484 if (!(re = _php_mb_compile_regex(ZSTR_VAL(tmp)))) {
1485 zend_string_release_ex(tmp, 0);
1486 return FAILURE;
1487 }
1488 }
1489
1490 if (MBSTRG(http_output_conv_mimetypes)) {
1491 _php_mb_free_regex(MBSTRG(http_output_conv_mimetypes));
1492 }
1493
1494 MBSTRG(http_output_conv_mimetypes) = re;
1495
1496 zend_string_release_ex(tmp, 0);
1497 return SUCCESS;
1498 }
1499 /* }}} */
1500 /* }}} */
1501
1502 /* {{{ php.ini directive registration */
/* Directive table registered by REGISTER_INI_ENTRIES() in MINIT. Each entry
 * gives the directive name, its default value, the scopes in which it may
 * be changed and the update handler. The STD_ variants additionally name
 * the zend_mbstring_globals field the handler writes to. */
PHP_INI_BEGIN()
	PHP_INI_ENTRY("mbstring.language", "neutral", PHP_INI_ALL, OnUpdate_mbstring_language)
	PHP_INI_ENTRY("mbstring.detect_order", NULL, PHP_INI_ALL, OnUpdate_mbstring_detect_order)
	PHP_INI_ENTRY("mbstring.http_input", NULL, PHP_INI_ALL, OnUpdate_mbstring_http_input)
	PHP_INI_ENTRY("mbstring.http_output", NULL, PHP_INI_ALL, OnUpdate_mbstring_http_output)
	STD_PHP_INI_ENTRY("mbstring.internal_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_internal_encoding, internal_encoding_name, zend_mbstring_globals, mbstring_globals)
	PHP_INI_ENTRY("mbstring.substitute_character", NULL, PHP_INI_ALL, OnUpdate_mbstring_substitute_character)
	/* function overloading can only be configured at system level */
	STD_PHP_INI_ENTRY("mbstring.func_overload", "0",
	PHP_INI_SYSTEM, OnUpdateLong, func_overload, zend_mbstring_globals, mbstring_globals)

	STD_PHP_INI_BOOLEAN("mbstring.encoding_translation", "0",
		PHP_INI_SYSTEM | PHP_INI_PERDIR,
		OnUpdate_mbstring_encoding_translation,
		encoding_translation, zend_mbstring_globals, mbstring_globals)
	/* default pattern: text/ MIME types and application/xhtml+xml */
	PHP_INI_ENTRY("mbstring.http_output_conv_mimetypes",
		"^(text/|application/xhtml\\+xml)",
		PHP_INI_ALL,
		OnUpdate_mbstring_http_output_conv_mimetypes)

	STD_PHP_INI_BOOLEAN("mbstring.strict_detection", "0",
		PHP_INI_ALL,
		OnUpdateBool,
		strict_detection, zend_mbstring_globals, mbstring_globals)
#if HAVE_MBREGEX
	/* only registered when mbregex support is compiled in */
	STD_PHP_INI_ENTRY("mbstring.regex_stack_limit", "100000",PHP_INI_ALL, OnUpdateLong, regex_stack_limit, zend_mbstring_globals, mbstring_globals)
#endif
PHP_INI_END()
1530 /* }}} */
1531
1532 /* {{{ module global initialize handler */
1533 static PHP_GINIT_FUNCTION(mbstring)
1534 {
1535 #if defined(COMPILE_DL_MBSTRING) && defined(ZTS)
1536 ZEND_TSRMLS_CACHE_UPDATE();
1537 #endif
1538
1539 mbstring_globals->language = mbfl_no_language_uni;
1540 mbstring_globals->internal_encoding = NULL;
1541 mbstring_globals->current_internal_encoding = mbstring_globals->internal_encoding;
1542 mbstring_globals->http_output_encoding = &mbfl_encoding_pass;
1543 mbstring_globals->current_http_output_encoding = &mbfl_encoding_pass;
1544 mbstring_globals->http_input_identify = NULL;
1545 mbstring_globals->http_input_identify_get = NULL;
1546 mbstring_globals->http_input_identify_post = NULL;
1547 mbstring_globals->http_input_identify_cookie = NULL;
1548 mbstring_globals->http_input_identify_string = NULL;
1549 mbstring_globals->http_input_list = NULL;
1550 mbstring_globals->http_input_list_size = 0;
1551 mbstring_globals->detect_order_list = NULL;
1552 mbstring_globals->detect_order_list_size = 0;
1553 mbstring_globals->current_detect_order_list = NULL;
1554 mbstring_globals->current_detect_order_list_size = 0;
1555 mbstring_globals->default_detect_order_list = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut;
1556 mbstring_globals->default_detect_order_list_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
1557 mbstring_globals->filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1558 mbstring_globals->filter_illegal_substchar = 0x3f; /* '?' */
1559 mbstring_globals->current_filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1560 mbstring_globals->current_filter_illegal_substchar = 0x3f; /* '?' */
1561 mbstring_globals->illegalchars = 0;
1562 mbstring_globals->func_overload = 0;
1563 mbstring_globals->encoding_translation = 0;
1564 mbstring_globals->strict_detection = 0;
1565 mbstring_globals->outconv = NULL;
1566 mbstring_globals->http_output_conv_mimetypes = NULL;
1567 #if HAVE_MBREGEX
1568 mbstring_globals->mb_regex_globals = php_mb_regex_globals_alloc();
1569 #endif
1570 mbstring_globals->last_used_encoding_name = NULL;
1571 mbstring_globals->last_used_encoding = NULL;
1572 }
1573 /* }}} */
1574
1575 /* {{{ PHP_GSHUTDOWN_FUNCTION */
PHP_GSHUTDOWN_FUNCTION(mbstring)1576 static PHP_GSHUTDOWN_FUNCTION(mbstring)
1577 {
1578 if (mbstring_globals->http_input_list) {
1579 free(mbstring_globals->http_input_list);
1580 }
1581 if (mbstring_globals->detect_order_list) {
1582 free(mbstring_globals->detect_order_list);
1583 }
1584 if (mbstring_globals->http_output_conv_mimetypes) {
1585 _php_mb_free_regex(mbstring_globals->http_output_conv_mimetypes);
1586 }
1587 #if HAVE_MBREGEX
1588 php_mb_regex_globals_free(mbstring_globals->mb_regex_globals);
1589 #endif
1590 }
1591 /* }}} */
1592
1593 /* {{{ PHP_MINIT_FUNCTION(mbstring) */
PHP_MINIT_FUNCTION(mbstring)1594 PHP_MINIT_FUNCTION(mbstring)
1595 {
1596 #if defined(COMPILE_DL_MBSTRING) && defined(ZTS)
1597 ZEND_TSRMLS_CACHE_UPDATE();
1598 #endif
1599 __mbfl_allocators = (mbfl_allocators*)&_php_mb_allocators;
1600
1601 REGISTER_INI_ENTRIES();
1602
1603 /* This is a global handler. Should not be set in a per-request handler. */
1604 sapi_register_treat_data(mbstr_treat_data);
1605
1606 /* Post handlers are stored in the thread-local context. */
1607 if (MBSTRG(encoding_translation)) {
1608 sapi_register_post_entries(mbstr_post_entries);
1609 }
1610
1611 REGISTER_LONG_CONSTANT("MB_OVERLOAD_MAIL", MB_OVERLOAD_MAIL, CONST_CS | CONST_PERSISTENT);
1612 REGISTER_LONG_CONSTANT("MB_OVERLOAD_STRING", MB_OVERLOAD_STRING, CONST_CS | CONST_PERSISTENT);
1613 REGISTER_LONG_CONSTANT("MB_OVERLOAD_REGEX", MB_OVERLOAD_REGEX, CONST_CS | CONST_PERSISTENT);
1614
1615 REGISTER_LONG_CONSTANT("MB_CASE_UPPER", PHP_UNICODE_CASE_UPPER, CONST_CS | CONST_PERSISTENT);
1616 REGISTER_LONG_CONSTANT("MB_CASE_LOWER", PHP_UNICODE_CASE_LOWER, CONST_CS | CONST_PERSISTENT);
1617 REGISTER_LONG_CONSTANT("MB_CASE_TITLE", PHP_UNICODE_CASE_TITLE, CONST_CS | CONST_PERSISTENT);
1618 REGISTER_LONG_CONSTANT("MB_CASE_FOLD", PHP_UNICODE_CASE_FOLD, CONST_CS | CONST_PERSISTENT);
1619 REGISTER_LONG_CONSTANT("MB_CASE_UPPER_SIMPLE", PHP_UNICODE_CASE_UPPER_SIMPLE, CONST_CS | CONST_PERSISTENT);
1620 REGISTER_LONG_CONSTANT("MB_CASE_LOWER_SIMPLE", PHP_UNICODE_CASE_LOWER_SIMPLE, CONST_CS | CONST_PERSISTENT);
1621 REGISTER_LONG_CONSTANT("MB_CASE_TITLE_SIMPLE", PHP_UNICODE_CASE_TITLE_SIMPLE, CONST_CS | CONST_PERSISTENT);
1622 REGISTER_LONG_CONSTANT("MB_CASE_FOLD_SIMPLE", PHP_UNICODE_CASE_FOLD_SIMPLE, CONST_CS | CONST_PERSISTENT);
1623
1624 #if HAVE_MBREGEX
1625 PHP_MINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1626 #endif
1627
1628 if (FAILURE == zend_multibyte_set_functions(&php_mb_zend_multibyte_functions)) {
1629 return FAILURE;
1630 }
1631
1632 php_rfc1867_set_multibyte_callbacks(
1633 php_mb_encoding_translation,
1634 php_mb_gpc_get_detect_order,
1635 php_mb_gpc_set_input_encoding,
1636 php_mb_rfc1867_getword,
1637 php_mb_rfc1867_getword_conf,
1638 php_mb_rfc1867_basename);
1639
1640 /* override original function (deprecated). */
1641 if (MBSTRG(func_overload)){
1642 zend_function *func, *orig;
1643 const struct mb_overload_def *p;
1644 zend_string *str;
1645
1646 p = &(mb_ovld[0]);
1647 while (p->type > 0) {
1648 if ((MBSTRG(func_overload) & p->type) == p->type &&
1649 !zend_hash_str_exists(CG(function_table), p->save_func, strlen(p->save_func))
1650 ) {
1651 func = zend_hash_str_find_ptr(CG(function_table), p->ovld_func, strlen(p->ovld_func));
1652
1653 if ((orig = zend_hash_str_find_ptr(CG(function_table), p->orig_func, strlen(p->orig_func))) == NULL) {
1654 php_error_docref("ref.mbstring", E_WARNING, "mbstring couldn't find function %s.", p->orig_func);
1655 return FAILURE;
1656 } else {
1657 ZEND_ASSERT(orig->type == ZEND_INTERNAL_FUNCTION);
1658 str = zend_string_init_interned(p->save_func, strlen(p->save_func), 1);
1659 zend_hash_add_mem(CG(function_table), str, orig, sizeof(zend_internal_function));
1660 zend_string_release_ex(str, 1);
1661 function_add_ref(orig);
1662
1663 str = zend_string_init_interned(p->orig_func, strlen(p->orig_func), 1);
1664 zend_hash_update_mem(CG(function_table), str, func, sizeof(zend_internal_function));
1665 zend_string_release_ex(str, 1);
1666 function_add_ref(func);
1667 }
1668 }
1669 p++;
1670 }
1671 }
1672
1673 return SUCCESS;
1674 }
1675 /* }}} */
1676
1677 /* {{{ PHP_MSHUTDOWN_FUNCTION(mbstring) */
PHP_MSHUTDOWN_FUNCTION(mbstring)1678 PHP_MSHUTDOWN_FUNCTION(mbstring)
1679 {
1680 /* clear overloaded function. */
1681 if (MBSTRG(func_overload)){
1682 const struct mb_overload_def *p;
1683 zend_function *orig;
1684
1685 p = &(mb_ovld[0]);
1686 while (p->type > 0) {
1687 if ((MBSTRG(func_overload) & p->type) == p->type &&
1688 (orig = zend_hash_str_find_ptr(CG(function_table), p->save_func, strlen(p->save_func)))) {
1689
1690 zend_hash_str_update_mem(CG(function_table), p->orig_func, strlen(p->orig_func), orig, sizeof(zend_internal_function));
1691 function_add_ref(orig);
1692 zend_hash_str_del(CG(function_table), p->save_func, strlen(p->save_func));
1693 }
1694 p++;
1695 }
1696 }
1697
1698 UNREGISTER_INI_ENTRIES();
1699
1700 zend_multibyte_restore_functions();
1701
1702 #if HAVE_MBREGEX
1703 PHP_MSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1704 #endif
1705
1706 return SUCCESS;
1707 }
1708 /* }}} */
1709
1710 /* {{{ PHP_RINIT_FUNCTION(mbstring) */
PHP_RINIT_FUNCTION(mbstring)1711 PHP_RINIT_FUNCTION(mbstring)
1712 {
1713 MBSTRG(current_internal_encoding) = MBSTRG(internal_encoding);
1714 MBSTRG(current_http_output_encoding) = MBSTRG(http_output_encoding);
1715 MBSTRG(current_filter_illegal_mode) = MBSTRG(filter_illegal_mode);
1716 MBSTRG(current_filter_illegal_substchar) = MBSTRG(filter_illegal_substchar);
1717
1718 MBSTRG(illegalchars) = 0;
1719
1720 php_mb_populate_current_detect_order_list();
1721
1722 /* override original function. */
1723 if (MBSTRG(func_overload)){
1724 zend_error(E_DEPRECATED, "The mbstring.func_overload directive is deprecated");
1725
1726 CG(compiler_options) |= ZEND_COMPILE_NO_BUILTIN_STRLEN;
1727 }
1728 #if HAVE_MBREGEX
1729 PHP_RINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1730 #endif
1731 zend_multibyte_set_internal_encoding((const zend_encoding *)MBSTRG(internal_encoding));
1732
1733 return SUCCESS;
1734 }
1735 /* }}} */
1736
1737 /* {{{ PHP_RSHUTDOWN_FUNCTION(mbstring) */
PHP_RSHUTDOWN_FUNCTION(mbstring)1738 PHP_RSHUTDOWN_FUNCTION(mbstring)
1739 {
1740 if (MBSTRG(current_detect_order_list) != NULL) {
1741 efree(MBSTRG(current_detect_order_list));
1742 MBSTRG(current_detect_order_list) = NULL;
1743 MBSTRG(current_detect_order_list_size) = 0;
1744 }
1745 if (MBSTRG(outconv) != NULL) {
1746 MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
1747 mbfl_buffer_converter_delete(MBSTRG(outconv));
1748 MBSTRG(outconv) = NULL;
1749 }
1750
1751 /* clear http input identification. */
1752 MBSTRG(http_input_identify) = NULL;
1753 MBSTRG(http_input_identify_post) = NULL;
1754 MBSTRG(http_input_identify_get) = NULL;
1755 MBSTRG(http_input_identify_cookie) = NULL;
1756 MBSTRG(http_input_identify_string) = NULL;
1757
1758 if (MBSTRG(last_used_encoding_name)) {
1759 efree(MBSTRG(last_used_encoding_name));
1760 MBSTRG(last_used_encoding_name) = NULL;
1761 }
1762
1763 #if HAVE_MBREGEX
1764 PHP_RSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1765 #endif
1766
1767 return SUCCESS;
1768 }
1769 /* }}} */
1770
1771 /* {{{ PHP_MINFO_FUNCTION(mbstring) */
PHP_MINFO_FUNCTION(mbstring)1772 PHP_MINFO_FUNCTION(mbstring)
1773 {
1774 php_info_print_table_start();
1775 php_info_print_table_row(2, "Multibyte Support", "enabled");
1776 php_info_print_table_row(2, "Multibyte string engine", "libmbfl");
1777 php_info_print_table_row(2, "HTTP input encoding translation", MBSTRG(encoding_translation) ? "enabled": "disabled");
1778 {
1779 char tmp[256];
1780 snprintf(tmp, sizeof(tmp), "%d.%d.%d", MBFL_VERSION_MAJOR, MBFL_VERSION_MINOR, MBFL_VERSION_TEENY);
1781 php_info_print_table_row(2, "libmbfl version", tmp);
1782 }
1783 #if HAVE_ONIG
1784 {
1785 char tmp[256];
1786 snprintf(tmp, sizeof(tmp), "%d.%d.%d", ONIGURUMA_VERSION_MAJOR, ONIGURUMA_VERSION_MINOR, ONIGURUMA_VERSION_TEENY);
1787 php_info_print_table_row(2, "oniguruma version", tmp);
1788 }
1789 #endif
1790 php_info_print_table_end();
1791
1792 php_info_print_table_start();
1793 php_info_print_table_header(1, "mbstring extension makes use of \"streamable kanji code filter and converter\", which is distributed under the GNU Lesser General Public License version 2.1.");
1794 php_info_print_table_end();
1795
1796 #if HAVE_MBREGEX
1797 PHP_MINFO(mb_regex)(ZEND_MODULE_INFO_FUNC_ARGS_PASSTHRU);
1798 #endif
1799
1800 DISPLAY_INI_ENTRIES();
1801 }
1802 /* }}} */
1803
1804 /* {{{ proto string mb_language([string language])
1805 Sets the current language or Returns the current language as a string */
PHP_FUNCTION(mb_language)1806 PHP_FUNCTION(mb_language)
1807 {
1808 zend_string *name = NULL;
1809
1810 if (zend_parse_parameters(ZEND_NUM_ARGS(), "|S", &name) == FAILURE) {
1811 return;
1812 }
1813 if (name == NULL) {
1814 RETVAL_STRING((char *)mbfl_no_language2name(MBSTRG(language)));
1815 } else {
1816 zend_string *ini_name = zend_string_init("mbstring.language", sizeof("mbstring.language") - 1, 0);
1817 if (FAILURE == zend_alter_ini_entry(ini_name, name, PHP_INI_USER, PHP_INI_STAGE_RUNTIME)) {
1818 php_error_docref(NULL, E_WARNING, "Unknown language \"%s\"", ZSTR_VAL(name));
1819 RETVAL_FALSE;
1820 } else {
1821 RETVAL_TRUE;
1822 }
1823 zend_string_release_ex(ini_name, 0);
1824 }
1825 }
1826 /* }}} */
1827
1828 /* {{{ proto string mb_internal_encoding([string encoding])
1829 Sets the current internal encoding or Returns the current internal encoding as a string */
PHP_FUNCTION(mb_internal_encoding)1830 PHP_FUNCTION(mb_internal_encoding)
1831 {
1832 const char *name = NULL;
1833 size_t name_len;
1834 const mbfl_encoding *encoding;
1835
1836 if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s", &name, &name_len) == FAILURE) {
1837 return;
1838 }
1839 if (name == NULL) {
1840 name = MBSTRG(current_internal_encoding) ? MBSTRG(current_internal_encoding)->name: NULL;
1841 if (name != NULL) {
1842 RETURN_STRING(name);
1843 } else {
1844 RETURN_FALSE;
1845 }
1846 } else {
1847 encoding = mbfl_name2encoding(name);
1848 if (!encoding) {
1849 php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", name);
1850 RETURN_FALSE;
1851 } else {
1852 MBSTRG(current_internal_encoding) = encoding;
1853 RETURN_TRUE;
1854 }
1855 }
1856 }
1857 /* }}} */
1858
1859 /* {{{ proto mixed mb_http_input([string type])
1860 Returns the input encoding */
PHP_FUNCTION(mb_http_input)1861 PHP_FUNCTION(mb_http_input)
1862 {
1863 char *typ = NULL;
1864 size_t typ_len;
1865 int retname;
1866 char *list, *temp;
1867 const mbfl_encoding *result = NULL;
1868
1869 retname = 1;
1870 if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s", &typ, &typ_len) == FAILURE) {
1871 return;
1872 }
1873 if (typ == NULL) {
1874 result = MBSTRG(http_input_identify);
1875 } else {
1876 switch (*typ) {
1877 case 'G':
1878 case 'g':
1879 result = MBSTRG(http_input_identify_get);
1880 break;
1881 case 'P':
1882 case 'p':
1883 result = MBSTRG(http_input_identify_post);
1884 break;
1885 case 'C':
1886 case 'c':
1887 result = MBSTRG(http_input_identify_cookie);
1888 break;
1889 case 'S':
1890 case 's':
1891 result = MBSTRG(http_input_identify_string);
1892 break;
1893 case 'I':
1894 case 'i':
1895 {
1896 const mbfl_encoding **entry = MBSTRG(http_input_list);
1897 const size_t n = MBSTRG(http_input_list_size);
1898 size_t i;
1899 array_init(return_value);
1900 for (i = 0; i < n; i++) {
1901 add_next_index_string(return_value, (*entry)->name);
1902 entry++;
1903 }
1904 retname = 0;
1905 }
1906 break;
1907 case 'L':
1908 case 'l':
1909 {
1910 const mbfl_encoding **entry = MBSTRG(http_input_list);
1911 const size_t n = MBSTRG(http_input_list_size);
1912 size_t i;
1913 list = NULL;
1914 for (i = 0; i < n; i++) {
1915 if (list) {
1916 temp = list;
1917 spprintf(&list, 0, "%s,%s", temp, (*entry)->name);
1918 efree(temp);
1919 if (!list) {
1920 break;
1921 }
1922 } else {
1923 list = estrdup((*entry)->name);
1924 }
1925 entry++;
1926 }
1927 }
1928 if (!list) {
1929 RETURN_FALSE;
1930 }
1931 RETVAL_STRING(list);
1932 efree(list);
1933 retname = 0;
1934 break;
1935 default:
1936 result = MBSTRG(http_input_identify);
1937 break;
1938 }
1939 }
1940
1941 if (retname) {
1942 if (result) {
1943 RETVAL_STRING(result->name);
1944 } else {
1945 RETVAL_FALSE;
1946 }
1947 }
1948 }
1949 /* }}} */
1950
1951 /* {{{ proto string mb_http_output([string encoding])
1952 Sets the current output_encoding or returns the current output_encoding as a string */
PHP_FUNCTION(mb_http_output)1953 PHP_FUNCTION(mb_http_output)
1954 {
1955 const char *name = NULL;
1956 size_t name_len;
1957 const mbfl_encoding *encoding;
1958
1959 if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s", &name, &name_len) == FAILURE) {
1960 return;
1961 }
1962
1963 if (name == NULL) {
1964 name = MBSTRG(current_http_output_encoding) ? MBSTRG(current_http_output_encoding)->name: NULL;
1965 if (name != NULL) {
1966 RETURN_STRING(name);
1967 } else {
1968 RETURN_FALSE;
1969 }
1970 } else {
1971 encoding = mbfl_name2encoding(name);
1972 if (!encoding) {
1973 php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", name);
1974 RETURN_FALSE;
1975 } else {
1976 MBSTRG(current_http_output_encoding) = encoding;
1977 RETURN_TRUE;
1978 }
1979 }
1980 }
1981 /* }}} */
1982
1983 /* {{{ proto bool|array mb_detect_order([mixed encoding-list])
1984 Sets the current detect_order or Return the current detect_order as a array */
PHP_FUNCTION(mb_detect_order)1985 PHP_FUNCTION(mb_detect_order)
1986 {
1987 zval *arg1 = NULL;
1988
1989 if (zend_parse_parameters(ZEND_NUM_ARGS(), "|z", &arg1) == FAILURE) {
1990 return;
1991 }
1992
1993 if (!arg1) {
1994 size_t i;
1995 size_t n = MBSTRG(current_detect_order_list_size);
1996 const mbfl_encoding **entry = MBSTRG(current_detect_order_list);
1997 array_init(return_value);
1998 for (i = 0; i < n; i++) {
1999 add_next_index_string(return_value, (*entry)->name);
2000 entry++;
2001 }
2002 } else {
2003 const mbfl_encoding **list = NULL;
2004 size_t size = 0;
2005 switch (Z_TYPE_P(arg1)) {
2006 case IS_ARRAY:
2007 if (FAILURE == php_mb_parse_encoding_array(arg1, &list, &size, 0)) {
2008 if (list) {
2009 efree(list);
2010 }
2011 RETURN_FALSE;
2012 }
2013 break;
2014 default:
2015 convert_to_string_ex(arg1);
2016 if (FAILURE == php_mb_parse_encoding_list(Z_STRVAL_P(arg1), Z_STRLEN_P(arg1), &list, &size, 0)) {
2017 if (list) {
2018 efree(list);
2019 }
2020 RETURN_FALSE;
2021 }
2022 break;
2023 }
2024
2025 if (list == NULL) {
2026 RETURN_FALSE;
2027 }
2028
2029 if (MBSTRG(current_detect_order_list)) {
2030 efree(MBSTRG(current_detect_order_list));
2031 }
2032 MBSTRG(current_detect_order_list) = list;
2033 MBSTRG(current_detect_order_list_size) = size;
2034 RETURN_TRUE;
2035 }
2036 }
2037 /* }}} */
2038
/* Tells whether cp may be used as a substitute character.
 * Returns 1 for code points in 0x1..0x10FFFF that are not surrogates
 * (0xD800..0xDFFF), and 0 for everything else (including 0 and negatives). */
static inline int php_mb_check_code_point(zend_long cp)
{
	/* Zero, negative values and anything from 0x110000 up are rejected. */
	const int in_unicode_range = (cp > 0 && cp < 0x110000);
	/* Surrogate code points are never valid on their own and only a single
	 * substitute character is allowed. */
	const int is_surrogate = (cp >= 0xd800 && cp <= 0xdfff);

	/* As we do not know the target encoding of the conversion operation that is
	 * going to use the substitution character, we cannot check whether the code
	 * point is actually mapped in that encoding at this point. Thus everything
	 * else has to be accepted. */
	return in_unicode_range && !is_surrogate;
}
2057
2058 /* {{{ proto mixed mb_substitute_character([mixed substchar])
2059 Sets the current substitute_character or returns the current substitute_character */
PHP_FUNCTION(mb_substitute_character)2060 PHP_FUNCTION(mb_substitute_character)
2061 {
2062 zval *arg1 = NULL;
2063
2064 if (zend_parse_parameters(ZEND_NUM_ARGS(), "|z", &arg1) == FAILURE) {
2065 return;
2066 }
2067
2068 if (!arg1) {
2069 if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
2070 RETURN_STRING("none");
2071 } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
2072 RETURN_STRING("long");
2073 } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
2074 RETURN_STRING("entity");
2075 } else {
2076 RETURN_LONG(MBSTRG(current_filter_illegal_substchar));
2077 }
2078 } else {
2079 RETVAL_TRUE;
2080
2081 switch (Z_TYPE_P(arg1)) {
2082 case IS_STRING:
2083 if (strncasecmp("none", Z_STRVAL_P(arg1), Z_STRLEN_P(arg1)) == 0) {
2084 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
2085 } else if (strncasecmp("long", Z_STRVAL_P(arg1), Z_STRLEN_P(arg1)) == 0) {
2086 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
2087 } else if (strncasecmp("entity", Z_STRVAL_P(arg1), Z_STRLEN_P(arg1)) == 0) {
2088 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
2089 } else {
2090 convert_to_long_ex(arg1);
2091
2092 if (php_mb_check_code_point(Z_LVAL_P(arg1))) {
2093 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
2094 MBSTRG(current_filter_illegal_substchar) = Z_LVAL_P(arg1);
2095 } else {
2096 php_error_docref(NULL, E_WARNING, "Unknown character");
2097 RETURN_FALSE;
2098 }
2099 }
2100 break;
2101 default:
2102 convert_to_long_ex(arg1);
2103 if (php_mb_check_code_point(Z_LVAL_P(arg1))) {
2104 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
2105 MBSTRG(current_filter_illegal_substchar) = Z_LVAL_P(arg1);
2106 } else {
2107 php_error_docref(NULL, E_WARNING, "Unknown character");
2108 RETURN_FALSE;
2109 }
2110 break;
2111 }
2112 }
2113 }
2114 /* }}} */
2115
2116 /* {{{ proto string mb_preferred_mime_name(string encoding)
2117 Return the preferred MIME name (charset) as a string */
PHP_FUNCTION(mb_preferred_mime_name)2118 PHP_FUNCTION(mb_preferred_mime_name)
2119 {
2120 enum mbfl_no_encoding no_encoding;
2121 char *name = NULL;
2122 size_t name_len;
2123
2124 if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &name, &name_len) == FAILURE) {
2125 return;
2126 } else {
2127 no_encoding = mbfl_name2no_encoding(name);
2128 if (no_encoding == mbfl_no_encoding_invalid) {
2129 php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", name);
2130 RETVAL_FALSE;
2131 } else {
2132 const char *preferred_name = mbfl_no2preferred_mime_name(no_encoding);
2133 if (preferred_name == NULL || *preferred_name == '\0') {
2134 php_error_docref(NULL, E_WARNING, "No MIME preferred name corresponding to \"%s\"", name);
2135 RETVAL_FALSE;
2136 } else {
2137 RETVAL_STRING((char *)preferred_name);
2138 }
2139 }
2140 }
2141 }
2142 /* }}} */
2143
/* Byte-range predicates; both evaluate to 1 or 0. Note that the argument is
 * evaluated more than once.
 *   IS_SJIS1: c is in 0x81..0x9f or 0xe0..0xf5
 *   IS_SJIS2: c is in 0x40..0x7e or 0x80..0xfc */
#define IS_SJIS1(c) (((c) >= 0x81 && (c) <= 0x9f) || ((c) >= 0xe0 && (c) <= 0xf5))
#define IS_SJIS2(c) (((c) >= 0x40 && (c) <= 0x7e) || ((c) >= 0x80 && (c) <= 0xfc))
2146
2147 /* {{{ proto bool mb_parse_str(string encoded_string [, array result])
2148 Parses GET/POST/COOKIE data and sets global variables */
PHP_FUNCTION(mb_parse_str)2149 PHP_FUNCTION(mb_parse_str)
2150 {
2151 zval *track_vars_array = NULL;
2152 char *encstr = NULL;
2153 size_t encstr_len;
2154 php_mb_encoding_handler_info_t info;
2155 const mbfl_encoding *detected;
2156
2157 track_vars_array = NULL;
2158 if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|z/", &encstr, &encstr_len, &track_vars_array) == FAILURE) {
2159 return;
2160 }
2161
2162 if (track_vars_array != NULL) {
2163 /* Clear out the array */
2164 zval_ptr_dtor(track_vars_array);
2165 array_init(track_vars_array);
2166 }
2167
2168 encstr = estrndup(encstr, encstr_len);
2169
2170 info.data_type = PARSE_STRING;
2171 info.separator = PG(arg_separator).input;
2172 info.report_errors = 1;
2173 info.to_encoding = MBSTRG(current_internal_encoding);
2174 info.to_language = MBSTRG(language);
2175 info.from_encodings = MBSTRG(http_input_list);
2176 info.num_from_encodings = MBSTRG(http_input_list_size);
2177 info.from_language = MBSTRG(language);
2178
2179 if (track_vars_array != NULL) {
2180 detected = _php_mb_encoding_handler_ex(&info, track_vars_array, encstr);
2181 } else {
2182 zval tmp;
2183 zend_array *symbol_table;
2184 if (zend_forbid_dynamic_call("mb_parse_str() with a single argument") == FAILURE) {
2185 efree(encstr);
2186 return;
2187 }
2188
2189 php_error_docref(NULL, E_DEPRECATED, "Calling mb_parse_str() without the result argument is deprecated");
2190
2191 symbol_table = zend_rebuild_symbol_table();
2192 ZVAL_ARR(&tmp, symbol_table);
2193 detected = _php_mb_encoding_handler_ex(&info, &tmp, encstr);
2194 }
2195
2196 MBSTRG(http_input_identify) = detected;
2197
2198 RETVAL_BOOL(detected);
2199
2200 if (encstr != NULL) efree(encstr);
2201 }
2202 /* }}} */
2203
2204 /* {{{ proto string mb_output_handler(string contents, int status)
2205 Returns string in output buffer converted to the http_output encoding */
PHP_FUNCTION(mb_output_handler)2206 PHP_FUNCTION(mb_output_handler)
2207 {
2208 char *arg_string;
2209 size_t arg_string_len;
2210 zend_long arg_status;
2211 mbfl_string string, result;
2212 const char *charset;
2213 char *p;
2214 const mbfl_encoding *encoding;
2215 int last_feed;
2216 size_t len;
2217 unsigned char send_text_mimetype = 0;
2218 char *s, *mimetype = NULL;
2219
2220 if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl", &arg_string, &arg_string_len, &arg_status) == FAILURE) {
2221 return;
2222 }
2223
2224 encoding = MBSTRG(current_http_output_encoding);
2225
2226 /* start phase only */
2227 if ((arg_status & PHP_OUTPUT_HANDLER_START) != 0) {
2228 /* delete the converter just in case. */
2229 if (MBSTRG(outconv)) {
2230 MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
2231 mbfl_buffer_converter_delete(MBSTRG(outconv));
2232 MBSTRG(outconv) = NULL;
2233 }
2234 if (encoding == &mbfl_encoding_pass) {
2235 RETURN_STRINGL(arg_string, arg_string_len);
2236 }
2237
2238 /* analyze mime type */
2239 if (SG(sapi_headers).mimetype &&
2240 _php_mb_match_regex(
2241 MBSTRG(http_output_conv_mimetypes),
2242 SG(sapi_headers).mimetype,
2243 strlen(SG(sapi_headers).mimetype))) {
2244 if ((s = strchr(SG(sapi_headers).mimetype,';')) == NULL){
2245 mimetype = estrdup(SG(sapi_headers).mimetype);
2246 } else {
2247 mimetype = estrndup(SG(sapi_headers).mimetype,s-SG(sapi_headers).mimetype);
2248 }
2249 send_text_mimetype = 1;
2250 } else if (SG(sapi_headers).send_default_content_type) {
2251 mimetype = SG(default_mimetype) ? SG(default_mimetype) : SAPI_DEFAULT_MIMETYPE;
2252 }
2253
2254 /* if content-type is not yet set, set it and activate the converter */
2255 if (SG(sapi_headers).send_default_content_type || send_text_mimetype) {
2256 charset = encoding->mime_name;
2257 if (charset) {
2258 len = spprintf( &p, 0, "Content-Type: %s; charset=%s", mimetype, charset );
2259 if (sapi_add_header(p, len, 0) != FAILURE) {
2260 SG(sapi_headers).send_default_content_type = 0;
2261 }
2262 }
2263 /* activate the converter */
2264 MBSTRG(outconv) = mbfl_buffer_converter_new(MBSTRG(current_internal_encoding), encoding, 0);
2265 if (send_text_mimetype){
2266 efree(mimetype);
2267 }
2268 }
2269 }
2270
2271 /* just return if the converter is not activated. */
2272 if (MBSTRG(outconv) == NULL) {
2273 RETURN_STRINGL(arg_string, arg_string_len);
2274 }
2275
2276 /* flag */
2277 last_feed = ((arg_status & PHP_OUTPUT_HANDLER_END) != 0);
2278 /* mode */
2279 mbfl_buffer_converter_illegal_mode(MBSTRG(outconv), MBSTRG(current_filter_illegal_mode));
2280 mbfl_buffer_converter_illegal_substchar(MBSTRG(outconv), MBSTRG(current_filter_illegal_substchar));
2281
2282 /* feed the string */
2283 mbfl_string_init(&string);
2284 /* these are not needed. convd has encoding info.
2285 string.no_language = MBSTRG(language);
2286 string.encoding = MBSTRG(current_internal_encoding);
2287 */
2288 string.val = (unsigned char *)arg_string;
2289 string.len = arg_string_len;
2290 mbfl_buffer_converter_feed(MBSTRG(outconv), &string);
2291 if (last_feed) {
2292 mbfl_buffer_converter_flush(MBSTRG(outconv));
2293 }
2294 /* get the converter output, and return it */
2295 mbfl_buffer_converter_result(MBSTRG(outconv), &result);
2296 // TODO: avoid reallocation ???
2297 RETVAL_STRINGL((char *)result.val, result.len); /* the string is already strdup()'ed */
2298 efree(result.val);
2299
2300 /* delete the converter if it is the last feed. */
2301 if (last_feed) {
2302 MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
2303 mbfl_buffer_converter_delete(MBSTRG(outconv));
2304 MBSTRG(outconv) = NULL;
2305 }
2306 }
2307 /* }}} */
2308
2309 /* {{{ proto int mb_strlen(string str [, string encoding])
2310 Get character numbers of a string */
PHP_FUNCTION(mb_strlen)2311 PHP_FUNCTION(mb_strlen)
2312 {
2313 size_t n;
2314 mbfl_string string;
2315 char *str, *enc_name = NULL;
2316 size_t str_len, enc_name_len;
2317
2318 mbfl_string_init(&string);
2319
2320 ZEND_PARSE_PARAMETERS_START(1, 2)
2321 Z_PARAM_STRING(str, str_len)
2322 Z_PARAM_OPTIONAL
2323 Z_PARAM_STRING(enc_name, enc_name_len)
2324 ZEND_PARSE_PARAMETERS_END();
2325
2326 string.val = (unsigned char *) str;
2327 string.len = str_len;
2328 string.no_language = MBSTRG(language);
2329 string.encoding = php_mb_get_encoding(enc_name);
2330 if (!string.encoding) {
2331 RETURN_FALSE;
2332 }
2333
2334 n = mbfl_strlen(&string);
2335 if (!mbfl_is_error(n)) {
2336 RETVAL_LONG(n);
2337 } else {
2338 RETVAL_FALSE;
2339 }
2340 }
2341 /* }}} */
2342
2343 /* {{{ proto int mb_strpos(string haystack, string needle [, int offset [, string encoding]])
2344 Find position of first occurrence of a string within another */
PHP_FUNCTION(mb_strpos)2345 PHP_FUNCTION(mb_strpos)
2346 {
2347 int reverse = 0;
2348 zend_long offset = 0;
2349 mbfl_string haystack, needle;
2350 char *enc_name = NULL;
2351 size_t enc_name_len, n;
2352
2353 mbfl_string_init(&haystack);
2354 mbfl_string_init(&needle);
2355
2356 if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|ls", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &offset, &enc_name, &enc_name_len) == FAILURE) {
2357 return;
2358 }
2359
2360 haystack.no_language = needle.no_language = MBSTRG(language);
2361 haystack.encoding = needle.encoding = php_mb_get_encoding(enc_name);
2362 if (!haystack.encoding) {
2363 RETURN_FALSE;
2364 }
2365
2366 if (offset != 0) {
2367 size_t slen = mbfl_strlen(&haystack);
2368 if (offset < 0) {
2369 offset += slen;
2370 }
2371 if (offset < 0 || offset > slen) {
2372 php_error_docref(NULL, E_WARNING, "Offset not contained in string");
2373 RETURN_FALSE;
2374 }
2375 }
2376
2377 if (needle.len == 0) {
2378 php_error_docref(NULL, E_WARNING, "Empty delimiter");
2379 RETURN_FALSE;
2380 }
2381
2382 n = mbfl_strpos(&haystack, &needle, offset, reverse);
2383 if (!mbfl_is_error(n)) {
2384 RETVAL_LONG(n);
2385 } else {
2386 switch (-n) {
2387 case 1:
2388 break;
2389 case 2:
2390 php_error_docref(NULL, E_WARNING, "Needle has not positive length");
2391 break;
2392 case 4:
2393 php_error_docref(NULL, E_WARNING, "Unknown encoding or conversion error");
2394 break;
2395 case 8:
2396 php_error_docref(NULL, E_NOTICE, "Argument is empty");
2397 break;
2398 default:
2399 php_error_docref(NULL, E_WARNING, "Unknown error in mb_strpos");
2400 break;
2401 }
2402 RETVAL_FALSE;
2403 }
2404 }
2405 /* }}} */
2406
2407 /* {{{ proto int mb_strrpos(string haystack, string needle [, int offset [, string encoding]])
2408 Find position of last occurrence of a string within another */
PHP_FUNCTION(mb_strrpos)2409 PHP_FUNCTION(mb_strrpos)
2410 {
2411 mbfl_string haystack, needle;
2412 char *enc_name = NULL;
2413 size_t enc_name_len;
2414 zval *zoffset = NULL;
2415 zend_long offset = 0, str_flg, n;
2416 char *enc_name2 = NULL;
2417 size_t enc_name_len2;
2418
2419 mbfl_string_init(&haystack);
2420 mbfl_string_init(&needle);
2421
2422 if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|zs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &zoffset, &enc_name, &enc_name_len) == FAILURE) {
2423 return;
2424 }
2425
2426 if (zoffset) {
2427 if (Z_TYPE_P(zoffset) == IS_STRING) {
2428 enc_name2 = Z_STRVAL_P(zoffset);
2429 enc_name_len2 = Z_STRLEN_P(zoffset);
2430 str_flg = 1;
2431
2432 if (enc_name2 != NULL) {
2433 switch (*enc_name2) {
2434 case '0':
2435 case '1':
2436 case '2':
2437 case '3':
2438 case '4':
2439 case '5':
2440 case '6':
2441 case '7':
2442 case '8':
2443 case '9':
2444 case ' ':
2445 case '-':
2446 case '.':
2447 break;
2448 default :
2449 str_flg = 0;
2450 break;
2451 }
2452 }
2453
2454 if (str_flg) {
2455 convert_to_long_ex(zoffset);
2456 offset = Z_LVAL_P(zoffset);
2457 } else {
2458 enc_name = enc_name2;
2459 enc_name_len = enc_name_len2;
2460 }
2461 } else {
2462 convert_to_long_ex(zoffset);
2463 offset = Z_LVAL_P(zoffset);
2464 }
2465 }
2466
2467 haystack.no_language = needle.no_language = MBSTRG(language);
2468 haystack.encoding = needle.encoding = php_mb_get_encoding(enc_name);
2469 if (!haystack.encoding) {
2470 RETURN_FALSE;
2471 }
2472
2473 if (offset != 0) {
2474 size_t haystack_char_len = mbfl_strlen(&haystack);
2475 if ((offset > 0 && offset > haystack_char_len) ||
2476 (offset < 0 && -offset > haystack_char_len)) {
2477 php_error_docref(NULL, E_WARNING, "Offset is greater than the length of haystack string");
2478 RETURN_FALSE;
2479 }
2480 }
2481
2482 n = mbfl_strpos(&haystack, &needle, offset, 1);
2483 if (!mbfl_is_error(n)) {
2484 RETVAL_LONG(n);
2485 } else {
2486 RETVAL_FALSE;
2487 }
2488 }
2489 /* }}} */
2490
2491 /* {{{ proto int mb_stripos(string haystack, string needle [, int offset [, string encoding]])
2492 Finds position of first occurrence of a string within another, case insensitive */
PHP_FUNCTION(mb_stripos)2493 PHP_FUNCTION(mb_stripos)
2494 {
2495 size_t n = (size_t) -1;
2496 zend_long offset = 0;
2497 mbfl_string haystack, needle;
2498 const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
2499 size_t from_encoding_len;
2500
2501 if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|ls", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &offset, &from_encoding, &from_encoding_len) == FAILURE) {
2502 return;
2503 }
2504
2505 if (needle.len == 0) {
2506 php_error_docref(NULL, E_WARNING, "Empty delimiter");
2507 RETURN_FALSE;
2508 }
2509
2510 n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, from_encoding);
2511
2512 if (!mbfl_is_error(n)) {
2513 RETVAL_LONG(n);
2514 } else {
2515 RETVAL_FALSE;
2516 }
2517 }
2518 /* }}} */
2519
2520 /* {{{ proto int mb_strripos(string haystack, string needle [, int offset [, string encoding]])
2521 Finds position of last occurrence of a string within another, case insensitive */
PHP_FUNCTION(mb_strripos)2522 PHP_FUNCTION(mb_strripos)
2523 {
2524 size_t n = (size_t) -1;
2525 zend_long offset = 0;
2526 mbfl_string haystack, needle;
2527 const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
2528 size_t from_encoding_len;
2529
2530 if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|ls", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &offset, &from_encoding, &from_encoding_len) == FAILURE) {
2531 return;
2532 }
2533
2534 n = php_mb_stripos(1, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, from_encoding);
2535
2536 if (!mbfl_is_error(n)) {
2537 RETVAL_LONG(n);
2538 } else {
2539 RETVAL_FALSE;
2540 }
2541 }
2542 /* }}} */
2543
2544 /* {{{ proto string mb_strstr(string haystack, string needle[, bool part[, string encoding]])
2545 Finds first occurrence of a string within another */
PHP_FUNCTION(mb_strstr)2546 PHP_FUNCTION(mb_strstr)
2547 {
2548 size_t n;
2549 mbfl_string haystack, needle, result, *ret = NULL;
2550 char *enc_name = NULL;
2551 size_t enc_name_len;
2552 zend_bool part = 0;
2553
2554 mbfl_string_init(&haystack);
2555 mbfl_string_init(&needle);
2556
2557 if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &enc_name, &enc_name_len) == FAILURE) {
2558 return;
2559 }
2560
2561 haystack.no_language = needle.no_language = MBSTRG(language);
2562 haystack.encoding = needle.encoding = php_mb_get_encoding(enc_name);
2563 if (!haystack.encoding) {
2564 RETURN_FALSE;
2565 }
2566
2567 if (needle.len == 0) {
2568 php_error_docref(NULL, E_WARNING, "Empty delimiter");
2569 RETURN_FALSE;
2570 }
2571
2572 n = mbfl_strpos(&haystack, &needle, 0, 0);
2573 if (!mbfl_is_error(n)) {
2574 if (part) {
2575 ret = mbfl_substr(&haystack, &result, 0, n);
2576 if (ret != NULL) {
2577 // TODO: avoid reallocation ???
2578 RETVAL_STRINGL((char *)ret->val, ret->len);
2579 efree(ret->val);
2580 } else {
2581 RETVAL_FALSE;
2582 }
2583 } else {
2584 ret = mbfl_substr(&haystack, &result, n, MBFL_SUBSTR_UNTIL_END);
2585 if (ret != NULL) {
2586 // TODO: avoid reallocation ???
2587 RETVAL_STRINGL((char *)ret->val, ret->len);
2588 efree(ret->val);
2589 } else {
2590 RETVAL_FALSE;
2591 }
2592 }
2593 } else {
2594 RETVAL_FALSE;
2595 }
2596 }
2597 /* }}} */
2598
2599 /* {{{ proto string mb_strrchr(string haystack, string needle[, bool part[, string encoding]])
2600 Finds the last occurrence of a character in a string within another */
PHP_FUNCTION(mb_strrchr)2601 PHP_FUNCTION(mb_strrchr)
2602 {
2603 size_t n;
2604 mbfl_string haystack, needle, result, *ret = NULL;
2605 char *enc_name = NULL;
2606 size_t enc_name_len;
2607 zend_bool part = 0;
2608
2609 mbfl_string_init(&haystack);
2610 mbfl_string_init(&needle);
2611
2612 if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &enc_name, &enc_name_len) == FAILURE) {
2613 return;
2614 }
2615
2616 haystack.no_language = needle.no_language = MBSTRG(language);
2617 haystack.encoding = needle.encoding = php_mb_get_encoding(enc_name);
2618 if (!haystack.encoding) {
2619 RETURN_FALSE;
2620 }
2621
2622 if (haystack.len == 0) {
2623 RETURN_FALSE;
2624 }
2625 if (needle.len == 0) {
2626 RETURN_FALSE;
2627 }
2628
2629 n = mbfl_strpos(&haystack, &needle, 0, 1);
2630 if (!mbfl_is_error(n)) {
2631 if (part) {
2632 ret = mbfl_substr(&haystack, &result, 0, n);
2633 if (ret != NULL) {
2634 // TODO: avoid reallocation ???
2635 RETVAL_STRINGL((char *)ret->val, ret->len);
2636 efree(ret->val);
2637 } else {
2638 RETVAL_FALSE;
2639 }
2640 } else {
2641 ret = mbfl_substr(&haystack, &result, n, MBFL_SUBSTR_UNTIL_END);
2642 if (ret != NULL) {
2643 // TODO: avoid reallocation ???
2644 RETVAL_STRINGL((char *)ret->val, ret->len);
2645 efree(ret->val);
2646 } else {
2647 RETVAL_FALSE;
2648 }
2649 }
2650 } else {
2651 RETVAL_FALSE;
2652 }
2653 }
2654 /* }}} */
2655
2656 /* {{{ proto string mb_stristr(string haystack, string needle[, bool part[, string encoding]])
2657 Finds first occurrence of a string within another, case insensitive */
PHP_FUNCTION(mb_stristr)2658 PHP_FUNCTION(mb_stristr)
2659 {
2660 zend_bool part = 0;
2661 size_t from_encoding_len, n;
2662 mbfl_string haystack, needle, result, *ret = NULL;
2663 const char *from_encoding = NULL;
2664 mbfl_string_init(&haystack);
2665 mbfl_string_init(&needle);
2666
2667 if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &from_encoding, &from_encoding_len) == FAILURE) {
2668 return;
2669 }
2670
2671 haystack.no_language = needle.no_language = MBSTRG(language);
2672 haystack.encoding = needle.encoding = php_mb_get_encoding(from_encoding);
2673 if (!haystack.encoding) {
2674 RETURN_FALSE;
2675 }
2676
2677 if (!needle.len) {
2678 php_error_docref(NULL, E_WARNING, "Empty delimiter");
2679 RETURN_FALSE;
2680 }
2681
2682 n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, 0, from_encoding);
2683 if (mbfl_is_error(n)) {
2684 RETURN_FALSE;
2685 }
2686
2687 if (part) {
2688 ret = mbfl_substr(&haystack, &result, 0, n);
2689 if (ret != NULL) {
2690 // TODO: avoid reallocation ???
2691 RETVAL_STRINGL((char *)ret->val, ret->len);
2692 efree(ret->val);
2693 } else {
2694 RETVAL_FALSE;
2695 }
2696 } else {
2697 ret = mbfl_substr(&haystack, &result, n, MBFL_SUBSTR_UNTIL_END);
2698 if (ret != NULL) {
2699 // TODO: avoid reallocaton ???
2700 RETVAL_STRINGL((char *)ret->val, ret->len);
2701 efree(ret->val);
2702 } else {
2703 RETVAL_FALSE;
2704 }
2705 }
2706 }
2707 /* }}} */
2708
2709 /* {{{ proto string mb_strrichr(string haystack, string needle[, bool part[, string encoding]])
2710 Finds the last occurrence of a character in a string within another, case insensitive */
PHP_FUNCTION(mb_strrichr)2711 PHP_FUNCTION(mb_strrichr)
2712 {
2713 zend_bool part = 0;
2714 size_t n;
2715 size_t from_encoding_len;
2716 mbfl_string haystack, needle, result, *ret = NULL;
2717 const char *from_encoding = NULL;
2718 mbfl_string_init(&haystack);
2719 mbfl_string_init(&needle);
2720
2721 if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &from_encoding, &from_encoding_len) == FAILURE) {
2722 return;
2723 }
2724
2725 haystack.no_language = needle.no_language = MBSTRG(language);
2726 haystack.encoding = needle.encoding = php_mb_get_encoding(from_encoding);
2727 if (!haystack.encoding) {
2728 RETURN_FALSE;
2729 }
2730
2731 n = php_mb_stripos(1, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, 0, from_encoding);
2732 if (mbfl_is_error(n)) {
2733 RETURN_FALSE;
2734 }
2735
2736 if (part) {
2737 ret = mbfl_substr(&haystack, &result, 0, n);
2738 if (ret != NULL) {
2739 // TODO: avoid reallocation ???
2740 RETVAL_STRINGL((char *)ret->val, ret->len);
2741 efree(ret->val);
2742 } else {
2743 RETVAL_FALSE;
2744 }
2745 } else {
2746 ret = mbfl_substr(&haystack, &result, n, MBFL_SUBSTR_UNTIL_END);
2747 if (ret != NULL) {
2748 // TODO: avoid reallocation ???
2749 RETVAL_STRINGL((char *)ret->val, ret->len);
2750 efree(ret->val);
2751 } else {
2752 RETVAL_FALSE;
2753 }
2754 }
2755 }
2756 /* }}} */
2757
2758 /* {{{ proto int mb_substr_count(string haystack, string needle [, string encoding])
2759 Count the number of substring occurrences */
PHP_FUNCTION(mb_substr_count)2760 PHP_FUNCTION(mb_substr_count)
2761 {
2762 size_t n;
2763 mbfl_string haystack, needle;
2764 char *enc_name = NULL;
2765 size_t enc_name_len;
2766
2767 mbfl_string_init(&haystack);
2768 mbfl_string_init(&needle);
2769
2770 if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|s", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &enc_name, &enc_name_len) == FAILURE) {
2771 return;
2772 }
2773
2774 haystack.no_language = needle.no_language = MBSTRG(language);
2775 haystack.encoding = needle.encoding = php_mb_get_encoding(enc_name);
2776 if (!haystack.encoding) {
2777 RETURN_FALSE;
2778 }
2779
2780 if (needle.len == 0) {
2781 php_error_docref(NULL, E_WARNING, "Empty substring");
2782 RETURN_FALSE;
2783 }
2784
2785 n = mbfl_substr_count(&haystack, &needle);
2786 if (!mbfl_is_error(n)) {
2787 RETVAL_LONG(n);
2788 } else {
2789 RETVAL_FALSE;
2790 }
2791 }
2792 /* }}} */
2793
2794 /* {{{ proto string mb_substr(string str, int start [, int length [, string encoding]])
2795 Returns part of a string */
PHP_FUNCTION(mb_substr)2796 PHP_FUNCTION(mb_substr)
2797 {
2798 char *str, *encoding = NULL;
2799 zend_long from, len;
2800 size_t mblen, real_from, real_len;
2801 size_t str_len, encoding_len;
2802 zend_bool len_is_null = 1;
2803 mbfl_string string, result, *ret;
2804
2805 if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl|l!s", &str, &str_len, &from, &len, &len_is_null, &encoding, &encoding_len) == FAILURE) {
2806 return;
2807 }
2808
2809 mbfl_string_init(&string);
2810 string.no_language = MBSTRG(language);
2811 string.encoding = php_mb_get_encoding(encoding);
2812 if (!string.encoding) {
2813 RETURN_FALSE;
2814 }
2815
2816 string.val = (unsigned char *)str;
2817 string.len = str_len;
2818
2819 /* measures length */
2820 mblen = 0;
2821 if (from < 0 || (!len_is_null && len < 0)) {
2822 mblen = mbfl_strlen(&string);
2823 }
2824
2825 /* if "from" position is negative, count start position from the end
2826 * of the string
2827 */
2828 if (from >= 0) {
2829 real_from = (size_t) from;
2830 } else if (-from < mblen) {
2831 real_from = mblen + from;
2832 } else {
2833 real_from = 0;
2834 }
2835
2836 /* if "length" position is negative, set it to the length
2837 * needed to stop that many chars from the end of the string
2838 */
2839 if (len_is_null) {
2840 real_len = MBFL_SUBSTR_UNTIL_END;
2841 } else if (len >= 0) {
2842 real_len = (size_t) len;
2843 } else if (real_from < mblen && -len < mblen - real_from) {
2844 real_len = (mblen - real_from) + len;
2845 } else {
2846 real_len = 0;
2847 }
2848
2849 if (((MBSTRG(func_overload) & MB_OVERLOAD_STRING) == MB_OVERLOAD_STRING)
2850 && (real_from > mbfl_strlen(&string))) {
2851 RETURN_FALSE;
2852 }
2853
2854 ret = mbfl_substr(&string, &result, real_from, real_len);
2855 if (NULL == ret) {
2856 RETURN_FALSE;
2857 }
2858
2859 // TODO: avoid reallocation ???
2860 RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
2861 efree(ret->val);
2862 }
2863 /* }}} */
2864
2865 /* {{{ proto string mb_strcut(string str, int start [, int length [, string encoding]])
2866 Returns part of a string */
PHP_FUNCTION(mb_strcut)2867 PHP_FUNCTION(mb_strcut)
2868 {
2869 char *encoding = NULL;
2870 zend_long from, len;
2871 size_t encoding_len;
2872 zend_bool len_is_null = 1;
2873 mbfl_string string, result, *ret;
2874
2875 mbfl_string_init(&string);
2876
2877 if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl|l!s", (char **)&string.val, &string.len, &from, &len, &len_is_null, &encoding, &encoding_len) == FAILURE) {
2878 return;
2879 }
2880
2881 string.no_language = MBSTRG(language);
2882 string.encoding = php_mb_get_encoding(encoding);
2883 if (!string.encoding) {
2884 RETURN_FALSE;
2885 }
2886
2887 if (len_is_null) {
2888 len = string.len;
2889 }
2890
2891 /* if "from" position is negative, count start position from the end
2892 * of the string
2893 */
2894 if (from < 0) {
2895 from = string.len + from;
2896 if (from < 0) {
2897 from = 0;
2898 }
2899 }
2900
2901 /* if "length" position is negative, set it to the length
2902 * needed to stop that many chars from the end of the string
2903 */
2904 if (len < 0) {
2905 len = (string.len - from) + len;
2906 if (len < 0) {
2907 len = 0;
2908 }
2909 }
2910
2911 if (from > string.len) {
2912 RETURN_FALSE;
2913 }
2914
2915 ret = mbfl_strcut(&string, &result, from, len);
2916 if (ret == NULL) {
2917 RETURN_FALSE;
2918 }
2919
2920 // TODO: avoid reallocation ???
2921 RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
2922 efree(ret->val);
2923 }
2924 /* }}} */
2925
2926 /* {{{ proto int mb_strwidth(string str [, string encoding])
2927 Gets terminal width of a string */
PHP_FUNCTION(mb_strwidth)2928 PHP_FUNCTION(mb_strwidth)
2929 {
2930 size_t n;
2931 mbfl_string string;
2932 char *enc_name = NULL;
2933 size_t enc_name_len;
2934
2935 mbfl_string_init(&string);
2936
2937 if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|s", (char **)&string.val, &string.len, &enc_name, &enc_name_len) == FAILURE) {
2938 return;
2939 }
2940
2941 string.no_language = MBSTRG(language);
2942 string.encoding = php_mb_get_encoding(enc_name);
2943 if (!string.encoding) {
2944 RETURN_FALSE;
2945 }
2946
2947 n = mbfl_strwidth(&string);
2948 if (!mbfl_is_error(n)) {
2949 RETVAL_LONG(n);
2950 } else {
2951 RETVAL_FALSE;
2952 }
2953 }
2954 /* }}} */
2955
2956 /* {{{ proto string mb_strimwidth(string str, int start, int width [, string trimmarker [, string encoding]])
2957 Trim the string in terminal width */
PHP_FUNCTION(mb_strimwidth)2958 PHP_FUNCTION(mb_strimwidth)
2959 {
2960 char *str, *trimmarker = NULL, *encoding = NULL;
2961 zend_long from, width, swidth;
2962 size_t str_len, trimmarker_len, encoding_len;
2963 mbfl_string string, result, marker, *ret;
2964
2965 if (zend_parse_parameters(ZEND_NUM_ARGS(), "sll|ss", &str, &str_len, &from, &width, &trimmarker, &trimmarker_len, &encoding, &encoding_len) == FAILURE) {
2966 return;
2967 }
2968
2969 mbfl_string_init(&string);
2970 mbfl_string_init(&marker);
2971
2972 string.no_language = marker.no_language = MBSTRG(language);
2973 string.encoding = marker.encoding = php_mb_get_encoding(encoding);
2974 if (!string.encoding) {
2975 RETURN_FALSE;
2976 }
2977
2978 string.val = (unsigned char *)str;
2979 string.len = str_len;
2980 marker.val = NULL;
2981 marker.len = 0;
2982
2983 if ((from < 0) || (width < 0)) {
2984 swidth = mbfl_strwidth(&string);
2985 }
2986
2987 if (from < 0) {
2988 from += swidth;
2989 }
2990
2991 if (from < 0 || (size_t)from > str_len) {
2992 php_error_docref(NULL, E_WARNING, "Start position is out of range");
2993 RETURN_FALSE;
2994 }
2995
2996 if (width < 0) {
2997 width = swidth + width - from;
2998 }
2999
3000 if (width < 0) {
3001 php_error_docref(NULL, E_WARNING, "Width is out of range");
3002 RETURN_FALSE;
3003 }
3004
3005 if (trimmarker) {
3006 marker.val = (unsigned char *)trimmarker;
3007 marker.len = trimmarker_len;
3008 }
3009
3010 ret = mbfl_strimwidth(&string, &marker, &result, from, width);
3011
3012 if (ret == NULL) {
3013 RETURN_FALSE;
3014 }
3015 // TODO: avoid reallocation ???
3016 RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
3017 efree(ret->val);
3018 }
3019 /* }}} */
3020
3021
3022 /* See mbfl_no_encoding definition for list of unsupported encodings */
php_mb_is_unsupported_no_encoding(enum mbfl_no_encoding no_enc)3023 static inline zend_bool php_mb_is_unsupported_no_encoding(enum mbfl_no_encoding no_enc)
3024 {
3025 return ((no_enc >= mbfl_no_encoding_invalid && no_enc <= mbfl_no_encoding_qprint)
3026 || (no_enc >= mbfl_no_encoding_utf7 && no_enc <= mbfl_no_encoding_utf7imap)
3027 || (no_enc >= mbfl_no_encoding_jis && no_enc <= mbfl_no_encoding_2022jpms)
3028 || (no_enc >= mbfl_no_encoding_cp50220 && no_enc <= mbfl_no_encoding_cp50222));
3029 }
3030
3031
3032 /* See mbfl_no_encoding definition for list of UTF-8 encodings */
php_mb_is_no_encoding_utf8(enum mbfl_no_encoding no_enc)3033 static inline zend_bool php_mb_is_no_encoding_utf8(enum mbfl_no_encoding no_enc)
3034 {
3035 return (no_enc >= mbfl_no_encoding_utf8 && no_enc <= mbfl_no_encoding_utf8_sb);
3036 }
3037
php_mb_convert_encoding_ex(const char * input,size_t length,const mbfl_encoding * to_encoding,const mbfl_encoding * from_encoding,size_t * output_len)3038 MBSTRING_API char *php_mb_convert_encoding_ex(const char *input, size_t length, const mbfl_encoding *to_encoding, const mbfl_encoding *from_encoding, size_t *output_len)
3039 {
3040 mbfl_string string, result, *ret;
3041 mbfl_buffer_converter *convd;
3042 char *output = NULL;
3043
3044 if (output_len) {
3045 *output_len = 0;
3046 }
3047
3048 /* initialize string */
3049 mbfl_string_init(&string);
3050 mbfl_string_init(&result);
3051 string.encoding = from_encoding;
3052 string.no_language = MBSTRG(language);
3053 string.val = (unsigned char *)input;
3054 string.len = length;
3055
3056 /* initialize converter */
3057 convd = mbfl_buffer_converter_new(from_encoding, to_encoding, string.len);
3058 if (convd == NULL) {
3059 php_error_docref(NULL, E_WARNING, "Unable to create character encoding converter");
3060 return NULL;
3061 }
3062
3063 mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
3064 mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
3065
3066 /* do it */
3067 ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
3068 if (ret) {
3069 if (output_len) {
3070 *output_len = ret->len;
3071 }
3072 output = (char *)ret->val;
3073 }
3074
3075 MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
3076 mbfl_buffer_converter_delete(convd);
3077 return output;
3078 }
3079 /* }}} */
3080
3081 /* {{{ MBSTRING_API char *php_mb_convert_encoding() */
php_mb_convert_encoding(const char * input,size_t length,const char * _to_encoding,const char * _from_encodings,size_t * output_len)3082 MBSTRING_API char *php_mb_convert_encoding(const char *input, size_t length, const char *_to_encoding, const char *_from_encodings, size_t *output_len)
3083 {
3084 const mbfl_encoding *from_encoding, *to_encoding;
3085
3086 if (output_len) {
3087 *output_len = 0;
3088 }
3089 if (!input) {
3090 return NULL;
3091 }
3092 /* new encoding */
3093 if (_to_encoding && strlen(_to_encoding)) {
3094 to_encoding = mbfl_name2encoding(_to_encoding);
3095 if (!to_encoding) {
3096 php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", _to_encoding);
3097 return NULL;
3098 }
3099 } else {
3100 to_encoding = MBSTRG(current_internal_encoding);
3101 }
3102
3103 /* pre-conversion encoding */
3104 from_encoding = MBSTRG(current_internal_encoding);
3105 if (_from_encodings) {
3106 const mbfl_encoding **list = NULL;
3107 size_t size = 0;
3108 php_mb_parse_encoding_list(_from_encodings, strlen(_from_encodings), &list, &size, 0);
3109 if (size == 1) {
3110 from_encoding = *list;
3111 } else if (size > 1) {
3112 /* auto detect */
3113 mbfl_string string;
3114 mbfl_string_init(&string);
3115 string.val = (unsigned char *)input;
3116 string.len = length;
3117 from_encoding = mbfl_identify_encoding(&string, list, size, MBSTRG(strict_detection));
3118 if (!from_encoding) {
3119 php_error_docref(NULL, E_WARNING, "Unable to detect character encoding");
3120 from_encoding = &mbfl_encoding_pass;
3121 }
3122 } else {
3123 php_error_docref(NULL, E_WARNING, "Illegal character encoding specified");
3124 }
3125 if (list != NULL) {
3126 efree((void *)list);
3127 }
3128 }
3129
3130 return php_mb_convert_encoding_ex(input, length, to_encoding, from_encoding, output_len);
3131 }
3132 /* }}} */
3133
php_mb_convert_encoding_recursive(HashTable * input,const char * _to_encoding,const char * _from_encodings)3134 MBSTRING_API HashTable *php_mb_convert_encoding_recursive(HashTable *input, const char *_to_encoding, const char *_from_encodings)
3135 {
3136 HashTable *output, *chash;
3137 zend_long idx;
3138 zend_string *key;
3139 zval *entry, entry_tmp;
3140 size_t ckey_len, cval_len;
3141 char *ckey, *cval;
3142
3143 if (!input) {
3144 return NULL;
3145 }
3146
3147 if (GC_IS_RECURSIVE(input)) {
3148 GC_UNPROTECT_RECURSION(input);
3149 php_error_docref(NULL, E_WARNING, "Cannot convert recursively referenced values");
3150 return NULL;
3151 }
3152 GC_TRY_PROTECT_RECURSION(input);
3153 output = zend_new_array(zend_hash_num_elements(input));
3154 ZEND_HASH_FOREACH_KEY_VAL(input, idx, key, entry) {
3155 /* convert key */
3156 if (key) {
3157 ckey = php_mb_convert_encoding(ZSTR_VAL(key), ZSTR_LEN(key), _to_encoding, _from_encodings, &ckey_len);
3158 key = zend_string_init(ckey, ckey_len, 0);
3159 efree(ckey);
3160 }
3161 /* convert value */
3162 ZEND_ASSERT(entry);
3163 switch(Z_TYPE_P(entry)) {
3164 case IS_STRING:
3165 cval = php_mb_convert_encoding(Z_STRVAL_P(entry), Z_STRLEN_P(entry), _to_encoding, _from_encodings, &cval_len);
3166 ZVAL_STRINGL(&entry_tmp, cval, cval_len);
3167 efree(cval);
3168 break;
3169 case IS_NULL:
3170 case IS_TRUE:
3171 case IS_FALSE:
3172 case IS_LONG:
3173 case IS_DOUBLE:
3174 ZVAL_COPY(&entry_tmp, entry);
3175 break;
3176 case IS_ARRAY:
3177 chash = php_mb_convert_encoding_recursive(HASH_OF(entry), _to_encoding, _from_encodings);
3178 if (chash) {
3179 ZVAL_ARR(&entry_tmp, chash);
3180 } else {
3181 ZVAL_EMPTY_ARRAY(&entry_tmp);
3182 }
3183 break;
3184 case IS_OBJECT:
3185 default:
3186 if (key) {
3187 zend_string_release(key);
3188 }
3189 php_error_docref(NULL, E_WARNING, "Object is not supported");
3190 continue;
3191 }
3192 if (key) {
3193 zend_hash_add(output, key, &entry_tmp);
3194 zend_string_release(key);
3195 } else {
3196 zend_hash_index_add(output, idx, &entry_tmp);
3197 }
3198 } ZEND_HASH_FOREACH_END();
3199 GC_TRY_UNPROTECT_RECURSION(input);
3200
3201 return output;
3202 }
3203 /* }}} */
3204
3205
3206 /* {{{ proto string mb_convert_encoding(string str, string to-encoding [, mixed from-encoding])
3207 Returns converted string in desired encoding */
PHP_FUNCTION(mb_convert_encoding)3208 PHP_FUNCTION(mb_convert_encoding)
3209 {
3210 zval *input;
3211 char *arg_new;
3212 size_t new_len;
3213 zval *arg_old = NULL;
3214 size_t size, l, n;
3215 char *_from_encodings = NULL, *ret, *s_free = NULL;
3216
3217 zval *hash_entry;
3218 HashTable *target_hash;
3219
3220 if (zend_parse_parameters(ZEND_NUM_ARGS(), "zs|z", &input, &arg_new, &new_len, &arg_old) == FAILURE) {
3221 return;
3222 }
3223
3224 if (Z_TYPE_P(input) != IS_STRING && Z_TYPE_P(input) != IS_ARRAY) {
3225 convert_to_string(input);
3226 }
3227
3228 if (arg_old) {
3229 switch (Z_TYPE_P(arg_old)) {
3230 case IS_ARRAY:
3231 target_hash = Z_ARRVAL_P(arg_old);
3232 _from_encodings = NULL;
3233
3234 ZEND_HASH_FOREACH_VAL(target_hash, hash_entry) {
3235 zend_string *encoding_str = zval_get_string(hash_entry);
3236
3237 if ( _from_encodings) {
3238 l = strlen(_from_encodings);
3239 n = strlen(ZSTR_VAL(encoding_str));
3240 _from_encodings = erealloc(_from_encodings, l+n+2);
3241 memcpy(_from_encodings + l, ",", 1);
3242 memcpy(_from_encodings + l + 1, ZSTR_VAL(encoding_str), ZSTR_LEN(encoding_str) + 1);
3243 } else {
3244 _from_encodings = estrdup(ZSTR_VAL(encoding_str));
3245 }
3246 zend_string_release(encoding_str);
3247 } ZEND_HASH_FOREACH_END();
3248
3249 if (_from_encodings != NULL && !strlen(_from_encodings)) {
3250 efree(_from_encodings);
3251 _from_encodings = NULL;
3252 }
3253 s_free = _from_encodings;
3254 break;
3255 default:
3256 convert_to_string(arg_old);
3257 _from_encodings = Z_STRVAL_P(arg_old);
3258 break;
3259 }
3260 }
3261
3262 if (Z_TYPE_P(input) == IS_STRING) {
3263 /* new encoding */
3264 ret = php_mb_convert_encoding(Z_STRVAL_P(input), Z_STRLEN_P(input), arg_new, _from_encodings, &size);
3265 if (ret != NULL) {
3266 // TODO: avoid reallocation ???
3267 RETVAL_STRINGL(ret, size); /* the string is already strdup()'ed */
3268 efree(ret);
3269 } else {
3270 RETVAL_FALSE;
3271 }
3272 if (s_free) {
3273 efree(s_free);
3274 }
3275 } else {
3276 HashTable *tmp;
3277 tmp = php_mb_convert_encoding_recursive(HASH_OF(input), arg_new, _from_encodings);
3278 RETURN_ARR(tmp);
3279 }
3280
3281 return;
3282 }
3283 /* }}} */
3284
mbstring_convert_case(int case_mode,const char * str,size_t str_len,size_t * ret_len,const mbfl_encoding * enc)3285 static char *mbstring_convert_case(
3286 int case_mode, const char *str, size_t str_len, size_t *ret_len,
3287 const mbfl_encoding *enc) {
3288 return php_unicode_convert_case(
3289 case_mode, str, str_len, ret_len, enc,
3290 MBSTRG(current_filter_illegal_mode), MBSTRG(current_filter_illegal_substchar));
3291 }
3292
3293 /* {{{ proto string mb_convert_case(string sourcestring, int mode [, string encoding])
3294 Returns a case-folded version of sourcestring */
PHP_FUNCTION(mb_convert_case)3295 PHP_FUNCTION(mb_convert_case)
3296 {
3297 const char *from_encoding = NULL;
3298 char *str;
3299 size_t str_len, from_encoding_len;
3300 zend_long case_mode = 0;
3301 char *newstr;
3302 size_t ret_len;
3303 const mbfl_encoding *enc;
3304
3305 RETVAL_FALSE;
3306 if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl|s!", &str, &str_len,
3307 &case_mode, &from_encoding, &from_encoding_len) == FAILURE) {
3308 return;
3309 }
3310
3311 enc = php_mb_get_encoding(from_encoding);
3312 if (!enc) {
3313 return;
3314 }
3315
3316 if (case_mode < 0 || case_mode > PHP_UNICODE_CASE_MODE_MAX) {
3317 php_error_docref(NULL, E_WARNING, "Invalid case mode");
3318 return;
3319 }
3320
3321 newstr = mbstring_convert_case(case_mode, str, str_len, &ret_len, enc);
3322
3323 if (newstr) {
3324 // TODO: avoid reallocation ???
3325 RETVAL_STRINGL(newstr, ret_len);
3326 efree(newstr);
3327 }
3328 }
3329 /* }}} */
3330
3331 /* {{{ proto string mb_strtoupper(string sourcestring [, string encoding])
3332 * Returns a uppercased version of sourcestring
3333 */
PHP_FUNCTION(mb_strtoupper)3334 PHP_FUNCTION(mb_strtoupper)
3335 {
3336 const char *from_encoding = NULL;
3337 char *str;
3338 size_t str_len, from_encoding_len;
3339 char *newstr;
3340 size_t ret_len;
3341 const mbfl_encoding *enc;
3342
3343 if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|s!", &str, &str_len,
3344 &from_encoding, &from_encoding_len) == FAILURE) {
3345 return;
3346 }
3347
3348 enc = php_mb_get_encoding(from_encoding);
3349 if (!enc) {
3350 RETURN_FALSE;
3351 }
3352
3353 newstr = mbstring_convert_case(PHP_UNICODE_CASE_UPPER, str, str_len, &ret_len, enc);
3354
3355 if (newstr) {
3356 // TODO: avoid reallocation ???
3357 RETVAL_STRINGL(newstr, ret_len);
3358 efree(newstr);
3359 return;
3360 }
3361 RETURN_FALSE;
3362 }
3363 /* }}} */
3364
3365 /* {{{ proto string mb_strtolower(string sourcestring [, string encoding])
3366 * Returns a lowercased version of sourcestring
3367 */
PHP_FUNCTION(mb_strtolower)3368 PHP_FUNCTION(mb_strtolower)
3369 {
3370 const char *from_encoding = NULL;
3371 char *str;
3372 size_t str_len, from_encoding_len;
3373 char *newstr;
3374 size_t ret_len;
3375 const mbfl_encoding *enc;
3376
3377 if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|s!", &str, &str_len,
3378 &from_encoding, &from_encoding_len) == FAILURE) {
3379 return;
3380 }
3381
3382 enc = php_mb_get_encoding(from_encoding);
3383 if (!enc) {
3384 RETURN_FALSE;
3385 }
3386
3387 newstr = mbstring_convert_case(PHP_UNICODE_CASE_LOWER, str, str_len, &ret_len, enc);
3388
3389 if (newstr) {
3390 // TODO: avoid reallocation ???
3391 RETVAL_STRINGL(newstr, ret_len);
3392 efree(newstr);
3393 return;
3394 }
3395 RETURN_FALSE;
3396 }
3397 /* }}} */
3398
3399 /* {{{ proto string mb_detect_encoding(string str [, mixed encoding_list [, bool strict]])
3400 Encodings of the given string is returned (as a string) */
PHP_FUNCTION(mb_detect_encoding)3401 PHP_FUNCTION(mb_detect_encoding)
3402 {
3403 char *str;
3404 size_t str_len;
3405 zend_bool strict=0;
3406 zval *encoding_list = NULL;
3407
3408 mbfl_string string;
3409 const mbfl_encoding *ret;
3410 const mbfl_encoding **elist, **list;
3411 size_t size;
3412
3413 if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|z!b", &str, &str_len, &encoding_list, &strict) == FAILURE) {
3414 return;
3415 }
3416
3417 /* make encoding list */
3418 list = NULL;
3419 size = 0;
3420 if (encoding_list) {
3421 switch (Z_TYPE_P(encoding_list)) {
3422 case IS_ARRAY:
3423 if (FAILURE == php_mb_parse_encoding_array(encoding_list, &list, &size, 0)) {
3424 if (list) {
3425 efree(list);
3426 list = NULL;
3427 size = 0;
3428 }
3429 }
3430 break;
3431 default:
3432 convert_to_string(encoding_list);
3433 if (FAILURE == php_mb_parse_encoding_list(Z_STRVAL_P(encoding_list), Z_STRLEN_P(encoding_list), &list, &size, 0)) {
3434 if (list) {
3435 efree(list);
3436 list = NULL;
3437 size = 0;
3438 }
3439 }
3440 break;
3441 }
3442 if (size == 0) {
3443 php_error_docref(NULL, E_WARNING, "Illegal argument");
3444 }
3445 }
3446
3447 if (ZEND_NUM_ARGS() < 3) {
3448 strict = MBSTRG(strict_detection);
3449 }
3450
3451 if (size > 0 && list != NULL) {
3452 elist = list;
3453 } else {
3454 elist = MBSTRG(current_detect_order_list);
3455 size = MBSTRG(current_detect_order_list_size);
3456 }
3457
3458 mbfl_string_init(&string);
3459 string.no_language = MBSTRG(language);
3460 string.val = (unsigned char *)str;
3461 string.len = str_len;
3462 ret = mbfl_identify_encoding(&string, elist, size, strict);
3463
3464 if (list != NULL) {
3465 efree((void *)list);
3466 }
3467
3468 if (ret == NULL) {
3469 RETURN_FALSE;
3470 }
3471
3472 RETVAL_STRING((char *)ret->name);
3473 }
3474 /* }}} */
3475
3476 /* {{{ proto mixed mb_list_encodings()
3477 Returns an array of all supported entity encodings */
PHP_FUNCTION(mb_list_encodings)3478 PHP_FUNCTION(mb_list_encodings)
3479 {
3480 const mbfl_encoding **encodings;
3481 const mbfl_encoding *encoding;
3482 int i;
3483
3484 if (zend_parse_parameters_none() == FAILURE) {
3485 return;
3486 }
3487
3488 array_init(return_value);
3489 i = 0;
3490 encodings = mbfl_get_supported_encodings();
3491 while ((encoding = encodings[i++]) != NULL) {
3492 add_next_index_string(return_value, (char *) encoding->name);
3493 }
3494 }
3495 /* }}} */
3496
3497 /* {{{ proto array mb_encoding_aliases(string encoding)
3498 Returns an array of the aliases of a given encoding name */
PHP_FUNCTION(mb_encoding_aliases)3499 PHP_FUNCTION(mb_encoding_aliases)
3500 {
3501 const mbfl_encoding *encoding;
3502 char *name = NULL;
3503 size_t name_len;
3504
3505 if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &name, &name_len) == FAILURE) {
3506 return;
3507 }
3508
3509 encoding = mbfl_name2encoding(name);
3510 if (!encoding) {
3511 php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", name);
3512 RETURN_FALSE;
3513 }
3514
3515 array_init(return_value);
3516 if (encoding->aliases != NULL) {
3517 const char **alias;
3518 for (alias = *encoding->aliases; *alias; ++alias) {
3519 add_next_index_string(return_value, (char *)*alias);
3520 }
3521 }
3522 }
3523 /* }}} */
3524
3525 /* {{{ proto string mb_encode_mimeheader(string str [, string charset [, string transfer-encoding [, string linefeed [, int indent]]]])
3526 Converts the string to MIME "encoded-word" in the format of =?charset?(B|Q)?encoded_string?= */
PHP_FUNCTION(mb_encode_mimeheader)3527 PHP_FUNCTION(mb_encode_mimeheader)
3528 {
3529 const mbfl_encoding *charset, *transenc;
3530 mbfl_string string, result, *ret;
3531 char *charset_name = NULL;
3532 size_t charset_name_len;
3533 char *trans_enc_name = NULL;
3534 size_t trans_enc_name_len;
3535 char *linefeed = "\r\n";
3536 size_t linefeed_len;
3537 zend_long indent = 0;
3538
3539 mbfl_string_init(&string);
3540 string.no_language = MBSTRG(language);
3541 string.encoding = MBSTRG(current_internal_encoding);
3542
3543 if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|sssl", (char **)&string.val, &string.len, &charset_name, &charset_name_len, &trans_enc_name, &trans_enc_name_len, &linefeed, &linefeed_len, &indent) == FAILURE) {
3544 return;
3545 }
3546
3547 charset = &mbfl_encoding_pass;
3548 transenc = &mbfl_encoding_base64;
3549
3550 if (charset_name != NULL) {
3551 charset = mbfl_name2encoding(charset_name);
3552 if (!charset) {
3553 php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", charset_name);
3554 RETURN_FALSE;
3555 }
3556 } else {
3557 const mbfl_language *lang = mbfl_no2language(MBSTRG(language));
3558 if (lang != NULL) {
3559 charset = mbfl_no2encoding(lang->mail_charset);
3560 transenc = mbfl_no2encoding(lang->mail_header_encoding);
3561 }
3562 }
3563
3564 if (trans_enc_name != NULL) {
3565 if (*trans_enc_name == 'B' || *trans_enc_name == 'b') {
3566 transenc = &mbfl_encoding_base64;
3567 } else if (*trans_enc_name == 'Q' || *trans_enc_name == 'q') {
3568 transenc = &mbfl_encoding_qprint;
3569 }
3570 }
3571
3572 mbfl_string_init(&result);
3573 ret = mbfl_mime_header_encode(&string, &result, charset, transenc, linefeed, indent);
3574 if (ret != NULL) {
3575 // TODO: avoid reallocation ???
3576 RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
3577 efree(ret->val);
3578 } else {
3579 RETVAL_FALSE;
3580 }
3581 }
3582 /* }}} */
3583
3584 /* {{{ proto string mb_decode_mimeheader(string string)
3585 Decodes the MIME "encoded-word" in the string */
PHP_FUNCTION(mb_decode_mimeheader)3586 PHP_FUNCTION(mb_decode_mimeheader)
3587 {
3588 mbfl_string string, result, *ret;
3589
3590 mbfl_string_init(&string);
3591 string.no_language = MBSTRG(language);
3592 string.encoding = MBSTRG(current_internal_encoding);
3593
3594 if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", (char **)&string.val, &string.len) == FAILURE) {
3595 return;
3596 }
3597
3598 mbfl_string_init(&result);
3599 ret = mbfl_mime_header_decode(&string, &result, MBSTRG(current_internal_encoding));
3600 if (ret != NULL) {
3601 // TODO: avoid reallocation ???
3602 RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
3603 efree(ret->val);
3604 } else {
3605 RETVAL_FALSE;
3606 }
3607 }
3608 /* }}} */
3609
3610 /* {{{ proto string mb_convert_kana(string str [, string option] [, string encoding])
3611 Conversion between full-width character and half-width character (Japanese) */
PHP_FUNCTION(mb_convert_kana)3612 PHP_FUNCTION(mb_convert_kana)
3613 {
3614 int opt;
3615 mbfl_string string, result, *ret;
3616 char *optstr = NULL;
3617 size_t optstr_len;
3618 char *encname = NULL;
3619 size_t encname_len;
3620
3621 mbfl_string_init(&string);
3622
3623 if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|ss", (char **)&string.val, &string.len, &optstr, &optstr_len, &encname, &encname_len) == FAILURE) {
3624 return;
3625 }
3626
3627 /* option */
3628 if (optstr != NULL) {
3629 char *p = optstr;
3630 size_t i = 0, n = optstr_len;
3631 opt = 0;
3632 while (i < n) {
3633 i++;
3634 switch (*p++) {
3635 case 'A':
3636 opt |= 0x1;
3637 break;
3638 case 'a':
3639 opt |= 0x10;
3640 break;
3641 case 'R':
3642 opt |= 0x2;
3643 break;
3644 case 'r':
3645 opt |= 0x20;
3646 break;
3647 case 'N':
3648 opt |= 0x4;
3649 break;
3650 case 'n':
3651 opt |= 0x40;
3652 break;
3653 case 'S':
3654 opt |= 0x8;
3655 break;
3656 case 's':
3657 opt |= 0x80;
3658 break;
3659 case 'K':
3660 opt |= 0x100;
3661 break;
3662 case 'k':
3663 opt |= 0x1000;
3664 break;
3665 case 'H':
3666 opt |= 0x200;
3667 break;
3668 case 'h':
3669 opt |= 0x2000;
3670 break;
3671 case 'V':
3672 opt |= 0x800;
3673 break;
3674 case 'C':
3675 opt |= 0x10000;
3676 break;
3677 case 'c':
3678 opt |= 0x20000;
3679 break;
3680 case 'M':
3681 opt |= 0x100000;
3682 break;
3683 case 'm':
3684 opt |= 0x200000;
3685 break;
3686 }
3687 }
3688 } else {
3689 opt = 0x900;
3690 }
3691
3692 /* encoding */
3693 string.no_language = MBSTRG(language);
3694 string.encoding = php_mb_get_encoding(encname);
3695 if (!string.encoding) {
3696 RETURN_FALSE;
3697 }
3698
3699 ret = mbfl_ja_jp_hantozen(&string, &result, opt);
3700 if (ret != NULL) {
3701 // TODO: avoid reallocation ???
3702 RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
3703 efree(ret->val);
3704 } else {
3705 RETVAL_FALSE;
3706 }
3707 }
3708 /* }}} */
3709
/* Feeds every string reachable from `var` (through arrays and objects) to
 * the detector. Returns 1 as soon as mbfl_encoding_detector_feed() reports a
 * non-zero result, 0 otherwise. When a container that is already being
 * traversed is met again, *recursion_error is set to 1 and 0 is returned. */
static int mb_recursive_encoder_detector_feed(mbfl_encoding_detector *identd, zval *var, int *recursion_error) /* {{{ */
{
	mbfl_string chunk;
	HashTable *ht;
	zval *entry;
	int detected = 0;

	ZVAL_DEREF(var);

	if (Z_TYPE_P(var) == IS_STRING) {
		chunk.val = (unsigned char *)Z_STRVAL_P(var);
		chunk.len = Z_STRLEN_P(var);
		if (mbfl_encoding_detector_feed(identd, &chunk)) {
			return 1; /* complete detecting */
		}
		return 0;
	}

	if (Z_TYPE_P(var) != IS_ARRAY && Z_TYPE_P(var) != IS_OBJECT) {
		return 0;
	}

	/* Mark the container while it is being walked so that a cycle back to
	 * it is noticed. */
	if (Z_REFCOUNTED_P(var)) {
		if (Z_IS_RECURSIVE_P(var)) {
			*recursion_error = 1;
			return 0;
		}
		Z_PROTECT_RECURSION_P(var);
	}

	ht = HASH_OF(var);
	if (ht != NULL) {
		ZEND_HASH_FOREACH_VAL_IND(ht, entry) {
			if (mb_recursive_encoder_detector_feed(identd, entry, recursion_error)) {
				detected = 1;
				break;
			}
			if (*recursion_error) {
				break;
			}
		} ZEND_HASH_FOREACH_END();
	}

	/* Every exit after the mark was set removes it again. */
	if (Z_REFCOUNTED_P(var)) {
		Z_UNPROTECT_RECURSION_P(var);
	}

	return detected;
} /* }}} */
3755
/* Converts, in place, every string reachable from `var` (through arrays and
 * objects) with the given converter. Arrays are separated before they are
 * modified. Returns 1 when a container that is already being traversed is
 * met again, 0 otherwise. */
static int mb_recursive_convert_variable(mbfl_buffer_converter *convd, zval *var) /* {{{ */
{
	mbfl_string src, dst, *out;
	HashTable *ht;
	zval *entry;
	zval *target = var; /* the slot as passed in, before dereferencing */
	int recursion_hit = 0;

	ZVAL_DEREF(var);

	if (Z_TYPE_P(var) == IS_STRING) {
		src.val = (unsigned char *)Z_STRVAL_P(var);
		src.len = Z_STRLEN_P(var);
		out = mbfl_buffer_converter_feed_result(convd, &src, &dst);
		if (out != NULL) {
			/* Replace the original slot with the converted string and
			 * release the converter's buffer. */
			zval_ptr_dtor(target);
			ZVAL_STRINGL(target, (char *)out->val, out->len);
			efree(out->val);
		}
		return 0;
	}

	if (Z_TYPE_P(var) != IS_ARRAY && Z_TYPE_P(var) != IS_OBJECT) {
		return 0;
	}

	if (Z_TYPE_P(var) == IS_ARRAY) {
		SEPARATE_ARRAY(var);
	}

	/* Mark the container while it is being walked so that a cycle back to
	 * it is noticed. */
	if (Z_REFCOUNTED_P(var)) {
		if (Z_IS_RECURSIVE_P(var)) {
			return 1;
		}
		Z_PROTECT_RECURSION_P(var);
	}

	ht = HASH_OF(var);
	if (ht != NULL) {
		ZEND_HASH_FOREACH_VAL_IND(ht, entry) {
			if (mb_recursive_convert_variable(convd, entry)) {
				recursion_hit = 1;
				break;
			}
		} ZEND_HASH_FOREACH_END();
	}

	/* Every exit after the mark was set removes it again. */
	if (Z_REFCOUNTED_P(var)) {
		Z_UNPROTECT_RECURSION_P(var);
	}

	return recursion_hit;
} /* }}} */
3803
3804 /* {{{ proto string mb_convert_variables(string to-encoding, mixed from-encoding, mixed vars [, ...])
3805 Converts the string resource in variables to desired encoding */
PHP_FUNCTION(mb_convert_variables)3806 PHP_FUNCTION(mb_convert_variables)
3807 {
3808 zval *args, *zfrom_enc;
3809 mbfl_string string, result;
3810 const mbfl_encoding *from_encoding, *to_encoding;
3811 mbfl_encoding_detector *identd;
3812 mbfl_buffer_converter *convd;
3813 int n, argc;
3814 size_t to_enc_len;
3815 size_t elistsz;
3816 const mbfl_encoding **elist;
3817 char *to_enc;
3818 int recursion_error = 0;
3819
3820 if (zend_parse_parameters(ZEND_NUM_ARGS(), "sz+", &to_enc, &to_enc_len, &zfrom_enc, &args, &argc) == FAILURE) {
3821 return;
3822 }
3823
3824 /* new encoding */
3825 to_encoding = mbfl_name2encoding(to_enc);
3826 if (!to_encoding) {
3827 php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", to_enc);
3828 RETURN_FALSE;
3829 }
3830
3831 /* initialize string */
3832 mbfl_string_init(&string);
3833 mbfl_string_init(&result);
3834 from_encoding = MBSTRG(current_internal_encoding);
3835 string.encoding = from_encoding;
3836 string.no_language = MBSTRG(language);
3837
3838 /* pre-conversion encoding */
3839 elist = NULL;
3840 elistsz = 0;
3841 switch (Z_TYPE_P(zfrom_enc)) {
3842 case IS_ARRAY:
3843 php_mb_parse_encoding_array(zfrom_enc, &elist, &elistsz, 0);
3844 break;
3845 default:
3846 convert_to_string_ex(zfrom_enc);
3847 php_mb_parse_encoding_list(Z_STRVAL_P(zfrom_enc), Z_STRLEN_P(zfrom_enc), &elist, &elistsz, 0);
3848 break;
3849 }
3850
3851 if (elistsz == 0) {
3852 from_encoding = &mbfl_encoding_pass;
3853 } else if (elistsz == 1) {
3854 from_encoding = *elist;
3855 } else {
3856 /* auto detect */
3857 from_encoding = NULL;
3858 identd = mbfl_encoding_detector_new(elist, elistsz, MBSTRG(strict_detection));
3859 if (identd != NULL) {
3860 n = 0;
3861 while (n < argc) {
3862 if (mb_recursive_encoder_detector_feed(identd, &args[n], &recursion_error)) {
3863 break;
3864 }
3865 n++;
3866 }
3867 from_encoding = mbfl_encoding_detector_judge(identd);
3868 mbfl_encoding_detector_delete(identd);
3869 if (recursion_error) {
3870 if (elist != NULL) {
3871 efree((void *)elist);
3872 }
3873 php_error_docref(NULL, E_WARNING, "Cannot handle recursive references");
3874 RETURN_FALSE;
3875 }
3876 }
3877
3878 if (!from_encoding) {
3879 php_error_docref(NULL, E_WARNING, "Unable to detect encoding");
3880 from_encoding = &mbfl_encoding_pass;
3881 }
3882 }
3883 if (elist != NULL) {
3884 efree((void *)elist);
3885 }
3886 /* create converter */
3887 convd = NULL;
3888 if (from_encoding != &mbfl_encoding_pass) {
3889 convd = mbfl_buffer_converter_new(from_encoding, to_encoding, 0);
3890 if (convd == NULL) {
3891 php_error_docref(NULL, E_WARNING, "Unable to create converter");
3892 RETURN_FALSE;
3893 }
3894 mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
3895 mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
3896 }
3897
3898 /* convert */
3899 if (convd != NULL) {
3900 n = 0;
3901 while (n < argc) {
3902 zval *zv = &args[n];
3903
3904 ZVAL_DEREF(zv);
3905 recursion_error = mb_recursive_convert_variable(convd, zv);
3906 if (recursion_error) {
3907 break;
3908 }
3909 n++;
3910 }
3911
3912 MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
3913 mbfl_buffer_converter_delete(convd);
3914
3915 if (recursion_error) {
3916 php_error_docref(NULL, E_WARNING, "Cannot handle recursive references");
3917 RETURN_FALSE;
3918 }
3919 }
3920
3921 if (from_encoding) {
3922 RETURN_STRING(from_encoding->name);
3923 } else {
3924 RETURN_FALSE;
3925 }
3926 }
3927 /* }}} */
3928
3929 /* {{{ HTML numeric entity */
3930 /* {{{ static void php_mb_numericentity_exec() */
3931 static void
php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS,int type)3932 php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS, int type)
3933 {
3934 char *str, *encoding = NULL;
3935 size_t str_len, encoding_len;
3936 zval *zconvmap, *hash_entry;
3937 HashTable *target_hash;
3938 int i, *convmap, *mapelm, mapsize=0;
3939 zend_bool is_hex = 0;
3940 mbfl_string string, result, *ret;
3941
3942 if (zend_parse_parameters(ZEND_NUM_ARGS(), "sz|sb", &str, &str_len, &zconvmap, &encoding, &encoding_len, &is_hex) == FAILURE) {
3943 return;
3944 }
3945
3946 mbfl_string_init(&string);
3947 string.no_language = MBSTRG(language);
3948 string.encoding = MBSTRG(current_internal_encoding);
3949 string.val = (unsigned char *)str;
3950 string.len = str_len;
3951
3952 /* encoding */
3953 if (encoding && encoding_len > 0) {
3954 string.encoding = mbfl_name2encoding(encoding);
3955 if (!string.encoding) {
3956 php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", encoding);
3957 RETURN_FALSE;
3958 }
3959 }
3960
3961 if (type == 0 && is_hex) {
3962 type = 2; /* output in hex format */
3963 }
3964
3965 /* conversion map */
3966 convmap = NULL;
3967 if (Z_TYPE_P(zconvmap) == IS_ARRAY) {
3968 target_hash = Z_ARRVAL_P(zconvmap);
3969 i = zend_hash_num_elements(target_hash);
3970 if (i > 0) {
3971 convmap = (int *)safe_emalloc(i, sizeof(int), 0);
3972 mapelm = convmap;
3973 mapsize = 0;
3974 ZEND_HASH_FOREACH_VAL(target_hash, hash_entry) {
3975 *mapelm++ = zval_get_long(hash_entry);
3976 mapsize++;
3977 } ZEND_HASH_FOREACH_END();
3978 }
3979 }
3980 if (convmap == NULL) {
3981 RETURN_FALSE;
3982 }
3983 mapsize /= 4;
3984
3985 ret = mbfl_html_numeric_entity(&string, &result, convmap, mapsize, type);
3986 if (ret != NULL) {
3987 // TODO: avoid reallocation ???
3988 RETVAL_STRINGL((char *)ret->val, ret->len);
3989 efree(ret->val);
3990 } else {
3991 RETVAL_FALSE;
3992 }
3993 efree((void *)convmap);
3994 }
3995 /* }}} */
3996
3997 /* {{{ proto string mb_encode_numericentity(string string, array convmap [, string encoding [, bool is_hex]])
3998 Converts specified characters to HTML numeric entities */
PHP_FUNCTION(mb_encode_numericentity)3999 PHP_FUNCTION(mb_encode_numericentity)
4000 {
4001 php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
4002 }
4003 /* }}} */
4004
4005 /* {{{ proto string mb_decode_numericentity(string string, array convmap [, string encoding])
4006 Converts HTML numeric entities to character code */
PHP_FUNCTION(mb_decode_numericentity)4007 PHP_FUNCTION(mb_decode_numericentity)
4008 {
4009 php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
4010 }
4011 /* }}} */
4012 /* }}} */
4013
/* {{{ proto bool mb_send_mail(string to, string subject, string message [, mixed additional_headers [, string additional_parameters]])
 *  Sends an email message with a MIME-encoded subject and body.
 *  additional_headers may be a string or an array.
 */
4017
/*
 * For use inside a loop that walks a NUL-terminated header string.
 *
 * When str[pos] begins a folded-header separator (CR LF followed by a space
 * or a tab), pos is advanced to the last whitespace character of that
 * separator and the enclosing loop is `continue`d, so the separator is left
 * untouched. Otherwise the macro does nothing.
 *
 * This cannot be wrapped in do { } while (0): the `continue` has to act on
 * the caller's loop. pos must be a modifiable lvalue, and both arguments are
 * evaluated more than once.
 */
#define SKIP_LONG_HEADER_SEP_MBSTRING(str, pos) \
	if ((str)[(pos)] == '\r' && (str)[(pos) + 1] == '\n' && ((str)[(pos) + 2] == ' ' || (str)[(pos) + 2] == '\t')) { \
		(pos) += 2; \
		while ((str)[(pos) + 1] == ' ' || (str)[(pos) + 1] == '\t') { \
			(pos)++; \
		} \
		continue; \
	}
4026
/*
 * Replaces every NUL byte in the len-byte buffer str with a space, in place.
 *
 * Uses two `char *` scratch variables named pp and ee that the caller must
 * have in scope. Wrapped in do { } while (0) so the macro behaves as one
 * statement (e.g. as the body of an unbraced `if`).
 */
#define MAIL_ASCIIZ_CHECK_MBSTRING(str, len) \
	do { \
		pp = (str); \
		ee = pp + (len); \
		while ((pp = memchr(pp, '\0', (ee - pp)))) { \
			*pp = ' '; \
		} \
	} while (0)

_php_mbstr_parse_mail_headers(HashTable * ht,const char * str,size_t str_len)4034 static int _php_mbstr_parse_mail_headers(HashTable *ht, const char *str, size_t str_len)
4035 {
4036 const char *ps;
4037 size_t icnt;
4038 int state = 0;
4039 int crlf_state = -1;
4040 char *token = NULL;
4041 size_t token_pos = 0;
4042 zend_string *fld_name, *fld_val;
4043
4044 ps = str;
4045 icnt = str_len;
4046 fld_name = fld_val = NULL;
4047
4048 /*
4049 * C o n t e n t - T y p e : t e x t / h t m l \r\n
4050 * ^ ^^^^^^^^^^^^^^^^^^^^^ ^^^ ^^^^^^^^^^^^^^^^^ ^^^^
4051 * state 0 1 2 3
4052 *
4053 * C o n t e n t - T y p e : t e x t / h t m l \r\n
4054 * ^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^
4055 * crlf_state -1 0 1 -1
4056 *
4057 */
4058
4059 while (icnt > 0) {
4060 switch (*ps) {
4061 case ':':
4062 if (crlf_state == 1) {
4063 token_pos++;
4064 }
4065
4066 if (state == 0 || state == 1) {
4067 if(token && token_pos > 0) {
4068 fld_name = zend_string_init(token, token_pos, 0);
4069 }
4070 state = 2;
4071 } else {
4072 token_pos++;
4073 }
4074
4075 crlf_state = 0;
4076 break;
4077
4078 case '\n':
4079 if (crlf_state == -1) {
4080 goto out;
4081 }
4082 crlf_state = -1;
4083 break;
4084
4085 case '\r':
4086 if (crlf_state == 1) {
4087 token_pos++;
4088 } else {
4089 crlf_state = 1;
4090 }
4091 break;
4092
4093 case ' ': case '\t':
4094 if (crlf_state == -1) {
4095 if (state == 3) {
4096 /* continuing from the previous line */
4097 state = 4;
4098 } else {
4099 /* simply skipping this new line */
4100 state = 5;
4101 }
4102 } else {
4103 if (crlf_state == 1) {
4104 token_pos++;
4105 }
4106 if (state == 1 || state == 3) {
4107 token_pos++;
4108 }
4109 }
4110 crlf_state = 0;
4111 break;
4112
4113 default:
4114 switch (state) {
4115 case 0:
4116 token = (char*)ps;
4117 token_pos = 0;
4118 state = 1;
4119 break;
4120
4121 case 2:
4122 if (crlf_state != -1) {
4123 token = (char*)ps;
4124 token_pos = 0;
4125
4126 state = 3;
4127 break;
4128 }
4129 /* break is missing intentionally */
4130
4131 case 3:
4132 if (crlf_state == -1) {
4133 if(token && token_pos > 0) {
4134 fld_val = zend_string_init(token, token_pos, 0);
4135 }
4136
4137 if (fld_name != NULL && fld_val != NULL) {
4138 zval val;
4139 /* FIXME: some locale free implementation is
4140 * really required here,,, */
4141 php_strtoupper(ZSTR_VAL(fld_name), ZSTR_LEN(fld_name));
4142 ZVAL_STR(&val, fld_val);
4143
4144 zend_hash_update(ht, fld_name, &val);
4145
4146 zend_string_release_ex(fld_name, 0);
4147 }
4148
4149 fld_name = fld_val = NULL;
4150 token = (char*)ps;
4151 token_pos = 0;
4152
4153 state = 1;
4154 }
4155 break;
4156
4157 case 4:
4158 token_pos++;
4159 state = 3;
4160 break;
4161 }
4162
4163 if (crlf_state == 1) {
4164 token_pos++;
4165 }
4166
4167 token_pos++;
4168
4169 crlf_state = 0;
4170 break;
4171 }
4172 ps++, icnt--;
4173 }
4174 out:
4175 if (state == 2) {
4176 token = "";
4177 token_pos = 0;
4178
4179 state = 3;
4180 }
4181 if (state == 3) {
4182 if(token && token_pos > 0) {
4183 fld_val = zend_string_init(token, token_pos, 0);
4184 }
4185 if (fld_name != NULL && fld_val != NULL) {
4186 zval val;
4187 /* FIXME: some locale free implementation is
4188 * really required here,,, */
4189 php_strtoupper(ZSTR_VAL(fld_name), ZSTR_LEN(fld_name));
4190 ZVAL_STR(&val, fld_val);
4191
4192 zend_hash_update(ht, fld_name, &val);
4193
4194 zend_string_release_ex(fld_name, 0);
4195 }
4196 }
4197 return state;
4198 }
4199
PHP_FUNCTION(mb_send_mail)4200 PHP_FUNCTION(mb_send_mail)
4201 {
4202 char *to;
4203 size_t to_len;
4204 char *message;
4205 size_t message_len;
4206 char *subject;
4207 size_t subject_len;
4208 zval *headers = NULL;
4209 zend_string *extra_cmd = NULL;
4210 zend_string *str_headers = NULL, *tmp_headers;
4211 size_t n, i;
4212 char *to_r = NULL;
4213 char *force_extra_parameters = INI_STR("mail.force_extra_parameters");
4214 struct {
4215 int cnt_type:1;
4216 int cnt_trans_enc:1;
4217 } suppressed_hdrs = { 0, 0 };
4218
4219 char *message_buf = NULL, *subject_buf = NULL, *p;
4220 mbfl_string orig_str, conv_str;
4221 mbfl_string *pstr; /* pointer to mbfl string for return value */
4222 enum mbfl_no_encoding;
4223 const mbfl_encoding *tran_cs, /* transfar text charset */
4224 *head_enc, /* header transfar encoding */
4225 *body_enc; /* body transfar encoding */
4226 mbfl_memory_device device; /* automatic allocateable buffer for additional header */
4227 const mbfl_language *lang;
4228 int err = 0;
4229 HashTable ht_headers;
4230 zval *s;
4231 extern void mbfl_memory_device_unput(mbfl_memory_device *device);
4232 char *pp, *ee;
4233
4234 /* initialize */
4235 mbfl_memory_device_init(&device, 0, 0);
4236 mbfl_string_init(&orig_str);
4237 mbfl_string_init(&conv_str);
4238
4239 /* character-set, transfer-encoding */
4240 tran_cs = &mbfl_encoding_utf8;
4241 head_enc = &mbfl_encoding_base64;
4242 body_enc = &mbfl_encoding_base64;
4243 lang = mbfl_no2language(MBSTRG(language));
4244 if (lang != NULL) {
4245 tran_cs = mbfl_no2encoding(lang->mail_charset);
4246 head_enc = mbfl_no2encoding(lang->mail_header_encoding);
4247 body_enc = mbfl_no2encoding(lang->mail_body_encoding);
4248 }
4249
4250 if (zend_parse_parameters(ZEND_NUM_ARGS(), "sss|zS", &to, &to_len, &subject, &subject_len, &message, &message_len, &headers, &extra_cmd) == FAILURE) {
4251 return;
4252 }
4253
4254 /* ASCIIZ check */
4255 MAIL_ASCIIZ_CHECK_MBSTRING(to, to_len);
4256 MAIL_ASCIIZ_CHECK_MBSTRING(subject, subject_len);
4257 MAIL_ASCIIZ_CHECK_MBSTRING(message, message_len);
4258 if (headers) {
4259 switch(Z_TYPE_P(headers)) {
4260 case IS_STRING:
4261 tmp_headers = zend_string_init(Z_STRVAL_P(headers), Z_STRLEN_P(headers), 0);
4262 MAIL_ASCIIZ_CHECK_MBSTRING(ZSTR_VAL(tmp_headers), ZSTR_LEN(tmp_headers));
4263 str_headers = php_trim(tmp_headers, NULL, 0, 2);
4264 zend_string_release_ex(tmp_headers, 0);
4265 break;
4266 case IS_ARRAY:
4267 str_headers = php_mail_build_headers(headers);
4268 break;
4269 default:
4270 php_error_docref(NULL, E_WARNING, "headers parameter must be string or array");
4271 RETURN_FALSE;
4272 }
4273 }
4274 if (extra_cmd) {
4275 MAIL_ASCIIZ_CHECK_MBSTRING(ZSTR_VAL(extra_cmd), ZSTR_LEN(extra_cmd));
4276 }
4277
4278 zend_hash_init(&ht_headers, 0, NULL, ZVAL_PTR_DTOR, 0);
4279
4280 if (str_headers != NULL) {
4281 _php_mbstr_parse_mail_headers(&ht_headers, ZSTR_VAL(str_headers), ZSTR_LEN(str_headers));
4282 }
4283
4284 if ((s = zend_hash_str_find(&ht_headers, "CONTENT-TYPE", sizeof("CONTENT-TYPE") - 1))) {
4285 char *tmp;
4286 char *param_name;
4287 char *charset = NULL;
4288
4289 ZEND_ASSERT(Z_TYPE_P(s) == IS_STRING);
4290 p = strchr(Z_STRVAL_P(s), ';');
4291
4292 if (p != NULL) {
4293 /* skipping the padded spaces */
4294 do {
4295 ++p;
4296 } while (*p == ' ' || *p == '\t');
4297
4298 if (*p != '\0') {
4299 if ((param_name = php_strtok_r(p, "= ", &tmp)) != NULL) {
4300 if (strcasecmp(param_name, "charset") == 0) {
4301 const mbfl_encoding *_tran_cs = tran_cs;
4302
4303 charset = php_strtok_r(NULL, "= \"", &tmp);
4304 if (charset != NULL) {
4305 _tran_cs = mbfl_name2encoding(charset);
4306 }
4307
4308 if (!_tran_cs) {
4309 php_error_docref(NULL, E_WARNING, "Unsupported charset \"%s\" - will be regarded as ascii", charset);
4310 _tran_cs = &mbfl_encoding_ascii;
4311 }
4312 tran_cs = _tran_cs;
4313 }
4314 }
4315 }
4316 }
4317 suppressed_hdrs.cnt_type = 1;
4318 }
4319
4320 if ((s = zend_hash_str_find(&ht_headers, "CONTENT-TRANSFER-ENCODING", sizeof("CONTENT-TRANSFER-ENCODING") - 1))) {
4321 const mbfl_encoding *_body_enc;
4322
4323 ZEND_ASSERT(Z_TYPE_P(s) == IS_STRING);
4324 _body_enc = mbfl_name2encoding(Z_STRVAL_P(s));
4325 switch (_body_enc ? _body_enc->no_encoding : mbfl_no_encoding_invalid) {
4326 case mbfl_no_encoding_base64:
4327 case mbfl_no_encoding_7bit:
4328 case mbfl_no_encoding_8bit:
4329 body_enc = _body_enc;
4330 break;
4331
4332 default:
4333 php_error_docref(NULL, E_WARNING, "Unsupported transfer encoding \"%s\" - will be regarded as 8bit", Z_STRVAL_P(s));
4334 body_enc = &mbfl_encoding_8bit;
4335 break;
4336 }
4337 suppressed_hdrs.cnt_trans_enc = 1;
4338 }
4339
4340 /* To: */
4341 if (to_len > 0) {
4342 to_r = estrndup(to, to_len);
4343 for (; to_len; to_len--) {
4344 if (!isspace((unsigned char) to_r[to_len - 1])) {
4345 break;
4346 }
4347 to_r[to_len - 1] = '\0';
4348 }
4349 for (i = 0; to_r[i]; i++) {
4350 if (iscntrl((unsigned char) to_r[i])) {
4351 /* According to RFC 822, section 3.1.1 long headers may be separated into
4352 * parts using CRLF followed at least one linear-white-space character ('\t' or ' ').
4353 * To prevent these separators from being replaced with a space, we use the
4354 * SKIP_LONG_HEADER_SEP_MBSTRING to skip over them.
4355 */
4356 SKIP_LONG_HEADER_SEP_MBSTRING(to_r, i);
4357 to_r[i] = ' ';
4358 }
4359 }
4360 } else {
4361 to_r = to;
4362 }
4363
4364 /* Subject: */
4365 orig_str.no_language = MBSTRG(language);
4366 orig_str.val = (unsigned char *)subject;
4367 orig_str.len = subject_len;
4368 orig_str.encoding = MBSTRG(current_internal_encoding);
4369 if (orig_str.encoding->no_encoding == mbfl_no_encoding_invalid
4370 || orig_str.encoding->no_encoding == mbfl_no_encoding_pass) {
4371 orig_str.encoding = mbfl_identify_encoding(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
4372 }
4373 pstr = mbfl_mime_header_encode(&orig_str, &conv_str, tran_cs, head_enc, "\n", sizeof("Subject: [PHP-jp nnnnnnnn]"));
4374 if (pstr != NULL) {
4375 subject_buf = subject = (char *)pstr->val;
4376 }
4377
4378 /* message body */
4379 orig_str.no_language = MBSTRG(language);
4380 orig_str.val = (unsigned char *)message;
4381 orig_str.len = message_len;
4382 orig_str.encoding = MBSTRG(current_internal_encoding);
4383
4384 if (orig_str.encoding->no_encoding == mbfl_no_encoding_invalid
4385 || orig_str.encoding->no_encoding == mbfl_no_encoding_pass) {
4386 orig_str.encoding = mbfl_identify_encoding(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
4387 }
4388
4389 pstr = NULL;
4390 {
4391 mbfl_string tmpstr;
4392
4393 if (mbfl_convert_encoding(&orig_str, &tmpstr, tran_cs) != NULL) {
4394 tmpstr.encoding = &mbfl_encoding_8bit;
4395 pstr = mbfl_convert_encoding(&tmpstr, &conv_str, body_enc);
4396 efree(tmpstr.val);
4397 }
4398 }
4399 if (pstr != NULL) {
4400 message_buf = message = (char *)pstr->val;
4401 }
4402
4403 /* other headers */
4404 #define PHP_MBSTR_MAIL_MIME_HEADER1 "MIME-Version: 1.0"
4405 #define PHP_MBSTR_MAIL_MIME_HEADER2 "Content-Type: text/plain"
4406 #define PHP_MBSTR_MAIL_MIME_HEADER3 "; charset="
4407 #define PHP_MBSTR_MAIL_MIME_HEADER4 "Content-Transfer-Encoding: "
4408 if (str_headers != NULL) {
4409 p = ZSTR_VAL(str_headers);
4410 n = ZSTR_LEN(str_headers);
4411 mbfl_memory_device_strncat(&device, p, n);
4412 if (n > 0 && p[n - 1] != '\n') {
4413 mbfl_memory_device_strncat(&device, "\n", 1);
4414 }
4415 zend_string_release_ex(str_headers, 0);
4416 }
4417
4418 if (!zend_hash_str_exists(&ht_headers, "MIME-VERSION", sizeof("MIME-VERSION") - 1)) {
4419 mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER1, sizeof(PHP_MBSTR_MAIL_MIME_HEADER1) - 1);
4420 mbfl_memory_device_strncat(&device, "\n", 1);
4421 }
4422
4423 if (!suppressed_hdrs.cnt_type) {
4424 mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER2, sizeof(PHP_MBSTR_MAIL_MIME_HEADER2) - 1);
4425
4426 p = (char *)mbfl_no2preferred_mime_name(tran_cs->no_encoding);
4427 if (p != NULL) {
4428 mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER3, sizeof(PHP_MBSTR_MAIL_MIME_HEADER3) - 1);
4429 mbfl_memory_device_strcat(&device, p);
4430 }
4431 mbfl_memory_device_strncat(&device, "\n", 1);
4432 }
4433 if (!suppressed_hdrs.cnt_trans_enc) {
4434 mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER4, sizeof(PHP_MBSTR_MAIL_MIME_HEADER4) - 1);
4435 p = (char *)mbfl_no2preferred_mime_name(body_enc->no_encoding);
4436 if (p == NULL) {
4437 p = "7bit";
4438 }
4439 mbfl_memory_device_strcat(&device, p);
4440 mbfl_memory_device_strncat(&device, "\n", 1);
4441 }
4442
4443 mbfl_memory_device_unput(&device);
4444 mbfl_memory_device_output('\0', &device);
4445 str_headers = zend_string_init((char *)device.buffer, strlen((char *)device.buffer), 0);
4446
4447 if (force_extra_parameters) {
4448 extra_cmd = php_escape_shell_cmd(force_extra_parameters);
4449 } else if (extra_cmd) {
4450 extra_cmd = php_escape_shell_cmd(ZSTR_VAL(extra_cmd));
4451 }
4452
4453 if (!err && php_mail(to_r, subject, message, ZSTR_VAL(str_headers), extra_cmd ? ZSTR_VAL(extra_cmd) : NULL)) {
4454 RETVAL_TRUE;
4455 } else {
4456 RETVAL_FALSE;
4457 }
4458
4459 if (extra_cmd) {
4460 zend_string_release_ex(extra_cmd, 0);
4461 }
4462
4463 if (to_r != to) {
4464 efree(to_r);
4465 }
4466 if (subject_buf) {
4467 efree((void *)subject_buf);
4468 }
4469 if (message_buf) {
4470 efree((void *)message_buf);
4471 }
4472 mbfl_memory_device_clear(&device);
4473 zend_hash_destroy(&ht_headers);
4474 if (str_headers) {
4475 zend_string_release_ex(str_headers, 0);
4476 }
4477 }
4478
4479 #undef SKIP_LONG_HEADER_SEP_MBSTRING
4480 #undef MAIL_ASCIIZ_CHECK_MBSTRING
4481 #undef PHP_MBSTR_MAIL_MIME_HEADER1
4482 #undef PHP_MBSTR_MAIL_MIME_HEADER2
4483 #undef PHP_MBSTR_MAIL_MIME_HEADER3
4484 #undef PHP_MBSTR_MAIL_MIME_HEADER4
4485 /* }}} */
4486
4487 /* {{{ proto mixed mb_get_info([string type])
4488 Returns the current settings of mbstring */
PHP_FUNCTION(mb_get_info)4489 PHP_FUNCTION(mb_get_info)
4490 {
4491 char *typ = NULL;
4492 size_t typ_len;
4493 size_t n;
4494 char *name;
4495 const struct mb_overload_def *over_func;
4496 zval row1, row2;
4497 const mbfl_language *lang = mbfl_no2language(MBSTRG(language));
4498 const mbfl_encoding **entry;
4499
4500 if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s", &typ, &typ_len) == FAILURE) {
4501 return;
4502 }
4503
4504 if (!typ || !strcasecmp("all", typ)) {
4505 array_init(return_value);
4506 if (MBSTRG(current_internal_encoding)) {
4507 add_assoc_string(return_value, "internal_encoding", (char *)MBSTRG(current_internal_encoding)->name);
4508 }
4509 if (MBSTRG(http_input_identify)) {
4510 add_assoc_string(return_value, "http_input", (char *)MBSTRG(http_input_identify)->name);
4511 }
4512 if (MBSTRG(current_http_output_encoding)) {
4513 add_assoc_string(return_value, "http_output", (char *)MBSTRG(current_http_output_encoding)->name);
4514 }
4515 if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes") - 1, 0)) != NULL) {
4516 add_assoc_string(return_value, "http_output_conv_mimetypes", name);
4517 }
4518 add_assoc_long(return_value, "func_overload", MBSTRG(func_overload));
4519 if (MBSTRG(func_overload)){
4520 over_func = &(mb_ovld[0]);
4521 array_init(&row1);
4522 while (over_func->type > 0) {
4523 if ((MBSTRG(func_overload) & over_func->type) == over_func->type ) {
4524 add_assoc_string(&row1, over_func->orig_func, over_func->ovld_func);
4525 }
4526 over_func++;
4527 }
4528 add_assoc_zval(return_value, "func_overload_list", &row1);
4529 } else {
4530 add_assoc_string(return_value, "func_overload_list", "no overload");
4531 }
4532 if (lang != NULL) {
4533 if ((name = (char *)mbfl_no_encoding2name(lang->mail_charset)) != NULL) {
4534 add_assoc_string(return_value, "mail_charset", name);
4535 }
4536 if ((name = (char *)mbfl_no_encoding2name(lang->mail_header_encoding)) != NULL) {
4537 add_assoc_string(return_value, "mail_header_encoding", name);
4538 }
4539 if ((name = (char *)mbfl_no_encoding2name(lang->mail_body_encoding)) != NULL) {
4540 add_assoc_string(return_value, "mail_body_encoding", name);
4541 }
4542 }
4543 add_assoc_long(return_value, "illegal_chars", MBSTRG(illegalchars));
4544 if (MBSTRG(encoding_translation)) {
4545 add_assoc_string(return_value, "encoding_translation", "On");
4546 } else {
4547 add_assoc_string(return_value, "encoding_translation", "Off");
4548 }
4549 if ((name = (char *)mbfl_no_language2name(MBSTRG(language))) != NULL) {
4550 add_assoc_string(return_value, "language", name);
4551 }
4552 n = MBSTRG(current_detect_order_list_size);
4553 entry = MBSTRG(current_detect_order_list);
4554 if (n > 0) {
4555 size_t i;
4556 array_init(&row2);
4557 for (i = 0; i < n; i++) {
4558 add_next_index_string(&row2, (*entry)->name);
4559 entry++;
4560 }
4561 add_assoc_zval(return_value, "detect_order", &row2);
4562 }
4563 if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
4564 add_assoc_string(return_value, "substitute_character", "none");
4565 } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
4566 add_assoc_string(return_value, "substitute_character", "long");
4567 } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
4568 add_assoc_string(return_value, "substitute_character", "entity");
4569 } else {
4570 add_assoc_long(return_value, "substitute_character", MBSTRG(current_filter_illegal_substchar));
4571 }
4572 if (MBSTRG(strict_detection)) {
4573 add_assoc_string(return_value, "strict_detection", "On");
4574 } else {
4575 add_assoc_string(return_value, "strict_detection", "Off");
4576 }
4577 } else if (!strcasecmp("internal_encoding", typ)) {
4578 if (MBSTRG(current_internal_encoding)) {
4579 RETVAL_STRING((char *)MBSTRG(current_internal_encoding)->name);
4580 }
4581 } else if (!strcasecmp("http_input", typ)) {
4582 if (MBSTRG(http_input_identify)) {
4583 RETVAL_STRING((char *)MBSTRG(http_input_identify)->name);
4584 }
4585 } else if (!strcasecmp("http_output", typ)) {
4586 if (MBSTRG(current_http_output_encoding)) {
4587 RETVAL_STRING((char *)MBSTRG(current_http_output_encoding)->name);
4588 }
4589 } else if (!strcasecmp("http_output_conv_mimetypes", typ)) {
4590 if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes") - 1, 0)) != NULL) {
4591 RETVAL_STRING(name);
4592 }
4593 } else if (!strcasecmp("func_overload", typ)) {
4594 RETVAL_LONG(MBSTRG(func_overload));
4595 } else if (!strcasecmp("func_overload_list", typ)) {
4596 if (MBSTRG(func_overload)){
4597 over_func = &(mb_ovld[0]);
4598 array_init(return_value);
4599 while (over_func->type > 0) {
4600 if ((MBSTRG(func_overload) & over_func->type) == over_func->type ) {
4601 add_assoc_string(return_value, over_func->orig_func, over_func->ovld_func);
4602 }
4603 over_func++;
4604 }
4605 } else {
4606 RETVAL_STRING("no overload");
4607 }
4608 } else if (!strcasecmp("mail_charset", typ)) {
4609 if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_charset)) != NULL) {
4610 RETVAL_STRING(name);
4611 }
4612 } else if (!strcasecmp("mail_header_encoding", typ)) {
4613 if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_header_encoding)) != NULL) {
4614 RETVAL_STRING(name);
4615 }
4616 } else if (!strcasecmp("mail_body_encoding", typ)) {
4617 if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_body_encoding)) != NULL) {
4618 RETVAL_STRING(name);
4619 }
4620 } else if (!strcasecmp("illegal_chars", typ)) {
4621 RETVAL_LONG(MBSTRG(illegalchars));
4622 } else if (!strcasecmp("encoding_translation", typ)) {
4623 if (MBSTRG(encoding_translation)) {
4624 RETVAL_STRING("On");
4625 } else {
4626 RETVAL_STRING("Off");
4627 }
4628 } else if (!strcasecmp("language", typ)) {
4629 if ((name = (char *)mbfl_no_language2name(MBSTRG(language))) != NULL) {
4630 RETVAL_STRING(name);
4631 }
4632 } else if (!strcasecmp("detect_order", typ)) {
4633 n = MBSTRG(current_detect_order_list_size);
4634 entry = MBSTRG(current_detect_order_list);
4635 if (n > 0) {
4636 size_t i;
4637 array_init(return_value);
4638 for (i = 0; i < n; i++) {
4639 add_next_index_string(return_value, (*entry)->name);
4640 entry++;
4641 }
4642 }
4643 } else if (!strcasecmp("substitute_character", typ)) {
4644 if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
4645 RETVAL_STRING("none");
4646 } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
4647 RETVAL_STRING("long");
4648 } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
4649 RETVAL_STRING("entity");
4650 } else {
4651 RETVAL_LONG(MBSTRG(current_filter_illegal_substchar));
4652 }
4653 } else if (!strcasecmp("strict_detection", typ)) {
4654 if (MBSTRG(strict_detection)) {
4655 RETVAL_STRING("On");
4656 } else {
4657 RETVAL_STRING("Off");
4658 }
4659 } else {
4660 RETURN_FALSE;
4661 }
4662 }
4663 /* }}} */
4664
4665
php_mb_init_convd(const mbfl_encoding * encoding)4666 static inline mbfl_buffer_converter *php_mb_init_convd(const mbfl_encoding *encoding)
4667 {
4668 mbfl_buffer_converter *convd;
4669
4670 convd = mbfl_buffer_converter_new(encoding, encoding, 0);
4671 if (convd == NULL) {
4672 return NULL;
4673 }
4674 mbfl_buffer_converter_illegal_mode(convd, MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE);
4675 mbfl_buffer_converter_illegal_substchar(convd, 0);
4676 return convd;
4677 }
4678
4679
/*
 * Feeds length bytes of input through convd and reports whether they are
 * valid in encoding.
 *
 * Returns 1 only when the conversion produced a result, the converter has
 * counted no illegal characters, and the output is byte-for-byte identical
 * to the input; returns 0 otherwise.
 */
static inline int php_mb_check_encoding_impl(mbfl_buffer_converter *convd, const char *input, size_t length, const mbfl_encoding *encoding) {
	mbfl_string src, dst, *converted;
	size_t bad_chars;
	int same;

	/* initialize string */
	mbfl_string_init_set(&src, mbfl_no_language_neutral, encoding);
	mbfl_string_init(&dst);
	src.val = (unsigned char *) input;
	src.len = length;

	converted = mbfl_buffer_converter_feed_result(convd, &src, &dst);
	bad_chars = mbfl_buffer_illegalchars(convd);

	if (converted == NULL) {
		/* nothing was produced, so there is nothing to clear */
		return 0;
	}

	same = bad_chars == 0
		&& src.len == dst.len
		&& memcmp(src.val, dst.val, src.len) == 0;

	mbfl_string_clear(&dst);
	return same ? 1 : 0;
}
4703
4704
php_mb_check_encoding(const char * input,size_t length,const char * enc)4705 MBSTRING_API int php_mb_check_encoding(const char *input, size_t length, const char *enc)
4706 {
4707 const mbfl_encoding *encoding = MBSTRG(current_internal_encoding);
4708 mbfl_buffer_converter *convd;
4709
4710 if (input == NULL) {
4711 return MBSTRG(illegalchars) == 0;
4712 }
4713
4714 if (enc != NULL) {
4715 encoding = mbfl_name2encoding(enc);
4716 if (!encoding || encoding == &mbfl_encoding_pass) {
4717 php_error_docref(NULL, E_WARNING, "Invalid encoding \"%s\"", enc);
4718 return 0;
4719 }
4720 }
4721
4722 convd = php_mb_init_convd(encoding);
4723 if (convd == NULL) {
4724 php_error_docref(NULL, E_WARNING, "Unable to create converter");
4725 return 0;
4726 }
4727
4728 if (php_mb_check_encoding_impl(convd, input, length, encoding)) {
4729 mbfl_buffer_converter_delete(convd);
4730 return 1;
4731 }
4732 mbfl_buffer_converter_delete(convd);
4733 return 0;
4734 }
4735
4736
php_mb_check_encoding_recursive(HashTable * vars,const zend_string * enc)4737 MBSTRING_API int php_mb_check_encoding_recursive(HashTable *vars, const zend_string *enc)
4738 {
4739 const mbfl_encoding *encoding = MBSTRG(current_internal_encoding);
4740 mbfl_buffer_converter *convd;
4741 zend_long idx;
4742 zend_string *key;
4743 zval *entry;
4744 int valid = 1;
4745
4746 (void)(idx);
4747
4748 if (enc != NULL) {
4749 encoding = mbfl_name2encoding(ZSTR_VAL(enc));
4750 if (!encoding || encoding == &mbfl_encoding_pass) {
4751 php_error_docref(NULL, E_WARNING, "Invalid encoding \"%s\"", ZSTR_VAL(enc));
4752 return 0;
4753 }
4754 }
4755
4756 convd = php_mb_init_convd(encoding);
4757 if (convd == NULL) {
4758 php_error_docref(NULL, E_WARNING, "Unable to create converter");
4759 return 0;
4760 }
4761
4762 if (GC_IS_RECURSIVE(vars)) {
4763 mbfl_buffer_converter_delete(convd);
4764 php_error_docref(NULL, E_WARNING, "Cannot not handle circular references");
4765 return 0;
4766 }
4767 GC_TRY_PROTECT_RECURSION(vars);
4768 ZEND_HASH_FOREACH_KEY_VAL(vars, idx, key, entry) {
4769 ZVAL_DEREF(entry);
4770 if (key) {
4771 if (!php_mb_check_encoding_impl(convd, ZSTR_VAL(key), ZSTR_LEN(key), encoding)) {
4772 valid = 0;
4773 break;
4774 }
4775 }
4776 switch (Z_TYPE_P(entry)) {
4777 case IS_STRING:
4778 if (!php_mb_check_encoding_impl(convd, Z_STRVAL_P(entry), Z_STRLEN_P(entry), encoding)) {
4779 valid = 0;
4780 break;
4781 }
4782 break;
4783 case IS_ARRAY:
4784 if (!php_mb_check_encoding_recursive(HASH_OF(entry), enc)) {
4785 valid = 0;
4786 break;
4787 }
4788 break;
4789 case IS_LONG:
4790 case IS_DOUBLE:
4791 case IS_NULL:
4792 case IS_TRUE:
4793 case IS_FALSE:
4794 break;
4795 default:
4796 /* Other types are error. */
4797 valid = 0;
4798 break;
4799 }
4800 } ZEND_HASH_FOREACH_END();
4801 GC_TRY_UNPROTECT_RECURSION(vars);
4802 mbfl_buffer_converter_delete(convd);
4803 return valid;
4804 }
4805
4806
4807 /* {{{ proto bool mb_check_encoding([mixed var[, string encoding]])
4808 Check if the string is valid for the specified encoding */
PHP_FUNCTION(mb_check_encoding)4809 PHP_FUNCTION(mb_check_encoding)
4810 {
4811 zval *input = NULL;
4812 zend_string *enc = NULL;
4813
4814 if (zend_parse_parameters(ZEND_NUM_ARGS(), "|zS", &input, &enc) == FAILURE) {
4815 return;
4816 }
4817
4818 /* FIXME: Actually check all inputs, except $_FILES file content. */
4819 if (input == NULL) {
4820 if (MBSTRG(illegalchars) == 0) {
4821 RETURN_TRUE;
4822 }
4823 RETURN_FALSE;
4824 }
4825
4826 if (Z_TYPE_P(input) == IS_ARRAY) {
4827 if (!php_mb_check_encoding_recursive(HASH_OF(input), enc)) {
4828 RETURN_FALSE;
4829 }
4830 } else {
4831 convert_to_string(input);
4832 if (!php_mb_check_encoding(Z_STRVAL_P(input), Z_STRLEN_P(input), enc ? ZSTR_VAL(enc): NULL)) {
4833 RETURN_FALSE;
4834 }
4835 }
4836 RETURN_TRUE;
4837 }
4838 /* }}} */
4839
4840
php_mb_ord(const char * str,size_t str_len,const char * enc_name)4841 static inline zend_long php_mb_ord(const char* str, size_t str_len, const char* enc_name)
4842 {
4843 const mbfl_encoding *enc;
4844 enum mbfl_no_encoding no_enc;
4845
4846 enc = php_mb_get_encoding(enc_name);
4847 if (!enc) {
4848 return -1;
4849 }
4850
4851 no_enc = enc->no_encoding;
4852 if (php_mb_is_unsupported_no_encoding(no_enc)) {
4853 php_error_docref(NULL, E_WARNING, "Unsupported encoding \"%s\"", enc->name);
4854 return -1;
4855 }
4856
4857 if (str_len == 0) {
4858 php_error_docref(NULL, E_WARNING, "Empty string");
4859 return -1;
4860 }
4861
4862 {
4863 mbfl_wchar_device dev;
4864 mbfl_convert_filter *filter;
4865 zend_long cp;
4866
4867 mbfl_wchar_device_init(&dev);
4868 filter = mbfl_convert_filter_new(
4869 enc, &mbfl_encoding_wchar,
4870 mbfl_wchar_device_output, 0, &dev);
4871 if (!filter) {
4872 php_error_docref(NULL, E_WARNING, "Creation of filter failed");
4873 return -1;
4874 }
4875
4876 mbfl_convert_filter_feed_string(filter, (const unsigned char *) str, str_len);
4877 mbfl_convert_filter_flush(filter);
4878
4879 if (dev.pos < 1 || filter->num_illegalchar || dev.buffer[0] >= MBFL_WCSGROUP_UCS4MAX) {
4880 mbfl_convert_filter_delete(filter);
4881 mbfl_wchar_device_clear(&dev);
4882 return -1;
4883 }
4884
4885 cp = dev.buffer[0];
4886 mbfl_convert_filter_delete(filter);
4887 mbfl_wchar_device_clear(&dev);
4888 return cp;
4889 }
4890 }
4891
4892
4893 /* {{{ proto int|false mb_ord([string str[, string encoding]]) */
PHP_FUNCTION(mb_ord)4894 PHP_FUNCTION(mb_ord)
4895 {
4896 char* str;
4897 size_t str_len;
4898 char* enc = NULL;
4899 size_t enc_len;
4900 zend_long cp;
4901
4902 ZEND_PARSE_PARAMETERS_START(1, 2)
4903 Z_PARAM_STRING(str, str_len)
4904 Z_PARAM_OPTIONAL
4905 Z_PARAM_STRING(enc, enc_len)
4906 ZEND_PARSE_PARAMETERS_END();
4907
4908 cp = php_mb_ord(str, str_len, enc);
4909
4910 if (0 > cp) {
4911 RETURN_FALSE;
4912 }
4913
4914 RETURN_LONG(cp);
4915 }
4916 /* }}} */
4917
4918
php_mb_chr(zend_long cp,const char * enc_name)4919 static inline zend_string *php_mb_chr(zend_long cp, const char *enc_name)
4920 {
4921 const mbfl_encoding *enc;
4922 enum mbfl_no_encoding no_enc;
4923 zend_string *ret;
4924 char* buf;
4925 size_t buf_len;
4926
4927 enc = php_mb_get_encoding(enc_name);
4928 if (!enc) {
4929 return NULL;
4930 }
4931
4932 no_enc = enc->no_encoding;
4933 if (php_mb_is_unsupported_no_encoding(no_enc)) {
4934 php_error_docref(NULL, E_WARNING, "Unsupported encoding \"%s\"", enc->name);
4935 return NULL;
4936 }
4937
4938 if (cp < 0 || cp > 0x10ffff) {
4939 return NULL;
4940 }
4941
4942 if (php_mb_is_no_encoding_utf8(no_enc)) {
4943 if (cp > 0xd7ff && 0xe000 > cp) {
4944 return NULL;
4945 }
4946
4947 if (cp < 0x80) {
4948 ret = ZSTR_CHAR(cp);
4949 } else if (cp < 0x800) {
4950 ret = zend_string_alloc(2, 0);
4951 ZSTR_VAL(ret)[0] = 0xc0 | (cp >> 6);
4952 ZSTR_VAL(ret)[1] = 0x80 | (cp & 0x3f);
4953 ZSTR_VAL(ret)[2] = 0;
4954 } else if (cp < 0x10000) {
4955 ret = zend_string_alloc(3, 0);
4956 ZSTR_VAL(ret)[0] = 0xe0 | (cp >> 12);
4957 ZSTR_VAL(ret)[1] = 0x80 | ((cp >> 6) & 0x3f);
4958 ZSTR_VAL(ret)[2] = 0x80 | (cp & 0x3f);
4959 ZSTR_VAL(ret)[3] = 0;
4960 } else {
4961 ret = zend_string_alloc(4, 0);
4962 ZSTR_VAL(ret)[0] = 0xf0 | (cp >> 18);
4963 ZSTR_VAL(ret)[1] = 0x80 | ((cp >> 12) & 0x3f);
4964 ZSTR_VAL(ret)[2] = 0x80 | ((cp >> 6) & 0x3f);
4965 ZSTR_VAL(ret)[3] = 0x80 | (cp & 0x3f);
4966 ZSTR_VAL(ret)[4] = 0;
4967 }
4968
4969 return ret;
4970 }
4971
4972 buf_len = 4;
4973 buf = (char *) emalloc(buf_len + 1);
4974 buf[0] = (cp >> 24) & 0xff;
4975 buf[1] = (cp >> 16) & 0xff;
4976 buf[2] = (cp >> 8) & 0xff;
4977 buf[3] = cp & 0xff;
4978 buf[4] = 0;
4979
4980 {
4981 char *ret_str;
4982 size_t ret_len;
4983 long orig_illegalchars = MBSTRG(illegalchars);
4984 MBSTRG(illegalchars) = 0;
4985 ret_str = php_mb_convert_encoding_ex(buf, buf_len, enc, &mbfl_encoding_ucs4be, &ret_len);
4986 if (MBSTRG(illegalchars) != 0) {
4987 efree(buf);
4988 efree(ret_str);
4989 MBSTRG(illegalchars) = orig_illegalchars;
4990 return NULL;
4991 }
4992
4993 ret = zend_string_init(ret_str, ret_len, 0);
4994 efree(ret_str);
4995 MBSTRG(illegalchars) = orig_illegalchars;
4996 }
4997
4998 efree(buf);
4999 return ret;
5000 }
5001
5002
5003 /* {{{ proto string|false mb_chr([int cp[, string encoding]]) */
PHP_FUNCTION(mb_chr)5004 PHP_FUNCTION(mb_chr)
5005 {
5006 zend_long cp;
5007 char* enc = NULL;
5008 size_t enc_len;
5009 zend_string* ret;
5010
5011 ZEND_PARSE_PARAMETERS_START(1, 2)
5012 Z_PARAM_LONG(cp)
5013 Z_PARAM_OPTIONAL
5014 Z_PARAM_STRING(enc, enc_len)
5015 ZEND_PARSE_PARAMETERS_END();
5016
5017 ret = php_mb_chr(cp, enc);
5018 if (ret == NULL) {
5019 RETURN_FALSE;
5020 }
5021
5022 RETURN_STR(ret);
5023 }
5024 /* }}} */
5025
5026
/*
 * Feed `str` (str_len bytes) through php_mb_convert_encoding_ex() with `enc`
 * used as both the first and the second encoding argument.
 * The converter's result is returned unchanged and its length is written via
 * `ret_len`. The caller in this file treats NULL as failure and releases a
 * non-NULL result with efree().
 */
static inline char* php_mb_scrub(const char* str, size_t str_len, const mbfl_encoding *enc, size_t *ret_len)
{
	char *scrubbed = php_mb_convert_encoding_ex(str, str_len, enc, enc, ret_len);

	return scrubbed;
}
5031
5032
5033 /* {{{ proto string|false mb_scrub([string str[, string encoding]]) */
PHP_FUNCTION(mb_scrub)5034 PHP_FUNCTION(mb_scrub)
5035 {
5036 const mbfl_encoding *enc;
5037 char* str;
5038 size_t str_len;
5039 char *enc_name = NULL;
5040 size_t enc_name_len;
5041 char *ret;
5042 size_t ret_len;
5043
5044 ZEND_PARSE_PARAMETERS_START(1, 2)
5045 Z_PARAM_STRING(str, str_len)
5046 Z_PARAM_OPTIONAL
5047 Z_PARAM_STRING(enc_name, enc_name_len)
5048 ZEND_PARSE_PARAMETERS_END();
5049
5050 enc = php_mb_get_encoding(enc_name);
5051 if (!enc) {
5052 RETURN_FALSE;
5053 }
5054
5055 ret = php_mb_scrub(str, str_len, enc, &ret_len);
5056
5057 if (ret == NULL) {
5058 RETURN_FALSE;
5059 }
5060
5061 RETVAL_STRINGL(ret, ret_len);
5062 efree(ret);
5063 }
5064 /* }}} */
5065
5066
5067 /* {{{ php_mb_populate_current_detect_order_list */
php_mb_populate_current_detect_order_list(void)5068 static void php_mb_populate_current_detect_order_list(void)
5069 {
5070 const mbfl_encoding **entry = 0;
5071 size_t nentries;
5072
5073 if (MBSTRG(current_detect_order_list)) {
5074 return;
5075 }
5076
5077 if (MBSTRG(detect_order_list) && MBSTRG(detect_order_list_size)) {
5078 nentries = MBSTRG(detect_order_list_size);
5079 entry = (const mbfl_encoding **)safe_emalloc(nentries, sizeof(mbfl_encoding*), 0);
5080 memcpy(entry, MBSTRG(detect_order_list), sizeof(mbfl_encoding*) * nentries);
5081 } else {
5082 const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
5083 size_t i;
5084 nentries = MBSTRG(default_detect_order_list_size);
5085 entry = (const mbfl_encoding **)safe_emalloc(nentries, sizeof(mbfl_encoding*), 0);
5086 for (i = 0; i < nentries; i++) {
5087 entry[i] = mbfl_no2encoding(src[i]);
5088 }
5089 }
5090 MBSTRG(current_detect_order_list) = entry;
5091 MBSTRG(current_detect_order_list_size) = nentries;
5092 }
5093 /* }}} */
5094
5095 /* {{{ static int php_mb_encoding_translation() */
php_mb_encoding_translation(void)5096 static int php_mb_encoding_translation(void)
5097 {
5098 return MBSTRG(encoding_translation);
5099 }
5100 /* }}} */
5101
5102 /* {{{ MBSTRING_API size_t php_mb_mbchar_bytes_ex() */
php_mb_mbchar_bytes_ex(const char * s,const mbfl_encoding * enc)5103 MBSTRING_API size_t php_mb_mbchar_bytes_ex(const char *s, const mbfl_encoding *enc)
5104 {
5105 if (enc != NULL) {
5106 if (enc->flag & MBFL_ENCTYPE_MBCS) {
5107 if (enc->mblen_table != NULL) {
5108 if (s != NULL) return enc->mblen_table[*(unsigned char *)s];
5109 }
5110 } else if (enc->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
5111 return 2;
5112 } else if (enc->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
5113 return 4;
5114 }
5115 }
5116 return 1;
5117 }
5118 /* }}} */
5119
5120 /* {{{ MBSTRING_API size_t php_mb_mbchar_bytes() */
php_mb_mbchar_bytes(const char * s)5121 MBSTRING_API size_t php_mb_mbchar_bytes(const char *s)
5122 {
5123 return php_mb_mbchar_bytes_ex(s, MBSTRG(internal_encoding));
5124 }
5125 /* }}} */
5126
5127 /* {{{ MBSTRING_API char *php_mb_safe_strrchr_ex() */
php_mb_safe_strrchr_ex(const char * s,unsigned int c,size_t nbytes,const mbfl_encoding * enc)5128 MBSTRING_API char *php_mb_safe_strrchr_ex(const char *s, unsigned int c, size_t nbytes, const mbfl_encoding *enc)
5129 {
5130 register const char *p = s;
5131 char *last=NULL;
5132
5133 if (nbytes == (size_t)-1) {
5134 size_t nb = 0;
5135
5136 while (*p != '\0') {
5137 if (nb == 0) {
5138 if ((unsigned char)*p == (unsigned char)c) {
5139 last = (char *)p;
5140 }
5141 nb = php_mb_mbchar_bytes_ex(p, enc);
5142 if (nb == 0) {
5143 return NULL; /* something is going wrong! */
5144 }
5145 }
5146 --nb;
5147 ++p;
5148 }
5149 } else {
5150 register size_t bcnt = nbytes;
5151 register size_t nbytes_char;
5152 while (bcnt > 0) {
5153 if ((unsigned char)*p == (unsigned char)c) {
5154 last = (char *)p;
5155 }
5156 nbytes_char = php_mb_mbchar_bytes_ex(p, enc);
5157 if (bcnt < nbytes_char) {
5158 return NULL;
5159 }
5160 p += nbytes_char;
5161 bcnt -= nbytes_char;
5162 }
5163 }
5164 return last;
5165 }
5166 /* }}} */
5167
5168 /* {{{ MBSTRING_API char *php_mb_safe_strrchr() */
php_mb_safe_strrchr(const char * s,unsigned int c,size_t nbytes)5169 MBSTRING_API char *php_mb_safe_strrchr(const char *s, unsigned int c, size_t nbytes)
5170 {
5171 return php_mb_safe_strrchr_ex(s, c, nbytes, MBSTRG(internal_encoding));
5172 }
5173 /* }}} */
5174
5175 /* {{{ MBSTRING_API int php_mb_stripos()
5176 */
php_mb_stripos(int mode,const char * old_haystack,size_t old_haystack_len,const char * old_needle,size_t old_needle_len,zend_long offset,const char * from_encoding)5177 MBSTRING_API size_t php_mb_stripos(int mode, const char *old_haystack, size_t old_haystack_len, const char *old_needle, size_t old_needle_len, zend_long offset, const char *from_encoding)
5178 {
5179 size_t n = (size_t) -1;
5180 mbfl_string haystack, needle;
5181 const mbfl_encoding *enc;
5182
5183 enc = php_mb_get_encoding(from_encoding);
5184 if (!enc) {
5185 return (size_t) -1;
5186 }
5187
5188 mbfl_string_init(&haystack);
5189 mbfl_string_init(&needle);
5190 haystack.no_language = MBSTRG(language);
5191 haystack.encoding = enc;
5192 needle.no_language = MBSTRG(language);
5193 needle.encoding = enc;
5194
5195 do {
5196 /* We're using simple case-folding here, because we'd have to deal with remapping of
5197 * offsets otherwise. */
5198
5199 size_t len = 0;
5200 haystack.val = (unsigned char *)mbstring_convert_case(PHP_UNICODE_CASE_FOLD_SIMPLE, (char *)old_haystack, old_haystack_len, &len, enc);
5201 haystack.len = len;
5202
5203 if (!haystack.val) {
5204 break;
5205 }
5206
5207 if (haystack.len == 0) {
5208 break;
5209 }
5210
5211 needle.val = (unsigned char *)mbstring_convert_case(PHP_UNICODE_CASE_FOLD_SIMPLE, (char *)old_needle, old_needle_len, &len, enc);
5212 needle.len = len;
5213
5214 if (!needle.val) {
5215 break;
5216 }
5217
5218 if (needle.len == 0) {
5219 break;
5220 }
5221
5222 if (offset != 0) {
5223 size_t haystack_char_len = mbfl_strlen(&haystack);
5224
5225 if (mode) {
5226 if ((offset > 0 && (size_t)offset > haystack_char_len) ||
5227 (offset < 0 && (size_t)(-offset) > haystack_char_len)) {
5228 php_error_docref(NULL, E_WARNING, "Offset is greater than the length of haystack string");
5229 break;
5230 }
5231 } else {
5232 if (offset < 0) {
5233 offset += (zend_long)haystack_char_len;
5234 }
5235 if (offset < 0 || (size_t)offset > haystack_char_len) {
5236 php_error_docref(NULL, E_WARNING, "Offset not contained in string");
5237 break;
5238 }
5239 }
5240 }
5241
5242 n = mbfl_strpos(&haystack, &needle, offset, mode);
5243 } while(0);
5244
5245 if (haystack.val) {
5246 efree(haystack.val);
5247 }
5248
5249 if (needle.val) {
5250 efree(needle.val);
5251 }
5252
5253 return n;
5254 }
5255 /* }}} */
5256
php_mb_gpc_get_detect_order(const zend_encoding *** list,size_t * list_size)5257 static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *list_size) /* {{{ */
5258 {
5259 *list = (const zend_encoding **)MBSTRG(http_input_list);
5260 *list_size = MBSTRG(http_input_list_size);
5261 }
5262 /* }}} */
5263
php_mb_gpc_set_input_encoding(const zend_encoding * encoding)5264 static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding) /* {{{ */
5265 {
5266 MBSTRG(http_input_identify) = (const mbfl_encoding*)encoding;
5267 }
5268 /* }}} */
5269
5270 #endif /* HAVE_MBSTRING */
5271
5272 /*
5273 * Local variables:
5274 * tab-width: 4
5275 * c-basic-offset: 4
5276 * End:
5277 * vim600: fdm=marker
5278 * vim: noet sw=4 ts=4
5279 */
5280