1 /*
2 +----------------------------------------------------------------------+
3 | PHP Version 7 |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 1997-2017 The PHP Group |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
15 | Author: Tsukada Takuya <tsukada@fminn.nagano.nagano.jp> |
16 | Rui Hirokawa <hirokawa@php.net> |
17 +----------------------------------------------------------------------+
18 */
19
20 /* $Id$ */
21
22 /*
23 * PHP 4 Multibyte String module "mbstring"
24 *
25 * History:
26 * 2000.5.19 Release php-4.0RC2_jstring-1.0
27 * 2001.4.1 Release php4_jstring-1.0.91
28 * 2001.4.30 Release php4_jstring-1.1 (contribute to The PHP Group)
29 * 2001.5.1 Renamed from jstring to mbstring (hirokawa@php.net)
30 */
31
32 /*
33 * PHP3 Internationalization support program.
34 *
35 * Copyright (c) 1999,2000 by the PHP3 internationalization team.
36 * All rights reserved.
37 *
38 * See README_PHP3-i18n-ja for more detail.
39 *
40 * Authors:
41 * Hironori Sato <satoh@jpnnet.com>
42 * Shigeru Kanemoto <sgk@happysize.co.jp>
43 * Tsukada Takuya <tsukada@fminn.nagano.nagano.jp>
44 * Rui Hirokawa <rui_hirokawa@ybb.ne.jp>
45 */
46
47 /* {{{ includes */
48 #ifdef HAVE_CONFIG_H
49 #include "config.h"
50 #endif
51
52 #include "php.h"
53 #include "php_ini.h"
54 #include "php_variables.h"
55 #include "mbstring.h"
56 #include "ext/standard/php_string.h"
57 #include "ext/standard/php_mail.h"
58 #include "ext/standard/exec.h"
59 #include "ext/standard/url.h"
60 #include "main/php_output.h"
61 #include "ext/standard/info.h"
62
63 #include "libmbfl/mbfl/mbfl_allocators.h"
64 #include "libmbfl/mbfl/mbfilter_pass.h"
65
66 #include "php_variables.h"
67 #include "php_globals.h"
68 #include "rfc1867.h"
69 #include "php_content_types.h"
70 #include "SAPI.h"
71 #include "php_unicode.h"
72 #include "TSRM.h"
73
74 #include "mb_gpc.h"
75
76 #if HAVE_MBREGEX
77 #include "php_mbregex.h"
78 #endif
79
80 #include "zend_multibyte.h"
81
82 #if HAVE_ONIG
83 #include "php_onig_compat.h"
84 #include <oniguruma.h>
85 #undef UChar
86 #elif HAVE_PCRE || HAVE_BUNDLED_PCRE
87 #include "ext/pcre/php_pcre.h"
88 #endif
89 /* }}} */
90
91 #if HAVE_MBSTRING
92
93 /* {{{ prototypes */
/* Storage for the module globals that the rest of this file reads through MBSTRG(). */
ZEND_DECLARE_MODULE_GLOBALS(mbstring)

/* Globals constructor/destructor; referenced from mbstring_module_entry. */
static PHP_GINIT_FUNCTION(mbstring);
static PHP_GSHUTDOWN_FUNCTION(mbstring);

/* Forward declarations of file-local helpers that are defined further down in this file. */
static void php_mb_populate_current_detect_order_list(void);

static int php_mb_encoding_translation(void);

static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *list_size);

static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding);
106
107 /* }}} */
108
109 /* {{{ php_mb_default_identify_list */
/* One row of the per-language default identify table: a language and its encoding list. */
typedef struct _php_mb_nls_ident_list {
	enum mbfl_no_language lang;         /* language the row applies to */
	const enum mbfl_no_encoding *list;  /* encoding ids listed for that language */
	size_t list_size;                   /* number of elements in `list` */
} php_mb_nls_ident_list;
115
/*
 * Per-language encoding lists referenced by php_mb_default_identify_list[].
 * Every list starts with ASCII; the order of the entries is significant to
 * whoever consumes the list, so do not sort them.
 */

/* Japanese */
static const enum mbfl_no_encoding php_mb_default_identify_list_ja[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_jis,
	mbfl_no_encoding_utf8,
	mbfl_no_encoding_euc_jp,
	mbfl_no_encoding_sjis
};

/* Simplified Chinese */
static const enum mbfl_no_encoding php_mb_default_identify_list_cn[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_utf8,
	mbfl_no_encoding_euc_cn,
	mbfl_no_encoding_cp936
};

/* Traditional Chinese */
static const enum mbfl_no_encoding php_mb_default_identify_list_tw_hk[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_utf8,
	mbfl_no_encoding_euc_tw,
	mbfl_no_encoding_big5
};

/* Korean */
static const enum mbfl_no_encoding php_mb_default_identify_list_kr[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_utf8,
	mbfl_no_encoding_euc_kr,
	mbfl_no_encoding_uhc
};

/* Russian */
static const enum mbfl_no_encoding php_mb_default_identify_list_ru[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_utf8,
	mbfl_no_encoding_koi8r,
	mbfl_no_encoding_cp1251,
	mbfl_no_encoding_cp866
};

/* Armenian */
static const enum mbfl_no_encoding php_mb_default_identify_list_hy[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_utf8,
	mbfl_no_encoding_armscii8
};

/* Turkish */
static const enum mbfl_no_encoding php_mb_default_identify_list_tr[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_utf8,
	mbfl_no_encoding_cp1254,
	mbfl_no_encoding_8859_9
};

/* Ukrainian */
static const enum mbfl_no_encoding php_mb_default_identify_list_ua[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_utf8,
	mbfl_no_encoding_koi8u
};

/* Language-neutral */
static const enum mbfl_no_encoding php_mb_default_identify_list_neut[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_utf8
};
176
177
178 static const php_mb_nls_ident_list php_mb_default_identify_list[] = {
179 { mbfl_no_language_japanese, php_mb_default_identify_list_ja, sizeof(php_mb_default_identify_list_ja) / sizeof(php_mb_default_identify_list_ja[0]) },
180 { mbfl_no_language_korean, php_mb_default_identify_list_kr, sizeof(php_mb_default_identify_list_kr) / sizeof(php_mb_default_identify_list_kr[0]) },
181 { mbfl_no_language_traditional_chinese, php_mb_default_identify_list_tw_hk, sizeof(php_mb_default_identify_list_tw_hk) / sizeof(php_mb_default_identify_list_tw_hk[0]) },
182 { mbfl_no_language_simplified_chinese, php_mb_default_identify_list_cn, sizeof(php_mb_default_identify_list_cn) / sizeof(php_mb_default_identify_list_cn[0]) },
183 { mbfl_no_language_russian, php_mb_default_identify_list_ru, sizeof(php_mb_default_identify_list_ru) / sizeof(php_mb_default_identify_list_ru[0]) },
184 { mbfl_no_language_armenian, php_mb_default_identify_list_hy, sizeof(php_mb_default_identify_list_hy) / sizeof(php_mb_default_identify_list_hy[0]) },
185 { mbfl_no_language_turkish, php_mb_default_identify_list_tr, sizeof(php_mb_default_identify_list_tr) / sizeof(php_mb_default_identify_list_tr[0]) },
186 { mbfl_no_language_ukrainian, php_mb_default_identify_list_ua, sizeof(php_mb_default_identify_list_ua) / sizeof(php_mb_default_identify_list_ua[0]) },
187 { mbfl_no_language_neutral, php_mb_default_identify_list_neut, sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]) }
188 };
189
190 /* }}} */
191
192 /* {{{ mb_overload_def mb_ovld[] */
/*
 * Function overloading table.  Each row holds an MB_OVERLOAD_* flag followed
 * by three function names: a standard function, its mb_* counterpart and an
 * mb_orig_* name (presumably the name under which the original is kept;
 * confirm against struct mb_overload_def and the code that walks this table).
 * The table ends with an all-zero/NULL sentinel row.
 */
static const struct mb_overload_def mb_ovld[] = {
	{MB_OVERLOAD_MAIL, "mail", "mb_send_mail", "mb_orig_mail"},
	{MB_OVERLOAD_STRING, "strlen", "mb_strlen", "mb_orig_strlen"},
	{MB_OVERLOAD_STRING, "strpos", "mb_strpos", "mb_orig_strpos"},
	{MB_OVERLOAD_STRING, "strrpos", "mb_strrpos", "mb_orig_strrpos"},
	{MB_OVERLOAD_STRING, "stripos", "mb_stripos", "mb_orig_stripos"},
	{MB_OVERLOAD_STRING, "strripos", "mb_strripos", "mb_orig_strripos"},
	{MB_OVERLOAD_STRING, "strstr", "mb_strstr", "mb_orig_strstr"},
	{MB_OVERLOAD_STRING, "strrchr", "mb_strrchr", "mb_orig_strrchr"},
	{MB_OVERLOAD_STRING, "stristr", "mb_stristr", "mb_orig_stristr"},
	{MB_OVERLOAD_STRING, "substr", "mb_substr", "mb_orig_substr"},
	{MB_OVERLOAD_STRING, "strtolower", "mb_strtolower", "mb_orig_strtolower"},
	{MB_OVERLOAD_STRING, "strtoupper", "mb_strtoupper", "mb_orig_strtoupper"},
	{MB_OVERLOAD_STRING, "substr_count", "mb_substr_count", "mb_orig_substr_count"},
	{0, NULL, NULL, NULL} /* sentinel */
};
209 /* }}} */
210
211 /* {{{ arginfo */
/*
 * Argument descriptors for the functions registered in mbstring_functions[].
 * Reading guide (per the Zend arginfo macros; confirm against zend_API.h):
 * the last argument of ZEND_BEGIN_ARG_INFO_EX is the number of required
 * parameters, and the first argument of ZEND_ARG_INFO /
 * ZEND_ARG_VARIADIC_INFO is 1 when the parameter is taken by reference.
 */

/* --- runtime settings: every parameter is optional --- */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_language, 0, 0, 0)
	ZEND_ARG_INFO(0, language)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_internal_encoding, 0, 0, 0)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_http_input, 0, 0, 0)
	ZEND_ARG_INFO(0, type)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_http_output, 0, 0, 0)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_detect_order, 0, 0, 0)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substitute_character, 0, 0, 0)
	ZEND_ARG_INFO(0, substchar)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_preferred_mime_name, 0, 0, 1)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

/* `result` is the only by-reference parameter in this group */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_parse_str, 0, 0, 1)
	ZEND_ARG_INFO(0, encoded_string)
	ZEND_ARG_INFO(1, result)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_output_handler, 0, 0, 2)
	ZEND_ARG_INFO(0, contents)
	ZEND_ARG_INFO(0, status)
ZEND_END_ARG_INFO()

/* --- length, search and slicing --- */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strlen, 0, 0, 1)
	ZEND_ARG_INFO(0, str)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strpos, 0, 0, 2)
	ZEND_ARG_INFO(0, haystack)
	ZEND_ARG_INFO(0, needle)
	ZEND_ARG_INFO(0, offset)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrpos, 0, 0, 2)
	ZEND_ARG_INFO(0, haystack)
	ZEND_ARG_INFO(0, needle)
	ZEND_ARG_INFO(0, offset)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_stripos, 0, 0, 2)
	ZEND_ARG_INFO(0, haystack)
	ZEND_ARG_INFO(0, needle)
	ZEND_ARG_INFO(0, offset)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strripos, 0, 0, 2)
	ZEND_ARG_INFO(0, haystack)
	ZEND_ARG_INFO(0, needle)
	ZEND_ARG_INFO(0, offset)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strstr, 0, 0, 2)
	ZEND_ARG_INFO(0, haystack)
	ZEND_ARG_INFO(0, needle)
	ZEND_ARG_INFO(0, part)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrchr, 0, 0, 2)
	ZEND_ARG_INFO(0, haystack)
	ZEND_ARG_INFO(0, needle)
	ZEND_ARG_INFO(0, part)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_stristr, 0, 0, 2)
	ZEND_ARG_INFO(0, haystack)
	ZEND_ARG_INFO(0, needle)
	ZEND_ARG_INFO(0, part)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrichr, 0, 0, 2)
	ZEND_ARG_INFO(0, haystack)
	ZEND_ARG_INFO(0, needle)
	ZEND_ARG_INFO(0, part)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substr_count, 0, 0, 2)
	ZEND_ARG_INFO(0, haystack)
	ZEND_ARG_INFO(0, needle)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substr, 0, 0, 2)
	ZEND_ARG_INFO(0, str)
	ZEND_ARG_INFO(0, start)
	ZEND_ARG_INFO(0, length)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strcut, 0, 0, 2)
	ZEND_ARG_INFO(0, str)
	ZEND_ARG_INFO(0, start)
	ZEND_ARG_INFO(0, length)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strwidth, 0, 0, 1)
	ZEND_ARG_INFO(0, str)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strimwidth, 0, 0, 3)
	ZEND_ARG_INFO(0, str)
	ZEND_ARG_INFO(0, start)
	ZEND_ARG_INFO(0, width)
	ZEND_ARG_INFO(0, trimmarker)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

/* --- conversion and detection --- */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_encoding, 0, 0, 2)
	ZEND_ARG_INFO(0, str)
	ZEND_ARG_INFO(0, to)
	ZEND_ARG_INFO(0, from)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_case, 0, 0, 2)
	ZEND_ARG_INFO(0, sourcestring)
	ZEND_ARG_INFO(0, mode)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strtoupper, 0, 0, 1)
	ZEND_ARG_INFO(0, sourcestring)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strtolower, 0, 0, 1)
	ZEND_ARG_INFO(0, sourcestring)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_detect_encoding, 0, 0, 1)
	ZEND_ARG_INFO(0, str)
	ZEND_ARG_INFO(0, encoding_list)
	ZEND_ARG_INFO(0, strict)
ZEND_END_ARG_INFO()

/* takes no parameters */
ZEND_BEGIN_ARG_INFO(arginfo_mb_list_encodings, 0)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encoding_aliases, 0, 0, 1)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encode_mimeheader, 0, 0, 1)
	ZEND_ARG_INFO(0, str)
	ZEND_ARG_INFO(0, charset)
	ZEND_ARG_INFO(0, transfer)
	ZEND_ARG_INFO(0, linefeed)
	ZEND_ARG_INFO(0, indent)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_decode_mimeheader, 0, 0, 1)
	ZEND_ARG_INFO(0, string)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_kana, 0, 0, 1)
	ZEND_ARG_INFO(0, str)
	ZEND_ARG_INFO(0, option)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

/* variadic tail: `vars` is declared by-reference */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_variables, 0, 0, 3)
	ZEND_ARG_INFO(0, to)
	ZEND_ARG_INFO(0, from)
	ZEND_ARG_VARIADIC_INFO(1, vars)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encode_numericentity, 0, 0, 2)
	ZEND_ARG_INFO(0, string)
	ZEND_ARG_INFO(0, convmap)
	ZEND_ARG_INFO(0, encoding)
	ZEND_ARG_INFO(0, is_hex)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_decode_numericentity, 0, 0, 2)
	ZEND_ARG_INFO(0, string)
	ZEND_ARG_INFO(0, convmap)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

/* --- mail and introspection --- */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_send_mail, 0, 0, 3)
	ZEND_ARG_INFO(0, to)
	ZEND_ARG_INFO(0, subject)
	ZEND_ARG_INFO(0, message)
	ZEND_ARG_INFO(0, additional_headers)
	ZEND_ARG_INFO(0, additional_parameters)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_get_info, 0, 0, 0)
	ZEND_ARG_INFO(0, type)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_check_encoding, 0, 0, 0)
	ZEND_ARG_INFO(0, var)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

/*
 * --- regex functions ---
 * None of these descriptors is referenced by name in mbstring_functions[];
 * presumably they are consumed through PHP_MBREGEX_FUNCTION_ENTRIES (confirm
 * in php_mbregex.h).
 */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_regex_encoding, 0, 0, 0)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg, 0, 0, 2)
	ZEND_ARG_INFO(0, pattern)
	ZEND_ARG_INFO(0, string)
	ZEND_ARG_INFO(1, registers)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_eregi, 0, 0, 2)
	ZEND_ARG_INFO(0, pattern)
	ZEND_ARG_INFO(0, string)
	ZEND_ARG_INFO(1, registers)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_replace, 0, 0, 3)
	ZEND_ARG_INFO(0, pattern)
	ZEND_ARG_INFO(0, replacement)
	ZEND_ARG_INFO(0, string)
	ZEND_ARG_INFO(0, option)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_eregi_replace, 0, 0, 3)
	ZEND_ARG_INFO(0, pattern)
	ZEND_ARG_INFO(0, replacement)
	ZEND_ARG_INFO(0, string)
	ZEND_ARG_INFO(0, option)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_replace_callback, 0, 0, 3)
	ZEND_ARG_INFO(0, pattern)
	ZEND_ARG_INFO(0, callback)
	ZEND_ARG_INFO(0, string)
	ZEND_ARG_INFO(0, option)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_split, 0, 0, 2)
	ZEND_ARG_INFO(0, pattern)
	ZEND_ARG_INFO(0, string)
	ZEND_ARG_INFO(0, limit)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_match, 0, 0, 2)
	ZEND_ARG_INFO(0, pattern)
	ZEND_ARG_INFO(0, string)
	ZEND_ARG_INFO(0, option)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search, 0, 0, 0)
	ZEND_ARG_INFO(0, pattern)
	ZEND_ARG_INFO(0, option)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_pos, 0, 0, 0)
	ZEND_ARG_INFO(0, pattern)
	ZEND_ARG_INFO(0, option)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_regs, 0, 0, 0)
	ZEND_ARG_INFO(0, pattern)
	ZEND_ARG_INFO(0, option)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_init, 0, 0, 1)
	ZEND_ARG_INFO(0, string)
	ZEND_ARG_INFO(0, pattern)
	ZEND_ARG_INFO(0, option)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO(arginfo_mb_ereg_search_getregs, 0)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO(arginfo_mb_ereg_search_getpos, 0)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_setpos, 0, 0, 1)
	ZEND_ARG_INFO(0, position)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_regex_set_options, 0, 0, 0)
	ZEND_ARG_INFO(0, options)
ZEND_END_ARG_INFO()
516 /* }}} */
517
518 /* {{{ zend_function_entry mbstring_functions[] */
/*
 * Function table handed to the engine through mbstring_module_entry.  Each
 * PHP_FE row pairs a function with its arginfo descriptor; the regex entries
 * are appended only when HAVE_MBREGEX is set.  PHP_FE_END terminates the table.
 */
const zend_function_entry mbstring_functions[] = {
	PHP_FE(mb_convert_case, arginfo_mb_convert_case)
	PHP_FE(mb_strtoupper, arginfo_mb_strtoupper)
	PHP_FE(mb_strtolower, arginfo_mb_strtolower)
	PHP_FE(mb_language, arginfo_mb_language)
	PHP_FE(mb_internal_encoding, arginfo_mb_internal_encoding)
	PHP_FE(mb_http_input, arginfo_mb_http_input)
	PHP_FE(mb_http_output, arginfo_mb_http_output)
	PHP_FE(mb_detect_order, arginfo_mb_detect_order)
	PHP_FE(mb_substitute_character, arginfo_mb_substitute_character)
	PHP_FE(mb_parse_str, arginfo_mb_parse_str)
	PHP_FE(mb_output_handler, arginfo_mb_output_handler)
	PHP_FE(mb_preferred_mime_name, arginfo_mb_preferred_mime_name)
	PHP_FE(mb_strlen, arginfo_mb_strlen)
	PHP_FE(mb_strpos, arginfo_mb_strpos)
	PHP_FE(mb_strrpos, arginfo_mb_strrpos)
	PHP_FE(mb_stripos, arginfo_mb_stripos)
	PHP_FE(mb_strripos, arginfo_mb_strripos)
	PHP_FE(mb_strstr, arginfo_mb_strstr)
	PHP_FE(mb_strrchr, arginfo_mb_strrchr)
	PHP_FE(mb_stristr, arginfo_mb_stristr)
	PHP_FE(mb_strrichr, arginfo_mb_strrichr)
	PHP_FE(mb_substr_count, arginfo_mb_substr_count)
	PHP_FE(mb_substr, arginfo_mb_substr)
	PHP_FE(mb_strcut, arginfo_mb_strcut)
	PHP_FE(mb_strwidth, arginfo_mb_strwidth)
	PHP_FE(mb_strimwidth, arginfo_mb_strimwidth)
	PHP_FE(mb_convert_encoding, arginfo_mb_convert_encoding)
	PHP_FE(mb_detect_encoding, arginfo_mb_detect_encoding)
	PHP_FE(mb_list_encodings, arginfo_mb_list_encodings)
	PHP_FE(mb_encoding_aliases, arginfo_mb_encoding_aliases)
	PHP_FE(mb_convert_kana, arginfo_mb_convert_kana)
	PHP_FE(mb_encode_mimeheader, arginfo_mb_encode_mimeheader)
	PHP_FE(mb_decode_mimeheader, arginfo_mb_decode_mimeheader)
	PHP_FE(mb_convert_variables, arginfo_mb_convert_variables)
	PHP_FE(mb_encode_numericentity, arginfo_mb_encode_numericentity)
	PHP_FE(mb_decode_numericentity, arginfo_mb_decode_numericentity)
	PHP_FE(mb_send_mail, arginfo_mb_send_mail)
	PHP_FE(mb_get_info, arginfo_mb_get_info)
	PHP_FE(mb_check_encoding, arginfo_mb_check_encoding)
#if HAVE_MBREGEX
	PHP_MBREGEX_FUNCTION_ENTRIES
#endif
	PHP_FE_END
};
564 /* }}} */
565
566 /* {{{ zend_module_entry mbstring_module_entry */
/* Module descriptor: ties together the name, function table, lifecycle hooks and globals. */
zend_module_entry mbstring_module_entry = {
	STANDARD_MODULE_HEADER,
	"mbstring",                    /* extension name */
	mbstring_functions,            /* exported function table */
	PHP_MINIT(mbstring),           /* module startup */
	PHP_MSHUTDOWN(mbstring),       /* module shutdown */
	PHP_RINIT(mbstring),           /* request startup */
	PHP_RSHUTDOWN(mbstring),       /* request shutdown */
	PHP_MINFO(mbstring),           /* phpinfo() section */
	PHP_MBSTRING_VERSION,
	PHP_MODULE_GLOBALS(mbstring),  /* globals descriptor */
	PHP_GINIT(mbstring),           /* globals constructor */
	PHP_GSHUTDOWN(mbstring),       /* globals destructor */
	NULL,                          /* unused hook slot (post-deactivate in zend_module_entry; confirm) */
	STANDARD_MODULE_PROPERTIES_EX
};
583 /* }}} */
584
585 /* {{{ static sapi_post_entry php_post_entries[] */
/*
 * POST handler table that uses the standard form handler
 * (php_std_post_handler) for DEFAULT_POST_CONTENT_TYPE.  Compare with
 * mbstr_post_entries[], which differs only in that handler.
 * The table ends with an all-NULL sentinel row.
 */
static sapi_post_entry php_post_entries[] = {
	{ DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data, php_std_post_handler },
	{ MULTIPART_CONTENT_TYPE, sizeof(MULTIPART_CONTENT_TYPE)-1, NULL, rfc1867_post_handler },
	{ NULL, 0, NULL, NULL }
};
591 /* }}} */
592
/* Only emitted when COMPILE_DL_MBSTRING is defined: module lookup glue, plus the TSRM cache definition under ZTS. */
#ifdef COMPILE_DL_MBSTRING
#ifdef ZTS
ZEND_TSRMLS_CACHE_DEFINE()
#endif
ZEND_GET_MODULE(mbstring)
#endif
599
600 static char *get_internal_encoding(void) {
601 if (PG(internal_encoding) && PG(internal_encoding)[0]) {
602 return PG(internal_encoding);
603 } else if (SG(default_charset)) {
604 return SG(default_charset);
605 }
606 return "";
607 }
608
get_input_encoding(void)609 static char *get_input_encoding(void) {
610 if (PG(input_encoding) && PG(input_encoding)[0]) {
611 return PG(input_encoding);
612 } else if (SG(default_charset)) {
613 return SG(default_charset);
614 }
615 return "";
616 }
617
get_output_encoding(void)618 static char *get_output_encoding(void) {
619 if (PG(output_encoding) && PG(output_encoding)[0]) {
620 return PG(output_encoding);
621 } else if (SG(default_charset)) {
622 return SG(default_charset);
623 }
624 return "";
625 }
626
627
628 /* {{{ allocators */
/* malloc slot of _php_mb_allocators: forwards to emalloc(). */
static void *_php_mb_allocators_malloc(unsigned int sz)
{
	return emalloc(sz);
}
633
/* realloc slot of _php_mb_allocators: forwards to erealloc(). */
static void *_php_mb_allocators_realloc(void *ptr, unsigned int sz)
{
	return erealloc(ptr, sz);
}
638
/* calloc slot of _php_mb_allocators: forwards to ecalloc(). */
static void *_php_mb_allocators_calloc(unsigned int nelems, unsigned int szelem)
{
	return ecalloc(nelems, szelem);
}
643
/* free slot of _php_mb_allocators: forwards to efree(). */
static void _php_mb_allocators_free(void *ptr)
{
	efree(ptr);
}
648
/* pmalloc slot of _php_mb_allocators: pemalloc() with the persistent flag set to 1. */
static void *_php_mb_allocators_pmalloc(unsigned int sz)
{
	return pemalloc(sz, 1);
}
653
/* prealloc slot of _php_mb_allocators: perealloc() with the persistent flag set to 1. */
static void *_php_mb_allocators_prealloc(void *ptr, unsigned int sz)
{
	return perealloc(ptr, sz, 1);
}
658
/* pfree slot of _php_mb_allocators: pefree() with the persistent flag set to 1. */
static void _php_mb_allocators_pfree(void *ptr)
{
	pefree(ptr, 1);
}
663
/*
 * Allocator table of type mbfl_allocators built from the thunks above.
 * Slot order must match the field order of mbfl_allocators (declared in
 * libmbfl's mbfl_allocators.h).
 */
static mbfl_allocators _php_mb_allocators = {
	_php_mb_allocators_malloc,
	_php_mb_allocators_realloc,
	_php_mb_allocators_calloc,
	_php_mb_allocators_free,
	_php_mb_allocators_pmalloc,   /* persistent variants follow */
	_php_mb_allocators_prealloc,
	_php_mb_allocators_pfree
};
673 /* }}} */
674
675 /* {{{ static sapi_post_entry mbstr_post_entries[] */
/*
 * POST handler table that routes DEFAULT_POST_CONTENT_TYPE bodies to
 * php_mb_post_handler; otherwise identical to php_post_entries[].
 * The table ends with an all-NULL sentinel row.
 */
static sapi_post_entry mbstr_post_entries[] = {
	{ DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data, php_mb_post_handler },
	{ MULTIPART_CONTENT_TYPE, sizeof(MULTIPART_CONTENT_TYPE)-1, NULL, rfc1867_post_handler },
	{ NULL, 0, NULL, NULL }
};
681 /* }}} */
682
683 /* {{{ static int php_mb_parse_encoding_list()
684 * Return 0 if input contains any illegal encoding, otherwise 1.
685 * Even if any illegal encoding is detected the result may contain a list
686 * of parsed encodings.
687 */
688 static int
php_mb_parse_encoding_list(const char * value,size_t value_length,const mbfl_encoding *** return_list,size_t * return_size,int persistent)689 php_mb_parse_encoding_list(const char *value, size_t value_length, const mbfl_encoding ***return_list, size_t *return_size, int persistent)
690 {
691 int size, bauto, ret = SUCCESS;
692 size_t n;
693 char *p, *p1, *p2, *endp, *tmpstr;
694 const mbfl_encoding **entry, **list;
695
696 list = NULL;
697 if (value == NULL || value_length <= 0) {
698 if (return_list) {
699 *return_list = NULL;
700 }
701 if (return_size) {
702 *return_size = 0;
703 }
704 return FAILURE;
705 } else {
706 /* copy the value string for work */
707 if (value[0]=='"' && value[value_length-1]=='"' && value_length>2) {
708 tmpstr = (char *)estrndup(value+1, value_length-2);
709 value_length -= 2;
710 }
711 else
712 tmpstr = (char *)estrndup(value, value_length);
713 if (tmpstr == NULL) {
714 return FAILURE;
715 }
716 /* count the number of listed encoding names */
717 endp = tmpstr + value_length;
718 n = 1;
719 p1 = tmpstr;
720 while ((p2 = (char*)php_memnstr(p1, ",", 1, endp)) != NULL) {
721 p1 = p2 + 1;
722 n++;
723 }
724 size = n + MBSTRG(default_detect_order_list_size);
725 /* make list */
726 list = (const mbfl_encoding **)pecalloc(size, sizeof(mbfl_encoding*), persistent);
727 if (list != NULL) {
728 entry = list;
729 n = 0;
730 bauto = 0;
731 p1 = tmpstr;
732 do {
733 p2 = p = (char*)php_memnstr(p1, ",", 1, endp);
734 if (p == NULL) {
735 p = endp;
736 }
737 *p = '\0';
738 /* trim spaces */
739 while (p1 < p && (*p1 == ' ' || *p1 == '\t')) {
740 p1++;
741 }
742 p--;
743 while (p > p1 && (*p == ' ' || *p == '\t')) {
744 *p = '\0';
745 p--;
746 }
747 /* convert to the encoding number and check encoding */
748 if (strcasecmp(p1, "auto") == 0) {
749 if (!bauto) {
750 const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
751 const size_t identify_list_size = MBSTRG(default_detect_order_list_size);
752 size_t i;
753 bauto = 1;
754 for (i = 0; i < identify_list_size; i++) {
755 *entry++ = mbfl_no2encoding(*src++);
756 n++;
757 }
758 }
759 } else {
760 const mbfl_encoding *encoding = mbfl_name2encoding(p1);
761 if (encoding) {
762 *entry++ = encoding;
763 n++;
764 } else {
765 ret = 0;
766 }
767 }
768 p1 = p2 + 1;
769 } while (n < size && p2 != NULL);
770 if (n > 0) {
771 if (return_list) {
772 *return_list = list;
773 } else {
774 pefree(list, persistent);
775 }
776 } else {
777 pefree(list, persistent);
778 if (return_list) {
779 *return_list = NULL;
780 }
781 ret = 0;
782 }
783 if (return_size) {
784 *return_size = n;
785 }
786 } else {
787 if (return_list) {
788 *return_list = NULL;
789 }
790 if (return_size) {
791 *return_size = 0;
792 }
793 ret = 0;
794 }
795 efree(tmpstr);
796 }
797
798 return ret;
799 }
800 /* }}} */
801
802 /* {{{ static int php_mb_parse_encoding_array()
803 * Return 0 if input contains any illegal encoding, otherwise 1.
804 * Even if any illegal encoding is detected the result may contain a list
805 * of parsed encodings.
806 */
807 static int
php_mb_parse_encoding_array(zval * array,const mbfl_encoding *** return_list,size_t * return_size,int persistent)808 php_mb_parse_encoding_array(zval *array, const mbfl_encoding ***return_list, size_t *return_size, int persistent)
809 {
810 zval *hash_entry;
811 HashTable *target_hash;
812 int i, n, size, bauto, ret = SUCCESS;
813 const mbfl_encoding **list, **entry;
814
815 list = NULL;
816 if (Z_TYPE_P(array) == IS_ARRAY) {
817 target_hash = Z_ARRVAL_P(array);
818 i = zend_hash_num_elements(target_hash);
819 size = i + MBSTRG(default_detect_order_list_size);
820 list = (const mbfl_encoding **)pecalloc(size, sizeof(mbfl_encoding*), persistent);
821 if (list != NULL) {
822 entry = list;
823 bauto = 0;
824 n = 0;
825 ZEND_HASH_FOREACH_VAL(target_hash, hash_entry) {
826 convert_to_string_ex(hash_entry);
827 if (strcasecmp(Z_STRVAL_P(hash_entry), "auto") == 0) {
828 if (!bauto) {
829 const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
830 const size_t identify_list_size = MBSTRG(default_detect_order_list_size);
831 size_t j;
832
833 bauto = 1;
834 for (j = 0; j < identify_list_size; j++) {
835 *entry++ = mbfl_no2encoding(*src++);
836 n++;
837 }
838 }
839 } else {
840 const mbfl_encoding *encoding = mbfl_name2encoding(Z_STRVAL_P(hash_entry));
841 if (encoding) {
842 *entry++ = encoding;
843 n++;
844 } else {
845 ret = FAILURE;
846 }
847 }
848 i--;
849 } ZEND_HASH_FOREACH_END();
850 if (n > 0) {
851 if (return_list) {
852 *return_list = list;
853 } else {
854 pefree(list, persistent);
855 }
856 } else {
857 pefree(list, persistent);
858 if (return_list) {
859 *return_list = NULL;
860 }
861 ret = FAILURE;
862 }
863 if (return_size) {
864 *return_size = n;
865 }
866 } else {
867 if (return_list) {
868 *return_list = NULL;
869 }
870 if (return_size) {
871 *return_size = 0;
872 }
873 ret = FAILURE;
874 }
875 }
876
877 return ret;
878 }
879 /* }}} */
880
881 /* {{{ zend_multibyte interface */
/* Look an encoding up by name via mbfl_name2encoding() and hand it back as an opaque zend_encoding pointer. */
static const zend_encoding* php_mb_zend_encoding_fetcher(const char *encoding_name)
{
	return (const zend_encoding*)mbfl_name2encoding(encoding_name);
}
886
/* Return the `name` field of the mbfl_encoding behind an opaque zend_encoding pointer; `encoding` must not be NULL. */
static const char *php_mb_zend_encoding_name_getter(const zend_encoding *encoding)
{
	return ((const mbfl_encoding *)encoding)->name;
}
891
php_mb_zend_encoding_lexer_compatibility_checker(const zend_encoding * _encoding)892 static int php_mb_zend_encoding_lexer_compatibility_checker(const zend_encoding *_encoding)
893 {
894 const mbfl_encoding *encoding = (const mbfl_encoding*)_encoding;
895 if (encoding->flag & MBFL_ENCTYPE_SBCS) {
896 return 1;
897 }
898 if ((encoding->flag & (MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE)) == MBFL_ENCTYPE_MBCS) {
899 return 1;
900 }
901 return 0;
902 }
903
php_mb_zend_encoding_detector(const unsigned char * arg_string,size_t arg_length,const zend_encoding ** list,size_t list_size)904 static const zend_encoding *php_mb_zend_encoding_detector(const unsigned char *arg_string, size_t arg_length, const zend_encoding **list, size_t list_size)
905 {
906 mbfl_string string;
907
908 if (!list) {
909 list = (const zend_encoding **)MBSTRG(current_detect_order_list);
910 list_size = MBSTRG(current_detect_order_list_size);
911 }
912
913 mbfl_string_init(&string);
914 string.no_language = MBSTRG(language);
915 string.val = (unsigned char *)arg_string;
916 string.len = arg_length;
917 return (const zend_encoding *) mbfl_identify_encoding2(&string, (const mbfl_encoding **)list, list_size, 0);
918 }
919
php_mb_zend_encoding_converter(unsigned char ** to,size_t * to_length,const unsigned char * from,size_t from_length,const zend_encoding * encoding_to,const zend_encoding * encoding_from)920 static size_t php_mb_zend_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const zend_encoding *encoding_to, const zend_encoding *encoding_from)
921 {
922 mbfl_string string, result;
923 mbfl_buffer_converter *convd;
924 int status, loc;
925
926 /* new encoding */
927 /* initialize string */
928 mbfl_string_init(&string);
929 mbfl_string_init(&result);
930 string.no_encoding = ((const mbfl_encoding*)encoding_from)->no_encoding;
931 string.no_language = MBSTRG(language);
932 string.val = (unsigned char*)from;
933 string.len = from_length;
934
935 /* initialize converter */
936 convd = mbfl_buffer_converter_new2((const mbfl_encoding *)encoding_from, (const mbfl_encoding *)encoding_to, string.len);
937 if (convd == NULL) {
938 return -1;
939 }
940 mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
941 mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
942
943 /* do it */
944 status = mbfl_buffer_converter_feed2(convd, &string, &loc);
945 if (status) {
946 mbfl_buffer_converter_delete(convd);
947 return (size_t)-1;
948 }
949
950 mbfl_buffer_converter_flush(convd);
951 if (!mbfl_buffer_converter_result(convd, &result)) {
952 mbfl_buffer_converter_delete(convd);
953 return (size_t)-1;
954 }
955
956 *to = result.val;
957 *to_length = result.len;
958
959 mbfl_buffer_converter_delete(convd);
960
961 return loc;
962 }
963
/* zend_encoding-typed front end for php_mb_parse_encoding_list(); arguments and result are passed through unchanged. */
static int php_mb_zend_encoding_list_parser(const char *encoding_list, size_t encoding_list_len, const zend_encoding ***return_list, size_t *return_size, int persistent)
{
	return php_mb_parse_encoding_list(encoding_list, encoding_list_len, (const mbfl_encoding ***)return_list, return_size, persistent);
}
968
/* Return MBSTRG(internal_encoding) as an opaque zend_encoding pointer. */
static const zend_encoding *php_mb_zend_internal_encoding_getter(void)
{
	return (const zend_encoding *)MBSTRG(internal_encoding);
}
973
/* Store `encoding` into MBSTRG(internal_encoding) without validation; always returns SUCCESS. */
static int php_mb_zend_internal_encoding_setter(const zend_encoding *encoding)
{
	MBSTRG(internal_encoding) = (const mbfl_encoding *)encoding;
	return SUCCESS;
}
979
/* Callback table through which the Zend engine reaches mbstring's multibyte
 * support. It is installed with zend_multibyte_set_functions() during MINIT.
 * The initializer is positional, so the entry order must not be changed. */
static zend_multibyte_functions php_mb_zend_multibyte_functions = {
	"mbstring", /* provider name */
	php_mb_zend_encoding_fetcher,
	php_mb_zend_encoding_name_getter,
	php_mb_zend_encoding_lexer_compatibility_checker,
	php_mb_zend_encoding_detector,
	php_mb_zend_encoding_converter,
	php_mb_zend_encoding_list_parser,
	php_mb_zend_internal_encoding_getter,
	php_mb_zend_internal_encoding_setter
};
991 /* }}} */
992
/* Regex helpers used for the mbstring.http_output_conv_mimetypes setting.
 * They are implemented below on top of Oniguruma (HAVE_ONIG) or PCRE.
 * `opaque` is the handle returned by _php_mb_compile_regex(). */
static void *_php_mb_compile_regex(const char *pattern);
static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len);
static void _php_mb_free_regex(void *opaque);
996
997 #if HAVE_ONIG
998 /* {{{ _php_mb_compile_regex */
_php_mb_compile_regex(const char * pattern)999 static void *_php_mb_compile_regex(const char *pattern)
1000 {
1001 php_mb_regex_t *retval;
1002 OnigErrorInfo err_info;
1003 int err_code;
1004
1005 if ((err_code = onig_new(&retval,
1006 (const OnigUChar *)pattern,
1007 (const OnigUChar *)pattern + strlen(pattern),
1008 ONIG_OPTION_IGNORECASE | ONIG_OPTION_DONT_CAPTURE_GROUP,
1009 ONIG_ENCODING_ASCII, &OnigSyntaxPerl, &err_info))) {
1010 OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
1011 onig_error_code_to_str(err_str, err_code, err_info);
1012 php_error_docref(NULL, E_WARNING, "%s: %s", pattern, err_str);
1013 retval = NULL;
1014 }
1015 return retval;
1016 }
1017 /* }}} */
1018
1019 /* {{{ _php_mb_match_regex */
_php_mb_match_regex(void * opaque,const char * str,size_t str_len)1020 static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len)
1021 {
1022 return onig_search((php_mb_regex_t *)opaque, (const OnigUChar *)str,
1023 (const OnigUChar*)str + str_len, (const OnigUChar *)str,
1024 (const OnigUChar*)str + str_len, NULL, ONIG_OPTION_NONE) >= 0;
1025 }
1026 /* }}} */
1027
1028 /* {{{ _php_mb_free_regex */
_php_mb_free_regex(void * opaque)1029 static void _php_mb_free_regex(void *opaque)
1030 {
1031 onig_free((php_mb_regex_t *)opaque);
1032 }
1033 /* }}} */
1034 #elif HAVE_PCRE || HAVE_BUNDLED_PCRE
1035 /* {{{ _php_mb_compile_regex */
_php_mb_compile_regex(const char * pattern)1036 static void *_php_mb_compile_regex(const char *pattern)
1037 {
1038 pcre *retval;
1039 const char *err_str;
1040 int err_offset;
1041
1042 if (!(retval = pcre_compile(pattern,
1043 PCRE_CASELESS, &err_str, &err_offset, NULL))) {
1044 php_error_docref(NULL, E_WARNING, "%s (offset=%d): %s", pattern, err_offset, err_str);
1045 }
1046 return retval;
1047 }
1048 /* }}} */
1049
1050 /* {{{ _php_mb_match_regex */
_php_mb_match_regex(void * opaque,const char * str,size_t str_len)1051 static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len)
1052 {
1053 return pcre_exec((pcre *)opaque, NULL, str, (int)str_len, 0,
1054 0, NULL, 0) >= 0;
1055 }
1056 /* }}} */
1057
1058 /* {{{ _php_mb_free_regex */
/* Releases a regex previously produced by _php_mb_compile_regex()
 * (PCRE build). */
static void _php_mb_free_regex(void *opaque)
{
	pcre_free(opaque);
}
1063 /* }}} */
1064 #endif
1065
1066 /* {{{ php_mb_nls_get_default_detect_order_list */
php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang,enum mbfl_no_encoding ** plist,size_t * plist_size)1067 static int php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang, enum mbfl_no_encoding **plist, size_t *plist_size)
1068 {
1069 size_t i;
1070
1071 *plist = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut;
1072 *plist_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
1073
1074 for (i = 0; i < sizeof(php_mb_default_identify_list) / sizeof(php_mb_default_identify_list[0]); i++) {
1075 if (php_mb_default_identify_list[i].lang == lang) {
1076 *plist = (enum mbfl_no_encoding *)php_mb_default_identify_list[i].list;
1077 *plist_size = php_mb_default_identify_list[i].list_size;
1078 return 1;
1079 }
1080 }
1081 return 0;
1082 }
1083 /* }}} */
1084
php_mb_rfc1867_substring_conf(const zend_encoding * encoding,char * start,int len,char quote)1085 static char *php_mb_rfc1867_substring_conf(const zend_encoding *encoding, char *start, int len, char quote)
1086 {
1087 char *result = emalloc(len + 2);
1088 char *resp = result;
1089 int i;
1090
1091 for (i = 0; i < len && start[i] != quote; ++i) {
1092 if (start[i] == '\\' && (start[i + 1] == '\\' || (quote && start[i + 1] == quote))) {
1093 *resp++ = start[++i];
1094 } else {
1095 size_t j = php_mb_mbchar_bytes_ex(start+i, (const mbfl_encoding *)encoding);
1096
1097 while (j-- > 0 && i < len) {
1098 *resp++ = start[i++];
1099 }
1100 --i;
1101 }
1102 }
1103
1104 *resp = '\0';
1105 return result;
1106 }
1107
php_mb_rfc1867_getword(const zend_encoding * encoding,char ** line,char stop)1108 static char *php_mb_rfc1867_getword(const zend_encoding *encoding, char **line, char stop) /* {{{ */
1109 {
1110 char *pos = *line, quote;
1111 char *res;
1112
1113 while (*pos && *pos != stop) {
1114 if ((quote = *pos) == '"' || quote == '\'') {
1115 ++pos;
1116 while (*pos && *pos != quote) {
1117 if (*pos == '\\' && pos[1] && pos[1] == quote) {
1118 pos += 2;
1119 } else {
1120 ++pos;
1121 }
1122 }
1123 if (*pos) {
1124 ++pos;
1125 }
1126 } else {
1127 pos += php_mb_mbchar_bytes_ex(pos, (const mbfl_encoding *)encoding);
1128
1129 }
1130 }
1131 if (*pos == '\0') {
1132 res = estrdup(*line);
1133 *line += strlen(*line);
1134 return res;
1135 }
1136
1137 res = estrndup(*line, pos - *line);
1138
1139 while (*pos == stop) {
1140 pos += php_mb_mbchar_bytes_ex(pos, (const mbfl_encoding *)encoding);
1141 }
1142
1143 *line = pos;
1144 return res;
1145 }
1146 /* }}} */
1147
/* rfc1867 callback (registered in MINIT): returns an emalloc'ed copy of the
 * first token in `str`.
 *
 * Leading whitespace is skipped. A token that opens with ' or " extends to the
 * matching unescaped quote; otherwise it extends to the next whitespace byte.
 * An all-whitespace or empty input yields an empty string. */
static char *php_mb_rfc1867_getword_conf(const zend_encoding *encoding, char *str) /* {{{ */
{
	char *token_end;
	char first;

	for (; *str && isspace(*(unsigned char *)str); ++str) {
		/* skip leading whitespace */
	}

	first = *str;
	if (first == '\0') {
		return estrdup("");
	}

	if (first == '"' || first == '\'') {
		++str;
		return php_mb_rfc1867_substring_conf(encoding, str, strlen(str), first);
	}

	for (token_end = str; *token_end && !isspace(*(unsigned char *)token_end); ++token_end) {
		/* find the end of the bare token */
	}
	return php_mb_rfc1867_substring_conf(encoding, str, token_end - str, 0);
}
1172 /* }}} */
1173
php_mb_rfc1867_basename(const zend_encoding * encoding,char * filename)1174 static char *php_mb_rfc1867_basename(const zend_encoding *encoding, char *filename) /* {{{ */
1175 {
1176 char *s, *s2;
1177 const size_t filename_len = strlen(filename);
1178
1179 /* The \ check should technically be needed for win32 systems only where
1180 * it is a valid path separator. However, IE in all it's wisdom always sends
1181 * the full path of the file on the user's filesystem, which means that unless
1182 * the user does basename() they get a bogus file name. Until IE's user base drops
1183 * to nill or problem is fixed this code must remain enabled for all systems. */
1184 s = php_mb_safe_strrchr_ex(filename, '\\', filename_len, (const mbfl_encoding *)encoding);
1185 s2 = php_mb_safe_strrchr_ex(filename, '/', filename_len, (const mbfl_encoding *)encoding);
1186
1187 if (s && s2) {
1188 if (s > s2) {
1189 return ++s;
1190 } else {
1191 return ++s2;
1192 }
1193 } else if (s) {
1194 return ++s;
1195 } else if (s2) {
1196 return ++s2;
1197 } else {
1198 return filename;
1199 }
1200 }
1201 /* }}} */
1202
1203 /* {{{ php.ini directive handler */
1204 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_language) */
PHP_INI_MH(OnUpdate_mbstring_language)1205 static PHP_INI_MH(OnUpdate_mbstring_language)
1206 {
1207 enum mbfl_no_language no_language;
1208
1209 no_language = mbfl_name2no_language(ZSTR_VAL(new_value));
1210 if (no_language == mbfl_no_language_invalid) {
1211 MBSTRG(language) = mbfl_no_language_neutral;
1212 return FAILURE;
1213 }
1214 MBSTRG(language) = no_language;
1215 php_mb_nls_get_default_detect_order_list(no_language, &MBSTRG(default_detect_order_list), &MBSTRG(default_detect_order_list_size));
1216 return SUCCESS;
1217 }
1218 /* }}} */
1219
1220 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_detect_order) */
PHP_INI_MH(OnUpdate_mbstring_detect_order)1221 static PHP_INI_MH(OnUpdate_mbstring_detect_order)
1222 {
1223 const mbfl_encoding **list;
1224 size_t size;
1225
1226 if (!new_value) {
1227 if (MBSTRG(detect_order_list)) {
1228 pefree(MBSTRG(detect_order_list), 1);
1229 }
1230 MBSTRG(detect_order_list) = NULL;
1231 MBSTRG(detect_order_list_size) = 0;
1232 return SUCCESS;
1233 }
1234
1235 if (FAILURE == php_mb_parse_encoding_list(ZSTR_VAL(new_value), ZSTR_LEN(new_value), &list, &size, 1)) {
1236 return FAILURE;
1237 }
1238
1239 if (MBSTRG(detect_order_list)) {
1240 pefree(MBSTRG(detect_order_list), 1);
1241 }
1242 MBSTRG(detect_order_list) = list;
1243 MBSTRG(detect_order_list_size) = size;
1244 return SUCCESS;
1245 }
1246 /* }}} */
1247
1248 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_input) */
PHP_INI_MH(OnUpdate_mbstring_http_input)1249 static PHP_INI_MH(OnUpdate_mbstring_http_input)
1250 {
1251 const mbfl_encoding **list;
1252 size_t size;
1253
1254 if (!new_value || !ZSTR_VAL(new_value)) {
1255 if (MBSTRG(http_input_list)) {
1256 pefree(MBSTRG(http_input_list), 1);
1257 }
1258 if (SUCCESS == php_mb_parse_encoding_list(get_input_encoding(), strlen(get_input_encoding())+1, &list, &size, 1)) {
1259 MBSTRG(http_input_list) = list;
1260 MBSTRG(http_input_list_size) = size;
1261 return SUCCESS;
1262 }
1263 MBSTRG(http_input_list) = NULL;
1264 MBSTRG(http_input_list_size) = 0;
1265 return SUCCESS;
1266 }
1267
1268 if (FAILURE == php_mb_parse_encoding_list(ZSTR_VAL(new_value), ZSTR_LEN(new_value), &list, &size, 1)) {
1269 return FAILURE;
1270 }
1271
1272 if (MBSTRG(http_input_list)) {
1273 pefree(MBSTRG(http_input_list), 1);
1274 }
1275 MBSTRG(http_input_list) = list;
1276 MBSTRG(http_input_list_size) = size;
1277
1278 if (stage & (PHP_INI_STAGE_ACTIVATE | PHP_INI_STAGE_RUNTIME)) {
1279 php_error_docref("ref.mbstring", E_DEPRECATED, "Use of mbstring.http_input is deprecated");
1280 }
1281
1282 return SUCCESS;
1283 }
1284 /* }}} */
1285
1286 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output) */
PHP_INI_MH(OnUpdate_mbstring_http_output)1287 static PHP_INI_MH(OnUpdate_mbstring_http_output)
1288 {
1289 const mbfl_encoding *encoding;
1290
1291 if (new_value == NULL || ZSTR_LEN(new_value) == 0) {
1292 encoding = mbfl_name2encoding(get_output_encoding());
1293 if (!encoding) {
1294 MBSTRG(http_output_encoding) = &mbfl_encoding_pass;
1295 MBSTRG(current_http_output_encoding) = &mbfl_encoding_pass;
1296 return SUCCESS;
1297 }
1298 } else {
1299 encoding = mbfl_name2encoding(ZSTR_VAL(new_value));
1300 if (!encoding) {
1301 MBSTRG(http_output_encoding) = &mbfl_encoding_pass;
1302 MBSTRG(current_http_output_encoding) = &mbfl_encoding_pass;
1303 return FAILURE;
1304 }
1305 }
1306 MBSTRG(http_output_encoding) = encoding;
1307 MBSTRG(current_http_output_encoding) = encoding;
1308
1309 if (stage & (PHP_INI_STAGE_ACTIVATE | PHP_INI_STAGE_RUNTIME)) {
1310 php_error_docref("ref.mbstring", E_DEPRECATED, "Use of mbstring.http_output is deprecated");
1311 }
1312
1313 return SUCCESS;
1314 }
1315 /* }}} */
1316
1317 /* {{{ static _php_mb_ini_mbstring_internal_encoding_set */
/* Applies `new_value` as the mbstring internal encoding.
 *
 * new_value         encoding name; may be NULL
 * new_value_length  length of the name; 0 is treated like a missing name
 *
 * A missing, empty or unknown name falls back to UTF-8. Both the configured
 * and the current internal encoding are updated. With mbregex enabled, the
 * regex default and current character types are set from the same name, also
 * falling back to "UTF-8" when the regex layer rejects it. Always returns
 * SUCCESS. */
int _php_mb_ini_mbstring_internal_encoding_set(const char *new_value, uint new_value_length)
{
	const mbfl_encoding *encoding;

	if (!new_value || !new_value_length || !(encoding = mbfl_name2encoding(new_value))) {
		/* falls back to UTF-8 if an unknown encoding name is given */
		encoding = mbfl_no2encoding(mbfl_no_encoding_utf8);
	}
	MBSTRG(internal_encoding) = encoding;
	MBSTRG(current_internal_encoding) = encoding;
#if HAVE_MBREGEX
	{
		const char *enc_name = new_value;
		if (FAILURE == php_mb_regex_set_default_mbctype(enc_name)) {
			/* falls back to UTF-8 if an unknown encoding name is given */
			enc_name = "UTF-8";
			php_mb_regex_set_default_mbctype(enc_name);
		}
		/* Use the resolved name, not the raw input, so the current regex
		 * character type follows the same fallback as the default one and
		 * a NULL/unknown new_value is never passed on. */
		php_mb_regex_set_mbctype(enc_name);
	}
#endif
	return SUCCESS;
}
1341 /* }}} */
1342
1343 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_internal_encoding) */
PHP_INI_MH(OnUpdate_mbstring_internal_encoding)1344 static PHP_INI_MH(OnUpdate_mbstring_internal_encoding)
1345 {
1346 if (stage & (PHP_INI_STAGE_ACTIVATE | PHP_INI_STAGE_RUNTIME)) {
1347 php_error_docref("ref.mbstring", E_DEPRECATED, "Use of mbstring.internal_encoding is deprecated");
1348 }
1349
1350 if (OnUpdateString(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage) == FAILURE) {
1351 return FAILURE;
1352 }
1353
1354 if (stage & (PHP_INI_STAGE_STARTUP | PHP_INI_STAGE_SHUTDOWN | PHP_INI_STAGE_RUNTIME)) {
1355 if (new_value && ZSTR_LEN(new_value)) {
1356 return _php_mb_ini_mbstring_internal_encoding_set(ZSTR_VAL(new_value), ZSTR_LEN(new_value));
1357 } else {
1358 return _php_mb_ini_mbstring_internal_encoding_set(get_internal_encoding(), strlen(get_internal_encoding())+1);
1359 }
1360 } else {
1361 /* the corresponding mbstring globals needs to be set according to the
1362 * ini value in the later stage because it never falls back to the
1363 * default value if 1. no value for mbstring.internal_encoding is given,
1364 * 2. mbstring.language directive is processed in per-dir or runtime
1365 * context and 3. call to the handler for mbstring.language is done
1366 * after mbstring.internal_encoding is handled. */
1367 return SUCCESS;
1368 }
1369 }
1370 /* }}} */
1371
1372 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_substitute_character) */
PHP_INI_MH(OnUpdate_mbstring_substitute_character)1373 static PHP_INI_MH(OnUpdate_mbstring_substitute_character)
1374 {
1375 int c;
1376 char *endptr = NULL;
1377
1378 if (new_value != NULL) {
1379 if (strcasecmp("none", ZSTR_VAL(new_value)) == 0) {
1380 MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
1381 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
1382 } else if (strcasecmp("long", ZSTR_VAL(new_value)) == 0) {
1383 MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
1384 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
1385 } else if (strcasecmp("entity", ZSTR_VAL(new_value)) == 0) {
1386 MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
1387 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
1388 } else {
1389 MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1390 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1391 if (ZSTR_LEN(new_value) > 0) {
1392 c = strtol(ZSTR_VAL(new_value), &endptr, 0);
1393 if (*endptr == '\0') {
1394 MBSTRG(filter_illegal_substchar) = c;
1395 MBSTRG(current_filter_illegal_substchar) = c;
1396 }
1397 }
1398 }
1399 } else {
1400 MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1401 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1402 MBSTRG(filter_illegal_substchar) = 0x3f; /* '?' */
1403 MBSTRG(current_filter_illegal_substchar) = 0x3f; /* '?' */
1404 }
1405
1406 return SUCCESS;
1407 }
1408 /* }}} */
1409
1410 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_encoding_translation) */
PHP_INI_MH(OnUpdate_mbstring_encoding_translation)1411 static PHP_INI_MH(OnUpdate_mbstring_encoding_translation)
1412 {
1413 if (new_value == NULL) {
1414 return FAILURE;
1415 }
1416
1417 OnUpdateBool(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
1418
1419 if (MBSTRG(encoding_translation)) {
1420 sapi_unregister_post_entry(php_post_entries);
1421 sapi_register_post_entries(mbstr_post_entries);
1422 } else {
1423 sapi_unregister_post_entry(mbstr_post_entries);
1424 sapi_register_post_entries(php_post_entries);
1425 }
1426
1427 return SUCCESS;
1428 }
1429 /* }}} */
1430
1431 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes */
PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes)1432 static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes)
1433 {
1434 zend_string *tmp;
1435 void *re = NULL;
1436
1437 if (!new_value) {
1438 new_value = entry->orig_value;
1439 }
1440 tmp = php_trim(new_value, NULL, 0, 3);
1441
1442 if (ZSTR_LEN(tmp) > 0) {
1443 if (!(re = _php_mb_compile_regex(ZSTR_VAL(tmp)))) {
1444 zend_string_release(tmp);
1445 return FAILURE;
1446 }
1447 }
1448
1449 if (MBSTRG(http_output_conv_mimetypes)) {
1450 _php_mb_free_regex(MBSTRG(http_output_conv_mimetypes));
1451 }
1452
1453 MBSTRG(http_output_conv_mimetypes) = re;
1454
1455 zend_string_release(tmp);
1456 return SUCCESS;
1457 }
1458 /* }}} */
1459 /* }}} */
1460
1461 /* {{{ php.ini directive registration */
/* Table of mbstring php.ini directives: name, default value, the contexts in
 * which the directive may be changed, and the update handler. STD_* entries
 * additionally name the zend_mbstring_globals field that backs the value. */
PHP_INI_BEGIN()
	PHP_INI_ENTRY("mbstring.language", "neutral", PHP_INI_ALL, OnUpdate_mbstring_language)
	PHP_INI_ENTRY("mbstring.detect_order", NULL, PHP_INI_ALL, OnUpdate_mbstring_detect_order)
	PHP_INI_ENTRY("mbstring.http_input", NULL, PHP_INI_ALL, OnUpdate_mbstring_http_input)
	PHP_INI_ENTRY("mbstring.http_output", NULL, PHP_INI_ALL, OnUpdate_mbstring_http_output)
	STD_PHP_INI_ENTRY("mbstring.internal_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_internal_encoding, internal_encoding_name, zend_mbstring_globals, mbstring_globals)
	PHP_INI_ENTRY("mbstring.substitute_character", NULL, PHP_INI_ALL, OnUpdate_mbstring_substitute_character)
	/* function overloading can only be configured system-wide */
	STD_PHP_INI_ENTRY("mbstring.func_overload", "0",
	PHP_INI_SYSTEM, OnUpdateLong, func_overload, zend_mbstring_globals, mbstring_globals)

	STD_PHP_INI_BOOLEAN("mbstring.encoding_translation", "0",
		PHP_INI_SYSTEM | PHP_INI_PERDIR,
		OnUpdate_mbstring_encoding_translation,
		encoding_translation, zend_mbstring_globals, mbstring_globals)
	/* the default pattern is a regex matched by the _php_mb_*_regex helpers */
	PHP_INI_ENTRY("mbstring.http_output_conv_mimetypes",
		"^(text/|application/xhtml\\+xml)",
		PHP_INI_ALL,
		OnUpdate_mbstring_http_output_conv_mimetypes)

	/* NOTE(review): declared as a boolean directive but stored through
	 * OnUpdateLong -- confirm this matches the type of strict_detection. */
	STD_PHP_INI_BOOLEAN("mbstring.strict_detection", "0",
		PHP_INI_ALL,
		OnUpdateLong,
		strict_detection, zend_mbstring_globals, mbstring_globals)
PHP_INI_END()
1486 /* }}} */
1487
1488 /* {{{ module global initialize handler */
1489 static PHP_GINIT_FUNCTION(mbstring)
1490 {
1491 #if defined(COMPILE_DL_MBSTRING) && defined(ZTS)
1492 ZEND_TSRMLS_CACHE_UPDATE();
1493 #endif
1494
1495 mbstring_globals->language = mbfl_no_language_uni;
1496 mbstring_globals->internal_encoding = NULL;
1497 mbstring_globals->current_internal_encoding = mbstring_globals->internal_encoding;
1498 mbstring_globals->http_output_encoding = &mbfl_encoding_pass;
1499 mbstring_globals->current_http_output_encoding = &mbfl_encoding_pass;
1500 mbstring_globals->http_input_identify = NULL;
1501 mbstring_globals->http_input_identify_get = NULL;
1502 mbstring_globals->http_input_identify_post = NULL;
1503 mbstring_globals->http_input_identify_cookie = NULL;
1504 mbstring_globals->http_input_identify_string = NULL;
1505 mbstring_globals->http_input_list = NULL;
1506 mbstring_globals->http_input_list_size = 0;
1507 mbstring_globals->detect_order_list = NULL;
1508 mbstring_globals->detect_order_list_size = 0;
1509 mbstring_globals->current_detect_order_list = NULL;
1510 mbstring_globals->current_detect_order_list_size = 0;
1511 mbstring_globals->default_detect_order_list = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut;
1512 mbstring_globals->default_detect_order_list_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
1513 mbstring_globals->filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1514 mbstring_globals->filter_illegal_substchar = 0x3f; /* '?' */
1515 mbstring_globals->current_filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1516 mbstring_globals->current_filter_illegal_substchar = 0x3f; /* '?' */
1517 mbstring_globals->illegalchars = 0;
1518 mbstring_globals->func_overload = 0;
1519 mbstring_globals->encoding_translation = 0;
1520 mbstring_globals->strict_detection = 0;
1521 mbstring_globals->outconv = NULL;
1522 mbstring_globals->http_output_conv_mimetypes = NULL;
1523 #if HAVE_MBREGEX
1524 mbstring_globals->mb_regex_globals = php_mb_regex_globals_alloc();
1525 #endif
1526 }
1527 /* }}} */
1528
1529 /* {{{ PHP_GSHUTDOWN_FUNCTION */
PHP_GSHUTDOWN_FUNCTION(mbstring)1530 static PHP_GSHUTDOWN_FUNCTION(mbstring)
1531 {
1532 if (mbstring_globals->http_input_list) {
1533 free(mbstring_globals->http_input_list);
1534 }
1535 if (mbstring_globals->detect_order_list) {
1536 free(mbstring_globals->detect_order_list);
1537 }
1538 if (mbstring_globals->http_output_conv_mimetypes) {
1539 _php_mb_free_regex(mbstring_globals->http_output_conv_mimetypes);
1540 }
1541 #if HAVE_MBREGEX
1542 php_mb_regex_globals_free(mbstring_globals->mb_regex_globals);
1543 #endif
1544 }
1545 /* }}} */
1546
1547 /* {{{ PHP_MINIT_FUNCTION(mbstring) */
/* Module startup: installs mbstring's libmbfl allocators, registers the INI
 * entries, SAPI hooks and MB_* constants, starts mbregex when enabled, and
 * wires mbstring into the Zend multibyte layer and the rfc1867 parser.
 * Returns FAILURE only when zend_multibyte_set_functions() fails. */
PHP_MINIT_FUNCTION(mbstring)
{
#if defined(COMPILE_DL_MBSTRING) && defined(ZTS)
	ZEND_TSRMLS_CACHE_UPDATE();
#endif
	/* make libmbfl allocate through mbstring's allocator table */
	__mbfl_allocators = &_php_mb_allocators;

	REGISTER_INI_ENTRIES();

	/* This is a global handler. Should not be set in a per-request handler. */
	sapi_register_treat_data(mbstr_treat_data);

	/* Post handlers are stored in the thread-local context. */
	if (MBSTRG(encoding_translation)) {
		sapi_register_post_entries(mbstr_post_entries);
	}

	/* flag values for mbstring.func_overload */
	REGISTER_LONG_CONSTANT("MB_OVERLOAD_MAIL", MB_OVERLOAD_MAIL, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("MB_OVERLOAD_STRING", MB_OVERLOAD_STRING, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("MB_OVERLOAD_REGEX", MB_OVERLOAD_REGEX, CONST_CS | CONST_PERSISTENT);

	/* case-conversion mode constants */
	REGISTER_LONG_CONSTANT("MB_CASE_UPPER", PHP_UNICODE_CASE_UPPER, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("MB_CASE_LOWER", PHP_UNICODE_CASE_LOWER, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("MB_CASE_TITLE", PHP_UNICODE_CASE_TITLE, CONST_CS | CONST_PERSISTENT);

#if HAVE_MBREGEX
	PHP_MINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
#endif

	/* hand the engine mbstring's multibyte callback table */
	if (FAILURE == zend_multibyte_set_functions(&php_mb_zend_multibyte_functions)) {
		return FAILURE;
	}

	/* multipart/form-data parsing goes through these encoding-aware helpers */
	php_rfc1867_set_multibyte_callbacks(
		php_mb_encoding_translation,
		php_mb_gpc_get_detect_order,
		php_mb_gpc_set_input_encoding,
		php_mb_rfc1867_getword,
		php_mb_rfc1867_getword_conf,
		php_mb_rfc1867_basename);

	return SUCCESS;
}
1591 /* }}} */
1592
1593 /* {{{ PHP_MSHUTDOWN_FUNCTION(mbstring) */
/* Module shutdown: unregisters the INI entries and, with mbregex enabled,
 * shuts the regex sub-module down. Always returns SUCCESS. */
PHP_MSHUTDOWN_FUNCTION(mbstring)
{
	UNREGISTER_INI_ENTRIES();

#if HAVE_MBREGEX
	PHP_MSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
#endif

	return SUCCESS;
}
1604 /* }}} */
1605
1606 /* {{{ PHP_RINIT_FUNCTION(mbstring) */
PHP_RINIT_FUNCTION(mbstring)1607 PHP_RINIT_FUNCTION(mbstring)
1608 {
1609 zend_function *func, *orig;
1610 const struct mb_overload_def *p;
1611
1612 MBSTRG(current_internal_encoding) = MBSTRG(internal_encoding);
1613 MBSTRG(current_http_output_encoding) = MBSTRG(http_output_encoding);
1614 MBSTRG(current_filter_illegal_mode) = MBSTRG(filter_illegal_mode);
1615 MBSTRG(current_filter_illegal_substchar) = MBSTRG(filter_illegal_substchar);
1616
1617 MBSTRG(illegalchars) = 0;
1618
1619 php_mb_populate_current_detect_order_list();
1620
1621 /* override original function. */
1622 if (MBSTRG(func_overload)){
1623 p = &(mb_ovld[0]);
1624
1625 CG(compiler_options) |= ZEND_COMPILE_NO_BUILTIN_STRLEN;
1626 while (p->type > 0) {
1627 if ((MBSTRG(func_overload) & p->type) == p->type &&
1628 !zend_hash_str_exists(EG(function_table), p->save_func, strlen(p->save_func))
1629 ) {
1630 func = zend_hash_str_find_ptr(EG(function_table), p->ovld_func, strlen(p->ovld_func));
1631
1632 if ((orig = zend_hash_str_find_ptr(EG(function_table), p->orig_func, strlen(p->orig_func))) == NULL) {
1633 php_error_docref("ref.mbstring", E_WARNING, "mbstring couldn't find function %s.", p->orig_func);
1634 return FAILURE;
1635 } else {
1636 ZEND_ASSERT(orig->type == ZEND_INTERNAL_FUNCTION);
1637 zend_hash_str_add_mem(EG(function_table), p->save_func, strlen(p->save_func), orig, sizeof(zend_internal_function));
1638 function_add_ref(orig);
1639
1640 if (zend_hash_str_update_mem(EG(function_table), p->orig_func, strlen(p->orig_func), func, sizeof(zend_internal_function)) == NULL) {
1641 php_error_docref("ref.mbstring", E_WARNING, "mbstring couldn't replace function %s.", p->orig_func);
1642 return FAILURE;
1643 }
1644
1645 function_add_ref(func);
1646 }
1647 }
1648 p++;
1649 }
1650 }
1651 #if HAVE_MBREGEX
1652 PHP_RINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1653 #endif
1654 zend_multibyte_set_internal_encoding((const zend_encoding *)MBSTRG(internal_encoding));
1655
1656 return SUCCESS;
1657 }
1658 /* }}} */
1659
1660 /* {{{ PHP_RSHUTDOWN_FUNCTION(mbstring) */
/* Request shutdown: frees the per-request detect-order list and output
 * converter, clears the HTTP input identification results, restores any
 * functions overloaded in RINIT and shuts mbregex down for the request.
 * Always returns SUCCESS. */
PHP_RSHUTDOWN_FUNCTION(mbstring)
{
	const struct mb_overload_def *p;
	zend_function *orig;

	if (MBSTRG(current_detect_order_list) != NULL) {
		efree(MBSTRG(current_detect_order_list));
		MBSTRG(current_detect_order_list) = NULL;
		MBSTRG(current_detect_order_list_size) = 0;
	}
	if (MBSTRG(outconv) != NULL) {
		/* carry the converter's illegal-character count over before deleting it */
		MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
		mbfl_buffer_converter_delete(MBSTRG(outconv));
		MBSTRG(outconv) = NULL;
	}

	/* clear http input identification. */
	MBSTRG(http_input_identify) = NULL;
	MBSTRG(http_input_identify_post) = NULL;
	MBSTRG(http_input_identify_get) = NULL;
	MBSTRG(http_input_identify_cookie) = NULL;
	MBSTRG(http_input_identify_string) = NULL;

	/* clear overloaded function. */
	if (MBSTRG(func_overload)){
		p = &(mb_ovld[0]);
		while (p->type > 0) {
			/* restore only entries that were actually saved in RINIT */
			if ((MBSTRG(func_overload) & p->type) == p->type &&
				(orig = zend_hash_str_find_ptr(EG(function_table), p->save_func, strlen(p->save_func)))) {

				/* put the saved original back under its own name, then drop the saved copy */
				zend_hash_str_update_mem(EG(function_table), p->orig_func, strlen(p->orig_func), orig, sizeof(zend_internal_function));
				function_add_ref(orig);
				zend_hash_str_del(EG(function_table), p->save_func, strlen(p->save_func));
			}
			p++;
		}
		/* undo the compiler option set in RINIT */
		CG(compiler_options) &= ~ZEND_COMPILE_NO_BUILTIN_STRLEN;
	}

#if HAVE_MBREGEX
	PHP_RSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
#endif

	return SUCCESS;
}
1706 /* }}} */
1707
1708 /* {{{ PHP_MINFO_FUNCTION(mbstring) */
PHP_MINFO_FUNCTION(mbstring)1709 PHP_MINFO_FUNCTION(mbstring)
1710 {
1711 php_info_print_table_start();
1712 php_info_print_table_row(2, "Multibyte Support", "enabled");
1713 php_info_print_table_row(2, "Multibyte string engine", "libmbfl");
1714 php_info_print_table_row(2, "HTTP input encoding translation", MBSTRG(encoding_translation) ? "enabled": "disabled");
1715 {
1716 char tmp[256];
1717 snprintf(tmp, sizeof(tmp), "%d.%d.%d", MBFL_VERSION_MAJOR, MBFL_VERSION_MINOR, MBFL_VERSION_TEENY);
1718 php_info_print_table_row(2, "libmbfl version", tmp);
1719 }
1720 #if HAVE_ONIG
1721 {
1722 char tmp[256];
1723 snprintf(tmp, sizeof(tmp), "%d.%d.%d", ONIGURUMA_VERSION_MAJOR, ONIGURUMA_VERSION_MINOR, ONIGURUMA_VERSION_TEENY);
1724 php_info_print_table_row(2, "oniguruma version", tmp);
1725 }
1726 #endif
1727 php_info_print_table_end();
1728
1729 php_info_print_table_start();
1730 php_info_print_table_header(1, "mbstring extension makes use of \"streamable kanji code filter and converter\", which is distributed under the GNU Lesser General Public License version 2.1.");
1731 php_info_print_table_end();
1732
1733 #if HAVE_MBREGEX
1734 PHP_MINFO(mb_regex)(ZEND_MODULE_INFO_FUNC_ARGS_PASSTHRU);
1735 #endif
1736
1737 DISPLAY_INI_ENTRIES();
1738 }
1739 /* }}} */
1740
1741 /* {{{ proto string mb_language([string language])
1742 Sets the current language or Returns the current language as a string */
PHP_FUNCTION(mb_language)1743 PHP_FUNCTION(mb_language)
1744 {
1745 zend_string *name = NULL;
1746
1747 if (zend_parse_parameters(ZEND_NUM_ARGS(), "|S", &name) == FAILURE) {
1748 return;
1749 }
1750 if (name == NULL) {
1751 RETVAL_STRING((char *)mbfl_no_language2name(MBSTRG(language)));
1752 } else {
1753 zend_string *ini_name = zend_string_init("mbstring.language", sizeof("mbstring.language") - 1, 0);
1754 if (FAILURE == zend_alter_ini_entry(ini_name, name, PHP_INI_USER, PHP_INI_STAGE_RUNTIME)) {
1755 php_error_docref(NULL, E_WARNING, "Unknown language \"%s\"", ZSTR_VAL(name));
1756 RETVAL_FALSE;
1757 } else {
1758 RETVAL_TRUE;
1759 }
1760 zend_string_release(ini_name);
1761 }
1762 }
1763 /* }}} */
1764
1765 /* {{{ proto string mb_internal_encoding([string encoding])
1766 Sets the current internal encoding or Returns the current internal encoding as a string */
PHP_FUNCTION(mb_internal_encoding)1767 PHP_FUNCTION(mb_internal_encoding)
1768 {
1769 const char *name = NULL;
1770 size_t name_len;
1771 const mbfl_encoding *encoding;
1772
1773 if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s", &name, &name_len) == FAILURE) {
1774 return;
1775 }
1776 if (name == NULL) {
1777 name = MBSTRG(current_internal_encoding) ? MBSTRG(current_internal_encoding)->name: NULL;
1778 if (name != NULL) {
1779 RETURN_STRING(name);
1780 } else {
1781 RETURN_FALSE;
1782 }
1783 } else {
1784 encoding = mbfl_name2encoding(name);
1785 if (!encoding) {
1786 php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", name);
1787 RETURN_FALSE;
1788 } else {
1789 MBSTRG(current_internal_encoding) = encoding;
1790 RETURN_TRUE;
1791 }
1792 }
1793 }
1794 /* }}} */
1795
1796 /* {{{ proto mixed mb_http_input([string type])
1797 Returns the input encoding */
PHP_FUNCTION(mb_http_input)1798 PHP_FUNCTION(mb_http_input)
1799 {
1800 char *typ = NULL;
1801 size_t typ_len;
1802 int retname;
1803 char *list, *temp;
1804 const mbfl_encoding *result = NULL;
1805
1806 retname = 1;
1807 if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s", &typ, &typ_len) == FAILURE) {
1808 return;
1809 }
1810 if (typ == NULL) {
1811 result = MBSTRG(http_input_identify);
1812 } else {
1813 switch (*typ) {
1814 case 'G':
1815 case 'g':
1816 result = MBSTRG(http_input_identify_get);
1817 break;
1818 case 'P':
1819 case 'p':
1820 result = MBSTRG(http_input_identify_post);
1821 break;
1822 case 'C':
1823 case 'c':
1824 result = MBSTRG(http_input_identify_cookie);
1825 break;
1826 case 'S':
1827 case 's':
1828 result = MBSTRG(http_input_identify_string);
1829 break;
1830 case 'I':
1831 case 'i':
1832 {
1833 const mbfl_encoding **entry = MBSTRG(http_input_list);
1834 const size_t n = MBSTRG(http_input_list_size);
1835 size_t i;
1836 array_init(return_value);
1837 for (i = 0; i < n; i++) {
1838 add_next_index_string(return_value, (*entry)->name);
1839 entry++;
1840 }
1841 retname = 0;
1842 }
1843 break;
1844 case 'L':
1845 case 'l':
1846 {
1847 const mbfl_encoding **entry = MBSTRG(http_input_list);
1848 const size_t n = MBSTRG(http_input_list_size);
1849 size_t i;
1850 list = NULL;
1851 for (i = 0; i < n; i++) {
1852 if (list) {
1853 temp = list;
1854 spprintf(&list, 0, "%s,%s", temp, (*entry)->name);
1855 efree(temp);
1856 if (!list) {
1857 break;
1858 }
1859 } else {
1860 list = estrdup((*entry)->name);
1861 }
1862 entry++;
1863 }
1864 }
1865 if (!list) {
1866 RETURN_FALSE;
1867 }
1868 RETVAL_STRING(list);
1869 efree(list);
1870 retname = 0;
1871 break;
1872 default:
1873 result = MBSTRG(http_input_identify);
1874 break;
1875 }
1876 }
1877
1878 if (retname) {
1879 if (result) {
1880 RETVAL_STRING(result->name);
1881 } else {
1882 RETVAL_FALSE;
1883 }
1884 }
1885 }
1886 /* }}} */
1887
1888 /* {{{ proto string mb_http_output([string encoding])
1889 Sets the current output_encoding or returns the current output_encoding as a string */
PHP_FUNCTION(mb_http_output)1890 PHP_FUNCTION(mb_http_output)
1891 {
1892 const char *name = NULL;
1893 size_t name_len;
1894 const mbfl_encoding *encoding;
1895
1896 if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s", &name, &name_len) == FAILURE) {
1897 return;
1898 }
1899
1900 if (name == NULL) {
1901 name = MBSTRG(current_http_output_encoding) ? MBSTRG(current_http_output_encoding)->name: NULL;
1902 if (name != NULL) {
1903 RETURN_STRING(name);
1904 } else {
1905 RETURN_FALSE;
1906 }
1907 } else {
1908 encoding = mbfl_name2encoding(name);
1909 if (!encoding) {
1910 php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", name);
1911 RETURN_FALSE;
1912 } else {
1913 MBSTRG(current_http_output_encoding) = encoding;
1914 RETURN_TRUE;
1915 }
1916 }
1917 }
1918 /* }}} */
1919
1920 /* {{{ proto bool|array mb_detect_order([mixed encoding-list])
   Sets the current detect_order or returns the current detect_order as an array */
PHP_FUNCTION(mb_detect_order)1922 PHP_FUNCTION(mb_detect_order)
1923 {
1924 zval *arg1 = NULL;
1925
1926 if (zend_parse_parameters(ZEND_NUM_ARGS(), "|z", &arg1) == FAILURE) {
1927 return;
1928 }
1929
1930 if (!arg1) {
1931 size_t i;
1932 size_t n = MBSTRG(current_detect_order_list_size);
1933 const mbfl_encoding **entry = MBSTRG(current_detect_order_list);
1934 array_init(return_value);
1935 for (i = 0; i < n; i++) {
1936 add_next_index_string(return_value, (*entry)->name);
1937 entry++;
1938 }
1939 } else {
1940 const mbfl_encoding **list = NULL;
1941 size_t size = 0;
1942 switch (Z_TYPE_P(arg1)) {
1943 case IS_ARRAY:
1944 if (FAILURE == php_mb_parse_encoding_array(arg1, &list, &size, 0)) {
1945 if (list) {
1946 efree(list);
1947 }
1948 RETURN_FALSE;
1949 }
1950 break;
1951 default:
1952 convert_to_string_ex(arg1);
1953 if (FAILURE == php_mb_parse_encoding_list(Z_STRVAL_P(arg1), Z_STRLEN_P(arg1), &list, &size, 0)) {
1954 if (list) {
1955 efree(list);
1956 }
1957 RETURN_FALSE;
1958 }
1959 break;
1960 }
1961
1962 if (list == NULL) {
1963 RETURN_FALSE;
1964 }
1965
1966 if (MBSTRG(current_detect_order_list)) {
1967 efree(MBSTRG(current_detect_order_list));
1968 }
1969 MBSTRG(current_detect_order_list) = list;
1970 MBSTRG(current_detect_order_list_size) = size;
1971 RETURN_TRUE;
1972 }
1973 }
1974 /* }}} */
1975
1976 /* {{{ proto mixed mb_substitute_character([mixed substchar])
1977 Sets the current substitute_character or returns the current substitute_character */
PHP_FUNCTION(mb_substitute_character)1978 PHP_FUNCTION(mb_substitute_character)
1979 {
1980 zval *arg1 = NULL;
1981
1982 if (zend_parse_parameters(ZEND_NUM_ARGS(), "|z", &arg1) == FAILURE) {
1983 return;
1984 }
1985
1986 if (!arg1) {
1987 if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
1988 RETURN_STRING("none");
1989 } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
1990 RETURN_STRING("long");
1991 } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
1992 RETURN_STRING("entity");
1993 } else {
1994 RETURN_LONG(MBSTRG(current_filter_illegal_substchar));
1995 }
1996 } else {
1997 RETVAL_TRUE;
1998
1999 switch (Z_TYPE_P(arg1)) {
2000 case IS_STRING:
2001 if (strncasecmp("none", Z_STRVAL_P(arg1), Z_STRLEN_P(arg1)) == 0) {
2002 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
2003 } else if (strncasecmp("long", Z_STRVAL_P(arg1), Z_STRLEN_P(arg1)) == 0) {
2004 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
2005 } else if (strncasecmp("entity", Z_STRVAL_P(arg1), Z_STRLEN_P(arg1)) == 0) {
2006 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
2007 } else {
2008 convert_to_long_ex(arg1);
2009
2010 if (Z_LVAL_P(arg1) < 0xffff && Z_LVAL_P(arg1) > 0x0) {
2011 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
2012 MBSTRG(current_filter_illegal_substchar) = Z_LVAL_P(arg1);
2013 } else {
2014 php_error_docref(NULL, E_WARNING, "Unknown character.");
2015 RETURN_FALSE;
2016 }
2017 }
2018 break;
2019 default:
2020 convert_to_long_ex(arg1);
2021 if (Z_LVAL_P(arg1) < 0xffff && Z_LVAL_P(arg1) > 0x0) {
2022 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
2023 MBSTRG(current_filter_illegal_substchar) = Z_LVAL_P(arg1);
2024 } else {
2025 php_error_docref(NULL, E_WARNING, "Unknown character.");
2026 RETURN_FALSE;
2027 }
2028 break;
2029 }
2030 }
2031 }
2032 /* }}} */
2033
2034 /* {{{ proto string mb_preferred_mime_name(string encoding)
2035 Return the preferred MIME name (charset) as a string */
PHP_FUNCTION(mb_preferred_mime_name)2036 PHP_FUNCTION(mb_preferred_mime_name)
2037 {
2038 enum mbfl_no_encoding no_encoding;
2039 char *name = NULL;
2040 size_t name_len;
2041
2042 if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &name, &name_len) == FAILURE) {
2043 return;
2044 } else {
2045 no_encoding = mbfl_name2no_encoding(name);
2046 if (no_encoding == mbfl_no_encoding_invalid) {
2047 php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", name);
2048 RETVAL_FALSE;
2049 } else {
2050 const char *preferred_name = mbfl_no2preferred_mime_name(no_encoding);
2051 if (preferred_name == NULL || *preferred_name == '\0') {
2052 php_error_docref(NULL, E_WARNING, "No MIME preferred name corresponding to \"%s\"", name);
2053 RETVAL_FALSE;
2054 } else {
2055 RETVAL_STRING((char *)preferred_name);
2056 }
2057 }
2058 }
2059 }
2060 /* }}} */
2061
/*
 * Byte-range predicates; each evaluates to 1 when c falls inside one of the
 * listed ranges and to 0 otherwise. Judging by the names these presumably
 * classify the first and second byte of a Shift_JIS sequence - confirm
 * against the users of the macros. Note that c is evaluated more than once.
 */
#define IS_SJIS1(c) (((c) >= 0x81 && (c) <= 0x9f) || ((c) >= 0xe0 && (c) <= 0xf5))
#define IS_SJIS2(c) (((c) >= 0x40 && (c) <= 0x7e) || ((c) >= 0x80 && (c) <= 0xfc))
2064
2065 /* {{{ proto bool mb_parse_str(string encoded_string [, array result])
2066 Parses GET/POST/COOKIE data and sets global variables */
PHP_FUNCTION(mb_parse_str)2067 PHP_FUNCTION(mb_parse_str)
2068 {
2069 zval *track_vars_array = NULL;
2070 char *encstr = NULL;
2071 size_t encstr_len;
2072 php_mb_encoding_handler_info_t info;
2073 const mbfl_encoding *detected;
2074
2075 track_vars_array = NULL;
2076 if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|z/", &encstr, &encstr_len, &track_vars_array) == FAILURE) {
2077 return;
2078 }
2079
2080 if (track_vars_array != NULL) {
2081 /* Clear out the array */
2082 zval_dtor(track_vars_array);
2083 array_init(track_vars_array);
2084 }
2085
2086 encstr = estrndup(encstr, encstr_len);
2087
2088 info.data_type = PARSE_STRING;
2089 info.separator = PG(arg_separator).input;
2090 info.report_errors = 1;
2091 info.to_encoding = MBSTRG(current_internal_encoding);
2092 info.to_language = MBSTRG(language);
2093 info.from_encodings = MBSTRG(http_input_list);
2094 info.num_from_encodings = MBSTRG(http_input_list_size);
2095 info.from_language = MBSTRG(language);
2096
2097 if (track_vars_array != NULL) {
2098 detected = _php_mb_encoding_handler_ex(&info, track_vars_array, encstr);
2099 } else {
2100 zval tmp;
2101 zend_array *symbol_table = zend_rebuild_symbol_table();
2102
2103 ZVAL_ARR(&tmp, symbol_table);
2104 detected = _php_mb_encoding_handler_ex(&info, &tmp, encstr);
2105 }
2106
2107 MBSTRG(http_input_identify) = detected;
2108
2109 RETVAL_BOOL(detected);
2110
2111 if (encstr != NULL) efree(encstr);
2112 }
2113 /* }}} */
2114
2115 /* {{{ proto string mb_output_handler(string contents, int status)
2116 Returns string in output buffer converted to the http_output encoding */
PHP_FUNCTION(mb_output_handler)2117 PHP_FUNCTION(mb_output_handler)
2118 {
2119 char *arg_string;
2120 size_t arg_string_len;
2121 zend_long arg_status;
2122 mbfl_string string, result;
2123 const char *charset;
2124 char *p;
2125 const mbfl_encoding *encoding;
2126 int last_feed, len;
2127 unsigned char send_text_mimetype = 0;
2128 char *s, *mimetype = NULL;
2129
2130 if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl", &arg_string, &arg_string_len, &arg_status) == FAILURE) {
2131 return;
2132 }
2133
2134 encoding = MBSTRG(current_http_output_encoding);
2135
2136 /* start phase only */
2137 if ((arg_status & PHP_OUTPUT_HANDLER_START) != 0) {
2138 /* delete the converter just in case. */
2139 if (MBSTRG(outconv)) {
2140 MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
2141 mbfl_buffer_converter_delete(MBSTRG(outconv));
2142 MBSTRG(outconv) = NULL;
2143 }
2144 if (encoding == &mbfl_encoding_pass) {
2145 RETURN_STRINGL(arg_string, arg_string_len);
2146 }
2147
2148 /* analyze mime type */
2149 if (SG(sapi_headers).mimetype &&
2150 _php_mb_match_regex(
2151 MBSTRG(http_output_conv_mimetypes),
2152 SG(sapi_headers).mimetype,
2153 strlen(SG(sapi_headers).mimetype))) {
2154 if ((s = strchr(SG(sapi_headers).mimetype,';')) == NULL){
2155 mimetype = estrdup(SG(sapi_headers).mimetype);
2156 } else {
2157 mimetype = estrndup(SG(sapi_headers).mimetype,s-SG(sapi_headers).mimetype);
2158 }
2159 send_text_mimetype = 1;
2160 } else if (SG(sapi_headers).send_default_content_type) {
2161 mimetype = SG(default_mimetype) ? SG(default_mimetype) : SAPI_DEFAULT_MIMETYPE;
2162 }
2163
2164 /* if content-type is not yet set, set it and activate the converter */
2165 if (SG(sapi_headers).send_default_content_type || send_text_mimetype) {
2166 charset = encoding->mime_name;
2167 if (charset) {
2168 len = spprintf( &p, 0, "Content-Type: %s; charset=%s", mimetype, charset );
2169 if (sapi_add_header(p, len, 0) != FAILURE) {
2170 SG(sapi_headers).send_default_content_type = 0;
2171 }
2172 }
2173 /* activate the converter */
2174 MBSTRG(outconv) = mbfl_buffer_converter_new2(MBSTRG(current_internal_encoding), encoding, 0);
2175 if (send_text_mimetype){
2176 efree(mimetype);
2177 }
2178 }
2179 }
2180
2181 /* just return if the converter is not activated. */
2182 if (MBSTRG(outconv) == NULL) {
2183 RETURN_STRINGL(arg_string, arg_string_len);
2184 }
2185
2186 /* flag */
2187 last_feed = ((arg_status & PHP_OUTPUT_HANDLER_END) != 0);
2188 /* mode */
2189 mbfl_buffer_converter_illegal_mode(MBSTRG(outconv), MBSTRG(current_filter_illegal_mode));
2190 mbfl_buffer_converter_illegal_substchar(MBSTRG(outconv), MBSTRG(current_filter_illegal_substchar));
2191
2192 /* feed the string */
2193 mbfl_string_init(&string);
2194 /* these are not needed. convd has encoding info.
2195 string.no_language = MBSTRG(language);
2196 string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2197 */
2198 string.val = (unsigned char *)arg_string;
2199 string.len = arg_string_len;
2200 mbfl_buffer_converter_feed(MBSTRG(outconv), &string);
2201 if (last_feed) {
2202 mbfl_buffer_converter_flush(MBSTRG(outconv));
2203 }
2204 /* get the converter output, and return it */
2205 mbfl_buffer_converter_result(MBSTRG(outconv), &result);
2206 // TODO: avoid reallocation ???
2207 RETVAL_STRINGL((char *)result.val, result.len); /* the string is already strdup()'ed */
2208 efree(result.val);
2209
2210 /* delete the converter if it is the last feed. */
2211 if (last_feed) {
2212 MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
2213 mbfl_buffer_converter_delete(MBSTRG(outconv));
2214 MBSTRG(outconv) = NULL;
2215 }
2216 }
2217 /* }}} */
2218
2219 /* {{{ proto int mb_strlen(string str [, string encoding])
2220 Get character numbers of a string */
PHP_FUNCTION(mb_strlen)2221 PHP_FUNCTION(mb_strlen)
2222 {
2223 int n;
2224 mbfl_string string;
2225 char *enc_name = NULL;
2226 size_t enc_name_len, string_len;
2227
2228 mbfl_string_init(&string);
2229
2230 if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|s", (char **)&string.val, &string_len, &enc_name, &enc_name_len) == FAILURE) {
2231 return;
2232 }
2233
2234 if (ZEND_SIZE_T_UINT_OVFL(string_len)) {
2235 php_error_docref(NULL, E_WARNING, "String overflows the max allowed length of %u", UINT_MAX);
2236 return;
2237 }
2238
2239 string.len = (uint32_t)string_len;
2240
2241 string.no_language = MBSTRG(language);
2242 if (enc_name == NULL) {
2243 string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2244 } else {
2245 string.no_encoding = mbfl_name2no_encoding(enc_name);
2246 if (string.no_encoding == mbfl_no_encoding_invalid) {
2247 php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2248 RETURN_FALSE;
2249 }
2250 }
2251
2252 n = mbfl_strlen(&string);
2253 if (n >= 0) {
2254 RETVAL_LONG(n);
2255 } else {
2256 RETVAL_FALSE;
2257 }
2258 }
2259 /* }}} */
2260
2261 /* {{{ proto int mb_strpos(string haystack, string needle [, int offset [, string encoding]])
2262 Find position of first occurrence of a string within another */
PHP_FUNCTION(mb_strpos)2263 PHP_FUNCTION(mb_strpos)
2264 {
2265 int n, reverse = 0;
2266 zend_long offset = 0;
2267 mbfl_string haystack, needle;
2268 char *enc_name = NULL;
2269 size_t enc_name_len, haystack_len, needle_len;
2270
2271 mbfl_string_init(&haystack);
2272 mbfl_string_init(&needle);
2273 haystack.no_language = MBSTRG(language);
2274 haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2275 needle.no_language = MBSTRG(language);
2276 needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2277
2278 if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|ls", (char **)&haystack.val, &haystack_len, (char **)&needle.val, &needle_len, &offset, &enc_name, &enc_name_len) == FAILURE) {
2279 return;
2280 }
2281
2282 if (ZEND_SIZE_T_UINT_OVFL(haystack_len)) {
2283 php_error_docref(NULL, E_WARNING, "Haystack length overflows the max allowed length of %u", UINT_MAX);
2284 return;
2285 } else if (ZEND_SIZE_T_UINT_OVFL(needle_len)) {
2286 php_error_docref(NULL, E_WARNING, "Needle length overflows the max allowed length of %u", UINT_MAX);
2287 return;
2288 }
2289
2290 haystack.len = (uint32_t)haystack_len;
2291 needle.len = (uint32_t)needle_len;
2292
2293 if (enc_name != NULL) {
2294 haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2295 if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2296 php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2297 RETURN_FALSE;
2298 }
2299 }
2300
2301 if (offset < 0 || offset > mbfl_strlen(&haystack)) {
2302 php_error_docref(NULL, E_WARNING, "Offset not contained in string");
2303 RETURN_FALSE;
2304 }
2305 if (needle.len == 0) {
2306 php_error_docref(NULL, E_WARNING, "Empty delimiter");
2307 RETURN_FALSE;
2308 }
2309
2310 n = mbfl_strpos(&haystack, &needle, offset, reverse);
2311 if (n >= 0) {
2312 RETVAL_LONG(n);
2313 } else {
2314 switch (-n) {
2315 case 1:
2316 break;
2317 case 2:
2318 php_error_docref(NULL, E_WARNING, "Needle has not positive length");
2319 break;
2320 case 4:
2321 php_error_docref(NULL, E_WARNING, "Unknown encoding or conversion error");
2322 break;
2323 case 8:
2324 php_error_docref(NULL, E_NOTICE, "Argument is empty");
2325 break;
2326 default:
2327 php_error_docref(NULL, E_WARNING, "Unknown error in mb_strpos");
2328 break;
2329 }
2330 RETVAL_FALSE;
2331 }
2332 }
2333 /* }}} */
2334
2335 /* {{{ proto int mb_strrpos(string haystack, string needle [, int offset [, string encoding]])
2336 Find position of last occurrence of a string within another */
PHP_FUNCTION(mb_strrpos)2337 PHP_FUNCTION(mb_strrpos)
2338 {
2339 int n;
2340 mbfl_string haystack, needle;
2341 char *enc_name = NULL;
2342 size_t enc_name_len, haystack_len, needle_len;
2343 zval *zoffset = NULL;
2344 long offset = 0, str_flg;
2345 char *enc_name2 = NULL;
2346 int enc_name_len2;
2347
2348 mbfl_string_init(&haystack);
2349 mbfl_string_init(&needle);
2350 haystack.no_language = MBSTRG(language);
2351 haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2352 needle.no_language = MBSTRG(language);
2353 needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2354
2355 if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|zs", (char **)&haystack.val, &haystack_len, (char **)&needle.val, &needle_len, &zoffset, &enc_name, &enc_name_len) == FAILURE) {
2356 return;
2357 }
2358
2359 if (ZEND_SIZE_T_UINT_OVFL(haystack_len)) {
2360 php_error_docref(NULL, E_WARNING, "Haystack length overflows the max allowed length of %u", UINT_MAX);
2361 return;
2362 } else if (ZEND_SIZE_T_UINT_OVFL(needle_len)) {
2363 php_error_docref(NULL, E_WARNING, "Needle length overflows the max allowed length of %u", UINT_MAX);
2364 return;
2365 }
2366
2367 haystack.len = (uint32_t)haystack_len;
2368 needle.len = (uint32_t)needle_len;
2369
2370 if (zoffset) {
2371 if (Z_TYPE_P(zoffset) == IS_STRING) {
2372 enc_name2 = Z_STRVAL_P(zoffset);
2373 enc_name_len2 = Z_STRLEN_P(zoffset);
2374 str_flg = 1;
2375
2376 if (enc_name2 != NULL) {
2377 switch (*enc_name2) {
2378 case '0':
2379 case '1':
2380 case '2':
2381 case '3':
2382 case '4':
2383 case '5':
2384 case '6':
2385 case '7':
2386 case '8':
2387 case '9':
2388 case ' ':
2389 case '-':
2390 case '.':
2391 break;
2392 default :
2393 str_flg = 0;
2394 break;
2395 }
2396 }
2397
2398 if (str_flg) {
2399 convert_to_long_ex(zoffset);
2400 offset = Z_LVAL_P(zoffset);
2401 } else {
2402 enc_name = enc_name2;
2403 enc_name_len = enc_name_len2;
2404 }
2405 } else {
2406 convert_to_long_ex(zoffset);
2407 offset = Z_LVAL_P(zoffset);
2408 }
2409 }
2410
2411 if (enc_name != NULL) {
2412 haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2413 if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2414 php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2415 RETURN_FALSE;
2416 }
2417 }
2418
2419 if (haystack.len <= 0) {
2420 RETURN_FALSE;
2421 }
2422 if (needle.len <= 0) {
2423 RETURN_FALSE;
2424 }
2425
2426 {
2427 int haystack_char_len = mbfl_strlen(&haystack);
2428 if ((offset > 0 && offset > haystack_char_len) ||
2429 (offset < 0 && -offset > haystack_char_len)) {
2430 php_error_docref(NULL, E_WARNING, "Offset is greater than the length of haystack string");
2431 RETURN_FALSE;
2432 }
2433 }
2434
2435 n = mbfl_strpos(&haystack, &needle, offset, 1);
2436 if (n >= 0) {
2437 RETVAL_LONG(n);
2438 } else {
2439 RETVAL_FALSE;
2440 }
2441 }
2442 /* }}} */
2443
2444 /* {{{ proto int mb_stripos(string haystack, string needle [, int offset [, string encoding]])
2445 Finds position of first occurrence of a string within another, case insensitive */
PHP_FUNCTION(mb_stripos)2446 PHP_FUNCTION(mb_stripos)
2447 {
2448 int n = -1;
2449 zend_long offset = 0;
2450 mbfl_string haystack, needle;
2451 const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
2452 size_t from_encoding_len, haystack_len, needle_len;
2453
2454 if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|ls", (char **)&haystack.val, &haystack_len, (char **)&needle.val, &needle_len, &offset, &from_encoding, &from_encoding_len) == FAILURE) {
2455 return;
2456 }
2457
2458 if (ZEND_SIZE_T_UINT_OVFL(haystack_len)) {
2459 php_error_docref(NULL, E_WARNING, "Haystack length overflows the max allowed length of %u", UINT_MAX);
2460 return;
2461 } else if (ZEND_SIZE_T_UINT_OVFL(needle_len)) {
2462 php_error_docref(NULL, E_WARNING, "Needle length overflows the max allowed length of %u", UINT_MAX);
2463 return;
2464 }
2465
2466 haystack.len = (uint32_t)haystack_len;
2467 needle.len = (uint32_t)needle_len;
2468
2469 if (needle.len == 0) {
2470 php_error_docref(NULL, E_WARNING, "Empty delimiter");
2471 RETURN_FALSE;
2472 }
2473 n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, from_encoding);
2474
2475 if (n >= 0) {
2476 RETVAL_LONG(n);
2477 } else {
2478 RETVAL_FALSE;
2479 }
2480 }
2481 /* }}} */
2482
2483 /* {{{ proto int mb_strripos(string haystack, string needle [, int offset [, string encoding]])
2484 Finds position of last occurrence of a string within another, case insensitive */
PHP_FUNCTION(mb_strripos)2485 PHP_FUNCTION(mb_strripos)
2486 {
2487 int n = -1;
2488 zend_long offset = 0;
2489 mbfl_string haystack, needle;
2490 const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
2491 size_t from_encoding_len, haystack_len, needle_len;
2492
2493 if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|ls", (char **)&haystack.val, &haystack_len, (char **)&needle.val, &needle_len, &offset, &from_encoding, &from_encoding_len) == FAILURE) {
2494 return;
2495 }
2496
2497 if (ZEND_SIZE_T_UINT_OVFL(haystack_len)) {
2498 php_error_docref(NULL, E_WARNING, "Haystack length overflows the max allowed length of %u", UINT_MAX);
2499 return;
2500 } else if (ZEND_SIZE_T_UINT_OVFL(needle_len)) {
2501 php_error_docref(NULL, E_WARNING, "Needle length overflows the max allowed length of %u", UINT_MAX);
2502 return;
2503 }
2504
2505 haystack.len = (uint32_t)haystack_len;
2506 needle.len = (uint32_t)needle_len;
2507
2508 n = php_mb_stripos(1, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, from_encoding);
2509
2510 if (n >= 0) {
2511 RETVAL_LONG(n);
2512 } else {
2513 RETVAL_FALSE;
2514 }
2515 }
2516 /* }}} */
2517
2518 /* {{{ proto string mb_strstr(string haystack, string needle[, bool part[, string encoding]])
2519 Finds first occurrence of a string within another */
PHP_FUNCTION(mb_strstr)2520 PHP_FUNCTION(mb_strstr)
2521 {
2522 int n, len, mblen;
2523 mbfl_string haystack, needle, result, *ret = NULL;
2524 char *enc_name = NULL;
2525 size_t enc_name_len, haystack_len, needle_len;
2526 zend_bool part = 0;
2527
2528 mbfl_string_init(&haystack);
2529 mbfl_string_init(&needle);
2530 haystack.no_language = MBSTRG(language);
2531 haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2532 needle.no_language = MBSTRG(language);
2533 needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2534
2535 if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|bs", (char **)&haystack.val, &haystack_len, (char **)&needle.val, &needle_len, &part, &enc_name, &enc_name_len) == FAILURE) {
2536 return;
2537 }
2538
2539 if (ZEND_SIZE_T_UINT_OVFL(haystack_len)) {
2540 php_error_docref(NULL, E_WARNING, "Haystack length overflows the max allowed length of %u", UINT_MAX);
2541 return;
2542 } else if (ZEND_SIZE_T_UINT_OVFL(needle_len)) {
2543 php_error_docref(NULL, E_WARNING, "Needle length overflows the max allowed length of %u", UINT_MAX);
2544 return;
2545 }
2546
2547 haystack.len = (uint32_t)haystack_len;
2548 needle.len = (uint32_t)needle_len;
2549
2550 if (enc_name != NULL) {
2551 haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2552 if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2553 php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2554 RETURN_FALSE;
2555 }
2556 }
2557
2558 if (needle.len <= 0) {
2559 php_error_docref(NULL, E_WARNING, "Empty delimiter");
2560 RETURN_FALSE;
2561 }
2562 n = mbfl_strpos(&haystack, &needle, 0, 0);
2563 if (n >= 0) {
2564 mblen = mbfl_strlen(&haystack);
2565 if (part) {
2566 ret = mbfl_substr(&haystack, &result, 0, n);
2567 if (ret != NULL) {
2568 // TODO: avoid reallocation ???
2569 RETVAL_STRINGL((char *)ret->val, ret->len);
2570 efree(ret->val);
2571 } else {
2572 RETVAL_FALSE;
2573 }
2574 } else {
2575 len = (mblen - n);
2576 ret = mbfl_substr(&haystack, &result, n, len);
2577 if (ret != NULL) {
2578 // TODO: avoid reallocation ???
2579 RETVAL_STRINGL((char *)ret->val, ret->len);
2580 efree(ret->val);
2581 } else {
2582 RETVAL_FALSE;
2583 }
2584 }
2585 } else {
2586 RETVAL_FALSE;
2587 }
2588 }
2589 /* }}} */
2590
2591 /* {{{ proto string mb_strrchr(string haystack, string needle[, bool part[, string encoding]])
2592 Finds the last occurrence of a character in a string within another */
PHP_FUNCTION(mb_strrchr)2593 PHP_FUNCTION(mb_strrchr)
2594 {
2595 int n, len, mblen;
2596 mbfl_string haystack, needle, result, *ret = NULL;
2597 char *enc_name = NULL;
2598 size_t enc_name_len, haystack_len, needle_len;
2599 zend_bool part = 0;
2600
2601 mbfl_string_init(&haystack);
2602 mbfl_string_init(&needle);
2603 haystack.no_language = MBSTRG(language);
2604 haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2605 needle.no_language = MBSTRG(language);
2606 needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2607
2608 if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|bs", (char **)&haystack.val, &haystack_len, (char **)&needle.val, &needle_len, &part, &enc_name, &enc_name_len) == FAILURE) {
2609 return;
2610 }
2611
2612 if (ZEND_SIZE_T_UINT_OVFL(haystack_len)) {
2613 php_error_docref(NULL, E_WARNING, "Haystack length overflows the max allowed length of %u", UINT_MAX);
2614 return;
2615 } else if (ZEND_SIZE_T_UINT_OVFL(needle_len)) {
2616 php_error_docref(NULL, E_WARNING, "Needle length overflows the max allowed length of %u", UINT_MAX);
2617 return;
2618 }
2619
2620 haystack.len = (uint32_t)haystack_len;
2621 needle.len = (uint32_t)needle_len;
2622
2623 if (enc_name != NULL) {
2624 haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2625 if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2626 php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2627 RETURN_FALSE;
2628 }
2629 }
2630
2631 if (haystack.len <= 0) {
2632 RETURN_FALSE;
2633 }
2634 if (needle.len <= 0) {
2635 RETURN_FALSE;
2636 }
2637 n = mbfl_strpos(&haystack, &needle, 0, 1);
2638 if (n >= 0) {
2639 mblen = mbfl_strlen(&haystack);
2640 if (part) {
2641 ret = mbfl_substr(&haystack, &result, 0, n);
2642 if (ret != NULL) {
2643 // TODO: avoid reallocation ???
2644 RETVAL_STRINGL((char *)ret->val, ret->len);
2645 efree(ret->val);
2646 } else {
2647 RETVAL_FALSE;
2648 }
2649 } else {
2650 len = (mblen - n);
2651 ret = mbfl_substr(&haystack, &result, n, len);
2652 if (ret != NULL) {
2653 // TODO: avoid reallocation ???
2654 RETVAL_STRINGL((char *)ret->val, ret->len);
2655 efree(ret->val);
2656 } else {
2657 RETVAL_FALSE;
2658 }
2659 }
2660 } else {
2661 RETVAL_FALSE;
2662 }
2663 }
2664 /* }}} */
2665
2666 /* {{{ proto string mb_stristr(string haystack, string needle[, bool part[, string encoding]])
2667 Finds first occurrence of a string within another, case insensitive */
PHP_FUNCTION(mb_stristr)2668 PHP_FUNCTION(mb_stristr)
2669 {
2670 zend_bool part = 0;
2671 size_t from_encoding_len, len, mblen, haystack_len, needle_len;
2672 int n;
2673 mbfl_string haystack, needle, result, *ret = NULL;
2674 const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
2675 mbfl_string_init(&haystack);
2676 mbfl_string_init(&needle);
2677 haystack.no_language = MBSTRG(language);
2678 haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2679 needle.no_language = MBSTRG(language);
2680 needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2681
2682
2683 if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|bs", (char **)&haystack.val, &haystack_len, (char **)&needle.val, &needle_len, &part, &from_encoding, &from_encoding_len) == FAILURE) {
2684 return;
2685 }
2686
2687 if (ZEND_SIZE_T_UINT_OVFL(haystack_len)) {
2688 php_error_docref(NULL, E_WARNING, "Haystack length overflows the max allowed length of %u", UINT_MAX);
2689 return;
2690 } else if (ZEND_SIZE_T_UINT_OVFL(needle_len)) {
2691 php_error_docref(NULL, E_WARNING, "Needle length overflows the max allowed length of %u", UINT_MAX);
2692 return;
2693 }
2694
2695 haystack.len = (uint32_t)haystack_len;
2696 needle.len = (uint32_t)needle_len;
2697
2698 if (!needle.len) {
2699 php_error_docref(NULL, E_WARNING, "Empty delimiter");
2700 RETURN_FALSE;
2701 }
2702
2703 haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding);
2704 if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2705 php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", from_encoding);
2706 RETURN_FALSE;
2707 }
2708
2709 n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, 0, from_encoding);
2710
2711 if (n <0) {
2712 RETURN_FALSE;
2713 }
2714
2715 mblen = mbfl_strlen(&haystack);
2716
2717 if (part) {
2718 ret = mbfl_substr(&haystack, &result, 0, n);
2719 if (ret != NULL) {
2720 // TODO: avoid reallocation ???
2721 RETVAL_STRINGL((char *)ret->val, ret->len);
2722 efree(ret->val);
2723 } else {
2724 RETVAL_FALSE;
2725 }
2726 } else {
2727 len = (mblen - n);
2728 ret = mbfl_substr(&haystack, &result, n, len);
2729 if (ret != NULL) {
2730 // TODO: avoid reallocaton ???
2731 RETVAL_STRINGL((char *)ret->val, ret->len);
2732 efree(ret->val);
2733 } else {
2734 RETVAL_FALSE;
2735 }
2736 }
2737 }
2738 /* }}} */
2739
2740 /* {{{ proto string mb_strrichr(string haystack, string needle[, bool part[, string encoding]])
2741 Finds the last occurrence of a character in a string within another, case insensitive */
PHP_FUNCTION(mb_strrichr)2742 PHP_FUNCTION(mb_strrichr)
2743 {
2744 zend_bool part = 0;
2745 int n, len, mblen;
2746 size_t from_encoding_len, haystack_len, needle_len;
2747 mbfl_string haystack, needle, result, *ret = NULL;
2748 const char *from_encoding = MBSTRG(current_internal_encoding)->name;
2749 mbfl_string_init(&haystack);
2750 mbfl_string_init(&needle);
2751 haystack.no_language = MBSTRG(language);
2752 haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2753 needle.no_language = MBSTRG(language);
2754 needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2755
2756
2757 if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|bs", (char **)&haystack.val, &haystack_len, (char **)&needle.val, &needle_len, &part, &from_encoding, &from_encoding_len) == FAILURE) {
2758 return;
2759 }
2760
2761 if (ZEND_SIZE_T_UINT_OVFL(haystack_len)) {
2762 php_error_docref(NULL, E_WARNING, "Haystack length overflows the max allowed length of %u", UINT_MAX);
2763 return;
2764 } else if (ZEND_SIZE_T_UINT_OVFL(needle_len)) {
2765 php_error_docref(NULL, E_WARNING, "Needle length overflows the max allowed length of %u", UINT_MAX);
2766 return;
2767 }
2768
2769 haystack.len = (uint32_t)haystack_len;
2770 needle.len = (uint32_t)needle_len;
2771
2772 haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding);
2773 if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2774 php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", from_encoding);
2775 RETURN_FALSE;
2776 }
2777
2778 n = php_mb_stripos(1, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, 0, from_encoding);
2779
2780 if (n <0) {
2781 RETURN_FALSE;
2782 }
2783
2784 mblen = mbfl_strlen(&haystack);
2785
2786 if (part) {
2787 ret = mbfl_substr(&haystack, &result, 0, n);
2788 if (ret != NULL) {
2789 // TODO: avoid reallocation ???
2790 RETVAL_STRINGL((char *)ret->val, ret->len);
2791 efree(ret->val);
2792 } else {
2793 RETVAL_FALSE;
2794 }
2795 } else {
2796 len = (mblen - n);
2797 ret = mbfl_substr(&haystack, &result, n, len);
2798 if (ret != NULL) {
2799 // TODO: avoid reallocation ???
2800 RETVAL_STRINGL((char *)ret->val, ret->len);
2801 efree(ret->val);
2802 } else {
2803 RETVAL_FALSE;
2804 }
2805 }
2806 }
2807 /* }}} */
2808
2809 /* {{{ proto int mb_substr_count(string haystack, string needle [, string encoding])
2810 Count the number of substring occurrences */
PHP_FUNCTION(mb_substr_count)2811 PHP_FUNCTION(mb_substr_count)
2812 {
2813 int n;
2814 mbfl_string haystack, needle;
2815 char *enc_name = NULL;
2816 size_t enc_name_len, haystack_len, needle_len;
2817
2818 mbfl_string_init(&haystack);
2819 mbfl_string_init(&needle);
2820 haystack.no_language = MBSTRG(language);
2821 haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2822 needle.no_language = MBSTRG(language);
2823 needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2824
2825 if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|s", (char **)&haystack.val, &haystack_len, (char **)&needle.val, &needle_len, &enc_name, &enc_name_len) == FAILURE) {
2826 return;
2827 }
2828
2829 if (ZEND_SIZE_T_UINT_OVFL(haystack_len)) {
2830 php_error_docref(NULL, E_WARNING, "Haystack length overflows the max allowed length of %u", UINT_MAX);
2831 return;
2832 } else if (ZEND_SIZE_T_UINT_OVFL(needle_len)) {
2833 php_error_docref(NULL, E_WARNING, "Needle length overflows the max allowed length of %u", UINT_MAX);
2834 return;
2835 }
2836
2837 haystack.len = (uint32_t)haystack_len;
2838 needle.len = (uint32_t)needle_len;
2839
2840 if (enc_name != NULL) {
2841 haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2842 if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2843 php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2844 RETURN_FALSE;
2845 }
2846 }
2847
2848 if (needle.len <= 0) {
2849 php_error_docref(NULL, E_WARNING, "Empty substring");
2850 RETURN_FALSE;
2851 }
2852
2853 n = mbfl_substr_count(&haystack, &needle);
2854 if (n >= 0) {
2855 RETVAL_LONG(n);
2856 } else {
2857 RETVAL_FALSE;
2858 }
2859 }
2860 /* }}} */
2861
2862 /* {{{ proto string mb_substr(string str, int start [, int length [, string encoding]])
2863 Returns part of a string */
PHP_FUNCTION(mb_substr)2864 PHP_FUNCTION(mb_substr)
2865 {
2866 char *str, *encoding = NULL;
2867 zend_long from, len;
2868 int mblen;
2869 size_t str_len, encoding_len;
2870 zend_bool len_is_null = 1;
2871 mbfl_string string, result, *ret;
2872
2873 if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl|l!s", &str, &str_len, &from, &len, &len_is_null, &encoding, &encoding_len) == FAILURE) {
2874 return;
2875 }
2876
2877 mbfl_string_init(&string);
2878 string.no_language = MBSTRG(language);
2879 string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2880
2881 if (encoding) {
2882 string.no_encoding = mbfl_name2no_encoding(encoding);
2883 if (string.no_encoding == mbfl_no_encoding_invalid) {
2884 php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", encoding);
2885 RETURN_FALSE;
2886 }
2887 }
2888
2889 string.val = (unsigned char *)str;
2890 string.len = str_len;
2891
2892 if (len_is_null) {
2893 len = str_len;
2894 }
2895
2896 /* measures length */
2897 mblen = 0;
2898 if (from < 0 || len < 0) {
2899 mblen = mbfl_strlen(&string);
2900 }
2901
2902 /* if "from" position is negative, count start position from the end
2903 * of the string
2904 */
2905 if (from < 0) {
2906 from = mblen + from;
2907 if (from < 0) {
2908 from = 0;
2909 }
2910 }
2911
2912 /* if "length" position is negative, set it to the length
2913 * needed to stop that many chars from the end of the string
2914 */
2915 if (len < 0) {
2916 len = (mblen - from) + len;
2917 if (len < 0) {
2918 len = 0;
2919 }
2920 }
2921
2922 if (((MBSTRG(func_overload) & MB_OVERLOAD_STRING) == MB_OVERLOAD_STRING)
2923 && (from >= mbfl_strlen(&string))) {
2924 RETURN_FALSE;
2925 }
2926
2927 if (from > INT_MAX) {
2928 from = INT_MAX;
2929 }
2930 if (len > INT_MAX) {
2931 len = INT_MAX;
2932 }
2933
2934 ret = mbfl_substr(&string, &result, from, len);
2935 if (NULL == ret) {
2936 RETURN_FALSE;
2937 }
2938
2939 // TODO: avoid reallocation ???
2940 RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
2941 efree(ret->val);
2942 }
2943 /* }}} */
2944
2945 /* {{{ proto string mb_strcut(string str, int start [, int length [, string encoding]])
2946 Returns part of a string */
PHP_FUNCTION(mb_strcut)2947 PHP_FUNCTION(mb_strcut)
2948 {
2949 char *encoding = NULL;
2950 zend_long from, len;
2951 size_t encoding_len, string_len;
2952 zend_bool len_is_null = 1;
2953 mbfl_string string, result, *ret;
2954
2955 mbfl_string_init(&string);
2956 string.no_language = MBSTRG(language);
2957 string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2958
2959 if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl|l!s", (char **)&string.val, &string_len, &from, &len, &len_is_null, &encoding, &encoding_len) == FAILURE) {
2960 return;
2961 }
2962
2963 if (ZEND_SIZE_T_UINT_OVFL(string_len)) {
2964 php_error_docref(NULL, E_WARNING, "String length overflows the max allowed length of %u", UINT_MAX);
2965 return;
2966 }
2967
2968 string.len = (uint32_t)string_len;
2969
2970 if (encoding) {
2971 string.no_encoding = mbfl_name2no_encoding(encoding);
2972 if (string.no_encoding == mbfl_no_encoding_invalid) {
2973 php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", encoding);
2974 RETURN_FALSE;
2975 }
2976 }
2977
2978 if (len_is_null) {
2979 len = string.len;
2980 }
2981
2982 /* if "from" position is negative, count start position from the end
2983 * of the string
2984 */
2985 if (from < 0) {
2986 from = string.len + from;
2987 if (from < 0) {
2988 from = 0;
2989 }
2990 }
2991
2992 /* if "length" position is negative, set it to the length
2993 * needed to stop that many chars from the end of the string
2994 */
2995 if (len < 0) {
2996 len = (string.len - from) + len;
2997 if (len < 0) {
2998 len = 0;
2999 }
3000 }
3001
3002 if ((unsigned int)from > string.len) {
3003 RETURN_FALSE;
3004 }
3005
3006 ret = mbfl_strcut(&string, &result, from, len);
3007 if (ret == NULL) {
3008 RETURN_FALSE;
3009 }
3010
3011 // TODO: avoid reallocation ???
3012 RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
3013 efree(ret->val);
3014 }
3015 /* }}} */
3016
3017 /* {{{ proto int mb_strwidth(string str [, string encoding])
3018 Gets terminal width of a string */
PHP_FUNCTION(mb_strwidth)3019 PHP_FUNCTION(mb_strwidth)
3020 {
3021 int n;
3022 mbfl_string string;
3023 char *enc_name = NULL;
3024 size_t enc_name_len, string_len;
3025
3026 mbfl_string_init(&string);
3027
3028 string.no_language = MBSTRG(language);
3029 string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
3030
3031 if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|s", (char **)&string.val, &string_len, &enc_name, &enc_name_len) == FAILURE) {
3032 return;
3033 }
3034
3035 if (ZEND_SIZE_T_UINT_OVFL(string_len)) {
3036 php_error_docref(NULL, E_WARNING, "String length overflows the max allowed length of %u", UINT_MAX);
3037 return;
3038 }
3039
3040 string.len = (uint32_t)string_len;
3041
3042 if (enc_name != NULL) {
3043 string.no_encoding = mbfl_name2no_encoding(enc_name);
3044 if (string.no_encoding == mbfl_no_encoding_invalid) {
3045 php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc_name);
3046 RETURN_FALSE;
3047 }
3048 }
3049
3050 n = mbfl_strwidth(&string);
3051 if (n >= 0) {
3052 RETVAL_LONG(n);
3053 } else {
3054 RETVAL_FALSE;
3055 }
3056 }
3057 /* }}} */
3058
3059 /* {{{ proto string mb_strimwidth(string str, int start, int width [, string trimmarker [, string encoding]])
3060 Trim the string in terminal width */
PHP_FUNCTION(mb_strimwidth)3061 PHP_FUNCTION(mb_strimwidth)
3062 {
3063 char *str, *trimmarker = NULL, *encoding = NULL;
3064 zend_long from, width;
3065 size_t str_len, trimmarker_len, encoding_len;
3066 mbfl_string string, result, marker, *ret;
3067
3068 if (zend_parse_parameters(ZEND_NUM_ARGS(), "sll|ss", &str, &str_len, &from, &width, &trimmarker, &trimmarker_len, &encoding, &encoding_len) == FAILURE) {
3069 return;
3070 }
3071
3072 mbfl_string_init(&string);
3073 mbfl_string_init(&marker);
3074 string.no_language = MBSTRG(language);
3075 string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
3076 marker.no_language = MBSTRG(language);
3077 marker.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
3078 marker.val = NULL;
3079 marker.len = 0;
3080
3081 if (encoding) {
3082 string.no_encoding = marker.no_encoding = mbfl_name2no_encoding(encoding);
3083 if (string.no_encoding == mbfl_no_encoding_invalid) {
3084 php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", encoding);
3085 RETURN_FALSE;
3086 }
3087 }
3088
3089 string.val = (unsigned char *)str;
3090 string.len = str_len;
3091
3092 if (from < 0 || (size_t)from > str_len) {
3093 php_error_docref(NULL, E_WARNING, "Start position is out of range");
3094 RETURN_FALSE;
3095 }
3096
3097 if (width < 0) {
3098 php_error_docref(NULL, E_WARNING, "Width is negative value");
3099 RETURN_FALSE;
3100 }
3101
3102 if (trimmarker) {
3103 marker.val = (unsigned char *)trimmarker;
3104 marker.len = trimmarker_len;
3105 }
3106
3107 ret = mbfl_strimwidth(&string, &marker, &result, from, width);
3108
3109 if (ret == NULL) {
3110 RETURN_FALSE;
3111 }
3112 // TODO: avoid reallocation ???
3113 RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
3114 efree(ret->val);
3115 }
3116 /* }}} */
3117
3118 /* {{{ MBSTRING_API char *php_mb_convert_encoding() */
php_mb_convert_encoding(const char * input,size_t length,const char * _to_encoding,const char * _from_encodings,size_t * output_len)3119 MBSTRING_API char * php_mb_convert_encoding(const char *input, size_t length, const char *_to_encoding, const char *_from_encodings, size_t *output_len)
3120 {
3121 mbfl_string string, result, *ret;
3122 const mbfl_encoding *from_encoding, *to_encoding;
3123 mbfl_buffer_converter *convd;
3124 size_t size;
3125 const mbfl_encoding **list;
3126 char *output=NULL;
3127
3128 if (output_len) {
3129 *output_len = 0;
3130 }
3131 if (!input) {
3132 return NULL;
3133 }
3134 /* new encoding */
3135 if (_to_encoding && strlen(_to_encoding)) {
3136 to_encoding = mbfl_name2encoding(_to_encoding);
3137 if (!to_encoding) {
3138 php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", _to_encoding);
3139 return NULL;
3140 }
3141 } else {
3142 to_encoding = MBSTRG(current_internal_encoding);
3143 }
3144
3145 /* initialize string */
3146 mbfl_string_init(&string);
3147 mbfl_string_init(&result);
3148 from_encoding = MBSTRG(current_internal_encoding);
3149 string.no_encoding = from_encoding->no_encoding;
3150 string.no_language = MBSTRG(language);
3151 string.val = (unsigned char *)input;
3152 string.len = length;
3153
3154 /* pre-conversion encoding */
3155 if (_from_encodings) {
3156 list = NULL;
3157 size = 0;
3158 php_mb_parse_encoding_list(_from_encodings, strlen(_from_encodings), &list, &size, 0);
3159 if (size == 1) {
3160 from_encoding = *list;
3161 string.no_encoding = from_encoding->no_encoding;
3162 } else if (size > 1) {
3163 /* auto detect */
3164 from_encoding = mbfl_identify_encoding2(&string, list, size, MBSTRG(strict_detection));
3165 if (from_encoding) {
3166 string.no_encoding = from_encoding->no_encoding;
3167 } else {
3168 php_error_docref(NULL, E_WARNING, "Unable to detect character encoding");
3169 from_encoding = &mbfl_encoding_pass;
3170 to_encoding = from_encoding;
3171 string.no_encoding = from_encoding->no_encoding;
3172 }
3173 } else {
3174 php_error_docref(NULL, E_WARNING, "Illegal character encoding specified");
3175 }
3176 if (list != NULL) {
3177 efree((void *)list);
3178 }
3179 }
3180
3181 /* initialize converter */
3182 convd = mbfl_buffer_converter_new2(from_encoding, to_encoding, string.len);
3183 if (convd == NULL) {
3184 php_error_docref(NULL, E_WARNING, "Unable to create character encoding converter");
3185 return NULL;
3186 }
3187 mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
3188 mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
3189
3190 /* do it */
3191 ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
3192 if (ret) {
3193 if (output_len) {
3194 *output_len = ret->len;
3195 }
3196 output = (char *)ret->val;
3197 }
3198
3199 MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
3200 mbfl_buffer_converter_delete(convd);
3201 return output;
3202 }
3203 /* }}} */
3204
3205 /* {{{ proto string mb_convert_encoding(string str, string to-encoding [, mixed from-encoding])
3206 Returns converted string in desired encoding */
PHP_FUNCTION(mb_convert_encoding)3207 PHP_FUNCTION(mb_convert_encoding)
3208 {
3209 char *arg_str, *arg_new;
3210 size_t str_len, new_len;
3211 zval *arg_old = NULL;
3212 size_t size, l, n;
3213 char *_from_encodings = NULL, *ret, *s_free = NULL;
3214
3215 zval *hash_entry;
3216 HashTable *target_hash;
3217
3218 if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|z", &arg_str, &str_len, &arg_new, &new_len, &arg_old) == FAILURE) {
3219 return;
3220 }
3221
3222 if (arg_old) {
3223 switch (Z_TYPE_P(arg_old)) {
3224 case IS_ARRAY:
3225 target_hash = Z_ARRVAL_P(arg_old);
3226 _from_encodings = NULL;
3227
3228 ZEND_HASH_FOREACH_VAL(target_hash, hash_entry) {
3229
3230 convert_to_string_ex(hash_entry);
3231
3232 if ( _from_encodings) {
3233 l = strlen(_from_encodings);
3234 n = strlen(Z_STRVAL_P(hash_entry));
3235 _from_encodings = erealloc(_from_encodings, l+n+2);
3236 memcpy(_from_encodings + l, ",", 1);
3237 memcpy(_from_encodings + l + 1, Z_STRVAL_P(hash_entry), Z_STRLEN_P(hash_entry) + 1);
3238 } else {
3239 _from_encodings = estrdup(Z_STRVAL_P(hash_entry));
3240 }
3241 } ZEND_HASH_FOREACH_END();
3242
3243 if (_from_encodings != NULL && !strlen(_from_encodings)) {
3244 efree(_from_encodings);
3245 _from_encodings = NULL;
3246 }
3247 s_free = _from_encodings;
3248 break;
3249 default:
3250 convert_to_string(arg_old);
3251 _from_encodings = Z_STRVAL_P(arg_old);
3252 break;
3253 }
3254 }
3255
3256 /* new encoding */
3257 ret = php_mb_convert_encoding(arg_str, str_len, arg_new, _from_encodings, &size);
3258 if (ret != NULL) {
3259 // TODO: avoid reallocation ???
3260 RETVAL_STRINGL(ret, size); /* the string is already strdup()'ed */
3261 efree(ret);
3262 } else {
3263 RETVAL_FALSE;
3264 }
3265
3266 if ( s_free) {
3267 efree(s_free);
3268 }
3269 }
3270 /* }}} */
3271
3272 /* {{{ proto string mb_convert_case(string sourcestring, int mode [, string encoding])
3273 Returns a case-folded version of sourcestring */
PHP_FUNCTION(mb_convert_case)3274 PHP_FUNCTION(mb_convert_case)
3275 {
3276 const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
3277 char *str;
3278 size_t str_len, from_encoding_len;
3279 zend_long case_mode = 0;
3280 char *newstr;
3281 size_t ret_len;
3282
3283 RETVAL_FALSE;
3284 if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl|s!", &str, &str_len,
3285 &case_mode, &from_encoding, &from_encoding_len) == FAILURE) {
3286 return;
3287 }
3288
3289 newstr = php_unicode_convert_case(case_mode, str, (size_t) str_len, &ret_len, from_encoding);
3290
3291 if (newstr) {
3292 // TODO: avoid reallocation ???
3293 RETVAL_STRINGL(newstr, ret_len);
3294 efree(newstr);
3295 }
3296 }
3297 /* }}} */
3298
3299 /* {{{ proto string mb_strtoupper(string sourcestring [, string encoding])
 * Returns an uppercased version of sourcestring
3301 */
PHP_FUNCTION(mb_strtoupper)3302 PHP_FUNCTION(mb_strtoupper)
3303 {
3304 const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
3305 char *str;
3306 size_t str_len, from_encoding_len;
3307 char *newstr;
3308 size_t ret_len;
3309
3310 if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|s!", &str, &str_len,
3311 &from_encoding, &from_encoding_len) == FAILURE) {
3312 return;
3313 }
3314 newstr = php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, str, (size_t) str_len, &ret_len, from_encoding);
3315
3316 if (newstr) {
3317 // TODO: avoid reallocation ???
3318 RETVAL_STRINGL(newstr, ret_len);
3319 efree(newstr);
3320 return;
3321 }
3322 RETURN_FALSE;
3323 }
3324 /* }}} */
3325
3326 /* {{{ proto string mb_strtolower(string sourcestring [, string encoding])
3327 * Returns a lowercased version of sourcestring
3328 */
PHP_FUNCTION(mb_strtolower)3329 PHP_FUNCTION(mb_strtolower)
3330 {
3331 const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
3332 char *str;
3333 size_t str_len, from_encoding_len;
3334 char *newstr;
3335 size_t ret_len;
3336
3337 if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|s!", &str, &str_len,
3338 &from_encoding, &from_encoding_len) == FAILURE) {
3339 return;
3340 }
3341 newstr = php_unicode_convert_case(PHP_UNICODE_CASE_LOWER, str, (size_t) str_len, &ret_len, from_encoding);
3342
3343 if (newstr) {
3344 // TODO: avoid reallocation ???
3345 RETVAL_STRINGL(newstr, ret_len);
3346 efree(newstr);
3347 return;
3348 }
3349 RETURN_FALSE;
3350 }
3351 /* }}} */
3352
3353 /* {{{ proto string mb_detect_encoding(string str [, mixed encoding_list [, bool strict]])
   Returns the name of the detected encoding of the given string */
PHP_FUNCTION(mb_detect_encoding)3355 PHP_FUNCTION(mb_detect_encoding)
3356 {
3357 char *str;
3358 size_t str_len;
3359 zend_bool strict=0;
3360 zval *encoding_list = NULL;
3361
3362 mbfl_string string;
3363 const mbfl_encoding *ret;
3364 const mbfl_encoding **elist, **list;
3365 size_t size;
3366
3367 if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|z!b", &str, &str_len, &encoding_list, &strict) == FAILURE) {
3368 return;
3369 }
3370
3371 /* make encoding list */
3372 list = NULL;
3373 size = 0;
3374 if (encoding_list) {
3375 switch (Z_TYPE_P(encoding_list)) {
3376 case IS_ARRAY:
3377 if (FAILURE == php_mb_parse_encoding_array(encoding_list, &list, &size, 0)) {
3378 if (list) {
3379 efree(list);
3380 list = NULL;
3381 size = 0;
3382 }
3383 }
3384 break;
3385 default:
3386 convert_to_string(encoding_list);
3387 if (FAILURE == php_mb_parse_encoding_list(Z_STRVAL_P(encoding_list), Z_STRLEN_P(encoding_list), &list, &size, 0)) {
3388 if (list) {
3389 efree(list);
3390 list = NULL;
3391 size = 0;
3392 }
3393 }
3394 break;
3395 }
3396 if (size <= 0) {
3397 php_error_docref(NULL, E_WARNING, "Illegal argument");
3398 }
3399 }
3400
3401 if (ZEND_NUM_ARGS() < 3) {
3402 strict = (zend_bool)MBSTRG(strict_detection);
3403 }
3404
3405 if (size > 0 && list != NULL) {
3406 elist = list;
3407 } else {
3408 elist = MBSTRG(current_detect_order_list);
3409 size = MBSTRG(current_detect_order_list_size);
3410 }
3411
3412 mbfl_string_init(&string);
3413 string.no_language = MBSTRG(language);
3414 string.val = (unsigned char *)str;
3415 string.len = str_len;
3416 ret = mbfl_identify_encoding2(&string, elist, size, strict);
3417
3418 if (list != NULL) {
3419 efree((void *)list);
3420 }
3421
3422 if (ret == NULL) {
3423 RETURN_FALSE;
3424 }
3425
3426 RETVAL_STRING((char *)ret->name);
3427 }
3428 /* }}} */
3429
3430 /* {{{ proto mixed mb_list_encodings()
3431 Returns an array of all supported entity encodings */
PHP_FUNCTION(mb_list_encodings)3432 PHP_FUNCTION(mb_list_encodings)
3433 {
3434 const mbfl_encoding **encodings;
3435 const mbfl_encoding *encoding;
3436 int i;
3437
3438 array_init(return_value);
3439 i = 0;
3440 encodings = mbfl_get_supported_encodings();
3441 while ((encoding = encodings[i++]) != NULL) {
3442 add_next_index_string(return_value, (char *) encoding->name);
3443 }
3444 }
3445 /* }}} */
3446
3447 /* {{{ proto array mb_encoding_aliases(string encoding)
3448 Returns an array of the aliases of a given encoding name */
PHP_FUNCTION(mb_encoding_aliases)3449 PHP_FUNCTION(mb_encoding_aliases)
3450 {
3451 const mbfl_encoding *encoding;
3452 char *name = NULL;
3453 size_t name_len;
3454
3455 if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &name, &name_len) == FAILURE) {
3456 return;
3457 }
3458
3459 encoding = mbfl_name2encoding(name);
3460 if (!encoding) {
3461 php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", name);
3462 RETURN_FALSE;
3463 }
3464
3465 array_init(return_value);
3466 if (encoding->aliases != NULL) {
3467 const char **alias;
3468 for (alias = *encoding->aliases; *alias; ++alias) {
3469 add_next_index_string(return_value, (char *)*alias);
3470 }
3471 }
3472 }
3473 /* }}} */
3474
3475 /* {{{ proto string mb_encode_mimeheader(string str [, string charset [, string transfer-encoding [, string linefeed [, int indent]]]])
3476 Converts the string to MIME "encoded-word" in the format of =?charset?(B|Q)?encoded_string?= */
PHP_FUNCTION(mb_encode_mimeheader)3477 PHP_FUNCTION(mb_encode_mimeheader)
3478 {
3479 enum mbfl_no_encoding charset, transenc;
3480 mbfl_string string, result, *ret;
3481 char *charset_name = NULL;
3482 size_t charset_name_len;
3483 char *trans_enc_name = NULL;
3484 size_t trans_enc_name_len;
3485 char *linefeed = "\r\n";
3486 size_t linefeed_len, string_len;
3487 zend_long indent = 0;
3488
3489 mbfl_string_init(&string);
3490 string.no_language = MBSTRG(language);
3491 string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
3492
3493 if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|sssl", (char **)&string.val, &string_len, &charset_name, &charset_name_len, &trans_enc_name, &trans_enc_name_len, &linefeed, &linefeed_len, &indent) == FAILURE) {
3494 return;
3495 }
3496
3497 if (ZEND_SIZE_T_UINT_OVFL(string_len)) {
3498 php_error_docref(NULL, E_WARNING, "String length overflows the max allowed length of %u", UINT_MAX);
3499 return;
3500 }
3501
3502 string.len = (uint32_t)string_len;
3503
3504 charset = mbfl_no_encoding_pass;
3505 transenc = mbfl_no_encoding_base64;
3506
3507 if (charset_name != NULL) {
3508 charset = mbfl_name2no_encoding(charset_name);
3509 if (charset == mbfl_no_encoding_invalid) {
3510 php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", charset_name);
3511 RETURN_FALSE;
3512 }
3513 } else {
3514 const mbfl_language *lang = mbfl_no2language(MBSTRG(language));
3515 if (lang != NULL) {
3516 charset = lang->mail_charset;
3517 transenc = lang->mail_header_encoding;
3518 }
3519 }
3520
3521 if (trans_enc_name != NULL) {
3522 if (*trans_enc_name == 'B' || *trans_enc_name == 'b') {
3523 transenc = mbfl_no_encoding_base64;
3524 } else if (*trans_enc_name == 'Q' || *trans_enc_name == 'q') {
3525 transenc = mbfl_no_encoding_qprint;
3526 }
3527 }
3528
3529 mbfl_string_init(&result);
3530 ret = mbfl_mime_header_encode(&string, &result, charset, transenc, linefeed, indent);
3531 if (ret != NULL) {
3532 // TODO: avoid reallocation ???
3533 RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
3534 efree(ret->val);
3535 } else {
3536 RETVAL_FALSE;
3537 }
3538 }
3539 /* }}} */
3540
3541 /* {{{ proto string mb_decode_mimeheader(string string)
3542 Decodes the MIME "encoded-word" in the string */
PHP_FUNCTION(mb_decode_mimeheader)3543 PHP_FUNCTION(mb_decode_mimeheader)
3544 {
3545 mbfl_string string, result, *ret;
3546 size_t string_len;
3547
3548 mbfl_string_init(&string);
3549 string.no_language = MBSTRG(language);
3550 string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
3551
3552 if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", (char **)&string.val, &string_len) == FAILURE) {
3553 return;
3554 }
3555
3556 if (ZEND_SIZE_T_UINT_OVFL(string_len)) {
3557 php_error_docref(NULL, E_WARNING, "String length overflows the max allowed length of %u", UINT_MAX);
3558 return;
3559 }
3560
3561 string.len = (uint32_t)string_len;
3562
3563 mbfl_string_init(&result);
3564 ret = mbfl_mime_header_decode(&string, &result, MBSTRG(current_internal_encoding)->no_encoding);
3565 if (ret != NULL) {
3566 // TODO: avoid reallocation ???
3567 RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
3568 efree(ret->val);
3569 } else {
3570 RETVAL_FALSE;
3571 }
3572 }
3573 /* }}} */
3574
3575 /* {{{ proto string mb_convert_kana(string str [, string option] [, string encoding])
3576 Conversion between full-width character and half-width character (Japanese) */
PHP_FUNCTION(mb_convert_kana)3577 PHP_FUNCTION(mb_convert_kana)
3578 {
3579 int opt, i;
3580 mbfl_string string, result, *ret;
3581 char *optstr = NULL;
3582 size_t optstr_len;
3583 char *encname = NULL;
3584 size_t encname_len, string_len;
3585
3586 mbfl_string_init(&string);
3587 string.no_language = MBSTRG(language);
3588 string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
3589
3590 if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|ss", (char **)&string.val, &string_len, &optstr, &optstr_len, &encname, &encname_len) == FAILURE) {
3591 return;
3592 }
3593
3594 if (ZEND_SIZE_T_UINT_OVFL(string_len)) {
3595 php_error_docref(NULL, E_WARNING, "String length overflows the max allowed length of %u", UINT_MAX);
3596 return;
3597 }
3598
3599 string.len = (uint32_t)string_len;
3600
3601 /* option */
3602 if (optstr != NULL) {
3603 char *p = optstr;
3604 int n = optstr_len;
3605 i = 0;
3606 opt = 0;
3607 while (i < n) {
3608 i++;
3609 switch (*p++) {
3610 case 'A':
3611 opt |= 0x1;
3612 break;
3613 case 'a':
3614 opt |= 0x10;
3615 break;
3616 case 'R':
3617 opt |= 0x2;
3618 break;
3619 case 'r':
3620 opt |= 0x20;
3621 break;
3622 case 'N':
3623 opt |= 0x4;
3624 break;
3625 case 'n':
3626 opt |= 0x40;
3627 break;
3628 case 'S':
3629 opt |= 0x8;
3630 break;
3631 case 's':
3632 opt |= 0x80;
3633 break;
3634 case 'K':
3635 opt |= 0x100;
3636 break;
3637 case 'k':
3638 opt |= 0x1000;
3639 break;
3640 case 'H':
3641 opt |= 0x200;
3642 break;
3643 case 'h':
3644 opt |= 0x2000;
3645 break;
3646 case 'V':
3647 opt |= 0x800;
3648 break;
3649 case 'C':
3650 opt |= 0x10000;
3651 break;
3652 case 'c':
3653 opt |= 0x20000;
3654 break;
3655 case 'M':
3656 opt |= 0x100000;
3657 break;
3658 case 'm':
3659 opt |= 0x200000;
3660 break;
3661 }
3662 }
3663 } else {
3664 opt = 0x900;
3665 }
3666
3667 /* encoding */
3668 if (encname != NULL) {
3669 string.no_encoding = mbfl_name2no_encoding(encname);
3670 if (string.no_encoding == mbfl_no_encoding_invalid) {
3671 php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", encname);
3672 RETURN_FALSE;
3673 }
3674 }
3675
3676 ret = mbfl_ja_jp_hantozen(&string, &result, opt);
3677 if (ret != NULL) {
3678 // TODO: avoid reallocation ???
3679 RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
3680 efree(ret->val);
3681 } else {
3682 RETVAL_FALSE;
3683 }
3684 }
3685 /* }}} */
3686
/* Initial capacity, and growth increment (both in zval slots), of the
 * explicit traversal stack used by mb_convert_variables(). */
#define PHP_MBSTR_STACK_BLOCK_SIZE 32
3688
3689 /* {{{ proto string mb_convert_variables(string to-encoding, mixed from-encoding, mixed vars [, ...])
   Converts the strings held in the given variables to the desired encoding */
PHP_FUNCTION(mb_convert_variables)
{
	/*
	 * Converts the strings found in the given variables (including strings
	 * nested inside arrays/objects) to the target encoding, in place.
	 *
	 * Arguments ("sz+"): target encoding name, source encoding(s) as an
	 * array or a string list, then one or more variables.
	 *
	 * The work is done in two passes that both walk the variables with an
	 * explicit, growable stack of containers instead of recursion:
	 *   1. detection - only when more than one source encoding is given;
	 *      every string is fed to an encoding detector;
	 *   2. conversion - skipped when the source encoding is the "pass"
	 *      encoding; every string zval is replaced by its converted copy.
	 *
	 * Returns the name of the source encoding that was used, or FALSE on an
	 * unknown target encoding, a detected recursive reference, or when the
	 * converter cannot be created.
	 */
	zval *args, *stack, *var, *hash_entry, *hash_entry_ptr, *zfrom_enc;
	HashTable *target_hash;
	mbfl_string string, result, *ret;
	const mbfl_encoding *from_encoding, *to_encoding;
	mbfl_encoding_detector *identd;
	mbfl_buffer_converter *convd;
	int n, argc, stack_level, stack_max;
	size_t to_enc_len;
	size_t elistsz;
	const mbfl_encoding **elist;
	char *to_enc;
	void *ptmp;
	int recursion_error = 0;

	if (zend_parse_parameters(ZEND_NUM_ARGS(), "sz+", &to_enc, &to_enc_len, &zfrom_enc, &args, &argc) == FAILURE) {
		return;
	}

	/* new encoding */
	to_encoding = mbfl_name2encoding(to_enc);
	if (!to_encoding) {
		php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", to_enc);
		RETURN_FALSE;
	}

	/* initialize string */
	mbfl_string_init(&string);
	mbfl_string_init(&result);
	from_encoding = MBSTRG(current_internal_encoding);
	string.no_encoding = from_encoding->no_encoding;
	string.no_language = MBSTRG(language);

	/* pre-conversion encoding */
	/* Parse the source-encoding argument into elist/elistsz; the return
	 * values of the parsers are not inspected, only the resulting size. */
	elist = NULL;
	elistsz = 0;
	switch (Z_TYPE_P(zfrom_enc)) {
		case IS_ARRAY:
			php_mb_parse_encoding_array(zfrom_enc, &elist, &elistsz, 0);
			break;
		default:
			convert_to_string_ex(zfrom_enc);
			php_mb_parse_encoding_list(Z_STRVAL_P(zfrom_enc), Z_STRLEN_P(zfrom_enc), &elist, &elistsz, 0);
			break;
	}

	if (elistsz <= 0) {
		/* no usable source encoding: nothing will be converted below */
		from_encoding = &mbfl_encoding_pass;
	} else if (elistsz == 1) {
		from_encoding = *elist;
	} else {
		/* auto detect */
		from_encoding = NULL;
		stack_max = PHP_MBSTR_STACK_BLOCK_SIZE;
		stack = (zval *)safe_emalloc(stack_max, sizeof(zval), 0);
		stack_level = 0;
		identd = mbfl_encoding_detector_new2(elist, elistsz, MBSTRG(strict_detection));
		if (identd != NULL) {
			n = 0;
			/* Loop until every argument is consumed and no container is
			 * left suspended on the stack. */
			while (n < argc || stack_level > 0) {
				if (stack_level <= 0) {
					/* start on the next top-level argument */
					var = &args[n++];
					ZVAL_DEREF(var);
					SEPARATE_ZVAL_NOREF(var);
					if (Z_TYPE_P(var) == IS_ARRAY || Z_TYPE_P(var) == IS_OBJECT) {
						target_hash = HASH_OF(var);
						if (target_hash != NULL) {
							zend_hash_internal_pointer_reset(target_hash);
						}
					}
				} else {
					/* resume the container suspended most recently; its
					 * internal pointer already sits past the nested entry */
					stack_level--;
					var = &stack[stack_level];
				}
				if (Z_TYPE_P(var) == IS_ARRAY || Z_TYPE_P(var) == IS_OBJECT) {
					target_hash = HASH_OF(var);
					if (target_hash != NULL) {
						while ((hash_entry = zend_hash_get_current_data(target_hash)) != NULL) {
							/* NOTE(review): nApplyCount is incremented on
							 * every iteration over the same table and is not
							 * decremented on the normal path in this pass, so
							 * a second element of one table looks like it
							 * would trip the check - confirm intent. */
							if (!Z_IMMUTABLE_P(var)) {
								if (++target_hash->u.v.nApplyCount > 1) {
									--target_hash->u.v.nApplyCount;
									recursion_error = 1;
									goto detect_end;
								}
							}
							zend_hash_move_forward(target_hash);
							if (Z_TYPE_P(hash_entry) == IS_INDIRECT) {
								hash_entry = Z_INDIRECT_P(hash_entry);
							}
							ZVAL_DEREF(hash_entry);
							if (Z_TYPE_P(hash_entry) == IS_ARRAY || Z_TYPE_P(hash_entry) == IS_OBJECT) {
								/* nested container: suspend the current one
								 * on the stack (growing it block-wise when
								 * full) and descend */
								if (stack_level >= stack_max) {
									stack_max += PHP_MBSTR_STACK_BLOCK_SIZE;
									ptmp = erealloc(stack, sizeof(zval) * stack_max);
									stack = (zval *)ptmp;
								}
								ZVAL_COPY_VALUE(&stack[stack_level], var);
								stack_level++;
								var = hash_entry;
								target_hash = HASH_OF(var);
								if (target_hash != NULL) {
									zend_hash_internal_pointer_reset(target_hash);
									continue;
								}
							} else if (Z_TYPE_P(hash_entry) == IS_STRING) {
								string.val = (unsigned char *)Z_STRVAL_P(hash_entry);
								string.len = Z_STRLEN_P(hash_entry);
								if (mbfl_encoding_detector_feed(identd, &string)) {
									goto detect_end;		/* complete detecting */
								}
							}
						}
					}
				} else if (Z_TYPE_P(var) == IS_STRING) {
					string.val = (unsigned char *)Z_STRVAL_P(var);
					string.len = Z_STRLEN_P(var);
					if (mbfl_encoding_detector_feed(identd, &string)) {
						goto detect_end;		/* complete detecting */
					}
				}
			}
detect_end:
			/* reached on normal completion, early detection and recursion */
			from_encoding = mbfl_encoding_detector_judge2(identd);
			mbfl_encoding_detector_delete(identd);
		}
		if (recursion_error) {
			/* unwind: lower the counter of every container still stacked
			 * whose count is above 1, then release everything and fail */
			while(stack_level-- && (var = &stack[stack_level])) {
				if (!Z_IMMUTABLE_P(var)) {
					if (HASH_OF(var)->u.v.nApplyCount > 1) {
						HASH_OF(var)->u.v.nApplyCount--;
					}
				}
			}
			efree(stack);
			if (elist != NULL) {
				efree((void *)elist);
			}
			php_error_docref(NULL, E_WARNING, "Cannot handle recursive references");
			RETURN_FALSE;
		}
		efree(stack);

		if (!from_encoding) {
			/* detection failed: warn and leave the data untouched */
			php_error_docref(NULL, E_WARNING, "Unable to detect encoding");
			from_encoding = &mbfl_encoding_pass;
		}
	}
	if (elist != NULL) {
		efree((void *)elist);
	}
	/* create converter */
	/* no converter is created for the "pass" encoding, which disables the
	 * conversion pass below */
	convd = NULL;
	if (from_encoding != &mbfl_encoding_pass) {
		convd = mbfl_buffer_converter_new2(from_encoding, to_encoding, 0);
		if (convd == NULL) {
			php_error_docref(NULL, E_WARNING, "Unable to create converter");
			RETURN_FALSE;
		}
		mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
		mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
	}

	/* convert */
	if (convd != NULL) {
		stack_max = PHP_MBSTR_STACK_BLOCK_SIZE;
		stack = (zval*)safe_emalloc(stack_max, sizeof(zval), 0);
		stack_level = 0;
		n = 0;
		/* same explicit-stack walk as in the detection pass */
		while (n < argc || stack_level > 0) {
			if (stack_level <= 0) {
				/* start on the next top-level argument */
				var = &args[n++];
				ZVAL_DEREF(var);
				SEPARATE_ZVAL_NOREF(var);
				if (Z_TYPE_P(var) == IS_ARRAY || Z_TYPE_P(var) == IS_OBJECT) {
					target_hash = HASH_OF(var);
					if (target_hash != NULL) {
						zend_hash_internal_pointer_reset(target_hash);
					}
				}
			} else {
				/* resume the container suspended most recently */
				stack_level--;
				var = &stack[stack_level];
			}
			if (Z_TYPE_P(var) == IS_ARRAY || Z_TYPE_P(var) == IS_OBJECT) {
				target_hash = HASH_OF(var);
				if (target_hash != NULL) {
					while ((hash_entry_ptr = zend_hash_get_current_data(target_hash)) != NULL) {
						zend_hash_move_forward(target_hash);
						if (Z_TYPE_P(hash_entry_ptr) == IS_INDIRECT) {
							hash_entry_ptr = Z_INDIRECT_P(hash_entry_ptr);
						}
						/* hash_entry_ptr keeps the slot itself (possibly a
						 * reference); hash_entry is the dereferenced value */
						hash_entry = hash_entry_ptr;
						ZVAL_DEREF(hash_entry);
						if (Z_TYPE_P(hash_entry) == IS_ARRAY || Z_TYPE_P(hash_entry) == IS_OBJECT) {
							/* in this pass the counter is bumped once per
							 * nested container that is entered */
							if (!Z_IMMUTABLE_P(hash_entry)) {
								if (++(HASH_OF(hash_entry)->u.v.nApplyCount) > 1) {
									--(HASH_OF(hash_entry)->u.v.nApplyCount);
									recursion_error = 1;
									goto conv_end;
								}
							}
							/* suspend the current container and descend */
							if (stack_level >= stack_max) {
								stack_max += PHP_MBSTR_STACK_BLOCK_SIZE;
								ptmp = erealloc(stack, sizeof(zval) * stack_max);
								stack = (zval *)ptmp;
							}
							ZVAL_COPY_VALUE(&stack[stack_level], var);
							stack_level++;
							var = hash_entry;
							SEPARATE_ZVAL(hash_entry);
							target_hash = HASH_OF(var);
							if (target_hash != NULL) {
								zend_hash_internal_pointer_reset(target_hash);
								continue;
							}
						} else if (Z_TYPE_P(hash_entry) == IS_STRING) {
							string.val = (unsigned char *)Z_STRVAL_P(hash_entry);
							string.len = Z_STRLEN_P(hash_entry);
							ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
							if (ret != NULL) {
								/* replace the slot with a copy of the
								 * converted text, then free the mbfl buffer */
								zval_ptr_dtor(hash_entry_ptr);
								// TODO: avoid reallocation ???
								ZVAL_STRINGL(hash_entry_ptr, (char *)ret->val, ret->len);
								efree(ret->val);
							}
						}
					}
				}
			} else if (Z_TYPE_P(var) == IS_STRING) {
				string.val = (unsigned char *)Z_STRVAL_P(var);
				string.len = Z_STRLEN_P(var);
				ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
				if (ret != NULL) {
					/* replace the variable with a copy of the converted
					 * text, then free the mbfl buffer */
					zval_ptr_dtor(var);
					// TODO: avoid reallocation ???
					ZVAL_STRINGL(var, (char *)ret->val, ret->len);
					efree(ret->val);
				}
			}
		}

conv_end:
		/* account for illegal characters and dispose of the converter on
		 * both the normal and the recursion-error path */
		MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
		mbfl_buffer_converter_delete(convd);

		if (recursion_error) {
			/* unwind: lower the counter of every container still stacked
			 * whose count is above 1, then fail */
			while(stack_level-- && (var = &stack[stack_level])) {
				if (!Z_IMMUTABLE_P(var)) {
					if (HASH_OF(var)->u.v.nApplyCount > 1) {
						HASH_OF(var)->u.v.nApplyCount--;
					}
				}
			}
			efree(stack);
			php_error_docref(NULL, E_WARNING, "Cannot handle recursive references");
			RETURN_FALSE;
		}
		efree(stack);
	}

	/* report which source encoding was assumed or detected */
	if (from_encoding) {
		RETURN_STRING(from_encoding->name);
	} else {
		RETURN_FALSE;
	}
}
3958 /* }}} */
3959
3960 /* {{{ HTML numeric entity */
3961 /* {{{ static void php_mb_numericentity_exec() */
3962 static void
php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS,int type)3963 php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS, int type)
3964 {
3965 char *str, *encoding = NULL;
3966 size_t str_len, encoding_len;
3967 zval *zconvmap, *hash_entry;
3968 HashTable *target_hash;
3969 int i, *convmap, *mapelm, mapsize=0;
3970 zend_bool is_hex = 0;
3971 mbfl_string string, result, *ret;
3972 enum mbfl_no_encoding no_encoding;
3973
3974 if (zend_parse_parameters(ZEND_NUM_ARGS(), "sz|sb", &str, &str_len, &zconvmap, &encoding, &encoding_len, &is_hex) == FAILURE) {
3975 return;
3976 }
3977
3978 mbfl_string_init(&string);
3979 string.no_language = MBSTRG(language);
3980 string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
3981 string.val = (unsigned char *)str;
3982 string.len = str_len;
3983
3984 /* encoding */
3985 if (encoding && encoding_len > 0) {
3986 no_encoding = mbfl_name2no_encoding(encoding);
3987 if (no_encoding == mbfl_no_encoding_invalid) {
3988 php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", encoding);
3989 RETURN_FALSE;
3990 } else {
3991 string.no_encoding = no_encoding;
3992 }
3993 }
3994
3995 if (type == 0 && is_hex) {
3996 type = 2; /* output in hex format */
3997 }
3998
3999 /* conversion map */
4000 convmap = NULL;
4001 if (Z_TYPE_P(zconvmap) == IS_ARRAY) {
4002 target_hash = Z_ARRVAL_P(zconvmap);
4003 i = zend_hash_num_elements(target_hash);
4004 if (i > 0) {
4005 convmap = (int *)safe_emalloc(i, sizeof(int), 0);
4006 mapelm = convmap;
4007 mapsize = 0;
4008 ZEND_HASH_FOREACH_VAL(target_hash, hash_entry) {
4009 convert_to_long_ex(hash_entry);
4010 *mapelm++ = Z_LVAL_P(hash_entry);
4011 mapsize++;
4012 } ZEND_HASH_FOREACH_END();
4013 }
4014 }
4015 if (convmap == NULL) {
4016 RETURN_FALSE;
4017 }
4018 mapsize /= 4;
4019
4020 ret = mbfl_html_numeric_entity(&string, &result, convmap, mapsize, type);
4021 if (ret != NULL) {
4022 // TODO: avoid reallocation ???
4023 RETVAL_STRINGL((char *)ret->val, ret->len);
4024 efree(ret->val);
4025 } else {
4026 RETVAL_FALSE;
4027 }
4028 efree((void *)convmap);
4029 }
4030 /* }}} */
4031
4032 /* {{{ proto string mb_encode_numericentity(string string, array convmap [, string encoding [, bool is_hex]])
4033 Converts specified characters to HTML numeric entities */
PHP_FUNCTION(mb_encode_numericentity)
{
	/* type 0 = encode; the shared worker switches to hex output when the
	 * optional is_hex argument is true */
	php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
}
4038 /* }}} */
4039
4040 /* {{{ proto string mb_decode_numericentity(string string, array convmap [, string encoding])
4041 Converts HTML numeric entities to character code */
PHP_FUNCTION(mb_decode_numericentity)
{
	/* type 1 = decode; all argument parsing happens in the shared worker */
	php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
}
4046 /* }}} */
4047 /* }}} */
4048
4049 /* {{{ proto int mb_send_mail(string to, string subject, string message [, string additional_headers [, string additional_parameters]])
4050 * Sends an email message with MIME scheme
4051 */
4052
/*
 * If str[pos] begins a folded-header separator (CR LF followed by a space
 * or tab), advance pos over the CR LF and all but the last of the following
 * spaces/tabs, then `continue` the enclosing loop. Because of the embedded
 * `continue`, this macro may only be used directly inside a loop body, and
 * it cannot be wrapped in do { } while (0). Both arguments are evaluated
 * several times; pos must be a modifiable lvalue.
 */
#define SKIP_LONG_HEADER_SEP_MBSTRING(str, pos) \
	if (str[pos] == '\r' && str[pos + 1] == '\n' && (str[pos + 2] == ' ' || str[pos + 2] == '\t')) { \
		pos += 2; \
		while (str[pos + 1] == ' ' || str[pos + 1] == '\t') { \
			pos++; \
		} \
		continue; \
	}
4061
/*
 * Overwrite every NUL byte inside the first len bytes of str with a space,
 * modifying the buffer in place. Uses the variables `pp` and `ee`, which
 * must be declared as char pointers in the scope where the macro is expanded.
 */
#define MAIL_ASCIIZ_CHECK_MBSTRING(str, len) \
	pp = str; \
	ee = pp + len; \
	while ((pp = memchr(pp, '\0', (ee - pp)))) { \
		*pp = ' '; \
	} \
4068
_php_mbstr_parse_mail_headers(HashTable * ht,const char * str,size_t str_len)4069 static int _php_mbstr_parse_mail_headers(HashTable *ht, const char *str, size_t str_len)
4070 {
4071 const char *ps;
4072 size_t icnt;
4073 int state = 0;
4074 int crlf_state = -1;
4075 char *token = NULL;
4076 size_t token_pos = 0;
4077 zend_string *fld_name, *fld_val;
4078
4079 ps = str;
4080 icnt = str_len;
4081 fld_name = fld_val = NULL;
4082
4083 /*
4084 * C o n t e n t - T y p e : t e x t / h t m l \r\n
4085 * ^ ^^^^^^^^^^^^^^^^^^^^^ ^^^ ^^^^^^^^^^^^^^^^^ ^^^^
4086 * state 0 1 2 3
4087 *
4088 * C o n t e n t - T y p e : t e x t / h t m l \r\n
4089 * ^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^
4090 * crlf_state -1 0 1 -1
4091 *
4092 */
4093
4094 while (icnt > 0) {
4095 switch (*ps) {
4096 case ':':
4097 if (crlf_state == 1) {
4098 token_pos++;
4099 }
4100
4101 if (state == 0 || state == 1) {
4102 if(token && token_pos > 0) {
4103 fld_name = zend_string_init(token, token_pos, 0);
4104 }
4105 state = 2;
4106 } else {
4107 token_pos++;
4108 }
4109
4110 crlf_state = 0;
4111 break;
4112
4113 case '\n':
4114 if (crlf_state == -1) {
4115 goto out;
4116 }
4117 crlf_state = -1;
4118 break;
4119
4120 case '\r':
4121 if (crlf_state == 1) {
4122 token_pos++;
4123 } else {
4124 crlf_state = 1;
4125 }
4126 break;
4127
4128 case ' ': case '\t':
4129 if (crlf_state == -1) {
4130 if (state == 3) {
4131 /* continuing from the previous line */
4132 state = 4;
4133 } else {
4134 /* simply skipping this new line */
4135 state = 5;
4136 }
4137 } else {
4138 if (crlf_state == 1) {
4139 token_pos++;
4140 }
4141 if (state == 1 || state == 3) {
4142 token_pos++;
4143 }
4144 }
4145 crlf_state = 0;
4146 break;
4147
4148 default:
4149 switch (state) {
4150 case 0:
4151 token = (char*)ps;
4152 token_pos = 0;
4153 state = 1;
4154 break;
4155
4156 case 2:
4157 if (crlf_state != -1) {
4158 token = (char*)ps;
4159 token_pos = 0;
4160
4161 state = 3;
4162 break;
4163 }
4164 /* break is missing intentionally */
4165
4166 case 3:
4167 if (crlf_state == -1) {
4168 if(token && token_pos > 0) {
4169 fld_val = zend_string_init(token, token_pos, 0);
4170 }
4171
4172 if (fld_name != NULL && fld_val != NULL) {
4173 zval val;
4174 /* FIXME: some locale free implementation is
4175 * really required here,,, */
4176 php_strtoupper(ZSTR_VAL(fld_name), ZSTR_LEN(fld_name));
4177 ZVAL_STR(&val, fld_val);
4178
4179 zend_hash_update(ht, fld_name, &val);
4180
4181 zend_string_release(fld_name);
4182 }
4183
4184 fld_name = fld_val = NULL;
4185 token = (char*)ps;
4186 token_pos = 0;
4187
4188 state = 1;
4189 }
4190 break;
4191
4192 case 4:
4193 token_pos++;
4194 state = 3;
4195 break;
4196 }
4197
4198 if (crlf_state == 1) {
4199 token_pos++;
4200 }
4201
4202 token_pos++;
4203
4204 crlf_state = 0;
4205 break;
4206 }
4207 ps++, icnt--;
4208 }
4209 out:
4210 if (state == 2) {
4211 token = "";
4212 token_pos = 0;
4213
4214 state = 3;
4215 }
4216 if (state == 3) {
4217 if(token && token_pos > 0) {
4218 fld_val = zend_string_init(token, token_pos, 0);
4219 }
4220 if (fld_name != NULL && fld_val != NULL) {
4221 zval val;
4222 /* FIXME: some locale free implementation is
4223 * really required here,,, */
4224 php_strtoupper(ZSTR_VAL(fld_name), ZSTR_LEN(fld_name));
4225 ZVAL_STR(&val, fld_val);
4226
4227 zend_hash_update(ht, fld_name, &val);
4228
4229 zend_string_release(fld_name);
4230 }
4231 }
4232 return state;
4233 }
4234
/*
 * mb_send_mail(to, subject, message [, additional_headers
 * [, additional_parameters]])
 *
 * Encodes the subject as a MIME header and converts the message body to the
 * mail charset / transfer encoding of the current mbstring language (or the
 * ones given by Content-Type / Content-Transfer-Encoding in the additional
 * headers), appends any missing MIME headers and hands everything to
 * php_mail(). Sets return_value to TRUE when php_mail() succeeds and no
 * earlier error was flagged, FALSE otherwise.
 */
PHP_FUNCTION(mb_send_mail)
{
	int n;
	char *to = NULL;
	size_t to_len;
	char *message = NULL;
	size_t message_len;
	char *headers = NULL;
	size_t headers_len;
	char *subject = NULL;
	zend_string *extra_cmd = NULL;
	size_t subject_len;
	int i;
	char *to_r = NULL;
	char *force_extra_parameters = INI_STR("mail.force_extra_parameters");
	/* set when the caller already supplied the corresponding header, so it
	 * must not be appended a second time */
	struct {
		int cnt_type:1;
		int cnt_trans_enc:1;
	} suppressed_hdrs = { 0, 0 };

	char *message_buf = NULL, *subject_buf = NULL, *p;
	mbfl_string orig_str, conv_str;
	mbfl_string *pstr;	/* pointer to mbfl string for return value */
	enum mbfl_no_encoding
		tran_cs,	/* transfer text charset */
		head_enc,	/* header transfer encoding */
		body_enc;	/* body transfer encoding */
	mbfl_memory_device device;	/* automatically allocatable buffer for additional header */
	const mbfl_language *lang;
	int err = 0;
	HashTable ht_headers;
	zval *s;
	extern void mbfl_memory_device_unput(mbfl_memory_device *device);
	char *pp, *ee; /* scratch pointers used by MAIL_ASCIIZ_CHECK_MBSTRING */

	/* initialize */
	mbfl_memory_device_init(&device, 0, 0);
	mbfl_string_init(&orig_str);
	mbfl_string_init(&conv_str);

	/* character-set, transfer-encoding: UTF-8/base64 unless the current
	 * language defines its own mail settings */
	tran_cs = mbfl_no_encoding_utf8;
	head_enc = mbfl_no_encoding_base64;
	body_enc = mbfl_no_encoding_base64;
	lang = mbfl_no2language(MBSTRG(language));
	if (lang != NULL) {
		tran_cs = lang->mail_charset;
		head_enc = lang->mail_header_encoding;
		body_enc = lang->mail_body_encoding;
	}

	if (zend_parse_parameters(ZEND_NUM_ARGS(), "sss|sS", &to, &to_len, &subject, &subject_len, &message, &message_len, &headers, &headers_len, &extra_cmd) == FAILURE) {
		return;
	}

	/* ASCIIZ check: embedded NUL bytes are replaced by spaces in place */
	MAIL_ASCIIZ_CHECK_MBSTRING(to, to_len);
	MAIL_ASCIIZ_CHECK_MBSTRING(subject, subject_len);
	MAIL_ASCIIZ_CHECK_MBSTRING(message, message_len);
	if (headers) {
		MAIL_ASCIIZ_CHECK_MBSTRING(headers, headers_len);
	}
	if (extra_cmd) {
		MAIL_ASCIIZ_CHECK_MBSTRING(ZSTR_VAL(extra_cmd), ZSTR_LEN(extra_cmd));
	}

	zend_hash_init(&ht_headers, 0, NULL, ZVAL_PTR_DTOR, 0);

	/* index the caller's headers by upper-cased field name */
	if (headers != NULL) {
		_php_mbstr_parse_mail_headers(&ht_headers, headers, headers_len);
	}

	/* a caller-supplied Content-Type may override the transfer charset via
	 * its first parameter when that parameter is "charset" */
	if ((s = zend_hash_str_find(&ht_headers, "CONTENT-TYPE", sizeof("CONTENT-TYPE") - 1))) {
		char *tmp;
		char *param_name;
		char *charset = NULL;

		ZEND_ASSERT(Z_TYPE_P(s) == IS_STRING);
		p = strchr(Z_STRVAL_P(s), ';');

		if (p != NULL) {
			/* skipping the padded spaces */
			do {
				++p;
			} while (*p == ' ' || *p == '\t');

			if (*p != '\0') {
				/* note: php_strtok_r() is applied to the header value stored
				 * in ht_headers */
				if ((param_name = php_strtok_r(p, "= ", &tmp)) != NULL) {
					if (strcasecmp(param_name, "charset") == 0) {
						enum mbfl_no_encoding _tran_cs = tran_cs;

						charset = php_strtok_r(NULL, "= \"", &tmp);
						if (charset != NULL) {
							_tran_cs = mbfl_name2no_encoding(charset);
						}

						if (_tran_cs == mbfl_no_encoding_invalid) {
							php_error_docref(NULL, E_WARNING, "Unsupported charset \"%s\" - will be regarded as ascii", charset);
							_tran_cs = mbfl_no_encoding_ascii;
						}
						tran_cs = _tran_cs;
					}
				}
			}
		}
		suppressed_hdrs.cnt_type = 1;
	}

	/* a caller-supplied Content-Transfer-Encoding selects the body encoding;
	 * only base64, 7bit and 8bit are accepted */
	if ((s = zend_hash_str_find(&ht_headers, "CONTENT-TRANSFER-ENCODING", sizeof("CONTENT-TRANSFER-ENCODING") - 1))) {
		enum mbfl_no_encoding _body_enc;

		ZEND_ASSERT(Z_TYPE_P(s) == IS_STRING);
		_body_enc = mbfl_name2no_encoding(Z_STRVAL_P(s));
		switch (_body_enc) {
			case mbfl_no_encoding_base64:
			case mbfl_no_encoding_7bit:
			case mbfl_no_encoding_8bit:
				body_enc = _body_enc;
				break;

			default:
				php_error_docref(NULL, E_WARNING, "Unsupported transfer encoding \"%s\" - will be regarded as 8bit", Z_STRVAL_P(s));
				body_enc = mbfl_no_encoding_8bit;
				break;
		}
		suppressed_hdrs.cnt_trans_enc = 1;
	}

	/* To: work on a private copy with trailing whitespace removed and
	 * control characters replaced by spaces */
	if (to != NULL) {
		if (to_len > 0) {
			to_r = estrndup(to, to_len);
			for (; to_len; to_len--) {
				if (!isspace((unsigned char) to_r[to_len - 1])) {
					break;
				}
				to_r[to_len - 1] = '\0';
			}
			for (i = 0; to_r[i]; i++) {
				if (iscntrl((unsigned char) to_r[i])) {
					/* According to RFC 822, section 3.1.1 long headers may be separated into
					 * parts using CRLF followed at least one linear-white-space character ('\t' or ' ').
					 * To prevent these separators from being replaced with a space, we use the
					 * SKIP_LONG_HEADER_SEP_MBSTRING to skip over them.
					 */
					SKIP_LONG_HEADER_SEP_MBSTRING(to_r, i);
					to_r[i] = ' ';
				}
			}
		} else {
			/* empty recipient: no copy is made, to_r aliases the argument
			 * (see the to_r != to check before efree() below) */
			to_r = to;
		}
	} else {
		php_error_docref(NULL, E_WARNING, "Missing To: field");
		err = 1;
	}

	/* Subject: MIME-header-encode using the transfer charset and the
	 * header transfer encoding */
	if (subject != NULL) {
		orig_str.no_language = MBSTRG(language);
		orig_str.val = (unsigned char *)subject;
		orig_str.len = subject_len;
		orig_str.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
		/* when the internal encoding is unusable, detect it from the text */
		if (orig_str.no_encoding == mbfl_no_encoding_invalid || orig_str.no_encoding == mbfl_no_encoding_pass) {
			const mbfl_encoding *encoding = mbfl_identify_encoding2(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
			orig_str.no_encoding = encoding ? encoding->no_encoding: mbfl_no_encoding_invalid;
		}
		pstr = mbfl_mime_header_encode(&orig_str, &conv_str, tran_cs, head_enc, "\n", sizeof("Subject: [PHP-jp nnnnnnnn]"));
		if (pstr != NULL) {
			/* subject_buf remembers the buffer so it can be freed below */
			subject_buf = subject = (char *)pstr->val;
		}
	} else {
		php_error_docref(NULL, E_WARNING, "Missing Subject: field");
		err = 1;
	}

	/* message body */
	if (message != NULL) {
		orig_str.no_language = MBSTRG(language);
		orig_str.val = (unsigned char *)message;
		orig_str.len = (unsigned int)message_len;
		orig_str.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;

		if (orig_str.no_encoding == mbfl_no_encoding_invalid || orig_str.no_encoding == mbfl_no_encoding_pass) {
			const mbfl_encoding *encoding = mbfl_identify_encoding2(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
			orig_str.no_encoding = encoding ? encoding->no_encoding: mbfl_no_encoding_invalid;
		}

		pstr = NULL;
		{
			mbfl_string tmpstr;

			/* two steps: first to the transfer charset, then (with the
			 * intermediate result tagged as 8bit) to the body transfer
			 * encoding */
			if (mbfl_convert_encoding(&orig_str, &tmpstr, tran_cs) != NULL) {
				tmpstr.no_encoding=mbfl_no_encoding_8bit;
				pstr = mbfl_convert_encoding(&tmpstr, &conv_str, body_enc);
				efree(tmpstr.val);
			}
		}
		if (pstr != NULL) {
			/* message_buf remembers the buffer so it can be freed below */
			message_buf = message = (char *)pstr->val;
		}
	} else {
		/* this is not really an error, so it is allowed. */
		php_error_docref(NULL, E_WARNING, "Empty message body");
		message = NULL;
	}

	/* other headers */
#define PHP_MBSTR_MAIL_MIME_HEADER1 "MIME-Version: 1.0"
#define PHP_MBSTR_MAIL_MIME_HEADER2 "Content-Type: text/plain"
#define PHP_MBSTR_MAIL_MIME_HEADER3 "; charset="
#define PHP_MBSTR_MAIL_MIME_HEADER4 "Content-Transfer-Encoding: "
	/* start with the caller's headers, newline-terminated */
	if (headers != NULL) {
		p = headers;
		n = headers_len;
		mbfl_memory_device_strncat(&device, p, n);
		if (n > 0 && p[n - 1] != '\n') {
			mbfl_memory_device_strncat(&device, "\n", 1);
		}
	}

	/* append each MIME header the caller did not provide */
	if (!zend_hash_str_exists(&ht_headers, "MIME-VERSION", sizeof("MIME-VERSION") - 1)) {
		mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER1, sizeof(PHP_MBSTR_MAIL_MIME_HEADER1) - 1);
		mbfl_memory_device_strncat(&device, "\n", 1);
	}

	if (!suppressed_hdrs.cnt_type) {
		mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER2, sizeof(PHP_MBSTR_MAIL_MIME_HEADER2) - 1);

		p = (char *)mbfl_no2preferred_mime_name(tran_cs);
		if (p != NULL) {
			mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER3, sizeof(PHP_MBSTR_MAIL_MIME_HEADER3) - 1);
			mbfl_memory_device_strcat(&device, p);
		}
		mbfl_memory_device_strncat(&device, "\n", 1);
	}
	if (!suppressed_hdrs.cnt_trans_enc) {
		mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER4, sizeof(PHP_MBSTR_MAIL_MIME_HEADER4) - 1);
		p = (char *)mbfl_no2preferred_mime_name(body_enc);
		if (p == NULL) {
			p = "7bit";
		}
		mbfl_memory_device_strcat(&device, p);
		mbfl_memory_device_strncat(&device, "\n", 1);
	}

	/* NOTE(review): presumably this drops the trailing "\n" before the
	 * buffer is NUL-terminated - confirm against mbfl_memory_device_unput() */
	mbfl_memory_device_unput(&device);
	mbfl_memory_device_output('\0', &device);
	headers = (char *)device.buffer;

	/* the ini setting mail.force_extra_parameters takes precedence over the
	 * caller's extra parameters; either way the shell-escaped result
	 * replaces extra_cmd and is released below */
	if (force_extra_parameters) {
		extra_cmd = php_escape_shell_cmd(force_extra_parameters);
	} else if (extra_cmd) {
		extra_cmd = php_escape_shell_cmd(ZSTR_VAL(extra_cmd));
	}

	if (!err && php_mail(to_r, subject, message, headers, extra_cmd ? ZSTR_VAL(extra_cmd) : NULL)) {
		RETVAL_TRUE;
	} else {
		RETVAL_FALSE;
	}

	/* cleanup */
	if (extra_cmd) {
		zend_string_release(extra_cmd);
	}

	/* to_r is only a private copy when it differs from the argument */
	if (to_r != to) {
		efree(to_r);
	}
	if (subject_buf) {
		efree((void *)subject_buf);
	}
	if (message_buf) {
		efree((void *)message_buf);
	}
	mbfl_memory_device_clear(&device);
	zend_hash_destroy(&ht_headers);
}
4513
/* the helper macros above are private to the mb_send_mail() implementation */
#undef SKIP_LONG_HEADER_SEP_MBSTRING
#undef MAIL_ASCIIZ_CHECK_MBSTRING
#undef PHP_MBSTR_MAIL_MIME_HEADER1
#undef PHP_MBSTR_MAIL_MIME_HEADER2
#undef PHP_MBSTR_MAIL_MIME_HEADER3
#undef PHP_MBSTR_MAIL_MIME_HEADER4
4520 /* }}} */
4521
4522 /* {{{ proto mixed mb_get_info([string type])
4523 Returns the current settings of mbstring */
PHP_FUNCTION(mb_get_info)4524 PHP_FUNCTION(mb_get_info)
4525 {
4526 char *typ = NULL;
4527 size_t typ_len;
4528 size_t n;
4529 char *name;
4530 const struct mb_overload_def *over_func;
4531 zval row1, row2;
4532 const mbfl_language *lang = mbfl_no2language(MBSTRG(language));
4533 const mbfl_encoding **entry;
4534
4535 if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s", &typ, &typ_len) == FAILURE) {
4536 return;
4537 }
4538
4539 if (!typ || !strcasecmp("all", typ)) {
4540 array_init(return_value);
4541 if (MBSTRG(current_internal_encoding)) {
4542 add_assoc_string(return_value, "internal_encoding", (char *)MBSTRG(current_internal_encoding)->name);
4543 }
4544 if (MBSTRG(http_input_identify)) {
4545 add_assoc_string(return_value, "http_input", (char *)MBSTRG(http_input_identify)->name);
4546 }
4547 if (MBSTRG(current_http_output_encoding)) {
4548 add_assoc_string(return_value, "http_output", (char *)MBSTRG(current_http_output_encoding)->name);
4549 }
4550 if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes") - 1, 0)) != NULL) {
4551 add_assoc_string(return_value, "http_output_conv_mimetypes", name);
4552 }
4553 add_assoc_long(return_value, "func_overload", MBSTRG(func_overload));
4554 if (MBSTRG(func_overload)){
4555 over_func = &(mb_ovld[0]);
4556 array_init(&row1);
4557 while (over_func->type > 0) {
4558 if ((MBSTRG(func_overload) & over_func->type) == over_func->type ) {
4559 add_assoc_string(&row1, over_func->orig_func, over_func->ovld_func);
4560 }
4561 over_func++;
4562 }
4563 add_assoc_zval(return_value, "func_overload_list", &row1);
4564 } else {
4565 add_assoc_string(return_value, "func_overload_list", "no overload");
4566 }
4567 if (lang != NULL) {
4568 if ((name = (char *)mbfl_no_encoding2name(lang->mail_charset)) != NULL) {
4569 add_assoc_string(return_value, "mail_charset", name);
4570 }
4571 if ((name = (char *)mbfl_no_encoding2name(lang->mail_header_encoding)) != NULL) {
4572 add_assoc_string(return_value, "mail_header_encoding", name);
4573 }
4574 if ((name = (char *)mbfl_no_encoding2name(lang->mail_body_encoding)) != NULL) {
4575 add_assoc_string(return_value, "mail_body_encoding", name);
4576 }
4577 }
4578 add_assoc_long(return_value, "illegal_chars", MBSTRG(illegalchars));
4579 if (MBSTRG(encoding_translation)) {
4580 add_assoc_string(return_value, "encoding_translation", "On");
4581 } else {
4582 add_assoc_string(return_value, "encoding_translation", "Off");
4583 }
4584 if ((name = (char *)mbfl_no_language2name(MBSTRG(language))) != NULL) {
4585 add_assoc_string(return_value, "language", name);
4586 }
4587 n = MBSTRG(current_detect_order_list_size);
4588 entry = MBSTRG(current_detect_order_list);
4589 if (n > 0) {
4590 size_t i;
4591 array_init(&row2);
4592 for (i = 0; i < n; i++) {
4593 add_next_index_string(&row2, (*entry)->name);
4594 entry++;
4595 }
4596 add_assoc_zval(return_value, "detect_order", &row2);
4597 }
4598 if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
4599 add_assoc_string(return_value, "substitute_character", "none");
4600 } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
4601 add_assoc_string(return_value, "substitute_character", "long");
4602 } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
4603 add_assoc_string(return_value, "substitute_character", "entity");
4604 } else {
4605 add_assoc_long(return_value, "substitute_character", MBSTRG(current_filter_illegal_substchar));
4606 }
4607 if (MBSTRG(strict_detection)) {
4608 add_assoc_string(return_value, "strict_detection", "On");
4609 } else {
4610 add_assoc_string(return_value, "strict_detection", "Off");
4611 }
4612 } else if (!strcasecmp("internal_encoding", typ)) {
4613 if (MBSTRG(current_internal_encoding)) {
4614 RETVAL_STRING((char *)MBSTRG(current_internal_encoding)->name);
4615 }
4616 } else if (!strcasecmp("http_input", typ)) {
4617 if (MBSTRG(http_input_identify)) {
4618 RETVAL_STRING((char *)MBSTRG(http_input_identify)->name);
4619 }
4620 } else if (!strcasecmp("http_output", typ)) {
4621 if (MBSTRG(current_http_output_encoding)) {
4622 RETVAL_STRING((char *)MBSTRG(current_http_output_encoding)->name);
4623 }
4624 } else if (!strcasecmp("http_output_conv_mimetypes", typ)) {
4625 if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes") - 1, 0)) != NULL) {
4626 RETVAL_STRING(name);
4627 }
4628 } else if (!strcasecmp("func_overload", typ)) {
4629 RETVAL_LONG(MBSTRG(func_overload));
4630 } else if (!strcasecmp("func_overload_list", typ)) {
4631 if (MBSTRG(func_overload)){
4632 over_func = &(mb_ovld[0]);
4633 array_init(return_value);
4634 while (over_func->type > 0) {
4635 if ((MBSTRG(func_overload) & over_func->type) == over_func->type ) {
4636 add_assoc_string(return_value, over_func->orig_func, over_func->ovld_func);
4637 }
4638 over_func++;
4639 }
4640 } else {
4641 RETVAL_STRING("no overload");
4642 }
4643 } else if (!strcasecmp("mail_charset", typ)) {
4644 if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_charset)) != NULL) {
4645 RETVAL_STRING(name);
4646 }
4647 } else if (!strcasecmp("mail_header_encoding", typ)) {
4648 if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_header_encoding)) != NULL) {
4649 RETVAL_STRING(name);
4650 }
4651 } else if (!strcasecmp("mail_body_encoding", typ)) {
4652 if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_body_encoding)) != NULL) {
4653 RETVAL_STRING(name);
4654 }
4655 } else if (!strcasecmp("illegal_chars", typ)) {
4656 RETVAL_LONG(MBSTRG(illegalchars));
4657 } else if (!strcasecmp("encoding_translation", typ)) {
4658 if (MBSTRG(encoding_translation)) {
4659 RETVAL_STRING("On");
4660 } else {
4661 RETVAL_STRING("Off");
4662 }
4663 } else if (!strcasecmp("language", typ)) {
4664 if ((name = (char *)mbfl_no_language2name(MBSTRG(language))) != NULL) {
4665 RETVAL_STRING(name);
4666 }
4667 } else if (!strcasecmp("detect_order", typ)) {
4668 n = MBSTRG(current_detect_order_list_size);
4669 entry = MBSTRG(current_detect_order_list);
4670 if (n > 0) {
4671 size_t i;
4672 array_init(return_value);
4673 for (i = 0; i < n; i++) {
4674 add_next_index_string(return_value, (*entry)->name);
4675 entry++;
4676 }
4677 }
4678 } else if (!strcasecmp("substitute_character", typ)) {
4679 if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
4680 RETVAL_STRING("none");
4681 } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
4682 RETVAL_STRING("long");
4683 } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
4684 RETVAL_STRING("entity");
4685 } else {
4686 RETVAL_LONG(MBSTRG(current_filter_illegal_substchar));
4687 }
4688 } else if (!strcasecmp("strict_detection", typ)) {
4689 if (MBSTRG(strict_detection)) {
4690 RETVAL_STRING("On");
4691 } else {
4692 RETVAL_STRING("Off");
4693 }
4694 } else {
4695 RETURN_FALSE;
4696 }
4697 }
4698 /* }}} */
4699
4700 /* {{{ proto bool mb_check_encoding([string var[, string encoding]])
4701 Check if the string is valid for the specified encoding */
PHP_FUNCTION(mb_check_encoding)4702 PHP_FUNCTION(mb_check_encoding)
4703 {
4704 char *var = NULL;
4705 size_t var_len;
4706 char *enc = NULL;
4707 size_t enc_len;
4708 mbfl_buffer_converter *convd;
4709 const mbfl_encoding *encoding = MBSTRG(current_internal_encoding);
4710 mbfl_string string, result, *ret = NULL;
4711 long illegalchars = 0;
4712
4713 if (zend_parse_parameters(ZEND_NUM_ARGS(), "|ss", &var, &var_len, &enc, &enc_len) == FAILURE) {
4714 return;
4715 }
4716
4717 if (var == NULL) {
4718 RETURN_BOOL(MBSTRG(illegalchars) == 0);
4719 }
4720
4721 if (enc != NULL) {
4722 encoding = mbfl_name2encoding(enc);
4723 if (!encoding || encoding == &mbfl_encoding_pass) {
4724 php_error_docref(NULL, E_WARNING, "Invalid encoding \"%s\"", enc);
4725 RETURN_FALSE;
4726 }
4727 }
4728
4729 convd = mbfl_buffer_converter_new2(encoding, encoding, 0);
4730 if (convd == NULL) {
4731 php_error_docref(NULL, E_WARNING, "Unable to create converter");
4732 RETURN_FALSE;
4733 }
4734 mbfl_buffer_converter_illegal_mode(convd, MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE);
4735 mbfl_buffer_converter_illegal_substchar(convd, 0);
4736
4737 /* initialize string */
4738 mbfl_string_init_set(&string, mbfl_no_language_neutral, encoding->no_encoding);
4739 mbfl_string_init(&result);
4740
4741 string.val = (unsigned char *)var;
4742 string.len = var_len;
4743 ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
4744 illegalchars = mbfl_buffer_illegalchars(convd);
4745 mbfl_buffer_converter_delete(convd);
4746
4747 RETVAL_FALSE;
4748 if (ret != NULL) {
4749 if (illegalchars == 0 && string.len == result.len && memcmp(string.val, result.val, string.len) == 0) {
4750 RETVAL_TRUE;
4751 }
4752 mbfl_string_clear(&result);
4753 }
4754 }
4755 /* }}} */
4756
4757 /* {{{ php_mb_populate_current_detect_order_list */
php_mb_populate_current_detect_order_list(void)4758 static void php_mb_populate_current_detect_order_list(void)
4759 {
4760 const mbfl_encoding **entry = 0;
4761 size_t nentries;
4762
4763 if (MBSTRG(current_detect_order_list)) {
4764 return;
4765 }
4766
4767 if (MBSTRG(detect_order_list) && MBSTRG(detect_order_list_size)) {
4768 nentries = MBSTRG(detect_order_list_size);
4769 entry = (const mbfl_encoding **)safe_emalloc(nentries, sizeof(mbfl_encoding*), 0);
4770 memcpy(entry, MBSTRG(detect_order_list), sizeof(mbfl_encoding*) * nentries);
4771 } else {
4772 const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
4773 size_t i;
4774 nentries = MBSTRG(default_detect_order_list_size);
4775 entry = (const mbfl_encoding **)safe_emalloc(nentries, sizeof(mbfl_encoding*), 0);
4776 for (i = 0; i < nentries; i++) {
4777 entry[i] = mbfl_no2encoding(src[i]);
4778 }
4779 }
4780 MBSTRG(current_detect_order_list) = entry;
4781 MBSTRG(current_detect_order_list_size) = nentries;
4782 }
4783 /* }}} */
4784
4785 /* {{{ static int php_mb_encoding_translation() */
/* Returns the current value of the encoding_translation module global. */
static int php_mb_encoding_translation(void)
{
	return MBSTRG(encoding_translation);
}
4790 /* }}} */
4791
4792 /* {{{ MBSTRING_API size_t php_mb_mbchar_bytes_ex() */
php_mb_mbchar_bytes_ex(const char * s,const mbfl_encoding * enc)4793 MBSTRING_API size_t php_mb_mbchar_bytes_ex(const char *s, const mbfl_encoding *enc)
4794 {
4795 if (enc != NULL) {
4796 if (enc->flag & MBFL_ENCTYPE_MBCS) {
4797 if (enc->mblen_table != NULL) {
4798 if (s != NULL) return enc->mblen_table[*(unsigned char *)s];
4799 }
4800 } else if (enc->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
4801 return 2;
4802 } else if (enc->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
4803 return 4;
4804 }
4805 }
4806 return 1;
4807 }
4808 /* }}} */
4809
4810 /* {{{ MBSTRING_API size_t php_mb_mbchar_bytes() */
/* Same as php_mb_mbchar_bytes_ex(), using MBSTRG(internal_encoding) as the
 * encoding. */
MBSTRING_API size_t php_mb_mbchar_bytes(const char *s)
{
	return php_mb_mbchar_bytes_ex(s, MBSTRG(internal_encoding));
}
4815 /* }}} */
4816
4817 /* {{{ MBSTRING_API char *php_mb_safe_strrchr_ex() */
php_mb_safe_strrchr_ex(const char * s,unsigned int c,size_t nbytes,const mbfl_encoding * enc)4818 MBSTRING_API char *php_mb_safe_strrchr_ex(const char *s, unsigned int c, size_t nbytes, const mbfl_encoding *enc)
4819 {
4820 register const char *p = s;
4821 char *last=NULL;
4822
4823 if (nbytes == (size_t)-1) {
4824 size_t nb = 0;
4825
4826 while (*p != '\0') {
4827 if (nb == 0) {
4828 if ((unsigned char)*p == (unsigned char)c) {
4829 last = (char *)p;
4830 }
4831 nb = php_mb_mbchar_bytes_ex(p, enc);
4832 if (nb == 0) {
4833 return NULL; /* something is going wrong! */
4834 }
4835 }
4836 --nb;
4837 ++p;
4838 }
4839 } else {
4840 register size_t bcnt = nbytes;
4841 register size_t nbytes_char;
4842 while (bcnt > 0) {
4843 if ((unsigned char)*p == (unsigned char)c) {
4844 last = (char *)p;
4845 }
4846 nbytes_char = php_mb_mbchar_bytes_ex(p, enc);
4847 if (bcnt < nbytes_char) {
4848 return NULL;
4849 }
4850 p += nbytes_char;
4851 bcnt -= nbytes_char;
4852 }
4853 }
4854 return last;
4855 }
4856 /* }}} */
4857
4858 /* {{{ MBSTRING_API char *php_mb_safe_strrchr() */
/* Same as php_mb_safe_strrchr_ex(), using MBSTRG(internal_encoding) as the
 * encoding. */
MBSTRING_API char *php_mb_safe_strrchr(const char *s, unsigned int c, size_t nbytes)
{
	return php_mb_safe_strrchr_ex(s, c, nbytes, MBSTRG(internal_encoding));
}
4863 /* }}} */
4864
4865 /* {{{ MBSTRING_API int php_mb_stripos()
4866 */
php_mb_stripos(int mode,const char * old_haystack,unsigned int old_haystack_len,const char * old_needle,unsigned int old_needle_len,long offset,const char * from_encoding)4867 MBSTRING_API int php_mb_stripos(int mode, const char *old_haystack, unsigned int old_haystack_len, const char *old_needle, unsigned int old_needle_len, long offset, const char *from_encoding)
4868 {
4869 int n;
4870 mbfl_string haystack, needle;
4871 n = -1;
4872
4873 mbfl_string_init(&haystack);
4874 mbfl_string_init(&needle);
4875 haystack.no_language = MBSTRG(language);
4876 haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
4877 needle.no_language = MBSTRG(language);
4878 needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
4879
4880 do {
4881 size_t len = 0;
4882 haystack.val = (unsigned char *)php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, (char *)old_haystack, old_haystack_len, &len, from_encoding);
4883 haystack.len = len;
4884
4885 if (!haystack.val) {
4886 break;
4887 }
4888
4889 if (haystack.len <= 0) {
4890 break;
4891 }
4892
4893 needle.val = (unsigned char *)php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, (char *)old_needle, old_needle_len, &len, from_encoding);
4894 needle.len = len;
4895
4896 if (!needle.val) {
4897 break;
4898 }
4899
4900 if (needle.len <= 0) {
4901 break;
4902 }
4903
4904 haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding);
4905 if (haystack.no_encoding == mbfl_no_encoding_invalid) {
4906 php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", from_encoding);
4907 break;
4908 }
4909
4910 {
4911 int haystack_char_len = mbfl_strlen(&haystack);
4912
4913 if (mode) {
4914 if ((offset > 0 && offset > haystack_char_len) ||
4915 (offset < 0 && -offset > haystack_char_len)) {
4916 php_error_docref(NULL, E_WARNING, "Offset is greater than the length of haystack string");
4917 break;
4918 }
4919 } else {
4920 if (offset < 0 || offset > haystack_char_len) {
4921 php_error_docref(NULL, E_WARNING, "Offset not contained in string");
4922 break;
4923 }
4924 }
4925 }
4926
4927 n = mbfl_strpos(&haystack, &needle, offset, mode);
4928 } while(0);
4929
4930 if (haystack.val) {
4931 efree(haystack.val);
4932 }
4933
4934 if (needle.val) {
4935 efree(needle.val);
4936 }
4937
4938 return n;
4939 }
4940 /* }}} */
4941
/* Reports the http_input encoding list and its size through the two output
 * parameters; the list pointer is handed out as-is, not copied. */
static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *list_size) /* {{{ */
{
	*list = (const zend_encoding **)MBSTRG(http_input_list);
	*list_size = MBSTRG(http_input_list_size);
}
4947 /* }}} */
4948
/* Records `encoding` as the identified HTTP input encoding
 * (MBSTRG(http_input_identify)). */
static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding) /* {{{ */
{
	MBSTRG(http_input_identify) = (const mbfl_encoding*)encoding;
}
4953 /* }}} */
4954
4955 #endif /* HAVE_MBSTRING */
4956
4957 /*
4958 * Local variables:
4959 * tab-width: 4
4960 * c-basic-offset: 4
4961 * End:
4962 * vim600: fdm=marker
4963 * vim: noet sw=4 ts=4
4964 */
4965