1 /*
2 +----------------------------------------------------------------------+
3 | PHP Version 5 |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 1997-2016 The PHP Group |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
15 | Author: Tsukada Takuya <tsukada@fminn.nagano.nagano.jp> |
16 | Rui Hirokawa <hirokawa@php.net> |
17 +----------------------------------------------------------------------+
18 */
19
20 /* $Id$ */
21
22 /*
23 * PHP 4 Multibyte String module "mbstring"
24 *
25 * History:
26 * 2000.5.19 Release php-4.0RC2_jstring-1.0
27 * 2001.4.1 Release php4_jstring-1.0.91
28 * 2001.4.30 Release php4_jstring-1.1 (contribute to The PHP Group)
29 * 2001.5.1 Renamed from jstring to mbstring (hirokawa@php.net)
30 */
31
32 /*
33 * PHP3 Internationalization support program.
34 *
35 * Copyright (c) 1999,2000 by the PHP3 internationalization team.
36 * All rights reserved.
37 *
38 * See README_PHP3-i18n-ja for more detail.
39 *
40 * Authors:
41 * Hironori Sato <satoh@jpnnet.com>
42 * Shigeru Kanemoto <sgk@happysize.co.jp>
43 * Tsukada Takuya <tsukada@fminn.nagano.nagano.jp>
44 * Rui Hirokawa <rui_hirokawa@ybb.ne.jp>
45 */
46
47 /* {{{ includes */
48 #ifdef HAVE_CONFIG_H
49 #include "config.h"
50 #endif
51
52 #include "php.h"
53 #include "php_ini.h"
54 #include "php_variables.h"
55 #include "mbstring.h"
56 #include "ext/standard/php_string.h"
57 #include "ext/standard/php_mail.h"
58 #include "ext/standard/exec.h"
59 #include "ext/standard/php_smart_str.h"
60 #include "ext/standard/url.h"
61 #include "main/php_output.h"
62 #include "ext/standard/info.h"
63
64 #include "libmbfl/mbfl/mbfl_allocators.h"
65 #include "libmbfl/mbfl/mbfilter_pass.h"
66
67 #include "php_variables.h"
68 #include "php_globals.h"
69 #include "rfc1867.h"
70 #include "php_content_types.h"
71 #include "SAPI.h"
72 #include "php_unicode.h"
73 #include "TSRM.h"
74
75 #include "mb_gpc.h"
76
77 #if HAVE_MBREGEX
78 #include "php_mbregex.h"
79 #endif
80
81 #include "zend_multibyte.h"
82
83 #if HAVE_ONIG
84 #include "php_onig_compat.h"
85 #include <oniguruma.h>
86 #undef UChar
87 #elif HAVE_PCRE || HAVE_BUNDLED_PCRE
88 #include "ext/pcre/php_pcre.h"
89 #endif
90 /* }}} */
91
92 #if HAVE_MBSTRING
93
/* {{{ prototypes */
/* Storage for this extension's module globals (read elsewhere via MBSTRG()). */
ZEND_DECLARE_MODULE_GLOBALS(mbstring)

/* Globals init/shutdown hooks; both are referenced from mbstring_module_entry. */
static PHP_GINIT_FUNCTION(mbstring);
static PHP_GSHUTDOWN_FUNCTION(mbstring);

/* Forward declarations of file-local (static) helpers. */
static void php_mb_populate_current_detect_order_list(TSRMLS_D);

static int php_mb_encoding_translation(TSRMLS_D);

static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *list_size TSRMLS_DC);

static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding TSRMLS_DC);

/* }}} */
109
/* {{{ php_mb_default_identify_list */
/*
 * One row of the php_mb_default_identify_list table: a language id paired
 * with an array of encoding ids and the number of elements in that array.
 */
typedef struct _php_mb_nls_ident_list {
	enum mbfl_no_language lang;          /* language this row belongs to */
	const enum mbfl_no_encoding *list;   /* encoding ids listed for that language */
	size_t list_size;                    /* element count of list */
} php_mb_nls_ident_list;
116
/*
 * Per-language encoding id arrays. Each one is attached to a language id in
 * the php_mb_default_identify_list table; every array starts with ASCII.
 */

/* Japanese (mbfl_no_language_japanese) */
static const enum mbfl_no_encoding php_mb_default_identify_list_ja[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_jis,
	mbfl_no_encoding_utf8,
	mbfl_no_encoding_euc_jp,
	mbfl_no_encoding_sjis
};

/* Simplified Chinese (mbfl_no_language_simplified_chinese) */
static const enum mbfl_no_encoding php_mb_default_identify_list_cn[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_utf8,
	mbfl_no_encoding_euc_cn,
	mbfl_no_encoding_cp936
};

/* Traditional Chinese (mbfl_no_language_traditional_chinese) */
static const enum mbfl_no_encoding php_mb_default_identify_list_tw_hk[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_utf8,
	mbfl_no_encoding_euc_tw,
	mbfl_no_encoding_big5
};

/* Korean (mbfl_no_language_korean) */
static const enum mbfl_no_encoding php_mb_default_identify_list_kr[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_utf8,
	mbfl_no_encoding_euc_kr,
	mbfl_no_encoding_uhc
};

/* Russian (mbfl_no_language_russian) */
static const enum mbfl_no_encoding php_mb_default_identify_list_ru[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_utf8,
	mbfl_no_encoding_koi8r,
	mbfl_no_encoding_cp1251,
	mbfl_no_encoding_cp866
};

/* Armenian (mbfl_no_language_armenian) */
static const enum mbfl_no_encoding php_mb_default_identify_list_hy[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_utf8,
	mbfl_no_encoding_armscii8
};

/* Turkish (mbfl_no_language_turkish) */
static const enum mbfl_no_encoding php_mb_default_identify_list_tr[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_utf8,
	mbfl_no_encoding_cp1254,
	mbfl_no_encoding_8859_9
};

/* Ukrainian (mbfl_no_language_ukrainian) */
static const enum mbfl_no_encoding php_mb_default_identify_list_ua[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_utf8,
	mbfl_no_encoding_koi8u
};

/* Neutral (mbfl_no_language_neutral) */
static const enum mbfl_no_encoding php_mb_default_identify_list_neut[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_utf8
};
177
178
/*
 * Language -> encoding-array table. Each row stores the language id, the
 * matching php_mb_default_identify_list_* array and its element count
 * (computed with sizeof(array) / sizeof(array[0])).
 */
static const php_mb_nls_ident_list php_mb_default_identify_list[] = {
	{ mbfl_no_language_japanese, php_mb_default_identify_list_ja, sizeof(php_mb_default_identify_list_ja) / sizeof(php_mb_default_identify_list_ja[0]) },
	{ mbfl_no_language_korean, php_mb_default_identify_list_kr, sizeof(php_mb_default_identify_list_kr) / sizeof(php_mb_default_identify_list_kr[0]) },
	{ mbfl_no_language_traditional_chinese, php_mb_default_identify_list_tw_hk, sizeof(php_mb_default_identify_list_tw_hk) / sizeof(php_mb_default_identify_list_tw_hk[0]) },
	{ mbfl_no_language_simplified_chinese, php_mb_default_identify_list_cn, sizeof(php_mb_default_identify_list_cn) / sizeof(php_mb_default_identify_list_cn[0]) },
	{ mbfl_no_language_russian, php_mb_default_identify_list_ru, sizeof(php_mb_default_identify_list_ru) / sizeof(php_mb_default_identify_list_ru[0]) },
	{ mbfl_no_language_armenian, php_mb_default_identify_list_hy, sizeof(php_mb_default_identify_list_hy) / sizeof(php_mb_default_identify_list_hy[0]) },
	{ mbfl_no_language_turkish, php_mb_default_identify_list_tr, sizeof(php_mb_default_identify_list_tr) / sizeof(php_mb_default_identify_list_tr[0]) },
	{ mbfl_no_language_ukrainian, php_mb_default_identify_list_ua, sizeof(php_mb_default_identify_list_ua) / sizeof(php_mb_default_identify_list_ua[0]) },
	{ mbfl_no_language_neutral, php_mb_default_identify_list_neut, sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]) }
};
190
191 /* }}} */
192
/* {{{ mb_overload_def mb_ovld[] */
/*
 * Function overload table, terminated by an all-zero/NULL row.
 * Each row is { MB_OVERLOAD_* group flag, standard function name,
 * mb_* counterpart, "mb_orig_*" name }.
 * NOTE(review): struct mb_overload_def is declared outside this file section;
 * presumably the last column is the name under which the original function
 * is kept while overloaded -- confirm against the struct definition.
 * The regex rows are only compiled in when HAVE_MBREGEX is set.
 */
static const struct mb_overload_def mb_ovld[] = {
	{MB_OVERLOAD_MAIL, "mail", "mb_send_mail", "mb_orig_mail"},
	{MB_OVERLOAD_STRING, "strlen", "mb_strlen", "mb_orig_strlen"},
	{MB_OVERLOAD_STRING, "strpos", "mb_strpos", "mb_orig_strpos"},
	{MB_OVERLOAD_STRING, "strrpos", "mb_strrpos", "mb_orig_strrpos"},
	{MB_OVERLOAD_STRING, "stripos", "mb_stripos", "mb_orig_stripos"},
	{MB_OVERLOAD_STRING, "strripos", "mb_strripos", "mb_orig_strripos"},
	{MB_OVERLOAD_STRING, "strstr", "mb_strstr", "mb_orig_strstr"},
	{MB_OVERLOAD_STRING, "strrchr", "mb_strrchr", "mb_orig_strrchr"},
	{MB_OVERLOAD_STRING, "stristr", "mb_stristr", "mb_orig_stristr"},
	{MB_OVERLOAD_STRING, "substr", "mb_substr", "mb_orig_substr"},
	{MB_OVERLOAD_STRING, "strtolower", "mb_strtolower", "mb_orig_strtolower"},
	{MB_OVERLOAD_STRING, "strtoupper", "mb_strtoupper", "mb_orig_strtoupper"},
	{MB_OVERLOAD_STRING, "substr_count", "mb_substr_count", "mb_orig_substr_count"},
#if HAVE_MBREGEX
	{MB_OVERLOAD_REGEX, "ereg", "mb_ereg", "mb_orig_ereg"},
	{MB_OVERLOAD_REGEX, "eregi", "mb_eregi", "mb_orig_eregi"},
	{MB_OVERLOAD_REGEX, "ereg_replace", "mb_ereg_replace", "mb_orig_ereg_replace"},
	{MB_OVERLOAD_REGEX, "eregi_replace", "mb_eregi_replace", "mb_orig_eregi_replace"},
	{MB_OVERLOAD_REGEX, "split", "mb_split", "mb_orig_split"},
#endif
	{0, NULL, NULL, NULL}
};
/* }}} */
218
/* {{{ arginfo */
/*
 * Argument descriptors for the functions registered in mbstring_functions[]
 * (and for the mb_ereg* family, which is registered through
 * PHP_MBREGEX_FUNCTION_ENTRIES when HAVE_MBREGEX is set).
 *
 * Per the Zend API, the last argument of ZEND_BEGIN_ARG_INFO_EX() is the
 * number of required parameters, and the first argument of ZEND_ARG_INFO()
 * is 1 for a parameter passed by reference, 0 otherwise.
 */

/* ---- configuration getters/setters ---- */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_language, 0, 0, 0)
	ZEND_ARG_INFO(0, language)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_internal_encoding, 0, 0, 0)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_http_input, 0, 0, 0)
	ZEND_ARG_INFO(0, type)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_http_output, 0, 0, 0)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_detect_order, 0, 0, 0)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substitute_character, 0, 0, 0)
	ZEND_ARG_INFO(0, substchar)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_preferred_mime_name, 0, 0, 1)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

/* result is filled in by reference */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_parse_str, 0, 0, 1)
	ZEND_ARG_INFO(0, encoded_string)
	ZEND_ARG_INFO(1, result)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_output_handler, 0, 0, 2)
	ZEND_ARG_INFO(0, contents)
	ZEND_ARG_INFO(0, status)
ZEND_END_ARG_INFO()

/* ---- string inspection / searching ---- */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strlen, 0, 0, 1)
	ZEND_ARG_INFO(0, str)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strpos, 0, 0, 2)
	ZEND_ARG_INFO(0, haystack)
	ZEND_ARG_INFO(0, needle)
	ZEND_ARG_INFO(0, offset)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrpos, 0, 0, 2)
	ZEND_ARG_INFO(0, haystack)
	ZEND_ARG_INFO(0, needle)
	ZEND_ARG_INFO(0, offset)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_stripos, 0, 0, 2)
	ZEND_ARG_INFO(0, haystack)
	ZEND_ARG_INFO(0, needle)
	ZEND_ARG_INFO(0, offset)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strripos, 0, 0, 2)
	ZEND_ARG_INFO(0, haystack)
	ZEND_ARG_INFO(0, needle)
	ZEND_ARG_INFO(0, offset)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strstr, 0, 0, 2)
	ZEND_ARG_INFO(0, haystack)
	ZEND_ARG_INFO(0, needle)
	ZEND_ARG_INFO(0, part)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrchr, 0, 0, 2)
	ZEND_ARG_INFO(0, haystack)
	ZEND_ARG_INFO(0, needle)
	ZEND_ARG_INFO(0, part)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_stristr, 0, 0, 2)
	ZEND_ARG_INFO(0, haystack)
	ZEND_ARG_INFO(0, needle)
	ZEND_ARG_INFO(0, part)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrichr, 0, 0, 2)
	ZEND_ARG_INFO(0, haystack)
	ZEND_ARG_INFO(0, needle)
	ZEND_ARG_INFO(0, part)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substr_count, 0, 0, 2)
	ZEND_ARG_INFO(0, haystack)
	ZEND_ARG_INFO(0, needle)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

/* ---- substring / width ---- */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substr, 0, 0, 2)
	ZEND_ARG_INFO(0, str)
	ZEND_ARG_INFO(0, start)
	ZEND_ARG_INFO(0, length)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strcut, 0, 0, 2)
	ZEND_ARG_INFO(0, str)
	ZEND_ARG_INFO(0, start)
	ZEND_ARG_INFO(0, length)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strwidth, 0, 0, 1)
	ZEND_ARG_INFO(0, str)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strimwidth, 0, 0, 3)
	ZEND_ARG_INFO(0, str)
	ZEND_ARG_INFO(0, start)
	ZEND_ARG_INFO(0, width)
	ZEND_ARG_INFO(0, trimmarker)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

/* ---- conversion / detection ---- */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_encoding, 0, 0, 2)
	ZEND_ARG_INFO(0, str)
	ZEND_ARG_INFO(0, to)
	ZEND_ARG_INFO(0, from)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_case, 0, 0, 2)
	ZEND_ARG_INFO(0, sourcestring)
	ZEND_ARG_INFO(0, mode)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strtoupper, 0, 0, 1)
	ZEND_ARG_INFO(0, sourcestring)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strtolower, 0, 0, 1)
	ZEND_ARG_INFO(0, sourcestring)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_detect_encoding, 0, 0, 1)
	ZEND_ARG_INFO(0, str)
	ZEND_ARG_INFO(0, encoding_list)
	ZEND_ARG_INFO(0, strict)
ZEND_END_ARG_INFO()

/* takes no parameters */
ZEND_BEGIN_ARG_INFO(arginfo_mb_list_encodings, 0)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encoding_aliases, 0, 0, 1)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encode_mimeheader, 0, 0, 1)
	ZEND_ARG_INFO(0, str)
	ZEND_ARG_INFO(0, charset)
	ZEND_ARG_INFO(0, transfer)
	ZEND_ARG_INFO(0, linefeed)
	ZEND_ARG_INFO(0, indent)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_decode_mimeheader, 0, 0, 1)
	ZEND_ARG_INFO(0, string)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_kana, 0, 0, 1)
	ZEND_ARG_INFO(0, str)
	ZEND_ARG_INFO(0, option)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

/* trailing variadic parameters are taken by reference */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_variables, 0, 0, 3)
	ZEND_ARG_INFO(0, to)
	ZEND_ARG_INFO(0, from)
	ZEND_ARG_VARIADIC_INFO(1, vars)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encode_numericentity, 0, 0, 2)
	ZEND_ARG_INFO(0, string)
	ZEND_ARG_INFO(0, convmap)
	ZEND_ARG_INFO(0, encoding)
	ZEND_ARG_INFO(0, is_hex)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_decode_numericentity, 0, 0, 2)
	ZEND_ARG_INFO(0, string)
	ZEND_ARG_INFO(0, convmap)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

/* ---- mail / info ---- */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_send_mail, 0, 0, 3)
	ZEND_ARG_INFO(0, to)
	ZEND_ARG_INFO(0, subject)
	ZEND_ARG_INFO(0, message)
	ZEND_ARG_INFO(0, additional_headers)
	ZEND_ARG_INFO(0, additional_parameters)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_get_info, 0, 0, 0)
	ZEND_ARG_INFO(0, type)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_check_encoding, 0, 0, 0)
	ZEND_ARG_INFO(0, var)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

/* ---- multibyte regex family (mb_ereg*, mb_split, mb_regex_*) ---- */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_regex_encoding, 0, 0, 0)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

/* registers is filled in by reference */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg, 0, 0, 2)
	ZEND_ARG_INFO(0, pattern)
	ZEND_ARG_INFO(0, string)
	ZEND_ARG_INFO(1, registers)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_eregi, 0, 0, 2)
	ZEND_ARG_INFO(0, pattern)
	ZEND_ARG_INFO(0, string)
	ZEND_ARG_INFO(1, registers)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_replace, 0, 0, 3)
	ZEND_ARG_INFO(0, pattern)
	ZEND_ARG_INFO(0, replacement)
	ZEND_ARG_INFO(0, string)
	ZEND_ARG_INFO(0, option)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_eregi_replace, 0, 0, 3)
	ZEND_ARG_INFO(0, pattern)
	ZEND_ARG_INFO(0, replacement)
	ZEND_ARG_INFO(0, string)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_replace_callback, 0, 0, 3)
	ZEND_ARG_INFO(0, pattern)
	ZEND_ARG_INFO(0, callback)
	ZEND_ARG_INFO(0, string)
	ZEND_ARG_INFO(0, option)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_split, 0, 0, 2)
	ZEND_ARG_INFO(0, pattern)
	ZEND_ARG_INFO(0, string)
	ZEND_ARG_INFO(0, limit)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_match, 0, 0, 2)
	ZEND_ARG_INFO(0, pattern)
	ZEND_ARG_INFO(0, string)
	ZEND_ARG_INFO(0, option)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search, 0, 0, 0)
	ZEND_ARG_INFO(0, pattern)
	ZEND_ARG_INFO(0, option)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_pos, 0, 0, 0)
	ZEND_ARG_INFO(0, pattern)
	ZEND_ARG_INFO(0, option)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_regs, 0, 0, 0)
	ZEND_ARG_INFO(0, pattern)
	ZEND_ARG_INFO(0, option)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_init, 0, 0, 1)
	ZEND_ARG_INFO(0, string)
	ZEND_ARG_INFO(0, pattern)
	ZEND_ARG_INFO(0, option)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO(arginfo_mb_ereg_search_getregs, 0)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO(arginfo_mb_ereg_search_getpos, 0)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_setpos, 0, 0, 1)
	ZEND_ARG_INFO(0, position)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_regex_set_options, 0, 0, 0)
	ZEND_ARG_INFO(0, options)
ZEND_END_ARG_INFO()
/* }}} */
524
/* {{{ zend_function_entry mbstring_functions[] */
/*
 * Function table handed to the engine through mbstring_module_entry.
 * Every PHP_FE() row pairs a function with its arginfo descriptor; the
 * regex functions are appended via PHP_MBREGEX_FUNCTION_ENTRIES only when
 * HAVE_MBREGEX is set. PHP_FE_END terminates the table.
 */
const zend_function_entry mbstring_functions[] = {
	PHP_FE(mb_convert_case,			arginfo_mb_convert_case)
	PHP_FE(mb_strtoupper,			arginfo_mb_strtoupper)
	PHP_FE(mb_strtolower,			arginfo_mb_strtolower)
	PHP_FE(mb_language,				arginfo_mb_language)
	PHP_FE(mb_internal_encoding,	arginfo_mb_internal_encoding)
	PHP_FE(mb_http_input,			arginfo_mb_http_input)
	PHP_FE(mb_http_output,			arginfo_mb_http_output)
	PHP_FE(mb_detect_order,			arginfo_mb_detect_order)
	PHP_FE(mb_substitute_character,	arginfo_mb_substitute_character)
	PHP_FE(mb_parse_str,			arginfo_mb_parse_str)
	PHP_FE(mb_output_handler,		arginfo_mb_output_handler)
	PHP_FE(mb_preferred_mime_name,	arginfo_mb_preferred_mime_name)
	PHP_FE(mb_strlen,				arginfo_mb_strlen)
	PHP_FE(mb_strpos,				arginfo_mb_strpos)
	PHP_FE(mb_strrpos,				arginfo_mb_strrpos)
	PHP_FE(mb_stripos,				arginfo_mb_stripos)
	PHP_FE(mb_strripos,				arginfo_mb_strripos)
	PHP_FE(mb_strstr,				arginfo_mb_strstr)
	PHP_FE(mb_strrchr,				arginfo_mb_strrchr)
	PHP_FE(mb_stristr,				arginfo_mb_stristr)
	PHP_FE(mb_strrichr,				arginfo_mb_strrichr)
	PHP_FE(mb_substr_count,			arginfo_mb_substr_count)
	PHP_FE(mb_substr,				arginfo_mb_substr)
	PHP_FE(mb_strcut,				arginfo_mb_strcut)
	PHP_FE(mb_strwidth,				arginfo_mb_strwidth)
	PHP_FE(mb_strimwidth,			arginfo_mb_strimwidth)
	PHP_FE(mb_convert_encoding,		arginfo_mb_convert_encoding)
	PHP_FE(mb_detect_encoding,		arginfo_mb_detect_encoding)
	PHP_FE(mb_list_encodings,		arginfo_mb_list_encodings)
	PHP_FE(mb_encoding_aliases,		arginfo_mb_encoding_aliases)
	PHP_FE(mb_convert_kana,			arginfo_mb_convert_kana)
	PHP_FE(mb_encode_mimeheader,	arginfo_mb_encode_mimeheader)
	PHP_FE(mb_decode_mimeheader,	arginfo_mb_decode_mimeheader)
	PHP_FE(mb_convert_variables,	arginfo_mb_convert_variables)
	PHP_FE(mb_encode_numericentity,	arginfo_mb_encode_numericentity)
	PHP_FE(mb_decode_numericentity,	arginfo_mb_decode_numericentity)
	PHP_FE(mb_send_mail,			arginfo_mb_send_mail)
	PHP_FE(mb_get_info,				arginfo_mb_get_info)
	PHP_FE(mb_check_encoding,		arginfo_mb_check_encoding)
#if HAVE_MBREGEX
	PHP_MBREGEX_FUNCTION_ENTRIES
#endif
	PHP_FE_END
};
/* }}} */
572
/* {{{ zend_module_entry mbstring_module_entry */
/*
 * Module descriptor for the "mbstring" extension: name, function table,
 * the module/request startup and shutdown hooks, the info hook, and the
 * module-globals descriptor with its init/shutdown hooks.
 */
zend_module_entry mbstring_module_entry = {
	STANDARD_MODULE_HEADER,
	"mbstring",                     /* extension name */
	mbstring_functions,             /* exported functions */
	PHP_MINIT(mbstring),
	PHP_MSHUTDOWN(mbstring),
	PHP_RINIT(mbstring),
	PHP_RSHUTDOWN(mbstring),
	PHP_MINFO(mbstring),
	NO_VERSION_YET,
	PHP_MODULE_GLOBALS(mbstring),
	PHP_GINIT(mbstring),
	PHP_GSHUTDOWN(mbstring),
	NULL,                           /* NOTE(review): presumably the post-deactivate hook slot -- confirm against zend_module_entry */
	STANDARD_MODULE_PROPERTIES_EX
};
/* }}} */
591
/* {{{ static sapi_post_entry php_post_entries[] */
/*
 * POST handler table using php_std_post_handler for the default content
 * type and rfc1867_post_handler for multipart bodies; NULL-terminated.
 * It differs from mbstr_post_entries only in the default-content-type
 * handler (php_std_post_handler here, php_mb_post_handler there).
 */
static sapi_post_entry php_post_entries[] = {
	{ DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data,	php_std_post_handler },
	{ MULTIPART_CONTENT_TYPE,    sizeof(MULTIPART_CONTENT_TYPE)-1,    NULL,                         rfc1867_post_handler },
	{ NULL, 0, NULL, NULL }
};
/* }}} */
599
/* Expanded only when COMPILE_DL_MBSTRING is defined (presumably a shared/loadable build -- confirm in the build config). */
#ifdef COMPILE_DL_MBSTRING
ZEND_GET_MODULE(mbstring)
#endif
603
604 static char *get_internal_encoding(TSRMLS_D) {
605 if (PG(internal_encoding) && PG(internal_encoding)[0]) {
606 return PG(internal_encoding);
607 } else if (SG(default_charset)) {
608 return SG(default_charset);
609 }
610 return "";
611 }
612
get_input_encoding(TSRMLS_D)613 static char *get_input_encoding(TSRMLS_D) {
614 if (PG(input_encoding) && PG(input_encoding)[0]) {
615 return PG(input_encoding);
616 } else if (SG(default_charset)) {
617 return SG(default_charset);
618 }
619 return "";
620 }
621
get_output_encoding(TSRMLS_D)622 static char *get_output_encoding(TSRMLS_D) {
623 if (PG(output_encoding) && PG(output_encoding)[0]) {
624 return PG(output_encoding);
625 } else if (SG(default_charset)) {
626 return SG(default_charset);
627 }
628 return "";
629 }
630
631
632 /* {{{ allocators */
/* malloc slot of _php_mb_allocators: forwards the request to emalloc(). */
static void *_php_mb_allocators_malloc(unsigned int sz)
{
	void *block = emalloc(sz);

	return block;
}
637
/* realloc slot of _php_mb_allocators: forwards the request to erealloc(). */
static void *_php_mb_allocators_realloc(void *ptr, unsigned int sz)
{
	void *block = erealloc(ptr, sz);

	return block;
}
642
/* calloc slot of _php_mb_allocators: forwards the request to ecalloc(). */
static void *_php_mb_allocators_calloc(unsigned int nelems, unsigned int szelem)
{
	void *block = ecalloc(nelems, szelem);

	return block;
}
647
/* free slot of _php_mb_allocators: releases ptr with efree(). */
static void _php_mb_allocators_free(void *ptr)
{
	efree(ptr);
	return;
}
652
/* pmalloc slot of _php_mb_allocators: pemalloc() with its persistent flag set to 1. */
static void *_php_mb_allocators_pmalloc(unsigned int sz)
{
	void *block = pemalloc(sz, 1);

	return block;
}
657
/* prealloc slot of _php_mb_allocators: perealloc() with its persistent flag set to 1. */
static void *_php_mb_allocators_prealloc(void *ptr, unsigned int sz)
{
	void *block = perealloc(ptr, sz, 1);

	return block;
}
662
/* pfree slot of _php_mb_allocators: pefree() with its persistent flag set to 1. */
static void _php_mb_allocators_pfree(void *ptr)
{
	pefree(ptr, 1);
	return;
}
667
/*
 * Allocator callback table of type mbfl_allocators. The first four slots
 * wrap emalloc/erealloc/ecalloc/efree; the "p" slots wrap the pe*()
 * variants called with their persistent flag set to 1.
 */
static mbfl_allocators _php_mb_allocators = {
	_php_mb_allocators_malloc,
	_php_mb_allocators_realloc,
	_php_mb_allocators_calloc,
	_php_mb_allocators_free,
	_php_mb_allocators_pmalloc,
	_php_mb_allocators_prealloc,
	_php_mb_allocators_pfree
};
677 /* }}} */
678
/* {{{ static sapi_post_entry mbstr_post_entries[] */
/*
 * POST handler table that routes the default content type through
 * php_mb_post_handler; multipart bodies still use rfc1867_post_handler.
 * NULL-terminated.
 */
static sapi_post_entry mbstr_post_entries[] = {
	{ DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data, php_mb_post_handler },
	{ MULTIPART_CONTENT_TYPE,    sizeof(MULTIPART_CONTENT_TYPE)-1,    NULL,                         rfc1867_post_handler },
	{ NULL, 0, NULL, NULL }
};
/* }}} */
686
687 /* {{{ static int php_mb_parse_encoding_list()
688 * Return 0 if input contains any illegal encoding, otherwise 1.
689 * Even if any illegal encoding is detected the result may contain a list
690 * of parsed encodings.
691 */
692 static int
php_mb_parse_encoding_list(const char * value,size_t value_length,const mbfl_encoding *** return_list,size_t * return_size,int persistent TSRMLS_DC)693 php_mb_parse_encoding_list(const char *value, size_t value_length, const mbfl_encoding ***return_list, size_t *return_size, int persistent TSRMLS_DC)
694 {
695 int size, bauto, ret = SUCCESS;
696 size_t n;
697 char *p, *p1, *p2, *endp, *tmpstr;
698 const mbfl_encoding **entry, **list;
699
700 list = NULL;
701 if (value == NULL || value_length <= 0) {
702 if (return_list) {
703 *return_list = NULL;
704 }
705 if (return_size) {
706 *return_size = 0;
707 }
708 return FAILURE;
709 } else {
710 /* copy the value string for work */
711 if (value[0]=='"' && value[value_length-1]=='"' && value_length>2) {
712 tmpstr = (char *)estrndup(value+1, value_length-2);
713 value_length -= 2;
714 }
715 else
716 tmpstr = (char *)estrndup(value, value_length);
717 if (tmpstr == NULL) {
718 return FAILURE;
719 }
720 /* count the number of listed encoding names */
721 endp = tmpstr + value_length;
722 n = 1;
723 p1 = tmpstr;
724 while ((p2 = php_memnstr(p1, ",", 1, endp)) != NULL) {
725 p1 = p2 + 1;
726 n++;
727 }
728 size = n + MBSTRG(default_detect_order_list_size);
729 /* make list */
730 list = (const mbfl_encoding **)pecalloc(size, sizeof(mbfl_encoding*), persistent);
731 if (list != NULL) {
732 entry = list;
733 n = 0;
734 bauto = 0;
735 p1 = tmpstr;
736 do {
737 p2 = p = php_memnstr(p1, ",", 1, endp);
738 if (p == NULL) {
739 p = endp;
740 }
741 *p = '\0';
742 /* trim spaces */
743 while (p1 < p && (*p1 == ' ' || *p1 == '\t')) {
744 p1++;
745 }
746 p--;
747 while (p > p1 && (*p == ' ' || *p == '\t')) {
748 *p = '\0';
749 p--;
750 }
751 /* convert to the encoding number and check encoding */
752 if (strcasecmp(p1, "auto") == 0) {
753 if (!bauto) {
754 const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
755 const size_t identify_list_size = MBSTRG(default_detect_order_list_size);
756 size_t i;
757 bauto = 1;
758 for (i = 0; i < identify_list_size; i++) {
759 *entry++ = mbfl_no2encoding(*src++);
760 n++;
761 }
762 }
763 } else {
764 const mbfl_encoding *encoding = mbfl_name2encoding(p1);
765 if (encoding) {
766 *entry++ = encoding;
767 n++;
768 } else {
769 ret = 0;
770 }
771 }
772 p1 = p2 + 1;
773 } while (n < size && p2 != NULL);
774 if (n > 0) {
775 if (return_list) {
776 *return_list = list;
777 } else {
778 pefree(list, persistent);
779 }
780 } else {
781 pefree(list, persistent);
782 if (return_list) {
783 *return_list = NULL;
784 }
785 ret = 0;
786 }
787 if (return_size) {
788 *return_size = n;
789 }
790 } else {
791 if (return_list) {
792 *return_list = NULL;
793 }
794 if (return_size) {
795 *return_size = 0;
796 }
797 ret = 0;
798 }
799 efree(tmpstr);
800 }
801
802 return ret;
803 }
804 /* }}} */
805
806 /* {{{ static int php_mb_parse_encoding_array()
807 * Return 0 if input contains any illegal encoding, otherwise 1.
808 * Even if any illegal encoding is detected the result may contain a list
809 * of parsed encodings.
810 */
811 static int
php_mb_parse_encoding_array(zval * array,const mbfl_encoding *** return_list,size_t * return_size,int persistent TSRMLS_DC)812 php_mb_parse_encoding_array(zval *array, const mbfl_encoding ***return_list, size_t *return_size, int persistent TSRMLS_DC)
813 {
814 zval **hash_entry;
815 HashTable *target_hash;
816 int i, n, size, bauto, ret = SUCCESS;
817 const mbfl_encoding **list, **entry;
818
819 list = NULL;
820 if (Z_TYPE_P(array) == IS_ARRAY) {
821 target_hash = Z_ARRVAL_P(array);
822 zend_hash_internal_pointer_reset(target_hash);
823 i = zend_hash_num_elements(target_hash);
824 size = i + MBSTRG(default_detect_order_list_size);
825 list = (const mbfl_encoding **)pecalloc(size, sizeof(mbfl_encoding*), persistent);
826 if (list != NULL) {
827 entry = list;
828 bauto = 0;
829 n = 0;
830 while (i > 0) {
831 if (zend_hash_get_current_data(target_hash, (void **) &hash_entry) == FAILURE) {
832 break;
833 }
834 convert_to_string_ex(hash_entry);
835 if (strcasecmp(Z_STRVAL_PP(hash_entry), "auto") == 0) {
836 if (!bauto) {
837 const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
838 const size_t identify_list_size = MBSTRG(default_detect_order_list_size);
839 size_t j;
840
841 bauto = 1;
842 for (j = 0; j < identify_list_size; j++) {
843 *entry++ = mbfl_no2encoding(*src++);
844 n++;
845 }
846 }
847 } else {
848 const mbfl_encoding *encoding = mbfl_name2encoding(Z_STRVAL_PP(hash_entry));
849 if (encoding) {
850 *entry++ = encoding;
851 n++;
852 } else {
853 ret = FAILURE;
854 }
855 }
856 zend_hash_move_forward(target_hash);
857 i--;
858 }
859 if (n > 0) {
860 if (return_list) {
861 *return_list = list;
862 } else {
863 pefree(list, persistent);
864 }
865 } else {
866 pefree(list, persistent);
867 if (return_list) {
868 *return_list = NULL;
869 }
870 ret = FAILURE;
871 }
872 if (return_size) {
873 *return_size = n;
874 }
875 } else {
876 if (return_list) {
877 *return_list = NULL;
878 }
879 if (return_size) {
880 *return_size = 0;
881 }
882 ret = FAILURE;
883 }
884 }
885
886 return ret;
887 }
888 /* }}} */
889
890 /* {{{ zend_multibyte interface */
php_mb_zend_encoding_fetcher(const char * encoding_name TSRMLS_DC)891 static const zend_encoding* php_mb_zend_encoding_fetcher(const char *encoding_name TSRMLS_DC)
892 {
893 return (const zend_encoding*)mbfl_name2encoding(encoding_name);
894 }
895
php_mb_zend_encoding_name_getter(const zend_encoding * encoding)896 static const char *php_mb_zend_encoding_name_getter(const zend_encoding *encoding)
897 {
898 return ((const mbfl_encoding *)encoding)->name;
899 }
900
php_mb_zend_encoding_lexer_compatibility_checker(const zend_encoding * _encoding)901 static int php_mb_zend_encoding_lexer_compatibility_checker(const zend_encoding *_encoding)
902 {
903 const mbfl_encoding *encoding = (const mbfl_encoding*)_encoding;
904 if (encoding->flag & MBFL_ENCTYPE_SBCS) {
905 return 1;
906 }
907 if ((encoding->flag & (MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE)) == MBFL_ENCTYPE_MBCS) {
908 return 1;
909 }
910 return 0;
911 }
912
/*
 * Identify the encoding of arg_string with mbfl_identify_encoding2().
 * When list is NULL, the candidates come from
 * MBSTRG(current_detect_order_list) instead of the caller's list.
 */
static const zend_encoding *php_mb_zend_encoding_detector(const unsigned char *arg_string, size_t arg_length, const zend_encoding **list, size_t list_size TSRMLS_DC)
{
	const mbfl_encoding **candidates = (const mbfl_encoding **)list;
	size_t candidate_count = list_size;
	mbfl_string subject;

	if (list == NULL) {
		candidates = (const mbfl_encoding **)MBSTRG(current_detect_order_list);
		candidate_count = MBSTRG(current_detect_order_list_size);
	}

	mbfl_string_init(&subject);
	subject.no_language = MBSTRG(language);
	subject.val = (unsigned char *)arg_string;
	subject.len = arg_length;

	return (const zend_encoding *) mbfl_identify_encoding2(&subject, candidates, candidate_count, 0);
}
928
/*
 * Convert from[0..from_length) from encoding_from to encoding_to with an
 * mbfl buffer converter.
 *
 * On success *to / *to_length receive the converter's result buffer and
 * length, and the location reported by mbfl_buffer_converter_feed2() is
 * returned. On any failure (converter creation, feeding, or fetching the
 * result) (size_t)-1 is returned and *to / *to_length are left untouched.
 */
static size_t php_mb_zend_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const zend_encoding *encoding_to, const zend_encoding *encoding_from TSRMLS_DC)
{
	const mbfl_encoding *src_encoding = (const mbfl_encoding *)encoding_from;
	const mbfl_encoding *dst_encoding = (const mbfl_encoding *)encoding_to;
	mbfl_string input, output;
	mbfl_buffer_converter *converter;
	size_t retval = (size_t)-1;
	int fed_loc;

	/* describe the source buffer */
	mbfl_string_init(&input);
	mbfl_string_init(&output);
	input.no_encoding = src_encoding->no_encoding;
	input.no_language = MBSTRG(language);
	input.val = (unsigned char*)from;
	input.len = from_length;

	/* set up the converter with the current illegal-character policy */
	converter = mbfl_buffer_converter_new2(src_encoding, dst_encoding, input.len);
	if (converter == NULL) {
		return retval;
	}
	mbfl_buffer_converter_illegal_mode(converter, MBSTRG(current_filter_illegal_mode));
	mbfl_buffer_converter_illegal_substchar(converter, MBSTRG(current_filter_illegal_substchar));

	/* feed, flush, and collect; a non-zero feed status means failure */
	if (mbfl_buffer_converter_feed2(converter, &input, &fed_loc) == 0) {
		mbfl_buffer_converter_flush(converter);
		if (mbfl_buffer_converter_result(converter, &output)) {
			*to = output.val;
			*to_length = output.len;
			retval = fed_loc;
		}
	}

	mbfl_buffer_converter_delete(converter);

	return retval;
}
972
/* Adapter: forwards to php_mb_parse_encoding_list(), casting the output list to the mbfl type. */
static int php_mb_zend_encoding_list_parser(const char *encoding_list, size_t encoding_list_len, const zend_encoding ***return_list, size_t *return_size, int persistent TSRMLS_DC)
{
	const mbfl_encoding ***mb_return_list = (const mbfl_encoding ***)return_list;

	return php_mb_parse_encoding_list(encoding_list, encoding_list_len, mb_return_list, return_size, persistent TSRMLS_CC);
}
977
php_mb_zend_internal_encoding_getter(TSRMLS_D)978 static const zend_encoding *php_mb_zend_internal_encoding_getter(TSRMLS_D)
979 {
980 return (const zend_encoding *)MBSTRG(internal_encoding);
981 }
982
/* Zend multibyte hook: stores `encoding` as the module-global internal
 * encoding. Always returns SUCCESS. */
static int php_mb_zend_internal_encoding_setter(const zend_encoding *encoding TSRMLS_DC)
{
	const mbfl_encoding *replacement = (const mbfl_encoding *)encoding;

	MBSTRG(internal_encoding) = replacement;
	return SUCCESS;
}
988
989 static zend_multibyte_functions php_mb_zend_multibyte_functions = {
990 "mbstring",
991 php_mb_zend_encoding_fetcher,
992 php_mb_zend_encoding_name_getter,
993 php_mb_zend_encoding_lexer_compatibility_checker,
994 php_mb_zend_encoding_detector,
995 php_mb_zend_encoding_converter,
996 php_mb_zend_encoding_list_parser,
997 php_mb_zend_internal_encoding_getter,
998 php_mb_zend_internal_encoding_setter
999 };
1000 /* }}} */
1001
1002 static void *_php_mb_compile_regex(const char *pattern TSRMLS_DC);
1003 static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len);
1004 static void _php_mb_free_regex(void *opaque);
1005
1006 #if HAVE_ONIG
1007 /* {{{ _php_mb_compile_regex */
/* Compiles `pattern` (a NUL-terminated string) with Oniguruma as a
 * case-insensitive, non-capturing, ASCII-encoded Perl-syntax regex.
 *
 * Returns the compiled regex as an opaque pointer, or NULL after emitting an
 * E_WARNING of the form "<pattern>: <oniguruma message>" when compilation
 * fails. */
static void *_php_mb_compile_regex(const char *pattern TSRMLS_DC)
{
	php_mb_regex_t *retval;
	OnigErrorInfo err_info;
	int err_code;

	if ((err_code = onig_new(&retval,
			(const OnigUChar *)pattern,
			(const OnigUChar *)pattern + strlen(pattern),
			ONIG_OPTION_IGNORECASE | ONIG_OPTION_DONT_CAPTURE_GROUP,
			ONIG_ENCODING_ASCII, &OnigSyntaxPerl, &err_info))) {
		OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];

		/* onig_error_code_to_str() is variadic and reads an
		 * OnigErrorInfo *; passing the struct by value is undefined
		 * behaviour for error codes that carry a parameter. */
		onig_error_code_to_str(err_str, err_code, &err_info);
		php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s: %s", pattern, err_str);
		retval = NULL;
	}
	return retval;
}
1026 /* }}} */
1027
1028 /* {{{ _php_mb_match_regex */
_php_mb_match_regex(void * opaque,const char * str,size_t str_len)1029 static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len)
1030 {
1031 return onig_search((php_mb_regex_t *)opaque, (const OnigUChar *)str,
1032 (const OnigUChar*)str + str_len, (const OnigUChar *)str,
1033 (const OnigUChar*)str + str_len, NULL, ONIG_OPTION_NONE) >= 0;
1034 }
1035 /* }}} */
1036
1037 /* {{{ _php_mb_free_regex */
_php_mb_free_regex(void * opaque)1038 static void _php_mb_free_regex(void *opaque)
1039 {
1040 onig_free((php_mb_regex_t *)opaque);
1041 }
1042 /* }}} */
1043 #elif HAVE_PCRE || HAVE_BUNDLED_PCRE
1044 /* {{{ _php_mb_compile_regex */
/* Compiles `pattern` with PCRE as a case-insensitive regex.
 *
 * Returns the compiled pattern as an opaque pointer, or NULL after emitting
 * an E_WARNING of the form "<pattern> (offset=<n>): <pcre message>" when
 * compilation fails. */
static void *_php_mb_compile_regex(const char *pattern TSRMLS_DC)
{
	const char *errmsg;
	int erroffset;
	pcre *compiled;

	compiled = pcre_compile(pattern, PCRE_CASELESS, &errmsg, &erroffset, NULL);
	if (compiled == NULL) {
		php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s (offset=%d): %s", pattern, erroffset, errmsg);
	}
	return compiled;
}
1057 /* }}} */
1058
1059 /* {{{ _php_mb_match_regex */
_php_mb_match_regex(void * opaque,const char * str,size_t str_len)1060 static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len)
1061 {
1062 return pcre_exec((pcre *)opaque, NULL, str, (int)str_len, 0,
1063 0, NULL, 0) >= 0;
1064 }
1065 /* }}} */
1066
1067 /* {{{ _php_mb_free_regex */
/* Releases a pattern previously returned by _php_mb_compile_regex()
 * (PCRE variant). */
static void _php_mb_free_regex(void *opaque)
{
	void *compiled = opaque;

	pcre_free(compiled);
}
1072 /* }}} */
1073 #endif
1074
1075 /* {{{ php_mb_nls_get_default_detect_order_list */
php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang,enum mbfl_no_encoding ** plist,size_t * plist_size)1076 static int php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang, enum mbfl_no_encoding **plist, size_t *plist_size)
1077 {
1078 size_t i;
1079
1080 *plist = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut;
1081 *plist_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
1082
1083 for (i = 0; i < sizeof(php_mb_default_identify_list) / sizeof(php_mb_default_identify_list[0]); i++) {
1084 if (php_mb_default_identify_list[i].lang == lang) {
1085 *plist = (enum mbfl_no_encoding *)php_mb_default_identify_list[i].list;
1086 *plist_size = php_mb_default_identify_list[i].list_size;
1087 return 1;
1088 }
1089 }
1090 return 0;
1091 }
1092 /* }}} */
1093
/* Copies the value starting at `start` into a newly emalloc'd,
 * NUL-terminated buffer and returns it.
 *
 * Copying stops after `len` bytes or at the first unescaped `quote` byte,
 * whichever comes first. A backslash followed by a backslash, or by `quote`
 * when `quote` is non-zero, is collapsed to the second character. All other
 * input is copied one multibyte character at a time, as sized by
 * php_mb_mbchar_bytes_ex() for `encoding`. */
static char *php_mb_rfc1867_substring_conf(const zend_encoding *encoding, char *start, int len, char quote TSRMLS_DC)
{
	char *result = emalloc(len + 2);
	char *out = result;
	int i = 0;

	while (i < len && start[i] != quote) {
		const char next = start[i + 1];

		if (start[i] == '\\' && (next == '\\' || (quote && next == quote))) {
			/* drop the backslash, keep the escaped character */
			*out++ = next;
			i += 2;
		} else {
			size_t nbytes = php_mb_mbchar_bytes_ex(start+i, (const mbfl_encoding *)encoding);

			/* copy the whole character, but never read past len */
			while (nbytes > 0 && i < len) {
				*out++ = start[i++];
				nbytes--;
			}
		}
	}

	*out = '\0';
	return result;
}
1116
/* Splits the next word off *line at the first `stop` byte that is not inside
 * a single- or double-quoted section, stepping over unquoted text one
 * multibyte character at a time.
 *
 * Returns the word as a newly allocated string. *line is advanced past the
 * word and any run of `stop` bytes that follows it, or to the terminating
 * NUL when no `stop` byte was found. */
static char *php_mb_rfc1867_getword(const zend_encoding *encoding, char **line, char stop TSRMLS_DC) /* {{{ */
{
	const mbfl_encoding *enc = (const mbfl_encoding *)encoding;
	char *cur = *line;
	char *word;

	while (*cur != '\0' && *cur != stop) {
		const char q = *cur;

		if (q != '"' && q != '\'') {
			/* ordinary text: step over one (possibly multibyte) character */
			cur += php_mb_mbchar_bytes_ex(cur, enc);
			continue;
		}

		/* quoted section: skip to the closing quote, honouring \<quote> */
		++cur;
		while (*cur != '\0' && *cur != q) {
			if (*cur == '\\' && cur[1] == q) {
				cur += 2;
			} else {
				++cur;
			}
		}
		if (*cur != '\0') {
			++cur;
		}
	}

	if (*cur == '\0') {
		/* no separator: the rest of the line is the word */
		word = estrdup(*line);
		*line += strlen(*line);
		return word;
	}

	word = estrndup(*line, cur - *line);

	/* swallow the separator run */
	while (*cur == stop) {
		cur += php_mb_mbchar_bytes_ex(cur, enc);
	}

	*line = cur;
	return word;
}
1155 /* }}} */
1156
/* Extracts one configuration-style word from `str` and returns it as a newly
 * allocated string.
 *
 * Leading whitespace is skipped. A word that starts with ' or " runs up to
 * the matching quote; any other word runs up to the next whitespace byte.
 * An empty or all-whitespace input yields an empty string. */
static char *php_mb_rfc1867_getword_conf(const zend_encoding *encoding, char *str TSRMLS_DC) /* {{{ */
{
	char *word_end;
	char first;

	while (*str && isspace(*(unsigned char *)str)) {
		++str;
	}

	first = *str;
	if (first == '\0') {
		return estrdup("");
	}

	if (first == '"' || first == '\'') {
		/* quoted: hand over everything after the opening quote */
		++str;
		return php_mb_rfc1867_substring_conf(encoding, str, strlen(str), first TSRMLS_CC);
	}

	/* bare word: ends at the next whitespace byte or at the NUL */
	for (word_end = str; *word_end && !isspace(*(unsigned char *)word_end); ++word_end) {
		/* scanning only */
	}
	return php_mb_rfc1867_substring_conf(encoding, str, word_end - str, 0 TSRMLS_CC);
}
1181 /* }}} */
1182
/* Returns a pointer into `filename` just past its last path separator
 * ('\\' or '/', located with php_mb_safe_strrchr_ex() for `encoding`), or
 * `filename` itself when it contains neither separator. */
static char *php_mb_rfc1867_basename(const zend_encoding *encoding, char *filename TSRMLS_DC) /* {{{ */
{
	const mbfl_encoding *enc = (const mbfl_encoding *)encoding;
	const size_t filename_len = strlen(filename);
	char *bslash, *fslash, *last;

	/* The \ check should technically be needed for win32 systems only, where
	 * it is a valid path separator. However, IE in all its wisdom always
	 * sends the full path of the file on the user's filesystem, which means
	 * that unless the user does basename() they get a bogus file name. Until
	 * IE's user base drops to nil or the problem is fixed, this code must
	 * remain enabled for all systems. */
	bslash = php_mb_safe_strrchr_ex(filename, '\\', filename_len, enc);
	fslash = php_mb_safe_strrchr_ex(filename, '/', filename_len, enc);

	if (bslash == NULL && fslash == NULL) {
		return filename;
	}

	if (bslash == NULL) {
		last = fslash;
	} else if (fslash == NULL) {
		last = bslash;
	} else {
		/* both present: the later one wins */
		last = (bslash > fslash) ? bslash : fslash;
	}
	return last + 1;
}
1210 /* }}} */
1211
1212 /* {{{ php.ini directive handler */
1213 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_language) */
PHP_INI_MH(OnUpdate_mbstring_language)1214 static PHP_INI_MH(OnUpdate_mbstring_language)
1215 {
1216 enum mbfl_no_language no_language;
1217
1218 no_language = mbfl_name2no_language(new_value);
1219 if (no_language == mbfl_no_language_invalid) {
1220 MBSTRG(language) = mbfl_no_language_neutral;
1221 return FAILURE;
1222 }
1223 MBSTRG(language) = no_language;
1224 php_mb_nls_get_default_detect_order_list(no_language, &MBSTRG(default_detect_order_list), &MBSTRG(default_detect_order_list_size));
1225 return SUCCESS;
1226 }
1227 /* }}} */
1228
1229 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_detect_order) */
PHP_INI_MH(OnUpdate_mbstring_detect_order)1230 static PHP_INI_MH(OnUpdate_mbstring_detect_order)
1231 {
1232 const mbfl_encoding **list;
1233 size_t size;
1234
1235 if (!new_value) {
1236 if (MBSTRG(detect_order_list)) {
1237 pefree(MBSTRG(detect_order_list), 1);
1238 }
1239 MBSTRG(detect_order_list) = NULL;
1240 MBSTRG(detect_order_list_size) = 0;
1241 return SUCCESS;
1242 }
1243
1244 if (FAILURE == php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) {
1245 return FAILURE;
1246 }
1247
1248 if (MBSTRG(detect_order_list)) {
1249 pefree(MBSTRG(detect_order_list), 1);
1250 }
1251 MBSTRG(detect_order_list) = list;
1252 MBSTRG(detect_order_list_size) = size;
1253 return SUCCESS;
1254 }
1255 /* }}} */
1256
1257 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_input) */
PHP_INI_MH(OnUpdate_mbstring_http_input)1258 static PHP_INI_MH(OnUpdate_mbstring_http_input)
1259 {
1260 const mbfl_encoding **list;
1261 size_t size;
1262
1263 if (!new_value || !new_value_length) {
1264 if (MBSTRG(http_input_list)) {
1265 pefree(MBSTRG(http_input_list), 1);
1266 }
1267 if (SUCCESS == php_mb_parse_encoding_list(get_input_encoding(TSRMLS_C), strlen(get_input_encoding(TSRMLS_C))+1, &list, &size, 1 TSRMLS_CC)) {
1268 MBSTRG(http_input_list) = list;
1269 MBSTRG(http_input_list_size) = size;
1270 return SUCCESS;
1271 }
1272 MBSTRG(http_input_list) = NULL;
1273 MBSTRG(http_input_list_size) = 0;
1274 return SUCCESS;
1275 }
1276
1277 if (FAILURE == php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) {
1278 return FAILURE;
1279 }
1280
1281 if (MBSTRG(http_input_list)) {
1282 pefree(MBSTRG(http_input_list), 1);
1283 }
1284 MBSTRG(http_input_list) = list;
1285 MBSTRG(http_input_list_size) = size;
1286
1287 if (stage & (PHP_INI_STAGE_ACTIVATE | PHP_INI_STAGE_RUNTIME)) {
1288 php_error_docref("ref.mbstring" TSRMLS_CC, E_DEPRECATED, "Use of mbstring.http_input is deprecated");
1289 }
1290
1291 return SUCCESS;
1292 }
1293 /* }}} */
1294
1295 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output) */
PHP_INI_MH(OnUpdate_mbstring_http_output)1296 static PHP_INI_MH(OnUpdate_mbstring_http_output)
1297 {
1298 const mbfl_encoding *encoding;
1299
1300 if (new_value == NULL || new_value_length == 0) {
1301 encoding = mbfl_name2encoding(get_output_encoding(TSRMLS_C));
1302 if (!encoding) {
1303 MBSTRG(http_output_encoding) = &mbfl_encoding_pass;
1304 MBSTRG(current_http_output_encoding) = &mbfl_encoding_pass;
1305 return SUCCESS;
1306 }
1307 } else {
1308 encoding = mbfl_name2encoding(new_value);
1309 if (!encoding) {
1310 MBSTRG(http_output_encoding) = &mbfl_encoding_pass;
1311 MBSTRG(current_http_output_encoding) = &mbfl_encoding_pass;
1312 return FAILURE;
1313 }
1314 }
1315 MBSTRG(http_output_encoding) = encoding;
1316 MBSTRG(current_http_output_encoding) = encoding;
1317
1318 if (stage & (PHP_INI_STAGE_ACTIVATE | PHP_INI_STAGE_RUNTIME)) {
1319 php_error_docref("ref.mbstring" TSRMLS_CC, E_DEPRECATED, "Use of mbstring.http_output is deprecated");
1320 }
1321
1322 return SUCCESS;
1323 }
1324 /* }}} */
1325
/* {{{ _php_mb_ini_mbstring_internal_encoding_set */
/* Applies `new_value` as the mbstring internal encoding.
 *
 * The name is resolved with mbfl_name2encoding(); a NULL, empty or unknown
 * name falls back to UTF-8. The resolved encoding is stored as both the
 * configured and the current internal encoding. When mbregex is compiled in,
 * its default character type is set from the *resolved* encoding so that it
 * follows the same fallback and alias resolution as the internal encoding.
 *
 * Always returns SUCCESS. */
int _php_mb_ini_mbstring_internal_encoding_set(const char *new_value, uint new_value_length TSRMLS_DC)
{
	const mbfl_encoding *encoding;

	if (!new_value || !new_value_length || !(encoding = mbfl_name2encoding(new_value))) {
		/* falls back to UTF-8 if an unknown encoding name is given */
		encoding = mbfl_no2encoding(mbfl_no_encoding_utf8);
	}
	MBSTRG(internal_encoding) = encoding;
	MBSTRG(current_internal_encoding) = encoding;
#if HAVE_MBREGEX
	{
		/* Use the canonical name of the encoding that was actually selected,
		 * not the raw ini string: the raw string may be NULL or an alias that
		 * only libmbfl resolves, which would leave mbregex on the UTF-8
		 * fallback while the internal encoding is something else. */
		const char *enc_name = encoding ? encoding->name : "UTF-8";
		if (FAILURE == php_mb_regex_set_default_mbctype(enc_name TSRMLS_CC)) {
			/* falls back to UTF-8 if mbregex does not know the encoding */
			enc_name = "UTF-8";
			php_mb_regex_set_default_mbctype(enc_name TSRMLS_CC);
		}
		/* NOTE(review): the current (per-request) mbctype is still set from
		 * the raw value, as before; confirm whether it should also use
		 * enc_name. */
		php_mb_regex_set_mbctype(new_value TSRMLS_CC);
	}
#endif
	return SUCCESS;
}
1350 /* }}} */
1351
1352 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_internal_encoding) */
PHP_INI_MH(OnUpdate_mbstring_internal_encoding)1353 static PHP_INI_MH(OnUpdate_mbstring_internal_encoding)
1354 {
1355 if (stage & (PHP_INI_STAGE_ACTIVATE | PHP_INI_STAGE_RUNTIME)) {
1356 php_error_docref("ref.mbstring" TSRMLS_CC, E_DEPRECATED, "Use of mbstring.internal_encoding is deprecated");
1357 }
1358
1359 if (OnUpdateString(entry, new_value, new_value_length, mh_arg1, mh_arg2, mh_arg3, stage TSRMLS_CC) == FAILURE) {
1360 return FAILURE;
1361 }
1362
1363 if (stage & (PHP_INI_STAGE_STARTUP | PHP_INI_STAGE_SHUTDOWN | PHP_INI_STAGE_RUNTIME)) {
1364 if (new_value && new_value_length) {
1365 return _php_mb_ini_mbstring_internal_encoding_set(new_value, new_value_length TSRMLS_CC);
1366 } else {
1367 return _php_mb_ini_mbstring_internal_encoding_set(get_internal_encoding(TSRMLS_C), strlen(get_internal_encoding(TSRMLS_C))+1 TSRMLS_CC);
1368 }
1369 } else {
1370 /* the corresponding mbstring globals needs to be set according to the
1371 * ini value in the later stage because it never falls back to the
1372 * default value if 1. no value for mbstring.internal_encoding is given,
1373 * 2. mbstring.language directive is processed in per-dir or runtime
1374 * context and 3. call to the handler for mbstring.language is done
1375 * after mbstring.internal_encoding is handled. */
1376 return SUCCESS;
1377 }
1378 }
1379 /* }}} */
1380
1381 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_substitute_character) */
PHP_INI_MH(OnUpdate_mbstring_substitute_character)1382 static PHP_INI_MH(OnUpdate_mbstring_substitute_character)
1383 {
1384 int c;
1385 char *endptr = NULL;
1386
1387 if (new_value != NULL) {
1388 if (strcasecmp("none", new_value) == 0) {
1389 MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
1390 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
1391 } else if (strcasecmp("long", new_value) == 0) {
1392 MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
1393 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
1394 } else if (strcasecmp("entity", new_value) == 0) {
1395 MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
1396 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
1397 } else {
1398 MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1399 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1400 if (new_value_length >0) {
1401 c = strtol(new_value, &endptr, 0);
1402 if (*endptr == '\0') {
1403 MBSTRG(filter_illegal_substchar) = c;
1404 MBSTRG(current_filter_illegal_substchar) = c;
1405 }
1406 }
1407 }
1408 } else {
1409 MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1410 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1411 MBSTRG(filter_illegal_substchar) = 0x3f; /* '?' */
1412 MBSTRG(current_filter_illegal_substchar) = 0x3f; /* '?' */
1413 }
1414
1415 return SUCCESS;
1416 }
1417 /* }}} */
1418
1419 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_encoding_translation) */
PHP_INI_MH(OnUpdate_mbstring_encoding_translation)1420 static PHP_INI_MH(OnUpdate_mbstring_encoding_translation)
1421 {
1422 if (new_value == NULL) {
1423 return FAILURE;
1424 }
1425
1426 OnUpdateBool(entry, new_value, new_value_length, mh_arg1, mh_arg2, mh_arg3, stage TSRMLS_CC);
1427
1428 if (MBSTRG(encoding_translation)) {
1429 sapi_unregister_post_entry(php_post_entries TSRMLS_CC);
1430 sapi_register_post_entries(mbstr_post_entries TSRMLS_CC);
1431 } else {
1432 sapi_unregister_post_entry(mbstr_post_entries TSRMLS_CC);
1433 sapi_register_post_entries(php_post_entries TSRMLS_CC);
1434 }
1435
1436 return SUCCESS;
1437 }
1438 /* }}} */
1439
1440 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes */
PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes)1441 static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes)
1442 {
1443 zval tmp;
1444 void *re = NULL;
1445
1446 if (!new_value) {
1447 new_value = entry->orig_value;
1448 new_value_length = entry->orig_value_length;
1449 }
1450 php_trim(new_value, new_value_length, NULL, 0, &tmp, 3 TSRMLS_CC);
1451
1452 if (Z_STRLEN(tmp) > 0) {
1453 if (!(re = _php_mb_compile_regex(Z_STRVAL(tmp) TSRMLS_CC))) {
1454 zval_dtor(&tmp);
1455 return FAILURE;
1456 }
1457 }
1458
1459 if (MBSTRG(http_output_conv_mimetypes)) {
1460 _php_mb_free_regex(MBSTRG(http_output_conv_mimetypes));
1461 }
1462
1463 MBSTRG(http_output_conv_mimetypes) = re;
1464
1465 zval_dtor(&tmp);
1466 return SUCCESS;
1467 }
1468 /* }}} */
1469 /* }}} */
1470
1471 /* {{{ php.ini directive registration */
1472 PHP_INI_BEGIN()
1473 PHP_INI_ENTRY("mbstring.language", "neutral", PHP_INI_ALL, OnUpdate_mbstring_language)
1474 PHP_INI_ENTRY("mbstring.detect_order", NULL, PHP_INI_ALL, OnUpdate_mbstring_detect_order)
1475 PHP_INI_ENTRY("mbstring.http_input", NULL, PHP_INI_ALL, OnUpdate_mbstring_http_input)
1476 PHP_INI_ENTRY("mbstring.http_output", NULL, PHP_INI_ALL, OnUpdate_mbstring_http_output)
1477 STD_PHP_INI_ENTRY("mbstring.internal_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_internal_encoding, internal_encoding_name, zend_mbstring_globals, mbstring_globals)
1478 PHP_INI_ENTRY("mbstring.substitute_character", NULL, PHP_INI_ALL, OnUpdate_mbstring_substitute_character)
1479 STD_PHP_INI_ENTRY("mbstring.func_overload", "0",
1480 PHP_INI_SYSTEM, OnUpdateLong, func_overload, zend_mbstring_globals, mbstring_globals)
1481
1482 STD_PHP_INI_BOOLEAN("mbstring.encoding_translation", "0",
1483 PHP_INI_SYSTEM | PHP_INI_PERDIR,
1484 OnUpdate_mbstring_encoding_translation,
1485 encoding_translation, zend_mbstring_globals, mbstring_globals)
1486 PHP_INI_ENTRY("mbstring.http_output_conv_mimetypes",
1487 "^(text/|application/xhtml\\+xml)",
1488 PHP_INI_ALL,
1489 OnUpdate_mbstring_http_output_conv_mimetypes)
1490
1491 STD_PHP_INI_BOOLEAN("mbstring.strict_detection", "0",
1492 PHP_INI_ALL,
1493 OnUpdateLong,
1494 strict_detection, zend_mbstring_globals, mbstring_globals)
PHP_INI_END()1495 PHP_INI_END()
1496 /* }}} */
1497
1498 /* {{{ module global initialize handler */
1499 static PHP_GINIT_FUNCTION(mbstring)
1500 {
1501 mbstring_globals->language = mbfl_no_language_uni;
1502 mbstring_globals->internal_encoding = NULL;
1503 mbstring_globals->current_internal_encoding = mbstring_globals->internal_encoding;
1504 mbstring_globals->http_output_encoding = &mbfl_encoding_pass;
1505 mbstring_globals->current_http_output_encoding = &mbfl_encoding_pass;
1506 mbstring_globals->http_input_identify = NULL;
1507 mbstring_globals->http_input_identify_get = NULL;
1508 mbstring_globals->http_input_identify_post = NULL;
1509 mbstring_globals->http_input_identify_cookie = NULL;
1510 mbstring_globals->http_input_identify_string = NULL;
1511 mbstring_globals->http_input_list = NULL;
1512 mbstring_globals->http_input_list_size = 0;
1513 mbstring_globals->detect_order_list = NULL;
1514 mbstring_globals->detect_order_list_size = 0;
1515 mbstring_globals->current_detect_order_list = NULL;
1516 mbstring_globals->current_detect_order_list_size = 0;
1517 mbstring_globals->default_detect_order_list = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut;
1518 mbstring_globals->default_detect_order_list_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
1519 mbstring_globals->filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1520 mbstring_globals->filter_illegal_substchar = 0x3f; /* '?' */
1521 mbstring_globals->current_filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1522 mbstring_globals->current_filter_illegal_substchar = 0x3f; /* '?' */
1523 mbstring_globals->illegalchars = 0;
1524 mbstring_globals->func_overload = 0;
1525 mbstring_globals->encoding_translation = 0;
1526 mbstring_globals->strict_detection = 0;
1527 mbstring_globals->outconv = NULL;
1528 mbstring_globals->http_output_conv_mimetypes = NULL;
1529 #if HAVE_MBREGEX
1530 mbstring_globals->mb_regex_globals = php_mb_regex_globals_alloc(TSRMLS_C);
1531 #endif
1532 }
1533 /* }}} */
1534
1535 /* {{{ PHP_GSHUTDOWN_FUNCTION */
PHP_GSHUTDOWN_FUNCTION(mbstring)1536 static PHP_GSHUTDOWN_FUNCTION(mbstring)
1537 {
1538 if (mbstring_globals->http_input_list) {
1539 free(mbstring_globals->http_input_list);
1540 }
1541 if (mbstring_globals->detect_order_list) {
1542 free(mbstring_globals->detect_order_list);
1543 }
1544 if (mbstring_globals->http_output_conv_mimetypes) {
1545 _php_mb_free_regex(mbstring_globals->http_output_conv_mimetypes);
1546 }
1547 #if HAVE_MBREGEX
1548 php_mb_regex_globals_free(mbstring_globals->mb_regex_globals TSRMLS_CC);
1549 #endif
1550 }
1551 /* }}} */
1552
1553 /* {{{ PHP_MINIT_FUNCTION(mbstring) */
PHP_MINIT_FUNCTION(mbstring)1554 PHP_MINIT_FUNCTION(mbstring)
1555 {
1556 __mbfl_allocators = &_php_mb_allocators;
1557
1558 REGISTER_INI_ENTRIES();
1559
1560 /* This is a global handler. Should not be set in a per-request handler. */
1561 sapi_register_treat_data(mbstr_treat_data TSRMLS_CC);
1562
1563 /* Post handlers are stored in the thread-local context. */
1564 if (MBSTRG(encoding_translation)) {
1565 sapi_register_post_entries(mbstr_post_entries TSRMLS_CC);
1566 }
1567
1568 REGISTER_LONG_CONSTANT("MB_OVERLOAD_MAIL", MB_OVERLOAD_MAIL, CONST_CS | CONST_PERSISTENT);
1569 REGISTER_LONG_CONSTANT("MB_OVERLOAD_STRING", MB_OVERLOAD_STRING, CONST_CS | CONST_PERSISTENT);
1570 REGISTER_LONG_CONSTANT("MB_OVERLOAD_REGEX", MB_OVERLOAD_REGEX, CONST_CS | CONST_PERSISTENT);
1571
1572 REGISTER_LONG_CONSTANT("MB_CASE_UPPER", PHP_UNICODE_CASE_UPPER, CONST_CS | CONST_PERSISTENT);
1573 REGISTER_LONG_CONSTANT("MB_CASE_LOWER", PHP_UNICODE_CASE_LOWER, CONST_CS | CONST_PERSISTENT);
1574 REGISTER_LONG_CONSTANT("MB_CASE_TITLE", PHP_UNICODE_CASE_TITLE, CONST_CS | CONST_PERSISTENT);
1575
1576 #if HAVE_MBREGEX
1577 PHP_MINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1578 #endif
1579
1580 if (FAILURE == zend_multibyte_set_functions(&php_mb_zend_multibyte_functions TSRMLS_CC)) {
1581 return FAILURE;
1582 }
1583
1584 php_rfc1867_set_multibyte_callbacks(
1585 php_mb_encoding_translation,
1586 php_mb_gpc_get_detect_order,
1587 php_mb_gpc_set_input_encoding,
1588 php_mb_rfc1867_getword,
1589 php_mb_rfc1867_getword_conf,
1590 php_mb_rfc1867_basename);
1591
1592 return SUCCESS;
1593 }
1594 /* }}} */
1595
1596 /* {{{ PHP_MSHUTDOWN_FUNCTION(mbstring) */
PHP_MSHUTDOWN_FUNCTION(mbstring)1597 PHP_MSHUTDOWN_FUNCTION(mbstring)
1598 {
1599 UNREGISTER_INI_ENTRIES();
1600
1601 #if HAVE_MBREGEX
1602 PHP_MSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1603 #endif
1604
1605 return SUCCESS;
1606 }
1607 /* }}} */
1608
1609 /* {{{ PHP_RINIT_FUNCTION(mbstring) */
PHP_RINIT_FUNCTION(mbstring)1610 PHP_RINIT_FUNCTION(mbstring)
1611 {
1612 zend_function *func, *orig;
1613 const struct mb_overload_def *p;
1614
1615 MBSTRG(current_internal_encoding) = MBSTRG(internal_encoding);
1616 MBSTRG(current_http_output_encoding) = MBSTRG(http_output_encoding);
1617 MBSTRG(current_filter_illegal_mode) = MBSTRG(filter_illegal_mode);
1618 MBSTRG(current_filter_illegal_substchar) = MBSTRG(filter_illegal_substchar);
1619
1620 MBSTRG(illegalchars) = 0;
1621
1622 php_mb_populate_current_detect_order_list(TSRMLS_C);
1623
1624 /* override original function. */
1625 if (MBSTRG(func_overload)){
1626 p = &(mb_ovld[0]);
1627
1628 while (p->type > 0) {
1629 if ((MBSTRG(func_overload) & p->type) == p->type &&
1630 zend_hash_find(EG(function_table), p->save_func,
1631 strlen(p->save_func)+1, (void **)&orig) != SUCCESS) {
1632
1633 zend_hash_find(EG(function_table), p->ovld_func, strlen(p->ovld_func)+1 , (void **)&func);
1634
1635 if (zend_hash_find(EG(function_table), p->orig_func, strlen(p->orig_func)+1, (void **)&orig) != SUCCESS) {
1636 php_error_docref("ref.mbstring" TSRMLS_CC, E_WARNING, "mbstring couldn't find function %s.", p->orig_func);
1637 return FAILURE;
1638 } else {
1639 zend_hash_add(EG(function_table), p->save_func, strlen(p->save_func)+1, orig, sizeof(zend_function), NULL);
1640
1641 if (zend_hash_update(EG(function_table), p->orig_func, strlen(p->orig_func)+1, func, sizeof(zend_function),
1642 NULL) == FAILURE) {
1643 php_error_docref("ref.mbstring" TSRMLS_CC, E_WARNING, "mbstring couldn't replace function %s.", p->orig_func);
1644 return FAILURE;
1645 }
1646 }
1647 }
1648 p++;
1649 }
1650 }
1651 #if HAVE_MBREGEX
1652 PHP_RINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1653 #endif
1654 zend_multibyte_set_internal_encoding((const zend_encoding *)MBSTRG(internal_encoding) TSRMLS_CC);
1655
1656 return SUCCESS;
1657 }
1658 /* }}} */
1659
1660 /* {{{ PHP_RSHUTDOWN_FUNCTION(mbstring) */
PHP_RSHUTDOWN_FUNCTION(mbstring)1661 PHP_RSHUTDOWN_FUNCTION(mbstring)
1662 {
1663 const struct mb_overload_def *p;
1664 zend_function *orig;
1665
1666 if (MBSTRG(current_detect_order_list) != NULL) {
1667 efree(MBSTRG(current_detect_order_list));
1668 MBSTRG(current_detect_order_list) = NULL;
1669 MBSTRG(current_detect_order_list_size) = 0;
1670 }
1671 if (MBSTRG(outconv) != NULL) {
1672 MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
1673 mbfl_buffer_converter_delete(MBSTRG(outconv));
1674 MBSTRG(outconv) = NULL;
1675 }
1676
1677 /* clear http input identification. */
1678 MBSTRG(http_input_identify) = NULL;
1679 MBSTRG(http_input_identify_post) = NULL;
1680 MBSTRG(http_input_identify_get) = NULL;
1681 MBSTRG(http_input_identify_cookie) = NULL;
1682 MBSTRG(http_input_identify_string) = NULL;
1683
1684 /* clear overloaded function. */
1685 if (MBSTRG(func_overload)){
1686 p = &(mb_ovld[0]);
1687 while (p->type > 0) {
1688 if ((MBSTRG(func_overload) & p->type) == p->type &&
1689 zend_hash_find(EG(function_table), p->save_func,
1690 strlen(p->save_func)+1, (void **)&orig) == SUCCESS) {
1691
1692 zend_hash_update(EG(function_table), p->orig_func, strlen(p->orig_func)+1, orig, sizeof(zend_function), NULL);
1693 zend_hash_del(EG(function_table), p->save_func, strlen(p->save_func)+1);
1694 }
1695 p++;
1696 }
1697 }
1698
1699 #if HAVE_MBREGEX
1700 PHP_RSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1701 #endif
1702
1703 return SUCCESS;
1704 }
1705 /* }}} */
1706
1707 /* {{{ PHP_MINFO_FUNCTION(mbstring) */
PHP_MINFO_FUNCTION(mbstring)1708 PHP_MINFO_FUNCTION(mbstring)
1709 {
1710 php_info_print_table_start();
1711 php_info_print_table_row(2, "Multibyte Support", "enabled");
1712 php_info_print_table_row(2, "Multibyte string engine", "libmbfl");
1713 php_info_print_table_row(2, "HTTP input encoding translation", MBSTRG(encoding_translation) ? "enabled": "disabled");
1714 {
1715 char tmp[256];
1716 snprintf(tmp, sizeof(tmp), "%d.%d.%d", MBFL_VERSION_MAJOR, MBFL_VERSION_MINOR, MBFL_VERSION_TEENY);
1717 php_info_print_table_row(2, "libmbfl version", tmp);
1718 }
1719 php_info_print_table_end();
1720
1721 php_info_print_table_start();
1722 php_info_print_table_header(1, "mbstring extension makes use of \"streamable kanji code filter and converter\", which is distributed under the GNU Lesser General Public License version 2.1.");
1723 php_info_print_table_end();
1724
1725 #if HAVE_MBREGEX
1726 PHP_MINFO(mb_regex)(ZEND_MODULE_INFO_FUNC_ARGS_PASSTHRU);
1727 #endif
1728
1729 DISPLAY_INI_ENTRIES();
1730 }
1731 /* }}} */
1732
1733 /* {{{ proto string mb_language([string language])
1734 Sets the current language or Returns the current language as a string */
PHP_FUNCTION(mb_language)1735 PHP_FUNCTION(mb_language)
1736 {
1737 char *name = NULL;
1738 int name_len = 0;
1739
1740 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &name, &name_len) == FAILURE) {
1741 return;
1742 }
1743 if (name == NULL) {
1744 RETVAL_STRING((char *)mbfl_no_language2name(MBSTRG(language)), 1);
1745 } else {
1746 if (FAILURE == zend_alter_ini_entry(
1747 "mbstring.language", sizeof("mbstring.language"),
1748 name, name_len, PHP_INI_USER, PHP_INI_STAGE_RUNTIME)) {
1749 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown language \"%s\"", name);
1750 RETVAL_FALSE;
1751 } else {
1752 RETVAL_TRUE;
1753 }
1754 }
1755 }
1756 /* }}} */
1757
1758 /* {{{ proto string mb_internal_encoding([string encoding])
1759 Sets the current internal encoding or Returns the current internal encoding as a string */
PHP_FUNCTION(mb_internal_encoding)1760 PHP_FUNCTION(mb_internal_encoding)
1761 {
1762 const char *name = NULL;
1763 int name_len;
1764 const mbfl_encoding *encoding;
1765
1766 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &name, &name_len) == FAILURE) {
1767 RETURN_FALSE;
1768 }
1769 if (name == NULL) {
1770 name = MBSTRG(current_internal_encoding) ? MBSTRG(current_internal_encoding)->name: NULL;
1771 if (name != NULL) {
1772 RETURN_STRING(name, 1);
1773 } else {
1774 RETURN_FALSE;
1775 }
1776 } else {
1777 encoding = mbfl_name2encoding(name);
1778 if (!encoding) {
1779 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
1780 RETURN_FALSE;
1781 } else {
1782 MBSTRG(current_internal_encoding) = encoding;
1783 RETURN_TRUE;
1784 }
1785 }
1786 }
1787 /* }}} */
1788
1789 /* {{{ proto mixed mb_http_input([string type])
1790 Returns the input encoding */
PHP_FUNCTION(mb_http_input)1791 PHP_FUNCTION(mb_http_input)
1792 {
1793 char *typ = NULL;
1794 int typ_len;
1795 int retname;
1796 char *list, *temp;
1797 const mbfl_encoding *result = NULL;
1798
1799 retname = 1;
1800 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &typ, &typ_len) == FAILURE) {
1801 RETURN_FALSE;
1802 }
1803 if (typ == NULL) {
1804 result = MBSTRG(http_input_identify);
1805 } else {
1806 switch (*typ) {
1807 case 'G':
1808 case 'g':
1809 result = MBSTRG(http_input_identify_get);
1810 break;
1811 case 'P':
1812 case 'p':
1813 result = MBSTRG(http_input_identify_post);
1814 break;
1815 case 'C':
1816 case 'c':
1817 result = MBSTRG(http_input_identify_cookie);
1818 break;
1819 case 'S':
1820 case 's':
1821 result = MBSTRG(http_input_identify_string);
1822 break;
1823 case 'I':
1824 case 'i':
1825 {
1826 const mbfl_encoding **entry = MBSTRG(http_input_list);
1827 const size_t n = MBSTRG(http_input_list_size);
1828 size_t i;
1829 array_init(return_value);
1830 for (i = 0; i < n; i++) {
1831 add_next_index_string(return_value, (*entry)->name, 1);
1832 entry++;
1833 }
1834 retname = 0;
1835 }
1836 break;
1837 case 'L':
1838 case 'l':
1839 {
1840 const mbfl_encoding **entry = MBSTRG(http_input_list);
1841 const size_t n = MBSTRG(http_input_list_size);
1842 size_t i;
1843 list = NULL;
1844 for (i = 0; i < n; i++) {
1845 if (list) {
1846 temp = list;
1847 spprintf(&list, 0, "%s,%s", temp, (*entry)->name);
1848 efree(temp);
1849 if (!list) {
1850 break;
1851 }
1852 } else {
1853 list = estrdup((*entry)->name);
1854 }
1855 entry++;
1856 }
1857 }
1858 if (!list) {
1859 RETURN_FALSE;
1860 }
1861 RETVAL_STRING(list, 0);
1862 retname = 0;
1863 break;
1864 default:
1865 result = MBSTRG(http_input_identify);
1866 break;
1867 }
1868 }
1869
1870 if (retname) {
1871 if (result) {
1872 RETVAL_STRING(result->name, 1);
1873 } else {
1874 RETVAL_FALSE;
1875 }
1876 }
1877 }
1878 /* }}} */
1879
1880 /* {{{ proto string mb_http_output([string encoding])
1881 Sets the current output_encoding or returns the current output_encoding as a string */
PHP_FUNCTION(mb_http_output)1882 PHP_FUNCTION(mb_http_output)
1883 {
1884 const char *name = NULL;
1885 int name_len;
1886 const mbfl_encoding *encoding;
1887
1888 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", (char **)&name, &name_len) == FAILURE) {
1889 RETURN_FALSE;
1890 }
1891
1892 if (name == NULL) {
1893 name = MBSTRG(current_http_output_encoding) ? MBSTRG(current_http_output_encoding)->name: NULL;
1894 if (name != NULL) {
1895 RETURN_STRING(name, 1);
1896 } else {
1897 RETURN_FALSE;
1898 }
1899 } else {
1900 encoding = mbfl_name2encoding(name);
1901 if (!encoding) {
1902 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
1903 RETURN_FALSE;
1904 } else {
1905 MBSTRG(current_http_output_encoding) = encoding;
1906 RETURN_TRUE;
1907 }
1908 }
1909 }
1910 /* }}} */
1911
1912 /* {{{ proto bool|array mb_detect_order([mixed encoding-list])
   Sets the current detect_order or returns the current detect_order as an array */
PHP_FUNCTION(mb_detect_order)1914 PHP_FUNCTION(mb_detect_order)
1915 {
1916 zval **arg1 = NULL;
1917
1918 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|Z", &arg1) == FAILURE) {
1919 return;
1920 }
1921
1922 if (!arg1) {
1923 size_t i;
1924 size_t n = MBSTRG(current_detect_order_list_size);
1925 const mbfl_encoding **entry = MBSTRG(current_detect_order_list);
1926 array_init(return_value);
1927 for (i = 0; i < n; i++) {
1928 add_next_index_string(return_value, (*entry)->name, 1);
1929 entry++;
1930 }
1931 } else {
1932 const mbfl_encoding **list = NULL;
1933 size_t size = 0;
1934 switch (Z_TYPE_PP(arg1)) {
1935 case IS_ARRAY:
1936 if (FAILURE == php_mb_parse_encoding_array(*arg1, &list, &size, 0 TSRMLS_CC)) {
1937 if (list) {
1938 efree(list);
1939 }
1940 RETURN_FALSE;
1941 }
1942 break;
1943 default:
1944 convert_to_string_ex(arg1);
1945 if (FAILURE == php_mb_parse_encoding_list(Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1), &list, &size, 0 TSRMLS_CC)) {
1946 if (list) {
1947 efree(list);
1948 }
1949 RETURN_FALSE;
1950 }
1951 break;
1952 }
1953
1954 if (list == NULL) {
1955 RETURN_FALSE;
1956 }
1957
1958 if (MBSTRG(current_detect_order_list)) {
1959 efree(MBSTRG(current_detect_order_list));
1960 }
1961 MBSTRG(current_detect_order_list) = list;
1962 MBSTRG(current_detect_order_list_size) = size;
1963 RETURN_TRUE;
1964 }
1965 }
1966 /* }}} */
1967
1968 /* {{{ proto mixed mb_substitute_character([mixed substchar])
1969 Sets the current substitute_character or returns the current substitute_character */
PHP_FUNCTION(mb_substitute_character)1970 PHP_FUNCTION(mb_substitute_character)
1971 {
1972 zval **arg1 = NULL;
1973
1974 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|Z", &arg1) == FAILURE) {
1975 return;
1976 }
1977
1978 if (!arg1) {
1979 if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
1980 RETURN_STRING("none", 1);
1981 } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
1982 RETURN_STRING("long", 1);
1983 } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
1984 RETURN_STRING("entity", 1);
1985 } else {
1986 RETURN_LONG(MBSTRG(current_filter_illegal_substchar));
1987 }
1988 } else {
1989 RETVAL_TRUE;
1990
1991 switch (Z_TYPE_PP(arg1)) {
1992 case IS_STRING:
1993 if (strncasecmp("none", Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1)) == 0) {
1994 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
1995 } else if (strncasecmp("long", Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1)) == 0) {
1996 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
1997 } else if (strncasecmp("entity", Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1)) == 0) {
1998 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
1999 } else {
2000 convert_to_long_ex(arg1);
2001
2002 if (Z_LVAL_PP(arg1) < 0xffff && Z_LVAL_PP(arg1) > 0x0) {
2003 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
2004 MBSTRG(current_filter_illegal_substchar) = Z_LVAL_PP(arg1);
2005 } else {
2006 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown character.");
2007 RETURN_FALSE;
2008 }
2009 }
2010 break;
2011 default:
2012 convert_to_long_ex(arg1);
2013 if (Z_LVAL_PP(arg1) < 0xffff && Z_LVAL_PP(arg1) > 0x0) {
2014 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
2015 MBSTRG(current_filter_illegal_substchar) = Z_LVAL_PP(arg1);
2016 } else {
2017 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown character.");
2018 RETURN_FALSE;
2019 }
2020 break;
2021 }
2022 }
2023 }
2024 /* }}} */
2025
2026 /* {{{ proto string mb_preferred_mime_name(string encoding)
2027 Return the preferred MIME name (charset) as a string */
PHP_FUNCTION(mb_preferred_mime_name)2028 PHP_FUNCTION(mb_preferred_mime_name)
2029 {
2030 enum mbfl_no_encoding no_encoding;
2031 char *name = NULL;
2032 int name_len;
2033
2034 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &name, &name_len) == FAILURE) {
2035 return;
2036 } else {
2037 no_encoding = mbfl_name2no_encoding(name);
2038 if (no_encoding == mbfl_no_encoding_invalid) {
2039 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
2040 RETVAL_FALSE;
2041 } else {
2042 const char *preferred_name = mbfl_no2preferred_mime_name(no_encoding);
2043 if (preferred_name == NULL || *preferred_name == '\0') {
2044 php_error_docref(NULL TSRMLS_CC, E_WARNING, "No MIME preferred name corresponding to \"%s\"", name);
2045 RETVAL_FALSE;
2046 } else {
2047 RETVAL_STRING((char *)preferred_name, 1);
2048 }
2049 }
2050 }
2051 }
2052 /* }}} */
2053
/* Byte-range predicates; each evaluates to 1 when c lies in one of the two
 * listed inclusive ranges and to 0 otherwise. The argument is evaluated
 * more than once, so it must not have side effects.
 * NOTE(review): the names suggest first/second byte of a Shift_JIS
 * double-byte character - confirm against the users of these macros. */
#define IS_SJIS1(c) (((c) >= 0x81 && (c) <= 0x9f) || ((c) >= 0xe0 && (c) <= 0xf5))
#define IS_SJIS2(c) (((c) >= 0x40 && (c) <= 0x7e) || ((c) >= 0x80 && (c) <= 0xfc))
2056
2057 /* {{{ proto bool mb_parse_str(string encoded_string [, array result])
2058 Parses GET/POST/COOKIE data and sets global variables */
PHP_FUNCTION(mb_parse_str)2059 PHP_FUNCTION(mb_parse_str)
2060 {
2061 zval *track_vars_array = NULL;
2062 char *encstr = NULL;
2063 int encstr_len;
2064 php_mb_encoding_handler_info_t info;
2065 const mbfl_encoding *detected;
2066
2067 track_vars_array = NULL;
2068 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|z", &encstr, &encstr_len, &track_vars_array) == FAILURE) {
2069 return;
2070 }
2071
2072 if (track_vars_array != NULL) {
2073 /* Clear out the array */
2074 zval_dtor(track_vars_array);
2075 array_init(track_vars_array);
2076 }
2077
2078 encstr = estrndup(encstr, encstr_len);
2079
2080 info.data_type = PARSE_STRING;
2081 info.separator = PG(arg_separator).input;
2082 info.report_errors = 1;
2083 info.to_encoding = MBSTRG(current_internal_encoding);
2084 info.to_language = MBSTRG(language);
2085 info.from_encodings = MBSTRG(http_input_list);
2086 info.num_from_encodings = MBSTRG(http_input_list_size);
2087 info.from_language = MBSTRG(language);
2088
2089 if (track_vars_array != NULL) {
2090 detected = _php_mb_encoding_handler_ex(&info, track_vars_array, encstr TSRMLS_CC);
2091 } else {
2092 zval tmp;
2093 if (!EG(active_symbol_table)) {
2094 zend_rebuild_symbol_table(TSRMLS_C);
2095 }
2096 Z_ARRVAL(tmp) = EG(active_symbol_table);
2097 detected = _php_mb_encoding_handler_ex(&info, &tmp, encstr TSRMLS_CC);
2098 }
2099
2100 MBSTRG(http_input_identify) = detected;
2101
2102 RETVAL_BOOL(detected);
2103
2104 if (encstr != NULL) efree(encstr);
2105 }
2106 /* }}} */
2107
2108 /* {{{ proto string mb_output_handler(string contents, int status)
2109 Returns string in output buffer converted to the http_output encoding */
PHP_FUNCTION(mb_output_handler)2110 PHP_FUNCTION(mb_output_handler)
2111 {
2112 char *arg_string;
2113 int arg_string_len;
2114 long arg_status;
2115 mbfl_string string, result;
2116 const char *charset;
2117 char *p;
2118 const mbfl_encoding *encoding;
2119 int last_feed, len;
2120 unsigned char send_text_mimetype = 0;
2121 char *s, *mimetype = NULL;
2122
2123 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl", &arg_string, &arg_string_len, &arg_status) == FAILURE) {
2124 return;
2125 }
2126
2127 encoding = MBSTRG(current_http_output_encoding);
2128
2129 /* start phase only */
2130 if ((arg_status & PHP_OUTPUT_HANDLER_START) != 0) {
2131 /* delete the converter just in case. */
2132 if (MBSTRG(outconv)) {
2133 MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
2134 mbfl_buffer_converter_delete(MBSTRG(outconv));
2135 MBSTRG(outconv) = NULL;
2136 }
2137 if (encoding == &mbfl_encoding_pass) {
2138 RETURN_STRINGL(arg_string, arg_string_len, 1);
2139 }
2140
2141 /* analyze mime type */
2142 if (SG(sapi_headers).mimetype &&
2143 _php_mb_match_regex(
2144 MBSTRG(http_output_conv_mimetypes),
2145 SG(sapi_headers).mimetype,
2146 strlen(SG(sapi_headers).mimetype))) {
2147 if ((s = strchr(SG(sapi_headers).mimetype,';')) == NULL){
2148 mimetype = estrdup(SG(sapi_headers).mimetype);
2149 } else {
2150 mimetype = estrndup(SG(sapi_headers).mimetype,s-SG(sapi_headers).mimetype);
2151 }
2152 send_text_mimetype = 1;
2153 } else if (SG(sapi_headers).send_default_content_type) {
2154 mimetype = SG(default_mimetype) ? SG(default_mimetype) : SAPI_DEFAULT_MIMETYPE;
2155 }
2156
2157 /* if content-type is not yet set, set it and activate the converter */
2158 if (SG(sapi_headers).send_default_content_type || send_text_mimetype) {
2159 charset = encoding->mime_name;
2160 if (charset) {
2161 len = spprintf( &p, 0, "Content-Type: %s; charset=%s", mimetype, charset );
2162 if (sapi_add_header(p, len, 0) != FAILURE) {
2163 SG(sapi_headers).send_default_content_type = 0;
2164 }
2165 }
2166 /* activate the converter */
2167 MBSTRG(outconv) = mbfl_buffer_converter_new2(MBSTRG(current_internal_encoding), encoding, 0);
2168 if (send_text_mimetype){
2169 efree(mimetype);
2170 }
2171 }
2172 }
2173
2174 /* just return if the converter is not activated. */
2175 if (MBSTRG(outconv) == NULL) {
2176 RETURN_STRINGL(arg_string, arg_string_len, 1);
2177 }
2178
2179 /* flag */
2180 last_feed = ((arg_status & PHP_OUTPUT_HANDLER_END) != 0);
2181 /* mode */
2182 mbfl_buffer_converter_illegal_mode(MBSTRG(outconv), MBSTRG(current_filter_illegal_mode));
2183 mbfl_buffer_converter_illegal_substchar(MBSTRG(outconv), MBSTRG(current_filter_illegal_substchar));
2184
2185 /* feed the string */
2186 mbfl_string_init(&string);
2187 /* these are not needed. convd has encoding info.
2188 string.no_language = MBSTRG(language);
2189 string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2190 */
2191 string.val = (unsigned char *)arg_string;
2192 string.len = arg_string_len;
2193 mbfl_buffer_converter_feed(MBSTRG(outconv), &string);
2194 if (last_feed) {
2195 mbfl_buffer_converter_flush(MBSTRG(outconv));
2196 }
2197 /* get the converter output, and return it */
2198 mbfl_buffer_converter_result(MBSTRG(outconv), &result);
2199 RETVAL_STRINGL((char *)result.val, result.len, 0); /* the string is already strdup()'ed */
2200
2201 /* delete the converter if it is the last feed. */
2202 if (last_feed) {
2203 MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
2204 mbfl_buffer_converter_delete(MBSTRG(outconv));
2205 MBSTRG(outconv) = NULL;
2206 }
2207 }
2208 /* }}} */
2209
2210 /* {{{ proto int mb_strlen(string str [, string encoding])
   Gets the number of characters in a string */
PHP_FUNCTION(mb_strlen)2212 PHP_FUNCTION(mb_strlen)
2213 {
2214 int n;
2215 mbfl_string string;
2216 char *enc_name = NULL;
2217 int enc_name_len;
2218
2219 mbfl_string_init(&string);
2220
2221 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s", (char **)&string.val, &string.len, &enc_name, &enc_name_len) == FAILURE) {
2222 RETURN_FALSE;
2223 }
2224
2225 string.no_language = MBSTRG(language);
2226 if (enc_name == NULL) {
2227 string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2228 } else {
2229 string.no_encoding = mbfl_name2no_encoding(enc_name);
2230 if (string.no_encoding == mbfl_no_encoding_invalid) {
2231 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2232 RETURN_FALSE;
2233 }
2234 }
2235
2236 n = mbfl_strlen(&string);
2237 if (n >= 0) {
2238 RETVAL_LONG(n);
2239 } else {
2240 RETVAL_FALSE;
2241 }
2242 }
2243 /* }}} */
2244
2245 /* {{{ proto int mb_strpos(string haystack, string needle [, int offset [, string encoding]])
2246 Find position of first occurrence of a string within another */
PHP_FUNCTION(mb_strpos)2247 PHP_FUNCTION(mb_strpos)
2248 {
2249 int n, reverse = 0;
2250 long offset;
2251 mbfl_string haystack, needle;
2252 char *enc_name = NULL;
2253 int enc_name_len;
2254
2255 mbfl_string_init(&haystack);
2256 mbfl_string_init(&needle);
2257 haystack.no_language = MBSTRG(language);
2258 haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2259 needle.no_language = MBSTRG(language);
2260 needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2261 offset = 0;
2262
2263 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ls", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &offset, &enc_name, &enc_name_len) == FAILURE) {
2264 RETURN_FALSE;
2265 }
2266
2267 if (enc_name != NULL) {
2268 haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2269 if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2270 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2271 RETURN_FALSE;
2272 }
2273 }
2274
2275 if (offset < 0 || offset > mbfl_strlen(&haystack)) {
2276 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset not contained in string");
2277 RETURN_FALSE;
2278 }
2279 if (needle.len == 0) {
2280 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter");
2281 RETURN_FALSE;
2282 }
2283
2284 n = mbfl_strpos(&haystack, &needle, offset, reverse);
2285 if (n >= 0) {
2286 RETVAL_LONG(n);
2287 } else {
2288 switch (-n) {
2289 case 1:
2290 break;
2291 case 2:
2292 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Needle has not positive length");
2293 break;
2294 case 4:
2295 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding or conversion error");
2296 break;
2297 case 8:
2298 php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Argument is empty");
2299 break;
2300 default:
2301 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown error in mb_strpos");
2302 break;
2303 }
2304 RETVAL_FALSE;
2305 }
2306 }
2307 /* }}} */
2308
2309 /* {{{ proto int mb_strrpos(string haystack, string needle [, int offset [, string encoding]])
2310 Find position of last occurrence of a string within another */
PHP_FUNCTION(mb_strrpos)2311 PHP_FUNCTION(mb_strrpos)
2312 {
2313 int n;
2314 mbfl_string haystack, needle;
2315 char *enc_name = NULL;
2316 int enc_name_len;
2317 zval **zoffset = NULL;
2318 long offset = 0, str_flg;
2319 char *enc_name2 = NULL;
2320 int enc_name_len2;
2321
2322 mbfl_string_init(&haystack);
2323 mbfl_string_init(&needle);
2324 haystack.no_language = MBSTRG(language);
2325 haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2326 needle.no_language = MBSTRG(language);
2327 needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2328
2329 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|Zs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &zoffset, &enc_name, &enc_name_len) == FAILURE) {
2330 RETURN_FALSE;
2331 }
2332
2333 if (zoffset) {
2334 if (Z_TYPE_PP(zoffset) == IS_STRING) {
2335 enc_name2 = Z_STRVAL_PP(zoffset);
2336 enc_name_len2 = Z_STRLEN_PP(zoffset);
2337 str_flg = 1;
2338
2339 if (enc_name2 != NULL) {
2340 switch (*enc_name2) {
2341 case '0':
2342 case '1':
2343 case '2':
2344 case '3':
2345 case '4':
2346 case '5':
2347 case '6':
2348 case '7':
2349 case '8':
2350 case '9':
2351 case ' ':
2352 case '-':
2353 case '.':
2354 break;
2355 default :
2356 str_flg = 0;
2357 break;
2358 }
2359 }
2360
2361 if (str_flg) {
2362 convert_to_long_ex(zoffset);
2363 offset = Z_LVAL_PP(zoffset);
2364 } else {
2365 enc_name = enc_name2;
2366 enc_name_len = enc_name_len2;
2367 }
2368 } else {
2369 convert_to_long_ex(zoffset);
2370 offset = Z_LVAL_PP(zoffset);
2371 }
2372 }
2373
2374 if (enc_name != NULL) {
2375 haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2376 if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2377 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2378 RETURN_FALSE;
2379 }
2380 }
2381
2382 if (haystack.len <= 0) {
2383 RETURN_FALSE;
2384 }
2385 if (needle.len <= 0) {
2386 RETURN_FALSE;
2387 }
2388
2389 {
2390 int haystack_char_len = mbfl_strlen(&haystack);
2391 if ((offset > 0 && offset > haystack_char_len) ||
2392 (offset < 0 && -offset > haystack_char_len)) {
2393 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset is greater than the length of haystack string");
2394 RETURN_FALSE;
2395 }
2396 }
2397
2398 n = mbfl_strpos(&haystack, &needle, offset, 1);
2399 if (n >= 0) {
2400 RETVAL_LONG(n);
2401 } else {
2402 RETVAL_FALSE;
2403 }
2404 }
2405 /* }}} */
2406
2407 /* {{{ proto int mb_stripos(string haystack, string needle [, int offset [, string encoding]])
2408 Finds position of first occurrence of a string within another, case insensitive */
PHP_FUNCTION(mb_stripos)2409 PHP_FUNCTION(mb_stripos)
2410 {
2411 int n;
2412 long offset;
2413 mbfl_string haystack, needle;
2414 const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
2415 int from_encoding_len;
2416 n = -1;
2417 offset = 0;
2418
2419 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ls", (char **)&haystack.val, (int *)&haystack.len, (char **)&needle.val, (int *)&needle.len, &offset, &from_encoding, &from_encoding_len) == FAILURE) {
2420 RETURN_FALSE;
2421 }
2422 if (needle.len == 0) {
2423 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter");
2424 RETURN_FALSE;
2425 }
2426 n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, from_encoding TSRMLS_CC);
2427
2428 if (n >= 0) {
2429 RETVAL_LONG(n);
2430 } else {
2431 RETVAL_FALSE;
2432 }
2433 }
2434 /* }}} */
2435
2436 /* {{{ proto int mb_strripos(string haystack, string needle [, int offset [, string encoding]])
2437 Finds position of last occurrence of a string within another, case insensitive */
PHP_FUNCTION(mb_strripos)2438 PHP_FUNCTION(mb_strripos)
2439 {
2440 int n;
2441 long offset;
2442 mbfl_string haystack, needle;
2443 const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
2444 int from_encoding_len;
2445 n = -1;
2446 offset = 0;
2447
2448 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ls", (char **)&haystack.val, (int *)&haystack.len, (char **)&needle.val, (int *)&needle.len, &offset, &from_encoding, &from_encoding_len) == FAILURE) {
2449 RETURN_FALSE;
2450 }
2451
2452 n = php_mb_stripos(1, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, from_encoding TSRMLS_CC);
2453
2454 if (n >= 0) {
2455 RETVAL_LONG(n);
2456 } else {
2457 RETVAL_FALSE;
2458 }
2459 }
2460 /* }}} */
2461
2462 /* {{{ proto string mb_strstr(string haystack, string needle[, bool part[, string encoding]])
2463 Finds first occurrence of a string within another */
PHP_FUNCTION(mb_strstr)2464 PHP_FUNCTION(mb_strstr)
2465 {
2466 int n, len, mblen;
2467 mbfl_string haystack, needle, result, *ret = NULL;
2468 char *enc_name = NULL;
2469 int enc_name_len;
2470 zend_bool part = 0;
2471
2472 mbfl_string_init(&haystack);
2473 mbfl_string_init(&needle);
2474 haystack.no_language = MBSTRG(language);
2475 haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2476 needle.no_language = MBSTRG(language);
2477 needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2478
2479 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, (int *)&haystack.len, (char **)&needle.val, (int *)&needle.len, &part, &enc_name, &enc_name_len) == FAILURE) {
2480 RETURN_FALSE;
2481 }
2482
2483 if (enc_name != NULL) {
2484 haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2485 if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2486 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2487 RETURN_FALSE;
2488 }
2489 }
2490
2491 if (needle.len <= 0) {
2492 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter");
2493 RETURN_FALSE;
2494 }
2495 n = mbfl_strpos(&haystack, &needle, 0, 0);
2496 if (n >= 0) {
2497 mblen = mbfl_strlen(&haystack);
2498 if (part) {
2499 ret = mbfl_substr(&haystack, &result, 0, n);
2500 if (ret != NULL) {
2501 RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2502 } else {
2503 RETVAL_FALSE;
2504 }
2505 } else {
2506 len = (mblen - n);
2507 ret = mbfl_substr(&haystack, &result, n, len);
2508 if (ret != NULL) {
2509 RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2510 } else {
2511 RETVAL_FALSE;
2512 }
2513 }
2514 } else {
2515 RETVAL_FALSE;
2516 }
2517 }
2518 /* }}} */
2519
2520 /* {{{ proto string mb_strrchr(string haystack, string needle[, bool part[, string encoding]])
2521 Finds the last occurrence of a character in a string within another */
PHP_FUNCTION(mb_strrchr)2522 PHP_FUNCTION(mb_strrchr)
2523 {
2524 int n, len, mblen;
2525 mbfl_string haystack, needle, result, *ret = NULL;
2526 char *enc_name = NULL;
2527 int enc_name_len;
2528 zend_bool part = 0;
2529
2530 mbfl_string_init(&haystack);
2531 mbfl_string_init(&needle);
2532 haystack.no_language = MBSTRG(language);
2533 haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2534 needle.no_language = MBSTRG(language);
2535 needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2536
2537 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &enc_name, &enc_name_len) == FAILURE) {
2538 RETURN_FALSE;
2539 }
2540
2541 if (enc_name != NULL) {
2542 haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2543 if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2544 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2545 RETURN_FALSE;
2546 }
2547 }
2548
2549 if (haystack.len <= 0) {
2550 RETURN_FALSE;
2551 }
2552 if (needle.len <= 0) {
2553 RETURN_FALSE;
2554 }
2555 n = mbfl_strpos(&haystack, &needle, 0, 1);
2556 if (n >= 0) {
2557 mblen = mbfl_strlen(&haystack);
2558 if (part) {
2559 ret = mbfl_substr(&haystack, &result, 0, n);
2560 if (ret != NULL) {
2561 RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2562 } else {
2563 RETVAL_FALSE;
2564 }
2565 } else {
2566 len = (mblen - n);
2567 ret = mbfl_substr(&haystack, &result, n, len);
2568 if (ret != NULL) {
2569 RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2570 } else {
2571 RETVAL_FALSE;
2572 }
2573 }
2574 } else {
2575 RETVAL_FALSE;
2576 }
2577 }
2578 /* }}} */
2579
2580 /* {{{ proto string mb_stristr(string haystack, string needle[, bool part[, string encoding]])
2581 Finds first occurrence of a string within another, case insensitive */
PHP_FUNCTION(mb_stristr)2582 PHP_FUNCTION(mb_stristr)
2583 {
2584 zend_bool part = 0;
2585 unsigned int from_encoding_len, len, mblen;
2586 int n;
2587 mbfl_string haystack, needle, result, *ret = NULL;
2588 const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
2589 mbfl_string_init(&haystack);
2590 mbfl_string_init(&needle);
2591 haystack.no_language = MBSTRG(language);
2592 haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2593 needle.no_language = MBSTRG(language);
2594 needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2595
2596
2597 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &from_encoding, &from_encoding_len) == FAILURE) {
2598 RETURN_FALSE;
2599 }
2600
2601 if (!needle.len) {
2602 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter");
2603 RETURN_FALSE;
2604 }
2605
2606 haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding);
2607 if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2608 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", from_encoding);
2609 RETURN_FALSE;
2610 }
2611
2612 n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, 0, from_encoding TSRMLS_CC);
2613
2614 if (n <0) {
2615 RETURN_FALSE;
2616 }
2617
2618 mblen = mbfl_strlen(&haystack);
2619
2620 if (part) {
2621 ret = mbfl_substr(&haystack, &result, 0, n);
2622 if (ret != NULL) {
2623 RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2624 } else {
2625 RETVAL_FALSE;
2626 }
2627 } else {
2628 len = (mblen - n);
2629 ret = mbfl_substr(&haystack, &result, n, len);
2630 if (ret != NULL) {
2631 RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2632 } else {
2633 RETVAL_FALSE;
2634 }
2635 }
2636 }
2637 /* }}} */
2638
2639 /* {{{ proto string mb_strrichr(string haystack, string needle[, bool part[, string encoding]])
2640 Finds the last occurrence of a character in a string within another, case insensitive */
PHP_FUNCTION(mb_strrichr)2641 PHP_FUNCTION(mb_strrichr)
2642 {
2643 zend_bool part = 0;
2644 int n, from_encoding_len, len, mblen;
2645 mbfl_string haystack, needle, result, *ret = NULL;
2646 const char *from_encoding = MBSTRG(current_internal_encoding)->name;
2647 mbfl_string_init(&haystack);
2648 mbfl_string_init(&needle);
2649 haystack.no_language = MBSTRG(language);
2650 haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2651 needle.no_language = MBSTRG(language);
2652 needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2653
2654
2655 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &from_encoding, &from_encoding_len) == FAILURE) {
2656 RETURN_FALSE;
2657 }
2658
2659 haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding);
2660 if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2661 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", from_encoding);
2662 RETURN_FALSE;
2663 }
2664
2665 n = php_mb_stripos(1, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, 0, from_encoding TSRMLS_CC);
2666
2667 if (n <0) {
2668 RETURN_FALSE;
2669 }
2670
2671 mblen = mbfl_strlen(&haystack);
2672
2673 if (part) {
2674 ret = mbfl_substr(&haystack, &result, 0, n);
2675 if (ret != NULL) {
2676 RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2677 } else {
2678 RETVAL_FALSE;
2679 }
2680 } else {
2681 len = (mblen - n);
2682 ret = mbfl_substr(&haystack, &result, n, len);
2683 if (ret != NULL) {
2684 RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2685 } else {
2686 RETVAL_FALSE;
2687 }
2688 }
2689 }
2690 /* }}} */
2691
2692 /* {{{ proto int mb_substr_count(string haystack, string needle [, string encoding])
2693 Count the number of substring occurrences */
PHP_FUNCTION(mb_substr_count)2694 PHP_FUNCTION(mb_substr_count)
2695 {
2696 int n;
2697 mbfl_string haystack, needle;
2698 char *enc_name = NULL;
2699 int enc_name_len;
2700
2701 mbfl_string_init(&haystack);
2702 mbfl_string_init(&needle);
2703 haystack.no_language = MBSTRG(language);
2704 haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2705 needle.no_language = MBSTRG(language);
2706 needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2707
2708 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|s", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &enc_name, &enc_name_len) == FAILURE) {
2709 return;
2710 }
2711
2712 if (enc_name != NULL) {
2713 haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2714 if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2715 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2716 RETURN_FALSE;
2717 }
2718 }
2719
2720 if (needle.len <= 0) {
2721 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty substring");
2722 RETURN_FALSE;
2723 }
2724
2725 n = mbfl_substr_count(&haystack, &needle);
2726 if (n >= 0) {
2727 RETVAL_LONG(n);
2728 } else {
2729 RETVAL_FALSE;
2730 }
2731 }
2732 /* }}} */
2733
2734 /* {{{ proto string mb_substr(string str, int start [, int length [, string encoding]])
2735 Returns part of a string */
PHP_FUNCTION(mb_substr)2736 PHP_FUNCTION(mb_substr)
2737 {
2738 size_t argc = ZEND_NUM_ARGS();
2739 char *str, *encoding;
2740 long from, len;
2741 int mblen, str_len, encoding_len;
2742 zval **z_len = NULL;
2743 mbfl_string string, result, *ret;
2744
2745 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|Zs", &str, &str_len, &from, &z_len, &encoding, &encoding_len) == FAILURE) {
2746 return;
2747 }
2748
2749 mbfl_string_init(&string);
2750 string.no_language = MBSTRG(language);
2751 string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2752
2753 if (argc == 4) {
2754 string.no_encoding = mbfl_name2no_encoding(encoding);
2755 if (string.no_encoding == mbfl_no_encoding_invalid) {
2756 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
2757 RETURN_FALSE;
2758 }
2759 }
2760
2761 string.val = (unsigned char *)str;
2762 string.len = str_len;
2763
2764 if (argc < 3 || Z_TYPE_PP(z_len) == IS_NULL) {
2765 len = str_len;
2766 } else {
2767 convert_to_long_ex(z_len);
2768 len = Z_LVAL_PP(z_len);
2769 }
2770
2771 /* measures length */
2772 mblen = 0;
2773 if (from < 0 || len < 0) {
2774 mblen = mbfl_strlen(&string);
2775 }
2776
2777 /* if "from" position is negative, count start position from the end
2778 * of the string
2779 */
2780 if (from < 0) {
2781 from = mblen + from;
2782 if (from < 0) {
2783 from = 0;
2784 }
2785 }
2786
2787 /* if "length" position is negative, set it to the length
2788 * needed to stop that many chars from the end of the string
2789 */
2790 if (len < 0) {
2791 len = (mblen - from) + len;
2792 if (len < 0) {
2793 len = 0;
2794 }
2795 }
2796
2797 if (((MBSTRG(func_overload) & MB_OVERLOAD_STRING) == MB_OVERLOAD_STRING)
2798 && (from >= mbfl_strlen(&string))) {
2799 RETURN_FALSE;
2800 }
2801
2802 if (from > INT_MAX) {
2803 from = INT_MAX;
2804 }
2805 if (len > INT_MAX) {
2806 len = INT_MAX;
2807 }
2808
2809 ret = mbfl_substr(&string, &result, from, len);
2810 if (NULL == ret) {
2811 RETURN_FALSE;
2812 }
2813
2814 RETURN_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */
2815 }
2816 /* }}} */
2817
2818 /* {{{ proto string mb_strcut(string str, int start [, int length [, string encoding]])
2819 Returns part of a string */
PHP_FUNCTION(mb_strcut)2820 PHP_FUNCTION(mb_strcut)
2821 {
2822 size_t argc = ZEND_NUM_ARGS();
2823 char *encoding;
2824 long from, len;
2825 int encoding_len;
2826 zval **z_len = NULL;
2827 mbfl_string string, result, *ret;
2828
2829 mbfl_string_init(&string);
2830 string.no_language = MBSTRG(language);
2831 string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2832
2833 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|Zs", (char **)&string.val, (int **)&string.len, &from, &z_len, &encoding, &encoding_len) == FAILURE) {
2834 return;
2835 }
2836
2837 if (argc == 4) {
2838 string.no_encoding = mbfl_name2no_encoding(encoding);
2839 if (string.no_encoding == mbfl_no_encoding_invalid) {
2840 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
2841 RETURN_FALSE;
2842 }
2843 }
2844
2845 if (argc < 3 || Z_TYPE_PP(z_len) == IS_NULL) {
2846 len = string.len;
2847 } else {
2848 convert_to_long_ex(z_len);
2849 len = Z_LVAL_PP(z_len);
2850 }
2851
2852 /* if "from" position is negative, count start position from the end
2853 * of the string
2854 */
2855 if (from < 0) {
2856 from = string.len + from;
2857 if (from < 0) {
2858 from = 0;
2859 }
2860 }
2861
2862 /* if "length" position is negative, set it to the length
2863 * needed to stop that many chars from the end of the string
2864 */
2865 if (len < 0) {
2866 len = (string.len - from) + len;
2867 if (len < 0) {
2868 len = 0;
2869 }
2870 }
2871
2872 if ((unsigned int)from > string.len) {
2873 RETURN_FALSE;
2874 }
2875
2876 ret = mbfl_strcut(&string, &result, from, len);
2877 if (ret == NULL) {
2878 RETURN_FALSE;
2879 }
2880
2881 RETURN_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */
2882 }
2883 /* }}} */
2884
2885 /* {{{ proto int mb_strwidth(string str [, string encoding])
2886 Gets terminal width of a string */
PHP_FUNCTION(mb_strwidth)2887 PHP_FUNCTION(mb_strwidth)
2888 {
2889 int n;
2890 mbfl_string string;
2891 char *enc_name = NULL;
2892 int enc_name_len;
2893
2894 mbfl_string_init(&string);
2895
2896 string.no_language = MBSTRG(language);
2897 string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2898
2899 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s", (char **)&string.val, &string.len, &enc_name, &enc_name_len) == FAILURE) {
2900 return;
2901 }
2902
2903 if (enc_name != NULL) {
2904 string.no_encoding = mbfl_name2no_encoding(enc_name);
2905 if (string.no_encoding == mbfl_no_encoding_invalid) {
2906 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2907 RETURN_FALSE;
2908 }
2909 }
2910
2911 n = mbfl_strwidth(&string);
2912 if (n >= 0) {
2913 RETVAL_LONG(n);
2914 } else {
2915 RETVAL_FALSE;
2916 }
2917 }
2918 /* }}} */
2919
2920 /* {{{ proto string mb_strimwidth(string str, int start, int width [, string trimmarker [, string encoding]])
2921 Trim the string in terminal width */
PHP_FUNCTION(mb_strimwidth)2922 PHP_FUNCTION(mb_strimwidth)
2923 {
2924 char *str, *trimmarker, *encoding;
2925 long from, width;
2926 int str_len, trimmarker_len, encoding_len;
2927 mbfl_string string, result, marker, *ret;
2928
2929 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sll|ss", &str, &str_len, &from, &width, &trimmarker, &trimmarker_len, &encoding, &encoding_len) == FAILURE) {
2930 return;
2931 }
2932
2933 mbfl_string_init(&string);
2934 mbfl_string_init(&marker);
2935 string.no_language = MBSTRG(language);
2936 string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2937 marker.no_language = MBSTRG(language);
2938 marker.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2939 marker.val = NULL;
2940 marker.len = 0;
2941
2942 if (ZEND_NUM_ARGS() == 5) {
2943 string.no_encoding = marker.no_encoding = mbfl_name2no_encoding(encoding);
2944 if (string.no_encoding == mbfl_no_encoding_invalid) {
2945 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
2946 RETURN_FALSE;
2947 }
2948 }
2949
2950 string.val = (unsigned char *)str;
2951 string.len = str_len;
2952
2953 if (from < 0 || from > str_len) {
2954 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Start position is out of range");
2955 RETURN_FALSE;
2956 }
2957
2958 if (width < 0) {
2959 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Width is negative value");
2960 RETURN_FALSE;
2961 }
2962
2963 if (ZEND_NUM_ARGS() >= 4) {
2964 marker.val = (unsigned char *)trimmarker;
2965 marker.len = trimmarker_len;
2966 }
2967
2968 ret = mbfl_strimwidth(&string, &marker, &result, from, width);
2969
2970 if (ret == NULL) {
2971 RETURN_FALSE;
2972 }
2973
2974 RETVAL_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */
2975 }
2976 /* }}} */
2977
2978 /* {{{ MBSTRING_API char *php_mb_convert_encoding() */
php_mb_convert_encoding(const char * input,size_t length,const char * _to_encoding,const char * _from_encodings,size_t * output_len TSRMLS_DC)2979 MBSTRING_API char * php_mb_convert_encoding(const char *input, size_t length, const char *_to_encoding, const char *_from_encodings, size_t *output_len TSRMLS_DC)
2980 {
2981 mbfl_string string, result, *ret;
2982 const mbfl_encoding *from_encoding, *to_encoding;
2983 mbfl_buffer_converter *convd;
2984 size_t size;
2985 const mbfl_encoding **list;
2986 char *output=NULL;
2987
2988 if (output_len) {
2989 *output_len = 0;
2990 }
2991 if (!input) {
2992 return NULL;
2993 }
2994 /* new encoding */
2995 if (_to_encoding && strlen(_to_encoding)) {
2996 to_encoding = mbfl_name2encoding(_to_encoding);
2997 if (!to_encoding) {
2998 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", _to_encoding);
2999 return NULL;
3000 }
3001 } else {
3002 to_encoding = MBSTRG(current_internal_encoding);
3003 }
3004
3005 /* initialize string */
3006 mbfl_string_init(&string);
3007 mbfl_string_init(&result);
3008 from_encoding = MBSTRG(current_internal_encoding);
3009 string.no_encoding = from_encoding->no_encoding;
3010 string.no_language = MBSTRG(language);
3011 string.val = (unsigned char *)input;
3012 string.len = length;
3013
3014 /* pre-conversion encoding */
3015 if (_from_encodings) {
3016 list = NULL;
3017 size = 0;
3018 php_mb_parse_encoding_list(_from_encodings, strlen(_from_encodings), &list, &size, 0 TSRMLS_CC);
3019 if (size == 1) {
3020 from_encoding = *list;
3021 string.no_encoding = from_encoding->no_encoding;
3022 } else if (size > 1) {
3023 /* auto detect */
3024 from_encoding = mbfl_identify_encoding2(&string, list, size, MBSTRG(strict_detection));
3025 if (from_encoding) {
3026 string.no_encoding = from_encoding->no_encoding;
3027 } else {
3028 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to detect character encoding");
3029 from_encoding = &mbfl_encoding_pass;
3030 to_encoding = from_encoding;
3031 string.no_encoding = from_encoding->no_encoding;
3032 }
3033 } else {
3034 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Illegal character encoding specified");
3035 }
3036 if (list != NULL) {
3037 efree((void *)list);
3038 }
3039 }
3040
3041 /* initialize converter */
3042 convd = mbfl_buffer_converter_new2(from_encoding, to_encoding, string.len);
3043 if (convd == NULL) {
3044 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create character encoding converter");
3045 return NULL;
3046 }
3047 mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
3048 mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
3049
3050 /* do it */
3051 ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
3052 if (ret) {
3053 if (output_len) {
3054 *output_len = ret->len;
3055 }
3056 output = (char *)ret->val;
3057 }
3058
3059 MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
3060 mbfl_buffer_converter_delete(convd);
3061 return output;
3062 }
3063 /* }}} */
3064
3065 /* {{{ proto string mb_convert_encoding(string str, string to-encoding [, mixed from-encoding])
3066 Returns converted string in desired encoding */
PHP_FUNCTION(mb_convert_encoding)3067 PHP_FUNCTION(mb_convert_encoding)
3068 {
3069 char *arg_str, *arg_new;
3070 int str_len, new_len;
3071 zval *arg_old;
3072 int i;
3073 size_t size, l, n;
3074 char *_from_encodings = NULL, *ret, *s_free = NULL;
3075
3076 zval **hash_entry;
3077 HashTable *target_hash;
3078
3079 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|z", &arg_str, &str_len, &arg_new, &new_len, &arg_old) == FAILURE) {
3080 return;
3081 }
3082
3083 if (ZEND_NUM_ARGS() == 3) {
3084 switch (Z_TYPE_P(arg_old)) {
3085 case IS_ARRAY:
3086 target_hash = Z_ARRVAL_P(arg_old);
3087 zend_hash_internal_pointer_reset(target_hash);
3088 i = zend_hash_num_elements(target_hash);
3089 _from_encodings = NULL;
3090
3091 while (i > 0) {
3092 if (zend_hash_get_current_data(target_hash, (void **) &hash_entry) == FAILURE) {
3093 break;
3094 }
3095
3096 convert_to_string_ex(hash_entry);
3097
3098 if ( _from_encodings) {
3099 l = strlen(_from_encodings);
3100 n = strlen(Z_STRVAL_PP(hash_entry));
3101 _from_encodings = erealloc(_from_encodings, l+n+2);
3102 strcpy(_from_encodings+l, ",");
3103 strcpy(_from_encodings+l+1, Z_STRVAL_PP(hash_entry));
3104 } else {
3105 _from_encodings = estrdup(Z_STRVAL_PP(hash_entry));
3106 }
3107
3108 zend_hash_move_forward(target_hash);
3109 i--;
3110 }
3111
3112 if (_from_encodings != NULL && !strlen(_from_encodings)) {
3113 efree(_from_encodings);
3114 _from_encodings = NULL;
3115 }
3116 s_free = _from_encodings;
3117 break;
3118 default:
3119 convert_to_string(arg_old);
3120 _from_encodings = Z_STRVAL_P(arg_old);
3121 break;
3122 }
3123 }
3124
3125 /* new encoding */
3126 ret = php_mb_convert_encoding(arg_str, str_len, arg_new, _from_encodings, &size TSRMLS_CC);
3127 if (ret != NULL) {
3128 RETVAL_STRINGL(ret, size, 0); /* the string is already strdup()'ed */
3129 } else {
3130 RETVAL_FALSE;
3131 }
3132
3133 if ( s_free) {
3134 efree(s_free);
3135 }
3136 }
3137 /* }}} */
3138
3139 /* {{{ proto string mb_convert_case(string sourcestring, int mode [, string encoding])
3140 Returns a case-folded version of sourcestring */
PHP_FUNCTION(mb_convert_case)3141 PHP_FUNCTION(mb_convert_case)
3142 {
3143 const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
3144 char *str;
3145 int str_len, from_encoding_len;
3146 long case_mode = 0;
3147 char *newstr;
3148 size_t ret_len;
3149
3150 RETVAL_FALSE;
3151 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|s!", &str, &str_len,
3152 &case_mode, &from_encoding, &from_encoding_len) == FAILURE)
3153 RETURN_FALSE;
3154
3155 newstr = php_unicode_convert_case(case_mode, str, (size_t) str_len, &ret_len, from_encoding TSRMLS_CC);
3156
3157 if (newstr) {
3158 RETVAL_STRINGL(newstr, ret_len, 0);
3159 }
3160 }
3161 /* }}} */
3162
3163 /* {{{ proto string mb_strtoupper(string sourcestring [, string encoding])
 * Returns an uppercased version of sourcestring
3165 */
PHP_FUNCTION(mb_strtoupper)3166 PHP_FUNCTION(mb_strtoupper)
3167 {
3168 const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
3169 char *str;
3170 int str_len, from_encoding_len;
3171 char *newstr;
3172 size_t ret_len;
3173
3174 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s!", &str, &str_len,
3175 &from_encoding, &from_encoding_len) == FAILURE) {
3176 return;
3177 }
3178 newstr = php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, str, (size_t) str_len, &ret_len, from_encoding TSRMLS_CC);
3179
3180 if (newstr) {
3181 RETURN_STRINGL(newstr, ret_len, 0);
3182 }
3183 RETURN_FALSE;
3184 }
3185 /* }}} */
3186
3187 /* {{{ proto string mb_strtolower(string sourcestring [, string encoding])
3188 * Returns a lowercased version of sourcestring
3189 */
PHP_FUNCTION(mb_strtolower)3190 PHP_FUNCTION(mb_strtolower)
3191 {
3192 const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
3193 char *str;
3194 int str_len, from_encoding_len;
3195 char *newstr;
3196 size_t ret_len;
3197
3198 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s!", &str, &str_len,
3199 &from_encoding, &from_encoding_len) == FAILURE) {
3200 return;
3201 }
3202 newstr = php_unicode_convert_case(PHP_UNICODE_CASE_LOWER, str, (size_t) str_len, &ret_len, from_encoding TSRMLS_CC);
3203
3204 if (newstr) {
3205 RETURN_STRINGL(newstr, ret_len, 0);
3206 }
3207 RETURN_FALSE;
3208 }
3209 /* }}} */
3210
3211 /* {{{ proto string mb_detect_encoding(string str [, mixed encoding_list [, bool strict]])
   Returns the name of the encoding detected for the given string */
PHP_FUNCTION(mb_detect_encoding)3213 PHP_FUNCTION(mb_detect_encoding)
3214 {
3215 char *str;
3216 int str_len;
3217 zend_bool strict=0;
3218 zval *encoding_list;
3219
3220 mbfl_string string;
3221 const mbfl_encoding *ret;
3222 const mbfl_encoding **elist, **list;
3223 size_t size;
3224
3225 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|zb", &str, &str_len, &encoding_list, &strict) == FAILURE) {
3226 return;
3227 }
3228
3229 /* make encoding list */
3230 list = NULL;
3231 size = 0;
3232 if (ZEND_NUM_ARGS() >= 2 && !ZVAL_IS_NULL(encoding_list)) {
3233 switch (Z_TYPE_P(encoding_list)) {
3234 case IS_ARRAY:
3235 if (FAILURE == php_mb_parse_encoding_array(encoding_list, &list, &size, 0 TSRMLS_CC)) {
3236 if (list) {
3237 efree(list);
3238 list = NULL;
3239 size = 0;
3240 }
3241 }
3242 break;
3243 default:
3244 convert_to_string(encoding_list);
3245 if (FAILURE == php_mb_parse_encoding_list(Z_STRVAL_P(encoding_list), Z_STRLEN_P(encoding_list), &list, &size, 0 TSRMLS_CC)) {
3246 if (list) {
3247 efree(list);
3248 list = NULL;
3249 size = 0;
3250 }
3251 }
3252 break;
3253 }
3254 if (size <= 0) {
3255 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Illegal argument");
3256 }
3257 }
3258
3259 if (ZEND_NUM_ARGS() < 3) {
3260 strict = (zend_bool)MBSTRG(strict_detection);
3261 }
3262
3263 if (size > 0 && list != NULL) {
3264 elist = list;
3265 } else {
3266 elist = MBSTRG(current_detect_order_list);
3267 size = MBSTRG(current_detect_order_list_size);
3268 }
3269
3270 mbfl_string_init(&string);
3271 string.no_language = MBSTRG(language);
3272 string.val = (unsigned char *)str;
3273 string.len = str_len;
3274 ret = mbfl_identify_encoding2(&string, elist, size, strict);
3275
3276 if (list != NULL) {
3277 efree((void *)list);
3278 }
3279
3280 if (ret == NULL) {
3281 RETURN_FALSE;
3282 }
3283
3284 RETVAL_STRING((char *)ret->name, 1);
3285 }
3286 /* }}} */
3287
3288 /* {{{ proto mixed mb_list_encodings()
3289 Returns an array of all supported entity encodings */
PHP_FUNCTION(mb_list_encodings)3290 PHP_FUNCTION(mb_list_encodings)
3291 {
3292 const mbfl_encoding **encodings;
3293 const mbfl_encoding *encoding;
3294 int i;
3295
3296 array_init(return_value);
3297 i = 0;
3298 encodings = mbfl_get_supported_encodings();
3299 while ((encoding = encodings[i++]) != NULL) {
3300 add_next_index_string(return_value, (char *) encoding->name, 1);
3301 }
3302 }
3303 /* }}} */
3304
3305 /* {{{ proto array mb_encoding_aliases(string encoding)
3306 Returns an array of the aliases of a given encoding name */
PHP_FUNCTION(mb_encoding_aliases)3307 PHP_FUNCTION(mb_encoding_aliases)
3308 {
3309 const mbfl_encoding *encoding;
3310 char *name = NULL;
3311 int name_len;
3312
3313 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &name, &name_len) == FAILURE) {
3314 RETURN_FALSE;
3315 }
3316
3317 encoding = mbfl_name2encoding(name);
3318 if (!encoding) {
3319 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
3320 RETURN_FALSE;
3321 }
3322
3323 array_init(return_value);
3324 if (encoding->aliases != NULL) {
3325 const char **alias;
3326 for (alias = *encoding->aliases; *alias; ++alias) {
3327 add_next_index_string(return_value, (char *)*alias, 1);
3328 }
3329 }
3330 }
3331 /* }}} */
3332
3333 /* {{{ proto string mb_encode_mimeheader(string str [, string charset [, string transfer-encoding [, string linefeed [, int indent]]]])
3334 Converts the string to MIME "encoded-word" in the format of =?charset?(B|Q)?encoded_string?= */
PHP_FUNCTION(mb_encode_mimeheader)3335 PHP_FUNCTION(mb_encode_mimeheader)
3336 {
3337 enum mbfl_no_encoding charset, transenc;
3338 mbfl_string string, result, *ret;
3339 char *charset_name = NULL;
3340 int charset_name_len;
3341 char *trans_enc_name = NULL;
3342 int trans_enc_name_len;
3343 char *linefeed = "\r\n";
3344 int linefeed_len;
3345 long indent = 0;
3346
3347 mbfl_string_init(&string);
3348 string.no_language = MBSTRG(language);
3349 string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
3350
3351 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|sssl", (char **)&string.val, &string.len, &charset_name, &charset_name_len, &trans_enc_name, &trans_enc_name_len, &linefeed, &linefeed_len, &indent) == FAILURE) {
3352 return;
3353 }
3354
3355 charset = mbfl_no_encoding_pass;
3356 transenc = mbfl_no_encoding_base64;
3357
3358 if (charset_name != NULL) {
3359 charset = mbfl_name2no_encoding(charset_name);
3360 if (charset == mbfl_no_encoding_invalid) {
3361 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", charset_name);
3362 RETURN_FALSE;
3363 }
3364 } else {
3365 const mbfl_language *lang = mbfl_no2language(MBSTRG(language));
3366 if (lang != NULL) {
3367 charset = lang->mail_charset;
3368 transenc = lang->mail_header_encoding;
3369 }
3370 }
3371
3372 if (trans_enc_name != NULL) {
3373 if (*trans_enc_name == 'B' || *trans_enc_name == 'b') {
3374 transenc = mbfl_no_encoding_base64;
3375 } else if (*trans_enc_name == 'Q' || *trans_enc_name == 'q') {
3376 transenc = mbfl_no_encoding_qprint;
3377 }
3378 }
3379
3380 mbfl_string_init(&result);
3381 ret = mbfl_mime_header_encode(&string, &result, charset, transenc, linefeed, indent);
3382 if (ret != NULL) {
3383 RETVAL_STRINGL_CHECK((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */
3384 } else {
3385 RETVAL_FALSE;
3386 }
3387 }
3388 /* }}} */
3389
3390 /* {{{ proto string mb_decode_mimeheader(string string)
3391 Decodes the MIME "encoded-word" in the string */
PHP_FUNCTION(mb_decode_mimeheader)3392 PHP_FUNCTION(mb_decode_mimeheader)
3393 {
3394 mbfl_string string, result, *ret;
3395
3396 mbfl_string_init(&string);
3397 string.no_language = MBSTRG(language);
3398 string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
3399
3400 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", (char **)&string.val, &string.len) == FAILURE) {
3401 return;
3402 }
3403
3404 mbfl_string_init(&result);
3405 ret = mbfl_mime_header_decode(&string, &result, MBSTRG(current_internal_encoding)->no_encoding);
3406 if (ret != NULL) {
3407 RETVAL_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */
3408 } else {
3409 RETVAL_FALSE;
3410 }
3411 }
3412 /* }}} */
3413
3414 /* {{{ proto string mb_convert_kana(string str [, string option] [, string encoding])
3415 Conversion between full-width character and half-width character (Japanese) */
PHP_FUNCTION(mb_convert_kana)3416 PHP_FUNCTION(mb_convert_kana)
3417 {
3418 int opt, i;
3419 mbfl_string string, result, *ret;
3420 char *optstr = NULL;
3421 int optstr_len;
3422 char *encname = NULL;
3423 int encname_len;
3424
3425 mbfl_string_init(&string);
3426 string.no_language = MBSTRG(language);
3427 string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
3428
3429 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|ss", (char **)&string.val, &string.len, &optstr, &optstr_len, &encname, &encname_len) == FAILURE) {
3430 return;
3431 }
3432
3433 /* option */
3434 if (optstr != NULL) {
3435 char *p = optstr;
3436 int n = optstr_len;
3437 i = 0;
3438 opt = 0;
3439 while (i < n) {
3440 i++;
3441 switch (*p++) {
3442 case 'A':
3443 opt |= 0x1;
3444 break;
3445 case 'a':
3446 opt |= 0x10;
3447 break;
3448 case 'R':
3449 opt |= 0x2;
3450 break;
3451 case 'r':
3452 opt |= 0x20;
3453 break;
3454 case 'N':
3455 opt |= 0x4;
3456 break;
3457 case 'n':
3458 opt |= 0x40;
3459 break;
3460 case 'S':
3461 opt |= 0x8;
3462 break;
3463 case 's':
3464 opt |= 0x80;
3465 break;
3466 case 'K':
3467 opt |= 0x100;
3468 break;
3469 case 'k':
3470 opt |= 0x1000;
3471 break;
3472 case 'H':
3473 opt |= 0x200;
3474 break;
3475 case 'h':
3476 opt |= 0x2000;
3477 break;
3478 case 'V':
3479 opt |= 0x800;
3480 break;
3481 case 'C':
3482 opt |= 0x10000;
3483 break;
3484 case 'c':
3485 opt |= 0x20000;
3486 break;
3487 case 'M':
3488 opt |= 0x100000;
3489 break;
3490 case 'm':
3491 opt |= 0x200000;
3492 break;
3493 }
3494 }
3495 } else {
3496 opt = 0x900;
3497 }
3498
3499 /* encoding */
3500 if (encname != NULL) {
3501 string.no_encoding = mbfl_name2no_encoding(encname);
3502 if (string.no_encoding == mbfl_no_encoding_invalid) {
3503 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encname);
3504 RETURN_FALSE;
3505 }
3506 }
3507
3508 ret = mbfl_ja_jp_hantozen(&string, &result, opt);
3509 if (ret != NULL) {
3510 RETVAL_STRINGL_CHECK((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */
3511 } else {
3512 RETVAL_FALSE;
3513 }
3514 }
3515 /* }}} */
3516
3517 #define PHP_MBSTR_STACK_BLOCK_SIZE 32
3518
3519 /* {{{ proto string mb_convert_variables(string to-encoding, mixed from-encoding, mixed vars [, ...])
3520 Converts the string resource in variables to desired encoding */
PHP_FUNCTION(mb_convert_variables)3521 PHP_FUNCTION(mb_convert_variables)
3522 {
3523 zval ***args, ***stack, **var, **hash_entry, **zfrom_enc;
3524 HashTable *target_hash;
3525 mbfl_string string, result, *ret;
3526 const mbfl_encoding *from_encoding, *to_encoding;
3527 mbfl_encoding_detector *identd;
3528 mbfl_buffer_converter *convd;
3529 int n, to_enc_len, argc, stack_level, stack_max;
3530 size_t elistsz;
3531 const mbfl_encoding **elist;
3532 char *to_enc;
3533 void *ptmp;
3534 int recursion_error = 0;
3535
3536 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sZ+", &to_enc, &to_enc_len, &zfrom_enc, &args, &argc) == FAILURE) {
3537 return;
3538 }
3539
3540 /* new encoding */
3541 to_encoding = mbfl_name2encoding(to_enc);
3542 if (!to_encoding) {
3543 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", to_enc);
3544 efree(args);
3545 RETURN_FALSE;
3546 }
3547
3548 /* initialize string */
3549 mbfl_string_init(&string);
3550 mbfl_string_init(&result);
3551 from_encoding = MBSTRG(current_internal_encoding);
3552 string.no_encoding = from_encoding->no_encoding;
3553 string.no_language = MBSTRG(language);
3554
3555 /* pre-conversion encoding */
3556 elist = NULL;
3557 elistsz = 0;
3558 switch (Z_TYPE_PP(zfrom_enc)) {
3559 case IS_ARRAY:
3560 php_mb_parse_encoding_array(*zfrom_enc, &elist, &elistsz, 0 TSRMLS_CC);
3561 break;
3562 default:
3563 convert_to_string_ex(zfrom_enc);
3564 php_mb_parse_encoding_list(Z_STRVAL_PP(zfrom_enc), Z_STRLEN_PP(zfrom_enc), &elist, &elistsz, 0 TSRMLS_CC);
3565 break;
3566 }
3567 if (elistsz <= 0) {
3568 from_encoding = &mbfl_encoding_pass;
3569 } else if (elistsz == 1) {
3570 from_encoding = *elist;
3571 } else {
3572 /* auto detect */
3573 from_encoding = NULL;
3574 stack_max = PHP_MBSTR_STACK_BLOCK_SIZE;
3575 stack = (zval ***)safe_emalloc(stack_max, sizeof(zval **), 0);
3576 stack_level = 0;
3577 identd = mbfl_encoding_detector_new2(elist, elistsz, MBSTRG(strict_detection));
3578 if (identd != NULL) {
3579 n = 0;
3580 while (n < argc || stack_level > 0) {
3581 if (stack_level <= 0) {
3582 var = args[n++];
3583 if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) {
3584 target_hash = HASH_OF(*var);
3585 if (target_hash != NULL) {
3586 zend_hash_internal_pointer_reset(target_hash);
3587 }
3588 }
3589 } else {
3590 stack_level--;
3591 var = stack[stack_level];
3592 }
3593 if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) {
3594 target_hash = HASH_OF(*var);
3595 if (target_hash != NULL) {
3596 while (zend_hash_get_current_data(target_hash, (void **) &hash_entry) != FAILURE) {
3597 if (++target_hash->nApplyCount > 1) {
3598 --target_hash->nApplyCount;
3599 recursion_error = 1;
3600 goto detect_end;
3601 }
3602 zend_hash_move_forward(target_hash);
3603 if (Z_TYPE_PP(hash_entry) == IS_ARRAY || Z_TYPE_PP(hash_entry) == IS_OBJECT) {
3604 if (stack_level >= stack_max) {
3605 stack_max += PHP_MBSTR_STACK_BLOCK_SIZE;
3606 ptmp = erealloc(stack, sizeof(zval **)*stack_max);
3607 stack = (zval ***)ptmp;
3608 }
3609 stack[stack_level] = var;
3610 stack_level++;
3611 var = hash_entry;
3612 target_hash = HASH_OF(*var);
3613 if (target_hash != NULL) {
3614 zend_hash_internal_pointer_reset(target_hash);
3615 continue;
3616 }
3617 } else if (Z_TYPE_PP(hash_entry) == IS_STRING) {
3618 string.val = (unsigned char *)Z_STRVAL_PP(hash_entry);
3619 string.len = Z_STRLEN_PP(hash_entry);
3620 if (mbfl_encoding_detector_feed(identd, &string)) {
3621 goto detect_end; /* complete detecting */
3622 }
3623 }
3624 }
3625 }
3626 } else if (Z_TYPE_PP(var) == IS_STRING) {
3627 string.val = (unsigned char *)Z_STRVAL_PP(var);
3628 string.len = Z_STRLEN_PP(var);
3629 if (mbfl_encoding_detector_feed(identd, &string)) {
3630 goto detect_end; /* complete detecting */
3631 }
3632 }
3633 }
3634 detect_end:
3635 from_encoding = mbfl_encoding_detector_judge2(identd);
3636 mbfl_encoding_detector_delete(identd);
3637 }
3638 if (recursion_error) {
3639 while(stack_level-- && (var = stack[stack_level])) {
3640 if (HASH_OF(*var)->nApplyCount > 1) {
3641 HASH_OF(*var)->nApplyCount--;
3642 }
3643 }
3644 efree(stack);
3645 efree(args);
3646 if (elist != NULL) {
3647 efree((void *)elist);
3648 }
3649 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Cannot handle recursive references");
3650 RETURN_FALSE;
3651 }
3652 efree(stack);
3653
3654 if (!from_encoding) {
3655 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to detect encoding");
3656 from_encoding = &mbfl_encoding_pass;
3657 }
3658 }
3659 if (elist != NULL) {
3660 efree((void *)elist);
3661 }
3662 /* create converter */
3663 convd = NULL;
3664 if (from_encoding != &mbfl_encoding_pass) {
3665 convd = mbfl_buffer_converter_new2(from_encoding, to_encoding, 0);
3666 if (convd == NULL) {
3667 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create converter");
3668 RETURN_FALSE;
3669 }
3670 mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
3671 mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
3672 }
3673
3674 /* convert */
3675 if (convd != NULL) {
3676 stack_max = PHP_MBSTR_STACK_BLOCK_SIZE;
3677 stack = (zval ***)safe_emalloc(stack_max, sizeof(zval **), 0);
3678 stack_level = 0;
3679 n = 0;
3680 while (n < argc || stack_level > 0) {
3681 if (stack_level <= 0) {
3682 var = args[n++];
3683 if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) {
3684 target_hash = HASH_OF(*var);
3685 if (target_hash != NULL) {
3686 zend_hash_internal_pointer_reset(target_hash);
3687 }
3688 }
3689 } else {
3690 stack_level--;
3691 var = stack[stack_level];
3692 }
3693 if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) {
3694 target_hash = HASH_OF(*var);
3695 if (target_hash != NULL) {
3696 while (zend_hash_get_current_data(target_hash, (void **) &hash_entry) != FAILURE) {
3697 zend_hash_move_forward(target_hash);
3698 if (Z_TYPE_PP(hash_entry) == IS_ARRAY || Z_TYPE_PP(hash_entry) == IS_OBJECT) {
3699 if (++(HASH_OF(*hash_entry)->nApplyCount) > 1) {
3700 --(HASH_OF(*hash_entry)->nApplyCount);
3701 recursion_error = 1;
3702 goto conv_end;
3703 }
3704 if (stack_level >= stack_max) {
3705 stack_max += PHP_MBSTR_STACK_BLOCK_SIZE;
3706 ptmp = erealloc(stack, sizeof(zval **)*stack_max);
3707 stack = (zval ***)ptmp;
3708 }
3709 stack[stack_level] = var;
3710 stack_level++;
3711 var = hash_entry;
3712 SEPARATE_ZVAL_IF_NOT_REF(hash_entry);
3713 target_hash = HASH_OF(*var);
3714 if (target_hash != NULL) {
3715 zend_hash_internal_pointer_reset(target_hash);
3716 continue;
3717 }
3718 } else if (Z_TYPE_PP(hash_entry) == IS_STRING) {
3719 string.val = (unsigned char *)Z_STRVAL_PP(hash_entry);
3720 string.len = Z_STRLEN_PP(hash_entry);
3721 ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
3722 if (ret != NULL) {
3723 if (Z_REFCOUNT_PP(hash_entry) > 1) {
3724 Z_DELREF_PP(hash_entry);
3725 MAKE_STD_ZVAL(*hash_entry);
3726 } else {
3727 zval_dtor(*hash_entry);
3728 }
3729 ZVAL_STRINGL(*hash_entry, (char *)ret->val, ret->len, 0);
3730 }
3731 }
3732 }
3733 }
3734 } else if (Z_TYPE_PP(var) == IS_STRING) {
3735 string.val = (unsigned char *)Z_STRVAL_PP(var);
3736 string.len = Z_STRLEN_PP(var);
3737 ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
3738 if (ret != NULL) {
3739 zval_dtor(*var);
3740 ZVAL_STRINGL(*var, (char *)ret->val, ret->len, 0);
3741 }
3742 }
3743 }
3744
3745 conv_end:
3746 MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
3747 mbfl_buffer_converter_delete(convd);
3748
3749 if (recursion_error) {
3750 while(stack_level-- && (var = stack[stack_level])) {
3751 if (HASH_OF(*var)->nApplyCount > 1) {
3752 HASH_OF(*var)->nApplyCount--;
3753 }
3754 }
3755 efree(stack);
3756 efree(args);
3757 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Cannot handle recursive references");
3758 RETURN_FALSE;
3759 }
3760 efree(stack);
3761 }
3762
3763 efree(args);
3764
3765 if (from_encoding) {
3766 RETURN_STRING(from_encoding->name, 1);
3767 } else {
3768 RETURN_FALSE;
3769 }
3770 }
3771 /* }}} */
3772
3773 /* {{{ HTML numeric entity */
3774 /* {{{ static void php_mb_numericentity_exec() */
3775 static void
php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS,int type)3776 php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS, int type)
3777 {
3778 char *str, *encoding;
3779 int str_len, encoding_len;
3780 zval *zconvmap, **hash_entry;
3781 HashTable *target_hash;
3782 size_t argc = ZEND_NUM_ARGS();
3783 int i, *convmap, *mapelm, mapsize=0;
3784 zend_bool is_hex = 0;
3785 mbfl_string string, result, *ret;
3786 enum mbfl_no_encoding no_encoding;
3787
3788 if (zend_parse_parameters(argc TSRMLS_CC, "sz|sb", &str, &str_len, &zconvmap, &encoding, &encoding_len, &is_hex) == FAILURE) {
3789 return;
3790 }
3791
3792 mbfl_string_init(&string);
3793 string.no_language = MBSTRG(language);
3794 string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
3795 string.val = (unsigned char *)str;
3796 string.len = str_len;
3797
3798 /* encoding */
3799 if ((argc == 3 || argc == 4) && encoding_len > 0) {
3800 no_encoding = mbfl_name2no_encoding(encoding);
3801 if (no_encoding == mbfl_no_encoding_invalid) {
3802 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
3803 RETURN_FALSE;
3804 } else {
3805 string.no_encoding = no_encoding;
3806 }
3807 }
3808
3809 if (argc == 4) {
3810 if (type == 0 && is_hex) {
3811 type = 2; /* output in hex format */
3812 }
3813 }
3814
3815 /* conversion map */
3816 convmap = NULL;
3817 if (Z_TYPE_P(zconvmap) == IS_ARRAY) {
3818 target_hash = Z_ARRVAL_P(zconvmap);
3819 zend_hash_internal_pointer_reset(target_hash);
3820 i = zend_hash_num_elements(target_hash);
3821 if (i > 0) {
3822 convmap = (int *)safe_emalloc(i, sizeof(int), 0);
3823 mapelm = convmap;
3824 mapsize = 0;
3825 while (i > 0) {
3826 if (zend_hash_get_current_data(target_hash, (void **) &hash_entry) == FAILURE) {
3827 break;
3828 }
3829 convert_to_long_ex(hash_entry);
3830 *mapelm++ = Z_LVAL_PP(hash_entry);
3831 mapsize++;
3832 i--;
3833 zend_hash_move_forward(target_hash);
3834 }
3835 }
3836 }
3837 if (convmap == NULL) {
3838 RETURN_FALSE;
3839 }
3840 mapsize /= 4;
3841
3842 ret = mbfl_html_numeric_entity(&string, &result, convmap, mapsize, type);
3843 if (ret != NULL) {
3844 RETVAL_STRINGL_CHECK((char *)ret->val, ret->len, 0);
3845 } else {
3846 RETVAL_FALSE;
3847 }
3848 efree((void *)convmap);
3849 }
3850 /* }}} */
3851
3852 /* {{{ proto string mb_encode_numericentity(string string, array convmap [, string encoding [, bool is_hex]])
3853 Converts specified characters to HTML numeric entities */
PHP_FUNCTION(mb_encode_numericentity)3854 PHP_FUNCTION(mb_encode_numericentity)
3855 {
3856 php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
3857 }
3858 /* }}} */
3859
3860 /* {{{ proto string mb_decode_numericentity(string string, array convmap [, string encoding])
3861 Converts HTML numeric entities to character code */
PHP_FUNCTION(mb_decode_numericentity)3862 PHP_FUNCTION(mb_decode_numericentity)
3863 {
3864 php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
3865 }
3866 /* }}} */
3867 /* }}} */
3868
3869 /* {{{ proto int mb_send_mail(string to, string subject, string message [, string additional_headers [, string additional_parameters]])
3870 * Sends an email message with MIME scheme
3871 */
3872
/*
 * If str[pos] starts a folded header line (CR LF followed by a space or a
 * tab), advance pos past the CR LF and past any further run of spaces/tabs,
 * then `continue`. Because it expands to a bare `if` containing `continue`,
 * it is only usable as a statement inside a loop.
 * Reads up to str[pos + 2]; presumably the caller guarantees the buffer is
 * NUL-terminated or long enough -- confirm at the call sites.
 */
#define SKIP_LONG_HEADER_SEP_MBSTRING(str, pos)										\
	if (str[pos] == '\r' && str[pos + 1] == '\n' && (str[pos + 2] == ' ' || str[pos + 2] == '\t')) {	\
		pos += 2;											\
		while (str[pos + 1] == ' ' || str[pos + 1] == '\t') {							\
			pos++;											\
		}												\
		continue;											\
	}

/*
 * Replaces every NUL byte in the len bytes starting at str with a space.
 * Uses the variables pp and ee, which must already be declared in the
 * expanding scope. Not wrapped in do { } while (0): it expands to several
 * statements.
 */
#define MAIL_ASCIIZ_CHECK_MBSTRING(str, len)			\
	pp = str;					\
	ee = pp + len;					\
	while ((pp = memchr(pp, '\0', (ee - pp)))) {	\
		*pp = ' ';				\
	}						\

/*
 * Adds one character to the smart_str named `token` in the expanding scope.
 * When token has an allocated capacity (a > 0) the character is appended;
 * otherwise only token.len is incremented and ch itself is not stored
 * (presumably token then still points into the text being scanned, so
 * growing len extends the view -- confirm against the parser).
 */
#define APPEND_ONE_CHAR(ch) do { \
	if (token.a > 0) { \
		smart_str_appendc(&token, ch); \
	} else {\
		token.len++; \
	} \
} while (0)

/*
 * Gives a smart_str with a == 0 a buffer of its own: the capacity becomes
 * the smallest power of two that is >= len, a + 1 bytes are allocated with
 * emalloc(), the current len bytes are copied over and c is pointed at the
 * copy. No terminating NUL is written. Does nothing when a != 0.
 */
#define SEPARATE_SMART_STR(str) do {\
	if ((str)->a == 0) { \
		char *tmp_ptr; \
		(str)->a = 1; \
		while ((str)->a < (str)->len) { \
			(str)->a <<= 1; \
		} \
		tmp_ptr = emalloc((str)->a + 1); \
		memcpy(tmp_ptr, (str)->c, (str)->len); \
		(str)->c = tmp_ptr; \
	} \
} while (0)
3909
/*
 * Destructor for smart_str values: frees the string only when it has a
 * non-zero capacity. A smart_str with a == 0 is left alone (the header
 * parser below uses a == 0 for tokens that merely point into the input).
 */
static void my_smart_str_dtor(smart_str *s)
{
	if (s->a == 0) {
		return;
	}
	smart_str_free(s);
}
3916
_php_mbstr_parse_mail_headers(HashTable * ht,const char * str,size_t str_len)3917 static int _php_mbstr_parse_mail_headers(HashTable *ht, const char *str, size_t str_len)
3918 {
3919 const char *ps;
3920 size_t icnt;
3921 int state = 0;
3922 int crlf_state = -1;
3923
3924 smart_str token = { 0, 0, 0 };
3925 smart_str fld_name = { 0, 0, 0 }, fld_val = { 0, 0, 0 };
3926
3927 ps = str;
3928 icnt = str_len;
3929
3930 /*
3931 * C o n t e n t - T y p e : t e x t / h t m l \r\n
3932 * ^ ^^^^^^^^^^^^^^^^^^^^^ ^^^ ^^^^^^^^^^^^^^^^^ ^^^^
3933 * state 0 1 2 3
3934 *
3935 * C o n t e n t - T y p e : t e x t / h t m l \r\n
3936 * ^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^
3937 * crlf_state -1 0 1 -1
3938 *
3939 */
3940
3941 while (icnt > 0) {
3942 switch (*ps) {
3943 case ':':
3944 if (crlf_state == 1) {
3945 APPEND_ONE_CHAR('\r');
3946 }
3947
3948 if (state == 0 || state == 1) {
3949 fld_name = token;
3950
3951 state = 2;
3952 } else {
3953 APPEND_ONE_CHAR(*ps);
3954 }
3955
3956 crlf_state = 0;
3957 break;
3958
3959 case '\n':
3960 if (crlf_state == -1) {
3961 goto out;
3962 }
3963 crlf_state = -1;
3964 break;
3965
3966 case '\r':
3967 if (crlf_state == 1) {
3968 APPEND_ONE_CHAR('\r');
3969 } else {
3970 crlf_state = 1;
3971 }
3972 break;
3973
3974 case ' ': case '\t':
3975 if (crlf_state == -1) {
3976 if (state == 3) {
3977 /* continuing from the previous line */
3978 SEPARATE_SMART_STR(&token);
3979 state = 4;
3980 } else {
3981 /* simply skipping this new line */
3982 state = 5;
3983 }
3984 } else {
3985 if (crlf_state == 1) {
3986 APPEND_ONE_CHAR('\r');
3987 }
3988 if (state == 1 || state == 3) {
3989 APPEND_ONE_CHAR(*ps);
3990 }
3991 }
3992 crlf_state = 0;
3993 break;
3994
3995 default:
3996 switch (state) {
3997 case 0:
3998 token.c = (char *)ps;
3999 token.len = 0;
4000 token.a = 0;
4001 state = 1;
4002 break;
4003
4004 case 2:
4005 if (crlf_state != -1) {
4006 token.c = (char *)ps;
4007 token.len = 0;
4008 token.a = 0;
4009
4010 state = 3;
4011 break;
4012 }
4013 /* break is missing intentionally */
4014
4015 case 3:
4016 if (crlf_state == -1) {
4017 fld_val = token;
4018
4019 if (fld_name.c != NULL && fld_val.c != NULL) {
4020 char *dummy;
4021
4022 /* FIXME: some locale free implementation is
4023 * really required here,,, */
4024 SEPARATE_SMART_STR(&fld_name);
4025 php_strtoupper(fld_name.c, fld_name.len);
4026
4027 zend_hash_update(ht, (char *)fld_name.c, fld_name.len, &fld_val, sizeof(smart_str), (void **)&dummy);
4028
4029 my_smart_str_dtor(&fld_name);
4030 }
4031
4032 memset(&fld_name, 0, sizeof(smart_str));
4033 memset(&fld_val, 0, sizeof(smart_str));
4034
4035 token.c = (char *)ps;
4036 token.len = 0;
4037 token.a = 0;
4038
4039 state = 1;
4040 }
4041 break;
4042
4043 case 4:
4044 APPEND_ONE_CHAR(' ');
4045 state = 3;
4046 break;
4047 }
4048
4049 if (crlf_state == 1) {
4050 APPEND_ONE_CHAR('\r');
4051 }
4052
4053 APPEND_ONE_CHAR(*ps);
4054
4055 crlf_state = 0;
4056 break;
4057 }
4058 ps++, icnt--;
4059 }
4060 out:
4061 if (state == 2) {
4062 token.c = "";
4063 token.len = 0;
4064 token.a = 0;
4065
4066 state = 3;
4067 }
4068 if (state == 3) {
4069 fld_val = token;
4070
4071 if (fld_name.c != NULL && fld_val.c != NULL) {
4072 void *dummy;
4073
4074 /* FIXME: some locale free implementation is
4075 * really required here,,, */
4076 SEPARATE_SMART_STR(&fld_name);
4077 php_strtoupper(fld_name.c, fld_name.len);
4078
4079 zend_hash_update(ht, (char *)fld_name.c, fld_name.len, &fld_val, sizeof(smart_str), (void **)&dummy);
4080
4081 my_smart_str_dtor(&fld_name);
4082 }
4083 }
4084 return state;
4085 }
4086
PHP_FUNCTION(mb_send_mail)4087 PHP_FUNCTION(mb_send_mail)
4088 {
4089 int n;
4090 char *to = NULL;
4091 int to_len;
4092 char *message = NULL;
4093 int message_len;
4094 char *headers = NULL;
4095 int headers_len;
4096 char *subject = NULL;
4097 int subject_len;
4098 char *extra_cmd = NULL;
4099 int extra_cmd_len;
4100 int i;
4101 char *to_r = NULL;
4102 char *force_extra_parameters = INI_STR("mail.force_extra_parameters");
4103 struct {
4104 int cnt_type:1;
4105 int cnt_trans_enc:1;
4106 } suppressed_hdrs = { 0, 0 };
4107
4108 char *message_buf = NULL, *subject_buf = NULL, *p;
4109 mbfl_string orig_str, conv_str;
4110 mbfl_string *pstr; /* pointer to mbfl string for return value */
4111 enum mbfl_no_encoding
4112 tran_cs, /* transfar text charset */
4113 head_enc, /* header transfar encoding */
4114 body_enc; /* body transfar encoding */
4115 mbfl_memory_device device; /* automatic allocateable buffer for additional header */
4116 const mbfl_language *lang;
4117 int err = 0;
4118 HashTable ht_headers;
4119 smart_str *s;
4120 extern void mbfl_memory_device_unput(mbfl_memory_device *device);
4121 char *pp, *ee;
4122
4123 /* initialize */
4124 mbfl_memory_device_init(&device, 0, 0);
4125 mbfl_string_init(&orig_str);
4126 mbfl_string_init(&conv_str);
4127
4128 /* character-set, transfer-encoding */
4129 tran_cs = mbfl_no_encoding_utf8;
4130 head_enc = mbfl_no_encoding_base64;
4131 body_enc = mbfl_no_encoding_base64;
4132 lang = mbfl_no2language(MBSTRG(language));
4133 if (lang != NULL) {
4134 tran_cs = lang->mail_charset;
4135 head_enc = lang->mail_header_encoding;
4136 body_enc = lang->mail_body_encoding;
4137 }
4138
4139 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sss|ss", &to, &to_len, &subject, &subject_len, &message, &message_len, &headers, &headers_len, &extra_cmd, &extra_cmd_len) == FAILURE) {
4140 return;
4141 }
4142
4143 /* ASCIIZ check */
4144 MAIL_ASCIIZ_CHECK_MBSTRING(to, to_len);
4145 MAIL_ASCIIZ_CHECK_MBSTRING(subject, subject_len);
4146 MAIL_ASCIIZ_CHECK_MBSTRING(message, message_len);
4147 if (headers) {
4148 MAIL_ASCIIZ_CHECK_MBSTRING(headers, headers_len);
4149 }
4150 if (extra_cmd) {
4151 MAIL_ASCIIZ_CHECK_MBSTRING(extra_cmd, extra_cmd_len);
4152 }
4153
4154 zend_hash_init(&ht_headers, 0, NULL, (dtor_func_t) my_smart_str_dtor, 0);
4155
4156 if (headers != NULL) {
4157 _php_mbstr_parse_mail_headers(&ht_headers, headers, headers_len);
4158 }
4159
4160 if (zend_hash_find(&ht_headers, "CONTENT-TYPE", sizeof("CONTENT-TYPE") - 1, (void **)&s) == SUCCESS) {
4161 char *tmp;
4162 char *param_name;
4163 char *charset = NULL;
4164
4165 SEPARATE_SMART_STR(s);
4166 smart_str_0(s);
4167
4168 p = strchr(s->c, ';');
4169
4170 if (p != NULL) {
4171 /* skipping the padded spaces */
4172 do {
4173 ++p;
4174 } while (*p == ' ' || *p == '\t');
4175
4176 if (*p != '\0') {
4177 if ((param_name = php_strtok_r(p, "= ", &tmp)) != NULL) {
4178 if (strcasecmp(param_name, "charset") == 0) {
4179 enum mbfl_no_encoding _tran_cs = tran_cs;
4180
4181 charset = php_strtok_r(NULL, "= \"", &tmp);
4182 if (charset != NULL) {
4183 _tran_cs = mbfl_name2no_encoding(charset);
4184 }
4185
4186 if (_tran_cs == mbfl_no_encoding_invalid) {
4187 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unsupported charset \"%s\" - will be regarded as ascii", charset);
4188 _tran_cs = mbfl_no_encoding_ascii;
4189 }
4190 tran_cs = _tran_cs;
4191 }
4192 }
4193 }
4194 }
4195 suppressed_hdrs.cnt_type = 1;
4196 }
4197
4198 if (zend_hash_find(&ht_headers, "CONTENT-TRANSFER-ENCODING", sizeof("CONTENT-TRANSFER-ENCODING") - 1, (void **)&s) == SUCCESS) {
4199 enum mbfl_no_encoding _body_enc;
4200 SEPARATE_SMART_STR(s);
4201 smart_str_0(s);
4202
4203 _body_enc = mbfl_name2no_encoding(s->c);
4204 switch (_body_enc) {
4205 case mbfl_no_encoding_base64:
4206 case mbfl_no_encoding_7bit:
4207 case mbfl_no_encoding_8bit:
4208 body_enc = _body_enc;
4209 break;
4210
4211 default:
4212 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unsupported transfer encoding \"%s\" - will be regarded as 8bit", s->c);
4213 body_enc = mbfl_no_encoding_8bit;
4214 break;
4215 }
4216 suppressed_hdrs.cnt_trans_enc = 1;
4217 }
4218
4219 /* To: */
4220 if (to != NULL) {
4221 if (to_len > 0) {
4222 to_r = estrndup(to, to_len);
4223 for (; to_len; to_len--) {
4224 if (!isspace((unsigned char) to_r[to_len - 1])) {
4225 break;
4226 }
4227 to_r[to_len - 1] = '\0';
4228 }
4229 for (i = 0; to_r[i]; i++) {
4230 if (iscntrl((unsigned char) to_r[i])) {
4231 /* According to RFC 822, section 3.1.1 long headers may be separated into
4232 * parts using CRLF followed at least one linear-white-space character ('\t' or ' ').
4233 * To prevent these separators from being replaced with a space, we use the
4234 * SKIP_LONG_HEADER_SEP_MBSTRING to skip over them.
4235 */
4236 SKIP_LONG_HEADER_SEP_MBSTRING(to_r, i);
4237 to_r[i] = ' ';
4238 }
4239 }
4240 } else {
4241 to_r = to;
4242 }
4243 } else {
4244 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Missing To: field");
4245 err = 1;
4246 }
4247
4248 /* Subject: */
4249 if (subject != NULL && subject_len >= 0) {
4250 orig_str.no_language = MBSTRG(language);
4251 orig_str.val = (unsigned char *)subject;
4252 orig_str.len = subject_len;
4253 orig_str.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
4254 if (orig_str.no_encoding == mbfl_no_encoding_invalid || orig_str.no_encoding == mbfl_no_encoding_pass) {
4255 const mbfl_encoding *encoding = mbfl_identify_encoding2(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
4256 orig_str.no_encoding = encoding ? encoding->no_encoding: mbfl_no_encoding_invalid;
4257 }
4258 pstr = mbfl_mime_header_encode(&orig_str, &conv_str, tran_cs, head_enc, "\n", sizeof("Subject: [PHP-jp nnnnnnnn]"));
4259 if (pstr != NULL) {
4260 subject_buf = subject = (char *)pstr->val;
4261 }
4262 } else {
4263 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Missing Subject: field");
4264 err = 1;
4265 }
4266
4267 /* message body */
4268 if (message != NULL) {
4269 orig_str.no_language = MBSTRG(language);
4270 orig_str.val = (unsigned char *)message;
4271 orig_str.len = (unsigned int)message_len;
4272 orig_str.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
4273
4274 if (orig_str.no_encoding == mbfl_no_encoding_invalid || orig_str.no_encoding == mbfl_no_encoding_pass) {
4275 const mbfl_encoding *encoding = mbfl_identify_encoding2(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
4276 orig_str.no_encoding = encoding ? encoding->no_encoding: mbfl_no_encoding_invalid;
4277 }
4278
4279 pstr = NULL;
4280 {
4281 mbfl_string tmpstr;
4282
4283 if (mbfl_convert_encoding(&orig_str, &tmpstr, tran_cs) != NULL) {
4284 tmpstr.no_encoding=mbfl_no_encoding_8bit;
4285 pstr = mbfl_convert_encoding(&tmpstr, &conv_str, body_enc);
4286 efree(tmpstr.val);
4287 }
4288 }
4289 if (pstr != NULL) {
4290 message_buf = message = (char *)pstr->val;
4291 }
4292 } else {
4293 /* this is not really an error, so it is allowed. */
4294 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty message body");
4295 message = NULL;
4296 }
4297
4298 /* other headers */
4299 #define PHP_MBSTR_MAIL_MIME_HEADER1 "MIME-Version: 1.0"
4300 #define PHP_MBSTR_MAIL_MIME_HEADER2 "Content-Type: text/plain"
4301 #define PHP_MBSTR_MAIL_MIME_HEADER3 "; charset="
4302 #define PHP_MBSTR_MAIL_MIME_HEADER4 "Content-Transfer-Encoding: "
4303 if (headers != NULL) {
4304 p = headers;
4305 n = headers_len;
4306 mbfl_memory_device_strncat(&device, p, n);
4307 if (n > 0 && p[n - 1] != '\n') {
4308 mbfl_memory_device_strncat(&device, "\n", 1);
4309 }
4310 }
4311
4312 if (!zend_hash_exists(&ht_headers, "MIME-VERSION", sizeof("MIME-VERSION") - 1)) {
4313 mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER1, sizeof(PHP_MBSTR_MAIL_MIME_HEADER1) - 1);
4314 mbfl_memory_device_strncat(&device, "\n", 1);
4315 }
4316
4317 if (!suppressed_hdrs.cnt_type) {
4318 mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER2, sizeof(PHP_MBSTR_MAIL_MIME_HEADER2) - 1);
4319
4320 p = (char *)mbfl_no2preferred_mime_name(tran_cs);
4321 if (p != NULL) {
4322 mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER3, sizeof(PHP_MBSTR_MAIL_MIME_HEADER3) - 1);
4323 mbfl_memory_device_strcat(&device, p);
4324 }
4325 mbfl_memory_device_strncat(&device, "\n", 1);
4326 }
4327 if (!suppressed_hdrs.cnt_trans_enc) {
4328 mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER4, sizeof(PHP_MBSTR_MAIL_MIME_HEADER4) - 1);
4329 p = (char *)mbfl_no2preferred_mime_name(body_enc);
4330 if (p == NULL) {
4331 p = "7bit";
4332 }
4333 mbfl_memory_device_strcat(&device, p);
4334 mbfl_memory_device_strncat(&device, "\n", 1);
4335 }
4336
4337 mbfl_memory_device_unput(&device);
4338 mbfl_memory_device_output('\0', &device);
4339 headers = (char *)device.buffer;
4340
4341 if (force_extra_parameters) {
4342 extra_cmd = php_escape_shell_cmd(force_extra_parameters);
4343 } else if (extra_cmd) {
4344 extra_cmd = php_escape_shell_cmd(extra_cmd);
4345 }
4346
4347 if (!err && php_mail(to_r, subject, message, headers, extra_cmd TSRMLS_CC)) {
4348 RETVAL_TRUE;
4349 } else {
4350 RETVAL_FALSE;
4351 }
4352
4353 if (extra_cmd) {
4354 efree(extra_cmd);
4355 }
4356 if (to_r != to) {
4357 efree(to_r);
4358 }
4359 if (subject_buf) {
4360 efree((void *)subject_buf);
4361 }
4362 if (message_buf) {
4363 efree((void *)message_buf);
4364 }
4365 mbfl_memory_device_clear(&device);
4366 zend_hash_destroy(&ht_headers);
4367 }
4368
4369 #undef SKIP_LONG_HEADER_SEP_MBSTRING
4370 #undef MAIL_ASCIIZ_CHECK_MBSTRING
4371 #undef APPEND_ONE_CHAR
4372 #undef SEPARATE_SMART_STR
4373 #undef PHP_MBSTR_MAIL_MIME_HEADER1
4374 #undef PHP_MBSTR_MAIL_MIME_HEADER2
4375 #undef PHP_MBSTR_MAIL_MIME_HEADER3
4376 #undef PHP_MBSTR_MAIL_MIME_HEADER4
4377 /* }}} */
4378
4379 /* {{{ proto mixed mb_get_info([string type])
4380 Returns the current settings of mbstring */
PHP_FUNCTION(mb_get_info)4381 PHP_FUNCTION(mb_get_info)
4382 {
4383 char *typ = NULL;
4384 int typ_len;
4385 size_t n;
4386 char *name;
4387 const struct mb_overload_def *over_func;
4388 zval *row1, *row2;
4389 const mbfl_language *lang = mbfl_no2language(MBSTRG(language));
4390 const mbfl_encoding **entry;
4391
4392 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &typ, &typ_len) == FAILURE) {
4393 RETURN_FALSE;
4394 }
4395
4396 if (!typ || !strcasecmp("all", typ)) {
4397 array_init(return_value);
4398 if (MBSTRG(current_internal_encoding)) {
4399 add_assoc_string(return_value, "internal_encoding", (char *)MBSTRG(current_internal_encoding)->name, 1);
4400 }
4401 if (MBSTRG(http_input_identify)) {
4402 add_assoc_string(return_value, "http_input", (char *)MBSTRG(http_input_identify)->name, 1);
4403 }
4404 if (MBSTRG(current_http_output_encoding)) {
4405 add_assoc_string(return_value, "http_output", (char *)MBSTRG(current_http_output_encoding)->name, 1);
4406 }
4407 if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes"), 0)) != NULL) {
4408 add_assoc_string(return_value, "http_output_conv_mimetypes", name, 1);
4409 }
4410 add_assoc_long(return_value, "func_overload", MBSTRG(func_overload));
4411 if (MBSTRG(func_overload)){
4412 over_func = &(mb_ovld[0]);
4413 MAKE_STD_ZVAL(row1);
4414 array_init(row1);
4415 while (over_func->type > 0) {
4416 if ((MBSTRG(func_overload) & over_func->type) == over_func->type ) {
4417 add_assoc_string(row1, over_func->orig_func, over_func->ovld_func, 1);
4418 }
4419 over_func++;
4420 }
4421 add_assoc_zval(return_value, "func_overload_list", row1);
4422 } else {
4423 add_assoc_string(return_value, "func_overload_list", "no overload", 1);
4424 }
4425 if (lang != NULL) {
4426 if ((name = (char *)mbfl_no_encoding2name(lang->mail_charset)) != NULL) {
4427 add_assoc_string(return_value, "mail_charset", name, 1);
4428 }
4429 if ((name = (char *)mbfl_no_encoding2name(lang->mail_header_encoding)) != NULL) {
4430 add_assoc_string(return_value, "mail_header_encoding", name, 1);
4431 }
4432 if ((name = (char *)mbfl_no_encoding2name(lang->mail_body_encoding)) != NULL) {
4433 add_assoc_string(return_value, "mail_body_encoding", name, 1);
4434 }
4435 }
4436 add_assoc_long(return_value, "illegal_chars", MBSTRG(illegalchars));
4437 if (MBSTRG(encoding_translation)) {
4438 add_assoc_string(return_value, "encoding_translation", "On", 1);
4439 } else {
4440 add_assoc_string(return_value, "encoding_translation", "Off", 1);
4441 }
4442 if ((name = (char *)mbfl_no_language2name(MBSTRG(language))) != NULL) {
4443 add_assoc_string(return_value, "language", name, 1);
4444 }
4445 n = MBSTRG(current_detect_order_list_size);
4446 entry = MBSTRG(current_detect_order_list);
4447 if (n > 0) {
4448 size_t i;
4449 MAKE_STD_ZVAL(row2);
4450 array_init(row2);
4451 for (i = 0; i < n; i++) {
4452 add_next_index_string(row2, (*entry)->name, 1);
4453 entry++;
4454 }
4455 add_assoc_zval(return_value, "detect_order", row2);
4456 }
4457 if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
4458 add_assoc_string(return_value, "substitute_character", "none", 1);
4459 } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
4460 add_assoc_string(return_value, "substitute_character", "long", 1);
4461 } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
4462 add_assoc_string(return_value, "substitute_character", "entity", 1);
4463 } else {
4464 add_assoc_long(return_value, "substitute_character", MBSTRG(current_filter_illegal_substchar));
4465 }
4466 if (MBSTRG(strict_detection)) {
4467 add_assoc_string(return_value, "strict_detection", "On", 1);
4468 } else {
4469 add_assoc_string(return_value, "strict_detection", "Off", 1);
4470 }
4471 } else if (!strcasecmp("internal_encoding", typ)) {
4472 if (MBSTRG(current_internal_encoding)) {
4473 RETVAL_STRING((char *)MBSTRG(current_internal_encoding)->name, 1);
4474 }
4475 } else if (!strcasecmp("http_input", typ)) {
4476 if (MBSTRG(http_input_identify)) {
4477 RETVAL_STRING((char *)MBSTRG(http_input_identify)->name, 1);
4478 }
4479 } else if (!strcasecmp("http_output", typ)) {
4480 if (MBSTRG(current_http_output_encoding)) {
4481 RETVAL_STRING((char *)MBSTRG(current_http_output_encoding)->name, 1);
4482 }
4483 } else if (!strcasecmp("http_output_conv_mimetypes", typ)) {
4484 if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes"), 0)) != NULL) {
4485 RETVAL_STRING(name, 1);
4486 }
4487 } else if (!strcasecmp("func_overload", typ)) {
4488 RETVAL_LONG(MBSTRG(func_overload));
4489 } else if (!strcasecmp("func_overload_list", typ)) {
4490 if (MBSTRG(func_overload)){
4491 over_func = &(mb_ovld[0]);
4492 array_init(return_value);
4493 while (over_func->type > 0) {
4494 if ((MBSTRG(func_overload) & over_func->type) == over_func->type ) {
4495 add_assoc_string(return_value, over_func->orig_func, over_func->ovld_func, 1);
4496 }
4497 over_func++;
4498 }
4499 } else {
4500 RETVAL_STRING("no overload", 1);
4501 }
4502 } else if (!strcasecmp("mail_charset", typ)) {
4503 if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_charset)) != NULL) {
4504 RETVAL_STRING(name, 1);
4505 }
4506 } else if (!strcasecmp("mail_header_encoding", typ)) {
4507 if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_header_encoding)) != NULL) {
4508 RETVAL_STRING(name, 1);
4509 }
4510 } else if (!strcasecmp("mail_body_encoding", typ)) {
4511 if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_body_encoding)) != NULL) {
4512 RETVAL_STRING(name, 1);
4513 }
4514 } else if (!strcasecmp("illegal_chars", typ)) {
4515 RETVAL_LONG(MBSTRG(illegalchars));
4516 } else if (!strcasecmp("encoding_translation", typ)) {
4517 if (MBSTRG(encoding_translation)) {
4518 RETVAL_STRING("On", 1);
4519 } else {
4520 RETVAL_STRING("Off", 1);
4521 }
4522 } else if (!strcasecmp("language", typ)) {
4523 if ((name = (char *)mbfl_no_language2name(MBSTRG(language))) != NULL) {
4524 RETVAL_STRING(name, 1);
4525 }
4526 } else if (!strcasecmp("detect_order", typ)) {
4527 n = MBSTRG(current_detect_order_list_size);
4528 entry = MBSTRG(current_detect_order_list);
4529 if (n > 0) {
4530 size_t i;
4531 array_init(return_value);
4532 for (i = 0; i < n; i++) {
4533 add_next_index_string(return_value, (*entry)->name, 1);
4534 entry++;
4535 }
4536 }
4537 } else if (!strcasecmp("substitute_character", typ)) {
4538 if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
4539 RETVAL_STRING("none", 1);
4540 } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
4541 RETVAL_STRING("long", 1);
4542 } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
4543 RETVAL_STRING("entity", 1);
4544 } else {
4545 RETVAL_LONG(MBSTRG(current_filter_illegal_substchar));
4546 }
4547 } else if (!strcasecmp("strict_detection", typ)) {
4548 if (MBSTRG(strict_detection)) {
4549 RETVAL_STRING("On", 1);
4550 } else {
4551 RETVAL_STRING("Off", 1);
4552 }
4553 } else {
4554 RETURN_FALSE;
4555 }
4556 }
4557 /* }}} */
4558
4559 /* {{{ proto bool mb_check_encoding([string var[, string encoding]])
4560 Check if the string is valid for the specified encoding */
PHP_FUNCTION(mb_check_encoding)4561 PHP_FUNCTION(mb_check_encoding)
4562 {
4563 char *var = NULL;
4564 int var_len;
4565 char *enc = NULL;
4566 int enc_len;
4567 mbfl_buffer_converter *convd;
4568 const mbfl_encoding *encoding = MBSTRG(current_internal_encoding);
4569 mbfl_string string, result, *ret = NULL;
4570 long illegalchars = 0;
4571
4572 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|ss", &var, &var_len, &enc, &enc_len) == FAILURE) {
4573 RETURN_FALSE;
4574 }
4575
4576 if (var == NULL) {
4577 RETURN_BOOL(MBSTRG(illegalchars) == 0);
4578 }
4579
4580 if (enc != NULL) {
4581 encoding = mbfl_name2encoding(enc);
4582 if (!encoding || encoding == &mbfl_encoding_pass) {
4583 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid encoding \"%s\"", enc);
4584 RETURN_FALSE;
4585 }
4586 }
4587
4588 convd = mbfl_buffer_converter_new2(encoding, encoding, 0);
4589 if (convd == NULL) {
4590 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create converter");
4591 RETURN_FALSE;
4592 }
4593 mbfl_buffer_converter_illegal_mode(convd, MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE);
4594 mbfl_buffer_converter_illegal_substchar(convd, 0);
4595
4596 /* initialize string */
4597 mbfl_string_init_set(&string, mbfl_no_language_neutral, encoding->no_encoding);
4598 mbfl_string_init(&result);
4599
4600 string.val = (unsigned char *)var;
4601 string.len = var_len;
4602 ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
4603 illegalchars = mbfl_buffer_illegalchars(convd);
4604 mbfl_buffer_converter_delete(convd);
4605
4606 RETVAL_FALSE;
4607 if (ret != NULL) {
4608 if (illegalchars == 0 && string.len == result.len && memcmp(string.val, result.val, string.len) == 0) {
4609 RETVAL_TRUE;
4610 }
4611 mbfl_string_clear(&result);
4612 }
4613 }
4614 /* }}} */
4615
4616
4617 /* {{{ php_mb_populate_current_detect_order_list */
php_mb_populate_current_detect_order_list(TSRMLS_D)4618 static void php_mb_populate_current_detect_order_list(TSRMLS_D)
4619 {
4620 const mbfl_encoding **entry = 0;
4621 size_t nentries;
4622
4623 if (MBSTRG(current_detect_order_list)) {
4624 return;
4625 }
4626
4627 if (MBSTRG(detect_order_list) && MBSTRG(detect_order_list_size)) {
4628 nentries = MBSTRG(detect_order_list_size);
4629 entry = (const mbfl_encoding **)safe_emalloc(nentries, sizeof(mbfl_encoding*), 0);
4630 memcpy(entry, MBSTRG(detect_order_list), sizeof(mbfl_encoding*) * nentries);
4631 } else {
4632 const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
4633 size_t i;
4634 nentries = MBSTRG(default_detect_order_list_size);
4635 entry = (const mbfl_encoding **)safe_emalloc(nentries, sizeof(mbfl_encoding*), 0);
4636 for (i = 0; i < nentries; i++) {
4637 entry[i] = mbfl_no2encoding(src[i]);
4638 }
4639 }
4640 MBSTRG(current_detect_order_list) = entry;
4641 MBSTRG(current_detect_order_list_size) = nentries;
4642 }
4643
4644 /* {{{ static int php_mb_encoding_translation() */
php_mb_encoding_translation(TSRMLS_D)4645 static int php_mb_encoding_translation(TSRMLS_D)
4646 {
4647 return MBSTRG(encoding_translation);
4648 }
4649 /* }}} */
4650
4651 /* {{{ MBSTRING_API size_t php_mb_mbchar_bytes_ex() */
php_mb_mbchar_bytes_ex(const char * s,const mbfl_encoding * enc)4652 MBSTRING_API size_t php_mb_mbchar_bytes_ex(const char *s, const mbfl_encoding *enc)
4653 {
4654 if (enc != NULL) {
4655 if (enc->flag & MBFL_ENCTYPE_MBCS) {
4656 if (enc->mblen_table != NULL) {
4657 if (s != NULL) return enc->mblen_table[*(unsigned char *)s];
4658 }
4659 } else if (enc->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
4660 return 2;
4661 } else if (enc->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
4662 return 4;
4663 }
4664 }
4665 return 1;
4666 }
4667 /* }}} */
4668
4669 /* {{{ MBSTRING_API size_t php_mb_mbchar_bytes() */
php_mb_mbchar_bytes(const char * s TSRMLS_DC)4670 MBSTRING_API size_t php_mb_mbchar_bytes(const char *s TSRMLS_DC)
4671 {
4672 return php_mb_mbchar_bytes_ex(s, MBSTRG(internal_encoding));
4673 }
4674 /* }}} */
4675
4676 /* {{{ MBSTRING_API char *php_mb_safe_strrchr_ex() */
php_mb_safe_strrchr_ex(const char * s,unsigned int c,size_t nbytes,const mbfl_encoding * enc)4677 MBSTRING_API char *php_mb_safe_strrchr_ex(const char *s, unsigned int c, size_t nbytes, const mbfl_encoding *enc)
4678 {
4679 register const char *p = s;
4680 char *last=NULL;
4681
4682 if (nbytes == (size_t)-1) {
4683 size_t nb = 0;
4684
4685 while (*p != '\0') {
4686 if (nb == 0) {
4687 if ((unsigned char)*p == (unsigned char)c) {
4688 last = (char *)p;
4689 }
4690 nb = php_mb_mbchar_bytes_ex(p, enc);
4691 if (nb == 0) {
4692 return NULL; /* something is going wrong! */
4693 }
4694 }
4695 --nb;
4696 ++p;
4697 }
4698 } else {
4699 register size_t bcnt = nbytes;
4700 register size_t nbytes_char;
4701 while (bcnt > 0) {
4702 if ((unsigned char)*p == (unsigned char)c) {
4703 last = (char *)p;
4704 }
4705 nbytes_char = php_mb_mbchar_bytes_ex(p, enc);
4706 if (bcnt < nbytes_char) {
4707 return NULL;
4708 }
4709 p += nbytes_char;
4710 bcnt -= nbytes_char;
4711 }
4712 }
4713 return last;
4714 }
4715 /* }}} */
4716
4717 /* {{{ MBSTRING_API char *php_mb_safe_strrchr() */
php_mb_safe_strrchr(const char * s,unsigned int c,size_t nbytes TSRMLS_DC)4718 MBSTRING_API char *php_mb_safe_strrchr(const char *s, unsigned int c, size_t nbytes TSRMLS_DC)
4719 {
4720 return php_mb_safe_strrchr_ex(s, c, nbytes, MBSTRG(internal_encoding));
4721 }
4722 /* }}} */
4723
4724 /* {{{ MBSTRING_API int php_mb_stripos()
4725 */
php_mb_stripos(int mode,const char * old_haystack,unsigned int old_haystack_len,const char * old_needle,unsigned int old_needle_len,long offset,const char * from_encoding TSRMLS_DC)4726 MBSTRING_API int php_mb_stripos(int mode, const char *old_haystack, unsigned int old_haystack_len, const char *old_needle, unsigned int old_needle_len, long offset, const char *from_encoding TSRMLS_DC)
4727 {
4728 int n;
4729 mbfl_string haystack, needle;
4730 n = -1;
4731
4732 mbfl_string_init(&haystack);
4733 mbfl_string_init(&needle);
4734 haystack.no_language = MBSTRG(language);
4735 haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
4736 needle.no_language = MBSTRG(language);
4737 needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
4738
4739 do {
4740 size_t len = 0;
4741 haystack.val = (unsigned char *)php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, (char *)old_haystack, old_haystack_len, &len, from_encoding TSRMLS_CC);
4742 haystack.len = len;
4743
4744 if (!haystack.val) {
4745 break;
4746 }
4747
4748 if (haystack.len <= 0) {
4749 break;
4750 }
4751
4752 needle.val = (unsigned char *)php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, (char *)old_needle, old_needle_len, &len, from_encoding TSRMLS_CC);
4753 needle.len = len;
4754
4755 if (!needle.val) {
4756 break;
4757 }
4758
4759 if (needle.len <= 0) {
4760 break;
4761 }
4762
4763 haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding);
4764 if (haystack.no_encoding == mbfl_no_encoding_invalid) {
4765 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", from_encoding);
4766 break;
4767 }
4768
4769 {
4770 int haystack_char_len = mbfl_strlen(&haystack);
4771
4772 if (mode) {
4773 if ((offset > 0 && offset > haystack_char_len) ||
4774 (offset < 0 && -offset > haystack_char_len)) {
4775 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset is greater than the length of haystack string");
4776 break;
4777 }
4778 } else {
4779 if (offset < 0 || offset > haystack_char_len) {
4780 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset not contained in string");
4781 break;
4782 }
4783 }
4784 }
4785
4786 n = mbfl_strpos(&haystack, &needle, offset, mode);
4787 } while(0);
4788
4789 if (haystack.val) {
4790 efree(haystack.val);
4791 }
4792
4793 if (needle.val) {
4794 efree(needle.val);
4795 }
4796
4797 return n;
4798 }
4799 /* }}} */
4800
/* Reports MBSTRG(http_input_list) and its size through the two output
 * parameters. The list pointer is handed out as is; nothing is copied. */
static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *list_size TSRMLS_DC) /* {{{ */
{
	*list_size = MBSTRG(http_input_list_size);
	*list = (const zend_encoding **)MBSTRG(http_input_list);
}
/* }}} */
4807
/* Records `encoding` in MBSTRG(http_input_identify). */
static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding TSRMLS_DC) /* {{{ */
{
	const mbfl_encoding *identified = (const mbfl_encoding*)encoding;

	MBSTRG(http_input_identify) = identified;
}
/* }}} */
4813
4814 #endif /* HAVE_MBSTRING */
4815
4816 /*
4817 * Local variables:
4818 * tab-width: 4
4819 * c-basic-offset: 4
4820 * End:
4821 * vim600: fdm=marker
4822 * vim: noet sw=4 ts=4
4823 */
4824