1 /*
2 +----------------------------------------------------------------------+
3 | PHP Version 5 |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 1997-2014 The PHP Group |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
15 | Author: Tsukada Takuya <tsukada@fminn.nagano.nagano.jp> |
16 | Rui Hirokawa <hirokawa@php.net> |
17 +----------------------------------------------------------------------+
18 */
19
20 /* $Id$ */
21
22 /*
23 * PHP 4 Multibyte String module "mbstring"
24 *
25 * History:
26 * 2000.5.19 Release php-4.0RC2_jstring-1.0
27 * 2001.4.1 Release php4_jstring-1.0.91
28 * 2001.4.30 Release php4_jstring-1.1 (contribute to The PHP Group)
29 * 2001.5.1 Renamed from jstring to mbstring (hirokawa@php.net)
30 */
31
32 /*
33 * PHP3 Internationalization support program.
34 *
35 * Copyright (c) 1999,2000 by the PHP3 internationalization team.
36 * All rights reserved.
37 *
38 * See README_PHP3-i18n-ja for more detail.
39 *
40 * Authors:
41 * Hironori Sato <satoh@jpnnet.com>
42 * Shigeru Kanemoto <sgk@happysize.co.jp>
43 * Tsukada Takuya <tsukada@fminn.nagano.nagano.jp>
44 * Rui Hirokawa <rui_hirokawa@ybb.ne.jp>
45 */
46
47 /* {{{ includes */
48 #ifdef HAVE_CONFIG_H
49 #include "config.h"
50 #endif
51
52 #include "php.h"
53 #include "php_ini.h"
54 #include "php_variables.h"
55 #include "mbstring.h"
56 #include "ext/standard/php_string.h"
57 #include "ext/standard/php_mail.h"
58 #include "ext/standard/exec.h"
59 #include "ext/standard/php_smart_str.h"
60 #include "ext/standard/url.h"
61 #include "main/php_output.h"
62 #include "ext/standard/info.h"
63
64 #include "libmbfl/mbfl/mbfl_allocators.h"
65 #include "libmbfl/mbfl/mbfilter_pass.h"
66
67 #include "php_variables.h"
68 #include "php_globals.h"
69 #include "rfc1867.h"
70 #include "php_content_types.h"
71 #include "SAPI.h"
72 #include "php_unicode.h"
73 #include "TSRM.h"
74
75 #include "mb_gpc.h"
76
77 #if HAVE_MBREGEX
78 #include "php_mbregex.h"
79 #endif
80
81 #include "zend_multibyte.h"
82
83 #if HAVE_ONIG
84 #include "php_onig_compat.h"
85 #include <oniguruma.h>
86 #undef UChar
87 #elif HAVE_PCRE || HAVE_BUNDLED_PCRE
88 #include "ext/pcre/php_pcre.h"
89 #endif
90 /* }}} */
91
92 #if HAVE_MBSTRING
93
94 /* {{{ prototypes */
95 ZEND_DECLARE_MODULE_GLOBALS(mbstring)
96
97 static PHP_GINIT_FUNCTION(mbstring);
98 static PHP_GSHUTDOWN_FUNCTION(mbstring);
99
100 static void php_mb_populate_current_detect_order_list(TSRMLS_D);
101
102 static int php_mb_encoding_translation(TSRMLS_D);
103
104 static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *list_size TSRMLS_DC);
105
106 static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding TSRMLS_DC);
107
108 /* }}} */
109
110 /* {{{ php_mb_default_identify_list */
111 typedef struct _php_mb_nls_ident_list {
112 enum mbfl_no_language lang;
113 const enum mbfl_no_encoding *list;
114 size_t list_size;
115 } php_mb_nls_ident_list;
116
117 static const enum mbfl_no_encoding php_mb_default_identify_list_ja[] = {
118 mbfl_no_encoding_ascii,
119 mbfl_no_encoding_jis,
120 mbfl_no_encoding_utf8,
121 mbfl_no_encoding_euc_jp,
122 mbfl_no_encoding_sjis
123 };
124
125 static const enum mbfl_no_encoding php_mb_default_identify_list_cn[] = {
126 mbfl_no_encoding_ascii,
127 mbfl_no_encoding_utf8,
128 mbfl_no_encoding_euc_cn,
129 mbfl_no_encoding_cp936
130 };
131
132 static const enum mbfl_no_encoding php_mb_default_identify_list_tw_hk[] = {
133 mbfl_no_encoding_ascii,
134 mbfl_no_encoding_utf8,
135 mbfl_no_encoding_euc_tw,
136 mbfl_no_encoding_big5
137 };
138
139 static const enum mbfl_no_encoding php_mb_default_identify_list_kr[] = {
140 mbfl_no_encoding_ascii,
141 mbfl_no_encoding_utf8,
142 mbfl_no_encoding_euc_kr,
143 mbfl_no_encoding_uhc
144 };
145
146 static const enum mbfl_no_encoding php_mb_default_identify_list_ru[] = {
147 mbfl_no_encoding_ascii,
148 mbfl_no_encoding_utf8,
149 mbfl_no_encoding_koi8r,
150 mbfl_no_encoding_cp1251,
151 mbfl_no_encoding_cp866
152 };
153
154 static const enum mbfl_no_encoding php_mb_default_identify_list_hy[] = {
155 mbfl_no_encoding_ascii,
156 mbfl_no_encoding_utf8,
157 mbfl_no_encoding_armscii8
158 };
159
160 static const enum mbfl_no_encoding php_mb_default_identify_list_tr[] = {
161 mbfl_no_encoding_ascii,
162 mbfl_no_encoding_utf8,
163 mbfl_no_encoding_cp1254,
164 mbfl_no_encoding_8859_9
165 };
166
167 static const enum mbfl_no_encoding php_mb_default_identify_list_ua[] = {
168 mbfl_no_encoding_ascii,
169 mbfl_no_encoding_utf8,
170 mbfl_no_encoding_koi8u
171 };
172
173 static const enum mbfl_no_encoding php_mb_default_identify_list_neut[] = {
174 mbfl_no_encoding_ascii,
175 mbfl_no_encoding_utf8
176 };
177
178
179 static const php_mb_nls_ident_list php_mb_default_identify_list[] = {
180 { mbfl_no_language_japanese, php_mb_default_identify_list_ja, sizeof(php_mb_default_identify_list_ja) / sizeof(php_mb_default_identify_list_ja[0]) },
181 { mbfl_no_language_korean, php_mb_default_identify_list_kr, sizeof(php_mb_default_identify_list_kr) / sizeof(php_mb_default_identify_list_kr[0]) },
182 { mbfl_no_language_traditional_chinese, php_mb_default_identify_list_tw_hk, sizeof(php_mb_default_identify_list_tw_hk) / sizeof(php_mb_default_identify_list_tw_hk[0]) },
183 { mbfl_no_language_simplified_chinese, php_mb_default_identify_list_cn, sizeof(php_mb_default_identify_list_cn) / sizeof(php_mb_default_identify_list_cn[0]) },
184 { mbfl_no_language_russian, php_mb_default_identify_list_ru, sizeof(php_mb_default_identify_list_ru) / sizeof(php_mb_default_identify_list_ru[0]) },
185 { mbfl_no_language_armenian, php_mb_default_identify_list_hy, sizeof(php_mb_default_identify_list_hy) / sizeof(php_mb_default_identify_list_hy[0]) },
186 { mbfl_no_language_turkish, php_mb_default_identify_list_tr, sizeof(php_mb_default_identify_list_tr) / sizeof(php_mb_default_identify_list_tr[0]) },
187 { mbfl_no_language_ukrainian, php_mb_default_identify_list_ua, sizeof(php_mb_default_identify_list_ua) / sizeof(php_mb_default_identify_list_ua[0]) },
188 { mbfl_no_language_neutral, php_mb_default_identify_list_neut, sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]) }
189 };
190
191 /* }}} */
192
193 /* {{{ mb_overload_def mb_ovld[] */
194 static const struct mb_overload_def mb_ovld[] = {
195 {MB_OVERLOAD_MAIL, "mail", "mb_send_mail", "mb_orig_mail"},
196 {MB_OVERLOAD_STRING, "strlen", "mb_strlen", "mb_orig_strlen"},
197 {MB_OVERLOAD_STRING, "strpos", "mb_strpos", "mb_orig_strpos"},
198 {MB_OVERLOAD_STRING, "strrpos", "mb_strrpos", "mb_orig_strrpos"},
199 {MB_OVERLOAD_STRING, "stripos", "mb_stripos", "mb_orig_stripos"},
200 {MB_OVERLOAD_STRING, "strripos", "mb_strripos", "mb_orig_strripos"},
201 {MB_OVERLOAD_STRING, "strstr", "mb_strstr", "mb_orig_strstr"},
202 {MB_OVERLOAD_STRING, "strrchr", "mb_strrchr", "mb_orig_strrchr"},
203 {MB_OVERLOAD_STRING, "stristr", "mb_stristr", "mb_orig_stristr"},
204 {MB_OVERLOAD_STRING, "substr", "mb_substr", "mb_orig_substr"},
205 {MB_OVERLOAD_STRING, "strtolower", "mb_strtolower", "mb_orig_strtolower"},
206 {MB_OVERLOAD_STRING, "strtoupper", "mb_strtoupper", "mb_orig_strtoupper"},
207 {MB_OVERLOAD_STRING, "substr_count", "mb_substr_count", "mb_orig_substr_count"},
208 #if HAVE_MBREGEX
209 {MB_OVERLOAD_REGEX, "ereg", "mb_ereg", "mb_orig_ereg"},
210 {MB_OVERLOAD_REGEX, "eregi", "mb_eregi", "mb_orig_eregi"},
211 {MB_OVERLOAD_REGEX, "ereg_replace", "mb_ereg_replace", "mb_orig_ereg_replace"},
212 {MB_OVERLOAD_REGEX, "eregi_replace", "mb_eregi_replace", "mb_orig_eregi_replace"},
213 {MB_OVERLOAD_REGEX, "split", "mb_split", "mb_orig_split"},
214 #endif
215 {0, NULL, NULL, NULL}
216 };
217 /* }}} */
218
219 /* {{{ arginfo */
220 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_language, 0, 0, 0)
221 ZEND_ARG_INFO(0, language)
222 ZEND_END_ARG_INFO()
223
224 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_internal_encoding, 0, 0, 0)
225 ZEND_ARG_INFO(0, encoding)
226 ZEND_END_ARG_INFO()
227
228 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_http_input, 0, 0, 0)
229 ZEND_ARG_INFO(0, type)
230 ZEND_END_ARG_INFO()
231
232 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_http_output, 0, 0, 0)
233 ZEND_ARG_INFO(0, encoding)
234 ZEND_END_ARG_INFO()
235
236 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_detect_order, 0, 0, 0)
237 ZEND_ARG_INFO(0, encoding)
238 ZEND_END_ARG_INFO()
239
240 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substitute_character, 0, 0, 0)
241 ZEND_ARG_INFO(0, substchar)
242 ZEND_END_ARG_INFO()
243
244 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_preferred_mime_name, 0, 0, 1)
245 ZEND_ARG_INFO(0, encoding)
246 ZEND_END_ARG_INFO()
247
248 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_parse_str, 0, 0, 1)
249 ZEND_ARG_INFO(0, encoded_string)
250 ZEND_ARG_INFO(1, result)
251 ZEND_END_ARG_INFO()
252
253 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_output_handler, 0, 0, 2)
254 ZEND_ARG_INFO(0, contents)
255 ZEND_ARG_INFO(0, status)
256 ZEND_END_ARG_INFO()
257
258 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strlen, 0, 0, 1)
259 ZEND_ARG_INFO(0, str)
260 ZEND_ARG_INFO(0, encoding)
261 ZEND_END_ARG_INFO()
262
263 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strpos, 0, 0, 2)
264 ZEND_ARG_INFO(0, haystack)
265 ZEND_ARG_INFO(0, needle)
266 ZEND_ARG_INFO(0, offset)
267 ZEND_ARG_INFO(0, encoding)
268 ZEND_END_ARG_INFO()
269
270 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrpos, 0, 0, 2)
271 ZEND_ARG_INFO(0, haystack)
272 ZEND_ARG_INFO(0, needle)
273 ZEND_ARG_INFO(0, offset)
274 ZEND_ARG_INFO(0, encoding)
275 ZEND_END_ARG_INFO()
276
277 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_stripos, 0, 0, 2)
278 ZEND_ARG_INFO(0, haystack)
279 ZEND_ARG_INFO(0, needle)
280 ZEND_ARG_INFO(0, offset)
281 ZEND_ARG_INFO(0, encoding)
282 ZEND_END_ARG_INFO()
283
284 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strripos, 0, 0, 2)
285 ZEND_ARG_INFO(0, haystack)
286 ZEND_ARG_INFO(0, needle)
287 ZEND_ARG_INFO(0, offset)
288 ZEND_ARG_INFO(0, encoding)
289 ZEND_END_ARG_INFO()
290
291 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strstr, 0, 0, 2)
292 ZEND_ARG_INFO(0, haystack)
293 ZEND_ARG_INFO(0, needle)
294 ZEND_ARG_INFO(0, part)
295 ZEND_ARG_INFO(0, encoding)
296 ZEND_END_ARG_INFO()
297
298 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrchr, 0, 0, 2)
299 ZEND_ARG_INFO(0, haystack)
300 ZEND_ARG_INFO(0, needle)
301 ZEND_ARG_INFO(0, part)
302 ZEND_ARG_INFO(0, encoding)
303 ZEND_END_ARG_INFO()
304
305 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_stristr, 0, 0, 2)
306 ZEND_ARG_INFO(0, haystack)
307 ZEND_ARG_INFO(0, needle)
308 ZEND_ARG_INFO(0, part)
309 ZEND_ARG_INFO(0, encoding)
310 ZEND_END_ARG_INFO()
311
312 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrichr, 0, 0, 2)
313 ZEND_ARG_INFO(0, haystack)
314 ZEND_ARG_INFO(0, needle)
315 ZEND_ARG_INFO(0, part)
316 ZEND_ARG_INFO(0, encoding)
317 ZEND_END_ARG_INFO()
318
319 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substr_count, 0, 0, 2)
320 ZEND_ARG_INFO(0, haystack)
321 ZEND_ARG_INFO(0, needle)
322 ZEND_ARG_INFO(0, encoding)
323 ZEND_END_ARG_INFO()
324
325 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substr, 0, 0, 2)
326 ZEND_ARG_INFO(0, str)
327 ZEND_ARG_INFO(0, start)
328 ZEND_ARG_INFO(0, length)
329 ZEND_ARG_INFO(0, encoding)
330 ZEND_END_ARG_INFO()
331
332 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strcut, 0, 0, 2)
333 ZEND_ARG_INFO(0, str)
334 ZEND_ARG_INFO(0, start)
335 ZEND_ARG_INFO(0, length)
336 ZEND_ARG_INFO(0, encoding)
337 ZEND_END_ARG_INFO()
338
339 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strwidth, 0, 0, 1)
340 ZEND_ARG_INFO(0, str)
341 ZEND_ARG_INFO(0, encoding)
342 ZEND_END_ARG_INFO()
343
344 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strimwidth, 0, 0, 3)
345 ZEND_ARG_INFO(0, str)
346 ZEND_ARG_INFO(0, start)
347 ZEND_ARG_INFO(0, width)
348 ZEND_ARG_INFO(0, trimmarker)
349 ZEND_ARG_INFO(0, encoding)
350 ZEND_END_ARG_INFO()
351
352 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_encoding, 0, 0, 2)
353 ZEND_ARG_INFO(0, str)
354 ZEND_ARG_INFO(0, to)
355 ZEND_ARG_INFO(0, from)
356 ZEND_END_ARG_INFO()
357
358 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_case, 0, 0, 2)
359 ZEND_ARG_INFO(0, sourcestring)
360 ZEND_ARG_INFO(0, mode)
361 ZEND_ARG_INFO(0, encoding)
362 ZEND_END_ARG_INFO()
363
364 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strtoupper, 0, 0, 1)
365 ZEND_ARG_INFO(0, sourcestring)
366 ZEND_ARG_INFO(0, encoding)
367 ZEND_END_ARG_INFO()
368
369 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strtolower, 0, 0, 1)
370 ZEND_ARG_INFO(0, sourcestring)
371 ZEND_ARG_INFO(0, encoding)
372 ZEND_END_ARG_INFO()
373
374 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_detect_encoding, 0, 0, 1)
375 ZEND_ARG_INFO(0, str)
376 ZEND_ARG_INFO(0, encoding_list)
377 ZEND_ARG_INFO(0, strict)
378 ZEND_END_ARG_INFO()
379
380 ZEND_BEGIN_ARG_INFO(arginfo_mb_list_encodings, 0)
381 ZEND_END_ARG_INFO()
382
383 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encoding_aliases, 0, 0, 1)
384 ZEND_ARG_INFO(0, encoding)
385 ZEND_END_ARG_INFO()
386
387 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encode_mimeheader, 0, 0, 1)
388 ZEND_ARG_INFO(0, str)
389 ZEND_ARG_INFO(0, charset)
390 ZEND_ARG_INFO(0, transfer)
391 ZEND_ARG_INFO(0, linefeed)
392 ZEND_ARG_INFO(0, indent)
393 ZEND_END_ARG_INFO()
394
395 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_decode_mimeheader, 0, 0, 1)
396 ZEND_ARG_INFO(0, string)
397 ZEND_END_ARG_INFO()
398
399 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_kana, 0, 0, 1)
400 ZEND_ARG_INFO(0, str)
401 ZEND_ARG_INFO(0, option)
402 ZEND_ARG_INFO(0, encoding)
403 ZEND_END_ARG_INFO()
404
405 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_variables, 1, 0, 3)
406 ZEND_ARG_INFO(0, to)
407 ZEND_ARG_INFO(0, from)
408 ZEND_ARG_INFO(1, ...)
409 ZEND_END_ARG_INFO()
410
411 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encode_numericentity, 0, 0, 2)
412 ZEND_ARG_INFO(0, string)
413 ZEND_ARG_INFO(0, convmap)
414 ZEND_ARG_INFO(0, encoding)
415 ZEND_ARG_INFO(0, is_hex)
416 ZEND_END_ARG_INFO()
417
418 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_decode_numericentity, 0, 0, 2)
419 ZEND_ARG_INFO(0, string)
420 ZEND_ARG_INFO(0, convmap)
421 ZEND_ARG_INFO(0, encoding)
422 ZEND_END_ARG_INFO()
423
424 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_send_mail, 0, 0, 3)
425 ZEND_ARG_INFO(0, to)
426 ZEND_ARG_INFO(0, subject)
427 ZEND_ARG_INFO(0, message)
428 ZEND_ARG_INFO(0, additional_headers)
429 ZEND_ARG_INFO(0, additional_parameters)
430 ZEND_END_ARG_INFO()
431
432 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_get_info, 0, 0, 0)
433 ZEND_ARG_INFO(0, type)
434 ZEND_END_ARG_INFO()
435
436 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_check_encoding, 0, 0, 0)
437 ZEND_ARG_INFO(0, var)
438 ZEND_ARG_INFO(0, encoding)
439 ZEND_END_ARG_INFO()
440
441 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_regex_encoding, 0, 0, 0)
442 ZEND_ARG_INFO(0, encoding)
443 ZEND_END_ARG_INFO()
444
445 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg, 0, 0, 2)
446 ZEND_ARG_INFO(0, pattern)
447 ZEND_ARG_INFO(0, string)
448 ZEND_ARG_INFO(1, registers)
449 ZEND_END_ARG_INFO()
450
451 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_eregi, 0, 0, 2)
452 ZEND_ARG_INFO(0, pattern)
453 ZEND_ARG_INFO(0, string)
454 ZEND_ARG_INFO(1, registers)
455 ZEND_END_ARG_INFO()
456
457 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_replace, 0, 0, 3)
458 ZEND_ARG_INFO(0, pattern)
459 ZEND_ARG_INFO(0, replacement)
460 ZEND_ARG_INFO(0, string)
461 ZEND_ARG_INFO(0, option)
462 ZEND_END_ARG_INFO()
463
464 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_eregi_replace, 0, 0, 3)
465 ZEND_ARG_INFO(0, pattern)
466 ZEND_ARG_INFO(0, replacement)
467 ZEND_ARG_INFO(0, string)
468 ZEND_END_ARG_INFO()
469
470 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_replace_callback, 0, 0, 3)
471 ZEND_ARG_INFO(0, pattern)
472 ZEND_ARG_INFO(0, callback)
473 ZEND_ARG_INFO(0, string)
474 ZEND_ARG_INFO(0, option)
475 ZEND_END_ARG_INFO()
476
477 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_split, 0, 0, 2)
478 ZEND_ARG_INFO(0, pattern)
479 ZEND_ARG_INFO(0, string)
480 ZEND_ARG_INFO(0, limit)
481 ZEND_END_ARG_INFO()
482
483 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_match, 0, 0, 2)
484 ZEND_ARG_INFO(0, pattern)
485 ZEND_ARG_INFO(0, string)
486 ZEND_ARG_INFO(0, option)
487 ZEND_END_ARG_INFO()
488
489 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search, 0, 0, 0)
490 ZEND_ARG_INFO(0, pattern)
491 ZEND_ARG_INFO(0, option)
492 ZEND_END_ARG_INFO()
493
494 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_pos, 0, 0, 0)
495 ZEND_ARG_INFO(0, pattern)
496 ZEND_ARG_INFO(0, option)
497 ZEND_END_ARG_INFO()
498
499 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_regs, 0, 0, 0)
500 ZEND_ARG_INFO(0, pattern)
501 ZEND_ARG_INFO(0, option)
502 ZEND_END_ARG_INFO()
503
504 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_init, 0, 0, 1)
505 ZEND_ARG_INFO(0, string)
506 ZEND_ARG_INFO(0, pattern)
507 ZEND_ARG_INFO(0, option)
508 ZEND_END_ARG_INFO()
509
510 ZEND_BEGIN_ARG_INFO(arginfo_mb_ereg_search_getregs, 0)
511 ZEND_END_ARG_INFO()
512
513 ZEND_BEGIN_ARG_INFO(arginfo_mb_ereg_search_getpos, 0)
514 ZEND_END_ARG_INFO()
515
516 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_setpos, 0, 0, 1)
517 ZEND_ARG_INFO(0, position)
518 ZEND_END_ARG_INFO()
519
520 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_regex_set_options, 0, 0, 0)
521 ZEND_ARG_INFO(0, options)
522 ZEND_END_ARG_INFO()
523 /* }}} */
524
525 /* {{{ zend_function_entry mbstring_functions[] */
526 const zend_function_entry mbstring_functions[] = {
527 PHP_FE(mb_convert_case, arginfo_mb_convert_case)
528 PHP_FE(mb_strtoupper, arginfo_mb_strtoupper)
529 PHP_FE(mb_strtolower, arginfo_mb_strtolower)
530 PHP_FE(mb_language, arginfo_mb_language)
531 PHP_FE(mb_internal_encoding, arginfo_mb_internal_encoding)
532 PHP_FE(mb_http_input, arginfo_mb_http_input)
533 PHP_FE(mb_http_output, arginfo_mb_http_output)
534 PHP_FE(mb_detect_order, arginfo_mb_detect_order)
535 PHP_FE(mb_substitute_character, arginfo_mb_substitute_character)
536 PHP_FE(mb_parse_str, arginfo_mb_parse_str)
537 PHP_FE(mb_output_handler, arginfo_mb_output_handler)
538 PHP_FE(mb_preferred_mime_name, arginfo_mb_preferred_mime_name)
539 PHP_FE(mb_strlen, arginfo_mb_strlen)
540 PHP_FE(mb_strpos, arginfo_mb_strpos)
541 PHP_FE(mb_strrpos, arginfo_mb_strrpos)
542 PHP_FE(mb_stripos, arginfo_mb_stripos)
543 PHP_FE(mb_strripos, arginfo_mb_strripos)
544 PHP_FE(mb_strstr, arginfo_mb_strstr)
545 PHP_FE(mb_strrchr, arginfo_mb_strrchr)
546 PHP_FE(mb_stristr, arginfo_mb_stristr)
547 PHP_FE(mb_strrichr, arginfo_mb_strrichr)
548 PHP_FE(mb_substr_count, arginfo_mb_substr_count)
549 PHP_FE(mb_substr, arginfo_mb_substr)
550 PHP_FE(mb_strcut, arginfo_mb_strcut)
551 PHP_FE(mb_strwidth, arginfo_mb_strwidth)
552 PHP_FE(mb_strimwidth, arginfo_mb_strimwidth)
553 PHP_FE(mb_convert_encoding, arginfo_mb_convert_encoding)
554 PHP_FE(mb_detect_encoding, arginfo_mb_detect_encoding)
555 PHP_FE(mb_list_encodings, arginfo_mb_list_encodings)
556 PHP_FE(mb_encoding_aliases, arginfo_mb_encoding_aliases)
557 PHP_FE(mb_convert_kana, arginfo_mb_convert_kana)
558 PHP_FE(mb_encode_mimeheader, arginfo_mb_encode_mimeheader)
559 PHP_FE(mb_decode_mimeheader, arginfo_mb_decode_mimeheader)
560 PHP_FE(mb_convert_variables, arginfo_mb_convert_variables)
561 PHP_FE(mb_encode_numericentity, arginfo_mb_encode_numericentity)
562 PHP_FE(mb_decode_numericentity, arginfo_mb_decode_numericentity)
563 PHP_FE(mb_send_mail, arginfo_mb_send_mail)
564 PHP_FE(mb_get_info, arginfo_mb_get_info)
565 PHP_FE(mb_check_encoding, arginfo_mb_check_encoding)
566 #if HAVE_MBREGEX
567 PHP_MBREGEX_FUNCTION_ENTRIES
568 #endif
569 PHP_FE_END
570 };
571 /* }}} */
572
573 /* {{{ zend_module_entry mbstring_module_entry */
574 zend_module_entry mbstring_module_entry = {
575 STANDARD_MODULE_HEADER,
576 "mbstring",
577 mbstring_functions,
578 PHP_MINIT(mbstring),
579 PHP_MSHUTDOWN(mbstring),
580 PHP_RINIT(mbstring),
581 PHP_RSHUTDOWN(mbstring),
582 PHP_MINFO(mbstring),
583 NO_VERSION_YET,
584 PHP_MODULE_GLOBALS(mbstring),
585 PHP_GINIT(mbstring),
586 PHP_GSHUTDOWN(mbstring),
587 NULL,
588 STANDARD_MODULE_PROPERTIES_EX
589 };
590 /* }}} */
591
592 /* {{{ static sapi_post_entry php_post_entries[] */
593 static sapi_post_entry php_post_entries[] = {
594 { DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data, php_std_post_handler },
595 { MULTIPART_CONTENT_TYPE, sizeof(MULTIPART_CONTENT_TYPE)-1, NULL, rfc1867_post_handler },
596 { NULL, 0, NULL, NULL }
597 };
598 /* }}} */
599
#ifdef COMPILE_DL_MBSTRING
/* Expanded only when COMPILE_DL_MBSTRING is defined. */
ZEND_GET_MODULE(mbstring)
#endif
603
604 /* {{{ allocators */
605 static void *_php_mb_allocators_malloc(unsigned int sz)
606 {
607 return emalloc(sz);
608 }
609
_php_mb_allocators_realloc(void * ptr,unsigned int sz)610 static void *_php_mb_allocators_realloc(void *ptr, unsigned int sz)
611 {
612 return erealloc(ptr, sz);
613 }
614
_php_mb_allocators_calloc(unsigned int nelems,unsigned int szelem)615 static void *_php_mb_allocators_calloc(unsigned int nelems, unsigned int szelem)
616 {
617 return ecalloc(nelems, szelem);
618 }
619
_php_mb_allocators_free(void * ptr)620 static void _php_mb_allocators_free(void *ptr)
621 {
622 efree(ptr);
623 }
624
_php_mb_allocators_pmalloc(unsigned int sz)625 static void *_php_mb_allocators_pmalloc(unsigned int sz)
626 {
627 return pemalloc(sz, 1);
628 }
629
_php_mb_allocators_prealloc(void * ptr,unsigned int sz)630 static void *_php_mb_allocators_prealloc(void *ptr, unsigned int sz)
631 {
632 return perealloc(ptr, sz, 1);
633 }
634
_php_mb_allocators_pfree(void * ptr)635 static void _php_mb_allocators_pfree(void *ptr)
636 {
637 pefree(ptr, 1);
638 }
639
640 static mbfl_allocators _php_mb_allocators = {
641 _php_mb_allocators_malloc,
642 _php_mb_allocators_realloc,
643 _php_mb_allocators_calloc,
644 _php_mb_allocators_free,
645 _php_mb_allocators_pmalloc,
646 _php_mb_allocators_prealloc,
647 _php_mb_allocators_pfree
648 };
649 /* }}} */
650
651 /* {{{ static sapi_post_entry mbstr_post_entries[] */
652 static sapi_post_entry mbstr_post_entries[] = {
653 { DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data, php_mb_post_handler },
654 { MULTIPART_CONTENT_TYPE, sizeof(MULTIPART_CONTENT_TYPE)-1, NULL, rfc1867_post_handler },
655 { NULL, 0, NULL, NULL }
656 };
657 /* }}} */
658
659 /* {{{ static int php_mb_parse_encoding_list()
660 * Return 0 if input contains any illegal encoding, otherwise 1.
661 * Even if any illegal encoding is detected the result may contain a list
662 * of parsed encodings.
663 */
664 static int
php_mb_parse_encoding_list(const char * value,size_t value_length,const mbfl_encoding *** return_list,size_t * return_size,int persistent TSRMLS_DC)665 php_mb_parse_encoding_list(const char *value, size_t value_length, const mbfl_encoding ***return_list, size_t *return_size, int persistent TSRMLS_DC)
666 {
667 int size, bauto, ret = SUCCESS;
668 size_t n;
669 char *p, *p1, *p2, *endp, *tmpstr;
670 const mbfl_encoding **entry, **list;
671
672 list = NULL;
673 if (value == NULL || value_length <= 0) {
674 if (return_list) {
675 *return_list = NULL;
676 }
677 if (return_size) {
678 *return_size = 0;
679 }
680 return FAILURE;
681 } else {
682 /* copy the value string for work */
683 if (value[0]=='"' && value[value_length-1]=='"' && value_length>2) {
684 tmpstr = (char *)estrndup(value+1, value_length-2);
685 value_length -= 2;
686 }
687 else
688 tmpstr = (char *)estrndup(value, value_length);
689 if (tmpstr == NULL) {
690 return FAILURE;
691 }
692 /* count the number of listed encoding names */
693 endp = tmpstr + value_length;
694 n = 1;
695 p1 = tmpstr;
696 while ((p2 = php_memnstr(p1, ",", 1, endp)) != NULL) {
697 p1 = p2 + 1;
698 n++;
699 }
700 size = n + MBSTRG(default_detect_order_list_size);
701 /* make list */
702 list = (const mbfl_encoding **)pecalloc(size, sizeof(mbfl_encoding*), persistent);
703 if (list != NULL) {
704 entry = list;
705 n = 0;
706 bauto = 0;
707 p1 = tmpstr;
708 do {
709 p2 = p = php_memnstr(p1, ",", 1, endp);
710 if (p == NULL) {
711 p = endp;
712 }
713 *p = '\0';
714 /* trim spaces */
715 while (p1 < p && (*p1 == ' ' || *p1 == '\t')) {
716 p1++;
717 }
718 p--;
719 while (p > p1 && (*p == ' ' || *p == '\t')) {
720 *p = '\0';
721 p--;
722 }
723 /* convert to the encoding number and check encoding */
724 if (strcasecmp(p1, "auto") == 0) {
725 if (!bauto) {
726 const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
727 const size_t identify_list_size = MBSTRG(default_detect_order_list_size);
728 size_t i;
729 bauto = 1;
730 for (i = 0; i < identify_list_size; i++) {
731 *entry++ = mbfl_no2encoding(*src++);
732 n++;
733 }
734 }
735 } else {
736 const mbfl_encoding *encoding = mbfl_name2encoding(p1);
737 if (encoding) {
738 *entry++ = encoding;
739 n++;
740 } else {
741 ret = 0;
742 }
743 }
744 p1 = p2 + 1;
745 } while (n < size && p2 != NULL);
746 if (n > 0) {
747 if (return_list) {
748 *return_list = list;
749 } else {
750 pefree(list, persistent);
751 }
752 } else {
753 pefree(list, persistent);
754 if (return_list) {
755 *return_list = NULL;
756 }
757 ret = 0;
758 }
759 if (return_size) {
760 *return_size = n;
761 }
762 } else {
763 if (return_list) {
764 *return_list = NULL;
765 }
766 if (return_size) {
767 *return_size = 0;
768 }
769 ret = 0;
770 }
771 efree(tmpstr);
772 }
773
774 return ret;
775 }
776 /* }}} */
777
778 /* {{{ static int php_mb_parse_encoding_array()
779 * Return 0 if input contains any illegal encoding, otherwise 1.
780 * Even if any illegal encoding is detected the result may contain a list
781 * of parsed encodings.
782 */
783 static int
php_mb_parse_encoding_array(zval * array,const mbfl_encoding *** return_list,size_t * return_size,int persistent TSRMLS_DC)784 php_mb_parse_encoding_array(zval *array, const mbfl_encoding ***return_list, size_t *return_size, int persistent TSRMLS_DC)
785 {
786 zval **hash_entry;
787 HashTable *target_hash;
788 int i, n, size, bauto, ret = SUCCESS;
789 const mbfl_encoding **list, **entry;
790
791 list = NULL;
792 if (Z_TYPE_P(array) == IS_ARRAY) {
793 target_hash = Z_ARRVAL_P(array);
794 zend_hash_internal_pointer_reset(target_hash);
795 i = zend_hash_num_elements(target_hash);
796 size = i + MBSTRG(default_detect_order_list_size);
797 list = (const mbfl_encoding **)pecalloc(size, sizeof(mbfl_encoding*), persistent);
798 if (list != NULL) {
799 entry = list;
800 bauto = 0;
801 n = 0;
802 while (i > 0) {
803 if (zend_hash_get_current_data(target_hash, (void **) &hash_entry) == FAILURE) {
804 break;
805 }
806 convert_to_string_ex(hash_entry);
807 if (strcasecmp(Z_STRVAL_PP(hash_entry), "auto") == 0) {
808 if (!bauto) {
809 const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
810 const size_t identify_list_size = MBSTRG(default_detect_order_list_size);
811 size_t j;
812
813 bauto = 1;
814 for (j = 0; j < identify_list_size; j++) {
815 *entry++ = mbfl_no2encoding(*src++);
816 n++;
817 }
818 }
819 } else {
820 const mbfl_encoding *encoding = mbfl_name2encoding(Z_STRVAL_PP(hash_entry));
821 if (encoding) {
822 *entry++ = encoding;
823 n++;
824 } else {
825 ret = FAILURE;
826 }
827 }
828 zend_hash_move_forward(target_hash);
829 i--;
830 }
831 if (n > 0) {
832 if (return_list) {
833 *return_list = list;
834 } else {
835 pefree(list, persistent);
836 }
837 } else {
838 pefree(list, persistent);
839 if (return_list) {
840 *return_list = NULL;
841 }
842 ret = FAILURE;
843 }
844 if (return_size) {
845 *return_size = n;
846 }
847 } else {
848 if (return_list) {
849 *return_list = NULL;
850 }
851 if (return_size) {
852 *return_size = 0;
853 }
854 ret = FAILURE;
855 }
856 }
857
858 return ret;
859 }
860 /* }}} */
861
862 /* {{{ zend_multibyte interface */
php_mb_zend_encoding_fetcher(const char * encoding_name TSRMLS_DC)863 static const zend_encoding* php_mb_zend_encoding_fetcher(const char *encoding_name TSRMLS_DC)
864 {
865 return (const zend_encoding*)mbfl_name2encoding(encoding_name);
866 }
867
php_mb_zend_encoding_name_getter(const zend_encoding * encoding)868 static const char *php_mb_zend_encoding_name_getter(const zend_encoding *encoding)
869 {
870 return ((const mbfl_encoding *)encoding)->name;
871 }
872
php_mb_zend_encoding_lexer_compatibility_checker(const zend_encoding * _encoding)873 static int php_mb_zend_encoding_lexer_compatibility_checker(const zend_encoding *_encoding)
874 {
875 const mbfl_encoding *encoding = (const mbfl_encoding*)_encoding;
876 if (encoding->flag & MBFL_ENCTYPE_SBCS) {
877 return 1;
878 }
879 if ((encoding->flag & (MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE)) == MBFL_ENCTYPE_MBCS) {
880 return 1;
881 }
882 return 0;
883 }
884
/*
 * Identifies the encoding of a byte string with mbfl_identify_encoding2().
 * When `list` is NULL the candidates come from
 * MBSTRG(current_detect_order_list) instead of the arguments.
 */
static const zend_encoding *php_mb_zend_encoding_detector(const unsigned char *arg_string, size_t arg_length, const zend_encoding **list, size_t list_size TSRMLS_DC)
{
	const mbfl_encoding **candidates = (const mbfl_encoding **)list;
	size_t candidate_count = list_size;
	mbfl_string subject;

	if (candidates == NULL) {
		candidates = (const mbfl_encoding **)MBSTRG(current_detect_order_list);
		candidate_count = MBSTRG(current_detect_order_list_size);
	}

	mbfl_string_init(&subject);
	subject.no_language = MBSTRG(language);
	subject.val = (unsigned char *)arg_string;
	subject.len = arg_length;

	return (const zend_encoding *)mbfl_identify_encoding2(&subject, candidates, candidate_count, 0);
}
900
/*
 * Converts `from` (from_length bytes, in encoding_from) to encoding_to.
 * On success stores the converted buffer and its length in *to / *to_length
 * and returns the location value reported by mbfl_buffer_converter_feed2().
 * Returns (size_t)-1 when the converter cannot be created, when feeding the
 * input reports an error, or when the result cannot be retrieved.
 */
static size_t php_mb_zend_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const zend_encoding *encoding_to, const zend_encoding *encoding_from TSRMLS_DC)
{
	const mbfl_encoding *source_encoding = (const mbfl_encoding *)encoding_from;
	const mbfl_encoding *target_encoding = (const mbfl_encoding *)encoding_to;
	mbfl_string input, output;
	mbfl_buffer_converter *converter;
	int consumed;

	/* describe the input string */
	mbfl_string_init(&input);
	mbfl_string_init(&output);
	input.no_encoding = source_encoding->no_encoding;
	input.no_language = MBSTRG(language);
	input.val = (unsigned char *)from;
	input.len = from_length;

	/* set up the converter with the current illegal-character policy */
	converter = mbfl_buffer_converter_new2(source_encoding, target_encoding, input.len);
	if (converter == NULL) {
		return (size_t)-1;
	}
	mbfl_buffer_converter_illegal_mode(converter, MBSTRG(current_filter_illegal_mode));
	mbfl_buffer_converter_illegal_substchar(converter, MBSTRG(current_filter_illegal_substchar));

	/* run the conversion */
	if (mbfl_buffer_converter_feed2(converter, &input, &consumed)) {
		mbfl_buffer_converter_delete(converter);
		return (size_t)-1;
	}

	mbfl_buffer_converter_flush(converter);
	if (!mbfl_buffer_converter_result(converter, &output)) {
		mbfl_buffer_converter_delete(converter);
		return (size_t)-1;
	}

	/* hand the converted buffer to the caller */
	*to = output.val;
	*to_length = output.len;

	mbfl_buffer_converter_delete(converter);

	return consumed;
}
944
/* Adapter: forwards to php_mb_parse_encoding_list(), casting only the output list type. */
static int php_mb_zend_encoding_list_parser(const char *encoding_list, size_t encoding_list_len, const zend_encoding ***return_list, size_t *return_size, int persistent TSRMLS_DC)
{
	const mbfl_encoding ***mb_return_list = (const mbfl_encoding ***)return_list;

	return php_mb_parse_encoding_list(encoding_list, encoding_list_len, mb_return_list, return_size, persistent TSRMLS_CC);
}
949
php_mb_zend_internal_encoding_getter(TSRMLS_D)950 static const zend_encoding *php_mb_zend_internal_encoding_getter(TSRMLS_D)
951 {
952 return (const zend_encoding *)MBSTRG(internal_encoding);
953 }
954
/* Stores `encoding` in MBSTRG(internal_encoding); always returns SUCCESS. */
static int php_mb_zend_internal_encoding_setter(const zend_encoding *encoding TSRMLS_DC)
{
	const mbfl_encoding *mb_encoding = (const mbfl_encoding *)encoding;

	MBSTRG(internal_encoding) = mb_encoding;

	return SUCCESS;
}
960
/* Table of php_mb_zend_* hooks handed to the engine through
 * zend_multibyte_set_functions() in PHP_MINIT_FUNCTION(mbstring). The first
 * member is the string "mbstring"; the remaining members are the callbacks,
 * listed in the order the zend_multibyte_functions structure declares them. */
static zend_multibyte_functions php_mb_zend_multibyte_functions = {
	"mbstring",
	php_mb_zend_encoding_fetcher,
	php_mb_zend_encoding_name_getter,
	php_mb_zend_encoding_lexer_compatibility_checker,
	php_mb_zend_encoding_detector,
	php_mb_zend_encoding_converter,
	php_mb_zend_encoding_list_parser,
	php_mb_zend_internal_encoding_getter,
	php_mb_zend_internal_encoding_setter
};
972 /* }}} */
973
/* Prototypes for the small regex wrapper used by the
 * mbstring.http_output_conv_mimetypes handler. The definitions follow and
 * are selected at compile time (HAVE_ONIG, otherwise HAVE_PCRE or
 * HAVE_BUNDLED_PCRE). The compiled pattern is passed around as an opaque
 * pointer. */
static void *_php_mb_compile_regex(const char *pattern TSRMLS_DC);
static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len);
static void _php_mb_free_regex(void *opaque);
977
978 #if HAVE_ONIG
979 /* {{{ _php_mb_compile_regex */
/* Compiles `pattern` (a NUL-terminated string) with Oniguruma using
 * case-insensitive matching, no capture groups, ASCII encoding and Perl
 * syntax.
 *
 * Returns the compiled regex as an opaque pointer, or NULL after raising an
 * E_WARNING that contains the pattern and the Oniguruma error message. */
static void *_php_mb_compile_regex(const char *pattern TSRMLS_DC)
{
	php_mb_regex_t *retval;
	OnigErrorInfo err_info;
	int err_code;

	if ((err_code = onig_new(&retval,
			(const OnigUChar *)pattern,
			(const OnigUChar *)pattern + strlen(pattern),
			ONIG_OPTION_IGNORECASE | ONIG_OPTION_DONT_CAPTURE_GROUP,
			ONIG_ENCODING_ASCII, &OnigSyntaxPerl, &err_info))) {
		OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];

		/* onig_error_code_to_str() is variadic and reads the optional third
		 * argument as a pointer to OnigErrorInfo, so the address must be
		 * passed rather than the structure itself. */
		onig_error_code_to_str(err_str, err_code, &err_info);
		php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s: %s", pattern, err_str);
		retval = NULL;
	}
	return retval;
}
998 /* }}} */
999
1000 /* {{{ _php_mb_match_regex */
_php_mb_match_regex(void * opaque,const char * str,size_t str_len)1001 static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len)
1002 {
1003 return onig_search((php_mb_regex_t *)opaque, (const OnigUChar *)str,
1004 (const OnigUChar*)str + str_len, (const OnigUChar *)str,
1005 (const OnigUChar*)str + str_len, NULL, ONIG_OPTION_NONE) >= 0;
1006 }
1007 /* }}} */
1008
1009 /* {{{ _php_mb_free_regex */
_php_mb_free_regex(void * opaque)1010 static void _php_mb_free_regex(void *opaque)
1011 {
1012 onig_free((php_mb_regex_t *)opaque);
1013 }
1014 /* }}} */
1015 #elif HAVE_PCRE || HAVE_BUNDLED_PCRE
1016 /* {{{ _php_mb_compile_regex */
/* Compiles `pattern` with PCRE using caseless matching.
 *
 * Returns the compiled pattern as an opaque pointer. When pcre_compile()
 * fails, an E_WARNING with the pattern, error offset and error text is raised
 * and NULL is returned. */
static void *_php_mb_compile_regex(const char *pattern TSRMLS_DC)
{
	const char *err_str;
	int err_offset;
	pcre *compiled;

	compiled = pcre_compile(pattern, PCRE_CASELESS, &err_str, &err_offset, NULL);
	if (compiled == NULL) {
		php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s (offset=%d): %s", pattern, err_offset, err_str);
	}

	return compiled;
}
1029 /* }}} */
1030
1031 /* {{{ _php_mb_match_regex */
_php_mb_match_regex(void * opaque,const char * str,size_t str_len)1032 static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len)
1033 {
1034 return pcre_exec((pcre *)opaque, NULL, str, (int)str_len, 0,
1035 0, NULL, 0) >= 0;
1036 }
1037 /* }}} */
1038
1039 /* {{{ _php_mb_free_regex */
/* Releases a pattern obtained from _php_mb_compile_regex() (PCRE build) by
 * handing the opaque pointer to pcre_free(). */
static void _php_mb_free_regex(void *opaque)
{
	pcre_free(opaque);
}
1044 /* }}} */
1045 #endif
1046
1047 /* {{{ php_mb_nls_get_default_detect_order_list */
php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang,enum mbfl_no_encoding ** plist,size_t * plist_size)1048 static int php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang, enum mbfl_no_encoding **plist, size_t *plist_size)
1049 {
1050 size_t i;
1051
1052 *plist = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut;
1053 *plist_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
1054
1055 for (i = 0; i < sizeof(php_mb_default_identify_list) / sizeof(php_mb_default_identify_list[0]); i++) {
1056 if (php_mb_default_identify_list[i].lang == lang) {
1057 *plist = (enum mbfl_no_encoding *)php_mb_default_identify_list[i].list;
1058 *plist_size = php_mb_default_identify_list[i].list_size;
1059 return 1;
1060 }
1061 }
1062 return 0;
1063 }
1064 /* }}} */
1065
/* Copies at most `len` bytes from `start` into a freshly emalloc()ed,
 * NUL-terminated buffer, stopping early at the first unescaped `quote` byte.
 *
 * A backslash followed by another backslash, or by `quote` when quote is
 * non-zero, is collapsed to the escaped byte. Everything else is copied one
 * multibyte character at a time (width taken from php_mb_mbchar_bytes_ex()),
 * so bytes inside a multibyte character are never interpreted as a quote or
 * an escape. The caller owns the returned buffer. */
static char *php_mb_rfc1867_substring_conf(const zend_encoding *encoding, char *start, int len, char quote TSRMLS_DC)
{
	char *result = emalloc(len + 2);
	char *out = result;
	int i = 0;

	while (i < len && start[i] != quote) {
		const int is_escape = start[i] == '\\'
			&& (start[i + 1] == '\\' || (quote && start[i + 1] == quote));

		if (is_escape) {
			/* drop the backslash, keep the escaped byte */
			*out++ = start[i + 1];
			i += 2;
		} else {
			size_t remaining = php_mb_mbchar_bytes_ex(start+i, (const mbfl_encoding *)encoding);

			/* copy the whole character, but never read past len */
			while (remaining > 0 && i < len) {
				*out++ = start[i++];
				remaining--;
			}
		}
	}

	*out = '\0';
	return result;
}
1088
static char *php_mb_rfc1867_getword(const zend_encoding *encoding, char **line, char stop TSRMLS_DC) /* {{{ */
{
	/* Splits the next word off *line.
	 *
	 * Scans *line up to the first `stop` byte found outside a single- or
	 * double-quoted section, stepping over unquoted text one multibyte
	 * character at a time. Returns an emalloc()ed copy of the scanned prefix
	 * and advances *line past the run of `stop` bytes that follows it. If no
	 * stop byte is found, the whole remaining line is returned and *line is
	 * moved to its terminating NUL.
	 *
	 * NOTE(review): the result of php_mb_mbchar_bytes_ex() is added to the
	 * cursor without checking for the terminator in between; presumably the
	 * helper never reports more bytes than remain in the string - confirm. */
	const mbfl_encoding *mb_enc = (const mbfl_encoding *)encoding;
	char *cur = *line;
	char *word;

	while (*cur != '\0' && *cur != stop) {
		const char quote = *cur;

		if (quote != '"' && quote != '\'') {
			cur += php_mb_mbchar_bytes_ex(cur, mb_enc);
			continue;
		}

		/* quoted section: skip to the matching quote, honouring \<quote> */
		++cur;
		while (*cur != '\0' && *cur != quote) {
			if (*cur == '\\' && cur[1] == quote) {
				cur += 2;
			} else {
				++cur;
			}
		}
		if (*cur != '\0') {
			/* step over the closing quote */
			++cur;
		}
	}

	if (*cur == '\0') {
		/* no stop byte: hand back everything that is left */
		word = estrdup(*line);
		*line += strlen(*line);
		return word;
	}

	word = estrndup(*line, cur - *line);

	/* swallow consecutive stop bytes */
	while (*cur == stop) {
		cur += php_mb_mbchar_bytes_ex(cur, mb_enc);
	}

	*line = cur;
	return word;
}
1127 /* }}} */
1128
static char *php_mb_rfc1867_getword_conf(const zend_encoding *encoding, char *str TSRMLS_DC) /* {{{ */
{
	/* Extracts one value from `str` after skipping leading whitespace.
	 *
	 * A value starting with ' or " is read up to the matching unescaped
	 * quote; any other value is read up to the next whitespace byte. The
	 * extraction is done by php_mb_rfc1867_substring_conf(). Returns an
	 * emalloc()ed string; an empty string if nothing but whitespace is left. */
	char *word_end;

	for (; *str && isspace(*(unsigned char *)str); ++str) {
		/* skip leading whitespace */
	}

	if (*str == '\0') {
		return estrdup("");
	}

	if (*str == '"' || *str == '\'') {
		const char quote = *str++;

		return php_mb_rfc1867_substring_conf(encoding, str, strlen(str), quote TSRMLS_CC);
	}

	for (word_end = str; *word_end && !isspace(*(unsigned char *)word_end); ++word_end) {
		/* find the end of the unquoted token */
	}

	return php_mb_rfc1867_substring_conf(encoding, str, word_end - str, 0 TSRMLS_CC);
}
1153 /* }}} */
1154
static char *php_mb_rfc1867_basename(const zend_encoding *encoding, char *filename TSRMLS_DC) /* {{{ */
{
	/* Returns a pointer into `filename` just past its last '\\' or '/'
	 * separator (whichever occurs later), or `filename` itself when it
	 * contains neither. Separators are located with
	 * php_mb_safe_strrchr_ex() using the given encoding. No memory is
	 * allocated. */
	const mbfl_encoding *mb_enc = (const mbfl_encoding *)encoding;
	const size_t filename_len = strlen(filename);
	char *backslash, *slash, *last_sep;

	/* The \ check should technically be needed for win32 systems only where
	 * it is a valid path separator. However, IE in all its wisdom always sends
	 * the full path of the file on the user's filesystem, which means that unless
	 * the user does basename() they get a bogus file name. Until IE's user base drops
	 * to nil or the problem is fixed this code must remain enabled for all systems. */
	backslash = php_mb_safe_strrchr_ex(filename, '\\', filename_len, mb_enc);
	slash = php_mb_safe_strrchr_ex(filename, '/', filename_len, mb_enc);

	last_sep = backslash;
	if (slash && (!backslash || slash > backslash)) {
		last_sep = slash;
	}

	return last_sep ? last_sep + 1 : filename;
}
1182 /* }}} */
1183
1184 /* {{{ php.ini directive handler */
1185 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_language) */
PHP_INI_MH(OnUpdate_mbstring_language)1186 static PHP_INI_MH(OnUpdate_mbstring_language)
1187 {
1188 enum mbfl_no_language no_language;
1189
1190 no_language = mbfl_name2no_language(new_value);
1191 if (no_language == mbfl_no_language_invalid) {
1192 MBSTRG(language) = mbfl_no_language_neutral;
1193 return FAILURE;
1194 }
1195 MBSTRG(language) = no_language;
1196 php_mb_nls_get_default_detect_order_list(no_language, &MBSTRG(default_detect_order_list), &MBSTRG(default_detect_order_list_size));
1197 return SUCCESS;
1198 }
1199 /* }}} */
1200
1201 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_detect_order) */
PHP_INI_MH(OnUpdate_mbstring_detect_order)1202 static PHP_INI_MH(OnUpdate_mbstring_detect_order)
1203 {
1204 const mbfl_encoding **list;
1205 size_t size;
1206
1207 if (!new_value) {
1208 if (MBSTRG(detect_order_list)) {
1209 pefree(MBSTRG(detect_order_list), 1);
1210 }
1211 MBSTRG(detect_order_list) = NULL;
1212 MBSTRG(detect_order_list_size) = 0;
1213 return SUCCESS;
1214 }
1215
1216 if (FAILURE == php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) {
1217 return FAILURE;
1218 }
1219
1220 if (MBSTRG(detect_order_list)) {
1221 pefree(MBSTRG(detect_order_list), 1);
1222 }
1223 MBSTRG(detect_order_list) = list;
1224 MBSTRG(detect_order_list_size) = size;
1225 return SUCCESS;
1226 }
1227 /* }}} */
1228
1229 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_input) */
PHP_INI_MH(OnUpdate_mbstring_http_input)1230 static PHP_INI_MH(OnUpdate_mbstring_http_input)
1231 {
1232 const mbfl_encoding **list;
1233 size_t size;
1234
1235 if (!new_value) {
1236 if (MBSTRG(http_input_list)) {
1237 pefree(MBSTRG(http_input_list), 1);
1238 }
1239 MBSTRG(http_input_list) = NULL;
1240 MBSTRG(http_input_list_size) = 0;
1241 return SUCCESS;
1242 }
1243
1244 if (FAILURE == php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) {
1245 return FAILURE;
1246 }
1247
1248 if (MBSTRG(http_input_list)) {
1249 pefree(MBSTRG(http_input_list), 1);
1250 }
1251 MBSTRG(http_input_list) = list;
1252 MBSTRG(http_input_list_size) = size;
1253
1254 return SUCCESS;
1255 }
1256 /* }}} */
1257
1258 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output) */
PHP_INI_MH(OnUpdate_mbstring_http_output)1259 static PHP_INI_MH(OnUpdate_mbstring_http_output)
1260 {
1261 const mbfl_encoding *encoding;
1262
1263 if (new_value == NULL || new_value_length == 0) {
1264 MBSTRG(http_output_encoding) = &mbfl_encoding_pass;
1265 MBSTRG(current_http_output_encoding) = &mbfl_encoding_pass;
1266 return SUCCESS;
1267 }
1268
1269 encoding = mbfl_name2encoding(new_value);
1270 if (!encoding) {
1271 MBSTRG(http_output_encoding) = &mbfl_encoding_pass;
1272 MBSTRG(current_http_output_encoding) = &mbfl_encoding_pass;
1273 return FAILURE;
1274 }
1275
1276 MBSTRG(http_output_encoding) = encoding;
1277 MBSTRG(current_http_output_encoding) = encoding;
1278 return SUCCESS;
1279 }
1280 /* }}} */
1281
/* {{{ _php_mb_ini_mbstring_internal_encoding_set */
/* Applies an mbstring.internal_encoding value to the module globals.
 *
 * When the value is NULL, empty or not a known encoding name, a default is
 * chosen from MBSTRG(language) (ISO-8859-1 for languages without a specific
 * entry). The result becomes both the configured and the current internal
 * encoding. With mbregex support the regex character type is updated as
 * well. Always returns SUCCESS. */
int _php_mb_ini_mbstring_internal_encoding_set(const char *new_value, uint new_value_length TSRMLS_DC)
{
	const mbfl_encoding *encoding = NULL;

	if (new_value != NULL && new_value_length != 0) {
		encoding = mbfl_name2encoding(new_value);
	}

	if (encoding == NULL) {
		enum mbfl_no_encoding fallback;

		/* per-language default encoding */
		switch (MBSTRG(language)) {
			case mbfl_no_language_uni:
				fallback = mbfl_no_encoding_utf8;
				break;
			case mbfl_no_language_japanese:
				fallback = mbfl_no_encoding_euc_jp;
				break;
			case mbfl_no_language_korean:
				fallback = mbfl_no_encoding_euc_kr;
				break;
			case mbfl_no_language_simplified_chinese:
				fallback = mbfl_no_encoding_euc_cn;
				break;
			case mbfl_no_language_traditional_chinese:
				fallback = mbfl_no_encoding_euc_tw;
				break;
			case mbfl_no_language_russian:
				fallback = mbfl_no_encoding_koi8r;
				break;
			case mbfl_no_language_german:
				fallback = mbfl_no_encoding_8859_15;
				break;
			case mbfl_no_language_armenian:
				fallback = mbfl_no_encoding_armscii8;
				break;
			case mbfl_no_language_turkish:
				fallback = mbfl_no_encoding_8859_9;
				break;
			default:
				fallback = mbfl_no_encoding_8859_1;
				break;
		}
		encoding = mbfl_no2encoding(fallback);
	}

	MBSTRG(internal_encoding) = encoding;
	MBSTRG(current_internal_encoding) = encoding;
#if HAVE_MBREGEX
	/* falls back to EUC-JP if an unknown encoding name is given */
	if (FAILURE == php_mb_regex_set_default_mbctype(new_value TSRMLS_CC)) {
		php_mb_regex_set_default_mbctype("EUC-JP" TSRMLS_CC);
	}
	/* NOTE(review): the current mbctype is set from new_value even when the
	 * default above fell back to EUC-JP - confirm this is intended. */
	php_mb_regex_set_mbctype(new_value TSRMLS_CC);
#endif
	return SUCCESS;
}
1336 /* }}} */
1337
1338 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_internal_encoding) */
PHP_INI_MH(OnUpdate_mbstring_internal_encoding)1339 static PHP_INI_MH(OnUpdate_mbstring_internal_encoding)
1340 {
1341 if (OnUpdateString(entry, new_value, new_value_length, mh_arg1, mh_arg2, mh_arg3, stage TSRMLS_CC) == FAILURE) {
1342 return FAILURE;
1343 }
1344 if (stage == PHP_INI_STAGE_STARTUP || stage == PHP_INI_STAGE_SHUTDOWN
1345 || stage == PHP_INI_STAGE_RUNTIME) {
1346 return _php_mb_ini_mbstring_internal_encoding_set(new_value, new_value_length TSRMLS_CC);
1347 } else {
1348 /* the corresponding mbstring globals needs to be set according to the
1349 * ini value in the later stage because it never falls back to the
1350 * default value if 1. no value for mbstring.internal_encoding is given,
1351 * 2. mbstring.language directive is processed in per-dir or runtime
1352 * context and 3. call to the handler for mbstring.language is done
1353 * after mbstring.internal_encoding is handled. */
1354 return SUCCESS;
1355 }
1356 }
1357 /* }}} */
1358
1359 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_substitute_character) */
PHP_INI_MH(OnUpdate_mbstring_substitute_character)1360 static PHP_INI_MH(OnUpdate_mbstring_substitute_character)
1361 {
1362 int c;
1363 char *endptr = NULL;
1364
1365 if (new_value != NULL) {
1366 if (strcasecmp("none", new_value) == 0) {
1367 MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
1368 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
1369 } else if (strcasecmp("long", new_value) == 0) {
1370 MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
1371 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
1372 } else if (strcasecmp("entity", new_value) == 0) {
1373 MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
1374 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
1375 } else {
1376 MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1377 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1378 if (new_value_length >0) {
1379 c = strtol(new_value, &endptr, 0);
1380 if (*endptr == '\0') {
1381 MBSTRG(filter_illegal_substchar) = c;
1382 MBSTRG(current_filter_illegal_substchar) = c;
1383 }
1384 }
1385 }
1386 } else {
1387 MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1388 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1389 MBSTRG(filter_illegal_substchar) = 0x3f; /* '?' */
1390 MBSTRG(current_filter_illegal_substchar) = 0x3f; /* '?' */
1391 }
1392
1393 return SUCCESS;
1394 }
1395 /* }}} */
1396
1397 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_encoding_translation) */
PHP_INI_MH(OnUpdate_mbstring_encoding_translation)1398 static PHP_INI_MH(OnUpdate_mbstring_encoding_translation)
1399 {
1400 if (new_value == NULL) {
1401 return FAILURE;
1402 }
1403
1404 OnUpdateBool(entry, new_value, new_value_length, mh_arg1, mh_arg2, mh_arg3, stage TSRMLS_CC);
1405
1406 if (MBSTRG(encoding_translation)) {
1407 sapi_unregister_post_entry(php_post_entries TSRMLS_CC);
1408 sapi_register_post_entries(mbstr_post_entries TSRMLS_CC);
1409 } else {
1410 sapi_unregister_post_entry(mbstr_post_entries TSRMLS_CC);
1411 sapi_register_post_entries(php_post_entries TSRMLS_CC);
1412 }
1413
1414 return SUCCESS;
1415 }
1416 /* }}} */
1417
/* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes) */
PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes)1419 static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes)
1420 {
1421 zval tmp;
1422 void *re = NULL;
1423
1424 if (!new_value) {
1425 new_value = entry->orig_value;
1426 new_value_length = entry->orig_value_length;
1427 }
1428 php_trim(new_value, new_value_length, NULL, 0, &tmp, 3 TSRMLS_CC);
1429
1430 if (Z_STRLEN(tmp) > 0) {
1431 if (!(re = _php_mb_compile_regex(Z_STRVAL(tmp) TSRMLS_CC))) {
1432 zval_dtor(&tmp);
1433 return FAILURE;
1434 }
1435 }
1436
1437 if (MBSTRG(http_output_conv_mimetypes)) {
1438 _php_mb_free_regex(MBSTRG(http_output_conv_mimetypes));
1439 }
1440
1441 MBSTRG(http_output_conv_mimetypes) = re;
1442
1443 zval_dtor(&tmp);
1444 return SUCCESS;
1445 }
1446 /* }}} */
1447 /* }}} */
1448
1449 /* {{{ php.ini directive registration */
/* ini directive table for the mbstring extension. Each entry gives the
 * directive name, its default value, the contexts in which it may be changed
 * and the handler invoked on update. Entries declared with STD_* additionally
 * name the zend_mbstring_globals member the handler writes to. */
PHP_INI_BEGIN()
	PHP_INI_ENTRY("mbstring.language", "neutral", PHP_INI_ALL, OnUpdate_mbstring_language)
	PHP_INI_ENTRY("mbstring.detect_order", NULL, PHP_INI_ALL, OnUpdate_mbstring_detect_order)
	PHP_INI_ENTRY("mbstring.http_input", "pass", PHP_INI_ALL, OnUpdate_mbstring_http_input)
	PHP_INI_ENTRY("mbstring.http_output", "pass", PHP_INI_ALL, OnUpdate_mbstring_http_output)
	STD_PHP_INI_ENTRY("mbstring.internal_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_internal_encoding, internal_encoding_name, zend_mbstring_globals, mbstring_globals)
	PHP_INI_ENTRY("mbstring.substitute_character", NULL, PHP_INI_ALL, OnUpdate_mbstring_substitute_character)
	/* function overloading can only be configured system-wide */
	STD_PHP_INI_ENTRY("mbstring.func_overload", "0",
	PHP_INI_SYSTEM, OnUpdateLong, func_overload, zend_mbstring_globals, mbstring_globals)

	STD_PHP_INI_BOOLEAN("mbstring.encoding_translation", "0",
		PHP_INI_SYSTEM | PHP_INI_PERDIR,
		OnUpdate_mbstring_encoding_translation,
		encoding_translation, zend_mbstring_globals, mbstring_globals)
	/* default pattern; compiled by the handler via _php_mb_compile_regex() */
	PHP_INI_ENTRY("mbstring.http_output_conv_mimetypes",
		"^(text/|application/xhtml\\+xml)",
		PHP_INI_ALL,
		OnUpdate_mbstring_http_output_conv_mimetypes)

	/* NOTE(review): declared as a boolean entry but stored through
	 * OnUpdateLong; presumably strict_detection is a long member - confirm
	 * against the globals declaration. */
	STD_PHP_INI_BOOLEAN("mbstring.strict_detection", "0",
		PHP_INI_ALL,
		OnUpdateLong,
		strict_detection, zend_mbstring_globals, mbstring_globals)
PHP_INI_END()
1474 /* }}} */
1475
1476 /* {{{ module global initialize handler */
1477 static PHP_GINIT_FUNCTION(mbstring)
1478 {
1479 mbstring_globals->language = mbfl_no_language_uni;
1480 mbstring_globals->internal_encoding = NULL;
1481 mbstring_globals->current_internal_encoding = mbstring_globals->internal_encoding;
1482 mbstring_globals->http_output_encoding = &mbfl_encoding_pass;
1483 mbstring_globals->current_http_output_encoding = &mbfl_encoding_pass;
1484 mbstring_globals->http_input_identify = NULL;
1485 mbstring_globals->http_input_identify_get = NULL;
1486 mbstring_globals->http_input_identify_post = NULL;
1487 mbstring_globals->http_input_identify_cookie = NULL;
1488 mbstring_globals->http_input_identify_string = NULL;
1489 mbstring_globals->http_input_list = NULL;
1490 mbstring_globals->http_input_list_size = 0;
1491 mbstring_globals->detect_order_list = NULL;
1492 mbstring_globals->detect_order_list_size = 0;
1493 mbstring_globals->current_detect_order_list = NULL;
1494 mbstring_globals->current_detect_order_list_size = 0;
1495 mbstring_globals->default_detect_order_list = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut;
1496 mbstring_globals->default_detect_order_list_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
1497 mbstring_globals->filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1498 mbstring_globals->filter_illegal_substchar = 0x3f; /* '?' */
1499 mbstring_globals->current_filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1500 mbstring_globals->current_filter_illegal_substchar = 0x3f; /* '?' */
1501 mbstring_globals->illegalchars = 0;
1502 mbstring_globals->func_overload = 0;
1503 mbstring_globals->encoding_translation = 0;
1504 mbstring_globals->strict_detection = 0;
1505 mbstring_globals->outconv = NULL;
1506 mbstring_globals->http_output_conv_mimetypes = NULL;
1507 #if HAVE_MBREGEX
1508 mbstring_globals->mb_regex_globals = php_mb_regex_globals_alloc(TSRMLS_C);
1509 #endif
1510 }
1511 /* }}} */
1512
1513 /* {{{ PHP_GSHUTDOWN_FUNCTION */
PHP_GSHUTDOWN_FUNCTION(mbstring)1514 static PHP_GSHUTDOWN_FUNCTION(mbstring)
1515 {
1516 if (mbstring_globals->http_input_list) {
1517 free(mbstring_globals->http_input_list);
1518 }
1519 if (mbstring_globals->detect_order_list) {
1520 free(mbstring_globals->detect_order_list);
1521 }
1522 if (mbstring_globals->http_output_conv_mimetypes) {
1523 _php_mb_free_regex(mbstring_globals->http_output_conv_mimetypes);
1524 }
1525 #if HAVE_MBREGEX
1526 php_mb_regex_globals_free(mbstring_globals->mb_regex_globals TSRMLS_CC);
1527 #endif
1528 }
1529 /* }}} */
1530
1531 /* {{{ PHP_MINIT_FUNCTION(mbstring) */
/* Module startup: installs the libmbfl allocators, registers the ini
 * entries, SAPI handlers and MB_* constants, starts the mbregex sub-module
 * when available, and hooks mbstring into the engine's multibyte support and
 * the RFC 1867 upload parser. Returns FAILURE only when
 * zend_multibyte_set_functions() fails. */
PHP_MINIT_FUNCTION(mbstring)
{
	__mbfl_allocators = &_php_mb_allocators;

	/* must run before MBSTRG(encoding_translation) is consulted below */
	REGISTER_INI_ENTRIES();

	/* This is a global handler. Should not be set in a per-request handler. */
	sapi_register_treat_data(mbstr_treat_data TSRMLS_CC);

	/* Post handlers are stored in the thread-local context. */
	if (MBSTRG(encoding_translation)) {
		sapi_register_post_entries(mbstr_post_entries TSRMLS_CC);
	}

	/* bit flags for mbstring.func_overload */
	REGISTER_LONG_CONSTANT("MB_OVERLOAD_MAIL", MB_OVERLOAD_MAIL, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("MB_OVERLOAD_STRING", MB_OVERLOAD_STRING, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("MB_OVERLOAD_REGEX", MB_OVERLOAD_REGEX, CONST_CS | CONST_PERSISTENT);

	/* case conversion modes */
	REGISTER_LONG_CONSTANT("MB_CASE_UPPER", PHP_UNICODE_CASE_UPPER, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("MB_CASE_LOWER", PHP_UNICODE_CASE_LOWER, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("MB_CASE_TITLE", PHP_UNICODE_CASE_TITLE, CONST_CS | CONST_PERSISTENT);

#if HAVE_MBREGEX
	PHP_MINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
#endif

	/* expose the php_mb_zend_* hooks to the engine */
	if (FAILURE == zend_multibyte_set_functions(&php_mb_zend_multibyte_functions TSRMLS_CC)) {
		return FAILURE;
	}

	/* multibyte-aware helpers for the RFC 1867 (file upload) parser */
	php_rfc1867_set_multibyte_callbacks(
		php_mb_encoding_translation,
		php_mb_gpc_get_detect_order,
		php_mb_gpc_set_input_encoding,
		php_mb_rfc1867_getword,
		php_mb_rfc1867_getword_conf,
		php_mb_rfc1867_basename);

	return SUCCESS;
}
1572 /* }}} */
1573
1574 /* {{{ PHP_MSHUTDOWN_FUNCTION(mbstring) */
/* Module shutdown: unregisters the ini entries and, with mbregex support,
 * shuts down the mbregex sub-module. Always returns SUCCESS. */
PHP_MSHUTDOWN_FUNCTION(mbstring)
{
	UNREGISTER_INI_ENTRIES();

#if HAVE_MBREGEX
	PHP_MSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
#endif

	return SUCCESS;
}
1585 /* }}} */
1586
1587 /* {{{ PHP_RINIT_FUNCTION(mbstring) */
PHP_RINIT_FUNCTION(mbstring)1588 PHP_RINIT_FUNCTION(mbstring)
1589 {
1590 zend_function *func, *orig;
1591 const struct mb_overload_def *p;
1592
1593 MBSTRG(current_internal_encoding) = MBSTRG(internal_encoding);
1594 MBSTRG(current_http_output_encoding) = MBSTRG(http_output_encoding);
1595 MBSTRG(current_filter_illegal_mode) = MBSTRG(filter_illegal_mode);
1596 MBSTRG(current_filter_illegal_substchar) = MBSTRG(filter_illegal_substchar);
1597
1598 MBSTRG(illegalchars) = 0;
1599
1600 php_mb_populate_current_detect_order_list(TSRMLS_C);
1601
1602 /* override original function. */
1603 if (MBSTRG(func_overload)){
1604 p = &(mb_ovld[0]);
1605
1606 while (p->type > 0) {
1607 if ((MBSTRG(func_overload) & p->type) == p->type &&
1608 zend_hash_find(EG(function_table), p->save_func,
1609 strlen(p->save_func)+1, (void **)&orig) != SUCCESS) {
1610
1611 zend_hash_find(EG(function_table), p->ovld_func, strlen(p->ovld_func)+1 , (void **)&func);
1612
1613 if (zend_hash_find(EG(function_table), p->orig_func, strlen(p->orig_func)+1, (void **)&orig) != SUCCESS) {
1614 php_error_docref("ref.mbstring" TSRMLS_CC, E_WARNING, "mbstring couldn't find function %s.", p->orig_func);
1615 return FAILURE;
1616 } else {
1617 zend_hash_add(EG(function_table), p->save_func, strlen(p->save_func)+1, orig, sizeof(zend_function), NULL);
1618
1619 if (zend_hash_update(EG(function_table), p->orig_func, strlen(p->orig_func)+1, func, sizeof(zend_function),
1620 NULL) == FAILURE) {
1621 php_error_docref("ref.mbstring" TSRMLS_CC, E_WARNING, "mbstring couldn't replace function %s.", p->orig_func);
1622 return FAILURE;
1623 }
1624 }
1625 }
1626 p++;
1627 }
1628 }
1629 #if HAVE_MBREGEX
1630 PHP_RINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1631 #endif
1632 zend_multibyte_set_internal_encoding((const zend_encoding *)MBSTRG(internal_encoding) TSRMLS_CC);
1633
1634 return SUCCESS;
1635 }
1636 /* }}} */
1637
1638 /* {{{ PHP_RSHUTDOWN_FUNCTION(mbstring) */
PHP_RSHUTDOWN_FUNCTION(mbstring)1639 PHP_RSHUTDOWN_FUNCTION(mbstring)
1640 {
1641 const struct mb_overload_def *p;
1642 zend_function *orig;
1643
1644 if (MBSTRG(current_detect_order_list) != NULL) {
1645 efree(MBSTRG(current_detect_order_list));
1646 MBSTRG(current_detect_order_list) = NULL;
1647 MBSTRG(current_detect_order_list_size) = 0;
1648 }
1649 if (MBSTRG(outconv) != NULL) {
1650 MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
1651 mbfl_buffer_converter_delete(MBSTRG(outconv));
1652 MBSTRG(outconv) = NULL;
1653 }
1654
1655 /* clear http input identification. */
1656 MBSTRG(http_input_identify) = NULL;
1657 MBSTRG(http_input_identify_post) = NULL;
1658 MBSTRG(http_input_identify_get) = NULL;
1659 MBSTRG(http_input_identify_cookie) = NULL;
1660 MBSTRG(http_input_identify_string) = NULL;
1661
1662 /* clear overloaded function. */
1663 if (MBSTRG(func_overload)){
1664 p = &(mb_ovld[0]);
1665 while (p->type > 0) {
1666 if ((MBSTRG(func_overload) & p->type) == p->type &&
1667 zend_hash_find(EG(function_table), p->save_func,
1668 strlen(p->save_func)+1, (void **)&orig) == SUCCESS) {
1669
1670 zend_hash_update(EG(function_table), p->orig_func, strlen(p->orig_func)+1, orig, sizeof(zend_function), NULL);
1671 zend_hash_del(EG(function_table), p->save_func, strlen(p->save_func)+1);
1672 }
1673 p++;
1674 }
1675 }
1676
1677 #if HAVE_MBREGEX
1678 PHP_RSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1679 #endif
1680
1681 return SUCCESS;
1682 }
1683 /* }}} */
1684
1685 /* {{{ PHP_MINFO_FUNCTION(mbstring) */
/* Prints the mbstring section of the module information output: a status
 * table, the libmbfl license notice, the mbregex section when available and
 * the ini entries. */
PHP_MINFO_FUNCTION(mbstring)
{
	php_info_print_table_start();
	php_info_print_table_row(2, "Multibyte Support", "enabled");
	php_info_print_table_row(2, "Multibyte string engine", "libmbfl");
	php_info_print_table_row(2, "HTTP input encoding translation", MBSTRG(encoding_translation) ? "enabled": "disabled");
	{
		/* format the libmbfl version as major.minor.teeny */
		char tmp[256];
		snprintf(tmp, sizeof(tmp), "%d.%d.%d", MBFL_VERSION_MAJOR, MBFL_VERSION_MINOR, MBFL_VERSION_TEENY);
		php_info_print_table_row(2, "libmbfl version", tmp);
	}
	php_info_print_table_end();

	/* license notice for the bundled conversion library */
	php_info_print_table_start();
	php_info_print_table_header(1, "mbstring extension makes use of \"streamable kanji code filter and converter\", which is distributed under the GNU Lesser General Public License version 2.1.");
	php_info_print_table_end();

#if HAVE_MBREGEX
	PHP_MINFO(mb_regex)(ZEND_MODULE_INFO_FUNC_ARGS_PASSTHRU);
#endif

	DISPLAY_INI_ENTRIES();
}
1709 /* }}} */
1710
1711 /* {{{ proto string mb_language([string language])
1712 Sets the current language or Returns the current language as a string */
PHP_FUNCTION(mb_language)1713 PHP_FUNCTION(mb_language)
1714 {
1715 char *name = NULL;
1716 int name_len = 0;
1717
1718 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &name, &name_len) == FAILURE) {
1719 return;
1720 }
1721 if (name == NULL) {
1722 RETVAL_STRING((char *)mbfl_no_language2name(MBSTRG(language)), 1);
1723 } else {
1724 if (FAILURE == zend_alter_ini_entry(
1725 "mbstring.language", sizeof("mbstring.language"),
1726 name, name_len, PHP_INI_USER, PHP_INI_STAGE_RUNTIME)) {
1727 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown language \"%s\"", name);
1728 RETVAL_FALSE;
1729 } else {
1730 RETVAL_TRUE;
1731 }
1732 }
1733 }
1734 /* }}} */
1735
1736 /* {{{ proto string mb_internal_encoding([string encoding])
1737 Sets the current internal encoding or Returns the current internal encoding as a string */
PHP_FUNCTION(mb_internal_encoding)1738 PHP_FUNCTION(mb_internal_encoding)
1739 {
1740 const char *name = NULL;
1741 int name_len;
1742 const mbfl_encoding *encoding;
1743
1744 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &name, &name_len) == FAILURE) {
1745 RETURN_FALSE;
1746 }
1747 if (name == NULL) {
1748 name = MBSTRG(current_internal_encoding) ? MBSTRG(current_internal_encoding)->name: NULL;
1749 if (name != NULL) {
1750 RETURN_STRING(name, 1);
1751 } else {
1752 RETURN_FALSE;
1753 }
1754 } else {
1755 encoding = mbfl_name2encoding(name);
1756 if (!encoding) {
1757 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
1758 RETURN_FALSE;
1759 } else {
1760 MBSTRG(current_internal_encoding) = encoding;
1761 RETURN_TRUE;
1762 }
1763 }
1764 }
1765 /* }}} */
1766
1767 /* {{{ proto mixed mb_http_input([string type])
1768 Returns the input encoding */
PHP_FUNCTION(mb_http_input)1769 PHP_FUNCTION(mb_http_input)
1770 {
1771 char *typ = NULL;
1772 int typ_len;
1773 int retname;
1774 char *list, *temp;
1775 const mbfl_encoding *result = NULL;
1776
1777 retname = 1;
1778 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &typ, &typ_len) == FAILURE) {
1779 RETURN_FALSE;
1780 }
1781 if (typ == NULL) {
1782 result = MBSTRG(http_input_identify);
1783 } else {
1784 switch (*typ) {
1785 case 'G':
1786 case 'g':
1787 result = MBSTRG(http_input_identify_get);
1788 break;
1789 case 'P':
1790 case 'p':
1791 result = MBSTRG(http_input_identify_post);
1792 break;
1793 case 'C':
1794 case 'c':
1795 result = MBSTRG(http_input_identify_cookie);
1796 break;
1797 case 'S':
1798 case 's':
1799 result = MBSTRG(http_input_identify_string);
1800 break;
1801 case 'I':
1802 case 'i':
1803 {
1804 const mbfl_encoding **entry = MBSTRG(http_input_list);
1805 const size_t n = MBSTRG(http_input_list_size);
1806 size_t i;
1807 array_init(return_value);
1808 for (i = 0; i < n; i++) {
1809 add_next_index_string(return_value, (*entry)->name, 1);
1810 entry++;
1811 }
1812 retname = 0;
1813 }
1814 break;
1815 case 'L':
1816 case 'l':
1817 {
1818 const mbfl_encoding **entry = MBSTRG(http_input_list);
1819 const size_t n = MBSTRG(http_input_list_size);
1820 size_t i;
1821 list = NULL;
1822 for (i = 0; i < n; i++) {
1823 if (list) {
1824 temp = list;
1825 spprintf(&list, 0, "%s,%s", temp, (*entry)->name);
1826 efree(temp);
1827 if (!list) {
1828 break;
1829 }
1830 } else {
1831 list = estrdup((*entry)->name);
1832 }
1833 entry++;
1834 }
1835 }
1836 if (!list) {
1837 RETURN_FALSE;
1838 }
1839 RETVAL_STRING(list, 0);
1840 retname = 0;
1841 break;
1842 default:
1843 result = MBSTRG(http_input_identify);
1844 break;
1845 }
1846 }
1847
1848 if (retname) {
1849 if (result) {
1850 RETVAL_STRING(result->name, 1);
1851 } else {
1852 RETVAL_FALSE;
1853 }
1854 }
1855 }
1856 /* }}} */
1857
1858 /* {{{ proto string mb_http_output([string encoding])
1859 Sets the current output_encoding or returns the current output_encoding as a string */
PHP_FUNCTION(mb_http_output)1860 PHP_FUNCTION(mb_http_output)
1861 {
1862 const char *name = NULL;
1863 int name_len;
1864 const mbfl_encoding *encoding;
1865
1866 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", (char **)&name, &name_len) == FAILURE) {
1867 RETURN_FALSE;
1868 }
1869
1870 if (name == NULL) {
1871 name = MBSTRG(current_http_output_encoding) ? MBSTRG(current_http_output_encoding)->name: NULL;
1872 if (name != NULL) {
1873 RETURN_STRING(name, 1);
1874 } else {
1875 RETURN_FALSE;
1876 }
1877 } else {
1878 encoding = mbfl_name2encoding(name);
1879 if (!encoding) {
1880 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
1881 RETURN_FALSE;
1882 } else {
1883 MBSTRG(current_http_output_encoding) = encoding;
1884 RETURN_TRUE;
1885 }
1886 }
1887 }
1888 /* }}} */
1889
/* {{{ proto bool|array mb_detect_order([mixed encoding-list])
   Sets the current detect_order or returns the current detect_order as an array */
PHP_FUNCTION(mb_detect_order)1892 PHP_FUNCTION(mb_detect_order)
1893 {
1894 zval **arg1 = NULL;
1895
1896 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|Z", &arg1) == FAILURE) {
1897 return;
1898 }
1899
1900 if (!arg1) {
1901 size_t i;
1902 size_t n = MBSTRG(current_detect_order_list_size);
1903 const mbfl_encoding **entry = MBSTRG(current_detect_order_list);
1904 array_init(return_value);
1905 for (i = 0; i < n; i++) {
1906 add_next_index_string(return_value, (*entry)->name, 1);
1907 entry++;
1908 }
1909 } else {
1910 const mbfl_encoding **list = NULL;
1911 size_t size = 0;
1912 switch (Z_TYPE_PP(arg1)) {
1913 case IS_ARRAY:
1914 if (FAILURE == php_mb_parse_encoding_array(*arg1, &list, &size, 0 TSRMLS_CC)) {
1915 if (list) {
1916 efree(list);
1917 }
1918 RETURN_FALSE;
1919 }
1920 break;
1921 default:
1922 convert_to_string_ex(arg1);
1923 if (FAILURE == php_mb_parse_encoding_list(Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1), &list, &size, 0 TSRMLS_CC)) {
1924 if (list) {
1925 efree(list);
1926 }
1927 RETURN_FALSE;
1928 }
1929 break;
1930 }
1931
1932 if (list == NULL) {
1933 RETURN_FALSE;
1934 }
1935
1936 if (MBSTRG(current_detect_order_list)) {
1937 efree(MBSTRG(current_detect_order_list));
1938 }
1939 MBSTRG(current_detect_order_list) = list;
1940 MBSTRG(current_detect_order_list_size) = size;
1941 RETURN_TRUE;
1942 }
1943 }
1944 /* }}} */
1945
1946 /* {{{ proto mixed mb_substitute_character([mixed substchar])
1947 Sets the current substitute_character or returns the current substitute_character */
PHP_FUNCTION(mb_substitute_character)1948 PHP_FUNCTION(mb_substitute_character)
1949 {
1950 zval **arg1 = NULL;
1951
1952 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|Z", &arg1) == FAILURE) {
1953 return;
1954 }
1955
1956 if (!arg1) {
1957 if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
1958 RETURN_STRING("none", 1);
1959 } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
1960 RETURN_STRING("long", 1);
1961 } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
1962 RETURN_STRING("entity", 1);
1963 } else {
1964 RETURN_LONG(MBSTRG(current_filter_illegal_substchar));
1965 }
1966 } else {
1967 RETVAL_TRUE;
1968
1969 switch (Z_TYPE_PP(arg1)) {
1970 case IS_STRING:
1971 if (strncasecmp("none", Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1)) == 0) {
1972 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
1973 } else if (strncasecmp("long", Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1)) == 0) {
1974 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
1975 } else if (strncasecmp("entity", Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1)) == 0) {
1976 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
1977 } else {
1978 convert_to_long_ex(arg1);
1979
1980 if (Z_LVAL_PP(arg1) < 0xffff && Z_LVAL_PP(arg1) > 0x0) {
1981 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1982 MBSTRG(current_filter_illegal_substchar) = Z_LVAL_PP(arg1);
1983 } else {
1984 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown character.");
1985 RETURN_FALSE;
1986 }
1987 }
1988 break;
1989 default:
1990 convert_to_long_ex(arg1);
1991 if (Z_LVAL_PP(arg1) < 0xffff && Z_LVAL_PP(arg1) > 0x0) {
1992 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1993 MBSTRG(current_filter_illegal_substchar) = Z_LVAL_PP(arg1);
1994 } else {
1995 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown character.");
1996 RETURN_FALSE;
1997 }
1998 break;
1999 }
2000 }
2001 }
2002 /* }}} */
2003
2004 /* {{{ proto string mb_preferred_mime_name(string encoding)
2005 Return the preferred MIME name (charset) as a string */
PHP_FUNCTION(mb_preferred_mime_name)2006 PHP_FUNCTION(mb_preferred_mime_name)
2007 {
2008 enum mbfl_no_encoding no_encoding;
2009 char *name = NULL;
2010 int name_len;
2011
2012 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &name, &name_len) == FAILURE) {
2013 return;
2014 } else {
2015 no_encoding = mbfl_name2no_encoding(name);
2016 if (no_encoding == mbfl_no_encoding_invalid) {
2017 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
2018 RETVAL_FALSE;
2019 } else {
2020 const char *preferred_name = mbfl_no2preferred_mime_name(no_encoding);
2021 if (preferred_name == NULL || *preferred_name == '\0') {
2022 php_error_docref(NULL TSRMLS_CC, E_WARNING, "No MIME preferred name corresponding to \"%s\"", name);
2023 RETVAL_FALSE;
2024 } else {
2025 RETVAL_STRING((char *)preferred_name, 1);
2026 }
2027 }
2028 }
2029 }
2030 /* }}} */
2031
/* Non-zero (1) when c lies in 0x81-0x9f or 0xe0-0xf5; note that c is evaluated more than once. */
#define IS_SJIS1(c) (((c) >= 0x81 && (c) <= 0x9f) || ((c) >= 0xe0 && (c) <= 0xf5))
/* Non-zero (1) when c lies in 0x40-0x7e or 0x80-0xfc; note that c is evaluated more than once. */
#define IS_SJIS2(c) (((c) >= 0x40 && (c) <= 0x7e) || ((c) >= 0x80 && (c) <= 0xfc))
2034
2035 /* {{{ proto bool mb_parse_str(string encoded_string [, array result])
2036 Parses GET/POST/COOKIE data and sets global variables */
PHP_FUNCTION(mb_parse_str)2037 PHP_FUNCTION(mb_parse_str)
2038 {
2039 zval *track_vars_array = NULL;
2040 char *encstr = NULL;
2041 int encstr_len;
2042 php_mb_encoding_handler_info_t info;
2043 const mbfl_encoding *detected;
2044
2045 track_vars_array = NULL;
2046 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|z", &encstr, &encstr_len, &track_vars_array) == FAILURE) {
2047 return;
2048 }
2049
2050 if (track_vars_array != NULL) {
2051 /* Clear out the array */
2052 zval_dtor(track_vars_array);
2053 array_init(track_vars_array);
2054 }
2055
2056 encstr = estrndup(encstr, encstr_len);
2057
2058 info.data_type = PARSE_STRING;
2059 info.separator = PG(arg_separator).input;
2060 info.report_errors = 1;
2061 info.to_encoding = MBSTRG(current_internal_encoding);
2062 info.to_language = MBSTRG(language);
2063 info.from_encodings = MBSTRG(http_input_list);
2064 info.num_from_encodings = MBSTRG(http_input_list_size);
2065 info.from_language = MBSTRG(language);
2066
2067 if (track_vars_array != NULL) {
2068 detected = _php_mb_encoding_handler_ex(&info, track_vars_array, encstr TSRMLS_CC);
2069 } else {
2070 zval tmp;
2071 if (!EG(active_symbol_table)) {
2072 zend_rebuild_symbol_table(TSRMLS_C);
2073 }
2074 Z_ARRVAL(tmp) = EG(active_symbol_table);
2075 detected = _php_mb_encoding_handler_ex(&info, &tmp, encstr TSRMLS_CC);
2076 }
2077
2078 MBSTRG(http_input_identify) = detected;
2079
2080 RETVAL_BOOL(detected);
2081
2082 if (encstr != NULL) efree(encstr);
2083 }
2084 /* }}} */
2085
2086 /* {{{ proto string mb_output_handler(string contents, int status)
2087 Returns string in output buffer converted to the http_output encoding */
PHP_FUNCTION(mb_output_handler)2088 PHP_FUNCTION(mb_output_handler)
2089 {
2090 char *arg_string;
2091 int arg_string_len;
2092 long arg_status;
2093 mbfl_string string, result;
2094 const char *charset;
2095 char *p;
2096 const mbfl_encoding *encoding;
2097 int last_feed, len;
2098 unsigned char send_text_mimetype = 0;
2099 char *s, *mimetype = NULL;
2100
2101 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl", &arg_string, &arg_string_len, &arg_status) == FAILURE) {
2102 return;
2103 }
2104
2105 encoding = MBSTRG(current_http_output_encoding);
2106
2107 /* start phase only */
2108 if ((arg_status & PHP_OUTPUT_HANDLER_START) != 0) {
2109 /* delete the converter just in case. */
2110 if (MBSTRG(outconv)) {
2111 MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
2112 mbfl_buffer_converter_delete(MBSTRG(outconv));
2113 MBSTRG(outconv) = NULL;
2114 }
2115 if (encoding == &mbfl_encoding_pass) {
2116 RETURN_STRINGL(arg_string, arg_string_len, 1);
2117 }
2118
2119 /* analyze mime type */
2120 if (SG(sapi_headers).mimetype &&
2121 _php_mb_match_regex(
2122 MBSTRG(http_output_conv_mimetypes),
2123 SG(sapi_headers).mimetype,
2124 strlen(SG(sapi_headers).mimetype))) {
2125 if ((s = strchr(SG(sapi_headers).mimetype,';')) == NULL){
2126 mimetype = estrdup(SG(sapi_headers).mimetype);
2127 } else {
2128 mimetype = estrndup(SG(sapi_headers).mimetype,s-SG(sapi_headers).mimetype);
2129 }
2130 send_text_mimetype = 1;
2131 } else if (SG(sapi_headers).send_default_content_type) {
2132 mimetype = SG(default_mimetype) ? SG(default_mimetype) : SAPI_DEFAULT_MIMETYPE;
2133 }
2134
2135 /* if content-type is not yet set, set it and activate the converter */
2136 if (SG(sapi_headers).send_default_content_type || send_text_mimetype) {
2137 charset = encoding->mime_name;
2138 if (charset) {
2139 len = spprintf( &p, 0, "Content-Type: %s; charset=%s", mimetype, charset );
2140 if (sapi_add_header(p, len, 0) != FAILURE) {
2141 SG(sapi_headers).send_default_content_type = 0;
2142 }
2143 }
2144 /* activate the converter */
2145 MBSTRG(outconv) = mbfl_buffer_converter_new2(MBSTRG(current_internal_encoding), encoding, 0);
2146 if (send_text_mimetype){
2147 efree(mimetype);
2148 }
2149 }
2150 }
2151
2152 /* just return if the converter is not activated. */
2153 if (MBSTRG(outconv) == NULL) {
2154 RETURN_STRINGL(arg_string, arg_string_len, 1);
2155 }
2156
2157 /* flag */
2158 last_feed = ((arg_status & PHP_OUTPUT_HANDLER_END) != 0);
2159 /* mode */
2160 mbfl_buffer_converter_illegal_mode(MBSTRG(outconv), MBSTRG(current_filter_illegal_mode));
2161 mbfl_buffer_converter_illegal_substchar(MBSTRG(outconv), MBSTRG(current_filter_illegal_substchar));
2162
2163 /* feed the string */
2164 mbfl_string_init(&string);
2165 string.no_language = MBSTRG(language);
2166 string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2167 string.val = (unsigned char *)arg_string;
2168 string.len = arg_string_len;
2169 mbfl_buffer_converter_feed(MBSTRG(outconv), &string);
2170 if (last_feed) {
2171 mbfl_buffer_converter_flush(MBSTRG(outconv));
2172 }
2173 /* get the converter output, and return it */
2174 mbfl_buffer_converter_result(MBSTRG(outconv), &result);
2175 RETVAL_STRINGL((char *)result.val, result.len, 0); /* the string is already strdup()'ed */
2176
2177 /* delete the converter if it is the last feed. */
2178 if (last_feed) {
2179 MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
2180 mbfl_buffer_converter_delete(MBSTRG(outconv));
2181 MBSTRG(outconv) = NULL;
2182 }
2183 }
2184 /* }}} */
2185
/* {{{ proto int mb_strlen(string str [, string encoding])
   Gets the number of characters in a string */
PHP_FUNCTION(mb_strlen)2188 PHP_FUNCTION(mb_strlen)
2189 {
2190 int n;
2191 mbfl_string string;
2192 char *enc_name = NULL;
2193 int enc_name_len;
2194
2195 mbfl_string_init(&string);
2196
2197 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s", (char **)&string.val, &string.len, &enc_name, &enc_name_len) == FAILURE) {
2198 RETURN_FALSE;
2199 }
2200
2201 string.no_language = MBSTRG(language);
2202 if (enc_name == NULL) {
2203 string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2204 } else {
2205 string.no_encoding = mbfl_name2no_encoding(enc_name);
2206 if (string.no_encoding == mbfl_no_encoding_invalid) {
2207 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2208 RETURN_FALSE;
2209 }
2210 }
2211
2212 n = mbfl_strlen(&string);
2213 if (n >= 0) {
2214 RETVAL_LONG(n);
2215 } else {
2216 RETVAL_FALSE;
2217 }
2218 }
2219 /* }}} */
2220
2221 /* {{{ proto int mb_strpos(string haystack, string needle [, int offset [, string encoding]])
2222 Find position of first occurrence of a string within another */
PHP_FUNCTION(mb_strpos)2223 PHP_FUNCTION(mb_strpos)
2224 {
2225 int n, reverse = 0;
2226 long offset;
2227 mbfl_string haystack, needle;
2228 char *enc_name = NULL;
2229 int enc_name_len;
2230
2231 mbfl_string_init(&haystack);
2232 mbfl_string_init(&needle);
2233 haystack.no_language = MBSTRG(language);
2234 haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2235 needle.no_language = MBSTRG(language);
2236 needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2237 offset = 0;
2238
2239 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ls", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &offset, &enc_name, &enc_name_len) == FAILURE) {
2240 RETURN_FALSE;
2241 }
2242
2243 if (enc_name != NULL) {
2244 haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2245 if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2246 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2247 RETURN_FALSE;
2248 }
2249 }
2250
2251 if (offset < 0 || offset > mbfl_strlen(&haystack)) {
2252 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset not contained in string");
2253 RETURN_FALSE;
2254 }
2255 if (needle.len == 0) {
2256 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter");
2257 RETURN_FALSE;
2258 }
2259
2260 n = mbfl_strpos(&haystack, &needle, offset, reverse);
2261 if (n >= 0) {
2262 RETVAL_LONG(n);
2263 } else {
2264 switch (-n) {
2265 case 1:
2266 break;
2267 case 2:
2268 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Needle has not positive length");
2269 break;
2270 case 4:
2271 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding or conversion error");
2272 break;
2273 case 8:
2274 php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Argument is empty");
2275 break;
2276 default:
2277 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown error in mb_strpos");
2278 break;
2279 }
2280 RETVAL_FALSE;
2281 }
2282 }
2283 /* }}} */
2284
2285 /* {{{ proto int mb_strrpos(string haystack, string needle [, int offset [, string encoding]])
2286 Find position of last occurrence of a string within another */
PHP_FUNCTION(mb_strrpos)2287 PHP_FUNCTION(mb_strrpos)
2288 {
2289 int n;
2290 mbfl_string haystack, needle;
2291 char *enc_name = NULL;
2292 int enc_name_len;
2293 zval **zoffset = NULL;
2294 long offset = 0, str_flg;
2295 char *enc_name2 = NULL;
2296 int enc_name_len2;
2297
2298 mbfl_string_init(&haystack);
2299 mbfl_string_init(&needle);
2300 haystack.no_language = MBSTRG(language);
2301 haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2302 needle.no_language = MBSTRG(language);
2303 needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2304
2305 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|Zs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &zoffset, &enc_name, &enc_name_len) == FAILURE) {
2306 RETURN_FALSE;
2307 }
2308
2309 if (zoffset) {
2310 if (Z_TYPE_PP(zoffset) == IS_STRING) {
2311 enc_name2 = Z_STRVAL_PP(zoffset);
2312 enc_name_len2 = Z_STRLEN_PP(zoffset);
2313 str_flg = 1;
2314
2315 if (enc_name2 != NULL) {
2316 switch (*enc_name2) {
2317 case '0':
2318 case '1':
2319 case '2':
2320 case '3':
2321 case '4':
2322 case '5':
2323 case '6':
2324 case '7':
2325 case '8':
2326 case '9':
2327 case ' ':
2328 case '-':
2329 case '.':
2330 break;
2331 default :
2332 str_flg = 0;
2333 break;
2334 }
2335 }
2336
2337 if (str_flg) {
2338 convert_to_long_ex(zoffset);
2339 offset = Z_LVAL_PP(zoffset);
2340 } else {
2341 enc_name = enc_name2;
2342 enc_name_len = enc_name_len2;
2343 }
2344 } else {
2345 convert_to_long_ex(zoffset);
2346 offset = Z_LVAL_PP(zoffset);
2347 }
2348 }
2349
2350 if (enc_name != NULL) {
2351 haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2352 if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2353 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2354 RETURN_FALSE;
2355 }
2356 }
2357
2358 if (haystack.len <= 0) {
2359 RETURN_FALSE;
2360 }
2361 if (needle.len <= 0) {
2362 RETURN_FALSE;
2363 }
2364
2365 {
2366 int haystack_char_len = mbfl_strlen(&haystack);
2367 if ((offset > 0 && offset > haystack_char_len) ||
2368 (offset < 0 && -offset > haystack_char_len)) {
2369 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset is greater than the length of haystack string");
2370 RETURN_FALSE;
2371 }
2372 }
2373
2374 n = mbfl_strpos(&haystack, &needle, offset, 1);
2375 if (n >= 0) {
2376 RETVAL_LONG(n);
2377 } else {
2378 RETVAL_FALSE;
2379 }
2380 }
2381 /* }}} */
2382
2383 /* {{{ proto int mb_stripos(string haystack, string needle [, int offset [, string encoding]])
2384 Finds position of first occurrence of a string within another, case insensitive */
PHP_FUNCTION(mb_stripos)2385 PHP_FUNCTION(mb_stripos)
2386 {
2387 int n;
2388 long offset;
2389 mbfl_string haystack, needle;
2390 const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
2391 int from_encoding_len;
2392 n = -1;
2393 offset = 0;
2394
2395 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ls", (char **)&haystack.val, (int *)&haystack.len, (char **)&needle.val, (int *)&needle.len, &offset, &from_encoding, &from_encoding_len) == FAILURE) {
2396 RETURN_FALSE;
2397 }
2398 if (needle.len == 0) {
2399 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter");
2400 RETURN_FALSE;
2401 }
2402 n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, from_encoding TSRMLS_CC);
2403
2404 if (n >= 0) {
2405 RETVAL_LONG(n);
2406 } else {
2407 RETVAL_FALSE;
2408 }
2409 }
2410 /* }}} */
2411
2412 /* {{{ proto int mb_strripos(string haystack, string needle [, int offset [, string encoding]])
2413 Finds position of last occurrence of a string within another, case insensitive */
PHP_FUNCTION(mb_strripos)2414 PHP_FUNCTION(mb_strripos)
2415 {
2416 int n;
2417 long offset;
2418 mbfl_string haystack, needle;
2419 const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
2420 int from_encoding_len;
2421 n = -1;
2422 offset = 0;
2423
2424 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ls", (char **)&haystack.val, (int *)&haystack.len, (char **)&needle.val, (int *)&needle.len, &offset, &from_encoding, &from_encoding_len) == FAILURE) {
2425 RETURN_FALSE;
2426 }
2427
2428 n = php_mb_stripos(1, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, from_encoding TSRMLS_CC);
2429
2430 if (n >= 0) {
2431 RETVAL_LONG(n);
2432 } else {
2433 RETVAL_FALSE;
2434 }
2435 }
2436 /* }}} */
2437
2438 /* {{{ proto string mb_strstr(string haystack, string needle[, bool part[, string encoding]])
2439 Finds first occurrence of a string within another */
PHP_FUNCTION(mb_strstr)2440 PHP_FUNCTION(mb_strstr)
2441 {
2442 int n, len, mblen;
2443 mbfl_string haystack, needle, result, *ret = NULL;
2444 char *enc_name = NULL;
2445 int enc_name_len;
2446 zend_bool part = 0;
2447
2448 mbfl_string_init(&haystack);
2449 mbfl_string_init(&needle);
2450 haystack.no_language = MBSTRG(language);
2451 haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2452 needle.no_language = MBSTRG(language);
2453 needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2454
2455 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, (int *)&haystack.len, (char **)&needle.val, (int *)&needle.len, &part, &enc_name, &enc_name_len) == FAILURE) {
2456 RETURN_FALSE;
2457 }
2458
2459 if (enc_name != NULL) {
2460 haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2461 if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2462 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2463 RETURN_FALSE;
2464 }
2465 }
2466
2467 if (needle.len <= 0) {
2468 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter");
2469 RETURN_FALSE;
2470 }
2471 n = mbfl_strpos(&haystack, &needle, 0, 0);
2472 if (n >= 0) {
2473 mblen = mbfl_strlen(&haystack);
2474 if (part) {
2475 ret = mbfl_substr(&haystack, &result, 0, n);
2476 if (ret != NULL) {
2477 RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2478 } else {
2479 RETVAL_FALSE;
2480 }
2481 } else {
2482 len = (mblen - n);
2483 ret = mbfl_substr(&haystack, &result, n, len);
2484 if (ret != NULL) {
2485 RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2486 } else {
2487 RETVAL_FALSE;
2488 }
2489 }
2490 } else {
2491 RETVAL_FALSE;
2492 }
2493 }
2494 /* }}} */
2495
2496 /* {{{ proto string mb_strrchr(string haystack, string needle[, bool part[, string encoding]])
2497 Finds the last occurrence of a character in a string within another */
PHP_FUNCTION(mb_strrchr)2498 PHP_FUNCTION(mb_strrchr)
2499 {
2500 int n, len, mblen;
2501 mbfl_string haystack, needle, result, *ret = NULL;
2502 char *enc_name = NULL;
2503 int enc_name_len;
2504 zend_bool part = 0;
2505
2506 mbfl_string_init(&haystack);
2507 mbfl_string_init(&needle);
2508 haystack.no_language = MBSTRG(language);
2509 haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2510 needle.no_language = MBSTRG(language);
2511 needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2512
2513 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &enc_name, &enc_name_len) == FAILURE) {
2514 RETURN_FALSE;
2515 }
2516
2517 if (enc_name != NULL) {
2518 haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2519 if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2520 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2521 RETURN_FALSE;
2522 }
2523 }
2524
2525 if (haystack.len <= 0) {
2526 RETURN_FALSE;
2527 }
2528 if (needle.len <= 0) {
2529 RETURN_FALSE;
2530 }
2531 n = mbfl_strpos(&haystack, &needle, 0, 1);
2532 if (n >= 0) {
2533 mblen = mbfl_strlen(&haystack);
2534 if (part) {
2535 ret = mbfl_substr(&haystack, &result, 0, n);
2536 if (ret != NULL) {
2537 RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2538 } else {
2539 RETVAL_FALSE;
2540 }
2541 } else {
2542 len = (mblen - n);
2543 ret = mbfl_substr(&haystack, &result, n, len);
2544 if (ret != NULL) {
2545 RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2546 } else {
2547 RETVAL_FALSE;
2548 }
2549 }
2550 } else {
2551 RETVAL_FALSE;
2552 }
2553 }
2554 /* }}} */
2555
2556 /* {{{ proto string mb_stristr(string haystack, string needle[, bool part[, string encoding]])
2557 Finds first occurrence of a string within another, case insensitive */
PHP_FUNCTION(mb_stristr)2558 PHP_FUNCTION(mb_stristr)
2559 {
2560 zend_bool part = 0;
2561 unsigned int from_encoding_len, len, mblen;
2562 int n;
2563 mbfl_string haystack, needle, result, *ret = NULL;
2564 const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
2565 mbfl_string_init(&haystack);
2566 mbfl_string_init(&needle);
2567 haystack.no_language = MBSTRG(language);
2568 haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2569 needle.no_language = MBSTRG(language);
2570 needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2571
2572
2573 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &from_encoding, &from_encoding_len) == FAILURE) {
2574 RETURN_FALSE;
2575 }
2576
2577 if (!needle.len) {
2578 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter");
2579 RETURN_FALSE;
2580 }
2581
2582 haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding);
2583 if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2584 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", from_encoding);
2585 RETURN_FALSE;
2586 }
2587
2588 n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, 0, from_encoding TSRMLS_CC);
2589
2590 if (n <0) {
2591 RETURN_FALSE;
2592 }
2593
2594 mblen = mbfl_strlen(&haystack);
2595
2596 if (part) {
2597 ret = mbfl_substr(&haystack, &result, 0, n);
2598 if (ret != NULL) {
2599 RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2600 } else {
2601 RETVAL_FALSE;
2602 }
2603 } else {
2604 len = (mblen - n);
2605 ret = mbfl_substr(&haystack, &result, n, len);
2606 if (ret != NULL) {
2607 RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2608 } else {
2609 RETVAL_FALSE;
2610 }
2611 }
2612 }
2613 /* }}} */
2614
2615 /* {{{ proto string mb_strrichr(string haystack, string needle[, bool part[, string encoding]])
2616 Finds the last occurrence of a character in a string within another, case insensitive */
PHP_FUNCTION(mb_strrichr)2617 PHP_FUNCTION(mb_strrichr)
2618 {
2619 zend_bool part = 0;
2620 int n, from_encoding_len, len, mblen;
2621 mbfl_string haystack, needle, result, *ret = NULL;
2622 const char *from_encoding = MBSTRG(current_internal_encoding)->name;
2623 mbfl_string_init(&haystack);
2624 mbfl_string_init(&needle);
2625 haystack.no_language = MBSTRG(language);
2626 haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2627 needle.no_language = MBSTRG(language);
2628 needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2629
2630
2631 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &from_encoding, &from_encoding_len) == FAILURE) {
2632 RETURN_FALSE;
2633 }
2634
2635 haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding);
2636 if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2637 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", from_encoding);
2638 RETURN_FALSE;
2639 }
2640
2641 n = php_mb_stripos(1, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, 0, from_encoding TSRMLS_CC);
2642
2643 if (n <0) {
2644 RETURN_FALSE;
2645 }
2646
2647 mblen = mbfl_strlen(&haystack);
2648
2649 if (part) {
2650 ret = mbfl_substr(&haystack, &result, 0, n);
2651 if (ret != NULL) {
2652 RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2653 } else {
2654 RETVAL_FALSE;
2655 }
2656 } else {
2657 len = (mblen - n);
2658 ret = mbfl_substr(&haystack, &result, n, len);
2659 if (ret != NULL) {
2660 RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2661 } else {
2662 RETVAL_FALSE;
2663 }
2664 }
2665 }
2666 /* }}} */
2667
2668 /* {{{ proto int mb_substr_count(string haystack, string needle [, string encoding])
2669 Count the number of substring occurrences */
PHP_FUNCTION(mb_substr_count)2670 PHP_FUNCTION(mb_substr_count)
2671 {
2672 int n;
2673 mbfl_string haystack, needle;
2674 char *enc_name = NULL;
2675 int enc_name_len;
2676
2677 mbfl_string_init(&haystack);
2678 mbfl_string_init(&needle);
2679 haystack.no_language = MBSTRG(language);
2680 haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2681 needle.no_language = MBSTRG(language);
2682 needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2683
2684 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|s", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &enc_name, &enc_name_len) == FAILURE) {
2685 return;
2686 }
2687
2688 if (enc_name != NULL) {
2689 haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2690 if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2691 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2692 RETURN_FALSE;
2693 }
2694 }
2695
2696 if (needle.len <= 0) {
2697 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty substring");
2698 RETURN_FALSE;
2699 }
2700
2701 n = mbfl_substr_count(&haystack, &needle);
2702 if (n >= 0) {
2703 RETVAL_LONG(n);
2704 } else {
2705 RETVAL_FALSE;
2706 }
2707 }
2708 /* }}} */
2709
2710 /* {{{ proto string mb_substr(string str, int start [, int length [, string encoding]])
2711 Returns part of a string */
PHP_FUNCTION(mb_substr)2712 PHP_FUNCTION(mb_substr)
2713 {
2714 size_t argc = ZEND_NUM_ARGS();
2715 char *str, *encoding;
2716 long from, len;
2717 int mblen, str_len, encoding_len;
2718 zval **z_len = NULL;
2719 mbfl_string string, result, *ret;
2720
2721 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|Zs", &str, &str_len, &from, &z_len, &encoding, &encoding_len) == FAILURE) {
2722 return;
2723 }
2724
2725 mbfl_string_init(&string);
2726 string.no_language = MBSTRG(language);
2727 string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2728
2729 if (argc == 4) {
2730 string.no_encoding = mbfl_name2no_encoding(encoding);
2731 if (string.no_encoding == mbfl_no_encoding_invalid) {
2732 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
2733 RETURN_FALSE;
2734 }
2735 }
2736
2737 string.val = (unsigned char *)str;
2738 string.len = str_len;
2739
2740 if (argc < 3 || Z_TYPE_PP(z_len) == IS_NULL) {
2741 len = str_len;
2742 } else {
2743 convert_to_long_ex(z_len);
2744 len = Z_LVAL_PP(z_len);
2745 }
2746
2747 /* measures length */
2748 mblen = 0;
2749 if (from < 0 || len < 0) {
2750 mblen = mbfl_strlen(&string);
2751 }
2752
2753 /* if "from" position is negative, count start position from the end
2754 * of the string
2755 */
2756 if (from < 0) {
2757 from = mblen + from;
2758 if (from < 0) {
2759 from = 0;
2760 }
2761 }
2762
2763 /* if "length" position is negative, set it to the length
2764 * needed to stop that many chars from the end of the string
2765 */
2766 if (len < 0) {
2767 len = (mblen - from) + len;
2768 if (len < 0) {
2769 len = 0;
2770 }
2771 }
2772
2773 if (((MBSTRG(func_overload) & MB_OVERLOAD_STRING) == MB_OVERLOAD_STRING)
2774 && (from >= mbfl_strlen(&string))) {
2775 RETURN_FALSE;
2776 }
2777
2778 ret = mbfl_substr(&string, &result, from, len);
2779 if (NULL == ret) {
2780 RETURN_FALSE;
2781 }
2782
2783 RETURN_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */
2784 }
2785 /* }}} */
2786
/* {{{ proto string mb_strcut(string str, int start [, int length [, string encoding]])
   Returns part of a string, with start and length measured against the byte length of str */
PHP_FUNCTION(mb_strcut)2789 PHP_FUNCTION(mb_strcut)
2790 {
2791 size_t argc = ZEND_NUM_ARGS();
2792 char *encoding;
2793 long from, len;
2794 int encoding_len;
2795 zval **z_len = NULL;
2796 mbfl_string string, result, *ret;
2797
2798 mbfl_string_init(&string);
2799 string.no_language = MBSTRG(language);
2800 string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2801
2802 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|Zs", (char **)&string.val, (int **)&string.len, &from, &z_len, &encoding, &encoding_len) == FAILURE) {
2803 return;
2804 }
2805
2806 if (argc == 4) {
2807 string.no_encoding = mbfl_name2no_encoding(encoding);
2808 if (string.no_encoding == mbfl_no_encoding_invalid) {
2809 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
2810 RETURN_FALSE;
2811 }
2812 }
2813
2814 if (argc < 3 || Z_TYPE_PP(z_len) == IS_NULL) {
2815 len = string.len;
2816 } else {
2817 convert_to_long_ex(z_len);
2818 len = Z_LVAL_PP(z_len);
2819 }
2820
2821 /* if "from" position is negative, count start position from the end
2822 * of the string
2823 */
2824 if (from < 0) {
2825 from = string.len + from;
2826 if (from < 0) {
2827 from = 0;
2828 }
2829 }
2830
2831 /* if "length" position is negative, set it to the length
2832 * needed to stop that many chars from the end of the string
2833 */
2834 if (len < 0) {
2835 len = (string.len - from) + len;
2836 if (len < 0) {
2837 len = 0;
2838 }
2839 }
2840
2841 if ((unsigned int)from > string.len) {
2842 RETURN_FALSE;
2843 }
2844
2845 ret = mbfl_strcut(&string, &result, from, len);
2846 if (ret == NULL) {
2847 RETURN_FALSE;
2848 }
2849
2850 RETURN_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */
2851 }
2852 /* }}} */
2853
2854 /* {{{ proto int mb_strwidth(string str [, string encoding])
2855 Gets terminal width of a string */
PHP_FUNCTION(mb_strwidth)2856 PHP_FUNCTION(mb_strwidth)
2857 {
2858 int n;
2859 mbfl_string string;
2860 char *enc_name = NULL;
2861 int enc_name_len;
2862
2863 mbfl_string_init(&string);
2864
2865 string.no_language = MBSTRG(language);
2866 string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2867
2868 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s", (char **)&string.val, &string.len, &enc_name, &enc_name_len) == FAILURE) {
2869 return;
2870 }
2871
2872 if (enc_name != NULL) {
2873 string.no_encoding = mbfl_name2no_encoding(enc_name);
2874 if (string.no_encoding == mbfl_no_encoding_invalid) {
2875 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2876 RETURN_FALSE;
2877 }
2878 }
2879
2880 n = mbfl_strwidth(&string);
2881 if (n >= 0) {
2882 RETVAL_LONG(n);
2883 } else {
2884 RETVAL_FALSE;
2885 }
2886 }
2887 /* }}} */
2888
2889 /* {{{ proto string mb_strimwidth(string str, int start, int width [, string trimmarker [, string encoding]])
2890 Trim the string in terminal width */
PHP_FUNCTION(mb_strimwidth)2891 PHP_FUNCTION(mb_strimwidth)
2892 {
2893 char *str, *trimmarker, *encoding;
2894 long from, width;
2895 int str_len, trimmarker_len, encoding_len;
2896 mbfl_string string, result, marker, *ret;
2897
2898 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sll|ss", &str, &str_len, &from, &width, &trimmarker, &trimmarker_len, &encoding, &encoding_len) == FAILURE) {
2899 return;
2900 }
2901
2902 mbfl_string_init(&string);
2903 mbfl_string_init(&marker);
2904 string.no_language = MBSTRG(language);
2905 string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2906 marker.no_language = MBSTRG(language);
2907 marker.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2908 marker.val = NULL;
2909 marker.len = 0;
2910
2911 if (ZEND_NUM_ARGS() == 5) {
2912 string.no_encoding = marker.no_encoding = mbfl_name2no_encoding(encoding);
2913 if (string.no_encoding == mbfl_no_encoding_invalid) {
2914 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
2915 RETURN_FALSE;
2916 }
2917 }
2918
2919 string.val = (unsigned char *)str;
2920 string.len = str_len;
2921
2922 if (from < 0 || from > str_len) {
2923 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Start position is out of range");
2924 RETURN_FALSE;
2925 }
2926
2927 if (width < 0) {
2928 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Width is negative value");
2929 RETURN_FALSE;
2930 }
2931
2932 if (ZEND_NUM_ARGS() >= 4) {
2933 marker.val = (unsigned char *)trimmarker;
2934 marker.len = trimmarker_len;
2935 }
2936
2937 ret = mbfl_strimwidth(&string, &marker, &result, from, width);
2938
2939 if (ret == NULL) {
2940 RETURN_FALSE;
2941 }
2942
2943 RETVAL_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */
2944 }
2945 /* }}} */
2946
2947 /* {{{ MBSTRING_API char *php_mb_convert_encoding() */
php_mb_convert_encoding(const char * input,size_t length,const char * _to_encoding,const char * _from_encodings,size_t * output_len TSRMLS_DC)2948 MBSTRING_API char * php_mb_convert_encoding(const char *input, size_t length, const char *_to_encoding, const char *_from_encodings, size_t *output_len TSRMLS_DC)
2949 {
2950 mbfl_string string, result, *ret;
2951 const mbfl_encoding *from_encoding, *to_encoding;
2952 mbfl_buffer_converter *convd;
2953 size_t size;
2954 const mbfl_encoding **list;
2955 char *output=NULL;
2956
2957 if (output_len) {
2958 *output_len = 0;
2959 }
2960 if (!input) {
2961 return NULL;
2962 }
2963 /* new encoding */
2964 if (_to_encoding && strlen(_to_encoding)) {
2965 to_encoding = mbfl_name2encoding(_to_encoding);
2966 if (!to_encoding) {
2967 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", _to_encoding);
2968 return NULL;
2969 }
2970 } else {
2971 to_encoding = MBSTRG(current_internal_encoding);
2972 }
2973
2974 /* initialize string */
2975 mbfl_string_init(&string);
2976 mbfl_string_init(&result);
2977 from_encoding = MBSTRG(current_internal_encoding);
2978 string.no_encoding = from_encoding->no_encoding;
2979 string.no_language = MBSTRG(language);
2980 string.val = (unsigned char *)input;
2981 string.len = length;
2982
2983 /* pre-conversion encoding */
2984 if (_from_encodings) {
2985 list = NULL;
2986 size = 0;
2987 php_mb_parse_encoding_list(_from_encodings, strlen(_from_encodings), &list, &size, 0 TSRMLS_CC);
2988 if (size == 1) {
2989 from_encoding = *list;
2990 string.no_encoding = from_encoding->no_encoding;
2991 } else if (size > 1) {
2992 /* auto detect */
2993 from_encoding = mbfl_identify_encoding2(&string, list, size, MBSTRG(strict_detection));
2994 if (from_encoding) {
2995 string.no_encoding = from_encoding->no_encoding;
2996 } else {
2997 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to detect character encoding");
2998 from_encoding = &mbfl_encoding_pass;
2999 to_encoding = from_encoding;
3000 string.no_encoding = from_encoding->no_encoding;
3001 }
3002 } else {
3003 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Illegal character encoding specified");
3004 }
3005 if (list != NULL) {
3006 efree((void *)list);
3007 }
3008 }
3009
3010 /* initialize converter */
3011 convd = mbfl_buffer_converter_new2(from_encoding, to_encoding, string.len);
3012 if (convd == NULL) {
3013 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create character encoding converter");
3014 return NULL;
3015 }
3016 mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
3017 mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
3018
3019 /* do it */
3020 ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
3021 if (ret) {
3022 if (output_len) {
3023 *output_len = ret->len;
3024 }
3025 output = (char *)ret->val;
3026 }
3027
3028 MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
3029 mbfl_buffer_converter_delete(convd);
3030 return output;
3031 }
3032 /* }}} */
3033
3034 /* {{{ proto string mb_convert_encoding(string str, string to-encoding [, mixed from-encoding])
3035 Returns converted string in desired encoding */
PHP_FUNCTION(mb_convert_encoding)3036 PHP_FUNCTION(mb_convert_encoding)
3037 {
3038 char *arg_str, *arg_new;
3039 int str_len, new_len;
3040 zval *arg_old;
3041 int i;
3042 size_t size, l, n;
3043 char *_from_encodings = NULL, *ret, *s_free = NULL;
3044
3045 zval **hash_entry;
3046 HashTable *target_hash;
3047
3048 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|z", &arg_str, &str_len, &arg_new, &new_len, &arg_old) == FAILURE) {
3049 return;
3050 }
3051
3052 if (ZEND_NUM_ARGS() == 3) {
3053 switch (Z_TYPE_P(arg_old)) {
3054 case IS_ARRAY:
3055 target_hash = Z_ARRVAL_P(arg_old);
3056 zend_hash_internal_pointer_reset(target_hash);
3057 i = zend_hash_num_elements(target_hash);
3058 _from_encodings = NULL;
3059
3060 while (i > 0) {
3061 if (zend_hash_get_current_data(target_hash, (void **) &hash_entry) == FAILURE) {
3062 break;
3063 }
3064
3065 convert_to_string_ex(hash_entry);
3066
3067 if ( _from_encodings) {
3068 l = strlen(_from_encodings);
3069 n = strlen(Z_STRVAL_PP(hash_entry));
3070 _from_encodings = erealloc(_from_encodings, l+n+2);
3071 strcpy(_from_encodings+l, ",");
3072 strcpy(_from_encodings+l+1, Z_STRVAL_PP(hash_entry));
3073 } else {
3074 _from_encodings = estrdup(Z_STRVAL_PP(hash_entry));
3075 }
3076
3077 zend_hash_move_forward(target_hash);
3078 i--;
3079 }
3080
3081 if (_from_encodings != NULL && !strlen(_from_encodings)) {
3082 efree(_from_encodings);
3083 _from_encodings = NULL;
3084 }
3085 s_free = _from_encodings;
3086 break;
3087 default:
3088 convert_to_string(arg_old);
3089 _from_encodings = Z_STRVAL_P(arg_old);
3090 break;
3091 }
3092 }
3093
3094 /* new encoding */
3095 ret = php_mb_convert_encoding(arg_str, str_len, arg_new, _from_encodings, &size TSRMLS_CC);
3096 if (ret != NULL) {
3097 RETVAL_STRINGL(ret, size, 0); /* the string is already strdup()'ed */
3098 } else {
3099 RETVAL_FALSE;
3100 }
3101
3102 if ( s_free) {
3103 efree(s_free);
3104 }
3105 }
3106 /* }}} */
3107
3108 /* {{{ proto string mb_convert_case(string sourcestring, int mode [, string encoding])
3109 Returns a case-folded version of sourcestring */
PHP_FUNCTION(mb_convert_case)3110 PHP_FUNCTION(mb_convert_case)
3111 {
3112 const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
3113 char *str;
3114 int str_len, from_encoding_len;
3115 long case_mode = 0;
3116 char *newstr;
3117 size_t ret_len;
3118
3119 RETVAL_FALSE;
3120 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|s!", &str, &str_len,
3121 &case_mode, &from_encoding, &from_encoding_len) == FAILURE)
3122 RETURN_FALSE;
3123
3124 newstr = php_unicode_convert_case(case_mode, str, (size_t) str_len, &ret_len, from_encoding TSRMLS_CC);
3125
3126 if (newstr) {
3127 RETVAL_STRINGL(newstr, ret_len, 0);
3128 }
3129 }
3130 /* }}} */
3131
3132 /* {{{ proto string mb_strtoupper(string sourcestring [, string encoding])
3133 * Returns a uppercased version of sourcestring
3134 */
PHP_FUNCTION(mb_strtoupper)3135 PHP_FUNCTION(mb_strtoupper)
3136 {
3137 const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
3138 char *str;
3139 int str_len, from_encoding_len;
3140 char *newstr;
3141 size_t ret_len;
3142
3143 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s!", &str, &str_len,
3144 &from_encoding, &from_encoding_len) == FAILURE) {
3145 return;
3146 }
3147 newstr = php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, str, (size_t) str_len, &ret_len, from_encoding TSRMLS_CC);
3148
3149 if (newstr) {
3150 RETURN_STRINGL(newstr, ret_len, 0);
3151 }
3152 RETURN_FALSE;
3153 }
3154 /* }}} */
3155
3156 /* {{{ proto string mb_strtolower(string sourcestring [, string encoding])
3157 * Returns a lowercased version of sourcestring
3158 */
PHP_FUNCTION(mb_strtolower)3159 PHP_FUNCTION(mb_strtolower)
3160 {
3161 const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
3162 char *str;
3163 int str_len, from_encoding_len;
3164 char *newstr;
3165 size_t ret_len;
3166
3167 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s!", &str, &str_len,
3168 &from_encoding, &from_encoding_len) == FAILURE) {
3169 return;
3170 }
3171 newstr = php_unicode_convert_case(PHP_UNICODE_CASE_LOWER, str, (size_t) str_len, &ret_len, from_encoding TSRMLS_CC);
3172
3173 if (newstr) {
3174 RETURN_STRINGL(newstr, ret_len, 0);
3175 }
3176 RETURN_FALSE;
3177 }
3178 /* }}} */
3179
3180 /* {{{ proto string mb_detect_encoding(string str [, mixed encoding_list [, bool strict]])
3181 Encodings of the given string is returned (as a string) */
PHP_FUNCTION(mb_detect_encoding)3182 PHP_FUNCTION(mb_detect_encoding)
3183 {
3184 char *str;
3185 int str_len;
3186 zend_bool strict=0;
3187 zval *encoding_list;
3188
3189 mbfl_string string;
3190 const mbfl_encoding *ret;
3191 const mbfl_encoding **elist, **list;
3192 size_t size;
3193
3194 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|zb", &str, &str_len, &encoding_list, &strict) == FAILURE) {
3195 return;
3196 }
3197
3198 /* make encoding list */
3199 list = NULL;
3200 size = 0;
3201 if (ZEND_NUM_ARGS() >= 2 && !ZVAL_IS_NULL(encoding_list)) {
3202 switch (Z_TYPE_P(encoding_list)) {
3203 case IS_ARRAY:
3204 if (FAILURE == php_mb_parse_encoding_array(encoding_list, &list, &size, 0 TSRMLS_CC)) {
3205 if (list) {
3206 efree(list);
3207 list = NULL;
3208 size = 0;
3209 }
3210 }
3211 break;
3212 default:
3213 convert_to_string(encoding_list);
3214 if (FAILURE == php_mb_parse_encoding_list(Z_STRVAL_P(encoding_list), Z_STRLEN_P(encoding_list), &list, &size, 0 TSRMLS_CC)) {
3215 if (list) {
3216 efree(list);
3217 list = NULL;
3218 size = 0;
3219 }
3220 }
3221 break;
3222 }
3223 if (size <= 0) {
3224 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Illegal argument");
3225 }
3226 }
3227
3228 if (ZEND_NUM_ARGS() < 3) {
3229 strict = (zend_bool)MBSTRG(strict_detection);
3230 }
3231
3232 if (size > 0 && list != NULL) {
3233 elist = list;
3234 } else {
3235 elist = MBSTRG(current_detect_order_list);
3236 size = MBSTRG(current_detect_order_list_size);
3237 }
3238
3239 mbfl_string_init(&string);
3240 string.no_language = MBSTRG(language);
3241 string.val = (unsigned char *)str;
3242 string.len = str_len;
3243 ret = mbfl_identify_encoding2(&string, elist, size, strict);
3244
3245 if (list != NULL) {
3246 efree((void *)list);
3247 }
3248
3249 if (ret == NULL) {
3250 RETURN_FALSE;
3251 }
3252
3253 RETVAL_STRING((char *)ret->name, 1);
3254 }
3255 /* }}} */
3256
3257 /* {{{ proto mixed mb_list_encodings()
3258 Returns an array of all supported entity encodings */
PHP_FUNCTION(mb_list_encodings)3259 PHP_FUNCTION(mb_list_encodings)
3260 {
3261 const mbfl_encoding **encodings;
3262 const mbfl_encoding *encoding;
3263 int i;
3264
3265 array_init(return_value);
3266 i = 0;
3267 encodings = mbfl_get_supported_encodings();
3268 while ((encoding = encodings[i++]) != NULL) {
3269 add_next_index_string(return_value, (char *) encoding->name, 1);
3270 }
3271 }
3272 /* }}} */
3273
3274 /* {{{ proto array mb_encoding_aliases(string encoding)
3275 Returns an array of the aliases of a given encoding name */
PHP_FUNCTION(mb_encoding_aliases)3276 PHP_FUNCTION(mb_encoding_aliases)
3277 {
3278 const mbfl_encoding *encoding;
3279 char *name = NULL;
3280 int name_len;
3281
3282 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &name, &name_len) == FAILURE) {
3283 RETURN_FALSE;
3284 }
3285
3286 encoding = mbfl_name2encoding(name);
3287 if (!encoding) {
3288 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
3289 RETURN_FALSE;
3290 }
3291
3292 array_init(return_value);
3293 if (encoding->aliases != NULL) {
3294 const char **alias;
3295 for (alias = *encoding->aliases; *alias; ++alias) {
3296 add_next_index_string(return_value, (char *)*alias, 1);
3297 }
3298 }
3299 }
3300 /* }}} */
3301
3302 /* {{{ proto string mb_encode_mimeheader(string str [, string charset [, string transfer-encoding [, string linefeed [, int indent]]]])
3303 Converts the string to MIME "encoded-word" in the format of =?charset?(B|Q)?encoded_string?= */
PHP_FUNCTION(mb_encode_mimeheader)3304 PHP_FUNCTION(mb_encode_mimeheader)
3305 {
3306 enum mbfl_no_encoding charset, transenc;
3307 mbfl_string string, result, *ret;
3308 char *charset_name = NULL;
3309 int charset_name_len;
3310 char *trans_enc_name = NULL;
3311 int trans_enc_name_len;
3312 char *linefeed = "\r\n";
3313 int linefeed_len;
3314 long indent = 0;
3315
3316 mbfl_string_init(&string);
3317 string.no_language = MBSTRG(language);
3318 string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
3319
3320 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|sssl", (char **)&string.val, &string.len, &charset_name, &charset_name_len, &trans_enc_name, &trans_enc_name_len, &linefeed, &linefeed_len, &indent) == FAILURE) {
3321 return;
3322 }
3323
3324 charset = mbfl_no_encoding_pass;
3325 transenc = mbfl_no_encoding_base64;
3326
3327 if (charset_name != NULL) {
3328 charset = mbfl_name2no_encoding(charset_name);
3329 if (charset == mbfl_no_encoding_invalid) {
3330 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", charset_name);
3331 RETURN_FALSE;
3332 }
3333 } else {
3334 const mbfl_language *lang = mbfl_no2language(MBSTRG(language));
3335 if (lang != NULL) {
3336 charset = lang->mail_charset;
3337 transenc = lang->mail_header_encoding;
3338 }
3339 }
3340
3341 if (trans_enc_name != NULL) {
3342 if (*trans_enc_name == 'B' || *trans_enc_name == 'b') {
3343 transenc = mbfl_no_encoding_base64;
3344 } else if (*trans_enc_name == 'Q' || *trans_enc_name == 'q') {
3345 transenc = mbfl_no_encoding_qprint;
3346 }
3347 }
3348
3349 mbfl_string_init(&result);
3350 ret = mbfl_mime_header_encode(&string, &result, charset, transenc, linefeed, indent);
3351 if (ret != NULL) {
3352 RETVAL_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */
3353 } else {
3354 RETVAL_FALSE;
3355 }
3356 }
3357 /* }}} */
3358
3359 /* {{{ proto string mb_decode_mimeheader(string string)
3360 Decodes the MIME "encoded-word" in the string */
PHP_FUNCTION(mb_decode_mimeheader)3361 PHP_FUNCTION(mb_decode_mimeheader)
3362 {
3363 mbfl_string string, result, *ret;
3364
3365 mbfl_string_init(&string);
3366 string.no_language = MBSTRG(language);
3367 string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
3368
3369 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", (char **)&string.val, &string.len) == FAILURE) {
3370 return;
3371 }
3372
3373 mbfl_string_init(&result);
3374 ret = mbfl_mime_header_decode(&string, &result, MBSTRG(current_internal_encoding)->no_encoding);
3375 if (ret != NULL) {
3376 RETVAL_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */
3377 } else {
3378 RETVAL_FALSE;
3379 }
3380 }
3381 /* }}} */
3382
3383 /* {{{ proto string mb_convert_kana(string str [, string option] [, string encoding])
3384 Conversion between full-width character and half-width character (Japanese) */
PHP_FUNCTION(mb_convert_kana)3385 PHP_FUNCTION(mb_convert_kana)
3386 {
3387 int opt, i;
3388 mbfl_string string, result, *ret;
3389 char *optstr = NULL;
3390 int optstr_len;
3391 char *encname = NULL;
3392 int encname_len;
3393
3394 mbfl_string_init(&string);
3395 string.no_language = MBSTRG(language);
3396 string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
3397
3398 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|ss", (char **)&string.val, &string.len, &optstr, &optstr_len, &encname, &encname_len) == FAILURE) {
3399 return;
3400 }
3401
3402 /* option */
3403 if (optstr != NULL) {
3404 char *p = optstr;
3405 int n = optstr_len;
3406 i = 0;
3407 opt = 0;
3408 while (i < n) {
3409 i++;
3410 switch (*p++) {
3411 case 'A':
3412 opt |= 0x1;
3413 break;
3414 case 'a':
3415 opt |= 0x10;
3416 break;
3417 case 'R':
3418 opt |= 0x2;
3419 break;
3420 case 'r':
3421 opt |= 0x20;
3422 break;
3423 case 'N':
3424 opt |= 0x4;
3425 break;
3426 case 'n':
3427 opt |= 0x40;
3428 break;
3429 case 'S':
3430 opt |= 0x8;
3431 break;
3432 case 's':
3433 opt |= 0x80;
3434 break;
3435 case 'K':
3436 opt |= 0x100;
3437 break;
3438 case 'k':
3439 opt |= 0x1000;
3440 break;
3441 case 'H':
3442 opt |= 0x200;
3443 break;
3444 case 'h':
3445 opt |= 0x2000;
3446 break;
3447 case 'V':
3448 opt |= 0x800;
3449 break;
3450 case 'C':
3451 opt |= 0x10000;
3452 break;
3453 case 'c':
3454 opt |= 0x20000;
3455 break;
3456 case 'M':
3457 opt |= 0x100000;
3458 break;
3459 case 'm':
3460 opt |= 0x200000;
3461 break;
3462 }
3463 }
3464 } else {
3465 opt = 0x900;
3466 }
3467
3468 /* encoding */
3469 if (encname != NULL) {
3470 string.no_encoding = mbfl_name2no_encoding(encname);
3471 if (string.no_encoding == mbfl_no_encoding_invalid) {
3472 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encname);
3473 RETURN_FALSE;
3474 }
3475 }
3476
3477 ret = mbfl_ja_jp_hantozen(&string, &result, opt);
3478 if (ret != NULL) {
3479 RETVAL_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */
3480 } else {
3481 RETVAL_FALSE;
3482 }
3483 }
3484 /* }}} */
3485
3486 #define PHP_MBSTR_STACK_BLOCK_SIZE 32
3487
3488 /* {{{ proto string mb_convert_variables(string to-encoding, mixed from-encoding, mixed vars [, ...])
3489 Converts the string resource in variables to desired encoding */
PHP_FUNCTION(mb_convert_variables)3490 PHP_FUNCTION(mb_convert_variables)
3491 {
3492 zval ***args, ***stack, **var, **hash_entry, **zfrom_enc;
3493 HashTable *target_hash;
3494 mbfl_string string, result, *ret;
3495 const mbfl_encoding *from_encoding, *to_encoding;
3496 mbfl_encoding_detector *identd;
3497 mbfl_buffer_converter *convd;
3498 int n, to_enc_len, argc, stack_level, stack_max;
3499 size_t elistsz;
3500 const mbfl_encoding **elist;
3501 char *to_enc;
3502 void *ptmp;
3503
3504 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sZ+", &to_enc, &to_enc_len, &zfrom_enc, &args, &argc) == FAILURE) {
3505 return;
3506 }
3507
3508 /* new encoding */
3509 to_encoding = mbfl_name2encoding(to_enc);
3510 if (!to_encoding) {
3511 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", to_enc);
3512 efree(args);
3513 RETURN_FALSE;
3514 }
3515
3516 /* initialize string */
3517 mbfl_string_init(&string);
3518 mbfl_string_init(&result);
3519 from_encoding = MBSTRG(current_internal_encoding);
3520 string.no_encoding = from_encoding->no_encoding;
3521 string.no_language = MBSTRG(language);
3522
3523 /* pre-conversion encoding */
3524 elist = NULL;
3525 elistsz = 0;
3526 switch (Z_TYPE_PP(zfrom_enc)) {
3527 case IS_ARRAY:
3528 php_mb_parse_encoding_array(*zfrom_enc, &elist, &elistsz, 0 TSRMLS_CC);
3529 break;
3530 default:
3531 convert_to_string_ex(zfrom_enc);
3532 php_mb_parse_encoding_list(Z_STRVAL_PP(zfrom_enc), Z_STRLEN_PP(zfrom_enc), &elist, &elistsz, 0 TSRMLS_CC);
3533 break;
3534 }
3535 if (elistsz <= 0) {
3536 from_encoding = &mbfl_encoding_pass;
3537 } else if (elistsz == 1) {
3538 from_encoding = *elist;
3539 } else {
3540 /* auto detect */
3541 from_encoding = NULL;
3542 stack_max = PHP_MBSTR_STACK_BLOCK_SIZE;
3543 stack = (zval ***)safe_emalloc(stack_max, sizeof(zval **), 0);
3544 stack_level = 0;
3545 identd = mbfl_encoding_detector_new2(elist, elistsz, MBSTRG(strict_detection));
3546 if (identd != NULL) {
3547 n = 0;
3548 while (n < argc || stack_level > 0) {
3549 if (stack_level <= 0) {
3550 var = args[n++];
3551 if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) {
3552 target_hash = HASH_OF(*var);
3553 if (target_hash != NULL) {
3554 zend_hash_internal_pointer_reset(target_hash);
3555 }
3556 }
3557 } else {
3558 stack_level--;
3559 var = stack[stack_level];
3560 }
3561 if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) {
3562 target_hash = HASH_OF(*var);
3563 if (target_hash != NULL) {
3564 while (zend_hash_get_current_data(target_hash, (void **) &hash_entry) != FAILURE) {
3565 zend_hash_move_forward(target_hash);
3566 if (Z_TYPE_PP(hash_entry) == IS_ARRAY || Z_TYPE_PP(hash_entry) == IS_OBJECT) {
3567 if (stack_level >= stack_max) {
3568 stack_max += PHP_MBSTR_STACK_BLOCK_SIZE;
3569 ptmp = erealloc(stack, sizeof(zval **)*stack_max);
3570 stack = (zval ***)ptmp;
3571 }
3572 stack[stack_level] = var;
3573 stack_level++;
3574 var = hash_entry;
3575 target_hash = HASH_OF(*var);
3576 if (target_hash != NULL) {
3577 zend_hash_internal_pointer_reset(target_hash);
3578 continue;
3579 }
3580 } else if (Z_TYPE_PP(hash_entry) == IS_STRING) {
3581 string.val = (unsigned char *)Z_STRVAL_PP(hash_entry);
3582 string.len = Z_STRLEN_PP(hash_entry);
3583 if (mbfl_encoding_detector_feed(identd, &string)) {
3584 goto detect_end; /* complete detecting */
3585 }
3586 }
3587 }
3588 }
3589 } else if (Z_TYPE_PP(var) == IS_STRING) {
3590 string.val = (unsigned char *)Z_STRVAL_PP(var);
3591 string.len = Z_STRLEN_PP(var);
3592 if (mbfl_encoding_detector_feed(identd, &string)) {
3593 goto detect_end; /* complete detecting */
3594 }
3595 }
3596 }
3597 detect_end:
3598 from_encoding = mbfl_encoding_detector_judge2(identd);
3599 mbfl_encoding_detector_delete(identd);
3600 }
3601 efree(stack);
3602
3603 if (!from_encoding) {
3604 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to detect encoding");
3605 from_encoding = &mbfl_encoding_pass;
3606 }
3607 }
3608 if (elist != NULL) {
3609 efree((void *)elist);
3610 }
3611 /* create converter */
3612 convd = NULL;
3613 if (from_encoding != &mbfl_encoding_pass) {
3614 convd = mbfl_buffer_converter_new2(from_encoding, to_encoding, 0);
3615 if (convd == NULL) {
3616 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create converter");
3617 RETURN_FALSE;
3618 }
3619 mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
3620 mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
3621 }
3622
3623 /* convert */
3624 if (convd != NULL) {
3625 stack_max = PHP_MBSTR_STACK_BLOCK_SIZE;
3626 stack = (zval ***)safe_emalloc(stack_max, sizeof(zval **), 0);
3627 stack_level = 0;
3628 n = 0;
3629 while (n < argc || stack_level > 0) {
3630 if (stack_level <= 0) {
3631 var = args[n++];
3632 if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) {
3633 target_hash = HASH_OF(*var);
3634 if (target_hash != NULL) {
3635 zend_hash_internal_pointer_reset(target_hash);
3636 }
3637 }
3638 } else {
3639 stack_level--;
3640 var = stack[stack_level];
3641 }
3642 if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) {
3643 target_hash = HASH_OF(*var);
3644 if (target_hash != NULL) {
3645 while (zend_hash_get_current_data(target_hash, (void **) &hash_entry) != FAILURE) {
3646 zend_hash_move_forward(target_hash);
3647 if (Z_TYPE_PP(hash_entry) == IS_ARRAY || Z_TYPE_PP(hash_entry) == IS_OBJECT) {
3648 if (stack_level >= stack_max) {
3649 stack_max += PHP_MBSTR_STACK_BLOCK_SIZE;
3650 ptmp = erealloc(stack, sizeof(zval **)*stack_max);
3651 stack = (zval ***)ptmp;
3652 }
3653 stack[stack_level] = var;
3654 stack_level++;
3655 var = hash_entry;
3656 SEPARATE_ZVAL(hash_entry);
3657 target_hash = HASH_OF(*var);
3658 if (target_hash != NULL) {
3659 zend_hash_internal_pointer_reset(target_hash);
3660 continue;
3661 }
3662 } else if (Z_TYPE_PP(hash_entry) == IS_STRING) {
3663 string.val = (unsigned char *)Z_STRVAL_PP(hash_entry);
3664 string.len = Z_STRLEN_PP(hash_entry);
3665 ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
3666 if (ret != NULL) {
3667 if (Z_REFCOUNT_PP(hash_entry) > 1) {
3668 Z_DELREF_PP(hash_entry);
3669 MAKE_STD_ZVAL(*hash_entry);
3670 } else {
3671 zval_dtor(*hash_entry);
3672 }
3673 ZVAL_STRINGL(*hash_entry, (char *)ret->val, ret->len, 0);
3674 }
3675 }
3676 }
3677 }
3678 } else if (Z_TYPE_PP(var) == IS_STRING) {
3679 string.val = (unsigned char *)Z_STRVAL_PP(var);
3680 string.len = Z_STRLEN_PP(var);
3681 ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
3682 if (ret != NULL) {
3683 zval_dtor(*var);
3684 ZVAL_STRINGL(*var, (char *)ret->val, ret->len, 0);
3685 }
3686 }
3687 }
3688 efree(stack);
3689
3690 MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
3691 mbfl_buffer_converter_delete(convd);
3692 }
3693
3694 efree(args);
3695
3696 if (from_encoding) {
3697 RETURN_STRING(from_encoding->name, 1);
3698 } else {
3699 RETURN_FALSE;
3700 }
3701 }
3702 /* }}} */
3703
3704 /* {{{ HTML numeric entity */
3705 /* {{{ static void php_mb_numericentity_exec() */
3706 static void
php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS,int type)3707 php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS, int type)
3708 {
3709 char *str, *encoding;
3710 int str_len, encoding_len;
3711 zval *zconvmap, **hash_entry;
3712 HashTable *target_hash;
3713 size_t argc = ZEND_NUM_ARGS();
3714 int i, *convmap, *mapelm, mapsize=0;
3715 zend_bool is_hex = 0;
3716 mbfl_string string, result, *ret;
3717 enum mbfl_no_encoding no_encoding;
3718
3719 if (zend_parse_parameters(argc TSRMLS_CC, "sz|sb", &str, &str_len, &zconvmap, &encoding, &encoding_len, &is_hex) == FAILURE) {
3720 return;
3721 }
3722
3723 mbfl_string_init(&string);
3724 string.no_language = MBSTRG(language);
3725 string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
3726 string.val = (unsigned char *)str;
3727 string.len = str_len;
3728
3729 /* encoding */
3730 if ((argc == 3 || argc == 4) && encoding_len > 0) {
3731 no_encoding = mbfl_name2no_encoding(encoding);
3732 if (no_encoding == mbfl_no_encoding_invalid) {
3733 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
3734 RETURN_FALSE;
3735 } else {
3736 string.no_encoding = no_encoding;
3737 }
3738 }
3739
3740 if (argc == 4) {
3741 if (type == 0 && is_hex) {
3742 type = 2; /* output in hex format */
3743 }
3744 }
3745
3746 /* conversion map */
3747 convmap = NULL;
3748 if (Z_TYPE_P(zconvmap) == IS_ARRAY) {
3749 target_hash = Z_ARRVAL_P(zconvmap);
3750 zend_hash_internal_pointer_reset(target_hash);
3751 i = zend_hash_num_elements(target_hash);
3752 if (i > 0) {
3753 convmap = (int *)safe_emalloc(i, sizeof(int), 0);
3754 mapelm = convmap;
3755 mapsize = 0;
3756 while (i > 0) {
3757 if (zend_hash_get_current_data(target_hash, (void **) &hash_entry) == FAILURE) {
3758 break;
3759 }
3760 convert_to_long_ex(hash_entry);
3761 *mapelm++ = Z_LVAL_PP(hash_entry);
3762 mapsize++;
3763 i--;
3764 zend_hash_move_forward(target_hash);
3765 }
3766 }
3767 }
3768 if (convmap == NULL) {
3769 RETURN_FALSE;
3770 }
3771 mapsize /= 4;
3772
3773 ret = mbfl_html_numeric_entity(&string, &result, convmap, mapsize, type);
3774 if (ret != NULL) {
3775 RETVAL_STRINGL((char *)ret->val, ret->len, 0);
3776 } else {
3777 RETVAL_FALSE;
3778 }
3779 efree((void *)convmap);
3780 }
3781 /* }}} */
3782
3783 /* {{{ proto string mb_encode_numericentity(string string, array convmap [, string encoding [, bool is_hex]])
3784 Converts specified characters to HTML numeric entities */
PHP_FUNCTION(mb_encode_numericentity)3785 PHP_FUNCTION(mb_encode_numericentity)
3786 {
3787 php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
3788 }
3789 /* }}} */
3790
3791 /* {{{ proto string mb_decode_numericentity(string string, array convmap [, string encoding])
3792 Converts HTML numeric entities to character code */
PHP_FUNCTION(mb_decode_numericentity)3793 PHP_FUNCTION(mb_decode_numericentity)
3794 {
3795 php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
3796 }
3797 /* }}} */
3798 /* }}} */
3799
3800 /* {{{ proto int mb_send_mail(string to, string subject, string message [, string additional_headers [, string additional_parameters]])
3801 * Sends an email message with MIME scheme
3802 */
3803
/*
 * If str[pos] starts a folded-header separator (CR LF followed by a space
 * or tab), advance pos by two and then further while the character after
 * pos is a space or tab, and `continue` the enclosing loop.
 *
 * Because it expands to a bare `if` containing `continue`, this macro is
 * only usable as a statement inside a loop body.  `str` and `pos` are
 * evaluated several times, and str[pos + 1] / str[pos + 2] are read
 * without any bounds check.
 */
#define SKIP_LONG_HEADER_SEP_MBSTRING(str, pos) \
	if (str[pos] == '\r' && str[pos + 1] == '\n' && (str[pos + 2] == ' ' || str[pos + 2] == '\t')) { \
		pos += 2; \
		while (str[pos + 1] == ' ' || str[pos + 1] == '\t') { \
			pos++; \
		} \
		continue; \
	}
3812
/*
 * Replace every NUL byte in the `len` bytes starting at `str` with a space.
 *
 * Uses the variables `pp` and `ee`, which must already be declared in the
 * scope where the macro is expanded.  The expansion is a plain statement
 * sequence (not wrapped in do/while), and the last line ends with a
 * continuation backslash, so the line following the macro must stay blank.
 */
#define MAIL_ASCIIZ_CHECK_MBSTRING(str, len) \
	pp = str; \
	ee = pp + len; \
	while ((pp = memchr(pp, '\0', (ee - pp)))) { \
		*pp = ' '; \
	} \
3819
/*
 * Extend the smart_str named `token` (taken from the expanding scope) by
 * one character.
 *
 * When token.a > 0 the character `ch` is appended with smart_str_appendc().
 * When token.a == 0 only token.len is incremented and `ch` itself is not
 * stored anywhere.
 */
#define APPEND_ONE_CHAR(ch) do { \
	if (token.a > 0) { \
		smart_str_appendc(&token, ch); \
	} else {\
		token.len++; \
	} \
} while (0)
3827
/*
 * Give a smart_str with a == 0 a private heap copy of its contents.
 *
 * The new capacity `a` is the smallest power of two (starting from 1) that
 * is not less than `len`; a + 1 bytes are allocated with emalloc(), the
 * first `len` bytes are copied, and `c` is redirected to the copy.  No
 * terminating NUL is written.  A smart_str whose `a` is already non-zero is
 * left untouched.
 */
#define SEPARATE_SMART_STR(str) do {\
	if ((str)->a == 0) { \
		char *tmp_ptr; \
		(str)->a = 1; \
		while ((str)->a < (str)->len) { \
			(str)->a <<= 1; \
		} \
		tmp_ptr = emalloc((str)->a + 1); \
		memcpy(tmp_ptr, (str)->c, (str)->len); \
		(str)->c = tmp_ptr; \
	} \
} while (0)
3840
/* Free a smart_str only when its capacity field says it has a buffer of its
 * own; a smart_str with a == 0 is left alone. */
static void my_smart_str_dtor(smart_str *s)
{
	if (s->a <= 0) {
		return;
	}
	smart_str_free(s);
}
3847
/*
 * Parse a block of mail headers into `ht`.
 *
 * Scans the `str_len` bytes at `str` one at a time.  Each completed
 * "Name: value" pair is stored with zend_hash_update(): the key is the
 * field name uppercased with php_strtoupper() (on a private copy), and the
 * stored data is a copy of the smart_str struct describing the value.
 * Scanning stops at the end of the input or at a '\n' that arrives while
 * already at the start of a line.
 *
 * Tokens start out with a == 0 and point directly into `str`; they only
 * get a buffer of their own through SEPARATE_SMART_STR (field names before
 * uppercasing, values when a continuation line is joined).
 * NOTE(review): releasing the value buffers stored in `ht` is presumably
 * left to the table's destructor or the caller - confirm at the call site.
 *
 * Returns the final value of the parser state (see the diagram below).
 */
static int _php_mbstr_parse_mail_headers(HashTable *ht, const char *str, size_t str_len)
{
	const char *ps;
	size_t icnt;
	int state = 0;
	int crlf_state = -1;

	smart_str token = { 0, 0, 0 };
	smart_str fld_name = { 0, 0, 0 }, fld_val = { 0, 0, 0 };

	ps = str;
	icnt = str_len;

	/*
	 *             C o n t e n t - T y p e :   t e x t / h t m l \r\n
	 *             ^ ^^^^^^^^^^^^^^^^^^^^^ ^^^ ^^^^^^^^^^^^^^^^^ ^^^^
	 *      state  0            1           2          3
	 *
	 *             C o n t e n t - T y p e :   t e x t / h t m l \r\n
	 *             ^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^
	 * crlf_state -1                       0                     1 -1
	 *
	 * Additional states used by the code below:
	 *   state 4  - a continuation line (leading space/tab) of a value began
	 *   state 5  - a line with leading space/tab that continues no value
	 */

	while (icnt > 0) {
		switch (*ps) {
			case ':':
				/* a '\r' not followed by '\n' belongs to the token */
				if (crlf_state == 1) {
					APPEND_ONE_CHAR('\r');
				}

				if (state == 0 || state == 1) {
					/* first colon on the line ends the field name */
					fld_name = token;

					state = 2;
				} else {
					/* any later colon is ordinary token content */
					APPEND_ONE_CHAR(*ps);
				}

				crlf_state = 0;
				break;

			case '\n':
				/* '\n' at the start of a line: stop parsing */
				if (crlf_state == -1) {
					goto out;
				}
				crlf_state = -1;
				break;

			case '\r':
				if (crlf_state == 1) {
					/* second '\r' in a row: emit the pending one */
					APPEND_ONE_CHAR('\r');
				} else {
					/* hold it back until the next character is seen */
					crlf_state = 1;
				}
				break;

			case ' ': case '\t':
				if (crlf_state == -1) {
					if (state == 3) {
						/* continuing from the previous line */
						SEPARATE_SMART_STR(&token);
						state = 4;
					} else {
						/* simply skipping this new line */
						state = 5;
					}
				} else {
					if (crlf_state == 1) {
						APPEND_ONE_CHAR('\r');
					}
					/* whitespace is kept only inside a name or a value */
					if (state == 1 || state == 3) {
						APPEND_ONE_CHAR(*ps);
					}
				}
				crlf_state = 0;
				break;

			default:
				switch (state) {
					case 0:
						/* first character of a field name */
						token.c = (char *)ps;
						token.len = 0;
						token.a = 0;
						state = 1;
						break;

					case 2:
						if (crlf_state != -1) {
							/* first character of the value, same line as the colon */
							token.c = (char *)ps;
							token.len = 0;
							token.a = 0;

							state = 3;
							break;
						}
						/* break is missing intentionally */

					case 3:
						if (crlf_state == -1) {
							/* a new line starting with a non-blank character:
							 * the previous field is complete */
							fld_val = token;

							if (fld_name.c != NULL && fld_val.c != NULL) {
								char *dummy;

								/* FIXME: some locale free implementation is
								 * really required here,,, */
								SEPARATE_SMART_STR(&fld_name);
								php_strtoupper(fld_name.c, fld_name.len);

								zend_hash_update(ht, (char *)fld_name.c, fld_name.len, &fld_val, sizeof(smart_str), (void **)&dummy);

								my_smart_str_dtor(&fld_name);
							}

							memset(&fld_name, 0, sizeof(smart_str));
							memset(&fld_val, 0, sizeof(smart_str));

							/* this character starts the next field name */
							token.c = (char *)ps;
							token.len = 0;
							token.a = 0;

							state = 1;
						}
						break;

					case 4:
						/* join the continuation line with a single space */
						APPEND_ONE_CHAR(' ');
						state = 3;
						break;
				}

				if (crlf_state == 1) {
					APPEND_ONE_CHAR('\r');
				}

				APPEND_ONE_CHAR(*ps);

				crlf_state = 0;
				break;
		}
		ps++, icnt--;
	}
out:
	/* a name followed by a colon but no value gets an empty value */
	if (state == 2) {
		token.c = "";
		token.len = 0;
		token.a = 0;

		state = 3;
	}
	/* store the field that was still being collected when scanning ended */
	if (state == 3) {
		fld_val = token;

		if (fld_name.c != NULL && fld_val.c != NULL) {
			void *dummy;

			/* FIXME: some locale free implementation is
			 * really required here,,, */
			SEPARATE_SMART_STR(&fld_name);
			php_strtoupper(fld_name.c, fld_name.len);

			zend_hash_update(ht, (char *)fld_name.c, fld_name.len, &fld_val, sizeof(smart_str), (void **)&dummy);

			my_smart_str_dtor(&fld_name);
		}
	}
	return state;
}
4017
PHP_FUNCTION(mb_send_mail)4018 PHP_FUNCTION(mb_send_mail)
4019 {
4020 int n;
4021 char *to = NULL;
4022 int to_len;
4023 char *message = NULL;
4024 int message_len;
4025 char *headers = NULL;
4026 int headers_len;
4027 char *subject = NULL;
4028 int subject_len;
4029 char *extra_cmd = NULL;
4030 int extra_cmd_len;
4031 int i;
4032 char *to_r = NULL;
4033 char *force_extra_parameters = INI_STR("mail.force_extra_parameters");
4034 struct {
4035 int cnt_type:1;
4036 int cnt_trans_enc:1;
4037 } suppressed_hdrs = { 0, 0 };
4038
4039 char *message_buf = NULL, *subject_buf = NULL, *p;
4040 mbfl_string orig_str, conv_str;
4041 mbfl_string *pstr; /* pointer to mbfl string for return value */
4042 enum mbfl_no_encoding
4043 tran_cs, /* transfar text charset */
4044 head_enc, /* header transfar encoding */
4045 body_enc; /* body transfar encoding */
4046 mbfl_memory_device device; /* automatic allocateable buffer for additional header */
4047 const mbfl_language *lang;
4048 int err = 0;
4049 HashTable ht_headers;
4050 smart_str *s;
4051 extern void mbfl_memory_device_unput(mbfl_memory_device *device);
4052 char *pp, *ee;
4053
4054 /* initialize */
4055 mbfl_memory_device_init(&device, 0, 0);
4056 mbfl_string_init(&orig_str);
4057 mbfl_string_init(&conv_str);
4058
4059 /* character-set, transfer-encoding */
4060 tran_cs = mbfl_no_encoding_utf8;
4061 head_enc = mbfl_no_encoding_base64;
4062 body_enc = mbfl_no_encoding_base64;
4063 lang = mbfl_no2language(MBSTRG(language));
4064 if (lang != NULL) {
4065 tran_cs = lang->mail_charset;
4066 head_enc = lang->mail_header_encoding;
4067 body_enc = lang->mail_body_encoding;
4068 }
4069
4070 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sss|ss", &to, &to_len, &subject, &subject_len, &message, &message_len, &headers, &headers_len, &extra_cmd, &extra_cmd_len) == FAILURE) {
4071 return;
4072 }
4073
4074 /* ASCIIZ check */
4075 MAIL_ASCIIZ_CHECK_MBSTRING(to, to_len);
4076 MAIL_ASCIIZ_CHECK_MBSTRING(subject, subject_len);
4077 MAIL_ASCIIZ_CHECK_MBSTRING(message, message_len);
4078 if (headers) {
4079 MAIL_ASCIIZ_CHECK_MBSTRING(headers, headers_len);
4080 }
4081 if (extra_cmd) {
4082 MAIL_ASCIIZ_CHECK_MBSTRING(extra_cmd, extra_cmd_len);
4083 }
4084
4085 zend_hash_init(&ht_headers, 0, NULL, (dtor_func_t) my_smart_str_dtor, 0);
4086
4087 if (headers != NULL) {
4088 _php_mbstr_parse_mail_headers(&ht_headers, headers, headers_len);
4089 }
4090
4091 if (zend_hash_find(&ht_headers, "CONTENT-TYPE", sizeof("CONTENT-TYPE") - 1, (void **)&s) == SUCCESS) {
4092 char *tmp;
4093 char *param_name;
4094 char *charset = NULL;
4095
4096 SEPARATE_SMART_STR(s);
4097 smart_str_0(s);
4098
4099 p = strchr(s->c, ';');
4100
4101 if (p != NULL) {
4102 /* skipping the padded spaces */
4103 do {
4104 ++p;
4105 } while (*p == ' ' || *p == '\t');
4106
4107 if (*p != '\0') {
4108 if ((param_name = php_strtok_r(p, "= ", &tmp)) != NULL) {
4109 if (strcasecmp(param_name, "charset") == 0) {
4110 enum mbfl_no_encoding _tran_cs = tran_cs;
4111
4112 charset = php_strtok_r(NULL, "= \"", &tmp);
4113 if (charset != NULL) {
4114 _tran_cs = mbfl_name2no_encoding(charset);
4115 }
4116
4117 if (_tran_cs == mbfl_no_encoding_invalid) {
4118 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unsupported charset \"%s\" - will be regarded as ascii", charset);
4119 _tran_cs = mbfl_no_encoding_ascii;
4120 }
4121 tran_cs = _tran_cs;
4122 }
4123 }
4124 }
4125 }
4126 suppressed_hdrs.cnt_type = 1;
4127 }
4128
4129 if (zend_hash_find(&ht_headers, "CONTENT-TRANSFER-ENCODING", sizeof("CONTENT-TRANSFER-ENCODING") - 1, (void **)&s) == SUCCESS) {
4130 enum mbfl_no_encoding _body_enc;
4131 SEPARATE_SMART_STR(s);
4132 smart_str_0(s);
4133
4134 _body_enc = mbfl_name2no_encoding(s->c);
4135 switch (_body_enc) {
4136 case mbfl_no_encoding_base64:
4137 case mbfl_no_encoding_7bit:
4138 case mbfl_no_encoding_8bit:
4139 body_enc = _body_enc;
4140 break;
4141
4142 default:
4143 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unsupported transfer encoding \"%s\" - will be regarded as 8bit", s->c);
4144 body_enc = mbfl_no_encoding_8bit;
4145 break;
4146 }
4147 suppressed_hdrs.cnt_trans_enc = 1;
4148 }
4149
4150 /* To: */
4151 if (to != NULL) {
4152 if (to_len > 0) {
4153 to_r = estrndup(to, to_len);
4154 for (; to_len; to_len--) {
4155 if (!isspace((unsigned char) to_r[to_len - 1])) {
4156 break;
4157 }
4158 to_r[to_len - 1] = '\0';
4159 }
4160 for (i = 0; to_r[i]; i++) {
4161 if (iscntrl((unsigned char) to_r[i])) {
4162 /* According to RFC 822, section 3.1.1 long headers may be separated into
4163 * parts using CRLF followed at least one linear-white-space character ('\t' or ' ').
4164 * To prevent these separators from being replaced with a space, we use the
4165 * SKIP_LONG_HEADER_SEP_MBSTRING to skip over them.
4166 */
4167 SKIP_LONG_HEADER_SEP_MBSTRING(to_r, i);
4168 to_r[i] = ' ';
4169 }
4170 }
4171 } else {
4172 to_r = to;
4173 }
4174 } else {
4175 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Missing To: field");
4176 err = 1;
4177 }
4178
4179 /* Subject: */
4180 if (subject != NULL && subject_len >= 0) {
4181 orig_str.no_language = MBSTRG(language);
4182 orig_str.val = (unsigned char *)subject;
4183 orig_str.len = subject_len;
4184 orig_str.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
4185 if (orig_str.no_encoding == mbfl_no_encoding_invalid || orig_str.no_encoding == mbfl_no_encoding_pass) {
4186 const mbfl_encoding *encoding = mbfl_identify_encoding2(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
4187 orig_str.no_encoding = encoding ? encoding->no_encoding: mbfl_no_encoding_invalid;
4188 }
4189 pstr = mbfl_mime_header_encode(&orig_str, &conv_str, tran_cs, head_enc, "\n", sizeof("Subject: [PHP-jp nnnnnnnn]"));
4190 if (pstr != NULL) {
4191 subject_buf = subject = (char *)pstr->val;
4192 }
4193 } else {
4194 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Missing Subject: field");
4195 err = 1;
4196 }
4197
4198 /* message body */
4199 if (message != NULL) {
4200 orig_str.no_language = MBSTRG(language);
4201 orig_str.val = (unsigned char *)message;
4202 orig_str.len = (unsigned int)message_len;
4203 orig_str.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
4204
4205 if (orig_str.no_encoding == mbfl_no_encoding_invalid || orig_str.no_encoding == mbfl_no_encoding_pass) {
4206 const mbfl_encoding *encoding = mbfl_identify_encoding2(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
4207 orig_str.no_encoding = encoding ? encoding->no_encoding: mbfl_no_encoding_invalid;
4208 }
4209
4210 pstr = NULL;
4211 {
4212 mbfl_string tmpstr;
4213
4214 if (mbfl_convert_encoding(&orig_str, &tmpstr, tran_cs) != NULL) {
4215 tmpstr.no_encoding=mbfl_no_encoding_8bit;
4216 pstr = mbfl_convert_encoding(&tmpstr, &conv_str, body_enc);
4217 efree(tmpstr.val);
4218 }
4219 }
4220 if (pstr != NULL) {
4221 message_buf = message = (char *)pstr->val;
4222 }
4223 } else {
4224 /* this is not really an error, so it is allowed. */
4225 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty message body");
4226 message = NULL;
4227 }
4228
4229 /* other headers */
4230 #define PHP_MBSTR_MAIL_MIME_HEADER1 "MIME-Version: 1.0"
4231 #define PHP_MBSTR_MAIL_MIME_HEADER2 "Content-Type: text/plain"
4232 #define PHP_MBSTR_MAIL_MIME_HEADER3 "; charset="
4233 #define PHP_MBSTR_MAIL_MIME_HEADER4 "Content-Transfer-Encoding: "
4234 if (headers != NULL) {
4235 p = headers;
4236 n = headers_len;
4237 mbfl_memory_device_strncat(&device, p, n);
4238 if (n > 0 && p[n - 1] != '\n') {
4239 mbfl_memory_device_strncat(&device, "\n", 1);
4240 }
4241 }
4242
4243 if (!zend_hash_exists(&ht_headers, "MIME-VERSION", sizeof("MIME-VERSION") - 1)) {
4244 mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER1, sizeof(PHP_MBSTR_MAIL_MIME_HEADER1) - 1);
4245 mbfl_memory_device_strncat(&device, "\n", 1);
4246 }
4247
4248 if (!suppressed_hdrs.cnt_type) {
4249 mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER2, sizeof(PHP_MBSTR_MAIL_MIME_HEADER2) - 1);
4250
4251 p = (char *)mbfl_no2preferred_mime_name(tran_cs);
4252 if (p != NULL) {
4253 mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER3, sizeof(PHP_MBSTR_MAIL_MIME_HEADER3) - 1);
4254 mbfl_memory_device_strcat(&device, p);
4255 }
4256 mbfl_memory_device_strncat(&device, "\n", 1);
4257 }
4258 if (!suppressed_hdrs.cnt_trans_enc) {
4259 mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER4, sizeof(PHP_MBSTR_MAIL_MIME_HEADER4) - 1);
4260 p = (char *)mbfl_no2preferred_mime_name(body_enc);
4261 if (p == NULL) {
4262 p = "7bit";
4263 }
4264 mbfl_memory_device_strcat(&device, p);
4265 mbfl_memory_device_strncat(&device, "\n", 1);
4266 }
4267
4268 mbfl_memory_device_unput(&device);
4269 mbfl_memory_device_output('\0', &device);
4270 headers = (char *)device.buffer;
4271
4272 if (force_extra_parameters) {
4273 extra_cmd = php_escape_shell_cmd(force_extra_parameters);
4274 } else if (extra_cmd) {
4275 extra_cmd = php_escape_shell_cmd(extra_cmd);
4276 }
4277
4278 if (!err && php_mail(to_r, subject, message, headers, extra_cmd TSRMLS_CC)) {
4279 RETVAL_TRUE;
4280 } else {
4281 RETVAL_FALSE;
4282 }
4283
4284 if (extra_cmd) {
4285 efree(extra_cmd);
4286 }
4287 if (to_r != to) {
4288 efree(to_r);
4289 }
4290 if (subject_buf) {
4291 efree((void *)subject_buf);
4292 }
4293 if (message_buf) {
4294 efree((void *)message_buf);
4295 }
4296 mbfl_memory_device_clear(&device);
4297 zend_hash_destroy(&ht_headers);
4298 }
4299
4300 #undef SKIP_LONG_HEADER_SEP_MBSTRING
4301 #undef MAIL_ASCIIZ_CHECK_MBSTRING
4302 #undef APPEND_ONE_CHAR
4303 #undef SEPARATE_SMART_STR
4304 #undef PHP_MBSTR_MAIL_MIME_HEADER1
4305 #undef PHP_MBSTR_MAIL_MIME_HEADER2
4306 #undef PHP_MBSTR_MAIL_MIME_HEADER3
4307 #undef PHP_MBSTR_MAIL_MIME_HEADER4
4308 /* }}} */
4309
4310 /* {{{ proto mixed mb_get_info([string type])
4311 Returns the current settings of mbstring */
PHP_FUNCTION(mb_get_info)4312 PHP_FUNCTION(mb_get_info)
4313 {
4314 char *typ = NULL;
4315 int typ_len;
4316 size_t n;
4317 char *name;
4318 const struct mb_overload_def *over_func;
4319 zval *row1, *row2;
4320 const mbfl_language *lang = mbfl_no2language(MBSTRG(language));
4321 const mbfl_encoding **entry;
4322
4323 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &typ, &typ_len) == FAILURE) {
4324 RETURN_FALSE;
4325 }
4326
4327 if (!typ || !strcasecmp("all", typ)) {
4328 array_init(return_value);
4329 if (MBSTRG(current_internal_encoding)) {
4330 add_assoc_string(return_value, "internal_encoding", (char *)MBSTRG(current_internal_encoding)->name, 1);
4331 }
4332 if (MBSTRG(http_input_identify)) {
4333 add_assoc_string(return_value, "http_input", (char *)MBSTRG(http_input_identify)->name, 1);
4334 }
4335 if (MBSTRG(current_http_output_encoding)) {
4336 add_assoc_string(return_value, "http_output", (char *)MBSTRG(current_http_output_encoding)->name, 1);
4337 }
4338 if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes"), 0)) != NULL) {
4339 add_assoc_string(return_value, "http_output_conv_mimetypes", name, 1);
4340 }
4341 add_assoc_long(return_value, "func_overload", MBSTRG(func_overload));
4342 if (MBSTRG(func_overload)){
4343 over_func = &(mb_ovld[0]);
4344 MAKE_STD_ZVAL(row1);
4345 array_init(row1);
4346 while (over_func->type > 0) {
4347 if ((MBSTRG(func_overload) & over_func->type) == over_func->type ) {
4348 add_assoc_string(row1, over_func->orig_func, over_func->ovld_func, 1);
4349 }
4350 over_func++;
4351 }
4352 add_assoc_zval(return_value, "func_overload_list", row1);
4353 } else {
4354 add_assoc_string(return_value, "func_overload_list", "no overload", 1);
4355 }
4356 if (lang != NULL) {
4357 if ((name = (char *)mbfl_no_encoding2name(lang->mail_charset)) != NULL) {
4358 add_assoc_string(return_value, "mail_charset", name, 1);
4359 }
4360 if ((name = (char *)mbfl_no_encoding2name(lang->mail_header_encoding)) != NULL) {
4361 add_assoc_string(return_value, "mail_header_encoding", name, 1);
4362 }
4363 if ((name = (char *)mbfl_no_encoding2name(lang->mail_body_encoding)) != NULL) {
4364 add_assoc_string(return_value, "mail_body_encoding", name, 1);
4365 }
4366 }
4367 add_assoc_long(return_value, "illegal_chars", MBSTRG(illegalchars));
4368 if (MBSTRG(encoding_translation)) {
4369 add_assoc_string(return_value, "encoding_translation", "On", 1);
4370 } else {
4371 add_assoc_string(return_value, "encoding_translation", "Off", 1);
4372 }
4373 if ((name = (char *)mbfl_no_language2name(MBSTRG(language))) != NULL) {
4374 add_assoc_string(return_value, "language", name, 1);
4375 }
4376 n = MBSTRG(current_detect_order_list_size);
4377 entry = MBSTRG(current_detect_order_list);
4378 if (n > 0) {
4379 size_t i;
4380 MAKE_STD_ZVAL(row2);
4381 array_init(row2);
4382 for (i = 0; i < n; i++) {
4383 add_next_index_string(row2, (*entry)->name, 1);
4384 entry++;
4385 }
4386 add_assoc_zval(return_value, "detect_order", row2);
4387 }
4388 if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
4389 add_assoc_string(return_value, "substitute_character", "none", 1);
4390 } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
4391 add_assoc_string(return_value, "substitute_character", "long", 1);
4392 } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
4393 add_assoc_string(return_value, "substitute_character", "entity", 1);
4394 } else {
4395 add_assoc_long(return_value, "substitute_character", MBSTRG(current_filter_illegal_substchar));
4396 }
4397 if (MBSTRG(strict_detection)) {
4398 add_assoc_string(return_value, "strict_detection", "On", 1);
4399 } else {
4400 add_assoc_string(return_value, "strict_detection", "Off", 1);
4401 }
4402 } else if (!strcasecmp("internal_encoding", typ)) {
4403 if (MBSTRG(current_internal_encoding)) {
4404 RETVAL_STRING((char *)MBSTRG(current_internal_encoding)->name, 1);
4405 }
4406 } else if (!strcasecmp("http_input", typ)) {
4407 if (MBSTRG(http_input_identify)) {
4408 RETVAL_STRING((char *)MBSTRG(http_input_identify)->name, 1);
4409 }
4410 } else if (!strcasecmp("http_output", typ)) {
4411 if (MBSTRG(current_http_output_encoding)) {
4412 RETVAL_STRING((char *)MBSTRG(current_http_output_encoding)->name, 1);
4413 }
4414 } else if (!strcasecmp("http_output_conv_mimetypes", typ)) {
4415 if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes"), 0)) != NULL) {
4416 RETVAL_STRING(name, 1);
4417 }
4418 } else if (!strcasecmp("func_overload", typ)) {
4419 RETVAL_LONG(MBSTRG(func_overload));
4420 } else if (!strcasecmp("func_overload_list", typ)) {
4421 if (MBSTRG(func_overload)){
4422 over_func = &(mb_ovld[0]);
4423 array_init(return_value);
4424 while (over_func->type > 0) {
4425 if ((MBSTRG(func_overload) & over_func->type) == over_func->type ) {
4426 add_assoc_string(return_value, over_func->orig_func, over_func->ovld_func, 1);
4427 }
4428 over_func++;
4429 }
4430 } else {
4431 RETVAL_STRING("no overload", 1);
4432 }
4433 } else if (!strcasecmp("mail_charset", typ)) {
4434 if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_charset)) != NULL) {
4435 RETVAL_STRING(name, 1);
4436 }
4437 } else if (!strcasecmp("mail_header_encoding", typ)) {
4438 if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_header_encoding)) != NULL) {
4439 RETVAL_STRING(name, 1);
4440 }
4441 } else if (!strcasecmp("mail_body_encoding", typ)) {
4442 if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_body_encoding)) != NULL) {
4443 RETVAL_STRING(name, 1);
4444 }
4445 } else if (!strcasecmp("illegal_chars", typ)) {
4446 RETVAL_LONG(MBSTRG(illegalchars));
4447 } else if (!strcasecmp("encoding_translation", typ)) {
4448 if (MBSTRG(encoding_translation)) {
4449 RETVAL_STRING("On", 1);
4450 } else {
4451 RETVAL_STRING("Off", 1);
4452 }
4453 } else if (!strcasecmp("language", typ)) {
4454 if ((name = (char *)mbfl_no_language2name(MBSTRG(language))) != NULL) {
4455 RETVAL_STRING(name, 1);
4456 }
4457 } else if (!strcasecmp("detect_order", typ)) {
4458 n = MBSTRG(current_detect_order_list_size);
4459 entry = MBSTRG(current_detect_order_list);
4460 if (n > 0) {
4461 size_t i;
4462 array_init(return_value);
4463 for (i = 0; i < n; i++) {
4464 add_next_index_string(return_value, (*entry)->name, 1);
4465 entry++;
4466 }
4467 }
4468 } else if (!strcasecmp("substitute_character", typ)) {
4469 if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
4470 RETVAL_STRING("none", 1);
4471 } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
4472 RETVAL_STRING("long", 1);
4473 } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
4474 RETVAL_STRING("entity", 1);
4475 } else {
4476 RETVAL_LONG(MBSTRG(current_filter_illegal_substchar));
4477 }
4478 } else if (!strcasecmp("strict_detection", typ)) {
4479 if (MBSTRG(strict_detection)) {
4480 RETVAL_STRING("On", 1);
4481 } else {
4482 RETVAL_STRING("Off", 1);
4483 }
4484 } else {
4485 RETURN_FALSE;
4486 }
4487 }
4488 /* }}} */
4489
4490 /* {{{ proto bool mb_check_encoding([string var[, string encoding]])
4491 Check if the string is valid for the specified encoding */
PHP_FUNCTION(mb_check_encoding)4492 PHP_FUNCTION(mb_check_encoding)
4493 {
4494 char *var = NULL;
4495 int var_len;
4496 char *enc = NULL;
4497 int enc_len;
4498 mbfl_buffer_converter *convd;
4499 const mbfl_encoding *encoding = MBSTRG(current_internal_encoding);
4500 mbfl_string string, result, *ret = NULL;
4501 long illegalchars = 0;
4502
4503 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|ss", &var, &var_len, &enc, &enc_len) == FAILURE) {
4504 RETURN_FALSE;
4505 }
4506
4507 if (var == NULL) {
4508 RETURN_BOOL(MBSTRG(illegalchars) == 0);
4509 }
4510
4511 if (enc != NULL) {
4512 encoding = mbfl_name2encoding(enc);
4513 if (!encoding || encoding == &mbfl_encoding_pass) {
4514 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid encoding \"%s\"", enc);
4515 RETURN_FALSE;
4516 }
4517 }
4518
4519 convd = mbfl_buffer_converter_new2(encoding, encoding, 0);
4520 if (convd == NULL) {
4521 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create converter");
4522 RETURN_FALSE;
4523 }
4524 mbfl_buffer_converter_illegal_mode(convd, MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE);
4525 mbfl_buffer_converter_illegal_substchar(convd, 0);
4526
4527 /* initialize string */
4528 mbfl_string_init_set(&string, mbfl_no_language_neutral, encoding->no_encoding);
4529 mbfl_string_init(&result);
4530
4531 string.val = (unsigned char *)var;
4532 string.len = var_len;
4533 ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
4534 illegalchars = mbfl_buffer_illegalchars(convd);
4535 mbfl_buffer_converter_delete(convd);
4536
4537 RETVAL_FALSE;
4538 if (ret != NULL) {
4539 if (illegalchars == 0 && string.len == result.len && memcmp(string.val, result.val, string.len) == 0) {
4540 RETVAL_TRUE;
4541 }
4542 mbfl_string_clear(&result);
4543 }
4544 }
4545 /* }}} */
4546
4547
4548 /* {{{ php_mb_populate_current_detect_order_list */
php_mb_populate_current_detect_order_list(TSRMLS_D)4549 static void php_mb_populate_current_detect_order_list(TSRMLS_D)
4550 {
4551 const mbfl_encoding **entry = 0;
4552 size_t nentries;
4553
4554 if (MBSTRG(current_detect_order_list)) {
4555 return;
4556 }
4557
4558 if (MBSTRG(detect_order_list) && MBSTRG(detect_order_list_size)) {
4559 nentries = MBSTRG(detect_order_list_size);
4560 entry = (const mbfl_encoding **)safe_emalloc(nentries, sizeof(mbfl_encoding*), 0);
4561 memcpy(entry, MBSTRG(detect_order_list), sizeof(mbfl_encoding*) * nentries);
4562 } else {
4563 const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
4564 size_t i;
4565 nentries = MBSTRG(default_detect_order_list_size);
4566 entry = (const mbfl_encoding **)safe_emalloc(nentries, sizeof(mbfl_encoding*), 0);
4567 for (i = 0; i < nentries; i++) {
4568 entry[i] = mbfl_no2encoding(src[i]);
4569 }
4570 }
4571 MBSTRG(current_detect_order_list) = entry;
4572 MBSTRG(current_detect_order_list_size) = nentries;
4573 }
/* }}} */

/* {{{ static int php_mb_encoding_translation() */
php_mb_encoding_translation(TSRMLS_D)4576 static int php_mb_encoding_translation(TSRMLS_D)
4577 {
4578 return MBSTRG(encoding_translation);
4579 }
4580 /* }}} */
4581
4582 /* {{{ MBSTRING_API size_t php_mb_mbchar_bytes_ex() */
php_mb_mbchar_bytes_ex(const char * s,const mbfl_encoding * enc)4583 MBSTRING_API size_t php_mb_mbchar_bytes_ex(const char *s, const mbfl_encoding *enc)
4584 {
4585 if (enc != NULL) {
4586 if (enc->flag & MBFL_ENCTYPE_MBCS) {
4587 if (enc->mblen_table != NULL) {
4588 if (s != NULL) return enc->mblen_table[*(unsigned char *)s];
4589 }
4590 } else if (enc->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
4591 return 2;
4592 } else if (enc->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
4593 return 4;
4594 }
4595 }
4596 return 1;
4597 }
4598 /* }}} */
4599
4600 /* {{{ MBSTRING_API size_t php_mb_mbchar_bytes() */
php_mb_mbchar_bytes(const char * s TSRMLS_DC)4601 MBSTRING_API size_t php_mb_mbchar_bytes(const char *s TSRMLS_DC)
4602 {
4603 return php_mb_mbchar_bytes_ex(s, MBSTRG(internal_encoding));
4604 }
4605 /* }}} */
4606
4607 /* {{{ MBSTRING_API char *php_mb_safe_strrchr_ex() */
php_mb_safe_strrchr_ex(const char * s,unsigned int c,size_t nbytes,const mbfl_encoding * enc)4608 MBSTRING_API char *php_mb_safe_strrchr_ex(const char *s, unsigned int c, size_t nbytes, const mbfl_encoding *enc)
4609 {
4610 register const char *p = s;
4611 char *last=NULL;
4612
4613 if (nbytes == (size_t)-1) {
4614 size_t nb = 0;
4615
4616 while (*p != '\0') {
4617 if (nb == 0) {
4618 if ((unsigned char)*p == (unsigned char)c) {
4619 last = (char *)p;
4620 }
4621 nb = php_mb_mbchar_bytes_ex(p, enc);
4622 if (nb == 0) {
4623 return NULL; /* something is going wrong! */
4624 }
4625 }
4626 --nb;
4627 ++p;
4628 }
4629 } else {
4630 register size_t bcnt = nbytes;
4631 register size_t nbytes_char;
4632 while (bcnt > 0) {
4633 if ((unsigned char)*p == (unsigned char)c) {
4634 last = (char *)p;
4635 }
4636 nbytes_char = php_mb_mbchar_bytes_ex(p, enc);
4637 if (bcnt < nbytes_char) {
4638 return NULL;
4639 }
4640 p += nbytes_char;
4641 bcnt -= nbytes_char;
4642 }
4643 }
4644 return last;
4645 }
4646 /* }}} */
4647
4648 /* {{{ MBSTRING_API char *php_mb_safe_strrchr() */
php_mb_safe_strrchr(const char * s,unsigned int c,size_t nbytes TSRMLS_DC)4649 MBSTRING_API char *php_mb_safe_strrchr(const char *s, unsigned int c, size_t nbytes TSRMLS_DC)
4650 {
4651 return php_mb_safe_strrchr_ex(s, c, nbytes, MBSTRG(internal_encoding));
4652 }
4653 /* }}} */
4654
4655 /* {{{ MBSTRING_API int php_mb_stripos()
4656 */
php_mb_stripos(int mode,const char * old_haystack,unsigned int old_haystack_len,const char * old_needle,unsigned int old_needle_len,long offset,const char * from_encoding TSRMLS_DC)4657 MBSTRING_API int php_mb_stripos(int mode, const char *old_haystack, unsigned int old_haystack_len, const char *old_needle, unsigned int old_needle_len, long offset, const char *from_encoding TSRMLS_DC)
4658 {
4659 int n;
4660 mbfl_string haystack, needle;
4661 n = -1;
4662
4663 mbfl_string_init(&haystack);
4664 mbfl_string_init(&needle);
4665 haystack.no_language = MBSTRG(language);
4666 haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
4667 needle.no_language = MBSTRG(language);
4668 needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
4669
4670 do {
4671 size_t len = 0;
4672 haystack.val = (unsigned char *)php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, (char *)old_haystack, old_haystack_len, &len, from_encoding TSRMLS_CC);
4673 haystack.len = len;
4674
4675 if (!haystack.val) {
4676 break;
4677 }
4678
4679 if (haystack.len <= 0) {
4680 break;
4681 }
4682
4683 needle.val = (unsigned char *)php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, (char *)old_needle, old_needle_len, &len, from_encoding TSRMLS_CC);
4684 needle.len = len;
4685
4686 if (!needle.val) {
4687 break;
4688 }
4689
4690 if (needle.len <= 0) {
4691 break;
4692 }
4693
4694 haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding);
4695 if (haystack.no_encoding == mbfl_no_encoding_invalid) {
4696 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", from_encoding);
4697 break;
4698 }
4699
4700 {
4701 int haystack_char_len = mbfl_strlen(&haystack);
4702
4703 if (mode) {
4704 if ((offset > 0 && offset > haystack_char_len) ||
4705 (offset < 0 && -offset > haystack_char_len)) {
4706 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset is greater than the length of haystack string");
4707 break;
4708 }
4709 } else {
4710 if (offset < 0 || offset > haystack_char_len) {
4711 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset not contained in string");
4712 break;
4713 }
4714 }
4715 }
4716
4717 n = mbfl_strpos(&haystack, &needle, offset, mode);
4718 } while(0);
4719
4720 if (haystack.val) {
4721 efree(haystack.val);
4722 }
4723
4724 if (needle.val) {
4725 efree(needle.val);
4726 }
4727
4728 return n;
4729 }
4730 /* }}} */
4731
/* Reports MBSTRG(http_input_list) and its size through the two output
 * parameters. */
static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *list_size TSRMLS_DC) /* {{{ */
{
	*list_size = MBSTRG(http_input_list_size);
	*list = (const zend_encoding **)MBSTRG(http_input_list);
}
4737 /* }}} */
4738
/* Records `encoding` in MBSTRG(http_input_identify). */
static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding TSRMLS_DC) /* {{{ */
{
	const mbfl_encoding *identified = (const mbfl_encoding*)encoding;

	MBSTRG(http_input_identify) = identified;
}
4743 /* }}} */
4744
4745 #endif /* HAVE_MBSTRING */
4746
4747 /*
4748 * Local variables:
4749 * tab-width: 4
4750 * c-basic-offset: 4
4751 * End:
4752 * vim600: fdm=marker
4753 * vim: noet sw=4 ts=4
4754 */
4755